Bump version to 0.1.4

Fix failure cache not being populated
Handle case where album contains incorrect metadata
2024-06-08 20:50:25 +02:00 · 2024-06-08 20:48:26 +02:00 · 2024-06-08 17:56:06 +02:00 · 2024-06-08 16:41:15 +02:00 · 2024-06-08 12:55:09 +02:00 · 2024-06-08 12:54:27 +02:00
4 changed files with 37 additions and 18 deletions
--- a/example_config.yml
+++ b/example_config.yml
@@ -16,4 +16,4 @@ spotify:

 # increasing these parameters should increase the search speed, while decreasing reduces likelihood of 429 errors
 max_concurrency: 10 # max concurrent connections at any given time
-rate_limit:      12 # max sustained connections per second
+rate_limit:      10 # max sustained connections per second
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "spotify_to_tidal"
-version = "0.1.1"
+version = "0.1.4"
 requires-python = ">= 3.10"

 dependencies = [
--- a/readme.md
+++ b/readme.md
@@ -1,4 +1,4 @@
-A command line tool for importing your Spotify playlists into Tidal
+A command line tool for importing your Spotify playlists into Tidal. Due to various performance optimisations, it is particularly suited for periodic synchronisation of very large collections.

 Installation
 -----------
--- a/src/spotify_to_tidal/sync.py
+++ b/src/spotify_to_tidal/sync.py
@@ -2,6 +2,8 @@

 import asyncio
 from .cache import failure_cache, track_match_cache
+import datetime
+from difflib import SequenceMatcher
 from functools import partial
 from typing import List, Sequence, Set, Mapping
 import math
@@ -50,7 +52,7 @@ def name_match(tidal_track, spotify_track) -> bool:
    simple_spotify_track = simple(spotify_track['name'].lower()).split('feat.')[0].strip()
    return simple_spotify_track in tidal_track.name.lower() or normalize(simple_spotify_track) in normalize(tidal_track.name.lower())

-def artist_match(tidal_track: tidalapi.Track, spotify_track) -> bool:
+def artist_match(tidal: tidalapi.Track | tidalapi.Album, spotify) -> bool:
    def split_artist_name(artist: str) -> Sequence[str]:
       if '&' in artist:
           return artist.split('&')
@@ -59,9 +61,9 @@ def artist_match(tidal_track: tidalapi.Track, spotify_track) -> bool:
       else:
           return [artist]

-    def get_tidal_artists(tidal_track: tidalapi.Track, do_normalize=False) -> Set[str]:
+    def get_tidal_artists(tidal: tidalapi.Track | tidalapi.Album, do_normalize=False) -> Set[str]:
        result: list[str] = []
-        for artist in tidal_track.artists:
+        for artist in tidal.artists:
            if do_normalize:
                artist_name = normalize(artist.name)
            else:
@@ -69,9 +71,9 @@ def artist_match(tidal_track: tidalapi.Track, spotify_track) -> bool:
            result.extend(split_artist_name(artist_name))
        return set([simple(x.strip().lower()) for x in result])

-    def get_spotify_artists(spotify_track: t_spotify.SpotifyTrack, do_normalize=False) -> Set[str]:
+    def get_spotify_artists(spotify, do_normalize=False) -> Set[str]:
        result: list[str] = []
-        for artist in spotify_track['artists']:
+        for artist in spotify['artists']:
            if do_normalize:
                artist_name = normalize(artist['name'])
            else:
@@ -80,32 +82,42 @@ def artist_match(tidal_track: tidalapi.Track, spotify_track) -> bool:
        return set([simple(x.strip().lower()) for x in result])
    # There must be at least one overlapping artist between the Tidal and Spotify track
    # Try with both un-normalized and then normalized
-    if get_tidal_artists(tidal_track).intersection(get_spotify_artists(spotify_track)) != set():
+    if get_tidal_artists(tidal).intersection(get_spotify_artists(spotify)) != set():
        return True
-    return get_tidal_artists(tidal_track, True).intersection(get_spotify_artists(spotify_track, True)) != set()
+    return get_tidal_artists(tidal, True).intersection(get_spotify_artists(spotify, True)) != set()

 def match(tidal_track, spotify_track) -> bool:
+    if not spotify_track['id']: return False
    return isrc_match(tidal_track, spotify_track) or (
        duration_match(tidal_track, spotify_track)
        and name_match(tidal_track, spotify_track)
        and artist_match(tidal_track, spotify_track)
    )

+def test_album_similarity(spotify_album, tidal_album, threshold=0.6):
+    return SequenceMatcher(None, simple(spotify_album['name']), simple(tidal_album.name)).ratio() >= threshold and artist_match(tidal_album, spotify_album)
+
 async def tidal_search(spotify_track, rate_limiter, tidal_session: tidalapi.Session) -> tidalapi.Track | None:
    def _search_for_track_in_album():
        # search for album name and first album artist
        if 'album' in spotify_track and 'artists' in spotify_track['album'] and len(spotify_track['album']['artists']):
-            album_result = tidal_session.search(simple(spotify_track['album']['name']) + " " + simple(spotify_track['album']['artists'][0]['name']), models=[tidalapi.album.Album])
+            query = simple(spotify_track['album']['name']) + " " + simple(spotify_track['album']['artists'][0]['name'])
+            album_result = tidal_session.search(query, models=[tidalapi.album.Album])
            for album in album_result['albums']:
-                album_tracks = album.tracks()
-                if len(album_tracks) >= spotify_track['track_number']:
+                if album.num_tracks >= spotify_track['track_number'] and test_album_similarity(spotify_track['album'], album):
+                    album_tracks = album.tracks()
+                    if len(album_tracks) < spotify_track['track_number']:
+                        assert( not len(album_tracks) == album.num_tracks ) # incorrect metadata :(
+                        continue
                    track = album_tracks[spotify_track['track_number'] - 1]
                    if match(track, spotify_track):
                        failure_cache.remove_match_failure(spotify_track['id'])
                        return track
+
    def _search_for_standalone_track():
        # if album search fails then search for track name and first artist
-        for track in tidal_session.search(simple(spotify_track['name']) + ' ' + simple(spotify_track['artists'][0]['name']), models=[tidalapi.media.Track])['tracks']:
+        query = simple(spotify_track['name']) + ' ' + simple(spotify_track['artists'][0]['name'])
+        for track in tidal_session.search(query, models=[tidalapi.media.Track])['tracks']:
            if match(track, spotify_track):
                failure_cache.remove_match_failure(spotify_track['id'])
                return track
@@ -117,7 +129,6 @@ async def tidal_search(spotify_track, rate_limiter, tidal_session: tidalapi.Sess
    track_search = await asyncio.to_thread( _search_for_standalone_track )
    if track_search:
        return track_search
-    return None

    # if none of the search modes succeeded then store the track id to the failure cache
    failure_cache.cache_match_failure(spotify_track['id'])
@@ -214,6 +225,7 @@ def get_tracks_for_new_tidal_playlist(spotify_tracks: Sequence[t_spotify.Spotify
    output = []
    seen_tracks = set()
    for spotify_track in spotify_tracks:
+        if not spotify_track['id']: continue
        tidal_id = track_match_cache.get(spotify_track['id'])
        if tidal_id and not tidal_id in seen_tracks:
            output.append(tidal_id)
@@ -222,10 +234,16 @@ def get_tracks_for_new_tidal_playlist(spotify_tracks: Sequence[t_spotify.Spotify

 async def sync_playlist(spotify_session: spotipy.Spotify, tidal_session: tidalapi.Session, spotify_playlist, tidal_playlist: tidalapi.Playlist | None, config):
    async def _run_rate_limiter(semaphore):
-        ''' Leaky bucket algorithm for rate limiting. Periodically releases an item from semaphore at rate_limit'''
+        ''' Leaky bucket algorithm for rate limiting. Periodically releases items from semaphore at rate_limit'''
+        _sleep_time = config.get('max_concurrency', 10)/config.get('rate_limit', 10)/4 # aim to sleep approx time to drain 1/4 of 'bucket'
+        t0 = datetime.datetime.now()
        while True:
-            await asyncio.sleep(1/config.get('rate_limit', 12)) # sleep for min time between new function executions
-            semaphore.release() # leak one item from the 'bucket'
+            await asyncio.sleep(_sleep_time)
+            t = datetime.datetime.now()
+            dt = (t - t0).total_seconds()
+            new_items = round(config.get('rate_limit', 10)*dt)
+            t0 = t
+            [semaphore.release() for i in range(new_items)] # leak new_items from the 'bucket'

    # Create a new Tidal playlist if required
    if not tidal_playlist:
@@ -294,6 +312,7 @@ async def get_playlists_from_spotify(spotify_session: spotipy.Spotify, config):
    print("Loading Spotify playlists")
    results = spotify_session.user_playlists(config['spotify']['username'])
    exclude_list = set([x.split(':')[-1] for x in config.get('excluded_playlists', [])])
+    playlists.extend([p for p in results['items'] if p['owner']['id'] == config['spotify']['username'] and not p['id'] in exclude_list])
      
    # get all the remaining playlists in parallel
    if results['next']:
Author	SHA1	Message	Date
Tim Rae	8692624a8c	Bump version to 0.1.4	2024-06-08 20:50:25 +02:00
Tim Rae	dc959f2657	Fix failure cache not being populated	2024-06-08 20:48:26 +02:00
Tim Rae	d9312d89dd	Handle case where album contains incorrect metadata	2024-06-08 17:56:06 +02:00
Tim Rae	1a7ff4f083	Performance optimisation: Don't query tracks for uninteresting albums. The call to tidal_album.tracks() can take a very long time to execute, which can significantly slow down the search in certain cases.	2024-06-08 16:41:15 +02:00
Tim Rae	0c859cc9aa	Bump version to 0.1.3	2024-06-08 12:55:09 +02:00
Tim Rae	bb0f3cffd0	Fix rate limit accuracy. Batching multiple updates to the leaky bucket at a fixed interval improves the accuracy of the rate limiter. Previously the rate would drop substantially over the course of the sync operation.	2024-06-08 12:54:27 +02:00
Adria Jimenez	ecc642ba7d	fix: sync first playlists page. This commit: `1e8366a0e8` broke the loading of playlists. The first results coming back from the Spotify API were not being added to the playlists array.	2024-06-06 13:33:40 +02:00
Tim Rae	3e9b2ef0ec	Update readme.md	2024-06-05 09:22:24 +02:00
Tim Rae	a16f764bee	Bump version to 0.1.2	2024-06-03 23:38:21 +02:00
Tim Rae	c1956d19cc	Fix bug where occasionally wrong track is inserted	2024-06-03 23:38:21 +02:00