8 Commits
0.1.2 ... 0.1.4

Author SHA1 Message Date
Tim Rae
8692624a8c Bump version to 0.1.4 2024-06-08 20:50:25 +02:00
Tim Rae
dc959f2657 Fix failure cache not being populated 2024-06-08 20:48:26 +02:00
Tim Rae
d9312d89dd Handle case where album contains incorrect metadata 2024-06-08 17:56:06 +02:00
Tim Rae
1a7ff4f083 Performance optimisation: Don't query tracks for uninteresting albums
The call to tidal_album.tracks() can take a very long time to execute,
which can significantly slow down the search in certain cases.
2024-06-08 16:41:15 +02:00
Tim Rae
0c859cc9aa Bump version to 0.1.3 2024-06-08 12:55:09 +02:00
Tim Rae
bb0f3cffd0 Fix rate limit accuracy
Batching multiple updates to the leaky bucket at a fixed interval
improves the accuracy of the rate limiter. Previously the rate would
drop substantially over the course of the sync operation.
2024-06-08 12:54:27 +02:00
Adria Jimenez
ecc642ba7d fix: sync first playlists page
Commit 1e8366a0e8 broke the loading of playlists: the first page of results coming back from the Spotify API was not being added to the playlists array.
2024-06-06 13:33:40 +02:00
Tim Rae
3e9b2ef0ec Update readme.md 2024-06-05 09:22:24 +02:00
4 changed files with 35 additions and 18 deletions

View File

@@ -16,4 +16,4 @@ spotify:
# increasing these parameters should increase the search speed, while decreasing reduces likelihood of 429 errors # increasing these parameters should increase the search speed, while decreasing reduces likelihood of 429 errors
max_concurrency: 10 # max concurrent connections at any given time max_concurrency: 10 # max concurrent connections at any given time
rate_limit: 12 # max sustained connections per second rate_limit: 10 # max sustained connections per second

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project] [project]
name = "spotify_to_tidal" name = "spotify_to_tidal"
version = "0.1.2" version = "0.1.4"
requires-python = ">= 3.10" requires-python = ">= 3.10"
dependencies = [ dependencies = [

View File

@@ -1,4 +1,4 @@
A command line tool for importing your Spotify playlists into Tidal A command line tool for importing your Spotify playlists into Tidal. Due to various performance optimisations, it is particularly suited for periodic synchronisation of very large collections.
Installation Installation
----------- -----------

View File

@@ -2,6 +2,8 @@
import asyncio import asyncio
from .cache import failure_cache, track_match_cache from .cache import failure_cache, track_match_cache
import datetime
from difflib import SequenceMatcher
from functools import partial from functools import partial
from typing import List, Sequence, Set, Mapping from typing import List, Sequence, Set, Mapping
import math import math
@@ -50,7 +52,7 @@ def name_match(tidal_track, spotify_track) -> bool:
simple_spotify_track = simple(spotify_track['name'].lower()).split('feat.')[0].strip() simple_spotify_track = simple(spotify_track['name'].lower()).split('feat.')[0].strip()
return simple_spotify_track in tidal_track.name.lower() or normalize(simple_spotify_track) in normalize(tidal_track.name.lower()) return simple_spotify_track in tidal_track.name.lower() or normalize(simple_spotify_track) in normalize(tidal_track.name.lower())
def artist_match(tidal_track: tidalapi.Track, spotify_track) -> bool: def artist_match(tidal: tidalapi.Track | tidalapi.Album, spotify) -> bool:
def split_artist_name(artist: str) -> Sequence[str]: def split_artist_name(artist: str) -> Sequence[str]:
if '&' in artist: if '&' in artist:
return artist.split('&') return artist.split('&')
@@ -59,9 +61,9 @@ def artist_match(tidal_track: tidalapi.Track, spotify_track) -> bool:
else: else:
return [artist] return [artist]
def get_tidal_artists(tidal_track: tidalapi.Track, do_normalize=False) -> Set[str]: def get_tidal_artists(tidal: tidalapi.Track | tidalapi.Album, do_normalize=False) -> Set[str]:
result: list[str] = [] result: list[str] = []
for artist in tidal_track.artists: for artist in tidal.artists:
if do_normalize: if do_normalize:
artist_name = normalize(artist.name) artist_name = normalize(artist.name)
else: else:
@@ -69,9 +71,9 @@ def artist_match(tidal_track: tidalapi.Track, spotify_track) -> bool:
result.extend(split_artist_name(artist_name)) result.extend(split_artist_name(artist_name))
return set([simple(x.strip().lower()) for x in result]) return set([simple(x.strip().lower()) for x in result])
def get_spotify_artists(spotify_track: t_spotify.SpotifyTrack, do_normalize=False) -> Set[str]: def get_spotify_artists(spotify, do_normalize=False) -> Set[str]:
result: list[str] = [] result: list[str] = []
for artist in spotify_track['artists']: for artist in spotify['artists']:
if do_normalize: if do_normalize:
artist_name = normalize(artist['name']) artist_name = normalize(artist['name'])
else: else:
@@ -80,9 +82,9 @@ def artist_match(tidal_track: tidalapi.Track, spotify_track) -> bool:
return set([simple(x.strip().lower()) for x in result]) return set([simple(x.strip().lower()) for x in result])
# There must be at least one overlapping artist between the Tidal and Spotify track # There must be at least one overlapping artist between the Tidal and Spotify track
# Try with both un-normalized and then normalized # Try with both un-normalized and then normalized
if get_tidal_artists(tidal_track).intersection(get_spotify_artists(spotify_track)) != set(): if get_tidal_artists(tidal).intersection(get_spotify_artists(spotify)) != set():
return True return True
return get_tidal_artists(tidal_track, True).intersection(get_spotify_artists(spotify_track, True)) != set() return get_tidal_artists(tidal, True).intersection(get_spotify_artists(spotify, True)) != set()
def match(tidal_track, spotify_track) -> bool: def match(tidal_track, spotify_track) -> bool:
if not spotify_track['id']: return False if not spotify_track['id']: return False
@@ -92,21 +94,30 @@ def match(tidal_track, spotify_track) -> bool:
and artist_match(tidal_track, spotify_track) and artist_match(tidal_track, spotify_track)
) )
def test_album_similarity(spotify_album, tidal_album, threshold=0.6):
return SequenceMatcher(None, simple(spotify_album['name']), simple(tidal_album.name)).ratio() >= threshold and artist_match(tidal_album, spotify_album)
async def tidal_search(spotify_track, rate_limiter, tidal_session: tidalapi.Session) -> tidalapi.Track | None: async def tidal_search(spotify_track, rate_limiter, tidal_session: tidalapi.Session) -> tidalapi.Track | None:
def _search_for_track_in_album(): def _search_for_track_in_album():
# search for album name and first album artist # search for album name and first album artist
if 'album' in spotify_track and 'artists' in spotify_track['album'] and len(spotify_track['album']['artists']): if 'album' in spotify_track and 'artists' in spotify_track['album'] and len(spotify_track['album']['artists']):
album_result = tidal_session.search(simple(spotify_track['album']['name']) + " " + simple(spotify_track['album']['artists'][0]['name']), models=[tidalapi.album.Album]) query = simple(spotify_track['album']['name']) + " " + simple(spotify_track['album']['artists'][0]['name'])
album_result = tidal_session.search(query, models=[tidalapi.album.Album])
for album in album_result['albums']: for album in album_result['albums']:
if album.num_tracks >= spotify_track['track_number'] and test_album_similarity(spotify_track['album'], album):
album_tracks = album.tracks() album_tracks = album.tracks()
if len(album_tracks) >= spotify_track['track_number']: if len(album_tracks) < spotify_track['track_number']:
assert( not len(album_tracks) == album.num_tracks ) # incorrect metadata :(
continue
track = album_tracks[spotify_track['track_number'] - 1] track = album_tracks[spotify_track['track_number'] - 1]
if match(track, spotify_track): if match(track, spotify_track):
failure_cache.remove_match_failure(spotify_track['id']) failure_cache.remove_match_failure(spotify_track['id'])
return track return track
def _search_for_standalone_track(): def _search_for_standalone_track():
# if album search fails then search for track name and first artist # if album search fails then search for track name and first artist
for track in tidal_session.search(simple(spotify_track['name']) + ' ' + simple(spotify_track['artists'][0]['name']), models=[tidalapi.media.Track])['tracks']: query = simple(spotify_track['name']) + ' ' + simple(spotify_track['artists'][0]['name'])
for track in tidal_session.search(query, models=[tidalapi.media.Track])['tracks']:
if match(track, spotify_track): if match(track, spotify_track):
failure_cache.remove_match_failure(spotify_track['id']) failure_cache.remove_match_failure(spotify_track['id'])
return track return track
@@ -118,7 +129,6 @@ async def tidal_search(spotify_track, rate_limiter, tidal_session: tidalapi.Sess
track_search = await asyncio.to_thread( _search_for_standalone_track ) track_search = await asyncio.to_thread( _search_for_standalone_track )
if track_search: if track_search:
return track_search return track_search
return None
# if none of the search modes succeeded then store the track id to the failure cache # if none of the search modes succeeded then store the track id to the failure cache
failure_cache.cache_match_failure(spotify_track['id']) failure_cache.cache_match_failure(spotify_track['id'])
@@ -224,10 +234,16 @@ def get_tracks_for_new_tidal_playlist(spotify_tracks: Sequence[t_spotify.Spotify
async def sync_playlist(spotify_session: spotipy.Spotify, tidal_session: tidalapi.Session, spotify_playlist, tidal_playlist: tidalapi.Playlist | None, config): async def sync_playlist(spotify_session: spotipy.Spotify, tidal_session: tidalapi.Session, spotify_playlist, tidal_playlist: tidalapi.Playlist | None, config):
async def _run_rate_limiter(semaphore): async def _run_rate_limiter(semaphore):
''' Leaky bucket algorithm for rate limiting. Periodically releases an item from semaphore at rate_limit''' ''' Leaky bucket algorithm for rate limiting. Periodically releases items from semaphore at rate_limit'''
_sleep_time = config.get('max_concurrency', 10)/config.get('rate_limit', 10)/4 # aim to sleep approx time to drain 1/4 of 'bucket'
t0 = datetime.datetime.now()
while True: while True:
await asyncio.sleep(1/config.get('rate_limit', 12)) # sleep for min time between new function executions await asyncio.sleep(_sleep_time)
semaphore.release() # leak one item from the 'bucket' t = datetime.datetime.now()
dt = (t - t0).total_seconds()
new_items = round(config.get('rate_limit', 10)*dt)
t0 = t
[semaphore.release() for i in range(new_items)] # leak new_items from the 'bucket'
# Create a new Tidal playlist if required # Create a new Tidal playlist if required
if not tidal_playlist: if not tidal_playlist:
@@ -296,6 +312,7 @@ async def get_playlists_from_spotify(spotify_session: spotipy.Spotify, config):
print("Loading Spotify playlists") print("Loading Spotify playlists")
results = spotify_session.user_playlists(config['spotify']['username']) results = spotify_session.user_playlists(config['spotify']['username'])
exclude_list = set([x.split(':')[-1] for x in config.get('excluded_playlists', [])]) exclude_list = set([x.split(':')[-1] for x in config.get('excluded_playlists', [])])
playlists.extend([p for p in results['items'] if p['owner']['id'] == config['spotify']['username'] and not p['id'] in exclude_list])
# get all the remaining playlists in parallel # get all the remaining playlists in parallel
if results['next']: if results['next']: