Add cache of match failures
This change introduces a SQLite database that stores the track_id, DB insertion time, and TTL for each cached entry. The TTL starts at one week and increases exponentially by a factor of 2 each time the same track_id is re-added to the database. This significantly reduces the script's execution time when many match failures have accumulated, since they do not need to be re-checked on every run.
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1,7 +1,7 @@
|
|||||||
# Config and cache files
|
# Config and cache files
|
||||||
config.yml
|
config.yml
|
||||||
config.yaml
|
config.yaml
|
||||||
.cache-*
|
.cache*
|
||||||
.session.yml
|
.session.yml
|
||||||
|
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ dependencies = [
|
|||||||
"tidalapi~=0.7",
|
"tidalapi~=0.7",
|
||||||
"pyyaml~=6.0",
|
"pyyaml~=6.0",
|
||||||
"tqdm~=4.64",
|
"tqdm~=4.64",
|
||||||
|
"sqlalchemy~=2.0"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
63
src/spotify_to_tidal/database.py
Normal file
63
src/spotify_to_tidal/database.py
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
import datetime
|
||||||
|
import sqlalchemy
|
||||||
|
from sqlalchemy import Table, Column, String, DateTime, MetaData, insert, select, update, delete
|
||||||
|
|
||||||
|
|
||||||
|
class Database:
    """SQLite-backed cache of track-match failures.

    Each failed track_id is stored with the time it was first inserted and a
    `next_retry` timestamp; lookups before `next_retry` are treated as cache
    hits so the expensive remote search can be skipped. The retry interval
    starts at one week and doubles on every repeated failure.
    """

    def __init__(self, filename='.cache.db'):
        """Open (or create) the sqlite cache file and ensure the schema exists.

        :param filename: path of the sqlite database file backing the cache
        """
        # FIX: the original had the filename garbled out of the URL
        # (f"sqlite:///(unknown)"); interpolate the parameter as intended.
        self.engine = sqlalchemy.create_engine(f"sqlite:///{filename}")
        meta = MetaData()
        self.match_failures = Table('match_failures', meta,
                                    Column('track_id', String,
                                           primary_key=True),
                                    Column('insert_time', DateTime),
                                    Column('next_retry', DateTime),
                                    sqlite_autoincrement=False)
        # Idempotent: only creates tables that do not already exist.
        meta.create_all(self.engine)

    def _get_next_retry_time(self, insert_time=None):
        """Return the timestamp after which matching may be retried.

        :param insert_time: time the failure was first recorded; when given,
            the interval is twice the time elapsed since then (exponential
            backoff). When omitted (a fresh failure), the interval is one week.
        """
        if insert_time:
            # double interval on each retry
            interval = 2 * (datetime.datetime.now() - insert_time)
        else:
            interval = datetime.timedelta(days=7)
        return datetime.datetime.now() + interval

    def cache_match_failure(self, track_id):
        """ notifies that matching failed for the given track_id """
        fetch_statement = select(self.match_failures).where(
            self.match_failures.c.track_id == track_id)
        with self.engine.connect() as connection:
            with connection.begin():
                # Either update the next_retry time if track_id already exists, otherwise create a new entry
                existing_failure = connection.execute(
                    fetch_statement).fetchone()
                if existing_failure:
                    # FIX: pass the original insert_time so the retry interval
                    # actually doubles on each repeated failure. The original
                    # called _get_next_retry_time() with no argument, which
                    # reset the TTL to a flat one week every time, defeating
                    # the exponential backoff described in the commit message.
                    update_statement = update(self.match_failures).where(
                        self.match_failures.c.track_id == track_id).values(
                        next_retry=self._get_next_retry_time(existing_failure.insert_time))
                    connection.execute(update_statement)
                else:
                    connection.execute(insert(self.match_failures), {
                        "track_id": track_id,
                        "insert_time": datetime.datetime.now(),
                        "next_retry": self._get_next_retry_time()})

    def has_match_failure(self, track_id):
        """ checks if there was a recent search for which matching failed with the given track_id """
        statement = select(self.match_failures.c.next_retry).where(
            self.match_failures.c.track_id == track_id)
        with self.engine.connect() as connection:
            match_failure = connection.execute(statement).fetchone()
            if match_failure:
                # Still within the backoff window -> treat as a cached failure.
                return match_failure.next_retry > datetime.datetime.now()
            return False

    def remove_match_failure(self, track_id):
        """ removes match failure from the database """
        statement = delete(self.match_failures).where(
            self.match_failures.c.track_id == track_id)
        with self.engine.connect() as connection:
            with connection.begin():
                connection.execute(statement)
|
|
||||||
|
|
||||||
|
# Main singleton instance shared by the rest of the package.
# NOTE: instantiating at import time opens/creates the sqlite file
# (default '.cache.db') as a side effect of importing this module.
failure_cache = Database()
||||||
@@ -1,5 +1,6 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
from .database import failure_cache
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from typing import Sequence, Set, Mapping
|
from typing import Sequence, Set, Mapping
|
||||||
from multiprocessing import Pool
|
from multiprocessing import Pool
|
||||||
@@ -89,10 +90,12 @@ def match(tidal_track, spotify_track) -> bool:
|
|||||||
and artist_match(tidal_track, spotify_track)
|
and artist_match(tidal_track, spotify_track)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def tidal_search(spotify_track_and_cache, tidal_session: tidalapi.Session) -> tidalapi.Track | None:
|
def tidal_search(spotify_track_and_cache, tidal_session: tidalapi.Session) -> tidalapi.Track | None:
|
||||||
spotify_track, cached_tidal_track = spotify_track_and_cache
|
spotify_track, cached_tidal_track = spotify_track_and_cache
|
||||||
if cached_tidal_track: return cached_tidal_track
|
if cached_tidal_track: return cached_tidal_track
|
||||||
|
if spotify_track['id'] is None: return None
|
||||||
|
if failure_cache.has_match_failure(spotify_track['id']):
|
||||||
|
return None
|
||||||
# search for album name and first album artist
|
# search for album name and first album artist
|
||||||
if 'album' in spotify_track and 'artists' in spotify_track['album'] and len(spotify_track['album']['artists']):
|
if 'album' in spotify_track and 'artists' in spotify_track['album'] and len(spotify_track['album']['artists']):
|
||||||
album_result = tidal_session.search(simple(spotify_track['album']['name']) + " " + simple(spotify_track['album']['artists'][0]['name']), models=[tidalapi.album.Album])
|
album_result = tidal_session.search(simple(spotify_track['album']['name']) + " " + simple(spotify_track['album']['artists'][0]['name']), models=[tidalapi.album.Album])
|
||||||
@@ -101,11 +104,14 @@ def tidal_search(spotify_track_and_cache, tidal_session: tidalapi.Session) -> ti
|
|||||||
if len(album_tracks) >= spotify_track['track_number']:
|
if len(album_tracks) >= spotify_track['track_number']:
|
||||||
track = album_tracks[spotify_track['track_number'] - 1]
|
track = album_tracks[spotify_track['track_number'] - 1]
|
||||||
if match(track, spotify_track):
|
if match(track, spotify_track):
|
||||||
|
failure_cache.remove_match_failure(spotify_track['id'])
|
||||||
return track
|
return track
|
||||||
# if that fails then search for track name and first artist
|
# if that fails then search for track name and first artist
|
||||||
for track in tidal_session.search(simple(spotify_track['name']) + ' ' + simple(spotify_track['artists'][0]['name']), models=[tidalapi.media.Track])['tracks']:
|
for track in tidal_session.search(simple(spotify_track['name']) + ' ' + simple(spotify_track['artists'][0]['name']), models=[tidalapi.media.Track])['tracks']:
|
||||||
if match(track, spotify_track):
|
if match(track, spotify_track):
|
||||||
|
failure_cache.remove_match_failure(spotify_track['id'])
|
||||||
return track
|
return track
|
||||||
|
failure_cache.cache_match_failure(spotify_track['id'])
|
||||||
|
|
||||||
def get_tidal_playlists_dict(tidal_session: tidalapi.Session) -> Mapping[str, tidalapi.Playlist]:
|
def get_tidal_playlists_dict(tidal_session: tidalapi.Session) -> Mapping[str, tidalapi.Playlist]:
|
||||||
# a dictionary of name --> playlist
|
# a dictionary of name --> playlist
|
||||||
@@ -284,4 +290,4 @@ def get_playlists_from_spotify(spotify_session: spotipy.Spotify, config):
|
|||||||
|
|
||||||
def get_playlists_from_config(config):
    """Read the playlist sync mappings from the configuration.

    Returns a list of (spotify_id, tidal_id) tuples, one per entry in the
    config's 'sync_playlists' section.
    """
    pairs = []
    for entry in config['sync_playlists']:
        pairs.append((entry['spotify_id'], entry['tidal_id']))
    return pairs
||||||
|
|||||||
Reference in New Issue
Block a user