From 8ff36623516b31dc3dbc8aae21634b7503231844 Mon Sep 17 00:00:00 2001 From: George <41969151+geo-martino@users.noreply.github.com> Date: Thu, 4 Apr 2024 14:22:46 -0400 Subject: [PATCH] add limiter deduplication preprocess on xautopf files (#61) * add concrete implmentations of length on all base collection types * add deduplication before limit on xautopf files + fix BasicCollection --- musify/libraries/collection.py | 5 +++ musify/libraries/core/collection.py | 6 +++ musify/libraries/core/object.py | 40 +++++++++++------- musify/libraries/local/collection.py | 6 --- musify/libraries/local/playlist/base.py | 3 -- musify/libraries/local/playlist/xautopf.py | 41 ++++++++++++++++--- musify/libraries/remote/core/object.py | 2 +- musify/libraries/remote/spotify/object.py | 5 --- musify/processors/limit.py | 10 ++--- .../playlist/Recently Added.xautopf | 2 +- .../libraries/local/playlist/test_xautopf.py | 18 +++++++- 11 files changed, 96 insertions(+), 42 deletions(-) diff --git a/musify/libraries/collection.py b/musify/libraries/collection.py index 6a544c6c..eb0cb19e 100644 --- a/musify/libraries/collection.py +++ b/musify/libraries/collection.py @@ -36,6 +36,11 @@ def name(self): def items(self) -> list[T]: return self._items + @property + def length(self): + lengths = {getattr(item, "length", None) for item in self.items} + return sum({length for length in lengths if length}) if lengths else None + def __init__(self, name: str, items: Collection[T]): super().__init__() self._name = name diff --git a/musify/libraries/core/collection.py b/musify/libraries/core/collection.py index 5e46d883..765017f0 100644 --- a/musify/libraries/core/collection.py +++ b/musify/libraries/core/collection.py @@ -109,6 +109,12 @@ def items(self) -> list[T]: """The items in this collection""" raise NotImplementedError + @property + @abstractmethod + def length(self) -> float | None: + """Total duration of all items in this collection in seconds""" + raise NotImplementedError + @staticmethod @abstractmethod def _validate_item_type(items: Any | Iterable[Any]) -> bool: diff --git a/musify/libraries/core/object.py b/musify/libraries/core/object.py index 6ca55134..bf02c048 100644 --- a/musify/libraries/core/object.py +++ b/musify/libraries/core/object.py @@ -161,7 +161,7 @@ def has_image(self) -> bool: @property @abstractmethod - def length(self) -> float: + def length(self) -> float | None: """Total duration of this track in seconds""" raise NotImplementedError @@ -225,7 +225,7 @@ def has_image(self) -> bool: return len(self.image_links) > 0 @property - def length(self) -> float | None: + def length(self): """Total duration of all tracks in this playlist in seconds""" lengths = {track.length for track in self.tracks} return sum(lengths) if lengths else None @@ -309,6 +309,12 @@ def items(self): """The tracks in this collection""" return self.tracks + @property + def length(self): + """Total duration of all tracks in this library in seconds""" + lengths = {track.length for track in self.tracks} + return sum(lengths) if lengths else None + @property @abstractmethod def tracks(self) -> list[T]: @@ -516,10 +522,10 @@ def compilation(self) -> bool: raise NotImplementedError @property - @abstractmethod - def length(self) -> float | None: - """Total duration of all tracks in this folder""" - raise NotImplementedError + def length(self): + """Total duration of all tracks in this folder in seconds""" + lengths = {track.length for track in self.tracks} + return sum(lengths) if lengths else None class Album[T: Track](MusifyCollection[T], metaclass=ABCMeta): @@ -633,10 +639,10 @@ def has_image(self) -> bool: return len(self.image_links) > 0 @property - @abstractmethod - def length(self) -> float | None: + def length(self): """Total duration of all tracks on this album in seconds""" - raise NotImplementedError + lengths = {track.length for track in self.tracks} + return sum(lengths) if lengths else None @property @abstractmethod @@ -645,7 +651,7 @@ def rating(self) -> float | None: raise NotImplementedError -class Artist[T: Track](MusifyCollection[T], metaclass=ABCMeta): +class Artist[T: (Track, Album)](MusifyCollection[T], metaclass=ABCMeta): """An artist of items and their derived properties/objects.""" __attributes_classes__ = MusifyCollection @@ -704,10 +710,10 @@ def genres(self) -> list[str]: raise NotImplementedError @property - @abstractmethod - def length(self) -> float | None: - """Total duration of all tracks by this artist""" - raise NotImplementedError + def length(self): + """Total duration of all tracks by this artist in seconds""" + lengths = {track.length for track in self.tracks} + return sum(lengths) if lengths else None @property @abstractmethod @@ -761,3 +767,9 @@ def albums(self) -> list[str]: def genres(self) -> list[str]: """List of genres ordered by frequency of appearance on the tracks for this genre""" raise NotImplementedError + + @property + def length(self): + """Total duration of all tracks with this genre in seconds""" + lengths = {track.length for track in self.tracks} + return sum(lengths) if lengths else None diff --git a/musify/libraries/local/collection.py b/musify/libraries/local/collection.py index d27eca62..f6774279 100644 --- a/musify/libraries/local/collection.py +++ b/musify/libraries/local/collection.py @@ -71,12 +71,6 @@ def genres(self) -> list[str]: genres = (genre for track in self.tracks for genre in (track.genres if track.genres else [])) return get_most_common_values(genres) - @property - def length(self) -> float | None: - """Total duration of all tracks in this collection in seconds""" - lengths = {track.length for track in self.tracks} - return sum(lengths) if lengths else None - @property def last_modified(self) -> datetime: """Timestamp of the last modified track in this collection""" diff --git a/musify/libraries/local/playlist/base.py b/musify/libraries/local/playlist/base.py index 44f8b2ce..2f33d3eb 100644 --- a/musify/libraries/local/playlist/base.py +++ b/musify/libraries/local/playlist/base.py @@ -103,7 +103,6 @@ def __init__( self._original: list[LocalTrack] = [] def _match(self, tracks: Collection[LocalTrack] = (), reference: LocalTrack | None = None) -> None: - """Wrapper for matcher operations""" if self.matcher is None or not tracks: return @@ -113,14 +112,12 @@ def _match(self, tracks: Collection[LocalTrack] = (), reference: LocalTrack | No self.tracks: list[LocalTrack] = list(self.matcher(values=tracks, reference=reference)) def _limit(self, ignore: Collection[str | LocalTrack]) -> None: - """Wrapper for limiter operations""" if self.limiter is not None and self.tracks is not None: track_path_map = {track.path: track for track in self.tracks} ignore: set[LocalTrack] = {i if isinstance(i, LocalTrack) else track_path_map.get(i) for i in ignore} self.limiter(items=self.tracks, ignore=ignore) def _sort(self) -> None: - """Wrapper for sorter operations""" if self.sorter is not None and self.tracks is not None: self.sorter(items=self.tracks) diff --git a/musify/libraries/local/playlist/xautopf.py b/musify/libraries/local/playlist/xautopf.py index 84911877..547a8277 100644 --- a/musify/libraries/local/playlist/xautopf.py +++ b/musify/libraries/local/playlist/xautopf.py @@ -102,6 +102,15 @@ def description(self): def description(self, value: str | None): self._parser.description = value + @property + def limiter_deduplication(self) -> bool: + """This setting controls whether duplicates should be filtered out before running limiter operations.""" + return self._limiter_deduplication + + @limiter_deduplication.setter + def limiter_deduplication(self, value: bool): + self._limiter_deduplication = value + @property def image_links(self): return {} @@ -125,6 +134,8 @@ def __init__( self._parser.parse_limiter() self._parser.parse_sorter() + self._limiter_deduplication: bool = self._parser.limiter_deduplication + super().__init__( path=path, matcher=self._parser.get_matcher(), @@ -146,6 +157,21 @@ def load(self, tracks: Collection[LocalTrack] = ()) -> list[LocalTrack]: self._original = self.tracks.copy() return self.tracks + def _limit(self, ignore: Collection[LocalTrack]) -> None: + if self.limiter is not None and self.tracks is not None and self.limiter_deduplication: + # preprocess tracks by applying deduplication first before sending to the actual limiter + tracks_keys_seen = set() + tracks_deduplicated: list[LocalTrack] = [] + + for track in self.tracks: + track_key = "_".join([track.title, track.artist]) + if track in ignore or track_key not in tracks_keys_seen: + tracks_keys_seen.add(track_key) + tracks_deduplicated.append(track) + + self.tracks = tracks_deduplicated + super()._limit(ignore=ignore) + def save(self, dry_run: bool = True, *_, **__) -> SyncResultXAutoPF: """ Write the tracks in this Playlist and its settings (if applicable) to file. @@ -159,7 +185,7 @@ def save(self, dry_run: bool = True, *_, **__) -> SyncResultXAutoPF: parser = self._parser if not dry_run else deepcopy(self._parser) parser.parse_matcher(self.matcher) parser.parse_exception_paths(self.matcher, items=self.tracks, original=self._original) - parser.parse_limiter(self.limiter) + parser.parse_limiter(self.limiter, deduplicate=self.limiter_deduplication) parser.parse_sorter(self.sorter) parser.save(dry_run=dry_run) @@ -482,7 +508,6 @@ def get_limiter(self) -> ItemLimiter | None: conditions: Mapping[str, str] = self.xml_source["Limit"] if conditions["@Enabled"] != "True": return - # filter_duplicates = conditions["@FilterDuplicates"] == "True" # MusicBee appears to have some extra allowance on time and byte limits of ~1.25 return ItemLimiter( @@ -492,12 +517,16 @@ def get_limiter(self) -> ItemLimiter | None: allowance=1.25 ) - def parse_limiter(self, limiter: ItemLimiter | None = None) -> None: + @property + def limiter_deduplication(self) -> bool: + """This setting controls whether duplicates should be filtered out before running limiter operations.""" + return self.xml_source["Limit"]["@FilterDuplicates"] == "True" + + def parse_limiter(self, limiter: ItemLimiter | None = None, deduplicate: bool = False) -> None: """Update the loaded ``xml`` object by parsing the given ``limiter`` to its XML playlist representation.""" - xml: dict[str, str] if limiter is None: # default value xml = { - "@FilterDuplicates": "True", + "@FilterDuplicates": str(deduplicate).title(), "@Enabled": "False", "@Count": "25", "@Type": "Items", @@ -505,7 +534,7 @@ def parse_limiter(self, limiter: ItemLimiter | None = None) -> None: } else: xml = { - "@FilterDuplicates": "False", + "@FilterDuplicates": str(deduplicate).title(), "@Enabled": "True", "@Count": str(limiter.limit_max), "@Type": limiter.kind.name.title(), diff --git a/musify/libraries/remote/core/object.py b/musify/libraries/remote/core/object.py index 8f3fa282..fd03c149 100644 --- a/musify/libraries/remote/core/object.py +++ b/musify/libraries/remote/core/object.py @@ -269,7 +269,7 @@ def artists(self) -> list[RemoteArtist[T]]: raise NotImplementedError -class RemoteArtist[T: RemoteTrack](Artist[T], RemoteCollectionLoader[T], metaclass=ABCMeta): +class RemoteArtist[T: (RemoteTrack, RemoteAlbum)](Artist[T], RemoteCollectionLoader[T], metaclass=ABCMeta): """Extracts key ``artist`` data from a remote API JSON response.""" __attributes_classes__ = (Artist, RemoteCollectionLoader) diff --git a/musify/libraries/remote/spotify/object.py b/musify/libraries/remote/spotify/object.py index 3bc47a5f..d8a76f0f 100644 --- a/musify/libraries/remote/spotify/object.py +++ b/musify/libraries/remote/spotify/object.py @@ -639,11 +639,6 @@ def image_links(self): def has_image(self): return len(self.response["images"]) > 0 - @property - def length(self): - lengths = {track.length for track in self.tracks} - return sum(lengths) if lengths else None - @property def rating(self): return self.response.get("popularity") diff --git a/musify/processors/limit.py b/musify/processors/limit.py index 40da9891..8169ad7b 100644 --- a/musify/processors/limit.py +++ b/musify/processors/limit.py @@ -1,7 +1,7 @@ """ Processor that limits the items in a given collection of items """ -from collections.abc import Collection +from collections.abc import Collection, MutableSequence from functools import reduce from operator import mul from random import shuffle @@ -112,7 +112,7 @@ def limit[T: MusifyItem](self, items: list[T], ignore: Collection[T] = ()) -> No else: items.extend(self._limit_on_numeric(items_limit)) - def _limit_on_albums[T: MusifyItem](self, items: list[T]) -> list[T]: + def _limit_on_albums[T: MusifyItem](self, items: MutableSequence[T]) -> list[T]: seen_albums = [] result = [] @@ -128,7 +128,7 @@ def _limit_on_albums[T: MusifyItem](self, items: list[T]) -> list[T]: return result - def _limit_on_numeric[T: MusifyItem](self, items: list[T]) -> list[T]: + def _limit_on_numeric[T: MusifyItem](self, items: MutableSequence[T]) -> list[T]: count = 0 result = [] @@ -149,7 +149,7 @@ def _convert(self, item: MusifyItem) -> float: :raise ItemLimiterError: When the given limit type cannot be found """ if 10 < self.kind.value < 20: - if not hasattr(item, "length"): + if getattr(item, "length", None) is None: # TODO: there should be a better way of handling this... raise LimiterProcessorError("The given item cannot be limited on length as it does not have a length.") factors = (1, 60, 60, 24, 7)[:self.kind.value % 10] @@ -167,7 +167,7 @@ def _convert(self, item: MusifyItem) -> float: raise LimiterProcessorError(f"Unrecognised LimitType: {self.kind}") @dynamicprocessormethod - def _random(self, items: list[MusifyItem]) -> None: + def _random(self, items: MutableSequence[MusifyItem]) -> None: shuffle(items) @dynamicprocessormethod diff --git a/tests/__resources/playlist/Recently Added.xautopf b/tests/__resources/playlist/Recently Added.xautopf index 00e20e93..a6d6a5c7 100644 --- a/tests/__resources/playlist/Recently Added.xautopf +++ b/tests/__resources/playlist/Recently Added.xautopf @@ -5,7 +5,7 @@ - + diff --git a/tests/libraries/local/playlist/test_xautopf.py b/tests/libraries/local/playlist/test_xautopf.py index 13e42924..ccc2274b 100644 --- a/tests/libraries/local/playlist/test_xautopf.py +++ b/tests/libraries/local/playlist/test_xautopf.py @@ -58,6 +58,7 @@ def test_load_playlist_bp_settings(self, tracks: list[LocalTrack], path_mapper: assert pl.matcher.ready assert len(pl.matcher.comparers.comparers) == 3 assert not pl.limiter + assert not pl.limiter_deduplication assert pl.sorter pl.load(tracks) @@ -97,6 +98,7 @@ def test_load_playlist_ra_settings(self, path_mapper: PathMapper): assert not pl.matcher.ready assert not pl.matcher.comparers assert pl.limiter + assert pl.limiter_deduplication assert pl.sorter def test_load_playlist_ra_tracks(self, path_mapper: PathMapper): @@ -112,6 +114,19 @@ def test_load_playlist_ra_tracks(self, path_mapper: PathMapper): tracks_expected = sorted(tracks, key=lambda t: t.date_added, reverse=True)[:limit] assert pl.tracks == sorted(tracks_expected, key=lambda t: t.date_added, reverse=True) + def test_limiter_deduplication(self): + tracks = random_tracks(10) + + pl = XAutoPF(path=path_playlist_xautopf_ra, tracks=tracks) + limit = pl.limiter.limit_max + tracks_expected = sorted(tracks, key=lambda t: t.date_added, reverse=True)[:limit] + assert pl.limiter_deduplication + assert pl.tracks == tracks_expected + + pl = XAutoPF(path=path_playlist_xautopf_ra, tracks=tracks + tracks) + assert pl.limiter_deduplication + assert pl.tracks == tracks_expected + def test_save_new_file(self, tmp_path: str): path = join(tmp_path, random_str() + ".xautopf") pl = XAutoPF(path=path) @@ -457,8 +472,9 @@ def test_parse_limiter(self): # default values cause getter to not return any processor parser_initial.parse_limiter() assert parser_initial.get_limiter() is None + assert not parser_initial.limiter_deduplication # always False by default - parser_initial.parse_limiter(final) + parser_initial.parse_limiter(final, deduplicate=True) new = parser_initial.get_limiter() assert new is not None assert new.limit_max == final.limit_max