From 8ff36623516b31dc3dbc8aae21634b7503231844 Mon Sep 17 00:00:00 2001
From: George <41969151+geo-martino@users.noreply.github.com>
Date: Thu, 4 Apr 2024 14:22:46 -0400
Subject: [PATCH] add limiter deduplication preprocess on xautopf files (#61)
* add concrete implmentations of length on all base collection types
* add deduplication before limit on xautopf files + fix BasicCollection
---
musify/libraries/collection.py | 5 +++
musify/libraries/core/collection.py | 6 +++
musify/libraries/core/object.py | 40 +++++++++++-------
musify/libraries/local/collection.py | 6 ---
musify/libraries/local/playlist/base.py | 3 --
musify/libraries/local/playlist/xautopf.py | 41 ++++++++++++++++---
musify/libraries/remote/core/object.py | 2 +-
musify/libraries/remote/spotify/object.py | 5 ---
musify/processors/limit.py | 10 ++---
.../playlist/Recently Added.xautopf | 2 +-
.../libraries/local/playlist/test_xautopf.py | 18 +++++++-
11 files changed, 96 insertions(+), 42 deletions(-)
diff --git a/musify/libraries/collection.py b/musify/libraries/collection.py
index 6a544c6c..eb0cb19e 100644
--- a/musify/libraries/collection.py
+++ b/musify/libraries/collection.py
@@ -36,6 +36,11 @@ def name(self):
def items(self) -> list[T]:
return self._items
+ @property
+ def length(self):
+ lengths = {getattr(item, "length", None) for item in self.items}
+ return sum({length for length in lengths if length}) if lengths else None
+
def __init__(self, name: str, items: Collection[T]):
super().__init__()
self._name = name
diff --git a/musify/libraries/core/collection.py b/musify/libraries/core/collection.py
index 5e46d883..765017f0 100644
--- a/musify/libraries/core/collection.py
+++ b/musify/libraries/core/collection.py
@@ -109,6 +109,12 @@ def items(self) -> list[T]:
"""The items in this collection"""
raise NotImplementedError
+ @property
+ @abstractmethod
+ def length(self) -> float | None:
+ """Total duration of all items in this collection in seconds"""
+ raise NotImplementedError
+
@staticmethod
@abstractmethod
def _validate_item_type(items: Any | Iterable[Any]) -> bool:
diff --git a/musify/libraries/core/object.py b/musify/libraries/core/object.py
index 6ca55134..bf02c048 100644
--- a/musify/libraries/core/object.py
+++ b/musify/libraries/core/object.py
@@ -161,7 +161,7 @@ def has_image(self) -> bool:
@property
@abstractmethod
- def length(self) -> float:
+ def length(self) -> float | None:
"""Total duration of this track in seconds"""
raise NotImplementedError
@@ -225,7 +225,7 @@ def has_image(self) -> bool:
return len(self.image_links) > 0
@property
- def length(self) -> float | None:
+ def length(self):
"""Total duration of all tracks in this playlist in seconds"""
lengths = {track.length for track in self.tracks}
return sum(lengths) if lengths else None
@@ -309,6 +309,12 @@ def items(self):
"""The tracks in this collection"""
return self.tracks
+ @property
+ def length(self):
+ """Total duration of all tracks in this library in seconds"""
+ lengths = {track.length for track in self.tracks}
+ return sum(lengths) if lengths else None
+
@property
@abstractmethod
def tracks(self) -> list[T]:
@@ -516,10 +522,10 @@ def compilation(self) -> bool:
raise NotImplementedError
@property
- @abstractmethod
- def length(self) -> float | None:
- """Total duration of all tracks in this folder"""
- raise NotImplementedError
+ def length(self):
+ """Total duration of all tracks in this folder in seconds"""
+ lengths = {track.length for track in self.tracks}
+ return sum(lengths) if lengths else None
class Album[T: Track](MusifyCollection[T], metaclass=ABCMeta):
@@ -633,10 +639,10 @@ def has_image(self) -> bool:
return len(self.image_links) > 0
@property
- @abstractmethod
- def length(self) -> float | None:
+ def length(self):
"""Total duration of all tracks on this album in seconds"""
- raise NotImplementedError
+ lengths = {track.length for track in self.tracks}
+ return sum(lengths) if lengths else None
@property
@abstractmethod
@@ -645,7 +651,7 @@ def rating(self) -> float | None:
raise NotImplementedError
-class Artist[T: Track](MusifyCollection[T], metaclass=ABCMeta):
+class Artist[T: (Track, Album)](MusifyCollection[T], metaclass=ABCMeta):
"""An artist of items and their derived properties/objects."""
__attributes_classes__ = MusifyCollection
@@ -704,10 +710,10 @@ def genres(self) -> list[str]:
raise NotImplementedError
@property
- @abstractmethod
- def length(self) -> float | None:
- """Total duration of all tracks by this artist"""
- raise NotImplementedError
+ def length(self):
+ """Total duration of all tracks by this artist in seconds"""
+ lengths = {track.length for track in self.tracks}
+ return sum(lengths) if lengths else None
@property
@abstractmethod
@@ -761,3 +767,9 @@ def albums(self) -> list[str]:
def genres(self) -> list[str]:
"""List of genres ordered by frequency of appearance on the tracks for this genre"""
raise NotImplementedError
+
+ @property
+ def length(self):
+ """Total duration of all tracks with this genre in seconds"""
+ lengths = {track.length for track in self.tracks}
+ return sum(lengths) if lengths else None
diff --git a/musify/libraries/local/collection.py b/musify/libraries/local/collection.py
index d27eca62..f6774279 100644
--- a/musify/libraries/local/collection.py
+++ b/musify/libraries/local/collection.py
@@ -71,12 +71,6 @@ def genres(self) -> list[str]:
genres = (genre for track in self.tracks for genre in (track.genres if track.genres else []))
return get_most_common_values(genres)
- @property
- def length(self) -> float | None:
- """Total duration of all tracks in this collection in seconds"""
- lengths = {track.length for track in self.tracks}
- return sum(lengths) if lengths else None
-
@property
def last_modified(self) -> datetime:
"""Timestamp of the last modified track in this collection"""
diff --git a/musify/libraries/local/playlist/base.py b/musify/libraries/local/playlist/base.py
index 44f8b2ce..2f33d3eb 100644
--- a/musify/libraries/local/playlist/base.py
+++ b/musify/libraries/local/playlist/base.py
@@ -103,7 +103,6 @@ def __init__(
self._original: list[LocalTrack] = []
def _match(self, tracks: Collection[LocalTrack] = (), reference: LocalTrack | None = None) -> None:
- """Wrapper for matcher operations"""
if self.matcher is None or not tracks:
return
@@ -113,14 +112,12 @@ def _match(self, tracks: Collection[LocalTrack] = (), reference: LocalTrack | No
self.tracks: list[LocalTrack] = list(self.matcher(values=tracks, reference=reference))
def _limit(self, ignore: Collection[str | LocalTrack]) -> None:
- """Wrapper for limiter operations"""
if self.limiter is not None and self.tracks is not None:
track_path_map = {track.path: track for track in self.tracks}
ignore: set[LocalTrack] = {i if isinstance(i, LocalTrack) else track_path_map.get(i) for i in ignore}
self.limiter(items=self.tracks, ignore=ignore)
def _sort(self) -> None:
- """Wrapper for sorter operations"""
if self.sorter is not None and self.tracks is not None:
self.sorter(items=self.tracks)
diff --git a/musify/libraries/local/playlist/xautopf.py b/musify/libraries/local/playlist/xautopf.py
index 84911877..547a8277 100644
--- a/musify/libraries/local/playlist/xautopf.py
+++ b/musify/libraries/local/playlist/xautopf.py
@@ -102,6 +102,15 @@ def description(self):
def description(self, value: str | None):
self._parser.description = value
+ @property
+ def limiter_deduplication(self) -> bool:
+ """This setting controls whether duplicates should be filtered out before running limiter operations."""
+ return self._limiter_deduplication
+
+ @limiter_deduplication.setter
+ def limiter_deduplication(self, value: bool):
+ self._limiter_deduplication = value
+
@property
def image_links(self):
return {}
@@ -125,6 +134,8 @@ def __init__(
self._parser.parse_limiter()
self._parser.parse_sorter()
+ self._limiter_deduplication: bool = self._parser.limiter_deduplication
+
super().__init__(
path=path,
matcher=self._parser.get_matcher(),
@@ -146,6 +157,21 @@ def load(self, tracks: Collection[LocalTrack] = ()) -> list[LocalTrack]:
self._original = self.tracks.copy()
return self.tracks
+ def _limit(self, ignore: Collection[LocalTrack]) -> None:
+ if self.limiter is not None and self.tracks is not None and self.limiter_deduplication:
+ # preprocess tracks by applying deduplication first before sending to the actual limiter
+ tracks_keys_seen = set()
+ tracks_deduplicated: list[LocalTrack] = []
+
+ for track in self.tracks:
+ track_key = "_".join([track.title, track.artist])
+ if track in ignore or track_key not in tracks_keys_seen:
+ tracks_keys_seen.add(track_key)
+ tracks_deduplicated.append(track)
+
+ self.tracks = tracks_deduplicated
+ super()._limit(ignore=ignore)
+
def save(self, dry_run: bool = True, *_, **__) -> SyncResultXAutoPF:
"""
Write the tracks in this Playlist and its settings (if applicable) to file.
@@ -159,7 +185,7 @@ def save(self, dry_run: bool = True, *_, **__) -> SyncResultXAutoPF:
parser = self._parser if not dry_run else deepcopy(self._parser)
parser.parse_matcher(self.matcher)
parser.parse_exception_paths(self.matcher, items=self.tracks, original=self._original)
- parser.parse_limiter(self.limiter)
+ parser.parse_limiter(self.limiter, deduplicate=self.limiter_deduplication)
parser.parse_sorter(self.sorter)
parser.save(dry_run=dry_run)
@@ -482,7 +508,6 @@ def get_limiter(self) -> ItemLimiter | None:
conditions: Mapping[str, str] = self.xml_source["Limit"]
if conditions["@Enabled"] != "True":
return
- # filter_duplicates = conditions["@FilterDuplicates"] == "True"
# MusicBee appears to have some extra allowance on time and byte limits of ~1.25
return ItemLimiter(
@@ -492,12 +517,16 @@ def get_limiter(self) -> ItemLimiter | None:
allowance=1.25
)
- def parse_limiter(self, limiter: ItemLimiter | None = None) -> None:
+ @property
+ def limiter_deduplication(self) -> bool:
+ """This setting controls whether duplicates should be filtered out before running limiter operations."""
+ return self.xml_source["Limit"]["@FilterDuplicates"] == "True"
+
+ def parse_limiter(self, limiter: ItemLimiter | None = None, deduplicate: bool = False) -> None:
"""Update the loaded ``xml`` object by parsing the given ``limiter`` to its XML playlist representation."""
- xml: dict[str, str]
if limiter is None: # default value
xml = {
- "@FilterDuplicates": "True",
+ "@FilterDuplicates": str(deduplicate).title(),
"@Enabled": "False",
"@Count": "25",
"@Type": "Items",
@@ -505,7 +534,7 @@ def parse_limiter(self, limiter: ItemLimiter | None = None) -> None:
}
else:
xml = {
- "@FilterDuplicates": "False",
+ "@FilterDuplicates": str(deduplicate).title(),
"@Enabled": "True",
"@Count": str(limiter.limit_max),
"@Type": limiter.kind.name.title(),
diff --git a/musify/libraries/remote/core/object.py b/musify/libraries/remote/core/object.py
index 8f3fa282..fd03c149 100644
--- a/musify/libraries/remote/core/object.py
+++ b/musify/libraries/remote/core/object.py
@@ -269,7 +269,7 @@ def artists(self) -> list[RemoteArtist[T]]:
raise NotImplementedError
-class RemoteArtist[T: RemoteTrack](Artist[T], RemoteCollectionLoader[T], metaclass=ABCMeta):
+class RemoteArtist[T: (RemoteTrack, RemoteAlbum)](Artist[T], RemoteCollectionLoader[T], metaclass=ABCMeta):
"""Extracts key ``artist`` data from a remote API JSON response."""
__attributes_classes__ = (Artist, RemoteCollectionLoader)
diff --git a/musify/libraries/remote/spotify/object.py b/musify/libraries/remote/spotify/object.py
index 3bc47a5f..d8a76f0f 100644
--- a/musify/libraries/remote/spotify/object.py
+++ b/musify/libraries/remote/spotify/object.py
@@ -639,11 +639,6 @@ def image_links(self):
def has_image(self):
return len(self.response["images"]) > 0
- @property
- def length(self):
- lengths = {track.length for track in self.tracks}
- return sum(lengths) if lengths else None
-
@property
def rating(self):
return self.response.get("popularity")
diff --git a/musify/processors/limit.py b/musify/processors/limit.py
index 40da9891..8169ad7b 100644
--- a/musify/processors/limit.py
+++ b/musify/processors/limit.py
@@ -1,7 +1,7 @@
"""
Processor that limits the items in a given collection of items
"""
-from collections.abc import Collection
+from collections.abc import Collection, MutableSequence
from functools import reduce
from operator import mul
from random import shuffle
@@ -112,7 +112,7 @@ def limit[T: MusifyItem](self, items: list[T], ignore: Collection[T] = ()) -> No
else:
items.extend(self._limit_on_numeric(items_limit))
- def _limit_on_albums[T: MusifyItem](self, items: list[T]) -> list[T]:
+ def _limit_on_albums[T: MusifyItem](self, items: MutableSequence[T]) -> list[T]:
seen_albums = []
result = []
@@ -128,7 +128,7 @@ def _limit_on_albums[T: MusifyItem](self, items: list[T]) -> list[T]:
return result
- def _limit_on_numeric[T: MusifyItem](self, items: list[T]) -> list[T]:
+ def _limit_on_numeric[T: MusifyItem](self, items: MutableSequence[T]) -> list[T]:
count = 0
result = []
@@ -149,7 +149,7 @@ def _convert(self, item: MusifyItem) -> float:
:raise ItemLimiterError: When the given limit type cannot be found
"""
if 10 < self.kind.value < 20:
- if not hasattr(item, "length"):
+ if getattr(item, "length", None) is None: # TODO: there should be a better way of handling this...
raise LimiterProcessorError("The given item cannot be limited on length as it does not have a length.")
factors = (1, 60, 60, 24, 7)[:self.kind.value % 10]
@@ -167,7 +167,7 @@ def _convert(self, item: MusifyItem) -> float:
raise LimiterProcessorError(f"Unrecognised LimitType: {self.kind}")
@dynamicprocessormethod
- def _random(self, items: list[MusifyItem]) -> None:
+ def _random(self, items: MutableSequence[MusifyItem]) -> None:
shuffle(items)
@dynamicprocessormethod
diff --git a/tests/__resources/playlist/Recently Added.xautopf b/tests/__resources/playlist/Recently Added.xautopf
index 00e20e93..a6d6a5c7 100644
--- a/tests/__resources/playlist/Recently Added.xautopf
+++ b/tests/__resources/playlist/Recently Added.xautopf
@@ -5,7 +5,7 @@
-
+
diff --git a/tests/libraries/local/playlist/test_xautopf.py b/tests/libraries/local/playlist/test_xautopf.py
index 13e42924..ccc2274b 100644
--- a/tests/libraries/local/playlist/test_xautopf.py
+++ b/tests/libraries/local/playlist/test_xautopf.py
@@ -58,6 +58,7 @@ def test_load_playlist_bp_settings(self, tracks: list[LocalTrack], path_mapper:
assert pl.matcher.ready
assert len(pl.matcher.comparers.comparers) == 3
assert not pl.limiter
+ assert not pl.limiter_deduplication
assert pl.sorter
pl.load(tracks)
@@ -97,6 +98,7 @@ def test_load_playlist_ra_settings(self, path_mapper: PathMapper):
assert not pl.matcher.ready
assert not pl.matcher.comparers
assert pl.limiter
+ assert pl.limiter_deduplication
assert pl.sorter
def test_load_playlist_ra_tracks(self, path_mapper: PathMapper):
@@ -112,6 +114,19 @@ def test_load_playlist_ra_tracks(self, path_mapper: PathMapper):
tracks_expected = sorted(tracks, key=lambda t: t.date_added, reverse=True)[:limit]
assert pl.tracks == sorted(tracks_expected, key=lambda t: t.date_added, reverse=True)
+ def test_limiter_deduplication(self):
+ tracks = random_tracks(10)
+
+ pl = XAutoPF(path=path_playlist_xautopf_ra, tracks=tracks)
+ limit = pl.limiter.limit_max
+ tracks_expected = sorted(tracks, key=lambda t: t.date_added, reverse=True)[:limit]
+ assert pl.limiter_deduplication
+ assert pl.tracks == tracks_expected
+
+ pl = XAutoPF(path=path_playlist_xautopf_ra, tracks=tracks + tracks)
+ assert pl.limiter_deduplication
+ assert pl.tracks == tracks_expected
+
def test_save_new_file(self, tmp_path: str):
path = join(tmp_path, random_str() + ".xautopf")
pl = XAutoPF(path=path)
@@ -457,8 +472,9 @@ def test_parse_limiter(self):
# default values cause getter to not return any processor
parser_initial.parse_limiter()
assert parser_initial.get_limiter() is None
+ assert not parser_initial.limiter_deduplication # always False by default
- parser_initial.parse_limiter(final)
+ parser_initial.parse_limiter(final, deduplicate=True)
new = parser_initial.get_limiter()
assert new is not None
assert new.limit_max == final.limit_max