Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PICARD-2935: apply genre filters and threshold before selecting minimal usage #2517

Merged
merged 1 commit into from
Jun 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 15 additions & 13 deletions picard/track.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,10 +110,17 @@ def skip(self, tag):
return True
return False

def filter(self, counter: Counter, minusage=0) -> Counter:
    """Return the entries of `counter` that pass the name filter and usage threshold.

    Names for which `self.skip(name)` is true are dropped first. The usage
    of each remaining name is then expressed as an integer percentage of
    the highest remaining count, and names below `minusage` percent are
    removed as well.

    Args:
        counter: Counter mapping names to their usage counts.
        minusage: Minimal usage in percent, relative to the most used
            name that survived the name filter.

    Returns:
        A new Counter holding only the surviving names; `counter` itself
        is not modified.
    """
    result = Counter()
    for name, count in counter.items():
        if not self.skip(name):
            result[name] = count

    # Nothing survived the name filter (or the input was empty): there is
    # no top count to compare against, so return the empty result rather
    # than failing on the lookup of the most common entry.
    if not result:
        return result

    topcount = max(result.values())
    # Iterate over a snapshot because entries are deleted while looping.
    for name, count in list(result.items()):
        percent = 100 * count // topcount
        if percent < minusage:
            del result[name]
    return result

def format_errors(self):
fmt = _("Error line %(lineno)d: %(error)s")
Expand Down Expand Up @@ -318,18 +325,13 @@ def _genres_to_metadata(genres, limit=None, minusage=0, filters='', join_with=No
if not genres:
return []

# Find most common genres
most_common_genres = genres.most_common(limit)
topcount = most_common_genres[0][1]

# Filter by name and usage
genres_filter = TagGenreFilter(filters)
genres_list = []
for name, count in genres_filter.filter(most_common_genres):
percent = 100 * count // topcount
if percent < minusage:
break
genres_list.append(name.title())
genres = genres_filter.filter(genres, minusage=minusage)

# Find most common genres
most_common_genres = genres.most_common(limit)
genres_list = [name.title() for name, _count in most_common_genres]
genres_list.sort()

# And generate the genre metadata tag
Expand Down
12 changes: 10 additions & 2 deletions test/test_taggenrefilter.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

from collections import Counter

from test.picardtestcase import PicardTestCase

Expand Down Expand Up @@ -168,5 +169,12 @@ def test_whitespaces_filter(self):

def test_filter_method(self):
    """filter() drops the names excluded by the filter and keeps the others.

    The input is a Counter because filter() iterates over
    `counter.items()`; a plain list of (name, count) tuples is not
    accepted.
    """
    tag_filter = TagGenreFilter("-a*")
    genres = Counter(ax=1, bx=2, ay=3, by=4)
    result = tag_filter.filter(genres)
    self.assertEqual([('bx', 2), ('by', 4)], list(result.items()))

def test_filter_method_minusage(self):
    """filter() applies `minusage` relative to the top count left after filtering.

    With the "-a*" filter only bx, by and bz are expected to remain, so
    the reference count is 10 (by); bz at 4 is below 50 percent of that
    and must be removed, while bx at 5 is exactly 50 percent and stays.
    """
    usage = Counter(ax=4, bx=5, ay=20, by=10, bz=4)
    kept = TagGenreFilter("-a*").filter(usage, minusage=50)
    expected = [('bx', 5), ('by', 10)]
    self.assertEqual(expected, list(kept.items()))
6 changes: 6 additions & 0 deletions test/test_track.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,12 @@ def test_limit_0(self):
ret = Track._genres_to_metadata(genres, limit=0)
self.assertEqual(ret, [])

def test_limit_after_filter(self):
    """The limit is applied to the genres that remain after name filtering.

    "rock" is excluded by the filter, so a limit of 3 must still yield
    the three other genres instead of only two.
    """
    counts = Counter(rock=5, blues=7, pop=1, psychedelic=3)
    tags = Track._genres_to_metadata(counts, limit=3, filters='-rock')
    self.assertEqual(tags, ['Blues', 'Pop', 'Psychedelic'])

def test_minusage(self):
genres = Counter(pop=6, rock=7, blues=2)
ret = Track._genres_to_metadata(genres, minusage=10)
Expand Down
Loading