Skip to content

Commit

Permalink
Preference or disfavor per language type (hearing impaired, foreign only) (#1175)
Browse files Browse the repository at this point in the history

* Use three-valued hearing_impaired and foreign_only

* rename forced -> foreign_only

* sort by language type before sorting by score

* add support for foreign_only from opensubtitles.com

* add news

* Use multiple tags to work well with config file options.
  • Loading branch information
getzze authored Nov 4, 2024
1 parent 0f72149 commit 0c4a957
Show file tree
Hide file tree
Showing 10 changed files with 150 additions and 77 deletions.
1 change: 1 addition & 0 deletions changelog.d/1175.change.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add CLI options to prefer or disfavor hearing-impaired (-hi/-HI) or foreign-only (-fo/-FO) subtitles.
1 change: 1 addition & 0 deletions docs/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ provider = ["addic7ed", "opensubtitlescom", "opensubtitles"]
refiner = ["metadata", "hash", "omdb"]
ignore_refiner = ["tmdb"]
language = ["fr", "en", "pt-br"]
foreign_only = false
encoding = "utf-8"
min_score = 50
archives = true
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,8 @@ extend-ignore-re = [
"(?Rm)^.*#\\s*spellchecker:\\s*disable-line$",
"#\\s*spellchecker:off\\s*\\n.*\\n\\s*#\\s*spellchecker:on"
]
[tool.typos.default.extend-words]
fo = "fo"
[tool.typos.default.extend-identifiers]
tha = "tha"
bre = "bre"
67 changes: 57 additions & 10 deletions subliminal/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
)
from subliminal.core import ARCHIVE_EXTENSIONS, scan_name, search_external_subtitles
from subliminal.extensions import get_default_providers, get_default_refiners
from subliminal.score import match_hearing_impaired
from subliminal.utils import merge_extend_and_ignore_unions

if TYPE_CHECKING:
Expand Down Expand Up @@ -142,6 +141,12 @@ def configure(ctx: click.Context, param: click.Parameter | None, filename: str |

# make download options
download_dict = toml_dict.setdefault('download', {})
# handle language types
for lt in ('hearing_impaired', 'foreign_only'):
# if an option was defined in the config file, make it a tuple, the expected type
if lt in download_dict and (isinstance(download_dict[lt], bool) or download_dict[lt] is None):
download_dict[lt] = (download_dict[lt],)

# remove the provider and refiner lists to select, extend and ignore
provider_lists = {
'select': download_dict.pop('provider', []),
Expand Down Expand Up @@ -411,7 +416,42 @@ def cache(ctx: click.Context, clear_subliminal: bool) -> None:
),
)
@click.option('-f', '--force', is_flag=True, default=False, help='Force download even if a subtitle already exist.')
@click.option('-hi', '--hearing-impaired', is_flag=True, default=False, help='Prefer hearing impaired subtitles.')
@click.option(
'-fo',
'--foreign-only',
'foreign_only',
is_flag=True,
flag_value=True,
multiple=True,
help='Prefer foreign-only subtitles.',
)
@click.option(
'-FO',
'--no-foreign-only',
'foreign_only',
is_flag=True,
flag_value=False,
multiple=True,
help='Disfavor foreign-only subtitles.',
)
@click.option(
'-hi',
'--hearing-impaired',
'hearing_impaired',
is_flag=True,
flag_value=True,
multiple=True,
help='Prefer hearing-impaired subtitles.',
)
@click.option(
'-HI',
'--no-hearing-impaired',
'hearing_impaired',
is_flag=True,
flag_value=False,
multiple=True,
help='Disfavor hearing-impaired subtitles.',
)
@click.option(
'-m',
'--min-score',
Expand All @@ -423,7 +463,7 @@ def cache(ctx: click.Context, clear_subliminal: bool) -> None:
'--language-type-suffix',
is_flag=True,
default=False,
help='Add a suffix to the saved subtitle name to indicate a hearing impaired or foreign part subtitle.',
help='Add a suffix to the saved subtitle name to indicate a hearing impaired or foreign only subtitle.',
)
@click.option(
'--language-format',
Expand Down Expand Up @@ -468,7 +508,8 @@ def download(
original_encoding: bool,
single: bool,
force: bool,
hearing_impaired: bool,
hearing_impaired: tuple[bool | None, ...],
foreign_only: tuple[bool | None, ...],
min_score: int,
language_type_suffix: bool,
language_format: str,
Expand Down Expand Up @@ -496,6 +537,14 @@ def download(
elif encoding is None:
encoding = 'utf-8'

# language_type
hearing_impaired_flag: bool | None = None
if len(hearing_impaired) > 0:
hearing_impaired_flag = hearing_impaired[-1]
foreign_only_flag: bool | None = None
if len(foreign_only) > 0:
foreign_only_flag = foreign_only[-1]

debug = obj.get('debug', False)
if debug:
verbose = 3
Expand Down Expand Up @@ -649,7 +698,8 @@ def download(
v,
language_set,
min_score=scores['hash'] * min_score // 100,
hearing_impaired=hearing_impaired,
hearing_impaired=hearing_impaired_flag,
foreign_only=foreign_only_flag,
only_one=single,
ignore_subtitles=ignore_subtitles,
)
Expand Down Expand Up @@ -701,11 +751,8 @@ def download(
else:
score_color = 'green'

# scale score from 0 to 100 taking out preferences
scaled_score = score
if match_hearing_impaired(s, hearing_impaired=hearing_impaired):
scaled_score -= scores['hearing_impaired']
scaled_score *= 100 / scores['hash']
# scale score from 0 to 100
scaled_score = score * 100 / scores['hash']

# echo some nice colored output
language_str = (
Expand Down
35 changes: 25 additions & 10 deletions subliminal/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
refiner_manager,
)
from .score import compute_score as default_compute_score
from .subtitle import SUBTITLE_EXTENSIONS, Subtitle
from .subtitle import SUBTITLE_EXTENSIONS, LanguageType, Subtitle
from .utils import get_age, handle_exception
from .video import VIDEO_EXTENSIONS, Episode, Movie, Video

Expand Down Expand Up @@ -148,7 +148,7 @@ def list_subtitles_provider(self, provider: str, video: Video, languages: Set[La
try:
return self[provider].list_subtitles(video, provider_languages)
except Exception as e: # noqa: BLE001
handle_exception(e, 'Provider {provider}')
handle_exception(e, f'Provider {provider}')

return []

Expand Down Expand Up @@ -220,7 +220,8 @@ def download_best_subtitles(
languages: Set[Language],
*,
min_score: int = 0,
hearing_impaired: bool = False,
hearing_impaired: bool | None = None,
foreign_only: bool | None = None,
only_one: bool = False,
compute_score: ComputeScore | None = None,
ignore_subtitles: Sequence[str] | None = None,
Expand All @@ -234,10 +235,11 @@ def download_best_subtitles(
:param languages: languages to download.
:type languages: set of :class:`~babelfish.language.Language`
:param int min_score: minimum score for a subtitle to be downloaded.
:param bool hearing_impaired: hearing impaired preference.
:param (bool | None) hearing_impaired: hearing impaired preference (yes/no/indifferent).
:param (bool | None) foreign_only: foreign only preference (yes/no/indifferent).
:param bool only_one: download only one subtitle, not one per language.
:param compute_score: function that takes `subtitle` and `video` as positional arguments,
`hearing_impaired` as keyword argument and returns the score.
and returns the score.
:param ignore_subtitles: list of subtitle ids to ignore (None defaults to an empty list).
:return: downloaded subtitles.
:rtype: list of :class:`~subliminal.subtitle.Subtitle`
Expand All @@ -249,9 +251,19 @@ def download_best_subtitles(
# ignore subtitles
subtitles = [s for s in subtitles if s.id not in ignore_subtitles]

# sort by hearing impaired and foreign only
language_type = LanguageType.from_flags(hearing_impaired=hearing_impaired, foreign_only=foreign_only)
if language_type != LanguageType.UNKNOWN:
logger.info('Sort subtitles by %s types first', language_type.value)
subtitles = sorted(
subtitles,
key=lambda s: s.language_type == language_type,
reverse=True,
)

# sort subtitles by score
scored_subtitles = sorted(
[(s, compute_score(s, video, hearing_impaired=hearing_impaired)) for s in subtitles],
[(s, compute_score(s, video)) for s in subtitles],
key=operator.itemgetter(1),
reverse=True,
)
Expand Down Expand Up @@ -411,7 +423,7 @@ def parse_subtitle_filename(subtitle_filename: str, video_filename: str) -> Subt
except (ValueError, LanguageReverseError):
logger.exception('Cannot parse language code %r', language_code)

# TODO: extract the hearing_impaired or forced attribute
# TODO: extract the hearing_impaired or foreign_only attribute

return Subtitle(language, subtitle_id=subtitle_filename)

Expand Down Expand Up @@ -775,7 +787,8 @@ def download_best_subtitles(
languages: Set[Language],
*,
min_score: int = 0,
hearing_impaired: bool = False,
hearing_impaired: bool | None = None,
foreign_only: bool | None = None,
only_one: bool = False,
compute_score: ComputeScore | None = None,
pool_class: type[ProviderPool] = ProviderPool,
Expand All @@ -790,7 +803,8 @@ def download_best_subtitles(
:param languages: languages to download.
:type languages: set of :class:`~babelfish.language.Language`
:param int min_score: minimum score for a subtitle to be downloaded.
:param bool hearing_impaired: hearing impaired preference.
:param (bool | None) hearing_impaired: hearing impaired preference (yes/no/indifferent).
:param (bool | None) foreign_only: foreign only preference (yes/no/indifferent).
:param bool only_one: download only one subtitle, not one per language.
:param compute_score: function that takes `subtitle` and `video` as positional arguments,
and returns the score.
Expand Down Expand Up @@ -825,6 +839,7 @@ def download_best_subtitles(
languages,
min_score=min_score,
hearing_impaired=hearing_impaired,
foreign_only=foreign_only,
only_one=only_one,
compute_score=compute_score,
)
Expand Down Expand Up @@ -861,7 +876,7 @@ def save_subtitles(
:param str directory: path to directory where to save the subtitles, default is next to the video.
:param str encoding: encoding in which to save the subtitles, default is to keep original encoding.
:param (str | None) extension: the subtitle extension, default is to match to the subtitle format.
:param bool language_type_suffix: add a suffix 'hi' or 'forced' if needed. Default to False.
:param bool language_type_suffix: add a suffix 'hi' or 'fo' if needed. Default to False.
:param str language_format: format of the language suffix. Default to 'alpha2'.
:return: the saved subtitles
:rtype: list of :class:`~subliminal.subtitle.Subtitle`
Expand Down
12 changes: 11 additions & 1 deletion subliminal/providers/opensubtitlescom.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ def __init__(
subtitle_id: str,
*,
hearing_impaired: bool = False,
foreign_only: bool = False,
movie_kind: str | None = None,
release: str | None = None,
movie_title: str | None = None,
Expand All @@ -199,7 +200,14 @@ def __init__(
file_id: int = 0,
file_name: str = '',
) -> None:
super().__init__(language, subtitle_id, hearing_impaired=hearing_impaired, page_link=None, encoding='utf-8')
super().__init__(
language,
subtitle_id,
hearing_impaired=hearing_impaired,
foreign_only=foreign_only,
page_link=None,
encoding='utf-8',
)
self.movie_kind = movie_kind
self.release = release
self.movie_title = movie_title
Expand Down Expand Up @@ -235,6 +243,7 @@ def from_response(
attributes = response.get('attributes', {})
language = Language.fromopensubtitlescom(str(attributes.get('language')))
hearing_impaired = bool(int(attributes.get('hearing_impaired')))
foreign_only = bool(int(attributes.get('foreign_parts_only')))
release = str(attributes.get('release'))
moviehash_match = bool(attributes.get('moviehash_match', False))
download_count = int(attributes.get('download_count'))
Expand Down Expand Up @@ -266,6 +275,7 @@ def from_response(
language,
subtitle_id,
hearing_impaired=hearing_impaired,
foreign_only=foreign_only,
movie_kind=movie_kind,
release=release,
movie_title=movie_title,
Expand Down
16 changes: 5 additions & 11 deletions subliminal/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
class ComputeScore(Protocol):
"""Compute the score of a subtitle matching a video."""

def __call__(self, subtitle: Subtitle, video: Video, *, hearing_impaired: bool | None) -> int: ... # noqa: D102
def __call__(self, subtitle: Subtitle, video: Video) -> int: ... # noqa: D102


# Check if sympy is installed (for tests)
Expand Down Expand Up @@ -141,8 +141,8 @@ def match_hearing_impaired(subtitle: Subtitle, *, hearing_impaired: bool | None
)


def compute_score(subtitle: Subtitle, video: Video, *, hearing_impaired: bool | None = None) -> int:
"""Compute the score of the `subtitle` against the `video` with `hearing_impaired` preference.
def compute_score(subtitle: Subtitle, video: Video, **kwargs: Any) -> int:
"""Compute the score of the `subtitle` against the `video`.
:func:`compute_score` uses the :meth:`Subtitle.get_matches <subliminal.subtitle.Subtitle.get_matches>` method and
applies the scores (either from :data:`episode_scores` or :data:`movie_scores`) after some processing.
Expand All @@ -151,12 +151,11 @@ def compute_score(subtitle: Subtitle, video: Video, *, hearing_impaired: bool |
:type subtitle: :class:`~subliminal.subtitle.Subtitle`
:param video: the video to compute the score against.
:type video: :class:`~subliminal.video.Video`
:param (bool | None) hearing_impaired: hearing impaired preference (None if no preference).
:return: score of the subtitle.
:rtype: int
"""
logger.info('Computing score of %r for video %r with %r', subtitle, video, {'hearing_impaired': hearing_impaired})
logger.info('Computing score of %r for video %r', subtitle, video)

# get the scores dict
scores = get_scores(video)
Expand Down Expand Up @@ -193,17 +192,12 @@ def compute_score(subtitle: Subtitle, video: Video, *, hearing_impaired: bool |
logger.debug('Adding imdb_id match equivalents')
matches |= {'title', 'year', 'country'}

# handle hearing impaired
if match_hearing_impaired(subtitle, hearing_impaired=hearing_impaired):
logger.debug('Matched hearing_impaired')
matches.add('hearing_impaired')

# compute the score
score = int(sum(scores.get(match, 0) for match in matches))
logger.info('Computed score %r with final matches %r', score, matches)

# ensure score is within valid bounds
max_score = scores['hash'] + scores['hearing_impaired']
max_score = scores['hash']
if not (0 <= score <= max_score): # pragma: no cover
logger.info('Clip score between 0 and %d: %d', max_score, score)
score = int(clip(score, 0, max_score))
Expand Down
Loading

0 comments on commit 0c4a957

Please sign in to comment.