diff --git a/libs/subliminal_patch/providers/utils.py b/libs/subliminal_patch/providers/utils.py index 0158abfee..1748bfe9c 100644 --- a/libs/subliminal_patch/providers/utils.py +++ b/libs/subliminal_patch/providers/utils.py @@ -1,59 +1,86 @@ +from collections import namedtuple +from difflib import SequenceMatcher import io import logging import os +import re import zipfile -import rarfile - from guessit import guessit - +import rarfile from subliminal.subtitle import fix_line_ending - from subliminal_patch.core import Episode from subliminal_patch.subtitle import guess_matches from ._agent_list import FIRST_THOUSAND_OR_SO_USER_AGENTS - logger = logging.getLogger(__name__) -def _get_matching_sub(sub_names, forced=False, episode=None): - matching_sub = None +_MatchingSub = namedtuple("_MatchingSub", ("file", "priority")) + +def _get_matching_sub(sub_names, forced=False, episode=None, episode_title=None): guess_options = {"single_value": True} if episode is not None: guess_options["type"] = "episode" # type: ignore + matching_subs = [] + for sub_name in sub_names: if not forced and os.path.splitext(sub_name.lower())[0].endswith("forced"): logger.debug("Ignoring forced subtitle: %s", sub_name) continue # If it's a movie then get the first subtitle - if episode is None: + if episode is None and episode_title is None: logger.debug("Movie subtitle found: %s", sub_name) - matching_sub = sub_name + matching_subs.append(_MatchingSub(sub_name, 2)) break guess = guessit(sub_name, options=guess_options) - if guess.get("episode") is None: - logger.debug("No episode info found in file: %s", sub_name) - continue + matched_episode_num = guess.get("episode") + if matched_episode_num: + logger.debug("No episode number found in file: %s", sub_name) + + matched_title = None + if episode_title is not None: + matched_title = _analize_sub_name(sub_name, episode_title) + + if episode == matched_episode_num: + logger.debug("Episode matched from number: %s", sub_name) + 
matching_subs.append(_MatchingSub(sub_name, 2)) + elif matched_title: + matching_subs.append(_MatchingSub(sub_name, 1)) + else: + logger.debug("Ignoring incorrect episode: '%s'", sub_name) + + if matching_subs: + matching_subs.sort(key=lambda x: x.priority, reverse=True) + logger.debug("Matches: %s", matching_subs) + return matching_subs[0].file + else: + logger.debug("Nothing matched") + return None - if episode == guess["episode"]: - logger.debug("Episode matched: %s", sub_name) - matching_sub = sub_name - break - logger.debug("Ignoring incorrect episode: %s", sub_name) +def _analize_sub_name(sub_name: str, title_): + titles = re.split(r"[.-]", os.path.splitext(sub_name)[0]) + for title in titles: + ratio = SequenceMatcher(None, title, title_).ratio() + if ratio > 0.85: + logger.debug( + "Episode title matched: '%s' -> '%s' [%s]", title, sub_name, ratio + ) + return True - return matching_sub + logger.debug("No episode title matched from file") + return False def get_subtitle_from_archive( - archive, forced=False, episode=None, get_first_subtitle=False + archive, forced=False, episode=None, get_first_subtitle=False, **kwargs ): "Get subtitle from Rarfile/Zipfile object. Return None if nothing is found." 
def test_get_matching_sub_complex_season_pack():
    """Season pack whose files are named by running number and titles."""
    # Courage the Cowardly Dog S03E17 "Mondo Magic"
    season_pack = [
        "30. Hard Drive Courage. The Ride Of The Valkyries.srt",
        "34. So In Louvre Are We Two. Night Of The Scarecrow.srt",
        "31. Scuba Scuba Doo. Conway The Contaminationist.srt",
        "32. Katz Under The Sea. Curtain Of Cruelty.srt",
        "27. Muriel Meets Her Match. Courage Vs. Mecha-Courage.srt",
        "36. Fishy Business. Angry Nasty People.srt",
        "28. Campsite Of Terror. The Record Deal.srt",
        "33. Feast Of The Bullfrogs. Tulip's Worm.srt",
        "37. Dome Of Doom. Snowman's Revenge.srt",
        "35. Mondo Magic. Watch The Birdies.srt",
        "29. Stormy Weather. The Sandman Sleeps.srt",
        "38. The Quilt Club. Swindlin' Wind.srt",
    ]
    expected = "35. Mondo Magic. Watch The Birdies.srt"

    result = utils._get_matching_sub(
        season_pack, forced=False, episode=17, episode_title="Mondo Magic"
    )

    assert result == expected


def test_get_matching_sub_complex_season_pack_mixed_files():
    """Pack mixing SxxEyy file names with title-named file names."""
    # Courage the Cowardly Dog S03E17 "Mondo Magic"
    mixed_pack = [
        "30. Hard Drive Courage. The Ride Of The Valkyries.srt",
        "S03E15.srt",
        "S03E16.srt",
        "S03E17.srt",
        "28. Campsite Of Terror. The Record Deal.srt",
        "33. Feast Of The Bullfrogs. Tulip's Worm.srt",
        "37. Dome Of Doom. Snowman's Revenge.srt",
        "35. Mondo Magic. Watch The Birdies.srt",
        "29. Stormy Weather. The Sandman Sleeps.srt",
        "38. The Quilt Club. Swindlin' Wind.srt",
    ]
    expected = "S03E17.srt"

    result = utils._get_matching_sub(
        mixed_pack, forced=False, episode=17, episode_title="Mondo Magic"
    )

    assert result == expected