diff --git a/libs/subliminal_patch/providers/supersubtitles.py b/libs/subliminal_patch/providers/supersubtitles.py index cfc6bff00..7f779fedb 100644 --- a/libs/subliminal_patch/providers/supersubtitles.py +++ b/libs/subliminal_patch/providers/supersubtitles.py @@ -1,32 +1,34 @@ # coding=utf-8 -import io import logging +from random import randint import re import time +import urllib.parse from babelfish import language_converters -from subzero.language import Language +from bs4.element import NavigableString +from bs4.element import Tag +from guessit import guessit from requests import Session from requests.exceptions import JSONDecodeError -import urllib.parse -from random import randint - -from subliminal.subtitle import fix_line_ending -from subliminal_patch.providers import Provider -from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin from subliminal.providers import ParserBeautifulSoup -from bs4.element import Tag, NavigableString from subliminal.score import get_equivalent_release_groups -from subliminal_patch.subtitle import Subtitle, guess_matches +from subliminal.utils import sanitize +from subliminal.utils import sanitize_release_group +from subliminal.video import Episode +from subliminal.video import Movie from subliminal_patch.exceptions import APIThrottled -from subliminal.utils import sanitize, sanitize_release_group -from subliminal.video import Episode, Movie -from zipfile import ZipFile, is_zipfile -from rarfile import RarFile, is_rarfile -from subliminal_patch.utils import sanitize, fix_inconsistent_naming -from guessit import guessit -from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST +from subliminal_patch.providers import Provider +from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin +from subliminal_patch.subtitle import Subtitle +from subliminal_patch.utils import fix_inconsistent_naming +from subliminal_patch.utils import sanitize +from subzero.language import Language +from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST +from .utils import get_archive_from_bytes +from .utils import get_subtitle_from_archive +from .utils import update_matches logger = logging.getLogger(__name__) @@ -78,7 +80,7 @@ class SuperSubtitlesSubtitle(Subtitle): self.season = season self.episode = episode self.version = version - self.releases = releases + self.releases = releases or [] self.year = year self.uploader = uploader if year: @@ -91,7 +93,7 @@ class SuperSubtitlesSubtitle(Subtitle): self.asked_for_episode = asked_for_episode self.imdb_id = imdb_id self.is_pack = True - self.matches = None + self.matches = set() def numeric_id(self): return self.subtitle_id @@ -109,8 +111,8 @@ class SuperSubtitlesSubtitle(Subtitle): return str(self.subtitle_id) def get_matches(self, video): - type_ = "movie" if isinstance(video, Movie) else "episode" - matches = guess_matches(video, guessit(self.release_info, {"type": type_})) + matches = set() + update_matches(matches, video, self.releases) # episode if isinstance(video, Episode): @@ -543,21 +545,12 @@ class SuperSubtitlesProvider(Provider, ProviderSubtitleArchiveMixin): return subtitles def download_subtitle(self, subtitle): - - # download as a zip - logger.info('Downloading subtitle %r', subtitle.subtitle_id) r = self.session.get(subtitle.page_link, timeout=10) r.raise_for_status() - archive_stream = io.BytesIO(r.content) - archive = None + archive = get_archive_from_bytes(r.content) - if is_rarfile(archive_stream): - archive = RarFile(archive_stream) - elif is_zipfile(archive_stream): - archive = ZipFile(archive_stream) - else: - subtitle.content = fix_line_ending(r.content) + if archive is None: + raise APIThrottled(f"Invalid archive from {subtitle.page_link}") - if archive is not None: - subtitle.content = self.get_subtitle_from_archive(subtitle, archive) + subtitle.content = get_subtitle_from_archive(archive, episode=subtitle.episode or None) diff --git a/libs/subliminal_patch/providers/utils.py b/libs/subliminal_patch/providers/utils.py index bf3aa856b..ea8e411e6 100644 --- a/libs/subliminal_patch/providers/utils.py +++ b/libs/subliminal_patch/providers/utils.py @@ -4,9 +4,12 @@ import io import logging import os import re +import tempfile +from typing import Iterable, Union import zipfile from guessit import guessit +import pysubs2 import rarfile from subliminal.subtitle import fix_line_ending from subliminal_patch.core import Episode @@ -119,10 +122,10 @@ def is_episode(content): def get_archive_from_bytes(content: bytes): - """Get RarFile/ZipFile object from bytes. Return None is something else - is found.""" - # open the archive + """Get RarFile/ZipFile object from bytes. A ZipFile instance will be returned + if a subtitle-like stream is found. Return None if something else is found.""" archive_stream = io.BytesIO(content) + if rarfile.is_rarfile(archive_stream): logger.debug("Identified rar archive") return rarfile.RarFile(archive_stream) @@ -130,18 +133,50 @@ def get_archive_from_bytes(content: bytes): logger.debug("Identified zip archive") return zipfile.ZipFile(archive_stream) - logger.debug("Unknown compression format") + logger.debug("No compression format found. Trying with subtitle-like files") + + # If the file is a subtitle-like file + with tempfile.NamedTemporaryFile(prefix="spsub", suffix=".srt") as tmp_f: + try: + tmp_f.write(content) + sub = pysubs2.load(tmp_f.name) + except Exception as error: + logger.debug("Couldn't load file: '%s'", error) + else: + if sub is not None: + logger.debug("Identified subtitle file: %s", sub) + zip_obj = zipfile.ZipFile(io.BytesIO(), mode="x") + zip_obj.write(tmp_f.name, os.path.basename(tmp_f.name)) + return zip_obj + + logger.debug("Nothing found") return None -def update_matches(matches, video, release_info: str, **guessit_options): - "Update matches set from release info string. New lines are iterated." +def update_matches( + matches, + video, + release_info: Union[str, Iterable[str]], + split="\n", + **guessit_options +): + """Update matches set from release info string or Iterable. + + Use the split parameter to iterate over the set delimiter; set None to avoid split.""" + guessit_options["type"] = "episode" if isinstance(video, Episode) else "movie" + logger.debug("Guessit options to update matches: %s", guessit_options) - for release in release_info.split("\n"): - logger.debug("Updating matches from release info: %s", release) - matches |= guess_matches(video, guessit(release.strip(), guessit_options)) - logger.debug("New matches: %s", matches) + if isinstance(release_info, str): + release_info = release_info.split(split) + + for release in release_info: + for release_split in release.split(split): + logger.debug("Updating matches from release info: %s", release) + matches |= guess_matches( + video, guessit(release_split.strip(), guessit_options) + ) + logger.debug("New matches: %s", matches) return matches diff --git a/tests/subliminal_patch/test_supersubtitles.py b/tests/subliminal_patch/test_supersubtitles.py index 6111cabc0..3794a04ca 100644 --- a/tests/subliminal_patch/test_supersubtitles.py +++ b/tests/subliminal_patch/test_supersubtitles.py @@ -44,7 +44,7 @@ def test_list_episode_subtitles(episode): def test_download_episode_subtitle(episode): subtitle = SuperSubtitlesSubtitle( Language.fromalpha2("en"), - "https://www.feliratok.info/index.php?action=letolt&felirat=1643361676", + "https://www.feliratok.eu/index.php?action=letolt&felirat=1643361676", 1643361676, "All of us are dead", 1, @@ -82,7 +82,7 @@ def test_download_movie_subtitle(movies): subtitle = SuperSubtitlesSubtitle( Language.fromalpha2("en"), - "https://www.feliratok.info/index.php?action=letolt&felirat=1634579718", + "https://www.feliratok.eu/index.php?action=letolt&felirat=1634579718", 1634579718, "Dune", 0, diff --git a/tests/subliminal_patch/test_utils.py b/tests/subliminal_patch/test_utils.py index eb8dc2421..c90cf972a 100644 --- a/tests/subliminal_patch/test_utils.py +++ b/tests/subliminal_patch/test_utils.py @@ -122,6 +122,14 @@ def test_update_matches(movies): assert "source" in matches +def test_update_matches_iterable(movies): + matches = set() + utils.update_matches( + matches, movies["dune"], ["Subs for dune 2021 bluray x264", "Dune webrip x264"] + ) + assert "source" in matches + + @pytest.mark.parametrize( "content,expected", [("the.wire.s01e01", True), ("taxi driver 1976", False)] )