bazarr/libs/subliminal_patch/providers/utils.py

from collections import namedtuple
from difflib import SequenceMatcher
import io
import logging
import os
import re
import zipfile

from guessit import guessit
import rarfile
from subliminal.subtitle import fix_line_ending
from subliminal_patch.core import Episode
from subliminal_patch.subtitle import guess_matches

from ._agent_list import FIRST_THOUSAND_OR_SO_USER_AGENTS

logger = logging.getLogger(__name__)


# Candidate subtitle file selected from a list of file names:
#   file     - the subtitle file name that matched
#   priority - integer rank; candidates are sorted by it, highest first
#   context  - short human-readable reason for the match (appears in debug logs)
_MatchingSub = namedtuple("_MatchingSub", ("file", "priority", "context"))


def _get_matching_sub(
    sub_names, forced=False, episode=None, episode_title=None, **kwargs
):
    """Pick the best matching subtitle file name from ``sub_names``.

    Each name is evaluated in order and every match is recorded as a
    ``_MatchingSub`` candidate; the candidate with the highest priority wins
    (ties keep the order in which they were found).

    :param sub_names: iterable of subtitle file names to choose from.
    :param forced: when False, names whose stem ends with "forced"
        (case-insensitive) are skipped.
    :param episode: wanted episode number, or None when unknown.
    :param episode_title: wanted episode title, or None when unknown.
    :param kwargs: accepted for call compatibility; not used here.
    :return: the chosen file name, or None when nothing matched.
    """
    guess_options = {"single_value": True}
    if episode is not None:
        guess_options["type"] = "episode"  # type: ignore

    matching_subs = []

    for sub_name in sub_names:
        if not forced and os.path.splitext(sub_name.lower())[0].endswith("forced"):
            logger.debug("Ignoring forced subtitle: %s", sub_name)
            continue

        # If it's a movie then get the first subtitle
        if episode is None and episode_title is None:
            logger.debug("Movie subtitle found: %s", sub_name)
            matching_subs.append(_MatchingSub(sub_name, 2, "Movie subtitle"))
            break

        guess = guessit(sub_name, options=guess_options)

        # Compare against None explicitly: episode 0 is a valid number
        matched_episode_num = guess.get("episode")
        if matched_episode_num is None:
            logger.debug("No episode number found in file: %s", sub_name)

        if episode_title is not None:
            from_name = _analize_sub_name(sub_name, episode_title)
            if from_name is not None:
                matching_subs.append(from_name)

        # Require a real wanted episode number; otherwise None == None would
        # flag every file without an episode number as a number match.
        if episode is not None and episode == matched_episode_num:
            logger.debug("Episode matched from number: %s", sub_name)
            matching_subs.append(_MatchingSub(sub_name, 2, "Episode number matched"))

    if not matching_subs:
        logger.debug("Nothing matched")
        return None

    # Stable sort: among equal priorities the earliest candidate stays first
    matching_subs.sort(key=lambda x: x.priority, reverse=True)
    logger.debug("Matches: %s", matching_subs)
    return matching_subs[0].file


def _analize_sub_name(sub_name: str, title_):
    """Look for ``title_`` among the pieces of a subtitle file name.

    The file name (without its extension) is split on "." and "-"; each piece
    is stripped and compared to ``title_`` with ``SequenceMatcher``. The first
    piece whose similarity ratio exceeds 0.85 decides the result.

    :param sub_name: subtitle file name to inspect.
    :param title_: episode title to look for.
    :return: a ``_MatchingSub`` with priority 3 when the ratio is at least
        0.98 and ``title_`` is longer than 4 characters, priority 1 for any
        other ratio above 0.85, or None when no piece is similar enough.
    """
    stem, _extension = os.path.splitext(sub_name)

    for chunk in re.split(r"[.-]", stem):
        candidate = chunk.strip()
        similarity = SequenceMatcher(None, candidate, title_).ratio()
        if similarity <= 0.85:
            continue

        logger.debug(
            "Episode title matched: '%s' -> '%s' [%s]", candidate, sub_name, similarity
        )

        # Short titles are never promoted to a "perfect" match, to avoid
        # false positives
        if len(title_) <= 4 or similarity < 0.98:
            return _MatchingSub(sub_name, 1, "Normal title ratio")

        return _MatchingSub(sub_name, 3, "Perfect title ratio")

    logger.debug("No episode title matched from file: %s", sub_name)
    return None


def get_subtitle_from_archive(
    archive, forced=False, episode=None, get_first_subtitle=False, **kwargs
):
    """Get subtitle from Rarfile/Zipfile object. Return None if nothing is found.

    Only members ending in .srt, .sub, .ssa or .ass (compared
    case-insensitively) are considered.

    :param archive: object exposing ``namelist()`` and ``read(name)``.
    :param forced: forwarded to ``_get_matching_sub``.
    :param episode: forwarded to ``_get_matching_sub``.
    :param get_first_subtitle: when True, return the first subtitle found
        without trying to match it.
    :param kwargs: extra keyword arguments forwarded to ``_get_matching_sub``
        (e.g. ``episode_title``).
    :return: the selected member's content passed through
        ``fix_line_ending``, or None when no subtitle is found or matched.
    """
    # Lowercase only for the comparison so names such as "MOVIE.SRT" are kept;
    # the original name is preserved for reading from the archive.
    subs_in_archive = [
        name
        for name in archive.namelist()
        if name.lower().endswith((".srt", ".sub", ".ssa", ".ass"))
    ]

    if not subs_in_archive:
        logger.info("No subtitles found in archive")
        return None

    logger.debug("Subtitles in archive: %s", subs_in_archive)

    if len(subs_in_archive) == 1 or get_first_subtitle:
        first_sub = subs_in_archive[0]
        logger.debug("Getting first subtitle in archive: %s", first_sub)
        return fix_line_ending(archive.read(first_sub))

    matching_sub = _get_matching_sub(subs_in_archive, forced, episode, **kwargs)

    if matching_sub is not None:
        logger.info("Using %s from archive", matching_sub)
        return fix_line_ending(archive.read(matching_sub))

    logger.debug("No subtitle found in archive")
    return None


def is_episode(content):
    """Return True when guessit, run with type "episode", finds an episode
    key in ``content``."""
    guess = guessit(content, {"type": "episode"})
    return "episode" in guess


def get_archive_from_bytes(content: bytes):
    """Get RarFile/ZipFile object from bytes. Return None if something else
    is found."""
    stream = io.BytesIO(content)

    # Probed in order: rar first, then zip
    archive_kinds = (
        (rarfile.is_rarfile, rarfile.RarFile, "Identified rar archive"),
        (zipfile.is_zipfile, zipfile.ZipFile, "Identified zip archive"),
    )

    for is_kind, open_archive, message in archive_kinds:
        if is_kind(stream):
            logger.debug(message)
            return open_archive(stream)

    logger.debug("Unknown compression format")
    return None


def update_matches(matches, video, release_info: str, **guessit_options):
    """Update matches set from release info string. New lines are iterated.

    The guessit "type" option is set to "episode" when ``video`` is an
    ``Episode`` and to "movie" otherwise. Each line of ``release_info`` is
    stripped, guessed and merged into ``matches`` with ``|=``.

    :return: the updated ``matches``.
    """
    if isinstance(video, Episode):
        guessit_options["type"] = "episode"
    else:
        guessit_options["type"] = "movie"
    logger.debug("Guessit options to update matches: %s", guessit_options)

    for line in release_info.split("\n"):
        logger.debug("Updating matches from release info: %s", line)
        guessed = guessit(line.strip(), guessit_options)
        matches |= guess_matches(video, guessed)
        logger.debug("New matches: %s", matches)

    return matches