From c06dd620b759d6dd37125caf19a22711fc426f37 Mon Sep 17 00:00:00 2001
From: Vitiko
Date: Wed, 17 May 2023 01:45:29 -0400
Subject: [PATCH] Add mediainfo support for custom languages

---
 bazarr/languages/custom_lang.py               |  14 +
 bazarr/utilities/video_analyzer.py            |  12 +-
 tests/bazarr/test_utilities_video_analyzer.py | 232 ++++++++++++++++++
 3 files changed, 256 insertions(+), 2 deletions(-)
 create mode 100644 tests/bazarr/test_utilities_video_analyzer.py

diff --git a/bazarr/languages/custom_lang.py b/bazarr/languages/custom_lang.py
index 81beca99a..2b19d0a5f 100644
--- a/bazarr/languages/custom_lang.py
+++ b/bazarr/languages/custom_lang.py
@@ -4,6 +4,7 @@ import logging
 import os
 
 from subzero.language import Language
+from babelfish.script import Script
 
 logger = logging.getLogger(__name__)
 
@@ -18,6 +19,7 @@ class CustomLanguage:
     official_alpha3 = "por"
     name = "Brazilian Portuguese"
     iso = "BR"
+    _scripts = ()
     _possible_matches = ("pt-br", "pob", "pb", "brazilian", "brasil", "brazil")
     _extensions = (".pt-br", ".pob", ".pb")
     _extensions_forced = (".pt-br.forced", ".pob.forced", ".pb.forced")
@@ -86,6 +88,16 @@ class CustomLanguage:
 
         return any(ext in name for ext in self._possible_matches)
 
+    def language_found(self, language: Language):
+        """Return whether `language` matches this one by country or script."""
+        if str(language.country) == self.iso:
+            return True
+
+        if language.script and language.script in self._scripts:
+            return True
+
+        return False
+
 
 class BrazilianPortuguese(CustomLanguage):
     # Same attributes as base class
@@ -100,6 +112,7 @@ class ChineseTraditional(CustomLanguage):
     official_alpha3 = "zho"
     name = "Chinese Traditional"
     iso = "TW"
+    _scripts = (Script("Hant"),)
     _extensions = (
         ".cht",
         ".tc",
@@ -211,6 +224,7 @@ class LatinAmericanSpanish(CustomLanguage):
     official_alpha3 = "spa"
     name = "Latin American Spanish"
     iso = "MX"  # Not fair, but ok
+    _scripts = (Script("419"),)
     _possible_matches = (
         "es-la",
         "spa-la",
diff --git a/bazarr/utilities/video_analyzer.py b/bazarr/utilities/video_analyzer.py
index 2454c5149..89bc48ff0 100644
--- a/bazarr/utilities/video_analyzer.py
+++ b/bazarr/utilities/video_analyzer.py
@@ -16,7 +16,15 @@ def _handle_alpha3(detected_language: dict):
     alpha3 = detected_language["language"].alpha3
     custom = CustomLanguage.from_value(alpha3, "official_alpha3")
 
-    if custom and custom.ffprobe_found(detected_language):
+    if not custom:
+        return alpha3
+
+    # Check the parsed Language object first; ffprobe_found() is the fallback.
+    found = custom.language_found(detected_language["language"])
+    if not found:
+        found = custom.ffprobe_found(detected_language)
+
+    if found:
         logging.debug("Custom embedded language found: %s", custom.name)
         return custom.alpha3
 
@@ -33,7 +41,7 @@ def embedded_subs_reader(file, file_size, episode_file_id=None, movie_file_id=No
         return subtitles_list
 
     cache_provider = None
-    if data["ffprobe"] and "subtitle" in data["ffprobe"]:
+    if "ffprobe" in data and data["ffprobe"] and "subtitle" in data["ffprobe"]:
         cache_provider = 'ffprobe'
     elif 'mediainfo' in data and data["mediainfo"] and "subtitle" in data["mediainfo"]:
         cache_provider = 'mediainfo'
diff --git a/tests/bazarr/test_utilities_video_analyzer.py b/tests/bazarr/test_utilities_video_analyzer.py
new file mode 100644
index 000000000..893415e13
--- /dev/null
+++ b/tests/bazarr/test_utilities_video_analyzer.py
@@ -0,0 +1,232 @@
+import logging
+
+import pytest
+
+from bazarr.utilities import video_analyzer
+
+logging.getLogger("knowit").setLevel(logging.WARNING)
+
+M_INFO = {
+    "creatingLibrary": {
+        "name": "MediaInfoLib",
+        "version": "23.03",
+        "url": "https://mediaarea.net/MediaInfo",
+    },
+    "media": {
+        "@ref": "/mnt/media/Hocus.Pocus.1993.1080p.DSNP.WEB-DL.DDP5.1.H.264.DUAL-PD.mkv",
+        "track": [
+            {
+                "@type": "General",
+                "UniqueID": "177986280948425821736023466260510750529",
+                "VideoCount": "1",
+                "AudioCount": "2",
+                "TextCount": "31",
+                "FileExtension": "mkv",
+                "Format": "Matroska",
+                "Format_Version": "4",
+                "FileSize": "6468058376",
+                "Duration": "5766.219",
+                "OverallBitRate_Mode": "VBR",
+                "OverallBitRate": "8973726",
+                "FrameRate": "23.976",
+                "FrameCount": "138251",
+                "StreamSize": "2607619",
+                "IsStreamable": "Yes",
+                "Encoded_Date": "2023-04-22 16:46:57 UTC",
+                "File_Modified_Date": "2023-05-17 01:49:55 UTC",
+                "File_Modified_Date_Local": "2023-05-16 21:49:55",
+                "Encoded_Application": "mkvmerge v75.0.0 ('Goliath') 64-bit",
+                "Encoded_Library": "libebml v1.4.4 + libmatroska v1.7.1",
+            },
+            {
+                "@type": "Video",
+                "StreamOrder": "0",
+                "ID": "1",
+                "UniqueID": "9393509843335289949",
+                "Format": "AVC",
+                "Format_Profile": "High",
+                "Format_Level": "4",
+                "Format_Settings_CABAC": "Yes",
+                "Format_Settings_RefFrames": "4",
+                "CodecID": "V_MPEG4/ISO/AVC",
+                "Duration": "5766.219000000",
+                "BitRate_Mode": "VBR",
+                "BitRate": "8458733",
+                "BitRate_Maximum": "12749952",
+                "Width": "1920",
+                "Height": "1080",
+                "Stored_Height": "1088",
+                "Sampled_Width": "1920",
+                "Sampled_Height": "1080",
+                "PixelAspectRatio": "1.000",
+                "DisplayAspectRatio": "1.778",
+                "FrameRate_Mode": "CFR",
+                "FrameRate": "23.976",
+                "FrameCount": "138251",
+                "ColorSpace": "YUV",
+                "ChromaSubsampling": "4:2:0",
+                "BitDepth": "8",
+                "ScanType": "Progressive",
+                "Delay": "0.000",
+                "Delay_Source": "Container",
+                "StreamSize": "6096863666",
+                "Default": "Yes",
+                "Forced": "No",
+                "BufferSize": "17000000",
+                "colour_description_present": "Yes",
+                "colour_description_present_Source": "Container / Stream",
+                "colour_range": "Limited",
+                "colour_range_Source": "Stream",
+                "colour_primaries": "BT.709",
+                "colour_primaries_Source": "Container / Stream",
+                "transfer_characteristics": "BT.709",
+                "transfer_characteristics_Source": "Container / Stream",
+                "matrix_coefficients": "BT.709",
+                "matrix_coefficients_Source": "Container / Stream",
+            },
+            {
+                "@type": "Text",
+                "@typeorder": "7",
+                "StreamOrder": "9",
+                "ID": "10",
+                "UniqueID": "2233390560797234737",
+                "Format": "UTF-8",
+                "CodecID": "S_TEXT/UTF8",
+                "Duration": "5480.360000000",
+                "BitRate": "45",
+                "FrameRate": "0.206",
+                "FrameCount": "1129",
+                "ElementCount": "1129",
+                "StreamSize": "31194",
+                "Language": "es-419",
+                "Default": "No",
+                "Forced": "No",
+            },
+            {
+                "@type": "Text",
+                "@typeorder": "9",
+                "StreamOrder": "11",
+                "ID": "12",
+                "UniqueID": "1345374948683222936",
+                "Format": "UTF-8",
+                "CodecID": "S_TEXT/UTF8",
+                "Duration": "5561.600000000",
+                "BitRate": "46",
+                "FrameRate": "0.164",
+                "FrameCount": "914",
+                "ElementCount": "914",
+                "StreamSize": "32145",
+                "Language": "es-ES",
+                "Default": "No",
+                "Forced": "No",
+            },
+            {
+                "@type": "Text",
+                "@typeorder": "11",
+                "StreamOrder": "13",
+                "ID": "14",
+                "UniqueID": "17039172451186467602",
+                "Format": "UTF-8",
+                "CodecID": "S_TEXT/UTF8",
+                "Duration": "4966.120000000",
+                "BitRate": "1",
+                "FrameRate": "0.007",
+                "FrameCount": "35",
+                "ElementCount": "35",
+                "StreamSize": "1011",
+                "Language": "fr-CA",
+                "Default": "No",
+                "Forced": "No",
+            },
+            {
+                "@type": "Text",
+                "@typeorder": "24",
+                "StreamOrder": "26",
+                "ID": "27",
+                "UniqueID": "16221047442617815320",
+                "Format": "UTF-8",
+                "CodecID": "S_TEXT/UTF8",
+                "Duration": "4961.520000000",
+                "BitRate": "0",
+                "FrameRate": "0.002",
+                "FrameCount": "11",
+                "ElementCount": "11",
+                "StreamSize": "379",
+                "Language": "pt-BR",
+                "Default": "No",
+                "Forced": "No",
+            },
+            {
+                "@type": "Text",
+                "@typeorder": "30",
+                "StreamOrder": "32",
+                "ID": "33",
+                "UniqueID": "4259582444071016270",
+                "Format": "UTF-8",
+                "CodecID": "S_TEXT/UTF8",
+                "Duration": "5507.508000000",
+                "BitRate": "50",
+                "FrameRate": "0.253",
+                "FrameCount": "1392",
+                "ElementCount": "1392",
+                "StreamSize": "34539",
+                "Language": "zh-Hans",
+                "Default": "No",
+                "Forced": "No",
+            },
+            {
+                "@type": "Text",
+                "@typeorder": "31",
+                "StreamOrder": "33",
+                "ID": "34",
+                "UniqueID": "4890027048965677919",
+                "Format": "UTF-8",
+                "CodecID": "S_TEXT/UTF8",
+                "Duration": "5730.725000000",
+                "BitRate": "43",
+                "FrameRate": "0.207",
+                "FrameCount": "1186",
+                "ElementCount": "1186",
+                "StreamSize": "31154",
+                "Language": "zh-Hant",
+                "Default": "No",
+                "Forced": "No",
+            },
+        ],
+    },
+}
+
+
+@pytest.fixture
+def video_file():
+    """Return the video path used by the tests in this module."""
+    return "tests/subliminal_patch/data/file_1.mkv"
+
+
+@pytest.fixture
+def mediainfo_data(mocker, video_file):
+    """Return video_analyzer.know() output with mediainfo mocked to M_INFO."""
+    mocker.patch(
+        "knowit.providers.mediainfo.MediaInfoCTypesExecutor._execute",
+        return_value=M_INFO,
+    )
+    data = video_analyzer.know(
+        video_path=video_file,
+        context={"provider": "mediainfo"},
+    )
+    return data
+
+
+def test_embedded_subs_reader(mocker, mediainfo_data, video_file):
+    """Region/script-tagged tracks must map to the custom alpha3 codes."""
+    mocker.patch(
+        "bazarr.utilities.video_analyzer.parse_video_metadata",
+        return_value={"mediainfo": mediainfo_data},
+    )
+    mocker.patch(
+        "bazarr.utilities.video_analyzer.alpha3_from_alpha2", return_value=None
+    )
+    result = video_analyzer.embedded_subs_reader(video_file, 1e6)
+    assert ["spl", False, False, "SubRip"] in result
+    assert ["pob", False, False, "SubRip"] in result
+    assert ["zht", False, False, "SubRip"] in result