You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
bazarr/libs/subliminal_patch/utils.py

64 lines
1.9 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# coding=utf-8
from __future__ import absolute_import
import re
def sanitize(string, ignore_characters=None, default_characters={'-', ':', '(', ')', '.'}):
    """Sanitize a string to strip special characters.

    Every character in `default_characters` is replaced by a space,
    apostrophe-like characters are removed outright, runs of whitespace are
    collapsed into a single space and the result is stripped and lower-cased.

    :param str string: the string to sanitize.
    :param set ignore_characters: characters to leave untouched by both the
        replace step and the remove step (any iterable of characters works).
    :param set default_characters: characters to replace with one space.
    :return: the sanitized string, or ``None`` if `string` is not a ``str``.
    :rtype: str

    """
    # only deal with strings
    if not isinstance(string, str):
        return None

    # Work on fresh sets: this accepts any iterable of characters and makes
    # sure the shared default argument is never handed out or mutated.
    ignored = set(ignore_characters or ())

    # replace some characters with one space
    to_space = set(default_characters) - ignored
    if to_space:
        string = re.sub(r'[%s]' % re.escape(''.join(to_space)), ' ', string)

    # remove apostrophe-like characters; the non-ASCII ones are written as
    # escapes so they cannot be lost or confused when the file is re-encoded:
    #   U+00B4 ACUTE ACCENT, U+2019 RIGHT SINGLE QUOTATION MARK
    to_remove = {'\'', '\u00b4', '`', '\u2019'} - ignored
    if to_remove:
        string = re.sub(r'[%s]' % re.escape(''.join(to_remove)), '', string)

    # replace multiple spaces with one
    string = re.sub(r'\s+', ' ', string)

    # strip and lower case
    return string.strip().lower()
def fix_inconsistent_naming(title, inconsistent_titles_dict=None, no_sanitize=False):
    """Fix titles with inconsistent naming using dictionary and sanitize them.

    Every occurrence of a key of `inconsistent_titles_dict` inside `title` is
    replaced by the value stored under that key. Unless `no_sanitize` is set,
    the result is then passed through ``sanitize``.

    :param str title: original title.
    :param dict inconsistent_titles_dict: dictionary of titles with inconsistent naming.
    :param bool no_sanitize: indication to not sanitize title.
    :return: new title, or ``None`` if `title` is ``None``.
    :rtype: str

    """
    # nothing to fix without a title
    if title is None:
        return None

    # fix titles with inconsistent naming using dictionary
    replacements = inconsistent_titles_dict or {}
    # Skip empty keys: an empty alternative matches at every position and
    # would splice its value between all characters of the title.
    # Try longer keys first: regex alternation takes the first alternative
    # that matches, so a short key must not shadow a longer key that starts
    # with it.
    keys = sorted((key for key in replacements if key), key=len, reverse=True)
    if keys:
        pattern = re.compile('|'.join(re.escape(key) for key in keys))
        # a callable replacement keeps backslashes in the values literal
        title = pattern.sub(lambda match: replacements[match.group()], title)

    # return fixed and (unless disabled) sanitized title
    return title if no_sanitize else sanitize(title)