You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
bazarr/libs/subliminal_patch/utils.py

64 lines
1.9 KiB

6 years ago
# coding=utf-8
5 years ago
from __future__ import absolute_import
6 years ago
import re
def sanitize(string, ignore_characters=None, default_characters={'-', ':', '(', ')', '.'}):
    """Sanitize a string to strip special characters.

    Characters from `default_characters` are each replaced by a space,
    apostrophe-like characters are removed outright, runs of whitespace are
    collapsed to a single space, and the result is stripped and lower-cased.

    :param str string: the string to sanitize.
    :param ignore_characters: characters to leave untouched; any iterable of
        characters (set, list, str, ...) or ``None`` for no exceptions.
    :param default_characters: characters that are replaced by a space; any
        iterable of characters.
    :return: the sanitized string, or ``None`` when `string` is not a ``str``.
    :rtype: str

    """
    # only deal with strings
    if not isinstance(string, str):
        return

    # Build fresh sets: this accepts any iterable and guarantees the shared
    # default argument is only ever read, never mutated.
    ignored = set(ignore_characters or ())

    # replace some characters with one space
    to_space = set(default_characters) - ignored
    # An empty string would contribute nothing to the character class and,
    # if it were the only element, would yield the invalid pattern "[]".
    to_space.discard('')
    if to_space:
        string = re.sub(r'[%s]' % re.escape(''.join(to_space)), ' ', string)

    # remove apostrophe-like characters: straight apostrophe, acute accent,
    # backtick and the typographic apostrophe U+2019 (written as an escape so
    # it cannot be lost to an encoding mishap)
    to_remove = {'\'', '´', '`', '\u2019'} - ignored
    if to_remove:
        string = re.sub(r'[%s]' % re.escape(''.join(to_remove)), '', string)

    # replace multiple spaces with one
    string = re.sub(r'\s+', ' ', string)

    # strip and lower case
    return string.strip().lower()
def fix_inconsistent_naming(title, inconsistent_titles_dict=None, no_sanitize=False):
    """Fix titles with inconsistent naming using dictionary and sanitize them.

    Every occurrence of a key of `inconsistent_titles_dict` inside `title` is
    replaced by the corresponding value. Unless `no_sanitize` is set, the
    result is then passed through :func:`sanitize`.

    :param str title: original title.
    :param dict inconsistent_titles_dict: dictionary of titles with inconsistent naming.
    :param bool no_sanitize: indication to not sanitize title.
    :return: new title, or ``None`` when `title` is ``None``.
    :rtype: str

    """
    # nothing to fix without a title
    if title is None:
        return

    # fix titles with inconsistent naming using dictionary
    if inconsistent_titles_dict:
        # Regex alternation takes the first alternative that matches, so try
        # longer keys first; otherwise a key that is a prefix of another key
        # would pre-empt the longer, more specific replacement.
        keys = sorted(inconsistent_titles_dict, key=len, reverse=True)
        pattern = re.compile('|'.join(re.escape(key) for key in keys))
        title = pattern.sub(lambda match: inconsistent_titles_dict[match.group()], title)

    if no_sanitize:
        return title

    return sanitize(title)