|
|
|
@ -13,6 +13,7 @@ from tld import get_tld
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Language instance built from the "eng" code (English).
ENGLISH = Language("eng")
|
|
|
|
|
# Spanish variants: plain "spa" and "spa" qualified with "MX"
# (presumably the Mexican regional variant -- confirm against Language's signature).
# Kept as a tuple so callers can use a membership test (`language not in SPANISH`).
SPANISH = (Language("spa"), Language("spa", "MX"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class CommonFixes(SubtitleTextModification):
|
|
|
|
@ -105,12 +106,16 @@ class CommonFixes(SubtitleTextModification):
|
|
|
|
|
|
|
|
|
|
# uppercase after dot
|
|
|
|
|
NReProcessor(re.compile(r'(?u)((?<!(?=\s*[A-ZÀ-Ž-_0-9.]\s*))(?:[^.\s])+\.\s+)([a-zà-ž])'),
|
|
|
|
|
lambda match: r'%s%s' % (match.group(1), match.group(2).upper()), name="CM_uppercase_after_dot"),
|
|
|
|
|
lambda match: r'%s%s' % (match.group(1), match.group(2).upper()) if len(match.group(1)) > 4 else r"%s%s" % (match.group(1), match.group(2)),
|
|
|
|
|
name="CM_uppercase_after_dot"),
|
|
|
|
|
|
|
|
|
|
# remove double interpunction
|
|
|
|
|
NReProcessor(re.compile(r'(?u)(\s*[,!?])\s*([,.!?][,.!?\s]*)'),
|
|
|
|
|
lambda match: match.group(1).strip() + (" " if match.group(2).endswith(" ") else ""),
|
|
|
|
|
name="CM_double_interpunct"),
|
|
|
|
|
name="CM_double_interpunct",
|
|
|
|
|
# Double interpunction is valid in Spanish
|
|
|
|
|
# https://www.rae.es/duda-linguistica/es-correcto-combinar-los-signos-de-interrogacion-y-exclamacion
|
|
|
|
|
supported=lambda p: p.language not in SPANISH),
|
|
|
|
|
|
|
|
|
|
# remove spaces before punctuation; don't break spaced ellipses
|
|
|
|
|
NReProcessor(re.compile(r'(?u)(?:(?<=^)|(?<=\w)) +([!?.,](?![!?.,]| \.))'), r"\1", name="CM_punctuation_space"),
|
|
|
|
|