bazarr/libs/guessit/rules/common/formatters.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Formatters
"""
from rebulk.formatters import formatters
from rebulk.remodule import re
from . import seps

# Separators listed here are left untouched by cleanup().
_excluded_clean_chars = ',:;-/\\'
# Every other separator, in the order it appears in seps, is replaced by a space in cleanup().
clean_chars = ''.join(sep for sep in seps if sep not in _excluded_clean_chars)


def _potential_before(i, input_string):
    """
    Check if the character at position i can be a potential single char separator considering what's before it.

    :param i:
    :type i: int
    :param input_string:
    :type input_string: str
    :return:
    :rtype: bool
    """
    return i - 2 >= 0 and input_string[i] == input_string[i - 2] and input_string[i - 1] not in seps


def _potential_after(i, input_string):
    """
    Check if the character at position i can be a potential single char separator considering what's after it.

    :param i:
    :type i: int
    :param input_string:
    :type input_string: str
    :return:
    :rtype: bool
    """
    return i + 2 >= len(input_string) or \
           input_string[i + 2] == input_string[i] and input_string[i + 1] not in seps


def cleanup(input_string):
    """
    Replace separators by spaces, strip separators from both ends and collapse repeated spaces.

    Every character of ``clean_chars`` is turned into a space. Separators that sit between single
    characters are then restored from the input (Marvel's Agents of S.H.I.E.L.D.).

    :param input_string:
    :type input_string: str
    :return: the cleaned up string
    :rtype: str
    """
    clean_string = input_string
    for char in clean_chars:
        clean_string = clean_string.replace(char, ' ')

    # Restore input separators if they separate single characters.
    # Useful for Marvel's Agents of S.H.I.E.L.D.
    # https://github.com/guessit-io/guessit/issues/278

    # Each replacement above swaps one character for one space, so an index in clean_string
    # designates the same position in input_string.
    potential_indices = [i for i, letter in enumerate(clean_string)
                         if letter in seps
                         and _potential_before(i, input_string)
                         and _potential_after(i, input_string)]

    dots = set()
    if potential_indices:
        # Set lookup keeps the neighbour test below constant time instead of scanning the list.
        potential_lookup = set(potential_indices)
        clean_list = list(clean_string)

        for index in potential_indices:
            # Only restore separators that belong to a chain: another candidate two positions away.
            if index - 2 in potential_lookup or index + 2 in potential_lookup:
                dots.add(input_string[index])
                clean_list[index] = input_string[index]

        clean_string = ''.join(clean_list)

    # Restored separator characters are excluded from stripping so they survive at the ends.
    clean_string = strip(clean_string, ''.join([c for c in seps if c not in dots]))

    clean_string = re.sub(' +', ' ', clean_string)
    return clean_string


def strip(input_string, chars=seps):
    """
    Remove the characters found in chars from both ends of input_string.

    :param input_string: string to strip
    :type input_string: str
    :param chars: characters to remove from both ends, all separators by default
    :type chars: str
    :return: the stripped string
    :rtype: str
    """
    stripped = input_string.strip(chars)
    return stripped


def raw_cleanup(raw):
    """
    Prepare a raw value for raw comparison.

    The value is lowercased, then passed to the formatter built from cleanup and strip.

    :param raw: raw value to clean up
    :type raw: str
    :return: result of the combined formatter applied to the lowercased value
    :rtype:
    """
    combined_formatter = formatters(cleanup, strip)
    lowered = raw.lower()
    return combined_formatter(lowered)


def reorder_title(title, articles=('the',), separators=(',', ', ')):
    """
    Move a trailing article to the front of the title ("Simpsons, The" becomes "The Simpsons").

    The match is case insensitive; the original casing of the title is kept in the result.
    Articles and separators are tried in the given order and the first match wins.

    :param title: title to reorder
    :type title: str
    :param articles: articles to look for at the end of the title
    :type articles: iterable of str
    :param separators: separators that may stand between the title and the trailing article
    :type separators: iterable of str
    :return: the reordered title, or the title unchanged when no trailing article is found
    :rtype: str
    """
    ltitle = title.lower()
    for article in articles:
        for separator in separators:
            # Lowercase the suffix too, otherwise an article given with capitals can never match ltitle.
            suffix = (separator + article).lower()
            # An empty suffix carries no article to move, so it is skipped.
            if suffix and ltitle.endswith(suffix):
                return title[-len(suffix) + len(separator):] + ' ' + title[:-len(suffix)]
    return title
Include dependencies and remove requirements.txt 6 years ago			`#!/usr/bin/env python`
			`# -- coding: utf-8 --`
			`"""`
			`Formatters`
			`"""`
			`from rebulk.formatters import formatters`
			`from rebulk.remodule import re`
			`from . import seps`

			`_excluded_clean_chars = ',:;-/\\'`
			`clean_chars = ""`
			`for sep in seps:`
			`if sep not in _excluded_clean_chars:`
			`clean_chars += sep`


			`def _potential_before(i, input_string):`
			`"""`
			`Check if the character at position i can be a potential single char separator considering what's before it.`

			`:param i:`
			`:type i: int`
			`:param input_string:`
			`:type input_string: str`
			`:return:`
			`:rtype: bool`
			`"""`
			`return i - 2 >= 0 and input_string[i] == input_string[i - 2] and input_string[i - 1] not in seps`


			`def _potential_after(i, input_string):`
			`"""`
			`Check if the character at position i can be a potential single char separator considering what's after it.`

			`:param i:`
			`:type i: int`
			`:param input_string:`
			`:type input_string: str`
			`:return:`
			`:rtype: bool`
			`"""`
			`return i + 2 >= len(input_string) or \`
			`input_string[i + 2] == input_string[i] and input_string[i + 1] not in seps`


			`def cleanup(input_string):`
			`"""`
			`Removes and strip separators from input_string (but keep ',;' characters)`

			`It also keep separators for single characters (Mavels Agents of S.H.I.E.L.D.)`

			`:param input_string:`
			`:type input_string: str`
			`:return:`
			`:rtype:`
			`"""`
			`clean_string = input_string`
			`for char in clean_chars:`
			`clean_string = clean_string.replace(char, ' ')`

			`# Restore input separator if they separate single characters.`
			`# Useful for Mavels Agents of S.H.I.E.L.D.`
			`# https://github.com/guessit-io/guessit/issues/278`

			`indices = [i for i, letter in enumerate(clean_string) if letter in seps]`

			`dots = set()`
			`if indices:`
			`clean_list = list(clean_string)`

			`potential_indices = []`

			`for i in indices:`
			`if _potential_before(i, input_string) and _potential_after(i, input_string):`
			`potential_indices.append(i)`

			`replace_indices = []`

			`for potential_index in potential_indices:`
			`if potential_index - 2 in potential_indices or potential_index + 2 in potential_indices:`
			`replace_indices.append(potential_index)`

			`if replace_indices:`
			`for replace_index in replace_indices:`
			`dots.add(input_string[replace_index])`
			`clean_list[replace_index] = input_string[replace_index]`
			`clean_string = ''.join(clean_list)`

			`clean_string = strip(clean_string, ''.join([c for c in seps if c not in dots]))`

			`clean_string = re.sub(' +', ' ', clean_string)`
			`return clean_string`


			`def strip(input_string, chars=seps):`
			`"""`
			`Strip separators from input_string`
			`:param input_string:`
			`:param chars:`
			`:type input_string:`
			`:return:`
			`:rtype:`
			`"""`
			`return input_string.strip(chars)`


			`def raw_cleanup(raw):`
			`"""`
			`Cleanup a raw value to perform raw comparison`
			`:param raw:`
			`:type raw:`
			`:return:`
			`:rtype:`
			`"""`
			`return formatters(cleanup, strip)(raw.lower())`


			`def reorder_title(title, articles=('the',), separators=(',', ', ')):`
			`"""`
			`Reorder the title`
			`:param title:`
			`:type title:`
			`:param articles:`
			`:type articles:`
			`:param separators:`
			`:type separators:`
			`:return:`
			`:rtype:`
			`"""`
			`ltitle = title.lower()`
			`for article in articles:`
			`for separator in separators:`
			`suffix = separator + article`
			`if ltitle[-len(suffix):] == suffix:`
			`return title[-len(suffix) + len(separator):] + ' ' + title[:-len(suffix)]`
			`return title`