bazarr/libs/pygments/regexopt.py

"""
    pygments.regexopt
    ~~~~~~~~~~~~~~~~~

    An algorithm that generates optimized regexes for matching long lists of
    literal strings.

    :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re
from re import escape
from os.path import commonprefix
from itertools import groupby
from operator import itemgetter

CS_ESCAPE = re.compile(r'[\[\^\\\-\]]')
FIRST_ELEMENT = itemgetter(0)


def make_charset(letters):
    return '[' + CS_ESCAPE.sub(lambda m: '\\' + m.group(), ''.join(letters)) + ']'


def regex_opt_inner(strings, open_paren):
    """Return a regex that matches any string in the sorted list of strings."""
    close_paren = open_paren and ')' or ''
    # print strings, repr(open_paren)
    if not strings:
        # print '-> nothing left'
        return ''
    first = strings[0]
    if len(strings) == 1:
        # print '-> only 1 string'
        return open_paren + escape(first) + close_paren
    if not first:
        # print '-> first string empty'
        return open_paren + regex_opt_inner(strings[1:], '(?:') \
            + '?' + close_paren
    if len(first) == 1:
        # multiple one-char strings? make a charset
        oneletter = []
        rest = []
        for s in strings:
            if len(s) == 1:
                oneletter.append(s)
            else:
                rest.append(s)
        if len(oneletter) > 1:  # do we have more than one oneletter string?
            if rest:
                # print '-> 1-character + rest'
                return open_paren + regex_opt_inner(rest, '') + '|' \
                    + make_charset(oneletter) + close_paren
            # print '-> only 1-character'
            return open_paren + make_charset(oneletter) + close_paren
    prefix = commonprefix(strings)
    if prefix:
        plen = len(prefix)
        # we have a prefix for all strings
        # print '-> prefix:', prefix
        return open_paren + escape(prefix) \
            + regex_opt_inner([s[plen:] for s in strings], '(?:') \
            + close_paren
    # is there a suffix?
    strings_rev = [s[::-1] for s in strings]
    suffix = commonprefix(strings_rev)
    if suffix:
        slen = len(suffix)
        # print '-> suffix:', suffix[::-1]
        return open_paren \
            + regex_opt_inner(sorted(s[:-slen] for s in strings), '(?:') \
            + escape(suffix[::-1]) + close_paren
    # recurse on common 1-string prefixes
    # print '-> last resort'
    return open_paren + \
        '|'.join(regex_opt_inner(list(group[1]), '')
                 for group in groupby(strings, lambda s: s[0] == first[0])) \
        + close_paren


def regex_opt(strings, prefix='', suffix=''):
    """Return a compiled regex that matches any string in the given list.

    The strings to match must be literal strings, not regexes.  They will be
    regex-escaped.

    *prefix* and *suffix* are pre- and appended to the final regex.
    """
    strings = sorted(strings)
    return prefix + regex_opt_inner(strings, '(') + suffix
Update ffsubsync and srt module * Update ffsubsync to 0.4.11 * Update srt to 3.4.1 4 years ago			`"""`
			`pygments.regexopt`
			`~~~~~~~~~~~~~~~~~`

			`An algorithm that generates optimized regexes for matching long lists of`
			`literal strings.`

			`:copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.`
			`:license: BSD, see LICENSE for details.`
			`"""`

			`import re`
			`from re import escape`
			`from os.path import commonprefix`
			`from itertools import groupby`
			`from operator import itemgetter`

Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 3 years ago			`CS_ESCAPE = re.compile(r'[\[\^\\\-\]]')`
Update ffsubsync and srt module * Update ffsubsync to 0.4.11 * Update srt to 3.4.1 4 years ago			`FIRST_ELEMENT = itemgetter(0)`


			`def make_charset(letters):`
			`return '[' + CS_ESCAPE.sub(lambda m: '\\' + m.group(), ''.join(letters)) + ']'`


			`def regex_opt_inner(strings, open_paren):`
			`"""Return a regex that matches any string in the sorted list of strings."""`
			`close_paren = open_paren and ')' or ''`
			`# print strings, repr(open_paren)`
			`if not strings:`
			`# print '-> nothing left'`
			`return ''`
			`first = strings[0]`
			`if len(strings) == 1:`
			`# print '-> only 1 string'`
			`return open_paren + escape(first) + close_paren`
			`if not first:`
			`# print '-> first string empty'`
			`return open_paren + regex_opt_inner(strings[1:], '(?:') \`
			`+ '?' + close_paren`
			`if len(first) == 1:`
			`# multiple one-char strings? make a charset`
			`oneletter = []`
			`rest = []`
			`for s in strings:`
			`if len(s) == 1:`
			`oneletter.append(s)`
			`else:`
			`rest.append(s)`
			`if len(oneletter) > 1: # do we have more than one oneletter string?`
			`if rest:`
			`# print '-> 1-character + rest'`
			`return open_paren + regex_opt_inner(rest, '') + '\|' \`
			`+ make_charset(oneletter) + close_paren`
			`# print '-> only 1-character'`
			`return open_paren + make_charset(oneletter) + close_paren`
			`prefix = commonprefix(strings)`
			`if prefix:`
			`plen = len(prefix)`
			`# we have a prefix for all strings`
			`# print '-> prefix:', prefix`
			`return open_paren + escape(prefix) \`
			`+ regex_opt_inner([s[plen:] for s in strings], '(?:') \`
			`+ close_paren`
			`# is there a suffix?`
			`strings_rev = [s[::-1] for s in strings]`
			`suffix = commonprefix(strings_rev)`
			`if suffix:`
			`slen = len(suffix)`
			`# print '-> suffix:', suffix[::-1]`
			`return open_paren \`
			`+ regex_opt_inner(sorted(s[:-slen] for s in strings), '(?:') \`
			`+ escape(suffix[::-1]) + close_paren`
			`# recurse on common 1-string prefixes`
			`# print '-> last resort'`
			`return open_paren + \`
			`'\|'.join(regex_opt_inner(list(group[1]), '')`
			`for group in groupby(strings, lambda s: s[0] == first[0])) \`
			`+ close_paren`


			`def regex_opt(strings, prefix='', suffix=''):`
			`"""Return a compiled regex that matches any string in the given list.`

			`The strings to match must be literal strings, not regexes. They will be`
			`regex-escaped.`

			`prefix and suffix are pre- and appended to the final regex.`
			`"""`
			`strings = sorted(strings)`
			`return prefix + regex_opt_inner(strings, '(') + suffix`