bazarr/libs/pygments/lexers/special.py

"""
    pygments.lexers.special
    ~~~~~~~~~~~~~~~~~~~~~~~

    Special lexers.

    :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import ast
import re

from pygments.lexer import Lexer
from pygments.token import Token, Error, Text, Generic
from pygments.util import get_choice_opt


# Names exported by ``from pygments.lexers.special import *``.
__all__ = ['TextLexer', 'OutputLexer', 'RawTokenLexer']


class TextLexer(Lexer):
    """
    Pass-through lexer: the input comes back as one unhighlighted chunk.
    """
    name = 'Text only'
    aliases = ['text']
    filenames = ['*.txt']
    mimetypes = ['text/plain']
    priority = 0.01

    def get_tokens_unprocessed(self, text):
        """Yield the complete input as one ``Text`` token at offset 0."""
        whole_input = (0, Text, text)
        yield whole_input

    def analyse_text(text):
        """Return this lexer's ``priority``, whatever the input is."""
        # NOTE(review): declared without ``self``; presumably the ``Lexer``
        # base machinery invokes it with the text only -- confirm there.
        score = TextLexer.priority
        return score


class OutputLexer(Lexer):
    """
    Lexer that marks its entire input as ``Token.Generic.Output``.

    .. versionadded:: 2.10
    """
    name = 'Text output'
    aliases = ['output']

    def get_tokens_unprocessed(self, text):
        """Yield the complete input as one ``Generic.Output`` token at offset 0."""
        whole_input = (0, Generic.Output, text)
        yield whole_input


# Memo of token type strings (as they appear in a raw token stream) to the
# token type objects RawTokenLexer resolved them to.
_ttype_cache = dict()

# Matches a single line, including the newline that terminates it.
line_re = re.compile('.*?\n')


class RawTokenLexer(Lexer):
    """
    Recreate a token stream formatted with the `RawTokenFormatter`.

    Every input line is expected to hold a token type name, a tab and the
    Python literal of the token's string value.  Lines that do not follow
    this format are passed through as ``Error`` tokens.

    Additional options accepted:

    `compress`
        If set to ``"gz"`` or ``"bz2"``, decompress the token stream with
        the given compression algorithm before lexing (default: ``""``).
    """
    name = 'Raw token data'
    aliases = []
    filenames = []
    mimetypes = ['application/x-pygments-tokens']

    def __init__(self, **options):
        self.compress = get_choice_opt(options, 'compress',
                                       ['', 'none', 'gz', 'bz2'], '')
        Lexer.__init__(self, **options)

    def _decompress(self, data):
        """
        Decompress the bytes `data` according to ``self.compress``.

        Must only be called when ``self.compress`` is ``'gz'`` or ``'bz2'``.
        Returns the decompressed bytes, or ``None`` if `data` is not a
        valid (complete) stream of that format.
        """
        # The compression modules are imported lazily, only when needed.
        if self.compress == 'gz':
            import gzip
            import zlib
            decompress = gzip.decompress
            # bad header -> OSError, truncated -> EOFError,
            # corrupt payload -> zlib.error
            failures = (OSError, EOFError, zlib.error)
        else:
            import bz2
            decompress = bz2.decompress
            # invalid data -> OSError, truncated -> ValueError
            failures = (OSError, ValueError)
        try:
            return decompress(data)
        except failures:
            return None

    def get_tokens(self, text):
        """
        Yield ``(tokentype, value)`` pairs recreated from `text`.

        `text` may be ``str`` or ``bytes``; bytes are decoded as latin-1.
        If a compression is configured and `text` cannot be decompressed,
        the whole input is yielded as a single ``Error`` token and nothing
        else is produced.
        """
        # Both '' and 'none' mean "not compressed"; only the real algorithms
        # need the (latin-1) bytes representation of the input.
        if self.compress in ('gz', 'bz2'):
            if isinstance(text, str):
                text = text.encode('latin1')
            decompressed = self._decompress(text)
            if decompressed is None:
                # Stop here: lexing the still-compressed data as well would
                # only emit the same garbage a second time, line by line.
                yield Error, text.decode('latin1')
                return
            text = decompressed
        if isinstance(text, bytes):
            text = text.decode('latin1')

        # do not call Lexer.get_tokens() because stripping is not optional.
        text = text.strip('\n') + '\n'
        for _, ttype, value in self.get_tokens_unprocessed(text):
            yield ttype, value

    def get_tokens_unprocessed(self, text):
        """
        Yield ``(offset, tokentype, value)`` triples, one per line of `text`.

        ``offset`` is the summed length of all values yielded before.  A line
        that cannot be parsed is yielded unchanged with the ``Error`` type.
        """
        length = 0
        for match in line_re.finditer(text):
            line = match.group()
            try:
                ttypestr, val = line.rstrip().split('\t', 1)
                ttype = _ttype_cache.get(ttypestr)
                # Compare against None explicitly so that a falsy cached
                # token type still counts as a cache hit.
                if ttype is None:
                    ttype = Token
                    # The first dotted component (normally "Token") is
                    # skipped; the rest is resolved attribute by attribute.
                    for part in ttypestr.split('.')[1:]:
                        if not part or not part[0].isupper():
                            raise ValueError('malformed token name')
                        ttype = getattr(ttype, part)
                    _ttype_cache[ttypestr] = ttype
                val = ast.literal_eval(val)
                if not isinstance(val, str):
                    raise ValueError('expected str')
            except (SyntaxError, ValueError, TypeError,
                    MemoryError, RecursionError):
                # ast.literal_eval() is documented to raise any of these on
                # malformed input; none of them may abort the whole lexer.
                val = line
                ttype = Error
            yield length, ttype, val
            length += len(val)
Update ffsubsync and srt module * Update ffsubsync to 0.4.11 * Update srt to 3.4.1 4 years ago			`"""`
			`pygments.lexers.special`
			`~~~~~~~~~~~~~~~~~~~~~~~`

			`Special lexers.`

Updated vendored dependencies. 2 years ago			`:copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.`
Update ffsubsync and srt module * Update ffsubsync to 0.4.11 * Update srt to 3.4.1 4 years ago			`:license: BSD, see LICENSE for details.`
			`"""`

			`import ast`
			`import re`

			`from pygments.lexer import Lexer`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 3 years ago			`from pygments.token import Token, Error, Text, Generic`
Update ffsubsync and srt module * Update ffsubsync to 0.4.11 * Update srt to 3.4.1 4 years ago			`from pygments.util import get_choice_opt`


Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 3 years ago			`__all__ = ['TextLexer', 'OutputLexer', 'RawTokenLexer']`
Update ffsubsync and srt module * Update ffsubsync to 0.4.11 * Update srt to 3.4.1 4 years ago

			`class TextLexer(Lexer):`
			`"""`
			`"Null" lexer, doesn't highlight anything.`
			`"""`
			`name = 'Text only'`
			`aliases = ['text']`
			`filenames = ['*.txt']`
			`mimetypes = ['text/plain']`
			`priority = 0.01`

			`def get_tokens_unprocessed(self, text):`
			`yield 0, Text, text`

			`def analyse_text(text):`
			`return TextLexer.priority`


Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 3 years ago			`class OutputLexer(Lexer):`
			`"""`
			Simple lexer that highlights everything as ``Token.Generic.Output``.

			`.. versionadded:: 2.10`
			`"""`
			`name = 'Text output'`
			`aliases = ['output']`

			`def get_tokens_unprocessed(self, text):`
			`yield 0, Generic.Output, text`


Update ffsubsync and srt module * Update ffsubsync to 0.4.11 * Update srt to 3.4.1 4 years ago			`_ttype_cache = {}`

			`line_re = re.compile('.*?\n')`


			`class RawTokenLexer(Lexer):`
			`"""`
			Recreate a token stream formatted with the `RawTokenFormatter`.

			`Additional options accepted:`

			`compress`
			If set to ``"gz"`` or ``"bz2"``, decompress the token stream with
			the given compression algorithm before lexing (default: ``""``).
			`"""`
			`name = 'Raw token data'`
			`aliases = []`
			`filenames = []`
			`mimetypes = ['application/x-pygments-tokens']`

			`def __init__(self, **options):`
			`self.compress = get_choice_opt(options, 'compress',`
			`['', 'none', 'gz', 'bz2'], '')`
			`Lexer.__init__(self, **options)`

			`def get_tokens(self, text):`
			`if self.compress:`
			`if isinstance(text, str):`
			`text = text.encode('latin1')`
			`try:`
			`if self.compress == 'gz':`
			`import gzip`
			`text = gzip.decompress(text)`
			`elif self.compress == 'bz2':`
			`import bz2`
			`text = bz2.decompress(text)`
			`except OSError:`
			`yield Error, text.decode('latin1')`
			`if isinstance(text, bytes):`
			`text = text.decode('latin1')`

			`# do not call Lexer.get_tokens() because stripping is not optional.`
			`text = text.strip('\n') + '\n'`
			`for i, t, v in self.get_tokens_unprocessed(text):`
			`yield t, v`

			`def get_tokens_unprocessed(self, text):`
			`length = 0`
			`for match in line_re.finditer(text):`
			`try:`
			`ttypestr, val = match.group().rstrip().split('\t', 1)`
			`ttype = _ttype_cache.get(ttypestr)`
			`if not ttype:`
			`ttype = Token`
			`ttypes = ttypestr.split('.')[1:]`
			`for ttype_ in ttypes:`
			`if not ttype_ or not ttype_[0].isupper():`
			`raise ValueError('malformed token name')`
			`ttype = getattr(ttype, ttype_)`
			`_ttype_cache[ttypestr] = ttype`
			`val = ast.literal_eval(val)`
			`if not isinstance(val, str):`
			`raise ValueError('expected str')`
			`except (SyntaxError, ValueError):`
			`val = match.group()`
			`ttype = Error`
			`yield length, ttype, val`
			`length += len(val)`