You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
107 lines
3.1 KiB
107 lines
3.1 KiB
4 years ago
|
"""
|
||
|
pygments.lexers.special
|
||
|
~~~~~~~~~~~~~~~~~~~~~~~
|
||
|
|
||
|
Special lexers.
|
||
|
|
||
|
:copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
|
||
|
:license: BSD, see LICENSE for details.
|
||
|
"""
|
||
|
|
||
|
import ast
|
||
|
import re
|
||
|
|
||
|
from pygments.lexer import Lexer
|
||
|
from pygments.token import Token, Error, Text
|
||
|
from pygments.util import get_choice_opt
|
||
|
|
||
|
|
||
|
# Public names of this module: the two lexer classes defined below.
__all__ = ['TextLexer', 'RawTokenLexer']
|
||
|
|
||
|
|
||
|
class TextLexer(Lexer):
    """
    "Null" lexer, doesn't highlight anything.

    The complete input is passed through unchanged as one `Text` token.
    """

    # Lexer metadata.
    name = 'Text only'
    aliases = ['text']
    filenames = ['*.txt']
    mimetypes = ['text/plain']
    # Also used as the score returned by `analyse_text` below.
    priority = 0.01

    def get_tokens_unprocessed(self, text):
        """
        Yield a single ``(index, tokentype, value)`` tuple that covers all
        of `text`, starting at index 0.
        """
        yield (0, Text, text)

    # NOTE(review): defined without ``self``; presumably the `Lexer` base
    # machinery turns this into a static method -- confirm before changing
    # the signature.
    def analyse_text(text):
        """
        Return the class-level `priority` as the score for any `text`.
        """
        return TextLexer.priority
|
||
|
|
||
|
|
||
|
# Module-wide cache mapping a token type name as written in the raw token
# stream (the text before the tab on each line) to the resolved token type
# object. Filled lazily by RawTokenLexer.get_tokens_unprocessed().
_ttype_cache = {}
|
||
|
|
||
|
# Matches one input line including its terminating newline (non-greedy up
# to the first newline character). RawTokenLexer iterates over the matches
# to process a raw token dump line by line.
line_re = re.compile('.*?\n')
|
||
|
|
||
|
|
||
|
class RawTokenLexer(Lexer):
    """
    Recreate a token stream formatted with the `RawTokenFormatter`.

    Each input line is expected to have the form
    ``<token type name>\\t<Python string literal>``.  Lines that cannot be
    parsed are emitted unchanged as `Error` tokens instead of raising.

    Additional options accepted:

    `compress`
        If set to ``"gz"`` or ``"bz2"``, decompress the token stream with
        the given compression algorithm before lexing (default: ``""``).
        ``"none"`` is accepted as well and means no compression.
    """
    name = 'Raw token data'
    aliases = []
    filenames = []
    mimetypes = ['application/x-pygments-tokens']

    def __init__(self, **options):
        """
        Read the `compress` option (one of ``''``, ``'none'``, ``'gz'``,
        ``'bz2'``) and pass all options on to `Lexer.__init__`.
        """
        self.compress = get_choice_opt(options, 'compress',
                                       ['', 'none', 'gz', 'bz2'], '')
        Lexer.__init__(self, **options)

    def get_tokens(self, text):
        """
        Yield ``(tokentype, value)`` pairs recreated from `text`.

        `text` may be `str` or `bytes`.  If the `compress` option selects
        an algorithm, the data is decompressed first.  Data that cannot be
        decompressed is emitted exactly once, as a single `Error` token,
        and nothing else is yielded.
        """
        # Only 'gz' and 'bz2' need work; '' and 'none' both mean that the
        # input is already plain raw token data.
        if self.compress in ('gz', 'bz2'):
            if isinstance(text, str):
                try:
                    text = text.encode('latin1')
                except UnicodeEncodeError:
                    # Characters outside latin-1 cannot come from a byte
                    # stream, so this cannot be compressed data.
                    yield Error, text
                    return

            # Pick the decompressor together with the exceptions it is
            # documented/known to raise for corrupt or truncated input.
            if self.compress == 'gz':
                import gzip
                import zlib
                decompress = gzip.decompress
                bad_stream = (OSError, EOFError, zlib.error)
            else:
                import bz2
                decompress = bz2.decompress
                bad_stream = (OSError, ValueError)

            try:
                text = decompress(text)
            except bad_stream:
                # Stop here: falling through would lex the still-compressed
                # bytes and emit the same garbage a second time.
                yield Error, text.decode('latin1')
                return

        if isinstance(text, bytes):
            text = text.decode('latin1')

        # do not call Lexer.get_tokens() because stripping is not optional.
        text = text.strip('\n') + '\n'
        for _, ttype, value in self.get_tokens_unprocessed(text):
            yield ttype, value

    def get_tokens_unprocessed(self, text):
        """
        Yield ``(index, tokentype, value)`` tuples, one per line of `text`.

        `index` is the running total of the lengths of all values yielded
        so far.  A line that is malformed (no tab separator, invalid token
        type name, or a value that is not a valid Python string literal)
        is yielded verbatim with the `Error` token type.
        """
        offset = 0
        for match in line_re.finditer(text):
            line = match.group()
            try:
                # Unpacking raises ValueError when the line has no tab.
                ttypestr, val = line.rstrip().split('\t', 1)
                ttype = _ttype_cache.get(ttypestr)
                # Compare against None rather than testing truthiness so a
                # cached token type that happens to be falsy is still a hit.
                if ttype is None:
                    ttype = Token
                    # The first dotted component is skipped; the remaining
                    # ones are resolved as attributes starting from Token.
                    for part in ttypestr.split('.')[1:]:
                        if not part or not part[0].isupper():
                            raise ValueError('malformed token name')
                        ttype = getattr(ttype, part)
                    _ttype_cache[ttypestr] = ttype
                val = ast.literal_eval(val)
                if not isinstance(val, str):
                    raise ValueError('expected str')
            except (SyntaxError, ValueError, TypeError, MemoryError,
                    RecursionError):
                # ast.literal_eval() may raise any of these on malformed
                # input (e.g. TypeError for ``{[1]: 2}``); treat them all
                # as a bad line rather than crashing the lexer.
                val = line
                ttype = Error
            yield offset, ttype, val
            offset += len(val)
|