"""
    pygments.lexers.r
    ~~~~~~~~~~~~~~~~~

    Lexers for the R/S languages.

    :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import Lexer, RegexLexer, include, do_insertions
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
    Number, Punctuation, Generic

__all__ = ['RConsoleLexer', 'SLexer', 'RdLexer']


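# Matches one line at a time, including the trailing newline; a final line
# with no newline is not matched.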
line_re = re.compile('.*?\n')


class RConsoleLexer(Lexer):
    """
    For R console transcripts or R CMD BATCH output files.
    """

    name = 'RConsole'
    aliases = ['rconsole', 'rout']
    filenames = ['*.Rout']

    def get_tokens_unprocessed(self, text):
        slexer = SLexer(**self.options)

        current_code_block = ''
        insertions = []

        for match in line_re.finditer(text):
            line = match.group()
            if line.startswith('>') or line.startswith('+'):
                # Colorize the prompt as such, then put the rest of the
                # line into current_code_block.
                insertions.append((len(current_code_block),
                                   [(0, Generic.Prompt, line[:2])]))
                current_code_block += line[2:]
            else:
                # We have reached a non-prompt line!
                # If we have stored prompt lines, need to process them first.
                if current_code_block:
                    # Weave together the prompts and highlight code.
                    yield from do_insertions(
                        insertions, slexer.get_tokens_unprocessed(current_code_block))
                    # Reset vars for next code block.
                    current_code_block = ''
                    insertions = []
                # Now process the actual line itself; this is output from R.
                yield match.start(), Generic.Output, line

        # If we happen to end on a code block with nothing after it, need to
        # process the last code block. This is neither elegant nor DRY so
        # should be changed.
        if current_code_block:
            yield from do_insertions(
                insertions, slexer.get_tokens_unprocessed(current_code_block))
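
# A minimal usage sketch (illustrative, not part of the lexer): feeding a
# short transcript through RConsoleLexer yields Generic.Prompt tokens for
# the "> "/"+ " prompt markers, S-language tokens for the echoed code, and
# Generic.Output for everything else.
#
#     from pygments.lexers.r import RConsoleLexer
#     transcript = '> x <- c(1, 2, 3)\n> mean(x)\n[1] 2\n'
#     for pos, tok, val in RConsoleLexer().get_tokens_unprocessed(transcript):
#         print(pos, tok, repr(val))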


class SLexer(RegexLexer):
    """
    For S, S-plus, and R source code.

    .. versionadded:: 0.10
    """

    name = 'S'
    aliases = ['splus', 's', 'r']
    filenames = ['*.S', '*.R', '.Rhistory', '.Rprofile', '.Renviron']
    mimetypes = ['text/S-plus', 'text/S', 'text/x-r-source', 'text/x-r',
                 'text/x-R', 'text/x-r-history', 'text/x-r-profile']

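    # An R name: a backquoted name (backslash escapes allowed), an ordinary
    # name beginning with a letter or with a dot followed by a letter,
    # underscore, or another dot, or a lone dot.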
    valid_name = r'`[^`\\]*(?:\\.[^`\\]*)*`|(?:[a-zA-Z]|\.[A-Za-z_.])[\w.]*|\.'
    tokens = {
        'comments': [
            (r'#.*$', Comment.Single),
        ],
        'valid_name': [
            (valid_name, Name),
        ],
        'punctuation': [
            (r'\[{1,2}|\]{1,2}|\(|\)|;|,', Punctuation),
        ],
        'keywords': [
            (r'(if|else|for|while|repeat|in|next|break|return|switch|function)'
             r'(?![\w.])',
             Keyword.Reserved),
        ],
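        # Note: %...% below covers both the built-in special operators
        # (%%, %/%, %in%, %o%, %*%) and user-defined infix operators.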
        'operators': [
            (r'<<?-|->>?|-|==|<=|>=|<|>|&&?|!=|\|\|?|\?', Operator),
            (r'\*|\+|\^|/|!|%[^%]*%|=|~|\$|@|:{1,3}', Operator),
        ],
        'builtin_symbols': [
            (r'(NULL|NA(_(integer|real|complex|character)_)?|'
             r'letters|LETTERS|Inf|TRUE|FALSE|NaN|pi|\.\.(\.|[0-9]+))'
             r'(?![\w.])',
             Keyword.Constant),
            (r'(T|F)\b', Name.Builtin.Pseudo),
        ],
        'numbers': [
            # hex number
            (r'0[xX][a-fA-F0-9]+([pP][0-9]+)?[Li]?', Number.Hex),
            # decimal number
            (r'[+-]?([0-9]+(\.[0-9]+)?|\.[0-9]+|\.)([eE][+-]?[0-9]+)?[Li]?',
             Number),
        ],
        'statements': [
            include('comments'),
            # whitespaces
            (r'\s+', Text),
            (r'\'', String, 'string_squote'),
            (r'\"', String, 'string_dquote'),
            include('builtin_symbols'),
            include('valid_name'),
            include('numbers'),
            include('keywords'),
            include('punctuation'),
            include('operators'),
        ],
        'root': [
            # calls:
            (r'(%s)\s*(?=\()' % valid_name, Name.Function),
            include('statements'),
            # blocks:
            (r'\{|\}', Punctuation),
            # (r'\{', Punctuation, 'block'),
            (r'.', Text),
        ],
        # 'block': [
        #    include('statements'),
        #    ('\{', Punctuation, '#push'),
        #    ('\}', Punctuation, '#pop')
        # ],
        'string_squote': [
            (r'([^\'\\]|\\.)*\'', String, '#pop'),
        ],
        'string_dquote': [
            (r'([^"\\]|\\.)*"', String, '#pop'),
        ],
    }

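    # Guessing heuristic: a "<-" assignment arrow (not followed by another
    # "-") after an identifier character, closing bracket, or whitespace is
    # taken as weak evidence of R/S source.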
    def analyse_text(text):
        if re.search(r'[a-z0-9_\])\s]<-(?!-)', text):
            return 0.11
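
# A minimal usage sketch (illustrative only): tokenizing one line of R
# with SLexer produces Name, Operator, Number, and Comment tokens.
#
#     from pygments.lexers.r import SLexer
#     for tok, val in SLexer().get_tokens('x <- 1:10  # a comment\n'):
#         print(tok, repr(val))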


class RdLexer(RegexLexer):
    """
    Pygments Lexer for R documentation (Rd) files.

    This is a very minimal implementation, highlighting little more
    than the macros. A description of Rd syntax is found in `Writing R
    Extensions <http://cran.r-project.org/doc/manuals/R-exts.html>`_
    and `Parsing Rd files <http://developer.r-project.org/parseRd.pdf>`_.

    .. versionadded:: 1.6
    """
    name = 'Rd'
    aliases = ['rd']
    filenames = ['*.Rd']
    mimetypes = ['text/x-r-doc']

    # Accounting properly for the verbatim, LaTeX-like, and R-like regions
    # of an Rd file would require actual parsing.
    tokens = {
        'root': [
            # catch escaped brackets and percent sign
            (r'\\[\\{}%]', String.Escape),
            # comments
            (r'%.*$', Comment),
            # special macros with no arguments
            (r'\\(?:cr|l?dots|R|tab)\b', Keyword.Constant),
            # macros
            (r'\\[a-zA-Z]+\b', Keyword),
            # special preprocessor macros
            (r'^\s*#(?:ifn?def|endif).*\b', Comment.Preproc),
            # non-escaped brackets
            (r'[{}]', Name.Builtin),
            # everything else
            (r'[^\\%\n{}]+', Text),
            (r'.', Text),
        ]
    }
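
# A minimal usage sketch (illustrative only): macros come out as Keyword,
# braces as Name.Builtin, and % comments as Comment.
#
#     from pygments.lexers.r import RdLexer
#     rd = '\\name{foo}\n% a comment\n'
#     for tok, val in RdLexer().get_tokens(rd):
#         print(tok, repr(val))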