# bazarr/libs/pygments/lexers/textfmts.py

"""
    pygments.lexers.textfmts
    ~~~~~~~~~~~~~~~~~~~~~~~~

    Lexers for various text formats.

    :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexers import guess_lexer, get_lexer_by_name
from pygments.lexer import RegexLexer, bygroups, default, include
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
    Number, Generic, Literal, Punctuation
from pygments.util import ClassNotFound

__all__ = ['IrcLogsLexer', 'TodotxtLexer', 'HttpLexer', 'GettextLexer',
           'NotmuchLexer', 'KernelLogLexer']


class IrcLogsLexer(RegexLexer):
    """
    Lexer for IRC logs in *irssi*, *xchat* or *weechat* style.

    Every rule in ``root`` is anchored at the start of a line and starts with
    the optional ``timestamp`` fragment, so each log line is classified as one
    of: log start/end marker, nick-only line, normal message, ``/me`` action,
    join/part notice, or (as a fallback) plain text.
    """

    name = 'IRC logs'
    aliases = ['irc']
    filenames = ['*.weechatlog']
    mimetypes = ['text/x-irclog']

    # VERBOSE allows the whitespace and inline ``#`` comments used inside the
    # patterns below; MULTILINE makes ``^`` and ``$`` match at line boundaries.
    flags = re.VERBOSE | re.MULTILINE
    # Optional leading timestamp. The whole fragment is a single capturing
    # group (everything inside is non-capturing), so in every rule that is
    # built as ``"^" + timestamp + ...`` the timestamp is group 1 and the
    # rule's own groups start at 2. The trailing ``?`` makes it optional, in
    # which case group 1 does not participate in the match.
    timestamp = r"""
        (
          # irssi / xchat and others
          (?: \[|\()?                  # Opening bracket or paren for the timestamp
            (?:                        # Timestamp
                (?: (?:\d{1,4} [-/])*  # Date as - or /-separated groups of digits
                    (?:\d{1,4})
                 [T ])?                # Date/time separator: T or space
                (?: \d?\d [:.])*       # Time as :/.-separated groups of 1 or 2 digits
                    (?: \d?\d)
            )
          (?: \]|\))?\s+               # Closing bracket or paren for the timestamp
        |
          # weechat
          \d{4}\s\w{3}\s\d{2}\s        # Date
          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
        |
          # xchat
          \w{3}\s\d{2}\s               # Date
          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
        )?
    """
    tokens = {
        'root': [
            # log start/end
            # (a whole line of the form ``****...****``; no timestamp here)
            (r'^\*\*\*\*(.*)\*\*\*\*$', Comment),
            # hack
            # A line that ends right after ``<nick>``: it is tokenized here
            # without entering 'msg'.
            # NOTE(review): presumably needed because the trailing ``\s*`` of
            # the "normal msgs" rule can consume the newline, after which the
            # 'msg' state would swallow the following line -- confirm.
            ("^" + timestamp + r'(\s*<[^>]*>\s*)$', bygroups(Comment.Preproc, Name.Tag)),
            # normal msgs
            # timestamp + ``<nick>``; the remainder of the line is handled by
            # the 'msg' state.
            ("^" + timestamp + r"""
                (\s*<.*?>\s*)          # Nick """,
             bygroups(Comment.Preproc, Name.Tag), 'msg'),
            # /me msgs
            # timestamp + a single ``*`` + nick and text, up to the newline.
            ("^" + timestamp + r"""
                (\s*[*]\s+)            # Star
                (\S+\s+.*?\n)          # Nick + rest of message """,
             bygroups(Comment.Preproc, Keyword, Generic.Inserted)),
            # join/part msgs
            # timestamp + ``***`` or an arrow-like marker such as ``-!-`` or
            # ``<--``/``-->``, then the nick and the rest of the line.
            ("^" + timestamp + r"""
                (\s*(?:\*{3}|<?-[!@=P]?->?)\s*)  # Star(s) or symbols
                (\S+\s+)                     # Nick + Space
                (.*?\n)                         # Rest of message """,
             bygroups(Comment.Preproc, Keyword, String, Comment)),
            # Fallback: any other line is emitted as plain text.
            (r"^.*?\n", Text),
        ],
        # Body of a normal message; entered right after ``<nick>``.
        'msg': [
            # A non-whitespace run ending in ``:``, unless that colon is
            # followed by ``//`` (so ``http://...`` is not taken as a prefix).
            (r"\S+:(?!//)", Name.Attribute),  # Prefix
            # Everything up to and including the newline, then back to 'root'.
            (r".*\n", Text, '#pop'),
        ],
    }


class GettextLexer(RegexLexer):
    """
    Lexer for Gettext catalog files.

    Each rule matches one complete line of a PO/POT file: flag comments
    (``#,``), reference comments (``#:``), all other ``#`` comments, header
    style ``"Name: value"`` strings, plain continuation strings and the
    ``msgid``/``msgid_plural``/``msgstr``/``msgctxt``/``msgstr[N]`` entries.

    .. versionadded:: 0.9
    """
    name = 'Gettext Catalog'
    aliases = ['pot', 'po']
    filenames = ['*.pot', '*.po']
    mimetypes = ['application/x-gettext', 'text/x-gettext', 'text/gettext']

    # No ``flags`` override: the ``^``/``$`` anchors below rely on the
    # RegexLexer default (``re.MULTILINE``) to act as line anchors.
    tokens = {
        'root': [
            # "#, fuzzy, c-format" -- flags
            (r'^#,\s.*?$', Keyword.Type),
            # "#: src/file.c:42" -- source references
            (r'^#:\s.*?$', Keyword.Declaration),
            # Every other comment line (translator, extracted, previous
            # msgid, obsolete entries, or a bare "#").
            (r'^(#|#\.\s|#\|\s|#~\s|#\s).*$', Comment.Single),
            # Quoted line starting with "Some-Header:" (catalog header entry).
            (r'^(")([A-Za-z-]+:)(.*")$',
             bygroups(String, Name.Property, String)),
            # Any other quoted (continuation) line.
            (r'^".*"$', String),
            (r'^(msgid|msgid_plural|msgstr|msgctxt)(\s+)(".*")$',
             bygroups(Name.Variable, Text, String)),
            # Plural translations. The index is matched with ``\d+`` so that
            # catalogs with ten or more plural forms (``msgstr[10]``) are
            # tokenized instead of producing error tokens.
            (r'^(msgstr\[)(\d+)(\])(\s+)(".*")$',
             bygroups(Name.Variable, Number.Integer, Name.Variable, Text, String)),
        ]
    }


class HttpLexer(RegexLexer):
    """
    Lexer for HTTP sessions.

    The start line (request or status line) is lexed in ``root``, header
    fields in ``headers`` and, after the blank line, the whole message body
    in ``content``. If a ``Content-Type`` header was seen, the body is
    delegated to the lexer registered for that MIME type.

    .. versionadded:: 1.5
    """

    name = 'HTTP'
    aliases = ['http']

    # DOTALL lets the single ``.+`` rule in 'content' span the entire body,
    # newlines included.
    flags = re.DOTALL

    def get_tokens_unprocessed(self, text, stack=('root',)):
        """Reset the content-type state."""
        # The media type found by ``header_callback`` is stored on the
        # instance; clear it so a previous run cannot leak into this one.
        self.content_type = None
        return RegexLexer.get_tokens_unprocessed(self, text, stack)

    def header_callback(self, match):
        """
        Tokenize one ``Name: value`` header line.

        Groups: 1 name, 2 spaces, 3 colon, 4 spaces, 5 value (may be empty),
        6 line ending. When the header is ``Content-Type`` (compared
        case-insensitively), the media type without parameters is remembered
        in ``self.content_type`` for ``content_callback``.
        """
        if match.group(1).lower() == 'content-type':
            content_type = match.group(5).strip()
            if ';' in content_type:
                # Drop parameters such as "; charset=utf-8".
                content_type = content_type[:content_type.find(';')].strip()
            self.content_type = content_type
        yield match.start(1), Name.Attribute, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator, match.group(3)
        yield match.start(4), Text, match.group(4)
        yield match.start(5), Literal, match.group(5)
        yield match.start(6), Text, match.group(6)

    def continuous_header_callback(self, match):
        """
        Tokenize a header continuation line: leading blanks (group 1), the
        continued value (group 2) and the line ending (group 3).
        """
        yield match.start(1), Text, match.group(1)
        yield match.start(2), Literal, match.group(2)
        yield match.start(3), Text, match.group(3)

    def content_callback(self, match):
        """
        Tokenize the message body.

        Tries the lexer for ``self.content_type``; for structured-syntax
        types such as ``application/calendar+xml`` it additionally tries the
        generic ``application/xml``. Sub-lexer indices are shifted by the
        body's offset so they stay absolute. Falls back to a single ``Text``
        token when no content type is known or no lexer is registered.
        """
        content_type = getattr(self, 'content_type', None)
        content = match.group()
        offset = match.start()
        if content_type:
            # Imported here rather than at module level, as in the original.
            from pygments.lexers import get_lexer_for_mimetype
            possible_lexer_mimetypes = [content_type]
            if '+' in content_type:
                # application/calendar+xml can be treated as application/xml
                # if there's not a better match.
                general_type = re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2',
                                      content_type)
                possible_lexer_mimetypes.append(general_type)

            for i in possible_lexer_mimetypes:
                try:
                    lexer = get_lexer_for_mimetype(i)
                except ClassNotFound:
                    # No lexer for this candidate; try the next one.
                    pass
                else:
                    for idx, token, value in lexer.get_tokens_unprocessed(content):
                        yield offset + idx, token, value
                    return
        yield offset, Text, content

    tokens = {
        'root': [
            # Request line: METHOD SP target SP HTTP/version
            (r'(GET|POST|PUT|DELETE|HEAD|OPTIONS|TRACE|PATCH)( +)([^ ]+)( +)'
             r'(HTTP)(/)(1\.[01]|2(?:\.0)?|3)(\r?\n|\Z)',
             bygroups(Name.Function, Text, Name.Namespace, Text,
                      Keyword.Reserved, Operator, Number, Text),
             'headers'),
            # Status line: HTTP/version SP status-code [SP reason]
            (r'(HTTP)(/)(1\.[01]|2(?:\.0)?|3)( +)(\d{3})(?:( +)([^\r\n]*))?(\r?\n|\Z)',
             bygroups(Keyword.Reserved, Operator, Number, Text, Number, Text,
                      Name.Exception, Text),
             'headers'),
        ],
        'headers': [
            # The value group uses ``*`` so that a header with an empty value
            # ("X-Foo:") is still lexed as a header. With ``+`` such a line
            # produced error tokens and its line ending was then taken for
            # the blank line that starts the body, so every following header
            # was lexed as content.
            (r'([^\s:]+)( *)(:)( *)([^\r\n]*)(\r?\n|\Z)', header_callback),
            # Continuation of the previous header value (line starts with
            # space or tab).
            (r'([\t ]+)([^\r\n]+)(\r?\n|\Z)', continuous_header_callback),
            # Blank line: end of headers, the body follows.
            (r'\r?\n', Text, 'content')
        ],
        'content': [
            (r'.+', content_callback)
        ]
    }

    def analyse_text(text):
        """Report a match when the text starts with a request method and a path."""
        return text.startswith(('GET /', 'POST /', 'PUT /', 'DELETE /', 'HEAD /',
                                'OPTIONS /', 'TRACE /', 'PATCH /'))


class TodotxtLexer(RegexLexer):
    """
    Lexer for `Todo.txt <http://todotxt.com/>`_ todo list format.

    Each line is one task. ``root`` recognises how the task starts and then
    hands the rest of the line to either the ``complete`` or the
    ``incomplete`` state, which differ only in the token used for plain text.

    .. versionadded:: 2.0
    """

    name = 'Todotxt'
    aliases = ['todotxt']
    # ``*.todotxt`` is not a standard Todo.txt extension; it is listed because
    # it simplifies testing and file-type autodetection.
    filenames = ['todo.txt', '*.todotxt']
    mimetypes = ['text/x-todo']

    # Todo.txt concepts expressed as standard token types.
    # Finished tasks are de-emphasised, open tasks read as plain text.
    CompleteTaskText = Operator
    IncompleteTaskText = Text

    # Emphasis ranking: priority gets the most, dates come second.
    Priority = Generic.Heading
    Date = Generic.Subheading

    # Projects and contexts weigh the same but use different colours.
    Project = Generic.Error
    Context = String

    # Should tags ever be supported, give them the same weight as Project and
    # Context with yet another colour; Generic.Traceback would fit.

    # Atomic building blocks.
    # TODO: Make date regex more ISO 8601 compliant
    date_regex = r'\d{4,}-\d{2}-\d{2}'
    priority_regex = r'\([A-Z]\)'
    project_regex = r'\+\S+'
    context_regex = r'@\S+'

    # Compound patterns assembled from the atoms above.
    complete_one_date_regex = r'(x )(%s)' % date_regex
    complete_two_date_regex = r'%s( )(%s)' % (complete_one_date_regex,
                                              date_regex)
    priority_date_regex = r'(%s)( )(%s)' % (priority_regex, date_regex)

    tokens = {
        # Entered at the beginning of every line / task.
        'root': [
            # --- Completed tasks (two entry points) ---
            # "x <date> <date>"
            (complete_two_date_regex,
             bygroups(CompleteTaskText, Date, CompleteTaskText, Date),
             'complete'),
            # "x <date>"
            (complete_one_date_regex,
             bygroups(CompleteTaskText, Date),
             'complete'),

            # --- Open tasks (six entry points) ---
            # "(A) <date>"
            (priority_date_regex,
             bygroups(Priority, IncompleteTaskText, Date),
             'incomplete'),
            # "(A)"
            (priority_regex, Priority, 'incomplete'),
            # "<date>"
            (date_regex, Date, 'incomplete'),
            # "@context"
            (context_regex, Context, 'incomplete'),
            # "+project"
            (project_regex, Project, 'incomplete'),
            # anything else that is not whitespace
            (r'\S+', IncompleteTaskText, 'incomplete'),
        ],

        # Remainder of a completed task.
        'complete': [
            # The newline ends the task: go back to 'root'.
            (r'\s*\n', CompleteTaskText, '#pop'),
            # Contexts and projects keep their own colours.
            (context_regex, Context),
            (project_regex, Project),
            # Any other word.
            (r'\S+', CompleteTaskText),
            # Whitespace inside the line.
            (r'\s+', CompleteTaskText),
        ],

        # Remainder of an open task.
        'incomplete': [
            # The newline ends the task: go back to 'root'.
            (r'\s*\n', IncompleteTaskText, '#pop'),
            # Contexts and projects keep their own colours.
            (context_regex, Context),
            (project_regex, Project),
            # Any other word.
            (r'\S+', IncompleteTaskText),
            # Whitespace inside the line.
            (r'\s+', IncompleteTaskText),
        ],
    }


class NotmuchLexer(RegexLexer):
    """
    For `Notmuch <https://notmuchmail.org/>`_ email text format.

    .. versionadded:: 2.5

    Additional options accepted:

    `body_lexer`
        If given, highlight the contents of the message body with the specified
        lexer, else guess it according to the body content (default: ``None``).
    """

    name = 'Notmuch'
    aliases = ['notmuch']

    def _highlight_code(self, match):
        """
        Tokenize the text of a message part with a nested lexer.

        The lexer named by the ``body_lexer`` option is used when it is set;
        otherwise one is guessed from the stripped part text. If that lookup
        raises ``ClassNotFound`` the plain ``text`` lexer is used instead.

        Yields ``(index, tokentype, value)`` with ``index`` relative to the
        whole input, i.e. shifted by the start of the matched part.
        """
        code = match.group(1)

        try:
            if self.body_lexer:
                lexer = get_lexer_by_name(self.body_lexer)
            else:
                lexer = guess_lexer(code.strip())
        except ClassNotFound:
            lexer = get_lexer_by_name('text')

        # The nested lexer counts from 0 within ``code``; callbacks must
        # report absolute positions, so add the offset of the part body
        # (same convention as HttpLexer.content_callback).
        offset = match.start(1)
        for index, token, value in lexer.get_tokens_unprocessed(code):
            yield offset + index, token, value

    tokens = {
        'root': [
            # Opening of a message: parse its attributes first, then stay in
            # 'message' until the closing marker.
            (r'\fmessage\{\s*', Keyword, ('message', 'message-attr')),
        ],
        'message-attr': [
            (r'(\s*id:\s*)(\S+)', bygroups(Name.Attribute, String)),
            (r'(\s*(?:depth|match|excluded):\s*)(\d+)',
             bygroups(Name.Attribute, Number.Integer)),
            (r'(\s*filename:\s*)(.+\n)',
             bygroups(Name.Attribute, String)),
            # Anything else ends the attribute list.
            default('#pop'),
        ],
        'message': [
            (r'\fmessage\}\n', Keyword, '#pop'),
            (r'\fheader\{\n', Keyword, 'header'),
            (r'\fbody\{\n', Keyword, 'body'),
        ],
        'header': [
            (r'\fheader\}\n', Keyword, '#pop'),
            (r'((?:Subject|From|To|Cc|Date):\s*)(.*\n)',
             bygroups(Name.Attribute, String)),
            # Summary line: free text followed by two parenthesised groups.
            (r'(.*)(\s*\(.*\))(\s*\(.*\)\n)',
             bygroups(Generic.Strong, Literal, Name.Tag)),
        ],
        'body': [
            # Part without attributes.
            (r'\fpart\{\n', Keyword, 'part'),
            # Part or attachment followed by an attribute list.
            (r'\f(part|attachment)\{\s*', Keyword, ('part', 'part-attr')),
            (r'\fbody\}\n', Keyword, '#pop'),
        ],
        'part-attr': [
            (r'(ID:\s*)(\d+)', bygroups(Name.Attribute, Number.Integer)),
            (r'(,\s*)((?:Filename|Content-id):\s*)([^,]+)',
             bygroups(Punctuation, Name.Attribute, String)),
            (r'(,\s*)(Content-type:\s*)(.+\n)',
             bygroups(Punctuation, Name.Attribute, String)),
            # Anything else ends the attribute list.
            default('#pop'),
        ],
        'part': [
            (r'\f(?:part|attachment)\}\n', Keyword, '#pop'),
            # Nested part: push another 'part' level plus its attributes.
            (r'\f(?:part|attachment)\{\s*', Keyword, ('#push', 'part-attr')),
            (r'^Non-text part: .*\n', Comment),
            # Everything up to (not including) the next closing marker is the
            # part's text and goes to the nested lexer.
            (r'(?s)(.*?(?=\f(?:part|attachment)\}\n))', _highlight_code),
        ],
    }

    def analyse_text(text):
        """Return 1.0 when the text starts with a ``\\fmessage{`` marker, else 0.0."""
        return 1.0 if text.startswith('\fmessage{') else 0.0

    def __init__(self, **options):
        """Store the optional ``body_lexer`` name, then initialise the base lexer."""
        self.body_lexer = options.get('body_lexer', None)
        RegexLexer.__init__(self, **options)


class KernelLogLexer(RegexLexer):
    """
    For Linux Kernel log ("dmesg") output.

    A line is first classified by severity, either from an explicit
    ``<prefix>:<level>: `` in front of the ``[timestamp]`` or, when the line
    starts directly with ``[``, from keywords found in the line. The rest of
    the line is then emitted with the token of that severity.

    .. versionadded:: 2.6
    """
    name = 'Kernel log'
    aliases = ['kmsg', 'dmesg']
    filenames = ['*.kmsg', '*.dmesg']

    tokens = {
        'root': [
            # Lines with an explicit level. The level name is left-justified
            # in a 6-character field, e.g. "...:err   : [" or "...:notice: [".
            (r'^[^:]+:%-6s: (?=\[)' % level, Text, state)
            for level, state in (
                ('debug', 'debug'),
                ('info', 'info'),
                ('warn', 'warn'),
                ('notice', 'warn'),
                ('err', 'error'),
                ('crit', 'error'),
            )
        ] + [
            # Lines that start with the bracketed timestamp and carry no
            # explicit level.
            (r'^(?=\[)', Text, 'unknown'),
        ],
        'unknown': [
            # Zero-width checks: pick the severity from keywords appearing
            # anywhere later in the line; default to 'info'.
            (r'^(?=.+(warning|notice|audit|deprecated))', Text, 'warn'),
            (r'^(?=.+(error|critical|fail|Bug))', Text, 'error'),
            default('info'),
        ],
        # Rules shared by every severity state.
        'base': [
            # "[  12.345678] " timestamp
            (r'\[[0-9. ]+\] ', Number),
            # Shortest run up to a colon, directly after the timestamp.
            (r'(?<=\] ).+?:', Keyword),
            # End of line: leave the severity state.
            (r'\n', Text, '#pop'),
        ],
    }
    # The severity states are identical except for the token given to the
    # remainder of the line, so they are generated from one table.
    tokens.update({
        state: [
            include('base'),
            (r'.+\n', token, '#pop')
        ]
        for state, token in (
            ('debug', Comment),
            ('info', Text),
            ('warn', Generic.Strong),
            ('error', Generic.Error),
        )
    })
Update ffsubsync and srt module * Update ffsubsync to 0.4.11 * Update srt to 3.4.1 4 years ago			`"""`
			`pygments.lexers.textfmts`
			`~~~~~~~~~~~~~~~~~~~~~~~~`

			`Lexers for various text formats.`

			`:copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.`
			`:license: BSD, see LICENSE for details.`
			`"""`

			`import re`

			`from pygments.lexers import guess_lexer, get_lexer_by_name`
			`from pygments.lexer import RegexLexer, bygroups, default, include`
			`from pygments.token import Text, Comment, Operator, Keyword, Name, String, \`
			`Number, Generic, Literal, Punctuation`
			`from pygments.util import ClassNotFound`

			`__all__ = ['IrcLogsLexer', 'TodotxtLexer', 'HttpLexer', 'GettextLexer',`
			`'NotmuchLexer', 'KernelLogLexer']`


			`class IrcLogsLexer(RegexLexer):`
			`"""`
			`Lexer for IRC logs in irssi, xchat or weechat style.`
			`"""`

			`name = 'IRC logs'`
			`aliases = ['irc']`
			`filenames = ['*.weechatlog']`
			`mimetypes = ['text/x-irclog']`

			`flags = re.VERBOSE \| re.MULTILINE`
			`timestamp = r"""`
			`(`
			`# irssi / xchat and others`
			`(?: \[\|\()? # Opening bracket or paren for the timestamp`
			`(?: # Timestamp`
			`(?: (?:\d{1,4} [-/])* # Date as - or /-separated groups of digits`
			`(?:\d{1,4})`
			`[T ])? # Date/time separator: T or space`
			`(?: \d?\d [:.])* # Time as :/.-separated groups of 1 or 2 digits`
			`(?: \d?\d)`
			`)`
			`(?: \]\|\))?\s+ # Closing bracket or paren for the timestamp`
			`\|`
			`# weechat`
			`\d{4}\s\w{3}\s\d{2}\s # Date`
			`\d{2}:\d{2}:\d{2}\s+ # Time + Whitespace`
			`\|`
			`# xchat`
			`\w{3}\s\d{2}\s # Date`
			`\d{2}:\d{2}:\d{2}\s+ # Time + Whitespace`
			`)?`
			`"""`
			`tokens = {`
			`'root': [`
			`# log start/end`
			`(r'^\\\\(.)\\\\*$', Comment),`
			`# hack`
			`("^" + timestamp + r'(\s<[^>]>\s*)$', bygroups(Comment.Preproc, Name.Tag)),`
			`# normal msgs`
			`("^" + timestamp + r"""`
			`(\s<.?>\s*) # Nick """,`
			`bygroups(Comment.Preproc, Name.Tag), 'msg'),`
			`# /me msgs`
			`("^" + timestamp + r"""`
			`(\s[]\s+) # Star`
			`(\S+\s+.*?\n) # Nick + rest of message """,`
			`bygroups(Comment.Preproc, Keyword, Generic.Inserted)),`
			`# join/part msgs`
			`("^" + timestamp + r"""`
			`(\s(?:\{3}\|<?-[!@=P]?->?)\s*) # Star(s) or symbols`
			`(\S+\s+) # Nick + Space`
			`(.*?\n) # Rest of message """,`
			`bygroups(Comment.Preproc, Keyword, String, Comment)),`
			`(r"^.*?\n", Text),`
			`],`
			`'msg': [`
			`(r"\S+:(?!//)", Name.Attribute), # Prefix`
			`(r".*\n", Text, '#pop'),`
			`],`
			`}`


			`class GettextLexer(RegexLexer):`
			`"""`
			`Lexer for Gettext catalog files.`

			`.. versionadded:: 0.9`
			`"""`
			`name = 'Gettext Catalog'`
			`aliases = ['pot', 'po']`
			`filenames = ['.pot', '.po']`
			`mimetypes = ['application/x-gettext', 'text/x-gettext', 'text/gettext']`

			`tokens = {`
			`'root': [`
			`(r'^#,\s.*?$', Keyword.Type),`
			`(r'^#:\s.*?$', Keyword.Declaration),`
			`# (r'^#$', Comment),`
			`(r'^(#\|#\.\s\|#\\|\s\|#~\s\|#\s).*$', Comment.Single),`
			`(r'^(")([A-Za-z-]+:)(.*")$',`
			`bygroups(String, Name.Property, String)),`
			`(r'^".*"$', String),`
			`(r'^(msgid\|msgid_plural\|msgstr\|msgctxt)(\s+)(".*")$',`
			`bygroups(Name.Variable, Text, String)),`
			`(r'^(msgstr\[)(\d)(\])(\s+)(".*")$',`
			`bygroups(Name.Variable, Number.Integer, Name.Variable, Text, String)),`
			`]`
			`}`


			`class HttpLexer(RegexLexer):`
			`"""`
			`Lexer for HTTP sessions.`

			`.. versionadded:: 1.5`
			`"""`

			`name = 'HTTP'`
			`aliases = ['http']`

			`flags = re.DOTALL`

			`def get_tokens_unprocessed(self, text, stack=('root',)):`
			`"""Reset the content-type state."""`
			`self.content_type = None`
			`return RegexLexer.get_tokens_unprocessed(self, text, stack)`

			`def header_callback(self, match):`
			`if match.group(1).lower() == 'content-type':`
			`content_type = match.group(5).strip()`
			`if ';' in content_type:`
			`content_type = content_type[:content_type.find(';')].strip()`
			`self.content_type = content_type`
			`yield match.start(1), Name.Attribute, match.group(1)`
			`yield match.start(2), Text, match.group(2)`
			`yield match.start(3), Operator, match.group(3)`
			`yield match.start(4), Text, match.group(4)`
			`yield match.start(5), Literal, match.group(5)`
			`yield match.start(6), Text, match.group(6)`

			`def continuous_header_callback(self, match):`
			`yield match.start(1), Text, match.group(1)`
			`yield match.start(2), Literal, match.group(2)`
			`yield match.start(3), Text, match.group(3)`

			`def content_callback(self, match):`
			`content_type = getattr(self, 'content_type', None)`
			`content = match.group()`
			`offset = match.start()`
			`if content_type:`
			`from pygments.lexers import get_lexer_for_mimetype`
			`possible_lexer_mimetypes = [content_type]`
			`if '+' in content_type:`
			`# application/calendar+xml can be treated as application/xml`
			`# if there's not a better match.`
			`general_type = re.sub(r'^(.)/.\+(.*)$', r'\1/\2',`
			`content_type)`
			`possible_lexer_mimetypes.append(general_type)`

			`for i in possible_lexer_mimetypes:`
			`try:`
			`lexer = get_lexer_for_mimetype(i)`
			`except ClassNotFound:`
			`pass`
			`else:`
			`for idx, token, value in lexer.get_tokens_unprocessed(content):`
			`yield offset + idx, token, value`
			`return`
			`yield offset, Text, content`

			`tokens = {`
			`'root': [`
			`(r'(GET\|POST\|PUT\|DELETE\|HEAD\|OPTIONS\|TRACE\|PATCH)( +)([^ ]+)( +)'`
			`r'(HTTP)(/)(1\.[01]\|2(?:\.0)?\|3)(\r?\n\|\Z)',`
			`bygroups(Name.Function, Text, Name.Namespace, Text,`
			`Keyword.Reserved, Operator, Number, Text),`
			`'headers'),`
			`(r'(HTTP)(/)(1\.[01]\|2(?:\.0)?\|3)( +)(\d{3})(?:( +)([^\r\n]*))?(\r?\n\|\Z)',`
			`bygroups(Keyword.Reserved, Operator, Number, Text, Number, Text,`
			`Name.Exception, Text),`
			`'headers'),`
			`],`
			`'headers': [`
			`(r'([^\s:]+)( )(:)( )([^\r\n]+)(\r?\n\|\Z)', header_callback),`
			`(r'([\t ]+)([^\r\n]+)(\r?\n\|\Z)', continuous_header_callback),`
			`(r'\r?\n', Text, 'content')`
			`],`
			`'content': [`
			`(r'.+', content_callback)`
			`]`
			`}`

			`def analyse_text(text):`
			`return text.startswith(('GET /', 'POST /', 'PUT /', 'DELETE /', 'HEAD /',`
			`'OPTIONS /', 'TRACE /', 'PATCH /'))`


			`class TodotxtLexer(RegexLexer):`
			`"""`
			Lexer for `Todo.txt <http://todotxt.com/>`_ todo list format.

			`.. versionadded:: 2.0`
			`"""`

			`name = 'Todotxt'`
			`aliases = ['todotxt']`
			`# *.todotxt is not a standard extension for Todo.txt files; including it`
			`# makes testing easier, and also makes autodetecting file type easier.`
			`filenames = ['todo.txt', '*.todotxt']`
			`mimetypes = ['text/x-todo']`

			`# Aliases mapping standard token types of Todo.txt format concepts`
			`CompleteTaskText = Operator # Chosen to de-emphasize complete tasks`
			`IncompleteTaskText = Text # Incomplete tasks should look like plain text`

			`# Priority should have most emphasis to indicate importance of tasks`
			`Priority = Generic.Heading`
			`# Dates should have next most emphasis because time is important`
			`Date = Generic.Subheading`

			`# Project and context should have equal weight, and be in different colors`
			`Project = Generic.Error`
			`Context = String`

			`# If tag functionality is added, it should have the same weight as Project`
			`# and Context, and a different color. Generic.Traceback would work well.`

			`# Regex patterns for building up rules; dates, priorities, projects, and`
			`# contexts are all atomic`
			`# TODO: Make date regex more ISO 8601 compliant`
			`date_regex = r'\d{4,}-\d{2}-\d{2}'`
			`priority_regex = r'\([A-Z]\)'`
			`project_regex = r'\+\S+'`
			`context_regex = r'@\S+'`

			`# Compound regex expressions`
			`complete_one_date_regex = r'(x )(' + date_regex + r')'`
			`complete_two_date_regex = (complete_one_date_regex + r'( )(' +`
			`date_regex + r')')`
			`priority_date_regex = r'(' + priority_regex + r')( )(' + date_regex + r')'`

			`tokens = {`
			`# Should parse starting at beginning of line; each line is a task`
			`'root': [`
			`# Complete task entry points: two total:`
			`# 1. Complete task with two dates`
			`(complete_two_date_regex, bygroups(CompleteTaskText, Date,`
			`CompleteTaskText, Date),`
			`'complete'),`
			`# 2. Complete task with one date`
			`(complete_one_date_regex, bygroups(CompleteTaskText, Date),`
			`'complete'),`

			`# Incomplete task entry points: six total:`
			`# 1. Priority plus date`
			`(priority_date_regex, bygroups(Priority, IncompleteTaskText, Date),`
			`'incomplete'),`
			`# 2. Priority only`
			`(priority_regex, Priority, 'incomplete'),`
			`# 3. Leading date`
			`(date_regex, Date, 'incomplete'),`
			`# 4. Leading context`
			`(context_regex, Context, 'incomplete'),`
			`# 5. Leading project`
			`(project_regex, Project, 'incomplete'),`
			`# 6. Non-whitespace catch-all`
			`(r'\S+', IncompleteTaskText, 'incomplete'),`
			`],`

			`# Parse a complete task`
			`'complete': [`
			`# Newline indicates end of task, should return to root`
			`(r'\s*\n', CompleteTaskText, '#pop'),`
			`# Tokenize contexts and projects`
			`(context_regex, Context),`
			`(project_regex, Project),`
			`# Tokenize non-whitespace text`
			`(r'\S+', CompleteTaskText),`
			`# Tokenize whitespace not containing a newline`
			`(r'\s+', CompleteTaskText),`
			`],`

			`# Parse an incomplete task`
			`'incomplete': [`
			`# Newline indicates end of task, should return to root`
			`(r'\s*\n', IncompleteTaskText, '#pop'),`
			`# Tokenize contexts and projects`
			`(context_regex, Context),`
			`(project_regex, Project),`
			`# Tokenize non-whitespace text`
			`(r'\S+', IncompleteTaskText),`
			`# Tokenize whitespace not containing a newline`
			`(r'\s+', IncompleteTaskText),`
			`],`
			`}`


			`class NotmuchLexer(RegexLexer):`
			`"""`
			For `Notmuch <https://notmuchmail.org/>`_ email text format.

			`.. versionadded:: 2.5`

			`Additional options accepted:`

			`body_lexer`
			`If given, highlight the contents of the message body with the specified`
			lexer, else guess it according to the body content (default: ``None``).
			`"""`

			`name = 'Notmuch'`
			`aliases = ['notmuch']`

			`def _highlight_code(self, match):`
			`code = match.group(1)`

			`try:`
			`if self.body_lexer:`
			`lexer = get_lexer_by_name(self.body_lexer)`
			`else:`
			`lexer = guess_lexer(code.strip())`
			`except ClassNotFound:`
			`lexer = get_lexer_by_name('text')`

			`yield from lexer.get_tokens_unprocessed(code)`

			`tokens = {`
			`'root': [`
			`(r'\fmessage\{\s*', Keyword, ('message', 'message-attr')),`
			`],`
			`'message-attr': [`
			`(r'(\sid:\s)(\S+)', bygroups(Name.Attribute, String)),`
			`(r'(\s(?:depth\|match\|excluded):\s)(\d+)',`
			`bygroups(Name.Attribute, Number.Integer)),`
			`(r'(\sfilename:\s)(.+\n)',`
			`bygroups(Name.Attribute, String)),`
			`default('#pop'),`
			`],`
			`'message': [`
			`(r'\fmessage\}\n', Keyword, '#pop'),`
			`(r'\fheader\{\n', Keyword, 'header'),`
			`(r'\fbody\{\n', Keyword, 'body'),`
			`],`
			`'header': [`
			`(r'\fheader\}\n', Keyword, '#pop'),`
			`(r'((?:Subject\|From\|To\|Cc\|Date):\s)(.\n)',`
			`bygroups(Name.Attribute, String)),`
			`(r'(.)(\s\(.\))(\s\(.*\)\n)',`
			`bygroups(Generic.Strong, Literal, Name.Tag)),`
			`],`
			`'body': [`
			`(r'\fpart\{\n', Keyword, 'part'),`
			`(r'\f(part\|attachment)\{\s*', Keyword, ('part', 'part-attr')),`
			`(r'\fbody\}\n', Keyword, '#pop'),`
			`],`
			`'part-attr': [`
			`(r'(ID:\s*)(\d+)', bygroups(Name.Attribute, Number.Integer)),`
			`(r'(,\s)((?:Filename\|Content-id):\s)([^,]+)',`
			`bygroups(Punctuation, Name.Attribute, String)),`
			`(r'(,\s)(Content-type:\s)(.+\n)',`
			`bygroups(Punctuation, Name.Attribute, String)),`
			`default('#pop'),`
			`],`
			`'part': [`
			`(r'\f(?:part\|attachment)\}\n', Keyword, '#pop'),`
			`(r'\f(?:part\|attachment)\{\s*', Keyword, ('#push', 'part-attr')),`
			`(r'^Non-text part: .*\n', Comment),`
			`(r'(?s)(.*?(?=\f(?:part\|attachment)\}\n))', _highlight_code),`
			`],`
			`}`

			`def analyse_text(text):`
			`return 1.0 if text.startswith('\fmessage{') else 0.0`

			`def __init__(self, **options):`
			`self.body_lexer = options.get('body_lexer', None)`
			`RegexLexer.__init__(self, **options)`


			`class KernelLogLexer(RegexLexer):`
			`"""`
			`For Linux Kernel log ("dmesg") output.`

			`.. versionadded:: 2.6`
			`"""`
			`name = 'Kernel log'`
			`aliases = ['kmsg', 'dmesg']`
			`filenames = ['.kmsg', '.dmesg']`

			`tokens = {`
			`'root': [`
			`(r'^[^:]+:debug : (?=\[)', Text, 'debug'),`
			`(r'^[^:]+:info : (?=\[)', Text, 'info'),`
			`(r'^[^:]+:warn : (?=\[)', Text, 'warn'),`
			`(r'^[^:]+:notice: (?=\[)', Text, 'warn'),`
			`(r'^[^:]+:err : (?=\[)', Text, 'error'),`
			`(r'^[^:]+:crit : (?=\[)', Text, 'error'),`
			`(r'^(?=\[)', Text, 'unknown'),`
			`],`
			`'unknown': [`
			`(r'^(?=.+(warning\|notice\|audit\|deprecated))', Text, 'warn'),`
			`(r'^(?=.+(error\|critical\|fail\|Bug))', Text, 'error'),`
			`default('info'),`
			`],`
			`'base': [`
			`(r'\[[0-9. ]+\] ', Number),`
			`(r'(?<=\] ).+?:', Keyword),`
			`(r'\n', Text, '#pop'),`
			`],`
			`'debug': [`
			`include('base'),`
			`(r'.+\n', Comment, '#pop')`
			`],`
			`'info': [`
			`include('base'),`
			`(r'.+\n', Text, '#pop')`
			`],`
			`'warn': [`
			`include('base'),`
			`(r'.+\n', Generic.Strong, '#pop')`
			`],`
			`'error': [`
			`include('base'),`
			`(r'.+\n', Generic.Error, '#pop')`
			`]`
			`}`