"""
pygments . lexers . textedit
~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
Lexers for languages related to text processing .
: copyright : Copyright 2006 - 2022 by the Pygments team , see AUTHORS .
: license : BSD , see LICENSE for details .
"""
import re
from bisect import bisect
from pygments . lexer import RegexLexer , bygroups , default , include , this , using
from pygments . lexers . python import PythonLexer
from pygments . token import Comment , Error , Keyword , Name , Number , Operator , \
Punctuation , String , Text , Whitespace
__all__ = [ ' AwkLexer ' , ' SedLexer ' , ' VimLexer ' ]
class AwkLexer(RegexLexer):
    """
    For Awk scripts.

    .. versionadded:: 1.5
    """

    name = 'Awk'
    aliases = ['awk', 'gawk', 'mawk', 'nawk']
    filenames = ['*.awk']
    mimetypes = ['application/x-awk']

    tokens = {
        'commentsandwhitespace': [
            (r'\s+', Text),
            (r'#.*$', Comment.Single)
        ],
        # Entered after tokens (operators, keywords, ...) that may be
        # followed by a regex literal, so that a leading '/' is read as a
        # regex delimiter rather than the division operator.
        'slashstartsregex': [
            include('commentsandwhitespace'),
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'\B', String.Regex, '#pop'),
            (r'(?=/)', Text, ('#pop', 'badregex')),
            default('#pop')
        ],
        'badregex': [
            # Recover at end of line when a regex failed to terminate.
            (r'\n', Text, '#pop')
        ],
        'root': [
            (r'^(?=\s|/)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            (r'\+\+|--|\|\||&&|in\b|\$|!?~|'
             r'(\*\*|[-<>+*%\^/!=|])=?', Operator, 'slashstartsregex'),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            (r'[})\].]', Punctuation),
            (r'(break|continue|do|while|exit|for|if|else|'
             r'return)\b', Keyword, 'slashstartsregex'),
            (r'function\b', Keyword.Declaration, 'slashstartsregex'),
            (r'(atan2|cos|exp|int|log|rand|sin|sqrt|srand|gensub|gsub|index|'
             r'length|match|split|sprintf|sub|substr|tolower|toupper|close|'
             r'fflush|getline|next|nextfile|print|printf|strftime|systime|'
             r'delete|system)\b', Keyword.Reserved),
            # Built-in variables.  'ORS' (the POSIX output record separator)
            # was missing; 'ORFS' is not a real awk variable but is kept for
            # backward compatibility with earlier versions of this lexer.
            (r'(ARGC|ARGIND|ARGV|BEGIN|CONVFMT|ENVIRON|END|ERRNO|FIELDWIDTHS|'
             r'FILENAME|FNR|FS|IGNORECASE|NF|NR|OFMT|OFS|ORFS|ORS|RLENGTH|RS|'
             r'RSTART|RT|SUBSEP)\b', Name.Builtin),
            (r'[$a-zA-Z_]\w*', Name.Other),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
        ]
    }
class SedLexer(RegexLexer):
    """
    Lexer for Sed script files.
    """
    name = 'Sed'
    aliases = ['sed', 'gsed', 'ssed']
    filenames = ['*.sed', '*.[gs]sed']
    mimetypes = ['text/x-sed']
    flags = re.MULTILINE

    # Body of a delimited section, e.g. the <contents> of /<contents>/.
    # Allows backslash-continued lines and escaped characters.
    _inside_delims = r'((?:(?:\\[^\n]|[^\\])*?\\\n)*?(?:\\.|[^\\])*?)'

    tokens = {
        'root': [
            # Whitespace, comments, addresses and simple punctuation.
            (r'\s+', Whitespace),
            (r'#.*$', Comment.Single),
            (r'[0-9]+', Number.Integer),
            (r'\$', Operator),
            (r'[{};,!]', Punctuation),
            # Single-letter commands with no argument.
            (r'[dDFgGhHlnNpPqQxz=]', Keyword),
            # Commands taking a filename/label argument up to ';' or newline.
            (r'([berRtTvwW:])([^;\n]*)', bygroups(Keyword, String.Single)),
            # a/c/i text commands: continuation lines end in a backslash.
            (r'([aci])((?:.*?\\\n)*(?:.*?[^\\]$))', bygroups(Keyword, String.Double)),
            # q/Q with an optional exit code.
            (r'([qQ])([0-9]*)', bygroups(Keyword, Number.Integer)),
            # A bare /regex/ address.
            (r'(/)' + _inside_delims + r'(/)', bygroups(Punctuation, String.Regex, Punctuation)),
            # A \cregexc address with a custom delimiter c.
            (r'(\\(.))' + _inside_delims + r'(\2)',
             bygroups(Punctuation, None, String.Regex, Punctuation)),
            # y/from/to/ transliteration with an arbitrary delimiter.
            (r'(y)(.)' + _inside_delims + r'(\2)' + _inside_delims + r'(\2)',
             bygroups(Keyword, Punctuation, String.Single, Punctuation, String.Single, Punctuation)),
            # s/pattern/replacement/flags substitution with an arbitrary delimiter.
            (r'(s)(.)' + _inside_delims + r'(\2)' + _inside_delims + r'(\2)((?:[gpeIiMm]|[0-9])*)',
             bygroups(Keyword, Punctuation, String.Regex, Punctuation, String.Single, Punctuation,
                      Keyword))
        ]
    }
class VimLexer(RegexLexer):
    """
    Lexer for VimL script files.

    .. versionadded:: 0.8
    """
    name = 'VimL'
    aliases = ['vim']
    filenames = ['*.vim', '.vimrc', '.exrc', '.gvimrc',
                 '_vimrc', '_exrc', '_gvimrc', 'vimrc', 'gvimrc']
    mimetypes = ['text/x-vim']
    flags = re.MULTILINE

    # Matches 'py', 'pyt', ..., up to the full 'python' abbreviation.
    _python = r'py(?:t(?:h(?:o(?:n)?)?)?)?'
    tokens = {
        'root': [
            # :python << EOF ... EOF heredoc — body handed to PythonLexer.
            (r'^([ \t:]*)(' + _python + r')([ \t]*)(<<)([ \t]*)(.*)((?:\n|.)*)(\6)',
             bygroups(using(this), Keyword, Text, Operator, Text, Text,
                      using(PythonLexer), Text)),
            # One-line :python statement.
            (r'^([ \t:]*)(' + _python + r')([ \t])(.*)',
             bygroups(using(this), Keyword, Text, using(PythonLexer))),

            (r'^\s*".*', Comment),

            (r'[ \t]+', Text),
            # TODO: regexes can have other delims
            (r'/[^/\\\n]*(?:\\[\s\S][^/\\\n]*)*/', String.Regex),
            (r'"[^"\\\n]*(?:\\[\s\S][^"\\\n]*)*"', String.Double),
            (r"'[^\n']*(?:''[^\n']*)*'", String.Single),

            # Who decided that doublequote was a good comment character??
            (r'(?<=\s)"[^\-:.%#=*].*', Comment),

            (r'-?\d+', Number),
            (r'#[0-9a-f]{6}', Number.Hex),
            (r'^:', Punctuation),
            (r'[()<>+=!|,~-]', Punctuation),  # Inexact list.  Looks decent.
            (r'\b(let|if|else|endif|elseif|fun|function|endfunction)\b',
             Keyword),
            (r'\b(NONE|bold|italic|underline|dark|light)\b', Name.Builtin),
            (r'\b\w+\b', Name.Other),  # These are postprocessed below
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        # Imported lazily so the tables are only loaded when this lexer
        # is actually instantiated.
        from pygments.lexers._vim_builtins import auto, command, option
        self._cmd = command
        self._opt = option
        self._aut = auto
        RegexLexer.__init__(self, **options)

    def is_in(self, w, mapping):
        r"""
        Decide whether *w* could be an abbreviation of a keyword listed in
        *mapping*, a sorted list of ``(minimal_prefix, full_name)`` pairs.

        VimL lets commands be abbreviated — ``:ab``, ``:abbre`` and
        ``:abbreviate`` are all valid spellings of ``abbreviate`` — so
        instead of building enormous regexps such as::

            \bab(?:b(?:r(?:e(?:v(?:i(?:a(?:t(?:e)?)?)?)?)?)?)?)?\b

        we lex ``\b\w+\b`` and test each token here.  See
        ``scripts/get_vimkw.py`` for how the lists are generated.
        """
        # w matches an entry when it extends the minimal prefix and is
        # itself a prefix of the full name.  After bisect, the only
        # candidates are the entries on either side of the insertion point.
        pos = bisect(mapping, (w,))
        if pos > 0:
            before = mapping[pos - 1]
            if w.startswith(before[0]) and before[1].startswith(w):
                return True
        if pos < len(mapping):
            after = mapping[pos]
            return w.startswith(after[0]) and after[1].startswith(w)
        return False

    def get_tokens_unprocessed(self, text):
        # TODO: builtins are only subsequent tokens on lines
        #       and 'keywords' only happen at the beginning except
        #       for :au ones
        for index, token, value in RegexLexer.get_tokens_unprocessed(self, text):
            # Reclassify bare words using the generated keyword tables.
            if token is not Name.Other:
                yield index, token, value
            elif self.is_in(value, self._cmd):
                yield index, Keyword, value
            elif self.is_in(value, self._opt) or self.is_in(value, self._aut):
                yield index, Name.Builtin, value
            else:
                yield index, Text, value