"""
pygments . lexers . haskell
~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
Lexers for Haskell and related languages .
: copyright : Copyright 2006 - 2023 by the Pygments team , see AUTHORS .
: license : BSD , see LICENSE for details .
"""
import re
from pygments . lexer import Lexer , RegexLexer , bygroups , do_insertions , \
default , include , inherit , line_re
from pygments . token import Text , Comment , Operator , Keyword , Name , String , \
Number , Punctuation , Generic , Whitespace
from pygments import unistring as uni
# Names exported by ``from <this module> import *``, one per line.
# NOTE(review): each entry carries padding spaces inside the quotes; this
# looks like an extraction artifact -- confirm against the upstream file.
__all__ = [
    ' HaskellLexer ',
    ' HspecLexer ',
    ' IdrisLexer ',
    ' AgdaLexer ',
    ' CryptolLexer ',
    ' LiterateHaskellLexer ',
    ' LiterateIdrisLexer ',
    ' LiterateAgdaLexer ',
    ' LiterateCryptolLexer ',
    ' KokaLexer ',
]
class HaskellLexer(RegexLexer):
    """
    A Haskell lexer based on the lexemes defined in the Haskell 98 Report.

    ``reserved`` and ``ascii`` are tuples of regex fragments; they are
    joined into alternations inside the ``root`` and ``escape`` states.
    The ``comment``, ``character``, ``string`` and ``escape`` state lists
    are also referenced by ``AgdaLexer``.

    .. versionadded:: 0.8
    """
    # NOTE(review): every string literal in this class carries padding
    # spaces just inside its quotes (e.g. ' Haskell ', ' #pop '); this looks
    # like an extraction artifact -- confirm against the upstream file before
    # relying on these names, state targets or patterns.
    name = ' Haskell '
    url = ' https://www.haskell.org/ '
    aliases = [' haskell ', ' hs ']
    filenames = [' *.hs ']
    mimetypes = [' text/x-haskell ']

    # Reserved words; entries are regex fragments (note ' infix[lr]? ').
    reserved = (' case ', ' class ', ' data ', ' default ', ' deriving ', ' do ', ' else ',
                ' family ', ' if ', ' in ', ' infix[lr]? ', ' instance ',
                ' let ', ' newtype ', ' of ', ' then ', ' type ', ' where ', ' _ ')
    # Mnemonic names accepted after a backslash in the 'escape' state;
    # entries are regex fragments as well.
    ascii = (' NUL ', ' SOH ', ' [SE]TX ', ' EOT ', ' ENQ ', ' ACK ',
             ' BEL ', ' BS ', ' HT ', ' LF ', ' VT ', ' FF ', ' CR ', ' S[OI] ', ' DLE ',
             ' DC[1-4] ', ' NAK ', ' SYN ', ' ETB ', ' CAN ',
             ' EM ', ' SUB ', ' ESC ', ' [FGRU]S ', ' SP ', ' DEL ')

    # Each rule is (pattern, token type[, state transition]).  Rules within a
    # state are tried in order, so the ordering below is significant.
    tokens = {
        ' root ': [
            # Whitespace:
            (r' \ s+ ', Whitespace),
            # (r'--\s*|.*$', Comment.Doc),
            (r' --(?![!#$ % &*+./<=>?@^|_~: \\ ]).*?$ ', Comment.Single),
            (r' \ { - ', Comment.Multiline, ' comment '),
            # Lexemes:
            #  Identifiers
            (r' \ bimport \ b ', Keyword.Reserved, ' import '),
            (r' \ bmodule \ b ', Keyword.Reserved, ' module '),
            (r' \ berror \ b ', Name.Exception),
            (r' \ b( %s )(?! \' ) \ b ' % ' | '.join(reserved), Keyword.Reserved),
            (r" ' [^ \\ ] ' ", String.Char),  # this has to come before the TH quote
            (r' ^[_ ' + uni.Ll + r' ][ \ w \' ]* ', Name.Function),
            (r" ' ?[_ " + uni.Ll + r" ][ \ w ' ]* ", Name),
            (r" ( ' ' )?[ " + uni.Lu + r" ][ \ w \ ' ]* ", Keyword.Type),
            (r" ( ' )[ " + uni.Lu + r" ][ \ w \ ' ]* ", Keyword.Type),
            (r" ( ' ) \ [[^ \ ]]* \ ] ", Keyword.Type),  # tuples and lists get special treatment in GHC
            (r" ( ' ) \ ([^)]* \ ) ", Keyword.Type),  # ..
            (r" ( ' )[:!#$ % &*+. \\ /<=>?@^|~-]+ ", Keyword.Type),  # promoted type operators
            #  Operators
            (r' \\ (?![:!#$ % &*+. \\ /<=>?@^|~-]+) ', Name.Function),  # lambda operator
            (r' (<-|::|->|=>|=)(?![:!#$ % &*+. \\ /<=>?@^|~-]+) ', Operator.Word),  # specials
            (r' :[:!#$ % &*+. \\ /<=>?@^|~-]* ', Keyword.Type),  # Constructor operators
            (r' [:!#$ % &*+. \\ /<=>?@^|~-]+ ', Operator),  # Other operators
            #  Numbers
            # The float and prefixed forms are listed before the plain
            # integer rule so that the longer literal is tried first.
            (r' 0[xX]_*[ \ da-fA-F](_*[ \ da-fA-F])*_*[pP][+-]? \ d(_* \ d)* ', Number.Float),
            (r' 0[xX]_*[ \ da-fA-F](_*[ \ da-fA-F])* \ .[ \ da-fA-F](_*[ \ da-fA-F])* '
             r' (_*[pP][+-]? \ d(_* \ d)*)? ', Number.Float),
            (r' \ d(_* \ d)*_*[eE][+-]? \ d(_* \ d)* ', Number.Float),
            (r' \ d(_* \ d)* \ . \ d(_* \ d)*(_*[eE][+-]? \ d(_* \ d)*)? ', Number.Float),
            (r' 0[bB]_*[01](_*[01])* ', Number.Bin),
            (r' 0[oO]_*[0-7](_*[0-7])* ', Number.Oct),
            (r' 0[xX]_*[ \ da-fA-F](_*[ \ da-fA-F])* ', Number.Hex),
            (r' \ d(_* \ d)* ', Number.Integer),
            #  Character/String Literals
            (r" ' ", String.Char, ' character '),
            (r' " ', String, ' string '),
            #  Special
            (r' \ [ \ ] ', Keyword.Type),
            (r' \ ( \ ) ', Name.Builtin),
            (r' [][(),;` {} ] ', Punctuation),
        ],
        # Entered from the ``import`` keyword rule in 'root'.
        ' import ': [
            # Import statements
            (r' \ s+ ', Whitespace),
            (r' " ', String, ' string '),
            # after "funclist" state
            (r' \ ) ', Punctuation, ' #pop '),
            (r' qualified \ b ', Keyword),
            # import X as Y
            (r' ([ ' + uni.Lu + r' ][ \ w.]*)( \ s+)(as)( \ s+)([ ' + uni.Lu + r' ][ \ w.]*) ',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Name), ' #pop '),
            # import X hiding (functions)
            (r' ([ ' + uni.Lu + r' ][ \ w.]*)( \ s+)(hiding)( \ s+)( \ () ',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Punctuation), ' funclist '),
            # import X (functions)
            (r' ([ ' + uni.Lu + r' ][ \ w.]*)( \ s+)( \ () ',
             bygroups(Name.Namespace, Whitespace, Punctuation), ' funclist '),
            # import X
            (r' [ \ w.]+ ', Name.Namespace, ' #pop '),
        ],
        # Entered from the ``module`` keyword rule in 'root'.
        ' module ': [
            (r' \ s+ ', Whitespace),
            (r' ([ ' + uni.Lu + r' ][ \ w.]*)( \ s+)( \ () ',
             bygroups(Name.Namespace, Whitespace, Punctuation), ' funclist '),
            (r' [ ' + uni.Lu + r' ][ \ w.]* ', Name.Namespace, ' #pop '),
        ],
        # Parenthesised name list following an import or module header.
        ' funclist ': [
            (r' \ s+ ', Whitespace),
            (r' [ ' + uni.Lu + r' ] \ w* ', Keyword.Type),
            (r' (_[ \ w \' ]+|[ ' + uni.Ll + r' ][ \ w \' ]*) ', Name.Function),
            (r' --(?![!#$ % &*+./<=>?@^|_~: \\ ]).*?$ ', Comment.Single),
            (r' \ { - ', Comment.Multiline, ' comment '),
            (r' , ', Punctuation),
            (r' [:!#$ % &*+. \\ /<=>?@^|~-]+ ', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            # The closing-paren rule below pops two states, which pairs with
            # the two instances pushed here.
            (r' \ ( ', Punctuation, (' funclist ', ' funclist ')),
            (r' \ ) ', Punctuation, ' #pop:2 '),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        ' comment ': [
            # Multiline Comments
            # The opener pushes this state again and the closer pops it, so
            # nested comments are tracked on the state stack.
            (r' [^- {} ]+ ', Comment.Multiline),
            (r' \ { - ', Comment.Multiline, ' #push '),
            (r' - \ } ', Comment.Multiline, ' #pop '),
            (r' [- {} ] ', Comment.Multiline),
        ],
        ' character ': [
            # Allows multi-chars, incorrectly.
            (r" [^ \\ ' ] ' ", String.Char, ' #pop '),
            (r" \\ ", String.Escape, ' escape '),
            (" ' ", String.Char, ' #pop '),
        ],
        ' string ': [
            (r' [^ \\ " ]+ ', String),
            (r" \\ ", String.Escape, ' escape '),
            (' " ', String, ' #pop '),
        ],
        # Entered after a backslash inside 'character' or 'string'; every
        # rule pops straight back to the enclosing literal state.
        ' escape ': [
            (r' [abfnrtv " \' & \\ ] ', String.Escape, ' #pop '),
            (r' \ ^[][ ' + uni.Lu + r' @^_] ', String.Escape, ' #pop '),
            (' | '.join(ascii), String.Escape, ' #pop '),
            (r' o[0-7]+ ', String.Escape, ' #pop '),
            (r' x[ \ da-fA-F]+ ', String.Escape, ' #pop '),
            (r' \ d+ ', String.Escape, ' #pop '),
            (r' ( \ s+)( \\ ) ', bygroups(Whitespace, String.Escape), ' #pop '),
        ],
    }
class HspecLexer(HaskellLexer):
    """
    A Haskell lexer with support for Hspec constructs.

    Adds three rules to the ``root`` state, for ``it``, ``describe`` and
    ``context`` followed by a double-quoted description, which is emitted as
    ``String.Doc``.  Everything else comes from ``HaskellLexer``.

    .. versionadded:: 2.4.0
    """
    # NOTE(review): the string literals below carry padding spaces inside
    # their quotes; this looks like an extraction artifact -- confirm against
    # the upstream file.
    name = ' Hspec '
    aliases = [' hspec ']
    filenames = [' *Spec.hs ']
    mimetypes = []

    tokens = {
        ' root ': [
            # These come first so they are tried before the inherited rules.
            (r' (it)( \ s*)( " [^ " ]* " ) ', bygroups(Text, Whitespace, String.Doc)),
            (r' (describe)( \ s*)( " [^ " ]* " ) ', bygroups(Text, Whitespace, String.Doc)),
            (r' (context)( \ s*)( " [^ " ]* " ) ', bygroups(Text, Whitespace, String.Doc)),
            # Splice in the parent class's rules for this state at this point.
            inherit,
        ],
    }
class IdrisLexer(RegexLexer):
    """
    A lexer for the dependently typed programming language Idris.

    Based on the Haskell and Agda Lexer.  ``reserved``, ``ascii`` and
    ``directives`` are tuples of regex fragments that are joined into
    alternations inside the ``root`` and ``escape`` states.

    .. versionadded:: 2.0
    """
    # NOTE(review): every string literal in this class carries padding
    # spaces just inside its quotes; this looks like an extraction artifact
    # -- confirm against the upstream file before relying on these names,
    # state targets or patterns.
    name = ' Idris '
    url = ' https://www.idris-lang.org/ '
    aliases = [' idris ', ' idr ']
    filenames = [' *.idr ']
    mimetypes = [' text/x-idris ']

    # Reserved words; entries are regex fragments (note ' infix[lr]? ').
    reserved = (' case ', ' class ', ' data ', ' default ', ' using ', ' do ', ' else ',
                ' if ', ' in ', ' infix[lr]? ', ' instance ', ' rewrite ', ' auto ',
                ' namespace ', ' codata ', ' mutual ', ' private ', ' public ', ' abstract ',
                ' total ', ' partial ',
                ' interface ', ' implementation ', ' export ', ' covering ', ' constructor ',
                ' let ', ' proof ', ' of ', ' then ', ' static ', ' where ', ' _ ', ' with ',
                ' pattern ', ' term ', ' syntax ', ' prefix ',
                ' postulate ', ' parameters ', ' record ', ' dsl ', ' impossible ', ' implicit ',
                ' tactics ', ' intros ', ' intro ', ' compute ', ' refine ', ' exact ', ' trivial ')

    # Mnemonic names accepted after a backslash in the 'escape' state.
    ascii = (' NUL ', ' SOH ', ' [SE]TX ', ' EOT ', ' ENQ ', ' ACK ',
             ' BEL ', ' BS ', ' HT ', ' LF ', ' VT ', ' FF ', ' CR ', ' S[OI] ', ' DLE ',
             ' DC[1-4] ', ' NAK ', ' SYN ', ' ETB ', ' CAN ',
             ' EM ', ' SUB ', ' ESC ', ' [FGRU]S ', ' SP ', ' DEL ')

    # Directive names; used by the first 'root' rule, which prefixes them
    # with a percent sign (written ``%%`` because the pattern is %-formatted).
    directives = (' lib ', ' link ', ' flag ', ' include ', ' hide ', ' freeze ', ' access ',
                  ' default ', ' logging ', ' dynamic ', ' name ', ' error_handlers ', ' language ')

    # Each rule is (pattern, token type[, state transition]).  Rules within a
    # state are tried in order, so the ordering below is significant.
    tokens = {
        ' root ': [
            # Comments
            (r' ^( \ s*)( %% ( %s )) ' % ' | '.join(directives),
             bygroups(Whitespace, Keyword.Reserved)),
            (r' ( \ s*)(--(?![!#$ % &*+./<=>?@^|_~: \\ ]).*?)$ ', bygroups(Whitespace, Comment.Single)),
            (r' ( \ s*)( \ | {3} .*?)$ ', bygroups(Whitespace, Comment.Single)),
            (r' ( \ s*)( \ { -) ', bygroups(Whitespace, Comment.Multiline), ' comment '),
            # Declaration
            (r' ^( \ s*)([^ \ s() {} ]+)( \ s*)(:)( \ s*) ',
             bygroups(Whitespace, Name.Function, Whitespace, Operator.Word, Whitespace)),
            #  Identifiers
            (r' \ b( %s )(?! \' ) \ b ' % ' | '.join(reserved), Keyword.Reserved),
            (r' (import|module)( \ s+) ', bygroups(Keyword.Reserved, Whitespace), ' module '),
            (r" ( ' ' )?[A-Z][ \ w \ ' ]* ", Keyword.Type),
            (r' [a-z][ \ w \' ]* ', Text),
            #  Special Symbols
            (r' (<-|::|->|=>|=) ', Operator.Word),  # specials
            (r' ([() {} \ [ \ ]:!#$ % &*+. \\ /<=>?@^|~-]+) ', Operator.Word),  # specials
            #  Numbers
            (r' \ d+[eE][+-]? \ d+ ', Number.Float),
            (r' \ d+ \ . \ d+([eE][+-]? \ d+)? ', Number.Float),
            (r' 0[xX][ \ da-fA-F]+ ', Number.Hex),
            (r' \ d+ ', Number.Integer),
            # Strings
            (r" ' ", String.Char, ' character '),
            (r' " ', String, ' string '),
            # Catch-all rules: these must stay last in the state.
            (r' [^ \ s() {} ]+ ', Text),
            (r' \ s+? ', Whitespace),  # Whitespace
        ],
        # Entered from the (import|module) rule in 'root'.
        ' module ': [
            (r' \ s+ ', Whitespace),
            (r' ([A-Z][ \ w.]*)( \ s+)( \ () ',
             bygroups(Name.Namespace, Whitespace, Punctuation), ' funclist '),
            (r' [A-Z][ \ w.]* ', Name.Namespace, ' #pop '),
        ],
        ' funclist ': [
            (r' \ s+ ', Whitespace),
            (r' [A-Z] \ w* ', Keyword.Type),
            (r' (_[ \ w \' ]+|[a-z][ \ w \' ]*) ', Name.Function),
            (r' --.*$ ', Comment.Single),
            (r' \ { - ', Comment.Multiline, ' comment '),
            (r' , ', Punctuation),
            (r' [:!#$ % &*+. \\ /<=>?@^|~-]+ ', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            # The closing-paren rule below pops two states, which pairs with
            # the two instances pushed here.
            (r' \ ( ', Punctuation, (' funclist ', ' funclist ')),
            (r' \ ) ', Punctuation, ' #pop:2 '),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        # NOTE(review): the AgdaLexer visible in this file takes these states
        # from HaskellLexer, not from this class -- the note above may be a
        # leftover from copying; confirm.
        ' comment ': [
            # Multiline Comments
            (r' [^- {} ]+ ', Comment.Multiline),
            (r' \ { - ', Comment.Multiline, ' #push '),
            (r' - \ } ', Comment.Multiline, ' #pop '),
            (r' [- {} ] ', Comment.Multiline),
        ],
        ' character ': [
            # Allows multi-chars, incorrectly.
            # Unlike the HaskellLexer counterpart, the first rule here has no
            # state transition; only the closing-quote rule pops.
            (r" [^ \\ ' ] ", String.Char),
            (r" \\ ", String.Escape, ' escape '),
            (" ' ", String.Char, ' #pop '),
        ],
        ' string ': [
            (r' [^ \\ " ]+ ', String),
            (r" \\ ", String.Escape, ' escape '),
            (' " ', String, ' #pop '),
        ],
        # Entered after a backslash inside 'character' or 'string'; every
        # rule pops straight back to the enclosing literal state.
        ' escape ': [
            (r' [abfnrtv " \' & \\ ] ', String.Escape, ' #pop '),
            (r' \ ^[][A-Z@^_] ', String.Escape, ' #pop '),
            (' | '.join(ascii), String.Escape, ' #pop '),
            (r' o[0-7]+ ', String.Escape, ' #pop '),
            (r' x[ \ da-fA-F]+ ', String.Escape, ' #pop '),
            (r' \ d+ ', String.Escape, ' #pop '),
            (r' ( \ s+)( \\ ) ', bygroups(Whitespace, String.Escape), ' #pop ')
        ],
    }
class AgdaLexer(RegexLexer):
    """
    For the Agda dependently typed functional programming language and
    proof assistant.

    The ``comment``, ``character``, ``string`` and ``escape`` states are not
    defined here; they are taken from ``HaskellLexer.tokens``.

    .. versionadded:: 2.0
    """
    # NOTE(review): every string literal in this class carries padding
    # spaces just inside its quotes; this looks like an extraction artifact
    # -- confirm against the upstream file before relying on these names,
    # state targets or patterns.
    name = ' Agda '
    url = ' http://wiki.portal.chalmers.se/agda/pmwiki.php '
    aliases = [' agda ']
    filenames = [' *.agda ']
    mimetypes = [' text/x-agda ']

    # Reserved words, joined into one alternation by the 'root' state.
    reserved = (
        ' abstract ', ' codata ', ' coinductive ', ' constructor ', ' data ', ' do ',
        ' eta-equality ', ' field ', ' forall ', ' hiding ', ' in ', ' inductive ', ' infix ',
        ' infixl ', ' infixr ', ' instance ', ' interleaved ', ' let ', ' macro ', ' mutual ',
        ' no-eta-equality ', ' open ', ' overlap ', ' pattern ', ' postulate ', ' primitive ',
        ' private ', ' quote ', ' quoteTerm ', ' record ', ' renaming ', ' rewrite ',
        ' syntax ', ' tactic ', ' unquote ', ' unquoteDecl ', ' unquoteDef ', ' using ',
        ' variable ', ' where ', ' with ',
    )

    # Each rule is (pattern, token type[, state transition]).  Rules within a
    # state are tried in order, so the ordering below is significant.
    tokens = {
        ' root ': [
            # Declaration
            (r' ^( \ s*)([^ \ s() {} ]+)( \ s*)(:)( \ s*) ',
             bygroups(Whitespace, Name.Function, Whitespace,
                      Operator.Word, Whitespace)),
            # Comments
            (r' --(?![!#$ % &*+./<=>?@^|_~: \\ ]).*?$ ', Comment.Single),
            (r' \ { - ', Comment.Multiline, ' comment '),
            # Holes
            (r' \ { ! ', Comment.Directive, ' hole '),
            # Lexemes:
            #  Identifiers
            (r' \ b( %s )(?! \' ) \ b ' % ' | '.join(reserved), Keyword.Reserved),
            (r' (import|module)( \ s+) ', bygroups(Keyword.Reserved, Whitespace),
             ' module '),
            (r' \ b(Set|Prop)[ \ u2080- \ u2089]* \ b ', Keyword.Type),
            #  Special Symbols
            (r' ( \ (| \ )| \ { | \ }) ', Operator),
            (r' ( \ . { 1,3}| \ || \ u03BB| \ u2200| \ u2192|:|=|->) ', Operator.Word),
            #  Numbers
            (r' \ d+[eE][+-]? \ d+ ', Number.Float),
            (r' \ d+ \ . \ d+([eE][+-]? \ d+)? ', Number.Float),
            (r' 0[xX][ \ da-fA-F]+ ', Number.Hex),
            (r' \ d+ ', Number.Integer),
            # Strings
            (r" ' ", String.Char, ' character '),
            (r' " ', String, ' string '),
            # Catch-all rules: these must stay last in the state.
            (r' [^ \ s() {} ]+ ', Text),
            (r' \ s+? ', Whitespace),  # Whitespace
        ],
        ' hole ': [
            # Holes
            # The opener pushes this state again and the closer pops it, so
            # nested holes are tracked on the state stack.
            (r' [^! {} ]+ ', Comment.Directive),
            (r' \ { ! ', Comment.Directive, ' #push '),
            (r' ! \ } ', Comment.Directive, ' #pop '),
            (r' [! {} ] ', Comment.Directive),
        ],
        # Entered from the (import|module) rule in 'root'; popped by the
        # name rule.
        ' module ': [
            (r' \ { - ', Comment.Multiline, ' comment '),
            (r' [a-zA-Z][ \ w. \' ]* ', Name, ' #pop '),
            (r' [ \ W0-9_]+ ', Text)
        ],
        # Reuse the rule lists defined on HaskellLexer for these four states.
        ' comment ': HaskellLexer.tokens[' comment '],
        ' character ': HaskellLexer.tokens[' character '],
        ' string ': HaskellLexer.tokens[' string '],
        ' escape ': HaskellLexer.tokens[' escape ']
    }
class CryptolLexer(RegexLexer):
    """
    FIXME: A Cryptol2 lexer based on the lexemes defined in the Haskell 98 Report.

    Besides the regex states, ``get_tokens_unprocessed`` re-tags plain
    ``Name`` tokens whose text is listed in ``EXTRA_KEYWORDS`` as
    ``Name.Builtin``.

    .. versionadded:: 2.0
    """
    # NOTE(review): every string literal in this class carries padding
    # spaces just inside its quotes; this looks like an extraction artifact
    # -- confirm against the upstream file before relying on these names,
    # state targets or patterns.
    name = ' Cryptol '
    aliases = [' cryptol ', ' cry ']
    filenames = [' *.cry ']
    mimetypes = [' text/x-cryptol ']

    # Reserved words, joined into one alternation by the 'root' state.
    reserved = (' Arith ', ' Bit ', ' Cmp ', ' False ', ' Inf ', ' True ', ' else ',
                ' export ', ' extern ', ' fin ', ' if ', ' import ', ' inf ', ' lg2 ',
                ' max ', ' min ', ' module ', ' newtype ', ' pragma ', ' property ',
                ' then ', ' type ', ' where ', ' width ')
    # Mnemonic names accepted after a backslash in the 'escape' state.
    ascii = (' NUL ', ' SOH ', ' [SE]TX ', ' EOT ', ' ENQ ', ' ACK ',
             ' BEL ', ' BS ', ' HT ', ' LF ', ' VT ', ' FF ', ' CR ', ' S[OI] ', ' DLE ',
             ' DC[1-4] ', ' NAK ', ' SYN ', ' ETB ', ' CAN ',
             ' EM ', ' SUB ', ' ESC ', ' [FGRU]S ', ' SP ', ' DEL ')

    # Each rule is (pattern, token type[, state transition]).  Rules within a
    # state are tried in order, so the ordering below is significant.
    tokens = {
        ' root ': [
            # Whitespace:
            (r' \ s+ ', Whitespace),
            # (r'--\s*|.*$', Comment.Doc),
            (r' //.*$ ', Comment.Single),
            (r' / \ * ', Comment.Multiline, ' comment '),
            # Lexemes:
            #  Identifiers
            (r' \ bimport \ b ', Keyword.Reserved, ' import '),
            (r' \ bmodule \ b ', Keyword.Reserved, ' module '),
            (r' \ berror \ b ', Name.Exception),
            (r' \ b( %s )(?! \' ) \ b ' % ' | '.join(reserved), Keyword.Reserved),
            (r' ^[_a-z][ \ w \' ]* ', Name.Function),
            (r" ' ?[_a-z][ \ w ' ]* ", Name),
            (r" ( ' ' )?[A-Z][ \ w \ ' ]* ", Keyword.Type),
            #  Operators
            (r' \\ (?![:!#$ % &*+. \\ /<=>?@^|~-]+) ', Name.Function),  # lambda operator
            (r' (<-|::|->|=>|=)(?![:!#$ % &*+. \\ /<=>?@^|~-]+) ', Operator.Word),  # specials
            (r' :[:!#$ % &*+. \\ /<=>?@^|~-]* ', Keyword.Type),  # Constructor operators
            (r' [:!#$ % &*+. \\ /<=>?@^|~-]+ ', Operator),  # Other operators
            #  Numbers
            (r' \ d+[eE][+-]? \ d+ ', Number.Float),
            (r' \ d+ \ . \ d+([eE][+-]? \ d+)? ', Number.Float),
            (r' 0[oO][0-7]+ ', Number.Oct),
            (r' 0[xX][ \ da-fA-F]+ ', Number.Hex),
            (r' \ d+ ', Number.Integer),
            #  Character/String Literals
            (r" ' ", String.Char, ' character '),
            (r' " ', String, ' string '),
            #  Special
            (r' \ [ \ ] ', Keyword.Type),
            (r' \ ( \ ) ', Name.Builtin),
            (r' [][(),;` {} ] ', Punctuation),
        ],
        # Entered from the ``import`` keyword rule in 'root'.
        ' import ': [
            # Import statements
            (r' \ s+ ', Whitespace),
            (r' " ', String, ' string '),
            # after "funclist" state
            (r' \ ) ', Punctuation, ' #pop '),
            (r' qualified \ b ', Keyword),
            # import X as Y
            (r' ([A-Z][ \ w.]*)( \ s+)(as)( \ s+)([A-Z][ \ w.]*) ',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Name), ' #pop '),
            # import X hiding (functions)
            (r' ([A-Z][ \ w.]*)( \ s+)(hiding)( \ s+)( \ () ',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Punctuation), ' funclist '),
            # import X (functions)
            (r' ([A-Z][ \ w.]*)( \ s+)( \ () ',
             bygroups(Name.Namespace, Whitespace, Punctuation), ' funclist '),
            # import X
            (r' [ \ w.]+ ', Name.Namespace, ' #pop '),
        ],
        # Entered from the ``module`` keyword rule in 'root'.
        ' module ': [
            (r' \ s+ ', Whitespace),
            (r' ([A-Z][ \ w.]*)( \ s+)( \ () ',
             bygroups(Name.Namespace, Whitespace, Punctuation), ' funclist '),
            (r' [A-Z][ \ w.]* ', Name.Namespace, ' #pop '),
        ],
        ' funclist ': [
            (r' \ s+ ', Whitespace),
            (r' [A-Z] \ w* ', Keyword.Type),
            (r' (_[ \ w \' ]+|[a-z][ \ w \' ]*) ', Name.Function),
            # TODO: these don't match the comments in docs, remove.
            # (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            # (r'{-', Comment.Multiline, 'comment'),
            (r' , ', Punctuation),
            (r' [:!#$ % &*+. \\ /<=>?@^|~-]+ ', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            # The closing-paren rule below pops two states, which pairs with
            # the two instances pushed here.
            (r' \ ( ', Punctuation, (' funclist ', ' funclist ')),
            (r' \ ) ', Punctuation, ' #pop:2 '),
        ],
        ' comment ': [
            # Multiline Comments
            # The opener pushes this state again and the closer pops it, so
            # nested comments are tracked on the state stack.
            (r' [^/*]+ ', Comment.Multiline),
            (r' / \ * ', Comment.Multiline, ' #push '),
            (r' \ */ ', Comment.Multiline, ' #pop '),
            (r' [*/] ', Comment.Multiline),
        ],
        ' character ': [
            # Allows multi-chars, incorrectly.
            (r" [^ \\ ' ] ' ", String.Char, ' #pop '),
            (r" \\ ", String.Escape, ' escape '),
            (" ' ", String.Char, ' #pop '),
        ],
        ' string ': [
            (r' [^ \\ " ]+ ', String),
            (r" \\ ", String.Escape, ' escape '),
            (' " ', String, ' #pop '),
        ],
        # Entered after a backslash inside 'character' or 'string'; every
        # rule pops straight back to the enclosing literal state.
        ' escape ': [
            (r' [abfnrtv " \' & \\ ] ', String.Escape, ' #pop '),
            (r' \ ^[][A-Z@^_] ', String.Escape, ' #pop '),
            (' | '.join(ascii), String.Escape, ' #pop '),
            (r' o[0-7]+ ', String.Escape, ' #pop '),
            (r' x[ \ da-fA-F]+ ', String.Escape, ' #pop '),
            (r' \ d+ ', String.Escape, ' #pop '),
            (r' ( \ s+)( \\ ) ', bygroups(Whitespace, String.Escape), ' #pop '),
        ],
    }

    # Identifiers and operator spellings promoted to Name.Builtin by
    # get_tokens_unprocessed().  Only tokens typed exactly ``Name`` are
    # checked against this set.
    EXTRA_KEYWORDS = {' join ', ' split ', ' reverse ', ' transpose ', ' width ',
                      ' length ', ' tail ', ' << ', ' >> ', ' <<< ', ' >>> ', ' const ',
                      ' reg ', ' par ', ' seq ', ' ASSERT ', ' undefined ', ' error ',
                      ' trace '}

    def get_tokens_unprocessed(self, text):
        """
        Yield ``(index, token, value)`` triples for *text*.

        Delegates to ``RegexLexer.get_tokens_unprocessed`` and rewrites the
        token type to ``Name.Builtin`` when the type is exactly ``Name`` and
        the value is in ``EXTRA_KEYWORDS``; all other triples pass through
        unchanged.
        """
        # Initial state stack handed to the base implementation.
        stack = [' root ']
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text, stack):
            # Identity test: subtypes such as Name.Function are left alone.
            if token is Name and value in self.EXTRA_KEYWORDS:
                yield index, Name.Builtin, value
            else:
                yield index, token, value
class LiterateLexer(Lexer):
    """
    Base class for lexers of literate file formats based on LaTeX or Bird-style
    (prefixing each code line with ">").

    The literate text and the embedded code are separated; the code is
    handed to ``baselexer`` and the surrounding text is merged back in with
    ``do_insertions``.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``.  If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """
    # NOTE(review): the string literals in this class carry padding spaces
    # just inside their quotes (option key, style names, initial buffers,
    # patterns); this looks like an extraction artifact -- confirm against
    # the upstream file.  They are kept exactly as found.

    # Splits a Bird-style line into the marker part and the code part.
    bird_re = re.compile(r' (>[ \ t]*)(.* \ n) ')

    def __init__(self, baselexer, **options):
        """
        Store *baselexer*, the lexer used for the code portions, and pass
        *options* on to ``Lexer.__init__``.
        """
        self.baselexer = baselexer
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """
        Yield ``(index, token, value)`` triples for the literate source *text*.

        The style comes from the ``litstyle`` option or, when that is absent,
        from the first non-whitespace character of *text*.
        """
        style = self.options.get(' litstyle ')
        if style is None:
            # The empty string is "in" every string, so a blank document
            # must be excluded explicitly: it has no first character and
            # therefore falls back to Bird style, as documented.
            first = text.lstrip()[0:1]
            style = ' latex ' if first and first in ' % \\ ' else ' bird '

        # ``code`` accumulates the program text; every insertion is recorded
        # at the current length of ``code`` so that do_insertions() can
        # place the non-code tokens back at the right position.
        code = ' '
        insertions = []
        if style == ' bird ':
            # bird-style
            for match in line_re.finditer(text):
                line = match.group()
                m = self.bird_re.match(line)
                if m:
                    # Marker goes in as a special comment, the rest is code.
                    insertions.append((len(code),
                                       [(0, Comment.Special, m.group(1))]))
                    code += m.group(2)
                else:
                    insertions.append((len(code), [(0, Text, line)]))
        else:
            # latex-style
            # Imported here rather than at module level; presumably to avoid
            # an import cycle or cost -- TODO confirm.
            from pygments.lexers.markup import TexLexer
            lxlexer = TexLexer(**self.options)

            in_code = False
            latex = ' '
            for match in line_re.finditer(text):
                line = match.group()
                if in_code:
                    if line.lstrip().startswith(' \\ end {code} '):
                        in_code = False
                        latex += line
                    else:
                        code += line
                elif line.lstrip().startswith(' \\ begin {code} '):
                    in_code = True
                    latex += line
                    # Flush the LaTeX collected so far in front of the code
                    # block that starts here.
                    insertions.append((len(code),
                                       list(lxlexer.get_tokens_unprocessed(latex))))
                    latex = ' '
                else:
                    latex += line
            # Trailing LaTeX after the last code block.
            insertions.append((len(code),
                               list(lxlexer.get_tokens_unprocessed(latex))))
        yield from do_insertions(insertions, self.baselexer.get_tokens_unprocessed(code))
class LiterateHaskellLexer(LiterateLexer):
    """
    For Literate Haskell (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``.  If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 0.9
    """
    # NOTE(review): the string literals below carry padding spaces inside
    # their quotes; this looks like an extraction artifact -- confirm against
    # the upstream file.
    name = ' Literate Haskell '
    aliases = [' literate-haskell ', ' lhaskell ', ' lhs ']
    filenames = [' *.lhs ']
    mimetypes = [' text/x-literate-haskell ']

    def __init__(self, **options):
        """Wrap a ``HaskellLexer`` built from the same *options*."""
        LiterateLexer.__init__(self, HaskellLexer(**options), **options)
class LiterateIdrisLexer(LiterateLexer):
    """
    For Literate Idris (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``.  If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    # NOTE(review): the string literals below carry padding spaces inside
    # their quotes; this looks like an extraction artifact -- confirm against
    # the upstream file.
    name = ' Literate Idris '
    aliases = [' literate-idris ', ' lidris ', ' lidr ']
    filenames = [' *.lidr ']
    mimetypes = [' text/x-literate-idris ']

    def __init__(self, **options):
        """Wrap an ``IdrisLexer`` built from the same *options*."""
        LiterateLexer.__init__(self, IdrisLexer(**options), **options)
class LiterateAgdaLexer(LiterateLexer):
    """
    For Literate Agda source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``.  If not given, the
        LaTeX style is used (no autodetection is done for this lexer).

    .. versionadded:: 2.0
    """
    # NOTE(review): the string literals below carry padding spaces inside
    # their quotes; this looks like an extraction artifact -- confirm against
    # the upstream file.
    name = ' Literate Agda '
    aliases = [' literate-agda ', ' lagda ']
    filenames = [' *.lagda ']
    mimetypes = [' text/x-literate-agda ']

    def __init__(self, **options):
        """
        Wrap an ``AgdaLexer`` built from *options*, defaulting ``litstyle``
        to the LaTeX style.
        """
        agdalexer = AgdaLexer(**options)
        # Passing ``litstyle=...`` alongside ``**options`` raises TypeError
        # (multiple values for keyword argument 'litstyle') whenever the
        # caller supplies its own ``litstyle``.  Fill in the default instead,
        # so an explicit caller value is honoured rather than crashing.
        options.setdefault('litstyle', ' latex ')
        LiterateLexer.__init__(self, agdalexer, **options)
class LiterateCryptolLexer(LiterateLexer):
    """
    For Literate Cryptol (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``.  If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    # NOTE(review): the string literals below carry padding spaces inside
    # their quotes; this looks like an extraction artifact -- confirm against
    # the upstream file.
    name = ' Literate Cryptol '
    aliases = [' literate-cryptol ', ' lcryptol ', ' lcry ']
    filenames = [' *.lcry ']
    mimetypes = [' text/x-literate-cryptol ']

    def __init__(self, **options):
        """Wrap a ``CryptolLexer`` built from the same *options*."""
        LiterateLexer.__init__(self, CryptolLexer(**options), **options)
class KokaLexer(RegexLexer):
    """
    Lexer for the Koka language.

    The keyword lists and the ``symbols`` / ``sboundary`` / ``boundary``
    fragments defined on the class are spliced into the regex rules of
    ``tokens``.  Types are lexed in dedicated states (``type``,
    ``type-nested``, ``type-content``) that are entered from ``root`` and
    left again through the ``default`` rule at the end of ``type-content``.

    .. versionadded:: 1.6
    """
    # NOTE(review): every string literal in this class carries padding
    # spaces just inside its quotes; this looks like an extraction artifact
    # -- confirm against the upstream file before relying on these names,
    # state targets or patterns.

    name = ' Koka '
    url = ' https://koka-lang.github.io/koka/doc/index.html '
    aliases = [' koka ']
    filenames = [' *.kk ', ' *.kki ']
    mimetypes = [' text/x-koka ']

    # NOTE(review): ' private ' appears twice on the visibility line below;
    # that is harmless inside an alternation, but confirm whether a
    # different keyword was intended.
    keywords = [
        ' infix ', ' infixr ', ' infixl ',
        ' type ', ' cotype ', ' rectype ', ' alias ',
        ' struct ', ' con ',
        ' fun ', ' function ', ' val ', ' var ',
        ' external ',
        ' if ', ' then ', ' else ', ' elif ', ' return ', ' match ',
        ' private ', ' public ', ' private ',
        ' module ', ' import ', ' as ',
        ' include ', ' inline ',
        ' rec ',
        ' try ', ' yield ', ' enum ',
        ' interface ', ' instance ',
    ]

    # keywords that are followed by a type
    typeStartKeywords = [
        ' type ', ' cotype ', ' rectype ', ' alias ', ' struct ', ' enum ',
    ]

    # keywords valid in a type
    typekeywords = [
        ' forall ', ' exists ', ' some ', ' with ',
    ]

    # builtin names and special names
    builtin = [
        ' for ', ' while ', ' repeat ',
        ' foreach ', ' foreach-indexed ',
        ' error ', ' catch ', ' finally ',
        ' cs ', ' js ', ' file ', ' ref ', ' assigned ',
    ]

    # symbols that can be in an operator
    symbols = r' [$ % &*+@!/ \\ ^~=.: \ -?|<>]+ '

    # symbol boundary: an operator keyword should not be followed by any of these
    sboundary = ' (?! ' + symbols + ' ) '

    # name boundary: a keyword should not be followed by any of these
    boundary = r' (?![ \ w/]) '

    # koka token abstractions
    # Aliases for the token types used for types, type definitions and
    # constructors, so the mapping can be changed in one place.
    tokenType = Name.Attribute
    tokenTypeDef = Name.Class
    tokenConstructor = Generic.Emph

    # main lexer
    # Each rule is (pattern, token type[, state transition]).  Rules within a
    # state are tried in order, so the ordering below is significant.
    tokens = {
        ' root ': [
            include(' whitespace '),

            # go into type mode
            (r' ::? ' + sboundary, tokenType, ' type '),
            (r' (alias)( \ s+)([a-z] \ w*)? ', bygroups(Keyword, Whitespace, tokenTypeDef),
             ' alias-type '),
            (r' (struct)( \ s+)([a-z] \ w*)? ', bygroups(Keyword, Whitespace, tokenTypeDef),
             ' struct-type '),
            # ``alias`` and ``struct`` are also in typeStartKeywords, but the
            # two dedicated rules above are tried first.
            ((r' ( %s ) ' % ' | '.join(typeStartKeywords)) +
             r' ( \ s+)([a-z] \ w*)? ', bygroups(Keyword, Whitespace, tokenTypeDef),
             ' type '),

            # special sequences of tokens (we use ?: for non-capturing group as
            # required by 'bygroups')
            (r' (module)( \ s+)(interface(?= \ s))?( \ s+)?((?:[a-z] \ w*/)*[a-z] \ w*) ',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Namespace)),
            (r' (import)( \ s+)((?:[a-z] \ w*/)*[a-z] \ w*) '
             r' (?:( \ s*)(=)( \ s*)(qualified)?( \ s*) '
             r' ((?:[a-z] \ w*/)*[a-z] \ w*))? ',
             bygroups(Keyword, Whitespace, Name.Namespace, Whitespace, Keyword, Whitespace,
                      Keyword, Whitespace, Name.Namespace)),

            (r' ^(public|private)?( \ s+)?(function|fun|val) '
             r' ( \ s+)([a-z] \ w*| \ ((?: ' + symbols + r' |/) \ )) ',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Function)),
            (r' ^(?:(public|private)(?= \ s+external))?((?<!^) \ s+)?(external)( \ s+)(inline(?= \ s))?( \ s+)? '
             r' ([a-z] \ w*| \ ((?: ' + symbols + r' |/) \ )) ',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Keyword, Whitespace, Name.Function)),

            # keywords
            (r' ( %s ) ' % ' | '.join(typekeywords) + boundary, Keyword.Type),
            (r' ( %s ) ' % ' | '.join(keywords) + boundary, Keyword),
            (r' ( %s ) ' % ' | '.join(builtin) + boundary, Keyword.Pseudo),
            (r' ::?|:=| \ ->|[=.] ' + sboundary, Keyword),

            # names
            (r' ((?:[a-z] \ w*/)*)([A-Z] \ w*) ',
             bygroups(Name.Namespace, tokenConstructor)),
            (r' ((?:[a-z] \ w*/)*)([a-z] \ w*) ', bygroups(Name.Namespace, Name)),
            (r' ((?:[a-z] \ w*/)*)( \ ((?: ' + symbols + r' |/) \ )) ',
             bygroups(Name.Namespace, Name)),
            (r' _ \ w* ', Name.Variable),

            # literal string
            (r' @ " ', String.Double, ' litstring '),

            # operators
            (symbols + " |/(?![*/]) ", Operator),
            (r' ` ', Operator),
            (r' [ {} () \ [ \ ];,] ', Punctuation),

            # literals. No check for literal characters with len > 1
            (r' [0-9]+ \ .[0-9]+([eE][ \ -+]?[0-9]+)? ', Number.Float),
            (r' 0[xX][0-9a-fA-F]+ ', Number.Hex),
            (r' [0-9]+ ', Number.Integer),

            (r" ' ", String.Char, ' char '),
            (r' " ', String.Double, ' string '),
        ],

        # type started by alias
        ' alias-type ': [
            (r' = ', Keyword),
            include(' type ')
        ],

        # type started by struct
        ' struct-type ': [
            (r' (?= \ ((?!,* \ ))) ', Punctuation, ' #pop '),
            include(' type ')
        ],

        # type started by colon
        ' type ': [
            (r' [( \ [<] ', tokenType, ' type-nested '),
            include(' type-content ')
        ],

        # type nested in brackets: can contain parameters, comma etc.
        ' type-nested ': [
            (r' [) \ ]>] ', tokenType, ' #pop '),
            (r' [( \ [<] ', tokenType, ' type-nested '),
            (r' , ', tokenType),
            (r' ([a-z] \ w*)( \ s*)(:)(?!:) ',
             bygroups(Name, Whitespace, tokenType)),  # parameter name
            include(' type-content ')
        ],

        # shared contents of a type
        ' type-content ': [
            include(' whitespace '),

            # keywords
            (r' ( %s ) ' % ' | '.join(typekeywords) + boundary, Keyword),
            (r' (?=(( %s ) ' % ' | '.join(keywords) + boundary + ' )) ',
             Keyword, ' #pop '),  # need to match because names overlap...

            # kinds
            (r' [EPHVX] ' + boundary, tokenType),

            # type names
            (r' [a-z][0-9]*(?![ \ w/]) ', tokenType),
            (r' _ \ w* ', tokenType.Variable),  # Generic.Emph
            (r' ((?:[a-z] \ w*/)*)([A-Z] \ w*) ',
             bygroups(Name.Namespace, tokenType)),
            (r' ((?:[a-z] \ w*/)*)([a-z] \ w+) ',
             bygroups(Name.Namespace, tokenType)),

            # type keyword operators
            (r' ::|->|[.:|] ', tokenType),

            # catchall
            # Leaves the type state when none of the rules above matched.
            default(' #pop ')
        ],

        # comments and literals
        ' whitespace ': [
            (r' ( \ n \ s*)(#.*)$ ', bygroups(Whitespace, Comment.Preproc)),
            (r' \ s+ ', Whitespace),
            (r' / \ * ', Comment.Multiline, ' comment '),
            (r' //.*$ ', Comment.Single)
        ],
        ' comment ': [
            # The opener pushes this state again and the closer pops it, so
            # nested comments are tracked on the state stack.
            (r' [^/*]+ ', Comment.Multiline),
            (r' / \ * ', Comment.Multiline, ' #push '),
            (r' \ */ ', Comment.Multiline, ' #pop '),
            (r' [*/] ', Comment.Multiline),
        ],
        # Entered from the ``@"`` rule in 'root'; a doubled quote is an
        # escape, a single quote ends the literal.
        ' litstring ': [
            (r' [^ " ]+ ', String.Double),
            (r' " " ', String.Escape),
            (r' " ', String.Double, ' #pop '),
        ],
        ' string ': [
            (r' [^ \\ " \ n]+ ', String.Double),
            include(' escape-sequence '),
            (r' [ " \ n] ', String.Double, ' #pop '),
        ],
        ' char ': [
            (r' [^ \\ \' \ n]+ ', String.Char),
            include(' escape-sequence '),
            (r' [ \' \ n] ', String.Char, ' #pop '),
        ],
        # Included by 'string' and 'char'; has no transitions of its own.
        ' escape-sequence ': [
            (r' \\ [nrt \\ " \' ] ', String.Escape),
            (r' \\ x[0-9a-fA-F] {2} ', String.Escape),
            (r' \\ u[0-9a-fA-F] {4} ', String.Escape),
            # Yes, \U literals are 6 hex digits.
            (r' \\ U[0-9a-fA-F] {6} ', String.Escape)
        ]
    }