"""
pygments . scanner
~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
This library implements a regex based scanner . Some languages
like Pascal are easy to parse but have some keywords that
depend on the context . Because of this it ' s impossible to lex
that just by using a regular expression lexer like the
` RegexLexer ` .
Have a look at the ` DelphiLexer ` to get an idea of how to use
this scanner .
: copyright : Copyright 2006 - 2023 by the Pygments team , see AUTHORS .
: license : BSD , see LICENSE for details .
"""
import re
class EndOfText(RuntimeError):
    """
    Signals that a match function was called although the end
    of the text had already been reached.
    """
class Scanner:
    """
    Simple scanner

    All method patterns are regular expression strings (not
    compiled expressions!). Every pattern is compiled once with the
    scanner's default flags and cached on the instance.

    Instance attributes:

    - ``data`` / ``data_length``: the scanned text and its length
    - ``pos``: current position; patterns are matched starting here
    - ``start_pos``: start of the most recent successful `scan`
    - ``match``: text matched by the most recent successful `scan`
    - ``last``: value ``match`` had before the most recent `scan` call
    - ``flags``: default regular expression flags
    """

    def __init__(self, text, flags=0):
        """
        :param text:    The text which should be scanned
        :param flags:   default regular expression flags
        """
        self.data = text
        self.data_length = len(text)
        self.start_pos = 0
        self.pos = 0
        self.flags = flags
        self.last = None
        self.match = None
        # pattern string -> compiled regular expression
        self._re_cache = {}

    # NOTE: this used to be ``property(eos, eos.__doc__)``, which handed
    # the docstring to ``property`` as the *setter* (second positional
    # argument). The decorator form yields a proper read-only property
    # that still carries the docstring.
    @property
    def eos(self):
        """`True` if the scanner reached the end of text."""
        return self.pos >= self.data_length

    def _compile(self, pattern):
        """Return the compiled form of `pattern`, compiling it on first use."""
        compiled = self._re_cache.get(pattern)
        if compiled is None:
            compiled = re.compile(pattern, self.flags)
            self._re_cache[pattern] = compiled
        return compiled

    def check(self, pattern):
        """
        Apply `pattern` on the current position and return
        the match object (or `None` if it does not match).
        Doesn't touch pos. Use this for lookahead.

        :raises EndOfText: if the scanner is already at the end of text
        """
        if self.eos:
            raise EndOfText()
        return self._compile(pattern).match(self.data, self.pos)

    def test(self, pattern):
        """
        Apply a pattern on the current position and check
        if it matches. Doesn't touch pos.

        :raises EndOfText: if the scanner is already at the end of text
        """
        return self.check(pattern) is not None

    def scan(self, pattern):
        """
        Scan the text for the given pattern and update pos/match
        and related fields. The return value is a boolean that
        indicates if the pattern matched. The matched value is
        stored on the instance as ``match``, the last value is
        stored as ``last``. ``start_pos`` is the position of the
        pointer before the pattern was matched, ``pos`` is the
        end position.

        ``last`` is overwritten with the current ``match`` on every
        call, also when the pattern does not match; all other fields
        only change on a successful match.

        :raises EndOfText: if the scanner is already at the end of text
        """
        if self.eos:
            raise EndOfText()
        regex = self._compile(pattern)
        # Remember the previous match before attempting the new one.
        self.last = self.match
        found = regex.match(self.data, self.pos)
        if found is None:
            return False
        self.start_pos = found.start()
        self.pos = found.end()
        self.match = found.group()
        return True

    def get_char(self):
        """
        Scan exactly one char.

        The char is scanned with the pattern ``.``, so whether a newline
        counts as a char depends on the scanner's ``flags``.

        :raises EndOfText: if the scanner is already at the end of text
        """
        self.scan('.')

    def __repr__(self):
        return ' < %s %d / %d > ' % (
            self.__class__.__name__,
            self.pos,
            self.data_length
        )