"""
pygments . cmdline
~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
Command line interface .
: copyright : Copyright 2006 - 2023 by the Pygments team , see AUTHORS .
: license : BSD , see LICENSE for details .
"""
import os
import sys
import shutil
import argparse
from textwrap import dedent
from pygments import __version__ , highlight
from pygments . util import ClassNotFound , OptionError , docstring_headline , \
guess_decode , guess_decode_from_terminal , terminal_encoding , \
UnclosingTextIOWrapper
from pygments . lexers import get_all_lexers , get_lexer_by_name , guess_lexer , \
load_lexer_from_file , get_lexer_for_filename , find_lexer_class_for_filename
from pygments . lexers . special import TextLexer
from pygments . formatters . latex import LatexEmbeddedLexer , LatexFormatter
from pygments . formatters import get_all_formatters , get_formatter_by_name , \
load_formatter_from_file , get_formatter_for_filename , find_formatter_class
from pygments . formatters . terminal import TerminalFormatter
from pygments . formatters . terminal256 import Terminal256Formatter , TerminalTrueColorFormatter
from pygments . filters import get_all_filters , find_filter_class
from pygments . styles import get_all_styles , get_style_by_name
def _parse_options ( o_strs ) :
opts = { }
if not o_strs :
return opts
for o_str in o_strs :
if not o_str . strip ( ) :
continue
o_args = o_str . split ( ' , ' )
for o_arg in o_args :
o_arg = o_arg . strip ( )
try :
o_key , o_val = o_arg . split ( ' = ' , 1 )
o_key = o_key . strip ( )
o_val = o_val . strip ( )
except ValueError :
opts [ o_arg ] = True
else :
opts [ o_key ] = o_val
return opts
def _parse_filters ( f_strs ) :
filters = [ ]
if not f_strs :
return filters
for f_str in f_strs :
if ' : ' in f_str :
fname , fopts = f_str . split ( ' : ' , 1 )
filters . append ( ( fname , _parse_options ( [ fopts ] ) ) )
else :
filters . append ( ( f_str , { } ) )
return filters
def _print_help ( what , name ) :
try :
if what == ' lexer ' :
cls = get_lexer_by_name ( name )
print ( " Help on the %s lexer: " % cls . name )
print ( dedent ( cls . __doc__ ) )
elif what == ' formatter ' :
cls = find_formatter_class ( name )
print ( " Help on the %s formatter: " % cls . name )
print ( dedent ( cls . __doc__ ) )
elif what == ' filter ' :
cls = find_filter_class ( name )
print ( " Help on the %s filter: " % name )
print ( dedent ( cls . __doc__ ) )
return 0
except ( AttributeError , ValueError ) :
print ( " %s not found! " % what , file = sys . stderr )
return 1
def _print_list ( what ) :
if what == ' lexer ' :
print ( )
print ( " Lexers: " )
print ( " ~~~~~~~ " )
info = [ ]
for fullname , names , exts , _ in get_all_lexers ( ) :
tup = ( ' , ' . join ( names ) + ' : ' , fullname ,
exts and ' (filenames ' + ' , ' . join ( exts ) + ' ) ' or ' ' )
info . append ( tup )
info . sort ( )
for i in info :
print ( ( ' * %s \n %s %s ' ) % i )
elif what == ' formatter ' :
print ( )
print ( " Formatters: " )
print ( " ~~~~~~~~~~~ " )
info = [ ]
for cls in get_all_formatters ( ) :
doc = docstring_headline ( cls )
tup = ( ' , ' . join ( cls . aliases ) + ' : ' , doc , cls . filenames and
' (filenames ' + ' , ' . join ( cls . filenames ) + ' ) ' or ' ' )
info . append ( tup )
info . sort ( )
for i in info :
print ( ( ' * %s \n %s %s ' ) % i )
elif what == ' filter ' :
print ( )
print ( " Filters: " )
print ( " ~~~~~~~~ " )
for name in get_all_filters ( ) :
cls = find_filter_class ( name )
print ( " * " + name + ' : ' )
print ( " %s " % docstring_headline ( cls ) )
elif what == ' style ' :
print ( )
print ( " Styles: " )
print ( " ~~~~~~~ " )
for name in get_all_styles ( ) :
cls = get_style_by_name ( name )
print ( " * " + name + ' : ' )
print ( " %s " % docstring_headline ( cls ) )
def _print_list_as_json ( requested_items ) :
import json
result = { }
if ' lexer ' in requested_items :
info = { }
for fullname , names , filenames , mimetypes in get_all_lexers ( ) :
info [ fullname ] = {
' aliases ' : names ,
' filenames ' : filenames ,
' mimetypes ' : mimetypes
}
result [ ' lexers ' ] = info
if ' formatter ' in requested_items :
info = { }
for cls in get_all_formatters ( ) :
doc = docstring_headline ( cls )
info [ cls . name ] = {
' aliases ' : cls . aliases ,
' filenames ' : cls . filenames ,
' doc ' : doc
}
result [ ' formatters ' ] = info
if ' filter ' in requested_items :
info = { }
for name in get_all_filters ( ) :
cls = find_filter_class ( name )
info [ name ] = {
' doc ' : docstring_headline ( cls )
}
result [ ' filters ' ] = info
if ' style ' in requested_items :
info = { }
for name in get_all_styles ( ) :
cls = get_style_by_name ( name )
info [ name ] = {
' doc ' : docstring_headline ( cls )
}
result [ ' styles ' ] = info
json . dump ( result , sys . stdout )
def main_inner ( parser , argns ) :
if argns . help :
parser . print_help ( )
return 0
if argns . V :
print ( ' Pygments version %s , (c) 2006-2023 by Georg Brandl, Matthäus '
' Chajdas and contributors. ' % __version__ )
return 0
def is_only_option ( opt ) :
return not any ( v for ( k , v ) in vars ( argns ) . items ( ) if k != opt )
# handle ``pygmentize -L``
if argns . L is not None :
arg_set = set ( )
for k , v in vars ( argns ) . items ( ) :
if v :
arg_set . add ( k )
arg_set . discard ( ' L ' )
arg_set . discard ( ' json ' )
if arg_set :
parser . print_help ( sys . stderr )
return 2
# print version
if not argns . json :
main ( [ ' ' , ' -V ' ] )
allowed_types = { ' lexer ' , ' formatter ' , ' filter ' , ' style ' }
largs = [ arg . rstrip ( ' s ' ) for arg in argns . L ]
if any ( arg not in allowed_types for arg in largs ) :
parser . print_help ( sys . stderr )
return 0
if not largs :
largs = allowed_types
if not argns . json :
for arg in largs :
_print_list ( arg )
else :
_print_list_as_json ( largs )
return 0
# handle ``pygmentize -H``
if argns . H :
if not is_only_option ( ' H ' ) :
parser . print_help ( sys . stderr )
return 2
what , name = argns . H
if what not in ( ' lexer ' , ' formatter ' , ' filter ' ) :
parser . print_help ( sys . stderr )
return 2
return _print_help ( what , name )
# parse -O options
parsed_opts = _parse_options ( argns . O or [ ] )
# parse -P options
for p_opt in argns . P or [ ] :
try :
name , value = p_opt . split ( ' = ' , 1 )
except ValueError :
parsed_opts [ p_opt ] = True
else :
parsed_opts [ name ] = value
# encodings
inencoding = parsed_opts . get ( ' inencoding ' , parsed_opts . get ( ' encoding ' ) )
outencoding = parsed_opts . get ( ' outencoding ' , parsed_opts . get ( ' encoding ' ) )
# handle ``pygmentize -N``
if argns . N :
lexer = find_lexer_class_for_filename ( argns . N )
if lexer is None :
lexer = TextLexer
print ( lexer . aliases [ 0 ] )
return 0
# handle ``pygmentize -C``
if argns . C :
inp = sys . stdin . buffer . read ( )
try :
lexer = guess_lexer ( inp , inencoding = inencoding )
except ClassNotFound :
lexer = TextLexer
print ( lexer . aliases [ 0 ] )
return 0
# handle ``pygmentize -S``
S_opt = argns . S
a_opt = argns . a
if S_opt is not None :
f_opt = argns . f
if not f_opt :
parser . print_help ( sys . stderr )
return 2
if argns . l or argns . INPUTFILE :
parser . print_help ( sys . stderr )
return 2
try :
parsed_opts [ ' style ' ] = S_opt
fmter = get_formatter_by_name ( f_opt , * * parsed_opts )
except ClassNotFound as err :
print ( err , file = sys . stderr )
return 1
print ( fmter . get_style_defs ( a_opt or ' ' ) )
return 0
# if no -S is given, -a is not allowed
if argns . a is not None :
parser . print_help ( sys . stderr )
return 2
# parse -F options
F_opts = _parse_filters ( argns . F or [ ] )
# -x: allow custom (eXternal) lexers and formatters
allow_custom_lexer_formatter = bool ( argns . x )
# select lexer
lexer = None
# given by name?
lexername = argns . l
if lexername :
# custom lexer, located relative to user's cwd
if allow_custom_lexer_formatter and ' .py ' in lexername :
try :
filename = None
name = None
if ' : ' in lexername :
filename , name = lexername . rsplit ( ' : ' , 1 )
if ' .py ' in name :
# This can happen on Windows: If the lexername is
# C:\lexer.py -- return to normal load path in that case
name = None
if filename and name :
lexer = load_lexer_from_file ( filename , name ,
* * parsed_opts )
else :
lexer = load_lexer_from_file ( lexername , * * parsed_opts )
except ClassNotFound as err :
print ( ' Error: ' , err , file = sys . stderr )
return 1
else :
try :
lexer = get_lexer_by_name ( lexername , * * parsed_opts )
except ( OptionError , ClassNotFound ) as err :
print ( ' Error: ' , err , file = sys . stderr )
return 1
# read input code
code = None
if argns . INPUTFILE :
if argns . s :
print ( ' Error: -s option not usable when input file specified ' ,
file = sys . stderr )
return 2
infn = argns . INPUTFILE
try :
with open ( infn , ' rb ' ) as infp :
code = infp . read ( )
except Exception as err :
print ( ' Error: cannot read infile: ' , err , file = sys . stderr )
return 1
if not inencoding :
code , inencoding = guess_decode ( code )
# do we have to guess the lexer?
if not lexer :
try :
lexer = get_lexer_for_filename ( infn , code , * * parsed_opts )
except ClassNotFound as err :
if argns . g :
try :
lexer = guess_lexer ( code , * * parsed_opts )
except ClassNotFound :
lexer = TextLexer ( * * parsed_opts )
else :
print ( ' Error: ' , err , file = sys . stderr )
return 1
except OptionError as err :
print ( ' Error: ' , err , file = sys . stderr )
return 1
elif not argns . s : # treat stdin as full file (-s support is later)
# read code from terminal, always in binary mode since we want to
# decode ourselves and be tolerant with it
code = sys . stdin . buffer . read ( ) # use .buffer to get a binary stream
if not inencoding :
code , inencoding = guess_decode_from_terminal ( code , sys . stdin )
# else the lexer will do the decoding
if not lexer :
try :
lexer = guess_lexer ( code , * * parsed_opts )
except ClassNotFound :
lexer = TextLexer ( * * parsed_opts )
else : # -s option needs a lexer with -l
if not lexer :
print ( ' Error: when using -s a lexer has to be selected with -l ' ,
file = sys . stderr )
return 2
# process filters
for fname , fopts in F_opts :
try :
lexer . add_filter ( fname , * * fopts )
except ClassNotFound as err :
print ( ' Error: ' , err , file = sys . stderr )
return 1
# select formatter
outfn = argns . o
fmter = argns . f
if fmter :
# custom formatter, located relative to user's cwd
if allow_custom_lexer_formatter and ' .py ' in fmter :
try :
filename = None
name = None
if ' : ' in fmter :
# Same logic as above for custom lexer
filename , name = fmter . rsplit ( ' : ' , 1 )
if ' .py ' in name :
name = None
if filename and name :
fmter = load_formatter_from_file ( filename , name ,
* * parsed_opts )
else :
fmter = load_formatter_from_file ( fmter , * * parsed_opts )
except ClassNotFound as err :
print ( ' Error: ' , err , file = sys . stderr )
return 1
else :
try :
fmter = get_formatter_by_name ( fmter , * * parsed_opts )
except ( OptionError , ClassNotFound ) as err :
print ( ' Error: ' , err , file = sys . stderr )
return 1
if outfn :
if not fmter :
try :
fmter = get_formatter_for_filename ( outfn , * * parsed_opts )
except ( OptionError , ClassNotFound ) as err :
print ( ' Error: ' , err , file = sys . stderr )
return 1
try :
outfile = open ( outfn , ' wb ' )
except Exception as err :
print ( ' Error: cannot open outfile: ' , err , file = sys . stderr )
return 1
else :
if not fmter :
if os . environ . get ( ' COLORTERM ' , ' ' ) in ( ' truecolor ' , ' 24bit ' ) :
fmter = TerminalTrueColorFormatter ( * * parsed_opts )
elif ' 256 ' in os . environ . get ( ' TERM ' , ' ' ) :
fmter = Terminal256Formatter ( * * parsed_opts )
else :
fmter = TerminalFormatter ( * * parsed_opts )
outfile = sys . stdout . buffer
# determine output encoding if not explicitly selected
if not outencoding :
if outfn :
# output file? use lexer encoding for now (can still be None)
fmter . encoding = inencoding
else :
# else use terminal encoding
fmter . encoding = terminal_encoding ( sys . stdout )
# provide coloring under Windows, if possible
if not outfn and sys . platform in ( ' win32 ' , ' cygwin ' ) and \
fmter . name in ( ' Terminal ' , ' Terminal256 ' ) : # pragma: no cover
# unfortunately colorama doesn't support binary streams on Py3
outfile = UnclosingTextIOWrapper ( outfile , encoding = fmter . encoding )
fmter . encoding = None
try :
import colorama . initialise
except ImportError :
pass
else :
outfile = colorama . initialise . wrap_stream (
outfile , convert = None , strip = None , autoreset = False , wrap = True )
# When using the LaTeX formatter and the option `escapeinside` is
# specified, we need a special lexer which collects escaped text
# before running the chosen language lexer.
escapeinside = parsed_opts . get ( ' escapeinside ' , ' ' )
if len ( escapeinside ) == 2 and isinstance ( fmter , LatexFormatter ) :
left = escapeinside [ 0 ]
right = escapeinside [ 1 ]
lexer = LatexEmbeddedLexer ( left , right , lexer )
# ... and do it!
if not argns . s :
# process whole input as per normal...
try :
highlight ( code , lexer , fmter , outfile )
finally :
if outfn :
outfile . close ( )
return 0
else :
# line by line processing of stdin (eg: for 'tail -f')...
try :
while 1 :
line = sys . stdin . buffer . readline ( )
if not line :
break
if not inencoding :
line = guess_decode_from_terminal ( line , sys . stdin ) [ 0 ]
highlight ( line , lexer , fmter , outfile )
if hasattr ( outfile , ' flush ' ) :
outfile . flush ( )
return 0
except KeyboardInterrupt : # pragma: no cover
return 0
finally :
if outfn :
outfile . close ( )
class HelpFormatter ( argparse . HelpFormatter ) :
def __init__ ( self , prog , indent_increment = 2 , max_help_position = 16 , width = None ) :
if width is None :
try :
width = shutil . get_terminal_size ( ) . columns - 2
except Exception :
pass
argparse . HelpFormatter . __init__ ( self , prog , indent_increment ,
max_help_position , width )
def main ( args = sys . argv ) :
"""
Main command line entry point .
"""
desc = " Highlight an input file and write the result to an output file. "
parser = argparse . ArgumentParser ( description = desc , add_help = False ,
formatter_class = HelpFormatter )
operation = parser . add_argument_group ( ' Main operation ' )
lexersel = operation . add_mutually_exclusive_group ( )
lexersel . add_argument (
' -l ' , metavar = ' LEXER ' ,
help = ' Specify the lexer to use. (Query names with -L.) If not '
' given and -g is not present, the lexer is guessed from the filename. ' )
lexersel . add_argument (
' -g ' , action = ' store_true ' ,
help = ' Guess the lexer from the file contents, or pass through '
' as plain text if nothing can be guessed. ' )
operation . add_argument (
' -F ' , metavar = ' FILTER[:options] ' , action = ' append ' ,
help = ' Add a filter to the token stream. (Query names with -L.) '
' Filter options are given after a colon if necessary. ' )
operation . add_argument (
' -f ' , metavar = ' FORMATTER ' ,
help = ' Specify the formatter to use. (Query names with -L.) '
' If not given, the formatter is guessed from the output filename, '
' and defaults to the terminal formatter if the output is to the '
' terminal or an unknown file extension. ' )
operation . add_argument (
' -O ' , metavar = ' OPTION=value[,OPTION=value,...] ' , action = ' append ' ,
help = ' Give options to the lexer and formatter as a comma-separated '
' list of key-value pairs. '
' Example: `-O bg=light,python=cool`. ' )
operation . add_argument (
' -P ' , metavar = ' OPTION=value ' , action = ' append ' ,
help = ' Give a single option to the lexer and formatter - with this '
' you can pass options whose value contains commas and equal signs. '
' Example: `-P " heading=Pygments, the Python highlighter " `. ' )
operation . add_argument (
' -o ' , metavar = ' OUTPUTFILE ' ,
help = ' Where to write the output. Defaults to standard output. ' )
operation . add_argument (
' INPUTFILE ' , nargs = ' ? ' ,
help = ' Where to read the input. Defaults to standard input. ' )
flags = parser . add_argument_group ( ' Operation flags ' )
flags . add_argument (
' -v ' , action = ' store_true ' ,
help = ' Print a detailed traceback on unhandled exceptions, which '
' is useful for debugging and bug reports. ' )
flags . add_argument (
' -s ' , action = ' store_true ' ,
help = ' Process lines one at a time until EOF, rather than waiting to '
' process the entire file. This only works for stdin, only for lexers '
' with no line-spanning constructs, and is intended for streaming '
' input such as you get from `tail -f`. '
' Example usage: `tail -f sql.log | pygmentize -s -l sql`. ' )
flags . add_argument (
' -x ' , action = ' store_true ' ,
help = ' Allow custom lexers and formatters to be loaded from a .py file '
' relative to the current working directory. For example, '
' `-l ./customlexer.py -x`. By default, this option expects a file '
' with a class named CustomLexer or CustomFormatter; you can also '
' specify your own class name with a colon (`-l ./lexer.py:MyLexer`). '
' Users should be very careful not to use this option with untrusted '
' files, because it will import and run them. ' )
flags . add_argument ( ' --json ' , help = ' Output as JSON. This can '
' be only used in conjunction with -L. ' ,
default = False ,
action = ' store_true ' )
special_modes_group = parser . add_argument_group (
' Special modes - do not do any highlighting ' )
special_modes = special_modes_group . add_mutually_exclusive_group ( )
special_modes . add_argument (
' -S ' , metavar = ' STYLE -f formatter ' ,
help = ' Print style definitions for STYLE for a formatter '
' given with -f. The argument given by -a is formatter '
' dependent. ' )
special_modes . add_argument (
' -L ' , nargs = ' * ' , metavar = ' WHAT ' ,
help = ' List lexers, formatters, styles or filters -- '
' give additional arguments for the thing(s) you want to list '
' (e.g. " styles " ), or omit them to list everything. ' )
special_modes . add_argument (
' -N ' , metavar = ' FILENAME ' ,
help = ' Guess and print out a lexer name based solely on the given '
' filename. Does not take input or highlight anything. If no specific '
' lexer can be determined, " text " is printed. ' )
special_modes . add_argument (
' -C ' , action = ' store_true ' ,
help = ' Like -N, but print out a lexer name based solely on '
' a given content from standard input. ' )
special_modes . add_argument (
' -H ' , action = ' store ' , nargs = 2 , metavar = ( ' NAME ' , ' TYPE ' ) ,
help = ' Print detailed help for the object <name> of type <type>, '
' where <type> is one of " lexer " , " formatter " or " filter " . ' )
special_modes . add_argument (
' -V ' , action = ' store_true ' ,
help = ' Print the package version. ' )
special_modes . add_argument (
' -h ' , ' --help ' , action = ' store_true ' ,
help = ' Print this help. ' )
special_modes_group . add_argument (
' -a ' , metavar = ' ARG ' ,
help = ' Formatter-specific additional argument for the -S (print '
' style sheet) mode. ' )
argns = parser . parse_args ( args [ 1 : ] )
try :
return main_inner ( parser , argns )
except BrokenPipeError :
# someone closed our stdout, e.g. by quitting a pager.
return 0
except Exception :
if argns . v :
print ( file = sys . stderr )
print ( ' * ' * 65 , file = sys . stderr )
print ( ' An unhandled exception occurred while highlighting. ' ,
file = sys . stderr )
print ( ' Please report the whole traceback to the issue tracker at ' ,
file = sys . stderr )
print ( ' <https://github.com/pygments/pygments/issues>. ' ,
file = sys . stderr )
print ( ' * ' * 65 , file = sys . stderr )
print ( file = sys . stderr )
raise
import traceback
info = traceback . format_exception ( * sys . exc_info ( ) )
msg = info [ - 1 ] . strip ( )
if len ( info ) > = 3 :
# extract relevant file and position info
msg + = ' \n (f %s ) ' % info [ - 2 ] . split ( ' \n ' ) [ 0 ] . strip ( ) [ 1 : ]
print ( file = sys . stderr )
print ( ' *** Error while highlighting: ' , file = sys . stderr )
print ( msg , file = sys . stderr )
print ( ' *** If this is a bug you want to report, please rerun with -v. ' ,
file = sys . stderr )
return 1