"""
Python Markdown

A Python implementation of John Gruber's Markdown.

Documentation: https://python-markdown.github.io/
GitHub: https://github.com/Python-Markdown/markdown/
PyPI: https://pypi.org/project/Markdown/

Started by Manfred Stienstra (http://www.dwerg.net/).
Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
Currently maintained by Waylan Limberg (https://github.com/waylan),
Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).

Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
Copyright 2004 Manfred Stienstra (the original version)

License: BSD (see LICENSE.md for details).
"""
import re
import xml . etree . ElementTree as etree
from . import util
from . import inlinepatterns
def build_treeprocessors(md, **kwargs):
    """Build the default treeprocessors for Markdown.

    Arguments:

    * md: The Markdown instance handed to each treeprocessor's constructor.
    * kwargs: Accepted for call compatibility; not used by this function.

    Returns: A `util.Registry` holding the inline, prettify and unescape
    treeprocessors, registered with priorities 20, 10 and 0 respectively.
    """
    treeprocessors = util.Registry()
    # Registry names are bare identifiers; any padding inside the quotes
    # would register the processors under different keys.
    treeprocessors.register(InlineProcessor(md), 'inline', 20)
    treeprocessors.register(PrettifyTreeprocessor(md), 'prettify', 10)
    treeprocessors.register(UnescapeTreeprocessor(md), 'unescape', 0)
    return treeprocessors
def isString(s):
    """Return True if `s` is a `str` but not a `util.AtomicString`."""
    if isinstance(s, util.AtomicString):
        # Atomic strings are deliberately not treated as ordinary strings.
        return False
    return isinstance(s, str)
class Treeprocessor(util.Processor):
    """
    Base class for processors that work on the parsed ElementTree.

    Treeprocessors are run on the ElementTree object before serialization.

    Each Treeprocessor implements a "run" method that takes a pointer to an
    ElementTree, modifies it as necessary and returns an ElementTree
    object.

    Treeprocessors must extend markdown.Treeprocessor.
    """

    def run(self, root):
        """
        Process the tree rooted at `root`.

        Subclasses of Treeprocessor should implement a `run` method, which
        takes a root ElementTree. The method may either return another
        ElementTree object, in which case the existing root ElementTree
        will be replaced, or modify the current tree in place and return
        None.
        """
        pass  # pragma: no cover
class InlineProcessor(Treeprocessor):
    """
    A Treeprocessor that traverses a tree, applying inline patterns.

    Matched inline content is replaced in the text by a placeholder and the
    produced node is kept in `self.stashed_nodes`; the placeholders are later
    swapped back for the stashed nodes while the tree is rebuilt.
    """

    def __init__(self, md):
        self.__placeholder_prefix = util.INLINE_PLACEHOLDER_PREFIX
        self.__placeholder_suffix = util.ETX
        self.__placeholder_length = (
            4
            + len(self.__placeholder_prefix)
            + len(self.__placeholder_suffix)
        )
        self.__placeholder_re = util.INLINE_PLACEHOLDER_RE
        self.md = md
        self.inlinePatterns = md.inlinePatterns
        # Lower-cased tag names of the elements enclosing the text that is
        # currently being processed.
        self.ancestors = []

    def __makePlaceholder(self, type):
        """
        Generate a placeholder.

        The id is the current number of stashed nodes, zero-padded to four
        digits. `type` is accepted but not used.

        Returns: the placeholder string and the id it embeds.
        """
        node_id = "%04d" % len(self.stashed_nodes)
        placeholder = util.INLINE_PLACEHOLDER % node_id
        return placeholder, node_id

    def __findPlaceholder(self, data, index):
        """
        Extract id from data string, start from index.

        Keyword arguments:

        * data: string
        * index: index, from which we start search

        Returns: placeholder id and string index, after the found placeholder.
        When nothing is found, returns `None` and `index + 1`.
        """
        m = self.__placeholder_re.search(data, index)
        if m:
            return m.group(1), m.end()
        return None, index + 1

    def __stashNode(self, node, type):
        """
        Add node to stash.

        Returns: the placeholder string that now stands for `node`.
        """
        placeholder, node_id = self.__makePlaceholder(type)
        self.stashed_nodes[node_id] = node
        return placeholder

    def __handleInline(self, data, patternIndex=0):
        """
        Process string with inline patterns and replace it
        with placeholders.

        Keyword arguments:

        * data: A line of Markdown text
        * patternIndex: The index of the inlinePattern to start with

        Returns: String with placeholders. An `util.AtomicString` is returned
        untouched.
        """
        if not isinstance(data, util.AtomicString):
            startIndex = 0
            count = len(self.inlinePatterns)
            while patternIndex < count:
                data, matched, startIndex = self.__applyPattern(
                    self.inlinePatterns[patternIndex],
                    data,
                    patternIndex,
                    startIndex,
                )
                # Stay on the same pattern for as long as it keeps matching.
                if not matched:
                    patternIndex += 1
        return data

    def __processElementText(self, node, subnode, isText=True):
        """
        Process placeholders in Element.text or Element.tail
        of Elements popped from self.stashed_nodes.

        Keyword arguments:

        * node: parent node
        * subnode: processing node
        * isText: bool variable, True - it's text, False - it's tail

        Returns: None
        """
        if isText:
            text = subnode.text
            subnode.text = None
        else:
            text = subnode.tail
            subnode.tail = None

        childResult = self.__processPlaceholders(text, subnode, isText)

        if not isText and node is not subnode:
            # Elements found in a tail go right after `subnode` in `node`.
            pos = list(node).index(subnode) + 1
        else:
            pos = 0

        # Inserting in reverse at a fixed position keeps the original order.
        childResult.reverse()
        for newChild in childResult:
            node.insert(pos, newChild[0])

    def __processPlaceholders(self, data, parent, isText=True):
        """
        Process string with placeholders and generate ElementTree tree.

        Keyword arguments:

        * data: string with placeholders instead of ElementTree elements.
        * parent: Element, which contains processing inline data
        * isText: True when `data` came from `parent.text`, False when it
          came from `parent.tail`.

        Returns: list of `(element, ancestors)` tuples for the ElementTree
        elements with applied inline patterns.
        """
        result = []

        def linkText(text):
            """Attach `text` after the last result, or to `parent`."""
            if not text:
                return
            if result:
                target, attr = result[-1][0], 'tail'
            elif not isText:
                target, attr = parent, 'tail'
            else:
                target, attr = parent, 'text'
            existing = getattr(target, attr)
            setattr(target, attr, existing + text if existing else text)

        startIndex = 0
        while data:
            index = data.find(self.__placeholder_prefix, startIndex)
            if index == -1:
                # No placeholder left: flush the remaining text and stop.
                text = data[startIndex:]
                if isinstance(data, util.AtomicString):
                    # We don't want to lose the AtomicString
                    text = util.AtomicString(text)
                linkText(text)
                break

            node_id, phEndIndex = self.__findPlaceholder(data, index)
            if node_id not in self.stashed_nodes:
                # Wrong placeholder: emit the prefix as literal text and
                # resume the search just after it.
                end = index + len(self.__placeholder_prefix)
                linkText(data[startIndex:end])
                startIndex = end
                continue

            node = self.stashed_nodes[node_id]
            if index > 0:
                linkText(data[startIndex:index])
            startIndex = phEndIndex

            if isString(node):
                # It's just a string: it becomes text and must not be added
                # to `result`, which holds elements only.
                linkText(node)
                continue

            # It's an Element: resolve placeholders nested in its own
            # text/tail and in those of its direct children.
            for child in [node] + list(node):
                if child.tail and child.tail.strip():
                    self.__processElementText(node, child, False)
                if child.text and child.text.strip():
                    self.__processElementText(child, child)
            result.append((node, self.ancestors[:]))

        return result

    def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
        """
        Check if the line fits the pattern, create the necessary
        elements, add it to stashed_nodes.

        Keyword arguments:

        * data: the text to be processed
        * pattern: the pattern to be checked
        * patternIndex: index of current pattern
        * startIndex: string index, from which we start searching

        Returns: a `(data, matched, startIndex)` tuple, where `data` is the
        string with placeholders instead of ElementTree elements and
        `matched` tells whether the pattern matched.
        """
        new_style = isinstance(pattern, inlinepatterns.InlineProcessor)

        for exclude in pattern.ANCESTOR_EXCLUDES:
            if exclude.lower() in self.ancestors:
                return data, False, 0

        if new_style:
            match = None
            # Since handleMatch may reject our first match,
            # we iterate over the buffer looking for matches
            # until we can't find any more.
            for match in pattern.getCompiledRegExp().finditer(data, startIndex):
                node, start, end = pattern.handleMatch(match, data)
                if start is None or end is None:
                    # Rejected: forget it and try the next candidate.
                    match = None
                    continue
                # Accepted: stop at the first match the pattern agrees to.
                break
        else:  # pragma: no cover
            match = pattern.getCompiledRegExp().match(data[startIndex:])
            leftData = data[:startIndex]

        if not match:
            return data, False, 0

        if not new_style:  # pragma: no cover
            node = pattern.handleMatch(match)
            start = match.start(0)
            end = match.end(0)

        if node is None:
            return data, True, end

        if not isString(node):
            if not isinstance(node.text, util.AtomicString):
                # We need to process current node too
                for child in [node] + list(node):
                    if child.text:
                        self.ancestors.append(child.tag.lower())
                        child.text = self.__handleInline(
                            child.text, patternIndex + 1
                        )
                        self.ancestors.pop()
                    if child.tail:
                        child.tail = self.__handleInline(
                            child.tail, patternIndex
                        )

        placeholder = self.__stashNode(node, pattern.type())

        if new_style:
            return "{}{}{}".format(
                data[:start], placeholder, data[end:]
            ), True, 0
        else:  # pragma: no cover
            return "{}{}{}{}".format(
                leftData, match.group(1), placeholder, match.groups()[-1]
            ), True, 0

    def __build_ancestors(self, parent, parents):
        """
        Build the ancestor list.

        Walks from `parent` upwards through `self.parent_map` and extends
        `parents` in place with the lower-cased tag names, outermost first.
        """
        ancestors = []
        while parent is not None:
            ancestors.append(parent.tag.lower())
            parent = self.parent_map.get(parent)
        ancestors.reverse()
        parents.extend(ancestors)

    def run(self, tree, ancestors=None):
        """Apply inline patterns to a parsed Markdown tree.

        Iterate over ElementTree, find elements with inline tag, apply inline
        patterns and append newly created Elements to tree.  If you don't
        want to process your data with inline patterns, instead of normal
        string, use subclass AtomicString:

            node.text = markdown.AtomicString("This will not be processed.")

        Arguments:

        * tree: ElementTree object, representing Markdown tree.
        * ancestors: List of parent tag names that precede the tree node (if needed).

        Returns: ElementTree object with applied inline patterns.
        """
        self.stashed_nodes = {}

        # Ensure a valid parent list, but copy passed in lists
        # to ensure we don't have the user accidentally change it on us.
        tree_parents = [] if ancestors is None else ancestors[:]

        self.parent_map = {c: p for p in tree.iter() for c in p}
        stack = [(tree, tree_parents)]

        while stack:
            currElement, parents = stack.pop()

            self.ancestors = parents
            self.__build_ancestors(currElement, self.ancestors)

            insertQueue = []
            for child in currElement:
                if child.text and not isinstance(
                    child.text, util.AtomicString
                ):
                    self.ancestors.append(child.tag.lower())
                    text = child.text
                    child.text = None
                    lst = self.__processPlaceholders(
                        self.__handleInline(text), child
                    )
                    for item in lst:
                        self.parent_map[item[0]] = child
                    # New elements are themselves queued for processing.
                    stack += lst
                    insertQueue.append((child, lst))
                    self.ancestors.pop()
                if child.tail:
                    tail = self.__handleInline(child.tail)
                    # Throwaway element: leading plain text of the tail is
                    # collected on it and copied back to `child.tail` below.
                    dummy = etree.Element('d')
                    child.tail = None
                    tailResult = self.__processPlaceholders(tail, dummy, False)
                    if dummy.tail:
                        child.tail = dummy.tail
                    pos = list(currElement).index(child) + 1
                    tailResult.reverse()
                    for newChild in tailResult:
                        self.parent_map[newChild[0]] = currElement
                        currElement.insert(pos, newChild[0])
                if len(child):
                    self.parent_map[child] = currElement
                    stack.append((child, self.ancestors[:]))

            # Elements produced from a child's text are inserted once all
            # children of the current element have been visited.
            for element, lst in insertQueue:
                for i, obj in enumerate(lst):
                    newChild = obj[0]
                    element.insert(i, newChild)
        return tree
class PrettifyTreeprocessor(Treeprocessor):
    """ Add linebreaks to the html document. """

    def _prettifyETree(self, elem):
        """
        Recursively add linebreaks to ElementTree children.

        For a block-level element other than `code`/`pre`, a newline is put
        in front of a block-level first child (when the element has no
        non-blank text) and block-level children are processed recursively.
        Every visited element gets a newline tail unless it already has a
        non-blank tail.
        """
        newline = "\n"
        if self.md.is_block_level(elem.tag) and elem.tag not in ['code', 'pre']:
            if (not elem.text or not elem.text.strip()) \
                    and len(elem) and self.md.is_block_level(elem[0].tag):
                elem.text = newline
            for e in elem:
                if self.md.is_block_level(e.tag):
                    self._prettifyETree(e)
        if not elem.tail or not elem.tail.strip():
            elem.tail = newline

    def run(self, root):
        """ Add linebreaks to ElementTree root object. """
        self._prettifyETree(root)

        # Do <br />'s separately as they are often in the middle of
        # inline content and missed by _prettifyETree.
        for br in root.iter('br'):
            if not br.tail or not br.tail.strip():
                br.tail = '\n'
            else:
                br.tail = '\n%s' % br.tail

        # Clean up extra empty lines at end of code blocks.
        for pre in root.iter('pre'):
            if len(pre) and pre[0].tag == 'code':
                code = pre[0]
                # Only prettify code containing text only
                if not len(code) and code.text is not None:
                    code.text = util.AtomicString(code.text.rstrip() + '\n')
class UnescapeTreeprocessor(Treeprocessor):
    """ Restore escaped chars """

    # A decimal number wrapped between util.STX and util.ETX.
    RE = re.compile(r'{}(\d+){}'.format(util.STX, util.ETX))

    def _unescape(self, m):
        """Return the character whose code point is the captured number."""
        return chr(int(m.group(1)))

    def unescape(self, text):
        """Return `text` with every `RE` match replaced by its character."""
        return self.RE.sub(self._unescape, text)

    def run(self, root):
        """ Loop over all elements and unescape all text. """
        for elem in root.iter():
            # Unescape text content (the text of `code` elements is skipped)
            if elem.text and elem.tag != 'code':
                elem.text = self.unescape(elem.text)
            # Unescape tail content
            if elem.tail:
                elem.tail = self.unescape(elem.tail)
            # Unescape attribute values
            for key, value in elem.items():
                elem.set(key, self.unescape(value))