from __future__ import absolute_import , division , unicode_literals
from six import with_metaclass , viewkeys
import types
from . import _inputstream
from . import _tokenizer
from . import treebuilders
from . treebuilders . base import Marker
from . import _utils
from . constants import (
spaceCharacters , asciiUpper2Lower ,
specialElements , headingElements , cdataElements , rcdataElements ,
tokenTypes , tagTokenTypes ,
namespaces ,
htmlIntegrationPointElements , mathmlTextIntegrationPointElements ,
adjustForeignAttributes as adjustForeignAttributesMap ,
adjustMathMLAttributes , adjustSVGAttributes ,
E ,
_ReparseException
)
def parse(doc, treebuilder=" etree ", namespaceHTMLElements=True, **kwargs):
    """Parse an HTML document as a string or file-like object into a tree

    Convenience wrapper: looks the treebuilder up by name, builds an
    ``HTMLParser`` around it and delegates to ``HTMLParser.parse``.

    :arg doc: the document to parse as a string or file-like object

    :arg treebuilder: the treebuilder name, resolved with
        ``treebuilders.getTreeBuilder``

    :arg namespaceHTMLElements: whether or not to namespace HTML elements

    :arg kwargs: forwarded unchanged to ``HTMLParser.parse``

    :returns: parsed tree

    Example:

    >>> from html5lib.html5parser import parse
    >>> parse('<html><body><p>This is a doc</p></body></html>')
    <Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0>

    """
    builder_cls = treebuilders.getTreeBuilder(treebuilder)
    return HTMLParser(
        builder_cls,
        namespaceHTMLElements=namespaceHTMLElements,
    ).parse(doc, **kwargs)
def parseFragment(doc, container=" div ", treebuilder=" etree ", namespaceHTMLElements=True, **kwargs):
    """Parse an HTML fragment as a string or file-like object into a tree

    Convenience wrapper: looks the treebuilder up by name, builds an
    ``HTMLParser`` around it and delegates to ``HTMLParser.parseFragment``.

    :arg doc: the fragment to parse as a string or file-like object

    :arg container: the container context to parse the fragment in

    :arg treebuilder: the treebuilder name, resolved with
        ``treebuilders.getTreeBuilder``

    :arg namespaceHTMLElements: whether or not to namespace HTML elements

    :arg kwargs: forwarded unchanged to ``HTMLParser.parseFragment``

    :returns: parsed tree

    Example:

    >>> from html5lib.html5parser import parseFragment
    >>> parseFragment('<b>this is a fragment</b>')
    <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090>

    """
    builder_cls = treebuilders.getTreeBuilder(treebuilder)
    fragment_parser = HTMLParser(builder_cls,
                                 namespaceHTMLElements=namespaceHTMLElements)
    return fragment_parser.parseFragment(doc, container=container, **kwargs)
def method_decorator_metaclass(function):
    """Build a metaclass that decorates every plain function of a class.

    :arg function: callable applied to each ``types.FunctionType`` value in
        the namespace of a class being created; its return value replaces
        the original entry

    :returns: a ``type`` subclass to be used as a metaclass

    """
    class Decorated(type):
        def __new__(meta, classname, bases, classDict):
            # The namespace is rewritten in place before the class object is
            # built. Only values change, so iterating a key snapshot is safe.
            for member_name in list(classDict):
                member = classDict[member_name]
                classDict[member_name] = (
                    function(member)
                    if isinstance(member, types.FunctionType)
                    else member
                )
            return type.__new__(meta, classname, bases, classDict)

    return Decorated
class HTMLParser(object):
    """HTML parser

    Generates a tree structure from a stream of (possibly malformed) HTML.

    """

    def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False):
        """
        :arg tree: a treebuilder class controlling the type of tree that will be
            returned. Built in treebuilders can be accessed through
            html5lib.treebuilders.getTreeBuilder(treeType)

        :arg strict: raise an exception when a parse error is encountered

        :arg namespaceHTMLElements: whether or not to namespace HTML elements

        :arg debug: whether or not to enable debug mode which logs things

        Example:

        >>> from html5lib import treebuilders
        >>> from html5lib.html5parser import HTMLParser
        >>> parser = HTMLParser()  # generates parser with etree builder
        >>> lxml_builder = treebuilders.getTreeBuilder('lxml')
        >>> parser = HTMLParser(lxml_builder, strict=True)  # strict parser with lxml builder

        """
        # Raise an exception on the first error encountered
        self.strict = strict

        if tree is None:
            tree = treebuilders.getTreeBuilder(" etree ")
        # ``tree`` is called here, so it has to be a treebuilder class (or
        # another callable), not a treebuilder name.
        self.tree = tree(namespaceHTMLElements)
        self.errors = []

        # One instance of every phase class, keyed by phase name.
        self.phases = {name: cls(self, self.tree)
                       for name, cls in getPhases(debug).items()}

    def _parse(self, stream, innerHTML=False, container=" div ", scripting=False, **kwargs):
        """Tokenize ``stream`` and run the tree construction loop.

        :arg stream: input handed to ``_tokenizer.HTMLTokenizer``

        :arg innerHTML: true when parsing a fragment rather than a document

        :arg container: context element name; only read when ``innerHTML``
            is true

        :arg scripting: stored on the parser and consulted by the phases

        :arg kwargs: forwarded to ``_tokenizer.HTMLTokenizer``

        """
        self.innerHTMLMode = innerHTML
        self.container = container
        self.scripting = scripting
        self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs)
        self.reset()

        try:
            self.mainLoop()
        except _ReparseException:
            # Start over from a clean state; a second _ReparseException is
            # not caught and propagates to the caller.
            self.reset()
            self.mainLoop()

    def reset(self):
        """Reset tree and parser state and select the starting phase."""
        self.tree.reset()
        self.firstStartTag = False
        self.errors = []
        self.log = []  # only used with debug mode
        # "quirks" / "limited quirks" / "no quirks"
        self.compatMode = " no quirks "

        if self.innerHTMLMode:
            self.innerHTML = self.container.lower()

            # The tokenizer start state depends on the context element.
            if self.innerHTML in cdataElements:
                self.tokenizer.state = self.tokenizer.rcdataState
            elif self.innerHTML in rcdataElements:
                self.tokenizer.state = self.tokenizer.rawtextState
            elif self.innerHTML == ' plaintext ':
                self.tokenizer.state = self.tokenizer.plaintextState
            # otherwise the tokenizer state is left untouched
            # self.tokenizer.state = self.tokenizer.dataState

            self.phase = self.phases[" beforeHtml "]
            self.phase.insertHtmlElement()
            self.resetInsertionMode()
        else:
            self.innerHTML = False  # pylint:disable=redefined-variable-type
            self.phase = self.phases[" initial "]

        self.lastPhase = None
        self.beforeRCDataPhase = None
        self.framesetOK = True

    @property
    def documentEncoding(self):
        """Name of the character encoding that was used to decode the input stream, or
        :obj:`None` if that is not determined yet

        """
        # No tokenizer attribute means _parse() has not run yet.
        if not hasattr(self, 'tokenizer'):
            return None
        return self.tokenizer.stream.charEncoding[0].name

    def isHTMLIntegrationPoint(self, element):
        """Return whether ``element`` is an HTML integration point.

        A MathML ``annotation-xml`` element qualifies through its
        ``encoding`` attribute (compared after ASCII lowercasing); every
        other element is looked up in ``htmlIntegrationPointElements``.

        """
        if (element.name == " annotation-xml " and
                element.namespace == namespaces[" mathml "]):
            return (" encoding " in element.attributes and
                    element.attributes[" encoding "].translate(
                        asciiUpper2Lower) in
                    (" text/html ", " application/xhtml+xml "))
        return (element.namespace, element.name) in htmlIntegrationPointElements

    def isMathMLTextIntegrationPoint(self, element):
        """Return whether ``element`` is listed in ``mathmlTextIntegrationPointElements``."""
        return (element.namespace, element.name) in mathmlTextIntegrationPointElements

    def mainLoop(self):
        """Feed every tokenizer token to the right phase, then handle EOF.

        A phase method may return a token, which is then processed again
        (possibly by a different phase); returning ``None`` finishes the
        token.

        """
        CharactersToken = tokenTypes[" Characters "]
        SpaceCharactersToken = tokenTypes[" SpaceCharacters "]
        StartTagToken = tokenTypes[" StartTag "]
        EndTagToken = tokenTypes[" EndTag "]
        CommentToken = tokenTypes[" Comment "]
        DoctypeToken = tokenTypes[" Doctype "]
        ParseErrorToken = tokenTypes[" ParseError "]

        # Loop invariant: built once instead of once per processed token.
        mathmlTextExemptTags = frozenset([" mglyph ", " malignmark "])

        for token in self.tokenizer:
            prev_token = None
            new_token = token
            while new_token is not None:
                prev_token = new_token
                currentNode = self.tree.openElements[-1] if self.tree.openElements else None
                currentNodeNamespace = currentNode.namespace if currentNode else None
                currentNodeName = currentNode.name if currentNode else None

                # Named token_type so the builtin ``type`` is not shadowed.
                token_type = new_token[" type "]

                if token_type == ParseErrorToken:
                    self.parseError(new_token[" data "], new_token.get(" datavars ", {}))
                    new_token = None
                else:
                    # Use the current phase unless the token belongs to
                    # foreign content.
                    if (len(self.tree.openElements) == 0 or
                            currentNodeNamespace == self.tree.defaultNamespace or
                            (self.isMathMLTextIntegrationPoint(currentNode) and
                             ((token_type == StartTagToken and
                               token[" name "] not in mathmlTextExemptTags) or
                              token_type in (CharactersToken, SpaceCharactersToken))) or
                            (currentNodeNamespace == namespaces[" mathml "] and
                             currentNodeName == " annotation-xml " and
                             token_type == StartTagToken and
                             token[" name "] == " svg ") or
                            (self.isHTMLIntegrationPoint(currentNode) and
                             token_type in (StartTagToken, CharactersToken, SpaceCharactersToken))):
                        phase = self.phase
                    else:
                        phase = self.phases[" inForeignContent "]

                    if token_type == CharactersToken:
                        new_token = phase.processCharacters(new_token)
                    elif token_type == SpaceCharactersToken:
                        new_token = phase.processSpaceCharacters(new_token)
                    elif token_type == StartTagToken:
                        new_token = phase.processStartTag(new_token)
                    elif token_type == EndTagToken:
                        new_token = phase.processEndTag(new_token)
                    elif token_type == CommentToken:
                        new_token = phase.processComment(new_token)
                    elif token_type == DoctypeToken:
                        new_token = phase.processDoctype(new_token)

            # A start tag written with a trailing solidus that no handler
            # acknowledged is reported once the token is fully processed.
            if (token_type == StartTagToken and prev_token[" selfClosing "] and
                    not prev_token[" selfClosingAcknowledged "]):
                self.parseError(" non-void-element-with-trailing-solidus ",
                                {" name ": prev_token[" name "]})

        # When the loop finishes it's EOF
        reprocess = True
        phases = []
        while reprocess:
            phases.append(self.phase)
            reprocess = self.phase.processEOF()
            if reprocess:
                # A phase asking for reprocessing must have switched to a
                # phase that has not handled EOF yet.
                assert self.phase not in phases

    def parse(self, stream, *args, **kwargs):
        """Parse a HTML document into a well-formed tree

        :arg stream: a file-like object or string containing the HTML to be parsed

            The optional encoding parameter must be a string that indicates
            the encoding.  If specified, that encoding will be used,
            regardless of any BOM or later declaration (such as in a meta
            element).

        :arg scripting: treat noscript elements as if JavaScript was turned on

        :returns: parsed tree

        Example:

        >>> from html5lib.html5parser import HTMLParser
        >>> parser = HTMLParser()
        >>> parser.parse('<html><body><p>This is a doc</p></body></html>')
        <Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0>

        """
        # innerHTML=False, container=None; further positionals follow them.
        self._parse(stream, False, None, *args, **kwargs)
        return self.tree.getDocument()

    def parseFragment(self, stream, *args, **kwargs):
        """Parse a HTML fragment into a well-formed tree fragment

        :arg container: name of the element we're setting the innerHTML
            property on; when omitted, the default of ``_parse`` applies.
            NOTE(review): an explicit ``None`` is not replaced by a default
            here and reaches ``reset()``, which calls ``.lower()`` on it.

        :arg stream: a file-like object or string containing the HTML to be parsed

            The optional encoding parameter must be a string that indicates
            the encoding.  If specified, that encoding will be used,
            regardless of any BOM or later declaration (such as in a meta
            element)

        :arg scripting: treat noscript elements as if JavaScript was turned on

        :returns: parsed tree

        Example:

        >>> from html5lib.html5parser import HTMLParser
        >>> parser = HTMLParser()
        >>> parser.parseFragment('<b>this is a fragment</b>')
        <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090>

        """
        # innerHTML=True; positionals after it map to container, scripting.
        self._parse(stream, True, *args, **kwargs)
        return self.tree.getFragment()

    def parseError(self, errorcode=" XXX-undefined-error ", datavars=None):
        """Record a parse error and raise ``ParseError`` in strict mode.

        :arg errorcode: key into the ``E`` message table

        :arg datavars: mapping interpolated into the message; ``None`` means
            an empty mapping

        """
        # XXX The idea is to make errorcode mandatory.
        if datavars is None:
            datavars = {}
        self.errors.append((self.tokenizer.stream.position(), errorcode, datavars))
        if self.strict:
            raise ParseError(E[errorcode] % datavars)

    def adjustMathMLAttributes(self, token):
        """Apply the ``adjustMathMLAttributes`` table to ``token``."""
        adjust_attributes(token, adjustMathMLAttributes)

    def adjustSVGAttributes(self, token):
        """Apply the ``adjustSVGAttributes`` table to ``token``."""
        adjust_attributes(token, adjustSVGAttributes)

    def adjustForeignAttributes(self, token):
        """Apply the foreign-attribute table to ``token``."""
        adjust_attributes(token, adjustForeignAttributesMap)

    def reparseTokenNormal(self, token):
        # pylint:disable=unused-argument
        # NOTE(review): nothing in this class assigns ``self.parser``, so
        # this call looks like it would raise AttributeError - confirm
        # whether the method is still used before relying on it.
        self.parser.phase()

    def resetInsertionMode(self):
        """Choose ``self.phase`` from the current stack of open elements."""
        # The name of this method is mostly historical. (It's also used in the
        # specification.)
        last = False
        newModes = {
            " select ": " inSelect ",
            " td ": " inCell ",
            " th ": " inCell ",
            " tr ": " inRow ",
            " tbody ": " inTableBody ",
            " thead ": " inTableBody ",
            " tfoot ": " inTableBody ",
            " caption ": " inCaption ",
            " colgroup ": " inColumnGroup ",
            " table ": " inTable ",
            " head ": " inBody ",
            " body ": " inBody ",
            " frameset ": " inFrameset ",
            " html ": " beforeHead "
        }
        # Walk the stack from the most recently opened element downwards.
        for node in self.tree.openElements[::-1]:
            nodeName = node.name
            new_phase = None
            if node == self.tree.openElements[0]:
                # Bottom of the stack: the context element name decides.
                assert self.innerHTML
                last = True
                nodeName = self.innerHTML
            # Check for conditions that should only happen in the innerHTML
            # case
            if nodeName in (" select ", " colgroup ", " head ", " html "):
                assert self.innerHTML

            if not last and node.namespace != self.tree.defaultNamespace:
                continue

            if nodeName in newModes:
                new_phase = self.phases[newModes[nodeName]]
                break
            elif last:
                new_phase = self.phases[" inBody "]
                break

        self.phase = new_phase

    def parseRCDataRawtext(self, token, contentType):
        """Insert ``token`` and switch tokenizer and parser to text handling.

        :arg token: start tag token of the element to insert

        :arg contentType: selects the tokenizer state; must be one of the
            two values accepted by the assertion below

        """
        # Generic RCDATA/RAWTEXT Parsing algorithm
        assert contentType in (" RAWTEXT ", " RCDATA ")

        self.tree.insertElement(token)

        if contentType == " RAWTEXT ":
            self.tokenizer.state = self.tokenizer.rawtextState
        else:
            self.tokenizer.state = self.tokenizer.rcdataState

        # Remember the phase to come back to after the text phase.
        self.originalPhase = self.phase

        self.phase = self.phases[" text "]
@_utils.memoize
def getPhases ( debug ) :
def log(function):
    """Logger that records which phase processes each token"""
    # Reverse lookup: numeric token type -> its name in tokenTypes.
    type_names = {value: key for key, value in tokenTypes.items()}

    def wrapped(self, *args, **kwargs):
        # Only calls that carry a token (first positional argument) to a
        # function whose name matches the prefix below are recorded.
        if function.__name__.startswith(" process ") and args:
            token = args[0]
            token_kind = token[' type ']
            info = {" type ": type_names[token_kind]}
            if token_kind in tagTokenTypes:
                info[" name "] = token[' name ']

            entry = (self.parser.tokenizer.state.__name__,
                     self.parser.phase.__class__.__name__,
                     self.__class__.__name__,
                     function.__name__,
                     info)
            self.parser.log.append(entry)
        # Logged or not, the wrapped function always runs with the
        # original arguments.
        return function(self, *args, **kwargs)

    return wrapped
def getMetaclass(use_metaclass, metaclass_func):
    """Return the metaclass to build the phase classes with.

    :arg use_metaclass: when true, a decorating metaclass is built from
        ``metaclass_func``; otherwise plain ``type`` is returned

    :arg metaclass_func: decorator passed to ``method_decorator_metaclass``

    """
    return method_decorator_metaclass(metaclass_func) if use_metaclass else type
# pylint:disable=unused-argument
class Phase(with_metaclass(getMetaclass(debug, log))):
    """Base class for helper object that implements each phase of processing

    Provides default handling for comments, doctypes and text, and
    dispatches tag tokens through the ``startTagHandler`` /
    ``endTagHandler`` tables of the concrete phase, caching the looked-up
    handlers per tag name.
    """
    __slots__ = (" parser ", " tree ", " __startTagCache ", " __endTagCache ")

    def __init__(self, parser, tree):
        """Keep references to the owning parser and its tree builder."""
        self.__startTagCache = {}
        self.__endTagCache = {}
        self.parser = parser
        self.tree = tree

    def processEOF(self):
        """Handle end of input; concrete phases have to override this."""
        raise NotImplementedError

    def processComment(self, token):
        """Insert the comment under the current node."""
        # For most phases the following is correct. Where it's not it will be
        # overridden.
        currentNode = self.tree.openElements[-1]
        self.tree.insertComment(token, currentNode)

    def processDoctype(self, token):
        """Report a doctype token as a parse error."""
        self.parser.parseError(" unexpected-doctype ")

    def processCharacters(self, token):
        """Insert the character data of ``token`` into the tree."""
        self.tree.insertText(token[" data "])

    def processSpaceCharacters(self, token):
        """Insert the whitespace data of ``token`` into the tree."""
        self.tree.insertText(token[" data "])

    def processStartTag(self, token):
        """Dispatch a start tag token to the handler registered for its name."""
        # Note the caching is done here rather than BoundMethodDispatcher as doing it there
        # requires a circular reference to the Phase, and this ends up with a significant
        # (CPython 2.7, 3.8) GC cost when parsing many short inputs
        cache = self.__startTagCache
        tagName = token[" name "]
        # In Py2, using `in` is quicker in general than try/except KeyError
        # In Py3, `in` is quicker when there are few cache hits (typically short inputs)
        if tagName not in cache:
            handler = cache[tagName] = self.startTagHandler[tagName]
            # bound the cache size in case we get loads of unknown tags
            while len(cache) > len(self.startTagHandler) * 1.1:
                # this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7
                cache.pop(next(iter(cache)))
        else:
            handler = cache[tagName]
        return handler(token)

    def startTagHtml(self, token):
        """Copy attributes of a repeated ``html`` start tag onto the root element."""
        if not self.parser.firstStartTag and token[" name "] == " html ":
            self.parser.parseError(" non-html-root ")
        # XXX Need a check here to see if the first start tag token emitted is
        # this token... If it's not, invoke self.parser.parseError().
        rootAttributes = self.tree.openElements[0].attributes
        for attr, value in token[" data "].items():
            # Attributes already present on the root element win.
            if attr not in rootAttributes:
                rootAttributes[attr] = value
        self.parser.firstStartTag = False

    def processEndTag(self, token):
        """Dispatch an end tag token to the handler registered for its name."""
        # Note the caching is done here rather than BoundMethodDispatcher as doing it there
        # requires a circular reference to the Phase, and this ends up with a significant
        # (CPython 2.7, 3.8) GC cost when parsing many short inputs
        cache = self.__endTagCache
        tagName = token[" name "]
        # In Py2, using `in` is quicker in general than try/except KeyError
        # In Py3, `in` is quicker when there are few cache hits (typically short inputs)
        if tagName not in cache:
            handler = cache[tagName] = self.endTagHandler[tagName]
            # bound the cache size in case we get loads of unknown tags
            while len(cache) > len(self.endTagHandler) * 1.1:
                # this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7
                cache.pop(next(iter(cache)))
        else:
            handler = cache[tagName]
        return handler(token)
class InitialPhase(Phase):
    """Phase that handles the doctype and derives the compatibility mode.

    Any token other than whitespace, a comment or a doctype is a parse
    error here: it forces quirks mode and is passed on to the
    ``beforeHtml`` phase.
    """
    __slots__ = tuple()

    def processSpaceCharacters(self, token):
        """Ignore whitespace in this phase."""
        pass

    def processComment(self, token):
        """Attach the comment directly to the document."""
        self.tree.insertComment(token, self.tree.document)

    def processDoctype(self, token):
        """Insert the doctype and set ``parser.compatMode`` from its identifiers."""
        name = token[" name "]
        publicId = token[" publicId "]
        systemId = token[" systemId "]
        correct = token[" correct "]

        if (name != " html " or publicId is not None or
                (systemId is not None and systemId != " about:legacy-compat ")):
            self.parser.parseError(" unknown-doctype ")

        if publicId is None:
            publicId = " "

        self.tree.insertDoctype(token)

        if publicId != " ":
            publicId = publicId.translate(asciiUpper2Lower)

        # Public identifier prefixes that select quirks mode.
        quirksPrefixes = (
            " +//silmaril//dtd html pro v0r11 19970101// ",
            " -//advasoft ltd//dtd html 3.0 aswedit + extensions// ",
            " -//as//dtd html 3.0 aswedit + extensions// ",
            " -//ietf//dtd html 2.0 level 1// ",
            " -//ietf//dtd html 2.0 level 2// ",
            " -//ietf//dtd html 2.0 strict level 1// ",
            " -//ietf//dtd html 2.0 strict level 2// ",
            " -//ietf//dtd html 2.0 strict// ",
            " -//ietf//dtd html 2.0// ",
            " -//ietf//dtd html 2.1e// ",
            " -//ietf//dtd html 3.0// ",
            " -//ietf//dtd html 3.2 final// ",
            " -//ietf//dtd html 3.2// ",
            " -//ietf//dtd html 3// ",
            " -//ietf//dtd html level 0// ",
            " -//ietf//dtd html level 1// ",
            " -//ietf//dtd html level 2// ",
            " -//ietf//dtd html level 3// ",
            " -//ietf//dtd html strict level 0// ",
            " -//ietf//dtd html strict level 1// ",
            " -//ietf//dtd html strict level 2// ",
            " -//ietf//dtd html strict level 3// ",
            " -//ietf//dtd html strict// ",
            " -//ietf//dtd html// ",
            " -//metrius//dtd metrius presentational// ",
            " -//microsoft//dtd internet explorer 2.0 html strict// ",
            " -//microsoft//dtd internet explorer 2.0 html// ",
            " -//microsoft//dtd internet explorer 2.0 tables// ",
            " -//microsoft//dtd internet explorer 3.0 html strict// ",
            " -//microsoft//dtd internet explorer 3.0 html// ",
            " -//microsoft//dtd internet explorer 3.0 tables// ",
            " -//netscape comm. corp.//dtd html// ",
            " -//netscape comm. corp.//dtd strict html// ",
            " -//o ' reilly and associates//dtd html 2.0// ",
            " -//o ' reilly and associates//dtd html extended 1.0// ",
            " -//o ' reilly and associates//dtd html extended relaxed 1.0// ",
            " -//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0// ",
            " -//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0// ",
            " -//spyglass//dtd html 2.0 extended// ",
            " -//sq//dtd html 2.0 hotmetal + extensions// ",
            " -//sun microsystems corp.//dtd hotjava html// ",
            " -//sun microsystems corp.//dtd hotjava strict html// ",
            " -//w3c//dtd html 3 1995-03-24// ",
            " -//w3c//dtd html 3.2 draft// ",
            " -//w3c//dtd html 3.2 final// ",
            " -//w3c//dtd html 3.2// ",
            " -//w3c//dtd html 3.2s draft// ",
            " -//w3c//dtd html 4.0 frameset// ",
            " -//w3c//dtd html 4.0 transitional// ",
            " -//w3c//dtd html experimental 19960712// ",
            " -//w3c//dtd html experimental 970421// ",
            " -//w3c//dtd w3 html// ",
            " -//w3o//dtd w3 html 3.0// ",
            " -//webtechs//dtd mozilla html 2.0// ",
            " -//webtechs//dtd mozilla html// ",
        )
        # Public identifiers that select quirks mode only on an exact match.
        quirksExact = (" -//w3o//dtd w3 html strict 3.0//en// ",
                       " -/w3c/dtd html 4.0 transitional/en ",
                       " html ")
        # HTML 4.01 identifiers: quirks without a system identifier,
        # limited quirks with one.
        html401Prefixes = (" -//w3c//dtd html 4.01 frameset// ",
                           " -//w3c//dtd html 4.01 transitional// ")
        xhtml10Prefixes = (" -//w3c//dtd xhtml 1.0 frameset// ",
                           " -//w3c//dtd xhtml 1.0 transitional// ")

        if (not correct or token[" name "] != " html " or
                publicId.startswith(quirksPrefixes) or
                publicId in quirksExact or
                (publicId.startswith(html401Prefixes) and systemId is None) or
                (systemId and systemId.lower() == " http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd ")):
            self.parser.compatMode = " quirks "
        elif (publicId.startswith(xhtml10Prefixes) or
              (publicId.startswith(html401Prefixes) and systemId is not None)):
            self.parser.compatMode = " limited quirks "

        self.parser.phase = self.parser.phases[" beforeHtml "]

    def anythingElse(self):
        """Switch to quirks mode and move on to the ``beforeHtml`` phase."""
        self.parser.compatMode = " quirks "
        self.parser.phase = self.parser.phases[" beforeHtml "]

    def processCharacters(self, token):
        """Report the missing doctype and have the token reprocessed."""
        self.parser.parseError(" expected-doctype-but-got-chars ")
        self.anythingElse()
        return token

    def processStartTag(self, token):
        """Report the missing doctype and have the token reprocessed."""
        self.parser.parseError(" expected-doctype-but-got-start-tag ",
                               {" name ": token[" name "]})
        self.anythingElse()
        return token

    def processEndTag(self, token):
        """Report the missing doctype and have the token reprocessed."""
        self.parser.parseError(" expected-doctype-but-got-end-tag ",
                               {" name ": token[" name "]})
        self.anythingElse()
        return token

    def processEOF(self):
        """Report the missing doctype and ask for EOF to be reprocessed."""
        self.parser.parseError(" expected-doctype-but-got-eof ")
        self.anythingElse()
        return True
class BeforeHtmlPhase(Phase):
    """Phase that creates the root ``html`` element.

    Every token that is not ignored or rejected causes an implied root
    element to be inserted and is then reprocessed by the ``beforeHead``
    phase.
    """
    __slots__ = tuple()

    # helper methods
    def insertHtmlElement(self):
        """Insert an implied ``html`` root and switch to ``beforeHead``."""
        rootToken = impliedTagToken(" html ", " StartTag ")
        self.tree.insertRoot(rootToken)
        self.parser.phase = self.parser.phases[" beforeHead "]

    # other
    def processEOF(self):
        """Create the root element and ask for EOF to be reprocessed."""
        self.insertHtmlElement()
        return True

    def processComment(self, token):
        """Attach the comment directly to the document."""
        self.tree.insertComment(token, self.tree.document)

    def processSpaceCharacters(self, token):
        """Ignore whitespace in this phase."""
        pass

    def processCharacters(self, token):
        """Create the root element and have the token reprocessed."""
        self.insertHtmlElement()
        return token

    def processStartTag(self, token):
        """Create the root element and have the start tag reprocessed."""
        if token[" name "] == " html ":
            # Remembered so that Phase.startTagHtml does not report it.
            self.parser.firstStartTag = True
        self.insertHtmlElement()
        return token

    def processEndTag(self, token):
        """Reject end tags other than the four that imply the root element."""
        if token[" name "] in (" head ", " body ", " html ", " br "):
            self.insertHtmlElement()
            return token
        # Any other end tag is reported and dropped (nothing is returned).
        self.parser.parseError(" unexpected-end-tag-before-html ",
                               {" name ": token[" name "]})
class BeforeHeadPhase(Phase):
    """Phase that opens the ``head`` element, explicitly or implied.

    Once ``head`` has been inserted the parser is switched to the
    ``inHead`` phase.
    """
    __slots__ = tuple()

    def processEOF(self):
        """Imply a ``head`` element and ask for EOF to be reprocessed."""
        self.startTagHead(impliedTagToken(" head ", " StartTag "))
        return True

    def processSpaceCharacters(self, token):
        """Ignore whitespace in this phase."""
        pass

    def processCharacters(self, token):
        """Imply a ``head`` element and have the token reprocessed."""
        self.startTagHead(impliedTagToken(" head ", " StartTag "))
        return token

    def startTagHtml(self, token):
        """Let the ``inBody`` phase deal with an ``html`` start tag."""
        inBody = self.parser.phases[" inBody "]
        return inBody.processStartTag(token)

    def startTagHead(self, token):
        """Insert ``head``, remember it as head pointer, switch to ``inHead``."""
        self.tree.insertElement(token)
        # The element just inserted is now the current node.
        self.tree.headPointer = self.tree.openElements[-1]
        self.parser.phase = self.parser.phases[" inHead "]

    def startTagOther(self, token):
        """Imply a ``head`` element and have the start tag reprocessed."""
        self.startTagHead(impliedTagToken(" head ", " StartTag "))
        return token

    def endTagImplyHead(self, token):
        """Imply a ``head`` element and have the end tag reprocessed."""
        self.startTagHead(impliedTagToken(" head ", " StartTag "))
        return token

    def endTagOther(self, token):
        """Report and drop any other end tag."""
        self.parser.parseError(" end-tag-after-implied-root ",
                               {" name ": token[" name "]})

    # Dispatch tables used by Phase.processStartTag / Phase.processEndTag.
    startTagHandler = _utils.MethodDispatcher([
        (" html ", startTagHtml),
        (" head ", startTagHead)
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ((" head ", " body ", " html ", " br "), endTagImplyHead)
    ])
    endTagHandler.default = endTagOther
class InHeadPhase(Phase):
    """Phase that processes the content of the ``head`` element.

    Tokens it has no handler for close ``head`` (switching to the
    ``afterHead`` phase) and are reprocessed there.
    """
    __slots__ = tuple()

    # the real thing
    def processEOF(self):
        """Close ``head`` and ask for EOF to be reprocessed."""
        self.anythingElse()
        return True

    def processCharacters(self, token):
        """Close ``head`` and have the characters reprocessed."""
        self.anythingElse()
        return token

    def startTagHtml(self, token):
        """Let the ``inBody`` phase deal with an ``html`` start tag."""
        inBody = self.parser.phases[" inBody "]
        return inBody.processStartTag(token)

    def startTagHead(self, token):
        """Report a second ``head`` start tag; the token is dropped."""
        self.parser.parseError(" two-heads-are-not-better-than-one ")

    def startTagBaseLinkCommand(self, token):
        """Insert an element that gets no content and acknowledge self-closing."""
        self.tree.insertElement(token)
        # Popped right away: the element never stays on the open stack.
        self.tree.openElements.pop()
        token[" selfClosingAcknowledged "] = True

    def startTagMeta(self, token):
        """Insert ``meta`` and, if the encoding is tentative, try to switch it."""
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        token[" selfClosingAcknowledged "] = True

        attributes = token[" data "]
        stream = self.parser.tokenizer.stream
        if stream.charEncoding[1] != " tentative ":
            return

        if " charset " in attributes:
            stream.changeEncoding(attributes[" charset "])
        elif (" content " in attributes and
              " http-equiv " in attributes and
              attributes[" http-equiv "].lower() == " content-type "):
            # Encoding it as UTF-8 here is a hack, as really we should pass
            # the abstract Unicode string, and just use the
            # ContentAttrParser on that, but using UTF-8 allows all chars
            # to be encoded and as a ASCII-superset works.
            rawContent = attributes[" content "].encode(" utf-8 ")
            data = _inputstream.EncodingBytes(rawContent)
            codec = _inputstream.ContentAttrParser(data).parse()
            stream.changeEncoding(codec)

    def startTagTitle(self, token):
        """Parse the element content as RCDATA."""
        self.parser.parseRCDataRawtext(token, " RCDATA ")

    def startTagNoFramesStyle(self, token):
        """Parse the element content as RAWTEXT."""
        # Need to decide whether to implement the scripting-disabled case
        self.parser.parseRCDataRawtext(token, " RAWTEXT ")

    def startTagNoscript(self, token):
        """Handle ``noscript`` according to the parser's scripting flag."""
        if not self.parser.scripting:
            self.tree.insertElement(token)
            self.parser.phase = self.parser.phases[" inHeadNoscript "]
        else:
            self.parser.parseRCDataRawtext(token, " RAWTEXT ")

    def startTagScript(self, token):
        """Insert ``script`` and switch tokenizer and parser to script text."""
        self.tree.insertElement(token)
        tokenizer = self.parser.tokenizer
        tokenizer.state = tokenizer.scriptDataState
        # Remember the phase to come back to after the text phase.
        self.parser.originalPhase = self.parser.phase
        self.parser.phase = self.parser.phases[" text "]

    def startTagOther(self, token):
        """Close ``head`` and have the start tag reprocessed."""
        self.anythingElse()
        return token

    def endTagHead(self, token):
        """Pop ``head`` from the open elements and switch to ``afterHead``."""
        node = self.parser.tree.openElements.pop()
        assert node.name == " head ", " Expected head got %s " % node.name
        self.parser.phase = self.parser.phases[" afterHead "]

    def endTagHtmlBodyBr(self, token):
        """Close ``head`` and have the end tag reprocessed."""
        self.anythingElse()
        return token

    def endTagOther(self, token):
        """Report and drop any other end tag."""
        self.parser.parseError(" unexpected-end-tag ", {" name ": token[" name "]})

    def anythingElse(self):
        """Act as if a ``head`` end tag had been seen."""
        self.endTagHead(impliedTagToken(" head "))

    # Dispatch tables used by Phase.processStartTag / Phase.processEndTag.
    startTagHandler = _utils.MethodDispatcher([
        (" html ", startTagHtml),
        (" title ", startTagTitle),
        ((" noframes ", " style "), startTagNoFramesStyle),
        (" noscript ", startTagNoscript),
        (" script ", startTagScript),
        ((" base ", " basefont ", " bgsound ", " command ", " link "),
         startTagBaseLinkCommand),
        (" meta ", startTagMeta),
        (" head ", startTagHead)
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        (" head ", endTagHead),
        ((" br ", " html ", " body "), endTagHtmlBodyBr)
    ])
    endTagHandler.default = endTagOther
class InHeadNoscriptPhase(Phase):
    """Phase used inside a ``noscript`` element opened by the ``inHead`` phase.

    Unexpected tokens are reported, ``noscript`` is closed (returning to
    the ``inHead`` phase) and the token is reprocessed.
    """
    __slots__ = tuple()

    def processEOF(self):
        """Report EOF, close ``noscript`` and ask for EOF to be reprocessed."""
        self.parser.parseError(" eof-in-head-noscript ")
        self.anythingElse()
        return True

    def processComment(self, token):
        """Handle comments the way the ``inHead`` phase does."""
        inHead = self.parser.phases[" inHead "]
        return inHead.processComment(token)

    def processCharacters(self, token):
        """Report the characters, close ``noscript`` and reprocess them."""
        self.parser.parseError(" char-in-head-noscript ")
        self.anythingElse()
        return token

    def processSpaceCharacters(self, token):
        """Handle whitespace the way the ``inHead`` phase does."""
        inHead = self.parser.phases[" inHead "]
        return inHead.processSpaceCharacters(token)

    def startTagHtml(self, token):
        """Let the ``inBody`` phase deal with an ``html`` start tag."""
        inBody = self.parser.phases[" inBody "]
        return inBody.processStartTag(token)

    def startTagBaseLinkCommand(self, token):
        """Handle these start tags the way the ``inHead`` phase does."""
        inHead = self.parser.phases[" inHead "]
        return inHead.processStartTag(token)

    def startTagHeadNoscript(self, token):
        """Report and drop a nested ``head`` or ``noscript`` start tag."""
        self.parser.parseError(" unexpected-start-tag ", {" name ": token[" name "]})

    def startTagOther(self, token):
        """Report the tag, close ``noscript`` and reprocess the token."""
        self.parser.parseError(" unexpected-inhead-noscript-tag ", {" name ": token[" name "]})
        self.anythingElse()
        return token

    def endTagNoscript(self, token):
        """Pop ``noscript`` from the open elements and return to ``inHead``."""
        node = self.parser.tree.openElements.pop()
        assert node.name == " noscript ", " Expected noscript got %s " % node.name
        self.parser.phase = self.parser.phases[" inHead "]

    def endTagBr(self, token):
        """Report the tag, close ``noscript`` and reprocess the token."""
        self.parser.parseError(" unexpected-inhead-noscript-tag ", {" name ": token[" name "]})
        self.anythingElse()
        return token

    def endTagOther(self, token):
        """Report and drop any other end tag."""
        self.parser.parseError(" unexpected-end-tag ", {" name ": token[" name "]})

    def anythingElse(self):
        """Act as if a ``noscript`` end tag had been seen."""
        # Caller must raise parse error first!
        self.endTagNoscript(impliedTagToken(" noscript "))

    # Dispatch tables used by Phase.processStartTag / Phase.processEndTag.
    startTagHandler = _utils.MethodDispatcher([
        (" html ", startTagHtml),
        ((" basefont ", " bgsound ", " link ", " meta ", " noframes ", " style "), startTagBaseLinkCommand),
        ((" head ", " noscript "), startTagHeadNoscript),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        (" noscript ", endTagNoscript),
        (" br ", endTagBr),
    ])
    endTagHandler.default = endTagOther
class AfterHeadPhase(Phase):
    """Phase used between the end of ``head`` and the start of the body.

    Opens ``body`` or ``frameset``; anything unexpected implies a ``body``
    element and is reprocessed by the ``inBody`` phase.
    """
    __slots__ = tuple()

    def processEOF(self):
        """Imply ``body`` and ask for EOF to be reprocessed."""
        self.anythingElse()
        return True

    def processCharacters(self, token):
        """Imply ``body`` and have the characters reprocessed."""
        self.anythingElse()
        return token

    def startTagHtml(self, token):
        """Let the ``inBody`` phase deal with an ``html`` start tag."""
        inBody = self.parser.phases[" inBody "]
        return inBody.processStartTag(token)

    def startTagBody(self, token):
        """Insert an explicit ``body`` and switch to ``inBody``."""
        self.parser.framesetOK = False
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases[" inBody "]

    def startTagFrameset(self, token):
        """Insert ``frameset`` and switch to ``inFrameset``."""
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases[" inFrameset "]

    def startTagFromHead(self, token):
        """Process a head-only element that showed up after ``head`` closed."""
        self.parser.parseError(" unexpected-start-tag-out-of-my-head ",
                               {" name ": token[" name "]})
        # Temporarily put head back on the stack so the inHead phase
        # inserts the element there.
        openElements = self.tree.openElements
        openElements.append(self.tree.headPointer)
        self.parser.phases[" inHead "].processStartTag(token)
        # Take the most recently opened head element off the stack again.
        for node in openElements[::-1]:
            if node.name == " head ":
                openElements.remove(node)
                break

    def startTagHead(self, token):
        """Report and drop a ``head`` start tag."""
        self.parser.parseError(" unexpected-start-tag ", {" name ": token[" name "]})

    def startTagOther(self, token):
        """Imply ``body`` and have the start tag reprocessed."""
        self.anythingElse()
        return token

    def endTagHtmlBodyBr(self, token):
        """Imply ``body`` and have the end tag reprocessed."""
        self.anythingElse()
        return token

    def endTagOther(self, token):
        """Report and drop any other end tag."""
        self.parser.parseError(" unexpected-end-tag ", {" name ": token[" name "]})

    def anythingElse(self):
        """Insert an implied ``body`` and switch to ``inBody``."""
        bodyToken = impliedTagToken(" body ", " StartTag ")
        self.tree.insertElement(bodyToken)
        self.parser.phase = self.parser.phases[" inBody "]
        # Unlike an explicit body tag, an implied one leaves framesetOK set.
        self.parser.framesetOK = True

    # Dispatch tables used by Phase.processStartTag / Phase.processEndTag.
    startTagHandler = _utils.MethodDispatcher([
        (" html ", startTagHtml),
        (" body ", startTagBody),
        (" frameset ", startTagFrameset),
        ((" base ", " basefont ", " bgsound ", " link ", " meta ", " noframes ", " script ",
          " style ", " title "),
         startTagFromHead),
        (" head ", startTagHead)
    ])
    startTagHandler.default = startTagOther
    endTagHandler = _utils.MethodDispatcher([((" body ", " html ", " br "),
                                              endTagHtmlBodyBr)])
    endTagHandler.default = endTagOther
class InBodyPhase ( Phase ) :
# http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody
# the really-really-really-very crazy mode
__slots__ = ( " processSpaceCharacters " , )
def __init__(self, *args, **kwargs):
    """Initialise the phase and install the default whitespace handler.

    ``processSpaceCharacters`` is a per-instance attribute here so that it
    can be swapped at runtime; it starts out as
    ``processSpaceCharactersNonPre``.
    """
    super(InBodyPhase, self).__init__(*args, **kwargs)
    # Set this to the default handler
    default_handler = self.processSpaceCharactersNonPre
    self.processSpaceCharacters = default_handler
def isMatchingFormattingElement(self, node1, node2):
    """Return whether two nodes agree in name, namespace and attributes."""
    if not node1.name == node2.name:
        return False
    if not node1.namespace == node2.namespace:
        return False
    return node1.attributes == node2.attributes
# helper
def addFormattingElement ( self , token ) :
self . tree . insertElement ( token )
element = self . tree . openElements [ - 1 ]
matchingElements = [ ]
for node in self . tree . activeFormattingElements [ : : - 1 ] :
if node is Marker :
break
elif self . isMatchingFormattingElement ( node , element ) :
matchingElements . append ( node )
assert len ( matchingElements ) < = 3
if len ( matchingElements ) == 3 :
self . tree . activeFormattingElements . remove ( matchingElements [ - 1 ] )
self . tree . activeFormattingElements . append ( element )
# the real deal
def processEOF ( self ) :
allowed_elements = frozenset ( ( " dd " , " dt " , " li " , " p " , " tbody " , " td " ,
" tfoot " , " th " , " thead " , " tr " , " body " ,
" html " ) )
for node in self . tree . openElements [ : : - 1 ] :
if node . name not in allowed_elements :
self . parser . parseError ( " expected-closing-tag-but-got-eof " )
break
# Stop parsing
def processSpaceCharactersDropNewline ( self , token ) :
# Sometimes (start of <pre>, <listing>, and <textarea> blocks) we
# want to drop leading newlines
data = token [ " data " ]
self . processSpaceCharacters = self . processSpaceCharactersNonPre
if ( data . startswith ( " \n " ) and
self . tree . openElements [ - 1 ] . name in ( " pre " , " listing " , " textarea " ) and
not self . tree . openElements [ - 1 ] . hasContent ( ) ) :
data = data [ 1 : ]
if data :
self . tree . reconstructActiveFormattingElements ( )
self . tree . insertText ( data )
def processCharacters ( self , token ) :
if token [ " data " ] == " \u0000 " :
# The tokenizer should always emit null on its own
return
self . tree . reconstructActiveFormattingElements ( )
self . tree . insertText ( token [ " data " ] )
# This must be bad for performance
if ( self . parser . framesetOK and
any ( [ char not in spaceCharacters
for char in token [ " data " ] ] ) ) :
self . parser . framesetOK = False
def processSpaceCharactersNonPre ( self , token ) :
self . tree . reconstructActiveFormattingElements ( )
self . tree . insertText ( token [ " data " ] )
def startTagProcessInHead ( self , token ) :
return self . parser . phases [ " inHead " ] . processStartTag ( token )
def startTagBody ( self , token ) :
self . parser . parseError ( " unexpected-start-tag " , { " name " : " body " } )
if ( len ( self . tree . openElements ) == 1 or
self . tree . openElements [ 1 ] . name != " body " ) :
assert self . parser . innerHTML
else :
self . parser . framesetOK = False
for attr , value in token [ " data " ] . items ( ) :
if attr not in self . tree . openElements [ 1 ] . attributes :
self . tree . openElements [ 1 ] . attributes [ attr ] = value
def startTagFrameset ( self , token ) :
self . parser . parseError ( " unexpected-start-tag " , { " name " : " frameset " } )
if ( len ( self . tree . openElements ) == 1 or self . tree . openElements [ 1 ] . name != " body " ) :
assert self . parser . innerHTML
elif not self . parser . framesetOK :
pass
else :
if self . tree . openElements [ 1 ] . parent :
self . tree . openElements [ 1 ] . parent . removeChild ( self . tree . openElements [ 1 ] )
while self . tree . openElements [ - 1 ] . name != " html " :
self . tree . openElements . pop ( )
self . tree . insertElement ( token )
self . parser . phase = self . parser . phases [ " inFrameset " ]
def startTagCloseP ( self , token ) :
if self . tree . elementInScope ( " p " , variant = " button " ) :
self . endTagP ( impliedTagToken ( " p " ) )
self . tree . insertElement ( token )
def startTagPreListing ( self , token ) :
if self . tree . elementInScope ( " p " , variant = " button " ) :
self . endTagP ( impliedTagToken ( " p " ) )
self . tree . insertElement ( token )
self . parser . framesetOK = False
self . processSpaceCharacters = self . processSpaceCharactersDropNewline
def startTagForm ( self , token ) :
if self . tree . formPointer :
self . parser . parseError ( " unexpected-start-tag " , { " name " : " form " } )
else :
if self . tree . elementInScope ( " p " , variant = " button " ) :
self . endTagP ( impliedTagToken ( " p " ) )
self . tree . insertElement ( token )
self . tree . formPointer = self . tree . openElements [ - 1 ]
def startTagListItem ( self , token ) :
self . parser . framesetOK = False
stopNamesMap = { " li " : [ " li " ] ,
" dt " : [ " dt " , " dd " ] ,
" dd " : [ " dt " , " dd " ] }
stopNames = stopNamesMap [ token [ " name " ] ]
for node in reversed ( self . tree . openElements ) :
if node . name in stopNames :
self . parser . phase . processEndTag (
impliedTagToken ( node . name , " EndTag " ) )
break
if ( node . nameTuple in specialElements and
node . name not in ( " address " , " div " , " p " ) ) :
break
if self . tree . elementInScope ( " p " , variant = " button " ) :
self . parser . phase . processEndTag (
impliedTagToken ( " p " , " EndTag " ) )
self . tree . insertElement ( token )
def startTagPlaintext ( self , token ) :
if self . tree . elementInScope ( " p " , variant = " button " ) :
self . endTagP ( impliedTagToken ( " p " ) )
self . tree . insertElement ( token )
self . parser . tokenizer . state = self . parser . tokenizer . plaintextState
def startTagHeading ( self , token ) :
if self . tree . elementInScope ( " p " , variant = " button " ) :
self . endTagP ( impliedTagToken ( " p " ) )
if self . tree . openElements [ - 1 ] . name in headingElements :
self . parser . parseError ( " unexpected-start-tag " , { " name " : token [ " name " ] } )
self . tree . openElements . pop ( )
self . tree . insertElement ( token )
def startTagA ( self , token ) :
afeAElement = self . tree . elementInActiveFormattingElements ( " a " )
if afeAElement :
self . parser . parseError ( " unexpected-start-tag-implies-end-tag " ,
{ " startName " : " a " , " endName " : " a " } )
self . endTagFormatting ( impliedTagToken ( " a " ) )
if afeAElement in self . tree . openElements :
self . tree . openElements . remove ( afeAElement )
if afeAElement in self . tree . activeFormattingElements :
self . tree . activeFormattingElements . remove ( afeAElement )
self . tree . reconstructActiveFormattingElements ( )
self . addFormattingElement ( token )
def startTagFormatting ( self , token ) :
self . tree . reconstructActiveFormattingElements ( )
self . addFormattingElement ( token )
def startTagNobr ( self , token ) :
self . tree . reconstructActiveFormattingElements ( )
if self . tree . elementInScope ( " nobr " ) :
self . parser . parseError ( " unexpected-start-tag-implies-end-tag " ,
{ " startName " : " nobr " , " endName " : " nobr " } )
self . processEndTag ( impliedTagToken ( " nobr " ) )
# XXX Need tests that trigger the following
self . tree . reconstructActiveFormattingElements ( )
self . addFormattingElement ( token )
def startTagButton ( self , token ) :
if self . tree . elementInScope ( " button " ) :
self . parser . parseError ( " unexpected-start-tag-implies-end-tag " ,
{ " startName " : " button " , " endName " : " button " } )
self . processEndTag ( impliedTagToken ( " button " ) )
return token
else :
self . tree . reconstructActiveFormattingElements ( )
self . tree . insertElement ( token )
self . parser . framesetOK = False
def startTagAppletMarqueeObject ( self , token ) :
self . tree . reconstructActiveFormattingElements ( )
self . tree . insertElement ( token )
self . tree . activeFormattingElements . append ( Marker )
self . parser . framesetOK = False
def startTagXmp ( self , token ) :
if self . tree . elementInScope ( " p " , variant = " button " ) :
self . endTagP ( impliedTagToken ( " p " ) )
self . tree . reconstructActiveFormattingElements ( )
self . parser . framesetOK = False
self . parser . parseRCDataRawtext ( token , " RAWTEXT " )
def startTagTable ( self , token ) :
if self . parser . compatMode != " quirks " :
if self . tree . elementInScope ( " p " , variant = " button " ) :
self . processEndTag ( impliedTagToken ( " p " ) )
self . tree . insertElement ( token )
self . parser . framesetOK = False
self . parser . phase = self . parser . phases [ " inTable " ]
def startTagVoidFormatting ( self , token ) :
self . tree . reconstructActiveFormattingElements ( )
self . tree . insertElement ( token )
self . tree . openElements . pop ( )
token [ " selfClosingAcknowledged " ] = True
self . parser . framesetOK = False
def startTagInput ( self , token ) :
framesetOK = self . parser . framesetOK
self . startTagVoidFormatting ( token )
if ( " type " in token [ " data " ] and
token [ " data " ] [ " type " ] . translate ( asciiUpper2Lower ) == " hidden " ) :
# input type=hidden doesn't change framesetOK
self . parser . framesetOK = framesetOK
def startTagParamSource ( self , token ) :
self . tree . insertElement ( token )
self . tree . openElements . pop ( )
token [ " selfClosingAcknowledged " ] = True
def startTagHr ( self , token ) :
if self . tree . elementInScope ( " p " , variant = " button " ) :
self . endTagP ( impliedTagToken ( " p " ) )
self . tree . insertElement ( token )
self . tree . openElements . pop ( )
token [ " selfClosingAcknowledged " ] = True
self . parser . framesetOK = False
def startTagImage ( self , token ) :
# No really...
self . parser . parseError ( " unexpected-start-tag-treated-as " ,
{ " originalName " : " image " , " newName " : " img " } )
self . processStartTag ( impliedTagToken ( " img " , " StartTag " ,
attributes = token [ " data " ] ,
selfClosing = token [ " selfClosing " ] ) )
def startTagIsIndex ( self , token ) :
self . parser . parseError ( " deprecated-tag " , { " name " : " isindex " } )
if self . tree . formPointer :
return
form_attrs = { }
if " action " in token [ " data " ] :
form_attrs [ " action " ] = token [ " data " ] [ " action " ]
self . processStartTag ( impliedTagToken ( " form " , " StartTag " ,
attributes = form_attrs ) )
self . processStartTag ( impliedTagToken ( " hr " , " StartTag " ) )
self . processStartTag ( impliedTagToken ( " label " , " StartTag " ) )
# XXX Localization ...
if " prompt " in token [ " data " ] :
prompt = token [ " data " ] [ " prompt " ]
else :
prompt = " This is a searchable index. Enter search keywords: "
self . processCharacters (
{ " type " : tokenTypes [ " Characters " ] , " data " : prompt } )
attributes = token [ " data " ] . copy ( )
if " action " in attributes :
del attributes [ " action " ]
if " prompt " in attributes :
del attributes [ " prompt " ]
attributes [ " name " ] = " isindex "
self . processStartTag ( impliedTagToken ( " input " , " StartTag " ,
attributes = attributes ,
selfClosing = token [ " selfClosing " ] ) )
self . processEndTag ( impliedTagToken ( " label " ) )
self . processStartTag ( impliedTagToken ( " hr " , " StartTag " ) )
self . processEndTag ( impliedTagToken ( " form " ) )
def startTagTextarea ( self , token ) :
self . tree . insertElement ( token )
self . parser . tokenizer . state = self . parser . tokenizer . rcdataState
self . processSpaceCharacters = self . processSpaceCharactersDropNewline
self . parser . framesetOK = False
def startTagIFrame ( self , token ) :
self . parser . framesetOK = False
self . startTagRawtext ( token )
def startTagNoscript ( self , token ) :
if self . parser . scripting :
self . startTagRawtext ( token )
else :
self . startTagOther ( token )
def startTagRawtext ( self , token ) :
""" iframe, noembed noframes, noscript(if scripting enabled) """
self . parser . parseRCDataRawtext ( token , " RAWTEXT " )
def startTagOpt ( self , token ) :
if self . tree . openElements [ - 1 ] . name == " option " :
self . parser . phase . processEndTag ( impliedTagToken ( " option " ) )
self . tree . reconstructActiveFormattingElements ( )
self . parser . tree . insertElement ( token )
def startTagSelect ( self , token ) :
self . tree . reconstructActiveFormattingElements ( )
self . tree . insertElement ( token )
self . parser . framesetOK = False
if self . parser . phase in ( self . parser . phases [ " inTable " ] ,
self . parser . phases [ " inCaption " ] ,
self . parser . phases [ " inColumnGroup " ] ,
self . parser . phases [ " inTableBody " ] ,
self . parser . phases [ " inRow " ] ,
self . parser . phases [ " inCell " ] ) :
self . parser . phase = self . parser . phases [ " inSelectInTable " ]
else :
self . parser . phase = self . parser . phases [ " inSelect " ]
def startTagRpRt ( self , token ) :
if self . tree . elementInScope ( " ruby " ) :
self . tree . generateImpliedEndTags ( )
if self . tree . openElements [ - 1 ] . name != " ruby " :
self . parser . parseError ( )
self . tree . insertElement ( token )
def startTagMath ( self , token ) :
self . tree . reconstructActiveFormattingElements ( )
self . parser . adjustMathMLAttributes ( token )
self . parser . adjustForeignAttributes ( token )
token [ " namespace " ] = namespaces [ " mathml " ]
self . tree . insertElement ( token )
# Need to get the parse error right for the case where the token
# has a namespace not equal to the xmlns attribute
if token [ " selfClosing " ] :
self . tree . openElements . pop ( )
token [ " selfClosingAcknowledged " ] = True
def startTagSvg ( self , token ) :
self . tree . reconstructActiveFormattingElements ( )
self . parser . adjustSVGAttributes ( token )
self . parser . adjustForeignAttributes ( token )
token [ " namespace " ] = namespaces [ " svg " ]
self . tree . insertElement ( token )
# Need to get the parse error right for the case where the token
# has a namespace not equal to the xmlns attribute
if token [ " selfClosing " ] :
self . tree . openElements . pop ( )
token [ " selfClosingAcknowledged " ] = True
def startTagMisplaced ( self , token ) :
""" Elements that should be children of other elements that have a
different insertion mode ; here they are ignored
" caption " , " col " , " colgroup " , " frame " , " frameset " , " head " ,
" option " , " optgroup " , " tbody " , " td " , " tfoot " , " th " , " thead " ,
" tr " , " noscript "
"""
self . parser . parseError ( " unexpected-start-tag-ignored " , { " name " : token [ " name " ] } )
def startTagOther ( self , token ) :
self . tree . reconstructActiveFormattingElements ( )
self . tree . insertElement ( token )
def endTagP ( self , token ) :
if not self . tree . elementInScope ( " p " , variant = " button " ) :
self . startTagCloseP ( impliedTagToken ( " p " , " StartTag " ) )
self . parser . parseError ( " unexpected-end-tag " , { " name " : " p " } )
self . endTagP ( impliedTagToken ( " p " , " EndTag " ) )
else :
self . tree . generateImpliedEndTags ( " p " )
if self . tree . openElements [ - 1 ] . name != " p " :
self . parser . parseError ( " unexpected-end-tag " , { " name " : " p " } )
node = self . tree . openElements . pop ( )
while node . name != " p " :
node = self . tree . openElements . pop ( )
def endTagBody ( self , token ) :
if not self . tree . elementInScope ( " body " ) :
self . parser . parseError ( )
return
elif self . tree . openElements [ - 1 ] . name != " body " :
for node in self . tree . openElements [ 2 : ] :
if node . name not in frozenset ( ( " dd " , " dt " , " li " , " optgroup " ,
" option " , " p " , " rp " , " rt " ,
" tbody " , " td " , " tfoot " ,
" th " , " thead " , " tr " , " body " ,
" html " ) ) :
# Not sure this is the correct name for the parse error
self . parser . parseError (
" expected-one-end-tag-but-got-another " ,
{ " gotName " : " body " , " expectedName " : node . name } )
break
self . parser . phase = self . parser . phases [ " afterBody " ]
def endTagHtml ( self , token ) :
# We repeat the test for the body end tag token being ignored here
if self . tree . elementInScope ( " body " ) :
self . endTagBody ( impliedTagToken ( " body " ) )
return token
def endTagBlock ( self , token ) :
# Put us back in the right whitespace handling mode
if token [ " name " ] == " pre " :
self . processSpaceCharacters = self . processSpaceCharactersNonPre
inScope = self . tree . elementInScope ( token [ " name " ] )
if inScope :
self . tree . generateImpliedEndTags ( )
if self . tree . openElements [ - 1 ] . name != token [ " name " ] :
self . parser . parseError ( " end-tag-too-early " , { " name " : token [ " name " ] } )
if inScope :
node = self . tree . openElements . pop ( )
while node . name != token [ " name " ] :
node = self . tree . openElements . pop ( )
def endTagForm ( self , token ) :
node = self . tree . formPointer
self . tree . formPointer = None
if node is None or not self . tree . elementInScope ( node ) :
self . parser . parseError ( " unexpected-end-tag " ,
{ " name " : " form " } )
else :
self . tree . generateImpliedEndTags ( )
if self . tree . openElements [ - 1 ] != node :
self . parser . parseError ( " end-tag-too-early-ignored " ,
{ " name " : " form " } )
self . tree . openElements . remove ( node )
def endTagListItem ( self , token ) :
if token [ " name " ] == " li " :
variant = " list "
else :
variant = None
if not self . tree . elementInScope ( token [ " name " ] , variant = variant ) :
self . parser . parseError ( " unexpected-end-tag " , { " name " : token [ " name " ] } )
else :
self . tree . generateImpliedEndTags ( exclude = token [ " name " ] )
if self . tree . openElements [ - 1 ] . name != token [ " name " ] :
self . parser . parseError (
" end-tag-too-early " ,
{ " name " : token [ " name " ] } )
node = self . tree . openElements . pop ( )
while node . name != token [ " name " ] :
node = self . tree . openElements . pop ( )
def endTagHeading ( self , token ) :
for item in headingElements :
if self . tree . elementInScope ( item ) :
self . tree . generateImpliedEndTags ( )
break
if self . tree . openElements [ - 1 ] . name != token [ " name " ] :
self . parser . parseError ( " end-tag-too-early " , { " name " : token [ " name " ] } )
for item in headingElements :
if self . tree . elementInScope ( item ) :
item = self . tree . openElements . pop ( )
while item . name not in headingElements :
item = self . tree . openElements . pop ( )
break
def endTagFormatting ( self , token ) :
""" The much-feared adoption agency algorithm """
# http://svn.whatwg.org/webapps/complete.html#adoptionAgency revision 7867
# XXX Better parseError messages appreciated.
# Step 1
outerLoopCounter = 0
# Step 2
while outerLoopCounter < 8 :
# Step 3
outerLoopCounter + = 1
# Step 4:
# Let the formatting element be the last element in
# the list of active formatting elements that:
# - is between the end of the list and the last scope
# marker in the list, if any, or the start of the list
# otherwise, and
# - has the same tag name as the token.
formattingElement = self . tree . elementInActiveFormattingElements (
token [ " name " ] )
if ( not formattingElement or
( formattingElement in self . tree . openElements and
not self . tree . elementInScope ( formattingElement . name ) ) ) :
# If there is no such node, then abort these steps
# and instead act as described in the "any other
# end tag" entry below.
self . endTagOther ( token )
return
# Otherwise, if there is such a node, but that node is
# not in the stack of open elements, then this is a
# parse error; remove the element from the list, and
# abort these steps.
elif formattingElement not in self . tree . openElements :
self . parser . parseError ( " adoption-agency-1.2 " , { " name " : token [ " name " ] } )
self . tree . activeFormattingElements . remove ( formattingElement )
return
# Otherwise, if there is such a node, and that node is
# also in the stack of open elements, but the element
# is not in scope, then this is a parse error; ignore
# the token, and abort these steps.
elif not self . tree . elementInScope ( formattingElement . name ) :
self . parser . parseError ( " adoption-agency-4.4 " , { " name " : token [ " name " ] } )
return
# Otherwise, there is a formatting element and that
# element is in the stack and is in scope. If the
# element is not the current node, this is a parse
# error. In any case, proceed with the algorithm as
# written in the following steps.
else :
if formattingElement != self . tree . openElements [ - 1 ] :
self . parser . parseError ( " adoption-agency-1.3 " , { " name " : token [ " name " ] } )
# Step 5:
# Let the furthest block be the topmost node in the
# stack of open elements that is lower in the stack
# than the formatting element, and is an element in
# the special category. There might not be one.
afeIndex = self . tree . openElements . index ( formattingElement )
furthestBlock = None
for element in self . tree . openElements [ afeIndex : ] :
if element . nameTuple in specialElements :
furthestBlock = element
break
# Step 6:
# If there is no furthest block, then the UA must
# first pop all the nodes from the bottom of the stack
# of open elements, from the current node up to and
# including the formatting element, then remove the
# formatting element from the list of active
# formatting elements, and finally abort these steps.
if furthestBlock is None :
element = self . tree . openElements . pop ( )
while element != formattingElement :
element = self . tree . openElements . pop ( )
self . tree . activeFormattingElements . remove ( element )
return
# Step 7
commonAncestor = self . tree . openElements [ afeIndex - 1 ]
# Step 8:
# The bookmark is supposed to help us identify where to reinsert
# nodes in step 15. We have to ensure that we reinsert nodes after
# the node before the active formatting element. Note the bookmark
# can move in step 9.7
bookmark = self . tree . activeFormattingElements . index ( formattingElement )
# Step 9
lastNode = node = furthestBlock
innerLoopCounter = 0
index = self . tree . openElements . index ( node )
while innerLoopCounter < 3 :
innerLoopCounter + = 1
# Node is element before node in open elements
index - = 1
node = self . tree . openElements [ index ]
if node not in self . tree . activeFormattingElements :
self . tree . openElements . remove ( node )
continue
# Step 9.6
if node == formattingElement :
break
# Step 9.7
if lastNode == furthestBlock :
bookmark = self . tree . activeFormattingElements . index ( node ) + 1
# Step 9.8
clone = node . cloneNode ( )
# Replace node with clone
self . tree . activeFormattingElements [
self . tree . activeFormattingElements . index ( node ) ] = clone
self . tree . openElements [
self . tree . openElements . index ( node ) ] = clone
node = clone
# Step 9.9
# Remove lastNode from its parents, if any
if lastNode . parent :
lastNode . parent . removeChild ( lastNode )
node . appendChild ( lastNode )
# Step 9.10
lastNode = node
# Step 10
# Foster parent lastNode if commonAncestor is a
# table, tbody, tfoot, thead, or tr we need to foster
# parent the lastNode
if lastNode . parent :
lastNode . parent . removeChild ( lastNode )
if commonAncestor . name in frozenset ( ( " table " , " tbody " , " tfoot " , " thead " , " tr " ) ) :
parent , insertBefore = self . tree . getTableMisnestedNodePosition ( )
parent . insertBefore ( lastNode , insertBefore )
else :
commonAncestor . appendChild ( lastNode )
# Step 11
clone = formattingElement . cloneNode ( )
# Step 12
furthestBlock . reparentChildren ( clone )
# Step 13
furthestBlock . appendChild ( clone )
# Step 14
self . tree . activeFormattingElements . remove ( formattingElement )
self . tree . activeFormattingElements . insert ( bookmark , clone )
# Step 15
self . tree . openElements . remove ( formattingElement )
self . tree . openElements . insert (
self . tree . openElements . index ( furthestBlock ) + 1 , clone )
def endTagAppletMarqueeObject ( self , token ) :
if self . tree . elementInScope ( token [ " name " ] ) :
self . tree . generateImpliedEndTags ( )
if self . tree . openElements [ - 1 ] . name != token [ " name " ] :
self . parser . parseError ( " end-tag-too-early " , { " name " : token [ " name " ] } )
if self . tree . elementInScope ( token [ " name " ] ) :
element = self . tree . openElements . pop ( )
while element . name != token [ " name " ] :
element = self . tree . openElements . pop ( )
self . tree . clearActiveFormattingElements ( )
def endTagBr ( self , token ) :
self . parser . parseError ( " unexpected-end-tag-treated-as " ,
{ " originalName " : " br " , " newName " : " br element " } )
self . tree . reconstructActiveFormattingElements ( )
self . tree . insertElement ( impliedTagToken ( " br " , " StartTag " ) )
self . tree . openElements . pop ( )
def endTagOther ( self , token ) :
for node in self . tree . openElements [ : : - 1 ] :
if node . name == token [ " name " ] :
self . tree . generateImpliedEndTags ( exclude = token [ " name " ] )
if self . tree . openElements [ - 1 ] . name != token [ " name " ] :
self . parser . parseError ( " unexpected-end-tag " , { " name " : token [ " name " ] } )
while self . tree . openElements . pop ( ) != node :
pass
break
else :
if node . nameTuple in specialElements :
self . parser . parseError ( " unexpected-end-tag " , { " name " : token [ " name " ] } )
break
startTagHandler = _utils . MethodDispatcher ( [
( " html " , Phase . startTagHtml ) ,
( ( " base " , " basefont " , " bgsound " , " command " , " link " , " meta " ,
" script " , " style " , " title " ) ,
startTagProcessInHead ) ,
( " body " , startTagBody ) ,
( " frameset " , startTagFrameset ) ,
( ( " address " , " article " , " aside " , " blockquote " , " center " , " details " ,
" dir " , " div " , " dl " , " fieldset " , " figcaption " , " figure " ,
" footer " , " header " , " hgroup " , " main " , " menu " , " nav " , " ol " , " p " ,
" section " , " summary " , " ul " ) ,
startTagCloseP ) ,
( headingElements , startTagHeading ) ,
( ( " pre " , " listing " ) , startTagPreListing ) ,
( " form " , startTagForm ) ,
( ( " li " , " dd " , " dt " ) , startTagListItem ) ,
( " plaintext " , startTagPlaintext ) ,
( " a " , startTagA ) ,
( ( " b " , " big " , " code " , " em " , " font " , " i " , " s " , " small " , " strike " ,
" strong " , " tt " , " u " ) , startTagFormatting ) ,
( " nobr " , startTagNobr ) ,
( " button " , startTagButton ) ,
( ( " applet " , " marquee " , " object " ) , startTagAppletMarqueeObject ) ,
( " xmp " , startTagXmp ) ,
( " table " , startTagTable ) ,
( ( " area " , " br " , " embed " , " img " , " keygen " , " wbr " ) ,
startTagVoidFormatting ) ,
( ( " param " , " source " , " track " ) , startTagParamSource ) ,
( " input " , startTagInput ) ,
( " hr " , startTagHr ) ,
( " image " , startTagImage ) ,
( " isindex " , startTagIsIndex ) ,
( " textarea " , startTagTextarea ) ,
( " iframe " , startTagIFrame ) ,
( " noscript " , startTagNoscript ) ,
( ( " noembed " , " noframes " ) , startTagRawtext ) ,
( " select " , startTagSelect ) ,
( ( " rp " , " rt " ) , startTagRpRt ) ,
( ( " option " , " optgroup " ) , startTagOpt ) ,
( ( " math " ) , startTagMath ) ,
( ( " svg " ) , startTagSvg ) ,
( ( " caption " , " col " , " colgroup " , " frame " , " head " ,
" tbody " , " td " , " tfoot " , " th " , " thead " ,
" tr " ) , startTagMisplaced )
] )
startTagHandler . default = startTagOther
endTagHandler = _utils . MethodDispatcher ( [
( " body " , endTagBody ) ,
( " html " , endTagHtml ) ,
( ( " address " , " article " , " aside " , " blockquote " , " button " , " center " ,
" details " , " dialog " , " dir " , " div " , " dl " , " fieldset " , " figcaption " , " figure " ,
" footer " , " header " , " hgroup " , " listing " , " main " , " menu " , " nav " , " ol " , " pre " ,
" section " , " summary " , " ul " ) , endTagBlock ) ,
( " form " , endTagForm ) ,
( " p " , endTagP ) ,
( ( " dd " , " dt " , " li " ) , endTagListItem ) ,
( headingElements , endTagHeading ) ,
( ( " a " , " b " , " big " , " code " , " em " , " font " , " i " , " nobr " , " s " , " small " ,
" strike " , " strong " , " tt " , " u " ) , endTagFormatting ) ,
( ( " applet " , " marquee " , " object " ) , endTagAppletMarqueeObject ) ,
( " br " , endTagBr ) ,
] )
endTagHandler . default = endTagOther
class TextPhase(Phase):
    """Token handlers used while reading RCDATA/RAWTEXT element content.

    Every exit path restores ``parser.originalPhase`` as the current phase.
    """
    __slots__ = ()

    def processCharacters(self, token):
        """Append the token's character data to the tree."""
        text = token[" data "]
        self.tree.insertText(text)

    def processEOF(self):
        """Report the unclosed element, pop it and restore the old phase.

        :returns: True
        """
        stack = self.tree.openElements
        unclosed = {" name ": stack[-1].name}
        self.parser.parseError(" expected-named-closing-tag-but-got-eof ",
                               unclosed)
        stack.pop()
        parser = self.parser
        parser.phase = parser.originalPhase
        return True

    def startTagOther(self, token):
        """Start tags are never expected in this phase."""
        assert False, " Tried to process start tag %s in RCDATA/RAWTEXT mode " % token[' name ']

    def endTagScript(self, token):
        """Pop the script element and restore the old phase."""
        closed = self.tree.openElements.pop()
        assert closed.name == " script "
        parser = self.parser
        parser.phase = parser.originalPhase
        # The rest of this method is all stuff that only happens if
        # document.write works

    def endTagOther(self, token):
        """Pop the current element and restore the old phase."""
        self.tree.openElements.pop()
        parser = self.parser
        parser.phase = parser.originalPhase

    # No start tag has a dedicated handler here.
    startTagHandler = _utils.MethodDispatcher([])
    startTagHandler.default = startTagOther
    endTagHandler = _utils.MethodDispatcher([(" script ", endTagScript)])
    endTagHandler.default = endTagOther
class InTablePhase ( Phase ) :
# http://www.whatwg.org/specs/web-apps/current-work/#in-table
__slots__ = tuple ( )
# helper methods
def clearStackToTableContext(self):
    """Pop open elements until the current node is a table or html.

    No parse error is reported for the popped elements.
    """
    # "clear the stack back to a table context"
    stack = self.tree.openElements
    stopNames = (" table ", " html ")
    while stack[-1].name not in stopNames:
        stack.pop()
    # When the current node is <html> it's an innerHTML case
# processing methods
def processEOF(self):
    """Report EOF inside a table unless the current node is html.

    With html as the current node the parser is expected to be in
    innerHTML mode, which is asserted.
    """
    current = self.tree.openElements[-1]
    if current.name == " html ":
        assert self.parser.innerHTML
    else:
        self.parser.parseError(" eof-in-table ")
    # Stop parsing
def processSpaceCharacters(self, token):
    """Hand whitespace over to the ``inTableText`` phase.

    The phase that was current is stored on the text phase as
    ``originalPhase`` before the token is forwarded.
    """
    parser = self.parser
    previous = parser.phase
    parser.phase = parser.phases[" inTableText "]
    textPhase = parser.phase
    textPhase.originalPhase = previous
    textPhase.processSpaceCharacters(token)
def processCharacters(self, token):
    """Hand character data over to the ``inTableText`` phase.

    The phase that was current is stored on the text phase as
    ``originalPhase`` before the token is forwarded.
    """
    parser = self.parser
    previous = parser.phase
    parser.phase = parser.phases[" inTableText "]
    textPhase = parser.phase
    textPhase.originalPhase = previous
    textPhase.processCharacters(token)
def insertText(self, token):
    """Process character data through ``inBody`` with table insertion on.

    ``tree.insertFromTable`` is True only for the duration of the delegated
    call; it is reset in a ``finally`` clause so an exception raised by the
    ``inBody`` phase cannot leave the flag set.

    :arg token: the character token to insert
    """
    # If we get here there must be at least one non-whitespace character
    # Do the table magic!
    self.tree.insertFromTable = True
    try:
        self.parser.phases[" inBody "].processCharacters(token)
    finally:
        self.tree.insertFromTable = False
def startTagCaption(self, token):
    """Insert a caption and switch to the ``inCaption`` phase.

    A Marker is pushed on the active formatting list before the element
    is inserted.
    """
    self.clearStackToTableContext()
    tree = self.tree
    tree.activeFormattingElements.append(Marker)
    tree.insertElement(token)
    parser = self.parser
    parser.phase = parser.phases[" inCaption "]
def startTagColgroup(self, token):
    """Insert a colgroup and switch to the ``inColumnGroup`` phase."""
    self.clearStackToTableContext()
    self.tree.insertElement(token)
    parser = self.parser
    parser.phase = parser.phases[" inColumnGroup "]
def startTagCol(self, token):
    """Open an implied colgroup, then hand the col token back.

    :returns: the same token object, unchanged
    """
    impliedColgroup = impliedTagToken(" colgroup ", " StartTag ")
    self.startTagColgroup(impliedColgroup)
    return token
def startTagRowGroup(self, token):
    """Insert tbody/tfoot/thead and switch to the ``inTableBody`` phase."""
    self.clearStackToTableContext()
    self.tree.insertElement(token)
    parser = self.parser
    parser.phase = parser.phases[" inTableBody "]
def startTagImplyTbody(self, token):
    """Open an implied tbody, then hand the td/th/tr token back.

    :returns: the same token object, unchanged
    """
    impliedTbody = impliedTagToken(" tbody ", " StartTag ")
    self.startTagRowGroup(impliedTbody)
    return token
def startTagTable(self, token):
    """Treat a nested table start tag as an implied table end tag.

    :returns: the token unless the parser is in innerHTML mode, in which
        case None
    """
    parser = self.parser
    parser.parseError(" unexpected-start-tag-implies-end-tag ",
                      {" startName ": " table ", " endName ": " table "})
    parser.phase.processEndTag(impliedTagToken(" table "))
    if parser.innerHTML:
        return None
    return token
def startTagStyleScript(self, token):
    """Delegate style and script start tags to the ``inHead`` phase.

    :returns: whatever the ``inHead`` phase returns for the token
    """
    headPhase = self.parser.phases[" inHead "]
    return headPhase.processStartTag(token)
def startTagInput(self, token):
    """Insert a hidden input in place; treat any other input as generic.

    A hidden input is reported as a parse error, inserted and popped again
    straight away. Every other input goes through ``startTagOther``.
    """
    attrs = token[" data "]
    isHidden = (" type " in attrs and
                attrs[" type "].translate(asciiUpper2Lower) == " hidden ")
    if not isHidden:
        self.startTagOther(token)
        return
    self.parser.parseError(" unexpected-hidden-input-in-table ")
    self.tree.insertElement(token)
    # XXX associate with form
    self.tree.openElements.pop()
def startTagForm(self, token):
    """Handle a form start tag inside a table.

    Always reports a parse error. When no form pointer is set, the form is
    inserted, recorded as the form pointer and popped straight off the stack.

    :arg token: the form start-tag token
    """
    self.parser.parseError(" unexpected-form-in-table ")
    builder = self.tree
    if builder.formPointer is not None:
        return
    builder.insertElement(token)
    builder.formPointer = builder.openElements[-1]
    builder.openElements.pop()
def startTagOther(self, token):
    """Handle any start tag that has no dedicated handler in this phase.

    Reports a parse error and processes the token with the inBody phase while
    the tree builder's ``insertFromTable`` flag is raised.

    :arg token: the start-tag token
    """
    error_details = {" name ": token[" name "]}
    self.parser.parseError(" unexpected-start-tag-implies-table-voodoo ", error_details)
    builder = self.tree
    # Do the table magic!
    builder.insertFromTable = True
    body_phase = self.parser.phases[" inBody "]
    body_phase.processStartTag(token)
    builder.insertFromTable = False
def endTagTable(self, token):
    """Close the current table.

    When a table is in table scope: generate implied end tags, report a parse
    error if the current node is not the table, pop the stack up to and
    including the table, and reset the insertion mode. Otherwise (innerHTML
    case) only a parse error is reported.

    :arg token: the table end-tag token (real or implied)
    """
    builder = self.tree
    if not builder.elementInScope(" table ", variant=" table "):
        # innerHTML case
        assert self.parser.innerHTML
        self.parser.parseError()
        return
    builder.generateImpliedEndTags()
    current_name = builder.openElements[-1].name
    if current_name != " table ":
        self.parser.parseError(" end-tag-too-early-named ",
                               {" gotName ": " table ",
                                " expectedName ": current_name})
    # Discard everything above the table, then the table itself.
    while builder.openElements[-1].name != " table ":
        builder.openElements.pop()
    builder.openElements.pop()
    self.parser.resetInsertionMode()
def endTagIgnore(self, token):
    """Report a parse error for an end tag that is ignored in this phase.

    :arg token: the end-tag token
    """
    report = self.parser.parseError
    report(" unexpected-end-tag ", {" name ": token[" name "]})
def endTagOther(self, token):
    """Handle any end tag that has no dedicated handler in this phase.

    Reports a parse error and processes the token with the inBody phase while
    the tree builder's ``insertFromTable`` flag is raised.

    :arg token: the end-tag token
    """
    error_details = {" name ": token[" name "]}
    self.parser.parseError(" unexpected-end-tag-implies-table-voodoo ", error_details)
    builder = self.tree
    # Do the table magic!
    builder.insertFromTable = True
    body_phase = self.parser.phases[" inBody "]
    body_phase.processEndTag(token)
    builder.insertFromTable = False
# Start-tag dispatch table for this phase. Each entry pairs a tag name (or a
# tuple of tag names) with one of the handler functions defined above.
# ``default`` is then set to startTagOther -- presumably the handler used for
# names that are not listed (see _utils.MethodDispatcher to confirm).
startTagHandler = _utils . MethodDispatcher ( [
( " html " , Phase . startTagHtml ) ,
( " caption " , startTagCaption ) ,
( " colgroup " , startTagColgroup ) ,
( " col " , startTagCol ) ,
( ( " tbody " , " tfoot " , " thead " ) , startTagRowGroup ) ,
( ( " td " , " th " , " tr " ) , startTagImplyTbody ) ,
( " table " , startTagTable ) ,
( ( " style " , " script " ) , startTagStyleScript ) ,
( " input " , startTagInput ) ,
( " form " , startTagForm )
] )
startTagHandler . default = startTagOther
# End-tag dispatch table, built the same way: the table end tag has its own
# handler, the listed structural names go to endTagIgnore, and ``default`` is
# set to endTagOther.
endTagHandler = _utils . MethodDispatcher ( [
( " table " , endTagTable ) ,
( ( " body " , " caption " , " col " , " colgroup " , " html " , " tbody " , " td " ,
" tfoot " , " th " , " thead " , " tr " ) , endTagIgnore )
] )
endTagHandler . default = endTagOther
class InTableTextPhase(Phase):
    """Phase that buffers character tokens and emits them in one batch.

    ``originalPhase`` is the phase the parser is switched back to when a
    comment, tag or EOF arrives; ``characterTokens`` is the pending buffer.
    """
    __slots__ = (" originalPhase ", " characterTokens ")

    def __init__(self, *args, **kwargs):
        super(InTableTextPhase, self).__init__(*args, **kwargs)
        self.characterTokens = []
        self.originalPhase = None

    def flushCharacters(self):
        """Emit the buffered text and reset the buffer.

        Text with any character outside ``spaceCharacters`` is sent to the
        inTable phase's ``insertText``; other non-empty text is inserted
        directly into the tree.
        """
        data = " ".join(pending[" data "] for pending in self.characterTokens)
        if any(char not in spaceCharacters for char in data):
            token = {" type ": tokenTypes[" Characters "], " data ": data}
            table_phase = self.parser.phases[" inTable "]
            table_phase.insertText(token)
        elif data:
            self.tree.insertText(data)
        self.characterTokens = []

    def processComment(self, token):
        """Flush the buffer, restore the original phase and return the token."""
        self.flushCharacters()
        parser = self.parser
        parser.phase = self.originalPhase
        return token

    def processEOF(self):
        """Flush the buffer, restore the original phase and return True."""
        self.flushCharacters()
        parser = self.parser
        parser.phase = self.originalPhase
        return True

    def processCharacters(self, token):
        """Buffer the token unless its data equals the NUL-character string."""
        if token[" data "] != " \u0000 ":
            self.characterTokens.append(token)

    def processSpaceCharacters(self, token):
        """Buffer a space-character token."""
        # pretty sure we should never reach here
        buffered = self.characterTokens
        buffered.append(token)
        # assert False

    def processStartTag(self, token):
        """Flush the buffer, restore the original phase and return the token."""
        self.flushCharacters()
        parser = self.parser
        parser.phase = self.originalPhase
        return token

    def processEndTag(self, token):
        """Flush the buffer, restore the original phase and return the token."""
        self.flushCharacters()
        parser = self.parser
        parser.phase = self.originalPhase
        return token
class InCaptionPhase(Phase):
    """Phase used while a caption element is open."""
    # http://www.whatwg.org/specs/web-apps/current-work/#in-caption
    __slots__ = ()

    def ignoreEndTagCaption(self):
        """Return True when no caption element is in table scope."""
        caption_in_scope = self.tree.elementInScope(" caption ", variant=" table ")
        return not caption_in_scope

    def processEOF(self):
        """Delegate EOF handling to the inBody phase (its result is discarded)."""
        body_phase = self.parser.phases[" inBody "]
        body_phase.processEOF()

    def processCharacters(self, token):
        """Delegate character handling to the inBody phase."""
        body_phase = self.parser.phases[" inBody "]
        return body_phase.processCharacters(token)

    def startTagTableElement(self, token):
        """Imply a caption end tag for a table-structure start tag.

        :returns: the token unless the implied end tag was ignored
        """
        parser = self.parser
        parser.parseError()
        # XXX Have to duplicate logic here to find out if the tag is ignored;
        # the check must run before the implied end tag is processed.
        ignoreEndTag = self.ignoreEndTagCaption()
        parser.phase.processEndTag(impliedTagToken(" caption "))
        return None if ignoreEndTag else token

    def startTagOther(self, token):
        """Delegate any other start tag to the inBody phase."""
        body_phase = self.parser.phases[" inBody "]
        return body_phase.processStartTag(token)

    def endTagCaption(self, token):
        """Close the caption and switch to the inTable phase.

        When no caption is in table scope (innerHTML case) only a parse error
        is reported.
        """
        if self.ignoreEndTagCaption():
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return
        # AT this code is quite similar to endTagTable in "InTable"
        builder = self.tree
        builder.generateImpliedEndTags()
        current_name = builder.openElements[-1].name
        if current_name != " caption ":
            self.parser.parseError(" expected-one-end-tag-but-got-another ",
                                   {" gotName ": " caption ",
                                    " expectedName ": current_name})
        # Discard everything above the caption, then the caption itself.
        while builder.openElements[-1].name != " caption ":
            builder.openElements.pop()
        builder.openElements.pop()
        builder.clearActiveFormattingElements()
        self.parser.phase = self.parser.phases[" inTable "]

    def endTagTable(self, token):
        """Imply a caption end tag for a table end tag.

        :returns: the token unless the implied end tag was ignored
        """
        parser = self.parser
        parser.parseError()
        ignoreEndTag = self.ignoreEndTagCaption()
        parser.phase.processEndTag(impliedTagToken(" caption "))
        return None if ignoreEndTag else token

    def endTagIgnore(self, token):
        """Report a parse error for an end tag that is ignored here."""
        report = self.parser.parseError
        report(" unexpected-end-tag ", {" name ": token[" name "]})

    def endTagOther(self, token):
        """Delegate any other end tag to the inBody phase."""
        body_phase = self.parser.phases[" inBody "]
        return body_phase.processEndTag(token)

    startTagHandler = _utils.MethodDispatcher([
        (" html ", Phase.startTagHtml),
        ((" caption ", " col ", " colgroup ", " tbody ", " td ", " tfoot ",
          " th ", " thead ", " tr "), startTagTableElement),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        (" caption ", endTagCaption),
        (" table ", endTagTable),
        ((" body ", " col ", " colgroup ", " html ", " tbody ", " td ",
          " tfoot ", " th ", " thead ", " tr "), endTagIgnore),
    ])
    endTagHandler.default = endTagOther
class InColumnGroupPhase(Phase):
    """Phase used while a colgroup element is open."""
    # http://www.whatwg.org/specs/web-apps/current-work/#in-column
    __slots__ = ()

    def ignoreEndTagColgroup(self):
        """Return True when the current node is the html element."""
        current = self.tree.openElements[-1]
        return current.name == " html "

    def processEOF(self):
        """Handle EOF by implying a colgroup end tag.

        :returns: None when the current node is html or the implied end tag
            was ignored, otherwise True
        """
        if self.tree.openElements[-1].name == " html ":
            assert self.parser.innerHTML
            return None
        ignoreEndTag = self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken(" colgroup "))
        if ignoreEndTag:
            return None
        return True

    def processCharacters(self, token):
        """Imply a colgroup end tag; return the token unless it was ignored."""
        ignoreEndTag = self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken(" colgroup "))
        return None if ignoreEndTag else token

    def startTagCol(self, token):
        """Insert a col element, pop it at once and acknowledge self-closing."""
        builder = self.tree
        builder.insertElement(token)
        builder.openElements.pop()
        token[" selfClosingAcknowledged "] = True

    def startTagOther(self, token):
        """Imply a colgroup end tag; return the token unless it was ignored."""
        ignoreEndTag = self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken(" colgroup "))
        return None if ignoreEndTag else token

    def endTagColgroup(self, token):
        """Pop the colgroup and switch to the inTable phase.

        When the current node is html (innerHTML case) only a parse error is
        reported.
        """
        if not self.ignoreEndTagColgroup():
            self.tree.openElements.pop()
            self.parser.phase = self.parser.phases[" inTable "]
            return
        # innerHTML case
        assert self.parser.innerHTML
        self.parser.parseError()

    def endTagCol(self, token):
        """Report a parse error for a col end tag."""
        self.parser.parseError(" no-end-tag ", {" name ": " col "})

    def endTagOther(self, token):
        """Imply a colgroup end tag; return the token unless it was ignored."""
        ignoreEndTag = self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken(" colgroup "))
        return None if ignoreEndTag else token

    startTagHandler = _utils.MethodDispatcher([
        (" html ", Phase.startTagHtml),
        (" col ", startTagCol),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        (" colgroup ", endTagColgroup),
        (" col ", endTagCol),
    ])
    endTagHandler.default = endTagOther
class InTableBodyPhase(Phase):
    """Phase used while a tbody, thead or tfoot element is open."""
    # http://www.whatwg.org/specs/web-apps/current-work/#in-table0
    __slots__ = ()

    # helper methods
    def clearStackToTableBodyContext(self):
        """Pop the stack until the current node is a row group or html."""
        boundary_names = (" tbody ", " tfoot ", " thead ", " html ")
        while self.tree.openElements[-1].name not in boundary_names:
            # No parse error is reported for the popped elements here; the
            # "unexpected-implied-end-tag-in-table" report was disabled.
            self.tree.openElements.pop()
        if self.tree.openElements[-1].name == " html ":
            assert self.parser.innerHTML

    # the rest
    def processEOF(self):
        """Delegate EOF handling to the inTable phase (result discarded)."""
        table_phase = self.parser.phases[" inTable "]
        table_phase.processEOF()

    def processSpaceCharacters(self, token):
        """Delegate space characters to the inTable phase."""
        table_phase = self.parser.phases[" inTable "]
        return table_phase.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Delegate characters to the inTable phase."""
        table_phase = self.parser.phases[" inTable "]
        return table_phase.processCharacters(token)

    def startTagTr(self, token):
        """Open a tr element and switch to the inRow phase."""
        self.clearStackToTableBodyContext()
        self.tree.insertElement(token)
        parser = self.parser
        parser.phase = parser.phases[" inRow "]

    def startTagTableCell(self, token):
        """Report a parse error, imply a tr start tag and return the token."""
        self.parser.parseError(" unexpected-cell-in-table-body ",
                               {" name ": token[" name "]})
        implied_row = impliedTagToken(" tr ", " StartTag ")
        self.startTagTr(implied_row)
        return token

    def startTagTableOther(self, token):
        """Close the open row group, then return the token.

        When no row group is in table scope (innerHTML case) only a parse
        error is reported and None is returned.
        """
        # XXX AT Any ideas on how to share this with endTagTable?
        row_groups = (" tbody ", " thead ", " tfoot ")
        if not any(self.tree.elementInScope(group, variant=" table ")
                   for group in row_groups):
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return None
        self.clearStackToTableBodyContext()
        current_name = self.tree.openElements[-1].name
        self.endTagTableRowGroup(impliedTagToken(current_name))
        return token

    def startTagOther(self, token):
        """Delegate any other start tag to the inTable phase."""
        table_phase = self.parser.phases[" inTable "]
        return table_phase.processStartTag(token)

    def endTagTableRowGroup(self, token):
        """Close the named row group and switch to the inTable phase.

        When the named element is not in table scope only a parse error is
        reported.
        """
        name = token[" name "]
        if not self.tree.elementInScope(name, variant=" table "):
            self.parser.parseError(" unexpected-end-tag-in-table-body ",
                                   {" name ": name})
            return
        self.clearStackToTableBodyContext()
        self.tree.openElements.pop()
        self.parser.phase = self.parser.phases[" inTable "]

    def endTagTable(self, token):
        """Close the open row group, then return the token.

        When no row group is in table scope (innerHTML case) only a parse
        error is reported and None is returned.
        """
        row_groups = (" tbody ", " thead ", " tfoot ")
        if not any(self.tree.elementInScope(group, variant=" table ")
                   for group in row_groups):
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return None
        self.clearStackToTableBodyContext()
        current_name = self.tree.openElements[-1].name
        self.endTagTableRowGroup(impliedTagToken(current_name))
        return token

    def endTagIgnore(self, token):
        """Report a parse error for an end tag that is ignored here."""
        self.parser.parseError(" unexpected-end-tag-in-table-body ",
                               {" name ": token[" name "]})

    def endTagOther(self, token):
        """Delegate any other end tag to the inTable phase."""
        table_phase = self.parser.phases[" inTable "]
        return table_phase.processEndTag(token)

    startTagHandler = _utils.MethodDispatcher([
        (" html ", Phase.startTagHtml),
        (" tr ", startTagTr),
        ((" td ", " th "), startTagTableCell),
        ((" caption ", " col ", " colgroup ", " tbody ", " tfoot ", " thead "),
         startTagTableOther),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ((" tbody ", " tfoot ", " thead "), endTagTableRowGroup),
        (" table ", endTagTable),
        ((" body ", " caption ", " col ", " colgroup ", " html ", " td ",
          " th ", " tr "), endTagIgnore),
    ])
    endTagHandler.default = endTagOther
class InRowPhase(Phase):
    """Phase used while a tr element is open."""
    # http://www.whatwg.org/specs/web-apps/current-work/#in-row
    __slots__ = ()

    # helper methods (XXX unify this with other table helper methods)
    def clearStackToTableRowContext(self):
        """Pop the stack until the current node is tr or html.

        A parse error is reported for every element that gets popped.
        """
        while True:
            current_name = self.tree.openElements[-1].name
            if current_name in (" tr ", " html "):
                break
            self.parser.parseError(" unexpected-implied-end-tag-in-table-row ",
                                   {" name ": current_name})
            self.tree.openElements.pop()

    def ignoreEndTagTr(self):
        """Return True when no tr element is in table scope."""
        row_in_scope = self.tree.elementInScope(" tr ", variant=" table ")
        return not row_in_scope

    # the rest
    def processEOF(self):
        """Delegate EOF handling to the inTable phase (result discarded)."""
        table_phase = self.parser.phases[" inTable "]
        table_phase.processEOF()

    def processSpaceCharacters(self, token):
        """Delegate space characters to the inTable phase."""
        table_phase = self.parser.phases[" inTable "]
        return table_phase.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Delegate characters to the inTable phase."""
        table_phase = self.parser.phases[" inTable "]
        return table_phase.processCharacters(token)

    def startTagTableCell(self, token):
        """Open a cell, switch to the inCell phase and push a marker."""
        self.clearStackToTableRowContext()
        builder = self.tree
        builder.insertElement(token)
        self.parser.phase = self.parser.phases[" inCell "]
        builder.activeFormattingElements.append(Marker)

    def startTagTableOther(self, token):
        """Imply a tr end tag; return the token unless it was ignored."""
        ignoreEndTag = self.ignoreEndTagTr()
        self.endTagTr(impliedTagToken(" tr "))
        # XXX how are we sure it's always ignored in the innerHTML case?
        return None if ignoreEndTag else token

    def startTagOther(self, token):
        """Delegate any other start tag to the inTable phase."""
        table_phase = self.parser.phases[" inTable "]
        return table_phase.processStartTag(token)

    def endTagTr(self, token):
        """Close the row and switch to the inTableBody phase.

        When no tr is in table scope (innerHTML case) only a parse error is
        reported.
        """
        if self.ignoreEndTagTr():
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return
        self.clearStackToTableRowContext()
        self.tree.openElements.pop()
        self.parser.phase = self.parser.phases[" inTableBody "]

    def endTagTable(self, token):
        """Imply a tr end tag; return the token unless it was ignored."""
        ignoreEndTag = self.ignoreEndTagTr()
        self.endTagTr(impliedTagToken(" tr "))
        # Reprocess the current tag if the tr end tag was not ignored
        # XXX how are we sure it's always ignored in the innerHTML case?
        return None if ignoreEndTag else token

    def endTagTableRowGroup(self, token):
        """Close the row for a row-group end tag whose element is in scope.

        :returns: the token when the named element is in table scope;
            otherwise a parse error is reported and None is returned
        """
        if self.tree.elementInScope(token[" name "], variant=" table "):
            self.endTagTr(impliedTagToken(" tr "))
            return token
        self.parser.parseError()
        return None

    def endTagIgnore(self, token):
        """Report a parse error for an end tag that is ignored here."""
        self.parser.parseError(" unexpected-end-tag-in-table-row ",
                               {" name ": token[" name "]})

    def endTagOther(self, token):
        """Delegate any other end tag to the inTable phase."""
        table_phase = self.parser.phases[" inTable "]
        return table_phase.processEndTag(token)

    startTagHandler = _utils.MethodDispatcher([
        (" html ", Phase.startTagHtml),
        ((" td ", " th "), startTagTableCell),
        ((" caption ", " col ", " colgroup ", " tbody ", " tfoot ", " thead ",
          " tr "), startTagTableOther),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        (" tr ", endTagTr),
        (" table ", endTagTable),
        ((" tbody ", " tfoot ", " thead "), endTagTableRowGroup),
        ((" body ", " caption ", " col ", " colgroup ", " html ", " td ",
          " th "), endTagIgnore),
    ])
    endTagHandler.default = endTagOther
class InCellPhase(Phase):
    """Phase used while a td or th element is open."""
    # http://www.whatwg.org/specs/web-apps/current-work/#in-cell
    __slots__ = ()

    # helper
    def closeCell(self):
        """Close the td in table scope, or failing that the th in table scope."""
        for cell_name in (" td ", " th "):
            if self.tree.elementInScope(cell_name, variant=" table "):
                self.endTagTableCell(impliedTagToken(cell_name))
                break

    # the rest
    def processEOF(self):
        """Delegate EOF handling to the inBody phase (result discarded)."""
        body_phase = self.parser.phases[" inBody "]
        body_phase.processEOF()

    def processCharacters(self, token):
        """Delegate characters to the inBody phase."""
        body_phase = self.parser.phases[" inBody "]
        return body_phase.processCharacters(token)

    def startTagTableOther(self, token):
        """Close the open cell for a table-structure start tag.

        :returns: the token when a cell was in table scope; otherwise
            (innerHTML case) a parse error is reported and None is returned
        """
        cell_in_scope = (self.tree.elementInScope(" td ", variant=" table ") or
                         self.tree.elementInScope(" th ", variant=" table "))
        if not cell_in_scope:
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return None
        self.closeCell()
        return token

    def startTagOther(self, token):
        """Delegate any other start tag to the inBody phase."""
        body_phase = self.parser.phases[" inBody "]
        return body_phase.processStartTag(token)

    def endTagTableCell(self, token):
        """Close the named cell and switch to the inRow phase.

        When the named element is not in table scope only a parse error is
        reported.
        """
        builder = self.tree
        if not builder.elementInScope(token[" name "], variant=" table "):
            self.parser.parseError(" unexpected-end-tag ", {" name ": token[" name "]})
            return
        builder.generateImpliedEndTags(token[" name "])
        if builder.openElements[-1].name == token[" name "]:
            builder.openElements.pop()
        else:
            self.parser.parseError(" unexpected-cell-end-tag ",
                                   {" name ": token[" name "]})
            # Pop up to and including the element that carries the tag name.
            popped = builder.openElements.pop()
            while popped.name != token[" name "]:
                popped = builder.openElements.pop()
        builder.clearActiveFormattingElements()
        self.parser.phase = self.parser.phases[" inRow "]

    def endTagIgnore(self, token):
        """Report a parse error for an end tag that is ignored here."""
        report = self.parser.parseError
        report(" unexpected-end-tag ", {" name ": token[" name "]})

    def endTagImply(self, token):
        """Close the open cell for an end tag whose element is in table scope.

        :returns: the token when the named element is in table scope;
            otherwise a parse error is reported and None is returned
        """
        if not self.tree.elementInScope(token[" name "], variant=" table "):
            # sometimes innerHTML case
            self.parser.parseError()
            return None
        self.closeCell()
        return token

    def endTagOther(self, token):
        """Delegate any other end tag to the inBody phase."""
        body_phase = self.parser.phases[" inBody "]
        return body_phase.processEndTag(token)

    startTagHandler = _utils.MethodDispatcher([
        (" html ", Phase.startTagHtml),
        ((" caption ", " col ", " colgroup ", " tbody ", " td ", " tfoot ",
          " th ", " thead ", " tr "), startTagTableOther),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ((" td ", " th "), endTagTableCell),
        ((" body ", " caption ", " col ", " colgroup ", " html "), endTagIgnore),
        ((" table ", " tbody ", " tfoot ", " thead ", " tr "), endTagImply),
    ])
    endTagHandler.default = endTagOther
class InSelectPhase(Phase):
    """Phase used while a select element is open."""
    __slots__ = ()

    # http://www.whatwg.org/specs/web-apps/current-work/#in-select
    def processEOF(self):
        """Report a parse error at EOF unless the current node is html."""
        if self.tree.openElements[-1].name == " html ":
            assert self.parser.innerHTML
        else:
            self.parser.parseError(" eof-in-select ")

    def processCharacters(self, token):
        """Insert the text unless its data equals the NUL-character string."""
        if token[" data "] != " \u0000 ":
            self.tree.insertText(token[" data "])

    def startTagOption(self, token):
        """Insert an option, first popping an option that is the current node."""
        builder = self.tree
        # We need to imply </option> if <option> is the current node.
        if builder.openElements[-1].name == " option ":
            builder.openElements.pop()
        builder.insertElement(token)

    def startTagOptgroup(self, token):
        """Insert an optgroup after popping an open option and then an open
        optgroup, when each is the current node in turn."""
        builder = self.tree
        for implied_close in (" option ", " optgroup "):
            if builder.openElements[-1].name == implied_close:
                builder.openElements.pop()
        builder.insertElement(token)

    def startTagSelect(self, token):
        """Report a parse error and treat the nested select as a select end tag."""
        self.parser.parseError(" unexpected-select-in-select ")
        implied_end = impliedTagToken(" select ")
        self.endTagSelect(implied_end)

    def startTagInput(self, token):
        """Report a parse error and close the select if one is in select scope.

        :returns: the token when the select was closed; None otherwise
        """
        self.parser.parseError(" unexpected-input-in-select ")
        if not self.tree.elementInScope(" select ", variant=" select "):
            assert self.parser.innerHTML
            return None
        self.endTagSelect(impliedTagToken(" select "))
        return token

    def startTagScript(self, token):
        """Delegate the script start tag to the inHead phase."""
        head_phase = self.parser.phases[" inHead "]
        return head_phase.processStartTag(token)

    def startTagOther(self, token):
        """Report a parse error for any other start tag."""
        self.parser.parseError(" unexpected-start-tag-in-select ",
                               {" name ": token[" name "]})

    def endTagOption(self, token):
        """Pop the option when it is the current node, else report a parse error."""
        stack = self.tree.openElements
        if stack[-1].name != " option ":
            self.parser.parseError(" unexpected-end-tag-in-select ",
                                   {" name ": " option "})
            return
        stack.pop()

    def endTagOptgroup(self, token):
        """Close an optgroup, implicitly closing an option directly inside it."""
        stack = self.tree.openElements
        # </optgroup> implicitly closes <option>
        if stack[-1].name == " option " and stack[-2].name == " optgroup ":
            stack.pop()
        # It also closes </optgroup>, but nothing else
        if stack[-1].name != " optgroup ":
            self.parser.parseError(" unexpected-end-tag-in-select ",
                                   {" name ": " optgroup "})
            return
        stack.pop()

    def endTagSelect(self, token):
        """Pop the stack through the select and reset the insertion mode.

        When no select is in select scope (innerHTML case) only a parse error
        is reported.
        """
        if not self.tree.elementInScope(" select ", variant=" select "):
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return
        popped = self.tree.openElements.pop()
        while popped.name != " select ":
            popped = self.tree.openElements.pop()
        self.parser.resetInsertionMode()

    def endTagOther(self, token):
        """Report a parse error for any other end tag."""
        self.parser.parseError(" unexpected-end-tag-in-select ",
                               {" name ": token[" name "]})

    startTagHandler = _utils.MethodDispatcher([
        (" html ", Phase.startTagHtml),
        (" option ", startTagOption),
        (" optgroup ", startTagOptgroup),
        (" select ", startTagSelect),
        ((" input ", " keygen ", " textarea "), startTagInput),
        (" script ", startTagScript),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        (" option ", endTagOption),
        (" optgroup ", endTagOptgroup),
        (" select ", endTagSelect),
    ])
    endTagHandler.default = endTagOther
class InSelectInTablePhase(Phase):
    """Phase that wraps the inSelect phase and special-cases table tags."""
    __slots__ = ()

    def processEOF(self):
        """Delegate EOF handling to the inSelect phase (result discarded)."""
        select_phase = self.parser.phases[" inSelect "]
        select_phase.processEOF()

    def processCharacters(self, token):
        """Delegate characters to the inSelect phase."""
        select_phase = self.parser.phases[" inSelect "]
        return select_phase.processCharacters(token)

    def startTagTable(self, token):
        """Report a parse error, imply a select end tag and return the token."""
        error_details = {" name ": token[" name "]}
        self.parser.parseError(
            " unexpected-table-element-start-tag-in-select-in-table ",
            error_details)
        self.endTagOther(impliedTagToken(" select "))
        return token

    def startTagOther(self, token):
        """Delegate any other start tag to the inSelect phase."""
        select_phase = self.parser.phases[" inSelect "]
        return select_phase.processStartTag(token)

    def endTagTable(self, token):
        """Report a parse error and close the select if the named element is
        in table scope.

        :returns: the token when the select was closed; None otherwise
        """
        error_details = {" name ": token[" name "]}
        self.parser.parseError(
            " unexpected-table-element-end-tag-in-select-in-table ",
            error_details)
        if not self.tree.elementInScope(token[" name "], variant=" table "):
            return None
        self.endTagOther(impliedTagToken(" select "))
        return token

    def endTagOther(self, token):
        """Delegate any other end tag to the inSelect phase."""
        select_phase = self.parser.phases[" inSelect "]
        return select_phase.processEndTag(token)

    startTagHandler = _utils.MethodDispatcher([
        ((" caption ", " table ", " tbody ", " tfoot ", " thead ", " tr ",
          " td ", " th "), startTagTable),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ((" caption ", " table ", " tbody ", " tfoot ", " thead ", " tr ",
          " td ", " th "), endTagTable),
    ])
    endTagHandler.default = endTagOther
class InForeignContentPhase ( Phase ) :
# Phase whose handlers branch on the namespace of the current node
# (MathML / SVG versus the tree's default namespace).
# NOTE(review): presumably active while the current node is a foreign
# element -- confirm against the code that selects this phase.
__slots__ = tuple ( )
# Start-tag names that make processStartTag report a parse error and pop
# elements off the stack instead of inserting the element.
breakoutElements = frozenset ( [ " b " , " big " , " blockquote " , " body " , " br " ,
" center " , " code " , " dd " , " div " , " dl " , " dt " ,
" em " , " embed " , " h1 " , " h2 " , " h3 " ,
" h4 " , " h5 " , " h6 " , " head " , " hr " , " i " , " img " ,
" li " , " listing " , " menu " , " meta " , " nobr " ,
" ol " , " p " , " pre " , " ruby " , " s " , " small " ,
" span " , " strong " , " strike " , " sub " , " sup " ,
" table " , " tt " , " u " , " ul " , " var " ] )
def adjustSVGTagNames ( self , token ) :
# Rewrite token["name"] in place to its mixed-case spelling when the name
# is a key of the table below; any other name is left untouched.
replacements = { " altglyph " : " altGlyph " ,
" altglyphdef " : " altGlyphDef " ,
" altglyphitem " : " altGlyphItem " ,
" animatecolor " : " animateColor " ,
" animatemotion " : " animateMotion " ,
" animatetransform " : " animateTransform " ,
" clippath " : " clipPath " ,
" feblend " : " feBlend " ,
" fecolormatrix " : " feColorMatrix " ,
" fecomponenttransfer " : " feComponentTransfer " ,
" fecomposite " : " feComposite " ,
" feconvolvematrix " : " feConvolveMatrix " ,
" fediffuselighting " : " feDiffuseLighting " ,
" fedisplacementmap " : " feDisplacementMap " ,
" fedistantlight " : " feDistantLight " ,
" feflood " : " feFlood " ,
" fefunca " : " feFuncA " ,
" fefuncb " : " feFuncB " ,
" fefuncg " : " feFuncG " ,
" fefuncr " : " feFuncR " ,
" fegaussianblur " : " feGaussianBlur " ,
" feimage " : " feImage " ,
" femerge " : " feMerge " ,
" femergenode " : " feMergeNode " ,
" femorphology " : " feMorphology " ,
" feoffset " : " feOffset " ,
" fepointlight " : " fePointLight " ,
" fespecularlighting " : " feSpecularLighting " ,
" fespotlight " : " feSpotLight " ,
" fetile " : " feTile " ,
" feturbulence " : " feTurbulence " ,
" foreignobject " : " foreignObject " ,
" glyphref " : " glyphRef " ,
" lineargradient " : " linearGradient " ,
" radialgradient " : " radialGradient " ,
" textpath " : " textPath " }
if token [ " name " ] in replacements :
token [ " name " ] = replacements [ token [ " name " ] ]
def processCharacters ( self , token ) :
# Data equal to the NUL string is replaced with the U+FFFD string; otherwise,
# if framesetOK is still set and the data holds any character outside
# spaceCharacters, framesetOK is cleared. The (possibly modified) token is
# then passed to the base Phase.processCharacters.
if token [ " data " ] == " \u0000 " :
token [ " data " ] = " \uFFFD "
elif ( self . parser . framesetOK and
any ( char not in spaceCharacters for char in token [ " data " ] ) ) :
self . parser . framesetOK = False
Phase . processCharacters ( self , token )
def processStartTag ( self , token ) :
# Two outcomes:
#  * breakout: the name is in breakoutElements, or it is a font tag with a
#    color/face/size attribute -> parse error, pop the stack until the
#    current node is in the default namespace or is an HTML / MathML-text
#    integration point, then return the token.
#  * otherwise: adjust attributes (and, for SVG, the tag name) according to
#    the current node's namespace, insert the element in that namespace, and
#    pop it again at once when the token is self-closing.
# The current node is captured before any popping takes place.
currentNode = self . tree . openElements [ - 1 ]
if ( token [ " name " ] in self . breakoutElements or
( token [ " name " ] == " font " and
set ( token [ " data " ] . keys ( ) ) & { " color " , " face " , " size " } ) ) :
self . parser . parseError ( " unexpected-html-element-in-foreign-content " ,
{ " name " : token [ " name " ] } )
while ( self . tree . openElements [ - 1 ] . namespace !=
self . tree . defaultNamespace and
not self . parser . isHTMLIntegrationPoint ( self . tree . openElements [ - 1 ] ) and
not self . parser . isMathMLTextIntegrationPoint ( self . tree . openElements [ - 1 ] ) ) :
self . tree . openElements . pop ( )
return token
else :
if currentNode . namespace == namespaces [ " mathml " ] :
self . parser . adjustMathMLAttributes ( token )
elif currentNode . namespace == namespaces [ " svg " ] :
self . adjustSVGTagNames ( token )
self . parser . adjustSVGAttributes ( token )
self . parser . adjustForeignAttributes ( token )
token [ " namespace " ] = currentNode . namespace
self . tree . insertElement ( token )
if token [ " selfClosing " ] :
self . tree . openElements . pop ( )
token [ " selfClosingAcknowledged " ] = True
def processEndTag ( self , token ) :
# Walk the stack of open elements from the top downwards looking for an
# element whose ASCII-lowercased name equals the token name.
# Returns None when such an element was found and popped; otherwise returns
# whatever the current parser phase's processEndTag returns for the token.
nodeIndex = len ( self . tree . openElements ) - 1
node = self . tree . openElements [ - 1 ]
# A mismatch on the current node is reported once, before the walk starts.
if node . name . translate ( asciiUpper2Lower ) != token [ " name " ] :
self . parser . parseError ( " unexpected-end-tag " , { " name " : token [ " name " ] } )
while True :
if node . name . translate ( asciiUpper2Lower ) == token [ " name " ] :
# XXX this isn't in the spec but it seems necessary
# If the parser is in the inTableText phase, flush its buffered characters
# and restore its original phase before popping.
if self . parser . phase == self . parser . phases [ " inTableText " ] :
self . parser . phase . flushCharacters ( )
self . parser . phase = self . parser . phase . originalPhase
# Pop elements until the matched node itself has been removed.
while self . tree . openElements . pop ( ) != node :
assert self . tree . openElements
new_token = None
break
# No match: step one element down the stack.
nodeIndex - = 1
node = self . tree . openElements [ nodeIndex ]
# Keep walking while the node is outside the default namespace; the first
# default-namespace node ends the walk and the token is handed to the
# current parser phase instead.
if node . namespace != self . tree . defaultNamespace :
continue
else :
new_token = self . parser . phase . processEndTag ( token )
break
return new_token
class AfterBodyPhase(Phase):
    """Phase entered once the body has been closed."""
    __slots__ = ()

    def processEOF(self):
        """Do nothing: parsing stops here."""
        # Stop parsing
        return None

    def processComment(self, token):
        """Attach the comment to the first element on the stack."""
        # This is needed because data is to be appended to the <html> element
        # here and not to whatever is currently open.
        root = self.tree.openElements[0]
        self.tree.insertComment(token, root)

    def processCharacters(self, token):
        """Report a parse error, switch to the inBody phase, return the token."""
        parser = self.parser
        parser.parseError(" unexpected-char-after-body ")
        parser.phase = parser.phases[" inBody "]
        return token

    def startTagHtml(self, token):
        """Delegate the html start tag to the inBody phase."""
        body_phase = self.parser.phases[" inBody "]
        return body_phase.processStartTag(token)

    def startTagOther(self, token):
        """Report a parse error, switch to the inBody phase, return the token."""
        parser = self.parser
        parser.parseError(" unexpected-start-tag-after-body ",
                          {" name ": token[" name "]})
        parser.phase = parser.phases[" inBody "]
        return token

    def endTagHtml(self, name):
        """Move to the afterAfterBody phase, or report an error in innerHTML mode."""
        parser = self.parser
        if not parser.innerHTML:
            parser.phase = parser.phases[" afterAfterBody "]
        else:
            parser.parseError(" unexpected-end-tag-after-body-innerhtml ")

    def endTagOther(self, token):
        """Report a parse error, switch to the inBody phase, return the token."""
        parser = self.parser
        parser.parseError(" unexpected-end-tag-after-body ",
                          {" name ": token[" name "]})
        parser.phase = parser.phases[" inBody "]
        return token

    startTagHandler = _utils.MethodDispatcher([
        (" html ", startTagHtml),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([(" html ", endTagHtml)])
    endTagHandler.default = endTagOther
class InFramesetPhase(Phase):
    """Phase used while a frameset element is open."""
    # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
    __slots__ = ()

    def processEOF(self):
        """Report a parse error at EOF unless the current node is html."""
        if self.tree.openElements[-1].name == " html ":
            assert self.parser.innerHTML
        else:
            self.parser.parseError(" eof-in-frameset ")

    def processCharacters(self, token):
        """Report a parse error; the characters are not inserted."""
        self.parser.parseError(" unexpected-char-in-frameset ")

    def startTagFrameset(self, token):
        """Insert a nested frameset element."""
        self.tree.insertElement(token)

    def startTagFrame(self, token):
        """Insert a frame element and pop it straight off the stack."""
        builder = self.tree
        builder.insertElement(token)
        builder.openElements.pop()

    def startTagNoframes(self, token):
        """Delegate the noframes start tag to the inBody phase."""
        body_phase = self.parser.phases[" inBody "]
        return body_phase.processStartTag(token)

    def startTagOther(self, token):
        """Report a parse error for any other start tag."""
        self.parser.parseError(" unexpected-start-tag-in-frameset ",
                               {" name ": token[" name "]})

    def endTagFrameset(self, token):
        """Close the current frameset.

        A parse error is reported instead when the current node is html
        (innerHTML case). Afterwards, outside innerHTML mode, the parser moves
        to the afterFrameset phase if the current node is no longer a
        frameset.
        """
        stack = self.tree.openElements
        if stack[-1].name != " html ":
            stack.pop()
        else:
            # innerHTML case
            self.parser.parseError(" unexpected-frameset-in-frameset-innerhtml ")
        parser = self.parser
        if not parser.innerHTML and self.tree.openElements[-1].name != " frameset ":
            # If we're not in innerHTML mode and the current node is not a
            # "frameset" element (anymore) then switch.
            parser.phase = parser.phases[" afterFrameset "]

    def endTagOther(self, token):
        """Report a parse error for any other end tag."""
        self.parser.parseError(" unexpected-end-tag-in-frameset ",
                               {" name ": token[" name "]})

    startTagHandler = _utils.MethodDispatcher([
        (" html ", Phase.startTagHtml),
        (" frameset ", startTagFrameset),
        (" frame ", startTagFrame),
        (" noframes ", startTagNoframes),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        (" frameset ", endTagFrameset),
    ])
    endTagHandler.default = endTagOther
class AfterFramesetPhase(Phase):
    """Phase entered once the outermost frameset has been closed."""
    # http://www.whatwg.org/specs/web-apps/current-work/#after3
    __slots__ = ()

    def processEOF(self):
        """Do nothing: parsing stops here."""
        # Stop parsing
        return None

    def processCharacters(self, token):
        """Report a parse error; the characters are not inserted."""
        self.parser.parseError(" unexpected-char-after-frameset ")

    def startTagNoframes(self, token):
        """Delegate the noframes start tag to the inHead phase."""
        head_phase = self.parser.phases[" inHead "]
        return head_phase.processStartTag(token)

    def startTagOther(self, token):
        """Report a parse error for any other start tag."""
        self.parser.parseError(" unexpected-start-tag-after-frameset ",
                               {" name ": token[" name "]})

    def endTagHtml(self, token):
        """Move the parser to the afterAfterFrameset phase."""
        parser = self.parser
        parser.phase = parser.phases[" afterAfterFrameset "]

    def endTagOther(self, token):
        """Report a parse error for any other end tag."""
        self.parser.parseError(" unexpected-end-tag-after-frameset ",
                               {" name ": token[" name "]})

    startTagHandler = _utils.MethodDispatcher([
        (" html ", Phase.startTagHtml),
        (" noframes ", startTagNoframes),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        (" html ", endTagHtml),
    ])
    endTagHandler.default = endTagOther
class AfterAfterBodyPhase(Phase):
    """Phase entered after the html end tag that follows the body."""
    __slots__ = ()

    def processEOF(self):
        """Do nothing at EOF."""
        return None

    def processComment(self, token):
        """Attach the comment to the document node."""
        builder = self.tree
        builder.insertComment(token, builder.document)

    def processSpaceCharacters(self, token):
        """Delegate space characters to the inBody phase."""
        body_phase = self.parser.phases[" inBody "]
        return body_phase.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Report a parse error, switch to the inBody phase, return the token."""
        parser = self.parser
        parser.parseError(" expected-eof-but-got-char ")
        parser.phase = parser.phases[" inBody "]
        return token

    def startTagHtml(self, token):
        """Delegate the html start tag to the inBody phase."""
        body_phase = self.parser.phases[" inBody "]
        return body_phase.processStartTag(token)

    def startTagOther(self, token):
        """Report a parse error, switch to the inBody phase, return the token."""
        parser = self.parser
        parser.parseError(" expected-eof-but-got-start-tag ",
                          {" name ": token[" name "]})
        parser.phase = parser.phases[" inBody "]
        return token

    def processEndTag(self, token):
        """Report a parse error, switch to the inBody phase, return the token."""
        parser = self.parser
        parser.parseError(" expected-eof-but-got-end-tag ",
                          {" name ": token[" name "]})
        parser.phase = parser.phases[" inBody "]
        return token

    startTagHandler = _utils.MethodDispatcher([
        (" html ", startTagHtml),
    ])
    startTagHandler.default = startTagOther
class AfterAfterFramesetPhase(Phase):
    """Phase handler that the phase table registers as afterAfterFrameset.

    Comments are inserted through the tree builder with the tree's document
    object as the second argument.  Whitespace and html start tags are
    delegated to the inBody phase, and noframes start tags to the inHead
    phase.  Any other character token, start tag or end tag only produces a
    parse error; the current phase is left unchanged and None is returned.
    """

    __slots__ = ()

    def processEOF(self):
        """Take no action at end of input."""

    def processComment(self, token):
        """Insert the comment, passing the tree's document object."""
        tree = self.tree
        tree.insertComment(token, tree.document)

    def processSpaceCharacters(self, token):
        """Delegate whitespace to the inBody phase and return its result."""
        in_body = self.parser.phases[" inBody "]
        return in_body.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Report a parse error for a character token; returns None."""
        report = self.parser.parseError
        report(" expected-eof-but-got-char ")

    def startTagHtml(self, token):
        """Delegate the start tag to the inBody phase and return its result."""
        in_body = self.parser.phases[" inBody "]
        return in_body.processStartTag(token)

    def startTagNoFrames(self, token):
        """Delegate the start tag to the inHead phase and return its result."""
        in_head = self.parser.phases[" inHead "]
        return in_head.processStartTag(token)

    def startTagOther(self, token):
        """Report a parse error naming the unexpected start tag."""
        report = self.parser.parseError
        report(" expected-eof-but-got-start-tag ",
               {" name ": token[" name "]})

    def processEndTag(self, token):
        """Report a parse error naming the unexpected end tag."""
        report = self.parser.parseError
        report(" expected-eof-but-got-end-tag ",
               {" name ": token[" name "]})

    # html and noframes have dedicated start-tag handlers; every other
    # start tag goes to startTagOther.
    startTagHandler = _utils.MethodDispatcher([
        (" html ", startTagHtml),
        (" noframes ", startTagNoFrames),
    ])
    startTagHandler.default = startTagOther
# pylint:enable=unused-argument
return {
" initial " : InitialPhase ,
" beforeHtml " : BeforeHtmlPhase ,
" beforeHead " : BeforeHeadPhase ,
" inHead " : InHeadPhase ,
" inHeadNoscript " : InHeadNoscriptPhase ,
" afterHead " : AfterHeadPhase ,
" inBody " : InBodyPhase ,
" text " : TextPhase ,
" inTable " : InTablePhase ,
" inTableText " : InTableTextPhase ,
" inCaption " : InCaptionPhase ,
" inColumnGroup " : InColumnGroupPhase ,
" inTableBody " : InTableBodyPhase ,
" inRow " : InRowPhase ,
" inCell " : InCellPhase ,
" inSelect " : InSelectPhase ,
" inSelectInTable " : InSelectInTablePhase ,
" inForeignContent " : InForeignContentPhase ,
" afterBody " : AfterBodyPhase ,
" inFrameset " : InFramesetPhase ,
" afterFrameset " : AfterFramesetPhase ,
" afterAfterBody " : AfterAfterBodyPhase ,
" afterAfterFrameset " : AfterAfterFramesetPhase ,
# XXX after after frameset
}
def adjust_attributes(token, replacements):
    """Rename a token's attribute keys in place according to ``replacements``.

    :arg token: token dict whose attribute mapping is stored under its
        data key
    :arg replacements: mapping from an existing attribute key to the key
        that should replace it

    If none of the token's attribute keys appear in ``replacements`` the
    token is left untouched and keeps the very same mapping object.
    Otherwise the attribute mapping is rebuilt as a new instance of its own
    type, in the original iteration order, with every matching key swapped
    for its replacement and all values preserved.

    :returns: None
    """
    attrs = token[' data ']
    # Only rebuild the mapping when at least one key actually needs renaming.
    if not any(key in replacements for key in attrs):
        return
    renamed = ((replacements.get(key, key), value)
               for key, value in attrs.items())
    token[' data '] = type(attrs)(renamed)
def impliedTagToken(name, type=" EndTag ", attributes=None,
                    selfClosing=False):
    """Build a tag token dict for a tag that did not come from the input.

    :arg name: tag name stored in the token
    :arg type: key looked up in ``tokenTypes`` to obtain the token's type
        value; a KeyError propagates if the key is unknown
    :arg attributes: attribute mapping stored as the token's data; when
        omitted (None) a fresh empty dict is created for each call
    :arg selfClosing: value stored as the token's self-closing flag

    :returns: a new dict holding the type, name, data and selfClosing
        entries
    """
    data = {} if attributes is None else attributes
    token = {
        " type ": tokenTypes[type],
        " name ": name,
        " data ": data,
        " selfClosing ": selfClosing,
    }
    return token
class ParseError(Exception):
    """Exception type representing an error in the parsed document."""