You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
3042 lines
106 KiB
3042 lines
106 KiB
# The MIT License
|
|
#
|
|
# Copyright 2014, 2015 Piotr Dabkowski
|
|
#
|
|
# Permission is hereby granted, free of charge, to any person obtaining
|
|
# a copy of this software and associated documentation files (the 'Software'),
|
|
# to deal in the Software without restriction, including without limitation the rights
|
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
|
# the Software, and to permit persons to whom the Software is furnished to do so, subject
|
|
# to the following conditions:
|
|
#
|
|
# The above copyright notice and this permission notice shall be included in all copies or
|
|
# substantial portions of the Software.
|
|
#
|
|
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
|
# LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
|
|
# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE
|
|
from __future__ import unicode_literals
|
|
from .pyjsparserdata import *
|
|
from .std_nodes import *
|
|
from pprint import pprint
|
|
import sys
|
|
|
|
# Names exported by ``from <module> import *``.
__all__ = [
    'PyJsParser', 'parse', 'ENABLE_JS2PY_ERRORS', 'ENABLE_PYIMPORT',
    'JsSyntaxError'
]
|
|
# Characters that keep their backslash when an escaped regexp atom is copied
# by PyJsParser._interpret_regexp.  Both braces are listed: dropping the
# backslash from an escaped '}' could turn a literal "{n}" into a quantifier.
REGEXP_SPECIAL_SINGLE = ('\\', '^', '$', '*', '+', '?', '.', '[', ']', '(',
                         ')', '{', '}', '|', '-')
ENABLE_PYIMPORT = False
# When set to a truthy callable, PyJsParser.createError calls it with the
# message instead of building a JsSyntaxError.
ENABLE_JS2PY_ERRORS = False

PY3 = sys.version_info >= (3, 0)

if PY3:
    # Python 2 spellings used throughout this module.
    basestring = str
    long = int
    xrange = range
    unicode = str

ESPRIMA_VERSION = '2.2.0'
# Enables the diagnostic output of PyJsParser.log_err_case.
DEBUG = False
# Small naming convention changes
# len -> leng
# id -> d
# type -> typ
# str -> st
true = True
false = False
null = None
|
|
|
|
|
|
class PyJsParser:
|
|
""" Usage:
|
|
parser = PyJsParser()
|
|
parser.parse('var JavaScriptCode = 5.1')
|
|
"""
|
|
|
|
def __init__(self):
    """Create a parser whose bookkeeping is initialised by ``clean()``."""
    self.clean()
|
|
|
|
def test(self, code):
    """Parse ``code`` and pretty-print the result (debugging helper)."""
    pprint(self.parse(code))
|
|
|
|
def clean(self):
    """Reset the scanner/parser bookkeeping to its initial values.

    The cursor goes back to the start of the input (index 0, line 1,
    line start 0); every other tracked attribute becomes ``None``.
    ``source`` and ``length`` are not touched here.
    """
    for field in ('strict', 'sourceType', 'hasLineTerminator', 'lastIndex',
                  'lastLineNumber', 'lastLineStart', 'startIndex',
                  'startLineNumber', 'startLineStart', 'scanning',
                  'lookahead', 'state', 'extra', 'isBindingElement',
                  'isAssignmentTarget', 'firstCoverInitializedNameError'):
        setattr(self, field, None)
    self.index, self.lineNumber, self.lineStart = 0, 1, 0
|
|
|
|
# 7.4 Comments
|
|
|
|
def skipSingleLineComment(self, offset):
    """Advance past the rest of a single-line comment.

    The cursor is expected to sit just after the comment opener.  Scanning
    stops after the first line terminator (CRLF counts as one), at which
    point ``lineNumber``, ``lineStart`` and ``hasLineTerminator`` are
    updated.  If the input ends first, the cursor is simply left at the end.

    ``offset`` is the length of the comment opener; it is accepted for
    interface compatibility and is not needed by this implementation.
    """
    while self.index < self.length:
        ch = self.source[self.index]
        self.index += 1
        if isLineTerminator(ch):
            # Swallow the LF of a CRLF pair, but never look past the end of
            # the input when the CR is the very last character.
            if (ch == '\r' and self.index < self.length
                    and self.source[self.index] == '\n'):
                self.index += 1
            self.lineNumber += 1
            self.hasLineTerminator = True
            self.lineStart = self.index
            return
|
|
|
|
def skipMultiLineComment(self):
    """Advance past a block comment whose opener was already consumed.

    Line terminators inside the comment update ``lineNumber``,
    ``lineStart`` and ``hasLineTerminator``.  Returns once the closing
    ``*/`` has been consumed; if the input runs out first,
    ``tolerateUnexpectedToken`` is invoked.
    """
    src = self.source
    while self.index < self.length:
        code = ord(src[self.index])
        if isLineTerminator(code):
            # CRLF is a single terminator two characters wide.
            width = 2 if (code == 0x0D
                          and ord(src[self.index + 1]) == 0x0A) else 1
            self.index += width
            self.lineNumber += 1
            self.hasLineTerminator = True
            self.lineStart = self.index
            continue
        # 0x2A is '*', 0x2F is '/': together they close the comment.
        if code == 0x2A and ord(src[self.index + 1]) == 0x2F:
            self.index += 2
            return
        self.index += 1
    self.tolerateUnexpectedToken()
|
|
|
|
def skipComment(self):
    """Skip white space, line terminators and comments at the cursor.

    ``hasLineTerminator`` is reset to False on entry and set to True when a
    line terminator is crossed here (the comment helpers called below may
    set it as well).  Recognised comment forms are ``//``, ``/* */`` and
    the HTML-style ``<!--`` and ``-->``.  The loop stops at the first
    character that starts none of these.
    """
    self.hasLineTerminator = False
    # ``start`` is True at the very beginning of the input and after a line
    # terminator or a '//' comment; '-->' is only a comment in that state.
    start = (self.index == 0)
    while self.index < self.length:
        ch = ord(self.source[self.index])
        if isWhiteSpace(ch):
            self.index += 1
        elif isLineTerminator(ch):
            self.hasLineTerminator = True
            self.index += 1
            # CRLF counts as a single line terminator.
            if (ch == 0x0D and ord(self.source[self.index]) == 0x0A):
                self.index += 1
            self.lineNumber += 1
            self.lineStart = self.index
            start = True
        elif (ch == 0x2F):  # U+002F is '/'
            ch = ord(self.source[self.index + 1])
            if (ch == 0x2F):
                self.index += 2
                self.skipSingleLineComment(2)
                start = True
            elif (ch == 0x2A):  # U+002A is '*'
                self.index += 2
                self.skipMultiLineComment()
            else:
                # A lone '/' is not a comment; leave it for the scanner.
                break
        elif (start and ch == 0x2D):  # U+002D is '-'
            # U+003E is '>'
            if (ord(self.source[self.index + 1]) == 0x2D) and (ord(
                    self.source[self.index + 2]) == 0x3E):
                # '-->' is a single-line comment
                self.index += 3
                self.skipSingleLineComment(3)
            else:
                break
        elif (ch == 0x3C):  # U+003C is '<'
            if self.source[self.index + 1:self.index + 4] == '!--':
                # <!--
                self.index += 4
                self.skipSingleLineComment(4)
            else:
                break
        else:
            break
|
|
|
|
def scanHexEscape(self, prefix):
    """Read a fixed-width hexadecimal escape and return its character.

    Four digits are expected when ``prefix`` is ``'u'`` and two otherwise.
    Returns the empty string as soon as a non-hex character (or the end of
    input) is met; digits consumed up to that point stay consumed.
    """
    width = 4 if prefix == 'u' else 2
    value = 0
    for _ in xrange(width):
        if not (self.index < self.length
                and isHexDigit(self.source[self.index])):
            return ''
        digit = self.source[self.index]
        self.index += 1
        value = value * 16 + HEX_CONV[digit]
    return unichr(value)
|
|
|
|
def scanUnicodeCodePointEscape(self):
    """Read the hex digits of a braced code-point escape up to its '}'.

    The cursor is expected just after the opening brace.  An empty escape,
    a value above 0x10FFFF or a missing closing brace is reported through
    ``throwUnexpectedToken``.  Code points above 0xFFFF are returned as a
    UTF-16 surrogate pair (two characters).
    """
    current = self.source[self.index]
    # At least one hex digit is required.
    if current == '}':
        self.throwUnexpectedToken()
    value = 0
    while self.index < self.length:
        current = self.source[self.index]
        self.index += 1
        if not isHexDigit(current):
            break
        value = value * 16 + HEX_CONV[current]
    if value > 0x10FFFF or current != '}':
        self.throwUnexpectedToken()
    if value <= 0xFFFF:
        return unichr(value)
    # UTF-16 encoding: split the 20-bit offset into high and low halves.
    high, low = divmod(value - 0x10000, 0x400)
    return unichr(0xD800 + high) + unichr(0xDC00 + low)
|
|
|
|
def ccode(self, offset=0):
    """Return the code point of the source character ``offset`` places from the cursor."""
    position = self.index + offset
    return ord(self.source[position])
|
|
|
|
def log_err_case(self):
    """Print the cursor position and nearby source text when DEBUG is set.

    Shows up to ten characters on either side of the cursor.  The window
    start is clamped at 0 so that a cursor near the beginning of the input
    does not produce a negative slice index (which would wrap around and
    print the wrong text).
    """
    if not DEBUG:
        return
    print('INDEX', self.index)
    window_start = max(0, self.index - 10)
    print(self.source[window_start:self.index + 10])
    print('')
|
|
|
|
def at(self, loc):
    """Return the source character at ``loc``, or ``None`` when ``loc`` is at or past ``length``."""
    if loc >= self.length:
        return None
    return self.source[loc]
|
|
|
|
def substr(self, le, offset=0):
    """Return up to ``le`` source characters starting ``offset`` places from the cursor."""
    begin = self.index + offset
    return self.source[begin:begin + le]
|
|
|
|
def getEscapedIdentifier(self):
    """Scan an identifier that may contain 4-digit unicode escapes.

    Returns the identifier text with every escape replaced by the
    character it denotes.  A backslash not followed by 'u', a malformed
    escape, or an escape that decodes to a backslash or to a character not
    allowed at that position is reported via ``throwUnexpectedToken``.
    """
    d = self.source[self.index]
    ch = ord(d)
    self.index += 1
    # '\u' (U+005C, U+0075) denotes an escaped character.
    if (ch == 0x5C):
        if (ord(self.source[self.index]) != 0x75):
            self.throwUnexpectedToken()
        self.index += 1
        ch = self.scanHexEscape('u')
        # The first character must be a valid identifier start.
        if not ch or ch == '\\' or not isIdentifierStart(ch[0]):
            self.throwUnexpectedToken()
        d = ch
    while (self.index < self.length):
        ch = self.ccode()
        if not isIdentifierPart(ch):
            break
        self.index += 1
        d += unichr(ch)

        # '\u' (U+005C, U+0075) denotes an escaped character.
        # NOTE(review): this branch is only reachable if isIdentifierPart
        # accepts a backslash -- confirm against its definition.
        if (ch == 0x5C):
            # Drop the backslash that was just appended; the decoded
            # character is appended instead.
            d = d[0:len(d) - 1]
            if (self.ccode() != 0x75):
                self.throwUnexpectedToken()
            self.index += 1
            ch = self.scanHexEscape('u')
            if (not ch or ch == '\\' or not isIdentifierPart(ch[0])):
                self.throwUnexpectedToken()
            d += ch
    return d
|
|
|
|
def getIdentifier(self):
    """Scan a plain identifier and return its source text.

    The first character is accepted unconditionally.  If a backslash
    (U+005C, the start of a unicode escape) is met, the cursor is rewound
    to the identifier start and the work is handed to
    ``getEscapedIdentifier``.
    """
    begin = self.index
    self.index = begin + 1
    while self.index < self.length:
        code = self.ccode()
        if code == 0x5C:
            # Backslash: rescan from the start with escape handling.
            self.index = begin
            return self.getEscapedIdentifier()
        if not isIdentifierPart(code):
            break
        self.index += 1
    return self.source[begin:self.index]
|
|
|
|
def scanIdentifier(self):
    """Scan an identifier-like word and return its token.

    The token type is Identifier, Keyword, NullLiteral or BooleanLiteral;
    ``value`` is the decoded name and ``raw`` the exact source text.
    """
    begin = self.index

    # A leading backslash (U+005C) means the word starts with an escape.
    if self.ccode() == 0x5C:
        name = self.getEscapedIdentifier()
    else:
        name = self.getIdentifier()

    if len(name) == 1:
        # No keyword or literal is a single character long.
        kind = Token.Identifier
    elif isKeyword(name):
        kind = Token.Keyword
    elif name == 'null':
        kind = Token.NullLiteral
    elif name in ('true', 'false'):
        kind = Token.BooleanLiteral
    else:
        kind = Token.Identifier

    finish = self.index
    return {
        'type': kind,
        'value': name,
        'raw': self.source[begin:finish],
        'lineNumber': self.lineNumber,
        'lineStart': self.lineStart,
        'start': begin,
        'end': finish
    }
|
|
|
|
# 7.7 Punctuators
|
|
|
|
def scanPunctuator(self):
    """Scan one punctuator at the cursor and return its token.

    Braces are tracked on ``state['curlyStack']`` (``scanTemplate`` pushes
    '${' onto the same stack).  Multi-character operators are matched
    longest first.  If nothing matches, ``throwUnexpectedToken`` is called.
    """
    token = {
        'type': Token.Punctuator,
        'value': '',
        'lineNumber': self.lineNumber,
        'lineStart': self.lineStart,
        'start': self.index,
        'end': self.index
    }
    # Check for most common single-character punctuators.
    st = self.source[self.index]
    if st == '{':
        self.state['curlyStack'].append('{')
        self.index += 1
    elif st == '}':
        self.index += 1
        # An unbalanced '}' must surface as a syntax error from the parser,
        # not as an IndexError from popping an empty stack.
        if self.state['curlyStack']:
            self.state['curlyStack'].pop()
    elif st in ('.', '(', ')', ';', ',', '[', ']', ':', '?', '~'):
        self.index += 1
    else:
        # Longest match first: 4-, 3-, 2-, then 1-character punctuators.
        by_width = (
            (4, ('>>>=',)),
            (3, ('===', '!==', '>>>', '<<=', '>>=')),
            (2, ('&&', '||', '==', '!=', '+=', '-=', '*=', '/=', '++', '--',
                 '<<', '>>', '&=', '|=', '^=', '%=', '<=', '>=', '=>')),
            (1, ('<', '>', '=', '!', '+', '-', '*', '%', '&', '|', '^',
                 '/')),
        )
        for width, operators in by_width:
            candidate = self.substr(width)
            if candidate in operators:
                self.index += width
                st = candidate
                break
    if self.index == token['start']:
        # Nothing was consumed: the character is not a punctuator.
        self.throwUnexpectedToken()
    token['end'] = self.index
    token['value'] = st
    return token
|
|
|
|
# 7.8.3 Numeric Literals
|
|
|
|
def scanHexLiteral(self, start):
    """Scan the digits of a hexadecimal literal and return its token.

    The cursor is expected just after the two-character prefix; ``start``
    is the index where the whole literal began.  A missing digit, or an
    identifier-start character right after the digits, is reported through
    ``throwUnexpectedToken``.
    """
    digits_from = self.index
    while (self.index < self.length
           and isHexDigit(self.source[self.index])):
        self.index += 1
    number = self.source[digits_from:self.index]
    if not number:
        self.throwUnexpectedToken()
    if isIdentifierStart(self.ccode()):
        self.throwUnexpectedToken()
    return {
        'type': Token.NumericLiteral,
        'value': int(number, 16),
        'raw': self.source[start:self.index],
        'lineNumber': self.lineNumber,
        'lineStart': self.lineStart,
        'start': start,
        'end': self.index
    }
|
|
|
|
def scanBinaryLiteral(self, start):
    """Scan the digits of a binary literal and return its token.

    The cursor is expected just after the two-character prefix; ``start``
    is the index where the whole literal began.  A bare prefix, or digits
    directly followed by an identifier-start character or a decimal digit,
    is reported through ``throwUnexpectedToken``.
    """
    digits_from = self.index
    while (self.index < self.length
           and self.source[self.index] in ('0', '1')):
        self.index += 1
    number = self.source[digits_from:self.index]
    if not number:
        # Only the prefix was present.
        self.throwUnexpectedToken()
    if self.index < self.length:
        following = self.source[self.index]
        if isIdentifierStart(following) or isDecimalDigit(following):
            self.throwUnexpectedToken()
    return {
        'type': Token.NumericLiteral,
        'value': int(number, 2),
        'raw': self.source[start:self.index],
        'lineNumber': self.lineNumber,
        'lineStart': self.lineStart,
        'start': start,
        'end': self.index
    }
|
|
|
|
def scanOctalLiteral(self, prefix, start):
    """Scan an octal literal and return its token.

    ``prefix`` is the character at the cursor: an octal digit for the
    legacy form (leading zero followed by digits) or the letter of the
    explicit two-character prefix.  ``start`` is the index where the whole
    literal began.  An explicit prefix without digits, or digits followed
    by an identifier-start character or decimal digit, is reported through
    ``throwUnexpectedToken``.
    """
    legacy = isOctalDigit(prefix)
    # In the legacy form the character at the cursor is the first digit.
    number = ('0' + self.source[self.index]) if legacy else ''
    self.index += 1
    digits_from = self.index
    while (self.index < self.length
           and isOctalDigit(self.source[self.index])):
        self.index += 1
    number += self.source[digits_from:self.index]
    if not (legacy or number):
        # Only the explicit prefix was present.
        self.throwUnexpectedToken()
    following = self.ccode()
    if isIdentifierStart(following) or isDecimalDigit(following):
        self.throwUnexpectedToken()
    return {
        'type': Token.NumericLiteral,
        'value': int(number, 8),
        'raw': self.source[start:self.index],
        'lineNumber': self.lineNumber,
        'lineStart': self.lineStart,
        'start': start,
        'end': self.index
    }
|
|
|
|
def octalToDecimal(self, ch):
    """Decode an octal escape whose first digit ``ch`` is already consumed.

    Up to two further octal digits are read from the source; the third
    digit is only taken when ``ch`` is one of 0-3.  Returns a dict with the
    numeric ``code`` and an ``octal`` flag, which is False only for a lone
    '0' (that is the NUL escape, not an octal escape).
    """
    value = int(ch, 8)
    is_octal = ch != '0'
    if (self.index < self.length
            and isOctalDigit(self.source[self.index])):
        is_octal = True
        value = value * 8 + int(self.source[self.index], 8)
        self.index += 1
        # A third digit keeps the value within one byte only when the
        # escape started with 0, 1, 2 or 3.
        if (ch in '0123' and self.index < self.length
                and isOctalDigit(self.source[self.index])):
            value = value * 8 + int(self.source[self.index], 8)
            self.index += 1
    return {'code': value, 'octal': is_octal}
|
|
|
|
def isImplicitOctalLiteral(self):
    """Tell whether the digits after the cursor form a legacy octal literal.

    Looks ahead from the character after the cursor: meeting an '8' or '9'
    first means the literal is decimal (False); meeting any other non-octal
    character, or the end of input, means it is octal (True).
    (Annex B.1.1 on Numeric Literals)
    """
    position = self.index + 1
    while position < self.length:
        ch = self.source[position]
        if ch in ('8', '9'):
            return False
        if not isOctalDigit(ch):
            return True
        position += 1
    return True
|
|
|
|
def scanNumericLiteral(self):
    """Scan a numeric literal at the cursor and return its token.

    Literals starting with '0' followed by x/b/o (either case) or by an
    octal digit are delegated to the hex, binary and octal scanners.
    Everything else is read as decimal digits with an optional fraction
    and exponent and returned with ``value`` set to ``float(number)``.

    NOTE(review): several reads of ``self.source[self.index]`` below are
    not bounds-checked; presumably the caller guarantees trailing
    characters after any literal -- confirm.
    """
    ch = self.source[self.index]
    assert isDecimalDigit(ch) or (
        ch == '.'
    ), 'Numeric literal must start with a decimal digit or a decimal point'
    start = self.index
    number = ''
    if ch != '.':
        number = self.source[self.index]
        self.index += 1
        ch = self.source[self.index]
        # Hex number starts with '0x'.
        # Octal number starts with '0'.
        # Octal number in ES6 starts with '0o'.
        # Binary number in ES6 starts with '0b'.
        if (number == '0'):
            if (ch == 'x' or ch == 'X'):
                self.index += 1
                return self.scanHexLiteral(start)
            if (ch == 'b' or ch == 'B'):
                self.index += 1
                return self.scanBinaryLiteral(start)
            if (ch == 'o' or ch == 'O'):
                # The cursor stays on the letter; scanOctalLiteral skips it.
                return self.scanOctalLiteral(ch, start)
            if (isOctalDigit(ch)):
                # A leading zero followed by an 8 or 9 further on is
                # decimal, so only commit to octal after looking ahead.
                if (self.isImplicitOctalLiteral()):
                    return self.scanOctalLiteral(ch, start)
        while (isDecimalDigit(self.ccode())):
            number += self.source[self.index]
            self.index += 1
        ch = self.source[self.index]
    if (ch == '.'):
        # Fractional part.
        number += self.source[self.index]
        self.index += 1
        while (isDecimalDigit(self.source[self.index])):
            number += self.source[self.index]
            self.index += 1
        ch = self.source[self.index]
    if (ch == 'e' or ch == 'E'):
        # Exponent: optional sign, then at least one digit.
        number += self.source[self.index]
        self.index += 1
        ch = self.source[self.index]
        if (ch == '+' or ch == '-'):
            number += self.source[self.index]
            self.index += 1
        if (isDecimalDigit(self.source[self.index])):
            while (isDecimalDigit(self.source[self.index])):
                number += self.source[self.index]
                self.index += 1
        else:
            self.throwUnexpectedToken()
    # A literal may not run straight into an identifier.
    if (isIdentifierStart(self.source[self.index])):
        self.throwUnexpectedToken()
    return {
        'type': Token.NumericLiteral,
        'value': float(number),
        'raw': self.source[start:self.index],
        'lineNumber': self.lineNumber,
        'lineStart': self.lineStart,
        'start': start,
        'end': self.index
    }
|
|
|
|
# 7.8.4 String Literals
|
|
|
|
def _interpret_regexp(self, string, flags):
    '''Rewrite the escapes of a regexp literal body and return the new pattern.

    WARNING: this overwrites the scanner state (``source``, ``length``,
    ``index``, ``lineNumber``, ``lineStart``) with values for ``string``.
    ``flags`` is accepted but not used here.

    Numeric character escapes (the 4-digit and 2-digit hex forms) are
    replaced by the character itself, wrapped in a character class unless
    already inside one; the white-space class escapes are expanded to
    explicit character lists; other recognised escapes are copied with
    their backslash, and unknown escapes lose it.
    '''
    self.index = 0
    self.length = len(string)
    self.source = string
    self.lineNumber = 0
    self.lineStart = 0
    octal = False
    st = ''
    inside_square = 0
    while (self.index < self.length):
        # Outside a character class, expansions get their own [...] wrapper.
        template = '[%s]' if not inside_square else '%s'
        ch = self.source[self.index]
        self.index += 1
        if ch == '\\':
            ch = self.source[self.index]
            self.index += 1
            if (not isLineTerminator(ch)):
                if ch == 'u':
                    digs = self.source[self.index:self.index + 4]
                    if len(digs) == 4 and all(isHexDigit(d) for d in digs):
                        st += template % unichr(int(digs, 16))
                        self.index += 4
                    else:
                        # Not a complete escape: keep a plain 'u'.
                        st += 'u'
                elif ch == 'x':
                    digs = self.source[self.index:self.index + 2]
                    if len(digs) == 2 and all(isHexDigit(d) for d in digs):
                        st += template % unichr(int(digs, 16))
                        self.index += 2
                    else:
                        # Not a complete escape: keep a plain 'x'.
                        st += 'x'
                # special meaning - single char.
                elif ch == '0':
                    st += '\\0'
                elif ch == 'n':
                    st += '\\n'
                elif ch == 'r':
                    st += '\\r'
                elif ch == 't':
                    st += '\\t'
                elif ch == 'f':
                    st += '\\f'
                elif ch == 'v':
                    st += '\\v'

                # keep the backslash on special single characters like . so that they are interpreted literally
                elif ch in REGEXP_SPECIAL_SINGLE:
                    st += '\\' + ch

                # character groups
                elif ch == 'b':
                    st += '\\b'
                elif ch == 'B':
                    st += '\\B'
                elif ch == 'w':
                    st += '\\w'
                elif ch == 'W':
                    st += '\\W'
                elif ch == 'd':
                    st += '\\d'
                elif ch == 'D':
                    st += '\\D'
                elif ch == 's':
                    st += template % u' \f\n\r\t\v\u00a0\u1680\u180e\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff'
                elif ch == 'S':
                    st += template % u'\u0000-\u0008\u000e-\u001f\u0021-\u009f\u00a1-\u167f\u1681-\u180d\u180f-\u1fff\u200b-\u2027\u202a-\u202e\u2030-\u205e\u2060-\u2fff\u3001-\ufefe\uff00-\uffff'
                else:
                    if isDecimalDigit(ch):
                        # Backslash followed by digits is copied through
                        # with all of its digits.
                        num = ch
                        while self.index < self.length and isDecimalDigit(
                                self.source[self.index]):
                            num += self.source[self.index]
                            self.index += 1
                        st += '\\' + num

                    else:
                        st += ch  # DONT ESCAPE!!!
            else:
                # Backslash followed by a line terminator: nothing is
                # emitted, only the line bookkeeping is updated.
                self.lineNumber += 1
                if (ch == '\r' and self.source[self.index] == '\n'):
                    self.index += 1
                self.lineStart = self.index
        else:
            if ch == '[':
                inside_square = True
            elif ch == ']':
                inside_square = False
            st += ch
    # print string, 'was transformed to', st
    return st
|
|
|
|
def scanStringLiteral(self):
    """Scan a quoted string literal at the cursor and return its token.

    ``value`` holds the string with escapes decoded, ``raw`` the exact
    source text including quotes, and ``octal`` is True if an octal escape
    was used.  An unterminated string (end of input or a bare line
    terminator before the closing quote) is reported through
    ``throwUnexpectedToken``.
    """
    st = ''
    octal = False

    quote = self.source[self.index]
    assert quote == '\'' or quote == '"', 'String literal must starts with a quote'
    start = self.index
    self.index += 1

    while (self.index < self.length):
        ch = self.source[self.index]
        self.index += 1
        if (ch == quote):
            # An emptied ``quote`` marks the literal as properly closed.
            quote = ''
            break
        elif (ch == '\\'):
            ch = self.source[self.index]
            self.index += 1
            if (not isLineTerminator(ch)):
                if ch in 'ux':
                    # NOTE(review): the braced code-point form is accepted
                    # after 'x' as well as after 'u' -- confirm intended.
                    if (self.source[self.index] == '{'):
                        self.index += 1
                        st += self.scanUnicodeCodePointEscape()
                    else:
                        unescaped = self.scanHexEscape(ch)
                        if (not unescaped):
                            self.throwUnexpectedToken(
                            )  # with throw I don't know whats the difference
                        st += unescaped
                elif ch == 'n':
                    st += '\n'
                elif ch == 'r':
                    st += '\r'
                elif ch == 't':
                    st += '\t'
                elif ch == 'b':
                    st += '\b'
                elif ch == 'f':
                    st += '\f'
                elif ch == 'v':
                    st += '\x0B'
                # elif ch in '89':
                #     self.throwUnexpectedToken() # again with throw....
                else:
                    if isOctalDigit(ch):
                        octToDec = self.octalToDecimal(ch)
                        # Once any octal escape is seen the flag stays set.
                        octal = octToDec.get('octal') or octal
                        st += unichr(octToDec['code'])
                    else:
                        # Any other escaped character stands for itself.
                        st += ch
            else:
                # Line continuation: the terminator is dropped from the
                # value, only the line bookkeeping is updated.
                self.lineNumber += 1
                if (ch == '\r' and self.source[self.index] == '\n'):
                    self.index += 1
                self.lineStart = self.index
        elif isLineTerminator(ch):
            break
        else:
            st += ch
    if (quote != ''):
        self.throwUnexpectedToken()
    # Note: unlike the other scanners, 'lineStart' is taken from
    # ``startLineStart`` rather than ``lineStart``.
    return {
        'type': Token.StringLiteral,
        'value': st,
        'raw': self.source[start:self.index],
        'octal': octal,
        'lineNumber': self.lineNumber,
        'lineStart': self.startLineStart,
        'start': start,
        'end': self.index
    }
|
|
|
|
def scanTemplate(self):
    """Scan one chunk of a template literal and return its token.

    A chunk starts at a backtick (``head`` is True) or at the '}' closing a
    substitution, and ends at the closing backtick (``tail`` is True) or at
    the next '${', which is pushed onto ``state['curlyStack']``.  The token
    value carries both the ``cooked`` (escapes decoded) and ``raw`` text.
    An unterminated chunk is reported through ``throwUnexpectedToken``;
    octal escapes are rejected through ``throwError``.
    """
    cooked = ''
    terminated = False
    tail = False
    start = self.index
    head = (self.source[self.index] == '`')
    # Number of terminator characters to cut from the raw text: 2 for '${',
    # 1 for a closing backtick.
    rawOffset = 2

    self.index += 1

    while (self.index < self.length):
        ch = self.source[self.index]
        self.index += 1
        if (ch == '`'):
            rawOffset = 1
            tail = True
            terminated = True
            break
        elif (ch == '$'):
            if (self.source[self.index] == '{'):
                self.state['curlyStack'].append('${')
                self.index += 1
                terminated = True
                break
            # A '$' not followed by '{' is ordinary text.
            cooked += ch
        elif (ch == '\\'):
            ch = self.source[self.index]
            self.index += 1
            if (not isLineTerminator(ch)):
                if ch == 'n':
                    cooked += '\n'
                elif ch == 'r':
                    cooked += '\r'
                elif ch == 't':
                    cooked += '\t'
                elif ch in 'ux':
                    if (self.source[self.index] == '{'):
                        self.index += 1
                        cooked += self.scanUnicodeCodePointEscape()
                    else:
                        restore = self.index
                        unescaped = self.scanHexEscape(ch)
                        if (unescaped):
                            cooked += unescaped
                        else:
                            # Malformed escape: rewind and keep the letter.
                            self.index = restore
                            cooked += ch
                elif ch == 'b':
                    cooked += '\b'
                elif ch == 'f':
                    cooked += '\f'
                elif ch == 'v':
                    cooked += '\v'
                else:
                    if (ch == '0'):
                        if isDecimalDigit(self.ccode()):
                            # Illegal: \01 \02 and so on
                            self.throwError(Messages.TemplateOctalLiteral)
                        cooked += '\0'
                    elif (isOctalDigit(ch)):
                        # Illegal: \1 \2
                        self.throwError(Messages.TemplateOctalLiteral)
                    else:
                        cooked += ch
            else:
                # Line continuation: nothing is added to the cooked text.
                self.lineNumber += 1
                if (ch == '\r' and self.source[self.index] == '\n'):
                    self.index += 1
                self.lineStart = self.index
        elif (isLineTerminator(ch)):
            # Any line terminator (including CRLF) is cooked as '\n'.
            self.lineNumber += 1
            if (ch == '\r' and self.source[self.index] == '\n'):
                self.index += 1
            self.lineStart = self.index
            cooked += '\n'
        else:
            cooked += ch
    if (not terminated):
        self.throwUnexpectedToken()

    if (not head):
        # This chunk began at the '}' of a substitution: drop its marker.
        self.state['curlyStack'].pop()

    return {
        'type': Token.Template,
        'value': {
            'cooked': cooked,
            'raw': self.source[start + 1:self.index - rawOffset]
        },
        'head': head,
        'tail': tail,
        'lineNumber': self.lineNumber,
        'lineStart': self.lineStart,
        'start': start,
        'end': self.index
    }
|
|
|
|
def testRegExp(self, pattern, flags):
    """Return ``(pattern, flags)`` unchanged; no validation is done yet."""
    # todo: you should return python regexp object
    return (pattern, flags)
|
|
|
|
def scanRegExpBody(self):
    """Scan a regexp body, from the opening slash to the closing slash.

    Returns a dict with ``literal`` (the text including both slashes) and
    ``value`` (the text between them).  A slash inside a character class
    does not end the body.  A line terminator or the end of input before
    the closing slash is reported as an unterminated regexp.
    """
    opener = self.source[self.index]
    assert opener == '/', 'Regular expression literal must start with a slash'
    literal = opener
    self.index += 1

    in_class = False
    closed = False
    while self.index < self.length:
        ch = self.source[self.index]
        self.index += 1
        literal += ch
        if ch == '\\':
            # An escape always consumes the following character too.
            ch = self.source[self.index]
            self.index += 1
            # ECMA-262 7.8.5
            if isLineTerminator(ch):
                self.throwUnexpectedToken(None,
                                          Messages.UnterminatedRegExp)
            literal += ch
        elif isLineTerminator(ch):
            self.throwUnexpectedToken(None, Messages.UnterminatedRegExp)
        elif in_class:
            if ch == ']':
                in_class = False
        elif ch == '/':
            closed = True
            break
        elif ch == '[':
            in_class = True
    if not closed:
        self.throwUnexpectedToken(None, Messages.UnterminatedRegExp)

    # Exclude leading and trailing slash.
    return {'value': literal[1:-1], 'literal': literal}
|
|
|
|
def scanRegExpFlags(self):
    """Scan the flag characters that follow a regexp body.

    Consumes identifier-part characters and returns a dict with ``value``
    (the flags, escapes decoded) and ``literal`` (the flags as written).
    Escaped flag characters are reported via ``tolerateUnexpectedToken``.
    """
    st = ''
    flags = ''
    while (self.index < self.length):
        ch = self.source[self.index]
        if (not isIdentifierPart(ch)):
            break
        self.index += 1
        # NOTE(review): this branch is only reachable if isIdentifierPart
        # accepts a backslash -- confirm against its definition.
        if (ch == '\\' and self.index < self.length):
            ch = self.source[self.index]
            if (ch == 'u'):
                self.index += 1
                restore = self.index
                ch = self.scanHexEscape('u')
                if (ch):
                    flags += ch
                    st += '\\u'
                    # Copy the hex digits exactly as written.
                    while restore < self.index:
                        st += self.source[restore]
                        restore += 1
                else:
                    # Malformed escape: rewind and treat it as a plain 'u'.
                    self.index = restore
                    flags += 'u'
                    st += '\\u'
                self.tolerateUnexpectedToken()
            else:
                st += '\\'
                self.tolerateUnexpectedToken()
        else:
            flags += ch
            st += ch
    return {'value': flags, 'literal': st}
|
|
|
|
def scanRegExp(self):
    """Scan a complete regular-expression literal at the cursor.

    Discards the current lookahead, skips leading comments and white
    space, then reads the body and the flags.  Returns a token dict with
    the literal text, the ``testRegExp`` result as ``value``, the raw
    source slice, the pattern/flags pair under ``regex`` and the
    start/end indices.

    ``self.scanning`` is True while scanning and is reset to False on
    normal completion.
    """
    self.scanning = True
    self.lookahead = None
    self.skipComment()
    start = self.index

    body = self.scanRegExpBody()
    flags = self.scanRegExpFlags()
    value = self.testRegExp(body['value'], flags['value'])
    # Reset the instance flag (not a throwaway local) so that later error
    # reports use the "not scanning" position fields.
    self.scanning = False
    return {
        'literal': body['literal'] + flags['literal'],
        'value': value,
        'raw': self.source[start:self.index],
        'regex': {
            'pattern': body['value'],
            'flags': flags['value']
        },
        'start': start,
        'end': self.index
    }
|
|
|
|
def collectRegex(self):
    """Skip leading comments/white space, then scan and return a regexp token."""
    self.skipComment()
    regex_token = self.scanRegExp()
    return regex_token
|
|
|
|
def isIdentifierName(self, token):
    """Tell whether ``token`` has one of the identifier-name type codes.

    The accepted numeric codes are 1, 3, 4 and 5.
    NOTE(review): presumably these are the Token codes for boolean
    literal, identifier, keyword and null literal -- confirm against the
    Token definition.
    """
    kind = token['type']
    return kind in (1, 3, 4, 5)
|
|
|
|
# def advanceSlash(self): ???
|
|
|
|
def advance(self):
    """Scan and return the next token starting at the cursor.

    Returns an EOF token when the cursor is at or past the end of input.
    Otherwise dispatches on the first character to the identifier,
    punctuator, string, numeric or template scanner.  A slash is always
    scanned as a punctuator here; regexp literals are scanned separately
    by ``scanRegExp``.
    """
    if self.index >= self.length:
        return {
            'type': Token.EOF,
            'lineNumber': self.lineNumber,
            'lineStart': self.lineStart,
            'start': self.index,
            'end': self.index
        }
    ch = self.ccode()

    if isIdentifierStart(ch):
        token = self.scanIdentifier()
        # In strict mode additional words are reserved.
        if self.strict and isStrictModeReservedWord(token['value']):
            token['type'] = Token.Keyword
        return token
    # Very common: ( and ) and ;
    if ch == 0x28 or ch == 0x29 or ch == 0x3B:
        return self.scanPunctuator()

    # String literal starts with single quote (U+0027) or double quote (U+0022).
    if ch == 0x27 or ch == 0x22:
        return self.scanStringLiteral()

    # Dot (.) U+002E can also start a floating-point number, hence the need
    # to check the next character.
    if ch == 0x2E:
        if isDecimalDigit(self.ccode(1)):
            return self.scanNumericLiteral()
        return self.scanPunctuator()

    if isDecimalDigit(ch):
        return self.scanNumericLiteral()

    # Template literals start with ` (U+0060) for template head
    # or } (U+007D) for template middle or template tail.
    if ch == 0x60:
        return self.scanTemplate()
    if ch == 0x7D:
        curly_stack = self.state['curlyStack']
        # An empty stack means this '}' is unbalanced; treat it as a plain
        # punctuator instead of failing with IndexError on the lookup.
        if curly_stack and curly_stack[-1] == '${':
            return self.scanTemplate()
    return self.scanPunctuator()
|
|
|
|
# def collectToken(self):
|
|
# loc = {
|
|
# 'start': {
|
|
# 'line': self.lineNumber,
|
|
# 'column': self.index - self.lineStart}}
|
|
#
|
|
# token = self.advance()
|
|
#
|
|
# loc['end'] = {
|
|
# 'line': self.lineNumber,
|
|
# 'column': self.index - self.lineStart}
|
|
# if (token['type'] != Token.EOF):
|
|
# value = self.source[token['start']: token['end']]
|
|
# entry = {
|
|
# 'type': TokenName[token['type']],
|
|
# 'value': value,
|
|
# 'range': [token['start'], token['end']],
|
|
# 'loc': loc}
|
|
# if (token.get('regex')):
|
|
# entry['regex'] = {
|
|
# 'pattern': token['regex']['pattern'],
|
|
# 'flags': token['regex']['flags']}
|
|
# self.extra['tokens'].append(entry)
|
|
# return token;
|
|
|
|
def lex(self):
    """Consume the current lookahead token and scan the next one.

    Records the cursor position before skipping comments in the
    ``last*`` fields and the position after skipping in the ``start*``
    fields, replaces ``lookahead`` with the result of ``advance()`` and
    returns the previous lookahead.  ``scanning`` is True for the duration
    of the call.
    """
    self.scanning = True

    self.lastIndex, self.lastLineNumber, self.lastLineStart = (
        self.index, self.lineNumber, self.lineStart)

    self.skipComment()

    consumed = self.lookahead

    self.startIndex, self.startLineNumber, self.startLineStart = (
        self.index, self.lineNumber, self.lineStart)

    self.lookahead = self.advance()
    self.scanning = False
    return consumed
|
|
|
|
def peek(self):
    """Scan the next token into ``lookahead`` without returning anything.

    Comments and white space are skipped first; both the ``last*`` and
    the ``start*`` position fields are then set to the cursor position
    reached after skipping.  ``scanning`` is True for the duration of the
    call.
    """
    self.scanning = True

    self.skipComment()

    position = (self.index, self.lineNumber, self.lineStart)
    self.lastIndex, self.lastLineNumber, self.lastLineStart = position
    self.startIndex, self.startLineNumber, self.startLineStart = position

    self.lookahead = self.advance()
    self.scanning = False
|
|
|
|
def createError(self, line, pos, description):
    """Build (but do not raise) the exception for a syntax error.

    The message has the form ``Line <line>: <description>``.  When the
    module-level ``ENABLE_JS2PY_ERRORS`` is truthy it is called with the
    message and its result is returned; otherwise a ``JsSyntaxError`` is
    returned.  ``pos`` is accepted for interface compatibility and is not
    included in the message.
    """
    msg = 'Line ' + unicode(line) + ': ' + unicode(description)
    # Module globals are only read here, so no ``global`` declaration is
    # needed.
    if ENABLE_JS2PY_ERRORS:
        return ENABLE_JS2PY_ERRORS(msg)
    return JsSyntaxError(msg)
|
|
|
|
# Throw an exception
|
|
|
|
def throwError(self, messageFormat, *args):
    """Raise the error built from ``messageFormat`` %-formatted with ``args``.

    Each argument is converted to text first.  The error is created by
    ``createError`` at the position recorded in ``lastLineNumber`` and
    ``lastIndex``.
    """
    rendered = tuple(unicode(arg) for arg in args)
    description = messageFormat % rendered
    raise self.createError(self.lastLineNumber, self.lastIndex, description)
|
|
|
|
def tolerateError(self, messageFormat, *args):
    """Forward to ``throwError`` with the same arguments and return its result."""
    outcome = self.throwError(messageFormat, *args)
    return outcome
|
|
|
|
# Throw an exception because of the token.
|
|
|
|
def unexpectedTokenError(self, token=None, message=''):
    """Build (but do not raise) the error for an unexpected token.

    Args:
        token: the offending token dict, or a falsy value when there is
            none (the message then mentions 'ILLEGAL').
        message: optional message template overriding the default chosen
            from the token type; a ``%s`` in it is replaced by the token
            value.

    Returns:
        The exception object produced by ``createError``.  The position is
        taken from the token when it carries a truthy ``lineNumber``,
        otherwise from the cursor (while scanning) or from the last
        recorded position.
    """
    msg = message or Messages.UnexpectedToken
    if token:
        typ = token['type']
        if not message:
            if typ == Token.EOF:
                msg = Messages.UnexpectedEOS
            elif typ == Token.Identifier:
                msg = Messages.UnexpectedIdentifier
            elif typ == Token.NumericLiteral:
                msg = Messages.UnexpectedNumber
            elif typ == Token.StringLiteral:
                msg = Messages.UnexpectedString
            elif typ == Token.Template:
                msg = Messages.UnexpectedTemplate
            else:
                msg = Messages.UnexpectedToken
            if typ == Token.Keyword:
                # Reserved words get a more specific message.
                if isFutureReservedWord(token['value']):
                    msg = Messages.UnexpectedReserved
                elif (self.strict
                      and isStrictModeReservedWord(token['value'])):
                    msg = Messages.StrictReservedWord
        # Template tokens keep their text under value['raw'].
        value = token['value']['raw'] if (
            typ == Token.Template) else token.get('value')
    else:
        value = 'ILLEGAL'
    msg = msg.replace('%s', unicode(value))

    if token and token.get('lineNumber'):
        return self.createError(token['lineNumber'], token['start'], msg)
    if self.scanning:
        return self.createError(self.lineNumber, self.index, msg)
    return self.createError(self.lastLineNumber, self.lastIndex, msg)
|
|
|
|
def throwUnexpectedToken(self, token=None, message=''):
    """Raise the error that ``unexpectedTokenError`` builds for ``token``.

    ``token`` may be omitted or falsy when no token is available.
    """
    raise self.unexpectedTokenError(token, message)
|
|
|
|
def tolerateUnexpectedToken(self, token=None, message=''):
    """Report an unexpected token by forwarding to ``throwUnexpectedToken``.

    ``token`` may be omitted or falsy when no token is available.
    """
    self.throwUnexpectedToken(token, message)
|
|
|
|
# Expect the next token to match the specified punctuator.
|
|
# If not, an exception will be thrown.
|
|
|
|
def expect(self, value):
    """Consume the next token and require it to be the punctuator ``value``.

    Any other token is reported through ``throwUnexpectedToken``.
    """
    token = self.lex()
    matched = (token['type'] == Token.Punctuator
               and token['value'] == value)
    if not matched:
        self.throwUnexpectedToken(token)
|
|
|
|
def expectCommaSeparator(self):
    """Consume the next token and require it to be a comma.

    Simply delegates to ``expect(',')``.
    """
    separator = ','
    self.expect(separator)
|
|
|
|
# Expect the next token to match the specified keyword.
|
|
# If not, an exception will be thrown.
|
|
|
|
def expectKeyword(self, keyword):
    """Consume the next token and require it to be the keyword ``keyword``.

    Any other token is reported through ``throwUnexpectedToken``.
    """
    token = self.lex()
    matched = (token['type'] == Token.Keyword
               and token['value'] == keyword)
    if not matched:
        self.throwUnexpectedToken(token)
|
|
|
|
# Return true if the next token matches the specified punctuator.
|
|
|
|
def match(self, value):
    """Return True if the lookahead token is the punctuator ``value``."""
    upcoming = self.lookahead
    return upcoming['type'] == Token.Punctuator and upcoming['value'] == value
|
|
|
|
# Return true if the next token matches the specified keyword
|
|
|
|
def matchKeyword(self, keyword):
    """Return True if the lookahead token is the keyword ``keyword``."""
    upcoming = self.lookahead
    return upcoming['type'] == Token.Keyword and upcoming['value'] == keyword
|
|
|
|
# Return true if the next token matches the specified contextual keyword
|
|
# (where an identifier is sometimes a keyword depending on the context)
|
|
|
|
def matchContextualKeyword(self, keyword):
    """Return True if the lookahead is an identifier spelled ``keyword``.

    Used for words that act as keywords only in certain contexts.
    """
    upcoming = self.lookahead
    return (upcoming['type'] == Token.Identifier
            and upcoming['value'] == keyword)
|
|
|
|
# Return true if the next token is an assignment operator
|
|
|
|
def matchAssign(self):
    """Return True if the lookahead token is an assignment operator."""
    upcoming = self.lookahead
    if upcoming['type'] != Token.Punctuator:
        return False
    return upcoming['value'] in ('=', '*=', '/=', '%=', '+=', '-=', '<<=',
                                 '>>=', '>>>=', '&=', '^=', '|=')
|
|
|
|
def consumeSemicolon(self):
    """Consume a statement terminator.

    Accepted terminators, in the order they are checked:

    * an explicit ``;`` (consumed with ``self.lex()``);
    * a pending line terminator (``self.hasLineTerminator``);
    * end of input or a ``}`` lookahead (nothing is consumed).

    Anything else is reported through ``self.throwUnexpectedToken``.
    """
    # Catch the very common case first: immediately a semicolon (U+003B).
    if self.at(self.startIndex) == ';' or self.match(';'):
        self.lex()
        return

    if self.hasLineTerminator:
        return

    # TODO: FIXME(ikarienator): this is seemingly an issue in the previous
    # location info convention.
    self.lastIndex, self.lastLineNumber, self.lastLineStart = (
        self.startIndex, self.startLineNumber, self.startLineStart)

    at_end_of_input = self.lookahead['type'] == Token.EOF
    if not at_end_of_input and not self.match('}'):
        self.throwUnexpectedToken(self.lookahead)
|
|
|
|
# // Cover grammar support.
|
|
# //
|
|
# // When an assignment expression position starts with a left parenthesis, the determination of the type
|
|
# // of the syntax is to be deferred arbitrarily long until the end of the parentheses pair (plus a lookahead)
|
|
# // or the first comma. This situation also defers the determination of all the expressions nested in the pair.
|
|
# //
|
|
# // There are three productions that can be parsed in a parentheses pair that needs to be determined
|
|
# // after the outermost pair is closed. They are:
|
|
# //
|
|
# // 1. AssignmentExpression
|
|
# // 2. BindingElements
|
|
# // 3. AssignmentTargets
|
|
# //
|
|
# // In order to avoid exponential backtracking, we use two flags to denote if the production can be
|
|
# // binding element or assignment target.
|
|
# //
|
|
# // The three productions have the relationship:
|
|
# //
|
|
# // BindingElements <= AssignmentTargets <= AssignmentExpression
|
|
# //
|
|
# // with a single exception that CoverInitializedName when used directly in an Expression, generates
|
|
# // an early error. Therefore, we need the third state, firstCoverInitializedNameError, to track the
|
|
# // first usage of CoverInitializedName and report it when we reached the end of the parentheses pair.
|
|
# //
|
|
# // isolateCoverGrammar function runs the given parser function with a new cover grammar context, and it does not
|
|
# // affect the current flags. This means the production the parser parses is only used as an expression. Therefore
|
|
# // the CoverInitializedName check is conducted.
|
|
# //
|
|
# // inheritCoverGrammar function runs the given parse function with a new cover grammar context, and it propagates
|
|
# // the flags outside of the parser. This means the production the parser parses is used as a part of a potential
|
|
# // pattern. The CoverInitializedName check is deferred.
|
|
|
|
def isolateCoverGrammar(self, parser):
    """Run ``parser`` in a fresh cover-grammar context and discard its flags.

    The three cover-grammar fields are reset before ``parser()`` runs and
    restored to their previous values afterwards. If the parse recorded a
    ``firstCoverInitializedNameError`` it is reported through
    ``self.throwUnexpectedToken`` before the old state is restored.

    Returns whatever ``parser()`` returned.
    """
    saved = (self.isBindingElement, self.isAssignmentTarget,
             self.firstCoverInitializedNameError)

    self.isBindingElement = true
    self.isAssignmentTarget = true
    self.firstCoverInitializedNameError = null

    result = parser()
    pendingError = self.firstCoverInitializedNameError
    if pendingError != null:
        self.throwUnexpectedToken(pendingError)

    (self.isBindingElement, self.isAssignmentTarget,
     self.firstCoverInitializedNameError) = saved
    return result
|
|
|
|
def inheritCoverGrammar(self, parser):
    """Run ``parser`` in a fresh cover-grammar context and merge its flags.

    After ``parser()`` returns, ``isBindingElement`` and
    ``isAssignmentTarget`` become the logical AND of the inner and outer
    values, and ``firstCoverInitializedNameError`` keeps the outer error
    if there was one, otherwise the inner one.

    Returns whatever ``parser()`` returned.
    """
    outerBinding = self.isBindingElement
    outerTarget = self.isAssignmentTarget
    outerError = self.firstCoverInitializedNameError

    self.isBindingElement = true
    self.isAssignmentTarget = true
    self.firstCoverInitializedNameError = null

    result = parser()

    self.isBindingElement = self.isBindingElement and outerBinding
    self.isAssignmentTarget = self.isAssignmentTarget and outerTarget
    self.firstCoverInitializedNameError = (
        outerError or self.firstCoverInitializedNameError)
    return result
|
|
|
|
def parseArrayPattern(self):
    """Array destructuring pattern -- rejected by this parser.

    Always raises ``Ecma51NotSupported('ArrayPattern')``.
    """
    raise Ecma51NotSupported('ArrayPattern')

    # NOTE: everything below is unreachable; it is the pattern parser kept
    # for reference behind the unconditional raise above.
    node = Node()
    elements = []
    self.expect('[')
    while not self.match(']'):
        if self.match(','):
            # Elision: a hole in the pattern.
            self.lex()
            elements.append(null)
            continue
        if self.match('...'):
            restNode = Node()
            self.lex()
            rest = self.parseVariableIdentifier()
            elements.append(restNode.finishRestElement(rest))
            break
        elements.append(self.parsePatternWithDefault())
        if not self.match(']'):
            self.expect(',')
    self.expect(']')
    return node.finishArrayPattern(elements)
|
|
|
|
def parsePropertyPattern(self):
    """Parse one property of an object pattern and return its Property node.

    Three shapes are handled: ``name = default`` and bare ``name`` (both
    only when the lookahead is an identifier), and ``key: pattern`` for
    everything else.
    """
    node = Node()
    computed = self.match('[')

    if self.lookahead['type'] != Token.Identifier:
        key = self.parseObjectPropertyKey()
    else:
        key = self.parseVariableIdentifier()
        if self.match('='):
            self.lex()
            init = self.parseAssignmentExpression()
            withDefault = WrappingNode(key).finishAssignmentPattern(
                key, init)
            return node.finishProperty('init', key, false, withDefault,
                                       false, false)
        if not self.match(':'):
            # Shorthand: the identifier is both key and value.
            return node.finishProperty('init', key, false, key, false,
                                       true)

    self.expect(':')
    init = self.parsePatternWithDefault()
    return node.finishProperty('init', key, computed, init, false, false)
|
|
|
|
def parseObjectPattern(self):
    """Object destructuring pattern -- rejected by this parser.

    Always raises ``Ecma51NotSupported('ObjectPattern')``.
    """
    raise Ecma51NotSupported('ObjectPattern')

    # NOTE: everything below is unreachable; it is the pattern parser kept
    # for reference behind the unconditional raise above.
    node = Node()
    properties = []
    self.expect('{')
    while not self.match('}'):
        properties.append(self.parsePropertyPattern())
        if not self.match('}'):
            self.expect(',')
    self.lex()
    return node.finishObjectPattern(properties)
|
|
|
|
def parsePattern(self):
    """Parse a binding pattern, dispatching on the lookahead token.

    Identifiers go to ``parseVariableIdentifier``, ``[`` to
    ``parseArrayPattern`` and ``{`` to ``parseObjectPattern``; any other
    lookahead is reported through ``self.throwUnexpectedToken``.
    """
    if self.lookahead['type'] == Token.Identifier:
        return self.parseVariableIdentifier()
    if self.match('['):
        return self.parseArrayPattern()
    if self.match('{'):
        return self.parseObjectPattern()
    self.throwUnexpectedToken(self.lookahead)
|
|
|
|
def parsePatternWithDefault(self):
    """Parse a pattern optionally followed by ``= <default expression>``.

    Returns the bare pattern, or an AssignmentPattern wrapping the pattern
    and its default when an ``=`` follows.
    """
    startToken = self.lookahead
    pattern = self.parsePattern()
    if not self.match('='):
        return pattern

    self.lex()
    default = self.isolateCoverGrammar(self.parseAssignmentExpression)
    return WrappingNode(startToken).finishAssignmentPattern(pattern, default)
|
|
|
|
# 11.1.4 Array Initialiser
|
|
|
|
def parseArrayInitialiser(self):
    """Parse an array literal ``[...]`` and return an ArrayExpression node.

    Elisions are stored as ``null`` elements; ``...expr`` entries become
    SpreadElement nodes.
    """
    elements = []
    node = Node()

    self.expect('[')

    while not self.match(']'):
        if self.match(','):
            # Elision: a hole in the array literal.
            self.lex()
            elements.append(null)
            continue

        if self.match('...'):
            spread = Node()
            self.lex()
            spread.finishSpreadElement(
                self.inheritCoverGrammar(self.parseAssignmentExpression))
            if not self.match(']'):
                # A spread followed by more elements clears both
                # cover-grammar flags.
                self.isAssignmentTarget = self.isBindingElement = false
                self.expect(',')
            elements.append(spread)
            continue

        elements.append(
            self.inheritCoverGrammar(self.parseAssignmentExpression))
        if not self.match(']'):
            self.expect(',')

    # Consume the closing ']'.
    self.lex()

    return node.finishArrayExpression(elements)
|
|
|
|
# 11.1.5 Object Initialiser
|
|
|
|
def parsePropertyFunction(self, node, paramInfo):
    """Parse the body of a method/accessor and finish ``node`` as a function.

    ``paramInfo`` is the dict describing the already-parsed parameters; it
    must contain ``firstRestricted`` and may contain ``stricted``,
    ``message``, ``params`` and ``defaults``. ``self.strict`` is restored
    to its value on entry before returning.
    """
    self.isAssignmentTarget = self.isBindingElement = false

    previousStrict = self.strict
    body = self.isolateCoverGrammar(self.parseFunctionSourceElements)

    # Parameter names are only validated when the parser is in strict mode
    # after the body has been parsed.
    if self.strict:
        if paramInfo['firstRestricted']:
            self.tolerateUnexpectedToken(paramInfo['firstRestricted'],
                                         paramInfo.get('message'))
        if paramInfo.get('stricted'):
            self.tolerateUnexpectedToken(paramInfo.get('stricted'),
                                         paramInfo.get('message'))

    self.strict = previousStrict
    return node.finishFunctionExpression(null, paramInfo.get('params'),
                                         paramInfo.get('defaults'), body)
|
|
|
|
def parsePropertyMethodFunction(self):
    """Parse the parameter list and body of a shorthand method.

    Returns the function node produced by ``parsePropertyFunction``.
    """
    node = Node()
    paramInfo = self.parseParams(null)
    return self.parsePropertyFunction(node, paramInfo)
|
|
|
|
def parseObjectPropertyKey(self):
    """Consume one token and return the node for an object property key.

    String and numeric literals become Literal nodes; identifiers,
    booleans, ``null`` and keywords become Identifier nodes; ``[`` starts a
    computed key whose expression is returned directly. Anything else is
    reported through ``self.throwUnexpectedToken``.
    """
    node = Node()
    token = self.lex()

    # Note: This function is called only from parseObjectProperty(), where
    # EOF and Punctuator tokens are already filtered out.
    typ = token['type']

    if typ in [Token.StringLiteral, Token.NumericLiteral]:
        if self.strict and token.get('octal'):
            self.tolerateUnexpectedToken(token, Messages.StrictOctalLiteral)
        return node.finishLiteral(token)

    if typ in (Token.Identifier, Token.BooleanLiteral, Token.NullLiteral,
               Token.Keyword):
        return node.finishIdentifier(token['value'])

    if typ == Token.Punctuator and token['value'] == '[':
        computedKey = self.isolateCoverGrammar(
            self.parseAssignmentExpression)
        self.expect(']')
        return computedKey

    self.throwUnexpectedToken(token)
|
|
|
|
def lookaheadPropertyName(self):
    """Return whether the lookahead token can start a property name.

    That is any identifier, literal or keyword token, or the ``[``
    punctuator that opens a computed key.
    """
    upcoming = self.lookahead
    typ = upcoming['type']
    nameTypes = (Token.Identifier, Token.StringLiteral, Token.BooleanLiteral,
                 Token.NullLiteral, Token.NumericLiteral, Token.Keyword)
    if typ in nameTypes:
        return true
    if typ == Token.Punctuator:
        return upcoming['value'] == '['
    return false
|
|
|
|
# // This function is to try to parse a MethodDefinition as defined in 14.3. But in the case of object literals,
|
|
# // it might be called at a position where there is in fact a short hand identifier pattern or a data property.
|
|
# // This can only be determined after we consumed up to the left parentheses.
|
|
# //
|
|
# // In order to avoid back tracking, it returns `null` if the position is not a MethodDefinition and the caller
|
|
# // is responsible to visit other options.
|
|
def tryParseMethodDefinition(self, token, key, computed, node):
    """Try to parse a getter, setter or shorthand method.

    ``token`` is the token the property started with and ``key`` /
    ``computed`` describe the key parsed from it. Returns the finished
    Property node, or ``null`` when the position is not a method
    definition so that the caller can try the other property forms.
    """
    if token['type'] == Token.Identifier:
        # check for `get` and `set`;
        accessor = token['value']

        if accessor == 'get' and self.lookaheadPropertyName():
            computed = self.match('[')
            key = self.parseObjectPropertyKey()
            methodNode = Node()
            self.expect('(')
            self.expect(')')
            getterInfo = {
                'params': [],
                'defaults': [],
                'stricted': null,
                'firstRestricted': null,
                'message': null
            }
            value = self.parsePropertyFunction(methodNode, getterInfo)
            return node.finishProperty('get', key, computed, value, false,
                                       false)

        elif accessor == 'set' and self.lookaheadPropertyName():
            computed = self.match('[')
            key = self.parseObjectPropertyKey()
            methodNode = Node()
            self.expect('(')

            options = {
                'params': [],
                'defaultCount': 0,
                'defaults': [],
                'firstRestricted': null,
                'paramSet': {}
            }
            if self.match(')'):
                # A setter with an empty parameter list is reported.
                self.tolerateUnexpectedToken(self.lookahead)
            else:
                self.parseParam(options)
                if options['defaultCount'] == 0:
                    options['defaults'] = []
            self.expect(')')

            value = self.parsePropertyFunction(methodNode, options)
            return node.finishProperty('set', key, computed, value, false,
                                       false)

    if self.match('('):
        value = self.parsePropertyMethodFunction()
        return node.finishProperty('init', key, computed, value, true,
                                   false)

    return null
|
|
|
|
def checkProto(self, key, computed, hasProto):
    """Duplicate ``__proto__`` check -- currently a no-op.

    The method returns immediately, so ``hasProto`` is never read or
    updated and no error is ever reported.
    """
    return

    # NOTE: everything below is unreachable; it is the duplicate-key check
    # kept for reference behind the unconditional return above.
    if (computed == false and
        (key['type'] == Syntax.Identifier and key['name'] == '__proto__' or
         key['type'] == Syntax.Literal and key['value'] == '__proto__')):
        if hasProto['value']:
            self.tolerateError(Messages.DuplicateProtoProperty)
        else:
            hasProto['value'] = true
|
|
|
|
def parseObjectProperty(self, hasProto):
    """Parse one property of an object literal and return its Property node.

    Tries, in order: method/accessor definition, ``key: value``, and (only
    when the property started with an identifier) ``name = default`` or
    shorthand ``name``. Anything else is reported through
    ``self.throwUnexpectedToken``.
    """
    firstToken = self.lookahead
    node = Node()

    computed = self.match('[')
    key = self.parseObjectPropertyKey()

    method = self.tryParseMethodDefinition(firstToken, key, computed, node)
    if method:
        self.checkProto(method['key'], method['computed'], hasProto)
        return method

    # init property or short hand property.
    self.checkProto(key, computed, hasProto)

    if self.match(':'):
        self.lex()
        value = self.inheritCoverGrammar(self.parseAssignmentExpression)
        return node.finishProperty('init', key, computed, value, false,
                                   false)

    if firstToken['type'] == Token.Identifier:
        if self.match('='):
            # Remember the `=` token: `{a = 1}` is only valid as a pattern,
            # and the enclosing cover-grammar context decides whether to
            # report it.
            self.firstCoverInitializedNameError = self.lookahead
            self.lex()
            value = self.isolateCoverGrammar(self.parseAssignmentExpression)
            withDefault = WrappingNode(firstToken).finishAssignmentPattern(
                key, value)
            return node.finishProperty('init', key, computed, withDefault,
                                       false, true)
        return node.finishProperty('init', key, computed, key, false, true)

    self.throwUnexpectedToken(self.lookahead)
|
|
|
|
def parseObjectInitialiser(self):
    """Parse an object literal ``{...}`` and return an ObjectExpression."""
    node = Node()
    properties = []
    # Mutable holder handed to every property parse (see checkProto).
    hasProto = {'value': false}

    self.expect('{')
    while not self.match('}'):
        properties.append(self.parseObjectProperty(hasProto))
        if not self.match('}'):
            self.expectCommaSeparator()
    self.expect('}')

    return node.finishObjectExpression(properties)
|
|
|
|
def reinterpretExpressionAsPattern(self, expr):
    """Rewrite an already-parsed expression in place as a pattern.

    * Identifier, MemberExpression, RestElement and AssignmentPattern
      nodes are left as they are.
    * SpreadElement becomes RestElement, ArrayExpression becomes
      ArrayPattern and ObjectExpression becomes ObjectPattern, recursing
      into the argument / non-hole elements / property values.
    * AssignmentExpression raises ``Ecma51NotSupported``.
    * Any other node type is left untouched.

    The AssignmentExpression branch used to be ``elif
    Syntax.AssignmentExpression:``, which tests a constant and is always
    true, so every unlisted node type raised ``Ecma51NotSupported`` and
    the final "allow other node types" branch could never run. It now
    compares ``typ`` like the other branches.
    """
    typ = expr['type']

    if typ in (Syntax.Identifier, Syntax.MemberExpression,
               Syntax.RestElement, Syntax.AssignmentPattern):
        return

    if typ == Syntax.SpreadElement:
        expr['type'] = Syntax.RestElement
        # Subscript access, consistent with the rest of this method.
        self.reinterpretExpressionAsPattern(expr['argument'])
    elif typ == Syntax.ArrayExpression:
        expr['type'] = Syntax.ArrayPattern
        for element in expr['elements']:
            # Holes (elisions) are stored as null and have no pattern.
            if element != null:
                self.reinterpretExpressionAsPattern(element)
    elif typ == Syntax.ObjectExpression:
        expr['type'] = Syntax.ObjectPattern
        for prop in expr['properties']:
            self.reinterpretExpressionAsPattern(prop['value'])
    elif typ == Syntax.AssignmentExpression:
        raise Ecma51NotSupported('AssignmentPattern')
        # NOTE: unreachable; kept for reference behind the raise above.
        expr['type'] = Syntax.AssignmentPattern
        self.reinterpretExpressionAsPattern(expr['left'])
    else:
        # Allow other node type for tolerant parsing.
        return
|
|
|
|
def parseTemplateElement(self, option):
    """Consume one template token and return a TemplateElement node.

    ``option['head']`` requests that the token be the head of a template.
    A non-template lookahead, or a non-head token when a head is required,
    is reported through ``self.throwUnexpectedToken``.
    """
    upcoming = self.lookahead
    if (upcoming['type'] != Token.Template
            or (option['head'] and not upcoming['head'])):
        self.throwUnexpectedToken()

    node = Node()
    token = self.lex()

    text = token['value']
    return node.finishTemplateElement({
        'raw': text['raw'],
        'cooked': text['cooked']
    }, token['tail'])
|
|
|
|
def parseTemplateLiteral(self):
    """Parse a whole template literal and return a TemplateLiteral node.

    Alternates between template elements (quasis) and embedded
    expressions until an element flagged as ``tail`` has been read.
    """
    node = Node()

    current = self.parseTemplateElement({'head': true})
    quasis = [current]
    expressions = []

    while not current['tail']:
        expressions.append(self.parseExpression())
        current = self.parseTemplateElement({'head': false})
        quasis.append(current)

    return node.finishTemplateLiteral(quasis, expressions)
|
|
|
|
# 11.1.6 The Grouping Operator
|
|
|
|
def parseGroupExpression(self):
    """Parse a parenthesised expression ``( ... )``.

    Returns the inner expression, or a SequenceExpression when it contains
    commas. Forms that can only be arrow-function parameter lists -- ``()``,
    a ``...`` after a comma, or a ``=>`` after the closing parenthesis --
    raise ``Ecma51NotSupported('ArrowFunction')``.
    """
    self.expect('(')

    if self.match(')'):
        raise Ecma51NotSupported('ArrowFunction')
        # NOTE: unreachable; kept for reference behind the raise above.
        self.lex()
        if not self.match('=>'):
            self.expect('=>')
        return {
            'type': PlaceHolders.ArrowParameterPlaceHolder,
            'params': []
        }

    startToken = self.lookahead
    if self.match('...'):
        rest = self.parseRestElement()
        self.expect(')')
        if not self.match('=>'):
            self.expect('=>')
        return {
            'type': PlaceHolders.ArrowParameterPlaceHolder,
            'params': [rest]
        }

    self.isBindingElement = true
    expr = self.inheritCoverGrammar(self.parseAssignmentExpression)

    if self.match(','):
        self.isAssignmentTarget = false
        expressions = [expr]

        while self.startIndex < self.length and self.match(','):
            self.lex()

            if self.match('...'):
                raise Ecma51NotSupported('ArrowFunction')
                # NOTE: unreachable; kept for reference behind the raise.
                if not self.isBindingElement:
                    self.throwUnexpectedToken(self.lookahead)
                expressions.append(self.parseRestElement())
                self.expect(')')
                if not self.match('=>'):
                    self.expect('=>')
                self.isBindingElement = false
                for i in xrange(len(expressions)):
                    self.reinterpretExpressionAsPattern(expressions[i])
                return {
                    'type': PlaceHolders.ArrowParameterPlaceHolder,
                    'params': expressions
                }

            expressions.append(
                self.inheritCoverGrammar(self.parseAssignmentExpression))

        expr = WrappingNode(startToken).finishSequenceExpression(
            expressions)

    self.expect(')')

    if self.match('=>'):
        raise Ecma51NotSupported('ArrowFunction')
        # NOTE: unreachable; kept for reference behind the raise above.
        if not self.isBindingElement:
            self.throwUnexpectedToken(self.lookahead)
        if expr['type'] == Syntax.SequenceExpression:
            for i in xrange(len(expr.expressions)):
                self.reinterpretExpressionAsPattern(expr['expressions'][i])
        else:
            self.reinterpretExpressionAsPattern(expr)
        expr = {
            'type':
            PlaceHolders.ArrowParameterPlaceHolder,
            'params':
            expr['expressions']
            if expr['type'] == Syntax.SequenceExpression else [expr]
        }

    self.isBindingElement = false
    return expr
|
|
|
|
# 11.1 Primary Expressions
|
|
|
|
def parsePrimaryExpression(self):
    """Parse a primary expression and return its node.

    Handles parenthesised groups, array and object literals, identifiers,
    string/numeric/boolean/null literals, the ``function``, ``this`` and
    ``class`` keywords, regular-expression literals and template
    literals. Any other token is reported through
    ``self.throwUnexpectedToken``.

    Every literal and keyword branch clears both cover-grammar flags,
    because such an expression can be neither an assignment target nor a
    binding element. The boolean branch used to assign a throw-away local
    ``isAssignmentTarget`` instead of ``self.isAssignmentTarget``, which
    left the flag set for ``true`` / ``false``.
    """
    if self.match('('):
        self.isBindingElement = false
        return self.inheritCoverGrammar(self.parseGroupExpression)
    if self.match('['):
        return self.inheritCoverGrammar(self.parseArrayInitialiser)
    if self.match('{'):
        return self.inheritCoverGrammar(self.parseObjectInitialiser)

    typ = self.lookahead['type']
    node = Node()

    if typ == Token.Identifier:
        expr = node.finishIdentifier(self.lex()['value'])
    elif typ == Token.StringLiteral or typ == Token.NumericLiteral:
        self.isAssignmentTarget = self.isBindingElement = false
        if self.strict and self.lookahead.get('octal'):
            self.tolerateUnexpectedToken(self.lookahead,
                                         Messages.StrictOctalLiteral)
        expr = node.finishLiteral(self.lex())
    elif typ == Token.Keyword:
        self.isAssignmentTarget = self.isBindingElement = false
        if self.matchKeyword('function'):
            return self.parseFunctionExpression()
        if self.matchKeyword('this'):
            self.lex()
            return node.finishThisExpression()
        if self.matchKeyword('class'):
            return self.parseClassExpression()
        self.throwUnexpectedToken(self.lex())
    elif typ == Token.BooleanLiteral:
        # Fixed: both flags are cleared on the parser itself.
        self.isAssignmentTarget = self.isBindingElement = false
        token = self.lex()
        # Replace the source text with the actual boolean value.
        token['value'] = (token['value'] == 'true')
        expr = node.finishLiteral(token)
    elif typ == Token.NullLiteral:
        self.isAssignmentTarget = self.isBindingElement = false
        token = self.lex()
        token['value'] = null
        expr = node.finishLiteral(token)
    elif self.match('/') or self.match('/='):
        self.isAssignmentTarget = self.isBindingElement = false
        # Rewind the scanner to the start of the '/' token and rescan from
        # there as a regular-expression literal.
        self.index = self.startIndex
        token = self.scanRegExp()
        # presumably refreshes the lookahead past the regexp -- the
        # returned token is discarded; TODO confirm against lex()
        self.lex()
        expr = node.finishLiteral(token)
    elif typ == Token.Template:
        expr = self.parseTemplateLiteral()
    else:
        self.throwUnexpectedToken(self.lex())
    return expr
|
|
|
|
# 11.2 Left-Hand-Side Expressions
|
|
|
|
def parseArguments(self):
    """Parse a parenthesised call argument list.

    Returns the list of argument nodes (empty for ``()``).
    """
    arguments = []

    self.expect('(')
    hasArguments = not self.match(')')
    while hasArguments and self.startIndex < self.length:
        arguments.append(
            self.isolateCoverGrammar(self.parseAssignmentExpression))
        if self.match(')'):
            break
        self.expectCommaSeparator()
    self.expect(')')

    return arguments
|
|
|
|
def parseNonComputedProperty(self):
    """Consume an identifier-name token and return an Identifier node.

    A token rejected by ``self.isIdentifierName`` is reported through
    ``self.throwUnexpectedToken``.
    """
    node = Node()
    nameToken = self.lex()
    if not self.isIdentifierName(nameToken):
        self.throwUnexpectedToken(nameToken)
    return node.finishIdentifier(nameToken['value'])
|
|
|
|
def parseNonComputedMember(self):
    """Parse ``.name`` and return the node for ``name``."""
    self.expect('.')
    member = self.parseNonComputedProperty()
    return member
|
|
|
|
def parseComputedMember(self):
    """Parse ``[expression]`` and return the node for the expression."""
    self.expect('[')
    indexExpression = self.isolateCoverGrammar(self.parseExpression)
    self.expect(']')
    return indexExpression
|
|
|
|
def parseNewExpression(self):
    """Parse ``new callee`` with an optional argument list.

    Returns a NewExpression node; the argument list is empty when no
    ``(`` follows the callee. Both cover-grammar flags are cleared.
    """
    node = Node()
    self.expectKeyword('new')
    callee = self.isolateCoverGrammar(self.parseLeftHandSideExpression)

    if self.match('('):
        args = self.parseArguments()
    else:
        args = []

    self.isAssignmentTarget = self.isBindingElement = false

    return node.finishNewExpression(callee, args)
|
|
|
|
def parseLeftHandSideExpressionAllowCall(self):
    """Parse a left-hand-side expression, including call expressions.

    Parses a ``super`` reference (inside a function body), a ``new``
    expression or a primary expression, then folds any trailing
    ``.name``, ``(args)``, ``[expr]`` and tagged-template suffixes into
    nested nodes. ``state['allowIn']`` is forced on while parsing and
    restored before returning.
    """
    savedAllowIn = self.state['allowIn']

    startToken = self.lookahead
    self.state['allowIn'] = true

    if self.matchKeyword('super') and self.state['inFunctionBody']:
        expr = Node()
        self.lex()
        expr = expr.finishSuper()
        # `super` must be followed by a call or a member access.
        if (not self.match('(') and not self.match('.')
                and not self.match('[')):
            self.throwUnexpectedToken(self.lookahead)
    else:
        if self.matchKeyword('new'):
            head = self.parseNewExpression
        else:
            head = self.parsePrimaryExpression
        expr = self.inheritCoverGrammar(head)

    while True:
        if self.match('.'):
            self.isBindingElement = false
            self.isAssignmentTarget = true
            member = self.parseNonComputedMember()
            expr = WrappingNode(startToken).finishMemberExpression(
                '.', expr, member)
        elif self.match('('):
            self.isBindingElement = false
            self.isAssignmentTarget = false
            args = self.parseArguments()
            expr = WrappingNode(startToken).finishCallExpression(
                expr, args)
        elif self.match('['):
            self.isBindingElement = false
            self.isAssignmentTarget = true
            member = self.parseComputedMember()
            expr = WrappingNode(startToken).finishMemberExpression(
                '[', expr, member)
        elif (self.lookahead['type'] == Token.Template
              and self.lookahead['head']):
            quasi = self.parseTemplateLiteral()
            expr = WrappingNode(startToken).finishTaggedTemplateExpression(
                expr, quasi)
        else:
            break

    self.state['allowIn'] = savedAllowIn

    return expr
|
|
|
|
def parseLeftHandSideExpression(self):
    """Parse a left-hand-side expression that may not contain calls.

    Same as ``parseLeftHandSideExpressionAllowCall`` except that ``(``
    suffixes are not consumed. Asserts that ``state['allowIn']`` is
    already set.
    """
    assert self.state[
        'allowIn'], 'callee of new expression always allow in keyword.'

    startToken = self.lookahead

    if self.matchKeyword('super') and self.state['inFunctionBody']:
        expr = Node()
        self.lex()
        expr = expr.finishSuper()
        # Without calls, `super` must be followed by a member access.
        if not self.match('[') and not self.match('.'):
            self.throwUnexpectedToken(self.lookahead)
    else:
        if self.matchKeyword('new'):
            head = self.parseNewExpression
        else:
            head = self.parsePrimaryExpression
        expr = self.inheritCoverGrammar(head)

    while True:
        if self.match('['):
            self.isBindingElement = false
            self.isAssignmentTarget = true
            member = self.parseComputedMember()
            expr = WrappingNode(startToken).finishMemberExpression(
                '[', expr, member)
        elif self.match('.'):
            self.isBindingElement = false
            self.isAssignmentTarget = true
            member = self.parseNonComputedMember()
            expr = WrappingNode(startToken).finishMemberExpression(
                '.', expr, member)
        elif (self.lookahead['type'] == Token.Template
              and self.lookahead['head']):
            quasi = self.parseTemplateLiteral()
            expr = WrappingNode(startToken).finishTaggedTemplateExpression(
                expr, quasi)
        else:
            break
    return expr
|
|
|
|
# 11.3 Postfix Expressions
|
|
|
|
def parsePostfixExpression(self):
    """Parse an expression with an optional postfix ``++`` / ``--``.

    The operator is only taken when no line terminator precedes it. The
    operand must be an assignment target, and in strict mode must not be
    an identifier rejected by ``isRestrictedWord``.
    """
    startToken = self.lookahead

    operand = self.inheritCoverGrammar(
        self.parseLeftHandSideExpressionAllowCall)

    if self.hasLineTerminator:
        return operand
    if self.lookahead['type'] != Token.Punctuator:
        return operand
    if not (self.match('++') or self.match('--')):
        return operand

    # 11.3.1, 11.3.2
    if (self.strict and operand.type == Syntax.Identifier
            and isRestrictedWord(operand.name)):
        self.tolerateError(Messages.StrictLHSPostfix)
    if not self.isAssignmentTarget:
        self.tolerateError(Messages.InvalidLHSInAssignment)
    self.isAssignmentTarget = self.isBindingElement = false

    operator = self.lex()
    return WrappingNode(startToken).finishPostfixExpression(
        operator['value'], operand)
|
|
|
|
# 11.4 Unary Operators
|
|
|
|
def parseUnaryExpression(self):
    """Parse a unary expression.

    Handles prefix ``++`` / ``--``, the operators ``+ - ~ !`` and the
    keywords ``delete``, ``void`` and ``typeof``; everything else is
    delegated to ``parsePostfixExpression``.
    """
    lookaheadType = self.lookahead['type']
    if (lookaheadType != Token.Punctuator
            and lookaheadType != Token.Keyword):
        return self.parsePostfixExpression()

    if self.match('++') or self.match('--'):
        startToken = self.lookahead
        operator = self.lex()
        operand = self.inheritCoverGrammar(self.parseUnaryExpression)
        # 11.4.4, 11.4.5
        if (self.strict and operand.type == Syntax.Identifier
                and isRestrictedWord(operand.name)):
            self.tolerateError(Messages.StrictLHSPrefix)
        if not self.isAssignmentTarget:
            self.tolerateError(Messages.InvalidLHSInAssignment)
        expr = WrappingNode(startToken).finishUnaryExpression(
            operator['value'], operand)
        self.isAssignmentTarget = self.isBindingElement = false
        return expr

    if (self.match('+') or self.match('-') or self.match('~')
            or self.match('!')):
        startToken = self.lookahead
        operator = self.lex()
        operand = self.inheritCoverGrammar(self.parseUnaryExpression)
        expr = WrappingNode(startToken).finishUnaryExpression(
            operator['value'], operand)
        self.isAssignmentTarget = self.isBindingElement = false
        return expr

    if (self.matchKeyword('delete') or self.matchKeyword('void')
            or self.matchKeyword('typeof')):
        startToken = self.lookahead
        operator = self.lex()
        operand = self.inheritCoverGrammar(self.parseUnaryExpression)
        expr = WrappingNode(startToken).finishUnaryExpression(
            operator['value'], operand)
        # Deleting a plain identifier is reported in strict mode.
        if (self.strict and expr.operator == 'delete'
                and expr.argument.type == Syntax.Identifier):
            self.tolerateError(Messages.StrictDelete)
        self.isAssignmentTarget = self.isBindingElement = false
        return expr

    return self.parsePostfixExpression()
|
|
|
|
def binaryPrecedence(self, token, allowIn):
    """Return the binary-operator precedence of ``token``.

    Returns 0 when the token is neither a punctuator nor a keyword, when
    it is ``in`` while ``allowIn`` is false, or when its value has no
    entry in ``PRECEDENCE``.
    """
    if token['type'] not in (Token.Punctuator, Token.Keyword):
        return 0
    operator = token['value']
    if operator == 'in' and not allowIn:
        return 0
    return PRECEDENCE.get(operator, 0)
|
|
|
|
# 11.5 Multiplicative Operators
|
|
# 11.6 Additive Operators
|
|
# 11.7 Bitwise Shift Operators
|
|
# 11.8 Relational Operators
|
|
# 11.9 Equality Operators
|
|
# 11.10 Binary Bitwise Operators
|
|
# 11.11 Binary Logical Operators
|
|
|
|
def parseBinaryExpression(self):
    """Parse a chain of binary operators with operator precedence.

    ``stack`` alternates operands and operator tokens (each token is
    tagged with its precedence under ``'prec'``) and ``markers`` holds the
    start token of each operand. Whenever the next operator does not bind
    tighter than the one on the stack, the top three entries are reduced
    to a single BinaryExpression node.
    """
    marker = self.lookahead
    left = self.inheritCoverGrammar(self.parseUnaryExpression)

    token = self.lookahead
    prec = self.binaryPrecedence(token, self.state['allowIn'])
    if prec == 0:
        # No binary operator follows: the unary expression is the result.
        return left
    self.isAssignmentTarget = self.isBindingElement = false
    token['prec'] = prec
    self.lex()

    markers = [marker, self.lookahead]
    right = self.isolateCoverGrammar(self.parseUnaryExpression)

    stack = [left, token, right]

    while True:
        prec = self.binaryPrecedence(self.lookahead, self.state['allowIn'])
        if prec <= 0:
            break

        # Reduce: make a binary expression from the three topmost entries.
        while len(stack) > 2 and prec <= stack[-2]['prec']:
            right = stack.pop()
            operator = stack.pop()['value']
            left = stack.pop()
            markers.pop()
            reduced = WrappingNode(markers[-1]).finishBinaryExpression(
                operator, left, right)
            stack.append(reduced)

        # Shift
        token = self.lex()
        token['prec'] = prec
        stack.append(token)
        markers.append(self.lookahead)
        stack.append(self.isolateCoverGrammar(self.parseUnaryExpression))

    # Final reduce to clean-up the stack.
    i = len(stack) - 1
    expr = stack[i]
    markers.pop()
    while i > 1:
        expr = WrappingNode(markers.pop()).finishBinaryExpression(
            stack[i - 1]['value'], stack[i - 2], expr)
        i -= 2
    return expr
|
|
|
|
# 11.12 Conditional Operator
|
|
|
|
def parseConditionalExpression(self):
    """Parse ``test ? consequent : alternate``.

    Returns the binary expression unchanged when no ``?`` follows it. The
    consequent is parsed with ``state['allowIn']`` forced on; the previous
    value is restored before the ``:`` is consumed.
    """
    startToken = self.lookahead

    test = self.inheritCoverGrammar(self.parseBinaryExpression)
    if not self.match('?'):
        return test

    self.lex()
    savedAllowIn = self.state['allowIn']
    self.state['allowIn'] = true
    consequent = self.isolateCoverGrammar(self.parseAssignmentExpression)
    self.state['allowIn'] = savedAllowIn
    self.expect(':')
    alternate = self.isolateCoverGrammar(self.parseAssignmentExpression)

    conditional = WrappingNode(startToken).finishConditionalExpression(
        test, consequent, alternate)
    self.isAssignmentTarget = self.isBindingElement = false
    return conditional
|
|
|
|
# [ES6] 14.2 Arrow Function
|
|
|
|
def parseConciseBody(self):
    """Parse an arrow-function body.

    A ``{`` lookahead selects ``parseFunctionSourceElements``; otherwise a
    single assignment expression is parsed in an isolated cover-grammar
    context.
    """
    if not self.match('{'):
        return self.isolateCoverGrammar(self.parseAssignmentExpression)
    return self.parseFunctionSourceElements()
|
|
|
|
def checkPatternParam(self, options, param):
    """Validate every identifier bound by the parameter pattern ``param``.

    Walks the pattern recursively and calls ``self.validateParam`` for
    each Identifier it reaches. Asserts that a node of any other kind is
    an ObjectPattern.
    """
    typ = param.type
    if typ == Syntax.Identifier:
        self.validateParam(options, param, param.name)
    elif typ == Syntax.RestElement:
        self.checkPatternParam(options, param.argument)
    elif typ == Syntax.AssignmentPattern:
        self.checkPatternParam(options, param.left)
    elif typ == Syntax.ArrayPattern:
        for element in param.elements:
            # Holes (elisions) are stored as null and bind nothing.
            if element != null:
                self.checkPatternParam(options, element)
    else:
        assert typ == Syntax.ObjectPattern, 'Invalid type'
        for prop in param.properties:
            self.checkPatternParam(options, prop['value'])
|
|
|
|
def reinterpretAsCoverFormalsList(self, expr):
    """Turn a parsed expression into arrow-function parameter info.

    ``expr`` is either a single Identifier node or an
    ``ArrowParameterPlaceHolder`` carrying a ``params`` list. Returns
    ``null`` for any other node type, otherwise a dict with the keys
    ``params``, ``defaults``, ``stricted``, ``firstRestricted`` and
    ``message``.

    Two lookups were made safe:

    * ``expr`` is read with subscripts, so the plain-dict placeholders
      built by ``parseGroupExpression`` are accepted as well as nodes;
    * ``stricted`` / ``firstRestricted`` are read with ``.get`` because
      ``options`` starts out holding only ``paramSet``; a missing entry
      yields ``None`` instead of a ``KeyError``.
    """
    defaults = []
    defaultCount = 0
    params = [expr]

    typ = expr['type']
    if typ == Syntax.Identifier:
        pass
    elif typ == PlaceHolders.ArrowParameterPlaceHolder:
        params = expr['params']
    else:
        return null

    options = {'paramSet': {}}
    for i in xrange(len(params)):
        param = params[i]
        if param.type == Syntax.AssignmentPattern:
            # Split `name = default` into the parameter and its default.
            params[i] = param.left
            defaults.append(param.right)
            defaultCount += 1
            self.checkPatternParam(options, param.left)
        else:
            self.checkPatternParam(options, param)
            params[i] = param
            defaults.append(null)

    if options.get('message') == Messages.StrictParamDupe:
        if self.strict:
            token = options.get('stricted')
        else:
            token = options.get('firstRestricted')
        self.throwUnexpectedToken(token, options.get('message'))

    if defaultCount == 0:
        defaults = []

    return {
        'params': params,
        'defaults': defaults,
        'stricted': options.get('stricted'),
        'firstRestricted': options.get('firstRestricted'),
        'message': options.get('message')
    }
|
|
|
|
def parseArrowFunctionExpression(self, options, node):
    """Arrow function -- rejected by this parser.

    Always raises ``Ecma51NotSupported('ArrowFunctionExpression')``.
    """
    raise Ecma51NotSupported('ArrowFunctionExpression')

    # NOTE: everything below is unreachable; it is the arrow-function
    # parser kept for reference behind the unconditional raise above.
    if self.hasLineTerminator:
        self.tolerateUnexpectedToken(self.lookahead)
    self.expect('=>')
    previousStrict = self.strict

    body = self.parseConciseBody()

    if self.strict and options['firstRestricted']:
        self.throwUnexpectedToken(options['firstRestricted'],
                                  options.get('message'))
    if self.strict and options['stricted']:
        self.tolerateUnexpectedToken(options['stricted'],
                                     options['message'])

    self.strict = previousStrict

    return node.finishArrowFunctionExpression(
        options['params'], options['defaults'], body,
        body.type != Syntax.BlockStatement)
|
|
|
|
# 11.13 Assignment Operators
|
|
|
|
def parseAssignmentExpression(self):
    """Parse an assignment expression.

    Parses a conditional expression and, when an assignment operator
    follows, the right-hand side (recursively, so assignments chain to
    the right). Arrow functions raise
    ``Ecma51NotSupported('ArrowFunctionExpression')``.
    """
    startToken = self.lookahead
    token = self.lookahead

    expr = self.parseConditionalExpression()

    if (expr.type == PlaceHolders.ArrowParameterPlaceHolder
            or self.match('=>')):
        raise Ecma51NotSupported('ArrowFunctionExpression')
        # NOTE: unreachable; kept for reference behind the raise above.
        self.isAssignmentTarget = self.isBindingElement = false
        lis = self.reinterpretAsCoverFormalsList(expr)

        if lis:
            self.firstCoverInitializedNameError = null
            return self.parseArrowFunctionExpression(
                lis, WrappingNode(startToken))
        return expr

    if not self.matchAssign():
        return expr

    if not self.isAssignmentTarget:
        self.tolerateError(Messages.InvalidLHSInAssignment)

    # 11.13.1
    if (self.strict and expr.type == Syntax.Identifier
            and isRestrictedWord(expr.name)):
        self.tolerateUnexpectedToken(token, Messages.StrictLHSAssignment)

    if self.match('='):
        # Plain `=`: the left-hand side may be a pattern.
        self.reinterpretExpressionAsPattern(expr)
    else:
        self.isAssignmentTarget = self.isBindingElement = false

    token = self.lex()
    right = self.isolateCoverGrammar(self.parseAssignmentExpression)
    expr = WrappingNode(startToken).finishAssignmentExpression(
        token['value'], expr, right)
    self.firstCoverInitializedNameError = null
    return expr
|
|
|
|
# 11.14 Comma Operator
|
|
|
|
def parseExpression(self):
    """Parse an expression, including the comma operator (section 11.14).

    Returns a single assignment-expression node, or a sequence expression
    node when one or more ','-separated expressions follow.
    """
    firstToken = self.lookahead
    first = self.isolateCoverGrammar(self.parseAssignmentExpression)

    if not self.match(','):
        return first

    sequence = [first]
    # Keep consuming ", <assignment-expression>" pairs.
    while self.startIndex < self.length and self.match(','):
        self.lex()
        sequence.append(
            self.isolateCoverGrammar(self.parseAssignmentExpression))
    return WrappingNode(firstToken).finishSequenceExpression(sequence)
|
|
|
|
# 12.1 Block
|
|
|
|
def parseStatementListItem(self):
    """Parse one item of a statement list (section 12.1).

    Keyword-introduced declarations are dispatched here; everything else
    is delegated to ``parseStatement``.

    Raises Ecma51NotSupported for ``export``, ``import`` and ``class``.
    """
    upcoming = self.lookahead
    if upcoming['type'] != Token.Keyword:
        return self.parseStatement()

    word = upcoming['value']
    if word == 'export':
        raise Ecma51NotSupported('ExportDeclaration')
    if word == 'import':
        raise Ecma51NotSupported('ImportDeclaration')
    if word in ('const', 'let'):
        return self.parseLexicalDeclaration({'inFor': false})
    if word == 'function':
        return self.parseFunctionDeclaration(Node())
    if word == 'class':
        raise Ecma51NotSupported('ClassDeclaration')
    # 'pyimport' is only recognised when the module-level switch is on.
    if ENABLE_PYIMPORT and word == 'pyimport':
        return self.parsePyimportStatement()
    return self.parseStatement()
|
|
|
|
def parsePyimportStatement(self):
    """Parse a ``pyimport <identifier>`` statement.

    Returns the node on which ``finishPyimport`` was called.

    Raises Ecma51NotSupported when ENABLE_PYIMPORT is off.
    """
    if not ENABLE_PYIMPORT:
        raise Ecma51NotSupported('PyimportStatement')
    statement = Node()
    self.lex()  # consume the 'pyimport' keyword
    imported = self.parseVariableIdentifier()
    statement.finishPyimport(imported)
    self.consumeSemicolon()
    return statement
|
|
|
|
def parseStatementList(self):
    """Collect statement list items until a '}' or the end of the source.

    Returns the parsed items as a list; the closing '}' is not consumed.
    """
    items = []
    while self.startIndex < self.length and not self.match('}'):
        items.append(self.parseStatementListItem())
    return items
|
|
|
|
def parseBlock(self):
    """Parse a ``{ ... }`` block and return the finished block statement."""
    blockNode = Node()

    self.expect('{')
    statements = self.parseStatementList()
    self.expect('}')

    return blockNode.finishBlockStatement(statements)
|
|
|
|
# 12.2 Variable Statement
|
|
|
|
def parseVariableIdentifier(self):
    """Consume one token and return it as an identifier node (12.2).

    A non-identifier token is reported via ``tolerateUnexpectedToken``
    when it is a strict-mode reserved keyword in strict mode, and via
    ``throwUnexpectedToken`` otherwise.
    """
    identifier = Node()
    token = self.lex()

    if token['type'] != Token.Identifier:
        strictReserved = (self.strict and token['type'] == Token.Keyword
                          and isStrictModeReservedWord(token['value']))
        if strictReserved:
            self.tolerateUnexpectedToken(token, Messages.StrictReservedWord)
        else:
            self.throwUnexpectedToken(token)
    return identifier.finishIdentifier(token['value'])
|
|
|
|
def parseVariableDeclaration(self):
    """Parse one declarator of a ``var`` statement.

    Returns the finished variable declarator; its initializer is ``null``
    when no ``=`` follows the binding.
    """
    init = null
    node = Node()

    d = self.parsePattern()

    # 12.2.1: restricted words may not be declared in strict mode.
    # Only identifiers are checked, matching parseLexicalBinding, so a
    # non-identifier pattern is never asked for a ``name``.
    if (self.strict and d.type == Syntax.Identifier
            and isRestrictedWord(d.name)):
        self.tolerateError(Messages.StrictVarName)

    if self.match('='):
        self.lex()
        init = self.isolateCoverGrammar(self.parseAssignmentExpression)
    elif d.type != Syntax.Identifier:
        # A non-identifier binding must have an initializer; expect()
        # reports the missing '='.
        self.expect('=')
    return node.finishVariableDeclarator(d, init)
|
|
|
|
def parseVariableDeclarationList(self):
    """Parse a ','-separated list of variable declarators.

    Returns the declarators as a list with at least one element.
    """
    declarators = [self.parseVariableDeclaration()]
    while self.match(','):
        self.lex()
        # Stop when the comma was the last thing in the source.
        if self.startIndex >= self.length:
            break
        declarators.append(self.parseVariableDeclaration())
    return declarators
|
|
|
|
def parseVariableStatement(self, node):
    """Parse ``var <declarators>;`` and finish ``node`` with the result."""
    self.expectKeyword('var')
    declarators = self.parseVariableDeclarationList()
    self.consumeSemicolon()
    return node.finishVariableDeclaration(declarators)
|
|
|
|
def parseLexicalBinding(self, kind, options):
    """Parse one binding of a ``let``/``const`` declaration.

    ``kind`` is compared against 'const'; ``options['inFor']`` tells
    whether the binding appears in a ``for`` header.  Returns the
    finished variable declarator.
    """
    initializer = null
    declarator = Node()

    target = self.parsePattern()
    isIdentifier = target.type == Syntax.Identifier

    # 12.2.1
    if self.strict and isIdentifier and isRestrictedWord(target.name):
        self.tolerateError(Messages.StrictVarName)

    if kind == 'const':
        # A const binding needs an initializer unless 'in' follows.
        needsInit = not self.matchKeyword('in')
    else:
        needsInit = ((not options['inFor'] and target.type != Syntax.Identifier)
                     or self.match('='))

    if needsInit:
        self.expect('=')
        initializer = self.isolateCoverGrammar(self.parseAssignmentExpression)
    return declarator.finishVariableDeclarator(target, initializer)
|
|
|
|
def parseBindingList(self, kind, options):
    """Parse a ','-separated list of lexical bindings.

    ``kind`` and ``options`` are forwarded unchanged to
    ``parseLexicalBinding``.  Returns the bindings as a list.
    """
    bindings = [self.parseLexicalBinding(kind, options)]
    while self.match(','):
        self.lex()
        # Stop when the comma was the last thing in the source.
        if self.startIndex >= self.length:
            break
        bindings.append(self.parseLexicalBinding(kind, options))
    return bindings
|
|
|
|
def parseLexicalDeclaration(self, options):
    """Parse a ``let``/``const`` declaration statement.

    ``options`` is forwarded to ``parseBindingList``.  Returns the
    finished lexical declaration.
    """
    declaration = Node()

    keyword = self.lex()
    kind = keyword['value']
    assert kind in ('let', 'const'), 'Lexical declaration must be either let or const'
    bindings = self.parseBindingList(kind, options)
    self.consumeSemicolon()
    return declaration.finishLexicalDeclaration(bindings, kind)
|
|
|
|
def parseRestElement(self):
    """Reject a rest element (``...name``).

    Always raises Ecma51NotSupported; rest elements are not parsed by
    this parser.
    """
    raise Ecma51NotSupported('RestElement')
|
|
|
|
# 12.3 Empty Statement
|
|
|
|
def parseEmptyStatement(self, node):
    """Consume a lone ';' and finish ``node`` as an empty statement."""
    self.expect(';')
    finished = node.finishEmptyStatement()
    return finished
|
|
|
|
# 12.4 Expression Statement
|
|
|
|
def parseExpressionStatement(self, node):
    """Parse ``<expression>;`` and finish ``node`` with the expression."""
    expression = self.parseExpression()
    self.consumeSemicolon()
    return node.finishExpressionStatement(expression)
|
|
|
|
# 12.5 If statement
|
|
|
|
def parseIfStatement(self, node):
    """Parse ``if (test) consequent [else alternate]``.

    Returns ``node`` finished as an if statement; the alternate is
    ``null`` when there is no ``else`` branch.
    """
    self.expectKeyword('if')
    self.expect('(')
    condition = self.parseExpression()
    self.expect(')')

    thenBranch = self.parseStatement()

    elseBranch = null
    if self.matchKeyword('else'):
        self.lex()
        elseBranch = self.parseStatement()
    return node.finishIfStatement(condition, thenBranch, elseBranch)
|
|
|
|
# 12.6 Iteration Statements
|
|
|
|
def parseDoWhileStatement(self, node):
    """Parse ``do body while (test)`` with an optional trailing ';'.

    The 'inIteration' state flag is set while the body is parsed and put
    back afterwards.
    """
    self.expectKeyword('do')

    wasInIteration = self.state['inIteration']
    self.state['inIteration'] = true
    loopBody = self.parseStatement()
    self.state['inIteration'] = wasInIteration

    self.expectKeyword('while')
    self.expect('(')
    condition = self.parseExpression()
    self.expect(')')

    # The terminating semicolon is optional here.
    if self.match(';'):
        self.lex()
    return node.finishDoWhileStatement(loopBody, condition)
|
|
|
|
def parseWhileStatement(self, node):
    """Parse ``while (test) body``.

    The 'inIteration' state flag is set while the body is parsed and put
    back afterwards.
    """
    self.expectKeyword('while')
    self.expect('(')
    condition = self.parseExpression()
    self.expect(')')

    wasInIteration = self.state['inIteration']
    self.state['inIteration'] = true
    loopBody = self.parseStatement()
    self.state['inIteration'] = wasInIteration

    return node.finishWhileStatement(condition, loopBody)
|
|
|
|
def parseForStatement(self, node):
    """Parse a ``for (init; test; update)`` or ``for (left in right)`` loop.

    ``node`` is the node to finish.  Returns the result of
    ``node.finishForInStatement`` for the for-in form and of
    ``node.finishForStatement`` otherwise.

    The 'allowIn' state flag is cleared while the initializer is parsed
    and 'inIteration' is set while the body is parsed; both are put back
    afterwards.
    """
    previousAllowIn = self.state['allowIn']

    init = test = update = null
    # left/right are only meaningful once the header is known to be a
    # for-in; isForIn records that explicitly.
    left = right = None
    isForIn = False

    self.expectKeyword('for')

    self.expect('(')

    if self.match(';'):
        self.lex()
    elif self.matchKeyword('var'):
        init = Node()
        self.lex()

        self.state['allowIn'] = false
        init = init.finishVariableDeclaration(
            self.parseVariableDeclarationList())
        self.state['allowIn'] = previousAllowIn

        if len(init.declarations) == 1 and self.matchKeyword('in'):
            self.lex()
            left = init
            right = self.parseExpression()
            init = null
            isForIn = True
        else:
            self.expect(';')
    elif self.matchKeyword('const') or self.matchKeyword('let'):
        init = Node()
        kind = self.lex()['value']

        self.state['allowIn'] = false
        declarations = self.parseBindingList(kind, {'inFor': true})
        self.state['allowIn'] = previousAllowIn

        if (len(declarations) == 1 and declarations[0].init == null
                and self.matchKeyword('in')):
            init = init.finishLexicalDeclaration(declarations, kind)
            self.lex()
            left = init
            right = self.parseExpression()
            init = null
            isForIn = True
        else:
            self.consumeSemicolon()
            init = init.finishLexicalDeclaration(declarations, kind)
    else:
        initStartToken = self.lookahead
        self.state['allowIn'] = false
        init = self.inheritCoverGrammar(self.parseAssignmentExpression)
        self.state['allowIn'] = previousAllowIn

        if self.matchKeyword('in'):
            if not self.isAssignmentTarget:
                self.tolerateError(Messages.InvalidLHSInForIn)
            self.lex()
            self.reinterpretExpressionAsPattern(init)
            left = init
            right = self.parseExpression()
            init = null
            isForIn = True
        else:
            if self.match(','):
                # Comma-separated initializers become one sequence node.
                initSeq = [init]
                while self.match(','):
                    self.lex()
                    initSeq.append(
                        self.isolateCoverGrammar(
                            self.parseAssignmentExpression))
                init = WrappingNode(
                    initStartToken).finishSequenceExpression(initSeq)
            self.expect(';')

    if not isForIn:
        if not self.match(';'):
            test = self.parseExpression()

        self.expect(';')

        if not self.match(')'):
            update = self.parseExpression()

    self.expect(')')

    oldInIteration = self.state['inIteration']
    self.state['inIteration'] = true

    body = self.isolateCoverGrammar(self.parseStatement)

    self.state['inIteration'] = oldInIteration

    if isForIn:
        return node.finishForInStatement(left, right, body)
    return node.finishForStatement(init, test, update, body)
|
|
|
|
# 12.7 The continue statement
|
|
|
|
def parseContinueStatement(self, node):
    """Parse ``continue [label];`` (section 12.7).

    An unlabeled ``continue`` outside an iteration and a label missing
    from the current label set are reported through ``throwError``.
    """
    label = null

    self.expectKeyword('continue')

    # Fast path for the most common form: 'continue;' (0x3B is ';').
    semicolonNext = ord(self.source[self.startIndex]) == 0x3B
    if semicolonNext:
        self.lex()

    if semicolonNext or self.hasLineTerminator:
        if not self.state['inIteration']:
            self.throwError(Messages.IllegalContinue)
        return node.finishContinueStatement(null)

    if self.lookahead['type'] == Token.Identifier:
        label = self.parseVariableIdentifier()

        labelKey = '$' + label.name
        if labelKey not in self.state['labelSet']:  # todo make sure its correct!
            self.throwError(Messages.UnknownLabel, label.name)
    self.consumeSemicolon()

    if label == null and not self.state['inIteration']:
        self.throwError(Messages.IllegalContinue)
    return node.finishContinueStatement(label)
|
|
|
|
# 12.8 The break statement
|
|
|
|
def parseBreakStatement(self, node):
    """Parse ``break [label];`` (section 12.8).

    An unlabeled ``break`` outside an iteration or switch and a label
    missing from the current label set are reported through
    ``throwError``.
    """
    label = null

    self.expectKeyword('break')

    # Fast path for the very common 'break;' (0x3B is ';').
    semicolonNext = ord(self.source[self.lastIndex]) == 0x3B
    if semicolonNext:
        self.lex()

    if semicolonNext or self.hasLineTerminator:
        if not (self.state['inIteration'] or self.state['inSwitch']):
            self.throwError(Messages.IllegalBreak)
        return node.finishBreakStatement(null)

    if self.lookahead['type'] == Token.Identifier:
        label = self.parseVariableIdentifier()

        labelKey = '$' + label.name
        if labelKey not in self.state['labelSet']:
            self.throwError(Messages.UnknownLabel, label.name)
    self.consumeSemicolon()

    if (label == null
            and not (self.state['inIteration'] or self.state['inSwitch'])):
        self.throwError(Messages.IllegalBreak)
    return node.finishBreakStatement(label)
|
|
|
|
# 12.9 The return statement
|
|
|
|
def parseReturnStatement(self, node):
    """Parse ``return [argument];`` (section 12.9).

    A ``return`` outside a function body is reported through
    ``tolerateError``.  The argument is ``null`` when none is present.
    """
    argument = null

    self.expectKeyword('return')

    if not self.state['inFunctionBody']:
        self.tolerateError(Messages.IllegalReturn)

    # Fast path: 'return' followed by a space (0x20) and an identifier
    # start character is very common.
    if (ord(self.source[self.lastIndex]) == 0x20
            and isIdentifierStart(self.source[self.lastIndex + 1])):
        argument = self.parseExpression()
        self.consumeSemicolon()
        return node.finishReturnStatement(argument)

    if self.hasLineTerminator:
        # HACK
        return node.finishReturnStatement(null)

    nothingToReturn = (self.match(';') or self.match('}')
                       or self.lookahead['type'] == Token.EOF)
    if not nothingToReturn:
        argument = self.parseExpression()
    self.consumeSemicolon()

    return node.finishReturnStatement(argument)
|
|
|
|
# 12.10 The with statement
|
|
|
|
def parseWithStatement(self, node):
    """Parse ``with (object) body`` (section 12.10).

    In strict mode the statement is reported through ``tolerateError``
    before parsing continues.
    """
    if self.strict:
        self.tolerateError(Messages.StrictModeWith)

    self.expectKeyword('with')
    self.expect('(')
    scopeObject = self.parseExpression()
    self.expect(')')

    statement = self.parseStatement()
    return node.finishWithStatement(scopeObject, statement)
|
|
|
|
# 12.11 The switch statement
|
|
|
|
def parseSwitchCase(self):
    """Parse one ``case <expr>:`` or ``default:`` clause of a switch.

    Returns the finished switch case; its test is ``null`` for the
    ``default`` clause.
    """
    statements = []
    clause = Node()

    if self.matchKeyword('default'):
        self.lex()
        test = null
    else:
        self.expectKeyword('case')
        test = self.parseExpression()

    self.expect(':')

    # The clause body runs until the next clause or the closing brace.
    while self.startIndex < self.length and not (
            self.match('}') or self.matchKeyword('default')
            or self.matchKeyword('case')):
        statements.append(self.parseStatementListItem())
    return clause.finishSwitchCase(test, statements)
|
|
|
|
def parseSwitchStatement(self, node):
    """Parse ``switch (discriminant) { clauses }``.

    A second ``default`` clause is reported through ``throwError``.  The
    'inSwitch' state flag is set while the clauses are parsed and put
    back afterwards.
    """
    self.expectKeyword('switch')
    self.expect('(')
    discriminant = self.parseExpression()
    self.expect(')')
    self.expect('{')

    clauses = []

    # Empty switch body: nothing else to set up.
    if self.match('}'):
        self.lex()
        return node.finishSwitchStatement(discriminant, clauses)

    wasInSwitch = self.state['inSwitch']
    self.state['inSwitch'] = true
    sawDefault = false

    while self.startIndex < self.length and not self.match('}'):
        clause = self.parseSwitchCase()
        if clause.test == null:
            if sawDefault:
                self.throwError(Messages.MultipleDefaultsInSwitch)
            sawDefault = true
        clauses.append(clause)

    self.state['inSwitch'] = wasInSwitch

    self.expect('}')

    return node.finishSwitchStatement(discriminant, clauses)
|
|
|
|
# 12.13 The throw statement
|
|
|
|
def parseThrowStatement(self, node):
    """Parse ``throw <expression>;`` (section 12.13).

    A line terminator right after ``throw`` is reported through
    ``throwError``.
    """
    self.expectKeyword('throw')

    if self.hasLineTerminator:
        self.throwError(Messages.NewlineAfterThrow)

    thrown = self.parseExpression()
    self.consumeSemicolon()
    return node.finishThrowStatement(thrown)
|
|
|
|
# 12.14 The try statement
|
|
|
|
def parseCatchClause(self):
    """Parse ``catch (param) { ... }`` (section 12.14).

    Returns the finished catch clause.  An empty parameter list is
    reported through ``throwUnexpectedToken``.
    """
    node = Node()

    self.expectKeyword('catch')

    self.expect('(')
    if self.match(')'):
        self.throwUnexpectedToken(self.lookahead)
    param = self.parsePattern()

    # 12.14.1: restricted words may not be catch variables in strict mode.
    # Only identifiers are checked, matching parseLexicalBinding, so a
    # non-identifier pattern is never asked for a ``name``.
    if (self.strict and param.type == Syntax.Identifier
            and isRestrictedWord(param.name)):
        self.tolerateError(Messages.StrictCatchVariable)

    self.expect(')')
    body = self.parseBlock()
    return node.finishCatchClause(param, body)
|
|
|
|
def parseTryStatement(self, node):
    """Parse ``try { } [catch (e) { }] [finally { }]``.

    A ``try`` with neither handler nor finalizer is reported through
    ``throwError``.
    """
    catchClause = null
    finallyBlock = null

    self.expectKeyword('try')
    tryBlock = self.parseBlock()

    if self.matchKeyword('catch'):
        catchClause = self.parseCatchClause()

    if self.matchKeyword('finally'):
        self.lex()
        finallyBlock = self.parseBlock()

    if not (catchClause or finallyBlock):
        self.throwError(Messages.NoCatchOrFinally)

    return node.finishTryStatement(tryBlock, catchClause, finallyBlock)
|
|
|
|
# 12.15 The debugger statement
|
|
|
|
def parseDebuggerStatement(self, node):
    """Parse ``debugger;`` and finish ``node`` as a debugger statement."""
    self.expectKeyword('debugger')
    self.consumeSemicolon()
    finished = node.finishDebuggerStatement()
    return finished
|
|
|
|
# 12 Statements
|
|
|
|
def parseStatement(self):
    """Parse a single statement (section 12).

    Dispatches on the lookahead token: blocks, ';', '(' and the
    statement keywords go to their dedicated parsers.  Anything else is
    parsed as an expression, which becomes a labeled statement when it is
    an identifier followed by ':' and an expression statement otherwise.
    """
    kind = self.lookahead['type']

    if kind == Token.EOF:
        self.throwUnexpectedToken(self.lookahead)

    if kind == Token.Punctuator and self.lookahead['value'] == '{':
        return self.parseBlock()

    self.isAssignmentTarget = self.isBindingElement = true
    node = Node()
    value = self.lookahead['value']

    if kind == Token.Punctuator:
        if value == ';':
            return self.parseEmptyStatement(node)
        if value == '(':
            return self.parseExpressionStatement(node)
    elif kind == Token.Keyword:
        if value == 'break':
            return self.parseBreakStatement(node)
        if value == 'continue':
            return self.parseContinueStatement(node)
        if value == 'debugger':
            return self.parseDebuggerStatement(node)
        if value == 'do':
            return self.parseDoWhileStatement(node)
        if value == 'for':
            return self.parseForStatement(node)
        if value == 'function':
            return self.parseFunctionDeclaration(node)
        if value == 'if':
            return self.parseIfStatement(node)
        if value == 'return':
            return self.parseReturnStatement(node)
        if value == 'switch':
            return self.parseSwitchStatement(node)
        if value == 'throw':
            return self.parseThrowStatement(node)
        if value == 'try':
            return self.parseTryStatement(node)
        if value == 'var':
            return self.parseVariableStatement(node)
        if value == 'while':
            return self.parseWhileStatement(node)
        if value == 'with':
            return self.parseWithStatement(node)

    expression = self.parseExpression()

    isLabel = (expression.type == Syntax.Identifier) and self.match(':')
    if not isLabel:
        self.consumeSemicolon()
        return node.finishExpressionStatement(expression)

    # 12.12 Labelled Statements
    self.lex()

    labelKey = '$' + expression.name
    if labelKey in self.state['labelSet']:
        self.throwError(Messages.Redeclaration, 'Label', expression.name)
    # The label is registered only while its statement is being parsed.
    self.state['labelSet'][labelKey] = true
    labeled = self.parseStatement()
    del self.state['labelSet'][labelKey]
    return node.finishLabeledStatement(expression, labeled)
|
|
|
|
# 13 Function Definition
|
|
|
|
def parseFunctionSourceElements(self):
    """Parse a function body ``{ ... }`` (section 13).

    Leading string-literal statements are treated as directives; a
    'use strict' directive switches ``self.strict`` on.  The label set,
    iteration/switch/function-body flags and the parenthesized count are
    reset for the body and put back afterwards.

    Returns the finished block statement.
    """
    statements = []
    node = Node()
    firstOctal = None

    self.expect('{')

    # Directive prologue.
    while self.startIndex < self.length:
        token = self.lookahead
        if token['type'] != Token.StringLiteral:
            break

        statement = self.parseStatementListItem()
        statements.append(statement)
        if statement.expression.type != Syntax.Literal:
            # this is not directive
            break
        # Raw text between the quotes of the literal.
        directive = self.source[token['start'] + 1:token['end'] - 1]
        if directive == 'use strict':
            self.strict = true
            if firstOctal:
                self.tolerateUnexpectedToken(firstOctal,
                                             Messages.StrictOctalLiteral)
        elif not firstOctal and token.get('octal'):
            firstOctal = token

    savedLabelSet = self.state['labelSet']
    savedInIteration = self.state['inIteration']
    savedInSwitch = self.state['inSwitch']
    savedInFunctionBody = self.state['inFunctionBody']
    savedParenthesizedCount = self.state['parenthesizedCount']

    self.state['labelSet'] = {}
    self.state['inIteration'] = false
    self.state['inSwitch'] = false
    self.state['inFunctionBody'] = true
    self.state['parenthesizedCount'] = 0

    while self.startIndex < self.length and not self.match('}'):
        statements.append(self.parseStatementListItem())
    self.expect('}')

    self.state['labelSet'] = savedLabelSet
    self.state['inIteration'] = savedInIteration
    self.state['inSwitch'] = savedInSwitch
    self.state['inFunctionBody'] = savedInFunctionBody
    self.state['parenthesizedCount'] = savedParenthesizedCount

    return node.finishBlockStatement(statements)
|
|
|
|
def validateParam(self, options, param, name):
    """Record strict-mode problems of one parameter in ``options``.

    In strict mode a restricted or duplicate name is stored under
    'stricted'.  Otherwise, while 'firstRestricted' is still unset, the
    first restricted, strict-reserved or duplicate name is stored there.
    The matching message goes to 'message' and the name is always added
    to ``options['paramSet']``.
    """
    def record(slot, message):
        options[slot] = param
        options['message'] = message

    key = '$' + name
    if self.strict:
        if isRestrictedWord(name):
            record('stricted', Messages.StrictParamName)
        if key in options['paramSet']:
            record('stricted', Messages.StrictParamDupe)
    elif not options['firstRestricted']:
        if isRestrictedWord(name):
            record('firstRestricted', Messages.StrictParamName)
        elif isStrictModeReservedWord(name):
            record('firstRestricted', Messages.StrictReservedWord)
        elif key in options['paramSet']:
            record('firstRestricted', Messages.StrictParamDupe)
    options['paramSet'][key] = true
|
|
|
|
def parseParam(self, options):
    """Parse one formal parameter and append it to ``options``.

    The parameter goes to ``options['params']`` and its default (or
    ``None``) to ``options['defaults']``.

    Returns ``false`` after a rest element, otherwise whether the next
    token is something other than ')'.
    """
    token = self.lookahead
    if token['value'] == '...':
        rest = self.parseRestElement()
        self.validateParam(options, rest.argument, rest.argument.name)
        options['params'].append(rest)
        options['defaults'].append(null)
        return false

    param = self.parsePatternWithDefault()
    self.validateParam(options, token, token['value'])

    default = None
    if param.type == Syntax.AssignmentPattern:
        # Split "name = value" into the binding and its default.
        default, param = param.right, param.left
        options['defaultCount'] += 1
    options['params'].append(param)
    options['defaults'].append(default)
    return not self.match(')')
|
|
|
|
def parseParams(self, firstRestricted):
    """Parse a parenthesized formal parameter list.

    ``firstRestricted`` seeds the entry of the same name in the options
    passed to ``parseParam``.

    Returns a dict with the keys 'params', 'defaults', 'stricted',
    'firstRestricted' and 'message'; 'defaults' is emptied when no
    parameter had a default.
    """
    options = {
        'params': [],
        'defaultCount': 0,
        'defaults': [],
        'firstRestricted': firstRestricted
    }

    self.expect('(')

    if not self.match(')'):
        options['paramSet'] = {}
        # parseParam returns a false value once the list is finished.
        while self.startIndex < self.length and self.parseParam(options):
            self.expect(',')
    self.expect(')')

    if options['defaultCount'] == 0:
        options['defaults'] = []

    result = {
        'params': options['params'],
        'defaults': options['defaults']
    }
    for key in ('stricted', 'firstRestricted', 'message'):
        result[key] = options.get(key)
    return result
|
|
|
|
def parseFunctionDeclaration(self, node, identifierIsOptional=None):
    """Parse ``function [name](params) { body }`` as a declaration.

    ``node`` is the node to finish.  ``self.strict`` is put back to its
    previous value after the body has been parsed.

    Returns the finished function declaration.
    """
    d = null
    message = None
    firstRestricted = None

    self.expectKeyword('function')
    # NOTE(review): with the default identifierIsOptional=None the name is
    # skipped whenever '(' follows directly, so a nameless declaration is
    # accepted with d left as null - confirm this leniency is intended.
    if identifierIsOptional or not self.match('('):
        token = self.lookahead
        d = self.parseVariableIdentifier()
        name = token['value']
        if isRestrictedWord(name):
            if self.strict:
                self.tolerateUnexpectedToken(token,
                                             Messages.StrictFunctionName)
            else:
                # Only an error if the body later turns strict.
                firstRestricted = token
                message = Messages.StrictFunctionName
        elif not self.strict and isStrictModeReservedWord(name):
            firstRestricted = token
            message = Messages.StrictReservedWord

    parsed = self.parseParams(firstRestricted)
    params = parsed['params']
    defaults = parsed['defaults']
    stricted = parsed.get('stricted')
    firstRestricted = parsed['firstRestricted']
    message = parsed.get('message') or message

    previousStrict = self.strict
    body = self.parseFunctionSourceElements()
    # The body may have switched strict mode on via a directive.
    if self.strict and firstRestricted:
        self.throwUnexpectedToken(firstRestricted, message)

    if self.strict and stricted:
        self.tolerateUnexpectedToken(stricted, message)
    self.strict = previousStrict

    return node.finishFunctionDeclaration(d, params, defaults, body)
|
|
|
|
def parseFunctionExpression(self):
    """Parse ``function [name](params) { body }`` as an expression.

    The name is parsed only when the token after ``function`` is not
    '('.  ``self.strict`` is put back to its previous value after the
    body has been parsed.

    Returns the finished function expression.
    """
    id = null
    node = Node()
    firstRestricted = None
    message = None

    self.expectKeyword('function')

    if not self.match('('):
        token = self.lookahead
        id = self.parseVariableIdentifier()
        name = token['value']
        if isRestrictedWord(name):
            if self.strict:
                self.tolerateUnexpectedToken(token,
                                             Messages.StrictFunctionName)
            else:
                # Only an error if the body later turns strict.
                firstRestricted = token
                message = Messages.StrictFunctionName
        elif not self.strict and isStrictModeReservedWord(name):
            firstRestricted = token
            message = Messages.StrictReservedWord

    parsed = self.parseParams(firstRestricted)
    params = parsed['params']
    defaults = parsed['defaults']
    stricted = parsed.get('stricted')
    firstRestricted = parsed['firstRestricted']
    message = parsed.get('message') or message

    previousStrict = self.strict
    body = self.parseFunctionSourceElements()
    # The body may have switched strict mode on via a directive.
    if self.strict and firstRestricted:
        self.throwUnexpectedToken(firstRestricted, message)
    if self.strict and stricted:
        self.tolerateUnexpectedToken(stricted, message)
    self.strict = previousStrict

    return node.finishFunctionExpression(id, params, defaults, body)
|
|
|
|
# todo Translate parse class functions!
|
|
|
|
def parseClassExpression(self):
    """Reject a class expression; always raises Ecma51NotSupported."""
    unsupported = 'ClassExpression'
    raise Ecma51NotSupported(unsupported)
|
|
|
|
def parseClassDeclaration(self):
    """Reject a class declaration; always raises Ecma51NotSupported."""
    unsupported = 'ClassDeclaration'
    raise Ecma51NotSupported(unsupported)
|
|
|
|
# 14 Program
|
|
|
|
def parseScriptBody(self):
    """Parse all top-level statements of a script (section 14).

    Leading string-literal statements are treated as directives; a
    'use strict' directive switches ``self.strict`` on.

    Returns the list of parsed statements.
    """
    statements = []
    firstOctal = None

    # Directive prologue.
    while self.startIndex < self.length:
        token = self.lookahead
        if token['type'] != Token.StringLiteral:
            break
        statement = self.parseStatementListItem()
        statements.append(statement)
        if statement.expression.type != Syntax.Literal:
            # this is not directive
            break
        # Raw text between the quotes of the literal.
        directive = self.source[token['start'] + 1:token['end'] - 1]
        if directive == 'use strict':
            self.strict = true
            if firstOctal:
                self.tolerateUnexpectedToken(firstOctal,
                                             Messages.StrictOctalLiteral)
        elif not firstOctal and token.get('octal'):
            firstOctal = token

    # Remaining statements.
    while self.startIndex < self.length:
        statement = self.parseStatementListItem()
        # istanbul ignore if
        if statement is None:
            break
        statements.append(statement)
    return statements
|
|
|
|
def parseProgram(self):
    """Parse the whole source and return the finished program node."""
    self.peek()
    program = Node()
    statements = self.parseScriptBody()
    return program.finishProgram(statements)
|
|
|
|
# DONE!!!
|
|
def parse(self, code, options=None):
    """Parse JavaScript ``code`` and return its syntax tree as a dict.

    ``options`` must be empty or omitted; any non-empty value raises
    NotImplementedError.  The default is ``None`` rather than a shared
    mutable dict.

    All parser state is reset before parsing.  An Ecma51NotSupported
    raised during parsing is re-raised as the error built by
    ``self.createError``.
    """
    if options:
        raise NotImplementedError(
            'Options not implemented! You can only use default settings.')

    self.clean()
    # A terminator is appended so the parser does not have to check for
    # EOF every time.
    self.source = unicode(code) + ' \n ; //END'
    self.index = 0
    self.lineNumber = 1 if len(self.source) > 0 else 0
    self.lineStart = 0
    self.startIndex = self.index
    self.startLineNumber = self.lineNumber
    self.startLineStart = self.lineStart
    self.length = len(self.source)
    self.lookahead = null
    self.state = {
        'allowIn': true,
        'labelSet': {},
        'inFunctionBody': false,
        'inIteration': false,
        'inSwitch': false,
        'lastCommentStart': -1,
        'curlyStack': [],
        'parenthesizedCount': None
    }
    self.sourceType = 'script'
    self.strict = false
    try:
        program = self.parseProgram()
    except Ecma51NotSupported as e:
        raise self.createError(self.lineNumber, self.lastIndex, unicode(e))
    return node_to_dict(program)
|
|
|
|
|
|
def parse(javascript_code):
    """Return the syntax tree of ``javascript_code``.

    Convenience wrapper: creates a fresh PyJsParser and calls its
    ``parse`` method.
    """
    return PyJsParser().parse(javascript_code)
|
|
|
|
|
|
if __name__ == '__main__':
    # Small manual benchmark: parse a file (when test_path is set) or a
    # one-line sample, then report the time taken.
    import time

    test_path = None
    if test_path:
        # The context manager closes the file even if read() fails.
        with open(test_path, 'rb') as f:
            x = f.read()
    else:
        x = 'var $ = "Hello!"'
    p = PyJsParser()
    t = time.time()
    res = p.parse(x)
    # The tiny offset avoids a division by zero below.
    dt = time.time() - t + 0.000000001
    if test_path:
        print(len(res))
    else:
        pprint(res)
    print()
    print('Parsed everyting in', round(dt, 5), 'seconds.')
    print('Thats %d characters per second' % int(len(x) / dt))
|