You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
408 lines
12 KiB
408 lines
12 KiB
# The MIT License
|
|
#
|
|
# Copyright 2014, 2015 Piotr Dabkowski
|
|
#
|
|
# Permission is hereby granted, free of charge, to any person obtaining
|
|
# a copy of this software and associated documentation files (the 'Software'),
|
|
# to deal in the Software without restriction, including without limitation the rights
|
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
|
# the Software, and to permit persons to whom the Software is furnished to do so, subject
|
|
# to the following conditions:
|
|
#
|
|
# The above copyright notice and this permission notice shall be included in all copies or
|
|
# substantial portions of the Software.
|
|
#
|
|
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
|
# LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
|
|
# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE
|
|
from __future__ import unicode_literals
|
|
|
|
import sys
|
|
import unicodedata
|
|
from collections import defaultdict
|
|
|
|
# True when the running interpreter is Python 3 or newer.
PY3 = sys.version_info[0] >= 3

# Python 3 no longer provides these Python 2 builtins; alias them so the rest
# of the module can keep using the Python 2 spellings on either interpreter.
if PY3:
    unichr, xrange, unicode = chr, range, str
|
|
|
|
# Numeric ids of the lexical token types, keyed by token type name.
token = {
    'BooleanLiteral': 1,
    'EOF': 2,
    'Identifier': 3,
    'Keyword': 4,
    'NullLiteral': 5,
    'NumericLiteral': 6,
    'Punctuator': 7,
    'StringLiteral': 8,
    'RegularExpression': 9,
    'Template': 10,
}

# Reverse lookup of `token`: numeric id -> token type name.
TokenName = dict((number, name) for name, number in token.items())
|
|
|
|
# Ordered list of punctuators and keywords, grouped as in the original source.
# NOTE(review): the name suggests these are tokens after which a function
# expression may follow - confirm against the consumer of this list.
FnExprTokens = [
    '(', '{', '[',
    'in', 'typeof', 'instanceof', 'new', 'return', 'case', 'delete',
    'throw', 'void',
    # assignment operators
    '=', '+=', '-=', '*=', '/=', '%=',
    '<<=', '>>=', '>>>=',
    '&=', '|=', '^=',
    ',',
    # binary/unary operators
    '+', '-', '*', '/', '%',
    '++', '--',
    '<<', '>>', '>>>',
    '&', '|', '^',
    '!', '~',
    '&&', '||',
    '?', ':',
    '===', '==',
    '>=', '<=', '<', '>',
    '!=', '!==',
]
|
|
|
|
# Set of syntax-tree node type names known to this module.
syntax = set([
    'AssignmentExpression', 'AssignmentPattern',
    'ArrayExpression', 'ArrayPattern',
    'ArrowFunctionExpression',
    'BlockStatement', 'BinaryExpression', 'BreakStatement',
    'CallExpression', 'CatchClause',
    'ClassBody', 'ClassDeclaration', 'ClassExpression',
    'ConditionalExpression', 'ContinueStatement',
    'DoWhileStatement', 'DebuggerStatement',
    'EmptyStatement',
    'ExportAllDeclaration', 'ExportDefaultDeclaration',
    'ExportNamedDeclaration', 'ExportSpecifier',
    'ExpressionStatement',
    'ForStatement', 'ForInStatement',
    'FunctionDeclaration', 'FunctionExpression',
    'Identifier', 'IfStatement',
    'ImportDeclaration', 'ImportDefaultSpecifier',
    'ImportNamespaceSpecifier', 'ImportSpecifier',
    'Literal', 'LabeledStatement', 'LogicalExpression',
    'MemberExpression', 'MethodDefinition',
    'NewExpression',
    'ObjectExpression', 'ObjectPattern',
    'Program', 'Property',
    'RestElement', 'ReturnStatement',
    'SequenceExpression', 'SpreadElement', 'Super',
    'SwitchCase', 'SwitchStatement',
    'TaggedTemplateExpression', 'TemplateElement', 'TemplateLiteral',
    'ThisExpression', 'ThrowStatement', 'TryStatement',
    'UnaryExpression', 'UpdateExpression',
    'VariableDeclaration', 'VariableDeclarator',
    'WhileStatement', 'WithStatement',
])
|
|
|
|
# Set of node type names flagged as supported.
# NOTE(review): which component consults this set is not visible here.
supported_syntax = set([
    'AssignmentExpression', 'ArrayExpression',
    'BlockStatement', 'BinaryExpression', 'BreakStatement',
    'CallExpression', 'CatchClause',
    'ConditionalExpression', 'ContinueStatement',
    'DoWhileStatement', 'DebuggerStatement',
    'EmptyStatement', 'ExpressionStatement',
    'ForStatement', 'ForInStatement',
    'FunctionDeclaration', 'FunctionExpression',
    'Identifier', 'IfStatement',
    'Literal', 'LabeledStatement', 'LogicalExpression',
    'MemberExpression', 'MethodDefinition',
    'NewExpression',
    'ObjectExpression',
    'Program', 'Property',
    'ReturnStatement',
    'SequenceExpression',
    'SwitchCase', 'SwitchStatement',
    'ThisExpression', 'ThrowStatement', 'TryStatement',
    'UnaryExpression', 'UpdateExpression',
    'VariableDeclaration', 'VariableDeclarator',
    'WhileStatement', 'WithStatement',
])
|
|
|
|
# Error messages should be identical to V8.
|
|
# Error message templates keyed by message name. Entries containing `%s` are
# %-format templates; the others are fixed strings.
messages = {
    # unexpected input
    'UnexpectedToken': 'Unexpected token %s',
    'UnexpectedNumber': 'Unexpected number',
    'UnexpectedString': 'Unexpected string',
    'UnexpectedIdentifier': 'Unexpected identifier',
    'UnexpectedReserved': 'Unexpected reserved word',
    'UnexpectedTemplate': 'Unexpected quasi %s',
    'UnexpectedEOS': 'Unexpected end of input',
    'NewlineAfterThrow': 'Illegal newline after throw',
    # regular expressions
    'InvalidRegExp': 'Invalid regular expression',
    'UnterminatedRegExp': 'Invalid regular expression: missing /',
    # statements and declarations
    'InvalidLHSInAssignment': 'Invalid left-hand side in assignment',
    'InvalidLHSInForIn': 'Invalid left-hand side in for-in',
    'MultipleDefaultsInSwitch': 'More than one default clause in switch statement',
    'NoCatchOrFinally': 'Missing catch or finally after try',
    'UnknownLabel': 'Undefined label \'%s\'',
    'Redeclaration': '%s \'%s\' has already been declared',
    'IllegalContinue': 'Illegal continue statement',
    'IllegalBreak': 'Illegal break statement',
    'IllegalReturn': 'Illegal return statement',
    # strict mode
    'StrictModeWith': 'Strict mode code may not include a with statement',
    'StrictCatchVariable': 'Catch variable may not be eval or arguments in strict mode',
    'StrictVarName': 'Variable name may not be eval or arguments in strict mode',
    'StrictParamName': 'Parameter name eval or arguments is not allowed in strict mode',
    'StrictParamDupe': 'Strict mode function may not have duplicate parameter names',
    'StrictFunctionName': 'Function name may not be eval or arguments in strict mode',
    'StrictOctalLiteral': 'Octal literals are not allowed in strict mode.',
    'StrictDelete': 'Delete of an unqualified identifier in strict mode.',
    'StrictLHSAssignment': 'Assignment to eval or arguments is not allowed in strict mode',
    'StrictLHSPostfix': 'Postfix increment/decrement may not have eval or arguments operand in strict mode',
    'StrictLHSPrefix': 'Prefix increment/decrement may not have eval or arguments operand in strict mode',
    'StrictReservedWord': 'Use of future reserved word in strict mode',
    # templates, parameters, object literals and classes
    'TemplateOctalLiteral': 'Octal literals are not allowed in template strings.',
    'ParameterAfterRestParameter': 'Rest parameter must be last formal parameter',
    'DefaultRestParameter': 'Unexpected token =',
    'ObjectPatternAsRestParameter': 'Unexpected token {',
    'DuplicateProtoProperty': 'Duplicate __proto__ fields are not allowed in object literals',
    'ConstructorSpecialMethod': 'Class constructor may not be an accessor',
    'DuplicateConstructor': 'A class may only have one constructor',
    'StaticPrototype': 'Classes may not have static property named prototype',
    # modules
    'MissingFromClause': 'Unexpected token',
    'NoAsAfterImportNamespace': 'Unexpected token',
    'InvalidModuleSpecifier': 'Unexpected token',
    'IllegalImportDeclaration': 'Unexpected token',
    'IllegalExportDeclaration': 'Unexpected token',
}
|
|
|
|
# Precedence level of each binary operator; operators sharing a level are
# written on the same line. Level 10 is unused in this table.
PRECEDENCE = {
    '||': 1,
    '&&': 2,
    '|': 3,
    '^': 4,
    '&': 5,
    '==': 6, '!=': 6, '===': 6, '!==': 6,
    '<': 7, '>': 7, '<=': 7, '>=': 7, 'instanceof': 7, 'in': 7,
    '<<': 8, '>>': 8, '>>>': 8,
    '+': 9, '-': 9,
    '*': 11, '/': 11, '%': 11,
}
|
|
|
|
|
|
class Token:
    """Attribute namespace, empty at definition time.

    Module-level code further down sets one attribute per entry of the
    `token` table on this class.
    """
|
|
|
|
|
|
class Syntax:
    """Attribute namespace, empty at definition time.

    Module-level code further down sets one attribute per member of the
    `syntax` set on this class, each attribute holding its own name.
    """
|
|
|
|
|
|
class Messages:
    """Attribute namespace, empty at definition time.

    Module-level code further down sets one attribute per entry of the
    `messages` table on this class.
    """
|
|
|
|
|
|
class PlaceHolders:
    """Holder for placeholder marker strings."""

    # Marker string whose value equals its own name.
    # NOTE(review): presumably used as a node type while arrow-function
    # parameters are being parsed - confirm against the parser.
    ArrowParameterPlaceHolder = 'ArrowParameterPlaceHolder'
|
|
|
|
|
|
# Mirror the lookup tables onto their namespace classes so that entries can be
# read as attributes (Token.EOF, Syntax.Program, Messages.UnexpectedToken).
# The loop variables are module-level names and are deliberately kept as-is.

# Token.<name> = numeric token id
for k, v in token.items():
    setattr(Token, k, v)

# Syntax.<name> = '<name>'
for e in syntax:
    setattr(Syntax, e, e)

# Messages.<name> = message template
for k, v in messages.items():
    setattr(Messages, k, v)
|
|
|
|
#http://stackoverflow.com/questions/14245893/efficiently-list-all-characters-in-a-given-unicode-category
|
|
# Named single-character constants.

# Format characters
BOM = u'\uFEFF'   # byte order mark / zero width no-break space
ZWJ = u'\u200D'   # zero width joiner
ZWNJ = u'\u200C'  # zero width non-joiner

# White space
TAB = u'\u0009'   # horizontal tab
VT = u'\u000B'    # vertical tab
FF = u'\u000C'    # form feed
SP = u'\u0020'    # space
NBSP = u'\u00A0'  # no-break space

# Line terminators
LF = u'\u000A'    # line feed
CR = u'\u000D'    # carriage return
LS = u'\u2028'    # line separator
PS = u'\u2029'    # paragraph separator
|
|
|
|
|
|
# Unicode general-category codes (as returned by unicodedata.category) used to
# classify identifier characters.
LETTER_CATEGORIES = set(('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl'))

COMBINING_MARK_CATEGORIES = set(('Mn', 'Mc'))
DIGIT_CATEGORIES = set(('Nd',))
CONNECTOR_PUNCTUATION_CATEGORIES = set(('Pc',))

# Categories allowed for the first character of an identifier: an independent
# copy of the letter categories. Extra start characters are listed separately.
IDENTIFIER_START_CATEGORIES = set(LETTER_CATEGORIES)

# Categories allowed for the remaining characters of an identifier.
IDENTIFIER_PART_CATEGORIES = (IDENTIFIER_START_CATEGORIES
                              | COMBINING_MARK_CATEGORIES
                              | DIGIT_CATEGORIES
                              | CONNECTOR_PUNCTUATION_CATEGORIES)
|
|
|
|
# Individual characters accepted at the start of an identifier regardless of
# their Unicode category: dollar sign, underscore and backslash.
EXTRA_IDENTIFIER_START_CHARS = set(['$', '_', '\\'])

# Characters accepted after the first position: the start characters plus the
# zero width joiner and zero width non-joiner.
EXTRA_IDENTIFIER_PART_CHARS = EXTRA_IDENTIFIER_START_CHARS | set([ZWJ, ZWNJ])
|
|
|
|
# Code points (integers, not characters) classified as white space.
WHITE_SPACE = set([
    0x20, 0x09, 0x0B, 0x0C, 0xA0,
    0x1680, 0x180E,
    0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
    0x2006, 0x2007, 0x2008, 0x2009, 0x200A,
    0x202F, 0x205F, 0x3000, 0xFEFF,
])

# Code points classified as line terminators: LF, CR, U+2028 and U+2029.
LINE_TERMINATORS = set([0x0A, 0x0D, 0x2028, 0x2029])
|
|
|
|
|
|
def isIdentifierStart(ch):
    """Return True if `ch` may be the first character of an identifier.

    `ch` is either a text string, used as given, or any other value, which
    is converted with `unichr` (so in practice an integer code point).
    The character qualifies when its Unicode category is in
    IDENTIFIER_START_CATEGORIES or it is in EXTRA_IDENTIFIER_START_CHARS.
    """
    if isinstance(ch, unicode):
        uch = ch
    else:
        uch = unichr(ch)
    if unicodedata.category(uch) in IDENTIFIER_START_CATEGORIES:
        return True
    return uch in EXTRA_IDENTIFIER_START_CHARS
|
|
|
|
|
|
def isIdentifierPart(ch):
    """Return True if `ch` may appear after the first character of an identifier.

    `ch` is either a text string, used as given, or any other value, which
    is converted with `unichr` (so in practice an integer code point).
    The character qualifies when its Unicode category is in
    IDENTIFIER_PART_CATEGORIES or it is in EXTRA_IDENTIFIER_PART_CHARS.
    """
    if isinstance(ch, unicode):
        uch = ch
    else:
        uch = unichr(ch)
    if unicodedata.category(uch) in IDENTIFIER_PART_CATEGORIES:
        return True
    return uch in EXTRA_IDENTIFIER_PART_CHARS
|
|
|
|
|
|
def isValidIdentifier(name):
    """Return True if `name` is usable as an identifier.

    A falsy `name` (for example the empty string) and any keyword are
    rejected. Otherwise the first character must satisfy isIdentifierStart
    and every following character must satisfy isIdentifierPart.
    """
    if not name:
        return False
    if isKeyword(name):
        return False
    for position, ch in enumerate(name):
        # Only the leading character is held to the stricter start rule.
        accepts = isIdentifierStart if position == 0 else isIdentifierPart
        if not accepts(ch):
            return False
    return True
|
|
|
|
|
|
def isWhiteSpace(ch):
    """Return True if `ch` is in WHITE_SPACE.

    A text string is converted to its code point with `ord`; any other value
    is looked up as given (WHITE_SPACE holds integer code points).
    """
    code = ord(ch) if isinstance(ch, unicode) else ch
    return code in WHITE_SPACE
|
|
|
|
|
|
def isLineTerminator(ch):
    """Return True if `ch` is in LINE_TERMINATORS.

    A text string is converted to its code point with `ord`; any other value
    is looked up as given (LINE_TERMINATORS holds integer code points).
    """
    code = ord(ch) if isinstance(ch, unicode) else ch
    return code in LINE_TERMINATORS
|
|
|
|
|
|
# Digit characters accepted in each radix.
OCTAL = set('01234567')
DEC = set('0123456789')
HEX = set('0123456789abcdefABCDEF')

# Numeric value of every hexadecimal digit character. Lower-case digits come
# first; the upper-case letters A-F are added afterwards with values 10-15.
HEX_CONV = dict(
    (digit, value) for value, digit in enumerate('0123456789abcdef'))
for i, e in enumerate('ABCDEF', 10):
    HEX_CONV[e] = i
|
|
|
|
|
|
def isDecimalDigit(ch):
    """Return True if `ch` is one of the characters in DEC.

    A text string is tested as given; any other value is first converted
    with `unichr`.
    """
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    return ch in DEC
|
|
|
|
|
|
def isHexDigit(ch):
    """Return True if `ch` is one of the characters in HEX.

    A text string is tested as given; any other value is first converted
    with `unichr`.
    """
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    return ch in HEX
|
|
|
|
|
|
def isOctalDigit(ch):
    """Return True if `ch` is one of the characters in OCTAL.

    A text string is tested as given; any other value is first converted
    with `unichr`.
    """
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    return ch in OCTAL
|
|
|
|
|
|
def isFutureReservedWord(w):
    """Return True if `w` is 'enum', 'export', 'import' or 'super'."""
    future_reserved = ('enum', 'export', 'import', 'super')
    return w in future_reserved
|
|
|
|
|
|
# Words tested by isStrictModeReservedWord.
RESERVED_WORD = set([
    'implements', 'interface', 'package',
    'private', 'protected', 'public',
    'static', 'yield', 'let',
])


def isStrictModeReservedWord(w):
    """Return True if `w` is a member of RESERVED_WORD."""
    return w in RESERVED_WORD
|
|
|
|
|
|
def isRestrictedWord(w):
    """Return True if `w` is 'eval' or 'arguments'."""
    restricted = ('eval', 'arguments')
    return w in restricted
|
|
|
|
|
|
# Words treated as keywords, listed by increasing length.
KEYWORDS = set([
    'if', 'in', 'do',
    'var', 'for', 'new', 'try', 'let',
    'this', 'else', 'case', 'void', 'with', 'enum',
    'while', 'break', 'catch', 'throw', 'const', 'yield', 'class', 'super',
    'return', 'typeof', 'delete', 'switch', 'export', 'import',
    'default', 'finally', 'extends',
    'function', 'continue', 'debugger',
    'instanceof', 'pyimport',
])


def isKeyword(w):
    """Return True if `w` is a member of KEYWORDS.

    Notes carried over from the original comments:
    - 'const' is specialized as Keyword in V8.
    - 'yield' and 'let' are for compatibility with SpiderMonkey and ES.next.
    - Some others are from future reserved words.
    """
    return w in KEYWORDS
|
|
|
|
|
|
class JsSyntaxError(Exception):
    """Exception subclass defined by this module; it adds no behavior of its own."""
|
|
|
|
|
|
if __name__ == '__main__':
    # Minimal self-check, run only when the module is executed as a script.
    # Each predicate must accept its sample; both character and integer
    # code-point arguments are exercised.
    for predicate, sample in (
            (isLineTerminator, '\n'),
            (isLineTerminator, 0x0A),
            (isIdentifierStart, '$'),
            (isIdentifierStart, 100),
            (isWhiteSpace, ' '),
    ):
        assert predicate(sample)
|