# The MIT License
#
# Copyright 2014, 2015 Piotr Dabkowski
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the 'Software'),
# to deal in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so, subject
# to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
# LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE
from __future__ import unicode_literals

import sys
import unicodedata
from collections import defaultdict

# Static tables (token kinds, syntax node names, error messages, operator
# precedences) and single-character classification helpers for JavaScript
# source text.

PY3 = sys.version_info >= (3, 0)

if PY3:
    # Give Python 3 the Python 2 spellings used throughout this module.
    unichr = chr
    xrange = range
    unicode = str

# Token kind name -> numeric id.
token = {
    'BooleanLiteral': 1,
    'EOF': 2,
    'Identifier': 3,
    'Keyword': 4,
    'NullLiteral': 5,
    'NumericLiteral': 6,
    'Punctuator': 7,
    'StringLiteral': 8,
    'RegularExpression': 9,
    'Template': 10
}

# Reverse mapping: numeric id -> token kind name.
TokenName = dict((v, k) for k, v in token.items())

FnExprTokens = [
    '(', '{', '[', 'in', 'typeof', 'instanceof', 'new',
    'return', 'case', 'delete', 'throw', 'void',
    # assignment operators
    '=', '+=', '-=', '*=', '/=', '%=', '<<=', '>>=', '>>>=',
    '&=', '|=', '^=', ',',
    # binary/unary operators
    '+', '-', '*', '/', '%', '++', '--', '<<', '>>', '>>>', '&',
    '|', '^', '!', '~', '&&', '||', '?', ':', '===', '==', '>=',
    '<=', '<', '>', '!=', '!=='
]

# Every syntax node type name known to this module.
syntax = set((
    'AssignmentExpression',
    'AssignmentPattern',
    'ArrayExpression',
    'ArrayPattern',
    'ArrowFunctionExpression',
    'BlockStatement',
    'BinaryExpression',
    'BreakStatement',
    'CallExpression',
    'CatchClause',
    'ClassBody',
    'ClassDeclaration',
    'ClassExpression',
    'ConditionalExpression',
    'ContinueStatement',
    'DoWhileStatement',
    'DebuggerStatement',
    'EmptyStatement',
    'ExportAllDeclaration',
    'ExportDefaultDeclaration',
    'ExportNamedDeclaration',
    'ExportSpecifier',
    'ExpressionStatement',
    'ForStatement',
    'ForInStatement',
    'FunctionDeclaration',
    'FunctionExpression',
    'Identifier',
    'IfStatement',
    'ImportDeclaration',
    'ImportDefaultSpecifier',
    'ImportNamespaceSpecifier',
    'ImportSpecifier',
    'Literal',
    'LabeledStatement',
    'LogicalExpression',
    'MemberExpression',
    'MethodDefinition',
    'NewExpression',
    'ObjectExpression',
    'ObjectPattern',
    'Program',
    'Property',
    'RestElement',
    'ReturnStatement',
    'SequenceExpression',
    'SpreadElement',
    'Super',
    'SwitchCase',
    'SwitchStatement',
    'TaggedTemplateExpression',
    'TemplateElement',
    'TemplateLiteral',
    'ThisExpression',
    'ThrowStatement',
    'TryStatement',
    'UnaryExpression',
    'UpdateExpression',
    'VariableDeclaration',
    'VariableDeclarator',
    'WhileStatement',
    'WithStatement'))

# The subset of `syntax` listed as supported.
supported_syntax = set((
    'AssignmentExpression',
    'ArrayExpression',
    'BlockStatement',
    'BinaryExpression',
    'BreakStatement',
    'CallExpression',
    'CatchClause',
    'ConditionalExpression',
    'ContinueStatement',
    'DoWhileStatement',
    'DebuggerStatement',
    'EmptyStatement',
    'ExpressionStatement',
    'ForStatement',
    'ForInStatement',
    'FunctionDeclaration',
    'FunctionExpression',
    'Identifier',
    'IfStatement',
    'Literal',
    'LabeledStatement',
    'LogicalExpression',
    'MemberExpression',
    'MethodDefinition',
    'NewExpression',
    'ObjectExpression',
    'Program',
    'Property',
    'ReturnStatement',
    'SequenceExpression',
    'SwitchCase',
    'SwitchStatement',
    'ThisExpression',
    'ThrowStatement',
    'TryStatement',
    'UnaryExpression',
    'UpdateExpression',
    'VariableDeclaration',
    'VariableDeclarator',
    'WhileStatement',
    'WithStatement'))

# Error messages should be identical to V8.
messages = {
    'UnexpectedToken': 'Unexpected token %s',
    'UnexpectedNumber': 'Unexpected number',
    'UnexpectedString': 'Unexpected string',
    'UnexpectedIdentifier': 'Unexpected identifier',
    'UnexpectedReserved': 'Unexpected reserved word',
    'UnexpectedTemplate': 'Unexpected quasi %s',
    'UnexpectedEOS': 'Unexpected end of input',
    'NewlineAfterThrow': 'Illegal newline after throw',
    'InvalidRegExp': 'Invalid regular expression',
    'UnterminatedRegExp': 'Invalid regular expression: missing /',
    'InvalidLHSInAssignment': 'Invalid left-hand side in assignment',
    'InvalidLHSInForIn': 'Invalid left-hand side in for-in',
    'MultipleDefaultsInSwitch': 'More than one default clause in switch statement',
    'NoCatchOrFinally': 'Missing catch or finally after try',
    'UnknownLabel': 'Undefined label \'%s\'',
    'Redeclaration': '%s \'%s\' has already been declared',
    'IllegalContinue': 'Illegal continue statement',
    'IllegalBreak': 'Illegal break statement',
    'IllegalReturn': 'Illegal return statement',
    'StrictModeWith': 'Strict mode code may not include a with statement',
    'StrictCatchVariable': 'Catch variable may not be eval or arguments in strict mode',
    'StrictVarName': 'Variable name may not be eval or arguments in strict mode',
    'StrictParamName': 'Parameter name eval or arguments is not allowed in strict mode',
    'StrictParamDupe': 'Strict mode function may not have duplicate parameter names',
    'StrictFunctionName': 'Function name may not be eval or arguments in strict mode',
    'StrictOctalLiteral': 'Octal literals are not allowed in strict mode.',
    'StrictDelete': 'Delete of an unqualified identifier in strict mode.',
    'StrictLHSAssignment': 'Assignment to eval or arguments is not allowed in strict mode',
    'StrictLHSPostfix': 'Postfix increment/decrement may not have eval or arguments operand in strict mode',
    'StrictLHSPrefix': 'Prefix increment/decrement may not have eval or arguments operand in strict mode',
    'StrictReservedWord': 'Use of future reserved word in strict mode',
    'TemplateOctalLiteral': 'Octal literals are not allowed in template strings.',
    'ParameterAfterRestParameter': 'Rest parameter must be last formal parameter',
    'DefaultRestParameter': 'Unexpected token =',
    'ObjectPatternAsRestParameter': 'Unexpected token {',
    'DuplicateProtoProperty': 'Duplicate __proto__ fields are not allowed in object literals',
    'ConstructorSpecialMethod': 'Class constructor may not be an accessor',
    'DuplicateConstructor': 'A class may only have one constructor',
    'StaticPrototype': 'Classes may not have static property named prototype',
    'MissingFromClause': 'Unexpected token',
    'NoAsAfterImportNamespace': 'Unexpected token',
    'InvalidModuleSpecifier': 'Unexpected token',
    'IllegalImportDeclaration': 'Unexpected token',
    'IllegalExportDeclaration': 'Unexpected token'
}

# Binary operator -> precedence level; a larger number binds tighter.
PRECEDENCE = {
    '||': 1,
    '&&': 2,
    '|': 3,
    '^': 4,
    '&': 5,
    '==': 6,
    '!=': 6,
    '===': 6,
    '!==': 6,
    '<': 7,
    '>': 7,
    '<=': 7,
    '>=': 7,
    'instanceof': 7,
    'in': 7,
    '<<': 8,
    '>>': 8,
    '>>>': 8,
    '+': 9,
    '-': 9,
    '*': 11,
    '/': 11,
    '%': 11
}


class Token:
    """Namespace whose attributes mirror the entries of ``token``."""


class Syntax:
    """Namespace with one attribute per name in ``syntax`` (value == name)."""


class Messages:
    """Namespace whose attributes mirror the entries of ``messages``."""


class PlaceHolders:
    """Marker values used in place of real syntax node type names."""
    ArrowParameterPlaceHolder = 'ArrowParameterPlaceHolder'


# Populate the namespaces above so the tables are reachable via attribute
# access (e.g. Token.Identifier, Syntax.Program, Messages.UnexpectedEOS).
for k, v in token.items():
    setattr(Token, k, v)

for e in syntax:
    setattr(Syntax, e, e)

for k, v in messages.items():
    setattr(Messages, k, v)

#http://stackoverflow.com/questions/14245893/efficiently-list-all-characters-in-a-given-unicode-category
BOM = u'\uFEFF'
ZWJ = u'\u200D'
ZWNJ = u'\u200C'
TAB = u'\u0009'
VT = u'\u000B'
FF = u'\u000C'
SP = u'\u0020'
NBSP = u'\u00A0'
LF = u'\u000A'
CR = u'\u000D'
LS = u'\u2028'
PS = u'\u2029'

# Unicode general categories (as returned by unicodedata.category).
LETTER_CATEGORIES = set(['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl'])

COMBINING_MARK_CATEGORIES = set(['Mn', 'Mc'])
DIGIT_CATEGORIES = set(['Nd'])
CONNECTOR_PUNCTUATION_CATEGORIES = set(['Pc'])
IDENTIFIER_START_CATEGORIES = LETTER_CATEGORIES.copy()
IDENTIFIER_PART_CATEGORIES = IDENTIFIER_START_CATEGORIES.union(COMBINING_MARK_CATEGORIES).union(DIGIT_CATEGORIES)\
    .union(CONNECTOR_PUNCTUATION_CATEGORIES)

# Characters accepted regardless of their category. The backslash is admitted
# as well because it introduces a unicode escape sequence.
EXTRA_IDENTIFIER_START_CHARS = set(('$', '_', '\\'))
EXTRA_IDENTIFIER_PART_CHARS = EXTRA_IDENTIFIER_START_CHARS.union(set((ZWJ, ZWNJ)))

# Both sets hold integer code points, not characters.
WHITE_SPACE = set((0x20, 0x09, 0x0B, 0x0C, 0xA0, 0x1680, 0x180E, 0x2000, 0x2001, 0x2002,
                   0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202F,
                   0x205F, 0x3000, 0xFEFF))

LINE_TERMINATORS = set((0x0A, 0x0D, 0x2028, 0x2029))


def isIdentifierStart(ch):
    """Return True if ``ch`` is accepted as the first character of an identifier.

    ``ch`` is either a unicode string or an integer code point. A string that
    is not exactly one character long (including ``''``) yields False, in line
    with the digit predicates below, instead of raising TypeError.
    """
    uch = ch if isinstance(ch, unicode) else unichr(ch)
    if len(uch) != 1:
        return False
    return (unicodedata.category(uch) in IDENTIFIER_START_CATEGORIES
            or uch in EXTRA_IDENTIFIER_START_CHARS)


def isIdentifierPart(ch):
    """Return True if ``ch`` is accepted after the first character of an identifier.

    ``ch`` is either a unicode string or an integer code point. A string that
    is not exactly one character long (including ``''``) yields False.
    """
    uch = ch if isinstance(ch, unicode) else unichr(ch)
    if len(uch) != 1:
        return False
    return (unicodedata.category(uch) in IDENTIFIER_PART_CATEGORIES
            or uch in EXTRA_IDENTIFIER_PART_CHARS)


def isValidIdentifier(name):
    """Return True if ``name`` is a non-empty, non-keyword identifier.

    The first character must satisfy isIdentifierStart and every following
    character must satisfy isIdentifierPart.
    """
    if not name or isKeyword(name):
        return False
    if not isIdentifierStart(name[0]):
        return False
    return all(isIdentifierPart(c) for c in name[1:])


def isWhiteSpace(ch):
    """Return True if ``ch`` (unicode character or int code point) is in WHITE_SPACE.

    A string that is not exactly one character long yields False.
    """
    if isinstance(ch, unicode):
        if len(ch) != 1:
            return False
        ch = ord(ch)
    return ch in WHITE_SPACE


def isLineTerminator(ch):
    """Return True if ``ch`` (unicode character or int code point) is in LINE_TERMINATORS.

    A string that is not exactly one character long yields False.
    """
    if isinstance(ch, unicode):
        if len(ch) != 1:
            return False
        ch = ord(ch)
    return ch in LINE_TERMINATORS


OCTAL = set(('0', '1', '2', '3', '4', '5', '6', '7'))
DEC = set(('0', '1', '2', '3', '4', '5', '6', '7', '8', '9'))
HEX = set('0123456789abcdefABCDEF')
# Hex digit character (either case) -> its numeric value.
HEX_CONV = dict(('0123456789abcdef'[n], n) for n in xrange(16))
for i, e in enumerate('ABCDEF', 10):
    HEX_CONV[e] = i


def isDecimalDigit(ch):
    """Return True if ``ch`` (unicode character or int code point) is one of 0-9."""
    return (ch if isinstance(ch, unicode) else unichr(ch)) in DEC


def isHexDigit(ch):
    """Return True if ``ch`` (unicode character or int code point) is a hex digit."""
    return (ch if isinstance(ch, unicode) else unichr(ch)) in HEX


def isOctalDigit(ch):
    """Return True if ``ch`` (unicode character or int code point) is one of 0-7."""
    return (ch if isinstance(ch, unicode) else unichr(ch)) in OCTAL


def isFutureReservedWord(w):
    """Return True if ``w`` is one of 'enum', 'export', 'import', 'super'."""
    return w in ('enum', 'export', 'import', 'super')


RESERVED_WORD = set(('implements', 'interface', 'package', 'private',
                     'protected', 'public', 'static', 'yield', 'let'))


def isStrictModeReservedWord(w):
    """Return True if ``w`` is listed in RESERVED_WORD."""
    return w in RESERVED_WORD


def isRestrictedWord(w):
    """Return True if ``w`` is 'eval' or 'arguments'."""
    return w in ('eval', 'arguments')


KEYWORDS = set((
    'if', 'in', 'do', 'var', 'for', 'new', 'try', 'let', 'this', 'else',
    'case', 'void', 'with', 'enum', 'while', 'break', 'catch', 'throw',
    'const', 'yield', 'class', 'super', 'return', 'typeof', 'delete',
    'switch', 'export', 'import', 'default', 'finally', 'extends',
    'function', 'continue', 'debugger', 'instanceof', 'pyimport'))


def isKeyword(w):
    """Return True if ``w`` is listed in KEYWORDS."""
    # 'const' is specialized as Keyword in V8.
    # 'yield' and 'let' are for compatibility with SpiderMonkey and ES.next.
    # Some others are from future reserved words.
    return w in KEYWORDS


class JsSyntaxError(Exception):
    """Exception type for JavaScript syntax errors."""


if __name__ == '__main__':
    # Minimal self-check when the module is run as a script.
    assert isLineTerminator('\n')
    assert isLineTerminator(0x0A)
    assert isIdentifierStart('$')
    assert isIdentifierStart(100)
    assert isWhiteSpace(' ')
    assert not isWhiteSpace('')
    assert not isLineTerminator('')
    assert not isIdentifierStart('')
    assert not isIdentifierPart('')