# bazarr/libs/pyjsparser/pyjsparserdata.py

# The MIT License
#
# Copyright 2014, 2015 Piotr Dabkowski
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the 'Software'),
# to deal in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so, subject
# to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
# LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
#  OR THE USE OR OTHER DEALINGS IN THE SOFTWARE
from __future__ import unicode_literals

import sys
import unicodedata
from collections import defaultdict

# Python 3 dropped the ``unichr``, ``xrange`` and ``unicode`` builtins.  Alias
# them to their Python 3 equivalents so the rest of this module can keep using
# the Python 2 spellings on either interpreter.
PY3 = sys.version_info[0] >= 3

if PY3:
    unichr, xrange, unicode = chr, range, str

# Numeric codes for the lexical token kinds.  Codes are handed out in listing
# order, starting at 1.
token = dict((kind, code) for code, kind in enumerate((
    'BooleanLiteral',     # 1
    'EOF',                # 2
    'Identifier',         # 3
    'Keyword',            # 4
    'NullLiteral',        # 5
    'NumericLiteral',     # 6
    'Punctuator',         # 7
    'StringLiteral',      # 8
    'RegularExpression',  # 9
    'Template',           # 10
), 1))


# Reverse lookup: numeric code -> token kind name.
TokenName = dict(zip(token.values(), token.keys()))

# Ordered list of token texts, assembled from three groups.
# NOTE(review): presumably the tokens after which a following `function` is
# treated as an expression (the name mirrors esprima's table) -- confirm
# against the parser that consumes this list.
FnExprTokens = (
    # brackets and keywords
    ['(', '{', '[', 'in', 'typeof', 'instanceof', 'new', 'return', 'case',
     'delete', 'throw', 'void']
    # assignment operators (and the comma)
    + ['=', '+=', '-=', '*=', '/=', '%=', '<<=', '>>=', '>>>=', '&=', '|=',
       '^=', ',']
    # binary/unary operators
    + ['+', '-', '*', '/', '%', '++', '--', '<<', '>>', '>>>', '&', '|', '^',
       '!', '~', '&&', '||', '?', ':', '===', '==', '>=', '<=', '<', '>',
       '!=', '!==']
)

# Set of node type names.  Each entry is also exposed as an attribute of the
# ``Syntax`` class in this module (attribute value == attribute name).
syntax = set('''
    AssignmentExpression AssignmentPattern ArrayExpression ArrayPattern
    ArrowFunctionExpression BlockStatement BinaryExpression BreakStatement
    CallExpression CatchClause ClassBody ClassDeclaration ClassExpression
    ConditionalExpression ContinueStatement DoWhileStatement DebuggerStatement
    EmptyStatement ExportAllDeclaration ExportDefaultDeclaration
    ExportNamedDeclaration ExportSpecifier ExpressionStatement ForStatement
    ForInStatement FunctionDeclaration FunctionExpression Identifier
    IfStatement ImportDeclaration ImportDefaultSpecifier
    ImportNamespaceSpecifier ImportSpecifier Literal LabeledStatement
    LogicalExpression MemberExpression MethodDefinition NewExpression
    ObjectExpression ObjectPattern Program Property RestElement
    ReturnStatement SequenceExpression SpreadElement Super SwitchCase
    SwitchStatement TaggedTemplateExpression TemplateElement TemplateLiteral
    ThisExpression ThrowStatement TryStatement UnaryExpression
    UpdateExpression VariableDeclaration VariableDeclarator WhileStatement
    WithStatement
'''.split())


# Error messages should be identical to V8.
# Keys are message identifiers; values are message templates, some of which
# carry ``%s`` placeholders.
messages = {
    # --- unexpected input ---
    'UnexpectedToken': 'Unexpected token %s',
    'UnexpectedNumber': 'Unexpected number',
    'UnexpectedString': 'Unexpected string',
    'UnexpectedIdentifier': 'Unexpected identifier',
    'UnexpectedReserved': 'Unexpected reserved word',
    'UnexpectedTemplate': 'Unexpected quasi %s',
    'UnexpectedEOS': 'Unexpected end of input',
    'NewlineAfterThrow': 'Illegal newline after throw',
    # --- regular expressions ---
    'InvalidRegExp': 'Invalid regular expression',
    'UnterminatedRegExp': 'Invalid regular expression: missing /',
    # --- statements and assignment targets ---
    'InvalidLHSInAssignment': 'Invalid left-hand side in assignment',
    'InvalidLHSInForIn': 'Invalid left-hand side in for-in',
    'MultipleDefaultsInSwitch': 'More than one default clause in switch statement',
    'NoCatchOrFinally': 'Missing catch or finally after try',
    'UnknownLabel': "Undefined label '%s'",
    'Redeclaration': "%s '%s' has already been declared",
    'IllegalContinue': 'Illegal continue statement',
    'IllegalBreak': 'Illegal break statement',
    'IllegalReturn': 'Illegal return statement',
    # --- strict mode ---
    'StrictModeWith': 'Strict mode code may not include a with statement',
    'StrictCatchVariable': 'Catch variable may not be eval or arguments in strict mode',
    'StrictVarName': 'Variable name may not be eval or arguments in strict mode',
    'StrictParamName': 'Parameter name eval or arguments is not allowed in strict mode',
    'StrictParamDupe': 'Strict mode function may not have duplicate parameter names',
    'StrictFunctionName': 'Function name may not be eval or arguments in strict mode',
    'StrictOctalLiteral': 'Octal literals are not allowed in strict mode.',
    'StrictDelete': 'Delete of an unqualified identifier in strict mode.',
    'StrictLHSAssignment': 'Assignment to eval or arguments is not allowed in strict mode',
    'StrictLHSPostfix': 'Postfix increment/decrement may not have eval or arguments operand in strict mode',
    'StrictLHSPrefix': 'Prefix increment/decrement may not have eval or arguments operand in strict mode',
    'StrictReservedWord': 'Use of future reserved word in strict mode',
    # --- templates, parameters, object literals and classes ---
    'TemplateOctalLiteral': 'Octal literals are not allowed in template strings.',
    'ParameterAfterRestParameter': 'Rest parameter must be last formal parameter',
    'DefaultRestParameter': 'Unexpected token =',
    'ObjectPatternAsRestParameter': 'Unexpected token {',
    'DuplicateProtoProperty': 'Duplicate __proto__ fields are not allowed in object literals',
    'ConstructorSpecialMethod': 'Class constructor may not be an accessor',
    'DuplicateConstructor': 'A class may only have one constructor',
    'StaticPrototype': 'Classes may not have static property named prototype',
    # --- import / export ---
    'MissingFromClause': 'Unexpected token',
    'NoAsAfterImportNamespace': 'Unexpected token',
    'InvalidModuleSpecifier': 'Unexpected token',
    'IllegalImportDeclaration': 'Unexpected token',
    'IllegalExportDeclaration': 'Unexpected token',
}

# Precedence level of each binary operator, listed tier by tier.  Operators
# that bind more tightly in JavaScript get the larger number
# ('*' > '+' > '||'); level 10 is not used.
PRECEDENCE = dict(
    (operator, level)
    for level, operators in (
        (1, ('||',)),
        (2, ('&&',)),
        (3, ('|',)),
        (4, ('^',)),
        (5, ('&',)),
        (6, ('==', '!=', '===', '!==')),
        (7, ('<', '>', '<=', '>=', 'instanceof', 'in')),
        (8, ('<<', '>>', '>>>')),
        (9, ('+', '-')),
        (11, ('*', '/', '%')),
    )
    for operator in operators
)

class Token:
    """Namespace whose attributes mirror the ``token`` table (kind -> code)."""


class Syntax:
    """Namespace with one attribute per ``syntax`` entry, set to its own name."""


class Messages:
    """Namespace whose attributes mirror the ``messages`` table."""


class PlaceHolders:
    """Holds the ``ArrowParameterPlaceHolder`` marker string."""
    ArrowParameterPlaceHolder = 'ArrowParameterPlaceHolder'


# Copy the lookup tables onto the namespace classes so entries can be read as
# attributes (e.g. ``Token.EOF``, ``Syntax.Program``).
for k in token:
    setattr(Token, k, token[k])

for e in syntax:
    setattr(Syntax, e, e)

for k, v in messages.items():
    setattr(Messages, k, v)

#http://stackoverflow.com/questions/14245893/efficiently-list-all-characters-in-a-given-unicode-category
# Named single-character constants (Unicode code point in the comment).
BOM = u'\ufeff'    # U+FEFF byte order mark
ZWJ = u'\u200d'    # U+200D zero width joiner
ZWNJ = u'\u200c'   # U+200C zero width non-joiner
TAB = u'\t'        # U+0009 horizontal tab
VT = u'\x0b'       # U+000B vertical tab
FF = u'\x0c'       # U+000C form feed
SP = u' '          # U+0020 space
NBSP = u'\xa0'     # U+00A0 no-break space
LF = u'\n'         # U+000A line feed
CR = u'\r'         # U+000D carriage return
LS = u'\u2028'     # U+2028 line separator
PS = u'\u2029'     # U+2029 paragraph separator

# Bucket every code point the interpreter supports (0 .. sys.maxunicode) by
# its Unicode general category, e.g. 'Lu' or 'Nd'.  The walk happens once, at
# import time.
# NOTE(review): every character of every category stays referenced from
# U_CATEGORIES, although only ten categories are read below in this file;
# confirm nothing else uses the remaining buckets before trimming.
U_CATEGORIES = defaultdict(list)
for c in (unichr(code) for code in range(sys.maxunicode + 1)):
    U_CATEGORIES[unicodedata.category(c)].append(c)

# Character classes used to recognise identifiers.
UNICODE_LETTER = set().union(
    U_CATEGORIES['Lu'], U_CATEGORIES['Ll'], U_CATEGORIES['Lt'],
    U_CATEGORIES['Lm'], U_CATEGORIES['Lo'], U_CATEGORIES['Nl'])
UNICODE_COMBINING_MARK = set().union(U_CATEGORIES['Mn'], U_CATEGORIES['Mc'])
UNICODE_DIGIT = set(U_CATEGORIES['Nd'])
UNICODE_CONNECTOR_PUNCTUATION = set(U_CATEGORIES['Pc'])

# A backslash is accepted as a start character as well; presumably because an
# identifier may begin with a unicode escape sequence.
IDENTIFIER_START = UNICODE_LETTER | set(('$', '_', '\\'))
IDENTIFIER_PART = (IDENTIFIER_START
                   | UNICODE_COMBINING_MARK
                   | UNICODE_DIGIT
                   | UNICODE_CONNECTOR_PUNCTUATION
                   | set((ZWJ, ZWNJ)))

# Both tables hold integer code points, not characters.
WHITE_SPACE = set([0x20, 0x09, 0x0B, 0x0C, 0xA0, 0x1680, 0x180E,
                   0x202F, 0x205F, 0x3000, 0xFEFF])
WHITE_SPACE.update(range(0x2000, 0x200B))  # U+2000 through U+200A

LINE_TERMINATORS = set([
    0x0A,    # line feed
    0x0D,    # carriage return
    0x2028,  # line separator
    0x2029,  # paragraph separator
])

def isIdentifierStart(ch):
    """Return True if ``ch`` is a member of ``IDENTIFIER_START``.

    ``ch`` is either a text string or an integer code point; anything that
    is not a text string is converted with ``unichr`` first.
    """
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    return ch in IDENTIFIER_START

def isIdentifierPart(ch):
    """Return True if ``ch`` is a member of ``IDENTIFIER_PART``.

    ``ch`` is either a text string or an integer code point; anything that
    is not a text string is converted with ``unichr`` first.
    """
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    return ch in IDENTIFIER_PART

def isWhiteSpace(ch):
    """Return True if ``ch`` is one of the ``WHITE_SPACE`` code points.

    ``WHITE_SPACE`` stores integers, so a text string is converted with
    ``ord`` before the lookup; any other value is looked up as given.
    """
    if isinstance(ch, unicode):
        ch = ord(ch)
    return ch in WHITE_SPACE

def isLineTerminator(ch):
    """Return True if ``ch`` is one of the ``LINE_TERMINATORS`` code points.

    ``LINE_TERMINATORS`` stores integers, so a text string is converted with
    ``ord`` before the lookup; any other value is looked up as given.
    """
    if isinstance(ch, unicode):
        ch = ord(ch)
    return ch in LINE_TERMINATORS

# Character sets behind isOctalDigit / isDecimalDigit / isHexDigit.
OCTAL = set('01234567')
DEC = set('0123456789')
HEX = set('0123456789abcdefABCDEF')

# Numeric value of every hexadecimal digit; lower-case digits first, then the
# upper-case letters are added with the same values (10..15).
HEX_CONV = dict(zip('0123456789abcdef', xrange(16)))
for i, e in enumerate('ABCDEF', 10):
    HEX_CONV[e] = i


def isDecimalDigit(ch):
    """Return True if ``ch`` is a member of ``DEC`` ('0'-'9').

    ``ch`` is either a text string or an integer code point; anything that
    is not a text string is converted with ``unichr`` first.
    """
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    return ch in DEC

def isHexDigit(ch):
    """Return True if ``ch`` is a member of ``HEX`` (0-9, a-f, A-F).

    ``ch`` is either a text string or an integer code point; anything that
    is not a text string is converted with ``unichr`` first.
    """
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    return ch in HEX

def isOctalDigit(ch):
    """Return True if ``ch`` is a member of ``OCTAL`` ('0'-'7').

    ``ch`` is either a text string or an integer code point; anything that
    is not a text string is converted with ``unichr`` first.
    """
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    return ch in OCTAL

def isFutureReservedWord(w):
    """Return True if ``w`` is 'enum', 'export', 'import' or 'super'."""
    future_reserved = ('enum', 'export', 'import', 'super')
    return w in future_reserved


# Words checked by isStrictModeReservedWord.
RESERVED_WORD = set(
    'implements interface package private protected public static yield let'
    .split())


def isStrictModeReservedWord(w):
    """Return True if ``w`` is one of the words in ``RESERVED_WORD``."""
    return w in RESERVED_WORD

def isRestrictedWord(w):
    """Return True if ``w`` is 'eval' or 'arguments'."""
    restricted = ('eval', 'arguments')
    return w in restricted


# Keyword table, grouped by word length.
# NOTE(review): 'pyimport' is not a JavaScript keyword; presumably an
# extension used by the host project -- confirm before removing.
KEYWORDS = set((
    # 2 letters
    'if', 'in', 'do',
    # 3 letters
    'var', 'for', 'new', 'try', 'let',
    # 4 letters
    'this', 'else', 'case', 'void', 'with', 'enum',
    # 5 letters
    'while', 'break', 'catch', 'throw', 'const', 'yield', 'class', 'super',
    # 6 letters
    'return', 'typeof', 'delete', 'switch', 'export', 'import',
    # 7 letters
    'default', 'finally', 'extends',
    # 8 letters and longer
    'function', 'continue', 'debugger', 'instanceof', 'pyimport',
))


def isKeyword(w):
    """Return True if ``w`` is a member of ``KEYWORDS``.

    Notes carried over from the original implementation:
    'const' is specialized as Keyword in V8; 'yield' and 'let' are for
    compatibility with SpiderMonkey and ES.next; some others are from
    future reserved words.
    """
    return w in KEYWORDS


class JsSyntaxError(Exception):
    """Exception type for JavaScript syntax errors.

    Adds nothing to ``Exception``; presumably raised by the parser that
    imports this module.
    """

if __name__ == '__main__':
    # Minimal smoke test of the character predicates, exercising both the
    # one-character string form and the integer code point form.
    assert isLineTerminator('\n') and isLineTerminator(0x0A)
    assert isIdentifierStart('$') and isIdentifierStart(100)
    assert isWhiteSpace(' ')