# bazarr/libs/pyjsparser/pyjsparserdata.py

# The MIT License
#
# Copyright 2014, 2015 Piotr Dabkowski
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the 'Software'),
# to deal in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so, subject
# to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
# LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
#  OR THE USE OR OTHER DEALINGS IN THE SOFTWARE
from __future__ import unicode_literals

import sys
import unicodedata
from collections import defaultdict

# True when running on a Python 3 (or later) interpreter.
PY3 = sys.version_info[0] >= 3

if PY3:
    # Python 3 no longer has these Python 2 builtins; alias them so the rest
    # of the module can use one spelling on both major versions.
    unichr, xrange, unicode = chr, range, str

# Numeric code assigned to each lexical token type.
token = {
    'BooleanLiteral': 1, 'EOF': 2, 'Identifier': 3, 'Keyword': 4,
    'NullLiteral': 5, 'NumericLiteral': 6, 'Punctuator': 7,
    'StringLiteral': 8, 'RegularExpression': 9, 'Template': 10,
}

# Reverse lookup: numeric code -> token type name.
TokenName = {code: name for name, code in token.items()}

# Keywords and punctuators grouped by role. The name suggests these are the
# tokens that may precede a function expression -- TODO confirm against the
# scanner that consumes this list.
FnExprTokens = [
    # opening brackets and keywords
    '(', '{', '[',
    'in', 'typeof', 'instanceof', 'new', 'return', 'case', 'delete', 'throw', 'void',
    # assignment operators
    '=', '+=', '-=', '*=', '/=', '%=', '<<=', '>>=', '>>>=', '&=', '|=', '^=', ',',
    # binary/unary operators
    '+', '-', '*', '/', '%', '++', '--', '<<', '>>', '>>>', '&', '|', '^', '!', '~',
    '&&', '||', '?', ':', '===', '==', '>=', '<=', '<', '>', '!=', '!==',
]

# Every AST node type name known to this module, one row per initial letter.
syntax = {
    'AssignmentExpression', 'AssignmentPattern', 'ArrayExpression', 'ArrayPattern', 'ArrowFunctionExpression',
    'BlockStatement', 'BinaryExpression', 'BreakStatement',
    'CallExpression', 'CatchClause', 'ClassBody', 'ClassDeclaration', 'ClassExpression',
    'ConditionalExpression', 'ContinueStatement',
    'DoWhileStatement', 'DebuggerStatement',
    'EmptyStatement', 'ExportAllDeclaration', 'ExportDefaultDeclaration', 'ExportNamedDeclaration',
    'ExportSpecifier', 'ExpressionStatement',
    'ForStatement', 'ForInStatement', 'FunctionDeclaration', 'FunctionExpression',
    'Identifier', 'IfStatement', 'ImportDeclaration', 'ImportDefaultSpecifier',
    'ImportNamespaceSpecifier', 'ImportSpecifier',
    'Literal', 'LabeledStatement', 'LogicalExpression',
    'MemberExpression', 'MethodDefinition',
    'NewExpression',
    'ObjectExpression', 'ObjectPattern',
    'Program', 'Property',
    'RestElement', 'ReturnStatement',
    'SequenceExpression', 'SpreadElement', 'Super', 'SwitchCase', 'SwitchStatement',
    'TaggedTemplateExpression', 'TemplateElement', 'TemplateLiteral', 'ThisExpression',
    'ThrowStatement', 'TryStatement',
    'UnaryExpression', 'UpdateExpression',
    'VariableDeclaration', 'VariableDeclarator',
    'WhileStatement', 'WithStatement',
}

# Node type names flagged as supported, one row per initial letter.
supported_syntax = {
    'AssignmentExpression', 'ArrayExpression',
    'BlockStatement', 'BinaryExpression', 'BreakStatement',
    'CallExpression', 'CatchClause', 'ConditionalExpression', 'ContinueStatement',
    'DoWhileStatement', 'DebuggerStatement',
    'EmptyStatement', 'ExpressionStatement',
    'ForStatement', 'ForInStatement', 'FunctionDeclaration', 'FunctionExpression',
    'Identifier', 'IfStatement',
    'Literal', 'LabeledStatement', 'LogicalExpression',
    'MemberExpression', 'MethodDefinition',
    'NewExpression',
    'ObjectExpression',
    'Program', 'Property',
    'ReturnStatement',
    'SequenceExpression', 'SwitchCase', 'SwitchStatement',
    'ThisExpression', 'ThrowStatement', 'TryStatement',
    'UnaryExpression', 'UpdateExpression',
    'VariableDeclaration', 'VariableDeclarator',
    'WhileStatement', 'WithStatement',
}

# Error messages should be identical to V8.
# Entries containing %s are format templates.
messages = {
    # unexpected input
    'UnexpectedToken': 'Unexpected token %s',
    'UnexpectedNumber': 'Unexpected number',
    'UnexpectedString': 'Unexpected string',
    'UnexpectedIdentifier': 'Unexpected identifier',
    'UnexpectedReserved': 'Unexpected reserved word',
    'UnexpectedTemplate': 'Unexpected quasi %s',
    'UnexpectedEOS': 'Unexpected end of input',
    # statements and expressions
    'NewlineAfterThrow': 'Illegal newline after throw',
    'InvalidRegExp': 'Invalid regular expression',
    'UnterminatedRegExp': 'Invalid regular expression: missing /',
    'InvalidLHSInAssignment': 'Invalid left-hand side in assignment',
    'InvalidLHSInForIn': 'Invalid left-hand side in for-in',
    'MultipleDefaultsInSwitch': 'More than one default clause in switch statement',
    'NoCatchOrFinally': 'Missing catch or finally after try',
    'UnknownLabel': "Undefined label '%s'",
    'Redeclaration': "%s '%s' has already been declared",
    'IllegalContinue': 'Illegal continue statement',
    'IllegalBreak': 'Illegal break statement',
    'IllegalReturn': 'Illegal return statement',
    # strict mode
    'StrictModeWith': 'Strict mode code may not include a with statement',
    'StrictCatchVariable': 'Catch variable may not be eval or arguments in strict mode',
    'StrictVarName': 'Variable name may not be eval or arguments in strict mode',
    'StrictParamName': 'Parameter name eval or arguments is not allowed in strict mode',
    'StrictParamDupe': 'Strict mode function may not have duplicate parameter names',
    'StrictFunctionName': 'Function name may not be eval or arguments in strict mode',
    'StrictOctalLiteral': 'Octal literals are not allowed in strict mode.',
    'StrictDelete': 'Delete of an unqualified identifier in strict mode.',
    'StrictLHSAssignment': 'Assignment to eval or arguments is not allowed in strict mode',
    'StrictLHSPostfix': 'Postfix increment/decrement may not have eval or arguments operand in strict mode',
    'StrictLHSPrefix': 'Prefix increment/decrement may not have eval or arguments operand in strict mode',
    'StrictReservedWord': 'Use of future reserved word in strict mode',
    # templates, rest parameters, object literals and classes
    'TemplateOctalLiteral': 'Octal literals are not allowed in template strings.',
    'ParameterAfterRestParameter': 'Rest parameter must be last formal parameter',
    'DefaultRestParameter': 'Unexpected token =',
    'ObjectPatternAsRestParameter': 'Unexpected token {',
    'DuplicateProtoProperty': 'Duplicate __proto__ fields are not allowed in object literals',
    'ConstructorSpecialMethod': 'Class constructor may not be an accessor',
    'DuplicateConstructor': 'A class may only have one constructor',
    'StaticPrototype': 'Classes may not have static property named prototype',
    # import/export problems all share the same bare message
    'MissingFromClause': 'Unexpected token',
    'NoAsAfterImportNamespace': 'Unexpected token',
    'InvalidModuleSpecifier': 'Unexpected token',
    'IllegalImportDeclaration': 'Unexpected token',
    'IllegalExportDeclaration': 'Unexpected token',
}

# Binary operator -> precedence level (a larger number binds tighter).
# Built from a level table; no operator is assigned level 10.
PRECEDENCE = dict(
    (operator, level)
    for level, operators in (
        (1, ('||',)),
        (2, ('&&',)),
        (3, ('|',)),
        (4, ('^',)),
        (5, ('&',)),
        (6, ('==', '!=', '===', '!==')),
        (7, ('<', '>', '<=', '>=', 'instanceof', 'in')),
        (8, ('<<', '>>', '>>>')),
        (9, ('+', '-')),
        (11, ('*', '/', '%')),
    )
    for operator in operators)


class Token:
    """Attribute namespace for token type codes.

    Defined empty; the module fills it from the `token` table with setattr.
    """


class Syntax:
    """Attribute namespace for AST node type names.

    Defined empty; the module fills it from the `syntax` set with setattr.
    """


class Messages:
    """Attribute namespace for error message templates.

    Defined empty; the module fills it from the `messages` table with setattr.
    """


class PlaceHolders:
    """Namespace for placeholder marker strings."""
    # Marker whose value equals its own name; presumably used as a node type
    # while arrow-function parameters are still ambiguous -- TODO confirm
    # against the parser.
    ArrowParameterPlaceHolder = 'ArrowParameterPlaceHolder'


# Copy each lookup table onto its namespace class so entries can be read as
# attributes (Token.EOF, Syntax.Program, Messages.UnexpectedToken, ...).
# The loop variables k, v and e stay bound as module-level names afterwards.
for k, v in token.items():
    setattr(Token, k, v)

# Node type names map to themselves: Syntax.<Name> == '<Name>'.
for e in syntax:
    setattr(Syntax, e, e)

for k, v in messages.items():
    setattr(Messages, k, v)

# Named single characters. The module enables unicode_literals, so these are
# text strings on Python 2 as well.
BOM = '\uFEFF'   # byte order mark
ZWJ = '\u200D'   # zero width joiner
ZWNJ = '\u200C'  # zero width non-joiner
TAB = '\u0009'   # horizontal tab
VT = '\u000B'    # vertical tab
FF = '\u000C'    # form feed
SP = '\u0020'    # space
NBSP = '\u00A0'  # no-break space
LF = '\u000A'    # line feed
CR = '\u000D'    # carriage return
LS = '\u2028'    # line separator
PS = '\u2029'    # paragraph separator


# Unicode general categories (as returned by unicodedata.category) that make
# up identifiers. Background on working with categories:
# http://stackoverflow.com/questions/14245893/efficiently-list-all-characters-in-a-given-unicode-category
LETTER_CATEGORIES = {'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl'}

COMBINING_MARK_CATEGORIES = {'Mn', 'Mc'}
DIGIT_CATEGORIES = {'Nd'}
CONNECTOR_PUNCTUATION_CATEGORIES = {'Pc'}
# Identifiers may also start with '$', '_' or a backslash (the start of a
# unicode escape sequence); those are listed separately as extra characters.
IDENTIFIER_START_CATEGORIES = set(LETTER_CATEGORIES)
IDENTIFIER_PART_CATEGORIES = (IDENTIFIER_START_CATEGORIES
                              | COMBINING_MARK_CATEGORIES
                              | DIGIT_CATEGORIES
                              | CONNECTOR_PUNCTUATION_CATEGORIES)

EXTRA_IDENTIFIER_START_CHARS = {'$', '_', '\\'}
EXTRA_IDENTIFIER_PART_CHARS = EXTRA_IDENTIFIER_START_CHARS | {ZWJ, ZWNJ}

# White space, stored as integer code points.
WHITE_SPACE = (
    {0x20, 0x09, 0x0B, 0x0C, 0xA0, 0x1680, 0x180E}
    | set(range(0x2000, 0x200B))  # U+2000 through U+200A inclusive
    | {0x202F, 0x205F, 0x3000, 0xFEFF})

# Line terminators, stored as integer code points: LF, CR, LS, PS.
LINE_TERMINATORS = {0x0A, 0x0D, 0x2028, 0x2029}


def isIdentifierStart(ch):
    """Return True if `ch` is accepted as the first character of an identifier.

    `ch` is either a one-character text string or an integer code point
    (converted with unichr). It is accepted when it is listed in
    EXTRA_IDENTIFIER_START_CHARS or when its Unicode general category is in
    IDENTIFIER_START_CATEGORIES.
    """
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    if ch in EXTRA_IDENTIFIER_START_CHARS:
        return True
    return unicodedata.category(ch) in IDENTIFIER_START_CATEGORIES


def isIdentifierPart(ch):
    """Return True if `ch` is accepted after the first character of an identifier.

    `ch` is either a one-character text string or an integer code point
    (converted with unichr). It is accepted when it is listed in
    EXTRA_IDENTIFIER_PART_CHARS or when its Unicode general category is in
    IDENTIFIER_PART_CATEGORIES.
    """
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    if ch in EXTRA_IDENTIFIER_PART_CHARS:
        return True
    return unicodedata.category(ch) in IDENTIFIER_PART_CATEGORIES


def isValidIdentifier(name):
    """Return True if `name` passes this module's identifier checks.

    An empty name or a keyword (per isKeyword) is rejected. Otherwise the
    first character must satisfy isIdentifierStart and every later character
    must satisfy isIdentifierPart.

    NOTE: a literal backslash passes both character checks because '\\\\' is
    listed in EXTRA_IDENTIFIER_START_CHARS; escape sequences are not decoded
    or validated here.
    """
    if not name or isKeyword(name):
        return False
    # Position 0 uses the start rule; every other position uses the part rule.
    return all(
        (isIdentifierStart if index == 0 else isIdentifierPart)(char)
        for index, char in enumerate(name))


def isWhiteSpace(ch):
    """Return True if `ch` is one of the code points in WHITE_SPACE.

    `ch` is either a one-character text string (converted with ord) or an
    integer code point.
    """
    code_point = ch
    if isinstance(ch, unicode):
        code_point = ord(ch)
    return code_point in WHITE_SPACE


def isLineTerminator(ch):
    """Return True if `ch` is one of the code points in LINE_TERMINATORS.

    `ch` is either a one-character text string (converted with ord) or an
    integer code point.
    """
    code_point = ch
    if isinstance(ch, unicode):
        code_point = ord(ch)
    return code_point in LINE_TERMINATORS


# Digit alphabets, stored as sets of one-character strings.
OCTAL = set('01234567')
DEC = set('0123456789')
HEX = DEC | set('abcdefABCDEF')

# Hex digit character -> numeric value; both letter cases map to 10..15.
HEX_CONV = dict(zip('0123456789abcdef', xrange(16)))
for i, e in enumerate('ABCDEF', 10):
    HEX_CONV[e] = i


def isDecimalDigit(ch):
    """Return True if `ch` is one of the characters in DEC ('0'-'9').

    `ch` is either a text string or an integer code point (converted with
    unichr).
    """
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    return ch in DEC


def isHexDigit(ch):
    """Return True if `ch` is one of the characters in HEX (0-9, a-f, A-F).

    `ch` is either a text string or an integer code point (converted with
    unichr).
    """
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    return ch in HEX


def isOctalDigit(ch):
    """Return True if `ch` is one of the characters in OCTAL ('0'-'7').

    `ch` is either a text string or an integer code point (converted with
    unichr).
    """
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    return ch in OCTAL


def isFutureReservedWord(w):
    """Return True if `w` is 'enum', 'export', 'import' or 'super'."""
    future_reserved = ('enum', 'export', 'import', 'super')
    return w in future_reserved


# Words checked by isStrictModeReservedWord.
RESERVED_WORD = {
    'implements', 'interface', 'package',
    'private', 'protected', 'public',
    'static', 'yield', 'let',
}


def isStrictModeReservedWord(w):
    """Return True if `w` is a member of RESERVED_WORD."""
    return w in RESERVED_WORD


def isRestrictedWord(w):
    """Return True if `w` is 'eval' or 'arguments'."""
    restricted = ('eval', 'arguments')
    return w in restricted


# Words reported as keywords by isKeyword, one row per word length.
KEYWORDS = {
    'if', 'in', 'do',
    'var', 'for', 'new', 'try', 'let',
    'this', 'else', 'case', 'void', 'with', 'enum',
    'while', 'break', 'catch', 'throw', 'const', 'yield', 'class', 'super',
    'return', 'typeof', 'delete', 'switch', 'export', 'import',
    'default', 'finally', 'extends',
    'function', 'continue', 'debugger', 'pyimport',
    'instanceof',
}


def isKeyword(w):
    """Return True if `w` is a member of KEYWORDS.

    Notes on the contents of KEYWORDS:
    - 'const' is specialized as Keyword in V8.
    - 'yield' and 'let' are for compatibility with SpiderMonkey and ES.next.
    - Some others are from future reserved words.
    - 'pyimport' is also listed; it is presumably an extension rather than a
      JavaScript keyword -- TODO confirm.
    """
    return w in KEYWORDS


class JsSyntaxError(Exception):
    """Exception type exported for JavaScript syntax errors.

    Adds nothing to Exception; nothing in this module raises it.
    """


if __name__ == '__main__':
    # Smoke checks, run only when the module is executed as a script: each
    # predicate must accept both a character and an integer code point.
    for predicate, sample in (
            (isLineTerminator, '\n'),
            (isLineTerminator, 0x0A),
            (isIdentifierStart, '$'),
            (isIdentifierStart, 100),
            (isWhiteSpace, ' ')):
        assert predicate(sample)