"""
The process of translating JS goes as follows: # TOP = 'imports and scope set'
1. Remove all the comments
2. Replace number , string and regexp literals with markers
4. Remove global Functions and move their translation to the TOP . Also add register code there .
5. Replace inline functions with lvals
6. Remove List and Object literals and replace them with lvals
7. Find and remove var declarations , generate python register code that would go on TOP .
Here we should be left with global code only where 1 line of js code = 1 line of python code .
Routine translating this code should be called glob_translate :
1. Search for outer structures and translate them using glob and inside using exps_translate
exps_translate routine :
1. Remove outer { }
2. Split lines at ;
3. Convert line by line using exp_translate
4. In case of error in 3 try to insert ; according to ECMA rules and repeat 3.
exp_translate routine :
It takes a single line of JS code and returns a SINGLE line of Python code .
Note var is not present here because it was removed in previous stages .
In case of parsing errors it must return the position of the error.
1. Convert all assignment operations to put operations , this may be hard : (
2. Convert all gets and calls to get and callprop .
3. Convert unary operators like typeof , new , ! , delete .
Delete can be handled by replacing last get method with delete .
4. Convert remaining operators that are not handled by python eg : == = and ,
lval format PyJsLvalNR
marker PyJs ( TYPE_NAME ) ( NR )
TODO
1. Number literal replacement
2. Array literal replacement
3. Object literal replacement
5. Function replacement
4. Literal replacement translators
"""
from __future__ import print_function
from utils import *
# Maps a JS binary operator token to the name of the method that implements
# it. Keys and values carry no surrounding whitespace so they compare equal
# to bare operator tokens and are usable as attribute names.
# NOTE(review): presumably looked up on the translated operand objects --
# confirm against the callers, which are not visible in this chunk.
OP_METHODS = {
    '*': '__mul__',
    '/': '__div__',
    '%': '__mod__',
    '+': '__add__',
    '-': '__sub__',
    '<<': '__lshift__',
    '>>': '__rshift__',
    '&': '__and__',
    '^': '__xor__',
    '|': '__or__'
}
def dbg(source, path=r'C:\Users\Piotrek\Desktop\dbg.py'):
    """ Best-effort dump of `source` to a scratch file (debugging aid).

    source -- text to write.
    path   -- destination file; defaults to the historical hard-coded
              location so existing single-argument calls are unchanged.

    Never raises for ordinary failures (missing directory, permissions,
    encoding problems): a debugging helper must not break translation. """
    try:
        with open(path, 'w') as f:
            f.write(source)
    except Exception:
        # Deliberately swallowed, but unlike a bare `except:` this lets
        # KeyboardInterrupt and SystemExit propagate.
        pass
def indent(lines, ind=4):
    """ Prefixes every line of `lines` with `ind` spaces.

    Padding is inserted at the very start and after each newline; spaces
    left dangling at the end (e.g. the padding added after a trailing
    newline) are stripped before the leading padding is prepended. """
    pad = ind * ' '
    return pad + lines.replace('\n', '\n' + pad).rstrip(' ')
def inject_before_lval(source, lval, code):
    """ Inserts `code` on its own line(s) directly above the line of
    `source` that contains `lval`, indented like that line.

    `lval` must occur exactly once in `source`; otherwise the source is
    dumped with dbg(), the lval is printed and RuntimeError is raised.

    Returns the new source text. """
    occurrences = source.count(lval)
    if occurrences > 1:
        dbg(source)
        print()
        print(lval)
        raise RuntimeError(' To many lvals ( %s ) ' % lval)
    if not occurrences:
        dbg(source)
        print()
        print(lval)
        raise RuntimeError(' No lval found " %s " ' % lval)
    end = source.index(lval)
    # Position of the newline that starts lval's line (-1 on the first line).
    inj = source.rfind('\n', 0, end)
    # Count the spaces that follow that newline; the bound keeps the scan
    # from running off the end of the text.
    ind = inj
    while ind + 1 < len(source) and source[ind + 1] == ' ':
        ind += 1
    ind -= inj
    return source[:inj + 1] + indent(code, ind) + source[inj + 1:]
def bracket_split(source, brackets=('()', '{}', '[]'), strip=False):
    """ Generator splitting `source` into top-level bracket groups and the
    text between them, in order.

    brackets -- sequence of 2-character strings (opener, closer).
    strip    -- when true the outer bracket characters are removed from
                each yielded group.

    DOES NOT RETURN EMPTY STRINGS (can only return empty bracket content if
    strip=True). Only the bracket kind that opened a group is tracked while
    inside it. A group that is never closed is yielded as part of the
    trailing text. """
    openers = [pair[0] for pair in brackets]
    depth = 0
    last = 0  # first position not yet yielded
    for n, ch in enumerate(source):
        if not depth:
            if ch in openers:
                depth = 1
                start = n
                b_start, b_end = brackets[openers.index(ch)]
        elif ch == b_start:
            depth += 1
        elif ch == b_end:
            depth -= 1
            if not depth:
                # Group closed: emit the text before it, then the group.
                if source[last:start]:
                    yield source[last:start]
                last = n + 1
                # `strip` is used as 0/1 to drop the outer brackets.
                yield source[start + strip:n + 1 - strip]
    if source[last:]:
        yield source[last:]
def pass_bracket(source, start, bracket='()'):
    """ Returns (bracket group including its brackets, first position after
    the group) when source[start:] begins with optional white space
    followed by a bracket group.

    Otherwise returns (None, None).

    bracket -- 2-character string: opener then closer. """
    parts = bracket_split(source[start:], [bracket], False)
    # Builtin next() works on both Python 2.6+ and Python 3; the generator
    # method .next() exists only on Python 2.
    try:
        cand = next(parts)
    except StopIteration:
        return None, None
    if not cand.strip():  # white space...
        try:
            res = next(parts)
        except StopIteration:
            return None, None
        return res, start + len(cand) + len(res)
    elif cand[-1] == bracket[1]:
        return cand, start + len(cand)
    else:
        return None, None
def startswith_keyword(start, keyword):
    """ True when `start`, ignoring leading white space, begins with
    `keyword` as a whole word, i.e. the character right after the keyword
    (if any) is not in IDENTIFIER_PART. """
    stripped = start.lstrip()
    if not stripped.startswith(keyword):
        return False
    kw_len = len(keyword)
    # Reject when the keyword is only a prefix of a longer identifier.
    continues = kw_len < len(stripped) and stripped[kw_len] in IDENTIFIER_PART
    return not continues
def endswith_keyword(ending, keyword):
    """ True when `ending`, ignoring trailing white space, ends with
    `keyword` as a whole word, i.e. the character right before the keyword
    (if any) is not in IDENTIFIER_PART. """
    trimmed = ending.rstrip()
    if not trimmed.endswith(keyword):
        return False
    if len(keyword) < len(trimmed):
        # Index of the character immediately preceding the keyword.
        before = len(trimmed) - len(keyword) - 1
        if trimmed[before] in IDENTIFIER_PART:
            return False
    return True
def pass_white(source, start):
    """ Returns the first position at or after `start` whose character is
    not in SPACE (or `start` itself when it is already past the end). """
    pos = start
    while pos < len(source) and source[pos] in SPACE:
        pos += 1
    return pos
def except_token(source, start, token, throw=True):
    """ Skips white space from `start` and expects the single character
    `token` there.

    Returns the position right after the token when it is found.
    Otherwise raises SyntaxError when `throw` is true, else returns None. """
    pos = pass_white(source, start)
    found = pos < len(source) and source[pos] == token
    if found:
        return pos + 1
    if not throw:
        return None
    raise SyntaxError(' Missing token. Expected %s ' % token)
def except_keyword(source, start, keyword):
    """ Skips white space from `start` and expects `keyword` there as a
    whole word (not followed by a character in IDENTIFIER_PART).

    Returns the position right after the keyword, or None when it is not
    found. """
    begin = pass_white(source, start)
    stop = begin + len(keyword)
    size = len(source)
    if stop > size:
        return None  # not enough text left to hold the keyword
    if source[begin:stop] != keyword:
        return None
    if stop < size and source[stop] in IDENTIFIER_PART:
        return None  # keyword is just the prefix of a longer identifier
    return stop
def parse_identifier(source, start, throw=True):
    """ Skips white space from `start` and reads the identifier found there.

    Returns (identifier, position right after it) on success.
    On failure (nothing left, bad first character, or a name rejected by
    is_valid_lval) raises SyntaxError when `throw` is true, else returns
    None. """
    begin = pass_white(source, start)
    size = len(source)
    if begin >= size:
        if not throw:
            return None
        raise SyntaxError(' Missing identifier! ')
    first = source[begin]
    if first not in IDENTIFIER_START:
        if not throw:
            return None
        raise SyntaxError(' Invalid identifier start: " %s " ' % first)
    # Extend over every following identifier character.
    stop = begin + 1
    while stop < size and source[stop] in IDENTIFIER_PART:
        stop += 1
    name = source[begin:stop]
    if is_valid_lval(name):
        return name, stop
    if not throw:
        return None
    raise SyntaxError(' Invalid identifier name: " %s " ' % name)
def argsplit(args, sep=','):
    """ Splits `args` at every `sep` that is not inside (), [] or {}.

    Used to split JS args (not as simple as it seems because sep can be
    inside brackets). Pass args *without* the outer brackets!
    Also used to parse array and object elements, and more.

    sep -- a single character.
    Returns a list of fragments; always at least one element. """
    parsed_len = 0  # length of `args` consumed by the chunks seen so far
    last = 0        # start of the fragment currently being collected
    splits = []
    for chunk in bracket_split(args, brackets=['()', '[]', '{}']):
        # Separators are only searched for in chunks that do not start
        # with an opening bracket.
        if chunk[0] not in ('(', '[', '{'):
            for i, char in enumerate(chunk):
                if char == sep:
                    cut = parsed_len + i
                    splits.append(args[last:cut])
                    last = cut + 1
        parsed_len += len(chunk)
    splits.append(args[last:])
    return splits
def split_add_ops(text):
    """ Specialized generator splitting text at binary add/sub operators.

    Operands are *not* translated.
    Example result: ['op1', '+', 'op2', '-', 'op3']

    A + or - counts as binary only when an operand character has been seen
    since the last operator; '++' and '--' are never split. """
    def restore(fragment):
        # Undo the temporary masking of ++ / -- applied below.
        return fragment.replace('##', '++').replace('@@', '--')

    # Mask ++ and -- so they are not taken for add/sub operators
    # (text does not normally contain any of these marker characters).
    text = text.replace('++', '##').replace('--', '@@')
    spotted = False  # True once anything other than an operator or a space was seen
    last = 0
    for n, e in enumerate(text):
        if e == '+' or e == '-':
            if spotted:
                yield restore(text[last:n])
                yield e
                last = n + 1
                spotted = False
        elif e == '/' or e == '*' or e == '%':
            # A sign right after one of these is unary, not binary.
            spotted = False
        elif e != ' ':
            spotted = True
    yield restore(text[last:])
def split_at_any(text,
                 lis,
                 translate=False,
                 not_before=(),
                 not_after=(),
                 validitate=None):
    """ Generator splitting `text` at any of the separators in `lis`,
    yielding fragments and the separators found, alternately.

    lis        -- separators; longer ones are tried first. The caller's
                  sequence is not modified.
    translate  -- optional callable applied to each fragment (not to the
                  separators) before it is yielded.
    not_before -- a position is skipped when the text before it ends with
                  any of these.
    not_after  -- a candidate is abandoned (and the position advanced) when
                  the text after it starts with any of these.
    validitate -- optional callable(sep, text_before, text_after); a
                  candidate is skipped when it returns a false value. """
    # Sort a copy: sorting in place would silently reorder the caller's list.
    candidates = sorted(lis, key=len, reverse=True)
    last = 0
    n = 0
    text_len = len(text)
    while n < text_len:
        if any(text[:n].endswith(prefix) for prefix in not_before):
            n += 1
            continue
        for e in candidates:
            s = len(e)
            if s + n > text_len:
                continue
            if validitate and not validitate(e, text[:n], text[n + s:]):
                continue
            if any(text[n + s:].startswith(suffix) for suffix in not_after):
                n += 1
                break
            if e == text[n:n + s]:
                yield translate(text[last:n]) if translate else text[last:n]
                yield e
                n += s
                last = n
                break
        else:
            # No candidate matched or blocked here: move on one character.
            n += 1
    yield translate(text[last:n]) if translate else text[last:n]
def split_at_single(text, sep, not_before=(), not_after=()):
    """ Generator that works like text.split(sep), but an occurrence of
    `sep` is ignored when the fragment before it ends with any of
    `not_before` or the text after it starts with any of `not_after`.

    Raises ValueError (on first iteration) for an empty `sep`, as
    str.split does; an empty separator would otherwise match forever
    without advancing. """
    if not sep:
        raise ValueError('empty separator')
    lt, s = len(text), len(sep)
    n = 0
    last = 0  # start of the fragment currently being collected
    while n < lt:
        if text.startswith(sep, n):
            blocked = (
                any(text[last:n].endswith(e) for e in not_before) or
                any(text[n + s:].startswith(e) for e in not_after))
            if not blocked:
                yield text[last:n]
                last = n + s
                n = last  # resume right after the separator
                continue
        n += 1
    yield text[last:]