You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
56 lines
1.4 KiB
56 lines
1.4 KiB
6 years ago
|
"""
|
||
|
Makes some function names and behavior consistent between Python 2 and
|
||
|
Python 3, and also between narrow and wide builds.
|
||
|
"""
|
||
|
from __future__ import unicode_literals
|
||
|
import sys
|
||
|
import unicodedata
|
||
|
|
||
|
# True when running under a Python 2 interpreter, False on Python 3 or later.
PYTHON2 = sys.version_info[0] < 3

if PYTHON2:
    # Re-bind the Python 2 builtins so they are also available as attributes
    # of this module, under the same names used on Python 3.
    unichr = unichr
    xrange = xrange
else:
    # Python 3 dropped these names; alias them to their replacements.
    unichr = chr
    xrange = range

# True when the interpreter is Python 3.4 or newer.
PYTHON34_OR_LATER = sys.version_info >= (3, 4)
|
||
|
|
||
|
|
||
|
def _narrow_unichr_workaround(codepoint):
|
||
|
"""
|
||
|
A replacement for unichr() on narrow builds of Python. This will get
|
||
|
us the narrow representation of an astral character, which will be
|
||
|
a string of length two, containing two UTF-16 surrogates.
|
||
|
"""
|
||
|
escaped = b'\\U%08x' % codepoint
|
||
|
return escaped.decode('unicode-escape')
|
||
|
|
||
|
|
||
|
# When the largest codepoint a single character can hold is below 0x10000,
# swap in the workaround so that unichr() can still produce astral characters.
if sys.maxunicode <= 0xFFFF:
    unichr = _narrow_unichr_workaround
|
||
|
|
||
|
|
||
|
def bytes_to_ints(bytestring):
    """
    Return the contents of `bytestring` as a sequence of integers, regardless
    of the Python version in use.

    On Python 3 a 'bytes' object already is a sequence of integers, so the
    argument is returned unchanged. On Python 2 a new list is built by
    applying ord() to each item of the bytestring.
    """
    if not PYTHON2:
        return bytestring
    return [ord(byte) for byte in bytestring]
|
||
|
|
||
|
|
||
|
def is_printable(char):
    """
    Report whether `char` is printable.

    Python 3 provides str.isprintable(), which is used directly there. That
    method does not exist on Python 2, so a crude approximation is used
    instead: a character counts as printable unless its Unicode category
    starts with 'C'. This is useful in `explain_unicode`.
    """
    if not PYTHON2:
        return char.isprintable()
    category = unicodedata.category(char)
    return not category.startswith('C')
|