You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
155 lines
5.3 KiB
155 lines
5.3 KiB
6 years ago
|
# coding: utf-8
|
||
|
"""
|
||
|
This module provides functions for justifying Unicode text in a monospaced
|
||
|
display such as a terminal.
|
||
|
|
||
|
We used to have our own implementation here, but now we mostly rely on
|
||
|
the 'wcwidth' library.
|
||
|
"""
|
||
|
from __future__ import unicode_literals, division
|
||
|
from unicodedata import normalize
|
||
|
from wcwidth import wcwidth, wcswidth
|
||
|
|
||
|
|
||
|
def character_width(char):
    r"""
    Report how many cells of a monospaced terminal a single character is
    expected to take up.

    The answer comes straight from `wcwidth`. Printable characters yield
    0, 1, or 2. Control and other nonprintable characters yield -1, which is
    the convention `wcwidth` uses for them.

    >>> character_width('車')
    2
    >>> character_width('A')
    1
    >>> character_width('\N{ZERO WIDTH JOINER}')
    0
    >>> character_width('\n')
    -1
    """
    cells = wcwidth(char)
    return cells
|
||
|
|
||
|
|
||
|
def monospaced_width(text):
    """
    Compute the "display width" of a string: the number of character cells
    it is likely to fill in a modern, Unicode-aware, monospaced terminal
    emulator.

    This is the number to use when lining up text that may include
    non-spacing characters, or CJK characters that are two cells wide.

    The result is -1 when the string contains a non-printable or control
    character.

    >>> monospaced_width('ちゃぶ台返し')
    12
    >>> len('ちゃぶ台返し')
    6
    >>> monospaced_width('owl\N{SOFT HYPHEN}flavored')
    12
    >>> monospaced_width('example\x80')
    -1

    # The Korean word 'ibnida' can be written with 3 characters or 7 jamo.
    # Either way, it *looks* the same and takes up 6 character cells.
    >>> monospaced_width('입니다')
    6
    >>> monospaced_width('\u110b\u1175\u11b8\u1102\u1175\u1103\u1161')
    6
    """
    # Compose the text (NFC) before measuring it, so that sequences of Hangul
    # jamo are measured as the syllables they form and need no special case.
    composed = normalize('NFC', text)
    return wcswidth(composed)
|
||
|
|
||
|
|
||
|
def display_ljust(text, width, fillchar=' '):
    """
    Pad `text` at its end with `fillchar` so that its display width in a
    monospaced terminal is at least `width` character cells. Text that is
    already that wide comes back with no padding added.

    `fillchar` has to be a character of display width 1; anything else raises
    a ValueError. If the display width of `text` cannot be determined (it
    measures as -1 because of a control or non-printable character), `text`
    is returned as it was given.

    "Left" is meant logically: the text sits toward the beginning of the
    string, which an RTL context may render on the right. It is "left" in the
    same sense as in "left parenthesis".

    >>> lines = ['Table flip', '(╯°□°)╯︵ ┻━┻', 'ちゃぶ台返し']
    >>> for line in lines:
    ...     print(display_ljust(line, 20, '▒'))
    Table flip▒▒▒▒▒▒▒▒▒▒
    (╯°□°)╯︵ ┻━┻▒▒▒▒▒▒▒
    ちゃぶ台返し▒▒▒▒▒▒▒▒

    This example, like the ones on the other justification functions, lines
    up properly in a monospaced terminal. In a Web browser it will probably
    look misaligned.
    """
    if character_width(fillchar) != 1:
        raise ValueError("The padding character must have display width 1")

    cells = monospaced_width(text)
    if cells == -1:
        # The width is unknown because of a control character, so padding
        # could not be computed meaningfully; hand the text back untouched.
        return text

    shortfall = width - cells
    pad_count = shortfall if shortfall > 0 else 0
    return text + fillchar * pad_count
|
||
|
|
||
|
|
||
|
def display_rjust(text, width, fillchar=' '):
    """
    Pad `text` at its beginning with `fillchar` so that its display width in
    a monospaced terminal is at least `width` character cells. Text that is
    already that wide comes back with no padding added.

    `fillchar` has to be a character of display width 1; anything else raises
    a ValueError. If the display width of `text` cannot be determined (it
    measures as -1 because of a control or non-printable character), `text`
    is returned as it was given.

    "Right" is meant logically: the text sits toward the end of the string,
    which an RTL context may render on the left. It is "right" in the same
    sense as in "right parenthesis".

    >>> lines = ['Table flip', '(╯°□°)╯︵ ┻━┻', 'ちゃぶ台返し']
    >>> for line in lines:
    ...     print(display_rjust(line, 20, '▒'))
    ▒▒▒▒▒▒▒▒▒▒Table flip
    ▒▒▒▒▒▒▒(╯°□°)╯︵ ┻━┻
    ▒▒▒▒▒▒▒▒ちゃぶ台返し
    """
    if character_width(fillchar) != 1:
        raise ValueError("The padding character must have display width 1")

    cells = monospaced_width(text)
    if cells == -1:
        # The width is unknown because of a control character, so padding
        # could not be computed meaningfully; hand the text back untouched.
        return text

    shortfall = width - cells
    pad_count = shortfall if shortfall > 0 else 0
    return fillchar * pad_count + text
|
||
|
|
||
|
|
||
|
def display_center(text, width, fillchar=' '):
    """
    Surround `text` with `fillchar` on both sides so that it is centered in
    a string whose display width in a monospaced terminal is at least `width`
    character cells. Text that is already that wide comes back with no
    padding added.

    When the number of padding cells is odd, the extra cell goes after the
    text.

    `fillchar` has to be a character of display width 1; anything else raises
    a ValueError. If the display width of `text` cannot be determined (it
    measures as -1 because of a control or non-printable character), `text`
    is returned as it was given.

    >>> lines = ['Table flip', '(╯°□°)╯︵ ┻━┻', 'ちゃぶ台返し']
    >>> for line in lines:
    ...     print(display_center(line, 20, '▒'))
    ▒▒▒▒▒Table flip▒▒▒▒▒
    ▒▒▒(╯°□°)╯︵ ┻━┻▒▒▒▒
    ▒▒▒▒ちゃぶ台返し▒▒▒▒
    """
    if character_width(fillchar) != 1:
        raise ValueError("The padding character must have display width 1")

    cells = monospaced_width(text)
    if cells == -1:
        # The width is unknown because of a control character, so padding
        # could not be computed meaningfully; hand the text back untouched.
        return text

    shortfall = width - cells
    total_pad = shortfall if shortfall > 0 else 0
    # Split the padding in half; any odd cell left over is placed at the end.
    before, leftover = divmod(total_pad, 2)
    after = before + leftover
    return fillchar * before + text + fillchar * after
|