Fixes mojibake and other problems with Unicode, after the fact
—
Unicode-aware text formatting for terminal display, including width calculation and justification functions that handle fullwidth characters and zero-width characters correctly.
Functions for calculating the display width of Unicode characters in monospaced terminals.
def character_width(char: str) -> int:
    """
    Determine display width of character in monospaced terminal.

    Returns width in terminal columns: 0 for zero-width, 1 for normal,
    2 for wide characters (CJK), -1 for control/non-printable chars.
    Uses wcwidth library for accurate Unicode width calculation.

    Args:
        char: Single Unicode character

    Returns:
        Display width (0, 1, 2, or -1)

    Examples:
        >>> character_width('A')
        1
        >>> character_width('車')  # Wide CJK character
        2
        >>> character_width('\\u200b')  # Zero-width space
        0
        >>> character_width('\\x1b')  # Control character
        -1
    """
def monospaced_width(text: str) -> int:
    """
    Calculate total display width of text in monospaced terminal.

    Sums the display widths of all characters, handling wide characters,
    zero-width characters, and control sequences properly.

    Args:
        text: Unicode string

    Returns:
        Total display width in terminal columns, or -1 if contains
        control characters that make width undetermined

    Examples:
        >>> monospaced_width("hello")
        5
        >>> monospaced_width("café")
        4
        >>> monospaced_width("hello世界")  # Mixed ASCII and wide chars
        9
        >>> monospaced_width("hello\\x1b[31m")  # Contains control chars
        -1
    """
    # NOTE(review): the last example may be out of date. Upstream ftfy
    # documents ANSI terminal escapes as being ignored when measuring
    # (e.g. '\x1b[34mblue\x1b[m' -> 4); confirm against the installed version.

Unicode-aware text justification functions that properly handle character width for terminal display.
def display_ljust(text: str, width: int, fillchar: str = " ") -> str:
    """
    Left-justify text in field of given width, accounting for Unicode display width.

    Unlike str.ljust(), correctly handles wide characters (CJK), zero-width
    characters, and combining characters for proper terminal alignment.

    Args:
        text: String to justify
        width: Target display width in terminal columns
        fillchar: Character to pad with (default space)

    Returns:
        Left-justified string

    Examples:
        >>> display_ljust("hello", 10)
        'hello     '
        >>> display_ljust("café", 10, '-')
        'café------'
        >>> display_ljust("世界", 6)  # Wide chars count as 2
        '世界  '
    """
def display_rjust(text: str, width: int, fillchar: str = " ") -> str:
    """
    Right-justify text in field of given width, accounting for Unicode display width.

    Unicode-aware version of str.rjust() that handles wide characters,
    zero-width characters, and combining characters correctly.

    Args:
        text: String to justify
        width: Target display width in terminal columns
        fillchar: Character to pad with (default space)

    Returns:
        Right-justified string

    Examples:
        >>> display_rjust("hello", 10)
        '     hello'
        >>> display_rjust("世界", 6)  # Wide chars handled correctly
        '  世界'
    """
def display_center(text: str, width: int, fillchar: str = " ") -> str:
    """
    Center text in field of given width, accounting for Unicode display width.

    Unicode-aware version of str.center() that properly centers text
    containing wide characters, zero-width characters, and combining chars.

    Args:
        text: String to center
        width: Target display width in terminal columns
        fillchar: Character to pad with (default space)

    Returns:
        Centered string

    Examples:
        >>> display_center("hello", 11)
        '   hello   '
        >>> display_center("世界", 8)  # Wide chars centered correctly
        '  世界  '
    """

from ftfy.formatting import character_width, monospaced_width
# Check individual character widths
print(character_width('A'))  # 1 - normal ASCII
print(character_width('世'))  # 2 - wide CJK
print(character_width('\u0300'))  # 0 - combining accent
print(character_width('\t'))  # -1 - control character

# Calculate total text width
text = "Hello 世界!"
width = monospaced_width(text)
# Seven narrow characters (letters, the space and "!") plus two wide
# characters at two columns each: 7 + 4 = 11.
print(f"'{text}' displays as {width} columns")  # 11 columns

from ftfy.formatting import display_ljust, display_rjust, display_center
# Sample strings mixing ASCII, accented and wide (CJK) characters.
texts = ["hello", "café", "世界", "mixed 世界 text"]
width = 20

print("Left justified:")
for text in texts:
    justified = display_ljust(text, width, '.')
    print(f"'{justified}'")

print("\nRight justified:")
for text in texts:
    justified = display_rjust(text, width, '.')
    print(f"'{justified}'")

print("\nCentered:")
for text in texts:
    justified = display_center(text, width, '.')
    print(f"'{justified}'")

from ftfy.formatting import display_ljust, display_rjust, monospaced_width
def format_table(data, headers, widths):
    """Print a pipe-separated table aligned by terminal display width.

    Each header and each cell is padded with ``display_ljust`` to the
    matching entry of ``widths``. A dashed rule as wide as the rendered
    header line is printed between the header and the data rows.

    Args:
        data: Iterable of rows; every cell is converted with ``str()``.
        headers: Column titles, passed to ``display_ljust`` as given.
        widths: Target display width for each column.
    """
    separator = " | "

    # Header line, followed by a rule spanning its full display width.
    padded_headers = [
        display_ljust(title, col_width)
        for title, col_width in zip(headers, widths)
    ]
    header_line = separator.join(padded_headers)
    print(header_line)
    print("-" * monospaced_width(header_line))

    # One output line per record; cells are stringified before padding.
    for record in data:
        padded_cells = []
        for value, col_width in zip(record, widths):
            padded_cells.append(display_ljust(str(value), col_width))
        print(separator.join(padded_cells))
# Example with mixed character widths
headers = ["Name", "City", "Score"]
widths = [15, 10, 8]
data = [
    ["Alice Smith", "NYC", "95.5"],
    ["田中太郎", "東京", "87.2"],  # Japanese name and city
    ["José García", "México", "92.1"],  # Accented characters
]
format_table(data, headers, widths)

from ftfy.formatting import display_ljust, monospaced_width
def unicode_progress_bar(current, total, width=40, fill='█', empty='░'):
    """Create progress bar that handles Unicode fill characters.

    Args:
        current: Amount of work completed so far.
        total: Amount of work that counts as 100%. A total of 0 is
            reported as 0% instead of dividing by zero.
        width: Bar width in terminal columns (default 40). Rounded down to
            a multiple of the fill string's display width when that width
            is greater than 1.
        fill: String drawn for the completed part of the bar.
        empty: String drawn for the remaining part of the bar.

    Returns:
        A string of the form ``[<bar>] <current>/<total> (<percent>)``,
        with the percentage formatted to one decimal place.

    Raises:
        ValueError: If ``fill`` or ``empty`` does not occupy at least one
            terminal column, because the bar cannot be sized with it.
    """
    # Calculate fill amount based on actual character widths
    fill_width = monospaced_width(fill)
    empty_width = monospaced_width(empty)
    # A width of 0 or -1 would divide by zero or yield negative repeat
    # counts below, so reject such strings up front.
    if fill_width < 1 or empty_width < 1:
        raise ValueError(
            "fill and empty must each occupy at least one terminal column"
        )

    # Adjust for character widths
    if fill_width > 1:
        width = width // fill_width * fill_width

    percent = current / total if total else 0.0
    # Clamp only the drawn portion so the bar never overflows or goes
    # negative; the label below still reports the true percentage.
    drawn = min(max(percent, 0.0), 1.0)
    filled_chars = int(width * drawn // fill_width)
    empty_chars = (width - filled_chars * fill_width) // empty_width

    bar = fill * filled_chars + empty * empty_chars
    return f"[{bar}] {current}/{total} ({percent:.1%})"
# Example with Unicode characters
# 70% of 40 columns: 28 filled + 12 empty
print(unicode_progress_bar(7, 10))  # [████████████████████████████░░░░░░░░░░░░] 7/10 (70.0%)
# 60% of 40 columns: 24 filled + 16 empty
print(unicode_progress_bar(3, 5))  # [████████████████████████░░░░░░░░░░░░░░░░] 3/5 (60.0%)

from ftfy.formatting import display_ljust, display_center, monospaced_width
def print_status_table(statuses):
    """Print status table with proper alignment.

    Prints a centered "System Status" title, a rule of ``=`` characters,
    then one line per entry with the name and status columns padded by
    display width.

    Args:
        statuses: Iterable of mappings, each providing ``'name'`` and
            ``'status'`` strings. An empty iterable prints only the title
            and the rule.
    """
    # Materialize once: the entries are traversed three times below, which
    # would exhaust a one-shot iterator.
    rows = list(statuses)

    # Calculate column widths based on actual display widths, plus two
    # columns of padding. default=0 keeps an empty table from raising.
    name_width = max((monospaced_width(s['name']) for s in rows), default=0) + 2
    status_width = max((monospaced_width(s['status']) for s in rows), default=0) + 2
    table_width = name_width + status_width

    print(display_center("System Status", table_width))
    print("=" * table_width)

    for item in rows:
        name_col = display_ljust(item['name'], name_width)
        status_col = display_ljust(item['status'], status_width)
        print(f"{name_col}{status_col}")
# Example with international text
statuses = [
    {'name': 'Database', 'status': '✓ Running'},
    {'name': 'サーバー', 'status': '✓ 動作中'},  # Japanese
    {'name': 'Señales', 'status': '⚠ Alerta'},  # Spanish with warning
    {'name': 'Система', 'status': '✗ Ошибка'},  # Russian with error
]
print_status_table(statuses)

from ftfy.formatting import monospaced_width, display_ljust
def wrap_text_unicode(text, max_width):
    """Greedily wrap whitespace-separated words by display width.

    Args:
        text: String to wrap; it is split on whitespace.
        max_width: Maximum display width of a line, in terminal columns.

    Returns:
        List of lines, each a space-joined run of words. A single word
        wider than ``max_width`` is placed on a line of its own.
    """
    wrapped = []
    pending = []       # words collected for the line being built
    pending_width = 0  # running total: word widths plus one per separator

    for token in text.split():
        token_width = monospaced_width(token)
        # A separating space is only needed once the line has a word.
        needed = token_width + (1 if pending else 0)

        if pending_width + needed > max_width:
            # Token does not fit: emit the line so far (if any) and start
            # a new line with this token.
            if pending:
                wrapped.append(' '.join(pending))
            pending = [token]
            pending_width = token_width
            continue

        pending.append(token)
        pending_width += needed

    if pending:
        wrapped.append(' '.join(pending))
    return wrapped
# Example with mixed character widths
mixed_text = "This text contains 中文字符 and עברית characters with different display widths"
wrapped = wrap_text_unicode(mixed_text, 30)
for line in wrapped:
print(f"'{line}' ({monospaced_width(line)} columns)")Install with Tessl CLI
npx tessl i tessl/pypi-ftfy