tessl/pypi-ftfy

Fixes mojibake and other problems with Unicode, after the fact

—

Pending

Overview

Eval results

Files

Display and Formatting

Name: tessl/pypi-ftfy
Author: tessl

Unicode-aware text formatting for terminal display, including width calculation and justification functions that handle fullwidth characters and zero-width characters correctly.

Capabilities

Character Width Calculation

Functions for calculating the display width of Unicode characters in monospaced terminals.

def character_width(char: str) -> int:
    """
    Return the display width of one character in a monospaced terminal.

    The result is a number of terminal columns: 0 for zero-width
    characters, 1 for ordinary characters, 2 for wide characters (such as
    CJK), and -1 for control or other non-printable characters. Widths are
    computed with the wcwidth library.

    Args:
        char: Single Unicode character

    Returns:
        Display width (0, 1, 2, or -1)

    Examples:
        >>> character_width('A')
        1
        >>> character_width('車')  # Wide CJK character
        2
        >>> character_width('\\u200b')  # Zero-width space
        0
        >>> character_width('\\x1b')  # Control character
        -1
    """

def monospaced_width(text: str) -> int:
    """
    Return the total display width of text in a monospaced terminal.

    Wide characters count as two columns and zero-width characters as
    none, so the result can differ from len(text).

    Args:
        text: Unicode string

    Returns:
        Total display width in terminal columns, or -1 if the text contains
        control characters that make the width undetermined

    Examples:
        >>> monospaced_width("hello")
        5
        >>> monospaced_width("café")
        4
        >>> monospaced_width("hello世界")  # 5 narrow + 2 wide characters
        9
        >>> monospaced_width("hello\\x1b[31m")  # Contains control chars
        -1

    NOTE(review): the last example assumes an ANSI colour escape is treated
    as control characters. Upstream ftfy is believed to strip terminal
    escape sequences before measuring, in which case that call would return
    5 instead of -1 -- confirm against the installed ftfy version.
    """

Text Justification

Unicode-aware text justification functions that properly handle character width for terminal display.

def display_ljust(text: str, width: int, fillchar: str = " ") -> str:
    """
    Left-justify text in a field measured in terminal columns.

    Unlike str.ljust(), which counts characters, the padding is computed
    from the display width of the text, so wide characters (CJK),
    zero-width characters, and combining characters line up correctly in a
    terminal.

    Args:
        text: String to justify
        width: Target display width in terminal columns
        fillchar: Character to pad with (default space)

    Returns:
        The text followed by enough fill characters to reach the target
        display width

    Examples:
        >>> display_ljust("hello", 10)
        'hello     '
        >>> display_ljust("café", 10, '-')
        'café------'
        >>> display_ljust("世界", 6)  # Wide chars count as 2, so 2 fill columns
        '世界  '
    """

def display_rjust(text: str, width: int, fillchar: str = " ") -> str:
    """
    Right-justify text in a field measured in terminal columns.

    Display-width-aware counterpart of str.rjust(): the padding is computed
    from the display width of the text, so wide characters, zero-width
    characters, and combining characters are handled correctly.

    Args:
        text: String to justify
        width: Target display width in terminal columns
        fillchar: Character to pad with (default space)

    Returns:
        The text preceded by enough fill characters to reach the target
        display width

    Examples:
        >>> display_rjust("hello", 10)
        '     hello'
        >>> display_rjust("世界", 6)  # Wide chars count as 2, so 2 fill columns
        '  世界'
    """

def display_center(text: str, width: int, fillchar: str = " ") -> str:
    """
    Center text in a field measured in terminal columns.

    Display-width-aware counterpart of str.center(): the padding is
    computed from the display width of the text, so text containing wide
    characters, zero-width characters, and combining characters is centered
    correctly.

    Args:
        text: String to center
        width: Target display width in terminal columns
        fillchar: Character to pad with (default space)

    Returns:
        Centered string

    Examples:
        >>> display_center("hello", 11)
        '   hello   '
        >>> display_center("世界", 8)  # 4 columns of text + 2 fill columns per side
        '  世界  '
    """

Usage Examples

Basic Width Calculation

from ftfy.formatting import character_width, monospaced_width

# Check individual character widths
print(character_width('A'))        # 1 - normal ASCII
print(character_width('世'))       # 2 - wide CJK
print(character_width('\u0300'))  # 0 - combining accent
print(character_width('\t'))       # -1 - control character

# Calculate total text width
text = "Hello 世界!"
width = monospaced_width(text)
print(f"'{text}' displays as {width} columns")  # 11 columns: 7 narrow characters + 2 wide characters at 2 columns each

Terminal-Aware Text Alignment

from ftfy.formatting import display_ljust, display_rjust, display_center

texts = ["hello", "café", "世界", "mixed 世界 text"]
width = 20

# Each section pairs the heading to print with the justification function
# to demonstrate; every function is called with the same width and '.' as
# the fill character so the padding is visible.
sections = (
    ("Left justified:", display_ljust),
    ("\nRight justified:", display_rjust),
    ("\nCentered:", display_center),
)

for heading, justify in sections:
    print(heading)
    for text in texts:
        justified = justify(text, width, '.')
        print(f"'{justified}'")

Table Formatting

from ftfy.formatting import display_ljust, display_rjust, monospaced_width

def format_table(data, headers, widths):
    """Print a table whose columns are aligned by display width.

    Output is a header line, a rule of "-" characters as wide as the
    header line's display width, then one line per row. Cells are
    left-justified with display_ljust and joined with " | ".

    Args:
        data: Iterable of rows; each cell is converted with str().
        headers: Column titles, passed to display_ljust as-is.
        widths: Target display width, in terminal columns, per column.
    """
    separator = " | "

    # Header line followed by a rule of matching display width
    padded_titles = [
        display_ljust(title, column_width)
        for title, column_width in zip(headers, widths)
    ]
    heading = separator.join(padded_titles)
    print(heading)
    rule = "-" * monospaced_width(heading)
    print(rule)

    # One output line per data row
    for record in data:
        padded_cells = [
            display_ljust(str(value), column_width)
            for value, column_width in zip(record, widths)
        ]
        print(separator.join(padded_cells))

# Example with mixed character widths
headers = ["Name", "City", "Score"]
widths = [15, 10, 8]  # target display width, in terminal columns, for each column
data = [
    ["Alice Smith", "NYC", "95.5"],
    ["田中太郎", "東京", "87.2"],  # Japanese name and city (wide characters)
    ["José García", "México", "92.1"]  # Accented characters
]

# Prints the header line, the "-" rule, and one aligned line per row
format_table(data, headers, widths)

Progress Bar with Unicode

from ftfy.formatting import display_ljust, monospaced_width

def unicode_progress_bar(current, total, width=40, fill='█', empty='░'):
    """Build a text progress bar sized in terminal columns.

    The bar is measured with monospaced_width, so fill and empty strings
    that occupy more than one column still give a bar of the requested
    width.

    Args:
        current: Amount of work completed.
        total: Amount of work in total. A total of 0 is reported as 0%
            instead of raising ZeroDivisionError.
        width: Target width of the bar in terminal columns. It is rounded
            down to a multiple of the fill width when fill is wider than
            one column.
        fill: String drawn for the completed part.
        empty: String drawn for the remaining part.

    Returns:
        A string of the form "[bar] current/total (percent)".

    Raises:
        ValueError: If fill or empty has a display width below 1 (empty
            string, zero-width, or containing control characters); such a
            string cannot be used to fill columns.
    """
    # Calculate fill amount based on actual character widths
    fill_width = monospaced_width(fill)
    empty_width = monospaced_width(empty)
    if fill_width < 1 or empty_width < 1:
        raise ValueError("fill and empty must have a positive display width")

    # Adjust for character widths
    if fill_width > 1:
        width = width // fill_width * fill_width

    percent = current / total if total else 0.0

    # Clamp only the drawn fraction, so the bar never exceeds `width` when
    # current is negative or overshoots total; the label keeps the real value.
    drawn_fraction = min(max(percent, 0.0), 1.0)
    filled_chars = int(width * drawn_fraction // fill_width)
    empty_chars = (width - filled_chars * fill_width) // empty_width

    bar = fill * filled_chars + empty * empty_chars

    return f"[{bar}] {current}/{total} ({percent:.1%})"

# Example with Unicode characters (40-column bar: 70% -> 28 filled, 60% -> 24 filled)
print(unicode_progress_bar(7, 10))   # [████████████████████████████░░░░░░░░░░░░] 7/10 (70.0%)
print(unicode_progress_bar(3, 5))    # [████████████████████████░░░░░░░░░░░░░░░░] 3/5 (60.0%)

Command Line Output Formatting

from ftfy.formatting import display_ljust, display_center, monospaced_width

def print_status_table(statuses):
    """Print a two-column name/status table aligned by display width.

    Each column is as wide as its widest entry, measured in terminal
    columns rather than characters, plus two columns of padding. A
    centered "System Status" title and a rule of "=" characters are
    printed before the rows.

    Args:
        statuses: Sequence of dicts with 'name' and 'status' string
            entries. It is iterated more than once, so it must not be a
            one-shot iterator. An empty sequence prints only the title
            and the rule.
    """
    # Calculate column widths based on actual display widths;
    # default=0 keeps max() from raising ValueError when there are no rows.
    name_width = max(
        (monospaced_width(s['name']) for s in statuses), default=0
    ) + 2
    status_width = max(
        (monospaced_width(s['status']) for s in statuses), default=0
    ) + 2
    total_width = name_width + status_width

    print(display_center("System Status", total_width))
    print("=" * total_width)

    for item in statuses:
        name_col = display_ljust(item['name'], name_width)
        status_col = display_ljust(item['status'], status_width)
        print(f"{name_col}{status_col}")

# Example with international text: each dict supplies the 'name' and
# 'status' keys that print_status_table reads
statuses = [
    {'name': 'Database', 'status': '✓ Running'},
    {'name': 'サーバー', 'status': '✓ 動作中'},  # Japanese (wide characters)
    {'name': 'Señales', 'status': '⚠ Alerta'},  # Spanish with warning
    {'name': 'Система', 'status': '✗ Ошибка'}   # Russian with error
]

# Prints the centered title, the "=" rule, and one aligned line per entry
print_status_table(statuses)

Width-Aware Text Processing

from ftfy.formatting import monospaced_width, display_ljust

def wrap_text_unicode(text, max_width):
    """Greedily wrap text into lines limited by display width.

    The text is split on whitespace and words are packed onto a line,
    joined by single spaces, while the line's display width stays within
    max_width. A word that is wider than max_width on its own is still
    emitted, alone on its line.

    Args:
        text: String to wrap.
        max_width: Maximum display width of a line, in terminal columns.

    Returns:
        List of wrapped lines (empty when text has no words).
    """
    wrapped = []
    pending = []   # words collected for the line being built
    used = 0       # display width of the line being built

    for token in text.split():
        token_cols = monospaced_width(token)
        # A joining space is only needed when the line already has a word
        needed = token_cols + (1 if pending else 0)

        if used + needed <= max_width:
            pending.append(token)
            used += needed
            continue

        # Token does not fit: flush the current line and start a new one
        if pending:
            wrapped.append(' '.join(pending))
        pending = [token]
        used = token_cols

    if pending:
        wrapped.append(' '.join(pending))

    return wrapped

# Example with mixed character widths (Latin, Chinese, and Hebrew words)
mixed_text = "This text contains 中文字符 and עברית characters with different display widths"
# Wrap to lines of at most 30 terminal columns
wrapped = wrap_text_unicode(mixed_text, 30)
for line in wrapped:
    # Show each line together with its measured display width
    print(f"'{line}' ({monospaced_width(line)} columns)")

Install with Tessl CLI