CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-polib

A library to manipulate gettext files (po and mo files).

Pending
Overview
Eval results
Files

docs/utilities.md

Utilities

Essential utility functions for encoding detection and string processing when working with gettext files. These functions handle the low-level details of gettext file format requirements.

Capabilities

Encoding Detection

Automatically detect the character encoding used in PO or MO files by parsing Content-Type headers and validating charset specifications.

def detect_encoding(file, binary_mode=False):
    """
    Try to detect the encoding used by the file.

    Parameters:
    - file (str or bytes): Full or relative path to the po/mo file, or the
      content of such a file
    - binary_mode (bool): Set to True if file is a binary mo file, default False

    Returns:
    str: Detected encoding name, or default_encoding if detection fails

    Notes:
    - Searches for Content-Type charset declarations in file headers
    - Validates that detected encoding is supported by Python's codecs
    - Falls back to default_encoding ('utf-8') if detection fails
    - Handles both file paths and file content strings/bytes
    - A result equal to default_encoding does not by itself prove that
      detection failed: a file that declares that same charset produces
      the same return value
    """

Usage Examples:

import polib

# Detect encoding from file path
encoding = polib.detect_encoding('messages.po')
print(f"Detected encoding: {encoding}")

# Detect encoding from PO file content
po_content = '''
msgid ""
msgstr ""
"Content-Type: text/plain; charset=ISO-8859-1\\n"

msgid "Hello"
msgstr "Hola"
'''
encoding = polib.detect_encoding(po_content)
print(f"Content encoding: {encoding}")  # prints: Content encoding: ISO-8859-1

# Detect encoding from MO file (binary mode)
encoding = polib.detect_encoding('messages.mo', binary_mode=True)

# Use detected encoding when loading files.
# polib.pofile() also auto-detects the encoding when `encoding` is omitted,
# so the explicit call is only needed when the value itself is of interest.
encoding = polib.detect_encoding('unknown_encoding.po')
po = polib.pofile('unknown_encoding.po', encoding=encoding)

# Handle a possible detection failure.
# detect_encoding() returns polib.default_encoding both when nothing could be
# detected and when the file really declares that charset, so this comparison
# cannot tell the two cases apart; the message is worded accordingly.
encoding = polib.detect_encoding('malformed.po')
if encoding == polib.default_encoding:
    print("Using default encoding (not detected, or declared by the file)")

String Escaping

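Escape special characters in strings for proper gettext file format compliance, converting control characters to their escaped representations. polib applies this escaping itself whenever it serializes an entry, so call escape() only when you build PO-formatted text by hand; pre-escaping the fields of a POEntry results in double escaping.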

def escape(st):
    """
    Escape special characters in the given string for gettext format.

    Parameters:
    - st (str): String to escape

    Returns:
    str: String with escaped characters

    Escapes (character -> replacement):
    - \\\\ (backslash) -> \\\\\\\\
    - \\t (tab) -> \\\\t
    - \\n (newline) -> \\\\n
    - \\r (carriage return) -> \\\\r
    - \\v (vertical tab) -> \\\\v
    - \\b (backspace) -> \\\\b
    - \\f (form feed) -> \\\\f
    - " (double quote) -> \\\\"

    NOTE(review): polib's serializer appears to apply this escaping itself
    when an entry is written out -- confirm before pre-escaping text that is
    assigned to POEntry fields.
    """

Usage Examples:

import polib

# Escape a string with special characters
original = 'Line 1\nLine 2\tTabbed\r\nWith "quotes"'
escaped = polib.escape(original)
print(escaped)  # prints: Line 1\nLine 2\tTabbed\r\nWith \"quotes\"

# Common use case: preparing strings for PO entries.
# escape() yields the text exactly as it appears between the quotes of a
# .po file, which is useful when writing PO-formatted text by hand.
message_with_formatting = "Error: File not found.\nPlease check the path."
safe_message = polib.escape(message_with_formatting)

# POEntry fields hold the plain text: polib escapes them itself when the
# entry is serialized, so passing pre-escaped text would double-escape it.
entry = polib.POEntry(
    msgid=message_with_formatting,
    msgstr="Error: Archivo no encontrado.\nPor favor revise la ruta.",
)

# Escape strings before adding to translations
def add_translation(po_file, english, translated):
    """Append an entry translating ``english`` as ``translated`` to ``po_file``.

    The texts are stored as given. polib escapes entry fields itself when
    the catalog is serialized, so escaping them here would leave doubled
    backslashes in the saved file.

    Parameters:
    - po_file: Catalog to extend; only its ``append()`` method is used
    - english (str): Source message (msgid), unescaped
    - translated (str): Translated message (msgstr), unescaped
    """
    entry = polib.POEntry(msgid=english, msgstr=translated)
    po_file.append(entry)

# Load a catalog and add one entry to it (this snippet never calls po.save())
po = polib.pofile('messages.po')
add_translation(po, 'Hello\nWorld', 'Hola\nMundo')

String Unescaping

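Unescape special characters from gettext format strings, converting escaped representations back to their original control characters. polib already does this while parsing, so the msgid and msgstr of a loaded entry are plain text; call unescape() only on raw PO-formatted text you read yourself.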

def unescape(st):
    """
    Unescape special characters in the given string from gettext format.

    Parameters:
    - st (str): String to unescape

    Returns:
    str: String with unescaped characters

    Unescapes (escape sequence -> character):
    - \\\\\\\\ (escaped backslash) -> \\\\
    - \\\\t (escaped tab) -> \\t
    - \\\\n (escaped newline) -> \\n
    - \\\\r (escaped carriage return) -> \\r
    - \\\\v (escaped vertical tab) -> \\v
    - \\\\b (escaped backspace) -> \\b
    - \\\\f (escaped form feed) -> \\f
    - \\\\" (escaped quote) -> "

    NOTE(review): polib's parser appears to apply this unescaping itself
    when loading a file -- confirm before unescaping the msgid/msgstr of
    entries obtained from polib.pofile() or polib.mofile().
    """

Usage Examples:

import polib

# Unescape a string from gettext format.
# Each escape sequence is ONE backslash plus a character, written here as
# `\\` inside a regular Python string literal.
escaped = 'Line 1\\nLine 2\\tTabbed\\r\\nWith \\"quotes\\"'
original = polib.unescape(escaped)
print(repr(original))  # 'Line 1\nLine 2\tTabbed\r\nWith "quotes"'

# Process entries from loaded PO files.
# polib unescapes msgid/msgstr while parsing, so the attributes are already
# in display form; unescaping them again would corrupt any text that
# contains a literal backslash (e.g. a Windows path).
po = polib.pofile('messages.po')
for entry in po:
    display_msgid = entry.msgid
    display_msgstr = entry.msgstr
    print(f"Original: {display_msgid}")
    print(f"Translation: {display_msgstr}")

# Convert between escaped and unescaped forms
def process_translation_pair(msgid, msgstr):
    """Build a POEntry from plain text and return it with its display strings.

    Parameters:
    - msgid (str): Source message, unescaped
    - msgstr (str): Translated message, unescaped

    Returns:
    tuple: ``(entry, display_original, display_translation)``
    """
    # Store the plain text. polib escapes fields when writing and unescapes
    # them when parsing, so an explicit escape() here would leave the entry
    # double-escaped once the catalog is saved.
    entry = polib.POEntry(msgid=msgid, msgstr=msgstr)

    # The entry's attributes are already in display form
    display_original = entry.msgid
    display_translation = entry.msgstr

    return entry, display_original, display_translation

Default Encoding Constant

The module-level constant used as fallback when encoding detection fails.

default_encoding: str  # Fallback encoding ('utf-8') returned by detect_encoding() when detection fails

Usage Examples:

import polib

# Access the default encoding
print(f"Default encoding: {polib.default_encoding}")  # prints: Default encoding: utf-8

# Use in encoding detection workflow
def safe_load_pofile(filepath):
    """Load a PO file with robust encoding handling.

    Parameters:
    - filepath (str): Path to the PO file

    Returns:
    The catalog returned by ``polib.pofile()``, loaded with the detected
    encoding or with ``polib.default_encoding`` as the fallback.
    """
    detected = polib.detect_encoding(filepath)

    # detect_encoding() falls back to polib.default_encoding, but a file may
    # also declare that very charset, so equality does not prove a failure.
    # The message therefore reports both possibilities instead of claiming
    # that detection failed.
    if detected == polib.default_encoding:
        print(
            f"Note: using {detected} for {filepath} "
            "(declared by the file, or detection fell back to the default)"
        )
    else:
        print(f"Detected encoding: {detected}")

    return polib.pofile(filepath, encoding=detected)

# Modify default encoding if needed (not recommended).
# The restore runs in `finally` so the module-wide setting is put back even
# if the work in between raises.
original_default = polib.default_encoding
polib.default_encoding = 'latin-1'
try:
    pass  # ... do work with different default ...
finally:
    polib.default_encoding = original_default  # Restore

Utility Function Combinations

Common patterns combining the utility functions for robust file processing.

Usage Examples:

import polib

def robust_po_processing(filepath):
    """Demonstrate robust PO file processing with utilities.

    Detects the file's encoding, loads the catalog, upper-cases every
    translation as a stand-in for real processing, and saves the catalog.

    Parameters:
    - filepath (str): Path to the PO file to process
    """

    # Step 1: Detect encoding
    encoding = polib.detect_encoding(filepath)
    print(f"Using encoding: {encoding}")

    # Step 2: Load with detected encoding
    po = polib.pofile(filepath, encoding=encoding)

    # Step 3: Process entries.
    # Loaded entries already hold unescaped text and polib re-escapes it on
    # save, so no unescape()/escape() round trip is needed; adding one would
    # write doubled backslashes into the file.
    for entry in po:
        translated_text = entry.msgstr or ""

        # Do text processing...
        processed_translation = translated_text.upper()  # Example processing

        entry.msgstr = processed_translation

    # Step 4: Save with proper encoding
    po.save()

def create_entry_safely(msgid, msgstr, **kwargs):
    """Create a POEntry from plain (unescaped) text.

    The texts are passed through unchanged. polib escapes entry fields when
    it serializes them, so escaping here would double-escape the message.

    Parameters:
    - msgid (str): Source message, unescaped
    - msgstr (str): Translated message, unescaped
    - **kwargs: Forwarded unchanged to ``polib.POEntry``

    Returns:
    The newly created ``polib.POEntry``.
    """
    return polib.POEntry(msgid=msgid, msgstr=msgstr, **kwargs)

def extract_display_text(entry):
    """Extract display-ready text from a POEntry.

    Entry attributes are returned as stored. polib unescapes them while
    parsing, so unescaping again would corrupt text containing a literal
    backslash.

    Parameters:
    - entry: Object exposing ``msgid``, ``msgstr`` and ``msgctxt``

    Returns:
    dict: ``'original'`` (the msgid), ``'translation'`` (the msgstr, or
    ``''`` when it is empty) and ``'context'`` (the msgctxt, or ``None``
    when it is empty)
    """
    return {
        'original': entry.msgid,
        'translation': entry.msgstr or '',
        'context': entry.msgctxt or None,
    }

Install with Tessl CLI

npx tessl i tessl/pypi-polib

docs

entry-manipulation.md

file-operations.md

index.md

utilities.md

tile.json