A library to manipulate gettext files (po and mo files).
—
Essential utility functions for encoding detection and string processing when working with gettext files. These functions handle the low-level details of gettext file format requirements.
Automatically detect the character encoding used in PO or MO files by parsing Content-Type headers and validating charset specifications.
def detect_encoding(file, binary_mode=False):
"""
Try to detect the encoding used by the file.
Parameters:
- file (str): Full or relative path to the po/mo file or its content (string)
- binary_mode (bool): Set to True if file is a binary mo file, default False
Returns:
str: Detected encoding name, or default_encoding if detection fails
Notes:
- Searches for Content-Type charset declarations in file headers
- Validates that detected encoding is supported by Python's codecs
- Falls back to default_encoding ('utf-8') if detection fails
- Handles both file paths and file content strings/bytes
"""

Usage Examples:
import polib
# Detect encoding from file path
encoding = polib.detect_encoding('messages.po')
print(f"Detected encoding: {encoding}")
# Detect encoding from PO file content
po_content = '''
msgid ""
msgstr ""
"Content-Type: text/plain; charset=ISO-8859-1\\n"
msgid "Hello"
msgstr "Hola"
'''
encoding = polib.detect_encoding(po_content)
print(f"Content encoding: {encoding}") # 'ISO-8859-1'
# Detect encoding from MO file (binary mode)
encoding = polib.detect_encoding('messages.mo', binary_mode=True)
# Use detected encoding when loading files
encoding = polib.detect_encoding('unknown_encoding.po')
po = polib.pofile('unknown_encoding.po', encoding=encoding)
# Handle detection failures
encoding = polib.detect_encoding('malformed.po')
if encoding == polib.default_encoding:
print("Using default encoding (detection failed)")

Escape special characters in strings for proper gettext file format compliance, converting control characters to their escaped representations.
def escape(st):
"""
Escape special characters in the given string for gettext format.
Parameters:
- st (str): String to escape
Returns:
str: String with escaped characters
Escapes:
- \\\\ (backslash) -> \\\\\\\\
- \\t (tab) -> \\\\t
- \\n (newline) -> \\\\n
- \\r (carriage return) -> \\\\r
- \\v (vertical tab) -> \\\\v
- \\b (backspace) -> \\\\b
- \\f (form feed) -> \\\\f
- " (double quote) -> \\\\"
"""

Usage Examples:
import polib
# Escape a string with special characters
original = 'Line 1\nLine 2\tTabbed\r\nWith "quotes"'
escaped = polib.escape(original)
print(escaped) # 'Line 1\\nLine 2\\tTabbed\\r\\nWith \\"quotes\\"'
# Common use case: preparing strings for PO entries
message_with_formatting = "Error: File not found.\nPlease check the path."
safe_message = polib.escape(message_with_formatting)
entry = polib.POEntry(
msgid=safe_message,
msgstr=polib.escape("Error: Archivo no encontrado.\nPor favor revise la ruta.")
)
# Escape strings before adding to translations
def add_translation(po_file, english, translated):
entry = polib.POEntry(
msgid=polib.escape(english),
msgstr=polib.escape(translated)
)
po_file.append(entry)
po = polib.pofile('messages.po')
add_translation(po, 'Hello\nWorld', 'Hola\nMundo')

Unescape special characters from gettext format strings, converting escaped representations back to their original control characters.
def unescape(st):
"""
Unescape special characters in the given string from gettext format.
Parameters:
- st (str): String to unescape
Returns:
str: String with unescaped characters
Unescapes:
- \\\\\\\\ (escaped backslash) -> \\\\
- \\\\t (escaped tab) -> \\t
- \\\\n (escaped newline) -> \\n
- \\\\r (escaped carriage return) -> \\r
- \\\\v (escaped vertical tab) -> \\v
- \\\\b (escaped backspace) -> \\b
- \\\\f (escaped form feed) -> \\f
- \\\\" (escaped quote) -> "
"""

Usage Examples:
import polib
# Unescape a string from gettext format
escaped = 'Line 1\\nLine 2\\tTabbed\\r\\nWith \\"quotes\\"'
original = polib.unescape(escaped)
print(original) # 'Line 1\nLine 2\tTabbed\r\nWith "quotes"'
# Process entries from loaded PO files
po = polib.pofile('messages.po')
for entry in po:
# Get the unescaped versions for display
display_msgid = polib.unescape(entry.msgid)
display_msgstr = polib.unescape(entry.msgstr)
print(f"Original: {display_msgid}")
print(f"Translation: {display_msgstr}")
# Convert between escaped and unescaped forms
def process_translation_pair(msgid, msgstr):
# Ensure proper escaping for storage
safe_msgid = polib.escape(msgid)
safe_msgstr = polib.escape(msgstr)
# Create entry
entry = polib.POEntry(msgid=safe_msgid, msgstr=safe_msgstr)
# Later, when displaying, unescape for user
display_original = polib.unescape(entry.msgid)
display_translation = polib.unescape(entry.msgstr)
return entry, display_original, display_translation

The module-level constant used as fallback when encoding detection fails.
default_encoding: str # Default encoding ('utf-8') used when detection fails

Usage Examples:
import polib
# Access the default encoding
print(f"Default encoding: {polib.default_encoding}") # 'utf-8'
# Use in encoding detection workflow
def safe_load_pofile(filepath):
"""Load a PO file with robust encoding handling."""
detected = polib.detect_encoding(filepath)
if detected == polib.default_encoding:
print(f"Warning: Could not detect encoding for {filepath}, using {detected}")
else:
print(f"Detected encoding: {detected}")
return polib.pofile(filepath, encoding=detected)
# Modify default encoding if needed (not recommended)
original_default = polib.default_encoding
polib.default_encoding = 'latin-1'
# ... do work with different default ...
polib.default_encoding = original_default # Restore

Common patterns combining the utility functions for robust file processing.
Usage Examples:
import polib
def robust_po_processing(filepath):
"""Demonstrate robust PO file processing with utilities."""
# Step 1: Detect encoding
encoding = polib.detect_encoding(filepath)
print(f"Using encoding: {encoding}")
# Step 2: Load with detected encoding
po = polib.pofile(filepath, encoding=encoding)
# Step 3: Process entries with proper escaping
for entry in po:
# Get unescaped versions for processing
original_text = polib.unescape(entry.msgid)
translated_text = polib.unescape(entry.msgstr) if entry.msgstr else ""
# Do text processing...
processed_translation = translated_text.upper() # Example processing
# Re-escape and update
entry.msgstr = polib.escape(processed_translation)
# Step 4: Save with proper encoding
po.save()
def create_entry_safely(msgid, msgstr, **kwargs):
"""Create a POEntry with automatic escaping."""
return polib.POEntry(
msgid=polib.escape(msgid),
msgstr=polib.escape(msgstr),
**kwargs
)
def extract_display_text(entry):
"""Extract display-ready text from a POEntry."""
return {
'original': polib.unescape(entry.msgid),
'translation': polib.unescape(entry.msgstr) if entry.msgstr else '',
'context': entry.msgctxt or None
}

Install with Tessl CLI
npx tessl i tessl/pypi-polib