A tool and pre-commit hook to automatically upgrade Python syntax for newer versions of the language.
—
Specialized utilities for processing and transforming string literals and format strings. These functions handle the complex parsing and manipulation of Python string formats.
Parse and manipulate format strings with support for named Unicode escapes.
def parse_format(s: str) -> list[DotFormatPart]:
    """
    Parse format string into component parts.

    Args:
        s: Format string to parse (e.g., "Hello {name}!")

    Returns:
        List of format parts, each containing:
        - Literal text
        - Field name (None for literal parts)
        - Format specification (None if not specified)
        - Conversion specification (None if not specified)

    Raises:
        ValueError: Presumably for malformed format strings -- the usage
            examples in this document wrap parse_format() in
            ``except ValueError``; confirm against the implementation.

    Notes:
        - Handles named Unicode escape sequences (\\N{...})
        - Compatible with string.Formatter.parse()
        - Preserves all format string information for reconstruction
    """
def unparse_parsed_string(parsed: list[DotFormatPart]) -> str:
    """
    Convert parsed format parts back to format string.

    Args:
        parsed: List of format parts from parse_format()

    Returns:
        Reconstructed format string

    Notes:
        - Escapes curly braces in literal parts
        - Rebuilds field specifications with proper syntax
        - Inverse operation of parse_format()
    """

Utilities for working with string encodings and codecs.
def is_codec(encoding: str, name: str) -> bool:
    """
    Check if encoding matches codec name.

    Args:
        encoding: Encoding string to check (e.g., "utf-8", "ascii")
        name: Codec name to match against

    Returns:
        True if encoding resolves to the specified codec name

    Notes:
        - Handles encoding aliases (e.g., "utf8" → "utf-8")
        - Returns False for unknown encodings
        - Used to determine safe string-to-binary conversions
    """

DotFormatPart = tuple[str, Optional[str], Optional[str], Optional[str]]
"""
Format string component tuple.
Elements:
0: Literal text portion
1: Field name (None for literal-only parts)
2: Format specification (None if not specified)
3: Conversion specification (None if not specified)
Examples:
("Hello ", None, None, None) # Literal text
("", "name", None, None) # Simple field {name}
("", "0", ">10", None) # Formatted field {0:>10}
("", "value", None, "r") # Conversion field {value!r}
"""

from pyupgrade._string_helpers import parse_format, unparse_parsed_string
# Parse a format string
format_str = "Hello {name}! You have {count:d} messages."
parts = parse_format(format_str)
# parts contains:
# [
#     ("Hello ", None, None, None),
#     ("", "name", None, None),
#     ("! You have ", None, None, None),
#     ("", "count", "d", None),
#     (" messages.", None, None, None)
# ]
# NOTE(review): string.Formatter.parse() yields each field together with the
# literal text that precedes it (e.g. ("Hello ", "name", "", None)). If
# parse_format() mirrors that, the listing above is schematic -- confirm
# against the implementation.

# Modify and reconstruct
# Remove format specifications to simplify. The test is `field is not None`
# rather than truthiness because an auto-numbered field ("{:d}") has the
# empty string as its field name and must have its spec removed as well.
simplified_parts = [
    (text, field, None if field is not None else spec, conv)
    for text, field, spec, conv in parts
]
simplified_str = unparse_parsed_string(simplified_parts)
# Result: "Hello {name}! You have {count} messages."

from pyupgrade._string_helpers import is_codec
# Check if encoding is safe for ASCII conversion
def can_convert_to_ascii(encoding_str: str) -> bool:
    """Report whether *encoding_str* names the ASCII or the UTF-8 codec.

    Args:
        encoding_str: Encoding name as written by the caller; aliases are
            resolved by is_codec().

    Returns:
        True if is_codec() matches the encoding against 'ascii' or 'utf-8',
        False otherwise.
    """
    accepted_codecs = ('ascii', 'utf-8')
    return any(is_codec(encoding_str, codec) for codec in accepted_codecs)
# Usage in string.encode() conversion
encoding = "utf-8"
if can_convert_to_ascii(encoding):
    # Safe to convert "text".encode("utf-8") → b"text"
    pass

# Handle encoding aliases
assert is_codec("utf8", "utf-8")  # True - alias
assert is_codec("ascii", "ascii")  # True - exact
assert is_codec("latin1", "iso8859-1")  # True - standard name


def simplify_format_string(format_str: str) -> str:
    """Remove positional format keys from format string.

    Explicit indices are dropped only when doing so cannot change which
    argument each field refers to: every numbered field must be a bare index
    and the indices must appear as 0, 1, 2, ... in order. Otherwise the input
    is returned unchanged.

    Args:
        format_str: Format string such as "Item {0}: {1}".

    Returns:
        The format string with "{0}", "{1}", ... rewritten to "{}", or
        ``format_str`` itself when the rewrite would not be safe.

    Raises:
        ValueError: Propagated from parse_format() (the other examples in
            this document catch it for malformed format strings).
    """
    parts = parse_format(format_str)

    numbered = []
    for _, field, spec, _ in parts:
        # A nested replacement field inside the spec ("{0:{1}}") would keep
        # its explicit index, and str.format() rejects mixing automatic and
        # manual numbering.
        if spec and "{" in spec:
            return format_str
        # Collect anything that starts with an index, including "0.attr" and
        # "0[1]"; those fail the comparison below and block the rewrite.
        if field and field[0].isdigit():
            numbered.append(field)

    # "{1} {0}" or "{0} {0}" would bind to different arguments once the
    # indices are removed, so only the exact sequence 0..n-1 is accepted.
    if numbered != [str(index) for index in range(len(numbered))]:
        return format_str

    simplified = [
        (text, "" if field and field.isdigit() else field, spec, conv)
        for text, field, spec, conv in parts
    ]
    return unparse_parsed_string(simplified)
# Example usage
original = "Item {0}: {1} (price: ${2:.2f})"
simplified = simplify_format_string(original)
# Result: "Item {}: {} (price: ${:.2f})"

# parse_format handles named Unicode escapes correctly
# The doubled backslash keeps \N{...} as literal text in the string, i.e. the
# way the escape appears in source code, instead of letting Python resolve it
# to the character.
unicode_format = "Greek letter: \\N{GREEK SMALL LETTER ALPHA} = {value}"
parts = parse_format(unicode_format)
# The literal part preserves the Unicode escape:
# [("Greek letter: \\N{GREEK SMALL LETTER ALPHA} = ", None, None, None),
#  ("", "value", None, None)]
reconstructed = unparse_parsed_string(parts)
# Round trip: unparsing the parsed parts gives back the original text.
assert reconstructed == unicode_format

from pyupgrade._string_helpers import parse_format, unparse_parsed_string
from tokenize_rt import Token
def transform_format_token(token: Token) -> Token:
    """Transform format string token to remove positional keys.

    The token is rewritten only when every numbered field is a bare index and
    the indices appear as 0, 1, 2, ... in order. In every other case the
    token is returned unchanged.

    Args:
        token: Token whose ``src`` holds the format string source text.

    Returns:
        A copy of ``token`` with "{0}", "{1}", ... replaced by "{}", or the
        original ``token`` when the rewrite is unnecessary or unsafe.
    """
    try:
        parts = parse_format(token.src)
    except ValueError:
        # Malformed format string, skip transformation
        return token

    positions = []
    for _, field, spec, _ in parts:
        # A nested replacement field in the spec ("{0:{1}}") would keep its
        # explicit index; str.format() rejects mixing automatic and manual
        # numbering, so leave such strings alone.
        if spec and "{" in spec:
            return token
        if field and field[0].isdigit():
            # "0.attr" / "0[1]" are positional too, but are not handled here;
            # stripping only the bare indices would leave mixed numbering.
            if not field.isdigit():
                return token
            positions.append(int(field))

    # Nothing to strip, or the indices are reordered/repeated so removing
    # them would change which argument each field receives.
    if not positions or positions != list(range(len(positions))):
        return token

    rewritten = [
        (text, "" if field and field.isdigit() else field, spec, conv)
        for text, field, spec, conv in parts
    ]
    return token._replace(src=unparse_parsed_string(rewritten))


def validate_format_string(format_str: str) -> bool:
    """Check if format string is valid.

    Args:
        format_str: Format string to check.

    Returns:
        True if parse_format() accepts the string, False if it raises
        ValueError.
    """
    try:
        parse_format(format_str)
    except ValueError:
        return False
    return True
def count_format_fields(format_str: str) -> int:
"""Count number of format fields in string."""
try:
parts = parse_format(format_str)
return sum(1 for _, field, _, _ in parts if field is not None)
except ValueError:
return 0def is_safe_binary_conversion(text: str, encoding: str) -> bool:
"""Check if string can be safely converted to binary literal."""
# Check encoding compatibility
if not (is_codec(encoding, 'ascii') or
is_codec(encoding, 'utf-8') or
is_codec(encoding, 'iso8859-1')):
return False
# Check for non-ASCII characters with restrictive encodings
if not text.isascii() and is_codec(encoding, 'ascii'):
return False
# Check for Unicode escapes that can't be represented
if '\\u' in text or '\\U' in text or '\\N' in text:
if is_codec(encoding, 'ascii'):
return False
return TrueInstall with Tessl CLI
npx tessl i tessl/pypi-pyupgrade