A tool and pre-commit hook to automatically upgrade Python syntax for newer versions of the language.
—
Core functionality for applying syntax transformations through plugin and token systems. The engine operates in two phases: plugin-based AST transformations followed by token-level fixes.
Apply all registered plugin transformations to source code through AST analysis.
def _fix_plugins(contents_text: str, settings: Settings) -> str:
"""
Apply all plugin-based AST transformations to source code.
Args:
contents_text: Python source code to transform
settings: Configuration settings for transformations
Returns:
Transformed source code with plugin fixes applied
Notes:
- Returns original code if syntax errors occur
- Applies token fixup for DEDENT/UNIMPORTANT_WS ordering
- Processes callbacks in reverse token order for correct offsets
"""

Apply token-level transformations for string literals, parentheses, and format strings.
def _fix_tokens(contents_text: str) -> str:
"""
Apply token-level transformations to source code.
Args:
contents_text: Python source code to transform
Returns:
Transformed source code with token fixes applied
Transformations:
- Fix escape sequences in string literals
- Remove 'u' prefix from Unicode strings
- Remove extraneous parentheses
- Simplify format string literals
- Convert string.encode() to binary literals
- Remove encoding cookies from file headers
"""

Core utility functions used throughout the transformation process.
def inty(s: str) -> bool:
"""
Check if string represents an integer.
Args:
s: String to check
Returns:
True if string can be converted to int, False otherwise
Notes:
Uses try/except to handle ValueError and TypeError gracefully
"""

Configuration object controlling transformation behavior.
class Settings(NamedTuple):
"""
Configuration settings for pyupgrade transformations.
Attributes:
min_version: Minimum Python version tuple (e.g., (3, 10))
keep_percent_format: Preserve %-style format strings
keep_mock: Preserve mock imports instead of unittest.mock
keep_runtime_typing: Preserve typing imports at runtime
"""
min_version: Version = (3,)
keep_percent_format: bool = False
keep_mock: bool = False
keep_runtime_typing: bool = False

Fix misordered DEDENT and UNIMPORTANT_WS tokens from tokenize-rt.
def _fixup_dedent_tokens(tokens: list[Token]) -> None:
"""
Fix misordered DEDENT/UNIMPORTANT_WS tokens.
Args:
tokens: Token list to fix in-place
Notes:
Addresses tokenize-rt issue where DEDENT and UNIMPORTANT_WS
tokens appear in wrong order in certain indentation patterns.
"""

Constants used for validating and processing escape sequences in string literals.
ESCAPE_STARTS: frozenset[str]
"""
Valid escape sequence starting characters.
Contains:
- Newline characters: '\n', '\r'
- Backslash and quote characters: '\\', "'", '"'
- Named escapes: 'a', 'b', 'f', 'n', 'r', 't', 'v'
- Octal digits: '0'-'7'
- Hex escape: 'x'
"""
ESCAPE_RE: re.Pattern[str]
"""Regex pattern for matching escape sequences ('\\.', DOTALL)."""
NAMED_ESCAPE_NAME: re.Pattern[str]
"""Regex pattern for matching named Unicode escapes ('{[^}]+}')."""

Fix invalid escape sequences in string literals.
def _fix_escape_sequences(token: Token) -> Token:
"""
Fix invalid escape sequences in string token.
Args:
token: String token to process
Returns:
Token with fixed escape sequences
Logic:
- Skips raw strings and strings without backslashes
- Validates escape sequences against Python standards
- Adds raw prefix if only invalid escapes found
- Escapes invalid sequences if valid ones also present
"""

Remove unnecessary 'u' prefixes from Unicode string literals.
def _remove_u_prefix(token: Token) -> Token:
"""
Remove 'u' prefix from Unicode string literals.
Args:
token: String token to process
Returns:
Token with 'u'/'U' prefixes removed
"""

Remove unnecessary parentheses around expressions.
def _fix_extraneous_parens(tokens: list[Token], i: int) -> None:
"""
Remove extraneous parentheses around expressions.
Args:
tokens: Token list to modify in-place
i: Index of opening parenthesis token
Notes:
- Preserves tuple syntax (checks for commas)
- Preserves yield expressions (checks for yield)
- Only removes truly redundant parentheses
"""

Simplify format string literals by removing redundant format keys.
def _fix_format_literal(tokens: list[Token], end: int) -> None:
"""
Simplify format string literals.
Args:
tokens: Token list to modify in-place
end: Index of format method call
Logic:
- Removes positional format keys (0, 1, 2, ...)
- Only processes sequential numeric keys
- Skips f-strings and malformed format strings
"""

Convert string.encode() calls to binary string literals.
def _fix_encode_to_binary(tokens: list[Token], i: int) -> None:
"""
Convert string.encode() to binary literals.
Args:
tokens: Token list to modify in-place
i: Index of 'encode' token
Supported encodings:
- ASCII, UTF-8: Full conversion
- ISO-8859-1: Latin-1 compatible conversion
- Skips non-ASCII or complex escape sequences
"""

from pyupgrade._main import _fix_plugins, _fix_tokens
from pyupgrade._data import Settings
# Apply both transformation phases
source = "set([1, 2, 3])"
settings = Settings(min_version=(3, 8))
# Phase 1: Plugin transformations
transformed = _fix_plugins(source, settings)
# Result: "{1, 2, 3}"
# Phase 2: Token transformations
final = _fix_tokens(transformed)

# Configure for Python 3.10+ with format preservation
settings = Settings(
min_version=(3, 10),
keep_percent_format=True,
keep_mock=True,
keep_runtime_typing=False
)
transformed = _fix_plugins(source_code, settings)

Install with Tessl CLI
npx tessl i tessl/pypi-pyupgrade