A Python parser that supports error recovery and round-trip parsing for different Python versions
—
The syntax tree navigation system provides comprehensive methods for traversing, analyzing, and manipulating parsed Python code. All nodes and leaves inherit from common base classes that provide consistent navigation and introspection APIs.
Fundamental classes that form the foundation of parso's syntax tree structure.
class NodeOrLeaf:
"""
Base class for all tree nodes and leaves.
Attributes:
parent (BaseNode | None): Parent node, None for root
type (str): Node type string matching grammar rules
"""
def get_root_node(self):
"""
Get the root node of the syntax tree.
Returns:
NodeOrLeaf: Root node (typically Module)
"""
def get_next_sibling(self):
"""
Get the next sibling node in parent's children.
Returns:
NodeOrLeaf | None: Next sibling or None if last child
"""
def get_previous_sibling(self):
"""
Get the previous sibling node in parent's children.
Returns:
NodeOrLeaf | None: Previous sibling or None if first child
"""
def get_next_leaf(self):
"""
Get the next leaf node in tree traversal order.
Returns:
Leaf | None: Next leaf or None if this is the last leaf
"""
def get_previous_leaf(self):
"""
Get the previous leaf node in tree traversal order.
Returns:
Leaf | None: Previous leaf or None if this is the first leaf
"""
def get_first_leaf(self):
"""
Get the first leaf node in this subtree.
Returns:
Leaf: First leaf node
"""
def get_last_leaf(self):
"""
Get the last leaf node in this subtree.
Returns:
Leaf: Last leaf node
"""
def search_ancestor(self, *node_types):
"""
Search for an ancestor node of specified types.
Args:
*node_types (str): Node type names to search for
Returns:
BaseNode | None: First matching ancestor or None
"""
def get_code(self, include_prefix=True):
"""
Get the source code for this node/leaf.
Args:
include_prefix (bool): Include whitespace and comments (default: True)
Returns:
str: Source code representation
"""
def dump(self, *, indent=4):
"""
Get formatted tree dump for debugging.
Args:
indent (int | str | None): Indentation style (default: 4 spaces)
Returns:
str: Formatted tree representation
"""
@property
def start_pos(self):
"""
Starting position of this node/leaf.
Returns:
tuple[int, int]: (line, column) position (line is 1-indexed, column is 0-indexed)
"""
@property
def end_pos(self):
"""
Ending position of this node/leaf.
Returns:
tuple[int, int]: (line, column) position (line is 1-indexed, column is 0-indexed)
"""
class BaseNode(NodeOrLeaf):
"""
Base class for nodes with children.
Attributes:
children (list[NodeOrLeaf]): Child nodes and leaves
"""
def get_leaf_for_position(self, position, include_prefixes=False):
"""
Find the leaf at a specific position.
Args:
position (tuple[int, int]): (line, column) position
include_prefixes (bool): Whether to match positions in prefixes
Returns:
Leaf | None: Leaf at position or None
Raises:
ValueError: If position is outside this node's range
"""
class Leaf(NodeOrLeaf):
"""
Base class for leaf nodes (tokens).
Attributes:
value (str): Token value/text
prefix (str): Preceding whitespace and comments
"""
def get_start_pos_of_prefix(self):
"""
Get the starting position of this leaf's prefix.
Returns:
tuple[int, int]: (line, column) where prefix starts
"""
import parso
# Parse some code
module = parso.parse('''
def example():
# Comment here
x = 42
return x
''')
# Navigate the tree structure
func_def = module.children[0] # Function definition
print(f"Function type: {func_def.type}") # 'funcdef'
# Get position information
print(f"Function starts at: {func_def.start_pos}")
print(f"Function ends at: {func_def.end_pos}")
# Navigate to parent and siblings
suite = func_def.get_suite() # Function body
first_stmt = suite.children[1] # First statement (x = 42)
print(f"Next sibling: {first_stmt.get_next_sibling()}")
# Find leaves (tokens)
first_leaf = func_def.get_first_leaf()
print(f"First token: '{first_leaf.value}' at {first_leaf.start_pos}")
last_leaf = func_def.get_last_leaf()
print(f"Last token: '{last_leaf.value}' at {last_leaf.start_pos}")
Common patterns for traversing and analyzing syntax trees.
import parso
module = parso.parse('''
import os
import sys
from pathlib import Path
def func1():
pass
def func2():
pass
''')
# Navigate through siblings
current = module.children[0] # First import
while current:
print(f"Statement type: {current.type}")
current = current.get_next_sibling()
# Reverse navigation
current = module.children[-1] # Last child of the module (the end marker leaf)
while current:
print(f"Statement type: {current.type}")
current = current.get_previous_sibling()
import parso
module = parso.parse('x = 1 + 2')
# Walk through all tokens
leaf = module.get_first_leaf()
tokens = []
while leaf:
if leaf.value.strip(): # Skip empty tokens
tokens.append((leaf.value, leaf.start_pos))
leaf = leaf.get_next_leaf()
print("Tokens:", tokens)
# Output: [('x', (1, 0)), ('=', (1, 2)), ('1', (1, 4)), ('+', (1, 6)), ('2', (1, 8))]
import parso
module = parso.parse('''
class MyClass:
def method(self):
if True:
x = 42
''')
# Find the assignment statement
assignment = None
for node in module.get_used_names()['x']:
if node.is_definition():
assignment = node.parent
break
# Search for containing structures
method = assignment.search_ancestor('funcdef')
class_def = assignment.search_ancestor('classdef')
if_stmt = assignment.search_ancestor('if_stmt')
print(f"Assignment is in method: {method.name.value}")
print(f"Assignment is in class: {class_def.name.value}")
print(f"Assignment is in if statement: {if_stmt is not None}")
Working with position information and regenerating source code.
def get_start_pos_of_prefix(self):
"""Get starting position of prefix (whitespace/comments before token)."""
def get_code(self, include_prefix=True):
"""Get source code, optionally including prefix."""
@property
def start_pos(self):
"""Starting (line, column) position."""
@property
def end_pos(self):
"""Ending (line, column) position."""
import parso
# The body of func() must be indented inside the parsed snippet; otherwise
# the function has no suite and the statements below end up at module level.
module = parso.parse('''
def func(): # Function comment
    x = 42 # Variable comment
    return x
''')

# Position information
func_def = module.children[0]
print(f"Function definition: {func_def.start_pos} to {func_def.end_pos}")

# Get code with and without prefixes
assignment = func_def.get_suite().children[1]  # simple_stmt holding `x = 42`
print(f"With prefix: {repr(assignment.get_code(include_prefix=True))}")
print(f"Without prefix: {repr(assignment.get_code(include_prefix=False))}")

# Prefix handling for individual tokens.
# children[0] of the simple_stmt is the expr_stmt node, which has neither
# `value` nor `prefix`; the statement's first leaf is the 'x' name token.
x_name = assignment.get_first_leaf()
print(f"Token value: '{x_name.value}'")
print(f"Token prefix: '{x_name.prefix}'")
print(f"Prefix starts at: {x_name.get_start_pos_of_prefix()}")
Finding nodes and tokens at specific positions in the source code.
def get_leaf_for_position(self, position, include_prefixes=False):
"""Find leaf at specific position."""
import parso
code = '''def example():
result = calculate(1, 2, 3)
return result'''
module = parso.parse(code)
# Find token at specific positions
leaf_at_def = module.get_leaf_for_position((1, 0)) # 'def'
leaf_at_name = module.get_leaf_for_position((1, 4)) # 'example'
leaf_at_calc = module.get_leaf_for_position((2, 13)) # 'calculate'
print(f"At (1,0): '{leaf_at_def.value}'")
print(f"At (1,4): '{leaf_at_name.value}'")
print(f"At (2,13): '{leaf_at_calc.value}'")
# Handle positions in whitespace/comments
code_with_comment = '''def func(): # Comment
pass'''
module = parso.parse(code_with_comment)
# Position in comment (without include_prefixes)
leaf_in_comment = module.get_leaf_for_position((1, 15), include_prefixes=False)
print(f"In comment (no prefix): {leaf_in_comment}") # None
# Position in comment (with include_prefixes)
leaf_in_comment = module.get_leaf_for_position((1, 15), include_prefixes=True)
print(f"In comment (with prefix): {leaf_in_comment.value}") # '\n' (the comment is part of the prefix of the newline leaf ending line 1)
Tools for debugging and understanding the tree structure.
def dump(self, *, indent=4):
"""Format tree structure for debugging."""
import parso
module = parso.parse('lambda x: x + 1')
# Pretty-printed tree dump
print(module.dump())
# Compact dump (single line)
print(module.dump(indent=None))
# Custom indentation
print(module.dump(indent='\t')) # Tab indentation
print(module.dump(indent=2)) # 2-space indentation
# Dump specific subtrees
lambda_node = module.children[0]
print("Lambda subtree:")
print(lambda_node.dump())
Working with error nodes when parsing invalid code.
import parso
# Parse invalid code
module = parso.parse('def broken(: pass') # Missing closing parenthesis
# Find error nodes
def find_error_nodes(node):
    """Collect, in pre-order, every node in the subtree whose type contains 'error'.

    Args:
        node: Root of the subtree to inspect. Objects without a ``type`` or
            ``children`` attribute are treated as non-matching leaves.

    Returns:
        list: Matching nodes; ``node`` itself comes first when it matches.
    """
    found = [node] if hasattr(node, 'type') and 'error' in node.type else []
    # Objects without children contribute nothing further.
    for descendant in getattr(node, 'children', ()):
        found += find_error_nodes(descendant)
    return found
error_nodes = find_error_nodes(module)
for error_node in error_nodes:
print(f"Error node: {error_node.type} at {error_node.start_pos}")
print(f"Error content: {repr(error_node.get_code())}")
Complex navigation patterns for sophisticated code analysis.
import parso
def find_all_nodes(root, node_type):
    """Return every node of type ``node_type`` in the tree, ``root`` included.

    Args:
        root: Node at which the pre-order traversal starts.
        node_type: Type string compared against each node's ``type``.

    Returns:
        list: Matching nodes in pre-order (a parent before its children).
    """
    def matching(node):
        # Yield the node itself first so parents precede their descendants.
        if hasattr(node, 'type') and node.type == node_type:
            yield node
        for descendant in getattr(node, 'children', ()):
            yield from matching(descendant)

    return list(matching(root))
module = parso.parse('''
def func1():
pass
class MyClass:
def method(self):
pass
def func2():
pass
''')
# Find all function definitions
functions = find_all_nodes(module, 'funcdef')
for func in functions:
print(f"Function: {func.name.value}")
# Find all class definitions
classes = find_all_nodes(module, 'classdef')
for cls in classes:
print(f"Class: {cls.name.value}")
import parso
def get_containing_scope(node):
    """Return the nearest enclosing function, class, or module of ``node``.

    Delegates to ``node.search_ancestor`` with the type names 'funcdef',
    'classdef' and 'file_input', and returns whatever that call returns.
    """
    return node.search_ancestor('funcdef', 'classdef', 'file_input')
def get_statement_context(node):
    """Describe where ``node`` sits: its enclosing statement and scope.

    Returns:
        dict: With keys 'statement' (the topmost ancestor whose parent is a
        'suite' or 'file_input', or the topmost ancestor reached when there
        is no such parent), 'scope' (result of ``get_containing_scope``),
        'scope_type' (the scope's type, or None without a scope) and
        'scope_name' (the scope's ``name`` attribute, or None if absent).
    """
    # Climb until the parent is a statement container or there is no parent.
    statement = node
    parent = statement.parent
    while parent and parent.type not in ('suite', 'file_input'):
        statement = parent
        parent = statement.parent

    enclosing = get_containing_scope(node)
    scope_type = enclosing.type if enclosing else None
    return {
        'statement': statement,
        'scope': enclosing,
        'scope_type': scope_type,
        'scope_name': getattr(enclosing, 'name', None),
    }
module = parso.parse('''
class Example:
def method(self):
x = 42
return x
''')
# Analyze context for the variable 'x'
for name_node in module.get_used_names()['x']:
context = get_statement_context(name_node)
print(f"Variable 'x' context:")
print(f" Statement type: {context['statement'].type}")
print(f" Scope type: {context['scope_type']}")
if context['scope_name']:
print(f" Scope name: {context['scope_name'].value}")
Install with Tessl CLI
npx tessl i tessl/pypi-parso