Annotate AST trees with source code positions
Quality: Pending — Does it follow best practices?
Impact: Pending — No eval scenarios have been run.
Functions and methods for navigating and searching through tokenized source code, finding specific tokens by position, type, or content. These methods work with the enhanced Token objects created by ASTTokens.
Methods for retrieving tokens based on character offsets or line/column positions.
class ASTTokens:
    """Position-based token lookup on a tokenized source tree.

    Methods here retrieve tokens by character offset or by line/column
    position. Note: ``Token`` is declared elsewhere in the package, so the
    return annotations are quoted forward references.
    """

    def get_token_from_offset(self, offset) -> "Token":
        """
        Get the token at a specific character offset in the source.

        Parameters:
        - offset (int): Character offset in the source text.

        Returns:
        Token: Token at the specified offset.
        """

    def get_token(self, lineno, col_offset) -> "Token":
        """
        Get the token at a line and column position.

        Parameters:
        - lineno (int): Line number (1-based).
        - col_offset (int): Column offset (0-based, unicode characters).

        Returns:
        Token: Token at the specified position.
        """

    def get_token_from_utf8(self, lineno, col_offset) -> "Token":
        """
        Get the token at a line and column position using UTF-8 byte offsets.

        Parameters:
        - lineno (int): Line number (1-based).
        - col_offset (int): Column offset (0-based, UTF-8 bytes).

        Returns:
        Token: Token at the specified position.
        """
# Example: looking up tokens by position.
import asttokens

source = "def hello():\n print('world')"
atok = asttokens.ASTTokens(source, parse=True)

# Get token at character offset
token = atok.get_token_from_offset(0)
print(token.string)  # 'def'

# Get token at line/column
token = atok.get_token(1, 4)  # Line 1, column 4
print(token.string)  # 'hello'

# Get token on the second line
token = atok.get_token(2, 4)  # Line 2, column 4
print(token.string)  # 'print'

# Methods for moving through tokens sequentially in forward or backward
# direction.
class ASTTokens:
    """Sequential token navigation: step forward or backward one token.

    ``Token`` is declared elsewhere in the package, so the return
    annotations are quoted forward references.
    """

    def next_token(self, tok, include_extra=False) -> "Token":
        """
        Get the next token after the given token.

        Parameters:
        - tok (Token): Current token.
        - include_extra (bool): Include non-coding tokens (comments, newlines).

        Returns:
        Token: Next token in sequence.
        """

    def prev_token(self, tok, include_extra=False) -> "Token":
        """
        Get the previous token before the given token.

        Parameters:
        - tok (Token): Current token.
        - include_extra (bool): Include non-coding tokens (comments, newlines).

        Returns:
        Token: Previous token in sequence.
        """
# Example: navigating tokens sequentially.
import asttokens

source = "x = 42 # comment"
atok = asttokens.ASTTokens(source, parse=True)

# Start with the first token
token = atok.get_token_from_offset(0)
print(token.string)  # 'x'

# Navigate forward
token = atok.next_token(token)
print(token.string)  # '='
token = atok.next_token(token)
print(token.string)  # '42'

# Include comments and other non-coding tokens
token = atok.next_token(token, include_extra=True)
print(token.string)  # '# comment'

# Navigate backward
token = atok.prev_token(token)
print(token.string)  # '42'

# Methods for finding specific tokens by type, content, or other criteria.
class ASTTokens:
    """Token search: find a token by type and optional string content.

    ``Token`` is declared elsewhere in the package, so the return
    annotation is a quoted forward reference.
    """

    def find_token(self, start_token, tok_type, tok_str=None, reverse=False) -> "Token":
        """
        Find a token by type and, optionally, by string content.

        Parameters:
        - start_token (Token): Token to start the search from.
        - tok_type (int): Token type to search for (from the stdlib ``token`` module).
        - tok_str (str, optional): Specific token string to match.
        - reverse (bool): Search backwards if True.

        Returns:
        Token: First matching token found.

        Raises:
        ValueError: If no matching token is found.
        """
# Example: searching for tokens by type and content.
import asttokens
import token

source = "def func(a, b): return a + b"
atok = asttokens.ASTTokens(source, parse=True)

# Start from the beginning
start = atok.get_token_from_offset(0)

# Find the opening parenthesis
paren = atok.find_token(start, token.OP, '(')
print(paren.string)  # '('

# Find a specific operator
plus = atok.find_token(start, token.OP, '+')
print(plus.string)  # '+'

# Find any name token
name = atok.find_token(start, token.NAME)
print(name.string)  # 'def'

# Search backwards from the end
end = atok.tokens[-1]
last_name = atok.find_token(end, token.NAME, reverse=True)
print(last_name.string)  # 'b'

# Methods for working with ranges of tokens, useful for processing all
# tokens within an AST node.
class ASTTokens:
    """Token-range iteration: yield runs of tokens, or all tokens of a node.

    ``Token`` and ``Iterator`` are declared/imported elsewhere in the
    package, so the return annotations are quoted forward references.
    """

    def token_range(self, first_token, last_token, include_extra=False) -> "Iterator[Token]":
        """
        Yield all tokens from first_token to last_token (inclusive).

        Parameters:
        - first_token (Token): Starting token.
        - last_token (Token): Ending token.
        - include_extra (bool): Include non-coding tokens.

        Yields:
        Token: Each token in the range.
        """

    def get_tokens(self, node, include_extra=False) -> "Iterator[Token]":
        """
        Yield all tokens belonging to an AST node.

        Parameters:
        - node (ast.AST): AST node to get tokens for.
        - include_extra (bool): Include non-coding tokens.

        Yields:
        Token: Each token that belongs to the node.
        """
# Example: iterating over all tokens that belong to an AST node.
import asttokens
import ast

source = "result = func(x, y) # calculation"
atok = asttokens.ASTTokens(source, parse=True)

# Get all tokens for the assignment statement
assign_node = atok.tree.body[0]
tokens = list(atok.get_tokens(assign_node))
print([t.string for t in tokens])  # ['result', '=', 'func', '(', 'x', ',', 'y', ')']

# Include comments and whitespace
tokens_extra = list(atok.get_tokens(assign_node, include_extra=True))
print([t.string for t in tokens_extra])  # Includes '# calculation'

# Manual range iteration over the node's token span
first = assign_node.first_token
last = assign_node.last_token
tokens_range = list(atok.token_range(first, last))
print([t.string for t in tokens_range])  # Same as get_tokens without extra

# Install with Tessl CLI (see command below).
npx tessl i tessl/pypi-asttokens