Search (Ctrl+K)
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-asttokens

Annotate AST trees with source code positions

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

position-utilities.mddocs/

Position Utilities

Utilities for converting between different position representations (line/column vs character offsets) and working with source code positions. These utilities handle the complexities of Unicode text and provide compatibility across different position systems.

Capabilities

LineNumbers Class

Utility class for converting between character offsets and (line, column) positions in source text.

class LineNumbers:
    def __init__(self, text):
        """
        Initialize with source text for position calculations.
        
        Parameters:
        - text (str): Source text to analyze
        """
        
    def line_to_offset(self, line, column) -> int:
        """
        Convert line and column position to character offset.
        
        Parameters:
        - line (int): Line number (1-based)
        - column (int): Column position (0-based)
        
        Returns:
        int: Character offset in source text
        """
        
    def offset_to_line(self, offset) -> Tuple[int, int]:
        """
        Convert character offset to line and column position.
        
        Parameters:
        - offset (int): Character offset in source text
        
        Returns:
        Tuple[int, int]: (line, column) where line is 1-based, column is 0-based
        """
        
    def from_utf8_col(self, line, utf8_column) -> int:
        """
        Convert UTF8 byte column to Unicode character column.
        
        Parameters:
        - line (int): Line number (1-based)
        - utf8_column (int): Column position in UTF8 bytes
        
        Returns:
        int: Column position in Unicode characters
        """

Usage Example

import asttokens

source = "hello = 'world'\nprint(hello)"
line_numbers = asttokens.LineNumbers(source)

# Convert position to offset
offset = line_numbers.line_to_offset(1, 6)  # Line 1, column 6 (0-based)
print(source[offset])  # '=' (character at that position)

# Convert offset to position  
line, col = line_numbers.offset_to_line(16)  # Character 16
print(f"Line {line}, Column {col}")  # Line 2, Column 0

# Handle UTF8 encoding differences
source_utf8 = "café = 'délicious'"
line_numbers_utf8 = asttokens.LineNumbers(source_utf8)
unicode_col = line_numbers_utf8.from_utf8_col(1, 5)  # UTF8 byte offset 5
print(unicode_col)  # 4 — 'é' occupies 2 UTF-8 bytes but only 1 character

Tokenless Support Detection

Function to determine if nodes or Python versions support faster tokenless operations.

def supports_tokenless(node=None) -> bool:
    """
    Check if node or Python version supports tokenless operation.
    
    Parameters:
    - node (ast.AST, optional): Specific AST node to check
    
    Returns:
    bool: True if tokenless operation is supported
    """

Usage Example

import asttokens
import ast

source = "x = [1, 2, 3]"
tree = ast.parse(source)

# Check general tokenless support
if asttokens.supports_tokenless():
    print("Python version supports tokenless operations")
    
# Check specific node support
assign_node = tree.body[0]
if asttokens.supports_tokenless(assign_node):
    print("This node supports tokenless operations")
    # Use ASTText for better performance
    astext = asttokens.ASTText(source, tree=tree)
    text = astext.get_text(assign_node)
else:
    print("Node requires full tokenization")
    # Use ASTTokens
    atok = asttokens.ASTTokens(source, tree=tree)
    text = atok.get_text(assign_node)

Token Utility Functions

Helper functions for working with tokens are available through the asttokens.util module. These provide token matching, type checking, and generation capabilities.

# Available through asttokens.util module
from asttokens.util import (
    token_repr, match_token, expect_token, is_non_coding_token,
    generate_tokens, patched_generate_tokens
)

def token_repr(tok_type, string) -> str:
    """
    Create human-readable representation of token.
    
    Parameters:
    - tok_type (int): Token type from token module
    - string (str): Token string content
    
    Returns:
    str: Human-friendly token representation
    """

def match_token(token, tok_type, tok_str=None) -> bool:
    """
    Check if token matches specified type and optionally string.
    
    Parameters:
    - token (Token): Token to check
    - tok_type (int): Expected token type
    - tok_str (str, optional): Expected token string
    
    Returns:
    bool: True if token matches criteria
    """

def expect_token(token, tok_type, tok_str=None):
    """
    Validate that token matches expected type/string, raise if not.
    
    Parameters:
    - token (Token): Token to validate
    - tok_type (int): Expected token type
    - tok_str (str, optional): Expected token string
    
    Raises:
    ValueError: If token doesn't match expectations
    """

def is_non_coding_token(token_type) -> bool:
    """
    Check if token type represents non-coding content.
    
    Parameters:
    - token_type (int): Token type to check
    
    Returns:
    bool: True for comments, newlines, encoding declarations
    """

def generate_tokens(text) -> Iterator[Token]:
    """
    Generate enhanced Token objects from source text.
    
    Parameters:
    - text (str): Source code to tokenize
    
    Yields:
    Token: Enhanced token with position information
    """

def patched_generate_tokens(original_tokens) -> Iterator[Token]:
    """
    Fixed tokenizer that handles non-ASCII identifiers correctly.
    
    Parameters:
    - original_tokens (Iterator): Original token stream
    
    Yields:
    Token: Corrected tokens with proper handling
    """

Usage Example

import asttokens
import asttokens.util
import token

source = "name = 'value'  # comment"
atok = asttokens.ASTTokens(source, parse=True)

# Get first token
first_token = atok.tokens[0]

# Check token matching
if asttokens.util.match_token(first_token, token.NAME, 'name'):
    print("Found 'name' token")

# Create readable representation
repr_str = asttokens.util.token_repr(first_token.type, first_token.string)
print(repr_str)  # "NAME:'name'"

# Check for non-coding tokens
for tok in atok.tokens:
    if asttokens.util.is_non_coding_token(tok.type):
        print(f"Non-coding token: {tok.string}")

# Generate tokens manually
tokens = list(asttokens.util.generate_tokens("x = 1"))
print([f"{t.type}:{t.string}" for t in tokens])

Enhanced Token Class

The Token class provides rich position information for each token.

class Token:
    """
    Enhanced token representation with comprehensive position information.
    
    Attributes:
    - type (int): Token type from token module
    - string (str): Token text content  
    - start (Tuple[int, int]): Starting (row, column) position
    - end (Tuple[int, int]): Ending (row, column) position
    - line (str): Complete line text containing this token
    - index (int): Token index in token list
    - startpos (int): Starting character offset
    - endpos (int): Ending character offset
    """
    
    def __str__(self) -> str:
        """
        Human-readable token representation.
        
        Returns:
        str: String representation of token
        """

Usage Example

import asttokens

source = "def func():\n    pass"
atok = asttokens.ASTTokens(source, parse=True)

# Examine token details
def_token = atok.tokens[0]
print(f"Type: {def_token.type}")        # Token type number
print(f"String: {def_token.string}")    # 'def'
print(f"Start: {def_token.start}")      # (1, 0) - line 1, column 0
print(f"End: {def_token.end}")          # (1, 3) - line 1, column 3
print(f"Line: {def_token.line}")        # 'def func():'
print(f"Index: {def_token.index}")      # 0 - first token
print(f"Start pos: {def_token.startpos}")  # 0 - character offset 0
print(f"End pos: {def_token.endpos}")      # 3 - character offset 3
print(f"Repr: {def_token}")             # Human-readable representation

# Use position information
text_slice = source[def_token.startpos:def_token.endpos]
print(text_slice)  # 'def' - exact token text

Install with Tessl CLI

npx tessl i tessl/pypi-asttokens

docs

ast-processing.md

ast-utilities.md

index.md

position-utilities.md

token-navigation.md

tile.json