CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-pathspec

Utility library for gitignore style pattern matching of file paths.

Pending
Overview
Eval results
Files

docs/utilities.md

Utilities

File system traversal, path normalization, and helper functions for working with file paths and pattern matching results. These utilities provide the foundation for pathspec's file system operations.

Imports

from pathspec import iter_tree_files, iter_tree_entries, normalize_file, RecursionError
from pathspec.util import CheckResult, TreeEntry, match_file, check_match_file
from typing import Callable, Collection, Dict, Generic, Iterable, Iterator, List, Optional, Set, Tuple, TypeVar, Union
import os
import pathlib

Type Definitions

StrPath = Union[str, os.PathLike[str]]  # Python 3.9+
TStrPath = TypeVar("TStrPath", bound=StrPath)

Capabilities

Directory Tree Traversal

Functions for walking directory trees and yielding file information.

def iter_tree_files(
    root: Union[str, os.PathLike], 
    on_error: Optional[Callable[[OSError], None]] = None, 
    follow_links: Optional[bool] = None
) -> Iterator[str]:
    """
    Walk a directory tree and yield the path of each file found.

    Parameters:
    - root: Root directory path to traverse
    - on_error: Optional callback that receives the OSError raised while
      traversing; None means no callback is installed
    - follow_links: Whether to follow symbolic links; None selects the
      library default.
      NOTE(review): this page states the default is False, but upstream
      pathspec documents None as meaning True (links are followed) --
      confirm against the installed version.

    Yields:
    File paths relative to the root directory

    Raises:
    RecursionError: If directory recursion is detected
    """

def iter_tree_entries(
    root: Union[str, os.PathLike], 
    on_error: Optional[Callable[[OSError], None]] = None, 
    follow_links: Optional[bool] = None
) -> Iterator[TreeEntry]:
    """
    Walk a directory tree and yield a TreeEntry for each entry found.

    Parameters:
    - root: Root directory path to traverse
    - on_error: Optional callback that receives the OSError raised while
      traversing; None means no callback is installed
    - follow_links: Whether to follow symbolic links; None selects the
      library default.
      NOTE(review): this page states the default is False, but upstream
      pathspec documents None as meaning True (links are followed) --
      confirm against the installed version.

    Yields:
    TreeEntry objects containing file system information

    Raises:
    RecursionError: If directory recursion is detected
    """

def iter_tree(
    root: Union[str, os.PathLike], 
    on_error: Optional[Callable[[OSError], None]] = None, 
    follow_links: Optional[bool] = None
) -> Iterator[str]:
    """
    DEPRECATED: Alias for iter_tree_files; use iter_tree_files instead.

    Walk a directory tree and yield file paths.

    Parameters:
    - root: Root directory path to traverse
    - on_error: Optional callback for handling OS errors during traversal
    - follow_links: Whether to follow symbolic links (None selects the
      library default)

    Yields:
    File paths, as iter_tree_files does
    """

Path Normalization

Functions for normalizing and processing file paths for cross-platform compatibility.

def normalize_file(
    file: Union[str, os.PathLike], 
    separators: Optional[Collection[str]] = None
) -> str:
    """
    Normalize a file path to use POSIX separators and to be relative.

    Parameters:
    - file: File path to normalize
    - separators: Collection of path separators to replace with "/".
      None selects the default (the platform separator, os.sep), so
      separators that are foreign to the current platform are only
      replaced when passed explicitly.
      NOTE(review): upstream pathspec also appears to include os.altsep
      in the default set -- confirm.

    Returns:
    Normalized file path using forward slashes, expressed as a relative path
    """

def normalize_files(
    files: Iterable[Union[str, os.PathLike]], 
    separators: Optional[Collection[str]] = None
) -> Dict[str, List[Union[str, os.PathLike]]]:
    """
    DEPRECATED: Normalize multiple file paths.

    Parameters:
    - files: Iterable of file paths to normalize
    - separators: Collection of path separators to replace (None selects
      the library default)

    Returns:
    Dictionary mapping each normalized path to the list of original paths
    that normalized to it
    """

def append_dir_sep(path: pathlib.Path) -> str:
    """
    Return the path as a string, with a trailing directory separator
    appended when the path is a directory.

    The trailing separator lets pattern matching distinguish directories
    from files.

    Parameters:
    - path: Path object to process

    Returns:
    String path with a trailing separator if it is a directory; the plain
    string path otherwise
    """

Pattern Matching Utilities

Helper functions for working with pattern matching operations.

def match_file(patterns: Iterable[Pattern], file: str) -> bool:
    """
    Test whether a file is matched by a collection of patterns.

    Simple boolean matching without detailed results.

    Parameters:
    - patterns: Iterable of Pattern instances to test against
    - file: File path to test

    Returns:
    True if file matches any include pattern and no exclude patterns
    NOTE(review): upstream pathspec appears to let the last matching
    pattern decide the result, so pattern order may matter -- confirm.
    """

def match_files(
    patterns: Iterable[Pattern], 
    files: Iterable[str]
) -> Set[str]:
    """
    DEPRECATED: Match multiple files against patterns.

    Use PathSpec.match_files instead.

    Parameters:
    - patterns: Iterable of Pattern instances
    - files: Iterable of file paths to test

    Returns:
    Set containing the file paths that match the patterns
    """

def check_match_file(
    patterns: Iterable[Tuple[int, Pattern]], 
    file: str
) -> Tuple[Optional[bool], Optional[int]]:
    """
    Check a file against indexed patterns and report which pattern decided.

    Parameters:
    - patterns: Iterable of (index, Pattern) tuples
    - file: File path to test

    Returns:
    Tuple of (match_result, pattern_index).

    match_result is:
    - True: file matches and should be included
    - False: file matches and should be excluded
    - None: file doesn't match any patterns

    pattern_index is the index paired with the pattern that produced
    match_result; it is Optional, presumably None when match_result is
    None -- confirm.
    """

def detailed_match_files(
    patterns: Iterable[Pattern], 
    files: Iterable[str], 
    all_matches: Optional[bool] = None
) -> Dict[str, MatchDetail]:
    """
    Match files against patterns, recording which patterns matched each file.

    Parameters:
    - patterns: Iterable of Pattern instances
    - files: Iterable of file paths to test
    - all_matches: If True, include all pattern matches; if False, only
      the final result. None selects the library default.
      NOTE(review): upstream pathspec documents None as meaning False --
      confirm.

    Returns:
    Dictionary mapping file paths to MatchDetail objects
    """

Data Classes

Classes for containing file system and match result information.

class TreeEntry:
    """
    Contains file system entry information from directory traversal.

    Attributes:
    - name (str): Entry name (basename)
    - path (str): Entry path relative to the traversal root
    - stat (os.stat_result): File system stat information

    NOTE(review): upstream pathspec appears to expose `stat` as a method,
    `stat(follow_links=None)`, rather than a plain attribute -- confirm
    against the installed version before relying on `entry.stat.st_size`.
    """
    name: str
    path: str
    stat: os.stat_result
    
    def is_dir(self, follow_links: Optional[bool] = None) -> bool:
        """
        Get whether the entry is a directory.

        Parameters:
        - follow_links: Whether to follow symbolic links when determining
          the type (None selects the library default; presumably links are
          followed -- confirm)

        Returns:
        True if entry is a directory
        """
    
    def is_file(self, follow_links: Optional[bool] = None) -> bool:
        """
        Get whether the entry is a regular file.

        Parameters:
        - follow_links: Whether to follow symbolic links when determining
          the type (None selects the library default; presumably links are
          followed -- confirm)

        Returns:
        True if entry is a regular file
        """
    
    def is_symlink(self) -> bool:
        """
        Get whether the entry is a symbolic link.

        Returns:
        True if entry is a symbolic link
        """

class CheckResult(Generic[TStrPath]):
    """
    Contains the detailed result of checking one file against patterns.

    Type Parameters:
    - TStrPath: Type of the file path; bound to StrPath, i.e. str or
      os.PathLike[str] (which includes pathlib.Path)

    Attributes:
    - file (TStrPath): File path that was tested
    - include (Optional[bool]): Match result - True (include), False (exclude), None (no match)
    - index (Optional[int]): Index of the pattern that produced the result
    """
    file: TStrPath
    include: Optional[bool]
    index: Optional[int]

class MatchDetail:
    """
    Contains information about which patterns matched a file during
    detailed matching (see detailed_match_files, which returns one
    MatchDetail per file path).

    NOTE(review): upstream pathspec appears to expose a `patterns`
    attribute holding the matching patterns in the order they were
    encountered -- confirm against the installed version.
    """

Exception Classes

Specialized exceptions for utility operations.

class RecursionError(Exception):
    """
    Raised when directory recursion is detected during tree traversal.
    Prevents infinite loops from circular symbolic links.

    This class derives from Exception and is distinct from Python's
    built-in RecursionError of the same name; refer to it through the
    pathspec package (e.g. `pathspec.RecursionError`) when catching it.
    """

class AlreadyRegisteredError(Exception):
    """
    Raised when a pattern factory is registered under a name that is
    already in use.
    """

Internal Utilities

Internal helper functions used by the pathspec system.

def _filter_check_patterns(
    patterns: Iterable[Tuple[int, Pattern]]
) -> List[Tuple[int, Pattern]]:
    """
    Internal: Filter and prepare patterns for checking operations.

    Parameters:
    - patterns: Iterable of (index, Pattern) tuples

    Returns:
    List of the (index, Pattern) tuples retained for checking
    """

def _is_iterable(value: Any) -> bool:
    """
    Internal: Test if a value is iterable but not a string.

    Parameters:
    - value: Value to test

    Returns:
    True if the value is a non-string iterable
    """

def _iter_tree_entries_next(
    dir_entry: os.DirEntry, 
    root_full: str, 
    memo: Set[int]
) -> Iterator[TreeEntry]:
    """
    Internal: Process directory entries during tree traversal.

    Parameters:
    - dir_entry: Directory entry being processed
    - root_full: Root directory path (presumably the full/absolute form --
      confirm)
    - memo: Set of ints carried through the traversal (presumably used to
      detect directories already visited -- confirm)

    Yields:
    TreeEntry objects
    """

Usage Examples

Directory Tree Traversal

import pathspec

# Basic file iteration
for file_path in pathspec.iter_tree_files("/path/to/project"):
    print(f"Found file: {file_path}")

# Detailed file information
for entry in pathspec.iter_tree_entries("/path/to/project"):
    print(f"Entry: {entry.name}")
    print(f"  Path: {entry.path}")
    # is_dir is a method and must be called: the bare `entry.is_dir` is a
    # bound method object, which is always truthy, so every entry would be
    # reported as 'dir'.
    print(f"  Type: {'dir' if entry.is_dir() else 'file'}")
    print(f"  Size: {entry.stat.st_size}")
    print(f"  Modified: {entry.stat.st_mtime}")

# Handle errors during traversal
def handle_error(error):
    """Report an OSError raised during traversal without aborting the walk."""
    print(f"Warning: Cannot access {error.filename}: {error}")

for file_path in pathspec.iter_tree_files(
    "/path/to/project", 
    on_error=handle_error
):
    print(f"Accessible file: {file_path}")

# Follow symbolic links
for file_path in pathspec.iter_tree_files(
    "/path/to/project", 
    follow_links=True
):
    print(f"File (including symlinks): {file_path}")

Path Normalization

import pathspec

# Normalize paths for cross-platform compatibility.
# With the default separators only the platform's own separator is replaced,
# so the backslashes below are converted when backslash is the platform
# separator (i.e. on Windows). Pass separators=["\\"] explicitly to convert
# them on any platform.
windows_path = "src\\utils\\helper.py"
normalized = pathspec.normalize_file(windows_path)
print(normalized)  # "src/utils/helper.py" (on Windows)

# Normalize with custom separators
weird_path = "src|utils|helper.py"
normalized = pathspec.normalize_file(weird_path, separators=["|"])
print(normalized)  # "src/utils/helper.py"

# Directory path handling
import pathlib
dir_path = pathlib.Path("/project/src")
dir_with_sep = pathspec.append_dir_sep(dir_path)
print(dir_with_sep)  # "/project/src/" (if directory)

Direct Pattern Matching

import pathspec
from pathspec.patterns import GitWildMatchPattern

# Build the pattern list: include every .py file, then exclude test modules
patterns = [
    GitWildMatchPattern(line)
    for line in ("*.py", "!test_*.py")
]

# Boolean-only matching: one verdict per file
files = ["main.py", "test_main.py", "utils.py"]
for file in files:
    verdict = 'matches' if pathspec.match_file(patterns, file) else 'no match'
    print(f"{file}: {verdict}")

# Matching that also reports which pattern (by index) decided the outcome
indexed_patterns = list(enumerate(patterns))
for file in files:
    result, index = pathspec.check_match_file(indexed_patterns, file)
    if result is None:
        # No pattern applied to this file
        print(f"{file}: no match")
    elif result:
        print(f"{file}: included by pattern {index}")
    else:
        print(f"{file}: excluded by pattern {index}")

Working with CheckResult

import pathspec

# Build a PathSpec from gitwildmatch lines
pattern_lines = [
    "*.py",       # Pattern 0
    "!test_*.py", # Pattern 1
]
spec = pathspec.PathSpec.from_lines('gitwildmatch', pattern_lines)

files = ["main.py", "test_main.py", "utils.py"]

# Report the include/exclude decision and the deciding pattern for each file
for result in spec.check_files(files):
    print(f"File: {result.file}")

    if result.include is None:
        # No pattern matched this file, so there is no index to report
        print("  Status: NO MATCH")
        continue

    status = "INCLUDED" if result.include else "EXCLUDED"
    print(f"  Status: {status} (pattern {result.index})")

Working with TreeEntry

import pathspec

# Classify each tree entry by type (first matching check wins)
for entry in pathspec.iter_tree_entries("/project"):
    if entry.is_dir():
        print(f"Directory: {entry.path}")
        continue
    if entry.is_file():
        print(f"File: {entry.path} ({entry.stat.st_size} bytes)")
        continue
    if entry.is_symlink():
        print(f"Symlink: {entry.path}")

# Feed tree entries straight into a PathSpec
spec = pathspec.PathSpec.from_lines('gitwildmatch', ["*.py"])

project_entries = pathspec.iter_tree_entries("/project")
matching_entries = list(spec.match_entries(project_entries))

for entry in matching_entries:
    print(f"Matching file: {entry.path}")

Error Handling

import pathspec

try:
    # This might raise RecursionError if there are circular symlinks.
    # Note that pathspec.RecursionError is the library's own exception
    # class, not Python's built-in RecursionError, so it must be caught by
    # its qualified name as done below.
    files = list(pathspec.iter_tree_files("/project", follow_links=True))
except pathspec.RecursionError as e:
    print(f"Circular symlink detected: {e}")

# Custom error handling: the callback receives each OSError raised during
# traversal
def log_and_continue(error):
    """Log errors but continue traversal."""
    print(f"Error accessing {error.filename}: {error}")

# Traverse with error handling
safe_files = list(pathspec.iter_tree_files(
    "/project",
    on_error=log_and_continue,
    follow_links=True
))

Advanced TreeEntry Usage

import pathspec
import os
import time

# Gather size, extension and recency statistics for a project tree
total_size = 0
file_types = {}
recent_files = []
cutoff_time = time.time() - (7 * 24 * 60 * 60)  # 7 days ago

for entry in pathspec.iter_tree_entries("/project"):
    # Only regular files contribute to the statistics
    if not entry.is_file():
        continue

    info = entry.stat

    # Accumulate the total size
    total_size += info.st_size

    # Count files per (lower-cased) extension
    ext = os.path.splitext(entry.name)[1].lower()
    file_types.setdefault(ext, 0)
    file_types[ext] += 1

    # Remember files modified after the cutoff
    if info.st_mtime > cutoff_time:
        recent_files.append(entry.path)

print(f"Total size: {total_size} bytes")
print(f"File types: {file_types}")
print(f"Recent files: {len(recent_files)}")

Install with Tessl CLI

npx tessl i tessl/pypi-pathspec

docs

index.md

path-matching.md

pattern-system.md

utilities.md

tile.json