Utility library for gitignore style pattern matching of file paths.
—
Extensible pattern implementation system with built-in Git wildmatch support and custom pattern registration capabilities. The pattern system allows for pluggable pattern implementations and provides the foundation for pathspec's flexibility.
from pathspec import lookup_pattern, register_pattern
from pathspec.pattern import Pattern, RegexPattern
from pathspec.patterns import GitWildMatchPattern, GitIgnorePattern
from pathspec.patterns.gitwildmatch import GitWildMatchPatternError
from typing import Any, AnyStr, Callable, Iterable, List, Match, Optional, Tuple, Union
import re

Abstract base class for all pattern implementations. Defines the interface that all patterns must implement.
class Pattern:
def __init__(self, include: Optional[bool]) -> None:
"""
Initialize pattern with include/exclude flag.
Parameters:
- include: True for include patterns, False for exclude patterns, None for auto-detection
"""
def match_file(self, file: str) -> Optional[Any]:
"""
Abstract method to match pattern against a single file.
Must be implemented by subclasses.
Parameters:
- file: File path to test against pattern
Returns:
Match result (implementation-specific) or None if no match
"""
def match(self, files: Iterable[str]) -> Iterator[str]:
"""
DEPRECATED: Match pattern against multiple files.
Parameters:
- files: Iterable of file paths to test
Yields:
File paths that match the pattern
"""Concrete pattern implementation using regular expressions. Serves as the base for most pattern types.
class RegexPattern(Pattern):
def __init__(
self,
pattern: Union[str, bytes, Pattern, None],
include: Optional[bool] = None
) -> None:
"""
Initialize regex pattern from string or compiled regex.
Parameters:
- pattern: Pattern string, compiled regex, or None
- include: Include/exclude flag, auto-detected if None
"""
def __eq__(self, other: RegexPattern) -> bool:
"""
Test equality by comparing include flag and regex pattern.
"""
def match_file(self, file: str) -> Optional[RegexMatchResult]:
"""
Match file against regex pattern.
Parameters:
- file: File path to test
Returns:
RegexMatchResult if match found, None otherwise
"""
@classmethod
def pattern_to_regex(cls, pattern: str) -> Tuple[str, bool]:
"""
Convert pattern string to regex and include flag.
Base implementation for simple regex conversion.
Parameters:
- pattern: Pattern string to convert
Returns:
Tuple of (regex_string, include_flag)
"""Git-style wildcard pattern implementation that converts Git wildmatch patterns to regular expressions.
class GitWildMatchPattern(RegexPattern):
@classmethod
def pattern_to_regex(cls, pattern: Union[str, bytes]) -> Tuple[Optional[Union[str, bytes]], Optional[bool]]:
"""
Convert Git wildmatch pattern to regex.
Handles Git-specific wildcards, character classes, and negation.
Parameters:
- pattern: Git wildmatch pattern string
Returns:
Tuple of (regex_string, include_flag) or (None, None) for invalid patterns
Raises:
GitWildMatchPatternError: For invalid pattern syntax
"""
@staticmethod
def escape(s: Union[str, bytes]) -> Union[str, bytes]:
"""
Escape special characters in strings for use in Git patterns.
Parameters:
- s: String to escape
Returns:
Escaped string safe for use in patterns
"""Backward compatibility alias for GitWildMatchPattern.
class GitIgnorePattern(GitWildMatchPattern):
"""
DEPRECATED: Use GitWildMatchPattern instead.
Maintained for backward compatibility.
This class is identical to GitWildMatchPattern and exists only
for backward compatibility with older code.
"""Functions for registering and looking up pattern implementations by name.
def register_pattern(
    name: str,
    pattern_factory: Callable[[Union[str, bytes]], Pattern],
    override: Optional[bool] = None
) -> None:
    """
    Register a pattern factory under a specified name.

    Parameters:
    - name: Name to register the factory under
    - pattern_factory: Callable that takes a pattern string (str or bytes)
      and returns a Pattern instance
    - override: Allow overriding existing registrations if True
      (defaults to None)

    Raises:
    AlreadyRegisteredError: If name already registered and override is False
    """
def lookup_pattern(name: str) -> Callable[[Union[str, bytes]], Pattern]:
"""
Look up a registered pattern factory by name.
Parameters:
- name: Name of the pattern factory to look up
Returns:
Pattern factory callable
Raises:
KeyError: If pattern name is not registered
"""Data classes for containing pattern match information.
class RegexMatchResult:
"""
Contains information about a regex pattern match.
Attributes:
- match: The regex match object from re.match()
"""
match: Match[str]

Exception classes for pattern-related errors.
class GitWildMatchPatternError(ValueError):
    """
    Raised when a Git wildmatch pattern is invalid or cannot be parsed.

    Derives from ValueError, so handlers written for ValueError also
    catch this exception.
    """
class AlreadyRegisteredError(Exception):
"""
Raised when attempting to register a pattern factory name that already exists.
"""PathSpec includes several pre-registered pattern factories:
- 'gitwildmatch': GitWildMatchPattern factory for Git wildcard patterns
- 'gitignore': Alias for GitWildMatchPattern (deprecated, use 'gitwildmatch')

import pathspec
# Pattern lines shared by both construction styles below. Binding them to a
# name keeps the second form runnable: it iterates over `pattern_lines`.
pattern_lines = [
    "*.py",
    "!test_*.py",
    "src/**/*.js",
]

# Use the built-in gitwildmatch factory, referenced by its registered name.
spec = pathspec.PathSpec.from_lines('gitwildmatch', pattern_lines)

# Equivalent to using the class directly: build one pattern object per line
# and hand the list to the PathSpec constructor.
from pathspec.patterns import GitWildMatchPattern
patterns = [GitWildMatchPattern(line) for line in pattern_lines]
spec = pathspec.PathSpec(patterns)

import pathspec
import re
from pathspec.pattern import RegexPattern
class SimpleGlobPattern(RegexPattern):
    """Simple glob pattern supporting * and ? wildcards only.

    A leading ``!`` marks the pattern as an exclude pattern. Every other
    character is matched literally.
    """

    @classmethod
    def pattern_to_regex(cls, pattern):
        """Convert a simple glob string to an anchored regex.

        Parameters:
        - pattern: Glob string; may start with ``!`` to negate it

        Returns:
        Tuple of (regex_string, include_flag); include_flag is False when
        the pattern started with ``!``
        """
        include = not pattern.startswith('!')
        # Drop the negation marker before translating, so it never ends up
        # inside the regex and the ``^`` anchor stays intact.
        glob = pattern if include else pattern[1:]

        pieces = []
        for char in glob:
            if char == '*':
                pieces.append('.*')
            elif char == '?':
                pieces.append('.')
            else:
                # Escape literals so characters such as ``.`` or ``+`` are
                # not interpreted as regex syntax.
                pieces.append(re.escape(char))

        return f"^{''.join(pieces)}$", include
# Make the custom pattern class available under the name 'simpleglob'.
pathspec.register_pattern('simpleglob', SimpleGlobPattern)

# Build a spec by referring to the custom pattern by its registered name.
simpleglob_lines = ["*.txt", "!temp.*"]
spec = pathspec.PathSpec.from_lines('simpleglob', simpleglob_lines)

import pathspec
class _CaseInsensitivePatternProxy:
    """Stand-in for a pattern object that lowercases paths before matching."""

    def __init__(self, wrapped):
        self._wrapped = wrapped

    def match_file(self, file):
        return self._wrapped.match_file(file.lower())

    def __getattr__(self, name):
        # Only reached for attributes not defined on the proxy itself, so
        # everything except match_file is answered by the wrapped pattern.
        return getattr(self._wrapped, name)


def create_case_insensitive_factory(base_factory):
    """Create a case-insensitive version of any pattern factory.

    Parameters:
    - base_factory: Callable that builds a pattern object from a pattern
      string; the object must provide ``match_file(file)``

    Returns:
    A factory that lowercases the pattern string before passing it to
    ``base_factory`` and lowercases every path before it is matched.
    """
    def case_insensitive_factory(pattern):
        # Create base pattern from the lowercased pattern text
        base_pattern = base_factory(pattern.lower())

        # Override match_file to lowercase the input
        original_match = base_pattern.match_file

        def case_insensitive_match(file):
            return original_match(file.lower())

        try:
            base_pattern.match_file = case_insensitive_match
        except AttributeError:
            # Instances of classes that declare __slots__ (and so have no
            # __dict__) reject new instance attributes; wrap them instead
            # of patching them in place.
            return _CaseInsensitivePatternProxy(base_pattern)
        return base_pattern

    return case_insensitive_factory
# Wrap the registered 'gitwildmatch' factory and publish the wrapper under
# its own name.
gitwildmatch_factory = pathspec.lookup_pattern('gitwildmatch')
case_insensitive_git = create_case_insensitive_factory(gitwildmatch_factory)
pathspec.register_pattern('gitwildmatch_ci', case_insensitive_git)

# Build a spec with the case-insensitive factory.
case_insensitive_lines = [
    "*.PY",  # Will match .py, .Py, .PY, etc.
    "SRC/",
]
spec = pathspec.PathSpec.from_lines('gitwildmatch_ci', case_insensitive_lines)

import pathspec
from pathspec.patterns import GitWildMatchPattern
# Examine how patterns are converted. pattern_to_regex is a classmethod, so
# the translation can be inspected without creating a pattern instance.
pattern_str = "src/**/*.py"
regex, include = GitWildMatchPattern.pattern_to_regex(pattern_str)
print(f"Pattern: {pattern_str}")
print(f"Regex: {regex}")
print(f"Include: {include}")

# Test individual pattern: match_file returns None when the path does not
# match, otherwise a result object.
pattern = GitWildMatchPattern(pattern_str)
result = pattern.match_file("src/utils/helper.py")
if result:
    # result.match is the regex match object for the tested path
    print(f"Match: {result.match.group()}")

import pathspec
try:
    # Invalid pattern syntax
    pattern = pathspec.patterns.GitWildMatchPattern("[invalid")
except pathspec.patterns.GitWildMatchPatternError as e:
    # GitWildMatchPatternError derives from ValueError
    print(f"Invalid pattern: {e}")

try:
    # Attempt to register existing name (no override argument is passed)
    pathspec.register_pattern('gitwildmatch', lambda x: None)
except pathspec.AlreadyRegisteredError as e:
    print(f"Pattern already registered: {e}")

try:
    # Look up non-existent pattern
    factory = pathspec.lookup_pattern('nonexistent')
except KeyError as e:
    # lookup_pattern signals an unknown name with KeyError
    print(f"Pattern not found: {e}")

import pathspec
# Combine different pattern types
def create_multi_pattern_spec(pattern_groups):
    """Create PathSpec from multiple pattern types.

    Parameters:
    - pattern_groups: Mapping of registered factory name to an iterable of
      pattern strings for that factory

    Returns:
    A PathSpec holding the patterns of every group, in mapping order
    """
    compiled = []
    for factory_name, lines in pattern_groups.items():
        # Resolve the factory once per group, then build that group's patterns.
        make_pattern = pathspec.lookup_pattern(factory_name)
        compiled.extend(make_pattern(line) for line in lines)
    return pathspec.PathSpec(compiled)
# Use multiple pattern types together
spec = create_multi_pattern_spec({
'gitwildmatch': ["*.py", "src/"],
'simpleglob': ["*.txt"],
})

The GitWildMatchPattern supports full Git wildmatch syntax:
- `*`: Matches any number of characters except path separators
- `**`: Matches any number of characters including path separators
- `?`: Matches exactly one character except path separators
- `[abc]`: Matches any character in the set
- `[a-z]`: Matches any character in the range
- `[!abc]`: Matches any character not in the set
- `\`: Escapes the next character
- `!pattern`: Negation (exclude) pattern
- `/`: Directory separator (normalized across platforms)

Patterns ending with `/` match directories only. Patterns starting with `/` are anchored to the root.
Install with Tessl CLI
npx tessl i tessl/pypi-pathspec