CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-regex

Alternative regular expression module providing enhanced pattern matching, fuzzy matching, and advanced Unicode support as a replacement for Python's re module.

Pending
Overview
Eval results
Files

docs/compilation-utilities.md

Pattern Compilation and Utilities

Pattern compilation, caching control, template support, and string escaping utilities for preparing and managing regular expression patterns. These functions provide essential tools for optimizing pattern usage and preparing literal strings for pattern inclusion.

Capabilities

Pattern Compilation

Compile a regular expression pattern into a Pattern object for efficient reuse, with enhanced compilation options and caching control.

def compile(pattern, flags=0, ignore_unused=False, cache_pattern=None, **kwargs):
    """
    Compile a regular expression pattern, returning a Pattern object.
    
    Args:
        pattern (str): Regular expression pattern to compile
        flags (int, optional): Regex flags to modify pattern behavior;
            combine several with ``|`` (e.g. ``regex.IGNORECASE | regex.VERBOSE``)
        ignore_unused (bool, optional): Ignore unused keyword arguments
            instead of rejecting them
        cache_pattern (bool, optional): Override default caching behavior
            for this call; None (the default) defers to the module-wide
            setting controlled by cache_all()
        **kwargs: Additional compilation arguments.
            NOTE(review): in the regex module these supply named lists that
            the pattern references as ``\\L<name>``. The behaviour version is
            chosen with the VERSION0/VERSION1 flags or inline
            ``(?V0)``/``(?V1)``, not with a keyword argument -- confirm
            against the regex documentation.
    
    Returns:
        Pattern: Compiled pattern object with matching methods
    """

Usage Examples:

import regex

# Basic pattern compilation
pattern = regex.compile(r'\b\w+@\w+\.\w+\b')
emails = pattern.findall('Contact: user@example.com or admin@site.org')
print(emails)  # ['user@example.com', 'admin@site.org']

# Compile with flags (several flags are combined with |)
pattern = regex.compile(r'hello\s+world', regex.IGNORECASE | regex.VERBOSE)
result = pattern.search('HELLO   WORLD')
print(result.group())  # 'HELLO   WORLD'

# Reuse compiled pattern for efficiency: compile once, outside the loop.
# `file_lines` and `process_line_with_email` are placeholders that the
# surrounding application is expected to provide.
email_pattern = regex.compile(r'\b[\w.-]+@[\w.-]+\.\w+\b')
for line in file_lines:
    if email_pattern.search(line):
        process_line_with_email(line)

# Compile with version specification (selected inline at the start of the pattern)
v1_pattern = regex.compile(r'(?V1)pattern', regex.IGNORECASE)  # Version 1: enhanced behavior
v0_pattern = regex.compile(r'(?V0)pattern', regex.IGNORECASE)  # Version 0: legacy re-compatible behavior

# Fuzzy pattern compilation: {e<=2} permits up to 2 errors in the group.
# (?e) is the inline ENHANCEMATCH flag; BESTMATCH is passed as a flag argument.
fuzzy_pattern = regex.compile(r'(?e)(search){e<=2}', regex.BESTMATCH)
result = fuzzy_pattern.search('serch text searching')
print(result.group())  # Best fuzzy match

# Control pattern caching
pattern = regex.compile(r'\d+', cache_pattern=False)  # Don't cache this pattern

Template Pattern Compilation

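Compile a pattern with the TEMPLATE flag applied, returning a regular Pattern object. Despite the name, the result is a compiled pattern used for matching, not a replacement template; replacement templates passed to substitution functions such as sub() remain plain strings.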

def template(pattern, flags=0):
    """
    Compile a template pattern, returning a Pattern object.

    NOTE(review): in the regex module this behaves like compile() with the
    TEMPLATE flag added to `flags`. The result is an ordinary compiled
    pattern used for matching; it is not a replacement template and is not
    meant to be passed as the `repl` argument of sub() -- confirm against
    the regex documentation.
    
    Args:
        pattern (str): Template pattern to compile
        flags (int, optional): Regex flags to modify template behavior
    
    Returns:
        Pattern: Compiled template pattern object
    """

Usage Examples:

import regex

# template() compiles a *pattern* (it adds the TEMPLATE flag to `flags`).
# Its result is used for matching; it must not be passed as the replacement
# argument of sub(). Replacement templates are plain strings.

# Basic template compilation, with a numbered-group replacement string
date_pattern = regex.template(r'(\d{4})(\d{2})(\d{2})')
result = date_pattern.sub(r'\1-\2-\3', '20231225')
print(result)  # '2023-12-25'

# Named groups referenced from the replacement string
name_pattern = regex.template(r'(?P<first>\w+) (?P<last>\w+)')
result = name_pattern.sub(r'\g<last>, \g<first>', 'John Doe')
print(result)  # 'Doe, John'

# Template with flags
template_pattern = regex.template(r'(\w+):(\w+)', regex.IGNORECASE)

String Escaping

Escape special regex characters in a string to use it as a literal pattern, with options for controlling which characters are escaped.

def escape(pattern, special_only=True, literal_spaces=False):
    """
    Escape a string for use as a literal in a pattern.
    
    Args:
        pattern (str): String to escape for literal use
        special_only (bool, optional): If True (the default), escape only
            special regex characters; if False, escape every
            non-alphanumeric character
        literal_spaces (bool, optional): If True, leave spaces unescaped;
            if False (the default), spaces are escaped as well
    
    Returns:
        str: Escaped string safe for use in regex patterns
    """

Usage Examples:

import regex

# NOTE: expected values in the comments below are written as Python string
# literals, so `\\` stands for ONE backslash; print() shows a single backslash.

# Basic string escaping (spaces are escaped too, since literal_spaces defaults to False)
literal_text = "Price: $19.99 (special!)"
escaped = regex.escape(literal_text)
print(escaped)  # 'Price:\\ \\$19\\.99\\ \\(special!\\)'

# Use escaped string in pattern
pattern = r'Item: ' + regex.escape("$19.99 (sale)")
result = regex.search(pattern, 'Item: $19.99 (sale) - Buy now!')
print(result.group())  # 'Item: $19.99 (sale)'

# Escape only special characters (special_only=True is the default, shown explicitly here)
text = "hello.world*test"
escaped = regex.escape(text, special_only=True)
print(escaped)  # 'hello\\.world\\*test'

# Control space escaping
text = "hello world test"
escaped_with_spaces = regex.escape(text, literal_spaces=False)
escaped_literal_spaces = regex.escape(text, literal_spaces=True)
print(escaped_with_spaces)    # 'hello\\ world\\ test'
print(escaped_literal_spaces) # 'hello world test'

# Build patterns with literals and regex parts: only the untrusted/literal
# part is escaped, the \b anchors stay live regex syntax
user_input = "user@domain.com"
pattern = r'\b' + regex.escape(user_input) + r'\b'
result = regex.search(pattern, 'Email: user@domain.com is valid')
print(result.group())  # 'user@domain.com'

Pattern Cache Management

Control the internal pattern cache to optimize memory usage and performance for applications with many patterns.

def purge():
    """Clear the regular expression cache.

    Takes no arguments.
    """

def cache_all(value=True):
    """
    Set/get whether to cache all patterns, even those compiled explicitly.

    Calling with None is a pure query: the setting is left unchanged and
    returned. Any other value replaces the setting.
    
    Args:
        value (bool or None): True to enable caching all, False to disable,
                             None to return current setting
    
    Returns:
        bool or None: Current caching setting when value is None
    """

Usage Examples:

import regex

# Clear the pattern cache
regex.purge()

# Check current cache setting (passing None only queries; nothing is changed)
current_setting = regex.cache_all(None)
print(f"Current cache setting: {current_setting}")

# Enable caching of all patterns, including explicitly compiled ones
regex.cache_all(True)

# Disable caching of explicitly compiled patterns
regex.cache_all(False)

# Typical cache management workflow
def process_many_patterns(patterns, text):
    """Run findall() against `text` for every pattern in `patterns`.

    The pattern cache is purged first and cache-all mode is switched off
    while the patterns are compiled. The previous cache-all setting is put
    back afterwards, even if compilation or matching raises.

    Returns:
        list: One findall() result per pattern, in input order.
    """
    # Start from an empty cache
    regex.purge()

    # Remember the current mode (None queries without changing it), then
    # turn cache-all off so the patterns below do not pile up in the cache
    previous_mode = regex.cache_all(None)
    regex.cache_all(False)

    try:
        return [regex.compile(expr).findall(text) for expr in patterns]
    finally:
        # Put the caller's cache-all mode back
        regex.cache_all(previous_mode)

# Monitor cache usage in long-running applications
def periodic_cache_cleanup():
    """Purge the regex pattern cache, then run a garbage-collection pass."""
    import gc
    regex.purge()  # Clear regex cache
    gc.collect()   # Run garbage collection

Advanced Compilation Features

Version-Specific Compilation

Control regex behavior version during compilation:

# Version 0 (legacy re-compatible), selected inline with (?V0)
v0_pattern = regex.compile(r'(?V0)\w+', regex.IGNORECASE)

# Version 1 (enhanced behavior with full case-folding), selected inline with (?V1)
v1_pattern = regex.compile(r'(?V1)\w+', regex.IGNORECASE)

# Default version control
# NOTE(review): presumably applies only to patterns that do not select a
# version themselves -- confirm against the regex documentation.
regex.DEFAULT_VERSION = regex.VERSION1  # Set global default

Fuzzy Pattern Compilation

Compile patterns with fuzzy matching capabilities:

# Basic fuzzy compilation: the {e<=2} constraint is what makes the group fuzzy;
# (?e) is the inline ENHANCEMATCH flag, not the error allowance
fuzzy = regex.compile(r'(?e)(hello){e<=2}')  # Allow up to 2 errors

# Best match fuzzy compilation: at most 1 insertion, 1 deletion and 2 substitutions.
# NOTE(review): (?b) already enables BESTMATCH inline, so the flag argument looks redundant.
best_fuzzy = regex.compile(r'(?be)(search){i<=1,d<=1,s<=2}', regex.BESTMATCH)

# Enhanced fuzzy matching
# NOTE(review): the inline group repeats the `e` flag and ENHANCEMATCH is also
# passed explicitly -- presumably one occurrence is enough; confirm.
enhanced = regex.compile(r'(?ee)(pattern){e<=1}', regex.ENHANCEMATCH)

Performance Optimization

# Compiled once at import time so every call below reuses the same objects
EMAIL_PATTERN = regex.compile(r'\b[\w.-]+@[\w.-]+\.\w+\b')
PHONE_PATTERN = regex.compile(r'\b\d{3}-\d{3}-\d{4}\b')
DATE_PATTERN = regex.compile(r'\b\d{4}-\d{2}-\d{2}\b')

def extract_info(text):
    """Collect every email address, phone number and date found in `text`.

    Returns:
        dict: Keys 'emails', 'phones' and 'dates', each mapped to the
        findall() result of the corresponding module-level pattern.
    """
    return {
        'emails': EMAIL_PATTERN.findall(text),
        'phones': PHONE_PATTERN.findall(text),
        'dates': DATE_PATTERN.findall(text),
    }

# Cache control for dynamic patterns
def process_user_patterns(user_patterns, text):
    """Apply each named, user-supplied pattern to `text`.

    Cache-all mode is switched off while the one-time patterns are compiled
    and the caller's previous setting is restored afterwards.

    Args:
        user_patterns (dict): Maps a result name to a pattern string.
        text (str): Text to search.

    Returns:
        dict: For each name, the findall() result, or an "Error: ..." string
        when that pattern fails to compile.
    """
    # Remember the caller's setting (None queries without changing it) so it
    # can be restored instead of unconditionally forcing cache-all back on.
    previous_setting = regex.cache_all(None)

    # Disable caching for one-time patterns
    regex.cache_all(False)

    try:
        results = {}
        for name, pattern in user_patterns.items():
            try:
                compiled = regex.compile(pattern)
                results[name] = compiled.findall(text)
            except regex.error as e:
                # A bad pattern is reported per entry; the others still run
                results[name] = f"Error: {e}"
        return results
    finally:
        # Runs even if something other than regex.error escapes the loop
        regex.cache_all(previous_setting)

Error Handling and Validation

def safe_compile(pattern_str, flags=0):
    """Compile `pattern_str`, reporting failures instead of raising.

    Returns:
        The compiled pattern, or None after printing the error, the
        offending pattern and (when the exception carries one) the position
        of the error.
    """
    try:
        compiled = regex.compile(pattern_str, flags)
    except regex.error as exc:
        print(f"Pattern compilation failed: {exc}")
        print(f"Pattern: {pattern_str}")
        # Not every error necessarily carries a usable position
        error_pos = getattr(exc, 'pos', None)
        if error_pos is not None:
            print(f"Error at position {error_pos}")
        return None
    return compiled

# Validate user input patterns
def validate_pattern(user_pattern):
    """Escape `user_pattern` and check that its literal form compiles.

    The input is escaped first, so it is checked as literal text rather
    than as a regular expression written by the user.

    Returns:
        tuple: (True, message containing the escaped pattern) on success,
        (False, message containing the error) if compilation fails.
    """
    escaped_input = regex.escape(user_pattern)
    try:
        # Compiled only to check validity; the result is deliberately
        # discarded rather than bound to an unused local.
        regex.compile(escaped_input)
    except regex.error as e:
        return False, f"Cannot create valid pattern: {e}"
    return True, f"Valid literal pattern: {escaped_input}"

# Test pattern against sample text
def test_pattern(pattern_str, test_text="test sample text 123"):
    """Try `pattern_str` against `test_text` and describe the outcome.

    Returns:
        tuple: (True, message listing the matches) when the pattern compiles
        and runs, (False, message containing the error) on regex.error.
    """
    try:
        found = regex.compile(pattern_str).findall(test_text)
    except regex.error as exc:
        return False, f"Pattern error: {exc}"
    return True, f"Pattern works. Found {len(found)} matches: {found}"

Install with Tessl CLI

npx tessl i tessl/pypi-regex

docs

classes-types.md

compilation-utilities.md

flags-constants.md

index.md

pattern-matching.md

splitting.md

substitution.md

tile.json