CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-google-re2

RE2 - A regular expression library with linear time guarantees

Pending
Overview
Eval results
Files

docs/pattern-compilation.md

Pattern Compilation

Pre-compiling a regular expression improves performance when the same pattern is used repeatedly. A compiled pattern object also gives access to advanced features, optimization options, and detailed information about the pattern.

Capabilities

Pattern Compilation

Compiles a regular expression pattern into a reusable pattern object with optional configuration.

def compile(pattern, options=None):
    """
    Compile a regular expression into a reusable pattern object.

    Args:
        pattern (str or _Regexp): Pattern string or existing compiled pattern
        options (Options, optional): Compilation options, e.g. an Options
            instance with ``case_sensitive`` set to False

    Returns:
        _Regexp: Compiled pattern object

    Raises:
        error: If pattern compilation fails

    NOTE(review): the upstream wrapper appears to raise ``error`` as well
    when ``options`` is supplied together with an already-compiled
    pattern -- confirm against the installed version.
    """

Example usage:

import re2

# Build the matcher once up front so every lookup below shares it
email_pattern = re2.compile(r'(\w+)@(\w+\.\w+)')

# Sample inputs; the last one contains no address at all
texts = [
    "Contact alice@example.com",
    "Email bob@test.org for details",
    "No email in this text",
]

for text in texts:
    match = email_pattern.search(text)
    if not match:
        # search() found nothing in this line; move on to the next one
        continue
    username, domain = match.groups()
    print(f"Found: {username} at {domain}")

# Case-insensitive matching is configured through an Options object
options = re2.Options()
options.case_sensitive = False
pattern = re2.compile(r'HELLO', options)
match = pattern.search("hello world")  # Matches because case is ignored

Compiled Pattern Object

class _Regexp:
    """
    Compiled regular expression pattern object.

    This is the object returned by compile(). It offers the matching,
    splitting and substitution methods below, plus attributes describing
    the pattern and its compiled program.
    """
    
    def search(self, text, pos=None, endpos=None):
        """
        Search text for a match of the pattern.

        Args:
            text (str): Text to search
            pos (int, optional): Position in text at which the search starts
            endpos (int, optional): Position in text at which the search ends

        Returns:
            _Match or None: Match object if a match was found, otherwise None
        """
    
    def match(self, text, pos=None, endpos=None):
        """
        Match the pattern at the beginning of text.

        Args:
            text (str): Text to match
            pos (int, optional): Start position for the match
            endpos (int, optional): End position for the match

        Returns:
            _Match or None: Match object if the pattern matched, otherwise None
        """
    
    def fullmatch(self, text, pos=None, endpos=None):
        """
        Match the pattern against the entire text.

        Args:
            text (str): Text to match
            pos (int, optional): Start position for the match
            endpos (int, optional): End position for the match

        Returns:
            _Match or None: Match object if the pattern matched, otherwise None
        """
    
    def findall(self, text, pos=None, endpos=None):
        """
        Find all matches of the pattern in text.

        Args:
            text (str): Text to search
            pos (int, optional): Start position for the search
            endpos (int, optional): End position for the search

        Returns:
            list: Matched strings, or tuples of groups (presumably when the
                pattern has several capturing groups, as in the standard
                ``re`` module -- TODO confirm)
        """
    
    def finditer(self, text, pos=None, endpos=None):
        """
        Return an iterator over the matches of the pattern in text.

        Args:
            text (str): Text to search
            pos (int, optional): Start position for the search
            endpos (int, optional): End position for the search

        Returns:
            iterator: Iterator of _Match objects
        """
    
    def split(self, text, maxsplit=0):
        """
        Split text, using matches of the pattern as delimiters.

        Args:
            text (str): Text to split
            maxsplit (int): Maximum number of splits (0 = unlimited)

        Returns:
            list: The text segments produced by the split
        """
    
    def sub(self, repl, text, count=0):
        """
        Replace matches of the pattern in text with a replacement.

        Args:
            repl (str or callable): Replacement string or function
            text (str): Text to process
            count (int): Maximum number of replacements (0 = all)

        Returns:
            str: Text with the replacements applied
        """
    
    def subn(self, repl, text, count=0):
        """
        Replace matches like sub() and also report how many were replaced.

        Args:
            repl (str or callable): Replacement string or function
            text (str): Text to process
            count (int): Maximum number of replacements (0 = all)

        Returns:
            tuple: (result_text, substitution_count)
        """
    
    def possiblematchrange(self, maxlen):
        """
        Compute the range of strings that could possibly match the pattern.

        Args:
            maxlen (int): Maximum string length to consider

        Returns:
            tuple: (min_string, max_string) bounding the possible matches

        NOTE(review): the failure behaviour (e.g. for patterns whose range
        cannot be bounded) is not described here -- confirm upstream.
        """
    
    # Properties
    pattern: str             # Original pattern string passed to compile()
    options: Options         # Compilation options used for this pattern
    groups: int             # Number of capturing groups in the pattern
    groupindex: dict        # Maps each named group to its group index
    programsize: int        # Compiled program size (complexity measure)
    reverseprogramsize: int # Size of the reverse program
    programfanout: list     # Program fanout histogram
    reverseprogramfanout: list  # Reverse program fanout histogram

Example usage with compiled patterns:

import re2

# Named groups make each captured date component addressable by name
pattern = re2.compile(r'(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})')

# Introspect the compiled pattern
group_count = pattern.groups
print(f"Groups: {group_count}")           # 3
name_to_index = pattern.groupindex
print(f"Named groups: {name_to_index}") # {'year': 1, 'month': 2, 'day': 3}
program_size = pattern.programsize
print(f"Program size: {program_size}") # Complexity measure

# Start the search part-way through the text so the first date is skipped
text = "Dates: 2023-01-15 and 2023-12-31"
found = pattern.search(text, pos=10)
if found:
    print(found.groupdict())  # {'year': '2023', 'month': '12', 'day': '31'}

# Bounds on the strings that can match, considering at most 20 characters
bounds = pattern.possiblematchrange(20)
lower, upper = bounds
print(f"Possible matches range from '{lower}' to '{upper}'")

Pattern Creation from Existing Pattern

import re2

# Passing an already-compiled pattern (with no options) yields a compiled
# pattern for the same source string; the wrapper caches compiled patterns,
# so this is normally the very same object.
original = re2.compile(r'\d+')
duplicate = re2.compile(original)

# Options cannot be combined with an already-compiled pattern: compile()
# raises re2.error in that case. To get a variant with different options,
# recompile from the original source string instead.
options = re2.Options()
options.case_sensitive = False
new_pattern = re2.compile(original.pattern, options)  # New pattern object

Error Handling

import re2

# An unterminated character class is rejected when the pattern is compiled
invalid_source = r'[invalid'
try:
    pattern = re2.compile(invalid_source)
except re2.error as exc:
    print(f"Compilation failed: {exc}")

# Check pattern validity before use
def safe_compile(pattern_str):
    """
    Compile pattern_str, returning None instead of raising on failure.

    Args:
        pattern_str: Pattern source handed to re2.compile()

    Returns:
        The compiled pattern, or None if re2.compile() raised re2.error
    """
    try:
        compiled = re2.compile(pattern_str)
    except re2.error:
        # Deliberate best-effort: callers test the result for None
        compiled = None
    return compiled

# safe_compile() returns None for a bad pattern, so test the result first
name_source = r'(?P<name>\w+)'
pattern = safe_compile(name_source)
if pattern:
    # Compilation succeeded, so the pattern can be used
    match = pattern.search("hello world")

Performance Benefits

Reusing a compiled pattern skips the per-call step of turning the pattern string into a pattern object, which pays off when the same pattern is applied many times:

import re2
import time

text = "The quick brown fox jumps over the lazy dog" * 1000
pattern_str = r'\b\w{5}\b'

# Method 1: module-level function. The pattern string has to be resolved to
# a compiled pattern on every call (the wrapper caches compiled patterns, so
# this is a lookup rather than a full recompilation).
# perf_counter() is used instead of time() because it is monotonic and has
# the highest available resolution for measuring short intervals.
start = time.perf_counter()
for _ in range(1000):
    matches = re2.findall(pattern_str, text)
slow_time = time.perf_counter() - start

# Method 2: compile once outside the loop and reuse the pattern object
compiled_pattern = re2.compile(pattern_str)
start = time.perf_counter()
for _ in range(1000):
    matches = compiled_pattern.findall(text)
fast_time = time.perf_counter() - start

print(f"Speedup: {slow_time / fast_time:.2f}x")

Install with Tessl CLI

npx tessl i tessl/pypi-google-re2

docs

advanced-features.md

core-matching.md

index.md

options-configuration.md

pattern-compilation.md

text-processing.md

tile.json