RE2 - A regular expression library with linear time guarantees
—
Pre-compilation of regular expressions for improved performance when patterns are used repeatedly. Compiled patterns provide access to advanced features, optimization options, and detailed pattern information.
Compiles a regular expression pattern into a reusable pattern object with optional configuration.
def compile(pattern, options=None):
"""
Compile regular expression pattern.
Args:
pattern (str or _Regexp): Pattern string or existing compiled pattern
options (Options, optional): Compilation options
Returns:
_Regexp: Compiled pattern object
Raises:
error: If pattern compilation fails
"""

Example usage:
import re2
# Compile pattern for reuse
email_pattern = re2.compile(r'(\w+)@(\w+\.\w+)')
# Use compiled pattern multiple times (more efficient)
texts = [
"Contact alice@example.com",
"Email bob@test.org for details",
"No email in this text"
]
for text in texts:
match = email_pattern.search(text)
if match:
username, domain = match.groups()
print(f"Found: {username} at {domain}")
# Compile with options
options = re2.Options()
options.case_sensitive = False
pattern = re2.compile(r'HELLO', options)
match = pattern.search("hello world") # Matches due to case insensitivity

class _Regexp:
"""Compiled regular expression pattern object."""
def search(self, text, pos=None, endpos=None):
"""
Search for pattern in text.
Args:
text (str): Text to search
pos (int, optional): Start position for search
endpos (int, optional): End position for search
Returns:
_Match or None: Match object if found
"""
def match(self, text, pos=None, endpos=None):
"""
Match pattern at beginning of text.
Args:
text (str): Text to match
pos (int, optional): Start position for match
endpos (int, optional): End position for match
Returns:
_Match or None: Match object if matched
"""
def fullmatch(self, text, pos=None, endpos=None):
"""
Match pattern against entire text.
Args:
text (str): Text to match
pos (int, optional): Start position for match
endpos (int, optional): End position for match
Returns:
_Match or None: Match object if matched
"""
def findall(self, text, pos=None, endpos=None):
"""
Find all matches in text.
Args:
text (str): Text to search
pos (int, optional): Start position for search
endpos (int, optional): End position for search
Returns:
list: List of matched strings or group tuples
"""
def finditer(self, text, pos=None, endpos=None):
"""
Return iterator of match objects.
Args:
text (str): Text to search
pos (int, optional): Start position for search
endpos (int, optional): End position for search
Returns:
iterator: Iterator of _Match objects
"""
def split(self, text, maxsplit=0):
"""
Split text using pattern as delimiter.
Args:
text (str): Text to split
maxsplit (int): Maximum splits (0 = unlimited)
Returns:
list: Split text segments
"""
def sub(self, repl, text, count=0):
"""
Replace matches with replacement.
Args:
repl (str or callable): Replacement string or function
text (str): Text to process
count (int): Maximum replacements (0 = all)
Returns:
str: Text with replacements
"""
def subn(self, repl, text, count=0):
"""
Replace matches and return count.
Args:
repl (str or callable): Replacement string or function
text (str): Text to process
count (int): Maximum replacements (0 = all)
Returns:
tuple: (result_text, substitution_count)
"""
def possiblematchrange(self, maxlen):
"""
Compute possible match range for optimization.
Args:
maxlen (int): Maximum string length to consider
Returns:
tuple: (min_string, max_string) for possible matches
"""
# Properties
pattern: str # Original pattern string
options: Options # Compilation options used
groups: int # Number of capturing groups
groupindex: dict # Named group indices mapping
programsize: int # Compiled program size (complexity measure)
reverseprogramsize: int # Reverse program size
programfanout: list # Program fanout histogram
reverseprogramfanout: list # Reverse program fanout histogram

Example usage with compiled patterns:
import re2
# Compile pattern with all features
pattern = re2.compile(r'(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})')
# Pattern information
print(f"Groups: {pattern.groups}") # 3
print(f"Named groups: {pattern.groupindex}") # {'year': 1, 'month': 2, 'day': 3}
print(f"Program size: {pattern.programsize}") # Complexity measure
# Use with position control
text = "Dates: 2023-01-15 and 2023-12-31"
match = pattern.search(text, pos=10) # Search starting from position 10
if match:
print(match.groupdict()) # {'year': '2023', 'month': '12', 'day': '31'}
# Performance optimization info
min_str, max_str = pattern.possiblematchrange(20)
print(f"Possible matches range from '{min_str}' to '{max_str}'")

import re2
# Create pattern from existing pattern (returns same object if options match)
original = re2.compile(r'\d+')
duplicate = re2.compile(original) # Returns original if no options specified
# Create with different options (creates new pattern)
options = re2.Options()
options.case_sensitive = False
new_pattern = re2.compile(original, options) # Creates new pattern object

import re2
try:
# Invalid pattern
pattern = re2.compile(r'[invalid')
except re2.error as e:
print(f"Compilation failed: {e}")
# Check pattern validity before use
def safe_compile(pattern_str):
try:
return re2.compile(pattern_str)
except re2.error:
return None
pattern = safe_compile(r'(?P<name>\w+)')
if pattern:
# Use pattern safely
match = pattern.search("hello world")

Compiled patterns provide significant performance benefits when used repeatedly:
import re2
import time
text = "The quick brown fox jumps over the lazy dog" * 1000
pattern_str = r'\b\w{5}\b'
# Method 1: Recompile each time (slower)
start = time.time()
for _ in range(1000):
matches = re2.findall(pattern_str, text)
slow_time = time.time() - start
# Method 2: Compile once, reuse (faster)
compiled_pattern = re2.compile(pattern_str)
start = time.time()
for _ in range(1000):
matches = compiled_pattern.findall(text)
fast_time = time.time() - start
print(f"Speedup: {slow_time / fast_time:.2f}x")

Install with Tessl CLI
npx tessl i tessl/pypi-google-re2