RE2 - A regular expression library with linear time guarantees
—
Configuration options that control how RE2 processes regular expressions, including encoding, syntax modes, memory limits, and performance tuning. These options provide fine-grained control over pattern compilation and matching behavior.
Main configuration class for controlling RE2 behavior during pattern compilation and matching.
class Options:
"""Configuration options for RE2 compilation and matching."""
def __init__(self):
"""Create Options object with default values."""
# Memory and Performance Options
max_mem: int = 8388608 # Maximum memory usage (8MiB default)
# Text Encoding Options
encoding: Options.Encoding = Options.Encoding.UTF8 # Text encoding
# Syntax and Matching Mode Options
posix_syntax: bool = False # Use POSIX syntax instead of Perl
longest_match: bool = False # Find longest match (POSIX mode)
case_sensitive: bool = True # Case-sensitive matching
literal: bool = False # Treat pattern as literal string
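# Character Class and Boundary Options
# (consulted only when posix_syntax is True; in the default Perl mode these features are always enabled)
perl_classes: bool = False # Allow Perl character classes (\d, \w, \s)
word_boundary: bool = False # Allow word boundary assertions (\b, \B)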
# Newline Handling Options
never_nl: bool = False # Never match newlines with . or [^...]
dot_nl: bool = False # Allow . to match newlines
one_line: bool = False # Treat input as single line (^ and $ match only at start/end); consulted only when posix_syntax is True
# Capture and Logging Options
never_capture: bool = False # Disable capturing groups (performance optimization)
log_errors: bool = True # Log compilation errors to stderr

class Options:
class Encoding:
"""Text encoding options for pattern and input text."""
UTF8: int = 1 # UTF-8 encoding (default)
LATIN1: int = 2 # Latin-1 (ISO 8859-1) encoding

import re2
# Create options with custom settings
options = re2.Options()
options.case_sensitive = False
options.max_mem = 16777216 # 16MiB
# Use with compilation
pattern = re2.compile(r'HELLO', options)
match = pattern.search("hello world") # Matches due to case insensitivity

import re2
# Limit memory usage for large patterns
options = re2.Options()
options.max_mem = 1048576 # 1MiB limit
try:
# This might fail if pattern is too complex
pattern = re2.compile(r'very|complex|pattern|with|many|alternatives', options)
except re2.error:
print("Pattern too complex for memory limit")
# Disable capturing for better performance
options.never_capture = True
fast_pattern = re2.compile(r'\d+', options) # No capture groups, faster matching

import re2
# UTF-8 text (default)
utf8_options = re2.Options()
utf8_options.encoding = re2.Options.Encoding.UTF8
pattern = re2.compile(r'café', utf8_options)
# Latin-1 text
latin1_options = re2.Options()
latin1_options.encoding = re2.Options.Encoding.LATIN1
# Note: Pattern must be bytes when using Latin-1
latin1_pattern = re2.compile(b'caf\xe9', latin1_options)

import re2
# POSIX syntax mode
posix_options = re2.Options()
posix_options.posix_syntax = True
posix_options.longest_match = True # POSIX requires longest match
# In POSIX mode, some Perl features are disabled
pattern = re2.compile(r'colou?r', posix_options) # Works
# pattern = re2.compile(r'(?i)case', posix_options) # Would fail - no inline modifiers
# Perl syntax mode (default)
perl_options = re2.Options()
perl_options.posix_syntax = False
pattern = re2.compile(r'(?i)case|CASE', perl_options) # Works

import re2
# Treat pattern as literal string (no special characters)
options = re2.Options()
options.literal = True
# All regex special characters are treated literally
pattern = re2.compile(r'$19.99 (20% off)', options)
text = "Price: $19.99 (20% off) today"
match = pattern.search(text) # Matches literally, not as regex

import re2
text = "line1\nline2\nline3"
# Default behavior: . doesn't match newlines
default_pattern = re2.compile(r'line1.*line3')
match = default_pattern.search(text) # No match
# Allow . to match newlines
options = re2.Options()
options.dot_nl = True
dot_nl_pattern = re2.compile(r'line1.*line3', options)
match = dot_nl_pattern.search(text) # Matches across newlines
# Never match newlines (strict)
options.never_nl = True
options.dot_nl = False
strict_pattern = re2.compile(r'[^x]*', options) # [^x] won't match newlines

import re2
# Optimize for performance when captures aren't needed
options = re2.Options()
options.never_capture = True # Disable all capturing
options.never_nl = True # Never match newlines, even if the pattern contains \n
options.one_line = True # ^ and $ match only at start/end of text (consulted only when posix_syntax is True)
# Fast pattern for validation only
validator = re2.compile(r'\d{3}-\d{2}-\d{4}', options)
is_valid = validator.search("123-45-6789") is not None # Fast validation

import re2
# Suppress error logging
quiet_options = re2.Options()
quiet_options.log_errors = False
try:
# Invalid pattern won't log to stderr
pattern = re2.compile(r'[invalid', quiet_options)
except re2.error as e:
# Handle error without stderr noise
print(f"Pattern compilation failed: {e}")
# Default behavior logs errors to stderr
default_options = re2.Options()
try:
pattern = re2.compile(r'[invalid', default_options) # Logs error to stderr
except re2.error:
pass

import re2
# Disable Perl character classes
options = re2.Options()
options.perl_classes = False
# \d, \w, \s are rejected only when posix_syntax is also True; in the default Perl mode they stay enabled
try:
pattern = re2.compile(r'\d+', options) # May fail
except re2.error:
print("Perl character classes disabled")
# Use POSIX character classes instead
posix_pattern = re2.compile(r'[[:digit:]]+', options) # Works
# Disable word boundary assertions (takes effect only when posix_syntax is True)
options.word_boundary = False
try:
pattern = re2.compile(r'\bword\b', options) # May fail
except re2.error:
print("Word boundary assertions disabled")

import re2
# Strict POSIX configuration
posix_config = re2.Options()
posix_config.posix_syntax = True
posix_config.longest_match = True
posix_config.perl_classes = False
posix_config.case_sensitive = True
# Performance-optimized configuration
fast_config = re2.Options()
fast_config.never_capture = True
fast_config.never_nl = True
fast_config.one_line = True
fast_config.log_errors = False
# Memory-constrained configuration
limited_config = re2.Options()
limited_config.max_mem = 1048576 # 1MiB
limited_config.never_capture = True
limited_config.log_errors = False
# Case-insensitive Unicode configuration
unicode_config = re2.Options()
unicode_config.case_sensitive = False
unicode_config.encoding = re2.Options.Encoding.UTF8
unicode_config.dot_nl = True

Install with Tessl CLI
npx tessl i tessl/pypi-google-re2