CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-google-re2

RE2 - A regular expression library with linear time guarantees

Pending
Overview
Eval results
Files

options-configuration.mddocs/

Options and Configuration

Configuration options that control how RE2 processes regular expressions, including encoding, syntax modes, memory limits, and performance tuning. These options provide fine-grained control over pattern compilation and matching behavior.

Capabilities

Options Class

Main configuration class for controlling RE2 behavior during pattern compilation and matching.

class Options:
    """Configuration options for RE2 compilation and matching.

    The defaults listed here follow the defaults documented for the
    underlying C++ ``RE2::Options`` class.  Three options --
    ``perl_classes``, ``word_boundary`` and ``one_line`` -- are only
    consulted when ``posix_syntax`` is True.  With the default Perl syntax
    those features are always enabled and cannot be switched off; use the
    inline ``(?m)`` flag for multi-line matching in that case.
    """

    def __init__(self):
        """Create Options object with default values."""

    # Memory and Performance Options
    max_mem: int = 8388608              # Approximate memory budget (8MiB default)

    # Text Encoding Options
    encoding: Options.Encoding = Options.Encoding.UTF8  # Text encoding

    # Syntax and Matching Mode Options
    posix_syntax: bool = False          # Restrict patterns to POSIX egrep syntax
    longest_match: bool = False         # Search for longest match, not first match
    case_sensitive: bool = True         # Case-sensitive matching ((?i) can override in Perl syntax)
    literal: bool = False               # Treat pattern as literal string

    # Character Class and Boundary Options (consulted only when posix_syntax is True)
    perl_classes: bool = False          # Allow Perl character classes (\d, \w, \s and negations)
    word_boundary: bool = False         # Allow word boundary assertions (\b, \B)

    # Newline Handling Options
    never_nl: bool = False              # Never match \n, even if it appears in the pattern
    dot_nl: bool = False                # Allow . to match newlines
    one_line: bool = False              # ^ and $ match only at start/end of text (consulted only when posix_syntax is True)

    # Capture and Logging Options
    never_capture: bool = False         # Parse all parentheses as non-capturing
    log_errors: bool = True             # Log syntax and execution errors

Encoding Options

class Options:
    """Stub showing the ``Encoding`` constants nested inside ``Options``."""

    class Encoding:
        """Encodings RE2 can assume for both the pattern and the searched text."""

        UTF8: int = 1      # UTF-8; this is the default
        LATIN1: int = 2    # ISO 8859-1, also known as Latin-1

Configuration Examples

Basic Options Usage

import re2

# Start from the defaults, then override only what this pattern needs.
opts = re2.Options()
opts.max_mem = 16 * 1024 * 1024  # 16MiB
opts.case_sensitive = False

# Options are passed as the second argument to compile().
hello_re = re2.compile(r'HELLO', opts)

# The upper-case pattern finds the lower-case text because matching
# is case-insensitive.
match = hello_re.search("hello world")

Memory Management

import re2

# Cap the memory budget RE2 may spend on a compiled pattern at 1MiB.
limited = re2.Options()
limited.max_mem = 1024 * 1024

alternation = r'very|complex|pattern|with|many|alternatives'
try:
    # Compilation raises re2.error when the pattern does not fit the budget.
    pattern = re2.compile(alternation, limited)
except re2.error:
    print("Pattern too complex for memory limit")

# Capturing can be switched off on the same options object when
# submatches are not needed.
limited.never_capture = True
fast_pattern = re2.compile(r'\d+', limited)

Encoding Configuration

import re2

Encoding = re2.Options.Encoding

# UTF-8 is already the default; it is set explicitly here for clarity.
utf8_options = re2.Options()
utf8_options.encoding = Encoding.UTF8
pattern = re2.compile(r'café', utf8_options)

# Latin-1 patterns have to be given as bytes rather than str.
latin1_options = re2.Options()
latin1_options.encoding = Encoding.LATIN1
latin1_pattern = re2.compile(b'caf\xe9', latin1_options)

Syntax Mode Configuration

import re2

# --- POSIX syntax -----------------------------------------------------
posix_options = re2.Options()
posix_options.longest_match = True  # POSIX semantics: longest match wins
posix_options.posix_syntax = True

# Plain POSIX constructs compile fine ...
pattern = re2.compile(r'colou?r', posix_options)
# ... but Perl extensions such as inline modifiers are rejected, so
# re2.compile(r'(?i)case', posix_options) would raise re2.error.

# --- Perl syntax (the default) ----------------------------------------
perl_options = re2.Options()
perl_options.posix_syntax = False
pattern = re2.compile(r'(?i)case|CASE', perl_options)  # inline modifier accepted

Literal Pattern Matching

import re2

# With literal=True the pattern is matched as a plain string; none of
# its characters carry regex meaning.
literal_opts = re2.Options()
literal_opts.literal = True

# '$', '.', '(' and ')' below are matched verbatim.
price_re = re2.compile(r'$19.99 (20% off)', literal_opts)
text = "Price: $19.99 (20% off) today"
match = price_re.search(text)  # Finds the exact substring

Newline Handling

import re2

text = "line1\nline2\nline3"
spanning = r'line1.*line3'

# By default '.' stops at a newline, so the pattern cannot span lines.
default_pattern = re2.compile(spanning)
match = default_pattern.search(text)  # No match

# dot_nl lets '.' consume newlines as well.
options = re2.Options()
options.dot_nl = True
dot_nl_pattern = re2.compile(spanning, options)
match = dot_nl_pattern.search(text)  # Matches across newlines

# Strict mode: turn dot_nl back off and forbid newline matches entirely.
options.dot_nl = False
options.never_nl = True
strict_pattern = re2.compile(r'[^x]*', options)  # [^x] won't match newlines

Performance Optimization

import re2

# Configuration for validation-style matching where submatches are not needed
options = re2.Options()
options.never_capture = True       # Parse all parentheses as non-capturing
options.never_nl = True           # Pattern never matches "\n" (changes matching semantics)
options.one_line = True           # NOTE: only consulted when posix_syntax is True; no effect with the default Perl syntax

# Pattern used for validation only; only the presence of a match is checked
validator = re2.compile(r'\d{3}-\d{2}-\d{4}', options)
is_valid = validator.search("123-45-6789") is not None  # True when the text contains a match

Error Handling Configuration

import re2

broken = r'[invalid'  # unterminated character class

# With log_errors off, a bad pattern still raises but nothing is
# written to stderr.
quiet_options = re2.Options()
quiet_options.log_errors = False

try:
    pattern = re2.compile(broken, quiet_options)
except re2.error as e:
    # The exception carries the failure reason; report it ourselves.
    print(f"Pattern compilation failed: {e}")

# With the defaults, the same failure is additionally logged to stderr.
default_options = re2.Options()
try:
    pattern = re2.compile(broken, default_options)
except re2.error:
    pass

Character Class Configuration

import re2

# perl_classes and word_boundary are only consulted in POSIX syntax mode.
# With the default Perl syntax they are always enabled and cannot be
# turned off, so POSIX syntax must be selected for them to take effect.
options = re2.Options()
options.posix_syntax = True
options.perl_classes = False

# \d, \w, \s are rejected in POSIX syntax with perl_classes=False
try:
    pattern = re2.compile(r'\d+', options)  # Fails
except re2.error:
    print("Perl character classes disabled")

# Use POSIX character classes instead
posix_pattern = re2.compile(r'[[:digit:]]+', options)  # Works

# \b and \B are rejected in POSIX syntax with word_boundary=False
options.word_boundary = False
try:
    pattern = re2.compile(r'\bword\b', options)  # Fails
except re2.error:
    print("Word boundary assertions disabled")

Options Combinations

import re2

# Strict POSIX configuration
posix_config = re2.Options()
posix_config.posix_syntax = True
posix_config.longest_match = True
posix_config.perl_classes = False    # Consulted because posix_syntax is True: \d, \w, \s are rejected
posix_config.case_sensitive = True

# Configuration without captures or error logging
fast_config = re2.Options()
fast_config.never_capture = True     # Parse all parentheses as non-capturing
fast_config.never_nl = True          # Pattern never matches "\n"
fast_config.one_line = True          # NOTE: only consulted when posix_syntax is True; no effect here
fast_config.log_errors = False       # Do not log errors; failures still raise

# Memory-constrained configuration
limited_config = re2.Options()
limited_config.max_mem = 1048576  # 1MiB
limited_config.never_capture = True
limited_config.log_errors = False

# Case-insensitive Unicode configuration
unicode_config = re2.Options()
unicode_config.case_sensitive = False
unicode_config.encoding = re2.Options.Encoding.UTF8  # UTF-8 is also the default
unicode_config.dot_nl = True         # '.' also matches newlines

Install with Tessl CLI

npx tessl i tessl/pypi-google-re2

docs

advanced-features.md

core-matching.md

index.md

options-configuration.md

pattern-compilation.md

text-processing.md

tile.json