0
# Options and Configuration
1
2
RE2 exposes configuration options that control how regular expressions are processed, including text encoding, syntax modes, memory limits, and performance tuning. These options provide fine-grained control over pattern compilation and matching behavior.
3
4
## Capabilities
5
6
### Options Class
7
8
Main configuration class for controlling RE2 behavior during pattern compilation and matching.
9
10
```python { .api }
11
class Options:
12
"""Configuration options for RE2 compilation and matching."""
13
14
def __init__(self):
15
"""Create Options object with default values."""
16
17
# Memory and Performance Options
18
max_mem: int = 8388608 # Maximum memory usage (8MiB default)
19
20
# Text Encoding Options
21
encoding: Options.Encoding = Options.Encoding.UTF8 # Text encoding
22
23
# Syntax and Matching Mode Options
24
posix_syntax: bool = False # Use POSIX syntax instead of Perl
25
longest_match: bool = False # Search for the longest match instead of the first match
26
case_sensitive: bool = True # Case-sensitive matching
27
literal: bool = False # Treat pattern as literal string
28
29
# Character Class and Boundary Options
30
perl_classes: bool = False # Allow Perl character classes (\d, \s, \w, \D, \S, \W); only consulted when posix_syntax=True (always enabled otherwise)
31
word_boundary: bool = False # Allow word boundary assertions (\b, \B); only consulted when posix_syntax=True (always enabled otherwise)
32
33
# Newline Handling Options
34
never_nl: bool = False # Never match \n, even if it appears in the pattern
35
dot_nl: bool = False # Allow . to match newlines
36
one_line: bool = False # ^ and $ match only at the beginning and end of the text (only consulted when posix_syntax=True)
37
38
# Capture and Logging Options
39
never_capture: bool = False # Disable capturing groups (performance optimization)
40
log_errors: bool = True # Log compilation errors to stderr
41
```
42
43
### Encoding Options
44
45
```python { .api }
46
class Options:
47
class Encoding:
48
"""Text encoding options for pattern and input text."""
49
UTF8: int = 1 # UTF-8 encoding (default)
50
LATIN1: int = 2 # Latin-1 (ISO 8859-1) encoding
51
```
52
53
## Configuration Examples
54
55
### Basic Options Usage
56
57
```python
58
import re2
59
60
# Create options with custom settings
61
options = re2.Options()
62
options.case_sensitive = False
63
options.max_mem = 16777216 # 16MiB
64
65
# Use with compilation
66
pattern = re2.compile(r'HELLO', options)
67
match = pattern.search("hello world") # Matches due to case insensitivity
68
```
69
70
### Memory Management
71
72
```python
73
import re2
74
75
# Limit memory usage for large patterns
76
options = re2.Options()
77
options.max_mem = 1048576 # 1MiB limit
78
79
try:
80
# This might fail if pattern is too complex
81
pattern = re2.compile(r'very|complex|pattern|with|many|alternatives', options)
82
except re2.error:
83
print("Pattern too complex for memory limit")
84
85
# Disable capturing for better performance
86
options.never_capture = True
87
fast_pattern = re2.compile(r'\d+', options) # No capture groups, faster matching
88
```
89
90
### Encoding Configuration
91
92
```python
93
import re2
94
95
# UTF-8 text (default)
96
utf8_options = re2.Options()
97
utf8_options.encoding = re2.Options.Encoding.UTF8
98
pattern = re2.compile(r'café', utf8_options)
99
100
# Latin-1 text
101
latin1_options = re2.Options()
102
latin1_options.encoding = re2.Options.Encoding.LATIN1
103
# Note: Pattern must be bytes when using Latin-1
104
latin1_pattern = re2.compile(b'caf\xe9', latin1_options)
105
```
106
107
### Syntax Mode Configuration
108
109
```python
110
import re2
111
112
# POSIX syntax mode
113
posix_options = re2.Options()
114
posix_options.posix_syntax = True
115
posix_options.longest_match = True # POSIX semantics are leftmost-longest; this is a separate option from posix_syntax
116
117
# In POSIX mode, some Perl features are disabled
118
pattern = re2.compile(r'colou?r', posix_options) # Works
119
# pattern = re2.compile(r'(?i)case', posix_options) # Would fail - no inline modifiers
120
121
# Perl syntax mode (default)
122
perl_options = re2.Options()
123
perl_options.posix_syntax = False
124
pattern = re2.compile(r'(?i)case|CASE', perl_options) # Works
125
```
126
127
### Literal Pattern Matching
128
129
```python
130
import re2
131
132
# Treat pattern as literal string (no special characters)
133
options = re2.Options()
134
options.literal = True
135
136
# All regex special characters are treated literally
137
pattern = re2.compile(r'$19.99 (20% off)', options)
138
text = "Price: $19.99 (20% off) today"
139
match = pattern.search(text) # Matches literally, not as regex
140
```
141
142
### Newline Handling
143
144
```python
145
import re2
146
147
text = "line1\nline2\nline3"
148
149
# Default behavior: . doesn't match newlines
150
default_pattern = re2.compile(r'line1.*line3')
151
match = default_pattern.search(text) # No match
152
153
# Allow . to match newlines
154
options = re2.Options()
155
options.dot_nl = True
156
dot_nl_pattern = re2.compile(r'line1.*line3', options)
157
match = dot_nl_pattern.search(text) # Matches across newlines
158
159
# Never match newlines (strict)
160
options.never_nl = True
161
options.dot_nl = False
162
strict_pattern = re2.compile(r'[^x]*', options) # [^x] won't match newlines
163
```
164
165
### Performance Optimization
166
167
```python
168
import re2
169
170
# Optimize for performance when captures aren't needed
171
options = re2.Options()
172
options.never_capture = True # Disable all capturing
173
options.never_nl = True # Never match \n, even if it appears in the pattern (changes matching semantics)
174
options.one_line = True # Only consulted when posix_syntax=True; has no effect in the default Perl syntax mode
175
176
# Fast pattern for validation only
177
validator = re2.compile(r'\d{3}-\d{2}-\d{4}', options)
178
is_valid = validator.search("123-45-6789") is not None # Fast validation
179
```
180
181
### Error Handling Configuration
182
183
```python
184
import re2
185
186
# Suppress error logging
187
quiet_options = re2.Options()
188
quiet_options.log_errors = False
189
190
try:
191
# Invalid pattern won't log to stderr
192
pattern = re2.compile(r'[invalid', quiet_options)
193
except re2.error as e:
194
# Handle error without stderr noise
195
print(f"Pattern compilation failed: {e}")
196
197
# Default behavior logs errors to stderr
198
default_options = re2.Options()
199
try:
200
pattern = re2.compile(r'[invalid', default_options) # Logs error to stderr
201
except re2.error:
202
pass
203
```
204
205
### Character Class Configuration
206
207
```python
208
import re2
209
210
# Disable Perl character classes
211
options = re2.Options()
212
options.perl_classes = False
213
214
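# \d, \w, \s are rejected only when posix_syntax=True and perl_classes=False;
# in the default Perl syntax mode perl_classes is ignored and they are always accepted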
215
try:
216
pattern = re2.compile(r'\d+', options) # May fail
217
except re2.error:
218
print("Perl character classes disabled")
219
220
# Use POSIX character classes instead
221
posix_pattern = re2.compile(r'[[:digit:]]+', options) # Works
222
223
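# Disable word boundary assertions (only takes effect when posix_syntax=True;
# in the default Perl syntax mode \b and \B are always accepted)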
224
options.word_boundary = False
225
try:
226
pattern = re2.compile(r'\bword\b', options) # May fail
227
except re2.error:
228
print("Word boundary assertions disabled")
229
```
230
231
## Options Combinations
232
233
```python
234
import re2
235
236
# Strict POSIX configuration
237
posix_config = re2.Options()
238
posix_config.posix_syntax = True
239
posix_config.longest_match = True
240
posix_config.perl_classes = False
241
posix_config.case_sensitive = True
242
243
# Performance-optimized configuration
244
fast_config = re2.Options()
245
fast_config.never_capture = True
246
fast_config.never_nl = True
247
fast_config.one_line = True
248
fast_config.log_errors = False
249
250
# Memory-constrained configuration
251
limited_config = re2.Options()
252
limited_config.max_mem = 1048576 # 1MiB
253
limited_config.never_capture = True
254
limited_config.log_errors = False
255
256
# Case-insensitive Unicode configuration
257
unicode_config = re2.Options()
258
unicode_config.case_sensitive = False
259
unicode_config.encoding = re2.Options.Encoding.UTF8
260
unicode_config.dot_nl = True
261
```