0
# Pattern Compilation
1
2
Regular expressions can be pre-compiled to improve performance when the same pattern is used repeatedly. Compiled patterns also provide access to advanced features, optimization options, and detailed pattern information.
3
4
## Capabilities
5
6
### Pattern Compilation
7
8
Compiles a regular expression pattern into a reusable pattern object with optional configuration.
9
10
```python { .api }
11
def compile(pattern, options=None):
12
"""
13
Compile regular expression pattern.
14
15
Args:
16
pattern (str or _Regexp): Pattern string or existing compiled pattern
17
options (Options, optional): Compilation options
18
19
Returns:
20
_Regexp: Compiled pattern object
21
22
Raises:
23
error: If pattern compilation fails
24
"""
25
```
26
27
Example usage:
28
29
```python
30
import re2
31
32
# Compile pattern for reuse
33
email_pattern = re2.compile(r'(\w+)@(\w+\.\w+)')
34
35
# Use compiled pattern multiple times (more efficient)
36
texts = [
37
"Contact alice@example.com",
38
"Email bob@test.org for details",
39
"No email in this text"
40
]
41
42
for text in texts:
43
match = email_pattern.search(text)
44
if match:
45
username, domain = match.groups()
46
print(f"Found: {username} at {domain}")
47
48
# Compile with options
49
options = re2.Options()
50
options.case_sensitive = False
51
pattern = re2.compile(r'HELLO', options)
52
match = pattern.search("hello world") # Matches due to case insensitivity
53
```
54
55
## Compiled Pattern Object
56
57
```python { .api }
58
class _Regexp:
59
"""Compiled regular expression pattern object."""
60
61
def search(self, text, pos=None, endpos=None):
62
"""
63
Search for pattern in text.
64
65
Args:
66
text (str): Text to search
67
pos (int, optional): Start position for search
68
endpos (int, optional): End position for search
69
70
Returns:
71
_Match or None: Match object if found
72
"""
73
74
def match(self, text, pos=None, endpos=None):
75
"""
76
Match pattern at beginning of text.
77
78
Args:
79
text (str): Text to match
80
pos (int, optional): Start position for match
81
endpos (int, optional): End position for match
82
83
Returns:
84
_Match or None: Match object if matched
85
"""
86
87
def fullmatch(self, text, pos=None, endpos=None):
88
"""
89
Match pattern against entire text.
90
91
Args:
92
text (str): Text to match
93
pos (int, optional): Start position for match
94
endpos (int, optional): End position for match
95
96
Returns:
97
_Match or None: Match object if matched
98
"""
99
100
def findall(self, text, pos=None, endpos=None):
101
"""
102
Find all matches in text.
103
104
Args:
105
text (str): Text to search
106
pos (int, optional): Start position for search
107
endpos (int, optional): End position for search
108
109
Returns:
110
list: List of matched strings or group tuples
111
"""
112
113
def finditer(self, text, pos=None, endpos=None):
114
"""
115
Return iterator of match objects.
116
117
Args:
118
text (str): Text to search
119
pos (int, optional): Start position for search
120
endpos (int, optional): End position for search
121
122
Returns:
123
iterator: Iterator of _Match objects
124
"""
125
126
def split(self, text, maxsplit=0):
127
"""
128
Split text using pattern as delimiter.
129
130
Args:
131
text (str): Text to split
132
maxsplit (int): Maximum splits (0 = unlimited)
133
134
Returns:
135
list: Split text segments
136
"""
137
138
def sub(self, repl, text, count=0):
139
"""
140
Replace matches with replacement.
141
142
Args:
143
repl (str or callable): Replacement string or function
144
text (str): Text to process
145
count (int): Maximum replacements (0 = all)
146
147
Returns:
148
str: Text with replacements
149
"""
150
151
def subn(self, repl, text, count=0):
152
"""
153
Replace matches and return count.
154
155
Args:
156
repl (str or callable): Replacement string or function
157
text (str): Text to process
158
count (int): Maximum replacements (0 = all)
159
160
Returns:
161
tuple: (result_text, substitution_count)
162
"""
163
164
def possiblematchrange(self, maxlen):
165
"""
166
Compute possible match range for optimization.
167
168
Args:
169
maxlen (int): Maximum string length to consider
170
171
Returns:
172
tuple: (min_string, max_string) for possible matches
173
"""
174
175
# Properties
176
pattern: str # Original pattern string
177
options: Options # Compilation options used
178
groups: int # Number of capturing groups
179
groupindex: dict # Named group indices mapping
180
programsize: int # Compiled program size (complexity measure)
181
reverseprogramsize: int # Reverse program size
182
programfanout: list # Program fanout histogram
183
reverseprogramfanout: list # Reverse program fanout histogram
184
```
185
186
Example usage with compiled patterns:
187
188
```python
189
import re2
190
191
# Compile a pattern with named capturing groups
192
pattern = re2.compile(r'(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})')
193
194
# Pattern information
195
print(f"Groups: {pattern.groups}") # 3
196
print(f"Named groups: {pattern.groupindex}") # {'year': 1, 'month': 2, 'day': 3}
197
print(f"Program size: {pattern.programsize}") # Complexity measure
198
199
# Use with position control
200
text = "Dates: 2023-01-15 and 2023-12-31"
201
match = pattern.search(text, pos=10) # Search starting from position 10
202
if match:
203
print(match.groupdict()) # {'year': '2023', 'month': '12', 'day': '31'}
204
205
# Possible match range: (min_string, max_string) bounds, considering strings up to 20 characters
206
min_str, max_str = pattern.possiblematchrange(20)
207
print(f"Possible matches range from '{min_str}' to '{max_str}'")
208
```
209
210
### Pattern Creation from Existing Pattern
211
212
```python
213
import re2
214
215
# Create pattern from existing pattern (returns same object if options match)
216
original = re2.compile(r'\d+')
217
duplicate = re2.compile(original) # Returns original if no options specified
218
219
# Create with different options (creates new pattern)
220
options = re2.Options()
221
options.case_sensitive = False
222
new_pattern = re2.compile(original, options) # Creates new pattern object
223
```
224
225
### Error Handling
226
227
```python
228
import re2
229
230
try:
231
# Invalid pattern
232
pattern = re2.compile(r'[invalid')
233
except re2.error as e:
234
print(f"Compilation failed: {e}")
235
236
# Helper that returns None instead of raising when the pattern is invalid
237
def safe_compile(pattern_str):
238
try:
239
return re2.compile(pattern_str)
240
except re2.error:
241
return None
242
243
pattern = safe_compile(r'(?P<name>\w+)')
244
if pattern:
245
# Use pattern safely
246
match = pattern.search("hello world")
247
```
248
249
## Performance Benefits
250
251
Compiled patterns provide significant performance benefits when used repeatedly:
252
253
```python
254
import re2
255
import time
256
257
text = "The quick brown fox jumps over the lazy dog" * 1000
258
pattern_str = r'\b\w{5}\b'
259
260
# Method 1: Recompile each time (slower)
261
start = time.time()
262
for _ in range(1000):
263
matches = re2.findall(pattern_str, text)
264
slow_time = time.time() - start
265
266
# Method 2: Compile once, reuse (faster)
267
compiled_pattern = re2.compile(pattern_str)
268
start = time.time()
269
for _ in range(1000):
270
matches = compiled_pattern.findall(text)
271
fast_time = time.time() - start
272
273
print(f"Speedup: {slow_time / fast_time:.2f}x")
274
```