0
# Pattern Compilation and Utilities
1
2
Pattern compilation, caching control, template support, and string escaping utilities for preparing and managing regular expression patterns. These functions provide essential tools for optimizing pattern usage and preparing literal strings for pattern inclusion.
3
4
## Capabilities
5
6
### Pattern Compilation
7
8
Compile a regular expression pattern into a Pattern object for efficient reuse, with enhanced compilation options and caching control.
9
10
```python { .api }
11
def compile(pattern, flags=0, ignore_unused=False, cache_pattern=None, **kwargs):
    """
    Compile a regular expression pattern, returning a Pattern object.

    Args:
        pattern (str): Regular expression pattern to compile
        flags (int, optional): Regex flags to modify pattern behavior (default 0)
        ignore_unused (bool, optional): Ignore unused keyword arguments
            (default False)
        cache_pattern (bool, optional): Override default caching behavior
            (default None)
        **kwargs: Additional keyword arguments made available to the pattern,
            such as named lists. The regex version is selected through
            flags or an inline (?V0)/(?V1) marker, not through a keyword.

    Returns:
        Pattern: Compiled pattern object with matching methods
    """
25
```
26
27
**Usage Examples:**
28
29
```python
30
import regex
31
32
# Basic pattern compilation
pattern = regex.compile(r'\b\w+@\w+\.\w+\b')
emails = pattern.findall('Contact: user@example.com or admin@site.org')
print(emails)  # ['user@example.com', 'admin@site.org']

# Compile with flags
pattern = regex.compile(r'hello\s+world', regex.IGNORECASE | regex.VERBOSE)
result = pattern.search('HELLO WORLD')
print(result.group())  # 'HELLO WORLD'

# Reuse compiled pattern for efficiency
# (file_lines and process_line_with_email are placeholders for your own
# iterable of lines and your own handler)
email_pattern = regex.compile(r'\b[\w.-]+@[\w.-]+\.\w+\b')
for line in file_lines:
    if email_pattern.search(line):
        process_line_with_email(line)

# Compile with version specification
v1_pattern = regex.compile(r'(?V1)pattern', regex.IGNORECASE)  # Enhanced mode
v0_pattern = regex.compile(r'(?V0)pattern', regex.IGNORECASE)  # Legacy mode

# Fuzzy pattern compilation
fuzzy_pattern = regex.compile(r'(?e)(search){e<=2}', regex.BESTMATCH)
result = fuzzy_pattern.search('serch text searching')
print(result.group())  # Best fuzzy match

# Control pattern caching
pattern = regex.compile(r'\d+', cache_pattern=False)  # Don't cache this pattern
59
```
60
61
### Template Pattern Compilation
62
63
Compile a regular expression template for use with substitution operations, providing a specialized pattern type for replacement templates.
64
65
```python { .api }
66
def template(pattern, flags=0):
    """
    Compile a template pattern, returning a Pattern object.

    Args:
        pattern (str): Template pattern to compile
        flags (int, optional): Regex flags to modify template behavior
            (default 0)

    Returns:
        Pattern: Compiled template pattern object
    """
77
```
78
79
**Usage Examples:**
80
81
```python
82
import regex
83
84
# NOTE(review): verify these examples against the regex documentation before
# relying on them. template() is documented above as returning a compiled
# Pattern object; confirm that sub() accepts such an object as its
# replacement argument rather than a plain replacement string.

# Basic template compilation
template_pattern = regex.template(r'\1-\2-\3')
result = regex.sub(r'(\d{4})(\d{2})(\d{2})', template_pattern, '20231225')
print(result)  # '2023-12-25'

# Named group template
template_pattern = regex.template(r'\g<last>, \g<first>')
pattern = r'(?P<first>\w+) (?P<last>\w+)'
result = regex.sub(pattern, template_pattern, 'John Doe')
print(result)  # 'Doe, John'

# Template with flags
template_pattern = regex.template(r'\1:\2', regex.IGNORECASE)
97
```
98
99
### String Escaping
100
101
Escape special regex characters in a string to use it as a literal pattern, with options for controlling which characters are escaped.
102
103
```python { .api }
104
def escape(pattern, special_only=True, literal_spaces=False):
    """
    Escape a string for use as a literal in a pattern.

    Args:
        pattern (str): String to escape for literal use
        special_only (bool, optional): Escape only special regex characters
            (default True)
        literal_spaces (bool, optional): Treat spaces as literal, i.e. leave
            them unescaped (default False)

    Returns:
        str: Escaped string safe for use in regex patterns
    """
116
```
117
118
**Usage Examples:**
119
120
```python
121
import regex
122
123
# Basic string escaping
literal_text = "Price: $19.99 (special!)"
escaped = regex.escape(literal_text)
print(escaped)  # 'Price:\\ \\$19\\.99\\ \\(special!\\)'

# Use escaped string in pattern
pattern = r'Item: ' + regex.escape("$19.99 (sale)")
result = regex.search(pattern, 'Item: $19.99 (sale) - Buy now!')
print(result.group())  # 'Item: $19.99 (sale)'

# Escape only special characters
text = "hello.world*test"
escaped = regex.escape(text, special_only=True)
print(escaped)  # 'hello\\.world\\*test'

# Control space escaping
text = "hello world test"
escaped_with_spaces = regex.escape(text, literal_spaces=False)
escaped_literal_spaces = regex.escape(text, literal_spaces=True)
print(escaped_with_spaces)  # 'hello\\ world\\ test'
print(escaped_literal_spaces)  # 'hello world test'

# Build patterns with literals and regex parts
user_input = "user@domain.com"
pattern = r'\b' + regex.escape(user_input) + r'\b'
result = regex.search(pattern, 'Email: user@domain.com is valid')
print(result.group())  # 'user@domain.com'
150
```
151
152
### Pattern Cache Management
153
154
Control the internal pattern cache to optimize memory usage and performance for applications with many patterns.
155
156
```python { .api }
157
def purge():
    """Clear the regular expression cache."""
159
160
def cache_all(value=True):
    """
    Set/get whether to cache all patterns, even those compiled explicitly.

    Args:
        value (bool or None): True to enable caching all, False to disable,
            None to return current setting (default True)

    Returns:
        bool or None: Current caching setting when value is None
    """
171
```
172
173
**Usage Examples:**
174
175
```python
176
import regex
177
178
# Clear the pattern cache
regex.purge()

# Check current cache setting (passing None queries without changing it)
current_setting = regex.cache_all(None)
print(f"Current cache setting: {current_setting}")

# Enable caching of all patterns
regex.cache_all(True)

# Disable caching of explicitly compiled patterns
regex.cache_all(False)
190
191
# Typical cache management workflow
192
def process_many_patterns(patterns, text):
    """Apply every pattern in ``patterns`` to ``text`` with caching disabled.

    Args:
        patterns: Iterable of pattern strings to compile.
        text (str): Text to search.

    Returns:
        list: One ``findall`` result per pattern, in input order.
    """
    # Clear cache before processing many patterns
    regex.purge()

    # Disable caching to prevent memory buildup, remembering the prior
    # setting so it can be put back afterwards
    old_setting = regex.cache_all(None)
    regex.cache_all(False)

    try:
        return [regex.compile(pattern).findall(text) for pattern in patterns]
    finally:
        # Restore original cache setting
        regex.cache_all(old_setting)
209
210
# Periodically free the pattern cache in long-running applications
211
def periodic_cache_cleanup():
    """Purge the regex pattern cache, then run a garbage collection pass."""
    import gc

    regex.purge()  # Clear regex cache
    gc.collect()  # Run garbage collection
215
```
216
217
## Advanced Compilation Features
218
219
### Version-Specific Compilation
220
221
Control regex behavior version during compilation:
222
223
```python
224
# Version 0 (legacy re-compatible)
v0_pattern = regex.compile(r'(?V0)\w+', regex.IGNORECASE)

# Version 1 (enhanced behavior with full case-folding)
v1_pattern = regex.compile(r'(?V1)\w+', regex.IGNORECASE)

# Default version control
regex.DEFAULT_VERSION = regex.VERSION1  # Set global default
232
```
233
234
### Fuzzy Pattern Compilation
235
236
Compile patterns with fuzzy matching capabilities:
237
238
```python
239
# Basic fuzzy compilation
fuzzy = regex.compile(r'(?e)(hello){e<=2}')  # Allow up to 2 errors

# Best match fuzzy compilation
best_fuzzy = regex.compile(r'(?be)(search){i<=1,d<=1,s<=2}', regex.BESTMATCH)

# Enhanced fuzzy matching
enhanced = regex.compile(r'(?ee)(pattern){e<=1}', regex.ENHANCEMATCH)
247
```
248
249
### Performance Optimization
250
251
```python
252
# Pre-compile frequently used patterns once, at module level
EMAIL_PATTERN = regex.compile(r'\b[\w.-]+@[\w.-]+\.\w+\b')
PHONE_PATTERN = regex.compile(r'\b\d{3}-\d{3}-\d{4}\b')
DATE_PATTERN = regex.compile(r'\b\d{4}-\d{2}-\d{2}\b')
256
257
def extract_info(text):
    """Collect e-mail addresses, phone numbers and dates found in ``text``.

    Uses the module-level EMAIL_PATTERN, PHONE_PATTERN and DATE_PATTERN.

    Returns:
        dict: Keys 'emails', 'phones' and 'dates', each holding the
        corresponding ``findall`` result.
    """
    return {
        'emails': EMAIL_PATTERN.findall(text),
        'phones': PHONE_PATTERN.findall(text),
        'dates': DATE_PATTERN.findall(text),
    }
262
263
# Cache control for dynamic patterns
264
def process_user_patterns(user_patterns, text):
    """Run each user-supplied pattern against ``text`` without caching it.

    Args:
        user_patterns (dict): Maps a result name to a pattern string.
        text (str): Text to search.

    Returns:
        dict: For each name, the ``findall`` result, or an ``"Error: ..."``
        string when the pattern fails with ``regex.error``.
    """
    # Remember the caller's setting; cache_all(None) queries without changing
    previous_setting = regex.cache_all(None)

    # Disable caching for one-time patterns
    regex.cache_all(False)

    results = {}
    try:
        for name, pattern in user_patterns.items():
            try:
                compiled = regex.compile(pattern)
                results[name] = compiled.findall(text)
            except regex.error as e:
                results[name] = f"Error: {e}"
    finally:
        # Restore the previous setting (rather than forcing it to True),
        # even if an unexpected exception escapes the loop
        regex.cache_all(previous_setting)
    return results
279
```
280
281
### Error Handling and Validation
282
283
```python
284
def safe_compile(pattern_str, flags=0):
    """Safely compile a pattern with error handling.

    Args:
        pattern_str (str): Pattern to compile.
        flags (int, optional): Regex flags passed through to ``regex.compile``.

    Returns:
        The compiled pattern, or None when compilation raises ``regex.error``
        (the failure is reported via ``print``).
    """
    try:
        return regex.compile(pattern_str, flags)
    except regex.error as e:
        print(f"Pattern compilation failed: {e}")
        print(f"Pattern: {pattern_str}")
        # The error position is reported only when the exception provides one
        error_pos = getattr(e, 'pos', None)
        if error_pos is not None:
            print(f"Error at position {error_pos}")
        return None
294
295
# Validate user input patterns
296
def validate_pattern(user_pattern):
    """Check that ``user_pattern`` can be used as an escaped literal pattern.

    The input is escaped first, so it is validated as literal text rather
    than as regex syntax.

    Returns:
        tuple: ``(True, message)`` when the escaped text compiles, otherwise
        ``(False, message)`` carrying the ``regex.error`` text.
    """
    escaped_input = regex.escape(user_pattern)
    try:
        # Compile purely as a validity check; the compiled object is not kept
        regex.compile(escaped_input)
    except regex.error as e:
        return False, f"Cannot create valid pattern: {e}"
    return True, f"Valid literal pattern: {escaped_input}"
303
304
# Test pattern against sample text
305
def test_pattern(pattern_str, test_text="test sample text 123"):
    """Compile ``pattern_str`` and try it against ``test_text``.

    Args:
        pattern_str (str): Pattern to compile.
        test_text (str, optional): Sample text to search.

    Returns:
        tuple: ``(True, message)`` listing the matches found, or
        ``(False, message)`` when ``regex.error`` is raised.
    """
    try:
        pattern = regex.compile(pattern_str)
        matches = pattern.findall(test_text)
        return True, f"Pattern works. Found {len(matches)} matches: {matches}"
    except regex.error as e:
        return False, f"Pattern error: {e}"
312
```