0
# Flags and Constants
1
2
The regex module provides a comprehensive flag system: the standard regex flags, enhanced flags for fuzzy matching and Unicode handling, version control flags, and global constants that control library behavior. Together, these flags give fine-grained control over pattern matching and enable the module's advanced regex features.
3
4
## Capabilities
5
6
### Standard Regular Expression Flags
7
8
Traditional regex flags that control basic matching behavior, compatible with Python's standard `re` module while providing enhanced functionality.
9
10
```python { .api }
11
# Case and Character Class Flags
12
ASCII = A = 0x80 # ASCII-only character class matching
13
IGNORECASE = I = 0x2 # Case-insensitive matching
14
LOCALE = L = 0x4 # Locale-dependent character classes
15
UNICODE = U = 0x20 # Unicode-dependent character classes
16
17
# Pattern Behavior Flags
18
MULTILINE = M = 0x8 # Multi-line mode for ^ and $
19
DOTALL = S = 0x10 # Make . match any character including newline
20
VERBOSE = X = 0x40 # Verbose mode allowing comments and whitespace
21
TEMPLATE = T = 0x1 # Template mode (compatibility with re module)
22
```
23
24
**Usage Examples:**
25
26
```python
27
import regex
28
29
# Case-insensitive matching
30
result = regex.search(r'hello', 'HELLO WORLD', regex.IGNORECASE)
31
print(result.group()) # 'HELLO'
32
33
# Multi-line mode - ^ and $ match line boundaries
34
text = 'line1\nline2\nline3'
35
matches = regex.findall(r'^line\d$', text, regex.MULTILINE)
36
print(matches) # ['line1', 'line2', 'line3']
37
38
# Dot matches newlines
39
result = regex.search(r'start.*end', 'start\nmiddle\nend', regex.DOTALL)
40
print(result.group()) # 'start\nmiddle\nend'
41
42
# Verbose mode with comments
43
# In VERBOSE mode, whitespace and `#` comments inside the pattern are
# ignored, so each component can sit on its own annotated line. Anything
# else on a line (including stray digits) is matched literally.
pattern = regex.compile(r'''
    \b           # Word boundary
    (\w+)        # Username (group 1)
    @            # Literal @
    ([\w.-]+)    # Domain name (group 2)
    \.           # Literal dot
    (\w+)        # TLD (group 3)
    \b           # Word boundary
''', regex.VERBOSE)
52
53
# Combining flags
54
combined = regex.IGNORECASE | regex.MULTILINE | regex.DOTALL
55
result = regex.search(r'^hello.*world$', 'HELLO\nBEAUTIFUL\nWORLD', combined)
56
57
# ASCII vs Unicode character classes
58
text = 'café naïve résumé'
59
# Unicode mode (default for str patterns)
60
unicode_words = regex.findall(r'\w+', text, regex.UNICODE)
61
print(unicode_words) # ['café', 'naïve', 'résumé']
62
63
# ASCII mode
64
ascii_words = regex.findall(r'\w+', text, regex.ASCII)
65
print(ascii_words) # ['caf', 'na', 've', 'r', 'sum']
66
```
67
68
### Enhanced Regular Expression Flags
69
70
Advanced flags unique to the regex module that enable fuzzy matching, improved Unicode support, and specialized matching behaviors.
71
72
```python { .api }
73
# Fuzzy Matching Flags
74
BESTMATCH = B = 0x1000 # Find best fuzzy match instead of first
75
ENHANCEMATCH = E = 0x8000 # Improve fuzzy match fit after finding first
76
77
# Unicode Enhancement Flags
78
FULLCASE = F = 0x4000 # Full case-folding for Unicode case-insensitive matching
79
WORD = W = 0x800 # Unicode word boundaries and line breaks
80
81
# Matching Behavior Flags
82
POSIX = P = 0x10000 # POSIX-standard leftmost longest matching
83
REVERSE = R = 0x400 # Search backwards through string
84
DEBUG = D = 0x200 # Print parsed pattern for debugging
85
```
86
87
**Usage Examples:**
88
89
```python
90
import regex
91
92
# Fuzzy matching with best match
93
pattern = r'(?b)(python){e<=2}' # Allow up to 2 errors, find best match
94
text = 'pyton pythom python pyth'
95
result = regex.search(pattern, text, regex.BESTMATCH)
96
print(result.group()) # 'python' (exact match is best)
97
98
# Enhanced fuzzy matching
99
pattern = r'(?e)(search){e<=1}'
100
result = regex.search(pattern, 'serch found', regex.ENHANCEMATCH)
101
print(result.group()) # 'serch' with improved fit
102
103
# Full case-folding for Unicode
104
pattern = r'STRASSE'
105
text = 'Hauptstraße in München' # German ß should match SS
106
result = regex.search(pattern, text, regex.IGNORECASE | regex.FULLCASE)
107
print(result.group()) # 'straße'
108
109
# Word boundaries with Unicode
110
text = 'hello мир world'
111
words = regex.findall(r'\b\w+\b', text, regex.WORD)
112
print(words) # ['hello', 'мир', 'world'] - properly handles Unicode
113
114
# POSIX leftmost-longest matching
115
pattern = r'a|ab'
116
text = 'ab'
117
# Normal (first match)
118
result1 = regex.search(pattern, text)
119
print(result1.group()) # 'a'
120
121
# POSIX (longest match)
122
result2 = regex.search(pattern, text, regex.POSIX)
123
print(result2.group()) # 'ab'
124
125
# Reverse searching
126
text = 'first second third'
127
result = regex.search(r'\w+', text, regex.REVERSE)
128
print(result.group()) # 'third' (last word when searching backwards)
129
130
# Debug mode - prints parsed pattern
131
pattern = regex.compile(r'(a+)(b+)', regex.DEBUG)
132
# Prints internal pattern structure to stdout
133
```
134
135
### Version Control Flags
136
137
Flags that select which behavior version the regex module uses: legacy `re`-compatible behavior (version 0) or the enhanced behavior (version 1).
138
139
```python { .api }
140
# Version Control Flags
141
VERSION0 = V0 = 0x2000 # Legacy re-compatible behavior
142
VERSION1 = V1 = 0x100 # Enhanced behavior mode (includes FULLCASE)
143
144
# Global Version Setting
145
DEFAULT_VERSION # Current default version setting (VERSION0)
146
```
147
148
**Usage Examples:**
149
150
```python
151
import regex
152
153
# Version 0 (legacy re-compatible behavior)
154
pattern_v0 = regex.compile(r'(?V0)\w+', regex.IGNORECASE)
155
156
# Version 1 (enhanced behavior with full case-folding)
157
pattern_v1 = regex.compile(r'(?V1)\w+', regex.IGNORECASE)
158
159
# Compare behavior with Unicode case-folding
160
text = 'Straße' # German word with ß
161
162
# Version 0 - basic case folding
163
result_v0 = regex.search(r'(?V0)STRASSE', text, regex.IGNORECASE)
164
print(f"V0 result: {result_v0}") # May not match
165
166
# Version 1 - full case folding (automatic with IGNORECASE)
167
result_v1 = regex.search(r'(?V1)STRASSE', text, regex.IGNORECASE)
168
print(f"V1 result: {result_v1.group() if result_v1 else None}") # 'Straße'
169
170
# Global default version setting
171
print(f"Current default: {regex.DEFAULT_VERSION}")
172
173
# Set global default (affects patterns without explicit version)
174
# regex.DEFAULT_VERSION = regex.VERSION1 # Would change global default
175
176
# Inline version specification in patterns
177
# The (?V1) prefix selects version 1 behavior for this pattern only.
# The subject must contain the whole pattern text for the search to match.
pattern = r'(?V1)case insensitive'
result = regex.search(pattern, 'CASE INSENSITIVE with full folding', regex.IGNORECASE)
print(result.group())  # 'CASE INSENSITIVE'
179
180
# Mixed version usage
181
def compare_versions(pattern_str, text, flags=0):
    """Search ``text`` for ``pattern_str`` under both behavior versions.

    The pattern is searched twice, once prefixed with ``(?V0)`` and once
    with ``(?V1)``, so the two results can be compared side by side.

    Args:
        pattern_str: Pattern source without any inline version prefix.
        text: String to search.
        flags: Extra regex flags applied to both searches. Any
            VERSION0/VERSION1 bit is dropped so that it cannot clash with
            the inline version prefix added here.

    Returns:
        A dict with keys ``'v0'`` and ``'v1'``; each value is the matched
        text for that version, or ``None`` when the search found nothing.
    """
    # Keep every caller-supplied flag except the version selectors.
    flags &= ~int(regex.VERSION0 | regex.VERSION1)

    results = {}
    for key, prefix in (('v0', '(?V0)'), ('v1', '(?V1)')):
        match = regex.search(f'{prefix}{pattern_str}', text, flags)
        results[key] = match.group() if match else None
    return results
189
```
190
191
### Module Constants and Metadata
192
193
Global constants and version information for the regex module.
194
195
```python { .api }
196
# Module Information
197
__version__ = "2.5.161" # Module version string
198
__doc__ # Module documentation string
199
200
# Function Aliases
201
Regex # Alias for compile function (for pattern repr)
202
203
# Exception Class
204
error # Exception class for regex errors
205
```
206
207
**Usage Examples:**
208
209
```python
210
import regex
211
212
# Check module version
213
print(f"regex module version: {regex.__version__}")
214
215
# Read module documentation
216
print(f"Module doc length: {len(regex.__doc__)} characters")
217
218
# Using Regex alias (mainly for internal use)
219
pattern = regex.Regex(r'\d+') # Same as regex.compile(r'\d+')
220
221
# Exception handling
222
try:
    bad_pattern = regex.compile(r'[')  # Invalid pattern: unterminated set
except regex.error as e:
    print(f"Regex error: {e}")
    print(f"Error message: {e.msg}")
    # Only report a position when the exception actually carries one.
    if getattr(e, 'pos', None) is not None:
        print(f"Error position: {e.pos}")
229
```
230
231
## Flag Combinations and Usage Patterns
232
233
### Common Flag Combinations
234
235
```python
236
# Case-insensitive multiline matching
237
CASE_INSENSITIVE_MULTILINE = regex.IGNORECASE | regex.MULTILINE
238
239
# Full Unicode support with word boundaries
240
UNICODE_WORDS = regex.UNICODE | regex.WORD
241
242
# Enhanced fuzzy matching
243
FUZZY_BEST = regex.BESTMATCH | regex.ENHANCEMATCH
244
245
# Version 1 with full case folding
246
ENHANCED_CASE = regex.VERSION1 | regex.IGNORECASE
247
248
# Debug verbose mode
249
DEBUG_VERBOSE = regex.DEBUG | regex.VERBOSE
250
251
# Example usage
252
# Fuzzy matching is requested by the {e<=2} constraint on the group;
# (?e) is the inline form of ENHANCEMATCH, which FUZZY_BEST already sets.
pattern = regex.compile(r'''
    (?e)              # Inline ENHANCEMATCH flag
    \b                # Word boundary
    (search){e<=2}    # Fuzzy group: allow up to 2 errors
    \b                # Word boundary
''', FUZZY_BEST | DEBUG_VERBOSE)
258
```
259
260
### Dynamic Flag Handling
261
262
```python
263
def build_pattern_flags(case_sensitive=True, multiline=False,
                        fuzzy=False, unicode_aware=True):
    """Build a combined regex flag value from boolean requirements.

    Args:
        case_sensitive: When false, add IGNORECASE together with FULLCASE
            (full case-folding).
        multiline: When true, add MULTILINE.
        fuzzy: When true, add BESTMATCH and ENHANCEMATCH.
        unicode_aware: When true, add UNICODE and WORD.

    Returns:
        The bitwise OR of the selected flags (``0`` when none apply).
    """
    flags = 0

    if not case_sensitive:
        # Enhanced case folding goes hand in hand with case-insensitivity.
        flags |= regex.IGNORECASE | regex.FULLCASE

    if multiline:
        flags |= regex.MULTILINE

    if fuzzy:
        flags |= regex.BESTMATCH | regex.ENHANCEMATCH

    if unicode_aware:
        flags |= regex.UNICODE | regex.WORD

    return flags
282
283
# Usage
284
flags = build_pattern_flags(case_sensitive=False, fuzzy=True)
285
pattern = regex.compile(r'(?e)(search){e<=1}', flags)
286
```
287
288
### Flag Testing and Introspection
289
290
```python
291
def analyze_pattern_flags(pattern):
    """Analyze flags used in a compiled pattern.

    Scans the ``regex`` module namespace for upper-case integer attributes
    of at most 12 characters and reports those sharing at least one bit
    with ``pattern.flags``. Short and long aliases (for example ``I`` and
    ``IGNORECASE``) are separate attributes, so both are listed.

    Args:
        pattern: Object exposing an integer ``flags`` attribute, such as a
            compiled pattern.

    Returns:
        A dict with ``'flags_value'`` (the raw flags), ``'flags_hex'``
        (the flags as a ``0x``-prefixed hex string) and ``'active_flags'``
        (matching attribute names, in ``dir()`` order).
    """
    flags = pattern.flags

    active = []
    for name in dir(regex):
        # The name-length cap keeps longer upper-case names out of the scan.
        if not name.isupper() or len(name) > 12:
            continue
        value = getattr(regex, name)
        if isinstance(value, int) and flags & value:
            active.append(name)

    return {
        'flags_value': flags,
        'flags_hex': f'0x{flags:x}',
        'active_flags': active,
    }
307
308
# Example
309
pattern = regex.compile(r'test', regex.IGNORECASE | regex.MULTILINE)
310
info = analyze_pattern_flags(pattern)
311
print(info)
312
```
313
314
### Performance Considerations
315
316
```python
317
# Pre-define flag combinations for reuse
318
STANDARD_TEXT = regex.IGNORECASE | regex.MULTILINE | regex.DOTALL
319
FUZZY_SEARCH = regex.BESTMATCH | regex.ENHANCEMATCH | regex.IGNORECASE
320
UNICODE_FULL = regex.UNICODE | regex.WORD | regex.FULLCASE
321
322
# Cache compiled patterns with flags
323
_pattern_cache = {}
324
325
def get_cached_pattern(pattern_str, flags):
326
cache_key = (pattern_str, flags)
327
if cache_key not in _pattern_cache:
328
_pattern_cache[cache_key] = regex.compile(pattern_str, flags)
329
return _pattern_cache[cache_key]
330
331
# Usage
332
email_pattern = get_cached_pattern(r'\b[\w.-]+@[\w.-]+\.\w+\b', STANDARD_TEXT)
333
```
334
335
### Advanced Flag Usage
336
337
```python
338
# Conditional flag application
339
def smart_search(pattern, text, **options):
    """Search ``text`` with flags chosen from the text and the options.

    Args:
        pattern: Pattern source.
        text: String to search.
        **options: Recognized keys:
            ``case_insensitive`` (default ``True``) adds IGNORECASE;
            ``fuzzy`` (default ``False``) adds BESTMATCH and wraps the
            pattern in a fuzzy group;
            ``errors`` (default ``1``) is the error limit for that group.

    Returns:
        Whatever ``regex.search`` returns for the final pattern and flags.
    """
    flags = 0

    # Apply flags based on text characteristics.
    if any(ord(c) > 127 for c in text):  # Contains non-ASCII
        flags |= regex.UNICODE | regex.WORD | regex.FULLCASE

    if '\n' in text:  # Multi-line text
        flags |= regex.MULTILINE

    if options.get('case_insensitive', True):
        flags |= regex.IGNORECASE

    if options.get('fuzzy', False):
        flags |= regex.BESTMATCH
        # Coerce to int so the generated {e<=N} constraint is well-formed.
        max_errors = int(options.get("errors", 1))
        pattern = f'(?e)({pattern}){{e<={max_errors}}}'

    return regex.search(pattern, text, flags)
357
358
# Example usage
359
result = smart_search('hello', 'Hello, мир!', case_insensitive=True, fuzzy=True)
360
```