Tessl Tile for pypi/regex@2025.9.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

classes-types.md compilation-utilities.md flags-constants.md index.md pattern-matching.md splitting.md substitution.md

flags-constants.mddocs/

0
# Flags and Constants
1

2
The regex module offers a comprehensive flag system: standard regex flags, enhanced flags for fuzzy matching and Unicode handling, version-selection flags, and global constants that control library behavior. Together, these flags give fine-grained control over pattern matching and enable the module's advanced regex features.
3

4
## Capabilities
5

6
### Standard Regular Expression Flags
7

8
Traditional regex flags that control basic matching behavior, compatible with Python's standard `re` module while providing enhanced functionality.
9

10
```python { .api }
11
# Case and Character Class Flags
12
ASCII = A = 0x80              # ASCII-only character class matching
13
IGNORECASE = I = 0x2          # Case-insensitive matching  
14
LOCALE = L = 0x4              # Locale-dependent character classes
15
UNICODE = U = 0x20            # Unicode-dependent character classes
16

17
# Pattern Behavior Flags
18
MULTILINE = M = 0x8           # Multi-line mode for ^ and $
19
DOTALL = S = 0x10             # Make . match any character including newline
20
VERBOSE = X = 0x40            # Verbose mode allowing comments and whitespace
21
TEMPLATE = T = 0x1            # Template mode (compatibility with re module)
22
```
23

24
**Usage Examples:**
25

26
```python
27
import regex
28

29
# Case-insensitive matching
30
result = regex.search(r'hello', 'HELLO WORLD', regex.IGNORECASE)
31
print(result.group())  # 'HELLO'
32

33
# Multi-line mode - ^ and $ match line boundaries
34
text = 'line1\nline2\nline3'
35
matches = regex.findall(r'^line\d$', text, regex.MULTILINE)
36
print(matches)  # ['line1', 'line2', 'line3']
37

38
# Dot matches newlines
39
result = regex.search(r'start.*end', 'start\nmiddle\nend', regex.DOTALL)
40
print(result.group())  # 'start\nmiddle\nend'
41

42
# Verbose mode with comments
43
pattern = regex.compile(r'''
44
    \b                    # Word boundary
45
    (\w+)                 # Username (group 1)
46
    @                     # Literal @
47
    ([\w.-]+)             # Domain name (group 2)
48
    \.                    # Literal dot
49
    (\w+)                 # TLD (group 3)
50
    \b                    # Word boundary
51
''', regex.VERBOSE)
52

53
# Combining flags
54
combined = regex.IGNORECASE | regex.MULTILINE | regex.DOTALL
55
result = regex.search(r'^hello.*world$', 'HELLO\nBEAUTIFUL\nWORLD', combined)
56

57
# ASCII vs Unicode character classes
58
text = 'café naïve résumé'
59
# Unicode mode (default for str patterns)
60
unicode_words = regex.findall(r'\w+', text, regex.UNICODE)
61
print(unicode_words)  # ['café', 'naïve', 'résumé']
62

63
# ASCII mode
64
ascii_words = regex.findall(r'\w+', text, regex.ASCII)
65
print(ascii_words)  # ['caf', 'na', 've', 'r', 'sum']
66
```
67

68
### Enhanced Regular Expression Flags
69

70
Advanced flags unique to the regex module that enable fuzzy matching, improved Unicode support, and specialized matching behaviors.
71

72
```python { .api }
73
# Fuzzy Matching Flags
74
BESTMATCH = B = 0x1000        # Find best fuzzy match instead of first
75
ENHANCEMATCH = E = 0x8000     # Improve fuzzy match fit after finding first
76

77
# Unicode Enhancement Flags  
78
FULLCASE = F = 0x4000         # Full case-folding for Unicode case-insensitive matching
79
WORD = W = 0x800              # Unicode word boundaries and line breaks
80

81
# Matching Behavior Flags
82
POSIX = P = 0x10000           # POSIX-standard leftmost longest matching
83
REVERSE = R = 0x400           # Search backwards through string
84
DEBUG = D = 0x200             # Print parsed pattern for debugging
85
```
86

87
**Usage Examples:**
88

89
```python
90
import regex
91

92
# Fuzzy matching with best match
93
pattern = r'(?b)(python){e<=2}'  # Allow up to 2 errors, find best match
94
text = 'pyton pythom python pyth'
95
result = regex.search(pattern, text, regex.BESTMATCH)
96
print(result.group())  # 'python' (exact match is best)
97

98
# Enhanced fuzzy matching  
99
pattern = r'(?e)(search){e<=1}'
100
result = regex.search(pattern, 'serch found', regex.ENHANCEMATCH)
101
print(result.group())  # 'serch' with improved fit
102

103
# Full case-folding for Unicode
104
pattern = r'STRASSE'
105
text = 'Hauptstraße in München'  # German ß should match SS
106
result = regex.search(pattern, text, regex.IGNORECASE | regex.FULLCASE)
107
print(result.group())  # 'straße'
108

109
# Word boundaries with Unicode
110
text = 'hello мир world'
111
words = regex.findall(r'\b\w+\b', text, regex.WORD)
112
print(words)  # ['hello', 'мир', 'world'] - properly handles Unicode
113

114
# POSIX leftmost-longest matching
115
pattern = r'a|ab'
116
text = 'ab'
117
# Normal (first match)
118
result1 = regex.search(pattern, text)
119
print(result1.group())  # 'a'
120

121
# POSIX (longest match)
122
result2 = regex.search(pattern, text, regex.POSIX)
123
print(result2.group())  # 'ab'
124

125
# Reverse searching
126
text = 'first second third'
127
result = regex.search(r'\w+', text, regex.REVERSE)
128
print(result.group())  # 'third' (last word when searching backwards)
129

130
# Debug mode - prints parsed pattern
131
pattern = regex.compile(r'(a+)(b+)', regex.DEBUG)
132
# Prints internal pattern structure to stdout
133
```
134

135
### Version Control Flags
136

137
Flags that select which behavior version a pattern uses, letting you choose between legacy `re`-compatible behavior (version 0) and the enhanced regex behavior (version 1).
138

139
```python { .api }
140
# Version Control Flags
141
VERSION0 = V0 = 0x2000        # Legacy re-compatible behavior
142
VERSION1 = V1 = 0x100         # Enhanced behavior mode (full case-folding on by default)
143

144
# Global Version Setting
145
DEFAULT_VERSION               # Current default version setting (VERSION0)
146
```
147

148
**Usage Examples:**
149

150
```python
151
import regex
152

153
# Version 0 (legacy re-compatible behavior)
154
pattern_v0 = regex.compile(r'(?V0)\w+', regex.IGNORECASE)
155

156
# Version 1 (enhanced behavior with full case-folding)
157
pattern_v1 = regex.compile(r'(?V1)\w+', regex.IGNORECASE)
158

159
# Compare behavior with Unicode case-folding
160
text = 'Straße'  # German word with ß
161

162
# Version 0 - basic case folding
163
result_v0 = regex.search(r'(?V0)STRASSE', text, regex.IGNORECASE)
164
print(f"V0 result: {result_v0}")  # None - without full case-folding 'SS' does not match 'ß'
165

166
# Version 1 - full case folding (automatic with IGNORECASE)
167
result_v1 = regex.search(r'(?V1)STRASSE', text, regex.IGNORECASE)
168
print(f"V1 result: {result_v1.group() if result_v1 else None}")  # 'Straße'
169

170
# Global default version setting
171
print(f"Current default: {regex.DEFAULT_VERSION}")
172

173
# Set global default (affects patterns without explicit version)
174
# regex.DEFAULT_VERSION = regex.VERSION1  # Would change global default
175

176
# Inline version specification in patterns
177
pattern = r'(?V1)case insensitive'  # (?V1) selects version 1 behavior for this pattern only
178
result = regex.search(pattern, 'CASE INSENSITIVE', regex.IGNORECASE)
179

180
# Mixed version usage
181
def compare_versions(pattern_str, text, flags=0):
182
    v0_result = regex.search(f'(?V0){pattern_str}', text, flags)
183
    v1_result = regex.search(f'(?V1){pattern_str}', text, flags)
184
    
185
    return {
186
        'v0': v0_result.group() if v0_result else None,
187
        'v1': v1_result.group() if v1_result else None
188
    }
189
```
190

191
### Module Constants and Metadata
192

193
Global constants and version information for the regex module.
194

195
```python { .api }
196
# Module Information
197
__version__ = "2.5.161"       # Module version string
198
__doc__                       # Module documentation string
199

200
# Function Aliases
201
Regex                         # Alias for compile function (for pattern repr)
202

203
# Exception Class
204
error                         # Exception class for regex errors
205
```
206

207
**Usage Examples:**
208

209
```python
210
import regex
211

212
# Check module version
213
print(f"regex module version: {regex.__version__}")
214

215
# Read module documentation
216
print(f"Module doc length: {len(regex.__doc__)} characters")
217

218
# Using the Regex alias (it exists so that a compiled pattern's repr can be evaluated)
219
pattern = regex.Regex(r'\d+')  # Same as regex.compile(r'\d+')
220

221
# Exception handling
222
try:
223
    bad_pattern = regex.compile(r'[')  # Invalid pattern
224
except regex.error as e:
225
    print(f"Regex error: {e}")
226
    print(f"Error message: {e.msg}")
227
    if hasattr(e, 'pos'):
228
        print(f"Error position: {e.pos}")
229
```
230

231
## Flag Combinations and Usage Patterns
232

233
### Common Flag Combinations
234

235
```python
236
# Case-insensitive multiline matching
237
CASE_INSENSITIVE_MULTILINE = regex.IGNORECASE | regex.MULTILINE
238

239
# Full Unicode support with word boundaries
240
UNICODE_WORDS = regex.UNICODE | regex.WORD
241

242
# Enhanced fuzzy matching
243
FUZZY_BEST = regex.BESTMATCH | regex.ENHANCEMATCH
244

245
# Version 1 with full case folding
246
ENHANCED_CASE = regex.VERSION1 | regex.IGNORECASE
247

248
# Debug verbose mode
249
DEBUG_VERBOSE = regex.DEBUG | regex.VERBOSE
250

251
# Example usage
252
pattern = regex.compile(r'''
253
    \b                  # Word boundary
254
    (?e)                # Inline ENHANCEMATCH flag: improve the fit of the fuzzy match
255
    (search){e<=2}      # Allow up to 2 errors
256
    \b                  # Word boundary
257
''', FUZZY_BEST | DEBUG_VERBOSE)
258
```
259

260
### Dynamic Flag Handling
261

262
```python
263
def build_pattern_flags(case_sensitive=True, multiline=False, 
264
                       fuzzy=False, unicode_aware=True):
265
    """Build flags based on requirements."""
266
    flags = 0
267
    
268
    if not case_sensitive:
269
        flags |= regex.IGNORECASE
270
        flags |= regex.FULLCASE  # Enhanced case folding
271
    
272
    if multiline:
273
        flags |= regex.MULTILINE
274
    
275
    if fuzzy:
276
        flags |= regex.BESTMATCH | regex.ENHANCEMATCH
277
    
278
    if unicode_aware:
279
        flags |= regex.UNICODE | regex.WORD
280
    
281
    return flags
282

283
# Usage
284
flags = build_pattern_flags(case_sensitive=False, fuzzy=True)
285
pattern = regex.compile(r'(?e)(search){e<=1}', flags)
286
```
287

288
### Flag Testing and Introspection
289

290
```python
291
def analyze_pattern_flags(pattern):
292
    """Analyze flags used in a compiled pattern."""
293
    flags = pattern.flags
294
    
295
    flag_names = []
296
    for flag_name in dir(regex):
297
        if flag_name.isupper() and len(flag_name) <= 12:  # Heuristic for flag names: skips DEFAULT_VERSION, but also matches short aliases (I, M, ...)
298
            flag_value = getattr(regex, flag_name)
299
            if isinstance(flag_value, int) and flags & flag_value:
300
                flag_names.append(flag_name)
301
    
302
    return {
303
        'flags_value': flags,
304
        'flags_hex': f'0x{flags:x}',
305
        'active_flags': flag_names
306
    }
307

308
# Example
309
pattern = regex.compile(r'test', regex.IGNORECASE | regex.MULTILINE)
310
info = analyze_pattern_flags(pattern)
311
print(info)
312
```
313

314
### Performance Considerations
315

316
```python
317
# Pre-define flag combinations for reuse
318
STANDARD_TEXT = regex.IGNORECASE | regex.MULTILINE | regex.DOTALL
319
FUZZY_SEARCH = regex.BESTMATCH | regex.ENHANCEMATCH | regex.IGNORECASE
320
UNICODE_FULL = regex.UNICODE | regex.WORD | regex.FULLCASE
321

322
# Cache compiled patterns with flags
323
_pattern_cache = {}
324

325
def get_cached_pattern(pattern_str, flags):
326
    cache_key = (pattern_str, flags)
327
    if cache_key not in _pattern_cache:
328
        _pattern_cache[cache_key] = regex.compile(pattern_str, flags)
329
    return _pattern_cache[cache_key]
330

331
# Usage
332
email_pattern = get_cached_pattern(r'\b[\w.-]+@[\w.-]+\.\w+\b', STANDARD_TEXT)
333
```
334

335
### Advanced Flag Usage
336

337
```python
338
# Conditional flag application
339
def smart_search(pattern, text, **options):
340
    flags = 0
341
    
342
    # Apply flags based on text characteristics
343
    if any(ord(c) > 127 for c in text):  # Contains non-ASCII
344
        flags |= regex.UNICODE | regex.WORD | regex.FULLCASE
345
    
346
    if '\n' in text:  # Multi-line text
347
        flags |= regex.MULTILINE
348
    
349
    if options.get('case_insensitive', True):
350
        flags |= regex.IGNORECASE
351
    
352
    if options.get('fuzzy', False):
353
        flags |= regex.BESTMATCH
354
        pattern = f'(?e)({pattern}){{e<={options.get("errors", 1)}}}'
355
    
356
    return regex.search(pattern, text, flags)
357

358
# Example usage
359
result = smart_search('hello', 'Hello, мир!', case_insensitive=True, fuzzy=True)
360
```

Version

Tile

Files

flags-constants.md docs/

flags-constants.mddocs/