# Advanced Classes and Types

Pattern and Match objects providing compiled pattern functionality and match result access, plus Scanner for tokenization and RegexFlag enumeration for proper flag handling. These classes form the core object-oriented interface for advanced regex operations.

## Capabilities

### Pattern Class

Compiled regular expression pattern object that provides all matching methods with enhanced performance and additional functionality beyond module-level functions.

```python { .api }
class Pattern:
    """Compiled regular expression pattern object with matching methods."""

    def match(self, string, pos=None, endpos=None, concurrent=None, partial=False, timeout=None):
        """Try to apply pattern at start of string, returning Match object or None."""

    def fullmatch(self, string, pos=None, endpos=None, concurrent=None, partial=False, timeout=None):
        """Try to apply pattern against entire string, returning Match object or None."""

    def search(self, string, pos=None, endpos=None, concurrent=None, partial=False, timeout=None):
        """Search through string for pattern match, returning Match object or None."""

    def findall(self, string, pos=None, endpos=None, overlapped=False, concurrent=None, timeout=None):
        """Return list of all matches in string."""

    def finditer(self, string, pos=None, endpos=None, overlapped=False, partial=False, concurrent=None, timeout=None):
        """Return iterator over all matches in string."""

    def sub(self, repl, string, count=0, pos=None, endpos=None, concurrent=None, timeout=None):
        """Replace pattern occurrences with replacement string."""

    def subf(self, format, string, count=0, pos=None, endpos=None, concurrent=None, timeout=None):
        """Replace pattern occurrences using format string."""

    def subn(self, repl, string, count=0, pos=None, endpos=None, concurrent=None, timeout=None):
        """Return (new_string, number_of_substitutions_made) tuple."""

    def subfn(self, format, string, count=0, pos=None, endpos=None, concurrent=None, timeout=None):
        """Return (formatted_string, number_of_substitutions_made) tuple."""

    def split(self, string, maxsplit=0, concurrent=None, timeout=None):
        """Split string by pattern occurrences, returning list of substrings."""

    def splititer(self, string, maxsplit=0, concurrent=None, timeout=None):
        """Return iterator yielding split string parts."""

    # Pattern properties
    pattern: str      # Original pattern string
    flags: int        # Compilation flags
    groups: int       # Number of capturing groups
    groupindex: dict  # Mapping of group names to numbers
```

**Usage Examples:**

```python
import regex

# Compile and use pattern object
email_pattern = regex.compile(r'\b([\w.-]+)@([\w.-]+\.\w+)\b')

# Use pattern methods
text = "Contact: john@example.com or admin@site.org"
matches = email_pattern.findall(text)
print(matches)  # [('john', 'example.com'), ('admin', 'site.org')]

# Pattern properties
print(f"Pattern: {email_pattern.pattern}")
print(f"Groups: {email_pattern.groups}")
print(f"Flags: {email_pattern.flags}")

# Multiple operations on same pattern
def analyze_email_text(text, pattern):
    # Count emails
    all_emails = pattern.findall(text)

    # Find first email
    first_match = pattern.search(text)

    # Replace emails with placeholder
    anonymized = pattern.sub('[EMAIL]', text)

    return {
        'count': len(all_emails),
        'first': first_match.group() if first_match else None,
        'anonymized': anonymized
    }

# Advanced pattern usage with concurrent execution
large_text = open('large_file.txt').read()
results = email_pattern.findall(large_text, concurrent=True)

# Pattern with timeout: regex raises TimeoutError when the limit is exceeded
try:
    complex_pattern = regex.compile(r'(a+)+b')
    result = complex_pattern.search('a' * 30, timeout=1.0)
except TimeoutError as e:
    print(f"Pattern timed out: {e}")
```

### Match Class

Match object containing information about a successful pattern match, providing access to matched text, groups, and position information.

```python { .api }
class Match:
    """Match object containing match information and results."""

    def group(self, *groups):
        """Return one or more subgroups of the match."""

    def groups(self, default=None):
        """Return tuple of all subgroups of the match."""

    def groupdict(self, default=None):
        """Return dictionary of all named subgroups."""

    def start(self, group=0):
        """Return start position of substring matched by group."""

    def end(self, group=0):
        """Return end position of substring matched by group."""

    def span(self, group=0):
        """Return (start, end) positions of substring matched by group."""

    def expand(self, template):
        """Return string obtained by template substitution."""

    def expandf(self, format):
        """Return string obtained by format substitution."""

    # Match properties
    string: str     # String passed to match function
    pos: int        # Start position for search
    endpos: int     # End position for search
    lastindex: int  # Index of last matched capturing group
    lastgroup: str  # Name of last matched capturing group
    re: Pattern     # Pattern object that produced this match
```

**Usage Examples:**

```python
import regex

# Basic match operations
pattern = regex.compile(r'(\w+)@(\w+\.\w+)')
match = pattern.search('Email: john@example.com is valid')

if match:
    print(f"Full match: {match.group()}")   # 'john@example.com'
    print(f"Username: {match.group(1)}")    # 'john'
    print(f"Domain: {match.group(2)}")      # 'example.com'
    print(f"All groups: {match.groups()}")  # ('john', 'example.com')
    print(f"Match span: {match.span()}")    # (7, 23)

# Named groups
pattern = regex.compile(r'(?P<user>\w+)@(?P<domain>\w+\.\w+)')
match = pattern.search('Contact: admin@site.org')

if match:
    print(f"User: {match.group('user')}")      # 'admin'
    print(f"Domain: {match.group('domain')}")  # 'site.org'
    print(f"Group dict: {match.groupdict()}")  # {'user': 'admin', 'domain': 'site.org'}

# Multiple group access
match = regex.search(r'(\d{4})-(\d{2})-(\d{2})', 'Date: 2023-12-25')
if match:
    year, month, day = match.groups()
    print(f"Date parts: {year}, {month}, {day}")  # '2023', '12', '25'

    # Individual positions
    print(f"Year at: {match.span(1)}")   # (6, 10)
    print(f"Month at: {match.span(2)}")  # (11, 13)
    print(f"Day at: {match.span(3)}")    # (14, 16)

# Template expansion
match = regex.search(r'(\w+)\s+(\w+)', 'John Doe')
if match:
    # Traditional template
    formatted = match.expand(r'\2, \1')
    print(formatted)  # 'Doe, John'

    # Format-style template (field 0 is the whole match, fields 1+ are groups)
    formatted = match.expandf('{2}, {1}')
    print(formatted)  # 'Doe, John'

    # Match object properties
    print(f"Original string: {match.string}")
    print(f"Search bounds: {match.pos}-{match.endpos}")
    print(f"Last group index: {match.lastindex}")
    print(f"Pattern object: {match.re}")
```

### Scanner Class

Tokenizing scanner that processes strings using a list of pattern-action pairs, providing a powerful tool for lexical analysis and text processing.

```python { .api }
class Scanner:
    """Scanner for tokenizing strings using pattern-action pairs."""

    def __init__(self, lexicon, flags=0):
        """
        Initialize scanner with lexicon of pattern-action pairs.

        Args:
            lexicon (list): List of (pattern, action) tuples
            flags (int, optional): Regex flags for all patterns
        """

    def scan(self, string):
        """
        Scan string and return list of action results.

        Args:
            string (str): String to scan

        Returns:
            tuple: (results_list, remaining_string)
        """
```

**Usage Examples:**

```python
import regex

# Basic tokenizer
def make_number(scanner, token):
    return ('NUMBER', int(token))

def make_word(scanner, token):
    return ('WORD', token)

def make_operator(scanner, token):
    return ('OP', token)

# Define lexicon (pattern, action) pairs
lexicon = [
    (r'\d+', make_number),
    (r'\w+', make_word),
    (r'[+\-*/]', make_operator),
    (r'\s+', None),  # Skip whitespace
]

scanner = regex.Scanner(lexicon)
tokens, remainder = scanner.scan('age + 25 * factor')
print(tokens)  # [('WORD', 'age'), ('OP', '+'), ('NUMBER', 25), ('OP', '*'), ('WORD', 'factor')]
print(f"Remainder: '{remainder}'")  # Should be empty

# Advanced tokenizer with state
class StatefulScanner:
    def __init__(self):
        self.in_string = False

    def string_start(self, scanner, token):
        self.in_string = True
        return ('STRING_START', token)

    def string_content(self, scanner, token):
        return ('STRING_CONTENT', token)

    def string_end(self, scanner, token):
        self.in_string = False
        return ('STRING_END', token)

# HTML/XML tokenizer
def make_tag_open(scanner, token):
    return ('TAG_OPEN', token)

def make_tag_close(scanner, token):
    return ('TAG_CLOSE', token)

def make_text(scanner, token):
    return ('TEXT', token.strip())

html_lexicon = [
    (r'<(/?\w+)[^>]*>', make_tag_open),
    (r'[^<]+', make_text),
]

html_scanner = regex.Scanner(html_lexicon)
tokens, remainder = html_scanner.scan('<div>Hello <span>world</span></div>')
print(tokens)

# Programming language tokenizer
def tokenize_code(code):
    lexicon = [
        (r'#.*$', lambda s, t: ('COMMENT', t)),                               # Comments
        (r'\b(if|else|while|for|def|class)\b', lambda s, t: ('KEYWORD', t)),  # Keywords
        (r'\b[a-zA-Z_]\w*\b', lambda s, t: ('IDENTIFIER', t)),                # Identifiers
        (r'\b\d+\.\d+\b', lambda s, t: ('FLOAT', float(t))),                  # Float numbers
        (r'\b\d+\b', lambda s, t: ('INTEGER', int(t))),                       # Integers
        (r'[+\-*/=<>!]+', lambda s, t: ('OPERATOR', t)),                      # Operators
        (r'[(){}[\];,.]', lambda s, t: ('DELIMITER', t)),                     # Delimiters
        (r'"[^"]*"', lambda s, t: ('STRING', t[1:-1])),                       # String literals
        (r'\s+', None),                                                       # Skip whitespace
    ]

    scanner = regex.Scanner(lexicon, regex.MULTILINE)
    tokens, remainder = scanner.scan(code)

    if remainder:
        print(f"Warning: Could not tokenize: '{remainder}'")

    return tokens

# Example usage
code = '''
def hello(name):
    # Print greeting
    print("Hello, " + name)
    return 42
'''

tokens = tokenize_code(code)
for token in tokens:
    print(token)
```

### RegexFlag Enumeration

Enumeration of regex flags with proper flag combination support, providing a type-safe way to work with regex flags.

```python { .api }
class RegexFlag(enum.IntFlag):
    """Enumeration of regex flags with proper combination support."""

    # Standard flags
    ASCII = A = 0x80
    IGNORECASE = I = 0x2
    LOCALE = L = 0x4
    MULTILINE = M = 0x8
    DOTALL = S = 0x10
    VERBOSE = X = 0x40
    UNICODE = U = 0x20

    # Enhanced flags
    BESTMATCH = B = 0x1000
    DEBUG = D = 0x200
    ENHANCEMATCH = E = 0x8000
    FULLCASE = F = 0x4000
    POSIX = P = 0x10000
    REVERSE = R = 0x400
    TEMPLATE = T = 0x1
    WORD = W = 0x800

    # Version flags
    VERSION0 = V0 = 0x2000
    VERSION1 = V1 = 0x100
```

**Usage Examples:**

```python
import regex
from regex import RegexFlag

# Using flag enumeration
flags = RegexFlag.IGNORECASE | RegexFlag.MULTILINE
pattern = regex.compile(r'^hello.*world$', flags)

# Check flag combinations
combined_flags = RegexFlag.IGNORECASE | RegexFlag.DOTALL | RegexFlag.VERBOSE
print(f"Combined flags value: {combined_flags}")

# Test flag presence
if RegexFlag.IGNORECASE in combined_flags:
    print("Case-insensitive matching enabled")

# Enhanced flags
fuzzy_flags = RegexFlag.BESTMATCH | RegexFlag.ENHANCEMATCH
pattern = regex.compile(r'(?e)(search){e<=2}', fuzzy_flags)

# Version-specific flags
v1_flags = RegexFlag.VERSION1 | RegexFlag.IGNORECASE | RegexFlag.FULLCASE
pattern = regex.compile(r'unicode', v1_flags)

# All flag names and values
print("Available flags:")
for flag in RegexFlag:
    print(f"{flag.name}: {flag.value} (0x{flag.value:x})")
```

## Advanced Usage Patterns

### Pattern Object Reuse

```python
# Efficient pattern reuse
class TextProcessor:
    def __init__(self):
        # Pre-compile frequently used patterns
        self.email_pattern = regex.compile(r'\b[\w.-]+@[\w.-]+\.\w+\b')
        self.phone_pattern = regex.compile(r'\b\d{3}-\d{3}-\d{4}\b')
        self.url_pattern = regex.compile(r'https?://[^\s]+')

    def extract_contacts(self, text):
        return {
            'emails': self.email_pattern.findall(text),
            'phones': self.phone_pattern.findall(text),
            'urls': self.url_pattern.findall(text)
        }
```

### Match Object Chaining

```python
def process_structured_data(text):
    # Chain match operations
    date_pattern = regex.compile(r'(\d{4})-(\d{2})-(\d{2})')

    results = []
    for match in date_pattern.finditer(text):
        # Extract date components
        year, month, day = match.groups()

        # Use match position to get context
        start, end = match.span()
        context_start = max(0, start - 20)
        context_end = min(len(text), end + 20)
        context = text[context_start:context_end]

        results.append({
            'date': f"{year}-{month}-{day}",
            'position': (start, end),
            'context': context.strip()
        })

    return results
```

### Scanner State Management

```python
class AdvancedScanner:
    def __init__(self):
        self.context_stack = []
        self.current_context = 'normal'

    def enter_context(self, scanner, token):
        self.context_stack.append(self.current_context)
        self.current_context = 'special'
        return ('CONTEXT_ENTER', token)

    def exit_context(self, scanner, token):
        if self.context_stack:
            self.current_context = self.context_stack.pop()
        return ('CONTEXT_EXIT', token)

    def process_token(self, scanner, token):
        return (f'{self.current_context.upper()}_TOKEN', token)
```