or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

classes-types.md · compilation-utilities.md · flags-constants.md · index.md · pattern-matching.md · splitting.md · substitution.md

docs/compilation-utilities.md

0

# Pattern Compilation and Utilities

1

2

Pattern compilation, caching control, template support, and string escaping utilities for preparing and managing regular expression patterns. These functions provide essential tools for optimizing pattern usage and preparing literal strings for pattern inclusion.

3

4

## Capabilities

5

6

### Pattern Compilation

7

8

Compile a regular expression pattern into a Pattern object for efficient reuse, with enhanced compilation options and caching control.

9

10

```python { .api }

11

def compile(pattern, flags=0, ignore_unused=False, cache_pattern=None, **kwargs):

12

"""

13

Compile a regular expression pattern, returning a Pattern object.

14

15

Args:

16

pattern (str): Regular expression pattern to compile

17

flags (int, optional): Regex flags to modify pattern behavior

18

ignore_unused (bool, optional): If True, do not raise an error for keyword arguments that the pattern does not use

19

cache_pattern (bool, optional): Whether to cache this compiled pattern; None (the default) uses the global setting controlled by cache_all()

20

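**kwargs: Named lists referenced in the pattern as \L<name> (e.g. words=["cat", "dog"]); the behavior version is selected through flags or inline (?V0)/(?V1), not through a keyword argument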

21

22

Returns:

23

Pattern: Compiled pattern object with matching methods

24

"""

25

```

26

27

**Usage Examples:**

28

29

```python

30

import regex

31

32

# Basic pattern compilation

33

pattern = regex.compile(r'\b\w+@\w+\.\w+\b')

34

emails = pattern.findall('Contact: user@example.com or admin@site.org')

35

print(emails) # ['user@example.com', 'admin@site.org']

36

37

# Compile with flags

38

pattern = regex.compile(r'hello\s+world', regex.IGNORECASE | regex.VERBOSE)

39

result = pattern.search('HELLO WORLD')

40

print(result.group()) # 'HELLO WORLD'

41

42

# Reuse compiled pattern for efficiency

43

email_pattern = regex.compile(r'\b[\w.-]+@[\w.-]+\.\w+\b')

44

for line in file_lines:

45

if email_pattern.search(line):

46

process_line_with_email(line)

47

48

# Compile with version specification

49

v1_pattern = regex.compile(r'(?V1)pattern', regex.IGNORECASE) # Enhanced mode

50

v0_pattern = regex.compile(r'(?V0)pattern', regex.IGNORECASE) # Legacy mode

51

52

# Fuzzy pattern compilation

53

fuzzy_pattern = regex.compile(r'(?e)(search){e<=2}', regex.BESTMATCH)

54

result = fuzzy_pattern.search('serch text searching')

55

print(result.group()) # Best fuzzy match

56

57

# Control pattern caching

58

pattern = regex.compile(r'\d+', cache_pattern=False) # Don't cache this pattern

59

```

60

61

### Template Pattern Compilation

62

63

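Compile a pattern with the TEMPLATE flag set, returning an ordinary Pattern object. The function exists for compatibility with the legacy `re.template`; the result is a compiled search pattern, not a replacement string, so replacement templates such as `r'\1-\2'` should still be passed to `sub()` as plain strings.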

64

65

```python { .api }

66

def template(pattern, flags=0):

67

"""

68

Compile a template pattern, returning a Pattern object.

69

70

Args:

71

pattern (str): Template pattern to compile

72

flags (int, optional): Regex flags to modify template behavior

73

74

Returns:

75

Pattern: Compiled template pattern object

76

"""

77

```

78

79

**Usage Examples:**

80

81

```python

82

import regex

83

84

# Basic template compilation

85

template_pattern = regex.template(r'\1-\2-\3')

86

result = regex.sub(r'(\d{4})(\d{2})(\d{2})', template_pattern, '20231225')

87

print(result) # '2023-12-25'

88

89

# Named group template

90

template_pattern = regex.template(r'\g<last>, \g<first>')

91

pattern = r'(?P<first>\w+) (?P<last>\w+)'

92

result = regex.sub(pattern, template_pattern, 'John Doe')

93

print(result) # 'Doe, John'

94

95

# Template with flags

96

template_pattern = regex.template(r'\1:\2', regex.IGNORECASE)

97

```

98

99

### String Escaping

100

101

Escape special regex characters in a string to use it as a literal pattern, with options for controlling which characters are escaped.

102

103

```python { .api }

104

def escape(pattern, special_only=True, literal_spaces=False):

105

"""

106

Escape a string for use as a literal in a pattern.

107

108

Args:

109

pattern (str): String to escape for literal use

110

special_only (bool, optional): If True (the default), escape only regex special characters and whitespace; if False, escape all non-alphanumeric characters

111

literal_spaces (bool, optional): Treat spaces as literal (don't escape)

112

113

Returns:

114

str: Escaped string safe for use in regex patterns

115

"""

116

```

117

118

**Usage Examples:**

119

120

```python

121

import regex

122

123

# Basic string escaping

124

literal_text = "Price: $19.99 (special!)"

125

escaped = regex.escape(literal_text)

126

print(escaped) # 'Price:\\ \\$19\\.99\\ \\(special!\\)'

127

128

# Use escaped string in pattern

129

pattern = r'Item: ' + regex.escape("$19.99 (sale)")

130

result = regex.search(pattern, 'Item: $19.99 (sale) - Buy now!')

131

print(result.group()) # 'Item: $19.99 (sale)'

132

133

# Escape only special characters

134

text = "hello.world*test"

135

escaped = regex.escape(text, special_only=True)

136

print(escaped) # 'hello\\.world\\*test'

137

138

# Control space escaping

139

text = "hello world test"

140

escaped_with_spaces = regex.escape(text, literal_spaces=False)

141

escaped_literal_spaces = regex.escape(text, literal_spaces=True)

142

print(escaped_with_spaces) # 'hello\\ world\\ test'

143

print(escaped_literal_spaces) # 'hello world test'

144

145

# Build patterns with literals and regex parts

146

user_input = "user@domain.com"

147

pattern = r'\b' + regex.escape(user_input) + r'\b'

148

result = regex.search(pattern, 'Email: user@domain.com is valid')

149

print(result.group()) # 'user@domain.com'

150

```

151

152

### Pattern Cache Management

153

154

Control the internal pattern cache to optimize memory usage and performance for applications with many patterns.

155

156

```python { .api }

157

def purge():

158

"""Clear the regular expression cache."""

159

160

def cache_all(value=True):

161

"""

162

Set/get whether to cache all patterns, even those compiled explicitly.

163

164

Args:

165

value (bool or None): True to enable caching all, False to disable,

166

None to return current setting

167

168

Returns:

169

bool or None: The current caching setting when value is None; otherwise None (the setting is updated and nothing is returned)

170

"""

171

```

172

173

**Usage Examples:**

174

175

```python

176

import regex

177

178

# Clear the pattern cache

179

regex.purge()

180

181

# Check current cache setting

182

current_setting = regex.cache_all(None)

183

print(f"Current cache setting: {current_setting}")

184

185

# Enable caching of all patterns

186

regex.cache_all(True)

187

188

# Disable caching of explicitly compiled patterns

189

regex.cache_all(False)

190

191

# Typical cache management workflow

192

def process_many_patterns(patterns, text):

193

# Clear cache before processing many patterns

194

regex.purge()

195

196

# Disable caching to prevent memory buildup

197

old_setting = regex.cache_all(None)

198

regex.cache_all(False)

199

200

try:

201

results = []

202

for pattern in patterns:

203

compiled = regex.compile(pattern)

204

results.append(compiled.findall(text))

205

return results

206

finally:

207

# Restore original cache setting

208

regex.cache_all(old_setting)

209

210

# Monitor cache usage in long-running applications

211

def periodic_cache_cleanup():

212

import gc

213

regex.purge() # Clear regex cache

214

gc.collect() # Run garbage collection

215

```

216

217

## Advanced Compilation Features

218

219

### Version-Specific Compilation

220

221

Select the regex behavior version (V0 or V1) at compile time:

222

223

```python

224

# Version 0 (legacy re-compatible)

225

v0_pattern = regex.compile(r'(?V0)\w+', regex.IGNORECASE)

226

227

# Version 1 (enhanced behavior with full case-folding)

228

v1_pattern = regex.compile(r'(?V1)\w+', regex.IGNORECASE)

229

230

# Default version control

231

regex.DEFAULT_VERSION = regex.VERSION1 # Set global default

232

```

233

234

### Fuzzy Pattern Compilation

235

236

Compile patterns with fuzzy matching capabilities:

237

238

```python

239

# Basic fuzzy compilation

240

fuzzy = regex.compile(r'(?e)(hello){e<=2}') # Allow up to 2 errors

241

242

# Best match fuzzy compilation

243

best_fuzzy = regex.compile(r'(?be)(search){i<=1,d<=1,s<=2}', regex.BESTMATCH)

244

245

# Enhanced fuzzy matching

246

enhanced = regex.compile(r'(?ee)(pattern){e<=1}', regex.ENHANCEMATCH)

247

```

248

249

### Performance Optimization

250

251

```python

252

# Pre-compile frequently used patterns

253

EMAIL_PATTERN = regex.compile(r'\b[\w.-]+@[\w.-]+\.\w+\b')

254

PHONE_PATTERN = regex.compile(r'\b\d{3}-\d{3}-\d{4}\b')

255

DATE_PATTERN = regex.compile(r'\b\d{4}-\d{2}-\d{2}\b')

256

257

def extract_info(text):

258

emails = EMAIL_PATTERN.findall(text)

259

phones = PHONE_PATTERN.findall(text)

260

dates = DATE_PATTERN.findall(text)

261

return {'emails': emails, 'phones': phones, 'dates': dates}

262

263

# Cache control for dynamic patterns

264

def process_user_patterns(user_patterns, text):

265

# Disable caching for one-time patterns

266

regex.cache_all(False)

267

268

results = {}

269

for name, pattern in user_patterns.items():

270

try:

271

compiled = regex.compile(pattern)

272

results[name] = compiled.findall(text)

273

except regex.error as e:

274

results[name] = f"Error: {e}"

275

276

# Re-enable caching

277

regex.cache_all(True)

278

return results

279

```

280

281

### Error Handling and Validation

282

283

```python

284

def safe_compile(pattern_str, flags=0):

285

"""Safely compile a pattern with error handling."""

286

try:

287

return regex.compile(pattern_str, flags)

288

except regex.error as e:

289

print(f"Pattern compilation failed: {e}")

290

print(f"Pattern: {pattern_str}")

291

if hasattr(e, 'pos') and e.pos is not None:

292

print(f"Error at position {e.pos}")

293

return None

294

295

# Validate user input patterns

296

def validate_pattern(user_pattern):

297

escaped_input = regex.escape(user_pattern)

298

try:

299

test_pattern = regex.compile(escaped_input)

300

return True, f"Valid literal pattern: {escaped_input}"

301

except regex.error as e:

302

return False, f"Cannot create valid pattern: {e}"

303

304

# Test pattern against sample text

305

def test_pattern(pattern_str, test_text="test sample text 123"):

306

try:

307

pattern = regex.compile(pattern_str)

308

matches = pattern.findall(test_text)

309

return True, f"Pattern works. Found {len(matches)} matches: {matches}"

310

except regex.error as e:

311

return False, f"Pattern error: {e}"

312

```