or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

advanced-features.md · core-matching.md · index.md · options-configuration.md · pattern-compilation.md · text-processing.md

docs/pattern-compilation.md

0

# Pattern Compilation

1

2

Pre-compiling a regular expression improves performance when the same pattern is used repeatedly. Compiled patterns also provide access to advanced features, optimization options, and detailed pattern information.

3

4

## Capabilities

5

6

### Pattern Compilation

7

8

Compiles a regular expression pattern into a reusable pattern object with optional configuration.

9

10

```python { .api }

11

def compile(pattern, options=None):

12

"""

13

Compile regular expression pattern.

14

15

Args:

16

pattern (str or _Regexp): Pattern string or existing compiled pattern

17

options (Options, optional): Compilation options

18

19

Returns:

20

_Regexp: Compiled pattern object

21

22

Raises:

23

error: If pattern compilation fails

24

"""

25

```

26

27

Example usage:

28

29

```python

30

import re2

31

32

# Compile pattern for reuse

33

email_pattern = re2.compile(r'(\w+)@(\w+\.\w+)')

34

35

# Use compiled pattern multiple times (more efficient)

36

texts = [

37

"Contact alice@example.com",

38

"Email bob@test.org for details",

39

"No email in this text"

40

]

41

42

for text in texts:

43

match = email_pattern.search(text)

44

if match:

45

username, domain = match.groups()

46

print(f"Found: {username} at {domain}")

47

48

# Compile with options

49

options = re2.Options()

50

options.case_sensitive = False

51

pattern = re2.compile(r'HELLO', options)

52

match = pattern.search("hello world") # Matches due to case insensitivity

53

```

54

55

## Compiled Pattern Object

56

57

```python { .api }

58

class _Regexp:

59

"""Compiled regular expression pattern object."""

60

61

def search(self, text, pos=None, endpos=None):

62

"""

63

Search for pattern in text.

64

65

Args:

66

text (str): Text to search

67

pos (int, optional): Start position for search

68

endpos (int, optional): End position for search

69

70

Returns:

71

_Match or None: Match object if found

72

"""

73

74

def match(self, text, pos=None, endpos=None):

75

"""

76

Match pattern at beginning of text.

77

78

Args:

79

text (str): Text to match

80

pos (int, optional): Start position for match

81

endpos (int, optional): End position for match

82

83

Returns:

84

_Match or None: Match object if matched

85

"""

86

87

def fullmatch(self, text, pos=None, endpos=None):

88

"""

89

Match pattern against entire text.

90

91

Args:

92

text (str): Text to match

93

pos (int, optional): Start position for match

94

endpos (int, optional): End position for match

95

96

Returns:

97

_Match or None: Match object if matched

98

"""

99

100

def findall(self, text, pos=None, endpos=None):

101

"""

102

Find all matches in text.

103

104

Args:

105

text (str): Text to search

106

pos (int, optional): Start position for search

107

endpos (int, optional): End position for search

108

109

Returns:

110

list: List of matched strings or group tuples

111

"""

112

113

def finditer(self, text, pos=None, endpos=None):

114

"""

115

Return iterator of match objects.

116

117

Args:

118

text (str): Text to search

119

pos (int, optional): Start position for search

120

endpos (int, optional): End position for search

121

122

Returns:

123

iterator: Iterator of _Match objects

124

"""

125

126

def split(self, text, maxsplit=0):

127

"""

128

Split text using pattern as delimiter.

129

130

Args:

131

text (str): Text to split

132

maxsplit (int): Maximum splits (0 = unlimited)

133

134

Returns:

135

list: Split text segments

136

"""

137

138

def sub(self, repl, text, count=0):

139

"""

140

Replace matches with replacement.

141

142

Args:

143

repl (str or callable): Replacement string or function

144

text (str): Text to process

145

count (int): Maximum replacements (0 = all)

146

147

Returns:

148

str: Text with replacements

149

"""

150

151

def subn(self, repl, text, count=0):

152

"""

153

Replace matches and return count.

154

155

Args:

156

repl (str or callable): Replacement string or function

157

text (str): Text to process

158

count (int): Maximum replacements (0 = all)

159

160

Returns:

161

tuple: (result_text, substitution_count)

162

"""

163

164

def possiblematchrange(self, maxlen):

165

"""

166

Compute possible match range for optimization.

167

168

Args:

169

maxlen (int): Maximum string length to consider

170

171

Returns:

172

tuple: (min_string, max_string) for possible matches

173

"""

174

175

# Properties

176

pattern: str # Original pattern string

177

options: Options # Compilation options used

178

groups: int # Number of capturing groups

179

groupindex: dict # Named group indices mapping

180

programsize: int # Compiled program size (complexity measure)

181

reverseprogramsize: int # Reverse program size

182

programfanout: list # Program fanout histogram

183

reverseprogramfanout: list # Reverse program fanout histogram

184

```

185

186

Example usage with compiled patterns:

187

188

```python

189

import re2

190

191

# Compile pattern with all features

192

pattern = re2.compile(r'(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})')

193

194

# Pattern information

195

print(f"Groups: {pattern.groups}") # 3

196

print(f"Named groups: {pattern.groupindex}") # {'year': 1, 'month': 2, 'day': 3}

197

print(f"Program size: {pattern.programsize}") # Complexity measure

198

199

# Use with position control

200

text = "Dates: 2023-01-15 and 2023-12-31"

201

match = pattern.search(text, pos=10) # Search starting from position 10

202

if match:

203

print(match.groupdict()) # {'year': '2023', 'month': '12', 'day': '31'}

204

205

# Performance optimization info

206

min_str, max_str = pattern.possiblematchrange(20)

207

print(f"Possible matches range from '{min_str}' to '{max_str}'")

208

```

209

210

### Pattern Creation from Existing Pattern

211

212

```python

213

import re2

214

215

# Create pattern from existing pattern (returns same object if options match)

216

original = re2.compile(r'\d+')

217

duplicate = re2.compile(original) # Returns original if no options specified

218

219

# Create with different options (creates new pattern)

220

options = re2.Options()

221

options.case_sensitive = False

222

new_pattern = re2.compile(original, options) # Creates new pattern object

223

```

224

225

### Error Handling

226

227

```python

228

import re2

229

230

try:

231

# Invalid pattern

232

pattern = re2.compile(r'[invalid')

233

except re2.error as e:

234

print(f"Compilation failed: {e}")

235

236

# Check pattern validity before use

237

def safe_compile(pattern_str):

238

try:

239

return re2.compile(pattern_str)

240

except re2.error:

241

return None

242

243

pattern = safe_compile(r'(?P<name>\w+)')

244

if pattern:

245

# Use pattern safely

246

match = pattern.search("hello world")

247

```

248

249

## Performance Benefits

250

251

Compiled patterns provide significant performance benefits when used repeatedly:

252

253

```python

254

import re2

255

import time

256

257

text = "The quick brown fox jumps over the lazy dog" * 1000

258

pattern_str = r'\b\w{5}\b'

259

260

# Method 1: Recompile each time (slower)

261

start = time.time()

262

for _ in range(1000):

263

matches = re2.findall(pattern_str, text)

264

slow_time = time.time() - start

265

266

# Method 2: Compile once, reuse (faster)

267

compiled_pattern = re2.compile(pattern_str)

268

start = time.time()

269

for _ in range(1000):

270

matches = compiled_pattern.findall(text)

271

fast_time = time.time() - start

272

273

print(f"Speedup: {slow_time / fast_time:.2f}x")

274

```