or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

advanced-features.md, core-matching.md, index.md, options-configuration.md, pattern-compilation.md, text-processing.md

docs/options-configuration.md

0

# Options and Configuration

1

2

Configuration options that control how RE2 processes regular expressions, including encoding, syntax modes, memory limits, and performance tuning. These options provide fine-grained control over pattern compilation and matching behavior.

3

4

## Capabilities

5

6

### Options Class

7

8

Main configuration class for controlling RE2 behavior during pattern compilation and matching.

9

10

```python { .api }

11

class Options:

12

"""Configuration options for RE2 compilation and matching."""

13

14

def __init__(self):

15

"""Create Options object with default values."""

16

17

# Memory and Performance Options

18

max_mem: int = 8388608 # Maximum memory usage (8MiB default)

19

20

# Text Encoding Options

21

encoding: Options.Encoding = Options.Encoding.UTF8 # Text encoding

22

23

# Syntax and Matching Mode Options

24

posix_syntax: bool = False # Use POSIX syntax instead of Perl

25

longest_match: bool = False # Find longest match (POSIX mode)

26

case_sensitive: bool = True # Case-sensitive matching

27

literal: bool = False # Treat pattern as literal string

28

29

# Character Class and Boundary Options

30

perl_classes: bool = False # Allow Perl character classes (\d, \w, \s); only consulted when posix_syntax is True (always enabled in Perl mode)

31

word_boundary: bool = False # Allow word boundary assertions (\b, \B); only consulted when posix_syntax is True (always enabled in Perl mode)

32

33

# Newline Handling Options

34

never_nl: bool = False # Never match \n, even if it appears in the pattern

35

dot_nl: bool = False # Allow . to match newlines

36

one_line: bool = False # ^ and $ match only at the start/end of the text; only consulted when posix_syntax is True

37

38

# Capture and Logging Options

39

never_capture: bool = False # Disable capturing groups (performance optimization)

40

log_errors: bool = True # Log compilation errors to stderr

41

```

42

43

### Encoding Options

44

45

```python { .api }

46

class Options:

47

class Encoding:

48

"""Text encoding options for pattern and input text."""

49

UTF8: int = 1 # UTF-8 encoding (default)

50

LATIN1: int = 2 # Latin-1 (ISO 8859-1) encoding

51

```

52

53

## Configuration Examples

54

55

### Basic Options Usage

56

57

```python

58

import re2

59

60

# Create options with custom settings

61

options = re2.Options()

62

options.case_sensitive = False

63

options.max_mem = 16777216 # 16MiB

64

65

# Use with compilation

66

pattern = re2.compile(r'HELLO', options)

67

match = pattern.search("hello world") # Matches due to case insensitivity

68

```

69

70

### Memory Management

71

72

```python

73

import re2

74

75

# Limit memory usage for large patterns

76

options = re2.Options()

77

options.max_mem = 1048576 # 1MiB limit

78

79

try:

80

# This might fail if pattern is too complex

81

pattern = re2.compile(r'very|complex|pattern|with|many|alternatives', options)

82

except re2.error:

83

print("Pattern too complex for memory limit")

84

85

# Disable capturing for better performance

86

options.never_capture = True

87

fast_pattern = re2.compile(r'\d+', options) # No capture groups, faster matching

88

```

89

90

### Encoding Configuration

91

92

```python

93

import re2

94

95

# UTF-8 text (default)

96

utf8_options = re2.Options()

97

utf8_options.encoding = re2.Options.Encoding.UTF8

98

pattern = re2.compile(r'café', utf8_options)

99

100

# Latin-1 text

101

latin1_options = re2.Options()

102

latin1_options.encoding = re2.Options.Encoding.LATIN1

103

# Note: Pattern must be bytes when using Latin-1

104

latin1_pattern = re2.compile(b'caf\xe9', latin1_options)

105

```

106

107

### Syntax Mode Configuration

108

109

```python

110

import re2

111

112

# POSIX syntax mode

113

posix_options = re2.Options()

114

posix_options.posix_syntax = True

115

posix_options.longest_match = True # POSIX requires longest match

116

117

# In POSIX mode, some Perl features are disabled

118

pattern = re2.compile(r'colou?r', posix_options) # Works

119

# pattern = re2.compile(r'(?i)case', posix_options) # Would fail - no inline modifiers

120

121

# Perl syntax mode (default)

122

perl_options = re2.Options()

123

perl_options.posix_syntax = False

124

pattern = re2.compile(r'(?i)case|CASE', perl_options) # Works

125

```

126

127

### Literal Pattern Matching

128

129

```python

130

import re2

131

132

# Treat pattern as literal string (no special characters)

133

options = re2.Options()

134

options.literal = True

135

136

# All regex special characters are treated literally

137

pattern = re2.compile(r'$19.99 (20% off)', options)

138

text = "Price: $19.99 (20% off) today"

139

match = pattern.search(text) # Matches literally, not as regex

140

```

141

142

### Newline Handling

143

144

```python

145

import re2

146

147

text = "line1\nline2\nline3"

148

149

# Default behavior: . doesn't match newlines

150

default_pattern = re2.compile(r'line1.*line3')

151

match = default_pattern.search(text) # No match

152

153

# Allow . to match newlines

154

options = re2.Options()

155

options.dot_nl = True

156

dot_nl_pattern = re2.compile(r'line1.*line3', options)

157

match = dot_nl_pattern.search(text) # Matches across newlines

158

159

# Never match newlines (strict)

160

options.never_nl = True

161

options.dot_nl = False

162

strict_pattern = re2.compile(r'[^x]*', options) # [^x] won't match newlines

163

```

164

165

### Performance Optimization

166

167

```python

168

import re2

169

170

# Optimize for performance when captures aren't needed

171

options = re2.Options()

172

options.never_capture = True # Disable all capturing

173

options.never_nl = True # Optimize newline handling

174

options.one_line = True # ^ and $ match only at start/end of text (only consulted when posix_syntax is True)

175

176

# Fast pattern for validation only

177

validator = re2.compile(r'\d{3}-\d{2}-\d{4}', options)

178

is_valid = validator.search("123-45-6789") is not None # Fast validation

179

```

180

181

### Error Handling Configuration

182

183

```python

184

import re2

185

186

# Suppress error logging

187

quiet_options = re2.Options()

188

quiet_options.log_errors = False

189

190

try:

191

# Invalid pattern won't log to stderr

192

pattern = re2.compile(r'[invalid', quiet_options)

193

except re2.error as e:

194

# Handle error without stderr noise

195

print(f"Pattern compilation failed: {e}")

196

197

# Default behavior logs errors to stderr

198

default_options = re2.Options()

199

try:

200

pattern = re2.compile(r'[invalid', default_options) # Logs error to stderr

201

except re2.error:

202

pass

203

```

204

205

### Character Class Configuration

206

207

```python

208

import re2

209

210

# Disable Perl character classes

211

options = re2.Options()

212

options.perl_classes = False

213

214

# \d, \w, \s are rejected with perl_classes=False, but only when posix_syntax=True (in Perl mode they are always enabled)

215

try:

216

pattern = re2.compile(r'\d+', options) # May fail

217

except re2.error:

218

print("Perl character classes disabled")

219

220

# Use POSIX character classes instead

221

posix_pattern = re2.compile(r'[[:digit:]]+', options) # Works

222

223

# Disable word boundary assertions

224

options.word_boundary = False

225

try:

226

pattern = re2.compile(r'\bword\b', options) # May fail

227

except re2.error:

228

print("Word boundary assertions disabled")

229

```

230

231

## Options Combinations

232

233

```python

234

import re2

235

236

# Strict POSIX configuration

237

posix_config = re2.Options()

238

posix_config.posix_syntax = True

239

posix_config.longest_match = True

240

posix_config.perl_classes = False

241

posix_config.case_sensitive = True

242

243

# Performance-optimized configuration

244

fast_config = re2.Options()

245

fast_config.never_capture = True

246

fast_config.never_nl = True

247

fast_config.one_line = True

248

fast_config.log_errors = False

249

250

# Memory-constrained configuration

251

limited_config = re2.Options()

252

limited_config.max_mem = 1048576 # 1MiB

253

limited_config.never_capture = True

254

limited_config.log_errors = False

255

256

# Case-insensitive Unicode configuration

257

unicode_config = re2.Options()

258

unicode_config.case_sensitive = False

259

unicode_config.encoding = re2.Options.Encoding.UTF8

260

unicode_config.dot_nl = True

261

```