or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

classes-types.md compilation-utilities.md flags-constants.md index.md pattern-matching.md splitting.md substitution.md

docs/flags-constants.md

0

# Flags and Constants

1

2

The regex module provides a comprehensive flag system: standard regex flags, enhanced flags for fuzzy matching and Unicode handling, version control flags, and global constants for controlling library behavior. These flags provide fine-grained control over pattern matching behavior and enable advanced regex features.

3

4

## Capabilities

5

6

### Standard Regular Expression Flags

7

8

Traditional regex flags that control basic matching behavior, compatible with Python's standard `re` module while providing enhanced functionality.

9

10

```python { .api }

11

# Case and Character Class Flags

12

ASCII = A = 0x80 # ASCII-only character class matching

13

IGNORECASE = I = 0x2 # Case-insensitive matching

14

LOCALE = L = 0x4 # Locale-dependent character classes

15

UNICODE = U = 0x20 # Unicode-dependent character classes

16

17

# Pattern Behavior Flags

18

MULTILINE = M = 0x8 # Multi-line mode for ^ and $

19

DOTALL = S = 0x10 # Make . match any character including newline

20

VERBOSE = X = 0x40 # Verbose mode allowing comments and whitespace

21

TEMPLATE = T = 0x1 # Template mode (compatibility with re module)

22

```

23

24

**Usage Examples:**

25

26

```python

27

import regex

28

29

# Case-insensitive matching

30

result = regex.search(r'hello', 'HELLO WORLD', regex.IGNORECASE)

31

print(result.group()) # 'HELLO'

32

33

# Multi-line mode - ^ and $ match line boundaries

34

text = 'line1\nline2\nline3'

35

matches = regex.findall(r'^line\d$', text, regex.MULTILINE)

36

print(matches) # ['line1', 'line2', 'line3']

37

38

# Dot matches newlines

39

result = regex.search(r'start.*end', 'start\nmiddle\nend', regex.DOTALL)

40

print(result.group()) # 'start\nmiddle\nend'

41

42

# Verbose mode with comments

43

pattern = regex.compile(r'''

44

\b # Word boundary

45

(\w+) # Username (group 1)

46

@ # Literal @

47

([\w.-]+) # Domain name (group 2)

48

\. # Literal dot

49

(\w+) # TLD (group 3)

50

\b # Word boundary

51

''', regex.VERBOSE)

52

53

# Combining flags

54

combined = regex.IGNORECASE | regex.MULTILINE | regex.DOTALL

55

result = regex.search(r'^hello.*world$', 'HELLO\nBEAUTIFUL\nWORLD', combined)

56

57

# ASCII vs Unicode character classes

58

text = 'café naïve résumé'

59

# Unicode mode (default for str patterns)

60

unicode_words = regex.findall(r'\w+', text, regex.UNICODE)

61

print(unicode_words) # ['café', 'naïve', 'résumé']

62

63

# ASCII mode

64

ascii_words = regex.findall(r'\w+', text, regex.ASCII)

65

print(ascii_words) # ['caf', 'na', 've', 'r', 'sum']

66

```

67

68

### Enhanced Regular Expression Flags

69

70

Advanced flags unique to the regex module that enable fuzzy matching, improved Unicode support, and specialized matching behaviors.

71

72

```python { .api }

73

# Fuzzy Matching Flags

74

BESTMATCH = B = 0x1000 # Find best fuzzy match instead of first

75

ENHANCEMATCH = E = 0x8000 # Improve fuzzy match fit after finding first

76

77

# Unicode Enhancement Flags

78

FULLCASE = F = 0x4000 # Full case-folding for Unicode case-insensitive matching

79

WORD = W = 0x800 # Unicode word boundaries and line breaks

80

81

# Matching Behavior Flags

82

POSIX = P = 0x10000 # POSIX-standard leftmost longest matching

83

REVERSE = R = 0x400 # Search backwards through string

84

DEBUG = D = 0x200 # Print parsed pattern for debugging

85

```

86

87

**Usage Examples:**

88

89

```python

90

import regex

91

92

# Fuzzy matching with best match

93

pattern = r'(?b)(python){e<=2}' # Allow up to 2 errors, find best match

94

text = 'pyton pythom python pyth'

95

result = regex.search(pattern, text, regex.BESTMATCH)

96

print(result.group()) # 'python' (exact match is best)

97

98

# Enhanced fuzzy matching

99

pattern = r'(?e)(search){e<=1}'

100

result = regex.search(pattern, 'serch found', regex.ENHANCEMATCH)

101

print(result.group()) # 'serch' with improved fit

102

103

# Full case-folding for Unicode

104

pattern = r'STRASSE'

105

text = 'Hauptstraße in München' # German ß should match SS

106

result = regex.search(pattern, text, regex.IGNORECASE | regex.FULLCASE)

107

print(result.group()) # 'straße'

108

109

# Word boundaries with Unicode

110

text = 'hello мир world'

111

words = regex.findall(r'\b\w+\b', text, regex.WORD)

112

print(words) # ['hello', 'мир', 'world'] - properly handles Unicode

113

114

# POSIX leftmost-longest matching

115

pattern = r'a|ab'

116

text = 'ab'

117

# Normal (first match)

118

result1 = regex.search(pattern, text)

119

print(result1.group()) # 'a'

120

121

# POSIX (longest match)

122

result2 = regex.search(pattern, text, regex.POSIX)

123

print(result2.group()) # 'ab'

124

125

# Reverse searching

126

text = 'first second third'

127

result = regex.search(r'\w+', text, regex.REVERSE)

128

print(result.group()) # 'third' (last word when searching backwards)

129

130

# Debug mode - prints parsed pattern

131

pattern = regex.compile(r'(a+)(b+)', regex.DEBUG)

132

# Prints internal pattern structure to stdout

133

```

134

135

### Version Control Flags

136

137

Flags that select which version of the regex behavior is used, allowing a choice between the legacy `re`-compatible behavior (version 0) and the enhanced behavior (version 1).

138

139

```python { .api }

140

# Version Control Flags

141

VERSION0 = V0 = 0x2000 # Legacy re-compatible behavior

142

VERSION1 = V1 = 0x100 # Enhanced behavior mode (case-insensitive matches use full case-folding by default)

143

144

# Global Version Setting

145

DEFAULT_VERSION # Current default version setting (VERSION0)

146

```

147

148

**Usage Examples:**

149

150

```python

151

import regex

152

153

# Version 0 (legacy re-compatible behavior)

154

pattern_v0 = regex.compile(r'(?V0)\w+', regex.IGNORECASE)

155

156

# Version 1 (enhanced behavior with full case-folding)

157

pattern_v1 = regex.compile(r'(?V1)\w+', regex.IGNORECASE)

158

159

# Compare behavior with Unicode case-folding

160

text = 'Straße' # German word with ß

161

162

# Version 0 - basic case folding

163

result_v0 = regex.search(r'(?V0)STRASSE', text, regex.IGNORECASE)

164

print(f"V0 result: {result_v0}") # None - V0 uses simple case-folding unless FULLCASE is set, so 'ß' does not match 'SS'

165

166

# Version 1 - full case folding (automatic with IGNORECASE)

167

result_v1 = regex.search(r'(?V1)STRASSE', text, regex.IGNORECASE)

168

print(f"V1 result: {result_v1.group() if result_v1 else None}") # 'Straße'

169

170

# Global default version setting

171

print(f"Current default: {regex.DEFAULT_VERSION}")

172

173

# Set global default (affects patterns without explicit version)

174

# regex.DEFAULT_VERSION = regex.VERSION1 # Would change global default

175

176

# Inline version specification in patterns

177

pattern = r'(?V1)case insensitive with full folding'

178

result = regex.search(pattern, 'CASE INSENSITIVE', regex.IGNORECASE) # None here: the text contains only the first two words of the pattern

179

180

# Mixed version usage

181

def compare_versions(pattern_str, text, flags=0):

182

v0_result = regex.search(f'(?V0){pattern_str}', text, flags)

183

v1_result = regex.search(f'(?V1){pattern_str}', text, flags)

184

185

return {

186

'v0': v0_result.group() if v0_result else None,

187

'v1': v1_result.group() if v1_result else None

188

}

189

```

190

191

### Module Constants and Metadata

192

193

Global constants and version information for the regex module.

194

195

```python { .api }

196

# Module Information

197

__version__ = "2.5.161" # Module version string

198

__doc__ # Module documentation string

199

200

# Function Aliases

201

Regex # Alias for compile function (for pattern repr)

202

203

# Exception Class

204

error # Exception class for regex errors

205

```

206

207

**Usage Examples:**

208

209

```python

210

import regex

211

212

# Check module version

213

print(f"regex module version: {regex.__version__}")

214

215

# Read module documentation

216

print(f"Module doc length: {len(regex.__doc__)} characters")

217

218

# Using the Regex alias (it exists so that the repr of a compiled pattern is valid code)

219

pattern = regex.Regex(r'\d+') # Same as regex.compile(r'\d+')

220

221

# Exception handling

222

try:

223

bad_pattern = regex.compile(r'[') # Invalid pattern

224

except regex.error as e:

225

print(f"Regex error: {e}")

226

print(f"Error message: {e.msg}")

227

if hasattr(e, 'pos'):

228

print(f"Error position: {e.pos}")

229

```

230

231

## Flag Combinations and Usage Patterns

232

233

### Common Flag Combinations

234

235

```python

236

# Case-insensitive multiline matching

237

CASE_INSENSITIVE_MULTILINE = regex.IGNORECASE | regex.MULTILINE

238

239

# Full Unicode support with word boundaries

240

UNICODE_WORDS = regex.UNICODE | regex.WORD

241

242

# Enhanced fuzzy matching

243

FUZZY_BEST = regex.BESTMATCH | regex.ENHANCEMATCH

244

245

# Version 1 with full case folding

246

ENHANCED_CASE = regex.VERSION1 | regex.IGNORECASE

247

248

# Debug verbose mode

249

DEBUG_VERBOSE = regex.DEBUG | regex.VERBOSE

250

251

# Example usage

252

pattern = regex.compile(r'''

253

\b # Word boundary

254

(?e) # ENHANCEMATCH: try to improve the fit of the fuzzy match (fuzziness itself comes from {e<=2})

255

(search){e<=2} # Allow up to 2 errors

256

\b # Word boundary

257

''', FUZZY_BEST | DEBUG_VERBOSE)

258

```

259

260

### Dynamic Flag Handling

261

262

```python

263

def build_pattern_flags(case_sensitive=True, multiline=False,

264

fuzzy=False, unicode_aware=True):

265

"""Build flags based on requirements."""

266

flags = 0

267

268

if not case_sensitive:

269

flags |= regex.IGNORECASE

270

flags |= regex.FULLCASE # Enhanced case folding

271

272

if multiline:

273

flags |= regex.MULTILINE

274

275

if fuzzy:

276

flags |= regex.BESTMATCH | regex.ENHANCEMATCH

277

278

if unicode_aware:

279

flags |= regex.UNICODE | regex.WORD

280

281

return flags

282

283

# Usage

284

flags = build_pattern_flags(case_sensitive=False, fuzzy=True)

285

pattern = regex.compile(r'(?e)(search){e<=1}', flags)

286

```

287

288

### Flag Testing and Introspection

289

290

```python

291

def analyze_pattern_flags(pattern):

292

"""Analyze flags used in a compiled pattern."""

293

flags = pattern.flags

294

295

flag_names = []

296

for flag_name in dir(regex):

297

if flag_name.isupper() and len(flag_name) <= 12: # Flag names

298

flag_value = getattr(regex, flag_name)

299

if isinstance(flag_value, int) and flags & flag_value:

300

flag_names.append(flag_name)

301

302

return {

303

'flags_value': flags,

304

'flags_hex': f'0x{flags:x}',

305

'active_flags': flag_names

306

}

307

308

# Example

309

pattern = regex.compile(r'test', regex.IGNORECASE | regex.MULTILINE)

310

info = analyze_pattern_flags(pattern)

311

print(info)

312

```

313

314

### Performance Considerations

315

316

```python

317

# Pre-define flag combinations for reuse

318

STANDARD_TEXT = regex.IGNORECASE | regex.MULTILINE | regex.DOTALL

319

FUZZY_SEARCH = regex.BESTMATCH | regex.ENHANCEMATCH | regex.IGNORECASE

320

UNICODE_FULL = regex.UNICODE | regex.WORD | regex.FULLCASE

321

322

# Cache compiled patterns with flags

323

_pattern_cache = {}

324

325

def get_cached_pattern(pattern_str, flags):

326

cache_key = (pattern_str, flags)

327

if cache_key not in _pattern_cache:

328

_pattern_cache[cache_key] = regex.compile(pattern_str, flags)

329

return _pattern_cache[cache_key]

330

331

# Usage

332

email_pattern = get_cached_pattern(r'\b[\w.-]+@[\w.-]+\.\w+\b', STANDARD_TEXT)

333

```

334

335

### Advanced Flag Usage

336

337

```python

338

# Conditional flag application

339

def smart_search(pattern, text, **options):

340

flags = 0

341

342

# Apply flags based on text characteristics

343

if any(ord(c) > 127 for c in text): # Contains non-ASCII

344

flags |= regex.UNICODE | regex.WORD | regex.FULLCASE

345

346

if '\n' in text: # Multi-line text

347

flags |= regex.MULTILINE

348

349

if options.get('case_insensitive', True):

350

flags |= regex.IGNORECASE

351

352

if options.get('fuzzy', False):

353

flags |= regex.BESTMATCH

354

pattern = f'(?e)({pattern}){{e<={options.get("errors", 1)}}}'

355

356

return regex.search(pattern, text, flags)

357

358

# Example usage

359

result = smart_search('hello', 'Hello, мир!', case_insensitive=True, fuzzy=True)

360

```