or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

core-parsing.md error-handling.md grammar-system.md index.md python-elements.md tokenization.md tree-navigation.md utilities.md

utilities.md docs/

0

# Utilities and Helpers

1

2

Utility functions for text processing, version handling, encoding detection, file I/O operations, and caching that support the parsing infrastructure and provide convenient helpers for working with Python code.

3

4

## Capabilities

5

6

### Text Processing

7

8

Utilities for handling Python source code text with proper encoding and line splitting.

9

10

```python { .api }

11

def split_lines(string, keepends=False):

12

"""

13

Split Python code into lines, handling form feeds correctly.

14

15

Unlike str.splitlines(), this treats form feeds as normal characters

16

and only splits on \\n, \\r\\n and \\r, which matches Python's behavior.

17

18

Args:

19

string (str): Text to split

20

keepends (bool): Keep line endings in result (default: False)

21

22

Returns:

23

list[str]: List of lines, always returns at least [''] for empty input

24

"""

25

26

def python_bytes_to_unicode(source, encoding='utf-8', errors='strict'):

27

"""

28

Convert bytes to unicode with Python encoding detection.

29

30

Handles Unicode BOMs and PEP 263 encoding declarations automatically.

31

32

Args:

33

source (str | bytes): Source code as string or bytes

34

encoding (str): Default encoding if none detected (default: 'utf-8')

35

errors (str): Error handling strategy ('strict', 'replace', 'ignore')

36

37

Returns:

38

str: Unicode string representation of source code

39

40

Raises:

41

LookupError: If the detected encoding is unknown and errors is not 'replace'

42

"""

43

```

44

45

#### Usage Examples

46

47

```python

48

from parso.utils import split_lines, python_bytes_to_unicode

49

50

# Line splitting - Python-aware

51

code = "line1\nline2\r\nline3\f\nline4" # Form feed at the end of line3 is kept as an ordinary character

52

lines = split_lines(code)

53

print("Lines:", lines) # ['line1', 'line2', 'line3\x0c', 'line4']

54

55

# With line endings preserved

56

lines_with_ends = split_lines(code, keepends=True)

57

print("With endings:", lines_with_ends)

58

59

# Encoding detection from bytes

60

latin1_code = b'# -*- coding: latin-1 -*-\ntext = "caf\xe9"'

61

unicode_code = python_bytes_to_unicode(latin1_code)

62

print("Detected and converted:", repr(unicode_code))

63

64

# UTF-8 BOM handling

65

utf8_bom = b'\xef\xbb\xbfprint("hello world")'

66

clean_code = python_bytes_to_unicode(utf8_bom)

67

print("BOM removed:", repr(clean_code))

68

69

# Error handling options

70

invalid_bytes = b'\xff\xfe invalid encoding'

71

safe_code = python_bytes_to_unicode(invalid_bytes, errors='replace')

72

print("With replacements:", repr(safe_code))

73

```

74

75

### Version Handling

76

77

Classes and functions for working with Python version information.

78

79

```python { .api }

80

class Version:

81

"""

82

Parso version information.

83

84

Attributes:

85

major (int): Major version number

86

minor (int): Minor version number

87

micro (int): Micro version number

88

"""

89

90

class PythonVersionInfo:

91

"""

92

Python version information for grammar selection.

93

94

Attributes:

95

major (int): Python major version (e.g., 3)

96

minor (int): Python minor version (e.g., 9)

97

"""

98

99

def __gt__(self, other):

100

"""Compare versions (supports tuples)."""

101

102

def __eq__(self, other):

103

"""Check version equality (supports tuples)."""

104

105

def parse_version_string(version=None):

106

"""

107

Parse Python version string into version info.

108

109

Args:

110

version (str, optional): Version string like '3.8' or '3.10.1'

111

Defaults to current Python version

112

113

Returns:

114

PythonVersionInfo: Parsed version information

115

116

Raises:

117

ValueError: If version format is invalid

118

TypeError: If version is not a string

119

"""

120

121

def version_info():

122

"""

123

Get parso library version information.

124

125

Returns:

126

Version: Parso version as named tuple

127

"""

128

```

129

130

#### Usage Examples

131

132

```python

133

from parso.utils import parse_version_string, version_info, PythonVersionInfo

134

135

# Parse version strings

136

py38 = parse_version_string("3.8")

137

py310 = parse_version_string("3.10.5") # Micro version ignored

138

current = parse_version_string() # Uses sys.version_info

139

140

print(f"Python 3.8: {py38.major}.{py38.minor}")

141

print(f"Python 3.10: {py310.major}.{py310.minor}")

142

print(f"Current: {current.major}.{current.minor}")

143

144

# Version comparisons

145

if py310 > py38:

146

print("3.10 is newer than 3.8")

147

148

if py38 == (3, 8): # Compare with tuple

149

print("Version matches tuple")

150

151

# Get parso version

152

parso_version = version_info()

153

print(f"Parso version: {parso_version.major}.{parso_version.minor}.{parso_version.micro}")

154

155

# Version-specific feature detection

156

def supports_walrus_operator(version_info):

157

"""Check if Python version supports walrus operator."""

158

return version_info >= (3, 8)

159

160

def supports_match_statements(version_info):

161

"""Check if Python version supports match statements."""

162

return version_info >= (3, 10)

163

164

py_version = parse_version_string("3.9")

165

print(f"3.9 supports walrus: {supports_walrus_operator(py_version)}")

166

print(f"3.9 supports match: {supports_match_statements(py_version)}")

167

```

168

169

### File I/O Classes

170

171

File handling abstractions that support caching and content management.

172

173

```python { .api }

174

class FileIO:

175

"""

176

File I/O abstraction for reading Python source files.

177

178

Attributes:

179

path (Path): File path as pathlib.Path object

180

"""

181

182

def __init__(self, path):

183

"""

184

Initialize file I/O handler.

185

186

Args:

187

path (str | Path): File path to read

188

"""

189

190

def read(self):

191

"""

192

Read file contents as bytes.

193

194

Returns:

195

bytes: Raw file contents

196

"""

197

198

def get_last_modified(self):

199

"""

200

Get file modification timestamp.

201

202

Returns:

203

float | None: Timestamp or None if file doesn't exist

204

"""

205

206

class KnownContentFileIO(FileIO):

207

"""

208

File I/O wrapper for content that's already known.

209

210

Useful for parsing strings while maintaining file-like interface.

211

"""

212

213

def __init__(self, path, content):

214

"""

215

Initialize with known content.

216

217

Args:

218

path (str | Path): File path (can be None)

219

content (str | bytes): Known file content

220

"""

221

222

def read(self):

223

"""

224

Return the known content.

225

226

Returns:

227

str | bytes: The provided content

228

"""

229

```

230

231

#### Usage Examples

232

233

```python

234

from parso.file_io import FileIO, KnownContentFileIO

235

import parso

236

237

# Read from actual file

238

file_io = FileIO("/path/to/script.py")

239

content = file_io.read()

240

last_modified = file_io.get_last_modified()

241

242

# Parse using FileIO

243

grammar = parso.load_grammar()

244

module = grammar.parse(file_io=file_io, cache=True)

245

246

# Use known content (useful for in-memory parsing)

247

code = '''

248

def example():

249

return "hello world"

250

'''

251

252

known_io = KnownContentFileIO("virtual_file.py", code)

253

module = grammar.parse(file_io=known_io)

254

255

# File I/O with caching

256

def parse_file_with_caching(file_path):

257

"""Parse file with automatic caching."""

258

file_io = FileIO(file_path)

259

260

# Check if file exists and get modification time

261

mod_time = file_io.get_last_modified()

262

if mod_time is None:

263

raise FileNotFoundError(f"File not found: {file_path}")

264

265

grammar = parso.load_grammar()

266

return grammar.parse(file_io=file_io, cache=True)

267

268

# Virtual file for testing

269

def create_test_module(code_string, filename="test.py"):

270

"""Create module from string with virtual filename."""

271

file_io = KnownContentFileIO(filename, code_string)

272

grammar = parso.load_grammar()

273

return grammar.parse(file_io=file_io)

274

275

test_module = create_test_module('x = 42')

276

```

277

278

### Cache Management

279

280

Functions for managing parso's parser cache system.

281

282

```python { .api }

283

def load_module(hashed_grammar, file_io, cache_path=None):

284

"""

285

Load cached parsed module.

286

287

Args:

288

hashed_grammar (str): Grammar hash identifier

289

file_io (FileIO): File I/O handler

290

cache_path (Path, optional): Custom cache directory

291

292

Returns:

293

NodeOrLeaf | None: Cached module or None if not cached/outdated

294

"""

295

296

def try_to_save_module(hashed_grammar, file_io, module, lines, pickling=True, cache_path=None):

297

"""

298

Save parsed module to cache.

299

300

Args:

301

hashed_grammar (str): Grammar hash

302

file_io (FileIO): File I/O handler

303

module (NodeOrLeaf): Parsed module to cache

304

lines (list[str]): Source code lines

305

pickling (bool): Enable disk caching (default: True)

306

cache_path (Path, optional): Custom cache directory

307

"""

308

309

def clear_cache(cache_path=None):

310

"""

311

Clear all cached files and in-memory cache.

312

313

Args:

314

cache_path (Path, optional): Cache directory to clear

315

"""

316

317

def clear_inactive_cache(cache_path=None, inactivity_threshold=2592000):

318

"""

319

Clear cached files that haven't been accessed recently.

320

321

Args:

322

cache_path (Path, optional): Cache directory

323

inactivity_threshold (int): Seconds of inactivity before removal

324

325

Returns:

326

bool: True if cleanup completed successfully

327

"""

328

```

329

330

#### Usage Examples

331

332

```python

333

import parso

334

import parso.cache

335

from pathlib import Path

336

337

# Manual cache management

338

def process_files_with_caching(file_paths):

339

"""Process multiple files with shared cache."""

340

grammar = parso.load_grammar()

341

342

for file_path in file_paths:

343

try:

344

# Parse with caching enabled

345

module = grammar.parse(path=file_path, cache=True)

346

print(f"Processed {file_path}: {len(module.children)} statements")

347

except Exception as e:

348

print(f"Error processing {file_path}: {e}")

349

350

# Cache statistics

351

def get_cache_stats():

352

"""Get information about current cache state."""

353

cache = parso.cache.parser_cache

354

355

total_grammars = len(cache)

356

total_files = sum(len(files) for files in cache.values())

357

358

return {

359

'grammars_cached': total_grammars,

360

'files_cached': total_files,

361

'cache_keys': list(cache.keys())

362

}

363

364

stats = get_cache_stats()

365

print("Cache statistics:", stats)

366

367

# Periodic cache cleanup

368

def cleanup_old_cache():

369

"""Clean up old cache files."""

370

print("Clearing inactive cache files...")

371

success = parso.cache.clear_inactive_cache()

372

373

if success:

374

print("Cache cleanup completed")

375

else:

376

print("Cache cleanup had issues")

377

378

# Custom cache directory

379

custom_cache = Path.home() / '.my_parso_cache'

380

grammar = parso.load_grammar()

381

module = grammar.parse(

382

path="example.py",

383

cache=True,

384

cache_path=custom_cache

385

)

386

387

# Clear specific cache directory

388

parso.cache.clear_cache(cache_path=custom_cache)

389

```

390

391

## Integration Patterns

392

393

### Encoding-Safe File Processing

394

395

```python

396

from parso.utils import python_bytes_to_unicode

397

from parso.file_io import FileIO

398

import parso

399

400

def safe_parse_file(file_path):

401

"""Safely parse file handling encoding issues."""

402

try:

403

# Read as bytes first

404

with open(file_path, 'rb') as f:

405

raw_content = f.read()

406

407

# Convert to unicode with encoding detection

408

unicode_content = python_bytes_to_unicode(raw_content, errors='replace')

409

410

# Parse the content

411

grammar = parso.load_grammar()

412

return grammar.parse(unicode_content)

413

414

except Exception as e:

415

print(f"Error parsing {file_path}: {e}")

416

return None

417

418

# Process directory of Python files

419

def process_python_directory(directory):

420

"""Process all Python files in directory safely."""

421

from pathlib import Path

422

423

python_files = Path(directory).glob("**/*.py")

424

425

for py_file in python_files:

426

module = safe_parse_file(py_file)

427

if module:

428

print(f"Successfully parsed: {py_file}")

429

else:

430

print(f"Failed to parse: {py_file}")

431

```

432

433

### Version-Aware Parsing

434

435

```python

436

from parso.utils import parse_version_string

437

import parso

438

439

def parse_with_version_detection(code):

440

"""Parse code with automatic version detection."""

441

442

# Try to detect version from code features

443

def detect_version_features(code):

444

"""Detect Python version from code features."""

445

if ':=' in code: # Walrus operator

446

return "3.8"

447

if 'match ' in code and 'case ' in code: # Match statements

448

return "3.10"

449

if '|' in code and 'Union' not in code: # Union types

450

return "3.10"

451

return "3.6" # Safe default

452

453

detected_version = detect_version_features(code)

454

version_info = parse_version_string(detected_version)

455

456

grammar = parso.load_grammar(version=f"{version_info.major}.{version_info.minor}")

457

return grammar.parse(code), detected_version

458

459

# Usage

460

code_samples = [

461

'x = 42', # Basic

462

'if (n := len(items)) > 5: pass', # Python 3.8 walrus

463

'''match value:

464

case 1: print("one")''', # Python 3.10 match

465

]

466

467

for code in code_samples:

468

module, version = parse_with_version_detection(code)

469

print(f"Parsed with Python {version}: {code[:30]}...")

470

```

471

472

### High-Performance Parsing

473

474

```python

475

import parso

476

from parso.cache import clear_inactive_cache

477

import time

478

479

class HighPerformanceParser:

480

"""Optimized parser for processing many files."""

481

482

def __init__(self, cache_cleanup_interval=3600): # 1 hour

483

self.grammar = parso.load_grammar()

484

self.last_cleanup = time.time()

485

self.cleanup_interval = cache_cleanup_interval

486

self.files_processed = 0

487

488

def parse_file(self, file_path):

489

"""Parse single file with optimizations."""

490

try:

491

# Use caching and differential parsing for performance

492

module = self.grammar.parse(

493

path=file_path,

494

cache=True,

495

diff_cache=True

496

)

497

498

self.files_processed += 1

499

500

# Periodic cache cleanup

501

if time.time() - self.last_cleanup > self.cleanup_interval:

502

clear_inactive_cache()

503

self.last_cleanup = time.time()

504

print(f"Cleaned cache after processing {self.files_processed} files")

505

506

return module

507

508

except Exception as e:

509

print(f"Error parsing {file_path}: {e}")

510

return None

511

512

def batch_parse(self, file_paths):

513

"""Parse multiple files efficiently."""

514

results = []

515

516

for file_path in file_paths:

517

result = self.parse_file(file_path)

518

if result:

519

results.append((file_path, result))

520

521

return results

522

523

# Usage

524

parser = HighPerformanceParser()

525

file_paths = ["file1.py", "file2.py", "file3.py"]

526

results = parser.batch_parse(file_paths)

527

print(f"Successfully parsed {len(results)} files")

528

```