# Utilities and Tools

Additional utilities including AST generation helpers, tree reconstruction, standalone parser generation, serialization, visualization tools, and various helper functions.

## Capabilities

### Grammar Building Components

Classes for programmatically building and manipulating grammar definitions.

```python { .api }
class Symbol:
    """
    Base class for grammar symbols.
    """

    def __init__(self, name: str):
        """
        Initialize symbol.

        Parameters:
        - name: Symbol name
        """

    name: str
    is_term: bool

class Terminal(Symbol):
    """
    Terminal symbol in grammar definitions.
    """

    def __init__(self, name: str, filter_out: bool = False):
        """
        Initialize terminal symbol.

        Parameters:
        - name: Terminal name
        - filter_out: Whether to filter out this terminal from parse trees
        """

    filter_out: bool
    is_term = True

class NonTerminal(Symbol):
    """
    Non-terminal symbol in grammar definitions.
    """

    is_term = False

class Rule:
    """
    Grammar rule definition containing origin, expansion, and options.
    """

    def __init__(self, origin: NonTerminal, expansion: List[Symbol],
                 order: int = 0, alias: str = None, options: 'RuleOptions' = None):
        """
        Initialize grammar rule.

        Parameters:
        - origin: Non-terminal that this rule defines
        - expansion: List of symbols that make up the rule
        - order: Rule priority order
        - alias: Alternative name for the rule
        - options: Rule configuration options
        """

    origin: NonTerminal
    expansion: List[Symbol]
    alias: str
    order: int
    options: 'RuleOptions'

class RuleOptions:
    """
    Configuration options for grammar rules.
    """

    def __init__(self, keep_all_tokens: bool = False, expand1: bool = False,
                 priority: int = None, template_source: str = None,
                 empty_indices: Tuple = ()):
        """
        Initialize rule options.

        Parameters:
        - keep_all_tokens: Preserve all tokens in parse tree
        - expand1: Expand single-child rules
        - priority: Rule priority for disambiguation
        - template_source: Template source information
        - empty_indices: Indices of empty rule positions
        """

    keep_all_tokens: bool
    expand1: bool
    priority: int
    template_source: str
    empty_indices: Tuple
```

### Configuration Classes

Configuration objects for lexer and parser behavior.

```python { .api }
class LexerConf:
    """
    Lexer configuration containing terminals and options.
    """

    def __init__(self, terminals: List['TerminalDef'], re_module,
                 ignore: Tuple = (), postlex=None, callbacks: Dict = None,
                 g_regex_flags: int = 0, skip_validation: bool = False,
                 use_bytes: bool = False):
        """
        Initialize lexer configuration.

        Parameters:
        - terminals: List of terminal definitions
        - re_module: Regular expression module (re or regex)
        - ignore: Terminals to ignore in parsing
        - postlex: Post-lexing processor
        - callbacks: Lexer callback functions
        - g_regex_flags: Global regex flags
        - skip_validation: Skip terminal validation
        - use_bytes: Process bytes instead of strings
        """

    terminals: List['TerminalDef']
    terminals_by_name: Dict[str, 'TerminalDef']
    ignore: Tuple
    postlex: 'PostLex'
    callbacks: Dict
    g_regex_flags: int
    re_module: Any
    skip_validation: bool
    use_bytes: bool

class ParserConf:
    """
    Parser configuration containing rules and start symbols.
    """

    def __init__(self, rules: List[Rule], callbacks: Dict, start: List[str]):
        """
        Initialize parser configuration.

        Parameters:
        - rules: Grammar rules
        - callbacks: Parser callback functions
        - start: Start symbol(s)
        """

    rules: List[Rule]
    callbacks: Dict
    start: List[str]
```

### AST Generation Utilities

Helper classes and functions for creating custom Abstract Syntax Tree (AST) classes from parse trees.

```python { .api }
class Ast:
    """
    Abstract base class for custom AST node classes.
    Provides foundation for creating domain-specific AST representations.
    """

    @classmethod
    def from_lark_tree(cls, tree: Tree) -> 'Ast':
        """
        Create AST instance from Lark parse tree.

        Parameters:
        - tree: Lark Tree instance

        Returns:
        Ast: AST node instance
        """

class AsList(Ast):
    """
    AST node that stores parse results as a single list.
    Useful for collecting multiple items into a flat structure.
    """

def create_transformer(ast_module, transformer: Transformer = None) -> Transformer:
    """
    Create transformer from module containing AST classes.
    Automatically maps grammar rules to AST classes based on naming.

    Parameters:
    - ast_module: Module containing AST class definitions
    - transformer: Base transformer class (optional)

    Returns:
    Transformer: Configured transformer for AST generation
    """

def camel_to_snake(name: str) -> str:
    """
    Convert CamelCase names to snake_case.

    Parameters:
    - name: CamelCase string

    Returns:
    str: snake_case version
    """

def inline(f):
    """
    Decorator to mark AST classes as inline.
    Indicates that the AST class should receive children as separate arguments.

    Parameters:
    - f: AST class to mark as inline

    Returns:
    Callable: Decorated class
    """
```

### Text Reconstruction

Classes for reconstructing original text from parse trees, useful for pretty-printing and code generation.

```python { .api }
class Reconstructor:
    """
    Reconstructs text from parse trees by writing tokens in order.
    """

    def __init__(self, parser: Lark, term_subs: Dict[str, Callable] = None):
        """
        Initialize reconstructor.

        Parameters:
        - parser: Lark parser instance used to create trees
        - term_subs: Terminal substitution functions
        """

    def reconstruct(self, tree: Tree, postproc: Callable = None,
                    insert_spaces: bool = True) -> str:
        """
        Reconstruct text from parse tree.

        Parameters:
        - tree: Parse tree to reconstruct
        - postproc: Post-processing function for final text
        - insert_spaces: Whether to insert spaces between tokens

        Returns:
        str: Reconstructed text
        """

class WriteTokensTransformer(Transformer):
    """
    Transformer that reconstructs text by writing tokens.
    Used internally by Reconstructor for token-level reconstruction.
    """

    def __init__(self, tokens: Dict[str, str], term_subs: Dict[str, Callable]):
        """
        Initialize token writer.

        Parameters:
        - tokens: Mapping of token types to values
        - term_subs: Terminal substitution functions
        """
```

### Standalone Parser Generation

Tools for generating standalone parsers that don't require the Lark library at runtime.

```python { .api }
def gen_standalone(lark_instance: Lark, out=None, compress: bool = False) -> str:
    """
    Generate standalone parser code from Lark instance.
    Creates self-contained Python code that can parse without Lark dependency.
    Only works with LALR parser mode.

    Parameters:
    - lark_instance: Lark parser instance to convert (must use parser='lalr')
    - out: Output file object (optional)
    - compress: Whether to compress the generated code

    Returns:
    str: Generated standalone parser code

    Example:
    >>> parser = Lark(grammar, parser='lalr')
    >>> standalone_code = gen_standalone(parser)
    >>> with open('standalone_parser.py', 'w') as f:
    ...     f.write(standalone_code)
    """

def build_lalr(grammar_text: str, **options) -> Lark:
    """
    Build LALR parser from command-line style arguments.

    Parameters:
    - grammar_text: Grammar definition string
    - **options: Parser configuration options

    Returns:
    Lark: Configured LALR parser instance
    """

def make_warnings_comments():
    """
    Configure warnings to appear as comments in generated output.
    Useful for command-line tools that generate code.
    """
```

### Parser Serialization

Functions for saving and loading parser instances to avoid repeated grammar compilation.

```python { .api }
def serialize(lark_instance: Lark, f) -> None:
    """
    Serialize Lark parser instance to file for caching.

    Parameters:
    - lark_instance: Lark parser to serialize
    - f: File object to write serialized data
    """
```

### Tree Visualization

Functions for creating visual representations of parse trees using graphing libraries.

```python { .api }
def pydot__tree_to_png(tree: Tree, filename: str, rankdir: str = "LR", **kwargs) -> None:
    """
    Create PNG image of parse tree using pydot.

    Parameters:
    - tree: Parse tree to visualize
    - filename: Output PNG filename
    - rankdir: Graph direction ("LR", "TB", etc.)
    - **kwargs: Additional pydot options
    """

def pydot__tree_to_dot(tree: Tree, filename: str, rankdir: str = "LR", **kwargs) -> None:
    """
    Create DOT file representation of parse tree.

    Parameters:
    - tree: Parse tree to convert
    - filename: Output DOT filename
    - rankdir: Graph direction
    - **kwargs: Additional pydot options
    """

def pydot__tree_to_graph(tree: Tree, rankdir: str = "LR", **kwargs):
    """
    Create pydot graph object from parse tree.

    Parameters:
    - tree: Parse tree to convert
    - rankdir: Graph direction
    - **kwargs: Additional pydot options

    Returns:
    pydot.Dot: Graph object
    """
```

### Command-Line Tools

Utilities for building command-line interfaces and processing grammar files.

```python { .api }
def build_lalr(grammar_text: str, **options) -> Lark:
    """
    Build LALR parser from command-line arguments.

    Parameters:
    - grammar_text: Grammar definition
    - **options: Parser configuration options

    Returns:
    Lark: Configured LALR parser
    """

def make_warnings_comments() -> None:
    """
    Configure warnings to appear as comments in generated output.
    Useful for command-line tools that generate code.
    """
```

### Logger Configuration

Logging utilities for debugging and development.

```python { .api }
logger: logging.Logger
"""
Lark's logging instance for debug output and development information.
Use logger.setLevel() to control verbosity.
"""
```

### Internal Utilities

Various helper classes and functions used internally by Lark components.

```python { .api }
class Serialize:
    """
    Mixin class providing serialization capabilities.
    """

    def serialize(self, memo: Dict = None) -> Any:
        """
        Serialize object to transferable format.

        Parameters:
        - memo: Memoization dictionary for circular references

        Returns:
        Any: Serialized representation
        """

class SerializeMemoizer:
    """
    Helper for memoizing object serialization.
    """

    def __init__(self):
        self.memo = {}

    def serialize(self, obj: Any) -> Any:
        """
        Serialize object with memoization.

        Parameters:
        - obj: Object to serialize

        Returns:
        Any: Serialized object
        """
```

### File System Utilities

Cross-platform file system operation helpers.

```python { .api }
class FS:
    """
    File system utilities for cross-platform operations.
    """

    @staticmethod
    def open(filename: str, mode: str = 'r', **kwargs):
        """
        Open file with proper encoding handling.

        Parameters:
        - filename: File path
        - mode: File open mode
        - **kwargs: Additional open() arguments

        Returns:
        File object
        """

    @staticmethod
    def exists(path: str) -> bool:
        """
        Check if path exists.

        Parameters:
        - path: File or directory path

        Returns:
        bool: True if path exists
        """
```

### String and Type Utilities

Helper functions for string processing and type checking.

```python { .api }
def isascii(s: str) -> bool:
    """
    Check if string contains only ASCII characters.

    Parameters:
    - s: String to check

    Returns:
    bool: True if string is ASCII-only
    """

def is_id_continue(c: str) -> bool:
    """
    Check if character can continue a Unicode identifier.

    Parameters:
    - c: Character to check

    Returns:
    bool: True if character can continue identifier
    """

def is_id_start(c: str) -> bool:
    """
    Check if character can start a Unicode identifier.

    Parameters:
    - c: Character to check

    Returns:
    bool: True if character can start identifier
    """

def combine_alternatives(lists: List[List[Any]]) -> List[Any]:
    """
    Combine alternative rule definitions.

    Parameters:
    - lists: List of alternative rule lists

    Returns:
    List[Any]: Combined alternatives
    """

def classify(seq: Sequence[Any], key: Callable = None, value: Callable = None) -> Dict:
    """
    Classify sequence elements into dictionary by key function.

    Parameters:
    - seq: Sequence to classify
    - key: Function to extract keys
    - value: Function to extract values

    Returns:
    Dict: Classified elements
    """

def get_regexp_width(regexp: str) -> Tuple[int, int]:
    """
    Analyze regular expression to determine min/max match width.

    Parameters:
    - regexp: Regular expression string

    Returns:
    Tuple[int, int]: (min_width, max_width)
    """

STRING_TYPE: type  # String type for version compatibility
"""Type object representing string type across Python versions."""

ABC: type  # Abstract base class type
"""Abstract base class type for creating abstract classes."""

def abstractmethod(func: Callable) -> Callable:
    """
    Decorator marking method as abstract.

    Parameters:
    - func: Method to mark as abstract

    Returns:
    Callable: Decorated method
    """
```

### Smart Decorators

Advanced decorator utilities for flexible function modification.

```python { .api }
def smart_decorator(decorator: Callable, **decorator_kwargs) -> Callable:
    """
    Create smart decorator that can handle various function signatures.

    Parameters:
    - decorator: Base decorator function
    - **decorator_kwargs: Default decorator arguments

    Returns:
    Callable: Smart decorator function
    """

def combine_alternatives(*alternatives) -> Callable:
    """
    Combine multiple alternative implementations into single function.

    Parameters:
    - *alternatives: Alternative function implementations

    Returns:
    Callable: Combined function
    """
```

## Usage Examples

### Creating Custom AST Classes

```python
from lark import Lark, Tree
from lark.ast_utils import Ast, create_transformer, inline

# Define AST classes
class Expression(Ast):
    pass

class BinaryOp(Expression):
    def __init__(self, left, op, right):
        self.left = left
        self.op = op
        self.right = right

@inline
class Number(Expression):
    def __init__(self, value):
        self.value = int(value)

# Create module with AST classes
import sys
ast_module = sys.modules[__name__]

# Generate transformer
transformer = create_transformer(ast_module)

# Use with parser
parser = Lark(grammar, transformer=transformer)
ast = parser.parse("2 + 3 * 4")

print(f"AST root type: {type(ast)}")
print(f"Left operand: {ast.left}")
```

### Text Reconstruction

```python
from lark import Lark
from lark.reconstruct import Reconstructor

# Parse text
parser = Lark(grammar)
tree = parser.parse("x = 42 + y")

# Reconstruct original text
reconstructor = Reconstructor(parser)
reconstructed = reconstructor.reconstruct(tree)
print(f"Reconstructed: {reconstructed}")

# Reconstruct with custom formatting
def format_postproc(text):
    return text.replace('+', ' + ').replace('=', ' = ')

formatted = reconstructor.reconstruct(tree, postproc=format_postproc)
print(f"Formatted: {formatted}")
```

### Generating Standalone Parser

```python
from lark import Lark
from lark.tools.standalone import gen_standalone

# Create parser
parser = Lark(grammar, parser='lalr')  # Only LALR supports standalone

# Generate standalone code
standalone_code = gen_standalone(parser)

# Save to file
with open('my_parser.py', 'w') as f:
    f.write(standalone_code)

# The generated file can be used without Lark:
# from my_parser import Lark_StandAlone
# parser = Lark_StandAlone()
# result = parser.parse(text)
```

### Parser Serialization and Caching

```python
from lark import Lark
from lark.tools.serialize import serialize
import pickle

# Create parser
parser = Lark(grammar)

# Serialize parser
with open('parser.cache', 'wb') as f:
    serialize(parser, f)

# Load serialized parser
with open('parser.cache', 'rb') as f:
    cached_parser = pickle.load(f)

# Use cached parser
result = cached_parser.parse(text)
```

### Tree Visualization

```python
from lark import Lark
from lark.tree import pydot__tree_to_png

# Parse text
parser = Lark(grammar)
tree = parser.parse("complex expression")

# Create PNG visualization
pydot__tree_to_png(tree, 'parse_tree.png', rankdir='TB')

# Create DOT file
from lark.tree import pydot__tree_to_dot
pydot__tree_to_dot(tree, 'parse_tree.dot')
```

### Command-Line Tool Integration

```python
from lark.tools import build_lalr, make_warnings_comments
import argparse

def main():
    parser = argparse.ArgumentParser(description='Grammar processor')
    parser.add_argument('grammar_file', help='Grammar file path')
    parser.add_argument('input_file', help='Input file to parse')
    parser.add_argument('--debug', action='store_true')

    args = parser.parse_args()

    # Configure warnings as comments
    make_warnings_comments()

    # Read grammar
    with open(args.grammar_file) as f:
        grammar = f.read()

    # Build parser
    lark_parser = build_lalr(grammar, debug=args.debug)

    # Parse input
    with open(args.input_file) as f:
        text = f.read()

    result = lark_parser.parse(text)
    print(result.pretty())

if __name__ == '__main__':
    main()
```

### Custom Logger Configuration

```python
from lark.utils import logger
import logging

# Configure Lark logging
logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
))
logger.addHandler(handler)

# Now Lark will output debug information
parser = Lark(grammar, debug=True)
tree = parser.parse(text)  # Will show debug output
```

### Advanced AST Transformation

```python
from lark import Lark, Transformer
from lark.ast_utils import camel_to_snake

class AstGenerator(Transformer):
    """Generate AST nodes with converted names."""

    def __init__(self, ast_classes):
        super().__init__()
        self.ast_classes = ast_classes

    def __default__(self, data, children, meta):
        # Convert rule name to class name
        class_name = data.title().replace('_', '')

        if class_name in self.ast_classes:
            ast_class = self.ast_classes[class_name]
            return ast_class(*children)

        # Fallback to generic AST node
        return super().__default__(data, children, meta)

# Define AST classes
class Expression:
    pass

class BinaryExpr(Expression):
    def __init__(self, left, op, right):
        self.left = left
        self.op = op
        self.right = right

ast_classes = {
    'BinaryExpr': BinaryExpr,
    'Expression': Expression
}

# Use custom AST generator
transformer = AstGenerator(ast_classes)
parser = Lark(grammar, transformer=transformer)
```

### File System Operations

```python
from lark.utils import FS
import os

# Cross-platform file operations
grammar_file = 'grammar.lark'

if FS.exists(grammar_file):
    with FS.open(grammar_file, 'r', encoding='utf-8') as f:
        grammar = f.read()

    parser = Lark(grammar)
else:
    print(f"Grammar file {grammar_file} not found")
```

### Smart Decorator Usage

```python
from lark.utils import smart_decorator

def timing_decorator(func, log_time=True):
    """Decorator that measures function execution time."""
    import time

    def wrapper(*args, **kwargs):
        start = time.time()
        result = func(*args, **kwargs)
        end = time.time()

        if log_time:
            print(f"{func.__name__} took {end - start:.4f} seconds")

        return result

    return wrapper

# Create smart timing decorator
timed = smart_decorator(timing_decorator, log_time=True)

# Use with functions
@timed
def parse_large_file(filename):
    parser = Lark(grammar)
    with open(filename) as f:
        return parser.parse(f.read())

# Function will automatically log execution time
result = parse_large_file('large_input.txt')
```