# Utilities and Tools

Additional utilities including AST generation helpers, tree reconstruction, standalone parser generation, serialization, visualization tools, and various helper functions.
4
## Capabilities
5
6
### Grammar Building Components
7
8
Classes for programmatically building and manipulating grammar definitions.
9
10
```python { .api }
11
class Symbol:
12
"""
13
Base class for grammar symbols.
14
"""
15
16
def __init__(self, name: str):
17
"""
18
Initialize symbol.
19
20
Parameters:
21
- name: Symbol name
22
"""
23
24
name: str
25
is_term: bool
26
27
class Terminal(Symbol):
28
"""
29
Terminal symbol in grammar definitions.
30
"""
31
32
def __init__(self, name: str, filter_out: bool = False):
33
"""
34
Initialize terminal symbol.
35
36
Parameters:
37
- name: Terminal name
38
- filter_out: Whether to filter out this terminal from parse trees
39
"""
40
41
filter_out: bool
42
is_term = True
43
44
class NonTerminal(Symbol):
45
"""
46
Non-terminal symbol in grammar definitions.
47
"""
48
49
is_term = False
50
51
class Rule:
52
"""
53
Grammar rule definition containing origin, expansion, and options.
54
"""
55
56
def __init__(self, origin: NonTerminal, expansion: List[Symbol],
57
order: int = 0, alias: str = None, options: 'RuleOptions' = None):
58
"""
59
Initialize grammar rule.
60
61
Parameters:
62
- origin: Non-terminal that this rule defines
63
- expansion: List of symbols that make up the rule
64
- order: Rule priority order
65
- alias: Alternative name for the rule
66
- options: Rule configuration options
67
"""
68
69
origin: NonTerminal
70
expansion: List[Symbol]
71
alias: str
72
order: int
73
options: 'RuleOptions'
74
75
class RuleOptions:
76
"""
77
Configuration options for grammar rules.
78
"""
79
80
def __init__(self, keep_all_tokens: bool = False, expand1: bool = False,
81
priority: int = None, template_source: str = None,
82
empty_indices: Tuple = ()):
83
"""
84
Initialize rule options.
85
86
Parameters:
87
- keep_all_tokens: Preserve all tokens in parse tree
88
- expand1: Expand single-child rules
89
- priority: Rule priority for disambiguation
90
- template_source: Template source information
91
- empty_indices: Indices of empty rule positions
92
"""
93
94
keep_all_tokens: bool
95
expand1: bool
96
priority: int
97
template_source: str
98
empty_indices: Tuple
99
```
100
101
### Configuration Classes
102
103
Configuration objects for lexer and parser behavior.
104
105
```python { .api }
106
class LexerConf:
107
"""
108
Lexer configuration containing terminals and options.
109
"""
110
111
def __init__(self, terminals: List['TerminalDef'], re_module,
112
ignore: Tuple = (), postlex=None, callbacks: Dict = None,
113
g_regex_flags: int = 0, skip_validation: bool = False,
114
use_bytes: bool = False):
115
"""
116
Initialize lexer configuration.
117
118
Parameters:
119
- terminals: List of terminal definitions
120
- re_module: Regular expression module (re or regex)
121
- ignore: Terminals to ignore in parsing
122
- postlex: Post-lexing processor
123
- callbacks: Lexer callback functions
124
- g_regex_flags: Global regex flags
125
- skip_validation: Skip terminal validation
126
- use_bytes: Process bytes instead of strings
127
"""
128
129
terminals: List['TerminalDef']
130
terminals_by_name: Dict[str, 'TerminalDef']
131
ignore: Tuple
132
postlex: 'PostLex'
133
callbacks: Dict
134
g_regex_flags: int
135
re_module: Any
136
skip_validation: bool
137
use_bytes: bool
138
139
class ParserConf:
140
"""
141
Parser configuration containing rules and start symbols.
142
"""
143
144
def __init__(self, rules: List[Rule], callbacks: Dict, start: List[str]):
145
"""
146
Initialize parser configuration.
147
148
Parameters:
149
- rules: Grammar rules
150
- callbacks: Parser callback functions
151
- start: Start symbol(s)
152
"""
153
154
rules: List[Rule]
155
callbacks: Dict
156
start: List[str]
157
```
158
159
### AST Generation Utilities
160
161
Helper classes and functions for creating custom Abstract Syntax Tree (AST) classes from parse trees.
162
163
```python { .api }
164
class Ast:
165
"""
166
Abstract base class for custom AST node classes.
167
Provides foundation for creating domain-specific AST representations.
168
"""
169
170
@classmethod
171
def from_lark_tree(cls, tree: Tree) -> 'Ast':
172
"""
173
Create AST instance from Lark parse tree.
174
175
Parameters:
176
- tree: Lark Tree instance
177
178
Returns:
179
Ast: AST node instance
180
"""
181
182
class AsList(Ast):
183
"""
184
AST node that stores parse results as a single list.
185
Useful for collecting multiple items into a flat structure.
186
"""
187
188
def create_transformer(ast_module, transformer: Transformer = None) -> Transformer:
189
"""
190
Create transformer from module containing AST classes.
191
Automatically maps grammar rules to AST classes based on naming.
192
193
Parameters:
194
- ast_module: Module containing AST class definitions
195
- transformer: Base transformer class (optional)
196
197
Returns:
198
Transformer: Configured transformer for AST generation
199
"""
200
201
def camel_to_snake(name: str) -> str:
202
"""
203
Convert CamelCase names to snake_case.
204
205
Parameters:
206
- name: CamelCase string
207
208
Returns:
209
str: snake_case version
210
"""
211
212
def inline(f):
213
"""
214
Decorator to mark AST classes as inline.
215
Indicates that the AST class should receive children as separate arguments.
216
217
Parameters:
218
- f: AST class to mark as inline
219
220
Returns:
221
Callable: Decorated class
222
"""
223
```
224
225
### Text Reconstruction
226
227
Classes for reconstructing original text from parse trees, useful for pretty-printing and code generation.
228
229
```python { .api }
230
class Reconstructor:
231
"""
232
Reconstructs text from parse trees by writing tokens in order.
233
"""
234
235
def __init__(self, parser: Lark, term_subs: Dict[str, Callable] = None):
236
"""
237
Initialize reconstructor.
238
239
Parameters:
240
- parser: Lark parser instance used to create trees
241
- term_subs: Terminal substitution functions
242
"""
243
244
def reconstruct(self, tree: Tree, postproc: Callable = None,
245
insert_spaces: bool = True) -> str:
246
"""
247
Reconstruct text from parse tree.
248
249
Parameters:
250
- tree: Parse tree to reconstruct
251
- postproc: Post-processing function for final text
252
- insert_spaces: Whether to insert spaces between tokens
253
254
Returns:
255
str: Reconstructed text
256
"""
257
258
class WriteTokensTransformer(Transformer):
259
"""
260
Transformer that reconstructs text by writing tokens.
261
Used internally by Reconstructor for token-level reconstruction.
262
"""
263
264
def __init__(self, tokens: Dict[str, str], term_subs: Dict[str, Callable]):
265
"""
266
Initialize token writer.
267
268
Parameters:
269
- tokens: Mapping of token types to values
270
- term_subs: Terminal substitution functions
271
"""
272
```
273
274
### Standalone Parser Generation
275
276
Tools for generating standalone parsers that don't require the Lark library at runtime.
277
278
```python { .api }
279
def gen_standalone(lark_instance: Lark, out=None, compress: bool = False) -> str:
280
"""
281
Generate standalone parser code from Lark instance.
282
Creates self-contained Python code that can parse without Lark dependency.
283
Only works with LALR parser mode.
284
285
Parameters:
286
- lark_instance: Lark parser instance to convert (must use parser='lalr')
287
- out: Output file object (optional)
288
- compress: Whether to compress the generated code
289
290
Returns:
291
str: Generated standalone parser code
292
293
Example:
294
>>> parser = Lark(grammar, parser='lalr')
295
>>> standalone_code = gen_standalone(parser)
296
>>> with open('standalone_parser.py', 'w') as f:
297
... f.write(standalone_code)
298
"""
299
300
def build_lalr(grammar_text: str, **options) -> Lark:
301
"""
302
Build LALR parser from command-line style arguments.
303
304
Parameters:
305
- grammar_text: Grammar definition string
306
- **options: Parser configuration options
307
308
Returns:
309
Lark: Configured LALR parser instance
310
"""
311
312
def make_warnings_comments() -> None:
313
"""
314
Configure warnings to appear as comments in generated output.
315
Useful for command-line tools that generate code.
316
"""
317
```
318
319
### Parser Serialization
320
321
Functions for saving and loading parser instances to avoid repeated grammar compilation.
322
323
```python { .api }
324
def serialize(lark_instance: Lark, f) -> None:
325
"""
326
Serialize Lark parser instance to file for caching.
327
328
Parameters:
329
- lark_instance: Lark parser to serialize
330
- f: File object to write serialized data
331
"""
332
```
333
334
### Tree Visualization
335
336
Functions for creating visual representations of parse trees using graphing libraries.
337
338
```python { .api }
339
def pydot__tree_to_png(tree: Tree, filename: str, rankdir: str = "LR", **kwargs) -> None:
340
"""
341
Create PNG image of parse tree using pydot.
342
343
Parameters:
344
- tree: Parse tree to visualize
345
- filename: Output PNG filename
346
- rankdir: Graph direction ("LR", "TB", etc.)
347
- **kwargs: Additional pydot options
348
"""
349
350
def pydot__tree_to_dot(tree: Tree, filename: str, rankdir: str = "LR", **kwargs) -> None:
351
"""
352
Create DOT file representation of parse tree.
353
354
Parameters:
355
- tree: Parse tree to convert
356
- filename: Output DOT filename
357
- rankdir: Graph direction
358
- **kwargs: Additional pydot options
359
"""
360
361
def pydot__tree_to_graph(tree: Tree, rankdir: str = "LR", **kwargs):
362
"""
363
Create pydot graph object from parse tree.
364
365
Parameters:
366
- tree: Parse tree to convert
367
- rankdir: Graph direction
368
- **kwargs: Additional pydot options
369
370
Returns:
371
pydot.Dot: Graph object
372
"""
373
```
374
375
### Command-Line Tools
376
377
Utilities for building command-line interfaces and processing grammar files.
378
379
```python { .api }
380
def build_lalr(grammar_text: str, **options) -> Lark:
381
"""
382
Build LALR parser from command-line arguments.
383
384
Parameters:
385
- grammar_text: Grammar definition
386
- **options: Parser configuration options
387
388
Returns:
389
Lark: Configured LALR parser
390
"""
391
392
def make_warnings_comments() -> None:
393
"""
394
Configure warnings to appear as comments in generated output.
395
Useful for command-line tools that generate code.
396
"""
397
```
398
399
### Logger Configuration
400
401
Logging utilities for debugging and development.
402
403
```python { .api }
404
logger: logging.Logger
405
"""
406
Lark's logging instance for debug output and development information.
407
Use logger.setLevel() to control verbosity.
408
"""
409
```
410
411
### Internal Utilities
412
413
Various helper classes and functions used internally by Lark components.
414
415
```python { .api }
416
class Serialize:
417
"""
418
Mixin class providing serialization capabilities.
419
"""
420
421
def serialize(self, memo: Dict = None) -> Any:
422
"""
423
Serialize object to transferable format.
424
425
Parameters:
426
- memo: Memoization dictionary for circular references
427
428
Returns:
429
Any: Serialized representation
430
"""
431
432
class SerializeMemoizer:
433
"""
434
Helper for memoizing object serialization.
435
"""
436
437
def __init__(self):
438
self.memo = {}
439
440
def serialize(self, obj: Any) -> Any:
441
"""
442
Serialize object with memoization.
443
444
Parameters:
445
- obj: Object to serialize
446
447
Returns:
448
Any: Serialized object
449
"""
450
```
451
452
### File System Utilities
453
454
Cross-platform file system operation helpers.
455
456
```python { .api }
457
class FS:
458
"""
459
File system utilities for cross-platform operations.
460
"""
461
462
@staticmethod
463
def open(filename: str, mode: str = 'r', **kwargs):
464
"""
465
Open file with proper encoding handling.
466
467
Parameters:
468
- filename: File path
469
- mode: File open mode
470
- **kwargs: Additional open() arguments
471
472
Returns:
473
File object
474
"""
475
476
@staticmethod
477
def exists(path: str) -> bool:
478
"""
479
Check if path exists.
480
481
Parameters:
482
- path: File or directory path
483
484
Returns:
485
bool: True if path exists
486
"""
487
```
488
489
### String and Type Utilities
490
491
Helper functions for string processing and type checking.
492
493
```python { .api }
494
def isascii(s: str) -> bool:
495
"""
496
Check if string contains only ASCII characters.
497
498
Parameters:
499
- s: String to check
500
501
Returns:
502
bool: True if string is ASCII-only
503
"""
504
505
def is_id_continue(c: str) -> bool:
506
"""
507
Check if character can continue a Unicode identifier.
508
509
Parameters:
510
- c: Character to check
511
512
Returns:
513
bool: True if character can continue identifier
514
"""
515
516
def is_id_start(c: str) -> bool:
517
"""
518
Check if character can start a Unicode identifier.
519
520
Parameters:
521
- c: Character to check
522
523
Returns:
524
bool: True if character can start identifier
525
"""
526
527
def combine_alternatives(lists: List[List[Any]]) -> List[Any]:
528
"""
529
Combine alternative rule definitions.
530
531
Parameters:
532
- lists: List of alternative rule lists
533
534
Returns:
535
List[Any]: Combined alternatives
536
"""
537
538
def classify(seq: Sequence[Any], key: Callable = None, value: Callable = None) -> Dict:
539
"""
540
Classify sequence elements into dictionary by key function.
541
542
Parameters:
543
- seq: Sequence to classify
544
- key: Function to extract keys
545
- value: Function to extract values
546
547
Returns:
548
Dict: Classified elements
549
"""
550
551
def get_regexp_width(regexp: str) -> Tuple[int, int]:
552
"""
553
Analyze regular expression to determine min/max match width.
554
555
Parameters:
556
- regexp: Regular expression string
557
558
Returns:
559
Tuple[int, int]: (min_width, max_width)
560
"""
561
562
STRING_TYPE: type # String type for version compatibility
563
"""Type object representing string type across Python versions."""
564
565
ABC: type # Abstract base class type
566
"""Abstract base class type for creating abstract classes."""
567
568
def abstractmethod(func: Callable) -> Callable:
569
"""
570
Decorator marking method as abstract.
571
572
Parameters:
573
- func: Method to mark as abstract
574
575
Returns:
576
Callable: Decorated method
577
"""
578
```
579
580
### Smart Decorators
581
582
Advanced decorator utilities for flexible function modification.
583
584
```python { .api }
585
def smart_decorator(decorator: Callable, **decorator_kwargs) -> Callable:
586
"""
587
Create smart decorator that can handle various function signatures.
588
589
Parameters:
590
- decorator: Base decorator function
591
- **decorator_kwargs: Default decorator arguments
592
593
Returns:
594
Callable: Smart decorator function
595
"""
596
597
def combine_alternatives(lists: List[List[Any]]) -> List[Any]:
    """
    Combine alternative rule definitions.
    Same utility as documented under "String and Type Utilities";
    listed here again for discoverability.

    Parameters:
    - lists: List of alternative rule lists

    Returns:
    List[Any]: Combined alternatives
    """
607
```
608
609
## Usage Examples
610
611
### Creating Custom AST Classes
612
613
```python
614
from lark import Lark, Tree
615
from lark.ast_utils import Ast, create_transformer, inline
616
617
# Define AST classes
618
class Expression(Ast):
619
pass
620
621
class BinaryOp(Expression):
622
def __init__(self, left, op, right):
623
self.left = left
624
self.op = op
625
self.right = right
626
627
@inline
628
class Number(Expression):
629
def __init__(self, value):
630
self.value = int(value)
631
632
# Create module with AST classes
633
import sys
634
ast_module = sys.modules[__name__]
635
636
# Generate transformer
637
transformer = create_transformer(ast_module)
638
639
# Use with parser.
# NOTE: the transformer= constructor argument requires parser='lalr';
# with other parser modes, apply it after parsing via transformer.transform(tree).
parser = Lark(grammar, parser='lalr', transformer=transformer)
ast = parser.parse("2 + 3 * 4")
642
643
print(f"AST root type: {type(ast)}")
644
print(f"Left operand: {ast.left}")
645
```
646
647
### Text Reconstruction
648
649
```python
650
from lark import Lark
651
from lark.reconstruct import Reconstructor
652
653
# Parse text
654
parser = Lark(grammar)
655
tree = parser.parse("x = 42 + y")
656
657
# Reconstruct original text
658
reconstructor = Reconstructor(parser)
659
reconstructed = reconstructor.reconstruct(tree)
660
print(f"Reconstructed: {reconstructed}")
661
662
# Reconstruct with custom formatting
663
def format_postproc(text):
664
return text.replace('+', ' + ').replace('=', ' = ')
665
666
formatted = reconstructor.reconstruct(tree, postproc=format_postproc)
667
print(f"Formatted: {formatted}")
668
```
669
670
### Generating Standalone Parser
671
672
```python
673
from lark import Lark
674
from lark.tools.standalone import gen_standalone
675
676
# Create parser
677
parser = Lark(grammar, parser='lalr') # Only LALR supports standalone
678
679
# Generate standalone code
680
standalone_code = gen_standalone(parser)
681
682
# Save to file
683
with open('my_parser.py', 'w') as f:
684
f.write(standalone_code)
685
686
# The generated file can be used without Lark:
687
# from my_parser import Lark_StandAlone
688
# parser = Lark_StandAlone()
689
# result = parser.parse(text)
690
```
691
692
### Parser Serialization and Caching
693
694
```python
695
from lark import Lark
696
from lark.tools.serialize import serialize
697
import pickle
698
699
# Create parser
700
parser = Lark(grammar)
701
702
# Serialize parser
703
with open('parser.cache', 'wb') as f:
704
serialize(parser, f)
705
706
# Load serialized parser
707
with open('parser.cache', 'rb') as f:
708
cached_parser = pickle.load(f)
709
710
# Use cached parser
711
result = cached_parser.parse(text)
712
```
713
714
### Tree Visualization
715
716
```python
717
from lark import Lark
718
from lark.tree import pydot__tree_to_png
719
720
# Parse text
721
parser = Lark(grammar)
722
tree = parser.parse("complex expression")
723
724
# Create PNG visualization
725
pydot__tree_to_png(tree, 'parse_tree.png', rankdir='TB')
726
727
# Create DOT file
728
from lark.tree import pydot__tree_to_dot
729
pydot__tree_to_dot(tree, 'parse_tree.dot')
730
```
731
732
### Command-Line Tool Integration
733
734
```python
735
from lark.tools import build_lalr, make_warnings_comments
736
import argparse
737
738
def main():
739
parser = argparse.ArgumentParser(description='Grammar processor')
740
parser.add_argument('grammar_file', help='Grammar file path')
741
parser.add_argument('input_file', help='Input file to parse')
742
parser.add_argument('--debug', action='store_true')
743
744
args = parser.parse_args()
745
746
# Configure warnings as comments
747
make_warnings_comments()
748
749
# Read grammar
750
with open(args.grammar_file) as f:
751
grammar = f.read()
752
753
# Build parser
754
lark_parser = build_lalr(grammar, debug=args.debug)
755
756
# Parse input
757
with open(args.input_file) as f:
758
text = f.read()
759
760
result = lark_parser.parse(text)
761
print(result.pretty())
762
763
if __name__ == '__main__':
764
main()
765
```
766
767
### Custom Logger Configuration
768
769
```python
770
from lark.utils import logger
771
import logging
772
773
# Configure Lark logging
774
logger.setLevel(logging.DEBUG)
775
handler = logging.StreamHandler()
776
handler.setFormatter(logging.Formatter(
777
'%(asctime)s - %(name)s - %(levelname)s - %(message)s'
778
))
779
logger.addHandler(handler)
780
781
# Now Lark will output debug information
782
parser = Lark(grammar, debug=True)
783
tree = parser.parse(text) # Will show debug output
784
```
785
786
### Advanced AST Transformation
787
788
```python
789
from lark import Lark, Transformer
790
from lark.ast_utils import camel_to_snake
791
792
class AstGenerator(Transformer):
793
"""Generate AST nodes with converted names."""
794
795
def __init__(self, ast_classes):
796
super().__init__()
797
self.ast_classes = ast_classes
798
799
def __default__(self, data, children, meta):
800
# Convert rule name to class name
801
class_name = data.title().replace('_', '')
802
803
if class_name in self.ast_classes:
804
ast_class = self.ast_classes[class_name]
805
return ast_class(*children)
806
807
# Fallback to generic AST node
808
return super().__default__(data, children, meta)
809
810
# Define AST classes
811
class Expression:
812
pass
813
814
class BinaryExpr(Expression):
815
def __init__(self, left, op, right):
816
self.left = left
817
self.op = op
818
self.right = right
819
820
ast_classes = {
821
'BinaryExpr': BinaryExpr,
822
'Expression': Expression
823
}
824
825
# Use custom AST generator
826
transformer = AstGenerator(ast_classes)
827
parser = Lark(grammar, transformer=transformer)
828
```
829
830
### File System Operations
831
832
```python
833
from lark.utils import FS
834
import os
835
836
# Cross-platform file operations
837
grammar_file = 'grammar.lark'
838
839
if FS.exists(grammar_file):
840
with FS.open(grammar_file, 'r', encoding='utf-8') as f:
841
grammar = f.read()
842
843
parser = Lark(grammar)
844
else:
845
print(f"Grammar file {grammar_file} not found")
846
```
847
848
### Smart Decorator Usage
849
850
```python
851
from lark.utils import smart_decorator
852
853
def timing_decorator(func, log_time=True):
854
"""Decorator that measures function execution time."""
855
import time
856
857
def wrapper(*args, **kwargs):
858
start = time.time()
859
result = func(*args, **kwargs)
860
end = time.time()
861
862
if log_time:
863
print(f"{func.__name__} took {end - start:.4f} seconds")
864
865
return result
866
867
return wrapper
868
869
# Create smart timing decorator
870
timed = smart_decorator(timing_decorator, log_time=True)
871
872
# Use with functions
873
@timed
874
def parse_large_file(filename):
875
parser = Lark(grammar)
876
with open(filename) as f:
877
return parser.parse(f.read())
878
879
# Function will automatically log execution time
880
result = parse_large_file('large_input.txt')
881
```