0
# Language and Parser Management
1
2
Core functionality for loading language grammars and creating parsers that convert source code into syntax trees. The Language class represents compiled grammars, while Parser handles the actual parsing process.
3
4
## Capabilities
5
6
### Language Loading and Introspection
7
8
Load language grammars and inspect their properties including node types, fields, and parse states.
9
10
```python { .api }
11
class Language:
12
def __init__(self, ptr: object) -> None:
13
"""
14
Create language from language pointer.
15
16
Args:
17
ptr: Language pointer obtained from an external grammar package (e.g. tree_sitter_python.language())
18
"""
19
20
@property
21
def name(self) -> str | None:
22
"""Language name if available."""
23
24
@property
25
def abi_version(self) -> int:
26
"""ABI version of the language."""
27
28
@property
29
def semantic_version(self) -> tuple[int, int, int] | None:
30
"""Semantic version tuple (major, minor, patch)."""
31
32
@property
33
def node_kind_count(self) -> int:
34
"""Total number of node kinds in the grammar."""
35
36
@property
37
def parse_state_count(self) -> int:
38
"""Total number of parse states in the grammar."""
39
40
@property
41
def field_count(self) -> int:
42
"""Total number of fields in the grammar."""
43
44
def node_kind_for_id(self, id: int) -> str | None:
45
"""
46
Get node kind name for the given ID.
47
48
Args:
49
id: Node kind ID
50
51
Returns:
52
Node kind name or None if invalid ID
53
"""
54
55
def id_for_node_kind(self, kind: str, named: bool) -> int | None:
56
"""
57
Get ID for the given node kind.
58
59
Args:
60
kind: Node kind name
61
named: Whether the node kind is named
62
63
Returns:
64
Node kind ID or None if not found
65
"""
66
67
def field_name_for_id(self, field_id: int) -> str | None:
68
"""
69
Get field name for the given field ID.
70
71
Args:
72
field_id: Field ID
73
74
Returns:
75
Field name or None if invalid ID
76
"""
77
78
def field_id_for_name(self, name: str) -> int | None:
79
"""
80
Get field ID for the given field name.
81
82
Args:
83
name: Field name
84
85
Returns:
86
Field ID or None if not found
87
"""
88
89
@property
90
def supertypes(self) -> tuple[int, ...]:
91
"""Tuple of supertype node IDs in the grammar."""
92
93
def subtypes(self, supertype: int) -> tuple[int, ...]:
94
"""
95
Get subtypes for the given supertype.
96
97
Args:
98
supertype: Supertype node ID
99
100
Returns:
101
Tuple of subtype node IDs
102
"""
103
104
def node_kind_is_named(self, id: int) -> bool:
105
"""
106
Check if the node kind is named.
107
108
Args:
109
id: Node kind ID
110
111
Returns:
112
True if the node kind is named
113
"""
114
115
def node_kind_is_visible(self, id: int) -> bool:
116
"""
117
Check if the node kind is visible.
118
119
Args:
120
id: Node kind ID
121
122
Returns:
123
True if the node kind is visible
124
"""
125
126
def node_kind_is_supertype(self, id: int) -> bool:
127
"""
128
Check if the node kind is a supertype.
129
130
Args:
131
id: Node kind ID
132
133
Returns:
134
True if the node kind is a supertype
135
"""
136
137
def next_state(self, state: int, id: int) -> int:
138
"""
139
Get the next parse state given current state and symbol ID.
140
141
Args:
142
state: Current parse state
143
id: Symbol ID
144
145
Returns:
146
Next parse state
147
"""
148
149
def lookahead_iterator(self, state: int) -> LookaheadIterator | None:
150
"""
151
Create lookahead iterator for the given parse state.
152
153
Args:
154
state: Parse state
155
156
Returns:
157
LookaheadIterator for the state or None if invalid
158
"""
159
160
def copy(self) -> Language:
161
"""Create a copy of this language."""
162
163
@property
164
def version(self) -> int:
165
"""Deprecated: Use abi_version instead."""
166
167
def query(self, source: str) -> Query:
168
"""Deprecated: Use the Query() constructor instead."""
169
```
170
171
### Parser Creation and Configuration
172
173
Create parsers and configure them with languages, byte ranges, and logging.
174
175
```python { .api }
176
class Parser:
177
def __init__(
178
self,
179
language: Language | None = None,
180
*,
181
included_ranges: list[Range] | None = None,
182
logger: Callable[[LogType, str], None] | None = None,
183
) -> None:
184
"""
185
Create a new parser.
186
187
Args:
188
language: Language to use for parsing
189
included_ranges: Byte ranges to include in parsing
190
logger: Callback for parse/lex log messages
191
"""
192
193
@property
194
def language(self) -> Language | None:
195
"""Current language (can be read, assigned, or deleted)."""
196
197
@language.setter
198
def language(self, language: Language) -> None: ...
199
200
@language.deleter
201
def language(self) -> None: ...
202
203
@property
204
def included_ranges(self) -> list[Range]:
205
"""Byte ranges to include in parsing (can be read, assigned, or deleted)."""
206
207
@included_ranges.setter
208
def included_ranges(self, ranges: list[Range]) -> None: ...
209
210
@included_ranges.deleter
211
def included_ranges(self) -> None: ...
212
213
@property
214
def logger(self) -> Callable[[LogType, str], None] | None:
215
"""Logging callback (can be read, assigned, or deleted)."""
216
217
@logger.setter
218
def logger(self, logger: Callable[[LogType, str], None]) -> None: ...
219
220
@logger.deleter
221
def logger(self) -> None: ...
222
223
def reset(self) -> None:
224
"""Reset the parser state."""
225
226
def print_dot_graphs(self, file) -> None:
227
"""
228
Print parse graphs as DOT format for debugging.
229
230
Args:
231
file: File object with a fileno() method to write the graphs to, or None to turn off graph output
232
"""
233
```
234
235
### Source Code Parsing
236
237
Parse source code from bytes or using a read callback function for large or streaming sources.
238
239
```python { .api }
240
class Parser:
241
def parse(
242
self,
243
source: bytes | bytearray | memoryview,
244
old_tree: Tree | None = None,
245
encoding: str = "utf8",
246
) -> Tree:
247
"""
248
Parse source code from bytes.
249
250
Args:
251
source: Source code as a bytes-like object (bytes, bytearray, or memoryview)
252
old_tree: Previous tree for incremental parsing
253
encoding: Text encoding ("utf8", "utf16", "utf16le", "utf16be")
254
255
Returns:
256
Parsed syntax tree
257
"""
258
259
def parse(
260
self,
261
read_callback: Callable[[int, Point], bytes | None],
262
old_tree: Tree | None = None,
263
encoding: str = "utf8",
264
progress_callback: Callable[[int, bool], bool] | None = None,
265
) -> Tree:
266
"""
267
Parse source code using a read callback.
268
269
Args:
270
read_callback: Function called with a byte offset and a Point; returns the source bytes starting at that position, or None when there is no more input
271
old_tree: Previous tree for incremental parsing
272
encoding: Text encoding
273
progress_callback: Progress monitoring callback
274
275
Returns:
276
Parsed syntax tree
277
"""
278
```
279
280
## Usage Examples
281
282
### Loading Language Grammars
283
284
```python
285
import tree_sitter_python
286
import tree_sitter_javascript
287
from tree_sitter import Language
288
289
# Load Python grammar
290
py_language = Language(tree_sitter_python.language())
291
print(f"Python grammar: {py_language.name}")
292
print(f"Node types: {py_language.node_kind_count}")
293
294
# Load JavaScript grammar
295
js_language = Language(tree_sitter_javascript.language())
296
print(f"JavaScript ABI version: {js_language.abi_version}")
297
```
298
299
### Basic Parsing
300
301
```python
302
from tree_sitter import Language, Parser
303
import tree_sitter_python
304
305
# Setup
306
language = Language(tree_sitter_python.language())
307
parser = Parser(language)
308
309
# Parse simple code
310
code = b'''
311
def calculate(x, y):
312
return x + y
313
'''
314
315
tree = parser.parse(code)
316
print(f"Root node: {tree.root_node.type}")
317
print(f"Children: {len(tree.root_node.children)}")
318
```
319
320
### Parsing with Read Callback
321
322
```python
323
# For large files or streaming
324
source_lines = ["def main():\n", " print('hello')\n", " return 0\n"]
325
326
def read_by_line(byte_offset, point):
327
row, column = point
328
if row >= len(source_lines):
329
return None
330
line = source_lines[row]
331
return line[column:].encode("utf8")
332
333
tree = parser.parse(read_by_line)
334
```
335
336
### Parser Configuration
337
338
```python
339
from tree_sitter import Parser, Range, LogType
340
341
def custom_logger(log_type, message):
342
if log_type == LogType.PARSE:
343
print(f"Parse: {message}")
344
elif log_type == LogType.LEX:
345
print(f"Lex: {message}")
346
347
# Create parser with configuration
348
parser = Parser(
349
language=language,
350
included_ranges=[Range((0, 0), (10, 0), 0, 100)],
351
logger=custom_logger
352
)
353
354
# Modify parser settings
355
parser.language = different_language
356
parser.included_ranges = [Range((0, 0), (5, 0), 0, 50)]
357
del parser.logger # Remove logger
358
```