0
# Incremental Parsing
1
2
Tree-sitter's incremental parsing capabilities enable efficient updates to syntax trees when source code changes. Instead of reparsing entire files, incremental parsing reuses unchanged portions of the tree and only reparses modified sections, making it ideal for real-time applications like editors and language servers.
3
4
## Capabilities
5
6
### Tree Editing
7
8
Edit syntax trees to reflect changes in source code before incremental parsing.
9
10
```python { .api }
11
class Tree:
12
def edit(
13
self,
14
start_byte: int,
15
old_end_byte: int,
16
new_end_byte: int,
17
start_point: Point | tuple[int, int],
18
old_end_point: Point | tuple[int, int],
19
new_end_point: Point | tuple[int, int],
20
) -> None:
21
"""
22
Edit the syntax tree to reflect a change in source code.
23
24
Args:
25
start_byte: Start byte position of the edit
26
old_end_byte: End byte position before the edit
27
new_end_byte: End byte position after the edit
28
start_point: Start point (row, column) of the edit
29
old_end_point: End point before the edit
30
new_end_point: End point after the edit
31
"""
32
```
33
34
```python { .api }
35
class Node:
36
def edit(
37
self,
38
start_byte: int,
39
old_end_byte: int,
40
new_end_byte: int,
41
start_point: Point | tuple[int, int],
42
old_end_point: Point | tuple[int, int],
43
new_end_point: Point | tuple[int, int],
44
) -> None:
45
"""
46
Edit this node to reflect a change in source code.
47
Same parameters as Tree.edit().
48
"""
49
```
50
51
### Incremental Parsing
52
53
Parse updated source code using a previous tree to enable incremental updates.
54
55
```python { .api }
56
class Parser:
57
def parse(
58
self,
59
source: bytes | Callable[[int, Point], bytes | None],
60
old_tree: Tree | None = None,
61
encoding: str = "utf8",
62
progress_callback: Callable[[int, bool], bool] | None = None,
63
) -> Tree:
64
"""
65
Parse source code, optionally using previous tree for incremental parsing.
66
67
Args:
68
source: Source code or read callback function
69
old_tree: Previous tree to enable incremental parsing
70
encoding: Text encoding
71
progress_callback: Progress monitoring callback
72
73
Returns:
74
New syntax tree incorporating changes
75
"""
76
```
77
78
### Change Detection
79
80
Identify ranges in the tree that changed between parse operations.
81
82
```python { .api }
83
class Tree:
84
def changed_ranges(self, new_tree: Tree) -> list[Range]:
85
"""
86
Get ranges that changed between this tree and a new tree.
87
88
Args:
89
new_tree: New tree to compare against
90
91
Returns:
92
List of ranges where syntactic structure changed
93
"""
94
```
95
96
## Usage Examples
97
98
### Basic Incremental Parsing
99
100
```python
101
from tree_sitter import Language, Parser, Point
102
import tree_sitter_python
103
104
# Setup
105
language = Language(tree_sitter_python.language())
106
parser = Parser(language)
107
108
# Initial parsing
109
original_source = b'''
110
def calculate(x, y):
111
result = x + y
112
return result
113
'''
114
115
tree = parser.parse(original_source)
116
print(f"Original tree root: {tree.root_node.type}")
117
118
# Simulate editing: change "x + y" to "x * y"
119
# Edit is at position where "+" appears
120
edit_start_byte = original_source.find(b'+')
121
edit_start_point = (1, 14) # Line 1, column 14
122
123
# Edit the tree to reflect the change
124
tree.edit(
125
start_byte=edit_start_byte,
126
old_end_byte=edit_start_byte + 1, # "+" is 1 byte
127
new_end_byte=edit_start_byte + 1, # "*" is also 1 byte
128
start_point=edit_start_point,
129
old_end_point=(1, 15), # End of "+"
130
new_end_point=(1, 15), # End of "*"
131
)
132
133
# Create modified source
134
modified_source = original_source.replace(b'+', b'*')
135
136
# Incremental parsing using the edited tree
137
new_tree = parser.parse(modified_source, old_tree=tree)
138
print(f"New tree root: {new_tree.root_node.type}")
139
```
140
141
### Detecting Changes
142
143
```python
144
# Find what changed between trees
145
changed_ranges = tree.changed_ranges(new_tree)
146
147
print(f"Found {len(changed_ranges)} changed ranges:")
148
for i, range_obj in enumerate(changed_ranges):
149
print(f" Range {i}:")
150
print(f" Start: byte {range_obj.start_byte}, point {range_obj.start_point}")
151
print(f" End: byte {range_obj.end_byte}, point {range_obj.end_point}")
152
153
# Extract changed text
154
changed_text = modified_source[range_obj.start_byte:range_obj.end_byte]
155
print(f" Changed text: {changed_text}")
156
```
157
158
### Complex Text Editing
159
160
```python
161
# Simulate inserting a new line of code
162
original_source = b'''
163
def process_data(items):
164
results = []
165
return results
166
'''
167
168
tree = parser.parse(original_source)
169
170
# Insert "filtered = filter_items(items)" before "return results"
171
insert_position = original_source.find(b'return results')
172
insert_byte = insert_position
173
insert_point = (2, 4) # Line 2, column 4
174
175
new_line = b' filtered = filter_items(items)\n '
176
modified_source = (
177
original_source[:insert_position] +
178
new_line +
179
original_source[insert_position:]
180
)
181
182
# Edit tree for the insertion
183
tree.edit(
184
start_byte=insert_byte,
185
old_end_byte=insert_byte, # Insertion, so old_end equals start
186
new_end_byte=insert_byte + len(new_line), # New content length
187
start_point=insert_point,
188
old_end_point=insert_point, # Insertion
189
new_end_point=(3, 4), # New line added, so row increases
190
)
191
192
# Incremental parsing
193
new_tree = parser.parse(modified_source, old_tree=tree)
194
```
195
196
### Multiple Edits
197
198
```python
199
# Handle multiple edits in sequence
200
original_source = b'''
201
def old_function(a, b):
202
temp = a + b
203
return temp
204
'''
205
206
tree = parser.parse(original_source)
207
208
# Edit 1: Rename function
209
edit1_start = original_source.find(b'old_function')
210
tree.edit(
211
start_byte=edit1_start,
212
old_end_byte=edit1_start + len(b'old_function'),
213
new_end_byte=edit1_start + len(b'new_function'),
214
start_point=(0, 4),
215
old_end_point=(0, 16),
216
new_end_point=(0, 16),
217
)
218
219
modified_source = original_source.replace(b'old_function', b'new_function')
220
221
# Edit 2: Change operator
222
edit2_start = modified_source.find(b'+')
223
tree.edit(
224
start_byte=edit2_start,
225
old_end_byte=edit2_start + 1,
226
new_end_byte=edit2_start + 1,
227
start_point=(1, 13),
228
old_end_point=(1, 14),
229
new_end_point=(1, 14),
230
)
231
232
final_source = modified_source.replace(b'+', b'*')
233
234
# Incremental parsing with all edits
235
final_tree = parser.parse(final_source, old_tree=tree)
236
```
237
238
### Deletion Handling
239
240
```python
241
# Handle text deletion
242
original_source = b'''
243
def calculate(x, y, z):
244
first = x + y
245
second = first * z
246
return second
247
'''
248
249
tree = parser.parse(original_source)
250
251
# Remove the parameter "z" and its usage
252
# First, remove ", z" from parameters
253
param_start = original_source.find(b', z')
254
param_end = param_start + len(b', z')
255
256
# Edit for parameter removal
257
tree.edit(
258
start_byte=param_start,
259
old_end_byte=param_end,
260
new_end_byte=param_start, # Deletion, so new_end equals start
261
start_point=(0, 16),
262
old_end_point=(0, 19),
263
new_end_point=(0, 16), # Deletion
264
)
265
266
# Remove the z usage (need to adjust for previous edit)
267
modified_source = original_source[:param_start] + original_source[param_end:]
268
269
# Remove "second = first * z" line
270
line_to_remove = b' second = first * z\n'
271
line_start = modified_source.find(line_to_remove)
272
line_end = line_start + len(line_to_remove)
273
274
tree.edit(
275
start_byte=line_start,
276
old_end_byte=line_end,
277
new_end_byte=line_start, # Deletion
278
start_point=(2, 0),
279
old_end_point=(3, 0),
280
new_end_point=(2, 0),
281
)
282
283
final_source = modified_source[:line_start] + modified_source[line_end:]
284
285
# Update return statement
286
final_source = final_source.replace(b'second', b'first')
287
288
# Final incremental parse
289
final_tree = parser.parse(final_source, old_tree=tree)
290
```
291
292
### Performance Monitoring
293
294
```python
295
import time
296
297
def benchmark_parsing(source, old_tree=None):
298
"""Benchmark parsing performance."""
299
start_time = time.time()
300
tree = parser.parse(source, old_tree=old_tree)
301
end_time = time.time()
302
303
parse_type = "incremental" if old_tree else "full"
304
print(f"{parse_type.title()} parsing took {end_time - start_time:.4f} seconds")
305
return tree
306
307
# Compare full vs incremental parsing
308
original_tree = benchmark_parsing(original_source)
309
310
# Make edit
311
tree.edit(start_byte=50, old_end_byte=51, new_end_byte=51,
312
start_point=(1, 10), old_end_point=(1, 11), new_end_point=(1, 11))
313
314
modified_source = original_source[:50] + b'*' + original_source[51:]
315
316
# Incremental parsing should be faster
317
new_tree = benchmark_parsing(modified_source, old_tree=tree)
318
```
319
320
### Editor Integration Pattern
321
322
```python
323
class IncrementalEditor:
324
"""Example pattern for editor integration."""
325
326
def __init__(self, language):
327
self.parser = Parser(language)
328
self.current_tree = None
329
self.current_source = b""
330
331
def set_content(self, source):
332
"""Set initial content."""
333
self.current_source = source
334
self.current_tree = self.parser.parse(source)
335
336
def apply_edit(self, start_byte, old_end_byte, new_end_byte,
337
start_point, old_end_point, new_end_point, new_text):
338
"""Apply an edit and reparse incrementally."""
339
if self.current_tree:
340
# Edit the tree
341
self.current_tree.edit(
342
start_byte, old_end_byte, new_end_byte,
343
start_point, old_end_point, new_end_point
344
)
345
346
# Update source
347
self.current_source = (
348
self.current_source[:start_byte] +
349
new_text +
350
self.current_source[old_end_byte:]
351
)
352
353
# Incremental parsing
354
self.current_tree = self.parser.parse(
355
self.current_source,
356
old_tree=self.current_tree
357
)
358
359
def get_changed_ranges(self, old_tree):
360
"""Get ranges that changed since old tree."""
361
if old_tree and self.current_tree:
362
return old_tree.changed_ranges(self.current_tree)
363
return []
364
365
# Usage
366
editor = IncrementalEditor(language)
367
editor.set_content(b"def hello(): pass")
368
369
# Simulate typing
370
old_tree = editor.current_tree.copy()
371
editor.apply_edit(
372
start_byte=13, old_end_byte=13, new_end_byte=20,
373
start_point=(0, 13), old_end_point=(0, 13), new_end_point=(0, 20),
374
new_text=b"world()"
375
)
376
377
# Check what changed
378
changes = editor.get_changed_ranges(old_tree)
379
print(f"Edit affected {len(changes)} ranges")
380
```
381
382
## Performance Tips
383
384
1. **Always edit the tree before incremental parsing** - This allows Tree-sitter to reuse unchanged subtrees
385
2. **Batch edits when possible** - Apply multiple edits to the same tree before reparsing
386
3. **Use precise edit ranges** - Accurate byte and point positions improve incremental parsing efficiency
387
4. **Monitor change ranges** - Use `changed_ranges()` to identify which parts of your application need updates
388
5. **Consider parse performance** - Incremental parsing is most beneficial for large files with small changes