0
# Diff Processing Utilities
1
2
Utilities for diffing text files and processing diff opcodes to extract changed line numbers and content chunks. These functions form the foundation for comparing original code with reformatted code.
3
4
## Capabilities
5
6
### Diff Generation
7
8
Functions for generating and processing diffs between text documents.
9
10
```python { .api }
11
def diff_and_get_opcodes(src: TextDocument, dst: TextDocument) -> List[Tuple[str, int, int, int, int]]:
12
"""
13
Generate diff opcodes between source and destination documents.
14
15
Divides a diff between the original and reformatted content into
16
alternating chunks of intact (represented by the 'equal' tag) and
17
modified ('delete', 'replace' or 'insert' tag) lines.
18
19
Parameters:
20
- src: Source document (original content)
21
- dst: Destination document (reformatted content)
22
23
Returns:
24
List of opcodes, where each opcode is a tuple of:
25
(tag, src_start, src_end, dst_start, dst_end)
26
27
Example opcode: ('replace', 0, 1, 0, 2) means replace src[0:1] (line 0)
28
with dst[0:2] (lines 0 and 1); indices are 0-based and end-exclusive
29
"""
30
31
def opcodes_to_chunks(
32
opcodes: List[Tuple[str, int, int, int, int]],
33
src: TextDocument,
34
dst: TextDocument
35
) -> Generator[DiffChunk, None, None]:
36
"""
37
Convert diff opcodes into chunks with line content and offsets.
38
39
Picks the lines from original and reformatted content for each opcode
40
and combines line content with the 1-based line offset in the original content.
41
42
Parameters:
43
- opcodes: List of diff opcodes from diff_and_get_opcodes
44
- src: Source document (original content)
45
- dst: Destination document (reformatted content)
46
47
Yields:
48
DiffChunk objects containing:
49
- original_lines_offset: 1-based line number where chunk starts in original
50
- original_lines: List of original line content
51
- formatted_lines: List of reformatted line content
52
"""
53
```
54
55
### Line Number Extraction
56
57
Functions for extracting changed line numbers from diff opcodes.
58
59
```python { .api }
60
def opcodes_to_edit_linenums(opcodes: List[Tuple[str, int, int, int, int]]) -> List[int]:
61
"""
62
Convert diff opcodes to a list of changed line numbers.
63
64
Parameters:
65
- opcodes: List of diff opcodes
66
67
Returns:
68
List of 1-based line numbers that were changed
69
"""
70
```
71
72
## Usage Examples
73
74
### Basic Diff Processing
75
76
```python
77
from darker.diff import diff_and_get_opcodes, opcodes_to_chunks
78
from darkgraylib.utils import TextDocument
79
80
# Create example documents
81
original = TextDocument.from_lines([
82
'for i in range(5): print(i)',
83
'print("done")'
84
])
85
86
reformatted = TextDocument.from_lines([
87
'for i in range(5):',
88
' print(i)',
89
'print("done")'
90
])
91
92
# Generate diff opcodes
93
opcodes = diff_and_get_opcodes(original, reformatted)
94
print(f"Generated {len(opcodes)} opcodes:")
95
96
for opcode in opcodes:
97
tag, src_start, src_end, dst_start, dst_end = opcode
98
print(f" {tag}: src[{src_start}:{src_end}] -> dst[{dst_start}:{dst_end}]")
99
100
# Convert to chunks
101
chunks = list(opcodes_to_chunks(opcodes, original, reformatted))
102
print(f"\nGenerated {len(chunks)} chunks:")
103
104
for i, chunk in enumerate(chunks):
105
print(f" Chunk {i+1}: starts at line {chunk.original_lines_offset}")
106
print(f" Original: {chunk.original_lines}")
107
print(f" Formatted: {chunk.formatted_lines}")
108
```
109
110
### Extract Changed Line Numbers
111
112
```python
113
from darker.diff import diff_and_get_opcodes, opcodes_to_edit_linenums
114
from darkgraylib.utils import TextDocument
115
116
# Compare git working tree version with formatted version
117
with open("myfile.py") as f:
118
current_content = TextDocument.from_str(f.read())
119
120
# Assume formatted_content already holds the Black/Ruff/etc. output as a TextDocument, e.g.:
121
# formatted_content = apply_formatter(current_content)
122
123
# Get diff and extract changed lines
124
opcodes = diff_and_get_opcodes(current_content, formatted_content)
125
changed_lines = opcodes_to_edit_linenums(opcodes)
126
127
if changed_lines:
128
print(f"Formatter would change lines: {changed_lines}")
129
else:
130
print("No formatting changes needed")
131
```
132
133
### Integration with Formatter Output
134
135
```python
136
from darker.diff import diff_and_get_opcodes, opcodes_to_chunks
137
from darker.chooser import choose_lines
138
from darker.git import EditedLinenumsDiffer
139
from darkgraylib.utils import TextDocument
140
from pathlib import Path
from typing import List
141
142
def apply_selective_formatting(file_path: Path, original: TextDocument, formatted: TextDocument, git_edits: List[int]) -> TextDocument:
143
"""Apply formatting only to git-edited regions."""
144
145
# Get diff chunks between original and formatted
146
opcodes = diff_and_get_opcodes(original, formatted)
147
chunks = list(opcodes_to_chunks(opcodes, original, formatted))
148
149
# Choose lines based on git edits
150
selected_lines = list(choose_lines(chunks, git_edits))
151
152
# Reconstruct document
153
return TextDocument.from_lines(selected_lines)
154
155
# Example usage
156
file_path = Path("src/module.py")
157
with open(file_path) as f:
158
original_content = TextDocument.from_str(f.read())
159
160
# Get git edits (implementation depends on git integration)
161
# git_edited_lines = get_git_edited_lines(file_path)
162
163
# Apply formatter (Black, Ruff, etc.)
164
# formatted_content = apply_formatter(original_content)
165
166
# Apply selective formatting
167
# result = apply_selective_formatting(file_path, original_content, formatted_content, git_edited_lines)
168
```
169
170
### Advanced Opcode Analysis
171
172
```python
173
from darker.diff import diff_and_get_opcodes
174
175
def analyze_diff_opcodes(opcodes):
176
"""Analyze diff opcodes to understand change patterns."""
177
stats = {
178
'equal': 0, # unchanged chunks
179
'delete': 0, # chunks of deleted lines
180
'insert': 0, # chunks of inserted lines
181
'replace': 0 # chunks of modified lines
182
}
183
184
for tag, src_start, src_end, dst_start, dst_end in opcodes:
185
stats[tag] += 1
186
187
if tag == 'equal':
188
print(f"Unchanged: {src_end - src_start} lines")
189
elif tag == 'delete':
190
print(f"Deleted: lines {src_start+1}-{src_end}")
191
elif tag == 'insert':
192
print(f"Inserted: {dst_end - dst_start} lines at position {src_start+1}")
193
elif tag == 'replace':
194
print(f"Replaced: lines {src_start+1}-{src_end} with {dst_end - dst_start} lines")
195
196
return stats
197
198
# Usage (original_doc and formatted_doc are existing TextDocument instances)
199
opcodes = diff_and_get_opcodes(original_doc, formatted_doc)
200
stats = analyze_diff_opcodes(opcodes)
201
print(f"Diff summary: {stats}")
202
```