0
# Diff and Patches
1
2
Difference analysis between Git objects, working directory, and index. Provides detailed change information and patch generation with support for similarity detection, custom diff options, and patch application.
3
4
## Capabilities
5
6
### Diff Creation
7
8
Generate diffs between various Git entities including commits, trees, index, and working directory.
9
10
```python { .api }
11
class Diff:
12
@property
13
def deltas(self) -> Iterator[DiffDelta]:
14
"""Iterator over the file differences (use len(diff) for the count)"""
15
16
@property
17
def stats(self) -> DiffStats:
18
"""Diff statistics"""
19
20
@property
def patch(self) -> str | None:
21
"""Generate unified diff patch"""
22
23
def find_similar(
24
self,
25
flags: int = GIT_DIFF_FIND_BY_CONFIG,
26
rename_threshold: int = 50,
27
copy_threshold: int = 50,
28
rename_from_rewrite_threshold: int = 50,
29
break_rewrite_threshold: int = 60,
30
rename_limit: int = 1000
31
):
32
"""
33
Find renamed and copied files.
34
35
Parameters:
36
- flags: Similarity detection flags
37
- rename_threshold: Threshold for rename detection (0-100)
38
- copy_threshold: Threshold for copy detection (0-100)
39
- rename_from_rewrite_threshold: Threshold for rename from rewrite
40
- break_rewrite_threshold: Threshold for breaking rewrites
41
- rename_limit: Maximum files to consider for renames
42
"""
43
44
def merge(self, other: 'Diff') -> None:
45
"""Merge another diff into this one (in place)"""
46
47
def __len__(self) -> int:
48
"""Number of deltas"""
49
50
def __iter__(self):
51
"""Iterate over the diff, yielding one Patch per delta"""
52
53
def __getitem__(self, index: int) -> 'Patch':
54
"""Get the patch at the given index"""
55
56
# Repository diff methods
57
class Repository:
58
def diff(
59
self,
60
a: Object | str = None,
61
b: Object | str = None,
62
cached: bool = False,
63
flags: int = 0,
64
context_lines: int = 3,
65
interhunk_lines: int = 0,
66
pathspecs: list[str] = None,
67
max_size: int = 0,
68
swap: bool = False
69
) -> Diff:
70
"""
71
Create diff between objects.
72
73
Parameters:
74
- a: Source object or revision string (None = compare the working directory with the index; b must then be None as well)
75
- b: Target object or revision string (None = compare a with the working directory, or with the index when cached=True)
76
- cached: Compare with index instead of working directory
77
- flags: Diff option flags
78
- context_lines: Lines of context around changes
79
- interhunk_lines: Maximum number of unchanged lines between two hunks for them to be merged into a single hunk
80
- pathspecs: Limit diff to specific paths
81
- max_size: Maximum file size to diff
82
- swap: Swap source and target
83
84
Returns:
85
Diff object
86
"""
87
```
88
89
### Diff Deltas
90
91
DiffDelta represents the difference information for a single file.
92
93
```python { .api }
94
class DiffDelta:
95
@property
96
def status(self) -> int:
97
"""Change status (added, deleted, modified, etc.)"""
98
99
@property
100
def flags(self) -> int:
101
"""Delta flags"""
102
103
@property
104
def similarity(self) -> int:
105
"""Similarity score for renames/copies (0-100)"""
106
107
@property
108
def nfiles(self) -> int:
109
"""Number of files in delta (1 or 2)"""
110
111
@property
112
def old_file(self) -> DiffFile:
113
"""Source file information"""
114
115
@property
116
def new_file(self) -> DiffFile:
117
"""Target file information"""
118
119
@property
120
def is_binary(self) -> bool:
121
"""True if file is binary"""
122
123
class DiffFile:
124
@property
125
def path(self) -> str:
126
"""File path"""
127
128
@property
129
def size(self) -> int:
130
"""File size"""
131
132
@property
133
def flags(self) -> int:
134
"""File flags"""
135
136
@property
137
def mode(self) -> int:
138
"""File mode"""
139
140
@property
141
def oid(self) -> Oid:
142
"""File object ID"""
143
144
# Delta Status Constants
145
GIT_DELTA_UNMODIFIED: int # No change
146
GIT_DELTA_ADDED: int # File added
147
GIT_DELTA_DELETED: int # File deleted
148
GIT_DELTA_MODIFIED: int # File modified
149
GIT_DELTA_RENAMED: int # File renamed
150
GIT_DELTA_COPIED: int # File copied
151
GIT_DELTA_IGNORED: int # File ignored
152
GIT_DELTA_UNTRACKED: int # File untracked
153
GIT_DELTA_TYPECHANGE: int # File type changed
154
GIT_DELTA_UNREADABLE: int # File unreadable
155
GIT_DELTA_CONFLICTED: int # File conflicted
156
```
157
158
### Patches
159
160
Patch objects provide detailed line-by-line difference information.
161
162
```python { .api }
163
class Patch:
164
@property
165
def delta(self) -> DiffDelta:
166
"""Associated diff delta"""
167
168
@property
169
def hunks(self) -> list[DiffHunk]:
170
"""List of diff hunks"""
171
172
@property
173
def line_stats(self) -> tuple[int, int, int]:
174
"""Line statistics (context, additions, deletions)"""
175
176
@property
def data(self) -> bytes:
177
"""Raw patch contents as bytes (use the text property for a decoded string)"""
178
179
def __str__(self) -> str:
180
"""String representation"""
181
182
class DiffHunk:
183
@property
184
def old_start(self) -> int:
185
"""Start line in old file"""
186
187
@property
188
def old_lines(self) -> int:
189
"""Number of lines in old file"""
190
191
@property
192
def new_start(self) -> int:
193
"""Start line in new file"""
194
195
@property
196
def new_lines(self) -> int:
197
"""Number of lines in new file"""
198
199
@property
200
def header(self) -> str:
201
"""Hunk header"""
202
203
@property
204
def lines(self) -> list[DiffLine]:
205
"""List of diff lines"""
206
207
class DiffLine:
208
@property
209
def origin(self) -> str:
210
"""Line origin ('+', '-', ' ')"""
211
212
@property
213
def old_lineno(self) -> int:
214
"""Line number in old file"""
215
216
@property
217
def new_lineno(self) -> int:
218
"""Line number in new file"""
219
220
@property
221
def num_lines(self) -> int:
222
"""Number of newlines in content"""
223
224
@property
225
def content(self) -> str:
226
"""Line content"""
227
228
@property
229
def raw_content(self) -> bytes:
230
"""Raw line content"""
231
```
232
233
### Diff Statistics
234
235
DiffStats provides summary information about changes in a diff.
236
237
```python { .api }
238
class DiffStats:
239
@property
240
def files_changed(self) -> int:
241
"""Number of files changed"""
242
243
@property
244
def insertions(self) -> int:
245
"""Number of lines inserted"""
246
247
@property
248
def deletions(self) -> int:
249
"""Number of lines deleted"""
250
251
def format(self, format_flags: int, width: int = 80) -> str:
252
"""
253
Format statistics as string.
254
255
Parameters:
256
- format_flags: Formatting options
257
- width: Output width
258
259
Returns:
260
Formatted statistics string
261
"""
262
263
# Diff Stats Format Constants
264
GIT_DIFF_STATS_NONE: int # No stats
265
GIT_DIFF_STATS_FULL: int # Full stats with graph
266
GIT_DIFF_STATS_SHORT: int # Short format
267
GIT_DIFF_STATS_NUMBER: int # Numbers only
268
GIT_DIFF_STATS_INCLUDE_SUMMARY: int # Include summary
269
```
270
271
### Diff Options
272
273
Constants for controlling diff generation behavior.
274
275
```python { .api }
276
# Basic Diff Options
277
GIT_DIFF_NORMAL: int # Standard diff
278
GIT_DIFF_REVERSE: int # Reverse diff order
279
GIT_DIFF_INCLUDE_IGNORED: int # Include ignored files
280
GIT_DIFF_RECURSE_IGNORED_DIRS: int # Recurse into ignored dirs
281
GIT_DIFF_INCLUDE_UNTRACKED: int # Include untracked files
282
GIT_DIFF_RECURSE_UNTRACKED_DIRS: int # Recurse into untracked dirs
283
GIT_DIFF_INCLUDE_UNMODIFIED: int # Include unmodified files
284
GIT_DIFF_INCLUDE_TYPECHANGE: int # Include type changes
285
GIT_DIFF_INCLUDE_TYPECHANGE_TREES: int # Include tree type changes
286
GIT_DIFF_IGNORE_FILEMODE: int # Ignore file mode changes
287
GIT_DIFF_IGNORE_SUBMODULES: int # Ignore submodules
288
GIT_DIFF_IGNORE_CASE: int # Case insensitive
289
GIT_DIFF_DISABLE_PATHSPEC_MATCH: int # Disable pathspec matching
290
GIT_DIFF_SKIP_BINARY_CHECK: int # Skip binary check
291
GIT_DIFF_ENABLE_FAST_UNTRACKED_DIRS: int # Fast untracked dirs
292
293
# Content Options
294
GIT_DIFF_FORCE_TEXT: int # Treat all files as text
295
GIT_DIFF_FORCE_BINARY: int # Treat all files as binary
296
GIT_DIFF_IGNORE_WHITESPACE: int # Ignore whitespace
297
GIT_DIFF_IGNORE_WHITESPACE_CHANGE: int # Ignore whitespace changes
298
GIT_DIFF_IGNORE_WHITESPACE_EOL: int # Ignore EOL whitespace
299
GIT_DIFF_SHOW_UNTRACKED_CONTENT: int # Show untracked content
300
GIT_DIFF_SHOW_UNMODIFIED: int # Show unmodified files
301
GIT_DIFF_PATIENCE: int # Use patience diff algorithm
302
GIT_DIFF_MINIMAL: int # Minimize diff size
303
GIT_DIFF_SHOW_BINARY: int # Show binary diffs
304
305
# Similarity Detection Options
306
GIT_DIFF_FIND_RENAMES: int # Find renames
307
GIT_DIFF_FIND_COPIES: int # Find copies
308
GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED: int # Find copies from unmodified
309
GIT_DIFF_FIND_REWRITES: int # Find rewrites
310
GIT_DIFF_BREAK_REWRITES: int # Break rewrites
311
GIT_DIFF_FIND_AND_BREAK_REWRITES: int # Find and break rewrites
312
GIT_DIFF_FIND_FOR_UNTRACKED: int # Find for untracked files
313
GIT_DIFF_FIND_ALL: int # Find everything
314
GIT_DIFF_FIND_IGNORE_LEADING_WHITESPACE: int # Ignore leading whitespace
315
GIT_DIFF_FIND_IGNORE_WHITESPACE: int # Ignore all whitespace
316
GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE: int # Don't ignore whitespace
317
GIT_DIFF_FIND_EXACT_MATCH_ONLY: int # Exact matches only
318
GIT_DIFF_FIND_REMOVE_UNMODIFIED: int # Remove unmodified from output
319
```
320
321
### Usage Examples
322
323
#### Basic Diff Operations
324
325
```python
326
import pygit2
327
328
repo = pygit2.Repository('/path/to/repo')
329
330
# Diff index to HEAD (staged changes)
331
diff = repo.diff(cached=True)
332
print(f"Staged changes: {len(diff)} files")
333
334
# Diff working directory to HEAD
335
diff = repo.diff('HEAD')
336
print(f"Working directory changes: {len(diff)} files")
337
338
# Diff between commits
339
commit1 = repo.revparse_single('HEAD~5')
340
commit2 = repo.revparse_single('HEAD')
341
diff = repo.diff(commit1, commit2)
342
print(f"Changes in last 5 commits: {len(diff)} files")
343
344
# Diff between branches
345
main_commit = repo.branches['main'].target
346
feature_commit = repo.branches['feature'].target
347
diff = repo.diff(main_commit, feature_commit)
348
```
349
350
#### Analyzing Diff Details
351
352
```python
353
# Examine each changed file
354
for delta in diff.deltas:
355
status_names = {
356
pygit2.GIT_DELTA_ADDED: "Added",
357
pygit2.GIT_DELTA_DELETED: "Deleted",
358
pygit2.GIT_DELTA_MODIFIED: "Modified",
359
pygit2.GIT_DELTA_RENAMED: "Renamed",
360
pygit2.GIT_DELTA_COPIED: "Copied",
361
pygit2.GIT_DELTA_TYPECHANGE: "Type changed"
362
}
363
364
status = status_names.get(delta.status, "Unknown")
365
old_path = delta.old_file.path
366
new_path = delta.new_file.path
367
368
if delta.status == pygit2.GIT_DELTA_RENAMED:
369
print(f"{status}: {old_path} -> {new_path} ({delta.similarity}%)")
370
else:
371
print(f"{status}: {new_path}")
372
373
if delta.is_binary:
374
print(" (binary file)")
375
else:
376
print(f" Size: {delta.old_file.size} -> {delta.new_file.size} bytes")
377
```
378
379
#### Working with Patches
380
381
```python
382
# Generate patches for each file
383
for patch in diff:
384
delta = patch.delta
385
if patch:
386
print(f"\n=== {delta.new_file.path} ===")
387
print(f"Statistics: {patch.line_stats}")
388
389
# Show detailed line changes
390
for hunk in patch.hunks:
391
print(f"\n@@ -{hunk.old_start},{hunk.old_lines} +{hunk.new_start},{hunk.new_lines} @@")
392
print(hunk.header)
393
394
for line in hunk.lines:
395
marker = line.origin
396
content = line.content.rstrip('\n')
397
line_num = line.new_lineno if line.new_lineno > 0 else line.old_lineno
398
print(f"{marker}{line_num:4d}: {content}")
399
400
# Generate unified patch
401
patch_text = diff.patch or ''  # 'patch' is a property; it can be None for an empty diff
402
with open('changes.patch', 'w') as f:
403
f.write(patch_text)
404
```
405
406
#### Diff Statistics
407
408
```python
409
# Get summary statistics
410
stats = diff.stats
411
print(f"Files changed: {stats.files_changed}")
412
print(f"Insertions: {stats.insertions}")
413
print(f"Deletions: {stats.deletions}")
414
415
# Format statistics
416
stats_text = stats.format(
417
pygit2.GIT_DIFF_STATS_FULL | pygit2.GIT_DIFF_STATS_INCLUDE_SUMMARY,
418
width=80
419
)
420
print(stats_text)
421
```
422
423
#### Advanced Diff Options
424
425
```python
426
# Ignore whitespace changes
427
diff = repo.diff(
428
'HEAD~1',
429
'HEAD',
430
flags=pygit2.GIT_DIFF_IGNORE_WHITESPACE_CHANGE
431
)
432
433
# Include untracked files
434
diff = repo.diff(
435
flags=pygit2.GIT_DIFF_INCLUDE_UNTRACKED
436
)
437
438
# Custom context lines
439
diff = repo.diff(
440
'HEAD~1',
441
'HEAD',
442
context_lines=10
443
)
444
445
# Limit to specific paths
446
diff = repo.diff(
447
'HEAD~1',
448
'HEAD',
449
pathspecs=['*.py', 'docs/']
450
)
451
```
452
453
#### Similarity Detection
454
455
```python
456
# Find renames and copies
457
diff = repo.diff('HEAD~1', 'HEAD')
458
diff.find_similar(
459
flags=pygit2.GIT_DIFF_FIND_RENAMES | pygit2.GIT_DIFF_FIND_COPIES,
460
rename_threshold=50,
461
copy_threshold=70
462
)
463
464
# Check for renames/copies
465
for delta in diff.deltas:
466
if delta.status == pygit2.GIT_DELTA_RENAMED:
467
print(f"Renamed: {delta.old_file.path} -> {delta.new_file.path}")
468
print(f"Similarity: {delta.similarity}%")
469
elif delta.status == pygit2.GIT_DELTA_COPIED:
470
print(f"Copied: {delta.old_file.path} -> {delta.new_file.path}")
471
print(f"Similarity: {delta.similarity}%")
472
```
473
474
#### Tree and Index Diffs
475
476
```python
477
# Compare trees directly
478
tree1 = repo.revparse_single('HEAD~1').tree
479
tree2 = repo.revparse_single('HEAD').tree
480
diff = tree1.diff_to_tree(tree2)
481
482
# Compare tree to working directory
483
tree = repo.revparse_single('HEAD').tree
484
diff = tree.diff_to_workdir()
485
486
# Compare tree to index
487
diff = tree.diff_to_index(repo.index)
488
489
# Compare index to working directory
490
diff = repo.index.diff_to_workdir()
491
```