0
# Index Management
1
2
This module provides Git index manipulation for staging changes, managing file state, and preparing commits. The index serves as Git's staging area between the working directory and the repository history.
3
4
## Capabilities
5
6
### Index Class
7
8
Main class for Git index operations with file staging and commit preparation.
9
10
```python { .api }
11
class Index:
12
"""
13
Git index (staging area) for managing file changes.
14
15
The index tracks file modifications, additions, and deletions
16
between the working directory and repository commits.
17
"""
18
19
def __init__(
20
self,
21
filename: Union[bytes, str, os.PathLike],
22
read: bool = True,
23
skip_hash: bool = False,
24
version: Optional[int] = None,
25
):
26
"""
27
Initialize index from file.
28
29
Parameters:
30
- filename: Path to index file (.git/index)
31
- read: Whether to initialize from existing file
32
- skip_hash: Whether to skip SHA1 hash when writing (for manyfiles feature)
33
- version: Index format version to use (None = auto-detect)
34
"""
35
36
def __getitem__(self, name: bytes) -> Union[IndexEntry, ConflictedIndexEntry]:
37
"""
38
Get index entry by path.
39
40
Parameters:
41
- name: File path as bytes
42
43
Returns:
44
IndexEntry for the file, or ConflictedIndexEntry if the path has unresolved merge conflicts
45
46
Raises:
47
KeyError: If file not in index
48
"""
49
50
def __setitem__(self, name: bytes, entry: IndexEntry) -> None:
51
"""
52
Set index entry for path.
53
54
Parameters:
55
- name: File path as bytes
56
- entry: IndexEntry object
57
"""
58
59
def __delitem__(self, name: bytes) -> None:
60
"""
61
Remove entry from index.
62
63
Parameters:
64
- name: File path as bytes to remove
65
"""
66
67
def __contains__(self, name: bytes) -> bool:
68
"""
69
Check if path is in index.
70
71
Parameters:
72
- name: File path as bytes
73
74
Returns:
75
True if path exists in index
76
"""
77
78
def __iter__(self) -> Iterator[bytes]:
79
"""
80
Iterate over index paths.
81
82
Yields:
83
File paths as bytes
84
"""
85
86
def __len__(self) -> int:
87
"""
88
Get number of entries in index.
89
90
Returns:
91
Number of index entries
92
"""
93
94
def clear(self) -> None:
95
"""Remove all entries from index."""
96
97
def get_sha1(self, path: bytes) -> bytes:
98
"""
99
Return the SHA-1 for the object at a path.
100
101
Parameters:
102
- path: File path as bytes
103
104
Returns:
105
SHA-1 hash of the object
106
107
Raises:
108
UnmergedEntries: If path has merge conflicts
109
"""
110
111
def get_mode(self, path: bytes) -> int:
112
"""
113
Return the POSIX file mode for the object at a path.
114
115
Parameters:
116
- path: File path as bytes
117
118
Returns:
119
File mode as integer
120
121
Raises:
122
UnmergedEntries: If path has merge conflicts
123
"""
124
125
def iterobjects(self) -> Iterable[tuple[bytes, bytes, int]]:
126
"""
127
Iterate over path, sha, mode tuples for use with commit_tree.
128
129
Yields:
130
Tuples of (path, sha, mode)
131
132
Raises:
133
UnmergedEntries: If index contains unmerged entries
134
"""
135
136
def has_conflicts(self) -> bool:
137
"""
138
Check if index contains merge conflicts.
139
140
Returns:
141
True if conflicts exist
142
"""
143
144
def changes_from_tree(
145
self,
146
object_store: ObjectContainer,
147
tree: ObjectID,
148
want_unchanged: bool = False,
149
) -> Generator[
150
tuple[
151
tuple[Optional[bytes], Optional[bytes]],
152
tuple[Optional[int], Optional[int]],
153
tuple[Optional[bytes], Optional[bytes]],
154
],
155
None,
156
None,
157
]:
158
"""
159
Find differences between index and tree.
160
161
Parameters:
162
- object_store: Object store for retrieving tree contents
163
- tree: SHA-1 of the root tree
164
- want_unchanged: Whether unchanged files should be reported
165
166
Yields:
167
Tuples with (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
168
"""
169
170
def commit(self, object_store: ObjectContainer) -> bytes:
171
"""
172
Create tree object from index contents.
173
174
Parameters:
175
- object_store: Object store for writing tree objects
176
177
Returns:
178
SHA-1 hash of created tree
179
"""
180
181
def write(self) -> None:
182
"""Write index to disk."""
183
184
def read(self) -> None:
185
"""Read index from disk."""
186
187
@property
188
def path(self) -> str:
189
"""
190
Index file path.
191
192
Returns:
193
Path to index file
194
"""
195
196
@property
197
def version(self) -> int:
198
"""
199
Index format version.
200
201
Returns:
202
Index format version number
203
"""
204
```
205
206
### Index Entry Classes
207
208
Classes representing individual entries in the Git index.
209
210
```python { .api }
211
class IndexEntry:
212
"""
213
Single entry in Git index representing staged file.
214
215
Contains file metadata including timestamps, permissions,
216
size, and SHA-1 hash of content.
217
"""
218
219
def __init__(
220
self,
221
ctime: Tuple[int, int],
222
mtime: Tuple[int, int],
223
dev: int,
224
ino: int,
225
mode: int,
226
uid: int,
227
gid: int,
228
size: int,
229
sha: bytes,
230
flags: int = 0
231
):
232
"""
233
Create index entry.
234
235
Parameters:
236
- ctime: Time of last inode (metadata) change as (seconds, nanoseconds)
237
- mtime: Modification time as (seconds, nanoseconds)
238
- dev: Device ID
239
- ino: Inode number
240
- mode: File mode/permissions
241
- uid: User ID
242
- gid: Group ID
243
- size: File size in bytes
244
- sha: 20-byte SHA-1 hash of content
245
- flags: Index entry flags
246
"""
247
248
@property
249
def ctime(self) -> Tuple[int, int]:
250
"""
251
Time of last inode (metadata) change.
252
253
Returns:
254
Tuple of (seconds, nanoseconds)
255
"""
256
257
@property
258
def mtime(self) -> Tuple[int, int]:
259
"""
260
Modification time.
261
262
Returns:
263
Tuple of (seconds, nanoseconds)
264
"""
265
266
@property
267
def mode(self) -> int:
268
"""
269
File mode/permissions.
270
271
Returns:
272
Unix file mode
273
"""
274
275
@property
276
def sha(self) -> bytes:
277
"""
278
Content SHA-1 hash.
279
280
Returns:
281
20-byte SHA-1 hash
282
"""
283
284
@property
285
def size(self) -> int:
286
"""
287
File size.
288
289
Returns:
290
File size in bytes
291
"""
292
293
@property
294
def flags(self) -> int:
295
"""
296
Index entry flags.
297
298
Returns:
299
Flags indicating entry state
300
"""
301
302
def stage(self) -> Stage:
303
"""
304
Get the merge stage of this entry.
305
306
Returns:
307
Stage enum value
308
"""
309
310
@property
311
def skip_worktree(self) -> bool:
312
"""
313
Return True if the skip-worktree bit is set.
314
315
Returns:
316
True if skip-worktree flag is set
317
"""
318
319
def set_skip_worktree(self, skip: bool = True) -> None:
320
"""
321
Set or clear the skip-worktree bit.
322
323
Parameters:
324
- skip: Whether to set the skip-worktree bit
325
"""
326
327
@classmethod
328
def from_serialized(cls, serialized: SerializedIndexEntry) -> "IndexEntry":
329
"""
330
Create IndexEntry from serialized data.
331
332
Parameters:
333
- serialized: SerializedIndexEntry object
334
335
Returns:
336
IndexEntry object
337
"""
338
339
def serialize(self, name: bytes, stage: Stage) -> SerializedIndexEntry:
340
"""
341
Serialize entry for writing to index file.
342
343
Parameters:
344
- name: File path as bytes
345
- stage: Merge stage
346
347
Returns:
348
SerializedIndexEntry object
349
"""
350
351
@dataclass
352
class SerializedIndexEntry:
353
"""
354
Serialized representation of an index entry.
355
356
Used during index file I/O operations to handle
357
raw data before conversion to IndexEntry objects.
358
"""
359
360
name: bytes
361
ctime: Union[int, float, tuple[int, int]]
362
mtime: Union[int, float, tuple[int, int]]
363
dev: int
364
ino: int
365
mode: int
366
uid: int
367
gid: int
368
size: int
369
sha: bytes
370
flags: int
371
extended_flags: int
372
373
def stage(self) -> Stage:
374
"""
375
Get the merge stage of this entry.
376
377
Returns:
378
Stage enum value
379
"""
380
381
class Stage(Enum):
382
"""
383
Merge conflict stage numbers.
384
385
Used to identify different versions of files
386
during merge operations.
387
"""
388
389
NORMAL = 0
390
MERGE_CONFLICT_ANCESTOR = 1
391
MERGE_CONFLICT_THIS = 2
392
MERGE_CONFLICT_OTHER = 3
393
394
class ConflictedIndexEntry:
395
"""
396
Index entry representing merge conflict.
397
398
Contains multiple versions of the same file from
399
different merge parents requiring resolution.
400
"""
401
402
ancestor: Optional[IndexEntry]
403
this: Optional[IndexEntry]
404
other: Optional[IndexEntry]
405
406
def __init__(
407
self,
408
ancestor: Optional[IndexEntry] = None,
409
this: Optional[IndexEntry] = None,
410
other: Optional[IndexEntry] = None,
411
):
412
"""
413
Create conflicted index entry.
414
415
Parameters:
416
- ancestor: Common ancestor version (stage 1)
417
- this: Current branch version (stage 2)
418
- other: Other branch version (stage 3)
419
"""
420
```
421
422
### Index Extensions
423
424
Classes for handling Git index extensions.
425
426
```python { .api }
427
@dataclass
428
class IndexExtension:
429
"""
430
Base class for index extensions.
431
432
Extensions provide additional metadata stored
433
in the Git index file format.
434
"""
435
436
signature: bytes
437
data: bytes
438
439
@classmethod
440
def from_raw(cls, signature: bytes, data: bytes) -> "IndexExtension":
441
"""
442
Create extension from raw data.
443
444
Parameters:
445
- signature: 4-byte extension signature
446
- data: Extension data
447
448
Returns:
449
Parsed extension object
450
"""
451
452
def to_bytes(self) -> bytes:
453
"""
454
Serialize extension to bytes.
455
456
Returns:
457
Extension data as bytes
458
"""
459
460
class TreeExtension(IndexExtension):
461
"""
462
Tree cache extension for faster tree object creation.
463
464
Caches tree SHA-1 values to avoid recalculation
465
when creating tree objects from index.
466
"""
467
468
def __init__(self, entries: list[tuple[bytes, bytes, int]]) -> None:
469
"""
470
Initialize tree extension.
471
472
Parameters:
473
- entries: List of (path, sha, entry_count) tuples
474
"""
475
476
class ResolveUndoExtension(IndexExtension):
477
"""
478
Resolve undo extension for recording merge conflicts.
479
480
Stores information about resolved conflicts to allow
481
undoing merge conflict resolution.
482
"""
483
484
def __init__(self, entries: list[tuple[bytes, list[tuple[int, bytes]]]]) -> None:
485
"""
486
Initialize resolve undo extension.
487
488
Parameters:
489
- entries: List of (path, [(stage, sha), ...]) tuples
490
"""
491
492
class UntrackedExtension(IndexExtension):
493
"""
494
Untracked cache extension for faster status operations.
495
496
Caches information about untracked files to speed up
497
git status operations.
498
"""
499
500
def __init__(self, data: bytes) -> None:
501
"""
502
Initialize untracked extension.
503
504
Parameters:
505
- data: Raw extension data
506
"""
507
```
508
509
### Index Functions
510
511
Standalone functions for index operations and manipulation.
512
513
```python { .api }
514
def read_index(filename: str) -> Index:
515
"""
516
Read index from file.
517
518
Parameters:
519
- filename: Path to index file
520
521
Returns:
522
Index object loaded from file
523
"""
524
525
def write_index(filename: str, entries: Dict[bytes, IndexEntry]) -> None:
526
"""
527
Write index entries to file.
528
529
Parameters:
530
- filename: Path to index file
531
- entries: Dict mapping paths to IndexEntry objects
532
"""
533
534
def read_index_dict(filename: str) -> Dict[bytes, IndexEntry]:
535
"""
536
Read index as dictionary.
537
538
Parameters:
539
- filename: Path to index file
540
541
Returns:
542
Dict mapping file paths to IndexEntry objects
543
"""
544
545
def write_index_dict(
546
filename: str,
547
entries: Dict[bytes, IndexEntry],
548
version: int = 2
549
) -> None:
550
"""
551
Write index dictionary to file.
552
553
Parameters:
554
- filename: Path to index file
555
- entries: Dict mapping paths to IndexEntry objects
556
- version: Index format version
557
"""
558
559
def commit_tree(
560
object_store: BaseObjectStore,
561
index: Dict[bytes, IndexEntry]
562
) -> bytes:
563
"""
564
Create tree object from index entries.
565
566
Parameters:
567
- object_store: Object store for writing tree objects
568
- index: Dict mapping paths to IndexEntry objects
569
570
Returns:
571
SHA-1 hash of created tree
572
"""
573
574
def commit_index(
575
object_store: BaseObjectStore,
576
index: Index
577
) -> bytes:
578
"""
579
Create tree from Index object.
580
581
Parameters:
582
- object_store: Object store for writing objects
583
- index: Index object
584
585
Returns:
586
SHA-1 hash of created tree
587
"""
588
589
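def build_index_from_tree(
590
root_path: Union[str, bytes], index_path: Union[str, bytes], object_store: BaseObjectStore,
591
tree_id: bytes,
592
honor_filemode: bool = True
593
) -> None:
594
"""
595
Check out a tree into a working directory and write the matching index file.
596
597
Parameters:
598
- root_path: Target directory for the checked-out files
- index_path: Path of the index file to generate
- object_store: Object store containing tree objects
599
- tree_id: Tree SHA-1 hash
600
- honor_filemode: Respect file mode from tree
601
602
Returns:
603
None. Files are written under root_path and the index is written to index_path.
604
"""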
605
606
def changes_from_tree(
607
object_store: BaseObjectStore,
608
index: Dict[bytes, IndexEntry],
609
tree_id: bytes,
610
want_unchanged: bool = False
611
) -> Iterator[Tuple[bytes, int, bytes]]:
612
"""
613
Get changes between index and tree.
614
615
Parameters:
616
- object_store: Object store containing objects
617
- index: Index entries to compare
618
- tree_id: Tree SHA-1 hash to compare against
619
- want_unchanged: Include unchanged files
620
621
Yields:
622
Tuples of (path, change_type, sha1_hash)
623
"""
624
625
def blob_from_path_and_stat(
626
fs_path: bytes,
627
st: os.stat_result,
628
tree_encoding: str = "utf-8"
629
) -> Blob:
630
"""
631
Create blob from filesystem path and stat.
632
633
Parameters:
634
- fs_path: Filesystem path to file
635
- st: File stat information
636
- tree_encoding: Encoding for tree paths
637
638
Returns:
639
Blob object
640
"""
641
642
def blob_from_path_and_mode(
643
fs_path: bytes,
644
mode: int,
645
tree_encoding: str = "utf-8"
646
) -> Blob:
647
"""
648
Create blob from filesystem path and mode.
649
650
Parameters:
651
- fs_path: Filesystem path to file
652
- mode: File mode
653
- tree_encoding: Encoding for tree paths
654
655
Returns:
656
Blob object
657
"""
658
659
def index_entry_from_stat(
660
stat_val: os.stat_result,
661
hex_sha: bytes,
662
mode: Optional[int] = None,
663
) -> IndexEntry:
664
"""
665
Create index entry from stat result.
666
667
Parameters:
668
- stat_val: POSIX stat_result
669
- hex_sha: Hex SHA of the object
670
- mode: Optional file mode override
671
672
Returns:
673
IndexEntry object
674
"""
675
676
def index_entry_from_path(
677
path: bytes,
678
object_store: Optional[ObjectContainer] = None
679
) -> Optional[IndexEntry]:
680
"""
681
Create index entry from filesystem path.
682
683
Parameters:
684
- path: Path to create entry for
685
- object_store: Optional object store for new blobs
686
687
Returns:
688
IndexEntry object or None for directories
689
"""
690
691
def update_working_tree(
692
repo: "Repo",
693
old_tree_id: Optional[bytes],
694
new_tree_id: bytes,
695
change_iterator: Iterator["TreeChange"],
696
honor_filemode: bool = True,
697
validate_path_element: Optional[Callable[[bytes], bool]] = None,
698
symlink_fn: Optional[Callable] = None,
699
force_remove_untracked: bool = False,
700
blob_normalizer: Optional["BlobNormalizer"] = None,
701
tree_encoding: str = "utf-8",
702
allow_overwrite_modified: bool = False,
703
) -> None:
704
"""
705
Update working tree and index to match a new tree.
706
707
Parameters:
708
- repo: Repository object
709
- old_tree_id: SHA of tree before update
710
- new_tree_id: SHA of tree to update to
711
- change_iterator: Iterator of TreeChange objects
712
- honor_filemode: Honor core.filemode setting
713
- validate_path_element: Function to validate paths
714
- symlink_fn: Function for creating symlinks
715
- force_remove_untracked: Remove untracked files
716
- blob_normalizer: Line ending normalizer
717
- tree_encoding: Encoding for tree paths
718
- allow_overwrite_modified: Allow overwriting modified files
719
"""
720
721
def get_unstaged_changes(
722
index: Index,
723
root_path: Union[str, bytes],
724
filter_blob_callback: Optional[Callable] = None,
725
) -> Generator[bytes, None, None]:
726
"""
727
Find paths with unstaged changes.
728
729
Parameters:
730
- index: Index to check
731
- root_path: Root path to find files
732
- filter_blob_callback: Optional blob filter
733
734
Yields:
735
Paths with unstaged changes
736
"""
737
738
def refresh_index(index: Index, root_path: bytes) -> None:
739
"""
740
Refresh index contents from filesystem.
741
742
Parameters:
743
- index: Index to update
744
- root_path: Root filesystem path
745
"""
746
```
747
748
### Context Manager
749
750
Context manager for safely modifying the index.
751
752
```python { .api }
753
class locked_index:
754
"""
755
Context manager for locking index during modifications.
756
757
Ensures atomic writes to the index file and proper
758
cleanup if operations fail.
759
"""
760
761
def __init__(self, path: Union[bytes, str]) -> None:
762
"""
763
Initialize locked index.
764
765
Parameters:
766
- path: Path to index file
767
"""
768
769
def __enter__(self) -> Index:
770
"""
771
Enter context and return locked index.
772
773
Returns:
774
Index object ready for modification
775
"""
776
777
def __exit__(
778
self,
779
exc_type: Optional[type],
780
exc_value: Optional[BaseException],
781
traceback: Optional[types.TracebackType],
782
) -> None:
783
"""
784
Exit context and write/abort changes.
785
786
Parameters:
787
- exc_type: Exception type if any
788
- exc_value: Exception value if any
789
- traceback: Exception traceback if any
790
"""
791
```
792
793
### Exception Classes
794
795
Index-specific exceptions for error handling.
796
797
```python { .api }
798
class UnsupportedIndexFormat(Exception):
799
"""
800
Raised when index format version is not supported.
801
802
Occurs when trying to read index with newer format
803
version than supported by current dulwich version.
804
"""
805
806
def __init__(self, version: int) -> None:
807
"""
808
Initialize exception.
809
810
Parameters:
811
- version: Unsupported index format version
812
"""
813
814
class UnmergedEntries(Exception):
815
"""
816
Raised when index contains unmerged entries.
817
818
Occurs during operations that require clean index
819
but merge conflicts exist.
820
"""
821
```
822
823
## Usage Examples
824
825
### Basic Index Operations
826
827
```python
828
from dulwich.index import (
829
Index, IndexEntry, ConflictedIndexEntry, locked_index,
830
build_index_from_tree, update_working_tree
831
)
832
from dulwich.objects import Blob
833
from dulwich.repo import Repo
834
import os
835
836
# Open repository index
837
index = Index("/path/to/repo/.git/index")
838
839
# Check if file is staged
840
if b"file.txt" in index:
841
entry = index[b"file.txt"]
842
print(f"File SHA-1: {entry.sha.hex()}")
843
print(f"File size: {entry.size}")
844
print(f"Stage: {entry.stage()}")
845
846
# Stage new file using locked context
847
with locked_index("/path/to/repo/.git/index") as index:
848
stat_result = os.stat("/path/to/file.txt")
849
entry = IndexEntry(
850
ctime=(int(stat_result.st_ctime), 0),
851
mtime=(int(stat_result.st_mtime), 0),
852
dev=stat_result.st_dev,
853
ino=stat_result.st_ino,
854
mode=stat_result.st_mode,
855
uid=stat_result.st_uid,
856
gid=stat_result.st_gid,
857
size=stat_result.st_size,
858
sha=b"new_blob_sha_here",
859
flags=0
860
)
861
index[b"file.txt"] = entry
862
# Index automatically written on context exit
863
864
# Build index from tree
865
repo = Repo("/path/to/repo")
866
build_index_from_tree(
867
"/path/to/worktree",
868
"/path/to/repo/.git/index",
869
repo.object_store,
870
b"tree_sha_here"
871
)
872
873
# Create tree from index
874
tree_sha = index.commit(repo.object_store)
875
print(f"Tree SHA-1: {tree_sha.hex()}")
876
877
# Check for conflicts
878
if index.has_conflicts():
879
print("Index has merge conflicts")
880
for path in index:
881
entry = index[path]
882
if isinstance(entry, ConflictedIndexEntry):
883
print(f"Conflict in {path.decode()}:")
884
if entry.ancestor:
885
print(f" Ancestor: {entry.ancestor.sha.hex()}")
886
if entry.this:
887
print(f" This: {entry.this.sha.hex()}")
888
if entry.other:
889
print(f" Other: {entry.other.sha.hex()}")
890
```