# Object Storage

Flexible object storage backends supporting filesystem, memory, pack files, and cloud storage. Object stores provide efficient storage and retrieval of Git objects with various optimization strategies.
3
4
## Capabilities
5
6
### Base Object Store
7
8
Abstract interface for all object storage implementations.
9
10
```python { .api }
11
class BaseObjectStore:
12
"""
13
Abstract base class for Git object storage.
14
15
Defines interface for storing and retrieving Git objects
16
with support for different storage backends.
17
"""
18
19
def __contains__(self, sha: bytes) -> bool:
20
"""
21
Check if object exists in store.
22
23
Parameters:
24
- sha: 20-byte SHA-1 hash
25
26
Returns:
27
True if object exists
28
"""
29
30
def __getitem__(self, sha: bytes) -> ShaFile:
31
"""
32
Retrieve object by SHA-1.
33
34
Parameters:
35
- sha: 20-byte SHA-1 hash
36
37
Returns:
38
ShaFile object (Blob, Tree, Commit, or Tag)
39
40
Raises:
41
KeyError: If object not found
42
"""
43
44
def __iter__(self) -> Iterator[bytes]:
45
"""
46
Iterate over all object SHA-1 hashes.
47
48
Yields:
49
20-byte SHA-1 hashes of all objects
50
"""
51
52
def add_object(self, obj: ShaFile) -> None:
53
"""
54
Add object to store.
55
56
Parameters:
57
- obj: ShaFile object to store
58
"""
59
60
def add_objects(self, objects: Iterator[ShaFile]) -> None:
61
"""
62
Add multiple objects to store.
63
64
Parameters:
65
- objects: Iterator of ShaFile objects
66
"""
67
68
def contains_loose(self, sha: bytes) -> bool:
69
"""
70
Check if object exists as loose object.
71
72
Parameters:
73
- sha: 20-byte SHA-1 hash
74
75
Returns:
76
True if loose object exists
77
"""
78
79
def contains_packed(self, sha: bytes) -> bool:
80
"""
81
Check if object exists in pack file.
82
83
Parameters:
84
- sha: 20-byte SHA-1 hash
85
86
Returns:
87
True if packed object exists
88
"""
89
90
def get_raw(self, sha: bytes) -> Tuple[int, bytes]:
91
"""
92
Get raw object data.
93
94
Parameters:
95
- sha: 20-byte SHA-1 hash
96
97
Returns:
98
Tuple of (object_type, raw_data)
99
"""
100
101
def find_missing_objects(
102
self,
103
haves: List[bytes],
104
wants: List[bytes],
105
shallow: Optional[Set[bytes]] = None,
progress: Optional[Callable] = None,
get_tagged: Optional[Callable] = None,
108
get_parents: Callable = lambda commit: commit.parents
109
) -> Iterator[Tuple[bytes, Optional[bytes]]]:
110
"""
111
Find objects missing from store.
112
113
Parameters:
114
- haves: SHA-1 hashes of objects we have
115
- wants: SHA-1 hashes of objects we want
116
- shallow: Set of shallow commit SHA-1s to skip
117
- progress: Progress callback function
118
- get_tagged: Function to get tagged objects
119
- get_parents: Function for getting commit parents
120
121
Yields:
122
Tuples of (sha1_hash, path) for missing objects
123
"""
124
125
def find_common_revisions(
126
self,
127
graphwalker: GraphWalker
128
) -> List[bytes]:
129
"""
130
Find common revisions using graph walker.
131
132
Parameters:
133
- graphwalker: Graph walker for revision traversal
134
135
Returns:
136
List of common revision SHA-1 hashes
137
"""
138
139
def generate_pack_data(
140
self,
141
have: List[bytes],
142
want: List[bytes],
143
shallow: Optional[Set[bytes]] = None,
progress: Optional[Callable] = None,
145
ofs_delta: bool = True
146
) -> Tuple[int, Iterator[UnpackedObject]]:
147
"""
148
Generate pack data for a set of wants/haves.
149
150
Parameters:
151
- have: SHA-1s of objects that should not be sent
152
- want: SHA-1s of objects that should be sent
153
- shallow: Set of shallow commit SHA-1s to skip
154
- progress: Optional progress reporting function
155
- ofs_delta: Whether OFS deltas can be included
156
157
Returns:
158
Tuple of (object_count, unpacked_objects_iterator)
159
"""
160
161
def determine_wants_all(
162
self,
163
refs: Dict[bytes, bytes],
164
depth: Optional[int] = None
165
) -> List[bytes]:
166
"""
167
Determine which refs are wanted based on availability and depth.
168
169
Parameters:
170
- refs: Dictionary mapping ref names to SHA-1 hashes
171
- depth: Optional depth limit for shallow operations
172
173
Returns:
174
List of SHA-1 hashes that are wanted
175
"""
176
177
def tree_changes(
178
self,
179
source: bytes,
180
target: bytes,
181
want_unchanged: bool = False,
182
include_trees: bool = False,
183
change_type_same: bool = False,
184
rename_detector: Optional[RenameDetector] = None,
185
paths: Optional[List[bytes]] = None
186
) -> Iterator[Tuple[Tuple[bytes, bytes], Tuple[int, int], Tuple[bytes, bytes]]]:
187
"""
188
Find differences between contents of two trees.
189
190
Parameters:
191
- source: SHA-1 of source tree
192
- target: SHA-1 of target tree
193
- want_unchanged: Whether unchanged files should be reported
194
- include_trees: Whether to include trees in output
195
- change_type_same: Whether to report files changing type
196
- rename_detector: RenameDetector for detecting renames
197
- paths: Optional list of paths to filter (as bytes)
198
199
Yields:
200
Tuples of ((old_path, new_path), (old_mode, new_mode), (old_sha, new_sha))
201
"""
202
203
def iterobjects_subset(
204
self,
205
shas: Iterable[bytes],
206
*,
207
allow_missing: bool = False
208
) -> Iterator[ShaFile]:
209
"""
210
Iterate over subset of objects by SHA-1.
211
212
Parameters:
213
- shas: Iterable of SHA-1 hashes to retrieve
214
- allow_missing: If True, skip missing objects instead of raising KeyError
215
216
Yields:
217
ShaFile objects for requested SHA-1s
218
"""
219
220
def close(self) -> None:
221
"""
222
Close any files opened by this object store.
223
224
Default implementation is a NO-OP. Subclasses should override
225
to provide proper cleanup of resources.
226
"""
227
228
def prune(self, grace_period: Optional[int] = None) -> None:
229
"""
230
Prune/clean up this object store.
231
232
This includes removing orphaned temporary files and other
233
housekeeping tasks.
234
235
Parameters:
236
- grace_period: Grace period in seconds for removing temporary files.
237
If None, uses the default grace period.
238
"""
239
240
def iter_prefix(self, prefix: bytes) -> Iterator[bytes]:
241
"""
242
Iterate over SHA-1s that start with given prefix.
243
244
Parameters:
245
- prefix: SHA-1 prefix to match
246
247
Yields:
248
SHA-1 hashes starting with the prefix
249
"""
250
251
def get_commit_graph(self) -> Optional[CommitGraph]:
252
"""
253
Get commit graph for this object store.
254
255
Returns:
256
CommitGraph object if available, None otherwise
257
"""
258
259
def write_commit_graph(
260
self,
261
refs: Optional[List[bytes]] = None,
262
reachable: bool = True
263
) -> None:
264
"""
265
Write commit graph file for this object store.
266
267
Parameters:
268
- refs: List of refs to include. If None, includes all refs.
269
- reachable: If True, includes all reachable commits from refs.
270
If False, only includes direct ref targets.
271
"""
272
273
def get_object_mtime(self, sha: bytes) -> float:
274
"""
275
Get modification time of an object.
276
277
Parameters:
278
- sha: SHA-1 of the object
279
280
Returns:
281
Modification time as seconds since epoch
282
283
Raises:
284
KeyError: If object is not found
285
"""
286
```
287
288
### Filesystem Object Store
289
290
Standard Git object storage using filesystem layout.
291
292
```python { .api }
293
class DiskObjectStore(BaseObjectStore):
294
"""
295
Object store using standard Git filesystem layout.
296
297
Stores objects as loose files and pack files in .git/objects
298
directory with standard Git directory structure.
299
"""
300
301
def __init__(self, path: str):
302
"""
303
Initialize disk object store.
304
305
Parameters:
306
- path: Path to objects directory (.git/objects)
307
"""
308
309
@property
310
def path(self) -> str:
311
"""
312
Objects directory path.
313
314
Returns:
315
Absolute path to objects directory
316
"""
317
318
def move_in_pack(self, path: str) -> None:
319
"""
320
Move pack file into objects directory.
321
322
Parameters:
323
- path: Path to pack file to move
324
"""
325
326
def add_pack(self) -> Tuple[BinaryIO, BinaryIO]:
327
"""
328
Create new pack files for writing.
329
330
Returns:
331
Tuple of (pack_file, index_file) streams
332
"""
333
334
def pack_loose_objects(self) -> None:
335
"""
336
Pack loose objects into pack file.
337
338
Combines loose objects into efficient pack format
339
to reduce disk usage and improve performance.
340
"""
341
342
def repack(self) -> None:
343
"""
344
Repack all objects for optimal storage.
345
346
Combines all objects into optimized pack files
347
and removes redundant loose objects.
348
"""
349
350
def _get_loose_object(self, sha: bytes) -> Optional[ShaFile]:
351
"""
352
Get loose object by SHA-1 hash.
353
354
Parameters:
355
- sha: 20-byte SHA-1 hash
356
357
Returns:
358
ShaFile object if found, None otherwise
359
"""
360
361
def _iter_loose_objects(self) -> Iterator[bytes]:
362
"""
363
Iterate over SHA-1s of all loose objects.
364
365
Yields:
366
20-byte SHA-1 hashes of loose objects in store
367
"""
368
369
def _remove_pack(self, name: str) -> None:
370
"""
371
Remove pack file from disk.
372
373
Parameters:
374
- name: Pack file name to remove
375
"""
376
377
def _get_pack(self, name: str) -> Pack:
378
"""
379
Get pack object by name.
380
381
Parameters:
382
- name: Pack file name
383
384
Returns:
385
Pack object
386
"""
387
388
def _iter_pack_names(self) -> Iterator[str]:
389
"""
390
Iterate over pack file names.
391
392
Yields:
393
Pack file names in objects directory
394
"""
395
396
def _update_pack_cache(self) -> List[Pack]:
397
"""
398
Update pack cache with new packs from disk.
399
400
Returns:
401
List of newly added packs
402
"""
403
```
404
405
### Memory Object Store
406
407
In-memory object storage for temporary operations.
408
409
```python { .api }
410
class MemoryObjectStore(BaseObjectStore):
411
"""
412
Object store using in-memory storage.
413
414
Stores all objects in memory without persistence.
415
Useful for testing and temporary operations.
416
"""
417
418
def __init__(self):
419
"""Initialize empty memory object store."""
420
421
def add_pack_data(
422
self,
423
count: int,
424
unpacked_objects: Iterator[UnpackedObject],
425
progress: Optional[Callable] = None
426
) -> None:
427
"""
428
Add pack data to memory store.
429
430
Parameters:
431
- count: Number of objects in pack
432
- unpacked_objects: Iterator of unpacked object data
433
- progress: Progress callback function
434
"""
435
436
def add_thin_pack(
437
self,
438
read_all: Callable[[], bytes],
439
read_some: Callable[[int], bytes],
440
progress: Optional[Callable] = None
441
) -> None:
442
"""
443
Add thin pack to memory store.
444
445
Parameters:
446
- read_all: Function to read all available data
447
- read_some: Function to read specified number of bytes
448
- progress: Progress callback function
449
"""
450
```
451
452
### Pack-Based Object Store
453
454
Object store optimized for pack file operations with comprehensive pack management.
455
456
Provides base functionality for object stores that use Git pack files as their primary storage mechanism, including delta compression, streaming operations, and pack caching.
457
458
```python { .api }
459
class PackBasedObjectStore(BaseObjectStore):
460
"""
461
Object store using pack files for storage.
462
463
Optimized for pack file operations with efficient
464
delta compression and streaming support.
465
"""
466
467
def __init__(
468
self,
469
pack_compression_level: int = -1,
470
pack_index_version: Optional[int] = None,
471
pack_delta_window_size: Optional[int] = None,
472
pack_window_memory: Optional[int] = None,
473
pack_delta_cache_size: Optional[int] = None,
474
pack_depth: Optional[int] = None,
475
pack_threads: Optional[int] = None,
476
pack_big_file_threshold: Optional[int] = None
477
):
478
"""
479
Initialize pack-based object store.
480
481
Parameters:
482
- pack_compression_level: Compression level for pack files (-1 to 9)
483
- pack_index_version: Pack index format version (1 or 2)
484
- pack_delta_window_size: Size of delta compression window
485
- pack_window_memory: Memory limit for delta window
486
- pack_delta_cache_size: Size of delta cache
487
- pack_depth: Maximum delta depth
488
- pack_threads: Number of threads for pack operations
489
- pack_big_file_threshold: Threshold for big file handling
490
"""
491
492
@property
493
def alternates(self) -> List[BaseObjectStore]:
494
"""
495
Get list of alternate object stores.
496
497
Returns:
498
List of alternate object stores to search
499
"""
500
501
def add_pack(self) -> Tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
502
"""
503
Add new pack to this object store.
504
505
Returns:
506
Tuple of (pack_file_stream, commit_function, abort_function)
507
"""
508
509
def add_pack_data(
510
self,
511
count: int,
512
unpacked_objects: Iterator[UnpackedObject],
513
progress: Optional[Callable] = None
514
) -> Optional[Pack]:
515
"""
516
Add pack data to store.
517
518
Parameters:
519
- count: Number of objects in pack
520
- unpacked_objects: Iterator of unpacked object data
521
- progress: Progress callback function
522
523
Returns:
524
Pack object if pack was created, None if empty
525
"""
526
527
def _add_cached_pack(self, base_name: str, pack: Pack) -> None:
528
"""
529
Add newly appeared pack to the cache by path.
530
531
Parameters:
532
- base_name: Base name of the pack file
533
- pack: Pack object to cache
534
"""
535
```
536
537
### Overlay Object Store
538
539
Layered object store with fallback to base store.
540
541
```python { .api }
542
class OverlayObjectStore(BaseObjectStore):
543
"""
544
Object store with overlay and base layers.
545
546
Provides layered storage where objects are first checked
547
in overlay layer, falling back to base layer if not found.
548
"""
549
550
def __init__(
551
self,
552
bases: List[BaseObjectStore],
553
add_store: Optional[BaseObjectStore] = None
554
):
555
"""
556
Initialize overlay object store.
557
558
Parameters:
559
- bases: List of base object stores (checked in order)
560
- add_store: Store for adding new objects
561
"""
562
563
@property
564
def bases(self) -> List[BaseObjectStore]:
565
"""
566
Base object stores.
567
568
Returns:
569
List of base stores checked for objects
570
"""
571
572
@property
573
def packs(self) -> List[Pack]:
574
"""
575
All packs from base stores.
576
577
Returns:
578
Combined list of packs from all base stores
579
"""
580
581
def iter_unpacked_subset(
582
self,
583
shas: Iterable[bytes],
584
*,
585
include_comp: bool = False,
586
allow_missing: bool = False,
587
convert_ofs_delta: bool = True
588
) -> Iterator[UnpackedObject]:
589
"""
590
Iterate over unpacked subset of objects.
591
592
Parameters:
593
- shas: SHA-1 hashes to retrieve
594
- include_comp: Whether to include compressed objects
595
- allow_missing: Whether to skip missing objects
596
- convert_ofs_delta: Whether to convert OFS deltas
597
598
Yields:
UnpackedObject entries from base stores
600
"""
601
```
602
603
### Bucket-Based Object Store
604
605
Object store implementation for cloud storage backends like Amazon S3.
606
607
Optimized for bucket-based storage systems where objects are stored as pack files in cloud storage buckets. Does not support loose objects, making it ideal for distributed and cloud-native Git hosting scenarios.
608
609
```python { .api }
610
class BucketBasedObjectStore(PackBasedObjectStore):
611
"""
612
Object store using bucket organization like S3.
613
614
Implementation that uses a bucket store like S3 as backend.
615
Optimized for cloud storage scenarios with no loose objects.
616
"""
617
618
def _iter_loose_objects(self) -> Iterator[bytes]:
619
"""
620
Iterate over SHA-1s of all loose objects.
621
622
Returns:
623
Empty iterator (bucket stores have no loose objects)
624
"""
625
626
def _get_loose_object(self, sha: bytes) -> None:
627
"""
628
Get loose object by SHA-1.
629
630
Parameters:
631
- sha: 20-byte SHA-1 hash
632
633
Returns:
634
None (bucket stores have no loose objects)
635
"""
636
637
def delete_loose_object(self, sha: bytes) -> None:
638
"""
639
Delete loose object by SHA-1.
640
641
Parameters:
642
- sha: 20-byte SHA-1 hash
643
644
Note:
645
No-op for bucket stores as there are no loose objects.
646
"""
647
648
def _remove_pack(self, name: str) -> None:
649
"""
650
Remove pack from bucket storage.
651
652
Parameters:
653
- name: Pack file name
654
655
Raises:
656
NotImplementedError: Must be implemented by subclasses
657
"""
658
659
def _iter_pack_names(self) -> Iterator[str]:
660
"""
661
Iterate over pack file names in bucket.
662
663
Yields:
664
Pack file names available in bucket
665
666
Raises:
667
NotImplementedError: Must be implemented by subclasses
668
"""
669
670
def _get_pack(self, name: str) -> Pack:
671
"""
672
Get pack object from bucket storage.
673
674
Parameters:
675
- name: Pack file name
676
677
Returns:
678
Pack object
679
680
Raises:
681
NotImplementedError: Must be implemented by subclasses
682
"""
683
684
def _update_pack_cache(self) -> List[Pack]:
685
"""
686
Update pack cache with packs from bucket storage.
687
688
Returns:
689
List of newly added packs
690
"""
691
692
def _upload_pack(
693
self,
694
basename: str,
695
pack_file: BinaryIO,
696
index_file: BinaryIO
697
) -> None:
698
"""
699
Upload pack and index files to bucket storage.
700
701
Parameters:
702
- basename: Base name for pack files
703
- pack_file: Pack file stream
704
- index_file: Index file stream
705
706
Raises:
707
NotImplementedError: Must be implemented by subclasses
708
"""
709
```
710
711
### Protocol Classes
712
713
Protocol definitions for object store interfaces.
714
715
```python { .api }
716
class PackContainer(Protocol):
717
"""
718
Protocol for containers that can manage pack files.
719
720
Defines interface for adding new pack files to storage.
721
"""
722
723
def add_pack(self) -> Tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
724
"""
725
Add new pack to container.
726
727
Returns:
728
Tuple of (pack_file_stream, commit_function, abort_function)
729
"""
730
731
class ObjectIterator(Protocol):
732
"""
733
Protocol for iterating over objects.
734
735
Defines interface for object iteration with filtering.
736
"""
737
738
def iterobjects_subset(
739
self,
740
shas: Iterable[bytes],
741
*,
742
allow_missing: bool = False
743
) -> Iterator[ShaFile]:
744
"""
745
Iterate over subset of objects by SHA-1.
746
747
Parameters:
748
- shas: SHA-1 hashes to retrieve
749
- allow_missing: Whether to skip missing objects
750
751
Yields:
752
ShaFile objects for requested SHA-1s
753
"""
754
```
755
756
### Utility Classes
757
758
Helper classes for object store operations.
759
760
```python { .api }
761
class MissingObjectFinder:
762
"""
763
Finds missing objects between object stores.
764
765
Used for determining what objects need to be transferred
766
during fetch and push operations.
767
"""
768
769
def __init__(
770
self,
771
object_store: BaseObjectStore,
772
haves: Iterable[bytes],
773
wants: Iterable[bytes],
774
*,
775
shallow: Optional[Set[bytes]] = None,
776
progress: Optional[Callable] = None,
777
get_tagged: Optional[Callable] = None,
778
get_parents: Callable[[Commit], List[bytes]] = lambda commit: commit.parents
779
):
780
"""
781
Initialize missing object finder.
782
783
Parameters:
784
- object_store: Object store containing objects to be sent
785
- haves: SHA-1 hashes of commits not to send (already present)
786
- wants: SHA-1 hashes of commits to send
787
- shallow: Set of shallow commit SHA-1s to skip
788
- progress: Progress callback function
789
- get_tagged: Function returning dict of pointed-to sha -> tag sha
790
- get_parents: Function for getting parents of a commit
791
"""
792
793
def get_remote_has(self) -> Set[bytes]:
794
"""
795
Get set of objects the remote has.
796
797
Returns:
798
Set of SHA-1 hashes known to be on remote
799
"""
800
801
def add_todo(
802
self,
803
entries: Iterable[Tuple[bytes, Optional[bytes], Optional[int], bool]]
804
) -> None:
805
"""
806
Add objects to todo list for processing.
807
808
Parameters:
809
- entries: Iterable of (sha, name, type_num, leaf) tuples
810
"""
811
812
def __next__(self) -> Tuple[bytes, Optional[PackHint]]:
813
"""
814
Get next missing object.
815
816
Returns:
817
Tuple of (sha1_hash, pack_hint) for next missing object
818
819
Raises:
820
StopIteration: When no more missing objects
821
"""
822
823
def __iter__(self):
824
"""
825
Make this object iterable.
826
827
Returns:
828
Self as iterator
829
"""
830
831
class ObjectStoreGraphWalker:
832
"""
833
Graph walker that finds what commits are missing from an object store.
834
835
Walks commit and tag relationships to determine
836
object dependencies and ancestry for protocol operations.
837
"""
838
839
heads: Set[bytes]
840
"""Revisions without descendants in the local repo."""
841
842
get_parents: Callable[[bytes], List[bytes]]
843
"""Function to retrieve parents in the local repo."""
844
845
shallow: Set[bytes]
846
"""Set of shallow commit SHA-1s."""
847
848
def __init__(
849
self,
850
local_heads: Iterable[bytes],
851
get_parents: Callable[[bytes], List[bytes]],
852
shallow: Optional[Set[bytes]] = None,
853
update_shallow: Optional[Callable] = None
854
):
855
"""
856
Initialize graph walker.
857
858
Parameters:
859
- local_heads: Heads to start search with
860
- get_parents: Function for finding parents of a SHA-1
861
- shallow: Set of shallow commit SHA-1s
862
- update_shallow: Function to update shallow set
863
"""
864
865
def nak(self) -> None:
866
"""
867
Indicate nothing in common was found.
868
"""
869
870
def ack(self, sha: bytes) -> None:
871
"""
872
Acknowledge that a revision and its ancestors are present.
873
874
Parameters:
875
- sha: SHA-1 hash of acknowledged revision
876
"""
877
878
def __next__(self) -> Optional[bytes]:
879
"""
880
Get next revision to ask about.
881
882
Returns:
883
SHA-1 hash of next revision, or None when done
884
"""
885
886
def __iter__(self):
887
"""
888
Make this object iterable.
889
890
Returns:
891
Self as iterator
892
"""
893
```
894
895
### Utility Functions
896
897
Standalone functions for object store operations.
898
899
```python { .api }
900
def find_shallow(
901
store: BaseObjectStore,
902
heads: Iterable[bytes],
903
depth: int
904
) -> Tuple[Set[bytes], Set[bytes]]:
905
"""
906
Find shallow commits at specified depth.
907
908
Parameters:
909
- store: Object store for looking up objects
910
- heads: Iterable of head SHA-1s to start walking from
911
- depth: Depth of ancestors to include (1 = heads only)
912
913
Returns:
914
Tuple of (shallow_commits, not_shallow_commits) sets
915
916
Note:
917
Sets may overlap if commit is reachable along multiple paths.
918
"""
919
920
def get_depth(
921
store: BaseObjectStore,
922
head: bytes,
923
get_parents: Callable[[Commit], List[bytes]] = lambda commit: commit.parents,
924
max_depth: Optional[int] = None
925
) -> int:
926
"""
927
Get actual depth from head commit.
928
929
For commits with multiple parents, the largest possible depth
930
will be returned.
931
932
Parameters:
933
- store: Object store containing commits
934
- head: HEAD commit SHA-1 hash
935
- get_parents: Function for getting parents of a commit
936
- max_depth: Maximum depth to search
937
938
Returns:
939
Current available depth for the given head
940
"""
941
942
def tree_lookup_path(
943
lookup_obj: Callable[[bytes], ShaFile],
944
root_sha: bytes,
945
path: bytes
946
) -> Tuple[int, bytes]:
947
"""
948
Look up object at path in tree.
949
950
Parameters:
951
- lookup_obj: Function to retrieve objects by SHA-1
952
- root_sha: Root tree SHA-1 hash
953
- path: Path within tree to lookup
954
955
Returns:
956
Tuple of (mode, sha1_hash) for object at path
957
958
Raises:
959
NotTreeError: If root_sha does not point to a tree object
960
"""
961
962
def commit_tree_changes(
963
object_store: BaseObjectStore,
964
tree: Tree,
965
changes: List[TreeChange]
966
) -> Tree:
967
"""
968
Apply changes to tree and return new tree.
969
970
Parameters:
971
- object_store: Object store for reading/writing objects
972
- tree: Original tree object to modify
973
- changes: List of TreeChange objects describing modifications
974
975
Returns:
976
Modified tree object with changes applied
977
"""
978
979
def read_packs_file(f: BinaryIO) -> Iterator[str]:
980
"""
981
Yield pack names listed in a packs file.
982
983
Parameters:
984
- f: File-like object containing packs data
985
986
Yields:
987
Pack file names from the packs file
988
"""
989
990
def iter_tree_contents(
991
store: ObjectContainer,
992
tree_id: Optional[bytes],
993
*,
994
include_trees: bool = False
995
) -> Iterator[TreeEntry]:
996
"""
997
Iterate recursively over tree contents.
998
999
Iteration is depth-first pre-order, as in e.g. os.walk.
1000
1001
Parameters:
1002
- store: Object store containing objects
1003
- tree_id: Tree SHA-1 hash (can be None)
1004
- include_trees: Include tree objects in iteration
1005
1006
Yields:
1007
TreeEntry namedtuples for each object in tree
1008
"""
1009
1010
def peel_sha(
1011
store: ObjectContainer,
1012
sha: bytes
1013
) -> Tuple[ShaFile, ShaFile]:
1014
"""
1015
Peel all tags from a SHA.
1016
1017
Follows tag references to underlying objects until
1018
reaching a non-tag object.
1019
1020
Parameters:
1021
- store: Object store containing objects
1022
- sha: SHA-1 hash of object to peel
1023
1024
Returns:
1025
Tuple of (original_object, peeled_object)
1026
"""
1027
```
1028
1029
## Usage Examples
1030
1031
### Basic Object Store Operations
1032
1033
```python
1034
from dulwich.object_store import DiskObjectStore, MemoryObjectStore
1035
from dulwich.objects import Blob
1036
1037
# Create disk object store
1038
disk_store = DiskObjectStore("/path/to/repo/.git/objects")
1039
1040
# Check if object exists
1041
blob_sha = b'...' # 20-byte SHA-1
1042
if blob_sha in disk_store:
1043
blob = disk_store[blob_sha]
1044
print(f"Blob size: {len(blob.data)}")
1045
1046
# Add new object
1047
new_blob = Blob.from_string(b"New file content")
1048
disk_store.add_object(new_blob)
1049
1050
# Create memory store for temporary operations
1051
memory_store = MemoryObjectStore()
1052
memory_store.add_object(new_blob)
1053
```
1054
1055
### Object Store Layering
1056
1057
```python
1058
from dulwich.object_store import OverlayObjectStore, DiskObjectStore, MemoryObjectStore
1059
1060
# Create layered store
1061
base_store = DiskObjectStore("/path/to/repo/.git/objects")
1062
overlay_store = MemoryObjectStore()
1063
layered_store = OverlayObjectStore([base_store], overlay_store)
1064
1065
# Objects are first checked in overlay, then base
1066
# New objects go to overlay
1067
layered_store.add_object(Blob.from_string(b"Temporary content"))
1068
```
1069
1070
### Finding Missing Objects
1071
1072
```python
1073
from dulwich.object_store import MissingObjectFinder
1074
1075
# Find objects needed for synchronization
1076
haves = [b'...'] # SHA-1s of objects we have
1077
wants = [b'...'] # SHA-1s of objects we want
1078
1079
finder = MissingObjectFinder(
1080
object_store=disk_store,
1081
haves=haves,
1082
wants=wants
1083
)
1084
1085
# Iterate over missing objects
1086
missing_objects = []
1087
for sha, pack_hint in finder:
1088
missing_objects.append((sha, pack_hint))
1089
1090
print(f"Need {len(missing_objects)} objects")
1091
1092
# Alternative: use with graph walker
1093
from dulwich.object_store import ObjectStoreGraphWalker
1094
1095
# Set up graph walker for protocol operations
1096
local_heads = [repo.head()]
1097
def get_parents(sha):
1098
return disk_store[sha].parents
1099
1100
walker = ObjectStoreGraphWalker(local_heads, get_parents)
1101
1102
# Find common revisions
1103
common_revs = disk_store.find_common_revisions(walker)
1104
print(f"Found {len(common_revs)} common revisions")
1105
```