0
# Git Objects
1
2
Complete implementation of Git's object model including blobs, trees, commits, and tags. These classes provide full read/write capabilities for Git's internal object formats with proper serialization and validation.
3
4
Includes utility functions for object manipulation, type checking, time/timezone parsing, tree operations, and GPG signing support.
5
6
## Type Definitions and Constants
7
8
Core type aliases and constants used throughout the objects module.
9
10
```python { .api }
11
# Type aliases
12
ObjectID = bytes # 20-byte SHA-1 hash identifier
13
14
# Constants
15
ZERO_SHA = b"0" * 40 # Zero/null SHA-1 hash as hex string
16
S_IFGITLINK = 0o160000 # Git submodule file mode constant
17
MAX_TIME = 9223372036854775807 # Maximum valid timestamp (2^63 - 1)
18
BEGIN_PGP_SIGNATURE = b"-----BEGIN PGP SIGNATURE-----" # PGP signature marker
19
```
20
21
## Type Guard Functions
22
23
Runtime type checking functions for ShaFile objects with proper type narrowing support.
24
25
```python { .api }
26
def is_commit(obj: ShaFile) -> TypeGuard[Commit]:
27
"""
28
Check if a ShaFile is a Commit object.
29
30
Args:
31
obj: ShaFile object to check
32
33
Returns:
34
True if object is a Commit, with type narrowing
35
"""
36
37
def is_tree(obj: ShaFile) -> TypeGuard[Tree]:
38
"""
39
Check if a ShaFile is a Tree object.
40
41
Args:
42
obj: ShaFile object to check
43
44
Returns:
45
True if object is a Tree, with type narrowing
46
"""
47
48
def is_blob(obj: ShaFile) -> TypeGuard[Blob]:
49
"""
50
Check if a ShaFile is a Blob object.
51
52
Args:
53
obj: ShaFile object to check
54
55
Returns:
56
True if object is a Blob, with type narrowing
57
"""
58
59
def is_tag(obj: ShaFile) -> TypeGuard[Tag]:
60
"""
61
Check if a ShaFile is a Tag object.
62
63
Args:
64
obj: ShaFile object to check
65
66
Returns:
67
True if object is a Tag, with type narrowing
68
"""
69
```
70
71
## Utility Functions
72
73
Core utility functions for object operations and format conversions.
74
75
```python { .api }
76
def sha_to_hex(sha: ObjectID) -> bytes:
77
"""
78
Convert binary SHA-1 to hex string.
79
80
Args:
81
sha: 20-byte binary SHA-1
82
83
Returns:
84
40-byte hex representation as bytes
85
"""
86
87
def hex_to_sha(hex: Union[bytes, str]) -> bytes:
88
"""
89
Convert hex SHA-1 string to binary.
90
91
Args:
92
hex: 40-character hex string (bytes or str)
93
94
Returns:
95
20-byte binary SHA-1
96
97
Raises:
98
ValueError: If hex string is invalid
99
"""
100
101
def valid_hexsha(hex: Union[bytes, str]) -> bool:
102
"""
103
Check if string is a valid hex SHA-1.
104
105
Args:
106
hex: String to validate
107
108
Returns:
109
True if valid 40-character hex SHA-1
110
"""
111
112
def hex_to_filename(path: Union[str, bytes], hex: Union[str, bytes]) -> Union[str, bytes]:
113
"""
114
Convert hex SHA to Git object filename path.
115
116
Args:
117
path: Base objects directory path
118
hex: 40-character hex SHA-1
119
120
Returns:
121
Full path to object file (first 2 hex chars as dir, rest as filename)
122
"""
123
124
def filename_to_hex(filename: Union[str, bytes]) -> str:
125
"""
126
Extract hex SHA from Git object filename.
127
128
Args:
129
filename: Path to Git object file
130
131
Returns:
132
40-character hex SHA-1 string
133
"""
134
135
def object_header(num_type: int, length: int) -> bytes:
136
"""
137
Create Git object header for given type and content length.
138
139
Args:
140
num_type: Object type number (1=commit, 2=tree, 3=blob, 4=tag)
141
length: Content length in bytes
142
143
Returns:
144
Object header as bytes (e.g., b"blob 123\0")
145
"""
146
147
def object_class(type: Union[bytes, int]) -> Optional[type[ShaFile]]:
148
"""
149
Get ShaFile subclass for given type identifier.
150
151
Args:
152
type: Type name (b"commit", b"tree", etc.) or type number (1-4)
153
154
Returns:
155
ShaFile subclass or None if type unknown
156
"""
157
158
def check_hexsha(hex: Union[str, bytes], error_msg: str) -> None:
159
"""
160
Validate hex SHA-1 string, raising exception if invalid.
161
162
Args:
163
hex: Hex string to check
164
error_msg: Error message prefix for exception
165
166
Raises:
167
ObjectFormatException: If hex SHA is invalid
168
"""
169
170
def check_identity(identity: Optional[bytes], error_msg: str) -> None:
171
"""
172
Validate Git identity string format ("Name <email>").
173
174
Args:
175
identity: Identity string to validate
176
error_msg: Error message for exception
177
178
Raises:
179
ObjectFormatException: If identity format is invalid
180
"""
181
182
def check_time(time_seconds: int) -> None:
183
"""
184
Validate timestamp to prevent overflow errors.
185
186
Args:
187
time_seconds: Unix timestamp to validate
188
189
Raises:
190
ObjectFormatException: If timestamp too large (> MAX_TIME)
191
"""
192
193
def git_line(*items: bytes) -> bytes:
194
"""
195
Format items into space-separated Git header line.
196
197
Args:
198
items: Byte strings to join
199
200
Returns:
201
Space-separated line with trailing newline
202
"""
203
204
def S_ISGITLINK(m: int) -> bool:
205
"""
206
Check if file mode indicates a Git submodule.
207
208
Args:
209
m: Unix file mode to check
210
211
Returns:
212
True if mode indicates submodule (S_IFGITLINK)
213
"""
214
```
215
216
## Time and Timezone Functions
217
218
Utilities for parsing and formatting Git timestamp entries with timezone information.
219
220
```python { .api }
221
def parse_timezone(text: bytes) -> tuple[int, bool]:
222
"""
223
Parse timezone offset string (e.g., "+0100", "-0500").
224
225
Args:
226
text: Timezone string starting with + or -
227
228
Returns:
229
Tuple of (offset_seconds, is_negative_utc)
230
- offset_seconds: Timezone offset in seconds from UTC
231
- is_negative_utc: True if "-0000" (negative UTC)
232
233
Raises:
234
ValueError: If timezone format is invalid
235
"""
236
237
def format_timezone(offset: int, unnecessary_negative_timezone: bool = False) -> bytes:
238
"""
239
Format timezone offset for Git serialization.
240
241
Args:
242
offset: Timezone offset in seconds from UTC
243
unnecessary_negative_timezone: Use minus sign for UTC/positive zones
244
245
Returns:
246
Formatted timezone string (e.g., b"+0100", b"-0500")
247
248
Raises:
249
ValueError: If offset not divisible by 60 seconds
250
"""
251
252
def parse_time_entry(value: bytes) -> tuple[bytes, Optional[int], tuple[Optional[int], bool]]:
253
"""
254
Parse Git time entry from author/committer/tagger line.
255
256
Args:
257
value: Git time entry ("Name <email> timestamp timezone")
258
259
Returns:
260
Tuple of (identity, timestamp, (timezone_offset, is_negative_utc))
261
262
Raises:
263
ObjectFormatException: If time entry format is invalid
264
"""
265
266
def format_time_entry(person: bytes, time: int, timezone_info: tuple[int, bool]) -> bytes:
267
"""
268
Format time entry for Git serialization.
269
270
Args:
271
person: Identity string ("Name <email>")
272
time: Unix timestamp
273
timezone_info: Tuple of (offset_seconds, is_negative_utc)
274
275
Returns:
276
Formatted time entry for Git object
277
"""
278
```
279
280
## Tree Utility Functions
281
282
Functions for parsing, serializing, and manipulating Git tree objects.
283
284
```python { .api }
285
def parse_tree(text: bytes, strict: bool = False) -> Iterator[tuple[bytes, int, bytes]]:
286
"""
287
Parse serialized tree object data.
288
289
Args:
290
text: Raw tree object bytes
291
strict: Enable strict mode validation (reject leading zeros in modes)
292
293
Yields:
294
Tuples of (name, mode, hexsha) for each tree entry
295
296
Raises:
297
ObjectFormatException: If tree format is invalid
298
"""
299
300
def serialize_tree(items: Iterable[tuple[bytes, int, bytes]]) -> Iterator[bytes]:
301
"""
302
Serialize tree entries to Git tree format.
303
304
Args:
305
items: Iterable of (name, mode, hexsha) tuples (must be sorted)
306
307
Yields:
308
Byte chunks of serialized tree data
309
"""
310
311
def sorted_tree_items(
312
entries: dict[bytes, tuple[int, bytes]],
313
name_order: bool
314
) -> Iterator[TreeEntry]:
315
"""
316
Iterate tree entries in correct Git sort order.
317
318
Args:
319
entries: Dictionary mapping names to (mode, sha) tuples
320
name_order: If True, sort by name; if False, use Git tree order
321
322
Yields:
323
TreeEntry objects in proper sort order
324
"""
325
326
def key_entry(entry: tuple[bytes, tuple[int, ObjectID]]) -> bytes:
327
"""
328
Generate sort key for tree entry in Git tree order.
329
330
Args:
331
entry: Tuple of (name, (mode, sha))
332
333
Returns:
334
Sort key (directories get "/" suffix)
335
"""
336
337
def key_entry_name_order(entry: tuple[bytes, tuple[int, ObjectID]]) -> bytes:
338
"""
339
Generate sort key for tree entry in name order.
340
341
Args:
342
entry: Tuple of (name, (mode, sha))
343
344
Returns:
345
Name as sort key
346
"""
347
348
def pretty_format_tree_entry(
349
name: bytes,
350
mode: int,
351
hexsha: bytes,
352
encoding: str = "utf-8"
353
) -> str:
354
"""
355
Format tree entry for human-readable display.
356
357
Args:
358
name: Entry name
359
mode: File mode
360
hexsha: Object SHA-1
361
encoding: Text encoding for name display
362
363
Returns:
364
Formatted string like "100644 blob abc123... filename"
365
"""
366
```
367
368
## Exception Classes
369
370
Specialized exceptions for Git object operations.
371
372
```python { .api }
373
class EmptyFileException(FileFormatException):
374
"""
375
Raised when encountering unexpectedly empty Git object file.
376
377
This indicates a corrupted object or filesystem issue.
378
"""
379
380
class SubmoduleEncountered(Exception):
381
"""
382
Raised when submodule entry encountered during path traversal.
383
384
Contains information about the submodule path and commit SHA.
385
"""
386
387
def __init__(self, path: bytes, sha: ObjectID):
388
"""
389
Initialize submodule exception.
390
391
Args:
392
path: Path where submodule was encountered
393
sha: SHA-1 of submodule commit
394
"""
395
self.path = path
396
self.sha = sha
397
```
398
399
## Capabilities
400
401
### Base Object Class
402
403
Foundation class for all Git objects with common functionality.
404
405
```python { .api }
406
class ShaFile:
407
"""
408
Base class for Git objects with SHA-1 identification.
409
410
All Git objects (blobs, trees, commits, tags) inherit from this class
411
and share common serialization and identification methods.
412
"""
413
414
@property
415
def id(self) -> bytes:
416
"""
417
SHA-1 hash of object.
418
419
Returns:
420
40-character hex SHA-1 of the object, as bytes
421
"""
422
423
@property
424
def type_name(self) -> bytes:
425
"""
426
Git object type name.
427
428
Returns:
429
Object type as bytes (b'blob', b'tree', b'commit', b'tag')
430
"""
431
432
def as_raw_string(self) -> bytes:
433
"""
434
Serialize object to Git's internal format.
435
436
Returns:
437
Raw bytes in Git object format
438
"""
439
440
def as_raw_chunks(self) -> Iterator[bytes]:
441
"""
442
Serialize object as chunks for streaming.
443
444
Yields:
445
Byte chunks of serialized object
446
"""
447
448
def sha(self) -> Union[FixedSha, "HASH"]:
449
"""
450
Get SHA-1 hash object.
451
452
Returns:
453
SHA1 hash object for this Git object (FixedSha or hashlib SHA1)
454
"""
455
456
@staticmethod
457
def from_raw_string(
458
type_num: int,
459
string: bytes,
460
sha: Optional[ObjectID] = None
461
) -> 'ShaFile':
462
"""
463
Create object from raw Git format data.
464
465
Parameters:
466
- type_num: Git object type number (1=commit, 2=tree, 3=blob, 4=tag)
467
- string: Raw serialized object data
468
- sha: Optional known SHA-1 for the object
469
470
Returns:
471
ShaFile subclass instance
472
"""
473
474
@staticmethod
475
def from_raw_chunks(
476
type_num: int,
477
chunks: list[bytes],
478
sha: Optional[ObjectID] = None
479
) -> 'ShaFile':
480
"""
481
Create object from raw Git format chunks.
482
483
Parameters:
484
- type_num: Git object type number
485
- chunks: List of raw data chunks
486
- sha: Optional known SHA-1 for the object
487
488
Returns:
489
ShaFile subclass instance
490
"""
491
492
@classmethod
493
def from_string(cls, string: bytes) -> 'ShaFile':
494
"""
495
Create ShaFile from serialized string.
496
497
Parameters:
498
- string: Serialized object data
499
500
Returns:
501
ShaFile instance
502
"""
503
504
@classmethod
505
def from_file(cls, f: Union[BufferedIOBase, IO[bytes], "_GitFile"]) -> 'ShaFile':
506
"""
507
Load ShaFile from file object.
508
509
Parameters:
510
- f: File-like object to read from
511
512
Returns:
513
ShaFile instance
514
515
Raises:
516
ObjectFormatException: If object format is invalid
517
"""
518
519
def check(self) -> None:
520
"""
521
Validate object format and contents.
522
523
Raises:
524
ObjectFormatException: If object format is invalid
525
ChecksumMismatch: If SHA-1 doesn't match contents
526
"""
527
528
def as_legacy_object_chunks(self, compression_level: int = -1) -> Iterator[bytes]:
529
"""
530
Serialize object in legacy Git format as chunks.
531
532
Parameters:
533
- compression_level: zlib compression level (-1 for default)
534
535
Yields:
536
Compressed byte chunks
537
"""
538
539
def as_legacy_object(self, compression_level: int = -1) -> bytes:
540
"""
541
Serialize object in legacy Git format.
542
543
Parameters:
544
- compression_level: zlib compression level (-1 for default)
545
546
Returns:
547
Compressed object data
548
"""
549
550
def set_raw_chunks(
551
self,
552
chunks: list[bytes],
553
sha: Optional[ObjectID] = None
554
) -> None:
555
"""
556
Set object contents from raw data chunks.
557
558
Parameters:
559
- chunks: List of raw data chunks
560
- sha: Optional known SHA-1 for the object
561
"""
562
563
def copy(self) -> 'ShaFile':
564
"""
565
Create copy of this object.
566
567
Returns:
568
New ShaFile instance with same contents
569
"""
570
571
def raw_length(self) -> int:
572
"""
573
Get length of raw serialized data.
574
575
Returns:
576
Total byte length of serialized object
577
"""
578
579
def __hash__(self) -> int:
580
"""
581
Hash based on object ID.
582
583
Returns:
584
Hash of object's SHA-1
585
"""
586
587
def __ne__(self, other: object) -> bool:
588
"""
589
Check if objects are not equal.
590
591
Parameters:
592
- other: Object to compare with
593
594
Returns:
595
True if objects have different SHA-1s
596
"""
597
598
def __eq__(self, other: object) -> bool:
599
"""
600
Check if objects are equal.
601
602
Parameters:
603
- other: Object to compare with
604
605
Returns:
606
True if objects have same SHA-1
607
"""
608
609
def __lt__(self, other: object) -> bool:
610
"""
611
Compare objects by SHA-1.
612
613
Parameters:
614
- other: ShaFile to compare with
615
616
Returns:
617
True if this object's SHA-1 is less than other's
618
619
Raises:
620
TypeError: If other is not a ShaFile
621
"""
622
623
def __le__(self, other: object) -> bool:
624
"""
625
Compare objects by SHA-1 (less than or equal).
626
627
Parameters:
628
- other: ShaFile to compare with
629
630
Returns:
631
True if this object's SHA-1 is less than or equal to other's
632
633
Raises:
634
TypeError: If other is not a ShaFile
635
"""
636
```
637
638
### Blob Objects
639
640
File content storage in Git repositories.
641
642
```python { .api }
643
class Blob(ShaFile):
644
"""
645
Git blob object representing file contents.
646
647
Blobs store the raw content of files without any metadata
648
like filename, permissions, or directory structure.
649
"""
650
651
def __init__(self, data: bytes = b""):
652
"""
653
Create blob with file data.
654
655
Parameters:
656
- data: File content as bytes
657
"""
658
659
@property
660
def data(self) -> bytes:
661
"""
662
File content data.
663
664
Returns:
665
Raw file content as bytes
666
"""
667
668
@data.setter
669
def data(self, value: bytes) -> None:
670
"""
671
Set file content data.
672
673
Parameters:
674
- value: New file content as bytes
675
"""
676
677
@classmethod
678
def from_path(cls, path: Union[str, bytes]) -> 'Blob':
679
"""
680
Create blob from filesystem file.
681
682
Parameters:
683
- path: Path to file
684
685
Returns:
686
Blob object with file contents
687
688
Raises:
689
NotBlobError: If file is not a valid blob
690
"""
691
692
@property
693
def chunked(self) -> list[bytes]:
694
"""
695
Access blob data as chunks.
696
697
Returns:
698
List of data chunks (not necessarily lines)
699
"""
700
701
@chunked.setter
702
def chunked(self, chunks: list[bytes]) -> None:
703
"""
704
Set blob data from chunks.
705
706
Parameters:
707
- chunks: List of data chunks to set
708
"""
709
710
def splitlines(self) -> list[bytes]:
711
"""
712
Split blob data into lines.
713
714
Returns:
715
List of lines as bytes (preserves original line endings)
716
"""
717
```
718
719
### Tree Objects
720
721
Directory structure representation in Git repositories.
722
723
```python { .api }
724
class Tree(ShaFile):
725
"""
726
Git tree object representing directory structure.
727
728
Trees contain entries for files and subdirectories with
729
their names, modes, and SHA-1 hashes.
730
"""
731
732
def __init__(self):
733
"""Create empty tree."""
734
735
def add(
736
self,
737
name: bytes,
738
mode: int,
739
hexsha: bytes
740
) -> None:
741
"""
742
Add entry to tree.
743
744
Parameters:
745
- name: File or directory name
746
- mode: Unix file mode (permissions and type)
747
- hexsha: SHA-1 hash of object
748
"""
749
750
def __getitem__(self, name: bytes) -> Tuple[int, bytes]:
751
"""
752
Get tree entry by name.
753
754
Parameters:
755
- name: Entry name
756
757
Returns:
758
Tuple of (mode, sha1_hash)
759
"""
760
761
def __setitem__(
762
self,
763
name: bytes,
764
value: Tuple[int, bytes]
765
) -> None:
766
"""
767
Set tree entry.
768
769
Parameters:
770
- name: Entry name
771
- value: Tuple of (mode, sha1_hash)
772
"""
773
774
def __delitem__(self, name: bytes) -> None:
775
"""
776
Remove entry from tree.
777
778
Parameters:
779
- name: Entry name to remove
780
"""
781
782
def __contains__(self, name: bytes) -> bool:
783
"""
784
Check if entry exists in tree.
785
786
Parameters:
787
- name: Entry name
788
789
Returns:
790
True if entry exists
791
"""
792
793
def items(self) -> Iterator[TreeEntry]:
794
"""
795
Iterate over tree entries.
796
797
Yields:
798
TreeEntry named tuples (path, mode, sha)
799
"""
800
801
def iteritems(self) -> Iterator[Tuple[bytes, int, bytes]]:
802
"""
803
Iterate over tree entries as tuples.
804
805
Yields:
806
Tuples of (name, mode, sha1_hash)
807
"""
808
809
@classmethod
810
def from_path(cls, filename: Union[str, bytes]) -> 'Tree':
811
"""
812
Create tree from Git object file.
813
814
Parameters:
815
- filename: Path to Git tree object file
816
817
Returns:
818
Tree object loaded from file
819
820
Raises:
821
NotTreeError: If file is not a valid tree
822
"""
823
824
def lookup_path(
825
self,
826
lookup_obj: Callable[[ObjectID], ShaFile],
827
path: bytes
828
) -> tuple[int, ObjectID]:
829
"""
830
Look up object at path within tree.
831
832
Parameters:
833
- lookup_obj: Function to retrieve objects by SHA-1
834
- path: Path within tree (may contain subdirectories)
835
836
Returns:
837
Tuple of (mode, sha1_hash) for object at path
838
839
Raises:
840
SubmoduleEncountered: If path crosses submodule boundary
841
NotTreeError: If intermediate path component is not a tree
842
"""
843
844
def as_pretty_string(self) -> str:
845
"""
846
Format tree as human-readable string.
847
848
Returns:
849
Multi-line string showing tree contents in ls-tree format
850
"""
851
852
def add(self, name: bytes, mode: int, hexsha: bytes) -> None:
853
"""
854
Add entry to tree.
855
856
Parameters:
857
- name: Entry name as bytes
858
- mode: Unix file mode
859
- hexsha: SHA-1 hash as hex bytes
860
"""
861
862
def iteritems(self, name_order: bool = False) -> Iterator[TreeEntry]:
863
"""
864
Iterate over tree entries.
865
866
Parameters:
867
- name_order: If True, sort by name instead of Git tree order
868
869
Yields:
870
TreeEntry objects for each entry
871
"""
872
```
873
874
### Commit Objects
875
876
Repository history and metadata storage.
877
878
```python { .api }
879
class Commit(ShaFile):
880
"""
881
Git commit object representing repository snapshots.
882
883
Commits link trees with metadata including author, committer,
884
timestamp, message, and parent relationships.
885
"""
886
887
def __init__(self):
888
"""Create empty commit."""
889
890
@property
891
def message(self) -> bytes:
892
"""
893
Commit message.
894
895
Returns:
896
Commit message as bytes
897
"""
898
899
@message.setter
900
def message(self, value: bytes) -> None:
901
"""
902
Set commit message.
903
904
Parameters:
905
- value: Commit message as bytes
906
"""
907
908
@property
909
def author(self) -> bytes:
910
"""
911
Commit author information.
912
913
Returns:
914
Author in format b"Name <email>"
915
"""
916
917
@author.setter
918
def author(self, value: bytes) -> None:
919
"""
920
Set commit author.
921
922
Parameters:
923
- value: Author in format b"Name <email>"
924
"""
925
926
@property
927
def committer(self) -> bytes:
928
"""
929
Commit committer information.
930
931
Returns:
932
Committer in format b"Name <email>"
933
"""
934
935
@committer.setter
936
def committer(self, value: bytes) -> None:
937
"""
938
Set commit committer.
939
940
Parameters:
941
- value: Committer in format b"Name <email>"
942
"""
943
944
@property
945
def author_time(self) -> int:
946
"""
947
Author timestamp.
948
949
Returns:
950
Unix timestamp when authored
951
"""
952
953
@author_time.setter
954
def author_time(self, value: int) -> None:
955
"""
956
Set author timestamp.
957
958
Parameters:
959
- value: Unix timestamp
960
"""
961
962
@property
963
def commit_time(self) -> int:
964
"""
965
Commit timestamp.
966
967
Returns:
968
Unix timestamp when committed
969
"""
970
971
@commit_time.setter
972
def commit_time(self, value: int) -> None:
973
"""
974
Set commit timestamp.
975
976
Parameters:
977
- value: Unix timestamp
978
"""
979
980
@property
981
def author_timezone(self) -> int:
982
"""
983
Author timezone offset.
984
985
Returns:
986
Timezone offset in seconds
987
"""
988
989
@property
990
def commit_timezone(self) -> int:
991
"""
992
Commit timezone offset.
993
994
Returns:
995
Timezone offset in seconds
996
"""
997
998
@property
999
def tree(self) -> bytes:
1000
"""
1001
Tree SHA-1 hash.
1002
1003
Returns:
1004
40-character hex SHA-1 of the commit's tree, as bytes
1005
"""
1006
1007
@tree.setter
1008
def tree(self, value: bytes) -> None:
1009
"""
1010
Set tree SHA-1 hash.
1011
1012
Parameters:
1013
- value: 40-character hex SHA-1 of the tree, as bytes
1014
"""
1015
1016
@property
1017
def parents(self) -> List[bytes]:
1018
"""
1019
Parent commit SHA-1 hashes.
1020
1021
Returns:
1022
List of parent commit SHA-1 hashes
1023
"""
1024
1025
@parents.setter
1026
def parents(self, value: List[bytes]) -> None:
1027
"""
1028
Set parent commit SHA-1 hashes.
1029
1030
Parameters:
1031
- value: List of parent commit SHA-1 hashes
1032
"""
1033
1034
@property
1035
def encoding(self) -> bytes:
1036
"""
1037
Commit message encoding.
1038
1039
Returns:
1040
Encoding name as bytes (e.g., b'utf-8')
1041
"""
1042
1043
@encoding.setter
1044
def encoding(self, value: bytes) -> None:
1045
"""
1046
Set commit message encoding.
1047
1048
Parameters:
1049
- value: Encoding name as bytes
1050
"""
1051
1052
@property
1053
def gpgsig(self) -> Optional[bytes]:
1054
"""
1055
GPG signature for commit.
1056
1057
Returns:
1058
GPG signature as bytes, or None if unsigned
1059
"""
1060
1061
@gpgsig.setter
1062
def gpgsig(self, value: Optional[bytes]) -> None:
1063
"""
1064
Set GPG signature.
1065
1066
Parameters:
1067
- value: GPG signature as bytes or None
1068
"""
1069
1070
@property
1071
def extra(self) -> list[tuple[bytes, Optional[bytes]]]:
1072
"""
1073
Extra header fields not understood by this version.
1074
1075
Returns:
1076
List of (field_name, field_value) tuples for unknown headers
1077
"""
1078
1079
@property
1080
def mergetag(self) -> list['Tag']:
1081
"""
1082
Associated signed tags for merge commits.
1083
1084
Returns:
1085
List of Tag objects embedded in commit
1086
"""
1087
1088
@mergetag.setter
1089
def mergetag(self, value: list['Tag']) -> None:
1090
"""
1091
Set associated merge tags.
1092
1093
Parameters:
1094
- value: List of Tag objects
1095
"""
1096
1097
def sign(self, keyid: Optional[str] = None) -> None:
1098
"""
1099
Sign commit with GPG key.
1100
1101
Parameters:
1102
- keyid: Optional GPG key ID to use for signing
1103
1104
Raises:
1105
ImportError: If gpg module not available
1106
"""
1107
1108
def verify(self, keyids: Optional[Iterable[str]] = None) -> None:
1109
"""
1110
Verify GPG signature on commit.
1111
1112
Parameters:
1113
- keyids: Optional list of trusted key IDs
1114
1115
Raises:
1116
gpg.errors.BadSignatures: If signature verification fails
1117
gpg.errors.MissingSignatures: If not signed by trusted key
1118
"""
1119
1120
def raw_without_sig(self) -> bytes:
1121
"""
1122
Get raw commit data without GPG signature.
1123
1124
Returns:
1125
Raw serialized commit data excluding gpgsig field
1126
"""
1127
```
1128
1129
### Tag Objects
1130
1131
Named references to specific commits with optional annotation.
1132
1133
```python { .api }
1134
class Tag(ShaFile):
1135
"""
1136
Git tag object for marking specific commits.
1137
1138
Tags provide human-readable names for commits with optional
1139
annotation including tagger, timestamp, and message.
1140
"""
1141
1142
def __init__(self):
1143
"""Create empty tag."""
1144
1145
@property
1146
def object(self) -> Tuple[type[ShaFile], bytes]:
1147
"""
1148
Tagged object information.
1149
1150
Returns:
1151
Tuple of (object_class, hex_sha): the ShaFile subclass of the tagged object (e.g. Commit) and its 40-character hex SHA-1
1152
"""
1153
1154
@object.setter
1155
def object(self, value: Tuple[type[ShaFile], bytes]) -> None:
1156
"""
1157
Set tagged object.
1158
1159
Parameters:
1160
- value: Tuple of (object_class, hex_sha), e.g. (Commit, commit.id)
1161
"""
1162
1163
@property
1164
def name(self) -> bytes:
1165
"""
1166
Tag name.
1167
1168
Returns:
1169
Tag name as bytes
1170
"""
1171
1172
@name.setter
1173
def name(self, value: bytes) -> None:
1174
"""
1175
Set tag name.
1176
1177
Parameters:
1178
- value: Tag name as bytes
1179
"""
1180
1181
@property
1182
def tagger(self) -> bytes:
1183
"""
1184
Tag tagger information.
1185
1186
Returns:
1187
Tagger in format b"Name <email>"
1188
"""
1189
1190
@tagger.setter
1191
def tagger(self, value: bytes) -> None:
1192
"""
1193
Set tag tagger.
1194
1195
Parameters:
1196
- value: Tagger in format b"Name <email>"
1197
"""
1198
1199
@property
1200
def tag_time(self) -> int:
1201
"""
1202
Tag creation timestamp.
1203
1204
Returns:
1205
Unix timestamp when tag was created
1206
"""
1207
1208
@tag_time.setter
1209
def tag_time(self, value: int) -> None:
1210
"""
1211
Set tag timestamp.
1212
1213
Parameters:
1214
- value: Unix timestamp
1215
"""
1216
1217
@property
1218
def tag_timezone(self) -> int:
1219
"""
1220
Tag timezone offset.
1221
1222
Returns:
1223
Timezone offset in seconds
1224
"""
1225
1226
@property
1227
def message(self) -> bytes:
1228
"""
1229
Tag annotation message.
1230
1231
Returns:
1232
Tag message as bytes
1233
"""
1234
1235
@message.setter
1236
def message(self, value: bytes) -> None:
1237
"""
1238
Set tag message.
1239
1240
Parameters:
1241
- value: Tag message as bytes
1242
"""
1243
1244
@property
1245
def signature(self) -> Optional[bytes]:
1246
"""
1247
Tag GPG signature.
1248
1249
Returns:
1250
GPG signature as bytes, or None if unsigned
1251
"""
1252
1253
@signature.setter
1254
def signature(self, value: Optional[bytes]) -> None:
1255
"""
1256
Set GPG signature.
1257
1258
Parameters:
1259
- value: GPG signature as bytes or None
1260
"""
1261
1262
def sign(self, keyid: Optional[str] = None) -> None:
1263
"""
1264
Sign tag with GPG key.
1265
1266
Parameters:
1267
- keyid: Optional GPG key ID to use for signing
1268
1269
Raises:
1270
ImportError: If gpg module not available
1271
"""
1272
1273
def verify(self, keyids: Optional[Iterable[str]] = None) -> None:
1274
"""
1275
Verify GPG signature on tag.
1276
1277
Parameters:
1278
- keyids: Optional list of trusted key IDs
1279
1280
Raises:
1281
gpg.errors.BadSignatures: If signature verification fails
1282
gpg.errors.MissingSignatures: If not signed by trusted key
1283
"""
1284
1285
def raw_without_sig(self) -> bytes:
1286
"""
1287
Get raw tag data without GPG signature.
1288
1289
Returns:
1290
Raw serialized tag data excluding signature
1291
"""
1292
```
1293
1294
### Utility Classes and Types
1295
1296
Supporting classes and types for object manipulation.
1297
1298
```python { .api }
1299
class TreeEntry(namedtuple("TreeEntry", ["path", "mode", "sha"])):
1300
"""
1301
Named tuple representing a Git tree entry.
1302
1303
Fields:
1304
- path: Entry name/path as bytes
1305
- mode: Unix file mode (int)
1306
- sha: SHA-1 hash as bytes
1307
"""
1308
1309
def in_path(self, path: bytes) -> 'TreeEntry':
1310
"""
1311
Return copy of entry with given path prepended.
1312
1313
Parameters:
1314
- path: Path prefix to prepend
1315
1316
Returns:
1317
New TreeEntry with combined path
1318
1319
Raises:
1320
TypeError: If path is not bytes
1321
"""
1322
1323
class FixedSha:
1324
"""
1325
SHA-1 object with predetermined fixed value.
1326
1327
Used for objects where SHA-1 is known in advance,
1328
avoiding need for recalculation during serialization.
1329
"""
1330
1331
def __init__(self, hexsha: Union[str, bytes]) -> None:
1332
"""
1333
Initialize with hex SHA-1 string.
1334
1335
Parameters:
1336
- hexsha: 40-character hex SHA-1 (str or bytes)
1337
1338
Raises:
1339
TypeError: If hexsha is not str or bytes
1340
"""
1341
1342
def digest(self) -> bytes:
1343
"""
1344
Get binary SHA-1 digest.
1345
1346
Returns:
1347
20-byte binary SHA-1
1348
"""
1349
1350
def hexdigest(self) -> str:
1351
"""
1352
Get hex SHA-1 digest.
1353
1354
Returns:
1355
40-character hex string
1356
"""
1357
```
1358
1359
1360
## Usage Examples
1361
1362
### Working with Blobs
1363
1364
```python
1365
from dulwich.objects import Blob
1366
1367
# Create blob from data
1368
blob = Blob(b"Hello, world!\n")
1369
print(f"Blob SHA-1: {blob.id.decode('ascii')}")
1370
1371
# Create blob from file
1372
blob = Blob.from_path("/path/to/file.txt")
1373
print(f"File size: {len(blob.data)} bytes")
1374
1375
# Process blob data
1376
lines = blob.splitlines()
1377
for i, line in enumerate(lines):
1378
print(f"Line {i}: {line.decode('utf-8', errors='replace')}")
1379
```
1380
1381
### Building Trees
1382
1383
```python
1384
from dulwich.objects import Tree, Blob
1385
1386
# Create tree with files
1387
tree = Tree()
1388
1389
# Add file blob
1390
file_blob = Blob(b"File content")
1391
tree.add(b"file.txt", 0o100644, file_blob.id)
1392
1393
# Add subdirectory
1394
subtree = Tree()
1395
subtree.add(b"subfile.txt", 0o100644, Blob(b"Subfile content").id)
1396
tree.add(b"subdir", 0o040000, subtree.id)
1397
1398
# Iterate tree entries
1399
for entry in tree.items():
1400
print(f"{entry.path.decode('utf-8')}: {entry.mode:o} {entry.sha.decode('ascii')}")
1401
```
1402
1403
### Creating Commits
1404
1405
```python
1406
from dulwich.objects import Commit, Tree, Blob
1407
import time
1408
1409
# Create commit
1410
commit = Commit()
1411
commit.tree = tree.id
1412
commit.author = b"John Doe <john@example.com>"
1413
commit.committer = b"John Doe <john@example.com>"
1414
commit.author_time = int(time.time())
1415
commit.commit_time = int(time.time())
1416
commit.author_timezone = 0
1417
commit.commit_timezone = 0
1418
commit.message = b"Initial commit\n"
1419
commit.parents = [] # No parents for initial commit
1420
1421
print(f"Commit SHA-1: {commit.id.decode('ascii')}")
1422
```
1423
1424
### Working with Tags
1425
1426
```python
1427
from dulwich.objects import Commit, Tag
1428
import time
1429
1430
# Create annotated tag
1431
tag = Tag()
1432
tag.object = (Commit, commit.id) # Tag a commit: (object class, hex SHA)
1433
tag.name = b"v1.0.0"
1434
tag.tagger = b"Release Manager <release@example.com>"
1435
tag.tag_time = int(time.time())
1436
tag.tag_timezone = 0
1437
tag.message = b"Version 1.0.0 release\n"
1438
1439
print(f"Tag SHA-1: {tag.id.decode('ascii')}")
1440
```
1441
1442
### Using Utility Functions
1443
1444
```python
1445
from dulwich.objects import (
1446
Blob, hex_to_sha, sha_to_hex, valid_hexsha,
1447
parse_timezone, format_timezone,
1448
is_commit, is_tree, is_blob, is_tag
1449
)
1450
1451
# Working with SHA-1 conversions
1452
hex_sha = b"1234567890abcdef1234567890abcdef12345678"
1453
binary_sha = hex_to_sha(hex_sha)
1454
back_to_hex = sha_to_hex(binary_sha)
1455
1456
# Validate SHA-1 strings
1457
if valid_hexsha(hex_sha):
1458
print("Valid SHA-1")
1459
1460
# Parse timezone information
1461
timezone_str = b"+0530" # India Standard Time
1462
offset_seconds, is_negative_utc = parse_timezone(timezone_str)
1463
print(f"Offset: {offset_seconds} seconds, Negative UTC: {is_negative_utc}")
1464
1465
# Format timezone back
1466
formatted_tz = format_timezone(offset_seconds, is_negative_utc)
1467
1468
# Type checking with type guards
1469
obj = Blob(b"some data")
1470
if is_blob(obj):
1471
# obj is now typed as Blob
1472
print(f"Blob size: {len(obj.data)}")
1473
```
1474
1475
### Working with Tree Operations
1476
1477
```python
1478
from dulwich.objects import Blob, SubmoduleEncountered, Tree, parse_tree, serialize_tree, sorted_tree_items
1479
1480
# Parse raw tree data
1481
tree_data = b"100644 file.txt\x00" + bytes.fromhex("1234567890abcdef1234567890abcdef12345678") # Raw tree bytes: mode, name, NUL, 20-byte binary SHA-1
1482
entries = list(parse_tree(tree_data))
1483
for name, mode, hexsha in entries:
1484
print(f"{name.decode('utf-8')}: mode {mode:o}, SHA {hexsha.decode('ascii')}")
1485
1486
# Work with tree entries in proper order
1487
tree = Tree()
1488
tree.add(b"file1.txt", 0o100644, b"abc123" * 20)
1489
tree.add(b"dir", 0o040000, b"def456" * 20)
1490
1491
# Get entries in Git tree order (directories sorted with trailing /)
1492
for entry in sorted_tree_items(tree._entries, name_order=False):
1493
print(f"{entry.path.decode('utf-8')}: {entry.mode:o}")
1494
1495
# Look up path in tree
1496
def lookup_object(sha):
1497
# Mock lookup function - in practice, use repo object store
1498
return Tree() if sha == b"def456" * 20 else Blob(b"content")
1499
1500
try:
1501
mode, sha = tree.lookup_path(lookup_object, b"dir/subfile.txt")
1502
print(f"Found object: mode {mode:o}, SHA {sha.decode('ascii')}")
1503
except SubmoduleEncountered as e:
1504
print(f"Encountered submodule at {e.path}")
1505
```
1506
1507
### Working with GPG Signatures
1508
1509
```python
1510
# Sign a commit (requires gpg module)
1511
try:
1512
commit.sign() # Uses default GPG key
1513
print("Commit signed successfully")
1514
1515
# Verify signature
1516
commit.verify()
1517
print("Signature verified")
1518
1519
# Get commit data without signature for external verification
1520
unsigned_data = commit.raw_without_sig()
1521
1522
except ImportError:
1523
print("GPG module not available")
1524
except Exception as e:
1525
print(f"GPG operation failed: {e}")
1526
1527
# Same operations work for tags
1528
try:
1529
tag.sign("specific-key-id")
1530
tag.verify(["trusted-key-1", "trusted-key-2"])
1531
except Exception as e:
1532
print(f"Tag signing/verification failed: {e}")
1533
```