0
# Repository Management
1
2
Core repository classes for opening, creating, and managing Git repositories. These classes provide direct access to repository internals including objects, references, and configuration.
3
4
## Capabilities
5
6
### Repository Classes
7
8
Main repository implementations supporting different storage backends and access patterns.
9
10
```python { .api }
11
class BaseRepo:
12
"""
13
Abstract base class for Git repositories.
14
15
Provides common interface for all repository implementations.
16
"""
17
18
def __init__(self, object_store: PackBasedObjectStore, refs: RefsContainer) -> None:
19
"""
20
Open a repository.
21
22
This shouldn't be called directly, but rather through one of the
23
subclasses, such as MemoryRepo or Repo.
24
25
Parameters:
26
- object_store: Object store to use
27
- refs: Refs container to use
28
"""
29
30
def head(self) -> bytes:
31
"""
32
Get the SHA-1 of the current HEAD.
33
34
Returns:
35
SHA-1 of the HEAD commit as a 40-character hex bytestring
36
"""
37
38
def get_refs(self) -> dict[bytes, bytes]:
39
"""
40
Get dictionary with all refs.
41
42
Returns:
43
A dict mapping ref names to SHA1s
44
"""
45
46
def get_config(self) -> Config:
47
"""
48
Get repository configuration.
49
50
Returns:
51
Config object for repository settings
52
"""
53
54
def close(self) -> None:
55
"""Close repository and release resources."""
56
57
def __enter__(self):
58
"""Context manager entry."""
59
return self
60
61
def __exit__(self, exc_type, exc_val, exc_tb):
62
"""Context manager exit."""
63
self.close()
64
65
def __getitem__(self, key: bytes) -> ShaFile:
66
"""
67
Retrieve object by SHA-1.
68
69
Parameters:
70
- key: 20-byte SHA-1 hash
71
72
Returns:
73
ShaFile object (Blob, Tree, Commit, or Tag)
74
"""
75
76
def __contains__(self, key: bytes) -> bool:
77
"""
78
Check if object exists in repository.
79
80
Parameters:
81
- key: 20-byte SHA-1 hash
82
83
Returns:
84
True if object exists
85
"""
86
87
def fetch(
88
self,
89
target,
90
determine_wants=None,
91
progress=None,
92
depth: Optional[int] = None
93
):
94
"""
95
Fetch objects into another repository.
96
97
Parameters:
98
- target: The target repository
99
- determine_wants: Optional function to determine what refs to fetch
100
- progress: Optional progress function
101
- depth: Optional shallow fetch depth
102
103
Returns:
104
The local refs
105
"""
106
107
def fetch_pack_data(
108
self,
109
determine_wants,
110
graph_walker,
111
progress,
112
*,
113
get_tagged=None,
114
depth: Optional[int] = None
115
):
116
"""
117
Fetch the pack data required for a set of revisions.
118
119
Parameters:
120
- determine_wants: Function that takes a dictionary with heads and returns the list of heads to fetch
121
- graph_walker: Object that can iterate over the list of revisions to fetch
122
- progress: Simple progress function that will be called with updated progress strings
123
- get_tagged: Function that returns a dict of pointed-to sha -> tag sha for including tags
124
- depth: Shallow fetch depth
125
126
Returns:
127
count and iterator over pack data
128
"""
129
130
def find_missing_objects(
131
self,
132
determine_wants,
133
graph_walker,
134
progress,
135
*,
136
get_tagged=None,
137
depth: Optional[int] = None
138
) -> Optional[MissingObjectFinder]:
139
"""
140
Fetch the missing objects required for a set of revisions.
141
142
Parameters:
143
- determine_wants: Function that takes a dictionary with heads and returns the list of heads to fetch
144
- graph_walker: Object that can iterate over the list of revisions to fetch
145
- progress: Simple progress function that will be called with updated progress strings
146
- get_tagged: Function that returns a dict of pointed-to sha -> tag sha for including tags
147
- depth: Shallow fetch depth
148
149
Returns:
150
iterator over objects, with __len__ implemented
151
"""
152
153
def generate_pack_data(
154
self,
155
have: list[ObjectID],
156
want: list[ObjectID],
157
progress: Optional[Callable[[str], None]] = None,
158
ofs_delta: Optional[bool] = None
159
):
160
"""
161
Generate pack data objects for a set of wants/haves.
162
163
Parameters:
164
- have: List of SHA1s of objects that should not be sent
165
- want: List of SHA1s of objects that should be sent
166
- ofs_delta: Whether OFS deltas can be included
167
- progress: Optional progress reporting method
168
"""
169
170
def get_graph_walker(
171
self,
172
heads: Optional[list[ObjectID]] = None
173
) -> ObjectStoreGraphWalker:
174
"""
175
Retrieve a graph walker.
176
177
A graph walker is used by a remote repository (or proxy)
178
to find out which objects are present in this repository.
179
180
Parameters:
181
- heads: Repository heads to use (optional)
182
183
Returns:
184
A graph walker object
185
"""
186
187
def get_parents(self, sha: bytes, commit: Optional[Commit] = None) -> list[bytes]:
188
"""
189
Retrieve the parents of a specific commit.
190
191
If the specific commit is a graftpoint, the graft parents
192
will be returned instead.
193
194
Parameters:
195
- sha: SHA of the commit for which to retrieve the parents
196
- commit: Optional commit matching the sha
197
198
Returns:
199
List of parents
200
"""
201
202
def parents_provider(self) -> ParentsProvider:
203
"""
204
Get parents provider for this repository.
205
206
Returns:
207
ParentsProvider object for resolving commit parents
208
"""
209
210
def get_worktree(self) -> "WorkTree":
211
"""
212
Get the working tree for this repository.
213
214
Returns:
215
WorkTree instance for performing working tree operations
216
217
Raises:
218
NotImplementedError: If the repository doesn't support working trees
219
"""
220
221
def get_rebase_state_manager(self):
222
"""
223
Get the appropriate rebase state manager for this repository.
224
225
Returns:
226
RebaseStateManager instance
227
"""
228
229
def get_blob_normalizer(self):
230
"""
231
Return a BlobNormalizer object for checkin/checkout operations.
232
233
Returns:
234
BlobNormalizer instance
235
"""
236
237
def get_gitattributes(self, tree: Optional[bytes] = None) -> "GitAttributes":
238
"""
239
Read gitattributes for the repository.
240
241
Parameters:
242
- tree: Tree SHA to read .gitattributes from (defaults to HEAD)
243
244
Returns:
245
GitAttributes object that can be used to match paths
246
"""
247
248
def get_shallow(self) -> set[ObjectID]:
249
"""
250
Get the set of shallow commits.
251
252
Returns:
253
Set of shallow commits
254
"""
255
256
def update_shallow(self, new_shallow, new_unshallow) -> None:
257
"""
258
Update the list of shallow objects.
259
260
Parameters:
261
- new_shallow: Newly shallow objects
262
- new_unshallow: Newly no longer shallow objects
263
"""
264
265
def get_peeled(self, ref: Ref) -> ObjectID:
266
"""
267
Get the peeled value of a ref.
268
269
Parameters:
270
- ref: The refname to peel
271
272
Returns:
273
The fully-peeled SHA1 of a tag object, after peeling all
274
intermediate tags; if the original ref does not point to a tag,
275
this will equal the original SHA1
276
"""
277
278
@property
279
def notes(self) -> "Notes":
280
"""
281
Access notes functionality for this repository.
282
283
Returns:
284
Notes object for accessing notes
285
"""
286
287
def get_walker(self, include: Optional[list[bytes]] = None, **kwargs):
288
"""
289
Obtain a walker for this repository.
290
291
Parameters:
292
- include: Iterable of SHAs of commits to include along with their ancestors. Defaults to [HEAD]
293
294
Keyword Args:
295
- exclude: Iterable of SHAs of commits to exclude along with their ancestors, overriding includes
296
- order: ORDER_* constant specifying the order of results. Anything other than ORDER_DATE may result in O(n) memory usage
297
- reverse: If True, reverse the order of output, requiring O(n) memory
298
- max_entries: The maximum number of entries to yield, or None for no limit
299
- paths: Iterable of file or subtree paths to show entries for
300
- rename_detector: diff.RenameDetector object for detecting renames
301
- follow: If True, follow path across renames/copies. Forces a default rename_detector
302
- since: Timestamp to list commits after
303
- until: Timestamp to list commits before
304
- queue_cls: A class to use for a queue of commits, supporting the iterator protocol
305
306
Returns:
307
A Walker object
308
"""
309
310
class Repo(BaseRepo):
311
"""
312
Git repository on local filesystem.
313
314
Provides access to .git directory structure including objects,
315
refs, config, index, and working tree.
316
"""
317
318
def __init__(self, root: Union[str, bytes, os.PathLike] = ".", object_store: Optional[PackBasedObjectStore] = None, bare: Optional[bool] = None):
319
"""
320
Initialize repository object.
321
322
Parameters:
323
- root: Path to repository root (containing .git)
324
- object_store: ObjectStore to use; if omitted, we use the repository's default object store
325
- bare: True if this is a bare repository
326
"""
327
328
@property
329
def path(self) -> str:
330
"""
331
Repository root path.
332
333
Returns:
334
Absolute path to repository root
335
"""
336
337
@property
338
def controldir(self) -> str:
339
"""
340
Git control directory path.
341
342
Returns:
343
Path to .git directory
344
"""
345
346
@property
347
def commondir(self) -> str:
348
"""
349
Common Git directory for worktrees.
350
351
Returns:
352
Path to common .git directory
353
"""
354
355
def open_index(self) -> Index:
356
"""
357
Open repository index file.
358
359
Returns:
360
Index object for staging area
361
"""
362
363
def stage(self, fs_paths: List[str]) -> None:
364
"""
365
Stage files for commit.
366
367
Parameters:
368
- fs_paths: List of filesystem paths to stage
369
"""
370
371
def unstage(self, fs_paths: List[str]) -> None:
372
"""
373
Unstage files from index.
374
375
Parameters:
376
- fs_paths: List of filesystem paths to unstage
377
"""
378
379
def reset_index(self, tree_id: Optional[bytes] = None) -> None:
380
"""
381
Reset index to match tree.
382
383
Parameters:
384
- tree_id: Tree SHA-1 (default: HEAD tree)
385
"""
386
387
@property
388
def worktrees(self) -> "WorkTreeContainer":
389
"""
390
Access worktrees container for this repository.
391
392
Returns:
393
WorkTreeContainer for managing linked worktrees
394
"""
395
396
@classmethod
397
def discover(cls, start="."):
398
"""
399
Iterate parent directories to discover a repository.
400
401
Return a Repo object for the first parent directory that looks like a
402
Git repository.
403
404
Parameters:
405
- start: The directory to start discovery from (defaults to '.')
406
"""
407
408
def clone(
409
self,
410
target_path,
411
*,
412
mkdir=True,
413
bare=False,
414
origin=b"origin",
415
checkout=None,
416
branch=None,
417
progress=None,
418
depth: Optional[int] = None,
419
symlinks=None,
420
) -> "Repo":
421
"""
422
Clone this repository.
423
424
Parameters:
425
- target_path: Target path
426
- mkdir: Create the target directory
427
- bare: Whether to create a bare repository
428
- checkout: Whether or not to check-out HEAD after cloning
429
- origin: Base name for refs in target repository cloned from this repository
430
- branch: Optional branch or tag to be used as HEAD in the new repository instead of this repository's HEAD
431
- progress: Optional progress function
432
- depth: Depth at which to fetch
433
- symlinks: Symlinks setting (default to autodetect)
434
435
Returns:
436
Created repository as Repo
437
"""
438
439
def read_reflog(self, ref):
440
"""
441
Read reflog entries for a reference.
442
443
Parameters:
444
- ref: Reference name (e.g. b'HEAD', b'refs/heads/master')
445
446
Yields:
447
reflog.Entry objects in chronological order (oldest first)
448
"""
449
450
@classmethod
451
def _init_new_working_directory(
452
cls,
453
path: Union[str, bytes, os.PathLike],
454
main_repo,
455
identifier=None,
456
mkdir=False,
457
):
458
"""
459
Create a new working directory linked to a repository.
460
461
Parameters:
462
- path: Path in which to create the working tree
463
- main_repo: Main repository to reference
464
- identifier: Worktree identifier
465
- mkdir: Whether to create the directory
466
467
Returns:
468
Repo instance
469
"""
470
471
def get_blob_normalizer(self):
472
"""
473
Return a BlobNormalizer object for checkin/checkout operations.
474
475
Returns:
476
BlobNormalizer instance
477
"""
478
479
def get_gitattributes(self, tree: Optional[bytes] = None) -> "GitAttributes":
480
"""
481
Read gitattributes for the repository.
482
483
Parameters:
484
- tree: Tree SHA to read .gitattributes from (defaults to HEAD)
485
486
Returns:
487
GitAttributes object that can be used to match paths
488
"""
489
490
class MemoryRepo(BaseRepo):
491
"""
492
Git repository stored entirely in memory.
493
494
Useful for testing and temporary operations without
495
filesystem storage.
496
"""
497
498
def __init__(self):
499
"""Initialize empty in-memory repository."""
500
501
def set_description(self, description: bytes) -> None:
502
"""
503
Set repository description.
504
505
Parameters:
506
- description: Repository description text
507
"""
508
509
def get_description(self) -> bytes:
510
"""
511
Get repository description.
512
513
Returns:
514
Repository description text
515
"""
516
517
def do_commit(
518
self,
519
message: Optional[bytes] = None,
520
committer: Optional[bytes] = None,
521
author: Optional[bytes] = None,
522
commit_timestamp=None,
523
commit_timezone=None,
524
author_timestamp=None,
525
author_timezone=None,
526
tree: Optional[ObjectID] = None,
527
encoding: Optional[bytes] = None,
528
ref: Optional[Ref] = b"HEAD",
529
merge_heads: Optional[list[ObjectID]] = None,
530
no_verify: bool = False,
531
sign: bool = False,
532
):
533
"""
534
Create a new commit.
535
536
This is a simplified implementation for in-memory repositories that
537
doesn't support worktree operations or hooks.
538
539
Parameters:
540
- message: Commit message
541
- committer: Committer fullname
542
- author: Author fullname
543
- commit_timestamp: Commit timestamp (defaults to now)
544
- commit_timezone: Commit timestamp timezone (defaults to GMT)
545
- author_timestamp: Author timestamp (defaults to commit timestamp)
546
- author_timezone: Author timestamp timezone (defaults to commit timezone)
547
- tree: SHA1 of the tree root to use
548
- encoding: Encoding
549
- ref: Optional ref to commit to (defaults to current branch). If None, creates a dangling commit without updating any ref
550
- merge_heads: Merge heads
551
- no_verify: Skip pre-commit and commit-msg hooks (ignored for MemoryRepo)
552
- sign: GPG Sign the commit (ignored for MemoryRepo)
553
554
Returns:
555
New commit SHA1
556
"""
557
558
@classmethod
559
def init_bare(cls, objects, refs, format: Optional[int] = None):
560
"""
561
Create a new bare repository in memory.
562
563
Parameters:
564
- objects: Objects for the new repository, as iterable
565
- refs: Refs as dictionary, mapping names to object SHA1s
566
- format: Repository format version (defaults to 0)
567
"""
568
569
class BareRepo(Repo):
570
"""
571
Bare Git repository without working tree.
572
573
Contains only Git objects and metadata without
574
checked out files.
575
"""
576
577
def __init__(self, path: str):
578
"""
579
Initialize bare repository.
580
581
Parameters:
582
- path: Path to bare repository directory
583
"""
584
```
585
586
### Parents Provider
587
588
Helper class for resolving commit parents and ancestry relationships.
589
590
```python { .api }
591
class ParentsProvider:
592
"""
593
Provides parent commit information for merge operations.
594
595
Used by merge algorithms to traverse commit ancestry
596
and determine merge bases.
597
"""
598
599
def __init__(self, store, grafts={}, shallows=[]):
600
"""
601
Initialize parents provider.
602
603
Parameters:
604
- store: Object store containing commits
605
- grafts: Dictionary of commit grafts
606
- shallows: List of shallow commit SHAs
607
"""
608
609
def get_parents(self, commit_id, commit=None):
610
"""
611
Get parent commit IDs.
612
613
Parameters:
614
- commit_id: Commit SHA-1 hash
615
- commit: Optional commit object
616
617
Returns:
618
List of parent commit SHA-1 hashes
619
"""
620
```
621
622
### Exception Classes
623
624
Repository-specific exception classes for error handling.
625
626
```python { .api }
627
class InvalidUserIdentity(Exception):
628
"""
629
Raised when user identity is invalid for commit operations.
630
631
User identity must be in format "Name <email>".
632
"""
633
634
class DefaultIdentityNotFound(Exception):
635
"""
636
Raised when no default user identity found in configuration.
637
638
Occurs when attempting commit without author/committer
639
and no default configured.
640
"""
641
642
class UnsupportedVersion(Exception):
643
"""
644
Raised when repository version is not supported.
645
646
Occurs when opening repository with newer format version
647
than supported by current dulwich version.
648
"""
649
650
class UnsupportedExtension(Exception):
651
"""
652
Raised when repository uses unsupported extensions.
653
654
Git repositories can have extensions that modify behavior.
655
This is raised for unknown or unsupported extensions.
656
"""
657
```
658
659
## Usage Examples
660
661
### Basic Repository Operations
662
663
```python
664
from dulwich.repo import Repo, init_repo
665
from dulwich.errors import NotGitRepository
666
667
# Initialize new repository
668
repo = init_repo("/path/to/new/repo")
669
670
# Open existing repository
671
try:
672
repo = Repo("/path/to/existing/repo")
673
print(f"Repository at: {repo.path}")
674
print(f"Current HEAD: {repo.head().decode('ascii')}")
675
finally:
676
repo.close()
677
678
# Using context manager (recommended)
679
with Repo("/path/to/repo") as repo:
680
head_commit = repo[repo.head()]
681
print(f"Latest commit: {head_commit.message.decode('utf-8')}")
682
```
683
684
### Working with Objects
685
686
```python
687
with Repo("/path/to/repo") as repo:
688
# Get HEAD commit
689
head_sha = repo.head()
690
head_commit = repo[head_sha]
691
692
# Get commit tree
693
tree = repo[head_commit.tree]
694
695
# List tree contents
696
for entry in tree.items():
697
print(f"{entry.path.decode('utf-8')}: {entry.sha.decode('ascii')}")
698
699
# Check if object exists
700
if head_sha in repo:
701
print("HEAD commit exists in repository")
702
```
703
704
### Repository Information
705
706
```python
707
from dulwich.repo import find_repo_root, check_repo_exists
708
709
# Find repository root from current directory
710
try:
711
repo_root = find_repo_root()
712
print(f"Repository root: {repo_root}")
713
except NotGitRepository:
714
print("Not in a Git repository")
715
716
# Check if directory contains repository
717
if check_repo_exists("/some/path"):
718
print("Valid Git repository found")
719
```