0
# Data Package Management
1
2
Core functionality for creating, building, installing, and managing data packages with versioning, metadata handling, and collaborative workflows.
3
4
## Capabilities
5
6
### Package Creation and Building
7
8
Create new packages and build them to registries with versioning and metadata.
9
10
```python { .api }
11
class Package:
12
def __init__(self):
13
"""Creates an empty package."""
14
15
def build(self, name: str, registry: str = None, message: str = None, *, workflow = ...) -> str:
16
"""
17
Serializes this package to a registry.
18
19
Parameters:
20
- name: Name of the package
21
- registry: Registry to build the package to (defaults to configured registry)
22
- message: Commit message for the build
23
- workflow: Workflow ID, or None to skip workflow validation; if omitted (`...`), the default workflow is used
24
25
Returns:
26
Top hash of the built package
27
"""
28
29
def set_dir(self, lkey: str, path: str = None, meta: dict = None, update_policy: str = "incoming", unversioned: bool = False):
30
"""
31
Adds all files from path to the package.
32
33
Parameters:
34
- lkey: Logical key prefix for the directory in the package
35
- path: Directory to scan for files to add; if None, lkey is used as the path
36
- meta: Metadata to associate with the directory
37
- update_policy: How to resolve logical-key conflicts: "incoming" (default) takes the new entry from set_dir, "existing" keeps the entry already in the package
38
- unversioned: If True, do not retrieve VersionId for S3 physical keys
39
"""
40
41
def set_meta(self, meta: dict):
42
"""
43
Sets user metadata on this Package.
44
45
Parameters:
46
- meta: Dictionary of metadata to set
47
"""
48
49
def set(self, logical_key: str, entry=None, meta: dict = None, serialization_location: str = None, serialization_format_opts: dict = None, unversioned: bool = False):
50
"""
51
Returns self with logical_key set to entry.
52
53
Parameters:
54
- logical_key: Logical key to set in the package
55
- entry: PackageEntry, path/URL string, or serializable object to place at logical_key; if None, logical_key is used as the path
56
- meta: User metadata dict to attach to entry
57
- serialization_location: Where to serialize entry if it's an object
58
- serialization_format_opts: Options for serialization format
59
- unversioned: If True, don't include version ID in package
60
61
Returns:
62
Modified package
63
"""
64
65
@property
66
def meta(self) -> dict:
67
"""
68
Get user metadata for this package.
69
70
Returns:
71
Dictionary of user metadata
72
"""
73
74
def delete(self, logical_key: str):
75
"""
76
Returns self with logical_key removed.
77
78
Parameters:
79
- logical_key: Key to remove from package
80
81
Returns:
82
Modified package
83
"""
84
85
def push(self, name: str, registry: str = None, dest: str = None, message: str = None, selector_fn=None, *, workflow=..., force: bool = False, dedupe: bool = False):
86
"""
87
Copy objects from this package to a different location.
88
89
Parameters:
90
- name: Name for package in registry
91
- registry: Registry where package will be stored
92
- dest: Destination for package objects (S3 or local)
93
- message: Commit message for package
94
- selector_fn: Function taking (logical_key, entry) and returning False for entries that should be skipped during the push
95
- workflow: Workflow ID, or None to skip workflow validation; if omitted (`...`), the default workflow is used
96
- force: Skip the top hash check and overwrite any existing package
97
- dedupe: Skip the push if the top hash matches the existing package's top hash, returning the current package
98
99
Returns:
100
New package containing copied objects
101
"""
102
```
103
104
### Package Installation and Browsing
105
106
Install and browse existing packages from registries.
107
108
```python { .api }
109
class Package:
110
@classmethod
111
def install(cls, name: str, registry: str = None, top_hash: str = None, dest: str = None, dest_registry: str = None, *, path: str = None):
112
"""
113
Install a package from a registry.
114
115
Parameters:
116
- name: Name of the package to install
117
- registry: Registry to install from (defaults to default remote registry)
118
- top_hash: Specific version hash to install (defaults to latest)
119
- dest: Local destination directory for downloaded files
120
- dest_registry: Registry to install to (defaults to local registry)
121
- path: If specified, downloads only this path or its children
122
123
Returns:
124
Installed Package object
125
"""
126
127
@classmethod
128
def browse(cls, name: str, registry: str = None, top_hash: str = None):
129
"""
130
Browse an existing package without installing.
131
132
Parameters:
133
- name: Name of the package to browse
134
- registry: Registry to browse from
135
- top_hash: Specific version hash to browse
136
137
Returns:
138
Package object for browsing
139
"""
140
141
@classmethod
142
def load(cls, readable_file):
143
"""
144
Load a package from a readable file-like object.
145
146
Parameters:
147
- readable_file: File-like object containing serialized package
148
149
Returns:
150
Package object loaded from file
151
"""
152
153
@classmethod
154
def resolve_hash(cls, name: str, registry: str, hash_prefix: str) -> str:
155
"""
156
Resolve a shortened hash to the full hash for the package.
157
158
Parameters:
159
- name: Name of the package
160
- registry: Registry containing the package
161
- hash_prefix: Shortened hash to resolve
162
163
Returns:
164
Full hash string
165
"""
166
167
@classmethod
168
def rollback(cls, name: str, registry: str, top_hash: str):
169
"""
170
Set the "latest" version of a package to the given hash.
171
172
Parameters:
173
- name: Name of the package
174
- registry: Registry containing the package
175
- top_hash: Hash to set as latest version
176
"""
177
```
178
179
### Package Navigation and Inspection
180
181
Navigate package contents and inspect metadata.
182
183
```python { .api }
184
class Package:
185
def __contains__(self, logical_key: str) -> bool:
186
"""
187
Checks whether the package contains a specified logical_key.
188
189
Parameters:
190
- logical_key: Key to check for
191
192
Returns:
193
True if key exists in package
194
"""
195
196
def __getitem__(self, logical_key: str):
197
"""
198
Filters the package based on prefix, and returns either a new Package
199
or a PackageEntry.
200
201
Parameters:
202
- logical_key: Key or prefix to retrieve
203
204
Returns:
205
PackageEntry for files, Package for directories
206
"""
207
208
def __iter__(self):
209
"""Iterator over package keys."""
210
211
def __len__(self) -> int:
212
"""Number of direct children in package."""
213
214
def keys(self) -> list:
215
"""
216
Returns logical keys in the package.
217
218
Returns:
219
List of logical keys
220
"""
221
222
def walk(self):
223
"""
224
Generator that traverses all entries in the package tree and returns tuples of (key, entry),
225
with keys in alphabetical order.
226
227
Yields:
228
Tuples of (logical_key, PackageEntry)
229
"""
230
231
def get(self, logical_key: str) -> str:
232
"""
233
Gets object from logical_key and returns its physical path.
234
Equivalent to self[logical_key].get().
235
236
Parameters:
237
- logical_key: Key to retrieve
238
239
Returns:
240
Physical path to the object
241
"""
242
243
@property
244
def readme(self):
245
"""
246
Returns the README PackageEntry if it exists.
247
248
Returns:
249
PackageEntry for the `README.md` entry (a QuiltException is raised if the package has no such entry)
250
"""
251
```
252
253
### Package Analysis and Comparison
254
255
Compare packages and analyze their contents.
256
257
```python { .api }
258
class Package:
259
def diff(self, other_pkg) -> tuple:
260
"""
261
Returns three lists -- added, modified, deleted.
262
263
Parameters:
264
- other_pkg: Package to compare against
265
266
Returns:
267
Tuple of (added_keys, modified_keys, deleted_keys)
268
"""
269
270
def map(self, f, include_directories: bool = False):
271
"""
272
Performs a user-specified operation on each entry in the package.
273
274
Parameters:
275
- f: Function with signature f(logical_key, entry) to apply to each entry
276
- include_directories: Whether to include directory metadata
277
278
Returns:
279
List of function results
280
"""
281
282
def filter(self, f, include_directories: bool = False):
283
"""
284
Applies a user-specified operation to each entry in the package,
285
removing results that evaluate to False from the output.
286
287
Parameters:
288
- f: Predicate with signature f(logical_key, entry); entries for which it returns a falsy value are dropped
289
- include_directories: Whether to include directory metadata
290
291
Returns:
292
New Package with filtered entries
293
"""
294
295
def verify(self, src: str, extra_files_ok: bool = False) -> bool:
296
"""
297
Check if the contents of the given directory match the package manifest.
298
299
Parameters:
300
- src: Directory path to verify against
301
- extra_files_ok: Whether extra files in directory are acceptable
302
303
Returns:
304
True if directory matches package manifest
305
"""
306
```
307
308
### Package Serialization and Hashing
309
310
Serialize packages and work with package hashes.
311
312
```python { .api }
313
class Package:
314
def dump(self, writable_file):
315
"""
316
Serializes this package to a writable file-like object.
317
318
Parameters:
319
- writable_file: File-like object to write to
320
"""
321
322
def manifest(self):
323
"""
324
Provides a generator of the dicts that make up the serialized package.
325
326
Yields:
327
Dictionary entries representing package manifest
328
"""
329
330
@property
331
def top_hash(self) -> str:
332
"""
333
Returns the top hash of the package.
334
335
Returns:
336
SHA256 hash string identifying the package state
337
"""
338
339
def fetch(self, dest: str = './'):
340
"""
341
Copy all descendants to dest. Descendants are written under their logical
342
names relative to self.
343
344
Parameters:
345
- dest: Destination directory path
346
"""
347
```
348
349
### Package Selector Functions
350
351
Static methods for filtering package entries during operations.
352
353
```python { .api }
354
class Package:
355
@staticmethod
356
def selector_fn_copy_all(*args) -> bool:
357
"""
358
Selector function that includes all entries.
359
360
Returns:
361
Always True
362
"""
363
364
@staticmethod
365
def selector_fn_copy_local(logical_key: str, entry) -> bool:
366
"""
367
Selector function that includes only local entries.
368
369
Parameters:
370
- logical_key: Logical key of the entry
371
- entry: PackageEntry object
372
373
Returns:
374
True if entry is local, False otherwise
375
"""
376
```
377
378
## Usage Examples
379
380
### Basic Package Creation
381
382
```python
383
import quilt3
384
385
# Create a new package
386
pkg = quilt3.Package()
387
388
# Add a directory of files
389
pkg.set_dir("data/", "path/to/my/data/")
390
391
# Add metadata
392
pkg.set_meta({
393
"description": "My research dataset",
394
"version": "1.0.0",
395
"tags": ["research", "experiment"]
396
})
397
398
# Build and save to registry
399
top_hash = pkg.build("my-username/my-dataset", message="Initial dataset version")
400
print(f"Package built with hash: {top_hash}")
401
```
402
403
### Package Installation and Browsing
404
405
```python
406
# Browse an existing package
407
pkg = quilt3.Package.browse("my-username/my-dataset")
408
409
# Check package contents
410
print("Package contents:")
411
for key in pkg.keys():
412
print(f" {key}")
413
414
# Install to local directory
415
quilt3.Package.install("my-username/my-dataset", dest="./my-data/")
416
417
# Install specific version
418
quilt3.Package.install("my-username/my-dataset",
419
top_hash="abc123...",
420
dest="./my-data-v1/")
421
```
422
423
### Package Comparison and Analysis
424
425
```python
426
# Compare two package versions
427
pkg1 = quilt3.Package.browse("my-username/my-dataset", top_hash="version1_hash")
428
pkg2 = quilt3.Package.browse("my-username/my-dataset", top_hash="version2_hash")
429
430
added, modified, deleted = pkg1.diff(pkg2)
431
print(f"Changes: {len(added)} added, {len(modified)} modified, {len(deleted)} deleted")
432
433
# Filter package entries
434
large_files = pkg.filter(lambda lk, entry: entry.size > 1000000)
435
print(f"Found {len(large_files)} files larger than 1MB")
436
```