0
# Package Registry Operations
1
2
Functions for working with package registries: listing packages and their versions, searching, copying data, and deleting packages.
3
4
## Type Imports
5
6
```python { .api }
7
from typing import Union
8
```
9
10
## Capabilities
11
12
### Package Listing
13
14
List packages and package versions from registries.
15
16
```python { .api }
17
def list_packages(registry: str = None) -> list:
18
"""
19
Lists Packages in the registry.
20
21
Returns an iterable of all named packages in a registry.
22
If the registry is None, default to the local registry.
23
24
Parameters:
25
- registry: Location of the registry to list packages from (defaults to the local registry when None)
26
27
Returns:
28
An iterable of strings containing the names of the packages
29
"""
30
31
def list_package_versions(name: str, registry: str = None) -> list:
32
"""
33
Lists versions of a given package.
34
35
Returns an iterable of (latest_or_unix_ts, hash) tuples, one per package revision.
36
If the registry is None, default to the local registry.
37
38
Parameters:
39
- name: Name of the package
40
- registry: Location of the registry to list package versions from (defaults to the local registry when None)
41
42
Returns:
43
An iterable of tuples containing the version and hash for the package
44
45
Raises:
46
QuiltException: If package name is invalid
47
"""
48
```
49
50
### Package Search
51
52
Search for packages and data across registries.
53
54
```python { .api }
55
def search(query: Union[str, dict], limit: int = 10) -> list:
56
"""
57
Execute a search against the configured search endpoint.
58
59
Parameters:
60
- query: Query string if passed as str, Query DSL body if passed as dict
61
- limit: Maximum number of results to return. Defaults to 10
62
63
Returns:
64
List of search results
65
66
Query Syntax:
67
- String queries use Query String Query syntax
68
- Dict queries use Query DSL syntax
69
- Both follow Elasticsearch patterns
70
71
Note:
72
Forces a call to configure_from_default if no config exists
73
"""
74
```
75
76
### Data Copying
77
78
Copy data between different storage locations.
79
80
```python { .api }
81
def copy(src: str, dest: str):
82
"""
83
Copies src object from QUILT to dest.
84
85
Either of src and dest may be S3 paths (starting with s3://)
86
or local file paths (starting with file:///).
87
88
Parameters:
89
- src: A path to retrieve
90
- dest: A path to write to
91
92
Examples:
93
- copy('s3://bucket/file.csv', 'file:///local/path/file.csv')
94
- copy('file:///local/file.csv', 's3://bucket/uploaded.csv')
95
- copy('s3://source-bucket/data.json', 's3://dest-bucket/data.json')
96
"""
97
```
98
99
### Package Deletion
100
101
Delete packages or package versions from registries.
102
103
```python { .api }
104
def delete_package(name: str, registry: str = None, top_hash: str = None):
105
"""
106
Delete a package. Deletes only the manifest entries and not the underlying files.
107
108
Parameters:
109
- name: Name of the package
110
- registry: The registry the package will be removed from
111
- top_hash: Optional. A package hash to delete, instead of the whole package
112
113
Note:
114
- If top_hash is None, deletes the entire package
115
- If top_hash is provided, deletes only that specific version
116
- Underlying data files are not deleted, only package manifests
117
118
Raises:
119
QuiltException: If package name is invalid
120
"""
121
```
122
123
## Usage Examples
124
125
### Package Discovery
126
127
```python
128
import quilt3
129
130
# List all packages in default registry
131
packages = quilt3.list_packages()
132
print(f"Found {len(packages)} packages:")
133
for package in packages:
134
print(f" {package}")
135
136
# List packages in specific registry
137
remote_packages = quilt3.list_packages("s3://my-registry-bucket")
138
print(f"Remote packages: {len(remote_packages)}")
139
140
# List versions of a specific package
141
versions = quilt3.list_package_versions("my-username/my-dataset")
142
print("Package versions:")
143
for timestamp, hash_val in versions:
144
print(f" {timestamp}: {hash_val[:8]}...")
145
```
146
147
### Package Search
148
149
```python
150
import quilt3
151
152
# Simple text search
153
results = quilt3.search("machine learning dataset", limit=20)
154
print(f"Found {len(results)} results")
155
156
for result in results:
157
hit = result['_source']
158
print(f"Package: {hit.get('name', 'Unknown')}")
159
print(f"Description: {hit.get('description', 'No description')}")
160
print("---")
161
162
# Advanced search with Query DSL
163
advanced_query = {
164
"query": {
165
"bool": {
166
"must": [
167
{"match": {"description": "experiment"}},
168
{"term": {"file_extensions": "csv"}}
169
],
170
"filter": [
171
{"range": {"size": {"gte": 1000000}}}
172
]
173
}
174
},
175
"sort": [{"modified": {"order": "desc"}}]
176
}
177
178
advanced_results = quilt3.search(advanced_query, limit=10)
179
print(f"Found {len(advanced_results)} large CSV experiment datasets")
180
```
181
182
### Data Transfer Operations
183
184
```python
185
import quilt3
186
187
# Copy from S3 to local
188
quilt3.copy(
189
src="s3://source-bucket/data/measurements.csv",
190
dest="file:///tmp/local_measurements.csv"
191
)
192
193
# Copy from local to S3
194
quilt3.copy(
195
src="file:///home/user/processed_data.json",
196
dest="s3://dest-bucket/processed/data.json"
197
)
198
199
# Copy between S3 buckets
200
quilt3.copy(
201
src="s3://source-bucket/raw/dataset.parquet",
202
dest="s3://backup-bucket/archives/dataset_backup.parquet"
203
)
204
205
# Batch copy operations
206
files_to_copy = [
207
("s3://source/file1.csv", "file:///local/file1.csv"),
208
("s3://source/file2.json", "file:///local/file2.json"),
209
("s3://source/file3.parquet", "file:///local/file3.parquet")
210
]
211
212
for src, dest in files_to_copy:
213
print(f"Copying {src} to {dest}")
214
quilt3.copy(src, dest)
215
print("✓ Complete")
216
```
217
218
### Package Management
219
220
```python
221
import quilt3
222
223
# Get package information before deletion
224
package_name = "my-username/old-dataset"
225
versions = quilt3.list_package_versions(package_name)
226
print(f"Package {package_name} has {len(versions)} versions")
227
228
# Delete specific version
229
specific_hash = "abc123def456..."
230
quilt3.delete_package(package_name, top_hash=specific_hash)
231
print(f"Deleted version {specific_hash[:8]}...")
232
233
# Delete entire package (all versions)
234
# WARNING: This removes all versions!
235
confirm = input(f"Delete entire package {package_name}? (yes/no): ")
236
if confirm.lower() == 'yes':
237
quilt3.delete_package(package_name)
238
print(f"Deleted all versions of {package_name}")
239
else:
240
print("Package deletion cancelled")
241
242
# Verify deletion
243
try:
244
remaining_versions = quilt3.list_package_versions(package_name)
245
print(f"Remaining versions: {len(remaining_versions)}")
246
except Exception as e:
247
print(f"Package no longer exists: {e}")
248
```
249
250
### Registry Management
251
252
```python
253
import quilt3
254
255
def audit_registry(registry_url=None):
256
"""Audit a registry for packages and their version counts"""
257
258
packages = quilt3.list_packages(registry_url)
259
print(f"Registry audit for: {registry_url or 'default'}")
260
print(f"Total packages: {len(packages)}")
261
262
package_stats = []
263
264
for package in packages:
265
try:
266
versions = quilt3.list_package_versions(package, registry_url)
267
package_stats.append({
268
'name': package,
269
'version_count': len(versions),
270
'latest_hash': versions[0][1] if versions else None
271
})
272
except Exception as e:
273
print(f"Error processing {package}: {e}")
274
275
# Sort by version count
276
package_stats.sort(key=lambda x: x['version_count'], reverse=True)
277
278
print("\nTop packages by version count:")
279
for stats in package_stats[:10]:
280
print(f" {stats['name']}: {stats['version_count']} versions")
281
282
return package_stats
283
284
# Run registry audit
285
stats = audit_registry()
286
```
287
288
### Search and Discovery Workflows
289
290
```python
291
import quilt3
292
293
def find_recent_packages(days=7):
294
"""Find packages modified in the last N days"""
295
296
from datetime import datetime, timedelta
297
298
cutoff_date = datetime.now() - timedelta(days=days)
299
cutoff_timestamp = cutoff_date.timestamp()
300
301
query = {
302
"query": {
303
"range": {
304
"last_modified": {
305
"gte": cutoff_timestamp
306
}
307
}
308
},
309
"sort": [{"last_modified": {"order": "desc"}}]
310
}
311
312
results = quilt3.search(query, limit=50)
313
314
print(f"Packages modified in last {days} days:")
315
for result in results:
316
source = result['_source']
317
name = source.get('name', 'Unknown')
318
modified = source.get('last_modified', 0)
319
mod_date = datetime.fromtimestamp(modified).strftime('%Y-%m-%d %H:%M')
320
print(f" {name} - modified {mod_date}")
321
322
return results
323
324
def search_by_file_type(extension, limit=20):
325
"""Search for packages containing files with specific extension"""
326
327
query = {
328
"query": {
329
"term": {
330
"file_extensions": extension.lower().replace('.', '')
331
}
332
}
333
}
334
335
results = quilt3.search(query, limit=limit)
336
337
print(f"Packages containing .{extension} files:")
338
for result in results:
339
source = result['_source']
340
name = source.get('name', 'Unknown')
341
file_count = source.get('file_count', 0)
342
print(f" {name} ({file_count} files)")
343
344
return results
345
346
# Use search functions
347
recent = find_recent_packages(30)
348
csv_packages = search_by_file_type('csv', 15)
349
parquet_packages = search_by_file_type('parquet', 10)
350
```
351
352
### Error Handling
353
354
```python
355
import quilt3
356
from quilt3.util import QuiltException
357
358
def safe_registry_operation(operation_name, operation_func):
359
"""Safely execute registry operations with error handling"""
360
361
try:
362
result = operation_func()
363
print(f"✓ {operation_name} completed successfully")
364
return result
365
366
except QuiltException as e:
367
print(f"✗ {operation_name} failed (Quilt error): {e}")
368
return None
369
370
except Exception as e:
371
print(f"✗ {operation_name} failed (unexpected error): {e}")
372
return None
373
374
# Safe operations
375
packages = safe_registry_operation(
376
"List packages",
377
lambda: quilt3.list_packages()
378
)
379
380
if packages:
381
print(f"Found {len(packages)} packages")
382
383
# Safe search with validation
384
def safe_search(query, limit=10):
385
if isinstance(query, str) and len(query.strip()) == 0:
386
print("Error: Empty search query")
387
return []
388
389
return quilt3.search(query, limit)
390
391
results = safe_registry_operation(
392
"Search packages",
393
lambda: safe_search("experiment data", 20)
394
)
395
```