or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

admin.md bucket-operations.md config-session.md data-access.md hooks.md index.md package-management.md registry-operations.md

docs/package-management.md

0

# Data Package Management

1

2

Core functionality for creating, building, installing, and managing data packages with versioning, metadata handling, and collaborative workflows.

3

4

## Capabilities

5

6

### Package Creation and Building

7

8

Create new packages and build them to registries with versioning and metadata.

9

10

```python { .api }

11

class Package:

12

def __init__(self):

13

"""Creates an empty package."""

14

15

def build(self, name: str, registry: str = None, message: str = None, *, workflow = ...) -> str:

16

"""

17

Serializes this package to a registry.

18

19

Parameters:

20

- name: Name of the package

21

- registry: Registry to build the package to (defaults to configured registry)

22

- message: Commit message for the build

23

- workflow: Workflow configuration for validation

24

25

Returns:

26

Top hash of the built package

27

"""

28

29

def set_dir(self, lkey: str, path: str = None, meta: dict = None, update_policy: str = "incoming", unversioned: bool = False):

30

"""

31

Adds all files from path to the package.

32

33

Parameters:

34

- lkey: Logical key prefix for the directory in the package

35

- path: Local directory path to add

36

- meta: Metadata to associate with the directory

37

- update_policy: How to handle logical-key conflicts: "incoming" (default) takes the new entry, "existing" keeps the entry already in the package

38

- unversioned: If True, don't retrieve version IDs for S3 physical keys

39

"""

40

41

def set_meta(self, meta: dict):

42

"""

43

Sets user metadata on this Package.

44

45

Parameters:

46

- meta: Dictionary of metadata to set

47

"""

48

49

def set(self, logical_key: str, entry=None, meta: dict = None, serialization_location: str = None, serialization_format_opts: dict = None, unversioned: bool = False):

50

"""

51

Returns self with logical_key set to entry.

52

53

Parameters:

54

- logical_key: Logical key to set in the package

55

- entry: PackageEntry to set, or local path, or None

56

- meta: User metadata dict to attach to entry

57

- serialization_location: Where to serialize entry if it's an object

58

- serialization_format_opts: Options for serialization format

59

- unversioned: If True, don't include version ID in package

60

61

Returns:

62

Modified package

63

"""

64

65

@property

66

def meta(self) -> dict:

67

"""

68

Get user metadata for this package.

69

70

Returns:

71

Dictionary of user metadata

72

"""

73

74

def delete(self, logical_key: str):

75

"""

76

Returns self with logical_key removed.

77

78

Parameters:

79

- logical_key: Key to remove from package

80

81

Returns:

82

Modified package

83

"""

84

85

def push(self, name: str, registry: str = None, dest: str = None, message: str = None, selector_fn=None, *, workflow=..., force: bool = False, dedupe: bool = False):

86

"""

87

Copy objects from this package to a different location.

88

89

Parameters:

90

- name: Name for package in registry

91

- registry: Registry where package will be stored

92

- dest: Destination for package objects (S3 or local)

93

- message: Commit message for package

94

- selector_fn: Function to filter which entries to push

95

- workflow: Workflow configuration

96

- force: Force push even if conflicts exist

97

- dedupe: If True, don't push when the top hash matches the existing package's top hash; return the current package instead

98

99

Returns:

100

New package containing copied objects

101

"""

102

```

103

104

### Package Installation and Browsing

105

106

Install and browse existing packages from registries.

107

108

```python { .api }

109

class Package:

110

@classmethod

111

def install(cls, name: str, registry: str = None, top_hash: str = None, dest: str = None, dest_registry: str = None, *, path: str = None):

112

"""

113

Install a package from a registry.

114

115

Parameters:

116

- name: Name of the package to install

117

- registry: Registry to install from (defaults to default remote registry)

118

- top_hash: Specific version hash to install (defaults to latest)

119

- dest: Local destination directory for downloaded files

120

- dest_registry: Registry to install to (defaults to local registry)

121

- path: If specified, downloads only this path or its children

122

123

Returns:

124

Installed Package object

125

"""

126

127

@classmethod

128

def browse(cls, name: str, registry: str = None, top_hash: str = None):

129

"""

130

Browse an existing package without installing.

131

132

Parameters:

133

- name: Name of the package to browse

134

- registry: Registry to browse from

135

- top_hash: Specific version hash to browse

136

137

Returns:

138

Package object for browsing

139

"""

140

141

@classmethod

142

def load(cls, readable_file):

143

"""

144

Load a package from a readable file-like object.

145

146

Parameters:

147

- readable_file: File-like object containing serialized package

148

149

Returns:

150

Package object loaded from file

151

"""

152

153

@classmethod

154

def resolve_hash(cls, name: str, registry: str, hash_prefix: str) -> str:

155

"""

156

Resolve a shortened hash to the full hash for the package.

157

158

Parameters:

159

- name: Name of the package

160

- registry: Registry containing the package

161

- hash_prefix: Shortened hash to resolve

162

163

Returns:

164

Full hash string

165

"""

166

167

@classmethod

168

def rollback(cls, name: str, registry: str, top_hash: str):

169

"""

170

Set the "latest" version of a package to the given hash.

171

172

Parameters:

173

- name: Name of the package

174

- registry: Registry containing the package

175

- top_hash: Hash to set as latest version

176

"""

177

```

178

179

### Package Navigation and Inspection

180

181

Navigate package contents and inspect metadata.

182

183

```python { .api }

184

class Package:

185

def __contains__(self, logical_key: str) -> bool:

186

"""

187

Checks whether the package contains a specified logical_key.

188

189

Parameters:

190

- logical_key: Key to check for

191

192

Returns:

193

True if key exists in package

194

"""

195

196

def __getitem__(self, logical_key: str):

197

"""

198

Filters the package based on prefix, and returns either a new Package

199

or a PackageEntry.

200

201

Parameters:

202

- logical_key: Key or prefix to retrieve

203

204

Returns:

205

PackageEntry for files, Package for directories

206

"""

207

208

def __iter__(self):

209

"""Iterator over package keys."""

210

211

def __len__(self) -> int:

212

"""Number of direct children in package."""

213

214

def keys(self) -> list:

215

"""

216

Returns logical keys in the package.

217

218

Returns:

219

List of logical keys

220

"""

221

222

def walk(self):

223

"""

224

Generator that traverses all entries in the package tree and yields tuples of (key, entry),

225

with keys in alphabetical order.

226

227

Yields:

228

Tuples of (logical_key, PackageEntry)

229

"""

230

231

def get(self, logical_key: str) -> str:

232

"""

233

Gets object from logical_key and returns its physical path.

234

Equivalent to self[logical_key].get().

235

236

Parameters:

237

- logical_key: Key to retrieve

238

239

Returns:

240

Physical path to the object

241

"""

242

243

@property

244

def readme(self):

245

"""

246

Returns the README PackageEntry if it exists.

247

248

Returns:

249

PackageEntry for README file or None

250

"""

251

```

252

253

### Package Analysis and Comparison

254

255

Compare packages and analyze their contents.

256

257

```python { .api }

258

class Package:

259

def diff(self, other_pkg) -> tuple:

260

"""

261

Returns three lists -- added, modified, deleted.

262

263

Parameters:

264

- other_pkg: Package to compare against

265

266

Returns:

267

Tuple of (added_keys, modified_keys, deleted_keys)

268

"""

269

270

def map(self, f, include_directories: bool = False):

271

"""

272

Performs a user-specified operation on each entry in the package.

273

274

Parameters:

275

- f: Function to apply to each entry

276

- include_directories: Whether to include directory metadata

277

278

Returns:

279

List of function results

280

"""

281

282

def filter(self, f, include_directories: bool = False):

283

"""

284

Applies a user-specified operation to each entry in the package,

285

removing results that evaluate to False from the output.

286

287

Parameters:

288

- f: Filter function returning boolean

289

- include_directories: Whether to include directory metadata

290

291

Returns:

292

New Package with filtered entries

293

"""

294

295

def verify(self, src: str, extra_files_ok: bool = False) -> bool:

296

"""

297

Check whether the contents of the given directory match the package manifest.

298

299

Parameters:

300

- src: Directory path to verify against

301

- extra_files_ok: Whether extra files in directory are acceptable

302

303

Returns:

304

True if directory matches package manifest

305

"""

306

```

307

308

### Package Serialization and Hashing

309

310

Serialize packages and work with package hashes.

311

312

```python { .api }

313

class Package:

314

def dump(self, writable_file):

315

"""

316

Serializes this package to a writable file-like object.

317

318

Parameters:

319

- writable_file: File-like object to write to

320

"""

321

322

def manifest(self):

323

"""

324

Provides a generator of the dicts that make up the serialized package.

325

326

Yields:

327

Dictionary entries representing package manifest

328

"""

329

330

@property

331

def top_hash(self) -> str:

332

"""

333

Returns the top hash of the package.

334

335

Returns:

336

SHA256 hash string identifying the package state

337

"""

338

339

def fetch(self, dest: str = './'):

340

"""

341

Copy all descendants to dest. Descendants are written under their logical

342

names relative to self.

343

344

Parameters:

345

- dest: Destination directory path

346

"""

347

```

348

349

### Package Selector Functions

350

351

Static methods for filtering package entries during operations.

352

353

```python { .api }

354

class Package:

355

@staticmethod

356

def selector_fn_copy_all(*args) -> bool:

357

"""

358

Selector function that includes all entries.

359

360

Returns:

361

Always True

362

"""

363

364

@staticmethod

365

def selector_fn_copy_local(logical_key: str, entry) -> bool:

366

"""

367

Selector function that includes only local entries.

368

369

Parameters:

370

- logical_key: Logical key of the entry

371

- entry: PackageEntry object

372

373

Returns:

374

True if entry is local, False otherwise

375

"""

376

```

377

378

## Usage Examples

379

380

### Basic Package Creation

381

382

```python

383

import quilt3

384

385

# Create a new package

386

pkg = quilt3.Package()

387

388

# Add a directory of files

389

pkg.set_dir("data/", "path/to/my/data/")

390

391

# Add metadata

392

pkg.set_meta({

393

"description": "My research dataset",

394

"version": "1.0.0",

395

"tags": ["research", "experiment"]

396

})

397

398

# Build and save to registry

399

top_hash = pkg.build("my-username/my-dataset", message="Initial dataset version")

400

print(f"Package built with hash: {top_hash}")

401

```

402

403

### Package Installation and Browsing

404

405

```python

406

# Browse an existing package

407

pkg = quilt3.Package.browse("my-username/my-dataset")

408

409

# Check package contents

410

print("Package contents:")

411

for key in pkg.keys():

412

print(f" {key}")

413

414

# Install to local directory

415

quilt3.Package.install("my-username/my-dataset", dest="./my-data/")

416

417

# Install specific version

418

quilt3.Package.install("my-username/my-dataset",

419

top_hash="abc123...",

420

dest="./my-data-v1/")

421

```

422

423

### Package Comparison and Analysis

424

425

```python

426

# Compare two package versions

427

pkg1 = quilt3.Package.browse("my-username/my-dataset", top_hash="version1_hash")

428

pkg2 = quilt3.Package.browse("my-username/my-dataset", top_hash="version2_hash")

429

430

added, modified, deleted = pkg1.diff(pkg2)

431

print(f"Changes: {len(added)} added, {len(modified)} modified, {len(deleted)} deleted")

432

433

# Filter package entries

434

large_files = pkg.filter(lambda lk, entry: entry.size > 1000000)

435

print(f"Found {len(large_files)} files larger than 1MB")

436

```