# Package Data Access

Methods for accessing, deserializing, and working with data files within packages. Supports various data formats with caching and optimization features.

## Capabilities

### File Access and Retrieval

Access files within packages and retrieve their physical locations.

```python { .api }
class PackageEntry:
    def get(self) -> str:
        """
        Returns the physical key of this PackageEntry.

        Returns:
            Physical path or S3 URI to the file
        """

    def get_cached_path(self) -> Optional[str]:
        """
        Returns a locally cached physical key, if available.

        Returns:
            Local file path if cached, otherwise None
        """

    def fetch(self, dest: str = None):
        """
        Gets objects from entry and saves them to dest.

        Parameters:
        - dest: Destination path for the downloaded file

        Returns:
            Path to the fetched file
        """
```

### Binary Data Access

Retrieve raw bytes from package entries with caching support.

```python { .api }
class PackageEntry:
    def get_bytes(self, use_cache_if_available: bool = True) -> bytes:
        """
        Returns the bytes of the object this entry corresponds to.

        Parameters:
        - use_cache_if_available: Whether to use the cached version, if available

        Returns:
            Raw bytes of the file contents
        """
```

### Structured Data Access

Access structured data formats such as JSON and text files.

```python { .api }
class PackageEntry:
    def get_as_json(self, use_cache_if_available: bool = True) -> dict:
        """
        Returns a JSON file as a dict. Assumes the file is encoded as UTF-8.

        Parameters:
        - use_cache_if_available: Whether to use the cached version, if available

        Returns:
            Parsed JSON data as a dictionary

        Raises:
            JSONDecodeError if the file is not valid JSON
        """

    def get_as_string(self, use_cache_if_available: bool = True) -> str:
        """
        Returns the object as a string. Assumes the file is encoded as UTF-8.

        Parameters:
        - use_cache_if_available: Whether to use the cached version, if available

        Returns:
            File contents as a UTF-8 decoded string
        """
```
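Conceptually, `get_as_json` amounts to decoding the entry's bytes as UTF-8 and parsing them with the standard `json` module, and `get_as_string` is just the decode step. A minimal stdlib sketch of that behavior, using sample bytes rather than a real entry:

```python
import json

# Raw bytes as get_bytes() would return them (sample data, not a real entry)
raw = '{"name": "café", "count": 3}'.encode("utf-8")

# Equivalent of get_as_json(): decode as UTF-8, then parse JSON
config = json.loads(raw.decode("utf-8"))
print(config["name"])  # non-ASCII text survives the UTF-8 round trip

# Equivalent of get_as_string(): just the decode step
text = raw.decode("utf-8")
```

If the bytes are not valid JSON, the `json.loads` step raises `JSONDecodeError`, mirroring the behavior documented above.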

### Data Deserialization

Deserialize files using format-specific handlers or a custom function.

```python { .api }
class PackageEntry:
    def deserialize(self, func=None, **format_opts):
        """
        Returns the object this entry corresponds to.

        Parameters:
        - func: Custom deserialization function; if given, it is called on
          the object's raw bytes in place of the format handler
        - **format_opts: Format-specific options

        Returns:
            Deserialized data object (format depends on file type and func)

        Supported formats:
        - CSV: Returns pandas DataFrame (requires pandas)
        - Parquet: Returns pandas DataFrame (requires pandas, pyarrow)
        - JSON: Returns parsed JSON object
        - Custom: Uses the provided func parameter
        """

    def __call__(self, func=None, **kwargs):
        """
        Shorthand for self.deserialize().

        Parameters:
        - func: Custom deserialization function
        - **kwargs: Passed to the deserialize method

        Returns:
            Deserialized data object
        """
```
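A custom `func` sidesteps the built-in format handlers. Assuming it is called with the object's raw bytes, a deserializer for a simple line-oriented format might look like the following; `parse_lines` is a hypothetical example, not part of the API:

```python
def parse_lines(data: bytes) -> list:
    """Hypothetical custom deserializer: split raw bytes into non-empty, stripped lines."""
    return [line.strip() for line in data.decode("utf-8").splitlines() if line.strip()]

# Demonstrated on sample bytes; with a real entry this would be
# entry.deserialize(func=parse_lines), or entry(parse_lines) via __call__
sample = b"alpha\nbeta\n\ngamma\n"
print(parse_lines(sample))  # ['alpha', 'beta', 'gamma']
```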

### Entry Metadata and Properties

Access and modify entry metadata and properties.

```python { .api }
class PackageEntry:
    @property
    def meta(self) -> dict:
        """
        Get user metadata for this entry.

        Returns:
            Dictionary of user metadata
        """

    def set_meta(self, meta: dict):
        """
        Sets the user metadata for this PackageEntry.

        Parameters:
        - meta: Dictionary of metadata to set
        """

    def set(self, path: str = None, meta: dict = None):
        """
        Returns self with the physical key set to path.

        Parameters:
        - path: New physical path for the entry
        - meta: New metadata for the entry

        Returns:
            This PackageEntry, with updated properties
        """

    @property
    def size(self) -> int:
        """Size of the entry in bytes."""

    @property
    def hash(self) -> dict:
        """Hash information for the entry."""

    def as_dict(self) -> dict:
        """
        Returns a dict representation of the entry.

        Returns:
            Dictionary containing entry metadata and properties
        """

    def with_physical_key(self, key):
        """
        Returns a new PackageEntry with a different physical key.

        Parameters:
        - key: New PhysicalKey for the entry

        Returns:
            New PackageEntry with the updated physical key
        """
```
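The `with_physical_key` style above (return a fresh entry rather than mutating in place) can be sketched with a plain dataclass. `EntrySketch` is a simplified stand-in for illustration, not the real `PackageEntry`:

```python
from dataclasses import dataclass, replace

@dataclass(frozen=True)
class EntrySketch:
    """Simplified stand-in illustrating the immutable-copy style of with_physical_key."""
    physical_key: str
    size: int
    meta: dict

    def with_physical_key(self, key: str) -> "EntrySketch":
        # Return a new entry; the original is left untouched
        return replace(self, physical_key=key)

entry = EntrySketch("s3://bucket/data.csv", 1024, {"v": 1})
moved = entry.with_physical_key("s3://bucket/archive/data.csv")
print(entry.physical_key)  # original unchanged
print(moved.physical_key)
```

Returning a copy keeps earlier references valid, which matters when the same entry object appears in several package manifests.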

### Entry Representation and Equality

String representation and equality comparison for entries.

```python { .api }
class PackageEntry:
    def __repr__(self) -> str:
        """String representation of the PackageEntry."""

    def __eq__(self, other) -> bool:
        """
        Equality comparison between PackageEntry objects.

        Parameters:
        - other: Another PackageEntry to compare with

        Returns:
            True if the entries are equivalent (same size and hash)
        """
```

## Usage Examples

### Basic File Access

```python
import quilt3

# Browse a package
pkg = quilt3.Package.browse("my-username/my-dataset")

# Get a specific file entry
data_file = pkg["data/measurements.csv"]

# Get the physical location
file_path = data_file.get()
print(f"File location: {file_path}")

# Download the file locally
local_path = data_file.fetch("./downloaded_measurements.csv")
print(f"Downloaded to: {local_path}")
```

### Working with Different Data Formats

```python
# JSON data access
config_entry = pkg["config/settings.json"]
config_data = config_entry.get_as_json()
print(f"Configuration: {config_data}")

# Text file access
readme_entry = pkg["README.txt"]
readme_content = readme_entry.get_as_string()
print(readme_content)

# Binary data access
image_entry = pkg["images/photo.jpg"]
image_bytes = image_entry.get_bytes()
print(f"Image size: {len(image_bytes)} bytes")
```

### Data Deserialization with pandas

```python
# Deserialize CSV to pandas DataFrame (requires pandas)
csv_entry = pkg["data/measurements.csv"]
df = csv_entry.deserialize()  # Automatically detects CSV format
print(df.head())

# Deserialize Parquet file (requires pandas and pyarrow)
parquet_entry = pkg["data/results.parquet"]
df = parquet_entry.deserialize()
print(f"DataFrame shape: {df.shape}")

# Custom deserialization function, called with the object's raw bytes
def load_custom_format(data: bytes):
    # Custom loading logic
    return {"num_bytes": len(data)}

custom_entry = pkg["data/custom.dat"]
custom_data = custom_entry.deserialize(func=load_custom_format)
print(custom_data)
```

### Entry Metadata Management

```python
# Access entry metadata
data_entry = pkg["data/experiment_1.csv"]
metadata = data_entry.meta
print(f"Entry metadata: {metadata}")

# Attach metadata to the entry (set returns the updated entry)
new_entry = data_entry.set(meta={
    "experiment": "exp_001",
    "date": "2024-01-15",
    "researcher": "Dr. Smith"
})

# Get entry properties
print(f"File size: {data_entry.size} bytes")
print(f"Hash info: {data_entry.hash}")
print(f"Entry dict: {data_entry.as_dict()}")
```

### Cached Access

```python
# First access: downloads and caches
data = csv_entry.get_bytes(use_cache_if_available=True)

# Second access: uses the cached version (faster)
data_cached = csv_entry.get_bytes(use_cache_if_available=True)

# Force a fresh download
data_fresh = csv_entry.get_bytes(use_cache_if_available=False)

# Check whether a cached version exists
cached_path = csv_entry.get_cached_path()
if cached_path:
    print(f"Cached at: {cached_path}")
else:
    print("No cached version available")
```

### Working with Large Files

```python
# Avoid loading large files entirely into memory
large_file = pkg["data/large_dataset.csv"]

# get() may return a remote S3 URI, which open() cannot read directly,
# so use a cached copy if one exists, otherwise download one first
local_path = large_file.get_cached_path()
if local_path is None:
    large_file.fetch("./large_dataset.csv")
    local_path = "./large_dataset.csv"

# Stream line by line with a context manager
with open(local_path, "r") as f:
    for line_num, line in enumerate(f):
        if line_num >= 100:  # process only the first 100 lines
            break
        process_line(line)

# Or deserialize with chunking (format options such as chunksize are
# passed through to the pandas CSV reader, which yields DataFrames)
for chunk in large_file.deserialize(chunksize=1000):
    process_chunk(chunk)
```
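The same streaming idea works with only the standard library. This sketch batches CSV rows with the `csv` module so memory stays bounded regardless of file size; the helper name and batch size are arbitrary choices for illustration:

```python
import csv
import io

def iter_batches(fileobj, batch_size=1000):
    """Yield lists of CSV rows, at most batch_size rows at a time."""
    reader = csv.reader(fileobj)
    batch = []
    for row in reader:
        batch.append(row)
        if len(batch) >= batch_size:
            yield batch
            batch = []
    if batch:
        yield batch  # final partial batch

# Demonstrated on an in-memory CSV; a file downloaded via fetch() works the same way
data = io.StringIO("a,b\n" + "\n".join(f"{i},{i*i}" for i in range(2500)))
sizes = [len(b) for b in iter_batches(data, batch_size=1000)]
print(sizes)  # [1000, 1000, 501] — header row plus 2500 data rows
```

Because `iter_batches` is a generator, each batch is released before the next is built, so peak memory is one batch rather than the whole file.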