# Storage Backends

Papermill supports multiple storage systems, including the local filesystem, cloud storage (S3, Azure, GCS), distributed filesystems (HDFS), and remote sources (GitHub, HTTP/HTTPS). Its modular I/O layer routes each read and write to the appropriate handler based on the path or URL scheme, enabling seamless notebook execution across different storage environments.

## Capabilities

### Core I/O Functions

Primary functions for loading and saving notebooks across different storage backends.

```python { .api }
def load_notebook_node(notebook_path: str) -> nbformat.NotebookNode:
    """
    Load a notebook from any supported source (local filesystem, S3, etc.).

    Parameters:
    - notebook_path: Path to the notebook (supports local paths, S3 URLs, etc.)

    Returns:
        nbformat.NotebookNode: Loaded notebook object

    Raises:
        FileNotFoundError: If the notebook does not exist
    """

def write_ipynb(nb: nbformat.NotebookNode, path: str) -> None:
    """
    Write a notebook to the specified location.

    Parameters:
    - nb: Notebook to write
    - path: Destination path (supports local paths, S3 URLs, etc.)
    """

def list_notebook_files(path: str) -> list[str]:
    """
    List notebook files in a directory.

    Parameters:
    - path: Directory path to list

    Returns:
        list[str]: List of notebook file paths
    """
```

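For example, a notebook can be loaded from one backend and written to another (a minimal sketch; the bucket name and paths are placeholders, and S3 access assumes AWS credentials are already configured):

```python
from papermill.iorw import load_notebook_node, write_ipynb

# Load a notebook; the storage handler is chosen from the path's scheme (here: S3).
nb = load_notebook_node('s3://my-bucket/notebooks/analysis.ipynb')
print(len(nb.cells))

# Write the same notebook object to a local destination.
write_ipynb(nb, '/tmp/analysis-copy.ipynb')
```
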
### Path and I/O Utilities

Utilities for working with paths and managing I/O operations.

```python { .api }
def get_pretty_path(path: str) -> str:
    """
    Format a path for display.

    Parameters:
    - path: Path to format

    Returns:
        str: Formatted path string
    """

def local_file_io_cwd(path: str = None):
    """
    Context manager for local file operations.

    Parameters:
    - path: Working directory path (optional)

    Returns:
        Context manager that temporarily changes the working directory
    """

def read_yaml_file(path: str) -> dict:
    """
    Read a YAML configuration file.

    Parameters:
    - path: Path to the YAML file

    Returns:
        dict: Parsed YAML content
    """
```

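A brief sketch of these utilities in use (the YAML path and working directory below are placeholders):

```python
from papermill.iorw import get_pretty_path, local_file_io_cwd, read_yaml_file

# Read a YAML parameters file from any supported backend into a dict.
params = read_yaml_file('s3://my-bucket/config/parameters.yaml')

# Format a path for display in logs.
print(get_pretty_path('/data/notebooks/analysis.ipynb'))

# Within this block, papermill's local file I/O treats /tmp/workdir as its working directory.
with local_file_io_cwd('/tmp/workdir'):
    pass
```
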
## Storage Backend Classes

### Main I/O Coordinator

```python { .api }
class PapermillIO:
    """
    Central I/O handler that delegates to specific storage handlers.
    Automatically routes requests based on path/URL schemes.
    """

    def read(self, path: str) -> str: ...
    def write(self, buf: str, path: str) -> None: ...
    def listdir(self, path: str) -> list[str]: ...
    def pretty_path(self, path: str) -> str: ...
```

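Papermill exposes a module-level instance of this coordinator, `papermill_io`, in `papermill.iorw` (the same object used in the custom handler registration example later in this document). A minimal sketch of using it directly (bucket name and paths are placeholders; S3 access assumes credentials are configured):

```python
from papermill.iorw import papermill_io

# Each call is routed to the handler registered for the path's scheme.
raw_s3 = papermill_io.read('s3://my-bucket/notebooks/analysis.ipynb')  # S3Handler
raw_local = papermill_io.read('/path/to/notebook.ipynb')               # LocalHandler
print(papermill_io.pretty_path('/path/to/notebook.ipynb'))
```
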
### Local Filesystem

```python { .api }
class LocalHandler:
    """Handler for local filesystem operations."""

    def read(self, path: str) -> str: ...
    def write(self, buf: str, path: str) -> None: ...
    def listdir(self, path: str) -> list[str]: ...
```

### Cloud Storage Handlers

```python { .api }
class S3Handler:
    """Handler for Amazon S3 storage operations."""

    def read(self, path: str) -> str: ...
    def write(self, buf: str, path: str) -> None: ...
    def listdir(self, path: str) -> list[str]: ...

class ADLHandler:
    """Handler for Azure Data Lake Storage operations."""

    def read(self, path: str) -> str: ...
    def write(self, buf: str, path: str) -> None: ...
    def listdir(self, path: str) -> list[str]: ...

class ABSHandler:
    """Handler for Azure Blob Storage operations."""

    def read(self, path: str) -> str: ...
    def write(self, buf: str, path: str) -> None: ...
    def listdir(self, path: str) -> list[str]: ...

class GCSHandler:
    """Handler for Google Cloud Storage operations."""

    def read(self, path: str) -> str: ...
    def write(self, buf: str, path: str) -> None: ...
    def listdir(self, path: str) -> list[str]: ...
```

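Each handler is registered with the central `PapermillIO` instance under the URL scheme prefix it serves; papermill performs these registrations itself when the library is imported. An illustrative sketch of that wiring (not a verbatim copy of the library's registration code):

```python
from papermill.iorw import papermill_io, S3Handler, ADLHandler, ABSHandler, GCSHandler

# Scheme prefixes map to handler instances; read/write/listdir calls are
# dispatched to whichever handler matches the path's prefix.
papermill_io.register('s3://', S3Handler())
papermill_io.register('adl://', ADLHandler())
papermill_io.register('abs://', ABSHandler())
papermill_io.register('gs://', GCSHandler())
```
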
### Other Storage Systems

```python { .api }
class HDFSHandler:
    """Handler for Hadoop Distributed File System (HDFS) operations."""

    def read(self, path: str) -> str: ...
    def write(self, buf: str, path: str) -> None: ...
    def listdir(self, path: str) -> list[str]: ...

class GithubHandler:
    """Handler for GitHub repository operations."""

    def read(self, path: str) -> str: ...
    def listdir(self, path: str) -> list[str]: ...

class HttpHandler:
    """Handler for HTTP/HTTPS operations."""

    def read(self, path: str) -> str: ...

class StreamHandler:
    """Handler for stream I/O operations."""

    def read(self, path: str) -> str: ...
    def write(self, buf: str, path: str) -> None: ...
```

## Cloud Storage Backend Classes

### Amazon S3

```python { .api }
class S3:
    """S3 client for interacting with Amazon S3."""

    def __init__(self, **kwargs): ...
    def read(self, key: str) -> str: ...
    def write(self, buf: str, key: str) -> None: ...
    def listdir(self, prefix: str) -> list[str]: ...

class Bucket:
    """S3 bucket representation."""

    def __init__(self, name: str, service: str = None): ...

class Prefix:
    """S3 prefix representation."""

    def __init__(self, bucket: Bucket, name: str): ...

class Key:
    """S3 key representation."""

    def __init__(self, prefix: Prefix, name: str): ...
```

### Azure Storage

```python { .api }
class ADL:
    """Azure Data Lake client."""

    def __init__(self, **kwargs): ...
    def read(self, path: str) -> str: ...
    def write(self, buf: str, path: str) -> None: ...
    def listdir(self, path: str) -> list[str]: ...

class AzureBlobStore:
    """Azure Blob Storage client."""

    def __init__(self, **kwargs): ...
    def read(self, path: str) -> str: ...
    def write(self, buf: str, path: str) -> None: ...
    def listdir(self, path: str) -> list[str]: ...
```

## Usage Examples

### Local Filesystem

```python
import papermill as pm

# Execute with local paths
pm.execute_notebook(
    '/path/to/input.ipynb',
    '/path/to/output.ipynb',
    parameters={'data_file': '/data/input.csv'}
)
```

### Amazon S3

```python
import papermill as pm

# Execute with S3 paths
pm.execute_notebook(
    's3://my-bucket/notebooks/analysis.ipynb',
    's3://my-bucket/results/output.ipynb',
    parameters={'dataset': 's3://my-bucket/data/sales.csv'}
)

# Mixed local and S3 paths
pm.execute_notebook(
    'local_template.ipynb',
    's3://my-bucket/results/report.ipynb',
    parameters={'config': 'production'}
)
```

### Azure Storage

```python
import papermill as pm

# Azure Data Lake Storage
pm.execute_notebook(
    'adl://mydatalake.azuredatalakestore.net/notebooks/analysis.ipynb',
    'adl://mydatalake.azuredatalakestore.net/results/output.ipynb'
)

# Azure Blob Storage
pm.execute_notebook(
    'abs://myaccount.blob.core.windows.net/container/notebook.ipynb',
    'abs://myaccount.blob.core.windows.net/container/result.ipynb'
)
```

### Google Cloud Storage

```python
import papermill as pm

# Execute with GCS paths
pm.execute_notebook(
    'gs://my-bucket/notebooks/analysis.ipynb',
    'gs://my-bucket/results/output.ipynb',
    parameters={'project_id': 'my-gcp-project'}
)
```

### HDFS

```python
import papermill as pm

# Execute with HDFS paths (replace namenode:port with your cluster's namenode address)
pm.execute_notebook(
    'hdfs://namenode:port/notebooks/analysis.ipynb',
    'hdfs://namenode:port/results/output.ipynb'
)
```

### GitHub Repositories

```python
import papermill as pm

# Execute a notebook fetched from a GitHub repository via its raw HTTPS URL
# (the 'branch' entry below is injected into the notebook as an ordinary parameter)
pm.execute_notebook(
    'https://raw.githubusercontent.com/user/repo/main/notebook.ipynb',
    'output.ipynb',
    parameters={'branch': 'main'}
)
```

### HTTP/HTTPS

```python
import papermill as pm

# Execute notebook from HTTP URL
pm.execute_notebook(
    'https://example.com/notebooks/analysis.ipynb',
    'local_output.ipynb'
)
```

## Advanced Storage Configuration

### Custom I/O Handler Registration

```python
from papermill.iorw import papermill_io

# Define a custom handler implementing the same interface as the built-in handlers
class CustomHandler:
    def read(self, path):
        # Custom read logic: return the notebook content as a string
        pass

    def write(self, buf, path):
        # Custom write logic: persist the notebook content string
        pass

    def listdir(self, path):
        # Custom directory listing logic
        pass

    def pretty_path(self, path):
        # How the path should be displayed in logs and progress output
        return path

# Register the handler for a custom path scheme
papermill_io.register("custom://", CustomHandler())
```

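Once registered, the custom scheme can be used anywhere papermill accepts a path (the `custom://` scheme and paths below are purely illustrative):

```python
import papermill as pm

# Both the input and output paths are resolved through CustomHandler
pm.execute_notebook(
    'custom://source/notebook.ipynb',
    'custom://results/output.ipynb'
)
```
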
### Storage Backend Authentication

```python
import os

# S3 authentication via environment variables
os.environ['AWS_ACCESS_KEY_ID'] = 'your-access-key'
os.environ['AWS_SECRET_ACCESS_KEY'] = 'your-secret-key'

# Azure authentication
os.environ['AZURE_STORAGE_ACCOUNT'] = 'your-account'
os.environ['AZURE_STORAGE_KEY'] = 'your-key'

# GCS authentication
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/path/to/service-account.json'
```

363
### Working with Large Files
364
365
```python
366
import papermill as pm
367
368
# Execute with progress tracking for large notebooks
369
pm.execute_notebook(
370
's3://large-bucket/big-notebook.ipynb',
371
's3://large-bucket/results/output.ipynb',
372
progress_bar=True,
373
log_output=True,
374
start_timeout=300 # Extended timeout for large files
375
)
376
```