Tessl Tile for pypi/ubelt@1.4.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

dict-operations.md download-caching.md function-utilities.md hashing-imports.md index.md list-operations.md path-operations.md progress-timing.md system-integration.md text-processing.md

download-caching.md docs/

0
# Download and Caching
1

2
Download files with progress tracking and hash verification, and cache downloaded data and expensive computations on disk.
3

4
## Capabilities
5

6
### File Downloads
7

8
Download files from URLs with progress tracking, hash verification, and caching support.
9

10
```python { .api }
11
def download(url, fpath=None, hash_prefix=None, hasher='sha512', **kwargs):
12
    """
13
    Download file from URL with progress and verification.
14
    
15
    Args:
16
        url (str): URL to download from
17
        fpath (str|Path): Local file path (auto-generated if None)
18
        hash_prefix (str): Expected hash prefix for verification
19
        hasher (str): Hash algorithm ('sha512', 'sha256', 'md5')
20
        verbose (int): Verbosity level
21
        chunksize (int): Download chunk size in bytes
22
        timeout (float): Connection timeout
23
        
24
    Returns:
25
        str: Path to downloaded file
26
        
27
    Raises:
28
        URLError: Download failed
29
        RuntimeError: Hash verification failed (downloaded file does not match hash_prefix)
30
    """
31

32
def grabdata(url, fpath=None, dpath=None, fname=None, **kwargs):
33
    """
34
    Download and cache data with automatic path handling.
35
    
36
    Args:
37
        url (str): URL to download
38
        fpath (str): Explicit file path
39
        dpath (str): Directory for cached file
40
        fname (str): Filename for cached file
41
        **kwargs: Additional download options
42
        
43
    Returns:
44
        str: Path to cached file
45
    """
46

47
class DownloadManager:
48
    """
49
    Manage multiple download operations with queuing and progress tracking.
50
    """
51
    def __init__(self, max_workers=4): ...
52
    
53
    def submit(self, url, fpath=None, **kwargs): ...
54
    def download_all(self): ...
55
    def __enter__(self): ...
56
    def __exit__(self, exc_type, exc_val, exc_tb): ...
57
```
58

59
### Computation Caching
60

61
Cache expensive computations to disk with dependency tracking and automatic invalidation.
62

63
```python { .api }
64
class Cacher:
65
    """
66
    On-disk caching with dependency tracking.
67
    Automatically invalidates cache when dependencies change.
68
    """
69
    def __init__(self, fname, depends=None, dpath=None, appname='ubelt', **kwargs):
70
        """
71
        Args:
72
            fname (str): Cache filename
73
            depends: Dependencies that invalidate cache when changed
74
            dpath (str): Cache directory
75
            appname (str): Application name for cache organization
76
            **kwargs: Additional cache options
77
        """
78
    
79
    def tryload(self):
80
        """
81
        Try to load cached result.
82
        
83
        Returns:
84
            object|None: Cached result or None if cache miss/invalid
85
        """
86
    
87
    def save(self, data):
88
        """
89
        Save data to cache.
90
        
91
        Args:
92
            data: Data to cache
93
        """
94
    
95
    def clear(self):
96
        """Clear cached data."""
97
    
98
    def exists(self):
99
        """
100
        Check if cache exists and is valid.
101
        
102
        Returns:
103
            bool: True if cache exists and dependencies unchanged
104
        """
105
    
106
    def ensure(self, func, *args, **kwargs):
107
        """
108
        Ensure cached result exists, computing if necessary.
109
        
110
        Args:
111
            func: Function to call if cache miss
112
            *args: Arguments for func
113
            **kwargs: Keyword arguments for func
114
            
115
        Returns:
116
            object: Cached or computed result
117
        """
118

119
class CacheStamp:
120
    """
121
    Lightweight cache stamping for file-producing computations.
122
    Tracks when outputs are newer than inputs.
123
    """
124
    def __init__(self, fname, dpath=None, **kwargs): ...
125
    
126
    def expired(self, *depends):
127
        """
128
        Check if cache is expired relative to dependencies.
129
        
130
        Args:
131
            *depends: File paths or other dependencies
132
            
133
        Returns:
134
            bool: True if cache is expired
135
        """
136
    
137
    def renew(self):
138
        """Update cache timestamp."""
139
    
140
    def clear(self):
141
        """Remove cache stamp."""
142
```
143

144
## Usage Examples
145

146
### File Downloads
147

148
```python
149
import ubelt as ub
150

151
# Simple download
152
url = 'https://example.com/data.zip'
153
fpath = ub.download(url)
154
print(f"Downloaded to: {fpath}")
155

156
# Download with verification
157
url = 'https://example.com/important.tar.gz'
158
expected_hash = 'a1b2c3d4e5f6...'  # First few characters of expected hash
159
fpath = ub.download(url, hash_prefix=expected_hash, hasher='sha256')
160

161
# Download to specific location
162
local_path = './downloads/myfile.zip'
163
ub.download(url, fpath=local_path, verbose=2)
164

165
# Download with caching (won't re-download if file exists)
166
cached_file = ub.grabdata(url, dpath='./cache')
167
```
168

169
### Multiple Downloads
170

171
```python
172
import ubelt as ub
173

174
# Download multiple files
175
urls = [
176
    'https://example.com/file1.zip',
177
    'https://example.com/file2.tar.gz',
178
    'https://example.com/file3.json'
179
]
180

181
# Sequential downloads
182
files = []
183
for url in urls:
184
    fpath = ub.download(url, dpath='./downloads')
185
    files.append(fpath)
186

187
# Parallel downloads with DownloadManager
188
with ub.DownloadManager(max_workers=3) as dm:
189
    futures = []
190
    for url in urls:
191
        future = dm.submit(url, dpath='./downloads')
192
        futures.append(future)
193
    
194
    # Get results
195
    files = [future.result() for future in futures]
196
```
197

198
### Computation Caching
199

200
```python
201
import ubelt as ub
202
import time
203

204
def expensive_computation(n):
205
    """Simulate expensive computation"""
206
    print(f"Computing for n={n}...")
207
    time.sleep(2)  # Simulate work
208
    return n ** 2
209

210
# Basic caching
211
cache = ub.Cacher('computation_cache')
212
result = cache.tryload()
213
if result is None:
214
    result = expensive_computation(100)
215
    cache.save(result)
216
print(f"Result: {result}")
217

218
# Dependency-based caching
219
input_file = 'input.txt'
220
with open(input_file, 'w') as f:
221
    f.write('some input data')
222

223
# Cache depends on input file
224
cache = ub.Cacher('file_processing', depends=[input_file])
225
result = cache.tryload()
226
if result is None:
227
    # Process the file
228
    with open(input_file, 'r') as f:
229
        data = f.read()
230
    result = data.upper()  # Simple processing
231
    cache.save(result)
232

233
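# Note: depends=[input_file] hashes the path string, not the file contents; use depends=[ub.hash_file(input_file)] so the cache is invalidated when input.txt changes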
234

235
# Using ensure for cleaner code
236
def process_data(filename):
237
    with open(filename, 'r') as f:
238
        return f.read().upper()
239

240
cache = ub.Cacher('processing', depends=[input_file])
241
result = cache.ensure(process_data, input_file)
242
```
243

244
### Cache Stamps for File Operations
245

246
```python
247
import ubelt as ub
248

249
# Stamp-based caching for file generation
250
input_files = ['input1.txt', 'input2.txt', 'config.json']
251
output_file = 'processed_output.json'
252

253
stamp = ub.CacheStamp('processing_stamp')
254

255
if stamp.expired(*input_files, output_file):
256
    print("Processing files...")
257
    # Do expensive file processing
258
    processed_data = {'result': 'processed'}
259
    
260
    # Write output
261
    import json
262
    with open(output_file, 'w') as f:
263
        json.dump(processed_data, f)
264
    
265
    # Update stamp
266
    stamp.renew()
267
else:
268
    print("Using cached output")
269

270
# Output file exists and is newer than inputs
271
```
272

273
### Advanced Caching Patterns
274

275
```python
276
import ubelt as ub
277

278
# Cache with custom dependencies
279
def get_data_hash():
280
    """Get hash of current data state"""
281
    return ub.hash_data({'version': '1.2', 'config': 'prod'})
282

283
# Cache that depends on data state, not just files
284
cache = ub.Cacher('model_cache', depends=[get_data_hash()])
285

286
def train_model():
287
    print("Training model...")
288
    return {'accuracy': 0.95, 'model': 'trained_weights'}
289

290
model = cache.ensure(train_model)
291

292
# Organized caching with app-specific directories
293
user_cache = ub.Cacher('user_prefs', appname='myapp')
294
model_cache = ub.Cacher('models', appname='myapp', dpath='./models')
295

296
# Clear caches when needed
297
if need_fresh_data:
298
    cache.clear()
299
    
300
# Check cache status
301
if cache.exists():
302
    print("Cache is valid")
303
    data = cache.tryload()
304
else:
305
    print("Cache expired or missing")
306
```

Version

Tile

Files

download-caching.md docs/

download-caching.md docs/