or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

dict-operations.md, download-caching.md, function-utilities.md, hashing-imports.md, index.md, list-operations.md, path-operations.md, progress-timing.md, system-integration.md, text-processing.md

docs/download-caching.md

0

# Download and Caching

1

2

Download files with progress tracking and hash verification, and cache downloaded data and expensive computations on disk.

3

4

## Capabilities

5

6

### File Downloads

7

8

Download files from URLs with progress tracking, hash verification, and caching support.

9

10

```python { .api }

11

def download(url, fpath=None, hash_prefix=None, hasher='sha512', **kwargs):

12

"""

13

Download file from URL with progress and verification.

14

15

Args:

16

url (str): URL to download from

17

fpath (str|Path): Local file path (auto-generated if None)

18

hash_prefix (str): Expected hash prefix for verification

19

hasher (str): Hash algorithm ('sha512', 'sha256', 'md5')

20

verbose (int): Verbosity level

21

chunk_size (int): Download chunk size in bytes

22

timeout (float): Connection timeout

23

24

Returns:

25

str: Path to downloaded file

26

27

Raises:

28

URLError: Download failed

29

HashMismatchError: Hash verification failed

30

"""

31

32

def grabdata(url, fpath=None, dpath=None, fname=None, **kwargs):

33

"""

34

Download and cache data with automatic path handling.

35

36

Args:

37

url (str): URL to download

38

fpath (str): Explicit file path

39

dpath (str): Directory for cached file

40

fname (str): Filename for cached file

41

**kwargs: Additional download options

42

43

Returns:

44

str: Path to cached file

45

"""

46

47

class DownloadManager:

48

"""

49

Manage multiple download operations with queuing and progress tracking.

50

"""

51

def __init__(self, max_workers=4): ...

52

53

def submit(self, url, fpath=None, **kwargs): ...

54

def download_all(self): ...

55

def __enter__(self): ...

56

def __exit__(self, exc_type, exc_val, exc_tb): ...

57

```

58

59

### Computation Caching

60

61

Cache expensive computations to disk with dependency tracking and automatic invalidation.

62

63

```python { .api }

64

class Cacher:

65

"""

66

On-disk caching with dependency tracking.

67

Automatically invalidates cache when dependencies change.

68

"""

69

def __init__(self, fname, depends=None, dpath=None, appname='ubelt', **kwargs):

70

"""

71

Args:

72

fname (str): Cache filename

73

depends: Dependencies that invalidate cache when changed

74

dpath (str): Cache directory

75

appname (str): Application name for cache organization

76

**kwargs: Additional cache options

77

"""

78

79

def tryload(self):

80

"""

81

Try to load cached result.

82

83

Returns:

84

object|None: Cached result or None if cache miss/invalid

85

"""

86

87

def save(self, data):

88

"""

89

Save data to cache.

90

91

Args:

92

data: Data to cache

93

"""

94

95

def clear(self):

96

"""Clear cached data."""

97

98

def exists(self):

99

"""

100

Check if cache exists and is valid.

101

102

Returns:

103

bool: True if cache exists and dependencies unchanged

104

"""

105

106

def ensure(self, func, *args, **kwargs):

107

"""

108

Ensure cached result exists, computing if necessary.

109

110

Args:

111

func: Function to call if cache miss

112

*args: Arguments for func

113

**kwargs: Keyword arguments for func

114

115

Returns:

116

object: Cached or computed result

117

"""

118

119

class CacheStamp:

120

"""

121

Lightweight cache stamping for file-producing computations.

122

Tracks when outputs are newer than inputs.

123

"""

124

def __init__(self, fname, dpath=None, **kwargs): ...

125

126

def expired(self, *depends):

127

"""

128

Check if cache is expired relative to dependencies.

129

130

Args:

131

*depends: File paths or other dependencies

132

133

Returns:

134

bool: True if cache is expired

135

"""

136

137

def renew(self):

138

"""Update cache timestamp."""

139

140

def clear(self):

141

"""Remove cache stamp."""

142

```

143

144

## Usage Examples

145

146

### File Downloads

147

148

```python

149

import ubelt as ub

150

151

# Simple download

152

url = 'https://example.com/data.zip'

153

fpath = ub.download(url)

154

print(f"Downloaded to: {fpath}")

155

156

# Download with verification

157

url = 'https://example.com/important.tar.gz'

158

expected_hash = 'a1b2c3d4e5f6...' # First few characters of expected hash

159

fpath = ub.download(url, hash_prefix=expected_hash, hasher='sha256')

160

161

# Download to specific location

162

local_path = './downloads/myfile.zip'

163

ub.download(url, fpath=local_path, verbose=2)

164

165

# Download with caching (won't re-download if file exists)

166

cached_file = ub.grabdata(url, dpath='./cache')

167

```

168

169

### Multiple Downloads

170

171

```python

172

import ubelt as ub

173

174

# Download multiple files

175

urls = [

176

'https://example.com/file1.zip',

177

'https://example.com/file2.tar.gz',

178

'https://example.com/file3.json'

179

]

180

181

# Sequential downloads

182

files = []

183

for url in urls:

184

fpath = ub.download(url, dpath='./downloads')

185

files.append(fpath)

186

187

# Parallel downloads with DownloadManager

188

with ub.DownloadManager(max_workers=3) as dm:

189

futures = []

190

for url in urls:

191

future = dm.submit(url, dpath='./downloads')

192

futures.append(future)

193

194

# Get results

195

files = [future.result() for future in futures]

196

```

197

198

### Computation Caching

199

200

```python

201

import ubelt as ub

202

import time

203

204

def expensive_computation(n):

205

"""Simulate expensive computation"""

206

print(f"Computing for n={n}...")

207

time.sleep(2) # Simulate work

208

return n ** 2

209

210

# Basic caching

211

cache = ub.Cacher('computation_cache')

212

result = cache.tryload()

213

if result is None:

214

result = expensive_computation(100)

215

cache.save(result)

216

print(f"Result: {result}")

217

218

# Dependency-based caching

219

input_file = 'input.txt'

220

with open(input_file, 'w') as f:

221

f.write('some input data')

222

223

# Cache depends on input file

224

cache = ub.Cacher('file_processing', depends=[input_file])

225

result = cache.tryload()

226

if result is None:

227

# Process the file

228

with open(input_file, 'r') as f:

229

data = f.read()

230

result = data.upper() # Simple processing

231

cache.save(result)

232

233

# Cache will be invalidated if input.txt changes

234

235

# Using ensure for cleaner code

236

def process_data(filename):

237

with open(filename, 'r') as f:

238

return f.read().upper()

239

240

cache = ub.Cacher('processing', depends=[input_file])

241

result = cache.ensure(process_data, input_file)

242

```

243

244

### Cache Stamps for File Operations

245

246

```python

247

import ubelt as ub

248

249

# Stamp-based caching for file generation

250

input_files = ['input1.txt', 'input2.txt', 'config.json']

251

output_file = 'processed_output.json'

252

253

stamp = ub.CacheStamp('processing_stamp')

254

255

if stamp.expired(*input_files, output_file):

256

print("Processing files...")

257

# Do expensive file processing

258

processed_data = {'result': 'processed'}

259

260

# Write output

261

import json

262

with open(output_file, 'w') as f:

263

json.dump(processed_data, f)

264

265

# Update stamp

266

stamp.renew()

267

else:

268

print("Using cached output")

269

270

# Output file exists and is newer than inputs

271

```

272

273

### Advanced Caching Patterns

274

275

```python

276

import ubelt as ub

277

278

# Cache with custom dependencies

279

def get_data_hash():

280

"""Get hash of current data state"""

281

return ub.hash_data({'version': '1.2', 'config': 'prod'})

282

283

# Cache that depends on data state, not just files

284

cache = ub.Cacher('model_cache', depends=[get_data_hash()])

285

286

def train_model():

287

print("Training model...")

288

return {'accuracy': 0.95, 'model': 'trained_weights'}

289

290

model = cache.ensure(train_model)

291

292

# Organized caching with app-specific directories

293

user_cache = ub.Cacher('user_prefs', appname='myapp')

294

model_cache = ub.Cacher('models', appname='myapp', dpath='./models')

295

296

# Clear caches when needed

297

if need_fresh_data:

298

cache.clear()

299

300

# Check cache status

301

if cache.exists():

302

print("Cache is valid")

303

data = cache.tryload()

304

else:

305

print("Cache expired or missing")

306

```