or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

archives.md, cloud-storage.md, configuration.md, data-formats.md, directory-management.md, file-operations.md, index.md, module-class.md, nltk-integration.md, web-scraping.md

docs/file-operations.md

0

# File Download and Caching

1

2

PyStow provides a comprehensive file download and caching system that automatically manages file retrieval, storage, and cache validation. Files are downloaded once and reused from cache on subsequent requests.

3

4

## Core Download Functions

5

6

### Basic File Download

7

8

```python { .api }

9

def ensure(key: str, *subkeys: str, url: str, name: str | None = None, version: VersionHint = None, force: bool = False, download_kwargs: Mapping[str, Any] | None = None) -> Path:

10

"""Ensure a file is downloaded.

11

12

Args:

13

key: The name of the module. No funny characters. The envvar <key>_HOME where

14

key is uppercased is checked first before using the default home directory.

15

subkeys: A sequence of additional strings to join. If none are given, returns

16

the directory for this module.

17

url: The URL to download.

18

name: Overrides the name of the file at the end of the URL, if given. Also

19

useful for URLs that don't have proper filenames with extensions.

20

version: The optional version, or no-argument callable that returns an

21

optional version. This is prepended before the subkeys.

22

force: Should the download be done again, even if the path already exists?

23

Defaults to false.

24

download_kwargs: Keyword arguments to pass through to pystow.utils.download.

25

26

Returns:

27

The path of the file that has been downloaded (or already exists)

28

"""

29

```

30

31

### Custom File Creation

32

33

```python { .api }

34

def ensure_custom(key: str, *subkeys: str, name: str, force: bool = False, provider: Provider, **kwargs: Any) -> Path:

35

"""Ensure a file is present, and run a custom create function otherwise.

36

37

Args:

38

key: The name of the module. No funny characters. The envvar <key>_HOME where

39

key is uppercased is checked first before using the default home directory.

40

subkeys: A sequence of additional strings to join. If none are given, returns

41

the directory for this module.

42

name: The file name.

43

force: Should the file be re-created, even if the path already exists?

44

provider: The file provider. Will be run with the path as the first

45

positional argument, if the file needs to be generated.

46

kwargs: Additional keyword-based parameters passed to the provider.

47

48

Returns:

49

The path of the file that has been created (or already exists)

50

"""

51

```

52

53

## File I/O Context Managers

54

55

### Basic File Opening

56

57

```python { .api }

58

@contextmanager

59

def open(key: str, *subkeys: str, name: str, mode: Literal["r", "rb", "rt", "w", "wb", "wt"] = "r", open_kwargs: Mapping[str, Any] | None = None, ensure_exists: bool = False) -> Generator[StringIO | BytesIO, None, None]:

60

"""Open a file.

61

62

Args:

63

key: The name of the module. No funny characters. The envvar <key>_HOME where

64

key is uppercased is checked first before using the default home directory.

65

subkeys: A sequence of additional strings to join. If none are given, returns

66

the directory for this module.

67

name: The name of the file to open

68

mode: The read or write mode, passed to open

69

open_kwargs: Additional keyword arguments passed to open

70

ensure_exists: Should the directory the file is in be made? Set to true on

71

write operations.

72

73

Yields:

74

An open file object

75

"""

76

```

77

78

### Gzipped File Opening

79

80

```python { .api }

81

@contextmanager

82

def open_gz(key: str, *subkeys: str, name: str, mode: Literal["r", "w", "rt", "wt", "rb", "wb"] = "rb", open_kwargs: Mapping[str, Any] | None = None, ensure_exists: bool = False) -> Generator[StringIO | BytesIO, None, None]:

83

"""Open a gzipped file that exists already.

84

85

Args:

86

key: The name of the module. No funny characters. The envvar <key>_HOME where

87

key is uppercased is checked first before using the default home directory.

88

subkeys: A sequence of additional strings to join. If none are given, returns

89

the directory for this module.

90

name: The name of the file to open

91

mode: The read or write mode, passed to gzip.open

92

open_kwargs: Additional keyword arguments passed to gzip.open

93

ensure_exists: Should the directory the file is in be made? Set to true on write operations.

94

95

Yields:

96

An open file object

97

"""

98

```

99

100

### Download and Open

101

102

```python { .api }

103

@contextmanager

104

def ensure_open(key: str, *subkeys: str, url: str, name: str | None = None, force: bool = False, download_kwargs: Mapping[str, Any] | None = None, mode: Literal["r", "rt", "w", "wt"] | Literal["rb", "wb"] = "r", open_kwargs: Mapping[str, Any] | None = None) -> Generator[StringIO | BytesIO, None, None]:

105

"""Ensure a file is downloaded and open it.

106

107

Args:

108

key: The name of the module. No funny characters. The envvar <key>_HOME

109

where key is uppercased is checked first before using the default home

110

directory.

111

subkeys: A sequence of additional strings to join. If none are given, returns

112

the directory for this module.

113

url: The URL to download.

114

name: Overrides the name of the file at the end of the URL, if given. Also

115

useful for URLs that don't have proper filenames with extensions.

116

force: Should the download be done again, even if the path already exists?

117

Defaults to false.

118

download_kwargs: Keyword arguments to pass through to pystow.utils.download.

119

mode: The read or write mode, passed to open

120

open_kwargs: Additional keyword arguments passed to open

121

122

Yields:

123

An open file object

124

"""

125

```

126

127

### Gzipped File Operations

128

129

```python { .api }

130

@contextmanager

131

def open_gz(key: str, *subkeys: str, name: str, mode: Literal["r", "w", "rt", "wt", "rb", "wb"] = "rb", open_kwargs: Mapping[str, Any] | None = None, ensure_exists: bool = False) -> Generator[StringIO | BytesIO, None, None]:

132

"""Open a gzipped file that exists already.

133

134

Args:

135

key: The name of the module. No funny characters. The envvar <key>_HOME where

136

key is uppercased is checked first before using the default home directory.

137

subkeys: A sequence of additional strings to join. If none are given, returns

138

the directory for this module.

139

name: The name of the file to open

140

mode: The read or write mode, passed to gzip.open

141

open_kwargs: Additional keyword arguments passed to gzip.open

142

ensure_exists: Should the directory the file is in be made? Set to true on write operations.

143

144

Yields:

145

An open file object

146

"""

147

```

148

149

## Usage Examples

150

151

### Basic File Download

152

153

```python

154

import pystow

155

156

# Download a file with automatic caching

157

path = pystow.ensure(

158

"myapp", "datasets",

159

url="https://example.com/data.csv",

160

name="dataset.csv"

161

)

162

163

# File is cached - subsequent calls return immediately

164

path = pystow.ensure(

165

"myapp", "datasets",

166

url="https://example.com/data.csv",

167

name="dataset.csv"

168

)

169

170

# Force re-download

171

path = pystow.ensure(

172

"myapp", "datasets",

173

url="https://example.com/data.csv",

174

name="dataset.csv",

175

force=True

176

)

177

```

178

179

### Download with Versioning

180

181

```python

182

import pystow

183

import requests

184

185

def get_data_version():

186

"""Get current version from API"""

187

response = requests.get("https://api.example.com/version")

188

return response.json()["version"]

189

190

# Version-aware download

191

path = pystow.ensure(

192

"myapp", "datasets",

193

url="https://example.com/data.csv",

194

version=get_data_version

195

)

196

# Stores in: ~/.data/myapp/v1.2.3/datasets/data.csv

197

```

198

199

### Custom File Generation

200

201

```python

202

import pystow

203

import pandas as pd

204

205

def create_processed_data(path, raw_data_url):

206

"""Custom function to create processed data file"""

207

# Download raw data

208

raw_path = pystow.ensure(

209

"myapp", "raw",

210

url=raw_data_url

211

)

212

213

# Process data

214

df = pd.read_csv(raw_path)

215

processed_df = df.groupby('category').sum()

216

217

# Save to the target path

218

processed_df.to_csv(path)

219

220

# Ensure processed data exists

221

processed_path = pystow.ensure_custom(

222

"myapp", "processed",

223

name="aggregated_data.csv",

224

provider=create_processed_data,

225

raw_data_url="https://example.com/raw_data.csv"

226

)

227

```

228

229

### File I/O Operations

230

231

```python

232

import pystow

233

234

# Read from existing file

235

with pystow.open("myapp", "config", name="settings.txt", mode="r") as file:

236

config = file.read()

237

238

# Write to file (creates directories automatically)

239

with pystow.open("myapp", "logs", name="app.log", mode="w", ensure_exists=True) as file:

240

file.write("Application started\n")

241

242

# Download and read in one step

243

with pystow.ensure_open(

244

"myapp", "data",

245

url="https://example.com/data.txt"

246

) as file:

247

content = file.read()

248

249

# Work with gzipped files

250

with pystow.open_gz("myapp", "compressed", name="data.gz", mode="rt") as file:

251

data = file.read()

252

```

253

254

### Download Configuration

255

256

```python

257

import pystow

258

259

# Configure download behavior

260

path = pystow.ensure(

261

"myapp", "data",

262

url="https://example.com/large_file.zip",

263

download_kwargs={

264

"timeout": 300, # 5 minute timeout

265

"stream": True, # Stream download

266

"verify": True, # Verify SSL certificates

267

"headers": { # Custom headers

268

"User-Agent": "MyApp/1.0"

269

}

270

}

271

)

272

```

273

274

### Module-Based File Operations

275

276

```python

277

import json

import pystow

278

279

# Create module instance

280

module = pystow.module("myapp")

281

282

# Download files using module

283

data_path = module.ensure(

284

"datasets",

285

url="https://example.com/data.csv"

286

)

287

288

# Open files using module

289

with module.open("config", name="settings.json", mode="r") as file:

290

config = json.load(file)

291

292

# Custom file creation with module

293

processed_path = module.ensure_custom(

294

"processed",

295

name="summary.txt",

296

provider=lambda path: path.write_text("Summary complete")

297

)

298

```