# Download Utilities

Tools for streaming downloads to files, monitoring download progress, handling multiple download destinations, and extracting download information from responses.

## Capabilities

### File Streaming

Stream HTTP response content directly to files without loading everything into memory.
```python { .api }
def stream_response_to_file(response, path=None, chunksize=512):
    """
    Stream response content to a file.

    Parameters:
    - response: Response object to stream
    - path: str, destination file path (auto-generated if None)
    - chunksize: int, size of chunks to read (default: 512)

    Returns:
    str: path to the downloaded file
    """

def get_download_file_path(response, path):
    """
    Generate appropriate file path for download based on response headers.

    Parameters:
    - response: Response object
    - path: str, base path or directory

    Returns:
    str: complete file path for download
    """
```

#### Usage Examples

```python
import requests
from requests_toolbelt.downloadutils.stream import stream_response_to_file, get_download_file_path

# Stream large file download
response = requests.get('https://example.com/large-file.zip', stream=True)
local_path = stream_response_to_file(response)
print(f"Downloaded to: {local_path}")

# Specify destination path
response = requests.get('https://example.com/data.json', stream=True)
local_path = stream_response_to_file(response, '/downloads/data.json')

# Auto-generate filename from response headers
response = requests.get('https://example.com/report.pdf', stream=True)
download_path = get_download_file_path(response, '/downloads/')
local_path = stream_response_to_file(response, download_path)

# Custom chunk size for better performance
response = requests.get('https://example.com/video.mp4', stream=True)
local_path = stream_response_to_file(response, chunksize=8192)
```

### Tee Operations

Stream response content to multiple destinations simultaneously (file, memory, etc.).

```python { .api }
def tee(response, fileobject, chunksize=512, decode_content=True):
    """
    Stream response to file-like object while yielding content.

    Parameters:
    - response: Response object to stream
    - fileobject: file-like object to write to
    - chunksize: int, size of chunks to read (default: 512)
    - decode_content: bool, whether to decode content (default: True)

    Yields:
    bytes: chunks of response content
    """

def tee_to_file(response, filename, chunksize=512, decode_content=True):
    """
    Stream response to file while yielding content.

    Parameters:
    - response: Response object to stream
    - filename: str, destination filename
    - chunksize: int, size of chunks to read (default: 512)
    - decode_content: bool, whether to decode content (default: True)

    Yields:
    bytes: chunks of response content
    """

def tee_to_bytearray(response, bytearr, chunksize=512, decode_content=True):
    """
    Stream response to bytearray while yielding content.

    Parameters:
    - response: Response object to stream
    - bytearr: bytearray to append to
    - chunksize: int, size of chunks to read (default: 512)
    - decode_content: bool, whether to decode content (default: True)

    Yields:
    bytes: chunks of response content
    """
```

#### Usage Examples

```python
import hashlib
import requests
from requests_toolbelt.downloadutils.tee import tee, tee_to_file, tee_to_bytearray

# Save to file while processing content
response = requests.get('https://api.example.com/data.csv', stream=True)

processed_lines = []
with open('data.csv', 'wb') as f:
    for chunk in tee(response, f):
        # Process each chunk while saving to file
        if b'\n' in chunk:
            lines = chunk.split(b'\n')
            processed_lines.extend(lines)

print(f"Processed {len(processed_lines)} lines while saving to file")

# Save to file and collect all content
response = requests.get('https://example.com/api/response.json', stream=True)

all_content = b''
for chunk in tee_to_file(response, 'response.json'):
    all_content += chunk

# Now you have the content both in file and memory
import json
data = json.loads(all_content.decode('utf-8'))

# Stream to bytearray for memory efficiency
response = requests.get('https://example.com/binary-data', stream=True)

data_buffer = bytearray()
hash_calculator = hashlib.sha256()

for chunk in tee_to_bytearray(response, data_buffer, chunksize=8192):
    hash_calculator.update(chunk)

print(f"Downloaded {len(data_buffer)} bytes")
print(f"SHA256: {hash_calculator.hexdigest()}")

# Multiple destinations
response = requests.get('https://example.com/large-file.dat', stream=True)

with open('backup1.dat', 'wb') as f1, open('backup2.dat', 'wb') as f2:
    checksum = hashlib.md5()

    for chunk in tee(response, f1):
        f2.write(chunk)  # Write to second file
        checksum.update(chunk)  # Update checksum

print(f"File saved to two locations with MD5: {checksum.hexdigest()}")
```

### Download Progress Monitoring

```python
import requests
from requests_toolbelt.downloadutils.tee import tee_to_file

def download_with_progress(url, filename):
    """Download file with progress indication."""
    response = requests.get(url, stream=True)
    total_size = int(response.headers.get('Content-Length', 0))

    downloaded = 0

    for chunk in tee_to_file(response, filename, chunksize=8192):
        downloaded += len(chunk)
        if total_size > 0:
            percent = (downloaded / total_size) * 100
            print(f"\rDownload progress: {percent:.1f}% ({downloaded}/{total_size} bytes)", end='')

    print(f"\nDownload complete: {filename}")

# Usage
download_with_progress('https://example.com/large-file.zip', 'local-file.zip')
```

### Content Processing During Download

```python
import requests
import json
from requests_toolbelt.downloadutils.tee import tee_to_file

def download_and_process_json_stream(url, filename):
    """Download JSON stream while processing each object."""
    response = requests.get(url, stream=True)

    buffer = ""
    objects_processed = 0

    for chunk in tee_to_file(response, filename, decode_content=True):
        buffer += chunk.decode('utf-8')

        # Process complete JSON objects
        while '\n' in buffer:
            line, buffer = buffer.split('\n', 1)
            if line.strip():
                try:
                    obj = json.loads(line)
                    # Process the JSON object
                    process_json_object(obj)
                    objects_processed += 1
                except json.JSONDecodeError:
                    pass

    print(f"Processed {objects_processed} JSON objects while downloading to {filename}")

def process_json_object(obj):
    """Process individual JSON object."""
    # Your processing logic here
    pass
```