A utility belt for advanced users of python-requests
—
Tools for streaming downloads to files, monitoring download progress, handling multiple download destinations, and extracting download information from responses.
Stream HTTP response content directly to files without loading everything into memory.
def stream_response_to_file(response, path=None, chunksize=512):
    """
    Stream response content to a file.

    Signature reference only: no function body is shown here. The usage
    examples import this function from requests_toolbelt.downloadutils.stream
    and pass it a response fetched with stream=True.

    Parameters:
    - response: Response object to stream
    - path: str, destination file path (auto-generated if None)
    - chunksize: int, size of chunks to read (default: 512)

    Returns:
    str: path to the downloaded file
    """
def get_download_file_path(response, path):
    """
    Generate an appropriate file path for a download based on response headers.

    Signature reference only: no function body is shown here. The usage
    examples pass the returned path on to stream_response_to_file.

    Parameters:
    - response: Response object
    - path: str, base path or directory

    Returns:
    str: complete file path for download
    """
import requests
from requests_toolbelt.downloadutils.stream import stream_response_to_file, get_download_file_path
# Stream large file download
response = requests.get('https://example.com/large-file.zip', stream=True)
local_path = stream_response_to_file(response)
print(f"Downloaded to: {local_path}")
# Specify destination path
response = requests.get('https://example.com/data.json', stream=True)
local_path = stream_response_to_file(response, '/downloads/data.json')
# Auto-generate filename from response headers
response = requests.get('https://example.com/report.pdf', stream=True)
download_path = get_download_file_path(response, '/downloads/')
local_path = stream_response_to_file(response, download_path)
# Custom chunk size for better performance
response = requests.get('https://example.com/video.mp4', stream=True)
local_path = stream_response_to_file(response, chunksize=8192)Stream response content to multiple destinations simultaneously (file, memory, etc.).
def tee(response, fileobject, chunksize=512, decode_content=True):
    """
    Stream response to a file-like object while yielding content.

    Signature reference only: no function body is shown here. The usage
    examples pass files opened in binary mode ('wb') and iterate over the
    yielded chunks.

    Parameters:
    - response: Response object to stream
    - fileobject: file-like object to write to
    - chunksize: int, size of chunks to read (default: 512)
    - decode_content: bool, whether to decode content (default: True)

    Yields:
    bytes: chunks of response content
    """
def tee_to_file(response, filename, chunksize=512, decode_content=True):
    """
    Stream response to a file while yielding content.

    Signature reference only: no function body is shown here.

    Parameters:
    - response: Response object to stream
    - filename: str, destination filename
    - chunksize: int, size of chunks to read (default: 512)
    - decode_content: bool, whether to decode content (default: True)

    Yields:
    bytes: chunks of response content
    """
def tee_to_bytearray(response, bytearr, chunksize=512, decode_content=True):
    """
    Stream response to a bytearray while yielding content.

    Signature reference only: no function body is shown here.

    Parameters:
    - response: Response object to stream
    - bytearr: bytearray to append to
    - chunksize: int, size of chunks to read (default: 512)
    - decode_content: bool, whether to decode content (default: True)

    Yields:
    bytes: chunks of response content
    """
import requests
from requests_toolbelt.downloadutils.tee import tee, tee_to_file, tee_to_bytearray
# Standard-library modules used by the examples below
import hashlib
import json

# Save to file while processing content
response = requests.get('https://api.example.com/data.csv', stream=True)
processed_lines = []
pending = b''  # bytes received after the most recent newline
with open('data.csv', 'wb') as f:
    for chunk in tee(response, f):
        # Chunk boundaries rarely coincide with line breaks, so keep the
        # trailing partial line and complete it with the next chunk
        *complete_lines, pending = (pending + chunk).split(b'\n')
        processed_lines.extend(complete_lines)
if pending:
    # The last line may not end with a newline
    processed_lines.append(pending)
print(f"Processed {len(processed_lines)} lines while saving to file")

# Save to file and collect all content
response = requests.get('https://example.com/api/response.json', stream=True)
# join() assembles the chunks in one pass instead of repeated bytes concatenation
all_content = b''.join(tee_to_file(response, 'response.json'))
# Now you have the content both in file and memory
data = json.loads(all_content.decode('utf-8'))

# Stream to bytearray while hashing each chunk as it arrives
response = requests.get('https://example.com/binary-data', stream=True)
data_buffer = bytearray()
hash_calculator = hashlib.sha256()
for chunk in tee_to_bytearray(response, data_buffer, chunksize=8192):
    hash_calculator.update(chunk)
print(f"Downloaded {len(data_buffer)} bytes")
print(f"SHA256: {hash_calculator.hexdigest()}")

# Multiple destinations
response = requests.get('https://example.com/large-file.dat', stream=True)
with open('backup1.dat', 'wb') as f1, open('backup2.dat', 'wb') as f2:
    checksum = hashlib.md5()
    for chunk in tee(response, f1):
        f2.write(chunk)  # Write to second file
        checksum.update(chunk)  # Update checksum
print(f"File saved to two locations with MD5: {checksum.hexdigest()}")
import requests
from requests_toolbelt.downloadutils.tee import tee_to_file
def download_with_progress(url, filename):
    """Download file with progress indication.

    Parameters:
    - url: str, address to download from
    - filename: str, destination file that receives the content

    Prints a percentage line that is rewritten in place as chunks arrive,
    then a completion message. Returns None.
    """
    # Use the response as a context manager so the connection is released
    # even if the transfer fails partway through
    with requests.get(url, stream=True) as response:
        # A missing Content-Length is read as 0, which disables the percentage display
        total_size = int(response.headers.get('Content-Length', 0))
        downloaded = 0
        for chunk in tee_to_file(response, filename, chunksize=8192):
            downloaded += len(chunk)
            if total_size > 0:
                # NOTE(review): if chunks are content-decoded, the byte count may
                # exceed Content-Length for compressed responses - confirm
                percent = (downloaded / total_size) * 100
                print(f"\rDownload progress: {percent:.1f}% ({downloaded}/{total_size} bytes)", end='')
    print(f"\nDownload complete: {filename}")
# Usage: download the remote archive to a local file, printing progress
download_with_progress('https://example.com/large-file.zip', 'local-file.zip')
import requests
import json
from requests_toolbelt.downloadutils.tee import tee_to_file
def download_and_process_json_stream(url, filename):
    """Download JSON stream while processing each object.

    The stream is treated as newline-delimited JSON: every non-blank line is
    parsed and handed to process_json_object while the raw content is also
    written to *filename*.

    Parameters:
    - url: str, address of the JSON stream
    - filename: str, destination file that receives the content

    Prints the number of objects processed. Returns None.
    """
    pending = b""  # bytes received after the most recent newline
    objects_processed = 0
    # Use the response as a context manager so the connection is released
    # even if the transfer or the processing fails partway through
    with requests.get(url, stream=True) as response:
        for chunk in tee_to_file(response, filename, decode_content=True):
            # Buffer raw bytes and split on the newline byte. Decoding each
            # chunk on its own would fail whenever a multi-byte UTF-8
            # character straddles two chunks.
            *complete_lines, pending = (pending + chunk).split(b'\n')
            for raw_line in complete_lines:
                if _process_json_line(raw_line):
                    objects_processed += 1
    # The final record may not be terminated by a newline
    if _process_json_line(pending):
        objects_processed += 1
    print(f"Processed {objects_processed} JSON objects while downloading to {filename}")


def _process_json_line(raw_line):
    """Parse one line of the stream and process it; return True if an object was processed."""
    text = raw_line.decode('utf-8')
    if not text.strip():
        return False
    try:
        obj = json.loads(text)
    except json.JSONDecodeError:
        # Best effort: a malformed line is skipped rather than aborting the download
        return False
    # Called outside the try block so errors raised by the handler are not swallowed
    process_json_object(obj)
    return True
def process_json_object(obj):
    """Process individual JSON object.

    Placeholder: the body does nothing and returns None.

    Parameters:
    - obj: the value parsed from one line of the JSON stream
    """
    # Your processing logic here
    pass


Install with Tessl CLI
npx tessl i tessl/pypi-requests-toolbelt