An Amazon S3 Transfer Manager that provides high-level abstractions for efficient uploads/downloads with multipart transfers, progress callbacks, and retry logic.
—
The original S3Transfer class provides simple, high-level methods for uploading and downloading files to/from Amazon S3. While still supported, the modern TransferManager is recommended for new development.
The main transfer class providing upload and download functionality with automatic multipart handling based on file size thresholds.
class S3Transfer:
    """
    High-level S3 transfer interface with automatic multipart handling.

    Transfers whose size exceeds the configured multipart threshold are
    split into chunks automatically (see TransferConfig).

    Args:
        client: boto3 S3 client instance
        config: TransferConfig for controlling transfer behavior
        osutil: OSUtils instance for file operations
    """

    def __init__(self, client, config=None, osutil=None): ...

    def upload_file(self, filename, bucket, key, callback=None, extra_args=None):
        """
        Upload a file to S3.

        Args:
            filename (str): Path to local file to upload
            bucket (str): S3 bucket name
            key (str): S3 object key/name
            callback (callable, optional): Progress callback function(bytes_transferred)
            extra_args (dict, optional): Additional S3 operation arguments;
                keys must be listed in ALLOWED_UPLOAD_ARGS

        Raises:
            S3UploadFailedError: If upload fails
            ValueError: If extra_args contains invalid keys
        """

    def download_file(self, bucket, key, filename, extra_args=None, callback=None):
        """
        Download an S3 object to a file.

        Args:
            bucket (str): S3 bucket name
            key (str): S3 object key/name
            filename (str): Path where to save downloaded file
            extra_args (dict, optional): Additional S3 operation arguments;
                keys must be listed in ALLOWED_DOWNLOAD_ARGS
            callback (callable, optional): Progress callback function(bytes_transferred)

        Raises:
            S3DownloadFailedError: If download fails
            ValueError: If extra_args contains invalid keys
        """

    # Class constants for allowed operation arguments
    ALLOWED_DOWNLOAD_ARGS: List[str]
    ALLOWED_UPLOAD_ARGS: List[str]

Configuration class for controlling S3Transfer behavior with basic options for multipart thresholds and concurrency.
class TransferConfig:
    """
    Configuration for S3Transfer operations.

    Args:
        multipart_threshold (int): Size threshold for multipart uploads (default: 8MB)
        max_concurrency (int): Maximum number of concurrent transfers (default: 10)
        multipart_chunksize (int): Size of multipart chunks (default: 8MB)
        num_download_attempts (int): Number of download retry attempts (default: 5)
        max_io_queue (int): Maximum size of IO queue (default: 100)
    """

    def __init__(
        self,
        multipart_threshold=8 * 1024 * 1024,
        max_concurrency=10,
        multipart_chunksize=8 * 1024 * 1024,
        num_download_attempts=5,
        max_io_queue=100
    ): ...

    # Instance attributes mirror the constructor arguments above.
    multipart_threshold: int
    max_concurrency: int
    multipart_chunksize: int
    num_download_attempts: int
    max_io_queue: int

Supporting classes for file operations and progress tracking used by the S3Transfer interface.
class OSUtils:
    """
    OS utility functions for file operations.

    Abstracts local filesystem access for S3Transfer.
    """

    def get_file_size(self, filename: str) -> int:
        """Get file size in bytes."""

    def open_file_chunk_reader(self, filename: str, start_byte: int, size: int, callback):
        """Open a file chunk reader with progress callback."""

    def open(self, filename: str, mode: str):
        """Open a file."""

    def remove_file(self, filename: str):
        """Remove a file (no-op if doesn't exist)."""

    def rename_file(self, current_filename: str, new_filename: str):
        """Rename a file."""
class ReadFileChunk:
    """
    File-like object for reading chunks of files with progress callbacks.

    Args:
        fileobj: File object to read from
        start_byte (int): Starting position in file
        chunk_size (int): Maximum chunk size to read
        full_file_size (int): Total file size
        callback (callable, optional): Progress callback function
        enable_callback (bool): Whether to enable callbacks initially
    """

    def __init__(
        self,
        fileobj,
        start_byte: int,
        chunk_size: int,
        full_file_size: int,
        callback=None,
        enable_callback: bool = True
    ): ...

    @classmethod
    def from_filename(
        cls,
        filename: str,
        start_byte: int,
        chunk_size: int,
        callback=None,
        enable_callback: bool = True
    ):
        """Create ReadFileChunk from filename."""

    def read(self, amount=None) -> bytes:
        """Read data from chunk."""

    def seek(self, where: int):
        """Seek to position within chunk."""

    def tell(self) -> int:
        """Get current position."""

    def close(self):
        """Close file handle."""

    def enable_callback(self):
        """Enable progress callbacks."""

    def disable_callback(self):
        """Disable progress callbacks."""
class StreamReaderProgress:
    """
    Wrapper for read-only streams that adds progress callbacks.

    Args:
        stream: Stream to wrap
        callback (callable, optional): Progress callback function
    """

    def __init__(self, stream, callback=None): ...
    def read(self, *args, **kwargs) -> bytes:
        """Read from stream with progress tracking."""

import boto3
from s3transfer import S3Transfer

# Create client and transfer manager
client = boto3.client('s3', region_name='us-west-2')
transfer = S3Transfer(client)
# Simple upload: arguments are local path, bucket name, object key
transfer.upload_file('/tmp/data.csv', 'my-bucket', 'data.csv')
# Simple download
transfer.download_file('my-bucket', 'data.csv', '/tmp/downloaded.csv')

import os
import threading
from s3transfer import S3Transfer
class ProgressPercentage:
    """Thread-safe progress callback that prints percent complete.

    An instance is passed as ``callback=`` to S3Transfer methods and is
    invoked with the number of bytes transferred in each chunk.

    Args:
        filename (str): Path of the file being transferred; its size at
            construction time is the denominator for the percentage.
    """

    def __init__(self, filename):
        self._filename = filename
        self._size = float(os.path.getsize(filename))
        self._seen_so_far = 0
        # Callbacks may fire from multiple worker threads during a
        # multipart transfer, so guard the running total with a lock.
        self._lock = threading.Lock()

    def __call__(self, bytes_amount):
        with self._lock:
            self._seen_so_far += bytes_amount
            # Guard against ZeroDivisionError for empty files.
            if self._size > 0:
                percentage = (self._seen_so_far / self._size) * 100
            else:
                percentage = 100.0
            print(
                f"\r{self._filename} {self._seen_so_far} / {self._size} "
                f"({percentage:.2f}%)",
                end='',
            )
# Upload with progress tracking: the callback instance is invoked with
# the number of bytes transferred for each chunk completed.
client = boto3.client('s3')
transfer = S3Transfer(client)
progress = ProgressPercentage('/tmp/large_file.dat')
transfer.upload_file('/tmp/large_file.dat', 'my-bucket', 'large_file.dat', callback=progress)

from s3transfer import S3Transfer
transfer = S3Transfer(boto3.client('s3'))

# Upload with metadata and ACL.
# extra_args keys must appear in S3Transfer.ALLOWED_UPLOAD_ARGS,
# otherwise upload_file raises ValueError.
transfer.upload_file(
    '/tmp/document.pdf',
    'my-bucket',
    'documents/document.pdf',
    extra_args={
        'ACL': 'public-read',
        'Metadata': {'author': 'John Doe', 'version': '1.0'},
        'ContentType': 'application/pdf'
    }
)

from s3transfer import S3Transfer, TransferConfig
# Configure for large files
config = TransferConfig(
    multipart_threshold=64 * 1024 * 1024,  # 64MB: files above this use multipart
    max_concurrency=20,                    # More concurrent uploads
    multipart_chunksize=64 * 1024 * 1024,  # 64MB chunks
    num_download_attempts=10               # More retry attempts
)
client = boto3.client('s3')
transfer = S3Transfer(client, config)
# Large file transfer will use multipart automatically
transfer.upload_file('/tmp/large_dataset.zip', 'my-bucket', 'datasets/large_dataset.zip')

The following arguments can be passed in the extra_args parameter for uploads:
- ACL: Access control list permissions
- CacheControl: Cache control directives
- ContentDisposition: Content disposition header
- ContentEncoding: Content encoding (e.g., 'gzip')
- ContentLanguage: Content language
- ContentType: MIME type of the content
- Expires: Expiration date
- GrantFullControl: Full control permissions
- GrantRead: Read permissions
- GrantReadACP: Read ACP permissions
- GrantWriteACP: Write ACP permissions
- Metadata: User-defined metadata dictionary
- RequestPayer: Request payer setting
- ServerSideEncryption: Server-side encryption method
- StorageClass: Storage class (STANDARD, REDUCED_REDUNDANCY, etc.)
- SSECustomerAlgorithm: Customer-provided encryption algorithm
- SSECustomerKey: Customer-provided encryption key
- SSECustomerKeyMD5: MD5 hash of customer encryption key
- SSEKMSKeyId: KMS key ID for encryption
- SSEKMSEncryptionContext: KMS encryption context
- Tagging: Object tags

The following arguments can be passed in the extra_args parameter for downloads:
- VersionId: Specific version of the object to download
- SSECustomerAlgorithm: Customer-provided encryption algorithm
- SSECustomerKey: Customer-provided encryption key
- SSECustomerKeyMD5: MD5 hash of customer encryption key
- RequestPayer: Request payer setting

When migrating from S3Transfer to TransferManager:
Install with Tessl CLI
npx tessl i tessl/pypi-s3transfer