A standard Python library that abstracts away differences among multiple cloud provider APIs
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
The storage service provides a unified interface for object storage and blob operations across 20+ storage providers including AWS S3, Azure Blob Storage, Google Cloud Storage, Rackspace Cloud Files, and many others.
from libcloud.storage.types import Provider
class Provider:
    """Enumeration of supported storage providers"""
    # Amazon S3, plus region-specific endpoint identifiers
    S3 = 's3'
    S3_US_WEST = 's3_us_west'
    S3_EU_WEST = 's3_eu_west'
    S3_AP_SOUTHEAST = 's3_ap_southeast'
    S3_AP_NORTHEAST = 's3_ap_northeast'
    # Microsoft Azure Blob Storage
    AZURE_BLOBS = 'azure_blobs'
    # Google Cloud Storage
    GOOGLE_STORAGE = 'google_storage'
    # Rackspace Cloud Files: generic identifier plus per-region variants
    RACKSPACE_CLOUDFILES = 'cloudfiles'
    CLOUDFILES_US = 'cloudfiles_us'
    CLOUDFILES_UK = 'cloudfiles_uk'
    NINEFOLD = 'ninefold'
    # Local-filesystem driver — useful for testing without cloud credentials
    LOCAL = 'local'
# ... more providers

from libcloud.storage.providers import get_driver
def get_driver(provider: Provider) -> type[StorageDriver]

Get the driver class for a specific storage provider.
Parameters:
provider: Provider identifier from the Provider enum

Returns:
The StorageDriver subclass corresponding to the requested provider.
Example:
from libcloud.storage.types import Provider
from libcloud.storage.providers import get_driver
# Get S3 driver class
cls = get_driver(Provider.S3)
# Initialize driver with credentials
driver = cls('access_key', 'secret_key')

class StorageDriver(BaseDriver):
"""Base class for all storage drivers"""
def list_containers(self) -> List[Container]
def get_container(self, container_name: str) -> Container
def create_container(self, container_name: str) -> Container
def delete_container(self, container: Container) -> bool
def list_container_objects(self, container: Container, ex_prefix: str = None) -> List[Object]
def get_object(self, container_name: str, object_name: str) -> Object
def upload_object(self, file_path: str, container: Container, object_name: str, extra: Dict = None, ex_meta: Dict = None, verify_hash: bool = True) -> Object
def upload_object_via_stream(self, iterator: Iterator[bytes], container: Container, object_name: str, extra: Dict = None, ex_meta: Dict = None) -> Object
def download_object(self, obj: Object, destination_path: str, overwrite_existing: bool = False, delete_on_failure: bool = True) -> bool
def download_object_as_stream(self, obj: Object, chunk_size: int = None) -> Iterator[bytes]
def delete_object(self, obj: Object) -> bool
def ex_iterate_container_objects(self, container: Container, prefix: str = None) -> Iterator[Object]

Base class that all storage drivers inherit from. Provides methods for managing containers (buckets) and objects (files/blobs).
Key Methods:
list_containers(): List all containers/buckets in the account
create_container(): Create a new container/bucket
upload_object(): Upload a file to storage
download_object(): Download a file from storage
delete_object(): Delete a stored object
list_container_objects(): List objects within a container

class Container:
"""Represents a storage container/bucket"""
name: str
driver: StorageDriver
extra: Dict[str, Any]
def get_cdn_url(self) -> str
def enable_cdn(self, **kwargs) -> bool
def get_objects(self) -> List[Object]
def upload_object(self, file_path: str, object_name: str) -> Object
def delete_object(self, obj: Object) -> bool

Represents a storage container (bucket in S3 terminology) that holds objects.
Properties:
name: Container name
extra: Provider-specific metadata

Methods:
get_cdn_url(): Get CDN URL if CDN is enabled
enable_cdn(): Enable CDN for the container
upload_object(): Upload an object to this container

class Object:
"""Represents a storage object/blob"""
name: str
size: int
hash: str
container: Container
driver: StorageDriver
meta_data: Dict[str, str]
extra: Dict[str, Any]
def get_cdn_url(self) -> str
def enable_cdn(self) -> bool
def download(self, destination_path: str, overwrite_existing: bool = False) -> bool
def as_stream(self, chunk_size: int = None) -> Iterator[bytes]
def delete(self) -> bool

Represents a stored object/file/blob.
Properties:
name: Object name/key
size: Size in bytes
hash: Content hash (usually MD5 or ETag)
container: Parent container
meta_data: Custom metadata dictionary
extra: Provider-specific metadata

Methods:
download(): Download object to local file
as_stream(): Get object contents as a stream
delete(): Delete the object

CHUNK_SIZE: int = 8096
DEFAULT_CONTENT_TYPE: str = "application/octet-stream"

CHUNK_SIZE: Default chunk size for streaming operations
DEFAULT_CONTENT_TYPE: Default MIME type for uploaded objects

from libcloud.storage.types import Provider
from libcloud.storage.providers import get_driver
# Initialize driver
cls = get_driver(Provider.S3)
driver = cls('access_key', 'secret_key')
# List existing containers
containers = driver.list_containers()
for container in containers:
print(f"Container: {container.name}")
# Create a new container
container = driver.create_container('my-data-bucket')
print(f"Created container: {container.name}")
# List objects in container
objects = driver.list_container_objects(container)
print(f"Container has {len(objects)} objects")

# Upload a local file
obj = driver.upload_object(
file_path='/path/to/local/file.txt',
container=container,
object_name='documents/file.txt',
extra={'ContentType': 'text/plain'},
ex_meta={'author': 'john_doe', 'version': '1.0'}
)
print(f"Uploaded object: {obj.name} ({obj.size} bytes)")
# Download the object
success = driver.download_object(
obj,
destination_path='/path/to/download/file.txt',
overwrite_existing=True
)
print(f"Download successful: {success}")
# Alternative: download using object method
success = obj.download('/path/to/download/file2.txt')
print(f"Downloaded via object method: {success}")

# Upload from stream
def file_generator():
    """Yield the contents of a large local file in 8 KiB chunks."""
    with open('/path/to/large/file.txt', 'rb') as f:
        # Walrus form: read until f.read() returns an empty bytes object.
        while chunk := f.read(8192):
            yield chunk
obj = driver.upload_object_via_stream(
iterator=file_generator(),
container=container,
object_name='streaming/large_file.txt'
)
print(f"Streamed upload complete: {obj.name}")
# Download as stream
stream = driver.download_object_as_stream(obj, chunk_size=4096)
with open('/path/to/output.txt', 'wb') as f:
for chunk in stream:
f.write(chunk)
# Alternative: using object method
stream = obj.as_stream(chunk_size=8192)
total_size = 0
for chunk in stream:
total_size += len(chunk)
# Process chunk
print(f"Streamed {total_size} bytes")

# Upload with custom metadata
obj = driver.upload_object(
file_path='/path/to/document.pdf',
container=container,
object_name='docs/report.pdf',
extra={'ContentType': 'application/pdf'},
ex_meta={
'title': 'Monthly Report',
'author': 'Jane Smith',
'department': 'Finance',
'created': '2023-10-15'
}
)
# Access metadata
print(f"Object metadata: {obj.meta_data}")
print(f"Content type: {obj.extra.get('content_type')}")
print(f"Last modified: {obj.extra.get('last_modified')}")

# List objects with prefix filtering
objects = driver.list_container_objects(container, ex_prefix='documents/')
print(f"Found {len(objects)} objects in documents/ folder")
# Iterate through large containers efficiently
for obj in driver.ex_iterate_container_objects(container, prefix='logs/'):
print(f"Log file: {obj.name} ({obj.size} bytes)")
if obj.size > 1000000: # Process large files differently
print(f" Large file detected: {obj.name}")

from libcloud.storage.types import Provider
from libcloud.storage.providers import get_driver
# Configure multiple storage providers
storage_config = {
's3': {
'driver': get_driver(Provider.S3),
'credentials': ('aws_access_key', 'aws_secret_key')
},
'azure': {
'driver': get_driver(Provider.AZURE_BLOBS),
'credentials': ('account_name', 'account_key')
},
'gcs': {
'driver': get_driver(Provider.GOOGLE_STORAGE),
'credentials': ('service_account_email', 'key_file_path')
}
}
# Initialize drivers
drivers = {}
for name, config in storage_config.items():
cls = config['driver']
drivers[name] = cls(*config['credentials'])
# Sync data across providers
def sync_container(source_driver, dest_driver, container_name):
    """Copy every object in *container_name* from one provider to another.

    If the source container is missing, a notice is printed and the sync is
    skipped. The destination container is created when it does not exist.
    Objects are streamed provider-to-provider, carrying their metadata along.
    """
    # Locate the source container; bail out if it cannot be fetched.
    try:
        src = source_driver.get_container(container_name)
    except Exception:
        print(f"Source container {container_name} not found")
        return

    # Fetch or lazily create the matching destination container.
    try:
        dst = dest_driver.get_container(container_name)
    except Exception:
        dst = dest_driver.create_container(container_name)

    # Stream each source object straight into the destination provider.
    for item in source_driver.list_container_objects(src):
        print(f"Syncing {item.name}...")
        dest_driver.upload_object_via_stream(
            iterator=source_driver.download_object_as_stream(item),
            container=dst,
            object_name=item.name,
            ex_meta=item.meta_data,
        )
# Sync from S3 to Azure
sync_container(drivers['s3'], drivers['azure'], 'backup-data')

# Enable CDN for container (if supported)
try:
success = container.enable_cdn()
if success:
cdn_url = container.get_cdn_url()
print(f"CDN enabled. URL: {cdn_url}")
except Exception as e:
print(f"CDN not supported or failed: {e}")
# Get public URLs for objects
try:
public_url = obj.get_cdn_url()
print(f"Public URL: {public_url}")
except Exception:
print("Public URLs not supported by this provider")

from libcloud.storage.types import ContainerDoesNotExistError, ObjectDoesNotExistError
from libcloud.common.types import InvalidCredsError, LibcloudError
try:
# Attempt to get a container that might not exist
container = driver.get_container('nonexistent-container')
except ContainerDoesNotExistError:
print("Container does not exist, creating it...")
container = driver.create_container('nonexistent-container')
try:
# Attempt to get an object that might not exist
obj = driver.get_object('my-container', 'nonexistent-file.txt')
except ObjectDoesNotExistError:
print("Object does not exist")
# Verify upload integrity
obj = driver.upload_object(
file_path='/path/to/important.dat',
container=container,
object_name='important.dat',
verify_hash=True # Verify MD5 hash after upload
)
# Safe deletion with confirmation
objects_to_delete = driver.list_container_objects(container, ex_prefix='temp/')
if objects_to_delete:
print(f"About to delete {len(objects_to_delete)} temporary objects")
for obj in objects_to_delete:
success = driver.delete_object(obj)
print(f"Deleted {obj.name}: {success}")

# AWS S3 specific features
s3_driver = get_driver(Provider.S3)('access_key', 'secret_key')
# Upload with S3-specific options
obj = s3_driver.upload_object(
file_path='/path/to/file.txt',
container=container,
object_name='data/file.txt',
extra={
'ContentType': 'text/plain',
'ContentEncoding': 'gzip',
'StorageClass': 'GLACIER', # Use Glacier storage class
'ServerSideEncryption': 'AES256' # Enable encryption
}
)
# Azure specific features
azure_driver = get_driver(Provider.AZURE_BLOBS)('account', 'key')
# Upload with Azure-specific metadata
obj = azure_driver.upload_object(
file_path='/path/to/file.txt',
container=container,
object_name='data/file.txt',
ex_meta={'department': 'IT', 'project': 'backup'},
extra={'blob_type': 'BlockBlob'}
)
# Google Cloud Storage specific features
gcs_driver = get_driver(Provider.GOOGLE_STORAGE)('email', 'key_file')
# Upload with GCS-specific options
obj = gcs_driver.upload_object(
file_path='/path/to/file.txt',
container=container,
object_name='data/file.txt',
extra={'storage_class': 'COLDLINE'} # Use Coldline storage
)

import os
import datetime
def backup_directory(driver, container, local_dir, remote_prefix=''):
    """Recursively back up a local directory tree to cloud storage.

    Parameters:
        driver: Initialized storage driver used to upload the files.
        container: Destination container/bucket object.
        local_dir: Root of the local directory tree to walk.
        remote_prefix: Optional key prefix prepended to every object name.

    Each file is uploaded under its path relative to *local_dir*; the
    capture timestamp and original path are recorded as object metadata.
    """
    for root, _dirs, files in os.walk(local_dir):
        for file_name in files:
            local_path = os.path.join(root, file_name)
            relative_path = os.path.relpath(local_path, local_dir)
            # Object keys always use '/' regardless of the host OS separator.
            remote_path = os.path.join(remote_prefix, relative_path).replace('\\', '/')
            print(f"Backing up: {local_path} -> {remote_path}")
            # Return value intentionally unused (the original bound it to an
            # unused local); upload_object raises on failure.
            driver.upload_object(
                file_path=local_path,
                container=container,
                object_name=remote_path,
                ex_meta={
                    'backup_date': datetime.datetime.now().isoformat(),
                    'source_path': local_path,
                },
            )
# Usage
backup_container = driver.create_container(f'backup-{datetime.date.today()}')
backup_directory(driver, backup_container, '/home/user/documents', 'documents/')

def deploy_static_site(driver, container_name, site_dir):
    """Deploy a static website's files to a cloud storage container.

    Parameters:
        driver: Initialized storage driver used for all operations.
        container_name: Destination container name; created if missing.
        site_dir: Local directory holding the built site files.
    """
    # Create or get container
    try:
        container = driver.get_container(container_name)
    except ContainerDoesNotExistError:
        container = driver.create_container(container_name)
    # Enable public access/CDN if supported — best-effort, since many
    # providers do not implement CDN operations.
    try:
        container.enable_cdn()
    except Exception:
        pass
    # Table-driven MIME lookup replaces the original if/elif chain; files
    # with other extensions are uploaded without an explicit ContentType.
    content_types = {
        '.html': 'text/html',
        '.css': 'text/css',
        '.js': 'application/javascript',
    }
    for root, _dirs, files in os.walk(site_dir):
        for file_name in files:
            local_path = os.path.join(root, file_name)
            relative_path = os.path.relpath(local_path, site_dir)
            extra = {}
            for suffix, mime_type in content_types.items():
                if file_name.endswith(suffix):
                    extra['ContentType'] = mime_type
                    break
            driver.upload_object(
                file_path=local_path,
                container=container,
                object_name=relative_path,
                extra=extra,
            )
            print(f"Deployed: {relative_path}")
# Usage
deploy_static_site(driver, 'my-website', '/path/to/site/')

from libcloud.storage.types import (
ContainerError,
ObjectError,
ContainerDoesNotExistError,
ObjectDoesNotExistError,
ContainerAlreadyExistsError,
ObjectHashMismatchError,
InvalidContainerNameError
)
# Storage exception hierarchy: container-related errors derive from
# ContainerError, object-related errors from ObjectError, and both bases
# extend libcloud's common LibcloudError.
class ContainerError(LibcloudError):
    """Base container exception"""

class ObjectError(LibcloudError):
    """Base object exception"""

class ContainerDoesNotExistError(ContainerError):
    """Container does not exist"""

class ObjectDoesNotExistError(ObjectError):
    """Object does not exist"""

class ContainerAlreadyExistsError(ContainerError):
    """Container already exists"""

class ObjectHashMismatchError(ObjectError):
    """Object hash verification failed"""
class InvalidContainerNameError(ContainerError):
    """Invalid container name provided"""

These exceptions provide specific error handling for storage operations, allowing you to handle different failure scenarios appropriately.
Install with Tessl CLI
npx tessl i tessl/pypi-apache-libcloud