Microsoft Azure File DataLake Storage Client Library for Python
npx @tessl/cli install tessl/pypi-azure-storage-file-datalake@12.21.0

Azure Storage File DataLake provides a comprehensive Python client library for Azure Data Lake Storage Gen2, enabling developers to interact with hierarchical namespace-enabled storage accounts. It offers atomic directory operations (create, rename, delete), fine-grained access control management (ACLs), and seamless integration with the broader Azure ecosystem through support for various authentication methods including SAS tokens, shared access keys, and Azure Identity credentials.
pip install azure-storage-file-datalake

from azure.storage.filedatalake import (
DataLakeServiceClient,
FileSystemClient,
DataLakeDirectoryClient,
DataLakeFileClient,
DataLakeLeaseClient
)

For async operations:
from azure.storage.filedatalake.aio import (
DataLakeServiceClient,
FileSystemClient,
DataLakeDirectoryClient,
DataLakeFileClient,
DataLakeLeaseClient
)

from azure.storage.filedatalake import DataLakeServiceClient
# Initialize service client
service_client = DataLakeServiceClient(
account_url="https://mystorageaccount.dfs.core.windows.net",
credential="<account_key>"
)
# Create a file system
file_system_client = service_client.create_file_system("myfilesystem")
# Create a directory
directory_client = file_system_client.create_directory("mydirectory")
# Upload a file
file_client = directory_client.create_file("myfile.txt")
file_client.upload_data("Hello, Data Lake!", overwrite=True)
# Download the file
download = file_client.download_file()
content = download.readall()
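print(content.decode())

The Azure Storage File DataLake SDK follows a hierarchical client architecture:

- DataLakeServiceClient: account-level operations (managing file systems, user delegation keys, and service properties)
- FileSystemClient: file system-level operations (managing directories, files, and access policies)
- DataLakeDirectoryClient: directory-specific operations (managing subdirectories, files, and access control lists)
- DataLakeFileClient: file-specific operations (uploading, downloading, and appending data; managing file properties and metadata)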
This design enables fine-grained control over data lake resources while maintaining a logical progression from account → file system → directory → file operations.
Account-level operations for managing file systems, user delegation keys, and service properties. Provides the entry point for accessing Data Lake Storage Gen2 resources.
class DataLakeServiceClient:
def __init__(self, account_url: str, credential=None, **kwargs): ...
def create_file_system(self, file_system: str, **kwargs) -> FileSystemClient: ...
def list_file_systems(self, **kwargs) -> ItemPaged[FileSystemProperties]: ...
def get_file_system_client(self, file_system: str) -> FileSystemClient: ...

File system-level operations for managing directories, files, and access policies within a specific container.
class FileSystemClient:
def __init__(self, account_url: str, file_system_name: str, credential=None, **kwargs): ...
def create_directory(self, directory: str, **kwargs) -> DataLakeDirectoryClient: ...
def create_file(self, file: str, **kwargs) -> DataLakeFileClient: ...
def get_paths(self, **kwargs) -> ItemPaged[PathProperties]: ...

Directory-specific operations for managing subdirectories, files, and access control lists within hierarchical structures.
class DataLakeDirectoryClient:
def __init__(self, account_url: str, file_system_name: str, directory_name: str, credential=None, **kwargs): ...
def create_sub_directory(self, sub_directory: str, **kwargs) -> DataLakeDirectoryClient: ...
def create_file(self, file: str, **kwargs) -> DataLakeFileClient: ...
def rename_directory(self, new_name: str, **kwargs) -> DataLakeDirectoryClient: ...

File-specific operations for uploading, downloading, appending data, and managing file properties and metadata.
class DataLakeFileClient:
def __init__(self, account_url: str, file_system_name: str, file_path: str, credential=None, **kwargs): ...
def upload_data(self, data, **kwargs) -> Dict[str, Any]: ...
def download_file(self, **kwargs) -> StorageStreamDownloader: ...
def append_data(self, data, offset: int, **kwargs) -> Dict[str, Any]: ...

Comprehensive access control management including POSIX-style ACLs, SAS token generation, and lease-based concurrency control.
def generate_file_system_sas(account_name: str, file_system_name: str, account_key: str, **kwargs) -> str: ...
def generate_directory_sas(account_name: str, file_system_name: str, directory_name: str, account_key: str, **kwargs) -> str: ...
def generate_file_sas(account_name: str, file_system_name: str, file_path: str, account_key: str, **kwargs) -> str: ...
class DataLakeLeaseClient:
def acquire(self, **kwargs) -> None: ...
def renew(self, **kwargs) -> None: ...
def release(self, **kwargs) -> None: ...

Core data models, properties, permissions, and configuration classes used throughout the SDK.
class FileSystemProperties:
name: str
last_modified: datetime
etag: str
metadata: Dict[str, str]
class DirectoryProperties:
name: str
last_modified: datetime
etag: str
permissions: str
class FileProperties:
name: str
size: int
last_modified: datetime
etag: str
content_settings: ContentSettings