Microsoft Azure File DataLake Storage Client Library for Python
Overall score: 92%
Core data models, properties, permissions, and configuration classes used throughout the Azure Storage File DataLake SDK. These types provide structured representations of resources, metadata, and operational results.
Core property classes that represent the state and metadata of Data Lake Storage resources.
class FileSystemProperties:
"""
Properties of a file system.
Attributes:
name (str): Name of the file system
last_modified (datetime): Last modified timestamp
etag (str): ETag of the file system
lease_status (str): Current lease status
lease_state (str): Current lease state
lease_duration (str): Lease duration type
public_access (PublicAccess): Public access level
has_immutability_policy (bool): Whether immutability policy is set
has_legal_hold (bool): Whether legal hold is active
metadata (Dict[str, str]): User-defined metadata
encryption_scope (EncryptionScopeOptions): Default encryption scope
deleted_time (datetime): Deletion timestamp (for soft-deleted file systems)
remaining_retention_days (int): Days remaining in retention period
"""
class DirectoryProperties:
"""
Properties of a directory.
Attributes:
name (str): Name/path of the directory
last_modified (datetime): Last modified timestamp
etag (str): ETag of the directory
permissions (str): POSIX permissions in octal format
owner (str): Owner ID or principal name
group (str): Group ID or principal name
acl (str): Access control list in POSIX format
lease_status (str): Current lease status
lease_state (str): Current lease state
lease_duration (str): Lease duration type
metadata (Dict[str, str]): User-defined metadata
"""
class FileProperties:
"""
Properties of a file.
Attributes:
name (str): Name/path of the file
size (int): Size of the file in bytes
last_modified (datetime): Last modified timestamp
etag (str): ETag of the file
permissions (str): POSIX permissions in octal format
owner (str): Owner ID or principal name
group (str): Group ID or principal name
acl (str): Access control list in POSIX format
lease_status (str): Current lease status
lease_state (str): Current lease state
lease_duration (str): Lease duration type
content_settings (ContentSettings): Content-related settings
metadata (Dict[str, str]): User-defined metadata
creation_time (datetime): File creation timestamp
expiry_time (datetime): File expiration timestamp
encryption_context (str): Encryption context
"""
class PathProperties:
"""
Properties of a path (file or directory).
Attributes:
name (str): Name/path of the item
last_modified (datetime): Last modified timestamp
etag (str): ETag of the item
content_length (int): Size in bytes (0 for directories)
is_directory (bool): Whether the path is a directory
owner (str): Owner ID or principal name
group (str): Group ID or principal name
permissions (str): POSIX permissions in octal format
acl (str): Access control list in POSIX format
metadata (Dict[str, str]): User-defined metadata
creation_time (datetime): Creation timestamp
expiry_time (datetime): Expiration timestamp
encryption_context (str): Encryption context
"""
class DeletedPathProperties:
"""
Properties of a soft-deleted path.
Attributes:
name (str): Name of the deleted path
deleted_time (datetime): Deletion timestamp
remaining_retention_days (int): Days remaining in retention period
deletion_id (str): Unique identifier for the deletion
"""Classes for managing content settings, metadata, and operational configurations.
class ContentSettings:
"""
Content settings for files including MIME type and encoding information.
Attributes:
content_type (str): MIME type of the content
content_encoding (str): Content encoding (e.g., 'gzip')
content_language (str): Content language (e.g., 'en-US')
content_disposition (str): Content disposition header
cache_control (str): Cache control directives
content_md5 (bytes): MD5 hash of the content
"""
def __init__(
self,
content_type: str = None,
content_encoding: str = None,
content_language: str = None,
content_disposition: str = None,
cache_control: str = None,
content_md5: bytes = None
):
"""Initialize content settings."""
class CustomerProvidedEncryptionKey:
"""
Customer-provided encryption key for client-side encryption.
Attributes:
key_value (str): Base64-encoded encryption key
key_hash (str): Base64-encoded SHA256 hash of the key
algorithm (str): Encryption algorithm (AES256)
"""
def __init__(
self,
key_value: str,
key_hash: str = None,
algorithm: str = "AES256"
):
"""Initialize customer-provided encryption key."""
class EncryptionScopeOptions:
"""
Encryption scope configuration for server-side encryption.
Attributes:
default_encryption_scope (str): Default encryption scope name
prevent_encryption_scope_override (bool): Whether to prevent scope override
"""
def __init__(
self,
default_encryption_scope: str,
prevent_encryption_scope_override: bool = False
):
"""Initialize encryption scope options."""Classes for configuring account-level service properties including analytics, CORS, and retention policies.
class AnalyticsLogging:
"""
Analytics logging configuration for the storage account.
Attributes:
version (str): Analytics version
delete (bool): Log delete operations
read (bool): Log read operations
write (bool): Log write operations
retention_policy (RetentionPolicy): Log retention policy
"""
def __init__(
self,
version: str = "1.0",
delete: bool = False,
read: bool = False,
write: bool = False,
retention_policy: 'RetentionPolicy' = None
):
"""Initialize analytics logging configuration."""
class Metrics:
"""
Metrics configuration for the storage account.
Attributes:
version (str): Metrics version
enabled (bool): Whether metrics are enabled
include_apis (bool): Include API-level metrics
retention_policy (RetentionPolicy): Metrics retention policy
"""
def __init__(
self,
version: str = "1.0",
enabled: bool = False,
include_apis: bool = None,
retention_policy: 'RetentionPolicy' = None
):
"""Initialize metrics configuration."""
class CorsRule:
"""
Cross-Origin Resource Sharing (CORS) rule configuration.
Attributes:
allowed_origins (List[str]): Allowed origin domains
allowed_methods (List[str]): Allowed HTTP methods
allowed_headers (List[str]): Allowed request headers
exposed_headers (List[str]): Headers exposed to client
max_age_in_seconds (int): Preflight request cache duration
"""
def __init__(
self,
allowed_origins: List[str],
allowed_methods: List[str],
allowed_headers: List[str] = None,
exposed_headers: List[str] = None,
max_age_in_seconds: int = 0
):
"""Initialize CORS rule."""
class RetentionPolicy:
"""
Data retention policy configuration.
Attributes:
enabled (bool): Whether retention policy is enabled
days (int): Number of days to retain data
"""
def __init__(self, enabled: bool = False, days: int = None):
"""Initialize retention policy."""
class StaticWebsite:
"""
Static website hosting configuration.
Attributes:
enabled (bool): Whether static website hosting is enabled
index_document (str): Default index document name
error_document404_path (str): Path to 404 error document
default_index_document_path (str): Default index document path
"""
def __init__(
self,
enabled: bool = False,
index_document: str = None,
error_document404_path: str = None,
default_index_document_path: str = None
):
"""Initialize static website configuration."""Classes for managing access policies, delegation keys, and permission structures.
class AccessPolicy:
"""
Stored access policy for signed identifiers.
Attributes:
permission (str): Permissions granted by the policy
expiry (datetime): Policy expiration time
start (datetime): Policy start time
"""
def __init__(
self,
permission: str = None,
expiry: datetime = None,
start: datetime = None
):
"""Initialize access policy."""
class UserDelegationKey:
"""
User delegation key for generating user delegation SAS tokens.
Attributes:
signed_oid (str): Object ID of the user
signed_tid (str): Tenant ID
signed_start (datetime): Key validity start time
signed_expiry (datetime): Key validity end time
signed_service (str): Storage service
signed_version (str): Service version
value (str): Base64-encoded key value
"""
class LeaseProperties:
"""
Properties of a lease on a resource.
Attributes:
status (str): Lease status (locked/unlocked)
state (str): Lease state (available/leased/expired/breaking/broken)
duration (str): Lease duration (infinite/fixed)
"""Classes for configuring file querying and data serialization formats.
class QuickQueryDialect:
"""
Base class for query dialect configuration.
"""
class DelimitedTextDialect(QuickQueryDialect):
"""
Configuration for CSV/delimited text querying.
Attributes:
delimiter (str): Field delimiter character
quote_char (str): Quote character for fields
escape_char (str): Escape character
line_terminator (str): Line termination character(s)
has_header (bool): Whether first row contains headers
"""
def __init__(
self,
delimiter: str = ",",
quote_char: str = '"',
escape_char: str = "",
line_terminator: str = "\n",
has_header: bool = False
):
"""Initialize delimited text dialect."""
class DelimitedJsonDialect(QuickQueryDialect):
"""
Configuration for JSON Lines querying.
Attributes:
line_terminator (str): Line termination character(s)
"""
def __init__(self, line_terminator: str = "\n"):
"""Initialize JSON dialect."""
class ArrowDialect(QuickQueryDialect):
"""
Configuration for Apache Arrow format querying.
"""
class ArrowType:
"""
Apache Arrow data type specifications.
Attributes:
BOOL: Boolean type
INT8: 8-bit integer type
INT16: 16-bit integer type
INT32: 32-bit integer type
INT64: 64-bit integer type
FLOAT: 32-bit float type
DOUBLE: 64-bit float type
STRING: String type
BINARY: Binary type
TIMESTAMP: Timestamp type
DATE: Date type
"""Enumeration classes and constants used throughout the SDK for type safety and consistency.
class PublicAccess:
"""
Public access levels for file systems.
Attributes:
OFF: No public access
FileSystem: Public read access to file system and paths
Path: Public read access to paths only
"""
OFF = "off"
FileSystem = "container"
Path = "blob"
class LocationMode:
"""
Location modes for geo-redundant storage accounts.
Attributes:
PRIMARY: Primary location
SECONDARY: Secondary location
"""
PRIMARY = "primary"
SECONDARY = "secondary"
class ResourceTypes:
"""
Resource types for account SAS permissions.
Attributes:
service (bool): Service-level resources
container (bool): Container-level resources
object (bool): Object-level resources
"""
def __init__(
self,
service: bool = False,
container: bool = False,
object: bool = False
):
"""Initialize resource types."""
class Services:
"""
Storage services for account SAS permissions.
Attributes:
blob (bool): Blob service
queue (bool): Queue service
table (bool): Table service
file (bool): File service
"""
def __init__(
self,
blob: bool = False,
queue: bool = False,
table: bool = False,
file: bool = False
):
"""Initialize services."""
class StorageErrorCode:
"""
Standard error codes returned by Azure Storage services.
Common error codes include:
ACCOUNT_NOT_FOUND: Storage account not found
AUTHENTICATION_FAILED: Authentication failure
AUTHORIZATION_FAILED: Authorization failure
BLOB_NOT_FOUND: Blob/file not found
CONTAINER_NOT_FOUND: Container/file system not found
INVALID_URI: Invalid request URI
PATH_NOT_FOUND: Path not found
RESOURCE_NOT_FOUND: Resource not found
LEASE_ID_MISMATCH: Lease ID mismatch
LEASE_ALREADY_PRESENT: Lease already exists
"""
VERSION = "12.21.0"
"""The version string of the azure-storage-file-datalake package."""Retry policy classes for handling transient failures and implementing resilient operations.
class ExponentialRetry:
"""
Exponential backoff retry policy for Azure Storage operations.
Implements exponential backoff with jitter for handling transient failures.
The delay between retries increases exponentially with each attempt.
"""
def __init__(
self,
initial_backoff: int = 15,
increment_base: int = 3,
retry_total: int = 3,
retry_to_secondary: bool = False,
random_jitter_range: int = 3,
**kwargs
):
"""
Initialize exponential retry policy.
Args:
initial_backoff (int): Initial backoff interval in seconds
increment_base (int): Backoff increment base for exponential calculation
retry_total (int): Total number of retry attempts
retry_to_secondary (bool): Whether to retry to secondary location
random_jitter_range (int): Random jitter range in seconds
**kwargs: Additional configuration options
"""
class LinearRetry:
"""
Linear backoff retry policy for Azure Storage operations.
Implements linear backoff where the delay between retries increases
linearly with each attempt.
"""
def __init__(
self,
backoff: int = 15,
retry_total: int = 3,
retry_to_secondary: bool = False,
random_jitter_range: int = 3,
**kwargs
):
"""
Initialize linear retry policy.
Args:
backoff (int): Backoff interval in seconds between retries
retry_total (int): Total number of retry attempts
retry_to_secondary (bool): Whether to retry to secondary location
random_jitter_range (int): Random jitter range in seconds
**kwargs: Additional configuration options
"""Classes for handling paginated results and query responses.
class FileSystemPropertiesPaged:
"""
Paged result container for file system listings.
Provides iteration over FileSystemProperties objects with
automatic handling of result pagination.
"""
def __iter__(self) -> Iterator[FileSystemProperties]:
"""Iterate over file system properties."""
def by_page(self) -> Iterator[List[FileSystemProperties]]:
"""Iterate page by page."""
class ItemPaged:
"""
Generic paged result container for iterable collections.
Type Parameters:
T: Type of items in the collection (PathProperties, etc.)
"""
def __iter__(self) -> Iterator:
"""Iterate over items."""
def by_page(self) -> Iterator[List]:
"""Iterate page by page."""
class DataLakeFileQueryError:
"""
Error information from file query operations.
Attributes:
error (str): Error description
is_fatal (bool): Whether the error is fatal
description (str): Detailed error description
position (int): Position in the query where error occurred
"""
class DataLakeFileQueryReader:
"""
Reader for streaming query results from file query operations.
Provides methods to read query results as streams, similar to StorageStreamDownloader
but specifically for query operations.
"""
def readall(self) -> bytes:
"""
Read all query results.
Returns:
bytes: Complete query results
"""
def readinto(self, stream) -> int:
"""
Read query results into a stream.
Args:
stream: Target stream to write query results
Returns:
int: Number of bytes read
"""Usage Examples:
from azure.storage.filedatalake import (
DataLakeServiceClient,
ContentSettings,
PublicAccess,
DelimitedTextDialect,
CustomerProvidedEncryptionKey
)
# Create service client
service_client = DataLakeServiceClient(
account_url="https://mystorageaccount.dfs.core.windows.net",
credential="<account_key>"
)
# Create file system with custom properties
fs_client = service_client.create_file_system(
"analytics-data",
metadata={"department": "data-science", "project": "ml-pipeline"},
public_access=PublicAccess.OFF
)
# Upload file with content settings
file_client = fs_client.create_file("data/results.csv")
content_settings = ContentSettings(
content_type="text/csv",
content_encoding="utf-8",
cache_control="max-age=3600"
)
# Customer-provided encryption
cpk = CustomerProvidedEncryptionKey(
key_value="<base64_key>",
key_hash="<base64_hash>"
)
file_client.upload_data(
"col1,col2,col3\nval1,val2,val3",
content_settings=content_settings,
customer_provided_encryption_key=cpk,
metadata={"format": "csv", "version": "1.0"}
)
# Query CSV file with custom dialect
csv_dialect = DelimitedTextDialect(
delimiter=",",
quote_char='"',
has_header=True,
line_terminator="\n"
)
query_result = file_client.query_file(
"SELECT col1, col2 FROM BlobStorage WHERE col3 = 'val3'",
file_format=csv_dialect
)
# Process results
with query_result as stream:
data = stream.readall().decode()
print(f"Query results: {data}")
# List file systems with properties
for fs_props in service_client.list_file_systems(include_metadata=True):
print(f"File System: {fs_props.name}")
print(f" Last Modified: {fs_props.last_modified}")
print(f" Metadata: {fs_props.metadata}")
print(f" Public Access: {fs_props.public_access}")Install with Tessl CLI
npx tessl i tessl/pypi-azure-storage-file-datalakedocs
evals
scenario-1
scenario-2
scenario-3
scenario-4
scenario-5
scenario-6
scenario-7
scenario-8
scenario-9
scenario-10