CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-azure-storage-file-datalake

Microsoft Azure File DataLake Storage Client Library for Python

Overall
score

92%

Overview
Eval results
Files

docs/models-types.md

Models and Types

Core data models, properties, permissions, and configuration classes used throughout the Azure Storage File DataLake SDK. These types provide structured representations of resources, metadata, and operational results.

Capabilities

Resource Properties

Core property classes that represent the state and metadata of Data Lake Storage resources.

class FileSystemProperties:
    """
    Properties of a file system.

    Instances are populated by service operations such as
    ``DataLakeServiceClient.list_file_systems()`` rather than constructed
    directly by callers.

    Attributes:
        name (str): Name of the file system
        last_modified (datetime): Last modified timestamp
        etag (str): ETag of the file system
        lease_status (str): Current lease status
        lease_state (str): Current lease state
        lease_duration (str): Lease duration type
        public_access (PublicAccess): Public access level
        has_immutability_policy (bool): Whether immutability policy is set
        has_legal_hold (bool): Whether legal hold is active
        metadata (Dict[str, str]): User-defined metadata
        encryption_scope (EncryptionScopeOptions): Default encryption scope
        deleted_time (datetime): Deletion timestamp (only populated for
            soft-deleted file systems)
        remaining_retention_days (int): Days remaining in the retention period
            (only populated for soft-deleted file systems)
    """

class DirectoryProperties:
    """
    Properties of a directory.

    Instances are returned by directory get-properties/listing operations;
    they are read-only result objects.

    Attributes:
        name (str): Name/path of the directory
        last_modified (datetime): Last modified timestamp
        etag (str): ETag of the directory
        permissions (str): POSIX permissions in octal format
        owner (str): Owner ID or principal name
        group (str): Group ID or principal name
        acl (str): Access control list in POSIX format
        lease_status (str): Current lease status
        lease_state (str): Current lease state
        lease_duration (str): Lease duration type
        metadata (Dict[str, str]): User-defined metadata
    """

class FileProperties:
    """
    Properties of a file.

    Instances are returned by file get-properties operations; they are
    read-only result objects.

    Attributes:
        name (str): Name/path of the file
        size (int): Size of the file in bytes
        last_modified (datetime): Last modified timestamp
        etag (str): ETag of the file
        permissions (str): POSIX permissions in octal format
        owner (str): Owner ID or principal name
        group (str): Group ID or principal name
        acl (str): Access control list in POSIX format
        lease_status (str): Current lease status
        lease_state (str): Current lease state
        lease_duration (str): Lease duration type
        content_settings (ContentSettings): Content-related settings
        metadata (Dict[str, str]): User-defined metadata
        creation_time (datetime): File creation timestamp
        expiry_time (datetime): File expiration timestamp (if an expiry is set)
        encryption_context (str): Encryption context
    """

class PathProperties:
    """
    Properties of a path (file or directory).

    A unified result type for path listings; use ``is_directory`` to
    distinguish files from directories.

    Attributes:
        name (str): Name/path of the item
        last_modified (datetime): Last modified timestamp
        etag (str): ETag of the item
        content_length (int): Size in bytes (0 for directories)
        is_directory (bool): Whether the path is a directory
        owner (str): Owner ID or principal name
        group (str): Group ID or principal name
        permissions (str): POSIX permissions in octal format
        acl (str): Access control list in POSIX format
        metadata (Dict[str, str]): User-defined metadata
        creation_time (datetime): Creation timestamp
        expiry_time (datetime): Expiration timestamp (if an expiry is set)
        encryption_context (str): Encryption context
    """

class DeletedPathProperties:
    """
    Properties of a soft-deleted path.

    Returned when listing deleted paths; ``deletion_id`` is needed to
    undelete a specific deleted version of a path.

    Attributes:
        name (str): Name of the deleted path
        deleted_time (datetime): Deletion timestamp
        remaining_retention_days (int): Days remaining in retention period
        deletion_id (str): Unique identifier for the deletion
    """

Content and Configuration

Classes for managing content settings, metadata, and operational configurations.

class ContentSettings:
    """
    Content settings for files including MIME type and encoding information.

    Attributes:
        content_type (str): MIME type of the content
        content_encoding (str): Content encoding (e.g., 'gzip')
        content_language (str): Content language (e.g., 'en-US')
        content_disposition (str): Content disposition header
        cache_control (str): Cache control directives
        content_md5 (bytes): MD5 hash of the content
    """

    def __init__(
        self,
        content_type: str = None,
        content_encoding: str = None,
        content_language: str = None,
        content_disposition: str = None,
        cache_control: str = None,
        content_md5: bytes = None
    ):
        """
        Initialize content settings.

        All arguments are optional; unset values are left as None so the
        service applies its defaults.
        """
        # The original stub accepted but discarded its arguments; store them
        # so the documented attributes are actually readable on instances.
        self.content_type = content_type
        self.content_encoding = content_encoding
        self.content_language = content_language
        self.content_disposition = content_disposition
        self.cache_control = cache_control
        self.content_md5 = content_md5

class CustomerProvidedEncryptionKey:
    """
    Customer-provided encryption key for client-side encryption.

    Attributes:
        key_value (str): Base64-encoded encryption key
        key_hash (str): Base64-encoded SHA256 hash of the key
        algorithm (str): Encryption algorithm (AES256)
    """

    def __init__(
        self,
        key_value: str,
        key_hash: str = None,
        algorithm: str = "AES256"
    ):
        """
        Initialize customer-provided encryption key.

        Args:
            key_value (str): Base64-encoded encryption key.
            key_hash (str): Base64-encoded SHA256 hash of the key.
            algorithm (str): Encryption algorithm; defaults to "AES256".
        """
        # Persist the configuration so the documented attributes exist;
        # the original stub dropped its arguments on the floor.
        self.key_value = key_value
        self.key_hash = key_hash
        self.algorithm = algorithm

class EncryptionScopeOptions:
    """
    Encryption scope configuration for server-side encryption.

    Attributes:
        default_encryption_scope (str): Default encryption scope name
        prevent_encryption_scope_override (bool): Whether to prevent scope override
    """

    def __init__(
        self,
        default_encryption_scope: str,
        prevent_encryption_scope_override: bool = False
    ):
        """
        Initialize encryption scope options.

        Args:
            default_encryption_scope (str): Default encryption scope name.
            prevent_encryption_scope_override (bool): When True, requests may
                not override the default scope.
        """
        # Store the values so instances expose the documented attributes.
        self.default_encryption_scope = default_encryption_scope
        self.prevent_encryption_scope_override = prevent_encryption_scope_override

Service Configuration

Classes for configuring account-level service properties including analytics, CORS, and retention policies.

class AnalyticsLogging:
    """
    Analytics logging configuration for the storage account.

    Attributes:
        version (str): Analytics version
        delete (bool): Log delete operations
        read (bool): Log read operations
        write (bool): Log write operations
        retention_policy (RetentionPolicy): Log retention policy
    """

    def __init__(
        self,
        version: str = "1.0",
        delete: bool = False,
        read: bool = False,
        write: bool = False,
        retention_policy: 'RetentionPolicy' = None
    ):
        """
        Initialize analytics logging configuration.

        All logging categories default to disabled; retention_policy is left
        as None when no retention is configured.
        """
        # The original stub never assigned the documented attributes.
        self.version = version
        self.delete = delete
        self.read = read
        self.write = write
        self.retention_policy = retention_policy

class Metrics:
    """
    Metrics configuration for the storage account.

    Attributes:
        version (str): Metrics version
        enabled (bool): Whether metrics are enabled
        include_apis (bool): Include API-level metrics
        retention_policy (RetentionPolicy): Metrics retention policy
    """

    def __init__(
        self,
        version: str = "1.0",
        enabled: bool = False,
        include_apis: bool = None,
        retention_policy: 'RetentionPolicy' = None
    ):
        """
        Initialize metrics configuration.

        include_apis defaults to None (service default) rather than False,
        mirroring the documented signature.
        """
        # Persist configuration so the documented attributes are readable.
        self.version = version
        self.enabled = enabled
        self.include_apis = include_apis
        self.retention_policy = retention_policy

class CorsRule:
    """
    Cross-Origin Resource Sharing (CORS) rule configuration.

    Attributes:
        allowed_origins (List[str]): Allowed origin domains
        allowed_methods (List[str]): Allowed HTTP methods
        allowed_headers (List[str]): Allowed request headers
        exposed_headers (List[str]): Headers exposed to client
        max_age_in_seconds (int): Preflight request cache duration
    """

    def __init__(
        self,
        allowed_origins: List[str],
        allowed_methods: List[str],
        allowed_headers: List[str] = None,
        exposed_headers: List[str] = None,
        max_age_in_seconds: int = 0
    ):
        """
        Initialize CORS rule.

        Args:
            allowed_origins (List[str]): Allowed origin domains.
            allowed_methods (List[str]): Allowed HTTP methods.
            allowed_headers (List[str]): Allowed request headers, or None.
            exposed_headers (List[str]): Headers exposed to the client, or None.
            max_age_in_seconds (int): Preflight cache duration; 0 disables caching.
        """
        # Store the rule configuration; the original stub discarded it.
        self.allowed_origins = allowed_origins
        self.allowed_methods = allowed_methods
        self.allowed_headers = allowed_headers
        self.exposed_headers = exposed_headers
        self.max_age_in_seconds = max_age_in_seconds

class RetentionPolicy:
    """
    Data retention policy configuration.

    Attributes:
        enabled (bool): Whether retention policy is enabled
        days (int): Number of days to retain data
    """

    def __init__(self, enabled: bool = False, days: int = None):
        """
        Initialize retention policy.

        NOTE(review): the service requires ``days`` when ``enabled`` is True;
        no client-side validation is performed here — confirm against the SDK.
        """
        # Keep the configuration on the instance (original stub dropped it).
        self.enabled = enabled
        self.days = days

class StaticWebsite:
    """
    Static website hosting configuration.

    Attributes:
        enabled (bool): Whether static website hosting is enabled
        index_document (str): Default index document name
        error_document404_path (str): Path to 404 error document
        default_index_document_path (str): Default index document path
    """

    def __init__(
        self,
        enabled: bool = False,
        index_document: str = None,
        error_document404_path: str = None,
        default_index_document_path: str = None
    ):
        """
        Initialize static website configuration.

        All document paths are optional and default to None (unset).
        """
        # Persist configuration so the documented attributes exist.
        self.enabled = enabled
        self.index_document = index_document
        self.error_document404_path = error_document404_path
        self.default_index_document_path = default_index_document_path

Access Control and Policies

Classes for managing access policies, delegation keys, and permission structures.

class AccessPolicy:
    """
    Stored access policy for signed identifiers.

    Attributes:
        permission (str): Permissions granted by the policy
        expiry (datetime): Policy expiration time
        start (datetime): Policy start time
    """

    def __init__(
        self,
        permission: str = None,
        expiry: datetime = None,
        start: datetime = None
    ):
        """
        Initialize access policy.

        Args:
            permission (str): Permissions granted by the policy.
            expiry (datetime): Policy expiration time.
            start (datetime): Policy start time; None means effective immediately.
        """
        # Store the policy fields; the original stub discarded them.
        self.permission = permission
        self.expiry = expiry
        self.start = start

class UserDelegationKey:
    """
    User delegation key for generating user delegation SAS tokens.

    Obtained from the service (Azure AD credential required) and then passed
    to SAS-generation helpers; not constructed directly by callers.

    Attributes:
        signed_oid (str): Object ID of the user
        signed_tid (str): Tenant ID
        signed_start (datetime): Key validity start time
        signed_expiry (datetime): Key validity end time
        signed_service (str): Storage service
        signed_version (str): Service version
        value (str): Base64-encoded key value
    """

class LeaseProperties:
    """
    Properties of a lease on a resource.

    Populated on property results for leased resources.

    Attributes:
        status (str): Lease status (locked/unlocked)
        state (str): Lease state (available/leased/expired/breaking/broken)
        duration (str): Lease duration (infinite/fixed)
    """

Query and Serialization

Classes for configuring file querying and data serialization formats.

class QuickQueryDialect:
    """
    Base class for query dialect configuration.

    Acts as the common type for the concrete dialects below so query APIs
    can accept any dialect polymorphically.
    """

class DelimitedTextDialect(QuickQueryDialect):
    """
    Configuration for CSV/delimited text querying.

    Attributes:
        delimiter (str): Field delimiter character
        quote_char (str): Quote character for fields
        escape_char (str): Escape character
        line_terminator (str): Line termination character(s)
        has_header (bool): Whether first row contains headers
    """

    def __init__(
        self,
        delimiter: str = ",",
        quote_char: str = '"',
        escape_char: str = "",
        line_terminator: str = "\n",
        has_header: bool = False
    ):
        """Initialize delimited text dialect with CSV-style defaults."""
        # The original stub accepted but never stored the configuration;
        # keep it so the documented attributes are readable.
        self.delimiter = delimiter
        self.quote_char = quote_char
        self.escape_char = escape_char
        self.line_terminator = line_terminator
        self.has_header = has_header

class DelimitedJsonDialect(QuickQueryDialect):
    """
    Configuration for JSON Lines querying.

    Attributes:
        line_terminator (str): Line termination character(s)
    """

    def __init__(self, line_terminator: str = "\n"):
        """Initialize JSON dialect."""
        self.line_terminator = line_terminator

class ArrowDialect(QuickQueryDialect):
    """
    Configuration for Apache Arrow format querying.

    NOTE(review): the field/schema configuration for Arrow output is not
    shown in this stub — confirm against the installed SDK.
    """

class ArrowType:
    """
    Apache Arrow data type specifications.

    NOTE(review): the constants listed below are documented but not defined
    on this stub class; confirm their exact names and string values against
    the installed SDK before relying on them.

    Attributes:
        BOOL: Boolean type
        INT8: 8-bit integer type
        INT16: 16-bit integer type
        INT32: 32-bit integer type
        INT64: 64-bit integer type
        FLOAT: 32-bit float type
        DOUBLE: 64-bit float type
        STRING: String type
        BINARY: Binary type
        TIMESTAMP: Timestamp type
        DATE: Date type
    """

Enumerations and Constants

Enumeration classes and constants used throughout the SDK for type safety and consistency.

class PublicAccess:
    """
    Public access levels for file systems.

    Attributes:
        OFF: No public access
        FileSystem: Public read access to file system and paths
        Path: Public read access to paths only
    """
    OFF = "off"
    # Wire values reuse Blob service terminology ("container"/"blob");
    # the attribute names use Data Lake terms while the values do not.
    FileSystem = "container"
    Path = "blob"

class LocationMode:
    """
    Location modes for geo-redundant storage accounts.

    Attributes:
        PRIMARY: Primary location
        SECONDARY: Secondary location (read access requires RA-geo-redundant
            replication — NOTE(review): confirm against account configuration)
    """
    PRIMARY = "primary"
    SECONDARY = "secondary"

class ResourceTypes:
    """
    Resource types for account SAS permissions.

    Attributes:
        service (bool): Service-level resources
        container (bool): Container-level resources
        object (bool): Object-level resources
    """

    def __init__(
        self,
        service: bool = False,
        container: bool = False,
        object: bool = False
    ):
        """
        Initialize resource types.

        The parameter named ``object`` shadows the builtin but is kept as-is:
        it is part of the public keyword-argument interface.
        """
        # Persist the flags; the original stub discarded them.
        self.service = service
        self.container = container
        self.object = object

class Services:
    """
    Storage services for account SAS permissions.

    Attributes:
        blob (bool): Blob service
        queue (bool): Queue service
        table (bool): Table service
        file (bool): File service
    """

    def __init__(
        self,
        blob: bool = False,
        queue: bool = False,
        table: bool = False,
        file: bool = False
    ):
        """Initialize services; all flags default to disabled."""
        # Persist the flags; the original stub discarded them.
        self.blob = blob
        self.queue = queue
        self.table = table
        self.file = file

class StorageErrorCode:
    """
    Standard error codes returned by Azure Storage services.

    NOTE(review): this stub documents common codes but defines no constants;
    confirm names/values against the installed SDK.

    Common error codes include:
        ACCOUNT_NOT_FOUND: Storage account not found
        AUTHENTICATION_FAILED: Authentication failure
        AUTHORIZATION_FAILED: Authorization failure
        BLOB_NOT_FOUND: Blob/file not found
        CONTAINER_NOT_FOUND: Container/file system not found
        INVALID_URI: Invalid request URI
        PATH_NOT_FOUND: Path not found
        RESOURCE_NOT_FOUND: Resource not found
        LEASE_ID_MISMATCH: Lease ID mismatch
        LEASE_ALREADY_PRESENT: Lease already exists
    """

# Package version constant; mirrors the azure-storage-file-datalake
# distribution version this documentation page describes.
VERSION = "12.21.0"
"""The version string of the azure-storage-file-datalake package."""

Retry Policies

Retry policy classes for handling transient failures and implementing resilient operations.

class ExponentialRetry:
    """
    Exponential backoff retry policy for Azure Storage operations.

    Implements exponential backoff with jitter for handling transient failures.
    The delay between retries increases exponentially with each attempt.

    Attributes:
        initial_backoff (int): Initial backoff interval in seconds
        increment_base (int): Backoff increment base for exponential calculation
        retry_total (int): Total number of retry attempts
        retry_to_secondary (bool): Whether to retry to secondary location
        random_jitter_range (int): Random jitter range in seconds
    """

    def __init__(
        self,
        initial_backoff: int = 15,
        increment_base: int = 3,
        retry_total: int = 3,
        retry_to_secondary: bool = False,
        random_jitter_range: int = 3,
        **kwargs
    ):
        """
        Initialize exponential retry policy.

        Args:
            initial_backoff (int): Initial backoff interval in seconds
            increment_base (int): Backoff increment base for exponential calculation
            retry_total (int): Total number of retry attempts
            retry_to_secondary (bool): Whether to retry to secondary location
            random_jitter_range (int): Random jitter range in seconds
            **kwargs: Additional configuration options (stored but not
                interpreted here)
        """
        # The original stub discarded its configuration; keep it on the
        # instance so the configured policy values are inspectable.
        self.initial_backoff = initial_backoff
        self.increment_base = increment_base
        self.retry_total = retry_total
        self.retry_to_secondary = retry_to_secondary
        self.random_jitter_range = random_jitter_range

class LinearRetry:
    """
    Linear backoff retry policy for Azure Storage operations.

    Implements linear backoff where the delay between retries increases
    linearly with each attempt.

    Attributes:
        backoff (int): Backoff interval in seconds between retries
        retry_total (int): Total number of retry attempts
        retry_to_secondary (bool): Whether to retry to secondary location
        random_jitter_range (int): Random jitter range in seconds
    """

    def __init__(
        self,
        backoff: int = 15,
        retry_total: int = 3,
        retry_to_secondary: bool = False,
        random_jitter_range: int = 3,
        **kwargs
    ):
        """
        Initialize linear retry policy.

        Args:
            backoff (int): Backoff interval in seconds between retries
            retry_total (int): Total number of retry attempts
            retry_to_secondary (bool): Whether to retry to secondary location
            random_jitter_range (int): Random jitter range in seconds
            **kwargs: Additional configuration options (not interpreted here)
        """
        # Persist the configuration; the original stub dropped it.
        self.backoff = backoff
        self.retry_total = retry_total
        self.retry_to_secondary = retry_to_secondary
        self.random_jitter_range = random_jitter_range

Paging and Results

Classes for handling paginated results and query responses.

class FileSystemPropertiesPaged:
    """
    Paged result container for file system listings.

    Provides iteration over FileSystemProperties objects with
    automatic handling of result pagination. Documentation stub: the
    method bodies here are intentionally empty.
    """

    def __iter__(self) -> Iterator[FileSystemProperties]:
        """Iterate over file system properties, fetching pages transparently."""

    def by_page(self) -> Iterator[List[FileSystemProperties]]:
        """Iterate page by page, yielding each page as a list of properties."""

class ItemPaged:
    """
    Generic paged result container for iterable collections.

    Documentation stub: the method bodies here are intentionally empty.

    Type Parameters:
        T: Type of items in the collection (PathProperties, etc.)
    """

    def __iter__(self) -> Iterator:
        """Iterate over items, fetching pages transparently."""

    def by_page(self) -> Iterator[List]:
        """Iterate page by page, yielding each page as a list of items."""

class DataLakeFileQueryError:
    """
    Error information from file query operations.

    Delivered via the error callback of query operations; non-fatal errors
    allow the query to continue.

    Attributes:
        error (str): Error description
        is_fatal (bool): Whether the error is fatal
        description (str): Detailed error description
        position (int): Position in the query where error occurred
    """

class DataLakeFileQueryReader:
    """
    Reader for streaming query results from file query operations.

    Provides methods to read query results as streams, similar to StorageStreamDownloader
    but specifically for query operations. Documentation stub: the method
    bodies here are intentionally empty. The usage example on this page
    also uses the reader as a context manager (``with query_result as stream``).
    """

    def readall(self) -> bytes:
        """
        Read all query results.

        Returns:
            bytes: Complete query results
        """

    def readinto(self, stream) -> int:
        """
        Read query results into a stream.

        Args:
            stream: Target writable stream (e.g. a file or BytesIO) to
                receive the query results

        Returns:
            int: Number of bytes read
        """

Usage Examples:

from azure.storage.filedatalake import (
    DataLakeServiceClient,
    ContentSettings,
    PublicAccess,
    DelimitedTextDialect,
    CustomerProvidedEncryptionKey
)

# Create service client
# NOTE(review): an account key is used here for brevity; prefer Azure AD
# credentials in production.
service_client = DataLakeServiceClient(
    account_url="https://mystorageaccount.dfs.core.windows.net",  
    credential="<account_key>"
)

# Create file system with custom properties
fs_client = service_client.create_file_system(
    "analytics-data",
    metadata={"department": "data-science", "project": "ml-pipeline"},
    public_access=PublicAccess.OFF
)

# Upload file with content settings
file_client = fs_client.create_file("data/results.csv")

content_settings = ContentSettings(
    content_type="text/csv",
    content_encoding="utf-8",
    cache_control="max-age=3600"
)

# Customer-provided encryption
# The same key must be supplied again on any later read of this file.
cpk = CustomerProvidedEncryptionKey(
    key_value="<base64_key>",
    key_hash="<base64_hash>"
)

file_client.upload_data(
    "col1,col2,col3\nval1,val2,val3",
    content_settings=content_settings,
    customer_provided_encryption_key=cpk,
    metadata={"format": "csv", "version": "1.0"}
)

# Query CSV file with custom dialect
csv_dialect = DelimitedTextDialect(
    delimiter=",",
    quote_char='"',
    has_header=True,
    line_terminator="\n"
)

query_result = file_client.query_file(
    "SELECT col1, col2 FROM BlobStorage WHERE col3 = 'val3'",
    file_format=csv_dialect
)

# Process results
# query_file returns a streaming reader; readall() drains it to bytes.
with query_result as stream:
    data = stream.readall().decode()
    print(f"Query results: {data}")

# List file systems with properties
for fs_props in service_client.list_file_systems(include_metadata=True):
    print(f"File System: {fs_props.name}")
    print(f"  Last Modified: {fs_props.last_modified}")
    print(f"  Metadata: {fs_props.metadata}")
    print(f"  Public Access: {fs_props.public_access}")

Install with Tessl CLI

npx tessl i tessl/pypi-azure-storage-file-datalake

docs

access-control-security.md

directory-operations.md

file-operations.md

file-system-operations.md

index.md

models-types.md

service-operations.md

tile.json