CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-azure-storage-file-datalake

Microsoft Azure File DataLake Storage Client Library for Python

Overall
score

92%

Overview
Eval results
Files

directory-operations.mddocs/

Directory Operations

Directory-specific operations for managing subdirectories, files, and access control lists within hierarchical structures. The DataLakeDirectoryClient provides comprehensive directory management capabilities including ACL operations and path manipulations.

Capabilities

DataLakeDirectoryClient

Client to interact with a specific directory, providing operations for managing directory contents, access control, and hierarchical operations. Inherits path-based operations from the underlying PathClient.

class DataLakeDirectoryClient:
    """
    A client to interact with a specific directory in Azure Data Lake Storage Gen2.

    The target directory is identified by the storage account URL, the file
    system name, and the directory path given at construction time.

    Attributes:
        url (str): The full endpoint URL to the directory, including SAS token if used
        primary_endpoint (str): The full primary endpoint URL
        primary_hostname (str): The hostname of the primary endpoint
        file_system_name (str): Name of the file system
        path_name (str): Path to the directory
    """
    
    def __init__(
        self,
        account_url: str,
        file_system_name: str,
        directory_name: str,
        credential=None,
        **kwargs
    ):
        """
        Initialize the DataLakeDirectoryClient.

        Args:
            account_url (str): The URL to the DataLake storage account,
                e.g. "https://mystorageaccount.dfs.core.windows.net"
            file_system_name (str): Name of the file system
            directory_name (str): Name/path of the directory, e.g. "data/analytics"
            credential: Authentication credential, e.g. an account key string.
                Defaults to None.
            **kwargs: Additional client configuration options
        """
    
    @classmethod
    def from_connection_string(
        cls,
        conn_str: str,
        file_system_name: str,
        directory_name: str,
        credential=None,
        **kwargs
    ) -> 'DataLakeDirectoryClient':
        """
        Create DataLakeDirectoryClient from connection string.

        Alternate constructor: the account endpoint (and normally the
        credential) come from the connection string instead of an explicit
        account URL.

        Args:
            conn_str (str): Connection string for the storage account
            file_system_name (str): Name of the file system
            directory_name (str): Name/path of the directory
            credential: Optional credential to override connection string auth.
                Defaults to None.
            **kwargs: Additional client configuration options

        Returns:
            DataLakeDirectoryClient: The directory client instance
        """

Usage Examples:

from azure.storage.filedatalake import DataLakeDirectoryClient

# Create client directly from the account URL, file system name, and
# directory path. "<account_key>" is a placeholder for the real credential.
directory_client = DataLakeDirectoryClient(
    account_url="https://mystorageaccount.dfs.core.windows.net",
    file_system_name="myfilesystem",
    directory_name="data/analytics",
    credential="<account_key>"
)

# From connection string ("<key>" is a placeholder for the account key).
# This rebinds directory_client; it is an alternative to the construction
# above, not an additional step.
directory_client = DataLakeDirectoryClient.from_connection_string(
    "DefaultEndpointsProtocol=https;AccountName=mystorageaccount;AccountKey=<key>",
    file_system_name="myfilesystem",
    directory_name="data/analytics"
)

Directory Management

Core operations for creating, deleting, and managing the directory itself.

def create_directory(self, **kwargs) -> Dict[str, Any]:
    """
    Create the directory this client refers to.

    All options are supplied as keyword arguments.

    Args:
        content_settings (ContentSettings, optional): Content settings for the directory
        metadata (dict, optional): Metadata key-value pairs
        permissions (str, optional): POSIX permissions in octal format,
            e.g. "0755" for rwxr-xr-x
        umask (str, optional): POSIX umask for permission calculation
        **kwargs: Additional options including conditions and CPK
            (presumably customer-provided key settings; confirm against the
            SDK reference)

    Returns:
        dict: Directory creation response headers including etag and last_modified
    """

def delete_directory(self, **kwargs) -> None:
    """
    Delete the directory this client refers to.

    Args:
        recursive (bool): If True, delete directory and all its contents.
            NOTE(review): the Azure SDK's delete_directory appears to always
            delete recursively and may reject an explicit ``recursive``
            keyword; verify against the SDK reference before passing it.
        **kwargs: Additional options including conditions

    Returns:
        None
    """

def exists(self, **kwargs) -> bool:
    """
    Check if the directory this client refers to exists.

    Args:
        **kwargs: Additional options

    Returns:
        bool: True if directory exists, False otherwise
    """

def get_directory_properties(self, **kwargs) -> DirectoryProperties:
    """
    Get directory properties and metadata.

    Args:
        **kwargs: Additional options including conditions and user principal
            names (the ``upn`` flag; presumably the same flag documented for
            get_access_control, TODO confirm)

    Returns:
        DirectoryProperties: Properties of the directory including metadata, etag, permissions
    """

def rename_directory(
    self,
    new_name: str,
    **kwargs
) -> DataLakeDirectoryClient:
    """
    Rename the directory.

    Args:
        new_name (str): New name/path for the directory.
            NOTE(review): the Azure SDK reference specifies this value as
            "{filesystem}/{directory}/{subdirectory}", i.e. it must start
            with the file system name (for example
            "myfilesystem/data/analytics-v2"). A bare directory path would
            have its first segment interpreted as the file system; confirm
            against the SDK reference.
        content_settings (ContentSettings, optional): Content settings for renamed directory
        metadata (dict, optional): Metadata for renamed directory
        **kwargs: Additional options including conditions

    Returns:
        DataLakeDirectoryClient: Client for the renamed directory
    """

Subdirectory Operations

Operations for creating and managing subdirectories within the current directory.

def create_sub_directory(
    self,
    sub_directory: Union[DirectoryProperties, str],
    metadata: Optional[Dict[str, str]] = None,
    **kwargs
) -> DataLakeDirectoryClient:
    """
    Create a subdirectory within the current directory.

    Args:
        sub_directory: Name of the subdirectory or DirectoryProperties object
        metadata (dict, optional): Metadata key-value pairs. Defaults to None.
        **kwargs: Additional options including permissions, umask, and conditions

    Returns:
        DataLakeDirectoryClient: Client for the created subdirectory
    """

def delete_sub_directory(
    self,
    sub_directory: Union[DirectoryProperties, str],
    **kwargs
) -> DataLakeDirectoryClient:
    """
    Delete a subdirectory from the current directory.

    Args:
        sub_directory: Name of the subdirectory or DirectoryProperties object
        **kwargs: Additional options including recursive delete and conditions

    Returns:
        DataLakeDirectoryClient: Client for the deleted subdirectory. The
            client is returned even though the subdirectory it refers to has
            been deleted.
    """

def get_sub_directory_client(
    self,
    sub_directory: Union[DirectoryProperties, str]
) -> DataLakeDirectoryClient:
    """
    Get a DataLakeDirectoryClient for a subdirectory.

    Unlike create_sub_directory, this only returns a client object.
    NOTE(review): presumably no service request is made and the subdirectory
    need not exist yet; confirm against the SDK reference.

    Args:
        sub_directory: Name of the subdirectory or DirectoryProperties object

    Returns:
        DataLakeDirectoryClient: Client for the specified subdirectory
    """

File Operations

Operations for creating and managing files within the directory.

def create_file(
    self,
    file: Union[FileProperties, str],
    **kwargs
) -> DataLakeFileClient:
    """
    Create a file in the directory.

    Args:
        file: Name of the file or FileProperties object
        content_settings (ContentSettings, optional): Content settings for the
            file (keyword argument)
        metadata (dict, optional): Metadata key-value pairs (keyword argument)
        **kwargs: Additional options including permissions, umask, and conditions

    Returns:
        DataLakeFileClient: Client for the created file
    """

def get_file_client(
    self,
    file: Union[FileProperties, str]
) -> DataLakeFileClient:
    """
    Get a DataLakeFileClient for a file within the directory.

    Unlike create_file, this only returns a client object.
    NOTE(review): presumably no service request is made and the file need
    not exist yet; confirm against the SDK reference.

    Args:
        file: Name of the file or FileProperties object

    Returns:
        DataLakeFileClient: Client for the specified file
    """

Path Listing

Operations for listing contents within the directory hierarchy.

def get_paths(
    self,
    recursive: bool = True,
    max_results: Optional[int] = None,
    **kwargs
) -> ItemPaged[PathProperties]:
    """
    List paths within the directory.

    Args:
        recursive (bool): Whether to list recursively through subdirectories.
            Defaults to True.
        max_results (int, optional): Maximum number of results per page.
            Defaults to None.
        **kwargs: Additional options including upn (user principal names)

    Returns:
        ItemPaged[PathProperties]: Paged list of path properties within the
            directory. Iterate over it to obtain each PathProperties entry
            (e.g. its name, is_directory, and content_length).
    """

Access Control Management

Operations for managing POSIX-style access control lists (ACLs) and permissions.

def get_access_control(self, **kwargs) -> Dict[str, Any]:
    """
    Get access control properties for the directory.

    Args:
        upn (bool, optional): Return user principal names instead of object IDs
            (keyword argument)
        **kwargs: Additional options including conditions

    Returns:
        dict: Access control information including ACL, group, owner, permissions
    """

def set_access_control(
    self,
    owner: Optional[str] = None,
    group: Optional[str] = None,
    permissions: Optional[str] = None,
    acl: Optional[str] = None,
    **kwargs
) -> Dict[str, Any]:
    """
    Set access control properties for the directory.

    Every parameter defaults to None, so callers pass only the properties
    they want to set.

    NOTE(review): the Azure SDK reference documents ``permissions`` and
    ``acl`` as mutually exclusive (each is "invalid in conjunction with" the
    other); pass one or the other, not both. Confirm against the SDK
    reference.

    Args:
        owner (str, optional): Owner user ID or principal name
        group (str, optional): Owning group ID or principal name
        permissions (str, optional): POSIX permissions in octal format,
            e.g. "0755"
        acl (str, optional): Access control list in POSIX format, e.g.
            "user::rwx,group::r-x,other::r-x,user:analyst1:rwx"
        **kwargs: Additional options including conditions

    Returns:
        dict: Response headers including etag and last_modified
    """

def set_access_control_recursive(
    self,
    acl: str,
    **kwargs
) -> AccessControlChangeResult:
    """
    Set access control recursively on the directory and its contents.

    Args:
        acl (str): Access control list in POSIX format, e.g.
            "user::rwx,group::r-x,other::r-x,user:analyst1:rwx"
        batch_size (int, optional): Number of paths to process per batch
        max_batches (int, optional): Maximum number of batches to process
        continue_on_failure (bool, optional): Continue processing on individual failures
        **kwargs: Additional options

    Returns:
        AccessControlChangeResult: Result including counters and failure
            information. Its ``counters`` attribute exposes
            ``directories_successful`` and ``files_successful``.
    """

def update_access_control_recursive(
    self,
    acl: str,
    **kwargs
) -> AccessControlChangeResult:
    """
    Update access control recursively on the directory and its contents.

    NOTE(review): presumably this modifies or adds only the listed ACL
    entries and leaves other existing entries in place, whereas
    set_access_control_recursive replaces the ACL; confirm against the SDK
    reference.

    Args:
        acl (str): Access control list in POSIX format
        batch_size (int, optional): Number of paths to process per batch
        max_batches (int, optional): Maximum number of batches to process
        continue_on_failure (bool, optional): Continue processing on individual failures
        **kwargs: Additional options

    Returns:
        AccessControlChangeResult: Result including counters and failure information
    """

def remove_access_control_recursive(
    self,
    acl: str,
    **kwargs
) -> AccessControlChangeResult:
    """
    Remove access control recursively from the directory and its contents.

    Args:
        acl (str): Access control list entries to remove in POSIX format.
            NOTE(review): the Azure SDK reference gives the entry format for
            removal as "[scope:][type]:[id]", i.e. without a permission
            triplet (for example "user:analyst1" rather than
            "user:analyst1:rwx"); confirm against the SDK reference.
        batch_size (int, optional): Number of paths to process per batch
        max_batches (int, optional): Maximum number of batches to process
        continue_on_failure (bool, optional): Continue processing on individual failures
        **kwargs: Additional options

    Returns:
        AccessControlChangeResult: Result including counters and failure information
    """

Usage Examples:

from azure.storage.filedatalake import DataLakeDirectoryClient

# Create a directory client ("<account_key>" is a placeholder credential)
directory_client = DataLakeDirectoryClient(
    account_url="https://mystorageaccount.dfs.core.windows.net",
    file_system_name="myfilesystem",
    directory_name="data/analytics",
    credential="<account_key>",
)

# Create the directory with metadata and permissions
directory_client.create_directory(
    metadata={"purpose": "analytics", "team": "data-science"},
    permissions="0755",  # rwxr-xr-x
)

# Create subdirectories; each call returns a client for the new subdirectory
raw_dir = directory_client.create_sub_directory("raw")
processed_dir = directory_client.create_sub_directory("processed")

# List all paths in the directory (directories have no meaningful size)
paths = directory_client.get_paths(recursive=True)
for path in paths:
    size = "N/A" if path.is_directory else path.content_length
    print(f"Path: {path.name}, Size: {size}")

# Set owner, group, and POSIX ACL.
# `permissions` and `acl` are mutually exclusive on this call, so only the
# ACL is sent; its user/group/other entries already express rwxr-xr-x.
directory_client.set_access_control(
    owner="user1",
    group="datagroup",
    acl="user::rwx,group::r-x,other::r-x,user:analyst1:rwx",
)

# Apply ACLs recursively to all contents
acl_result = directory_client.set_access_control_recursive(
    acl="user::rwx,group::r-x,other::r-x,user:analyst1:rwx",
    continue_on_failure=True,
)
print(f"ACL changes: {acl_result.counters.directories_successful} directories, "
      f"{acl_result.counters.files_successful} files")
# continue_on_failure=True keeps going past per-path errors, so report them
print(f"ACL failures: {acl_result.counters.failure_count}")

# Rename the directory.
# new_name must be "{filesystem}/{directory path}": without the file system
# prefix, the first segment ("data") would be taken as the file system name.
new_directory_client = directory_client.rename_directory(
    f"{directory_client.file_system_name}/data/analytics-v2"
)

Install with Tessl CLI

npx tessl i tessl/pypi-azure-storage-file-datalake

docs

access-control-security.md

directory-operations.md

file-operations.md

file-system-operations.md

index.md

models-types.md

service-operations.md

tile.json