Microsoft Azure File DataLake Storage Client Library for Python
Overall
score
92%
Directory-specific operations for managing subdirectories, files, and access control lists within hierarchical structures. The DataLakeDirectoryClient provides comprehensive directory management capabilities including ACL operations and path manipulations.
Client to interact with a specific directory, providing operations for managing directory contents, access control, and hierarchical operations. Inherits path-based operations from the underlying PathClient.
class DataLakeDirectoryClient:
"""
A client to interact with a specific directory in Azure Data Lake Storage Gen2.
Attributes:
url (str): The full endpoint URL to the directory, including SAS token if used
primary_endpoint (str): The full primary endpoint URL
primary_hostname (str): The hostname of the primary endpoint
file_system_name (str): Name of the file system
path_name (str): Path to the directory
"""
def __init__(
self,
account_url: str,
file_system_name: str,
directory_name: str,
credential=None,
**kwargs
):
"""
Initialize the DataLakeDirectoryClient.
Args:
account_url (str): The URL to the DataLake storage account
file_system_name (str): Name of the file system
directory_name (str): Name/path of the directory
credential: Authentication credential
**kwargs: Additional client configuration options
"""
@classmethod
def from_connection_string(
cls,
conn_str: str,
file_system_name: str,
directory_name: str,
credential=None,
**kwargs
) -> 'DataLakeDirectoryClient':
"""
Create DataLakeDirectoryClient from connection string.
Args:
conn_str (str): Connection string for the storage account
file_system_name (str): Name of the file system
directory_name (str): Name/path of the directory
credential: Optional credential to override connection string auth
**kwargs: Additional client configuration options
Returns:
DataLakeDirectoryClient: The directory client instance
"""Usage Examples:
from azure.storage.filedatalake import DataLakeDirectoryClient
# Create client directly
directory_client = DataLakeDirectoryClient(
account_url="https://mystorageaccount.dfs.core.windows.net",
file_system_name="myfilesystem",
directory_name="data/analytics",
credential="<account_key>"
)
# From connection string
directory_client = DataLakeDirectoryClient.from_connection_string(
"DefaultEndpointsProtocol=https;AccountName=mystorageaccount;AccountKey=<key>",
file_system_name="myfilesystem",
directory_name="data/analytics"
)

Core operations for creating, deleting, and managing the directory itself.
def create_directory(self, **kwargs) -> Dict[str, Any]:
"""
Create the directory.
Args:
content_settings (ContentSettings, optional): Content settings for the directory
metadata (dict, optional): Metadata key-value pairs
permissions (str, optional): POSIX permissions in octal format
umask (str, optional): POSIX umask for permission calculation
**kwargs: Additional options including conditions and CPK
Returns:
dict: Directory creation response headers including etag and last_modified
"""
def delete_directory(self, **kwargs) -> None:
"""
Delete the directory.
Args:
recursive (bool): If True, delete directory and all its contents
**kwargs: Additional options including conditions
"""
def exists(self, **kwargs) -> bool:
"""
Check if the directory exists.
Args:
**kwargs: Additional options
Returns:
bool: True if directory exists, False otherwise
"""
def get_directory_properties(self, **kwargs) -> DirectoryProperties:
"""
Get directory properties and metadata.
Args:
**kwargs: Additional options including conditions and user principal names
Returns:
DirectoryProperties: Properties of the directory including metadata, etag, permissions
"""
def rename_directory(
self,
new_name: str,
**kwargs
) -> DataLakeDirectoryClient:
"""
Rename the directory.
Args:
new_name (str): New name/path for the directory; per the Azure SDK this must include the file system, in the format "{filesystem}/{new directory path}" — verify against the SDK reference
content_settings (ContentSettings, optional): Content settings for renamed directory
metadata (dict, optional): Metadata for renamed directory
**kwargs: Additional options including conditions
Returns:
DataLakeDirectoryClient: Client for the renamed directory
"""Operations for creating and managing subdirectories within the current directory.
def create_sub_directory(
self,
sub_directory: Union[DirectoryProperties, str],
metadata: Dict[str, str] = None,
**kwargs
) -> DataLakeDirectoryClient:
"""
Create a subdirectory within the current directory.
Args:
sub_directory: Name of the subdirectory or DirectoryProperties object
metadata (dict, optional): Metadata key-value pairs
**kwargs: Additional options including permissions, umask, and conditions
Returns:
DataLakeDirectoryClient: Client for the created subdirectory
"""
def delete_sub_directory(
self,
sub_directory: Union[DirectoryProperties, str],
**kwargs
) -> DataLakeDirectoryClient:
"""
Delete a subdirectory from the current directory.
Args:
sub_directory: Name of the subdirectory or DirectoryProperties object
**kwargs: Additional options including recursive delete and conditions
Returns:
DataLakeDirectoryClient: Client for the deleted subdirectory
"""
def get_sub_directory_client(
self,
sub_directory: Union[DirectoryProperties, str]
) -> DataLakeDirectoryClient:
"""
Get a DataLakeDirectoryClient for a subdirectory.
Args:
sub_directory: Name of the subdirectory or DirectoryProperties object
Returns:
DataLakeDirectoryClient: Client for the specified subdirectory
"""Operations for creating and managing files within the directory.
def create_file(
self,
file: Union[FileProperties, str],
**kwargs
) -> DataLakeFileClient:
"""
Create a file in the directory.
Args:
file: Name of the file or FileProperties object
content_settings (ContentSettings, optional): Content settings for the file
metadata (dict, optional): Metadata key-value pairs
**kwargs: Additional options including permissions, umask, and conditions
Returns:
DataLakeFileClient: Client for the created file
"""
def get_file_client(
self,
file: Union[FileProperties, str]
) -> DataLakeFileClient:
"""
Get a DataLakeFileClient for a file within the directory.
Args:
file: Name of the file or FileProperties object
Returns:
DataLakeFileClient: Client for the specified file
"""Operations for listing contents within the directory hierarchy.
def get_paths(
self,
recursive: bool = True,
max_results: int = None,
**kwargs
) -> ItemPaged[PathProperties]:
"""
List paths within the directory.
Args:
recursive (bool): Whether to list recursively through subdirectories
max_results (int, optional): Maximum number of results per page
**kwargs: Additional options including upn (user principal names)
Returns:
ItemPaged[PathProperties]: Paged list of path properties within the directory
"""Operations for managing POSIX-style access control lists (ACLs) and permissions.
def get_access_control(self, **kwargs) -> Dict[str, Any]:
"""
Get access control properties for the directory.
Args:
upn (bool, optional): Return user principal names instead of object IDs
**kwargs: Additional options including conditions
Returns:
dict: Access control information including ACL, group, owner, permissions
"""
def set_access_control(
self,
owner: str = None,
group: str = None,
permissions: str = None,
acl: str = None,
**kwargs
) -> Dict[str, Any]:
"""
Set access control properties for the directory.
Args:
owner (str, optional): Owner user ID or principal name
group (str, optional): Owning group ID or principal name
permissions (str, optional): POSIX permissions in octal format
acl (str, optional): Access control list in POSIX format
**kwargs: Additional options including conditions
Returns:
dict: Response headers including etag and last_modified
"""
def set_access_control_recursive(
self,
acl: str,
**kwargs
) -> AccessControlChangeResult:
"""
Set access control recursively on the directory and its contents.
Args:
acl (str): Access control list in POSIX format
batch_size (int, optional): Number of paths to process per batch
max_batches (int, optional): Maximum number of batches to process
continue_on_failure (bool, optional): Continue processing on individual failures
**kwargs: Additional options
Returns:
AccessControlChangeResult: Result including counters and failure information
"""
def update_access_control_recursive(
self,
acl: str,
**kwargs
) -> AccessControlChangeResult:
"""
Update access control recursively on the directory and its contents.
Args:
acl (str): Access control list in POSIX format
batch_size (int, optional): Number of paths to process per batch
max_batches (int, optional): Maximum number of batches to process
continue_on_failure (bool, optional): Continue processing on individual failures
**kwargs: Additional options
Returns:
AccessControlChangeResult: Result including counters and failure information
"""
def remove_access_control_recursive(
self,
acl: str,
**kwargs
) -> AccessControlChangeResult:
"""
Remove access control recursively from the directory and its contents.
Args:
acl (str): Access control list entries to remove in POSIX format
batch_size (int, optional): Number of paths to process per batch
max_batches (int, optional): Maximum number of batches to process
continue_on_failure (bool, optional): Continue processing on individual failures
**kwargs: Additional options
Returns:
AccessControlChangeResult: Result including counters and failure information
"""Usage Examples:
from azure.storage.filedatalake import DataLakeDirectoryClient
# Create a directory client
directory_client = DataLakeDirectoryClient(
account_url="https://mystorageaccount.dfs.core.windows.net",
file_system_name="myfilesystem",
directory_name="data/analytics",
credential="<account_key>"
)
# Create the directory with metadata and permissions
directory_client.create_directory(
metadata={"purpose": "analytics", "team": "data-science"},
permissions="0755" # rwxr-xr-x
)
# Create subdirectories
raw_dir = directory_client.create_sub_directory("raw")
processed_dir = directory_client.create_sub_directory("processed")
# List all paths in the directory
paths = directory_client.get_paths(recursive=True)
for path in paths:
print(f"Path: {path.name}, Size: {path.content_length if not path.is_directory else 'N/A'}")
# Set access control with POSIX ACLs
directory_client.set_access_control(
owner="user1",
group="datagroup",
permissions="0755",
acl="user::rwx,group::r-x,other::r-x,user:analyst1:rwx"
)
# Apply ACLs recursively to all contents
acl_result = directory_client.set_access_control_recursive(
acl="user::rwx,group::r-x,other::r-x,user:analyst1:rwx",
continue_on_failure=True
)
print(f"ACL changes: {acl_result.counters.directories_successful} directories, "
f"{acl_result.counters.files_successful} files")
# Rename the directory
new_directory_client = directory_client.rename_directory("myfilesystem/data/analytics-v2")

Install with Tessl CLI
npx tessl i tessl/pypi-azure-storage-file-datalake

docs
evals
scenario-1
scenario-2
scenario-3
scenario-4
scenario-5
scenario-6
scenario-7
scenario-8
scenario-9
scenario-10