CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-azure-mgmt-synapse

Microsoft Azure Synapse Management Client Library for Python

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/spark-pool-management.md

Apache Spark Pool Management

Big data pool (Apache Spark) configuration, auto-scaling, library management, and lifecycle operations. Spark pools provide distributed computing capabilities for big data processing, machine learning, and data engineering workloads within Azure Synapse Analytics.

Capabilities

Spark Pool Operations

Core Apache Spark pool lifecycle management including creation, retrieval, updates, and deletion.

def get(resource_group_name: str, workspace_name: str, big_data_pool_name: str) -> BigDataPoolResourceInfo:
    """Retrieve a single Big Data (Apache Spark) pool.

    Args:
        resource_group_name: Resource group that contains the workspace.
        workspace_name: Synapse workspace that owns the pool.
        big_data_pool_name: Name of the Spark pool to fetch.

    Returns:
        BigDataPoolResourceInfo: The requested Spark pool resource.
    """

def create_or_update(resource_group_name: str, workspace_name: str, big_data_pool_name: str, big_data_pool_info: BigDataPoolResourceInfo) -> LROPoller[BigDataPoolResourceInfo]:
    """Create a Big Data pool, or update it if it already exists.

    Args:
        resource_group_name: Resource group that contains the workspace.
        workspace_name: Synapse workspace that owns the pool.
        big_data_pool_name: Name of the Spark pool to create or update.
        big_data_pool_info: Desired pool configuration.

    Returns:
        LROPoller[BigDataPoolResourceInfo]: Poller for the long-running
        create/update operation; ``.result()`` yields the final pool.
    """

def delete(resource_group_name: str, workspace_name: str, big_data_pool_name: str) -> LROPoller[object]:
    """Delete a Big Data pool.

    Args:
        resource_group_name: Resource group that contains the workspace.
        workspace_name: Synapse workspace that owns the pool.
        big_data_pool_name: Name of the Spark pool to remove.

    Returns:
        LROPoller[object]: Poller for the long-running delete operation.
    """

def update(resource_group_name: str, workspace_name: str, big_data_pool_name: str, big_data_pool_patch_info: BigDataPoolPatchInfo) -> BigDataPoolResourceInfo:
    """Patch selected properties of an existing Big Data pool.

    Args:
        resource_group_name: Resource group that contains the workspace.
        workspace_name: Synapse workspace that owns the pool.
        big_data_pool_name: Name of the Spark pool to patch.
        big_data_pool_patch_info: Subset of properties to change.

    Returns:
        BigDataPoolResourceInfo: The pool after the patch is applied.
    """

Spark Pool Listing

Operations to list and discover Spark pools within workspaces.

def list_by_workspace(resource_group_name: str, workspace_name: str) -> ItemPaged[BigDataPoolResourceInfo]:
    """Enumerate all Big Data pools defined in a workspace.

    Args:
        resource_group_name: Resource group that contains the workspace.
        workspace_name: Synapse workspace to enumerate.

    Returns:
        ItemPaged[BigDataPoolResourceInfo]: Lazily-paged iterable of pools.
    """

Library Management

Manage custom libraries and packages for Spark pools.

def list_libraries(resource_group_name: str, workspace_name: str) -> ItemPaged[LibraryResource]:
    """Enumerate the custom libraries registered in a workspace.

    Args:
        resource_group_name: Resource group that contains the workspace.
        workspace_name: Synapse workspace to enumerate.

    Returns:
        ItemPaged[LibraryResource]: Lazily-paged iterable of libraries.
    """

def flush_library(resource_group_name: str, workspace_name: str, library_name: str) -> LROPoller[object]:
    """Flush pending changes for a workspace library.

    Args:
        resource_group_name: Resource group that contains the workspace.
        workspace_name: Synapse workspace that owns the library.
        library_name: Name of the library whose changes are flushed.

    Returns:
        LROPoller[object]: Poller for the long-running flush operation.
    """

def get_operation_result(resource_group_name: str, workspace_name: str, operation_id: str) -> LibraryResource:
    """Fetch the outcome of a previously started library operation.

    Args:
        resource_group_name: Resource group that contains the workspace.
        workspace_name: Synapse workspace that owns the library.
        operation_id: Identifier of the library operation to inspect.

    Returns:
        LibraryResource: Result of the identified operation.
    """

def delete_library(resource_group_name: str, workspace_name: str, library_name: str) -> LROPoller[LibraryResource]:
    """Delete a custom library from a workspace.

    Args:
        resource_group_name: Resource group that contains the workspace.
        workspace_name: Synapse workspace that owns the library.
        library_name: Name of the library to remove.

    Returns:
        LROPoller[LibraryResource]: Poller for the long-running delete.
    """

def create_library(resource_group_name: str, workspace_name: str, library_name: str, library_resource: LibraryResource) -> LROPoller[LibraryResource]:
    """Create a custom library in a workspace, or update an existing one.

    Args:
        resource_group_name: Resource group that contains the workspace.
        workspace_name: Synapse workspace that will own the library.
        library_name: Name under which the library is registered.
        library_resource: Library definition (path, container, type, ...).

    Returns:
        LROPoller[LibraryResource]: Poller for the long-running
        create/update; ``.result()`` yields the registered library.
    """

Types

BigDataPoolResourceInfo

class BigDataPoolResourceInfo:
    """Full resource model for a Big Data (Apache Spark) pool.

    Attributes:
        id (str): ARM resource ID.
        name (str): Resource name.
        type (str): ARM resource type.
        location (str): Azure region of the resource.
        tags (dict): ARM resource tags.
        provisioning_state (str): Current provisioning state.
        auto_scale (AutoScaleProperties): Auto-scale settings.
        creation_date (datetime): When the pool was created.
        auto_pause (AutoPauseProperties): Auto-pause settings.
        is_compute_isolation_enabled (bool): Whether compute isolation is on.
        session_level_packages_enabled (bool): Whether session-level packages are allowed.
        cache_size (int): Cache size.
        dynamic_executor_allocation (DynamicExecutorAllocation): Executor allocation settings.
        spark_events_folder (str): Folder for Spark event logs.
        node_count (int): Number of nodes in the pool.
        library_requirements (LibraryRequirements): Pool library requirements.
        custom_libraries (list): Custom libraries attached to the pool.
        spark_config_properties (dict): Spark configuration key/value pairs.
        spark_version (str): Apache Spark version.
        default_spark_log_folder (str): Default folder for Spark logs.
        node_size (str): Size of each node.
        node_size_family (str): Family of the node size.
    """

BigDataPoolPatchInfo

class BigDataPoolPatchInfo:
    """Patch model carrying the updatable properties of a Big Data pool.

    Attributes:
        tags (dict): ARM resource tags.
        auto_scale (AutoScaleProperties): Auto-scale settings.
        auto_pause (AutoPauseProperties): Auto-pause settings.
        is_compute_isolation_enabled (bool): Whether compute isolation is on.
        session_level_packages_enabled (bool): Whether session-level packages are allowed.
        cache_size (int): Cache size.
        dynamic_executor_allocation (DynamicExecutorAllocation): Executor allocation settings.
        spark_events_folder (str): Folder for Spark event logs.
        node_count (int): Number of nodes in the pool.
        library_requirements (LibraryRequirements): Pool library requirements.
        custom_libraries (list): Custom libraries attached to the pool.
        spark_config_properties (dict): Spark configuration key/value pairs.
        spark_version (str): Apache Spark version.
        default_spark_log_folder (str): Default folder for Spark logs.
        node_size (str): Size of each node.
        node_size_family (str): Family of the node size.
        force (bool): Whether to force the operation.
    """

AutoScaleProperties

class AutoScaleProperties:
    """Auto-scale configuration for a Spark pool.

    Attributes:
        min_node_count (int): Lower bound on pool node count.
        enabled (bool): Whether auto-scaling is active.
        max_node_count (int): Upper bound on pool node count.
    """

AutoPauseProperties

class AutoPauseProperties:
    """Auto-pause configuration for a Spark pool.

    Attributes:
        delay_in_minutes (int): Idle minutes before the pool pauses.
        enabled (bool): Whether auto-pause is active.
    """

DynamicExecutorAllocation

class DynamicExecutorAllocation:
    """Dynamic executor allocation settings for a Spark pool.

    Attributes:
        enabled (bool): Whether dynamic allocation is active.
        min_executors (int): Lower bound on executor count.
        max_executors (int): Upper bound on executor count.
    """

LibraryRequirements

class LibraryRequirements:
    """Requirements-file payload attached to a Big Data pool.

    Attributes:
        time (str): Timestamp of the requirements file.
        content (str): Raw text of the requirements file.
        filename (str): Name of the requirements file.
    """

LibraryResource

class LibraryResource:
    """ARM resource wrapper around a workspace library.

    Attributes:
        id (str): ARM resource ID.
        name (str): Resource name.
        type (str): ARM resource type.
        etag (str): Entity tag used for concurrency control.
        properties (LibraryInfo): Detailed library information.
    """

LibraryInfo

class LibraryInfo:
    """Detailed information about an uploaded workspace library.

    Attributes:
        name (str): Library name.
        path (str): Storage path of the library artifact.
        container_name (str): Storage container holding the artifact.
        uploaded_timestamp (datetime): When the artifact was uploaded.
        type (str): Library type.
        provisioning_status (str): Current provisioning status.
        creator_id (str): Identifier of the uploader.
    """

Usage Examples

Create a Spark Pool with Auto-scaling

from azure.mgmt.synapse.models import (
    BigDataPoolResourceInfo, AutoScaleProperties, AutoPauseProperties,
    DynamicExecutorAllocation, LibraryRequirements
)

# Configure auto-scaling: pool grows between 3 and 10 nodes on demand
auto_scale = AutoScaleProperties(
    enabled=True,
    min_node_count=3,
    max_node_count=10
)

# Configure auto-pause: pool pauses after 15 idle minutes to save cost
auto_pause = AutoPauseProperties(
    enabled=True,
    delay_in_minutes=15
)

# Configure dynamic executor allocation (1-4 executors per application)
dynamic_executor = DynamicExecutorAllocation(
    enabled=True,
    min_executors=1,
    max_executors=4
)

# Assemble the Spark pool definition from the pieces above
spark_pool_info = BigDataPoolResourceInfo(
    location="East US",
    node_size="Small",
    node_size_family="MemoryOptimized",
    spark_version="3.1",
    auto_scale=auto_scale,
    auto_pause=auto_pause,
    dynamic_executor_allocation=dynamic_executor,
    is_compute_isolation_enabled=False,
    session_level_packages_enabled=True
)

# NOTE(review): `client` is assumed to be an already-authenticated
# SynapseManagementClient instance — not shown in this snippet.
# create_or_update starts a long-running operation and returns a poller.
operation = client.big_data_pools.create_or_update(
    resource_group_name="my-resource-group",
    workspace_name="my-synapse-workspace",
    big_data_pool_name="my-spark-pool",
    big_data_pool_info=spark_pool_info
)

# Block until the pool is provisioned and retrieve the final resource
spark_pool = operation.result()
print(f"Created Spark pool: {spark_pool.name}")

Configure Library Requirements

from azure.mgmt.synapse.models import LibraryRequirements

# Define requirements.txt content (pinned package versions for the pool)
requirements_content = """
pandas==1.3.3
numpy==1.21.2
scikit-learn==0.24.2
matplotlib==3.4.3
"""

# Wrap the text in the LibraryRequirements model expected by the API
library_requirements = LibraryRequirements(
    filename="requirements.txt",
    content=requirements_content
)

# Build a patch that only touches library_requirements; other pool
# properties are left unchanged by the update call.
updated_pool_info = BigDataPoolPatchInfo(
    library_requirements=library_requirements
)

# NOTE(review): `client` is assumed to be an already-authenticated
# SynapseManagementClient instance — not shown in this snippet.
updated_pool = client.big_data_pools.update(
    resource_group_name="my-resource-group",
    workspace_name="my-synapse-workspace",
    big_data_pool_name="my-spark-pool",
    big_data_pool_patch_info=updated_pool_info
)

print("Updated library requirements")

Upload Custom Library

from azure.mgmt.synapse.models import LibraryResource, LibraryInfo

# Describe the library artifact: name, ABFSS storage path, container, type
library_info = LibraryInfo(
    name="my-custom-library.jar",
    path="abfss://container@storage.dfs.core.windows.net/libraries/my-custom-library.jar",
    container_name="libraries",
    type="jar"
)

# Wrap the info in the ARM resource envelope expected by the API
library_resource = LibraryResource(
    properties=library_info
)

# NOTE(review): `client` is assumed to be an already-authenticated
# SynapseManagementClient instance — not shown in this snippet.
# create_library starts a long-running operation and returns a poller.
operation = client.library.create_library(
    resource_group_name="my-resource-group",
    workspace_name="my-synapse-workspace",
    library_name="my-custom-library.jar",
    library_resource=library_resource
)

# Block until the upload/registration completes
library = operation.result()
print(f"Uploaded library: {library.name}")

Install with Tessl CLI

npx tessl i tessl/pypi-azure-mgmt-synapse

docs

index.md

integration-runtime-management.md

spark-pool-management.md

sql-pool-management.md

sql-pool-security.md

workspace-management.md

tile.json