Microsoft Azure Machine Learning Client Library for Python providing comprehensive SDK for ML workflows including job execution, pipeline components, model deployment, and AutoML capabilities
Quality: Pending — a best-practices review has not yet been performed.
Impact: Pending — no eval scenarios have been run.
Comprehensive model deployment capabilities for real-time and batch inference with online endpoints, batch endpoints, and various deployment configurations supporting different compute types and scaling options.
Real-time inference endpoints for serving models with low latency and high availability.
class OnlineEndpoint:
    """Base class for online (real-time) inference endpoints."""

    def __init__(
        self,
        *,
        name: str,
        description: "str | None" = None,
        tags: "dict | None" = None,
        properties: "dict | None" = None,
        auth_mode: str = "key",
        identity: "IdentityConfiguration | None" = None,
        **kwargs
    ):
        """
        Online endpoint for real-time model inference.

        Parameters:
        - name: Endpoint name (must be unique in workspace)
        - description: Endpoint description
        - tags: Dictionary of tags
        - properties: Custom properties
        - auth_mode: Authentication mode ("key", "aml_token", "aad_token")
        - identity: Managed identity configuration
        """
class ManagedOnlineEndpoint(OnlineEndpoint):
    """Online endpoint whose serving infrastructure is managed by Azure."""

    def __init__(
        self,
        *,
        name: str,
        public_network_access: str = "enabled",
        **kwargs
    ):
        """
        Azure-managed online endpoint with automatic scaling and load balancing.

        Parameters:
        - name: Endpoint name
        - public_network_access: Network access ("enabled", "disabled")
        - **kwargs: Additional OnlineEndpoint arguments (description, tags, auth_mode, ...)
        """
class KubernetesOnlineEndpoint(OnlineEndpoint):
    """Online endpoint hosted on an attached Kubernetes compute target."""

    def __init__(
        self,
        *,
        name: str,
        compute: str,
        **kwargs
    ):
        """
        Kubernetes-based online endpoint for custom compute environments.

        Parameters:
        - name: Endpoint name
        - compute: Kubernetes compute target name
        - **kwargs: Additional OnlineEndpoint arguments (description, tags, auth_mode, ...)
        """


from azure.ai.ml.entities import ManagedOnlineEndpoint
# Example: create a managed online endpoint.
endpoint = ManagedOnlineEndpoint(
    name="my-model-endpoint",
    description="Endpoint for my ML model",
    auth_mode="key",  # key-based authentication
    tags={"environment": "production", "version": "1.0"}
)
# Submit the create/update operation and block until it completes.
ml_client.online_endpoints.begin_create_or_update(endpoint).result()

# Deploy models to online endpoints with specific resource configurations
# and scaling settings.
class OnlineDeployment:
    """Base class for deployments hosted behind an online endpoint."""

    def __init__(
        self,
        *,
        name: str,
        endpoint_name: str,
        model: "Model | None" = None,
        environment: "Environment | None" = None,
        code_configuration: "CodeConfiguration | None" = None,
        **kwargs
    ):
        """
        Base online deployment class.

        Parameters:
        - name: Deployment name
        - endpoint_name: Target endpoint name
        - model: Model to deploy
        - environment: Runtime environment
        - code_configuration: Scoring script configuration
        """
class ManagedOnlineDeployment(OnlineDeployment):
    """Deployment on Azure-managed compute with automatic scaling."""

    def __init__(
        self,
        *,
        name: str,
        endpoint_name: str,
        model: "Model",
        environment: "Environment | None" = None,
        code_configuration: "CodeConfiguration | None" = None,
        instance_type: str = "Standard_DS3_v2",
        instance_count: int = 1,
        scale_settings: "OnlineScaleSettings | None" = None,
        request_settings: "OnlineRequestSettings | None" = None,
        liveness_probe: "ProbeSettings | None" = None,
        readiness_probe: "ProbeSettings | None" = None,
        environment_variables: "dict | None" = None,
        **kwargs
    ):
        """
        Azure-managed online deployment with automatic scaling.

        Parameters:
        - name: Deployment name
        - endpoint_name: Target endpoint name
        - model: Model to deploy
        - environment: Runtime environment
        - code_configuration: Scoring script configuration
        - instance_type: VM size for deployment
        - instance_count: Number of instances
        - scale_settings: Auto-scaling configuration
        - request_settings: Request handling settings
        - liveness_probe: Health check configuration
        - readiness_probe: Readiness check configuration
        - environment_variables: Environment variables
        """
class KubernetesOnlineDeployment(OnlineDeployment):
    """Deployment targeting an attached Kubernetes compute."""

    def __init__(
        self,
        *,
        name: str,
        endpoint_name: str,
        model: "Model",
        environment: "Environment | None" = None,
        code_configuration: "CodeConfiguration | None" = None,
        instance_type: "str | None" = None,
        instance_count: int = 1,
        resources: "ResourceRequirementsSettings | None" = None,
        **kwargs
    ):
        """
        Kubernetes-based online deployment for custom compute.

        Parameters:
        - name: Deployment name
        - endpoint_name: Target endpoint name
        - model: Model to deploy
        - environment: Runtime environment
        - code_configuration: Scoring script configuration
        - instance_type: Instance type (if applicable)
        - instance_count: Number of replicas
        - resources: Resource requirements (CPU, memory, GPU)
        """


from azure.ai.ml.entities import ManagedOnlineDeployment, CodeConfiguration, Model, Environment
# Example: deploy a model to an existing online endpoint.
# Define the model, environment, and code configuration.
model = Model(path="./model", name="my-model", version="1")
environment = Environment(
    image="mcr.microsoft.com/azureml/sklearn-1.0-ubuntu20.04-py38-cpu-inference:latest"
)
code_config = CodeConfiguration(
    code="./src",
    scoring_script="score.py"
)
# Create the deployment definition.
deployment = ManagedOnlineDeployment(
    name="my-model-deployment",
    endpoint_name="my-model-endpoint",
    model=model,
    environment=environment,
    code_configuration=code_config,
    instance_type="Standard_DS3_v2",
    instance_count=1
)
# Submit the deployment and block until it completes.
ml_client.online_deployments.begin_create_or_update(deployment).result()

# Batch inference for processing large datasets asynchronously.
class BatchEndpoint:
    """Endpoint for asynchronous batch inference."""

    def __init__(
        self,
        *,
        name: str,
        description: "str | None" = None,
        tags: "dict | None" = None,
        properties: "dict | None" = None,
        auth_mode: str = "aad_token",
        **kwargs
    ):
        """
        Batch endpoint for asynchronous batch inference.

        Parameters:
        - name: Endpoint name
        - description: Endpoint description
        - tags: Dictionary of tags
        - properties: Custom properties
        - auth_mode: Authentication mode ("aad_token", "key")
        """
class BatchDeployment:
    """Deployment that processes large datasets on a batch endpoint."""

    def __init__(
        self,
        *,
        name: str,
        endpoint_name: str,
        model: "Model | None" = None,
        environment: "Environment | None" = None,
        code_configuration: "CodeConfiguration | None" = None,
        # NOTE: `compute` is required; its position after defaulted parameters
        # is legal here because every parameter is keyword-only.
        compute: str,
        instance_count: int = 1,
        max_concurrency_per_instance: int = 1,
        mini_batch_size: int = 10,
        retry_settings: "BatchRetrySettings | None" = None,
        output_action: str = "append_row",
        output_file_name: str = "predictions.csv",
        logging_level: str = "info",
        environment_variables: "dict | None" = None,
        **kwargs
    ):
        """
        Batch deployment for processing large datasets.

        Parameters:
        - name: Deployment name
        - endpoint_name: Target batch endpoint name
        - model: Model to deploy
        - environment: Runtime environment
        - code_configuration: Scoring script configuration
        - compute: Compute cluster for batch processing
        - instance_count: Number of compute instances
        - max_concurrency_per_instance: Max concurrent processes per instance
        - mini_batch_size: Size of mini-batches for processing
        - retry_settings: Retry configuration for failed batches
        - output_action: How to handle outputs ("append_row", "summary_only")
        - output_file_name: Name of output file
        - logging_level: Logging level ("debug", "info", "warning", "error")
        - environment_variables: Environment variables
        """
class ModelBatchDeployment(BatchDeployment):
    """Batch deployment specialized for a single model."""

    def __init__(
        self,
        *,
        name: str,
        endpoint_name: str,
        model: "Model",
        settings: "ModelBatchDeploymentSettings | None" = None,
        **kwargs
    ):
        """
        Model-specific batch deployment with optimized settings.

        Parameters:
        - name: Deployment name
        - endpoint_name: Target batch endpoint name
        - model: Model to deploy
        - settings: Model-specific deployment settings
        """


class CodeConfiguration:
    """Scoring-code configuration shared by online and batch deployments."""

    def __init__(
        self,
        *,
        code: str,
        scoring_script: str
    ):
        """
        Code configuration for deployments.

        Parameters:
        - code: Path to source code directory
        - scoring_script: Name of scoring script file
        """
class OnlineScaleSettings:
    """Base class for online scaling settings."""


class DefaultScaleSettings(OnlineScaleSettings):
    """Fixed-capacity scaling (no auto-scaling)."""

    def __init__(self):
        """Default scaling settings (no auto-scaling)."""


class TargetUtilizationScaleSettings(OnlineScaleSettings):
    """Auto-scaling driven by a target utilization percentage."""

    def __init__(
        self,
        *,
        min_instances: int = 1,
        max_instances: int = 1,
        target_utilization_percentage: int = 70,
        polling_interval: int = 300,
        scale_up_cooldown: int = 300,
        scale_down_cooldown: int = 300
    ):
        """
        Auto-scaling based on CPU/memory utilization.

        Parameters:
        - min_instances: Minimum number of instances
        - max_instances: Maximum number of instances
        - target_utilization_percentage: Target CPU utilization percentage
        - polling_interval: Polling interval in seconds
        - scale_up_cooldown: Cooldown period for scaling up (seconds)
        - scale_down_cooldown: Cooldown period for scaling down (seconds)
        """
class OnlineRequestSettings:
    """Request-handling limits for online deployments."""

    def __init__(
        self,
        *,
        request_timeout_ms: int = 90000,
        max_concurrent_requests_per_instance: int = 1,
        max_queue_wait_ms: int = 30000
    ):
        """
        Request handling settings for online deployments.

        Parameters:
        - request_timeout_ms: Request timeout in milliseconds
        - max_concurrent_requests_per_instance: Max concurrent requests per instance
        - max_queue_wait_ms: Max queue wait time in milliseconds
        """
class ProbeSettings:
    """Liveness/readiness probe configuration for deployments."""

    def __init__(
        self,
        *,
        failure_threshold: int = 30,
        success_threshold: int = 1,
        timeout: int = 2,
        period: int = 10,
        initial_delay: int = 10
    ):
        """
        Health probe settings for deployments.

        Parameters:
        - failure_threshold: Number of failures before marking unhealthy
        - success_threshold: Number of successes to mark healthy
        - timeout: Probe timeout in seconds
        - period: Probe period in seconds
        - initial_delay: Initial delay before first probe (seconds)
        """
class BatchRetrySettings:
    """Retry policy for failed mini-batches in batch deployments."""

    def __init__(
        self,
        *,
        max_retries: int = 3,
        timeout: int = 30
    ):
        """
        Retry settings for batch deployments.

        Parameters:
        - max_retries: Maximum number of retries
        - timeout: Timeout for each retry in seconds
        """
class ResourceRequirementsSettings:
    """Resource requests for Kubernetes-hosted deployments."""

    def __init__(
        self,
        *,
        cpu: "str | None" = None,
        memory: "str | None" = None,
        gpu: "str | None" = None
    ):
        """
        Resource requirements for Kubernetes deployments.

        Parameters:
        - cpu: CPU requirements (e.g., "1", "500m")
        - memory: Memory requirements (e.g., "2Gi", "512Mi")
        - gpu: GPU requirements (e.g., "1")
        """


# Serverless inference endpoints with automatic scaling and pay-per-use pricing.
class ServerlessEndpoint:
    """Serverless endpoint serving a model from the model catalog."""

    def __init__(
        self,
        *,
        name: str,
        model_id: str,
        auth_mode: str = "key",
        content_safety: "dict | None" = None,
        **kwargs
    ):
        """
        Serverless endpoint for model inference.

        Parameters:
        - name: Endpoint name
        - model_id: Model identifier from model catalog
        - auth_mode: Authentication mode ("key", "aad_token")
        - content_safety: Content safety configuration
        """


# Authentication methods and credential management for endpoints.
class EndpointAuthKeys:
    """API-key credential pair for endpoint authentication."""

    def __init__(
        self,
        *,
        primary_key: "str | None" = None,
        secondary_key: "str | None" = None
    ):
        """
        API key authentication for endpoints.

        Parameters:
        - primary_key: Primary API key
        - secondary_key: Secondary API key
        """
class EndpointAuthToken:
    """Bearer-token credential for endpoint authentication."""

    def __init__(
        self,
        *,
        access_token: str
    ):
        """
        Token-based authentication for endpoints.

        Parameters:
        - access_token: Access token for authentication
        """
class EndpointAadToken:
    """Azure AD token credential for endpoint authentication."""

    def __init__(
        self,
        *,
        access_token: str
    ):
        """
        Azure AD token authentication for endpoints.

        Parameters:
        - access_token: Azure AD access token
        """


# Invoke online endpoint
# Example: invoke the online endpoint with a JSON payload.
import json

# Prepare test data in the shape the scoring script expects.
test_data = {
    "data": [
        [1.0, 2.0, 3.0, 4.0],
        [2.0, 3.0, 4.0, 5.0]
    ]
}

# Look up the endpoint's scoring URI and its authentication keys.
endpoint = ml_client.online_endpoints.get("my-model-endpoint")
keys = ml_client.online_endpoints.get_keys("my-model-endpoint")

# Make the prediction request; always set a timeout on network calls.
import requests
response = requests.post(
    endpoint.scoring_uri,
    headers={
        "Authorization": f"Bearer {keys.primary_key}",
        "Content-Type": "application/json"
    },
    data=json.dumps(test_data),
    timeout=30,
)
predictions = response.json()
print(predictions)

# Install with Tessl CLI:
#   npx tessl i tessl/pypi-azure-ai-ml