CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-azure-ai-ml

Microsoft Azure Machine Learning Client Library for Python providing comprehensive SDK for ML workflows including job execution, pipeline components, model deployment, and AutoML capabilities

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/model-deployment.md

Model Deployment

Comprehensive model deployment capabilities for real-time and batch inference with online endpoints, batch endpoints, and various deployment configurations supporting different compute types and scaling options.

Capabilities

Online Endpoints

Real-time inference endpoints for serving models with low latency and high availability.

class OnlineEndpoint:
    # API reference stub: signature and docstring only; no implementation shown.
    # NOTE(review): parameters annotated with non-Optional types default to None
    # (e.g. `description: str = None`) — implicitly Optional. Confirm against the
    # real azure.ai.ml.entities.OnlineEndpoint signature.
    def __init__(
        self,
        *,
        name: str,
        description: str = None,
        tags: dict = None,
        properties: dict = None,
        auth_mode: str = "key",
        identity: IdentityConfiguration = None,
        **kwargs
    ):
        """
        Online endpoint for real-time model inference.

        All parameters are keyword-only.

        Parameters:
        - name: Endpoint name (must be unique in workspace)
        - description: Endpoint description
        - tags: Dictionary of tags
        - properties: Custom properties
        - auth_mode: Authentication mode ("key", "aml_token", "aad_token")
        - identity: Managed identity configuration
        - **kwargs: Additional keyword arguments (accepted but not documented here)
        """

class ManagedOnlineEndpoint(OnlineEndpoint):
    # API reference stub: signature and docstring only; no implementation shown.
    def __init__(
        self,
        *,
        name: str,
        public_network_access: str = "enabled",
        **kwargs
    ):
        """
        Azure-managed online endpoint with automatic scaling and load balancing.

        Parameters (keyword-only):
        - name: Endpoint name
        - public_network_access: Network access ("enabled", "disabled")
        - **kwargs: Remaining OnlineEndpoint parameters (description, tags,
          auth_mode, ...) — presumably forwarded to the base class; the stub
          body does not show the forwarding.
        """

class KubernetesOnlineEndpoint(OnlineEndpoint):
    # API reference stub: signature and docstring only; no implementation shown.
    def __init__(
        self,
        *,
        name: str,
        compute: str,
        **kwargs
    ):
        """
        Kubernetes-based online endpoint for custom compute environments.

        Parameters (keyword-only):
        - name: Endpoint name
        - compute: Kubernetes compute target name (required — no default)
        - **kwargs: Remaining OnlineEndpoint parameters — presumably forwarded
          to the base class; the stub body does not show the forwarding.
        """

Usage Example

from azure.ai.ml.entities import ManagedOnlineEndpoint

# Build the endpoint definition locally; nothing is provisioned until the
# definition is submitted to the service below.
endpoint = ManagedOnlineEndpoint(
    name="my-model-endpoint",
    description="Endpoint for my ML model",
    auth_mode="key",
    tags={"environment": "production", "version": "1.0"}
)

# Submit the definition. `ml_client` is assumed to be an authenticated MLClient
# created elsewhere (see the client-auth docs) — it is not defined in this
# snippet. begin_create_or_update returns a poller; .result() blocks until the
# endpoint is provisioned.
ml_client.online_endpoints.begin_create_or_update(endpoint).result()

Online Deployments

Deploy models to online endpoints with specific resource configurations and scaling settings.

class OnlineDeployment:
    # API reference stub: signature and docstring only; no implementation shown.
    # NOTE(review): `model`, `environment`, `code_configuration` default to None
    # with non-Optional annotations — implicitly Optional.
    def __init__(
        self,
        *,
        name: str,
        endpoint_name: str,
        model: Model = None,
        environment: Environment = None,
        code_configuration: CodeConfiguration = None,
        **kwargs
    ):
        """
        Base online deployment class.

        All parameters are keyword-only.

        Parameters:
        - name: Deployment name
        - endpoint_name: Target endpoint name
        - model: Model to deploy
        - environment: Runtime environment
        - code_configuration: Scoring script configuration
        - **kwargs: Additional keyword arguments (accepted but not documented here)
        """

class ManagedOnlineDeployment(OnlineDeployment):
    # API reference stub: signature and docstring only; no implementation shown.
    # NOTE(review): unlike the OnlineDeployment base, `model` here has no
    # default, making it required.
    def __init__(
        self,
        *,
        name: str,
        endpoint_name: str,
        model: Model,
        environment: Environment = None,
        code_configuration: CodeConfiguration = None,
        instance_type: str = "Standard_DS3_v2",
        instance_count: int = 1,
        scale_settings: OnlineScaleSettings = None,
        request_settings: OnlineRequestSettings = None,
        liveness_probe: ProbeSettings = None,
        readiness_probe: ProbeSettings = None,
        environment_variables: dict = None,
        **kwargs
    ):
        """
        Azure-managed online deployment with automatic scaling.

        All parameters are keyword-only.

        Parameters:
        - name: Deployment name
        - endpoint_name: Target endpoint name
        - model: Model to deploy (required)
        - environment: Runtime environment
        - code_configuration: Scoring script configuration
        - instance_type: VM size for deployment (default "Standard_DS3_v2")
        - instance_count: Number of instances (default 1)
        - scale_settings: Auto-scaling configuration
        - request_settings: Request handling settings
        - liveness_probe: Health check configuration
        - readiness_probe: Readiness check configuration
        - environment_variables: Environment variables
        - **kwargs: Additional keyword arguments (accepted but not documented here)
        """

class KubernetesOnlineDeployment(OnlineDeployment):
    # API reference stub: signature and docstring only; no implementation shown.
    def __init__(
        self,
        *,
        name: str,
        endpoint_name: str,
        model: Model,
        environment: Environment = None,
        code_configuration: CodeConfiguration = None,
        instance_type: str = None,
        instance_count: int = 1,
        resources: ResourceRequirementsSettings = None,
        **kwargs
    ):
        """
        Kubernetes-based online deployment for custom compute.

        All parameters are keyword-only.

        Parameters:
        - name: Deployment name
        - endpoint_name: Target endpoint name
        - model: Model to deploy (required)
        - environment: Runtime environment
        - code_configuration: Scoring script configuration
        - instance_type: Instance type (if applicable; defaults to None)
        - instance_count: Number of replicas (default 1)
        - resources: Resource requirements (CPU, memory, GPU)
        - **kwargs: Additional keyword arguments (accepted but not documented here)
        """

Usage Example

from azure.ai.ml.entities import ManagedOnlineDeployment, CodeConfiguration, Model, Environment

# Define the model, environment, and code configuration.
# Paths ("./model", "./src") are relative to the working directory and are
# assumed to exist locally when this snippet runs.
model = Model(path="./model", name="my-model", version="1")
environment = Environment(
    image="mcr.microsoft.com/azureml/sklearn-1.0-ubuntu20.04-py38-cpu-inference:latest"
)
code_config = CodeConfiguration(
    code="./src",
    scoring_script="score.py"
)

# Build the deployment definition targeting an existing endpoint by name.
deployment = ManagedOnlineDeployment(
    name="my-model-deployment",
    endpoint_name="my-model-endpoint",
    model=model,
    environment=environment,
    code_configuration=code_config,
    instance_type="Standard_DS3_v2",
    instance_count=1
)

# Submit the deployment. `ml_client` is assumed to be an authenticated MLClient
# created elsewhere (not defined in this snippet). begin_create_or_update
# returns a poller; .result() blocks until deployment completes.
ml_client.online_deployments.begin_create_or_update(deployment).result()

Batch Endpoints and Deployments

Batch inference for processing large datasets asynchronously.

class BatchEndpoint:
    # API reference stub: signature and docstring only; no implementation shown.
    # NOTE(review): default auth mode here is "aad_token" (Azure AD), not "key".
    def __init__(
        self,
        *,
        name: str,
        description: str = None,
        tags: dict = None,
        properties: dict = None,
        auth_mode: str = "aad_token",
        **kwargs
    ):
        """
        Batch endpoint for asynchronous batch inference.

        All parameters are keyword-only.

        Parameters:
        - name: Endpoint name
        - description: Endpoint description
        - tags: Dictionary of tags
        - properties: Custom properties
        - auth_mode: Authentication mode ("aad_token", "key"); default "aad_token"
        - **kwargs: Additional keyword arguments (accepted but not documented here)
        """

class BatchDeployment:
    # API reference stub: signature and docstring only; no implementation shown.
    # NOTE(review): `compute` (required, no default) appears after defaulted
    # parameters — legal here because all parameters are keyword-only (after *),
    # but easy to misread as a syntax error.
    def __init__(
        self,
        *,
        name: str,
        endpoint_name: str,
        model: Model = None,
        environment: Environment = None,
        code_configuration: CodeConfiguration = None,
        compute: str,
        instance_count: int = 1,
        max_concurrency_per_instance: int = 1,
        mini_batch_size: int = 10,
        retry_settings: BatchRetrySettings = None,
        output_action: str = "append_row",
        output_file_name: str = "predictions.csv",
        logging_level: str = "info",
        environment_variables: dict = None,
        **kwargs
    ):
        """
        Batch deployment for processing large datasets.

        All parameters are keyword-only.

        Parameters:
        - name: Deployment name
        - endpoint_name: Target batch endpoint name
        - model: Model to deploy
        - environment: Runtime environment
        - code_configuration: Scoring script configuration
        - compute: Compute cluster for batch processing (required)
        - instance_count: Number of compute instances (default 1)
        - max_concurrency_per_instance: Max concurrent processes per instance
        - mini_batch_size: Size of mini-batches for processing (default 10)
        - retry_settings: Retry configuration for failed batches
        - output_action: How to handle outputs ("append_row", "summary_only")
        - output_file_name: Name of output file (default "predictions.csv")
        - logging_level: Logging level ("debug", "info", "warning", "error")
        - environment_variables: Environment variables
        - **kwargs: Additional keyword arguments (accepted but not documented here)
        """

class ModelBatchDeployment(BatchDeployment):
    # API reference stub: signature and docstring only; no implementation shown.
    def __init__(
        self,
        *,
        name: str,
        endpoint_name: str,
        model: Model,
        settings: ModelBatchDeploymentSettings = None,
        **kwargs
    ):
        """
        Model-specific batch deployment with optimized settings.

        All parameters are keyword-only. Note `model` is required here,
        unlike in the BatchDeployment base.

        Parameters:
        - name: Deployment name
        - endpoint_name: Target batch endpoint name
        - model: Model to deploy (required)
        - settings: Model-specific deployment settings
        - **kwargs: Remaining BatchDeployment parameters — presumably forwarded
          to the base class; the stub body does not show the forwarding.
        """

Deployment Configuration Classes

class CodeConfiguration:
    # API reference stub: signature and docstring only; no implementation shown.
    def __init__(
        self,
        *,
        code: str,
        scoring_script: str
    ):
        """
        Code configuration for deployments.

        Both parameters are keyword-only and required (no defaults).

        Parameters:
        - code: Path to source code directory
        - scoring_script: Name of scoring script file (relative to `code`,
          based on the usage examples in this document — confirm with the SDK)
        """

class OnlineScaleSettings:
    """Base class for online scaling settings; concrete variants are
    DefaultScaleSettings and TargetUtilizationScaleSettings."""

class DefaultScaleSettings(OnlineScaleSettings):
    # API reference stub: no configurable parameters.
    def __init__(self):
        """Default scaling settings (no auto-scaling)."""

class TargetUtilizationScaleSettings(OnlineScaleSettings):
    # API reference stub: signature and docstring only; no implementation shown.
    # NOTE(review): with the defaults min_instances=1 and max_instances=1 there
    # is no headroom to scale — raise max_instances to enable auto-scaling.
    def __init__(
        self,
        *,
        min_instances: int = 1,
        max_instances: int = 1,
        target_utilization_percentage: int = 70,
        polling_interval: int = 300,
        scale_up_cooldown: int = 300,
        scale_down_cooldown: int = 300
    ):
        """
        Auto-scaling based on CPU/memory utilization.

        All parameters are keyword-only.

        Parameters:
        - min_instances: Minimum number of instances (default 1)
        - max_instances: Maximum number of instances (default 1)
        - target_utilization_percentage: Target CPU utilization percentage (default 70)
        - polling_interval: Polling interval in seconds (default 300)
        - scale_up_cooldown: Cooldown period in seconds for scaling up
        - scale_down_cooldown: Cooldown period in seconds for scaling down
        """

class OnlineRequestSettings:
    # API reference stub: signature and docstring only; no implementation shown.
    def __init__(
        self,
        *,
        request_timeout_ms: int = 90000,
        max_concurrent_requests_per_instance: int = 1,
        max_queue_wait_ms: int = 30000
    ):
        """
        Request handling settings for online deployments.

        All parameters are keyword-only; time values are in milliseconds.

        Parameters:
        - request_timeout_ms: Request timeout in milliseconds (default 90000 = 90 s)
        - max_concurrent_requests_per_instance: Max concurrent requests per instance (default 1)
        - max_queue_wait_ms: Max queue wait time in milliseconds (default 30000 = 30 s)
        """

class ProbeSettings:
    # API reference stub: signature and docstring only; no implementation shown.
    def __init__(
        self,
        *,
        failure_threshold: int = 30,
        success_threshold: int = 1,
        timeout: int = 2,
        period: int = 10,
        initial_delay: int = 10
    ):
        """
        Health probe settings for deployments (used for liveness and
        readiness probes on online deployments).

        All parameters are keyword-only; time values are in seconds.

        Parameters:
        - failure_threshold: Consecutive failures before marking unhealthy (default 30)
        - success_threshold: Consecutive successes to mark healthy (default 1)
        - timeout: Probe timeout in seconds (default 2)
        - period: Interval between probes in seconds (default 10)
        - initial_delay: Delay in seconds before the first probe (default 10)
        """

class BatchRetrySettings:
    # API reference stub: signature and docstring only; no implementation shown.
    def __init__(
        self,
        *,
        max_retries: int = 3,
        timeout: int = 30
    ):
        """
        Retry settings for batch deployments.

        All parameters are keyword-only.

        Parameters:
        - max_retries: Maximum number of retries per mini-batch (default 3)
        - timeout: Timeout for each retry in seconds (default 30)
        """

class ResourceRequirementsSettings:
    # API reference stub: signature and docstring only; no implementation shown.
    # NOTE(review): values use Kubernetes quantity strings per the examples
    # below ("500m" = half a CPU, "2Gi" = 2 gibibytes).
    def __init__(
        self,
        *,
        cpu: str = None,
        memory: str = None,
        gpu: str = None
    ):
        """
        Resource requirements for Kubernetes deployments.

        All parameters are keyword-only and optional.

        Parameters:
        - cpu: CPU requirements (e.g., "1", "500m")
        - memory: Memory requirements (e.g., "2Gi", "512Mi")
        - gpu: GPU requirements (e.g., "1")
        """

Serverless Endpoints

Serverless inference endpoints with automatic scaling and pay-per-use pricing.

class ServerlessEndpoint:
    # API reference stub: signature and docstring only; no implementation shown.
    def __init__(
        self,
        *,
        name: str,
        model_id: str,
        auth_mode: str = "key",
        content_safety: dict = None,
        **kwargs
    ):
        """
        Serverless endpoint for model inference (auto-scaling,
        pay-per-use; no compute to manage).

        All parameters are keyword-only.

        Parameters:
        - name: Endpoint name
        - model_id: Model identifier from the model catalog (required)
        - auth_mode: Authentication mode ("key", "aad_token"); default "key"
        - content_safety: Content safety configuration
        - **kwargs: Additional keyword arguments (accepted but not documented here)
        """

Endpoint Authentication

Authentication methods and credential management for endpoints.

class EndpointAuthKeys:
    # API reference stub: signature and docstring only; no implementation shown.
    # Two keys allow rotation: regenerate one while clients use the other.
    def __init__(
        self,
        *,
        primary_key: str = None,
        secondary_key: str = None
    ):
        """
        API key authentication for endpoints.

        All parameters are keyword-only and optional.

        Parameters:
        - primary_key: Primary API key
        - secondary_key: Secondary API key
        """

class EndpointAuthToken:
    # API reference stub: signature and docstring only; no implementation shown.
    def __init__(
        self,
        *,
        access_token: str
    ):
        """
        Token-based authentication for endpoints.

        Parameters (keyword-only):
        - access_token: Access token for authentication (required)
        """

class EndpointAadToken:
    # API reference stub: signature and docstring only; no implementation shown.
    def __init__(
        self,
        *,
        access_token: str
    ):
        """
        Azure AD token authentication for endpoints.

        Parameters (keyword-only):
        - access_token: Azure AD access token (required)
        """

Usage Example

# Invoke an online endpoint over HTTP with the `requests` library.
import json

# Prepare test data. The payload shape ({"data": [[...], ...]}) is what the
# scoring script in this example expects — adjust to match your score.py.
test_data = {
    "data": [
        [1.0, 2.0, 3.0, 4.0],
        [2.0, 3.0, 4.0, 5.0]
    ]
}

# Fetch the endpoint (for its scoring URI) and its auth keys.
# `ml_client` is assumed to be an authenticated MLClient created elsewhere.
endpoint = ml_client.online_endpoints.get("my-model-endpoint")
keys = ml_client.online_endpoints.get_keys("my-model-endpoint")

# Make the prediction request. For key-based auth, the API key is sent as a
# Bearer token in the Authorization header.
import requests

response = requests.post(
    endpoint.scoring_uri,
    headers={
        "Authorization": f"Bearer {keys.primary_key}",
        "Content-Type": "application/json"
    },
    data=json.dumps(test_data)
)

# NOTE(review): no status check before parsing — response.json() will raise on
# a non-JSON error body; consider response.raise_for_status() first.
predictions = response.json()
print(predictions)

Install with Tessl CLI

npx tessl i tessl/pypi-azure-ai-ml

docs

asset-management.md

automl.md

client-auth.md

compute-management.md

hyperparameter-tuning.md

index.md

job-management.md

model-deployment.md

tile.json