Microsoft Azure Machine Learning Client Library for Python providing comprehensive SDK for ML workflows including job execution, pipeline components, model deployment, and AutoML capabilities
Quality: Pending — a best-practices review has not yet been performed.
Impact: Pending — no eval scenarios have been run.
Comprehensive model deployment capabilities for real-time and batch inference with online endpoints, batch endpoints, and various deployment configurations supporting different compute types and scaling options.
Real-time inference endpoints for serving models with low latency and high availability.
class OnlineEndpoint:
    """Base class for online (real-time) inference endpoints."""

    def __init__(
        self,
        *,
        name: str,
        description: "str | None" = None,
        tags: "dict | None" = None,
        properties: "dict | None" = None,
        auth_mode: str = "key",
        identity: "IdentityConfiguration | None" = None,
        **kwargs
    ):
        """
        Online endpoint for real-time model inference.

        Parameters:
        - name: Endpoint name (must be unique in workspace)
        - description: Endpoint description
        - tags: Dictionary of tags
        - properties: Custom properties
        - auth_mode: Authentication mode ("key", "aml_token", "aad_token")
        - identity: Managed identity configuration
        """
class ManagedOnlineEndpoint(OnlineEndpoint):
    """Online endpoint whose serving infrastructure is managed by Azure."""

    def __init__(
        self,
        *,
        name: str,
        public_network_access: str = "enabled",
        **kwargs
    ):
        """
        Azure-managed online endpoint with automatic scaling and load balancing.

        Parameters:
        - name: Endpoint name
        - public_network_access: Network access ("enabled", "disabled")
        - **kwargs: Additional OnlineEndpoint arguments (description, tags, auth_mode, ...)
        """
class KubernetesOnlineEndpoint(OnlineEndpoint):
    """Online endpoint hosted on an attached Kubernetes compute target."""

    def __init__(
        self,
        *,
        name: str,
        compute: str,
        **kwargs
    ):
        """
        Kubernetes-based online endpoint for custom compute environments.

        Parameters:
        - name: Endpoint name
        - compute: Kubernetes compute target name
        - **kwargs: Additional OnlineEndpoint arguments (description, tags, auth_mode, ...)
        """


from azure.ai.ml.entities import ManagedOnlineEndpoint
# Example: create a managed online endpoint.
endpoint = ManagedOnlineEndpoint(
    name="my-model-endpoint",
    description="Endpoint for my ML model",
    auth_mode="key",  # key-based authentication
    tags={"environment": "production", "version": "1.0"}
)
# Submit the create/update operation and block until it completes.
ml_client.online_endpoints.begin_create_or_update(endpoint).result()

# Deploy models to online endpoints with specific resource configurations
# and scaling settings.
class OnlineDeployment:
    """Base class for deployments hosted behind an online endpoint."""

    def __init__(
        self,
        *,
        name: str,
        endpoint_name: str,
        model: "Model | None" = None,
        environment: "Environment | None" = None,
        code_configuration: "CodeConfiguration | None" = None,
        **kwargs
    ):
        """
        Base online deployment class.

        Parameters:
        - name: Deployment name
        - endpoint_name: Target endpoint name
        - model: Model to deploy
        - environment: Runtime environment
        - code_configuration: Scoring script configuration
        """
class ManagedOnlineDeployment(OnlineDeployment):
    """Deployment on Azure-managed compute with automatic scaling."""

    def __init__(
        self,
        *,
        name: str,
        endpoint_name: str,
        model: "Model",
        environment: "Environment | None" = None,
        code_configuration: "CodeConfiguration | None" = None,
        instance_type: str = "Standard_DS3_v2",
        instance_count: int = 1,
        scale_settings: "OnlineScaleSettings | None" = None,
        request_settings: "OnlineRequestSettings | None" = None,
        liveness_probe: "ProbeSettings | None" = None,
        readiness_probe: "ProbeSettings | None" = None,
        environment_variables: "dict | None" = None,
        **kwargs
    ):
        """
        Azure-managed online deployment with automatic scaling.

        Parameters:
        - name: Deployment name
        - endpoint_name: Target endpoint name
        - model: Model to deploy
        - environment: Runtime environment
        - code_configuration: Scoring script configuration
        - instance_type: VM size for deployment
        - instance_count: Number of instances
        - scale_settings: Auto-scaling configuration
        - request_settings: Request handling settings
        - liveness_probe: Health check configuration
        - readiness_probe: Readiness check configuration
        - environment_variables: Environment variables
        """
class KubernetesOnlineDeployment(OnlineDeployment):
    """Deployment targeting an attached Kubernetes compute."""

    def __init__(
        self,
        *,
        name: str,
        endpoint_name: str,
        model: "Model",
        environment: "Environment | None" = None,
        code_configuration: "CodeConfiguration | None" = None,
        instance_type: "str | None" = None,
        instance_count: int = 1,
        resources: "ResourceRequirementsSettings | None" = None,
        **kwargs
    ):
        """
        Kubernetes-based online deployment for custom compute.

        Parameters:
        - name: Deployment name
        - endpoint_name: Target endpoint name
        - model: Model to deploy
        - environment: Runtime environment
        - code_configuration: Scoring script configuration
        - instance_type: Instance type (if applicable)
        - instance_count: Number of replicas
        - resources: Resource requirements (CPU, memory, GPU)
        """


from azure.ai.ml.entities import ManagedOnlineDeployment, CodeConfiguration, Model, Environment
# Example: deploy a model to an existing online endpoint.
# Define the model, environment, and code configuration.
model = Model(path="./model", name="my-model", version="1")
environment = Environment(
    image="mcr.microsoft.com/azureml/sklearn-1.0-ubuntu20.04-py38-cpu-inference:latest"
)
code_config = CodeConfiguration(
    code="./src",
    scoring_script="score.py"
)
# Create the deployment definition.
deployment = ManagedOnlineDeployment(
    name="my-model-deployment",
    endpoint_name="my-model-endpoint",
    model=model,
    environment=environment,
    code_configuration=code_config,
    instance_type="Standard_DS3_v2",
    instance_count=1
)
# Submit the deployment and block until it completes.
ml_client.online_deployments.begin_create_or_update(deployment).result()

# Batch inference for processing large datasets asynchronously.
class BatchEndpoint:
    """Endpoint for asynchronous batch inference."""

    def __init__(
        self,
        *,
        name: str,
        description: "str | None" = None,
        tags: "dict | None" = None,
        properties: "dict | None" = None,
        auth_mode: str = "aad_token",
        **kwargs
    ):
        """
        Batch endpoint for asynchronous batch inference.

        Parameters:
        - name: Endpoint name
        - description: Endpoint description
        - tags: Dictionary of tags
        - properties: Custom properties
        - auth_mode: Authentication mode ("aad_token", "key")
        """
class BatchDeployment:
    """Deployment that processes large datasets on a batch endpoint."""

    def __init__(
        self,
        *,
        name: str,
        endpoint_name: str,
        model: "Model | None" = None,
        environment: "Environment | None" = None,
        code_configuration: "CodeConfiguration | None" = None,
        # NOTE: `compute` is required; its position after defaulted parameters
        # is legal here because every parameter is keyword-only.
        compute: str,
        instance_count: int = 1,
        max_concurrency_per_instance: int = 1,
        mini_batch_size: int = 10,
        retry_settings: "BatchRetrySettings | None" = None,
        output_action: str = "append_row",
        output_file_name: str = "predictions.csv",
        logging_level: str = "info",
        environment_variables: "dict | None" = None,
        **kwargs
    ):
        """
        Batch deployment for processing large datasets.

        Parameters:
        - name: Deployment name
        - endpoint_name: Target batch endpoint name
        - model: Model to deploy
        - environment: Runtime environment
        - code_configuration: Scoring script configuration
        - compute: Compute cluster for batch processing
        - instance_count: Number of compute instances
        - max_concurrency_per_instance: Max concurrent processes per instance
        - mini_batch_size: Size of mini-batches for processing
        - retry_settings: Retry configuration for failed batches
        - output_action: How to handle outputs ("append_row", "summary_only")
        - output_file_name: Name of output file
        - logging_level: Logging level ("debug", "info", "warning", "error")
        - environment_variables: Environment variables
        """
class ModelBatchDeployment(BatchDeployment):
    """Batch deployment specialized for a single model."""

    def __init__(
        self,
        *,
        name: str,
        endpoint_name: str,
        model: "Model",
        settings: "ModelBatchDeploymentSettings | None" = None,
        **kwargs
    ):
        """
        Model-specific batch deployment with optimized settings.

        Parameters:
        - name: Deployment name
        - endpoint_name: Target batch endpoint name
        - model: Model to deploy
        - settings: Model-specific deployment settings
        """


class CodeConfiguration:
    """Scoring-code configuration shared by online and batch deployments."""

    def __init__(
        self,
        *,
        code: str,
        scoring_script: str
    ):
        """
        Code configuration for deployments.

        Parameters:
        - code: Path to source code directory
        - scoring_script: Name of scoring script file
        """
class OnlineScaleSettings:
    """Base class for online scaling settings."""


class DefaultScaleSettings(OnlineScaleSettings):
    """Fixed-capacity scaling (no auto-scaling)."""

    def __init__(self):
        """Default scaling settings (no auto-scaling)."""


class TargetUtilizationScaleSettings(OnlineScaleSettings):
    """Auto-scaling driven by a target utilization percentage."""

    def __init__(
        self,
        *,
        min_instances: int = 1,
        max_instances: int = 1,
        target_utilization_percentage: int = 70,
        polling_interval: int = 300,
        scale_up_cooldown: int = 300,
        scale_down_cooldown: int = 300
    ):
        """
        Auto-scaling based on CPU/memory utilization.

        Parameters:
        - min_instances: Minimum number of instances
        - max_instances: Maximum number of instances
        - target_utilization_percentage: Target CPU utilization percentage
        - polling_interval: Polling interval in seconds
        - scale_up_cooldown: Cooldown period for scaling up (seconds)
        - scale_down_cooldown: Cooldown period for scaling down (seconds)
        """
class OnlineRequestSettings:
    """Request-handling limits for online deployments."""

    def __init__(
        self,
        *,
        request_timeout_ms: int = 90000,
        max_concurrent_requests_per_instance: int = 1,
        max_queue_wait_ms: int = 30000
    ):
        """
        Request handling settings for online deployments.

        Parameters:
        - request_timeout_ms: Request timeout in milliseconds
        - max_concurrent_requests_per_instance: Max concurrent requests per instance
        - max_queue_wait_ms: Max queue wait time in milliseconds
        """
class ProbeSettings:
    """Liveness/readiness probe configuration for deployments."""

    def __init__(
        self,
        *,
        failure_threshold: int = 30,
        success_threshold: int = 1,
        timeout: int = 2,
        period: int = 10,
        initial_delay: int = 10
    ):
        """
        Health probe settings for deployments.

        Parameters:
        - failure_threshold: Number of failures before marking unhealthy
        - success_threshold: Number of successes to mark healthy
        - timeout: Probe timeout in seconds
        - period: Probe period in seconds
        - initial_delay: Initial delay before first probe (seconds)
        """
class BatchRetrySettings:
    """Retry policy for failed mini-batches in batch deployments."""

    def __init__(
        self,
        *,
        max_retries: int = 3,
        timeout: int = 30
    ):
        """
        Retry settings for batch deployments.

        Parameters:
        - max_retries: Maximum number of retries
        - timeout: Timeout for each retry in seconds
        """
class ResourceRequirementsSettings:
    """Resource requests for Kubernetes-hosted deployments."""

    def __init__(
        self,
        *,
        cpu: "str | None" = None,
        memory: "str | None" = None,
        gpu: "str | None" = None
    ):
        """
        Resource requirements for Kubernetes deployments.

        Parameters:
        - cpu: CPU requirements (e.g., "1", "500m")
        - memory: Memory requirements (e.g., "2Gi", "512Mi")
        - gpu: GPU requirements (e.g., "1")
        """


# Serverless inference endpoints with automatic scaling and pay-per-use pricing.
class ServerlessEndpoint:
    """Serverless endpoint serving a model from the model catalog."""

    def __init__(
        self,
        *,
        name: str,
        model_id: str,
        auth_mode: str = "key",
        content_safety: "dict | None" = None,
        **kwargs
    ):
        """
        Serverless endpoint for model inference.

        Parameters:
        - name: Endpoint name
        - model_id: Model identifier from model catalog
        - auth_mode: Authentication mode ("key", "aad_token")
        - content_safety: Content safety configuration
        """


# Authentication methods and credential management for endpoints.
class EndpointAuthKeys:
    """API-key credential pair for endpoint authentication."""

    def __init__(
        self,
        *,
        primary_key: "str | None" = None,
        secondary_key: "str | None" = None
    ):
        """
        API key authentication for endpoints.

        Parameters:
        - primary_key: Primary API key
        - secondary_key: Secondary API key
        """
class EndpointAuthToken:
    """Bearer-token credential for endpoint authentication."""

    def __init__(
        self,
        *,
        access_token: str
    ):
        """
        Token-based authentication for endpoints.

        Parameters:
        - access_token: Access token for authentication
        """
class EndpointAadToken:
    """Azure AD token credential for endpoint authentication."""

    def __init__(
        self,
        *,
        access_token: str
    ):
        """
        Azure AD token authentication for endpoints.

        Parameters:
        - access_token: Azure AD access token
        """


# Invoke online endpoint
# Example: invoke the online endpoint with a JSON payload.
import json

# Prepare test data in the shape the scoring script expects.
test_data = {
    "data": [
        [1.0, 2.0, 3.0, 4.0],
        [2.0, 3.0, 4.0, 5.0]
    ]
}

# Look up the endpoint's scoring URI and its authentication keys.
endpoint = ml_client.online_endpoints.get("my-model-endpoint")
keys = ml_client.online_endpoints.get_keys("my-model-endpoint")

# Make the prediction request; always set a timeout on network calls.
import requests
response = requests.post(
    endpoint.scoring_uri,
    headers={
        "Authorization": f"Bearer {keys.primary_key}",
        "Content-Type": "application/json"
    },
    data=json.dumps(test_data),
    timeout=30,
)
predictions = response.json()
print(predictions)

# Install with Tessl CLI:
#   npx tessl i tessl/pypi-azure-ai-ml