CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-azure-mgmt-iothub

Microsoft Azure IoT Hub Management Client Library for programmatic management of Azure IoT Hub resources through the Azure Resource Manager API

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/failover-operations.md

Failover Operations

Manual failover capabilities for IoT Hub. They let you perform a controlled failover of a hub to its paired Azure region, whether for planned maintenance, disaster recovery, or business-continuity testing, while keeping service disruption to a minimum.

Capabilities

Manual Failover Initiation

Initiate manual failover of IoT Hub to its paired region for disaster recovery scenarios, planned maintenance, or testing business continuity procedures.

def begin_manual_failover(
    iot_hub_name: str, 
    resource_group_name: str, 
    failover_input: FailoverInput, 
    **kwargs
) -> LROPoller[None]:
    """
    Initiate manual failover of IoT hub to its paired region.

    Note the argument order: the hub name comes first and the resource group
    second, which is the reverse of ``iot_hub_resource.get(resource_group,
    hub_name)`` as used in the examples on this page.

    Args:
        iot_hub_name: Name of the IoT hub resource
        resource_group_name: Name of the resource group
        failover_input: Failover configuration including target region
            (a ``FailoverInput`` whose ``failover_region`` names the region)
        **kwargs: Additional keyword arguments; their meaning is not
            described on this page.

    Returns:
        LROPoller[None]: Long-running operation for failover process monitoring.
        The examples on this page poll it with ``done()`` and wait for it
        with ``result()``; the operation itself produces no value.
    """

Usage Examples

Initiating planned failover for maintenance

from azure.identity import DefaultAzureCredential
from azure.mgmt.iothub import IotHubClient
from azure.mgmt.iothub.models import FailoverInput
import time

# Initialize client
credential = DefaultAzureCredential()
client = IotHubClient(credential, "subscription-id")

resource_group = "myResourceGroup"
hub_name = "myIoTHub"

# Get current IoT Hub information before failover
print("Pre-Failover IoT Hub Status:")
print("=" * 40)

hub_info = client.iot_hub_resource.get(resource_group, hub_name)
print(f"Hub Name: {hub_info.name}")
print(f"Current Location: {hub_info.location}")
print(f"Provisioning State: {hub_info.properties.provisioning_state}")
print(f"State: {hub_info.properties.state}")

# The attribute can exist on the model and still be None (nothing returned by
# the service), so a hasattr() check alone would let the loop below raise
# TypeError. Fetch the value once and test it against None instead.
hub_locations = getattr(hub_info.properties, 'locations', None)
if hub_locations is not None:
    print("Available Locations:")
    for location in hub_locations:
        print(f"  - {location.location} (Role: {location.role})")
print()

# Prepare failover configuration
failover_config = FailoverInput(
    failover_region="paired-region-name"  # Specify target region
)

# Initiate manual failover
print("Initiating Manual Failover...")
print("=" * 35)

try:
    # Start failover operation.
    # Argument order is (hub name, resource group, failover input).
    failover_operation = client.iot_hub.begin_manual_failover(
        hub_name, resource_group, failover_config
    )
    
    print(f"✓ Failover initiated for {hub_name}")
    print("⚠️  This is a long-running operation that may take several minutes")
    print("⚠️  IoT Hub will be temporarily unavailable during failover")
    
    # Monitor failover progress
    print("\nMonitoring Failover Progress...")
    start_time = time.time()
    
    while not failover_operation.done():
        elapsed_time = int(time.time() - start_time)
        print(f"  Failover in progress... ({elapsed_time}s elapsed)")
        time.sleep(30)  # Check every 30 seconds
    
    # The loop above has already waited for the poller to finish; result() is
    # called so that a failed operation surfaces as an exception here rather
    # than being reported as a success below.
    failover_operation.result()
    
    elapsed_time = int(time.time() - start_time)
    print(f"✓ Failover completed successfully in {elapsed_time} seconds")
    
except Exception as e:
    # Report the failure for the operator, then re-raise so the script stops.
    print(f"✗ Failover failed: {e}")
    raise

Post-failover verification and monitoring

def verify_failover_completion(resource_group: str, hub_name: str):
    """Print a post-failover status report for an IoT hub.

    Looks the hub up through the module-level ``client`` and prints its name,
    location, provisioning state and state. Endpoint health and device
    statistics are then queried on a best-effort basis: a failure in either
    query is printed and does not affect the return value.

    Args:
        resource_group: Name of the resource group containing the hub.
        hub_name: Name of the IoT hub to inspect.

    Returns:
        True when the hub lookup and reporting ran without raising (regardless
        of the reported state), False when an exception escaped them.
    """
    print("Post-Failover Verification:")
    print("=" * 35)

    try:
        # Fetch the hub as it looks after the failover.
        hub = client.iot_hub_resource.get(resource_group, hub_name)
        props = hub.properties

        print(f"Hub Name: {hub.name}")
        print(f"New Location: {hub.location}")
        print(f"Provisioning State: {props.provisioning_state}")
        print(f"State: {props.state}")

        # Anything other than "Active" is reported as a warning only.
        if props.state != "Active":
            print(f"⚠️  IoT Hub state: {props.state}")
        else:
            print("✓ IoT Hub is active and operational")

        # Best-effort: routing endpoint health.
        print("\nChecking Endpoint Health:")
        try:
            endpoints = list(client.iot_hub_resource.get_endpoint_health(resource_group, hub_name))
            healthy = [ep for ep in endpoints if ep.health_status == "Healthy"]

            print(f"  Healthy Endpoints: {len(healthy)}/{len(endpoints)}")

            for ep in endpoints:
                marker = "✓" if ep.health_status == "Healthy" else "✗"
                print(f"  {marker} {ep.endpoint_id}: {ep.health_status}")
        except Exception as exc:
            print(f"  Could not retrieve endpoint health: {exc}")

        # Best-effort: device registry counters.
        print("\nDevice Registry Status:")
        try:
            registry = client.iot_hub_resource.get_stats(resource_group, hub_name)
            print(f"  Total Devices: {registry.total_device_count}")
            print(f"  Enabled Devices: {registry.enabled_device_count}")
            print(f"  Disabled Devices: {registry.disabled_device_count}")
        except Exception as exc:
            print(f"  Could not retrieve device statistics: {exc}")

    except Exception as exc:
        print(f"✗ Verification failed: {exc}")
        return False

    return True

# Run the post-failover verification and report its overall outcome
verification_success = verify_failover_completion(resource_group, hub_name)

print(
    "\n✓ Failover verification completed successfully"
    if verification_success
    else "\n✗ Failover verification encountered issues"
)

Disaster recovery failover with comprehensive monitoring

import json
from datetime import datetime

def execute_disaster_recovery_failover(resource_group: str, hub_name: str, target_region: str):
    """Execute comprehensive disaster recovery failover with full monitoring.

    Runs seven steps against the module-level ``client``: pre-failover
    validation, metric collection, failover initiation, progress monitoring,
    post-failover validation, device-count comparison and an endpoint health
    check. Every step is printed and appended to a log dictionary, which is
    written to ``dr_failover_<hub_name>_<timestamp>.json`` in the current
    directory whether the run succeeds or fails.

    Args:
        resource_group: Name of the resource group containing the hub.
        hub_name: Name of the IoT hub to fail over.
        target_region: Region passed as ``FailoverInput.failover_region``.

    Returns:
        dict: The disaster recovery log. It is only returned when no exception
        escaped the steps, so ``success`` is True in a returned log.

    Raises:
        Exception: Any error raised by a step is recorded in the log
            (``success`` False, ``error`` set) and then re-raised.
    """
    # Imported locally because the file-level import only brings in `datetime`.
    from datetime import timezone

    def utc_now_iso() -> str:
        # datetime.utcnow() is deprecated since Python 3.12 and yields a naive
        # value; an aware UTC timestamp makes the offset explicit in the log.
        return datetime.now(timezone.utc).isoformat()

    dr_log = {
        "operation": "disaster_recovery_failover",
        "hub_name": hub_name,
        "start_time": utc_now_iso(),
        "target_region": target_region,
        "steps": [],
        "success": False
    }
    
    def log_step(step_name: str, status: str, details: str = ""):
        # Record the step in the log and echo it to the console.
        step_entry = {
            "step": step_name,
            "timestamp": utc_now_iso(),
            "status": status,
            "details": details
        }
        dr_log["steps"].append(step_entry)
        print(f"[{status.upper()}] {step_name}: {details}")
    
    try:
        # Step 1: Pre-failover validation
        log_step("pre_failover_validation", "started", "Collecting pre-failover hub state")
        
        pre_failover_hub = client.iot_hub_resource.get(resource_group, hub_name)
        dr_log["pre_failover_location"] = pre_failover_hub.location
        dr_log["pre_failover_state"] = pre_failover_hub.properties.state
        
        # A non-Active hub is only a warning; the failover still proceeds.
        if pre_failover_hub.properties.state != "Active":
            log_step("pre_failover_validation", "warning", f"Hub not in Active state: {pre_failover_hub.properties.state}")
        else:
            log_step("pre_failover_validation", "success", f"Hub is active in {pre_failover_hub.location}")
        
        # Step 2: Collect pre-failover metrics
        log_step("collect_metrics", "started", "Collecting pre-failover device and quota metrics")
        
        pre_stats = client.iot_hub_resource.get_stats(resource_group, hub_name)
        dr_log["pre_failover_device_count"] = pre_stats.total_device_count
        
        pre_quotas = list(client.iot_hub_resource.get_quota_metrics(resource_group, hub_name))
        dr_log["pre_failover_quotas"] = {q.name: {"current": q.current_value, "max": q.max_value} for q in pre_quotas}
        
        log_step("collect_metrics", "success", f"Collected metrics for {pre_stats.total_device_count} devices")
        
        # Step 3: Initiate failover
        log_step("initiate_failover", "started", f"Starting failover to {target_region}")
        
        failover_config = FailoverInput(failover_region=target_region)
        # Argument order is (hub name, resource group, failover input).
        failover_operation = client.iot_hub.begin_manual_failover(hub_name, resource_group, failover_config)
        
        dr_log["failover_started"] = utc_now_iso()
        
        # Step 4: Monitor failover progress
        log_step("monitor_failover", "started", "Monitoring failover operation progress")
        
        start_time = time.time()
        while not failover_operation.done():
            elapsed = int(time.time() - start_time)
            log_step("monitor_failover", "in_progress", f"Failover running for {elapsed} seconds")
            time.sleep(60)  # Check every minute for DR scenario
        
        # The poller is already done; result() surfaces a failed operation
        # as an exception instead of letting it pass as a success.
        failover_operation.result()
        
        elapsed_total = int(time.time() - start_time)
        dr_log["failover_duration_seconds"] = elapsed_total
        dr_log["failover_completed"] = utc_now_iso()
        
        log_step("failover_execution", "success", f"Failover completed in {elapsed_total} seconds")
        
        # Step 5: Post-failover validation
        log_step("post_failover_validation", "started", "Validating hub state after failover")
        
        # Allow some time for system stabilization
        time.sleep(30)
        
        post_failover_hub = client.iot_hub_resource.get(resource_group, hub_name)
        dr_log["post_failover_location"] = post_failover_hub.location
        dr_log["post_failover_state"] = post_failover_hub.properties.state
        
        if post_failover_hub.properties.state == "Active":
            log_step("post_failover_validation", "success", f"Hub active in new location: {post_failover_hub.location}")
        else:
            log_step("post_failover_validation", "warning", f"Hub state: {post_failover_hub.properties.state}")
        
        # Step 6: Verify data integrity
        log_step("data_integrity_check", "started", "Verifying device count and data integrity")
        
        post_stats = client.iot_hub_resource.get_stats(resource_group, hub_name)
        dr_log["post_failover_device_count"] = post_stats.total_device_count
        
        # A mismatch is logged as an error step but does not abort the run.
        if post_stats.total_device_count == pre_stats.total_device_count:
            log_step("data_integrity_check", "success", f"Device count verified: {post_stats.total_device_count}")
        else:
            log_step("data_integrity_check", "error", f"Device count mismatch: {pre_stats.total_device_count} -> {post_stats.total_device_count}")
        
        # Step 7: Check endpoint health (best-effort; failures are logged only)
        log_step("endpoint_health_check", "started", "Checking routing endpoint health")
        
        try:
            endpoint_health = list(client.iot_hub_resource.get_endpoint_health(resource_group, hub_name))
            healthy_endpoints = [ep for ep in endpoint_health if ep.health_status == "Healthy"]
            
            dr_log["post_failover_endpoint_health"] = {
                "total": len(endpoint_health),
                "healthy": len(healthy_endpoints),
                "unhealthy": len(endpoint_health) - len(healthy_endpoints)
            }
            
            if len(healthy_endpoints) == len(endpoint_health):
                log_step("endpoint_health_check", "success", f"All {len(endpoint_health)} endpoints healthy")
            else:
                log_step("endpoint_health_check", "warning", f"{len(healthy_endpoints)}/{len(endpoint_health)} endpoints healthy")
                
        except Exception as e:
            log_step("endpoint_health_check", "error", f"Could not check endpoint health: {e}")
        
        dr_log["success"] = True
        dr_log["end_time"] = utc_now_iso()
        
        log_step("disaster_recovery", "completed", "Disaster recovery failover completed successfully")
        
    except Exception as e:
        dr_log["success"] = False
        dr_log["error"] = str(e)
        dr_log["end_time"] = utc_now_iso()
        
        log_step("disaster_recovery", "failed", f"Disaster recovery failed: {e}")
        raise
    
    finally:
        # Save disaster recovery log. The file name keeps its local-time stamp;
        # the timestamps inside the log are UTC.
        log_filename = f"dr_failover_{hub_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        try:
            with open(log_filename, "w", encoding="utf-8") as f:
                json.dump(dr_log, f, indent=2, default=str)
        except OSError as log_error:
            # An exception raised inside `finally` would replace the failover
            # error being propagated (or discard a successful result), so a
            # failed log write is reported instead of raised.
            print(f"\nCould not save disaster recovery log to {log_filename}: {log_error}")
        else:
            print(f"\nDisaster recovery log saved to: {log_filename}")
        
    return dr_log

# Run the disaster recovery failover and summarise the outcome.
# Any exception raised by the run is reported here as a critical failure.
try:
    dr_result = execute_disaster_recovery_failover(
        resource_group,
        hub_name,
        "East US 2",  # Target failover region
    )

    if not dr_result["success"]:
        print("\n❌ DISASTER RECOVERY FAILED")
        print(f"   Error: {dr_result.get('error', 'Unknown error')}")
    else:
        print("\n✅ DISASTER RECOVERY COMPLETED SUCCESSFULLY")
        origin = dr_result['pre_failover_location']
        destination = dr_result['post_failover_location']
        print(f"   Hub failed over from {origin} to {destination}")
        print(f"   Total time: {dr_result['failover_duration_seconds']} seconds")

except Exception as exc:
    print(f"\n💥 CRITICAL FAILURE: {exc}")

Failover testing and rollback procedures

def test_failover_procedure(resource_group: str, hub_name: str, failover_region: str = "test-region"):
    """Test failover procedure in a controlled manner for DR testing.

    Checks that the hub is Active, records the device count, asks for
    interactive confirmation on stdin, performs a manual failover through the
    module-level ``client``, and then compares the device count afterwards.

    Args:
        resource_group: Name of the resource group containing the hub.
        hub_name: Name of the IoT hub to test.
        failover_region: Region passed as ``FailoverInput.failover_region``.
            Defaults to the placeholder ``"test-region"``; pass the region
            appropriate for the hub under test.

    Returns:
        True when the failover ran to completion; False when the hub was not
        Active, the user declined the prompt, or the failover steps raised.
        A changed device count is printed but does not change the result.

    Raises:
        Exception: Errors from the pre-test lookups (hub and baseline
            statistics) are not caught and propagate to the caller.
    """
    
    print("Failover Test Procedure:")
    print("=" * 30)
    
    # Pre-test validation
    print("1. Pre-test validation...")
    original_hub = client.iot_hub_resource.get(resource_group, hub_name)
    original_location = original_hub.location
    
    print(f"   Original location: {original_location}")
    print(f"   Hub state: {original_hub.properties.state}")
    
    # Refuse to test against a hub that is not Active.
    if original_hub.properties.state != "Active":
        print("   ⚠️  Hub not in Active state - test may not be reliable")
        return False
    
    # Collect baseline metrics
    print("2. Collecting baseline metrics...")
    baseline_stats = client.iot_hub_resource.get_stats(resource_group, hub_name)
    print(f"   Device count: {baseline_stats.total_device_count}")
    
    try:
        # Test failover
        print("3. Testing failover...")
        # The region is a parameter so the procedure can be reused for any hub
        # without editing the function body.
        failover_config = FailoverInput(failover_region=failover_region)
        
        print("   ⚠️  This will temporarily disrupt hub operations")
        confirm = input("   Continue with test failover? (yes/no): ")
        
        # Only the exact answer "yes" (case-insensitive) proceeds.
        if confirm.lower() != 'yes':
            print("   Test cancelled by user")
            return False
        
        # Execute test failover.
        # Argument order is (hub name, resource group, failover input).
        failover_op = client.iot_hub.begin_manual_failover(hub_name, resource_group, failover_config)
        
        print("   Waiting for failover completion...")
        start_time = time.time()
        failover_op.result()  # Wait for completion
        
        test_duration = int(time.time() - start_time)
        print(f"   ✓ Test failover completed in {test_duration} seconds")
        
        # Verify test results
        print("4. Verifying test results...")
        test_hub = client.iot_hub_resource.get(resource_group, hub_name)
        
        print(f"   New location: {test_hub.location}")
        print(f"   Hub state: {test_hub.properties.state}")
        
        # Verify data consistency
        test_stats = client.iot_hub_resource.get_stats(resource_group, hub_name)
        if test_stats.total_device_count == baseline_stats.total_device_count:
            print(f"   ✓ Device count consistent: {test_stats.total_device_count}")
        else:
            print(f"   ✗ Device count changed: {baseline_stats.total_device_count} -> {test_stats.total_device_count}")
        
        # Test successful
        print("5. Test completed successfully")
        print(f"   Failover test took {test_duration} seconds")
        print("   Hub operational in new region")
        
        return True
        
    except Exception as e:
        # Any failure during the failover steps is reported as a failed test.
        print(f"   ✗ Test failed: {e}")
        return False

# Run the failover test and report whether the DR procedure held up
test_success = test_failover_procedure(resource_group, hub_name)

print(
    "\n✅ Failover test passed - DR procedures verified"
    if test_success
    else "\n❌ Failover test failed - review DR procedures"
)

Types

FailoverInput

Configuration for manual failover operations including target region specification for disaster recovery scenarios.

class FailoverInput:
    """Manual failover configuration.

    Carries the target region for a manual failover request. An instance is
    passed as the ``failover_input`` argument of ``begin_manual_failover``,
    e.g. ``FailoverInput(failover_region="paired-region-name")``.
    """
    failover_region: str  # Target Azure region for failover operation

Install with Tessl CLI

npx tessl i tessl/pypi-azure-mgmt-iothub

docs

device-operations.md

event-hub-consumer-groups.md

failover-operations.md

index.md

message-routing.md

monitoring-quotas.md

private-networking.md

resource-management.md

security-management.md

utility-operations.md

tile.json