Google Cloud Monitoring API client library for collecting, analyzing, and alerting on metrics, events, and metadata from cloud and on-premise sources.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Comprehensive service-level monitoring for managing services and Service Level Objectives (SLOs) in Google Cloud Monitoring. This enables service-oriented monitoring with SLI definitions, error budgets, and service health tracking for modern microservices architectures.
Manage the complete lifecycle of services including creation, updates, retrieval, and deletion.
class ServiceMonitoringServiceClient:
def create_service(
self,
request=None,
*,
parent: str = None,
service=None,
retry=None,
timeout=None,
metadata=()
) -> service.Service:
"""
Create a Service.
Args:
request: The request object or dict equivalent
parent: Required. Project name in format 'projects/[PROJECT_ID]'
service: Required. The Service to create
retry: Retry configuration
timeout: Request timeout in seconds
metadata: Additional metadata
Returns:
Created Service object
"""
def get_service(
self,
request=None,
*,
name: str = None,
retry=None,
timeout=None,
metadata=()
) -> service.Service:
"""
Get the named Service.
Args:
request: The request object or dict equivalent
name: Required. Service name in format 'projects/[PROJECT_ID]/services/[SERVICE_ID]'
retry: Retry configuration
timeout: Request timeout in seconds
metadata: Additional metadata
Returns:
Service object
"""
def list_services(
self,
request=None,
*,
parent: str = None,
retry=None,
timeout=None,
metadata=()
) -> pagers.ListServicesPager:
"""
List Services for this Metrics Scope.
Args:
request: The request object or dict equivalent
parent: Required. Project name
retry: Retry configuration
timeout: Request timeout in seconds
metadata: Additional metadata
Returns:
Pager for iterating over Service objects
"""
def update_service(
self,
request=None,
*,
service=None,
retry=None,
timeout=None,
metadata=()
) -> service.Service:
"""
Update this Service.
Args:
request: The request object or dict equivalent
service: Required. Updated Service
retry: Retry configuration
timeout: Request timeout in seconds
metadata: Additional metadata
Returns:
Updated Service object
"""
def delete_service(
self,
request=None,
*,
name: str = None,
retry=None,
timeout=None,
metadata=()
) -> None:
"""
Soft delete this Service.
Args:
request: The request object or dict equivalent
name: Required. Service name to delete
retry: Retry configuration
timeout: Request timeout in seconds
metadata: Additional metadata
"""Manage Service Level Objectives (SLOs) for tracking service reliability and performance.
class ServiceMonitoringServiceClient:
def create_service_level_objective(
self,
request=None,
*,
parent: str = None,
service_level_objective=None,
retry=None,
timeout=None,
metadata=()
) -> service.ServiceLevelObjective:
"""
Create a ServiceLevelObjective for the given Service.
Args:
request: The request object or dict equivalent
parent: Required. Service name
service_level_objective: Required. The SLO to create
retry: Retry configuration
timeout: Request timeout in seconds
metadata: Additional metadata
Returns:
Created ServiceLevelObjective object
"""
def get_service_level_objective(
self,
request=None,
*,
name: str = None,
retry=None,
timeout=None,
metadata=()
) -> service.ServiceLevelObjective:
"""
Get a ServiceLevelObjective by name.
Args:
request: The request object or dict equivalent
name: Required. SLO name
retry: Retry configuration
timeout: Request timeout in seconds
metadata: Additional metadata
Returns:
ServiceLevelObjective object
"""
def list_service_level_objectives(
self,
request=None,
*,
parent: str = None,
retry=None,
timeout=None,
metadata=()
) -> pagers.ListServiceLevelObjectivesPager:
"""
List the ServiceLevelObjectives for the given Service.
Args:
request: The request object or dict equivalent
parent: Required. Service name
retry: Retry configuration
timeout: Request timeout in seconds
metadata: Additional metadata
Returns:
Pager for iterating over ServiceLevelObjective objects
"""
def update_service_level_objective(
self,
request=None,
*,
service_level_objective=None,
retry=None,
timeout=None,
metadata=()
) -> service.ServiceLevelObjective:
"""
Update the given ServiceLevelObjective.
Args:
request: The request object or dict equivalent
service_level_objective: Required. Updated SLO
retry: Retry configuration
timeout: Request timeout in seconds
metadata: Additional metadata
Returns:
Updated ServiceLevelObjective object
"""
def delete_service_level_objective(
self,
request=None,
*,
name: str = None,
retry=None,
timeout=None,
metadata=()
) -> None:
"""
Delete the given ServiceLevelObjective.
Args:
request: The request object or dict equivalent
name: Required. SLO name to delete
retry: Retry configuration
timeout: Request timeout in seconds
metadata: Additional metadata
"""Represents a service for monitoring purposes.
class Service:
name: str # Resource name
display_name: str # Human-readable name
custom: Service.Custom # Custom service definition
app_engine: Service.AppEngine # App Engine service
cloud_endpoints: Service.CloudEndpoints # Cloud Endpoints service
cluster_istio: Service.ClusterIstio # Istio service mesh
mesh_istio: Service.MeshIstio # Istio mesh service
istio_canonical_service: Service.IstioCanonicalService # Canonical Istio service
cloud_run: Service.CloudRun # Cloud Run service
gke_namespace: Service.GkeNamespace # GKE namespace service
gke_workload: Service.GkeWorkload # GKE workload service
gke_service: Service.GkeService # GKE service
telemetry: Service.Telemetry # Telemetry configuration
user_labels: Dict[str, str] # User-defined labels
class Service.Custom:
# Custom service defined by a filter
class Service.CloudRun:
service_name: str # Cloud Run service name
location: str # Cloud Run service location
class Service.Telemetry:
resource_name: str # Resource name for telemetryRepresents a Service Level Objective definition.
class ServiceLevelObjective:
    name: str  # Resource name
    display_name: str  # Human-readable name
    service_level_indicator: ServiceLevelIndicator  # SLI definition
    goal: float  # SLO target (0.0 to 1.0)
    rolling_period: Duration  # Rolling period for SLO
    calendar_period: CalendarPeriod  # Calendar period for SLO
    user_labels: Dict[str, str]  # User-defined labels

class ServiceLevelIndicator:
    basic_sli: BasicSli  # Basic SLI definition
    request_based: RequestBasedSli  # Request-based SLI
    windows_based: WindowsBasedSli  # Windows-based SLI

class BasicSli:
    method: List[str]  # HTTP methods to monitor
    location: List[str]  # Locations to monitor
    version: List[str]  # Versions to monitor
    availability: BasicSli.AvailabilityCriteria  # Availability criteria
    latency: BasicSli.LatencyCriteria  # Latency criteria

class RequestBasedSli:
    good_total_ratio: TimeSeriesRatio  # Good events vs total events
    distribution_cut: DistributionCut  # Distribution-based SLI
class WindowsBasedSli:
    good_bad_metric_filter: str  # Metric filter for good/bad windows
    good_total_ratio_threshold: WindowsBasedSli.PerformanceThreshold  # Window is good when its good/total ratio meets the threshold
    metric_mean_in_range: WindowsBasedSli.MetricRange  # Metric mean within range
    metric_sum_in_range: WindowsBasedSli.MetricRange  # Metric sum within range
    window_period: Duration  # Window period

class CreateServiceRequest:
    parent: str  # Required. Project name
    service_id: str  # Service ID
    service: Service  # Required. Service to create

class GetServiceRequest:
    name: str  # Required. Service name

class ListServicesRequest:
    parent: str  # Required. Project name
    filter: str  # Filter expression
    page_size: int  # Maximum results per page
    page_token: str  # Page token

class ListServicesResponse:
    services: List[Service]  # Services
    next_page_token: str  # Next page token

class UpdateServiceRequest:
    service: Service  # Required. Updated service
    update_mask: FieldMask  # Fields to update

class DeleteServiceRequest:
    name: str  # Required. Service name to delete

class CreateServiceLevelObjectiveRequest:
    parent: str  # Required. Service name
    service_level_objective_id: str  # SLO ID
    service_level_objective: ServiceLevelObjective  # Required. SLO to create

class GetServiceLevelObjectiveRequest:
    name: str  # Required. SLO name
    view: ServiceLevelObjective.View  # View type

class ListServiceLevelObjectivesRequest:
    parent: str  # Required. Service name
    filter: str  # Filter expression
    page_size: int  # Maximum results per page
    page_token: str  # Page token

class ListServiceLevelObjectivesResponse:
    service_level_objectives: List[ServiceLevelObjective]  # SLOs
    next_page_token: str  # Next page token

class UpdateServiceLevelObjectiveRequest:
    service_level_objective: ServiceLevelObjective  # Required. Updated SLO
    update_mask: FieldMask  # Fields to update

class DeleteServiceLevelObjectiveRequest:
    name: str  # Required. SLO name to delete

from google.cloud.monitoring import ServiceMonitoringServiceClient
from google.cloud.monitoring_v3.types import Service

client = ServiceMonitoringServiceClient()
# project_id is a placeholder: set it to your Google Cloud project ID first.
project_name = f"projects/{project_id}"

# Create a custom service
service_obj = Service()
service_obj.display_name = "Web Frontend Service"

# Mark the service as a custom (user-defined) service
custom_service = Service.Custom()
service_obj.custom = custom_service

# Add user labels
service_obj.user_labels["team"] = "frontend"
service_obj.user_labels["environment"] = "production"

# service_id is a CreateServiceRequest field, not a keyword parameter of
# create_service(), so it has to travel inside the request.
created_service = client.create_service(
    request={
        "parent": project_name,
        "service_id": "web-frontend",
        "service": service_obj,
    }
)
print(f"Created service: {created_service.name}")

# Create Cloud Run service
cloud_run_service = Service()
cloud_run_service.display_name = "API Service"

# Configure Cloud Run service
cloud_run = Service.CloudRun()
cloud_run.service_name = "api-service"
cloud_run.location = "us-central1"
cloud_run_service.cloud_run = cloud_run

# service_id is a CreateServiceRequest field, not a keyword parameter of
# create_service(), so it has to travel inside the request.
created_cloud_run = client.create_service(
    request={
        "parent": project_name,
        "service_id": "api-service",
        "service": cloud_run_service,
    }
)
print(f"Created Cloud Run service: {created_cloud_run.name}")

from google.cloud.monitoring_v3.types import (
    ServiceLevelObjective, ServiceLevelIndicator, BasicSli
)
from google.protobuf.duration_pb2 import Duration

service_name = f"projects/{project_id}/services/web-frontend"

# Create SLO for availability
slo = ServiceLevelObjective()
slo.display_name = "Web Frontend Availability SLO"
slo.goal = 0.995  # 99.5% availability target

# Define rolling period (30 days)
rolling_period = Duration()
rolling_period.seconds = 30 * 24 * 60 * 60  # 30 days
slo.rolling_period = rolling_period

# Define Service Level Indicator
sli = ServiceLevelIndicator()
basic_sli = BasicSli()

# Configure availability criteria
availability = BasicSli.AvailabilityCriteria()
basic_sli.availability = availability
sli.basic_sli = basic_sli
slo.service_level_indicator = sli

# Add user labels
slo.user_labels["tier"] = "critical"
slo.user_labels["team"] = "frontend"

# service_level_objective_id is a request field, not a keyword parameter of
# create_service_level_objective(), so it has to travel inside the request.
created_slo = client.create_service_level_objective(
    request={
        "parent": service_name,
        "service_level_objective_id": "availability-slo",
        "service_level_objective": slo,
    }
)
print(f"Created SLO: {created_slo.name}")
print(f"Target: {created_slo.goal * 100}%")

# Create SLO for latency
latency_slo = ServiceLevelObjective()
latency_slo.display_name = "Web Frontend Latency SLO"
latency_slo.goal = 0.90  # 90% of requests under threshold

# Rolling period (7 days)
rolling_period = Duration()
rolling_period.seconds = 7 * 24 * 60 * 60
latency_slo.rolling_period = rolling_period

# Define latency SLI
latency_sli = ServiceLevelIndicator()
latency_basic = BasicSli()

# Configure latency criteria (500ms threshold).
# Assign the whole Duration: reading `threshold` back from the message yields
# an immutable datetime.timedelta, so setting `.seconds` / `.nanos` on it
# in place does not work.
latency_criteria = BasicSli.LatencyCriteria()
latency_criteria.threshold = Duration(seconds=0, nanos=500_000_000)  # 500ms
latency_basic.latency = latency_criteria
latency_sli.basic_sli = latency_basic
latency_slo.service_level_indicator = latency_sli

# service_level_objective_id is a request field, not a keyword parameter of
# create_service_level_objective(), so it has to travel inside the request.
created_latency_slo = client.create_service_level_objective(
    request={
        "parent": service_name,
        "service_level_objective_id": "latency-slo",
        "service_level_objective": latency_slo,
    }
)
print(f"Created latency SLO: {created_latency_slo.name}")

# List all services
print("Services:")
for service in client.list_services(parent=project_name):
    print(f"- {service.display_name}: {service.name}")

    # List SLOs for each service
    print(" SLOs:")
    for slo in client.list_service_level_objectives(parent=service.name):
        print(f" - {slo.display_name}: {slo.goal * 100}% target")

# Filter services by label.
# `filter` is a ListServicesRequest field, not a keyword parameter of
# list_services(), so it has to travel inside the request.
filter_expr = 'user_labels.environment="production"'
print("\nProduction services:")
for service in client.list_services(
    request={"parent": project_name, "filter": filter_expr}
):
    print(f"- {service.display_name}")

from google.protobuf import field_mask_pb2
# Get existing service
service_name = f"projects/{project_id}/services/web-frontend"
service = client.get_service(name=service_name)

# Update service properties
service.display_name = "Updated Web Frontend Service"
service.user_labels["version"] = "v2.0"

# Create field mask for selective update
update_mask = field_mask_pb2.FieldMask()
update_mask.paths.extend(["display_name", "user_labels"])

# update_mask is an UpdateServiceRequest field, not a keyword parameter of
# update_service(), so it has to travel inside the request.
updated_service = client.update_service(
    request={"service": service, "update_mask": update_mask}
)
print(f"Updated service: {updated_service.display_name}")

from google.cloud.monitoring_v3.types import RequestBasedSli, TimeSeriesRatio
# Create request-based SLO
request_slo = ServiceLevelObjective()
request_slo.display_name = "Error Rate SLO"
request_slo.goal = 0.999  # 99.9% success rate

# Rolling period
rolling_period = Duration()
rolling_period.seconds = 28 * 24 * 60 * 60  # 28 days
request_slo.rolling_period = rolling_period

# Define request-based SLI
request_sli = ServiceLevelIndicator()
request_based = RequestBasedSli()

# Configure good vs total ratio.
# NOTE(review): the filter strings below look like placeholders - confirm
# they are valid Monitoring filters for your metric before using them.
ratio = TimeSeriesRatio()
ratio.good_service_filter = 'project="my-project" AND service_name="api-service" AND response_code_class="2xx"'
ratio.total_service_filter = 'project="my-project" AND service_name="api-service"'
request_based.good_total_ratio = ratio
request_sli.request_based = request_based
request_slo.service_level_indicator = request_sli

# service_level_objective_id is a request field, not a keyword parameter of
# create_service_level_objective(), so it has to travel inside the request.
created_request_slo = client.create_service_level_objective(
    request={
        "parent": service_name,
        "service_level_objective_id": "error-rate-slo",
        "service_level_objective": request_slo,
    }
)
print(f"Created request-based SLO: {created_request_slo.name}")

# Delete SLO
slo_name = f"projects/{project_id}/services/web-frontend/serviceLevelObjectives/availability-slo"
client.delete_service_level_objective(name=slo_name)
print(f"Deleted SLO: {slo_name}")

# Delete service (soft delete)
service_name = f"projects/{project_id}/services/web-frontend"
client.delete_service(name=service_name)
print(f"Deleted service: {service_name}")

import asyncio
from google.cloud.monitoring import ServiceMonitoringServiceAsyncClient
async def manage_services():
    """List every service in the project and, for each one, its SLOs.

    Uses the async client; each list call is awaited to obtain a pager, which
    is then consumed with ``async for``. ``project_id`` is a placeholder that
    must be defined by the surrounding script.
    """
    client = ServiceMonitoringServiceAsyncClient()
    project_name = f"projects/{project_id}"

    # List services asynchronously
    async for service in await client.list_services(parent=project_name):
        print(f"Async service: {service.display_name}")

        # List SLOs for each service
        async for slo in await client.list_service_level_objectives(parent=service.name):
            print(f" Async SLO: {slo.display_name}")
asyncio.run(manage_services())class ServiceMonitoringServiceClient:
@staticmethod
def service_path(project: str, service: str) -> str:
"""Returns a fully-qualified service string."""
@staticmethod
def service_level_objective_path(
project: str,
service: str,
service_level_objective: str
) -> str:
"""Returns a fully-qualified service_level_objective string."""
@staticmethod
def parse_service_path(path: str) -> Dict[str, str]:
"""Parses a service path into its component segments."""Service monitoring operations can raise specific exceptions:
from google.api_core import exceptions
from google.cloud.monitoring import ServiceMonitoringServiceClient
client = ServiceMonitoringServiceClient()
try:
service = client.get_service(name="invalid/path")
except exceptions.NotFound:
print("Service not found")
except exceptions.InvalidArgument as e:
print(f"Invalid service configuration: {e}")
except exceptions.PermissionDenied:
print("Insufficient permissions")
except exceptions.FailedPrecondition as e:
print(f"Cannot delete service with active SLOs: {e}")Install with Tessl CLI
npx tessl i tessl/pypi-google-cloud-monitoring