# Monitoring and Observability

Types for metrics, distributions, monitored resources, and logging. These types enable comprehensive observability and monitoring of Google services through structured data collection, statistical analysis, and performance tracking.

## Capabilities

### Metric Definitions

Define metrics for monitoring service performance and behavior.

```python { .api }
11
from google.api.metric_pb2 import MetricDescriptor, Metric
12
from google.api.label_pb2 import LabelDescriptor
13
14
class MetricDescriptor(message.Message):
    """Describes a metric type and its schema."""

    # Resource name of the descriptor, e.g.
    # "projects/my-project/metricDescriptors/compute.googleapis.com/instance/cpu/usage"
    name: str
    # Metric type identifier, e.g. "compute.googleapis.com/instance/cpu/usage"
    type: str
    labels: list[LabelDescriptor]  # Metric labels
    metric_kind: MetricDescriptor.MetricKind  # Kind of measurement
    value_type: MetricDescriptor.ValueType  # Type of metric values
    unit: str  # Unit of measurement
    description: str  # Metric description
    display_name: str  # Human-readable name
    metadata: MetricDescriptor.MetricDescriptorMetadata  # Additional metadata
    launch_stage: LaunchStage  # API maturity level
    monitored_resource_types: list[str]  # Compatible resource types

    class MetricKind(enum.Enum):
        """Kind of measurement."""

        METRIC_KIND_UNSPECIFIED = 0
        GAUGE = 1  # Instantaneous measurement
        DELTA = 2  # Change since last recorded value
        CUMULATIVE = 3  # Cumulative measurement

    class ValueType(enum.Enum):
        """Type of metric values."""

        VALUE_TYPE_UNSPECIFIED = 0
        BOOL = 1  # Boolean values
        INT64 = 2  # 64-bit integers
        DOUBLE = 3  # Double precision floats
        STRING = 4  # String values
        DISTRIBUTION = 5  # Distribution values
        MONEY = 6  # Monetary values
44
45
class Metric(message.Message):
    """Single metric measurement."""

    type: str  # Metric type identifier
    labels: dict[str, str]  # Metric label values
49
50
class LabelDescriptor(message.Message):
    """Describes a label for metrics and resources."""

    key: str  # Label key
    value_type: LabelDescriptor.ValueType  # Label value type
    description: str  # Label description

    class ValueType(enum.Enum):
        """Label value types."""

        STRING = 0
        BOOL = 1
        INT64 = 2
61
```

### Statistical Distributions

Represent statistical distributions for latency, size, and other measurements.

```python { .api }
68
from google.api.distribution_pb2 import Distribution
69
70
class Distribution(message.Message):
    """Statistical distribution of values."""

    count: int  # Number of samples
    mean: float  # Mean of samples
    sum_of_squared_deviation: float  # Sum of squared deviations from mean
    range: Distribution.Range  # Min/max range
    bucket_options: Distribution.BucketOptions  # Bucket configuration
    bucket_counts: list[int]  # Count per bucket
    exemplars: list[Distribution.Exemplar]  # Example values

    class Range(message.Message):
        """Min and max values in the distribution."""

        min: float  # Minimum value
        max: float  # Maximum value

    class BucketOptions(message.Message):
        """Bucket options for histogram."""

        # Union field 'options' (one of):
        linear_buckets: Distribution.BucketOptions.Linear
        exponential_buckets: Distribution.BucketOptions.Exponential
        explicit_buckets: Distribution.BucketOptions.Explicit

        class Linear(message.Message):
            """Linear bucket spacing."""

            num_finite_buckets: int  # Number of finite buckets
            width: float  # Bucket width
            offset: float  # Starting offset

        class Exponential(message.Message):
            """Exponential bucket spacing."""

            num_finite_buckets: int  # Number of finite buckets
            growth_factor: float  # Growth factor between buckets
            scale: float  # Scale factor

        class Explicit(message.Message):
            """Explicit bucket boundaries."""

            bounds: list[float]  # Bucket boundary values

    class Exemplar(message.Message):
        """Example value from the distribution."""

        value: float  # Example value
        timestamp: Timestamp  # When value was recorded
        attachments: list[Any]  # Additional context
113
```

### Monitored Resources

Define resources that can be monitored and their metadata.

```python { .api }
120
from google.api.monitored_resource_pb2 import (
121
MonitoredResourceDescriptor, MonitoredResource, MonitoredResourceMetadata
122
)
123
124
class MonitoredResourceDescriptor(message.Message):
    """Describes a monitored resource type."""

    name: str  # Resource descriptor name
    type: str  # Resource type identifier
    display_name: str  # Human-readable name
    description: str  # Resource description
    labels: list[LabelDescriptor]  # Resource labels
    launch_stage: LaunchStage  # API maturity level
132
133
class MonitoredResource(message.Message):
    """Monitored resource instance."""

    type: str  # Resource type
    labels: dict[str, str]  # Resource label values
137
138
class MonitoredResourceMetadata(message.Message):
    """Additional metadata for monitored resources."""

    system_labels: Struct  # System-defined labels
    user_labels: dict[str, str]  # User-defined labels
142
```

### Log Entry Types

Structured logging types for service observability.

```python { .api }
149
from google.logging.type.http_request_pb2 import HttpRequest
150
from google.logging.type.log_severity_pb2 import LogSeverity
151
152
class HttpRequest(message.Message):
    """HTTP request information for logging."""

    request_method: str  # HTTP method (GET, POST, etc.)
    request_url: str  # Request URL
    request_size: int  # Request size in bytes
    status: int  # HTTP status code
    response_size: int  # Response size in bytes
    user_agent: str  # User agent string
    remote_ip: str  # Client IP address
    server_ip: str  # Server IP address
    referer: str  # HTTP referer
    latency: Duration  # Request latency
    cache_lookup: bool  # Cache lookup performed
    cache_hit: bool  # Cache hit occurred
    cache_validated_with_origin_server: bool  # Cache validation occurred
    cache_fill_bytes: int  # Bytes sent to cache
    protocol: str  # Protocol version
169
170
class LogSeverity(enum.Enum):
    """Log entry severity levels."""

    DEFAULT = 0  # Default severity
    DEBUG = 100  # Debug information
    INFO = 200  # Informational messages
    NOTICE = 300  # Normal but significant events
    WARNING = 400  # Warning conditions
    ERROR = 500  # Error conditions
    CRITICAL = 600  # Critical conditions
    ALERT = 700  # Action must be taken immediately
    EMERGENCY = 800  # System is unusable
181
```

## Usage Examples

### Creating Metric Descriptors

```python
from google.api.metric_pb2 import MetricDescriptor
from google.api.label_pb2 import LabelDescriptor

# Describe the CPU usage metric in a single constructor call
cpu_metric = MetricDescriptor(
    name="projects/my-project/metricDescriptors/compute.googleapis.com/instance/cpu/usage",
    type="compute.googleapis.com/instance/cpu/usage",
    metric_kind=MetricDescriptor.MetricKind.GAUGE,
    value_type=MetricDescriptor.ValueType.DOUBLE,
    unit="1",  # Dimensionless ratio (0.0-1.0)
    description="CPU usage percentage",
    display_name="CPU Usage",
)

# Declare the labels carried by this metric
instance_label = LabelDescriptor(
    key="instance_name",
    value_type=LabelDescriptor.ValueType.STRING,
    description="Name of the VM instance",
)
cpu_metric.labels.append(instance_label)

zone_label = LabelDescriptor(
    key="zone",
    value_type=LabelDescriptor.ValueType.STRING,
    description="GCP zone of the instance",
)
cpu_metric.labels.append(zone_label)
```

### Recording Metric Values

```python
from google.api.metric_pb2 import Metric

# Identify the metric: its type plus one value per label
metric = Metric(
    type="compute.googleapis.com/instance/cpu/usage",
    labels={
        "instance_name": "web-server-1",
        "zone": "us-central1-a",
    },
)

# Metric carries no measured value itself; the value would be recorded in a
# TimeSeries (TimeSeries is typically part of monitoring service APIs)
```

### Creating Distributions

```python
from google.api.distribution_pb2 import Distribution

# Create a latency distribution (all values in seconds)
latency_dist = Distribution()
latency_dist.count = 1000  # 1000 requests
latency_dist.mean = 0.150  # 150ms average
# Illustrative value; it must be at least (max - mean)**2, about 3.42 here
latency_dist.sum_of_squared_deviation = 30.0

# Set range (must agree with the bucket counts below)
latency_dist.range.min = 0.005  # 5ms minimum (lands in the < 10ms bucket)
latency_dist.range.max = 2.000  # 2s maximum (lands in the 1.28-2.56s bucket)

# Configure exponential buckets for latency.
# Finite bucket i (1 <= i <= num_finite_buckets) covers
# [scale * growth_factor**(i - 1), scale * growth_factor**i).
bucket_opts = latency_dist.bucket_options.exponential_buckets
bucket_opts.num_finite_buckets = 20
bucket_opts.growth_factor = 2.0
bucket_opts.scale = 0.01  # Start at 10ms

# Set bucket counts (example data). Index 0 is the underflow bucket, trailing
# zero-count buckets are omitted, and the counts add up to `count` (1000).
latency_dist.bucket_counts.extend([
    10,   # < 10ms (underflow)
    50,   # 10-20ms
    200,  # 20-40ms
    300,  # 40-80ms
    250,  # 80-160ms
    150,  # 160-320ms
    30,   # 320-640ms
    9,    # 640ms-1.28s
    1,    # 1.28-2.56s
])

# Add exemplar
exemplar = latency_dist.exemplars.add()
exemplar.value = 0.095  # 95ms example
exemplar.timestamp.GetCurrentTime()
```

### Working with Monitored Resources

```python
# LabelDescriptor is used for the resource labels below, so import it here
from google.api.label_pb2 import LabelDescriptor
from google.api.monitored_resource_pb2 import MonitoredResourceDescriptor, MonitoredResource

# Define a monitored resource type
resource_desc = MonitoredResourceDescriptor()
resource_desc.name = "projects/my-project/monitoredResourceDescriptors/gce_instance"
resource_desc.type = "gce_instance"
resource_desc.display_name = "GCE VM Instance"
resource_desc.description = "A Google Compute Engine virtual machine instance"

# Add resource labels (one descriptor per label key set on the instance below)
project_label = LabelDescriptor()
project_label.key = "project_id"
project_label.value_type = LabelDescriptor.ValueType.STRING
project_label.description = "GCP project ID"
resource_desc.labels.append(project_label)

instance_label = LabelDescriptor()
instance_label.key = "instance_id"
instance_label.value_type = LabelDescriptor.ValueType.STRING
instance_label.description = "VM instance ID"
resource_desc.labels.append(instance_label)

zone_label = LabelDescriptor()
zone_label.key = "zone"
zone_label.value_type = LabelDescriptor.ValueType.STRING
zone_label.description = "GCP zone of the instance"
resource_desc.labels.append(zone_label)

# Create a monitored resource instance
resource = MonitoredResource()
resource.type = "gce_instance"
resource.labels["project_id"] = "my-project"
resource.labels["instance_id"] = "1234567890123456789"
resource.labels["zone"] = "us-central1-a"
```

### HTTP Request Logging

```python
from google.logging.type.http_request_pb2 import HttpRequest
from google.protobuf.duration_pb2 import Duration

# Describe a single HTTP request for logging
http_request = HttpRequest(
    request_method="GET",
    request_url="https://api.example.com/users/123",
    status=200,
    request_size=1024,
    response_size=2048,
    user_agent="MyApp/1.0",
    remote_ip="203.0.113.10",
    referer="https://example.com/dashboard",
    # Latency of 250ms: 0 whole seconds plus 250,000,000 nanoseconds
    latency=Duration(seconds=0, nanos=250000000),
    # Cache information
    cache_lookup=True,
    cache_hit=False,
    cache_validated_with_origin_server=True,
)
```

### Log Severity Usage

```python
333
from google.logging.type.log_severity_pb2 import LogSeverity
334
335
def get_severity_name(severity: LogSeverity) -> str:
    """Convert severity enum to string.

    Args:
        severity: A LogSeverity value such as ``LogSeverity.ERROR``.

    Returns:
        The name of the matching enum member (for example ``"ERROR"``), or
        ``"UNKNOWN"`` when the value is not a defined severity.
    """
    try:
        # The generated enum wrapper already maps numbers to member names, so
        # no hand-maintained lookup table is needed.
        return LogSeverity.Name(severity)
    except (ValueError, TypeError):
        # ValueError: no member has this number; TypeError: not an integer.
        return "UNKNOWN"

# Example: print the name of a severity value
log_severity = LogSeverity.ERROR
print(f"Log level: {get_severity_name(log_severity)}")
```

### Creating Custom Metrics

```python
358
from google.api.metric_pb2 import MetricDescriptor
359
360
def create_custom_metric(name: str, description: str, unit: str,
                         metric_kind: MetricDescriptor.MetricKind,
                         value_type: MetricDescriptor.ValueType,
                         project_id: str = "my-project") -> MetricDescriptor:
    """Create a custom metric descriptor.

    Args:
        name: Metric type, e.g. "custom.googleapis.com/service/error_rate".
        description: Text stored as both the description and the display name.
        unit: Unit string for the metric values.
        metric_kind: Kind of measurement (e.g. GAUGE, CUMULATIVE).
        value_type: Type of the metric values (e.g. DOUBLE, INT64).
        project_id: Project used to build the descriptor's resource name.
            Defaults to "my-project".

    Returns:
        The populated MetricDescriptor.
    """
    return MetricDescriptor(
        name=f"projects/{project_id}/metricDescriptors/{name}",
        type=name,
        description=description,
        unit=unit,
        metric_kind=metric_kind,
        value_type=value_type,
        # This helper reuses the description as the human-readable name.
        display_name=description,
    )

# Gauge metric: current error rate of the service
error_rate = create_custom_metric(
    name="custom.googleapis.com/service/error_rate",
    description="Service Error Rate",
    unit="1",  # Ratio
    metric_kind=MetricDescriptor.MetricKind.GAUGE,
    value_type=MetricDescriptor.ValueType.DOUBLE,
)

# Cumulative metric: running total of requests
request_count = create_custom_metric(
    name="custom.googleapis.com/service/request_count",
    description="Total Request Count",
    unit="1",  # Count
    metric_kind=MetricDescriptor.MetricKind.CUMULATIVE,
    value_type=MetricDescriptor.ValueType.INT64,
)
```

### Distribution Analysis

```python
396
from google.api.distribution_pb2 import Distribution
397
import math
398
399
def analyze_distribution(dist: Distribution):
    """Analyze distribution statistics.

    Prints the sample count and mean, the standard deviation (when there are
    at least two samples), the min/max range (when it is set), and a short
    histogram summary (when bucket counts are present).

    Args:
        dist: The distribution to summarize.
    """
    if dist.count == 0:
        print("No data in distribution")
        return

    print(f"Sample count: {dist.count}")
    print(f"Mean: {dist.mean:.3f}")

    # Calculate standard deviation (sample form: n - 1 in the denominator)
    if dist.count > 1:
        variance = dist.sum_of_squared_deviation / (dist.count - 1)
        std_dev = math.sqrt(variance)
        print(f"Standard deviation: {std_dev:.3f}")

    # The range is optional; skip it when unset rather than print 0.000 - 0.000
    if dist.HasField("range"):
        print(f"Range: {dist.range.min:.3f} - {dist.range.max:.3f}")

    # Analyze bucket distribution
    if dist.bucket_counts:
        print(f"Histogram buckets: {len(dist.bucket_counts)}")
        total_samples = sum(dist.bucket_counts)
        print(f"Samples in buckets: {total_samples}")

        # Find mode (first bucket with the most samples) in a single pass;
        # max() keeps the earliest item on ties.
        mode_bucket, max_count = max(
            enumerate(dist.bucket_counts), key=lambda item: item[1]
        )
        print(f"Mode bucket: {mode_bucket} (count: {max_count})")

# Example: summarize a distribution
latency_distribution = Distribution()
# ... fill in count, mean, range, and bucket counts here ...
analyze_distribution(latency_distribution)
```