or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

api-framework.md · cloud-services.md · common-types.md · index.md · monitoring.md · operations.md · rpc-status.md

docs/monitoring.md

0

# Monitoring and Observability

1

2

Types for metrics, distributions, monitored resources, and logging. Enables comprehensive observability and monitoring of Google services with structured data collection, statistical analysis, and performance tracking.

3

4

## Capabilities

5

6

### Metric Definitions

7

8

Define metrics for monitoring service performance and behavior.

9

10

```python { .api }

11

from google.api.metric_pb2 import MetricDescriptor, Metric

12

from google.api.label_pb2 import LabelDescriptor

13

14

class MetricDescriptor(message.Message):
    """Describes a metric type and its schema."""

    # Resource name of the descriptor, e.g.
    # "projects/my-project/metricDescriptors/compute.googleapis.com/instance/cpu/usage"
    name: str
    type: str  # Metric type identifier (e.g., "compute.googleapis.com/instance/cpu/usage")
    labels: list[LabelDescriptor]  # Metric labels
    metric_kind: MetricDescriptor.MetricKind  # Kind of measurement
    value_type: MetricDescriptor.ValueType  # Type of metric values
    unit: str  # Unit of measurement
    description: str  # Metric description
    display_name: str  # Human-readable name
    metadata: MetricDescriptor.MetricDescriptorMetadata  # Additional metadata
    launch_stage: LaunchStage  # API maturity level
    monitored_resource_types: list[str]  # Compatible resource types

    class MetricKind(enum.Enum):
        """Kind of measurement."""

        METRIC_KIND_UNSPECIFIED = 0
        GAUGE = 1  # Instantaneous measurement
        DELTA = 2  # Change since last recorded value
        CUMULATIVE = 3  # Cumulative measurement

    class ValueType(enum.Enum):
        """Type of metric values."""

        VALUE_TYPE_UNSPECIFIED = 0
        BOOL = 1  # Boolean values
        INT64 = 2  # 64-bit integers
        DOUBLE = 3  # Double precision floats
        STRING = 4  # String values
        DISTRIBUTION = 5  # Distribution values
        MONEY = 6  # Monetary values

44

45

class Metric(message.Message):
    """Identifies one metric by its type and its label values."""

    type: str  # Identifier of the metric type
    labels: dict[str, str]  # Value assigned to each metric label

49

50

class LabelDescriptor(message.Message):
    """Schema of a single label used by metrics and resources."""

    key: str  # Key of the label
    value_type: LabelDescriptor.ValueType  # Type of the label's value
    description: str  # Description of the label

    class ValueType(enum.Enum):
        """Types a label value can take."""

        STRING = 0
        BOOL = 1
        INT64 = 2

61

```

62

63

### Statistical Distributions

64

65

Represent statistical distributions for latency, size, and other measurements.

66

67

```python { .api }

68

from google.api.distribution_pb2 import Distribution

69

70

class Distribution(message.Message):
    """Summary statistics and histogram for a set of sampled values."""

    count: int  # How many samples were recorded
    mean: float  # Arithmetic mean of the samples
    sum_of_squared_deviation: float  # Sum of squared deviations from the mean
    range: Distribution.Range  # Smallest and largest sample
    bucket_options: Distribution.BucketOptions  # How histogram buckets are laid out
    bucket_counts: list[int]  # Number of samples in each bucket
    exemplars: list[Distribution.Exemplar]  # Example sample values

    class Range(message.Message):
        """Smallest and largest value in the distribution."""

        min: float  # Minimum value
        max: float  # Maximum value

    class BucketOptions(message.Message):
        """Histogram bucket layout."""

        # 'options' union: one of the following layouts is used
        linear_buckets: Distribution.BucketOptions.Linear
        exponential_buckets: Distribution.BucketOptions.Exponential
        explicit_buckets: Distribution.BucketOptions.Explicit

        class Linear(message.Message):
            """Buckets spaced linearly."""

            num_finite_buckets: int  # How many finite buckets there are
            width: float  # Width of each bucket
            offset: float  # Offset at which the buckets start

        class Exponential(message.Message):
            """Buckets spaced exponentially."""

            num_finite_buckets: int  # How many finite buckets there are
            growth_factor: float  # Factor by which consecutive buckets grow
            scale: float  # Scale factor

        class Explicit(message.Message):
            """Buckets given by explicit boundaries."""

            bounds: list[float]  # Boundary values between buckets

    class Exemplar(message.Message):
        """One example value taken from the distribution."""

        value: float  # The example value
        timestamp: Timestamp  # Time at which the value was recorded
        attachments: list[Any]  # Extra context attached to the value

113

```

114

115

### Monitored Resources

116

117

Define resources that can be monitored and their metadata.

118

119

```python { .api }

120

from google.api.monitored_resource_pb2 import (

121

MonitoredResourceDescriptor, MonitoredResource, MonitoredResourceMetadata

122

)

123

124

class MonitoredResourceDescriptor(message.Message):
    """Schema of a monitored resource type."""

    name: str  # Name of the resource descriptor
    type: str  # Identifier of the resource type
    display_name: str  # Name intended for human readers
    description: str  # Description of the resource
    labels: list[LabelDescriptor]  # Labels the resource carries
    launch_stage: LaunchStage  # Maturity level of the API

132

133

class MonitoredResource(message.Message):
    """One instance of a monitored resource."""

    type: str  # Type of the resource
    labels: dict[str, str]  # Value assigned to each resource label

137

138

class MonitoredResourceMetadata(message.Message):
    """Extra metadata attached to a monitored resource."""

    system_labels: Struct  # Labels defined by the system
    user_labels: dict[str, str]  # Labels defined by the user

142

```

143

144

### Log Entry Types

145

146

Structured logging types for service observability.

147

148

```python { .api }

149

from google.logging.type.http_request_pb2 import HttpRequest

150

from google.logging.type.log_severity_pb2 import LogSeverity

151

152

class HttpRequest(message.Message):
    """Details of an HTTP request, recorded for logging."""

    request_method: str  # HTTP method such as GET or POST
    request_url: str  # URL that was requested
    request_size: int  # Size of the request, in bytes
    status: int  # HTTP status code of the response
    response_size: int  # Size of the response, in bytes
    user_agent: str  # User agent string
    remote_ip: str  # IP address of the client
    server_ip: str  # IP address of the server
    referer: str  # HTTP referer
    latency: Duration  # Latency of the request
    cache_lookup: bool  # Whether a cache lookup was performed
    cache_hit: bool  # Whether the lookup was a cache hit
    cache_validated_with_origin_server: bool  # Whether cache validation occurred
    cache_fill_bytes: int  # Number of bytes sent to the cache
    protocol: str  # Protocol version

169

170

class LogSeverity(enum.Enum):
    """Severity levels a log entry can have."""

    DEFAULT = 0  # Default severity
    DEBUG = 100  # Debug information
    INFO = 200  # Informational messages
    NOTICE = 300  # Normal but significant events
    WARNING = 400  # Warning conditions
    ERROR = 500  # Error conditions
    CRITICAL = 600  # Critical conditions
    ALERT = 700  # Action must be taken immediately
    EMERGENCY = 800  # System is unusable

181

```

182

183

## Usage Examples

184

185

### Creating Metric Descriptors

186

187

```python

188

from google.api.metric_pb2 import MetricDescriptor

189

from google.api.label_pb2 import LabelDescriptor

190

191

# Build the CPU usage metric descriptor in a single constructor call
cpu_metric = MetricDescriptor(
    name="projects/my-project/metricDescriptors/compute.googleapis.com/instance/cpu/usage",
    type="compute.googleapis.com/instance/cpu/usage",
    metric_kind=MetricDescriptor.MetricKind.GAUGE,
    value_type=MetricDescriptor.ValueType.DOUBLE,
    unit="1",  # values lie in the range 0.0-1.0
    description="CPU usage percentage",
    display_name="CPU Usage",
)

# Attach the labels that qualify each measurement
instance_label = LabelDescriptor(
    key="instance_name",
    value_type=LabelDescriptor.ValueType.STRING,
    description="Name of the VM instance",
)
cpu_metric.labels.append(instance_label)

zone_label = LabelDescriptor(
    key="zone",
    value_type=LabelDescriptor.ValueType.STRING,
    description="GCP zone of the instance",
)
cpu_metric.labels.append(zone_label)

213

```

214

215

### Recording Metric Values

216

217

```python

218

from google.api.metric_pb2 import Metric

219

220

# Identify the metric being recorded by its type and label values
metric = Metric(
    type="compute.googleapis.com/instance/cpu/usage",
    labels={
        "instance_name": "web-server-1",
        "zone": "us-central1-a",
    },
)

# The measured value itself is not stored on Metric: it would be recorded
# in a TimeSeries (typically part of the monitoring service APIs)

228

```

229

230

### Creating Distributions

231

232

```python

233

from google.api.distribution_pb2 import Distribution

234

235

# Create a latency distribution
latency_dist = Distribution()
latency_dist.count = 1000  # 1000 requests
latency_dist.mean = 0.150  # 150ms average
latency_dist.sum_of_squared_deviation = 2.5  # illustrative value

# Set range. It has to agree with the histogram below: the underflow bucket
# is non-empty, so the minimum lies under 10ms, and the highest populated
# bucket is 640ms-1.28s, so the maximum lies inside that bucket.
latency_dist.range.min = 0.005  # 5ms minimum
latency_dist.range.max = 1.200  # 1.2s maximum

# Configure exponential buckets for latency. Finite bucket i (1-based) spans
# [scale * growth_factor**(i - 1), scale * growth_factor**i).
bucket_opts = latency_dist.bucket_options.exponential_buckets
bucket_opts.num_finite_buckets = 20
bucket_opts.growth_factor = 2.0
bucket_opts.scale = 0.01  # First finite bucket starts at 10ms

# Set bucket counts (example data); the counts add up to `count` (1000).
# Only the first 9 of the 22 buckets are listed.
# NOTE(review): this relies on omitted trailing buckets counting as zero -
# confirm against the Distribution reference.
latency_dist.bucket_counts.extend([
    10,   # < 10ms (underflow bucket)
    50,   # 10-20ms
    200,  # 20-40ms
    300,  # 40-80ms
    250,  # 80-160ms
    150,  # 160-320ms
    30,   # 320-640ms
    10,   # 640ms-1.28s
    0,    # 1.28-2.56s
])

# Add exemplar
exemplar = latency_dist.exemplars.add()
exemplar.value = 0.095  # 95ms example
exemplar.timestamp.GetCurrentTime()

268

```

269

270

### Working with Monitored Resources

271

272

```python

273

from google.api.label_pb2 import LabelDescriptor
from google.api.monitored_resource_pb2 import MonitoredResourceDescriptor, MonitoredResource

# Define a monitored resource type
resource_desc = MonitoredResourceDescriptor()
resource_desc.name = "projects/my-project/monitoredResourceDescriptors/gce_instance"
resource_desc.type = "gce_instance"
resource_desc.display_name = "GCE VM Instance"
resource_desc.description = "A Google Compute Engine virtual machine instance"

# Add resource labels; every label set on the instance below is declared here
project_label = LabelDescriptor()
project_label.key = "project_id"
project_label.value_type = LabelDescriptor.ValueType.STRING
project_label.description = "GCP project ID"
resource_desc.labels.append(project_label)

instance_label = LabelDescriptor()
instance_label.key = "instance_id"
instance_label.value_type = LabelDescriptor.ValueType.STRING
instance_label.description = "VM instance ID"
resource_desc.labels.append(instance_label)

zone_label = LabelDescriptor()
zone_label.key = "zone"
zone_label.value_type = LabelDescriptor.ValueType.STRING
zone_label.description = "GCP zone of the instance"
resource_desc.labels.append(zone_label)

# Create a monitored resource instance
resource = MonitoredResource()
resource.type = "gce_instance"
resource.labels["project_id"] = "my-project"
resource.labels["instance_id"] = "1234567890123456789"
resource.labels["zone"] = "us-central1-a"

301

```

302

303

### HTTP Request Logging

304

305

```python

306

from google.logging.type.http_request_pb2 import HttpRequest

307

from google.protobuf.duration_pb2 import Duration

308

309

# Describe an HTTP request for logging, passing every field to the constructor
http_request = HttpRequest(
    request_method="GET",
    request_url="https://api.example.com/users/123",
    status=200,
    request_size=1024,
    response_size=2048,
    user_agent="MyApp/1.0",
    remote_ip="203.0.113.10",
    referer="https://example.com/dashboard",
    # Latency of 250ms, expressed as whole seconds plus nanoseconds
    latency=Duration(seconds=0, nanos=250000000),
    # Cache information
    cache_lookup=True,
    cache_hit=False,
    cache_validated_with_origin_server=True,
)

328

```

329

330

### Log Severity Usage

331

332

```python

333

from google.logging.type.log_severity_pb2 import LogSeverity

334

335

def get_severity_name(severity: LogSeverity) -> str:
    """Convert severity enum to string.

    Args:
        severity: A ``LogSeverity`` value, e.g. ``LogSeverity.ERROR``.

    Returns:
        The name declared for ``severity`` (such as ``"ERROR"``), or
        ``"UNKNOWN"`` when the value has no declared name.
    """
    try:
        # Ask the generated enum wrapper for the name rather than keeping a
        # hand-written table that must be edited whenever a level is added.
        return LogSeverity.Name(severity)
    except (ValueError, TypeError):
        # Name() rejects numbers without a declared name and non-integers.
        return "UNKNOWN"

349

350

# Look up the printable name of a severity and report it
log_severity = LogSeverity.ERROR
severity_label = get_severity_name(log_severity)
print(f"Log level: {severity_label}")

353

```

354

355

### Creating Custom Metrics

356

357

```python

358

from google.api.metric_pb2 import MetricDescriptor

359

360

def create_custom_metric(name: str, description: str, unit: str,
                         metric_kind: MetricDescriptor.MetricKind,
                         value_type: MetricDescriptor.ValueType,
                         project_id: str = "my-project") -> MetricDescriptor:
    """Create a custom metric descriptor.

    Args:
        name: Metric type, e.g. "custom.googleapis.com/service/error_rate".
            Stored as ``type`` and used as the tail of the resource name.
        description: Text stored as both ``description`` and ``display_name``.
        unit: Unit string stored on the descriptor.
        metric_kind: Kind of measurement (a ``MetricDescriptor.MetricKind``).
        value_type: Type of the values (a ``MetricDescriptor.ValueType``).
        project_id: Project that owns the descriptor; used to build the
            resource name. Defaults to "my-project".

    Returns:
        The populated ``MetricDescriptor``.
    """
    metric = MetricDescriptor()
    # Resource name is the metric type nested under the owning project.
    metric.name = f"projects/{project_id}/metricDescriptors/{name}"
    metric.type = name
    metric.description = description
    metric.unit = unit
    metric.metric_kind = metric_kind
    metric.value_type = value_type
    # The description doubles as the human-readable name.
    metric.display_name = description
    return metric

373

374

# Error rate: a gauge of double values whose unit "1" denotes a ratio
error_rate = create_custom_metric(
    "custom.googleapis.com/service/error_rate",
    "Service Error Rate",
    "1",
    metric_kind=MetricDescriptor.MetricKind.GAUGE,
    value_type=MetricDescriptor.ValueType.DOUBLE,
)

# Request count: a cumulative integer metric whose unit "1" denotes a count
request_count = create_custom_metric(
    "custom.googleapis.com/service/request_count",
    "Total Request Count",
    "1",
    metric_kind=MetricDescriptor.MetricKind.CUMULATIVE,
    value_type=MetricDescriptor.ValueType.INT64,
)

391

```

392

393

### Distribution Analysis

394

395

```python

396

from google.api.distribution_pb2 import Distribution

397

import math

398

399

def analyze_distribution(dist: Distribution) -> None:
    """Print summary statistics for a distribution.

    Reports the sample count, mean, standard deviation (only when there is
    more than one sample), value range and, when bucket counts are present,
    the number of buckets, the samples they hold and the fullest bucket.

    Args:
        dist: Distribution to report on. Only ``count``, ``mean``,
            ``sum_of_squared_deviation``, ``range.min``, ``range.max`` and
            ``bucket_counts`` are read.
    """
    if dist.count == 0:
        print("No data in distribution")
        return

    print(f"Sample count: {dist.count}")
    print(f"Mean: {dist.mean:.3f}")

    # Standard deviation with an n - 1 denominator, so it needs two samples
    if dist.count > 1:
        variance = dist.sum_of_squared_deviation / (dist.count - 1)
        std_dev = math.sqrt(variance)
        print(f"Standard deviation: {std_dev:.3f}")

    print(f"Range: {dist.range.min:.3f} - {dist.range.max:.3f}")

    # Snapshot the counts into a plain list so the analysis needs nothing
    # from the field beyond iteration.
    bucket_counts = list(dist.bucket_counts)
    if not bucket_counts:
        return

    print(f"Histogram buckets: {len(bucket_counts)}")
    print(f"Samples in buckets: {sum(bucket_counts)}")

    # Mode is the bucket holding the most samples; max() keeps the first of
    # several equally full buckets.
    mode_bucket, max_count = max(enumerate(bucket_counts), key=lambda pair: pair[1])
    print(f"Mode bucket: {mode_bucket} (count: {max_count})")

426

427

# Start from an empty distribution, fill it in, then print its statistics
latency_distribution = Distribution()
# ... populate distribution ...
analyze_distribution(latency_distribution)

431

```