or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

alert-policies.md · groups.md · index.md · metrics.md · notifications.md · services.md · snooze.md · uptime-checks.md

docs/services.md

0

# Service Level Monitoring

1

2

Comprehensive service-level monitoring for managing services and Service Level Objectives (SLOs) in Google Cloud Monitoring. This enables service-oriented monitoring with SLI definitions, error budgets, and service health tracking for modern microservices architectures.

3

4

## Capabilities

5

6

### Service Management

7

8

Manage the complete lifecycle of services including creation, updates, retrieval, and deletion.

9

10

```python { .api }

11

class ServiceMonitoringServiceClient:

12

def create_service(

13

self,

14

request=None,

15

*,

16

parent: str = None,

17

service=None,

18

retry=None,

19

timeout=None,

20

metadata=()

21

) -> service.Service:

22

"""

23

Create a Service.

24

25

Args:

26

request: The request object or dict equivalent

27

parent: Required. Project name in format 'projects/[PROJECT_ID]'

28

service: Required. The Service to create

29

retry: Retry configuration

30

timeout: Request timeout in seconds

31

metadata: Additional metadata

32

33

Returns:

34

Created Service object

35

"""

36

37

def get_service(

38

self,

39

request=None,

40

*,

41

name: str = None,

42

retry=None,

43

timeout=None,

44

metadata=()

45

) -> service.Service:

46

"""

47

Get the named Service.

48

49

Args:

50

request: The request object or dict equivalent

51

name: Required. Service name in format 'projects/[PROJECT_ID]/services/[SERVICE_ID]'

52

retry: Retry configuration

53

timeout: Request timeout in seconds

54

metadata: Additional metadata

55

56

Returns:

57

Service object

58

"""

59

60

def list_services(

61

self,

62

request=None,

63

*,

64

parent: str = None,

65

retry=None,

66

timeout=None,

67

metadata=()

68

) -> pagers.ListServicesPager:

69

"""

70

List Services for this Metrics Scope.

71

72

Args:

73

request: The request object or dict equivalent

74

parent: Required. Project name

75

retry: Retry configuration

76

timeout: Request timeout in seconds

77

metadata: Additional metadata

78

79

Returns:

80

Pager for iterating over Service objects

81

"""

82

83

def update_service(

84

self,

85

request=None,

86

*,

87

service=None,

88

retry=None,

89

timeout=None,

90

metadata=()

91

) -> service.Service:

92

"""

93

Update this Service.

94

95

Args:

96

request: The request object or dict equivalent

97

service: Required. Updated Service

98

retry: Retry configuration

99

timeout: Request timeout in seconds

100

metadata: Additional metadata

101

102

Returns:

103

Updated Service object

104

"""

105

106

def delete_service(

107

self,

108

request=None,

109

*,

110

name: str = None,

111

retry=None,

112

timeout=None,

113

metadata=()

114

) -> None:

115

"""

116

Soft delete this Service.

117

118

Args:

119

request: The request object or dict equivalent

120

name: Required. Service name to delete

121

retry: Retry configuration

122

timeout: Request timeout in seconds

123

metadata: Additional metadata

124

"""

125

```

126

127

### Service Level Objective Management

128

129

Manage Service Level Objectives (SLOs) for tracking service reliability and performance.

130

131

```python { .api }

132

class ServiceMonitoringServiceClient:

133

def create_service_level_objective(

134

self,

135

request=None,

136

*,

137

parent: str = None,

138

service_level_objective=None,

139

retry=None,

140

timeout=None,

141

metadata=()

142

) -> service.ServiceLevelObjective:

143

"""

144

Create a ServiceLevelObjective for the given Service.

145

146

Args:

147

request: The request object or dict equivalent

148

parent: Required. Service name

149

service_level_objective: Required. The SLO to create

150

retry: Retry configuration

151

timeout: Request timeout in seconds

152

metadata: Additional metadata

153

154

Returns:

155

Created ServiceLevelObjective object

156

"""

157

158

def get_service_level_objective(

159

self,

160

request=None,

161

*,

162

name: str = None,

163

retry=None,

164

timeout=None,

165

metadata=()

166

) -> service.ServiceLevelObjective:

167

"""

168

Get a ServiceLevelObjective by name.

169

170

Args:

171

request: The request object or dict equivalent

172

name: Required. SLO name

173

retry: Retry configuration

174

timeout: Request timeout in seconds

175

metadata: Additional metadata

176

177

Returns:

178

ServiceLevelObjective object

179

"""

180

181

def list_service_level_objectives(

182

self,

183

request=None,

184

*,

185

parent: str = None,

186

retry=None,

187

timeout=None,

188

metadata=()

189

) -> pagers.ListServiceLevelObjectivesPager:

190

"""

191

List the ServiceLevelObjectives for the given Service.

192

193

Args:

194

request: The request object or dict equivalent

195

parent: Required. Service name

196

retry: Retry configuration

197

timeout: Request timeout in seconds

198

metadata: Additional metadata

199

200

Returns:

201

Pager for iterating over ServiceLevelObjective objects

202

"""

203

204

def update_service_level_objective(

205

self,

206

request=None,

207

*,

208

service_level_objective=None,

209

retry=None,

210

timeout=None,

211

metadata=()

212

) -> service.ServiceLevelObjective:

213

"""

214

Update the given ServiceLevelObjective.

215

216

Args:

217

request: The request object or dict equivalent

218

service_level_objective: Required. Updated SLO

219

retry: Retry configuration

220

timeout: Request timeout in seconds

221

metadata: Additional metadata

222

223

Returns:

224

Updated ServiceLevelObjective object

225

"""

226

227

def delete_service_level_objective(

228

self,

229

request=None,

230

*,

231

name: str = None,

232

retry=None,

233

timeout=None,

234

metadata=()

235

) -> None:

236

"""

237

Delete the given ServiceLevelObjective.

238

239

Args:

240

request: The request object or dict equivalent

241

name: Required. SLO name to delete

242

retry: Retry configuration

243

timeout: Request timeout in seconds

244

metadata: Additional metadata

245

"""

246

```

247

248

## Data Types

249

250

### Service

251

252

Represents a service for monitoring purposes.

253

254

```python { .api }

255

class Service:

256

name: str # Resource name

257

display_name: str # Human-readable name

258

custom: Service.Custom # Custom service definition

259

app_engine: Service.AppEngine # App Engine service

260

cloud_endpoints: Service.CloudEndpoints # Cloud Endpoints service

261

cluster_istio: Service.ClusterIstio # Istio service mesh

262

mesh_istio: Service.MeshIstio # Istio mesh service

263

istio_canonical_service: Service.IstioCanonicalService # Canonical Istio service

264

cloud_run: Service.CloudRun # Cloud Run service

265

gke_namespace: Service.GkeNamespace # GKE namespace service

266

gke_workload: Service.GkeWorkload # GKE workload service

267

gke_service: Service.GkeService # GKE service

268

telemetry: Service.Telemetry # Telemetry configuration

269

user_labels: Dict[str, str] # User-defined labels

270

271

class Service.Custom:

272

# Custom service defined by a filter

273

274

class Service.CloudRun:

275

service_name: str # Cloud Run service name

276

location: str # Cloud Run service location

277

278

class Service.Telemetry:

279

resource_name: str # Resource name for telemetry

280

```

281

282

### ServiceLevelObjective

283

284

Represents a Service Level Objective definition.

285

286

```python { .api }

287

class ServiceLevelObjective:

288

name: str # Resource name

289

display_name: str # Human-readable name

290

service_level_indicator: ServiceLevelIndicator # SLI definition

291

goal: float # SLO target (0.0 to 1.0)

292

rolling_period: Duration # Rolling period for SLO

293

calendar_period: CalendarPeriod # Calendar period for SLO

294

user_labels: Dict[str, str] # User-defined labels

295

296

class ServiceLevelIndicator:

297

basic_sli: BasicSli # Basic SLI definition

298

request_based: RequestBasedSli # Request-based SLI

299

windows_based: WindowsBasedSli # Windows-based SLI

300

301

class BasicSli:

302

method: List[str] # RPC/method names this SLI applies to (empty = all)

303

location: List[str] # Locations to monitor

304

version: List[str] # Versions to monitor

305

availability: BasicSli.AvailabilityCriteria # Availability criteria

306

latency: BasicSli.LatencyCriteria # Latency criteria

307

308

class RequestBasedSli:

309

good_total_ratio: TimeSeriesRatio # Good events vs total events

310

distribution_cut: DistributionCut # Distribution-based SLI

311

312

class WindowsBasedSli:

313

good_bad_metric_filter: str # Metric filter for good/bad windows

314

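good_total_ratio_threshold: WindowsBasedSli.PerformanceThreshold # Window is good if its good/total ratio meets the threshold

315

metric_mean_in_range: WindowsBasedSli.MetricRange # Window is good if the metric mean is within range

316

metric_sum_in_range: WindowsBasedSli.MetricRange # Window is good if the metric sum is within range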

317

window_period: Duration # Window period

318

```

319

320

### Request and Response Types

321

322

```python { .api }

323

class CreateServiceRequest:

324

parent: str # Required. Project name

325

service_id: str # Service ID

326

service: Service # Required. Service to create

327

328

class GetServiceRequest:

329

name: str # Required. Service name

330

331

class ListServicesRequest:

332

parent: str # Required. Project name

333

filter: str # Filter expression

334

page_size: int # Maximum results per page

335

page_token: str # Page token

336

337

class ListServicesResponse:

338

services: List[Service] # Services

339

next_page_token: str # Next page token

340

341

class UpdateServiceRequest:

342

service: Service # Required. Updated service

343

update_mask: FieldMask # Fields to update

344

345

class DeleteServiceRequest:

346

name: str # Required. Service name to delete

347

348

class CreateServiceLevelObjectiveRequest:

349

parent: str # Required. Service name

350

service_level_objective_id: str # SLO ID

351

service_level_objective: ServiceLevelObjective # Required. SLO to create

352

353

class GetServiceLevelObjectiveRequest:

354

name: str # Required. SLO name

355

view: ServiceLevelObjective.View # View type

356

357

class ListServiceLevelObjectivesRequest:

358

parent: str # Required. Service name

359

filter: str # Filter expression

360

page_size: int # Maximum results per page

361

page_token: str # Page token

362

363

class ListServiceLevelObjectivesResponse:

364

service_level_objectives: List[ServiceLevelObjective] # SLOs

365

next_page_token: str # Next page token

366

367

class UpdateServiceLevelObjectiveRequest:

368

service_level_objective: ServiceLevelObjective # Required. Updated SLO

369

update_mask: FieldMask # Fields to update

370

371

class DeleteServiceLevelObjectiveRequest:

372

name: str # Required. SLO name to delete

373

```

374

375

## Usage Examples

376

377

### Creating a Custom Service

378

379

```python

380

from google.cloud.monitoring import ServiceMonitoringServiceClient

381

from google.cloud.monitoring_v3.types import Service

382

383

client = ServiceMonitoringServiceClient()

384

project_name = f"projects/{project_id}"

385

386

# Create a custom service

387

service_obj = Service()

388

service_obj.display_name = "Web Frontend Service"

389

390

# Define custom service with filter

391

custom_service = Service.Custom()

392

service_obj.custom = custom_service

393

394

# Add user labels

395

service_obj.user_labels["team"] = "frontend"

396

service_obj.user_labels["environment"] = "production"

397

398

created_service = client.create_service(request={

399

"parent": project_name,

400

"service": service_obj,

401

"service_id": "web-frontend",

402

})

403

print(f"Created service: {created_service.name}")

404

```

405

406

### Creating a Cloud Run Service

407

408

```python

409

# Create Cloud Run service

410

cloud_run_service = Service()

411

cloud_run_service.display_name = "API Service"

412

413

# Configure Cloud Run service

414

cloud_run = Service.CloudRun()

415

cloud_run.service_name = "api-service"

416

cloud_run.location = "us-central1"

417

cloud_run_service.cloud_run = cloud_run

418

419

created_cloud_run = client.create_service(request={

420

"parent": project_name,

421

"service": cloud_run_service,

422

"service_id": "api-service",

423

})

424

print(f"Created Cloud Run service: {created_cloud_run.name}")

425

```

426

427

### Creating a Service Level Objective

428

429

```python

430

from google.cloud.monitoring_v3.types import (

431

ServiceLevelObjective, ServiceLevelIndicator, BasicSli

432

)

433

from google.protobuf.duration_pb2 import Duration

434

435

service_name = f"projects/{project_id}/services/web-frontend"

436

437

# Create SLO for availability

438

slo = ServiceLevelObjective()

439

slo.display_name = "Web Frontend Availability SLO"

440

slo.goal = 0.995 # 99.5% availability target

441

442

# Define rolling period (30 days)

443

rolling_period = Duration()

444

rolling_period.seconds = 30 * 24 * 60 * 60 # 30 days

445

slo.rolling_period = rolling_period

446

447

# Define Service Level Indicator

448

sli = ServiceLevelIndicator()

449

basic_sli = BasicSli()

450

451

# Configure availability criteria

452

availability = BasicSli.AvailabilityCriteria()

453

basic_sli.availability = availability

454

sli.basic_sli = basic_sli

455

slo.service_level_indicator = sli

456

457

# Add user labels

458

slo.user_labels["tier"] = "critical"

459

slo.user_labels["team"] = "frontend"

460

461

created_slo = client.create_service_level_objective(request={

462

"parent": service_name,

463

"service_level_objective": slo,

464

"service_level_objective_id": "availability-slo",

465

})

466

print(f"Created SLO: {created_slo.name}")

467

print(f"Target: {created_slo.goal * 100}%")

468

```

469

470

### Creating a Latency SLO

471

472

```python

473

# Create SLO for latency

474

latency_slo = ServiceLevelObjective()

475

latency_slo.display_name = "Web Frontend Latency SLO"

476

latency_slo.goal = 0.90 # 90% of requests under threshold

477

478

# Rolling period (7 days)

479

rolling_period = Duration()

480

rolling_period.seconds = 7 * 24 * 60 * 60

481

latency_slo.rolling_period = rolling_period

482

483

# Define latency SLI

484

latency_sli = ServiceLevelIndicator()

485

latency_basic = BasicSli()

486

487

# Configure latency criteria (500ms threshold)

488

latency_criteria = BasicSli.LatencyCriteria()

489

latency_criteria.threshold.seconds = 0

490

latency_criteria.threshold.nanos = 500000000 # 500ms

491

latency_basic.latency = latency_criteria

492

493

latency_sli.basic_sli = latency_basic

494

latency_slo.service_level_indicator = latency_sli

495

496

created_latency_slo = client.create_service_level_objective(request={

497

"parent": service_name,

498

"service_level_objective": latency_slo,

499

"service_level_objective_id": "latency-slo",

500

})

501

print(f"Created latency SLO: {created_latency_slo.name}")

502

```

503

504

### Listing Services and SLOs

505

506

```python

507

# List all services

508

print("Services:")

509

for service in client.list_services(parent=project_name):

510

print(f"- {service.display_name}: {service.name}")

511

512

# List SLOs for each service

513

print(f" SLOs:")

514

for slo in client.list_service_level_objectives(parent=service.name):

515

print(f" - {slo.display_name}: {slo.goal * 100}% target")

516

517

# Filter services by label

518

filter_expr = 'user_labels.environment="production"'

519

print(f"\nProduction services:")

520

for service in client.list_services(request={"parent": project_name, "filter": filter_expr}):

521

print(f"- {service.display_name}")

522

```

523

524

### Updating a Service

525

526

```python

527

from google.protobuf import field_mask_pb2

528

529

# Get existing service

530

service_name = f"projects/{project_id}/services/web-frontend"

531

service = client.get_service(name=service_name)

532

533

# Update service properties

534

service.display_name = "Updated Web Frontend Service"

535

service.user_labels["version"] = "v2.0"

536

537

# Create field mask for selective update

538

update_mask = field_mask_pb2.FieldMask()

539

update_mask.paths.extend(["display_name", "user_labels"])

540

541

updated_service = client.update_service(request={

542

"service": service,

543

"update_mask": update_mask,

544

})

545

print(f"Updated service: {updated_service.display_name}")

546

```

547

548

### Working with Request-Based SLIs

549

550

```python

551

from google.cloud.monitoring_v3.types import RequestBasedSli, TimeSeriesRatio

552

553

# Create request-based SLO

554

request_slo = ServiceLevelObjective()

555

request_slo.display_name = "Error Rate SLO"

556

request_slo.goal = 0.999 # 99.9% success rate

557

558

# Rolling period

559

rolling_period = Duration()

560

rolling_period.seconds = 28 * 24 * 60 * 60 # 28 days

561

request_slo.rolling_period = rolling_period

562

563

# Define request-based SLI

564

request_sli = ServiceLevelIndicator()

565

request_based = RequestBasedSli()

566

567

# Configure good vs total ratio

568

ratio = TimeSeriesRatio()

569

ratio.good_service_filter = 'project="my-project" AND service_name="api-service" AND response_code_class="2xx"'

570

ratio.total_service_filter = 'project="my-project" AND service_name="api-service"'

571

request_based.good_total_ratio = ratio

572

573

request_sli.request_based = request_based

574

request_slo.service_level_indicator = request_sli

575

576

created_request_slo = client.create_service_level_objective(request={

577

"parent": service_name,

578

"service_level_objective": request_slo,

579

"service_level_objective_id": "error-rate-slo",

580

})

581

print(f"Created request-based SLO: {created_request_slo.name}")

582

```

583

584

### Deleting Services and SLOs

585

586

```python

587

# Delete SLO

588

slo_name = f"projects/{project_id}/services/web-frontend/serviceLevelObjectives/availability-slo"

589

client.delete_service_level_objective(name=slo_name)

590

print(f"Deleted SLO: {slo_name}")

591

592

# Delete service (soft delete)

593

service_name = f"projects/{project_id}/services/web-frontend"

594

client.delete_service(name=service_name)

595

print(f"Deleted service: {service_name}")

596

```

597

598

### Async Service Operations

599

600

```python

601

import asyncio

602

from google.cloud.monitoring import ServiceMonitoringServiceAsyncClient

603

604

async def manage_services():

605

client = ServiceMonitoringServiceAsyncClient()

606

project_name = f"projects/{project_id}"

607

608

# List services asynchronously

609

async for service in await client.list_services(parent=project_name):

610

print(f"Async service: {service.display_name}")

611

612

# List SLOs for each service

613

async for slo in await client.list_service_level_objectives(parent=service.name):

614

print(f" Async SLO: {slo.display_name}")

615

616

asyncio.run(manage_services())

617

```

618

619

## Resource Path Helpers

620

621

```python { .api }

622

class ServiceMonitoringServiceClient:

623

@staticmethod

624

def service_path(project: str, service: str) -> str:

625

"""Returns a fully-qualified service string."""

626

627

@staticmethod

628

def service_level_objective_path(

629

project: str,

630

service: str,

631

service_level_objective: str

632

) -> str:

633

"""Returns a fully-qualified service_level_objective string."""

634

635

@staticmethod

636

def parse_service_path(path: str) -> Dict[str, str]:

637

"""Parses a service path into its component segments."""

638

```

639

640

## Error Handling

641

642

Service monitoring operations can raise specific exceptions:

643

644

```python

645

from google.api_core import exceptions

646

from google.cloud.monitoring import ServiceMonitoringServiceClient

647

648

client = ServiceMonitoringServiceClient()

649

650

try:

651

service = client.get_service(name="invalid/path")

652

except exceptions.NotFound:

653

print("Service not found")

654

except exceptions.InvalidArgument as e:

655

print(f"Invalid service configuration: {e}")

656

except exceptions.PermissionDenied:

657

print("Insufficient permissions")

658

except exceptions.FailedPrecondition as e:

659

print(f"Operation rejected, precondition not met: {e}")

660

```