or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

admin-services.md · binary-logging.md · channelz.md · health-checking.md · index.md · load-balancing.md · metrics.md · server-reflection.md

metrics.mddocs/

0

# Metrics and Load Reporting

1

2

ORCA metrics collection for load balancing and performance monitoring. Provides both per-call metrics recording and out-of-band metrics reporting for intelligent load balancing decisions.

3

4

## Capabilities

5

6

### CallMetricRecorder

7

8

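Records metrics on a per-call basis for load balancing and performance analysis. One instance exists per gRPC call; it is attached to the call's `Context` by the server's ORCA metric-reporting interceptor and is retrieved inside a handler with `CallMetricRecorder.getCurrent()`.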

9

10

```java { .api }

11

/**

12

* Utility to record call metrics for load-balancing. One instance per call.

13

* Provides fluent API for recording various types of metrics.

14

*/

15

@ExperimentalApi("https://github.com/grpc/grpc-java/issues/6012")

16

@ThreadSafe

17

public final class CallMetricRecorder {

18

19

/**

20

* Returns the call metric recorder attached to the current Context

21

* @return CallMetricRecorder instance for the current call

22

*/

23

public static CallMetricRecorder getCurrent();

24

25

/**

26

* Records utilization metric in range [0, 1]

27

* @param name Metric name

28

* @param value Utilization value between 0.0 and 1.0

29

* @return this recorder object for chaining

30

*/

31

public CallMetricRecorder recordUtilizationMetric(String name, double value);

32

33

/**

34

* Records request cost metric (arbitrary units)

35

* @param name Metric name

36

* @param value Cost value (typically positive)

37

* @return this recorder object for chaining

38

*/

39

public CallMetricRecorder recordRequestCostMetric(String name, double value);

40

41

/**

42

* Records application-specific opaque custom metric

43

* @param name Metric name

44

* @param value Metric value

45

* @return this recorder object for chaining

46

*/

47

public CallMetricRecorder recordNamedMetric(String name, double value);

48

49

/**

50

* Records CPU utilization in range [0, inf)

51

* @param value CPU utilization value

52

* @return this recorder object for chaining

53

*/

54

public CallMetricRecorder recordCpuUtilizationMetric(double value);

55

56

/**

57

* Records application specific utilization in range [0, inf)

58

* @param value Application utilization value

59

* @return this recorder object for chaining

60

*/

61

public CallMetricRecorder recordApplicationUtilizationMetric(double value);

62

63

/**

64

* Records memory utilization in range [0, 1]

65

* @param value Memory utilization between 0.0 and 1.0

66

* @return this recorder object for chaining

67

*/

68

public CallMetricRecorder recordMemoryUtilizationMetric(double value);

69

70

/**

71

* Records queries per second in range [0, inf)

72

* @param value QPS value

73

* @return this recorder object for chaining

74

*/

75

public CallMetricRecorder recordQpsMetric(double value);

76

77

/**

78

* Records errors per second in range [0, inf)

79

* @param value EPS value

80

* @return this recorder object for chaining

81

*/

82

public CallMetricRecorder recordEpsMetric(double value);

83

84

/**

85

* Records request cost metric (deprecated, use recordRequestCostMetric)

86

* @param name Metric name

87

* @param value Cost value

88

* @return this recorder object for chaining

89

* @deprecated Use recordRequestCostMetric instead

90

*/

91

@Deprecated

92

public CallMetricRecorder recordCallMetric(String name, double value);

93

}

94

```

95

96

**Usage Examples:**

97

98

```java

99

import io.grpc.services.CallMetricRecorder;

100

import io.grpc.stub.StreamObserver;

101

102

public class MetricsAwareService extends UserServiceGrpc.UserServiceImplBase {

103

104

@Override

105

public void getUser(GetUserRequest request, StreamObserver<GetUserResponse> responseObserver) {

106

long startTime = System.nanoTime();

107

108

try {

109

// Business logic

110

UserResponse response = processGetUser(request);

111

112

// Record metrics for this call

113

CallMetricRecorder recorder = CallMetricRecorder.getCurrent();

114

115

long duration = System.nanoTime() - startTime;

116

double durationMs = duration / 1_000_000.0;

117

118

recorder

119

.recordRequestCostMetric("processing_time_ms", durationMs)

120

.recordCpuUtilizationMetric(getCurrentCpuUsage())

121

.recordMemoryUtilizationMetric(getCurrentMemoryUsage())

122

.recordUtilizationMetric("database_load", getDatabaseLoad());

123

124

responseObserver.onNext(response);

125

responseObserver.onCompleted();

126

127

} catch (Exception e) {

128

// Record error metrics

129

CallMetricRecorder.getCurrent()

130

.recordEpsMetric(1.0)

131

.recordNamedMetric("error_type", getErrorTypeCode(e));

132

133

responseObserver.onError(e);

134

}

135

}

136

137

private double getCurrentCpuUsage() {

138

// Implementation to get current CPU usage

139

return 0.75; // Example value

140

}

141

142

private double getCurrentMemoryUsage() {

143

Runtime runtime = Runtime.getRuntime();

144

return (double) runtime.totalMemory() / runtime.maxMemory();

145

}

146

147

private double getDatabaseLoad() {

148

// Implementation to get database connection load

149

return 0.60; // Example value

150

}

151

}

152

```

153

154

### MetricRecorder

155

156

Out-of-band metrics reporting for server-wide utilization metrics that are reported independently of individual calls.

157

158

```java { .api }

159

/**

160

* Implements Out-of-Band metrics reporting for utilization metrics.

161

* Reports server-wide metrics that apply across all calls.

162

*/

163

@ExperimentalApi("https://github.com/grpc/grpc-java/issues/9006")

164

public final class MetricRecorder {

165

166

/**

167

* Creates a new MetricRecorder instance

168

* @return MetricRecorder instance for out-of-band reporting

169

*/

170

public static MetricRecorder newInstance();

171

172

/**

173

* Updates metrics value in range [0, 1] for specified key

174

* @param key Metric name

175

* @param value Utilization value between 0.0 and 1.0

176

*/

177

public void putUtilizationMetric(String key, double value);

178

179

/**

180

* Replaces the whole metrics data using the specified map

181

* @param metrics Map of metric names to values

182

*/

183

public void setAllUtilizationMetrics(Map<String, Double> metrics);

184

185

/**

186

* Removes the metrics data entry for specified key

187

* @param key Metric name to remove

188

*/

189

public void removeUtilizationMetric(String key);

190

191

/**

192

* Updates CPU utilization in range [0, inf)

193

* @param value CPU utilization value

194

*/

195

public void setCpuUtilizationMetric(double value);

196

197

/** Clears CPU utilization metrics data */

198

public void clearCpuUtilizationMetric();

199

200

/**

201

* Updates application specific utilization in range [0, inf)

202

* @param value Application utilization value

203

*/

204

public void setApplicationUtilizationMetric(double value);

205

206

/** Clears application specific utilization metrics data */

207

public void clearApplicationUtilizationMetric();

208

209

/**

210

* Updates memory utilization in range [0, 1]

211

* @param value Memory utilization between 0.0 and 1.0

212

*/

213

public void setMemoryUtilizationMetric(double value);

214

215

/** Clears memory utilization metrics data */

216

public void clearMemoryUtilizationMetric();

217

218

/**

219

* Updates QPS metrics in range [0, inf)

220

* @param value Queries per second value

221

*/

222

public void setQpsMetric(double value);

223

224

/** Clears QPS metrics data */

225

public void clearQpsMetric();

226

227

/**

228

* Updates EPS metrics in range [0, inf)

229

* @param value Errors per second value

230

*/

231

public void setEpsMetric(double value);

232

233

/** Clears EPS metrics data */

234

public void clearEpsMetric();

235

}

236

```

237

238

**Usage Examples:**

239

240

```java

241

import io.grpc.services.MetricRecorder;

242

import java.util.concurrent.Executors;

243

import java.util.concurrent.ScheduledExecutorService;

244

import java.util.concurrent.TimeUnit;

245

246

public class ServerMetricsReporter {

247

private final MetricRecorder metricRecorder;

248

private final ScheduledExecutorService scheduler;

249

250

public ServerMetricsReporter() {

251

this.metricRecorder = MetricRecorder.newInstance();

252

this.scheduler = Executors.newScheduledThreadPool(1);

253

}

254

255

public void startReporting() {

256

// Report server metrics every 10 seconds

257

scheduler.scheduleAtFixedRate(this::reportMetrics, 0, 10, TimeUnit.SECONDS);

258

}

259

260

private void reportMetrics() {

261

// Collect current server metrics

262

double cpuUsage = SystemMetrics.getCpuUsage();

263

double memoryUsage = SystemMetrics.getMemoryUsage();

264

double diskUsage = SystemMetrics.getDiskUsage();

265

double networkLatency = SystemMetrics.getNetworkLatency();

266

267

// Report standard metrics

268

metricRecorder.setCpuUtilizationMetric(cpuUsage);

269

metricRecorder.setMemoryUtilizationMetric(memoryUsage);

270

271

// Report custom utilization metrics

272

metricRecorder.putUtilizationMetric("disk_usage", diskUsage);

273

metricRecorder.putUtilizationMetric("network_latency", networkLatency);

274

275

// Report performance metrics

276

double currentQps = PerformanceTracker.getCurrentQps();

277

double currentEps = PerformanceTracker.getCurrentEps();

278

279

metricRecorder.setQpsMetric(currentQps);

280

metricRecorder.setEpsMetric(currentEps);

281

282

System.out.println("Reported metrics: CPU=" + cpuUsage +

283

", Memory=" + memoryUsage +

284

", QPS=" + currentQps);

285

}

286

287

public void shutdown() {

288

scheduler.shutdown();

289

}

290

}

291

```

292

293

### MetricReport

294

295

Read-only object containing ORCA load report data for load balancing policies.

296

297

```java { .api }

298

/**

299

* A gRPC object of orca load report for LB policies listening at per-rpc or oob orca load reports.

300

* Provides read-only access to collected metrics data.

301

*/

302

@ExperimentalApi("https://github.com/grpc/grpc-java/issues/9381")

303

public class MetricReport {

304

305

/** @return CPU utilization value */

306

public double getCpuUtilization();

307

308

/** @return Application utilization value */

309

public double getApplicationUtilization();

310

311

/** @return Memory utilization value */

312

public double getMemoryUtilization();

313

314

/** @return QPS (queries per second) value */

315

public double getQps();

316

317

/** @return EPS (errors per second) value */

318

public double getEps();

319

320

/** @return Map of request cost metrics */

321

public Map<String, Double> getRequestCostMetrics();

322

323

/** @return Map of utilization metrics */

324

public Map<String, Double> getUtilizationMetrics();

325

326

/** @return Map of named metrics */

327

public Map<String, Double> getNamedMetrics();

328

329

/** @return String representation of the metric report */

330

public String toString();

331

}

332

```

333

334

## Integration Patterns

335

336

### Combined Per-Call and Out-of-Band Metrics

337

338

```java

339

public class ComprehensiveMetricsService extends OrderServiceGrpc.OrderServiceImplBase {

340

private final MetricRecorder serverMetrics;

341

342

public ComprehensiveMetricsService() {

343

this.serverMetrics = MetricRecorder.newInstance();

344

345

// Start background metrics reporting

346

startBackgroundMetricsReporting();

347

}

348

349

@Override

350

public void processOrder(OrderRequest request, StreamObserver<OrderResponse> responseObserver) {

351

long startTime = System.currentTimeMillis();

352

353

try {

354

// Process the order

355

OrderResponse response = handleOrder(request);

356

357

// Record per-call metrics

358

CallMetricRecorder callRecorder = CallMetricRecorder.getCurrent();

359

long processingTime = System.currentTimeMillis() - startTime;

360

361

callRecorder

362

.recordRequestCostMetric("order_processing_time", processingTime)

363

.recordUtilizationMetric("payment_gateway_load", getPaymentGatewayLoad())

364

.recordNamedMetric("order_value", request.getTotalAmount());

365

366

responseObserver.onNext(response);

367

responseObserver.onCompleted();

368

369

} catch (PaymentException e) {

370

CallMetricRecorder.getCurrent()

371

.recordEpsMetric(1.0)

372

.recordNamedMetric("payment_error", 1.0);

373

374

responseObserver.onError(e);

375

}

376

}

377

378

private void startBackgroundMetricsReporting() {

379

ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1);

380

381

scheduler.scheduleAtFixedRate(() -> {

382

// Update server-wide metrics

383

serverMetrics.setCpuUtilizationMetric(getCurrentCpuUsage());

384

serverMetrics.setMemoryUtilizationMetric(getCurrentMemoryUsage());

385

serverMetrics.putUtilizationMetric("database_connections", getDatabaseConnectionUsage());

386

serverMetrics.putUtilizationMetric("cache_hit_rate", getCacheHitRate());

387

388

}, 0, 30, TimeUnit.SECONDS);

389

}

390

}

391

```

392

393

### Load Balancer Integration

394

395

```java

396

public class LoadBalancerAwareClient {

397

private final ManagedChannel channel;

398

private final OrderServiceGrpc.OrderServiceStub stub;

399

400

public LoadBalancerAwareClient(String target) {

401

this.channel = ManagedChannelBuilder.forTarget(target)

402

.defaultLoadBalancingPolicy("weighted_round_robin") // Uses ORCA metrics

403

.usePlaintext()

404

.build();

405

406

this.stub = OrderServiceGrpc.newStub(channel);

407

}

408

409

public void processOrderWithMetrics(OrderRequest request) {

410

// The load balancer will automatically use ORCA metrics

411

// reported by CallMetricRecorder and MetricRecorder

412

// to make intelligent routing decisions

413

414

stub.processOrder(request, new StreamObserver<OrderResponse>() {

415

@Override

416

public void onNext(OrderResponse response) {

417

System.out.println("Order processed: " + response.getOrderId());

418

}

419

420

@Override

421

public void onError(Throwable t) {

422

System.err.println("Order processing failed: " + t.getMessage());

423

}

424

425

@Override

426

public void onCompleted() {

427

// Request completed

428

}

429

});

430

}

431

}

432

```

433

434

### Metrics Collection and Analysis

435

436

```java

437

public class MetricsCollector {

438

private final List<MetricReport> collectedReports = new ArrayList<>();

439

440

public void collectMetrics(MetricReport report) {

441

synchronized (collectedReports) {

442

collectedReports.add(report);

443

}

444

445

// Analyze metrics

446

analyzeReport(report);

447

}

448

449

private void analyzeReport(MetricReport report) {

450

System.out.println("Metrics Analysis:");

451

System.out.println("CPU Utilization: " + report.getCpuUtilization());

452

System.out.println("Memory Utilization: " + report.getMemoryUtilization());

453

System.out.println("QPS: " + report.getQps());

454

System.out.println("EPS: " + report.getEps());

455

456

// Analyze request cost metrics

457

Map<String, Double> costMetrics = report.getRequestCostMetrics();

458

costMetrics.forEach((name, value) ->

459

System.out.println("Cost Metric " + name + ": " + value)

460

);

461

462

// Analyze custom metrics

463

Map<String, Double> namedMetrics = report.getNamedMetrics();

464

namedMetrics.forEach((name, value) ->

465

System.out.println("Named Metric " + name + ": " + value)

466

);

467

468

// Alert on high utilization

469

if (report.getCpuUtilization() > 0.8) {

470

System.out.println("WARNING: High CPU utilization detected!");

471

}

472

473

if (report.getEps() > 10.0) {

474

System.out.println("WARNING: High error rate detected!");

475

}

476

}

477

478

public void printSummaryReport() {

479

synchronized (collectedReports) {

480

if (collectedReports.isEmpty()) {

481

System.out.println("No metrics collected yet");

482

return;

483

}

484

485

double avgCpu = collectedReports.stream()

486

.mapToDouble(MetricReport::getCpuUtilization)

487

.average()

488

.orElse(0.0);

489

490

double avgMemory = collectedReports.stream()

491

.mapToDouble(MetricReport::getMemoryUtilization)

492

.average()

493

.orElse(0.0);

494

495

System.out.println("Summary Report:");

496

System.out.println("Total Reports: " + collectedReports.size());

497

System.out.println("Average CPU: " + avgCpu);

498

System.out.println("Average Memory: " + avgMemory);

499

}

500

}

501

}

502

```