0
# Metrics and Load Reporting
1
2
ORCA (Open Request Cost Aggregation) metrics collection for load balancing and performance monitoring. Supports both per-call metric recording and out-of-band (server-wide) metric reporting, which load-balancing policies can use to make routing decisions.
3
4
## Capabilities
5
6
### CallMetricRecorder
7
8
Records metrics on a per-call basis for load balancing and performance analysis. One instance exists per gRPC call and is automatically attached to the call context.
9
10
```java { .api }
11
/**
12
* Records per-call metrics used for load balancing. One instance exists per call.
13
* Every {@code record*} method returns this recorder, so calls can be chained fluently.
* NOTE(review): how values outside a documented range are treated is not shown in this
* listing; confirm against the upstream gRPC Javadoc before relying on it.
14
*/
15
@ExperimentalApi("https://github.com/grpc/grpc-java/issues/6012")
16
@ThreadSafe
17
public final class CallMetricRecorder {
18
19
/**
20
* Returns the call metric recorder attached to the current {@code Context}.
21
* @return CallMetricRecorder instance for the current call
22
*/
23
public static CallMetricRecorder getCurrent();
24
25
/**
26
* Records a named utilization metric in the range [0, 1].
27
* @param name Metric name
28
* @param value Utilization value between 0.0 and 1.0
29
* @return this recorder object for chaining
30
*/
31
public CallMetricRecorder recordUtilizationMetric(String name, double value);
32
33
/**
34
* Records a named request cost metric (arbitrary units).
35
* @param name Metric name
36
* @param value Cost value (typically positive)
37
* @return this recorder object for chaining
38
*/
39
public CallMetricRecorder recordRequestCostMetric(String name, double value);
40
41
/**
42
* Records an application-specific, opaque custom metric.
43
* @param name Metric name
44
* @param value Metric value
45
* @return this recorder object for chaining
46
*/
47
public CallMetricRecorder recordNamedMetric(String name, double value);
48
49
/**
50
* Records CPU utilization in the range [0, inf).
51
* @param value CPU utilization value
52
* @return this recorder object for chaining
53
*/
54
public CallMetricRecorder recordCpuUtilizationMetric(double value);
55
56
/**
57
* Records application-specific utilization in the range [0, inf).
58
* @param value Application utilization value
59
* @return this recorder object for chaining
60
*/
61
public CallMetricRecorder recordApplicationUtilizationMetric(double value);
62
63
/**
64
* Records memory utilization in the range [0, 1].
65
* @param value Memory utilization between 0.0 and 1.0
66
* @return this recorder object for chaining
67
*/
68
public CallMetricRecorder recordMemoryUtilizationMetric(double value);
69
70
/**
71
* Records queries per second (QPS) in the range [0, inf).
72
* @param value QPS value
73
* @return this recorder object for chaining
74
*/
75
public CallMetricRecorder recordQpsMetric(double value);
76
77
/**
78
* Records errors per second (EPS) in the range [0, inf).
79
* @param value EPS value
80
* @return this recorder object for chaining
81
*/
82
public CallMetricRecorder recordEpsMetric(double value);
83
84
/**
85
* Records a named request cost metric. Deprecated alias kept for older callers.
86
* @param name Metric name
87
* @param value Cost value
88
* @return this recorder object for chaining
89
* @deprecated Use {@link #recordRequestCostMetric(String, double)} instead
90
*/
91
@Deprecated
92
public CallMetricRecorder recordCallMetric(String name, double value);
93
}
94
```
95
96
**Usage Examples:**
97
98
```java
99
import io.grpc.services.CallMetricRecorder;
100
import io.grpc.stub.StreamObserver;
101
102
public class MetricsAwareService extends UserServiceGrpc.UserServiceImplBase {
103
104
@Override
105
public void getUser(GetUserRequest request, StreamObserver<GetUserResponse> responseObserver) {
106
long startTime = System.nanoTime();
107
108
try {
109
// Business logic
110
UserResponse response = processGetUser(request);
111
112
// Record metrics for this call
113
CallMetricRecorder recorder = CallMetricRecorder.getCurrent();
114
115
long duration = System.nanoTime() - startTime;
116
double durationMs = duration / 1_000_000.0;
117
118
recorder
119
.recordRequestCostMetric("processing_time_ms", durationMs)
120
.recordCpuUtilizationMetric(getCurrentCpuUsage())
121
.recordMemoryUtilizationMetric(getCurrentMemoryUsage())
122
.recordUtilizationMetric("database_load", getDatabaseLoad());
123
124
responseObserver.onNext(response);
125
responseObserver.onCompleted();
126
127
} catch (Exception e) {
128
// Record error metrics
129
CallMetricRecorder.getCurrent()
130
.recordEpsMetric(1.0)
131
.recordNamedMetric("error_type", getErrorTypeCode(e));
132
133
responseObserver.onError(e);
134
}
135
}
136
137
private double getCurrentCpuUsage() {
138
// Implementation to get current CPU usage
139
return 0.75; // Example value
140
}
141
142
private double getCurrentMemoryUsage() {
143
Runtime runtime = Runtime.getRuntime();
144
return (double) runtime.totalMemory() / runtime.maxMemory();
145
}
146
147
private double getDatabaseLoad() {
148
// Implementation to get database connection load
149
return 0.60; // Example value
150
}
151
}
152
```
153
154
### MetricRecorder
155
156
Out-of-band (OOB) reporting of server-wide utilization metrics, published independently of any individual call.
157
158
```java { .api }
159
/**
160
* Implements out-of-band (OOB) reporting of utilization metrics.
161
* Holds server-wide metrics that apply across all calls, not to a single call.
162
*/
163
@ExperimentalApi("https://github.com/grpc/grpc-java/issues/9006")
164
public final class MetricRecorder {
165
166
/**
167
* Creates a new MetricRecorder instance.
168
* @return MetricRecorder instance for out-of-band reporting
169
*/
170
public static MetricRecorder newInstance();
171
172
/**
173
* Updates the utilization metric stored under the given key; value range is [0, 1].
174
* @param key Metric name
175
* @param value Utilization value between 0.0 and 1.0
176
*/
177
public void putUtilizationMetric(String key, double value);
178
179
/**
180
* Replaces the whole set of utilization metrics with the contents of the given map.
181
* @param metrics Map of metric names to values
182
*/
183
public void setAllUtilizationMetrics(Map<String, Double> metrics);
184
185
/**
186
* Removes the utilization metric entry stored under the given key.
187
* @param key Metric name to remove
188
*/
189
public void removeUtilizationMetric(String key);
190
191
/**
192
* Updates CPU utilization; value range is [0, inf).
193
* @param value CPU utilization value
194
*/
195
public void setCpuUtilizationMetric(double value);
196
197
/** Clears the CPU utilization metric. */
198
public void clearCpuUtilizationMetric();
199
200
/**
201
* Updates application-specific utilization; value range is [0, inf).
202
* @param value Application utilization value
203
*/
204
public void setApplicationUtilizationMetric(double value);
205
206
/** Clears the application-specific utilization metric. */
207
public void clearApplicationUtilizationMetric();
208
209
/**
210
* Updates memory utilization; value range is [0, 1].
211
* @param value Memory utilization between 0.0 and 1.0
212
*/
213
public void setMemoryUtilizationMetric(double value);
214
215
/** Clears the memory utilization metric. */
216
public void clearMemoryUtilizationMetric();
217
218
/**
219
* Updates the QPS (queries per second) metric; value range is [0, inf).
220
* @param value Queries per second value
221
*/
222
public void setQpsMetric(double value);
223
224
/** Clears the QPS metric. */
225
public void clearQpsMetric();
226
227
/**
228
* Updates the EPS (errors per second) metric; value range is [0, inf).
229
* @param value Errors per second value
230
*/
231
public void setEpsMetric(double value);
232
233
/** Clears the EPS metric. */
234
public void clearEpsMetric();
235
}
236
```
237
238
**Usage Examples:**
239
240
```java
241
import io.grpc.services.MetricRecorder;
242
import java.util.concurrent.Executors;
243
import java.util.concurrent.ScheduledExecutorService;
244
import java.util.concurrent.TimeUnit;
245
246
public class ServerMetricsReporter {
247
private final MetricRecorder metricRecorder;
248
private final ScheduledExecutorService scheduler;
249
250
public ServerMetricsReporter() {
251
this.metricRecorder = MetricRecorder.newInstance();
252
this.scheduler = Executors.newScheduledThreadPool(1);
253
}
254
255
public void startReporting() {
256
// Report server metrics every 10 seconds
257
scheduler.scheduleAtFixedRate(this::reportMetrics, 0, 10, TimeUnit.SECONDS);
258
}
259
260
private void reportMetrics() {
261
// Collect current server metrics
262
double cpuUsage = SystemMetrics.getCpuUsage();
263
double memoryUsage = SystemMetrics.getMemoryUsage();
264
double diskUsage = SystemMetrics.getDiskUsage();
265
double networkLatency = SystemMetrics.getNetworkLatency();
266
267
// Report standard metrics
268
metricRecorder.setCpuUtilizationMetric(cpuUsage);
269
metricRecorder.setMemoryUtilizationMetric(memoryUsage);
270
271
// Report custom utilization metrics
272
metricRecorder.putUtilizationMetric("disk_usage", diskUsage);
273
metricRecorder.putUtilizationMetric("network_latency", networkLatency);
274
275
// Report performance metrics
276
double currentQps = PerformanceTracker.getCurrentQps();
277
double currentEps = PerformanceTracker.getCurrentEps();
278
279
metricRecorder.setQpsMetric(currentQps);
280
metricRecorder.setEpsMetric(currentEps);
281
282
System.out.println("Reported metrics: CPU=" + cpuUsage +
283
", Memory=" + memoryUsage +
284
", QPS=" + currentQps);
285
}
286
287
public void shutdown() {
288
scheduler.shutdown();
289
}
290
}
291
```
292
293
### MetricReport
294
295
Read-only object containing ORCA load report data for load balancing policies.
296
297
```java { .api }
298
/**
299
* ORCA load report handed to LB policies that listen for per-RPC or out-of-band (OOB) reports.
300
* Provides read-only access to the collected metrics data.
301
*/
302
@ExperimentalApi("https://github.com/grpc/grpc-java/issues/9381")
303
public class MetricReport {
304
305
/** @return CPU utilization value carried by this report */
306
public double getCpuUtilization();
307
308
/** @return Application utilization value carried by this report */
309
public double getApplicationUtilization();
310
311
/** @return Memory utilization value carried by this report */
312
public double getMemoryUtilization();
313
314
/** @return QPS (queries per second) value carried by this report */
315
public double getQps();
316
317
/** @return EPS (errors per second) value carried by this report */
318
public double getEps();
319
320
/** @return Map of request cost metrics, keyed by metric name */
321
public Map<String, Double> getRequestCostMetrics();
322
323
/** @return Map of utilization metrics, keyed by metric name */
324
public Map<String, Double> getUtilizationMetrics();
325
326
/** @return Map of named (custom) metrics, keyed by metric name */
327
public Map<String, Double> getNamedMetrics();
328
329
/** @return String representation of the metric report */
330
public String toString();
331
}
332
```
333
334
## Integration Patterns
335
336
### Combined Per-Call and Out-of-Band Metrics
337
338
```java
339
public class ComprehensiveMetricsService extends OrderServiceGrpc.OrderServiceImplBase {
340
private final MetricRecorder serverMetrics;
341
342
public ComprehensiveMetricsService() {
343
this.serverMetrics = MetricRecorder.newInstance();
344
345
// Start background metrics reporting
346
startBackgroundMetricsReporting();
347
}
348
349
@Override
350
public void processOrder(OrderRequest request, StreamObserver<OrderResponse> responseObserver) {
351
long startTime = System.currentTimeMillis();
352
353
try {
354
// Process the order
355
OrderResponse response = handleOrder(request);
356
357
// Record per-call metrics
358
CallMetricRecorder callRecorder = CallMetricRecorder.getCurrent();
359
long processingTime = System.currentTimeMillis() - startTime;
360
361
callRecorder
362
.recordRequestCostMetric("order_processing_time", processingTime)
363
.recordUtilizationMetric("payment_gateway_load", getPaymentGatewayLoad())
364
.recordNamedMetric("order_value", request.getTotalAmount());
365
366
responseObserver.onNext(response);
367
responseObserver.onCompleted();
368
369
} catch (PaymentException e) {
370
CallMetricRecorder.getCurrent()
371
.recordEpsMetric(1.0)
372
.recordNamedMetric("payment_error", 1.0);
373
374
responseObserver.onError(e);
375
}
376
}
377
378
private void startBackgroundMetricsReporting() {
379
ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1);
380
381
scheduler.scheduleAtFixedRate(() -> {
382
// Update server-wide metrics
383
serverMetrics.setCpuUtilizationMetric(getCurrentCpuUsage());
384
serverMetrics.setMemoryUtilizationMetric(getCurrentMemoryUsage());
385
serverMetrics.putUtilizationMetric("database_connections", getDatabaseConnectionUsage());
386
serverMetrics.putUtilizationMetric("cache_hit_rate", getCacheHitRate());
387
388
}, 0, 30, TimeUnit.SECONDS);
389
}
390
}
391
```
392
393
### Load Balancer Integration
394
395
```java
396
public class LoadBalancerAwareClient {
397
private final ManagedChannel channel;
398
private final OrderServiceGrpc.OrderServiceStub stub;
399
400
public LoadBalancerAwareClient(String target) {
401
this.channel = ManagedChannelBuilder.forTarget(target)
402
.defaultLoadBalancingPolicy("weighted_round_robin") // Uses ORCA metrics
403
.usePlaintext()
404
.build();
405
406
this.stub = OrderServiceGrpc.newStub(channel);
407
}
408
409
public void processOrderWithMetrics(OrderRequest request) {
410
// The load balancer will automatically use ORCA metrics
411
// reported by CallMetricRecorder and MetricRecorder
412
// to make intelligent routing decisions
413
414
stub.processOrder(request, new StreamObserver<OrderResponse>() {
415
@Override
416
public void onNext(OrderResponse response) {
417
System.out.println("Order processed: " + response.getOrderId());
418
}
419
420
@Override
421
public void onError(Throwable t) {
422
System.err.println("Order processing failed: " + t.getMessage());
423
}
424
425
@Override
426
public void onCompleted() {
427
// Request completed
428
}
429
});
430
}
431
}
432
```
433
434
### Metrics Collection and Analysis
435
436
```java
437
public class MetricsCollector {
438
private final List<MetricReport> collectedReports = new ArrayList<>();
439
440
public void collectMetrics(MetricReport report) {
441
synchronized (collectedReports) {
442
collectedReports.add(report);
443
}
444
445
// Analyze metrics
446
analyzeReport(report);
447
}
448
449
private void analyzeReport(MetricReport report) {
450
System.out.println("Metrics Analysis:");
451
System.out.println("CPU Utilization: " + report.getCpuUtilization());
452
System.out.println("Memory Utilization: " + report.getMemoryUtilization());
453
System.out.println("QPS: " + report.getQps());
454
System.out.println("EPS: " + report.getEps());
455
456
// Analyze request cost metrics
457
Map<String, Double> costMetrics = report.getRequestCostMetrics();
458
costMetrics.forEach((name, value) ->
459
System.out.println("Cost Metric " + name + ": " + value)
460
);
461
462
// Analyze custom metrics
463
Map<String, Double> namedMetrics = report.getNamedMetrics();
464
namedMetrics.forEach((name, value) ->
465
System.out.println("Named Metric " + name + ": " + value)
466
);
467
468
// Alert on high utilization
469
if (report.getCpuUtilization() > 0.8) {
470
System.out.println("WARNING: High CPU utilization detected!");
471
}
472
473
if (report.getEps() > 10.0) {
474
System.out.println("WARNING: High error rate detected!");
475
}
476
}
477
478
public void printSummaryReport() {
479
synchronized (collectedReports) {
480
if (collectedReports.isEmpty()) {
481
System.out.println("No metrics collected yet");
482
return;
483
}
484
485
double avgCpu = collectedReports.stream()
486
.mapToDouble(MetricReport::getCpuUtilization)
487
.average()
488
.orElse(0.0);
489
490
double avgMemory = collectedReports.stream()
491
.mapToDouble(MetricReport::getMemoryUtilization)
492
.average()
493
.orElse(0.0);
494
495
System.out.println("Summary Report:");
496
System.out.println("Total Reports: " + collectedReports.size());
497
System.out.println("Average CPU: " + avgCpu);
498
System.out.println("Average Memory: " + avgMemory);
499
}
500
}
501
}
502
```