0
# Tracing and Metrics
1
2
Apache Curator includes a pluggable tracing framework for monitoring ZooKeeper operations, connection events, and performance metrics. The tracing system provides hooks for collecting operational data without affecting core client functionality.
3
4
## Capabilities
5
6
### TracerDriver Interface
7
8
Core interface for implementing custom tracing and metrics collection systems.
9
10
```java { .api }
11
/**
 * Pluggable sink for tracing and metrics data.
 */
public interface TracerDriver {
    /**
     * Records how long a named operation took.
     *
     * @param name operation name or identifier
     * @param time duration of the operation, expressed in {@code unit}
     * @param unit time unit of {@code time}
     */
    void addTrace(String name, long time, TimeUnit unit);

    /**
     * Adds to a named event counter.
     *
     * @param name counter name or identifier
     * @param increment amount to add to the counter (typically 1)
     */
    void addCount(String name, int increment);
}
30
```
31
32
**Usage Examples:**
33
34
```java
35
import org.apache.curator.drivers.TracerDriver;
36
import java.util.concurrent.TimeUnit;
37
38
// Custom tracing implementation
39
TracerDriver customTracer = new TracerDriver() {
40
@Override
41
public void addTrace(String name, long time, TimeUnit unit) {
42
long millis = unit.toMillis(time);
43
System.out.printf("TRACE: %s took %d ms%n", name, millis);
44
// Send to metrics system (e.g., Micrometer, Dropwizard Metrics)
45
metricsRegistry.timer(name).record(time, unit);
46
}
47
48
@Override
49
public void addCount(String name, int increment) {
50
System.out.printf("COUNT: %s incremented by %d%n", name, increment);
51
// Send to metrics system
52
metricsRegistry.counter(name).increment(increment);
53
}
54
};
55
```
56
57
### AdvancedTracerDriver Abstract Class
58
59
Abstract base class that extends the `TracerDriver` contract with additional capabilities for more sophisticated monitoring systems.
60
61
```java { .api }
62
/**
63
* Abstract class extending TracerDriver with advanced tracing capabilities
64
*/
65
public abstract class AdvancedTracerDriver implements TracerDriver {
66
// Provides additional tracing capabilities beyond basic TracerDriver
67
// Implementations can override to provide enhanced tracing features
68
}
69
```
70
71
### DefaultTracerDriver Class
72
73
No-operation implementation of TracerDriver for cases where tracing is not needed.
74
75
```java { .api }
76
/**
77
* Default no-operation implementation of TracerDriver
78
*/
79
public class DefaultTracerDriver implements TracerDriver {
80
/**
81
* No-op trace method - discards timing information
82
*/
83
@Override
84
public void addTrace(String name, long time, TimeUnit unit) {
85
// No operation - tracing disabled
86
}
87
88
/**
89
* No-op count method - discards counter information
90
*/
91
@Override
92
public void addCount(String name, int increment) {
93
// No operation - counting disabled
94
}
95
}
96
```
97
98
### TimeTrace Class
99
100
Utility class for measuring operation duration and automatically reporting traces.
101
102
```java { .api }
103
/**
104
* Utility for measuring and tracing operation duration
105
*/
106
public class TimeTrace {
107
/**
108
* Create a time trace for an operation
109
* @param name Operation name for tracing
110
* @param driver TracerDriver to receive the timing data
111
*/
112
public TimeTrace(String name, TracerDriver driver);
113
114
/**
115
* Complete the time measurement and send trace
116
* Call this when the operation completes
117
*/
118
public void commit();
119
}
120
```
121
122
**Usage Examples:**
123
124
```java
125
import org.apache.curator.TimeTrace;
126
import org.apache.curator.drivers.TracerDriver;
127
128
// Measure operation duration
129
TracerDriver tracer = getTracerDriver();
130
131
TimeTrace trace = new TimeTrace("zookeeper.getData", tracer);
132
try {
133
// Perform ZooKeeper operation
134
byte[] data = zookeeper.getData("/some/path", false, null);
135
// ... process data ...
136
} finally {
137
// Always commit the trace to record timing
138
trace.commit();
139
}
140
141
// Try-with-resources pattern (if TimeTrace implements AutoCloseable in your version)
142
try (TimeTrace trace = new TimeTrace("zookeeper.create", tracer)) {
143
zookeeper.create("/new/path", data, ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
144
// trace.commit() called automatically
145
}
146
```
147
148
### OperationTrace Class
149
150
Detailed operation tracing with session information for comprehensive monitoring.
151
152
```java { .api }
153
/**
154
* Detailed operation tracing with session context
155
*/
156
public class OperationTrace {
157
/**
158
* Create operation trace with session information
159
* @param name Operation name
160
* @param driver TracerDriver for receiving trace data
161
* @param sessionId ZooKeeper session ID for context
162
*/
163
public OperationTrace(String name, TracerDriver driver, long sessionId);
164
165
/**
166
* Complete the operation trace
167
*/
168
public void commit();
169
}
170
```
171
172
**Usage Examples:**
173
174
```java
175
import org.apache.curator.drivers.OperationTrace;
176
177
// Trace operations with session context
178
long sessionId = zookeeper.getSessionId();
179
OperationTrace opTrace = new OperationTrace("create-ephemeral", tracer, sessionId);
180
181
try {
182
zookeeper.create("/ephemeral/node", data, ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
183
} finally {
184
opTrace.commit();
185
}
186
```
187
188
### EventTrace Class
189
190
Event-based tracing for discrete events and state changes.
191
192
```java { .api }
193
/**
194
* Event-based tracing for discrete events
195
*/
196
public class EventTrace {
197
/**
198
* Create event trace
199
* @param name Event name
200
* @param driver TracerDriver for receiving event data
201
*/
202
public EventTrace(String name, TracerDriver driver);
203
204
/**
205
* Commit the event trace
206
*/
207
public void commit();
208
}
209
```
210
211
**Usage Examples:**
212
213
```java
214
import org.apache.curator.drivers.EventTrace;
215
216
// Discrete events: create the trace and commit it straight away
new EventTrace("connection.established", tracer).commit();
new EventTrace("session.expired", tracer).commit();

// Count-based events go directly to the driver
tracer.addCount("connection.retry", 1);
tracer.addCount("operation.timeout", 1);
226
```
227
228
## Integration Patterns
229
230
### Curator Client Tracing Integration
231
232
```java
233
import org.apache.curator.CuratorZookeeperClient;
234
import org.apache.curator.drivers.TracerDriver;
235
236
// Custom tracer for production monitoring
237
TracerDriver productionTracer = new TracerDriver() {
238
private final MeterRegistry meterRegistry = Metrics.globalRegistry;
239
240
@Override
241
public void addTrace(String name, long time, TimeUnit unit) {
242
Timer.Sample.start(meterRegistry)
243
.stop(Timer.builder(name).register(meterRegistry));
244
}
245
246
@Override
247
public void addCount(String name, int increment) {
248
Counter.builder(name).register(meterRegistry).increment(increment);
249
}
250
};
251
252
// Configure client with tracing
253
CuratorZookeeperClient client = new CuratorZookeeperClientBuilder()
254
.connectString("localhost:2181")
255
.sessionTimeoutMs(5000)
256
.connectionTimeoutMs(5000)
257
.retryPolicy(new ExponentialBackoffRetry(1000, 3))
258
.tracerDriver(productionTracer) // Enable tracing
259
.build();
260
```
261
262
### Custom Tracing Implementations
263
264
```java
265
// Console tracing for development
266
TracerDriver consoleTracer = new TracerDriver() {
267
@Override
268
public void addTrace(String name, long time, TimeUnit unit) {
269
System.out.printf("[TRACE] %s: %d %s%n", name, time, unit.name());
270
}
271
272
@Override
273
public void addCount(String name, int increment) {
274
System.out.printf("[COUNT] %s: +%d%n", name, increment);
275
}
276
};
277
278
// File-based tracing
279
TracerDriver fileTracer = new TracerDriver() {
280
private final PrintWriter logWriter = new PrintWriter(new FileWriter("curator-traces.log", true));
281
282
@Override
283
public void addTrace(String name, long time, TimeUnit unit) {
284
logWriter.printf("%s TRACE %s %d %s%n",
285
Instant.now(), name, time, unit.name());
286
logWriter.flush();
287
}
288
289
@Override
290
public void addCount(String name, int increment) {
291
logWriter.printf("%s COUNT %s %d%n",
292
Instant.now(), name, increment);
293
logWriter.flush();
294
}
295
};
296
297
// Composite tracer (send to multiple destinations)
298
TracerDriver compositeTracer = new TracerDriver() {
299
private final List<TracerDriver> tracers = Arrays.asList(consoleTracer, fileTracer);
300
301
@Override
302
public void addTrace(String name, long time, TimeUnit unit) {
303
tracers.forEach(tracer -> tracer.addTrace(name, time, unit));
304
}
305
306
@Override
307
public void addCount(String name, int increment) {
308
tracers.forEach(tracer -> tracer.addCount(name, increment));
309
}
310
};
311
```
312
313
### Common Trace Names and Patterns
314
315
```java
316
// Connection lifecycle tracing
317
tracer.addCount("curator.connection.started", 1);
318
tracer.addCount("curator.connection.lost", 1);
319
tracer.addCount("curator.connection.reconnected", 1);
320
321
// Operation timing
322
TimeTrace createTrace = new TimeTrace("curator.zk.create", tracer);
323
TimeTrace getTrace = new TimeTrace("curator.zk.getData", tracer);
324
TimeTrace deleteTrace = new TimeTrace("curator.zk.delete", tracer);
325
326
// Retry tracking
327
tracer.addCount("curator.retry.attempt", 1);
328
tracer.addCount("curator.retry.exhausted", 1);
329
tracer.addCount("curator.retry.success", 1);
330
331
// Session tracking
332
tracer.addCount("curator.session.created", 1);
333
tracer.addCount("curator.session.expired", 1);
334
```
335
336
## Monitoring Best Practices
337
338
### Performance Monitoring
339
340
```java
341
// Monitor critical operations
342
TracerDriver performanceTracer = new TracerDriver() {
343
@Override
344
public void addTrace(String name, long time, TimeUnit unit) {
345
long millis = unit.toMillis(time);
346
347
// Alert on slow operations
348
if (millis > 5000) {
349
alertingSystem.sendAlert("Slow ZooKeeper operation: " + name + " took " + millis + "ms");
350
}
351
352
// Record metrics
353
metricsSystem.recordTiming(name, millis);
354
}
355
356
@Override
357
public void addCount(String name, int increment) {
358
// Track operation counts
359
metricsSystem.incrementCounter(name, increment);
360
361
// Alert on high error rates
362
if (name.contains("error") || name.contains("timeout")) {
363
errorRateTracker.recordError(name);
364
}
365
}
366
};
367
```
368
369
### Health Check Integration
370
371
```java
372
// Use tracing data for health checks
373
public class CuratorHealthCheck {
374
private final AtomicLong lastSuccessfulOperation = new AtomicLong(System.currentTimeMillis());
375
376
private final TracerDriver healthTracer = new TracerDriver() {
377
@Override
378
public void addTrace(String name, long time, TimeUnit unit) {
379
if (!name.contains("error")) {
380
lastSuccessfulOperation.set(System.currentTimeMillis());
381
}
382
}
383
384
@Override
385
public void addCount(String name, int increment) {
386
if (name.contains("success")) {
387
lastSuccessfulOperation.set(System.currentTimeMillis());
388
}
389
}
390
};
391
392
public boolean isHealthy() {
393
long timeSinceLastSuccess = System.currentTimeMillis() - lastSuccessfulOperation.get();
394
return timeSinceLastSuccess < 30000; // Healthy if successful operation within 30s
395
}
396
}
397
```