or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

client-connection.md · connection-handling.md · ensemble-providers.md · index.md · path-utilities.md · retry-policies.md · tracing-metrics.md

tracing-metrics.mddocs/

0

# Tracing and Metrics

1

2

Pluggable tracing framework for monitoring ZooKeeper operations, connection events, and performance metrics in Apache Curator. The tracing system provides hooks for collecting operational data without impacting core functionality.

3

4

## Capabilities

5

6

### TracerDriver Interface

7

8

Core interface for implementing custom tracing and metrics collection systems.

9

10

```java { .api }

11

/**
 * Contract for receiving tracing and metrics data.
 */
public interface TracerDriver {
    /**
     * Records how long an operation took.
     *
     * @param name identifier of the operation that was timed
     * @param time elapsed duration, expressed in {@code unit}
     * @param unit time unit of {@code time}
     */
    void addTrace(String name, long time, TimeUnit unit);

    /**
     * Adds to a named counter.
     *
     * @param name identifier of the counter
     * @param increment amount to add to the counter (typically 1)
     */
    void addCount(String name, int increment);
}

30

```

31

32

**Usage Examples:**

33

34

```java

35

import org.apache.curator.drivers.TracerDriver;

36

import java.util.concurrent.TimeUnit;

37

38

// Custom tracing implementation

39

TracerDriver customTracer = new TracerDriver() {

40

@Override

41

public void addTrace(String name, long time, TimeUnit unit) {

42

long millis = unit.toMillis(time);

43

System.out.printf("TRACE: %s took %d ms%n", name, millis);

44

// Send to metrics system (e.g., Micrometer, Dropwizard Metrics)

45

metricsRegistry.timer(name).record(time, unit);

46

}

47

48

@Override

49

public void addCount(String name, int increment) {

50

System.out.printf("COUNT: %s incremented by %d%n", name, increment);

51

// Send to metrics system

52

metricsRegistry.counter(name).increment(increment);

53

}

54

};

55

```

56

57

### AdvancedTracerDriver Abstract Class

58

59

Abstract base class that implements `TracerDriver`, intended for monitoring systems that need more than the basic tracing interface.

60

61

```java { .api }

62

/**

63

* Abstract class extending TracerDriver with advanced tracing capabilities

64

*/

65

public abstract class AdvancedTracerDriver implements TracerDriver {

66

// Provides additional tracing capabilities beyond basic TracerDriver

67

// Implementations can override to provide enhanced tracing features

68

}

69

```

70

71

### DefaultTracerDriver Class

72

73

No-operation implementation of TracerDriver for cases where tracing is not needed.

74

75

```java { .api }

76

/**

77

* Default no-operation implementation of TracerDriver

78

*/

79

public class DefaultTracerDriver implements TracerDriver {

80

/**

81

* No-op trace method - discards timing information

82

*/

83

@Override

84

public void addTrace(String name, long time, TimeUnit unit) {

85

// No operation - tracing disabled

86

}

87

88

/**

89

* No-op count method - discards counter information

90

*/

91

@Override

92

public void addCount(String name, int increment) {

93

// No operation - counting disabled

94

}

95

}

96

```

97

98

### TimeTrace Class

99

100

Utility class for measuring operation duration and automatically reporting traces.

101

102

```java { .api }

103

/**

104

* Utility for measuring and tracing operation duration

105

*/

106

public class TimeTrace {

107

/**

108

* Create a time trace for an operation

109

* @param name Operation name for tracing

110

* @param driver TracerDriver to receive the timing data

111

*/

112

public TimeTrace(String name, TracerDriver driver);

113

114

/**

115

* Complete the time measurement and send trace

116

* Call this when the operation completes

117

*/

118

public void commit();

119

}

120

```

121

122

**Usage Examples:**

123

124

```java

125

import org.apache.curator.TimeTrace;

126

import org.apache.curator.drivers.TracerDriver;

127

128

// Measure operation duration

129

TracerDriver tracer = getTracerDriver();

130

131

TimeTrace trace = new TimeTrace("zookeeper.getData", tracer);

132

try {

133

// Perform ZooKeeper operation

134

byte[] data = zookeeper.getData("/some/path", false, null);

135

// ... process data ...

136

} finally {

137

// Always commit the trace to record timing

138

trace.commit();

139

}

140

141

// Try-with-resources pattern (if TimeTrace implements AutoCloseable in your version)

142

try (TimeTrace trace = new TimeTrace("zookeeper.create", tracer)) {

143

zookeeper.create("/new/path", data, ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);

144

// trace.commit() called automatically

145

}

146

```

147

148

### OperationTrace Class

149

150

Detailed operation tracing with session information for comprehensive monitoring.

151

152

```java { .api }

153

/**

154

* Detailed operation tracing with session context

155

*/

156

public class OperationTrace {

157

/**

158

* Create operation trace with session information

159

* @param name Operation name

160

* @param driver TracerDriver for receiving trace data

161

* @param sessionId ZooKeeper session ID for context

162

*/

163

public OperationTrace(String name, TracerDriver driver, long sessionId);

164

165

/**

166

* Complete the operation trace

167

*/

168

public void commit();

169

}

170

```

171

172

**Usage Examples:**

173

174

```java

175

import org.apache.curator.drivers.OperationTrace;

176

177

// Trace operations with session context

178

long sessionId = zookeeper.getSessionId();

179

OperationTrace opTrace = new OperationTrace("create-ephemeral", tracer, sessionId);

180

181

try {

182

zookeeper.create("/ephemeral/node", data, ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);

183

} finally {

184

opTrace.commit();

185

}

186

```

187

188

### EventTrace Class

189

190

Event-based tracing for discrete events and state changes.

191

192

```java { .api }

193

/**

194

* Event-based tracing for discrete events

195

*/

196

public class EventTrace {

197

/**

198

* Create event trace

199

* @param name Event name

200

* @param driver TracerDriver for receiving event data

201

*/

202

public EventTrace(String name, TracerDriver driver);

203

204

/**

205

* Commit the event trace

206

*/

207

public void commit();

208

}

209

```

210

211

**Usage Examples:**

212

213

```java

214

import org.apache.curator.drivers.EventTrace;

215

216

// Discrete events: build the trace and commit it straight away
new EventTrace("connection.established", tracer).commit();
new EventTrace("session.expired", tracer).commit();

// Counters can also be incremented directly on the driver
tracer.addCount("connection.retry", 1);
tracer.addCount("operation.timeout", 1);

226

```

227

228

## Integration Patterns

229

230

### Curator Client Tracing Integration

231

232

```java

233

import org.apache.curator.CuratorZookeeperClient;

234

import org.apache.curator.drivers.TracerDriver;

235

236

// Custom tracer for production monitoring

237

TracerDriver productionTracer = new TracerDriver() {

238

private final MeterRegistry meterRegistry = Metrics.globalRegistry;

239

240

@Override

241

public void addTrace(String name, long time, TimeUnit unit) {

242

Timer.Sample.start(meterRegistry)

243

.stop(Timer.builder(name).register(meterRegistry));

244

}

245

246

@Override

247

public void addCount(String name, int increment) {

248

Counter.builder(name).register(meterRegistry).increment(increment);

249

}

250

};

251

252

// Configure client with tracing

253

CuratorZookeeperClient client = new CuratorZookeeperClientBuilder()

254

.connectString("localhost:2181")

255

.sessionTimeoutMs(5000)

256

.connectionTimeoutMs(5000)

257

.retryPolicy(new ExponentialBackoffRetry(1000, 3))

258

.tracerDriver(productionTracer) // Enable tracing

259

.build();

260

```

261

262

### Custom Tracing Implementations

263

264

```java

265

// Console tracing for development

266

TracerDriver consoleTracer = new TracerDriver() {

267

@Override

268

public void addTrace(String name, long time, TimeUnit unit) {

269

System.out.printf("[TRACE] %s: %d %s%n", name, time, unit.name());

270

}

271

272

@Override

273

public void addCount(String name, int increment) {

274

System.out.printf("[COUNT] %s: +%d%n", name, increment);

275

}

276

};

277

278

// File-based tracing

279

TracerDriver fileTracer = new TracerDriver() {

280

private final PrintWriter logWriter = new PrintWriter(new FileWriter("curator-traces.log", true));

281

282

@Override

283

public void addTrace(String name, long time, TimeUnit unit) {

284

logWriter.printf("%s TRACE %s %d %s%n",

285

Instant.now(), name, time, unit.name());

286

logWriter.flush();

287

}

288

289

@Override

290

public void addCount(String name, int increment) {

291

logWriter.printf("%s COUNT %s %d%n",

292

Instant.now(), name, increment);

293

logWriter.flush();

294

}

295

};

296

297

// Composite tracer (send to multiple destinations)

298

TracerDriver compositeTracer = new TracerDriver() {

299

private final List<TracerDriver> tracers = Arrays.asList(consoleTracer, fileTracer);

300

301

@Override

302

public void addTrace(String name, long time, TimeUnit unit) {

303

tracers.forEach(tracer -> tracer.addTrace(name, time, unit));

304

}

305

306

@Override

307

public void addCount(String name, int increment) {

308

tracers.forEach(tracer -> tracer.addCount(name, increment));

309

}

310

};

311

```

312

313

### Common Trace Names and Patterns

314

315

```java

316

// Connection lifecycle tracing

317

tracer.addCount("curator.connection.started", 1);

318

tracer.addCount("curator.connection.lost", 1);

319

tracer.addCount("curator.connection.reconnected", 1);

320

321

// Operation timing

322

TimeTrace createTrace = new TimeTrace("curator.zk.create", tracer);

323

TimeTrace getTrace = new TimeTrace("curator.zk.getData", tracer);

324

TimeTrace deleteTrace = new TimeTrace("curator.zk.delete", tracer);

325

326

// Retry tracking

327

tracer.addCount("curator.retry.attempt", 1);

328

tracer.addCount("curator.retry.exhausted", 1);

329

tracer.addCount("curator.retry.success", 1);

330

331

// Session tracking

332

tracer.addCount("curator.session.created", 1);

333

tracer.addCount("curator.session.expired", 1);

334

```

335

336

## Monitoring Best Practices

337

338

### Performance Monitoring

339

340

```java

341

// Monitor critical operations

342

TracerDriver performanceTracer = new TracerDriver() {

343

@Override

344

public void addTrace(String name, long time, TimeUnit unit) {

345

long millis = unit.toMillis(time);

346

347

// Alert on slow operations

348

if (millis > 5000) {

349

alertingSystem.sendAlert("Slow ZooKeeper operation: " + name + " took " + millis + "ms");

350

}

351

352

// Record metrics

353

metricsSystem.recordTiming(name, millis);

354

}

355

356

@Override

357

public void addCount(String name, int increment) {

358

// Track operation counts

359

metricsSystem.incrementCounter(name, increment);

360

361

// Alert on high error rates

362

if (name.contains("error") || name.contains("timeout")) {

363

errorRateTracker.recordError(name);

364

}

365

}

366

};

367

```

368

369

### Health Check Integration

370

371

```java

372

// Use tracing data for health checks

373

public class CuratorHealthCheck {

374

private final AtomicLong lastSuccessfulOperation = new AtomicLong(System.currentTimeMillis());

375

376

private final TracerDriver healthTracer = new TracerDriver() {

377

@Override

378

public void addTrace(String name, long time, TimeUnit unit) {

379

if (!name.contains("error")) {

380

lastSuccessfulOperation.set(System.currentTimeMillis());

381

}

382

}

383

384

@Override

385

public void addCount(String name, int increment) {

386

if (name.contains("success")) {

387

lastSuccessfulOperation.set(System.currentTimeMillis());

388

}

389

}

390

};

391

392

public boolean isHealthy() {

393

long timeSinceLastSuccess = System.currentTimeMillis() - lastSuccessfulOperation.get();

394

return timeSinceLastSuccess < 30000; // Healthy if successful operation within 30s

395

}

396

}

397

```