or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

client-setup.md, configuration-management.md, core-resources.md, index.md, machine-management.md, monitoring.md, multicluster-management.md, operator-management.md, security-rbac.md

docs/monitoring.md

0

# Monitoring and Observability

1

2

Prometheus-based monitoring stack management through the monitoring.coreos.com API group. Provides comprehensive monitoring, alerting, and observability capabilities with Prometheus, Alertmanager, and related monitoring resources.

3

4

## Capabilities

5

6

### Monitoring Stack Management

7

8

```java { .api }

9

/**

10

* Access to Monitoring API Group (monitoring.coreos.com/v1)

11

* Prometheus-based monitoring and alerting resources

12

*/

13

OpenShiftMonitoringAPIGroupDSL monitoring();

14

15

interface OpenShiftMonitoringAPIGroupDSL {

16

/** Prometheus instances for metrics collection */

17

NonNamespaceOperation<Prometheus, PrometheusList, Resource<Prometheus>> prometheuses();

18

19

/** Alertmanager instances for alert handling */

20

NonNamespaceOperation<Alertmanager, AlertmanagerList, Resource<Alertmanager>> alertmanagers();

21

22

/** Alertmanager configuration */

23

MixedOperation<AlertmanagerConfig, AlertmanagerConfigList, Resource<AlertmanagerConfig>> alertmanagerConfigs();

24

25

/** Prometheus alerting and recording rules */

26

NonNamespaceOperation<PrometheusRule, PrometheusRuleList, Resource<PrometheusRule>> prometheusRules();

27

28

/** Service monitoring configurations */

29

MixedOperation<ServiceMonitor, ServiceMonitorList, Resource<ServiceMonitor>> serviceMonitors();

30

31

/** Pod monitoring configurations */

32

MixedOperation<PodMonitor, PodMonitorList, Resource<PodMonitor>> podMonitors();

33

34

/** Probe configurations for blackbox monitoring */

35

MixedOperation<Probe, ProbeList, Resource<Probe>> probes();

36

37

/** Thanos Ruler instances for evaluating recording and alerting rules across Prometheus instances */

38

NonNamespaceOperation<ThanosRuler, ThanosRulerList, Resource<ThanosRuler>> thanosRulers();

39

}

40

```

41

42

### Service and Pod Monitoring

43

44

Configure monitoring for services and pods to collect metrics and enable observability.

45

46

```java { .api }

47

/**

48

* Service monitoring for scraping metrics from services

49

*/

50

MixedOperation<ServiceMonitor, ServiceMonitorList, Resource<ServiceMonitor>> serviceMonitors();

51

52

/**

53

* Pod monitoring for scraping metrics directly from pods

54

*/

55

MixedOperation<PodMonitor, PodMonitorList, Resource<PodMonitor>> podMonitors();

56

```

57

58

**Usage Examples:**

59

60

```java

61

// Create service monitor for application metrics

62

ServiceMonitor serviceMonitor = new ServiceMonitorBuilder()

63

.withMetadata(new ObjectMetaBuilder()

64

.withName("my-app-monitor")

65

.withNamespace("monitoring")

66

.addToLabels("app", "my-app")

67

.build())

68

.withSpec(new ServiceMonitorSpecBuilder()

69

.withSelector(new LabelSelectorBuilder()

70

.addToMatchLabels("app", "my-app")

71

.build())

72

.addNewEndpoint()

73

.withPort("metrics")

74

.withPath("/metrics")

75

.withInterval("30s")

76

.withScrapeTimeout("10s")

77

.endEndpoint()

78

.build())

79

.build();

80

81

client.monitoring().serviceMonitors()

82

.inNamespace("monitoring")

83

.create(serviceMonitor);

84

85

// Create pod monitor for pod-level metrics

86

PodMonitor podMonitor = new PodMonitorBuilder()

87

.withMetadata(new ObjectMetaBuilder()

88

.withName("my-app-pods")

89

.withNamespace("monitoring")

90

.build())

91

.withSpec(new PodMonitorSpecBuilder()

92

.withSelector(new LabelSelectorBuilder()

93

.addToMatchLabels("app", "my-app")

94

.build())

95

.addNewPodMetricsEndpoint()

96

.withPort("metrics")

97

.withPath("/metrics")

98

.withInterval("30s")

99

.endPodMetricsEndpoint()

100

.build())

101

.build();

102

103

client.monitoring().podMonitors()

104

.inNamespace("monitoring")

105

.create(podMonitor);

106

```

107

108

### Alerting Rules and Configuration

109

110

Define Prometheus alerting rules and configure Alertmanager for alert routing and notifications.

111

112

```java { .api }

113

/**

114

* Prometheus rules for alerting and recording rules

115

*/

116

NonNamespaceOperation<PrometheusRule, PrometheusRuleList, Resource<PrometheusRule>> prometheusRules();

117

118

/**

119

* Alertmanager configuration for alert routing

120

*/

121

MixedOperation<AlertmanagerConfig, AlertmanagerConfigList, Resource<AlertmanagerConfig>> alertmanagerConfigs();

122

```

123

124

**Usage Examples:**

125

126

```java

127

// Create Prometheus alerting rules

128

PrometheusRule alertingRules = new PrometheusRuleBuilder()

129

.withMetadata(new ObjectMetaBuilder()

130

.withName("my-app-alerts")

131

.withNamespace("monitoring")

132

.addToLabels("app", "my-app")

133

.build())

134

.withSpec(new PrometheusRuleSpecBuilder()

135

.addNewGroup()

136

.withName("my-app.rules")

137

.withInterval("30s")

138

.addNewRule()

139

.withAlert("HighErrorRate")

140

.withExpr("rate(http_requests_total{status=~\"5..\"}[5m]) > 0.1")

141

.withFor("5m")

142

.addToLabels("severity", "warning")

143

.addToAnnotations("summary", "High error rate detected")

144

.addToAnnotations("description", "Error rate is {{ $value }} errors per second")

145

.endRule()

146

.addNewRule()

147

.withAlert("HighMemoryUsage")

148

.withExpr("container_memory_usage_bytes / container_spec_memory_limit_bytes > 0.9")

149

.withFor("10m")

150

.addToLabels("severity", "critical")

151

.addToAnnotations("summary", "High memory usage")

152

.endRule()

153

.endGroup()

154

.build())

155

.build();

156

157

client.monitoring().prometheusRules().create(alertingRules);

158

159

// Create Alertmanager configuration

160

AlertmanagerConfig amConfig = new AlertmanagerConfigBuilder()

161

.withMetadata(new ObjectMetaBuilder()

162

.withName("my-app-alerts")

163

.withNamespace("monitoring")

164

.build())

165

.withSpec(new AlertmanagerConfigSpecBuilder()

166

.addNewRoute()

167

.withGroupBy("alertname", "cluster", "service")

168

.withGroupWait("10s")

169

.withGroupInterval("10s")

170

.withRepeatInterval("1h")

171

.withReceiver("web.hook")

172

.addNewMatch()

173

.withName("app")

174

.withValue("my-app")

175

.endMatch()

176

.endRoute()

177

.addNewReceiver()

178

.withName("web.hook")

179

.addNewWebhookConfig()

180

.withUrl("http://my-webhook-service.monitoring.svc.cluster.local:8080/webhook")

181

.withSendResolved(true)

182

.endWebhookConfig()

183

.endReceiver()

184

.build())

185

.build();

186

187

client.monitoring().alertmanagerConfigs()

188

.inNamespace("monitoring")

189

.create(amConfig);

190

```

191

192

### Prometheus and Alertmanager Instances

193

194

Deploy and configure Prometheus and Alertmanager instances for custom monitoring requirements.

195

196

```java { .api }

197

/**

198

* Prometheus instances for metrics collection and storage

199

*/

200

NonNamespaceOperation<Prometheus, PrometheusList, Resource<Prometheus>> prometheuses();

201

202

/**

203

* Alertmanager instances for alert processing

204

*/

205

NonNamespaceOperation<Alertmanager, AlertmanagerList, Resource<Alertmanager>> alertmanagers();

206

```

207

208

**Usage Examples:**

209

210

```java

211

// Create custom Prometheus instance

212

Prometheus prometheus = new PrometheusBuilder()

213

.withMetadata(new ObjectMetaBuilder()

214

.withName("my-prometheus")

215

.withNamespace("monitoring")

216

.build())

217

.withSpec(new PrometheusSpecBuilder()

218

.withReplicas(2)

219

.withRetention("30d")

220

.withServiceAccountName("prometheus")

221

.withServiceMonitorSelector(new LabelSelectorBuilder()

222

.addToMatchLabels("team", "backend")

223

.build())

224

.withRuleSelector(new LabelSelectorBuilder()

225

.addToMatchLabels("prometheus", "my-prometheus")

226

.build())

227

.withResources(new ResourceRequirementsBuilder()

228

.addToRequests("memory", new Quantity("2Gi"))

229

.addToRequests("cpu", new Quantity("1"))

230

.addToLimits("memory", new Quantity("4Gi"))

231

.addToLimits("cpu", new Quantity("2"))

232

.build())

233

.withStorage(new StorageSpecBuilder()

234

.withVolumeClaimTemplate(new EmbeddedPersistentVolumeClaimBuilder()

235

.withMetadata(new ObjectMetaBuilder()

236

.withName("prometheus-storage")

237

.build())

238

.withSpec(new PersistentVolumeClaimSpecBuilder()

239

.withAccessModes("ReadWriteOnce")

240

.withResources(new ResourceRequirementsBuilder()

241

.addToRequests("storage", new Quantity("50Gi"))

242

.build())

243

.build())

244

.build())

245

.build())

246

.build())

247

.build();

248

249

client.monitoring().prometheuses().create(prometheus);

250

251

// Create Alertmanager instance

252

Alertmanager alertmanager = new AlertmanagerBuilder()

253

.withMetadata(new ObjectMetaBuilder()

254

.withName("my-alertmanager")

255

.withNamespace("monitoring")

256

.build())

257

.withSpec(new AlertmanagerSpecBuilder()

258

.withReplicas(3)

259

.withRetention("120h")

260

.withConfigSecret("alertmanager-config")

261

.withResources(new ResourceRequirementsBuilder()

262

.addToRequests("memory", new Quantity("200Mi"))

263

.addToRequests("cpu", new Quantity("100m"))

264

.build())

265

.build())

266

.build();

267

268

client.monitoring().alertmanagers().create(alertmanager);

269

```

270

271

### Blackbox Monitoring and Probes

272

273

Configure external endpoint monitoring using blackbox exporter probes.

274

275

```java { .api }

276

/**

277

* Probe configurations for blackbox monitoring of external endpoints

278

*/

279

MixedOperation<Probe, ProbeList, Resource<Probe>> probes();

280

```

281

282

**Usage Examples:**

283

284

```java

285

// Create HTTP probe for external service monitoring

286

Probe httpProbe = new ProbeBuilder()

287

.withMetadata(new ObjectMetaBuilder()

288

.withName("external-api-probe")

289

.withNamespace("monitoring")

290

.build())

291

.withSpec(new ProbeSpecBuilder()

292

.withProberSpec(new ProberSpecBuilder()

293

.withUrl("blackbox-exporter:9115")

294

.build())

295

.withModule("http_2xx")

296

.withTargets(new TargetsBuilder()

297

.withStaticConfig(new StaticConfigBuilder()

298

.withStatic("https://api.example.com/health")

299

.withLabels(Map.of(

300

"service", "external-api",

301

"environment", "production"

302

))

303

.build())

304

.build())

305

.withInterval("30s")

306

.withScrapeTimeout("10s")

307

.build())

308

.build();

309

310

client.monitoring().probes()

311

.inNamespace("monitoring")

312

.create(httpProbe);

313

```

314

315

## Usage Patterns

316

317

### Complete Monitoring Setup

318

319

```java

320

try (OpenShiftClient client = new KubernetesClientBuilder().build().adapt(OpenShiftClient.class)) {

321

String monitoringNamespace = "app-monitoring";

322

323

// 1. Create service monitor for application

324

ServiceMonitor appMonitor = new ServiceMonitorBuilder()

325

.withMetadata(new ObjectMetaBuilder()

326

.withName("my-app")

327

.withNamespace(monitoringNamespace)

328

.addToLabels("app", "my-app")

329

.build())

330

.withSpec(new ServiceMonitorSpecBuilder()

331

.withSelector(new LabelSelectorBuilder()

332

.addToMatchLabels("app", "my-app")

333

.build())

334

.addNewEndpoint()

335

.withPort("metrics")

336

.withPath("/metrics")

337

.withInterval("30s")

338

.endEndpoint()

339

.build())

340

.build();

341

342

client.monitoring().serviceMonitors()

343

.inNamespace(monitoringNamespace)

344

.create(appMonitor);

345

346

// 2. Create alerting rules

347

PrometheusRule rules = new PrometheusRuleBuilder()

348

.withMetadata(new ObjectMetaBuilder()

349

.withName("my-app-alerts")

350

.withNamespace(monitoringNamespace)

351

.addToLabels("prometheus", "app-prometheus")

352

.build())

353

.withSpec(new PrometheusRuleSpecBuilder()

354

.addNewGroup()

355

.withName("my-app.rules")

356

.addNewRule()

357

.withAlert("AppDown")

358

.withExpr("up{job=\"my-app\"} == 0")

359

.withFor("5m")

360

.addToLabels("severity", "critical")

361

.addToAnnotations("summary", "Application is down")

362

.endRule()

363

.endGroup()

364

.build())

365

.build();

366

367

client.monitoring().prometheusRules().create(rules);

368

369

// 3. Configure alert routing

370

AlertmanagerConfig alertConfig = new AlertmanagerConfigBuilder()

371

.withMetadata(new ObjectMetaBuilder()

372

.withName("my-app-routing")

373

.withNamespace(monitoringNamespace)

374

.build())

375

.withSpec(new AlertmanagerConfigSpecBuilder()

376

.addNewRoute()

377

.withReceiver("slack-notifications")

378

.addNewMatch()

379

.withName("app")

380

.withValue("my-app")

381

.endMatch()

382

.endRoute()

383

.addNewReceiver()

384

.withName("slack-notifications")

385

.addNewSlackConfig()

386

.withApiUrl(new SecretKeySelectorBuilder()

387

.withName("slack-webhook")

388

.withKey("url")

389

.build())

390

.withChannel("#alerts")

391

.withTitle("Alert: {{ .GroupLabels.alertname }}")

392

.withText("{{ range .Alerts }}{{ .Annotations.summary }}{{ end }}")

393

.endSlackConfig()

394

.endReceiver()

395

.build())

396

.build();

397

398

client.monitoring().alertmanagerConfigs()

399

.inNamespace(monitoringNamespace)

400

.create(alertConfig);

401

}

402

```