0
# Monitoring and Observability
1
2
Manages the Prometheus-based monitoring stack through the `monitoring.coreos.com` API group. The DSL exposes typed operations for Prometheus and Alertmanager instances and for the resources that configure them: alerting and recording rules, Alertmanager configurations, service and pod monitors, probes, and Thanos rulers.
3
4
## Capabilities
5
6
### Monitoring Stack Management
7
8
```java { .api }
9
/**
10
* Access to Monitoring API Group (monitoring.coreos.com/v1)
11
* Prometheus-based monitoring and alerting resources
12
*/
13
OpenShiftMonitoringAPIGroupDSL monitoring();
14
15
interface OpenShiftMonitoringAPIGroupDSL {
16
/** Prometheus instances for metrics collection */
17
NonNamespaceOperation<Prometheus, PrometheusList, Resource<Prometheus>> prometheuses();
18
19
/** Alertmanager instances for alert handling */
20
NonNamespaceOperation<Alertmanager, AlertmanagerList, Resource<Alertmanager>> alertmanagers();
21
22
/** Alertmanager configuration */
23
MixedOperation<AlertmanagerConfig, AlertmanagerConfigList, Resource<AlertmanagerConfig>> alertmanagerConfigs();
24
25
/** Prometheus alerting and recording rules */
26
NonNamespaceOperation<PrometheusRule, PrometheusRuleList, Resource<PrometheusRule>> prometheusRules();
27
28
/** Service monitoring configurations */
29
MixedOperation<ServiceMonitor, ServiceMonitorList, Resource<ServiceMonitor>> serviceMonitors();
30
31
/** Pod monitoring configurations */
32
MixedOperation<PodMonitor, PodMonitorList, Resource<PodMonitor>> podMonitors();
33
34
/** Probe configurations for blackbox monitoring */
35
MixedOperation<Probe, ProbeList, Resource<Probe>> probes();
36
37
/** Thanos ruler instances for long-term storage */
38
NonNamespaceOperation<ThanosRuler, ThanosRulerList, Resource<ThanosRuler>> thanosRulers();
39
}
40
```
41
42
### Service and Pod Monitoring
43
44
Define `ServiceMonitor` and `PodMonitor` resources to declare which services and pods should be scraped for metrics.
45
46
```java { .api }
47
/**
48
* Service monitoring for scraping metrics from services
49
*/
50
MixedOperation<ServiceMonitor, ServiceMonitorList, Resource<ServiceMonitor>> serviceMonitors();
51
52
/**
53
* Pod monitoring for scraping metrics directly from pods
54
*/
55
MixedOperation<PodMonitor, PodMonitorList, Resource<PodMonitor>> podMonitors();
56
```
57
58
**Usage Examples:**
59
60
```java
61
// Create service monitor for application metrics
62
ServiceMonitor serviceMonitor = new ServiceMonitorBuilder()
63
.withMetadata(new ObjectMetaBuilder()
64
.withName("my-app-monitor")
65
.withNamespace("monitoring")
66
.addToLabels("app", "my-app")
67
.build())
68
.withSpec(new ServiceMonitorSpecBuilder()
69
.withSelector(new LabelSelectorBuilder()
70
.addToMatchLabels("app", "my-app")
71
.build())
72
.addNewEndpoint()
73
.withPort("metrics")
74
.withPath("/metrics")
75
.withInterval("30s")
76
.withScrapeTimeout("10s")
77
.endEndpoint()
78
.build())
79
.build();
80
81
client.monitoring().serviceMonitors()
82
.inNamespace("monitoring")
83
.create(serviceMonitor);
84
85
// Create pod monitor for pod-level metrics
86
PodMonitor podMonitor = new PodMonitorBuilder()
87
.withMetadata(new ObjectMetaBuilder()
88
.withName("my-app-pods")
89
.withNamespace("monitoring")
90
.build())
91
.withSpec(new PodMonitorSpecBuilder()
92
.withSelector(new LabelSelectorBuilder()
93
.addToMatchLabels("app", "my-app")
94
.build())
95
.addNewPodMetricsEndpoint()
96
.withPort("metrics")
97
.withPath("/metrics")
98
.withInterval("30s")
99
.endPodMetricsEndpoint()
100
.build())
101
.build();
102
103
client.monitoring().podMonitors()
104
.inNamespace("monitoring")
105
.create(podMonitor);
106
```
107
108
### Alerting Rules and Configuration
109
110
Define Prometheus alerting rules and configure Alertmanager for alert routing and notifications.
111
112
```java { .api }
113
/**
114
* Prometheus rules for alerting and recording rules
115
*/
116
NonNamespaceOperation<PrometheusRule, PrometheusRuleList, Resource<PrometheusRule>> prometheusRules();
117
118
/**
119
* Alertmanager configuration for alert routing
120
*/
121
MixedOperation<AlertmanagerConfig, AlertmanagerConfigList, Resource<AlertmanagerConfig>> alertmanagerConfigs();
122
```
123
124
**Usage Examples:**
125
126
```java
127
// Create Prometheus alerting rules
128
PrometheusRule alertingRules = new PrometheusRuleBuilder()
129
.withMetadata(new ObjectMetaBuilder()
130
.withName("my-app-alerts")
131
.withNamespace("monitoring")
132
.addToLabels("app", "my-app")
133
.build())
134
.withSpec(new PrometheusRuleSpecBuilder()
135
.addNewGroup()
136
.withName("my-app.rules")
137
.withInterval("30s")
138
.addNewRule()
139
.withAlert("HighErrorRate")
140
.withExpr("rate(http_requests_total{status=~\"5..\"}[5m]) > 0.1")
141
.withFor("5m")
142
.addToLabels("severity", "warning")
143
.addToAnnotations("summary", "High error rate detected")
144
.addToAnnotations("description", "Error rate is {{ $value }} errors per second")
145
.endRule()
146
.addNewRule()
147
.withAlert("HighMemoryUsage")
148
.withExpr("container_memory_usage_bytes / container_spec_memory_limit_bytes > 0.9")
149
.withFor("10m")
150
.addToLabels("severity", "critical")
151
.addToAnnotations("summary", "High memory usage")
152
.endRule()
153
.endGroup()
154
.build())
155
.build();
156
157
client.monitoring().prometheusRules().create(alertingRules);
158
159
// Create Alertmanager configuration
160
AlertmanagerConfig amConfig = new AlertmanagerConfigBuilder()
161
.withMetadata(new ObjectMetaBuilder()
162
.withName("my-app-alerts")
163
.withNamespace("monitoring")
164
.build())
165
.withSpec(new AlertmanagerConfigSpecBuilder()
166
.addNewRoute()
167
.withGroupBy("alertname", "cluster", "service")
168
.withGroupWait("10s")
169
.withGroupInterval("10s")
170
.withRepeatInterval("1h")
171
.withReceiver("web.hook")
172
.addNewMatch()
173
.withName("app")
174
.withValue("my-app")
175
.endMatch()
176
.endRoute()
177
.addNewReceiver()
178
.withName("web.hook")
179
.addNewWebhookConfig()
180
.withUrl("http://my-webhook-service.monitoring.svc.cluster.local:8080/webhook")
181
.withSendResolved(true)
182
.endWebhookConfig()
183
.endReceiver()
184
.build())
185
.build();
186
187
client.monitoring().alertmanagerConfigs()
188
.inNamespace("monitoring")
189
.create(amConfig);
190
```
191
192
### Prometheus and Alertmanager Instances
193
194
Deploy and configure Prometheus and Alertmanager instances for custom monitoring requirements.
195
196
```java { .api }
197
/**
198
* Prometheus instances for metrics collection and storage
199
*/
200
NonNamespaceOperation<Prometheus, PrometheusList, Resource<Prometheus>> prometheuses();
201
202
/**
203
* Alertmanager instances for alert processing
204
*/
205
NonNamespaceOperation<Alertmanager, AlertmanagerList, Resource<Alertmanager>> alertmanagers();
206
```
207
208
**Usage Examples:**
209
210
```java
211
// Create custom Prometheus instance
212
Prometheus prometheus = new PrometheusBuilder()
213
.withMetadata(new ObjectMetaBuilder()
214
.withName("my-prometheus")
215
.withNamespace("monitoring")
216
.build())
217
.withSpec(new PrometheusSpecBuilder()
218
.withReplicas(2)
219
.withRetention("30d")
220
.withServiceAccountName("prometheus")
221
.withServiceMonitorSelector(new LabelSelectorBuilder()
222
.addToMatchLabels("team", "backend")
223
.build())
224
.withRuleSelector(new LabelSelectorBuilder()
225
.addToMatchLabels("prometheus", "my-prometheus")
226
.build())
227
.withResources(new ResourceRequirementsBuilder()
228
.addToRequests("memory", new Quantity("2Gi"))
229
.addToRequests("cpu", new Quantity("1"))
230
.addToLimits("memory", new Quantity("4Gi"))
231
.addToLimits("cpu", new Quantity("2"))
232
.build())
233
.withStorage(new StorageSpecBuilder()
234
.withVolumeClaimTemplate(new EmbeddedPersistentVolumeClaimBuilder()
235
.withMetadata(new ObjectMetaBuilder()
236
.withName("prometheus-storage")
237
.build())
238
.withSpec(new PersistentVolumeClaimSpecBuilder()
239
.withAccessModes("ReadWriteOnce")
240
.withResources(new ResourceRequirementsBuilder()
241
.addToRequests("storage", new Quantity("50Gi"))
242
.build())
243
.build())
244
.build())
245
.build())
246
.build())
247
.build();
248
249
client.monitoring().prometheuses().create(prometheus);
250
251
// Create Alertmanager instance
252
Alertmanager alertmanager = new AlertmanagerBuilder()
253
.withMetadata(new ObjectMetaBuilder()
254
.withName("my-alertmanager")
255
.withNamespace("monitoring")
256
.build())
257
.withSpec(new AlertmanagerSpecBuilder()
258
.withReplicas(3)
259
.withRetention("120h")
260
.withConfigSecret("alertmanager-config")
261
.withResources(new ResourceRequirementsBuilder()
262
.addToRequests("memory", new Quantity("200Mi"))
263
.addToRequests("cpu", new Quantity("100m"))
264
.build())
265
.build())
266
.build();
267
268
client.monitoring().alertmanagers().create(alertmanager);
269
```
270
271
### Blackbox Monitoring and Probes
272
273
Configure external endpoint monitoring using blackbox exporter probes.
274
275
```java { .api }
276
/**
277
* Probe configurations for blackbox monitoring of external endpoints
278
*/
279
MixedOperation<Probe, ProbeList, Resource<Probe>> probes();
280
```
281
282
**Usage Examples:**
283
284
```java
285
// Create HTTP probe for external service monitoring
286
Probe httpProbe = new ProbeBuilder()
287
.withMetadata(new ObjectMetaBuilder()
288
.withName("external-api-probe")
289
.withNamespace("monitoring")
290
.build())
291
.withSpec(new ProbeSpecBuilder()
292
.withProberSpec(new ProberSpecBuilder()
293
.withUrl("blackbox-exporter:9115")
294
.build())
295
.withModule("http_2xx")
296
.withTargets(new TargetsBuilder()
297
.withStaticConfig(new StaticConfigBuilder()
298
.withStatic("https://api.example.com/health")
299
.withLabels(Map.of(
300
"service", "external-api",
301
"environment", "production"
302
))
303
.build())
304
.build())
305
.withInterval("30s")
306
.withScrapeTimeout("10s")
307
.build())
308
.build();
309
310
client.monitoring().probes()
311
.inNamespace("monitoring")
312
.create(httpProbe);
313
```
314
315
## Usage Patterns
316
317
### Complete Monitoring Setup
318
319
```java
320
// End-to-end setup: scrape the application, alert when it is down, and route
// that alert to Slack. The client is closed automatically by try-with-resources.
try (OpenShiftClient client = new KubernetesClientBuilder().build().adapt(OpenShiftClient.class)) {
    String monitoringNamespace = "app-monitoring";

    // 1. Create service monitor for application
    ServiceMonitor appMonitor = new ServiceMonitorBuilder()
        .withMetadata(new ObjectMetaBuilder()
            .withName("my-app")
            .withNamespace(monitoringNamespace)
            .addToLabels("app", "my-app")
            .build())
        .withSpec(new ServiceMonitorSpecBuilder()
            .withSelector(new LabelSelectorBuilder()
                .addToMatchLabels("app", "my-app")
                .build())
            .addNewEndpoint()
                .withPort("metrics")
                .withPath("/metrics")
                .withInterval("30s")
            .endEndpoint()
            .build())
        .build();

    client.monitoring().serviceMonitors()
        .inNamespace(monitoringNamespace)
        .create(appMonitor);

    // 2. Create alerting rules
    PrometheusRule rules = new PrometheusRuleBuilder()
        .withMetadata(new ObjectMetaBuilder()
            .withName("my-app-alerts")
            .withNamespace(monitoringNamespace)
            .addToLabels("prometheus", "app-prometheus")
            .build())
        .withSpec(new PrometheusRuleSpecBuilder()
            .addNewGroup()
                .withName("my-app.rules")
                .addNewRule()
                    .withAlert("AppDown")
                    .withExpr("up{job=\"my-app\"} == 0")
                    .withFor("5m")
                    .addToLabels("severity", "critical")
                    // The route in step 3 matches on app=my-app; without this label
                    // the alert would not be delivered to the Slack receiver.
                    .addToLabels("app", "my-app")
                    .addToAnnotations("summary", "Application is down")
                .endRule()
            .endGroup()
            .build())
        .build();

    client.monitoring().prometheusRules().create(rules);

    // 3. Configure alert routing: alerts labelled app=my-app go to Slack, with the
    //    webhook URL read from the "url" key of the "slack-webhook" secret
    AlertmanagerConfig alertConfig = new AlertmanagerConfigBuilder()
        .withMetadata(new ObjectMetaBuilder()
            .withName("my-app-routing")
            .withNamespace(monitoringNamespace)
            .build())
        .withSpec(new AlertmanagerConfigSpecBuilder()
            .addNewRoute()
                .withReceiver("slack-notifications")
                .addNewMatch()
                    .withName("app")
                    .withValue("my-app")
                .endMatch()
            .endRoute()
            .addNewReceiver()
                .withName("slack-notifications")
                .addNewSlackConfig()
                    .withApiUrl(new SecretKeySelectorBuilder()
                        .withName("slack-webhook")
                        .withKey("url")
                        .build())
                    .withChannel("#alerts")
                    .withTitle("Alert: {{ .GroupLabels.alertname }}")
                    .withText("{{ range .Alerts }}{{ .Annotations.summary }}{{ end }}")
                .endSlackConfig()
            .endReceiver()
            .build())
        .build();

    client.monitoring().alertmanagerConfigs()
        .inNamespace(monitoringNamespace)
        .create(alertConfig);
}
402
```