0
# Alerting and Notifications
1
2
This section covers management of legacy alert rules, unified alerting provisioning, legacy notification channels, contact points, notification policies, and mute timings. It applies to both the legacy alerting system and the unified alerting system introduced in Grafana 8.
3
4
## Capabilities
5
6
### Legacy Alerting Operations
7
8
Legacy alert rule management for Grafana versions prior to 8.0 and instances still using the legacy alerting system.
9
10
```python { .api }
11
def get_alertrule(self, folder_name: str, alertrule_name: str):
12
"""
13
Get legacy alert rule by folder and name.
14
15
Args:
16
folder_name (str): Folder name containing the alert rule
17
alertrule_name (str): Alert rule name
18
19
Returns:
20
dict: Alert rule configuration and status
21
"""
22
...
23
24
def create_alertrule(self, folder_name: str, alertrule: dict):
25
"""
26
Create legacy alert rule in specified folder.
27
28
Args:
29
folder_name (str): Target folder name
30
alertrule (dict): Alert rule configuration
31
32
Returns:
33
dict: Created alert rule with ID and metadata
34
"""
35
...
36
37
def update_alertrule(self, folder_name: str, alertrule: dict):
38
"""
39
Update existing legacy alert rule.
40
41
Args:
42
folder_name (str): Folder name containing the alert rule
43
alertrule (dict): Updated alert rule configuration
44
45
Returns:
46
dict: Update result
47
"""
48
...
49
50
def delete_alertrule(self, folder_name: str, alertrule_name: str):
51
"""
52
Delete legacy alert rule.
53
54
Args:
55
folder_name (str): Folder name containing the alert rule
56
alertrule_name (str): Alert rule name to delete
57
58
Returns:
59
dict: Deletion result
60
"""
61
...
62
```
63
64
**Legacy Alerting Usage Example:**
65
66
```python
67
from grafana_client import GrafanaApi, TokenAuth
68
69
api = GrafanaApi(auth=TokenAuth("your-token"), host="grafana.example.com")
70
71
# Create legacy alert rule
72
legacy_alert = {
73
"name": "High CPU Usage",
74
"message": "CPU usage is above 80%",
75
"frequency": "10s",
76
"conditions": [
77
{
78
"query": {
79
"params": ["A", "5m", "now"]
80
},
81
"reducer": {
82
"params": [],
83
"type": "avg"
84
},
85
"evaluator": {
86
"params": [80],
87
"type": "gt"
88
},
89
"operator": {
90
"type": "and"
91
}
92
}
93
],
94
"executionErrorState": "alerting",
95
"noDataState": "no_data",
96
"for": "5m"
97
}
98
99
# Create the alert rule
100
result = api.alerting.create_alertrule("Production", legacy_alert)
101
print(f"Created legacy alert rule: {result}")
102
103
# Get existing alert rule
104
existing_rule = api.alerting.get_alertrule("Production", "High CPU Usage")
105
print(f"Alert rule state: {existing_rule.get('state', 'unknown')}")
106
107
# Update alert rule
108
legacy_alert["message"] = "Updated: CPU usage is critically high"
109
api.alerting.update_alertrule("Production", legacy_alert)
110
print("Alert rule updated")
111
```
112
113
### Modern Alerting Provisioning
114
115
Modern unified alerting system operations for Grafana 8+ with support for alert rules, contact points, notification policies, and mute timings.
116
117
```python { .api }
118
def get_alertrules_all(self):
119
"""
120
Get all alert rules in unified alerting.
121
122
Returns:
123
list: List of all alert rules across folders
124
"""
125
...
126
127
def get_alertrule(self, alertrule_uid: str):
128
"""
129
Get alert rule by UID.
130
131
Args:
132
alertrule_uid (str): Alert rule UID
133
134
Returns:
135
dict: Alert rule configuration and metadata
136
"""
137
...
138
139
def create_alertrule(self, alertrule: dict, disable_provenance: bool = False):
140
"""
141
Create new alert rule.
142
143
Args:
144
alertrule (dict): Alert rule configuration
145
disable_provenance (bool): If True, send the X-Disable-Provenance header so the rule is not marked as provisioned and stays editable in the Grafana UI
146
147
Returns:
148
dict: Created alert rule with UID and metadata
149
"""
150
...
151
152
def update_alertrule(self, alertrule_uid: str, alertrule: dict, disable_provenance: bool = False):
153
"""
154
Update existing alert rule.
155
156
Args:
157
alertrule_uid (str): Alert rule UID
158
alertrule (dict): Updated alert rule configuration
159
disable_provenance (bool): If True, send the X-Disable-Provenance header so the rule is not marked as provisioned and stays editable in the Grafana UI
160
161
Returns:
162
dict: Update result
163
"""
164
...
165
166
def delete_alertrule(self, alertrule_uid: str):
167
"""
168
Delete alert rule by UID.
169
170
Args:
171
alertrule_uid (str): Alert rule UID to delete
172
173
Returns:
174
dict: Deletion result
175
"""
176
...
177
```
178
179
**Modern Alert Rule Usage Example:**
180
181
```python
182
# Modern alert rule configuration
183
modern_alert = {
184
"uid": "", # Auto-generated if empty
185
"title": "High Memory Usage",
186
"condition": "C", # Condition query ref ID
187
"data": [
188
{
189
"refId": "A",
190
"queryType": "",
191
"relativeTimeRange": {
192
"from": 600,
193
"to": 0
194
},
195
"datasourceUid": "prometheus-uid",
196
"model": {
197
"expr": "node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100",
198
"interval": "",
199
"refId": "A"
200
}
201
},
202
{
203
"refId": "C",
204
"queryType": "",
205
"relativeTimeRange": {
206
"from": 0,
207
"to": 0
208
},
209
"datasourceUid": "__expr__",
210
"model": {
211
"conditions": [
212
{
213
"evaluator": {
214
"params": [20],
215
"type": "lt"
216
},
217
"operator": {
218
"type": "and"
219
},
220
"query": {
221
"params": ["A"]
222
},
223
"reducer": {
224
"params": [],
225
"type": "last"
226
},
227
"type": "query"
228
}
229
],
230
"expression": "",
231
"hide": False,
232
"intervalMs": 1000,
233
"maxDataPoints": 43200,
234
"reducer": "last",
235
"refId": "C",
236
"type": "classic_conditions"
237
}
238
}
239
],
240
"folderUID": "alerts-folder-uid",
241
"ruleGroup": "System Alerts",
242
"noDataState": "NoData",
243
"execErrState": "Alerting",
244
"for": "5m",
245
"annotations": {
246
"description": "Available memory is below 20%",
247
"runbook_url": "https://wiki.example.com/memory-alerts"
248
},
249
"labels": {
250
"severity": "warning",
251
"team": "sre"
252
}
253
}
254
255
# Create modern alert rule
256
created_rule = api.alertingprovisioning.create_alertrule(modern_alert)
257
print(f"Created alert rule UID: {created_rule['uid']}")
258
259
# Get all alert rules
260
all_rules = api.alertingprovisioning.get_alertrules_all()
261
print(f"Total alert rules: {len(all_rules)}")
262
263
# Get specific rule
264
rule_details = api.alertingprovisioning.get_alertrule(created_rule['uid'])
265
print(f"Rule: {rule_details['title']} - State: {rule_details.get('state', 'unknown')}")
266
```
267
268
### Contact Points Management
269
270
Managing notification channels and contact points for alert delivery.
271
272
```python { .api }
273
def get_contactpoints(self, name: Optional[str] = None):
274
"""
275
Get contact points, optionally filtered by name.
276
277
Args:
278
name (Optional[str]): Filter by contact point name
279
280
Returns:
281
list: List of contact points
282
"""
283
...
284
285
def create_contactpoint(self, contactpoint: dict, disable_provenance: bool = False):
286
"""
287
Create new contact point.
288
289
Args:
290
contactpoint (dict): Contact point configuration
291
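disable_provenance (bool): If True, send the X-Disable-Provenance header so the contact point is not marked as provisioned and stays editable in the Grafana UI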
292
293
Returns:
294
dict: Created contact point with UID
295
"""
296
...
297
298
def update_contactpoint(self, contactpoint_uid: str, contactpoint: dict):
299
"""
300
Update existing contact point.
301
302
Args:
303
contactpoint_uid (str): Contact point UID
304
contactpoint (dict): Updated contact point configuration
305
306
Returns:
307
dict: Update result
308
"""
309
...
310
311
def delete_contactpoint(self, contactpoint_uid: str):
312
"""
313
Delete contact point.
314
315
Args:
316
contactpoint_uid (str): Contact point UID to delete
317
318
Returns:
319
dict: Deletion result
320
"""
321
...
322
```
323
324
**Contact Points Usage Example:**
325
326
```python
327
# Slack contact point
328
slack_contact = {
329
"name": "slack-alerts",
330
"type": "slack",
331
"settings": {
332
"url": "https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX",
333
"channel": "#alerts",
334
"title": "Grafana Alert",
335
"text": "{{ range .Alerts }}{{ .Annotations.summary }}{{ end }}"
336
}
337
}
338
339
# Email contact point
340
email_contact = {
341
"name": "email-sre",
342
"type": "email",
343
"settings": {
344
"addresses": ["sre-team@example.com", "oncall@example.com"],
345
"subject": "Grafana Alert: {{ .GroupLabels.alertname }}",
346
"body": "Alert Details:\n{{ range .Alerts }}{{ .Annotations.description }}{{ end }}"
347
}
348
}
349
350
# Webhook contact point
351
webhook_contact = {
352
"name": "webhook-pagerduty",
353
"type": "webhook",
354
"settings": {
355
"url": "https://events.pagerduty.com/v2/enqueue",
356
"httpMethod": "POST",
357
"username": "",
358
"password": "",
359
"title": "Grafana Alert",
360
"body": '{"routing_key": "YOUR_ROUTING_KEY", "event_action": "trigger", "payload": {"summary": "{{ .GroupLabels.alertname }}", "severity": "error", "source": "Grafana"}}'
361
}
362
}
363
364
# Create contact points
365
slack_result = api.alertingprovisioning.create_contactpoint(slack_contact)
366
email_result = api.alertingprovisioning.create_contactpoint(email_contact)
367
webhook_result = api.alertingprovisioning.create_contactpoint(webhook_contact)
368
369
print("Created contact points:")
370
print(f"- Slack: {slack_result['uid']}")
371
print(f"- Email: {email_result['uid']}")
372
print(f"- Webhook: {webhook_result['uid']}")
373
374
# Get all contact points
375
contact_points = api.alertingprovisioning.get_contactpoints()
376
for cp in contact_points:
377
print(f"Contact point: {cp['name']} ({cp['type']})")
378
```
379
380
### Notification Policies
381
382
Managing notification policy trees that define routing and escalation rules for alerts.
383
384
```python { .api }
385
def get_notification_policy_tree(self):
386
"""
387
Get the notification policy tree.
388
389
Returns:
390
dict: Complete notification policy tree configuration
391
"""
392
...
393
394
def set_notification_policy_tree(self, notification_policy_tree: dict, disable_provenance: bool = False):
395
"""
396
Set/replace the entire notification policy tree.
397
398
Args:
399
notification_policy_tree (dict): Complete policy tree configuration
400
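disable_provenance (bool): If True, send the X-Disable-Provenance header so the policy tree is not marked as provisioned and stays editable in the Grafana UI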
401
402
Returns:
403
dict: Update result
404
"""
405
...
406
```
407
408
**Notification Policies Usage Example:**
409
410
```python
411
# Get current notification policy tree
412
current_policy = api.alertingprovisioning.get_notification_policy_tree()
413
print(f"Current root policy receiver: {current_policy.get('receiver', 'default')}")
414
415
# Define notification policy tree
416
policy_tree = {
417
"receiver": "default-contact", # Default receiver for unmatched alerts
418
"group_by": ["alertname", "cluster"], # Group alerts by these labels
419
"group_wait": "10s", # Wait before sending first notification
420
"group_interval": "10s",  # Wait before notifying about new alerts added to an already-notified group
421
"repeat_interval": "1h", # Wait before repeating notifications
422
"routes": [
423
{
424
"receiver": "slack-alerts",
425
"matchers": [
426
{
427
"name": "severity",
428
"value": "warning",
429
"isRegex": False,
430
"isEqual": True
431
}
432
],
433
"group_wait": "5s",
434
"repeat_interval": "30m"
435
},
436
{
437
"receiver": "email-sre",
438
"matchers": [
439
{
440
"name": "severity",
441
"value": "critical",
442
"isRegex": False,
443
"isEqual": True
444
}
445
],
446
"group_wait": "0s",
447
"repeat_interval": "15m",
448
"routes": [
449
{
450
"receiver": "webhook-pagerduty",
451
"matchers": [
452
{
453
"name": "team",
454
"value": "sre",
455
"isRegex": False,
456
"isEqual": True
457
}
458
],
459
"continue": True  # Keep evaluating subsequent sibling routes after this one matches
460
}
461
]
462
}
463
]
464
}
465
466
# Set notification policy tree
467
api.alertingprovisioning.set_notification_policy_tree(policy_tree)
468
print("Notification policy tree updated")
469
```
470
471
### Mute Timings
472
473
Managing mute timings to suppress alerts during maintenance windows or scheduled downtime.
474
475
```python { .api }
476
def get_mute_timings(self):
477
"""
478
Get all mute timings.
479
480
Returns:
481
list: List of mute timing configurations
482
"""
483
...
484
485
def create_mute_timing(self, mutetiming: dict, disable_provenance: bool = False):
486
"""
487
Create new mute timing.
488
489
Args:
490
mutetiming (dict): Mute timing configuration
491
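disable_provenance (bool): If True, send the X-Disable-Provenance header so the mute timing is not marked as provisioned and stays editable in the Grafana UI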
492
493
Returns:
494
dict: Created mute timing
495
"""
496
...
497
498
def delete_mute_timing(self, mutetiming_name: str):
499
"""
500
Delete mute timing by name.
501
502
Args:
503
mutetiming_name (str): Mute timing name to delete
504
505
Returns:
506
dict: Deletion result
507
"""
508
...
509
```
510
511
**Mute Timings Usage Example:**
512
513
```python
514
# Maintenance window mute timing
515
maintenance_mute = {
516
"name": "maintenance-window",
517
"time_intervals": [
518
{
519
"times": [
520
{
521
"start_time": "02:00",
522
"end_time": "04:00"
523
}
524
],
525
"weekdays": ["sunday"], # Every Sunday 2-4 AM
526
"months": [],
527
"years": [],
528
"days_of_month": []
529
}
530
]
531
}
532
533
# Business hours mute timing (outside business hours)
534
business_hours_mute = {
535
"name": "outside-business-hours",
536
"time_intervals": [
537
{
538
"times": [
539
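{"start_time": "00:00", "end_time": "08:00"},  # Midnight until start of business
{"start_time": "18:00", "end_time": "24:00"}  # End of business until midnight (a range cannot wrap past midnight)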
543
],
544
"weekdays": ["monday", "tuesday", "wednesday", "thursday", "friday"]
545
},
546
{
547
"times": [], # All day
548
"weekdays": ["saturday", "sunday"]
549
}
550
]
551
}
552
553
# Holiday mute timing
554
holiday_mute = {
555
"name": "holidays-2024",
556
"time_intervals": [
557
{
558
"times": [], # All day
559
"weekdays": [],
560
"months": ["december"],
561
"years": ["2024"],
562
"days_of_month": ["25", "26"] # Christmas
563
},
564
{
565
"times": [],
566
"weekdays": [],
567
"months": ["january"],
568
"years": ["2024"],
569
"days_of_month": ["1"] # New Year
570
}
571
]
572
}
573
574
# Create mute timings
575
api.alertingprovisioning.create_mute_timing(maintenance_mute)
576
api.alertingprovisioning.create_mute_timing(business_hours_mute)
577
api.alertingprovisioning.create_mute_timing(holiday_mute)
578
579
print("Mute timings created")
580
581
# List all mute timings
582
mute_timings = api.alertingprovisioning.get_mute_timings()
583
for mt in mute_timings:
584
print(f"Mute timing: {mt['name']} ({len(mt['time_intervals'])} intervals)")
585
```
586
587
### Legacy Notification Channels
588
589
Legacy notification channel management for older Grafana versions.
590
591
```python { .api }
592
def get_channels(self):
593
"""
594
Get legacy notification channels.
595
596
Returns:
597
list: List of notification channels
598
"""
599
...
600
601
def get_channel_by_uid(self, channel_uid: str):
602
"""
603
Get legacy notification channel by UID.
604
605
Args:
606
channel_uid (str): Channel UID
607
608
Returns:
609
dict: Notification channel configuration
610
"""
611
...
612
613
def create_channel(self, channel: dict):
614
"""
615
Create legacy notification channel.
616
617
Args:
618
channel (dict): Channel configuration
619
620
Returns:
621
dict: Created channel with ID and UID
622
"""
623
...
624
625
def update_channel_by_uid(self, uid: str, channel: dict):
626
"""
627
Update legacy notification channel.
628
629
Args:
630
uid (str): Channel UID
631
channel (dict): Updated channel configuration
632
633
Returns:
634
dict: Update result
635
"""
636
...
637
638
def delete_notification_by_uid(self, notification_uid: str):
639
"""
640
Delete legacy notification channel.
641
642
Args:
643
notification_uid (str): Channel UID to delete
644
645
Returns:
646
dict: Deletion result
647
"""
648
...
649
```
650
651
**Legacy Notification Channels Usage Example:**
652
653
```python
654
# Legacy Slack notification channel
655
legacy_slack = {
656
"name": "legacy-slack",
657
"type": "slack",
658
"settings": {
659
"url": "https://hooks.slack.com/services/...",
660
"channel": "#alerts-legacy",
661
"title": "Legacy Alert",
662
"text": "Alert: {{ .Title }}\nMessage: {{ .Message }}"
663
}
664
}
665
666
# Create legacy channel
667
legacy_result = api.notifications.create_channel(legacy_slack)
668
print(f"Created legacy channel: {legacy_result['uid']}")
669
670
# Get all legacy channels
671
channels = api.notifications.get_channels()
672
for channel in channels:
673
print(f"Legacy channel: {channel['name']} ({channel['type']})")
674
```
675
676
### Error Handling
677
678
Common alerting operation errors and handling strategies:
679
680
```python
681
from grafana_client import GrafanaClientError, GrafanaBadInputError
682
683
try:
684
# Invalid alert rule configuration
685
invalid_alert = {
686
"title": "", # Empty title
687
"condition": "X", # Non-existent condition
688
"data": [] # Empty data array
689
}
690
api.alertingprovisioning.create_alertrule(invalid_alert)
691
692
except GrafanaBadInputError as e:
693
print(f"Invalid alert configuration: {e.message}")
694
695
except GrafanaClientError as e:
696
if e.status_code == 404:
697
print("Alert rule or folder not found")
698
elif e.status_code == 409:
699
print("Alert rule already exists")
700
else:
701
print(f"Alert operation failed: {e.message}")
702
703
# Contact point validation
704
try:
705
invalid_contact = {
706
"name": "",
707
"type": "invalid-type",
708
"settings": {}
709
}
710
api.alertingprovisioning.create_contactpoint(invalid_contact)
711
except Exception as e:
712
print(f"Contact point creation failed: {e}")
713
```
714
715
### Async Alerting Operations
716
717
All alerting operations are also available as coroutines through `AsyncGrafanaApi`:
718
719
```python
720
import asyncio
721
from grafana_client import AsyncGrafanaApi, TokenAuth
722
723
async def manage_alerting():
724
api = AsyncGrafanaApi(auth=TokenAuth("your-token"), host="grafana.example.com")
725
726
# Concurrent operations
727
alert_tasks = [
728
api.alertingprovisioning.get_alertrules_all(),
729
api.alertingprovisioning.get_contactpoints(),
730
api.alertingprovisioning.get_mute_timings()
731
]
732
733
alert_rules, contact_points, mute_timings = await asyncio.gather(*alert_tasks)
734
735
print(f"Alert rules: {len(alert_rules)}")
736
print(f"Contact points: {len(contact_points)}")
737
print(f"Mute timings: {len(mute_timings)}")
738
739
# Create multiple contact points concurrently
740
contact_configs = [slack_contact, email_contact, webhook_contact]
741
create_tasks = [
742
api.alertingprovisioning.create_contactpoint(config)
743
for config in contact_configs
744
]
745
746
results = await asyncio.gather(*create_tasks, return_exceptions=True)
747
for i, result in enumerate(results):
748
if isinstance(result, Exception):
749
print(f"Failed to create contact point {i}: {result}")
750
else:
751
print(f"Created contact point: {result['uid']}")
752
753
asyncio.run(manage_alerting())
754
```
755
756
### Best Practices
757
758
1. **Migration Strategy**: Plan migration from legacy to unified alerting carefully
759
2. **Contact Point Testing**: Test contact points before using in production rules
760
3. **Policy Organization**: Structure notification policies hierarchically with clear routing
761
4. **Mute Timing Planning**: Configure maintenance windows to avoid alert noise
762
5. **Label Strategy**: Use consistent labeling for effective alert routing
763
6. **Error Handling**: Implement robust error handling for alert operations
764
7. **Monitoring**: Monitor alert rule execution and notification delivery
765
8. **Documentation**: Document alert rules, escalation procedures, and runbooks