or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

admin-and-rbac.md · alerting.md · authentication.md · client-management.md · dashboards.md · data-models.md · datasources.md · index.md · library-elements.md · plugin-management.md · snapshot-management.md · users-and-orgs.md

docs/alerting.md

0

# Alerting and Notifications

1

2

This document covers legacy alerting, modern alerting provisioning, notification channels, contact points, notification policies, and mute timing management. It spans both the legacy alerting system and the unified alerting system introduced in Grafana 8.

3

4

## Capabilities

5

6

### Legacy Alerting Operations

7

8

Legacy alert rule management for Grafana versions prior to 8.0 and instances still using the legacy alerting system.

9

10

```python { .api }

11

def get_alertrule(self, folder_name: str, alertrule_name: str):

12

"""

13

Get legacy alert rule by folder and name.

14

15

Args:

16

folder_name (str): Folder name containing the alert rule

17

alertrule_name (str): Alert rule name

18

19

Returns:

20

dict: Alert rule configuration and status

21

"""

22

...

23

24

def create_alertrule(self, folder_name: str, alertrule: dict):

25

"""

26

Create legacy alert rule in specified folder.

27

28

Args:

29

folder_name (str): Target folder name

30

alertrule (dict): Alert rule configuration

31

32

Returns:

33

dict: Created alert rule with ID and metadata

34

"""

35

...

36

37

def update_alertrule(self, folder_name: str, alertrule: dict):

38

"""

39

Update existing legacy alert rule.

40

41

Args:

42

folder_name (str): Folder name containing the alert rule

43

alertrule (dict): Updated alert rule configuration

44

45

Returns:

46

dict: Update result

47

"""

48

...

49

50

def delete_alertrule(self, folder_name: str, alertrule_name: str):

51

"""

52

Delete legacy alert rule.

53

54

Args:

55

folder_name (str): Folder name containing the alert rule

56

alertrule_name (str): Alert rule name to delete

57

58

Returns:

59

dict: Deletion result

60

"""

61

...

62

```

63

64

**Legacy Alerting Usage Example:**

65

66

```python

67

from grafana_client import GrafanaApi, TokenAuth

68

69

api = GrafanaApi(auth=TokenAuth("your-token"), host="grafana.example.com")

70

71

# Create legacy alert rule

72

legacy_alert = {

73

"name": "High CPU Usage",

74

"message": "CPU usage is above 80%",

75

"frequency": "10s",

76

"conditions": [

77

{

78

"query": {

79

"params": ["A", "5m", "now"]

80

},

81

"reducer": {

82

"params": [],

83

"type": "avg"

84

},

85

"evaluator": {

86

"params": [80],

87

"type": "gt"

88

},

89

"operator": {

90

"type": "and"

91

}

92

}

93

],

94

"executionErrorState": "alerting",

95

"noDataState": "no_data",

96

"for": "5m"

97

}

98

99

# Create the alert rule

100

result = api.alerting.create_alertrule("Production", legacy_alert)

101

print(f"Created legacy alert rule: {result}")

102

103

# Get existing alert rule

104

existing_rule = api.alerting.get_alertrule("Production", "High CPU Usage")

105

print(f"Alert rule state: {existing_rule.get('state', 'unknown')}")

106

107

# Update alert rule

108

legacy_alert["message"] = "Updated: CPU usage is critically high"

109

api.alerting.update_alertrule("Production", legacy_alert)

110

print("Alert rule updated")

111

```

112

113

### Modern Alerting Provisioning

114

115

Modern unified alerting system operations for Grafana 8+ with support for alert rules, contact points, notification policies, and mute timings.

116

117

```python { .api }

118

def get_alertrules_all(self):

119

"""

120

Get all alert rules in unified alerting.

121

122

Returns:

123

list: List of all alert rules across folders

124

"""

125

...

126

127

def get_alertrule(self, alertrule_uid: str):

128

"""

129

Get alert rule by UID.

130

131

Args:

132

alertrule_uid (str): Alert rule UID

133

134

Returns:

135

dict: Alert rule configuration and metadata

136

"""

137

...

138

139

def create_alertrule(self, alertrule: dict, disable_provenance: bool = False):

140

"""

141

Create new alert rule.

142

143

Args:

144

alertrule (dict): Alert rule configuration

145

disable_provenance (bool): Disable provenance checking

146

147

Returns:

148

dict: Created alert rule with UID and metadata

149

"""

150

...

151

152

def update_alertrule(self, alertrule_uid: str, alertrule: dict, disable_provenance: bool = False):

153

"""

154

Update existing alert rule.

155

156

Args:

157

alertrule_uid (str): Alert rule UID

158

alertrule (dict): Updated alert rule configuration

159

disable_provenance (bool): Disable provenance checking

160

161

Returns:

162

dict: Update result

163

"""

164

...

165

166

def delete_alertrule(self, alertrule_uid: str):

167

"""

168

Delete alert rule by UID.

169

170

Args:

171

alertrule_uid (str): Alert rule UID to delete

172

173

Returns:

174

dict: Deletion result

175

"""

176

...

177

```

178

179

**Modern Alert Rule Usage Example:**

180

181

```python

182

# Modern alert rule configuration

183

modern_alert = {

184

"uid": "", # Auto-generated if empty

185

"title": "High Memory Usage",

186

"condition": "C", # Condition query ref ID

187

"data": [

188

{

189

"refId": "A",

190

"queryType": "",

191

"relativeTimeRange": {

192

"from": 600,

193

"to": 0

194

},

195

"datasourceUid": "prometheus-uid",

196

"model": {

197

"expr": "node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100",

198

"interval": "",

199

"refId": "A"

200

}

201

},

202

{

203

"refId": "C",

204

"queryType": "",

205

"relativeTimeRange": {

206

"from": 0,

207

"to": 0

208

},

209

"datasourceUid": "__expr__",

210

"model": {

211

"conditions": [

212

{

213

"evaluator": {

214

"params": [20],

215

"type": "lt"

216

},

217

"operator": {

218

"type": "and"

219

},

220

"query": {

221

"params": ["A"]

222

},

223

"reducer": {

224

"params": [],

225

"type": "last"

226

},

227

"type": "query"

228

}

229

],

230

"expression": "",

231

"hide": False,

232

"intervalMs": 1000,

233

"maxDataPoints": 43200,

234

"reducer": "last",

235

"refId": "C",

236

"type": "classic_conditions"

237

}

238

}

239

],

240

"folderUID": "alerts-folder-uid",

241

"ruleGroup": "System Alerts",

242

"noDataState": "NoData",

243

"execErrState": "Alerting",

244

"for": "5m",

245

"annotations": {

246

"description": "Available memory is below 20%",

247

"runbook_url": "https://wiki.example.com/memory-alerts"

248

},

249

"labels": {

250

"severity": "warning",

251

"team": "sre"

252

}

253

}

254

255

# Create modern alert rule

256

created_rule = api.alertingprovisioning.create_alertrule(modern_alert)

257

print(f"Created alert rule UID: {created_rule['uid']}")

258

259

# Get all alert rules

260

all_rules = api.alertingprovisioning.get_alertrules_all()

261

print(f"Total alert rules: {len(all_rules)}")

262

263

# Get specific rule

264

rule_details = api.alertingprovisioning.get_alertrule(created_rule['uid'])

265

print(f"Rule: {rule_details['title']} - State: {rule_details.get('state', 'unknown')}")

266

```

267

268

### Contact Points Management

269

270

Manage the contact points that unified alerting uses to deliver alert notifications.

271

272

```python { .api }

273

def get_contactpoints(self, name: Optional[str] = None):

274

"""

275

Get contact points, optionally filtered by name.

276

277

Args:

278

name (Optional[str]): Filter by contact point name

279

280

Returns:

281

list: List of contact points

282

"""

283

...

284

285

def create_contactpoint(self, contactpoint: dict, disable_provenance: bool = False):

286

"""

287

Create new contact point.

288

289

Args:

290

contactpoint (dict): Contact point configuration

291

disable_provenance (bool): Disable provenance checking

292

293

Returns:

294

dict: Created contact point with UID

295

"""

296

...

297

298

def update_contactpoint(self, contactpoint_uid: str, contactpoint: dict):

299

"""

300

Update existing contact point.

301

302

Args:

303

contactpoint_uid (str): Contact point UID

304

contactpoint (dict): Updated contact point configuration

305

306

Returns:

307

dict: Update result

308

"""

309

...

310

311

def delete_contactpoint(self, contactpoint_uid: str):

312

"""

313

Delete contact point.

314

315

Args:

316

contactpoint_uid (str): Contact point UID to delete

317

318

Returns:

319

dict: Deletion result

320

"""

321

...

322

```

323

324

**Contact Points Usage Example:**

325

326

```python

327

# Slack contact point

328

slack_contact = {

329

"name": "slack-alerts",

330

"type": "slack",

331

"settings": {

332

"url": "https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX",

333

"channel": "#alerts",

334

"title": "Grafana Alert",

335

"text": "{{ range .Alerts }}{{ .Annotations.summary }}{{ end }}"

336

}

337

}

338

339

# Email contact point

340

email_contact = {

341

"name": "email-sre",

342

"type": "email",

343

"settings": {

344

"addresses": ["sre-team@example.com", "oncall@example.com"],

345

"subject": "Grafana Alert: {{ .GroupLabels.alertname }}",

346

"body": "Alert Details:\n{{ range .Alerts }}{{ .Annotations.description }}{{ end }}"

347

}

348

}

349

350

# Webhook contact point

351

webhook_contact = {

352

"name": "webhook-pagerduty",

353

"type": "webhook",

354

"settings": {

355

"url": "https://events.pagerduty.com/v2/enqueue",

356

"httpMethod": "POST",

357

"username": "",

358

"password": "",

359

"title": "Grafana Alert",

360

"body": '{"routing_key": "YOUR_ROUTING_KEY", "event_action": "trigger", "payload": {"summary": "{{ .GroupLabels.alertname }}", "severity": "error", "source": "Grafana"}}'

361

}

362

}

363

364

# Create contact points

365

slack_result = api.alertingprovisioning.create_contactpoint(slack_contact)

366

email_result = api.alertingprovisioning.create_contactpoint(email_contact)

367

webhook_result = api.alertingprovisioning.create_contactpoint(webhook_contact)

368

369

print(f"Created contact points:")

370

print(f"- Slack: {slack_result['uid']}")

371

print(f"- Email: {email_result['uid']}")

372

print(f"- Webhook: {webhook_result['uid']}")

373

374

# Get all contact points

375

contact_points = api.alertingprovisioning.get_contactpoints()

376

for cp in contact_points:

377

print(f"Contact point: {cp['name']} ({cp['type']})")

378

```

379

380

### Notification Policies

381

382

Managing notification policy trees that define routing and escalation rules for alerts.

383

384

```python { .api }

385

def get_notification_policy_tree(self):

386

"""

387

Get the notification policy tree.

388

389

Returns:

390

dict: Complete notification policy tree configuration

391

"""

392

...

393

394

def set_notification_policy_tree(self, notification_policy_tree: dict, disable_provenance: bool = False):

395

"""

396

Set/replace the entire notification policy tree.

397

398

Args:

399

notification_policy_tree (dict): Complete policy tree configuration

400

disable_provenance (bool): Disable provenance checking

401

402

Returns:

403

dict: Update result

404

"""

405

...

406

```

407

408

**Notification Policies Usage Example:**

409

410

```python

411

# Get current notification policy tree

412

current_policy = api.alertingprovisioning.get_notification_policy_tree()

413

print(f"Current root policy receiver: {current_policy.get('receiver', 'default')}")

414

415

# Define notification policy tree

416

policy_tree = {

417

"receiver": "default-contact", # Default receiver for unmatched alerts

418

"group_by": ["alertname", "cluster"], # Group alerts by these labels

419

"group_wait": "10s", # Wait before sending first notification

420

"group_interval": "10s", # Wait between notifications for same group

421

"repeat_interval": "1h", # Wait before repeating notifications

422

"routes": [

423

{

424

"receiver": "slack-alerts",

425

"matchers": [

426

{

427

"name": "severity",

428

"value": "warning",

429

"isRegex": False,

430

"isEqual": True

431

}

432

],

433

"group_wait": "5s",

434

"repeat_interval": "30m"

435

},

436

{

437

"receiver": "email-sre",

438

"matchers": [

439

{

440

"name": "severity",

441

"value": "critical",

442

"isRegex": False,

443

"isEqual": True

444

}

445

],

446

"group_wait": "0s",

447

"repeat_interval": "15m",

448

"routes": [

449

{

450

"receiver": "webhook-pagerduty",

451

"matchers": [

452

{

453

"name": "team",

454

"value": "sre",

455

"isRegex": False,

456

"isEqual": True

457

}

458

],

459

"continue": True # Keep evaluating subsequent sibling routes after this one matches

460

}

461

]

462

}

463

]

464

}

465

466

# Set notification policy tree

467

api.alertingprovisioning.set_notification_policy_tree(policy_tree)

468

print("Notification policy tree updated")

469

```

470

471

### Mute Timings

472

473

Managing mute timings to suppress alerts during maintenance windows or scheduled downtime.

474

475

```python { .api }

476

def get_mute_timings(self):

477

"""

478

Get all mute timings.

479

480

Returns:

481

list: List of mute timing configurations

482

"""

483

...

484

485

def create_mute_timing(self, mutetiming: dict, disable_provenance: bool = False):

486

"""

487

Create new mute timing.

488

489

Args:

490

mutetiming (dict): Mute timing configuration

491

disable_provenance (bool): Disable provenance checking

492

493

Returns:

494

dict: Created mute timing

495

"""

496

...

497

498

def delete_mute_timing(self, mutetiming_name: str):

499

"""

500

Delete mute timing by name.

501

502

Args:

503

mutetiming_name (str): Mute timing name to delete

504

505

Returns:

506

dict: Deletion result

507

"""

508

...

509

```

510

511

**Mute Timings Usage Example:**

512

513

```python

514

# Maintenance window mute timing

515

maintenance_mute = {

516

"name": "maintenance-window",

517

"time_intervals": [

518

{

519

"times": [

520

{

521

"start_time": "02:00",

522

"end_time": "04:00"

523

}

524

],

525

"weekdays": ["sunday"], # Every Sunday 2-4 AM

526

"months": [],

527

"years": [],

528

"days_of_month": []

529

}

530

]

531

}

532

533

# Off-hours mute timing (mutes alerts outside business hours)

534

business_hours_mute = {

535

"name": "outside-business-hours",

536

"time_intervals": [

537

{

538

"times": [

539

{

540

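"start_time": "18:00",

541

"end_time": "24:00" # A range may not cross midnight, so the night is split in two
},
{
"start_time": "00:00",
"end_time": "08:00"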

542

}

543

],

544

"weekdays": ["monday", "tuesday", "wednesday", "thursday", "friday"]

545

},

546

{

547

"times": [], # All day

548

"weekdays": ["saturday", "sunday"]

549

}

550

]

551

}

552

553

# Holiday mute timing

554

holiday_mute = {

555

"name": "holidays-2024",

556

"time_intervals": [

557

{

558

"times": [], # All day

559

"weekdays": [],

560

"months": ["december"],

561

"years": ["2024"],

562

"days_of_month": ["25", "26"] # Christmas

563

},

564

{

565

"times": [],

566

"weekdays": [],

567

"months": ["january"],

568

"years": ["2024"],

569

"days_of_month": ["1"] # New Year

570

}

571

]

572

}

573

574

# Create mute timings

575

api.alertingprovisioning.create_mute_timing(maintenance_mute)

576

api.alertingprovisioning.create_mute_timing(business_hours_mute)

577

api.alertingprovisioning.create_mute_timing(holiday_mute)

578

579

print("Mute timings created")

580

581

# List all mute timings

582

mute_timings = api.alertingprovisioning.get_mute_timings()

583

for mt in mute_timings:

584

print(f"Mute timing: {mt['name']} ({len(mt['time_intervals'])} intervals)")

585

```

586

587

### Legacy Notification Channels

588

589

Legacy notification channel management for older Grafana versions.

590

591

```python { .api }

592

def get_channels(self):

593

"""

594

Get legacy notification channels.

595

596

Returns:

597

list: List of notification channels

598

"""

599

...

600

601

def get_channel_by_uid(self, channel_uid: str):

602

"""

603

Get legacy notification channel by UID.

604

605

Args:

606

channel_uid (str): Channel UID

607

608

Returns:

609

dict: Notification channel configuration

610

"""

611

...

612

613

def create_channel(self, channel: dict):

614

"""

615

Create legacy notification channel.

616

617

Args:

618

channel (dict): Channel configuration

619

620

Returns:

621

dict: Created channel with ID and UID

622

"""

623

...

624

625

def update_channel_by_uid(self, uid: str, channel: dict):

626

"""

627

Update legacy notification channel.

628

629

Args:

630

uid (str): Channel UID

631

channel (dict): Updated channel configuration

632

633

Returns:

634

dict: Update result

635

"""

636

...

637

638

def delete_notification_by_uid(self, notification_uid: str):

639

"""

640

Delete legacy notification channel.

641

642

Args:

643

notification_uid (str): Channel UID to delete

644

645

Returns:

646

dict: Deletion result

647

"""

648

...

649

```

650

651

**Legacy Notification Channels Usage Example:**

652

653

```python

654

# Legacy Slack notification channel

655

legacy_slack = {

656

"name": "legacy-slack",

657

"type": "slack",

658

"settings": {

659

"url": "https://hooks.slack.com/services/...",

660

"channel": "#alerts-legacy",

661

"title": "Legacy Alert",

662

"text": "Alert: {{ .Title }}\nMessage: {{ .Message }}"

663

}

664

}

665

666

# Create legacy channel

667

legacy_result = api.notifications.create_channel(legacy_slack)

668

print(f"Created legacy channel: {legacy_result['uid']}")

669

670

# Get all legacy channels

671

channels = api.notifications.get_channels()

672

for channel in channels:

673

print(f"Legacy channel: {channel['name']} ({channel['type']})")

674

```

675

676

### Error Handling

677

678

Common alerting operation errors and handling strategies:

679

680

```python

681

from grafana_client import GrafanaClientError, GrafanaBadInputError

682

683

try:

684

# Invalid alert rule configuration

685

invalid_alert = {

686

"title": "", # Empty title

687

"condition": "X", # Non-existent condition

688

"data": [] # Empty data array

689

}

690

api.alertingprovisioning.create_alertrule(invalid_alert)

691

692

except GrafanaBadInputError as e:

693

print(f"Invalid alert configuration: {e.message}")

694

695

except GrafanaClientError as e:

696

if e.status_code == 404:

697

print("Alert rule or folder not found")

698

elif e.status_code == 409:

699

print("Alert rule already exists")

700

else:

701

print(f"Alert operation failed: {e.message}")

702

703

# Contact point validation

704

try:

705

invalid_contact = {

706

"name": "",

707

"type": "invalid-type",

708

"settings": {}

709

}

710

api.alertingprovisioning.create_contactpoint(invalid_contact)

711

except Exception as e:

712

print(f"Contact point creation failed: {e}")

713

```

714

715

### Async Alerting Operations

716

717

Every alerting operation shown above is also available as a coroutine through `AsyncGrafanaApi`:

718

719

```python

720

import asyncio

721

from grafana_client import AsyncGrafanaApi, TokenAuth

722

723

async def manage_alerting():

724

api = AsyncGrafanaApi(auth=TokenAuth("your-token"), host="grafana.example.com")

725

726

# Concurrent operations

727

alert_tasks = [

728

api.alertingprovisioning.get_alertrules_all(),

729

api.alertingprovisioning.get_contactpoints(),

730

api.alertingprovisioning.get_mute_timings()

731

]

732

733

alert_rules, contact_points, mute_timings = await asyncio.gather(*alert_tasks)

734

735

print(f"Alert rules: {len(alert_rules)}")

736

print(f"Contact points: {len(contact_points)}")

737

print(f"Mute timings: {len(mute_timings)}")

738

739

# Create multiple contact points concurrently

740

contact_configs = [slack_contact, email_contact, webhook_contact]

741

create_tasks = [

742

api.alertingprovisioning.create_contactpoint(config)

743

for config in contact_configs

744

]

745

746

results = await asyncio.gather(*create_tasks, return_exceptions=True)

747

for i, result in enumerate(results):

748

if isinstance(result, Exception):

749

print(f"Failed to create contact point {i}: {result}")

750

else:

751

print(f"Created contact point: {result['uid']}")

752

753

asyncio.run(manage_alerting())

754

```

755

756

### Best Practices

757

758

1. **Migration Strategy**: Plan migration from legacy to unified alerting carefully

759

2. **Contact Point Testing**: Test contact points before using in production rules

760

3. **Policy Organization**: Structure notification policies hierarchically with clear routing

761

4. **Mute Timing Planning**: Configure maintenance windows to avoid alert noise

762

5. **Label Strategy**: Use consistent labeling for effective alert routing

763

6. **Error Handling**: Implement robust error handling for alert operations

764

7. **Monitoring**: Monitor alert rule execution and notification delivery

765

8. **Documentation**: Document alert rules, escalation procedures, and runbooks