Create and manage production Grafana dashboards for real-time visualization of system and application metrics. Use when building monitoring dashboards, visualizing metrics, or creating operational observability interfaces.
85
Does it follow best practices?
If you maintain this skill, you can automatically optimize it using the tessl CLI to improve its score:
npx tessl skill review --optimize ./path/to/skillValidation for skill structure
Create and manage production-ready Grafana dashboards for comprehensive system observability.
Design effective Grafana dashboards for monitoring applications, infrastructure, and business metrics.
┌─────────────────────────────────────┐
│ Critical Metrics (Big Numbers) │
├─────────────────────────────────────┤
│ Key Trends (Time Series) │
├─────────────────────────────────────┤
│ Detailed Metrics (Tables/Heatmaps) │
└─────────────────────────────────────┘{
"dashboard": {
"title": "API Monitoring",
"tags": ["api", "production"],
"timezone": "browser",
"refresh": "30s",
"panels": [
{
"title": "Request Rate",
"type": "graph",
"targets": [
{
"expr": "sum(rate(http_requests_total[5m])) by (service)",
"legendFormat": "{{service}}"
}
],
"gridPos": { "x": 0, "y": 0, "w": 12, "h": 8 }
},
{
"title": "Error Rate %",
"type": "graph",
"targets": [
{
"expr": "(sum(rate(http_requests_total{status=~\"5..\"}[5m])) / sum(rate(http_requests_total[5m]))) * 100",
"legendFormat": "Error Rate"
}
],
"alert": {
"conditions": [
{
"evaluator": { "params": [5], "type": "gt" },
"operator": { "type": "and" },
"query": { "params": ["A", "5m", "now"] },
"type": "query"
}
]
},
"gridPos": { "x": 12, "y": 0, "w": 12, "h": 8 }
},
{
"title": "P95 Latency",
"type": "graph",
"targets": [
{
"expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket[5m])) by (le, service))",
"legendFormat": "{{service}}"
}
],
"gridPos": { "x": 0, "y": 8, "w": 24, "h": 8 }
}
]
}
}Reference: See assets/api-dashboard.json
{
"type": "stat",
"title": "Total Requests",
"targets": [
{
"expr": "sum(http_requests_total)"
}
],
"options": {
"reduceOptions": {
"values": false,
"calcs": ["lastNotNull"]
},
"orientation": "auto",
"textMode": "auto",
"colorMode": "value"
},
"fieldConfig": {
"defaults": {
"thresholds": {
"mode": "absolute",
"steps": [
{ "value": 0, "color": "green" },
{ "value": 80, "color": "yellow" },
{ "value": 90, "color": "red" }
]
}
}
}
}{
"type": "graph",
"title": "CPU Usage",
"targets": [
{
"expr": "100 - (avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)"
}
],
"yaxes": [
{ "format": "percent", "max": 100, "min": 0 },
{ "format": "short" }
]
}{
"type": "table",
"title": "Service Status",
"targets": [
{
"expr": "up",
"format": "table",
"instant": true
}
],
"transformations": [
{
"id": "organize",
"options": {
"excludeByName": { "Time": true },
"indexByName": {},
"renameByName": {
"instance": "Instance",
"job": "Service",
"Value": "Status"
}
}
}
]
}{
"type": "heatmap",
"title": "Latency Heatmap",
"targets": [
{
"expr": "sum(rate(http_request_duration_seconds_bucket[5m])) by (le)",
"format": "heatmap"
}
],
"dataFormat": "tsbuckets",
"yAxis": {
"format": "s"
}
}{
"templating": {
"list": [
{
"name": "namespace",
"type": "query",
"datasource": "Prometheus",
"query": "label_values(kube_pod_info, namespace)",
"refresh": 1,
"multi": false
},
{
"name": "service",
"type": "query",
"datasource": "Prometheus",
"query": "label_values(kube_service_info{namespace=\"$namespace\"}, service)",
"refresh": 1,
"multi": true
}
]
}
}sum(rate(http_requests_total{namespace="$namespace", service=~"$service"}[5m])){
"alert": {
"name": "High Error Rate",
"conditions": [
{
"evaluator": {
"params": [5],
"type": "gt"
},
"operator": { "type": "and" },
"query": {
"params": ["A", "5m", "now"]
},
"reducer": { "type": "avg" },
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "5m",
"frequency": "1m",
"message": "Error rate is above 5%",
"noDataState": "no_data",
"notifications": [{ "uid": "slack-channel" }]
}
}dashboards.yml:
apiVersion: 1
providers:
- name: "default"
orgId: 1
folder: "General"
type: file
disableDeletion: false
updateIntervalSeconds: 10
allowUiUpdates: true
options:
path: /etc/grafana/dashboardsKey Panels:
Reference: See assets/infrastructure-dashboard.json
Key Panels:
Reference: See assets/database-dashboard.json
Key Panels:
resource "grafana_dashboard" "api_monitoring" {
config_json = file("${path.module}/dashboards/api-monitoring.json")
folder = grafana_folder.monitoring.id
}
resource "grafana_folder" "monitoring" {
title = "Production Monitoring"
}- name: Deploy Grafana dashboards
copy:
src: "{{ item }}"
dest: /etc/grafana/dashboards/
with_fileglob:
- "dashboards/*.json"
notify: restart grafanaassets/api-dashboard.json - API monitoring dashboardassets/infrastructure-dashboard.json - Infrastructure dashboardassets/database-dashboard.json - Database monitoring dashboardreferences/dashboard-design.md - Dashboard design guideprometheus-configuration - For metric collectionslo-implementation - For SLO dashboardsade0c7a
If you maintain this skill, you can claim it as your own. Once claimed, you can manage eval scenarios, bundle related skills, attach documentation or rules, and ensure cross-agent compatibility.