From ae3569e897e4db60a1bf7981236aeae5c21ac3a6 Mon Sep 17 00:00:00 2001
From: Jan Willem Mannaerts
Date: Sun, 1 Mar 2026 01:06:19 +0100
Subject: [PATCH] Add Kubernetes manifests for Grafana dashboard and Prometheus scraping

ConfigMap with grafana_dashboard label for sidecar auto-discovery,
and ServiceMonitor for kube-prometheus-stack scrape target.

The System panels (memory, event loop lag, CPU) query generic
process_*/nodejs_* metrics, so they are scoped to namespace="pokerface"
to avoid plotting every other scrape target that exposes the same names.

Co-Authored-By: Claude Opus 4.6
---
 k8s/grafana-dashboard-configmap.yaml | 232 +++++++++++++++++++++++++++
 k8s/servicemonitor.yaml              |  18 +++
 2 files changed, 250 insertions(+)
 create mode 100644 k8s/grafana-dashboard-configmap.yaml
 create mode 100644 k8s/servicemonitor.yaml

diff --git a/k8s/grafana-dashboard-configmap.yaml b/k8s/grafana-dashboard-configmap.yaml
new file mode 100644
index 0000000..8d2ab04
--- /dev/null
+++ b/k8s/grafana-dashboard-configmap.yaml
@@ -0,0 +1,232 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: pokerface-dashboard
+  namespace: monitoring
+  labels:
+    grafana_dashboard: "1"  # label used for Grafana sidecar dashboard auto-discovery
+data:
+  pokerface.json: |
+    {
+      "annotations": { "list": [] },
+      "editable": true,
+      "fiscalYearStartMonth": 0,
+      "graphTooltip": 1,
+      "links": [],
+      "panels": [
+        {
+          "collapsed": false,
+          "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
+          "id": 100,
+          "title": "Overview",
+          "type": "row"
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "fieldConfig": {
+            "defaults": {
+              "thresholds": { "steps": [{ "color": "green", "value": null }] },
+              "color": { "mode": "thresholds" }
+            }
+          },
+          "gridPos": { "h": 4, "w": 6, "x": 0, "y": 1 },
+          "id": 1,
+          "options": { "colorMode": "value", "graphMode": "area", "reduceOptions": { "calcs": ["lastNotNull"] } },
+          "title": "Unique Users",
+          "type": "stat",
+          "targets": [{ "expr": "pokerface_unique_users", "legendFormat": "", "refId": "A" }]
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "fieldConfig": {
+            "defaults": {
+              "thresholds": { "steps": [{ "color": "green", "value": null }] },
+              "color": { "mode": "thresholds" }
+            }
+          },
+          "gridPos": { "h": 4, "w": 6, "x": 6, "y": 1 },
+          "id": 2,
+          "options": { "colorMode": "value", "graphMode": "area", "reduceOptions": { "calcs": ["lastNotNull"] } },
+          "title": "Unique Tenants",
+          "type": "stat",
+          "targets": [{ "expr": "pokerface_unique_tenants", "legendFormat": "", "refId": "A" }]
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "fieldConfig": {
+            "defaults": {
+              "thresholds": { "steps": [{ "color": "blue", "value": null }] },
+              "color": { "mode": "thresholds" }
+            }
+          },
+          "gridPos": { "h": 4, "w": 6, "x": 12, "y": 1 },
+          "id": 3,
+          "options": { "colorMode": "value", "graphMode": "area", "reduceOptions": { "calcs": ["lastNotNull"] } },
+          "title": "Active Connections",
+          "type": "stat",
+          "targets": [{ "expr": "pokerface_websocket_connections", "legendFormat": "", "refId": "A" }]
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "fieldConfig": {
+            "defaults": {
+              "thresholds": { "steps": [{ "color": "purple", "value": null }] },
+              "color": { "mode": "thresholds" }
+            }
+          },
+          "gridPos": { "h": 4, "w": 6, "x": 18, "y": 1 },
+          "id": 4,
+          "options": { "colorMode": "value", "graphMode": "area", "reduceOptions": { "calcs": ["lastNotNull"] } },
+          "title": "Active Rooms",
+          "type": "stat",
+          "targets": [{ "expr": "sum(pokerface_rooms_active)", "legendFormat": "", "refId": "A" }]
+        },
+        {
+          "collapsed": false,
+          "gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 },
+          "id": 101,
+          "title": "Activity",
+          "type": "row"
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "fieldConfig": { "defaults": { "custom": { "drawStyle": "line", "fillOpacity": 10 } } },
+          "gridPos": { "h": 8, "w": 8, "x": 0, "y": 6 },
+          "id": 5,
+          "options": { "tooltip": { "mode": "single" } },
+          "title": "Sessions Created / hr",
+          "type": "timeseries",
+          "targets": [{ "expr": "rate(pokerface_sessions_created_total[1h]) * 3600", "legendFormat": "sessions/hr", "refId": "A" }]
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "fieldConfig": { "defaults": { "custom": { "drawStyle": "line", "fillOpacity": 10 } } },
+          "gridPos": { "h": 8, "w": 8, "x": 8, "y": 6 },
+          "id": 6,
+          "options": { "tooltip": { "mode": "single" } },
+          "title": "Votes / hr",
+          "type": "timeseries",
+          "targets": [{ "expr": "rate(pokerface_votes_total[1h]) * 3600", "legendFormat": "votes/hr", "refId": "A" }]
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "fieldConfig": { "defaults": { "custom": { "drawStyle": "line", "fillOpacity": 10 } } },
+          "gridPos": { "h": 8, "w": 8, "x": 16, "y": 6 },
+          "id": 7,
+          "options": { "tooltip": { "mode": "multi" } },
+          "title": "OAuth Logins / hr",
+          "type": "timeseries",
+          "targets": [
+            { "expr": "rate(pokerface_oauth_logins_total{status=\"success\"}[1h]) * 3600", "legendFormat": "success", "refId": "A" },
+            { "expr": "rate(pokerface_oauth_logins_total{status=\"failure\"}[1h]) * 3600", "legendFormat": "failure", "refId": "B" }
+          ]
+        },
+        {
+          "collapsed": false,
+          "gridPos": { "h": 1, "w": 24, "x": 0, "y": 14 },
+          "id": 102,
+          "title": "Performance",
+          "type": "row"
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "fieldConfig": { "defaults": { "custom": { "drawStyle": "line", "fillOpacity": 10 } } },
+          "gridPos": { "h": 8, "w": 6, "x": 0, "y": 15 },
+          "id": 8,
+          "options": { "tooltip": { "mode": "multi" } },
+          "title": "HTTP Request Rate by Route",
+          "type": "timeseries",
+          "targets": [{ "expr": "sum by (route) (rate(pokerface_http_requests_total[5m]))", "legendFormat": "{{route}}", "refId": "A" }]
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "fieldConfig": { "defaults": { "unit": "s", "custom": { "drawStyle": "line", "fillOpacity": 10 } } },
+          "gridPos": { "h": 8, "w": 6, "x": 6, "y": 15 },
+          "id": 9,
+          "options": { "tooltip": { "mode": "multi" } },
+          "title": "HTTP Latency p95",
+          "type": "timeseries",
+          "targets": [{ "expr": "histogram_quantile(0.95, sum by (le, route) (rate(pokerface_http_request_duration_seconds_bucket[5m])))", "legendFormat": "{{route}}", "refId": "A" }]
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "fieldConfig": { "defaults": { "unit": "s", "custom": { "drawStyle": "line", "fillOpacity": 10 } } },
+          "gridPos": { "h": 8, "w": 6, "x": 12, "y": 15 },
+          "id": 10,
+          "options": { "tooltip": { "mode": "single" } },
+          "title": "Jira API Latency p95",
+          "type": "timeseries",
+          "targets": [{ "expr": "histogram_quantile(0.95, sum by (le) (rate(pokerface_jira_request_duration_seconds_bucket[5m])))", "legendFormat": "p95", "refId": "A" }]
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "fieldConfig": { "defaults": { "custom": { "drawStyle": "line", "fillOpacity": 10 } } },
+          "gridPos": { "h": 8, "w": 6, "x": 18, "y": 15 },
+          "id": 11,
+          "options": { "tooltip": { "mode": "single" } },
+          "title": "Jira Error Rate",
+          "type": "timeseries",
+          "targets": [{ "expr": "rate(pokerface_jira_errors_total[5m])", "legendFormat": "errors/s", "refId": "A" }]
+        },
+        {
+          "collapsed": false,
+          "gridPos": { "h": 1, "w": 24, "x": 0, "y": 23 },
+          "id": 103,
+          "title": "System",
+          "type": "row"
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "fieldConfig": { "defaults": { "unit": "bytes", "custom": { "drawStyle": "line", "fillOpacity": 10 } } },
+          "gridPos": { "h": 8, "w": 8, "x": 0, "y": 24 },
+          "id": 12,
+          "options": { "tooltip": { "mode": "multi" } },
+          "title": "Node.js Memory Usage",
+          "type": "timeseries",
+          "targets": [
+            { "expr": "process_resident_memory_bytes{namespace=\"pokerface\"}", "legendFormat": "RSS", "refId": "A" },
+            { "expr": "nodejs_heap_size_used_bytes{namespace=\"pokerface\"}", "legendFormat": "Heap Used", "refId": "B" }
+          ]
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "fieldConfig": { "defaults": { "unit": "s", "custom": { "drawStyle": "line", "fillOpacity": 10 } } },
+          "gridPos": { "h": 8, "w": 8, "x": 8, "y": 24 },
+          "id": 13,
+          "options": { "tooltip": { "mode": "single" } },
+          "title": "Event Loop Lag",
+          "type": "timeseries",
+          "targets": [{ "expr": "nodejs_eventloop_lag_seconds{namespace=\"pokerface\"}", "legendFormat": "lag", "refId": "A" }]
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "fieldConfig": { "defaults": { "unit": "percentunit", "custom": { "drawStyle": "line", "fillOpacity": 10 } } },
+          "gridPos": { "h": 8, "w": 8, "x": 16, "y": 24 },
+          "id": 14,
+          "options": { "tooltip": { "mode": "single" } },
+          "title": "CPU Usage",
+          "type": "timeseries",
+          "targets": [{ "expr": "rate(process_cpu_seconds_total{namespace=\"pokerface\"}[5m])", "legendFormat": "CPU", "refId": "A" }]
+        }
+      ],
+      "refresh": "30s",
+      "schemaVersion": 39,
+      "tags": ["pokerface"],
+      "templating": {
+        "list": [
+          {
+            "current": {},
+            "hide": 0,
+            "includeAll": false,
+            "name": "datasource",
+            "options": [],
+            "query": "prometheus",
+            "refresh": 1,
+            "type": "datasource"
+          }
+        ]
+      },
+      "time": { "from": "now-3h", "to": "now" },
+      "title": "Pokerface",
+      "uid": "pokerface-overview"
+    }
diff --git a/k8s/servicemonitor.yaml b/k8s/servicemonitor.yaml
new file mode 100644
index 0000000..f737bd2
--- /dev/null
+++ b/k8s/servicemonitor.yaml
@@ -0,0 +1,18 @@
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: pokerface
+  namespace: monitoring
+  labels:
+    release: kube-prometheus-stack  # NOTE(review): presumably must match the Prometheus serviceMonitorSelector - confirm the Helm release name
+spec:
+  namespaceSelector:
+    matchNames:
+      - pokerface  # the dashboard's System panels filter on namespace="pokerface"; keep in sync
+  selector:
+    matchLabels:
+      app: pokerface
+  endpoints:
+    - port: http  # refers to a named Service port - NOTE(review): confirm the Service names its port "http"
+      path: /metrics
+      interval: 15s