From 5642a24c8f91e1417614bd530fa4259a159fbc13 Mon Sep 17 00:00:00 2001 From: bunker-admin Date: Thu, 5 Mar 2026 08:32:49 -0700 Subject: [PATCH] Sync CCP templates with production configs for complete instance provisioning Closes 12 template drift gaps between the Control Panel templates and production configs. New instances now provision with full monitoring (alerts fire properly), correct Gitea DB type (postgres not mysql), social sharing previews (OG meta bot routes), Excalidraw subdomain routing, docker-socket-proxy for Homepage, and complete Grafana/ Alertmanager/Prometheus config copying. Key changes: - Rewrite Prometheus template: add alerting, rule_files, 5 scrape jobs - Add cAdvisor, node-exporter, redis-exporter, gotify, docker-socket-proxy - Fix Gitea env from mysql to postgres to match docker-compose - Add OG bot detection + rewrite routes for campaigns/pages/gallery - Add Excalidraw nginx server block + Pangolin draw subdomain - Add embed port to discovery portConfig + emailTestMode to registration - Copy alerts.yml, alertmanager.yml, Grafana dashboards to templates - Add Listmonk proxy port and upgrade volume to API service Bunker Admin --- .../modules/instances/instances.schemas.ts | 2 + .../api/src/services/discovery.service.ts | 7 +- .../api/src/services/template-engine.ts | 15 +- .../configs/alertmanager/alertmanager.yml | 112 ++++ .../grafana/dashboards/api-performance.json | 402 +++++++++++++ .../dashboards/application-overview.json | 533 ++++++++++++++++++ .../configs/grafana/dashboards/dashboards.yml | 12 + .../grafana/dashboards/system-health.json | 415 ++++++++++++++ .../grafana/datasources/datasources.yml.hbs | 11 + .../configs/pangolin/resources.yml.hbs | 5 + .../templates/configs/prometheus/alerts.yml | 215 +++++++ .../configs/prometheus/prometheus.yml.hbs | 48 +- .../templates/docker-compose.yml.hbs | 70 ++- changemaker-control-panel/templates/env.hbs | 11 +- .../templates/nginx/conf.d/default.conf.hbs | 136 +++++ 15 files changed, 1981 
insertions(+), 13 deletions(-) create mode 100644 changemaker-control-panel/templates/configs/alertmanager/alertmanager.yml create mode 100644 changemaker-control-panel/templates/configs/grafana/dashboards/api-performance.json create mode 100644 changemaker-control-panel/templates/configs/grafana/dashboards/application-overview.json create mode 100644 changemaker-control-panel/templates/configs/grafana/dashboards/dashboards.yml create mode 100644 changemaker-control-panel/templates/configs/grafana/dashboards/system-health.json create mode 100644 changemaker-control-panel/templates/configs/grafana/datasources/datasources.yml.hbs create mode 100644 changemaker-control-panel/templates/configs/prometheus/alerts.yml diff --git a/changemaker-control-panel/api/src/modules/instances/instances.schemas.ts b/changemaker-control-panel/api/src/modules/instances/instances.schemas.ts index e8fcef51..bc1b7117 100644 --- a/changemaker-control-panel/api/src/modules/instances/instances.schemas.ts +++ b/changemaker-control-panel/api/src/modules/instances/instances.schemas.ts @@ -62,6 +62,7 @@ export const registerInstanceSchema = z.object({ admin: z.coerce.number().int().min(1).max(65535), postgres: z.coerce.number().int().min(1).max(65535), nginx: z.coerce.number().int().min(1).max(65535), + embed: z.coerce.number().int().min(1).max(65535).optional(), }), adminEmail: z.string().email().optional().default('admin@localhost'), enableMedia: z.boolean().default(false), @@ -75,6 +76,7 @@ export const registerInstanceSchema = z.object({ enableSms: z.boolean().default(false), enableSocial: z.boolean().default(false), enablePeople: z.boolean().default(false), + emailTestMode: z.boolean().default(true), notes: z.string().optional(), }); diff --git a/changemaker-control-panel/api/src/services/discovery.service.ts b/changemaker-control-panel/api/src/services/discovery.service.ts index 779ee81f..d6a6c8e6 100644 --- a/changemaker-control-panel/api/src/services/discovery.service.ts +++ 
b/changemaker-control-panel/api/src/services/discovery.service.ts @@ -19,7 +19,7 @@ export interface DiscoveredInstance { domain: string; basePath: string; composeProject: string; - portConfig: { api: number; admin: number; postgres: number; nginx: number }; + portConfig: { api: number; admin: number; postgres: number; nginx: number; embed: number }; adminEmail: string; enableMedia: boolean; enableChat: boolean; @@ -78,11 +78,13 @@ async function parseCmlEnv(envPath: string): Promise | nu } function extractPortConfig(envVars: Record): DiscoveredInstance['portConfig'] { + const nginx = parseInt(envVars.NGINX_HTTP_PORT || '80', 10); return { api: parseInt(envVars.API_PORT || '4000', 10), admin: parseInt(envVars.ADMIN_PORT || '3000', 10), postgres: parseInt(envVars.V2_POSTGRES_PORT || '5433', 10), - nginx: parseInt(envVars.NGINX_HTTP_PORT || '80', 10), + nginx, + embed: parseInt(envVars.NOCODB_EMBED_PORT || String(nginx + 1), 10), }; } @@ -385,6 +387,7 @@ export async function autoDiscoverOnStartup(): Promise { enableSms: inst.enableSms, enableSocial: inst.enableSocial, enablePeople: inst.enablePeople, + emailTestMode: inst.emailTestMode, }, userId, 'auto-discovery' diff --git a/changemaker-control-panel/api/src/services/template-engine.ts b/changemaker-control-panel/api/src/services/template-engine.ts index 6d9a318d..e312bafe 100644 --- a/changemaker-control-panel/api/src/services/template-engine.ts +++ b/changemaker-control-panel/api/src/services/template-engine.ts @@ -148,7 +148,7 @@ export function buildTemplateContext( admin: ports.admin, postgres: ports.postgres, nginx: ports.nginx, - embed: ports.embed, + embed: ports.embed || (ports.nginx || 80) + 1, }, secrets: { postgresPassword: secrets.postgresPassword, @@ -233,6 +233,7 @@ export async function renderAllTemplates(context: TemplateContext, outputDir: st { template: 'nginx/conf.d/services.conf.hbs', output: 'nginx/conf.d/services.conf' }, { template: 'configs/pangolin/resources.yml.hbs', output: 
'configs/pangolin/resources.yml' }, { template: 'configs/prometheus/prometheus.yml.hbs', output: 'configs/prometheus/prometheus.yml' }, + { template: 'configs/grafana/datasources/datasources.yml.hbs', output: 'configs/grafana/datasources/datasources.yml' }, ]; for (const { template, output } of templateFiles) { @@ -253,8 +254,16 @@ export async function renderAllTemplates(context: TemplateContext, outputDir: st logger.debug(`Rendered ${template} → ${outputPath}`); } - // Copy static files (nginx.conf doesn't need templating) - const staticFiles = ['nginx/nginx.conf']; + // Copy static files (no templating needed) + const staticFiles = [ + 'nginx/nginx.conf', + 'configs/prometheus/alerts.yml', + 'configs/alertmanager/alertmanager.yml', + 'configs/grafana/dashboards/dashboards.yml', + 'configs/grafana/dashboards/application-overview.json', + 'configs/grafana/dashboards/api-performance.json', + 'configs/grafana/dashboards/system-health.json', + ]; for (const file of staticFiles) { const srcPath = path.join(templatesDir, file); try { diff --git a/changemaker-control-panel/templates/configs/alertmanager/alertmanager.yml b/changemaker-control-panel/templates/configs/alertmanager/alertmanager.yml new file mode 100644 index 00000000..594ee3df --- /dev/null +++ b/changemaker-control-panel/templates/configs/alertmanager/alertmanager.yml @@ -0,0 +1,112 @@ +global: + resolve_timeout: 5m + # SMTP configuration for email alerts + # Using MailHog for development - update for production: + # smtp_smarthost: 'smtp.example.com:587' + # smtp_auth_username: 'alerts@example.com' + # smtp_auth_password: 'your-password' + # smtp_require_tls: true + smtp_from: 'alerts@changemaker.local' + smtp_smarthost: 'mailhog-changemaker:1025' + smtp_auth_username: '' + smtp_auth_password: '' + smtp_require_tls: false + +# Templates for notification content +templates: + - '/etc/alertmanager/*.tmpl' + +# Route alerts to appropriate receivers based on severity +route: + group_by: ['alertname', 
'cluster', 'service'] + group_wait: 10s + group_interval: 10s + repeat_interval: 12h + receiver: 'default' + + routes: + # Critical alerts: email now, plus Gotify push once its webhook is enabled + - match: + severity: critical + receiver: 'critical-alerts' + group_wait: 0s + group_interval: 5m + repeat_interval: 4h + + # Warning alerts: email, plus optional Gotify webhook + - match: + severity: warning + receiver: 'warning-alerts' + group_wait: 30s + repeat_interval: 12h + + # Info alerts: email only, repeated least often + - match: + severity: info + receiver: 'info-alerts' + repeat_interval: 24h + +# Alert receivers +receivers: + # Default receiver (catches all unmatched) + - name: 'default' + email_configs: + - to: 'admin@changemaker.local' + headers: + Subject: '[Changemaker] {{ .GroupLabels.alertname }}' + + # Critical alerts - email + optional Gotify push (webhook commented out below) + - name: 'critical-alerts' + email_configs: + - to: 'admin@changemaker.local' + headers: + Subject: 'CRITICAL Alert: {{ .GroupLabels.alertname }}' + html: | +

Critical Alert Triggered

+ {{ range .Alerts }} +

Alert: {{ .Labels.alertname }}

+

Severity: {{ .Labels.severity }}

+

Summary: {{ .Annotations.summary }}

+

Description: {{ .Annotations.description }}

+

Started: {{ .StartsAt }}

+
+ {{ end }} + # Gotify push notifications (configure GOTIFY_APP_TOKEN in .env) + # Uncomment and set the token to enable: + # webhook_configs: + # - url: 'http://gotify-changemaker:80/message?token=YOUR_GOTIFY_APP_TOKEN' + # send_resolved: true + # http_config: + # follow_redirects: true + + # Warning alerts - email + optional Gotify + - name: 'warning-alerts' + email_configs: + - to: 'admin@changemaker.local' + headers: + Subject: 'Warning: {{ .GroupLabels.alertname }}' + # webhook_configs: + # - url: 'http://gotify-changemaker:80/message?token=YOUR_GOTIFY_APP_TOKEN' + + # Info alerts - email only + - name: 'info-alerts' + email_configs: + - to: 'admin@changemaker.local' + headers: + Subject: 'Info: {{ .GroupLabels.alertname }}' + +# Inhibition rules (prevent spam) +inhibit_rules: + # If a critical alert is firing, suppress related warnings + - source_match: + severity: 'critical' + target_match: + severity: 'warning' + equal: ['alertname', 'instance'] + + # If disk is critical, suppress disk warning + - source_match: + alertname: 'DiskSpaceCritical' + target_match: + alertname: 'DiskSpaceLow' + equal: ['instance'] diff --git a/changemaker-control-panel/templates/configs/grafana/dashboards/api-performance.json b/changemaker-control-panel/templates/configs/grafana/dashboards/api-performance.json new file mode 100644 index 00000000..ca956ca3 --- /dev/null +++ b/changemaker-control-panel/templates/configs/grafana/dashboards/api-performance.json @@ -0,0 +1,402 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { "type": "grafana", "uid": "-- Grafana --" }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "uid": null, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, + "id": 100, + "title": "Request Rate", + "type": 
"row" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "unit": "reqps" + } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 1 }, + "id": 1, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "sum by(method) (rate(http_requests_total[5m]))", + "legendFormat": "{{method}}", + "refId": "A" + } + ], + "title": "Requests/sec by Method", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "5xx Errors" }, + "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] + }, + { + "matcher": { "id": "byName", "options": "4xx Errors" }, + "properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }] + } + ] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 1 }, + "id": 2, + "options": { + "legend": { "calcs": ["mean", "max"], 
"displayMode": "table", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "sum(rate(http_requests_total{status_code=~\"5..\"}[5m]))", + "legendFormat": "5xx Errors", + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "sum(rate(http_requests_total{status_code=~\"4..\"}[5m]))", + "legendFormat": "4xx Errors", + "refId": "B" + } + ], + "title": "Error Rate", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 9 }, + "id": 101, + "title": "Latency", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "line" } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "transparent", "value": null }, + { "color": "red", "value": 1 } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "p99" }, + "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] + }, + { + "matcher": { "id": "byName", "options": "p95" }, + "properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }] + }, + { + "matcher": { "id": "byName", "options": "p50" }, + "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] + } + ] + }, + "gridPos": { "h": 8, "w": 16, "x": 0, "y": 10 }, + "id": 3, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", 
"showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "histogram_quantile(0.50, sum(rate(http_request_duration_seconds_bucket[5m])) by (le))", + "legendFormat": "p50", + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket[5m])) by (le))", + "legendFormat": "p95", + "refId": "B" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "histogram_quantile(0.99, sum(rate(http_request_duration_seconds_bucket[5m])) by (le))", + "legendFormat": "p99", + "refId": "C" + } + ], + "title": "Request Latency Percentiles", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "fixedColor": "super-light-blue", "mode": "fixed" }, + "custom": { + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "unit": "s" + } + }, + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 10 }, + "id": 4, + "options": { + "legend": { "calcs": ["mean"], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "single", "sort": "none" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "sum(rate(http_request_duration_seconds_sum[5m])) / sum(rate(http_request_duration_seconds_count[5m]))", + "legendFormat": "Avg Latency", + "refId": "A" + } + ], + "title": "Average Request Latency", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 18 }, + "id": 102, + 
"title": "Top Endpoints", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "custom": { + "align": "auto", + "cellOptions": { "type": "auto" }, + "inspect": false + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null } + ] + } + }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "Value" }, + "properties": [ + { "id": "displayName", "value": "Requests (5m)" }, + { "id": "custom.cellOptions", "value": { "mode": "gradient", "type": "gauge" } }, + { "id": "color", "value": { "mode": "continuous-BlYlRd" } } + ] + }, + { + "matcher": { "id": "byName", "options": "route" }, + "properties": [ + { "id": "displayName", "value": "Route" }, + { "id": "custom.width", "value": 300 } + ] + }, + { + "matcher": { "id": "byName", "options": "method" }, + "properties": [ + { "id": "displayName", "value": "Method" }, + { "id": "custom.width", "value": 80 } + ] + } + ] + }, + "gridPos": { "h": 9, "w": 12, "x": 0, "y": 19 }, + "id": 5, + "options": { + "cellHeight": "sm", + "footer": { "countRows": false, "enablePagination": false, "fields": "", "reducer": ["sum"], "show": false }, + "showHeader": true, + "sortBy": [{ "desc": true, "displayName": "Requests (5m)" }] + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "topk(15, sum by(method, route) (increase(http_requests_total[5m])))", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Top Routes by Request Count", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { "Time": true }, + "renameByName": {} + } + } + ], + "type": "table" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "custom": { + "align": "auto", + "cellOptions": { 
"type": "auto" }, + "inspect": false + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 0.5 }, + { "color": "red", "value": 1 } + ] + } + }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "Value" }, + "properties": [ + { "id": "displayName", "value": "p95 Latency (s)" }, + { "id": "unit", "value": "s" }, + { "id": "decimals", "value": 3 }, + { "id": "custom.cellOptions", "value": { "mode": "gradient", "type": "gauge" } }, + { "id": "color", "value": { "mode": "continuous-GrYlRd" } } + ] + }, + { + "matcher": { "id": "byName", "options": "route" }, + "properties": [ + { "id": "displayName", "value": "Route" }, + { "id": "custom.width", "value": 300 } + ] + }, + { + "matcher": { "id": "byName", "options": "method" }, + "properties": [ + { "id": "displayName", "value": "Method" }, + { "id": "custom.width", "value": 80 } + ] + } + ] + }, + "gridPos": { "h": 9, "w": 12, "x": 12, "y": 19 }, + "id": 6, + "options": { + "cellHeight": "sm", + "footer": { "countRows": false, "enablePagination": false, "fields": "", "reducer": ["sum"], "show": false }, + "showHeader": true, + "sortBy": [{ "desc": true, "displayName": "p95 Latency (s)" }] + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "topk(15, histogram_quantile(0.95, sum by(method, route, le) (rate(http_request_duration_seconds_bucket[5m]))))", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Slowest Routes by p95 Latency", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { "Time": true }, + "renameByName": {} + } + } + ], + "type": "table" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "tags": ["changemaker", "v2", "api", "performance"], + "templating": { "list": [] }, + "time": { "from": "now-1h", "to": "now" }, + "timepicker": {}, + "timezone": "", + "title": "Changemaker - API 
Performance", + "version": 1 +} diff --git a/changemaker-control-panel/templates/configs/grafana/dashboards/application-overview.json b/changemaker-control-panel/templates/configs/grafana/dashboards/application-overview.json new file mode 100644 index 00000000..2622ea45 --- /dev/null +++ b/changemaker-control-panel/templates/configs/grafana/dashboards/application-overview.json @@ -0,0 +1,533 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { "type": "grafana", "uid": "-- Grafana --" }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "uid": "changemaker-overview", + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, + "id": 100, + "title": "Email System", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 50 }, + { "color": "red", "value": 100 } + ] + } + } + }, + "gridPos": { "h": 6, "w": 6, "x": 0, "y": 1 }, + "id": 1, + "options": { + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "cm_email_queue_size", + "legendFormat": "Queue Size", + "refId": "A" + } + ], + "title": "Email Queue Size", + "type": "gauge" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "line", + 
"fillOpacity": 15, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "unit": "ops" + } + }, + "gridPos": { "h": 6, "w": 6, "x": 6, "y": 1 }, + "id": 2, + "options": { + "legend": { "calcs": ["mean"], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "rate(cm_emails_sent_total[5m])", + "legendFormat": "Sent/sec", + "refId": "A" + } + ], + "title": "Emails Sent Rate", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "fixedColor": "red", "mode": "fixed" }, + "custom": { + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "unit": "ops" + } + }, + "gridPos": { "h": 6, "w": 6, "x": 12, "y": 1 }, + "id": 3, + "options": { + "legend": { "calcs": ["mean"], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "single", "sort": "none" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "rate(cm_emails_failed_total[5m])", + "legendFormat": "Failed/sec", + "refId": "A" + } + ], + "title": "Emails Failed Rate", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "fixedColor": "purple", "mode": "fixed" }, + "custom": { + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "line", + 
"fillOpacity": 15, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "unit": "s" + } + }, + "gridPos": { "h": 6, "w": 6, "x": 18, "y": 1 }, + "id": 4, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "single", "sort": "none" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "histogram_quantile(0.95, rate(cm_email_send_duration_seconds_bucket[5m]))", + "legendFormat": "p95 Send Duration", + "refId": "A" + } + ], + "title": "Email Send Duration (p95)", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 7 }, + "id": 101, + "title": "Authentication", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "bars", + "fillOpacity": 80, + "gradientMode": "none", + "lineWidth": 1, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "normal" }, + "thresholdsStyle": { "mode": "off" } + }, + "unit": "ops" + }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "Failures" }, + "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] + }, + { + "matcher": { "id": "byName", "options": "Successes" }, + "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] + } + ] + }, + "gridPos": { "h": 6, "w": 12, "x": 0, "y": 8 }, + "id": 5, + "options": { + "legend": { "calcs": ["sum"], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + 
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "rate(cm_login_attempts_total{result=\"success\"}[5m])", + "legendFormat": "Successes", + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "rate(cm_login_attempts_total{result=\"failure\"}[5m])", + "legendFormat": "Failures", + "refId": "B" + } + ], + "title": "Login Attempts Rate", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "blue", "value": null } + ] + } + } + }, + "gridPos": { "h": 6, "w": 12, "x": 12, "y": 8 }, + "id": 6, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "cm_active_sessions", + "legendFormat": "Active Sessions", + "refId": "A" + } + ], + "title": "Active Sessions", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 14 }, + "id": 102, + "title": "Campaigns & Responses", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "unit": "ops" + } + }, + "gridPos": { "h": 6, "w": 12, "x": 0, "y": 15 }, + "id": 7, + "options": { + "legend": { "calcs": ["sum"], "displayMode": 
"list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "rate(cm_campaign_emails_total[5m])", + "legendFormat": "Campaign Emails/sec", + "refId": "A" + } + ], + "title": "Campaign Email Sends Rate", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "unit": "ops" + } + }, + "gridPos": { "h": 6, "w": 12, "x": 12, "y": 15 }, + "id": 8, + "options": { + "legend": { "calcs": ["sum"], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "rate(cm_response_submissions_total[5m])", + "legendFormat": "Responses/sec", + "refId": "A" + } + ], + "title": "Response Submissions Rate", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 21 }, + "id": 103, + "title": "Canvassing", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null } + ] + } + } + }, + "gridPos": { "h": 6, "w": 6, "x": 0, "y": 22 }, + "id": 9, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], 
"fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "cm_active_canvass_sessions", + "legendFormat": "Active Sessions", + "refId": "A" + } + ], + "title": "Active Canvass Sessions", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "bars", + "fillOpacity": 80, + "gradientMode": "none", + "lineWidth": 1, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "normal" }, + "thresholdsStyle": { "mode": "off" } + }, + "unit": "ops" + } + }, + "gridPos": { "h": 6, "w": 10, "x": 6, "y": 22 }, + "id": 10, + "options": { + "legend": { "calcs": ["sum"], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "rate(cm_canvass_visits_total[5m])", + "legendFormat": "{{outcome}}", + "refId": "A" + } + ], + "title": "Canvass Visits by Outcome", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "fixedColor": "super-light-blue", "mode": "fixed" }, + "custom": { + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "unit": "ops" + } + }, + "gridPos": { "h": 6, "w": 8, "x": 16, "y": 22 }, + "id": 11, + "options": { + "legend": { "calcs": ["sum"], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { 
"mode": "single", "sort": "none" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "rate(cm_shift_signups_total[5m])", + "legendFormat": "Signups/sec", + "refId": "A" + } + ], + "title": "Shift Signups Rate", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 28 }, + "id": 104, + "title": "External Services", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [ + { + "options": { + "0": { "color": "red", "index": 1, "text": "DOWN" }, + "1": { "color": "green", "index": 0, "text": "UP" } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ] + } + } + }, + "gridPos": { "h": 5, "w": 24, "x": 0, "y": 29 }, + "id": 12, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "cm_external_service_up", + "legendFormat": "{{service}}", + "refId": "A" + } + ], + "title": "External Service Health", + "type": "stat" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "tags": ["changemaker", "v2", "application"], + "templating": { "list": [] }, + "time": { "from": "now-1h", "to": "now" }, + "timepicker": {}, + "timezone": "", + "title": "Changemaker - Application Overview", + "version": 1 +} diff --git a/changemaker-control-panel/templates/configs/grafana/dashboards/dashboards.yml b/changemaker-control-panel/templates/configs/grafana/dashboards/dashboards.yml new file mode 100644 index 00000000..be165c4b --- /dev/null +++ 
b/changemaker-control-panel/templates/configs/grafana/dashboards/dashboards.yml @@ -0,0 +1,12 @@ +apiVersion: 1 + +providers: + - name: 'default' + orgId: 1 + folder: '' + type: file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: true + options: + path: /etc/grafana/provisioning/dashboards diff --git a/changemaker-control-panel/templates/configs/grafana/dashboards/system-health.json b/changemaker-control-panel/templates/configs/grafana/dashboards/system-health.json new file mode 100644 index 00000000..8faa7e29 --- /dev/null +++ b/changemaker-control-panel/templates/configs/grafana/dashboards/system-health.json @@ -0,0 +1,415 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { "type": "grafana", "uid": "-- Grafana --" }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "uid": null, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, + "id": 100, + "title": "Service Status", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [ + { + "options": { + "0": { "color": "red", "index": 1, "text": "DOWN" }, + "1": { "color": "green", "index": 0, "text": "UP" } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ] + } + } + }, + "gridPos": { "h": 4, "w": 8, "x": 0, "y": 1 }, + "id": 1, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": 
"PBFA97CFB590B2093" }, + "expr": "up{job=\"api\"}", + "legendFormat": "API", + "refId": "A" + } + ], + "title": "API Status", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [ + { + "options": { + "0": { "color": "red", "index": 1, "text": "DOWN" }, + "1": { "color": "green", "index": 0, "text": "UP" } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ] + } + } + }, + "gridPos": { "h": 4, "w": 8, "x": 8, "y": 1 }, + "id": 2, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "redis_up", + "legendFormat": "Redis", + "refId": "A" + } + ], + "title": "Redis Status", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [ + { + "options": { + "0": { "color": "red", "index": 1, "text": "DOWN" }, + "1": { "color": "green", "index": 0, "text": "UP" } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ] + } + } + }, + "gridPos": { "h": 4, "w": 8, "x": 16, "y": 1 }, + "id": 3, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "up{job=\"n8n\"}", + "legendFormat": "n8n", + 
"refId": "A" + } + ], + "title": "N8N Status", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 }, + "id": 101, + "title": "System Resources", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "line" } + }, + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 70 }, + { "color": "red", "value": 90 } + ] + }, + "unit": "percent" + } + }, + "gridPos": { "h": 8, "w": 8, "x": 0, "y": 6 }, + "id": 4, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "single", "sort": "none" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "100 - (avg by(instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)", + "legendFormat": "CPU Usage %", + "refId": "A" + } + ], + "title": "CPU Usage", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + 
"thresholdsStyle": { "mode": "line" } + }, + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 70 }, + { "color": "red", "value": 90 } + ] + }, + "unit": "percent" + } + }, + "gridPos": { "h": 8, "w": 8, "x": 8, "y": 6 }, + "id": 5, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "single", "sort": "none" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100", + "legendFormat": "Memory Usage %", + "refId": "A" + } + ], + "title": "Memory Usage", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "line" } + }, + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 75 }, + { "color": "red", "value": 90 } + ] + }, + "unit": "percent" + } + }, + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 6 }, + "id": 6, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "single", "sort": "none" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "(1 - (node_filesystem_avail_bytes{mountpoint=\"/\"} / node_filesystem_size_bytes{mountpoint=\"/\"})) * 
100", + "legendFormat": "Disk Usage %", + "refId": "A" + } + ], + "title": "Disk Usage", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 14 }, + "id": 102, + "title": "Container Resources", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "unit": "percentunit" + } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 15 }, + "id": 7, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "right", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "rate(container_cpu_usage_seconds_total{name=~\".+\"}[5m])", + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "title": "Container CPU Usage", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "unit": "bytes" + } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 15 }, + "id": 8, + "options": { + "legend": { "calcs": 
["mean", "max"], "displayMode": "table", "placement": "right", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "container_memory_usage_bytes{name=~\".+\"}", + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "title": "Container Memory Usage", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "tags": ["changemaker", "v2", "system-health"], + "templating": { "list": [] }, + "time": { "from": "now-1h", "to": "now" }, + "timepicker": {}, + "timezone": "", + "title": "Changemaker - System Health", + "version": 1 +} diff --git a/changemaker-control-panel/templates/configs/grafana/datasources/datasources.yml.hbs b/changemaker-control-panel/templates/configs/grafana/datasources/datasources.yml.hbs new file mode 100644 index 00000000..edc59135 --- /dev/null +++ b/changemaker-control-panel/templates/configs/grafana/datasources/datasources.yml.hbs @@ -0,0 +1,11 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + access: proxy + url: http://{{containerPrefix}}-prometheus:9090 + isDefault: true + editable: true + jsonData: + timeInterval: 15s diff --git a/changemaker-control-panel/templates/configs/pangolin/resources.yml.hbs b/changemaker-control-panel/templates/configs/pangolin/resources.yml.hbs index e45b9a5d..12cbfdd6 100644 --- a/changemaker-control-panel/templates/configs/pangolin/resources.yml.hbs +++ b/changemaker-control-panel/templates/configs/pangolin/resources.yml.hbs @@ -108,4 +108,9 @@ resources: subdomain: vault target: http://{{containerPrefix}}-nginx:80 isBaseDomain: false + + - name: draw + subdomain: draw + target: http://{{containerPrefix}}-nginx:80 + isBaseDomain: false {{/if}} diff --git a/changemaker-control-panel/templates/configs/prometheus/alerts.yml b/changemaker-control-panel/templates/configs/prometheus/alerts.yml new file mode 100644 index 00000000..ba0a94a4 --- 
/dev/null +++ b/changemaker-control-panel/templates/configs/prometheus/alerts.yml @@ -0,0 +1,215 @@ +groups: + - name: v2_app_alerts + interval: 30s + rules: + # Application availability + - alert: ApplicationDown + expr: up{job="changemaker-v2-api"} == 0 + for: 2m + labels: + severity: critical + annotations: + summary: "V2 API is down" + description: "The Changemaker V2 API has been down for more than 2 minutes." + + # High error rate + - alert: HighErrorRate + expr: rate(http_requests_total{status_code=~"5.."}[5m]) > 0.1 + for: 5m + labels: + severity: warning + annotations: + summary: "High error rate detected" + description: "Application is experiencing {{ $value }} errors per second." + + # Email queue backing up + - alert: EmailQueueBacklog + expr: cm_email_queue_size > 100 + for: 10m + labels: + severity: warning + annotations: + summary: "Email queue has significant backlog" + description: "Email queue size is {{ $value }}, emails may be delayed." + + # High email failure rate + - alert: HighEmailFailureRate + expr: rate(cm_emails_failed_total[5m]) / rate(cm_emails_sent_total[5m]) > 0.2 + for: 10m + labels: + severity: warning + annotations: + summary: "High email failure rate" + description: "{{ $value | humanizePercentage }} of emails are failing to send." + + # Failed login attempts spike + - alert: SuspiciousLoginActivity + expr: rate(cm_login_attempts_total{status="failure"}[5m]) > 5 + for: 2m + labels: + severity: warning + annotations: + summary: "Suspicious login activity detected" + description: "{{ $value }} failed login attempts per second detected." + + # High API latency + - alert: HighAPILatency + expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) > 2 + for: 5m + labels: + severity: warning + annotations: + summary: "High API latency" + description: "95th percentile latency is {{ $value }}s for {{ $labels.route }}." 
+ + # External service down + - alert: ExternalServiceDown + expr: cm_external_service_up == 0 + for: 5m + labels: + severity: warning + annotations: + summary: "External service {{ $labels.service }} is down" + description: "Service {{ $labels.service }} has been unreachable for 5 minutes." + + # System health alerts + - name: system_alerts + interval: 30s + rules: + # Redis down + - alert: RedisDown + expr: redis_up == 0 + for: 1m + labels: + severity: critical + annotations: + summary: "Redis cache is down" + description: "Redis has been down for more than 1 minute. Caching and session management will fail." + + # Disk space running low + - alert: DiskSpaceLow + expr: (node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}) < 0.15 + for: 5m + labels: + severity: warning + annotations: + summary: "Disk space is running low" + description: "Only {{ $value | humanizePercentage }} disk space remaining on root filesystem." + + # Disk space critical + - alert: DiskSpaceCritical + expr: (node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}) < 0.10 + for: 2m + labels: + severity: critical + annotations: + summary: "CRITICAL: Disk space nearly exhausted" + description: "Only {{ $value | humanizePercentage }} disk space remaining! System may fail soon." + + # High CPU usage + - alert: HighCPUUsage + expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 85 + for: 10m + labels: + severity: warning + annotations: + summary: "High CPU usage detected" + description: "CPU usage is {{ $value }}% on {{ $labels.instance }}." + + # Memory usage high + - alert: HighMemoryUsage + expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) > 0.85 + for: 10m + labels: + severity: warning + annotations: + summary: "High memory usage" + description: "Memory usage is above 85% ({{ $value | humanizePercentage }})." 
+ + # Container CPU throttling (only Docker containers) + - alert: ContainerCPUThrottling + expr: rate(container_cpu_cfs_throttled_seconds_total{name!=""}[5m]) > 0.5 + for: 5m + labels: + severity: warning + annotations: + summary: "Container is being CPU throttled" + description: "Container {{ $labels.name }} is experiencing CPU throttling." + + # Container memory usage high (only Docker containers with memory limits) + - alert: ContainerMemoryHigh + expr: (container_memory_usage_bytes{name!=""} / container_spec_memory_limit_bytes{name!=""}) > 0.90 and container_spec_memory_limit_bytes{name!=""} > 0 + for: 5m + labels: + severity: warning + annotations: + summary: "Container memory usage is high" + description: "Container {{ $labels.name }} is using {{ $value | humanizePercentage }} of its memory limit." + + # Infrastructure alerts + - name: infrastructure_alerts + interval: 30s + rules: + # Prometheus scrape failures + - alert: PrometheusScrapeFailures + expr: rate(prometheus_target_scrapes_failed_total[5m]) > 0.1 + for: 5m + labels: + severity: warning + annotations: + summary: "Prometheus scrape failures detected" + description: "Prometheus is failing to scrape {{ $labels.job }} target." + + # Prometheus configuration reload failure + - alert: PrometheusConfigReloadFailed + expr: prometheus_config_last_reload_successful == 0 + for: 1m + labels: + severity: warning + annotations: + summary: "Prometheus configuration reload failed" + description: "Prometheus failed to reload its configuration. Check prometheus logs." + + # Alertmanager down + - alert: AlertmanagerDown + expr: up{job="alertmanager"} == 0 + for: 2m + labels: + severity: critical + annotations: + summary: "Alertmanager is down" + description: "Alertmanager has been down for 2 minutes. Alerts will not be delivered!" 
+ + # Security alerts + - name: security_alerts + interval: 15s + rules: + # Possible DDoS attack + - alert: PossibleDDoSAttack + expr: rate(http_requests_total[1m]) > 1000 + for: 2m + labels: + severity: critical + annotations: + summary: "Possible DDoS attack detected" + description: "Receiving {{ $value }} requests per second for 2 minutes. This may be a DDoS attack." + + # Sustained high traffic + - alert: SustainedHighTraffic + expr: rate(http_requests_total[5m]) > 500 + for: 10m + labels: + severity: warning + annotations: + summary: "Sustained high traffic detected" + description: "Receiving {{ $value }} requests per second for 10 minutes. Monitor for performance issues." + + # Too many 4xx errors + - alert: HighClientErrorRate + expr: rate(http_requests_total{status_code=~"4.."}[5m]) > 5 + for: 5m + labels: + severity: warning + annotations: + summary: "High rate of 4xx client errors" + description: "Receiving {{ $value }} client errors per second. Check for broken links or API misuse." 
diff --git a/changemaker-control-panel/templates/configs/prometheus/prometheus.yml.hbs b/changemaker-control-panel/templates/configs/prometheus/prometheus.yml.hbs index eba5098e..ce52fbf7 100644 --- a/changemaker-control-panel/templates/configs/prometheus/prometheus.yml.hbs +++ b/changemaker-control-panel/templates/configs/prometheus/prometheus.yml.hbs @@ -3,20 +3,64 @@ global: scrape_interval: 15s evaluation_interval: 15s + external_labels: + monitor: '{{composeProject}}' + +{{#if enableMonitoring}} +alerting: + alertmanagers: + - static_configs: + - targets: ['{{containerPrefix}}-alertmanager:9093'] + +rule_files: + - "alerts.yml" +{{/if}} scrape_configs: - job_name: '{{composeProject}}-api' static_configs: - targets: ['{{containerPrefix}}-api:4000'] - metrics_path: /api/metrics + metrics_path: '/api/metrics' + scrape_interval: 10s + scrape_timeout: 5s {{#if enableMedia}} - job_name: '{{composeProject}}-media-api' static_configs: - targets: ['{{containerPrefix}}-media-api:4100'] - metrics_path: /api/metrics + metrics_path: '/api/metrics' {{/if}} - job_name: '{{composeProject}}-redis' static_configs: - targets: ['{{containerPrefix}}-redis-exporter:9121'] + scrape_interval: 15s + +{{#if enableMonitoring}} + - job_name: '{{composeProject}}-cadvisor' + static_configs: + - targets: ['{{containerPrefix}}-cadvisor:8080'] + scrape_interval: 15s + + - job_name: '{{composeProject}}-node' + static_configs: + - targets: ['{{containerPrefix}}-node-exporter:9100'] + scrape_interval: 15s + + - job_name: '{{composeProject}}-prometheus' + static_configs: + - targets: ['localhost:9090'] + + - job_name: '{{composeProject}}-alertmanager' + static_configs: + - targets: ['{{containerPrefix}}-alertmanager:9093'] + scrape_interval: 30s +{{/if}} + +{{#if enableDevTools}} + - job_name: '{{composeProject}}-n8n' + static_configs: + - targets: ['{{containerPrefix}}-n8n:5678'] + metrics_path: '/metrics' + scrape_interval: 30s +{{/if}} diff --git 
a/changemaker-control-panel/templates/docker-compose.yml.hbs b/changemaker-control-panel/templates/docker-compose.yml.hbs index 2aa9c9ce..0c4b466e 100644 --- a/changemaker-control-panel/templates/docker-compose.yml.hbs +++ b/changemaker-control-panel/templates/docker-compose.yml.hbs @@ -96,10 +96,14 @@ services: {{/if}} ports: - "{{ports.api}}:4000" +{{#if enableListmonk}} + - "9002:9002" +{{/if}} volumes: - ./assets/uploads:/app/uploads - ./mkdocs:/mkdocs:rw - ./data:/data:ro + - ./data/upgrade:/app/upgrade:rw - ./configs:/app/configs:ro networks: - {{networkName}} @@ -800,13 +804,32 @@ services: timeout: 5s retries: 3 + docker-socket-proxy: + image: ghcr.io/tecnativa/docker-socket-proxy:latest + container_name: {{containerPrefix}}-docker-socket-proxy + restart: unless-stopped + environment: + CONTAINERS: 1 + IMAGES: 1 + INFO: 1 + NETWORKS: 0 + VOLUMES: 0 + POST: 0 + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro + networks: + - {{networkName}} + homepage: image: ghcr.io/gethomepage/homepage:latest container_name: {{containerPrefix}}-homepage restart: unless-stopped + environment: + DOCKER_HOST: tcp://{{containerPrefix}}-docker-socket-proxy:2375 volumes: - {{containerPrefix}}-homepage-data:/app/config - - /var/run/docker.sock:/var/run/docker.sock:ro + depends_on: + - docker-socket-proxy networks: - {{networkName}} @@ -958,6 +981,50 @@ services: - {{containerPrefix}}-alertmanager-data:/alertmanager networks: - {{networkName}} + + cadvisor: + image: gcr.io/cadvisor/cadvisor:latest + container_name: {{containerPrefix}}-cadvisor + restart: unless-stopped + volumes: + - /:/rootfs:ro + - /var/run:/var/run:ro + - /sys:/sys:ro + - /var/lib/docker/:/var/lib/docker:ro + networks: + - {{networkName}} + + node-exporter: + image: prom/node-exporter:latest + container_name: {{containerPrefix}}-node-exporter + restart: unless-stopped + command: + - '--path.rootfs=/host' + volumes: + - /:/host:ro,rslave + networks: + - {{networkName}} + + redis-exporter: + image: 
oliver006/redis_exporter:latest + container_name: {{containerPrefix}}-redis-exporter + restart: unless-stopped + environment: + REDIS_ADDR: redis://{{containerPrefix}}-redis:6379 + REDIS_PASSWORD: "{{secrets.redisPassword}}" + depends_on: + - redis + networks: + - {{networkName}} + + gotify: + image: gotify/server:latest + container_name: {{containerPrefix}}-gotify + restart: unless-stopped + volumes: + - {{containerPrefix}}-gotify-data:/app/data + networks: + - {{networkName}} {{/if}} # ─── Volumes ────────────────────────────────────────────── @@ -986,6 +1053,7 @@ volumes: {{containerPrefix}}-prometheus-data: {{containerPrefix}}-grafana-data: {{containerPrefix}}-alertmanager-data: + {{containerPrefix}}-gotify-data: {{/if}} {{#if enableMeet}} {{containerPrefix}}-jitsi-web-config: diff --git a/changemaker-control-panel/templates/env.hbs b/changemaker-control-panel/templates/env.hbs index dfb3e8f1..38f110fa 100644 --- a/changemaker-control-panel/templates/env.hbs +++ b/changemaker-control-panel/templates/env.hbs @@ -103,8 +103,10 @@ LISTMONK_SMTP_FROM={{name}} # Media {{#if enableMedia}} ENABLE_MEDIA_FEATURES=true +MEDIA_API_PUBLIC_URL=https://media.{{domain}} {{else}} ENABLE_MEDIA_FEATURES=false +MEDIA_API_PUBLIC_URL= {{/if}} MEDIA_API_PORT=4100 MEDIA_ROOT=/media/local @@ -256,12 +258,11 @@ BASE_DOMAIN=https://{{domain}} # Gitea GITEA_URL=http://{{containerPrefix}}-gitea:3000 GITEA_SSH_PORT=2222 -GITEA_DB_TYPE=mysql -GITEA_DB_HOST={{containerPrefix}}-gitea-db:3306 +GITEA_DB_TYPE=postgres +GITEA_DB_HOST={{containerPrefix}}-postgres:5432 GITEA_DB_NAME=gitea -GITEA_DB_USER=gitea -GITEA_DB_PASSWD={{secrets.giteaAdminPassword}} -GITEA_DB_ROOT_PASSWORD={{secrets.giteaAdminPassword}} +GITEA_DB_USER=changemaker +GITEA_DB_PASSWD={{secrets.postgresPassword}} GITEA_ROOT_URL=https://git.{{domain}} GITEA_DOMAIN=git.{{domain}} GITEA_COMMENTS_ENABLED=false diff --git a/changemaker-control-panel/templates/nginx/conf.d/default.conf.hbs 
b/changemaker-control-panel/templates/nginx/conf.d/default.conf.hbs index 2b3d2afd..c5e24d89 100644 --- a/changemaker-control-panel/templates/nginx/conf.d/default.conf.hbs +++ b/changemaker-control-panel/templates/nginx/conf.d/default.conf.hbs @@ -11,6 +11,65 @@ server { server_name app.{{domain}}; add_header X-Frame-Options "SAMEORIGIN" always; + # Bot detection for OG meta previews + set $is_bot 0; + if ($http_user_agent ~* "(Twitterbot|facebookexternalhit|LinkedInBot|Slackbot|TelegramBot|WhatsApp|Discordbot|Googlebot|bingbot|Pinterest)") { + set $is_bot 1; + } + + # Campaign OG meta + location ~ ^/campaign/([^/]+)$ { + if ($is_bot = 1) { + rewrite ^/campaign/(.*)$ /api/og/campaign/$1 break; + proxy_pass http://{{containerPrefix}}-api:4000; + } + set $upstream_admin http://{{containerPrefix}}-admin:3000; + proxy_pass $upstream_admin; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + # Landing page OG meta + location ~ ^/p/([^/]+)$ { + if ($is_bot = 1) { + rewrite ^/p/(.*)$ /api/og/page/$1 break; + proxy_pass http://{{containerPrefix}}-api:4000; + } + set $upstream_admin http://{{containerPrefix}}-admin:3000; + proxy_pass $upstream_admin; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + +{{#if enableMedia}} + # Gallery video OG meta + location ~ ^/gallery/watch/([^/]+)$ { + if ($is_bot = 1) { + rewrite ^/gallery/watch/(.*)$ /api/og/gallery/$1 break; + proxy_pass http://{{containerPrefix}}-api:4000; + } + set $upstream_admin http://{{containerPrefix}}-admin:3000; + proxy_pass 
$upstream_admin; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } +{{/if}} + location / { set $upstream_admin http://{{containerPrefix}}-admin:3000; proxy_pass $upstream_admin; @@ -367,6 +426,24 @@ server { proxy_set_header X-Forwarded-Proto $scheme; } } + +# Excalidraw +server { + listen 80; + server_name draw.{{domain}}; + + location / { + set $upstream_excalidraw http://{{containerPrefix}}-excalidraw:80; + proxy_pass $upstream_excalidraw; + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + } +} {{/if}} # Root domain — MkDocs static site @@ -393,6 +470,65 @@ server { server_name localhost _; add_header X-Frame-Options "SAMEORIGIN" always; + # Bot detection for OG meta previews + set $is_bot 0; + if ($http_user_agent ~* "(Twitterbot|facebookexternalhit|LinkedInBot|Slackbot|TelegramBot|WhatsApp|Discordbot|Googlebot|bingbot|Pinterest)") { + set $is_bot 1; + } + + # Campaign OG meta + location ~ ^/campaign/([^/]+)$ { + if ($is_bot = 1) { + rewrite ^/campaign/(.*)$ /api/og/campaign/$1 break; + proxy_pass http://{{containerPrefix}}-api:4000; + } + set $upstream_admin http://{{containerPrefix}}-admin:3000; + proxy_pass $upstream_admin; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + # Landing page OG meta + location ~ ^/p/([^/]+)$ { + if 
($is_bot = 1) { + rewrite ^/p/(.*)$ /api/og/page/$1 break; + proxy_pass http://{{containerPrefix}}-api:4000; + } + set $upstream_admin http://{{containerPrefix}}-admin:3000; + proxy_pass $upstream_admin; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + +{{#if enableMedia}} + # Gallery video OG meta + location ~ ^/gallery/watch/([^/]+)$ { + if ($is_bot = 1) { + rewrite ^/gallery/watch/(.*)$ /api/og/gallery/$1 break; + proxy_pass http://{{containerPrefix}}-api:4000; + } + set $upstream_admin http://{{containerPrefix}}-admin:3000; + proxy_pass $upstream_admin; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } +{{/if}} + # Admin GUI + Public pages (default) location / { set $upstream_admin http://{{containerPrefix}}-admin:3000;