diff --git a/changemaker-control-panel/api/src/modules/instances/instances.schemas.ts b/changemaker-control-panel/api/src/modules/instances/instances.schemas.ts index e8fcef51..bc1b7117 100644 --- a/changemaker-control-panel/api/src/modules/instances/instances.schemas.ts +++ b/changemaker-control-panel/api/src/modules/instances/instances.schemas.ts @@ -62,6 +62,7 @@ export const registerInstanceSchema = z.object({ admin: z.coerce.number().int().min(1).max(65535), postgres: z.coerce.number().int().min(1).max(65535), nginx: z.coerce.number().int().min(1).max(65535), + embed: z.coerce.number().int().min(1).max(65535).optional(), }), adminEmail: z.string().email().optional().default('admin@localhost'), enableMedia: z.boolean().default(false), @@ -75,6 +76,7 @@ export const registerInstanceSchema = z.object({ enableSms: z.boolean().default(false), enableSocial: z.boolean().default(false), enablePeople: z.boolean().default(false), + emailTestMode: z.boolean().default(true), notes: z.string().optional(), }); diff --git a/changemaker-control-panel/api/src/services/discovery.service.ts b/changemaker-control-panel/api/src/services/discovery.service.ts index 779ee81f..d6a6c8e6 100644 --- a/changemaker-control-panel/api/src/services/discovery.service.ts +++ b/changemaker-control-panel/api/src/services/discovery.service.ts @@ -19,7 +19,7 @@ export interface DiscoveredInstance { domain: string; basePath: string; composeProject: string; - portConfig: { api: number; admin: number; postgres: number; nginx: number }; + portConfig: { api: number; admin: number; postgres: number; nginx: number; embed: number }; adminEmail: string; enableMedia: boolean; enableChat: boolean; @@ -78,11 +78,13 @@ async function parseCmlEnv(envPath: string): Promise | nu } function extractPortConfig(envVars: Record): DiscoveredInstance['portConfig'] { + const nginx = parseInt(envVars.NGINX_HTTP_PORT || '80', 10); return { api: parseInt(envVars.API_PORT || '4000', 10), admin: parseInt(envVars.ADMIN_PORT || 
'3000', 10), postgres: parseInt(envVars.V2_POSTGRES_PORT || '5433', 10), - nginx: parseInt(envVars.NGINX_HTTP_PORT || '80', 10), + nginx, + embed: parseInt(envVars.NOCODB_EMBED_PORT || String(nginx + 1), 10), }; } @@ -385,6 +387,7 @@ export async function autoDiscoverOnStartup(): Promise { enableSms: inst.enableSms, enableSocial: inst.enableSocial, enablePeople: inst.enablePeople, + emailTestMode: inst.emailTestMode, }, userId, 'auto-discovery' diff --git a/changemaker-control-panel/api/src/services/template-engine.ts b/changemaker-control-panel/api/src/services/template-engine.ts index 6d9a318d..e312bafe 100644 --- a/changemaker-control-panel/api/src/services/template-engine.ts +++ b/changemaker-control-panel/api/src/services/template-engine.ts @@ -148,7 +148,7 @@ export function buildTemplateContext( admin: ports.admin, postgres: ports.postgres, nginx: ports.nginx, - embed: ports.embed, + embed: ports.embed || (ports.nginx || 80) + 1, }, secrets: { postgresPassword: secrets.postgresPassword, @@ -233,6 +233,7 @@ export async function renderAllTemplates(context: TemplateContext, outputDir: st { template: 'nginx/conf.d/services.conf.hbs', output: 'nginx/conf.d/services.conf' }, { template: 'configs/pangolin/resources.yml.hbs', output: 'configs/pangolin/resources.yml' }, { template: 'configs/prometheus/prometheus.yml.hbs', output: 'configs/prometheus/prometheus.yml' }, + { template: 'configs/grafana/datasources/datasources.yml.hbs', output: 'configs/grafana/datasources/datasources.yml' }, ]; for (const { template, output } of templateFiles) { @@ -253,8 +254,16 @@ export async function renderAllTemplates(context: TemplateContext, outputDir: st logger.debug(`Rendered ${template} → ${outputPath}`); } - // Copy static files (nginx.conf doesn't need templating) - const staticFiles = ['nginx/nginx.conf']; + // Copy static files (no templating needed) + const staticFiles = [ + 'nginx/nginx.conf', + 'configs/prometheus/alerts.yml', + 'configs/alertmanager/alertmanager.yml', 
+ 'configs/grafana/dashboards/dashboards.yml', + 'configs/grafana/dashboards/application-overview.json', + 'configs/grafana/dashboards/api-performance.json', + 'configs/grafana/dashboards/system-health.json', + ]; for (const file of staticFiles) { const srcPath = path.join(templatesDir, file); try { diff --git a/changemaker-control-panel/templates/configs/alertmanager/alertmanager.yml b/changemaker-control-panel/templates/configs/alertmanager/alertmanager.yml new file mode 100644 index 00000000..594ee3df --- /dev/null +++ b/changemaker-control-panel/templates/configs/alertmanager/alertmanager.yml @@ -0,0 +1,112 @@ +global: + resolve_timeout: 5m + # SMTP configuration for email alerts + # Using MailHog for development - update for production: + # smtp_smarthost: 'smtp.example.com:587' + # smtp_auth_username: 'alerts@example.com' + # smtp_auth_password: 'your-password' + # smtp_require_tls: true + smtp_from: 'alerts@changemaker.local' + smtp_smarthost: 'mailhog-changemaker:1025' + smtp_auth_username: '' + smtp_auth_password: '' + smtp_require_tls: false + +# Templates for notification content +templates: + - '/etc/alertmanager/*.tmpl' + +# Route alerts to appropriate receivers based on severity +route: + group_by: ['alertname', 'cluster', 'service'] + group_wait: 10s + group_interval: 10s + repeat_interval: 12h + receiver: 'default' + + routes: + # Critical alerts go to both Gotify and email + - match: + severity: critical + receiver: 'critical-alerts' + group_wait: 0s + group_interval: 5m + repeat_interval: 4h + + # Warning alerts go to Gotify + email + - match: + severity: warning + receiver: 'warning-alerts' + group_wait: 30s + repeat_interval: 12h + + # Info alerts - Gotify with lower priority + - match: + severity: info + receiver: 'info-alerts' + repeat_interval: 24h + +# Alert receivers +receivers: + # Default receiver (catches all unmatched) + - name: 'default' + email_configs: + - to: 'admin@changemaker.local' + headers: + Subject: '[Changemaker] {{ 
.GroupLabels.alertname }}' + + # Critical alerts - email + Gotify push + - name: 'critical-alerts' + email_configs: + - to: 'admin@changemaker.local' + headers: + Subject: 'CRITICAL Alert: {{ .GroupLabels.alertname }}' + html: | +

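<h2>Critical Alert Triggered</h2>
+ {{ range .Alerts }}
+ <p><strong>Alert:</strong> {{ .Labels.alertname }}</p>
+ <p><strong>Severity:</strong> {{ .Labels.severity }}</p>
+ <p><strong>Summary:</strong> {{ .Annotations.summary }}</p>
+ <p><strong>Description:</strong> {{ .Annotations.description }}</p>
+ <p><strong>Started:</strong> {{ .StartsAt }}</p>
+ <hr>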
+ {{ end }} + # Gotify push notifications (configure GOTIFY_APP_TOKEN in .env) + # Uncomment and set the token to enable: + # webhook_configs: + # - url: 'http://gotify-changemaker:80/message?token=YOUR_GOTIFY_APP_TOKEN' + # send_resolved: true + # http_config: + # follow_redirects: true + + # Warning alerts - email + optional Gotify + - name: 'warning-alerts' + email_configs: + - to: 'admin@changemaker.local' + headers: + Subject: 'Warning: {{ .GroupLabels.alertname }}' + # webhook_configs: + # - url: 'http://gotify-changemaker:80/message?token=YOUR_GOTIFY_APP_TOKEN' + + # Info alerts - email only + - name: 'info-alerts' + email_configs: + - to: 'admin@changemaker.local' + headers: + Subject: 'Info: {{ .GroupLabels.alertname }}' + +# Inhibition rules (prevent spam) +inhibit_rules: + # If a critical alert is firing, suppress related warnings + - source_match: + severity: 'critical' + target_match: + severity: 'warning' + equal: ['alertname', 'instance'] + + # If disk is critical, suppress disk warning + - source_match: + alertname: 'DiskSpaceCritical' + target_match: + alertname: 'DiskSpaceLow' + equal: ['instance'] diff --git a/changemaker-control-panel/templates/configs/grafana/dashboards/api-performance.json b/changemaker-control-panel/templates/configs/grafana/dashboards/api-performance.json new file mode 100644 index 00000000..ca956ca3 --- /dev/null +++ b/changemaker-control-panel/templates/configs/grafana/dashboards/api-performance.json @@ -0,0 +1,402 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { "type": "grafana", "uid": "-- Grafana --" }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "uid": null, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, + "id": 100, + "title": "Request Rate", + "type": 
"row" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "unit": "reqps" + } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 1 }, + "id": 1, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "sum by(method) (rate(http_requests_total[5m]))", + "legendFormat": "{{method}}", + "refId": "A" + } + ], + "title": "Requests/sec by Method", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "5xx Errors" }, + "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] + }, + { + "matcher": { "id": "byName", "options": "4xx Errors" }, + "properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }] + } + ] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 1 }, + "id": 2, + "options": { + "legend": { "calcs": ["mean", "max"], 
"displayMode": "table", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "sum(rate(http_requests_total{status_code=~\"5..\"}[5m]))", + "legendFormat": "5xx Errors", + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "sum(rate(http_requests_total{status_code=~\"4..\"}[5m]))", + "legendFormat": "4xx Errors", + "refId": "B" + } + ], + "title": "Error Rate", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 9 }, + "id": 101, + "title": "Latency", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "line" } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "transparent", "value": null }, + { "color": "red", "value": 1 } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "p99" }, + "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] + }, + { + "matcher": { "id": "byName", "options": "p95" }, + "properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }] + }, + { + "matcher": { "id": "byName", "options": "p50" }, + "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] + } + ] + }, + "gridPos": { "h": 8, "w": 16, "x": 0, "y": 10 }, + "id": 3, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", 
"showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "histogram_quantile(0.50, sum(rate(http_request_duration_seconds_bucket[5m])) by (le))", + "legendFormat": "p50", + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket[5m])) by (le))", + "legendFormat": "p95", + "refId": "B" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "histogram_quantile(0.99, sum(rate(http_request_duration_seconds_bucket[5m])) by (le))", + "legendFormat": "p99", + "refId": "C" + } + ], + "title": "Request Latency Percentiles", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "fixedColor": "super-light-blue", "mode": "fixed" }, + "custom": { + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "unit": "s" + } + }, + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 10 }, + "id": 4, + "options": { + "legend": { "calcs": ["mean"], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "single", "sort": "none" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "sum(rate(http_request_duration_seconds_sum[5m])) / sum(rate(http_request_duration_seconds_count[5m]))", + "legendFormat": "Avg Latency", + "refId": "A" + } + ], + "title": "Average Request Latency", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 18 }, + "id": 102, + 
"title": "Top Endpoints", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "custom": { + "align": "auto", + "cellOptions": { "type": "auto" }, + "inspect": false + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null } + ] + } + }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "Value" }, + "properties": [ + { "id": "displayName", "value": "Requests (5m)" }, + { "id": "custom.cellOptions", "value": { "mode": "gradient", "type": "gauge" } }, + { "id": "color", "value": { "mode": "continuous-BlYlRd" } } + ] + }, + { + "matcher": { "id": "byName", "options": "route" }, + "properties": [ + { "id": "displayName", "value": "Route" }, + { "id": "custom.width", "value": 300 } + ] + }, + { + "matcher": { "id": "byName", "options": "method" }, + "properties": [ + { "id": "displayName", "value": "Method" }, + { "id": "custom.width", "value": 80 } + ] + } + ] + }, + "gridPos": { "h": 9, "w": 12, "x": 0, "y": 19 }, + "id": 5, + "options": { + "cellHeight": "sm", + "footer": { "countRows": false, "enablePagination": false, "fields": "", "reducer": ["sum"], "show": false }, + "showHeader": true, + "sortBy": [{ "desc": true, "displayName": "Requests (5m)" }] + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "topk(15, sum by(method, route) (increase(http_requests_total[5m])))", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Top Routes by Request Count", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { "Time": true }, + "renameByName": {} + } + } + ], + "type": "table" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "custom": { + "align": "auto", + "cellOptions": { 
"type": "auto" }, + "inspect": false + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 0.5 }, + { "color": "red", "value": 1 } + ] + } + }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "Value" }, + "properties": [ + { "id": "displayName", "value": "p95 Latency (s)" }, + { "id": "unit", "value": "s" }, + { "id": "decimals", "value": 3 }, + { "id": "custom.cellOptions", "value": { "mode": "gradient", "type": "gauge" } }, + { "id": "color", "value": { "mode": "continuous-GrYlRd" } } + ] + }, + { + "matcher": { "id": "byName", "options": "route" }, + "properties": [ + { "id": "displayName", "value": "Route" }, + { "id": "custom.width", "value": 300 } + ] + }, + { + "matcher": { "id": "byName", "options": "method" }, + "properties": [ + { "id": "displayName", "value": "Method" }, + { "id": "custom.width", "value": 80 } + ] + } + ] + }, + "gridPos": { "h": 9, "w": 12, "x": 12, "y": 19 }, + "id": 6, + "options": { + "cellHeight": "sm", + "footer": { "countRows": false, "enablePagination": false, "fields": "", "reducer": ["sum"], "show": false }, + "showHeader": true, + "sortBy": [{ "desc": true, "displayName": "p95 Latency (s)" }] + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "topk(15, histogram_quantile(0.95, sum by(method, route, le) (rate(http_request_duration_seconds_bucket[5m]))))", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Slowest Routes by p95 Latency", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { "Time": true }, + "renameByName": {} + } + } + ], + "type": "table" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "tags": ["changemaker", "v2", "api", "performance"], + "templating": { "list": [] }, + "time": { "from": "now-1h", "to": "now" }, + "timepicker": {}, + "timezone": "", + "title": "Changemaker - API 
Performance", + "version": 1 +} diff --git a/changemaker-control-panel/templates/configs/grafana/dashboards/application-overview.json b/changemaker-control-panel/templates/configs/grafana/dashboards/application-overview.json new file mode 100644 index 00000000..2622ea45 --- /dev/null +++ b/changemaker-control-panel/templates/configs/grafana/dashboards/application-overview.json @@ -0,0 +1,533 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { "type": "grafana", "uid": "-- Grafana --" }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "uid": "changemaker-overview", + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, + "id": 100, + "title": "Email System", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 50 }, + { "color": "red", "value": 100 } + ] + } + } + }, + "gridPos": { "h": 6, "w": 6, "x": 0, "y": 1 }, + "id": 1, + "options": { + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "cm_email_queue_size", + "legendFormat": "Queue Size", + "refId": "A" + } + ], + "title": "Email Queue Size", + "type": "gauge" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "line", + 
"fillOpacity": 15, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "unit": "ops" + } + }, + "gridPos": { "h": 6, "w": 6, "x": 6, "y": 1 }, + "id": 2, + "options": { + "legend": { "calcs": ["mean"], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "rate(cm_emails_sent_total[5m])", + "legendFormat": "Sent/sec", + "refId": "A" + } + ], + "title": "Emails Sent Rate", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "fixedColor": "red", "mode": "fixed" }, + "custom": { + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "unit": "ops" + } + }, + "gridPos": { "h": 6, "w": 6, "x": 12, "y": 1 }, + "id": 3, + "options": { + "legend": { "calcs": ["mean"], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "single", "sort": "none" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "rate(cm_emails_failed_total[5m])", + "legendFormat": "Failed/sec", + "refId": "A" + } + ], + "title": "Emails Failed Rate", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "fixedColor": "purple", "mode": "fixed" }, + "custom": { + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "line", + 
"fillOpacity": 15, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "unit": "s" + } + }, + "gridPos": { "h": 6, "w": 6, "x": 18, "y": 1 }, + "id": 4, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "single", "sort": "none" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "histogram_quantile(0.95, rate(cm_email_send_duration_seconds_bucket[5m]))", + "legendFormat": "p95 Send Duration", + "refId": "A" + } + ], + "title": "Email Send Duration (p95)", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 7 }, + "id": 101, + "title": "Authentication", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "bars", + "fillOpacity": 80, + "gradientMode": "none", + "lineWidth": 1, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "normal" }, + "thresholdsStyle": { "mode": "off" } + }, + "unit": "ops" + }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "Failures" }, + "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] + }, + { + "matcher": { "id": "byName", "options": "Successes" }, + "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] + } + ] + }, + "gridPos": { "h": 6, "w": 12, "x": 0, "y": 8 }, + "id": 5, + "options": { + "legend": { "calcs": ["sum"], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + 
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "rate(cm_login_attempts_total{result=\"success\"}[5m])", + "legendFormat": "Successes", + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "rate(cm_login_attempts_total{result=\"failure\"}[5m])", + "legendFormat": "Failures", + "refId": "B" + } + ], + "title": "Login Attempts Rate", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "blue", "value": null } + ] + } + } + }, + "gridPos": { "h": 6, "w": 12, "x": 12, "y": 8 }, + "id": 6, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "cm_active_sessions", + "legendFormat": "Active Sessions", + "refId": "A" + } + ], + "title": "Active Sessions", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 14 }, + "id": 102, + "title": "Campaigns & Responses", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "unit": "ops" + } + }, + "gridPos": { "h": 6, "w": 12, "x": 0, "y": 15 }, + "id": 7, + "options": { + "legend": { "calcs": ["sum"], "displayMode": 
"list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "rate(cm_campaign_emails_total[5m])", + "legendFormat": "Campaign Emails/sec", + "refId": "A" + } + ], + "title": "Campaign Email Sends Rate", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "unit": "ops" + } + }, + "gridPos": { "h": 6, "w": 12, "x": 12, "y": 15 }, + "id": 8, + "options": { + "legend": { "calcs": ["sum"], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "rate(cm_response_submissions_total[5m])", + "legendFormat": "Responses/sec", + "refId": "A" + } + ], + "title": "Response Submissions Rate", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 21 }, + "id": 103, + "title": "Canvassing", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null } + ] + } + } + }, + "gridPos": { "h": 6, "w": 6, "x": 0, "y": 22 }, + "id": 9, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], 
"fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "cm_active_canvass_sessions", + "legendFormat": "Active Sessions", + "refId": "A" + } + ], + "title": "Active Canvass Sessions", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "bars", + "fillOpacity": 80, + "gradientMode": "none", + "lineWidth": 1, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "normal" }, + "thresholdsStyle": { "mode": "off" } + }, + "unit": "ops" + } + }, + "gridPos": { "h": 6, "w": 10, "x": 6, "y": 22 }, + "id": 10, + "options": { + "legend": { "calcs": ["sum"], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "rate(cm_canvass_visits_total[5m])", + "legendFormat": "{{outcome}}", + "refId": "A" + } + ], + "title": "Canvass Visits by Outcome", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "fixedColor": "super-light-blue", "mode": "fixed" }, + "custom": { + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "unit": "ops" + } + }, + "gridPos": { "h": 6, "w": 8, "x": 16, "y": 22 }, + "id": 11, + "options": { + "legend": { "calcs": ["sum"], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { 
"mode": "single", "sort": "none" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "rate(cm_shift_signups_total[5m])", + "legendFormat": "Signups/sec", + "refId": "A" + } + ], + "title": "Shift Signups Rate", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 28 }, + "id": 104, + "title": "External Services", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [ + { + "options": { + "0": { "color": "red", "index": 1, "text": "DOWN" }, + "1": { "color": "green", "index": 0, "text": "UP" } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ] + } + } + }, + "gridPos": { "h": 5, "w": 24, "x": 0, "y": 29 }, + "id": 12, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "cm_external_service_up", + "legendFormat": "{{service}}", + "refId": "A" + } + ], + "title": "External Service Health", + "type": "stat" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "tags": ["changemaker", "v2", "application"], + "templating": { "list": [] }, + "time": { "from": "now-1h", "to": "now" }, + "timepicker": {}, + "timezone": "", + "title": "Changemaker - Application Overview", + "version": 1 +} diff --git a/changemaker-control-panel/templates/configs/grafana/dashboards/dashboards.yml b/changemaker-control-panel/templates/configs/grafana/dashboards/dashboards.yml new file mode 100644 index 00000000..be165c4b --- /dev/null +++ 
b/changemaker-control-panel/templates/configs/grafana/dashboards/dashboards.yml @@ -0,0 +1,12 @@ +apiVersion: 1 + +providers: + - name: 'default' + orgId: 1 + folder: '' + type: file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: true + options: + path: /etc/grafana/provisioning/dashboards diff --git a/changemaker-control-panel/templates/configs/grafana/dashboards/system-health.json b/changemaker-control-panel/templates/configs/grafana/dashboards/system-health.json new file mode 100644 index 00000000..8faa7e29 --- /dev/null +++ b/changemaker-control-panel/templates/configs/grafana/dashboards/system-health.json @@ -0,0 +1,415 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { "type": "grafana", "uid": "-- Grafana --" }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "uid": null, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, + "id": 100, + "title": "Service Status", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [ + { + "options": { + "0": { "color": "red", "index": 1, "text": "DOWN" }, + "1": { "color": "green", "index": 0, "text": "UP" } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ] + } + } + }, + "gridPos": { "h": 4, "w": 8, "x": 0, "y": 1 }, + "id": 1, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": 
"PBFA97CFB590B2093" }, + "expr": "up{job=~\".+-api\", job!~\".+-media-api\"}", + "legendFormat": "API", + "refId": "A" + } + ], + "title": "API Status", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [ + { + "options": { + "0": { "color": "red", "index": 1, "text": "DOWN" }, + "1": { "color": "green", "index": 0, "text": "UP" } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ] + } + } + }, + "gridPos": { "h": 4, "w": 8, "x": 8, "y": 1 }, + "id": 2, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "redis_up", + "legendFormat": "Redis", + "refId": "A" + } + ], + "title": "Redis Status", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [ + { + "options": { + "0": { "color": "red", "index": 1, "text": "DOWN" }, + "1": { "color": "green", "index": 0, "text": "UP" } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ] + } + } + }, + "gridPos": { "h": 4, "w": 8, "x": 16, "y": 1 }, + "id": 3, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "up{job=~\".+-n8n\"}", + "legendFormat": "n8n", + 
"refId": "A" + } + ], + "title": "N8N Status", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 }, + "id": 101, + "title": "System Resources", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "line" } + }, + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 70 }, + { "color": "red", "value": 90 } + ] + }, + "unit": "percent" + } + }, + "gridPos": { "h": 8, "w": 8, "x": 0, "y": 6 }, + "id": 4, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "single", "sort": "none" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "100 - (avg by(instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)", + "legendFormat": "CPU Usage %", + "refId": "A" + } + ], + "title": "CPU Usage", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + 
"thresholdsStyle": { "mode": "line" } + }, + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 70 }, + { "color": "red", "value": 90 } + ] + }, + "unit": "percent" + } + }, + "gridPos": { "h": 8, "w": 8, "x": 8, "y": 6 }, + "id": 5, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "single", "sort": "none" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100", + "legendFormat": "Memory Usage %", + "refId": "A" + } + ], + "title": "Memory Usage", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "line" } + }, + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 75 }, + { "color": "red", "value": 90 } + ] + }, + "unit": "percent" + } + }, + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 6 }, + "id": 6, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "single", "sort": "none" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "(1 - (node_filesystem_avail_bytes{mountpoint=\"/\"} / node_filesystem_size_bytes{mountpoint=\"/\"})) * 
100", + "legendFormat": "Disk Usage %", + "refId": "A" + } + ], + "title": "Disk Usage", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 14 }, + "id": 102, + "title": "Container Resources", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "unit": "percentunit" + } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 15 }, + "id": 7, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "right", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "rate(container_cpu_usage_seconds_total{name=~\".+\"}[5m])", + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "title": "Container CPU Usage", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisPlacement": "auto", + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "unit": "bytes" + } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 15 }, + "id": 8, + "options": { + "legend": { "calcs": 
["mean", "max"], "displayMode": "table", "placement": "right", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "expr": "container_memory_usage_bytes{name=~\".+\"}", + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "title": "Container Memory Usage", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "tags": ["changemaker", "v2", "system-health"], + "templating": { "list": [] }, + "time": { "from": "now-1h", "to": "now" }, + "timepicker": {}, + "timezone": "", + "title": "Changemaker - System Health", + "version": 1 +} diff --git a/changemaker-control-panel/templates/configs/grafana/datasources/datasources.yml.hbs b/changemaker-control-panel/templates/configs/grafana/datasources/datasources.yml.hbs new file mode 100644 index 00000000..edc59135 --- /dev/null +++ b/changemaker-control-panel/templates/configs/grafana/datasources/datasources.yml.hbs @@ -0,0 +1,11 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + access: proxy + url: http://{{containerPrefix}}-prometheus:9090 + isDefault: true + editable: true + jsonData: + timeInterval: 15s diff --git a/changemaker-control-panel/templates/configs/pangolin/resources.yml.hbs b/changemaker-control-panel/templates/configs/pangolin/resources.yml.hbs index e45b9a5d..12cbfdd6 100644 --- a/changemaker-control-panel/templates/configs/pangolin/resources.yml.hbs +++ b/changemaker-control-panel/templates/configs/pangolin/resources.yml.hbs @@ -108,4 +108,9 @@ resources: subdomain: vault target: http://{{containerPrefix}}-nginx:80 isBaseDomain: false + + - name: draw + subdomain: draw + target: http://{{containerPrefix}}-nginx:80 + isBaseDomain: false {{/if}} diff --git a/changemaker-control-panel/templates/configs/prometheus/alerts.yml b/changemaker-control-panel/templates/configs/prometheus/alerts.yml new file mode 100644 index 00000000..ba0a94a4 --- 
/dev/null +++ b/changemaker-control-panel/templates/configs/prometheus/alerts.yml @@ -0,0 +1,215 @@ +groups: + - name: v2_app_alerts + interval: 30s + rules: + # Application availability (scrape job is named '<composeProject>-api', so match by suffix and exclude the media-api job) + - alert: ApplicationDown + expr: up{job=~".+-api", job!~".+-media-api"} == 0 + for: 2m + labels: + severity: critical + annotations: + summary: "V2 API is down" + description: "The Changemaker V2 API has been down for more than 2 minutes." + + # High error rate + - alert: HighErrorRate + expr: rate(http_requests_total{status_code=~"5.."}[5m]) > 0.1 + for: 5m + labels: + severity: warning + annotations: + summary: "High error rate detected" + description: "Application is experiencing {{ $value }} errors per second." + + # Email queue backing up + - alert: EmailQueueBacklog + expr: cm_email_queue_size > 100 + for: 10m + labels: + severity: warning + annotations: + summary: "Email queue has significant backlog" + description: "Email queue size is {{ $value }}, emails may be delayed." + + # High email failure rate + - alert: HighEmailFailureRate + expr: rate(cm_emails_failed_total[5m]) / rate(cm_emails_sent_total[5m]) > 0.2 + for: 10m + labels: + severity: warning + annotations: + summary: "High email failure rate" + description: "{{ $value | humanizePercentage }} of emails are failing to send." + + # Failed login attempts spike + - alert: SuspiciousLoginActivity + expr: rate(cm_login_attempts_total{status="failure"}[5m]) > 5 + for: 2m + labels: + severity: warning + annotations: + summary: "Suspicious login activity detected" + description: "{{ $value }} failed login attempts per second detected." + + # High API latency + - alert: HighAPILatency + expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) > 2 + for: 5m + labels: + severity: warning + annotations: + summary: "High API latency" + description: "95th percentile latency is {{ $value }}s for {{ $labels.route }}." 
+ + # External service down + - alert: ExternalServiceDown + expr: cm_external_service_up == 0 + for: 5m + labels: + severity: warning + annotations: + summary: "External service {{ $labels.service }} is down" + description: "Service {{ $labels.service }} has been unreachable for 5 minutes." + + # System health alerts + - name: system_alerts + interval: 30s + rules: + # Redis down + - alert: RedisDown + expr: redis_up == 0 + for: 1m + labels: + severity: critical + annotations: + summary: "Redis cache is down" + description: "Redis has been down for more than 1 minute. Caching and session management will fail." + + # Disk space running low + - alert: DiskSpaceLow + expr: (node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}) < 0.15 + for: 5m + labels: + severity: warning + annotations: + summary: "Disk space is running low" + description: "Only {{ $value | humanizePercentage }} disk space remaining on root filesystem." + + # Disk space critical + - alert: DiskSpaceCritical + expr: (node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}) < 0.10 + for: 2m + labels: + severity: critical + annotations: + summary: "CRITICAL: Disk space nearly exhausted" + description: "Only {{ $value | humanizePercentage }} disk space remaining! System may fail soon." + + # High CPU usage + - alert: HighCPUUsage + expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 85 + for: 10m + labels: + severity: warning + annotations: + summary: "High CPU usage detected" + description: "CPU usage is {{ $value }}% on {{ $labels.instance }}." + + # Memory usage high + - alert: HighMemoryUsage + expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) > 0.85 + for: 10m + labels: + severity: warning + annotations: + summary: "High memory usage" + description: "Memory usage is above 85% ({{ $value | humanizePercentage }})." 
+ + # Container CPU throttling (only Docker containers) + - alert: ContainerCPUThrottling + expr: rate(container_cpu_cfs_throttled_seconds_total{name!=""}[5m]) > 0.5 + for: 5m + labels: + severity: warning + annotations: + summary: "Container is being CPU throttled" + description: "Container {{ $labels.name }} is experiencing CPU throttling." + + # Container memory usage high (only Docker containers with memory limits) + - alert: ContainerMemoryHigh + expr: (container_memory_usage_bytes{name!=""} / container_spec_memory_limit_bytes{name!=""}) > 0.90 and container_spec_memory_limit_bytes{name!=""} > 0 + for: 5m + labels: + severity: warning + annotations: + summary: "Container memory usage is high" + description: "Container {{ $labels.name }} is using {{ $value | humanizePercentage }} of its memory limit." + + # Infrastructure alerts + - name: infrastructure_alerts + interval: 30s + rules: + # Prometheus scrape failures + - alert: PrometheusScrapeFailures + expr: rate(prometheus_target_scrapes_failed_total[5m]) > 0.1 + for: 5m + labels: + severity: warning + annotations: + summary: "Prometheus scrape failures detected" + description: "Prometheus is failing to scrape {{ $labels.job }} target." + + # Prometheus configuration reload failure + - alert: PrometheusConfigReloadFailed + expr: prometheus_config_last_reload_successful == 0 + for: 1m + labels: + severity: warning + annotations: + summary: "Prometheus configuration reload failed" + description: "Prometheus failed to reload its configuration. Check prometheus logs." + + # Alertmanager down (scrape job is named '<composeProject>-alertmanager', so match by suffix) + - alert: AlertmanagerDown + expr: up{job=~".+-alertmanager"} == 0 + for: 2m + labels: + severity: critical + annotations: + summary: "Alertmanager is down" + description: "Alertmanager has been down for 2 minutes. Alerts will not be delivered!" 
+ + # Security alerts + - name: security_alerts + interval: 15s + rules: + # Possible DDoS attack (summed over all label sets: the threshold applies to total request volume) + - alert: PossibleDDoSAttack + expr: sum(rate(http_requests_total[1m])) > 1000 + for: 2m + labels: + severity: critical + annotations: + summary: "Possible DDoS attack detected" + description: "Receiving {{ $value }} requests per second for 2 minutes. This may be a DDoS attack." + + # Sustained high traffic (summed over all label sets: the threshold applies to total request volume) + - alert: SustainedHighTraffic + expr: sum(rate(http_requests_total[5m])) > 500 + for: 10m + labels: + severity: warning + annotations: + summary: "Sustained high traffic detected" + description: "Receiving {{ $value }} requests per second for 10 minutes. Monitor for performance issues." + + # Too many 4xx errors + - alert: HighClientErrorRate + expr: rate(http_requests_total{status_code=~"4.."}[5m]) > 5 + for: 5m + labels: + severity: warning + annotations: + summary: "High rate of 4xx client errors" + description: "Receiving {{ $value }} client errors per second. Check for broken links or API misuse." 
diff --git a/changemaker-control-panel/templates/configs/prometheus/prometheus.yml.hbs b/changemaker-control-panel/templates/configs/prometheus/prometheus.yml.hbs index eba5098e..ce52fbf7 100644 --- a/changemaker-control-panel/templates/configs/prometheus/prometheus.yml.hbs +++ b/changemaker-control-panel/templates/configs/prometheus/prometheus.yml.hbs @@ -3,20 +3,64 @@ global: scrape_interval: 15s evaluation_interval: 15s + external_labels: + monitor: '{{composeProject}}' + +{{#if enableMonitoring}} +alerting: + alertmanagers: + - static_configs: + - targets: ['{{containerPrefix}}-alertmanager:9093'] + +rule_files: + - "alerts.yml" +{{/if}} scrape_configs: - job_name: '{{composeProject}}-api' static_configs: - targets: ['{{containerPrefix}}-api:4000'] - metrics_path: /api/metrics + metrics_path: '/api/metrics' + scrape_interval: 10s + scrape_timeout: 5s {{#if enableMedia}} - job_name: '{{composeProject}}-media-api' static_configs: - targets: ['{{containerPrefix}}-media-api:4100'] - metrics_path: /api/metrics + metrics_path: '/api/metrics' {{/if}} - job_name: '{{composeProject}}-redis' static_configs: - targets: ['{{containerPrefix}}-redis-exporter:9121'] + scrape_interval: 15s + +{{#if enableMonitoring}} + - job_name: '{{composeProject}}-cadvisor' + static_configs: + - targets: ['{{containerPrefix}}-cadvisor:8080'] + scrape_interval: 15s + + - job_name: '{{composeProject}}-node' + static_configs: + - targets: ['{{containerPrefix}}-node-exporter:9100'] + scrape_interval: 15s + + - job_name: '{{composeProject}}-prometheus' + static_configs: + - targets: ['localhost:9090'] + + - job_name: '{{composeProject}}-alertmanager' + static_configs: + - targets: ['{{containerPrefix}}-alertmanager:9093'] + scrape_interval: 30s +{{/if}} + +{{#if enableDevTools}} + - job_name: '{{composeProject}}-n8n' + static_configs: + - targets: ['{{containerPrefix}}-n8n:5678'] + metrics_path: '/metrics' + scrape_interval: 30s +{{/if}} diff --git 
a/changemaker-control-panel/templates/docker-compose.yml.hbs b/changemaker-control-panel/templates/docker-compose.yml.hbs index 2aa9c9ce..0c4b466e 100644 --- a/changemaker-control-panel/templates/docker-compose.yml.hbs +++ b/changemaker-control-panel/templates/docker-compose.yml.hbs @@ -96,10 +96,14 @@ services: {{/if}} ports: - "{{ports.api}}:4000" +{{#if enableListmonk}} + - "9002:9002" +{{/if}} volumes: - ./assets/uploads:/app/uploads - ./mkdocs:/mkdocs:rw - ./data:/data:ro + - ./data/upgrade:/app/upgrade:rw - ./configs:/app/configs:ro networks: - {{networkName}} @@ -800,13 +804,32 @@ services: timeout: 5s retries: 3 + docker-socket-proxy: + image: ghcr.io/tecnativa/docker-socket-proxy:latest + container_name: {{containerPrefix}}-docker-socket-proxy + restart: unless-stopped + environment: + CONTAINERS: 1 + IMAGES: 1 + INFO: 1 + NETWORKS: 0 + VOLUMES: 0 + POST: 0 + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro + networks: + - {{networkName}} + homepage: image: ghcr.io/gethomepage/homepage:latest container_name: {{containerPrefix}}-homepage restart: unless-stopped + environment: + DOCKER_HOST: tcp://{{containerPrefix}}-docker-socket-proxy:2375 volumes: - {{containerPrefix}}-homepage-data:/app/config - - /var/run/docker.sock:/var/run/docker.sock:ro + depends_on: + - docker-socket-proxy networks: - {{networkName}} @@ -958,6 +981,50 @@ services: - {{containerPrefix}}-alertmanager-data:/alertmanager networks: - {{networkName}} + + cadvisor: + image: gcr.io/cadvisor/cadvisor:latest + container_name: {{containerPrefix}}-cadvisor + restart: unless-stopped + volumes: + - /:/rootfs:ro + - /var/run:/var/run:ro + - /sys:/sys:ro + - /var/lib/docker/:/var/lib/docker:ro + networks: + - {{networkName}} + + node-exporter: + image: prom/node-exporter:latest + container_name: {{containerPrefix}}-node-exporter + restart: unless-stopped + command: + - '--path.rootfs=/host' + volumes: + - /:/host:ro,rslave + networks: + - {{networkName}} + + redis-exporter: + image: 
oliver006/redis_exporter:latest + container_name: {{containerPrefix}}-redis-exporter + restart: unless-stopped + environment: + REDIS_ADDR: redis://{{containerPrefix}}-redis:6379 + REDIS_PASSWORD: "{{secrets.redisPassword}}" + depends_on: + - redis + networks: + - {{networkName}} + + gotify: + image: gotify/server:latest + container_name: {{containerPrefix}}-gotify + restart: unless-stopped + volumes: + - {{containerPrefix}}-gotify-data:/app/data + networks: + - {{networkName}} {{/if}} # ─── Volumes ────────────────────────────────────────────── @@ -986,6 +1053,7 @@ volumes: {{containerPrefix}}-prometheus-data: {{containerPrefix}}-grafana-data: {{containerPrefix}}-alertmanager-data: + {{containerPrefix}}-gotify-data: {{/if}} {{#if enableMeet}} {{containerPrefix}}-jitsi-web-config: diff --git a/changemaker-control-panel/templates/env.hbs b/changemaker-control-panel/templates/env.hbs index dfb3e8f1..38f110fa 100644 --- a/changemaker-control-panel/templates/env.hbs +++ b/changemaker-control-panel/templates/env.hbs @@ -103,8 +103,10 @@ LISTMONK_SMTP_FROM={{name}} # Media {{#if enableMedia}} ENABLE_MEDIA_FEATURES=true +MEDIA_API_PUBLIC_URL=https://media.{{domain}} {{else}} ENABLE_MEDIA_FEATURES=false +MEDIA_API_PUBLIC_URL= {{/if}} MEDIA_API_PORT=4100 MEDIA_ROOT=/media/local @@ -256,12 +258,11 @@ BASE_DOMAIN=https://{{domain}} # Gitea GITEA_URL=http://{{containerPrefix}}-gitea:3000 GITEA_SSH_PORT=2222 -GITEA_DB_TYPE=mysql -GITEA_DB_HOST={{containerPrefix}}-gitea-db:3306 +GITEA_DB_TYPE=postgres +GITEA_DB_HOST={{containerPrefix}}-postgres:5432 GITEA_DB_NAME=gitea -GITEA_DB_USER=gitea -GITEA_DB_PASSWD={{secrets.giteaAdminPassword}} -GITEA_DB_ROOT_PASSWORD={{secrets.giteaAdminPassword}} +GITEA_DB_USER=changemaker +GITEA_DB_PASSWD={{secrets.postgresPassword}} GITEA_ROOT_URL=https://git.{{domain}} GITEA_DOMAIN=git.{{domain}} GITEA_COMMENTS_ENABLED=false diff --git a/changemaker-control-panel/templates/nginx/conf.d/default.conf.hbs 
b/changemaker-control-panel/templates/nginx/conf.d/default.conf.hbs index 2b3d2afd..c5e24d89 100644 --- a/changemaker-control-panel/templates/nginx/conf.d/default.conf.hbs +++ b/changemaker-control-panel/templates/nginx/conf.d/default.conf.hbs @@ -11,6 +11,65 @@ server { server_name app.{{domain}}; add_header X-Frame-Options "SAMEORIGIN" always; + # Bot detection for OG meta previews + set $is_bot 0; + if ($http_user_agent ~* "(Twitterbot|facebookexternalhit|LinkedInBot|Slackbot|TelegramBot|WhatsApp|Discordbot|Googlebot|bingbot|Pinterest)") { + set $is_bot 1; + } + + # Campaign OG meta + location ~ ^/campaign/([^/]+)$ { + if ($is_bot = 1) { + rewrite ^/campaign/(.*)$ /api/og/campaign/$1 break; + proxy_pass http://{{containerPrefix}}-api:4000; + } + set $upstream_admin http://{{containerPrefix}}-admin:3000; + proxy_pass $upstream_admin; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + # Landing page OG meta + location ~ ^/p/([^/]+)$ { + if ($is_bot = 1) { + rewrite ^/p/(.*)$ /api/og/page/$1 break; + proxy_pass http://{{containerPrefix}}-api:4000; + } + set $upstream_admin http://{{containerPrefix}}-admin:3000; + proxy_pass $upstream_admin; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + +{{#if enableMedia}} + # Gallery video OG meta + location ~ ^/gallery/watch/([^/]+)$ { + if ($is_bot = 1) { + rewrite ^/gallery/watch/(.*)$ /api/og/gallery/$1 break; + proxy_pass http://{{containerPrefix}}-api:4000; + } + set $upstream_admin http://{{containerPrefix}}-admin:3000; + proxy_pass 
$upstream_admin; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } +{{/if}} + location / { set $upstream_admin http://{{containerPrefix}}-admin:3000; proxy_pass $upstream_admin; @@ -367,6 +426,24 @@ server { proxy_set_header X-Forwarded-Proto $scheme; } } + +# Excalidraw +server { + listen 80; + server_name draw.{{domain}}; + + location / { + set $upstream_excalidraw http://{{containerPrefix}}-excalidraw:80; + proxy_pass $upstream_excalidraw; + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + } +} {{/if}} # Root domain — MkDocs static site @@ -393,6 +470,65 @@ server { server_name localhost _; add_header X-Frame-Options "SAMEORIGIN" always; + # Bot detection for OG meta previews + set $is_bot 0; + if ($http_user_agent ~* "(Twitterbot|facebookexternalhit|LinkedInBot|Slackbot|TelegramBot|WhatsApp|Discordbot|Googlebot|bingbot|Pinterest)") { + set $is_bot 1; + } + + # Campaign OG meta + location ~ ^/campaign/([^/]+)$ { + if ($is_bot = 1) { + rewrite ^/campaign/(.*)$ /api/og/campaign/$1 break; + proxy_pass http://{{containerPrefix}}-api:4000; + } + set $upstream_admin http://{{containerPrefix}}-admin:3000; + proxy_pass $upstream_admin; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + # Landing page OG meta + location ~ ^/p/([^/]+)$ { + if 
($is_bot = 1) { + rewrite ^/p/(.*)$ /api/og/page/$1 break; + proxy_pass http://{{containerPrefix}}-api:4000; + } + set $upstream_admin http://{{containerPrefix}}-admin:3000; + proxy_pass $upstream_admin; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + +{{#if enableMedia}} + # Gallery video OG meta + location ~ ^/gallery/watch/([^/]+)$ { + if ($is_bot = 1) { + rewrite ^/gallery/watch/(.*)$ /api/og/gallery/$1 break; + proxy_pass http://{{containerPrefix}}-api:4000; + } + set $upstream_admin http://{{containerPrefix}}-admin:3000; + proxy_pass $upstream_admin; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } +{{/if}} + # Admin GUI + Public pages (default) location / { set $upstream_admin http://{{containerPrefix}}-admin:3000;