Sync CCP templates with production configs for complete instance provisioning

Closes 12 template drift gaps between the Control Panel templates and
production configs. New instances now provision with full monitoring
(alerts fire properly), correct Gitea DB type (postgres not mysql),
social sharing previews (OG meta bot routes), Excalidraw subdomain
routing, docker-socket-proxy for Homepage, and complete Grafana/
Alertmanager/Prometheus config copying.

Key changes:
- Rewrite Prometheus template: add alerting, rule_files, 5 scrape jobs
- Add cAdvisor, node-exporter, redis-exporter, gotify, docker-socket-proxy
- Fix Gitea env from mysql to postgres to match docker-compose
- Add OG bot detection + rewrite routes for campaigns/pages/gallery
- Add Excalidraw nginx server block + Pangolin draw subdomain
- Add embed port to discovery portConfig + emailTestMode to registration
- Copy alerts.yml, alertmanager.yml, Grafana dashboards to templates
- Add Listmonk proxy port and upgrade volume to API service

Bunker Admin
This commit is contained in:
bunker-admin 2026-03-05 08:32:49 -07:00
parent da3e43fcf7
commit 5642a24c8f
15 changed files with 1981 additions and 13 deletions

View File

@ -62,6 +62,7 @@ export const registerInstanceSchema = z.object({
admin: z.coerce.number().int().min(1).max(65535),
postgres: z.coerce.number().int().min(1).max(65535),
nginx: z.coerce.number().int().min(1).max(65535),
embed: z.coerce.number().int().min(1).max(65535).optional(),
}),
adminEmail: z.string().email().optional().default('admin@localhost'),
enableMedia: z.boolean().default(false),
@ -75,6 +76,7 @@ export const registerInstanceSchema = z.object({
enableSms: z.boolean().default(false),
enableSocial: z.boolean().default(false),
enablePeople: z.boolean().default(false),
emailTestMode: z.boolean().default(true),
notes: z.string().optional(),
});

View File

@ -19,7 +19,7 @@ export interface DiscoveredInstance {
domain: string;
basePath: string;
composeProject: string;
portConfig: { api: number; admin: number; postgres: number; nginx: number };
portConfig: { api: number; admin: number; postgres: number; nginx: number; embed: number };
adminEmail: string;
enableMedia: boolean;
enableChat: boolean;
@ -78,11 +78,13 @@ async function parseCmlEnv(envPath: string): Promise<Record<string, string> | nu
}
/**
 * Builds the port configuration of a discovered instance from its parsed env values.
 *
 * Each port is read from its env variable and falls back to a default when the
 * variable is missing, empty, or not a number, so `NaN` never reaches the result:
 * API_PORT -> 4000, ADMIN_PORT -> 3000, V2_POSTGRES_PORT -> 5433,
 * NGINX_HTTP_PORT -> 80, NOCODB_EMBED_PORT -> resolved nginx port + 1.
 *
 * @param envVars Key/value pairs parsed from the instance's env file.
 * @returns The resolved `portConfig` (api, admin, postgres, nginx, embed).
 */
function extractPortConfig(envVars: Record<string, string>): DiscoveredInstance['portConfig'] {
  // `||` (not `??`) is deliberate: an empty string must also select the default.
  const readPort = (raw: string | undefined, fallback: number): number => {
    const parsed = parseInt(raw || String(fallback), 10);
    // parseInt yields NaN for values such as "abc"; use the default instead.
    return Number.isNaN(parsed) ? fallback : parsed;
  };

  // Resolved first because the embed port defaults to the port right after nginx.
  const nginx = readPort(envVars.NGINX_HTTP_PORT, 80);

  return {
    api: readPort(envVars.API_PORT, 4000),
    admin: readPort(envVars.ADMIN_PORT, 3000),
    postgres: readPort(envVars.V2_POSTGRES_PORT, 5433),
    nginx,
    embed: readPort(envVars.NOCODB_EMBED_PORT, nginx + 1),
  };
}
@ -385,6 +387,7 @@ export async function autoDiscoverOnStartup(): Promise<void> {
enableSms: inst.enableSms,
enableSocial: inst.enableSocial,
enablePeople: inst.enablePeople,
emailTestMode: inst.emailTestMode,
},
userId,
'auto-discovery'

View File

@ -148,7 +148,7 @@ export function buildTemplateContext(
admin: ports.admin,
postgres: ports.postgres,
nginx: ports.nginx,
embed: ports.embed,
embed: ports.embed || (ports.nginx || 80) + 1,
},
secrets: {
postgresPassword: secrets.postgresPassword,
@ -233,6 +233,7 @@ export async function renderAllTemplates(context: TemplateContext, outputDir: st
{ template: 'nginx/conf.d/services.conf.hbs', output: 'nginx/conf.d/services.conf' },
{ template: 'configs/pangolin/resources.yml.hbs', output: 'configs/pangolin/resources.yml' },
{ template: 'configs/prometheus/prometheus.yml.hbs', output: 'configs/prometheus/prometheus.yml' },
{ template: 'configs/grafana/datasources/datasources.yml.hbs', output: 'configs/grafana/datasources/datasources.yml' },
];
for (const { template, output } of templateFiles) {
@ -253,8 +254,16 @@ export async function renderAllTemplates(context: TemplateContext, outputDir: st
logger.debug(`Rendered ${template}${outputPath}`);
}
// Copy static files (nginx.conf doesn't need templating)
const staticFiles = ['nginx/nginx.conf'];
// Copy static files (no templating needed)
const staticFiles = [
'nginx/nginx.conf',
'configs/prometheus/alerts.yml',
'configs/alertmanager/alertmanager.yml',
'configs/grafana/dashboards/dashboards.yml',
'configs/grafana/dashboards/application-overview.json',
'configs/grafana/dashboards/api-performance.json',
'configs/grafana/dashboards/system-health.json',
];
for (const file of staticFiles) {
const srcPath = path.join(templatesDir, file);
try {

View File

@ -0,0 +1,112 @@
global:
resolve_timeout: 5m
# SMTP configuration for email alerts
# Using MailHog for development - update for production:
# smtp_smarthost: 'smtp.example.com:587'
# smtp_auth_username: 'alerts@example.com'
# smtp_auth_password: 'your-password'
# smtp_require_tls: true
smtp_from: 'alerts@changemaker.local'
smtp_smarthost: 'mailhog-changemaker:1025'
smtp_auth_username: ''
smtp_auth_password: ''
smtp_require_tls: false
# Templates for notification content
templates:
- '/etc/alertmanager/*.tmpl'
# Route alerts to appropriate receivers based on severity
route:
group_by: ['alertname', 'cluster', 'service']
group_wait: 10s
group_interval: 10s
repeat_interval: 12h
receiver: 'default'
routes:
# Critical alerts go to email immediately (Gotify push is optional; see the 'critical-alerts' receiver)
- match:
severity: critical
receiver: 'critical-alerts'
group_wait: 0s
group_interval: 5m
repeat_interval: 4h
# Warning alerts go to email (Gotify push is optional; see the 'warning-alerts' receiver)
- match:
severity: warning
receiver: 'warning-alerts'
group_wait: 30s
repeat_interval: 12h
# Info alerts - email only, repeated at most once every 24h
- match:
severity: info
receiver: 'info-alerts'
repeat_interval: 24h
# Alert receivers
receivers:
# Default receiver (catches all unmatched)
- name: 'default'
email_configs:
- to: 'admin@changemaker.local'
headers:
Subject: '[Changemaker] {{ .GroupLabels.alertname }}'
# Critical alerts - email, plus optional Gotify push (webhook_configs is commented out by default)
- name: 'critical-alerts'
email_configs:
- to: 'admin@changemaker.local'
headers:
Subject: 'CRITICAL Alert: {{ .GroupLabels.alertname }}'
html: |
<h2 style="color: #d32f2f;">Critical Alert Triggered</h2>
{{ range .Alerts }}
<p><strong>Alert:</strong> {{ .Labels.alertname }}</p>
<p><strong>Severity:</strong> {{ .Labels.severity }}</p>
<p><strong>Summary:</strong> {{ .Annotations.summary }}</p>
<p><strong>Description:</strong> {{ .Annotations.description }}</p>
<p><strong>Started:</strong> {{ .StartsAt }}</p>
<hr>
{{ end }}
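# Gotify push notifications. Alertmanager does not expand environment variables in this file,
# so replace YOUR_GOTIFY_APP_TOKEN below with the real app token (e.g. the GOTIFY_APP_TOKEN
# value from .env), then uncomment to enable: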
# webhook_configs:
# - url: 'http://gotify-changemaker:80/message?token=YOUR_GOTIFY_APP_TOKEN'
# send_resolved: true
# http_config:
# follow_redirects: true
# Warning alerts - email + optional Gotify
- name: 'warning-alerts'
email_configs:
- to: 'admin@changemaker.local'
headers:
Subject: 'Warning: {{ .GroupLabels.alertname }}'
# webhook_configs:
# - url: 'http://gotify-changemaker:80/message?token=YOUR_GOTIFY_APP_TOKEN'
# Info alerts - email only
- name: 'info-alerts'
email_configs:
- to: 'admin@changemaker.local'
headers:
Subject: 'Info: {{ .GroupLabels.alertname }}'
# Inhibition rules (prevent spam)
inhibit_rules:
# If a critical alert is firing, suppress related warnings
- source_match:
severity: 'critical'
target_match:
severity: 'warning'
equal: ['alertname', 'instance']
# If disk is critical, suppress disk warning
- source_match:
alertname: 'DiskSpaceCritical'
target_match:
alertname: 'DiskSpaceLow'
equal: ['instance']

View File

@ -0,0 +1,402 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": { "type": "grafana", "uid": "-- Grafana --" },
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": null,
"uid": null,
"links": [],
"panels": [
{
"collapsed": false,
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
"id": 100,
"title": "Request Rate",
"type": "row"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisColorMode": "text",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 15,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"unit": "reqps"
}
},
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 1 },
"id": 1,
"options": {
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"expr": "sum by(method) (rate(http_requests_total[5m]))",
"legendFormat": "{{method}}",
"refId": "A"
}
],
"title": "Requests/sec by Method",
"type": "timeseries"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisColorMode": "text",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"unit": "reqps"
},
"overrides": [
{
"matcher": { "id": "byName", "options": "5xx Errors" },
"properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }]
},
{
"matcher": { "id": "byName", "options": "4xx Errors" },
"properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }]
}
]
},
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 1 },
"id": 2,
"options": {
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"expr": "sum(rate(http_requests_total{status_code=~\"5..\"}[5m]))",
"legendFormat": "5xx Errors",
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"expr": "sum(rate(http_requests_total{status_code=~\"4..\"}[5m]))",
"legendFormat": "4xx Errors",
"refId": "B"
}
],
"title": "Error Rate",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 9 },
"id": 101,
"title": "Latency",
"type": "row"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisColorMode": "text",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "line" }
},
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "transparent", "value": null },
{ "color": "red", "value": 1 }
]
},
"unit": "s"
},
"overrides": [
{
"matcher": { "id": "byName", "options": "p99" },
"properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }]
},
{
"matcher": { "id": "byName", "options": "p95" },
"properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }]
},
{
"matcher": { "id": "byName", "options": "p50" },
"properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }]
}
]
},
"gridPos": { "h": 8, "w": 16, "x": 0, "y": 10 },
"id": 3,
"options": {
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"expr": "histogram_quantile(0.50, sum(rate(http_request_duration_seconds_bucket[5m])) by (le))",
"legendFormat": "p50",
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket[5m])) by (le))",
"legendFormat": "p95",
"refId": "B"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"expr": "histogram_quantile(0.99, sum(rate(http_request_duration_seconds_bucket[5m])) by (le))",
"legendFormat": "p99",
"refId": "C"
}
],
"title": "Request Latency Percentiles",
"type": "timeseries"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"fieldConfig": {
"defaults": {
"color": { "fixedColor": "super-light-blue", "mode": "fixed" },
"custom": {
"axisColorMode": "text",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 15,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"unit": "s"
}
},
"gridPos": { "h": 8, "w": 8, "x": 16, "y": 10 },
"id": 4,
"options": {
"legend": { "calcs": ["mean"], "displayMode": "list", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "single", "sort": "none" }
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"expr": "sum(rate(http_request_duration_seconds_sum[5m])) / sum(rate(http_request_duration_seconds_count[5m]))",
"legendFormat": "Avg Latency",
"refId": "A"
}
],
"title": "Average Request Latency",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 18 },
"id": 102,
"title": "Top Endpoints",
"type": "row"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"fieldConfig": {
"defaults": {
"color": { "mode": "thresholds" },
"custom": {
"align": "auto",
"cellOptions": { "type": "auto" },
"inspect": false
},
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null }
]
}
},
"overrides": [
{
"matcher": { "id": "byName", "options": "Value" },
"properties": [
{ "id": "displayName", "value": "Requests (5m)" },
{ "id": "custom.cellOptions", "value": { "mode": "gradient", "type": "gauge" } },
{ "id": "color", "value": { "mode": "continuous-BlYlRd" } }
]
},
{
"matcher": { "id": "byName", "options": "route" },
"properties": [
{ "id": "displayName", "value": "Route" },
{ "id": "custom.width", "value": 300 }
]
},
{
"matcher": { "id": "byName", "options": "method" },
"properties": [
{ "id": "displayName", "value": "Method" },
{ "id": "custom.width", "value": 80 }
]
}
]
},
"gridPos": { "h": 9, "w": 12, "x": 0, "y": 19 },
"id": 5,
"options": {
"cellHeight": "sm",
"footer": { "countRows": false, "enablePagination": false, "fields": "", "reducer": ["sum"], "show": false },
"showHeader": true,
"sortBy": [{ "desc": true, "displayName": "Requests (5m)" }]
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"expr": "topk(15, sum by(method, route) (increase(http_requests_total[5m])))",
"format": "table",
"instant": true,
"legendFormat": "",
"refId": "A"
}
],
"title": "Top Routes by Request Count",
"transformations": [
{
"id": "organize",
"options": {
"excludeByName": { "Time": true },
"renameByName": {}
}
}
],
"type": "table"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"fieldConfig": {
"defaults": {
"color": { "mode": "thresholds" },
"custom": {
"align": "auto",
"cellOptions": { "type": "auto" },
"inspect": false
},
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 0.5 },
{ "color": "red", "value": 1 }
]
}
},
"overrides": [
{
"matcher": { "id": "byName", "options": "Value" },
"properties": [
{ "id": "displayName", "value": "p95 Latency (s)" },
{ "id": "unit", "value": "s" },
{ "id": "decimals", "value": 3 },
{ "id": "custom.cellOptions", "value": { "mode": "gradient", "type": "gauge" } },
{ "id": "color", "value": { "mode": "continuous-GrYlRd" } }
]
},
{
"matcher": { "id": "byName", "options": "route" },
"properties": [
{ "id": "displayName", "value": "Route" },
{ "id": "custom.width", "value": 300 }
]
},
{
"matcher": { "id": "byName", "options": "method" },
"properties": [
{ "id": "displayName", "value": "Method" },
{ "id": "custom.width", "value": 80 }
]
}
]
},
"gridPos": { "h": 9, "w": 12, "x": 12, "y": 19 },
"id": 6,
"options": {
"cellHeight": "sm",
"footer": { "countRows": false, "enablePagination": false, "fields": "", "reducer": ["sum"], "show": false },
"showHeader": true,
"sortBy": [{ "desc": true, "displayName": "p95 Latency (s)" }]
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"expr": "topk(15, histogram_quantile(0.95, sum by(method, route, le) (rate(http_request_duration_seconds_bucket[5m]))))",
"format": "table",
"instant": true,
"legendFormat": "",
"refId": "A"
}
],
"title": "Slowest Routes by p95 Latency",
"transformations": [
{
"id": "organize",
"options": {
"excludeByName": { "Time": true },
"renameByName": {}
}
}
],
"type": "table"
}
],
"refresh": "30s",
"schemaVersion": 38,
"tags": ["changemaker", "v2", "api", "performance"],
"templating": { "list": [] },
"time": { "from": "now-1h", "to": "now" },
"timepicker": {},
"timezone": "",
"title": "Changemaker - API Performance",
"version": 1
}

View File

@ -0,0 +1,533 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": { "type": "grafana", "uid": "-- Grafana --" },
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": null,
"uid": "changemaker-overview",
"links": [],
"panels": [
{
"collapsed": false,
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
"id": 100,
"title": "Email System",
"type": "row"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 50 },
{ "color": "red", "value": 100 }
]
}
}
},
"gridPos": { "h": 6, "w": 6, "x": 0, "y": 1 },
"id": 1,
"options": {
"orientation": "auto",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"showThresholdLabels": false,
"showThresholdMarkers": true
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"expr": "cm_email_queue_size",
"legendFormat": "Queue Size",
"refId": "A"
}
],
"title": "Email Queue Size",
"type": "gauge"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisColorMode": "text",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 15,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"unit": "ops"
}
},
"gridPos": { "h": 6, "w": 6, "x": 6, "y": 1 },
"id": 2,
"options": {
"legend": { "calcs": ["mean"], "displayMode": "list", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"expr": "rate(cm_emails_sent_total[5m])",
"legendFormat": "Sent/sec",
"refId": "A"
}
],
"title": "Emails Sent Rate",
"type": "timeseries"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"fieldConfig": {
"defaults": {
"color": { "fixedColor": "red", "mode": "fixed" },
"custom": {
"axisColorMode": "text",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 15,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"unit": "ops"
}
},
"gridPos": { "h": 6, "w": 6, "x": 12, "y": 1 },
"id": 3,
"options": {
"legend": { "calcs": ["mean"], "displayMode": "list", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "single", "sort": "none" }
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"expr": "rate(cm_emails_failed_total[5m])",
"legendFormat": "Failed/sec",
"refId": "A"
}
],
"title": "Emails Failed Rate",
"type": "timeseries"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"fieldConfig": {
"defaults": {
"color": { "fixedColor": "purple", "mode": "fixed" },
"custom": {
"axisColorMode": "text",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 15,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"unit": "s"
}
},
"gridPos": { "h": 6, "w": 6, "x": 18, "y": 1 },
"id": 4,
"options": {
"legend": { "calcs": ["mean", "max"], "displayMode": "list", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "single", "sort": "none" }
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"expr": "histogram_quantile(0.95, rate(cm_email_send_duration_seconds_bucket[5m]))",
"legendFormat": "p95 Send Duration",
"refId": "A"
}
],
"title": "Email Send Duration (p95)",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 7 },
"id": 101,
"title": "Authentication",
"type": "row"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisColorMode": "text",
"axisPlacement": "auto",
"drawStyle": "bars",
"fillOpacity": 80,
"gradientMode": "none",
"lineWidth": 1,
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "normal" },
"thresholdsStyle": { "mode": "off" }
},
"unit": "ops"
},
"overrides": [
{
"matcher": { "id": "byName", "options": "Failures" },
"properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }]
},
{
"matcher": { "id": "byName", "options": "Successes" },
"properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }]
}
]
},
"gridPos": { "h": 6, "w": 12, "x": 0, "y": 8 },
"id": 5,
"options": {
"legend": { "calcs": ["sum"], "displayMode": "list", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"expr": "rate(cm_login_attempts_total{result=\"success\"}[5m])",
"legendFormat": "Successes",
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"expr": "rate(cm_login_attempts_total{result=\"failure\"}[5m])",
"legendFormat": "Failures",
"refId": "B"
}
],
"title": "Login Attempts Rate",
"type": "timeseries"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"fieldConfig": {
"defaults": {
"color": { "mode": "thresholds" },
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "blue", "value": null }
]
}
}
},
"gridPos": { "h": 6, "w": 12, "x": 12, "y": 8 },
"id": 6,
"options": {
"colorMode": "background",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"textMode": "auto"
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"expr": "cm_active_sessions",
"legendFormat": "Active Sessions",
"refId": "A"
}
],
"title": "Active Sessions",
"type": "stat"
},
{
"collapsed": false,
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 14 },
"id": 102,
"title": "Campaigns & Responses",
"type": "row"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisColorMode": "text",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 15,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"unit": "ops"
}
},
"gridPos": { "h": 6, "w": 12, "x": 0, "y": 15 },
"id": 7,
"options": {
"legend": { "calcs": ["sum"], "displayMode": "list", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"expr": "rate(cm_campaign_emails_total[5m])",
"legendFormat": "Campaign Emails/sec",
"refId": "A"
}
],
"title": "Campaign Email Sends Rate",
"type": "timeseries"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisColorMode": "text",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 15,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"unit": "ops"
}
},
"gridPos": { "h": 6, "w": 12, "x": 12, "y": 15 },
"id": 8,
"options": {
"legend": { "calcs": ["sum"], "displayMode": "list", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"expr": "rate(cm_response_submissions_total[5m])",
"legendFormat": "Responses/sec",
"refId": "A"
}
],
"title": "Response Submissions Rate",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 21 },
"id": 103,
"title": "Canvassing",
"type": "row"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"fieldConfig": {
"defaults": {
"color": { "mode": "thresholds" },
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null }
]
}
}
},
"gridPos": { "h": 6, "w": 6, "x": 0, "y": 22 },
"id": 9,
"options": {
"colorMode": "background",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"textMode": "auto"
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"expr": "cm_active_canvass_sessions",
"legendFormat": "Active Sessions",
"refId": "A"
}
],
"title": "Active Canvass Sessions",
"type": "stat"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisColorMode": "text",
"axisPlacement": "auto",
"drawStyle": "bars",
"fillOpacity": 80,
"gradientMode": "none",
"lineWidth": 1,
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "normal" },
"thresholdsStyle": { "mode": "off" }
},
"unit": "ops"
}
},
"gridPos": { "h": 6, "w": 10, "x": 6, "y": 22 },
"id": 10,
"options": {
"legend": { "calcs": ["sum"], "displayMode": "list", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"expr": "rate(cm_canvass_visits_total[5m])",
"legendFormat": "{{outcome}}",
"refId": "A"
}
],
"title": "Canvass Visits by Outcome",
"type": "timeseries"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"fieldConfig": {
"defaults": {
"color": { "fixedColor": "super-light-blue", "mode": "fixed" },
"custom": {
"axisColorMode": "text",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 15,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"unit": "ops"
}
},
"gridPos": { "h": 6, "w": 8, "x": 16, "y": 22 },
"id": 11,
"options": {
"legend": { "calcs": ["sum"], "displayMode": "list", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "single", "sort": "none" }
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"expr": "rate(cm_shift_signups_total[5m])",
"legendFormat": "Signups/sec",
"refId": "A"
}
],
"title": "Shift Signups Rate",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 28 },
"id": 104,
"title": "External Services",
"type": "row"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"fieldConfig": {
"defaults": {
"color": { "mode": "thresholds" },
"mappings": [
{
"options": {
"0": { "color": "red", "index": 1, "text": "DOWN" },
"1": { "color": "green", "index": 0, "text": "UP" }
},
"type": "value"
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "red", "value": null },
{ "color": "green", "value": 1 }
]
}
}
},
"gridPos": { "h": 5, "w": 24, "x": 0, "y": 29 },
"id": 12,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"textMode": "auto"
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"expr": "cm_external_service_up",
"legendFormat": "{{service}}",
"refId": "A"
}
],
"title": "External Service Health",
"type": "stat"
}
],
"refresh": "30s",
"schemaVersion": 38,
"tags": ["changemaker", "v2", "application"],
"templating": { "list": [] },
"time": { "from": "now-1h", "to": "now" },
"timepicker": {},
"timezone": "",
"title": "Changemaker - Application Overview",
"version": 1
}

View File

@ -0,0 +1,12 @@
apiVersion: 1
providers:
- name: 'default'
orgId: 1
folder: ''
type: file
disableDeletion: false
updateIntervalSeconds: 10
allowUiUpdates: true
options:
path: /etc/grafana/provisioning/dashboards

View File

@ -0,0 +1,415 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": { "type": "grafana", "uid": "-- Grafana --" },
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": null,
"uid": null,
"links": [],
"panels": [
{
"collapsed": false,
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
"id": 100,
"title": "Service Status",
"type": "row"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"fieldConfig": {
"defaults": {
"color": { "mode": "thresholds" },
"mappings": [
{
"options": {
"0": { "color": "red", "index": 1, "text": "DOWN" },
"1": { "color": "green", "index": 0, "text": "UP" }
},
"type": "value"
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "red", "value": null },
{ "color": "green", "value": 1 }
]
}
}
},
"gridPos": { "h": 4, "w": 8, "x": 0, "y": 1 },
"id": 1,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"textMode": "auto"
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"expr": "up{job=~\"(.+-)?api\", job!~\"(.+-)?media-api\"}",
"legendFormat": "API",
"refId": "A"
}
],
"title": "API Status",
"type": "stat"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"fieldConfig": {
"defaults": {
"color": { "mode": "thresholds" },
"mappings": [
{
"options": {
"0": { "color": "red", "index": 1, "text": "DOWN" },
"1": { "color": "green", "index": 0, "text": "UP" }
},
"type": "value"
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "red", "value": null },
{ "color": "green", "value": 1 }
]
}
}
},
"gridPos": { "h": 4, "w": 8, "x": 8, "y": 1 },
"id": 2,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"textMode": "auto"
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"expr": "redis_up",
"legendFormat": "Redis",
"refId": "A"
}
],
"title": "Redis Status",
"type": "stat"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"fieldConfig": {
"defaults": {
"color": { "mode": "thresholds" },
"mappings": [
{
"options": {
"0": { "color": "red", "index": 1, "text": "DOWN" },
"1": { "color": "green", "index": 0, "text": "UP" }
},
"type": "value"
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "red", "value": null },
{ "color": "green", "value": 1 }
]
}
}
},
"gridPos": { "h": 4, "w": 8, "x": 16, "y": 1 },
"id": 3,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"textMode": "auto"
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"expr": "up{job=~\"(.+-)?n8n\"}",
"legendFormat": "n8n",
"refId": "A"
}
],
"title": "N8N Status",
"type": "stat"
},
{
"collapsed": false,
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 },
"id": 101,
"title": "System Resources",
"type": "row"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 15,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "line" }
},
"max": 100,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 70 },
{ "color": "red", "value": 90 }
]
},
"unit": "percent"
}
},
"gridPos": { "h": 8, "w": 8, "x": 0, "y": 6 },
"id": 4,
"options": {
"legend": { "calcs": ["mean", "max"], "displayMode": "list", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "single", "sort": "none" }
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"expr": "100 - (avg by(instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)",
"legendFormat": "CPU Usage %",
"refId": "A"
}
],
"title": "CPU Usage",
"type": "timeseries"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 15,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "line" }
},
"max": 100,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 70 },
{ "color": "red", "value": 90 }
]
},
"unit": "percent"
}
},
"gridPos": { "h": 8, "w": 8, "x": 8, "y": 6 },
"id": 5,
"options": {
"legend": { "calcs": ["mean", "max"], "displayMode": "list", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "single", "sort": "none" }
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"expr": "(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100",
"legendFormat": "Memory Usage %",
"refId": "A"
}
],
"title": "Memory Usage",
"type": "timeseries"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 15,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "line" }
},
"max": 100,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 75 },
{ "color": "red", "value": 90 }
]
},
"unit": "percent"
}
},
"gridPos": { "h": 8, "w": 8, "x": 16, "y": 6 },
"id": 6,
"options": {
"legend": { "calcs": ["mean", "max"], "displayMode": "list", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "single", "sort": "none" }
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"expr": "(1 - (node_filesystem_avail_bytes{mountpoint=\"/\"} / node_filesystem_size_bytes{mountpoint=\"/\"})) * 100",
"legendFormat": "Disk Usage %",
"refId": "A"
}
],
"title": "Disk Usage",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 14 },
"id": 102,
"title": "Container Resources",
"type": "row"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 1,
"pointSize": 5,
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"unit": "percentunit"
}
},
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 15 },
"id": 7,
"options": {
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "right", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"expr": "rate(container_cpu_usage_seconds_total{name=~\".+\"}[5m])",
"legendFormat": "{{name}}",
"refId": "A"
}
],
"title": "Container CPU Usage",
"type": "timeseries"
},
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 1,
"pointSize": 5,
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"unit": "bytes"
}
},
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 15 },
"id": 8,
"options": {
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "right", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
"expr": "container_memory_usage_bytes{name=~\".+\"}",
"legendFormat": "{{name}}",
"refId": "A"
}
],
"title": "Container Memory Usage",
"type": "timeseries"
}
],
"refresh": "30s",
"schemaVersion": 38,
"tags": ["changemaker", "v2", "system-health"],
"templating": { "list": [] },
"time": { "from": "now-1h", "to": "now" },
"timepicker": {},
"timezone": "",
"title": "Changemaker - System Health",
"version": 1
}

View File

@ -0,0 +1,11 @@
# Grafana datasource provisioning: one Prometheus datasource, marked as the default.
apiVersion: 1
datasources:
# NOTE(review): no uid is set here, while the provisioned dashboards reference
# datasource uid "PBFA97CFB590B2093". Presumably that is the uid Grafana derives
# for a datasource with this name; confirm, or pin the uid explicitly.
- name: Prometheus
type: prometheus
access: proxy
# Points at the per-instance Prometheus container on port 9090.
url: http://{{containerPrefix}}-prometheus:9090
isDefault: true
editable: true
jsonData:
# Matches the 15s global scrape interval in the Prometheus template.
timeInterval: 15s

View File

@ -108,4 +108,9 @@ resources:
subdomain: vault
target: http://{{containerPrefix}}-nginx:80
isBaseDomain: false
- name: draw
subdomain: draw
target: http://{{containerPrefix}}-nginx:80
isBaseDomain: false
{{/if}}

View File

@ -0,0 +1,215 @@
groups:
# Application-level alerts, evaluated every 30s.
- name: v2_app_alerts
interval: 30s
rules:
# Application availability
# The job label is matched by suffix because the Prometheus template names the
# API scrape job "<composeProject>-api"; a fixed "changemaker-v2-api" matcher
# only fires on instances whose compose project is "changemaker-v2".
# The media API job ("<composeProject>-media-api") is excluded so this remains
# an alert about the main API only.
- alert: ApplicationDown
expr: up{job=~"(.+-)?api", job!~"(.+-)?media-api"} == 0
for: 2m
labels:
severity: critical
annotations:
summary: "V2 API is down"
description: "The Changemaker V2 API has been down for more than 2 minutes."
# High error rate
# Per-series rate of 5xx responses above 0.1/s, sustained for 5 minutes.
- alert: HighErrorRate
expr: rate(http_requests_total{status_code=~"5.."}[5m]) > 0.1
for: 5m
labels:
severity: warning
annotations:
summary: "High error rate detected"
description: "Application is experiencing {{ $value }} errors per second."
# Email queue backing up
- alert: EmailQueueBacklog
expr: cm_email_queue_size > 100
for: 10m
labels:
severity: warning
annotations:
summary: "Email queue has significant backlog"
description: "Email queue size is {{ $value }}, emails may be delayed."
# High email failure rate
# Ratio of the failed-email rate to the sent-email rate over 5 minutes.
- alert: HighEmailFailureRate
expr: rate(cm_emails_failed_total[5m]) / rate(cm_emails_sent_total[5m]) > 0.2
for: 10m
labels:
severity: warning
annotations:
summary: "High email failure rate"
description: "{{ $value | humanizePercentage }} of emails are failing to send."
# Failed login attempts spike
- alert: SuspiciousLoginActivity
expr: rate(cm_login_attempts_total{status="failure"}[5m]) > 5
for: 2m
labels:
severity: warning
annotations:
summary: "Suspicious login activity detected"
description: "{{ $value }} failed login attempts per second detected."
# High API latency
# 95th percentile of request duration, computed per series from the histogram buckets.
- alert: HighAPILatency
expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) > 2
for: 5m
labels:
severity: warning
annotations:
summary: "High API latency"
description: "95th percentile latency is {{ $value }}s for {{ $labels.route }}."
# External service down
- alert: ExternalServiceDown
expr: cm_external_service_up == 0
for: 5m
labels:
severity: warning
annotations:
summary: "External service {{ $labels.service }} is down"
description: "Service {{ $labels.service }} has been unreachable for 5 minutes."
# System health alerts
# Redis, host (node_*) and container (container_*) resource rules, evaluated every 30s.
# NOTE(review): these presumably depend on the redis-exporter, node-exporter and
# cAdvisor scrape jobs being present in the Prometheus config; confirm.
- name: system_alerts
interval: 30s
rules:
# Redis down
- alert: RedisDown
expr: redis_up == 0
for: 1m
labels:
severity: critical
annotations:
summary: "Redis cache is down"
description: "Redis has been down for more than 1 minute. Caching and session management will fail."
# Disk space running low
# Fraction of the root filesystem still available, below 15%.
- alert: DiskSpaceLow
expr: (node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}) < 0.15
for: 5m
labels:
severity: warning
annotations:
summary: "Disk space is running low"
description: "Only {{ $value | humanizePercentage }} disk space remaining on root filesystem."
# Disk space critical
# Same ratio with a 10% threshold; below 10% the DiskSpaceLow condition is also met.
- alert: DiskSpaceCritical
expr: (node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}) < 0.10
for: 2m
labels:
severity: critical
annotations:
summary: "CRITICAL: Disk space nearly exhausted"
description: "Only {{ $value | humanizePercentage }} disk space remaining! System may fail soon."
# High CPU usage
# Busy percentage: 100 minus the per-instance average idle rate over 5 minutes.
- alert: HighCPUUsage
expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 85
for: 10m
labels:
severity: warning
annotations:
summary: "High CPU usage detected"
description: "CPU usage is {{ $value }}% on {{ $labels.instance }}."
# Memory usage high
# Used fraction, computed as 1 minus available/total (a 0-1 ratio, not a percentage).
- alert: HighMemoryUsage
expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) > 0.85
for: 10m
labels:
severity: warning
annotations:
summary: "High memory usage"
description: "Memory usage is above 85% ({{ $value | humanizePercentage }})."
# Container CPU throttling (only Docker containers)
# The name!="" matcher keeps only series that carry a non-empty name label.
- alert: ContainerCPUThrottling
expr: rate(container_cpu_cfs_throttled_seconds_total{name!=""}[5m]) > 0.5
for: 5m
labels:
severity: warning
annotations:
summary: "Container is being CPU throttled"
description: "Container {{ $labels.name }} is experiencing CPU throttling."
# Container memory usage high (only Docker containers with memory limits)
# The trailing "and ... > 0" clause drops containers whose reported limit is 0.
- alert: ContainerMemoryHigh
expr: (container_memory_usage_bytes{name!=""} / container_spec_memory_limit_bytes{name!=""}) > 0.90 and container_spec_memory_limit_bytes{name!=""} > 0
for: 5m
labels:
severity: warning
annotations:
summary: "Container memory usage is high"
description: "Container {{ $labels.name }} is using {{ $value | humanizePercentage }} of its memory limit."
# Infrastructure alerts
# Rules about the monitoring stack itself, evaluated every 30s.
- name: infrastructure_alerts
interval: 30s
rules:
# Prometheus scrape failures
# NOTE(review): confirm that the deployed Prometheus version exports
# prometheus_target_scrapes_failed_total and that the series carries the job
# label the description expects.
- alert: PrometheusScrapeFailures
expr: rate(prometheus_target_scrapes_failed_total[5m]) > 0.1
for: 5m
labels:
severity: warning
annotations:
summary: "Prometheus scrape failures detected"
description: "Prometheus is failing to scrape {{ $labels.job }} target."
# Prometheus configuration reload failure
- alert: PrometheusConfigReloadFailed
expr: prometheus_config_last_reload_successful == 0
for: 1m
labels:
severity: warning
annotations:
summary: "Prometheus configuration reload failed"
description: "Prometheus failed to reload its configuration. Check prometheus logs."
# Alertmanager down
# The job label is matched by suffix: the Prometheus template names the scrape
# job "<composeProject>-alertmanager", so an exact job="alertmanager" matcher
# selects no series and the alert could never fire.
- alert: AlertmanagerDown
expr: up{job=~"(.+-)?alertmanager"} == 0
for: 2m
labels:
severity: critical
annotations:
summary: "Alertmanager is down"
description: "Alertmanager has been down for 2 minutes. Alerts will not be delivered!"
# Security alerts
# Traffic-volume rules, evaluated every 15s (the other groups use 30s).
# None of these expressions aggregate, so each threshold is compared against
# every http_requests_total series individually rather than against a total.
- name: security_alerts
interval: 15s
rules:
# Possible DDoS attack
- alert: PossibleDDoSAttack
expr: rate(http_requests_total[1m]) > 1000
for: 2m
labels:
severity: critical
annotations:
summary: "Possible DDoS attack detected"
description: "Receiving {{ $value }} requests per second for 2 minutes. This may be a DDoS attack."
# Sustained high traffic
- alert: SustainedHighTraffic
expr: rate(http_requests_total[5m]) > 500
for: 10m
labels:
severity: warning
annotations:
summary: "Sustained high traffic detected"
description: "Receiving {{ $value }} requests per second for 10 minutes. Monitor for performance issues."
# Too many 4xx errors
- alert: HighClientErrorRate
expr: rate(http_requests_total{status_code=~"4.."}[5m]) > 5
for: 5m
labels:
severity: warning
annotations:
summary: "High rate of 4xx client errors"
description: "Receiving {{ $value }} client errors per second. Check for broken links or API misuse."

View File

@ -3,20 +3,64 @@
global:
scrape_interval: 15s
evaluation_interval: 15s
external_labels:
monitor: '{{composeProject}}'
{{#if enableMonitoring}}
alerting:
alertmanagers:
- static_configs:
- targets: ['{{containerPrefix}}-alertmanager:9093']
rule_files:
- "alerts.yml"
{{/if}}
scrape_configs:
- job_name: '{{composeProject}}-api'
static_configs:
- targets: ['{{containerPrefix}}-api:4000']
metrics_path: /api/metrics
metrics_path: '/api/metrics'
scrape_interval: 10s
scrape_timeout: 5s
{{#if enableMedia}}
- job_name: '{{composeProject}}-media-api'
static_configs:
- targets: ['{{containerPrefix}}-media-api:4100']
metrics_path: /api/metrics
metrics_path: '/api/metrics'
{{/if}}
- job_name: '{{composeProject}}-redis'
static_configs:
- targets: ['{{containerPrefix}}-redis-exporter:9121']
scrape_interval: 15s
{{#if enableMonitoring}}
- job_name: '{{composeProject}}-cadvisor'
static_configs:
- targets: ['{{containerPrefix}}-cadvisor:8080']
scrape_interval: 15s
- job_name: '{{composeProject}}-node'
static_configs:
- targets: ['{{containerPrefix}}-node-exporter:9100']
scrape_interval: 15s
- job_name: '{{composeProject}}-prometheus'
static_configs:
- targets: ['localhost:9090']
- job_name: '{{composeProject}}-alertmanager'
static_configs:
- targets: ['{{containerPrefix}}-alertmanager:9093']
scrape_interval: 30s
{{/if}}
{{#if enableDevTools}}
- job_name: '{{composeProject}}-n8n'
static_configs:
- targets: ['{{containerPrefix}}-n8n:5678']
metrics_path: '/metrics'
scrape_interval: 30s
{{/if}}

View File

@ -96,10 +96,14 @@ services:
{{/if}}
ports:
- "{{ports.api}}:4000"
{{#if enableListmonk}}
- "9002:9002"
{{/if}}
volumes:
- ./assets/uploads:/app/uploads
- ./mkdocs:/mkdocs:rw
- ./data:/data:ro
- ./data/upgrade:/app/upgrade:rw
- ./configs:/app/configs:ro
networks:
- {{networkName}}
@ -800,13 +804,32 @@ services:
timeout: 5s
retries: 3
# Docker socket proxy: mounts the host Docker socket read-only and is the
# endpoint the homepage service's DOCKER_HOST points at (port 2375).
docker-socket-proxy:
image: ghcr.io/tecnativa/docker-socket-proxy:latest
container_name: {{containerPrefix}}-docker-socket-proxy
restart: unless-stopped
environment:
# 1 switches a flag on, 0 switches it off. Presumably each flag gates one
# section of the Docker API and POST gates write requests; confirm against
# the image documentation.
CONTAINERS: 1
IMAGES: 1
INFO: 1
NETWORKS: 0
VOLUMES: 0
POST: 0
volumes:
- /var/run/docker.sock:/var/run/docker.sock:ro
networks:
- {{networkName}}
homepage:
image: ghcr.io/gethomepage/homepage:latest
container_name: {{containerPrefix}}-homepage
restart: unless-stopped
environment:
DOCKER_HOST: tcp://{{containerPrefix}}-docker-socket-proxy:2375
volumes:
- {{containerPrefix}}-homepage-data:/app/config
- /var/run/docker.sock:/var/run/docker.sock:ro
depends_on:
- docker-socket-proxy
networks:
- {{networkName}}
@ -958,6 +981,50 @@ services:
- {{containerPrefix}}-alertmanager-data:/alertmanager
networks:
- {{networkName}}
# cAdvisor: target of the Prometheus cadvisor scrape job (port 8080).
cadvisor:
image: gcr.io/cadvisor/cadvisor:latest
container_name: {{containerPrefix}}-cadvisor
restart: unless-stopped
volumes:
# Host root, /var/run, /sys and the Docker data directory, all mounted read-only.
- /:/rootfs:ro
- /var/run:/var/run:ro
- /sys:/sys:ro
- /var/lib/docker/:/var/lib/docker:ro
networks:
- {{networkName}}
# Node exporter: target of the Prometheus node scrape job (port 9100).
node-exporter:
image: prom/node-exporter:latest
container_name: {{containerPrefix}}-node-exporter
restart: unless-stopped
command:
# Must agree with the /host mount point declared under volumes.
- '--path.rootfs=/host'
volumes:
# Host root filesystem, read-only with rslave mount propagation.
- /:/host:ro,rslave
networks:
- {{networkName}}
# Redis exporter: target of the Prometheus redis scrape job (port 9121).
redis-exporter:
image: oliver006/redis_exporter:latest
container_name: {{containerPrefix}}-redis-exporter
restart: unless-stopped
environment:
# Address of the instance's Redis container and the generated Redis password.
REDIS_ADDR: redis://{{containerPrefix}}-redis:6379
REDIS_PASSWORD: "{{secrets.redisPassword}}"
depends_on:
- redis
networks:
- {{networkName}}
# Gotify server; its /app/data directory is kept in the named gotify-data volume
# declared in the volumes section.
gotify:
image: gotify/server:latest
container_name: {{containerPrefix}}-gotify
restart: unless-stopped
volumes:
- {{containerPrefix}}-gotify-data:/app/data
networks:
- {{networkName}}
{{/if}}
# ─── Volumes ──────────────────────────────────────────────
@ -986,6 +1053,7 @@ volumes:
{{containerPrefix}}-prometheus-data:
{{containerPrefix}}-grafana-data:
{{containerPrefix}}-alertmanager-data:
{{containerPrefix}}-gotify-data:
{{/if}}
{{#if enableMeet}}
{{containerPrefix}}-jitsi-web-config:

View File

@ -103,8 +103,10 @@ LISTMONK_SMTP_FROM={{name}} <noreply@{{domain}}>
# Media
{{#if enableMedia}}
ENABLE_MEDIA_FEATURES=true
MEDIA_API_PUBLIC_URL=https://media.{{domain}}
{{else}}
ENABLE_MEDIA_FEATURES=false
MEDIA_API_PUBLIC_URL=
{{/if}}
MEDIA_API_PORT=4100
MEDIA_ROOT=/media/local
@ -256,12 +258,11 @@ BASE_DOMAIN=https://{{domain}}
# Gitea
GITEA_URL=http://{{containerPrefix}}-gitea:3000
GITEA_SSH_PORT=2222
GITEA_DB_TYPE=mysql
GITEA_DB_HOST={{containerPrefix}}-gitea-db:3306
GITEA_DB_TYPE=postgres
GITEA_DB_HOST={{containerPrefix}}-postgres:5432
GITEA_DB_NAME=gitea
GITEA_DB_USER=gitea
GITEA_DB_PASSWD={{secrets.giteaAdminPassword}}
GITEA_DB_ROOT_PASSWORD={{secrets.giteaAdminPassword}}
GITEA_DB_USER=changemaker
GITEA_DB_PASSWD={{secrets.postgresPassword}}
GITEA_ROOT_URL=https://git.{{domain}}
GITEA_DOMAIN=git.{{domain}}
GITEA_COMMENTS_ENABLED=false

View File

@ -11,6 +11,65 @@ server {
server_name app.{{domain}};
add_header X-Frame-Options "SAMEORIGIN" always;
# Bot detection for OG meta previews
# $is_bot starts at 0 and becomes 1 when the User-Agent matches, case-insensitively,
# one of the crawler names listed below; the OG locations in this server test it.
set $is_bot 0;
if ($http_user_agent ~* "(Twitterbot|facebookexternalhit|LinkedInBot|Slackbot|TelegramBot|WhatsApp|Discordbot|Googlebot|bingbot|Pinterest)") {
set $is_bot 1;
}
# Campaign OG meta
# Requests flagged by $is_bot are rewritten to the API's OG endpoint; all other
# requests are proxied to the admin app.
location ~ ^/campaign/([^/]+)$ {
if ($is_bot = 1) {
# The API upstream goes through a variable, like the admin upstream below, so the
# hostname is resolved per request instead of at config load.
# The set must come before the rewrite: "break" stops any later rewrite-module
# directive in this block from running, and set is one of them.
set $upstream_api http://{{containerPrefix}}-api:4000;
rewrite ^/campaign/(.*)$ /api/og/campaign/$1 break;
proxy_pass $upstream_api;
}
set $upstream_admin http://{{containerPrefix}}-admin:3000;
proxy_pass $upstream_admin;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
# Landing page OG meta
# Requests flagged by $is_bot are rewritten to the API's OG endpoint; all other
# requests are proxied to the admin app.
location ~ ^/p/([^/]+)$ {
if ($is_bot = 1) {
# The API upstream goes through a variable, like the admin upstream below, so the
# hostname is resolved per request instead of at config load.
# The set must come before the rewrite: "break" stops any later rewrite-module
# directive in this block from running, and set is one of them.
set $upstream_api http://{{containerPrefix}}-api:4000;
rewrite ^/p/(.*)$ /api/og/page/$1 break;
proxy_pass $upstream_api;
}
set $upstream_admin http://{{containerPrefix}}-admin:3000;
proxy_pass $upstream_admin;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
{{#if enableMedia}}
# Gallery video OG meta
# Requests flagged by $is_bot are rewritten to the API's OG endpoint; all other
# requests are proxied to the admin app.
location ~ ^/gallery/watch/([^/]+)$ {
if ($is_bot = 1) {
# The API upstream goes through a variable, like the admin upstream below, so the
# hostname is resolved per request instead of at config load.
# The set must come before the rewrite: "break" stops any later rewrite-module
# directive in this block from running, and set is one of them.
set $upstream_api http://{{containerPrefix}}-api:4000;
rewrite ^/gallery/watch/(.*)$ /api/og/gallery/$1 break;
proxy_pass $upstream_api;
}
set $upstream_admin http://{{containerPrefix}}-admin:3000;
proxy_pass $upstream_admin;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
{{/if}}
location / {
set $upstream_admin http://{{containerPrefix}}-admin:3000;
proxy_pass $upstream_admin;
@ -367,6 +426,24 @@ server {
proxy_set_header X-Forwarded-Proto $scheme;
}
}
# Excalidraw
# Serves the draw subdomain by proxying every request to the Excalidraw container on port 80.
server {
listen 80;
server_name draw.{{domain}};
location / {
# The upstream is held in a variable, the same pattern the admin locations in this file use.
set $upstream_excalidraw http://{{containerPrefix}}-excalidraw:80;
proxy_pass $upstream_excalidraw;
proxy_http_version 1.1;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
# Upgrade and Connection headers pass connection-upgrade requests through to the upstream.
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
}
}
{{/if}}
# Root domain — MkDocs static site
@ -393,6 +470,65 @@ server {
server_name localhost _;
add_header X-Frame-Options "SAMEORIGIN" always;
# Bot detection for OG meta previews
# $is_bot starts at 0 and becomes 1 when the User-Agent matches, case-insensitively,
# one of the crawler names listed below; the OG locations in this server test it.
set $is_bot 0;
if ($http_user_agent ~* "(Twitterbot|facebookexternalhit|LinkedInBot|Slackbot|TelegramBot|WhatsApp|Discordbot|Googlebot|bingbot|Pinterest)") {
set $is_bot 1;
}
# Campaign OG meta
# Requests flagged by $is_bot are rewritten to the API's OG endpoint; all other
# requests are proxied to the admin app.
location ~ ^/campaign/([^/]+)$ {
if ($is_bot = 1) {
# The API upstream goes through a variable, like the admin upstream below, so the
# hostname is resolved per request instead of at config load.
# The set must come before the rewrite: "break" stops any later rewrite-module
# directive in this block from running, and set is one of them.
set $upstream_api http://{{containerPrefix}}-api:4000;
rewrite ^/campaign/(.*)$ /api/og/campaign/$1 break;
proxy_pass $upstream_api;
}
set $upstream_admin http://{{containerPrefix}}-admin:3000;
proxy_pass $upstream_admin;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
# Landing page OG meta
# Requests flagged by $is_bot are rewritten to the API's OG endpoint; all other
# requests are proxied to the admin app.
location ~ ^/p/([^/]+)$ {
if ($is_bot = 1) {
# The API upstream goes through a variable, like the admin upstream below, so the
# hostname is resolved per request instead of at config load.
# The set must come before the rewrite: "break" stops any later rewrite-module
# directive in this block from running, and set is one of them.
set $upstream_api http://{{containerPrefix}}-api:4000;
rewrite ^/p/(.*)$ /api/og/page/$1 break;
proxy_pass $upstream_api;
}
set $upstream_admin http://{{containerPrefix}}-admin:3000;
proxy_pass $upstream_admin;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
{{#if enableMedia}}
# Gallery video OG meta
# Requests flagged by $is_bot are rewritten to the API's OG endpoint; all other
# requests are proxied to the admin app.
location ~ ^/gallery/watch/([^/]+)$ {
if ($is_bot = 1) {
# The API upstream goes through a variable, like the admin upstream below, so the
# hostname is resolved per request instead of at config load.
# The set must come before the rewrite: "break" stops any later rewrite-module
# directive in this block from running, and set is one of them.
set $upstream_api http://{{containerPrefix}}-api:4000;
rewrite ^/gallery/watch/(.*)$ /api/og/gallery/$1 break;
proxy_pass $upstream_api;
}
set $upstream_admin http://{{containerPrefix}}-admin:3000;
proxy_pass $upstream_admin;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
{{/if}}
# Admin GUI + Public pages (default)
location / {
set $upstream_admin http://{{containerPrefix}}-admin:3000;