187 lines
8.7 KiB
JavaScript
187 lines
8.7 KiB
JavaScript
"use strict";
|
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
exports.observabilityRouter = void 0;
|
|
const express_1 = require("express");
|
|
const auth_middleware_1 = require("../../middleware/auth.middleware");
|
|
const rbac_middleware_1 = require("../../middleware/rbac.middleware");
|
|
const rate_limit_1 = require("../../middleware/rate-limit");
|
|
const env_1 = require("../../config/env");
|
|
const logger_1 = require("../../utils/logger");
|
|
const metrics_1 = require("../../utils/metrics");
|
|
const fetch_with_timeout_1 = require("../../utils/fetch-with-timeout");
|
|
const promql_validator_1 = require("../../utils/promql-validator");
|
|
const health_check_1 = require("../../utils/health-check");
|
|
// Router that carries every observability route registered in this module.
// The three router.use() calls below run, in this order, before any route handler.
const router = (0, express_1.Router)();
|
|
// 1) Authentication middleware runs first.
router.use(auth_middleware_1.authenticate);
|
|
// 2) Role check configured with 'SUPER_ADMIN'
//    (presumably rejects callers without that role — confirm in rbac.middleware).
router.use((0, rbac_middleware_1.requireRole)('SUPER_ADMIN'));
|
|
// Stricter rate limit for observability endpoints (info disclosure risk)
|
|
// 3) Rate limiter dedicated to these endpoints.
router.use(rate_limit_1.observabilityRateLimit);
|
|
// Removed duplicated isServiceOnline - now using shared utility from utils/health-check.ts
|
|
/**
 * GET /api/observability/status
 *
 * Probes the seven monitoring services in parallel, records each outcome via
 * setServiceUp(), and replies with one `{ online, url }` entry per service.
 * Any error thrown along the way is logged and handed to `next`.
 */
router.get('/status', async (_req, res, next) => {
    try {
        const { env } = env_1;
        // One row per service: response field, gauge label, probe target, and the
        // env variable holding the port used to build the advertised URL.
        const services = [
            { field: 'prometheus', gauge: 'prometheus', probe: `${env.PROMETHEUS_URL}/api/v1/status/config`, portVar: 'PROMETHEUS_PORT' },
            { field: 'grafana', gauge: 'grafana', probe: `${env.GRAFANA_URL}/api/health`, portVar: 'GRAFANA_PORT' },
            { field: 'alertmanager', gauge: 'alertmanager', probe: `${env.ALERTMANAGER_URL}/api/v2/status`, portVar: 'ALERTMANAGER_PORT' },
            // cAdvisor is probed at its base URL, with no path appended.
            { field: 'cadvisor', gauge: 'cadvisor', probe: env.CADVISOR_URL, portVar: 'CADVISOR_PORT' },
            { field: 'nodeExporter', gauge: 'node_exporter', probe: `${env.NODE_EXPORTER_URL}/metrics`, portVar: 'NODE_EXPORTER_PORT' },
            { field: 'redisExporter', gauge: 'redis_exporter', probe: `${env.REDIS_EXPORTER_URL}/metrics`, portVar: 'REDIS_EXPORTER_PORT' },
            { field: 'gotify', gauge: 'gotify', probe: `${env.GOTIFY_URL}/health`, portVar: 'GOTIFY_PORT' },
        ];
        // All probes are started before any of them is awaited.
        const flags = await Promise.all(services.map(({ probe }) => (0, health_check_1.isServiceOnline)(probe)));
        // Update Prometheus gauges (all of them, before the response is built)
        services.forEach(({ gauge }, i) => {
            (0, metrics_1.setServiceUp)(gauge, flags[i]);
        });
        const payload = {};
        services.forEach(({ field, portVar }, i) => {
            payload[field] = { online: flags[i], url: `http://localhost:${env[portVar]}` };
        });
        res.json(payload);
    }
    catch (err) {
        logger_1.logger.error('Failed to check observability services status', err);
        next(err);
    }
});
|
|
// Single source of truth for the metrics summary: response field, unit label and
// the PromQL expression whose first sample feeds that field.
const SUMMARY_METRICS = Object.freeze([
    { key: 'apiUptime', unit: 's', query: 'time()-process_start_time_seconds' },
    { key: 'emailQueueSize', unit: 'jobs', query: 'cm_email_queue_size' },
    { key: 'requestRate', unit: 'req/s', query: 'rate(http_request_total[5m])' },
    { key: 'emailErrorRate', unit: '%', query: 'rate(cm_email_send_errors_total[5m])*100' },
    { key: 'activeSessions', unit: 'sessions', query: 'cm_active_sessions' },
    { key: 'activeCanvassSessions', unit: 'sessions', query: 'cm_active_canvass_sessions' },
    { key: 'redisHealth', unit: 'status', query: 'redis_up' },
]);
/**
 * Builds the summary payload in SUMMARY_METRICS order.
 * @param {number[]} [values] - one number per metric; missing entries become 0.
 * @returns {Object<string, {value: number, unit: string}>}
 */
const buildMetricsSummary = (values = []) => Object.fromEntries(SUMMARY_METRICS.map(({ key, unit }, i) => [key, { value: values[i] ?? 0, unit }]));
/**
 * Extracts the first sample of a Prometheus instant-query response as a finite number.
 * Sample values arrive as strings; a missing, empty, "NaN" or "+Inf"/"-Inf" sample
 * yields 0, because a non-finite number would be serialised as `null` by res.json().
 * @param {*} body - parsed JSON body of the query response.
 * @returns {number}
 */
const firstSampleValue = (body) => {
    const parsed = Number.parseFloat(body?.data?.result?.[0]?.value?.[1]);
    return Number.isFinite(parsed) ? parsed : 0;
};
/**
 * GET /api/observability/metrics-summary — fetch key metrics from Prometheus
 *
 * Replies with `{ value, unit }` for every entry of SUMMARY_METRICS. The route is
 * best-effort: when Prometheus is offline, or when validation or any query throws,
 * it still answers with the same shape and every value set to 0 (failures are logged).
 */
router.get('/metrics-summary', async (_req, res, _next) => {
    try {
        const prometheusOnline = await (0, health_check_1.isServiceOnline)(`${env_1.env.PROMETHEUS_URL}/api/v1/status/config`);
        if (!prometheusOnline) {
            return res.json(buildMetricsSummary());
        }
        const promqlQueries = SUMMARY_METRICS.map(({ query }) => query);
        // Validate all queries before executing (prevents injection)
        (0, promql_validator_1.validatePromQLQueries)(promqlQueries);
        // Fetch metrics in parallel (with 5s timeout per query)
        const bodies = await Promise.all(promqlQueries.map(async (query) => {
            const response = await (0, fetch_with_timeout_1.fetchWithTimeout)(`${env_1.env.PROMETHEUS_URL}/api/v1/query?query=${encodeURIComponent(query)}`, {}, 5000);
            return response.json();
        }));
        res.json(buildMetricsSummary(bodies.map(firstSampleValue)));
    }
    catch (err) {
        logger_1.logger.error('Failed to fetch metrics summary', err);
        // Return default values on error
        res.json(buildMetricsSummary());
    }
});
|
|
/**
 * GET /api/observability/alerts — fetch active alerts from Alertmanager
 *
 * Replies with `{ total, critical, warning, info, alerts }`. Alerts whose severity
 * label is neither 'critical' nor 'warning' are counted under `info`. When
 * Alertmanager is offline, or anything throws, the reply is the same shape with
 * zero counts and an empty list (the error is logged).
 */
router.get('/alerts', async (_req, res, next) => {
    const noAlerts = () => ({ total: 0, critical: 0, warning: 0, info: 0, alerts: [] });
    try {
        const baseUrl = env_1.env.ALERTMANAGER_URL;
        const reachable = await (0, health_check_1.isServiceOnline)(`${baseUrl}/api/v2/status`);
        if (!reachable) {
            return res.json(noAlerts());
        }
        const reply = await (0, fetch_with_timeout_1.fetchWithTimeout)(`${baseUrl}/api/v2/alerts?active=true`, {}, 3000);
        const rawAlerts = (await reply.json()) || [];
        // Reshape each Alertmanager alert into the fields exposed by this API.
        const alerts = rawAlerts.map((raw) => {
            const { labels, annotations } = raw;
            return {
                // Falls back to a random string when the alert has no fingerprint.
                id: raw.fingerprint || Math.random().toString(),
                severity: labels?.severity || 'info',
                name: labels?.alertname || 'Unknown',
                summary: annotations?.summary || annotations?.description || 'No description',
                description: annotations?.description,
                startsAt: raw.startsAt,
            };
        });
        // Parse alerts by severity
        const tally = { critical: 0, warning: 0, info: 0 };
        for (const { severity } of alerts) {
            const bucket = severity === 'critical' || severity === 'warning' ? severity : 'info';
            tally[bucket] += 1;
        }
        res.json({
            total: alerts.length,
            critical: tally.critical,
            warning: tally.warning,
            info: tally.info,
            alerts,
        });
    }
    catch (err) {
        logger_1.logger.error('Failed to fetch alerts', err);
        res.json(noAlerts());
    }
});
|
|
exports.observabilityRouter = router;
|
|
//# sourceMappingURL=observability.routes.js.map
|