Fresh-install + upgrade-path hardening bundle
Six independent fixes (numbered 1–5 and 7; this list has no Fix 6) surfaced during the v2.9.1 → v2.9.2 admin-UI upgrade validation today. Together they make a clean install on a new box work end-to-end without in-session patching.
- Fix 1: scripts/validate-compose-parity.sh + build-release.sh hook — fail release builds when the api/admin/media-api/nginx healthcheck blocks drift between docker-compose.yml and docker-compose.prod.yml. The previous boot-race fix had to be applied to both files manually.
- Fix 2: scripts/systemd/install.sh chowns logs/ to the install user (the API container creates subdirs there as root, locking the host-side watcher out) and pre-creates logs/upgrade-watcher.log; changemaker-upgrade.service adds StartLimitIntervalSec=0 so a single transient failure can't wedge the .path unit permanently.
- Fix 3: /api/upgrade/status now returns a `watcher` sub-object that flags the host systemd watcher as stalled when trigger.json has been pending >30s. The admin SettingsPage SystemUpgradeTab renders a warning Alert with the systemctl recovery command when the watcher is unhealthy.
- Fix 4: scripts/upgrade.sh write_result() — prefer `head -1 VERSION` over `git rev-parse HEAD` so release-mode upgrades report the new tag in result.json instead of "unknown".
- Fix 5: admin container healthcheck start_period 20s → 60s in both compose files, same class as the earlier api fix. Matches Gancio convention.
- Fix 7: /api/pangolin/sync now detects resources bound to a stale siteId (common after --pangolin-site new rotations), deletes and recreates them against the current site, and reports them under a new `reassigned` response field.
Not itemized above but included in the diff: scripts/upgrade.sh now runs `docker compose rm -sfv api admin` during a volume refresh, so anonymous volumes attached to those containers are removed along with them.
Bunker Admin
This commit is contained in:
parent
5115c65691
commit
23df6a8b52
@ -62,7 +62,7 @@ import { api } from '@/lib/api';
|
|||||||
import { useMobile } from '@/hooks/useMobile';
|
import { useMobile } from '@/hooks/useMobile';
|
||||||
import { PageTour } from '@/components/tour/PageTour';
|
import { PageTour } from '@/components/tour/PageTour';
|
||||||
import type { AppOutletContext } from '@/components/AppLayout';
|
import type { AppOutletContext } from '@/components/AppLayout';
|
||||||
import type { SmtpTestResult, SmtpSendTestResult, UpgradeStatusResponse, UpgradeStatus, UpgradeProgress, UpgradeResult, UpgradeHistoryResponse } from '@/types/api';
|
import type { SmtpTestResult, SmtpSendTestResult, UpgradeStatusResponse, UpgradeStatus, UpgradeProgress, UpgradeResult, UpgradeHistoryResponse, WatcherHealth } from '@/types/api';
|
||||||
|
|
||||||
const { Text, Paragraph } = Typography;
|
const { Text, Paragraph } = Typography;
|
||||||
|
|
||||||
@ -742,6 +742,7 @@ function SystemUpgradeTab() {
|
|||||||
const [progress, setProgress] = useState<UpgradeProgress | null>(null);
|
const [progress, setProgress] = useState<UpgradeProgress | null>(null);
|
||||||
const [result, setResult] = useState<UpgradeResult | null>(null);
|
const [result, setResult] = useState<UpgradeResult | null>(null);
|
||||||
const [running, setRunning] = useState(false);
|
const [running, setRunning] = useState(false);
|
||||||
|
const [watcher, setWatcher] = useState<WatcherHealth | null>(null);
|
||||||
const [checking, setChecking] = useState(false);
|
const [checking, setChecking] = useState(false);
|
||||||
const [upgrading, setUpgrading] = useState(false);
|
const [upgrading, setUpgrading] = useState(false);
|
||||||
const [apiOffline, setApiOffline] = useState(false);
|
const [apiOffline, setApiOffline] = useState(false);
|
||||||
@ -760,6 +761,7 @@ function SystemUpgradeTab() {
|
|||||||
setProgress(data.progress);
|
setProgress(data.progress);
|
||||||
setResult(data.result);
|
setResult(data.result);
|
||||||
setRunning(data.running);
|
setRunning(data.running);
|
||||||
|
setWatcher(data.watcher ?? null);
|
||||||
setApiOffline(false);
|
setApiOffline(false);
|
||||||
return data;
|
return data;
|
||||||
} catch {
|
} catch {
|
||||||
@ -996,6 +998,26 @@ function SystemUpgradeTab() {
|
|||||||
/>
|
/>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
{watcher && !watcher.healthy && (
|
||||||
|
<Alert
|
||||||
|
type="warning"
|
||||||
|
message="Upgrade watcher stalled"
|
||||||
|
description={
|
||||||
|
<>
|
||||||
|
<div>{watcher.reason || 'Host systemd watcher is not processing upgrade triggers.'}</div>
|
||||||
|
<div style={{ marginTop: 8 }}>
|
||||||
|
Recovery:{' '}
|
||||||
|
<Text code>
|
||||||
|
sudo systemctl reset-failed changemaker-upgrade.path changemaker-upgrade.service && sudo systemctl restart changemaker-upgrade.path
|
||||||
|
</Text>
|
||||||
|
</div>
|
||||||
|
</>
|
||||||
|
}
|
||||||
|
showIcon
|
||||||
|
style={{ marginBottom: 16 }}
|
||||||
|
/>
|
||||||
|
)}
|
||||||
|
|
||||||
{/* Actions */}
|
{/* Actions */}
|
||||||
<Space style={{ marginBottom: 16 }}>
|
<Space style={{ marginBottom: 16 }}>
|
||||||
<Button
|
<Button
|
||||||
|
|||||||
@ -3179,11 +3179,18 @@ export interface UpgradeHistoryResponse {
|
|||||||
history: UpgradeResult[];
|
history: UpgradeResult[];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Health of the host-side systemd upgrade watcher, as delivered in the
 * optional `watcher` field of the upgrade status response.
 */
export interface WatcherHealth {
|
||||||
|
/** False when the watcher is considered stalled; the upgrade tab renders a warning Alert in that case. */
healthy: boolean;
|
||||||
|
/** Human-readable explanation of the stall; the UI falls back to a generic message when it is absent. */
reason?: string;
|
||||||
|
/** ISO 8601 timestamp of the pending trigger file's last modification, as produced by the API. */
pendingSince?: string;
|
||||||
|
}
|
||||||
|
|
||||||
export interface UpgradeStatusResponse {
|
export interface UpgradeStatusResponse {
|
||||||
status: UpgradeStatus | null;
|
status: UpgradeStatus | null;
|
||||||
progress: UpgradeProgress | null;
|
progress: UpgradeProgress | null;
|
||||||
result: UpgradeResult | null;
|
result: UpgradeResult | null;
|
||||||
running: boolean;
|
running: boolean;
|
||||||
|
watcher?: WatcherHealth;
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- Social Calendar Types ---
|
// --- Social Calendar Types ---
|
||||||
|
|||||||
@ -867,11 +867,43 @@ router.post('/sync', pangolinSetupLimiter, async (_req: Request, res: Response)
|
|||||||
const existingByDomain = new Map(existing.map(r => [r.fullDomain || '', r]));
|
const existingByDomain = new Map(existing.map(r => [r.fullDomain || '', r]));
|
||||||
|
|
||||||
const created: string[] = [];
|
const created: string[] = [];
|
||||||
|
const reassigned: string[] = [];
|
||||||
const targetFixed: string[] = [];
|
const targetFixed: string[] = [];
|
||||||
const skipped: string[] = [];
|
const skipped: string[] = [];
|
||||||
const warnings: string[] = [];
|
const warnings: string[] = [];
|
||||||
const errors: string[] = [];
|
const errors: string[] = [];
|
||||||
|
|
||||||
|
// Create resource + public access + target. Shared by "new" and "reassign"
|
||||||
|
// flows so `--pangolin-site new` installs can rebuild after a site rotation.
|
||||||
|
/**
 * Creates the Pangolin resource for `def`, opens it to public access, and
 * attaches a target on the current site.
 *
 * Only the resource creation itself is fatal: it rejects, and the caller is
 * expected to record the failure. The two follow-up steps are best-effort:
 *  - a public-access failure is logged as a warning (with the cause);
 *  - a target failure is appended to `errors` as `<fullDomain> (target): <msg>`.
 * In both cases the function still resolves, so the resource exists even if
 * it is not yet fully configured.
 *
 * @param def        Resource definition (name, optional subdomain, target ip/port).
 * @param fullDomain Fully-qualified domain, used only for log and error messages.
 */
const createResourceForDef = async (def: ResourceDefinition, fullDomain: string): Promise<void> => {
  // Root domain: omit the subdomain field entirely (Pangolin rejects an empty string).
  const resource = await pangolinClient.createResource({
    name: def.name,
    domainId: matchingDomain.domainId,
    ...(def.subdomain ? { subdomain: def.subdomain } : {}),
    http: true,
    protocol: 'tcp',
  });

  // Make publicly accessible (disable SSO auth + blockAccess). Best-effort, but
  // keep the cause in the log so the failure can be diagnosed afterwards.
  try {
    await pangolinClient.updateResource(resource.resourceId, { sso: false, blockAccess: false });
  } catch (accessErr) {
    logger.warn(`Created ${fullDomain} but failed to set public access`, accessErr);
  }

  // Bind the new resource to the current site.
  try {
    await pangolinClient.createTarget(resource.resourceId, {
      siteId,
      ip: def.target_ip,
      port: def.target_port,
      method: 'http',
      enabled: true,
    });
  } catch (targetErr) {
    const msg = targetErr instanceof Error ? targetErr.message : 'Unknown error';
    errors.push(`${fullDomain} (target): ${msg}`);
  }
};
|
||||||
|
|
||||||
for (const def of resourceDefs) {
|
for (const def of resourceDefs) {
|
||||||
const fullDomain = def.subdomain ? `${def.subdomain}.${domain}` : domain;
|
const fullDomain = def.subdomain ? `${def.subdomain}.${domain}` : domain;
|
||||||
|
|
||||||
@ -890,10 +922,30 @@ router.post('/sync', pangolinSetupLimiter, async (_req: Request, res: Response)
|
|||||||
const existingResource = existingByDomain.get(fullDomain);
|
const existingResource = existingByDomain.get(fullDomain);
|
||||||
|
|
||||||
if (existingResource) {
|
if (existingResource) {
|
||||||
// Resource exists — verify it has a target
|
// Resource exists — verify target points at the CURRENT site.
|
||||||
try {
|
try {
|
||||||
const targets = await pangolinClient.listTargets(existingResource.resourceId);
|
const targets = await pangolinClient.listTargets(existingResource.resourceId);
|
||||||
if (targets.length === 0) {
|
const currentTargetSiteId = targets[0]?.siteId;
|
||||||
|
const siteMismatch =
|
||||||
|
targets.length > 0 && Number(currentTargetSiteId) !== Number(siteId);
|
||||||
|
|
||||||
|
if (siteMismatch) {
|
||||||
|
// Stale siteId from a previous `--pangolin-site new` install.
|
||||||
|
// Delete and recreate against the current site.
|
||||||
|
logger.warn(
|
||||||
|
`Resource ${fullDomain} bound to stale siteId ${currentTargetSiteId}, reassigning to ${siteId}`,
|
||||||
|
);
|
||||||
|
try {
|
||||||
|
await pangolinClient.deleteResource(existingResource.resourceId);
|
||||||
|
await createResourceForDef(def, fullDomain);
|
||||||
|
reassigned.push(fullDomain);
|
||||||
|
logger.info(`Reassigned ${fullDomain} to siteId ${siteId}`);
|
||||||
|
} catch (err) {
|
||||||
|
const msg = err instanceof Error ? err.message : 'Unknown error';
|
||||||
|
errors.push(`${fullDomain} (reassign): ${msg}`);
|
||||||
|
logger.error(`Failed to reassign resource ${fullDomain}:`, err);
|
||||||
|
}
|
||||||
|
} else if (targets.length === 0) {
|
||||||
// Missing target — create one
|
// Missing target — create one
|
||||||
logger.info(`Resource ${fullDomain} has no target, creating one...`);
|
logger.info(`Resource ${fullDomain} has no target, creating one...`);
|
||||||
await pangolinClient.createTarget(existingResource.resourceId, {
|
await pangolinClient.createTarget(existingResource.resourceId, {
|
||||||
@ -927,36 +979,7 @@ router.post('/sync', pangolinSetupLimiter, async (_req: Request, res: Response)
|
|||||||
} else {
|
} else {
|
||||||
// Create new resource + target
|
// Create new resource + target
|
||||||
try {
|
try {
|
||||||
// Root domain: omit subdomain field entirely (Pangolin rejects empty string)
|
await createResourceForDef(def, fullDomain);
|
||||||
const resource = await pangolinClient.createResource({
|
|
||||||
name: def.name,
|
|
||||||
domainId: matchingDomain.domainId,
|
|
||||||
...(def.subdomain ? { subdomain: def.subdomain } : {}),
|
|
||||||
http: true,
|
|
||||||
protocol: 'tcp',
|
|
||||||
});
|
|
||||||
|
|
||||||
// Make publicly accessible (disable SSO auth + blockAccess)
|
|
||||||
try {
|
|
||||||
await pangolinClient.updateResource(resource.resourceId, { sso: false, blockAccess: false });
|
|
||||||
} catch {
|
|
||||||
logger.warn(`Created ${fullDomain} but failed to set public access`);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create target
|
|
||||||
try {
|
|
||||||
await pangolinClient.createTarget(resource.resourceId, {
|
|
||||||
siteId,
|
|
||||||
ip: def.target_ip,
|
|
||||||
port: def.target_port,
|
|
||||||
method: 'http',
|
|
||||||
enabled: true,
|
|
||||||
});
|
|
||||||
} catch (targetErr) {
|
|
||||||
const msg = targetErr instanceof Error ? targetErr.message : 'Unknown error';
|
|
||||||
errors.push(`${fullDomain} (target): ${msg}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
created.push(fullDomain);
|
created.push(fullDomain);
|
||||||
logger.info(`Created resource + target: ${fullDomain}`);
|
logger.info(`Created resource + target: ${fullDomain}`);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
@ -970,11 +993,12 @@ router.post('/sync', pangolinSetupLimiter, async (_req: Request, res: Response)
|
|||||||
res.json({
|
res.json({
|
||||||
success: true,
|
success: true,
|
||||||
created: created.length,
|
created: created.length,
|
||||||
|
reassigned: reassigned.length,
|
||||||
targetFixed: targetFixed.length,
|
targetFixed: targetFixed.length,
|
||||||
skipped: skipped.length,
|
skipped: skipped.length,
|
||||||
warnings: warnings.length,
|
warnings: warnings.length,
|
||||||
errors: errors.length,
|
errors: errors.length,
|
||||||
details: { created, targetFixed, skipped, warnings, errors },
|
details: { created, reassigned, targetFixed, skipped, warnings, errors },
|
||||||
});
|
});
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
const msg = err instanceof Error ? err.message : 'Unknown error';
|
const msg = err instanceof Error ? err.message : 'Unknown error';
|
||||||
|
|||||||
@ -17,8 +17,9 @@ router.get('/status', (_req, res) => {
|
|||||||
const progress = upgradeService.getProgress();
|
const progress = upgradeService.getProgress();
|
||||||
const result = upgradeService.getResult();
|
const result = upgradeService.getResult();
|
||||||
const running = upgradeService.isRunning();
|
const running = upgradeService.isRunning();
|
||||||
|
const watcher = upgradeService.getWatcherHealth();
|
||||||
|
|
||||||
res.json({ status, progress: running ? progress : null, result, running });
|
res.json({ status, progress: running ? progress : null, result, running, watcher });
|
||||||
});
|
});
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@ -60,6 +60,12 @@ export interface UpgradeResult {
|
|||||||
triggeredBy?: string;
|
triggeredBy?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Liveness verdict for the host-side upgrade watcher, derived from the age
 * of the trigger file.
 */
export interface WatcherHealth {
|
||||||
|
/** True when no trigger file is pending, when it is younger than the stall threshold, or when the check itself failed. */
healthy: boolean;
|
||||||
|
/** Human-readable explanation; populated only on the unhealthy result. */
reason?: string;
|
||||||
|
/** ISO 8601 form of the trigger file's mtime; populated only on the unhealthy result. */
pendingSince?: string;
|
||||||
|
}
|
||||||
|
|
||||||
interface TriggerPayload {
|
interface TriggerPayload {
|
||||||
action: 'check' | 'upgrade';
|
action: 'check' | 'upgrade';
|
||||||
branch?: string;
|
branch?: string;
|
||||||
@ -96,6 +102,31 @@ function getStatus(): UpgradeStatus | null {
|
|||||||
return readJsonFile<UpgradeStatus>(STATUS_FILE);
|
return readJsonFile<UpgradeStatus>(STATUS_FILE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Watcher liveness heuristic. The host-side systemd watcher consumes and
|
||||||
|
* DELETES trigger.json within ~1s of it appearing. If trigger.json exists
|
||||||
|
* and is older than the threshold, the `.path` unit is almost certainly
|
||||||
|
* wedged (e.g. StartLimitBurst latch) and the admin UI should surface it.
|
||||||
|
*/
|
||||||
|
const WATCHER_STALL_MS = 30 * 1000;
|
||||||
|
|
||||||
|
/**
 * Reports whether the host-side upgrade watcher appears to be consuming
 * trigger files.
 *
 * The verdict is based solely on the trigger file's mtime:
 *  - file absent                       → healthy (nothing pending);
 *  - file no older than WATCHER_STALL_MS → healthy (watcher may still pick it up);
 *  - file older than WATCHER_STALL_MS  → unhealthy, with `reason` and `pendingSince`.
 *
 * Any unexpected filesystem error is logged and reported as healthy, so a
 * broken check never raises a false alarm in the admin UI.
 *
 * @returns The current watcher health verdict; never throws.
 */
function getWatcherHealth(): WatcherHealth {
  let mtimeMs: number;
  try {
    // Single stat call instead of existsSync + statSync: the watcher deletes
    // the trigger file as soon as it consumes it, so the file can vanish
    // between two separate calls.
    mtimeMs = fs.statSync(TRIGGER_FILE).mtimeMs;
  } catch (err) {
    const code = (err as NodeJS.ErrnoException | null | undefined)?.code;
    // ENOENT is the normal "no pending trigger" case, not a failure worth logging.
    if (code !== 'ENOENT') {
      logger.warn('getWatcherHealth failed:', err);
    }
    return { healthy: true };
  }

  const age = Date.now() - mtimeMs;
  if (age <= WATCHER_STALL_MS) return { healthy: true };

  return {
    healthy: false,
    reason: `Trigger file has been pending for ${Math.round(age / 1000)}s — host upgrade watcher may be stopped or failed`,
    pendingSince: new Date(mtimeMs).toISOString(),
  };
}
|
||||||
|
|
||||||
function getProgress(): UpgradeProgress | null {
|
function getProgress(): UpgradeProgress | null {
|
||||||
return readJsonFile<UpgradeProgress>(PROGRESS_FILE);
|
return readJsonFile<UpgradeProgress>(PROGRESS_FILE);
|
||||||
}
|
}
|
||||||
@ -221,6 +252,7 @@ export const upgradeService = {
|
|||||||
getStatus,
|
getStatus,
|
||||||
getProgress,
|
getProgress,
|
||||||
getResult,
|
getResult,
|
||||||
|
getWatcherHealth,
|
||||||
isRunning,
|
isRunning,
|
||||||
triggerCheck,
|
triggerCheck,
|
||||||
triggerUpgrade,
|
triggerUpgrade,
|
||||||
|
|||||||
@ -236,7 +236,7 @@ services:
|
|||||||
interval: 30s
|
interval: 30s
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 3
|
retries: 3
|
||||||
start_period: 20s
|
start_period: 60s
|
||||||
environment:
|
environment:
|
||||||
- DOMAIN=${DOMAIN:-cmlite.org}
|
- DOMAIN=${DOMAIN:-cmlite.org}
|
||||||
- NODE_ENV=${NODE_ENV:-production}
|
- NODE_ENV=${NODE_ENV:-production}
|
||||||
|
|||||||
@ -248,7 +248,7 @@ services:
|
|||||||
interval: 30s
|
interval: 30s
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 3
|
retries: 3
|
||||||
start_period: 20s
|
start_period: 60s
|
||||||
environment:
|
environment:
|
||||||
- DOMAIN=${DOMAIN:-cmlite.org}
|
- DOMAIN=${DOMAIN:-cmlite.org}
|
||||||
- NODE_ENV=${NODE_ENV:-development}
|
- NODE_ENV=${NODE_ENV:-development}
|
||||||
|
|||||||
@ -87,6 +87,17 @@ if [[ ! -f "$PROJECT_DIR/docker-compose.prod.yml" ]]; then
|
|||||||
error "docker-compose.prod.yml not found. Generate it first."
|
error "docker-compose.prod.yml not found. Generate it first."
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Fail the release if dev and prod compose files have drifted on critical
# healthcheck blocks — catches cases where one file was patched without
# the other (bit us on the api start_period fix).
#
# The validator is run through `bash`, so it only has to exist; requiring the
# executable bit would silently skip the check after a checkout that lost it.
if [[ -f "$PROJECT_DIR/scripts/validate-compose-parity.sh" ]]; then
    if ! bash "$PROJECT_DIR/scripts/validate-compose-parity.sh"; then
        error "Compose parity check failed. Aborting release build."
        exit 1
    fi
else
    # Keep building, but make the missing safety net visible in the build log.
    echo "WARNING: $PROJECT_DIR/scripts/validate-compose-parity.sh not found; skipping compose parity check." >&2
fi
|
||||||
|
|
||||||
cp "$PROJECT_DIR/docker-compose.prod.yml" "$STAGE_DIR/docker-compose.yml"
|
cp "$PROJECT_DIR/docker-compose.prod.yml" "$STAGE_DIR/docker-compose.yml"
|
||||||
info "docker-compose.yml (production)"
|
info "docker-compose.yml (production)"
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
[Unit]
|
[Unit]
|
||||||
Description=Changemaker Lite upgrade dispatcher
|
Description=Changemaker Lite upgrade dispatcher
|
||||||
Documentation=https://docs.cmlite.org/docs/admin/services/
|
Documentation=https://docs.cmlite.org/docs/admin/services/
|
||||||
|
StartLimitIntervalSec=0
|
||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
Type=oneshot
|
Type=oneshot
|
||||||
|
|||||||
@ -28,6 +28,14 @@ for unit in "${SCRIPT_DIR}"/changemaker-upgrade.*; do
|
|||||||
echo " Installed ${filename}"
|
echo " Installed ${filename}"
|
||||||
done
|
done
|
||||||
|
|
||||||
|
# Ensure logs/ is writable by the install user. The API container creates
# subdirs here as root, which locks out the host-side upgrade-watcher service.
# The group is resolved with `id -gn` rather than assumed to share the user's
# name, so this also works where the primary group is e.g. "users".
mkdir -p "${PROJECT_DIR}/logs"
touch "${PROJECT_DIR}/logs/upgrade-watcher.log"
chown "${INSTALL_USER}:$(id -gn "${INSTALL_USER}")" \
    "${PROJECT_DIR}/logs" \
    "${PROJECT_DIR}/logs/upgrade-watcher.log"
echo " Prepared ${PROJECT_DIR}/logs (owned by ${INSTALL_USER})"
|
||||||
|
|
||||||
systemctl daemon-reload
|
systemctl daemon-reload
|
||||||
systemctl enable --now changemaker-upgrade.path
|
systemctl enable --now changemaker-upgrade.path
|
||||||
|
|
||||||
|
|||||||
@ -118,7 +118,7 @@ write_result() {
|
|||||||
"success": ${success},
|
"success": ${success},
|
||||||
"message": "$(echo "$msg" | sed 's/"/\\"/g')",
|
"message": "$(echo "$msg" | sed 's/"/\\"/g')",
|
||||||
"previousCommit": "${PRE_UPGRADE_SHORT:-unknown}",
|
"previousCommit": "${PRE_UPGRADE_SHORT:-unknown}",
|
||||||
"newCommit": "$(git rev-parse --short HEAD 2>/dev/null || echo "unknown")",
|
"newCommit": "$(head -1 "$PROJECT_DIR/VERSION" 2>/dev/null || git rev-parse --short HEAD 2>/dev/null || echo "unknown")",
|
||||||
"commitCount": ${COMMIT_COUNT:-0},
|
"commitCount": ${COMMIT_COUNT:-0},
|
||||||
"durationSeconds": ${duration_secs},
|
"durationSeconds": ${duration_secs},
|
||||||
"warnings": ${warnings_json},
|
"warnings": ${warnings_json},
|
||||||
@ -1089,7 +1089,7 @@ fi
|
|||||||
info "Starting API..."
|
info "Starting API..."
|
||||||
if [[ "$NEEDS_VOLUME_REFRESH" == "true" ]]; then
|
if [[ "$NEEDS_VOLUME_REFRESH" == "true" ]]; then
|
||||||
info "Removing old API/admin containers (clearing stale node_modules volumes)..."
|
info "Removing old API/admin containers (clearing stale node_modules volumes)..."
|
||||||
docker compose rm -sf api admin 2>/dev/null || true
|
docker compose rm -sfv api admin 2>/dev/null || true
|
||||||
fi
|
fi
|
||||||
docker compose up -d api
|
docker compose up -d api
|
||||||
|
|
||||||
|
|||||||
78
scripts/validate-compose-parity.sh
Executable file
78
scripts/validate-compose-parity.sh
Executable file
@ -0,0 +1,78 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# =============================================================================
|
||||||
|
# Changemaker Lite — Compose Parity Validator
|
||||||
|
#
|
||||||
|
# The dev (docker-compose.yml) and prod (docker-compose.prod.yml) files share
|
||||||
|
# ~95% of their service definitions verbatim, but there is no tooling that
|
||||||
|
# ensures they stay in sync. A drift in healthcheck tolerances between them
|
||||||
|
# can cause release-tarball installs to silently fail where dev installs pass
|
||||||
|
# (or vice versa).
|
||||||
|
#
|
||||||
|
# This script compares the `healthcheck:` block for a fixed set of critical
|
||||||
|
# services between the two files and exits non-zero if any of them diverge.
|
||||||
|
#
|
||||||
|
# Run manually: bash scripts/validate-compose-parity.sh
|
||||||
|
# Also invoked by scripts/build-release.sh before packaging the tarball.
|
||||||
|
# =============================================================================
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
|
||||||
|
DEV_FILE="${PROJECT_DIR}/docker-compose.yml"
|
||||||
|
PROD_FILE="${PROJECT_DIR}/docker-compose.prod.yml"
|
||||||
|
|
||||||
|
# Services whose healthcheck must be identical across dev and prod.
|
||||||
|
CRITICAL_SERVICES=(api media-api admin nginx)
|
||||||
|
|
||||||
|
if [[ ! -f "$DEV_FILE" ]] || [[ ! -f "$PROD_FILE" ]]; then
|
||||||
|
echo "ERROR: Could not find both compose files (expected $DEV_FILE and $PROD_FILE)" >&2
|
||||||
|
exit 2
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Extract the healthcheck block for a given service from a compose file.
|
||||||
|
# Uses awk to walk indentation: find `^ <service>:`, then within it the
|
||||||
|
# ` healthcheck:` block, and print the healthcheck lines until a sibling
|
||||||
|
# key (same 4-space indent) or end of service.
|
||||||
|
# extract_healthcheck FILE SERVICE
#
# Prints the `healthcheck:` block of SERVICE from compose file FILE: the
# `    healthcheck:` line itself plus every nested line, up to the next
# sibling key of the service, the next service, or the next top-level
# section. Blank and comment-only lines are dropped so that cosmetic
# differences between the two files are not reported as drift. Prints
# nothing when the service or its healthcheck is absent.
#
# Assumes the conventional compose layout: services at 2-space indent and
# service keys at 4-space indent.
extract_healthcheck() {
    local file="$1" service="$2"
    awk -v svc="$service" '
        # A top-level key (column 0) ends the services section entirely.
        /^[^[:space:]#]/ { in_svc = 0; in_hc = 0; next }
        # Entering the target service definition?
        $0 ~ "^  " svc ":[[:space:]]*$" { in_svc = 1; in_hc = 0; next }
        # Any other service header ends the target service and closes an
        # open healthcheck block (otherwise it would bleed into the output).
        in_svc && /^  [a-zA-Z0-9_-]+:[[:space:]]*$/ { in_svc = 0; in_hc = 0; next }
        # Inside target service, watch for healthcheck block
        in_svc && /^    healthcheck:[[:space:]]*$/ { in_hc = 1; print; next }
        # Inside healthcheck: print until we hit a sibling key at same indent
        in_hc {
            if (/^[[:space:]]*(#.*)?$/) { next }
            if (/^    [a-zA-Z0-9_-]+:/) { in_hc = 0 }
            else { print }
        }
    ' "$file"
}
|
||||||
|
|
||||||
|
# Compare the healthcheck block of every critical service across both files.
# The loop keeps going after a drift so that all drifting services are
# reported in one run; FAIL records whether any were found.
FAIL=0
CHECKED=0
for svc in "${CRITICAL_SERVICES[@]}"; do
    dev_hc="$(extract_healthcheck "$DEV_FILE" "$svc")"
    prod_hc="$(extract_healthcheck "$PROD_FILE" "$svc")"

    if [[ -z "$dev_hc" ]] && [[ -z "$prod_hc" ]]; then
        continue  # service not defined in either — fine (e.g. media-api optional)
    fi

    # Plain assignment rather than ((CHECKED++)): the latter returns non-zero
    # when the old value is 0 and would abort the script under `set -e`.
    CHECKED=$((CHECKED + 1))

    if [[ "$dev_hc" != "$prod_hc" ]]; then
        echo "DRIFT: healthcheck block for service '${svc}' differs between dev and prod compose files" >&2
        echo "--- $(basename "$DEV_FILE")" >&2
        echo "$dev_hc" >&2
        echo "--- $(basename "$PROD_FILE")" >&2
        echo "$prod_hc" >&2
        echo "" >&2
        FAIL=1
    fi
done

if [[ "$FAIL" -ne 0 ]]; then
    echo "Compose parity check FAILED. Update both files before releasing." >&2
    exit 1
fi

# Report how many services were actually compared, not how many were listed:
# services without a healthcheck in either file are skipped above.
echo "Compose parity: OK (${CHECKED} of ${#CRITICAL_SERVICES[@]} services checked)"
|
||||||
Loading…
x
Reference in New Issue
Block a user