Update system hardening: breaking-release gate + release-mode rollback + health budgets + success archival
Four fixes building on the prior upgrade-path work. All observed on marcelle across today's v2.9.2 → v2.9.5 cycles and addressed here. - Fix 1 (breaking-release gate). upgrade-check.sh now parses the first line of each Gitea release body for `BREAKING: <reason>` and threads `breaking`/`breakingReason` through status.json into the API status response. Admin UI renders a red Alert with a typed-tag confirmation input and gates the Start Upgrade button. auto-upgrade.service.ts refuses to apply breaking releases, logging a skip and holding off until the operator confirms manually. - Fix 2 (release-mode rollback). print_rollback_help and the --rollback flow both used `git checkout`, which silently fails in release installs (no .git). Added INSTALL_MODE branches: release mode downloads the prior tarball from Gitea using a new VERSION.rollback marker seeded at Phase 3 start. Source mode retains the existing git-based flow. - Fix 3 (Phase 7 health budgets). admin verify_service_health budget 30s → 90s (matches the admin container's start_period from commit 47704667). Gancio + MkDocs switched from one-shot to the existing verify_service_health retry wrapper. Cuts the cry-wolf "services may still be starting" warning from every upgrade result. - Fix 4 (symmetric success archival). Bash archive_failure_to_history already logs failures on exit; added a matching archive_success_to_history called after write_result on the success path. API-side archiveResult now dedupes on completedAt so double-recording (bash + post-restart handler) can't land twice in history.json. Release the bundle as v2.9.6. Bunker Admin
This commit is contained in:
parent
47704667b1
commit
ac901c9e53
@ -743,6 +743,10 @@ function SystemUpgradeTab() {
|
|||||||
const [result, setResult] = useState<UpgradeResult | null>(null);
|
const [result, setResult] = useState<UpgradeResult | null>(null);
|
||||||
const [running, setRunning] = useState(false);
|
const [running, setRunning] = useState(false);
|
||||||
const [watcher, setWatcher] = useState<WatcherHealth | null>(null);
|
const [watcher, setWatcher] = useState<WatcherHealth | null>(null);
|
||||||
|
// Breaking-release gate: operator must type the target tag to confirm.
|
||||||
|
// Resets whenever the remoteCommit changes so we re-prompt on every new
|
||||||
|
// breaking release instead of carrying stale confirmation state.
|
||||||
|
const [breakingConfirmInput, setBreakingConfirmInput] = useState('');
|
||||||
const [checking, setChecking] = useState(false);
|
const [checking, setChecking] = useState(false);
|
||||||
const [upgrading, setUpgrading] = useState(false);
|
const [upgrading, setUpgrading] = useState(false);
|
||||||
const [apiOffline, setApiOffline] = useState(false);
|
const [apiOffline, setApiOffline] = useState(false);
|
||||||
@ -793,6 +797,13 @@ function SystemUpgradeTab() {
|
|||||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
|
// Reset breaking-release typed confirmation whenever the target release
|
||||||
|
// changes — otherwise a stale confirm from a previous release body could
|
||||||
|
// carry over into a new breaking upgrade.
|
||||||
|
useEffect(() => {
|
||||||
|
setBreakingConfirmInput('');
|
||||||
|
}, [status?.remoteCommit]);
|
||||||
|
|
||||||
const stopPoll = () => {
|
const stopPoll = () => {
|
||||||
if (pollRef.current) {
|
if (pollRef.current) {
|
||||||
clearInterval(pollRef.current);
|
clearInterval(pollRef.current);
|
||||||
@ -962,12 +973,13 @@ function SystemUpgradeTab() {
|
|||||||
<div style={{ maxHeight: 240, overflowY: 'auto' }}>
|
<div style={{ maxHeight: 240, overflowY: 'auto' }}>
|
||||||
<Timeline
|
<Timeline
|
||||||
items={status.changelog.map((entry) => ({
|
items={status.changelog.map((entry) => ({
|
||||||
color: 'blue',
|
color: status.breaking ? 'red' : 'blue',
|
||||||
children: (
|
children: (
|
||||||
<div>
|
<div>
|
||||||
<Space size={4}>
|
<Space size={4}>
|
||||||
<Text code style={{ fontSize: 12 }}>{entry.hash}</Text>
|
<Text code style={{ fontSize: 12 }}>{entry.hash}</Text>
|
||||||
<Text type="secondary" style={{ fontSize: 12 }}>{entry.author}</Text>
|
<Text type="secondary" style={{ fontSize: 12 }}>{entry.author}</Text>
|
||||||
|
{status.breaking && <Tag color="red">BREAKING</Tag>}
|
||||||
</Space>
|
</Space>
|
||||||
<div><Text style={{ fontSize: 13 }}>{entry.message}</Text></div>
|
<div><Text style={{ fontSize: 13 }}>{entry.message}</Text></div>
|
||||||
</div>
|
</div>
|
||||||
@ -1018,6 +1030,35 @@ function SystemUpgradeTab() {
|
|||||||
/>
|
/>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
{/* Breaking-release gate: requires typed target tag to confirm.
|
||||||
|
Auto-upgrade refuses to fire when status.breaking is true. */}
|
||||||
|
{status?.breaking && status.commitsBehind > 0 && (
|
||||||
|
<Alert
|
||||||
|
type="error"
|
||||||
|
message={<Space>⚠️ Breaking release — manual confirmation required</Space>}
|
||||||
|
description={
|
||||||
|
<>
|
||||||
|
<Paragraph style={{ marginBottom: 8 }}>
|
||||||
|
<Text strong>{status.remoteCommit}</Text> is flagged as breaking:{' '}
|
||||||
|
<Text italic>{status.breakingReason || 'No reason provided.'}</Text>
|
||||||
|
</Paragraph>
|
||||||
|
<Paragraph style={{ marginBottom: 8 }} type="secondary">
|
||||||
|
Auto-upgrade will not apply this release. Type the target tag below to confirm you've
|
||||||
|
reviewed the release notes and backed up any at-risk data before proceeding.
|
||||||
|
</Paragraph>
|
||||||
|
<Input
|
||||||
|
placeholder={`Type "${status.remoteCommit}" to confirm`}
|
||||||
|
value={breakingConfirmInput}
|
||||||
|
onChange={(e) => setBreakingConfirmInput(e.target.value)}
|
||||||
|
style={{ maxWidth: 320 }}
|
||||||
|
/>
|
||||||
|
</>
|
||||||
|
}
|
||||||
|
showIcon
|
||||||
|
style={{ marginBottom: 16 }}
|
||||||
|
/>
|
||||||
|
)}
|
||||||
|
|
||||||
{/* Actions */}
|
{/* Actions */}
|
||||||
<Space style={{ marginBottom: 16 }}>
|
<Space style={{ marginBottom: 16 }}>
|
||||||
<Button
|
<Button
|
||||||
@ -1032,7 +1073,14 @@ function SystemUpgradeTab() {
|
|||||||
type="primary"
|
type="primary"
|
||||||
icon={<RocketOutlined />}
|
icon={<RocketOutlined />}
|
||||||
onClick={() => setConfirmOpen(true)}
|
onClick={() => setConfirmOpen(true)}
|
||||||
disabled={isUpgrading || !status || status.commitsBehind === 0}
|
disabled={
|
||||||
|
isUpgrading ||
|
||||||
|
!status ||
|
||||||
|
status.commitsBehind === 0 ||
|
||||||
|
// Breaking releases require the operator to type the exact target
|
||||||
|
// tag. Prevents accidental Start Upgrade clicks on dangerous releases.
|
||||||
|
(!!status.breaking && breakingConfirmInput.trim() !== (status.remoteCommit || ''))
|
||||||
|
}
|
||||||
>
|
>
|
||||||
Start Upgrade
|
Start Upgrade
|
||||||
</Button>
|
</Button>
|
||||||
|
|||||||
@ -3151,6 +3151,8 @@ export interface UpgradeStatus {
|
|||||||
remoteCommitFull?: string | null;
|
remoteCommitFull?: string | null;
|
||||||
commitsBehind: number;
|
commitsBehind: number;
|
||||||
changelog: UpgradeChangelogEntry[];
|
changelog: UpgradeChangelogEntry[];
|
||||||
|
breaking?: boolean;
|
||||||
|
breakingReason?: string;
|
||||||
checkedAt: string;
|
checkedAt: string;
|
||||||
error: string | null;
|
error: string | null;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -36,6 +36,8 @@ export interface UpgradeStatus {
|
|||||||
date: string;
|
date: string;
|
||||||
author: string;
|
author: string;
|
||||||
}>;
|
}>;
|
||||||
|
breaking?: boolean;
|
||||||
|
breakingReason?: string;
|
||||||
checkedAt: string;
|
checkedAt: string;
|
||||||
error: string | null;
|
error: string | null;
|
||||||
}
|
}
|
||||||
@ -206,10 +208,16 @@ function clearStaleProgress(): void {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Archive a completed upgrade result to the persistent history file. */
|
/** Archive a completed upgrade result to the persistent history file.
|
||||||
|
* Dedupes on completedAt so the bash-side success archival (upgrade.sh
|
||||||
|
* archive_success_to_history) and this API-side call can't double-record. */
|
||||||
function archiveResult(result: UpgradeResult): void {
|
function archiveResult(result: UpgradeResult): void {
|
||||||
try {
|
try {
|
||||||
const history = readJsonFile<UpgradeResult[]>(HISTORY_FILE) || [];
|
const history = readJsonFile<UpgradeResult[]>(HISTORY_FILE) || [];
|
||||||
|
if (history[0]?.completedAt === result.completedAt) {
|
||||||
|
logger.info('Skipping archive — most recent history entry has same completedAt');
|
||||||
|
return;
|
||||||
|
}
|
||||||
history.unshift(result);
|
history.unshift(result);
|
||||||
// Trim to max entries
|
// Trim to max entries
|
||||||
if (history.length > MAX_HISTORY_ENTRIES) {
|
if (history.length > MAX_HISTORY_ENTRIES) {
|
||||||
|
|||||||
@ -122,6 +122,17 @@ class AutoUpgradeService {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Refuse to auto-apply releases flagged `BREAKING:` in their Gitea body.
|
||||||
|
// The admin UI gate ensures manual confirmation — auto-upgrade holds off
|
||||||
|
// and keeps checking, so the block clears once the operator upgrades.
|
||||||
|
if (status.breaking) {
|
||||||
|
logger.warn(
|
||||||
|
`Auto-upgrade: refusing to apply breaking release (${status.remoteCommit}): ${status.breakingReason || '(no reason given)'}. Manual confirmation required via admin UI.`,
|
||||||
|
);
|
||||||
|
upgradeService.clearTriggeredBy();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
logger.info(`Auto-upgrade: ${status.commitsBehind} commits behind, triggering upgrade`);
|
logger.info(`Auto-upgrade: ${status.commitsBehind} commits behind, triggering upgrade`);
|
||||||
|
|
||||||
// Read settings for pullServices and registry options
|
// Read settings for pullServices and registry options
|
||||||
|
|||||||
@ -61,6 +61,23 @@ EOF
|
|||||||
LATEST_DATE=$(echo "$RELEASE_JSON" | python3 -c "import sys,json; print(json.load(sys.stdin).get('created_at',''))" 2>/dev/null)
|
LATEST_DATE=$(echo "$RELEASE_JSON" | python3 -c "import sys,json; print(json.load(sys.stdin).get('created_at',''))" 2>/dev/null)
|
||||||
LATEST_BODY=$(echo "$RELEASE_JSON" | python3 -c "import sys,json; print(json.load(sys.stdin).get('body','').replace('\"','\\\\\"')[:200])" 2>/dev/null)
|
LATEST_BODY=$(echo "$RELEASE_JSON" | python3 -c "import sys,json; print(json.load(sys.stdin).get('body','').replace('\"','\\\\\"')[:200])" 2>/dev/null)
|
||||||
|
|
||||||
|
# Breaking-release marker: first line of the release body matching
|
||||||
|
# `^BREAKING:[[:space:]]*(.+)` (case-insensitive) flags this release as
|
||||||
|
# requiring manual confirmation. Admin UI gates Start Upgrade and
|
||||||
|
# auto-upgrade refuses to apply until the operator confirms.
|
||||||
|
IS_BREAKING=$(echo "$RELEASE_JSON" | python3 -c "
|
||||||
|
import sys, json, re
|
||||||
|
body = json.load(sys.stdin).get('body', '') or ''
|
||||||
|
m = re.match(r'^BREAKING:\s*(.+?)(?:\n|$)', body, re.IGNORECASE)
|
||||||
|
print('true' if m else 'false')
|
||||||
|
" 2>/dev/null || echo "false")
|
||||||
|
BREAKING_REASON=$(echo "$RELEASE_JSON" | python3 -c "
|
||||||
|
import sys, json, re
|
||||||
|
body = json.load(sys.stdin).get('body', '') or ''
|
||||||
|
m = re.match(r'^BREAKING:\s*(.+?)(?:\n|$)', body, re.IGNORECASE)
|
||||||
|
print((m.group(1).strip() if m else '').replace('\"','\\\\\"')[:300])
|
||||||
|
" 2>/dev/null || echo "")
|
||||||
|
|
||||||
if [[ "$CURRENT_VERSION" == "$LATEST_TAG" ]]; then
|
if [[ "$CURRENT_VERSION" == "$LATEST_TAG" ]]; then
|
||||||
COMMITS_BEHIND=0
|
COMMITS_BEHIND=0
|
||||||
else
|
else
|
||||||
@ -78,6 +95,8 @@ EOF
|
|||||||
"remoteCommitFull": "${LATEST_TAG}",
|
"remoteCommitFull": "${LATEST_TAG}",
|
||||||
"commitsBehind": ${COMMITS_BEHIND},
|
"commitsBehind": ${COMMITS_BEHIND},
|
||||||
"changelog": [{"hash":"${LATEST_TAG}","message":"${LATEST_BODY}","date":"${LATEST_DATE}","author":"release"}],
|
"changelog": [{"hash":"${LATEST_TAG}","message":"${LATEST_BODY}","date":"${LATEST_DATE}","author":"release"}],
|
||||||
|
"breaking": ${IS_BREAKING},
|
||||||
|
"breakingReason": "${BREAKING_REASON}",
|
||||||
"checkedAt": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
|
"checkedAt": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
|
||||||
"error": null
|
"error": null
|
||||||
}
|
}
|
||||||
|
|||||||
@ -250,19 +250,34 @@ load_env() {
|
|||||||
# --- Print rollback instructions ---
|
# --- Print rollback instructions ---
|
||||||
print_rollback_help() {
|
print_rollback_help() {
|
||||||
local commit="${PRE_UPGRADE_COMMIT:-unknown}"
|
local commit="${PRE_UPGRADE_COMMIT:-unknown}"
|
||||||
local backup_path="${LATEST_BACKUP:-$BACKUP_DIR}"
|
|
||||||
echo ""
|
echo ""
|
||||||
echo -e "${BOLD}${RED}═══════════════════════════════════════════════${NC}"
|
echo -e "${BOLD}${RED}═══════════════════════════════════════════════${NC}"
|
||||||
echo -e "${BOLD}${RED} Upgrade Failed — Rollback Instructions${NC}"
|
echo -e "${BOLD}${RED} Upgrade Failed — Rollback Instructions${NC}"
|
||||||
echo -e "${BOLD}${RED}═══════════════════════════════════════════════${NC}"
|
echo -e "${BOLD}${RED}═══════════════════════════════════════════════${NC}"
|
||||||
echo ""
|
echo ""
|
||||||
echo -e " ${BOLD}1.${NC} Restore code to pre-upgrade commit:"
|
if [[ "$INSTALL_MODE" == "release" ]]; then
|
||||||
echo -e " ${CYAN}cd $PROJECT_DIR${NC}"
|
# Release installs have no .git — rollback is "re-download the prior tarball".
|
||||||
echo -e " ${CYAN}git checkout $commit${NC}"
|
# VERSION.rollback is seeded at the start of Phase 3 so we always know what
|
||||||
echo ""
|
# tag to go back to, across multiple failed attempts.
|
||||||
echo -e " ${BOLD}2.${NC} Rebuild and restart:"
|
local prior
|
||||||
echo -e " ${CYAN}docker compose build api admin media-api${NC}"
|
prior="$(cat "${UPGRADE_DIR}/VERSION.rollback" 2>/dev/null | head -1 || echo "vX.Y.Z")"
|
||||||
echo -e " ${CYAN}docker compose up -d${NC}"
|
echo -e " ${BOLD}1.${NC} Restore prior release tarball (${BOLD}${prior}${NC}):"
|
||||||
|
echo -e " ${CYAN}cd $PROJECT_DIR${NC}"
|
||||||
|
echo -e " ${CYAN}URL=https://gitea.bnkops.com/admin/changemaker.lite/releases/download/${prior}/changemaker-lite-${prior}.tar.gz${NC}"
|
||||||
|
echo -e " ${CYAN}curl -fSL \"\$URL\" -o /tmp/rb.tar.gz && tar xzf /tmp/rb.tar.gz --strip-components=1 -C $PROJECT_DIR${NC}"
|
||||||
|
echo ""
|
||||||
|
echo -e " ${BOLD}2.${NC} Pull prior images and restart:"
|
||||||
|
echo -e " ${CYAN}docker compose pull api admin media-api nginx${NC}"
|
||||||
|
echo -e " ${CYAN}docker compose up -d${NC}"
|
||||||
|
else
|
||||||
|
echo -e " ${BOLD}1.${NC} Restore code to pre-upgrade commit:"
|
||||||
|
echo -e " ${CYAN}cd $PROJECT_DIR${NC}"
|
||||||
|
echo -e " ${CYAN}git checkout $commit${NC}"
|
||||||
|
echo ""
|
||||||
|
echo -e " ${BOLD}2.${NC} Rebuild and restart:"
|
||||||
|
echo -e " ${CYAN}docker compose build api admin media-api${NC}"
|
||||||
|
echo -e " ${CYAN}docker compose up -d${NC}"
|
||||||
|
fi
|
||||||
echo ""
|
echo ""
|
||||||
echo -e " ${BOLD}3.${NC} If database rollback is needed (destructive!):"
|
echo -e " ${BOLD}3.${NC} If database rollback is needed (destructive!):"
|
||||||
echo -e " ${CYAN}# Find backup archive:${NC}"
|
echo -e " ${CYAN}# Find backup archive:${NC}"
|
||||||
@ -329,16 +344,28 @@ REOF
|
|||||||
# Append a failure record to history.json (newest first, capped at 50 entries
|
# Append a failure record to history.json (newest first, capped at 50 entries
|
||||||
# to match MAX_HISTORY_ENTRIES in api/src/modules/upgrade/upgrade.service.ts).
|
# to match MAX_HISTORY_ENTRIES in api/src/modules/upgrade/upgrade.service.ts).
|
||||||
archive_failure_to_history() {
|
archive_failure_to_history() {
|
||||||
local msg="$1"
|
_archive_to_history "false" "$1" "[]"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Mirror for success path — prior code relied on the API's handlePostRestartResult
|
||||||
|
# to archive, which only fires for auto-upgrade post-restart. Admin-UI-triggered
|
||||||
|
# successes were leaking if the user dismissed the result card before the API
|
||||||
|
# polled. API-side archiveResult dedupes on completedAt, so double-append is safe.
|
||||||
|
archive_success_to_history() {
|
||||||
|
_archive_to_history "true" "$1" "${UPGRADE_WARNINGS:-[]}"
|
||||||
|
}
|
||||||
|
|
||||||
|
_archive_to_history() {
|
||||||
|
local success="$1" msg="$2" warnings_json="$3"
|
||||||
local hist="${UPGRADE_DIR}/history.json"
|
local hist="${UPGRADE_DIR}/history.json"
|
||||||
mkdir -p "$UPGRADE_DIR"
|
mkdir -p "$UPGRADE_DIR"
|
||||||
local entry
|
local entry
|
||||||
entry="$(cat <<HEOF
|
entry="$(cat <<HEOF
|
||||||
{"success":false,"message":"$(echo "$msg" | sed 's/"/\\"/g')","previousCommit":"${PRE_UPGRADE_SHORT:-unknown}","newCommit":"$(head -1 "$PROJECT_DIR/VERSION" 2>/dev/null || echo "unknown")","commitCount":${COMMIT_COUNT:-0},"durationSeconds":$((SECONDS - ${START_TIME:-SECONDS})),"warnings":[],"completedAt":"$(date -u +%Y-%m-%dT%H:%M:%SZ)"}
|
{"success":${success},"message":"$(echo "$msg" | sed 's/"/\\"/g')","previousCommit":"${PRE_UPGRADE_SHORT:-unknown}","newCommit":"$(head -1 "$PROJECT_DIR/VERSION" 2>/dev/null || echo "unknown")","commitCount":${COMMIT_COUNT:-0},"durationSeconds":$((SECONDS - ${START_TIME:-SECONDS})),"warnings":${warnings_json},"completedAt":"$(date -u +%Y-%m-%dT%H:%M:%SZ)"}
|
||||||
HEOF
|
HEOF
|
||||||
)"
|
)"
|
||||||
python3 - "$hist" "$entry" <<'PYEOF' 2>/dev/null || true
|
python3 - "$hist" "$entry" <<'PYEOF' 2>/dev/null || true
|
||||||
import json, sys, os
|
import json, sys
|
||||||
hist_path, entry_json = sys.argv[1], sys.argv[2]
|
hist_path, entry_json = sys.argv[1], sys.argv[2]
|
||||||
try:
|
try:
|
||||||
with open(hist_path) as f:
|
with open(hist_path) as f:
|
||||||
@ -453,54 +480,94 @@ fi
|
|||||||
if [[ "$ROLLBACK" == "true" ]]; then
|
if [[ "$ROLLBACK" == "true" ]]; then
|
||||||
phase "R" "Rollback"
|
phase "R" "Rollback"
|
||||||
|
|
||||||
# Find latest backup with git commit reference
|
if [[ "$INSTALL_MODE" == "release" ]]; then
|
||||||
LATEST_ARCHIVE="$(ls -t "$BACKUP_DIR"/changemaker-v2-backup-*.tar.gz 2>/dev/null | head -1 || true)"
|
# Release-mode rollback: re-extract the prior release tarball recorded
|
||||||
if [[ -z "$LATEST_ARCHIVE" ]]; then
|
# in VERSION.rollback (seeded at Phase 3 start of any upgrade).
|
||||||
error "No backup archives found in $BACKUP_DIR"
|
PRIOR_TAG="$(cat "${UPGRADE_DIR}/VERSION.rollback" 2>/dev/null | head -1 || true)"
|
||||||
error "Cannot determine pre-upgrade commit. Manual rollback needed."
|
if [[ -z "$PRIOR_TAG" ]]; then
|
||||||
release_lock
|
error "No VERSION.rollback marker found at ${UPGRADE_DIR}/VERSION.rollback"
|
||||||
exit 1
|
error "Cannot determine prior release. Run: curl -fSL <prior-tarball-url> | tar xz -C $PROJECT_DIR --strip-components=1"
|
||||||
|
release_lock
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
info "Rolling back to prior release: ${PRIOR_TAG}"
|
||||||
|
TARBALL_URL="${GITEA_REGISTRY_URL:-https://gitea.bnkops.com}/admin/changemaker.lite/releases/download/${PRIOR_TAG}/changemaker-lite-${PRIOR_TAG}.tar.gz"
|
||||||
|
|
||||||
|
if [[ "$DRY_RUN" == "true" ]]; then
|
||||||
|
info "[DRY RUN] Would download: $TARBALL_URL"
|
||||||
|
info "[DRY RUN] Would extract to: $PROJECT_DIR (preserving .env)"
|
||||||
|
info "[DRY RUN] Would run: docker compose pull api admin media-api nginx && docker compose up -d"
|
||||||
|
release_lock
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
ROLLBACK_DIR="$(mktemp -d)"
|
||||||
|
if ! curl -fSL "$TARBALL_URL" -o "${ROLLBACK_DIR}/rb.tar.gz"; then
|
||||||
|
error "Failed to download prior release tarball from ${TARBALL_URL}"
|
||||||
|
rm -rf "$ROLLBACK_DIR"
|
||||||
|
release_lock
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
tar xzf "${ROLLBACK_DIR}/rb.tar.gz" -C "$ROLLBACK_DIR"
|
||||||
|
ROLLBACK_SRC="$(find "$ROLLBACK_DIR" -maxdepth 1 -mindepth 1 -type d | head -1)"
|
||||||
|
rsync -a --exclude='.env' "$ROLLBACK_SRC/" "$PROJECT_DIR/"
|
||||||
|
rm -rf "$ROLLBACK_DIR"
|
||||||
|
success "Code rolled back to ${PRIOR_TAG}"
|
||||||
|
|
||||||
|
export IMAGE_TAG="latest"
|
||||||
|
docker compose pull api admin media-api nginx || warn "Some images failed to pull — check registry reachability"
|
||||||
|
docker compose up -d
|
||||||
|
success "Containers restarted on ${PRIOR_TAG} images"
|
||||||
|
else
|
||||||
|
# Source-mode rollback: legacy git-based flow.
|
||||||
|
LATEST_ARCHIVE="$(ls -t "$BACKUP_DIR"/changemaker-v2-backup-*.tar.gz 2>/dev/null | head -1 || true)"
|
||||||
|
if [[ -z "$LATEST_ARCHIVE" ]]; then
|
||||||
|
error "No backup archives found in $BACKUP_DIR"
|
||||||
|
error "Cannot determine pre-upgrade commit. Manual rollback needed."
|
||||||
|
release_lock
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
info "Latest backup: $(basename "$LATEST_ARCHIVE")"
|
||||||
|
|
||||||
|
ARCHIVE_DIR="$(basename "$LATEST_ARCHIVE" .tar.gz)"
|
||||||
|
ROLLBACK_COMMIT="$(tar xzf "$LATEST_ARCHIVE" -O "${ARCHIVE_DIR}/git-commit.txt" 2>/dev/null || true)"
|
||||||
|
|
||||||
|
if [[ -z "$ROLLBACK_COMMIT" ]]; then
|
||||||
|
error "No git-commit.txt found in backup archive."
|
||||||
|
error "Manually specify: git checkout <commit-hash>"
|
||||||
|
release_lock
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
info "Rolling back to commit: $ROLLBACK_COMMIT"
|
||||||
|
|
||||||
|
if [[ "$DRY_RUN" == "true" ]]; then
|
||||||
|
info "[DRY RUN] Would run: git checkout $ROLLBACK_COMMIT"
|
||||||
|
info "[DRY RUN] Would rebuild: docker compose build $SOURCE_CONTAINERS"
|
||||||
|
info "[DRY RUN] Would restart: docker compose up -d"
|
||||||
|
release_lock
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
git checkout -B "$BRANCH" "$ROLLBACK_COMMIT"
|
||||||
|
docker compose build $SOURCE_CONTAINERS
|
||||||
|
docker compose up -d
|
||||||
|
success "Rolled back to $ROLLBACK_COMMIT"
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo -e " ${BOLD}Database restore:${NC}"
|
||||||
|
echo -e " Code has been rolled back. Database was NOT rolled back."
|
||||||
|
echo -e " The backup archive contains a PostgreSQL dump."
|
||||||
|
echo -e " To restore (${RED}DESTRUCTIVE — replaces current data${NC}):"
|
||||||
|
echo ""
|
||||||
|
ARCHIVE_DIR_NAME="$(basename "$LATEST_ARCHIVE" .tar.gz)"
|
||||||
|
echo -e " ${CYAN}tar xzf $LATEST_ARCHIVE -C /tmp${NC}"
|
||||||
|
echo -e " ${CYAN}gunzip -c /tmp/$ARCHIVE_DIR_NAME/v2-postgres.sql.gz | docker exec -i changemaker-v2-postgres psql -U changemaker -d changemaker_v2${NC}"
|
||||||
|
echo ""
|
||||||
fi
|
fi
|
||||||
|
|
||||||
info "Latest backup: $(basename "$LATEST_ARCHIVE")"
|
|
||||||
|
|
||||||
# Extract just the git-commit.txt from the archive
|
|
||||||
ARCHIVE_DIR="$(basename "$LATEST_ARCHIVE" .tar.gz)"
|
|
||||||
ROLLBACK_COMMIT="$(tar xzf "$LATEST_ARCHIVE" -O "${ARCHIVE_DIR}/git-commit.txt" 2>/dev/null || true)"
|
|
||||||
|
|
||||||
if [[ -z "$ROLLBACK_COMMIT" ]]; then
|
|
||||||
error "No git-commit.txt found in backup archive."
|
|
||||||
error "Manually specify: git checkout <commit-hash>"
|
|
||||||
release_lock
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
info "Rolling back to commit: $ROLLBACK_COMMIT"
|
|
||||||
|
|
||||||
if [[ "$DRY_RUN" == "true" ]]; then
|
|
||||||
info "[DRY RUN] Would run: git checkout $ROLLBACK_COMMIT"
|
|
||||||
info "[DRY RUN] Would rebuild: docker compose build $SOURCE_CONTAINERS"
|
|
||||||
info "[DRY RUN] Would restart: docker compose up -d"
|
|
||||||
release_lock
|
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
git checkout -B "$BRANCH" "$ROLLBACK_COMMIT"
|
|
||||||
docker compose build $SOURCE_CONTAINERS
|
|
||||||
docker compose up -d
|
|
||||||
success "Rolled back to $ROLLBACK_COMMIT"
|
|
||||||
|
|
||||||
echo ""
|
|
||||||
echo -e " ${BOLD}Database restore:${NC}"
|
|
||||||
echo -e " Code has been rolled back. Database was NOT rolled back."
|
|
||||||
echo -e " The backup archive contains a PostgreSQL dump."
|
|
||||||
echo -e " To restore (${RED}DESTRUCTIVE — replaces current data${NC}):"
|
|
||||||
echo ""
|
|
||||||
ARCHIVE_DIR_NAME="$(basename "$LATEST_ARCHIVE" .tar.gz)"
|
|
||||||
echo -e " ${CYAN}tar xzf $LATEST_ARCHIVE -C /tmp${NC}"
|
|
||||||
echo -e " ${CYAN}gunzip -c /tmp/$ARCHIVE_DIR_NAME/v2-postgres.sql.gz | docker exec -i changemaker-v2-postgres psql -U changemaker -d changemaker_v2${NC}"
|
|
||||||
echo ""
|
|
||||||
|
|
||||||
release_lock
|
release_lock
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
@ -733,9 +800,14 @@ for a in json.load(sys.stdin).get('assets', []):
|
|||||||
# Sync new files, preserving .env. VERSION is staged to a pending
|
# Sync new files, preserving .env. VERSION is staged to a pending
|
||||||
# location and only promoted after Phase 7 verification succeeds (Fix B),
|
# location and only promoted after Phase 7 verification succeeds (Fix B),
|
||||||
# so interrupted upgrades don't leave a misleading "upgraded" marker.
|
# so interrupted upgrades don't leave a misleading "upgraded" marker.
|
||||||
|
# Also stash the CURRENT VERSION as VERSION.rollback so --rollback and
|
||||||
|
# print_rollback_help know what release to restore on failure.
|
||||||
write_progress 3 "Code Update" 40 "Applying update..."
|
write_progress 3 "Code Update" 40 "Applying update..."
|
||||||
rsync -a --exclude='.env' --exclude='VERSION' "$UPDATE_SRC/" "$PROJECT_DIR/"
|
|
||||||
mkdir -p "$UPGRADE_DIR"
|
mkdir -p "$UPGRADE_DIR"
|
||||||
|
if [[ -f "$PROJECT_DIR/VERSION" ]]; then
|
||||||
|
cp "$PROJECT_DIR/VERSION" "$UPGRADE_DIR/VERSION.rollback"
|
||||||
|
fi
|
||||||
|
rsync -a --exclude='.env' --exclude='VERSION' "$UPDATE_SRC/" "$PROJECT_DIR/"
|
||||||
cp "$UPDATE_SRC/VERSION" "$UPGRADE_DIR/VERSION.pending"
|
cp "$UPDATE_SRC/VERSION" "$UPGRADE_DIR/VERSION.pending"
|
||||||
|
|
||||||
# Restore user paths
|
# Restore user paths
|
||||||
@ -1259,9 +1331,11 @@ verify_service_health() {
|
|||||||
verify_service_health "API (port 4000)" \
|
verify_service_health "API (port 4000)" \
|
||||||
"docker compose exec -T api wget -q --spider http://localhost:4000/api/health" 45
|
"docker compose exec -T api wget -q --spider http://localhost:4000/api/health" 45
|
||||||
|
|
||||||
# Admin health
|
# Admin health — 90s matches the admin container's start_period + a cushion
|
||||||
|
# for first-boot Vite bundling. 30s was aspirational and produced cry-wolf
|
||||||
|
# warnings on every successful upgrade.
|
||||||
verify_service_health "Admin (port 3000)" \
|
verify_service_health "Admin (port 3000)" \
|
||||||
"docker compose exec -T admin wget -q --spider http://localhost:3000/" 30
|
"docker compose exec -T admin wget -q --spider http://localhost:3000/" 90
|
||||||
|
|
||||||
# Media API health (optional — may not be enabled)
|
# Media API health (optional — may not be enabled)
|
||||||
if docker ps --format '{{.Names}}' | grep -q 'changemaker-media-api'; then
|
if docker ps --format '{{.Names}}' | grep -q 'changemaker-media-api'; then
|
||||||
@ -1269,26 +1343,22 @@ if docker ps --format '{{.Names}}' | grep -q 'changemaker-media-api'; then
|
|||||||
"docker compose exec -T media-api wget -q --spider http://127.0.0.1:4100/health" 30
|
"docker compose exec -T media-api wget -q --spider http://127.0.0.1:4100/health" 30
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Gancio health (optional)
|
# Gancio health (optional) — restart loop is still a hard signal, but
|
||||||
|
# "starting" now gets retry grace instead of passing silently.
|
||||||
if docker ps --format '{{.Names}}' | grep -q 'gancio-changemaker'; then
|
if docker ps --format '{{.Names}}' | grep -q 'gancio-changemaker'; then
|
||||||
if docker compose ps gancio --format '{{.Status}}' 2>/dev/null | grep -q "healthy"; then
|
if docker compose ps gancio --format '{{.Status}}' 2>/dev/null | grep -qi "restarting"; then
|
||||||
success "Gancio: healthy"
|
|
||||||
elif docker compose ps gancio --format '{{.Status}}' 2>/dev/null | grep -qi "restarting"; then
|
|
||||||
warn "Gancio: restart loop detected (check config.json in gancio-data volume)"
|
warn "Gancio: restart loop detected (check config.json in gancio-data volume)"
|
||||||
VERIFY_FAILED=true
|
VERIFY_FAILED=true
|
||||||
else
|
else
|
||||||
info "Gancio: starting (may take up to 60s)"
|
verify_service_health "Gancio" \
|
||||||
|
"docker compose ps gancio --format '{{.Status}}' 2>/dev/null | grep -q healthy" 60
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# MkDocs static site health
|
# MkDocs static site health (retry — first-boot rebuild can lag)
|
||||||
if docker ps --format '{{.Names}}' | grep -q 'mkdocs-site-server'; then
|
if docker ps --format '{{.Names}}' | grep -q 'mkdocs-site-server'; then
|
||||||
if curl -sf http://localhost:${MKDOCS_SITE_SERVER_PORT:-4004}/ -o /dev/null 2>/dev/null; then
|
verify_service_health "MkDocs site (port ${MKDOCS_SITE_SERVER_PORT:-4004})" \
|
||||||
success "MkDocs site (port ${MKDOCS_SITE_SERVER_PORT:-4004}): healthy"
|
"curl -sf http://localhost:${MKDOCS_SITE_SERVER_PORT:-4004}/ -o /dev/null" 30
|
||||||
else
|
|
||||||
warn "MkDocs site (port ${MKDOCS_SITE_SERVER_PORT:-4004}): not responding"
|
|
||||||
VERIFY_FAILED=true
|
|
||||||
fi
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Check for containers in restart loop
|
# Check for containers in restart loop
|
||||||
@ -1357,6 +1427,7 @@ fi
|
|||||||
|
|
||||||
write_progress 7 "Verification" 100 "Upgrade complete!"
|
write_progress 7 "Verification" 100 "Upgrade complete!"
|
||||||
write_result "true" "Upgraded ${PRE_UPGRADE_SHORT} → ${FINAL_COMMIT} (${COMMIT_COUNT} commits)" "$UPGRADE_WARNINGS"
|
write_result "true" "Upgraded ${PRE_UPGRADE_SHORT} → ${FINAL_COMMIT} (${COMMIT_COUNT} commits)" "$UPGRADE_WARNINGS"
|
||||||
|
archive_success_to_history "Upgraded ${PRE_UPGRADE_SHORT} → ${FINAL_COMMIT} (${COMMIT_COUNT} commits)"
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo -e "${BOLD}${GREEN}══════════════════════════════════════════════════${NC}"
|
echo -e "${BOLD}${GREEN}══════════════════════════════════════════════════${NC}"
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user