fix(upgrade): Phase 1 of upgrade-flow redesign (Approach A)
Three coordinated fixes from the upgrade-flow redesign plan (/home/bunker-admin/.claude/plans/okay-so-we-can-enumerated-hejlsberg.md): 1. scripts/lib/mkdocs-snapshot.sh (NEW): pre-upgrade tarball snapshot of the entire mkdocs/ directory into the install root as mkdocs-backup-<timestamp>.tar.gz. Discoverable via `ls`, retained last 5. No-regrets fallback if anything in the upgrade goes sideways. Sourced by upgrade.sh (and later by image-upgrade.sh under Approach B). 2. scripts/upgrade.sh Phase 6 self-destruct fix: previously, the broad `docker compose up -d` recreated the ccp-agent container that was running the script, sending SIGKILL to the bash process before write_result could land result.json. Marcelle's test upgrade hit this tonight. Fix: temporarily remove `ccp-agent` from COMPOSE_PROFILES during Phase 6's broad up -d, then schedule a detached `nohup ... & disown` restart at the very end of the script (after write_result and archive_success_to_history). The deferred subshell sleeps 3s, then recreates ccp-agent under its profile, picking up the new image. 3. scripts/upgrade-stash-cleanup.sh (NEW): one-shot utility to list and drop accumulated `upgrade-*` git stashes left over by older upgrade.sh runs whose pop failed silently (Pride Corner has three from 2026-03-09 alone). Warns loudly if any stash holds tenant mkdocs.yml content so operators verify recovery before dropping. The .gitignore now excludes /mkdocs-backup-*.tar.gz so the rescue archives don't leak into commits. This is Phase 1 of three: Approach B (image-only upgrade mode) and Approach C (CCP template re-render) follow in subsequent commits. Bunker Admin
This commit is contained in:
parent
e88ac79ae8
commit
9613c3ec81
5
.gitignore
vendored
5
.gitignore
vendored
@ -64,6 +64,11 @@ core.*
|
||||
/backups/
|
||||
.upgrade.lock
|
||||
|
||||
# Pre-upgrade mkdocs snapshots (created by scripts/lib/mkdocs-snapshot.sh).
|
||||
# These are the tenant-content rescue archives written before every upgrade;
|
||||
# discoverable in the install root via `ls`. Retention: last 5 (see helper).
|
||||
/mkdocs-backup-*.tar.gz
|
||||
|
||||
# Release tarballs (generated by build-release.sh)
|
||||
/releases/
|
||||
|
||||
|
||||
81
scripts/lib/mkdocs-snapshot.sh
Executable file
81
scripts/lib/mkdocs-snapshot.sh
Executable file
@ -0,0 +1,81 @@
|
||||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# mkdocs-snapshot.sh — shared library function
|
||||
# =============================================================================
|
||||
# Defines snapshot_mkdocs(): writes a tarball of mkdocs/ into the install root
|
||||
# as mkdocs-backup-<timestamp>.tar.gz, keeping the last 5 snapshots.
|
||||
#
|
||||
# Sourced by scripts/upgrade.sh and scripts/image-upgrade.sh (and may be
|
||||
# invoked agent-side by changemaker-control-panel during template re-render).
|
||||
#
|
||||
# Why the install root instead of backups/?
|
||||
# - Discoverable: operators see mkdocs-backup-*.tar.gz with a plain `ls`.
|
||||
# - The agent's /app/instance bind mount maps directly to the install root,
|
||||
# so the agent can restore from this archive without path translation.
|
||||
# - backups/ is owned by root in some installs (DB dumps via container)
|
||||
# and gets rotated on a different schedule than docs snapshots.
|
||||
#
|
||||
# Restoration one-liner:
|
||||
# tar xzf "$(ls -t mkdocs-backup-*.tar.gz | head -1)" -C . \
|
||||
# && docker compose restart mkdocs mkdocs-site-server
|
||||
#
|
||||
# Requires: $PROJECT_DIR (absolute path to install root), info() function
|
||||
# from the caller (falls back to plain echo if info is not defined).
|
||||
# =============================================================================
|
||||
|
||||
# Fallback log functions if the caller didn't define them (e.g. when this file
# is sourced standalone). Functions already defined by the caller are kept.
if ! declare -F info >/dev/null 2>&1; then
  info() { echo "[INFO] $*"; }
fi
if ! declare -F warn >/dev/null 2>&1; then
  warn() { echo "[WARN] $*" >&2; }
fi

#######################################
# snapshot_mkdocs — take a tarball of mkdocs/ into the install root.
#
# Writes ${PROJECT_DIR}/mkdocs-backup-<YYYYmmdd_HHMMSS>.tar.gz and then prunes
# older mkdocs-backup-*.tar.gz files (ordered by mtime) down to the retention
# count.
#
# Globals (read):
#   PROJECT_DIR        (required) Install root containing mkdocs/
#   SNAPSHOT_KEEP      Number of snapshots to retain (default 5). Must be a
#                      positive integer; anything else falls back to 5 so a
#                      typo can never prune the snapshot that was just taken.
#   SNAPSHOT_REQUIRED  If "true", a tar failure returns 1 (default false)
# Outputs:
#   One info line on success; warnings (via warn) on failure.
# Returns:
#   0 on success, when PROJECT_DIR is unset, or when mkdocs/ doesn't exist.
#   Non-zero only if tar itself fails AND $SNAPSHOT_REQUIRED is "true".
#######################################
snapshot_mkdocs() {
  if [[ -z "${PROJECT_DIR:-}" ]]; then
    warn "snapshot_mkdocs: PROJECT_DIR not set; skipping"
    return 0
  fi

  if [[ ! -d "${PROJECT_DIR}/mkdocs" ]]; then
    # No mkdocs dir = nothing to snapshot. Common on minimal installs.
    return 0
  fi

  # Validate the retention count up front. In arithmetic context a word such
  # as "five" evaluates to 0, which would make the prune step below delete
  # every snapshot, including the one created by this call.
  local keep="${SNAPSHOT_KEEP:-5}"
  if [[ ! "$keep" =~ ^[0-9]+$ ]] || (( 10#$keep < 1 )); then
    warn "snapshot_mkdocs: invalid SNAPSHOT_KEEP='${keep}'; using 5"
    keep=5
  fi
  keep=$(( 10#$keep ))  # force base 10 so "08" is not parsed as octal

  local stamp
  stamp="$(date +%Y%m%d_%H%M%S)"
  local archive="${PROJECT_DIR}/mkdocs-backup-${stamp}.tar.gz"

  # Capture tar's stderr (stdout is discarded) so a failure can be explained.
  local tar_err
  if tar_err="$(tar czf "$archive" -C "$PROJECT_DIR" mkdocs 2>&1 >/dev/null)"; then
    local size
    size="$(du -h "$archive" 2>/dev/null | cut -f1)"
    info "Tenant docs snapshot: ${archive##*/} (${size})"
  else
    warn "snapshot_mkdocs: tar failed for $archive"
    if [[ -n "$tar_err" ]]; then
      warn "snapshot_mkdocs: tar reported: ${tar_err}"
    fi
    # Never leave a truncated archive behind that could be mistaken for a
    # usable snapshot.
    rm -f -- "$archive" 2>/dev/null
    if [[ "${SNAPSHOT_REQUIRED:-false}" == "true" ]]; then
      return 1
    fi
    return 0
  fi

  # Retention: keep the most recent N snapshots, prune older ones.
  # ls -t lists newest first; tail -n +(N+1) selects items after the Nth.
  # Paths are consumed one line at a time (not word-split by xargs) so an
  # install root containing spaces is handled correctly.
  local prune_from=$(( keep + 1 ))
  local stale
  # shellcheck disable=SC2012  # ls is intentional for mtime sort
  while IFS= read -r stale; do
    [[ -n "$stale" ]] || continue
    rm -f -- "$stale"
  done < <(ls -t -- "${PROJECT_DIR}"/mkdocs-backup-*.tar.gz 2>/dev/null \
    | tail -n "+${prune_from}")

  return 0
}
|
||||
135
scripts/upgrade-stash-cleanup.sh
Executable file
135
scripts/upgrade-stash-cleanup.sh
Executable file
@ -0,0 +1,135 @@
|
||||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# upgrade-stash-cleanup.sh — clean up stale upgrade-* git stashes
|
||||
# =============================================================================
|
||||
# Older versions of upgrade.sh used `git stash push --include-untracked` to
|
||||
# protect tenant content during pulls. When pop conflicts went unresolved,
|
||||
# the stashes accumulated in `git stash list` forever — Pride Corner ended up
|
||||
# with three from 2026-03-09 alone, each containing displaced tenant
|
||||
# customizations that the running site no longer reflected.
|
||||
#
|
||||
# This script lists every `upgrade-*` stash, shows its scope, and offers to
|
||||
# drop them. It does NOT auto-restore content; that's a separate decision per
|
||||
# tenant. The intent is to clear the backlog so future `git stash list` is
|
||||
# meaningful.
|
||||
#
|
||||
# Usage:
|
||||
# bash scripts/upgrade-stash-cleanup.sh # interactive, lists + prompts
|
||||
# bash scripts/upgrade-stash-cleanup.sh --dry # list only
|
||||
# bash scripts/upgrade-stash-cleanup.sh --yes # drop all upgrade-* without prompt
|
||||
# =============================================================================
|
||||
|
||||
set -euo pipefail

# Resolve the install root as the parent of this script's directory and run
# every git command from there.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
cd "$PROJECT_DIR"

# Colors: only when stdout is a terminal and NO_COLOR is not set.
if [[ -t 1 ]] && [[ -z "${NO_COLOR:-}" ]]; then
  RED='\033[0;31m' GREEN='\033[0;32m' YELLOW='\033[0;33m' CYAN='\033[0;36m'
  BOLD='\033[1m' NC='\033[0m'
else
  RED='' GREEN='' YELLOW='' CYAN='' BOLD='' NC=''
fi

info() { echo -e "${CYAN}[INFO]${NC} $*"; }
ok() { echo -e "${GREEN}[ OK ]${NC} $*"; }
warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }

# usage — print this file's leading comment header (everything after the
# shebang up to the first non-comment line) with the "# " prefix removed.
# The previous chained `sed -n '2,/^# =====/p'` stopped at the ruler directly
# below the title, so the Usage section was never shown.
usage() {
  awk 'NR == 1 { next } /^#/ { sub(/^# ?/, ""); print; next } { exit }' "$0"
}

DRY=false
YES=false
for arg in "$@"; do
  case "$arg" in
    --dry|--dry-run) DRY=true ;;
    --yes|-y) YES=true ;;
    --help|-h)
      usage
      exit 0
      ;;
    *)
      # Refuse unknown flags: silently ignoring a typo such as "--dryrun"
      # next to --yes would drop stashes the operator only meant to list.
      warn "Unknown option: $arg (try --help)"
      exit 2
      ;;
  esac
done

# -e rather than -d: in worktrees and submodules .git is a regular file.
if [[ ! -e .git ]]; then
  warn "Not a git repository — this script only applies to source installs."
  exit 0
fi

# Collect upgrade-* stash entries ("stash@{N}: On <branch>: upgrade-...").
# `|| true` because grep exits 1 when nothing matches.
mapfile -t STASHES < <(git stash list 2>/dev/null | grep -E ': (On|WIP on) [^:]+: upgrade-' || true)

if [[ ${#STASHES[@]} -eq 0 ]]; then
  ok "No upgrade-* stashes found. Nothing to clean up."
  exit 0
fi

echo ""
echo -e "${BOLD}Found ${#STASHES[@]} upgrade-* stash(es):${NC}"
echo ""

# Refs whose file list includes mkdocs/mkdocs.yml; filled in while listing so
# each stash is inspected exactly once.
MKDOCS_REFS=()
for entry in "${STASHES[@]}"; do
  REF="${entry%%:*}"
  LABEL="${entry#*: }"
  # Capture the file list once. `|| true` keeps an unreadable stash from
  # aborting the whole listing under `set -e`.
  STASH_FILES="$(git stash show --name-only "$REF" 2>/dev/null || true)"
  FILE_COUNT="$(printf '%s\n' "$STASH_FILES" | grep -c . || true)"
  # Match against the captured text via a here-string rather than
  # `git ... | grep -q`: with pipefail, grep -q exiting early can make the
  # producer die of SIGPIPE and turn a real match into a false negative.
  if grep -qx 'mkdocs/mkdocs\.yml' <<<"$STASH_FILES"; then
    HAS_MKDOCS_YML=1
    MKDOCS_REFS+=("$REF")
  else
    HAS_MKDOCS_YML=0
  fi
  printf " %-12s %-50s files=%-4d mkdocs.yml=%s\n" \
    "$REF" "$LABEL" "$FILE_COUNT" "$HAS_MKDOCS_YML"
done
echo ""

if [[ "$DRY" == "true" ]]; then
  info "Dry-run: no stashes will be dropped."
  exit 0
fi

# Warn loudly if any stash holds mkdocs.yml — operator should manually review
# before dropping (tenant content might be there).
if [[ ${#MKDOCS_REFS[@]} -gt 0 ]]; then
  echo ""
  echo -e "${RED}${BOLD}⚠ WARNING:${NC} the following stashes contain ${BOLD}mkdocs/mkdocs.yml${NC}:"
  printf ' %s\n' "${MKDOCS_REFS[@]}"
  echo ""
  echo " These may hold tenant branding (site_name, site_url, custom theme, etc.)"
  echo " that ISN'T reflected on disk. Before dropping, verify:"
  echo ""
  echo " git show <stash-ref>:mkdocs/mkdocs.yml | head -10"
  echo " diff <(git show <stash-ref>:mkdocs/mkdocs.yml) mkdocs/mkdocs.yml"
  echo ""
  echo " If disk mkdocs.yml already has the tenant content, the stash is safe to drop."
  echo " If disk is upstream and stash has tenant content, restore first:"
  echo " git checkout <stash-ref> -- mkdocs/mkdocs.yml"
  echo ""
fi

if [[ "$YES" != "true" ]]; then
  echo -en "${BOLD}Drop all ${#STASHES[@]} upgrade-* stashes? [y/N] ${NC}"
  # At EOF (stdin closed or not a terminal) read returns non-zero, which
  # would kill the script silently under `set -e`; treat it as "no" instead.
  read -r CONFIRM || CONFIRM=""
  case "$CONFIRM" in
    y|Y|yes|YES) ;;
    *) info "Cancelled. No stashes dropped."; exit 0 ;;
  esac
fi

# Drop highest index first: dropping stash@{N} renumbers only the entries
# above N, so the lower refs collected earlier stay valid.
mapfile -t SORTED_REFS < <(printf '%s\n' "${STASHES[@]}" \
  | sed 's/:.*//' \
  | sort -t'{' -k2 -n -r)

for REF in "${SORTED_REFS[@]}"; do
  if git stash drop "$REF" >/dev/null 2>&1; then
    ok "Dropped $REF"
  else
    warn "Failed to drop $REF (already gone?)"
  fi
done

echo ""
ok "Cleanup complete. Remaining stashes:"
# `git stash list` exits 0 with empty output when no stashes are left, so the
# placeholder has to be chosen from the captured output, not the exit status.
REMAINING="$(git stash list 2>/dev/null || true)"
if [[ -n "$REMAINING" ]]; then
  printf '%s\n' "$REMAINING"
else
  echo " (none)"
fi
|
||||
@ -95,6 +95,14 @@ phase() {
|
||||
echo ""
|
||||
}
|
||||
|
||||
# Load the pre-upgrade tenant docs snapshot helper (no-regrets fallback).
# This happens for every install mode so that Phase 2 can call
# snapshot_mkdocs; if the library file is absent nothing is loaded and the
# snapshot_mkdocs function is simply not defined.
# shellcheck source=lib/mkdocs-snapshot.sh
if [[ -f "${SCRIPT_DIR}/lib/mkdocs-snapshot.sh" ]]; then
  # shellcheck disable=SC1091
  source "${SCRIPT_DIR}/lib/mkdocs-snapshot.sh"
fi
|
||||
|
||||
# --- API mode: JSON progress/result writing ---
|
||||
UPGRADE_DIR="${PROJECT_DIR}/data/upgrade"
|
||||
PROGRESS_FILE="${UPGRADE_DIR}/progress.json"
|
||||
@ -709,6 +717,18 @@ fi
|
||||
phase "2" "Backup"
|
||||
write_progress 2 "Backup" 15 "Creating backup..."
|
||||
|
||||
# Tenant docs snapshot ahead of the upgrade — the no-regrets fallback. It is
# taken even with --skip-backup because it protects tenant content (not DB
# state) and is quick enough that skipping it would never be intentional.
# The archive lands in the install root (not backups/) so operators find it
# with a plain `ls`. Nothing happens here when snapshot_mkdocs is not defined.
if declare -F snapshot_mkdocs >/dev/null 2>&1; then
  case "$DRY_RUN" in
    true)
      info "[DRY RUN] Would snapshot mkdocs/ to ${PROJECT_DIR}/mkdocs-backup-*.tar.gz"
      ;;
    *)
      # A failed snapshot is reported but never stops the upgrade.
      snapshot_mkdocs || warn "mkdocs snapshot failed (non-fatal; continuing)"
      ;;
  esac
fi
|
||||
|
||||
if [[ "$SKIP_BACKUP" == "true" ]]; then
|
||||
warn "Backup skipped (--skip-backup --force)"
|
||||
else
|
||||
@ -1284,13 +1304,24 @@ while true; do
|
||||
done
|
||||
success "API healthy (${API_WAIT}s)"
|
||||
|
||||
# Start everything else (exclude one-shot init containers)
|
||||
# Start everything else (exclude one-shot init containers AND the ccp-agent
|
||||
# service that's running this very script). Recreating ccp-agent here would
|
||||
# SIGKILL the script process before write_result has a chance to run; we
|
||||
# instead schedule a detached restart at the very end of the script.
|
||||
#
|
||||
# Mechanism: temporarily drop "ccp-agent" from COMPOSE_PROFILES so the broad
|
||||
# `up -d` doesn't include it. We re-add it only when scheduling the deferred
|
||||
# restart so the new agent comes up under its profile.
|
||||
info "Starting remaining services..."
|
||||
# _compose_profiles_without NAME LIST
# Print the comma-separated profile LIST with every entry equal to NAME
# removed (exact match only; empty entries are dropped). Implemented with
# builtins on purpose: the former `tr | grep -vx | paste` pipeline made grep
# exit 1 whenever NAME was the only profile, which fails the assignment (and
# aborts the script) if errexit and pipefail are active.
_compose_profiles_without() {
  local drop="$1" list="$2"
  local item out=""
  local -a items=()
  IFS=',' read -r -a items <<<"$list" || true
  for item in ${items[@]+"${items[@]}"}; do
    if [[ -z "$item" || "$item" == "$drop" ]]; then
      continue
    fi
    out+="${out:+,}${item}"
  done
  printf '%s\n' "$out"
}

# Keep the original profile list so the deferred restart at the end of the
# script can tell whether ccp-agent was enabled.
PROFILES_SAVED="${COMPOSE_PROFILES:-}"
COMPOSE_PROFILES_WITHOUT_AGENT="$(_compose_profiles_without 'ccp-agent' "${PROFILES_SAVED}")"
# Broad `up -d` with ccp-agent's profile removed for this one command only;
# the one-shot init containers are scaled to 0 so they are not started again.
COMPOSE_PROFILES="${COMPOSE_PROFILES_WITHOUT_AGENT}" \
  docker compose up -d \
  --scale listmonk-init=0 \
  --scale gancio-init=0 \
  --scale vaultwarden-init=0
|
||||
success "All services started"
|
||||
success "All services started (ccp-agent restart deferred to end-of-script)"
|
||||
|
||||
# Restart Pangolin tunnel connector if running (may hold stale state after nginx rebuild)
|
||||
if docker ps --format '{{.Names}}' | grep -q 'newt'; then
|
||||
@ -1461,6 +1492,27 @@ echo -e " ${BOLD}Duration:${NC} $ELAPSED"
|
||||
echo -e " ${BOLD}Log:${NC} $LOG_FILE"
|
||||
echo ""
|
||||
|
||||
# Deferred ccp-agent restart — the LAST thing the script does before exit.
# This must run AFTER write_result and archive_success_to_history so the new
# agent comes up to a complete result.json (otherwise CCP polls forever).
# We launch a detached background shell that:
#   1. Sleeps briefly so this script has time to exit cleanly first.
#   2. Brings ccp-agent up under its profile, picking up any new image.
# `nohup` + `disown` detach the job from this shell so it is not tied to the
# script's lifetime.
# NOTE(review): detaching from the shell does not move the process out of the
# environment it runs in. If upgrade.sh executes inside the ccp-agent
# container, the detached job presumably lives in that container too and may
# be killed when the container is stopped for recreation — confirm that the
# restart actually completes in that setup.
#
# The membership test uses a pattern match instead of `tr | grep -qx` so it
# cannot be affected by pipefail. PROJECT_DIR is handed to the child shell as
# a positional argument rather than spliced into the command string, so a
# path containing quotes cannot break (or inject into) the command.
if [[ ",${PROFILES_SAVED:-}," == *",ccp-agent,"* ]]; then
  info "Scheduling deferred ccp-agent restart..."
  nohup bash -c '
    sleep 3
    cd -- "$1" || exit 1
    COMPOSE_PROFILES="ccp-agent" docker compose --profile ccp-agent up -d ccp-agent
  ' deferred-ccp-agent-restart "$PROJECT_DIR" >/dev/null 2>&1 </dev/null &
  disown
  success "ccp-agent restart scheduled (will pick up new image)"
fi
|
||||
|
||||
release_lock
|
||||
trap - EXIT
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user