From 9613c3ec81310b1ae6d25e8d9864c152f9b0216b Mon Sep 17 00:00:00 2001
From: bunker-admin
Date: Wed, 20 May 2026 20:43:34 -0600
Subject: [PATCH] fix(upgrade): Phase 1 of upgrade-flow redesign (Approach A)

Three coordinated fixes from the upgrade-flow redesign plan
(/home/bunker-admin/.claude/plans/okay-so-we-can-enumerated-hejlsberg.md):

1. scripts/lib/mkdocs-snapshot.sh (NEW): pre-upgrade tarball snapshot of
   the entire mkdocs/ directory into the install root as
   mkdocs-backup-YYYYMMDD_HHMMSS.tar.gz. Discoverable via `ls`, retained
   last 5. No-regrets fallback if anything in the upgrade goes sideways.
   Sourced by upgrade.sh (and later by image-upgrade.sh under Approach B).

2. scripts/upgrade.sh Phase 6 self-destruct fix: previously, the broad
   `docker compose up -d` recreated the ccp-agent container that was
   running the script, sending SIGKILL to the bash process before
   write_result could land result.json. Marcelle's test upgrade hit this
   tonight. Fix: temporarily remove `ccp-agent` from COMPOSE_PROFILES
   during Phase 6's broad up -d, then schedule a detached
   `nohup ... & disown` restart at the very end of the script (after
   write_result and archive_success_to_history). The deferred subshell
   sleeps 3s, then recreates ccp-agent under its profile, picking up the
   new image.

3. scripts/upgrade-stash-cleanup.sh (NEW): one-shot utility to list and
   drop accumulated `upgrade-*` git stashes left over by older upgrade.sh
   runs whose pop failed silently (Pride Corner has three from 2026-03-09
   alone). Warns loudly if any stash holds tenant mkdocs.yml content so
   operators verify recovery before dropping.

The .gitignore now excludes /mkdocs-backup-*.tar.gz so the rescue
archives don't leak into commits.

This is Phase 1 of three: Approach B (image-only upgrade mode) and
Approach C (CCP template re-render) follow in subsequent commits.
Bunker Admin
---
 .gitignore                       |   5 ++
 scripts/lib/mkdocs-snapshot.sh   |  96 ++++++++++++++++++
 scripts/upgrade-stash-cleanup.sh | 163 +++++++++++++++++++++++++++++++
 scripts/upgrade.sh               |  79 ++++++++++++++-
 4 files changed, 341 insertions(+), 2 deletions(-)
 create mode 100755 scripts/lib/mkdocs-snapshot.sh
 create mode 100755 scripts/upgrade-stash-cleanup.sh

diff --git a/.gitignore b/.gitignore
index dbb92fd..1b9ac84 100644
--- a/.gitignore
+++ b/.gitignore
@@ -64,6 +64,11 @@ core.*
 /backups/
 .upgrade.lock
 
+# Pre-upgrade mkdocs snapshots (created by scripts/lib/mkdocs-snapshot.sh).
+# These are the tenant-content rescue archives written before every upgrade;
+# discoverable in the install root via `ls`. Retention: last 5 (see helper).
+/mkdocs-backup-*.tar.gz
+
 # Release tarballs (generated by build-release.sh)
 /releases/
 
diff --git a/scripts/lib/mkdocs-snapshot.sh b/scripts/lib/mkdocs-snapshot.sh
new file mode 100755
index 0000000..fd60a12
--- /dev/null
+++ b/scripts/lib/mkdocs-snapshot.sh
@@ -0,0 +1,96 @@
+#!/usr/bin/env bash
+# =============================================================================
+# mkdocs-snapshot.sh — shared library function
+# =============================================================================
+# Defines snapshot_mkdocs(): writes a tarball of mkdocs/ into the install root
+# as mkdocs-backup-YYYYMMDD_HHMMSS.tar.gz, keeping the last 5 snapshots.
+#
+# Sourced by scripts/upgrade.sh and scripts/image-upgrade.sh (and may be
+# invoked agent-side by changemaker-control-panel during template re-render).
+#
+# Why the install root instead of backups/?
+#   - Discoverable: operators see mkdocs-backup-*.tar.gz with a plain `ls`.
+#   - The agent's /app/instance bind mount maps directly to the install root,
+#     so the agent can restore from this archive without path translation.
+#   - backups/ is owned by root in some installs (DB dumps via container)
+#     and gets rotated on a different schedule than docs snapshots.
+#
+# Restoration one-liner:
+#   tar xzf "$(ls -t mkdocs-backup-*.tar.gz | head -1)" -C . \
+#     && docker compose restart mkdocs mkdocs-site-server
+#
+# Requires: $PROJECT_DIR (absolute path to install root), info() function
+# from the caller (falls back to plain echo if info is not defined).
+# =============================================================================
+
+# Fallback log function if caller didn't define one (e.g. when sourcing standalone)
+if ! declare -F info >/dev/null 2>&1; then
+  info() { echo "[INFO] $*"; }
+fi
+if ! declare -F warn >/dev/null 2>&1; then
+  warn() { echo "[WARN] $*" >&2; }
+fi
+
+# snapshot_mkdocs — take a tarball of mkdocs/ into the install root.
+#
+# Returns 0 if successful (or if mkdocs/ doesn't exist — non-fatal).
+# Returns non-zero only if tar itself fails AND $SNAPSHOT_REQUIRED is true.
+#
+# Optional env vars:
+#   PROJECT_DIR        (required) Install root containing mkdocs/
+#   SNAPSHOT_KEEP      Snapshots to retain; positive integer (default 5)
+#   SNAPSHOT_REQUIRED  If "true", a tar failure makes this function return 1
+#                      so the caller can abort (default false)
+snapshot_mkdocs() {
+  if [[ -z "${PROJECT_DIR:-}" ]]; then
+    warn "snapshot_mkdocs: PROJECT_DIR not set; skipping"
+    return 0
+  fi
+
+  if [[ ! -d "${PROJECT_DIR}/mkdocs" ]]; then
+    # No mkdocs dir = nothing to snapshot. Common on minimal installs.
+    return 0
+  fi
+
+  # SNAPSHOT_KEEP is used in shell arithmetic below. A non-numeric value would
+  # be evaluated as a variable name (fatal under `set -u`), and 0 would prune
+  # the snapshot written by this very call, so only accept positive integers.
+  local keep="${SNAPSHOT_KEEP:-5}"
+  if [[ ! "$keep" =~ ^[1-9][0-9]*$ ]]; then
+    warn "snapshot_mkdocs: invalid SNAPSHOT_KEEP='${keep}'; using 5"
+    keep=5
+  fi
+
+  local stamp
+  stamp="$(date +%Y%m%d_%H%M%S)"
+  local archive="${PROJECT_DIR}/mkdocs-backup-${stamp}.tar.gz"
+
+  # Capture tar's stderr so a failure is reported with its reason instead of
+  # being discarded.
+  local tar_err
+  if tar_err="$(tar czf "$archive" -C "$PROJECT_DIR" mkdocs 2>&1)"; then
+    local size
+    size="$(du -h "$archive" 2>/dev/null | cut -f1)"
+    info "Tenant docs snapshot: $(basename "$archive") (${size})"
+  else
+    warn "snapshot_mkdocs: tar failed for $archive${tar_err:+: ${tar_err}}"
+    rm -f -- "$archive" 2>/dev/null
+    if [[ "${SNAPSHOT_REQUIRED:-false}" == "true" ]]; then
+      return 1
+    fi
+    return 0
+  fi
+
+  # Retention: keep the most recent N snapshots, prune older ones.
+  # `ls -t` lists newest first, one path per line. Reading that list into an
+  # array (instead of piping it through xargs) keeps paths that contain
+  # spaces intact.
+  local -a snapshots=()
+  # shellcheck disable=SC2012 # ls is intentional for mtime sort
+  mapfile -t snapshots < <(ls -t -- "${PROJECT_DIR}"/mkdocs-backup-*.tar.gz 2>/dev/null)
+  if (( ${#snapshots[@]} > keep )); then
+    rm -f -- "${snapshots[@]:${keep}}"
+  fi
+
+  return 0
+}
diff --git a/scripts/upgrade-stash-cleanup.sh b/scripts/upgrade-stash-cleanup.sh
new file mode 100755
index 0000000..60d51b1
--- /dev/null
+++ b/scripts/upgrade-stash-cleanup.sh
@@ -0,0 +1,163 @@
+#!/usr/bin/env bash
+# =============================================================================
+# upgrade-stash-cleanup.sh — clean up stale upgrade-* git stashes
+# =============================================================================
+# Older versions of upgrade.sh used `git stash push --include-untracked` to
+# protect tenant content during pulls. When pop conflicts went unresolved,
+# the stashes accumulated in `git stash list` forever — Pride Corner ended up
+# with three from 2026-03-09 alone, each containing displaced tenant
+# customizations that the running site no longer reflected.
+#
+# This script lists every `upgrade-*` stash, shows its scope, and offers to
+# drop them. It does NOT auto-restore content; that's a separate decision per
+# tenant. The intent is to clear the backlog so future `git stash list` is
+# meaningful.
+#
+# Usage:
+#   bash scripts/upgrade-stash-cleanup.sh          # interactive, lists + prompts
+#   bash scripts/upgrade-stash-cleanup.sh --dry    # list only
+#   bash scripts/upgrade-stash-cleanup.sh --yes    # drop all upgrade-* without prompt
+# =============================================================================
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
+cd "$PROJECT_DIR"
+
+# Colors
+if [[ -t 1 ]] && [[ -z "${NO_COLOR:-}" ]]; then
+  RED='\033[0;31m' GREEN='\033[0;32m' YELLOW='\033[0;33m' CYAN='\033[0;36m'
+  BOLD='\033[1m' NC='\033[0m'
+else
+  RED='' GREEN='' YELLOW='' CYAN='' BOLD='' NC=''
+fi
+
+info() { echo -e "${CYAN}[INFO]${NC} $*"; }
+ok()   { echo -e "${GREEN}[ OK ]${NC} $*"; }
+warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
+
+# Print the header comment block (the lines after the shebang, up to the
+# first non-comment line) with the leading "# " stripped.
+usage() {
+  awk 'NR == 1 { next } /^#/ { sub(/^# ?/, ""); print; next } { exit }' "$0"
+}
+
+DRY=false
+YES=false
+for arg in "$@"; do
+  case "$arg" in
+    --dry|--dry-run) DRY=true ;;
+    --yes|-y) YES=true ;;
+    --help|-h)
+      usage
+      exit 0
+      ;;
+    *)
+      # This tool deletes stashes; refuse to guess what a mistyped flag meant.
+      echo "Unknown option: $arg (see --help)" >&2
+      exit 2
+      ;;
+  esac
+done
+
+# .git is a directory in a normal clone but a file in a linked worktree.
+if [[ ! -e .git ]]; then
+  warn "Not a git repository — this script only applies to source installs."
+  exit 0
+fi
+
+# Collect upgrade-* stash refs
+mapfile -t STASHES < <(git stash list 2>/dev/null | grep -E ': (On|WIP on) [^:]+: upgrade-' || true)
+
+if [[ ${#STASHES[@]} -eq 0 ]]; then
+  ok "No upgrade-* stashes found. Nothing to clean up."
+  exit 0
+fi
+
+# List each stash and remember which ones carry mkdocs/mkdocs.yml.
+# NOTE: by default `git stash show` reports tracked changes only, so files
+# that were captured solely via --include-untracked are not counted here.
+MKDOCS_REFS=()
+echo ""
+echo -e "${BOLD}Found ${#STASHES[@]} upgrade-* stash(es):${NC}"
+echo ""
+for entry in "${STASHES[@]}"; do
+  REF="${entry%%:*}"
+  LABEL="${entry#*: }"
+  FILES="$(git stash show "$REF" --name-only 2>/dev/null || true)"
+  FILE_COUNT=0
+  HAS_MKDOCS_YML=0
+  if [[ -n "$FILES" ]]; then
+    FILE_COUNT="$(grep -c '' <<< "$FILES")"
+    if grep -qx 'mkdocs/mkdocs\.yml' <<< "$FILES"; then
+      HAS_MKDOCS_YML=1
+      MKDOCS_REFS+=("$REF")
+    fi
+  fi
+  printf "  %-12s %-50s files=%-4d mkdocs.yml=%s\n" \
+    "$REF" "$LABEL" "$FILE_COUNT" "$HAS_MKDOCS_YML"
+done
+echo ""
+
+if [[ "$DRY" == "true" ]]; then
+  info "Dry-run: no stashes will be dropped."
+  exit 0
+fi
+
+# Warn loudly if any stash holds mkdocs.yml — operator should manually review
+# before dropping (tenant content might be there).
+if (( ${#MKDOCS_REFS[@]} > 0 )); then
+  # Use a real ref in the suggested commands so they can be pasted as-is;
+  # `git show :path` without a ref would print the index copy instead.
+  EXAMPLE_REF="${MKDOCS_REFS[0]}"
+  echo ""
+  echo -e "${RED}${BOLD}⚠ WARNING:${NC} the following stashes contain ${BOLD}mkdocs/mkdocs.yml${NC}:"
+  printf '    %s\n' "${MKDOCS_REFS[@]}"
+  echo ""
+  echo "  These may hold tenant branding (site_name, site_url, custom theme, etc.)"
+  echo "  that ISN'T reflected on disk. Before dropping, verify each one, e.g.:"
+  echo ""
+  echo "    git show '${EXAMPLE_REF}:mkdocs/mkdocs.yml' | head -10"
+  echo "    diff <(git show '${EXAMPLE_REF}:mkdocs/mkdocs.yml') mkdocs/mkdocs.yml"
+  echo ""
+  echo "  If disk mkdocs.yml already has the tenant content, the stash is safe to drop."
+  echo "  If disk is upstream and stash has tenant content, restore first:"
+  echo "    git checkout '${EXAMPLE_REF}' -- mkdocs/mkdocs.yml"
+  echo ""
+fi
+
+if [[ "$YES" != "true" ]]; then
+  echo -en "${BOLD}Drop all ${#STASHES[@]} upgrade-* stashes? [y/N] ${NC}"
+  # Treat EOF (no terminal, closed stdin) as "no" instead of letting the
+  # failed read abort the script under `set -e`.
+  read -r CONFIRM || CONFIRM=""
+  case "$CONFIRM" in
+    y|Y|yes|YES) ;;
+    *) info "Cancelled. No stashes dropped."; exit 0 ;;
+  esac
+fi
+
+# Drop in reverse order so indices stay stable
+mapfile -t SORTED_REFS < <(printf '%s\n' "${STASHES[@]}" \
+  | sed 's/:.*//' \
+  | sort -t'{' -k2 -n -r)
+
+for REF in "${SORTED_REFS[@]}"; do
+  if git stash drop "$REF" >/dev/null 2>&1; then
+    ok "Dropped $REF"
+  else
+    warn "Failed to drop $REF (already gone?)"
+  fi
+done
+
+echo ""
+ok "Cleanup complete. Remaining stashes:"
+# `git stash list` exits 0 with no output when nothing is stashed, so test
+# the output rather than the exit status.
+REMAINING="$(git stash list 2>/dev/null || true)"
+if [[ -n "$REMAINING" ]]; then
+  printf '%s\n' "$REMAINING"
+else
+  echo "  (none)"
+fi
diff --git a/scripts/upgrade.sh b/scripts/upgrade.sh
index 8e2b95e..425b10e 100755
--- a/scripts/upgrade.sh
+++ b/scripts/upgrade.sh
@@ -95,6 +95,14 @@ phase() {
   echo ""
 }
 
+# Pre-upgrade tenant docs snapshot (no-regrets fallback). Sourced regardless
+# of install mode so snapshot_mkdocs is available in Phase 2.
+# shellcheck source=lib/mkdocs-snapshot.sh
+if [[ -f "$SCRIPT_DIR/lib/mkdocs-snapshot.sh" ]]; then
+  # shellcheck disable=SC1091
+  . "$SCRIPT_DIR/lib/mkdocs-snapshot.sh"
+fi
+
 # --- API mode: JSON progress/result writing ---
 UPGRADE_DIR="${PROJECT_DIR}/data/upgrade"
 PROGRESS_FILE="${UPGRADE_DIR}/progress.json"
@@ -709,6 +717,18 @@ fi
 phase "2" "Backup"
 write_progress 2 "Backup" 15 "Creating backup..."
 
+# Pre-upgrade tenant docs snapshot — the no-regrets fallback. Runs even when
+# --skip-backup is set, because this is for tenant content recovery (not DB
+# state) and is fast enough that skipping it would never be intentional. It
+# lives in the install root (not backups/) so operators discover it via `ls`.
+if declare -F snapshot_mkdocs >/dev/null 2>&1; then
+  if [[ "$DRY_RUN" == "true" ]]; then
+    info "[DRY RUN] Would snapshot mkdocs/ to ${PROJECT_DIR}/mkdocs-backup-*.tar.gz"
+  else
+    snapshot_mkdocs || warn "mkdocs snapshot failed (non-fatal; continuing)"
+  fi
+fi
+
 if [[ "$SKIP_BACKUP" == "true" ]]; then
   warn "Backup skipped (--skip-backup --force)"
 else
@@ -1284,13 +1304,36 @@ while true; do
 done
 success "API healthy (${API_WAIT}s)"
 
-# Start everything else (exclude one-shot init containers)
+# Start everything else (exclude one-shot init containers AND the ccp-agent
+# service that's running this very script). Recreating ccp-agent here would
+# SIGKILL the script process before write_result has a chance to run; we
+# instead schedule a detached restart at the very end of the script.
+#
+# Mechanism: temporarily drop "ccp-agent" from COMPOSE_PROFILES so the broad
+# `up -d` doesn't include it. We re-add it only when scheduling the deferred
+# restart so the new agent comes up under its profile.
 info "Starting remaining services..."
+PROFILES_SAVED="${COMPOSE_PROFILES:-}"
+# Compose can also take COMPOSE_PROFILES from the project's .env file. When
+# the variable is not set in this shell, read it from there so the agent is
+# still excluded here (and restarted at the end).
+# NOTE(review): presumably a shell-level COMPOSE_PROFILES, even an empty one,
+# overrides .env — without this fallback the override below would then blank
+# out profiles that are only configured in .env. Confirm against Compose docs.
+if [[ -z "${COMPOSE_PROFILES+x}" && -f "${PROJECT_DIR}/.env" ]]; then
+  PROFILES_SAVED="$(sed -n 's/^COMPOSE_PROFILES=//p' "${PROJECT_DIR}/.env" \
+    | tail -n 1 | tr -d "\"' \r")"
+fi
+# grep exits 1 when it filters out every line (ccp-agent was the only
+# profile); `|| true` keeps that from aborting under errexit + pipefail.
+COMPOSE_PROFILES_WITHOUT_AGENT="$(printf '%s\n' "${PROFILES_SAVED}" \
+  | tr ',' '\n' | { grep -vx 'ccp-agent' || true; } | paste -sd, -)"
+COMPOSE_PROFILES="${COMPOSE_PROFILES_WITHOUT_AGENT}" \
 docker compose up -d \
   --scale listmonk-init=0 \
   --scale gancio-init=0 \
   --scale vaultwarden-init=0
-success "All services started"
+success "All services started (ccp-agent restart deferred to end-of-script)"
 
 # Restart Pangolin tunnel connector if running (may hold stale state after nginx rebuild)
 if docker ps --format '{{.Names}}' | grep -q 'newt'; then
@@ -1461,5 +1504,37 @@ echo -e " ${BOLD}Duration:${NC} $ELAPSED"
 echo -e " ${BOLD}Log:${NC} $LOG_FILE"
 echo ""
 
+# Deferred ccp-agent restart — the LAST thing the script does before exit.
+# This must run AFTER write_result and archive_success_to_history so the new
+# agent comes up to a complete result.json (otherwise CCP polls forever).
+# We launch a detached subshell that:
+#   1. Sleeps briefly so this script has time to exit cleanly first.
+#   2. Restarts ccp-agent under its profile, picking up any new image.
+# `nohup` + `disown` detach the helper from this script's shell so it is not
+# taken down when the script exits.
+# NOTE(review): detaching does not move the helper out of the container. If
+# this script runs inside the ccp-agent container, the helper is killed
+# together with the container the moment Compose recreates it, which can
+# leave the agent stopped. Confirm where this runs; if it is in-container,
+# launch the restart from the host or from a separate helper container.
+if printf '%s\n' "${PROFILES_SAVED:-}" | tr ',' '\n' | grep -qx 'ccp-agent'; then
+  info "Scheduling deferred ccp-agent restart..."
+  # Keep the helper's output when the upgrade data dir is available so a
+  # failed restart leaves a trace; otherwise discard it.
+  AGENT_RESTART_LOG=/dev/null
+  if [[ -d "${UPGRADE_DIR:-}" && -w "${UPGRADE_DIR:-}" ]]; then
+    AGENT_RESTART_LOG="${UPGRADE_DIR}/ccp-agent-restart.log"
+  fi
+  # The install path is passed as a positional argument rather than being
+  # interpolated into the command string, so quotes in it cannot break it.
+  nohup bash -c '
+    sleep 3
+    cd "$1" || exit 1
+    COMPOSE_PROFILES=ccp-agent docker compose --profile ccp-agent up -d ccp-agent
+  ' _ "$PROJECT_DIR" >"$AGENT_RESTART_LOG" 2>&1 < /dev/null &
+  disown
+  success "ccp-agent restart scheduled (will pick up new image)"
+fi
+
 release_lock
 trap - EXIT