#!/usr/bin/env bash # image-upgrade.sh — Approach B: image-only upgrade # # Pulls latest images from the registry and recreates services WITHOUT touching # tracked files in the install tree (no git pull, no tarball extract, no VERSION # mutation). Tenant content (mkdocs/, configs/) is implicitly safe because this # script never writes outside data/upgrade/ and the docker daemon. # # Used by CCP "Quick Upgrade" button. Pairs with scripts/upgrade.sh which # remains the full upgrade path for orchestration-changing releases. # # Schema parity: writes data/upgrade/progress.json + result.json with the same # fields upgrade.sh writes, so the CCP poll loop is unchanged. set -euo pipefail PROJECT_DIR="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")/.." && pwd)" SCRIPT_DIR="$PROJECT_DIR/scripts" UPGRADE_DIR="$PROJECT_DIR/data/upgrade" LOG_DIR="$PROJECT_DIR/logs" LOG_FILE="$LOG_DIR/image-upgrade-$(date +%Y%m%d_%H%M%S).log" LOCK_FILE="$PROJECT_DIR/.upgrade.lock" PROGRESS_FILE="$UPGRADE_DIR/progress.json" RESULT_FILE="$UPGRADE_DIR/result.json" START_TIME=$SECONDS # --- Detect install mode --- if [[ -f "$PROJECT_DIR/VERSION" ]] && [[ ! -d "$PROJECT_DIR/.git" ]]; then INSTALL_MODE="release" else INSTALL_MODE="source" fi # --- Defaults --- API_MODE=false DRY_RUN=false IMAGE_TAG="" usage() { cat <&2; usage >&2; exit 1 ;; esac done # --- Colors --- if [[ -t 1 ]] && [[ -z "${NO_COLOR:-}" ]]; then RED='\033[0;31m' GREEN='\033[0;32m' YELLOW='\033[0;33m' CYAN='\033[0;36m' BOLD='\033[1m' NC='\033[0m' else RED='' GREEN='' YELLOW='' CYAN='' BOLD='' NC='' fi info() { echo -e "${CYAN}[INFO]${NC} $*"; } success() { echo -e "${GREEN}[ OK ]${NC} $*"; } warn() { echo -e "${YELLOW}[WARN]${NC} $*"; } error() { echo -e "${RED}[ERR ]${NC} $*" >&2; } phase() { echo ""; echo -e "${BOLD}${CYAN}=== Phase $1: $2 ===${NC}"; } # --- Logging: mirror stdout/stderr to LOG_FILE --- # logs/ may be root-owned on installs where upgrade.sh has run via ccp-agent. 
# Fall back to /tmp if we can't write, so bunker-admin manual invocations don't # crash with "Permission denied" on tee. mkdir -p "$UPGRADE_DIR" if mkdir -p "$LOG_DIR" 2>/dev/null && touch "$LOG_FILE" 2>/dev/null; then : # primary log location is writable else LOG_FILE="/tmp/image-upgrade-$(date +%Y%m%d_%H%M%S)-$$.log" echo "[INFO] logs/ not writable; using $LOG_FILE" >&2 fi exec > >(tee -a "$LOG_FILE") 2>&1 # --- Capture previous version for result.json --- if [[ "$INSTALL_MODE" == "release" ]]; then PRE_VERSION="$(head -1 "$PROJECT_DIR/VERSION" 2>/dev/null || echo "unknown")" else PRE_VERSION="$(cd "$PROJECT_DIR" && git rev-parse --short HEAD 2>/dev/null || echo "unknown")" fi write_progress() { local phase_num="$1" phase_name="$2" pct="$3" msg="$4" [[ "$API_MODE" != "true" ]] && return mkdir -p "$UPGRADE_DIR" cat > "$PROGRESS_FILE" </dev/null || echo "$PRE_VERSION")" else new_version="$(cd "$PROJECT_DIR" && git rev-parse --short HEAD 2>/dev/null || echo "$PRE_VERSION")" fi mkdir -p "$UPGRADE_DIR" cat > "$RESULT_FILE" </dev/null || echo "")" if [[ -n "$pid" ]] && kill -0 "$pid" 2>/dev/null; then error "Upgrade already running (pid $pid). Refusing to start." write_result "false" "Another upgrade is already running (pid $pid)" exit 1 fi warn "Stale lock file found; removing" rm -f "$LOCK_FILE" fi echo $$ > "$LOCK_FILE" } release_lock() { rm -f "$LOCK_FILE" || true; } on_failure() { local exit_code=$? 
local line_no=${1:-?} error "image-upgrade.sh failed at line $line_no (exit $exit_code)" write_result "false" "Image upgrade failed at line $line_no (exit $exit_code)" release_lock exit "$exit_code" } trap 'on_failure $LINENO' ERR trap 'release_lock' EXIT # --- Banner --- echo "" echo -e "${BOLD}${CYAN}================================================${NC}" echo -e "${BOLD} Image-Only Upgrade${NC}" echo -e "${BOLD}${CYAN}================================================${NC}" echo "Install mode: $INSTALL_MODE" echo "Project dir: $PROJECT_DIR" echo "Pre-version: $PRE_VERSION" [[ -n "$IMAGE_TAG" ]] && echo "Image tag: $IMAGE_TAG" [[ "$DRY_RUN" == "true" ]] && echo "DRY RUN: no images will be pulled or services recreated" echo "" acquire_lock # ============================================================================= # Phase 1: Pre-flight + mkdocs snapshot (defensive) # ============================================================================= phase "1" "Pre-flight" write_progress 1 "Pre-flight" 10 "Snapshotting mkdocs (defensive)..." # Source mkdocs-snapshot.sh and run it. This is the same snapshot every # upgrade path takes — leaves mkdocs-backup-.tar.gz in project root. # Image-only upgrades shouldn't damage mkdocs (no filesystem mutation), but # the snapshot is cheap insurance and keeps operator habits consistent. if [[ -r "$SCRIPT_DIR/lib/mkdocs-snapshot.sh" ]]; then if [[ "$DRY_RUN" == "true" ]]; then info "[DRY RUN] Would snapshot mkdocs/" else # shellcheck disable=SC1091 PROJECT_DIR="$PROJECT_DIR" bash -c ". $SCRIPT_DIR/lib/mkdocs-snapshot.sh; snapshot_mkdocs" \ || warn "mkdocs snapshot failed (non-fatal; continuing)" fi else warn "scripts/lib/mkdocs-snapshot.sh not found; skipping snapshot" fi # Sanity-check docker if ! 
docker compose version &>/dev/null; then error "docker compose is not available" write_result "false" "docker compose not available" exit 1 fi success "Pre-flight checks passed" # ============================================================================= # Phase 2: Pull images # ============================================================================= phase "2" "Pull Images" write_progress 2 "Pull Images" 30 "Pulling images from registry..." PULL_ENV=() if [[ -n "$IMAGE_TAG" ]]; then PULL_ENV+=("IMAGE_TAG=$IMAGE_TAG") fi if [[ "$DRY_RUN" == "true" ]]; then info "[DRY RUN] Would run: ${PULL_ENV[*]:-} docker compose pull" else info "Pulling all images (this may take a few minutes)..." if (( ${#PULL_ENV[@]} > 0 )); then if ! env "${PULL_ENV[@]}" docker compose pull; then warn "docker compose pull had errors (continuing — some images may be local)" fi else if ! docker compose pull; then warn "docker compose pull had errors (continuing — some images may be local)" fi fi fi success "Image pull complete" # ============================================================================= # Phase 3: Recreate core app services (targeted, not broad) # ============================================================================= phase "3" "Recreate Services" write_progress 3 "Recreate Services" 60 "Recreating core app services with new images..." # Targeted recreate: only the services whose IMAGES are released as part of # changemaker.lite (api, admin, media-api, nginx). Broader `up -d` is risky # because a single misconfigured mount in any service (e.g. mkdocs-site-server) # can cascade and leave dependent containers in "Created" state. Image-only # upgrade should only touch the actual code containers, not third-party # infrastructure that happens to live in the same compose file. # # Same Phase 6 pattern as upgrade.sh: drop ccp-agent from COMPOSE_PROFILES # during recreate so we don't suicide-restart the agent that spawned us. 
# Restart ccp-agent at the end via detached subshell.
#
# NOTE(review): docker compose resolves the compose file from the current
# working directory and no `cd "$PROJECT_DIR"` is visible in this section;
# confirm the script is always invoked from the project root.
PROFILES_SAVED="${COMPOSE_PROFILES:-}"

# Strip ccp-agent from the comma-separated profile list. `grep -v` exits 1
# when it selects no lines (COMPOSE_PROFILES is exactly "ccp-agent"); under
# `set -euo pipefail` that would fail the assignment and abort the upgrade
# through the ERR trap, so an empty selection is tolerated explicitly.
COMPOSE_PROFILES_WITHOUT_AGENT="$(
  tr ',' '\n' <<<"$PROFILES_SAVED" \
    | { grep -vx 'ccp-agent' || true; } \
    | paste -sd, -
)"

# Environment overrides handed to `docker compose up` through env(1).
UP_ENV=("COMPOSE_PROFILES=${COMPOSE_PROFILES_WITHOUT_AGENT}")
if [[ -n "$IMAGE_TAG" ]]; then
  UP_ENV+=("IMAGE_TAG=$IMAGE_TAG")
fi

# Core services that ship as v2 release images. nginx last so it doesn't
# briefly proxy to an old api. media-api may not be enabled on all installs;
# tolerate it being missing from compose.
CORE_SERVICES=(api admin media-api nginx)
EXISTING_SERVICES=()

# Capture the service list once. Don't pipe `docker compose config` into
# `grep -q` directly: with `set -o pipefail`, grep exits early on match and
# SIGPIPEs the docker writer, making the pipeline exit non-zero, so every
# service would be reported as missing. Capture-then-check avoids it.
COMPOSE_SERVICES_LIST="$(docker compose config --services 2>/dev/null || true)"
for svc in "${CORE_SERVICES[@]}"; do
  if grep -qxF -- "$svc" <<<"$COMPOSE_SERVICES_LIST"; then
    EXISTING_SERVICES+=("$svc")
  else
    info "Skipping service '$svc' (not in compose file)"
  fi
done

if (( ${#EXISTING_SERVICES[@]} == 0 )); then
  warn "No core app services found in compose; skipping recreate"
elif [[ "$DRY_RUN" == "true" ]]; then
  info "[DRY RUN] Would run: ${UP_ENV[*]} docker compose up -d ${EXISTING_SERVICES[*]}"
else
  info "Recreating core services: ${EXISTING_SERVICES[*]}"
  env "${UP_ENV[@]}" docker compose up -d "${EXISTING_SERVICES[@]}"
fi
success "Services recreated"

#######################################
# Report whether any running container name matches a pattern.
# The `docker ps` output is captured before matching for the same reason as
# the service list above: `docker ps | grep -q` can fail spuriously under
# pipefail when grep exits early.
# Arguments: $1 - grep basic-regex pattern matched against container names
# Returns:   0 if at least one running container name matches, else non-zero
#######################################
_container_running() {
  local pattern="$1"
  local names
  names="$(docker ps --format '{{.Names}}' || true)"
  grep -q -- "$pattern" <<<"$names"
}

# Restart Pangolin tunnel connector if running (image may have changed)
if _container_running 'newt'; then
  if [[ "$DRY_RUN" == "true" ]]; then
    info "[DRY RUN] Would restart newt"
  else
    info "Restarting Pangolin tunnel connector..."
    # Best-effort: a failed restart must not abort the upgrade, but it is
    # reported as a warning instead of being logged as a success.
    if docker compose restart newt 2>/dev/null; then
      success "Newt tunnel restarted"
    else
      warn "Newt tunnel restart failed (non-fatal; continuing)"
    fi
  fi
fi

# =============================================================================
# Phase 4: Verify (light health checks)
# =============================================================================
phase "4" "Verification"
write_progress 4 "Verification" 85 "Running health checks..."

VERIFY_FAILED=false
UPGRADE_WARNINGS="[]"

#######################################
# Poll a health-check command every 3 seconds until it succeeds or times out.
# Globals:   VERIFY_FAILED (set to "true" on timeout)
# Arguments: $1 - display name used in log messages
#            $2 - shell command string, run with eval; callers in this script
#                 pass fixed literals only (never pass untrusted input)
#            $3 - maximum wait in seconds (default 45)
# Returns:   always 0, so a failed check never aborts the upgrade
#######################################
verify_health() {
  local name="$1" check_cmd="$2" max_wait="${3:-45}"
  local waited=0
  while (( waited < max_wait )); do
    if eval "$check_cmd" 2>/dev/null; then
      success "$name: healthy (${waited}s)"
      return 0
    fi
    sleep 3
    waited=$((waited + 3))
  done
  warn "$name: not responding after ${max_wait}s"
  VERIFY_FAILED=true
  return 0
}

if [[ "$DRY_RUN" != "true" ]]; then
  verify_health "API (port 4000)" \
    "docker compose exec -T api wget -q --spider http://localhost:4000/api/health" 60
  verify_health "Admin (port 3000)" \
    "docker compose exec -T admin wget -q --spider http://localhost:3000/" 90
  # media-api is optional; only probe it when its container is running.
  if _container_running 'changemaker-media-api'; then
    verify_health "Media API (port 4100)" \
      "docker compose exec -T media-api wget -q --spider http://127.0.0.1:4100/health" 30
  fi
  if [[ "$VERIFY_FAILED" == "true" ]]; then
    UPGRADE_WARNINGS='["Some health checks failed after image-only upgrade — services may still be starting"]'
  fi
fi

# =============================================================================
# Summary + deferred ccp-agent restart
# =============================================================================
ELAPSED_MIN=$(( (SECONDS - START_TIME) / 60 ))
ELAPSED_SEC=$(( (SECONDS - START_TIME) % 60 ))

echo ""
echo -e "${BOLD}${GREEN}================================================${NC}"
echo -e "${BOLD}  Image-Only Upgrade Complete${NC}"
echo -e "${BOLD}${GREEN}================================================${NC}"
printf "  Previous:  %s\n" "$PRE_VERSION"
printf "  Duration:  %dm %ds\n" "$ELAPSED_MIN" "$ELAPSED_SEC"
printf "  Log:       %s\n" "$LOG_FILE"

write_progress 4 "Complete" 100 "Image-only upgrade complete"
write_result "true" "Image-only upgrade complete (previous: ${PRE_VERSION})" "$UPGRADE_WARNINGS"

# Deferred ccp-agent restart — see upgrade.sh for full rationale. Same
# mechanism: nohup'd, disowned subshell that picks up the new image after
# this script has cleanly exited.
#
# The profile list is matched as whole comma-separated items, so only an
# exact "ccp-agent" entry triggers the restart.
if [[ ",${PROFILES_SAVED}," == *",ccp-agent,"* ]]; then
  if [[ "$DRY_RUN" == "true" ]]; then
    info "[DRY RUN] Would schedule deferred ccp-agent restart"
  else
    info "Scheduling deferred ccp-agent restart..."
    # PROJECT_DIR is passed as a positional argument instead of being spliced
    # into the command string, so paths containing quotes cannot break (or
    # inject into) the inner shell. The inner shell stops if cd fails so that
    # docker compose never runs from an unexpected directory.
    # NOTE(review): IMAGE_TAG is not forwarded to this restart; confirm the
    # ccp-agent image does not depend on it.
    nohup bash -c '
      sleep 3
      cd "$1" || exit 1
      COMPOSE_PROFILES="ccp-agent" docker compose --profile ccp-agent up -d ccp-agent
    ' _ "$PROJECT_DIR" >/dev/null 2>&1 </dev/null &
    disown
    success "ccp-agent restart scheduled (will pick up new image)"
  fi
fi

# Release the lock explicitly, then clear the EXIT trap so it is not run a
# second time on the way out.
release_lock
trap - EXIT
exit 0