Add a "Quick Upgrade" path that pulls latest container images and recreates only the core app services (api, admin, media-api, nginx) without touching any tracked files. Tenant content (mkdocs/, configs/, scripts/) is implicitly preserved because the script never writes outside docker. Faster (~2 min vs ~4-5 min for full upgrade) and structurally safer for releases that don't change orchestration/templates. Pieces: - scripts/image-upgrade.sh: new ~350-line script. Phases: pre-flight + mkdocs snapshot, image pull, targeted recreate (broad up -d would cascade on misconfigured infra containers — proven on marcelle), light health checks, deferred ccp-agent restart. Writes the same progress.json + result.json schema as upgrade.sh so the CCP poll loop is unchanged. - agent/src/routes/upgrade.routes.ts: POST /instance/:slug/upgrade/start-image-only. Same lock + staleness guards as the existing /upgrade/start endpoint. - api/src/services/remote-driver.ts: RemoteDriver.startImageUpgrade(). - api/src/services/upgrade.service.ts: startImageUpgrade() entry point; reuses runRemoteUpgrade with mode='image-only' (only the initial agent call differs — result schema and polling are identical). - api/src/modules/instances/instances.routes.ts: POST /:id/upgrade-images + startImageUpgradeSchema. - admin/src/pages/InstanceDetailPage.tsx: secondary "Quick Upgrade" button next to "Upgrade Now" on the Updates tab. Tooltip explains when to use it. Tested locally on marcelle (v2.10.2 idempotent run): 1m 49s, mkdocs.yml md5 unchanged, file count unchanged, only api/admin/media-api/nginx touched. Subtle bug found and fixed: with `set -o pipefail`, `grep -q` closes the pipe early on the first match and SIGPIPEs the writer — captured services list once instead. Bunker Admin
384 lines
14 KiB
Bash
Executable File
384 lines
14 KiB
Bash
Executable File
#!/usr/bin/env bash
# image-upgrade.sh — Approach B: image-only upgrade
#
# Pulls the latest images from the registry and recreates services WITHOUT
# changing tracked files in the install tree: no git pull, no tarball extract,
# no VERSION mutation. Tenant content (mkdocs/, configs/) is never edited. The
# files this script itself writes are data/upgrade/*.json, a log file under
# logs/ (or /tmp as a fallback) and the .upgrade.lock file; the optional
# mkdocs snapshot helper may additionally leave a backup archive behind.
#
# Used by the CCP "Quick Upgrade" button. scripts/upgrade.sh remains the full
# upgrade path for orchestration-changing releases.
#
# Schema parity: data/upgrade/progress.json and result.json carry the same
# fields upgrade.sh writes, so the CCP poll loop is unchanged.

set -euo pipefail

# Install root = parent of the directory holding this script (symlinks resolved).
PROJECT_DIR="$(
  cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")/.." && pwd
)"

# Derived locations inside the install root.
SCRIPT_DIR="$PROJECT_DIR/scripts"
UPGRADE_DIR="$PROJECT_DIR/data/upgrade"
LOG_DIR="$PROJECT_DIR/logs"
LOG_FILE="$LOG_DIR/image-upgrade-$(date +%Y%m%d_%H%M%S).log"
LOCK_FILE="$PROJECT_DIR/.upgrade.lock"
PROGRESS_FILE="$UPGRADE_DIR/progress.json"
RESULT_FILE="$UPGRADE_DIR/result.json"

# Reference point for the duration reported at the end of the run.
START_TIME=$SECONDS

# --- Detect install mode ---
# "release": a VERSION file exists and there is no .git directory.
# "source":  everything else.
INSTALL_MODE="source"
if [[ -f "$PROJECT_DIR/VERSION" && ! -d "$PROJECT_DIR/.git" ]]; then
  INSTALL_MODE="release"
fi

# --- Defaults (overridden by command-line flags) ---
API_MODE=false
DRY_RUN=false
IMAGE_TAG=""
|
# Print the command-line help text to stdout.
usage() {
  printf '%s\n' \
    "Usage: $(basename "$0") [options]" \
    "" \
    "Image-only upgrade: pulls latest images from the configured registry and" \
    "recreates services without touching the install tree." \
    "" \
    "Options:" \
    "--api-mode Emit data/upgrade/{progress,result}.json (no TTY output)" \
    "--dry-run Print what would happen; do not pull or recreate" \
    "--image-tag TAG Override IMAGE_TAG (env var) for this run" \
    "-h, --help Show this help" \
    "" \
    "This script never modifies mkdocs/, configs/, scripts/, docker-compose.yml," \
    "or VERSION. It is the safest upgrade path for orchestration-stable releases."
}
|
|
|
|
# Consume command-line flags one at a time until none are left. Unknown flags
# print the help text on stderr and abort with status 1.
until (( $# == 0 )); do
  case "$1" in
    --api-mode)
      API_MODE=true
      shift
      ;;
    --dry-run)
      DRY_RUN=true
      shift
      ;;
    --image-tag)
      # ${2:?...} aborts with the given message when the value is missing.
      IMAGE_TAG="${2:?--image-tag requires a value}"
      shift 2
      ;;
    -h | --help)
      usage
      exit 0
      ;;
    *)
      echo "Unknown option: $1" >&2
      usage >&2
      exit 1
      ;;
  esac
done
|
|
|
|
# --- Colors ---
# Start with colour disabled; enable the escape sequences only when stdout is
# a terminal and NO_COLOR is unset or empty.
RED='' GREEN='' YELLOW='' CYAN='' BOLD='' NC=''
if [[ -t 1 && -z "${NO_COLOR:-}" ]]; then
  RED='\033[0;31m'
  GREEN='\033[0;32m'
  YELLOW='\033[0;33m'
  CYAN='\033[0;36m'
  BOLD='\033[1m'
  NC='\033[0m'
fi
|
|
# Shared formatter for the tagged log helpers below.
#   $1 - colour escape (may be empty), $2 - bracketed tag, rest - message words
# The message goes through `echo -e`, so backslash escapes in it are expanded.
_log_line() {
  local color="$1" tag="$2"
  shift 2
  echo -e "${color}${tag}${NC} $*"
}

# Tagged single-line messages; error() writes to stderr, the rest to stdout.
info() { _log_line "$CYAN" "[INFO]" "$@"; }
success() { _log_line "$GREEN" "[ OK ]" "$@"; }
warn() { _log_line "$YELLOW" "[WARN]" "$@"; }
error() { _log_line "$RED" "[ERR ]" "$@" >&2; }

# Print a blank line followed by a phase banner.
#   $1 - phase number, $2 - phase title
phase() {
  echo ""
  echo -e "${BOLD}${CYAN}=== Phase $1: $2 ===${NC}"
}
|
|
|
|
# --- Logging: mirror stdout/stderr to LOG_FILE ---
# logs/ may be root-owned on installs where upgrade.sh has run via ccp-agent.
# If the primary location is not writable, fall back to a file under /tmp so
# manual invocations don't crash with "Permission denied" on tee.
#
# The fallback is created with mktemp rather than a date+pid name: a
# predictable path in a world-writable directory can be pre-created (or
# symlinked) by another local user before we append to it.
mkdir -p "$UPGRADE_DIR"
if mkdir -p "$LOG_DIR" 2>/dev/null && touch "$LOG_FILE" 2>/dev/null; then
  : # primary log location is writable
else
  # Template ends in XXXXXX so it works with GNU, BSD and busybox mktemp.
  LOG_FILE="$(mktemp "/tmp/image-upgrade-$(date +%Y%m%d_%H%M%S).log.XXXXXX" 2>/dev/null)" \
    || LOG_FILE="/dev/null" # no writable log location at all: console only
  echo "[INFO] logs/ not writable; using $LOG_FILE" >&2
fi
# From here on everything printed to stdout or stderr is also appended to
# LOG_FILE.
exec > >(tee -a "$LOG_FILE") 2>&1
|
|
|
|
# --- Capture previous version for result.json ---
# Release installs report the first line of VERSION; source installs report
# the short git commit hash. Either lookup falls back to "unknown".
case "$INSTALL_MODE" in
  release)
    PRE_VERSION="$(head -n 1 "$PROJECT_DIR/VERSION" 2>/dev/null || echo "unknown")"
    ;;
  *)
    PRE_VERSION="$(cd "$PROJECT_DIR" && git rev-parse --short HEAD 2>/dev/null || echo "unknown")"
    ;;
esac
|
|
|
|
# Escape a string so it can be embedded inside a double-quoted JSON value.
# Backslashes are handled first so the escapes added afterwards are not
# doubled; newline, carriage return and tab become \n, \r and \t.
#   $1 - raw string; the escaped form is written to stdout without a newline
_json_escape() {
  local s="${1-}"
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  printf '%s' "$s"
}

# Publish the current upgrade progress as JSON in PROGRESS_FILE.
# Does nothing unless API_MODE is "true".
# Globals:   API_MODE, UPGRADE_DIR, PROGRESS_FILE (read)
# Arguments: $1 - phase number (emitted as a bare JSON number)
#            $2 - phase name
#            $3 - percentage (emitted as a bare JSON number)
#            $4 - human-readable status message
# The document is written to a temp file and renamed into place, so a reader
# polling PROGRESS_FILE never sees a half-written file.
write_progress() {
  local phase_num="$1" phase_name="$2" pct="$3" msg="$4"
  if [[ "$API_MODE" != "true" ]]; then
    return 0
  fi
  mkdir -p "$UPGRADE_DIR"
  local tmp_file="${PROGRESS_FILE}.tmp.$$"
  {
    printf '{\n'
    printf '  "phase": %s,\n' "$phase_num"
    printf '  "phaseName": "%s",\n' "$(_json_escape "$phase_name")"
    printf '  "percentage": %s,\n' "$pct"
    printf '  "message": "%s",\n' "$(_json_escape "$msg")"
    printf '  "lastUpdate": "%s"\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
    printf '}\n'
  } > "$tmp_file"
  mv -f "$tmp_file" "$PROGRESS_FILE"
}
|
|
|
|
# Escape a string so it can be embedded inside a double-quoted JSON value.
# Backslashes are handled first so the escapes added afterwards are not
# doubled; newline, carriage return and tab become \n, \r and \t.
#   $1 - raw string; the escaped form is written to stdout without a newline
_json_escape() {
  local s="${1-}"
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  printf '%s' "$s"
}

# Write the final outcome as JSON to RESULT_FILE and remove PROGRESS_FILE.
# Does nothing unless API_MODE is "true".
# Globals:   API_MODE, START_TIME, PRE_VERSION, INSTALL_MODE, PROJECT_DIR,
#            UPGRADE_DIR, RESULT_FILE, PROGRESS_FILE (read)
# Arguments: $1 - "true" or "false" (emitted as a bare JSON boolean)
#            $2 - human-readable message
#            $3 - optional JSON array of warning strings (default: [])
# The document is written to a temp file and renamed into place, so a reader
# polling RESULT_FILE never sees a half-written file.
write_result() {
  if [[ "$API_MODE" != "true" ]]; then
    return 0
  fi
  local success_val="$1" msg="$2"
  local warnings_json="${3:-[]}"
  local duration_secs=$((SECONDS - START_TIME))

  # Re-read the version after the run; fall back to the pre-upgrade value.
  local new_version="$PRE_VERSION"
  if [[ "$INSTALL_MODE" == "release" ]]; then
    new_version="$(head -n 1 "$PROJECT_DIR/VERSION" 2>/dev/null || echo "$PRE_VERSION")"
  else
    new_version="$(cd "$PROJECT_DIR" && git rev-parse --short HEAD 2>/dev/null || echo "$PRE_VERSION")"
  fi

  mkdir -p "$UPGRADE_DIR"
  local tmp_file="${RESULT_FILE}.tmp.$$"
  {
    printf '{\n'
    printf '  "success": %s,\n' "$success_val"
    printf '  "message": "%s",\n' "$(_json_escape "$msg")"
    printf '  "previousCommit": "%s",\n' "$(_json_escape "$PRE_VERSION")"
    printf '  "newCommit": "%s",\n' "$(_json_escape "$new_version")"
    printf '  "commitCount": 0,\n'
    printf '  "durationSeconds": %s,\n' "$duration_secs"
    printf '  "warnings": %s,\n' "$warnings_json"
    printf '  "completedAt": "%s",\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
    printf '  "mode": "image-only"\n'
    printf '}\n'
  } > "$tmp_file"
  mv -f "$tmp_file" "$RESULT_FILE"

  # A result supersedes any in-flight progress report.
  rm -f "$PROGRESS_FILE"
}
|
|
|
|
# --- Lock + cleanup ---
|
|
# Take the upgrade lock by writing this shell's PID to LOCK_FILE.
# Globals: LOCK_FILE (read)
# Exits with status 1 (after reporting via error and write_result) when the
# lock is held by a live process or cannot be created.
#
# The file is created under `noclobber`, which makes `>` fail if the file
# already exists. That closes the window in which two upgrades could both see
# "no lock" and both write their PID.
acquire_lock() {
  local pid attempt
  for attempt in 1 2 3; do
    if (
      set -o noclobber
      echo "$$" > "$LOCK_FILE"
    ) 2>/dev/null; then
      return 0
    fi

    # Creation failed without a lock file being present: either it vanished
    # in the meantime or the location is not writable. Retry; the loop is
    # bounded, so a permanent failure is reported below.
    if [[ ! -e "$LOCK_FILE" ]]; then
      continue
    fi

    pid="$(cat "$LOCK_FILE" 2>/dev/null || echo "")"
    if [[ -n "$pid" ]] && kill -0 "$pid" 2>/dev/null; then
      error "Upgrade already running (pid $pid). Refusing to start."
      write_result "false" "Another upgrade is already running (pid $pid)"
      exit 1
    fi

    warn "Stale lock file found; removing"
    rm -f "$LOCK_FILE"
  done

  error "Could not create lock file $LOCK_FILE"
  write_result "false" "Could not create upgrade lock file"
  exit 1
}
|
|
|
|
# Remove LOCK_FILE, but only when it records this shell's PID.
# Globals: LOCK_FILE (read)
# Always returns 0.
#
# The ownership check matters because this function also runs from the EXIT
# trap: when acquire_lock refuses to start because another upgrade holds the
# lock, an unconditional rm would delete that other upgrade's lock on the way
# out.
release_lock() {
  local owner=""
  if [[ -f "$LOCK_FILE" ]]; then
    owner="$(cat "$LOCK_FILE" 2>/dev/null || true)"
    if [[ "$owner" == "$$" ]]; then
      rm -f "$LOCK_FILE" || true
    fi
  fi
  return 0
}
|
|
|
|
# ERR-trap handler: report the failure, record it via write_result, drop the
# lock and exit with the failing command's status.
#   $1 - line number of the failing command (shown as "?" when omitted)
on_failure() {
  # $? must be read before anything else runs, or it is overwritten.
  local rc=$? where="${1:-?}"
  local detail="failed at line ${where} (exit ${rc})"

  error "image-upgrade.sh ${detail}"
  write_result "false" "Image upgrade ${detail}"
  release_lock
  exit "$rc"
}
|
|
# Failure/cleanup hooks: ERR hands the failing line number to on_failure, EXIT
# calls release_lock on every exit path.
# NOTE(review): errtrace (set -E) is not enabled, so the ERR trap does not fire
# for commands that fail inside functions — confirm that is intended.
trap 'on_failure "$LINENO"' ERR
trap release_lock EXIT

# --- Banner ---
echo ""
echo -e "${BOLD}${CYAN}================================================${NC}"
echo -e "${BOLD} Image-Only Upgrade${NC}"
echo -e "${BOLD}${CYAN}================================================${NC}"
printf '%s\n' \
  "Install mode: $INSTALL_MODE" \
  "Project dir: $PROJECT_DIR" \
  "Pre-version: $PRE_VERSION"
if [[ -n "$IMAGE_TAG" ]]; then
  echo "Image tag: $IMAGE_TAG"
fi
if [[ "$DRY_RUN" == "true" ]]; then
  echo "DRY RUN: no images will be pulled or services recreated"
fi
echo ""

# Refuse to run alongside another upgrade.
acquire_lock
|
|
|
|
# =============================================================================
# Phase 1: Pre-flight + mkdocs snapshot (defensive)
# =============================================================================
phase "1" "Pre-flight"
write_progress 1 "Pre-flight" 10 "Snapshotting mkdocs (defensive)..."

# Run snapshot_mkdocs from scripts/lib/mkdocs-snapshot.sh in a child shell.
# Image-only upgrades shouldn't damage mkdocs (no filesystem mutation), but
# the snapshot is cheap insurance and keeps operator habits consistent.
# A failed or missing snapshot is reported and never aborts the upgrade.
if [[ -r "$SCRIPT_DIR/lib/mkdocs-snapshot.sh" ]]; then
  if [[ "$DRY_RUN" == "true" ]]; then
    info "[DRY RUN] Would snapshot mkdocs/"
  else
    # The library path travels through the environment instead of being
    # spliced into the command string, so install paths containing spaces or
    # shell metacharacters are sourced correctly.
    # shellcheck disable=SC2016
    PROJECT_DIR="$PROJECT_DIR" \
      MKDOCS_SNAPSHOT_LIB="$SCRIPT_DIR/lib/mkdocs-snapshot.sh" \
      bash -c '. "$MKDOCS_SNAPSHOT_LIB"; snapshot_mkdocs' \
      || warn "mkdocs snapshot failed (non-fatal; continuing)"
  fi
else
  warn "scripts/lib/mkdocs-snapshot.sh not found; skipping snapshot"
fi

# Every `docker compose` call below resolves the compose project from the
# working directory. PROJECT_DIR is derived from the script's real location
# (symlinks followed), so the script may be started from anywhere; pin the
# working directory to the install root before talking to compose.
cd "$PROJECT_DIR"

# Sanity-check docker
if ! docker compose version &>/dev/null; then
  error "docker compose is not available"
  write_result "false" "docker compose not available"
  exit 1
fi
success "Pre-flight checks passed"
|
|
|
|
# =============================================================================
# Phase 2: Pull images
# =============================================================================
phase "2" "Pull Images"
write_progress 2 "Pull Images" 30 "Pulling images from registry..."

# Extra environment for the pull: only an explicit --image-tag override.
PULL_ENV=()
[[ -z "$IMAGE_TAG" ]] || PULL_ENV+=("IMAGE_TAG=$IMAGE_TAG")

if [[ "$DRY_RUN" != "true" ]]; then
  info "Pulling all images (this may take a few minutes)..."
  # Prefix the command with `env VAR=...` only when there is something to
  # set; the array is never expanded while empty.
  PULL_CMD=(docker compose pull)
  if (( ${#PULL_ENV[@]} > 0 )); then
    PULL_CMD=(env "${PULL_ENV[@]}" "${PULL_CMD[@]}")
  fi
  # A failed pull is only a warning: the run continues with local images.
  if ! "${PULL_CMD[@]}"; then
    warn "docker compose pull had errors (continuing — some images may be local)"
  fi
else
  info "[DRY RUN] Would run: ${PULL_ENV[*]:-} docker compose pull"
fi
success "Image pull complete"
|
|
|
|
# =============================================================================
# Phase 3: Recreate core app services (targeted, not broad)
# =============================================================================
phase 3 'Recreate Services'
write_progress 3 'Recreate Services' 60 \
  'Recreating core app services with new images...'
|
|
|
|
# Targeted recreate: only the services whose IMAGES are released as part of
# changemaker.lite (api, admin, media-api, nginx). A broader `up -d` is risky
# because a single misconfigured mount in any service (e.g. mkdocs-site-server)
# can cascade and leave dependent containers in "Created" state. An image-only
# upgrade should only touch the actual code containers, not third-party
# infrastructure that happens to live in the same compose file.
#
# Same Phase 6 pattern as upgrade.sh: drop ccp-agent from COMPOSE_PROFILES
# during recreate so we don't suicide-restart the agent that spawned us.
# ccp-agent is restarted at the end via a detached subshell.

# Print a comma-separated profile list with every exact "ccp-agent" entry
# removed.
#   $1 - comma-separated profile list (may be empty)
# grep -v exits 1 when it selects no lines, which happens when the list is
# exactly "ccp-agent". Under `set -o pipefail` + `set -e` that status would
# abort the whole upgrade, so it is neutralised here.
profiles_without_agent() {
  printf '%s\n' "${1:-}" \
    | tr ',' '\n' \
    | { grep -vx 'ccp-agent' || true; } \
    | paste -sd, -
}

PROFILES_SAVED="${COMPOSE_PROFILES:-}"
COMPOSE_PROFILES_WITHOUT_AGENT="$(profiles_without_agent "$PROFILES_SAVED")"

# Environment for the recreate: filtered profiles plus the optional tag
# override.
UP_ENV=("COMPOSE_PROFILES=${COMPOSE_PROFILES_WITHOUT_AGENT}")
if [[ -n "$IMAGE_TAG" ]]; then
  UP_ENV+=("IMAGE_TAG=$IMAGE_TAG")
fi
|
|
|
|
# Core services that ship as v2 release images. nginx comes last so it doesn't
# briefly proxy to an old api. media-api may not be enabled on every install,
# so services missing from the compose file are skipped rather than fatal.
CORE_SERVICES=(api admin media-api nginx)
EXISTING_SERVICES=()

# The service list is captured once and matched against the captured text.
# Piping `docker compose config` straight into `grep -q` is unsafe under
# `set -o pipefail`: grep exits on the first match, the writer can be killed
# by SIGPIPE, the pipeline then reports failure, and every service would look
# missing.
COMPOSE_SERVICES_LIST="$(docker compose config --services 2>/dev/null || true)"
for svc in "${CORE_SERVICES[@]}"; do
  if ! grep -qx -- "$svc" <<<"$COMPOSE_SERVICES_LIST"; then
    info "Skipping service '$svc' (not in compose file)"
    continue
  fi
  EXISTING_SERVICES+=("$svc")
done

if [[ ${#EXISTING_SERVICES[@]} -eq 0 ]]; then
  warn "No core app services found in compose; skipping recreate"
elif [[ "$DRY_RUN" != "true" ]]; then
  info "Recreating core services: ${EXISTING_SERVICES[*]}"
  env "${UP_ENV[@]}" docker compose up -d "${EXISTING_SERVICES[@]}"
else
  info "[DRY RUN] Would run: ${UP_ENV[*]} docker compose up -d ${EXISTING_SERVICES[*]}"
fi
success "Services recreated"
|
|
|
|
# Restart Pangolin tunnel connector if running (image may have changed).
# The container names are captured first and matched afterwards: with
# `set -o pipefail`, `docker ps | grep -q` can report failure when grep exits
# on its first match and the writer is killed by SIGPIPE, which would silently
# skip the restart.
NEWT_CHECK_NAMES="$(docker ps --format '{{.Names}}' || true)"
if grep -q 'newt' <<<"$NEWT_CHECK_NAMES"; then
  if [[ "$DRY_RUN" == "true" ]]; then
    info "[DRY RUN] Would restart newt"
  else
    info "Restarting Pangolin tunnel connector..."
    # Best effort: a failed restart must not fail the upgrade.
    docker compose restart newt 2>/dev/null || true
    success "Newt tunnel restarted"
  fi
fi
|
|
|
|
# =============================================================================
# Phase 4: Verify (light health checks)
# =============================================================================
phase 4 'Verification'
write_progress 4 'Verification' 85 'Running health checks...'

# VERIFY_FAILED flips to true when a health check times out;
# UPGRADE_WARNINGS is the JSON array later handed to write_result.
UPGRADE_WARNINGS='[]'
VERIFY_FAILED=false
|
|
|
|
# Poll a health-check command every 3 seconds until it succeeds or the time
# budget is used up.
# Globals:   VERIFY_FAILED (set to true on timeout)
# Arguments: $1 - display name of the service
#            $2 - shell command string, run with eval; exit 0 means healthy
#            $3 - maximum wait in seconds (default 45)
# Returns:   always 0; a timeout is reported as a warning, not as a failure.
verify_health() {
  local name="$1" check_cmd="$2" max_wait="${3:-45}"
  local elapsed

  for (( elapsed = 0; elapsed < max_wait; elapsed += 3 )); do
    if eval "$check_cmd" 2>/dev/null; then
      success "$name: healthy (${elapsed}s)"
      return 0
    fi
    sleep 3
  done

  warn "$name: not responding after ${max_wait}s"
  VERIFY_FAILED=true
  return 0
}
|
|
|
|
# Run the light health checks (skipped entirely on a dry run). Each check
# executes wget inside the service's own container.
if [[ "$DRY_RUN" != "true" ]]; then
  verify_health "API (port 4000)" \
    "docker compose exec -T api wget -q --spider http://localhost:4000/api/health" 60
  verify_health "Admin (port 3000)" \
    "docker compose exec -T admin wget -q --spider http://localhost:3000/" 90

  # media-api is optional: check it only when its container is running. The
  # names are captured first and matched afterwards; under
  # `set -o pipefail`, `docker ps | grep -q` can report failure when grep
  # exits on its first match and the writer is killed by SIGPIPE.
  VERIFY_CONTAINER_NAMES="$(docker ps --format '{{.Names}}' || true)"
  if grep -q 'changemaker-media-api' <<<"$VERIFY_CONTAINER_NAMES"; then
    verify_health "Media API (port 4100)" \
      "docker compose exec -T media-api wget -q --spider http://127.0.0.1:4100/health" 30
  fi

  # Health-check failures are surfaced as a warning in result.json; they do
  # not fail the upgrade.
  if [[ "$VERIFY_FAILED" == "true" ]]; then
    UPGRADE_WARNINGS='["Some health checks failed after image-only upgrade — services may still be starting"]'
  fi
fi
|
|
|
|
# =============================================================================
# Summary
# =============================================================================
# Split the elapsed wall-clock time into minutes and seconds for display.
ELAPSED_TOTAL=$((SECONDS - START_TIME))
ELAPSED_MIN=$((ELAPSED_TOTAL / 60))
ELAPSED_SEC=$((ELAPSED_TOTAL % 60))

echo ""
echo -e "${BOLD}${GREEN}================================================${NC}"
echo -e "${BOLD} Image-Only Upgrade Complete${NC}"
echo -e "${BOLD}${GREEN}================================================${NC}"
printf " Previous: %s\n" "$PRE_VERSION"
printf " Duration: %dm %ds\n" "$ELAPSED_MIN" "$ELAPSED_SEC"
printf " Log: %s\n" "$LOG_FILE"

# Publish the final state for the poll loop: 100% progress, then the result
# document carrying any health-check warnings.
write_progress 4 "Complete" 100 "Image-only upgrade complete"
write_result "true" \
  "Image-only upgrade complete (previous: ${PRE_VERSION})" \
  "$UPGRADE_WARNINGS"
|
|
|
|
# Deferred ccp-agent restart — see upgrade.sh for full rationale. Same
# mechanism: a nohup'd, disowned child shell that brings ccp-agent back up
# shortly after this point, so the agent that spawned this script is not
# restarted in the middle of the run.
#
# The profile test is a plain string match on the comma-delimited list rather
# than a `... | grep -q` pipeline, which avoids the pipefail/SIGPIPE hazard.
if [[ ",${PROFILES_SAVED:-}," == *",ccp-agent,"* ]]; then
  if [[ "$DRY_RUN" == "true" ]]; then
    info "[DRY RUN] Would schedule deferred ccp-agent restart"
  else
    info "Scheduling deferred ccp-agent restart..."
    # PROJECT_DIR is passed as a positional argument instead of being spliced
    # into the script text, so a path containing quotes cannot break the
    # command. The restart is skipped if the directory cannot be entered.
    # shellcheck disable=SC2016
    nohup bash -c '
      sleep 3
      cd "$1" || exit 1
      COMPOSE_PROFILES=ccp-agent docker compose --profile ccp-agent up -d ccp-agent
    ' _ "$PROJECT_DIR" >/dev/null 2>&1 </dev/null &
    disown
    success "ccp-agent restart scheduled (will pick up new image)"
  fi
fi

# Normal completion: drop the lock explicitly, disarm the EXIT trap and exit
# with status 0.
release_lock
trap - EXIT
exit 0
|