diff --git a/changemaker-control-panel/admin/src/pages/InstanceDetailPage.tsx b/changemaker-control-panel/admin/src/pages/InstanceDetailPage.tsx
index d798cc7..3617bb8 100644
--- a/changemaker-control-panel/admin/src/pages/InstanceDetailPage.tsx
+++ b/changemaker-control-panel/admin/src/pages/InstanceDetailPage.tsx
@@ -39,6 +39,7 @@ import {
CloudOutlined,
DisconnectOutlined,
UploadOutlined,
+ ThunderboltOutlined,
BellOutlined,
CheckCircleOutlined,
WarningOutlined,
@@ -563,6 +564,24 @@ export default function InstanceDetailPage() {
}
};
+ // Image-only ("quick") upgrade: POSTs to /instances/:id/upgrade-images, which pulls images
+ // + recreates core app services without touching tracked files. Faster + safer than full
+ // upgrade for releases that don't change compose/templates. Toggles upgradingInstance while in flight.
+ const handleStartImageUpgrade = async () => {
+ setUpgradingInstance(true);
+ try {
+ const { data } = await api.post(`/instances/${id}/upgrade-images`, {});
+ setCurrentUpgrade(data.data);
+ message.success('Image-only upgrade started');
+ } catch (err: unknown) {
+ const resp = (err as { response?: { data?: { error?: { message?: string } } } })?.response
+ ?.data?.error;
+ message.error(resp?.message || 'Failed to start image-only upgrade');
+ } finally {
+ setUpgradingInstance(false);
+ }
+ };
+
// Event handlers
const handleAcknowledgeEvent = async (eventId: string) => {
try {
@@ -1632,25 +1651,41 @@ export default function InstanceDetailPage() {
closable
/>
)}
-
-
- Pulls latest code, runs migrations, and restarts services. CCP backup is recommended before upgrading.
+
+
+ Full upgrade pulls the latest code, runs migrations, and restarts services. Quick upgrade only pulls images and recreates the core app — tenant content stays untouched and it's ~2 min faster. Use Quick when the release notes say no orchestration changes.
-
- }
- loading={upgradingInstance}
+
+
- Upgrade Now
-
-
+ }
+ loading={upgradingInstance}
+ disabled={instance.status !== 'RUNNING' && instance.status !== 'STOPPED'}
+ >
+ Quick Upgrade
+
+
+
+ }
+ loading={upgradingInstance}
+ disabled={instance.status !== 'RUNNING' && instance.status !== 'STOPPED'}
+ >
+ Upgrade Now
+
+
+
)}
diff --git a/changemaker-control-panel/agent/src/routes/upgrade.routes.ts b/changemaker-control-panel/agent/src/routes/upgrade.routes.ts
index efdd74b..9c73117 100644
--- a/changemaker-control-panel/agent/src/routes/upgrade.routes.ts
+++ b/changemaker-control-panel/agent/src/routes/upgrade.routes.ts
@@ -188,6 +188,85 @@ router.post('/instance/:slug/upgrade/start', async (req: Request, res: Response)
res.status(202).json({ started: true });
});
+// POST /instance/:slug/upgrade/start-image-only — Run image-upgrade.sh in background
+//
+// Image-only upgrade: pulls latest images + recreates services without touching
+// tracked files (no git pull, no tarball extract, no VERSION mutation). Tenant
+// content is left alone: besides docker, the script only writes data/upgrade/, logs/,
+// its .upgrade.lock and a mkdocs snapshot. See scripts/image-upgrade.sh for full rationale.
+//
+// Schema-compatible with /upgrade/start: writes the same progress.json + result.json
+// so the CCP poll loop in runRemoteUpgrade() works unchanged.
+router.post('/instance/:slug/upgrade/start-image-only', async (req: Request, res: Response) => {
+ const slug = param(req, 'slug');
+ const entry = await getSlugEntry(slug);
+ const { imageTag } = req.body || {};
+
+ // SECURITY: imageTag flows into bash via --image-tag. Constrain to a safe
+ // subset of docker tag chars (semver, SHA, named tags). Reject anything
+ // that could shell-escape.
+ if (imageTag && !/^[a-zA-Z0-9][a-zA-Z0-9_.-]{0,127}$/.test(String(imageTag))) {
+ res.status(400).json({ error: 'VALIDATION', message: 'Invalid imageTag' });
+ return;
+ }
+
+ const scriptPath = path.join(entry.basePath, 'scripts', 'image-upgrade.sh');
+ try {
+ await fs.access(scriptPath);
+ } catch {
+ res.status(404).json({ error: 'NOT_FOUND', message: 'image-upgrade.sh not found' });
+ return;
+ }
+
+ // Same concurrency guards as the full /upgrade/start endpoint — uses the
+ // same lock + on-disk staleness check + backup/restore mutex.
+ if (isSlugLocked(slug, 'upgrade') || await isUpgradeRunningOnDisk(entry.basePath)) {
+ res.status(409).json({ error: 'SLUG_BUSY', message: 'An upgrade is already in progress' });
+ return;
+ }
+ if (isSlugLocked(slug, 'backup') || isSlugLocked(slug, 'restore')) {
+ res.status(409).json({ error: 'SLUG_BUSY', message: 'A backup or restore is currently running' });
+ return;
+ }
+
+ // Clear stale progress/result files (same convention as /upgrade/start)
+ const progressPath = path.join(entry.basePath, 'data', 'upgrade', 'progress.json');
+ const resultPath = path.join(entry.basePath, 'data', 'upgrade', 'result.json');
+ await fs.mkdir(path.dirname(progressPath), { recursive: true });
+ await fs.rm(progressPath, { force: true });
+ await fs.rm(resultPath, { force: true });
+
+ const args: string[] = [scriptPath, '--api-mode'];
+ if (imageTag) args.push('--image-tag', String(imageTag));
+
+ void withSlugLock(slug, 'upgrade', async () => {
+ logger.info(`[image-upgrade] ${slug}: spawning ${args.join(' ')} (cwd=${entry.basePath})`);
+ try {
+ await new Promise<void>((resolve, reject) => { // <void>: resolve() below is called with no value
+ const proc = spawn('bash', args, {
+ cwd: entry.basePath,
+ env: { ...process.env, COMPOSE_ANSI: 'never' },
+ stdio: ['ignore', 'ignore', 'ignore'],
+ });
+ proc.on('error', reject);
+ proc.on('close', (code) => {
+ if (code === 0) resolve();
+ else reject(new Error(`image-upgrade.sh exited with code ${code}`));
+ });
+ });
+ logger.info(`[image-upgrade] ${slug}: image-upgrade.sh completed`);
+ } catch (err) {
+ logger.error(`[image-upgrade] ${slug}: ${(err as Error).message}`);
+ }
+ }).catch((err) => {
+ if (!(err instanceof SlugBusyError)) {
+ logger.error(`[image-upgrade] ${slug}: lock or background error: ${(err as Error).message}`);
+ }
+ });
+
+ res.status(202).json({ started: true, mode: 'image-only' });
+});
+
// GET /instance/:slug/upgrade/progress — Read progress.json
router.get('/instance/:slug/upgrade/progress', async (req: Request, res: Response) => {
const entry = await getSlugEntry(param(req, 'slug'));
diff --git a/changemaker-control-panel/api/src/modules/instances/instances.routes.ts b/changemaker-control-panel/api/src/modules/instances/instances.routes.ts
index a22a653..d96ae13 100644
--- a/changemaker-control-panel/api/src/modules/instances/instances.routes.ts
+++ b/changemaker-control-panel/api/src/modules/instances/instances.routes.ts
@@ -4,7 +4,7 @@ import rateLimit from 'express-rate-limit';
import { prisma } from '../../lib/prisma';
import { authenticate, requireRole } from '../../middleware/auth';
import { validate } from '../../middleware/validate';
-import { createInstanceSchema, updateInstanceSchema, registerInstanceSchema, reconfigureInstanceSchema, configureTunnelSchema, importInstancesSchema, startUpgradeSchema, setupRemoteTunnelSchema } from './instances.schemas';
+import { createInstanceSchema, updateInstanceSchema, registerInstanceSchema, reconfigureInstanceSchema, configureTunnelSchema, importInstancesSchema, startUpgradeSchema, startImageUpgradeSchema, setupRemoteTunnelSchema } from './instances.schemas';
import * as instancesService from './instances.service';
import * as healthService from '../../services/health.service';
import * as backupService from '../../services/backup.service';
@@ -362,6 +362,25 @@ router.post(
}
);
+// Image-only upgrade: faster + safer than a full upgrade for releases that don't
+// change orchestration/templates. SUPER_ADMIN/OPERATOR only; responds 201 with the
+// created upgrade record. See upgrade.service.ts startImageUpgrade for full rationale.
+router.post(
+ '/:id/upgrade-images',
+ requireRole('SUPER_ADMIN', 'OPERATOR'),
+ validate(startImageUpgradeSchema),
+ async (req: Request, res: Response) => {
+ const { imageTag } = req.body || {};
+ const upgrade = await upgradeService.startImageUpgrade(
+ req.params.id as string,
+ req.user!.id,
+ req.ip,
+ { imageTag }
+ );
+ res.status(201).json({ data: upgrade });
+ }
+);
+
router.get(
'/:id/upgrade-status',
requireRole('SUPER_ADMIN', 'OPERATOR'),
diff --git a/changemaker-control-panel/api/src/modules/instances/instances.schemas.ts b/changemaker-control-panel/api/src/modules/instances/instances.schemas.ts
index ee2f68d..cecb943 100644
--- a/changemaker-control-panel/api/src/modules/instances/instances.schemas.ts
+++ b/changemaker-control-panel/api/src/modules/instances/instances.schemas.ts
@@ -121,6 +121,17 @@ export const startUpgradeSchema = z.object({
.optional(),
});
+// Request body for the image-only upgrade (pulls images + recreates core app services
+// without touching tracked files). imageTag is optional — if omitted, the
+// agent uses whatever IMAGE_TAG the install's .env / compose env defines
+// (typically `latest`). Must start alphanumeric, followed by up to 127 of [a-zA-Z0-9_.-].
+export const startImageUpgradeSchema = z.object({
+ imageTag: z
+ .string()
+ .regex(/^[a-zA-Z0-9][a-zA-Z0-9_.-]{0,127}$/, 'Invalid imageTag')
+ .optional(),
+});
+
export const setupRemoteTunnelSchema = z.object({
// Empty string or omitted → resources use standard subdomains (app., api., etc.)
// A value like "ck" → creates ck-app., ck-api., etc. for multi-tenant domains
diff --git a/changemaker-control-panel/api/src/services/remote-driver.ts b/changemaker-control-panel/api/src/services/remote-driver.ts
index 82b6223..df79535 100644
--- a/changemaker-control-panel/api/src/services/remote-driver.ts
+++ b/changemaker-control-panel/api/src/services/remote-driver.ts
@@ -82,6 +82,10 @@ export interface StartAgentUpgradeOptions {
branch?: string;
}
+export interface StartAgentImageUpgradeOptions {
+ imageTag?: string;
+}
+
interface AgentRequestOptions {
method: 'GET' | 'POST' | 'DELETE';
path: string;
@@ -574,6 +578,21 @@ export class RemoteDriver implements ExecutionDriver {
});
}
+ /**
+ * Trigger image-upgrade.sh --api-mode on the remote (image-only upgrade —
+ * pulls images + recreates core app services without touching tracked files
+ * in the install tree). Resolves once the agent has accepted the request
+ * (30 s timeout); it does not wait for the script to finish. Progress is read
+ * through the same progress/result polling endpoints as startUpgrade.
+ */
+ async startImageUpgrade(options: StartAgentImageUpgradeOptions = {}): Promise<void> {
+ await this.request({
+ method: 'POST',
+ path: `/instance/${this.slug}/upgrade/start-image-only`,
+ body: options,
+ timeoutMs: 30_000,
+ });
+ }
+
/**
* Read the agent's data/upgrade/progress.json. Returns the default zero-state
* if no progress has been written yet.
diff --git a/changemaker-control-panel/api/src/services/upgrade.service.ts b/changemaker-control-panel/api/src/services/upgrade.service.ts
index c1ab063..f854b9f 100644
--- a/changemaker-control-panel/api/src/services/upgrade.service.ts
+++ b/changemaker-control-panel/api/src/services/upgrade.service.ts
@@ -205,6 +205,10 @@ export interface StartUpgradeOptions {
branch?: string;
}
+export interface StartImageUpgradeOptions {
+ imageTag?: string;
+}
+
/**
* Start an upgrade for an instance. Returns the created InstanceUpgrade record.
* The actual upgrade runs asynchronously (fire-and-forget).
@@ -298,6 +302,86 @@ export async function startUpgrade(
return upgrade;
}
+/**
+ * Start an IMAGE-ONLY upgrade (Approach B). Pulls latest images + recreates
+ * core app services without touching tracked files. Faster (~2 min vs ~4-5
+ * min for full upgrade) and safer because tracked files are never modified —
+ * tenant content (mkdocs/, configs/) is implicitly preserved.
+ *
+ * Use this for releases that only bump container code or schema; use startUpgrade() when
+ * compose orchestration, nginx config, or other tracked files change. Returns the PENDING
+ * InstanceUpgrade record; throws if the instance is missing, not remote, not RUNNING/STOPPED, or mid-upgrade.
+ *
+ * Remote-only for now: local mode would need a `runImageUpgrade` runner
+ * which we haven't built (all our instances are remote via mTLS agent).
+ */
+export async function startImageUpgrade(
+ instanceId: string,
+ userId: string,
+ ipAddress?: string,
+ options?: StartImageUpgradeOptions
+) {
+ const instance = await prisma.instance.findUnique({ where: { id: instanceId } });
+ if (!instance) throw new Error('Instance not found');
+
+ if (!instance.isRemote) {
+ throw new Error('Image-only upgrade is currently supported only for remote instances');
+ }
+
+ if (instance.status !== InstanceStatus.RUNNING && instance.status !== InstanceStatus.STOPPED) {
+ throw new Error(`Cannot upgrade instance in ${instance.status} state`);
+ }
+
+ // Reuse the same in-progress guard as startUpgrade: only one upgrade
+ // (of either type) at a time per instance.
+ const active = await prisma.instanceUpgrade.findFirst({
+ where: {
+ instanceId,
+ status: { in: [UpgradeStatus.PENDING, UpgradeStatus.IN_PROGRESS] },
+ },
+ });
+ if (active) {
+ throw new Error('An upgrade is already in progress for this instance');
+ }
+
+ // Create upgrade record. branch is unused for image-only but keep it
+ // populated with current branch for audit trail consistency.
+ const upgrade = await prisma.instanceUpgrade.create({
+ data: {
+ instanceId,
+ status: UpgradeStatus.PENDING,
+ previousCommit: instance.gitCommit,
+ branch: instance.gitBranch,
+ triggeredById: userId,
+ },
+ });
+
+ // Audit log: same INSTANCE_UPGRADE action as a full upgrade, tagged mode='image-only'
+ await prisma.auditLog.create({
+ data: {
+ userId,
+ instanceId,
+ action: AuditAction.INSTANCE_UPGRADE,
+ details: {
+ upgradeId: upgrade.id,
+ previousCommit: instance.gitCommit,
+ source: 'remote',
+ mode: 'image-only',
+ options: options || {},
+ } as unknown as Prisma.InputJsonValue,
+ ipAddress,
+ },
+ });
+
+ // Fire-and-forget: reuse runRemoteUpgrade with mode='image-only'. Same
+ // poll loop and result handling — only the initial agent call differs.
+ runRemoteUpgrade(upgrade.id, instance, undefined, 'image-only', options).catch((err) => {
+ logger.error(`[image-upgrade] Remote image upgrade orchestration failed for ${instance.slug}: ${err}`);
+ });
+
+ return upgrade;
+}
+
/**
* Async REMOTE upgrade runner.
*
@@ -316,7 +400,9 @@ export async function startUpgrade(
async function runRemoteUpgrade(
upgradeId: string,
instance: Instance,
- options?: StartUpgradeOptions
+ options?: StartUpgradeOptions,
+ mode: 'full' | 'image-only' = 'full',
+ imageOnlyOptions?: StartImageUpgradeOptions
) {
const slug = instance.slug;
@@ -333,18 +419,27 @@ async function runRemoteUpgrade(
where: { id: upgradeId },
data: {
status: UpgradeStatus.IN_PROGRESS,
- progressMessage: 'Starting remote upgrade...',
+ progressMessage: mode === 'image-only'
+ ? 'Starting image-only upgrade...'
+ : 'Starting remote upgrade...',
},
});
// Tell the agent to start. The agent has its own mutex + stale-progress
// check, so this can return 409 if a previous upgrade is still running.
- logger.info(`[upgrade] ${slug}: triggering remote upgrade.sh start`);
- await driver.startUpgrade({
- skipBackup: options?.skipBackup,
- useRegistry: options?.useRegistry,
- branch: options?.branch,
- });
+ if (mode === 'image-only') {
+ logger.info(`[upgrade] ${slug}: triggering remote image-upgrade.sh start`);
+ await driver.startImageUpgrade({
+ imageTag: imageOnlyOptions?.imageTag,
+ });
+ } else {
+ logger.info(`[upgrade] ${slug}: triggering remote upgrade.sh start`);
+ await driver.startUpgrade({
+ skipBackup: options?.skipBackup,
+ useRegistry: options?.useRegistry,
+ branch: options?.branch,
+ });
+ }
// Poll progress + result. We treat /result returning 200 as the signal
// that upgrade.sh exited (successfully or with code != 0 — the script
diff --git a/scripts/image-upgrade.sh b/scripts/image-upgrade.sh
new file mode 100755
index 0000000..82c4140
--- /dev/null
+++ b/scripts/image-upgrade.sh
@@ -0,0 +1,383 @@
+#!/usr/bin/env bash
+# image-upgrade.sh — Approach B: image-only upgrade
+#
+# Pulls latest images from the registry and recreates services WITHOUT touching
+# tracked files in the install tree (no git pull, no tarball extract, no VERSION
+# mutation). Tenant content (mkdocs/, configs/) is left untouched: apart from the docker
+# daemon, this script only writes data/upgrade/, logs/ (or /tmp), .upgrade.lock and the mkdocs snapshot.
+#
+# Used by CCP "Quick Upgrade" button. Pairs with scripts/upgrade.sh which
+# remains the full upgrade path for orchestration-changing releases.
+#
+# Schema parity: writes data/upgrade/progress.json + result.json with the same
+# fields upgrade.sh writes, so the CCP poll loop is unchanged.
+
+set -euo pipefail
+
+PROJECT_DIR="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")/.." && pwd)"
+SCRIPT_DIR="$PROJECT_DIR/scripts"
+UPGRADE_DIR="$PROJECT_DIR/data/upgrade"
+LOG_DIR="$PROJECT_DIR/logs"
+LOG_FILE="$LOG_DIR/image-upgrade-$(date +%Y%m%d_%H%M%S).log"
+LOCK_FILE="$PROJECT_DIR/.upgrade.lock"
+PROGRESS_FILE="$UPGRADE_DIR/progress.json"
+RESULT_FILE="$UPGRADE_DIR/result.json"
+
+START_TIME=$SECONDS
+
+# --- Detect install mode ---
+if [[ -f "$PROJECT_DIR/VERSION" ]] && [[ ! -d "$PROJECT_DIR/.git" ]]; then
+ INSTALL_MODE="release"
+else
+ INSTALL_MODE="source"
+fi
+
+# --- Defaults ---
+API_MODE=false
+DRY_RUN=false
+IMAGE_TAG=""
+
+usage() {
+ cat <&2; usage >&2; exit 1 ;;
+ esac
+done
+
+# --- Colors ---
+if [[ -t 1 ]] && [[ -z "${NO_COLOR:-}" ]]; then
+ RED='\033[0;31m' GREEN='\033[0;32m' YELLOW='\033[0;33m'
+ CYAN='\033[0;36m' BOLD='\033[1m' NC='\033[0m'
+else
+ RED='' GREEN='' YELLOW='' CYAN='' BOLD='' NC=''
+fi
+info() { echo -e "${CYAN}[INFO]${NC} $*"; }
+success() { echo -e "${GREEN}[ OK ]${NC} $*"; }
+warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
+error() { echo -e "${RED}[ERR ]${NC} $*" >&2; }
+phase() { echo ""; echo -e "${BOLD}${CYAN}=== Phase $1: $2 ===${NC}"; }
+
+# --- Logging: mirror stdout/stderr to LOG_FILE ---
+# logs/ may be root-owned on installs where upgrade.sh has run via ccp-agent.
+# Fall back to /tmp if we can't write, so bunker-admin manual invocations don't
+# crash with "Permission denied" on tee.
+mkdir -p "$UPGRADE_DIR"
+if mkdir -p "$LOG_DIR" 2>/dev/null && touch "$LOG_FILE" 2>/dev/null; then
+ : # primary log location is writable
+else
+ LOG_FILE="/tmp/image-upgrade-$(date +%Y%m%d_%H%M%S)-$$.log"
+ echo "[INFO] logs/ not writable; using $LOG_FILE" >&2
+fi
+exec > >(tee -a "$LOG_FILE") 2>&1
+
+# --- Capture previous version for result.json ---
+if [[ "$INSTALL_MODE" == "release" ]]; then
+ PRE_VERSION="$(head -1 "$PROJECT_DIR/VERSION" 2>/dev/null || echo "unknown")"
+else
+ PRE_VERSION="$(cd "$PROJECT_DIR" && git rev-parse --short HEAD 2>/dev/null || echo "unknown")"
+fi
+
+write_progress() {
+ local phase_num="$1" phase_name="$2" pct="$3" msg="$4"
+ [[ "$API_MODE" != "true" ]] && return
+ mkdir -p "$UPGRADE_DIR"
+ cat > "$PROGRESS_FILE" </dev/null || echo "$PRE_VERSION")"
+ else
+ new_version="$(cd "$PROJECT_DIR" && git rev-parse --short HEAD 2>/dev/null || echo "$PRE_VERSION")"
+ fi
+ mkdir -p "$UPGRADE_DIR"
+ cat > "$RESULT_FILE" </dev/null || echo "")"
+ if [[ -n "$pid" ]] && kill -0 "$pid" 2>/dev/null; then
+ error "Upgrade already running (pid $pid). Refusing to start."
+ write_result "false" "Another upgrade is already running (pid $pid)"
+ exit 1
+ fi
+ warn "Stale lock file found; removing"
+ rm -f "$LOCK_FILE"
+ fi
+ echo $$ > "$LOCK_FILE"
+}
+
+release_lock() { if [[ "$(cat "$LOCK_FILE" 2>/dev/null || true)" == "$$" ]]; then rm -f "$LOCK_FILE" || true; fi; } # only drop a lock we own: the EXIT trap also fires when acquire_lock refuses to start
+
+on_failure() {
+ local exit_code=$?
+ local line_no=${1:-?}
+ error "image-upgrade.sh failed at line $line_no (exit $exit_code)"
+ write_result "false" "Image upgrade failed at line $line_no (exit $exit_code)"
+ release_lock
+ exit "$exit_code"
+}
+trap 'on_failure $LINENO' ERR
+trap 'release_lock' EXIT
+
+# --- Banner ---
+echo ""
+echo -e "${BOLD}${CYAN}================================================${NC}"
+echo -e "${BOLD} Image-Only Upgrade${NC}"
+echo -e "${BOLD}${CYAN}================================================${NC}"
+echo "Install mode: $INSTALL_MODE"
+echo "Project dir: $PROJECT_DIR"
+echo "Pre-version: $PRE_VERSION"
+[[ -n "$IMAGE_TAG" ]] && echo "Image tag: $IMAGE_TAG"
+[[ "$DRY_RUN" == "true" ]] && echo "DRY RUN: no images will be pulled or services recreated"
+echo ""
+
+acquire_lock
+
+# =============================================================================
+# Phase 1: Pre-flight + mkdocs snapshot (defensive)
+# =============================================================================
+phase "1" "Pre-flight"
+write_progress 1 "Pre-flight" 10 "Snapshotting mkdocs (defensive)..."
+
+# Source mkdocs-snapshot.sh and run it. This is the same snapshot every
+# upgrade path takes — leaves mkdocs-backup-.tar.gz in project root. Cheap insurance
+# (image-only upgrades shouldn't damage mkdocs) that keeps operator habits consistent.
+# The lib path is passed as $1 instead of being spliced into the -c string, so spaces/metachars are safe.
+if [[ -r "$SCRIPT_DIR/lib/mkdocs-snapshot.sh" ]]; then
+ if [[ "$DRY_RUN" == "true" ]]; then
+ info "[DRY RUN] Would snapshot mkdocs/"
+ else
+ # shellcheck disable=SC1091
+ PROJECT_DIR="$PROJECT_DIR" bash -c '. "$1"; snapshot_mkdocs' _ "$SCRIPT_DIR/lib/mkdocs-snapshot.sh" \
+ || warn "mkdocs snapshot failed (non-fatal; continuing)"
+ fi
+else
+ warn "scripts/lib/mkdocs-snapshot.sh not found; skipping snapshot"
+fi
+
+# Sanity-check docker
+if ! docker compose version &>/dev/null; then
+ error "docker compose is not available"
+ write_result "false" "docker compose not available"
+ exit 1
+fi
+success "Pre-flight checks passed"
+
+# =============================================================================
+# Phase 2: Pull images
+# =============================================================================
+phase "2" "Pull Images"
+write_progress 2 "Pull Images" 30 "Pulling images from registry..."
+
+PULL_ENV=()
+if [[ -n "$IMAGE_TAG" ]]; then
+ PULL_ENV+=("IMAGE_TAG=$IMAGE_TAG")
+fi
+
+if [[ "$DRY_RUN" == "true" ]]; then
+ info "[DRY RUN] Would run: ${PULL_ENV[*]:-} docker compose pull"
+else
+ info "Pulling all images (this may take a few minutes)..."
+ if (( ${#PULL_ENV[@]} > 0 )); then
+ if ! env "${PULL_ENV[@]}" docker compose pull; then
+ warn "docker compose pull had errors (continuing — some images may be local)"
+ fi
+ else
+ if ! docker compose pull; then
+ warn "docker compose pull had errors (continuing — some images may be local)"
+ fi
+ fi
+fi
+success "Image pull complete"
+
+# =============================================================================
+# Phase 3: Recreate core app services (targeted, not broad)
+# =============================================================================
+phase "3" "Recreate Services"
+write_progress 3 "Recreate Services" 60 "Recreating core app services with new images..."
+
+# Targeted recreate: only the services whose IMAGES are released as part of
+# changemaker.lite (api, admin, media-api, nginx). Broader `up -d` is risky
+# because a single misconfigured mount in any service (e.g. mkdocs-site-server)
+# can cascade and leave dependent containers in "Created" state. Image-only
+# upgrade should only touch the actual code containers, not third-party
+# infrastructure that happens to live in the same compose file.
+#
+# Same Phase 6 pattern as upgrade.sh: drop ccp-agent from COMPOSE_PROFILES during recreate
+# so we don't suicide-restart the agent that spawned us; it is restarted at the end via a detached
+# subshell. `grep -v` exits 1 when it filters out every line, so `|| true` keeps pipefail/errexit quiet.
+PROFILES_SAVED="${COMPOSE_PROFILES:-}"
+COMPOSE_PROFILES_WITHOUT_AGENT="$(echo "${PROFILES_SAVED}" \
+ | tr ',' '\n' | { grep -vx 'ccp-agent' || true; } | paste -sd, -)"
+
+UP_ENV=("COMPOSE_PROFILES=${COMPOSE_PROFILES_WITHOUT_AGENT}")
+if [[ -n "$IMAGE_TAG" ]]; then
+ UP_ENV+=("IMAGE_TAG=$IMAGE_TAG")
+fi
+
+# Core services that ship as v2 release images. media-api may not be enabled on
+# all installs; tolerate it being missing from compose. NOTE(review): nginx is listed last,
+# but `docker compose up` orders by depends_on, not argument order — confirm compose declares it.
+CORE_SERVICES=(api admin media-api nginx)
+EXISTING_SERVICES=()
+# Capture the service list once. Don't pipe `docker compose config` into
+# `grep -q` directly: with `set -o pipefail`, grep exits early on match and
+# SIGPIPEs the docker writer, making the pipeline exit non-zero. The grep -q
+# would then "match" all services as missing. Capture-then-check avoids it.
+COMPOSE_SERVICES_LIST="$(docker compose config --services 2>/dev/null || true)"
+for svc in "${CORE_SERVICES[@]}"; do
+ if grep -qx -- "$svc" <<<"$COMPOSE_SERVICES_LIST"; then
+ EXISTING_SERVICES+=("$svc")
+ else
+ info "Skipping service '$svc' (not in compose file)"
+ fi
+done
+
+if (( ${#EXISTING_SERVICES[@]} == 0 )); then
+ warn "No core app services found in compose; skipping recreate"
+elif [[ "$DRY_RUN" == "true" ]]; then
+ info "[DRY RUN] Would run: ${UP_ENV[*]} docker compose up -d ${EXISTING_SERVICES[*]}"
+else
+ info "Recreating core services: ${EXISTING_SERVICES[*]}"
+ env "${UP_ENV[@]}" docker compose up -d "${EXISTING_SERVICES[@]}"
+fi
+success "Services recreated"
+
+# Restart Pangolin tunnel connector if running (image may have changed). docker ps output is captured first so grep -q can't SIGPIPE it under pipefail.
+if grep -q 'newt' <<<"$(docker ps --format '{{.Names}}' || true)"; then
+ if [[ "$DRY_RUN" == "true" ]]; then
+ info "[DRY RUN] Would restart newt"
+ else
+ info "Restarting Pangolin tunnel connector..."
+ docker compose restart newt 2>/dev/null || true
+ success "Newt tunnel restarted"
+ fi
+fi
+
+# =============================================================================
+# Phase 4: Verify (light health checks)
+# =============================================================================
+phase "4" "Verification"
+write_progress 4 "Verification" 85 "Running health checks..."
+
+VERIFY_FAILED=false
+UPGRADE_WARNINGS="[]"
+
+verify_health() {
+ local name="$1" check_cmd="$2" max_wait="${3:-45}"
+ local waited=0
+ while [[ $waited -lt $max_wait ]]; do
+ if eval "$check_cmd" 2>/dev/null; then
+ success "$name: healthy (${waited}s)"
+ return 0
+ fi
+ sleep 3
+ waited=$((waited + 3))
+ done
+ warn "$name: not responding after ${max_wait}s"
+ VERIFY_FAILED=true
+ return 0
+}
+
+if [[ "$DRY_RUN" != "true" ]]; then
+ verify_health "API (port 4000)" \
+ "docker compose exec -T api wget -q --spider http://localhost:4000/api/health" 60
+ verify_health "Admin (port 3000)" \
+ "docker compose exec -T admin wget -q --spider http://localhost:3000/" 90
+ if grep -q 'changemaker-media-api' <<<"$(docker ps --format '{{.Names}}' || true)"; then # capture first: grep -q must not SIGPIPE docker ps under pipefail
+ verify_health "Media API (port 4100)" \
+ "docker compose exec -T media-api wget -q --spider http://127.0.0.1:4100/health" 30
+ fi
+
+ if "$VERIFY_FAILED"; then
+ UPGRADE_WARNINGS='["Some health checks failed after image-only upgrade — services may still be starting"]'
+ fi
+fi
+
+# =============================================================================
+# Summary + deferred ccp-agent restart
+# =============================================================================
+ELAPSED_MIN=$(( (SECONDS - START_TIME) / 60 ))
+ELAPSED_SEC=$(( (SECONDS - START_TIME) % 60 ))
+echo ""
+echo -e "${BOLD}${GREEN}================================================${NC}"
+echo -e "${BOLD} Image-Only Upgrade Complete${NC}"
+echo -e "${BOLD}${GREEN}================================================${NC}"
+printf " Previous: %s\n" "$PRE_VERSION"
+printf " Duration: %dm %ds\n" "$ELAPSED_MIN" "$ELAPSED_SEC"
+printf " Log: %s\n" "$LOG_FILE"
+
+write_progress 4 "Complete" 100 "Image-only upgrade complete"
+write_result "true" "Image-only upgrade complete (previous: ${PRE_VERSION})" "$UPGRADE_WARNINGS"
+
+# Deferred ccp-agent restart — see upgrade.sh for full rationale. Same
+# mechanism: nohup'd, disowned subshell that picks up the new image after
+# this script has cleanly exited.
+if echo "${PROFILES_SAVED:-}" | tr ',' '\n' | grep -qx 'ccp-agent'; then
+ if [[ "$DRY_RUN" == "true" ]]; then
+ info "[DRY RUN] Would schedule deferred ccp-agent restart"
+ else
+ info "Scheduling deferred ccp-agent restart..."
+ nohup bash -c "
+ sleep 3
+ cd '$PROJECT_DIR'
+ COMPOSE_PROFILES='ccp-agent' docker compose --profile ccp-agent up -d ccp-agent
+ " >/dev/null 2>&1 < /dev/null &
+ disown
+ success "ccp-agent restart scheduled (will pick up new image)"
+ fi
+fi
+
+release_lock
+trap - EXIT
+exit 0