Add database migration phase and stale volume detection to upgrade script

Inserts Phase 5 (Database Migration) between container rebuild and service
restart. Detects failed/incomplete Prisma migrations via _prisma_migrations
query and auto-resolves them before running migrate deploy in a one-off
container — catching errors in the script rather than letting the API enter
a restart loop. Also detects when package.json/package-lock.json changed
and removes old API/admin containers to prevent stale anonymous volumes
from shadowing updated node_modules.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
admin 2026-03-09 12:19:53 -06:00
parent ef11f94e76
commit b061e2ce61

View File

@@ -749,28 +749,11 @@ if [[ "$PULL_SERVICES" == "true" ]]; then
# NOTE(review): this is a rendered unified diff with the +/- prefixes stripped,
# so removed (old) and added (new) lines appear interleaved below. Not runnable as-is.
fi
# =============================================================================
# Phase 5: Service Restart
# Phase 5: Database Migration
# =============================================================================
# NOTE(review): "Service Restart" above is the removed side of the diff;
# "Database Migration" is its replacement. The old Phase 5 body that follows
# (stop containers / clear LSIO volumes / verify Gancio config) was relocated
# to the new Phase 6 later in this diff.
phase "5" "Service Restart"
write_progress 5 "Service Restart" 70 "Restarting services..."
# Stop application containers
info "Stopping application containers..."
# Unquoted $APP_CONTAINERS is deliberate: word-splitting passes each container
# name as a separate argument to `docker compose stop`.
docker compose stop $APP_CONTAINERS 2>/dev/null || true
success "Application containers stopped"
# Force-recreate LSIO containers to prevent anonymous volume shadowing bind mounts.
# LSIO images define a VOLUME at /config in their Dockerfile. When a container is
# merely restarted, Docker reuses the old anonymous volume whose /config/www is empty,
# which shadows the bind mount (e.g., ./mkdocs/site:/config/www → 403 Forbidden).
# Removing the container first ensures a fresh anonymous volume that respects bind mounts.
info "Removing LSIO containers (clearing anonymous volumes)..."
docker compose rm -sf $LSIO_VOLUME_CONTAINERS 2>/dev/null || true
success "LSIO containers cleared for fresh recreation"
# Verify Gancio config.json exists before starting services
verify_gancio_config
# Added side: the new Phase 5 runs earlier in the progress scale (55%) because
# the restart phase that previously reported 70% now comes after it as Phase 6.
phase "5" "Database Migration"
write_progress 5 "Database Migration" 55 "Checking database state..."
# Ensure infrastructure is running and healthy
info "Ensuring infrastructure is up..."
@@ -790,8 +773,101 @@ while ! docker compose exec -T v2-postgres pg_isready -U "${V2_POSTGRES_USER:-ch
# NOTE(review): tail of the PostgreSQL readiness wait loop — its
# `while ! docker compose exec ... pg_isready` head lies outside this hunk
# (visible only in the hunk header context line).
done
success "PostgreSQL ready (${PG_WAIT}s)"
# Removed side of the diff: previously the API entrypoint ran
# `prisma db push` + seed on startup, so a failing migration surfaced only as
# an API restart loop — exactly what the new explicit phase below prevents.
# Start API first (entrypoint runs prisma db push + seed)
info "Starting API (migrations will auto-apply)..."
# Check for failed/incomplete migrations
info "Checking for failed migrations..."
# Query Prisma's bookkeeping table for migrations that were rolled back, or
# that started more than 10 minutes ago and never finished (presumed stuck).
# `-t -A` yields tuples-only, unaligned output: one migration_name per line.
# The trailing `|| true` keeps a `set -e` script alive when the table does not
# exist yet (fresh database) — FAILED_MIGRATIONS is simply empty in that case.
FAILED_MIGRATIONS="$(docker compose exec -T v2-postgres psql -U "${V2_POSTGRES_USER:-changemaker}" -d "${V2_POSTGRES_DB:-changemaker_v2}" -t -A -c "
SELECT migration_name FROM _prisma_migrations
WHERE rolled_back_at IS NOT NULL
OR (finished_at IS NULL AND started_at IS NOT NULL
AND started_at < NOW() - INTERVAL '10 minutes')
" 2>/dev/null || true)"
if [[ -n "$FAILED_MIGRATIONS" ]]; then
warn "Found failed/incomplete migrations — auto-resolving..."
# Iterate line-by-line over the query result. The here-string feed (see the
# `done <<<` below) keeps the loop in the current shell — no subshell, so any
# variable changes would persist.
while IFS= read -r migration_name; do
[[ -z "$migration_name" ]] && continue
info " Resolving: $migration_name"
# `--entrypoint ""` bypasses the image's normal startup command; `--no-deps`
# avoids restarting linked services. NOTE(review): `migrate resolve --applied`
# marks the migration as fully applied — this assumes its schema changes
# actually landed before the failure; TODO confirm this is safe for
# partially-applied migrations.
docker compose run --rm --no-deps --entrypoint "" api \
npx prisma migrate resolve --applied "$migration_name" 2>&1 || {
warn " Could not auto-resolve $migration_name (may need manual intervention)"
}
done <<< "$FAILED_MIGRATIONS"
success "Failed migrations resolved"
else
success "No failed migrations found"
fi
# Run migrations in a one-off container (catches errors here, not in a restart loop)
info "Running database migrations..."
write_progress 5 "Database Migration" 60 "Applying migrations..."
# Hard failure path: a broken migration aborts the upgrade with actionable
# remediation steps instead of leaving the API crash-looping.
if ! docker compose run --rm --no-deps --entrypoint "" api \
npx prisma migrate deploy 2>&1; then
error "Database migration failed!"
error ""
error "Common fixes:"
error " 1. Check migration status:"
error " docker compose exec v2-postgres psql -U changemaker -d changemaker_v2 \\"
error " -c \"SELECT migration_name, finished_at, rolled_back_at FROM _prisma_migrations ORDER BY started_at DESC LIMIT 10;\""
error " 2. Mark a stuck migration as applied:"
error " docker compose run --rm --no-deps --entrypoint '' api npx prisma migrate resolve --applied <migration_name>"
error " 3. Check logs: docker compose logs api --tail 50"
error ""
error "After fixing, re-run: ./scripts/upgrade.sh --force --skip-backup"
exit 1
fi
# Count applied migrations
# On query failure the success line below shows "?" instead of a number.
MIGRATION_COUNT="$(docker compose exec -T v2-postgres psql -U "${V2_POSTGRES_USER:-changemaker}" -d "${V2_POSTGRES_DB:-changemaker_v2}" -t -A -c "
SELECT COUNT(*) FROM _prisma_migrations WHERE finished_at IS NOT NULL
" 2>/dev/null || echo "?")"
success "Migrations up to date ($MIGRATION_COUNT total applied)"
# Run database seed (idempotent)
info "Running database seed..."
write_progress 5 "Database Migration" 65 "Seeding database..."
# Seed failures are deliberately non-fatal: seeding is expected to be
# idempotent and a seed warning should not block the upgrade.
if ! docker compose run --rm --no-deps --entrypoint "" api \
npx prisma db seed 2>&1; then
warn "Database seed had warnings (non-fatal, continuing)"
fi
success "Database seed complete"
# =============================================================================
# Phase 6: Service Restart
# =============================================================================
# NOTE(review): this phase is the old Phase 5 body relocated (see the removed
# lines earlier in this diff), renumbered 5 -> 6.
phase "6" "Service Restart"
write_progress 6 "Service Restart" 70 "Restarting services..."
# Stop application containers
info "Stopping application containers..."
# Unquoted on purpose: $APP_CONTAINERS word-splits into multiple service names.
docker compose stop $APP_CONTAINERS 2>/dev/null || true
success "Application containers stopped"
# Force-recreate LSIO containers to prevent anonymous volume shadowing bind mounts.
# LSIO images define a VOLUME at /config in their Dockerfile. When a container is
# merely restarted, Docker reuses the old anonymous volume whose /config/www is empty,
# which shadows the bind mount (e.g., ./mkdocs/site:/config/www → 403 Forbidden).
# Removing the container first ensures a fresh anonymous volume that respects bind mounts.
info "Removing LSIO containers (clearing anonymous volumes)..."
docker compose rm -sf $LSIO_VOLUME_CONTAINERS 2>/dev/null || true
success "LSIO containers cleared for fresh recreation"
# Verify Gancio config.json exists before starting services
verify_gancio_config
# Detect if npm dependencies changed (stale anonymous volumes cause missing modules)
# NOTE(review): $CHANGED_FILES is set outside this hunk — presumably a
# newline-separated `git diff --name-only` listing; verify against the earlier
# phases of the script. The regex matches api/ or admin/ package manifests.
NEEDS_VOLUME_REFRESH=false
if echo "$CHANGED_FILES" | grep -qE "^(api|admin)/(package\.json|package-lock\.json)"; then
NEEDS_VOLUME_REFRESH=true
warn "Package dependencies changed — will recreate containers with fresh volumes"
fi
# Start API (migrations already applied in Phase 5)
info "Starting API..."
if [[ "$NEEDS_VOLUME_REFRESH" == "true" ]]; then
info "Removing old API/admin containers (clearing stale node_modules volumes)..."
# `rm -sf` = stop + force-remove, so the recreated containers get fresh
# anonymous volumes instead of reusing stale node_modules from the old ones.
docker compose rm -sf api admin 2>/dev/null || true
fi
docker compose up -d api
# Poll API health check
@@ -840,11 +916,11 @@ if [[ "$MONITORING_WAS_RUNNING" == "true" ]]; then
fi
# =============================================================================
# Phase 6: Post-Upgrade Verification
# Phase 7: Post-Upgrade Verification
# =============================================================================
# NOTE(review): removed/added pairs — the verification phase is renumbered
# 6 -> 7 to make room for the inserted Database Migration phase.
phase "6" "Post-Upgrade Verification"
write_progress 6 "Verification" 90 "Running health checks..."
phase "7" "Post-Upgrade Verification"
write_progress 7 "Verification" 90 "Running health checks..."
# Accumulates health-check outcomes; consulted at the end of the script.
VERIFY_FAILED=false
@@ -924,7 +1000,7 @@ if [[ "$VERIFY_FAILED" == "true" ]]; then
# UPGRADE_WARNINGS is a JSON-array string consumed by write_result below.
UPGRADE_WARNINGS='["Some health checks failed after upgrade — services may still be starting"]'
fi
# Removed/added pair: final progress write renumbered 6 -> 7.
write_progress 6 "Verification" 100 "Upgrade complete!"
write_progress 7 "Verification" 100 "Upgrade complete!"
# NOTE(review): the message concatenates ${PRE_UPGRADE_SHORT}${FINAL_COMMIT}
# with no separator — a glyph (e.g. "→") between the two commit refs may have
# been lost in this rendering; confirm against the real script.
write_result "true" "Upgraded ${PRE_UPGRADE_SHORT}${FINAL_COMMIT} (${COMMIT_COUNT} commits)" "$UPGRADE_WARNINGS"
echo ""