diff --git a/scripts/upgrade.sh b/scripts/upgrade.sh
index b4e3e43f..ea597dd1 100755
--- a/scripts/upgrade.sh
+++ b/scripts/upgrade.sh
@@ -749,28 +749,11 @@ if [[ "$PULL_SERVICES" == "true" ]]; then
 fi
 
 # =============================================================================
-# Phase 5: Service Restart
+# Phase 5: Database Migration
 # =============================================================================
 
-phase "5" "Service Restart"
-write_progress 5 "Service Restart" 70 "Restarting services..."
-
-# Stop application containers
-info "Stopping application containers..."
-docker compose stop $APP_CONTAINERS 2>/dev/null || true
-success "Application containers stopped"
-
-# Force-recreate LSIO containers to prevent anonymous volume shadowing bind mounts.
-# LSIO images define a VOLUME at /config in their Dockerfile. When a container is
-# merely restarted, Docker reuses the old anonymous volume whose /config/www is empty,
-# which shadows the bind mount (e.g., ./mkdocs/site:/config/www → 403 Forbidden).
-# Removing the container first ensures a fresh anonymous volume that respects bind mounts.
-info "Removing LSIO containers (clearing anonymous volumes)..."
-docker compose rm -sf $LSIO_VOLUME_CONTAINERS 2>/dev/null || true
-success "LSIO containers cleared for fresh recreation"
-
-# Verify Gancio config.json exists before starting services
-verify_gancio_config
+phase "5" "Database Migration"
+write_progress 5 "Database Migration" 55 "Checking database state..."
 
 # Ensure infrastructure is running and healthy
 info "Ensuring infrastructure is up..."
@@ -790,8 +773,116 @@ while ! docker compose exec -T v2-postgres pg_isready -U "${V2_POSTGRES_USER:-ch
 done
 success "PostgreSQL ready (${PG_WAIT}s)"
 
-# Start API first (entrypoint runs prisma db push + seed)
-info "Starting API (migrations will auto-apply)..."
+# Check for failed/incomplete migrations.
+# Only rows that never finished AND were never rolled back are selected: a row
+# with rolled_back_at set has already been resolved, so marking it --applied
+# again would at best fail and at worst skip a migration meant to be re-run.
+info "Checking for failed migrations..."
+FAILED_MIGRATIONS="$(docker compose exec -T v2-postgres psql -U "${V2_POSTGRES_USER:-changemaker}" -d "${V2_POSTGRES_DB:-changemaker_v2}" -t -A -c "
+    SELECT migration_name FROM _prisma_migrations
+    WHERE finished_at IS NULL
+      AND rolled_back_at IS NULL
+      AND started_at < NOW() - INTERVAL '10 minutes'
+" 2>/dev/null || true)"
+
+if [[ -n "$FAILED_MIGRATIONS" ]]; then
+    warn "Found failed/incomplete migrations — auto-resolving..."
+    UNRESOLVED_MIGRATIONS=0
+    while IFS= read -r migration_name; do
+        [[ -z "$migration_name" ]] && continue
+        info " Resolving: $migration_name"
+        # stdin is /dev/null so the one-off container cannot consume the
+        # remaining migration names this loop reads from the here-string.
+        if ! docker compose run --rm --no-deps --entrypoint "" api \
+            npx prisma migrate resolve --applied "$migration_name" </dev/null 2>&1; then
+            warn " Could not auto-resolve $migration_name (may need manual intervention)"
+            UNRESOLVED_MIGRATIONS=$((UNRESOLVED_MIGRATIONS + 1))
+        fi
+    done <<< "$FAILED_MIGRATIONS"
+    if [[ "$UNRESOLVED_MIGRATIONS" -eq 0 ]]; then
+        success "Failed migrations resolved"
+    else
+        warn "$UNRESOLVED_MIGRATIONS failed migration(s) could not be auto-resolved"
+    fi
+else
+    success "No failed migrations found"
+fi
+
+# Run migrations in a one-off container (catches errors here, not in a restart loop)
+info "Running database migrations..."
+write_progress 5 "Database Migration" 60 "Applying migrations..."
+if ! docker compose run --rm --no-deps --entrypoint "" api \
+    npx prisma migrate deploy 2>&1; then
+    error "Database migration failed!"
+    error ""
+    error "Common fixes:"
+    error " 1. Check migration status:"
+    error " docker compose exec v2-postgres psql -U ${V2_POSTGRES_USER:-changemaker} -d ${V2_POSTGRES_DB:-changemaker_v2} \\"
+    error " -c \"SELECT migration_name, finished_at, rolled_back_at FROM _prisma_migrations ORDER BY started_at DESC LIMIT 10;\""
+    error " 2. Mark a stuck migration as applied:"
+    error " docker compose run --rm --no-deps --entrypoint '' api npx prisma migrate resolve --applied <migration_name>"
+    error " 3. Check logs: docker compose logs api --tail 50"
+    error ""
+    error "After fixing, re-run: ./scripts/upgrade.sh --force --skip-backup"
+    exit 1
+fi
+
+# Count applied migrations
+MIGRATION_COUNT="$(docker compose exec -T v2-postgres psql -U "${V2_POSTGRES_USER:-changemaker}" -d "${V2_POSTGRES_DB:-changemaker_v2}" -t -A -c "
+    SELECT COUNT(*) FROM _prisma_migrations WHERE finished_at IS NOT NULL
+" 2>/dev/null || echo "?")"
+success "Migrations up to date ($MIGRATION_COUNT total applied)"
+
+# Run database seed (idempotent)
+info "Running database seed..."
+write_progress 5 "Database Migration" 65 "Seeding database..."
+if docker compose run --rm --no-deps --entrypoint "" api \
+    npx prisma db seed 2>&1; then
+    success "Database seed complete"
+else
+    warn "Database seed had warnings (non-fatal, continuing)"
+fi
+
+# =============================================================================
+# Phase 6: Service Restart
+# =============================================================================
+
+phase "6" "Service Restart"
+write_progress 6 "Service Restart" 70 "Restarting services..."
+
+# Stop application containers
+info "Stopping application containers..."
+docker compose stop $APP_CONTAINERS 2>/dev/null || true
+success "Application containers stopped"
+
+# Force-recreate LSIO containers to prevent anonymous volume shadowing bind mounts.
+# LSIO images define a VOLUME at /config in their Dockerfile. When a container is
+# merely restarted, Docker reuses the old anonymous volume whose /config/www is empty,
+# which shadows the bind mount (e.g., ./mkdocs/site:/config/www → 403 Forbidden).
+# Removing the container first ensures a fresh anonymous volume that respects bind mounts.
+info "Removing LSIO containers (clearing anonymous volumes)..."
+docker compose rm -sf $LSIO_VOLUME_CONTAINERS 2>/dev/null || true
+success "LSIO containers cleared for fresh recreation"
+
+# Verify Gancio config.json exists before starting services
+verify_gancio_config
+
+# Detect if npm dependencies changed (stale anonymous volumes cause missing modules)
+# A here-string is used instead of echo | grep -q so an early grep exit cannot
+# turn into a SIGPIPE failure of the pipeline when pipefail is enabled.
+NEEDS_VOLUME_REFRESH=false
+if grep -qE "^(api|admin)/(package\.json|package-lock\.json)" <<< "${CHANGED_FILES:-}"; then
+    NEEDS_VOLUME_REFRESH=true
+    warn "Package dependencies changed — will recreate containers with fresh volumes"
+fi
+
+# Start API (migrations already applied in Phase 5)
+info "Starting API..."
+if [[ "$NEEDS_VOLUME_REFRESH" == "true" ]]; then
+    info "Removing old API/admin containers (clearing stale node_modules volumes)..."
+    # -v also removes the containers' anonymous volumes instead of orphaning them
+    docker compose rm -sfv api admin 2>/dev/null || true
+fi
 docker compose up -d api
 
 # Poll API health check
@@ -840,11 +931,11 @@ if [[ "$MONITORING_WAS_RUNNING" == "true" ]]; then
 fi
 
 # =============================================================================
-# Phase 6: Post-Upgrade Verification
+# Phase 7: Post-Upgrade Verification
 # =============================================================================
 
-phase "6" "Post-Upgrade Verification"
-write_progress 6 "Verification" 90 "Running health checks..."
+phase "7" "Post-Upgrade Verification"
+write_progress 7 "Verification" 90 "Running health checks..."
 
 VERIFY_FAILED=false
 
@@ -924,7 +1015,7 @@ if [[ "$VERIFY_FAILED" == "true" ]]; then
     UPGRADE_WARNINGS='["Some health checks failed after upgrade — services may still be starting"]'
 fi
 
-write_progress 6 "Verification" 100 "Upgrade complete!"
+write_progress 7 "Verification" 100 "Upgrade complete!"
 write_result "true" "Upgraded ${PRE_UPGRADE_SHORT} → ${FINAL_COMMIT} (${COMMIT_COUNT} commits)" "$UPGRADE_WARNINGS"
 
 echo ""