From a82e95946b3d873a40e0f930938a0d2e1e019a6f Mon Sep 17 00:00:00 2001 From: bunker-admin Date: Tue, 19 May 2026 17:02:55 -0600 Subject: [PATCH] fix(gancio): pre-start config-init sidecar prevents restart loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gancio refuses to start when its DB has tables but the data volume has no config.json ("Non empty db! Please move your current db elsewhere than retry"), which produces an infinite restart loop. This hit production tenants bnkops and trbh (>1200 restart cycles each) — proximate cause was a missing config.json in changemakerlite_gancio-data with the DB fully populated. Add gancio-config-init alpine sidecar that runs on every `up`: - no-op when config.json exists - regenerates from .env when missing (1000:1000 ownership) - gancio service now depends on it with condition: service_completed_successfully Also harden verify_gancio_config in upgrade.sh to error loudly when multiple gancio-data volumes match (silent head -1 could pick the wrong one after a compose project rename). --- docker-compose.prod.yml | 35 +++++++++++++++++++++++++++++++++++ docker-compose.yml | 36 ++++++++++++++++++++++++++++++++++++ scripts/upgrade.sh | 17 ++++++++++++++--- 3 files changed, 85 insertions(+), 3 deletions(-) diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index fe967f0..6f860c0 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -976,6 +976,39 @@ services: retries: 10 start_period: 30s + # Gancio Config Init — Writes /home/node/data/config.json from .env if missing. + # Gancio refuses to start when its DB has tables but the data volume has no + # config.json ("Non empty db! Please move your current db elsewhere than retry"), + # which causes an infinite restart loop. This sidecar runs on every `up` and is + # a no-op when config.json is already present. 
See docker-compose.yml for the + # full rationale; the two files must stay in parity per scripts/validate-compose-parity.sh. + gancio-config-init: + image: ${GITEA_REGISTRY:-gitea.bnkops.com/admin}/alpine:3 + container_name: gancio-config-init + restart: "no" + volumes: + - gancio-data:/data + environment: + - GANCIO_BASE_URL=${GANCIO_BASE_URL:-https://events.cmlite.org} + - V2_POSTGRES_USER=${V2_POSTGRES_USER:-changemaker} + - V2_POSTGRES_PASSWORD=${V2_POSTGRES_PASSWORD:?V2_POSTGRES_PASSWORD must be set in .env} + entrypoint: ["sh", "-c"] + command: + - | + set -e + if [ -s /data/config.json ]; then + echo "Gancio config.json present — skipping" + exit 0 + fi + echo "Gancio config.json missing — regenerating from .env" + printf '{"baseurl":"%s","server":{"host":"0.0.0.0","port":13120},"db":{"dialect":"postgres","host":"changemaker-v2-postgres","port":5432,"database":"gancio","username":"%s","password":"%s"}}' \ + "$$GANCIO_BASE_URL" "$$V2_POSTGRES_USER" "$$V2_POSTGRES_PASSWORD" > /data/config.json + chown 1000:1000 /data/config.json; chmod 600 /data/config.json # holds the DB password — owner-only + echo "Gancio config.json regenerated" + logging: *default-logging + networks: + - changemaker-lite + # Gancio — Event management platform (uses shared PostgreSQL) gancio: image: ${GITEA_REGISTRY:-gitea.bnkops.com/admin}/gancio:1.28.2 @@ -984,6 +1017,8 @@ services: depends_on: v2-postgres: condition: service_healthy + gancio-config-init: + condition: service_completed_successfully ports: - "127.0.0.1:${GANCIO_PORT:-8092}:13120" healthcheck: diff --git a/docker-compose.yml b/docker-compose.yml index b843fff..a60f3a6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -998,6 +998,40 @@ services: start_period: 30s + # Gancio Config Init — Writes /home/node/data/config.json from .env if missing. + # Gancio refuses to start when its DB has tables but the data volume has no + # config.json ("Non empty db! 
Please move your current db elsewhere than retry"), + # which causes an infinite restart loop. This sidecar runs on every `up` and is + # a no-op when config.json is already present. Reversible: removing this + # service has no effect on healthy stacks; it only matters when the volume + # loses config.json (volume rename, partial restore, manual volume rm, etc.). + gancio-config-init: + image: alpine:3 + container_name: gancio-config-init + restart: "no" + volumes: + - gancio-data:/data + environment: + - GANCIO_BASE_URL=${GANCIO_BASE_URL:-https://events.cmlite.org} + - V2_POSTGRES_USER=${V2_POSTGRES_USER:-changemaker} + - V2_POSTGRES_PASSWORD=${V2_POSTGRES_PASSWORD:?V2_POSTGRES_PASSWORD must be set in .env} + entrypoint: ["sh", "-c"] + command: + - | + set -e + if [ -s /data/config.json ]; then + echo "Gancio config.json present — skipping" + exit 0 + fi + echo "Gancio config.json missing — regenerating from .env" + printf '{"baseurl":"%s","server":{"host":"0.0.0.0","port":13120},"db":{"dialect":"postgres","host":"changemaker-v2-postgres","port":5432,"database":"gancio","username":"%s","password":"%s"}}' \ + "$$GANCIO_BASE_URL" "$$V2_POSTGRES_USER" "$$V2_POSTGRES_PASSWORD" > /data/config.json + chown 1000:1000 /data/config.json; chmod 600 /data/config.json # holds the DB password — owner-only + echo "Gancio config.json regenerated" + logging: *default-logging + networks: + - changemaker-lite + # Gancio — Event management platform (uses shared PostgreSQL) gancio: image: cisti/gancio:1.28.2 container_name: gancio-changemaker @@ -1005,6 +1039,8 @@ services: depends_on: v2-postgres: condition: service_healthy + gancio-config-init: + condition: service_completed_successfully ports: - "127.0.0.1:${GANCIO_PORT:-8092}:13120" healthcheck: diff --git a/scripts/upgrade.sh b/scripts/upgrade.sh index 0f2009d..8e2b95e 100755 --- a/scripts/upgrade.sh +++ b/scripts/upgrade.sh @@ -188,11 +188,22 @@ restore_user_paths() { # "Non empty db! Please move your current db elsewhere than retry." # This regenerates config.json from .env vars when missing. 
verify_gancio_config() { - local gancio_volume - gancio_volume="$(docker volume ls --format '{{.Name}}' | grep 'gancio-data' | head -1 || true)" - if [[ -z "$gancio_volume" ]]; then + # Note: as of the gancio-config-init sidecar in docker-compose{,.prod}.yml, + # config.json is regenerated automatically on every `up`. This function is + # kept as belt-and-braces for the upgrade flow specifically (e.g. so the + # check happens before the compose-up rather than at compose-up time, and + # so operators see explicit log output during upgrade). + local matches + matches="$(docker volume ls --format '{{.Name}}' | grep 'gancio-data' || true)" + local count + count=$(printf '%s\n' "$matches" | grep -c '.' || true) + if [[ "$count" -eq 0 ]]; then return # No gancio volume exists yet; first run will handle it fi + if [[ "$count" -gt 1 ]]; then + error "Multiple gancio-data volumes found — refusing to guess. Resolve manually:\n$matches" + fi + local gancio_volume="$matches" # Check if config.json exists and is non-empty if docker run --rm -v "${gancio_volume}:/data" alpine test -s /data/config.json 2>/dev/null; then