Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| f34382ebdd | |||
| 4a3d9d7c41 | |||
| 731e70ee42 | |||
| a7d3dd772b | |||
| 9613c3ec81 | |||
| e88ac79ae8 | |||
| 1b80e8294c | |||
| a531f9b9ce | |||
| a82e95946b |
5
.gitignore
vendored
5
.gitignore
vendored
@ -64,6 +64,11 @@ core.*
|
|||||||
/backups/
|
/backups/
|
||||||
.upgrade.lock
|
.upgrade.lock
|
||||||
|
|
||||||
|
# Pre-upgrade mkdocs snapshots (created by scripts/lib/mkdocs-snapshot.sh).
|
||||||
|
# These are the tenant-content rescue archives written before every upgrade;
|
||||||
|
# discoverable in the install root via `ls`. Retention: last 5 (see helper).
|
||||||
|
/mkdocs-backup-*.tar.gz
|
||||||
|
|
||||||
# Release tarballs (generated by build-release.sh)
|
# Release tarballs (generated by build-release.sh)
|
||||||
/releases/
|
/releases/
|
||||||
|
|
||||||
|
|||||||
@ -39,6 +39,7 @@ import {
|
|||||||
CloudOutlined,
|
CloudOutlined,
|
||||||
DisconnectOutlined,
|
DisconnectOutlined,
|
||||||
UploadOutlined,
|
UploadOutlined,
|
||||||
|
ThunderboltOutlined,
|
||||||
BellOutlined,
|
BellOutlined,
|
||||||
CheckCircleOutlined,
|
CheckCircleOutlined,
|
||||||
WarningOutlined,
|
WarningOutlined,
|
||||||
@ -563,6 +564,24 @@ export default function InstanceDetailPage() {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Image-only upgrade (Approach B): pulls images + recreates core app services
|
||||||
|
// without touching tracked files. Faster + safer than full upgrade for releases
|
||||||
|
// that don't change compose/templates.
|
||||||
|
const handleStartImageUpgrade = async () => {
|
||||||
|
setUpgradingInstance(true);
|
||||||
|
try {
|
||||||
|
const { data } = await api.post(`/instances/${id}/upgrade-images`, {});
|
||||||
|
setCurrentUpgrade(data.data);
|
||||||
|
message.success('Image-only upgrade started');
|
||||||
|
} catch (err: unknown) {
|
||||||
|
const resp = (err as { response?: { data?: { error?: { message?: string } } } })?.response
|
||||||
|
?.data?.error;
|
||||||
|
message.error(resp?.message || 'Failed to start image-only upgrade');
|
||||||
|
} finally {
|
||||||
|
setUpgradingInstance(false);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// Event handlers
|
// Event handlers
|
||||||
const handleAcknowledgeEvent = async (eventId: string) => {
|
const handleAcknowledgeEvent = async (eventId: string) => {
|
||||||
try {
|
try {
|
||||||
@ -1632,25 +1651,41 @@ export default function InstanceDetailPage() {
|
|||||||
closable
|
closable
|
||||||
/>
|
/>
|
||||||
)}
|
)}
|
||||||
<div style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center' }}>
|
<div style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center', gap: 16 }}>
|
||||||
<Typography.Text type="secondary">
|
<Typography.Text type="secondary" style={{ flex: 1 }}>
|
||||||
Pulls latest code, runs migrations, and restarts services. CCP backup is recommended before upgrading.
|
Full upgrade pulls the latest code, runs migrations, and restarts services. Quick upgrade only pulls images and recreates the core app — tenant content stays untouched and it's ~2 min faster. Use Quick when the release notes say no orchestration changes.
|
||||||
</Typography.Text>
|
</Typography.Text>
|
||||||
<Popconfirm
|
<Space>
|
||||||
title="Start upgrade?"
|
<Popconfirm
|
||||||
description="This will pull the latest code, run database migrations, and restart all services. Brief downtime is expected."
|
title="Start quick (image-only) upgrade?"
|
||||||
onConfirm={handleStartUpgrade}
|
description="Pulls new container images and recreates the API/Admin/Media/Nginx services. No filesystem changes — mkdocs and configs are not touched. Brief downtime is expected."
|
||||||
disabled={instance.status !== 'RUNNING' && instance.status !== 'STOPPED'}
|
onConfirm={handleStartImageUpgrade}
|
||||||
>
|
|
||||||
<Button
|
|
||||||
type="primary"
|
|
||||||
icon={<UploadOutlined />}
|
|
||||||
loading={upgradingInstance}
|
|
||||||
disabled={instance.status !== 'RUNNING' && instance.status !== 'STOPPED'}
|
disabled={instance.status !== 'RUNNING' && instance.status !== 'STOPPED'}
|
||||||
>
|
>
|
||||||
Upgrade Now
|
<Button
|
||||||
</Button>
|
icon={<ThunderboltOutlined />}
|
||||||
</Popconfirm>
|
loading={upgradingInstance}
|
||||||
|
disabled={instance.status !== 'RUNNING' && instance.status !== 'STOPPED'}
|
||||||
|
>
|
||||||
|
Quick Upgrade
|
||||||
|
</Button>
|
||||||
|
</Popconfirm>
|
||||||
|
<Popconfirm
|
||||||
|
title="Start full upgrade?"
|
||||||
|
description="This will pull the latest code, run database migrations, and restart all services. Brief downtime is expected."
|
||||||
|
onConfirm={handleStartUpgrade}
|
||||||
|
disabled={instance.status !== 'RUNNING' && instance.status !== 'STOPPED'}
|
||||||
|
>
|
||||||
|
<Button
|
||||||
|
type="primary"
|
||||||
|
icon={<UploadOutlined />}
|
||||||
|
loading={upgradingInstance}
|
||||||
|
disabled={instance.status !== 'RUNNING' && instance.status !== 'STOPPED'}
|
||||||
|
>
|
||||||
|
Upgrade Now
|
||||||
|
</Button>
|
||||||
|
</Popconfirm>
|
||||||
|
</Space>
|
||||||
</div>
|
</div>
|
||||||
</Space>
|
</Space>
|
||||||
)}
|
)}
|
||||||
|
|||||||
@ -8,7 +8,16 @@ COPY src/ ./src/
|
|||||||
RUN npx tsc
|
RUN npx tsc
|
||||||
|
|
||||||
FROM node:20-alpine
|
FROM node:20-alpine
|
||||||
RUN apk add --no-cache docker-cli docker-cli-compose git rsync
|
# bash + curl + jq + python3 are required by the changemaker scripts the agent
|
||||||
|
# shells out to (upgrade-check.sh, upgrade.sh, backup.sh). Without them, every
|
||||||
|
# /upgrade/* and /backup/* call returns "command not found" failures.
|
||||||
|
RUN apk add --no-cache docker-cli docker-cli-compose git rsync bash curl jq python3
|
||||||
|
# Agent runs as root, but the bind-mounted /app/instance is owned by the host
|
||||||
|
# user (UID 1000 = `node` inside the container). Modern git refuses to operate
|
||||||
|
# on repos with mismatched ownership without an explicit safe.directory entry.
|
||||||
|
# Wildcard whitelist all paths — the agent only mounts a single host directory
|
||||||
|
# anyway (the instance's project root).
|
||||||
|
RUN git config --system --add safe.directory '*'
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
COPY package*.json ./
|
COPY package*.json ./
|
||||||
RUN npm ci --production
|
RUN npm ci --production
|
||||||
|
|||||||
@ -188,6 +188,85 @@ router.post('/instance/:slug/upgrade/start', async (req: Request, res: Response)
|
|||||||
res.status(202).json({ started: true });
|
res.status(202).json({ started: true });
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// POST /instance/:slug/upgrade/start-image-only — Run image-upgrade.sh in background
|
||||||
|
//
|
||||||
|
// Image-only upgrade: pulls latest images + recreates services without touching
|
||||||
|
// tracked files (no git pull, no tarball extract, no VERSION mutation). Tenant
|
||||||
|
// content is implicitly safe because the script never writes outside data/upgrade.
|
||||||
|
// See scripts/image-upgrade.sh for full rationale.
|
||||||
|
//
|
||||||
|
// Schema-compatible with /upgrade/start: writes the same progress.json + result.json
|
||||||
|
// so the CCP poll loop in runRemoteUpgrade() works unchanged.
|
||||||
|
router.post('/instance/:slug/upgrade/start-image-only', async (req: Request, res: Response) => {
|
||||||
|
const slug = param(req, 'slug');
|
||||||
|
const entry = await getSlugEntry(slug);
|
||||||
|
const { imageTag } = req.body || {};
|
||||||
|
|
||||||
|
// SECURITY: imageTag flows into bash via --image-tag. Constrain to a safe
|
||||||
|
// subset of docker tag chars (semver, SHA, named tags). Reject anything
|
||||||
|
// that could shell-escape.
|
||||||
|
if (imageTag && !/^[a-zA-Z0-9][a-zA-Z0-9_.-]{0,127}$/.test(String(imageTag))) {
|
||||||
|
res.status(400).json({ error: 'VALIDATION', message: 'Invalid imageTag' });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const scriptPath = path.join(entry.basePath, 'scripts', 'image-upgrade.sh');
|
||||||
|
try {
|
||||||
|
await fs.access(scriptPath);
|
||||||
|
} catch {
|
||||||
|
res.status(404).json({ error: 'NOT_FOUND', message: 'image-upgrade.sh not found' });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Same concurrency guards as the full /upgrade/start endpoint — uses the
|
||||||
|
// same lock + on-disk staleness check + backup/restore mutex.
|
||||||
|
if (isSlugLocked(slug, 'upgrade') || await isUpgradeRunningOnDisk(entry.basePath)) {
|
||||||
|
res.status(409).json({ error: 'SLUG_BUSY', message: 'An upgrade is already in progress' });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (isSlugLocked(slug, 'backup') || isSlugLocked(slug, 'restore')) {
|
||||||
|
res.status(409).json({ error: 'SLUG_BUSY', message: 'A backup or restore is currently running' });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clear stale progress/result files (same convention as /upgrade/start)
|
||||||
|
const progressPath = path.join(entry.basePath, 'data', 'upgrade', 'progress.json');
|
||||||
|
const resultPath = path.join(entry.basePath, 'data', 'upgrade', 'result.json');
|
||||||
|
await fs.mkdir(path.dirname(progressPath), { recursive: true });
|
||||||
|
await fs.rm(progressPath, { force: true });
|
||||||
|
await fs.rm(resultPath, { force: true });
|
||||||
|
|
||||||
|
const args: string[] = [scriptPath, '--api-mode'];
|
||||||
|
if (imageTag) args.push('--image-tag', String(imageTag));
|
||||||
|
|
||||||
|
void withSlugLock(slug, 'upgrade', async () => {
|
||||||
|
logger.info(`[image-upgrade] ${slug}: spawning ${args.join(' ')} (cwd=${entry.basePath})`);
|
||||||
|
try {
|
||||||
|
await new Promise<void>((resolve, reject) => {
|
||||||
|
const proc = spawn('bash', args, {
|
||||||
|
cwd: entry.basePath,
|
||||||
|
env: { ...process.env, COMPOSE_ANSI: 'never' },
|
||||||
|
stdio: ['ignore', 'ignore', 'ignore'],
|
||||||
|
});
|
||||||
|
proc.on('error', reject);
|
||||||
|
proc.on('close', (code) => {
|
||||||
|
if (code === 0) resolve();
|
||||||
|
else reject(new Error(`image-upgrade.sh exited with code ${code}`));
|
||||||
|
});
|
||||||
|
});
|
||||||
|
logger.info(`[image-upgrade] ${slug}: image-upgrade.sh completed`);
|
||||||
|
} catch (err) {
|
||||||
|
logger.error(`[image-upgrade] ${slug}: ${(err as Error).message}`);
|
||||||
|
}
|
||||||
|
}).catch((err) => {
|
||||||
|
if (!(err instanceof SlugBusyError)) {
|
||||||
|
logger.error(`[image-upgrade] ${slug}: lock or background error: ${(err as Error).message}`);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
res.status(202).json({ started: true, mode: 'image-only' });
|
||||||
|
});
|
||||||
|
|
||||||
// GET /instance/:slug/upgrade/progress — Read progress.json
|
// GET /instance/:slug/upgrade/progress — Read progress.json
|
||||||
router.get('/instance/:slug/upgrade/progress', async (req: Request, res: Response) => {
|
router.get('/instance/:slug/upgrade/progress', async (req: Request, res: Response) => {
|
||||||
const entry = await getSlugEntry(param(req, 'slug'));
|
const entry = await getSlugEntry(param(req, 'slug'));
|
||||||
|
|||||||
@ -4,7 +4,7 @@ import rateLimit from 'express-rate-limit';
|
|||||||
import { prisma } from '../../lib/prisma';
|
import { prisma } from '../../lib/prisma';
|
||||||
import { authenticate, requireRole } from '../../middleware/auth';
|
import { authenticate, requireRole } from '../../middleware/auth';
|
||||||
import { validate } from '../../middleware/validate';
|
import { validate } from '../../middleware/validate';
|
||||||
import { createInstanceSchema, updateInstanceSchema, registerInstanceSchema, reconfigureInstanceSchema, configureTunnelSchema, importInstancesSchema, startUpgradeSchema, setupRemoteTunnelSchema } from './instances.schemas';
|
import { createInstanceSchema, updateInstanceSchema, registerInstanceSchema, reconfigureInstanceSchema, configureTunnelSchema, importInstancesSchema, startUpgradeSchema, startImageUpgradeSchema, setupRemoteTunnelSchema } from './instances.schemas';
|
||||||
import * as instancesService from './instances.service';
|
import * as instancesService from './instances.service';
|
||||||
import * as healthService from '../../services/health.service';
|
import * as healthService from '../../services/health.service';
|
||||||
import * as backupService from '../../services/backup.service';
|
import * as backupService from '../../services/backup.service';
|
||||||
@ -362,6 +362,25 @@ router.post(
|
|||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Image-only upgrade (Approach B). Faster + safer than full upgrade for
|
||||||
|
// releases that don't change orchestration/templates. See upgrade.service.ts
|
||||||
|
// startImageUpgrade for full rationale.
|
||||||
|
router.post(
|
||||||
|
'/:id/upgrade-images',
|
||||||
|
requireRole('SUPER_ADMIN', 'OPERATOR'),
|
||||||
|
validate(startImageUpgradeSchema),
|
||||||
|
async (req: Request, res: Response) => {
|
||||||
|
const { imageTag } = req.body || {};
|
||||||
|
const upgrade = await upgradeService.startImageUpgrade(
|
||||||
|
req.params.id as string,
|
||||||
|
req.user!.id,
|
||||||
|
req.ip,
|
||||||
|
{ imageTag }
|
||||||
|
);
|
||||||
|
res.status(201).json({ data: upgrade });
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
router.get(
|
router.get(
|
||||||
'/:id/upgrade-status',
|
'/:id/upgrade-status',
|
||||||
requireRole('SUPER_ADMIN', 'OPERATOR'),
|
requireRole('SUPER_ADMIN', 'OPERATOR'),
|
||||||
|
|||||||
@ -121,6 +121,17 @@ export const startUpgradeSchema = z.object({
|
|||||||
.optional(),
|
.optional(),
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Approach B: image-only upgrade. Pulls images + recreates core app services
|
||||||
|
// without touching tracked files. imageTag is optional — if omitted, the
|
||||||
|
// agent uses whatever IMAGE_TAG the install's .env / compose env defines
|
||||||
|
// (typically `latest`). Tag must be a valid Docker tag.
|
||||||
|
export const startImageUpgradeSchema = z.object({
|
||||||
|
imageTag: z
|
||||||
|
.string()
|
||||||
|
.regex(/^[a-zA-Z0-9][a-zA-Z0-9_.-]{0,127}$/, 'Invalid imageTag')
|
||||||
|
.optional(),
|
||||||
|
});
|
||||||
|
|
||||||
export const setupRemoteTunnelSchema = z.object({
|
export const setupRemoteTunnelSchema = z.object({
|
||||||
// Empty string or omitted → resources use standard subdomains (app., api., etc.)
|
// Empty string or omitted → resources use standard subdomains (app., api., etc.)
|
||||||
// A value like "ck" → creates ck-app., ck-api., etc. for multi-tenant domains
|
// A value like "ck" → creates ck-app., ck-api., etc. for multi-tenant domains
|
||||||
|
|||||||
@ -82,6 +82,10 @@ export interface StartAgentUpgradeOptions {
|
|||||||
branch?: string;
|
branch?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export interface StartAgentImageUpgradeOptions {
|
||||||
|
imageTag?: string;
|
||||||
|
}
|
||||||
|
|
||||||
interface AgentRequestOptions {
|
interface AgentRequestOptions {
|
||||||
method: 'GET' | 'POST' | 'DELETE';
|
method: 'GET' | 'POST' | 'DELETE';
|
||||||
path: string;
|
path: string;
|
||||||
@ -574,6 +578,21 @@ export class RemoteDriver implements ExecutionDriver {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Trigger image-upgrade.sh --api-mode on the remote (Approach B: image-only
|
||||||
|
* upgrade — pulls images + recreates core app services without touching
|
||||||
|
* the install tree). Fire-and-forget; returns 202 immediately. Uses the
|
||||||
|
* same progress/result polling endpoints as startUpgrade.
|
||||||
|
*/
|
||||||
|
async startImageUpgrade(options: StartAgentImageUpgradeOptions = {}): Promise<void> {
|
||||||
|
await this.request({
|
||||||
|
method: 'POST',
|
||||||
|
path: `/instance/${this.slug}/upgrade/start-image-only`,
|
||||||
|
body: options,
|
||||||
|
timeoutMs: 30_000,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Read the agent's data/upgrade/progress.json. Returns the default zero-state
|
* Read the agent's data/upgrade/progress.json. Returns the default zero-state
|
||||||
* if no progress has been written yet.
|
* if no progress has been written yet.
|
||||||
|
|||||||
@ -205,6 +205,10 @@ export interface StartUpgradeOptions {
|
|||||||
branch?: string;
|
branch?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export interface StartImageUpgradeOptions {
|
||||||
|
imageTag?: string;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Start an upgrade for an instance. Returns the created InstanceUpgrade record.
|
* Start an upgrade for an instance. Returns the created InstanceUpgrade record.
|
||||||
* The actual upgrade runs asynchronously (fire-and-forget).
|
* The actual upgrade runs asynchronously (fire-and-forget).
|
||||||
@ -298,6 +302,86 @@ export async function startUpgrade(
|
|||||||
return upgrade;
|
return upgrade;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Start an IMAGE-ONLY upgrade (Approach B). Pulls latest images + recreates
|
||||||
|
* core app services without touching tracked files. Faster (~2 min vs ~4-5
|
||||||
|
* min for full upgrade) and safer because no filesystem mutation outside
|
||||||
|
* docker — tenant content (mkdocs/, configs/) is implicitly preserved.
|
||||||
|
*
|
||||||
|
* Use this for releases that only bump container code or schema. For
|
||||||
|
* releases that change compose orchestration, nginx config, or other
|
||||||
|
* tracked files, use startUpgrade() instead.
|
||||||
|
*
|
||||||
|
* Remote-only for now: local mode would need a `runImageUpgrade` runner
|
||||||
|
* which we haven't built (all our instances are remote via mTLS agent).
|
||||||
|
*/
|
||||||
|
export async function startImageUpgrade(
|
||||||
|
instanceId: string,
|
||||||
|
userId: string,
|
||||||
|
ipAddress?: string,
|
||||||
|
options?: StartImageUpgradeOptions
|
||||||
|
) {
|
||||||
|
const instance = await prisma.instance.findUnique({ where: { id: instanceId } });
|
||||||
|
if (!instance) throw new Error('Instance not found');
|
||||||
|
|
||||||
|
if (!instance.isRemote) {
|
||||||
|
throw new Error('Image-only upgrade is currently supported only for remote instances');
|
||||||
|
}
|
||||||
|
|
||||||
|
if (instance.status !== InstanceStatus.RUNNING && instance.status !== InstanceStatus.STOPPED) {
|
||||||
|
throw new Error(`Cannot upgrade instance in ${instance.status} state`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reuse the same in-progress guard as startUpgrade: only one upgrade
|
||||||
|
// (of either type) at a time per instance.
|
||||||
|
const active = await prisma.instanceUpgrade.findFirst({
|
||||||
|
where: {
|
||||||
|
instanceId,
|
||||||
|
status: { in: [UpgradeStatus.PENDING, UpgradeStatus.IN_PROGRESS] },
|
||||||
|
},
|
||||||
|
});
|
||||||
|
if (active) {
|
||||||
|
throw new Error('An upgrade is already in progress for this instance');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create upgrade record. branch is unused for image-only but keep it
|
||||||
|
// populated with current branch for audit trail consistency.
|
||||||
|
const upgrade = await prisma.instanceUpgrade.create({
|
||||||
|
data: {
|
||||||
|
instanceId,
|
||||||
|
status: UpgradeStatus.PENDING,
|
||||||
|
previousCommit: instance.gitCommit,
|
||||||
|
branch: instance.gitBranch,
|
||||||
|
triggeredById: userId,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
// Audit log
|
||||||
|
await prisma.auditLog.create({
|
||||||
|
data: {
|
||||||
|
userId,
|
||||||
|
instanceId,
|
||||||
|
action: AuditAction.INSTANCE_UPGRADE,
|
||||||
|
details: {
|
||||||
|
upgradeId: upgrade.id,
|
||||||
|
previousCommit: instance.gitCommit,
|
||||||
|
source: 'remote',
|
||||||
|
mode: 'image-only',
|
||||||
|
options: options || {},
|
||||||
|
} as unknown as Prisma.InputJsonValue,
|
||||||
|
ipAddress,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
// Fire-and-forget: reuse runRemoteUpgrade with mode='image-only'. Same
|
||||||
|
// poll loop and result handling — only the initial agent call differs.
|
||||||
|
runRemoteUpgrade(upgrade.id, instance, undefined, 'image-only', options).catch((err) => {
|
||||||
|
logger.error(`[image-upgrade] Remote image upgrade orchestration failed for ${instance.slug}: ${err}`);
|
||||||
|
});
|
||||||
|
|
||||||
|
return upgrade;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Async REMOTE upgrade runner.
|
* Async REMOTE upgrade runner.
|
||||||
*
|
*
|
||||||
@ -316,7 +400,9 @@ export async function startUpgrade(
|
|||||||
async function runRemoteUpgrade(
|
async function runRemoteUpgrade(
|
||||||
upgradeId: string,
|
upgradeId: string,
|
||||||
instance: Instance,
|
instance: Instance,
|
||||||
options?: StartUpgradeOptions
|
options?: StartUpgradeOptions,
|
||||||
|
mode: 'full' | 'image-only' = 'full',
|
||||||
|
imageOnlyOptions?: StartImageUpgradeOptions
|
||||||
) {
|
) {
|
||||||
const slug = instance.slug;
|
const slug = instance.slug;
|
||||||
|
|
||||||
@ -333,18 +419,27 @@ async function runRemoteUpgrade(
|
|||||||
where: { id: upgradeId },
|
where: { id: upgradeId },
|
||||||
data: {
|
data: {
|
||||||
status: UpgradeStatus.IN_PROGRESS,
|
status: UpgradeStatus.IN_PROGRESS,
|
||||||
progressMessage: 'Starting remote upgrade...',
|
progressMessage: mode === 'image-only'
|
||||||
|
? 'Starting image-only upgrade...'
|
||||||
|
: 'Starting remote upgrade...',
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
// Tell the agent to start. The agent has its own mutex + stale-progress
|
// Tell the agent to start. The agent has its own mutex + stale-progress
|
||||||
// check, so this can return 409 if a previous upgrade is still running.
|
// check, so this can return 409 if a previous upgrade is still running.
|
||||||
logger.info(`[upgrade] ${slug}: triggering remote upgrade.sh start`);
|
if (mode === 'image-only') {
|
||||||
await driver.startUpgrade({
|
logger.info(`[upgrade] ${slug}: triggering remote image-upgrade.sh start`);
|
||||||
skipBackup: options?.skipBackup,
|
await driver.startImageUpgrade({
|
||||||
useRegistry: options?.useRegistry,
|
imageTag: imageOnlyOptions?.imageTag,
|
||||||
branch: options?.branch,
|
});
|
||||||
});
|
} else {
|
||||||
|
logger.info(`[upgrade] ${slug}: triggering remote upgrade.sh start`);
|
||||||
|
await driver.startUpgrade({
|
||||||
|
skipBackup: options?.skipBackup,
|
||||||
|
useRegistry: options?.useRegistry,
|
||||||
|
branch: options?.branch,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
// Poll progress + result. We treat /result returning 200 as the signal
|
// Poll progress + result. We treat /result returning 200 as the signal
|
||||||
// that upgrade.sh exited (successfully or with code != 0 — the script
|
// that upgrade.sh exited (successfully or with code != 0 — the script
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -976,6 +976,39 @@ services:
|
|||||||
retries: 10
|
retries: 10
|
||||||
start_period: 30s
|
start_period: 30s
|
||||||
|
|
||||||
|
# Gancio Config Init — Writes /home/node/data/config.json from .env if missing.
|
||||||
|
# Gancio refuses to start when its DB has tables but the data volume has no
|
||||||
|
# config.json ("Non empty db! Please move your current db elsewhere than retry"),
|
||||||
|
# which causes an infinite restart loop. This sidecar runs on every `up` and is
|
||||||
|
# a no-op when config.json is already present. See docker-compose.yml for the
|
||||||
|
# full rationale; the two files must stay in parity per scripts/validate-compose-parity.sh.
|
||||||
|
gancio-config-init:
|
||||||
|
image: ${GITEA_REGISTRY:-gitea.bnkops.com/admin}/alpine:3
|
||||||
|
container_name: gancio-config-init
|
||||||
|
restart: "no"
|
||||||
|
volumes:
|
||||||
|
- gancio-data:/data
|
||||||
|
environment:
|
||||||
|
- GANCIO_BASE_URL=${GANCIO_BASE_URL:-https://events.cmlite.org}
|
||||||
|
- V2_POSTGRES_USER=${V2_POSTGRES_USER:-changemaker}
|
||||||
|
- V2_POSTGRES_PASSWORD=${V2_POSTGRES_PASSWORD:?V2_POSTGRES_PASSWORD must be set in .env}
|
||||||
|
entrypoint: ["sh", "-c"]
|
||||||
|
command:
|
||||||
|
- |
|
||||||
|
set -e
|
||||||
|
if [ -s /data/config.json ]; then
|
||||||
|
echo "Gancio config.json present — skipping"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
echo "Gancio config.json missing — regenerating from .env"
|
||||||
|
printf '{"baseurl":"%s","server":{"host":"0.0.0.0","port":13120},"db":{"dialect":"postgres","host":"changemaker-v2-postgres","port":5432,"database":"gancio","username":"%s","password":"%s"}}' \
|
||||||
|
"$$GANCIO_BASE_URL" "$$V2_POSTGRES_USER" "$$V2_POSTGRES_PASSWORD" > /data/config.json
|
||||||
|
chown 1000:1000 /data/config.json
|
||||||
|
echo "Gancio config.json regenerated"
|
||||||
|
logging: *default-logging
|
||||||
|
networks:
|
||||||
|
- changemaker-lite
|
||||||
|
|
||||||
# Gancio — Event management platform (uses shared PostgreSQL)
|
# Gancio — Event management platform (uses shared PostgreSQL)
|
||||||
gancio:
|
gancio:
|
||||||
image: ${GITEA_REGISTRY:-gitea.bnkops.com/admin}/gancio:1.28.2
|
image: ${GITEA_REGISTRY:-gitea.bnkops.com/admin}/gancio:1.28.2
|
||||||
@ -984,6 +1017,8 @@ services:
|
|||||||
depends_on:
|
depends_on:
|
||||||
v2-postgres:
|
v2-postgres:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
|
gancio-config-init:
|
||||||
|
condition: service_completed_successfully
|
||||||
ports:
|
ports:
|
||||||
- "127.0.0.1:${GANCIO_PORT:-8092}:13120"
|
- "127.0.0.1:${GANCIO_PORT:-8092}:13120"
|
||||||
healthcheck:
|
healthcheck:
|
||||||
@ -1392,9 +1427,10 @@ services:
|
|||||||
- /var/run/docker.sock:/var/run/docker.sock
|
- /var/run/docker.sock:/var/run/docker.sock
|
||||||
- ccp-agent-data:/var/lib/ccp-agent
|
- ccp-agent-data:/var/lib/ccp-agent
|
||||||
- ccp-agent-certs:/etc/ccp-agent
|
- ccp-agent-certs:/etc/ccp-agent
|
||||||
# Mount the instance directory so the agent can read compose files and run
|
# Mount the instance directory so the agent can read compose files and
|
||||||
# `docker compose -p <project>` commands against the real project on disk.
|
# write status.json + backups (writable; agent already has docker.sock,
|
||||||
- .:/app/instance:ro
|
# so file write access is not an additional security escalation).
|
||||||
|
- .:/app/instance
|
||||||
environment:
|
environment:
|
||||||
- AGENT_PORT=7443
|
- AGENT_PORT=7443
|
||||||
- AGENT_DATA_DIR=/var/lib/ccp-agent
|
- AGENT_DATA_DIR=/var/lib/ccp-agent
|
||||||
@ -1406,7 +1442,12 @@ services:
|
|||||||
- INSTANCE_BASE_PATH=/app/instance
|
- INSTANCE_BASE_PATH=/app/instance
|
||||||
# Pass the host's compose project name so the agent runs `docker compose -p <project>`
|
# Pass the host's compose project name so the agent runs `docker compose -p <project>`
|
||||||
# against the right project (not basename of INSTANCE_BASE_PATH, which is "instance").
|
# against the right project (not basename of INSTANCE_BASE_PATH, which is "instance").
|
||||||
|
# COMPOSE_PROJECT is read by the agent's TypeScript for slug derivation;
|
||||||
|
# COMPOSE_PROJECT_NAME is what Docker Compose itself reads when upgrade.sh
|
||||||
|
# shells out to `docker compose ...` — without it, compose defaults to
|
||||||
|
# basename(cwd)="instance" and collides with the host's existing containers.
|
||||||
- COMPOSE_PROJECT=${COMPOSE_PROJECT_NAME:-changemaker-lite}
|
- COMPOSE_PROJECT=${COMPOSE_PROJECT_NAME:-changemaker-lite}
|
||||||
|
- COMPOSE_PROJECT_NAME=${COMPOSE_PROJECT_NAME:-changemaker-lite}
|
||||||
logging: *default-logging
|
logging: *default-logging
|
||||||
networks:
|
networks:
|
||||||
- changemaker-lite
|
- changemaker-lite
|
||||||
|
|||||||
@ -998,6 +998,40 @@ services:
|
|||||||
start_period: 30s
|
start_period: 30s
|
||||||
|
|
||||||
# Gancio — Event management platform (uses shared PostgreSQL)
|
# Gancio — Event management platform (uses shared PostgreSQL)
|
||||||
|
# Gancio Config Init — Writes /home/node/data/config.json from .env if missing.
|
||||||
|
# Gancio refuses to start when its DB has tables but the data volume has no
|
||||||
|
# config.json ("Non empty db! Please move your current db elsewhere than retry"),
|
||||||
|
# which causes an infinite restart loop. This sidecar runs on every `up` and is
|
||||||
|
# a no-op when config.json is already present. Reversible: removing this
|
||||||
|
# service has no effect on healthy stacks; it only matters when the volume
|
||||||
|
# loses config.json (volume rename, partial restore, manual volume rm, etc.).
|
||||||
|
gancio-config-init:
|
||||||
|
image: alpine:3
|
||||||
|
container_name: gancio-config-init
|
||||||
|
restart: "no"
|
||||||
|
volumes:
|
||||||
|
- gancio-data:/data
|
||||||
|
environment:
|
||||||
|
- GANCIO_BASE_URL=${GANCIO_BASE_URL:-https://events.cmlite.org}
|
||||||
|
- V2_POSTGRES_USER=${V2_POSTGRES_USER:-changemaker}
|
||||||
|
- V2_POSTGRES_PASSWORD=${V2_POSTGRES_PASSWORD:?V2_POSTGRES_PASSWORD must be set in .env}
|
||||||
|
entrypoint: ["sh", "-c"]
|
||||||
|
command:
|
||||||
|
- |
|
||||||
|
set -e
|
||||||
|
if [ -s /data/config.json ]; then
|
||||||
|
echo "Gancio config.json present — skipping"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
echo "Gancio config.json missing — regenerating from .env"
|
||||||
|
printf '{"baseurl":"%s","server":{"host":"0.0.0.0","port":13120},"db":{"dialect":"postgres","host":"changemaker-v2-postgres","port":5432,"database":"gancio","username":"%s","password":"%s"}}' \
|
||||||
|
"$$GANCIO_BASE_URL" "$$V2_POSTGRES_USER" "$$V2_POSTGRES_PASSWORD" > /data/config.json
|
||||||
|
chown 1000:1000 /data/config.json
|
||||||
|
echo "Gancio config.json regenerated"
|
||||||
|
logging: *default-logging
|
||||||
|
networks:
|
||||||
|
- changemaker-lite
|
||||||
|
|
||||||
gancio:
|
gancio:
|
||||||
image: cisti/gancio:1.28.2
|
image: cisti/gancio:1.28.2
|
||||||
container_name: gancio-changemaker
|
container_name: gancio-changemaker
|
||||||
@ -1005,6 +1039,8 @@ services:
|
|||||||
depends_on:
|
depends_on:
|
||||||
v2-postgres:
|
v2-postgres:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
|
gancio-config-init:
|
||||||
|
condition: service_completed_successfully
|
||||||
ports:
|
ports:
|
||||||
- "127.0.0.1:${GANCIO_PORT:-8092}:13120"
|
- "127.0.0.1:${GANCIO_PORT:-8092}:13120"
|
||||||
healthcheck:
|
healthcheck:
|
||||||
@ -1414,7 +1450,10 @@ services:
|
|||||||
- /var/run/docker.sock:/var/run/docker.sock
|
- /var/run/docker.sock:/var/run/docker.sock
|
||||||
- ccp-agent-data:/var/lib/ccp-agent
|
- ccp-agent-data:/var/lib/ccp-agent
|
||||||
- ccp-agent-certs:/etc/ccp-agent
|
- ccp-agent-certs:/etc/ccp-agent
|
||||||
- .:/app/instance:ro
|
# Writable: agent must write data/upgrade/{status,progress,result}.json
|
||||||
|
# and data/backups/*.tar.gz. Agent already has docker.sock — file write
|
||||||
|
# access is not an additional security escalation.
|
||||||
|
- .:/app/instance
|
||||||
environment:
|
environment:
|
||||||
- AGENT_PORT=7443
|
- AGENT_PORT=7443
|
||||||
- AGENT_DATA_DIR=/var/lib/ccp-agent
|
- AGENT_DATA_DIR=/var/lib/ccp-agent
|
||||||
@ -1426,7 +1465,12 @@ services:
|
|||||||
- INSTANCE_BASE_PATH=/app/instance
|
- INSTANCE_BASE_PATH=/app/instance
|
||||||
# Pass the host's compose project name so the agent runs `docker compose -p <project>`
|
# Pass the host's compose project name so the agent runs `docker compose -p <project>`
|
||||||
# against the right project (not basename of INSTANCE_BASE_PATH, which is "instance").
|
# against the right project (not basename of INSTANCE_BASE_PATH, which is "instance").
|
||||||
|
# COMPOSE_PROJECT is read by the agent's TypeScript for slug derivation;
|
||||||
|
# COMPOSE_PROJECT_NAME is what Docker Compose itself reads when upgrade.sh
|
||||||
|
# shells out to `docker compose ...` — without it, compose defaults to
|
||||||
|
# basename(cwd)="instance" and collides with the host's existing containers.
|
||||||
- COMPOSE_PROJECT=${COMPOSE_PROJECT_NAME:-changemaker-lite}
|
- COMPOSE_PROJECT=${COMPOSE_PROJECT_NAME:-changemaker-lite}
|
||||||
|
- COMPOSE_PROJECT_NAME=${COMPOSE_PROJECT_NAME:-changemaker-lite}
|
||||||
logging: *default-logging
|
logging: *default-logging
|
||||||
networks:
|
networks:
|
||||||
- changemaker-lite
|
- changemaker-lite
|
||||||
|
|||||||
266
docs/SESSION_HANDOFF_2026-05-20.md
Normal file
266
docs/SESSION_HANDOFF_2026-05-20.md
Normal file
@ -0,0 +1,266 @@
|
|||||||
|
# Session Handoff: Upgrade Flow Redesign (2026-05-20 → 2026-05-21)
|
||||||
|
|
||||||
|
> Carries forward all context from a long working session into the next conversation. If you're a fresh agent: read this top-to-bottom before touching anything.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Quick state of the fleet
|
||||||
|
|
||||||
|
| Tenant | Type | Version | Agent patched | Surgical script update | Notes |
|
||||||
|
|---|---|---|---|---|---|
|
||||||
|
| bnkops (n4) | source | main @ 1b80e82 | ✅ | ⏳ pending | Management node; CCP backend runs here in parallel |
|
||||||
|
| marcelle (n5, cursedknowledge.org) | release | v2.9.15 | ✅ | ⏳ pending | Test bench; first end-to-end CCP upgrade test ran here (succeeded after manual Phase 6 recovery) |
|
||||||
|
| trbh (n6) | source | main @ 1b80e82 | ✅ | ⏳ pending | mkdocs content RESTORED from `stash@{0}` — site serves "That Really Blonde Human" correctly |
|
||||||
|
| pia (n3, pia-bnkops) | release | v2.9.10 | ✅ | ✅ **completed 2026-05-21** | First successful surgical update — proof the procedure works |
|
||||||
|
| pridecorner (n1) | source | main @ 1b80e82 | ✅ | ⏳ pending | Has 3 March 9 upgrade-* stashes still on disk (audit done; recovery deferred to another agent) |
|
||||||
|
| soroush (n7) | source | main @ 1b80e82 | ✅ | ⏳ pending | First tenant fixed tonight |
|
||||||
|
| linda (n2, lindalindsay.org) | release-converted | v2.9.14 | ✅ | ⏳ pending | Was source-install with broken `.git`; converted to release mode (VERSION file written) |
|
||||||
|
|
||||||
|
**Public sites verified working at session end**: trbh.org, docs.trbh.org, bnkops.com, pridecorner.ca, soroushsamavat.org, publicinterestalberta.org, lindalindsay.org, cursedknowledge.org.
|
||||||
|
|
||||||
|
**Known caveat**: docs.bnkops.com returns HTTP 000 externally (Pangolin tunnel routing issue, pre-existing, NOT caused by this session). bnkops mkdocs container serves correct content locally.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## What landed in source (committed + pushed to origin/main)
|
||||||
|
|
||||||
|
| Commit | Description |
|
||||||
|
|---|---|
|
||||||
|
| `1b80e82` | `fix(ccp-agent): whitelist /app/instance for git safe.directory` — ccp-agent Dockerfile |
|
||||||
|
| `e88ac79` | `fix(ccp-agent): export COMPOSE_PROJECT_NAME so upgrade.sh sees correct project` — docker-compose.yml + .prod.yml |
|
||||||
|
| `9613c3e` | `fix(upgrade): Phase 1 of upgrade-flow redesign (Approach A)` — upgrade.sh + scripts/lib/mkdocs-snapshot.sh + scripts/upgrade-stash-cleanup.sh + .gitignore |
|
||||||
|
| `a7d3dd7` | `chore(release): ship scripts/lib/ + classify upgrade-stash-cleanup.sh` — build-release.sh |
|
||||||
|
|
||||||
|
**Release**: v2.10.2 tagged on `a7d3dd7`, uploaded to Gitea Releases as the new "latest" (`/releases/latest` returns v2.10.2 — the timestamp issue from earlier in session is fixed via build-release.sh's `target_commitish` workaround).
|
||||||
|
|
||||||
|
**Earlier in session**: tonight also produced commit `a531f9b` (ccp-agent missing bash/curl/jq/python3 + writable mount) and v2.10.1 release. v2.10.2 supersedes v2.10.1.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## The plan — Approach A (DONE) + B + C (pending)
|
||||||
|
|
||||||
|
Full design lives at `/home/bunker-admin/.claude/plans/okay-so-we-can-enumerated-hejlsberg.md`.
|
||||||
|
|
||||||
|
### Approach A — ✅ Done
|
||||||
|
|
||||||
|
Three fixes to existing `scripts/upgrade.sh` shipping in v2.10.2:
|
||||||
|
|
||||||
|
1. **Phase 6 self-destruct fix** — Phase 6's broad `docker compose up -d` no longer recreates ccp-agent (which would SIGKILL the running script). Instead, ccp-agent restart is deferred to AFTER `write_result` writes the final `result.json`, via a detached `nohup ... & disown` subshell.
|
||||||
|
|
||||||
|
2. **mkdocs/ snapshot fallback** — `scripts/lib/mkdocs-snapshot.sh` is sourced by upgrade.sh's Phase 2. Before any other backup or pull operation, it tarballs the entire `mkdocs/` directory into `mkdocs-backup-<timestamp>.tar.gz` in the install root. Retains last 5. Discoverable via `ls`. Restoration is a one-liner:
|
||||||
|
```bash
|
||||||
|
tar xzf "$(ls -t mkdocs-backup-*.tar.gz | head -1)" -C . && \
|
||||||
|
docker compose restart mkdocs mkdocs-site-server
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **`upgrade-stash-cleanup.sh`** — interactive utility to drop accumulated `upgrade-*` git stashes. Warns LOUDLY if any stash contains `mkdocs/mkdocs.yml` so operators verify recovery before dropping.
|
||||||
|
|
||||||
|
### Approach B — ⏳ Pending (1-2 days)
|
||||||
|
|
||||||
|
Add `--image-only` upgrade mode. Production images are hermetic (bake compiled code + Prisma migrations + entrypoint runs migrations on container start). Therefore `docker compose pull && docker compose up -d` IS a complete code+schema upgrade. **No filesystem mutation outside Docker** → tenant content implicitly safe.
|
||||||
|
|
||||||
|
New files to create:
|
||||||
|
- `scripts/image-upgrade.sh` (~150 lines; sources `scripts/lib/mkdocs-snapshot.sh` for the fallback)
|
||||||
|
- `changemaker-control-panel/agent/src/routes/upgrade.routes.ts` → new endpoint `POST /instance/:slug/upgrade/start-image-only`
|
||||||
|
- `changemaker-control-panel/api/src/services/upgrade.service.ts` → `startImageUpgrade(instanceId, userId, { imageTag })`
|
||||||
|
- `changemaker-control-panel/api/src/services/remote-driver.ts` → `startImageUpgrade()`
|
||||||
|
- `changemaker-control-panel/api/src/modules/instances/instances.routes.ts` → `POST /:id/upgrade-images`
|
||||||
|
- CCP admin UI: "Quick Upgrade (image-only)" button on `InstanceDetailPage.tsx`
|
||||||
|
|
||||||
|
### Approach C — ⏳ Pending (3-5 days)
|
||||||
|
|
||||||
|
CCP-driven template re-render for orchestration-changing upgrades. Reuses existing `template-engine.ts` and `reconfigureInstance` pattern. Only writes templated files (compose, nginx, configs/pangolin); never touches `mkdocs/` or `configs/code-server/data/`. See plan for details.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## How to apply v2.10.2 fixes to remaining tenants
|
||||||
|
|
||||||
|
**For PIA: already done** — used as the proof-of-concept on 2026-05-21. mkdocs.yml md5 unchanged, file count unchanged. ~5 minutes per tenant.
|
||||||
|
|
||||||
|
**For the other 6 tenants**, use the surgical update — DO NOT run a raw `git pull origin main` (it would resurrect tenant-deleted files via merge logic):
|
||||||
|
|
||||||
|
### Source installs (bnkops, trbh, pridecorner, soroush)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# bnkops, trbh, soroush use ~/changemaker.lite
|
||||||
|
# pridecorner uses ~/cmlite/changemaker.lite
|
||||||
|
cd ~/changemaker.lite # or ~/cmlite/changemaker.lite
|
||||||
|
|
||||||
|
git fetch origin main
|
||||||
|
|
||||||
|
mkdir -p scripts/lib
|
||||||
|
git checkout origin/main -- \
|
||||||
|
scripts/upgrade.sh \
|
||||||
|
scripts/upgrade-stash-cleanup.sh \
|
||||||
|
scripts/lib/mkdocs-snapshot.sh \
|
||||||
|
scripts/build-release.sh \
|
||||||
|
docker-compose.yml \
|
||||||
|
.gitignore
|
||||||
|
|
||||||
|
# Sanity: tenant content should still be ahead/divergent (not touched)
|
||||||
|
git status mkdocs/ configs/ # should show no NEW changes from this update
|
||||||
|
```
|
||||||
|
|
||||||
|
### Release installs (marcelle, linda) — same approach as used for pia
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# marcelle: ~/changemaker.lite, ssh bunker-admin@100.90.78.47
|
||||||
|
# linda: ~/changemaker.lite.canonical, ssh bunker-admin@n2-linda.taile33572.ts.net
|
||||||
|
cd ~/changemaker.lite # or ~/changemaker.lite.canonical
|
||||||
|
|
||||||
|
curl -fSL https://gitea.bnkops.com/admin/changemaker.lite/releases/download/v2.10.2/changemaker-lite-v2.10.2.tar.gz \
|
||||||
|
-o /tmp/v2.10.2.tar.gz
|
||||||
|
|
||||||
|
mkdir -p scripts/lib
|
||||||
|
tar -xzf /tmp/v2.10.2.tar.gz --strip-components=1 \
|
||||||
|
changemaker-lite/scripts/upgrade.sh \
|
||||||
|
changemaker-lite/scripts/upgrade-stash-cleanup.sh \
|
||||||
|
changemaker-lite/scripts/lib/mkdocs-snapshot.sh \
|
||||||
|
changemaker-lite/docker-compose.yml
|
||||||
|
|
||||||
|
chmod +x scripts/upgrade.sh scripts/upgrade-stash-cleanup.sh scripts/lib/mkdocs-snapshot.sh
|
||||||
|
rm -f /tmp/v2.10.2.tar.gz
|
||||||
|
|
||||||
|
# Do NOT update VERSION — only scripts changed, rest of install stays at current version.
|
||||||
|
```
|
||||||
|
|
||||||
|
### Verification per tenant
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Before update: capture
|
||||||
|
md5sum mkdocs/mkdocs.yml
|
||||||
|
find mkdocs/docs -type f | wc -l
|
||||||
|
|
||||||
|
# Run the appropriate surgical update above
|
||||||
|
|
||||||
|
# After update: re-verify (should match)
|
||||||
|
md5sum mkdocs/mkdocs.yml
|
||||||
|
find mkdocs/docs -type f | wc -l
|
||||||
|
|
||||||
|
# Confirm new upgrade.sh
|
||||||
|
grep -c 'deferred ccp-agent\|Deferred ccp-agent' scripts/upgrade.sh # expect 2
|
||||||
|
|
||||||
|
# Optional: smoke-test the snapshot helper
|
||||||
|
PROJECT_DIR=$(pwd) bash -c '. scripts/lib/mkdocs-snapshot.sh; snapshot_mkdocs'
|
||||||
|
ls -lh mkdocs-backup-*.tar.gz
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Bug inventory — what we know
|
||||||
|
|
||||||
|
### Fixed in v2.10.2
|
||||||
|
|
||||||
|
| Bug | Memory file | Status |
|
||||||
|
|---|---|---|
|
||||||
|
| Gitea release `created_unix=0` (lightweight tag + Gitea 1.23.x quirk) | `feedback_gitea_release_tag_timing.md` | Fixed in `build-release.sh` — uses `target_commitish` + removes remote tag first |
|
||||||
|
| ccp-agent image missing bash/curl/jq/python3 + git safe.directory | `feedback_ccp_agent_image_deps.md` | Fixed in agent Dockerfile + rolled out to all 7 tenants |
|
||||||
|
| ccp-agent compose mount was `:ro` (blocked status.json writes) | (in `feedback_ccp_agent_image_deps.md`) | Fixed in both compose files |
|
||||||
|
| CCP upgrade Phase 5 collision: `COMPOSE_PROJECT_NAME` mismatch | `feedback_upgrade_compose_project_name.md` | Fixed via env-var addition in compose env block (e88ac79) — also needs `.env` entry on tenants installed before v2.10.2 |
|
||||||
|
| upgrade.sh Phase 6 self-destruct | `feedback_upgrade_sh_bugs.md` | Fixed in v2.10.2 — deferred ccp-agent restart |
|
||||||
|
|
||||||
|
### Open
|
||||||
|
|
||||||
|
- **upgrade.sh `git stash → git pull` stash-no-pop** — Pride Corner has 3 stashes from March 9 holding mkdocs.yml customizations. Existing `save_user_paths`/`restore_user_paths` in upgrade.sh handles the common case; the snapshot fallback (v2.10.2) covers edge cases. Pridecorner-specific recovery handled by another agent.
|
||||||
|
- **Agent-side `detached: true` spawn** — Defense-in-depth. Skip unless Phase 6 self-destruct re-emerges.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Tenant content protection layers (all in v2.10.2)
|
||||||
|
|
||||||
|
1. **`save_user_paths`/`restore_user_paths`** in upgrade.sh — preserves working-tree state of `mkdocs/docs/`, `mkdocs/mkdocs.yml`, `mkdocs/site/`, `configs/`, `nginx/conf.d/services.conf` across `git pull`.
|
||||||
|
2. **`git stash` + auto-resolve on USER_PATHS** — modified tracked files are stashed and then popped, with any conflicts on USER_PATHS auto-resolved via `git checkout --theirs`.
|
||||||
|
3. **Pre-upgrade mkdocs snapshot** — tarball of `mkdocs/` to install root before any other phase runs. Fallback for everything else.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Tonight's recovery work — already applied
|
||||||
|
|
||||||
|
These tenants had content damage from earlier in the session; recovery was completed:
|
||||||
|
|
||||||
|
- **trbh** — mkdocs.yml + 143 M files restored from `stash@{0}`; 538 D-entry files re-deleted. Public sites serve correct branding.
|
||||||
|
- **bnkops** — same pattern, 100 M files restored + 82 D-entry re-deletions. Public sites serve correct branding.
|
||||||
|
- **marcelle** — manual recovery from Phase 6 self-destruct test (file rollback + service restart). On v2.10.1 currently. Operating normally.
|
||||||
|
|
||||||
|
`stash@{0}` is preserved on trbh and bnkops as forensic record + safety net.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## CCP access
|
||||||
|
|
||||||
|
```
|
||||||
|
URL: http://n4-bnkops.taile33572.ts.net:5100 (UI)
|
||||||
|
http://n4-bnkops.taile33572.ts.net:5000 (API)
|
||||||
|
User: admin@thebunkerops.ca
|
||||||
|
Password: NRTgHdC7Zxxs2P2UmNwnEbn3jTwU8uJN (seed; rotate if you want)
|
||||||
|
Role: SUPER_ADMIN
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Test bench (marcelle)
|
||||||
|
|
||||||
|
```
|
||||||
|
SSH: ssh bunker-admin@100.90.78.47
|
||||||
|
Install dir: ~/changemaker.lite
|
||||||
|
Domain: cursedknowledge.org
|
||||||
|
Admin: admin@cursedknowledge.org / @TheBunker2025!
|
||||||
|
CCP slug: changemakerlite
|
||||||
|
CCP id: 71b5bc4a-c47e-4435-b460-e9bc303b76ed
|
||||||
|
```
|
||||||
|
|
||||||
|
Marcelle is the test bench per `docs/TEST_SERVER.md`. Use it for ALL upgrade experiments before touching production tenants.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Per-tenant quick reference
|
||||||
|
|
||||||
|
| Tenant | SSH | Install dir | CCP id |
|
||||||
|
|---|---|---|---|
|
||||||
|
| bnkops | bunker-admin@n4-bnkops.taile33572.ts.net | ~/changemaker.lite | 21238536-7c04-4a3b-a073-38390a939046 |
|
||||||
|
| marcelle | bunker-admin@100.90.78.47 | ~/changemaker.lite | 71b5bc4a-c47e-4435-b460-e9bc303b76ed |
|
||||||
|
| trbh | bunker-admin@n6-trbh.taile33572.ts.net | ~/changemaker.lite | c066dc23-64a5-4684-96a7-992e65c1b82c |
|
||||||
|
| pia | pia-bnkops@n3-pia.taile33572.ts.net | ~/changemaker.lite | 92a11622-d357-4ab4-b21e-60c030c1b026 |
|
||||||
|
| pridecorner | bunker-admin@n1-pridecorner.taile33572.ts.net | ~/cmlite/changemaker.lite | a30de94b-ef28-42b6-a71d-112669526a62 |
|
||||||
|
| soroush | bunker-admin@n7-soroush.taile33572.ts.net | ~/changemaker.lite | 0c70f94c-1319-41e1-867c-5674f17cadda |
|
||||||
|
| linda | bunker-admin@n2-linda.taile33572.ts.net | ~/changemaker.lite.canonical | 6dcc19a1-f4fd-45df-be77-5bf62f8110c8 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Most important "don't repeat my mistakes" notes
|
||||||
|
|
||||||
|
1. **Never run `git stash` + `git pull --ff-only origin main` on a tenant** outside of upgrade.sh. The stash silently displaces tenant content. If you must update files on a source-installed tenant, use targeted `git checkout origin/main -- <specific-file>` instead.
|
||||||
|
|
||||||
|
2. **Never blindly trigger CCP "Upgrade Now"** on a tenant still running pre-v2.10.2 upgrade.sh — it will Phase 6 self-destruct. Apply surgical script update first (instructions above), THEN trigger CCP upgrade.
|
||||||
|
|
||||||
|
3. **mkdocs/docs/ contains upstream tracked files** (default screenshots, demo docs, blog posts). Tenants typically delete these locally without committing. ANY operation that brings origin/main's tracked tree into the working tree (git pull, tarball extract) will resurrect them. v2.10.2's snapshot fallback gives you a recovery path; the surgical update procedure (this doc) avoids the issue entirely.
|
||||||
|
|
||||||
|
4. **mkdocs/mkdocs.yml is tracked, tenant-customized** with branding. Lives under USER_PATHS so v2.10.2's upgrade.sh protects it. But if you do raw git operations outside the script, it's exposed.
|
||||||
|
|
||||||
|
5. **CCP backend on n4 is decoupled from per-tenant ccp-agent**. Restarting a tenant's ccp-agent does NOT affect CCP itself. Verified during bnkops patch (CCP backend stayed at 41h uptime while ccp-agent recreated).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Memory files (in `/home/bunker-admin/.claude/projects/-home-bunker-admin-changemaker-lite/memory/`)
|
||||||
|
|
||||||
|
Latest session work documented in:
|
||||||
|
- `feedback_gitea_release_tag_timing.md`
|
||||||
|
- `feedback_ccp_agent_image_deps.md`
|
||||||
|
- `feedback_upgrade_compose_project_name.md`
|
||||||
|
- `feedback_upgrade_sh_bugs.md`
|
||||||
|
- `feedback_session_2026_05_20_damage_report.md`
|
||||||
|
|
||||||
|
Plus the architectural plan: `/home/bunker-admin/.claude/plans/okay-so-we-can-enumerated-hejlsberg.md`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Where to start the next session
|
||||||
|
|
||||||
|
Recommended sequence:
|
||||||
|
|
||||||
|
1. **Apply surgical update to remaining 6 tenants** (~30-45 min, low risk; pia procedure already proven). Order: marcelle, linda (release), then soroush, trbh, bnkops, pridecorner (source).
|
||||||
|
2. **Test CCP-driven upgrade on marcelle** after surgical update lands. This will verify the deferred ccp-agent restart works end-to-end through the CCP path (the test we couldn't complete tonight because Phase 6 kept self-destructing).
|
||||||
|
3. **Implement Approach B** per the plan — image-only upgrade mode. Estimated 1-2 days.
|
||||||
|
4. **Implement Approach C** — CCP template re-render. 3-5 days.
|
||||||
|
|
||||||
|
If only one thing happens next session: **do step 1**. Six surgical updates × ~5 minutes each. The rest of the fleet stays vulnerable to Phase 6 self-destruct until they're on v2.10.2's upgrade.sh.
|
||||||
169
docs/SESSION_HANDOFF_2026-05-21.md
Normal file
169
docs/SESSION_HANDOFF_2026-05-21.md
Normal file
@ -0,0 +1,169 @@
|
|||||||
|
# Session Handoff: Approach B Rollout + Approach C Planning (2026-05-21)
|
||||||
|
|
||||||
|
Carries forward all context from a long working session. If you're a fresh agent: read this top-to-bottom before touching anything.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## What landed in this session (commits on origin/main)
|
||||||
|
|
||||||
|
| Commit | Description |
|
||||||
|
|---|---|
|
||||||
|
| `4a3d9d7` | `feat(upgrade): Approach B - image-only upgrade mode` — 7 files, 666 insertions. scripts/image-upgrade.sh + CCP agent endpoint + CCP backend (driver/service/route/schema) + admin UI "Quick Upgrade" button. |
|
||||||
|
| `<this commit>` | docs: session handoff + Approach C Phase 0 initial template overlay |
|
||||||
|
|
||||||
|
Plus several non-tracked deploys:
|
||||||
|
- v2.10.2 surgical update applied to remaining 6 tenants (soroush, linda, marcelle, bnkops, trbh, pridecorner — pia was done previously). All verified mkdocs untouched, upgrade.sh sha matches `b9f37d59...`.
|
||||||
|
- Fleet rollout of Approach B: new `image-upgrade.sh` script delivered + new `ccp-agent` image (with `/upgrade/start-image-only` endpoint) deployed to all 7 tenants. Bnkops's ccp-agent was rebuilt from source (builds locally rather than pulled from registry).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Fleet state at session end
|
||||||
|
|
||||||
|
| Tenant | Surgical update v2.10.2 | image-upgrade.sh | New ccp-agent with image-only endpoint |
|
||||||
|
|---|---|---|---|
|
||||||
|
| pia | ✅ (prior session) | ✅ | ✅ |
|
||||||
|
| soroush | ✅ | ✅ | ✅ |
|
||||||
|
| linda | ✅ | ✅ | ✅ |
|
||||||
|
| marcelle | ✅ + tested both A and B E2E | ✅ | ✅ |
|
||||||
|
| bnkops | ✅ | ✅ | ✅ (rebuilt locally) |
|
||||||
|
| trbh | ✅ | ✅ | ✅ |
|
||||||
|
| pridecorner | ✅ | ✅ | ✅ |
|
||||||
|
|
||||||
|
Marcelle E2E test results:
|
||||||
|
- **Approach A (full upgrade)**: v2.10.1 → v2.10.2 in 250s, COMPLETED, no SIGKILL on script. Phase 6 deferred ccp-agent restart fix worked end-to-end through CCP path.
|
||||||
|
- **Approach B (Quick Upgrade) run 1**: 121s, COMPLETED, mkdocs.yml md5 unchanged.
|
||||||
|
- **Approach B (Quick Upgrade) run 2**: 100s (cached pull), COMPLETED, mkdocs unchanged again — confirms idempotency.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Fleet backup (Phase 0 work — defensive)
|
||||||
|
|
||||||
|
All 7 tenants backed up to `/media/bunker-admin/BACKUP/fleet/<node>/2026-05-21-pre-v2.10.2/`:
|
||||||
|
|
||||||
|
| Node | Tenant | Size |
|
||||||
|
|---|---|---|
|
||||||
|
| n1 | pridecorner | 182MB (includes 3 stash patches from March 9) |
|
||||||
|
| n2 | linda | 26MB |
|
||||||
|
| n3 | pia | 45MB (post-surgical state) |
|
||||||
|
| n4 | bnkops | 4.4GB (huge — 2277 mkdocs/docs files) |
|
||||||
|
| n5 | marcelle | 28MB |
|
||||||
|
| n6 | trbh | 336MB |
|
||||||
|
| n7 | soroush | 76MB |
|
||||||
|
|
||||||
|
Each tenant dir has `mkdocs.tar.gz`, `configs-and-nginx.tar.gz`, `config-files.tar.gz`, `host-state.txt`, `git-state.txt` (source installs only), and `MANIFEST.txt`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Approach C planning + initial overlay
|
||||||
|
|
||||||
|
**Decision: rewrite `docker-compose.yml.hbs` in prod-compose style** to make CCP-driven template re-render safe for the install.sh fleet.
|
||||||
|
|
||||||
|
### Why a rewrite (not sync-by-addition)
|
||||||
|
|
||||||
|
Discovered the CCP template and `docker-compose.prod.yml` use fundamentally different conventions:
|
||||||
|
| | Old template (`.hbs`) | Canonical prod |
|
||||||
|
|---|---|---|
|
||||||
|
| Container names | `{{containerPrefix}}-postgres` (dynamic) | `changemaker-v2-postgres` (hardcoded) |
|
||||||
|
| Secrets | `{{secrets.postgresPassword}}` (Handlebars-rendered) | `${POSTGRES_PASSWORD}` (env-substituted) |
|
||||||
|
| Optional services | `{{#if enableX}}` blocks | Always-defined, gated via `COMPOSE_PROFILES` |
|
||||||
|
| Ports | `{{ports.api}}` | Hardcoded |
|
||||||
|
|
||||||
|
Sync-by-addition can't reconcile these differences; a rewrite is cleaner long-term.
|
||||||
|
|
||||||
|
### Initial overlay committed this session
|
||||||
|
|
||||||
|
`changemaker-control-panel/templates/docker-compose.yml.hbs.OLD-style-pre-approach-c` — preserved old template for reference.
|
||||||
|
|
||||||
|
`changemaker-control-panel/templates/docker-compose.yml.hbs` — now a near-mirror of `changemaker.lite/docker-compose.prod.yml` (1493 lines + Handlebars header):
|
||||||
|
- Header comment includes `{{name}}`, `{{slug}}`, `{{composeProject}}` for traceability.
|
||||||
|
- In 5 image refs, `${IMAGE_TAG:-latest}` was replaced with `{{imageTag}}` so CCP can override the tag per instance via `Instance.imageTag` once Phase 1 lands.
|
||||||
|
- All other variation flows through env-var substitution from tenant's `.env`.
|
||||||
|
|
||||||
|
### Remaining Approach C work (next session)
|
||||||
|
|
||||||
|
See `/home/bunker-admin/.claude/plans/insight-temporal-bachman.md` for the full plan. Quick summary of what's next:
|
||||||
|
|
||||||
|
**Phase 0 completion (next session):**
|
||||||
|
- Audit `env.hbs` against the new compose's expected env vars. Add missing.
|
||||||
|
- Sync static config files in `templates/`: nginx/, configs/prometheus/, configs/alertmanager/, configs/grafana/. They may have drifted too.
|
||||||
|
- Write a one-off render harness (`api/scripts/render-for-instance.ts`) that loads an instance row, builds context, renders templates to scratch dir.
|
||||||
|
- Render against marcelle, linda, pia. Diff against their actual files. Iterate the template until diff is per-instance values only (`COMPOSE_PROJECT_NAME`, ports, secrets — not structure).
|
||||||
|
|
||||||
|
**Phase 1 (~30 min):** Add `Instance.imageTag` Prisma column + migration. Modify `template-engine.ts:211` to use `instance.imageTag || env.IMAGE_TAG`.
|
||||||
|
|
||||||
|
**Phase 2 (~3-4 hr):** Pre-flight diff endpoint. New agent route `POST /instance/:slug/files/diff` + `RemoteDriver.diffFiles()` + `LocalDriver.diffFiles()` + `previewReleaseUpgrade()` in upgrade.service. Includes `envCoverage` check for registered tenants.
|
||||||
|
|
||||||
|
**Phase 3 (~3-4 hr):** `startReleaseUpgrade()` + `runReleaseUpgrade()` in upgrade.service. Split logic for `isRegistered=true` (skip env render) vs `isRegistered=false` (render env).
|
||||||
|
|
||||||
|
**Phase 4 (~30 min):** CCP routes `/upgrade-release` + `/upgrade-release/preview` + Zod schema.
|
||||||
|
|
||||||
|
**Phase 5 (~2-3 hr):** "Upgrade to Release" UI button + preview modal + env-coverage warning.
|
||||||
|
|
||||||
|
**Phase 6 (~1 hr):** Tag v2.10.3 in changemaker.lite, push images with tag, trigger upgrade-release on marcelle via CCP UI, verify mkdocs untouched + containers on new tag.
|
||||||
|
|
||||||
|
**Total remaining: 11-14 hours.** Recommended split:
|
||||||
|
- Session 2: complete Phase 0 (render harness + iterate template + env.hbs sync + static file syncs). ~half day.
|
||||||
|
- Session 3: Phases 1-5. ~half day.
|
||||||
|
- Session 4: Phase 6 E2E test. ~1 hour.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Critical files for Approach C
|
||||||
|
|
||||||
|
**Already modified this session:**
|
||||||
|
- `changemaker-control-panel/templates/docker-compose.yml.hbs` — overlay from prod compose with minimal Handlebars markup.
|
||||||
|
- `changemaker-control-panel/templates/docker-compose.yml.hbs.OLD-style-pre-approach-c` — preserved old template.
|
||||||
|
|
||||||
|
**To be modified in next sessions (per plan):**
|
||||||
|
- `changemaker-control-panel/templates/env.hbs` (Phase 0 audit)
|
||||||
|
- `changemaker-control-panel/templates/configs/**` (Phase 0 syncs)
|
||||||
|
- `changemaker-control-panel/api/prisma/schema.prisma` (Phase 1)
|
||||||
|
- `changemaker-control-panel/api/prisma/migrations/<ts>_add_instance_image_tag/` (Phase 1)
|
||||||
|
- `changemaker-control-panel/api/src/services/template-engine.ts` line 211 (Phase 1)
|
||||||
|
- `changemaker-control-panel/api/src/services/upgrade.service.ts` (Phases 2-3)
|
||||||
|
- `changemaker-control-panel/api/src/services/remote-driver.ts` + `local-driver.ts` + `execution-driver.ts` (Phase 2)
|
||||||
|
- `changemaker-control-panel/agent/src/routes/files.routes.ts` + `services/file.service.ts` (Phase 2)
|
||||||
|
- `changemaker-control-panel/api/src/modules/instances/instances.routes.ts` + `instances.schemas.ts` (Phase 4)
|
||||||
|
- `changemaker-control-panel/admin/src/pages/InstanceDetailPage.tsx` (Phase 5)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Memory key gotchas (write to MEMORY.md next session)
|
||||||
|
|
||||||
|
1. **CCP template vs prod compose: were divergent, now aligned.** As of this session, `templates/docker-compose.yml.hbs` is structurally a near-mirror of `docker-compose.prod.yml`. Going forward, any new service in prod compose must be ported into the template manually (or via a future CI drift check).
|
||||||
|
|
||||||
|
2. **bnkops's ccp-agent is locally built**, not pulled from registry. Has a `build:` directive in compose. The other 6 tenants pull `gitea.bnkops.com/admin/changemaker-ccp-agent:latest`.
|
||||||
|
|
||||||
|
3. **install.sh tenants (`isRegistered=true`)** lack `encryptedSecrets` in CCP DB. Approach C must skip `env.hbs` rendering for them — they keep their tarball-provisioned `.env`. The pre-flight envCoverage check is the safety net.
|
||||||
|
|
||||||
|
4. **n4 SSH lacks marcelle's host key by default** — first `ssh n4 → marcelle` connection needs `StrictHostKeyChecking=accept-new` or interactive accept. Other tenants in the lab have the same pattern.
|
||||||
|
|
||||||
|
5. **`docker save | ssh ... docker load` is the registry-less image distribution path** when n4 doesn't have docker login to gitea.bnkops.com. Worked well for the ccp-agent rollout this session.
|
||||||
|
|
||||||
|
6. **`set -o pipefail` + `grep -q` makes the pipeline fail spuriously**: `grep -q` exits on the first match and closes the pipe, the upstream writer is then killed by SIGPIPE, and `pipefail` reports the writer's non-zero exit status even though the match succeeded. Solution: capture upstream output into a variable, then grep against the variable. (Bug found + fixed in `scripts/image-upgrade.sh` during this session.)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## CCP access (unchanged)
|
||||||
|
|
||||||
|
```
|
||||||
|
URL: http://n4-bnkops.taile33572.ts.net:5100 (UI)
|
||||||
|
http://n4-bnkops.taile33572.ts.net:5000 (API)
|
||||||
|
User: admin@thebunkerops.ca
|
||||||
|
Password: NRTgHdC7Zxxs2P2UmNwnEbn3jTwU8uJN (seed)
|
||||||
|
Role: SUPER_ADMIN
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Where to start next session
|
||||||
|
|
||||||
|
Recommended:
|
||||||
|
|
||||||
|
1. **Read this doc + `/home/bunker-admin/.claude/plans/insight-temporal-bachman.md` (Approach C plan)** first.
|
||||||
|
2. **Phase 0 completion:** finish the template rewrite. Build a render harness (`api/scripts/render-for-instance.ts`), render against marcelle/linda/pia, iterate until structural-clean.
|
||||||
|
3. Commit Phase 0 as standalone PR with rendered-vs-actual diffs in description.
|
||||||
|
4. Move to Phases 1-5 in a second commit/PR.
|
||||||
|
5. Phase 6 manual E2E.
|
||||||
|
|
||||||
|
Approach B is in production-ready state across the fleet. Approach C is the longer-term path for releases that change orchestration.
|
||||||
@ -126,7 +126,7 @@ RUNTIME_SCRIPTS=(
|
|||||||
install.sh
|
install.sh
|
||||||
nocodb-init.sh gitea-init.sh mkdocs-entrypoint.sh
|
nocodb-init.sh gitea-init.sh mkdocs-entrypoint.sh
|
||||||
backup.sh restore.sh
|
backup.sh restore.sh
|
||||||
upgrade.sh upgrade-check.sh upgrade-watcher.sh
|
upgrade.sh upgrade-check.sh upgrade-watcher.sh upgrade-stash-cleanup.sh
|
||||||
uninstall.sh test-deployment.sh
|
uninstall.sh test-deployment.sh
|
||||||
validate-env.sh pangolin-teardown.sh ccp-deregister.sh register-with-ccp.sh
|
validate-env.sh pangolin-teardown.sh ccp-deregister.sh register-with-ccp.sh
|
||||||
update-env.sh
|
update-env.sh
|
||||||
@ -178,6 +178,13 @@ if [[ -f "$PROJECT_DIR/scripts/mkdocs-build-trigger.py" ]]; then
|
|||||||
cp "$PROJECT_DIR/scripts/mkdocs-build-trigger.py" "$STAGE_DIR/scripts/"
|
cp "$PROJECT_DIR/scripts/mkdocs-build-trigger.py" "$STAGE_DIR/scripts/"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Shared shell libraries (scripts/lib/) — sourced by upgrade.sh + image-upgrade.sh.
|
||||||
|
# Whole directory ships verbatim; safe because nothing executable lives here
|
||||||
|
# besides the .sh helpers that the runtime scripts depend on.
|
||||||
|
if [[ -d "$PROJECT_DIR/scripts/lib" ]]; then
|
||||||
|
cp -a "$PROJECT_DIR/scripts/lib" "$STAGE_DIR/scripts/"
|
||||||
|
fi
|
||||||
|
|
||||||
# Systemd units
|
# Systemd units
|
||||||
if [[ -d "$PROJECT_DIR/scripts/systemd" ]]; then
|
if [[ -d "$PROJECT_DIR/scripts/systemd" ]]; then
|
||||||
cp -r "$PROJECT_DIR/scripts/systemd" "$STAGE_DIR/scripts/"
|
cp -r "$PROJECT_DIR/scripts/systemd" "$STAGE_DIR/scripts/"
|
||||||
@ -295,12 +302,23 @@ if [[ "$UPLOAD" == "true" ]]; then
|
|||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Gitea 1.23.x only initializes Release.CreatedUnix inside its createTag()
|
||||||
|
# path. If the git tag already exists on origin when we POST /releases,
|
||||||
|
# createTag() is skipped and CreatedUnix stays 0, which makes /releases/latest
|
||||||
|
# silently return an older release. Remove the remote tag first so Gitea
|
||||||
|
# creates it via target_commitish below. The tag is preserved locally and
|
||||||
|
# gets recreated at the same SHA — no history is lost.
|
||||||
|
if git ls-remote --exit-code origin "refs/tags/${TAG}" >/dev/null 2>&1; then
|
||||||
|
warn "Removing remote tag ${TAG} so Gitea can recreate it (CreatedUnix init)"
|
||||||
|
git push origin ":refs/tags/${TAG}" >/dev/null 2>&1 || true
|
||||||
|
fi
|
||||||
|
|
||||||
info "Creating Gitea release ${TAG}..."
|
info "Creating Gitea release ${TAG}..."
|
||||||
RELEASE_RESPONSE=$(curl -sf -X POST \
|
RELEASE_RESPONSE=$(curl -sf -X POST \
|
||||||
"${GITEA_HOST}/api/v1/repos/admin/changemaker.lite/releases" \
|
"${GITEA_HOST}/api/v1/repos/admin/changemaker.lite/releases" \
|
||||||
-H "Authorization: token ${GITEA_TOKEN}" \
|
-H "Authorization: token ${GITEA_TOKEN}" \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
-d "{\"tag_name\":\"${TAG}\",\"name\":\"Changemaker Lite ${TAG}\",\"body\":\"Release ${TAG} (${COMMIT_SHA})\"}" \
|
-d "{\"tag_name\":\"${TAG}\",\"target_commitish\":\"${COMMIT_SHA}\",\"name\":\"Changemaker Lite ${TAG}\",\"body\":\"Release ${TAG} (${COMMIT_SHA})\"}" \
|
||||||
2>/dev/null || true)
|
2>/dev/null || true)
|
||||||
|
|
||||||
RELEASE_ID=$(echo "$RELEASE_RESPONSE" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))" 2>/dev/null || true)
|
RELEASE_ID=$(echo "$RELEASE_RESPONSE" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))" 2>/dev/null || true)
|
||||||
|
|||||||
383
scripts/image-upgrade.sh
Executable file
383
scripts/image-upgrade.sh
Executable file
@ -0,0 +1,383 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# image-upgrade.sh — Approach B: image-only upgrade
|
||||||
|
#
|
||||||
|
# Pulls latest images from the registry and recreates services WITHOUT touching
|
||||||
|
# tracked files in the install tree (no git pull, no tarball extract, no VERSION
|
||||||
|
# mutation). Tenant content (mkdocs/, configs/) is implicitly safe because this
|
||||||
|
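# script never writes outside data/upgrade/, logs/ (or /tmp), the .upgrade.lock file, the defensive mkdocs-backup-*.tar.gz snapshot in the install root, and the docker daemon.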
|
||||||
|
#
|
||||||
|
# Used by CCP "Quick Upgrade" button. Pairs with scripts/upgrade.sh which
|
||||||
|
# remains the full upgrade path for orchestration-changing releases.
|
||||||
|
#
|
||||||
|
# Schema parity: writes data/upgrade/progress.json + result.json with the same
|
||||||
|
# fields upgrade.sh writes, so the CCP poll loop is unchanged.
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
# Resolve the install root from this script's real location (symlinks are
# followed by readlink -f) so every path below is absolute.
PROJECT_DIR="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")/.." && pwd)"
SCRIPT_DIR="$PROJECT_DIR/scripts"
UPGRADE_DIR="$PROJECT_DIR/data/upgrade"
LOG_DIR="$PROJECT_DIR/logs"
LOG_FILE="$LOG_DIR/image-upgrade-$(date +%Y%m%d_%H%M%S).log"
LOCK_FILE="$PROJECT_DIR/.upgrade.lock"
PROGRESS_FILE="$UPGRADE_DIR/progress.json"
RESULT_FILE="$UPGRADE_DIR/result.json"

# All `docker compose` invocations in this script locate the compose file via
# the current working directory. Pin it to the install root so the script
# behaves the same no matter where it is invoked from (and never operates on
# an unrelated compose project that happens to live in the caller's cwd).
cd "$PROJECT_DIR"

# Baseline for the duration reported in result.json and the summary.
START_TIME=$SECONDS
|
||||||
|
|
||||||
|
# --- Detect install mode ---
|
||||||
|
if [[ -f "$PROJECT_DIR/VERSION" ]] && [[ ! -d "$PROJECT_DIR/.git" ]]; then
|
||||||
|
INSTALL_MODE="release"
|
||||||
|
else
|
||||||
|
INSTALL_MODE="source"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# --- Defaults ---
|
||||||
|
API_MODE=false
|
||||||
|
DRY_RUN=false
|
||||||
|
IMAGE_TAG=""
|
||||||
|
|
||||||
|
usage() {
|
||||||
|
cat <<EOF
|
||||||
|
Usage: $(basename "$0") [options]
|
||||||
|
|
||||||
|
Image-only upgrade: pulls latest images from the configured registry and
|
||||||
|
recreates services without touching the install tree.
|
||||||
|
|
||||||
|
Options:
|
||||||
|
--api-mode Emit data/upgrade/{progress,result}.json (no TTY output)
|
||||||
|
--dry-run Print what would happen; do not pull or recreate
|
||||||
|
--image-tag TAG Override IMAGE_TAG (env var) for this run
|
||||||
|
-h, --help Show this help
|
||||||
|
|
||||||
|
This script never modifies mkdocs/, configs/, scripts/, docker-compose.yml,
|
||||||
|
or VERSION. It is the safest upgrade path for orchestration-stable releases.
|
||||||
|
EOF
|
||||||
|
}
|
||||||
|
|
||||||
|
while [[ $# -gt 0 ]]; do
|
||||||
|
case "$1" in
|
||||||
|
--api-mode) API_MODE=true; shift ;;
|
||||||
|
--dry-run) DRY_RUN=true; shift ;;
|
||||||
|
--image-tag) IMAGE_TAG="${2:?--image-tag requires a value}"; shift 2 ;;
|
||||||
|
-h|--help) usage; exit 0 ;;
|
||||||
|
*) echo "Unknown option: $1" >&2; usage >&2; exit 1 ;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
|
# --- Colors ---
|
||||||
|
if [[ -t 1 ]] && [[ -z "${NO_COLOR:-}" ]]; then
|
||||||
|
RED='\033[0;31m' GREEN='\033[0;32m' YELLOW='\033[0;33m'
|
||||||
|
CYAN='\033[0;36m' BOLD='\033[1m' NC='\033[0m'
|
||||||
|
else
|
||||||
|
RED='' GREEN='' YELLOW='' CYAN='' BOLD='' NC=''
|
||||||
|
fi
|
||||||
|
# Console log helpers. Each prints a coloured tag followed by its arguments;
# backslash escapes in the text are interpreted (same as `echo -e`).
# error() writes to stderr, everything else to stdout.
info()    { printf '%b\n' "${CYAN}[INFO]${NC} $*"; }
success() { printf '%b\n' "${GREEN}[ OK ]${NC} $*"; }
warn()    { printf '%b\n' "${YELLOW}[WARN]${NC} $*"; }
error()   { printf '%b\n' "${RED}[ERR ]${NC} $*" >&2; }
# phase NUM TITLE — blank line, then a phase banner.
phase()   { printf '\n%b\n' "${BOLD}${CYAN}=== Phase $1: $2 ===${NC}"; }
|
||||||
|
|
||||||
|
# --- Logging: mirror stdout/stderr to LOG_FILE ---
|
||||||
|
# logs/ may be root-owned on installs where upgrade.sh has run via ccp-agent.
|
||||||
|
# Fall back to /tmp if we can't write, so bunker-admin manual invocations don't
|
||||||
|
# crash with "Permission denied" on tee.
|
||||||
|
mkdir -p "$UPGRADE_DIR"
|
||||||
|
if mkdir -p "$LOG_DIR" 2>/dev/null && touch "$LOG_FILE" 2>/dev/null; then
|
||||||
|
: # primary log location is writable
|
||||||
|
else
|
||||||
|
LOG_FILE="/tmp/image-upgrade-$(date +%Y%m%d_%H%M%S)-$$.log"
|
||||||
|
echo "[INFO] logs/ not writable; using $LOG_FILE" >&2
|
||||||
|
fi
|
||||||
|
exec > >(tee -a "$LOG_FILE") 2>&1
|
||||||
|
|
||||||
|
# --- Capture previous version for result.json ---
|
||||||
|
if [[ "$INSTALL_MODE" == "release" ]]; then
|
||||||
|
PRE_VERSION="$(head -1 "$PROJECT_DIR/VERSION" 2>/dev/null || echo "unknown")"
|
||||||
|
else
|
||||||
|
PRE_VERSION="$(cd "$PROJECT_DIR" && git rev-parse --short HEAD 2>/dev/null || echo "unknown")"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# write_progress PHASE_NUM PHASE_NAME PERCENT MESSAGE
#
# In --api-mode, overwrites $PROGRESS_FILE with a JSON snapshot of the current
# phase. No-op otherwise. PHASE_NUM and PERCENT are emitted
# as bare JSON numbers; PHASE_NAME and MESSAGE are emitted as JSON strings.
write_progress() {
  local phase_num="$1" phase_name="$2" pct="$3" msg="$4"
  [[ "$API_MODE" != "true" ]] && return
  mkdir -p "$UPGRADE_DIR"

  # JSON-escape the string fields. Backslashes must be doubled BEFORE quotes
  # are escaped, otherwise the backslash added for a quote would itself be
  # doubled. Newlines/tabs/CRs are not legal inside a JSON string either.
  local field value
  for field in phase_name msg; do
    value=${!field}
    value=${value//\\/\\\\}
    value=${value//\"/\\\"}
    value=${value//$'\n'/\\n}
    value=${value//$'\r'/\\r}
    value=${value//$'\t'/\\t}
    printf -v "$field" '%s' "$value"
  done

  cat > "$PROGRESS_FILE" <<PEOF
{
  "phase": ${phase_num},
  "phaseName": "${phase_name}",
  "percentage": ${pct},
  "message": "${msg}",
  "lastUpdate": "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
}
PEOF
}
|
||||||
|
|
||||||
|
# write_result SUCCESS MESSAGE [WARNINGS_JSON]
#
# In --api-mode, writes the final outcome to $RESULT_FILE and removes
# $PROGRESS_FILE. No-op otherwise.
#   SUCCESS        emitted verbatim as a JSON literal (callers pass true/false)
#   MESSAGE        human-readable summary; JSON-escaped here
#   WARNINGS_JSON  pre-built JSON array, emitted verbatim (default: [])
# "newCommit" is re-read at call time (VERSION file for release installs,
# short git HEAD for source installs) and falls back to $PRE_VERSION.
write_result() {
  [[ "$API_MODE" != "true" ]] && return
  local success_val="$1" msg="$2"
  local warnings_json="${3:-[]}"
  local duration_secs=$((SECONDS - START_TIME))

  local new_version="$PRE_VERSION"
  if [[ "$INSTALL_MODE" == "release" ]]; then
    new_version="$(head -1 "$PROJECT_DIR/VERSION" 2>/dev/null || echo "$PRE_VERSION")"
  else
    new_version="$(cd "$PROJECT_DIR" && git rev-parse --short HEAD 2>/dev/null || echo "$PRE_VERSION")"
  fi

  # JSON-escape the message. Backslashes first, then quotes, then the control
  # characters that are illegal inside a JSON string.
  local json_msg=$msg
  json_msg=${json_msg//\\/\\\\}
  json_msg=${json_msg//\"/\\\"}
  json_msg=${json_msg//$'\n'/\\n}
  json_msg=${json_msg//$'\r'/\\r}
  json_msg=${json_msg//$'\t'/\\t}

  mkdir -p "$UPGRADE_DIR"
  cat > "$RESULT_FILE" <<REOF
{
  "success": ${success_val},
  "message": "${json_msg}",
  "previousCommit": "${PRE_VERSION}",
  "newCommit": "${new_version}",
  "commitCount": 0,
  "durationSeconds": ${duration_secs},
  "warnings": ${warnings_json},
  "completedAt": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
  "mode": "image-only"
}
REOF
  # A result supersedes any in-flight progress snapshot.
  rm -f "$PROGRESS_FILE"
}
|
||||||
|
|
||||||
|
# --- Lock + cleanup ---
|
||||||
|
# acquire_lock — claim $LOCK_FILE for this process.
#
# If the lock file names a PID that is still alive, report the conflict
# (console + result.json) and exit 1. A lock file that is empty or names a
# dead PID is treated as stale and replaced. On success the file contains
# this shell's PID.
acquire_lock() {
  if [[ -f "$LOCK_FILE" ]]; then
    local holder
    holder="$(cat "$LOCK_FILE" 2>/dev/null || echo "")"
    if [[ -z "$holder" ]] || ! kill -0 "$holder" 2>/dev/null; then
      warn "Stale lock file found; removing"
      rm -f "$LOCK_FILE"
    else
      error "Upgrade already running (pid $holder). Refusing to start."
      write_result "false" "Another upgrade is already running (pid $holder)"
      exit 1
    fi
  fi
  echo $$ > "$LOCK_FILE"
}
|
||||||
|
|
||||||
|
# release_lock — remove $LOCK_FILE, but only if this process owns it.
#
# This runs from the EXIT trap, which also fires when acquire_lock refuses to
# start because another upgrade holds the lock. Deleting unconditionally would
# remove that other process's lock and let a third run start concurrently, so
# the PID stored in the file is compared against ours first.
release_lock() {
  local owner
  owner="$(cat "$LOCK_FILE" 2>/dev/null || true)"
  if [[ "$owner" == "$$" ]]; then
    rm -f "$LOCK_FILE" || true
  fi
}
|
||||||
|
|
||||||
|
# on_failure [LINE] — ERR trap handler.
#
# Captures the failing command's exit status (must be read before anything
# else runs), reports it on the console and in result.json, releases the lock,
# and exits with that same status. LINE defaults to "?" when not supplied.
on_failure() {
  local rc=$? where="${1:-?}"
  local detail="failed at line $where (exit $rc)"
  error "image-upgrade.sh $detail"
  write_result "false" "Image upgrade $detail"
  release_lock
  exit "$rc"
}
|
||||||
|
trap 'on_failure $LINENO' ERR
|
||||||
|
trap 'release_lock' EXIT
|
||||||
|
|
||||||
|
# --- Banner ---
|
||||||
|
echo ""
|
||||||
|
echo -e "${BOLD}${CYAN}================================================${NC}"
|
||||||
|
echo -e "${BOLD} Image-Only Upgrade${NC}"
|
||||||
|
echo -e "${BOLD}${CYAN}================================================${NC}"
|
||||||
|
echo "Install mode: $INSTALL_MODE"
|
||||||
|
echo "Project dir: $PROJECT_DIR"
|
||||||
|
echo "Pre-version: $PRE_VERSION"
|
||||||
|
[[ -n "$IMAGE_TAG" ]] && echo "Image tag: $IMAGE_TAG"
|
||||||
|
[[ "$DRY_RUN" == "true" ]] && echo "DRY RUN: no images will be pulled or services recreated"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
acquire_lock
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Phase 1: Pre-flight + mkdocs snapshot (defensive)
|
||||||
|
# =============================================================================
|
||||||
|
phase "1" "Pre-flight"
|
||||||
|
write_progress 1 "Pre-flight" 10 "Snapshotting mkdocs (defensive)..."
|
||||||
|
|
||||||
|
# Source mkdocs-snapshot.sh and run it. This is the same snapshot every
|
||||||
|
# upgrade path takes — leaves mkdocs-backup-<timestamp>.tar.gz in project root.
|
||||||
|
# Image-only upgrades shouldn't damage mkdocs (no filesystem mutation), but
|
||||||
|
# the snapshot is cheap insurance and keeps operator habits consistent.
|
||||||
|
# Defensive mkdocs snapshot. The helper library is sourced in a child bash and
# its snapshot_mkdocs function is run there; any failure is downgraded to a
# warning so it can never block the upgrade.
if [[ -r "$SCRIPT_DIR/lib/mkdocs-snapshot.sh" ]]; then
  if [[ "$DRY_RUN" == "true" ]]; then
    info "[DRY RUN] Would snapshot mkdocs/"
  else
    # The library path is handed to the child shell as "$1" instead of being
    # spliced into the command string, so install paths containing spaces or
    # shell metacharacters are sourced correctly rather than word-split or
    # re-interpreted as code.
    # shellcheck disable=SC1091
    PROJECT_DIR="$PROJECT_DIR" bash -c '. "$1"; snapshot_mkdocs' _ "$SCRIPT_DIR/lib/mkdocs-snapshot.sh" \
      || warn "mkdocs snapshot failed (non-fatal; continuing)"
  fi
else
  warn "scripts/lib/mkdocs-snapshot.sh not found; skipping snapshot"
fi
|
||||||
|
|
||||||
|
# Sanity-check docker
|
||||||
|
if ! docker compose version &>/dev/null; then
|
||||||
|
error "docker compose is not available"
|
||||||
|
write_result "false" "docker compose not available"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
success "Pre-flight checks passed"
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Phase 2: Pull images
|
||||||
|
# =============================================================================
|
||||||
|
phase "2" "Pull Images"
|
||||||
|
write_progress 2 "Pull Images" 30 "Pulling images from registry..."
|
||||||
|
|
||||||
|
PULL_ENV=()
|
||||||
|
if [[ -n "$IMAGE_TAG" ]]; then
|
||||||
|
PULL_ENV+=("IMAGE_TAG=$IMAGE_TAG")
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ "$DRY_RUN" == "true" ]]; then
|
||||||
|
info "[DRY RUN] Would run: ${PULL_ENV[*]:-} docker compose pull"
|
||||||
|
else
|
||||||
|
info "Pulling all images (this may take a few minutes)..."
|
||||||
|
if (( ${#PULL_ENV[@]} > 0 )); then
|
||||||
|
if ! env "${PULL_ENV[@]}" docker compose pull; then
|
||||||
|
warn "docker compose pull had errors (continuing — some images may be local)"
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
if ! docker compose pull; then
|
||||||
|
warn "docker compose pull had errors (continuing — some images may be local)"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
success "Image pull complete"
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Phase 3: Recreate core app services (targeted, not broad)
|
||||||
|
# =============================================================================
|
||||||
|
phase "3" "Recreate Services"
|
||||||
|
write_progress 3 "Recreate Services" 60 "Recreating core app services with new images..."
|
||||||
|
|
||||||
|
# Targeted recreate: only the services whose IMAGES are released as part of
|
||||||
|
# changemaker.lite (api, admin, media-api, nginx). Broader `up -d` is risky
|
||||||
|
# because a single misconfigured mount in any service (e.g. mkdocs-site-server)
|
||||||
|
# can cascade and leave dependent containers in "Created" state. Image-only
|
||||||
|
# upgrade should only touch the actual code containers, not third-party
|
||||||
|
# infrastructure that happens to live in the same compose file.
|
||||||
|
#
|
||||||
|
# Same Phase 6 pattern as upgrade.sh: drop ccp-agent from COMPOSE_PROFILES
|
||||||
|
# during recreate so we don't suicide-restart the agent that spawned us.
|
||||||
|
# Restart ccp-agent at the end via detached subshell.
|
||||||
|
# Remember the caller's profiles, then build a copy with "ccp-agent" removed
# for the recreate step.
PROFILES_SAVED="${COMPOSE_PROFILES:-}"
# `grep -v` exits 1 when it prints nothing, which is exactly what happens when
# COMPOSE_PROFILES is just "ccp-agent". Under `set -euo pipefail` that status
# would fail the whole assignment and abort the upgrade via the ERR trap, so
# an empty result is explicitly tolerated.
COMPOSE_PROFILES_WITHOUT_AGENT="$(echo "${PROFILES_SAVED}" \
  | tr ',' '\n' | { grep -vx 'ccp-agent' || true; } | paste -sd, -)"
|
||||||
|
|
||||||
|
UP_ENV=("COMPOSE_PROFILES=${COMPOSE_PROFILES_WITHOUT_AGENT}")
|
||||||
|
if [[ -n "$IMAGE_TAG" ]]; then
|
||||||
|
UP_ENV+=("IMAGE_TAG=$IMAGE_TAG")
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Core services that ship as v2 release images. nginx last so it doesn't
|
||||||
|
# briefly proxy to an old api. media-api may not be enabled on all installs;
|
||||||
|
# tolerate it being missing from compose.
|
||||||
|
CORE_SERVICES=(api admin media-api nginx)
|
||||||
|
EXISTING_SERVICES=()
|
||||||
|
# Capture the service list once. Don't pipe `docker compose config` into
|
||||||
|
# `grep -q` directly: with `set -o pipefail`, grep exits early on match and
|
||||||
|
# SIGPIPEs the docker writer, making the pipeline exit non-zero. The grep -q
|
||||||
|
# would then "match" all services as missing. Capture-then-check avoids it.
|
||||||
|
COMPOSE_SERVICES_LIST="$(docker compose config --services 2>/dev/null || true)"
|
||||||
|
for svc in "${CORE_SERVICES[@]}"; do
|
||||||
|
if grep -qx -- "$svc" <<<"$COMPOSE_SERVICES_LIST"; then
|
||||||
|
EXISTING_SERVICES+=("$svc")
|
||||||
|
else
|
||||||
|
info "Skipping service '$svc' (not in compose file)"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
if (( ${#EXISTING_SERVICES[@]} == 0 )); then
|
||||||
|
warn "No core app services found in compose; skipping recreate"
|
||||||
|
elif [[ "$DRY_RUN" == "true" ]]; then
|
||||||
|
info "[DRY RUN] Would run: ${UP_ENV[*]} docker compose up -d ${EXISTING_SERVICES[*]}"
|
||||||
|
else
|
||||||
|
info "Recreating core services: ${EXISTING_SERVICES[*]}"
|
||||||
|
env "${UP_ENV[@]}" docker compose up -d "${EXISTING_SERVICES[@]}"
|
||||||
|
fi
|
||||||
|
success "Services recreated"
|
||||||
|
|
||||||
|
# Restart Pangolin tunnel connector if running (image may have changed)
|
||||||
|
# Restart the Pangolin tunnel connector if a container whose name contains
# "newt" is running.
#
# The container list is captured first and grepped afterwards. Piping
# `docker ps` straight into `grep -q` is unsafe under `set -o pipefail`: grep
# exits on the first match, the writer can die of SIGPIPE, and the pipeline
# then reports failure even though a match was found.
RUNNING_CONTAINER_NAMES="$(docker ps --format '{{.Names}}' || true)"
if grep -q 'newt' <<<"$RUNNING_CONTAINER_NAMES"; then
  if [[ "$DRY_RUN" == "true" ]]; then
    info "[DRY RUN] Would restart newt"
  else
    info "Restarting Pangolin tunnel connector..."
    # Best-effort: a failed restart must not abort the upgrade.
    docker compose restart newt 2>/dev/null || true
    success "Newt tunnel restarted"
  fi
fi
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Phase 4: Verify (light health checks)
|
||||||
|
# =============================================================================
|
||||||
|
phase "4" "Verification"
|
||||||
|
write_progress 4 "Verification" 85 "Running health checks..."
|
||||||
|
|
||||||
|
VERIFY_FAILED=false
|
||||||
|
UPGRADE_WARNINGS="[]"
|
||||||
|
|
||||||
|
# verify_health NAME CHECK_CMD [MAX_WAIT]
#
# Polls CHECK_CMD (a shell command string, run with eval, stderr discarded)
# every 3 seconds until it succeeds or MAX_WAIT seconds (default 45) have
# elapsed. Logs the outcome. On timeout it sets the global VERIFY_FAILED=true.
# Always returns 0 so a failed check never trips `set -e`.
verify_health() {
  local label="$1" probe="$2" limit="${3:-45}"
  local elapsed
  for (( elapsed = 0; elapsed < limit; elapsed += 3 )); do
    if eval "$probe" 2>/dev/null; then
      success "$label: healthy (${elapsed}s)"
      return 0
    fi
    sleep 3
  done
  warn "$label: not responding after ${limit}s"
  VERIFY_FAILED=true
  return 0
}
|
||||||
|
|
||||||
|
# Post-recreate health checks (skipped entirely in dry-run). Failures only
# produce a warning entry for result.json; they never fail the upgrade.
if [[ "$DRY_RUN" != "true" ]]; then
  verify_health "API (port 4000)" \
    "docker compose exec -T api wget -q --spider http://localhost:4000/api/health" 60
  verify_health "Admin (port 3000)" \
    "docker compose exec -T admin wget -q --spider http://localhost:3000/" 90

  # Capture-then-check: `docker ps | grep -q` can report a false "not found"
  # under `set -o pipefail` when grep exits early and the writer gets SIGPIPE,
  # which would silently skip the media-api check.
  RUNNING_CONTAINER_NAMES="$(docker ps --format '{{.Names}}' || true)"
  if grep -q 'changemaker-media-api' <<<"$RUNNING_CONTAINER_NAMES"; then
    verify_health "Media API (port 4100)" \
      "docker compose exec -T media-api wget -q --spider http://127.0.0.1:4100/health" 30
  fi

  # VERIFY_FAILED holds the literal word true/false and is executed as a command.
  if "$VERIFY_FAILED"; then
    UPGRADE_WARNINGS='["Some health checks failed after image-only upgrade — services may still be starting"]'
  fi
fi
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Summary + deferred ccp-agent restart
|
||||||
|
# =============================================================================
|
||||||
|
ELAPSED_MIN=$(( (SECONDS - START_TIME) / 60 ))
|
||||||
|
ELAPSED_SEC=$(( (SECONDS - START_TIME) % 60 ))
|
||||||
|
echo ""
|
||||||
|
echo -e "${BOLD}${GREEN}================================================${NC}"
|
||||||
|
echo -e "${BOLD} Image-Only Upgrade Complete${NC}"
|
||||||
|
echo -e "${BOLD}${GREEN}================================================${NC}"
|
||||||
|
printf " Previous: %s\n" "$PRE_VERSION"
|
||||||
|
printf " Duration: %dm %ds\n" "$ELAPSED_MIN" "$ELAPSED_SEC"
|
||||||
|
printf " Log: %s\n" "$LOG_FILE"
|
||||||
|
|
||||||
|
write_progress 4 "Complete" 100 "Image-only upgrade complete"
|
||||||
|
write_result "true" "Image-only upgrade complete (previous: ${PRE_VERSION})" "$UPGRADE_WARNINGS"
|
||||||
|
|
||||||
|
# Deferred ccp-agent restart — see upgrade.sh for full rationale. Same
|
||||||
|
# mechanism: nohup'd, disowned subshell that picks up the new image after
|
||||||
|
# this script has cleanly exited.
|
||||||
|
# If the caller's COMPOSE_PROFILES included ccp-agent, bring it back up in a
# detached background shell after a short delay, once this script has written
# its result.
if echo "${PROFILES_SAVED:-}" | tr ',' '\n' | grep -qx 'ccp-agent'; then
  if [[ "$DRY_RUN" == "true" ]]; then
    info "[DRY RUN] Would schedule deferred ccp-agent restart"
  else
    info "Scheduling deferred ccp-agent restart..."
    # The install root is passed as "$1" rather than interpolated into the
    # command string, so a path containing quotes or other shell
    # metacharacters cannot break (or inject into) the child script. The child
    # also stops if it cannot enter the install root instead of running
    # `docker compose` from whatever directory it happens to be in.
    nohup bash -c '
      sleep 3
      cd "$1" || exit 1
      COMPOSE_PROFILES=ccp-agent docker compose --profile ccp-agent up -d ccp-agent
    ' _ "$PROJECT_DIR" >/dev/null 2>&1 < /dev/null &
    disown
    success "ccp-agent restart scheduled (will pick up new image)"
  fi
fi
|
||||||
|
|
||||||
|
release_lock
|
||||||
|
trap - EXIT
|
||||||
|
exit 0
|
||||||
81
scripts/lib/mkdocs-snapshot.sh
Executable file
81
scripts/lib/mkdocs-snapshot.sh
Executable file
@ -0,0 +1,81 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# =============================================================================
|
||||||
|
# mkdocs-snapshot.sh — shared library function
|
||||||
|
# =============================================================================
|
||||||
|
# Defines snapshot_mkdocs(): writes a tarball of mkdocs/ into the install root
|
||||||
|
# as mkdocs-backup-<timestamp>.tar.gz, keeping the last 5 snapshots.
|
||||||
|
#
|
||||||
|
# Sourced by scripts/upgrade.sh and scripts/image-upgrade.sh (and may be
|
||||||
|
# invoked agent-side by changemaker-control-panel during template re-render).
|
||||||
|
#
|
||||||
|
# Why the install root instead of backups/?
|
||||||
|
# - Discoverable: operators see mkdocs-backup-*.tar.gz with a plain `ls`.
|
||||||
|
# - The agent's /app/instance bind mount maps directly to the install root,
|
||||||
|
# so the agent can restore from this archive without path translation.
|
||||||
|
# - backups/ is owned by root in some installs (DB dumps via container)
|
||||||
|
# and gets rotated on a different schedule than docs snapshots.
|
||||||
|
#
|
||||||
|
# Restoration one-liner:
|
||||||
|
# tar xzf "$(ls -t mkdocs-backup-*.tar.gz | head -1)" -C . \
|
||||||
|
# && docker compose restart mkdocs mkdocs-site-server
|
||||||
|
#
|
||||||
|
# Requires: $PROJECT_DIR (absolute path to install root), info() function
|
||||||
|
# from the caller (falls back to plain echo if info is not defined).
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Fallback log function if caller didn't define one (e.g. when sourcing standalone)
|
||||||
|
# Provide plain-text info()/warn() only when the sourcing script has not
# already defined its own (e.g. when this file is sourced standalone).
# warn() goes to stderr.
declare -F info >/dev/null 2>&1 || info() { echo "[INFO] $*"; }
declare -F warn >/dev/null 2>&1 || warn() { echo "[WARN] $*" >&2; }
|
||||||
|
|
||||||
|
# snapshot_mkdocs — take a tarball of mkdocs/ into the install root.
|
||||||
|
#
|
||||||
|
# Returns 0 if successful (or if mkdocs/ doesn't exist — non-fatal).
|
||||||
|
# Returns non-zero only if tar itself fails AND $SNAPSHOT_REQUIRED is true.
|
||||||
|
#
|
||||||
|
# Env vars:
|
||||||
|
# PROJECT_DIR (required) Install root containing mkdocs/
|
||||||
|
# SNAPSHOT_KEEP Number of snapshots to retain (default 5)
|
||||||
|
# SNAPSHOT_REQUIRED If "true", failure to snapshot aborts (default false)
|
||||||
|
# snapshot_mkdocs — archive $PROJECT_DIR/mkdocs into
# $PROJECT_DIR/mkdocs-backup-<YYYYmmdd_HHMMSS>.tar.gz and prune old archives.
#
# Reads:
#   PROJECT_DIR        install root; if unset/empty the call is skipped
#   SNAPSHOT_KEEP      number of newest archives (by mtime) to retain; default 5
#   SNAPSHOT_REQUIRED  "true" makes a tar failure return 1; default false
# Returns 0 in every case except a tar failure with SNAPSHOT_REQUIRED=true.
# A missing mkdocs/ directory is not an error.
snapshot_mkdocs() {
  if [[ -z "${PROJECT_DIR:-}" ]]; then
    warn "snapshot_mkdocs: PROJECT_DIR not set; skipping"
    return 0
  fi

  if [[ ! -d "${PROJECT_DIR}/mkdocs" ]]; then
    # No mkdocs dir = nothing to snapshot. Common on minimal installs.
    return 0
  fi

  local stamp
  stamp="$(date +%Y%m%d_%H%M%S)"
  local archive="${PROJECT_DIR}/mkdocs-backup-${stamp}.tar.gz"

  # A non-numeric SNAPSHOT_KEEP would evaluate to 0 in shell arithmetic and
  # prune EVERY archive, including the one just written. Fall back to the
  # default instead. 10# forces base 10 so values like "08" are not read as
  # (invalid) octal.
  local keep="${SNAPSHOT_KEEP:-5}"
  if [[ "$keep" =~ ^[0-9]+$ ]]; then
    keep=$((10#$keep))
  else
    warn "snapshot_mkdocs: invalid SNAPSHOT_KEEP='${keep}'; using 5"
    keep=5
  fi

  if tar czf "$archive" -C "$PROJECT_DIR" mkdocs 2>/dev/null; then
    local size
    size="$(du -h "$archive" 2>/dev/null | cut -f1)"
    info "Tenant docs snapshot: $(basename "$archive") (${size})"
  else
    warn "snapshot_mkdocs: tar failed for $archive"
    # Never leave a partial archive behind where it could be mistaken for a
    # usable backup.
    rm -f "$archive" 2>/dev/null
    if [[ "${SNAPSHOT_REQUIRED:-false}" == "true" ]]; then
      return 1
    fi
    return 0
  fi

  # Retention: keep the newest $keep archives by mtime, remove the rest.
  # Paths are read one per line and passed to rm quoted, so an install root
  # containing spaces is handled (an `ls | xargs rm` pipeline would split such
  # paths into pieces and delete nothing).
  local seen=0 old_archive
  # shellcheck disable=SC2012  # ls is intentional for mtime sort
  while IFS= read -r old_archive; do
    seen=$((seen + 1))
    if (( seen > keep )); then
      rm -f -- "$old_archive"
    fi
  done < <(ls -t -- "${PROJECT_DIR}"/mkdocs-backup-*.tar.gz 2>/dev/null)

  return 0
}
|
||||||
135
scripts/upgrade-stash-cleanup.sh
Executable file
135
scripts/upgrade-stash-cleanup.sh
Executable file
@ -0,0 +1,135 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# =============================================================================
|
||||||
|
# upgrade-stash-cleanup.sh — clean up stale upgrade-* git stashes
|
||||||
|
# =============================================================================
|
||||||
|
# Older versions of upgrade.sh used `git stash push --include-untracked` to
|
||||||
|
# protect tenant content during pulls. When pop conflicts went unresolved,
|
||||||
|
# the stashes accumulated in `git stash list` forever — Pride Corner ended up
|
||||||
|
# with three from 2026-03-09 alone, each containing displaced tenant
|
||||||
|
# customizations that the running site no longer reflected.
|
||||||
|
#
|
||||||
|
# This script lists every `upgrade-*` stash, shows its scope, and offers to
|
||||||
|
# drop them. It does NOT auto-restore content; that's a separate decision per
|
||||||
|
# tenant. The intent is to clear the backlog so future `git stash list` is
|
||||||
|
# meaningful.
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# bash scripts/upgrade-stash-cleanup.sh # interactive, lists + prompts
|
||||||
|
# bash scripts/upgrade-stash-cleanup.sh --dry # list only
|
||||||
|
# bash scripts/upgrade-stash-cleanup.sh --yes # drop all upgrade-* without prompt
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
|
||||||
|
cd "$PROJECT_DIR"
|
||||||
|
|
||||||
|
# Colors
|
||||||
|
if [[ -t 1 ]] && [[ -z "${NO_COLOR:-}" ]]; then
|
||||||
|
RED='\033[0;31m' GREEN='\033[0;32m' YELLOW='\033[0;33m' CYAN='\033[0;36m'
|
||||||
|
BOLD='\033[1m' NC='\033[0m'
|
||||||
|
else
|
||||||
|
RED='' GREEN='' YELLOW='' CYAN='' BOLD='' NC=''
|
||||||
|
fi
|
||||||
|
|
||||||
|
info() { echo -e "${CYAN}[INFO]${NC} $*"; }
|
||||||
|
ok() { echo -e "${GREEN}[ OK ]${NC} $*"; }
|
||||||
|
warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
|
||||||
|
|
||||||
|
# Flags: --dry/--dry-run lists only; --yes/-y drops without prompting;
# --help/-h prints the header comment of this file and exits.
# Unrecognised arguments are ignored.
DRY=false
YES=false
for arg in "$@"; do
  case "$arg" in
    --dry|--dry-run) DRY=true ;;
    --yes|-y) YES=true ;;
    --help|-h)
      # Print the whole leading comment block (everything after the shebang
      # up to the first non-comment line) with the "# " prefix removed, so
      # the Usage section is actually shown. A range that ends at the next
      # "# =====" rule stops right after the title, because the rule directly
      # below the title already terminates it.
      awk 'NR == 1 { next } !/^#/ { exit } { sub(/^# ?/, ""); print }' "$0"
      exit 0
      ;;
  esac
done
|
||||||
|
|
||||||
|
if [[ ! -d .git ]]; then
|
||||||
|
warn "Not a git repository — this script only applies to source installs."
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Collect upgrade-* stash refs
|
||||||
|
mapfile -t STASHES < <(git stash list 2>/dev/null | grep -E ': (On|WIP on) [^:]+: upgrade-' || true)
|
||||||
|
|
||||||
|
if [[ ${#STASHES[@]} -eq 0 ]]; then
|
||||||
|
ok "No upgrade-* stashes found. Nothing to clean up."
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo -e "${BOLD}Found ${#STASHES[@]} upgrade-* stash(es):${NC}"
|
||||||
|
echo ""
|
||||||
|
for entry in "${STASHES[@]}"; do
|
||||||
|
REF="${entry%%:*}"
|
||||||
|
LABEL="${entry#*: }"
|
||||||
|
FILE_COUNT=$(git stash show "$REF" --name-only 2>/dev/null | wc -l)
|
||||||
|
HAS_MKDOCS_YML=$(git stash show "$REF" --name-only 2>/dev/null | grep -c '^mkdocs/mkdocs\.yml$' || true)
|
||||||
|
printf " %-12s %-50s files=%-4d mkdocs.yml=%s\n" \
|
||||||
|
"$REF" "$LABEL" "$FILE_COUNT" "$HAS_MKDOCS_YML"
|
||||||
|
done
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
if [[ "$DRY" == "true" ]]; then
|
||||||
|
info "Dry-run: no stashes will be dropped."
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Warn loudly if any stash holds mkdocs.yml — operator should manually review
|
||||||
|
# before dropping (tenant content might be there).
|
||||||
|
# Collect (newline-separated) the refs of every upgrade-* stash that touches
# mkdocs/mkdocs.yml.
#
# Each stash's file list is captured first and grepped afterwards. With
# `set -o pipefail`, piping `git stash show` straight into `grep -q` can fail
# even on a match (grep exits early, the writer dies of SIGPIPE), which would
# silently drop a stash from this list and suppress the warning that protects
# tenant content.
MKDOCS_STASHES=""
for entry in "${STASHES[@]}"; do
  REF="${entry%%:*}"
  STASH_FILES="$(git stash show "$REF" --name-only 2>/dev/null || true)"
  if grep -qx 'mkdocs/mkdocs\.yml' <<<"$STASH_FILES"; then
    MKDOCS_STASHES+="${MKDOCS_STASHES:+$'\n'}${REF}"
  fi
done
|
||||||
|
|
||||||
|
if [[ -n "$MKDOCS_STASHES" ]]; then
|
||||||
|
echo ""
|
||||||
|
echo -e "${RED}${BOLD}⚠ WARNING:${NC} the following stashes contain ${BOLD}mkdocs/mkdocs.yml${NC}:"
|
||||||
|
echo "$MKDOCS_STASHES" | sed 's/^/ /'
|
||||||
|
echo ""
|
||||||
|
echo " These may hold tenant branding (site_name, site_url, custom theme, etc.)"
|
||||||
|
echo " that ISN'T reflected on disk. Before dropping, verify:"
|
||||||
|
echo ""
|
||||||
|
echo " git show <stash-ref>:mkdocs/mkdocs.yml | head -10"
|
||||||
|
echo " diff <(git show <stash-ref>:mkdocs/mkdocs.yml) mkdocs/mkdocs.yml"
|
||||||
|
echo ""
|
||||||
|
echo " If disk mkdocs.yml already has the tenant content, the stash is safe to drop."
|
||||||
|
echo " If disk is upstream and stash has tenant content, restore first:"
|
||||||
|
echo " git checkout <stash-ref> -- mkdocs/mkdocs.yml"
|
||||||
|
echo ""
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ "$YES" != "true" ]]; then
|
||||||
|
echo -en "${BOLD}Drop all ${#STASHES[@]} upgrade-* stashes? [y/N] ${NC}"
|
||||||
|
read -r CONFIRM
|
||||||
|
case "$CONFIRM" in
|
||||||
|
y|Y|yes|YES) ;;
|
||||||
|
*) info "Cancelled. No stashes dropped."; exit 0 ;;
|
||||||
|
esac
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Drop in reverse order so indices stay stable
|
||||||
|
mapfile -t SORTED_REFS < <(printf '%s\n' "${STASHES[@]}" \
|
||||||
|
| sed 's/:.*//' \
|
||||||
|
| sort -t'{' -k2 -n -r)
|
||||||
|
|
||||||
|
for REF in "${SORTED_REFS[@]}"; do
|
||||||
|
if git stash drop "$REF" >/dev/null 2>&1; then
|
||||||
|
ok "Dropped $REF"
|
||||||
|
else
|
||||||
|
warn "Failed to drop $REF (already gone?)"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
ok "Cleanup complete. Remaining stashes:"
|
||||||
|
git stash list 2>/dev/null || echo " (none)"
|
||||||
@ -95,6 +95,14 @@ phase() {
|
|||||||
echo ""
|
echo ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Pre-upgrade tenant docs snapshot (no-regrets fallback). Sourced regardless
|
||||||
|
# of install mode so snapshot_mkdocs is available in Phase 2.
|
||||||
|
# shellcheck source=lib/mkdocs-snapshot.sh
|
||||||
|
if [[ -f "$SCRIPT_DIR/lib/mkdocs-snapshot.sh" ]]; then
|
||||||
|
# shellcheck disable=SC1091
|
||||||
|
. "$SCRIPT_DIR/lib/mkdocs-snapshot.sh"
|
||||||
|
fi
|
||||||
|
|
||||||
# --- API mode: JSON progress/result writing ---
|
# --- API mode: JSON progress/result writing ---
|
||||||
UPGRADE_DIR="${PROJECT_DIR}/data/upgrade"
|
UPGRADE_DIR="${PROJECT_DIR}/data/upgrade"
|
||||||
PROGRESS_FILE="${UPGRADE_DIR}/progress.json"
|
PROGRESS_FILE="${UPGRADE_DIR}/progress.json"
|
||||||
@ -188,11 +196,22 @@ restore_user_paths() {
|
|||||||
# "Non empty db! Please move your current db elsewhere than retry."
|
# "Non empty db! Please move your current db elsewhere than retry."
|
||||||
# This regenerates config.json from .env vars when missing.
|
# This regenerates config.json from .env vars when missing.
|
||||||
verify_gancio_config() {
|
verify_gancio_config() {
|
||||||
local gancio_volume
|
# Note: as of the gancio-config-init sidecar in docker-compose{,prod}.yml,
|
||||||
gancio_volume="$(docker volume ls --format '{{.Name}}' | grep 'gancio-data' | head -1 || true)"
|
# config.json is regenerated automatically on every `up`. This function is
|
||||||
if [[ -z "$gancio_volume" ]]; then
|
# kept as belt-and-braces for the upgrade flow specifically (e.g. so the
|
||||||
|
# check happens before the compose-up rather than at compose-up time, and
|
||||||
|
# so operators see explicit log output during upgrade).
|
||||||
|
local matches
|
||||||
|
matches="$(docker volume ls --format '{{.Name}}' | grep 'gancio-data' || true)"
|
||||||
|
local count
|
||||||
|
count=$(printf '%s\n' "$matches" | grep -c '.' || true)
|
||||||
|
if [[ "$count" -eq 0 ]]; then
|
||||||
return # No gancio volume exists yet; first run will handle it
|
return # No gancio volume exists yet; first run will handle it
|
||||||
fi
|
fi
|
||||||
|
if [[ "$count" -gt 1 ]]; then
|
||||||
|
error "Multiple gancio-data volumes found — refusing to guess. Resolve manually:\n$matches"
|
||||||
|
fi
|
||||||
|
local gancio_volume="$matches"
|
||||||
|
|
||||||
# Check if config.json exists and is non-empty
|
# Check if config.json exists and is non-empty
|
||||||
if docker run --rm -v "${gancio_volume}:/data" alpine test -s /data/config.json 2>/dev/null; then
|
if docker run --rm -v "${gancio_volume}:/data" alpine test -s /data/config.json 2>/dev/null; then
|
||||||
@ -698,6 +717,18 @@ fi
|
|||||||
phase "2" "Backup"
|
phase "2" "Backup"
|
||||||
write_progress 2 "Backup" 15 "Creating backup..."
|
write_progress 2 "Backup" 15 "Creating backup..."
|
||||||
|
|
||||||
|
# Pre-upgrade tenant docs snapshot — the no-regrets fallback. Runs even when
|
||||||
|
# --skip-backup is set, because this is for tenant content recovery (not DB
|
||||||
|
# state) and is fast enough that skipping it would never be intentional. It
|
||||||
|
# lives in the install root (not backups/) so operators discover it via `ls`.
|
||||||
|
if declare -F snapshot_mkdocs >/dev/null 2>&1; then
|
||||||
|
if [[ "$DRY_RUN" == "true" ]]; then
|
||||||
|
info "[DRY RUN] Would snapshot mkdocs/ to ${PROJECT_DIR}/mkdocs-backup-*.tar.gz"
|
||||||
|
else
|
||||||
|
snapshot_mkdocs || warn "mkdocs snapshot failed (non-fatal; continuing)"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
if [[ "$SKIP_BACKUP" == "true" ]]; then
|
if [[ "$SKIP_BACKUP" == "true" ]]; then
|
||||||
warn "Backup skipped (--skip-backup --force)"
|
warn "Backup skipped (--skip-backup --force)"
|
||||||
else
|
else
|
||||||
@ -1273,13 +1304,24 @@ while true; do
|
|||||||
done
|
done
|
||||||
success "API healthy (${API_WAIT}s)"
|
success "API healthy (${API_WAIT}s)"
|
||||||
|
|
||||||
# Start everything else (exclude one-shot init containers)
|
# Start everything else (exclude one-shot init containers AND the ccp-agent
|
||||||
|
# service that's running this very script). Recreating ccp-agent here would
|
||||||
|
# SIGKILL the script process before write_result has a chance to run; we
|
||||||
|
# instead schedule a detached restart at the very end of the script.
|
||||||
|
#
|
||||||
|
# Mechanism: temporarily drop "ccp-agent" from COMPOSE_PROFILES so the broad
|
||||||
|
# `up -d` doesn't include it. We re-add it only when scheduling the deferred
|
||||||
|
# restart so the new agent comes up under its profile.
|
||||||
info "Starting remaining services..."
|
info "Starting remaining services..."
|
||||||
|
PROFILES_SAVED="${COMPOSE_PROFILES:-}"
|
||||||
|
COMPOSE_PROFILES_WITHOUT_AGENT="$(echo "${PROFILES_SAVED}" \
|
||||||
|
| tr ',' '\n' | grep -vx 'ccp-agent' | paste -sd, -)"
|
||||||
|
COMPOSE_PROFILES="${COMPOSE_PROFILES_WITHOUT_AGENT}" \
|
||||||
docker compose up -d \
|
docker compose up -d \
|
||||||
--scale listmonk-init=0 \
|
--scale listmonk-init=0 \
|
||||||
--scale gancio-init=0 \
|
--scale gancio-init=0 \
|
||||||
--scale vaultwarden-init=0
|
--scale vaultwarden-init=0
|
||||||
success "All services started"
|
success "All services started (ccp-agent restart deferred to end-of-script)"
|
||||||
|
|
||||||
# Restart Pangolin tunnel connector if running (may hold stale state after nginx rebuild)
|
# Restart Pangolin tunnel connector if running (may hold stale state after nginx rebuild)
|
||||||
if docker ps --format '{{.Names}}' | grep -q 'newt'; then
|
if docker ps --format '{{.Names}}' | grep -q 'newt'; then
|
||||||
@ -1450,6 +1492,27 @@ echo -e " ${BOLD}Duration:${NC} $ELAPSED"
|
|||||||
echo -e " ${BOLD}Log:${NC} $LOG_FILE"
|
echo -e " ${BOLD}Log:${NC} $LOG_FILE"
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
|
# Deferred ccp-agent restart — the LAST thing the script does before exit.
|
||||||
|
# This must run AFTER write_result and archive_success_to_history so the new
|
||||||
|
# agent comes up to a complete result.json (otherwise CCP polls forever).
|
||||||
|
# We launch a detached subshell that:
|
||||||
|
# 1. Sleeps briefly so this script has time to exit cleanly first.
|
||||||
|
# 2. Restarts ccp-agent under its profile, picking up any new image.
|
||||||
|
# `nohup` + `disown` together ensure the subshell survives the agent container dying
|
||||||
|
# (when ccp-agent is recreated, the parent agent process — which spawned this
|
||||||
|
# upgrade.sh — gets SIGKILL'd; the disowned subshell is reparented to PID 1
|
||||||
|
# on the host and continues).
|
||||||
|
if echo "${PROFILES_SAVED:-}" | tr ',' '\n' | grep -qx 'ccp-agent'; then
|
||||||
|
info "Scheduling deferred ccp-agent restart..."
|
||||||
|
nohup bash -c "
|
||||||
|
sleep 3
|
||||||
|
cd '$PROJECT_DIR'
|
||||||
|
COMPOSE_PROFILES='ccp-agent' docker compose --profile ccp-agent up -d ccp-agent
|
||||||
|
" >/dev/null 2>&1 < /dev/null &
|
||||||
|
disown
|
||||||
|
success "ccp-agent restart scheduled (will pick up new image)"
|
||||||
|
fi
|
||||||
|
|
||||||
release_lock
|
release_lock
|
||||||
trap - EXIT
|
trap - EXIT
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user