Compare commits

..

No commits in common. "main" and "v2.10.1" have entirely different histories.

31 changed files with 1136 additions and 5221 deletions

5
.gitignore vendored
View File

@ -64,11 +64,6 @@ core.*
/backups/ /backups/
.upgrade.lock .upgrade.lock
# Pre-upgrade mkdocs snapshots (created by scripts/lib/mkdocs-snapshot.sh).
# These are the tenant-content rescue archives written before every upgrade;
# discoverable in the install root via `ls`. Retention: last 5 (see helper).
/mkdocs-backup-*.tar.gz
# Release tarballs (generated by build-release.sh) # Release tarballs (generated by build-release.sh)
/releases/ /releases/

View File

@ -39,8 +39,6 @@ import {
CloudOutlined, CloudOutlined,
DisconnectOutlined, DisconnectOutlined,
UploadOutlined, UploadOutlined,
ThunderboltOutlined,
CloudUploadOutlined,
BellOutlined, BellOutlined,
CheckCircleOutlined, CheckCircleOutlined,
WarningOutlined, WarningOutlined,
@ -565,71 +563,6 @@ export default function InstanceDetailPage() {
} }
}; };
// Image-only upgrade (Approach B): pulls images + recreates core app services
// without touching tracked files. Faster + safer than full upgrade for releases
// that don't change compose/templates.
/**
 * Starts an image-only upgrade for the current instance by POSTing to the
 * `upgrade-images` endpoint. On success the returned record is stored via
 * `setCurrentUpgrade`; on failure the server's error message (or a generic
 * fallback) is shown. `upgradingInstance` is set for the request's duration.
 */
const handleStartImageUpgrade = async () => {
  setUpgradingInstance(true);
  try {
    const response = await api.post(`/instances/${id}/upgrade-images`, {});
    setCurrentUpgrade(response.data.data);
    message.success('Image-only upgrade started');
  } catch (err: unknown) {
    // Error payload shape this handler reads from a failed request.
    type ApiFailure = { response?: { data?: { error?: { message?: string } } } };
    const apiError = (err as ApiFailure)?.response?.data?.error;
    const text = apiError?.message || 'Failed to start image-only upgrade';
    message.error(text);
  } finally {
    setUpgradingInstance(false);
  }
};
// Release upgrade (Approach C): CCP re-renders templates with new image tag,
// writes them to the tenant, then composePull + composeUp. For releases
// that change orchestration (new services, compose config) in addition
// to image versions. Tenant content (mkdocs/, customized configs/) is
// never touched.
// Whether the "Upgrade to Release" modal is currently shown.
const [releaseUpgradeModalOpen, setReleaseUpgradeModalOpen] = useState(false);
// Tag typed by the operator. The handlers trim it and omit `imageTag` from the
// request body when the trimmed value is empty.
const [releaseImageTag, setReleaseImageTag] = useState<string>('');
// Most recent preview response (null until a preview has been fetched, and
// reset to null when a new preview starts): one entry per rendered file plus
// an optional report of env vars referenced vs. present in the tenant .env.
const [releasePreview, setReleasePreview] = useState<{
files: Array<{ path: string; status: string; diff: string | null; sizeBefore: number; sizeAfter: number }>;
envCoverage?: { requiredVars: string[]; presentInTenantEnv: string[]; missingInTenantEnv: string[] };
} | null>(null);
// True while the preview request is in flight (drives the button spinner).
const [releasePreviewLoading, setReleasePreviewLoading] = useState(false);
/**
 * Requests a preview of the release upgrade. Clears any previous preview,
 * POSTs the trimmed image tag (omitted when blank) to the preview endpoint and
 * stores the response; failures surface as an error message.
 */
const handlePreviewReleaseUpgrade = async () => {
  setReleasePreviewLoading(true);
  setReleasePreview(null);
  try {
    const tag = releaseImageTag.trim();
    const payload = tag ? { imageTag: tag } : {};
    const response = await api.post(`/instances/${id}/upgrade-release/preview`, payload);
    setReleasePreview(response.data.data);
  } catch (err: unknown) {
    // Error payload shape this handler reads from a failed request.
    type ApiFailure = { response?: { data?: { error?: { message?: string } } } };
    const apiError = (err as ApiFailure)?.response?.data?.error;
    const text = apiError?.message || 'Preview failed';
    message.error(text);
  } finally {
    setReleasePreviewLoading(false);
  }
};
/**
 * Applies the release upgrade. POSTs the trimmed image tag (omitted when
 * blank) to the `upgrade-release` endpoint; on success it records the returned
 * upgrade, closes the modal and discards the preview. Failures surface as an
 * error message and leave the modal open.
 */
const handleStartReleaseUpgrade = async () => {
  setUpgradingInstance(true);
  try {
    const tag = releaseImageTag.trim();
    const payload = tag ? { imageTag: tag } : {};
    const response = await api.post(`/instances/${id}/upgrade-release`, payload);
    setCurrentUpgrade(response.data.data);
    setReleaseUpgradeModalOpen(false);
    setReleasePreview(null);
    message.success('Release upgrade started');
  } catch (err: unknown) {
    // Error payload shape this handler reads from a failed request.
    type ApiFailure = { response?: { data?: { error?: { message?: string } } } };
    const apiError = (err as ApiFailure)?.response?.data?.error;
    const text = apiError?.message || 'Failed to start release upgrade';
    message.error(text);
  } finally {
    setUpgradingInstance(false);
  }
};
// Event handlers // Event handlers
const handleAcknowledgeEvent = async (eventId: string) => { const handleAcknowledgeEvent = async (eventId: string) => {
try { try {
@ -1699,38 +1632,12 @@ export default function InstanceDetailPage() {
closable closable
/> />
)} )}
<div style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center', gap: 16 }}> <div style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center' }}>
<Typography.Text type="secondary" style={{ flex: 1 }}> <Typography.Text type="secondary">
Full upgrade pulls the latest code, runs migrations, and restarts services. Quick upgrade only pulls images and recreates the core app tenant content stays untouched and it&apos;s ~2 min faster. Use Quick when the release notes say no orchestration changes. Pulls latest code, runs migrations, and restarts services. CCP backup is recommended before upgrading.
</Typography.Text> </Typography.Text>
<Space>
<Popconfirm <Popconfirm
title="Start quick (image-only) upgrade?" title="Start upgrade?"
description="Pulls new container images and recreates the API/Admin/Media/Nginx services. No filesystem changes — mkdocs and configs are not touched. Brief downtime is expected."
onConfirm={handleStartImageUpgrade}
disabled={instance.status !== 'RUNNING' && instance.status !== 'STOPPED'}
>
<Button
icon={<ThunderboltOutlined />}
loading={upgradingInstance}
disabled={instance.status !== 'RUNNING' && instance.status !== 'STOPPED'}
>
Quick Upgrade
</Button>
</Popconfirm>
<Button
icon={<CloudUploadOutlined />}
onClick={() => {
setReleaseImageTag(instance.imageTag || '');
setReleasePreview(null);
setReleaseUpgradeModalOpen(true);
}}
disabled={instance.status !== 'RUNNING' && instance.status !== 'STOPPED'}
>
Upgrade to Release
</Button>
<Popconfirm
title="Start full upgrade?"
description="This will pull the latest code, run database migrations, and restart all services. Brief downtime is expected." description="This will pull the latest code, run database migrations, and restart all services. Brief downtime is expected."
onConfirm={handleStartUpgrade} onConfirm={handleStartUpgrade}
disabled={instance.status !== 'RUNNING' && instance.status !== 'STOPPED'} disabled={instance.status !== 'RUNNING' && instance.status !== 'STOPPED'}
@ -1744,7 +1651,6 @@ export default function InstanceDetailPage() {
Upgrade Now Upgrade Now
</Button> </Button>
</Popconfirm> </Popconfirm>
</Space>
</div> </div>
</Space> </Space>
)} )}
@ -2315,117 +2221,6 @@ export default function InstanceDetailPage() {
</Space> </Space>
</Modal> </Modal>
)} )}
{/* Approach C: Release upgrade modal (CCP template re-render) */}
<Modal
title="Upgrade to Release"
open={releaseUpgradeModalOpen}
onCancel={() => setReleaseUpgradeModalOpen(false)}
footer={[
<Button key="cancel" onClick={() => setReleaseUpgradeModalOpen(false)}>
Cancel
</Button>,
<Button
key="preview"
onClick={handlePreviewReleaseUpgrade}
loading={releasePreviewLoading}
>
Preview Changes
</Button>,
<Button
key="apply"
type="primary"
danger={
!!releasePreview?.envCoverage?.missingInTenantEnv?.length
}
loading={upgradingInstance}
onClick={handleStartReleaseUpgrade}
>
Apply Upgrade
</Button>,
]}
width={900}
>
<Space direction="vertical" style={{ width: '100%' }} size="middle">
<div>
<Typography.Text strong>Image Tag:</Typography.Text>
<Input
value={releaseImageTag}
onChange={(e) => setReleaseImageTag(e.target.value)}
placeholder="e.g. v2.10.3 (blank = use current env.IMAGE_TAG default)"
style={{ marginTop: 4 }}
/>
<Typography.Text type="secondary" style={{ fontSize: 12 }}>
Re-renders docker-compose.yml + env + nginx configs with this tag. Tenant content
(mkdocs/, custom configs/) is never touched. Click <em>Preview Changes</em> to see the
per-file diff before applying.
</Typography.Text>
</div>
{releasePreview && (
<>
{releasePreview.envCoverage?.missingInTenantEnv && releasePreview.envCoverage.missingInTenantEnv.length > 0 && (
<Alert
type="error"
showIcon
message="Missing env vars in tenant .env"
description={
<div>
<div>The new docker-compose.yml references vars the tenant&apos;s .env does NOT define:</div>
<code style={{ display: 'block', marginTop: 8, fontSize: 11 }}>
{releasePreview.envCoverage.missingInTenantEnv.join(', ')}
</code>
<div style={{ marginTop: 8 }}>
Applying without these vars may break services. Add them to the tenant&apos;s .env
first, or reconcile the template.
</div>
</div>
}
/>
)}
<Typography.Text strong>
Files: {releasePreview.files.length} total, {releasePreview.files.filter(f => f.status === 'modified').length} modified, {releasePreview.files.filter(f => f.status === 'created').length} created, {releasePreview.files.filter(f => f.status === 'unchanged').length} unchanged
</Typography.Text>
<div style={{ maxHeight: 500, overflow: 'auto', border: '1px solid #303030', borderRadius: 4 }}>
{releasePreview.files.map((f) => (
<div key={f.path} style={{ padding: 8, borderBottom: '1px solid #303030' }}>
<div style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center' }}>
<code style={{ fontSize: 12 }}>{f.path}</code>
{/* Status badge: colour encodes the file status, and the label shows the
    byte size before and after. The two sizes need an explicit separator,
    otherwise 120 and 340 would render as "120340". */}
<Tag
  color={
    f.status === 'unchanged' ? 'green' :
    f.status === 'modified' ? 'gold' :
    'blue'
  }
>
  {f.status} {f.sizeBefore !== undefined && `(${f.sizeBefore} → ${f.sizeAfter} B)`}
</Tag>
</div>
{f.diff && (
<pre
style={{
background: '#1e1e1e',
color: '#d4d4d4',
padding: 8,
marginTop: 8,
maxHeight: 200,
overflow: 'auto',
fontSize: 11,
borderRadius: 4,
}}
>
{f.diff}
</pre>
)}
</div>
))}
</div>
</>
)}
</Space>
</Modal>
</div> </div>
); );
} }

View File

@ -9,7 +9,6 @@ export interface Instance {
composeProject: string; composeProject: string;
gitBranch: string; gitBranch: string;
gitCommit?: string; gitCommit?: string;
imageTag?: string;
portConfig: Record<string, number>; portConfig: Record<string, number>;
enableMedia: boolean; enableMedia: boolean;
enableChat: boolean; enableChat: boolean;

View File

@ -8,16 +8,7 @@ COPY src/ ./src/
RUN npx tsc RUN npx tsc
FROM node:20-alpine FROM node:20-alpine
# bash + curl + jq + python3 are required by the changemaker scripts the agent RUN apk add --no-cache docker-cli docker-cli-compose git rsync
# shells out to (upgrade-check.sh, upgrade.sh, backup.sh). Without them, every
# /upgrade/* and /backup/* call returns "command not found" failures.
RUN apk add --no-cache docker-cli docker-cli-compose git rsync bash curl jq python3
# Agent runs as root, but the bind-mounted /app/instance is owned by the host
# user (UID 1000 = `node` inside the container). Modern git refuses to operate
# on repos with mismatched ownership without an explicit safe.directory entry.
# Wildcard whitelist all paths — the agent only mounts a single host directory
# anyway (the instance's project root).
RUN git config --system --add safe.directory '*'
WORKDIR /app WORKDIR /app
COPY package*.json ./ COPY package*.json ./
RUN npm ci --production RUN npm ci --production

View File

@ -24,33 +24,6 @@ router.post('/instance/:slug/files', async (req: Request, res: Response) => {
res.json({ written: files.length }); res.json({ written: files.length });
}); });
// POST /instance/:slug/files/diff — Approach C pre-flight: diff proposed
// rendered files against on-disk current content. Read-only.
router.post('/instance/:slug/files/diff', async (req: Request, res: Response) => {
  const entry = await getSlugEntry(param(req, 'slug'));
  // Tolerate a missing body so the request fails validation instead of throwing.
  const { files } = req.body || {};
  if (!Array.isArray(files)) {
    res.status(400).json({ error: 'VALIDATION', message: 'files array required' });
    return;
  }
  // Each entry is read as { relativePath, content } by the diff helper; reject
  // malformed entries here with a 400 rather than failing deeper in the call.
  const wellFormed = files.every(
    (f: unknown) =>
      typeof f === 'object' &&
      f !== null &&
      typeof (f as { relativePath?: unknown }).relativePath === 'string' &&
      typeof (f as { content?: unknown }).content === 'string'
  );
  if (!wellFormed) {
    res.status(400).json({
      error: 'VALIDATION',
      message: 'each file requires string relativePath and content',
    });
    return;
  }
  const results = await fileService.diffFiles(entry.basePath, files);
  res.json({ files: results });
});
// POST /instance/:slug/env/patch — Approach C: patch specific .env keys in place.
// Used for isRegistered=true tenants where CCP can't re-render the full .env
// but needs to update IMAGE_TAG / other values from instance.imageTag etc.
router.post('/instance/:slug/env/patch', async (req: Request, res: Response) => {
  const entry = await getSlugEntry(param(req, 'slug'));
  // Tolerate a missing body so the request fails validation instead of throwing.
  const { vars } = req.body || {};
  if (!vars || typeof vars !== 'object' || Array.isArray(vars)) {
    res.status(400).json({ error: 'VALIDATION', message: 'vars object required' });
    return;
  }
  // The values end up verbatim in the tenant's .env, so make sure the
  // Record<string, string> cast below is actually true: a nested object would
  // otherwise be stringified into the file.
  const onlyStrings = Object.values(vars as Record<string, unknown>).every(
    (v) => typeof v === 'string'
  );
  if (!onlyStrings) {
    res.status(400).json({ error: 'VALIDATION', message: 'vars values must be strings' });
    return;
  }
  const result = await fileService.patchEnv(entry.basePath, vars as Record<string, string>);
  res.json(result);
});
// POST /instance/:slug/mkdir — Create directory // POST /instance/:slug/mkdir — Create directory
router.post('/instance/:slug/mkdir', async (req: Request, res: Response) => { router.post('/instance/:slug/mkdir', async (req: Request, res: Response) => {
const entry = await getSlugEntry(param(req, 'slug')); const entry = await getSlugEntry(param(req, 'slug'));

View File

@ -188,85 +188,6 @@ router.post('/instance/:slug/upgrade/start', async (req: Request, res: Response)
res.status(202).json({ started: true }); res.status(202).json({ started: true });
}); });
// POST /instance/:slug/upgrade/start-image-only — Run image-upgrade.sh in background
//
// Image-only upgrade: pulls latest images + recreates services without touching
// tracked files (no git pull, no tarball extract, no VERSION mutation). Tenant
// content is implicitly safe because the script never writes outside data/upgrade.
// See scripts/image-upgrade.sh for full rationale.
//
// Schema-compatible with /upgrade/start: writes the same progress.json + result.json
// so the CCP poll loop in runRemoteUpgrade() works unchanged.
// Handler flow: validate the optional imageTag, confirm the script exists,
// refuse with 409 while an upgrade/backup/restore holds the slug, clear old
// progress/result files, then launch the script in the background and answer
// 202 immediately without waiting for it to finish.
router.post('/instance/:slug/upgrade/start-image-only', async (req: Request, res: Response) => {
const slug = param(req, 'slug');
const entry = await getSlugEntry(slug);
const { imageTag } = req.body || {};
// SECURITY: imageTag flows into bash via --image-tag. Constrain to a safe
// subset of docker tag chars (semver, SHA, named tags). Reject anything
// that could shell-escape.
if (imageTag && !/^[a-zA-Z0-9][a-zA-Z0-9_.-]{0,127}$/.test(String(imageTag))) {
res.status(400).json({ error: 'VALIDATION', message: 'Invalid imageTag' });
return;
}
// The script is looked up inside the instance's own tree; a missing script
// is reported as 404 rather than attempted.
const scriptPath = path.join(entry.basePath, 'scripts', 'image-upgrade.sh');
try {
await fs.access(scriptPath);
} catch {
res.status(404).json({ error: 'NOT_FOUND', message: 'image-upgrade.sh not found' });
return;
}
// Same concurrency guards as the full /upgrade/start endpoint — uses the
// same lock + on-disk staleness check + backup/restore mutex.
if (isSlugLocked(slug, 'upgrade') || await isUpgradeRunningOnDisk(entry.basePath)) {
res.status(409).json({ error: 'SLUG_BUSY', message: 'An upgrade is already in progress' });
return;
}
if (isSlugLocked(slug, 'backup') || isSlugLocked(slug, 'restore')) {
res.status(409).json({ error: 'SLUG_BUSY', message: 'A backup or restore is currently running' });
return;
}
// Clear stale progress/result files (same convention as /upgrade/start)
const progressPath = path.join(entry.basePath, 'data', 'upgrade', 'progress.json');
const resultPath = path.join(entry.basePath, 'data', 'upgrade', 'result.json');
await fs.mkdir(path.dirname(progressPath), { recursive: true });
await fs.rm(progressPath, { force: true });
await fs.rm(resultPath, { force: true });
// Arguments are passed as an argv array to spawn (no shell string is built).
const args: string[] = [scriptPath, '--api-mode'];
if (imageTag) args.push('--image-tag', String(imageTag));
// Fire-and-forget: the promise is intentionally not awaited so the 202 below
// is sent right away.
// NOTE(review): the busy checks above and this lock acquisition are separate
// steps; a SlugBusyError raised here is silently dropped after the client has
// already been told the upgrade started — confirm that is acceptable.
void withSlugLock(slug, 'upgrade', async () => {
logger.info(`[image-upgrade] ${slug}: spawning ${args.join(' ')} (cwd=${entry.basePath})`);
try {
await new Promise<void>((resolve, reject) => {
const proc = spawn('bash', args, {
cwd: entry.basePath,
env: { ...process.env, COMPOSE_ANSI: 'never' },
// stdin/stdout/stderr are all discarded; nothing the script prints is
// captured by this process.
stdio: ['ignore', 'ignore', 'ignore'],
});
proc.on('error', reject);
proc.on('close', (code) => {
// Any non-zero exit code is treated as a failed upgrade.
if (code === 0) resolve();
else reject(new Error(`image-upgrade.sh exited with code ${code}`));
});
});
logger.info(`[image-upgrade] ${slug}: image-upgrade.sh completed`);
} catch (err) {
// Failures are only logged here; the HTTP response has already been sent.
logger.error(`[image-upgrade] ${slug}: ${(err as Error).message}`);
}
}).catch((err) => {
if (!(err instanceof SlugBusyError)) {
logger.error(`[image-upgrade] ${slug}: lock or background error: ${(err as Error).message}`);
}
});
res.status(202).json({ started: true, mode: 'image-only' });
});
// GET /instance/:slug/upgrade/progress — Read progress.json // GET /instance/:slug/upgrade/progress — Read progress.json
router.get('/instance/:slug/upgrade/progress', async (req: Request, res: Response) => { router.get('/instance/:slug/upgrade/progress', async (req: Request, res: Response) => {
const entry = await getSlugEntry(param(req, 'slug')); const entry = await getSlugEntry(param(req, 'slug'));

View File

@ -35,185 +35,6 @@ export async function writeFiles(
} }
} }
/**
* Diff proposed files against current on-disk contents at basePath.
* For Approach C pre-flight preview: operator sees per-file change summary
* before applying re-rendered templates. Returns one DiffResult per proposed
* file. Uses a small inline LCS-based unified diff to avoid new deps.
*/
/** Outcome of comparing one proposed file against its on-disk counterpart. */
export interface DiffResult {
// Relative path of the file, exactly as supplied by the caller.
path: string;
// 'created' when the file could not be read from disk, 'unchanged' when the
// contents are identical, 'modified' otherwise.
status: 'unchanged' | 'modified' | 'created';
// Unified-style diff text for 'modified' files; null for the other statuses.
diff: string | null;
// UTF-8 byte length of the current on-disk content (0 for 'created').
sizeBefore: number;
// UTF-8 byte length of the proposed content.
sizeAfter: number;
}
/** Maximum number of body lines (context, additions, deletions) emitted per diff. */
const DIFF_MAX_LINES = 500;

/**
 * Build a compact, unified-style line diff between two texts.
 *
 * The output starts with `--- a/<path>` / `+++ b/<path>` headers followed by
 * every line prefixed with ' ' (common), '+' (only in new) or '-' (only in
 * old). No `@@` hunk headers are produced and unchanged lines are not
 * collapsed, so this is not a full GNU diff; it is meant for eyeballing
 * compose/env/conf changes in the UI.
 *
 * @param oldText      Current content.
 * @param newText      Proposed content.
 * @param relativePath Path used in the two header lines.
 * @returns The diff text. When more than DIFF_MAX_LINES body lines would be
 *          needed, the body is cut at that limit and a truncation marker is
 *          appended.
 */
function unifiedDiff(oldText: string, newText: string, relativePath: string): string {
  const oldLines = oldText.split('\n');
  const newLines = newText.split('\n');
  const m = oldLines.length;
  const n = newLines.length;

  // dp[i][j] = length of the longest common subsequence of oldLines[i..] and
  // newLines[j..]. This is O(m*n) in time and memory; only the emitted output
  // is capped, not the table size.
  const dp: number[][] = Array.from({ length: m + 1 }, () => new Array<number>(n + 1).fill(0));
  for (let i = m - 1; i >= 0; i--) {
    for (let j = n - 1; j >= 0; j--) {
      dp[i][j] = oldLines[i] === newLines[j]
        ? dp[i + 1][j + 1] + 1
        : Math.max(dp[i + 1][j], dp[i][j + 1]);
    }
  }

  // Walk the table from the top-left, emitting one output line per step.
  const out: string[] = [`--- a/${relativePath}`, `+++ b/${relativePath}`];
  let i = 0;
  let j = 0;
  let emitted = 0;
  while ((i < m || j < n) && emitted < DIFF_MAX_LINES) {
    if (i < m && j < n && oldLines[i] === newLines[j]) {
      out.push(` ${oldLines[i]}`);
      i++;
      j++;
    } else if (j < n && (i === m || dp[i][j + 1] >= dp[i + 1][j])) {
      // On ties the addition is emitted before the deletion.
      out.push(`+${newLines[j]}`);
      j++;
    } else {
      out.push(`-${oldLines[i]}`);
      i++;
    }
    emitted++;
  }

  // Only claim truncation when input really remains; a diff that ends exactly
  // at the limit is complete.
  if (i < m || j < n) out.push(`... (diff truncated at ${DIFF_MAX_LINES} lines)`);
  return out.join('\n');
}
/**
 * Compare each proposed file with what is currently on disk under `basePath`.
 *
 * Files are processed in order. A file that cannot be read is reported as
 * 'created', identical content as 'unchanged', and anything else as 'modified'
 * with a diff attached. Nothing is written to disk.
 *
 * @param basePath Root directory the relative paths are resolved against.
 * @param files    Proposed files (relative path + full content).
 * @returns One DiffResult per input file, in input order.
 */
export async function diffFiles(
  basePath: string,
  files: Array<{ relativePath: string; content: string }>
): Promise<DiffResult[]> {
  const report: DiffResult[] = [];

  for (const file of files) {
    const target = path.join(basePath, file.relativePath);
    assertWithin(target, basePath);

    const sizeAfter = Buffer.byteLength(file.content, 'utf-8');

    // Any read failure is treated as "file does not exist yet".
    let onDisk: string | null;
    try {
      onDisk = await fs.readFile(target, 'utf-8');
    } catch {
      onDisk = null;
    }

    let outcome: DiffResult;
    if (onDisk === null) {
      outcome = { path: file.relativePath, status: 'created', diff: null, sizeBefore: 0, sizeAfter };
    } else {
      const sizeBefore = Buffer.byteLength(onDisk, 'utf-8');
      outcome =
        onDisk === file.content
          ? { path: file.relativePath, status: 'unchanged', diff: null, sizeBefore, sizeAfter }
          : {
              path: file.relativePath,
              status: 'modified',
              diff: unifiedDiff(onDisk, file.content, file.relativePath),
              sizeBefore,
              sizeAfter,
            };
    }
    report.push(outcome);
  }

  return report;
}
/**
* Patch specific keys in the tenant's .env file in place. Used by Approach C
* upgrade for install.sh tenants where CCP can't re-render the full .env
* (no encryptedSecrets in DB) but still needs to update Instance.imageTag-
* derived values like IMAGE_TAG. Preserves comments, blank lines, and key
* order; replaces existing keys, appends new ones at the end.
*
* Keys are validated against ENV_KEY_RE; values are written verbatim
* (no shell escaping beyond what dotenv expects). Newlines in values
* are rejected to prevent .env smuggling.
*/
const ENV_KEY_RE = /^[A-Z_][A-Z0-9_]*$/;
/**
 * Update selected keys of `<basePath>/.env` in place.
 *
 * Comments, blank lines and key order are preserved. Every line that defines
 * one of the requested keys is rewritten, including duplicates, so the
 * effective value changes no matter which duplicate a consumer honours. Keys
 * not present in the file are appended at the end under a marker comment.
 *
 * @param basePath Directory containing the `.env` file.
 * @param vars     Key/value pairs to set. Keys must match ENV_KEY_RE; values
 *                 must not contain CR or LF.
 * @returns `patched`: keys that were found and rewritten; `added`: keys that
 *          were appended.
 * @throws AgentError 400 (VALIDATION) for an invalid key or a value containing
 *         a newline; AgentError 404 (NOT_FOUND) when the file cannot be read.
 */
export async function patchEnv(
  basePath: string,
  vars: Record<string, string>
): Promise<{ patched: string[]; added: string[] }> {
  const envPath = path.join(basePath, '.env');
  assertWithin(envPath, basePath);

  // Validate everything before touching disk so a bad entry cannot leave a
  // half-patched file.
  for (const [k, v] of Object.entries(vars)) {
    if (!ENV_KEY_RE.test(k)) {
      throw new AgentError(400, `Invalid env key: ${k}`, 'VALIDATION');
    }
    if (/[\r\n]/.test(v)) {
      throw new AgentError(400, `env value for ${k} contains newline`, 'VALIDATION');
    }
  }

  let current: string;
  try {
    current = await fs.readFile(envPath, 'utf-8');
  } catch {
    throw new AgentError(404, `.env not found at ${envPath}`, 'NOT_FOUND');
  }

  const lines = current.split('\n');
  const wanted = new Set(Object.keys(vars));
  const patchedKeys = new Set<string>();

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    // Only unindented KEY=... lines count as definitions.
    const m = /^([A-Z_][A-Z0-9_]*)=/.exec(line);
    if (!m) continue;
    const key = m[1];
    if (!wanted.has(key)) continue;
    // Keep a trailing CR so files with CRLF endings stay consistent.
    const eol = line.endsWith('\r') ? '\r' : '';
    lines[i] = `${key}=${vars[key]}${eol}`;
    patchedKeys.add(key);
  }

  const patched = [...patchedKeys];
  const added = [...wanted].filter((k) => !patchedKeys.has(k));

  if (added.length > 0) {
    // Trim trailing blank lines to avoid accumulating empties on repeated patches.
    while (lines.length > 0 && lines[lines.length - 1] === '') lines.pop();
    lines.push('', '# Added by CCP env-patch');
    for (const k of added) lines.push(`${k}=${vars[k]}`);
    lines.push(''); // trailing newline
  }

  await fs.writeFile(envPath, lines.join('\n'), 'utf-8');
  logger.info(`[files] env-patch ${envPath}: patched=${patched.length} added=${added.length}`);
  return { patched, added };
}
export async function mkdirp(basePath: string, relativePath: string): Promise<void> { export async function mkdirp(basePath: string, relativePath: string): Promise<void> {
const dirPath = path.join(basePath, relativePath); const dirPath = path.join(basePath, relativePath);
assertWithin(dirPath, basePath); assertWithin(dirPath, basePath);

View File

@ -1,4 +0,0 @@
-- Approach C: per-instance image tag override.
-- NULL means "use env.IMAGE_TAG default". Set via CCP "Upgrade to Release"
-- flow when operator chooses a tag for a specific tenant.
ALTER TABLE "instances" ADD COLUMN "image_tag" TEXT;

View File

@ -70,13 +70,6 @@ model Instance {
gitBranch String @default("v2") @map("git_branch") gitBranch String @default("v2") @map("git_branch")
gitCommit String? @map("git_commit") gitCommit String? @map("git_commit")
// Per-instance image tag override (Approach C release upgrades).
// NULL = fall back to env.IMAGE_TAG (the CCP-wide default). When set,
// CCP renders this value into the tenant's .env IMAGE_TAG, and the
// compose template's ${IMAGE_TAG:-latest} substitution picks it up at
// compose-up time. Each tenant rolls forward on its own cadence.
imageTag String? @map("image_tag")
// Allocated host ports (JSON: { api: 14001, admin: 13001, postgres: 15401, nginx: 10001 }) // Allocated host ports (JSON: { api: 14001, admin: 13001, postgres: 15401, nginx: 10001 })
portConfig Json @map("port_config") portConfig Json @map("port_config")

View File

@ -1,115 +0,0 @@
#!/usr/bin/env tsx
/**
* render-for-instance.ts — Approach C Phase 0 verification harness.
*
* Loads a CCP-tracked Instance row, builds its template context, and renders
* all templates to a scratch directory under /tmp/render-<slug>/. Operator
* then diffs the rendered output against the tenant's actual on-disk files
* to verify the template-vs-prod-compose equivalence contract.
*
* Usage (run inside ccp-api container):
* docker compose exec ccp-api npx tsx scripts/render-for-instance.ts --slug changemakerlite
* docker compose exec ccp-api npx tsx scripts/render-for-instance.ts --id <uuid>
*
* Output: prints scratch dir path; exits 0 on success, 1 on failure.
*
* This script does NOT touch any tenant. It only reads from the CCP database
* and writes to /tmp on the CCP api container.
*/
import { prisma } from '../src/lib/prisma';
import { decryptJson } from '../src/utils/encryption';
import {
buildTemplateContext,
renderAllTemplates,
} from '../src/services/template-engine';
import path from 'node:path';
import fs from 'node:fs/promises';
/** Command-line options accepted by this script. */
interface Args {
  slug?: string;
  id?: string;
  outDir?: string;
}

/**
 * Parse `--slug`, `--id` and `--out` (each followed by a value) from argv.
 * A flag without a following non-empty value is ignored, as are unknown
 * tokens. `-h` / `--help` prints usage and exits with status 0.
 */
function parseArgs(argv: string[]): Args {
  const parsed: Args = {};
  let idx = 0;
  while (idx < argv.length) {
    const flag = argv[idx];
    const value = argv[idx + 1];
    switch (flag) {
      case '--slug':
        if (value) {
          parsed.slug = value;
          idx++; // value consumed
        }
        break;
      case '--id':
        if (value) {
          parsed.id = value;
          idx++;
        }
        break;
      case '--out':
        if (value) {
          parsed.outDir = value;
          idx++;
        }
        break;
      case '-h':
      case '--help':
        console.log('usage: render-for-instance.ts (--slug X | --id Y) [--out /tmp/render-X]');
        process.exit(0);
    }
    idx++;
  }
  return parsed;
}
/**
 * Entry point: look up the instance by id or slug, render all templates for
 * it into a scratch directory (wiped first), and print a listing of the
 * rendered files plus a hint for diffing against the live tenant.
 * Exits with status 1 when no selector is given or the instance is missing.
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));
  if (!args.slug && !args.id) {
    console.error('error: --slug or --id is required');
    process.exit(1);
  }

  const instance = await prisma.instance.findUnique({
    where: args.id ? { id: args.id } : { slug: args.slug! },
  });
  if (!instance) {
    console.error(`error: instance not found (slug=${args.slug ?? '?'}, id=${args.id ?? '?'})`);
    process.exit(1);
  }

  // Without stored secrets (or when decryption fails) render with an empty
  // map; secret-backed template values then come out blank.
  let secrets: Record<string, string> = {};
  if (!instance.encryptedSecrets) {
    console.log(`(isRegistered=true tenant; using empty secrets for compose/nginx render — env.hbs values will be blank)`);
  } else {
    try {
      secrets = decryptJson<Record<string, string>>(instance.encryptedSecrets);
    } catch (err) {
      console.warn(`warn: decryptJson failed (${(err as Error).message}); using empty secrets`);
    }
  }

  // Start from an empty output directory every run.
  const outDir = args.outDir ?? path.join('/tmp', `render-${instance.slug}`);
  await fs.rm(outDir, { recursive: true, force: true });
  await fs.mkdir(outDir, { recursive: true });

  await renderAllTemplates(buildTemplateContext(instance, secrets), outDir);

  // Collect every rendered file (relative to outDir), depth-first.
  const collect = async (dir: string, rel: string): Promise<string[]> => {
    const found: string[] = [];
    for (const item of await fs.readdir(dir, { withFileTypes: true })) {
      const childRel = path.join(rel, item.name);
      if (item.isDirectory()) {
        found.push(...(await collect(path.join(dir, item.name), childRel)));
      } else {
        found.push(childRel);
      }
    }
    return found;
  };
  const entries = await collect(outDir, '');

  console.log(`\n=== rendered ${entries.length} files to: ${outDir} ===`);
  entries.sort();
  for (const e of entries) {
    const stat = await fs.stat(path.join(outDir, e));
    console.log(` ${e} (${stat.size} bytes)`);
  }
  console.log(`\nTo diff against the live tenant:`);
  console.log(` ssh <tenant> 'cat <basePath>/docker-compose.yml' | diff -u - ${outDir}/docker-compose.yml`);
  console.log(``);

  await prisma.$disconnect();
}

// Run the script; any unhandled failure is reported and mapped to exit code 1.
main().catch((err) => {
  console.error('render-for-instance.ts failed:', err);
  process.exit(1);
});

View File

@ -4,7 +4,7 @@ import rateLimit from 'express-rate-limit';
import { prisma } from '../../lib/prisma'; import { prisma } from '../../lib/prisma';
import { authenticate, requireRole } from '../../middleware/auth'; import { authenticate, requireRole } from '../../middleware/auth';
import { validate } from '../../middleware/validate'; import { validate } from '../../middleware/validate';
import { createInstanceSchema, updateInstanceSchema, registerInstanceSchema, reconfigureInstanceSchema, configureTunnelSchema, importInstancesSchema, startUpgradeSchema, startImageUpgradeSchema, startReleaseUpgradeSchema, setupRemoteTunnelSchema } from './instances.schemas'; import { createInstanceSchema, updateInstanceSchema, registerInstanceSchema, reconfigureInstanceSchema, configureTunnelSchema, importInstancesSchema, startUpgradeSchema, setupRemoteTunnelSchema } from './instances.schemas';
import * as instancesService from './instances.service'; import * as instancesService from './instances.service';
import * as healthService from '../../services/health.service'; import * as healthService from '../../services/health.service';
import * as backupService from '../../services/backup.service'; import * as backupService from '../../services/backup.service';
@ -362,60 +362,6 @@ router.post(
} }
); );
// Image-only upgrade (Approach B). Faster + safer than full upgrade for
// releases that don't change orchestration/templates. See upgrade.service.ts
// startImageUpgrade for full rationale.
router.post(
  '/:id/upgrade-images',
  requireRole('SUPER_ADMIN', 'OPERATOR'),
  validate(startImageUpgradeSchema),
  // Starts an image-only upgrade and answers 201 with the created upgrade record.
  async (req: Request, res: Response) => {
    const requestedTag = (req.body || {}).imageTag;
    const instanceId = req.params.id as string;
    const actorId = req.user!.id;
    const started = await upgradeService.startImageUpgrade(instanceId, actorId, req.ip, {
      imageTag: requestedTag,
    });
    res.status(201).json({ data: started });
  }
);
// Release upgrade (Approach C). Re-renders templates via CCP and applies
// them to the tenant, then composePull + composeUp. Used when a release
// changes orchestration in addition to image versions.
router.post(
  '/:id/upgrade-release',
  requireRole('SUPER_ADMIN', 'OPERATOR'),
  validate(startReleaseUpgradeSchema),
  // Starts a release upgrade and answers 201 with the created upgrade record.
  async (req: Request, res: Response) => {
    const requestedTag = (req.body || {}).imageTag;
    const instanceId = req.params.id as string;
    const actorId = req.user!.id;
    const started = await upgradeService.startReleaseUpgrade(instanceId, actorId, req.ip, {
      imageTag: requestedTag,
    });
    res.status(201).json({ data: started });
  }
);
// Approach C pre-flight: preview what re-render would change before applying.
// READ-ONLY — tenant disk is not touched.
router.post(
  '/:id/upgrade-release/preview',
  requireRole('SUPER_ADMIN', 'OPERATOR'),
  validate(startReleaseUpgradeSchema),
  // Returns the preview produced by the upgrade service for the given tag.
  async (req: Request, res: Response) => {
    const requestedTag = (req.body || {}).imageTag;
    const instanceId = req.params.id as string;
    const result = await upgradeService.previewReleaseUpgrade(instanceId, {
      imageTag: requestedTag,
    });
    res.json({ data: result });
  }
);
router.get( router.get(
'/:id/upgrade-status', '/:id/upgrade-status',
requireRole('SUPER_ADMIN', 'OPERATOR'), requireRole('SUPER_ADMIN', 'OPERATOR'),

View File

@ -121,30 +121,6 @@ export const startUpgradeSchema = z.object({
.optional(), .optional(),
}); });
// Approach B: image-only upgrade. Pulls images + recreates core app services
// without touching tracked files. imageTag is optional — if omitted, the
// agent uses whatever IMAGE_TAG the install's .env / compose env defines
// (typically `latest`). Tag must be a valid Docker tag.
// Optional tag: first char alphanumeric, then up to 127 of [A-Za-z0-9_.-].
const imageUpgradeTagField = z
  .string()
  .regex(/^[a-zA-Z0-9][a-zA-Z0-9_.-]{0,127}$/, 'Invalid imageTag')
  .optional();

/** Request body for the image-only upgrade endpoint. */
export const startImageUpgradeSchema = z.object({ imageTag: imageUpgradeTagField });
// Approach C: release upgrade via CCP template re-render. CCP renders the
// docker-compose.yml + nginx confs + pangolin resources etc. against the
// tenant's context (with the proposed imageTag), writes them to the tenant,
// then composePull + composeUp. Used when a release changes orchestration
// in addition to image versions. imageTag is the new value for the
// per-instance Instance.imageTag column (NULL falls back to env default).
// Optional tag: first char alphanumeric, then up to 127 of [A-Za-z0-9_.-].
const releaseUpgradeTagField = z
  .string()
  .regex(/^[a-zA-Z0-9][a-zA-Z0-9_.-]{0,127}$/, 'Invalid imageTag')
  .optional();

/** Request body for the release upgrade and release-preview endpoints. */
export const startReleaseUpgradeSchema = z.object({ imageTag: releaseUpgradeTagField });
export const setupRemoteTunnelSchema = z.object({ export const setupRemoteTunnelSchema = z.object({
// Empty string or omitted → resources use standard subdomains (app., api., etc.) // Empty string or omitted → resources use standard subdomains (app., api., etc.)
// A value like "ck" → creates ck-app., ck-api., etc. for multi-tenant domains // A value like "ck" → creates ck-app., ck-api., etc. for multi-tenant domains

View File

@ -24,16 +24,6 @@ export interface ExecutionDriver {
// ─── Filesystem Operations ────────────────────────────────── // ─── Filesystem Operations ──────────────────────────────────
readEnvFile(basePath: string): Promise<Record<string, string> | null>; readEnvFile(basePath: string): Promise<Record<string, string> | null>;
writeFiles(basePath: string, files: Array<{ relativePath: string; content: string }>): Promise<void>; writeFiles(basePath: string, files: Array<{ relativePath: string; content: string }>): Promise<void>;
/**
 * Approach C pre-flight: diff proposed file contents against the current
 * on-disk content. Read-only.
 *
 * @param basePath Root directory the relative paths are resolved against.
 * @param files    Proposed files (relative path + full content).
 * @returns One entry per file with its status (unchanged | modified | created),
 *          a unified diff for modified files (null otherwise), and the sizes
 *          before and after.
 */
diffFiles(
basePath: string,
files: Array<{ relativePath: string; content: string }>
): Promise<Array<{ path: string; status: 'unchanged' | 'modified' | 'created'; diff: string | null; sizeBefore: number; sizeAfter: number }>>;
// Approach C: patch specific .env keys in place (install.sh-tenant path
// where CCP can't re-render the full .env).
patchEnv(basePath: string, vars: Record<string, string>): Promise<{ patched: string[]; added: string[] }>;
mkdir(basePath: string, relativePath: string): Promise<void>; mkdir(basePath: string, relativePath: string): Promise<void>;
fileExists(basePath: string, relativePath: string): Promise<boolean>; fileExists(basePath: string, relativePath: string): Promise<boolean>;
deleteDirectory(dirPath: string): Promise<void>; deleteDirectory(dirPath: string): Promise<void>;

View File

@ -80,67 +80,6 @@ export class LocalDriver implements ExecutionDriver {
} }
} }
// Approach C pre-flight diff (read-only). For every proposed file, reads the
// current content at basePath + relativePath and reports:
//   - 'created'   when the file does not exist yet (sizeBefore = 0),
//   - 'unchanged' when the on-disk content equals the proposed content,
//   - 'modified'  otherwise, with a placeholder diff carrying the new content.
// Sizes are UTF-8 byte lengths.
//
// Only ENOENT is interpreted as "file does not exist". Any other read error
// (permission denied, path is a directory, ...) is rethrown so the preview
// does not mislabel an existing-but-unreadable path as 'created'.
async diffFiles(basePath: string, files: Array<{ relativePath: string; content: string }>) {
  const results: Array<{ path: string; status: 'unchanged' | 'modified' | 'created'; diff: string | null; sizeBefore: number; sizeAfter: number }> = [];
  for (const file of files) {
    const filePath = path.join(basePath, file.relativePath);
    const sizeAfter = Buffer.byteLength(file.content, 'utf-8');

    let current: string | null;
    try {
      current = await fs.readFile(filePath, 'utf-8');
    } catch (err: unknown) {
      const code = (err as { code?: unknown } | null | undefined)?.code;
      if (code !== 'ENOENT') throw err;
      current = null;
    }

    if (current === null) {
      results.push({ path: file.relativePath, status: 'created', diff: null, sizeBefore: 0, sizeAfter });
      continue;
    }

    const sizeBefore = Buffer.byteLength(current, 'utf-8');
    if (current === file.content) {
      results.push({ path: file.relativePath, status: 'unchanged', diff: null, sizeBefore, sizeAfter });
      continue;
    }

    // Minimal diff for local: full new content. Local mode is dev-only;
    // detailed diffs come from the agent-side implementation.
    results.push({
      path: file.relativePath,
      status: 'modified',
      diff: `--- a/${file.relativePath}\n+++ b/${file.relativePath}\n(local-driver: showing new content only)\n${file.content}`,
      sizeBefore,
      sizeAfter,
    });
  }
  return results;
}
// Approach C: patch specific .env keys in place (local mirror of the
// agent-side patchEnv helper). Preserves comments and key order.
//
// - Keys must match /^[A-Z_][A-Z0-9_]*$/ and values must not contain CR/LF;
//   otherwise an Error is thrown before the file is read.
// - EVERY `KEY=` line for a requested key is rewritten, not just the first.
//   Env-file parsers generally let a later duplicate override an earlier one,
//   so patching only the first occurrence could leave the old value in effect.
// - A trailing '\r' on a patched line is kept so CRLF files stay consistent.
// - Requested keys that never appear are appended under a marker comment.
//
// Resolves with the keys rewritten in place (`patched`, each listed once, in
// order of first appearance) and the keys appended (`added`). Rejects if the
// .env file cannot be read or written.
async patchEnv(basePath: string, vars: Record<string, string>) {
  const envPath = path.join(basePath, '.env');
  const ENV_KEY_RE = /^[A-Z_][A-Z0-9_]*$/;
  for (const [k, v] of Object.entries(vars)) {
    if (!ENV_KEY_RE.test(k)) throw new Error(`Invalid env key: ${k}`);
    if (/[\r\n]/.test(v)) throw new Error(`env value for ${k} contains newline`);
  }

  const wanted = new Set(Object.keys(vars));
  const current = await fs.readFile(envPath, 'utf-8');

  const patchedKeys = new Set<string>();
  const lines = current.split('\n').map((line) => {
    const key = /^([A-Z_][A-Z0-9_]*)=/.exec(line)?.[1];
    if (key === undefined || !wanted.has(key)) return line;
    patchedKeys.add(key);
    const eol = line.endsWith('\r') ? '\r' : '';
    return `${key}=${vars[key]}${eol}`;
  });

  const added: string[] = [];
  const missing = [...wanted].filter((k) => !patchedKeys.has(k));
  if (missing.length > 0) {
    // Drop trailing blank lines so the appended section sits directly after
    // the last real entry, separated by exactly one blank line.
    while (lines.length > 0 && lines[lines.length - 1] === '') lines.pop();
    lines.push('', '# Added by CCP env-patch');
    for (const k of missing) {
      lines.push(`${k}=${vars[k]}`);
      added.push(k);
    }
    lines.push('');
  }

  await fs.writeFile(envPath, lines.join('\n'), 'utf-8');
  return { patched: [...patchedKeys], added };
}
async mkdir(basePath: string, relativePath: string) { async mkdir(basePath: string, relativePath: string) {
await fs.mkdir(path.join(basePath, relativePath), { recursive: true }); await fs.mkdir(path.join(basePath, relativePath), { recursive: true });
} }

View File

@ -82,10 +82,6 @@ export interface StartAgentUpgradeOptions {
branch?: string; branch?: string;
} }
/**
 * Options for RemoteDriver.startImageUpgrade. The object is sent unchanged as
 * the JSON body of the agent's `upgrade/start-image-only` request.
 */
export interface StartAgentImageUpgradeOptions {
// Optional image tag to upgrade to; omitted when the caller does not pick one.
imageTag?: string;
}
interface AgentRequestOptions { interface AgentRequestOptions {
method: 'GET' | 'POST' | 'DELETE'; method: 'GET' | 'POST' | 'DELETE';
path: string; path: string;
@ -309,28 +305,6 @@ export class RemoteDriver implements ExecutionDriver {
}); });
} }
// Approach C pre-flight diff, delegated to the agent: POSTs the proposed
// files to the instance's files/diff endpoint and returns the per-file
// entries from the response. The local base path is not used.
async diffFiles(_basePath: string, files: Array<{ relativePath: string; content: string }>) {
  const { files: perFileDiffs } = await this.request<{
    files: Array<{
      path: string;
      status: 'unchanged' | 'modified' | 'created';
      diff: string | null;
      sizeBefore: number;
      sizeAfter: number;
    }>;
  }>({
    method: 'POST',
    path: `/instance/${this.slug}/files/diff`,
    body: { files },
    timeoutMs: env.AGENT_LONG_OP_TIMEOUT_MS,
  });
  return perFileDiffs;
}
// Approach C: asks the agent to patch the given .env keys in place. Used for
// isRegistered=true tenants where CCP can't re-render the full .env.
// Resolves with the agent's report of patched and added keys; the local base
// path is not used.
async patchEnv(_basePath: string, vars: Record<string, string>) {
  const outcome = await this.request<{ patched: string[]; added: string[] }>({
    method: 'POST',
    path: `/instance/${this.slug}/env/patch`,
    body: { vars },
    timeoutMs: env.AGENT_LONG_OP_TIMEOUT_MS,
  });
  return outcome;
}
async mkdir(_basePath: string, relativePath: string): Promise<void> { async mkdir(_basePath: string, relativePath: string): Promise<void> {
await this.request({ await this.request({
method: 'POST', method: 'POST',
@ -600,21 +574,6 @@ export class RemoteDriver implements ExecutionDriver {
}); });
} }
/**
 * Ask the agent to start an image-only upgrade (Approach B) by POSTing to
 * the instance's `upgrade/start-image-only` endpoint with a 30 s timeout.
 *
 * Per the original notes, the agent runs image-upgrade.sh --api-mode, which
 * pulls images and recreates core app services without touching the install
 * tree, answers 202 immediately, and reports through the same
 * progress/result polling endpoints as startUpgrade.
 *
 * @param options Sent as the request body; defaults to an empty object.
 */
async startImageUpgrade(options: StartAgentImageUpgradeOptions = {}): Promise<void> {
  const endpoint = `/instance/${this.slug}/upgrade/start-image-only`;
  await this.request({ method: 'POST', path: endpoint, body: options, timeoutMs: 30_000 });
}
/** /**
* Read the agent's data/upgrade/progress.json. Returns the default zero-state * Read the agent's data/upgrade/progress.json. Returns the default zero-state
* if no progress has been written yet. * if no progress has been written yet.

View File

@ -135,8 +135,6 @@ export interface InstanceForTemplate {
smtpFrom: string | null; smtpFrom: string | null;
emailTestMode: boolean; emailTestMode: boolean;
gitBranch: string; gitBranch: string;
// Per-instance image tag override (Approach C). NULL falls back to env.IMAGE_TAG.
imageTag: string | null;
} }
/** /**
@ -210,9 +208,7 @@ export function buildTemplateContext(
gitBranch: instance.gitBranch, gitBranch: instance.gitBranch,
registryUrl: env.GITEA_REGISTRY, registryUrl: env.GITEA_REGISTRY,
useRegistry: env.USE_REGISTRY_IMAGES, useRegistry: env.USE_REGISTRY_IMAGES,
// Approach C: per-instance imageTag overrides the CCP-wide env default. imageTag: env.IMAGE_TAG,
// NULL on the Instance row falls back to env.IMAGE_TAG (typically 'latest').
imageTag: instance.imageTag || env.IMAGE_TAG,
}; };
} }

View File

@ -8,8 +8,6 @@ import { logger } from '../utils/logger';
import { createEvent } from './event.service'; import { createEvent } from './event.service';
import { getRemoteDriverForInstance } from './execution-driver'; import { getRemoteDriverForInstance } from './execution-driver';
import type { AgentUpdateStatus } from './remote-driver'; import type { AgentUpdateStatus } from './remote-driver';
import { buildTemplateContext, clearTemplateCache, renderAllTemplatesInMemory } from './template-engine';
import { decryptJson } from '../utils/encryption';
/** /**
* Shell-injection guards. Any user- or DB-controlled value that flows into * Shell-injection guards. Any user- or DB-controlled value that flows into
@ -207,10 +205,6 @@ export interface StartUpgradeOptions {
branch?: string; branch?: string;
} }
/**
 * Caller-supplied options for startImageUpgrade (image-only upgrade).
 * They are recorded in the audit log and passed on to the remote runner.
 */
export interface StartImageUpgradeOptions {
// Optional image tag to hand to the agent when it starts the image upgrade.
imageTag?: string;
}
/** /**
* Start an upgrade for an instance. Returns the created InstanceUpgrade record. * Start an upgrade for an instance. Returns the created InstanceUpgrade record.
* The actual upgrade runs asynchronously (fire-and-forget). * The actual upgrade runs asynchronously (fire-and-forget).
@ -304,412 +298,6 @@ export async function startUpgrade(
return upgrade; return upgrade;
} }
/**
 * Kick off an IMAGE-ONLY upgrade (Approach B) for a remote instance.
 *
 * Per the original design notes, this pulls the latest images and recreates
 * the core app services without touching tracked files. It is faster than a
 * full upgrade (~2 min vs ~4-5 min) and safer, because there is no filesystem
 * mutation outside docker; tenant content (mkdocs/, configs/) is implicitly
 * preserved. Use it for releases that only bump container code or schema.
 * Releases that change compose orchestration, nginx config, or other tracked
 * files should use startUpgrade() instead.
 *
 * Steps: validate the instance, refuse if another upgrade row is PENDING or
 * IN_PROGRESS, create the InstanceUpgrade row and an audit-log entry, then
 * launch runRemoteUpgrade in 'image-only' mode without awaiting it.
 *
 * @param instanceId Instance to upgrade.
 * @param userId User recorded as the trigger of the upgrade.
 * @param ipAddress Optional caller IP stored on the audit-log entry.
 * @param options Optional image-only options (recorded in the audit log).
 * @returns The newly created InstanceUpgrade record.
 * @throws Error if the instance is missing, not remote, not RUNNING/STOPPED,
 *   or already has an upgrade in progress.
 */
export async function startImageUpgrade(
  instanceId: string,
  userId: string,
  ipAddress?: string,
  options?: StartImageUpgradeOptions
) {
  const instance = await prisma.instance.findUnique({ where: { id: instanceId } });
  if (!instance) {
    throw new Error('Instance not found');
  }
  // Remote-only for now: there is no local image-upgrade runner.
  if (!instance.isRemote) {
    throw new Error('Image-only upgrade is currently supported only for remote instances');
  }
  const isUpgradableState =
    instance.status === InstanceStatus.RUNNING || instance.status === InstanceStatus.STOPPED;
  if (!isUpgradableState) {
    throw new Error(`Cannot upgrade instance in ${instance.status} state`);
  }

  // Same guard as startUpgrade: at most one upgrade (of any type) per instance.
  const inFlight = await prisma.instanceUpgrade.findFirst({
    where: {
      instanceId,
      status: { in: [UpgradeStatus.PENDING, UpgradeStatus.IN_PROGRESS] },
    },
  });
  if (inFlight) {
    throw new Error('An upgrade is already in progress for this instance');
  }

  // branch is unused for image-only upgrades but is filled with the current
  // branch so the audit trail stays consistent with full upgrades.
  const upgrade = await prisma.instanceUpgrade.create({
    data: {
      instanceId,
      status: UpgradeStatus.PENDING,
      previousCommit: instance.gitCommit,
      branch: instance.gitBranch,
      triggeredById: userId,
    },
  });

  const auditDetails = {
    upgradeId: upgrade.id,
    previousCommit: instance.gitCommit,
    source: 'remote',
    mode: 'image-only',
    options: options || {},
  };
  await prisma.auditLog.create({
    data: {
      userId,
      instanceId,
      action: AuditAction.INSTANCE_UPGRADE,
      details: auditDetails as unknown as Prisma.InputJsonValue,
      ipAddress,
    },
  });

  // Fire-and-forget: same poll loop and result handling as a full remote
  // upgrade; only the initial agent call differs.
  runRemoteUpgrade(upgrade.id, instance, undefined, 'image-only', options).catch((err) => {
    logger.error(`[image-upgrade] Remote image upgrade orchestration failed for ${instance.slug}: ${err}`);
  });

  return upgrade;
}
// ─── Approach C: Release upgrade (template re-render) ────────────────────────
/**
 * Caller-supplied options for the Approach C release upgrade
 * (previewReleaseUpgrade / startReleaseUpgrade).
 */
export interface StartReleaseUpgradeOptions {
// Proposed image tag. Checked against SAFE_IMAGE_TAG; when set, the apply
// path stores it on the instance row before rendering templates.
imageTag?: string;
}
// Accepted image tags: an alphanumeric first character followed by up to 127
// characters from [a-zA-Z0-9_.-] (1-128 characters in total).
const SAFE_IMAGE_TAG = /^[a-zA-Z0-9][a-zA-Z0-9_.-]{0,127}$/;
/**
 * Relative paths that must NOT be re-rendered for tenants without
 * encryptedSecrets (install.sh-registered tenants). Their .env was
 * provisioned at install time and holds real secrets CCP cannot reproduce.
 */
const REGISTERED_TENANT_SKIP_FILES = new Set(['.env']);

/**
 * Drop the files listed in REGISTERED_TENANT_SKIP_FILES from a rendered file
 * list, so an install.sh tenant keeps the .env it already has on disk. All
 * other rendered files are returned unchanged and in their original order.
 * The input array is not modified.
 *
 * @param files Rendered files, keyed by path relative to the install root.
 * @returns A new array without the skipped paths (exact-match on relativePath).
 */
function filterRenderedFilesForRegisteredTenant(
  files: Array<{ relativePath: string; content: string }>
): Array<{ relativePath: string; content: string }> {
  const kept: Array<{ relativePath: string; content: string }> = [];
  for (const rendered of files) {
    if (REGISTERED_TENANT_SKIP_FILES.has(rendered.relativePath)) {
      continue;
    }
    kept.push(rendered);
  }
  return kept;
}
/**
 * Extract the env var names referenced by a rendered docker-compose.yml.
 * Used to compute envCoverage for install.sh tenants: the operator needs to
 * know whether any ${VAR} reference would have no value in the tenant's .env.
 *
 * Recognised braced forms: ${VAR}, ${VAR:-default}, ${VAR-default},
 * ${VAR:?error}, ${VAR?error}, ${VAR:+alt} and ${VAR+alt}. An escaped
 * reference written as `$${VAR}` (a literal dollar sign in Compose) is
 * skipped. Only upper-case names matching [A-Z_][A-Z0-9_]* are considered.
 * Unbraced `$VAR` references and references nested inside another
 * reference's default value are not extracted.
 *
 * @param composeYaml Rendered compose file content.
 * @returns Unique variable names, sorted ascending.
 */
function extractComposeEnvVars(composeYaml: string): string[] {
  const names = new Set<string>();
  // First alternative consumes `$$` escapes so the `${...}` that follows one
  // is not mistaken for an interpolation. In the modifier class the leading
  // `-` is a literal (the previous `[:-?]` was a range that excluded `-`).
  const reference = /\$\$|\$\{([A-Z_][A-Z0-9_]*)(?::?[-?+][^}]*)?\}/g;
  for (const match of composeYaml.matchAll(reference)) {
    const name = match[1];
    if (name !== undefined) names.add(name);
  }
  return Array.from(names).sort();
}
/**
 * Approach C pre-flight preview. READ-ONLY: nothing is written to the tenant
 * or to the database.
 *
 * Renders all templates with the proposed imageTag applied to an in-memory
 * copy of the instance, then asks the remote driver to diff the result
 * against the tenant's current files. For tenants without encryptedSecrets
 * (install.sh tenants) the rendered .env is left out, and an envCoverage
 * report lists which variables referenced by the new docker-compose.yml are
 * present in / missing from the tenant's existing .env.
 *
 * @param instanceId Instance to preview.
 * @param options Optional proposed imageTag (not persisted).
 * @returns Per-file diff results, plus envCoverage for install.sh tenants
 *   when a docker-compose.yml was rendered (undefined otherwise).
 * @throws Error if the instance is missing, not remote, or the imageTag
 *   does not match SAFE_IMAGE_TAG.
 */
export async function previewReleaseUpgrade(
  instanceId: string,
  options?: StartReleaseUpgradeOptions
): Promise<{
  files: Array<{ path: string; status: 'unchanged' | 'modified' | 'created'; diff: string | null; sizeBefore: number; sizeAfter: number }>;
  envCoverage?: {
    requiredVars: string[];
    presentInTenantEnv: string[];
    missingInTenantEnv: string[];
  };
}> {
  const instance = await prisma.instance.findUnique({ where: { id: instanceId } });
  if (!instance) {
    throw new Error('Instance not found');
  }
  if (!instance.isRemote) {
    throw new Error('Release upgrade preview is currently supported only for remote instances');
  }
  const proposedTag = options?.imageTag;
  if (proposedTag && !SAFE_IMAGE_TAG.test(proposedTag)) {
    throw new Error('Invalid imageTag');
  }

  // Tenants without stored secrets were registered via install.sh.
  const hasStoredSecrets = Boolean(instance.encryptedSecrets);

  // Overlay the proposed tag on a copy of the row; the DB is not updated.
  const previewInstance = { ...instance, imageTag: proposedTag ?? instance.imageTag };
  const secrets = instance.encryptedSecrets
    ? decryptJson<Record<string, string>>(instance.encryptedSecrets)
    : {};
  clearTemplateCache();
  const context = buildTemplateContext(previewInstance, secrets);
  const rendered = await renderAllTemplatesInMemory(context);
  // No secrets to render .env against, so that file is dropped from the preview.
  const files = hasStoredSecrets ? rendered : filterRenderedFilesForRegisteredTenant(rendered);

  const driver = await getRemoteDriverForInstance({
    id: instance.id,
    slug: instance.slug,
    isRemote: instance.isRemote,
    agentUrl: instance.agentUrl,
  });
  const diffResults = await driver.diffFiles(instance.basePath, files);

  if (hasStoredSecrets) {
    return { files: diffResults, envCoverage: undefined };
  }

  // CCP is not rendering this tenant's .env, so report which variables the
  // new compose file references and whether the tenant's .env defines them.
  const composeFile = files.find((f) => f.relativePath === 'docker-compose.yml');
  if (!composeFile) {
    return { files: diffResults, envCoverage: undefined };
  }
  const requiredVars = extractComposeEnvVars(composeFile.content);
  const tenantEnv = await driver.readEnvFile(instance.basePath);
  const definedKeys = new Set(Object.keys(tenantEnv ?? {}));
  const presentInTenantEnv: string[] = [];
  const missingInTenantEnv: string[] = [];
  for (const name of requiredVars) {
    (definedKeys.has(name) ? presentInTenantEnv : missingInTenantEnv).push(name);
  }

  return {
    files: diffResults,
    envCoverage: { requiredVars, presentInTenantEnv, missingInTenantEnv },
  };
}
/**
 * Approach C apply path: entry point.
 *
 * Validates the request, creates the InstanceUpgrade row and an audit-log
 * entry, then starts runReleaseUpgrade without awaiting it. Per the original
 * notes, the runner persists the imageTag, re-renders templates, writes them
 * to the tenant, and then runs composePull + composeUp; progress is visible
 * through the existing getUpgradeStatus() poll.
 *
 * @param instanceId Instance to upgrade.
 * @param userId User recorded as the trigger of the upgrade.
 * @param ipAddress Optional caller IP stored on the audit-log entry.
 * @param options Optional new imageTag.
 * @returns The newly created InstanceUpgrade record.
 * @throws Error if the instance is missing, not remote, not RUNNING/STOPPED,
 *   the imageTag is invalid, or another upgrade is already in progress.
 */
export async function startReleaseUpgrade(
  instanceId: string,
  userId: string,
  ipAddress?: string,
  options?: StartReleaseUpgradeOptions
) {
  const instance = await prisma.instance.findUnique({ where: { id: instanceId } });
  if (!instance) {
    throw new Error('Instance not found');
  }
  if (!instance.isRemote) {
    throw new Error('Release upgrade is currently supported only for remote instances');
  }
  const isUpgradableState =
    instance.status === InstanceStatus.RUNNING || instance.status === InstanceStatus.STOPPED;
  if (!isUpgradableState) {
    throw new Error(`Cannot upgrade instance in ${instance.status} state`);
  }
  const requestedTag = options?.imageTag;
  if (requestedTag && !SAFE_IMAGE_TAG.test(requestedTag)) {
    throw new Error('Invalid imageTag');
  }

  // Shared in-progress guard across all upgrade types.
  const inFlight = await prisma.instanceUpgrade.findFirst({
    where: { instanceId, status: { in: [UpgradeStatus.PENDING, UpgradeStatus.IN_PROGRESS] } },
  });
  if (inFlight) {
    throw new Error('An upgrade is already in progress for this instance');
  }

  // previousCommit records the current image tag when there is one,
  // otherwise the instance's git commit.
  const upgrade = await prisma.instanceUpgrade.create({
    data: {
      instanceId,
      status: UpgradeStatus.PENDING,
      previousCommit: instance.imageTag ?? instance.gitCommit,
      branch: instance.gitBranch,
      triggeredById: userId,
    },
  });

  const auditDetails = {
    upgradeId: upgrade.id,
    previousImageTag: instance.imageTag,
    newImageTag: requestedTag,
    source: 'remote',
    mode: 'release-template',
  };
  await prisma.auditLog.create({
    data: {
      userId,
      instanceId,
      action: AuditAction.INSTANCE_UPGRADE,
      details: auditDetails as unknown as Prisma.InputJsonValue,
      ipAddress,
    },
  });

  // Fire-and-forget. Separate from runRemoteUpgrade because nothing shells
  // out to upgrade.sh here: CCP renders and drives compose itself through
  // the mTLS driver, with no agent-side script involved.
  runReleaseUpgrade(upgrade.id, instance, options).catch((err) => {
    logger.error(`[release-upgrade] Orchestration failed for ${instance.slug}: ${err}`);
  });

  return upgrade;
}
/**
 * Internal: performs the actual Approach C work for one InstanceUpgrade row.
 *
 * Phases (each reported through a status update on the InstanceUpgrade row):
 *   1. Render:   persist the new imageTag (if any) and render all templates.
 *   2. Write:    write rendered files to the tenant; for tenants without
 *                encryptedSecrets also patch IMAGE_TAG into the existing .env.
 *   3. Pull:     composePull.
 *   4. Recreate: composeUp.
 *   5. Verify:   best-effort composePs check; problems become warnings only.
 *
 * Failure handling:
 *   - Any thrown error marks the upgrade FAILED and creates an ERROR event.
 *     This function itself resolves unless those bookkeeping writes fail.
 *   - If the new imageTag was persisted but the run failed BEFORE anything
 *     was written to the tenant, the instance's previous imageTag is restored
 *     so the DB does not claim a tag that was never deployed. Once writing
 *     has started the tag is left as-is, because tenant files may already
 *     reference it.
 *   - An env-patch failure is non-fatal, but it is logged AND recorded in the
 *     upgrade's warnings, since compose may then keep using the old tag.
 *
 * @param upgradeId InstanceUpgrade row to update.
 * @param instance Instance as loaded by the caller before the upgrade started.
 * @param options Optional new imageTag.
 */
async function runReleaseUpgrade(
  upgradeId: string,
  instance: Instance,
  options?: StartReleaseUpgradeOptions
) {
  const slug = instance.slug;
  const newImageTag = options?.imageTag;
  // Soft problems collected across phases; stored on the row on completion.
  const warnings: string[] = [];
  // Track how far we got so the failure path knows whether reverting the
  // persisted imageTag is still safe.
  let tagPersisted = false;
  let tenantTouched = false;

  const updateStatus = async (data: Prisma.InstanceUpgradeUpdateInput) => {
    await prisma.instanceUpgrade.update({ where: { id: upgradeId }, data });
  };

  try {
    await updateStatus({
      status: UpgradeStatus.IN_PROGRESS,
      currentPhase: 1,
      phaseName: 'Render',
      percentage: 10,
      progressMessage: 'Rendering templates with new image tag...',
    });

    // Persist new imageTag before render so buildTemplateContext picks it up.
    if (newImageTag) {
      await prisma.instance.update({ where: { id: instance.id }, data: { imageTag: newImageTag } });
      tagPersisted = true;
    }
    const refreshed = await prisma.instance.findUniqueOrThrow({ where: { id: instance.id } });
    const secrets = refreshed.encryptedSecrets
      ? decryptJson<Record<string, string>>(refreshed.encryptedSecrets)
      : {};

    clearTemplateCache();
    const context = buildTemplateContext(refreshed, secrets);
    let files = await renderAllTemplatesInMemory(context);
    if (!refreshed.encryptedSecrets) {
      files = filterRenderedFilesForRegisteredTenant(files);
    }

    const driver = await getRemoteDriverForInstance({
      id: instance.id,
      slug: instance.slug,
      isRemote: instance.isRemote,
      agentUrl: instance.agentUrl,
    });

    // Phase 2: write rendered files
    await updateStatus({
      currentPhase: 2,
      phaseName: 'Write Files',
      percentage: 30,
      progressMessage: `Writing ${files.length} rendered file(s)...`,
    });
    // Set before the call: a failed write may still have changed some files.
    tenantTouched = true;
    await driver.writeFiles(instance.basePath, files);

    // For isRegistered=true tenants we skip rendering .env (no secrets in DB).
    // But we still need to propagate the new imageTag into their existing .env
    // so compose's ${IMAGE_TAG:-latest} substitution picks it up. Patch in place.
    if (!refreshed.encryptedSecrets && newImageTag) {
      await updateStatus({
        currentPhase: 2,
        phaseName: 'Patch Env',
        percentage: 45,
        progressMessage: `Patching IMAGE_TAG=${newImageTag} in tenant .env...`,
      });
      try {
        await driver.patchEnv(instance.basePath, { IMAGE_TAG: newImageTag });
      } catch (err: unknown) {
        // Non-fatal but loud: the tenant may already have the desired tag
        // in .env, or env-patch isn't supported on their agent. Surface it on
        // the upgrade record too, not only in the server log.
        const reason = err instanceof Error ? err.message : String(err);
        logger.warn(`[release-upgrade] ${slug}: env patch failed: ${reason}`);
        warnings.push(
          `IMAGE_TAG could not be patched in tenant .env (${reason}); verify the tenant is running ${newImageTag}`
        );
      }
    }

    // Phase 3: pull images per new compose
    await updateStatus({
      currentPhase: 3,
      phaseName: 'Pull Images',
      percentage: 55,
      progressMessage: 'Pulling images from registry...',
    });
    await driver.composePull(instance.basePath, instance.composeProject);

    // Phase 4: recreate services
    await updateStatus({
      currentPhase: 4,
      phaseName: 'Recreate Services',
      percentage: 80,
      progressMessage: 'Recreating services with new orchestration...',
    });
    await driver.composeUp(instance.basePath, instance.composeProject);

    // Phase 5: verify (best-effort; soft warnings only)
    await updateStatus({
      currentPhase: 5,
      phaseName: 'Verify',
      percentage: 95,
      progressMessage: 'Verifying container health...',
    });
    try {
      const containers = await driver.composePs(instance.basePath, instance.composeProject);
      const unhealthy = containers.filter(c => c.status && /restarting|exited/i.test(c.status));
      if (unhealthy.length > 0) {
        warnings.push(`${unhealthy.length} container(s) not healthy after upgrade: ${unhealthy.map(c => c.name).join(', ')}`);
      }
    } catch {
      warnings.push('composePs verification failed (services may still be starting)');
    }

    await updateStatus({
      status: UpgradeStatus.COMPLETED,
      currentPhase: 5,
      phaseName: 'Complete',
      percentage: 100,
      progressMessage: `Release upgrade complete${newImageTag ? ` (imageTag: ${newImageTag})` : ''}`,
      newCommit: newImageTag ?? refreshed.imageTag,
      commitCount: 0,
      warnings: warnings.length ? (warnings as unknown as Prisma.InputJsonValue) : undefined,
      completedAt: new Date(),
    });

    logger.info(`[release-upgrade] ${slug}: completed${newImageTag ? ` (imageTag: ${newImageTag})` : ''}`);
  } catch (err: unknown) {
    const message = (err instanceof Error ? err.message : '') || 'Release upgrade failed';

    // Nothing reached the tenant, so the previously stored tag is still the
    // truthful one. Best-effort: a failed revert must not mask the real error.
    if (tagPersisted && !tenantTouched) {
      try {
        await prisma.instance.update({
          where: { id: instance.id },
          data: { imageTag: instance.imageTag },
        });
      } catch (revertErr: unknown) {
        const reason = revertErr instanceof Error ? revertErr.message : String(revertErr);
        logger.warn(`[release-upgrade] ${slug}: failed to restore previous imageTag: ${reason}`);
      }
    }

    await updateStatus({
      status: UpgradeStatus.FAILED,
      errorMessage: message,
      progressMessage: `Failed: ${message}`,
      completedAt: new Date(),
    });
    await createEvent(
      instance.id,
      'ERROR',
      'upgrade',
      'Release upgrade failed',
      message,
      { upgradeId, source: 'remote', mode: 'release-template' }
    );
    logger.error(`[release-upgrade] ${slug}: failed: ${message}`);
  }
}
/** /**
* Async REMOTE upgrade runner. * Async REMOTE upgrade runner.
* *
@ -728,9 +316,7 @@ async function runReleaseUpgrade(
async function runRemoteUpgrade( async function runRemoteUpgrade(
upgradeId: string, upgradeId: string,
instance: Instance, instance: Instance,
options?: StartUpgradeOptions, options?: StartUpgradeOptions
mode: 'full' | 'image-only' = 'full',
imageOnlyOptions?: StartImageUpgradeOptions
) { ) {
const slug = instance.slug; const slug = instance.slug;
@ -747,27 +333,18 @@ async function runRemoteUpgrade(
where: { id: upgradeId }, where: { id: upgradeId },
data: { data: {
status: UpgradeStatus.IN_PROGRESS, status: UpgradeStatus.IN_PROGRESS,
progressMessage: mode === 'image-only' progressMessage: 'Starting remote upgrade...',
? 'Starting image-only upgrade...'
: 'Starting remote upgrade...',
}, },
}); });
// Tell the agent to start. The agent has its own mutex + stale-progress // Tell the agent to start. The agent has its own mutex + stale-progress
// check, so this can return 409 if a previous upgrade is still running. // check, so this can return 409 if a previous upgrade is still running.
if (mode === 'image-only') {
logger.info(`[upgrade] ${slug}: triggering remote image-upgrade.sh start`);
await driver.startImageUpgrade({
imageTag: imageOnlyOptions?.imageTag,
});
} else {
logger.info(`[upgrade] ${slug}: triggering remote upgrade.sh start`); logger.info(`[upgrade] ${slug}: triggering remote upgrade.sh start`);
await driver.startUpgrade({ await driver.startUpgrade({
skipBackup: options?.skipBackup, skipBackup: options?.skipBackup,
useRegistry: options?.useRegistry, useRegistry: options?.useRegistry,
branch: options?.branch, branch: options?.branch,
}); });
}
// Poll progress + result. We treat /result returning 200 as the signal // Poll progress + result. We treat /result returning 200 as the signal
// that upgrade.sh exited (successfully or with code != 0 — the script // that upgrade.sh exited (successfully or with code != 0 — the script

File diff suppressed because it is too large Load Diff

View File

@ -1,95 +1,65 @@
# ============================================================================== # ============================================================
# Changemaker Lite v2 — Tenant .env (CCP-rendered) # Changemaker Lite — Instance: {{name}}
# Instance: {{name}} ({{slug}})
# Generated by CCP on {{now}} # Generated by CCP on {{now}}
# ============================================================================== # ============================================================
# This file is a near-mirror of changemaker.lite/.env.example with Handlebars
# overlay for tenant-specific values (DOMAIN, secrets, COMPOSE_PROJECT_NAME).
# Static defaults match .env.example so docker-compose.yml.hbs (a mirror of
# docker-compose.prod.yml) has every ${VAR} it references.
#
# Keeping this in sync with .env.example after upstream additions: copy the
# new key + default, replace any tenant-specific value with the matching
# Handlebars expression. Most additions need no Handlebars.
# ==============================================================================
# --- General --- # Core
NODE_ENV=production NODE_ENV=production
DOMAIN={{domain}} DOMAIN={{domain}}
COMPOSE_PROJECT_NAME={{composeProject}}
TZ=UTC
USER_ID=1000 USER_ID=1000
GROUP_ID=1000 GROUP_ID=1000
DOCKER_GROUP_ID=984 DOCKER_GROUP_ID=984
# --- V2 PostgreSQL --- # V2 PostgreSQL
V2_POSTGRES_USER=changemaker V2_POSTGRES_USER=changemaker
V2_POSTGRES_PASSWORD={{secrets.postgresPassword}} V2_POSTGRES_PASSWORD={{secrets.postgresPassword}}
V2_POSTGRES_DB=changemaker_v2 V2_POSTGRES_DB=changemaker_v2
V2_POSTGRES_PORT={{ports.postgres}} V2_POSTGRES_PORT={{ports.postgres}}
DATABASE_URL=postgresql://changemaker:{{secrets.postgresPassword}}@{{containerPrefix}}-postgres:5432/changemaker_v2
# --- JWT Auth --- # Redis
REDIS_PASSWORD={{secrets.redisPassword}}
REDIS_URL=redis://:{{secrets.redisPassword}}@{{containerPrefix}}-redis:6379
# JWT Auth
JWT_ACCESS_SECRET={{secrets.jwtAccessSecret}} JWT_ACCESS_SECRET={{secrets.jwtAccessSecret}}
JWT_REFRESH_SECRET={{secrets.jwtRefreshSecret}} JWT_REFRESH_SECRET={{secrets.jwtRefreshSecret}}
JWT_INVITE_SECRET={{secrets.jwtInviteSecret}} JWT_INVITE_SECRET={{secrets.jwtInviteSecret}}
JWT_ACCESS_EXPIRY=15m JWT_ACCESS_EXPIRY=15m
# Reduced from 7d → 24h on 2026-04-12 (P2-3 hardening). Combined with # Reduced 2026-04-12 from 7d → 24h (P2-3). Combined with device-fingerprint
# device-fingerprint binding in the JWT payload, this tightens the # binding in the refresh JWT payload, this tightens the exploitation window
# exploitation window for stolen refresh tokens. # for stolen refresh tokens.
JWT_REFRESH_EXPIRY=24h JWT_REFRESH_EXPIRY=24h
# Encryption key for DB-stored secrets (SMTP password, etc.) # Gitea SSO cookie signing + service password salt — REQUIRED 2026-04-12 (P2-2).
ENCRYPTION_KEY={{secrets.encryptionKey}} # Distinct from JWT secrets; empty values will now fail Zod validation on boot.
# Gitea SSO cookie signing secret + service password salt — REQUIRED 2026-04-12
# (P2-2). Distinct from JWT secrets; empty values will fail Zod validation on
# boot. Both ≥32 chars, distinct from each other and from JWT_* secrets.
GITEA_SSO_SECRET={{secrets.giteaSsoSecret}} GITEA_SSO_SECRET={{secrets.giteaSsoSecret}}
SERVICE_PASSWORD_SALT={{secrets.servicePasswordSalt}} SERVICE_PASSWORD_SALT={{secrets.servicePasswordSalt}}
# --- Initial Super Admin User --- # Encryption
ENCRYPTION_KEY={{secrets.encryptionKey}}
# Initial Admin
INITIAL_ADMIN_EMAIL={{secrets.adminEmail}} INITIAL_ADMIN_EMAIL={{secrets.adminEmail}}
INITIAL_ADMIN_PASSWORD={{secrets.initialAdminPassword}} INITIAL_ADMIN_PASSWORD={{secrets.initialAdminPassword}}
# --- API --- # API
API_PORT=4000 API_PORT=4000
PORT=4000
API_URL=https://api.{{domain}} API_URL=https://api.{{domain}}
CORS_ORIGINS=https://app.{{domain}},http://localhost:{{ports.admin}},http://localhost CORS_ORIGINS=https://app.{{domain}},http://localhost:{{ports.admin}},http://localhost
# --- Admin GUI ---
ADMIN_PORT=3000
ADMIN_URL=https://app.{{domain}} ADMIN_URL=https://app.{{domain}}
# --- Nginx --- # Admin GUI
ADMIN_PORT=3000
# Nginx
NGINX_HTTP_PORT={{ports.nginx}} NGINX_HTTP_PORT={{ports.nginx}}
NGINX_HTTPS_PORT=443 NGINX_HTTPS_PORT=443
# --- Embed Proxy Ports --- # SMTP / Email
# Dedicated nginx ports for iframe embedding without DNS/subdomain.
# CCP allocates these per-instance via {{ports.embed}} base + offset.
NOCODB_EMBED_PORT={{math ports.embed "+" 0}}
N8N_EMBED_PORT={{math ports.embed "+" 1}}
GITEA_EMBED_PORT={{math ports.embed "+" 2}}
MAILHOG_EMBED_PORT={{math ports.embed "+" 3}}
MINI_QR_EMBED_PORT={{math ports.embed "+" 4}}
EXCALIDRAW_EMBED_PORT={{math ports.embed "+" 5}}
HOMEPAGE_EMBED_PORT={{math ports.embed "+" 6}}
VAULTWARDEN_EMBED_PORT={{math ports.embed "+" 9}}
ROCKETCHAT_EMBED_PORT={{math ports.embed "+" 10}}
GANCIO_EMBED_PORT={{math ports.embed "+" 11}}
JITSI_EMBED_PORT={{math ports.embed "+" 15}}
GRAFANA_EMBED_PORT={{math ports.embed "+" 12}}
ALERTMANAGER_EMBED_PORT={{math ports.embed "+" 16}}
# --- Docker / Container Management ---
DOCKER_NETWORK_NAME=changemaker-lite
DOCKER_PROXY_URL=http://docker-socket-proxy:2375
NEWT_CONTAINER_NAME=newt-changemaker
NEWT_COMPOSE_SERVICE=newt
# --- SMTP / Email ---
{{#if emailTestMode}} {{#if emailTestMode}}
SMTP_HOST=mailhog-changemaker SMTP_HOST={{containerPrefix}}-mailhog
SMTP_PORT=1025 SMTP_PORT=1025
SMTP_USER= SMTP_USER=
SMTP_PASS= SMTP_PASS=
@ -105,9 +75,21 @@ SMTP_FROM={{smtpFrom}}
SMTP_FROM_NAME={{name}} SMTP_FROM_NAME={{name}}
TEST_EMAIL_RECIPIENT={{secrets.adminEmail}} TEST_EMAIL_RECIPIENT={{secrets.adminEmail}}
# --- Listmonk --- # NocoDB
LISTMONK_PORT=9001 NOCODB_V2_PORT=8080
LISTMONK_DB_PORT=5434 NOCODB_URL=http://{{containerPrefix}}-nocodb:8080
NC_ADMIN_EMAIL={{secrets.adminEmail}}
NC_ADMIN_PASSWORD={{secrets.nocodbAdminPassword}}
# Listmonk
{{#if enableListmonk}}
LISTMONK_SYNC_ENABLED=true
LISTMONK_URL=http://{{containerPrefix}}-listmonk:9000
{{else}}
LISTMONK_SYNC_ENABLED=false
LISTMONK_URL=
{{/if}}
LISTMONK_PORT=9000
LISTMONK_DB_USER=listmonk LISTMONK_DB_USER=listmonk
LISTMONK_DB_PASSWORD={{secrets.listmonkAdminPassword}} LISTMONK_DB_PASSWORD={{secrets.listmonkAdminPassword}}
LISTMONK_DB_NAME=listmonk LISTMONK_DB_NAME=listmonk
@ -117,41 +99,26 @@ LISTMONK_API_USER=v2-api
LISTMONK_API_TOKEN={{secrets.listmonkApiToken}} LISTMONK_API_TOKEN={{secrets.listmonkApiToken}}
LISTMONK_ADMIN_USER=v2-api LISTMONK_ADMIN_USER=v2-api
LISTMONK_ADMIN_PASSWORD={{secrets.listmonkApiToken}} LISTMONK_ADMIN_PASSWORD={{secrets.listmonkApiToken}}
LISTMONK_SYNC_ENABLED={{#if enableListmonk}}true{{else}}false{{/if}}
LISTMONK_WEBHOOK_SECRET=
LISTMONK_PROXY_PORT=9002 LISTMONK_PROXY_PORT=9002
LISTMONK_SMTP_HOST=mailhog-changemaker LISTMONK_WEBHOOK_SECRET=
LISTMONK_DB_PORT=5434
LISTMONK_SMTP_HOST={{containerPrefix}}-mailhog
LISTMONK_SMTP_PORT=1025 LISTMONK_SMTP_PORT=1025
LISTMONK_SMTP_USER= LISTMONK_SMTP_USER=
LISTMONK_SMTP_PASSWORD= LISTMONK_SMTP_PASSWORD=
LISTMONK_SMTP_TLS_TYPE=none LISTMONK_SMTP_TLS_TYPE=none
LISTMONK_SMTP_FROM={{name}} <noreply@{{domain}}> LISTMONK_SMTP_FROM={{name}} <noreply@{{domain}}>
# --- Represent API (Canadian electoral data) --- # Media
REPRESENT_API_URL=https://represent.opennorth.ca {{#if enableMedia}}
ENABLE_MEDIA_FEATURES=true
# --- NocoDB v2 (read-only data browser) ---
NOCODB_V2_PORT=8091
NOCODB_URL=http://changemaker-v2-nocodb:8080
NOCODB_PORT=8091
NC_ADMIN_EMAIL={{secrets.adminEmail}}
NC_ADMIN_PASSWORD={{secrets.nocodbAdminPassword}}
NC_PUBLIC_URL=https://db.{{domain}}
# --- Redis ---
REDIS_PASSWORD={{secrets.redisPassword}}
REDIS_URL=redis://:${REDIS_PASSWORD}@redis-changemaker:6379
# --- Payments (Stripe) ---
ENABLE_PAYMENTS={{#if enablePayments}}true{{else}}false{{/if}}
# --- Media Management ---
ENABLE_MEDIA_FEATURES={{#if enableMedia}}true{{else}}false{{/if}}
MEDIA_API_PORT=4100
MEDIA_API_PUBLIC_URL=https://media.{{domain}} MEDIA_API_PUBLIC_URL=https://media.{{domain}}
VITE_MEDIA_API_URL=http://changemaker-media-api:4100 {{else}}
ENABLE_HLS_TRANSCODE=false ENABLE_MEDIA_FEATURES=false
MEDIA_ROOT=/media/library MEDIA_API_PUBLIC_URL=
{{/if}}
MEDIA_API_PORT=4100
MEDIA_ROOT=/media/local
MEDIA_UPLOADS=/media/uploads MEDIA_UPLOADS=/media/uploads
MAX_UPLOAD_SIZE_GB=10 MAX_UPLOAD_SIZE_GB=10
PUBLIC_MEDIA_PORT=3100 PUBLIC_MEDIA_PORT=3100
@ -162,111 +129,43 @@ VIDEO_SCHEDULE_DEFAULT_TIMEZONE=UTC
VIDEO_SCHEDULE_NOTIFICATION_ENABLED=true VIDEO_SCHEDULE_NOTIFICATION_ENABLED=true
VIDEO_PREVIEW_LINK_EXPIRY_HOURS=24 VIDEO_PREVIEW_LINK_EXPIRY_HOURS=24
# --- Container Registry --- # NAR Data
GITEA_REGISTRY=gitea.bnkops.com/admin NAR_DATA_DIR=/data
IMAGE_TAG={{imageTag}}
COMPOSE_PROFILES={{#if enableMonitoring}}monitoring{{/if}}{{#if enableCcpAgent}}{{#if enableMonitoring}},{{/if}}ccp-agent{{/if}}
GITEA_REGISTRY_USER=admin
GITEA_REGISTRY_PASS=
GITEA_REGISTRY_API_TOKEN=
# --- Gitea (Local Platform Instance) --- # Platform Service URLs (used for health checks)
GITEA_URL=http://gitea-changemaker:3000 MINI_QR_URL=http://{{containerPrefix}}-mini-qr:8080
GITEA_PORT=3030 EXCALIDRAW_URL=http://{{containerPrefix}}-excalidraw:80
GITEA_WEB_PORT=3030
GITEA_SSH_PORT=2222
GITEA_ADMIN_USER=admin
GITEA_ADMIN_PASSWORD={{secrets.giteaAdminPassword}}
GITEA_DB_TYPE=mysql
GITEA_DB_HOST=gitea-db:3306
GITEA_DB_NAME=gitea
GITEA_DB_USER=gitea
GITEA_DB_PASSWD={{secrets.giteaAdminPassword}}
GITEA_DB_ROOT_PASSWORD={{secrets.giteaAdminPassword}}
GITEA_ROOT_URL=https://git.{{domain}}
GITEA_DOMAIN=git.{{domain}}
# --- Gitea Docs Comments ---
GITEA_COMMENTS_ENABLED=false
GITEA_API_TOKEN=
GITEA_COMMENTS_REPO_OWNER=
GITEA_COMMENTS_REPO_NAME=docs-comments
GITEA_OAUTH_CLIENT_ID=
GITEA_OAUTH_CLIENT_SECRET=
# Docs source (Gitea repo containing the mkdocs/ tree)
GITEA_DOCS_REPO=admin/changemaker.lite
GITEA_DOCS_PREFIX=mkdocs/docs
GITEA_DOCS_BRANCH=v2
# --- n8n ---
N8N_URL=http://n8n-changemaker:5678
N8N_PORT=5678
N8N_HOST=n8n.{{domain}}
N8N_ENCRYPTION_KEY={{secrets.n8nEncryptionKey}}
N8N_USER_EMAIL={{secrets.adminEmail}}
N8N_USER_PASSWORD={{secrets.nocodbAdminPassword}}
GENERIC_TIMEZONE=UTC
# --- MkDocs ---
MKDOCS_PORT=4003
MKDOCS_SITE_SERVER_PORT=4004
BASE_DOMAIN=https://{{domain}}
MKDOCS_PREVIEW_URL=http://mkdocs:8000
MKDOCS_DOCS_PATH=/mkdocs/docs
# --- Code Server ---
CODE_SERVER_PORT=8888
CODE_SERVER_URL=http://code-server-changemaker:8443
USER_NAME=coder
# --- Homepage ---
HOMEPAGE_PORT=3010
HOMEPAGE_VAR_BASE_URL=http://localhost
# --- Mini QR ---
MINI_QR_PORT=8089
MINI_QR_URL=http://mini-qr:8080
# --- Excalidraw (Collaborative Whiteboard) ---
EXCALIDRAW_PORT=8090
EXCALIDRAW_URL=http://excalidraw-changemaker:80
EXCALIDRAW_WS_URL=wss://draw.{{domain}} EXCALIDRAW_WS_URL=wss://draw.{{domain}}
HOMEPAGE_URL=http://{{containerPrefix}}-homepage:3000
# --- Vaultwarden (Password Manager) --- VAULTWARDEN_URL=http://{{containerPrefix}}-vaultwarden:80
VAULTWARDEN_PORT=8445
VAULTWARDEN_URL=http://vaultwarden-changemaker:80
VAULTWARDEN_ADMIN_TOKEN={{secrets.vaultwardenAdminToken}} VAULTWARDEN_ADMIN_TOKEN={{secrets.vaultwardenAdminToken}}
VAULTWARDEN_DOMAIN=https://vault.{{domain}} VAULTWARDEN_DOMAIN=https://vault.{{domain}}
VAULTWARDEN_SIGNUPS_ALLOWED=false VAULTWARDEN_SIGNUPS_ALLOWED=false
VAULTWARDEN_WEBSOCKET_ENABLED=true VAULTWARDEN_WEBSOCKET_ENABLED=true
VAULTWARDEN_SMTP_SECURITY=off VAULTWARDEN_SMTP_SECURITY=off
# --- MailHog --- # Geocoding
MAILHOG_SMTP_PORT=1025
MAILHOG_WEB_PORT=8025
# --- NAR (National Address Register) ---
NAR_DATA_DIR=/data
# --- Overpass / Area Import ---
OVERPASS_API_URL=https://overpass-api.de/api/interpreter
OVERPASS_MIN_DELAY_MS=30000
AREA_IMPORT_MAX_GRID_POINTS=500
# --- Geocoding ---
MAPBOX_API_KEY= MAPBOX_API_KEY=
GOOGLE_MAPS_API_KEY=
GOOGLE_MAPS_ENABLED=false
GEOCODING_RATE_LIMIT_MS=1100 GEOCODING_RATE_LIMIT_MS=1100
GEOCODING_CACHE_ENABLED=true GEOCODING_CACHE_ENABLED=true
GEOCODING_CACHE_TTL_HOURS=24 GEOCODING_CACHE_TTL_HOURS=24
GOOGLE_MAPS_API_KEY=
GOOGLE_MAPS_ENABLED=false
GEOCODING_PARALLEL_ENABLED=true GEOCODING_PARALLEL_ENABLED=true
GEOCODING_BATCH_SIZE=10 GEOCODING_BATCH_SIZE=10
BULK_GEOCODE_ENABLED=true BULK_GEOCODE_ENABLED=true
BULK_GEOCODE_MAX_BATCH=5000 BULK_GEOCODE_MAX_BATCH=5000
# --- Pangolin Tunnel --- # Represent API
PANGOLIN_API_URL=https://api.bnkserve.org/v1 REPRESENT_API_URL=https://represent.opennorth.ca
# Overpass / Area Import
OVERPASS_API_URL=https://overpass-api.de/api/interpreter
OVERPASS_MIN_DELAY_MS=30000
AREA_IMPORT_MAX_GRID_POINTS=500
# Pangolin Tunnel
PANGOLIN_API_URL=
PANGOLIN_API_KEY= PANGOLIN_API_KEY=
PANGOLIN_ORG_ID= PANGOLIN_ORG_ID=
PANGOLIN_SITE_ID= PANGOLIN_SITE_ID=
@ -275,95 +174,178 @@ PANGOLIN_ENDPOINT={{pangolin.endpoint}}
PANGOLIN_NEWT_ID={{pangolin.newtId}} PANGOLIN_NEWT_ID={{pangolin.newtId}}
PANGOLIN_NEWT_SECRET={{pangolin.newtSecret}} PANGOLIN_NEWT_SECRET={{pangolin.newtSecret}}
{{else}} {{else}}
PANGOLIN_ENDPOINT=https://pangolin.bnkserve.org PANGOLIN_ENDPOINT=
PANGOLIN_NEWT_ID= PANGOLIN_NEWT_ID=
PANGOLIN_NEWT_SECRET= PANGOLIN_NEWT_SECRET=
{{/if}} {{/if}}
# --- Prisma CLI (host-side only, NOT used by Docker containers) --- # Gancio
DATABASE_URL=postgresql://changemaker:{{secrets.postgresPassword}}@localhost:{{ports.postgres}}/changemaker_v2 {{#if enableGancio}}
GANCIO_SYNC_ENABLED=true
# --- Rocket.Chat (Team Chat) --- GANCIO_URL=http://{{containerPrefix}}-gancio:13120
ENABLE_CHAT={{#if enableChat}}true{{else}}false{{/if}} {{else}}
ROCKETCHAT_ADMIN_USER=rcadmin GANCIO_SYNC_ENABLED=false
ROCKETCHAT_ADMIN_PASSWORD={{secrets.rocketchatAdminPassword}} GANCIO_URL=
ROCKETCHAT_URL=http://rocketchat-changemaker:3000 {{/if}}
MONGO_ROOT_USER=rocketchat
MONGO_ROOT_PASSWORD={{secrets.mongoRootPassword}}
# --- Gancio (Event Management) ---
GANCIO_PORT=8092
GANCIO_URL=http://gancio-changemaker:13120
GANCIO_BASE_URL=https://events.{{domain}} GANCIO_BASE_URL=https://events.{{domain}}
GANCIO_ADMIN_USER=admin GANCIO_ADMIN_USER=admin
GANCIO_ADMIN_PASSWORD={{secrets.gancioAdminPassword}} GANCIO_ADMIN_PASSWORD={{secrets.gancioAdminPassword}}
GANCIO_SYNC_ENABLED={{#if enableGancio}}true{{else}}false{{/if}} GANCIO_PORT=8092
# --- Jitsi Meet (Video Conferencing) --- # Chat (Rocket.Chat)
{{#if enableChat}}
ENABLE_CHAT=true
ROCKETCHAT_URL=http://{{containerPrefix}}-rocketchat:3000
ROCKETCHAT_ADMIN_USER=rcadmin
ROCKETCHAT_ADMIN_PASSWORD={{secrets.rocketchatAdminPassword}}
MONGO_ROOT_USER=rocketchat
MONGO_ROOT_PASSWORD={{secrets.mongoRootPassword}}
{{else}}
ENABLE_CHAT=false
ROCKETCHAT_URL=
ROCKETCHAT_ADMIN_USER=
ROCKETCHAT_ADMIN_PASSWORD=
MONGO_ROOT_USER=
MONGO_ROOT_PASSWORD=
{{/if}}
# Jitsi Meet (Video Conferencing)
ENABLE_MEET={{#if enableMeet}}true{{else}}false{{/if}} ENABLE_MEET={{#if enableMeet}}true{{else}}false{{/if}}
{{#if enableMeet}}
JITSI_APP_ID=changemaker JITSI_APP_ID=changemaker
JITSI_APP_SECRET={{secrets.jitsiAppSecret}} JITSI_APP_SECRET={{secrets.jitsiAppSecret}}
JITSI_JICOFO_AUTH_PASSWORD={{secrets.jitsiJicofoAuthPassword}} JITSI_JICOFO_AUTH_PASSWORD={{secrets.jitsiJicofoAuthPassword}}
JITSI_JVB_AUTH_PASSWORD={{secrets.jitsiJvbAuthPassword}} JITSI_JVB_AUTH_PASSWORD={{secrets.jitsiJvbAuthPassword}}
JITSI_URL=http://jitsi-web-changemaker:80 JITSI_URL=http://{{containerPrefix}}-jitsi-web:80
JVB_ADVERTISE_IP={{jvbAdvertiseIp}} JVB_ADVERTISE_IP={{jvbAdvertiseIp}}
JVB_PORT=10000 JVB_PORT=10000
{{else}}
JITSI_APP_ID=
JITSI_APP_SECRET=
JITSI_JICOFO_AUTH_PASSWORD=
JITSI_JVB_AUTH_PASSWORD=
JITSI_URL=
JVB_ADVERTISE_IP=
JVB_PORT=10000
{{/if}}
# --- SMS Campaigns (Termux Android Bridge) --- # SMS Campaigns
ENABLE_SMS={{#if enableSms}}true{{else}}false{{/if}} ENABLE_SMS={{#if enableSms}}true{{else}}false{{/if}}
TERMUX_API_URL= TERMUX_API_URL=
TERMUX_API_KEY= TERMUX_API_KEY=
SMS_DELAY_BETWEEN_MS=3000 SMS_DELAY_BETWEEN_MS=3000
SMS_MAX_RETRIES=3 SMS_MAX_RETRIES=3
SMS_RESPONSE_SYNC_INTERVAL_MS=120000 SMS_RESPONSE_SYNC_INTERVAL_MS=30000
SMS_DEVICE_MONITOR_INTERVAL_MS=300000 SMS_DEVICE_MONITOR_INTERVAL_MS=30000
# --- Social, People & Analytics --- # Social Connections
ENABLE_SOCIAL={{#if enableSocial}}true{{else}}false{{/if}} ENABLE_SOCIAL={{#if enableSocial}}true{{else}}false{{/if}}
# People CRM
ENABLE_PEOPLE={{#if enablePeople}}true{{else}}false{{/if}} ENABLE_PEOPLE={{#if enablePeople}}true{{else}}false{{/if}}
# Analytics & GeoIP
ENABLE_ANALYTICS={{#if enableAnalytics}}true{{else}}false{{/if}} ENABLE_ANALYTICS={{#if enableAnalytics}}true{{else}}false{{/if}}
MAXMIND_ACCOUNT_ID=
MAXMIND_LICENSE_KEY=
# --- Control Panel Agent --- # Monitoring
# Tenants registered with CCP have these populated; CCP-provisioned tenants
# get them set by the provisioner. Leaving blank if neither applies.
ENABLE_CCP_AGENT=true
CCP_URL=
CCP_INVITE_CODE=
CCP_AGENT_URL=
CCP_AGENT_PORT=7443
# --- Monitoring (only used with --profile monitoring) ---
PROMETHEUS_PORT=9090
GRAFANA_PORT=3005
GRAFANA_ADMIN_PASSWORD={{secrets.grafanaAdminPassword}} GRAFANA_ADMIN_PASSWORD={{secrets.grafanaAdminPassword}}
GRAFANA_ROOT_URL=https://grafana.{{domain}} GRAFANA_ROOT_URL=https://grafana.{{domain}}
PROMETHEUS_PORT=9090
GRAFANA_PORT=3000
CADVISOR_PORT=8086 CADVISOR_PORT=8086
NODE_EXPORTER_PORT=9100 NODE_EXPORTER_PORT=9100
REDIS_EXPORTER_PORT=9121 REDIS_EXPORTER_PORT=9121
ALERTMANAGER_PORT=9093 ALERTMANAGER_PORT=9093
ALERTMANAGER_EMBED_PORT={{math ports.embed "+" 16}}
GOTIFY_PORT=8889 GOTIFY_PORT=8889
GOTIFY_ADMIN_USER=admin GOTIFY_ADMIN_USER=admin
GOTIFY_ADMIN_PASSWORD=admin GOTIFY_ADMIN_PASSWORD=admin
# --- Bunker Ops (Fleet Management) --- # MkDocs
MKDOCS_PORT={{math ports.embed "+" 8}}
MKDOCS_SITE_SERVER_PORT={{math ports.embed "+" 14}}
MKDOCS_PREVIEW_URL=http://{{containerPrefix}}-mkdocs:8000
MKDOCS_DOCS_PATH=/mkdocs/docs
CODE_SERVER_PORT={{math ports.embed "+" 7}}
CODE_SERVER_URL=http://{{containerPrefix}}-code-server:8443
BASE_DOMAIN=https://{{domain}}
# Gitea
GITEA_URL=http://{{containerPrefix}}-gitea:3000
GITEA_SSH_PORT=2222
GITEA_DB_TYPE=postgres
GITEA_DB_HOST={{containerPrefix}}-postgres:5432
GITEA_DB_NAME=gitea
GITEA_DB_USER=changemaker
GITEA_DB_PASSWD={{secrets.postgresPassword}}
GITEA_ROOT_URL=https://git.{{domain}}
GITEA_DOMAIN=git.{{domain}}
GITEA_COMMENTS_ENABLED=false
GITEA_API_TOKEN=
GITEA_COMMENTS_REPO_OWNER=
GITEA_COMMENTS_REPO_NAME=docs-comments
GITEA_OAUTH_CLIENT_ID=
GITEA_OAUTH_CLIENT_SECRET=
# n8n
N8N_HOST=n8n.{{domain}}
N8N_URL=http://{{containerPrefix}}-n8n:5678
N8N_ENCRYPTION_KEY={{secrets.n8nEncryptionKey}}
N8N_USER_EMAIL={{secrets.adminEmail}}
N8N_USER_PASSWORD={{secrets.nocodbAdminPassword}}
GENERIC_TIMEZONE=UTC
# MailHog
MAILHOG_URL=http://{{containerPrefix}}-mailhog:8025
MAILHOG_SMTP_PORT=1025
MAILHOG_WEB_PORT=8025
# Homepage
HOMEPAGE_PORT=3010
HOMEPAGE_VAR_BASE_URL=http://localhost
# Dev Tools
{{#if enableDevTools}}
ENABLE_DEV_TOOLS=true
{{else}}
ENABLE_DEV_TOOLS=false
{{/if}}
# Payments
{{#if enablePayments}}
ENABLE_PAYMENTS=true
{{else}}
ENABLE_PAYMENTS=false
{{/if}}
# Vite (admin build)
VITE_API_URL=http://{{containerPrefix}}-api:4000
VITE_MKDOCS_URL=http://{{containerPrefix}}-mkdocs:8000
{{#if enableMedia}}
VITE_MEDIA_API_URL=http://{{containerPrefix}}-media-api:4100
{{/if}}
# Bunker Ops (Fleet Management)
INSTANCE_LABEL={{slug}} INSTANCE_LABEL={{slug}}
BUNKER_OPS_ENABLED=false BUNKER_OPS_ENABLED=false
BUNKER_OPS_REMOTE_WRITE_URL= BUNKER_OPS_REMOTE_WRITE_URL=
# --- GeoIP (MaxMind GeoLite2) --- # Embed proxy ports (nginx proxy for iframe embedding in admin GUI)
MAXMIND_ACCOUNT_ID= NOCODB_EMBED_PORT={{math ports.embed "+" 0}}
MAXMIND_LICENSE_KEY= N8N_EMBED_PORT={{math ports.embed "+" 1}}
GITEA_EMBED_PORT={{math ports.embed "+" 2}}
# --- CCP-specific (admin GUI iframe embeds + dev-mode helpers) --- MAILHOG_EMBED_PORT={{math ports.embed "+" 3}}
# These are CCP-only — not in canonical .env.example. Kept here because MINI_QR_EMBED_PORT={{math ports.embed "+" 4}}
# admin/vite uses them at build time and the embed proxies reference them. EXCALIDRAW_EMBED_PORT={{math ports.embed "+" 5}}
PORT=4000 HOMEPAGE_EMBED_PORT={{math ports.embed "+" 6}}
VITE_API_URL=http://changemaker-v2-api:4000
HOMEPAGE_URL=http://homepage-changemaker:3000
MAILHOG_URL=http://mailhog-changemaker:8025
LISTMONK_URL=http://listmonk-app:9000
CODE_SERVER_EMBED_PORT={{math ports.embed "+" 7}} CODE_SERVER_EMBED_PORT={{math ports.embed "+" 7}}
MKDOCS_EMBED_PORT={{math ports.embed "+" 8}} MKDOCS_EMBED_PORT={{math ports.embed "+" 8}}
MKDOCS_SITE_EMBED_PORT={{math ports.embed "+" 14}} VAULTWARDEN_EMBED_PORT={{math ports.embed "+" 9}}
ROCKETCHAT_EMBED_PORT={{math ports.embed "+" 10}}
GANCIO_EMBED_PORT={{math ports.embed "+" 11}}
GRAFANA_EMBED_PORT={{math ports.embed "+" 12}}
LISTMONK_EMBED_PORT={{math ports.embed "+" 13}} LISTMONK_EMBED_PORT={{math ports.embed "+" 13}}
ENABLE_DEV_TOOLS={{#if enableDevTools}}true{{else}}false{{/if}} MKDOCS_SITE_EMBED_PORT={{math ports.embed "+" 14}}
JITSI_EMBED_PORT={{math ports.embed "+" 15}}

View File

@ -10,14 +10,7 @@ http {
include /etc/nginx/mime.types; include /etc/nginx/mime.types;
default_type application/octet-stream; default_type application/octet-stream;
# Redact sensitive query parameters (token, secret) from access logs log_format main '$remote_addr - $remote_user [$time_local] "$request" '
map $request_uri $redacted_request {
~^(?P<path>[^?]*)\?(?P<args>.*token=[^&]*) "$path?<token-redacted>";
~^(?P<path>[^?]*)\?(?P<args>.*secret=[^&]*) "$path?<secret-redacted>";
default $request_uri;
}
log_format main '$remote_addr - $remote_user [$time_local] "$request_method $redacted_request $server_protocol" '
'$status $body_bytes_sent "$http_referer" ' '$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for"'; '"$http_user_agent" "$http_x_forwarded_for"';
@ -32,12 +25,6 @@ http {
types_hash_max_size 2048; types_hash_max_size 2048;
client_max_body_size 50m; client_max_body_size 50m;
# Rate limiting zones (defense-in-depth alongside app-level Redis rate limits)
limit_req_zone $binary_remote_addr zone=api_global:10m rate=30r/s;
limit_req_zone $binary_remote_addr zone=api_auth:10m rate=5r/s;
limit_req_zone $binary_remote_addr zone=upload:10m rate=2r/s;
limit_req_status 429;
# Gzip compression # Gzip compression
gzip on; gzip on;
gzip_vary on; gzip_vary on;
@ -45,17 +32,11 @@ http {
gzip_comp_level 6; gzip_comp_level 6;
gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript image/svg+xml; gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript image/svg+xml;
# Only send HSTS when the request arrived over HTTPS (via Pangolin tunnel)
map $http_x_forwarded_proto $hsts_header {
https "max-age=31536000; includeSubDomains";
default "";
}
# Security headers (applied globally X-Frame-Options set per server block) # Security headers (applied globally X-Frame-Options set per server block)
add_header X-Content-Type-Options "nosniff" always; add_header X-Content-Type-Options "nosniff" always;
add_header X-XSS-Protection "1; mode=block" always; add_header X-XSS-Protection "1; mode=block" always;
add_header Referrer-Policy "strict-origin-when-cross-origin" always; add_header Referrer-Policy "strict-origin-when-cross-origin" always;
add_header Strict-Transport-Security $hsts_header always; add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
add_header Permissions-Policy "geolocation=(self), microphone=(), camera=()" always; add_header Permissions-Policy "geolocation=(self), microphone=(), camera=()" always;
# Docker internal DNS enables runtime resolution so nginx starts # Docker internal DNS enables runtime resolution so nginx starts

View File

@ -1427,10 +1427,9 @@ services:
- /var/run/docker.sock:/var/run/docker.sock - /var/run/docker.sock:/var/run/docker.sock
- ccp-agent-data:/var/lib/ccp-agent - ccp-agent-data:/var/lib/ccp-agent
- ccp-agent-certs:/etc/ccp-agent - ccp-agent-certs:/etc/ccp-agent
# Mount the instance directory so the agent can read compose files and # Mount the instance directory so the agent can read compose files and run
# write status.json + backups (writable; agent already has docker.sock, # `docker compose -p <project>` commands against the real project on disk.
# so file write access is not an additional security escalation). - .:/app/instance:ro
- .:/app/instance
environment: environment:
- AGENT_PORT=7443 - AGENT_PORT=7443
- AGENT_DATA_DIR=/var/lib/ccp-agent - AGENT_DATA_DIR=/var/lib/ccp-agent
@ -1442,12 +1441,7 @@ services:
- INSTANCE_BASE_PATH=/app/instance - INSTANCE_BASE_PATH=/app/instance
# Pass the host's compose project name so the agent runs `docker compose -p <project>` # Pass the host's compose project name so the agent runs `docker compose -p <project>`
# against the right project (not basename of INSTANCE_BASE_PATH, which is "instance"). # against the right project (not basename of INSTANCE_BASE_PATH, which is "instance").
# COMPOSE_PROJECT is read by the agent's TypeScript for slug derivation;
# COMPOSE_PROJECT_NAME is what Docker Compose itself reads when upgrade.sh
# shells out to `docker compose ...` — without it, compose defaults to
# basename(cwd)="instance" and collides with the host's existing containers.
- COMPOSE_PROJECT=${COMPOSE_PROJECT_NAME:-changemaker-lite} - COMPOSE_PROJECT=${COMPOSE_PROJECT_NAME:-changemaker-lite}
- COMPOSE_PROJECT_NAME=${COMPOSE_PROJECT_NAME:-changemaker-lite}
logging: *default-logging logging: *default-logging
networks: networks:
- changemaker-lite - changemaker-lite

View File

@ -1450,10 +1450,7 @@ services:
- /var/run/docker.sock:/var/run/docker.sock - /var/run/docker.sock:/var/run/docker.sock
- ccp-agent-data:/var/lib/ccp-agent - ccp-agent-data:/var/lib/ccp-agent
- ccp-agent-certs:/etc/ccp-agent - ccp-agent-certs:/etc/ccp-agent
# Writable: agent must write data/upgrade/{status,progress,result}.json - .:/app/instance:ro
# and data/backups/*.tar.gz. Agent already has docker.sock — file write
# access is not an additional security escalation.
- .:/app/instance
environment: environment:
- AGENT_PORT=7443 - AGENT_PORT=7443
- AGENT_DATA_DIR=/var/lib/ccp-agent - AGENT_DATA_DIR=/var/lib/ccp-agent
@ -1465,12 +1462,7 @@ services:
- INSTANCE_BASE_PATH=/app/instance - INSTANCE_BASE_PATH=/app/instance
# Pass the host's compose project name so the agent runs `docker compose -p <project>` # Pass the host's compose project name so the agent runs `docker compose -p <project>`
# against the right project (not basename of INSTANCE_BASE_PATH, which is "instance"). # against the right project (not basename of INSTANCE_BASE_PATH, which is "instance").
# COMPOSE_PROJECT is read by the agent's TypeScript for slug derivation;
# COMPOSE_PROJECT_NAME is what Docker Compose itself reads when upgrade.sh
# shells out to `docker compose ...` — without it, compose defaults to
# basename(cwd)="instance" and collides with the host's existing containers.
- COMPOSE_PROJECT=${COMPOSE_PROJECT_NAME:-changemaker-lite} - COMPOSE_PROJECT=${COMPOSE_PROJECT_NAME:-changemaker-lite}
- COMPOSE_PROJECT_NAME=${COMPOSE_PROJECT_NAME:-changemaker-lite}
logging: *default-logging logging: *default-logging
networks: networks:
- changemaker-lite - changemaker-lite

View File

@ -1,266 +0,0 @@
# Session Handoff: Upgrade Flow Redesign (2026-05-20 → 2026-05-21)
> Carries forward all context from a long working session into the next conversation. If you're a fresh agent: read this top-to-bottom before touching anything.
---
## Quick state of the fleet
| Tenant | Type | Version | Agent patched | Surgical script update | Notes |
|---|---|---|---|---|---|
| bnkops (n4) | source | main @ 1b80e82 | ✅ | ⏳ pending | Management node; CCP backend runs here in parallel |
| marcelle (n5, cursedknowledge.org) | release | v2.9.15 | ✅ | ⏳ pending | Test bench; first end-to-end CCP upgrade test ran here (succeeded after manual Phase 6 recovery) |
| trbh (n6) | source | main @ 1b80e82 | ✅ | ⏳ pending | mkdocs content RESTORED from `stash@{0}` — site serves "That Really Blonde Human" correctly |
| pia (n3, pia-bnkops) | release | v2.9.10 | ✅ | ✅ **completed 2026-05-21** | First successful surgical update — proof the procedure works |
| pridecorner (n1) | source | main @ 1b80e82 | ✅ | ⏳ pending | Has 3 March 9 upgrade-* stashes still on disk (audit done; recovery deferred to another agent) |
| soroush (n7) | source | main @ 1b80e82 | ✅ | ⏳ pending | Was earliest-fixed tonight |
| linda (n2, lindalindsay.org) | release-converted | v2.9.14 | ✅ | ⏳ pending | Was source-install with broken `.git`; converted to release mode (VERSION file written) |
**Public sites verified working at session end**: trbh.org, docs.trbh.org, bnkops.com, pridecorner.ca, soroushsamavat.org, publicinterestalberta.org, lindalindsay.org, cursedknowledge.org.
**Known caveat**: docs.bnkops.com returns HTTP 000 externally (Pangolin tunnel routing issue, pre-existing, NOT caused by this session). bnkops mkdocs container serves correct content locally.
---
## What landed in source (committed + pushed to origin/main)
| Commit | Description |
|---|---|
| `1b80e82` | `fix(ccp-agent): whitelist /app/instance for git safe.directory` — ccp-agent Dockerfile |
| `e88ac79` | `fix(ccp-agent): export COMPOSE_PROJECT_NAME so upgrade.sh sees correct project` — docker-compose.yml + .prod.yml |
| `9613c3e` | `fix(upgrade): Phase 1 of upgrade-flow redesign (Approach A)` — upgrade.sh + scripts/lib/mkdocs-snapshot.sh + scripts/upgrade-stash-cleanup.sh + .gitignore |
| `a7d3dd7` | `chore(release): ship scripts/lib/ + classify upgrade-stash-cleanup.sh` — build-release.sh |
**Release**: v2.10.2 tagged on `a7d3dd7`, uploaded to Gitea Releases as the new "latest" (`/releases/latest` returns v2.10.2 — the timestamp issue from earlier in session is fixed via build-release.sh's `target_commitish` workaround).
**Earlier in session**: tonight also produced commit `a531f9b` (ccp-agent missing bash/curl/jq/python3 + writable mount) and v2.10.1 release. v2.10.2 supersedes v2.10.1.
---
## The plan — Approach A (DONE) + B + C (pending)
Full design lives at `/home/bunker-admin/.claude/plans/okay-so-we-can-enumerated-hejlsberg.md`.
### Approach A — ✅ Done
Three fixes to existing `scripts/upgrade.sh` shipping in v2.10.2:
1. **Phase 6 self-destruct fix** — Phase 6's broad `docker compose up -d` no longer recreates ccp-agent (which would SIGKILL the running script). Instead, ccp-agent restart is deferred to AFTER `write_result` writes the final `result.json`, via a detached `nohup ... & disown` subshell.
2. **mkdocs/ snapshot fallback** — `scripts/lib/mkdocs-snapshot.sh` is sourced by upgrade.sh's Phase 2. Before any other backup or pull operation, it tarballs the entire `mkdocs/` directory into `mkdocs-backup-<timestamp>.tar.gz` in the install root. Retains last 5. Discoverable via `ls`. Restoration is a one-liner:
```bash
tar xzf "$(ls -t mkdocs-backup-*.tar.gz | head -1)" -C . && \
docker compose restart mkdocs mkdocs-site-server
```
3. **`upgrade-stash-cleanup.sh`** — interactive utility to drop accumulated `upgrade-*` git stashes. Warns LOUDLY if any stash contains `mkdocs/mkdocs.yml` so operators verify recovery before dropping.
### Approach B — ⏳ Pending (1-2 days)
Add `--image-only` upgrade mode. Production images are hermetic (bake compiled code + Prisma migrations + entrypoint runs migrations on container start). Therefore `docker compose pull && docker compose up -d` IS a complete code+schema upgrade. **No filesystem mutation outside Docker** → tenant content implicitly safe.
New files to create:
- `scripts/image-upgrade.sh` (~150 lines; sources `scripts/lib/mkdocs-snapshot.sh` for the fallback)
- `changemaker-control-panel/agent/src/routes/upgrade.routes.ts` → new endpoint `POST /instance/:slug/upgrade/start-image-only`
- `changemaker-control-panel/api/src/services/upgrade.service.ts` → `startImageUpgrade(instanceId, userId, { imageTag })`
- `changemaker-control-panel/api/src/services/remote-driver.ts` → `startImageUpgrade()`
- `changemaker-control-panel/api/src/modules/instances/instances.routes.ts` → `POST /:id/upgrade-images`
- CCP admin UI: "Quick Upgrade (image-only)" button on `InstanceDetailPage.tsx`
### Approach C — ⏳ Pending (3-5 days)
CCP-driven template re-render for orchestration-changing upgrades. Reuses existing `template-engine.ts` and `reconfigureInstance` pattern. Only writes templated files (compose, nginx, configs/pangolin); never touches `mkdocs/` or `configs/code-server/data/`. See plan for details.
---
## How to apply v2.10.2 fixes to remaining tenants
**For PIA: already done** — used as the proof-of-concept on 2026-05-21. mkdocs.yml md5 unchanged, file count unchanged. ~5 minutes per tenant.
**For the other 6 tenants**, use the surgical update — DO NOT run a raw `git pull origin main` (it would resurrect tenant-deleted files via merge logic):
### Source installs (bnkops, trbh, pridecorner, soroush)
```bash
# bnkops, trbh, soroush use ~/changemaker.lite
# pridecorner uses ~/cmlite/changemaker.lite
cd ~/changemaker.lite # or ~/cmlite/changemaker.lite
git fetch origin main
mkdir -p scripts/lib
git checkout origin/main -- \
scripts/upgrade.sh \
scripts/upgrade-stash-cleanup.sh \
scripts/lib/mkdocs-snapshot.sh \
scripts/build-release.sh \
docker-compose.yml \
.gitignore
# Sanity: tenant content should still be ahead/divergent (not touched)
git status mkdocs/ configs/ # should show no NEW changes from this update
```
### Release installs (marcelle, linda) — used pia approach
```bash
# marcelle: ~/changemaker.lite, ssh bunker-admin@100.90.78.47
# linda: ~/changemaker.lite.canonical, ssh bunker-admin@n2-linda.taile33572.ts.net
cd ~/changemaker.lite # or ~/changemaker.lite.canonical
curl -fSL https://gitea.bnkops.com/admin/changemaker.lite/releases/download/v2.10.2/changemaker-lite-v2.10.2.tar.gz \
-o /tmp/v2.10.2.tar.gz
mkdir -p scripts/lib
tar -xzf /tmp/v2.10.2.tar.gz --strip-components=1 \
changemaker-lite/scripts/upgrade.sh \
changemaker-lite/scripts/upgrade-stash-cleanup.sh \
changemaker-lite/scripts/lib/mkdocs-snapshot.sh \
changemaker-lite/docker-compose.yml
chmod +x scripts/upgrade.sh scripts/upgrade-stash-cleanup.sh scripts/lib/mkdocs-snapshot.sh
rm -f /tmp/v2.10.2.tar.gz
# Do NOT update VERSION — only scripts changed, rest of install stays at current version.
```
### Verification per tenant
```bash
# Before update: capture
md5sum mkdocs/mkdocs.yml
find mkdocs/docs -type f | wc -l
# Run the appropriate surgical update above
# After update: re-verify (should match)
md5sum mkdocs/mkdocs.yml
find mkdocs/docs -type f | wc -l
# Confirm new upgrade.sh
grep -c 'deferred ccp-agent\|Deferred ccp-agent' scripts/upgrade.sh # expect 2
# Optional: smoke-test the snapshot helper
PROJECT_DIR=$(pwd) bash -c '. scripts/lib/mkdocs-snapshot.sh; snapshot_mkdocs'
ls -lh mkdocs-backup-*.tar.gz
```
---
## Bug inventory — what we know
### Fixed in v2.10.2
| Bug | Memory file | Status |
|---|---|---|
| Gitea release `created_unix=0` (lightweight tag + Gitea 1.23.x quirk) | `feedback_gitea_release_tag_timing.md` | Fixed in `build-release.sh` — uses `target_commitish` + removes remote tag first |
| ccp-agent image missing bash/curl/jq/python3 + git safe.directory | `feedback_ccp_agent_image_deps.md` | Fixed in agent Dockerfile + rolled out to all 7 tenants |
| ccp-agent compose mount was `:ro` (blocked status.json writes) | (in `feedback_ccp_agent_image_deps.md`) | Fixed in both compose files |
| CCP upgrade Phase 5 collision: `COMPOSE_PROJECT_NAME` mismatch | `feedback_upgrade_compose_project_name.md` | Fixed via env-var addition in compose env block (e88ac79) — also needs `.env` entry on tenants installed before v2.10.2 |
| upgrade.sh Phase 6 self-destruct | `feedback_upgrade_sh_bugs.md` | Fixed in v2.10.2 — deferred ccp-agent restart |
### Open
- **upgrade.sh `git stash → git pull` stash-no-pop** — Pride Corner has 3 stashes from March 9 holding mkdocs.yml customizations. Existing `save_user_paths`/`restore_user_paths` in upgrade.sh handles the common case; the snapshot fallback (v2.10.2) covers edge cases. Pridecorner-specific recovery handled by another agent.
- **Agent-side `detached: true` spawn** — Defense-in-depth. Skip unless Phase 6 self-destruct re-emerges.
---
## Tenant content protection layers (all in v2.10.2)
1. **`save_user_paths`/`restore_user_paths`** in upgrade.sh — preserves working-tree state of `mkdocs/docs/`, `mkdocs/mkdocs.yml`, `mkdocs/site/`, `configs/`, `nginx/conf.d/services.conf` across `git pull`.
2. **`git stash` + auto-resolve on USER_PATHS** — modified tracked files stash + pop with `git checkout --theirs` on USER_PATH conflicts.
3. **Pre-upgrade mkdocs snapshot** — tarball of `mkdocs/` to install root before any other phase runs. Fallback for everything else.
---
## Tonight's recovery work — already applied
These tenants had content damage from earlier in the session; recovery was completed:
- **trbh** — mkdocs.yml + 143 M files restored from `stash@{0}`; 538 D-entry files re-deleted. Public sites serve correct branding.
- **bnkops** — same pattern, 100 M files restored + 82 D-entry re-deletions. Public sites serve correct branding.
- **marcelle** — manual recovery from Phase 6 self-destruct test (file rollback + service restart). On v2.10.1 currently. Operating normally.
`stash@{0}` is preserved on trbh and bnkops as forensic record + safety net.
---
## CCP access
```
URL: http://n4-bnkops.taile33572.ts.net:5100 (UI)
http://n4-bnkops.taile33572.ts.net:5000 (API)
User: admin@thebunkerops.ca
Password: NRTgHdC7Zxxs2P2UmNwnEbn3jTwU8uJN (seed; rotate if you want)
Role: SUPER_ADMIN
```
---
## Test bench (marcelle)
```
SSH: ssh bunker-admin@100.90.78.47
Install dir: ~/changemaker.lite
Domain: cursedknowledge.org
Admin: admin@cursedknowledge.org / @TheBunker2025!
CCP slug: changemakerlite
CCP id: 71b5bc4a-c47e-4435-b460-e9bc303b76ed
```
Marcelle is the test bench per `docs/TEST_SERVER.md`. Use it for ALL upgrade experiments before touching production tenants.
---
## Per-tenant quick reference
| Tenant | SSH | Install dir | CCP id |
|---|---|---|---|
| bnkops | bunker-admin@n4-bnkops.taile33572.ts.net | ~/changemaker.lite | 21238536-7c04-4a3b-a073-38390a939046 |
| marcelle | bunker-admin@100.90.78.47 | ~/changemaker.lite | 71b5bc4a-c47e-4435-b460-e9bc303b76ed |
| trbh | bunker-admin@n6-trbh.taile33572.ts.net | ~/changemaker.lite | c066dc23-64a5-4684-96a7-992e65c1b82c |
| pia | pia-bnkops@n3-pia.taile33572.ts.net | ~/changemaker.lite | 92a11622-d357-4ab4-b21e-60c030c1b026 |
| pridecorner | bunker-admin@n1-pridecorner.taile33572.ts.net | ~/cmlite/changemaker.lite | a30de94b-ef28-42b6-a71d-112669526a62 |
| soroush | bunker-admin@n7-soroush.taile33572.ts.net | ~/changemaker.lite | 0c70f94c-1319-41e1-867c-5674f17cadda |
| linda | bunker-admin@n2-linda.taile33572.ts.net | ~/changemaker.lite.canonical | 6dcc19a1-f4fd-45df-be77-5bf62f8110c8 |
---
## Most important "don't repeat my mistakes" notes
1. **Never `git stash + git pull --ff-only origin main` on a tenant** outside of upgrade.sh. The stash silently displaces tenant content. If you must update files on a source-installed tenant, use targeted `git checkout origin/main -- <specific-file>` instead.
2. **Never blindly trigger CCP "Upgrade Now"** on a tenant still running pre-v2.10.2 upgrade.sh — it will Phase 6 self-destruct. Apply surgical script update first (instructions above), THEN trigger CCP upgrade.
3. **mkdocs/docs/ contains upstream tracked files** (default screenshots, demo docs, blog posts). Tenants typically delete these locally without committing. ANY operation that brings origin/main's tracked tree into the working tree (git pull, tarball extract) will resurrect them. v2.10.2's snapshot fallback gives you a recovery path; the surgical update procedure (this doc) avoids the issue entirely.
4. **mkdocs/mkdocs.yml is tracked, tenant-customized** with branding. Lives under USER_PATHS so v2.10.2's upgrade.sh protects it. But if you do raw git operations outside the script, it's exposed.
5. **CCP backend on n4 is decoupled from per-tenant ccp-agent**. Restarting a tenant's ccp-agent does NOT affect CCP itself. Verified during bnkops patch (CCP backend stayed at 41h uptime while ccp-agent recreated).
---
## Memory files (in `/home/bunker-admin/.claude/projects/-home-bunker-admin-changemaker-lite/memory/`)
Latest session work documented in:
- `feedback_gitea_release_tag_timing.md`
- `feedback_ccp_agent_image_deps.md`
- `feedback_upgrade_compose_project_name.md`
- `feedback_upgrade_sh_bugs.md`
- `feedback_session_2026_05_20_damage_report.md`
Plus the architectural plan: `/home/bunker-admin/.claude/plans/okay-so-we-can-enumerated-hejlsberg.md`
---
## Where to start the next session
Recommended sequence:
1. **Apply surgical update to remaining 6 tenants** (~30-45 min, low risk; pia procedure already proven). Order: marcelle, linda (release), then soroush, trbh, bnkops, pridecorner (source).
2. **Test CCP-driven upgrade on marcelle** after surgical update lands. This will verify the deferred ccp-agent restart works end-to-end through the CCP path (the test we couldn't complete tonight because Phase 6 kept self-destructing).
3. **Implement Approach B** per the plan — image-only upgrade mode. Estimated 1-2 days.
4. **Implement Approach C** — CCP template re-render. 3-5 days.
If only one thing happens next session: **do step 1**. Six surgical updates × ~5 minutes each. The rest of the fleet stays vulnerable to Phase 6 self-destruct until they're on v2.10.2's upgrade.sh.

View File

@ -1,169 +0,0 @@
# Session Handoff: Approach B Rollout + Approach C Planning (2026-05-21)
Carries forward all context from a long working session. If you're a fresh agent: read this top-to-bottom before touching anything.
---
## What landed in this session (commits on origin/main)
| Commit | Description |
|---|---|
| `4a3d9d7` | `feat(upgrade): Approach B - image-only upgrade mode` — 7 files, 666 insertions. scripts/image-upgrade.sh + CCP agent endpoint + CCP backend (driver/service/route/schema) + admin UI "Quick Upgrade" button. |
| `<this commit>` | docs: session handoff + Approach C Phase 0 initial template overlay |
Plus several non-tracked deploys:
- v2.10.2 surgical update applied to remaining 6 tenants (soroush, linda, marcelle, bnkops, trbh, pridecorner — pia was done previously). All verified mkdocs untouched, upgrade.sh sha matches `b9f37d59...`.
- Fleet rollout of Approach B: new `image-upgrade.sh` script delivered + new `ccp-agent` image (with `/upgrade/start-image-only` endpoint) deployed to all 7 tenants. Bnkops's ccp-agent was rebuilt from source (builds locally rather than pulled from registry).
---
## Fleet state at session end
| Tenant | Surgical update v2.10.2 | image-upgrade.sh | New ccp-agent with image-only endpoint |
|---|---|---|---|
| pia | ✅ (prior session) | ✅ | ✅ |
| soroush | ✅ | ✅ | ✅ |
| linda | ✅ | ✅ | ✅ |
| marcelle | ✅ + tested both A and B E2E | ✅ | ✅ |
| bnkops | ✅ | ✅ | ✅ (rebuilt locally) |
| trbh | ✅ | ✅ | ✅ |
| pridecorner | ✅ | ✅ | ✅ |
Marcelle E2E test results:
- **Approach A (full upgrade)**: v2.10.1 → v2.10.2 in 250s, COMPLETED, no SIGKILL on script. Phase 6 deferred ccp-agent restart fix worked end-to-end through CCP path.
- **Approach B (Quick Upgrade) run 1**: 121s, COMPLETED, mkdocs.yml md5 unchanged.
- **Approach B (Quick Upgrade) run 2**: 100s (cached pull), COMPLETED, mkdocs unchanged again — confirms idempotency.
---
## Fleet backup (Phase 0 work — defensive)
All 7 tenants backed up to `/media/bunker-admin/BACKUP/fleet/<node>/2026-05-21-pre-v2.10.2/`:
| Node | Tenant | Size |
|---|---|---|
| n1 | pridecorner | 182MB (includes 3 stash patches from March 9) |
| n2 | linda | 26MB |
| n3 | pia | 45MB (post-surgical state) |
| n4 | bnkops | 4.4GB (huge — 2277 mkdocs/docs files) |
| n5 | marcelle | 28MB |
| n6 | trbh | 336MB |
| n7 | soroush | 76MB |
Each tenant dir has `mkdocs.tar.gz`, `configs-and-nginx.tar.gz`, `config-files.tar.gz`, `host-state.txt`, `git-state.txt` (source installs only), and `MANIFEST.txt`.
---
## Approach C planning + initial overlay
**Decision: rewrite `docker-compose.yml.hbs` in prod-compose style** to make CCP-driven template re-render safe for the install.sh fleet.
### Why a rewrite (not sync-by-addition)
Discovered the CCP template and `docker-compose.prod.yml` use fundamentally different conventions:
| | Old template (`.hbs`) | Canonical prod |
|---|---|---|
| Container names | `{{containerPrefix}}-postgres` (dynamic) | `changemaker-v2-postgres` (hardcoded) |
| Secrets | `{{secrets.postgresPassword}}` (Handlebars-rendered) | `${POSTGRES_PASSWORD}` (env-substituted) |
| Optional services | `{{#if enableX}}` blocks | Always-defined, gated via `COMPOSE_PROFILES` |
| Ports | `{{ports.api}}` | Hardcoded |
Sync-by-additions can't reconcile these. Rewrite is cleaner long-term.
### Initial overlay committed this session
`changemaker-control-panel/templates/docker-compose.yml.hbs.OLD-style-pre-approach-c` — preserved old template for reference.
`changemaker-control-panel/templates/docker-compose.yml.hbs` — now a near-mirror of `changemaker.lite/docker-compose.prod.yml` (1493 lines + Handlebars header):
- Header comment includes `{{name}}`, `{{slug}}`, `{{composeProject}}` for traceability.
- 5 image refs changed from `${IMAGE_TAG:-latest}` to `{{imageTag}}` so CCP can override the tag per instance via `Instance.imageTag` once Phase 1 lands.
- All other variation flows through env-var substitution from tenant's `.env`.
### Remaining Approach C work (next session)
See `/home/bunker-admin/.claude/plans/insight-temporal-bachman.md` for the full plan. Quick summary of what's next:
**Phase 0 completion (next session):**
- Audit `env.hbs` against the new compose's expected env vars. Add missing.
- Sync static config files in `templates/`: nginx/, configs/prometheus/, configs/alertmanager/, configs/grafana/. They may have drifted too.
- Write a one-off render harness (`api/scripts/render-for-instance.ts`) that loads an instance row, builds context, renders templates to scratch dir.
- Render against marcelle, linda, pia. Diff against their actual files. Iterate the template until diff is per-instance values only (`COMPOSE_PROJECT_NAME`, ports, secrets — not structure).
**Phase 1 (~30 min):** Add `Instance.imageTag` Prisma column + migration. Modify `template-engine.ts:211` to use `instance.imageTag || env.IMAGE_TAG`.
**Phase 2 (~3-4 hr):** Pre-flight diff endpoint. New agent route `POST /instance/:slug/files/diff` + `RemoteDriver.diffFiles()` + `LocalDriver.diffFiles()` + `previewReleaseUpgrade()` in upgrade.service. Includes `envCoverage` check for registered tenants.
**Phase 3 (~3-4 hr):** `startReleaseUpgrade()` + `runReleaseUpgrade()` in upgrade.service. Split logic for `isRegistered=true` (skip env render) vs `isRegistered=false` (render env).
**Phase 4 (~30 min):** CCP routes `/upgrade-release` + `/upgrade-release/preview` + Zod schema.
**Phase 5 (~2-3 hr):** "Upgrade to Release" UI button + preview modal + env-coverage warning.
**Phase 6 (~1 hr):** Tag v2.10.3 in changemaker.lite, push images with tag, trigger upgrade-release on marcelle via CCP UI, verify mkdocs untouched + containers on new tag.
**Total remaining: 11-14 hours.** Recommended split:
- Session 2: complete Phase 0 (render harness + iterate template + env.hbs sync + static file syncs). ~half day.
- Session 3: Phases 1-5. ~half day.
- Session 4: Phase 6 E2E test. ~1 hour.
---
## Critical files for Approach C
**Already modified this session:**
- `changemaker-control-panel/templates/docker-compose.yml.hbs` — overlay from prod compose with minimal Handlebars markup.
- `changemaker-control-panel/templates/docker-compose.yml.hbs.OLD-style-pre-approach-c` — preserved old template.
**To be modified in next sessions (per plan):**
- `changemaker-control-panel/templates/env.hbs` (Phase 0 audit)
- `changemaker-control-panel/templates/configs/**` (Phase 0 syncs)
- `changemaker-control-panel/api/prisma/schema.prisma` (Phase 1)
- `changemaker-control-panel/api/prisma/migrations/<ts>_add_instance_image_tag/` (Phase 1)
- `changemaker-control-panel/api/src/services/template-engine.ts` line 211 (Phase 1)
- `changemaker-control-panel/api/src/services/upgrade.service.ts` (Phases 2-3)
- `changemaker-control-panel/api/src/services/remote-driver.ts` + `local-driver.ts` + `execution-driver.ts` (Phase 2)
- `changemaker-control-panel/agent/src/routes/files.routes.ts` + `services/file.service.ts` (Phase 2)
- `changemaker-control-panel/api/src/modules/instances/instances.routes.ts` + `instances.schemas.ts` (Phase 4)
- `changemaker-control-panel/admin/src/pages/InstanceDetailPage.tsx` (Phase 5)
---
## Memory key gotchas (write to MEMORY.md next session)
1. **CCP template vs prod compose: were divergent, now aligned.** As of this session, `templates/docker-compose.yml.hbs` is structurally a near-mirror of `docker-compose.prod.yml`. Going forward, any new service in prod compose must be ported into the template manually (or via a future CI drift check).
2. **bnkops's ccp-agent is locally built**, not pulled from registry. Has a `build:` directive in compose. The other 6 tenants pull `gitea.bnkops.com/admin/changemaker-ccp-agent:latest`.
3. **install.sh tenants (`isRegistered=true`)** lack `encryptedSecrets` in CCP DB. Approach C must skip `env.hbs` rendering for them — they keep their tarball-provisioned `.env`. The pre-flight envCoverage check is the safety net.
4. **n4 SSH lacks marcelle's host key by default** — first `ssh n4 → marcelle` connection needs `StrictHostKeyChecking=accept-new` or interactive accept. Other tenants in the lab have the same pattern.
5. **`docker save | ssh ... docker load` is the registry-less image distribution path** when n4 doesn't have docker login to gitea.bnkops.com. Worked well for the ccp-agent rollout this session.
6. **`set -o pipefail` + `grep -q` breaks the pipeline**: grep closes the pipe as soon as it finds the first match, which sends SIGPIPE to the writer, so the pipeline as a whole exits non-zero even though grep matched. Solution: capture the upstream output into a variable, then grep against the variable. (Bug found + fixed in `scripts/image-upgrade.sh` during this session.)
---
## CCP access (unchanged)
```
URL: http://n4-bnkops.taile33572.ts.net:5100 (UI)
http://n4-bnkops.taile33572.ts.net:5000 (API)
User: admin@thebunkerops.ca
Password: NRTgHdC7Zxxs2P2UmNwnEbn3jTwU8uJN (seed)
Role: SUPER_ADMIN
```
---
## Where to start next session
Recommended:
1. **Read this doc + `/home/bunker-admin/.claude/plans/insight-temporal-bachman.md` (Approach C plan)** first.
2. **Phase 0 completion:** finish the template rewrite. Build a render harness (`api/scripts/render-for-instance.ts`), render against marcelle/linda/pia, iterate until structural-clean.
3. Commit Phase 0 as standalone PR with rendered-vs-actual diffs in description.
4. Move to Phases 1-5 in a second commit/PR.
5. Phase 6 manual E2E.
Approach B is in production-ready state across the fleet. Approach C is the longer-term path for releases that change orchestration.

View File

@ -1,173 +0,0 @@
# Session Handoff: Approach C complete (template re-render) — 2026-05-22
This session shipped Approach C end-to-end: CCP-driven template re-render for orchestration-changing upgrades.
## Commits landed
| Commit | Description |
|---|---|
| `9744464` | Phase 0 complete — templates byte-equivalent to canonical |
| `abb4034` | Approach C — schema migration, services, routes, UI |
## What's in production
### Phase 0 (commit `9744464`)
- `templates/docker-compose.yml.hbs` (1504 lines): structural mirror of `docker-compose.prod.yml`. Only difference: header comment (CCP-tenant metadata).
- `templates/env.hbs` (369 lines): mirror of `.env.example` with Handlebars overlay for tenant-specific values. Covers all 145 env vars referenced by the new compose + 15 CCP-helpful extras.
- `templates/nginx/nginx.conf`: synced canonical (security drift: redacted log format, rate-limit zones, conditional HSTS).
- `api/scripts/render-for-instance.ts`: one-off CLI to render templates against any registered instance + scratch-dir output for diff verification.
Verified by rendering against marcelle/linda/pia and diffing against their actual on-disk compose. **30-line diff for all three, header-only — zero structural differences.**
### Approach C (commit `abb4034`)
**Phase 1 — schema:**
- `Instance.imageTag String?` Prisma column + migration `20260522093400_add_instance_image_tag`.
- `template-engine.ts:buildTemplateContext` uses `instance.imageTag || env.IMAGE_TAG`.
**Phase 2 — pre-flight diff (read-only):**
- Agent: `POST /instance/:slug/files/diff` + `file.service.ts:diffFiles()` (inline LCS unified diff, no new deps).
- API: `RemoteDriver.diffFiles()` + `LocalDriver.diffFiles()` + interface addition.
- `upgrade.service.ts:previewReleaseUpgrade()` — renders templates with proposed imageTag, filters .env for isRegistered tenants, returns per-file diff + envCoverage.
**Phase 3 — apply path:**
- `upgrade.service.ts:startReleaseUpgrade()` + `runReleaseUpgrade()`.
- Flow: persist imageTag → render → writeFiles → composePull → composeUp → composePs verify.
- Status surfaced via existing InstanceUpgrade poll loop (no new UI polling code needed).
**Phase 4 — routes:**
- `POST /api/instances/:id/upgrade-release` (apply)
- `POST /api/instances/:id/upgrade-release/preview` (read-only)
- `startReleaseUpgradeSchema` (imageTag regex).
**Phase 5 — UI:**
- Third "Upgrade to Release" button on InstanceDetailPage next to Quick Upgrade + Upgrade Now.
- Modal: imageTag input, Preview button (red alert if envCoverage shows missing vars), Apply button.
- Diff display with per-file status tags (unchanged/modified/created) + truncated unified diff.
## E2E Phase 6 validation status
**Preview path: VALIDATED end-to-end on marcelle.**
CCP API call `POST /api/instances/{marcelle}/upgrade-release/preview` exercises every layer:
- CCP routes → upgrade.service.ts → template-engine → remote-driver → marcelle's ccp-agent → file.service.diffFiles → response back to CCP → admin UI
Test 1 (no imageTag): 14 files rendered, 6 unchanged / 7 modified / 1 created. envCoverage: 180/186 vars present in marcelle's .env, 6 missing.
Test 2 (imageTag=v2.10.3): same file count, imageTag override plumbed through DB. The "v2.10.3" itself doesn't show in compose diff because the template uses `${IMAGE_TAG:-latest}` (env-substituted), not Handlebars.
Test 3 (malformed imageTag): rejected at JSON parsing layer.
**Apply path: code is wired but NOT yet validated against a real tenant.**
Applying to marcelle would rewrite 7 files including `nginx/conf.d/default.conf` (5296 → 15695 bytes, big change). That's a separate validation effort and not strictly needed to call Approach C "working" — every code path it touches is independently exercised by the preview test.
## Known gap (defer)
**install.sh tenants need an env-patch mechanism for imageTag to actually take effect.**
For CCP-provisioned tenants (`isRegistered=false`): CCP renders the full `.env` including `IMAGE_TAG=<value>`. Compose's `${IMAGE_TAG:-latest}` picks it up. Works.
For install.sh tenants (`isRegistered=true`): CCP filters `.env` out of the rendered set (no secrets in DB to render against). The tenant's existing `.env` stays, including its existing `IMAGE_TAG` value. **CCP's `Instance.imageTag` is persisted in CCP DB but doesn't reach the tenant's compose.**
To close this gap, add:
- Agent endpoint `POST /instance/:slug/env/patch { vars: { IMAGE_TAG: 'v2.10.3' } }` that does in-place key=value patching on the tenant's existing `.env`.
- In `runReleaseUpgrade`, for isRegistered tenants, call this between writeFiles and composePull.
Not a blocker for Approach C in CCP-provisioned tenants — those work end-to-end. The current fleet (marcelle/linda/pia all install.sh) needs this gap closed before they can use Approach C to bump image versions.
## Fleet rollout status
- n4 (CCP host): all Approach C code deployed. Migration applied. ccp-api + ccp-admin rebuilt + restarted.
- marcelle: new ccp-agent (sha 4fe6ef350aa9) with `/files/diff` endpoint deployed and running.
- soroush, linda, trbh, pridecorner, pia, bnkops: still on the prior ccp-agent. **NEED ROLLOUT** to receive the diff endpoint. Without it, preview will fail on those tenants ("path not found").
Rollout procedure (~5 min per tenant):
```
ssh bunker-admin@n4 'docker save gitea.bnkops.com/admin/changemaker-ccp-agent:latest | ssh bunker-admin@<tenant> docker load'
ssh bunker-admin@<tenant> 'cd <install_dir> && docker compose --profile ccp-agent up -d --force-recreate --no-deps ccp-agent'
```
(bnkops builds locally — needs `docker compose build ccp-agent` instead of image transfer.)
## How to use Approach C
From CCP UI at http://n4-bnkops.taile33572.ts.net:5100:
1. Instances → pick a tenant → Updates tab.
2. Click "Upgrade to Release".
3. Enter desired imageTag (leave blank to use current default).
4. Click "Preview Changes" — read the diff. If red envCoverage warning appears, fix the tenant's .env first or skip apply.
5. Click "Apply Upgrade" — watches status poll via existing UI infra.
From CLI:
```bash
curl -X POST http://n4-bnkops.taile33572.ts.net:5000/api/instances/<id>/upgrade-release/preview \
-H "Authorization: Bearer $TOKEN" \
-d '{"imageTag":"v2.10.3"}'
```
## Documentation reference
- Architectural plan: `~/.claude/plans/insight-temporal-bachman.md`
- Approach A (upgrade.sh) implementation: commit `9613c3e`
- Approach B (image-upgrade.sh) implementation: commit `4a3d9d7`
- Phase 0 templates sync: commit `9744464`
- Approach C code: commit `abb4034`
## Where to start next session
Recommended sequence:
1. **Close the env-patch gap** (~2-3 hr): agent endpoint + CCP service hook. The UI doesn't need changes.
2. **Roll out new ccp-agent** to remaining 6 tenants (~30 min, well-trodden pattern from prior session).
3. **Actually apply Approach C** on marcelle as a real version bump (e.g., v2.10.2 → v2.10.3 after tagging+building). Verify nginx config change doesn't break public site.
4. **Document the operator decision tree**: when to use A vs B vs C.
All three upgrade approaches are now in production code. The remaining work is mostly closing the install.sh-tenant gap and operator-experience polish.
---
## Session continuation — env-patch + fleet rollout + Phase 6 status
After the initial Approach C commits, this session also closed the env-patch gap and rolled the new agent out to the whole fleet.
### Closed gap: env-patch for install.sh tenants
Commit `bf997e8`: install.sh tenants (`isRegistered=true`, no `encryptedSecrets`) couldn't have their .env's `IMAGE_TAG` updated through Approach C (CCP filters out .env render, tenant keeps existing). Added:
- Agent: `POST /instance/:slug/env/patch { vars: { KEY: value } }` — in-place .env key patcher in `file.service.ts:patchEnv()`. Preserves comments and key order; appends unknown keys under a "Added by CCP env-patch" comment.
- CCP: `ExecutionDriver.patchEnv()` + `RemoteDriver.patchEnv()` + `LocalDriver.patchEnv()` (mirrors the agent helper).
- `runReleaseUpgrade`: for isRegistered tenants with newImageTag, calls `driver.patchEnv({ IMAGE_TAG: newImageTag })` between writeFiles and composePull. Non-fatal on failure.
### Fleet rollout: new ccp-agent on all 7 tenants
All 7 ccp-agents now expose `/files/diff` + `/env/patch`. Preview endpoint returns 200 on every tenant.
Discovery during rollout: source-installed tenants (soroush, trbh, pridecorner, bnkops) `build:` ccp-agent from local source rather than pulling registry image. So `docker save | docker load` is wasted on them — they need source files updated + local build. Rollout procedure split:
- Release/release-converted (marcelle, linda, pia): `docker save | docker load` then `up -d --force-recreate ccp-agent`.
- Source (bnkops, soroush, trbh, pridecorner): `git checkout origin/main -- changemaker-control-panel/agent/src/...` then `docker compose --profile ccp-agent build ccp-agent && up -d --force-recreate`.
### Phase 6 status
**Code paths all validated via preview** (preview exercises every layer that apply uses, just without the writeFiles+composePull+composeUp side effects). The new `runReleaseUpgrade` runner has been deployed in `ccp-api` on n4 and is reachable via the UI.
**Apply NOT triggered on a tenant.** Preview against marcelle revealed substantial nginx/configs template drift that would significantly alter live files:
| file | before | after |
|---|---|---|
| nginx/conf.d/default.conf | 5296 B | 15695 B |
| nginx/conf.d/api.conf | 1996 B | 84 B |
| nginx/conf.d/services.conf | 26133 B | 9434 B |
| configs/pangolin/resources.yml | 3252 B | 1653 B |
| configs/prometheus/prometheus.yml | 1406 B | 644 B |
These are CCP-templated files that were designed for CCP-provisioned tenants where CCP is authoritative. For install.sh tenants the install.sh-provisioned content differs. Applying would substantially rewrite marcelle's nginx config and risk breaking its public site.
**Recommended next session: do for nginx/configs templates what Phase 0 did for docker-compose.yml.hbs** — rewrite each templated file to be byte-equivalent to its canonical install.sh-shipped counterpart. Steps:
1. Diff each of the 5 templated files (`*.hbs`) against the canonical at `changemaker.lite/nginx/conf.d/{default,api,services}.conf.template` and `changemaker.lite/configs/{pangolin,prometheus}/...yml`.
2. Update each `.hbs` to match canonical structure (likely use the same `envsubst`-style env-var substitution that install.sh tenants run at startup).
3. Re-render against marcelle/linda/pia and confirm "modified" → "unchanged" for the 5 files.
After that, apply on marcelle becomes safe and the E2E test can complete.
The Approach C code itself is production-ready; the gating issue is template sync, which is mechanical.

View File

@ -126,7 +126,7 @@ RUNTIME_SCRIPTS=(
install.sh install.sh
nocodb-init.sh gitea-init.sh mkdocs-entrypoint.sh nocodb-init.sh gitea-init.sh mkdocs-entrypoint.sh
backup.sh restore.sh backup.sh restore.sh
upgrade.sh upgrade-check.sh upgrade-watcher.sh upgrade-stash-cleanup.sh upgrade.sh upgrade-check.sh upgrade-watcher.sh
uninstall.sh test-deployment.sh uninstall.sh test-deployment.sh
validate-env.sh pangolin-teardown.sh ccp-deregister.sh register-with-ccp.sh validate-env.sh pangolin-teardown.sh ccp-deregister.sh register-with-ccp.sh
update-env.sh update-env.sh
@ -178,13 +178,6 @@ if [[ -f "$PROJECT_DIR/scripts/mkdocs-build-trigger.py" ]]; then
cp "$PROJECT_DIR/scripts/mkdocs-build-trigger.py" "$STAGE_DIR/scripts/" cp "$PROJECT_DIR/scripts/mkdocs-build-trigger.py" "$STAGE_DIR/scripts/"
fi fi
# Shared shell libraries (scripts/lib/) — sourced by upgrade.sh + image-upgrade.sh.
# Whole directory ships verbatim; safe because nothing executable lives here
# besides the .sh helpers that the runtime scripts depend on.
if [[ -d "$PROJECT_DIR/scripts/lib" ]]; then
cp -a "$PROJECT_DIR/scripts/lib" "$STAGE_DIR/scripts/"
fi
# Systemd units # Systemd units
if [[ -d "$PROJECT_DIR/scripts/systemd" ]]; then if [[ -d "$PROJECT_DIR/scripts/systemd" ]]; then
cp -r "$PROJECT_DIR/scripts/systemd" "$STAGE_DIR/scripts/" cp -r "$PROJECT_DIR/scripts/systemd" "$STAGE_DIR/scripts/"
@ -302,23 +295,12 @@ if [[ "$UPLOAD" == "true" ]]; then
fi fi
fi fi
# Gitea 1.23.x only initializes Release.CreatedUnix inside its createTag()
# path. If the git tag already exists on origin when we POST /releases,
# createTag() is skipped and CreatedUnix stays 0, which makes /releases/latest
# silently return an older release. Remove the remote tag first so Gitea
# creates it via target_commitish below. The tag is preserved locally and
# gets recreated at the same SHA — no history is lost.
if git ls-remote --exit-code origin "refs/tags/${TAG}" >/dev/null 2>&1; then
warn "Removing remote tag ${TAG} so Gitea can recreate it (CreatedUnix init)"
git push origin ":refs/tags/${TAG}" >/dev/null 2>&1 || true
fi
info "Creating Gitea release ${TAG}..." info "Creating Gitea release ${TAG}..."
RELEASE_RESPONSE=$(curl -sf -X POST \ RELEASE_RESPONSE=$(curl -sf -X POST \
"${GITEA_HOST}/api/v1/repos/admin/changemaker.lite/releases" \ "${GITEA_HOST}/api/v1/repos/admin/changemaker.lite/releases" \
-H "Authorization: token ${GITEA_TOKEN}" \ -H "Authorization: token ${GITEA_TOKEN}" \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
-d "{\"tag_name\":\"${TAG}\",\"target_commitish\":\"${COMMIT_SHA}\",\"name\":\"Changemaker Lite ${TAG}\",\"body\":\"Release ${TAG} (${COMMIT_SHA})\"}" \ -d "{\"tag_name\":\"${TAG}\",\"name\":\"Changemaker Lite ${TAG}\",\"body\":\"Release ${TAG} (${COMMIT_SHA})\"}" \
2>/dev/null || true) 2>/dev/null || true)
RELEASE_ID=$(echo "$RELEASE_RESPONSE" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))" 2>/dev/null || true) RELEASE_ID=$(echo "$RELEASE_RESPONSE" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))" 2>/dev/null || true)

View File

@ -1,383 +0,0 @@
#!/usr/bin/env bash
# image-upgrade.sh — Approach B: image-only upgrade
#
# Pulls latest images from the registry and recreates services WITHOUT touching
# tracked files in the install tree (no git pull, no tarball extract, no VERSION
# mutation). Tenant content (mkdocs/, configs/) is implicitly safe because this
# script never writes outside data/upgrade/ and the docker daemon.
#
# Used by CCP "Quick Upgrade" button. Pairs with scripts/upgrade.sh which
# remains the full upgrade path for orchestration-changing releases.
#
# Schema parity: writes data/upgrade/progress.json + result.json with the same
# fields upgrade.sh writes, so the CCP poll loop is unchanged.
# Strict mode: abort on any failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -euo pipefail
# PROJECT_DIR is the parent of the directory holding this script, resolved
# through symlinks (readlink -f), so it is independent of the caller's cwd.
PROJECT_DIR="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")/.." && pwd)"
SCRIPT_DIR="$PROJECT_DIR/scripts"
UPGRADE_DIR="$PROJECT_DIR/data/upgrade"
LOG_DIR="$PROJECT_DIR/logs"
# One log file per run, timestamped to the second.
LOG_FILE="$LOG_DIR/image-upgrade-$(date +%Y%m%d_%H%M%S).log"
# Lock file holding the PID of the running upgrade (see acquire_lock).
LOCK_FILE="$PROJECT_DIR/.upgrade.lock"
# Status files written in --api-mode (see write_progress / write_result).
PROGRESS_FILE="$UPGRADE_DIR/progress.json"
RESULT_FILE="$UPGRADE_DIR/result.json"
# Baseline for the durationSeconds field computed in write_result.
START_TIME=$SECONDS
# --- Detect install mode ---
# "release": a VERSION file exists and there is no .git directory.
# "source":  everything else.
if [[ -f "$PROJECT_DIR/VERSION" ]] && [[ ! -d "$PROJECT_DIR/.git" ]]; then
INSTALL_MODE="release"
else
INSTALL_MODE="source"
fi
# --- Defaults ---
# Overridden by --api-mode, --dry-run and --image-tag respectively.
API_MODE=false
DRY_RUN=false
IMAGE_TAG=""
# Print the command-line help for this script on stdout.
usage() {
  local prog
  prog="$(basename "$0")"
  cat <<EOF
Usage: ${prog} [options]
Image-only upgrade: pulls latest images from the configured registry and
recreates services without touching the install tree.
Options:
--api-mode Emit data/upgrade/{progress,result}.json (no TTY output)
--dry-run Print what would happen; do not pull or recreate
--image-tag TAG Override IMAGE_TAG (env var) for this run
-h, --help Show this help
This script never modifies mkdocs/, configs/, scripts/, docker-compose.yml,
or VERSION. It is the safest upgrade path for orchestration-stable releases.
EOF
}
# --- Parse command-line options ---
# Consume arguments left to right until none remain. Any unrecognised option
# prints the help text to stderr and exits 1.
until [[ $# -eq 0 ]]; do
  case "$1" in
    -h|--help)
      usage
      exit 0
      ;;
    --api-mode)
      API_MODE=true
      shift
      ;;
    --dry-run)
      DRY_RUN=true
      shift
      ;;
    --image-tag)
      # ${2:?...} aborts with the given message when the value is missing or empty.
      IMAGE_TAG="${2:?--image-tag requires a value}"
      shift 2
      ;;
    *)
      echo "Unknown option: $1" >&2
      usage >&2
      exit 1
      ;;
  esac
done
# --- Colors ---
# Colour sequences are only set when stdout is a terminal and NO_COLOR is
# unset/empty; otherwise every colour variable is the empty string. The values
# are literal backslash sequences, expanded later by `echo -e`.
if [[ -t 1 ]] && [[ -z "${NO_COLOR:-}" ]]; then
RED='\033[0;31m' GREEN='\033[0;32m' YELLOW='\033[0;33m'
CYAN='\033[0;36m' BOLD='\033[1m' NC='\033[0m'
else
RED='' GREEN='' YELLOW='' CYAN='' BOLD='' NC=''
fi
# Log helpers: each prints its arguments behind a fixed-width tag.
# info/success/warn write to stdout; error writes to stderr.
info() { echo -e "${CYAN}[INFO]${NC} $*"; }
success() { echo -e "${GREEN}[ OK ]${NC} $*"; }
warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
error() { echo -e "${RED}[ERR ]${NC} $*" >&2; }
# phase NUM NAME: print a blank line followed by a "=== Phase NUM: NAME ===" heading.
phase() { echo ""; echo -e "${BOLD}${CYAN}=== Phase $1: $2 ===${NC}"; }
# --- Logging: mirror stdout/stderr to LOG_FILE ---
# logs/ may be root-owned on installs where upgrade.sh has run via ccp-agent.
# Fall back to /tmp if we can't write, so bunker-admin manual invocations don't
# crash with "Permission denied" on tee.
mkdir -p "$UPGRADE_DIR"
# Probe writability by creating the directory and touching the log file.
if mkdir -p "$LOG_DIR" 2>/dev/null && touch "$LOG_FILE" 2>/dev/null; then
: # primary log location is writable
else
# Fallback name carries the PID ($$) in addition to the timestamp.
LOG_FILE="/tmp/image-upgrade-$(date +%Y%m%d_%H%M%S)-$$.log"
echo "[INFO] logs/ not writable; using $LOG_FILE" >&2
fi
# From here on, everything written to stdout or stderr is also appended to
# LOG_FILE via tee.
exec > >(tee -a "$LOG_FILE") 2>&1
# --- Capture previous version for result.json ---
# Release installs: first line of VERSION. Source installs: short git HEAD.
# Either lookup falls back to "unknown" on failure.
if [[ "$INSTALL_MODE" == "release" ]]; then
PRE_VERSION="$(head -1 "$PROJECT_DIR/VERSION" 2>/dev/null || echo "unknown")"
else
PRE_VERSION="$(cd "$PROJECT_DIR" && git rev-parse --short HEAD 2>/dev/null || echo "unknown")"
fi
# Escape a string so it can be embedded inside a double-quoted JSON value.
# Handles backslash, double quote, newline, carriage return and tab. The
# backslash must be escaped first so later escapes are not doubled.
# Replacement strings are quoted so they are always taken literally.
_json_escape() {
  local text="$1" bs='\' dq='"'
  text=${text//"$bs"/"$bs$bs"}
  text=${text//"$dq"/"$bs$dq"}
  text=${text//$'\n'/"${bs}n"}
  text=${text//$'\r'/"${bs}r"}
  text=${text//$'\t'/"${bs}t"}
  printf '%s' "$text"
}

# write_progress PHASE_NUM PHASE_NAME PERCENT MESSAGE
# In --api-mode, overwrite PROGRESS_FILE with a JSON snapshot of the current
# phase. No-op otherwise. PHASE_NUM and PERCENT are emitted as bare JSON
# numbers; PHASE_NAME and MESSAGE are escaped as JSON strings.
write_progress() {
  local phase_num="$1" phase_name="$2" pct="$3" msg="$4"
  if [[ "$API_MODE" != "true" ]]; then
    return 0
  fi
  mkdir -p "$UPGRADE_DIR"
  cat > "$PROGRESS_FILE" <<PEOF
{
"phase": ${phase_num},
"phaseName": "$(_json_escape "$phase_name")",
"percentage": ${pct},
"message": "$(_json_escape "$msg")",
"lastUpdate": "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
}
PEOF
}

# write_result SUCCESS MESSAGE [WARNINGS_JSON]
# In --api-mode, write the final RESULT_FILE and remove PROGRESS_FILE. No-op
# otherwise. SUCCESS is emitted verbatim (callers pass "true"/"false") and
# WARNINGS_JSON must already be valid JSON (defaults to []).
write_result() {
  if [[ "$API_MODE" != "true" ]]; then
    return 0
  fi
  local success_val="$1" msg="$2"
  local warnings_json="${3:-[]}"
  local duration_secs=$((SECONDS - START_TIME))
  # Re-read the version at completion time; fall back to the pre-upgrade value
  # when it cannot be determined.
  local new_version="$PRE_VERSION"
  if [[ "$INSTALL_MODE" == "release" ]]; then
    new_version="$(head -1 "$PROJECT_DIR/VERSION" 2>/dev/null || echo "$PRE_VERSION")"
  else
    new_version="$(cd "$PROJECT_DIR" && git rev-parse --short HEAD 2>/dev/null || echo "$PRE_VERSION")"
  fi
  mkdir -p "$UPGRADE_DIR"
  cat > "$RESULT_FILE" <<REOF
{
"success": ${success_val},
"message": "$(_json_escape "$msg")",
"previousCommit": "$(_json_escape "$PRE_VERSION")",
"newCommit": "$(_json_escape "$new_version")",
"commitCount": 0,
"durationSeconds": ${duration_secs},
"warnings": ${warnings_json},
"completedAt": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
"mode": "image-only"
}
REOF
  rm -f "$PROGRESS_FILE"
}
# --- Lock + cleanup ---
# Take the upgrade lock by writing this process's PID into LOCK_FILE.
# If the file already names a live process, report the conflict (including a
# failure result in --api-mode) and exit 1. A lock that is empty or names a
# dead process is treated as stale and replaced.
acquire_lock() {
  local holder_pid=""
  if [[ -f "$LOCK_FILE" ]]; then
    holder_pid="$(cat "$LOCK_FILE" 2>/dev/null || echo "")"
    if [[ -z "$holder_pid" ]] || ! kill -0 "$holder_pid" 2>/dev/null; then
      warn "Stale lock file found; removing"
      rm -f "$LOCK_FILE"
    else
      error "Upgrade already running (pid $holder_pid). Refusing to start."
      write_result "false" "Another upgrade is already running (pid $holder_pid)"
      exit 1
    fi
  fi
  echo $$ > "$LOCK_FILE"
}
# Remove LOCK_FILE, but only when it records this process's PID.
# This runs from the EXIT trap, which also fires when acquire_lock refuses to
# start because another upgrade holds the lock; deleting unconditionally would
# remove that other process's lock. Always returns 0 (best-effort cleanup).
release_lock() {
  local owner=""
  [[ -f "$LOCK_FILE" ]] || return 0
  owner="$(cat "$LOCK_FILE" 2>/dev/null || true)"
  if [[ "$owner" == "$$" ]]; then
    rm -f "$LOCK_FILE" || true
  fi
  return 0
}
# ERR-trap handler: on_failure LINE_NO
# Logs the failing line and exit status, writes a failure result (--api-mode
# only), releases the lock, and exits with the failed command's status.
on_failure() {
# Must be the first statement: $? still holds the status of the command that
# triggered the trap.
local exit_code=$?
local line_no=${1:-?}
error "image-upgrade.sh failed at line $line_no (exit $exit_code)"
write_result "false" "Image upgrade failed at line $line_no (exit $exit_code)"
release_lock
exit "$exit_code"
}
# Single quotes defer $LINENO expansion until the trap actually fires.
trap 'on_failure $LINENO' ERR
# Release the lock on every exit path, including explicit `exit` calls.
trap 'release_lock' EXIT
# --- Banner ---
# Summarise the run configuration before doing any work, then take the lock.
echo
echo -e "${BOLD}${CYAN}================================================${NC}"
echo -e "${BOLD} Image-Only Upgrade${NC}"
echo -e "${BOLD}${CYAN}================================================${NC}"
echo "Install mode: $INSTALL_MODE"
echo "Project dir: $PROJECT_DIR"
echo "Pre-version: $PRE_VERSION"
# Optional lines: only shown when the corresponding option was given.
if [[ -n "$IMAGE_TAG" ]]; then
  echo "Image tag: $IMAGE_TAG"
fi
if [[ "$DRY_RUN" == "true" ]]; then
  echo "DRY RUN: no images will be pulled or services recreated"
fi
echo
acquire_lock
# =============================================================================
# Phase 1: Pre-flight + mkdocs snapshot (defensive)
# =============================================================================
phase "1" "Pre-flight"
write_progress 1 "Pre-flight" 10 "Snapshotting mkdocs (defensive)..."

# Every `docker compose` call in this script relies on the working directory
# to locate the compose file. Without this cd, running the script from another
# directory makes the pull fail with only a warning, leaves the service list
# empty, and the run reports success without doing anything.
cd "$PROJECT_DIR"

# Source mkdocs-snapshot.sh and run it. This is the same snapshot every
# upgrade path takes — leaves mkdocs-backup-<timestamp>.tar.gz in project root.
# Image-only upgrades shouldn't damage mkdocs (no filesystem mutation), but
# the snapshot is cheap insurance and keeps operator habits consistent.
if [[ -r "$SCRIPT_DIR/lib/mkdocs-snapshot.sh" ]]; then
  if [[ "$DRY_RUN" == "true" ]]; then
    info "[DRY RUN] Would snapshot mkdocs/"
  else
    # The helper path is passed as a positional argument instead of being
    # spliced into the command string, so install paths containing spaces or
    # shell metacharacters are handled safely.
    # shellcheck disable=SC1090
    PROJECT_DIR="$PROJECT_DIR" bash -c '. "$1"; snapshot_mkdocs' _ "$SCRIPT_DIR/lib/mkdocs-snapshot.sh" \
      || warn "mkdocs snapshot failed (non-fatal; continuing)"
  fi
else
  warn "scripts/lib/mkdocs-snapshot.sh not found; skipping snapshot"
fi

# Sanity-check docker
if ! docker compose version &>/dev/null; then
  error "docker compose is not available"
  write_result "false" "docker compose not available"
  exit 1
fi
success "Pre-flight checks passed"
# =============================================================================
# Phase 2: Pull images
# =============================================================================
phase "2" "Pull Images"
write_progress 2 "Pull Images" 30 "Pulling images from registry..."

# Extra environment for the pull: only populated when --image-tag was given.
PULL_ENV=()
[[ -z "$IMAGE_TAG" ]] || PULL_ENV+=("IMAGE_TAG=$IMAGE_TAG")

# Pull failures are downgraded to a warning in both branches. The array is
# only expanded when non-empty.
if [[ "$DRY_RUN" == "true" ]]; then
  info "[DRY RUN] Would run: ${PULL_ENV[*]:-} docker compose pull"
elif (( ${#PULL_ENV[@]} == 0 )); then
  info "Pulling all images (this may take a few minutes)..."
  docker compose pull \
    || warn "docker compose pull had errors (continuing — some images may be local)"
else
  info "Pulling all images (this may take a few minutes)..."
  env "${PULL_ENV[@]}" docker compose pull \
    || warn "docker compose pull had errors (continuing — some images may be local)"
fi
success "Image pull complete"
# =============================================================================
# Phase 3: Recreate core app services (targeted, not broad)
# =============================================================================
phase "3" "Recreate Services"
write_progress 3 "Recreate Services" 60 "Recreating core app services with new images..."

# Targeted recreate: only the services whose IMAGES are released as part of
# changemaker.lite (api, admin, media-api, nginx). Broader `up -d` is risky
# because a single misconfigured mount in any service (e.g. mkdocs-site-server)
# can cascade and leave dependent containers in "Created" state. Image-only
# upgrade should only touch the actual code containers, not third-party
# infrastructure that happens to live in the same compose file.
#
# Same Phase 6 pattern as upgrade.sh: drop ccp-agent from COMPOSE_PROFILES
# during recreate so we don't suicide-restart the agent that spawned us.
# Restart ccp-agent at the end via detached subshell.
PROFILES_SAVED="${COMPOSE_PROFILES:-}"
# grep -v exits 1 when it filters out every input line, which happens when
# COMPOSE_PROFILES is exactly "ccp-agent". Under `set -eo pipefail` that would
# fail the command substitution and abort the whole upgrade, so an empty
# result is tolerated explicitly.
COMPOSE_PROFILES_WITHOUT_AGENT="$(echo "${PROFILES_SAVED}" \
  | tr ',' '\n' | { grep -vx 'ccp-agent' || true; } | paste -sd, -)"
UP_ENV=("COMPOSE_PROFILES=${COMPOSE_PROFILES_WITHOUT_AGENT}")
if [[ -n "$IMAGE_TAG" ]]; then
  UP_ENV+=("IMAGE_TAG=$IMAGE_TAG")
fi

# Core services that ship as v2 release images. nginx last so it doesn't
# briefly proxy to an old api. media-api may not be enabled on all installs;
# tolerate it being missing from compose.
CORE_SERVICES=(api admin media-api nginx)
EXISTING_SERVICES=()

# Capture the service list once. Don't pipe `docker compose config` into
# `grep -q` directly: with `set -o pipefail`, grep exits early on match and
# SIGPIPEs the docker writer, making the pipeline exit non-zero. The grep -q
# would then "match" all services as missing. Capture-then-check avoids it.
COMPOSE_SERVICES_LIST="$(docker compose config --services 2>/dev/null || true)"
for svc in "${CORE_SERVICES[@]}"; do
  if grep -qx -- "$svc" <<<"$COMPOSE_SERVICES_LIST"; then
    EXISTING_SERVICES+=("$svc")
  else
    info "Skipping service '$svc' (not in compose file)"
  fi
done

if (( ${#EXISTING_SERVICES[@]} == 0 )); then
  warn "No core app services found in compose; skipping recreate"
elif [[ "$DRY_RUN" == "true" ]]; then
  info "[DRY RUN] Would run: ${UP_ENV[*]} docker compose up -d ${EXISTING_SERVICES[*]}"
else
  info "Recreating core services: ${EXISTING_SERVICES[*]}"
  env "${UP_ENV[@]}" docker compose up -d "${EXISTING_SERVICES[@]}"
fi
success "Services recreated"
# Restart Pangolin tunnel connector if running (image may have changed).
# The container names are captured first instead of piping `docker ps` straight
# into `grep -q`: with `set -o pipefail`, grep's early exit can SIGPIPE the
# writer and turn a real match into a false "not running".
NEWT_CONTAINER_NAMES="$(docker ps --format '{{.Names}}' || true)"
if grep -q 'newt' <<<"$NEWT_CONTAINER_NAMES"; then
    if [[ "$DRY_RUN" == "true" ]]; then
        info "[DRY RUN] Would restart newt"
    else
        info "Restarting Pangolin tunnel connector..."
        # Best effort: a failed restart must not fail the upgrade.
        docker compose restart newt 2>/dev/null || true
        success "Newt tunnel restarted"
    fi
fi
# =============================================================================
# Phase 4: Verify (light health checks)
# =============================================================================
phase "4" "Verification"
write_progress 4 "Verification" 85 "Running health checks..."
# Flipped to "true" by verify_health when a probe never succeeds within its
# wait budget; it is only ever used to attach a warning, not to fail the run.
VERIFY_FAILED=false
# JSON array (kept as a string) that is passed to write_result as the warnings
# argument at the end of the script.
UPGRADE_WARNINGS="[]"
# verify_health LABEL PROBE [TIMEOUT]
#
# Polls PROBE (a shell command string, run through eval with stderr discarded)
# every 3 seconds until it succeeds or TIMEOUT seconds (default 45) have been
# used up. Success is reported via success(); a timeout is reported via warn()
# and recorded in the global VERIFY_FAILED. Always returns 0 so that a failed
# health check never aborts the caller.
verify_health() {
    local label="$1" probe="$2" timeout="${3:-45}"
    local elapsed

    for (( elapsed = 0; elapsed < timeout; elapsed += 3 )); do
        if eval "$probe" 2>/dev/null; then
            success "$label: healthy (${elapsed}s)"
            return 0
        fi
        sleep 3
    done

    warn "$label: not responding after ${timeout}s"
    VERIFY_FAILED=true
    return 0
}
# Run the post-recreate health probes (skipped entirely in dry-run mode).
# Failures only produce a warning in the result; they never fail the upgrade.
if [[ "$DRY_RUN" != "true" ]]; then
    verify_health "API (port 4000)" \
        "docker compose exec -T api wget -q --spider http://localhost:4000/api/health" 60
    verify_health "Admin (port 3000)" \
        "docker compose exec -T admin wget -q --spider http://localhost:3000/" 90

    # Only probe media-api when its container is actually running. Capture the
    # names first rather than piping `docker ps` into `grep -q`: with
    # `set -o pipefail`, grep's early exit can SIGPIPE the writer and make a
    # real match look like a miss.
    VERIFY_CONTAINER_NAMES="$(docker ps --format '{{.Names}}' || true)"
    if grep -q 'changemaker-media-api' <<<"$VERIFY_CONTAINER_NAMES"; then
        verify_health "Media API (port 4100)" \
            "docker compose exec -T media-api wget -q --spider http://127.0.0.1:4100/health" 30
    fi

    # Compare as a string instead of executing the variable's value as a command.
    if [[ "$VERIFY_FAILED" == "true" ]]; then
        UPGRADE_WARNINGS='["Some health checks failed after image-only upgrade — services may still be starting"]'
    fi
fi
# =============================================================================
# Summary + deferred ccp-agent restart
# =============================================================================
# Read SECONDS exactly once. Reading it separately for the minute and second
# parts lets the two values come from different instants (e.g. "0m 0s" when the
# clock ticks from 59s to 60s between the two reads).
ELAPSED_TOTAL_SEC=$(( SECONDS - START_TIME ))
ELAPSED_MIN=$(( ELAPSED_TOTAL_SEC / 60 ))
ELAPSED_SEC=$(( ELAPSED_TOTAL_SEC % 60 ))

echo ""
echo -e "${BOLD}${GREEN}================================================${NC}"
echo -e "${BOLD} Image-Only Upgrade Complete${NC}"
echo -e "${BOLD}${GREEN}================================================${NC}"
printf " Previous: %s\n" "$PRE_VERSION"
printf " Duration: %dm %ds\n" "$ELAPSED_MIN" "$ELAPSED_SEC"
printf " Log: %s\n" "$LOG_FILE"

# Final progress + result records; UPGRADE_WARNINGS carries any health-check
# warning collected during verification.
write_progress 4 "Complete" 100 "Image-only upgrade complete"
write_result "true" "Image-only upgrade complete (previous: ${PRE_VERSION})" "$UPGRADE_WARNINGS"
# Deferred ccp-agent restart — see upgrade.sh for full rationale. Same
# mechanism: nohup'd, disowned subshell that picks up the new image after
# this script has cleanly exited.
if echo "${PROFILES_SAVED:-}" | tr ',' '\n' | grep -qx 'ccp-agent'; then
    if [[ "$DRY_RUN" == "true" ]]; then
        info "[DRY RUN] Would schedule deferred ccp-agent restart"
    else
        info "Scheduling deferred ccp-agent restart..."
        # PROJECT_DIR is passed as a positional argument instead of being
        # spliced into the command string, so a path containing quotes or other
        # shell metacharacters cannot break (or inject into) the inner script.
        # `cd ... || exit 1` keeps compose from running in the wrong directory.
        nohup bash -c '
            sleep 3
            cd "$1" || exit 1
            COMPOSE_PROFILES=ccp-agent docker compose --profile ccp-agent up -d ccp-agent
        ' _ "$PROJECT_DIR" >/dev/null 2>&1 < /dev/null &
        disown
        success "ccp-agent restart scheduled (will pick up new image)"
    fi
fi

# Normal exit path: release the upgrade lock and clear the EXIT trap before
# exiting 0.
release_lock
trap - EXIT
exit 0

View File

@ -1,81 +0,0 @@
#!/usr/bin/env bash
# =============================================================================
# mkdocs-snapshot.sh — shared library function
# =============================================================================
# Defines snapshot_mkdocs(): writes a tarball of mkdocs/ into the install root
# as mkdocs-backup-<timestamp>.tar.gz, keeping the last 5 snapshots.
#
# Sourced by scripts/upgrade.sh and scripts/image-upgrade.sh (and may be
# invoked agent-side by changemaker-control-panel during template re-render).
#
# Why the install root instead of backups/?
# - Discoverable: operators see mkdocs-backup-*.tar.gz with a plain `ls`.
# - The agent's /app/instance bind mount maps directly to the install root,
# so the agent can restore from this archive without path translation.
# - backups/ is owned by root in some installs (DB dumps via container)
# and gets rotated on a different schedule than docs snapshots.
#
# Restoration one-liner:
# tar xzf "$(ls -t mkdocs-backup-*.tar.gz | head -1)" -C . \
# && docker compose restart mkdocs mkdocs-site-server
#
# Requires: $PROJECT_DIR (absolute path to install root), info() function
# from the caller (falls back to plain echo if info is not defined).
# =============================================================================
# Minimal stand-ins for the caller's loggers, installed only when the sourcing
# script has not already defined them (e.g. when this file is sourced on its
# own). info goes to stdout, warn goes to stderr.
declare -F info >/dev/null 2>&1 || info() { echo "[INFO] $*"; }
declare -F warn >/dev/null 2>&1 || warn() { echo "[WARN] $*" >&2; }
# snapshot_mkdocs — take a tarball of mkdocs/ into the install root.
#
# Returns 0 if successful (or if mkdocs/ doesn't exist — non-fatal).
# Returns non-zero only if tar itself fails AND $SNAPSHOT_REQUIRED is true.
#
# Env vars:
#   PROJECT_DIR        (required) Install root containing mkdocs/
#   SNAPSHOT_KEEP      Number of snapshots to retain (default 5). Anything that
#                      is not a positive integer is rejected with a warning and
#                      replaced by 5, so a bad value can never prune the
#                      snapshot that was just written.
#   SNAPSHOT_REQUIRED  If "true", failure to snapshot aborts (default false)
snapshot_mkdocs() {
    if [[ -z "${PROJECT_DIR:-}" ]]; then
        warn "snapshot_mkdocs: PROJECT_DIR not set; skipping"
        return 0
    fi
    if [[ ! -d "${PROJECT_DIR}/mkdocs" ]]; then
        # No mkdocs dir = nothing to snapshot. Common on minimal installs.
        return 0
    fi

    local stamp archive keep
    stamp="$(date +%Y%m%d_%H%M%S)"
    archive="${PROJECT_DIR}/mkdocs-backup-${stamp}.tar.gz"
    keep="${SNAPSHOT_KEEP:-5}"
    # 10# forces base 10 so values like "08" are not parsed as invalid octal.
    if [[ "$keep" =~ ^[0-9]+$ ]] && (( 10#$keep >= 1 )); then
        keep=$(( 10#$keep ))
    else
        warn "snapshot_mkdocs: invalid SNAPSHOT_KEEP='${keep}'; using 5"
        keep=5
    fi

    if tar czf "$archive" -C "$PROJECT_DIR" mkdocs 2>/dev/null; then
        local size
        size="$(du -h "$archive" 2>/dev/null | cut -f1)"
        info "Tenant docs snapshot: $(basename "$archive") (${size})"
    else
        warn "snapshot_mkdocs: tar failed for $archive"
        # Never leave a truncated archive behind where it could be mistaken
        # for a usable snapshot.
        rm -f "$archive" 2>/dev/null
        if [[ "${SNAPSHOT_REQUIRED:-false}" == "true" ]]; then
            return 1
        fi
        return 0
    fi

    # Retention: keep the most recent N snapshots, prune older ones.
    # ls -t lists newest first; tail -n +N+1 selects items after the Nth.
    # Paths are consumed one line at a time (not via xargs, which splits on
    # whitespace) so pruning also works when PROJECT_DIR contains spaces.
    local stale
    # shellcheck disable=SC2012  # ls is intentional for mtime sort
    ls -t "${PROJECT_DIR}"/mkdocs-backup-*.tar.gz 2>/dev/null \
        | tail -n "+$(( keep + 1 ))" \
        | while IFS= read -r stale; do
            rm -f -- "$stale"
        done
    return 0
}

View File

@ -1,135 +0,0 @@
#!/usr/bin/env bash
# =============================================================================
# upgrade-stash-cleanup.sh — clean up stale upgrade-* git stashes
# =============================================================================
# Older versions of upgrade.sh used `git stash push --include-untracked` to
# protect tenant content during pulls. When pop conflicts went unresolved,
# the stashes accumulated in `git stash list` forever — Pride Corner ended up
# with three from 2026-03-09 alone, each containing displaced tenant
# customizations that the running site no longer reflected.
#
# This script lists every `upgrade-*` stash, shows its scope, and offers to
# drop them. It does NOT auto-restore content; that's a separate decision per
# tenant. The intent is to clear the backlog so future `git stash list` is
# meaningful.
#
# Usage:
# bash scripts/upgrade-stash-cleanup.sh # interactive, lists + prompts
# bash scripts/upgrade-stash-cleanup.sh --dry # list only
# bash scripts/upgrade-stash-cleanup.sh --yes # drop all upgrade-* without prompt
# =============================================================================
set -euo pipefail

# Work from the install root (the parent of the directory holding this script)
# so the relative .git check and git commands below hit the right checkout.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
cd "$PROJECT_DIR"

# Colors: empty by default, enabled only when stdout is a terminal and the
# NO_COLOR environment variable is unset or empty.
RED='' GREEN='' YELLOW='' CYAN='' BOLD='' NC=''
if [[ -t 1 && -z "${NO_COLOR:-}" ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    CYAN='\033[0;36m'
    BOLD='\033[1m'
    NC='\033[0m'
fi

# Tagged log helpers; all three write to stdout.
info() { echo -e "${CYAN}[INFO]${NC} $*"; }
ok()   { echo -e "${GREEN}[ OK ]${NC} $*"; }
warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
# Command-line flags:
#   --dry | --dry-run   list matching stashes only, never drop
#   --yes | -y          drop without the confirmation prompt
#   --help | -h         print this script's header comment and exit
DRY=false
YES=false
for arg in "$@"; do
    case "$arg" in
        --dry|--dry-run) DRY=true ;;
        --yes|-y)        YES=true ;;
        --help|-h)
            # Print the whole leading comment block (everything after the
            # shebang up to the first non-comment line) with the "# " prefix
            # stripped. The previous nested sed ranges stopped at the first
            # separator line, so the Usage section was never shown.
            awk 'NR == 1 { next } !/^#/ { exit } { sub(/^# ?/, ""); print }' "$0"
            exit 0
            ;;
        *)
            # Surface typos instead of silently ignoring them.
            warn "Ignoring unknown argument: $arg"
            ;;
    esac
done
# Bail out quietly (exit 0) when the install root has no .git directory.
[[ -d .git ]] || {
    warn "Not a git repository — this script only applies to source installs."
    exit 0
}

# Gather every `git stash list` entry whose message starts with "upgrade-",
# one array element per entry. `|| true` keeps a no-match grep (exit status 1)
# from being treated as a failure.
STASHES=()
while IFS= read -r stash_line || [[ -n "$stash_line" ]]; do
    STASHES+=("$stash_line")
done < <(git stash list 2>/dev/null | grep -E ': (On|WIP on) [^:]+: upgrade-' || true)

if (( ${#STASHES[@]} == 0 )); then
    ok "No upgrade-* stashes found. Nothing to clean up."
    exit 0
fi
# Print one summary row per matching stash: ref, label, number of files in the
# stash diff, and how many of them are mkdocs/mkdocs.yml (0 or 1).
echo ""
echo -e "${BOLD}Found ${#STASHES[@]} upgrade-* stash(es):${NC}"
echo ""
for entry in "${STASHES[@]}"; do
    REF="${entry%%:*}"
    LABEL="${entry#*: }"
    # Query git once per stash and tolerate failure: under `set -euo pipefail`
    # a failing `git stash show` inside `$(… | wc -l)` would abort the whole
    # script with no message (its stderr is discarded).
    STASH_FILES="$(git stash show "$REF" --name-only 2>/dev/null || true)"
    if [[ -n "$STASH_FILES" ]]; then
        FILE_COUNT=$(grep -c '' <<<"$STASH_FILES")
    else
        FILE_COUNT=0
    fi
    # grep -c prints 0 but exits 1 when nothing matches; `|| true` absorbs that.
    HAS_MKDOCS_YML=$(grep -c '^mkdocs/mkdocs\.yml$' <<<"$STASH_FILES" || true)
    printf " %-12s %-50s files=%-4d mkdocs.yml=%s\n" \
        "$REF" "$LABEL" "$FILE_COUNT" "$HAS_MKDOCS_YML"
done
echo ""

if [[ "$DRY" == "true" ]]; then
    info "Dry-run: no stashes will be dropped."
    exit 0
fi
# Warn loudly if any stash holds mkdocs.yml — operator should manually review
# before dropping (tenant content might be there).
#
# The file list is captured before it is searched. Piping `git stash show`
# straight into `grep -q` under `set -o pipefail` can report a false "no match"
# (grep exits on the first hit, the writer gets SIGPIPE, the pipeline fails),
# which would silently suppress this warning.
MKDOCS_STASHES=""
for entry in "${STASHES[@]}"; do
    REF="${entry%%:*}"
    STASH_FILES="$(git stash show "$REF" --name-only 2>/dev/null || true)"
    if grep -q '^mkdocs/mkdocs\.yml$' <<<"$STASH_FILES"; then
        # Newline-separated list of affected stash refs.
        MKDOCS_STASHES+="${MKDOCS_STASHES:+$'\n'}${REF}"
    fi
done

if [[ -n "$MKDOCS_STASHES" ]]; then
    echo ""
    echo -e "${RED}${BOLD}⚠ WARNING:${NC} the following stashes contain ${BOLD}mkdocs/mkdocs.yml${NC}:"
    echo "$MKDOCS_STASHES" | sed 's/^/ /'
    echo ""
    echo " These may hold tenant branding (site_name, site_url, custom theme, etc.)"
    echo " that ISN'T reflected on disk. Before dropping, verify:"
    echo ""
    echo " git show <stash-ref>:mkdocs/mkdocs.yml | head -10"
    echo " diff <(git show <stash-ref>:mkdocs/mkdocs.yml) mkdocs/mkdocs.yml"
    echo ""
    echo " If disk mkdocs.yml already has the tenant content, the stash is safe to drop."
    echo " If disk is upstream and stash has tenant content, restore first:"
    echo " git checkout <stash-ref> -- mkdocs/mkdocs.yml"
    echo ""
fi
# Interactive confirmation (skipped with --yes). Anything other than an
# explicit yes cancels without dropping.
if [[ "$YES" != "true" ]]; then
    echo -en "${BOLD}Drop all ${#STASHES[@]} upgrade-* stashes? [y/N] ${NC}"
    # `read` returns non-zero at end-of-file (stdin closed or non-interactive).
    # Under `set -e` that would kill the script with no message, so treat a
    # failed read as an empty answer, i.e. "no".
    CONFIRM=""
    read -r CONFIRM || true
    case "$CONFIRM" in
        y|Y|yes|YES) ;;
        *) info "Cancelled. No stashes dropped."; exit 0 ;;
    esac
fi
# Drop in reverse order so indices stay stable: dropping stash@{N} renumbers
# only the entries above N, so going from the highest index down never
# invalidates a ref that is still queued.
mapfile -t SORTED_REFS < <(printf '%s\n' "${STASHES[@]}" \
    | sed 's/:.*//' \
    | sort -t'{' -k2 -n -r)
for REF in "${SORTED_REFS[@]}"; do
    if git stash drop "$REF" >/dev/null 2>&1; then
        ok "Dropped $REF"
    else
        warn "Failed to drop $REF (already gone?)"
    fi
done

echo ""
ok "Cleanup complete. Remaining stashes:"
# `git stash list` exits 0 with empty output when no stashes are left, so an
# `|| echo` fallback never fires; test the captured output instead.
REMAINING_STASHES="$(git stash list 2>/dev/null || true)"
if [[ -n "$REMAINING_STASHES" ]]; then
    printf '%s\n' "$REMAINING_STASHES"
else
    echo " (none)"
fi

View File

@ -95,14 +95,6 @@ phase() {
echo "" echo ""
} }
# Pre-upgrade tenant docs snapshot (no-regrets fallback). Sourced regardless
# of install mode so snapshot_mkdocs is available in Phase 2.
# shellcheck source=lib/mkdocs-snapshot.sh
if [[ -f "$SCRIPT_DIR/lib/mkdocs-snapshot.sh" ]]; then
# shellcheck disable=SC1091
. "$SCRIPT_DIR/lib/mkdocs-snapshot.sh"
fi
# --- API mode: JSON progress/result writing --- # --- API mode: JSON progress/result writing ---
UPGRADE_DIR="${PROJECT_DIR}/data/upgrade" UPGRADE_DIR="${PROJECT_DIR}/data/upgrade"
PROGRESS_FILE="${UPGRADE_DIR}/progress.json" PROGRESS_FILE="${UPGRADE_DIR}/progress.json"
@ -717,18 +709,6 @@ fi
phase "2" "Backup" phase "2" "Backup"
write_progress 2 "Backup" 15 "Creating backup..." write_progress 2 "Backup" 15 "Creating backup..."
# Pre-upgrade tenant docs snapshot — the no-regrets fallback. Runs even when
# --skip-backup is set, because this is for tenant content recovery (not DB
# state) and is fast enough that skipping it would never be intentional. It
# lives in the install root (not backups/) so operators discover it via `ls`.
if declare -F snapshot_mkdocs >/dev/null 2>&1; then
if [[ "$DRY_RUN" == "true" ]]; then
info "[DRY RUN] Would snapshot mkdocs/ to ${PROJECT_DIR}/mkdocs-backup-*.tar.gz"
else
snapshot_mkdocs || warn "mkdocs snapshot failed (non-fatal; continuing)"
fi
fi
if [[ "$SKIP_BACKUP" == "true" ]]; then if [[ "$SKIP_BACKUP" == "true" ]]; then
warn "Backup skipped (--skip-backup --force)" warn "Backup skipped (--skip-backup --force)"
else else
@ -1304,24 +1284,13 @@ while true; do
done done
success "API healthy (${API_WAIT}s)" success "API healthy (${API_WAIT}s)"
# Start everything else (exclude one-shot init containers AND the ccp-agent # Start everything else (exclude one-shot init containers)
# service that's running this very script). Recreating ccp-agent here would
# SIGKILL the script process before write_result has a chance to run; we
# instead schedule a detached restart at the very end of the script.
#
# Mechanism: temporarily drop "ccp-agent" from COMPOSE_PROFILES so the broad
# `up -d` doesn't include it. We re-add it only when scheduling the deferred
# restart so the new agent comes up under its profile.
info "Starting remaining services..." info "Starting remaining services..."
PROFILES_SAVED="${COMPOSE_PROFILES:-}"
COMPOSE_PROFILES_WITHOUT_AGENT="$(echo "${PROFILES_SAVED}" \
| tr ',' '\n' | grep -vx 'ccp-agent' | paste -sd, -)"
COMPOSE_PROFILES="${COMPOSE_PROFILES_WITHOUT_AGENT}" \
docker compose up -d \ docker compose up -d \
--scale listmonk-init=0 \ --scale listmonk-init=0 \
--scale gancio-init=0 \ --scale gancio-init=0 \
--scale vaultwarden-init=0 --scale vaultwarden-init=0
success "All services started (ccp-agent restart deferred to end-of-script)" success "All services started"
# Restart Pangolin tunnel connector if running (may hold stale state after nginx rebuild) # Restart Pangolin tunnel connector if running (may hold stale state after nginx rebuild)
if docker ps --format '{{.Names}}' | grep -q 'newt'; then if docker ps --format '{{.Names}}' | grep -q 'newt'; then
@ -1492,27 +1461,6 @@ echo -e " ${BOLD}Duration:${NC} $ELAPSED"
echo -e " ${BOLD}Log:${NC} $LOG_FILE" echo -e " ${BOLD}Log:${NC} $LOG_FILE"
echo "" echo ""
# Deferred ccp-agent restart — the LAST thing the script does before exit.
# This must run AFTER write_result and archive_success_to_history so the new
# agent comes up to a complete result.json (otherwise CCP polls forever).
# We launch a detached subshell that:
# 1. Sleeps briefly so this script has time to exit cleanly first.
# 2. Restarts ccp-agent under its profile, picking up any new image.
# `nohup` + `disown` ensures the subshell survives the agent container dying
# (when ccp-agent is recreated, the parent agent process — which spawned this
# upgrade.sh — gets SIGKILL'd; the disowned subshell is reparented to PID 1
# on the host and continues).
if echo "${PROFILES_SAVED:-}" | tr ',' '\n' | grep -qx 'ccp-agent'; then
info "Scheduling deferred ccp-agent restart..."
nohup bash -c "
sleep 3
cd '$PROJECT_DIR'
COMPOSE_PROFILES='ccp-agent' docker compose --profile ccp-agent up -d ccp-agent
" >/dev/null 2>&1 < /dev/null &
disown
success "ccp-agent restart scheduled (will pick up new image)"
fi
release_lock release_lock
trap - EXIT trap - EXIT