CCP restore/tunnel/upgrade + upgrade.sh release-mode fixes + volunteer dashboard polish

- Add instance restore model, routes, and agent backup/restore endpoints
- Add Pangolin tunnel service (subdomain prefix, teardown action, CCP client)
- Add slug mutex for concurrent operation safety in agent
- Expand upgrade service with remote driver orchestration
- Fix upgrade.sh to properly handle release-mode installs (no git operations)
- Add CCP registration flags to config.sh (--ccp-url, --ccp-invite-code, --ccp-agent-url)
- Auto-detect JVB advertise IP in non-interactive mode
- Polish volunteer dashboard ActionStepsList with highlighted step component
- Add ticketed event description field + volunteer dashboard query refinements

Bunker Admin
This commit is contained in:
bunker-admin 2026-04-12 11:09:46 -06:00
parent 29d1f3998a
commit 26ec925d9b
35 changed files with 4191 additions and 329 deletions

View File

@ -10,6 +10,8 @@ import {
LinkOutlined, LinkOutlined,
CheckSquareOutlined, CheckSquareOutlined,
CheckCircleFilled, CheckCircleFilled,
RightOutlined,
ThunderboltOutlined,
} from '@ant-design/icons'; } from '@ant-design/icons';
import { useNavigate } from 'react-router-dom'; import { useNavigate } from 'react-router-dom';
import { api } from '@/lib/api'; import { api } from '@/lib/api';
@ -66,6 +68,97 @@ function resolveStepLink(step: DashboardActionStep): { to: string; external: boo
} }
} }
/**
 * "Next Up" callout card for a single action step.
 *
 * Renders the step's kind icon, label and optional description, followed by
 * an action row:
 *  - CUSTOM / VISIT_LINK steps get a primary "Mark as done" button (spinner
 *    driven by `loading`) plus an "Open" button when the step resolves to a link.
 *  - every other kind gets a single "Take Action" button, disabled when the
 *    step does not resolve to a link.
 */
function HighlightedStep({
  step,
  onNavigate,
  onSelfReport,
  loading,
}: {
  step: DashboardActionStep;
  onNavigate: (step: DashboardActionStep) => void;
  onSelfReport: (step: DashboardActionStep) => void;
  loading: boolean;
}) {
  const accent = '#3498db';

  // Static style objects, named so the markup below stays readable.
  const cardStyle = {
    background: 'linear-gradient(135deg, rgba(52,152,219,0.25) 0%, rgba(41,128,185,0.15) 100%)',
    border: '1px solid rgba(52,152,219,0.3)',
    borderRadius: 8,
    padding: '16px 20px',
    margin: '0 0 2px',
  } as const;
  const eyebrowRowStyle = { display: 'flex', alignItems: 'center', gap: 6, marginBottom: 8 } as const;
  const eyebrowTextStyle = {
    fontSize: 12,
    color: accent,
    textTransform: 'uppercase',
    letterSpacing: 0.5,
  } as const;
  const bodyRowStyle = { display: 'flex', alignItems: 'center', gap: 10, marginBottom: 8 } as const;
  const iconBubbleStyle = {
    width: 32,
    height: 32,
    borderRadius: '50%',
    background: 'rgba(52,152,219,0.25)',
    display: 'flex',
    alignItems: 'center',
    justifyContent: 'center',
    fontSize: 16,
    color: accent,
    flexShrink: 0,
  } as const;
  const actionRowStyle = { display: 'flex', gap: 8, marginTop: 4 } as const;

  // Self-reported kinds are completed by the volunteer clicking "Mark as done".
  const selfReported = step.kind === 'VISIT_LINK' || step.kind === 'CUSTOM';
  const hasLink = resolveStepLink(step) !== null;

  const openStep = () => onNavigate(step);
  const markDone = () => onSelfReport(step);

  const renderActions = () => {
    if (!selfReported) {
      return (
        <Button
          type="primary"
          size="middle"
          icon={<RightOutlined />}
          onClick={openStep}
          disabled={!hasLink}
        >
          Take Action
        </Button>
      );
    }
    return (
      <>
        {hasLink && (
          <Button size="middle" onClick={openStep} icon={<RightOutlined />}>
            Open
          </Button>
        )}
        <Button type="primary" size="middle" loading={loading} onClick={markDone}>
          Mark as done
        </Button>
      </>
    );
  };

  return (
    <div style={cardStyle}>
      <div style={eyebrowRowStyle}>
        <ThunderboltOutlined style={{ fontSize: 12, color: accent }} />
        <Typography.Text strong style={eyebrowTextStyle}>
          Next Up
        </Typography.Text>
      </div>
      <div style={bodyRowStyle}>
        <div style={iconBubbleStyle}>{KIND_ICONS[step.kind]}</div>
        <div style={{ flex: 1, minWidth: 0 }}>
          <Typography.Text strong style={{ fontSize: 15, display: 'block' }}>
            {step.label}
          </Typography.Text>
          {step.description && (
            <Typography.Text type="secondary" style={{ fontSize: 12, display: 'block', marginTop: 2 }}>
              {step.description}
            </Typography.Text>
          )}
        </div>
      </div>
      <div style={actionRowStyle}>{renderActions()}</div>
    </div>
  );
}
export default function ActionStepsList({ campaign, onRefresh }: ActionStepsListProps) { export default function ActionStepsList({ campaign, onRefresh }: ActionStepsListProps) {
const navigate = useNavigate(); const navigate = useNavigate();
const { message } = App.useApp(); const { message } = App.useApp();
@ -95,6 +188,8 @@ export default function ActionStepsList({ campaign, onRefresh }: ActionStepsList
}; };
const sortedSteps = [...campaign.steps].sort((a, b) => a.order - b.order); const sortedSteps = [...campaign.steps].sort((a, b) => a.order - b.order);
const highlightedStep = sortedSteps.find((s) => !s.completed);
const remainingSteps = sortedSteps.filter((s) => s.id !== highlightedStep?.id);
return ( return (
<Card <Card
@ -108,7 +203,18 @@ export default function ActionStepsList({ campaign, onRefresh }: ActionStepsList
</Typography.Text> </Typography.Text>
} }
> >
{sortedSteps.map((step, i) => { {highlightedStep && (
<div style={{ padding: '12px 12px 0' }}>
<HighlightedStep
step={highlightedStep}
onNavigate={handleNavigate}
onSelfReport={handleSelfReport}
loading={completingStepId === highlightedStep.id}
/>
</div>
)}
{remainingSteps.map((step, i) => {
const isSelfReport = step.kind === 'CUSTOM' || step.kind === 'VISIT_LINK'; const isSelfReport = step.kind === 'CUSTOM' || step.kind === 'VISIT_LINK';
const canNavigate = resolveStepLink(step) !== null; const canNavigate = resolveStepLink(step) !== null;
@ -119,8 +225,8 @@ export default function ActionStepsList({ campaign, onRefresh }: ActionStepsList
display: 'flex', display: 'flex',
alignItems: 'center', alignItems: 'center',
justifyContent: 'space-between', justifyContent: 'space-between',
padding: '12px 20px', padding: '10px 20px',
borderTop: i > 0 ? '1px solid rgba(255,255,255,0.04)' : undefined, borderTop: (highlightedStep || i > 0) ? '1px solid rgba(255,255,255,0.04)' : undefined,
opacity: step.completed ? 0.55 : 1, opacity: step.completed ? 0.55 : 1,
gap: 12, gap: 12,
}} }}
@ -128,22 +234,22 @@ export default function ActionStepsList({ campaign, onRefresh }: ActionStepsList
<div style={{ display: 'flex', alignItems: 'center', gap: 10, flex: 1, minWidth: 0 }}> <div style={{ display: 'flex', alignItems: 'center', gap: 10, flex: 1, minWidth: 0 }}>
<div <div
style={{ style={{
width: 26, width: 24,
height: 26, height: 24,
borderRadius: '50%', borderRadius: '50%',
background: step.completed ? '#52c41a' : 'rgba(52,152,219,0.15)', background: step.completed ? '#52c41a' : 'rgba(255,255,255,0.06)',
display: 'flex', display: 'flex',
alignItems: 'center', alignItems: 'center',
justifyContent: 'center', justifyContent: 'center',
fontSize: 13, fontSize: 12,
flexShrink: 0, flexShrink: 0,
color: step.completed ? '#fff' : 'rgba(255,255,255,0.7)', color: step.completed ? '#fff' : 'rgba(255,255,255,0.5)',
}} }}
> >
{step.completed ? <CheckCircleFilled /> : KIND_ICONS[step.kind]} {step.completed ? <CheckCircleFilled /> : KIND_ICONS[step.kind]}
</div> </div>
<div style={{ minWidth: 0 }}> <div style={{ minWidth: 0 }}>
<Typography.Text strong style={{ fontSize: 12, color: 'rgba(255,255,255,0.45)', display: 'block' }}> <Typography.Text strong style={{ fontSize: 11, color: 'rgba(255,255,255,0.35)', display: 'block', lineHeight: 1 }}>
{KIND_LABELS[step.kind]} {KIND_LABELS[step.kind]}
</Typography.Text> </Typography.Text>
<Typography.Text <Typography.Text
@ -163,7 +269,7 @@ export default function ActionStepsList({ campaign, onRefresh }: ActionStepsList
<div style={{ flexShrink: 0 }}> <div style={{ flexShrink: 0 }}>
{step.completed ? ( {step.completed ? (
<Tag color="success" style={{ margin: 0 }}>Done</Tag> <Tag color="success" style={{ margin: 0, fontSize: 11 }}>Done</Tag>
) : isSelfReport ? ( ) : isSelfReport ? (
<Space size={4}> <Space size={4}>
{canNavigate && ( {canNavigate && (

View File

@ -7,7 +7,7 @@ import {
import { import {
PlusOutlined, SearchOutlined, EditOutlined, EyeOutlined, DeleteOutlined, PlusOutlined, SearchOutlined, EditOutlined, EyeOutlined, DeleteOutlined,
CheckCircleOutlined, CloseCircleOutlined, CopyOutlined, ScanOutlined, CheckCircleOutlined, CloseCircleOutlined, CopyOutlined, ScanOutlined,
TagOutlined, VideoCameraOutlined, EnvironmentOutlined, TagOutlined, VideoCameraOutlined, EnvironmentOutlined, StarOutlined, StarFilled,
} from '@ant-design/icons'; } from '@ant-design/icons';
import { api } from '@/lib/api'; import { api } from '@/lib/api';
import dayjs from 'dayjs'; import dayjs from 'dayjs';
@ -45,6 +45,7 @@ interface TicketedEvent {
currentAttendees: number; currentAttendees: number;
coverImageUrl: string | null; coverImageUrl: string | null;
organizerName: string | null; organizerName: string | null;
featured: boolean;
ticketTiers: TicketTier[]; ticketTiers: TicketTier[];
_count: { tickets: number; checkIns: number }; _count: { tickets: number; checkIns: number };
createdAt: string; createdAt: string;
@ -198,18 +199,55 @@ export default function TicketedEventsPage() {
} }
}; };
/**
 * Set or clear the `featured` flag on a ticketed event.
 *
 * Featuring is an exclusive toggle: before featuring `id`, every other event
 * in the loaded `events` list that is currently featured is unfeatured first.
 * NOTE(review): exclusivity is enforced client-side against `events` only —
 * presumably featured events not present in this list are untouched; confirm
 * whether the API should enforce this instead.
 *
 * @param id       Event to update.
 * @param featured Desired featured state for that event.
 */
const handleFeature = async (id: string, featured: boolean) => {
  try {
    if (featured) {
      // Unfeature all others first (exclusive toggle)
      const othersToUnfeature = events.filter((e) => e.featured && e.id !== id);
      await Promise.all(
        othersToUnfeature.map((e) => api.put(`/api/ticketed-events/admin/${e.id}`, { featured: false }))
      );
    }
    await api.put(`/api/ticketed-events/admin/${id}`, { featured });
    message.success(featured ? 'Event featured on volunteer dashboard' : 'Event unfeatured');
  } catch {
    message.error('Failed to update featured status');
  } finally {
    // Refresh on failure too: some of the PUTs above may already have been
    // applied (e.g. others unfeatured, target not yet featured), and the
    // table would otherwise keep showing the stale stars.
    fetchEvents();
  }
};
const copyLink = (slug: string) => { const copyLink = (slug: string) => {
navigator.clipboard.writeText(`${window.location.origin}/event/${slug}`); navigator.clipboard.writeText(`${window.location.origin}/event/${slug}`);
message.success('Link copied'); message.success('Link copied');
}; };
const columns = [ const columns = [
{
title: '',
key: 'featured',
width: 36,
render: (_: unknown, record: TicketedEvent) => (
<Tooltip title={record.featured ? 'Remove from volunteer dashboard' : 'Feature on volunteer dashboard'}>
<Button
type="text"
size="small"
icon={record.featured
? <StarFilled style={{ color: '#faad14' }} />
: <StarOutlined style={{ color: 'rgba(255,255,255,0.25)' }} />}
onClick={(e) => { e.stopPropagation(); handleFeature(record.id, !record.featured); }}
/>
</Tooltip>
),
},
{ {
title: 'Title', title: 'Title',
dataIndex: 'title', dataIndex: 'title',
key: 'title', key: 'title',
render: (text: string, record: TicketedEvent) => ( render: (text: string, record: TicketedEvent) => (
<Space>
<a onClick={() => navigate(`/app/events/${record.id}`)}>{text}</a> <a onClick={() => navigate(`/app/events/${record.id}`)}>{text}</a>
{record.featured && <Tag color="gold" style={{ fontSize: 11 }}>Featured</Tag>}
</Space>
), ),
}, },
{ {

View File

@ -50,6 +50,7 @@ export const updateEventSchema = z.object({
maxAttendees: z.number().int().positive().nullable().optional(), maxAttendees: z.number().int().positive().nullable().optional(),
organizerName: z.string().max(200).nullable().optional(), organizerName: z.string().max(200).nullable().optional(),
organizerEmail: z.string().email().nullable().optional(), organizerEmail: z.string().email().nullable().optional(),
featured: z.boolean().optional(),
}); });
export const createTierSchema = z.object({ export const createTierSchema = z.object({

View File

@ -114,14 +114,7 @@ async function getReferral(userId: string): Promise<DashboardReferral> {
async function getFeaturedEvent(): Promise<DashboardFeaturedEvent | null> { async function getFeaturedEvent(): Promise<DashboardFeaturedEvent | null> {
const today = new Date(); const today = new Date();
today.setHours(0, 0, 0, 0); today.setHours(0, 0, 0, 0);
const event = await prisma.ticketedEvent.findFirst({ const eventSelect = {
where: {
featured: true,
status: TicketedEventStatus.PUBLISHED,
date: { gte: today },
},
orderBy: { date: 'asc' },
select: {
slug: true, slug: true,
title: true, title: true,
date: true, date: true,
@ -130,8 +123,22 @@ async function getFeaturedEvent(): Promise<DashboardFeaturedEvent | null> {
coverImageUrl: true, coverImageUrl: true,
currentAttendees: true, currentAttendees: true,
maxAttendees: true, maxAttendees: true,
}, } as const;
const baseWhere = { status: TicketedEventStatus.PUBLISHED, date: { gte: today } };
// Prefer admin-featured event; fall back to next upcoming published event
const event =
await prisma.ticketedEvent.findFirst({
where: { ...baseWhere, featured: true },
orderBy: { date: 'asc' },
select: eventSelect,
}) ??
await prisma.ticketedEvent.findFirst({
where: baseWhere,
orderBy: { date: 'asc' },
select: eventSelect,
}); });
if (!event) return null; if (!event) return null;
return { return {
slug: event.slug, slug: event.slug,

View File

@ -14,7 +14,7 @@ export default function AgentRegistrationsPage() {
const fetchRegistrations = useCallback(async () => { const fetchRegistrations = useCallback(async () => {
try { try {
setLoading(true); setLoading(true);
const { data } = await api.get('/api/agents/registrations'); const { data } = await api.get('/agents/registrations');
setRegistrations(data); setRegistrations(data);
} catch { } catch {
message.error('Failed to load registrations'); message.error('Failed to load registrations');
@ -27,7 +27,7 @@ export default function AgentRegistrationsPage() {
const handleApprove = async (id: string) => { const handleApprove = async (id: string) => {
try { try {
await api.post(`/api/agents/registrations/${id}/approve`); await api.post(`/agents/registrations/${id}/approve`);
message.success('Registration approved — agent will receive certificates on next poll'); message.success('Registration approved — agent will receive certificates on next poll');
fetchRegistrations(); fetchRegistrations();
setDetailModal(null); setDetailModal(null);
@ -39,7 +39,7 @@ export default function AgentRegistrationsPage() {
const handleReject = async (id: string) => { const handleReject = async (id: string) => {
try { try {
await api.post(`/api/agents/registrations/${id}/reject`); await api.post(`/agents/registrations/${id}/reject`);
message.success('Registration rejected'); message.success('Registration rejected');
fetchRegistrations(); fetchRegistrations();
setDetailModal(null); setDetailModal(null);

View File

@ -203,8 +203,16 @@ export default function BackupsPage() {
{ {
title: 'Instance', title: 'Instance',
dataIndex: 'instance', dataIndex: 'instance',
width: 160, width: 180,
render: (inst: BackupRow['instance']) => inst?.name || '-', render: (inst: BackupRow['instance'], record: BackupRow) => {
const isRemote = record.manifest?.source === 'remote';
return (
<Space size={4}>
<span>{inst?.name || '-'}</span>
{isRemote && <Tag color="blue">remote</Tag>}
</Space>
);
},
}, },
{ {
title: 'Status', title: 'Status',

View File

@ -44,6 +44,7 @@ import {
WarningOutlined, WarningOutlined,
CloseCircleOutlined, CloseCircleOutlined,
InfoCircleOutlined, InfoCircleOutlined,
UndoOutlined,
} from '@ant-design/icons'; } from '@ant-design/icons';
import dayjs from 'dayjs'; import dayjs from 'dayjs';
import { useNavigate, useParams } from 'react-router-dom'; import { useNavigate, useParams } from 'react-router-dom';
@ -89,6 +90,16 @@ export default function InstanceDetailPage() {
const [backupsLoading, setBackupsLoading] = useState(false); const [backupsLoading, setBackupsLoading] = useState(false);
const [creatingBackup, setCreatingBackup] = useState(false); const [creatingBackup, setCreatingBackup] = useState(false);
// Restore state
const [restoreModal, setRestoreModal] = useState<{ backup: Backup; typedSlug: string } | null>(null);
const [restoring, setRestoring] = useState(false);
const [activeRestoreId, setActiveRestoreId] = useState<string | null>(null);
const [activeRestoreState, setActiveRestoreState] = useState<{
status: string;
logTail?: string | null;
errorMessage?: string | null;
} | null>(null);
// Feature reconfiguration state // Feature reconfiguration state
const [featureFlags, setFeatureFlags] = useState<Record<string, boolean>>({}); const [featureFlags, setFeatureFlags] = useState<Record<string, boolean>>({});
const [reconfiguring, setReconfiguring] = useState(false); const [reconfiguring, setReconfiguring] = useState(false);
@ -109,6 +120,18 @@ export default function InstanceDetailPage() {
const [tunnelSaving, setTunnelSaving] = useState(false); const [tunnelSaving, setTunnelSaving] = useState(false);
const [tunnelRemoving, setTunnelRemoving] = useState(false); const [tunnelRemoving, setTunnelRemoving] = useState(false);
// Remote tunnel state (Pangolin API managed by CCP)
const [tunnelStatus, setTunnelStatus] = useState<{
configured: boolean;
online?: boolean;
siteId?: string;
endpoint?: string;
resources?: Array<{ subdomain: string; name: string; resourceId: string; hasTarget: boolean; targetIp?: string; targetPort?: number }>;
} | null>(null);
const [tunnelStatusLoading, setTunnelStatusLoading] = useState(false);
const [tunnelSetupRunning, setTunnelSetupRunning] = useState(false);
const [tunnelSyncing, setTunnelSyncing] = useState(false);
// Upgrade state // Upgrade state
const [updateStatus, setUpdateStatus] = useState<UpdateStatus | null>(null); const [updateStatus, setUpdateStatus] = useState<UpdateStatus | null>(null);
const [checkingUpdate, setCheckingUpdate] = useState(false); const [checkingUpdate, setCheckingUpdate] = useState(false);
@ -390,6 +413,64 @@ export default function InstanceDetailPage() {
window.open(`/api/backups/${backupId}/download`, '_blank'); window.open(`/api/backups/${backupId}/download`, '_blank');
}; };
/**
 * OK handler for the restore confirmation modal.
 *
 * Does nothing when no modal is open. Refuses (with an error toast) unless the
 * typed slug equals the instance slug. Otherwise POSTs the restore request for
 * the selected backup, records the returned restore id and an initial PENDING
 * state, and closes the modal. `restoring` is true for the duration of the
 * request.
 */
const handleRestoreConfirm = async () => {
  if (!restoreModal) return;

  const slugConfirmed = restoreModal.typedSlug === instance?.slug;
  if (!slugConfirmed) {
    message.error('Typed slug does not match — restore cancelled');
    return;
  }

  setRestoring(true);
  try {
    const payload = { backupId: restoreModal.backup.id };
    const response = await api.post(`/instances/${id}/restore`, payload);
    const restoreId = response.data.data.id as string;

    setActiveRestoreId(restoreId);
    setActiveRestoreState({ status: 'PENDING' });
    setRestoreModal(null);
    message.success('Restore started — polling for progress');
  } catch (err: unknown) {
    // Prefer the message from the API error envelope when one is present.
    const apiError = err as { response?: { data?: { error?: { message?: string } } } };
    const detail = apiError?.response?.data?.error?.message;
    message.error(detail || 'Failed to start restore');
  } finally {
    setRestoring(false);
  }
};
// Poll the active restore's status every 3s until it completes or fails.
//
// Polling is stopped synchronously as soon as a terminal status (COMPLETED /
// FAILED) is observed, rather than waiting for React to re-run the effect
// cleanup after `setActiveRestoreId(null)`. Without that, an overlapping
// request that was already in flight (a poll slower than the 3s interval)
// could resolve afterwards and show the completion/failure toast twice.
useEffect(() => {
  if (!activeRestoreId) return;

  let cancelled = false;
  let handle: ReturnType<typeof setInterval> | undefined;

  // Shared by terminal-status handling and effect cleanup.
  const stop = () => {
    cancelled = true;
    if (handle !== undefined) clearInterval(handle);
  };

  const poll = async () => {
    try {
      const { data } = await api.get(`/instances/${id}/restores/${activeRestoreId}`);
      // Ignore responses that arrive after unmount / id change / terminal status.
      if (cancelled) return;
      const row = data.data;
      setActiveRestoreState({
        status: row.status,
        logTail: row.logTail,
        errorMessage: row.errorMessage,
      });
      if (row.status === 'COMPLETED') {
        stop();
        message.success('Restore completed successfully');
        setActiveRestoreId(null);
        fetchBackups();
      } else if (row.status === 'FAILED') {
        stop();
        message.error(`Restore failed: ${row.errorMessage || 'unknown error'}`);
        setActiveRestoreId(null);
      }
    } catch {
      // keep trying; transient errors are expected during remote restart
    }
  };

  // Fire once immediately so the UI does not wait 3s for the first update.
  poll();
  handle = setInterval(poll, 3000);
  return stop;
}, [activeRestoreId, id, fetchBackups]);
// Initialize feature flags and tunnel form when instance loads // Initialize feature flags and tunnel form when instance loads
useEffect(() => { useEffect(() => {
if (instance) { if (instance) {
@ -508,6 +589,11 @@ export default function InstanceDetailPage() {
const ports = instance.portConfig as Record<string, number>; const ports = instance.portConfig as Record<string, number>;
const isProvisioning = instance.status === 'PROVISIONING'; const isProvisioning = instance.status === 'PROVISIONING';
const isRegistered = instance.isRegistered; const isRegistered = instance.isRegistered;
const isRemote = instance.isRemote;
// A "managed" instance is one CCP can run backup/restore/upgrade on.
// Local CCP-managed and remote (agent-backed) both qualify; only locally-
// adopted registered instances (isRegistered && !isRemote) are unmanaged.
const isManaged = !isRegistered || isRemote;
const canStart = instance.status === 'STOPPED' || instance.status === 'ERROR'; const canStart = instance.status === 'STOPPED' || instance.status === 'ERROR';
const canStop = instance.status === 'RUNNING' || instance.status === 'ERROR'; const canStop = instance.status === 'RUNNING' || instance.status === 'ERROR';
const canRestart = instance.status === 'RUNNING'; const canRestart = instance.status === 'RUNNING';
@ -731,7 +817,7 @@ export default function InstanceDetailPage() {
const backupsTab = ( const backupsTab = (
<div> <div>
{isRegistered && ( {!isManaged && (
<Alert <Alert
message="Backups not managed by CCP" message="Backups not managed by CCP"
description="This instance was deployed outside the control panel. Use its own backup tools to manage backups." description="This instance was deployed outside the control panel. Use its own backup tools to manage backups."
@ -740,6 +826,15 @@ export default function InstanceDetailPage() {
style={{ marginBottom: 16 }} style={{ marginBottom: 16 }}
/> />
)} )}
{isRemote && (
<Alert
message="Remote instance"
description="Backups and restores run via the remote agent over mTLS. Create Backup triggers scripts/backup.sh on the remote host and streams the archive back to the control panel."
type="info"
showIcon
style={{ marginBottom: 16 }}
/>
)}
<div style={{ marginBottom: 12, display: 'flex', justifyContent: 'space-between' }}> <div style={{ marginBottom: 12, display: 'flex', justifyContent: 'space-between' }}>
<Typography.Text type="secondary"> <Typography.Text type="secondary">
{backups.length} backup{backups.length !== 1 ? 's' : ''} {backups.length} backup{backups.length !== 1 ? 's' : ''}
@ -749,7 +844,7 @@ export default function InstanceDetailPage() {
type="primary" type="primary"
onClick={handleCreateBackup} onClick={handleCreateBackup}
loading={creatingBackup} loading={creatingBackup}
disabled={instance.status !== 'RUNNING' || isRegistered} disabled={instance.status !== 'RUNNING' || !isManaged}
> >
Create Backup Create Backup
</Button> </Button>
@ -784,20 +879,36 @@ export default function InstanceDetailPage() {
{ {
title: 'Size', title: 'Size',
dataIndex: 'sizeBytes', dataIndex: 'sizeBytes',
render: (b: number | null) => (b ? `${(b / 1024 / 1024).toFixed(1)} MB` : '-'), render: (b: number | string | null) => {
if (b == null) return '-';
const n = typeof b === 'string' ? parseInt(b, 10) : b;
return `${(n / 1024 / 1024).toFixed(1)} MB`;
},
}, },
{ {
title: 'Actions', title: 'Actions',
width: 120, width: 160,
render: (_: unknown, record: Backup) => ( render: (_: unknown, record: Backup) => (
<Space size="small"> <Space size="small">
{record.status === 'COMPLETED' && ( {record.status === 'COMPLETED' && (
<>
<Button <Button
icon={<CloudDownloadOutlined />} icon={<CloudDownloadOutlined />}
size="small" size="small"
type="text" type="text"
title="Download archive"
onClick={() => handleDownloadBackup(record.id)} onClick={() => handleDownloadBackup(record.id)}
/> />
{isManaged && (
<Button
icon={<UndoOutlined />}
size="small"
type="text"
title="Restore this backup (destructive)"
onClick={() => setRestoreModal({ backup: record, typedSlug: '' })}
/>
)}
</>
)} )}
<Popconfirm <Popconfirm
title="Delete this backup?" title="Delete this backup?"
@ -1049,7 +1160,73 @@ export default function InstanceDetailPage() {
); );
const tunnelConfigured = !!(instance.pangolinEndpoint && instance.pangolinNewtId); const tunnelConfigured = !!(instance.pangolinEndpoint && instance.pangolinNewtId);
const canConfigureTunnel = !isRegistered && (instance.status === 'RUNNING' || instance.status === 'STOPPED'); const canConfigureTunnel = isManaged && (instance.status === 'RUNNING' || instance.status === 'STOPPED');
// Load the tunnel status for a remote instance into `tunnelStatus`.
// No-op for non-remote instances. A failed request clears `tunnelStatus` to
// null; `tunnelStatusLoading` is true while the request is in flight.
//
// NOTE(review): this hook depends on `isRemote`, which is derived from a
// non-null `instance`. If the component returns early (loading / not-found)
// before reaching this point, calling useCallback here would break the Rules
// of Hooks — confirm the hook order against the full component.
const fetchTunnelStatus = useCallback(async () => {
  if (isRemote) {
    setTunnelStatusLoading(true);
    try {
      const response = await api.get(`/instances/${id}/tunnel/status`);
      setTunnelStatus(response.data.data);
    } catch {
      setTunnelStatus(null);
    } finally {
      setTunnelStatusLoading(false);
    }
  }
}, [id, isRemote]);
// Refresh the remote tunnel status whenever the Tunnel tab is the active tab
// of a remote instance (and whenever the fetcher identity changes).
useEffect(() => {
  const tunnelTabActive = activeTab === 'tunnel';
  if (!tunnelTabActive || !isRemote) return;
  fetchTunnelStatus();
}, [activeTab, isRemote, fetchTunnelStatus]);
/**
 * Submit handler for the remote "Setup Tunnel" form.
 *
 * POSTs the tunnel setup request using the entered subdomain prefix, falling
 * back to the instance slug when the field is empty, then reloads the instance
 * and the tunnel status. `tunnelSetupRunning` is true while the request runs.
 */
const handleRemoteTunnelSetup = async (values: { subdomainPrefix?: string }) => {
  setTunnelSetupRunning(true);
  try {
    const subdomainPrefix = values.subdomainPrefix || instance.slug;
    await api.post(`/instances/${id}/tunnel/setup`, { subdomainPrefix });
    message.success('Tunnel setup complete — Newt credentials pushed to remote instance');
    fetchInstance();
    fetchTunnelStatus();
  } catch (err: unknown) {
    // Prefer the message from the API error envelope when one is present.
    const apiError = err as { response?: { data?: { error?: { message?: string } } } };
    const detail = apiError?.response?.data?.error?.message;
    message.error(detail || 'Tunnel setup failed');
  } finally {
    setTunnelSetupRunning(false);
  }
};
/**
 * "Sync" button handler for the remote tunnel resources card.
 *
 * POSTs the sync request, reports how many resources the response says were
 * created, and reloads the tunnel status. `tunnelSyncing` is true while the
 * request runs.
 */
const handleTunnelSync = async () => {
  setTunnelSyncing(true);
  try {
    const response = await api.post(`/instances/${id}/tunnel/sync`);
    const createdCount = response.data.data.created;
    message.success(`Sync complete — ${createdCount} new resource(s) created`);
    fetchTunnelStatus();
  } catch (err: unknown) {
    // Prefer the message from the API error envelope when one is present.
    const apiError = err as { response?: { data?: { error?: { message?: string } } } };
    const detail = apiError?.response?.data?.error?.message;
    message.error(detail || 'Sync failed');
  } finally {
    setTunnelSyncing(false);
  }
};
/**
 * Confirm handler for "Teardown Tunnel" on a remote instance.
 *
 * Sends the DELETE for the instance's tunnel, then reloads the instance and
 * clears the cached tunnel status. `tunnelRemoving` is true while the request
 * runs.
 */
const handleRemoteTunnelTeardown = async () => {
  setTunnelRemoving(true);
  try {
    const tunnelUrl = `/instances/${id}/tunnel`;
    await api.delete(tunnelUrl);
    message.success('Tunnel torn down — Pangolin site deleted');
    fetchInstance();
    setTunnelStatus(null);
  } catch (err: unknown) {
    // Prefer the message from the API error envelope when one is present.
    const apiError = err as { response?: { data?: { error?: { message?: string } } } };
    const detail = apiError?.response?.data?.error?.message;
    message.error(detail || 'Teardown failed');
  } finally {
    setTunnelRemoving(false);
  }
};
const handleConfigureTunnel = async (values: { pangolinEndpoint: string; pangolinNewtId: string; pangolinNewtSecret?: string }) => { const handleConfigureTunnel = async (values: { pangolinEndpoint: string; pangolinNewtId: string; pangolinNewtSecret?: string }) => {
setTunnelSaving(true); setTunnelSaving(true);
@ -1088,9 +1265,111 @@ export default function InstanceDetailPage() {
} }
}; };
const tunnelTab = ( const remoteTunnelTab = (
<Space direction="vertical" size="large" style={{ width: '100%' }}> <Space direction="vertical" size="large" style={{ width: '100%' }}>
{isRegistered && ( {tunnelStatus?.configured ? (
<>
<Alert
message={`Tunnel active — ${tunnelStatus.online ? 'online' : 'offline'}`}
description={`Connected to ${tunnelStatus.endpoint || instance.pangolinEndpoint} (site: ${tunnelStatus.siteId})`}
type={tunnelStatus.online ? 'success' : 'warning'}
showIcon
icon={<CloudOutlined />}
/>
<Card title="Current Configuration" size="small">
<Descriptions bordered column={1}>
<Descriptions.Item label="Endpoint">
<Typography.Text copyable>{tunnelStatus.endpoint || instance.pangolinEndpoint}</Typography.Text>
</Descriptions.Item>
<Descriptions.Item label="Site ID">
<Typography.Text copyable>{tunnelStatus.siteId || instance.pangolinSiteId}</Typography.Text>
</Descriptions.Item>
<Descriptions.Item label="Newt ID">
<Typography.Text copyable>{instance.pangolinNewtId}</Typography.Text>
</Descriptions.Item>
<Descriptions.Item label="Status">
<Tag color={tunnelStatus.online ? 'green' : 'orange'}>{tunnelStatus.online ? 'Online' : 'Offline'}</Tag>
</Descriptions.Item>
</Descriptions>
</Card>
{tunnelStatus.resources && tunnelStatus.resources.length > 0 && (
<Card
title="Resources"
size="small"
extra={
<Space>
<Button icon={<SyncOutlined />} size="small" onClick={handleTunnelSync} loading={tunnelSyncing}>
Sync
</Button>
<Button icon={<ReloadOutlined />} size="small" onClick={fetchTunnelStatus} loading={tunnelStatusLoading}>
Refresh
</Button>
</Space>
}
>
<Table
dataSource={tunnelStatus.resources}
rowKey="resourceId"
size="small"
pagination={false}
columns={[
{ title: 'Subdomain', dataIndex: 'subdomain', render: (s: string) => s || '(root)' },
{ title: 'Name', dataIndex: 'name' },
{ title: 'Target', render: (_: unknown, r: { hasTarget: boolean; targetIp?: string; targetPort?: number }) =>
r.hasTarget ? `${r.targetIp}:${r.targetPort}` : <Tag color="red">No target</Tag>
},
]}
/>
</Card>
)}
<Popconfirm
title="Tear down tunnel?"
description="This will delete the Pangolin site and all resources. The Newt container will be stopped."
onConfirm={handleRemoteTunnelTeardown}
>
<Button danger icon={<DisconnectOutlined />} loading={tunnelRemoving}>
Teardown Tunnel
</Button>
</Popconfirm>
</>
) : (
<>
<Alert
message="No tunnel configured"
description="The CCP will create a Pangolin site and resources for this instance, push Newt credentials to its .env, and start the tunnel container."
type="info"
showIcon
/>
<Card title="Setup Tunnel" size="small">
<Form layout="vertical" onFinish={handleRemoteTunnelSetup}>
<Form.Item
name="subdomainPrefix"
label="Subdomain Prefix"
initialValue={instance.slug}
extra={`Resources will be created as <prefix>-app.${instance.domain}, <prefix>-api.${instance.domain}, etc.`}
rules={[{ required: true }, { pattern: /^[a-z0-9-]+$/, message: 'Lowercase alphanumeric + hyphens only' }]}
>
<Input placeholder={instance.slug} />
</Form.Item>
<Form.Item style={{ marginBottom: 0 }}>
<Button type="primary" htmlType="submit" icon={<CloudOutlined />} loading={tunnelSetupRunning}>
Setup Tunnel
</Button>
</Form.Item>
</Form>
</Card>
</>
)}
</Space>
);
const localTunnelTab = (
<Space direction="vertical" size="large" style={{ width: '100%' }}>
{!isManaged && (
<Alert <Alert
message="Tunnel management is not available for external instances" message="Tunnel management is not available for external instances"
description="This instance was deployed outside the control panel. Manage its tunnel configuration directly." description="This instance was deployed outside the control panel. Manage its tunnel configuration directly."
@ -1099,7 +1378,7 @@ export default function InstanceDetailPage() {
/> />
)} )}
{!isRegistered && tunnelConfigured && ( {isManaged && tunnelConfigured && (
<Alert <Alert
message={`Tunnel active — connected to ${instance.pangolinEndpoint}`} message={`Tunnel active — connected to ${instance.pangolinEndpoint}`}
type="success" type="success"
@ -1108,7 +1387,7 @@ export default function InstanceDetailPage() {
/> />
)} )}
{!isRegistered && !tunnelConfigured && ( {isManaged && !tunnelConfigured && (
<Alert <Alert
message="No tunnel configured" message="No tunnel configured"
description="Enter your Pangolin Newt credentials below to enable tunnel access for this instance. You can get these from your Pangolin dashboard." description="Enter your Pangolin Newt credentials below to enable tunnel access for this instance. You can get these from your Pangolin dashboard."
@ -1133,7 +1412,7 @@ export default function InstanceDetailPage() {
</Card> </Card>
)} )}
{canConfigureTunnel && ( {canConfigureTunnel && !isRemote && (
<Card title={tunnelConfigured ? 'Update Tunnel' : 'Enable Tunnel'} size="small"> <Card title={tunnelConfigured ? 'Update Tunnel' : 'Enable Tunnel'} size="small">
<Form <Form
form={tunnelForm} form={tunnelForm}
@ -1200,6 +1479,8 @@ export default function InstanceDetailPage() {
</Space> </Space>
); );
const tunnelTab = isRemote ? remoteTunnelTab : localTunnelTab;
// ─── Updates Tab ────────────────────────────────────────────── // ─── Updates Tab ──────────────────────────────────────────────
const isUpgrading = currentUpgrade?.status === 'IN_PROGRESS' || currentUpgrade?.status === 'PENDING'; const isUpgrading = currentUpgrade?.status === 'IN_PROGRESS' || currentUpgrade?.status === 'PENDING';
@ -1278,7 +1559,7 @@ export default function InstanceDetailPage() {
)} )}
{/* Upgrade Action */} {/* Upgrade Action */}
{!isRegistered && ( {isManaged && (
<Card title="Upgrade" size="small"> <Card title="Upgrade" size="small">
{isUpgrading && currentUpgrade ? ( {isUpgrading && currentUpgrade ? (
<Space direction="vertical" style={{ width: '100%' }}> <Space direction="vertical" style={{ width: '100%' }}>
@ -1340,7 +1621,7 @@ export default function InstanceDetailPage() {
</Card> </Card>
)} )}
{isRegistered && ( {!isManaged && (
<Alert <Alert
message="Upgrades are not managed by CCP for external instances" message="Upgrades are not managed by CCP for external instances"
description="Run the upgrade script directly on the instance or use its own upgrade mechanism." description="Run the upgrade script directly on the instance or use its own upgrade mechanism."
@ -1348,6 +1629,14 @@ export default function InstanceDetailPage() {
showIcon showIcon
/> />
)} )}
{isRemote && (
<Alert
message="Remote instance"
description="Upgrades run via the remote agent over mTLS. The agent shells out to scripts/upgrade.sh --api-mode and the control panel polls progress every 2s."
type="info"
showIcon
/>
)}
{/* Upgrade History */} {/* Upgrade History */}
<Card title="Upgrade History" size="small"> <Card title="Upgrade History" size="small">
@ -1794,6 +2083,108 @@ export default function InstanceDetailPage() {
{ key: 'tunnel', label: 'Tunnel', children: tunnelTab }, { key: 'tunnel', label: 'Tunnel', children: tunnelTab },
]} ]}
/> />
{/* Restore confirmation modal (destructive action guard) */}
<Modal
title="Restore backup — destructive"
open={!!restoreModal}
onCancel={() => setRestoreModal(null)}
onOk={handleRestoreConfirm}
okText="Restore"
okButtonProps={{
danger: true,
loading: restoring,
disabled: restoreModal?.typedSlug !== instance.slug,
}}
cancelButtonProps={{ disabled: restoring }}
width={560}
>
<Alert
type="error"
showIcon
message="This will OVERWRITE the instance's databases and uploads"
description="The agent will stop application containers, drop databases, and restore from the selected backup. This cannot be undone without another backup."
style={{ marginBottom: 16 }}
/>
{restoreModal && (
<Descriptions column={1} size="small" bordered style={{ marginBottom: 16 }}>
<Descriptions.Item label="Backup ID">
<code>{restoreModal.backup.id.substring(0, 8)}</code>
</Descriptions.Item>
<Descriptions.Item label="Archive size">
{restoreModal.backup.sizeBytes
? `${(Number(restoreModal.backup.sizeBytes) / 1024 / 1024).toFixed(1)} MB`
: '-'}
</Descriptions.Item>
<Descriptions.Item label="Created">
{restoreModal.backup.completedAt
? dayjs(restoreModal.backup.completedAt).format('YYYY-MM-DD HH:mm')
: '-'}
</Descriptions.Item>
</Descriptions>
)}
<Typography.Paragraph>
Type the instance slug <strong><code>{instance.slug}</code></strong> to confirm:
</Typography.Paragraph>
<Input
value={restoreModal?.typedSlug || ''}
onChange={(e) =>
setRestoreModal((cur) => (cur ? { ...cur, typedSlug: e.target.value } : cur))
}
placeholder={instance.slug}
autoFocus
/>
</Modal>
{/* Active restore progress banner */}
{activeRestoreId && activeRestoreState && (
<Modal
title="Restore in progress"
open
closable={false}
footer={null}
width={640}
>
<Space direction="vertical" style={{ width: '100%' }}>
<div>
<Tag
color={
activeRestoreState.status === 'COMPLETED'
? 'green'
: activeRestoreState.status === 'FAILED'
? 'red'
: 'processing'
}
>
{activeRestoreState.status}
</Tag>
{activeRestoreState.status === 'RUNNING' && (
<Typography.Text type="secondary" style={{ marginLeft: 8 }}>
Agent is running scripts/restore.sh this can take several minutes
</Typography.Text>
)}
</div>
{activeRestoreState.errorMessage && (
<Alert type="error" message={activeRestoreState.errorMessage} showIcon />
)}
{activeRestoreState.logTail && (
<pre
style={{
background: '#1e1e1e',
color: '#d4d4d4',
padding: 12,
maxHeight: 300,
overflow: 'auto',
fontSize: 12,
borderRadius: 4,
}}
>
{activeRestoreState.logTail}
</pre>
)}
</Space>
</Modal>
)}
</div> </div>
); );
} }

View File

@ -14,7 +14,7 @@ export default function InviteCodesPage() {
const fetchCodes = useCallback(async () => { const fetchCodes = useCallback(async () => {
try { try {
setLoading(true); setLoading(true);
const { data } = await api.get('/api/invite-codes'); const { data } = await api.get('/invite-codes');
setCodes(data.data || []); setCodes(data.data || []);
} catch { } catch {
message.error('Failed to load invite codes'); message.error('Failed to load invite codes');
@ -28,7 +28,7 @@ export default function InviteCodesPage() {
const handleCreate = async () => { const handleCreate = async () => {
try { try {
setCreating(true); setCreating(true);
const { data } = await api.post('/api/invite-codes'); const { data } = await api.post('/invite-codes');
message.success(`Invite code created: ${data.code}`); message.success(`Invite code created: ${data.code}`);
fetchCodes(); fetchCodes();
} catch { } catch {
@ -40,7 +40,7 @@ export default function InviteCodesPage() {
const handleRevoke = async (id: string) => { const handleRevoke = async (id: string) => {
try { try {
await api.delete(`/api/invite-codes/${id}`); await api.delete(`/invite-codes/${id}`);
message.success('Invite code revoked'); message.success('Invite code revoked');
fetchCodes(); fetchCodes();
} catch { } catch {

View File

@ -26,6 +26,7 @@ const envSchema = z.object({
INSTANCE_SLUG: z.string().default(''), INSTANCE_SLUG: z.string().default(''),
INSTANCE_DOMAIN: z.string().default(''), INSTANCE_DOMAIN: z.string().default(''),
INSTANCE_BASE_PATH: z.string().default(''), INSTANCE_BASE_PATH: z.string().default(''),
COMPOSE_PROJECT: z.string().default(''),
}); });
function validateEnv() { function validateEnv() {

View File

@ -1,105 +1,623 @@
import { Router, Request, Response } from 'express'; import { Router, Request, Response } from 'express';
import { param } from '../utils/params'; import { param } from '../utils/params';
import fs from 'fs/promises'; import fs from 'fs/promises';
import path from 'path'; import { createReadStream, createWriteStream } from 'fs';
import { exec as execCb } from 'child_process'; import { pipeline as pipelineCb, Transform } from 'stream';
import { promisify } from 'util'; import { promisify } from 'util';
import * as docker from '../services/docker.service'; import path from 'path';
import crypto from 'crypto';
import { spawn } from 'child_process';
import { getSlugEntry } from '../services/registry.service'; import { getSlugEntry } from '../services/registry.service';
import { env } from '../config/env'; import { env } from '../config/env';
import { logger } from '../utils/logger'; import { logger } from '../utils/logger';
import { withSlugLock, SlugBusyError, isSlugLocked } from '../services/slug-mutex';
import { AgentError } from '../middleware/error-handler';
const pipeline = promisify(pipelineCb);
const exec = promisify(execCb);
const router = Router(); const router = Router();
// POST /instance/:slug/backup — Run pg_dump + tar uploads → return backup info // ─── Helpers ──────────────────────────────────────────────────────────
const ID_REGEX = /^[a-zA-Z0-9_-]+$/;
const ARCHIVE_PREFIX = 'changemaker-v2-backup-';
const ARCHIVE_SUFFIX = '.tar.gz';
/** Directory under AGENT_DATA_DIR where backup archives for `slug` are kept. */
function backupsDirFor(slug: string): string {
  const segments = [env.AGENT_DATA_DIR, 'backups', slug];
  return path.join(...segments);
}
/** Full path of the archive file for backup `id` inside the slug's backups directory. */
function archivePathFor(slug: string, id: string): string {
  const archiveName = `${ARCHIVE_PREFIX}${id}${ARCHIVE_SUFFIX}`;
  const slugBackupsDir = backupsDirFor(slug);
  return path.join(slugBackupsDir, archiveName);
}
/**
 * Compute the hex-encoded SHA-256 digest of a file by streaming it from disk.
 *
 * @param filePath Path of the file to hash.
 * @returns Lowercase hex digest.
 * Rejects with the underlying stream error if the file cannot be read.
 */
async function sha256File(filePath: string): Promise<string> {
  const digest = crypto.createHash('sha256');
  // Readable streams are async-iterable; a stream error rejects the loop.
  for await (const piece of createReadStream(filePath)) {
    digest.update(piece);
  }
  return digest.digest('hex');
}
/**
 * Read the manifest.json out of a backup archive without extracting it.
 * backup.sh stores it at <archive>/changemaker-v2-backup-<ts>/manifest.json
 *
 * Runs `tar -xzOf <archive> --wildcards '*/manifest.json'` and parses its
 * stdout as JSON.
 *
 * @param archivePath Path to the .tar.gz archive.
 * @returns The parsed manifest, or `null` when tar cannot be spawned, exits
 *          non-zero, prints nothing, or prints something that is not valid JSON.
 *          Never rejects.
 */
async function readManifestFromArchive(archivePath: string): Promise<unknown | null> {
  return new Promise((resolve) => {
    const proc = spawn('tar', ['-xzOf', archivePath, '--wildcards', '*/manifest.json'], {
      stdio: ['ignore', 'pipe', 'ignore'],
    });
    // Collect raw bytes and decode once at the end: decoding each chunk on its
    // own would corrupt a multi-byte UTF-8 character split across two chunks.
    const chunks: Buffer[] = [];
    proc.stdout.on('data', (chunk: Buffer | string) => {
      chunks.push(typeof chunk === 'string' ? Buffer.from(chunk, 'utf-8') : chunk);
    });
    proc.on('error', () => resolve(null));
    proc.on('close', (code) => {
      const text = Buffer.concat(chunks).toString('utf-8');
      if (code !== 0 || !text.trim()) return resolve(null);
      try {
        resolve(JSON.parse(text));
      } catch {
        resolve(null);
      }
    });
  });
}
/**
 * Pull the backup ID (the timestamp part) out of an archive filename such as
 * "changemaker-v2-backup-20260409_143000.tar.gz".
 *
 * @returns The text between the archive prefix and suffix, or `null` when the
 *          filename does not carry both.
 */
function idFromFilename(filename: string): string | null {
  const looksLikeArchive =
    filename.startsWith(ARCHIVE_PREFIX) && filename.endsWith(ARCHIVE_SUFFIX);
  if (looksLikeArchive) {
    const idStart = ARCHIVE_PREFIX.length;
    const idEnd = filename.length - ARCHIVE_SUFFIX.length;
    return filename.slice(idStart, idEnd);
  }
  return null;
}
// ─── Routes ───────────────────────────────────────────────────────────
/**
* POST /instance/:slug/backup
* Shells out to the remote CML's scripts/backup.sh. Returns archive metadata
* so the CCP can immediately stream it down via the /download endpoint.
*/
router.post('/instance/:slug/backup', async (req: Request, res: Response) => { router.post('/instance/:slug/backup', async (req: Request, res: Response) => {
const entry = await getSlugEntry(param(req, 'slug')); const slug = param(req, 'slug');
const timestamp = new Date().toISOString().replace(/[:.]/g, '-'); const entry = await getSlugEntry(slug);
const backupDir = path.join(env.AGENT_DATA_DIR, 'backups', param(req, 'slug'), timestamp);
await fs.mkdir(backupDir, { recursive: true });
const { pgPassword } = req.body;
try { try {
// 1. pg_dump const result = await withSlugLock(slug, 'backup', async () => {
const dumpFile = path.join(backupDir, 'database.sql'); const backupsDir = backupsDirFor(slug);
const dump = await docker.composeExec( await fs.mkdir(backupsDir, { recursive: true });
entry.basePath, entry.composeProject,
'v2-postgres',
'pg_dump -U changemaker -d changemaker',
300_000,
pgPassword ? { PGPASSWORD: pgPassword } : undefined
);
await fs.writeFile(dumpFile, dump, 'utf-8');
// Gzip the dump // Verify scripts/backup.sh exists
await exec(`gzip '${dumpFile}'`, { timeout: 120_000 }); const scriptPath = path.join(entry.basePath, 'scripts', 'backup.sh');
// 2. Tar uploads if exists
const uploadsDir = path.join(entry.basePath, 'uploads');
let hasUploads = false;
try { try {
await fs.access(uploadsDir); await fs.access(scriptPath);
hasUploads = true; } catch {
} catch { /* no uploads dir */ } throw new AgentError(500, `scripts/backup.sh not found at ${scriptPath}`, 'BACKUP_SCRIPT_MISSING');
if (hasUploads) {
await exec(
`tar -czf '${path.join(backupDir, 'uploads.tar.gz')}' -C '${entry.basePath}' uploads`,
{ timeout: 300_000 }
);
} }
// 3. Create final archive // Snapshot existing archive filenames so we can identify the new one
const archiveName = `backup-${param(req, 'slug')}-${timestamp}.tar.gz`; const existingFiles = new Set(
const archivePath = path.join(env.AGENT_DATA_DIR, 'backups', archiveName); (await fs.readdir(backupsDir)).filter((f) => f.startsWith(ARCHIVE_PREFIX) && f.endsWith(ARCHIVE_SUFFIX))
await exec(
`tar -czf '${archivePath}' -C '${path.dirname(backupDir)}' '${timestamp}'`,
{ timeout: 300_000 }
); );
// Clean up temp dir const logPath = path.join(backupsDir, `backup-${Date.now()}.log`);
await fs.rm(backupDir, { recursive: true, force: true }); const logFd = await fs.open(logPath, 'w');
// Spawn backup.sh with cwd=basePath so its .env detection works.
// Retention is effectively disabled here — CCP manages retention of
// the streamed-down archives, not the agent's transient copies.
//
// Container names: backup.sh defaults to `changemaker-v2-postgres` and
// `listmonk-db`, which match the main CML's `container_name:` overrides.
// If a deployment has custom naming, the operator can set PG_CONTAINER /
// LISTMONK_PG_CONTAINER in the instance's own .env (backup.sh loads it).
const spawnEnv: NodeJS.ProcessEnv = {
...process.env,
BACKUP_DIR: backupsDir,
RETENTION_DAYS: '36500', // ~100 years; CCP controls retention
};
logger.info(`[backup] Running scripts/backup.sh for ${slug} (basePath=${entry.basePath})`);
const exitCode: number = await new Promise((resolve, reject) => {
const proc = spawn('bash', ['scripts/backup.sh'], {
cwd: entry.basePath,
env: spawnEnv,
stdio: ['ignore', 'pipe', 'pipe'],
});
proc.stdout.on('data', (chunk) => logFd.write(chunk).catch(() => {}));
proc.stderr.on('data', (chunk) => logFd.write(chunk).catch(() => {}));
proc.on('error', reject);
proc.on('close', (code) => resolve(code ?? 1));
});
await logFd.close();
if (exitCode !== 0) {
// Return the tail of the log so the CCP can display it
let logTail = '';
try {
const fullLog = await fs.readFile(logPath, 'utf-8');
logTail = fullLog.split('\n').slice(-40).join('\n');
} catch { /* ignore */ }
throw new AgentError(500, `backup.sh exited with code ${exitCode}\n${logTail}`, 'BACKUP_FAILED');
}
// Find the new archive
const afterFiles = (await fs.readdir(backupsDir)).filter(
(f) => f.startsWith(ARCHIVE_PREFIX) && f.endsWith(ARCHIVE_SUFFIX)
);
const newFiles = afterFiles.filter((f) => !existingFiles.has(f));
if (newFiles.length === 0) {
throw new AgentError(500, 'backup.sh succeeded but no new archive was created', 'BACKUP_NO_OUTPUT');
}
// Pick the most recently modified (in case of oddities)
newFiles.sort();
const newest = newFiles[newFiles.length - 1] as string;
const archivePath = path.join(backupsDir, newest);
const backupId = idFromFilename(newest);
if (!backupId || !ID_REGEX.test(backupId)) {
throw new AgentError(500, `Unexpected archive filename: ${newest}`, 'BACKUP_NAME_INVALID');
}
const stats = await fs.stat(archivePath); const stats = await fs.stat(archivePath);
const backupId = timestamp; const sha256 = await sha256File(archivePath);
const manifest = await readManifestFromArchive(archivePath);
logger.info(`[backup] Created backup for ${param(req, 'slug')}: ${archivePath} (${stats.size} bytes)`); // Delete the log file once we know the backup succeeded
try { await fs.unlink(logPath); } catch { /* ignore */ }
res.json({ logger.info(`[backup] ${slug}: created ${newest} (${stats.size} bytes, sha256=${sha256.substring(0, 16)}...)`);
return {
backupId, backupId,
archivePath, filename: newest,
sizeBytes: stats.size, sizeBytes: stats.size,
timestamp, sha256,
manifest,
createdAt: stats.mtime.toISOString(),
};
}); });
res.json(result);
} catch (err) { } catch (err) {
// Clean up on failure if (err instanceof SlugBusyError) {
try { await fs.rm(backupDir, { recursive: true, force: true }); } catch { /* ignore */ } res.status(409).json({ error: 'SLUG_BUSY', message: err.message });
return;
}
throw err; throw err;
} }
}); });
// GET /instance/:slug/backup/:id/download — Stream backup archive /**
router.get('/instance/:slug/backup/:id/download', async (req: Request, res: Response) => { * GET /instance/:slug/backups
const archiveName = `backup-${param(req, 'slug')}-${param(req, 'id')}.tar.gz`; * Lists backup archives currently held on the agent for this slug.
const archivePath = path.join(env.AGENT_DATA_DIR, 'backups', archiveName); */
router.get('/instance/:slug/backups', async (req: Request, res: Response) => {
const slug = param(req, 'slug');
await getSlugEntry(slug); // validate slug is registered
const backupsDir = backupsDirFor(slug);
let entries: string[] = [];
try { try {
await fs.access(archivePath); entries = await fs.readdir(backupsDir);
} catch {
res.json({ data: [] });
return;
}
const results = [];
for (const filename of entries) {
const id = idFromFilename(filename);
if (!id) continue;
try {
const stats = await fs.stat(path.join(backupsDir, filename));
results.push({
backupId: id,
filename,
sizeBytes: stats.size,
createdAt: stats.mtime.toISOString(),
});
} catch { /* skip */ }
}
results.sort((a, b) => (a.createdAt < b.createdAt ? 1 : -1));
res.json({ data: results });
});
/**
 * GET /instance/:slug/backup/:id/download
 * Streams the requested backup archive to the caller. Content-Length is set
 * from the file size so the CCP can verify how much it received.
 *
 * Responds 400 for an id that fails ID_REGEX and 404 when the archive cannot
 * be stat'ed.
 */
router.get('/instance/:slug/backup/:id/download', async (req: Request, res: Response) => {
  const slug = param(req, 'slug');
  const id = param(req, 'id');
  const idIsValid = ID_REGEX.test(id);
  if (!idIsValid) {
    res.status(400).json({ error: 'INVALID_ID', message: 'Invalid backup id' });
    return;
  }
  await getSlugEntry(slug);

  const archivePath = archivePathFor(slug, id);
  try {
    const { size } = await fs.stat(archivePath);
    res.setHeader('Content-Type', 'application/gzip');
    res.setHeader('Content-Length', String(size));
    res.setHeader('Content-Disposition', `attachment; filename="${path.basename(archivePath)}"`);

    const archiveStream = createReadStream(archivePath);
    archiveStream.on('error', (streamErr) => {
      logger.error(`[backup] stream error for ${archivePath}: ${streamErr.message}`);
      // Once headers are out we can only abort the socket; before that a 500 is still possible.
      if (res.headersSent) {
        res.destroy(streamErr);
      } else {
        res.status(500).end();
      }
    });
    archiveStream.pipe(res);
  } catch {
    res.status(404).json({ error: 'NOT_FOUND', message: 'Backup archive not found' });
  }
});
/**
 * DELETE /instance/:slug/backup/:id
 * Removes an archive from the agent's disk. The CCP calls this once it has
 * streamed the archive into its own storage.
 *
 * Responds 400 for an invalid id, 404 when the archive does not exist, and
 * `{ deleted: true }` on success. Any other unlink error is rethrown.
 */
router.delete('/instance/:slug/backup/:id', async (req: Request, res: Response) => {
  const slug = param(req, 'slug');
  const id = param(req, 'id');
  const rejectInvalidId = (): void => {
    res.status(400).json({ error: 'INVALID_ID', message: 'Invalid backup id' });
  };

  if (!ID_REGEX.test(id)) {
    rejectInvalidId();
    return;
  }
  await getSlugEntry(slug);

  const archivePath = archivePathFor(slug, id);

  // Path traversal defense: the resolved archive path must stay inside the slug's backups dir
  const absoluteArchive = path.resolve(archivePath);
  const allowedRoot = path.resolve(backupsDirFor(slug)) + path.sep;
  if (!absoluteArchive.startsWith(allowedRoot)) {
    rejectInvalidId();
    return;
  }

  try {
    await fs.unlink(archivePath);
    logger.info(`[backup] ${slug}: deleted ${path.basename(archivePath)}`);
    res.json({ deleted: true });
  } catch (err) {
    const isMissing = (err as NodeJS.ErrnoException).code === 'ENOENT';
    if (!isMissing) {
      throw err;
    }
    res.status(404).json({ error: 'NOT_FOUND', message: 'Backup archive not found' });
  }
});
// ─── Restore ──────────────────────────────────────────────────────────
// Hard cap on a single restore upload. The CCP is trusted, but a buggy or
// compromised CCP shouldn't be able to fill the agent's disk in one request.
// 20 GB is well above any realistic Changemaker Lite backup size.
const MAX_RESTORE_UPLOAD_BYTES = 20 * 1024 * 1024 * 1024;
/** Directory under AGENT_DATA_DIR that holds restore uploads for `slug`. */
function restoresDirFor(slug: string): string {
  const segments = [env.AGENT_DATA_DIR, 'restores', slug];
  return path.join(...segments);
}
/** Directory for a single restore upload, nested under the slug's restores directory. */
function restoreUploadDir(slug: string, uploadId: string): string {
  const slugRestoresDir = restoresDirFor(slug);
  return path.join(slugRestoresDir, uploadId);
}
/**
 * Shape of the restore-state.json file kept in each restore upload directory.
 * It is written by the upload and apply handlers and returned as-is by the
 * progress endpoint.
 */
interface RestoreState {
  // UPLOADED: archive received and hash-verified; RUNNING: apply accepted;
  // COMPLETED / FAILED: restore.sh exited with 0 / non-zero, or the background task threw.
  status: 'UPLOADED' | 'RUNNING' | 'COMPLETED' | 'FAILED';
  // Hex id generated at upload time; also the name of the upload directory.
  uploadId: string;
  // ISO-8601 timestamp of when this state was first recorded.
  startedAt: string;
  // ISO-8601 timestamp set once the restore has finished (either outcome).
  completedAt?: string;
  // Exit code of restore.sh, when the script actually ran.
  exitCode?: number;
  // Last lines of restore.log, captured after the script exits.
  logTail?: string;
  // Human-readable failure reason; absent on success.
  errorMessage?: string;
  // Flags the apply request was made with (skipDb, skipUploads, skipListmonk, dryRun).
  options?: Record<string, unknown>;
}
/**
 * Load the persisted state of a restore upload.
 *
 * @returns The parsed restore-state.json, or `null` when the file is missing,
 *          unreadable, or not valid JSON.
 */
async function readRestoreState(slug: string, uploadId: string): Promise<RestoreState | null> {
  const stateFile = path.join(restoreUploadDir(slug, uploadId), 'restore-state.json');
  let loaded: RestoreState | null;
  try {
    const raw = await fs.readFile(stateFile, 'utf-8');
    loaded = JSON.parse(raw) as RestoreState;
  } catch {
    loaded = null;
  }
  return loaded;
}
/**
 * Persist the state of a restore upload to restore-state.json.
 *
 * The JSON is written to a sibling temp file and then renamed over the real
 * file. A plain writeFile truncates first, so a concurrent reader (the
 * progress endpoint is polled while a restore runs) could see an empty or
 * partial file, fail to parse it, and report the restore as not found.
 *
 * @throws Rethrows any write/rename error after removing the temp file.
 */
async function writeRestoreState(slug: string, uploadId: string, state: RestoreState): Promise<void> {
  const statePath = path.join(restoreUploadDir(slug, uploadId), 'restore-state.json');
  // Same directory as the target so the rename stays on one filesystem.
  const tmpPath = `${statePath}.${process.pid}.${Date.now()}.tmp`;
  try {
    await fs.writeFile(tmpPath, JSON.stringify(state, null, 2), 'utf-8');
    await fs.rename(tmpPath, statePath);
  } catch (err) {
    // Best-effort cleanup; the original error is what the caller needs to see.
    await fs.rm(tmpPath, { force: true }).catch(() => {});
    throw err;
  }
}
/**
* POST /instance/:slug/restore/upload?sha256=<hex>
* Accepts an application/octet-stream upload of a backup archive and writes
* it to the agent's restores directory. Hashes the data with SHA256 as it
* streams; if the hash doesn't match, the partial file is deleted and we return 400.
*
* Returns `{ uploadId, sizeBytes, sha256 }`.
*/
router.post('/instance/:slug/restore/upload', async (req: Request, res: Response) => {
const slug = param(req, 'slug');
await getSlugEntry(slug);
if (isSlugLocked(slug, 'restore')) {
res.status(409).json({ error: 'SLUG_BUSY', message: 'A restore is already in progress for this slug' });
return;
}
if (isSlugLocked(slug, 'backup')) {
res.status(409).json({ error: 'SLUG_BUSY', message: 'A backup is in progress for this slug' });
return;
}
const expectedSha256 = typeof req.query.sha256 === 'string' ? req.query.sha256.toLowerCase() : undefined;
if (!expectedSha256 || !/^[a-f0-9]{64}$/.test(expectedSha256)) {
res.status(400).json({ error: 'VALIDATION', message: 'sha256 query parameter required (64 hex chars)' });
return;
}
const uploadId = crypto.randomBytes(16).toString('hex');
const uploadDir = restoreUploadDir(slug, uploadId);
await fs.mkdir(uploadDir, { recursive: true });
const archivePath = path.join(uploadDir, 'archive.tar.gz');
const hash = crypto.createHash('sha256');
let bytesWritten = 0;
const hashTransform = new Transform({
transform(chunk: Buffer, _enc, cb) {
bytesWritten += chunk.length;
if (bytesWritten > MAX_RESTORE_UPLOAD_BYTES) {
// Abort the stream — pipeline() will reject and the catch block below
// will remove the partial upload directory.
cb(new AgentError(
413,
`Upload exceeds maximum allowed size of ${MAX_RESTORE_UPLOAD_BYTES} bytes`,
'UPLOAD_TOO_LARGE'
));
return;
}
hash.update(chunk);
cb(null, chunk);
},
});
try {
const writeStream = createWriteStream(archivePath);
await pipeline(req, hashTransform, writeStream);
const sha256 = hash.digest('hex');
if (sha256 !== expectedSha256) {
// Integrity failure — nuke the upload
await fs.rm(uploadDir, { recursive: true, force: true });
res.status(400).json({
error: 'SHA256_MISMATCH',
message: `Expected sha256 ${expectedSha256}, got ${sha256}`,
});
return;
}
const stats = await fs.stat(archivePath); const stats = await fs.stat(archivePath);
res.setHeader('Content-Type', 'application/gzip');
res.setHeader('Content-Length', stats.size);
res.setHeader('Content-Disposition', `attachment; filename="${archiveName}"`);
const { createReadStream } = await import('fs'); // Persist initial state so the progress endpoint works even before apply
const stream = createReadStream(archivePath); await writeRestoreState(slug, uploadId, {
stream.pipe(res); status: 'UPLOADED',
uploadId,
startedAt: new Date().toISOString(),
});
logger.info(`[restore] ${slug}: uploaded ${bytesWritten} bytes (sha256=${sha256.substring(0, 16)}...) upload_id=${uploadId}`);
res.json({
uploadId,
sizeBytes: stats.size,
sha256,
});
} catch (err) {
// Stream error or write error — clean up
try { await fs.rm(uploadDir, { recursive: true, force: true }); } catch { /* ignore */ }
throw err;
}
});
/**
 * POST /instance/:slug/restore/:uploadId/apply
 * Body: { confirm: true, skipDb?, skipUploads?, skipListmonk?, dryRun? }
 *
 * Fires off `scripts/restore.sh --archive <path> --force` in the background
 * and writes progress to restore-state.json. The CCP polls the progress
 * endpoint for updates. Mutex prevents concurrent restores/backups.
 *
 * Responses:
 *   400 invalid upload id or missing `confirm: true`
 *   404 no archive for this upload id
 *   500 scripts/restore.sh missing in the instance's base path
 *   409 slug is busy with a backup or restore
 *   202 restore accepted; body is `{ applied, uploadId, options }`
 */
router.post('/instance/:slug/restore/:uploadId/apply', async (req: Request, res: Response) => {
  const slug = param(req, 'slug');
  const uploadId = param(req, 'uploadId');
  if (!ID_REGEX.test(uploadId)) {
    res.status(400).json({ error: 'INVALID_ID', message: 'Invalid upload id' });
    return;
  }
  const entry = await getSlugEntry(slug);

  const { confirm, skipDb, skipUploads, skipListmonk, dryRun } = req.body ?? {};
  if (confirm !== true) {
    res.status(400).json({ error: 'CONFIRMATION_REQUIRED', message: 'Body must include { confirm: true }' });
    return;
  }

  const uploadDir = restoreUploadDir(slug, uploadId);
  // Path traversal defense
  const resolvedDir = path.resolve(uploadDir);
  const boundary = path.resolve(restoresDirFor(slug));
  if (!resolvedDir.startsWith(boundary + path.sep)) {
    res.status(400).json({ error: 'INVALID_ID', message: 'Invalid upload id' });
    return;
  }

  const archivePath = path.join(uploadDir, 'archive.tar.gz');
  try {
    await fs.access(archivePath);
  } catch {
    res.status(404).json({ error: 'NOT_FOUND', message: 'Upload not found or already applied' });
    return;
  }

  // Verify scripts/restore.sh exists
  const scriptPath = path.join(entry.basePath, 'scripts', 'restore.sh');
  try {
    await fs.access(scriptPath);
  } catch {
    res.status(500).json({ error: 'RESTORE_SCRIPT_MISSING', message: `scripts/restore.sh not found at ${scriptPath}` });
    return;
  }

  // Check mutex state (don't block — tell caller it's busy)
  if (isSlugLocked(slug, 'restore') || isSlugLocked(slug, 'backup')) {
    res.status(409).json({ error: 'SLUG_BUSY', message: 'Slug is busy with backup or restore' });
    return;
  }

  // Fire-and-forget: acquire lock and run in background. Return immediately
  // so CCP can start polling /progress.
  const options = {
    skipDb: !!skipDb,
    skipUploads: !!skipUploads,
    skipListmonk: !!skipListmonk,
    dryRun: !!dryRun,
  };

  // Captured once so every state written for this run (RUNNING, COMPLETED,
  // FAILED) reports the same start time without re-reading the state file.
  const startedAt = new Date().toISOString();

  await writeRestoreState(slug, uploadId, {
    status: 'RUNNING',
    uploadId,
    startedAt,
    options,
  });

  // Build restore.sh args (all flags, no user input interpolated into a shell string)
  const args = ['scripts/restore.sh', '--archive', archivePath, '--force'];
  if (options.skipDb) args.push('--skip-db');
  if (options.skipUploads) args.push('--skip-uploads');
  if (options.skipListmonk) args.push('--skip-listmonk');
  if (options.dryRun) args.push('--dry-run');

  const logPath = path.join(uploadDir, 'restore.log');

  // Schedule the background task — don't await inside the handler
  void withSlugLock(slug, 'restore', async () => {
    const logFd = await fs.open(logPath, 'w');
    let exitCode: number;
    try {
      logger.info(`[restore] ${slug}: running ${args.join(' ')} (cwd=${entry.basePath})`);

      exitCode = await new Promise<number>((resolve, reject) => {
        const proc = spawn('bash', args, {
          cwd: entry.basePath,
          env: { ...process.env },
          stdio: ['ignore', 'pipe', 'pipe'],
        });
        // Log writes are best-effort: a failed write must not abort the restore.
        proc.stdout.on('data', (chunk) => logFd.write(chunk).catch(() => {}));
        proc.stderr.on('data', (chunk) => logFd.write(chunk).catch(() => {}));
        proc.on('error', reject);
        proc.on('close', (code) => resolve(code ?? 1));
      });
    } finally {
      // Always release the handle, including when spawn itself fails; the
      // script's outcome takes precedence over a close error.
      await logFd.close().catch((closeErr) => {
        logger.error(`[restore] ${slug}: failed to close restore log: ${(closeErr as Error).message}`);
      });
    }

    // Read the tail of the log for the state file
    let logTail = '';
    try {
      const fullLog = await fs.readFile(logPath, 'utf-8');
      logTail = fullLog.split('\n').slice(-80).join('\n');
    } catch { /* ignore */ }

    const state: RestoreState = {
      status: exitCode === 0 ? 'COMPLETED' : 'FAILED',
      uploadId,
      startedAt,
      completedAt: new Date().toISOString(),
      exitCode,
      logTail,
      options,
      ...(exitCode !== 0 ? { errorMessage: `restore.sh exited with code ${exitCode}` } : {}),
    };
    await writeRestoreState(slug, uploadId, state);
    logger.info(`[restore] ${slug}: restore.sh finished with exit ${exitCode}`);
  }).catch(async (err) => {
    logger.error(`[restore] ${slug}: background restore failed: ${(err as Error).message}`);
    // If the mutex was the issue, state is already written. Otherwise, mark failed.
    // NOTE(review): on SlugBusyError the state written above stays RUNNING even
    // though nothing runs for this upload, and DELETE refuses RUNNING uploads.
    // Confirm withSlugLock's semantics before changing this.
    if (!(err instanceof SlugBusyError)) {
      try {
        await writeRestoreState(slug, uploadId, {
          status: 'FAILED',
          uploadId,
          startedAt,
          completedAt: new Date().toISOString(),
          errorMessage: (err as Error).message,
          options,
        });
      } catch { /* ignore */ }
    }
  });

  res.status(202).json({ applied: true, uploadId, options });
});
/**
 * GET /instance/:slug/restore/:uploadId/progress
 * Reports the persisted state of a restore upload, whether it is still
 * running or already finished. Responds 400 for an invalid upload id and 404
 * when no state can be read for it.
 */
router.get('/instance/:slug/restore/:uploadId/progress', async (req: Request, res: Response) => {
  const slug = param(req, 'slug');
  const uploadId = param(req, 'uploadId');

  const idIsValid = ID_REGEX.test(uploadId);
  if (!idIsValid) {
    res.status(400).json({ error: 'INVALID_ID', message: 'Invalid upload id' });
    return;
  }
  await getSlugEntry(slug);

  const current = await readRestoreState(slug, uploadId);
  if (current) {
    res.json(current);
    return;
  }
  res.status(404).json({ error: 'NOT_FOUND', message: 'Restore not found' });
});
/**
 * DELETE /instance/:slug/restore/:uploadId
 * Removes a restore upload directory. Refuses if a restore is currently running.
 *
 * Responds 400 for an invalid upload id, 409 when the persisted state is
 * RUNNING, and `{ deleted: true }` otherwise. The removal uses `force`, so a
 * directory that is already gone still counts as deleted.
 */
router.delete('/instance/:slug/restore/:uploadId', async (req: Request, res: Response) => {
  const slug = param(req, 'slug');
  const uploadId = param(req, 'uploadId');
  if (!ID_REGEX.test(uploadId)) {
    res.status(400).json({ error: 'INVALID_ID', message: 'Invalid upload id' });
    return;
  }
  await getSlugEntry(slug);

  const uploadDir = restoreUploadDir(slug, uploadId);
  // Path traversal defense: the resolved directory must stay inside the slug's restores dir
  const resolvedDir = path.resolve(uploadDir);
  const boundary = path.resolve(restoresDirFor(slug));
  if (!resolvedDir.startsWith(boundary + path.sep)) {
    res.status(400).json({ error: 'INVALID_ID', message: 'Invalid upload id' });
    return;
  }

  const state = await readRestoreState(slug, uploadId);
  if (state?.status === 'RUNNING') {
    res.status(409).json({ error: 'RESTORE_RUNNING', message: 'Cannot delete a running restore' });
    return;
  }

  // Errors propagate to the caller unchanged; no catch-and-rethrow wrapper is needed.
  await fs.rm(uploadDir, { recursive: true, force: true });
  res.json({ deleted: true });
});
export default router; export default router;

View File

@ -4,6 +4,13 @@ import { registerSlug, unregisterSlug, listSlugs } from '../services/registry.se
const router = Router(); const router = Router();
// SECURITY: defense-in-depth slug validation. The CCP enforces ^[a-z0-9-]+$
// upstream via Zod, but the registry slug is later interpolated into
// filesystem paths (backupsDirFor, etc.), so we validate independently here.
// A poisoned registry entry could otherwise let a compromised or buggy CCP
// escape AGENT_DATA_DIR.
const SLUG_RE = /^[a-z0-9-]{2,50}$/;
// POST /instances/register — Register a slug→basePath mapping // POST /instances/register — Register a slug→basePath mapping
router.post('/instances/register', async (req: Request, res: Response) => { router.post('/instances/register', async (req: Request, res: Response) => {
const { slug, basePath, composeProject } = req.body; const { slug, basePath, composeProject } = req.body;
@ -11,14 +18,23 @@ router.post('/instances/register', async (req: Request, res: Response) => {
res.status(400).json({ error: 'VALIDATION', message: 'slug, basePath, and composeProject required' }); res.status(400).json({ error: 'VALIDATION', message: 'slug, basePath, and composeProject required' });
return; return;
} }
if (typeof slug !== 'string' || !SLUG_RE.test(slug)) {
res.status(400).json({ error: 'VALIDATION', message: 'Invalid slug format (expected ^[a-z0-9-]{2,50}$)' });
return;
}
await registerSlug(slug, basePath, composeProject); await registerSlug(slug, basePath, composeProject);
res.json({ registered: slug }); res.json({ registered: slug });
}); });
// DELETE /instances/:slug — Unregister slug // DELETE /instances/:slug — Unregister slug
router.delete('/instances/:slug', async (req: Request, res: Response) => { router.delete('/instances/:slug', async (req: Request, res: Response) => {
await unregisterSlug(param(req, 'slug')); const slug = param(req, 'slug');
res.json({ unregistered: param(req, 'slug') }); if (!SLUG_RE.test(slug)) {
res.status(400).json({ error: 'VALIDATION', message: 'Invalid slug format' });
return;
}
await unregisterSlug(slug);
res.json({ unregistered: slug });
}); });
// GET /instances — List all managed slugs // GET /instances — List all managed slugs

View File

@ -1,11 +1,12 @@
import { Router, Request, Response } from 'express'; import { Router, Request, Response } from 'express';
import { param } from '../utils/params'; import { param } from '../utils/params';
import { execFile } from 'child_process'; import { execFile, spawn } from 'child_process';
import { promisify } from 'util'; import { promisify } from 'util';
import fs from 'fs/promises'; import fs from 'fs/promises';
import path from 'path'; import path from 'path';
import { getSlugEntry } from '../services/registry.service'; import { getSlugEntry } from '../services/registry.service';
import { logger } from '../utils/logger'; import { logger } from '../utils/logger';
import { withSlugLock, SlugBusyError, isSlugLocked } from '../services/slug-mutex';
const execFileAsync = promisify(execFile); const execFileAsync = promisify(execFile);
const router = Router(); const router = Router();
@ -13,9 +14,108 @@ const router = Router();
/** Validate a git branch name — prevent shell injection. */ /** Validate a git branch name — prevent shell injection. */
const SAFE_BRANCH = /^[a-zA-Z0-9][a-zA-Z0-9_.\/-]{0,99}$/; const SAFE_BRANCH = /^[a-zA-Z0-9][a-zA-Z0-9_.\/-]{0,99}$/;
// POST /instance/:slug/upgrade/start — Run upgrade.sh /**
* Max age of an in-progress upgrade (by progress.json mtime) before we
* consider a previous attempt dead and allow a new one through.
*
* SECURITY NOTE: this must be LONGER than the CCP's REMOTE_UPGRADE_TIMEOUT
* AND longer than any realistic legitimate upgrade duration. The concern is
* a concurrent-upgrade scenario:
* - upgrade.sh is running and legitimately slow (large image pull + DB
* migration)
* - at 15 min the CCP side times out and marks the row FAILED
* - admin clicks "Upgrade" again, and the CCP's DB check sees no active row
* - if this staleness window is <= realistic upgrade time, the second
* /upgrade/start call would ALSO pass this check, spawning a second
* upgrade.sh process racing against the still-running first one
*
* 45 min gives headroom over the 15-min CCP timeout and covers realistic
* upgrade durations. For a truly bulletproof guard, switch to a PID lock
* file that verifies the process is still alive.
*/
const STALE_UPGRADE_MTIME_MS = 45 * 60 * 1000;

/**
 * Reports whether the files under `<basePath>/data/upgrade` describe an
 * upgrade that has started but not finished.
 *
 * This is the second-line guard for the case where the in-memory mutex was
 * lost to an agent restart mid-upgrade. The answer is derived from two files:
 *   - progress.json: its mtime marks the most recent sign of activity
 *   - result.json:   if present and at least as new as progress.json, the
 *                    run is considered finished
 *
 * @param basePath  instance root directory containing `data/upgrade/`
 * @returns true only when progress.json exists, no result.json at least as
 *          new as it exists, and progress.json was modified within
 *          STALE_UPGRADE_MTIME_MS
 */
async function isUpgradeRunningOnDisk(basePath: string): Promise<boolean> {
  const upgradeDir = path.join(basePath, 'data', 'upgrade');

  // Modification time in ms, or null when the file cannot be stat'ed
  // (any stat failure is treated the same as "file absent").
  const mtimeOf = async (fileName: string): Promise<number | null> => {
    try {
      const stats = await fs.stat(path.join(upgradeDir, fileName));
      return stats.mtimeMs;
    } catch {
      return null;
    }
  };

  const progressMtime = await mtimeOf('progress.json');
  if (progressMtime === null) {
    return false; // no progress file → no in-progress upgrade
  }

  // A result file that is at least as new as the progress file means the run ended.
  const resultMtime = await mtimeOf('result.json');
  if (resultMtime !== null && resultMtime >= progressMtime) {
    return false;
  }

  // Still "running" only while the progress file is fresh; an old progress
  // file with no result is assumed to belong to a dead attempt.
  return Date.now() - progressMtime <= STALE_UPGRADE_MTIME_MS;
}
// POST /instance/:slug/upgrade/check — Run upgrade-check.sh and return status.json
//
// Responses:
//   200  parsed contents of data/upgrade/status.json
//   404  SCRIPT_NOT_FOUND    scripts/upgrade-check.sh is missing
//   409  SLUG_BUSY           an upgrade is running (in-memory lock or on-disk indicators)
//   500  STATUS_NOT_AVAILABLE status.json missing or unparseable after the run
router.post('/instance/:slug/upgrade/check', async (req: Request, res: Response) => {
  const slug = param(req, 'slug');
  const { basePath } = await getSlugEntry(slug);

  // The check script writes status.json into data/upgrade/, the same
  // directory upgrade.sh writes to, so never run both at once.
  const upgradeActive = isSlugLocked(slug, 'upgrade') || (await isUpgradeRunningOnDisk(basePath));
  if (upgradeActive) {
    res.status(409).json({ error: 'SLUG_BUSY', message: 'An upgrade is currently running' });
    return;
  }

  const checkScript = path.join(basePath, 'scripts', 'upgrade-check.sh');
  try {
    await fs.access(checkScript);
  } catch {
    res.status(404).json({ error: 'SCRIPT_NOT_FOUND', message: `upgrade-check.sh not found at ${checkScript}` });
    return;
  }

  // execFile (no shell) with a 60s timeout. A failing script is only logged:
  // it may have written status.json before erroring, so the read below is
  // attempted regardless.
  const execOptions = {
    cwd: basePath,
    timeout: 60_000,
    maxBuffer: 4 * 1024 * 1024,
    env: { ...process.env, COMPOSE_ANSI: 'never' },
  };
  try {
    await execFileAsync('bash', [checkScript], execOptions);
  } catch (err) {
    logger.warn(`[upgrade] ${slug}: upgrade-check.sh failed: ${(err as Error).message}`);
  }

  const statusFile = path.join(basePath, 'data', 'upgrade', 'status.json');
  try {
    const raw = await fs.readFile(statusFile, 'utf-8');
    res.json(JSON.parse(raw));
  } catch {
    res.status(500).json({ error: 'STATUS_NOT_AVAILABLE', message: 'upgrade-check.sh did not produce status.json' });
  }
});
// POST /instance/:slug/upgrade/start — Run upgrade.sh in the background
router.post('/instance/:slug/upgrade/start', async (req: Request, res: Response) => { router.post('/instance/:slug/upgrade/start', async (req: Request, res: Response) => {
const entry = await getSlugEntry(param(req, 'slug')); const slug = param(req, 'slug');
const entry = await getSlugEntry(slug);
const { skipBackup, useRegistry, branch } = req.body || {}; const { skipBackup, useRegistry, branch } = req.body || {};
// SECURITY: Validate branch name to prevent injection // SECURITY: Validate branch name to prevent injection
@ -28,26 +128,64 @@ router.post('/instance/:slug/upgrade/start', async (req: Request, res: Response)
try { try {
await fs.access(scriptPath); await fs.access(scriptPath);
} catch { } catch {
res.status(400).json({ error: 'NOT_FOUND', message: 'upgrade.sh not found' }); res.status(404).json({ error: 'NOT_FOUND', message: 'upgrade.sh not found' });
return; return;
} }
// SECURITY: Use execFile with args array — no shell interpolation // Refuse if an upgrade is already running (in-memory or on-disk indicators)
const args = ['--api-mode', '--force']; if (isSlugLocked(slug, 'upgrade') || await isUpgradeRunningOnDisk(entry.basePath)) {
res.status(409).json({ error: 'SLUG_BUSY', message: 'An upgrade is already in progress' });
return;
}
// Backup or restore concurrency: refuse to start an upgrade while either is running
if (isSlugLocked(slug, 'backup') || isSlugLocked(slug, 'restore')) {
res.status(409).json({ error: 'SLUG_BUSY', message: 'A backup or restore is currently running' });
return;
}
// Clear stale progress/result files before starting so the on-disk staleness
// check doesn't think a brand-new upgrade is still finishing.
const progressPath = path.join(entry.basePath, 'data', 'upgrade', 'progress.json');
const resultPath = path.join(entry.basePath, 'data', 'upgrade', 'result.json');
await fs.mkdir(path.dirname(progressPath), { recursive: true });
await fs.rm(progressPath, { force: true });
await fs.rm(resultPath, { force: true });
// SECURITY: Use spawn with args array — no shell interpolation
const args: string[] = [scriptPath, '--api-mode', '--force'];
if (skipBackup) args.push('--skip-backup'); if (skipBackup) args.push('--skip-backup');
if (useRegistry) args.push('--use-registry'); if (useRegistry) args.push('--use-registry');
if (branch) args.push('--branch', branch); if (branch) args.push('--branch', branch);
// Fire-and-forget — CCP polls progress // Schedule the background task under the slug lock. Use void so the
execFileAsync('bash', [scriptPath, ...args], { // promise doesn't block the response. Errors are caught and logged; the
// CCP detects them via the absence of a result file or via the timeout.
void withSlugLock(slug, 'upgrade', async () => {
logger.info(`[upgrade] ${slug}: spawning ${args.join(' ')} (cwd=${entry.basePath})`);
try {
await new Promise<void>((resolve, reject) => {
const proc = spawn('bash', args, {
cwd: entry.basePath, cwd: entry.basePath,
timeout: 600_000, env: { ...process.env, COMPOSE_ANSI: 'never' },
maxBuffer: 10 * 1024 * 1024, stdio: ['ignore', 'ignore', 'ignore'], // upgrade.sh writes its own logs
});
proc.on('error', reject);
proc.on('close', (code) => {
if (code === 0) resolve();
else reject(new Error(`upgrade.sh exited with code ${code}`));
});
});
logger.info(`[upgrade] ${slug}: upgrade.sh completed`);
} catch (err) {
logger.error(`[upgrade] ${slug}: ${(err as Error).message}`);
}
}).catch((err) => { }).catch((err) => {
logger.error(`[upgrade] ${param(req, 'slug')} failed: ${(err as Error).message}`); if (!(err instanceof SlugBusyError)) {
logger.error(`[upgrade] ${slug}: lock or background error: ${(err as Error).message}`);
}
}); });
res.json({ started: true }); res.status(202).json({ started: true });
}); });
// GET /instance/:slug/upgrade/progress — Read progress.json // GET /instance/:slug/upgrade/progress — Read progress.json

View File

@ -53,8 +53,24 @@ if (hasCerts()) {
app.use(errorHandler); app.use(errorHandler);
const server = https.createServer(tlsOptions, app); const server = https.createServer(tlsOptions, app);
server.listen(env.AGENT_PORT, () => { server.listen(env.AGENT_PORT, async () => {
logger.info(`CCP Agent (mTLS) listening on port ${env.AGENT_PORT}`); logger.info(`CCP Agent (mTLS) listening on port ${env.AGENT_PORT}`);
// Auto-register this instance's slug if configured
if (env.INSTANCE_SLUG && env.INSTANCE_BASE_PATH) {
const { registerSlug, getSlugEntry } = await import('./services/registry.service');
try {
await getSlugEntry(env.INSTANCE_SLUG);
logger.debug(`[registry] Slug ${env.INSTANCE_SLUG} already registered`);
} catch {
// Detect compose project name: use env override, or derive from basePath directory name
// (Docker Compose default: directory name with special chars stripped)
const pathMod = await import('path');
const composeProject = env.COMPOSE_PROJECT
|| pathMod.basename(env.INSTANCE_BASE_PATH).replace(/[^a-zA-Z0-9]/g, '').toLowerCase();
await registerSlug(env.INSTANCE_SLUG, env.INSTANCE_BASE_PATH, composeProject);
}
}
}); });
} else { } else {
// Pre-approval mode — start HTTP, only health + phone-home polling // Pre-approval mode — start HTTP, only health + phone-home polling

View File

@ -0,0 +1,65 @@
/**
* Per-slug single-flight mutex.
*
* Guards long-running, mutating operations (backup, restore, upgrade) so that
* two concurrent CCP calls for the same slug can't trample each other.
*
* Usage:
* await withSlugLock(slug, 'backup', async () => { ... });
*
* If a lock is already held for (slug, op), throws SlugBusyError which the
* route handler should convert to HTTP 409.
*/
/**
 * Thrown by `withSlugLock` when a lock for the same (slug, op) pair is
 * already held. Route handlers should translate it to HTTP 409.
 */
export class SlugBusyError extends Error {
  constructor(public slug: string, public op: string) {
    super(`Slug ${slug} is busy: ${op} already in progress`);
    this.name = 'SlugBusyError';
  }
}

type LockKey = string;

/**
 * What is remembered about a held lock. `slug` and `op` are stored on the
 * record so they never have to be parsed back out of the map key.
 */
interface LockRecord {
  slug: string;
  op: string;
  startedAt: number;
}

const locks = new Map<LockKey, LockRecord>();

/**
 * Builds the map key for a (slug, op) pair.
 *
 * JSON-encoding the tuple keeps distinct pairs distinct even when a slug or
 * op contains the separator characters a plain string join would use.
 */
function key(slug: string, op: string): LockKey {
  return JSON.stringify([slug, op]);
}

/**
 * Run `fn` while holding a single-flight lock on (slug, op).
 *
 * This is a try-lock, not a queue: if the pair is already locked the call
 * rejects immediately with SlugBusyError instead of waiting. The lock is
 * taken synchronously before `fn` is invoked and is always released when
 * `fn` settles, whether it resolves or rejects.
 *
 * @param slug  instance slug the operation targets
 * @param op    operation name (e.g. 'backup', 'restore', 'upgrade')
 * @param fn    the work to run under the lock
 * @returns whatever `fn` resolves to
 * @throws SlugBusyError if the same (slug, op) is already running
 */
export async function withSlugLock<T>(
  slug: string,
  op: string,
  fn: () => Promise<T>
): Promise<T> {
  const k = key(slug, op);
  if (locks.has(k)) {
    throw new SlugBusyError(slug, op);
  }
  locks.set(k, { slug, op, startedAt: Date.now() });
  try {
    return await fn();
  } finally {
    locks.delete(k);
  }
}

/**
 * Returns true if a lock is currently held for (slug, op).
 */
export function isSlugLocked(slug: string, op: string): boolean {
  return locks.has(key(slug, op));
}

/**
 * Returns debug info about all active locks: the slug, the operation, and
 * how long (ms) each lock has been held.
 */
export function listActiveLocks(): Array<{ slug: string; op: string; ageMs: number }> {
  const now = Date.now();
  return Array.from(locks.values(), ({ slug, op, startedAt }) => ({
    slug,
    op,
    ageMs: now - startedAt,
  }));
}

View File

@ -0,0 +1,34 @@
-- Migration: introduces restore tracking.
--   * new enum "RestoreStatus"
--   * new "AuditAction" label 'BACKUP_RESTORE'
--   * new table "instance_restores" with two indexes and two foreign keys
-- CreateEnum
CREATE TYPE "RestoreStatus" AS ENUM ('PENDING', 'UPLOADING', 'RUNNING', 'COMPLETED', 'FAILED');
-- AlterEnum
ALTER TYPE "AuditAction" ADD VALUE 'BACKUP_RESTORE';
-- CreateTable
-- One row per restore attempt. "status" defaults to 'PENDING' and
-- "started_at" defaults to the insert time; "completed_at" stays NULL until set.
-- "triggered_by_id" is nullable and has no foreign key declared in this migration.
CREATE TABLE "instance_restores" (
"id" TEXT NOT NULL,
"instance_id" TEXT NOT NULL,
"backup_id" TEXT NOT NULL,
"status" "RestoreStatus" NOT NULL DEFAULT 'PENDING',
"upload_id" TEXT,
"progress_json" JSONB,
"log_tail" TEXT,
"error_message" TEXT,
"triggered_by_id" TEXT,
"started_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"completed_at" TIMESTAMP(3),
CONSTRAINT "instance_restores_pkey" PRIMARY KEY ("id")
);
-- CreateIndex
-- Supports listing an instance's restores ordered/filtered by start time.
CREATE INDEX "instance_restores_instance_id_started_at_idx" ON "instance_restores"("instance_id", "started_at");
-- CreateIndex
-- Supports lookups of restores by the backup they were created from.
CREATE INDEX "instance_restores_backup_id_idx" ON "instance_restores"("backup_id");
-- AddForeignKey
-- Deleting an instance deletes its restore rows (ON DELETE CASCADE).
ALTER TABLE "instance_restores" ADD CONSTRAINT "instance_restores_instance_id_fkey" FOREIGN KEY ("instance_id") REFERENCES "instances"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
-- Deleting a backup deletes the restore rows that reference it (ON DELETE CASCADE).
ALTER TABLE "instance_restores" ADD CONSTRAINT "instance_restores_backup_id_fkey" FOREIGN KEY ("backup_id") REFERENCES "backups"("id") ON DELETE CASCADE ON UPDATE CASCADE;

View File

@ -0,0 +1,2 @@
-- AlterTable
-- Adds a nullable text column, so existing "instances" rows simply hold NULL.
ALTER TABLE "instances" ADD COLUMN "pangolin_subdomain_prefix" TEXT;

View File

@ -0,0 +1,2 @@
-- AlterEnum
-- Extends the existing "AuditAction" enum with one new label; no rows are modified.
ALTER TYPE "AuditAction" ADD VALUE 'PANGOLIN_TEARDOWN';

View File

@ -109,6 +109,7 @@ model Instance {
pangolinSiteId String? @map("pangolin_site_id") pangolinSiteId String? @map("pangolin_site_id")
pangolinNewtId String? @map("pangolin_newt_id") pangolinNewtId String? @map("pangolin_newt_id")
pangolinNewtSecret String? @map("pangolin_newt_secret") pangolinNewtSecret String? @map("pangolin_newt_secret")
pangolinSubdomainPrefix String? @map("pangolin_subdomain_prefix")
// SMTP // SMTP
smtpHost String? @map("smtp_host") smtpHost String? @map("smtp_host")
@ -125,6 +126,7 @@ model Instance {
portAllocations PortAllocation[] portAllocations PortAllocation[]
healthChecks HealthCheck[] healthChecks HealthCheck[]
backups Backup[] backups Backup[]
restores InstanceRestore[]
auditLogs AuditLog[] auditLogs AuditLog[]
upgrades InstanceUpgrade[] upgrades InstanceUpgrade[]
events InstanceEvent[] events InstanceEvent[]
@ -197,11 +199,43 @@ model Backup {
s3Key String? @map("s3_key") s3Key String? @map("s3_key")
instance Instance @relation(fields: [instanceId], references: [id], onDelete: Cascade) instance Instance @relation(fields: [instanceId], references: [id], onDelete: Cascade)
restores InstanceRestore[]
@@index([instanceId, startedAt]) @@index([instanceId, startedAt])
@@map("backups") @@map("backups")
} }
// ─── Restore ───────────────────────────────────────────────
// Status values for an InstanceRestore row. New rows start as PENDING
// (the model's default).
// NOTE(review): the exact transitions between these values are driven by the
// restore service, which is not visible here — confirm before relying on order.
enum RestoreStatus {
PENDING
UPLOADING
RUNNING
COMPLETED
FAILED
}
// One restore attempt of a Backup onto an Instance, stored in "instance_restores".
// Rows are removed automatically when either the instance or the backup is
// deleted (both relations use onDelete: Cascade).
model InstanceRestore {
id String @id @default(uuid())
instanceId String @map("instance_id")
backupId String @map("backup_id")
status RestoreStatus @default(PENDING)
// NOTE(review): presumably identifies the archive upload to the agent — confirm in the restore service
uploadId String? @map("upload_id")
progressJson Json? @map("progress_json")
logTail String? @map("log_tail")
errorMessage String? @map("error_message")
// Plain nullable string: no relation to a user model is declared for this field
triggeredById String? @map("triggered_by_id")
startedAt DateTime @default(now()) @map("started_at")
completedAt DateTime? @map("completed_at")
instance Instance @relation(fields: [instanceId], references: [id], onDelete: Cascade)
backup Backup @relation(fields: [backupId], references: [id], onDelete: Cascade)
// Composite index for per-instance listings by start time; single index for per-backup lookups
@@index([instanceId, startedAt])
@@index([backupId])
@@map("instance_restores")
}
// ─── Audit Log ───────────────────────────────────────────── // ─── Audit Log ─────────────────────────────────────────────
enum AuditAction { enum AuditAction {
@ -215,7 +249,9 @@ enum AuditAction {
SECRETS_VIEWED SECRETS_VIEWED
BACKUP_CREATE BACKUP_CREATE
BACKUP_DELETE BACKUP_DELETE
BACKUP_RESTORE
PANGOLIN_SETUP PANGOLIN_SETUP
PANGOLIN_TEARDOWN
PANGOLIN_SYNC PANGOLIN_SYNC
AGENT_CONNECT AGENT_CONNECT
AGENT_REGISTER AGENT_REGISTER

View File

@ -54,10 +54,11 @@ const envSchema = z.object({
USE_REGISTRY_IMAGES: z.enum(['true', 'false']).default('true').transform((v) => v === 'true'), USE_REGISTRY_IMAGES: z.enum(['true', 'false']).default('true').transform((v) => v === 'true'),
IMAGE_TAG: z.string().default('latest'), IMAGE_TAG: z.string().default('latest'),
// Pangolin (optional) // Pangolin (optional — for remote tunnel management)
PANGOLIN_API_URL: z.string().default(''), PANGOLIN_API_URL: z.string().default(''),
PANGOLIN_API_KEY: z.string().default(''), PANGOLIN_API_KEY: z.string().default(''),
PANGOLIN_ORG_ID: z.string().default(''), PANGOLIN_ORG_ID: z.string().default(''),
PANGOLIN_ENDPOINT: z.string().default(''), // Newt WebSocket URL (may differ from API URL)
// Health checks // Health checks
HEALTH_CHECK_INTERVAL_MS: z.coerce.number().default(300_000), // 5 min (0 to disable) HEALTH_CHECK_INTERVAL_MS: z.coerce.number().default(300_000), // 5 min (0 to disable)

View File

@ -169,7 +169,7 @@ router.post('/registrations/:id/approve', authenticate, requireRole('SUPER_ADMIN
}); });
// Issue mTLS certificates // Issue mTLS certificates
const certMaterials = await issueAgentCert(instance.id, registration.slug); const certMaterials = await issueAgentCert(instance.id, registration.slug, registration.agentUrl);
// Mark invite code as used // Mark invite code as used
const invite = await prisma.agentInviteCode.findUnique({ where: { id: registration.inviteCodeId } }); const invite = await prisma.agentInviteCode.findUnique({ where: { id: registration.inviteCodeId } });
@ -189,7 +189,7 @@ router.post('/registrations/:id/approve', authenticate, requireRole('SUPER_ADMIN
caCertPem: certMaterials.caCertPem, caCertPem: certMaterials.caCertPem,
agentCertPem: certMaterials.agentCertPem, agentCertPem: certMaterials.agentCertPem,
agentKeyPem: certMaterials.agentKeyPem, agentKeyPem: certMaterials.agentKeyPem,
ccpFingerprint: certMaterials.caFingerprint, ccpFingerprint: certMaterials.fingerprint,
}, },
}, },
}); });

View File

@ -4,11 +4,13 @@ import rateLimit from 'express-rate-limit';
import { prisma } from '../../lib/prisma'; import { prisma } from '../../lib/prisma';
import { authenticate, requireRole } from '../../middleware/auth'; import { authenticate, requireRole } from '../../middleware/auth';
import { validate } from '../../middleware/validate'; import { validate } from '../../middleware/validate';
import { createInstanceSchema, updateInstanceSchema, registerInstanceSchema, reconfigureInstanceSchema, configureTunnelSchema, importInstancesSchema } from './instances.schemas'; import { createInstanceSchema, updateInstanceSchema, registerInstanceSchema, reconfigureInstanceSchema, configureTunnelSchema, importInstancesSchema, startUpgradeSchema, setupRemoteTunnelSchema } from './instances.schemas';
import * as instancesService from './instances.service'; import * as instancesService from './instances.service';
import * as healthService from '../../services/health.service'; import * as healthService from '../../services/health.service';
import * as backupService from '../../services/backup.service'; import * as backupService from '../../services/backup.service';
import * as restoreService from '../../services/restore.service';
import * as upgradeService from '../../services/upgrade.service'; import * as upgradeService from '../../services/upgrade.service';
import * as tunnelService from '../../services/tunnel.service';
import { discoverInstances } from '../../services/discovery.service'; import { discoverInstances } from '../../services/discovery.service';
const secretsLimiter = rateLimit({ const secretsLimiter = rateLimit({
@ -186,6 +188,18 @@ router.delete(
'/:id/tunnel', '/:id/tunnel',
requireRole('SUPER_ADMIN', 'OPERATOR'), requireRole('SUPER_ADMIN', 'OPERATOR'),
async (req: Request, res: Response) => { async (req: Request, res: Response) => {
// Branch: remote instances use the CCP's Pangolin API to teardown;
// local instances use the existing manual removal logic.
const instance = await prisma.instance.findUnique({ where: { id: req.params.id as string } });
if (instance?.isRemote && instance.pangolinSiteId) {
const result = await tunnelService.teardownTunnel(
req.params.id as string,
req.user!.id,
req.ip
);
res.json({ data: result });
return;
}
const result = await instancesService.removeTunnel( const result = await instancesService.removeTunnel(
req.params.id as string, req.params.id as string,
req.user!.id, req.user!.id,
@ -195,6 +209,47 @@ router.delete(
} }
); );
// Remote tunnel setup via CCP's Pangolin API credentials.
// SUPER_ADMIN only; the body is validated by setupRemoteTunnelSchema and the
// service result is returned with 201.
router.post(
  '/:id/tunnel/setup',
  requireRole('SUPER_ADMIN'),
  validate(setupRemoteTunnelSchema),
  async (req: Request, res: Response) => {
    const body = req.body || {};
    const tunnel = await tunnelService.setupTunnel(
      req.params.id as string,
      { subdomainPrefix: body.subdomainPrefix },
      req.user!.id,
      req.ip
    );
    res.status(201).json({ data: tunnel });
  }
);
// Get tunnel status (resource matrix) — works for both local and remote.
// Readable by SUPER_ADMIN and OPERATOR.
router.get(
  '/:id/tunnel/status',
  requireRole('SUPER_ADMIN', 'OPERATOR'),
  async (req: Request, res: Response) => {
    const instanceId = req.params.id as string;
    const tunnelStatus = await tunnelService.getTunnelStatus(instanceId);
    res.json({ data: tunnelStatus });
  }
);
// Re-sync resources (idempotent — creates missing, leaves existing).
// SUPER_ADMIN only; the acting user's id and request IP are forwarded to the service.
router.post(
  '/:id/tunnel/sync',
  requireRole('SUPER_ADMIN'),
  async (req: Request, res: Response) => {
    const instanceId = req.params.id as string;
    const actorId = req.user!.id;
    const synced = await tunnelService.syncResources(instanceId, actorId, req.ip);
    res.json({ data: synced });
  }
);
// ─── Lifecycle Endpoints ───────────────────────────────────────────── // ─── Lifecycle Endpoints ─────────────────────────────────────────────
router.post( router.post(
@ -280,6 +335,7 @@ router.post(
router.post( router.post(
'/:id/upgrade', '/:id/upgrade',
requireRole('SUPER_ADMIN', 'OPERATOR'), requireRole('SUPER_ADMIN', 'OPERATOR'),
validate(startUpgradeSchema),
async (req: Request, res: Response) => { async (req: Request, res: Response) => {
const { skipBackup, useRegistry, branch } = req.body || {}; const { skipBackup, useRegistry, branch } = req.body || {};
const upgrade = await upgradeService.startUpgrade( const upgrade = await upgradeService.startUpgrade(
@ -356,4 +412,76 @@ router.get(
} }
); );
// ─── Restores ──────────────────────────────────────────────────────
/**
 * POST /:id/restore
 * Body: { backupId, options? }
 *
 * Kicks off a restore of `backupId` onto instance `:id` and responds 201
 * with the InstanceRestore row right away; callers poll GET /:id/restores
 * or GET /:id/restores/:restoreId for status.
 *
 * DESTRUCTIVE: overwrites databases and uploads. Requires SUPER_ADMIN.
 *
 * Errors:
 *   400 VALIDATION              backupId missing or not a string
 *   404 NOT_FOUND               no backup with that id
 *   400 CROSS_INSTANCE_RESTORE  backup belongs to a different instance
 *
 * NOTE(review): `options` is forwarded to the restore service as received;
 * no schema validation is applied to it in this handler.
 */
router.post(
  '/:id/restore',
  requireRole('SUPER_ADMIN'),
  async (req: Request, res: Response) => {
    const targetInstanceId = req.params.id as string;
    const payload = req.body ?? {};
    const backupId = payload.backupId;
    const options = payload.options;

    // Small helper so every rejection uses the same { error: { message, code } } envelope.
    const reject = (status: number, message: string, code: string): void => {
      res.status(status).json({ error: { message, code } });
    };

    if (typeof backupId !== 'string' || backupId === '') {
      reject(400, 'backupId (string) is required', 'VALIDATION');
      return;
    }

    // Defensive: the backup must exist and must belong to the instance in the URL.
    const backup = await prisma.backup.findUnique({ where: { id: backupId } });
    if (!backup) {
      reject(404, 'Backup not found', 'NOT_FOUND');
      return;
    }
    if (backup.instanceId !== targetInstanceId) {
      reject(
        400,
        'Backup does not belong to this instance (cross-instance restore is not supported)',
        'CROSS_INSTANCE_RESTORE'
      );
      return;
    }

    const restore = await restoreService.createRestore({
      backupId,
      triggeredById: req.user!.id,
      ipAddress: req.ip,
      options,
    });
    res.status(201).json({ data: restore });
  }
);
// GET /:id/restores — paginated restore history for one instance.
// `page` defaults to 1 (minimum 1); `limit` defaults to 50 and is clamped to 1..100.
router.get(
  '/:id/restores',
  requireRole('SUPER_ADMIN', 'OPERATOR'),
  async (req: Request, res: Response) => {
    // Unparseable or zero values fall back to the defaults before clamping.
    const requestedPage = parseInt(req.query.page as string, 10) || 1;
    const requestedLimit = parseInt(req.query.limit as string, 10) || 50;
    const page = Math.max(1, requestedPage);
    const limit = Math.min(100, Math.max(1, requestedLimit));

    const listing = await restoreService.listRestores(req.params.id as string, page, limit);
    res.json(listing);
  }
);
// GET /:id/restores/:restoreId — fetch a single restore.
// A restore that belongs to a different instance than `:id` is reported as
// 404 rather than being returned.
router.get(
  '/:id/restores/:restoreId',
  requireRole('SUPER_ADMIN', 'OPERATOR'),
  async (req: Request, res: Response) => {
    const { id: instanceId, restoreId } = req.params;
    const restore = await restoreService.getRestore(restoreId as string);
    if (restore.instanceId === instanceId) {
      res.json({ data: restore });
      return;
    }
    res.status(404).json({ error: { message: 'Restore not found', code: 'NOT_FOUND' } });
  }
);
export default router; export default router;

View File

@ -108,9 +108,32 @@ export const importInstancesSchema = z.object({
instances: z.array(registerInstanceSchema).min(1).max(50), instances: z.array(registerInstanceSchema).min(1).max(50),
}); });
// SECURITY: the local `runUpgrade` path interpolates the branch name into a
// shell command string (exec, not spawn), so the allow-list here must be as
// strict as the one the agent applies on its side. The pattern requires an
// alphanumeric first character (so a name can never start with `-` and be
// mistaken for a flag) followed by up to 99 characters from
// [a-zA-Z0-9_./-], which excludes shell metacharacters and anything exotic.
const UPGRADE_BRANCH_PATTERN = /^[a-zA-Z0-9][a-zA-Z0-9_.\/-]{0,99}$/;

// Request body for POST /:id/upgrade — every field is optional.
export const startUpgradeSchema = z.object({
  skipBackup: z.boolean().optional(),
  useRegistry: z.boolean().optional(),
  branch: z.string().regex(UPGRADE_BRANCH_PATTERN, 'Invalid branch name').optional(),
});
// Lowercase letters, digits and hyphens only.
// NOTE(review): a leading or trailing hyphen is accepted by this pattern —
// confirm that is acceptable wherever the prefix is turned into a hostname.
const SUBDOMAIN_PREFIX_PATTERN = /^[a-z0-9-]+$/;

// Request body for POST /:id/tunnel/setup — the prefix is optional, 1..50 chars.
export const setupRemoteTunnelSchema = z.object({
  subdomainPrefix: z
    .string()
    .min(1)
    .max(50)
    .regex(SUBDOMAIN_PREFIX_PATTERN, 'Prefix must be lowercase alphanumeric with hyphens')
    .optional(),
});
export type CreateInstanceInput = z.infer<typeof createInstanceSchema>; export type CreateInstanceInput = z.infer<typeof createInstanceSchema>;
export type UpdateInstanceInput = z.infer<typeof updateInstanceSchema>; export type UpdateInstanceInput = z.infer<typeof updateInstanceSchema>;
export type RegisterInstanceInput = z.infer<typeof registerInstanceSchema>; export type RegisterInstanceInput = z.infer<typeof registerInstanceSchema>;
export type ReconfigureInstanceInput = z.infer<typeof reconfigureInstanceSchema>; export type ReconfigureInstanceInput = z.infer<typeof reconfigureInstanceSchema>;
export type ConfigureTunnelInput = z.infer<typeof configureTunnelSchema>; export type ConfigureTunnelInput = z.infer<typeof configureTunnelSchema>;
export type ImportInstancesInput = z.infer<typeof importInstancesSchema>; export type ImportInstancesInput = z.infer<typeof importInstancesSchema>;
export type StartUpgradeInput = z.infer<typeof startUpgradeSchema>;

View File

@ -8,6 +8,12 @@ import { env } from './config/env';
import { logger } from './utils/logger'; import { logger } from './utils/logger';
import { errorHandler } from './middleware/error-handler'; import { errorHandler } from './middleware/error-handler';
// BigInt JSON serialization.
// BigInt has no toJSON by default, so JSON.stringify (and therefore
// res.json()) throws on Prisma BigInt columns such as Backup.sizeBytes.
// Install one that emits the decimal string form instead.
Object.assign(BigInt.prototype, {
  toJSON(this: bigint): string {
    return this.toString();
  },
});
// Route imports // Route imports
import authRoutes from './modules/auth/auth.routes'; import authRoutes from './modules/auth/auth.routes';
import instanceRoutes from './modules/instances/instances.routes'; import instanceRoutes from './modules/instances/instances.routes';

View File

@ -1,5 +1,6 @@
import { Prisma, BackupStatus, AuditAction, InstanceStatus } from '@prisma/client'; import { Prisma, BackupStatus, AuditAction, InstanceStatus } from '@prisma/client';
import fs from 'fs/promises'; import fs from 'fs/promises';
import { createReadStream } from 'fs';
import path from 'path'; import path from 'path';
import crypto from 'crypto'; import crypto from 'crypto';
import { execFile as execFileCb } from 'child_process'; import { execFile as execFileCb } from 'child_process';
@ -10,6 +11,7 @@ import { AppError } from '../middleware/error-handler';
import { decryptJson } from '../utils/encryption'; import { decryptJson } from '../utils/encryption';
import * as docker from './docker.service'; import * as docker from './docker.service';
import { logger } from '../utils/logger'; import { logger } from '../utils/logger';
import { getRemoteDriverForInstance } from './execution-driver';
const execFile = promisify(execFileCb); const execFile = promisify(execFileCb);
/** /**
@ -24,11 +26,16 @@ function assertPathWithinBoundary(filePath: string, boundary: string, label: str
} }
/**
 * Compute the SHA-256 hash of a file by streaming its contents.
 *
 * The file is read chunk by chunk, so memory use does not grow with file
 * size (backup archives can be large).
 *
 * @param filePath  path of the file to hash
 * @returns lowercase hex SHA-256 digest
 * @throws rejects with the underlying stream error (e.g. ENOENT) if the
 *         file cannot be opened or read
 */
async function fileHash(filePath: string): Promise<string> {
  const hash = crypto.createHash('sha256');
  // Async iteration rejects on stream errors and destroys the stream if the
  // loop exits early, so no manual event wiring or cleanup is needed.
  for await (const chunk of createReadStream(filePath)) {
    hash.update(chunk);
  }
  return hash.digest('hex');
}
/** /**
@ -52,7 +59,11 @@ export async function createBackup(instanceId: string, userId?: string, ipAddres
throw new AppError(400, `Cannot backup instance in ${instance.status} state`, 'INVALID_STATE'); throw new AppError(400, `Cannot backup instance in ${instance.status} state`, 'INVALID_STATE');
} }
if ((instance as { isRegistered?: boolean }).isRegistered) { // `isRegistered` + `isRemote` = a remote CCP-managed instance (agent on the
// far side). `isRegistered` alone (without `isRemote`) would mean a local
// host-managed instance that CCP doesn't own the compose files for — that
// case we still can't back up.
if (instance.isRegistered && !instance.isRemote) {
throw new AppError(400, 'Backups not managed by CCP for registered instances', 'NOT_MANAGED'); throw new AppError(400, 'Backups not managed by CCP for registered instances', 'NOT_MANAGED');
} }
@ -72,9 +83,31 @@ export async function createBackup(instanceId: string, userId?: string, ipAddres
return backup; return backup;
} }
/**
 * The subset of an instance record that the backup routines take as input.
 */
type BackupInstance = {
id: string;
// Used in log lines and in the archive path/name for remote backups
slug: string;
// NOTE(review): basePath, composeProject and encryptedSecrets are presumably
// consumed by the local backup path, which is not visible here — confirm
basePath: string;
composeProject: string;
encryptedSecrets: string | null;
// Selects the remote (agent) backup path over the local one in performBackup
isRemote: boolean;
// Passed to getRemoteDriverForInstance for remote backups; may be null
agentUrl: string | null;
};
/**
 * Dispatch a backup run to the matching implementation.
 *
 * Remote instances (`instance.isRemote`) are backed up through the agent via
 * performRemoteBackup; everything else goes through performLocalBackup. All
 * arguments are passed through unchanged.
 *
 * @param backupId   id of the Backup row tracking this run
 * @param instance   instance fields the backup routines need
 * @param userId     acting user, if any
 * @param ipAddress  request IP, if any
 */
async function performBackup(
  backupId: string,
  instance: BackupInstance,
  userId?: string,
  ipAddress?: string
) {
  if (instance.isRemote) {
    return performRemoteBackup(backupId, instance, userId, ipAddress);
  }
  return performLocalBackup(backupId, instance, userId, ipAddress);
}
async function performLocalBackup(
backupId: string,
instance: BackupInstance,
userId?: string, userId?: string,
ipAddress?: string ipAddress?: string
) { ) {
@ -221,6 +254,168 @@ async function performBackup(
} }
} }
/**
 * Run a backup on a remote agent and pull the resulting archive into CCP storage.
 *
 * Flow:
 *   1. Tell the agent to run scripts/backup.sh; it reports
 *      { backupId, filename, sizeBytes, sha256, manifest, createdAt }
 *   2. Resolve $BACKUP_STORAGE_PATH/{slug}/backup-{slug}-{backupId}.tar.gz
 *      and assert it stays inside the storage root
 *   3. Stream the archive from the agent to that path and check the byte count
 *   4. Re-hash the local file and compare with the agent-reported SHA-256
 *   5. Mark the Backup row COMPLETED and (when a user is known) write the audit entry
 *   6. Tell the agent to delete its local copy (best effort, reclaims remote disk)
 *
 * The agent-side copy is deleted only after step 5 has succeeded. Any failure
 * before that marks the row FAILED, removes the partial local file and leaves
 * the agent-side archive in place so the operator can retry the download.
 *
 * @param backupId   id of the Backup row to update
 * @param instance   instance being backed up (must be remote)
 * @param userId     acting user; when set, a BACKUP_CREATE audit entry is written
 * @param ipAddress  request IP recorded on the audit entry
 * @throws rethrows the error that caused the backup to fail
 */
async function performRemoteBackup(
  backupId: string,
  instance: BackupInstance,
  userId?: string,
  ipAddress?: string
) {
  let archivePath: string | null = null;
  let agentBackupId: string | null = null;

  try {
    await prisma.backup.update({
      where: { id: backupId },
      data: { status: BackupStatus.IN_PROGRESS },
    });

    const driver = await getRemoteDriverForInstance({
      id: instance.id,
      slug: instance.slug,
      isRemote: instance.isRemote,
      agentUrl: instance.agentUrl,
    });

    // 1. Trigger the backup on the agent (this blocks until backup.sh completes)
    logger.info(`[backup] ${instance.slug}: triggering remote backup via agent`);
    const result = await driver.createBackup();
    agentBackupId = result.backupId;
    logger.info(
      `[backup] ${instance.slug}: agent backup complete — ${result.filename} ` +
      `(${(result.sizeBytes / 1024 / 1024).toFixed(1)} MB, sha256=${result.sha256.substring(0, 16)}...)`
    );

    // 2. Resolve the destination archive path on CCP storage
    const archiveName = `backup-${instance.slug}-${result.backupId}.tar.gz`;
    archivePath = path.join(env.BACKUP_STORAGE_PATH, instance.slug, archiveName);

    // Path traversal guard: both the slug and the agent-supplied backupId end
    // up in the path, so assert before touching the filesystem.
    assertPathWithinBoundary(archivePath, env.BACKUP_STORAGE_PATH, 'Backup archive');

    await fs.mkdir(path.dirname(archivePath), { recursive: true });

    // 3. Stream the archive from the agent to CCP storage
    logger.info(`[backup] ${instance.slug}: streaming archive to ${archivePath}`);
    const { bytesWritten } = await driver.downloadBackup(result.backupId, archivePath);

    if (bytesWritten !== result.sizeBytes) {
      throw new Error(
        `Downloaded size ${bytesWritten} does not match agent-reported size ${result.sizeBytes}`
      );
    }

    // 4. Re-hash the downloaded file and compare to the agent-reported hash.
    //
    // SECURITY NOTE: this check authenticates *transmission integrity* only,
    // not content integrity against a malicious agent. Both the file bytes
    // and the expected hash are supplied by the (semi-trusted) agent, so a
    // compromised agent can trivially make this check pass while delivering
    // arbitrary content. The check still catches accidental corruption (bit
    // flips, truncation) and is essentially free.
    //
    // The mTLS channel guarantees that the bytes weren't modified in transit
    // by an outside attacker. The remaining trust gap — "what if the agent
    // itself is compromised?" — must be addressed before Phase B (restore)
    // ships, since restore feeds the archive into pg_restore. Either:
    //   (a) HMAC-sign the hash on the agent with its mTLS private key and
    //       verify on the CCP using the agent cert public key, or
    //   (b) limit restore operations to require an additional out-of-band
    //       admin confirmation step.
    const localSha256 = await fileHash(archivePath);
    if (localSha256 !== result.sha256) {
      throw new Error(
        `SHA256 mismatch: agent reported ${result.sha256}, local file hashed ${localSha256}`
      );
    }

    // 5. Persist the result. Store sha256 and agentBackupId inside the manifest
    //    since we don't have dedicated columns.
    const mergedManifest = {
      ...((result.manifest as Record<string, unknown> | null) ?? {}),
      source: 'remote',
      agentBackupId: result.backupId,
      sha256: result.sha256,
      createdAt: result.createdAt,
    };

    await prisma.backup.update({
      where: { id: backupId },
      data: {
        status: BackupStatus.COMPLETED,
        archivePath,
        sizeBytes: BigInt(bytesWritten),
        manifest: mergedManifest as unknown as Prisma.InputJsonValue,
        completedAt: new Date(),
      },
    });

    if (userId) {
      await prisma.auditLog.create({
        data: {
          userId,
          instanceId: instance.id,
          action: AuditAction.BACKUP_CREATE,
          details: {
            backupId,
            archiveName,
            sizeBytes: bytesWritten,
            source: 'remote',
            agentBackupId: result.backupId,
          },
          ipAddress,
        },
      });
    }

    // 6. Reclaim disk on the remote agent. This runs only after the result
    //    has been persisted: if it ran earlier, a later DB failure would make
    //    the catch block below unlink the only remaining copy of the archive.
    try {
      await driver.deleteBackup(result.backupId);
    } catch (err) {
      logger.warn(
        `[backup] ${instance.slug}: failed to delete remote backup ${result.backupId}: ${(err as Error).message}`
      );
      // Non-fatal — CCP has the archive, remote copy will age out next retention sweep
    }

    logger.info(
      `[backup] ${instance.slug}: remote backup stored at ${archivePath} ` +
      `(${(bytesWritten / 1024 / 1024).toFixed(1)} MB)`
    );
  } catch (err) {
    // Record the failure, but never let a failing status update hide the
    // error that actually broke the backup.
    try {
      await prisma.backup.update({
        where: { id: backupId },
        data: {
          status: BackupStatus.FAILED,
          errorMessage: (err as Error).message,
          completedAt: new Date(),
        },
      });
    } catch (updateErr) {
      logger.error(
        `[backup] ${instance.slug}: could not mark backup ${backupId} as FAILED: ${(updateErr as Error).message}`
      );
    }

    // Clean up any partial local file; leave the remote copy so retry is possible
    if (archivePath) {
      try { await fs.unlink(archivePath); } catch { /* ignore */ }
    }

    if (agentBackupId) {
      logger.warn(
        `[backup] ${instance.slug}: leaving agent-side backup ${agentBackupId} in place for retry`
      );
    }

    throw err;
  }
}
/** /**
* Delete a backup (file + DB record). * Delete a backup (file + DB record).
*/ */

View File

@ -0,0 +1,368 @@
/**
* Pangolin Integration API client for the CCP.
*
* Ported from the main CML's pangolin.client.ts. Adapted to:
* - Accept credentials via constructor (not env singleton)
* - Be instantiable per-call so the CCP can use its own API token
* to manage tunnels for multiple remote instances
*
* The CCP never exposes its Pangolin API key to remote instances; it only
* pushes the resulting Newt credentials via the agent's writeFiles.
*/
import { logger } from '../utils/logger';
// ─── Types ─────────────────────────────────────────────────────────
/** Site record as returned by listSites(), getSite() and createSite(). */
export interface PangolinSite {
siteId: string;
name: string;
orgId: string;
niceId: string;
pubKey?: string;
subnet?: string;
megabytesIn?: number;
megabytesOut?: number;
lastSeen?: string;
online?: boolean;
type?: string;
address?: string;
}
/** Exit-node record as returned by listExitNodes(). */
export interface PangolinExitNode {
exitNodeId: string;
name: string;
location?: string;
region?: string;
online: boolean;
capacity?: number;
latency?: number;
}
/** Resource record as returned by the resource list/get/create/update calls. */
export interface PangolinResource {
resourceId: string;
name: string;
subdomain?: string;
fullDomain?: string;
ssl?: boolean;
blockAccess?: boolean;
active?: boolean;
proxyPort?: number;
protocol?: string;
domainBindings?: string[];
http?: boolean;
targets?: PangolinTarget[];
}
/** Target record as returned by createTarget() and listTargets(). */
export interface PangolinTarget {
targetId: string;
resourceId: string;
siteId: string;
ip: string;
port: number;
method: string;
enabled?: boolean;
}
/** Newt credential triple; appears as the optional `newt` member of the createSite() result. */
export interface PangolinNewt {
newtId: string;
secret: string;
siteId: string;
}
/**
* Normalised result of pickSiteDefaults(). That method maps the response's
* `newtSecret`/`secret` and `clientAddress`/`address` fields onto these names.
*/
export interface PangolinSiteDefaults {
newtId: string;
newtSecret: string;
address: string;
}
/** Request body accepted by createSite(). */
export interface CreateSitePayload {
name: string;
type?: string;
subnet?: string;
exitNodeId?: string;
newtId?: string;
// Credential field: redactSecrets() masks `secret` before the body is logged.
secret?: string;
address?: string;
}
/** Request body accepted by createResource(); `http` and `protocol` are fixed literals. */
export interface CreateHttpResourcePayload {
name: string;
domainId: string;
// createResource() logs "(root)" when this is omitted or empty.
subdomain?: string;
http: true;
protocol: 'tcp';
}
/** Request body accepted by createTarget(). */
export interface CreateTargetPayload {
// createTarget() converts this with Number() before sending.
siteId: string | number;
ip: string;
port: number;
method: 'http' | 'https';
enabled?: boolean;
}
/** Domain record as returned by listDomains(). */
export interface PangolinDomain {
domainId: string;
baseDomain: string;
verified: boolean;
type?: string;
failed?: boolean;
configManaged?: boolean;
}
/** Request body accepted by updateResource(); every field is optional. */
export interface UpdateResourcePayload {
name?: string;
subdomain?: string;
fullDomain?: string;
ssl?: boolean;
sso?: boolean;
active?: boolean;
blockAccess?: boolean;
proxyPort?: number;
protocol?: string;
domainBindings?: string[];
}
/** Request body accepted by updateCertificate(). */
export interface UpdateCertificatePayload {
autoRenew?: boolean;
}
/** Certificate record as returned by getCertificate() and updateCertificate(). */
export interface PangolinCertificate {
certId: string;
domainId: string;
domain: string;
status: 'PENDING' | 'ACTIVE' | 'EXPIRED' | 'FAILED';
issuedAt?: string;
expiresAt?: string;
autoRenew?: boolean;
issuer?: string;
}
/** Connected-client record as returned by listClients(). */
export interface PangolinConnectedClient {
clientId: string;
resourceId: string;
ipAddress: string;
connectedAt: string;
lastSeen: string;
bytesIn: number;
bytesOut: number;
online: boolean;
}
// ─── Helpers ───────────────────────────────────────────────────────
/**
 * Return a log-safe copy of a Pangolin API request body.
 *
 * The `secret` and `newtSecret` properties of an object body are replaced
 * with the literal `[REDACTED]`. The input is never mutated. Non-object and
 * falsy values are returned unchanged.
 *
 * Arrays are redacted element by element and stay arrays. Spreading an array
 * into an object literal would log it as an index-keyed object and leave any
 * credentials inside its elements untouched.
 *
 * @param body - Request body about to be logged.
 * @returns A redacted shallow copy, or `body` itself when there is nothing to copy.
 */
function redactSecrets(body: unknown): unknown {
  if (!body || typeof body !== 'object') return body;
  if (Array.isArray(body)) return body.map((item) => redactSecrets(item));

  const redacted: Record<string, unknown> = { ...(body as Record<string, unknown>) };
  if ('secret' in redacted) redacted.secret = '[REDACTED]';
  if ('newtSecret' in redacted) redacted.newtSecret = '[REDACTED]';
  return redacted;
}
// ─── Client ────────────────────────────────────────────────────────
/**
 * HTTP client for the Pangolin Integration API, bound to the base URL, API
 * key and organisation id passed to the constructor.
 *
 * Every call goes through `request()`, which:
 *  - refuses to run when any of the three constructor values is empty,
 *  - sends the API key as a Bearer token,
 *  - aborts after 15 seconds and reports that as a timeout error,
 *  - unwraps `{ success, data }` response envelopes.
 *
 * Dynamic path segments (ids, domain names) are percent-encoded so a value
 * containing `/`, `?` or `#` cannot change which endpoint is addressed.
 */
export class CcpPangolinClient {
  constructor(
    private baseUrl: string,
    private apiKey: string,
    private orgId: string
  ) {}

  /** True when base URL, API key and organisation id are all non-empty. */
  get configured(): boolean {
    return !!(this.baseUrl && this.apiKey && this.orgId);
  }

  /** Base URL without trailing slashes, so `${root}${path}` never contains `//`. */
  private get rootUrl(): string {
    return this.baseUrl.replace(/\/+$/, '');
  }

  /** Percent-encode a single dynamic URL path segment. */
  private seg(value: string | number): string {
    return encodeURIComponent(String(value));
  }

  /**
   * Perform one API call and return the (unwrapped) JSON payload.
   *
   * @param method - HTTP verb.
   * @param path - Path starting with `/`, appended to the base URL.
   * @param body - Optional JSON body; logged with credentials redacted.
   * @returns The unwrapped JSON payload, or `{}` for non-JSON responses.
   * @throws Error when the client is not configured, the call times out, or
   *         the server answers with a non-2xx status.
   */
  private async request<T>(method: string, path: string, body?: unknown): Promise<T> {
    if (!this.configured) {
      throw new Error('Pangolin API not configured. Set PANGOLIN_API_URL, PANGOLIN_API_KEY, PANGOLIN_ORG_ID in CCP .env');
    }
    const timeoutMs = 15000;
    const url = `${this.rootUrl}${path}`;
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), timeoutMs);
    try {
      logger.debug(`[pangolin] ${method} ${path}${body ? ` body=${JSON.stringify(redactSecrets(body))}` : ''}`);
      const res = await fetch(url, {
        method,
        headers: {
          'Authorization': `Bearer ${this.apiKey}`,
          'Content-Type': 'application/json',
        },
        body: body ? JSON.stringify(body) : undefined,
        signal: controller.signal,
      });
      if (!res.ok) {
        const text = await res.text().catch(() => '');
        throw new Error(`Pangolin API ${method} ${path} returned ${res.status}: ${text}`);
      }
      const contentType = res.headers.get('content-type') || '';
      if (contentType.includes('application/json')) {
        const json = await res.json();
        return this.unwrapResponse<T>(json);
      }
      return {} as T;
    } catch (err) {
      // An aborted fetch rejects with a generic abort error; say what actually happened.
      if (controller.signal.aborted) {
        throw new Error(`Pangolin API ${method} ${path} timed out after ${timeoutMs}ms`);
      }
      throw err;
    } finally {
      clearTimeout(timeout);
    }
  }

  /** Return `json.data` when the payload is a `{ success, data }` envelope, otherwise the payload itself. */
  private unwrapResponse<T>(json: unknown): T {
    if (json && typeof json === 'object' && !Array.isArray(json)) {
      const obj = json as Record<string, unknown>;
      if ('data' in obj && 'success' in obj) {
        return obj.data as T;
      }
    }
    return json as T;
  }

  // ─── Health ───────────────────────────────────────────────────

  /** Probe the API root with a 5 second budget. Never throws; any failure yields `false`. */
  async healthCheck(): Promise<boolean> {
    try {
      const controller = new AbortController();
      const timeout = setTimeout(() => controller.abort(), 5000);
      try {
        const res = await fetch(`${this.rootUrl}/`, {
          headers: { 'Authorization': `Bearer ${this.apiKey}` },
          signal: controller.signal,
        });
        return res.ok;
      } finally {
        clearTimeout(timeout);
      }
    } catch {
      return false;
    }
  }

  // ─── Site Defaults ────────────────────────────────────────────

  /**
   * Fetch pre-allocated Newt credentials for a new site.
   *
   * @throws Error when the response carries no Newt id or secret.
   */
  async pickSiteDefaults(): Promise<PangolinSiteDefaults> {
    const res = await this.request<unknown>('GET', `/org/${this.seg(this.orgId)}/pick-site-defaults`);
    // A null or primitive payload is treated as "no fields" so the check below reports it clearly.
    const obj = res && typeof res === 'object' ? (res as Record<string, unknown>) : {};
    const newtId = (obj.newtId as string) || '';
    const newtSecret = (obj.newtSecret as string) || (obj.secret as string) || '';
    const address = (obj.clientAddress as string) || (obj.address as string) || '';
    if (!newtId || !newtSecret) {
      throw new Error('Pangolin did not return Newt credentials from pick-site-defaults');
    }
    return { newtId, newtSecret, address };
  }

  // ─── Sites ────────────────────────────────────────────────────

  /** List the organisation's sites. */
  async listSites(): Promise<PangolinSite[]> {
    const res = await this.request<unknown>('GET', `/org/${this.seg(this.orgId)}/sites`);
    return this.extractArray(res, 'sites', 'listSites');
  }

  /** Fetch one site by id. */
  async getSite(siteId: string): Promise<PangolinSite> {
    return this.request<PangolinSite>('GET', `/site/${this.seg(siteId)}`);
  }

  /** Create a site in the organisation. */
  async createSite(data: CreateSitePayload): Promise<PangolinSite & { newt?: PangolinNewt }> {
    return this.request<PangolinSite & { newt?: PangolinNewt }>('PUT', `/org/${this.seg(this.orgId)}/site`, data);
  }

  /** Delete a site by id. */
  async deleteSite(siteId: string): Promise<void> {
    await this.request<void>('DELETE', `/site/${this.seg(siteId)}`);
  }

  /** List exit nodes. Best effort: any failure is logged at debug level and yields an empty list. */
  async listExitNodes(): Promise<PangolinExitNode[]> {
    try {
      const res = await this.request<unknown>('GET', `/org/${this.seg(this.orgId)}/exit-nodes`);
      return this.extractArray(res, 'exitNodes', 'listExitNodes');
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      logger.debug(`[pangolin] listExitNodes: returning empty list after error: ${reason}`);
      return [];
    }
  }

  // ─── Resources ────────────────────────────────────────────────

  /** List the organisation's resources. */
  async listResources(): Promise<PangolinResource[]> {
    const res = await this.request<unknown>('GET', `/org/${this.seg(this.orgId)}/resources`);
    return this.extractArray(res, 'resources', 'listResources');
  }

  /** Fetch one resource by id. */
  async getResource(resourceId: string): Promise<PangolinResource> {
    return this.request<PangolinResource>('GET', `/resource/${this.seg(resourceId)}`);
  }

  /** Create an HTTP resource in the organisation. */
  async createResource(data: CreateHttpResourcePayload): Promise<PangolinResource> {
    logger.info(`[pangolin] createResource: ${data.name} (subdomain: ${data.subdomain || '(root)'})`);
    return this.request<PangolinResource>('PUT', `/org/${this.seg(this.orgId)}/resource`, data);
  }

  /** Update a resource by id. */
  async updateResource(resourceId: string, data: UpdateResourcePayload): Promise<PangolinResource> {
    return this.request<PangolinResource>('POST', `/resource/${this.seg(resourceId)}`, data);
  }

  /** Delete a resource by id. */
  async deleteResource(resourceId: string): Promise<void> {
    await this.request<void>('DELETE', `/resource/${this.seg(resourceId)}`);
  }

  // ─── Targets ──────────────────────────────────────────────────

  /**
   * Attach a target to a resource. `siteId` is sent as a number.
   *
   * @throws Error when `data.siteId` cannot be converted to a finite number.
   *         Without this check it would be serialised as `null`.
   */
  async createTarget(resourceId: string, data: CreateTargetPayload): Promise<PangolinTarget> {
    const siteId = Number(data.siteId);
    if (!Number.isFinite(siteId)) {
      throw new Error(`Pangolin createTarget: siteId must be numeric, got ${JSON.stringify(data.siteId)}`);
    }
    logger.info(`[pangolin] createTarget: resource=${resourceId}, ip=${data.ip}:${data.port}`);
    const payload = { ...data, siteId };
    return this.request<PangolinTarget>('PUT', `/resource/${this.seg(resourceId)}/target`, payload);
  }

  /** List the targets of a resource. */
  async listTargets(resourceId: string): Promise<PangolinTarget[]> {
    const res = await this.request<unknown>('GET', `/resource/${this.seg(resourceId)}/targets`);
    return this.extractArray(res, 'targets', 'listTargets');
  }

  /** Delete a target by id. */
  async deleteTarget(targetId: string): Promise<void> {
    await this.request<void>('DELETE', `/target/${this.seg(targetId)}`);
  }

  // ─── Domains ──────────────────────────────────────────────────

  /** List the organisation's domains. */
  async listDomains(): Promise<PangolinDomain[]> {
    const res = await this.request<unknown>('GET', `/org/${this.seg(this.orgId)}/domains`);
    return this.extractArray(res, 'domains', 'listDomains');
  }

  // ─── Certificates ─────────────────────────────────────────────

  /** Fetch the certificate for a domain id / domain name pair. */
  async getCertificate(domainId: string, domain: string): Promise<PangolinCertificate> {
    return this.request<PangolinCertificate>(
      'GET',
      `/org/${this.seg(this.orgId)}/certificate/${this.seg(domainId)}/${this.seg(domain)}`
    );
  }

  /** Update a certificate by id. */
  async updateCertificate(certId: string, data: UpdateCertificatePayload): Promise<PangolinCertificate> {
    return this.request<PangolinCertificate>('POST', `/certificate/${this.seg(certId)}`, data);
  }

  // ─── Clients ──────────────────────────────────────────────────

  /** List the clients connected to a resource. */
  async listClients(resourceId: string): Promise<PangolinConnectedClient[]> {
    const res = await this.request<unknown>('GET', `/resource/${this.seg(resourceId)}/clients`);
    return this.extractArray(res, 'clients', 'listClients');
  }

  // ─── Helpers ──────────────────────────────────────────────────

  /**
   * Pull a list out of the response shapes this client tolerates: a bare
   * array, `{ [key]: [...] }`, `{ data: { [key]: [...] } }` or `{ data: [...] }`.
   * Anything else logs a warning and yields an empty array.
   */
  private extractArray<T>(res: unknown, key: string, context: string): T[] {
    if (Array.isArray(res)) return res as T[];
    if (res && typeof res === 'object') {
      const obj = res as Record<string, unknown>;
      if (Array.isArray(obj[key])) return obj[key] as T[];
      if (obj.data && typeof obj.data === 'object') {
        const dataObj = obj.data as Record<string, unknown>;
        if (Array.isArray(dataObj[key])) return dataObj[key] as T[];
      }
      if (Array.isArray(obj.data)) return obj.data as T[];
    }
    logger.warn(`[pangolin] ${context}: could not extract array from response`);
    return [];
  }
}

View File

@ -90,7 +90,7 @@ export async function ensureCA() {
* Issue a certificate for a remote agent, signed by the CA. * Issue a certificate for a remote agent, signed by the CA.
* Returns the certificate materials (plaintext) for one-time display. * Returns the certificate materials (plaintext) for one-time display.
*/ */
export async function issueAgentCert(instanceId: string, slug: string) { export async function issueAgentCert(instanceId: string, slug: string, agentUrl?: string) {
const ca = await ensureCA(); const ca = await ensureCA();
const caKeyPem = decrypt(ca.encryptedKey); const caKeyPem = decrypt(ca.encryptedKey);
@ -110,12 +110,29 @@ export async function issueAgentCert(instanceId: string, slug: string) {
await fs.writeFile(caCertFile, ca.certPem); await fs.writeFile(caCertFile, ca.certPem);
await fs.writeFile(serialFile, crypto.randomBytes(16).toString('hex')); await fs.writeFile(serialFile, crypto.randomBytes(16).toString('hex'));
// Extensions for server+client auth // Build SAN entries from the agent URL hostname
await fs.writeFile(extFile, [ const sanEntries: string[] = [];
if (agentUrl) {
try {
const hostname = new URL(agentUrl).hostname;
// Detect IP vs DNS name
if (/^\d{1,3}(\.\d{1,3}){3}$/.test(hostname) || hostname.includes(':')) {
sanEntries.push(`IP:${hostname}`);
} else {
sanEntries.push(`DNS:${hostname}`);
}
} catch { /* ignore invalid URL */ }
}
sanEntries.push(`DNS:${commonName}`);
// Extensions for server+client auth with SANs
const extLines = [
'basicConstraints=CA:FALSE', 'basicConstraints=CA:FALSE',
'keyUsage=digitalSignature,keyEncipherment', 'keyUsage=digitalSignature,keyEncipherment',
'extendedKeyUsage=serverAuth,clientAuth', 'extendedKeyUsage=serverAuth,clientAuth',
].join('\n')); `subjectAltName=${sanEntries.join(',')}`,
];
await fs.writeFile(extFile, extLines.join('\n'));
// Generate agent key // Generate agent key
await exec( await exec(

View File

@ -60,7 +60,20 @@ export async function getDriverForInstance(instance: DriverInstance): Promise<Ex
const { getLocalDriver } = await import('./local-driver'); const { getLocalDriver } = await import('./local-driver');
return getLocalDriver(); return getLocalDriver();
} }
return getRemoteDriverForInstance(instance);
}
/**
* Resolve a RemoteDriver for a remote instance. Throws if the instance is
* local, missing an agent URL, or has no valid mTLS certificate.
*
* Use this when you need to call RemoteDriver-specific methods like
* createBackup() that don't exist on the ExecutionDriver interface.
*/
export async function getRemoteDriverForInstance(instance: DriverInstance) {
if (!instance.isRemote) {
throw new Error(`Instance ${instance.slug} is not remote`);
}
if (!instance.agentUrl) { if (!instance.agentUrl) {
throw new Error(`Remote instance ${instance.slug} has no agent URL configured`); throw new Error(`Remote instance ${instance.slug} has no agent URL configured`);
} }

View File

@ -1,10 +1,87 @@
import https from 'https'; import https from 'https';
import fs from 'fs';
import { pipeline } from 'stream/promises';
import { env } from '../config/env'; import { env } from '../config/env';
import type { ExecutionDriver } from './execution-driver'; import type { ExecutionDriver } from './execution-driver';
import { AgentUnreachableError } from './execution-driver'; import { AgentUnreachableError } from './execution-driver';
import type { ContainerInfo } from './docker.service'; import type { ContainerInfo } from './docker.service';
import { logger } from '../utils/logger'; import { logger } from '../utils/logger';
/** Metadata for a freshly created archive, as returned by RemoteDriver.createBackup(). */
export interface AgentBackupResult {
backupId: string;
filename: string;
sizeBytes: number;
sha256: string;
manifest: unknown | null;
createdAt: string;
}
/** One element of the list returned by RemoteDriver.listAgentBackups(). */
export interface AgentBackupListEntry {
backupId: string;
filename: string;
sizeBytes: number;
createdAt: string;
}
/** Parsed response body of RemoteDriver.uploadRestore(). */
export interface AgentRestoreUploadResult {
uploadId: string;
sizeBytes: number;
sha256: string;
}
/** Optional flags that RemoteDriver.applyRestore() merges into its request body. */
export interface AgentRestoreOptions {
skipDb?: boolean;
skipUploads?: boolean;
skipListmonk?: boolean;
dryRun?: boolean;
}
/** Restore state as returned by RemoteDriver.getRestoreProgress(). */
export interface AgentRestoreState {
status: 'UPLOADED' | 'RUNNING' | 'COMPLETED' | 'FAILED';
uploadId: string;
startedAt: string;
completedAt?: string;
exitCode?: number;
logTail?: string;
errorMessage?: string;
options?: AgentRestoreOptions;
}
/** Update status as returned by RemoteDriver.checkForUpdates(). */
export interface AgentUpdateStatus {
branch: string;
currentCommit: string;
currentMessage?: string;
remoteCommit: string | null;
commitsBehind: number;
changelog: Array<{ hash: string; message: string; date: string; author: string }>;
checkedAt: string;
error: string | null;
}
/** Upgrade progress as returned by RemoteDriver.getUpgradeProgress(); every field is optional. */
export interface AgentUpgradeProgress {
phase?: number;
phaseName?: string;
percentage?: number;
message?: string;
timestamp?: string;
}
/** Upgrade outcome as returned by RemoteDriver.getUpgradeResult(). */
export interface AgentUpgradeResult {
success: boolean;
message?: string;
previousCommit?: string;
newCommit?: string;
commitCount?: number;
durationSeconds?: number;
warnings?: string[];
}
/** Request body sent by RemoteDriver.startUpgrade(). */
export interface StartAgentUpgradeOptions {
skipBackup?: boolean;
useRegistry?: boolean;
branch?: string;
}
interface AgentRequestOptions { interface AgentRequestOptions {
method: 'GET' | 'POST' | 'DELETE'; method: 'GET' | 'POST' | 'DELETE';
path: string; path: string;
@ -261,4 +338,261 @@ export class RemoteDriver implements ExecutionDriver {
timeoutMs: env.AGENT_LONG_OP_TIMEOUT_MS, timeoutMs: env.AGENT_LONG_OP_TIMEOUT_MS,
}); });
} }
// ─── Backup Operations ──────────────────────────────────────
/**
 * Ask the remote agent to produce a backup of this instance.
 *
 * Issues `POST /instance/<slug>/backup` with the long-operation timeout and
 * resolves with the archive metadata reported by the agent. The archive
 * itself remains on the agent until it is fetched with downloadBackup() and
 * removed with deleteBackup().
 */
async createBackup(): Promise<AgentBackupResult> {
  const path = `/instance/${this.slug}/backup`;
  const metadata = await this.request<AgentBackupResult>({
    method: 'POST',
    path,
    timeoutMs: env.AGENT_LONG_OP_TIMEOUT_MS,
  });
  return metadata;
}
/**
 * Fetch the backup archives the agent currently holds for this slug.
 *
 * Issues `GET /instance/<slug>/backups` and returns the `data` member of the
 * response.
 */
async listAgentBackups(): Promise<AgentBackupListEntry[]> {
  const path = `/instance/${this.slug}/backups`;
  const { data } = await this.request<{ data: AgentBackupListEntry[] }>({ method: 'GET', path });
  return data;
}
/**
 * Remove one archive from the agent's disk; intended to run after a
 * successful download.
 *
 * @param backupId - Agent-side backup identifier (URL-encoded into the path).
 */
async deleteBackup(backupId: string): Promise<void> {
  const encodedId = encodeURIComponent(backupId);
  const path = `/instance/${this.slug}/backup/${encodedId}`;
  await this.request({ method: 'DELETE', path });
}
/**
 * Download a backup archive from the agent straight into a local file.
 *
 * The response body is piped to `destPath`. When the agent sends a
 * Content-Length header, the size of the written file must match it.
 *
 * @param backupId - Agent-side backup identifier (URL-encoded into the path).
 * @param destPath - Local file that receives the archive.
 * @returns The size in bytes of the file written to `destPath`.
 * @throws AgentUnreachableError on connection errors and timeouts; Error on
 *         an HTTP status >= 400 or a Content-Length mismatch.
 */
async downloadBackup(backupId: string, destPath: string): Promise<{ bytesWritten: number }> {
  const target = new URL(
    `/instance/${this.slug}/backup/${encodeURIComponent(backupId)}/download`,
    this.agentUrl
  );
  const timeoutMs = env.AGENT_LONG_OP_TIMEOUT_MS;
  const requestOptions: https.RequestOptions = {
    hostname: target.hostname,
    port: target.port || 7443,
    path: target.pathname + target.search,
    method: 'GET',
    headers: { Accept: 'application/gzip' },
    cert: this.clientCert,
    key: this.clientKey,
    ca: this.caCert,
    rejectUnauthorized: true,
    timeout: timeoutMs,
  };

  return new Promise((resolve, reject) => {
    const req = https.request(requestOptions, async (res) => {
      // Error responses carry a text body; collect it and surface the first 500 characters.
      if (res.statusCode && res.statusCode >= 400) {
        let errorBody = '';
        res.on('data', (chunk) => {
          errorBody += chunk;
        });
        res.on('end', () => {
          reject(new Error(`Agent returned ${res.statusCode}: ${errorBody.substring(0, 500)}`));
        });
        return;
      }

      const lengthHeader = res.headers['content-length'];
      const expectedSize = lengthHeader ? parseInt(lengthHeader as string, 10) : null;

      try {
        await pipeline(res, fs.createWriteStream(destPath));
        const { size } = await fs.promises.stat(destPath);
        if (expectedSize === null || size === expectedSize) {
          resolve({ bytesWritten: size });
        } else {
          reject(new Error(`Downloaded size ${size} does not match Content-Length ${expectedSize}`));
        }
      } catch (err) {
        reject(err);
      }
    });

    req.on('error', (err) => {
      reject(new AgentUnreachableError(this.agentUrl, err));
    });
    req.on('timeout', () => {
      req.destroy();
      reject(new AgentUnreachableError(this.agentUrl, new Error(`Timed out after ${timeoutMs}ms`)));
    });

    req.end();
  });
}
// ─── Restore Operations ─────────────────────────────────────
/**
 * Stream a backup archive from a local path to the agent's upload endpoint.
 *
 * The expected SHA-256 is passed as the `sha256` query parameter so the agent
 * can verify the bytes while ingesting them — on a mismatch the agent rejects
 * the upload with a 400.
 *
 * Every terminal path (success, HTTP error, request or response error,
 * timeout, file read error) destroys the file read stream. `pipe()` does not
 * close the source when the destination fails, which would otherwise leak
 * the file descriptor. Failure paths also destroy the request, so an early
 * error response stops the upload instead of letting it run on.
 *
 * @param archivePath - Local archive to send.
 * @param expectedSha256 - Hex digest the agent should verify against.
 * @returns The agent's parsed JSON response.
 * @throws AgentUnreachableError on connection errors and timeouts; Error on
 *         an HTTP status >= 400, an unparsable response or a file read error.
 */
async uploadRestore(
  archivePath: string,
  expectedSha256: string
): Promise<AgentRestoreUploadResult> {
  const stats = await fs.promises.stat(archivePath);
  const url = new URL(
    `/instance/${this.slug}/restore/upload?sha256=${encodeURIComponent(expectedSha256)}`,
    this.agentUrl
  );
  const timeoutMs = env.AGENT_LONG_OP_TIMEOUT_MS;

  return new Promise((resolve, reject) => {
    const fileStream = fs.createReadStream(archivePath);

    // Single failure exit: release the file handle, tear down the socket, then reject.
    // destroy() is idempotent, so repeated calls from several handlers are harmless.
    const fail = (err: unknown): void => {
      fileStream.destroy();
      req.destroy();
      reject(err);
    };

    const req = https.request(
      {
        hostname: url.hostname,
        port: url.port || 7443,
        path: url.pathname + url.search,
        method: 'POST',
        headers: {
          'Content-Type': 'application/octet-stream',
          'Content-Length': String(stats.size),
        },
        cert: this.clientCert,
        key: this.clientKey,
        ca: this.caCert,
        rejectUnauthorized: true,
        timeout: timeoutMs,
      },
      (res) => {
        let body = '';
        res.on('data', (c) => (body += c));
        // Without this handler a connection drop mid-response would leave the promise pending forever.
        res.on('error', (err) => fail(err));
        res.on('end', () => {
          if (res.statusCode && res.statusCode >= 400) {
            let failure: Error;
            try {
              const parsed = JSON.parse(body);
              failure = new Error(parsed.message || `Agent returned ${res.statusCode}`);
            } catch {
              failure = new Error(`Agent returned ${res.statusCode}: ${body.substring(0, 500)}`);
            }
            fail(failure);
            return;
          }
          try {
            const result = JSON.parse(body) as AgentRestoreUploadResult;
            fileStream.destroy();
            resolve(result);
          } catch (err) {
            fail(err);
          }
        });
      }
    );

    req.on('error', (err) => {
      fail(new AgentUnreachableError(this.agentUrl, err));
    });
    req.on('timeout', () => {
      fail(new AgentUnreachableError(this.agentUrl, new Error(`Timed out after ${timeoutMs}ms`)));
    });

    fileStream.on('error', (err) => {
      fail(err);
    });
    fileStream.pipe(req);
  });
}
/**
 * Instruct the agent to apply an archive that was uploaded earlier.
 *
 * Per the agent contract the restore script runs in the background and this
 * call returns right away; poll getRestoreProgress() to learn the outcome.
 *
 * @param uploadId - Identifier returned by uploadRestore().
 * @param options - Optional restore flags, merged into the body after `confirm: true`.
 */
async applyRestore(uploadId: string, options: AgentRestoreOptions = {}): Promise<void> {
  const path = `/instance/${this.slug}/restore/${encodeURIComponent(uploadId)}/apply`;
  const body = { confirm: true, ...options };
  await this.request({ method: 'POST', path, body });
}
/**
 * Fetch the agent's current state for a restore.
 *
 * @param uploadId - Identifier returned by uploadRestore().
 */
async getRestoreProgress(uploadId: string): Promise<AgentRestoreState> {
  const path = `/instance/${this.slug}/restore/${encodeURIComponent(uploadId)}/progress`;
  const state = await this.request<AgentRestoreState>({ method: 'GET', path });
  return state;
}
/**
 * Remove a restore upload from the agent's disk. Meant to be called once the
 * CCP has finalized the corresponding InstanceRestore row.
 *
 * @param uploadId - Identifier returned by uploadRestore().
 */
async deleteRestoreUpload(uploadId: string): Promise<void> {
  const encodedId = encodeURIComponent(uploadId);
  const path = `/instance/${this.slug}/restore/${encodedId}`;
  await this.request({ method: 'DELETE', path });
}
// ─── Upgrade Operations ─────────────────────────────────────
/**
 * Trigger an update check on the remote and return the status it reports.
 *
 * Issues `POST /instance/<slug>/upgrade/check` with a 90 second timeout.
 */
async checkForUpdates(): Promise<AgentUpdateStatus> {
  const path = `/instance/${this.slug}/upgrade/check`;
  const status = await this.request<AgentUpdateStatus>({
    method: 'POST',
    path,
    timeoutMs: 90_000,
  });
  return status;
}
/**
 * Start an upgrade on the remote.
 *
 * Per the agent contract this is fire-and-forget: the agent launches the
 * upgrade in the background and answers immediately. Follow up with
 * getUpgradeProgress() and getUpgradeResult() to track it.
 *
 * @param options - Upgrade flags, sent as the request body.
 */
async startUpgrade(options: StartAgentUpgradeOptions = {}): Promise<void> {
  const path = `/instance/${this.slug}/upgrade/start`;
  await this.request({ method: 'POST', path, body: options, timeoutMs: 30_000 });
}
/**
 * Fetch the agent's upgrade progress for this instance.
 *
 * Per the agent contract, a default zero-state is returned when no progress
 * has been written yet.
 */
async getUpgradeProgress(): Promise<AgentUpgradeProgress> {
  const path = `/instance/${this.slug}/upgrade/progress`;
  const progress = await this.request<AgentUpgradeProgress>({ method: 'GET', path });
  return progress;
}
/**
 * Fetch the agent's upgrade result for this instance.
 *
 * Rejects while no result is available yet; callers should interpret that as
 * "still running".
 */
async getUpgradeResult(): Promise<AgentUpgradeResult> {
  const path = `/instance/${this.slug}/upgrade/result`;
  const outcome = await this.request<AgentUpgradeResult>({ method: 'GET', path });
  return outcome;
}
} }

View File

@ -0,0 +1,376 @@
import fs from 'fs/promises';
import path from 'path';
import crypto from 'crypto';
import { createReadStream } from 'fs';
import { Prisma, RestoreStatus, AuditAction, InstanceStatus } from '@prisma/client';
import { prisma } from '../lib/prisma';
import { env } from '../config/env';
import { AppError } from '../middleware/error-handler';
import { logger } from '../utils/logger';
import { getRemoteDriverForInstance } from './execution-driver';
import type { AgentRestoreOptions, AgentRestoreState } from './remote-driver';
/**
 * Guard against path traversal: throw unless `filePath` resolves to a
 * location strictly below `boundary`.
 *
 * Both paths are resolved to absolute form first. The boundary directory
 * itself and sibling directories that merely share its prefix are rejected.
 *
 * @param filePath - Path to check.
 * @param boundary - Directory the path must live under.
 * @param label - Prefix for the error message.
 * @throws AppError (403, FORBIDDEN) when the path lies outside the boundary.
 */
function assertPathWithinBoundary(filePath: string, boundary: string, label: string): void {
  const allowedPrefix = `${path.resolve(boundary)}${path.sep}`;
  const candidate = path.resolve(filePath);
  if (candidate.startsWith(allowedPrefix)) {
    return;
  }
  throw new AppError(403, `${label} path outside allowed directory`, 'FORBIDDEN');
}
/**
 * Stream a file through SHA-256 and return the digest as lowercase hex.
 *
 * The file is read chunk by chunk, so memory use does not depend on its
 * size. Rejects with the stream's error if the file cannot be read.
 */
async function fileHash(filePath: string): Promise<string> {
  const digest = crypto.createHash('sha256');
  for await (const chunk of createReadStream(filePath)) {
    digest.update(chunk);
  }
  return digest.digest('hex');
}
// Pause between successive getRestoreProgress() polls in performRemoteRestore().
const POLL_INTERVAL_MS = 3_000;
// Total time performRemoteRestore() keeps polling before it reports a timeout.
const POLL_TIMEOUT_MS = 15 * 60 * 1_000; // 15 min
/** Arguments accepted by createRestore(). */
interface StartRestoreArgs {
// Id of the Backup row to restore from.
backupId: string;
// User recorded on the InstanceRestore row; audit entries are only written when this is set.
triggeredById?: string;
// Recorded on the audit log entry.
ipAddress?: string | null;
// Forwarded to the agent's apply call; defaults to {}.
options?: AgentRestoreOptions;
}
/**
 * Start restoring an instance from one of its backups.
 *
 * Validates the request, inserts an InstanceRestore row in PENDING state and
 * launches the upload → apply → poll orchestration in the background. The
 * row is returned immediately so the HTTP handler can respond without
 * waiting for the restore to finish.
 *
 * @throws AppError 404 when the backup row or its archive file is missing,
 *         400 when the backup is not COMPLETED, has no archive path, or the
 *         instance is not RUNNING, 501 for non-remote instances, and 403
 *         when the archive path lies outside BACKUP_STORAGE_PATH.
 */
export async function createRestore(args: StartRestoreArgs) {
  const { backupId, triggeredById, ipAddress, options } = args;

  const backup = await prisma.backup.findUnique({
    where: { id: backupId },
    include: { instance: true },
  });
  if (!backup) {
    throw new AppError(404, 'Backup not found', 'NOT_FOUND');
  }
  if (backup.status !== 'COMPLETED') {
    throw new AppError(400, `Backup is ${backup.status}, not COMPLETED`, 'INVALID_STATE');
  }
  const archivePath = backup.archivePath;
  if (!archivePath) {
    throw new AppError(400, 'Backup has no archive path', 'NO_ARCHIVE');
  }

  const { instance } = backup;
  if (instance.status !== InstanceStatus.RUNNING) {
    throw new AppError(400, `Cannot restore to instance in ${instance.status} state`, 'INVALID_STATE');
  }

  // Only remote restore exists in Phase B. Local instances — including
  // CCP-adopted ones, which are registered with isRemote=false — are rejected
  // here until a performLocalRestore branch is added.
  if (!instance.isRemote) {
    throw new AppError(501, 'Local restore is not implemented — Phase B covers remote only', 'NOT_IMPLEMENTED');
  }

  // The archive must sit inside the storage boundary and actually exist on disk.
  assertPathWithinBoundary(archivePath, env.BACKUP_STORAGE_PATH, 'Backup archive');
  const archiveExists = await fs.access(archivePath).then(
    () => true,
    () => false
  );
  if (!archiveExists) {
    throw new AppError(404, 'Archive file is missing on disk', 'ARCHIVE_MISSING');
  }

  const restore = await prisma.instanceRestore.create({
    data: {
      instanceId: instance.id,
      backupId: backup.id,
      status: RestoreStatus.PENDING,
      triggeredById: triggeredById ?? null,
    },
  });

  // Deliberately not awaited: the orchestration outlives this request.
  void performRemoteRestore(restore.id, archivePath, options ?? {}, triggeredById, ipAddress ?? null).catch(
    (err) => {
      logger.error(`[restore] ${restore.id} failed: ${(err as Error).message}`);
    }
  );

  return restore;
}
/**
* Overview of performRemoteRestore() (defined further below): end-to-end
* remote restore orchestration.
*
* Flow:
* 1. Compute sha256 of the archive on CCP disk
* 2. Upload to agent with sha256 query param (agent re-verifies on stream)
* 3. Apply via agent (shells out to restore.sh --force)
* 4. Poll progress every 3s until COMPLETED/FAILED or timeout
* 5. Delete the agent-side upload
* 6. Update the InstanceRestore row + audit log
*/
/**
 * Record a BACKUP_RESTORE entry in the audit log.
 *
 * Nothing is written when no triggering user is known. A failure to write
 * the entry is logged and swallowed, so an audit-log DB error can never mask
 * the restore status update that precedes it.
 *
 * Used on all three terminal paths of a restore:
 *   - success (outcome: 'success')
 *   - the agent reported failure (outcome: 'agent_failed')
 *   - orchestration error / timeout / unexpected throw (outcome: 'orchestration_error')
 *
 * The stored error message is truncated to 500 characters.
 */
async function writeRestoreAuditLog(args: {
  restoreId: string;
  instanceId: string;
  backupId: string;
  triggeredById?: string;
  ipAddress?: string | null;
  options: AgentRestoreOptions;
  outcome: 'success' | 'agent_failed' | 'orchestration_error';
  sha256?: string;
  uploadId?: string | null;
  errorMessage?: string;
}): Promise<void> {
  const {
    restoreId,
    instanceId,
    backupId,
    triggeredById: userId,
    ipAddress,
    options,
    outcome,
    sha256,
    uploadId,
    errorMessage,
  } = args;

  if (!userId) {
    return;
  }

  const truncatedError = errorMessage ? errorMessage.substring(0, 500) : undefined;

  try {
    await prisma.auditLog.create({
      data: {
        userId,
        instanceId,
        action: AuditAction.BACKUP_RESTORE,
        details: {
          backupId,
          restoreId,
          source: 'remote',
          outcome,
          options: options as unknown as Prisma.InputJsonValue,
          // Optional fields are only present when they carry a value.
          ...(sha256 ? { sha256 } : {}),
          ...(uploadId ? { agentUploadId: uploadId } : {}),
          ...(truncatedError ? { errorMessage: truncatedError } : {}),
        },
        ipAddress: ipAddress ?? null,
      },
    });
  } catch (err) {
    logger.error(`[restore] failed to write audit log for ${restoreId}: ${(err as Error).message}`);
  }
}
async function performRemoteRestore(
restoreId: string,
archivePath: string,
options: AgentRestoreOptions,
triggeredById?: string,
ipAddress?: string | null
) {
const restore = await prisma.instanceRestore.findUnique({
where: { id: restoreId },
include: { instance: true, backup: true },
});
if (!restore) {
logger.error(`[restore] row ${restoreId} vanished mid-flight`);
return;
}
const instance = restore.instance;
let uploadId: string | null = null;
let sha256: string | undefined;
try {
await prisma.instanceRestore.update({
where: { id: restoreId },
data: { status: RestoreStatus.UPLOADING },
});
const driver = await getRemoteDriverForInstance({
id: instance.id,
slug: instance.slug,
isRemote: instance.isRemote,
agentUrl: instance.agentUrl,
});
// 1. Compute local SHA256 (authoritative — the agent will verify against this).
// We persist this in the audit log so there's an immutable record of exactly
// which bytes were restored, useful for post-incident comparison.
logger.info(`[restore] ${instance.slug}: hashing archive ${path.basename(archivePath)}`);
sha256 = await fileHash(archivePath);
// 2. Stream upload to agent
logger.info(`[restore] ${instance.slug}: uploading archive (sha256=${sha256.substring(0, 16)}...)`);
const uploadResult = await driver.uploadRestore(archivePath, sha256);
uploadId = uploadResult.uploadId;
await prisma.instanceRestore.update({
where: { id: restoreId },
data: { uploadId, status: RestoreStatus.RUNNING },
});
// 3. Apply
logger.info(`[restore] ${instance.slug}: applying restore ${uploadId}`);
await driver.applyRestore(uploadId, options);
// 4. Poll progress
const deadline = Date.now() + POLL_TIMEOUT_MS;
let finalState: AgentRestoreState | null = null;
while (Date.now() < deadline) {
await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS));
try {
const state = await driver.getRestoreProgress(uploadId);
// Mirror progress to the DB row so the UI shows updates
await prisma.instanceRestore.update({
where: { id: restoreId },
data: {
progressJson: state as unknown as Prisma.InputJsonValue,
logTail: state.logTail ?? null,
},
});
if (state.status === 'COMPLETED' || state.status === 'FAILED') {
finalState = state;
break;
}
} catch (err) {
logger.warn(`[restore] ${instance.slug}: poll error: ${(err as Error).message}`);
// Keep polling — transient network blips shouldn't fail the restore
}
}
if (!finalState) {
throw new Error(`Restore timed out after ${Math.round(POLL_TIMEOUT_MS / 1000)}s`);
}
// 5. Clean up agent-side upload (best effort)
try {
await driver.deleteRestoreUpload(uploadId);
} catch (err) {
logger.warn(`[restore] ${instance.slug}: failed to delete agent upload ${uploadId}: ${(err as Error).message}`);
}
// 6. Finalize DB row
if (finalState.status === 'COMPLETED') {
await prisma.instanceRestore.update({
where: { id: restoreId },
data: {
status: RestoreStatus.COMPLETED,
progressJson: finalState as unknown as Prisma.InputJsonValue,
logTail: finalState.logTail ?? null,
completedAt: new Date(),
},
});
await writeRestoreAuditLog({
restoreId,
instanceId: instance.id,
backupId: restore.backupId,
triggeredById,
ipAddress,
options,
outcome: 'success',
sha256,
uploadId,
});
logger.info(`[restore] ${instance.slug}: restore ${restoreId} COMPLETED`);
} else {
const errMsg = finalState.errorMessage || `Agent reported FAILED (exit ${finalState.exitCode})`;
await prisma.instanceRestore.update({
where: { id: restoreId },
data: {
status: RestoreStatus.FAILED,
progressJson: finalState as unknown as Prisma.InputJsonValue,
logTail: finalState.logTail ?? null,
errorMessage: errMsg,
completedAt: new Date(),
},
});
await writeRestoreAuditLog({
restoreId,
instanceId: instance.id,
backupId: restore.backupId,
triggeredById,
ipAddress,
options,
outcome: 'agent_failed',
sha256,
uploadId,
errorMessage: errMsg,
});
logger.warn(`[restore] ${instance.slug}: restore ${restoreId} FAILED (exit ${finalState.exitCode})`);
}
} catch (err) {
const errMsg = (err as Error).message;
await prisma.instanceRestore.update({
where: { id: restoreId },
data: {
status: RestoreStatus.FAILED,
errorMessage: errMsg,
completedAt: new Date(),
},
});
await writeRestoreAuditLog({
restoreId,
instanceId: instance.id,
backupId: restore.backupId,
triggeredById,
ipAddress,
options,
outcome: 'orchestration_error',
sha256,
uploadId,
errorMessage: errMsg,
});
logger.error(`[restore] ${restore.instance.slug}: ${errMsg}`);
// Best-effort cleanup of the agent upload if we got that far
if (uploadId) {
try {
const driver = await getRemoteDriverForInstance({
id: instance.id,
slug: instance.slug,
isRemote: instance.isRemote,
agentUrl: instance.agentUrl,
});
await driver.deleteRestoreUpload(uploadId);
} catch { /* ignore */ }
}
}
}
/**
 * Fetch one page of restore records, newest first (ordered by `startedAt`).
 *
 * @param instanceId - When given, only restores belonging to this instance are returned.
 * @param page - 1-based page number (defaults to 1).
 * @param limit - Page size (defaults to 50).
 * @returns The rows for the requested page (each with a summary of its instance
 *   and backup), the total number of matching rows, and the `page`/`limit` used.
 */
export async function listRestores(instanceId?: string, page = 1, limit = 50) {
  const filter = instanceId ? { instanceId } : {};
  const offset = (page - 1) * limit;

  // Both queries are started before awaiting so they run concurrently.
  const rowsQuery = prisma.instanceRestore.findMany({
    where: filter,
    orderBy: { startedAt: 'desc' },
    skip: offset,
    take: limit,
    include: {
      instance: { select: { id: true, name: true, slug: true } },
      backup: { select: { id: true, archivePath: true, sizeBytes: true } },
    },
  });
  const countQuery = prisma.instanceRestore.count({ where: filter });

  const [data, total] = await Promise.all([rowsQuery, countQuery]);
  return { data, total, page, limit };
}
/**
 * Look up one restore record, including a summary of its instance and backup
 * (the backup summary carries the manifest).
 *
 * @param restoreId - Primary key of the restore row.
 * @returns The restore row with `instance` and `backup` relations loaded.
 * @throws AppError 404 `NOT_FOUND` when no row has this id.
 */
export async function getRestore(restoreId: string) {
  const found = await prisma.instanceRestore.findUnique({
    where: { id: restoreId },
    include: {
      instance: { select: { id: true, name: true, slug: true } },
      backup: { select: { id: true, archivePath: true, sizeBytes: true, manifest: true } },
    },
  });
  if (found) return found;
  throw new AppError(404, 'Restore not found', 'NOT_FOUND');
}

View File

@ -0,0 +1,599 @@
/**
* Remote tunnel management service.
*
* Orchestrates Pangolin site/resource/target creation on behalf of remote CML
* instances, then pushes Newt credentials to the remote host via the mTLS agent.
* The CCP holds the Pangolin API token centrally — remote instances never touch
* the Pangolin API themselves.
*/
import { AuditAction, Prisma } from '@prisma/client';
import { prisma } from '../lib/prisma';
import { env } from '../config/env';
import { AppError } from '../middleware/error-handler';
import { logger } from '../utils/logger';
import { getRemoteDriverForInstance } from './execution-driver';
import {
CcpPangolinClient,
type PangolinDomain,
type PangolinResource,
} from './ccp-pangolin.client';
// ─── Resource definitions ──────────────────────────────────────────
/**
 * Describes one service that may be exposed as a Pangolin resource.
 */
interface ResourceDef {
// Service-specific subdomain part; combined with the instance prefix by
// fullSubdomain(). An empty string marks the root entry (prefix alone).
subdomain: string;
// Display name, sent to Pangolin as the resource name and used in log messages.
name: string;
// When true the resource is always created and a creation failure is rethrown.
required?: boolean;
// Name of a property on the instance record; when set (and the entry is not
// required) the resource is only created if that property is truthy.
featureFlag?: string;
}
// Catalogue of services that setup/sync iterate over, in creation order.
// Entries with `required: true` are always created; entries with a
// `featureFlag` are created only when that flag is truthy on the instance;
// the remaining entries (`required: false`, no flag) are always attempted,
// but a failure to create them is only logged.
const RESOURCE_DEFINITIONS: ResourceDef[] = [
{ subdomain: 'app', name: 'Admin GUI', required: true },
{ subdomain: 'api', name: 'API', required: true },
// Empty subdomain: the public site lives on the prefix itself.
{ subdomain: '', name: 'Public Site', required: true },
{ subdomain: 'media', name: 'Media API', featureFlag: 'enableMedia' },
{ subdomain: 'db', name: 'NocoDB', required: false },
{ subdomain: 'docs', name: 'Docs', required: false },
{ subdomain: 'code', name: 'Code Server', required: false },
{ subdomain: 'git', name: 'Gitea', required: false },
{ subdomain: 'home', name: 'Homepage', required: false },
{ subdomain: 'listmonk', name: 'Listmonk', featureFlag: 'enableListmonk' },
{ subdomain: 'qr', name: 'Mini QR', required: false },
{ subdomain: 'draw', name: 'Excalidraw', required: false },
{ subdomain: 'vault', name: 'Vaultwarden', required: false },
{ subdomain: 'mail', name: 'MailHog', required: false },
{ subdomain: 'chat', name: 'Rocket.Chat', featureFlag: 'enableChat' },
{ subdomain: 'events', name: 'Gancio', featureFlag: 'enableGancio' },
{ subdomain: 'meet', name: 'Jitsi Meet', featureFlag: 'enableMeet' },
{ subdomain: 'grafana', name: 'Grafana', featureFlag: 'enableMonitoring' },
];
// ─── Helpers ───────────────────────────────────────────────────────
/**
 * Build a Pangolin API client from the CCP environment.
 *
 * @returns A client bound to the configured API URL, API key and org id.
 * @throws AppError 501 `PANGOLIN_NOT_CONFIGURED` when any of the three
 *   settings is missing or empty.
 */
function getPangolinClient(): CcpPangolinClient {
  const {
    PANGOLIN_API_URL: apiUrl,
    PANGOLIN_API_KEY: apiKey,
    PANGOLIN_ORG_ID: orgId,
  } = env;

  if (apiUrl && apiKey && orgId) {
    return new CcpPangolinClient(apiUrl, apiKey, orgId);
  }

  throw new AppError(
    501,
    'Pangolin API not configured on this CCP. Set PANGOLIN_API_URL, PANGOLIN_API_KEY, PANGOLIN_ORG_ID in the CCP .env file.',
    'PANGOLIN_NOT_CONFIGURED'
  );
}
/**
 * Combine the instance prefix with a service subdomain.
 *
 * @param prefix - Per-instance prefix (e.g. "ck").
 * @param sub - Service subdomain; empty for the root entry.
 * @returns `prefix` alone when `sub` is empty (e.g. "ck"), otherwise
 *   `prefix-sub` (e.g. "ck-app", "ck-api").
 */
function fullSubdomain(prefix: string, sub: string): string {
  return sub ? `${prefix}-${sub}` : prefix;
}
/**
 * Decide whether a resource definition applies to the given instance.
 *
 * @param def - The resource definition being considered.
 * @param instance - The instance record, viewed as a plain key/value map so
 *   the feature flag can be looked up by name.
 * @returns `false` only for a non-required definition whose feature flag is
 *   falsy on the instance; `true` in every other case (required entries and
 *   optional entries without a feature flag are always created).
 */
function shouldCreateResource(
  def: ResourceDef,
  instance: Record<string, unknown>
): boolean {
  // Only a non-required, flag-gated entry can ever be skipped.
  if (!def.required && def.featureFlag) {
    return Boolean(instance[def.featureFlag]);
  }
  return true;
}
/**
 * Find the Pangolin base domain that an instance's domain belongs to.
 *
 * An exact match on `baseDomain` wins (e.g. "cursedknowledge.org"). Otherwise
 * leading labels are stripped one at a time and each parent is tried
 * (e.g. "sub.example.com" → "example.com"); the bare top-level label alone is
 * never tried.
 *
 * @param client - Pangolin client used to list the registered domains.
 * @param instanceDomain - The instance's configured domain.
 * @returns The matching Pangolin domain.
 * @throws AppError 400 `DOMAIN_NOT_FOUND` (listing the available base domains)
 *   when nothing matches.
 */
async function findDomainForInstance(
  client: CcpPangolinClient,
  instanceDomain: string
): Promise<PangolinDomain> {
  const domains = await client.listDomains();
  const byBaseDomain = (candidate: string) => domains.find((d) => d.baseDomain === candidate);

  const exactMatch = byBaseDomain(instanceDomain);
  if (exactMatch) return exactMatch;

  // Walk up the hierarchy: drop one leading label per iteration, stopping
  // before only the last label would remain.
  const labels = instanceDomain.split('.');
  for (let start = 1; start < labels.length - 1; start++) {
    const parentMatch = byBaseDomain(labels.slice(start).join('.'));
    if (parentMatch) return parentMatch;
  }

  throw new AppError(
    400,
    `No Pangolin domain matches instance domain "${instanceDomain}". Available: ${domains.map((d) => d.baseDomain).join(', ')}`,
    'DOMAIN_NOT_FOUND'
  );
}
// ─── Setup ─────────────────────────────────────────────────────────
/** Caller-supplied options for setupTunnel(). */
export interface SetupTunnelOptions {
// Prefix used to build every resource subdomain; setupTunnel() falls back to
// the instance slug when this is missing or empty.
subdomainPrefix?: string;
}
/** Summary returned by setupTunnel() once the tunnel has been provisioned. */
export interface TunnelSetupResult {
// Pangolin site id, stringified.
siteId: string;
// Newt client id that was pushed to the remote host.
newtId: string;
// Endpoint that was written to the remote .env as PANGOLIN_ENDPOINT.
endpoint: string;
// Length of `resources`.
resourceCount: number;
// Resources associated with this setup run; includes resources that already
// existed in Pangolin for the expected domain as well as newly created ones.
resources: Array<{ subdomain: string; name: string; resourceId: string }>;
}
/**
 * Provision a Pangolin tunnel for a remote instance.
 *
 * Steps:
 *   1. Validate the instance and resolve everything that can fail on
 *      configuration alone (remote driver, endpoint, matching Pangolin domain)
 *      BEFORE anything is created in Pangolin.
 *   2. Pick site defaults and create the Pangolin site.
 *   3. Create one resource + target per applicable RESOURCE_DEFINITIONS entry
 *      (resources whose full domain already exists are reused).
 *   4. Push the Newt credentials into the remote `.env`.
 *   5. Persist the tunnel fields on the Instance row.
 *   6. Recreate the `newt` container (best effort) and write an audit log.
 *
 * If steps 3–5 fail, the freshly created site is deleted again (best effort)
 * so that no orphaned site is left behind without a DB record; the original
 * error is then rethrown.
 *
 * @param instanceId - Instance to configure.
 * @param options - Optional subdomain prefix (defaults to the instance slug).
 * @param userId - When given, an audit log entry is written for this user.
 * @param ipAddress - Request IP recorded in the audit log.
 * @returns Site id, Newt id, endpoint and the list of associated resources.
 * @throws AppError 404 `NOT_FOUND`, 400 `NOT_REMOTE`, 400 `ALREADY_CONFIGURED`,
 *   400 `DOMAIN_NOT_FOUND`, 501 `PANGOLIN_NOT_CONFIGURED`; errors from required
 *   resource creation, the agent, or the database are propagated.
 */
export async function setupTunnel(
  instanceId: string,
  options: SetupTunnelOptions,
  userId?: string,
  ipAddress?: string | null
): Promise<TunnelSetupResult> {
  const client = getPangolinClient();
  const instance = await prisma.instance.findUnique({ where: { id: instanceId } });
  if (!instance) throw new AppError(404, 'Instance not found', 'NOT_FOUND');
  if (!instance.isRemote) throw new AppError(400, 'Tunnel setup via Pangolin API is only for remote instances', 'NOT_REMOTE');
  if (instance.pangolinSiteId) {
    throw new AppError(400, 'Tunnel is already configured. Use sync to update resources, or teardown first.', 'ALREADY_CONFIGURED');
  }

  const prefix = options.subdomainPrefix || instance.slug;
  const driver = await getRemoteDriverForInstance({
    id: instance.id,
    slug: instance.slug,
    isRemote: instance.isRemote,
    agentUrl: instance.agentUrl,
  });

  // The Pangolin endpoint (what Newt connects to) may be different from
  // the API URL. E.g., API = api.bnkserve.org/v1, endpoint = pangolin.bnkserve.org.
  // If PANGOLIN_ENDPOINT is set, use it. Otherwise derive from API URL.
  // Done before the site is created so a malformed URL cannot leave an orphan.
  let endpoint = env.PANGOLIN_ENDPOINT || '';
  if (!endpoint) {
    const endpointUrl = new URL(env.PANGOLIN_API_URL);
    endpoint = `${endpointUrl.protocol}//${endpointUrl.hostname}${endpointUrl.port ? ':' + endpointUrl.port : ''}`;
  }

  // Find the matching domain up front: a DOMAIN_NOT_FOUND error must be raised
  // before any Pangolin object exists.
  const domain = await findDomainForInstance(client, instance.domain);
  logger.info(`[tunnel] ${instance.slug}: matched domain ${domain.baseDomain} (id: ${domain.domainId})`);

  // 1. Get Newt credentials
  logger.info(`[tunnel] ${instance.slug}: picking site defaults`);
  const defaults = await client.pickSiteDefaults();

  // 2. Create site
  logger.info(`[tunnel] ${instance.slug}: creating Pangolin site`);
  const site = await client.createSite({
    name: instance.slug,
    type: 'newt',
    newtId: defaults.newtId,
    secret: defaults.newtSecret,
    address: defaults.address,
  });
  const siteId = String(site.siteId);
  const newtId = site.newt?.newtId || defaults.newtId;
  const newtSecret = site.newt?.secret || defaults.newtSecret;

  const createdResources: Array<{ subdomain: string; name: string; resourceId: string }> = [];

  try {
    // 3. Create resources + targets
    const existingResources = await client.listResources();
    for (const def of RESOURCE_DEFINITIONS) {
      if (!shouldCreateResource(def, instance as unknown as Record<string, unknown>)) {
        logger.debug(`[tunnel] ${instance.slug}: skipping ${def.name} (feature not enabled)`);
        continue;
      }
      const sub = fullSubdomain(prefix, def.subdomain);
      // Build the expected full domain so we can do an idempotent check against
      // Pangolin's existing resources. Pangolin returns `fullDomain` not `subdomain`.
      const expectedFullDomain = sub
        ? `${sub}.${domain.baseDomain}`
        : domain.baseDomain;

      // Idempotent: skip if a resource with this fullDomain already exists
      const existing = existingResources.find(
        (r) => r.fullDomain === expectedFullDomain
      );
      if (existing) {
        logger.debug(`[tunnel] ${instance.slug}: resource ${def.name} (${expectedFullDomain}) already exists`);
        createdResources.push({ subdomain: sub, name: def.name, resourceId: String(existing.resourceId) });
        continue;
      }

      try {
        const resourcePayload: Record<string, unknown> = {
          name: def.name,
          domainId: domain.domainId,
          http: true,
          protocol: 'tcp',
        };
        // Root domain: omit subdomain entirely (empty string is rejected by Pangolin)
        if (sub) resourcePayload.subdomain = sub;
        const resource = await client.createResource(resourcePayload as unknown as Parameters<typeof client.createResource>[0]);

        // Make the resource public (no SSO, no access block)
        try {
          await client.updateResource(resource.resourceId, { sso: false, blockAccess: false });
        } catch (err) {
          logger.warn(`[tunnel] ${instance.slug}: failed to make ${def.name} public: ${(err as Error).message}`);
        }

        // Create target pointing to nginx:80 on the remote host
        await client.createTarget(resource.resourceId, {
          siteId: Number(siteId),
          ip: 'nginx',
          port: 80,
          method: 'http',
          enabled: true,
        });
        createdResources.push({ subdomain: sub, name: def.name, resourceId: String(resource.resourceId) });
        logger.info(`[tunnel] ${instance.slug}: created resource ${def.name} (${expectedFullDomain})`);
      } catch (err) {
        // Required resources abort the whole setup; optional ones are only logged.
        if (def.required) throw err;
        logger.warn(`[tunnel] ${instance.slug}: failed to create optional resource ${def.name}: ${(err as Error).message}`);
      }
    }

    // 4. Push Newt credentials to remote .env:
    //    read current .env, append/replace Pangolin vars, write it back.
    logger.info(`[tunnel] ${instance.slug}: pushing Newt credentials to remote .env`);
    const currentEnv = await driver.readEnvFile('');
    const envContent = buildUpdatedEnv(currentEnv, {
      PANGOLIN_ENDPOINT: endpoint,
      PANGOLIN_SITE_ID: siteId,
      PANGOLIN_NEWT_ID: newtId,
      PANGOLIN_NEWT_SECRET: newtSecret,
    });
    await driver.writeFiles('', [{ relativePath: '.env', content: envContent }]);

    // 5. Persist on Instance row
    await prisma.instance.update({
      where: { id: instanceId },
      data: {
        pangolinEndpoint: endpoint,
        pangolinSiteId: siteId,
        pangolinNewtId: newtId,
        pangolinNewtSecret: newtSecret,
        pangolinSubdomainPrefix: prefix,
      },
    });
  } catch (err) {
    // The site exists in Pangolin but is not recorded on the Instance row.
    // Remove it so a retry starts clean instead of creating a second site and
    // reusing resources whose targets point at the abandoned one.
    logger.error(`[tunnel] ${instance.slug}: setup failed after site ${siteId} was created, rolling back: ${(err as Error).message}`);
    try {
      await client.deleteSite(siteId);
    } catch (cleanupErr) {
      logger.warn(`[tunnel] ${instance.slug}: rollback deleteSite(${siteId}) failed: ${(cleanupErr as Error).message}`);
    }
    throw err;
  }

  // 6. Recreate Newt container to pick up the new .env vars.
  // `docker compose restart` does NOT re-read .env — it only sends SIGTERM+restart.
  // `docker compose up -d newt` detects env var changes (via ${PANGOLIN_NEWT_ID}
  // expansion in docker-compose.yml) and recreates the container automatically.
  logger.info(`[tunnel] ${instance.slug}: recreating newt container with new credentials`);
  try {
    await driver.composeUp('', '', ['newt']);
  } catch (err) {
    logger.warn(`[tunnel] ${instance.slug}: composeUp(newt) failed: ${(err as Error).message}`);
  }

  // 7. Audit log
  if (userId) {
    await prisma.auditLog.create({
      data: {
        userId,
        instanceId,
        action: AuditAction.PANGOLIN_SETUP,
        details: {
          source: 'remote',
          siteId,
          newtId,
          endpoint,
          resourceCount: createdResources.length,
          subdomainPrefix: prefix,
        } as unknown as Prisma.InputJsonValue,
        ipAddress: ipAddress ?? null,
      },
    });
  }

  logger.info(`[tunnel] ${instance.slug}: tunnel setup complete — ${createdResources.length} resources created`);
  return {
    siteId,
    newtId,
    endpoint,
    resourceCount: createdResources.length,
    resources: createdResources,
  };
}
// ─── Sync ──────────────────────────────────────────────────────────
/**
 * Create any Pangolin resources that are missing for an already-configured
 * tunnel (for example after a feature flag was enabled on the instance).
 *
 * Resources whose expected full domain already exists in Pangolin are left
 * untouched. Making a new resource public is best effort — a failure there is
 * logged and the target is still created, matching setupTunnel(); otherwise a
 * half-configured resource without a target would be skipped as "existing" by
 * every later sync.
 *
 * @param instanceId - Instance whose tunnel resources should be synced.
 * @param userId - When given, an audit log entry is written for this user.
 * @param ipAddress - Request IP recorded in the audit log.
 * @returns `{ synced: true, created }` where `created` counts new resources.
 * @throws AppError 404 `NOT_FOUND`, 400 `NO_TUNNEL`, 400 `DOMAIN_NOT_FOUND`,
 *   501 `PANGOLIN_NOT_CONFIGURED`; creation errors for required resources are
 *   propagated.
 */
export async function syncResources(
  instanceId: string,
  userId?: string,
  ipAddress?: string | null
): Promise<{ synced: boolean; created: number }> {
  const client = getPangolinClient();
  const instance = await prisma.instance.findUnique({ where: { id: instanceId } });
  if (!instance) throw new AppError(404, 'Instance not found', 'NOT_FOUND');
  if (!instance.pangolinSiteId) throw new AppError(400, 'No tunnel configured', 'NO_TUNNEL');

  const prefix = instance.pangolinSubdomainPrefix || instance.slug;
  const domain = await findDomainForInstance(client, instance.domain);
  const existingResources = await client.listResources();
  const siteId = instance.pangolinSiteId;
  let created = 0;

  for (const def of RESOURCE_DEFINITIONS) {
    if (!shouldCreateResource(def, instance as unknown as Record<string, unknown>)) continue;

    const sub = fullSubdomain(prefix, def.subdomain);
    const expectedFullDomain = sub ? `${sub}.${domain.baseDomain}` : domain.baseDomain;
    const existing = existingResources.find((r) => r.fullDomain === expectedFullDomain);
    if (existing) continue;

    try {
      const resourcePayload: Record<string, unknown> = {
        name: def.name,
        domainId: domain.domainId,
        http: true,
        protocol: 'tcp',
      };
      // Omit the subdomain key entirely when there is none.
      if (sub) resourcePayload.subdomain = sub;
      const resource = await client.createResource(resourcePayload as unknown as Parameters<typeof client.createResource>[0]);

      // Make the resource public (no SSO, no access block). Best effort so the
      // target below is still created if this call fails.
      try {
        await client.updateResource(resource.resourceId, { sso: false, blockAccess: false });
      } catch (err) {
        logger.warn(`[tunnel] ${instance.slug}: failed to make ${def.name} public: ${(err as Error).message}`);
      }

      // Target points at nginx:80 on the remote host.
      await client.createTarget(resource.resourceId, {
        siteId: Number(siteId),
        ip: 'nginx',
        port: 80,
        method: 'http',
        enabled: true,
      });
      created++;
      logger.info(`[tunnel] ${instance.slug}: sync created ${def.name} (${sub})`);
    } catch (err) {
      // Required resources abort the sync; optional ones are only logged.
      if (def.required) throw err;
      logger.warn(`[tunnel] ${instance.slug}: sync failed for ${def.name}: ${(err as Error).message}`);
    }
  }

  if (userId) {
    await prisma.auditLog.create({
      data: {
        userId,
        instanceId,
        action: AuditAction.PANGOLIN_SYNC,
        details: { source: 'remote', created, siteId } as unknown as Prisma.InputJsonValue,
        ipAddress: ipAddress ?? null,
      },
    });
  }

  return { synced: true, created };
}
// ─── Teardown ──────────────────────────────────────────────────────
/**
 * Remove an instance's Pangolin tunnel.
 *
 * Deletes the Pangolin site (best effort — it may already be gone), clears the
 * tunnel fields on the Instance row, and for remote instances removes the
 * Pangolin variables from the remote `.env` and restarts the compose stack.
 * Every remote-side step is best effort: failures are logged, never thrown.
 *
 * @param instanceId - Instance whose tunnel should be removed.
 * @param userId - When given, an audit log entry is written for this user.
 * @param ipAddress - Request IP recorded in the audit log.
 * @returns `{ tornDown: true }`.
 * @throws AppError 404 `NOT_FOUND`, 400 `NO_TUNNEL`, 501 `PANGOLIN_NOT_CONFIGURED`;
 *   database errors are propagated.
 */
export async function teardownTunnel(
  instanceId: string,
  userId?: string,
  ipAddress?: string | null
): Promise<{ tornDown: boolean }> {
  const client = getPangolinClient();
  const instance = await prisma.instance.findUnique({ where: { id: instanceId } });
  if (!instance) throw new AppError(404, 'Instance not found', 'NOT_FOUND');
  if (!instance.pangolinSiteId) throw new AppError(400, 'No tunnel configured', 'NO_TUNNEL');

  const siteId = instance.pangolinSiteId;

  // Delete site from Pangolin (cascades resources + targets)
  try {
    await client.deleteSite(siteId);
    logger.info(`[tunnel] ${instance.slug}: deleted Pangolin site ${siteId}`);
  } catch (err) {
    logger.warn(`[tunnel] ${instance.slug}: deleteSite failed (may already be gone): ${(err as Error).message}`);
  }

  // Clear Instance fields.
  // NOTE(review): pangolinSubdomainPrefix is set by setupTunnel() but not
  // cleared here — confirm whether keeping it is intentional.
  await prisma.instance.update({
    where: { id: instanceId },
    data: {
      pangolinEndpoint: null,
      pangolinSiteId: null,
      pangolinNewtId: null,
      pangolinNewtSecret: null,
    },
  });

  // Push empty Pangolin vars to remote .env (empty values remove the keys)
  if (instance.isRemote) {
    try {
      const driver = await getRemoteDriverForInstance({
        id: instance.id,
        slug: instance.slug,
        isRemote: instance.isRemote,
        agentUrl: instance.agentUrl,
      });
      const currentEnv = await driver.readEnvFile('');
      const envContent = buildUpdatedEnv(currentEnv, {
        PANGOLIN_ENDPOINT: '',
        PANGOLIN_SITE_ID: '',
        PANGOLIN_NEWT_ID: '',
        PANGOLIN_NEWT_SECRET: '',
      });
      await driver.writeFiles('', [{ relativePath: '.env', content: envContent }]);

      // Stop newt container (best effort): stop the stack and bring it back up.
      // Per the original author's note, newt won't start without credentials.
      // NOTE(review): this cycles every service, not just newt — confirm
      // whether composeStop can be limited to a single service.
      try {
        await driver.composeStop('', '');
        await driver.composeUp('', '');
      } catch (err) {
        // Still best effort, but leave a trace instead of swallowing silently:
        // a failure between stop and up can leave the remote stack down.
        logger.warn(`[tunnel] ${instance.slug}: compose restart after teardown failed: ${(err as Error).message}`);
      }
    } catch (err) {
      logger.warn(`[tunnel] ${instance.slug}: failed to push empty env to remote: ${(err as Error).message}`);
    }
  }

  // Audit log
  if (userId) {
    await prisma.auditLog.create({
      data: {
        userId,
        instanceId,
        action: AuditAction.PANGOLIN_TEARDOWN,
        details: { source: 'remote', siteId } as unknown as Prisma.InputJsonValue,
        ipAddress: ipAddress ?? null,
      },
    });
  }

  return { tornDown: true };
}
// ─── Status ────────────────────────────────────────────────────────
/** Snapshot returned by getTunnelStatus(). */
export interface TunnelStatus {
// False when the instance has no stored Pangolin site id; all other fields
// are then omitted.
configured: boolean;
// Site online flag as reported by Pangolin; only populated for remote
// instances (false when the site lookup fails).
online?: boolean;
// Stored Pangolin site id.
siteId?: string;
// Stored Pangolin endpoint.
endpoint?: string;
// Resources that have a target on this site; only populated for remote instances.
resources?: Array<{
// fullDomain with the instance domain suffix stripped ('' for the root domain);
// the whole fullDomain when it does not end with the instance domain.
subdomain: string;
name: string;
resourceId: string;
// Always true for listed entries: a resource is only listed when a target
// for this site was found.
hasTarget: boolean;
// ip / port of the first target that belongs to this site.
targetIp?: string;
targetPort?: number;
}>;
}
/**
 * Report the tunnel state of an instance.
 *
 * - No stored site id → `{ configured: false }`.
 * - Local instance → stored site id and endpoint only; Pangolin is not queried.
 * - Remote instance → additionally queries Pangolin for the site's online flag
 *   and for every resource that has a target on this site. Both lookups are
 *   best effort: on failure a warning is logged and `online` stays false /
 *   `resources` stays empty (or partial).
 *
 * @param instanceId - Instance to inspect.
 * @returns The tunnel status snapshot.
 * @throws AppError 404 `NOT_FOUND` when the instance does not exist, or
 *   501 `PANGOLIN_NOT_CONFIGURED` for a remote instance when the CCP has no
 *   Pangolin settings.
 */
export async function getTunnelStatus(instanceId: string): Promise<TunnelStatus> {
  const instance = await prisma.instance.findUnique({ where: { id: instanceId } });
  if (!instance) throw new AppError(404, 'Instance not found', 'NOT_FOUND');

  const storedSiteId = instance.pangolinSiteId;
  if (!storedSiteId) {
    return { configured: false };
  }
  const storedEndpoint = instance.pangolinEndpoint ?? undefined;

  // Local instances: answer from the DB row alone.
  if (!instance.isRemote) {
    return { configured: true, siteId: storedSiteId, endpoint: storedEndpoint };
  }

  const client = getPangolinClient();

  let online = false;
  try {
    const site = await client.getSite(storedSiteId);
    online = site.online ?? false;
  } catch (err) {
    logger.warn(`[tunnel] ${instance.slug}: getSite failed: ${(err as Error).message}`);
  }

  const resources: NonNullable<TunnelStatus['resources']> = [];
  try {
    const allResources = await client.listResources();
    const ownSiteId = Number(storedSiteId);
    const domainSuffix = `.${instance.domain}`;

    // A resource is ours when one of its targets points at our site id. This
    // relies on the actual Pangolin site association instead of guessing from
    // subdomain names.
    for (const res of allResources) {
      let matched = false;
      let targetIp: string | undefined;
      let targetPort: number | undefined;
      try {
        const targets = await client.listTargets(String(res.resourceId));
        for (const target of targets) {
          if (Number(target.siteId) !== ownSiteId) continue;
          matched = true;
          targetIp = target.ip;
          targetPort = target.port;
          break;
        }
      } catch { /* ignore */ }

      if (!matched) continue;

      // Derive a display subdomain from the resource's fullDomain.
      const fd = res.fullDomain || '';
      let subdomain: string;
      if (fd.endsWith(domainSuffix)) {
        subdomain = fd.slice(0, -domainSuffix.length);
      } else if (fd === instance.domain) {
        subdomain = '';
      } else {
        subdomain = fd;
      }

      resources.push({
        subdomain,
        name: res.name,
        resourceId: String(res.resourceId),
        hasTarget: true,
        targetIp,
        targetPort,
      });
    }
  } catch (err) {
    logger.warn(`[tunnel] ${instance.slug}: listResources failed: ${(err as Error).message}`);
  }

  return {
    configured: true,
    online,
    siteId: storedSiteId,
    endpoint: storedEndpoint,
    resources,
  };
}
// ─── .env Helpers ──────────────────────────────────────────────────
/**
 * Quote a .env value if it contains characters that dotenv parsers interpret:
 * # (comment), = (separator), whitespace (including newlines and carriage
 * returns), quotes, backslashes.
 * Pangolin-issued UUIDs/base64 secrets typically don't need quoting, but
 * defensive quoting prevents silent corruption if they ever do.
 *
 * Inside the double quotes, backslashes and double quotes are backslash-escaped
 * and line breaks are written as the two-character sequences `\n` / `\r`, so a
 * quoted value always stays on a single line.
 *
 * @param value - Raw value.
 * @returns The value unchanged when it is safe, otherwise a double-quoted,
 *   escaped form.
 */
function quoteEnvValue(value: string): string {
  // \s already covers \n and \r.
  if (!/[\s#"'\\=]/.test(value)) {
    return value;
  }
  const escaped = value
    .replace(/\\/g, '\\\\') // must run first so later escapes are not doubled
    .replace(/"/g, '\\"')
    .replace(/\n/g, '\\n')
    .replace(/\r/g, '\\r');
  return `"${escaped}"`;
}

/**
 * Build an updated .env string by replacing/appending the given key-value pairs.
 * Preserves all existing keys not in the update set, in their original order.
 *
 * An update with an empty value removes the key (or simply does not add it).
 * Keys are matched as own properties of `updates` only, so an existing
 * variable that happens to share a name with an inherited object member
 * (`toString`, `constructor`, …) is copied through untouched.
 *
 * @param currentEnv - Parsed current .env contents, or null when unavailable.
 * @param updates - Keys to set; an empty string means "remove".
 * @returns The serialized .env content, one `KEY=value` per line, ending in a
 *   newline.
 */
function buildUpdatedEnv(
  currentEnv: Record<string, string> | null,
  updates: Record<string, string>
): string {
  const hasUpdate = (key: string): boolean =>
    Object.prototype.hasOwnProperty.call(updates, key);

  const lines: string[] = [];
  const seen = new Set<string>();

  // If we have the current env, reproduce it with replacements
  const existing: Record<string, string> = currentEnv ?? {};
  for (const [key, value] of Object.entries(existing)) {
    if (!hasUpdate(key)) {
      lines.push(`${key}=${quoteEnvValue(value)}`);
      continue;
    }
    seen.add(key);
    const replacement = updates[key];
    // If update value is empty, omit the line (remove the var)
    if (replacement) lines.push(`${key}=${quoteEnvValue(replacement)}`);
  }

  // Append new keys not already in the file
  for (const [key, value] of Object.entries(updates)) {
    if (value && !seen.has(key)) {
      lines.push(`${key}=${quoteEnvValue(value)}`);
    }
  }

  return lines.join('\n') + '\n';
}

View File

@ -2,14 +2,61 @@ import { exec as execCb } from 'child_process';
import { promisify } from 'util'; import { promisify } from 'util';
import fs from 'fs/promises'; import fs from 'fs/promises';
import path from 'path'; import path from 'path';
import { UpgradeStatus, AuditAction, InstanceStatus, Prisma } from '@prisma/client'; import { UpgradeStatus, AuditAction, InstanceStatus, Prisma, Instance } from '@prisma/client';
import { prisma } from '../lib/prisma'; import { prisma } from '../lib/prisma';
import { logger } from '../utils/logger'; import { logger } from '../utils/logger';
import { createEvent } from './event.service'; import { createEvent } from './event.service';
import { getRemoteDriverForInstance } from './execution-driver';
import type { AgentUpdateStatus } from './remote-driver';
/**
 * Record the terminal outcome of an upgrade as an INSTANCE_UPGRADE audit log
 * entry.
 *
 * The write is wrapped in try/catch: a failing audit-log insert is logged and
 * swallowed so it cannot mask the upgrade row's own status update. Nothing is
 * written when there is no triggering user.
 *
 * Outcome values:
 *   - 'completed'           — upgrade.sh/agent reported success
 *   - 'failed'              — upgrade.sh/agent reported failure
 *   - 'orchestration_error' — CCP-side exception, timeout, or unreachable agent
 *
 * The stored error message is truncated to 500 characters and omitted
 * entirely when empty.
 */
async function writeUpgradeAuditLog(args: {
  upgradeId: string;
  instanceId: string;
  triggeredById: string | null;
  source: 'local' | 'remote';
  outcome: 'completed' | 'failed' | 'orchestration_error';
  previousCommit: string | null;
  newCommit: string | null;
  durationSeconds: number | null;
  errorMessage?: string | null;
}): Promise<void> {
  const { upgradeId, instanceId, triggeredById, errorMessage } = args;
  if (!triggeredById) return;

  const details: Record<string, unknown> = {
    upgradeId,
    source: args.source,
    outcome: args.outcome,
    previousCommit: args.previousCommit,
    newCommit: args.newCommit,
    durationSeconds: args.durationSeconds,
  };
  if (errorMessage) {
    details.errorMessage = errorMessage.substring(0, 500);
  }

  try {
    await prisma.auditLog.create({
      data: {
        userId: triggeredById,
        instanceId,
        action: AuditAction.INSTANCE_UPGRADE,
        details: details as unknown as Prisma.InputJsonValue,
      },
    });
  } catch (err) {
    logger.error(`[upgrade] failed to write audit log for ${args.upgradeId}: ${(err as Error).message}`);
  }
}
const exec = promisify(execCb); const exec = promisify(execCb);
const UPGRADE_TIMEOUT = 600_000; // 10 minutes const UPGRADE_TIMEOUT = 600_000; // 10 minutes — local upgrades
const REMOTE_UPGRADE_TIMEOUT = 15 * 60 * 1000; // 15 minutes — remote (network round trips)
const PROGRESS_POLL_INTERVAL = 2_000; // 2 seconds const PROGRESS_POLL_INTERVAL = 2_000; // 2 seconds
// ─── Update Check ───────────────────────────────────────────────── // ─── Update Check ─────────────────────────────────────────────────
@ -26,13 +73,57 @@ export interface UpdateStatus {
} }
/**
 * Check for available updates. Branches on instance.isRemote:
 * - Local: delegates to checkForUpdatesLocal (runs upgrade-check.sh in the
 *   instance's basePath and reads status.json)
 * - Remote: delegates to checkForUpdatesRemote (asks the agent over mTLS)
 *
 * @param instanceId - Instance to check.
 * @returns The update status for the instance.
 * @throws Error when no instance has this id.
 */
export async function checkForUpdates(instanceId: string): Promise<UpdateStatus> {
  const instance = await prisma.instance.findUnique({ where: { id: instanceId } });
  if (!instance) throw new Error('Instance not found');

  if (instance.isRemote) {
    return checkForUpdatesRemote(instance);
  }
  return checkForUpdatesLocal(instance);
}
/**
 * Remote check: obtain the update status from the instance's agent.
 *
 * Never throws. When the driver cannot be obtained or the agent call fails, a
 * warning is logged and a fallback status is returned that is built from the
 * Instance row (stored branch and commit, zero commits behind, empty
 * changelog) with the failure reason in `error`.
 *
 * @param instance - The remote instance row.
 * @returns The agent-reported status, or the fallback described above.
 */
async function checkForUpdatesRemote(instance: Instance): Promise<UpdateStatus> {
  try {
    const driver = await getRemoteDriverForInstance({
      id: instance.id,
      slug: instance.slug,
      isRemote: instance.isRemote,
      agentUrl: instance.agentUrl,
    });

    // Copy only the known fields so nothing else from the agent payload leaks through.
    const {
      branch,
      currentCommit,
      currentMessage,
      remoteCommit,
      commitsBehind,
      changelog,
      checkedAt,
      error,
    }: AgentUpdateStatus = await driver.checkForUpdates();

    return {
      branch,
      currentCommit,
      currentMessage,
      remoteCommit,
      commitsBehind,
      changelog,
      checkedAt,
      error,
    };
  } catch (err) {
    const reason = (err as Error).message;
    logger.warn(`[upgrade] remote check failed for ${instance.slug}: ${reason}`);
    return {
      branch: instance.gitBranch,
      currentCommit: instance.gitCommit || 'unknown',
      remoteCommit: null,
      commitsBehind: 0,
      changelog: [],
      checkedAt: new Date().toISOString(),
      error: `Remote check failed: ${reason}`,
    };
  }
}
async function checkForUpdatesLocal(instance: Instance): Promise<UpdateStatus> {
const basePath = instance.basePath; const basePath = instance.basePath;
const statusFile = path.join(basePath, 'data', 'upgrade', 'status.json'); const statusFile = path.join(basePath, 'data', 'upgrade', 'status.json');
const scriptPath = path.join(basePath, 'scripts', 'upgrade-check.sh'); const scriptPath = path.join(basePath, 'scripts', 'upgrade-check.sh');
@ -119,8 +210,12 @@ export async function startUpgrade(
throw new Error('An upgrade is already in progress for this instance'); throw new Error('An upgrade is already in progress for this instance');
} }
// Get current commit for tracking // Get current commit for tracking. For local instances we can read it from
let currentCommit: string | null = null; // git directly; for remote instances we either trust the DB-tracked value
// (set by previous upgrade-check) or leave it null and let upgrade.sh
// report the previous commit in result.json.
let currentCommit: string | null = instance.gitCommit;
if (!instance.isRemote) {
try { try {
const { stdout } = await exec('git rev-parse --short HEAD', { const { stdout } = await exec('git rev-parse --short HEAD', {
cwd: instance.basePath, cwd: instance.basePath,
@ -130,6 +225,7 @@ export async function startUpgrade(
} catch { } catch {
// Non-critical — may be a release install without .git // Non-critical — may be a release install without .git
} }
}
const branch = options?.branch || instance.gitBranch; const branch = options?.branch || instance.gitBranch;
@ -154,20 +250,222 @@ export async function startUpgrade(
upgradeId: upgrade.id, upgradeId: upgrade.id,
previousCommit: currentCommit, previousCommit: currentCommit,
branch, branch,
source: instance.isRemote ? 'remote' : 'local',
options: options || {}, options: options || {},
} as unknown as Prisma.InputJsonValue, } as unknown as Prisma.InputJsonValue,
ipAddress, ipAddress,
}, },
}); });
// Fire-and-forget: run the upgrade asynchronously // Fire-and-forget: branch on isRemote
if (instance.isRemote) {
runRemoteUpgrade(upgrade.id, instance, options).catch((err) => {
logger.error(`[upgrade] Remote upgrade orchestration failed for ${instance.slug}: ${err}`);
});
} else {
runUpgrade(upgrade.id, instance.basePath, instance.slug, options).catch((err) => { runUpgrade(upgrade.id, instance.basePath, instance.slug, options).catch((err) => {
logger.error(`[upgrade] Upgrade orchestration failed for ${instance.slug}: ${err}`); logger.error(`[upgrade] Upgrade orchestration failed for ${instance.slug}: ${err}`);
}); });
}
return upgrade; return upgrade;
} }
/**
* Async REMOTE upgrade runner.
*
* Flow:
* 1. Get RemoteDriver
* 2. Mark InstanceUpgrade IN_PROGRESS
* 3. Tell agent to start upgrade.sh in --api-mode
* 4. Poll agent /upgrade/progress every 2s, mirror to DB
* 5. Try /upgrade/result every poll cycle; when present, finalize
* 6. On timeout (15 min), mark FAILED and create error event
*
* Note: there is no shell or filesystem access on the CCP side — everything
* goes through the mTLS agent. The agent's spawn of upgrade.sh is itself
* fire-and-forget under a slug mutex.
*/
async function runRemoteUpgrade(
upgradeId: string,
instance: Instance,
options?: StartUpgradeOptions
) {
const slug = instance.slug;
try {
const driver = await getRemoteDriverForInstance({
id: instance.id,
slug: instance.slug,
isRemote: instance.isRemote,
agentUrl: instance.agentUrl,
});
// Mark IN_PROGRESS
await prisma.instanceUpgrade.update({
where: { id: upgradeId },
data: {
status: UpgradeStatus.IN_PROGRESS,
progressMessage: 'Starting remote upgrade...',
},
});
// Tell the agent to start. The agent has its own mutex + stale-progress
// check, so this can return 409 if a previous upgrade is still running.
logger.info(`[upgrade] ${slug}: triggering remote upgrade.sh start`);
await driver.startUpgrade({
skipBackup: options?.skipBackup,
useRegistry: options?.useRegistry,
branch: options?.branch,
});
// Poll progress + result. We treat /result returning 200 as the signal
// that upgrade.sh exited (successfully or with code != 0 — the script
// writes result.json either way in --api-mode).
const deadline = Date.now() + REMOTE_UPGRADE_TIMEOUT;
let lastProgress: { phase?: number; phaseName?: string; percentage?: number; message?: string } = {};
while (Date.now() < deadline) {
await new Promise((r) => setTimeout(r, PROGRESS_POLL_INTERVAL));
// Try to fetch the result first; if it exists, we're done
let result = null;
try {
result = await driver.getUpgradeResult();
} catch {
// No result yet — keep polling progress
}
if (result) {
// Final result available — write it and exit
const upgradeRowBefore = await prisma.instanceUpgrade.findUnique({ where: { id: upgradeId } });
await prisma.instanceUpgrade.update({
where: { id: upgradeId },
data: {
status: result.success ? UpgradeStatus.COMPLETED : UpgradeStatus.FAILED,
newCommit: result.newCommit || null,
commitCount: result.commitCount || 0,
percentage: 100,
phaseName: 'Complete',
progressMessage: result.message || 'Upgrade completed',
durationSeconds: result.durationSeconds || null,
warnings: result.warnings?.length ? (result.warnings as unknown as Prisma.InputJsonValue) : undefined,
errorMessage: result.success ? null : (result.message || 'Upgrade failed'),
completedAt: new Date(),
},
});
// Update Instance.gitCommit if we have a new commit
if (result.newCommit) {
await prisma.instance.update({
where: { id: instance.id },
data: { gitCommit: result.newCommit },
});
}
if (!result.success) {
await createEvent(
instance.id,
'ERROR',
'upgrade',
'Remote upgrade failed',
result.message || 'The remote upgrade process failed. Check the agent log for details.',
{ upgradeId, source: 'remote', warnings: result.warnings }
);
}
await writeUpgradeAuditLog({
upgradeId,
instanceId: instance.id,
triggeredById: upgradeRowBefore?.triggeredById ?? null,
source: 'remote',
outcome: result.success ? 'completed' : 'failed',
previousCommit: upgradeRowBefore?.previousCommit ?? null,
newCommit: result.newCommit || null,
durationSeconds: result.durationSeconds || null,
errorMessage: result.success ? null : (result.message || 'Upgrade failed'),
});
logger.info(`[upgrade] ${slug}: remote upgrade ${result.success ? 'COMPLETED' : 'FAILED'}`);
return;
}
// No result yet — pull progress
try {
const progress = await driver.getUpgradeProgress();
// Only update DB if something actually changed (avoid hot-loop writes)
if (
progress.phase !== lastProgress.phase ||
progress.percentage !== lastProgress.percentage ||
progress.message !== lastProgress.message
) {
lastProgress = {
phase: progress.phase,
phaseName: progress.phaseName,
percentage: progress.percentage,
message: progress.message,
};
await prisma.instanceUpgrade.update({
where: { id: upgradeId },
data: {
currentPhase: progress.phase || 0,
phaseName: progress.phaseName || null,
percentage: progress.percentage || 0,
progressMessage: progress.message || null,
},
});
}
} catch (err) {
// Transient network blip during a long upgrade — keep polling
logger.debug(`[upgrade] ${slug}: progress poll error: ${(err as Error).message}`);
}
}
// Timeout — throw so the catch block below marks the upgrade FAILED
throw new Error(`Remote upgrade timed out after ${Math.round(REMOTE_UPGRADE_TIMEOUT / 60_000)} minutes`);
} catch (err) {
const errorMsg = (err as Error).message;
const isTimeout = errorMsg.includes('timed out');
const upgradeRowBefore = await prisma.instanceUpgrade.findUnique({ where: { id: upgradeId } });
await prisma.instanceUpgrade.update({
where: { id: upgradeId },
data: {
status: UpgradeStatus.FAILED,
errorMessage: isTimeout ? errorMsg : errorMsg.slice(0, 2000),
progressMessage: 'Failed',
completedAt: new Date(),
},
});
await createEvent(
instance.id,
'ERROR',
'upgrade',
isTimeout ? 'Remote upgrade timed out' : 'Remote upgrade failed',
errorMsg.slice(0, 500),
{ upgradeId, source: 'remote' }
);
await writeUpgradeAuditLog({
upgradeId,
instanceId: instance.id,
triggeredById: upgradeRowBefore?.triggeredById ?? null,
source: 'remote',
outcome: 'orchestration_error',
previousCommit: upgradeRowBefore?.previousCommit ?? null,
newCommit: null,
durationSeconds: null,
errorMessage: errorMsg,
});
// Don't flip the instance to ERROR state for remote upgrades — the agent
// health check will reflect the real state on the next poll, and we don't
// want to mask a recovered instance with stale CCP-side ERROR.
logger.error(`[upgrade] ${slug}: ${errorMsg}`);
}
}
/** /**
* Async upgrade runner. Runs upgrade.sh and polls progress. * Async upgrade runner. Runs upgrade.sh and polls progress.
*/ */
@ -271,19 +569,32 @@ async function runUpgrade(
}); });
} }
if (!result.success) { const upgradeRow = await prisma.instanceUpgrade.findUnique({ where: { id: upgradeId } });
if (!result.success && upgradeRow) {
// Create error event // Create error event
const upgrade = await prisma.instanceUpgrade.findUnique({ where: { id: upgradeId } });
if (upgrade) {
await createEvent( await createEvent(
upgrade.instanceId, upgradeRow.instanceId,
'ERROR', 'ERROR',
'upgrade', 'upgrade',
'Upgrade failed', 'Upgrade failed',
result.message || 'The upgrade process failed. Check logs for details.', result.message || 'The upgrade process failed. Check logs for details.',
{ upgradeId, previousCommit: upgrade.previousCommit, warnings: result.warnings } { upgradeId, previousCommit: upgradeRow.previousCommit, warnings: result.warnings }
); );
} }
if (upgradeRow) {
await writeUpgradeAuditLog({
upgradeId,
instanceId: upgradeRow.instanceId,
triggeredById: upgradeRow.triggeredById,
source: 'local',
outcome: result.success ? 'completed' : 'failed',
previousCommit: upgradeRow.previousCommit,
newCommit: result.newCommit || newCommit,
durationSeconds: result.durationSeconds || null,
errorMessage: result.success ? null : (result.message || 'Upgrade failed'),
});
} }
logger.info(`[upgrade] ${slug}: Upgrade ${result.success ? 'completed' : 'failed'}`); logger.info(`[upgrade] ${slug}: Upgrade ${result.success ? 'completed' : 'failed'}`);
@ -327,6 +638,18 @@ async function runUpgrade(
statusMessage: `Upgrade failed: ${isTimeout ? 'timeout' : errorMsg.slice(0, 200)}`, statusMessage: `Upgrade failed: ${isTimeout ? 'timeout' : errorMsg.slice(0, 200)}`,
}, },
}); });
await writeUpgradeAuditLog({
upgradeId,
instanceId: upgrade.instanceId,
triggeredById: upgrade.triggeredById,
source: 'local',
outcome: 'orchestration_error',
previousCommit: upgrade.previousCommit,
newCommit: null,
durationSeconds: result.durationSeconds || null,
errorMessage: errorMsg,
});
} }
logger.error(`[upgrade] ${slug}: Upgrade failed: ${errorMsg}`); logger.error(`[upgrade] ${slug}: Upgrade failed: ${errorMsg}`);

View File

@ -38,6 +38,11 @@ NI_MAPBOX_KEY=""
NI_MAXMIND_ACCOUNT_ID="" NI_MAXMIND_ACCOUNT_ID=""
NI_MAXMIND_LICENSE_KEY="" NI_MAXMIND_LICENSE_KEY=""
# CCP (Changemaker Control Panel) registration flags
NI_CCP_URL=""
NI_CCP_INVITE_CODE=""
NI_CCP_AGENT_URL=""
# --- Arg parser --- # --- Arg parser ---
while [[ $# -gt 0 ]]; do while [[ $# -gt 0 ]]; do
case "$1" in case "$1" in
@ -62,6 +67,10 @@ while [[ $# -gt 0 ]]; do
--mapbox-key) NI_MAPBOX_KEY="$2"; shift 2 ;; --mapbox-key) NI_MAPBOX_KEY="$2"; shift 2 ;;
--maxmind-account-id) NI_MAXMIND_ACCOUNT_ID="$2"; shift 2 ;; --maxmind-account-id) NI_MAXMIND_ACCOUNT_ID="$2"; shift 2 ;;
--maxmind-license-key) NI_MAXMIND_LICENSE_KEY="$2"; shift 2 ;; --maxmind-license-key) NI_MAXMIND_LICENSE_KEY="$2"; shift 2 ;;
# CCP (Changemaker Control Panel)
--ccp-url) NI_CCP_URL="$2"; shift 2 ;;
--ccp-invite-code) NI_CCP_INVITE_CODE="$2"; shift 2 ;;
--ccp-agent-url) NI_CCP_AGENT_URL="$2"; shift 2 ;;
--help|-h) --help|-h)
echo "Usage: bash config.sh [OPTIONS]" echo "Usage: bash config.sh [OPTIONS]"
echo "" echo ""
@ -91,6 +100,11 @@ while [[ $# -gt 0 ]]; do
echo " --maxmind-account-id ID MaxMind GeoIP account ID" echo " --maxmind-account-id ID MaxMind GeoIP account ID"
echo " --maxmind-license-key K MaxMind GeoIP license key" echo " --maxmind-license-key K MaxMind GeoIP license key"
echo "" echo ""
echo "CCP (Changemaker Control Panel) — all 3 flags required to register:"
echo " --ccp-url URL CCP server URL (e.g., https://ccp.example.com)"
echo " --ccp-invite-code CODE One-time invite code from CCP"
echo " --ccp-agent-url URL Agent URL the CCP reaches (e.g., https://this-host:7443)"
echo ""
echo "Example:" echo "Example:"
echo " bash config.sh --non-interactive --domain example.org --admin-password MyStr0ngPass123" echo " bash config.sh --non-interactive --domain example.org --admin-password MyStr0ngPass123"
echo " bash config.sh -y --domain example.org --admin-password MyStr0ngPass123 \\" echo " bash config.sh -y --domain example.org --admin-password MyStr0ngPass123 \\"
@ -798,6 +812,17 @@ configure_features() {
else else
warn "Set JVB_ADVERTISE_IP in .env before starting Jitsi containers." warn "Set JVB_ADVERTISE_IP in .env before starting Jitsi containers."
fi fi
else
# Non-interactive: auto-detect public IP for NAT traversal
local detected_ip
detected_ip=$(curl -sf --max-time 5 https://ifconfig.me 2>/dev/null || \
curl -sf --max-time 5 https://api.ipify.org 2>/dev/null || true)
if [[ -n "$detected_ip" ]]; then
update_env_var "JVB_ADVERTISE_IP" "$detected_ip"
success "JVB advertise IP auto-detected: $detected_ip"
else
warn "Could not auto-detect public IP. Set JVB_ADVERTISE_IP in .env before starting Jitsi."
fi
fi fi
else else
MEET_ENABLED="no" MEET_ENABLED="no"
@ -838,13 +863,6 @@ configure_features() {
update_env_var "ENABLE_PEOPLE" "false" update_env_var "ENABLE_PEOPLE" "false"
fi fi
if prompt_yes_no "Enable Analytics & GeoIP (visitor tracking, geo dashboard)?"; then
update_env_var "ENABLE_ANALYTICS" "true"
success "Analytics enabled"
else
update_env_var "ENABLE_ANALYTICS" "false"
fi
if prompt_yes_no "Enable Docs Comments & Version History (Gitea-backed)?"; then if prompt_yes_no "Enable Docs Comments & Version History (Gitea-backed)?"; then
update_env_var "GITEA_COMMENTS_ENABLED" "true" update_env_var "GITEA_COMMENTS_ENABLED" "true"
success "Docs Comments & Version History enabled" success "Docs Comments & Version History enabled"
@ -881,8 +899,14 @@ configure_features() {
fi fi
if prompt_yes_no "Enable Monitoring stack (Prometheus, Grafana, Alertmanager, cAdvisor)?" "y"; then if prompt_yes_no "Enable Monitoring stack (Prometheus, Grafana, Alertmanager, cAdvisor)?" "y"; then
local existing_profiles
existing_profiles=$(grep -oP 'COMPOSE_PROFILES=\K.*' "$ENV_FILE" 2>/dev/null || echo "")
if [[ -z "$existing_profiles" ]]; then
update_env_var "COMPOSE_PROFILES" "monitoring" update_env_var "COMPOSE_PROFILES" "monitoring"
success "Monitoring enabled (COMPOSE_PROFILES=monitoring)" elif [[ "$existing_profiles" != *"monitoring"* ]]; then
update_env_var "COMPOSE_PROFILES" "${existing_profiles},monitoring"
fi
success "Monitoring enabled (COMPOSE_PROFILES includes monitoring)"
MONITORING_ENABLED="yes" MONITORING_ENABLED="yes"
else else
MONITORING_ENABLED="no" MONITORING_ENABLED="no"
@ -1401,6 +1425,35 @@ pangolin_connect_first_site() {
configure_control_panel() { configure_control_panel() {
header "Control Panel Registration" header "Control Panel Registration"
# Non-interactive: use --ccp-* flags if all three provided, otherwise skip
if [[ "$NON_INTERACTIVE" == "true" ]]; then
if [[ -n "$NI_CCP_URL" && -n "$NI_CCP_INVITE_CODE" && -n "$NI_CCP_AGENT_URL" ]]; then
update_env_var "ENABLE_CCP_AGENT" "true"
update_env_var "CCP_URL" "$NI_CCP_URL"
update_env_var "CCP_INVITE_CODE" "$NI_CCP_INVITE_CODE"
update_env_var "CCP_AGENT_URL" "$NI_CCP_AGENT_URL"
# Append ccp-agent to existing profiles (don't clobber monitoring)
local existing_profiles
existing_profiles=$(grep -oP 'COMPOSE_PROFILES=\K.*' "$ENV_FILE" 2>/dev/null || echo "")
if [[ -z "$existing_profiles" ]]; then
update_env_var "COMPOSE_PROFILES" "ccp-agent"
elif [[ "$existing_profiles" != *"ccp-agent"* ]]; then
update_env_var "COMPOSE_PROFILES" "${existing_profiles},ccp-agent"
fi
success "CCP registration configured ($NI_CCP_URL)"
else
update_env_var "ENABLE_CCP_AGENT" "false"
if [[ -n "$NI_CCP_URL" || -n "$NI_CCP_INVITE_CODE" || -n "$NI_CCP_AGENT_URL" ]]; then
warn "CCP registration needs all 3 flags: --ccp-url, --ccp-invite-code, --ccp-agent-url"
else
info "Skipping CCP registration (no --ccp-url provided)"
fi
fi
return
fi
if prompt_yes_no "Register this instance with a Changemaker Control Panel?"; then if prompt_yes_no "Register this instance with a Changemaker Control Panel?"; then
echo "" echo ""
read -rp " Enter Control Panel URL (e.g., https://ccp.example.com): " ccp_url read -rp " Enter Control Panel URL (e.g., https://ccp.example.com): " ccp_url
@ -2152,9 +2205,15 @@ main() {
header "Release Mode Settings" header "Release Mode Settings"
update_env_var "IMAGE_TAG" "latest" update_env_var "IMAGE_TAG" "latest"
update_env_var "NODE_ENV" "production" update_env_var "NODE_ENV" "production"
# Ensure monitoring is included if user opted in # Ensure monitoring is included if user opted in (preserve existing profiles)
if [[ "${MONITORING_ENABLED:-no}" == "yes" ]]; then if [[ "${MONITORING_ENABLED:-no}" == "yes" ]]; then
local existing_profiles
existing_profiles=$(grep -oP 'COMPOSE_PROFILES=\K.*' "$ENV_FILE" 2>/dev/null || echo "")
if [[ -z "$existing_profiles" ]]; then
update_env_var "COMPOSE_PROFILES" "monitoring" update_env_var "COMPOSE_PROFILES" "monitoring"
elif [[ "$existing_profiles" != *"monitoring"* ]]; then
update_env_var "COMPOSE_PROFILES" "${existing_profiles},monitoring"
fi
fi fi
success "Set IMAGE_TAG=latest, NODE_ENV=production (pre-built images)" success "Set IMAGE_TAG=latest, NODE_ENV=production (pre-built images)"
fi fi

View File

@ -103,7 +103,8 @@ cp "$PROJECT_DIR/api/prisma/init-nocodb-db.sh" "$STAGE_DIR/scripts/"
cp "$PROJECT_DIR/api/prisma/init-gancio-db.sh" "$STAGE_DIR/scripts/" cp "$PROJECT_DIR/api/prisma/init-gancio-db.sh" "$STAGE_DIR/scripts/"
# Runtime scripts # Runtime scripts
for script in nocodb-init.sh gitea-init.sh mkdocs-entrypoint.sh backup.sh \ for script in nocodb-init.sh gitea-init.sh mkdocs-entrypoint.sh \
backup.sh restore.sh \
upgrade.sh upgrade-check.sh upgrade-watcher.sh \ upgrade.sh upgrade-check.sh upgrade-watcher.sh \
uninstall.sh test-deployment.sh; do uninstall.sh test-deployment.sh; do
if [[ -f "$PROJECT_DIR/scripts/$script" ]]; then if [[ -f "$PROJECT_DIR/scripts/$script" ]]; then

View File

@ -294,7 +294,7 @@ if [[ "$START_SERVICES" =~ ^[Yy]$ ]]; then
info " Database migrations and seeding run automatically on first boot." info " Database migrations and seeding run automatically on first boot."
echo "" echo ""
CORE_SERVICES=("v2-postgres" "redis" "api" "admin") CORE_SERVICES=("v2-postgres" "redis" "api" "admin" "nginx")
ELAPSED=0 ELAPSED=0
ALL_HEALTHY=false ALL_HEALTHY=false

View File

@ -359,9 +359,13 @@ trap on_failure EXIT
acquire_lock acquire_lock
load_env load_env
# Determine branch # Determine branch (source mode only — release installs have no git)
if [[ -z "$BRANCH" ]]; then if [[ -z "$BRANCH" ]]; then
if [[ "$INSTALL_MODE" == "release" ]]; then
BRANCH="release"
else
BRANCH="$(git rev-parse --abbrev-ref HEAD)" BRANCH="$(git rev-parse --abbrev-ref HEAD)"
fi
fi fi
# ============================================================================= # =============================================================================
@ -461,13 +465,15 @@ else
exit 1 exit 1
fi fi
# Remote reachable # Remote reachable (source mode only — release mode pulls from Gitea API later)
info "Checking git remote..." if [[ "$INSTALL_MODE" == "source" ]]; then
if timeout 10 git ls-remote origin HEAD &>/dev/null 2>&1; then info "Checking git remote..."
if timeout 10 git ls-remote origin HEAD &>/dev/null 2>&1; then
success "Git remote reachable" success "Git remote reachable"
else else
error "Cannot reach git remote. Check your network or remote configuration." error "Cannot reach git remote. Check your network or remote configuration."
exit 1 exit 1
fi
fi fi
# Working directory checks # Working directory checks
@ -490,9 +496,16 @@ fi
success "Disk space: ${AVAILABLE_MB}MB available" success "Disk space: ${AVAILABLE_MB}MB available"
# Record pre-upgrade state # Record pre-upgrade state
PRE_UPGRADE_COMMIT="$(git rev-parse HEAD)" if [[ "$INSTALL_MODE" == "source" ]]; then
PRE_UPGRADE_SHORT="$(git rev-parse --short HEAD)" PRE_UPGRADE_COMMIT="$(git rev-parse HEAD)"
info "Current commit: $PRE_UPGRADE_SHORT ($(git log -1 --format='%s' HEAD))" PRE_UPGRADE_SHORT="$(git rev-parse --short HEAD)"
info "Current commit: $PRE_UPGRADE_SHORT ($(git log -1 --format='%s' HEAD))"
else
# Release mode: derive "commit" from VERSION file (format: <tag>\n<sha>)
PRE_UPGRADE_COMMIT="$(head -2 "$PROJECT_DIR/VERSION" 2>/dev/null | tail -1 || echo "release")"
PRE_UPGRADE_SHORT="$(head -1 "$PROJECT_DIR/VERSION" 2>/dev/null || echo "release")"
info "Current version: $PRE_UPGRADE_SHORT"
fi
info "Target branch: $BRANCH" info "Target branch: $BRANCH"
# Record running containers (for restoring monitoring profile later) # Record running containers (for restoring monitoring profile later)
@ -502,10 +515,12 @@ if docker ps --format '{{.Names}}' | grep -q 'prometheus-changemaker'; then
info "Monitoring stack detected (will restart after upgrade)" info "Monitoring stack detected (will restart after upgrade)"
fi fi
# Warn about uncommitted changes in project-owned paths # Source-mode-only checks: dirty files + upstream commit comparison
PROJECT_OWNED_PATHS="api/ admin/ docker-compose.yml" if [[ "$INSTALL_MODE" == "source" ]]; then
DIRTY_PROJECT_FILES="$(git diff --name-only HEAD -- $PROJECT_OWNED_PATHS 2>/dev/null || true)" # Warn about uncommitted changes in project-owned paths
if [[ -n "$DIRTY_PROJECT_FILES" ]]; then PROJECT_OWNED_PATHS="api/ admin/ docker-compose.yml"
DIRTY_PROJECT_FILES="$(git diff --name-only HEAD -- $PROJECT_OWNED_PATHS 2>/dev/null || true)"
if [[ -n "$DIRTY_PROJECT_FILES" ]]; then
warn "Uncommitted changes in project-owned files:" warn "Uncommitted changes in project-owned files:"
echo "$DIRTY_PROJECT_FILES" | while read -r f; do echo " $f"; done echo "$DIRTY_PROJECT_FILES" | while read -r f; do echo " $f"; done
if [[ "$FORCE" != "true" ]]; then if [[ "$FORCE" != "true" ]]; then
@ -513,12 +528,12 @@ if [[ -n "$DIRTY_PROJECT_FILES" ]]; then
exit 1 exit 1
fi fi
warn "Continuing with --force (changes will be stashed)" warn "Continuing with --force (changes will be stashed)"
fi fi
# Check for available updates # Check for available updates
LOCAL_HEAD="$(git rev-parse HEAD)" LOCAL_HEAD="$(git rev-parse HEAD)"
REMOTE_HEAD="$(git ls-remote origin "$BRANCH" | cut -f1)" REMOTE_HEAD="$(git ls-remote origin "$BRANCH" | cut -f1)"
if [[ "$LOCAL_HEAD" == "$REMOTE_HEAD" ]]; then if [[ "$LOCAL_HEAD" == "$REMOTE_HEAD" ]]; then
info "Already up to date ($PRE_UPGRADE_SHORT). No upstream changes." info "Already up to date ($PRE_UPGRADE_SHORT). No upstream changes."
if [[ "$FORCE" != "true" ]]; then if [[ "$FORCE" != "true" ]]; then
success "Nothing to upgrade." success "Nothing to upgrade."
@ -526,7 +541,10 @@ if [[ "$LOCAL_HEAD" == "$REMOTE_HEAD" ]]; then
exit 0 exit 0
fi fi
warn "Continuing with --force despite no upstream changes." warn "Continuing with --force despite no upstream changes."
fi
fi fi
# Release mode: the upstream-version comparison happens later, in the
# release-mode update block, which queries the Gitea Releases API.
# ============================================================================= # =============================================================================
# Phase 2: Backup # Phase 2: Backup
@ -669,50 +687,53 @@ elif [[ "$DRY_RUN" == "true" ]]; then
exit 0 exit 0
fi fi
# Step 0: Save user-modifiable paths before any git operations # Source-mode git pull flow. Release mode handles its update via tarball
save_user_paths # download in the block above and skips this entire section.
if [[ "$INSTALL_MODE" == "source" ]]; then
# Step 0: Save user-modifiable paths before any git operations
save_user_paths
# Step 0b: Clear skip-worktree flags that prevent merge (e.g., repo-data JSON files) # Step 0b: Clear skip-worktree flags that prevent merge (e.g., repo-data JSON files)
SKIP_WORKTREE_FILES="$(git ls-files -v | grep '^S ' | awk '{print $2}' || true)" SKIP_WORKTREE_FILES="$(git ls-files -v | grep '^S ' | awk '{print $2}' || true)"
if [[ -n "$SKIP_WORKTREE_FILES" ]]; then if [[ -n "$SKIP_WORKTREE_FILES" ]]; then
info "Clearing skip-worktree flags on $(echo "$SKIP_WORKTREE_FILES" | wc -l | xargs) file(s)..." info "Clearing skip-worktree flags on $(echo "$SKIP_WORKTREE_FILES" | wc -l | xargs) file(s)..."
echo "$SKIP_WORKTREE_FILES" | xargs git update-index --no-skip-worktree echo "$SKIP_WORKTREE_FILES" | xargs git update-index --no-skip-worktree
success "Skip-worktree flags cleared" success "Skip-worktree flags cleared"
fi fi
# Step 0c: Fix Docker-owned directories that block git checkout # Step 0c: Fix Docker-owned directories that block git checkout
for owned_dir in api/upgrade api/uploads api/configs; do for owned_dir in api/upgrade api/uploads api/configs; do
if [[ -d "$PROJECT_DIR/$owned_dir" ]] && [[ ! -w "$PROJECT_DIR/$owned_dir" ]]; then if [[ -d "$PROJECT_DIR/$owned_dir" ]] && [[ ! -w "$PROJECT_DIR/$owned_dir" ]]; then
info "Fixing permissions on $owned_dir..." info "Fixing permissions on $owned_dir..."
docker run --rm -v "$PROJECT_DIR/$owned_dir:/fix" alpine chown -R "$(id -u):$(id -g)" /fix 2>/dev/null || true docker run --rm -v "$PROJECT_DIR/$owned_dir:/fix" alpine chown -R "$(id -u):$(id -g)" /fix 2>/dev/null || true
fi fi
done done
# Step 1: Stash user changes if any exist # Step 1: Stash user changes if any exist
HAS_CHANGES=false HAS_CHANGES=false
if [[ -n "$(git status --porcelain 2>/dev/null)" ]]; then if [[ -n "$(git status --porcelain 2>/dev/null)" ]]; then
HAS_CHANGES=true HAS_CHANGES=true
STASH_NAME="upgrade-${TIMESTAMP}" STASH_NAME="upgrade-${TIMESTAMP}"
info "Stashing local changes as '$STASH_NAME'..." info "Stashing local changes as '$STASH_NAME'..."
git stash push --include-untracked -m "$STASH_NAME" git stash push --include-untracked -m "$STASH_NAME"
success "Local changes stashed" success "Local changes stashed"
fi fi
# Step 3: Pull updates # Step 3: Pull updates
info "Pulling updates from origin/$BRANCH..." info "Pulling updates from origin/$BRANCH..."
if ! git pull origin "$BRANCH" --no-edit 2>&1; then if ! git pull origin "$BRANCH" --no-edit 2>&1; then
error "git pull failed. This may indicate upstream force-push or branch issues." error "git pull failed. This may indicate upstream force-push or branch issues."
if [[ "$HAS_CHANGES" == "true" ]]; then if [[ "$HAS_CHANGES" == "true" ]]; then
warn "Your stashed changes can be recovered with: git stash pop" warn "Your stashed changes can be recovered with: git stash pop"
fi fi
exit 1 exit 1
fi fi
POST_PULL_COMMIT="$(git rev-parse --short HEAD)" POST_PULL_COMMIT="$(git rev-parse --short HEAD)"
success "Updated to $POST_PULL_COMMIT" success "Updated to $POST_PULL_COMMIT"
# Step 4: Pop stash and handle conflicts # Step 4: Pop stash and handle conflicts
if [[ "$HAS_CHANGES" == "true" ]]; then if [[ "$HAS_CHANGES" == "true" ]]; then
info "Restoring local changes..." info "Restoring local changes..."
if git stash pop 2>&1; then if git stash pop 2>&1; then
success "Local changes restored cleanly" success "Local changes restored cleanly"
@ -750,19 +771,21 @@ if [[ "$HAS_CHANGES" == "true" ]]; then
success "Auto-resolved $RESOLVED_COUNT user-modifiable path(s) (kept your versions)" success "Auto-resolved $RESOLVED_COUNT user-modifiable path(s) (kept your versions)"
fi fi
fi fi
fi fi
# Step 4b: Restore user-modifiable paths (unconditionally overwrites with saved copies) # Step 4b: Restore user-modifiable paths (unconditionally overwrites with saved copies)
restore_user_paths restore_user_paths
# Step 4c: Restore any tracked files accidentally deleted by restore_user_paths # Step 4c: Restore any tracked files accidentally deleted by restore_user_paths
# (can happen when save_user_paths can't read root-owned files in user paths) # (can happen when save_user_paths can't read root-owned files in user paths)
DELETED_TRACKED="$(git ls-files --deleted 2>/dev/null || true)" DELETED_TRACKED="$(git ls-files --deleted 2>/dev/null || true)"
if [[ -n "$DELETED_TRACKED" ]]; then if [[ -n "$DELETED_TRACKED" ]]; then
info "Restoring $(echo "$DELETED_TRACKED" | wc -l | xargs) tracked file(s) deleted during restore..." info "Restoring $(echo "$DELETED_TRACKED" | wc -l | xargs) tracked file(s) deleted during restore..."
echo "$DELETED_TRACKED" | xargs git checkout HEAD -- 2>/dev/null || true echo "$DELETED_TRACKED" | xargs git checkout HEAD -- 2>/dev/null || true
success "Tracked files restored from HEAD" success "Tracked files restored from HEAD"
fi
fi fi
# End of source-mode git pull flow
# Step 5: Detect new env vars # Step 5: Detect new env vars
info "Checking for new environment variables..." info "Checking for new environment variables..."
@ -791,24 +814,30 @@ if [[ -f "$PROJECT_DIR/.env.example" ]] && [[ -f "$PROJECT_DIR/.env" ]]; then
fi fi
fi fi
# Step 6: Print update summary # Step 6: Print update summary (source mode only — release mode has no commit range)
COMMIT_RANGE="${PRE_UPGRADE_SHORT}..${POST_PULL_COMMIT}" COMMIT_COUNT=0
COMMIT_COUNT="$(git log --oneline "$PRE_UPGRADE_COMMIT..HEAD" 2>/dev/null | wc -l | xargs)" if [[ "$INSTALL_MODE" == "source" ]]; then
echo "" COMMIT_RANGE="${PRE_UPGRADE_SHORT}..${POST_PULL_COMMIT}"
info "Update summary: $COMMIT_COUNT commit(s) ($COMMIT_RANGE)" # Use || true and check pipefail-safe to survive git failures
git log --oneline "$PRE_UPGRADE_COMMIT..HEAD" 2>/dev/null | head -20 COMMIT_COUNT="$(git log --oneline "$PRE_UPGRADE_COMMIT..HEAD" 2>/dev/null | wc -l | xargs || echo 0)"
if [[ "$COMMIT_COUNT" -gt 20 ]]; then echo ""
info "Update summary: $COMMIT_COUNT commit(s) ($COMMIT_RANGE)"
git log --oneline "$PRE_UPGRADE_COMMIT..HEAD" 2>/dev/null | head -20 || true
if [[ "$COMMIT_COUNT" -gt 20 ]]; then
info " ... and $((COMMIT_COUNT - 20)) more" info " ... and $((COMMIT_COUNT - 20)) more"
fi fi
# Flag commits that may require manual attention # Flag commits that may require manual attention
BREAKING_COMMITS="$(git log --oneline "$PRE_UPGRADE_COMMIT..HEAD" --grep="BREAKING" --grep="\[manual\]" 2>/dev/null || true)" BREAKING_COMMITS="$(git log --oneline "$PRE_UPGRADE_COMMIT..HEAD" --grep="BREAKING" --grep="\[manual\]" 2>/dev/null || true)"
if [[ -n "$BREAKING_COMMITS" ]]; then if [[ -n "$BREAKING_COMMITS" ]]; then
echo "" echo ""
warn "Commits requiring manual attention:" warn "Commits requiring manual attention:"
echo "$BREAKING_COMMITS" | while read -r line; do echo "$BREAKING_COMMITS" | while read -r line; do
echo -e " ${YELLOW}$line${NC}" echo -e " ${YELLOW}$line${NC}"
done done
fi
else
info "Update summary: ${PRE_UPGRADE_SHORT} → release"
fi fi
# ============================================================================= # =============================================================================
@ -1135,7 +1164,10 @@ verify_service_health() {
done done
warn "$name: not responding after ${max_wait}s" warn "$name: not responding after ${max_wait}s"
VERIFY_FAILED=true VERIFY_FAILED=true
return 1 # Always return 0 — under set -e a non-zero return from this helper would
# exit the script before write_result runs. The VERIFY_FAILED flag is the
# signal the caller actually checks.
return 0
} }
# API health (with polling — may still be running migrations) # API health (with polling — may still be running migrations)
@ -1194,7 +1226,11 @@ fi
# ============================================================================= # =============================================================================
ELAPSED="$(elapsed)" ELAPSED="$(elapsed)"
FINAL_COMMIT="$(git rev-parse --short HEAD)" if [[ "$INSTALL_MODE" == "source" ]]; then
FINAL_COMMIT="$(git rev-parse --short HEAD)"
else
FINAL_COMMIT="$(head -1 "$PROJECT_DIR/VERSION" 2>/dev/null || echo "release")"
fi
# Collect warnings for API mode result # Collect warnings for API mode result
UPGRADE_WARNINGS="[]" UPGRADE_WARNINGS="[]"
@ -1211,7 +1247,11 @@ echo -e "${BOLD}${GREEN} Upgrade Complete${NC}"
echo -e "${BOLD}${GREEN}══════════════════════════════════════════════════${NC}" echo -e "${BOLD}${GREEN}══════════════════════════════════════════════════${NC}"
echo "" echo ""
echo -e " ${BOLD}Previous:${NC} $PRE_UPGRADE_SHORT" echo -e " ${BOLD}Previous:${NC} $PRE_UPGRADE_SHORT"
echo -e " ${BOLD}Current:${NC} $FINAL_COMMIT ($(git log -1 --format='%s' HEAD))" if [[ "$INSTALL_MODE" == "source" ]]; then
echo -e " ${BOLD}Current:${NC} $FINAL_COMMIT ($(git log -1 --format='%s' HEAD 2>/dev/null || echo "$FINAL_COMMIT"))"
else
echo -e " ${BOLD}Current:${NC} $FINAL_COMMIT"
fi
echo -e " ${BOLD}Commits:${NC} $COMMIT_COUNT" echo -e " ${BOLD}Commits:${NC} $COMMIT_COUNT"
echo -e " ${BOLD}Duration:${NC} $ELAPSED" echo -e " ${BOLD}Duration:${NC} $ELAPSED"
echo -e " ${BOLD}Log:${NC} $LOG_FILE" echo -e " ${BOLD}Log:${NC} $LOG_FILE"