CCP restore/tunnel/upgrade + upgrade.sh release-mode fixes + volunteer dashboard polish
- Add instance restore model, routes, and agent backup/restore endpoints - Add Pangolin tunnel service (subdomain prefix, teardown action, CCP client) - Add slug mutex for concurrent operation safety in agent - Expand upgrade service with remote driver orchestration - Fix upgrade.sh to properly handle release-mode installs (no git operations) - Add CCP registration flags to config.sh (--ccp-url, --ccp-invite-code, --ccp-agent-url) - Auto-detect JVB advertise IP in non-interactive mode - Polish volunteer dashboard ActionStepsList with highlighted step component - Add ticketed event description field + volunteer dashboard query refinements Bunker Admin
This commit is contained in:
parent
29d1f3998a
commit
26ec925d9b
@ -10,6 +10,8 @@ import {
|
||||
LinkOutlined,
|
||||
CheckSquareOutlined,
|
||||
CheckCircleFilled,
|
||||
RightOutlined,
|
||||
ThunderboltOutlined,
|
||||
} from '@ant-design/icons';
|
||||
import { useNavigate } from 'react-router-dom';
|
||||
import { api } from '@/lib/api';
|
||||
@ -66,6 +68,97 @@ function resolveStepLink(step: DashboardActionStep): { to: string; external: boo
|
||||
}
|
||||
}
|
||||
|
||||
function HighlightedStep({
|
||||
step,
|
||||
onNavigate,
|
||||
onSelfReport,
|
||||
loading,
|
||||
}: {
|
||||
step: DashboardActionStep;
|
||||
onNavigate: (step: DashboardActionStep) => void;
|
||||
onSelfReport: (step: DashboardActionStep) => void;
|
||||
loading: boolean;
|
||||
}) {
|
||||
const isSelfReport = step.kind === 'CUSTOM' || step.kind === 'VISIT_LINK';
|
||||
const canNavigate = resolveStepLink(step) !== null;
|
||||
|
||||
return (
|
||||
<div
|
||||
style={{
|
||||
background: 'linear-gradient(135deg, rgba(52,152,219,0.25) 0%, rgba(41,128,185,0.15) 100%)',
|
||||
border: '1px solid rgba(52,152,219,0.3)',
|
||||
borderRadius: 8,
|
||||
padding: '16px 20px',
|
||||
margin: '0 0 2px',
|
||||
}}
|
||||
>
|
||||
<div style={{ display: 'flex', alignItems: 'center', gap: 6, marginBottom: 8 }}>
|
||||
<ThunderboltOutlined style={{ fontSize: 12, color: '#3498db' }} />
|
||||
<Typography.Text strong style={{ fontSize: 12, color: '#3498db', textTransform: 'uppercase', letterSpacing: 0.5 }}>
|
||||
Next Up
|
||||
</Typography.Text>
|
||||
</div>
|
||||
<div style={{ display: 'flex', alignItems: 'center', gap: 10, marginBottom: 8 }}>
|
||||
<div
|
||||
style={{
|
||||
width: 32,
|
||||
height: 32,
|
||||
borderRadius: '50%',
|
||||
background: 'rgba(52,152,219,0.25)',
|
||||
display: 'flex',
|
||||
alignItems: 'center',
|
||||
justifyContent: 'center',
|
||||
fontSize: 16,
|
||||
color: '#3498db',
|
||||
flexShrink: 0,
|
||||
}}
|
||||
>
|
||||
{KIND_ICONS[step.kind]}
|
||||
</div>
|
||||
<div style={{ flex: 1, minWidth: 0 }}>
|
||||
<Typography.Text strong style={{ fontSize: 15, display: 'block' }}>
|
||||
{step.label}
|
||||
</Typography.Text>
|
||||
{step.description && (
|
||||
<Typography.Text type="secondary" style={{ fontSize: 12, display: 'block', marginTop: 2 }}>
|
||||
{step.description}
|
||||
</Typography.Text>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
<div style={{ display: 'flex', gap: 8, marginTop: 4 }}>
|
||||
{isSelfReport ? (
|
||||
<>
|
||||
{canNavigate && (
|
||||
<Button size="middle" onClick={() => onNavigate(step)} icon={<RightOutlined />}>
|
||||
Open
|
||||
</Button>
|
||||
)}
|
||||
<Button
|
||||
type="primary"
|
||||
size="middle"
|
||||
loading={loading}
|
||||
onClick={() => onSelfReport(step)}
|
||||
>
|
||||
Mark as done
|
||||
</Button>
|
||||
</>
|
||||
) : (
|
||||
<Button
|
||||
type="primary"
|
||||
size="middle"
|
||||
icon={<RightOutlined />}
|
||||
onClick={() => onNavigate(step)}
|
||||
disabled={!canNavigate}
|
||||
>
|
||||
Take Action
|
||||
</Button>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
export default function ActionStepsList({ campaign, onRefresh }: ActionStepsListProps) {
|
||||
const navigate = useNavigate();
|
||||
const { message } = App.useApp();
|
||||
@ -95,6 +188,8 @@ export default function ActionStepsList({ campaign, onRefresh }: ActionStepsList
|
||||
};
|
||||
|
||||
const sortedSteps = [...campaign.steps].sort((a, b) => a.order - b.order);
|
||||
const highlightedStep = sortedSteps.find((s) => !s.completed);
|
||||
const remainingSteps = sortedSteps.filter((s) => s.id !== highlightedStep?.id);
|
||||
|
||||
return (
|
||||
<Card
|
||||
@ -108,7 +203,18 @@ export default function ActionStepsList({ campaign, onRefresh }: ActionStepsList
|
||||
</Typography.Text>
|
||||
}
|
||||
>
|
||||
{sortedSteps.map((step, i) => {
|
||||
{highlightedStep && (
|
||||
<div style={{ padding: '12px 12px 0' }}>
|
||||
<HighlightedStep
|
||||
step={highlightedStep}
|
||||
onNavigate={handleNavigate}
|
||||
onSelfReport={handleSelfReport}
|
||||
loading={completingStepId === highlightedStep.id}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{remainingSteps.map((step, i) => {
|
||||
const isSelfReport = step.kind === 'CUSTOM' || step.kind === 'VISIT_LINK';
|
||||
const canNavigate = resolveStepLink(step) !== null;
|
||||
|
||||
@ -119,8 +225,8 @@ export default function ActionStepsList({ campaign, onRefresh }: ActionStepsList
|
||||
display: 'flex',
|
||||
alignItems: 'center',
|
||||
justifyContent: 'space-between',
|
||||
padding: '12px 20px',
|
||||
borderTop: i > 0 ? '1px solid rgba(255,255,255,0.04)' : undefined,
|
||||
padding: '10px 20px',
|
||||
borderTop: (highlightedStep || i > 0) ? '1px solid rgba(255,255,255,0.04)' : undefined,
|
||||
opacity: step.completed ? 0.55 : 1,
|
||||
gap: 12,
|
||||
}}
|
||||
@ -128,22 +234,22 @@ export default function ActionStepsList({ campaign, onRefresh }: ActionStepsList
|
||||
<div style={{ display: 'flex', alignItems: 'center', gap: 10, flex: 1, minWidth: 0 }}>
|
||||
<div
|
||||
style={{
|
||||
width: 26,
|
||||
height: 26,
|
||||
width: 24,
|
||||
height: 24,
|
||||
borderRadius: '50%',
|
||||
background: step.completed ? '#52c41a' : 'rgba(52,152,219,0.15)',
|
||||
background: step.completed ? '#52c41a' : 'rgba(255,255,255,0.06)',
|
||||
display: 'flex',
|
||||
alignItems: 'center',
|
||||
justifyContent: 'center',
|
||||
fontSize: 13,
|
||||
fontSize: 12,
|
||||
flexShrink: 0,
|
||||
color: step.completed ? '#fff' : 'rgba(255,255,255,0.7)',
|
||||
color: step.completed ? '#fff' : 'rgba(255,255,255,0.5)',
|
||||
}}
|
||||
>
|
||||
{step.completed ? <CheckCircleFilled /> : KIND_ICONS[step.kind]}
|
||||
</div>
|
||||
<div style={{ minWidth: 0 }}>
|
||||
<Typography.Text strong style={{ fontSize: 12, color: 'rgba(255,255,255,0.45)', display: 'block' }}>
|
||||
<Typography.Text strong style={{ fontSize: 11, color: 'rgba(255,255,255,0.35)', display: 'block', lineHeight: 1 }}>
|
||||
{KIND_LABELS[step.kind]}
|
||||
</Typography.Text>
|
||||
<Typography.Text
|
||||
@ -163,7 +269,7 @@ export default function ActionStepsList({ campaign, onRefresh }: ActionStepsList
|
||||
|
||||
<div style={{ flexShrink: 0 }}>
|
||||
{step.completed ? (
|
||||
<Tag color="success" style={{ margin: 0 }}>Done</Tag>
|
||||
<Tag color="success" style={{ margin: 0, fontSize: 11 }}>Done</Tag>
|
||||
) : isSelfReport ? (
|
||||
<Space size={4}>
|
||||
{canNavigate && (
|
||||
|
||||
@ -7,7 +7,7 @@ import {
|
||||
import {
|
||||
PlusOutlined, SearchOutlined, EditOutlined, EyeOutlined, DeleteOutlined,
|
||||
CheckCircleOutlined, CloseCircleOutlined, CopyOutlined, ScanOutlined,
|
||||
TagOutlined, VideoCameraOutlined, EnvironmentOutlined,
|
||||
TagOutlined, VideoCameraOutlined, EnvironmentOutlined, StarOutlined, StarFilled,
|
||||
} from '@ant-design/icons';
|
||||
import { api } from '@/lib/api';
|
||||
import dayjs from 'dayjs';
|
||||
@ -45,6 +45,7 @@ interface TicketedEvent {
|
||||
currentAttendees: number;
|
||||
coverImageUrl: string | null;
|
||||
organizerName: string | null;
|
||||
featured: boolean;
|
||||
ticketTiers: TicketTier[];
|
||||
_count: { tickets: number; checkIns: number };
|
||||
createdAt: string;
|
||||
@ -198,18 +199,55 @@ export default function TicketedEventsPage() {
|
||||
}
|
||||
};
|
||||
|
||||
const handleFeature = async (id: string, featured: boolean) => {
|
||||
try {
|
||||
if (featured) {
|
||||
// Unfeature all others first (exclusive toggle)
|
||||
const othersToUnfeature = events.filter((e) => e.featured && e.id !== id);
|
||||
await Promise.all(
|
||||
othersToUnfeature.map((e) => api.put(`/api/ticketed-events/admin/${e.id}`, { featured: false }))
|
||||
);
|
||||
}
|
||||
await api.put(`/api/ticketed-events/admin/${id}`, { featured });
|
||||
message.success(featured ? 'Event featured on volunteer dashboard' : 'Event unfeatured');
|
||||
fetchEvents();
|
||||
} catch {
|
||||
message.error('Failed to update featured status');
|
||||
}
|
||||
};
|
||||
|
||||
const copyLink = (slug: string) => {
|
||||
navigator.clipboard.writeText(`${window.location.origin}/event/${slug}`);
|
||||
message.success('Link copied');
|
||||
};
|
||||
|
||||
const columns = [
|
||||
{
|
||||
title: '',
|
||||
key: 'featured',
|
||||
width: 36,
|
||||
render: (_: unknown, record: TicketedEvent) => (
|
||||
<Tooltip title={record.featured ? 'Remove from volunteer dashboard' : 'Feature on volunteer dashboard'}>
|
||||
<Button
|
||||
type="text"
|
||||
size="small"
|
||||
icon={record.featured
|
||||
? <StarFilled style={{ color: '#faad14' }} />
|
||||
: <StarOutlined style={{ color: 'rgba(255,255,255,0.25)' }} />}
|
||||
onClick={(e) => { e.stopPropagation(); handleFeature(record.id, !record.featured); }}
|
||||
/>
|
||||
</Tooltip>
|
||||
),
|
||||
},
|
||||
{
|
||||
title: 'Title',
|
||||
dataIndex: 'title',
|
||||
key: 'title',
|
||||
render: (text: string, record: TicketedEvent) => (
|
||||
<a onClick={() => navigate(`/app/events/${record.id}`)}>{text}</a>
|
||||
<Space>
|
||||
<a onClick={() => navigate(`/app/events/${record.id}`)}>{text}</a>
|
||||
{record.featured && <Tag color="gold" style={{ fontSize: 11 }}>Featured</Tag>}
|
||||
</Space>
|
||||
),
|
||||
},
|
||||
{
|
||||
|
||||
@ -50,6 +50,7 @@ export const updateEventSchema = z.object({
|
||||
maxAttendees: z.number().int().positive().nullable().optional(),
|
||||
organizerName: z.string().max(200).nullable().optional(),
|
||||
organizerEmail: z.string().email().nullable().optional(),
|
||||
featured: z.boolean().optional(),
|
||||
});
|
||||
|
||||
export const createTierSchema = z.object({
|
||||
|
||||
@ -114,24 +114,31 @@ async function getReferral(userId: string): Promise<DashboardReferral> {
|
||||
async function getFeaturedEvent(): Promise<DashboardFeaturedEvent | null> {
|
||||
const today = new Date();
|
||||
today.setHours(0, 0, 0, 0);
|
||||
const event = await prisma.ticketedEvent.findFirst({
|
||||
where: {
|
||||
featured: true,
|
||||
status: TicketedEventStatus.PUBLISHED,
|
||||
date: { gte: today },
|
||||
},
|
||||
orderBy: { date: 'asc' },
|
||||
select: {
|
||||
slug: true,
|
||||
title: true,
|
||||
date: true,
|
||||
startTime: true,
|
||||
venueName: true,
|
||||
coverImageUrl: true,
|
||||
currentAttendees: true,
|
||||
maxAttendees: true,
|
||||
},
|
||||
});
|
||||
const eventSelect = {
|
||||
slug: true,
|
||||
title: true,
|
||||
date: true,
|
||||
startTime: true,
|
||||
venueName: true,
|
||||
coverImageUrl: true,
|
||||
currentAttendees: true,
|
||||
maxAttendees: true,
|
||||
} as const;
|
||||
const baseWhere = { status: TicketedEventStatus.PUBLISHED, date: { gte: today } };
|
||||
|
||||
// Prefer admin-featured event; fall back to next upcoming published event
|
||||
const event =
|
||||
await prisma.ticketedEvent.findFirst({
|
||||
where: { ...baseWhere, featured: true },
|
||||
orderBy: { date: 'asc' },
|
||||
select: eventSelect,
|
||||
}) ??
|
||||
await prisma.ticketedEvent.findFirst({
|
||||
where: baseWhere,
|
||||
orderBy: { date: 'asc' },
|
||||
select: eventSelect,
|
||||
});
|
||||
|
||||
if (!event) return null;
|
||||
return {
|
||||
slug: event.slug,
|
||||
|
||||
@ -14,7 +14,7 @@ export default function AgentRegistrationsPage() {
|
||||
const fetchRegistrations = useCallback(async () => {
|
||||
try {
|
||||
setLoading(true);
|
||||
const { data } = await api.get('/api/agents/registrations');
|
||||
const { data } = await api.get('/agents/registrations');
|
||||
setRegistrations(data);
|
||||
} catch {
|
||||
message.error('Failed to load registrations');
|
||||
@ -27,7 +27,7 @@ export default function AgentRegistrationsPage() {
|
||||
|
||||
const handleApprove = async (id: string) => {
|
||||
try {
|
||||
await api.post(`/api/agents/registrations/${id}/approve`);
|
||||
await api.post(`/agents/registrations/${id}/approve`);
|
||||
message.success('Registration approved — agent will receive certificates on next poll');
|
||||
fetchRegistrations();
|
||||
setDetailModal(null);
|
||||
@ -39,7 +39,7 @@ export default function AgentRegistrationsPage() {
|
||||
|
||||
const handleReject = async (id: string) => {
|
||||
try {
|
||||
await api.post(`/api/agents/registrations/${id}/reject`);
|
||||
await api.post(`/agents/registrations/${id}/reject`);
|
||||
message.success('Registration rejected');
|
||||
fetchRegistrations();
|
||||
setDetailModal(null);
|
||||
|
||||
@ -203,8 +203,16 @@ export default function BackupsPage() {
|
||||
{
|
||||
title: 'Instance',
|
||||
dataIndex: 'instance',
|
||||
width: 160,
|
||||
render: (inst: BackupRow['instance']) => inst?.name || '-',
|
||||
width: 180,
|
||||
render: (inst: BackupRow['instance'], record: BackupRow) => {
|
||||
const isRemote = record.manifest?.source === 'remote';
|
||||
return (
|
||||
<Space size={4}>
|
||||
<span>{inst?.name || '-'}</span>
|
||||
{isRemote && <Tag color="blue">remote</Tag>}
|
||||
</Space>
|
||||
);
|
||||
},
|
||||
},
|
||||
{
|
||||
title: 'Status',
|
||||
|
||||
@ -44,6 +44,7 @@ import {
|
||||
WarningOutlined,
|
||||
CloseCircleOutlined,
|
||||
InfoCircleOutlined,
|
||||
UndoOutlined,
|
||||
} from '@ant-design/icons';
|
||||
import dayjs from 'dayjs';
|
||||
import { useNavigate, useParams } from 'react-router-dom';
|
||||
@ -89,6 +90,16 @@ export default function InstanceDetailPage() {
|
||||
const [backupsLoading, setBackupsLoading] = useState(false);
|
||||
const [creatingBackup, setCreatingBackup] = useState(false);
|
||||
|
||||
// Restore state
|
||||
const [restoreModal, setRestoreModal] = useState<{ backup: Backup; typedSlug: string } | null>(null);
|
||||
const [restoring, setRestoring] = useState(false);
|
||||
const [activeRestoreId, setActiveRestoreId] = useState<string | null>(null);
|
||||
const [activeRestoreState, setActiveRestoreState] = useState<{
|
||||
status: string;
|
||||
logTail?: string | null;
|
||||
errorMessage?: string | null;
|
||||
} | null>(null);
|
||||
|
||||
// Feature reconfiguration state
|
||||
const [featureFlags, setFeatureFlags] = useState<Record<string, boolean>>({});
|
||||
const [reconfiguring, setReconfiguring] = useState(false);
|
||||
@ -109,6 +120,18 @@ export default function InstanceDetailPage() {
|
||||
const [tunnelSaving, setTunnelSaving] = useState(false);
|
||||
const [tunnelRemoving, setTunnelRemoving] = useState(false);
|
||||
|
||||
// Remote tunnel state (Pangolin API managed by CCP)
|
||||
const [tunnelStatus, setTunnelStatus] = useState<{
|
||||
configured: boolean;
|
||||
online?: boolean;
|
||||
siteId?: string;
|
||||
endpoint?: string;
|
||||
resources?: Array<{ subdomain: string; name: string; resourceId: string; hasTarget: boolean; targetIp?: string; targetPort?: number }>;
|
||||
} | null>(null);
|
||||
const [tunnelStatusLoading, setTunnelStatusLoading] = useState(false);
|
||||
const [tunnelSetupRunning, setTunnelSetupRunning] = useState(false);
|
||||
const [tunnelSyncing, setTunnelSyncing] = useState(false);
|
||||
|
||||
// Upgrade state
|
||||
const [updateStatus, setUpdateStatus] = useState<UpdateStatus | null>(null);
|
||||
const [checkingUpdate, setCheckingUpdate] = useState(false);
|
||||
@ -390,6 +413,64 @@ export default function InstanceDetailPage() {
|
||||
window.open(`/api/backups/${backupId}/download`, '_blank');
|
||||
};
|
||||
|
||||
const handleRestoreConfirm = async () => {
|
||||
if (!restoreModal) return;
|
||||
if (restoreModal.typedSlug !== instance?.slug) {
|
||||
message.error('Typed slug does not match — restore cancelled');
|
||||
return;
|
||||
}
|
||||
setRestoring(true);
|
||||
try {
|
||||
const { data } = await api.post(`/instances/${id}/restore`, {
|
||||
backupId: restoreModal.backup.id,
|
||||
});
|
||||
const restoreId = data.data.id as string;
|
||||
setActiveRestoreId(restoreId);
|
||||
setActiveRestoreState({ status: 'PENDING' });
|
||||
setRestoreModal(null);
|
||||
message.success('Restore started — polling for progress');
|
||||
} catch (err: unknown) {
|
||||
const e = err as { response?: { data?: { error?: { message?: string } } } };
|
||||
message.error(e?.response?.data?.error?.message || 'Failed to start restore');
|
||||
} finally {
|
||||
setRestoring(false);
|
||||
}
|
||||
};
|
||||
|
||||
// Poll the active restore's status every 3s until it completes or fails
|
||||
useEffect(() => {
|
||||
if (!activeRestoreId) return;
|
||||
let cancelled = false;
|
||||
const poll = async () => {
|
||||
try {
|
||||
const { data } = await api.get(`/instances/${id}/restores/${activeRestoreId}`);
|
||||
if (cancelled) return;
|
||||
const row = data.data;
|
||||
setActiveRestoreState({
|
||||
status: row.status,
|
||||
logTail: row.logTail,
|
||||
errorMessage: row.errorMessage,
|
||||
});
|
||||
if (row.status === 'COMPLETED') {
|
||||
message.success('Restore completed successfully');
|
||||
setActiveRestoreId(null);
|
||||
fetchBackups();
|
||||
} else if (row.status === 'FAILED') {
|
||||
message.error(`Restore failed: ${row.errorMessage || 'unknown error'}`);
|
||||
setActiveRestoreId(null);
|
||||
}
|
||||
} catch {
|
||||
// keep trying; transient errors are expected during remote restart
|
||||
}
|
||||
};
|
||||
poll();
|
||||
const handle = setInterval(poll, 3000);
|
||||
return () => {
|
||||
cancelled = true;
|
||||
clearInterval(handle);
|
||||
};
|
||||
}, [activeRestoreId, id, fetchBackups]);
|
||||
|
||||
// Initialize feature flags and tunnel form when instance loads
|
||||
useEffect(() => {
|
||||
if (instance) {
|
||||
@ -508,6 +589,11 @@ export default function InstanceDetailPage() {
|
||||
const ports = instance.portConfig as Record<string, number>;
|
||||
const isProvisioning = instance.status === 'PROVISIONING';
|
||||
const isRegistered = instance.isRegistered;
|
||||
const isRemote = instance.isRemote;
|
||||
// A "managed" instance is one CCP can run backup/restore/upgrade on.
|
||||
// Local CCP-managed and remote (agent-backed) both qualify; only locally-
|
||||
// adopted registered instances (isRegistered && !isRemote) are unmanaged.
|
||||
const isManaged = !isRegistered || isRemote;
|
||||
const canStart = instance.status === 'STOPPED' || instance.status === 'ERROR';
|
||||
const canStop = instance.status === 'RUNNING' || instance.status === 'ERROR';
|
||||
const canRestart = instance.status === 'RUNNING';
|
||||
@ -731,7 +817,7 @@ export default function InstanceDetailPage() {
|
||||
|
||||
const backupsTab = (
|
||||
<div>
|
||||
{isRegistered && (
|
||||
{!isManaged && (
|
||||
<Alert
|
||||
message="Backups not managed by CCP"
|
||||
description="This instance was deployed outside the control panel. Use its own backup tools to manage backups."
|
||||
@ -740,6 +826,15 @@ export default function InstanceDetailPage() {
|
||||
style={{ marginBottom: 16 }}
|
||||
/>
|
||||
)}
|
||||
{isRemote && (
|
||||
<Alert
|
||||
message="Remote instance"
|
||||
description="Backups and restores run via the remote agent over mTLS. Create Backup triggers scripts/backup.sh on the remote host and streams the archive back to the control panel."
|
||||
type="info"
|
||||
showIcon
|
||||
style={{ marginBottom: 16 }}
|
||||
/>
|
||||
)}
|
||||
<div style={{ marginBottom: 12, display: 'flex', justifyContent: 'space-between' }}>
|
||||
<Typography.Text type="secondary">
|
||||
{backups.length} backup{backups.length !== 1 ? 's' : ''}
|
||||
@ -749,7 +844,7 @@ export default function InstanceDetailPage() {
|
||||
type="primary"
|
||||
onClick={handleCreateBackup}
|
||||
loading={creatingBackup}
|
||||
disabled={instance.status !== 'RUNNING' || isRegistered}
|
||||
disabled={instance.status !== 'RUNNING' || !isManaged}
|
||||
>
|
||||
Create Backup
|
||||
</Button>
|
||||
@ -784,20 +879,36 @@ export default function InstanceDetailPage() {
|
||||
{
|
||||
title: 'Size',
|
||||
dataIndex: 'sizeBytes',
|
||||
render: (b: number | null) => (b ? `${(b / 1024 / 1024).toFixed(1)} MB` : '-'),
|
||||
render: (b: number | string | null) => {
|
||||
if (b == null) return '-';
|
||||
const n = typeof b === 'string' ? parseInt(b, 10) : b;
|
||||
return `${(n / 1024 / 1024).toFixed(1)} MB`;
|
||||
},
|
||||
},
|
||||
{
|
||||
title: 'Actions',
|
||||
width: 120,
|
||||
width: 160,
|
||||
render: (_: unknown, record: Backup) => (
|
||||
<Space size="small">
|
||||
{record.status === 'COMPLETED' && (
|
||||
<Button
|
||||
icon={<CloudDownloadOutlined />}
|
||||
size="small"
|
||||
type="text"
|
||||
onClick={() => handleDownloadBackup(record.id)}
|
||||
/>
|
||||
<>
|
||||
<Button
|
||||
icon={<CloudDownloadOutlined />}
|
||||
size="small"
|
||||
type="text"
|
||||
title="Download archive"
|
||||
onClick={() => handleDownloadBackup(record.id)}
|
||||
/>
|
||||
{isManaged && (
|
||||
<Button
|
||||
icon={<UndoOutlined />}
|
||||
size="small"
|
||||
type="text"
|
||||
title="Restore this backup (destructive)"
|
||||
onClick={() => setRestoreModal({ backup: record, typedSlug: '' })}
|
||||
/>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
<Popconfirm
|
||||
title="Delete this backup?"
|
||||
@ -1049,7 +1160,73 @@ export default function InstanceDetailPage() {
|
||||
);
|
||||
|
||||
const tunnelConfigured = !!(instance.pangolinEndpoint && instance.pangolinNewtId);
|
||||
const canConfigureTunnel = !isRegistered && (instance.status === 'RUNNING' || instance.status === 'STOPPED');
|
||||
const canConfigureTunnel = isManaged && (instance.status === 'RUNNING' || instance.status === 'STOPPED');
|
||||
|
||||
// Fetch tunnel status for remote instances
|
||||
const fetchTunnelStatus = useCallback(async () => {
|
||||
if (!isRemote) return;
|
||||
setTunnelStatusLoading(true);
|
||||
try {
|
||||
const { data } = await api.get(`/instances/${id}/tunnel/status`);
|
||||
setTunnelStatus(data.data);
|
||||
} catch {
|
||||
setTunnelStatus(null);
|
||||
} finally {
|
||||
setTunnelStatusLoading(false);
|
||||
}
|
||||
}, [id, isRemote]);
|
||||
|
||||
useEffect(() => {
|
||||
if (activeTab === 'tunnel' && isRemote) {
|
||||
fetchTunnelStatus();
|
||||
}
|
||||
}, [activeTab, isRemote, fetchTunnelStatus]);
|
||||
|
||||
const handleRemoteTunnelSetup = async (values: { subdomainPrefix?: string }) => {
|
||||
setTunnelSetupRunning(true);
|
||||
try {
|
||||
await api.post(`/instances/${id}/tunnel/setup`, {
|
||||
subdomainPrefix: values.subdomainPrefix || instance.slug,
|
||||
});
|
||||
message.success('Tunnel setup complete — Newt credentials pushed to remote instance');
|
||||
fetchInstance();
|
||||
fetchTunnelStatus();
|
||||
} catch (err: unknown) {
|
||||
const e = err as { response?: { data?: { error?: { message?: string } } } };
|
||||
message.error(e?.response?.data?.error?.message || 'Tunnel setup failed');
|
||||
} finally {
|
||||
setTunnelSetupRunning(false);
|
||||
}
|
||||
};
|
||||
|
||||
const handleTunnelSync = async () => {
|
||||
setTunnelSyncing(true);
|
||||
try {
|
||||
const { data } = await api.post(`/instances/${id}/tunnel/sync`);
|
||||
message.success(`Sync complete — ${data.data.created} new resource(s) created`);
|
||||
fetchTunnelStatus();
|
||||
} catch (err: unknown) {
|
||||
const e = err as { response?: { data?: { error?: { message?: string } } } };
|
||||
message.error(e?.response?.data?.error?.message || 'Sync failed');
|
||||
} finally {
|
||||
setTunnelSyncing(false);
|
||||
}
|
||||
};
|
||||
|
||||
const handleRemoteTunnelTeardown = async () => {
|
||||
setTunnelRemoving(true);
|
||||
try {
|
||||
await api.delete(`/instances/${id}/tunnel`);
|
||||
message.success('Tunnel torn down — Pangolin site deleted');
|
||||
fetchInstance();
|
||||
setTunnelStatus(null);
|
||||
} catch (err: unknown) {
|
||||
const e = err as { response?: { data?: { error?: { message?: string } } } };
|
||||
message.error(e?.response?.data?.error?.message || 'Teardown failed');
|
||||
} finally {
|
||||
setTunnelRemoving(false);
|
||||
}
|
||||
};
|
||||
|
||||
const handleConfigureTunnel = async (values: { pangolinEndpoint: string; pangolinNewtId: string; pangolinNewtSecret?: string }) => {
|
||||
setTunnelSaving(true);
|
||||
@ -1088,9 +1265,111 @@ export default function InstanceDetailPage() {
|
||||
}
|
||||
};
|
||||
|
||||
const tunnelTab = (
|
||||
const remoteTunnelTab = (
|
||||
<Space direction="vertical" size="large" style={{ width: '100%' }}>
|
||||
{isRegistered && (
|
||||
{tunnelStatus?.configured ? (
|
||||
<>
|
||||
<Alert
|
||||
message={`Tunnel active — ${tunnelStatus.online ? 'online' : 'offline'}`}
|
||||
description={`Connected to ${tunnelStatus.endpoint || instance.pangolinEndpoint} (site: ${tunnelStatus.siteId})`}
|
||||
type={tunnelStatus.online ? 'success' : 'warning'}
|
||||
showIcon
|
||||
icon={<CloudOutlined />}
|
||||
/>
|
||||
|
||||
<Card title="Current Configuration" size="small">
|
||||
<Descriptions bordered column={1}>
|
||||
<Descriptions.Item label="Endpoint">
|
||||
<Typography.Text copyable>{tunnelStatus.endpoint || instance.pangolinEndpoint}</Typography.Text>
|
||||
</Descriptions.Item>
|
||||
<Descriptions.Item label="Site ID">
|
||||
<Typography.Text copyable>{tunnelStatus.siteId || instance.pangolinSiteId}</Typography.Text>
|
||||
</Descriptions.Item>
|
||||
<Descriptions.Item label="Newt ID">
|
||||
<Typography.Text copyable>{instance.pangolinNewtId}</Typography.Text>
|
||||
</Descriptions.Item>
|
||||
<Descriptions.Item label="Status">
|
||||
<Tag color={tunnelStatus.online ? 'green' : 'orange'}>{tunnelStatus.online ? 'Online' : 'Offline'}</Tag>
|
||||
</Descriptions.Item>
|
||||
</Descriptions>
|
||||
</Card>
|
||||
|
||||
{tunnelStatus.resources && tunnelStatus.resources.length > 0 && (
|
||||
<Card
|
||||
title="Resources"
|
||||
size="small"
|
||||
extra={
|
||||
<Space>
|
||||
<Button icon={<SyncOutlined />} size="small" onClick={handleTunnelSync} loading={tunnelSyncing}>
|
||||
Sync
|
||||
</Button>
|
||||
<Button icon={<ReloadOutlined />} size="small" onClick={fetchTunnelStatus} loading={tunnelStatusLoading}>
|
||||
Refresh
|
||||
</Button>
|
||||
</Space>
|
||||
}
|
||||
>
|
||||
<Table
|
||||
dataSource={tunnelStatus.resources}
|
||||
rowKey="resourceId"
|
||||
size="small"
|
||||
pagination={false}
|
||||
columns={[
|
||||
{ title: 'Subdomain', dataIndex: 'subdomain', render: (s: string) => s || '(root)' },
|
||||
{ title: 'Name', dataIndex: 'name' },
|
||||
{ title: 'Target', render: (_: unknown, r: { hasTarget: boolean; targetIp?: string; targetPort?: number }) =>
|
||||
r.hasTarget ? `${r.targetIp}:${r.targetPort}` : <Tag color="red">No target</Tag>
|
||||
},
|
||||
]}
|
||||
/>
|
||||
</Card>
|
||||
)}
|
||||
|
||||
<Popconfirm
|
||||
title="Tear down tunnel?"
|
||||
description="This will delete the Pangolin site and all resources. The Newt container will be stopped."
|
||||
onConfirm={handleRemoteTunnelTeardown}
|
||||
>
|
||||
<Button danger icon={<DisconnectOutlined />} loading={tunnelRemoving}>
|
||||
Teardown Tunnel
|
||||
</Button>
|
||||
</Popconfirm>
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<Alert
|
||||
message="No tunnel configured"
|
||||
description="The CCP will create a Pangolin site and resources for this instance, push Newt credentials to its .env, and start the tunnel container."
|
||||
type="info"
|
||||
showIcon
|
||||
/>
|
||||
|
||||
<Card title="Setup Tunnel" size="small">
|
||||
<Form layout="vertical" onFinish={handleRemoteTunnelSetup}>
|
||||
<Form.Item
|
||||
name="subdomainPrefix"
|
||||
label="Subdomain Prefix"
|
||||
initialValue={instance.slug}
|
||||
extra={`Resources will be created as <prefix>-app.${instance.domain}, <prefix>-api.${instance.domain}, etc.`}
|
||||
rules={[{ required: true }, { pattern: /^[a-z0-9-]+$/, message: 'Lowercase alphanumeric + hyphens only' }]}
|
||||
>
|
||||
<Input placeholder={instance.slug} />
|
||||
</Form.Item>
|
||||
<Form.Item style={{ marginBottom: 0 }}>
|
||||
<Button type="primary" htmlType="submit" icon={<CloudOutlined />} loading={tunnelSetupRunning}>
|
||||
Setup Tunnel
|
||||
</Button>
|
||||
</Form.Item>
|
||||
</Form>
|
||||
</Card>
|
||||
</>
|
||||
)}
|
||||
</Space>
|
||||
);
|
||||
|
||||
const localTunnelTab = (
|
||||
<Space direction="vertical" size="large" style={{ width: '100%' }}>
|
||||
{!isManaged && (
|
||||
<Alert
|
||||
message="Tunnel management is not available for external instances"
|
||||
description="This instance was deployed outside the control panel. Manage its tunnel configuration directly."
|
||||
@ -1099,7 +1378,7 @@ export default function InstanceDetailPage() {
|
||||
/>
|
||||
)}
|
||||
|
||||
{!isRegistered && tunnelConfigured && (
|
||||
{isManaged && tunnelConfigured && (
|
||||
<Alert
|
||||
message={`Tunnel active — connected to ${instance.pangolinEndpoint}`}
|
||||
type="success"
|
||||
@ -1108,7 +1387,7 @@ export default function InstanceDetailPage() {
|
||||
/>
|
||||
)}
|
||||
|
||||
{!isRegistered && !tunnelConfigured && (
|
||||
{isManaged && !tunnelConfigured && (
|
||||
<Alert
|
||||
message="No tunnel configured"
|
||||
description="Enter your Pangolin Newt credentials below to enable tunnel access for this instance. You can get these from your Pangolin dashboard."
|
||||
@ -1133,7 +1412,7 @@ export default function InstanceDetailPage() {
|
||||
</Card>
|
||||
)}
|
||||
|
||||
{canConfigureTunnel && (
|
||||
{canConfigureTunnel && !isRemote && (
|
||||
<Card title={tunnelConfigured ? 'Update Tunnel' : 'Enable Tunnel'} size="small">
|
||||
<Form
|
||||
form={tunnelForm}
|
||||
@ -1200,6 +1479,8 @@ export default function InstanceDetailPage() {
|
||||
</Space>
|
||||
);
|
||||
|
||||
const tunnelTab = isRemote ? remoteTunnelTab : localTunnelTab;
|
||||
|
||||
// ─── Updates Tab ──────────────────────────────────────────────
|
||||
|
||||
const isUpgrading = currentUpgrade?.status === 'IN_PROGRESS' || currentUpgrade?.status === 'PENDING';
|
||||
@ -1278,7 +1559,7 @@ export default function InstanceDetailPage() {
|
||||
)}
|
||||
|
||||
{/* Upgrade Action */}
|
||||
{!isRegistered && (
|
||||
{isManaged && (
|
||||
<Card title="Upgrade" size="small">
|
||||
{isUpgrading && currentUpgrade ? (
|
||||
<Space direction="vertical" style={{ width: '100%' }}>
|
||||
@ -1340,7 +1621,7 @@ export default function InstanceDetailPage() {
|
||||
</Card>
|
||||
)}
|
||||
|
||||
{isRegistered && (
|
||||
{!isManaged && (
|
||||
<Alert
|
||||
message="Upgrades are not managed by CCP for external instances"
|
||||
description="Run the upgrade script directly on the instance or use its own upgrade mechanism."
|
||||
@ -1348,6 +1629,14 @@ export default function InstanceDetailPage() {
|
||||
showIcon
|
||||
/>
|
||||
)}
|
||||
{isRemote && (
|
||||
<Alert
|
||||
message="Remote instance"
|
||||
description="Upgrades run via the remote agent over mTLS. The agent shells out to scripts/upgrade.sh --api-mode and the control panel polls progress every 2s."
|
||||
type="info"
|
||||
showIcon
|
||||
/>
|
||||
)}
|
||||
|
||||
{/* Upgrade History */}
|
||||
<Card title="Upgrade History" size="small">
|
||||
@ -1794,6 +2083,108 @@ export default function InstanceDetailPage() {
|
||||
{ key: 'tunnel', label: 'Tunnel', children: tunnelTab },
|
||||
]}
|
||||
/>
|
||||
|
||||
{/* Restore confirmation modal (destructive action guard) */}
|
||||
<Modal
|
||||
title="Restore backup — destructive"
|
||||
open={!!restoreModal}
|
||||
onCancel={() => setRestoreModal(null)}
|
||||
onOk={handleRestoreConfirm}
|
||||
okText="Restore"
|
||||
okButtonProps={{
|
||||
danger: true,
|
||||
loading: restoring,
|
||||
disabled: restoreModal?.typedSlug !== instance.slug,
|
||||
}}
|
||||
cancelButtonProps={{ disabled: restoring }}
|
||||
width={560}
|
||||
>
|
||||
<Alert
|
||||
type="error"
|
||||
showIcon
|
||||
message="This will OVERWRITE the instance's databases and uploads"
|
||||
description="The agent will stop application containers, drop databases, and restore from the selected backup. This cannot be undone without another backup."
|
||||
style={{ marginBottom: 16 }}
|
||||
/>
|
||||
{restoreModal && (
|
||||
<Descriptions column={1} size="small" bordered style={{ marginBottom: 16 }}>
|
||||
<Descriptions.Item label="Backup ID">
|
||||
<code>{restoreModal.backup.id.substring(0, 8)}</code>
|
||||
</Descriptions.Item>
|
||||
<Descriptions.Item label="Archive size">
|
||||
{restoreModal.backup.sizeBytes
|
||||
? `${(Number(restoreModal.backup.sizeBytes) / 1024 / 1024).toFixed(1)} MB`
|
||||
: '-'}
|
||||
</Descriptions.Item>
|
||||
<Descriptions.Item label="Created">
|
||||
{restoreModal.backup.completedAt
|
||||
? dayjs(restoreModal.backup.completedAt).format('YYYY-MM-DD HH:mm')
|
||||
: '-'}
|
||||
</Descriptions.Item>
|
||||
</Descriptions>
|
||||
)}
|
||||
<Typography.Paragraph>
|
||||
Type the instance slug <strong><code>{instance.slug}</code></strong> to confirm:
|
||||
</Typography.Paragraph>
|
||||
<Input
|
||||
value={restoreModal?.typedSlug || ''}
|
||||
onChange={(e) =>
|
||||
setRestoreModal((cur) => (cur ? { ...cur, typedSlug: e.target.value } : cur))
|
||||
}
|
||||
placeholder={instance.slug}
|
||||
autoFocus
|
||||
/>
|
||||
</Modal>
|
||||
|
||||
{/* Active restore progress banner */}
|
||||
{activeRestoreId && activeRestoreState && (
|
||||
<Modal
|
||||
title="Restore in progress"
|
||||
open
|
||||
closable={false}
|
||||
footer={null}
|
||||
width={640}
|
||||
>
|
||||
<Space direction="vertical" style={{ width: '100%' }}>
|
||||
<div>
|
||||
<Tag
|
||||
color={
|
||||
activeRestoreState.status === 'COMPLETED'
|
||||
? 'green'
|
||||
: activeRestoreState.status === 'FAILED'
|
||||
? 'red'
|
||||
: 'processing'
|
||||
}
|
||||
>
|
||||
{activeRestoreState.status}
|
||||
</Tag>
|
||||
{activeRestoreState.status === 'RUNNING' && (
|
||||
<Typography.Text type="secondary" style={{ marginLeft: 8 }}>
|
||||
Agent is running scripts/restore.sh — this can take several minutes
|
||||
</Typography.Text>
|
||||
)}
|
||||
</div>
|
||||
{activeRestoreState.errorMessage && (
|
||||
<Alert type="error" message={activeRestoreState.errorMessage} showIcon />
|
||||
)}
|
||||
{activeRestoreState.logTail && (
|
||||
<pre
|
||||
style={{
|
||||
background: '#1e1e1e',
|
||||
color: '#d4d4d4',
|
||||
padding: 12,
|
||||
maxHeight: 300,
|
||||
overflow: 'auto',
|
||||
fontSize: 12,
|
||||
borderRadius: 4,
|
||||
}}
|
||||
>
|
||||
{activeRestoreState.logTail}
|
||||
</pre>
|
||||
)}
|
||||
</Space>
|
||||
</Modal>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@ -14,7 +14,7 @@ export default function InviteCodesPage() {
|
||||
const fetchCodes = useCallback(async () => {
|
||||
try {
|
||||
setLoading(true);
|
||||
const { data } = await api.get('/api/invite-codes');
|
||||
const { data } = await api.get('/invite-codes');
|
||||
setCodes(data.data || []);
|
||||
} catch {
|
||||
message.error('Failed to load invite codes');
|
||||
@ -28,7 +28,7 @@ export default function InviteCodesPage() {
|
||||
const handleCreate = async () => {
|
||||
try {
|
||||
setCreating(true);
|
||||
const { data } = await api.post('/api/invite-codes');
|
||||
const { data } = await api.post('/invite-codes');
|
||||
message.success(`Invite code created: ${data.code}`);
|
||||
fetchCodes();
|
||||
} catch {
|
||||
@ -40,7 +40,7 @@ export default function InviteCodesPage() {
|
||||
|
||||
const handleRevoke = async (id: string) => {
|
||||
try {
|
||||
await api.delete(`/api/invite-codes/${id}`);
|
||||
await api.delete(`/invite-codes/${id}`);
|
||||
message.success('Invite code revoked');
|
||||
fetchCodes();
|
||||
} catch {
|
||||
|
||||
@ -26,6 +26,7 @@ const envSchema = z.object({
|
||||
INSTANCE_SLUG: z.string().default(''),
|
||||
INSTANCE_DOMAIN: z.string().default(''),
|
||||
INSTANCE_BASE_PATH: z.string().default(''),
|
||||
COMPOSE_PROJECT: z.string().default(''),
|
||||
});
|
||||
|
||||
function validateEnv() {
|
||||
|
||||
@ -1,105 +1,623 @@
|
||||
import { Router, Request, Response } from 'express';
|
||||
import { param } from '../utils/params';
|
||||
import fs from 'fs/promises';
|
||||
import path from 'path';
|
||||
import { exec as execCb } from 'child_process';
|
||||
import { createReadStream, createWriteStream } from 'fs';
|
||||
import { pipeline as pipelineCb, Transform } from 'stream';
|
||||
import { promisify } from 'util';
|
||||
import * as docker from '../services/docker.service';
|
||||
import path from 'path';
|
||||
import crypto from 'crypto';
|
||||
import { spawn } from 'child_process';
|
||||
import { getSlugEntry } from '../services/registry.service';
|
||||
import { env } from '../config/env';
|
||||
import { logger } from '../utils/logger';
|
||||
import { withSlugLock, SlugBusyError, isSlugLocked } from '../services/slug-mutex';
|
||||
import { AgentError } from '../middleware/error-handler';
|
||||
|
||||
const pipeline = promisify(pipelineCb);
|
||||
|
||||
const exec = promisify(execCb);
|
||||
const router = Router();
|
||||
|
||||
// POST /instance/:slug/backup — Run pg_dump + tar uploads → return backup info
|
||||
router.post('/instance/:slug/backup', async (req: Request, res: Response) => {
|
||||
const entry = await getSlugEntry(param(req, 'slug'));
|
||||
const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
|
||||
const backupDir = path.join(env.AGENT_DATA_DIR, 'backups', param(req, 'slug'), timestamp);
|
||||
await fs.mkdir(backupDir, { recursive: true });
|
||||
// ─── Helpers ──────────────────────────────────────────────────────────
|
||||
|
||||
const { pgPassword } = req.body;
|
||||
const ID_REGEX = /^[a-zA-Z0-9_-]+$/;
|
||||
const ARCHIVE_PREFIX = 'changemaker-v2-backup-';
|
||||
const ARCHIVE_SUFFIX = '.tar.gz';
|
||||
|
||||
function backupsDirFor(slug: string): string {
|
||||
return path.join(env.AGENT_DATA_DIR, 'backups', slug);
|
||||
}
|
||||
|
||||
function archivePathFor(slug: string, id: string): string {
|
||||
return path.join(backupsDirFor(slug), `${ARCHIVE_PREFIX}${id}${ARCHIVE_SUFFIX}`);
|
||||
}
|
||||
|
||||
async function sha256File(filePath: string): Promise<string> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const hash = crypto.createHash('sha256');
|
||||
const stream = createReadStream(filePath);
|
||||
stream.on('data', (chunk) => hash.update(chunk));
|
||||
stream.on('end', () => resolve(hash.digest('hex')));
|
||||
stream.on('error', reject);
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the manifest.json out of a backup archive without extracting it.
|
||||
* backup.sh stores it at <archive>/changemaker-v2-backup-<ts>/manifest.json
|
||||
*/
|
||||
async function readManifestFromArchive(archivePath: string): Promise<unknown | null> {
|
||||
return new Promise((resolve) => {
|
||||
const proc = spawn('tar', ['-xzOf', archivePath, '--wildcards', '*/manifest.json'], {
|
||||
stdio: ['ignore', 'pipe', 'ignore'],
|
||||
});
|
||||
let buf = '';
|
||||
proc.stdout.on('data', (chunk) => (buf += chunk.toString('utf-8')));
|
||||
proc.on('error', () => resolve(null));
|
||||
proc.on('close', (code) => {
|
||||
if (code !== 0 || !buf.trim()) return resolve(null);
|
||||
try {
|
||||
resolve(JSON.parse(buf));
|
||||
} catch {
|
||||
resolve(null);
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract the timestamp ID from a filename like "changemaker-v2-backup-20260409_143000.tar.gz".
|
||||
*/
|
||||
function idFromFilename(filename: string): string | null {
|
||||
if (!filename.startsWith(ARCHIVE_PREFIX) || !filename.endsWith(ARCHIVE_SUFFIX)) return null;
|
||||
return filename.slice(ARCHIVE_PREFIX.length, filename.length - ARCHIVE_SUFFIX.length);
|
||||
}
|
||||
|
||||
// ─── Routes ───────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* POST /instance/:slug/backup
|
||||
* Shells out to the remote CML's scripts/backup.sh. Returns archive metadata
|
||||
* so the CCP can immediately stream it down via the /download endpoint.
|
||||
*/
|
||||
router.post('/instance/:slug/backup', async (req: Request, res: Response) => {
|
||||
const slug = param(req, 'slug');
|
||||
const entry = await getSlugEntry(slug);
|
||||
|
||||
try {
|
||||
// 1. pg_dump
|
||||
const dumpFile = path.join(backupDir, 'database.sql');
|
||||
const dump = await docker.composeExec(
|
||||
entry.basePath, entry.composeProject,
|
||||
'v2-postgres',
|
||||
'pg_dump -U changemaker -d changemaker',
|
||||
300_000,
|
||||
pgPassword ? { PGPASSWORD: pgPassword } : undefined
|
||||
);
|
||||
await fs.writeFile(dumpFile, dump, 'utf-8');
|
||||
const result = await withSlugLock(slug, 'backup', async () => {
|
||||
const backupsDir = backupsDirFor(slug);
|
||||
await fs.mkdir(backupsDir, { recursive: true });
|
||||
|
||||
// Gzip the dump
|
||||
await exec(`gzip '${dumpFile}'`, { timeout: 120_000 });
|
||||
// Verify scripts/backup.sh exists
|
||||
const scriptPath = path.join(entry.basePath, 'scripts', 'backup.sh');
|
||||
try {
|
||||
await fs.access(scriptPath);
|
||||
} catch {
|
||||
throw new AgentError(500, `scripts/backup.sh not found at ${scriptPath}`, 'BACKUP_SCRIPT_MISSING');
|
||||
}
|
||||
|
||||
// 2. Tar uploads if exists
|
||||
const uploadsDir = path.join(entry.basePath, 'uploads');
|
||||
let hasUploads = false;
|
||||
try {
|
||||
await fs.access(uploadsDir);
|
||||
hasUploads = true;
|
||||
} catch { /* no uploads dir */ }
|
||||
|
||||
if (hasUploads) {
|
||||
await exec(
|
||||
`tar -czf '${path.join(backupDir, 'uploads.tar.gz')}' -C '${entry.basePath}' uploads`,
|
||||
{ timeout: 300_000 }
|
||||
// Snapshot existing archive filenames so we can identify the new one
|
||||
const existingFiles = new Set(
|
||||
(await fs.readdir(backupsDir)).filter((f) => f.startsWith(ARCHIVE_PREFIX) && f.endsWith(ARCHIVE_SUFFIX))
|
||||
);
|
||||
}
|
||||
|
||||
// 3. Create final archive
|
||||
const archiveName = `backup-${param(req, 'slug')}-${timestamp}.tar.gz`;
|
||||
const archivePath = path.join(env.AGENT_DATA_DIR, 'backups', archiveName);
|
||||
await exec(
|
||||
`tar -czf '${archivePath}' -C '${path.dirname(backupDir)}' '${timestamp}'`,
|
||||
{ timeout: 300_000 }
|
||||
);
|
||||
const logPath = path.join(backupsDir, `backup-${Date.now()}.log`);
|
||||
const logFd = await fs.open(logPath, 'w');
|
||||
|
||||
// Clean up temp dir
|
||||
await fs.rm(backupDir, { recursive: true, force: true });
|
||||
// Spawn backup.sh with cwd=basePath so its .env detection works.
|
||||
// Retention is effectively disabled here — CCP manages retention of
|
||||
// the streamed-down archives, not the agent's transient copies.
|
||||
//
|
||||
// Container names: backup.sh defaults to `changemaker-v2-postgres` and
|
||||
// `listmonk-db`, which match the main CML's `container_name:` overrides.
|
||||
// If a deployment has custom naming, the operator can set PG_CONTAINER /
|
||||
// LISTMONK_PG_CONTAINER in the instance's own .env (backup.sh loads it).
|
||||
const spawnEnv: NodeJS.ProcessEnv = {
|
||||
...process.env,
|
||||
BACKUP_DIR: backupsDir,
|
||||
RETENTION_DAYS: '36500', // ~100 years; CCP controls retention
|
||||
};
|
||||
|
||||
const stats = await fs.stat(archivePath);
|
||||
const backupId = timestamp;
|
||||
logger.info(`[backup] Running scripts/backup.sh for ${slug} (basePath=${entry.basePath})`);
|
||||
|
||||
logger.info(`[backup] Created backup for ${param(req, 'slug')}: ${archivePath} (${stats.size} bytes)`);
|
||||
const exitCode: number = await new Promise((resolve, reject) => {
|
||||
const proc = spawn('bash', ['scripts/backup.sh'], {
|
||||
cwd: entry.basePath,
|
||||
env: spawnEnv,
|
||||
stdio: ['ignore', 'pipe', 'pipe'],
|
||||
});
|
||||
proc.stdout.on('data', (chunk) => logFd.write(chunk).catch(() => {}));
|
||||
proc.stderr.on('data', (chunk) => logFd.write(chunk).catch(() => {}));
|
||||
proc.on('error', reject);
|
||||
proc.on('close', (code) => resolve(code ?? 1));
|
||||
});
|
||||
|
||||
res.json({
|
||||
backupId,
|
||||
archivePath,
|
||||
sizeBytes: stats.size,
|
||||
timestamp,
|
||||
await logFd.close();
|
||||
|
||||
if (exitCode !== 0) {
|
||||
// Return the tail of the log so the CCP can display it
|
||||
let logTail = '';
|
||||
try {
|
||||
const fullLog = await fs.readFile(logPath, 'utf-8');
|
||||
logTail = fullLog.split('\n').slice(-40).join('\n');
|
||||
} catch { /* ignore */ }
|
||||
throw new AgentError(500, `backup.sh exited with code ${exitCode}\n${logTail}`, 'BACKUP_FAILED');
|
||||
}
|
||||
|
||||
// Find the new archive
|
||||
const afterFiles = (await fs.readdir(backupsDir)).filter(
|
||||
(f) => f.startsWith(ARCHIVE_PREFIX) && f.endsWith(ARCHIVE_SUFFIX)
|
||||
);
|
||||
const newFiles = afterFiles.filter((f) => !existingFiles.has(f));
|
||||
if (newFiles.length === 0) {
|
||||
throw new AgentError(500, 'backup.sh succeeded but no new archive was created', 'BACKUP_NO_OUTPUT');
|
||||
}
|
||||
// Pick the most recently modified (in case of oddities)
|
||||
newFiles.sort();
|
||||
const newest = newFiles[newFiles.length - 1] as string;
|
||||
const archivePath = path.join(backupsDir, newest);
|
||||
const backupId = idFromFilename(newest);
|
||||
if (!backupId || !ID_REGEX.test(backupId)) {
|
||||
throw new AgentError(500, `Unexpected archive filename: ${newest}`, 'BACKUP_NAME_INVALID');
|
||||
}
|
||||
|
||||
const stats = await fs.stat(archivePath);
|
||||
const sha256 = await sha256File(archivePath);
|
||||
const manifest = await readManifestFromArchive(archivePath);
|
||||
|
||||
// Delete the log file once we know the backup succeeded
|
||||
try { await fs.unlink(logPath); } catch { /* ignore */ }
|
||||
|
||||
logger.info(`[backup] ${slug}: created ${newest} (${stats.size} bytes, sha256=${sha256.substring(0, 16)}...)`);
|
||||
|
||||
return {
|
||||
backupId,
|
||||
filename: newest,
|
||||
sizeBytes: stats.size,
|
||||
sha256,
|
||||
manifest,
|
||||
createdAt: stats.mtime.toISOString(),
|
||||
};
|
||||
});
|
||||
|
||||
res.json(result);
|
||||
} catch (err) {
|
||||
// Clean up on failure
|
||||
try { await fs.rm(backupDir, { recursive: true, force: true }); } catch { /* ignore */ }
|
||||
if (err instanceof SlugBusyError) {
|
||||
res.status(409).json({ error: 'SLUG_BUSY', message: err.message });
|
||||
return;
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
});
|
||||
|
||||
// GET /instance/:slug/backup/:id/download — Stream backup archive
|
||||
router.get('/instance/:slug/backup/:id/download', async (req: Request, res: Response) => {
|
||||
const archiveName = `backup-${param(req, 'slug')}-${param(req, 'id')}.tar.gz`;
|
||||
const archivePath = path.join(env.AGENT_DATA_DIR, 'backups', archiveName);
|
||||
/**
|
||||
* GET /instance/:slug/backups
|
||||
* Lists backup archives currently held on the agent for this slug.
|
||||
*/
|
||||
router.get('/instance/:slug/backups', async (req: Request, res: Response) => {
|
||||
const slug = param(req, 'slug');
|
||||
await getSlugEntry(slug); // validate slug is registered
|
||||
|
||||
const backupsDir = backupsDirFor(slug);
|
||||
let entries: string[] = [];
|
||||
try {
|
||||
await fs.access(archivePath);
|
||||
entries = await fs.readdir(backupsDir);
|
||||
} catch {
|
||||
res.json({ data: [] });
|
||||
return;
|
||||
}
|
||||
|
||||
const results = [];
|
||||
for (const filename of entries) {
|
||||
const id = idFromFilename(filename);
|
||||
if (!id) continue;
|
||||
try {
|
||||
const stats = await fs.stat(path.join(backupsDir, filename));
|
||||
results.push({
|
||||
backupId: id,
|
||||
filename,
|
||||
sizeBytes: stats.size,
|
||||
createdAt: stats.mtime.toISOString(),
|
||||
});
|
||||
} catch { /* skip */ }
|
||||
}
|
||||
results.sort((a, b) => (a.createdAt < b.createdAt ? 1 : -1));
|
||||
res.json({ data: results });
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /instance/:slug/backup/:id/download
|
||||
* Streams the backup archive (supports Content-Length so the CCP can verify size).
|
||||
*/
|
||||
router.get('/instance/:slug/backup/:id/download', async (req: Request, res: Response) => {
|
||||
const slug = param(req, 'slug');
|
||||
const id = param(req, 'id');
|
||||
if (!ID_REGEX.test(id)) {
|
||||
res.status(400).json({ error: 'INVALID_ID', message: 'Invalid backup id' });
|
||||
return;
|
||||
}
|
||||
await getSlugEntry(slug);
|
||||
|
||||
const archivePath = archivePathFor(slug, id);
|
||||
try {
|
||||
const stats = await fs.stat(archivePath);
|
||||
res.setHeader('Content-Type', 'application/gzip');
|
||||
res.setHeader('Content-Length', String(stats.size));
|
||||
res.setHeader('Content-Disposition', `attachment; filename="${path.basename(archivePath)}"`);
|
||||
const stream = createReadStream(archivePath);
|
||||
stream.on('error', (err) => {
|
||||
logger.error(`[backup] stream error for ${archivePath}: ${err.message}`);
|
||||
if (!res.headersSent) res.status(500).end();
|
||||
else res.destroy(err);
|
||||
});
|
||||
stream.pipe(res);
|
||||
} catch {
|
||||
res.status(404).json({ error: 'NOT_FOUND', message: 'Backup archive not found' });
|
||||
return;
|
||||
}
|
||||
});
|
||||
|
||||
const stats = await fs.stat(archivePath);
|
||||
res.setHeader('Content-Type', 'application/gzip');
|
||||
res.setHeader('Content-Length', stats.size);
|
||||
res.setHeader('Content-Disposition', `attachment; filename="${archiveName}"`);
|
||||
/**
|
||||
* DELETE /instance/:slug/backup/:id
|
||||
* Deletes the archive from the agent's disk. The CCP calls this after it has
|
||||
* successfully streamed the archive to its own storage.
|
||||
*/
|
||||
router.delete('/instance/:slug/backup/:id', async (req: Request, res: Response) => {
|
||||
const slug = param(req, 'slug');
|
||||
const id = param(req, 'id');
|
||||
if (!ID_REGEX.test(id)) {
|
||||
res.status(400).json({ error: 'INVALID_ID', message: 'Invalid backup id' });
|
||||
return;
|
||||
}
|
||||
await getSlugEntry(slug);
|
||||
|
||||
const { createReadStream } = await import('fs');
|
||||
const stream = createReadStream(archivePath);
|
||||
stream.pipe(res);
|
||||
const archivePath = archivePathFor(slug, id);
|
||||
// Path traversal defense: ensure the resolved path is still inside the slug's backups dir
|
||||
const resolved = path.resolve(archivePath);
|
||||
const boundary = path.resolve(backupsDirFor(slug));
|
||||
if (!resolved.startsWith(boundary + path.sep)) {
|
||||
res.status(400).json({ error: 'INVALID_ID', message: 'Invalid backup id' });
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
await fs.unlink(archivePath);
|
||||
logger.info(`[backup] ${slug}: deleted ${path.basename(archivePath)}`);
|
||||
res.json({ deleted: true });
|
||||
} catch (err) {
|
||||
const code = (err as NodeJS.ErrnoException).code;
|
||||
if (code === 'ENOENT') {
|
||||
res.status(404).json({ error: 'NOT_FOUND', message: 'Backup archive not found' });
|
||||
return;
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
});
|
||||
|
||||
// ─── Restore ──────────────────────────────────────────────────────────
|
||||
|
||||
// Hard cap on a single restore upload. The CCP is trusted, but a buggy or
|
||||
// compromised CCP shouldn't be able to fill the agent's disk in one request.
|
||||
// 20 GB is well above any realistic Changemaker Lite backup size.
|
||||
const MAX_RESTORE_UPLOAD_BYTES = 20 * 1024 * 1024 * 1024;
|
||||
|
||||
function restoresDirFor(slug: string): string {
|
||||
return path.join(env.AGENT_DATA_DIR, 'restores', slug);
|
||||
}
|
||||
|
||||
function restoreUploadDir(slug: string, uploadId: string): string {
|
||||
return path.join(restoresDirFor(slug), uploadId);
|
||||
}
|
||||
|
||||
interface RestoreState {
|
||||
status: 'UPLOADED' | 'RUNNING' | 'COMPLETED' | 'FAILED';
|
||||
uploadId: string;
|
||||
startedAt: string;
|
||||
completedAt?: string;
|
||||
exitCode?: number;
|
||||
logTail?: string;
|
||||
errorMessage?: string;
|
||||
options?: Record<string, unknown>;
|
||||
}
|
||||
|
||||
async function readRestoreState(slug: string, uploadId: string): Promise<RestoreState | null> {
|
||||
const statePath = path.join(restoreUploadDir(slug, uploadId), 'restore-state.json');
|
||||
try {
|
||||
const content = await fs.readFile(statePath, 'utf-8');
|
||||
return JSON.parse(content) as RestoreState;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
async function writeRestoreState(slug: string, uploadId: string, state: RestoreState): Promise<void> {
|
||||
const statePath = path.join(restoreUploadDir(slug, uploadId), 'restore-state.json');
|
||||
await fs.writeFile(statePath, JSON.stringify(state, null, 2), 'utf-8');
|
||||
}
|
||||
|
||||
/**
|
||||
* POST /instance/:slug/restore/upload?sha256=<hex>
|
||||
* Accepts an application/octet-stream upload of a backup archive and writes
|
||||
* it to the agent's restores directory. Verifies SHA256 as it streams — if
|
||||
* the hash doesn't match, the partial file is deleted and we return 400.
|
||||
*
|
||||
* Returns `{ uploadId, sizeBytes, sha256 }`.
|
||||
*/
|
||||
router.post('/instance/:slug/restore/upload', async (req: Request, res: Response) => {
|
||||
const slug = param(req, 'slug');
|
||||
await getSlugEntry(slug);
|
||||
|
||||
if (isSlugLocked(slug, 'restore')) {
|
||||
res.status(409).json({ error: 'SLUG_BUSY', message: 'A restore is already in progress for this slug' });
|
||||
return;
|
||||
}
|
||||
if (isSlugLocked(slug, 'backup')) {
|
||||
res.status(409).json({ error: 'SLUG_BUSY', message: 'A backup is in progress for this slug' });
|
||||
return;
|
||||
}
|
||||
|
||||
const expectedSha256 = typeof req.query.sha256 === 'string' ? req.query.sha256.toLowerCase() : undefined;
|
||||
if (!expectedSha256 || !/^[a-f0-9]{64}$/.test(expectedSha256)) {
|
||||
res.status(400).json({ error: 'VALIDATION', message: 'sha256 query parameter required (64 hex chars)' });
|
||||
return;
|
||||
}
|
||||
|
||||
const uploadId = crypto.randomBytes(16).toString('hex');
|
||||
const uploadDir = restoreUploadDir(slug, uploadId);
|
||||
await fs.mkdir(uploadDir, { recursive: true });
|
||||
const archivePath = path.join(uploadDir, 'archive.tar.gz');
|
||||
|
||||
const hash = crypto.createHash('sha256');
|
||||
let bytesWritten = 0;
|
||||
const hashTransform = new Transform({
|
||||
transform(chunk: Buffer, _enc, cb) {
|
||||
bytesWritten += chunk.length;
|
||||
if (bytesWritten > MAX_RESTORE_UPLOAD_BYTES) {
|
||||
// Abort the stream — pipeline() will reject and the catch block below
|
||||
// will remove the partial upload directory.
|
||||
cb(new AgentError(
|
||||
413,
|
||||
`Upload exceeds maximum allowed size of ${MAX_RESTORE_UPLOAD_BYTES} bytes`,
|
||||
'UPLOAD_TOO_LARGE'
|
||||
));
|
||||
return;
|
||||
}
|
||||
hash.update(chunk);
|
||||
cb(null, chunk);
|
||||
},
|
||||
});
|
||||
|
||||
try {
|
||||
const writeStream = createWriteStream(archivePath);
|
||||
await pipeline(req, hashTransform, writeStream);
|
||||
const sha256 = hash.digest('hex');
|
||||
|
||||
if (sha256 !== expectedSha256) {
|
||||
// Integrity failure — nuke the upload
|
||||
await fs.rm(uploadDir, { recursive: true, force: true });
|
||||
res.status(400).json({
|
||||
error: 'SHA256_MISMATCH',
|
||||
message: `Expected sha256 ${expectedSha256}, got ${sha256}`,
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
const stats = await fs.stat(archivePath);
|
||||
|
||||
// Persist initial state so the progress endpoint works even before apply
|
||||
await writeRestoreState(slug, uploadId, {
|
||||
status: 'UPLOADED',
|
||||
uploadId,
|
||||
startedAt: new Date().toISOString(),
|
||||
});
|
||||
|
||||
logger.info(`[restore] ${slug}: uploaded ${bytesWritten} bytes (sha256=${sha256.substring(0, 16)}...) upload_id=${uploadId}`);
|
||||
|
||||
res.json({
|
||||
uploadId,
|
||||
sizeBytes: stats.size,
|
||||
sha256,
|
||||
});
|
||||
} catch (err) {
|
||||
// Stream error or write error — clean up
|
||||
try { await fs.rm(uploadDir, { recursive: true, force: true }); } catch { /* ignore */ }
|
||||
throw err;
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* POST /instance/:slug/restore/:uploadId/apply
|
||||
* Body: { confirm: true, skipDb?, skipUploads?, skipListmonk?, dryRun? }
|
||||
*
|
||||
* Fires off `scripts/restore.sh --archive <path> --force` in the background
|
||||
* and writes progress to restore-state.json. The CCP polls the progress
|
||||
* endpoint for updates. Mutex prevents concurrent restores/backups.
|
||||
*/
|
||||
router.post('/instance/:slug/restore/:uploadId/apply', async (req: Request, res: Response) => {
|
||||
const slug = param(req, 'slug');
|
||||
const uploadId = param(req, 'uploadId');
|
||||
if (!ID_REGEX.test(uploadId)) {
|
||||
res.status(400).json({ error: 'INVALID_ID', message: 'Invalid upload id' });
|
||||
return;
|
||||
}
|
||||
const entry = await getSlugEntry(slug);
|
||||
|
||||
const { confirm, skipDb, skipUploads, skipListmonk, dryRun } = req.body ?? {};
|
||||
if (confirm !== true) {
|
||||
res.status(400).json({ error: 'CONFIRMATION_REQUIRED', message: 'Body must include { confirm: true }' });
|
||||
return;
|
||||
}
|
||||
|
||||
const uploadDir = restoreUploadDir(slug, uploadId);
|
||||
// Path traversal defense
|
||||
const resolvedDir = path.resolve(uploadDir);
|
||||
const boundary = path.resolve(restoresDirFor(slug));
|
||||
if (!resolvedDir.startsWith(boundary + path.sep)) {
|
||||
res.status(400).json({ error: 'INVALID_ID', message: 'Invalid upload id' });
|
||||
return;
|
||||
}
|
||||
|
||||
const archivePath = path.join(uploadDir, 'archive.tar.gz');
|
||||
try {
|
||||
await fs.access(archivePath);
|
||||
} catch {
|
||||
res.status(404).json({ error: 'NOT_FOUND', message: 'Upload not found or already applied' });
|
||||
return;
|
||||
}
|
||||
|
||||
// Verify scripts/restore.sh exists
|
||||
const scriptPath = path.join(entry.basePath, 'scripts', 'restore.sh');
|
||||
try {
|
||||
await fs.access(scriptPath);
|
||||
} catch {
|
||||
res.status(500).json({ error: 'RESTORE_SCRIPT_MISSING', message: `scripts/restore.sh not found at ${scriptPath}` });
|
||||
return;
|
||||
}
|
||||
|
||||
// Check mutex state (don't block — tell caller it's busy)
|
||||
if (isSlugLocked(slug, 'restore') || isSlugLocked(slug, 'backup')) {
|
||||
res.status(409).json({ error: 'SLUG_BUSY', message: 'Slug is busy with backup or restore' });
|
||||
return;
|
||||
}
|
||||
|
||||
// Fire-and-forget: acquire lock and run in background. Return immediately
|
||||
// so CCP can start polling /progress.
|
||||
const options = {
|
||||
skipDb: !!skipDb,
|
||||
skipUploads: !!skipUploads,
|
||||
skipListmonk: !!skipListmonk,
|
||||
dryRun: !!dryRun,
|
||||
};
|
||||
|
||||
await writeRestoreState(slug, uploadId, {
|
||||
status: 'RUNNING',
|
||||
uploadId,
|
||||
startedAt: new Date().toISOString(),
|
||||
options,
|
||||
});
|
||||
|
||||
// Build restore.sh args (all flags, no user input interpolated into a shell string)
|
||||
const args = ['scripts/restore.sh', '--archive', archivePath, '--force'];
|
||||
if (options.skipDb) args.push('--skip-db');
|
||||
if (options.skipUploads) args.push('--skip-uploads');
|
||||
if (options.skipListmonk) args.push('--skip-listmonk');
|
||||
if (options.dryRun) args.push('--dry-run');
|
||||
|
||||
const logPath = path.join(uploadDir, 'restore.log');
|
||||
|
||||
// Schedule the background task — don't await inside the handler
|
||||
void withSlugLock(slug, 'restore', async () => {
|
||||
const logFd = await fs.open(logPath, 'w');
|
||||
logger.info(`[restore] ${slug}: running ${args.join(' ')} (cwd=${entry.basePath})`);
|
||||
|
||||
const exitCode: number = await new Promise((resolve, reject) => {
|
||||
const proc = spawn('bash', args, {
|
||||
cwd: entry.basePath,
|
||||
env: { ...process.env },
|
||||
stdio: ['ignore', 'pipe', 'pipe'],
|
||||
});
|
||||
proc.stdout.on('data', (chunk) => logFd.write(chunk).catch(() => {}));
|
||||
proc.stderr.on('data', (chunk) => logFd.write(chunk).catch(() => {}));
|
||||
proc.on('error', reject);
|
||||
proc.on('close', (code) => resolve(code ?? 1));
|
||||
});
|
||||
|
||||
await logFd.close();
|
||||
|
||||
// Read the tail of the log for the state file
|
||||
let logTail = '';
|
||||
try {
|
||||
const fullLog = await fs.readFile(logPath, 'utf-8');
|
||||
logTail = fullLog.split('\n').slice(-80).join('\n');
|
||||
} catch { /* ignore */ }
|
||||
|
||||
const state: RestoreState = {
|
||||
status: exitCode === 0 ? 'COMPLETED' : 'FAILED',
|
||||
uploadId,
|
||||
startedAt: (await readRestoreState(slug, uploadId))?.startedAt || new Date().toISOString(),
|
||||
completedAt: new Date().toISOString(),
|
||||
exitCode,
|
||||
logTail,
|
||||
options,
|
||||
...(exitCode !== 0 ? { errorMessage: `restore.sh exited with code ${exitCode}` } : {}),
|
||||
};
|
||||
await writeRestoreState(slug, uploadId, state);
|
||||
|
||||
logger.info(`[restore] ${slug}: restore.sh finished with exit ${exitCode}`);
|
||||
}).catch(async (err) => {
|
||||
logger.error(`[restore] ${slug}: background restore failed: ${(err as Error).message}`);
|
||||
// If the mutex was the issue, state is already written. Otherwise, mark failed.
|
||||
if (!(err instanceof SlugBusyError)) {
|
||||
try {
|
||||
await writeRestoreState(slug, uploadId, {
|
||||
status: 'FAILED',
|
||||
uploadId,
|
||||
startedAt: new Date().toISOString(),
|
||||
completedAt: new Date().toISOString(),
|
||||
errorMessage: (err as Error).message,
|
||||
options,
|
||||
});
|
||||
} catch { /* ignore */ }
|
||||
}
|
||||
});
|
||||
|
||||
res.status(202).json({ applied: true, uploadId, options });
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /instance/:slug/restore/:uploadId/progress
|
||||
* Returns the current state of a running or completed restore.
|
||||
*/
|
||||
router.get('/instance/:slug/restore/:uploadId/progress', async (req: Request, res: Response) => {
|
||||
const slug = param(req, 'slug');
|
||||
const uploadId = param(req, 'uploadId');
|
||||
if (!ID_REGEX.test(uploadId)) {
|
||||
res.status(400).json({ error: 'INVALID_ID', message: 'Invalid upload id' });
|
||||
return;
|
||||
}
|
||||
await getSlugEntry(slug);
|
||||
|
||||
const state = await readRestoreState(slug, uploadId);
|
||||
if (!state) {
|
||||
res.status(404).json({ error: 'NOT_FOUND', message: 'Restore not found' });
|
||||
return;
|
||||
}
|
||||
res.json(state);
|
||||
});
|
||||
|
||||
/**
|
||||
* DELETE /instance/:slug/restore/:uploadId
|
||||
* Removes a restore upload directory. Refuses if a restore is currently running.
|
||||
*/
|
||||
router.delete('/instance/:slug/restore/:uploadId', async (req: Request, res: Response) => {
|
||||
const slug = param(req, 'slug');
|
||||
const uploadId = param(req, 'uploadId');
|
||||
if (!ID_REGEX.test(uploadId)) {
|
||||
res.status(400).json({ error: 'INVALID_ID', message: 'Invalid upload id' });
|
||||
return;
|
||||
}
|
||||
await getSlugEntry(slug);
|
||||
|
||||
const uploadDir = restoreUploadDir(slug, uploadId);
|
||||
const resolvedDir = path.resolve(uploadDir);
|
||||
const boundary = path.resolve(restoresDirFor(slug));
|
||||
if (!resolvedDir.startsWith(boundary + path.sep)) {
|
||||
res.status(400).json({ error: 'INVALID_ID', message: 'Invalid upload id' });
|
||||
return;
|
||||
}
|
||||
|
||||
const state = await readRestoreState(slug, uploadId);
|
||||
if (state?.status === 'RUNNING') {
|
||||
res.status(409).json({ error: 'RESTORE_RUNNING', message: 'Cannot delete a running restore' });
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
await fs.rm(uploadDir, { recursive: true, force: true });
|
||||
res.json({ deleted: true });
|
||||
} catch (err) {
|
||||
throw err;
|
||||
}
|
||||
});
|
||||
|
||||
export default router;
|
||||
|
||||
@ -4,6 +4,13 @@ import { registerSlug, unregisterSlug, listSlugs } from '../services/registry.se
|
||||
|
||||
const router = Router();
|
||||
|
||||
// SECURITY: defense-in-depth slug validation. The CCP enforces ^[a-z0-9-]+$
|
||||
// upstream via Zod, but the registry slug is later interpolated into
|
||||
// filesystem paths (backupsDirFor, etc.), so we validate independently here.
|
||||
// A poisoned registry entry could otherwise let a compromised or buggy CCP
|
||||
// escape AGENT_DATA_DIR.
|
||||
const SLUG_RE = /^[a-z0-9-]{2,50}$/;
|
||||
|
||||
// POST /instances/register — Register a slug→basePath mapping
|
||||
router.post('/instances/register', async (req: Request, res: Response) => {
|
||||
const { slug, basePath, composeProject } = req.body;
|
||||
@ -11,14 +18,23 @@ router.post('/instances/register', async (req: Request, res: Response) => {
|
||||
res.status(400).json({ error: 'VALIDATION', message: 'slug, basePath, and composeProject required' });
|
||||
return;
|
||||
}
|
||||
if (typeof slug !== 'string' || !SLUG_RE.test(slug)) {
|
||||
res.status(400).json({ error: 'VALIDATION', message: 'Invalid slug format (expected ^[a-z0-9-]{2,50}$)' });
|
||||
return;
|
||||
}
|
||||
await registerSlug(slug, basePath, composeProject);
|
||||
res.json({ registered: slug });
|
||||
});
|
||||
|
||||
// DELETE /instances/:slug — Unregister slug
|
||||
router.delete('/instances/:slug', async (req: Request, res: Response) => {
|
||||
await unregisterSlug(param(req, 'slug'));
|
||||
res.json({ unregistered: param(req, 'slug') });
|
||||
const slug = param(req, 'slug');
|
||||
if (!SLUG_RE.test(slug)) {
|
||||
res.status(400).json({ error: 'VALIDATION', message: 'Invalid slug format' });
|
||||
return;
|
||||
}
|
||||
await unregisterSlug(slug);
|
||||
res.json({ unregistered: slug });
|
||||
});
|
||||
|
||||
// GET /instances — List all managed slugs
|
||||
|
||||
@ -1,11 +1,12 @@
|
||||
import { Router, Request, Response } from 'express';
|
||||
import { param } from '../utils/params';
|
||||
import { execFile } from 'child_process';
|
||||
import { execFile, spawn } from 'child_process';
|
||||
import { promisify } from 'util';
|
||||
import fs from 'fs/promises';
|
||||
import path from 'path';
|
||||
import { getSlugEntry } from '../services/registry.service';
|
||||
import { logger } from '../utils/logger';
|
||||
import { withSlugLock, SlugBusyError, isSlugLocked } from '../services/slug-mutex';
|
||||
|
||||
const execFileAsync = promisify(execFile);
|
||||
const router = Router();
|
||||
@ -13,9 +14,108 @@ const router = Router();
|
||||
/** Validate a git branch name — prevent shell injection. */
|
||||
const SAFE_BRANCH = /^[a-zA-Z0-9][a-zA-Z0-9_.\/-]{0,99}$/;
|
||||
|
||||
// POST /instance/:slug/upgrade/start — Run upgrade.sh
|
||||
/**
|
||||
* Max age of an in-progress upgrade (by progress.json mtime) before we
|
||||
* consider a previous attempt dead and allow a new one through.
|
||||
*
|
||||
* SECURITY NOTE: this must be LONGER than the CCP's REMOTE_UPGRADE_TIMEOUT
|
||||
* AND longer than any realistic legitimate upgrade duration. The concern is
|
||||
* a concurrent-upgrade scenario:
|
||||
* - upgrade.sh is running and legitimately slow (large image pull + DB
|
||||
* migration)
|
||||
* - at 15 min the CCP side times out and marks the row FAILED
|
||||
* - admin clicks "Upgrade" again → CCP's DB check sees no active row
|
||||
* - if this staleness window is <= realistic upgrade time, the second
|
||||
* /upgrade/start call would ALSO pass this check, spawning a second
|
||||
* upgrade.sh process racing against the still-running first one
|
||||
*
|
||||
* 45 min gives headroom over the 15-min CCP timeout and covers realistic
|
||||
* upgrade durations. For a truly bulletproof guard, switch to a PID lock
|
||||
* file that verifies the process is still alive.
|
||||
*/
|
||||
const STALE_UPGRADE_MTIME_MS = 45 * 60 * 1000;
|
||||
|
||||
/**
|
||||
* Returns true if there's an in-progress upgrade visible on disk.
|
||||
*
|
||||
* Used as a second-line guard in case the in-memory mutex was lost to an
|
||||
* agent restart mid-upgrade. The check looks at progress.json mtime and
|
||||
* the absence of a result.json — together they indicate "started but not
|
||||
* finished within the staleness window".
|
||||
*/
|
||||
async function isUpgradeRunningOnDisk(basePath: string): Promise<boolean> {
|
||||
const progressPath = path.join(basePath, 'data', 'upgrade', 'progress.json');
|
||||
const resultPath = path.join(basePath, 'data', 'upgrade', 'result.json');
|
||||
|
||||
let progressStat: import('fs').Stats;
|
||||
try {
|
||||
progressStat = await fs.stat(progressPath);
|
||||
} catch {
|
||||
return false; // no progress file → no in-progress upgrade
|
||||
}
|
||||
|
||||
// If a result file exists with mtime >= progress mtime, the run is finished
|
||||
try {
|
||||
const resultStat = await fs.stat(resultPath);
|
||||
if (resultStat.mtimeMs >= progressStat.mtimeMs) return false;
|
||||
} catch { /* no result file yet */ }
|
||||
|
||||
// Stale: progress file is old and no result was written → assume the
|
||||
// previous attempt died and let a new one through
|
||||
if (Date.now() - progressStat.mtimeMs > STALE_UPGRADE_MTIME_MS) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// POST /instance/:slug/upgrade/check — Run upgrade-check.sh and return status.json
|
||||
router.post('/instance/:slug/upgrade/check', async (req: Request, res: Response) => {
|
||||
const slug = param(req, 'slug');
|
||||
const entry = await getSlugEntry(slug);
|
||||
|
||||
// Refuse during a running upgrade — check writes status.json which could
|
||||
// race with upgrade.sh writing other files in data/upgrade/
|
||||
if (isSlugLocked(slug, 'upgrade') || await isUpgradeRunningOnDisk(entry.basePath)) {
|
||||
res.status(409).json({ error: 'SLUG_BUSY', message: 'An upgrade is currently running' });
|
||||
return;
|
||||
}
|
||||
|
||||
const scriptPath = path.join(entry.basePath, 'scripts', 'upgrade-check.sh');
|
||||
try {
|
||||
await fs.access(scriptPath);
|
||||
} catch {
|
||||
res.status(404).json({ error: 'SCRIPT_NOT_FOUND', message: `upgrade-check.sh not found at ${scriptPath}` });
|
||||
return;
|
||||
}
|
||||
|
||||
// Run upgrade-check.sh — it writes data/upgrade/status.json. Use execFile
|
||||
// (no shell) and a 60s timeout. Failures are non-fatal: the script may
|
||||
// still have written status.json before erroring out, so we always try
|
||||
// to read it afterwards.
|
||||
try {
|
||||
await execFileAsync('bash', [scriptPath], {
|
||||
cwd: entry.basePath,
|
||||
timeout: 60_000,
|
||||
maxBuffer: 4 * 1024 * 1024,
|
||||
env: { ...process.env, COMPOSE_ANSI: 'never' },
|
||||
});
|
||||
} catch (err) {
|
||||
logger.warn(`[upgrade] ${slug}: upgrade-check.sh failed: ${(err as Error).message}`);
|
||||
// continue — try to read status.json anyway
|
||||
}
|
||||
|
||||
const statusPath = path.join(entry.basePath, 'data', 'upgrade', 'status.json');
|
||||
try {
|
||||
const content = await fs.readFile(statusPath, 'utf-8');
|
||||
res.json(JSON.parse(content));
|
||||
} catch {
|
||||
res.status(500).json({ error: 'STATUS_NOT_AVAILABLE', message: 'upgrade-check.sh did not produce status.json' });
|
||||
}
|
||||
});
|
||||
|
||||
// POST /instance/:slug/upgrade/start — Run upgrade.sh in the background
|
||||
router.post('/instance/:slug/upgrade/start', async (req: Request, res: Response) => {
|
||||
const entry = await getSlugEntry(param(req, 'slug'));
|
||||
const slug = param(req, 'slug');
|
||||
const entry = await getSlugEntry(slug);
|
||||
const { skipBackup, useRegistry, branch } = req.body || {};
|
||||
|
||||
// SECURITY: Validate branch name to prevent injection
|
||||
@ -28,26 +128,64 @@ router.post('/instance/:slug/upgrade/start', async (req: Request, res: Response)
|
||||
try {
|
||||
await fs.access(scriptPath);
|
||||
} catch {
|
||||
res.status(400).json({ error: 'NOT_FOUND', message: 'upgrade.sh not found' });
|
||||
res.status(404).json({ error: 'NOT_FOUND', message: 'upgrade.sh not found' });
|
||||
return;
|
||||
}
|
||||
|
||||
// SECURITY: Use execFile with args array — no shell interpolation
|
||||
const args = ['--api-mode', '--force'];
|
||||
// Refuse if an upgrade is already running (in-memory or on-disk indicators)
|
||||
if (isSlugLocked(slug, 'upgrade') || await isUpgradeRunningOnDisk(entry.basePath)) {
|
||||
res.status(409).json({ error: 'SLUG_BUSY', message: 'An upgrade is already in progress' });
|
||||
return;
|
||||
}
|
||||
// Backup or restore concurrency: refuse to start an upgrade while either is running
|
||||
if (isSlugLocked(slug, 'backup') || isSlugLocked(slug, 'restore')) {
|
||||
res.status(409).json({ error: 'SLUG_BUSY', message: 'A backup or restore is currently running' });
|
||||
return;
|
||||
}
|
||||
|
||||
// Clear stale progress/result files before starting so the on-disk staleness
|
||||
// check doesn't think a brand-new upgrade is still finishing.
|
||||
const progressPath = path.join(entry.basePath, 'data', 'upgrade', 'progress.json');
|
||||
const resultPath = path.join(entry.basePath, 'data', 'upgrade', 'result.json');
|
||||
await fs.mkdir(path.dirname(progressPath), { recursive: true });
|
||||
await fs.rm(progressPath, { force: true });
|
||||
await fs.rm(resultPath, { force: true });
|
||||
|
||||
// SECURITY: Use spawn with args array — no shell interpolation
|
||||
const args: string[] = [scriptPath, '--api-mode', '--force'];
|
||||
if (skipBackup) args.push('--skip-backup');
|
||||
if (useRegistry) args.push('--use-registry');
|
||||
if (branch) args.push('--branch', branch);
|
||||
|
||||
// Fire-and-forget — CCP polls progress
|
||||
execFileAsync('bash', [scriptPath, ...args], {
|
||||
cwd: entry.basePath,
|
||||
timeout: 600_000,
|
||||
maxBuffer: 10 * 1024 * 1024,
|
||||
// Schedule the background task under the slug lock. Use void so the
|
||||
// promise doesn't block the response. Errors are caught and logged; the
|
||||
// CCP detects them via the absence of a result file or via the timeout.
|
||||
void withSlugLock(slug, 'upgrade', async () => {
|
||||
logger.info(`[upgrade] ${slug}: spawning ${args.join(' ')} (cwd=${entry.basePath})`);
|
||||
try {
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
const proc = spawn('bash', args, {
|
||||
cwd: entry.basePath,
|
||||
env: { ...process.env, COMPOSE_ANSI: 'never' },
|
||||
stdio: ['ignore', 'ignore', 'ignore'], // upgrade.sh writes its own logs
|
||||
});
|
||||
proc.on('error', reject);
|
||||
proc.on('close', (code) => {
|
||||
if (code === 0) resolve();
|
||||
else reject(new Error(`upgrade.sh exited with code ${code}`));
|
||||
});
|
||||
});
|
||||
logger.info(`[upgrade] ${slug}: upgrade.sh completed`);
|
||||
} catch (err) {
|
||||
logger.error(`[upgrade] ${slug}: ${(err as Error).message}`);
|
||||
}
|
||||
}).catch((err) => {
|
||||
logger.error(`[upgrade] ${param(req, 'slug')} failed: ${(err as Error).message}`);
|
||||
if (!(err instanceof SlugBusyError)) {
|
||||
logger.error(`[upgrade] ${slug}: lock or background error: ${(err as Error).message}`);
|
||||
}
|
||||
});
|
||||
|
||||
res.json({ started: true });
|
||||
res.status(202).json({ started: true });
|
||||
});
|
||||
|
||||
// GET /instance/:slug/upgrade/progress — Read progress.json
|
||||
|
||||
@ -53,8 +53,24 @@ if (hasCerts()) {
|
||||
app.use(errorHandler);
|
||||
|
||||
const server = https.createServer(tlsOptions, app);
|
||||
server.listen(env.AGENT_PORT, () => {
|
||||
server.listen(env.AGENT_PORT, async () => {
|
||||
logger.info(`CCP Agent (mTLS) listening on port ${env.AGENT_PORT}`);
|
||||
|
||||
// Auto-register this instance's slug if configured
|
||||
if (env.INSTANCE_SLUG && env.INSTANCE_BASE_PATH) {
|
||||
const { registerSlug, getSlugEntry } = await import('./services/registry.service');
|
||||
try {
|
||||
await getSlugEntry(env.INSTANCE_SLUG);
|
||||
logger.debug(`[registry] Slug ${env.INSTANCE_SLUG} already registered`);
|
||||
} catch {
|
||||
// Detect compose project name: use env override, or derive from basePath directory name
|
||||
// (Docker Compose default: directory name with special chars stripped)
|
||||
const pathMod = await import('path');
|
||||
const composeProject = env.COMPOSE_PROJECT
|
||||
|| pathMod.basename(env.INSTANCE_BASE_PATH).replace(/[^a-zA-Z0-9]/g, '').toLowerCase();
|
||||
await registerSlug(env.INSTANCE_SLUG, env.INSTANCE_BASE_PATH, composeProject);
|
||||
}
|
||||
}
|
||||
});
|
||||
} else {
|
||||
// Pre-approval mode — start HTTP, only health + phone-home polling
|
||||
|
||||
65
changemaker-control-panel/agent/src/services/slug-mutex.ts
Normal file
65
changemaker-control-panel/agent/src/services/slug-mutex.ts
Normal file
@ -0,0 +1,65 @@
|
||||
/**
|
||||
* Per-slug single-flight mutex.
|
||||
*
|
||||
* Guards long-running, mutating operations (backup, restore, upgrade) so that
|
||||
* two concurrent CCP calls for the same slug can't trample each other.
|
||||
*
|
||||
* Usage:
|
||||
* await withSlugLock(slug, 'backup', async () => { ... });
|
||||
*
|
||||
* If a lock is already held for (slug, op), throws SlugBusyError which the
|
||||
* route handler should convert to HTTP 409.
|
||||
*/
|
||||
|
||||
export class SlugBusyError extends Error {
|
||||
constructor(public slug: string, public op: string) {
|
||||
super(`Slug ${slug} is busy: ${op} already in progress`);
|
||||
this.name = 'SlugBusyError';
|
||||
}
|
||||
}
|
||||
|
||||
type LockKey = string;
|
||||
const locks = new Map<LockKey, { op: string; startedAt: number }>();
|
||||
|
||||
function key(slug: string, op: string): LockKey {
|
||||
return `${slug}::${op}`;
|
||||
}
|
||||
|
||||
/**
|
||||
* Run `fn` while holding a single-flight lock on (slug, op).
|
||||
* Throws SlugBusyError immediately if another call is already running.
|
||||
*/
|
||||
export async function withSlugLock<T>(
|
||||
slug: string,
|
||||
op: string,
|
||||
fn: () => Promise<T>
|
||||
): Promise<T> {
|
||||
const k = key(slug, op);
|
||||
if (locks.has(k)) {
|
||||
throw new SlugBusyError(slug, op);
|
||||
}
|
||||
locks.set(k, { op, startedAt: Date.now() });
|
||||
try {
|
||||
return await fn();
|
||||
} finally {
|
||||
locks.delete(k);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if a lock is currently held for (slug, op).
|
||||
*/
|
||||
export function isSlugLocked(slug: string, op: string): boolean {
|
||||
return locks.has(key(slug, op));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns debug info about all active locks.
|
||||
*/
|
||||
export function listActiveLocks(): Array<{ slug: string; op: string; ageMs: number }> {
|
||||
const now = Date.now();
|
||||
return Array.from(locks.entries()).map(([k, v]) => {
|
||||
const [slug] = k.split('::');
|
||||
return { slug: slug ?? '', op: v.op, ageMs: now - v.startedAt };
|
||||
});
|
||||
}
|
||||
@ -0,0 +1,34 @@
|
||||
-- CreateEnum
|
||||
CREATE TYPE "RestoreStatus" AS ENUM ('PENDING', 'UPLOADING', 'RUNNING', 'COMPLETED', 'FAILED');
|
||||
|
||||
-- AlterEnum
|
||||
ALTER TYPE "AuditAction" ADD VALUE 'BACKUP_RESTORE';
|
||||
|
||||
-- CreateTable
|
||||
CREATE TABLE "instance_restores" (
|
||||
"id" TEXT NOT NULL,
|
||||
"instance_id" TEXT NOT NULL,
|
||||
"backup_id" TEXT NOT NULL,
|
||||
"status" "RestoreStatus" NOT NULL DEFAULT 'PENDING',
|
||||
"upload_id" TEXT,
|
||||
"progress_json" JSONB,
|
||||
"log_tail" TEXT,
|
||||
"error_message" TEXT,
|
||||
"triggered_by_id" TEXT,
|
||||
"started_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
"completed_at" TIMESTAMP(3),
|
||||
|
||||
CONSTRAINT "instance_restores_pkey" PRIMARY KEY ("id")
|
||||
);
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "instance_restores_instance_id_started_at_idx" ON "instance_restores"("instance_id", "started_at");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "instance_restores_backup_id_idx" ON "instance_restores"("backup_id");
|
||||
|
||||
-- AddForeignKey
|
||||
ALTER TABLE "instance_restores" ADD CONSTRAINT "instance_restores_instance_id_fkey" FOREIGN KEY ("instance_id") REFERENCES "instances"("id") ON DELETE CASCADE ON UPDATE CASCADE;
|
||||
|
||||
-- AddForeignKey
|
||||
ALTER TABLE "instance_restores" ADD CONSTRAINT "instance_restores_backup_id_fkey" FOREIGN KEY ("backup_id") REFERENCES "backups"("id") ON DELETE CASCADE ON UPDATE CASCADE;
|
||||
@ -0,0 +1,2 @@
|
||||
-- AlterTable
|
||||
ALTER TABLE "instances" ADD COLUMN "pangolin_subdomain_prefix" TEXT;
|
||||
@ -0,0 +1,2 @@
|
||||
-- AlterEnum
|
||||
ALTER TYPE "AuditAction" ADD VALUE 'PANGOLIN_TEARDOWN';
|
||||
@ -109,6 +109,7 @@ model Instance {
|
||||
pangolinSiteId String? @map("pangolin_site_id")
|
||||
pangolinNewtId String? @map("pangolin_newt_id")
|
||||
pangolinNewtSecret String? @map("pangolin_newt_secret")
|
||||
pangolinSubdomainPrefix String? @map("pangolin_subdomain_prefix")
|
||||
|
||||
// SMTP
|
||||
smtpHost String? @map("smtp_host")
|
||||
@ -125,6 +126,7 @@ model Instance {
|
||||
portAllocations PortAllocation[]
|
||||
healthChecks HealthCheck[]
|
||||
backups Backup[]
|
||||
restores InstanceRestore[]
|
||||
auditLogs AuditLog[]
|
||||
upgrades InstanceUpgrade[]
|
||||
events InstanceEvent[]
|
||||
@ -196,12 +198,44 @@ model Backup {
|
||||
s3Uploaded Boolean @default(false) @map("s3_uploaded")
|
||||
s3Key String? @map("s3_key")
|
||||
|
||||
instance Instance @relation(fields: [instanceId], references: [id], onDelete: Cascade)
|
||||
instance Instance @relation(fields: [instanceId], references: [id], onDelete: Cascade)
|
||||
restores InstanceRestore[]
|
||||
|
||||
@@index([instanceId, startedAt])
|
||||
@@map("backups")
|
||||
}
|
||||
|
||||
// ─── Restore ───────────────────────────────────────────────
|
||||
|
||||
enum RestoreStatus {
|
||||
PENDING
|
||||
UPLOADING
|
||||
RUNNING
|
||||
COMPLETED
|
||||
FAILED
|
||||
}
|
||||
|
||||
model InstanceRestore {
|
||||
id String @id @default(uuid())
|
||||
instanceId String @map("instance_id")
|
||||
backupId String @map("backup_id")
|
||||
status RestoreStatus @default(PENDING)
|
||||
uploadId String? @map("upload_id")
|
||||
progressJson Json? @map("progress_json")
|
||||
logTail String? @map("log_tail")
|
||||
errorMessage String? @map("error_message")
|
||||
triggeredById String? @map("triggered_by_id")
|
||||
startedAt DateTime @default(now()) @map("started_at")
|
||||
completedAt DateTime? @map("completed_at")
|
||||
|
||||
instance Instance @relation(fields: [instanceId], references: [id], onDelete: Cascade)
|
||||
backup Backup @relation(fields: [backupId], references: [id], onDelete: Cascade)
|
||||
|
||||
@@index([instanceId, startedAt])
|
||||
@@index([backupId])
|
||||
@@map("instance_restores")
|
||||
}
|
||||
|
||||
// ─── Audit Log ─────────────────────────────────────────────
|
||||
|
||||
enum AuditAction {
|
||||
@ -215,7 +249,9 @@ enum AuditAction {
|
||||
SECRETS_VIEWED
|
||||
BACKUP_CREATE
|
||||
BACKUP_DELETE
|
||||
BACKUP_RESTORE
|
||||
PANGOLIN_SETUP
|
||||
PANGOLIN_TEARDOWN
|
||||
PANGOLIN_SYNC
|
||||
AGENT_CONNECT
|
||||
AGENT_REGISTER
|
||||
|
||||
@ -54,10 +54,11 @@ const envSchema = z.object({
|
||||
USE_REGISTRY_IMAGES: z.enum(['true', 'false']).default('true').transform((v) => v === 'true'),
|
||||
IMAGE_TAG: z.string().default('latest'),
|
||||
|
||||
// Pangolin (optional)
|
||||
// Pangolin (optional — for remote tunnel management)
|
||||
PANGOLIN_API_URL: z.string().default(''),
|
||||
PANGOLIN_API_KEY: z.string().default(''),
|
||||
PANGOLIN_ORG_ID: z.string().default(''),
|
||||
PANGOLIN_ENDPOINT: z.string().default(''), // Newt WebSocket URL (may differ from API URL)
|
||||
|
||||
// Health checks
|
||||
HEALTH_CHECK_INTERVAL_MS: z.coerce.number().default(300_000), // 5 min (0 to disable)
|
||||
|
||||
@ -169,7 +169,7 @@ router.post('/registrations/:id/approve', authenticate, requireRole('SUPER_ADMIN
|
||||
});
|
||||
|
||||
// Issue mTLS certificates
|
||||
const certMaterials = await issueAgentCert(instance.id, registration.slug);
|
||||
const certMaterials = await issueAgentCert(instance.id, registration.slug, registration.agentUrl);
|
||||
|
||||
// Mark invite code as used
|
||||
const invite = await prisma.agentInviteCode.findUnique({ where: { id: registration.inviteCodeId } });
|
||||
@ -189,7 +189,7 @@ router.post('/registrations/:id/approve', authenticate, requireRole('SUPER_ADMIN
|
||||
caCertPem: certMaterials.caCertPem,
|
||||
agentCertPem: certMaterials.agentCertPem,
|
||||
agentKeyPem: certMaterials.agentKeyPem,
|
||||
ccpFingerprint: certMaterials.caFingerprint,
|
||||
ccpFingerprint: certMaterials.fingerprint,
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
@ -4,11 +4,13 @@ import rateLimit from 'express-rate-limit';
|
||||
import { prisma } from '../../lib/prisma';
|
||||
import { authenticate, requireRole } from '../../middleware/auth';
|
||||
import { validate } from '../../middleware/validate';
|
||||
import { createInstanceSchema, updateInstanceSchema, registerInstanceSchema, reconfigureInstanceSchema, configureTunnelSchema, importInstancesSchema } from './instances.schemas';
|
||||
import { createInstanceSchema, updateInstanceSchema, registerInstanceSchema, reconfigureInstanceSchema, configureTunnelSchema, importInstancesSchema, startUpgradeSchema, setupRemoteTunnelSchema } from './instances.schemas';
|
||||
import * as instancesService from './instances.service';
|
||||
import * as healthService from '../../services/health.service';
|
||||
import * as backupService from '../../services/backup.service';
|
||||
import * as restoreService from '../../services/restore.service';
|
||||
import * as upgradeService from '../../services/upgrade.service';
|
||||
import * as tunnelService from '../../services/tunnel.service';
|
||||
import { discoverInstances } from '../../services/discovery.service';
|
||||
|
||||
const secretsLimiter = rateLimit({
|
||||
@ -186,6 +188,18 @@ router.delete(
|
||||
'/:id/tunnel',
|
||||
requireRole('SUPER_ADMIN', 'OPERATOR'),
|
||||
async (req: Request, res: Response) => {
|
||||
// Branch: remote instances use the CCP's Pangolin API to teardown;
|
||||
// local instances use the existing manual removal logic.
|
||||
const instance = await prisma.instance.findUnique({ where: { id: req.params.id as string } });
|
||||
if (instance?.isRemote && instance.pangolinSiteId) {
|
||||
const result = await tunnelService.teardownTunnel(
|
||||
req.params.id as string,
|
||||
req.user!.id,
|
||||
req.ip
|
||||
);
|
||||
res.json({ data: result });
|
||||
return;
|
||||
}
|
||||
const result = await instancesService.removeTunnel(
|
||||
req.params.id as string,
|
||||
req.user!.id,
|
||||
@ -195,6 +209,47 @@ router.delete(
|
||||
}
|
||||
);
|
||||
|
||||
// Remote tunnel setup via CCP's Pangolin API credentials
|
||||
router.post(
|
||||
'/:id/tunnel/setup',
|
||||
requireRole('SUPER_ADMIN'),
|
||||
validate(setupRemoteTunnelSchema),
|
||||
async (req: Request, res: Response) => {
|
||||
const { subdomainPrefix } = req.body || {};
|
||||
const result = await tunnelService.setupTunnel(
|
||||
req.params.id as string,
|
||||
{ subdomainPrefix },
|
||||
req.user!.id,
|
||||
req.ip
|
||||
);
|
||||
res.status(201).json({ data: result });
|
||||
}
|
||||
);
|
||||
|
||||
// Get tunnel status (resource matrix) — works for both local and remote
|
||||
router.get(
|
||||
'/:id/tunnel/status',
|
||||
requireRole('SUPER_ADMIN', 'OPERATOR'),
|
||||
async (req: Request, res: Response) => {
|
||||
const status = await tunnelService.getTunnelStatus(req.params.id as string);
|
||||
res.json({ data: status });
|
||||
}
|
||||
);
|
||||
|
||||
// Re-sync resources (idempotent — creates missing, leaves existing)
|
||||
router.post(
|
||||
'/:id/tunnel/sync',
|
||||
requireRole('SUPER_ADMIN'),
|
||||
async (req: Request, res: Response) => {
|
||||
const result = await tunnelService.syncResources(
|
||||
req.params.id as string,
|
||||
req.user!.id,
|
||||
req.ip
|
||||
);
|
||||
res.json({ data: result });
|
||||
}
|
||||
);
|
||||
|
||||
// ─── Lifecycle Endpoints ─────────────────────────────────────────────
|
||||
|
||||
router.post(
|
||||
@ -280,6 +335,7 @@ router.post(
|
||||
router.post(
|
||||
'/:id/upgrade',
|
||||
requireRole('SUPER_ADMIN', 'OPERATOR'),
|
||||
validate(startUpgradeSchema),
|
||||
async (req: Request, res: Response) => {
|
||||
const { skipBackup, useRegistry, branch } = req.body || {};
|
||||
const upgrade = await upgradeService.startUpgrade(
|
||||
@ -356,4 +412,76 @@ router.get(
|
||||
}
|
||||
);
|
||||
|
||||
// ─── Restores ──────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* POST /:id/restore
|
||||
* Body: { backupId, options? }
|
||||
* Starts a restore of the given backup onto this instance. Returns the
|
||||
* InstanceRestore row immediately; caller polls GET /:id/restores or
|
||||
* GET /:id/restores/:restoreId for status.
|
||||
*
|
||||
* DESTRUCTIVE: overwrites databases and uploads. Requires SUPER_ADMIN.
|
||||
*/
|
||||
router.post(
|
||||
'/:id/restore',
|
||||
requireRole('SUPER_ADMIN'),
|
||||
async (req: Request, res: Response) => {
|
||||
const instanceId = req.params.id as string;
|
||||
const { backupId, options } = req.body ?? {};
|
||||
if (!backupId || typeof backupId !== 'string') {
|
||||
res.status(400).json({ error: { message: 'backupId (string) is required', code: 'VALIDATION' } });
|
||||
return;
|
||||
}
|
||||
|
||||
// Defensive: ensure the backup belongs to this instance
|
||||
const backup = await prisma.backup.findUnique({ where: { id: backupId } });
|
||||
if (!backup) {
|
||||
res.status(404).json({ error: { message: 'Backup not found', code: 'NOT_FOUND' } });
|
||||
return;
|
||||
}
|
||||
if (backup.instanceId !== instanceId) {
|
||||
res.status(400).json({
|
||||
error: {
|
||||
message: 'Backup does not belong to this instance (cross-instance restore is not supported)',
|
||||
code: 'CROSS_INSTANCE_RESTORE',
|
||||
},
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
const restore = await restoreService.createRestore({
|
||||
backupId,
|
||||
triggeredById: req.user!.id,
|
||||
ipAddress: req.ip,
|
||||
options,
|
||||
});
|
||||
res.status(201).json({ data: restore });
|
||||
}
|
||||
);
|
||||
|
||||
router.get(
|
||||
'/:id/restores',
|
||||
requireRole('SUPER_ADMIN', 'OPERATOR'),
|
||||
async (req: Request, res: Response) => {
|
||||
const page = Math.max(1, parseInt(req.query.page as string, 10) || 1);
|
||||
const limit = Math.min(100, Math.max(1, parseInt(req.query.limit as string, 10) || 50));
|
||||
const result = await restoreService.listRestores(req.params.id as string, page, limit);
|
||||
res.json(result);
|
||||
}
|
||||
);
|
||||
|
||||
router.get(
|
||||
'/:id/restores/:restoreId',
|
||||
requireRole('SUPER_ADMIN', 'OPERATOR'),
|
||||
async (req: Request, res: Response) => {
|
||||
const restore = await restoreService.getRestore(req.params.restoreId as string);
|
||||
if (restore.instanceId !== req.params.id) {
|
||||
res.status(404).json({ error: { message: 'Restore not found', code: 'NOT_FOUND' } });
|
||||
return;
|
||||
}
|
||||
res.json({ data: restore });
|
||||
}
|
||||
);
|
||||
|
||||
export default router;
|
||||
|
||||
@ -108,9 +108,32 @@ export const importInstancesSchema = z.object({
|
||||
instances: z.array(registerInstanceSchema).min(1).max(50),
|
||||
});
|
||||
|
||||
// SECURITY: branch name is interpolated into a shell command string in the
|
||||
// local `runUpgrade` path (exec, not spawn), so we must enforce the same
|
||||
// strict allow-list the agent uses on its own end. This blocks names starting
|
||||
// with `-` (avoiding flag confusion), shell metachars, and anything exotic.
|
||||
export const startUpgradeSchema = z.object({
|
||||
skipBackup: z.boolean().optional(),
|
||||
useRegistry: z.boolean().optional(),
|
||||
branch: z
|
||||
.string()
|
||||
.regex(/^[a-zA-Z0-9][a-zA-Z0-9_.\/-]{0,99}$/, 'Invalid branch name')
|
||||
.optional(),
|
||||
});
|
||||
|
||||
export const setupRemoteTunnelSchema = z.object({
|
||||
subdomainPrefix: z
|
||||
.string()
|
||||
.min(1)
|
||||
.max(50)
|
||||
.regex(/^[a-z0-9-]+$/, 'Prefix must be lowercase alphanumeric with hyphens')
|
||||
.optional(),
|
||||
});
|
||||
|
||||
export type CreateInstanceInput = z.infer<typeof createInstanceSchema>;
|
||||
export type UpdateInstanceInput = z.infer<typeof updateInstanceSchema>;
|
||||
export type RegisterInstanceInput = z.infer<typeof registerInstanceSchema>;
|
||||
export type ReconfigureInstanceInput = z.infer<typeof reconfigureInstanceSchema>;
|
||||
export type ConfigureTunnelInput = z.infer<typeof configureTunnelSchema>;
|
||||
export type ImportInstancesInput = z.infer<typeof importInstancesSchema>;
|
||||
export type StartUpgradeInput = z.infer<typeof startUpgradeSchema>;
|
||||
|
||||
@ -8,6 +8,12 @@ import { env } from './config/env';
|
||||
import { logger } from './utils/logger';
|
||||
import { errorHandler } from './middleware/error-handler';
|
||||
|
||||
// BigInt JSON serialization. Prisma's BigInt columns (e.g. Backup.sizeBytes)
|
||||
// don't have a toJSON method by default, so res.json() throws. Stringify them.
|
||||
(BigInt.prototype as unknown as { toJSON: () => string }).toJSON = function () {
|
||||
return this.toString();
|
||||
};
|
||||
|
||||
// Route imports
|
||||
import authRoutes from './modules/auth/auth.routes';
|
||||
import instanceRoutes from './modules/instances/instances.routes';
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
import { Prisma, BackupStatus, AuditAction, InstanceStatus } from '@prisma/client';
|
||||
import fs from 'fs/promises';
|
||||
import { createReadStream } from 'fs';
|
||||
import path from 'path';
|
||||
import crypto from 'crypto';
|
||||
import { execFile as execFileCb } from 'child_process';
|
||||
@ -10,6 +11,7 @@ import { AppError } from '../middleware/error-handler';
|
||||
import { decryptJson } from '../utils/encryption';
|
||||
import * as docker from './docker.service';
|
||||
import { logger } from '../utils/logger';
|
||||
import { getRemoteDriverForInstance } from './execution-driver';
|
||||
const execFile = promisify(execFileCb);
|
||||
|
||||
/**
|
||||
@ -24,11 +26,16 @@ function assertPathWithinBoundary(filePath: string, boundary: string, label: str
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute SHA-256 hash of a file.
|
||||
* Compute SHA-256 hash of a file by streaming its contents.
|
||||
*/
|
||||
async function fileHash(filePath: string): Promise<string> {
|
||||
const fileBuffer = await fs.readFile(filePath);
|
||||
return crypto.createHash('sha256').update(fileBuffer).digest('hex');
|
||||
return new Promise((resolve, reject) => {
|
||||
const hash = crypto.createHash('sha256');
|
||||
const stream = createReadStream(filePath);
|
||||
stream.on('data', (chunk) => hash.update(chunk));
|
||||
stream.on('end', () => resolve(hash.digest('hex')));
|
||||
stream.on('error', reject);
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
@ -52,7 +59,11 @@ export async function createBackup(instanceId: string, userId?: string, ipAddres
|
||||
throw new AppError(400, `Cannot backup instance in ${instance.status} state`, 'INVALID_STATE');
|
||||
}
|
||||
|
||||
if ((instance as { isRegistered?: boolean }).isRegistered) {
|
||||
// `isRegistered` + `isRemote` = a remote CCP-managed instance (agent on the
|
||||
// far side). `isRegistered` alone (without `isRemote`) would mean a local
|
||||
// host-managed instance that CCP doesn't own the compose files for — that
|
||||
// case we still can't back up.
|
||||
if (instance.isRegistered && !instance.isRemote) {
|
||||
throw new AppError(400, 'Backups not managed by CCP for registered instances', 'NOT_MANAGED');
|
||||
}
|
||||
|
||||
@ -72,9 +83,31 @@ export async function createBackup(instanceId: string, userId?: string, ipAddres
|
||||
return backup;
|
||||
}
|
||||
|
||||
type BackupInstance = {
|
||||
id: string;
|
||||
slug: string;
|
||||
basePath: string;
|
||||
composeProject: string;
|
||||
encryptedSecrets: string | null;
|
||||
isRemote: boolean;
|
||||
agentUrl: string | null;
|
||||
};
|
||||
|
||||
async function performBackup(
|
||||
backupId: string,
|
||||
instance: { id: string; slug: string; basePath: string; composeProject: string; encryptedSecrets: string | null },
|
||||
instance: BackupInstance,
|
||||
userId?: string,
|
||||
ipAddress?: string
|
||||
) {
|
||||
if (instance.isRemote) {
|
||||
return performRemoteBackup(backupId, instance, userId, ipAddress);
|
||||
}
|
||||
return performLocalBackup(backupId, instance, userId, ipAddress);
|
||||
}
|
||||
|
||||
async function performLocalBackup(
|
||||
backupId: string,
|
||||
instance: BackupInstance,
|
||||
userId?: string,
|
||||
ipAddress?: string
|
||||
) {
|
||||
@ -221,6 +254,168 @@ async function performBackup(
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Run a backup on a remote agent and stream the resulting archive to CCP storage.
|
||||
*
|
||||
* Flow:
|
||||
* 1. Tell agent to run scripts/backup.sh → { backupId, sizeBytes, sha256, manifest }
|
||||
* 2. Stream archive from agent → $BACKUP_STORAGE_PATH/{slug}/backup-{slug}-{backupId}.tar.gz
|
||||
* 3. Verify local SHA256 matches what the agent reported (defense in depth)
|
||||
* 4. Tell agent to delete its local copy (reclaim remote disk)
|
||||
* 5. Update Backup row as COMPLETED
|
||||
*
|
||||
* On failure at any step after the remote backup was created, we leave the
|
||||
* agent-side archive in place so the operator can retry the download.
|
||||
*/
|
||||
async function performRemoteBackup(
|
||||
backupId: string,
|
||||
instance: BackupInstance,
|
||||
userId?: string,
|
||||
ipAddress?: string
|
||||
) {
|
||||
let archivePath: string | null = null;
|
||||
let agentBackupId: string | null = null;
|
||||
|
||||
try {
|
||||
await prisma.backup.update({
|
||||
where: { id: backupId },
|
||||
data: { status: BackupStatus.IN_PROGRESS },
|
||||
});
|
||||
|
||||
const driver = await getRemoteDriverForInstance({
|
||||
id: instance.id,
|
||||
slug: instance.slug,
|
||||
isRemote: instance.isRemote,
|
||||
agentUrl: instance.agentUrl,
|
||||
});
|
||||
|
||||
// 1. Trigger the backup on the agent (this blocks until backup.sh completes)
|
||||
logger.info(`[backup] ${instance.slug}: triggering remote backup via agent`);
|
||||
const result = await driver.createBackup();
|
||||
agentBackupId = result.backupId;
|
||||
|
||||
logger.info(
|
||||
`[backup] ${instance.slug}: agent backup complete — ${result.filename} ` +
|
||||
`(${(result.sizeBytes / 1024 / 1024).toFixed(1)} MB, sha256=${result.sha256.substring(0, 16)}...)`
|
||||
);
|
||||
|
||||
// 2. Resolve the destination archive path on CCP storage
|
||||
const archiveName = `backup-${instance.slug}-${result.backupId}.tar.gz`;
|
||||
archivePath = path.join(env.BACKUP_STORAGE_PATH, instance.slug, archiveName);
|
||||
// Path traversal guard (slug should be safe but better to assert)
|
||||
assertPathWithinBoundary(archivePath, env.BACKUP_STORAGE_PATH, 'Backup archive');
|
||||
await fs.mkdir(path.dirname(archivePath), { recursive: true });
|
||||
|
||||
// 3. Stream the archive from the agent to CCP storage
|
||||
logger.info(`[backup] ${instance.slug}: streaming archive to ${archivePath}`);
|
||||
const { bytesWritten } = await driver.downloadBackup(result.backupId, archivePath);
|
||||
if (bytesWritten !== result.sizeBytes) {
|
||||
throw new Error(
|
||||
`Downloaded size ${bytesWritten} does not match agent-reported size ${result.sizeBytes}`
|
||||
);
|
||||
}
|
||||
|
||||
// 4. Re-hash the downloaded file and compare to the agent-reported hash.
|
||||
//
|
||||
// SECURITY NOTE: this check authenticates *transmission integrity* only,
|
||||
// not content integrity against a malicious agent. Both the file bytes
|
||||
// and the expected hash are supplied by the (semi-trusted) agent, so a
|
||||
// compromised agent can trivially make this check pass while delivering
|
||||
// arbitrary content. The check still catches accidental corruption (bit
|
||||
// flips, truncation) and is essentially free.
|
||||
//
|
||||
// The mTLS channel guarantees that the bytes weren't modified in transit
|
||||
// by an outside attacker. The remaining trust gap — "what if the agent
|
||||
// itself is compromised?" — must be addressed before Phase B (restore)
|
||||
// ships, since restore feeds the archive into pg_restore. Either:
|
||||
// (a) HMAC-sign the hash on the agent with its mTLS private key and
|
||||
// verify on the CCP using the agent cert public key, or
|
||||
// (b) limit restore operations to require an additional out-of-band
|
||||
// admin confirmation step.
|
||||
const localSha256 = await fileHash(archivePath);
|
||||
if (localSha256 !== result.sha256) {
|
||||
throw new Error(
|
||||
`SHA256 mismatch: agent reported ${result.sha256}, local file hashed ${localSha256}`
|
||||
);
|
||||
}
|
||||
|
||||
// 5. Reclaim disk on the remote agent
|
||||
try {
|
||||
await driver.deleteBackup(result.backupId);
|
||||
} catch (err) {
|
||||
logger.warn(
|
||||
`[backup] ${instance.slug}: failed to delete remote backup ${result.backupId}: ${(err as Error).message}`
|
||||
);
|
||||
// Non-fatal — CCP has the archive, remote copy will age out next retention sweep
|
||||
}
|
||||
|
||||
// 6. Persist the result. Store sha256 and agentBackupId inside the manifest
|
||||
// since we don't have dedicated columns.
|
||||
const mergedManifest = {
|
||||
...(result.manifest as Record<string, unknown> | null ?? {}),
|
||||
source: 'remote',
|
||||
agentBackupId: result.backupId,
|
||||
sha256: result.sha256,
|
||||
createdAt: result.createdAt,
|
||||
};
|
||||
|
||||
await prisma.backup.update({
|
||||
where: { id: backupId },
|
||||
data: {
|
||||
status: BackupStatus.COMPLETED,
|
||||
archivePath,
|
||||
sizeBytes: BigInt(bytesWritten),
|
||||
manifest: mergedManifest as unknown as Prisma.InputJsonValue,
|
||||
completedAt: new Date(),
|
||||
},
|
||||
});
|
||||
|
||||
if (userId) {
|
||||
await prisma.auditLog.create({
|
||||
data: {
|
||||
userId,
|
||||
instanceId: instance.id,
|
||||
action: AuditAction.BACKUP_CREATE,
|
||||
details: {
|
||||
backupId,
|
||||
archiveName,
|
||||
sizeBytes: bytesWritten,
|
||||
source: 'remote',
|
||||
agentBackupId: result.backupId,
|
||||
},
|
||||
ipAddress,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
logger.info(
|
||||
`[backup] ${instance.slug}: remote backup stored at ${archivePath} ` +
|
||||
`(${(bytesWritten / 1024 / 1024).toFixed(1)} MB)`
|
||||
);
|
||||
} catch (err) {
|
||||
await prisma.backup.update({
|
||||
where: { id: backupId },
|
||||
data: {
|
||||
status: BackupStatus.FAILED,
|
||||
errorMessage: (err as Error).message,
|
||||
completedAt: new Date(),
|
||||
},
|
||||
});
|
||||
|
||||
// Clean up any partial local file; leave the remote copy so retry is possible
|
||||
if (archivePath) {
|
||||
try { await fs.unlink(archivePath); } catch { /* ignore */ }
|
||||
}
|
||||
if (agentBackupId) {
|
||||
logger.warn(
|
||||
`[backup] ${instance.slug}: leaving agent-side backup ${agentBackupId} in place for retry`
|
||||
);
|
||||
}
|
||||
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete a backup (file + DB record).
|
||||
*/
|
||||
|
||||
@ -0,0 +1,368 @@
|
||||
/**
|
||||
* Pangolin Integration API client for the CCP.
|
||||
*
|
||||
* Ported from the main CML's pangolin.client.ts. Adapted to:
|
||||
* - Accept credentials via constructor (not env singleton)
|
||||
* - Be instantiable per-call so the CCP can use its own API token
|
||||
* to manage tunnels for multiple remote instances
|
||||
*
|
||||
* The CCP never exposes its Pangolin API key to remote instances — it
|
||||
* only pushes the resulting Newt credentials via the agent's writeFiles.
|
||||
*/
|
||||
import { logger } from '../utils/logger';
|
||||
|
||||
// ─── Types ─────────────────────────────────────────────────────────
|
||||
|
||||
export interface PangolinSite {
|
||||
siteId: string;
|
||||
name: string;
|
||||
orgId: string;
|
||||
niceId: string;
|
||||
pubKey?: string;
|
||||
subnet?: string;
|
||||
megabytesIn?: number;
|
||||
megabytesOut?: number;
|
||||
lastSeen?: string;
|
||||
online?: boolean;
|
||||
type?: string;
|
||||
address?: string;
|
||||
}
|
||||
|
||||
export interface PangolinExitNode {
|
||||
exitNodeId: string;
|
||||
name: string;
|
||||
location?: string;
|
||||
region?: string;
|
||||
online: boolean;
|
||||
capacity?: number;
|
||||
latency?: number;
|
||||
}
|
||||
|
||||
export interface PangolinResource {
|
||||
resourceId: string;
|
||||
name: string;
|
||||
subdomain?: string;
|
||||
fullDomain?: string;
|
||||
ssl?: boolean;
|
||||
blockAccess?: boolean;
|
||||
active?: boolean;
|
||||
proxyPort?: number;
|
||||
protocol?: string;
|
||||
domainBindings?: string[];
|
||||
http?: boolean;
|
||||
targets?: PangolinTarget[];
|
||||
}
|
||||
|
||||
export interface PangolinTarget {
|
||||
targetId: string;
|
||||
resourceId: string;
|
||||
siteId: string;
|
||||
ip: string;
|
||||
port: number;
|
||||
method: string;
|
||||
enabled?: boolean;
|
||||
}
|
||||
|
||||
export interface PangolinNewt {
|
||||
newtId: string;
|
||||
secret: string;
|
||||
siteId: string;
|
||||
}
|
||||
|
||||
export interface PangolinSiteDefaults {
|
||||
newtId: string;
|
||||
newtSecret: string;
|
||||
address: string;
|
||||
}
|
||||
|
||||
export interface CreateSitePayload {
|
||||
name: string;
|
||||
type?: string;
|
||||
subnet?: string;
|
||||
exitNodeId?: string;
|
||||
newtId?: string;
|
||||
secret?: string;
|
||||
address?: string;
|
||||
}
|
||||
|
||||
export interface CreateHttpResourcePayload {
|
||||
name: string;
|
||||
domainId: string;
|
||||
subdomain?: string;
|
||||
http: true;
|
||||
protocol: 'tcp';
|
||||
}
|
||||
|
||||
export interface CreateTargetPayload {
|
||||
siteId: string | number;
|
||||
ip: string;
|
||||
port: number;
|
||||
method: 'http' | 'https';
|
||||
enabled?: boolean;
|
||||
}
|
||||
|
||||
export interface PangolinDomain {
|
||||
domainId: string;
|
||||
baseDomain: string;
|
||||
verified: boolean;
|
||||
type?: string;
|
||||
failed?: boolean;
|
||||
configManaged?: boolean;
|
||||
}
|
||||
|
||||
export interface UpdateResourcePayload {
|
||||
name?: string;
|
||||
subdomain?: string;
|
||||
fullDomain?: string;
|
||||
ssl?: boolean;
|
||||
sso?: boolean;
|
||||
active?: boolean;
|
||||
blockAccess?: boolean;
|
||||
proxyPort?: number;
|
||||
protocol?: string;
|
||||
domainBindings?: string[];
|
||||
}
|
||||
|
||||
export interface UpdateCertificatePayload {
|
||||
autoRenew?: boolean;
|
||||
}
|
||||
|
||||
export interface PangolinCertificate {
|
||||
certId: string;
|
||||
domainId: string;
|
||||
domain: string;
|
||||
status: 'PENDING' | 'ACTIVE' | 'EXPIRED' | 'FAILED';
|
||||
issuedAt?: string;
|
||||
expiresAt?: string;
|
||||
autoRenew?: boolean;
|
||||
issuer?: string;
|
||||
}
|
||||
|
||||
export interface PangolinConnectedClient {
|
||||
clientId: string;
|
||||
resourceId: string;
|
||||
ipAddress: string;
|
||||
connectedAt: string;
|
||||
lastSeen: string;
|
||||
bytesIn: number;
|
||||
bytesOut: number;
|
||||
online: boolean;
|
||||
}
|
||||
|
||||
// ─── Helpers ───────────────────────────────────────────────────────
|
||||
|
||||
/** Redact credential fields from Pangolin API request bodies before logging. */
|
||||
function redactSecrets(body: unknown): unknown {
|
||||
if (!body || typeof body !== 'object') return body;
|
||||
const obj = body as Record<string, unknown>;
|
||||
const redacted = { ...obj };
|
||||
if ('secret' in redacted) redacted.secret = '[REDACTED]';
|
||||
if ('newtSecret' in redacted) redacted.newtSecret = '[REDACTED]';
|
||||
return redacted;
|
||||
}
|
||||
|
||||
// ─── Client ────────────────────────────────────────────────────────
|
||||
|
||||
export class CcpPangolinClient {
|
||||
constructor(
|
||||
private baseUrl: string,
|
||||
private apiKey: string,
|
||||
private orgId: string
|
||||
) {}
|
||||
|
||||
get configured(): boolean {
|
||||
return !!(this.baseUrl && this.apiKey && this.orgId);
|
||||
}
|
||||
|
||||
private async request<T>(method: string, path: string, body?: unknown): Promise<T> {
|
||||
if (!this.configured) {
|
||||
throw new Error('Pangolin API not configured. Set PANGOLIN_API_URL, PANGOLIN_API_KEY, PANGOLIN_ORG_ID in CCP .env');
|
||||
}
|
||||
|
||||
const url = `${this.baseUrl}${path}`;
|
||||
const controller = new AbortController();
|
||||
const timeout = setTimeout(() => controller.abort(), 15000);
|
||||
|
||||
try {
|
||||
logger.debug(`[pangolin] ${method} ${path}${body ? ` body=${JSON.stringify(redactSecrets(body))}` : ''}`);
|
||||
|
||||
const res = await fetch(url, {
|
||||
method,
|
||||
headers: {
|
||||
'Authorization': `Bearer ${this.apiKey}`,
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: body ? JSON.stringify(body) : undefined,
|
||||
signal: controller.signal,
|
||||
});
|
||||
|
||||
if (!res.ok) {
|
||||
const text = await res.text().catch(() => '');
|
||||
throw new Error(`Pangolin API ${method} ${path} returned ${res.status}: ${text}`);
|
||||
}
|
||||
|
||||
const contentType = res.headers.get('content-type') || '';
|
||||
if (contentType.includes('application/json')) {
|
||||
const json = await res.json();
|
||||
return this.unwrapResponse<T>(json);
|
||||
}
|
||||
return {} as T;
|
||||
} finally {
|
||||
clearTimeout(timeout);
|
||||
}
|
||||
}
|
||||
|
||||
private unwrapResponse<T>(json: unknown): T {
|
||||
if (json && typeof json === 'object' && !Array.isArray(json)) {
|
||||
const obj = json as Record<string, unknown>;
|
||||
if ('data' in obj && 'success' in obj) {
|
||||
return obj.data as T;
|
||||
}
|
||||
}
|
||||
return json as T;
|
||||
}
|
||||
|
||||
// ─── Health ───────────────────────────────────────────────────
|
||||
|
||||
async healthCheck(): Promise<boolean> {
|
||||
try {
|
||||
const controller = new AbortController();
|
||||
const timeout = setTimeout(() => controller.abort(), 5000);
|
||||
try {
|
||||
const res = await fetch(`${this.baseUrl}/`, {
|
||||
headers: { 'Authorization': `Bearer ${this.apiKey}` },
|
||||
signal: controller.signal,
|
||||
});
|
||||
return res.ok;
|
||||
} finally {
|
||||
clearTimeout(timeout);
|
||||
}
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Site Defaults ────────────────────────────────────────────
|
||||
|
||||
async pickSiteDefaults(): Promise<PangolinSiteDefaults> {
|
||||
const res = await this.request<unknown>('GET', `/org/${this.orgId}/pick-site-defaults`);
|
||||
const obj = res as Record<string, unknown>;
|
||||
const newtId = obj.newtId as string || '';
|
||||
const newtSecret = obj.newtSecret as string || obj.secret as string || '';
|
||||
const address = obj.clientAddress as string || obj.address as string || '';
|
||||
if (!newtId || !newtSecret) {
|
||||
throw new Error('Pangolin did not return Newt credentials from pick-site-defaults');
|
||||
}
|
||||
return { newtId, newtSecret, address };
|
||||
}
|
||||
|
||||
// ─── Sites ────────────────────────────────────────────────────
|
||||
|
||||
async listSites(): Promise<PangolinSite[]> {
|
||||
const res = await this.request<unknown>('GET', `/org/${this.orgId}/sites`);
|
||||
return this.extractArray(res, 'sites', 'listSites');
|
||||
}
|
||||
|
||||
async getSite(siteId: string): Promise<PangolinSite> {
|
||||
return this.request<PangolinSite>('GET', `/site/${siteId}`);
|
||||
}
|
||||
|
||||
async createSite(data: CreateSitePayload): Promise<PangolinSite & { newt?: PangolinNewt }> {
|
||||
return this.request<PangolinSite & { newt?: PangolinNewt }>('PUT', `/org/${this.orgId}/site`, data);
|
||||
}
|
||||
|
||||
async deleteSite(siteId: string): Promise<void> {
|
||||
await this.request<void>('DELETE', `/site/${siteId}`);
|
||||
}
|
||||
|
||||
async listExitNodes(): Promise<PangolinExitNode[]> {
|
||||
try {
|
||||
const res = await this.request<unknown>('GET', `/org/${this.orgId}/exit-nodes`);
|
||||
return this.extractArray(res, 'exitNodes', 'listExitNodes');
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Resources ────────────────────────────────────────────────
|
||||
|
||||
async listResources(): Promise<PangolinResource[]> {
|
||||
const res = await this.request<unknown>('GET', `/org/${this.orgId}/resources`);
|
||||
return this.extractArray(res, 'resources', 'listResources');
|
||||
}
|
||||
|
||||
async getResource(resourceId: string): Promise<PangolinResource> {
|
||||
return this.request<PangolinResource>('GET', `/resource/${resourceId}`);
|
||||
}
|
||||
|
||||
async createResource(data: CreateHttpResourcePayload): Promise<PangolinResource> {
|
||||
logger.info(`[pangolin] createResource: ${data.name} (subdomain: ${data.subdomain || '(root)'})`);
|
||||
return this.request<PangolinResource>('PUT', `/org/${this.orgId}/resource`, data);
|
||||
}
|
||||
|
||||
async updateResource(resourceId: string, data: UpdateResourcePayload): Promise<PangolinResource> {
|
||||
return this.request<PangolinResource>('POST', `/resource/${resourceId}`, data);
|
||||
}
|
||||
|
||||
async deleteResource(resourceId: string): Promise<void> {
|
||||
await this.request<void>('DELETE', `/resource/${resourceId}`);
|
||||
}
|
||||
|
||||
// ─── Targets ──────────────────────────────────────────────────
|
||||
|
||||
async createTarget(resourceId: string, data: CreateTargetPayload): Promise<PangolinTarget> {
|
||||
logger.info(`[pangolin] createTarget: resource=${resourceId}, ip=${data.ip}:${data.port}`);
|
||||
const payload = { ...data, siteId: Number(data.siteId) };
|
||||
return this.request<PangolinTarget>('PUT', `/resource/${resourceId}/target`, payload);
|
||||
}
|
||||
|
||||
async listTargets(resourceId: string): Promise<PangolinTarget[]> {
|
||||
const res = await this.request<unknown>('GET', `/resource/${resourceId}/targets`);
|
||||
return this.extractArray(res, 'targets', 'listTargets');
|
||||
}
|
||||
|
||||
async deleteTarget(targetId: string): Promise<void> {
|
||||
await this.request<void>('DELETE', `/target/${targetId}`);
|
||||
}
|
||||
|
||||
// ─── Domains ──────────────────────────────────────────────────
|
||||
|
||||
async listDomains(): Promise<PangolinDomain[]> {
|
||||
const res = await this.request<unknown>('GET', `/org/${this.orgId}/domains`);
|
||||
return this.extractArray(res, 'domains', 'listDomains');
|
||||
}
|
||||
|
||||
// ─── Certificates ─────────────────────────────────────────────
|
||||
|
||||
async getCertificate(domainId: string, domain: string): Promise<PangolinCertificate> {
|
||||
return this.request<PangolinCertificate>('GET', `/org/${this.orgId}/certificate/${domainId}/${domain}`);
|
||||
}
|
||||
|
||||
async updateCertificate(certId: string, data: UpdateCertificatePayload): Promise<PangolinCertificate> {
|
||||
return this.request<PangolinCertificate>('POST', `/certificate/${certId}`, data);
|
||||
}
|
||||
|
||||
// ─── Clients ──────────────────────────────────────────────────
|
||||
|
||||
async listClients(resourceId: string): Promise<PangolinConnectedClient[]> {
|
||||
const res = await this.request<unknown>('GET', `/resource/${resourceId}/clients`);
|
||||
return this.extractArray(res, 'clients', 'listClients');
|
||||
}
|
||||
|
||||
// ─── Helpers ──────────────────────────────────────────────────
|
||||
|
||||
private extractArray<T>(res: unknown, key: string, context: string): T[] {
|
||||
if (Array.isArray(res)) return res as T[];
|
||||
if (res && typeof res === 'object') {
|
||||
const obj = res as Record<string, unknown>;
|
||||
if (Array.isArray(obj[key])) return obj[key] as T[];
|
||||
if (obj.data && typeof obj.data === 'object') {
|
||||
const dataObj = obj.data as Record<string, unknown>;
|
||||
if (Array.isArray(dataObj[key])) return dataObj[key] as T[];
|
||||
}
|
||||
if (Array.isArray(obj.data)) return obj.data as T[];
|
||||
}
|
||||
logger.warn(`[pangolin] ${context}: could not extract array from response`);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
@ -90,7 +90,7 @@ export async function ensureCA() {
|
||||
* Issue a certificate for a remote agent, signed by the CA.
|
||||
* Returns the certificate materials (plaintext) for one-time display.
|
||||
*/
|
||||
export async function issueAgentCert(instanceId: string, slug: string) {
|
||||
export async function issueAgentCert(instanceId: string, slug: string, agentUrl?: string) {
|
||||
const ca = await ensureCA();
|
||||
const caKeyPem = decrypt(ca.encryptedKey);
|
||||
|
||||
@ -110,12 +110,29 @@ export async function issueAgentCert(instanceId: string, slug: string) {
|
||||
await fs.writeFile(caCertFile, ca.certPem);
|
||||
await fs.writeFile(serialFile, crypto.randomBytes(16).toString('hex'));
|
||||
|
||||
// Extensions for server+client auth
|
||||
await fs.writeFile(extFile, [
|
||||
// Build SAN entries from the agent URL hostname
|
||||
const sanEntries: string[] = [];
|
||||
if (agentUrl) {
|
||||
try {
|
||||
const hostname = new URL(agentUrl).hostname;
|
||||
// Detect IP vs DNS name
|
||||
if (/^\d{1,3}(\.\d{1,3}){3}$/.test(hostname) || hostname.includes(':')) {
|
||||
sanEntries.push(`IP:${hostname}`);
|
||||
} else {
|
||||
sanEntries.push(`DNS:${hostname}`);
|
||||
}
|
||||
} catch { /* ignore invalid URL */ }
|
||||
}
|
||||
sanEntries.push(`DNS:${commonName}`);
|
||||
|
||||
// Extensions for server+client auth with SANs
|
||||
const extLines = [
|
||||
'basicConstraints=CA:FALSE',
|
||||
'keyUsage=digitalSignature,keyEncipherment',
|
||||
'extendedKeyUsage=serverAuth,clientAuth',
|
||||
].join('\n'));
|
||||
`subjectAltName=${sanEntries.join(',')}`,
|
||||
];
|
||||
await fs.writeFile(extFile, extLines.join('\n'));
|
||||
|
||||
// Generate agent key
|
||||
await exec(
|
||||
|
||||
@ -60,7 +60,20 @@ export async function getDriverForInstance(instance: DriverInstance): Promise<Ex
|
||||
const { getLocalDriver } = await import('./local-driver');
|
||||
return getLocalDriver();
|
||||
}
|
||||
return getRemoteDriverForInstance(instance);
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve a RemoteDriver for a remote instance. Throws if the instance is
|
||||
* local, missing an agent URL, or has no valid mTLS certificate.
|
||||
*
|
||||
* Use this when you need to call RemoteDriver-specific methods like
|
||||
* createBackup() that don't exist on the ExecutionDriver interface.
|
||||
*/
|
||||
export async function getRemoteDriverForInstance(instance: DriverInstance) {
|
||||
if (!instance.isRemote) {
|
||||
throw new Error(`Instance ${instance.slug} is not remote`);
|
||||
}
|
||||
if (!instance.agentUrl) {
|
||||
throw new Error(`Remote instance ${instance.slug} has no agent URL configured`);
|
||||
}
|
||||
|
||||
@ -1,10 +1,87 @@
|
||||
import https from 'https';
|
||||
import fs from 'fs';
|
||||
import { pipeline } from 'stream/promises';
|
||||
import { env } from '../config/env';
|
||||
import type { ExecutionDriver } from './execution-driver';
|
||||
import { AgentUnreachableError } from './execution-driver';
|
||||
import type { ContainerInfo } from './docker.service';
|
||||
import { logger } from '../utils/logger';
|
||||
|
||||
export interface AgentBackupResult {
|
||||
backupId: string;
|
||||
filename: string;
|
||||
sizeBytes: number;
|
||||
sha256: string;
|
||||
manifest: unknown | null;
|
||||
createdAt: string;
|
||||
}
|
||||
|
||||
export interface AgentBackupListEntry {
|
||||
backupId: string;
|
||||
filename: string;
|
||||
sizeBytes: number;
|
||||
createdAt: string;
|
||||
}
|
||||
|
||||
export interface AgentRestoreUploadResult {
|
||||
uploadId: string;
|
||||
sizeBytes: number;
|
||||
sha256: string;
|
||||
}
|
||||
|
||||
export interface AgentRestoreOptions {
|
||||
skipDb?: boolean;
|
||||
skipUploads?: boolean;
|
||||
skipListmonk?: boolean;
|
||||
dryRun?: boolean;
|
||||
}
|
||||
|
||||
export interface AgentRestoreState {
|
||||
status: 'UPLOADED' | 'RUNNING' | 'COMPLETED' | 'FAILED';
|
||||
uploadId: string;
|
||||
startedAt: string;
|
||||
completedAt?: string;
|
||||
exitCode?: number;
|
||||
logTail?: string;
|
||||
errorMessage?: string;
|
||||
options?: AgentRestoreOptions;
|
||||
}
|
||||
|
||||
export interface AgentUpdateStatus {
|
||||
branch: string;
|
||||
currentCommit: string;
|
||||
currentMessage?: string;
|
||||
remoteCommit: string | null;
|
||||
commitsBehind: number;
|
||||
changelog: Array<{ hash: string; message: string; date: string; author: string }>;
|
||||
checkedAt: string;
|
||||
error: string | null;
|
||||
}
|
||||
|
||||
export interface AgentUpgradeProgress {
|
||||
phase?: number;
|
||||
phaseName?: string;
|
||||
percentage?: number;
|
||||
message?: string;
|
||||
timestamp?: string;
|
||||
}
|
||||
|
||||
export interface AgentUpgradeResult {
|
||||
success: boolean;
|
||||
message?: string;
|
||||
previousCommit?: string;
|
||||
newCommit?: string;
|
||||
commitCount?: number;
|
||||
durationSeconds?: number;
|
||||
warnings?: string[];
|
||||
}
|
||||
|
||||
export interface StartAgentUpgradeOptions {
|
||||
skipBackup?: boolean;
|
||||
useRegistry?: boolean;
|
||||
branch?: string;
|
||||
}
|
||||
|
||||
interface AgentRequestOptions {
|
||||
method: 'GET' | 'POST' | 'DELETE';
|
||||
path: string;
|
||||
@ -261,4 +338,261 @@ export class RemoteDriver implements ExecutionDriver {
|
||||
timeoutMs: env.AGENT_LONG_OP_TIMEOUT_MS,
|
||||
});
|
||||
}
|
||||
|
||||
// ─── Backup Operations ──────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Trigger a backup on the remote agent. The agent shells out to scripts/backup.sh
|
||||
* and returns metadata for the resulting archive. The archive stays on the
|
||||
* agent's disk until downloadBackup() + deleteBackup() are called.
|
||||
*/
|
||||
async createBackup(): Promise<AgentBackupResult> {
|
||||
return this.request<AgentBackupResult>({
|
||||
method: 'POST',
|
||||
path: `/instance/${this.slug}/backup`,
|
||||
timeoutMs: env.AGENT_LONG_OP_TIMEOUT_MS,
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* List backup archives currently held on the agent for this slug.
|
||||
*/
|
||||
async listAgentBackups(): Promise<AgentBackupListEntry[]> {
|
||||
const resp = await this.request<{ data: AgentBackupListEntry[] }>({
|
||||
method: 'GET',
|
||||
path: `/instance/${this.slug}/backups`,
|
||||
});
|
||||
return resp.data;
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete an archive from the agent's disk. Called after a successful download.
|
||||
*/
|
||||
async deleteBackup(backupId: string): Promise<void> {
|
||||
await this.request({
|
||||
method: 'DELETE',
|
||||
path: `/instance/${this.slug}/backup/${encodeURIComponent(backupId)}`,
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Stream a backup archive from the agent to a local file path.
|
||||
* Verifies the Content-Length header matches the bytes written.
|
||||
*/
|
||||
async downloadBackup(backupId: string, destPath: string): Promise<{ bytesWritten: number }> {
|
||||
const url = new URL(
|
||||
`/instance/${this.slug}/backup/${encodeURIComponent(backupId)}/download`,
|
||||
this.agentUrl
|
||||
);
|
||||
const timeoutMs = env.AGENT_LONG_OP_TIMEOUT_MS;
|
||||
|
||||
return new Promise((resolve, reject) => {
|
||||
const req = https.request(
|
||||
{
|
||||
hostname: url.hostname,
|
||||
port: url.port || 7443,
|
||||
path: url.pathname + url.search,
|
||||
method: 'GET',
|
||||
headers: { Accept: 'application/gzip' },
|
||||
cert: this.clientCert,
|
||||
key: this.clientKey,
|
||||
ca: this.caCert,
|
||||
rejectUnauthorized: true,
|
||||
timeout: timeoutMs,
|
||||
},
|
||||
async (res) => {
|
||||
if (res.statusCode && res.statusCode >= 400) {
|
||||
let body = '';
|
||||
res.on('data', (c) => (body += c));
|
||||
res.on('end', () => reject(new Error(`Agent returned ${res.statusCode}: ${body.substring(0, 500)}`)));
|
||||
return;
|
||||
}
|
||||
|
||||
const expectedSize = res.headers['content-length']
|
||||
? parseInt(res.headers['content-length'] as string, 10)
|
||||
: null;
|
||||
|
||||
try {
|
||||
const out = fs.createWriteStream(destPath);
|
||||
await pipeline(res, out);
|
||||
const stats = await fs.promises.stat(destPath);
|
||||
if (expectedSize !== null && stats.size !== expectedSize) {
|
||||
reject(new Error(`Downloaded size ${stats.size} does not match Content-Length ${expectedSize}`));
|
||||
return;
|
||||
}
|
||||
resolve({ bytesWritten: stats.size });
|
||||
} catch (err) {
|
||||
reject(err);
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
req.on('error', (err) => {
|
||||
reject(new AgentUnreachableError(this.agentUrl, err));
|
||||
});
|
||||
req.on('timeout', () => {
|
||||
req.destroy();
|
||||
reject(new AgentUnreachableError(this.agentUrl, new Error(`Timed out after ${timeoutMs}ms`)));
|
||||
});
|
||||
|
||||
req.end();
|
||||
});
|
||||
}
|
||||
|
||||
// ─── Restore Operations ─────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Stream a backup archive from a local path to the agent's upload endpoint.
|
||||
* The expected SHA256 is passed as a query parameter and the agent verifies
|
||||
* it during ingestion — if it mismatches, the upload is rejected with 400.
|
||||
*/
|
||||
async uploadRestore(
|
||||
archivePath: string,
|
||||
expectedSha256: string
|
||||
): Promise<AgentRestoreUploadResult> {
|
||||
const stats = await fs.promises.stat(archivePath);
|
||||
const url = new URL(
|
||||
`/instance/${this.slug}/restore/upload?sha256=${encodeURIComponent(expectedSha256)}`,
|
||||
this.agentUrl
|
||||
);
|
||||
const timeoutMs = env.AGENT_LONG_OP_TIMEOUT_MS;
|
||||
|
||||
return new Promise((resolve, reject) => {
|
||||
const req = https.request(
|
||||
{
|
||||
hostname: url.hostname,
|
||||
port: url.port || 7443,
|
||||
path: url.pathname + url.search,
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/octet-stream',
|
||||
'Content-Length': String(stats.size),
|
||||
},
|
||||
cert: this.clientCert,
|
||||
key: this.clientKey,
|
||||
ca: this.caCert,
|
||||
rejectUnauthorized: true,
|
||||
timeout: timeoutMs,
|
||||
},
|
||||
(res) => {
|
||||
let body = '';
|
||||
res.on('data', (c) => (body += c));
|
||||
res.on('end', () => {
|
||||
if (res.statusCode && res.statusCode >= 400) {
|
||||
try {
|
||||
const err = JSON.parse(body);
|
||||
reject(new Error(err.message || `Agent returned ${res.statusCode}`));
|
||||
} catch {
|
||||
reject(new Error(`Agent returned ${res.statusCode}: ${body.substring(0, 500)}`));
|
||||
}
|
||||
return;
|
||||
}
|
||||
try {
|
||||
resolve(JSON.parse(body) as AgentRestoreUploadResult);
|
||||
} catch (err) {
|
||||
reject(err);
|
||||
}
|
||||
});
|
||||
}
|
||||
);
|
||||
|
||||
req.on('error', (err) => {
|
||||
reject(new AgentUnreachableError(this.agentUrl, err));
|
||||
});
|
||||
req.on('timeout', () => {
|
||||
req.destroy();
|
||||
reject(new AgentUnreachableError(this.agentUrl, new Error(`Timed out after ${timeoutMs}ms`)));
|
||||
});
|
||||
|
||||
const fileStream = fs.createReadStream(archivePath);
|
||||
fileStream.on('error', (err) => {
|
||||
req.destroy();
|
||||
reject(err);
|
||||
});
|
||||
fileStream.pipe(req);
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Tell the agent to apply a previously-uploaded restore archive. The agent
|
||||
* fires `scripts/restore.sh` in the background and returns immediately.
|
||||
* Use `getRestoreProgress()` to poll for completion.
|
||||
*/
|
||||
async applyRestore(uploadId: string, options: AgentRestoreOptions = {}): Promise<void> {
|
||||
await this.request({
|
||||
method: 'POST',
|
||||
path: `/instance/${this.slug}/restore/${encodeURIComponent(uploadId)}/apply`,
|
||||
body: { confirm: true, ...options },
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Poll the agent for the current state of a restore.
|
||||
*/
|
||||
async getRestoreProgress(uploadId: string): Promise<AgentRestoreState> {
|
||||
return this.request<AgentRestoreState>({
|
||||
method: 'GET',
|
||||
path: `/instance/${this.slug}/restore/${encodeURIComponent(uploadId)}/progress`,
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete a restore upload dir from the agent's disk. Called after the CCP
|
||||
* has finalized the InstanceRestore row.
|
||||
*/
|
||||
async deleteRestoreUpload(uploadId: string): Promise<void> {
|
||||
await this.request({
|
||||
method: 'DELETE',
|
||||
path: `/instance/${this.slug}/restore/${encodeURIComponent(uploadId)}`,
|
||||
});
|
||||
}
|
||||
|
||||
// ─── Upgrade Operations ─────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Run upgrade-check.sh on the remote and return the parsed status.json.
|
||||
*/
|
||||
async checkForUpdates(): Promise<AgentUpdateStatus> {
|
||||
return this.request<AgentUpdateStatus>({
|
||||
method: 'POST',
|
||||
path: `/instance/${this.slug}/upgrade/check`,
|
||||
timeoutMs: 90_000,
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Trigger upgrade.sh --api-mode on the remote. Fire-and-forget; agent
|
||||
* spawns the script in the background and returns 202 immediately.
|
||||
* Use getUpgradeProgress / getUpgradeResult to track completion.
|
||||
*/
|
||||
async startUpgrade(options: StartAgentUpgradeOptions = {}): Promise<void> {
|
||||
await this.request({
|
||||
method: 'POST',
|
||||
path: `/instance/${this.slug}/upgrade/start`,
|
||||
body: options,
|
||||
timeoutMs: 30_000,
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the agent's data/upgrade/progress.json. Returns the default zero-state
|
||||
* if no progress has been written yet.
|
||||
*/
|
||||
async getUpgradeProgress(): Promise<AgentUpgradeProgress> {
|
||||
return this.request<AgentUpgradeProgress>({
|
||||
method: 'GET',
|
||||
path: `/instance/${this.slug}/upgrade/progress`,
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the agent's data/upgrade/result.json. Throws if no result is yet
|
||||
* available; the caller should treat that as "still running".
|
||||
*/
|
||||
async getUpgradeResult(): Promise<AgentUpgradeResult> {
|
||||
return this.request<AgentUpgradeResult>({
|
||||
method: 'GET',
|
||||
path: `/instance/${this.slug}/upgrade/result`,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
376
changemaker-control-panel/api/src/services/restore.service.ts
Normal file
376
changemaker-control-panel/api/src/services/restore.service.ts
Normal file
@ -0,0 +1,376 @@
|
||||
import fs from 'fs/promises';
|
||||
import path from 'path';
|
||||
import crypto from 'crypto';
|
||||
import { createReadStream } from 'fs';
|
||||
import { Prisma, RestoreStatus, AuditAction, InstanceStatus } from '@prisma/client';
|
||||
import { prisma } from '../lib/prisma';
|
||||
import { env } from '../config/env';
|
||||
import { AppError } from '../middleware/error-handler';
|
||||
import { logger } from '../utils/logger';
|
||||
import { getRemoteDriverForInstance } from './execution-driver';
|
||||
import type { AgentRestoreOptions, AgentRestoreState } from './remote-driver';
|
||||
|
||||
/**
|
||||
* Validate that a path is within the allowed backup storage boundary.
|
||||
*/
|
||||
function assertPathWithinBoundary(filePath: string, boundary: string, label: string): void {
|
||||
const normalized = path.resolve(filePath);
|
||||
const normalizedBoundary = path.resolve(boundary);
|
||||
if (!normalized.startsWith(normalizedBoundary + path.sep)) {
|
||||
throw new AppError(403, `${label} path outside allowed directory`, 'FORBIDDEN');
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute SHA-256 hash of a file by streaming its contents.
|
||||
*/
|
||||
async function fileHash(filePath: string): Promise<string> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const hash = crypto.createHash('sha256');
|
||||
const stream = createReadStream(filePath);
|
||||
stream.on('data', (chunk) => hash.update(chunk));
|
||||
stream.on('end', () => resolve(hash.digest('hex')));
|
||||
stream.on('error', reject);
|
||||
});
|
||||
}
|
||||
|
||||
// How often performRemoteRestore asks the agent for progress while a restore runs.
const POLL_INTERVAL_MS = 3_000;
// Abort and mark the restore FAILED if the agent has not reached a terminal
// state (COMPLETED/FAILED) within this window.
const POLL_TIMEOUT_MS = 15 * 60 * 1_000; // 15 min

/** Arguments accepted by createRestore(). */
interface StartRestoreArgs {
  // ID of the Backup row to restore from; must be COMPLETED with an archive on disk.
  backupId: string;
  // User who initiated the restore. When omitted, no audit-log entry is written
  // (writeRestoreAuditLog returns early without an actor).
  triggeredById?: string;
  // Requesting client IP, recorded on the audit-log entry.
  ipAddress?: string | null;
  // Pass-through options forwarded to the agent's restore apply endpoint.
  options?: AgentRestoreOptions;
}
|
||||
|
||||
/**
 * Kick off a restore for the given backup. Creates an InstanceRestore row
 * and runs the full upload → apply → poll loop asynchronously. Returns the
 * row so the caller (HTTP handler) can respond immediately.
 *
 * @param args - Backup to restore, optional actor/IP for auditing, and
 *   pass-through agent restore options.
 * @returns The freshly created InstanceRestore row (status PENDING).
 * @throws AppError 404 when the backup or its archive file is missing,
 *   400 when the backup/instance is in the wrong state, 501 for local
 *   (non-remote) instances.
 */
export async function createRestore(args: StartRestoreArgs) {
  const backup = await prisma.backup.findUnique({
    where: { id: args.backupId },
    include: { instance: true },
  });
  if (!backup) {
    throw new AppError(404, 'Backup not found', 'NOT_FOUND');
  }
  if (backup.status !== 'COMPLETED') {
    throw new AppError(400, `Backup is ${backup.status}, not COMPLETED`, 'INVALID_STATE');
  }
  if (!backup.archivePath) {
    throw new AppError(400, 'Backup has no archive path', 'NO_ARCHIVE');
  }

  // Only a RUNNING instance can receive a restore.
  const instance = backup.instance;
  if (instance.status !== InstanceStatus.RUNNING) {
    throw new AppError(400, `Cannot restore to instance in ${instance.status} state`, 'INVALID_STATE');
  }
  // Phase B only supports remote restore. Local restore is deliberately stubbed
  // — if you need it, add a performLocalRestore branch below. This also covers
  // the registered-but-local case (CCP-adopted instances) since they have
  // isRemote=false.
  if (!instance.isRemote) {
    throw new AppError(501, 'Local restore is not implemented — Phase B covers remote only', 'NOT_IMPLEMENTED');
  }

  // Make sure the archive is where it says it is and inside the boundary
  assertPathWithinBoundary(backup.archivePath, env.BACKUP_STORAGE_PATH, 'Backup archive');
  try {
    await fs.access(backup.archivePath);
  } catch {
    throw new AppError(404, 'Archive file is missing on disk', 'ARCHIVE_MISSING');
  }

  // Row is created PENDING; performRemoteRestore advances it through
  // UPLOADING → RUNNING → COMPLETED/FAILED.
  const restore = await prisma.instanceRestore.create({
    data: {
      instanceId: instance.id,
      backupId: backup.id,
      status: RestoreStatus.PENDING,
      triggeredById: args.triggeredById ?? null,
    },
  });

  // Fire-and-forget orchestration
  performRemoteRestore(restore.id, backup.archivePath, args.options ?? {}, args.triggeredById, args.ipAddress ?? null)
    .catch((err) => {
      logger.error(`[restore] ${restore.id} failed: ${(err as Error).message}`);
    });

  return restore;
}
|
||||
|
||||
/**
 * performRemoteRestore (defined below, after writeRestoreAuditLog):
 * end-to-end remote restore orchestration.
 *
 * Flow:
 *  1. Compute sha256 of the archive on CCP disk
 *  2. Upload to agent with sha256 query param (agent re-verifies on stream)
 *  3. Apply via agent (shells out to restore.sh --force)
 *  4. Poll progress every 3s until COMPLETED/FAILED or timeout
 *  5. Delete the agent-side upload
 *  6. Update the InstanceRestore row + audit log
 */
|
||||
/**
 * Write a BACKUP_RESTORE audit log entry. Wrapped in a try/catch so that an
 * audit-log DB failure can never mask the underlying restore status update.
 *
 * Called in all three terminal paths:
 *  - success (outcome: 'success')
 *  - agent reported failure (outcome: 'agent_failed')
 *  - orchestration error / timeout / unexpected throw (outcome: 'orchestration_error')
 */
async function writeRestoreAuditLog(args: {
  restoreId: string;
  instanceId: string;
  backupId: string;
  triggeredById?: string;
  ipAddress?: string | null;
  options: AgentRestoreOptions;
  outcome: 'success' | 'agent_failed' | 'orchestration_error';
  sha256?: string;
  uploadId?: string | null;
  errorMessage?: string;
}): Promise<void> {
  // No acting user → nothing to attribute the entry to; skip silently.
  if (!args.triggeredById) return;
  try {
    await prisma.auditLog.create({
      data: {
        userId: args.triggeredById,
        instanceId: args.instanceId,
        action: AuditAction.BACKUP_RESTORE,
        details: {
          backupId: args.backupId,
          restoreId: args.restoreId,
          source: 'remote',
          outcome: args.outcome,
          options: args.options as unknown as Prisma.InputJsonValue,
          // Optional fields are only included when present so the JSON stays compact.
          ...(args.sha256 ? { sha256: args.sha256 } : {}),
          ...(args.uploadId ? { agentUploadId: args.uploadId } : {}),
          // Cap stored error text to keep the details payload bounded.
          ...(args.errorMessage ? { errorMessage: args.errorMessage.substring(0, 500) } : {}),
        },
        ipAddress: args.ipAddress ?? null,
      },
    });
  } catch (err) {
    logger.error(`[restore] failed to write audit log for ${args.restoreId}: ${(err as Error).message}`);
  }
}
|
||||
|
||||
/**
 * End-to-end remote restore orchestration: hash the archive, upload it to
 * the agent, apply it, poll until a terminal state, clean up, and finalize
 * the InstanceRestore row + audit log. Runs detached from the HTTP request
 * (fired from createRestore with .catch), so every outcome — including an
 * unexpected throw — is recorded on the DB row rather than surfaced.
 *
 * @param restoreId - InstanceRestore row to drive.
 * @param archivePath - Archive on CCP disk (already boundary-checked by the caller).
 * @param options - Pass-through agent restore options.
 * @param triggeredById - Acting user for the audit log (optional).
 * @param ipAddress - Client IP for the audit log (optional).
 */
async function performRemoteRestore(
  restoreId: string,
  archivePath: string,
  options: AgentRestoreOptions,
  triggeredById?: string,
  ipAddress?: string | null
) {
  const restore = await prisma.instanceRestore.findUnique({
    where: { id: restoreId },
    include: { instance: true, backup: true },
  });
  if (!restore) {
    // Row deleted between creation and this async start; nothing to update.
    logger.error(`[restore] row ${restoreId} vanished mid-flight`);
    return;
  }
  const instance = restore.instance;

  // Tracked outside the try so the catch block can clean up / audit with them.
  let uploadId: string | null = null;
  let sha256: string | undefined;
  try {
    await prisma.instanceRestore.update({
      where: { id: restoreId },
      data: { status: RestoreStatus.UPLOADING },
    });

    const driver = await getRemoteDriverForInstance({
      id: instance.id,
      slug: instance.slug,
      isRemote: instance.isRemote,
      agentUrl: instance.agentUrl,
    });

    // 1. Compute local SHA256 (authoritative — the agent will verify against this).
    // We persist this in the audit log so there's an immutable record of exactly
    // which bytes were restored, useful for post-incident comparison.
    logger.info(`[restore] ${instance.slug}: hashing archive ${path.basename(archivePath)}`);
    sha256 = await fileHash(archivePath);

    // 2. Stream upload to agent
    logger.info(`[restore] ${instance.slug}: uploading archive (sha256=${sha256.substring(0, 16)}...)`);
    const uploadResult = await driver.uploadRestore(archivePath, sha256);
    uploadId = uploadResult.uploadId;

    await prisma.instanceRestore.update({
      where: { id: restoreId },
      data: { uploadId, status: RestoreStatus.RUNNING },
    });

    // 3. Apply
    logger.info(`[restore] ${instance.slug}: applying restore ${uploadId}`);
    await driver.applyRestore(uploadId, options);

    // 4. Poll progress
    const deadline = Date.now() + POLL_TIMEOUT_MS;
    let finalState: AgentRestoreState | null = null;
    while (Date.now() < deadline) {
      await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS));
      try {
        const state = await driver.getRestoreProgress(uploadId);
        // Mirror progress to the DB row so the UI shows updates
        await prisma.instanceRestore.update({
          where: { id: restoreId },
          data: {
            progressJson: state as unknown as Prisma.InputJsonValue,
            logTail: state.logTail ?? null,
          },
        });
        if (state.status === 'COMPLETED' || state.status === 'FAILED') {
          finalState = state;
          break;
        }
      } catch (err) {
        logger.warn(`[restore] ${instance.slug}: poll error: ${(err as Error).message}`);
        // Keep polling — transient network blips shouldn't fail the restore
      }
    }

    // Deadline hit without a terminal state → treated as orchestration error below.
    if (!finalState) {
      throw new Error(`Restore timed out after ${Math.round(POLL_TIMEOUT_MS / 1000)}s`);
    }

    // 5. Clean up agent-side upload (best effort)
    try {
      await driver.deleteRestoreUpload(uploadId);
    } catch (err) {
      logger.warn(`[restore] ${instance.slug}: failed to delete agent upload ${uploadId}: ${(err as Error).message}`);
    }

    // 6. Finalize DB row
    if (finalState.status === 'COMPLETED') {
      await prisma.instanceRestore.update({
        where: { id: restoreId },
        data: {
          status: RestoreStatus.COMPLETED,
          progressJson: finalState as unknown as Prisma.InputJsonValue,
          logTail: finalState.logTail ?? null,
          completedAt: new Date(),
        },
      });

      await writeRestoreAuditLog({
        restoreId,
        instanceId: instance.id,
        backupId: restore.backupId,
        triggeredById,
        ipAddress,
        options,
        outcome: 'success',
        sha256,
        uploadId,
      });

      logger.info(`[restore] ${instance.slug}: restore ${restoreId} COMPLETED`);
    } else {
      // Agent ran the restore script but it exited non-zero (or reported failure).
      const errMsg = finalState.errorMessage || `Agent reported FAILED (exit ${finalState.exitCode})`;
      await prisma.instanceRestore.update({
        where: { id: restoreId },
        data: {
          status: RestoreStatus.FAILED,
          progressJson: finalState as unknown as Prisma.InputJsonValue,
          logTail: finalState.logTail ?? null,
          errorMessage: errMsg,
          completedAt: new Date(),
        },
      });
      await writeRestoreAuditLog({
        restoreId,
        instanceId: instance.id,
        backupId: restore.backupId,
        triggeredById,
        ipAddress,
        options,
        outcome: 'agent_failed',
        sha256,
        uploadId,
        errorMessage: errMsg,
      });
      logger.warn(`[restore] ${instance.slug}: restore ${restoreId} FAILED (exit ${finalState.exitCode})`);
    }
  } catch (err) {
    // Orchestration error: driver/hash/upload failure, timeout, or unexpected throw.
    const errMsg = (err as Error).message;
    await prisma.instanceRestore.update({
      where: { id: restoreId },
      data: {
        status: RestoreStatus.FAILED,
        errorMessage: errMsg,
        completedAt: new Date(),
      },
    });
    await writeRestoreAuditLog({
      restoreId,
      instanceId: instance.id,
      backupId: restore.backupId,
      triggeredById,
      ipAddress,
      options,
      outcome: 'orchestration_error',
      sha256,
      uploadId,
      errorMessage: errMsg,
    });
    logger.error(`[restore] ${restore.instance.slug}: ${errMsg}`);

    // Best-effort cleanup of the agent upload if we got that far
    if (uploadId) {
      try {
        const driver = await getRemoteDriverForInstance({
          id: instance.id,
          slug: instance.slug,
          isRemote: instance.isRemote,
          agentUrl: instance.agentUrl,
        });
        await driver.deleteRestoreUpload(uploadId);
      } catch { /* ignore */ }
    }
  }
}
|
||||
|
||||
/**
|
||||
* List restores with optional filtering and pagination.
|
||||
*/
|
||||
export async function listRestores(instanceId?: string, page = 1, limit = 50) {
|
||||
const where = instanceId ? { instanceId } : {};
|
||||
const [data, total] = await Promise.all([
|
||||
prisma.instanceRestore.findMany({
|
||||
where,
|
||||
orderBy: { startedAt: 'desc' },
|
||||
skip: (page - 1) * limit,
|
||||
take: limit,
|
||||
include: {
|
||||
instance: { select: { id: true, name: true, slug: true } },
|
||||
backup: { select: { id: true, archivePath: true, sizeBytes: true } },
|
||||
},
|
||||
}),
|
||||
prisma.instanceRestore.count({ where }),
|
||||
]);
|
||||
return { data, total, page, limit };
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a single restore by ID.
|
||||
*/
|
||||
export async function getRestore(restoreId: string) {
|
||||
const restore = await prisma.instanceRestore.findUnique({
|
||||
where: { id: restoreId },
|
||||
include: {
|
||||
instance: { select: { id: true, name: true, slug: true } },
|
||||
backup: { select: { id: true, archivePath: true, sizeBytes: true, manifest: true } },
|
||||
},
|
||||
});
|
||||
if (!restore) {
|
||||
throw new AppError(404, 'Restore not found', 'NOT_FOUND');
|
||||
}
|
||||
return restore;
|
||||
}
|
||||
599
changemaker-control-panel/api/src/services/tunnel.service.ts
Normal file
599
changemaker-control-panel/api/src/services/tunnel.service.ts
Normal file
@ -0,0 +1,599 @@
|
||||
/**
|
||||
* Remote tunnel management service.
|
||||
*
|
||||
* Orchestrates Pangolin site/resource/target creation on behalf of remote CML
|
||||
* instances, then pushes Newt credentials to the remote host via the mTLS agent.
|
||||
* The CCP holds the Pangolin API token centrally — remote instances never touch
|
||||
* the Pangolin API themselves.
|
||||
*/
|
||||
import { AuditAction, Prisma } from '@prisma/client';
|
||||
import { prisma } from '../lib/prisma';
|
||||
import { env } from '../config/env';
|
||||
import { AppError } from '../middleware/error-handler';
|
||||
import { logger } from '../utils/logger';
|
||||
import { getRemoteDriverForInstance } from './execution-driver';
|
||||
import {
|
||||
CcpPangolinClient,
|
||||
type PangolinDomain,
|
||||
type PangolinResource,
|
||||
} from './ccp-pangolin.client';
|
||||
|
||||
// ─── Resource definitions ──────────────────────────────────────────
|
||||
|
||||
/** One Pangolin resource to expose for an instance. */
interface ResourceDef {
  // Per-service subdomain fragment; '' means the instance's root resource.
  subdomain: string;
  // Display name used for the Pangolin resource.
  name: string;
  // Required resources are always created; a creation failure aborts setup.
  required?: boolean;
  // Name of the Instance boolean field gating this resource (truthy → create).
  featureFlag?: string;
}

// Catalogue of every resource the CCP may expose for an instance. Entries with
// neither `required` nor `featureFlag` are optional but created unconditionally;
// their individual creation failures are logged and skipped.
const RESOURCE_DEFINITIONS: ResourceDef[] = [
  { subdomain: 'app', name: 'Admin GUI', required: true },
  { subdomain: 'api', name: 'API', required: true },
  { subdomain: '', name: 'Public Site', required: true },
  { subdomain: 'media', name: 'Media API', featureFlag: 'enableMedia' },
  { subdomain: 'db', name: 'NocoDB', required: false },
  { subdomain: 'docs', name: 'Docs', required: false },
  { subdomain: 'code', name: 'Code Server', required: false },
  { subdomain: 'git', name: 'Gitea', required: false },
  { subdomain: 'home', name: 'Homepage', required: false },
  { subdomain: 'listmonk', name: 'Listmonk', featureFlag: 'enableListmonk' },
  { subdomain: 'qr', name: 'Mini QR', required: false },
  { subdomain: 'draw', name: 'Excalidraw', required: false },
  { subdomain: 'vault', name: 'Vaultwarden', required: false },
  { subdomain: 'mail', name: 'MailHog', required: false },
  { subdomain: 'chat', name: 'Rocket.Chat', featureFlag: 'enableChat' },
  { subdomain: 'events', name: 'Gancio', featureFlag: 'enableGancio' },
  { subdomain: 'meet', name: 'Jitsi Meet', featureFlag: 'enableMeet' },
  { subdomain: 'grafana', name: 'Grafana', featureFlag: 'enableMonitoring' },
];
|
||||
|
||||
// ─── Helpers ───────────────────────────────────────────────────────
|
||||
|
||||
function getPangolinClient(): CcpPangolinClient {
|
||||
if (!env.PANGOLIN_API_URL || !env.PANGOLIN_API_KEY || !env.PANGOLIN_ORG_ID) {
|
||||
throw new AppError(
|
||||
501,
|
||||
'Pangolin API not configured on this CCP. Set PANGOLIN_API_URL, PANGOLIN_API_KEY, PANGOLIN_ORG_ID in the CCP .env file.',
|
||||
'PANGOLIN_NOT_CONFIGURED'
|
||||
);
|
||||
}
|
||||
return new CcpPangolinClient(env.PANGOLIN_API_URL, env.PANGOLIN_API_KEY, env.PANGOLIN_ORG_ID);
|
||||
}
|
||||
|
||||
function fullSubdomain(prefix: string, sub: string): string {
|
||||
if (!sub) return prefix; // root domain → prefix alone (e.g., "ck")
|
||||
return `${prefix}-${sub}`; // e.g., "ck-app", "ck-api"
|
||||
}
|
||||
|
||||
function shouldCreateResource(
|
||||
def: ResourceDef,
|
||||
instance: Record<string, unknown>
|
||||
): boolean {
|
||||
if (def.required) return true;
|
||||
if (def.featureFlag) return !!(instance as Record<string, unknown>)[def.featureFlag];
|
||||
return true; // optional with no feature flag → always create
|
||||
}
|
||||
|
||||
async function findDomainForInstance(
|
||||
client: CcpPangolinClient,
|
||||
instanceDomain: string
|
||||
): Promise<PangolinDomain> {
|
||||
const domains = await client.listDomains();
|
||||
// Match the instance's domain against registered Pangolin base domains
|
||||
// e.g., instance.domain = "cursedknowledge.org" → look for base domain "cursedknowledge.org"
|
||||
// or broader: instance.domain = "app.example.com" → look for "example.com"
|
||||
const exact = domains.find((d) => d.baseDomain === instanceDomain);
|
||||
if (exact) return exact;
|
||||
|
||||
// Try matching parent domain (e.g., sub.example.com → example.com)
|
||||
const parts = instanceDomain.split('.');
|
||||
for (let i = 1; i < parts.length - 1; i++) {
|
||||
const parent = parts.slice(i).join('.');
|
||||
const match = domains.find((d) => d.baseDomain === parent);
|
||||
if (match) return match;
|
||||
}
|
||||
|
||||
throw new AppError(
|
||||
400,
|
||||
`No Pangolin domain matches instance domain "${instanceDomain}". Available: ${domains.map((d) => d.baseDomain).join(', ')}`,
|
||||
'DOMAIN_NOT_FOUND'
|
||||
);
|
||||
}
|
||||
|
||||
// ─── Setup ─────────────────────────────────────────────────────────
|
||||
|
||||
/** Options accepted by setupTunnel(). */
export interface SetupTunnelOptions {
  // Prefix used for every created subdomain; defaults to the instance slug.
  subdomainPrefix?: string;
}

/** Summary returned by setupTunnel() on success. */
export interface TunnelSetupResult {
  // Pangolin site ID created for this instance (stringified).
  siteId: string;
  // Newt client ID issued with the site.
  newtId: string;
  // Endpoint the remote Newt client connects to (from PANGOLIN_ENDPOINT or
  // derived from the API URL).
  endpoint: string;
  // Number of resources that exist for this instance after setup.
  resourceCount: number;
  // Per-resource details; `subdomain` is the full prefixed value
  // (e.g. "ck-app", or "ck" for the root resource).
  resources: Array<{ subdomain: string; name: string; resourceId: string }>;
}
|
||||
|
||||
export async function setupTunnel(
|
||||
instanceId: string,
|
||||
options: SetupTunnelOptions,
|
||||
userId?: string,
|
||||
ipAddress?: string | null
|
||||
): Promise<TunnelSetupResult> {
|
||||
const client = getPangolinClient();
|
||||
|
||||
const instance = await prisma.instance.findUnique({ where: { id: instanceId } });
|
||||
if (!instance) throw new AppError(404, 'Instance not found', 'NOT_FOUND');
|
||||
if (!instance.isRemote) throw new AppError(400, 'Tunnel setup via Pangolin API is only for remote instances', 'NOT_REMOTE');
|
||||
if (instance.pangolinSiteId) {
|
||||
throw new AppError(400, 'Tunnel is already configured. Use sync to update resources, or teardown first.', 'ALREADY_CONFIGURED');
|
||||
}
|
||||
|
||||
const prefix = options.subdomainPrefix || instance.slug;
|
||||
|
||||
const driver = await getRemoteDriverForInstance({
|
||||
id: instance.id,
|
||||
slug: instance.slug,
|
||||
isRemote: instance.isRemote,
|
||||
agentUrl: instance.agentUrl,
|
||||
});
|
||||
|
||||
// 1. Get Newt credentials
|
||||
logger.info(`[tunnel] ${instance.slug}: picking site defaults`);
|
||||
const defaults = await client.pickSiteDefaults();
|
||||
|
||||
// 2. Create site
|
||||
logger.info(`[tunnel] ${instance.slug}: creating Pangolin site`);
|
||||
const site = await client.createSite({
|
||||
name: instance.slug,
|
||||
type: 'newt',
|
||||
newtId: defaults.newtId,
|
||||
secret: defaults.newtSecret,
|
||||
address: defaults.address,
|
||||
});
|
||||
const siteId = String(site.siteId);
|
||||
const newtId = site.newt?.newtId || defaults.newtId;
|
||||
const newtSecret = site.newt?.secret || defaults.newtSecret;
|
||||
|
||||
// The Pangolin endpoint (what Newt connects to) may be different from
|
||||
// the API URL. E.g., API = api.bnkserve.org/v1, endpoint = pangolin.bnkserve.org.
|
||||
// If PANGOLIN_ENDPOINT is set, use it. Otherwise derive from API URL.
|
||||
let endpoint = env.PANGOLIN_ENDPOINT || '';
|
||||
if (!endpoint) {
|
||||
const endpointUrl = new URL(env.PANGOLIN_API_URL);
|
||||
endpoint = `${endpointUrl.protocol}//${endpointUrl.hostname}${endpointUrl.port ? ':' + endpointUrl.port : ''}`;
|
||||
}
|
||||
|
||||
// 3. Find matching domain
|
||||
const domain = await findDomainForInstance(client, instance.domain);
|
||||
logger.info(`[tunnel] ${instance.slug}: matched domain ${domain.baseDomain} (id: ${domain.domainId})`);
|
||||
|
||||
// 4. Create resources + targets
|
||||
const createdResources: Array<{ subdomain: string; name: string; resourceId: string }> = [];
|
||||
const existingResources = await client.listResources();
|
||||
|
||||
for (const def of RESOURCE_DEFINITIONS) {
|
||||
if (!shouldCreateResource(def, instance as unknown as Record<string, unknown>)) {
|
||||
logger.debug(`[tunnel] ${instance.slug}: skipping ${def.name} (feature not enabled)`);
|
||||
continue;
|
||||
}
|
||||
|
||||
const sub = fullSubdomain(prefix, def.subdomain);
|
||||
// Build the expected full domain so we can do an idempotent check against
|
||||
// Pangolin's existing resources. Pangolin returns `fullDomain` not `subdomain`.
|
||||
const expectedFullDomain = sub
|
||||
? `${sub}.${domain.baseDomain}`
|
||||
: domain.baseDomain;
|
||||
|
||||
// Idempotent: skip if a resource with this fullDomain already exists
|
||||
const existing = existingResources.find(
|
||||
(r) => r.fullDomain === expectedFullDomain
|
||||
);
|
||||
if (existing) {
|
||||
logger.debug(`[tunnel] ${instance.slug}: resource ${def.name} (${expectedFullDomain}) already exists`);
|
||||
createdResources.push({ subdomain: sub, name: def.name, resourceId: String(existing.resourceId) });
|
||||
continue;
|
||||
}
|
||||
|
||||
try {
|
||||
const resourcePayload: Record<string, unknown> = {
|
||||
name: def.name,
|
||||
domainId: domain.domainId,
|
||||
http: true,
|
||||
protocol: 'tcp',
|
||||
};
|
||||
// Root domain: omit subdomain entirely (empty string is rejected by Pangolin)
|
||||
if (sub) resourcePayload.subdomain = sub;
|
||||
|
||||
const resource = await client.createResource(resourcePayload as unknown as Parameters<typeof client.createResource>[0]);
|
||||
|
||||
// Make the resource public (no SSO, no access block)
|
||||
try {
|
||||
await client.updateResource(resource.resourceId, { sso: false, blockAccess: false });
|
||||
} catch (err) {
|
||||
logger.warn(`[tunnel] ${instance.slug}: failed to make ${def.name} public: ${(err as Error).message}`);
|
||||
}
|
||||
|
||||
// Create target pointing to nginx:80 on the remote host
|
||||
await client.createTarget(resource.resourceId, {
|
||||
siteId: Number(siteId),
|
||||
ip: 'nginx',
|
||||
port: 80,
|
||||
method: 'http',
|
||||
enabled: true,
|
||||
});
|
||||
|
||||
createdResources.push({ subdomain: sub, name: def.name, resourceId: resource.resourceId });
|
||||
logger.info(`[tunnel] ${instance.slug}: created resource ${def.name} → ${sub}.${domain.baseDomain}`);
|
||||
} catch (err) {
|
||||
if (def.required) throw err;
|
||||
logger.warn(`[tunnel] ${instance.slug}: failed to create optional resource ${def.name}: ${(err as Error).message}`);
|
||||
}
|
||||
}
|
||||
|
||||
// 5. Push Newt credentials to remote .env
|
||||
logger.info(`[tunnel] ${instance.slug}: pushing Newt credentials to remote .env`);
|
||||
const envLines = [
|
||||
`PANGOLIN_ENDPOINT=${endpoint}`,
|
||||
`PANGOLIN_SITE_ID=${siteId}`,
|
||||
`PANGOLIN_NEWT_ID=${newtId}`,
|
||||
`PANGOLIN_NEWT_SECRET=${newtSecret}`,
|
||||
].join('\n') + '\n';
|
||||
|
||||
// Read current .env, append/replace Pangolin vars
|
||||
const currentEnv = await driver.readEnvFile('');
|
||||
const envContent = buildUpdatedEnv(currentEnv, {
|
||||
PANGOLIN_ENDPOINT: endpoint,
|
||||
PANGOLIN_SITE_ID: siteId,
|
||||
PANGOLIN_NEWT_ID: newtId,
|
||||
PANGOLIN_NEWT_SECRET: newtSecret,
|
||||
});
|
||||
|
||||
await driver.writeFiles('', [{ relativePath: '.env', content: envContent }]);
|
||||
|
||||
// 6. Persist on Instance row
|
||||
await prisma.instance.update({
|
||||
where: { id: instanceId },
|
||||
data: {
|
||||
pangolinEndpoint: endpoint,
|
||||
pangolinSiteId: siteId,
|
||||
pangolinNewtId: newtId,
|
||||
pangolinNewtSecret: newtSecret,
|
||||
pangolinSubdomainPrefix: prefix,
|
||||
},
|
||||
});
|
||||
|
||||
// 7. Recreate Newt container to pick up the new .env vars.
|
||||
// `docker compose restart` does NOT re-read .env — it only sends SIGTERM+restart.
|
||||
// `docker compose up -d newt` detects env var changes (via ${PANGOLIN_NEWT_ID}
|
||||
// expansion in docker-compose.yml) and recreates the container automatically.
|
||||
logger.info(`[tunnel] ${instance.slug}: recreating newt container with new credentials`);
|
||||
try {
|
||||
await driver.composeUp('', '', ['newt']);
|
||||
} catch (err) {
|
||||
logger.warn(`[tunnel] ${instance.slug}: composeUp(newt) failed: ${(err as Error).message}`);
|
||||
}
|
||||
|
||||
// 8. Audit log
|
||||
if (userId) {
|
||||
await prisma.auditLog.create({
|
||||
data: {
|
||||
userId,
|
||||
instanceId,
|
||||
action: AuditAction.PANGOLIN_SETUP,
|
||||
details: {
|
||||
source: 'remote',
|
||||
siteId,
|
||||
newtId,
|
||||
endpoint,
|
||||
resourceCount: createdResources.length,
|
||||
subdomainPrefix: prefix,
|
||||
} as unknown as Prisma.InputJsonValue,
|
||||
ipAddress: ipAddress ?? null,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
logger.info(`[tunnel] ${instance.slug}: tunnel setup complete — ${createdResources.length} resources created`);
|
||||
|
||||
return {
|
||||
siteId,
|
||||
newtId,
|
||||
endpoint,
|
||||
resourceCount: createdResources.length,
|
||||
resources: createdResources,
|
||||
};
|
||||
}
|
||||
|
||||
// ─── Sync ──────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Reconcile Pangolin resources for an instance with RESOURCE_DEFINITIONS:
 * create any resource (plus its nginx:80 target) that should exist but does
 * not yet. Existing resources are left untouched; nothing is deleted.
 *
 * @param instanceId - Instance whose tunnel resources to sync.
 * @param userId - Acting user for the audit log (optional).
 * @param ipAddress - Client IP for the audit log (optional).
 * @returns `{ synced: true, created }` where `created` counts new resources.
 * @throws AppError 404 when the instance is missing, 400 when no tunnel is
 *   configured, 501 when Pangolin is not configured.
 */
export async function syncResources(
  instanceId: string,
  userId?: string,
  ipAddress?: string | null
) {
  const client = getPangolinClient();
  const instance = await prisma.instance.findUnique({ where: { id: instanceId } });
  if (!instance) throw new AppError(404, 'Instance not found', 'NOT_FOUND');
  if (!instance.pangolinSiteId) throw new AppError(400, 'No tunnel configured', 'NO_TUNNEL');

  // Prefix recorded at setup time; fall back to the slug for older rows.
  const prefix = instance.pangolinSubdomainPrefix || instance.slug;
  const domain = await findDomainForInstance(client, instance.domain);
  const existingResources = await client.listResources();
  const siteId = instance.pangolinSiteId;

  let created = 0;
  for (const def of RESOURCE_DEFINITIONS) {
    if (!shouldCreateResource(def, instance as unknown as Record<string, unknown>)) continue;

    const sub = fullSubdomain(prefix, def.subdomain);
    // Pangolin reports `fullDomain`, so compare against the expected value.
    const expectedFullDomain = sub ? `${sub}.${domain.baseDomain}` : domain.baseDomain;
    const existing = existingResources.find((r) => r.fullDomain === expectedFullDomain);
    if (existing) continue;

    try {
      const resourcePayload: Record<string, unknown> = {
        name: def.name,
        domainId: domain.domainId,
        http: true,
        protocol: 'tcp',
      };
      // Root resource: omit the subdomain key entirely.
      if (sub) resourcePayload.subdomain = sub;

      const resource = await client.createResource(resourcePayload as unknown as Parameters<typeof client.createResource>[0]);
      // Public resource: no SSO, no access block.
      await client.updateResource(resource.resourceId, { sso: false, blockAccess: false });
      // Target points at nginx:80 on the remote host via the existing site.
      await client.createTarget(resource.resourceId, {
        siteId: Number(siteId),
        ip: 'nginx',
        port: 80,
        method: 'http',
        enabled: true,
      });
      created++;
      logger.info(`[tunnel] ${instance.slug}: sync created ${def.name} (${sub})`);
    } catch (err) {
      // Required resources abort the sync; optional failures are logged only.
      if (def.required) throw err;
      logger.warn(`[tunnel] ${instance.slug}: sync failed for ${def.name}: ${(err as Error).message}`);
    }
  }

  if (userId) {
    await prisma.auditLog.create({
      data: {
        userId,
        instanceId,
        action: AuditAction.PANGOLIN_SYNC,
        details: { source: 'remote', created, siteId } as unknown as Prisma.InputJsonValue,
        ipAddress: ipAddress ?? null,
      },
    });
  }

  return { synced: true, created };
}
|
||||
|
||||
// ─── Teardown ──────────────────────────────────────────────────────
|
||||
|
||||
export async function teardownTunnel(
|
||||
instanceId: string,
|
||||
userId?: string,
|
||||
ipAddress?: string | null
|
||||
) {
|
||||
const client = getPangolinClient();
|
||||
const instance = await prisma.instance.findUnique({ where: { id: instanceId } });
|
||||
if (!instance) throw new AppError(404, 'Instance not found', 'NOT_FOUND');
|
||||
if (!instance.pangolinSiteId) throw new AppError(400, 'No tunnel configured', 'NO_TUNNEL');
|
||||
|
||||
const siteId = instance.pangolinSiteId;
|
||||
|
||||
// Delete site from Pangolin (cascades resources + targets)
|
||||
try {
|
||||
await client.deleteSite(siteId);
|
||||
logger.info(`[tunnel] ${instance.slug}: deleted Pangolin site ${siteId}`);
|
||||
} catch (err) {
|
||||
logger.warn(`[tunnel] ${instance.slug}: deleteSite failed (may already be gone): ${(err as Error).message}`);
|
||||
}
|
||||
|
||||
// Clear Instance fields
|
||||
await prisma.instance.update({
|
||||
where: { id: instanceId },
|
||||
data: {
|
||||
pangolinEndpoint: null,
|
||||
pangolinSiteId: null,
|
||||
pangolinNewtId: null,
|
||||
pangolinNewtSecret: null,
|
||||
},
|
||||
});
|
||||
|
||||
// Push empty Pangolin vars to remote .env
|
||||
if (instance.isRemote) {
|
||||
try {
|
||||
const driver = await getRemoteDriverForInstance({
|
||||
id: instance.id,
|
||||
slug: instance.slug,
|
||||
isRemote: instance.isRemote,
|
||||
agentUrl: instance.agentUrl,
|
||||
});
|
||||
const currentEnv = await driver.readEnvFile('');
|
||||
const envContent = buildUpdatedEnv(currentEnv, {
|
||||
PANGOLIN_ENDPOINT: '',
|
||||
PANGOLIN_SITE_ID: '',
|
||||
PANGOLIN_NEWT_ID: '',
|
||||
PANGOLIN_NEWT_SECRET: '',
|
||||
});
|
||||
await driver.writeFiles('', [{ relativePath: '.env', content: envContent }]);
|
||||
|
||||
// Stop newt container (best effort)
|
||||
try {
|
||||
await driver.composeStop('', '');
|
||||
await driver.composeUp('', ''); // restart everything except newt won't start without creds
|
||||
} catch { /* ignore */ }
|
||||
} catch (err) {
|
||||
logger.warn(`[tunnel] ${instance.slug}: failed to push empty env to remote: ${(err as Error).message}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Audit log
|
||||
if (userId) {
|
||||
await prisma.auditLog.create({
|
||||
data: {
|
||||
userId,
|
||||
instanceId,
|
||||
action: AuditAction.PANGOLIN_TEARDOWN,
|
||||
details: { source: 'remote', siteId } as unknown as Prisma.InputJsonValue,
|
||||
ipAddress: ipAddress ?? null,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
return { tornDown: true };
|
||||
}
|
||||
|
||||
// ─── Status ────────────────────────────────────────────────────────
|
||||
|
||||
export interface TunnelStatus {
|
||||
configured: boolean;
|
||||
online?: boolean;
|
||||
siteId?: string;
|
||||
endpoint?: string;
|
||||
resources?: Array<{
|
||||
subdomain: string;
|
||||
name: string;
|
||||
resourceId: string;
|
||||
hasTarget: boolean;
|
||||
targetIp?: string;
|
||||
targetPort?: number;
|
||||
}>;
|
||||
}
|
||||
|
||||
export async function getTunnelStatus(instanceId: string): Promise<TunnelStatus> {
|
||||
const instance = await prisma.instance.findUnique({ where: { id: instanceId } });
|
||||
if (!instance) throw new AppError(404, 'Instance not found', 'NOT_FOUND');
|
||||
|
||||
if (!instance.pangolinSiteId) {
|
||||
return { configured: false };
|
||||
}
|
||||
|
||||
// For local instances, return stored values without querying Pangolin API
|
||||
if (!instance.isRemote) {
|
||||
return {
|
||||
configured: true,
|
||||
siteId: instance.pangolinSiteId ?? undefined,
|
||||
endpoint: instance.pangolinEndpoint ?? undefined,
|
||||
};
|
||||
}
|
||||
|
||||
const client = getPangolinClient();
|
||||
|
||||
let online = false;
|
||||
try {
|
||||
const site = await client.getSite(instance.pangolinSiteId);
|
||||
online = site.online ?? false;
|
||||
} catch (err) {
|
||||
logger.warn(`[tunnel] ${instance.slug}: getSite failed: ${(err as Error).message}`);
|
||||
}
|
||||
|
||||
const resources: TunnelStatus['resources'] = [];
|
||||
try {
|
||||
const allResources = await client.listResources();
|
||||
const siteIdNum = Number(instance.pangolinSiteId);
|
||||
// Filter to resources that have a target pointing to our siteId.
|
||||
// This is the most reliable filter since it uses the actual Pangolin
|
||||
// site association rather than guessing from subdomain names.
|
||||
for (const res of allResources) {
|
||||
let hasTarget = false;
|
||||
let targetIp: string | undefined;
|
||||
let targetPort: number | undefined;
|
||||
let belongsToUs = false;
|
||||
try {
|
||||
const targets = await client.listTargets(String(res.resourceId));
|
||||
for (const t of targets) {
|
||||
if (Number(t.siteId) === siteIdNum) {
|
||||
belongsToUs = true;
|
||||
hasTarget = true;
|
||||
targetIp = t.ip;
|
||||
targetPort = t.port;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} catch { /* ignore */ }
|
||||
|
||||
if (belongsToUs) {
|
||||
// Extract subdomain from fullDomain for display
|
||||
const fd = res.fullDomain || '';
|
||||
const domainSuffix = `.${instance.domain}`;
|
||||
const subdomain = fd.endsWith(domainSuffix)
|
||||
? fd.slice(0, -domainSuffix.length)
|
||||
: fd === instance.domain ? '' : fd;
|
||||
resources.push({
|
||||
subdomain,
|
||||
name: res.name,
|
||||
resourceId: String(res.resourceId),
|
||||
hasTarget,
|
||||
targetIp,
|
||||
targetPort,
|
||||
});
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
logger.warn(`[tunnel] ${instance.slug}: listResources failed: ${(err as Error).message}`);
|
||||
}
|
||||
|
||||
return {
|
||||
configured: true,
|
||||
online,
|
||||
siteId: instance.pangolinSiteId ?? undefined,
|
||||
endpoint: instance.pangolinEndpoint ?? undefined,
|
||||
resources,
|
||||
};
|
||||
}
|
||||
|
||||
// ─── .env Helpers ──────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Quote a .env value if it contains characters that dotenv parsers interpret:
|
||||
* # (comment), = (separator), spaces, quotes, backslashes, newlines.
|
||||
* Pangolin-issued UUIDs/base64 secrets typically don't need quoting, but
|
||||
* defensive quoting prevents silent corruption if they ever do.
|
||||
*/
|
||||
function quoteEnvValue(value: string): string {
|
||||
if (/[\s#"'\\=\n\r]/.test(value)) {
|
||||
return `"${value.replace(/\\/g, '\\\\').replace(/"/g, '\\"').replace(/\n/g, '\\n')}"`;
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build an updated .env string by replacing/appending the given key-value pairs.
|
||||
* Preserves all existing keys not in the update set.
|
||||
*/
|
||||
function buildUpdatedEnv(
|
||||
currentEnv: Record<string, string> | null,
|
||||
updates: Record<string, string>
|
||||
): string {
|
||||
const lines: string[] = [];
|
||||
const seen = new Set<string>();
|
||||
|
||||
// If we have the current env, reproduce it with replacements
|
||||
if (currentEnv) {
|
||||
for (const [key, value] of Object.entries(currentEnv)) {
|
||||
if (key in updates) {
|
||||
if (updates[key]) lines.push(`${key}=${quoteEnvValue(updates[key]!)}`);
|
||||
// If update value is empty, omit the line (remove the var)
|
||||
seen.add(key);
|
||||
} else {
|
||||
lines.push(`${key}=${quoteEnvValue(value)}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Append new keys not already in the file
|
||||
for (const [key, value] of Object.entries(updates)) {
|
||||
if (!seen.has(key) && value) {
|
||||
lines.push(`${key}=${quoteEnvValue(value)}`);
|
||||
}
|
||||
}
|
||||
|
||||
return lines.join('\n') + '\n';
|
||||
}
|
||||
@ -2,14 +2,61 @@ import { exec as execCb } from 'child_process';
|
||||
import { promisify } from 'util';
|
||||
import fs from 'fs/promises';
|
||||
import path from 'path';
|
||||
import { UpgradeStatus, AuditAction, InstanceStatus, Prisma } from '@prisma/client';
|
||||
import { UpgradeStatus, AuditAction, InstanceStatus, Prisma, Instance } from '@prisma/client';
|
||||
import { prisma } from '../lib/prisma';
|
||||
import { logger } from '../utils/logger';
|
||||
import { createEvent } from './event.service';
|
||||
import { getRemoteDriverForInstance } from './execution-driver';
|
||||
import type { AgentUpdateStatus } from './remote-driver';
|
||||
|
||||
/**
|
||||
* Write an INSTANCE_UPGRADE audit log entry capturing a terminal outcome.
|
||||
* Wrapped in try/catch so that an audit-log DB failure cannot mask the
|
||||
* underlying upgrade row status update.
|
||||
*
|
||||
* Called from all three terminal paths (both local and remote):
|
||||
* - 'completed' — upgrade.sh/agent reported success
|
||||
* - 'failed' — upgrade.sh/agent reported failure
|
||||
* - 'orchestration_error' — CCP-side exception, timeout, or unreachable agent
|
||||
*/
|
||||
async function writeUpgradeAuditLog(args: {
|
||||
upgradeId: string;
|
||||
instanceId: string;
|
||||
triggeredById: string | null;
|
||||
source: 'local' | 'remote';
|
||||
outcome: 'completed' | 'failed' | 'orchestration_error';
|
||||
previousCommit: string | null;
|
||||
newCommit: string | null;
|
||||
durationSeconds: number | null;
|
||||
errorMessage?: string | null;
|
||||
}): Promise<void> {
|
||||
if (!args.triggeredById) return;
|
||||
try {
|
||||
await prisma.auditLog.create({
|
||||
data: {
|
||||
userId: args.triggeredById,
|
||||
instanceId: args.instanceId,
|
||||
action: AuditAction.INSTANCE_UPGRADE,
|
||||
details: {
|
||||
upgradeId: args.upgradeId,
|
||||
source: args.source,
|
||||
outcome: args.outcome,
|
||||
previousCommit: args.previousCommit,
|
||||
newCommit: args.newCommit,
|
||||
durationSeconds: args.durationSeconds,
|
||||
...(args.errorMessage ? { errorMessage: args.errorMessage.substring(0, 500) } : {}),
|
||||
} as unknown as Prisma.InputJsonValue,
|
||||
},
|
||||
});
|
||||
} catch (err) {
|
||||
logger.error(`[upgrade] failed to write audit log for ${args.upgradeId}: ${(err as Error).message}`);
|
||||
}
|
||||
}
|
||||
|
||||
const exec = promisify(execCb);
|
||||
|
||||
const UPGRADE_TIMEOUT = 600_000; // 10 minutes
|
||||
const UPGRADE_TIMEOUT = 600_000; // 10 minutes — local upgrades
|
||||
const REMOTE_UPGRADE_TIMEOUT = 15 * 60 * 1000; // 15 minutes — remote (network round trips)
|
||||
const PROGRESS_POLL_INTERVAL = 2_000; // 2 seconds
|
||||
|
||||
// ─── Update Check ─────────────────────────────────────────────────
|
||||
@ -26,13 +73,57 @@ export interface UpdateStatus {
|
||||
}
|
||||
|
||||
/**
|
||||
* Check for available updates by running upgrade-check.sh in the instance's basePath.
|
||||
* Falls back to reading an existing status.json if the script isn't available.
|
||||
* Check for available updates. Branches on instance.isRemote:
|
||||
* - Local: runs upgrade-check.sh in the instance's basePath and reads status.json
|
||||
* - Remote: calls the agent's POST /upgrade/check endpoint over mTLS
|
||||
*/
|
||||
export async function checkForUpdates(instanceId: string): Promise<UpdateStatus> {
|
||||
const instance = await prisma.instance.findUnique({ where: { id: instanceId } });
|
||||
if (!instance) throw new Error('Instance not found');
|
||||
|
||||
if (instance.isRemote) {
|
||||
return checkForUpdatesRemote(instance);
|
||||
}
|
||||
return checkForUpdatesLocal(instance);
|
||||
}
|
||||
|
||||
/**
|
||||
* Remote check: ask the agent to run upgrade-check.sh and return its status.json.
|
||||
*/
|
||||
async function checkForUpdatesRemote(instance: Instance): Promise<UpdateStatus> {
|
||||
try {
|
||||
const driver = await getRemoteDriverForInstance({
|
||||
id: instance.id,
|
||||
slug: instance.slug,
|
||||
isRemote: instance.isRemote,
|
||||
agentUrl: instance.agentUrl,
|
||||
});
|
||||
const status: AgentUpdateStatus = await driver.checkForUpdates();
|
||||
return {
|
||||
branch: status.branch,
|
||||
currentCommit: status.currentCommit,
|
||||
currentMessage: status.currentMessage,
|
||||
remoteCommit: status.remoteCommit,
|
||||
commitsBehind: status.commitsBehind,
|
||||
changelog: status.changelog,
|
||||
checkedAt: status.checkedAt,
|
||||
error: status.error,
|
||||
};
|
||||
} catch (err) {
|
||||
logger.warn(`[upgrade] remote check failed for ${instance.slug}: ${(err as Error).message}`);
|
||||
return {
|
||||
branch: instance.gitBranch,
|
||||
currentCommit: instance.gitCommit || 'unknown',
|
||||
remoteCommit: null,
|
||||
commitsBehind: 0,
|
||||
changelog: [],
|
||||
checkedAt: new Date().toISOString(),
|
||||
error: `Remote check failed: ${(err as Error).message}`,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
async function checkForUpdatesLocal(instance: Instance): Promise<UpdateStatus> {
|
||||
const basePath = instance.basePath;
|
||||
const statusFile = path.join(basePath, 'data', 'upgrade', 'status.json');
|
||||
const scriptPath = path.join(basePath, 'scripts', 'upgrade-check.sh');
|
||||
@ -119,16 +210,21 @@ export async function startUpgrade(
|
||||
throw new Error('An upgrade is already in progress for this instance');
|
||||
}
|
||||
|
||||
// Get current commit for tracking
|
||||
let currentCommit: string | null = null;
|
||||
try {
|
||||
const { stdout } = await exec('git rev-parse --short HEAD', {
|
||||
cwd: instance.basePath,
|
||||
timeout: 5_000,
|
||||
});
|
||||
currentCommit = stdout.trim();
|
||||
} catch {
|
||||
// Non-critical — may be a release install without .git
|
||||
// Get current commit for tracking. For local instances we can read it from
|
||||
// git directly; for remote instances we either trust the DB-tracked value
|
||||
// (set by previous upgrade-check) or leave it null and let upgrade.sh
|
||||
// report the previous commit in result.json.
|
||||
let currentCommit: string | null = instance.gitCommit;
|
||||
if (!instance.isRemote) {
|
||||
try {
|
||||
const { stdout } = await exec('git rev-parse --short HEAD', {
|
||||
cwd: instance.basePath,
|
||||
timeout: 5_000,
|
||||
});
|
||||
currentCommit = stdout.trim();
|
||||
} catch {
|
||||
// Non-critical — may be a release install without .git
|
||||
}
|
||||
}
|
||||
|
||||
const branch = options?.branch || instance.gitBranch;
|
||||
@ -154,20 +250,222 @@ export async function startUpgrade(
|
||||
upgradeId: upgrade.id,
|
||||
previousCommit: currentCommit,
|
||||
branch,
|
||||
source: instance.isRemote ? 'remote' : 'local',
|
||||
options: options || {},
|
||||
} as unknown as Prisma.InputJsonValue,
|
||||
ipAddress,
|
||||
},
|
||||
});
|
||||
|
||||
// Fire-and-forget: run the upgrade asynchronously
|
||||
runUpgrade(upgrade.id, instance.basePath, instance.slug, options).catch((err) => {
|
||||
logger.error(`[upgrade] Upgrade orchestration failed for ${instance.slug}: ${err}`);
|
||||
});
|
||||
// Fire-and-forget: branch on isRemote
|
||||
if (instance.isRemote) {
|
||||
runRemoteUpgrade(upgrade.id, instance, options).catch((err) => {
|
||||
logger.error(`[upgrade] Remote upgrade orchestration failed for ${instance.slug}: ${err}`);
|
||||
});
|
||||
} else {
|
||||
runUpgrade(upgrade.id, instance.basePath, instance.slug, options).catch((err) => {
|
||||
logger.error(`[upgrade] Upgrade orchestration failed for ${instance.slug}: ${err}`);
|
||||
});
|
||||
}
|
||||
|
||||
return upgrade;
|
||||
}
|
||||
|
||||
/**
|
||||
* Async REMOTE upgrade runner.
|
||||
*
|
||||
* Flow:
|
||||
* 1. Get RemoteDriver
|
||||
* 2. Mark InstanceUpgrade IN_PROGRESS
|
||||
* 3. Tell agent to start upgrade.sh in --api-mode
|
||||
* 4. Poll agent /upgrade/progress every 2s, mirror to DB
|
||||
* 5. Try /upgrade/result every poll cycle; when present, finalize
|
||||
* 6. On timeout (15 min), mark FAILED and create error event
|
||||
*
|
||||
* Note: there is no shell or filesystem access on the CCP side — everything
|
||||
* goes through the mTLS agent. The agent's spawn of upgrade.sh is itself
|
||||
* fire-and-forget under a slug mutex.
|
||||
*/
|
||||
async function runRemoteUpgrade(
|
||||
upgradeId: string,
|
||||
instance: Instance,
|
||||
options?: StartUpgradeOptions
|
||||
) {
|
||||
const slug = instance.slug;
|
||||
|
||||
try {
|
||||
const driver = await getRemoteDriverForInstance({
|
||||
id: instance.id,
|
||||
slug: instance.slug,
|
||||
isRemote: instance.isRemote,
|
||||
agentUrl: instance.agentUrl,
|
||||
});
|
||||
|
||||
// Mark IN_PROGRESS
|
||||
await prisma.instanceUpgrade.update({
|
||||
where: { id: upgradeId },
|
||||
data: {
|
||||
status: UpgradeStatus.IN_PROGRESS,
|
||||
progressMessage: 'Starting remote upgrade...',
|
||||
},
|
||||
});
|
||||
|
||||
// Tell the agent to start. The agent has its own mutex + stale-progress
|
||||
// check, so this can return 409 if a previous upgrade is still running.
|
||||
logger.info(`[upgrade] ${slug}: triggering remote upgrade.sh start`);
|
||||
await driver.startUpgrade({
|
||||
skipBackup: options?.skipBackup,
|
||||
useRegistry: options?.useRegistry,
|
||||
branch: options?.branch,
|
||||
});
|
||||
|
||||
// Poll progress + result. We treat /result returning 200 as the signal
|
||||
// that upgrade.sh exited (successfully or with code != 0 — the script
|
||||
// writes result.json either way in --api-mode).
|
||||
const deadline = Date.now() + REMOTE_UPGRADE_TIMEOUT;
|
||||
let lastProgress: { phase?: number; phaseName?: string; percentage?: number; message?: string } = {};
|
||||
|
||||
while (Date.now() < deadline) {
|
||||
await new Promise((r) => setTimeout(r, PROGRESS_POLL_INTERVAL));
|
||||
|
||||
// Try to fetch the result first; if it exists, we're done
|
||||
let result = null;
|
||||
try {
|
||||
result = await driver.getUpgradeResult();
|
||||
} catch {
|
||||
// No result yet — keep polling progress
|
||||
}
|
||||
|
||||
if (result) {
|
||||
// Final result available — write it and exit
|
||||
const upgradeRowBefore = await prisma.instanceUpgrade.findUnique({ where: { id: upgradeId } });
|
||||
await prisma.instanceUpgrade.update({
|
||||
where: { id: upgradeId },
|
||||
data: {
|
||||
status: result.success ? UpgradeStatus.COMPLETED : UpgradeStatus.FAILED,
|
||||
newCommit: result.newCommit || null,
|
||||
commitCount: result.commitCount || 0,
|
||||
percentage: 100,
|
||||
phaseName: 'Complete',
|
||||
progressMessage: result.message || 'Upgrade completed',
|
||||
durationSeconds: result.durationSeconds || null,
|
||||
warnings: result.warnings?.length ? (result.warnings as unknown as Prisma.InputJsonValue) : undefined,
|
||||
errorMessage: result.success ? null : (result.message || 'Upgrade failed'),
|
||||
completedAt: new Date(),
|
||||
},
|
||||
});
|
||||
|
||||
// Update Instance.gitCommit if we have a new commit
|
||||
if (result.newCommit) {
|
||||
await prisma.instance.update({
|
||||
where: { id: instance.id },
|
||||
data: { gitCommit: result.newCommit },
|
||||
});
|
||||
}
|
||||
|
||||
if (!result.success) {
|
||||
await createEvent(
|
||||
instance.id,
|
||||
'ERROR',
|
||||
'upgrade',
|
||||
'Remote upgrade failed',
|
||||
result.message || 'The remote upgrade process failed. Check the agent log for details.',
|
||||
{ upgradeId, source: 'remote', warnings: result.warnings }
|
||||
);
|
||||
}
|
||||
|
||||
await writeUpgradeAuditLog({
|
||||
upgradeId,
|
||||
instanceId: instance.id,
|
||||
triggeredById: upgradeRowBefore?.triggeredById ?? null,
|
||||
source: 'remote',
|
||||
outcome: result.success ? 'completed' : 'failed',
|
||||
previousCommit: upgradeRowBefore?.previousCommit ?? null,
|
||||
newCommit: result.newCommit || null,
|
||||
durationSeconds: result.durationSeconds || null,
|
||||
errorMessage: result.success ? null : (result.message || 'Upgrade failed'),
|
||||
});
|
||||
|
||||
logger.info(`[upgrade] ${slug}: remote upgrade ${result.success ? 'COMPLETED' : 'FAILED'}`);
|
||||
return;
|
||||
}
|
||||
|
||||
// No result yet — pull progress
|
||||
try {
|
||||
const progress = await driver.getUpgradeProgress();
|
||||
// Only update DB if something actually changed (avoid hot-loop writes)
|
||||
if (
|
||||
progress.phase !== lastProgress.phase ||
|
||||
progress.percentage !== lastProgress.percentage ||
|
||||
progress.message !== lastProgress.message
|
||||
) {
|
||||
lastProgress = {
|
||||
phase: progress.phase,
|
||||
phaseName: progress.phaseName,
|
||||
percentage: progress.percentage,
|
||||
message: progress.message,
|
||||
};
|
||||
await prisma.instanceUpgrade.update({
|
||||
where: { id: upgradeId },
|
||||
data: {
|
||||
currentPhase: progress.phase || 0,
|
||||
phaseName: progress.phaseName || null,
|
||||
percentage: progress.percentage || 0,
|
||||
progressMessage: progress.message || null,
|
||||
},
|
||||
});
|
||||
}
|
||||
} catch (err) {
|
||||
// Transient network blip during a long upgrade — keep polling
|
||||
logger.debug(`[upgrade] ${slug}: progress poll error: ${(err as Error).message}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Timeout — mark FAILED
|
||||
throw new Error(`Remote upgrade timed out after ${Math.round(REMOTE_UPGRADE_TIMEOUT / 60_000)} minutes`);
|
||||
} catch (err) {
|
||||
const errorMsg = (err as Error).message;
|
||||
const isTimeout = errorMsg.includes('timed out');
|
||||
|
||||
const upgradeRowBefore = await prisma.instanceUpgrade.findUnique({ where: { id: upgradeId } });
|
||||
await prisma.instanceUpgrade.update({
|
||||
where: { id: upgradeId },
|
||||
data: {
|
||||
status: UpgradeStatus.FAILED,
|
||||
errorMessage: isTimeout ? errorMsg : errorMsg.slice(0, 2000),
|
||||
progressMessage: 'Failed',
|
||||
completedAt: new Date(),
|
||||
},
|
||||
});
|
||||
|
||||
await createEvent(
|
||||
instance.id,
|
||||
'ERROR',
|
||||
'upgrade',
|
||||
isTimeout ? 'Remote upgrade timed out' : 'Remote upgrade failed',
|
||||
errorMsg.slice(0, 500),
|
||||
{ upgradeId, source: 'remote' }
|
||||
);
|
||||
|
||||
await writeUpgradeAuditLog({
|
||||
upgradeId,
|
||||
instanceId: instance.id,
|
||||
triggeredById: upgradeRowBefore?.triggeredById ?? null,
|
||||
source: 'remote',
|
||||
outcome: 'orchestration_error',
|
||||
previousCommit: upgradeRowBefore?.previousCommit ?? null,
|
||||
newCommit: null,
|
||||
durationSeconds: null,
|
||||
errorMessage: errorMsg,
|
||||
});
|
||||
|
||||
// Don't flip the instance to ERROR state for remote upgrades — the agent
|
||||
// health check will reflect the real state on the next poll, and we don't
|
||||
// want to mask a recovered instance with stale CCP-side ERROR.
|
||||
logger.error(`[upgrade] ${slug}: ${errorMsg}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Async upgrade runner. Runs upgrade.sh and polls progress.
|
||||
*/
|
||||
@ -271,19 +569,32 @@ async function runUpgrade(
|
||||
});
|
||||
}
|
||||
|
||||
if (!result.success) {
|
||||
const upgradeRow = await prisma.instanceUpgrade.findUnique({ where: { id: upgradeId } });
|
||||
|
||||
if (!result.success && upgradeRow) {
|
||||
// Create error event
|
||||
const upgrade = await prisma.instanceUpgrade.findUnique({ where: { id: upgradeId } });
|
||||
if (upgrade) {
|
||||
await createEvent(
|
||||
upgrade.instanceId,
|
||||
'ERROR',
|
||||
'upgrade',
|
||||
'Upgrade failed',
|
||||
result.message || 'The upgrade process failed. Check logs for details.',
|
||||
{ upgradeId, previousCommit: upgrade.previousCommit, warnings: result.warnings }
|
||||
);
|
||||
}
|
||||
await createEvent(
|
||||
upgradeRow.instanceId,
|
||||
'ERROR',
|
||||
'upgrade',
|
||||
'Upgrade failed',
|
||||
result.message || 'The upgrade process failed. Check logs for details.',
|
||||
{ upgradeId, previousCommit: upgradeRow.previousCommit, warnings: result.warnings }
|
||||
);
|
||||
}
|
||||
|
||||
if (upgradeRow) {
|
||||
await writeUpgradeAuditLog({
|
||||
upgradeId,
|
||||
instanceId: upgradeRow.instanceId,
|
||||
triggeredById: upgradeRow.triggeredById,
|
||||
source: 'local',
|
||||
outcome: result.success ? 'completed' : 'failed',
|
||||
previousCommit: upgradeRow.previousCommit,
|
||||
newCommit: result.newCommit || newCommit,
|
||||
durationSeconds: result.durationSeconds || null,
|
||||
errorMessage: result.success ? null : (result.message || 'Upgrade failed'),
|
||||
});
|
||||
}
|
||||
|
||||
logger.info(`[upgrade] ${slug}: Upgrade ${result.success ? 'completed' : 'failed'}`);
|
||||
@ -327,6 +638,18 @@ async function runUpgrade(
|
||||
statusMessage: `Upgrade failed: ${isTimeout ? 'timeout' : errorMsg.slice(0, 200)}`,
|
||||
},
|
||||
});
|
||||
|
||||
await writeUpgradeAuditLog({
|
||||
upgradeId,
|
||||
instanceId: upgrade.instanceId,
|
||||
triggeredById: upgrade.triggeredById,
|
||||
source: 'local',
|
||||
outcome: 'orchestration_error',
|
||||
previousCommit: upgrade.previousCommit,
|
||||
newCommit: null,
|
||||
durationSeconds: result.durationSeconds || null,
|
||||
errorMessage: errorMsg,
|
||||
});
|
||||
}
|
||||
|
||||
logger.error(`[upgrade] ${slug}: Upgrade failed: ${errorMsg}`);
|
||||
|
||||
81
config.sh
81
config.sh
@ -38,6 +38,11 @@ NI_MAPBOX_KEY=""
|
||||
NI_MAXMIND_ACCOUNT_ID=""
|
||||
NI_MAXMIND_LICENSE_KEY=""
|
||||
|
||||
# CCP (Changemaker Control Panel) registration flags
|
||||
NI_CCP_URL=""
|
||||
NI_CCP_INVITE_CODE=""
|
||||
NI_CCP_AGENT_URL=""
|
||||
|
||||
# --- Arg parser ---
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case "$1" in
|
||||
@ -62,6 +67,10 @@ while [[ $# -gt 0 ]]; do
|
||||
--mapbox-key) NI_MAPBOX_KEY="$2"; shift 2 ;;
|
||||
--maxmind-account-id) NI_MAXMIND_ACCOUNT_ID="$2"; shift 2 ;;
|
||||
--maxmind-license-key) NI_MAXMIND_LICENSE_KEY="$2"; shift 2 ;;
|
||||
# CCP (Changemaker Control Panel)
|
||||
--ccp-url) NI_CCP_URL="$2"; shift 2 ;;
|
||||
--ccp-invite-code) NI_CCP_INVITE_CODE="$2"; shift 2 ;;
|
||||
--ccp-agent-url) NI_CCP_AGENT_URL="$2"; shift 2 ;;
|
||||
--help|-h)
|
||||
echo "Usage: bash config.sh [OPTIONS]"
|
||||
echo ""
|
||||
@ -91,6 +100,11 @@ while [[ $# -gt 0 ]]; do
|
||||
echo " --maxmind-account-id ID MaxMind GeoIP account ID"
|
||||
echo " --maxmind-license-key K MaxMind GeoIP license key"
|
||||
echo ""
|
||||
echo "CCP (Changemaker Control Panel) — all 3 flags required to register:"
|
||||
echo " --ccp-url URL CCP server URL (e.g., https://ccp.example.com)"
|
||||
echo " --ccp-invite-code CODE One-time invite code from CCP"
|
||||
echo " --ccp-agent-url URL Agent URL the CCP reaches (e.g., https://this-host:7443)"
|
||||
echo ""
|
||||
echo "Example:"
|
||||
echo " bash config.sh --non-interactive --domain example.org --admin-password MyStr0ngPass123"
|
||||
echo " bash config.sh -y --domain example.org --admin-password MyStr0ngPass123 \\"
|
||||
@ -798,6 +812,17 @@ configure_features() {
|
||||
else
|
||||
warn "Set JVB_ADVERTISE_IP in .env before starting Jitsi containers."
|
||||
fi
|
||||
else
|
||||
# Non-interactive: auto-detect public IP for NAT traversal
|
||||
local detected_ip
|
||||
detected_ip=$(curl -sf --max-time 5 https://ifconfig.me 2>/dev/null || \
|
||||
curl -sf --max-time 5 https://api.ipify.org 2>/dev/null || true)
|
||||
if [[ -n "$detected_ip" ]]; then
|
||||
update_env_var "JVB_ADVERTISE_IP" "$detected_ip"
|
||||
success "JVB advertise IP auto-detected: $detected_ip"
|
||||
else
|
||||
warn "Could not auto-detect public IP. Set JVB_ADVERTISE_IP in .env before starting Jitsi."
|
||||
fi
|
||||
fi
|
||||
else
|
||||
MEET_ENABLED="no"
|
||||
@ -838,13 +863,6 @@ configure_features() {
|
||||
update_env_var "ENABLE_PEOPLE" "false"
|
||||
fi
|
||||
|
||||
if prompt_yes_no "Enable Analytics & GeoIP (visitor tracking, geo dashboard)?"; then
|
||||
update_env_var "ENABLE_ANALYTICS" "true"
|
||||
success "Analytics enabled"
|
||||
else
|
||||
update_env_var "ENABLE_ANALYTICS" "false"
|
||||
fi
|
||||
|
||||
if prompt_yes_no "Enable Docs Comments & Version History (Gitea-backed)?"; then
|
||||
update_env_var "GITEA_COMMENTS_ENABLED" "true"
|
||||
success "Docs Comments & Version History enabled"
|
||||
@ -881,8 +899,14 @@ configure_features() {
|
||||
fi
|
||||
|
||||
if prompt_yes_no "Enable Monitoring stack (Prometheus, Grafana, Alertmanager, cAdvisor)?" "y"; then
|
||||
update_env_var "COMPOSE_PROFILES" "monitoring"
|
||||
success "Monitoring enabled (COMPOSE_PROFILES=monitoring)"
|
||||
local existing_profiles
|
||||
existing_profiles=$(grep -oP 'COMPOSE_PROFILES=\K.*' "$ENV_FILE" 2>/dev/null || echo "")
|
||||
if [[ -z "$existing_profiles" ]]; then
|
||||
update_env_var "COMPOSE_PROFILES" "monitoring"
|
||||
elif [[ "$existing_profiles" != *"monitoring"* ]]; then
|
||||
update_env_var "COMPOSE_PROFILES" "${existing_profiles},monitoring"
|
||||
fi
|
||||
success "Monitoring enabled (COMPOSE_PROFILES includes monitoring)"
|
||||
MONITORING_ENABLED="yes"
|
||||
else
|
||||
MONITORING_ENABLED="no"
|
||||
@ -1401,6 +1425,35 @@ pangolin_connect_first_site() {
|
||||
configure_control_panel() {
|
||||
header "Control Panel Registration"
|
||||
|
||||
# Non-interactive: use --ccp-* flags if all three provided, otherwise skip
|
||||
if [[ "$NON_INTERACTIVE" == "true" ]]; then
|
||||
if [[ -n "$NI_CCP_URL" && -n "$NI_CCP_INVITE_CODE" && -n "$NI_CCP_AGENT_URL" ]]; then
|
||||
update_env_var "ENABLE_CCP_AGENT" "true"
|
||||
update_env_var "CCP_URL" "$NI_CCP_URL"
|
||||
update_env_var "CCP_INVITE_CODE" "$NI_CCP_INVITE_CODE"
|
||||
update_env_var "CCP_AGENT_URL" "$NI_CCP_AGENT_URL"
|
||||
|
||||
# Append ccp-agent to existing profiles (don't clobber monitoring)
|
||||
local existing_profiles
|
||||
existing_profiles=$(grep -oP 'COMPOSE_PROFILES=\K.*' "$ENV_FILE" 2>/dev/null || echo "")
|
||||
if [[ -z "$existing_profiles" ]]; then
|
||||
update_env_var "COMPOSE_PROFILES" "ccp-agent"
|
||||
elif [[ "$existing_profiles" != *"ccp-agent"* ]]; then
|
||||
update_env_var "COMPOSE_PROFILES" "${existing_profiles},ccp-agent"
|
||||
fi
|
||||
|
||||
success "CCP registration configured ($NI_CCP_URL)"
|
||||
else
|
||||
update_env_var "ENABLE_CCP_AGENT" "false"
|
||||
if [[ -n "$NI_CCP_URL" || -n "$NI_CCP_INVITE_CODE" || -n "$NI_CCP_AGENT_URL" ]]; then
|
||||
warn "CCP registration needs all 3 flags: --ccp-url, --ccp-invite-code, --ccp-agent-url"
|
||||
else
|
||||
info "Skipping CCP registration (no --ccp-url provided)"
|
||||
fi
|
||||
fi
|
||||
return
|
||||
fi
|
||||
|
||||
if prompt_yes_no "Register this instance with a Changemaker Control Panel?"; then
|
||||
echo ""
|
||||
read -rp " Enter Control Panel URL (e.g., https://ccp.example.com): " ccp_url
|
||||
@ -2152,9 +2205,15 @@ main() {
|
||||
header "Release Mode Settings"
|
||||
update_env_var "IMAGE_TAG" "latest"
|
||||
update_env_var "NODE_ENV" "production"
|
||||
# Ensure monitoring is included if user opted in
|
||||
# Ensure monitoring is included if user opted in (preserve existing profiles)
|
||||
if [[ "${MONITORING_ENABLED:-no}" == "yes" ]]; then
|
||||
update_env_var "COMPOSE_PROFILES" "monitoring"
|
||||
local existing_profiles
|
||||
existing_profiles=$(grep -oP 'COMPOSE_PROFILES=\K.*' "$ENV_FILE" 2>/dev/null || echo "")
|
||||
if [[ -z "$existing_profiles" ]]; then
|
||||
update_env_var "COMPOSE_PROFILES" "monitoring"
|
||||
elif [[ "$existing_profiles" != *"monitoring"* ]]; then
|
||||
update_env_var "COMPOSE_PROFILES" "${existing_profiles},monitoring"
|
||||
fi
|
||||
fi
|
||||
success "Set IMAGE_TAG=latest, NODE_ENV=production (pre-built images)"
|
||||
fi
|
||||
|
||||
@ -103,7 +103,8 @@ cp "$PROJECT_DIR/api/prisma/init-nocodb-db.sh" "$STAGE_DIR/scripts/"
|
||||
cp "$PROJECT_DIR/api/prisma/init-gancio-db.sh" "$STAGE_DIR/scripts/"
|
||||
|
||||
# Runtime scripts
|
||||
for script in nocodb-init.sh gitea-init.sh mkdocs-entrypoint.sh backup.sh \
|
||||
for script in nocodb-init.sh gitea-init.sh mkdocs-entrypoint.sh \
|
||||
backup.sh restore.sh \
|
||||
upgrade.sh upgrade-check.sh upgrade-watcher.sh \
|
||||
uninstall.sh test-deployment.sh; do
|
||||
if [[ -f "$PROJECT_DIR/scripts/$script" ]]; then
|
||||
|
||||
@ -294,7 +294,7 @@ if [[ "$START_SERVICES" =~ ^[Yy]$ ]]; then
|
||||
info " Database migrations and seeding run automatically on first boot."
|
||||
echo ""
|
||||
|
||||
CORE_SERVICES=("v2-postgres" "redis" "api" "admin")
|
||||
CORE_SERVICES=("v2-postgres" "redis" "api" "admin" "nginx")
|
||||
ELAPSED=0
|
||||
ALL_HEALTHY=false
|
||||
|
||||
|
||||
@ -359,9 +359,13 @@ trap on_failure EXIT
|
||||
acquire_lock
|
||||
load_env
|
||||
|
||||
# Determine branch
|
||||
# Determine branch (source mode only — release installs have no git)
|
||||
if [[ -z "$BRANCH" ]]; then
|
||||
BRANCH="$(git rev-parse --abbrev-ref HEAD)"
|
||||
if [[ "$INSTALL_MODE" == "release" ]]; then
|
||||
BRANCH="release"
|
||||
else
|
||||
BRANCH="$(git rev-parse --abbrev-ref HEAD)"
|
||||
fi
|
||||
fi
|
||||
|
||||
# =============================================================================
|
||||
@ -461,13 +465,15 @@ else
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Remote reachable
|
||||
info "Checking git remote..."
|
||||
if timeout 10 git ls-remote origin HEAD &>/dev/null 2>&1; then
|
||||
success "Git remote reachable"
|
||||
else
|
||||
error "Cannot reach git remote. Check your network or remote configuration."
|
||||
exit 1
|
||||
# Remote reachable (source mode only — release mode pulls from Gitea API later)
|
||||
if [[ "$INSTALL_MODE" == "source" ]]; then
|
||||
info "Checking git remote..."
|
||||
if timeout 10 git ls-remote origin HEAD &>/dev/null 2>&1; then
|
||||
success "Git remote reachable"
|
||||
else
|
||||
error "Cannot reach git remote. Check your network or remote configuration."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Working directory checks
|
||||
@ -490,9 +496,16 @@ fi
|
||||
success "Disk space: ${AVAILABLE_MB}MB available"
|
||||
|
||||
# Record pre-upgrade state
|
||||
PRE_UPGRADE_COMMIT="$(git rev-parse HEAD)"
|
||||
PRE_UPGRADE_SHORT="$(git rev-parse --short HEAD)"
|
||||
info "Current commit: $PRE_UPGRADE_SHORT ($(git log -1 --format='%s' HEAD))"
|
||||
if [[ "$INSTALL_MODE" == "source" ]]; then
|
||||
PRE_UPGRADE_COMMIT="$(git rev-parse HEAD)"
|
||||
PRE_UPGRADE_SHORT="$(git rev-parse --short HEAD)"
|
||||
info "Current commit: $PRE_UPGRADE_SHORT ($(git log -1 --format='%s' HEAD))"
|
||||
else
|
||||
# Release mode: derive "commit" from VERSION file (format: <tag>\n<sha>)
|
||||
PRE_UPGRADE_COMMIT="$(head -2 "$PROJECT_DIR/VERSION" 2>/dev/null | tail -1 || echo "release")"
|
||||
PRE_UPGRADE_SHORT="$(head -1 "$PROJECT_DIR/VERSION" 2>/dev/null || echo "release")"
|
||||
info "Current version: $PRE_UPGRADE_SHORT"
|
||||
fi
|
||||
info "Target branch: $BRANCH"
|
||||
|
||||
# Record running containers (for restoring monitoring profile later)
|
||||
@ -502,31 +515,36 @@ if docker ps --format '{{.Names}}' | grep -q 'prometheus-changemaker'; then
|
||||
info "Monitoring stack detected (will restart after upgrade)"
|
||||
fi
|
||||
|
||||
# Warn about uncommitted changes in project-owned paths
|
||||
PROJECT_OWNED_PATHS="api/ admin/ docker-compose.yml"
|
||||
DIRTY_PROJECT_FILES="$(git diff --name-only HEAD -- $PROJECT_OWNED_PATHS 2>/dev/null || true)"
|
||||
if [[ -n "$DIRTY_PROJECT_FILES" ]]; then
|
||||
warn "Uncommitted changes in project-owned files:"
|
||||
echo "$DIRTY_PROJECT_FILES" | while read -r f; do echo " $f"; done
|
||||
if [[ "$FORCE" != "true" ]]; then
|
||||
error "Commit or stash these changes first, or use --force to continue."
|
||||
exit 1
|
||||
# Source-mode-only checks: dirty files + upstream commit comparison
|
||||
if [[ "$INSTALL_MODE" == "source" ]]; then
|
||||
# Warn about uncommitted changes in project-owned paths
|
||||
PROJECT_OWNED_PATHS="api/ admin/ docker-compose.yml"
|
||||
DIRTY_PROJECT_FILES="$(git diff --name-only HEAD -- $PROJECT_OWNED_PATHS 2>/dev/null || true)"
|
||||
if [[ -n "$DIRTY_PROJECT_FILES" ]]; then
|
||||
warn "Uncommitted changes in project-owned files:"
|
||||
echo "$DIRTY_PROJECT_FILES" | while read -r f; do echo " $f"; done
|
||||
if [[ "$FORCE" != "true" ]]; then
|
||||
error "Commit or stash these changes first, or use --force to continue."
|
||||
exit 1
|
||||
fi
|
||||
warn "Continuing with --force (changes will be stashed)"
|
||||
fi
|
||||
warn "Continuing with --force (changes will be stashed)"
|
||||
fi
|
||||
|
||||
# Check for available updates
|
||||
LOCAL_HEAD="$(git rev-parse HEAD)"
|
||||
REMOTE_HEAD="$(git ls-remote origin "$BRANCH" | cut -f1)"
|
||||
if [[ "$LOCAL_HEAD" == "$REMOTE_HEAD" ]]; then
|
||||
info "Already up to date ($PRE_UPGRADE_SHORT). No upstream changes."
|
||||
if [[ "$FORCE" != "true" ]]; then
|
||||
success "Nothing to upgrade."
|
||||
release_lock
|
||||
exit 0
|
||||
# Check for available updates
|
||||
LOCAL_HEAD="$(git rev-parse HEAD)"
|
||||
REMOTE_HEAD="$(git ls-remote origin "$BRANCH" | cut -f1)"
|
||||
if [[ "$LOCAL_HEAD" == "$REMOTE_HEAD" ]]; then
|
||||
info "Already up to date ($PRE_UPGRADE_SHORT). No upstream changes."
|
||||
if [[ "$FORCE" != "true" ]]; then
|
||||
success "Nothing to upgrade."
|
||||
release_lock
|
||||
exit 0
|
||||
fi
|
||||
warn "Continuing with --force despite no upstream changes."
|
||||
fi
|
||||
warn "Continuing with --force despite no upstream changes."
|
||||
fi
|
||||
# Release mode: the upstream-version comparison happens later in the
|
||||
# release-mode block (line ~597) which queries the Gitea Releases API.
|
||||
|
||||
# =============================================================================
|
||||
# Phase 2: Backup
|
||||
@ -669,100 +687,105 @@ elif [[ "$DRY_RUN" == "true" ]]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Step 0: Save user-modifiable paths before any git operations
|
||||
save_user_paths
|
||||
# Source-mode git pull flow. Release mode handles its update via tarball
|
||||
# download in the block above and skips this entire section.
|
||||
if [[ "$INSTALL_MODE" == "source" ]]; then
|
||||
# Step 0: Save user-modifiable paths before any git operations
|
||||
save_user_paths
|
||||
|
||||
# Step 0b: Clear skip-worktree flags that prevent merge (e.g., repo-data JSON files)
|
||||
SKIP_WORKTREE_FILES="$(git ls-files -v | grep '^S ' | awk '{print $2}' || true)"
|
||||
if [[ -n "$SKIP_WORKTREE_FILES" ]]; then
|
||||
info "Clearing skip-worktree flags on $(echo "$SKIP_WORKTREE_FILES" | wc -l | xargs) file(s)..."
|
||||
echo "$SKIP_WORKTREE_FILES" | xargs git update-index --no-skip-worktree
|
||||
success "Skip-worktree flags cleared"
|
||||
fi
|
||||
|
||||
# Step 0c: Fix Docker-owned directories that block git checkout
|
||||
for owned_dir in api/upgrade api/uploads api/configs; do
|
||||
if [[ -d "$PROJECT_DIR/$owned_dir" ]] && [[ ! -w "$PROJECT_DIR/$owned_dir" ]]; then
|
||||
info "Fixing permissions on $owned_dir..."
|
||||
docker run --rm -v "$PROJECT_DIR/$owned_dir:/fix" alpine chown -R "$(id -u):$(id -g)" /fix 2>/dev/null || true
|
||||
# Step 0b: Clear skip-worktree flags that prevent merge (e.g., repo-data JSON files)
|
||||
SKIP_WORKTREE_FILES="$(git ls-files -v | grep '^S ' | awk '{print $2}' || true)"
|
||||
if [[ -n "$SKIP_WORKTREE_FILES" ]]; then
|
||||
info "Clearing skip-worktree flags on $(echo "$SKIP_WORKTREE_FILES" | wc -l | xargs) file(s)..."
|
||||
echo "$SKIP_WORKTREE_FILES" | xargs git update-index --no-skip-worktree
|
||||
success "Skip-worktree flags cleared"
|
||||
fi
|
||||
done
|
||||
|
||||
# Step 1: Stash user changes if any exist
|
||||
HAS_CHANGES=false
|
||||
if [[ -n "$(git status --porcelain 2>/dev/null)" ]]; then
|
||||
HAS_CHANGES=true
|
||||
STASH_NAME="upgrade-${TIMESTAMP}"
|
||||
info "Stashing local changes as '$STASH_NAME'..."
|
||||
git stash push --include-untracked -m "$STASH_NAME"
|
||||
success "Local changes stashed"
|
||||
fi
|
||||
# Step 0c: Fix Docker-owned directories that block git checkout
|
||||
for owned_dir in api/upgrade api/uploads api/configs; do
|
||||
if [[ -d "$PROJECT_DIR/$owned_dir" ]] && [[ ! -w "$PROJECT_DIR/$owned_dir" ]]; then
|
||||
info "Fixing permissions on $owned_dir..."
|
||||
docker run --rm -v "$PROJECT_DIR/$owned_dir:/fix" alpine chown -R "$(id -u):$(id -g)" /fix 2>/dev/null || true
|
||||
fi
|
||||
done
|
||||
|
||||
# Step 3: Pull updates
|
||||
info "Pulling updates from origin/$BRANCH..."
|
||||
if ! git pull origin "$BRANCH" --no-edit 2>&1; then
|
||||
error "git pull failed. This may indicate upstream force-push or branch issues."
|
||||
# Step 1: Stash user changes if any exist
|
||||
HAS_CHANGES=false
|
||||
if [[ -n "$(git status --porcelain 2>/dev/null)" ]]; then
|
||||
HAS_CHANGES=true
|
||||
STASH_NAME="upgrade-${TIMESTAMP}"
|
||||
info "Stashing local changes as '$STASH_NAME'..."
|
||||
git stash push --include-untracked -m "$STASH_NAME"
|
||||
success "Local changes stashed"
|
||||
fi
|
||||
|
||||
# Step 3: Pull updates
|
||||
info "Pulling updates from origin/$BRANCH..."
|
||||
if ! git pull origin "$BRANCH" --no-edit 2>&1; then
|
||||
error "git pull failed. This may indicate upstream force-push or branch issues."
|
||||
if [[ "$HAS_CHANGES" == "true" ]]; then
|
||||
warn "Your stashed changes can be recovered with: git stash pop"
|
||||
fi
|
||||
exit 1
|
||||
fi
|
||||
|
||||
POST_PULL_COMMIT="$(git rev-parse --short HEAD)"
|
||||
success "Updated to $POST_PULL_COMMIT"
|
||||
|
||||
# Step 4: Pop stash and handle conflicts
|
||||
if [[ "$HAS_CHANGES" == "true" ]]; then
|
||||
warn "Your stashed changes can be recovered with: git stash pop"
|
||||
fi
|
||||
exit 1
|
||||
fi
|
||||
info "Restoring local changes..."
|
||||
if git stash pop 2>&1; then
|
||||
success "Local changes restored cleanly"
|
||||
else
|
||||
warn "Merge conflicts detected during stash pop"
|
||||
|
||||
POST_PULL_COMMIT="$(git rev-parse --short HEAD)"
|
||||
success "Updated to $POST_PULL_COMMIT"
|
||||
# Auto-resolve user-modifiable paths by keeping user's version
|
||||
RESOLVED_COUNT=0
|
||||
for user_path in "${USER_PATHS[@]}"; do
|
||||
CONFLICTED="$(git diff --name-only --diff-filter=U -- "$user_path" 2>/dev/null || true)"
|
||||
if [[ -n "$CONFLICTED" ]]; then
|
||||
while IFS= read -r cf; do
|
||||
info " Auto-resolving (keeping yours): $cf"
|
||||
git checkout --theirs "$cf" 2>/dev/null || true
|
||||
git add "$cf"
|
||||
RESOLVED_COUNT=$((RESOLVED_COUNT + 1))
|
||||
done < <(echo "$CONFLICTED")
|
||||
fi
|
||||
done
|
||||
|
||||
# Step 4: Pop stash and handle conflicts
|
||||
if [[ "$HAS_CHANGES" == "true" ]]; then
|
||||
info "Restoring local changes..."
|
||||
if git stash pop 2>&1; then
|
||||
success "Local changes restored cleanly"
|
||||
else
|
||||
warn "Merge conflicts detected during stash pop"
|
||||
|
||||
# Auto-resolve user-modifiable paths by keeping user's version
|
||||
RESOLVED_COUNT=0
|
||||
for user_path in "${USER_PATHS[@]}"; do
|
||||
CONFLICTED="$(git diff --name-only --diff-filter=U -- "$user_path" 2>/dev/null || true)"
|
||||
if [[ -n "$CONFLICTED" ]]; then
|
||||
while IFS= read -r cf; do
|
||||
info " Auto-resolving (keeping yours): $cf"
|
||||
git checkout --theirs "$cf" 2>/dev/null || true
|
||||
git add "$cf"
|
||||
RESOLVED_COUNT=$((RESOLVED_COUNT + 1))
|
||||
done < <(echo "$CONFLICTED")
|
||||
# Check if any conflicts remain in project-owned files
|
||||
REMAINING_CONFLICTS="$(git diff --name-only --diff-filter=U 2>/dev/null || true)"
|
||||
if [[ -n "$REMAINING_CONFLICTS" ]]; then
|
||||
error "Unresolved conflicts in project-owned files:"
|
||||
echo "$REMAINING_CONFLICTS" | while read -r f; do echo " $f"; done
|
||||
echo ""
|
||||
error "These files have upstream changes that conflict with your edits."
|
||||
error "Resolve manually, then run the upgrade again."
|
||||
info "Your pre-upgrade commit: $PRE_UPGRADE_COMMIT"
|
||||
info "To abort: git merge --abort OR git checkout $PRE_UPGRADE_COMMIT"
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
|
||||
# Check if any conflicts remain in project-owned files
|
||||
REMAINING_CONFLICTS="$(git diff --name-only --diff-filter=U 2>/dev/null || true)"
|
||||
if [[ -n "$REMAINING_CONFLICTS" ]]; then
|
||||
error "Unresolved conflicts in project-owned files:"
|
||||
echo "$REMAINING_CONFLICTS" | while read -r f; do echo " $f"; done
|
||||
echo ""
|
||||
error "These files have upstream changes that conflict with your edits."
|
||||
error "Resolve manually, then run the upgrade again."
|
||||
info "Your pre-upgrade commit: $PRE_UPGRADE_COMMIT"
|
||||
info "To abort: git merge --abort OR git checkout $PRE_UPGRADE_COMMIT"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ $RESOLVED_COUNT -gt 0 ]]; then
|
||||
success "Auto-resolved $RESOLVED_COUNT user-modifiable path(s) (kept your versions)"
|
||||
if [[ $RESOLVED_COUNT -gt 0 ]]; then
|
||||
success "Auto-resolved $RESOLVED_COUNT user-modifiable path(s) (kept your versions)"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
# Step 4b: Restore user-modifiable paths (unconditionally overwrites with saved copies)
|
||||
restore_user_paths
|
||||
# Step 4b: Restore user-modifiable paths (unconditionally overwrites with saved copies)
|
||||
restore_user_paths
|
||||
|
||||
# Step 4c: Restore any tracked files accidentally deleted by restore_user_paths
|
||||
# (can happen when save_user_paths can't read root-owned files in user paths)
|
||||
DELETED_TRACKED="$(git ls-files --deleted 2>/dev/null || true)"
|
||||
if [[ -n "$DELETED_TRACKED" ]]; then
|
||||
info "Restoring $(echo "$DELETED_TRACKED" | wc -l | xargs) tracked file(s) deleted during restore..."
|
||||
echo "$DELETED_TRACKED" | xargs git checkout HEAD -- 2>/dev/null || true
|
||||
success "Tracked files restored from HEAD"
|
||||
# Step 4c: Restore any tracked files accidentally deleted by restore_user_paths
|
||||
# (can happen when save_user_paths can't read root-owned files in user paths)
|
||||
DELETED_TRACKED="$(git ls-files --deleted 2>/dev/null || true)"
|
||||
if [[ -n "$DELETED_TRACKED" ]]; then
|
||||
info "Restoring $(echo "$DELETED_TRACKED" | wc -l | xargs) tracked file(s) deleted during restore..."
|
||||
echo "$DELETED_TRACKED" | xargs git checkout HEAD -- 2>/dev/null || true
|
||||
success "Tracked files restored from HEAD"
|
||||
fi
|
||||
fi
|
||||
# End of source-mode git pull flow
|
||||
|
||||
# Step 5: Detect new env vars
|
||||
info "Checking for new environment variables..."
|
||||
@ -791,24 +814,30 @@ if [[ -f "$PROJECT_DIR/.env.example" ]] && [[ -f "$PROJECT_DIR/.env" ]]; then
|
||||
fi
|
||||
fi
|
||||
|
||||
# Step 6: Print update summary
|
||||
COMMIT_RANGE="${PRE_UPGRADE_SHORT}..${POST_PULL_COMMIT}"
|
||||
COMMIT_COUNT="$(git log --oneline "$PRE_UPGRADE_COMMIT..HEAD" 2>/dev/null | wc -l | xargs)"
|
||||
echo ""
|
||||
info "Update summary: $COMMIT_COUNT commit(s) ($COMMIT_RANGE)"
|
||||
git log --oneline "$PRE_UPGRADE_COMMIT..HEAD" 2>/dev/null | head -20
|
||||
if [[ "$COMMIT_COUNT" -gt 20 ]]; then
|
||||
info " ... and $((COMMIT_COUNT - 20)) more"
|
||||
fi
|
||||
|
||||
# Flag commits that may require manual attention
|
||||
BREAKING_COMMITS="$(git log --oneline "$PRE_UPGRADE_COMMIT..HEAD" --grep="BREAKING" --grep="\[manual\]" 2>/dev/null || true)"
|
||||
if [[ -n "$BREAKING_COMMITS" ]]; then
|
||||
# Step 6: Print update summary (source mode only — release mode has no commit range)
|
||||
COMMIT_COUNT=0
|
||||
if [[ "$INSTALL_MODE" == "source" ]]; then
|
||||
COMMIT_RANGE="${PRE_UPGRADE_SHORT}..${POST_PULL_COMMIT}"
|
||||
# Use || true and check pipefail-safe to survive git failures
|
||||
COMMIT_COUNT="$(git log --oneline "$PRE_UPGRADE_COMMIT..HEAD" 2>/dev/null | wc -l | xargs || echo 0)"
|
||||
echo ""
|
||||
warn "Commits requiring manual attention:"
|
||||
echo "$BREAKING_COMMITS" | while read -r line; do
|
||||
echo -e " ${YELLOW}$line${NC}"
|
||||
done
|
||||
info "Update summary: $COMMIT_COUNT commit(s) ($COMMIT_RANGE)"
|
||||
git log --oneline "$PRE_UPGRADE_COMMIT..HEAD" 2>/dev/null | head -20 || true
|
||||
if [[ "$COMMIT_COUNT" -gt 20 ]]; then
|
||||
info " ... and $((COMMIT_COUNT - 20)) more"
|
||||
fi
|
||||
|
||||
# Flag commits that may require manual attention
|
||||
BREAKING_COMMITS="$(git log --oneline "$PRE_UPGRADE_COMMIT..HEAD" --grep="BREAKING" --grep="\[manual\]" 2>/dev/null || true)"
|
||||
if [[ -n "$BREAKING_COMMITS" ]]; then
|
||||
echo ""
|
||||
warn "Commits requiring manual attention:"
|
||||
echo "$BREAKING_COMMITS" | while read -r line; do
|
||||
echo -e " ${YELLOW}$line${NC}"
|
||||
done
|
||||
fi
|
||||
else
|
||||
info "Update summary: ${PRE_UPGRADE_SHORT} → release"
|
||||
fi
|
||||
|
||||
# =============================================================================
|
||||
@ -1135,7 +1164,10 @@ verify_service_health() {
|
||||
done
|
||||
warn "$name: not responding after ${max_wait}s"
|
||||
VERIFY_FAILED=true
|
||||
return 1
|
||||
# Always return 0 — under set -e a non-zero return from this helper would
|
||||
# exit the script before write_result runs. The VERIFY_FAILED flag is the
|
||||
# signal the caller actually checks.
|
||||
return 0
|
||||
}
|
||||
|
||||
# API health (with polling — may still be running migrations)
|
||||
@ -1194,7 +1226,11 @@ fi
|
||||
# =============================================================================
|
||||
|
||||
ELAPSED="$(elapsed)"
|
||||
FINAL_COMMIT="$(git rev-parse --short HEAD)"
|
||||
if [[ "$INSTALL_MODE" == "source" ]]; then
|
||||
FINAL_COMMIT="$(git rev-parse --short HEAD)"
|
||||
else
|
||||
FINAL_COMMIT="$(head -1 "$PROJECT_DIR/VERSION" 2>/dev/null || echo "release")"
|
||||
fi
|
||||
|
||||
# Collect warnings for API mode result
|
||||
UPGRADE_WARNINGS="[]"
|
||||
@ -1211,7 +1247,11 @@ echo -e "${BOLD}${GREEN} Upgrade Complete${NC}"
|
||||
echo -e "${BOLD}${GREEN}══════════════════════════════════════════════════${NC}"
|
||||
echo ""
|
||||
echo -e " ${BOLD}Previous:${NC} $PRE_UPGRADE_SHORT"
|
||||
echo -e " ${BOLD}Current:${NC} $FINAL_COMMIT ($(git log -1 --format='%s' HEAD))"
|
||||
if [[ "$INSTALL_MODE" == "source" ]]; then
|
||||
echo -e " ${BOLD}Current:${NC} $FINAL_COMMIT ($(git log -1 --format='%s' HEAD 2>/dev/null || echo "$FINAL_COMMIT"))"
|
||||
else
|
||||
echo -e " ${BOLD}Current:${NC} $FINAL_COMMIT"
|
||||
fi
|
||||
echo -e " ${BOLD}Commits:${NC} $COMMIT_COUNT"
|
||||
echo -e " ${BOLD}Duration:${NC} $ELAPSED"
|
||||
echo -e " ${BOLD}Log:${NC} $LOG_FILE"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user