CCP restore/tunnel/upgrade + upgrade.sh release-mode fixes + volunteer dashboard polish

- Add instance restore model, routes, and agent backup/restore endpoints
- Add Pangolin tunnel service (subdomain prefix, teardown action, CCP client)
- Add slug mutex for concurrent operation safety in agent
- Expand upgrade service with remote driver orchestration
- Fix upgrade.sh to properly handle release-mode installs (no git operations)
- Add CCP registration flags to config.sh (--ccp-url, --ccp-invite-code, --ccp-agent-url)
- Auto-detect JVB advertise IP in non-interactive mode
- Polish volunteer dashboard ActionStepsList with highlighted step component
- Add ticketed event description field + volunteer dashboard query refinements

Bunker Admin
2026-04-12 11:09:46 -06:00 · 2026-04-12 11:09:46 -06:00 · 26ec925d9b
commit 26ec925d9b
parent 29d1f3998a
35 changed files with 4191 additions and 329 deletions
--- a/admin/src/components/volunteer/dashboard/ActionStepsList.tsx
+++ b/admin/src/components/volunteer/dashboard/ActionStepsList.tsx
@ -10,6 +10,8 @@ import {
  LinkOutlined,
  CheckSquareOutlined,
  CheckCircleFilled,
  RightOutlined,
  ThunderboltOutlined,
 } from '@ant-design/icons';
 import { useNavigate } from 'react-router-dom';
 import { api } from '@/lib/api';
@ -66,6 +68,97 @@ function resolveStepLink(step: DashboardActionStep): { to: string; external: boo
  }
 }
 /**
  * Prominent "Next Up" card for a single action step.
  *
  * Self-report steps (kind `CUSTOM` or `VISIT_LINK`) get a primary
  * "Mark as done" button, preceded by an "Open" button when
  * `resolveStepLink` yields a link. Every other kind gets a single
  * "Take Action" button, disabled when no link can be resolved.
  *
  * @param step         Step to render.
  * @param onNavigate   Called with the step from "Open" / "Take Action".
  * @param onSelfReport Called with the step from "Mark as done".
  * @param loading      Puts the "Mark as done" button into its loading state.
  */
 function HighlightedStep({
  step,
  onNavigate,
  onSelfReport,
  loading,
 }: {
  step: DashboardActionStep;
  onNavigate: (step: DashboardActionStep) => void;
  onSelfReport: (step: DashboardActionStep) => void;
  loading: boolean;
 }) {
  // Colours shared by the badge, the header and the icon bubble.
  const accent = '#3498db';
  const accentTint = 'rgba(52,152,219,0.25)';

  const selfReported = step.kind === 'VISIT_LINK' || step.kind === 'CUSTOM';
  const hasLink = resolveStepLink(step) !== null;

  const openStep = () => onNavigate(step);
  const reportStep = () => onSelfReport(step);

  // Button row: self-report steps can be opened and then marked done;
  // all other kinds only navigate.
  const actions = selfReported ? (
    <>
      {hasLink && (
        <Button size="middle" onClick={openStep} icon={<RightOutlined />}>
          Open
        </Button>
      )}
      <Button type="primary" size="middle" loading={loading} onClick={reportStep}>
        Mark as done
      </Button>
    </>
  ) : (
    <Button
      type="primary"
      size="middle"
      icon={<RightOutlined />}
      onClick={openStep}
      disabled={!hasLink}
    >
      Take Action
    </Button>
  );

  return (
    <div
      style={{
        background: 'linear-gradient(135deg, rgba(52,152,219,0.25) 0%, rgba(41,128,185,0.15) 100%)',
        border: '1px solid rgba(52,152,219,0.3)',
        borderRadius: 8,
        padding: '16px 20px',
        margin: '0 0 2px',
      }}
    >
      {/* "Next Up" badge */}
      <div style={{ display: 'flex', alignItems: 'center', gap: 6, marginBottom: 8 }}>
        <ThunderboltOutlined style={{ fontSize: 12, color: accent }} />
        <Typography.Text
          strong
          style={{ fontSize: 12, color: accent, textTransform: 'uppercase', letterSpacing: 0.5 }}
        >
          Next Up
        </Typography.Text>
      </div>

      {/* Kind icon + label/description */}
      <div style={{ display: 'flex', alignItems: 'center', gap: 10, marginBottom: 8 }}>
        <div
          style={{
            width: 32,
            height: 32,
            borderRadius: '50%',
            background: accentTint,
            display: 'flex',
            alignItems: 'center',
            justifyContent: 'center',
            fontSize: 16,
            color: accent,
            flexShrink: 0,
          }}
        >
          {KIND_ICONS[step.kind]}
        </div>
        <div style={{ flex: 1, minWidth: 0 }}>
          <Typography.Text strong style={{ fontSize: 15, display: 'block' }}>
            {step.label}
          </Typography.Text>
          {step.description && (
            <Typography.Text type="secondary" style={{ fontSize: 12, display: 'block', marginTop: 2 }}>
              {step.description}
            </Typography.Text>
          )}
        </div>
      </div>

      <div style={{ display: 'flex', gap: 8, marginTop: 4 }}>{actions}</div>
    </div>
  );
 }
 export default function ActionStepsList({ campaign, onRefresh }: ActionStepsListProps) {
  const navigate = useNavigate();
  const { message } = App.useApp();
@ -95,6 +188,8 @@ export default function ActionStepsList({ campaign, onRefresh }: ActionStepsList
  };
  const sortedSteps = [...campaign.steps].sort((a, b) => a.order - b.order);
  const highlightedStep = sortedSteps.find((s) => !s.completed);
  const remainingSteps = sortedSteps.filter((s) => s.id !== highlightedStep?.id);
  return (
    <Card
@ -108,7 +203,18 @@ export default function ActionStepsList({ campaign, onRefresh }: ActionStepsList
        </Typography.Text>
      }
    >
-      {sortedSteps.map((step, i) => {
+      {highlightedStep && (
        <div style={{ padding: '12px 12px 0' }}>
          <HighlightedStep
            step={highlightedStep}
            onNavigate={handleNavigate}
            onSelfReport={handleSelfReport}
            loading={completingStepId === highlightedStep.id}
          />
        </div>
      )}
      {remainingSteps.map((step, i) => {
        const isSelfReport = step.kind === 'CUSTOM' || step.kind === 'VISIT_LINK';
        const canNavigate = resolveStepLink(step) !== null;
@ -119,8 +225,8 @@ export default function ActionStepsList({ campaign, onRefresh }: ActionStepsList
              display: 'flex',
              alignItems: 'center',
              justifyContent: 'space-between',
-              padding: '12px 20px',
+              padding: '10px 20px',
-              borderTop: i > 0 ? '1px solid rgba(255,255,255,0.04)' : undefined,
+              borderTop: (highlightedStep || i > 0) ? '1px solid rgba(255,255,255,0.04)' : undefined,
              opacity: step.completed ? 0.55 : 1,
              gap: 12,
            }}
@ -128,22 +234,22 @@ export default function ActionStepsList({ campaign, onRefresh }: ActionStepsList
            <div style={{ display: 'flex', alignItems: 'center', gap: 10, flex: 1, minWidth: 0 }}>
              <div
                style={{
-                  width: 26,
+                  width: 24,
-                  height: 26,
+                  height: 24,
                  borderRadius: '50%',
-                  background: step.completed ? '#52c41a' : 'rgba(52,152,219,0.15)',
+                  background: step.completed ? '#52c41a' : 'rgba(255,255,255,0.06)',
                  display: 'flex',
                  alignItems: 'center',
                  justifyContent: 'center',
-                  fontSize: 13,
+                  fontSize: 12,
                  flexShrink: 0,
-                  color: step.completed ? '#fff' : 'rgba(255,255,255,0.7)',
+                  color: step.completed ? '#fff' : 'rgba(255,255,255,0.5)',
                }}
              >
                {step.completed ? <CheckCircleFilled /> : KIND_ICONS[step.kind]}
              </div>
              <div style={{ minWidth: 0 }}>
-                <Typography.Text strong style={{ fontSize: 12, color: 'rgba(255,255,255,0.45)', display: 'block' }}>
+                <Typography.Text strong style={{ fontSize: 11, color: 'rgba(255,255,255,0.35)', display: 'block', lineHeight: 1 }}>
                  {KIND_LABELS[step.kind]}
                </Typography.Text>
                <Typography.Text
@ -163,7 +269,7 @@ export default function ActionStepsList({ campaign, onRefresh }: ActionStepsList
            <div style={{ flexShrink: 0 }}>
              {step.completed ? (
-                <Tag color="success" style={{ margin: 0 }}>Done</Tag>
+                <Tag color="success" style={{ margin: 0, fontSize: 11 }}>Done</Tag>
              ) : isSelfReport ? (
                <Space size={4}>
                  {canNavigate && (
--- a/admin/src/pages/events/TicketedEventsPage.tsx
+++ b/admin/src/pages/events/TicketedEventsPage.tsx
@ -7,7 +7,7 @@ import {
 import {
  PlusOutlined, SearchOutlined, EditOutlined, EyeOutlined, DeleteOutlined,
  CheckCircleOutlined, CloseCircleOutlined, CopyOutlined, ScanOutlined,
-  TagOutlined, VideoCameraOutlined, EnvironmentOutlined,
+  TagOutlined, VideoCameraOutlined, EnvironmentOutlined, StarOutlined, StarFilled,
 } from '@ant-design/icons';
 import { api } from '@/lib/api';
 import dayjs from 'dayjs';
@ -45,6 +45,7 @@ interface TicketedEvent {
  currentAttendees: number;
  coverImageUrl: string | null;
  organizerName: string | null;
  featured: boolean;
  ticketTiers: TicketTier[];
  _count: { tickets: number; checkIns: number };
  createdAt: string;
@ -198,18 +199,55 @@ export default function TicketedEventsPage() {
    }
  };
  /**
   * Sets or clears the `featured` flag on a ticketed event.
   *
   * Featuring is exclusive: when `featured` is true, every other event in the
   * currently loaded `events` list that is featured is un-featured first, then
   * the target event is updated.
   *
   * NOTE(review): exclusivity is enforced client-side against the loaded
   * `events` only; if the list is paginated or filtered, featured events that
   * are not loaded would be missed — confirm whether the API should enforce it.
   *
   * @param id       Id of the event to update.
   * @param featured Desired featured state for that event.
   */
  const handleFeature = async (id: string, featured: boolean) => {
    try {
      if (featured) {
        // Unfeature all others first (exclusive toggle)
        const othersToUnfeature = events.filter((e) => e.featured && e.id !== id);
        await Promise.all(
          othersToUnfeature.map((e) => api.put(`/api/ticketed-events/admin/${e.id}`, { featured: false }))
        );
      }
      await api.put(`/api/ticketed-events/admin/${id}`, { featured });
      message.success(featured ? 'Event featured on volunteer dashboard' : 'Event unfeatured');
    } catch {
      message.error('Failed to update featured status');
    } finally {
      // Refresh on failure too: some un-feature requests may already have been
      // applied before a later request failed, so the table must be reloaded
      // to show the real server state rather than stale stars.
      fetchEvents();
    }
  };
  /**
   * Copies the public URL of an event (`<origin>/event/<slug>`) to the clipboard.
   *
   * `navigator.clipboard.writeText` is asynchronous and can reject (for example
   * when clipboard permission is denied), so the toast is only shown once the
   * write has actually settled, and a rejection is reported instead of being
   * left unhandled.
   *
   * @param slug Event slug used to build the public link.
   */
  const copyLink = (slug: string) => {
    const url = `${window.location.origin}/event/${slug}`;
    navigator.clipboard
      .writeText(url)
      .then(() => {
        message.success('Link copied');
      })
      .catch(() => {
        message.error('Failed to copy link');
      });
  };
  const columns = [
    {
      title: '',
      key: 'featured',
      width: 36,
      render: (_: unknown, record: TicketedEvent) => (
        <Tooltip title={record.featured ? 'Remove from volunteer dashboard' : 'Feature on volunteer dashboard'}>
          <Button
            type="text"
            size="small"
            icon={record.featured
              ? <StarFilled style={{ color: '#faad14' }} />
              : <StarOutlined style={{ color: 'rgba(255,255,255,0.25)' }} />}
            onClick={(e) => { e.stopPropagation(); handleFeature(record.id, !record.featured); }}
          />
        </Tooltip>
      ),
    },
    {
      title: 'Title',
      dataIndex: 'title',
      key: 'title',
      render: (text: string, record: TicketedEvent) => (
-        <a onClick={() => navigate(`/app/events/${record.id}`)}>{text}</a>
+        <Space>
          <a onClick={() => navigate(`/app/events/${record.id}`)}>{text}</a>
          {record.featured && <Tag color="gold" style={{ fontSize: 11 }}>Featured</Tag>}
        </Space>
      ),
    },
    {
--- a/api/src/modules/ticketed-events/ticketed-events.schemas.ts
+++ b/api/src/modules/ticketed-events/ticketed-events.schemas.ts
@ -50,6 +50,7 @@ export const updateEventSchema = z.object({
  maxAttendees: z.number().int().positive().nullable().optional(),
  organizerName: z.string().max(200).nullable().optional(),
  organizerEmail: z.string().email().nullable().optional(),
  featured: z.boolean().optional(),
 });
 export const createTierSchema = z.object({
--- a/api/src/modules/volunteer-dashboard/volunteer-dashboard.service.ts
+++ b/api/src/modules/volunteer-dashboard/volunteer-dashboard.service.ts
@ -114,24 +114,31 @@ async function getReferral(userId: string): Promise<DashboardReferral> {
 async function getFeaturedEvent(): Promise<DashboardFeaturedEvent | null> {
  const today = new Date();
  today.setHours(0, 0, 0, 0);
-  const event = await prisma.ticketedEvent.findFirst({
+  const eventSelect = {
-    where: {
+    slug: true,
-      featured: true,
+    title: true,
-      status: TicketedEventStatus.PUBLISHED,
+    date: true,
-      date: { gte: today },
+    startTime: true,
-    },
+    venueName: true,
-    orderBy: { date: 'asc' },
+    coverImageUrl: true,
-    select: {
+    currentAttendees: true,
-      slug: true,
+    maxAttendees: true,
-      title: true,
+  } as const;
-      date: true,
+  const baseWhere = { status: TicketedEventStatus.PUBLISHED, date: { gte: today } };
-      startTime: true,
+
-      venueName: true,
+  // Prefer admin-featured event; fall back to next upcoming published event
-      coverImageUrl: true,
+  const event =
-      currentAttendees: true,
+    await prisma.ticketedEvent.findFirst({
-      maxAttendees: true,
+      where: { ...baseWhere, featured: true },
-    },
+      orderBy: { date: 'asc' },
-  });
+      select: eventSelect,
    }) ??
    await prisma.ticketedEvent.findFirst({
      where: baseWhere,
      orderBy: { date: 'asc' },
      select: eventSelect,
    });
  if (!event) return null;
  return {
    slug: event.slug,
--- a/changemaker-control-panel/admin/src/pages/AgentRegistrationsPage.tsx
+++ b/changemaker-control-panel/admin/src/pages/AgentRegistrationsPage.tsx
@ -14,7 +14,7 @@ export default function AgentRegistrationsPage() {
  const fetchRegistrations = useCallback(async () => {
    try {
      setLoading(true);
-      const { data } = await api.get('/api/agents/registrations');
+      const { data } = await api.get('/agents/registrations');
      setRegistrations(data);
    } catch {
      message.error('Failed to load registrations');
@ -27,7 +27,7 @@ export default function AgentRegistrationsPage() {
  const handleApprove = async (id: string) => {
    try {
-      await api.post(`/api/agents/registrations/${id}/approve`);
+      await api.post(`/agents/registrations/${id}/approve`);
      message.success('Registration approved — agent will receive certificates on next poll');
      fetchRegistrations();
      setDetailModal(null);
@ -39,7 +39,7 @@ export default function AgentRegistrationsPage() {
  const handleReject = async (id: string) => {
    try {
-      await api.post(`/api/agents/registrations/${id}/reject`);
+      await api.post(`/agents/registrations/${id}/reject`);
      message.success('Registration rejected');
      fetchRegistrations();
      setDetailModal(null);
--- a/changemaker-control-panel/admin/src/pages/BackupsPage.tsx
+++ b/changemaker-control-panel/admin/src/pages/BackupsPage.tsx
@ -203,8 +203,16 @@ export default function BackupsPage() {
          {
            title: 'Instance',
            dataIndex: 'instance',
-            width: 160,
+            width: 180,
-            render: (inst: BackupRow['instance']) => inst?.name || '-',
+            render: (inst: BackupRow['instance'], record: BackupRow) => {
              const isRemote = record.manifest?.source === 'remote';
              return (
                <Space size={4}>
                  <span>{inst?.name || '-'}</span>
                  {isRemote && <Tag color="blue">remote</Tag>}
                </Space>
              );
            },
          },
          {
            title: 'Status',
--- a/changemaker-control-panel/admin/src/pages/InstanceDetailPage.tsx
+++ b/changemaker-control-panel/admin/src/pages/InstanceDetailPage.tsx
@ -44,6 +44,7 @@ import {
  WarningOutlined,
  CloseCircleOutlined,
  InfoCircleOutlined,
  UndoOutlined,
 } from '@ant-design/icons';
 import dayjs from 'dayjs';
 import { useNavigate, useParams } from 'react-router-dom';
@ -89,6 +90,16 @@ export default function InstanceDetailPage() {
  const [backupsLoading, setBackupsLoading] = useState(false);
  const [creatingBackup, setCreatingBackup] = useState(false);
  // Restore state
  const [restoreModal, setRestoreModal] = useState<{ backup: Backup; typedSlug: string } | null>(null);
  const [restoring, setRestoring] = useState(false);
  const [activeRestoreId, setActiveRestoreId] = useState<string | null>(null);
  const [activeRestoreState, setActiveRestoreState] = useState<{
    status: string;
    logTail?: string | null;
    errorMessage?: string | null;
  } | null>(null);
  // Feature reconfiguration state
  const [featureFlags, setFeatureFlags] = useState<Record<string, boolean>>({});
  const [reconfiguring, setReconfiguring] = useState(false);
@ -109,6 +120,18 @@ export default function InstanceDetailPage() {
  const [tunnelSaving, setTunnelSaving] = useState(false);
  const [tunnelRemoving, setTunnelRemoving] = useState(false);
  // Remote tunnel state (Pangolin API managed by CCP)
  const [tunnelStatus, setTunnelStatus] = useState<{
    configured: boolean;
    online?: boolean;
    siteId?: string;
    endpoint?: string;
    resources?: Array<{ subdomain: string; name: string; resourceId: string; hasTarget: boolean; targetIp?: string; targetPort?: number }>;
  } | null>(null);
  const [tunnelStatusLoading, setTunnelStatusLoading] = useState(false);
  const [tunnelSetupRunning, setTunnelSetupRunning] = useState(false);
  const [tunnelSyncing, setTunnelSyncing] = useState(false);
  // Upgrade state
  const [updateStatus, setUpdateStatus] = useState<UpdateStatus | null>(null);
  const [checkingUpdate, setCheckingUpdate] = useState(false);
@ -390,6 +413,64 @@ export default function InstanceDetailPage() {
    window.open(`/api/backups/${backupId}/download`, '_blank');
  };
  /**
   * Confirm handler for the restore modal.
   *
   * Does nothing when no modal is open. Rejects the request with an error
   * toast when the typed slug differs from the instance slug. Otherwise posts
   * the restore request, stores the returned restore id and an initial
   * `PENDING` state, and closes the modal. The `restoring` flag is raised for
   * the duration of the request and always lowered afterwards.
   */
  const handleRestoreConfirm = async () => {
    if (!restoreModal) return;

    const { backup, typedSlug } = restoreModal;
    if (typedSlug !== instance?.slug) {
      message.error('Typed slug does not match — restore cancelled');
      return;
    }

    setRestoring(true);
    try {
      const response = await api.post(`/instances/${id}/restore`, { backupId: backup.id });
      setActiveRestoreId(response.data.data.id as string);
      setActiveRestoreState({ status: 'PENDING' });
      setRestoreModal(null);
      message.success('Restore started — polling for progress');
    } catch (caught: unknown) {
      // Prefer the API's own error message when the response carries one.
      const apiError = caught as { response?: { data?: { error?: { message?: string } } } };
      const detail = apiError?.response?.data?.error?.message;
      message.error(detail || 'Failed to start restore');
    } finally {
      setRestoring(false);
    }
  };
  // Poll the active restore's status every 3s until it completes or fails.
  //
  // Two guards keep the terminal toast from firing more than once:
  //  - `inFlight` skips a tick while the previous request is still pending, so
  //    slow responses cannot pile up and resolve out of order;
  //  - `stop()` is called as soon as a terminal status is seen, so the timer is
  //    cleared immediately instead of waiting for the effect cleanup.
  useEffect(() => {
    if (!activeRestoreId) return;
    let cancelled = false;
    let inFlight = false;

    const stop = () => {
      cancelled = true;
      clearInterval(handle);
    };

    const poll = async () => {
      if (cancelled || inFlight) return;
      inFlight = true;
      try {
        const { data } = await api.get(`/instances/${id}/restores/${activeRestoreId}`);
        if (cancelled) return;
        const row = data.data;
        setActiveRestoreState({
          status: row.status,
          logTail: row.logTail,
          errorMessage: row.errorMessage,
        });
        if (row.status === 'COMPLETED') {
          stop();
          message.success('Restore completed successfully');
          setActiveRestoreId(null);
          fetchBackups();
        } else if (row.status === 'FAILED') {
          stop();
          message.error(`Restore failed: ${row.errorMessage || 'unknown error'}`);
          setActiveRestoreId(null);
        }
      } catch {
        // keep trying; transient errors are expected during remote restart
      } finally {
        inFlight = false;
      }
    };

    // Timer is created before the first poll so `stop()` always has a handle to clear.
    const handle = setInterval(poll, 3000);
    poll();
    return stop;
  }, [activeRestoreId, id, fetchBackups]);
  // Initialize feature flags and tunnel form when instance loads
  useEffect(() => {
    if (instance) {
@ -508,6 +589,11 @@ export default function InstanceDetailPage() {
  const ports = instance.portConfig as Record<string, number>;
  const isProvisioning = instance.status === 'PROVISIONING';
  const isRegistered = instance.isRegistered;
  const isRemote = instance.isRemote;
  // A "managed" instance is one CCP can run backup/restore/upgrade on.
  // Local CCP-managed and remote (agent-backed) both qualify; only locally-
  // adopted registered instances (isRegistered && !isRemote) are unmanaged.
  const isManaged = !isRegistered || isRemote;
  const canStart = instance.status === 'STOPPED' || instance.status === 'ERROR';
  const canStop = instance.status === 'RUNNING' || instance.status === 'ERROR';
  const canRestart = instance.status === 'RUNNING';
@ -731,7 +817,7 @@ export default function InstanceDetailPage() {
  const backupsTab = (
    <div>
-      {isRegistered && (
+      {!isManaged && (
        <Alert
          message="Backups not managed by CCP"
          description="This instance was deployed outside the control panel. Use its own backup tools to manage backups."
@ -740,6 +826,15 @@ export default function InstanceDetailPage() {
          style={{ marginBottom: 16 }}
        />
      )}
      {isRemote && (
        <Alert
          message="Remote instance"
          description="Backups and restores run via the remote agent over mTLS. Create Backup triggers scripts/backup.sh on the remote host and streams the archive back to the control panel."
          type="info"
          showIcon
          style={{ marginBottom: 16 }}
        />
      )}
      <div style={{ marginBottom: 12, display: 'flex', justifyContent: 'space-between' }}>
        <Typography.Text type="secondary">
          {backups.length} backup{backups.length !== 1 ? 's' : ''}
@ -749,7 +844,7 @@ export default function InstanceDetailPage() {
          type="primary"
          onClick={handleCreateBackup}
          loading={creatingBackup}
-          disabled={instance.status !== 'RUNNING' || isRegistered}
+          disabled={instance.status !== 'RUNNING' || !isManaged}
        >
          Create Backup
        </Button>
@ -784,20 +879,36 @@ export default function InstanceDetailPage() {
            {
              title: 'Size',
              dataIndex: 'sizeBytes',
-              render: (b: number | null) => (b ? `${(b / 1024 / 1024).toFixed(1)} MB` : '-'),
+              render: (b: number | string | null) => {
                if (b == null) return '-';
                const n = typeof b === 'string' ? parseInt(b, 10) : b;
                return `${(n / 1024 / 1024).toFixed(1)} MB`;
              },
            },
            {
              title: 'Actions',
-              width: 120,
+              width: 160,
              render: (_: unknown, record: Backup) => (
                <Space size="small">
                  {record.status === 'COMPLETED' && (
-                    <Button
+                    <>
-                      icon={<CloudDownloadOutlined />}
+                      <Button
-                      size="small"
+                        icon={<CloudDownloadOutlined />}
-                      type="text"
+                        size="small"
-                      onClick={() => handleDownloadBackup(record.id)}
+                        type="text"
-                    />
+                        title="Download archive"
                        onClick={() => handleDownloadBackup(record.id)}
                      />
                      {isManaged && (
                        <Button
                          icon={<UndoOutlined />}
                          size="small"
                          type="text"
                          title="Restore this backup (destructive)"
                          onClick={() => setRestoreModal({ backup: record, typedSlug: '' })}
                        />
                      )}
                    </>
                  )}
                  <Popconfirm
                    title="Delete this backup?"
@ -1049,7 +1160,73 @@ export default function InstanceDetailPage() {
  );
  const tunnelConfigured = !!(instance.pangolinEndpoint && instance.pangolinNewtId);
-  const canConfigureTunnel = !isRegistered && (instance.status === 'RUNNING' || instance.status === 'STOPPED');
+  const canConfigureTunnel = isManaged && (instance.status === 'RUNNING' || instance.status === 'STOPPED');
  // Loads the tunnel status for a remote instance into `tunnelStatus`.
  // No-op for non-remote instances. Any request failure resets the status to
  // null; the loading flag is cleared in every case.
  //
  // NOTE(review): this hook sits far down the component body, after code that
  // dereferences `instance` without optional chaining. If an early return
  // precedes it, hooks would be called conditionally (Rules of Hooks) — confirm
  // and hoist above any early return if so.
  const fetchTunnelStatus = useCallback(async () => {
    if (isRemote) {
      setTunnelStatusLoading(true);
      try {
        const response = await api.get(`/instances/${id}/tunnel/status`);
        setTunnelStatus(response.data.data);
      } catch {
        setTunnelStatus(null);
      } finally {
        setTunnelStatusLoading(false);
      }
    }
  }, [id, isRemote]);
  // Refresh the tunnel status whenever the Tunnel tab is the active tab of a
  // remote instance.
  useEffect(() => {
    const tunnelTabOpen = activeTab === 'tunnel';
    if (!tunnelTabOpen || !isRemote) return;
    fetchTunnelStatus();
  }, [activeTab, isRemote, fetchTunnelStatus]);
  /**
   * Submit handler for the remote "Setup Tunnel" form.
   *
   * Posts the chosen subdomain prefix (falling back to the instance slug when
   * the field is empty), then reloads the instance and the tunnel status. The
   * `tunnelSetupRunning` flag is raised while the request is pending.
   *
   * @param values Form values; `subdomainPrefix` may be missing or empty.
   */
  const handleRemoteTunnelSetup = async (values: { subdomainPrefix?: string }) => {
    const subdomainPrefix = values.subdomainPrefix || instance.slug;

    setTunnelSetupRunning(true);
    try {
      await api.post(`/instances/${id}/tunnel/setup`, { subdomainPrefix });
      message.success('Tunnel setup complete — Newt credentials pushed to remote instance');
      fetchInstance();
      fetchTunnelStatus();
    } catch (caught: unknown) {
      // Prefer the API's own error message when the response carries one.
      const apiError = caught as { response?: { data?: { error?: { message?: string } } } };
      const detail = apiError?.response?.data?.error?.message;
      message.error(detail || 'Tunnel setup failed');
    } finally {
      setTunnelSetupRunning(false);
    }
  };
  /**
   * Triggers a tunnel resource sync for this instance, reports how many
   * resources the API says were created, and reloads the tunnel status.
   * The `tunnelSyncing` flag is raised while the request is pending.
   */
  const handleTunnelSync = async () => {
    setTunnelSyncing(true);
    try {
      const response = await api.post(`/instances/${id}/tunnel/sync`);
      const created = response.data.data.created;
      message.success(`Sync complete — ${created} new resource(s) created`);
      fetchTunnelStatus();
    } catch (caught: unknown) {
      // Prefer the API's own error message when the response carries one.
      const apiError = caught as { response?: { data?: { error?: { message?: string } } } };
      const detail = apiError?.response?.data?.error?.message;
      message.error(detail || 'Sync failed');
    } finally {
      setTunnelSyncing(false);
    }
  };
  /**
   * Tears down the remote tunnel: issues the DELETE request, reloads the
   * instance and clears the cached tunnel status. The `tunnelRemoving` flag is
   * raised while the request is pending.
   */
  const handleRemoteTunnelTeardown = async () => {
    setTunnelRemoving(true);
    try {
      await api.delete(`/instances/${id}/tunnel`);
      message.success('Tunnel torn down — Pangolin site deleted');
      fetchInstance();
      setTunnelStatus(null);
    } catch (caught: unknown) {
      // Prefer the API's own error message when the response carries one.
      const apiError = caught as { response?: { data?: { error?: { message?: string } } } };
      const detail = apiError?.response?.data?.error?.message;
      message.error(detail || 'Teardown failed');
    } finally {
      setTunnelRemoving(false);
    }
  };
  const handleConfigureTunnel = async (values: { pangolinEndpoint: string; pangolinNewtId: string; pangolinNewtSecret?: string }) => {
    setTunnelSaving(true);
@ -1088,9 +1265,111 @@ export default function InstanceDetailPage() {
    }
  };
-  const tunnelTab = (
+  const remoteTunnelTab = (
    <Space direction="vertical" size="large" style={{ width: '100%' }}>
-      {isRegistered && (
+      {tunnelStatus?.configured ? (
        <>
          <Alert
            message={`Tunnel active — ${tunnelStatus.online ? 'online' : 'offline'}`}
            description={`Connected to ${tunnelStatus.endpoint || instance.pangolinEndpoint} (site: ${tunnelStatus.siteId})`}
            type={tunnelStatus.online ? 'success' : 'warning'}
            showIcon
            icon={<CloudOutlined />}
          />
          <Card title="Current Configuration" size="small">
            <Descriptions bordered column={1}>
              <Descriptions.Item label="Endpoint">
                <Typography.Text copyable>{tunnelStatus.endpoint || instance.pangolinEndpoint}</Typography.Text>
              </Descriptions.Item>
              <Descriptions.Item label="Site ID">
                <Typography.Text copyable>{tunnelStatus.siteId || instance.pangolinSiteId}</Typography.Text>
              </Descriptions.Item>
              <Descriptions.Item label="Newt ID">
                <Typography.Text copyable>{instance.pangolinNewtId}</Typography.Text>
              </Descriptions.Item>
              <Descriptions.Item label="Status">
                <Tag color={tunnelStatus.online ? 'green' : 'orange'}>{tunnelStatus.online ? 'Online' : 'Offline'}</Tag>
              </Descriptions.Item>
            </Descriptions>
          </Card>
          {tunnelStatus.resources && tunnelStatus.resources.length > 0 && (
            <Card
              title="Resources"
              size="small"
              extra={
                <Space>
                  <Button icon={<SyncOutlined />} size="small" onClick={handleTunnelSync} loading={tunnelSyncing}>
                    Sync
                  </Button>
                  <Button icon={<ReloadOutlined />} size="small" onClick={fetchTunnelStatus} loading={tunnelStatusLoading}>
                    Refresh
                  </Button>
                </Space>
              }
            >
              <Table
                dataSource={tunnelStatus.resources}
                rowKey="resourceId"
                size="small"
                pagination={false}
                columns={[
                  { title: 'Subdomain', dataIndex: 'subdomain', render: (s: string) => s || '(root)' },
                  { title: 'Name', dataIndex: 'name' },
                  { title: 'Target', render: (_: unknown, r: { hasTarget: boolean; targetIp?: string; targetPort?: number }) =>
                    r.hasTarget ? `${r.targetIp}:${r.targetPort}` : <Tag color="red">No target</Tag>
                  },
                ]}
              />
            </Card>
          )}
          <Popconfirm
            title="Tear down tunnel?"
            description="This will delete the Pangolin site and all resources. The Newt container will be stopped."
            onConfirm={handleRemoteTunnelTeardown}
          >
            <Button danger icon={<DisconnectOutlined />} loading={tunnelRemoving}>
              Teardown Tunnel
            </Button>
          </Popconfirm>
        </>
      ) : (
        <>
          <Alert
            message="No tunnel configured"
            description="The CCP will create a Pangolin site and resources for this instance, push Newt credentials to its .env, and start the tunnel container."
            type="info"
            showIcon
          />
          <Card title="Setup Tunnel" size="small">
            <Form layout="vertical" onFinish={handleRemoteTunnelSetup}>
              <Form.Item
                name="subdomainPrefix"
                label="Subdomain Prefix"
                initialValue={instance.slug}
                extra={`Resources will be created as <prefix>-app.${instance.domain}, <prefix>-api.${instance.domain}, etc.`}
                rules={[{ required: true }, { pattern: /^[a-z0-9-]+$/, message: 'Lowercase alphanumeric + hyphens only' }]}
              >
                <Input placeholder={instance.slug} />
              </Form.Item>
              <Form.Item style={{ marginBottom: 0 }}>
                <Button type="primary" htmlType="submit" icon={<CloudOutlined />} loading={tunnelSetupRunning}>
                  Setup Tunnel
                </Button>
              </Form.Item>
            </Form>
          </Card>
        </>
      )}
    </Space>
  );
  const localTunnelTab = (
    <Space direction="vertical" size="large" style={{ width: '100%' }}>
      {!isManaged && (
        <Alert
          message="Tunnel management is not available for external instances"
          description="This instance was deployed outside the control panel. Manage its tunnel configuration directly."
@ -1099,7 +1378,7 @@ export default function InstanceDetailPage() {
        />
      )}
-      {!isRegistered && tunnelConfigured && (
+      {isManaged && tunnelConfigured && (
        <Alert
          message={`Tunnel active — connected to ${instance.pangolinEndpoint}`}
          type="success"
@ -1108,7 +1387,7 @@ export default function InstanceDetailPage() {
        />
      )}
-      {!isRegistered && !tunnelConfigured && (
+      {isManaged && !tunnelConfigured && (
        <Alert
          message="No tunnel configured"
          description="Enter your Pangolin Newt credentials below to enable tunnel access for this instance. You can get these from your Pangolin dashboard."
@ -1133,7 +1412,7 @@ export default function InstanceDetailPage() {
        </Card>
      )}
-      {canConfigureTunnel && (
+      {canConfigureTunnel && !isRemote && (
        <Card title={tunnelConfigured ? 'Update Tunnel' : 'Enable Tunnel'} size="small">
          <Form
            form={tunnelForm}
@ -1200,6 +1479,8 @@ export default function InstanceDetailPage() {
    </Space>
  );
  const tunnelTab = isRemote ? remoteTunnelTab : localTunnelTab;
  // ─── Updates Tab ──────────────────────────────────────────────
  const isUpgrading = currentUpgrade?.status === 'IN_PROGRESS' || currentUpgrade?.status === 'PENDING';
@ -1278,7 +1559,7 @@ export default function InstanceDetailPage() {
      )}
      {/* Upgrade Action */}
-      {!isRegistered && (
+      {isManaged && (
        <Card title="Upgrade" size="small">
          {isUpgrading && currentUpgrade ? (
            <Space direction="vertical" style={{ width: '100%' }}>
@ -1340,7 +1621,7 @@ export default function InstanceDetailPage() {
        </Card>
      )}
-      {isRegistered && (
+      {!isManaged && (
        <Alert
          message="Upgrades are not managed by CCP for external instances"
          description="Run the upgrade script directly on the instance or use its own upgrade mechanism."
@ -1348,6 +1629,14 @@ export default function InstanceDetailPage() {
          showIcon
        />
      )}
      {isRemote && (
        <Alert
          message="Remote instance"
          description="Upgrades run via the remote agent over mTLS. The agent shells out to scripts/upgrade.sh --api-mode and the control panel polls progress every 2s."
          type="info"
          showIcon
        />
      )}
      {/* Upgrade History */}
      <Card title="Upgrade History" size="small">
@ -1794,6 +2083,108 @@ export default function InstanceDetailPage() {
          { key: 'tunnel', label: 'Tunnel', children: tunnelTab },
        ]}
      />
      {/* Restore confirmation modal (destructive action guard) */}
      <Modal
        title="Restore backup — destructive"
        open={!!restoreModal}
        onCancel={() => setRestoreModal(null)}
        onOk={handleRestoreConfirm}
        okText="Restore"
        okButtonProps={{
          danger: true,
          loading: restoring,
          disabled: restoreModal?.typedSlug !== instance.slug,
        }}
        cancelButtonProps={{ disabled: restoring }}
        width={560}
      >
        <Alert
          type="error"
          showIcon
          message="This will OVERWRITE the instance's databases and uploads"
          description="The agent will stop application containers, drop databases, and restore from the selected backup. This cannot be undone without another backup."
          style={{ marginBottom: 16 }}
        />
        {restoreModal && (
          <Descriptions column={1} size="small" bordered style={{ marginBottom: 16 }}>
            <Descriptions.Item label="Backup ID">
              <code>{restoreModal.backup.id.substring(0, 8)}</code>
            </Descriptions.Item>
            <Descriptions.Item label="Archive size">
              {restoreModal.backup.sizeBytes
                ? `${(Number(restoreModal.backup.sizeBytes) / 1024 / 1024).toFixed(1)} MB`
                : '-'}
            </Descriptions.Item>
            <Descriptions.Item label="Created">
              {restoreModal.backup.completedAt
                ? dayjs(restoreModal.backup.completedAt).format('YYYY-MM-DD HH:mm')
                : '-'}
            </Descriptions.Item>
          </Descriptions>
        )}
        <Typography.Paragraph>
          Type the instance slug <strong><code>{instance.slug}</code></strong> to confirm:
        </Typography.Paragraph>
        <Input
          value={restoreModal?.typedSlug || ''}
          onChange={(e) =>
            setRestoreModal((cur) => (cur ? { ...cur, typedSlug: e.target.value } : cur))
          }
          placeholder={instance.slug}
          autoFocus
        />
      </Modal>
      {/* Active restore progress banner */}
      {activeRestoreId && activeRestoreState && (
        <Modal
          title="Restore in progress"
          open
          closable={false}
          footer={null}
          width={640}
        >
          <Space direction="vertical" style={{ width: '100%' }}>
            <div>
              <Tag
                color={
                  activeRestoreState.status === 'COMPLETED'
                    ? 'green'
                    : activeRestoreState.status === 'FAILED'
                    ? 'red'
                    : 'processing'
                }
              >
                {activeRestoreState.status}
              </Tag>
              {activeRestoreState.status === 'RUNNING' && (
                <Typography.Text type="secondary" style={{ marginLeft: 8 }}>
                  Agent is running scripts/restore.sh — this can take several minutes
                </Typography.Text>
              )}
            </div>
            {activeRestoreState.errorMessage && (
              <Alert type="error" message={activeRestoreState.errorMessage} showIcon />
            )}
            {activeRestoreState.logTail && (
              <pre
                style={{
                  background: '#1e1e1e',
                  color: '#d4d4d4',
                  padding: 12,
                  maxHeight: 300,
                  overflow: 'auto',
                  fontSize: 12,
                  borderRadius: 4,
                }}
              >
                {activeRestoreState.logTail}
              </pre>
            )}
          </Space>
        </Modal>
      )}
    </div>
  );
 }
--- a/changemaker-control-panel/admin/src/pages/InviteCodesPage.tsx
+++ b/changemaker-control-panel/admin/src/pages/InviteCodesPage.tsx
@ -14,7 +14,7 @@ export default function InviteCodesPage() {
  const fetchCodes = useCallback(async () => {
    try {
      setLoading(true);
-      const { data } = await api.get('/api/invite-codes');
+      const { data } = await api.get('/invite-codes');
      setCodes(data.data || []);
    } catch {
      message.error('Failed to load invite codes');
@ -28,7 +28,7 @@ export default function InviteCodesPage() {
  const handleCreate = async () => {
    try {
      setCreating(true);
-      const { data } = await api.post('/api/invite-codes');
+      const { data } = await api.post('/invite-codes');
      message.success(`Invite code created: ${data.code}`);
      fetchCodes();
    } catch {
@ -40,7 +40,7 @@ export default function InviteCodesPage() {
  const handleRevoke = async (id: string) => {
    try {
-      await api.delete(`/api/invite-codes/${id}`);
+      await api.delete(`/invite-codes/${id}`);
      message.success('Invite code revoked');
      fetchCodes();
    } catch {
--- a/changemaker-control-panel/agent/src/config/env.ts
+++ b/changemaker-control-panel/agent/src/config/env.ts
@ -26,6 +26,7 @@ const envSchema = z.object({
  INSTANCE_SLUG: z.string().default(''),
  INSTANCE_DOMAIN: z.string().default(''),
  INSTANCE_BASE_PATH: z.string().default(''),
  COMPOSE_PROJECT: z.string().default(''),
 });
 function validateEnv() {
--- a/changemaker-control-panel/agent/src/routes/backup.routes.ts
+++ b/changemaker-control-panel/agent/src/routes/backup.routes.ts
@ -1,105 +1,623 @@
 import { Router, Request, Response } from 'express';
 import { param } from '../utils/params';
 import fs from 'fs/promises';
-import path from 'path';
+import { createReadStream, createWriteStream } from 'fs';
-import { exec as execCb } from 'child_process';
+import { pipeline as pipelineCb, Transform } from 'stream';
 import { promisify } from 'util';
-import * as docker from '../services/docker.service';
+import path from 'path';
 import crypto from 'crypto';
 import { spawn } from 'child_process';
 import { getSlugEntry } from '../services/registry.service';
 import { env } from '../config/env';
 import { logger } from '../utils/logger';
 import { withSlugLock, SlugBusyError, isSlugLocked } from '../services/slug-mutex';
 import { AgentError } from '../middleware/error-handler';
 const pipeline = promisify(pipelineCb);
 const exec = promisify(execCb);
 const router = Router();
-// POST /instance/:slug/backup — Run pg_dump + tar uploads → return backup info
+// ─── Helpers ──────────────────────────────────────────────────────────
 router.post('/instance/:slug/backup', async (req: Request, res: Response) => {
  const entry = await getSlugEntry(param(req, 'slug'));
  const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
  const backupDir = path.join(env.AGENT_DATA_DIR, 'backups', param(req, 'slug'), timestamp);
  await fs.mkdir(backupDir, { recursive: true });
-  const { pgPassword } = req.body;
+const ID_REGEX = /^[a-zA-Z0-9_-]+$/;
 const ARCHIVE_PREFIX = 'changemaker-v2-backup-';
 const ARCHIVE_SUFFIX = '.tar.gz';
 /** Directory under AGENT_DATA_DIR that holds the backup archives for `slug`. */
 function backupsDirFor(slug: string): string {
  const backupsRoot = path.join(env.AGENT_DATA_DIR, 'backups');
  return path.join(backupsRoot, slug);
 }
 /** Path of the archive with backup id `id` inside the slug's backups directory. */
 function archivePathFor(slug: string, id: string): string {
  const archiveName = ARCHIVE_PREFIX + id + ARCHIVE_SUFFIX;
  return path.join(backupsDirFor(slug), archiveName);
 }
 /**
 * Compute the hex-encoded SHA-256 digest of a file by streaming it, so the
 * whole file is never held in memory at once. Rejects if the file cannot be read.
 */
 async function sha256File(filePath: string): Promise<string> {
  const digest = crypto.createHash('sha256');
  for await (const piece of createReadStream(filePath)) {
    digest.update(piece);
  }
  return digest.digest('hex');
 }
 /**
 * Read the manifest.json out of a backup archive without extracting it to disk.
 * backup.sh stores it at <archive>/changemaker-v2-backup-<ts>/manifest.json
 *
 * @param archivePath Path of the .tar.gz archive to inspect.
 * @returns The parsed manifest, or `null` when tar cannot be spawned, exits
 *          non-zero, prints nothing, or prints something that is not valid
 *          JSON. This function never rejects.
 */
 async function readManifestFromArchive(archivePath: string): Promise<unknown | null> {
  return new Promise((resolve) => {
    // -O writes the matching member to stdout; stderr is discarded.
    const proc = spawn('tar', ['-xzOf', archivePath, '--wildcards', '*/manifest.json'], {
      stdio: ['ignore', 'pipe', 'ignore'],
    });
    // Collect raw bytes and decode once at the end: decoding each chunk on its
    // own corrupts any multi-byte UTF-8 character that straddles a chunk boundary.
    const chunks: Buffer[] = [];
    proc.stdout.on('data', (chunk: Buffer) => {
      chunks.push(chunk);
    });
    proc.on('error', () => resolve(null));
    proc.on('close', (code) => {
      const text = Buffer.concat(chunks).toString('utf-8');
      if (code !== 0 || !text.trim()) return resolve(null);
      try {
        resolve(JSON.parse(text));
      } catch {
        resolve(null);
      }
    });
  });
 }
 /**
 * Recover the backup id (the timestamp portion) from an archive filename such
 * as "changemaker-v2-backup-20260409_143000.tar.gz". Returns null when the
 * name lacks the expected prefix or suffix.
 */
 function idFromFilename(filename: string): string | null {
  const wellFormed = filename.startsWith(ARCHIVE_PREFIX) && filename.endsWith(ARCHIVE_SUFFIX);
  if (!wellFormed) return null;
  const idStart = ARCHIVE_PREFIX.length;
  const idEnd = filename.length - ARCHIVE_SUFFIX.length;
  return filename.slice(idStart, idEnd);
 }
 // ─── Routes ───────────────────────────────────────────────────────────
 /**
 * POST /instance/:slug/backup
 * Shells out to the remote CML's scripts/backup.sh. Returns archive metadata
 * so the CCP can immediately stream it down via the /download endpoint.
 */
 router.post('/instance/:slug/backup', async (req: Request, res: Response) => {
  const slug = param(req, 'slug');
  const entry = await getSlugEntry(slug);
  try {
-    // 1. pg_dump
+    const result = await withSlugLock(slug, 'backup', async () => {
-    const dumpFile = path.join(backupDir, 'database.sql');
+      const backupsDir = backupsDirFor(slug);
-    const dump = await docker.composeExec(
+      await fs.mkdir(backupsDir, { recursive: true });
      entry.basePath, entry.composeProject,
      'v2-postgres',
      'pg_dump -U changemaker -d changemaker',
      300_000,
      pgPassword ? { PGPASSWORD: pgPassword } : undefined
    );
    await fs.writeFile(dumpFile, dump, 'utf-8');
-    // Gzip the dump
+      // Verify scripts/backup.sh exists
-    await exec(`gzip '${dumpFile}'`, { timeout: 120_000 });
+      const scriptPath = path.join(entry.basePath, 'scripts', 'backup.sh');
      try {
        await fs.access(scriptPath);
      } catch {
        throw new AgentError(500, `scripts/backup.sh not found at ${scriptPath}`, 'BACKUP_SCRIPT_MISSING');
      }
-    // 2. Tar uploads if exists
+      // Snapshot existing archive filenames so we can identify the new one
-    const uploadsDir = path.join(entry.basePath, 'uploads');
+      const existingFiles = new Set(
-    let hasUploads = false;
+        (await fs.readdir(backupsDir)).filter((f) => f.startsWith(ARCHIVE_PREFIX) && f.endsWith(ARCHIVE_SUFFIX))
    try {
      await fs.access(uploadsDir);
      hasUploads = true;
    } catch { /* no uploads dir */ }
    if (hasUploads) {
      await exec(
        `tar -czf '${path.join(backupDir, 'uploads.tar.gz')}' -C '${entry.basePath}' uploads`,
        { timeout: 300_000 }
      );
    }
-    // 3. Create final archive
+      const logPath = path.join(backupsDir, `backup-${Date.now()}.log`);
-    const archiveName = `backup-${param(req, 'slug')}-${timestamp}.tar.gz`;
+      const logFd = await fs.open(logPath, 'w');
    const archivePath = path.join(env.AGENT_DATA_DIR, 'backups', archiveName);
    await exec(
      `tar -czf '${archivePath}' -C '${path.dirname(backupDir)}' '${timestamp}'`,
      { timeout: 300_000 }
    );
-    // Clean up temp dir
+      // Spawn backup.sh with cwd=basePath so its .env detection works.
-    await fs.rm(backupDir, { recursive: true, force: true });
+      // Retention is effectively disabled here — CCP manages retention of
      // the streamed-down archives, not the agent's transient copies.
      //
      // Container names: backup.sh defaults to `changemaker-v2-postgres` and
      // `listmonk-db`, which match the main CML's `container_name:` overrides.
      // If a deployment has custom naming, the operator can set PG_CONTAINER /
      // LISTMONK_PG_CONTAINER in the instance's own .env (backup.sh loads it).
      const spawnEnv: NodeJS.ProcessEnv = {
        ...process.env,
        BACKUP_DIR: backupsDir,
        RETENTION_DAYS: '36500', // ~100 years; CCP controls retention
      };
-    const stats = await fs.stat(archivePath);
+      logger.info(`[backup] Running scripts/backup.sh for ${slug} (basePath=${entry.basePath})`);
    const backupId = timestamp;
-    logger.info(`[backup] Created backup for ${param(req, 'slug')}: ${archivePath} (${stats.size} bytes)`);
+      const exitCode: number = await new Promise((resolve, reject) => {
        const proc = spawn('bash', ['scripts/backup.sh'], {
          cwd: entry.basePath,
          env: spawnEnv,
          stdio: ['ignore', 'pipe', 'pipe'],
        });
        proc.stdout.on('data', (chunk) => logFd.write(chunk).catch(() => {}));
        proc.stderr.on('data', (chunk) => logFd.write(chunk).catch(() => {}));
        proc.on('error', reject);
        proc.on('close', (code) => resolve(code ?? 1));
      });
-    res.json({
+      await logFd.close();
-      backupId,
+
-      archivePath,
+      if (exitCode !== 0) {
-      sizeBytes: stats.size,
+        // Return the tail of the log so the CCP can display it
-      timestamp,
+        let logTail = '';
        try {
          const fullLog = await fs.readFile(logPath, 'utf-8');
          logTail = fullLog.split('\n').slice(-40).join('\n');
        } catch { /* ignore */ }
        throw new AgentError(500, `backup.sh exited with code ${exitCode}\n${logTail}`, 'BACKUP_FAILED');
      }
      // Find the new archive
      const afterFiles = (await fs.readdir(backupsDir)).filter(
        (f) => f.startsWith(ARCHIVE_PREFIX) && f.endsWith(ARCHIVE_SUFFIX)
      );
      const newFiles = afterFiles.filter((f) => !existingFiles.has(f));
      if (newFiles.length === 0) {
        throw new AgentError(500, 'backup.sh succeeded but no new archive was created', 'BACKUP_NO_OUTPUT');
      }
      // Pick the lexicographically last filename; ids are timestamps, so that is the newest (in case of oddities)
      newFiles.sort();
      const newest = newFiles[newFiles.length - 1] as string;
      const archivePath = path.join(backupsDir, newest);
      const backupId = idFromFilename(newest);
      if (!backupId || !ID_REGEX.test(backupId)) {
        throw new AgentError(500, `Unexpected archive filename: ${newest}`, 'BACKUP_NAME_INVALID');
      }
      const stats = await fs.stat(archivePath);
      const sha256 = await sha256File(archivePath);
      const manifest = await readManifestFromArchive(archivePath);
      // Delete the log file once we know the backup succeeded
      try { await fs.unlink(logPath); } catch { /* ignore */ }
      logger.info(`[backup] ${slug}: created ${newest} (${stats.size} bytes, sha256=${sha256.substring(0, 16)}...)`);
      return {
        backupId,
        filename: newest,
        sizeBytes: stats.size,
        sha256,
        manifest,
        createdAt: stats.mtime.toISOString(),
      };
    });
    res.json(result);
  } catch (err) {
-    // Clean up on failure
+    if (err instanceof SlugBusyError) {
-    try { await fs.rm(backupDir, { recursive: true, force: true }); } catch { /* ignore */ }
+      res.status(409).json({ error: 'SLUG_BUSY', message: err.message });
      return;
    }
    throw err;
  }
 });
-// GET /instance/:slug/backup/:id/download — Stream backup archive
+/**
-router.get('/instance/:slug/backup/:id/download', async (req: Request, res: Response) => {
+ * GET /instance/:slug/backups
-  const archiveName = `backup-${param(req, 'slug')}-${param(req, 'id')}.tar.gz`;
+ * Lists backup archives currently held on the agent for this slug.
-  const archivePath = path.join(env.AGENT_DATA_DIR, 'backups', archiveName);
+ */
 router.get('/instance/:slug/backups', async (req: Request, res: Response) => {
  const slug = param(req, 'slug');
  await getSlugEntry(slug); // validate slug is registered
  const backupsDir = backupsDirFor(slug);
  let entries: string[] = [];
  try {
-    await fs.access(archivePath);
+    entries = await fs.readdir(backupsDir);
  } catch {
    res.json({ data: [] });
    return;
  }
  const results = [];
  for (const filename of entries) {
    const id = idFromFilename(filename);
    if (!id) continue;
    try {
      const stats = await fs.stat(path.join(backupsDir, filename));
      results.push({
        backupId: id,
        filename,
        sizeBytes: stats.size,
        createdAt: stats.mtime.toISOString(),
      });
    } catch { /* skip */ }
  }
  results.sort((a, b) => (a.createdAt < b.createdAt ? 1 : -1));
  res.json({ data: results });
 });
 /**
 * GET /instance/:slug/backup/:id/download
 * Streams the backup archive (sends Content-Length so the CCP can verify size).
 *
 * Responses:
 *   200 — gzip stream of the archive
 *   400 INVALID_ID — id does not match ID_REGEX
 *   404 NOT_FOUND — the archive cannot be stat'ed
 *   500 — the file could not be opened/read before any bytes were sent
 */
 router.get('/instance/:slug/backup/:id/download', async (req: Request, res: Response) => {
  const slug = param(req, 'slug');
  const id = param(req, 'id');
  if (!ID_REGEX.test(id)) {
    res.status(400).json({ error: 'INVALID_ID', message: 'Invalid backup id' });
    return;
  }
  await getSlugEntry(slug);
  const archivePath = archivePathFor(slug, id);

  // Only a failed stat means "not found"; anything that goes wrong later is
  // a real error and must not be reported as 404.
  const stats = await fs.stat(archivePath).catch(() => null);
  if (!stats) {
    res.status(404).json({ error: 'NOT_FOUND', message: 'Backup archive not found' });
    return;
  }

  res.setHeader('Content-Type', 'application/gzip');
  res.setHeader('Content-Length', String(stats.size));
  res.setHeader('Content-Disposition', `attachment; filename="${path.basename(archivePath)}"`);

  const stream = createReadStream(archivePath);
  stream.on('error', (err) => {
    logger.error(`[backup] stream error for ${archivePath}: ${err.message}`);
    if (!res.headersSent) {
      // Drop the download headers first: an empty 500 body sent with the
      // archive's Content-Length would leave the client waiting for bytes.
      res.removeHeader('Content-Type');
      res.removeHeader('Content-Length');
      res.removeHeader('Content-Disposition');
      res.status(500).end();
    } else {
      res.destroy(err);
    }
  });
  // pipe() does not destroy the source when the destination closes early, so
  // release the file descriptor ourselves if the client disconnects mid-download.
  // After a normal finish this is a no-op.
  res.on('close', () => stream.destroy());
  stream.pipe(res);
 });
-  const stats = await fs.stat(archivePath);
+/**
-  res.setHeader('Content-Type', 'application/gzip');
+ * DELETE /instance/:slug/backup/:id
-  res.setHeader('Content-Length', stats.size);
+ * Deletes the archive from the agent's disk. The CCP calls this after it has
-  res.setHeader('Content-Disposition', `attachment; filename="${archiveName}"`);
+ * successfully streamed the archive to its own storage.
 */
 router.delete('/instance/:slug/backup/:id', async (req: Request, res: Response) => {
  const slug = param(req, 'slug');
  const id = param(req, 'id');
  if (!ID_REGEX.test(id)) {
    res.status(400).json({ error: 'INVALID_ID', message: 'Invalid backup id' });
    return;
  }
  await getSlugEntry(slug);
-  const { createReadStream } = await import('fs');
+  const archivePath = archivePathFor(slug, id);
-  const stream = createReadStream(archivePath);
+  // Path traversal defense: ensure the resolved path is still inside the slug's backups dir
-  stream.pipe(res);
+  const resolved = path.resolve(archivePath);
  const boundary = path.resolve(backupsDirFor(slug));
  if (!resolved.startsWith(boundary + path.sep)) {
    res.status(400).json({ error: 'INVALID_ID', message: 'Invalid backup id' });
    return;
  }
  try {
    await fs.unlink(archivePath);
    logger.info(`[backup] ${slug}: deleted ${path.basename(archivePath)}`);
    res.json({ deleted: true });
  } catch (err) {
    const code = (err as NodeJS.ErrnoException).code;
    if (code === 'ENOENT') {
      res.status(404).json({ error: 'NOT_FOUND', message: 'Backup archive not found' });
      return;
    }
    throw err;
  }
 });
 // ─── Restore ──────────────────────────────────────────────────────────
 // Upper bound (20 GiB) for a single restore upload. The CCP is trusted, but a
 // buggy or compromised CCP must not be able to fill the agent's disk with one
 // request. This is well above any realistic Changemaker Lite backup size.
 const MAX_RESTORE_UPLOAD_BYTES = 20 * 1024 ** 3;
 /** Directory under AGENT_DATA_DIR that holds all restore uploads for `slug`. */
 function restoresDirFor(slug: string): string {
  const restoresRoot = path.join(env.AGENT_DATA_DIR, 'restores');
  return path.join(restoresRoot, slug);
 }
 /** Working directory of one restore upload: the slug's restores directory plus `uploadId`. */
 function restoreUploadDir(slug: string, uploadId: string): string {
  const slugRestoresDir = restoresDirFor(slug);
  return path.join(slugRestoresDir, uploadId);
 }
 /**
 * Shape of restore-state.json, the per-upload progress record that the
 * progress endpoint returns to the CCP.
 */
 interface RestoreState {
  // UPLOADED: archive received and hash-verified; RUNNING: restore.sh started;
  // COMPLETED / FAILED: restore.sh exited with zero / non-zero (or could not run).
  status: 'UPLOADED' | 'RUNNING' | 'COMPLETED' | 'FAILED';
  // Random hex id generated by the upload route; also the directory name.
  uploadId: string;
  // ISO-8601 timestamp (Date#toISOString) of when this state began.
  startedAt: string;
  // ISO-8601 timestamp set once the restore has finished or failed.
  completedAt?: string;
  // Exit code of restore.sh, when it ran to completion.
  exitCode?: number;
  // Last lines of restore.log, captured when restore.sh exits.
  logTail?: string;
  // Human-readable failure reason; only set for FAILED.
  errorMessage?: string;
  // The skip/dry-run flags the apply request was started with.
  options?: Record<string, unknown>;
 }
 /**
 * Load restore-state.json for an upload. Resolves to null when the file is
 * missing, unreadable, or does not contain valid JSON.
 */
 async function readRestoreState(slug: string, uploadId: string): Promise<RestoreState | null> {
  const stateFile = path.join(restoreUploadDir(slug, uploadId), 'restore-state.json');
  let raw: string;
  try {
    raw = await fs.readFile(stateFile, 'utf-8');
  } catch {
    return null;
  }
  try {
    return JSON.parse(raw) as RestoreState;
  } catch {
    return null;
  }
 }
 /**
 * Persist `state` as restore-state.json inside the upload's directory.
 *
 * The JSON is written to a uniquely named temp file in the same directory and
 * then renamed over the target. Writing in place truncates the file first, so
 * a concurrent readRestoreState() (the progress endpoint is polled while a
 * restore runs) could see an empty or partial document and report the restore
 * as missing.
 *
 * Rejects if the directory does not exist or the write/rename fails; the temp
 * file is removed on failure.
 */
 async function writeRestoreState(slug: string, uploadId: string, state: RestoreState): Promise<void> {
  const statePath = path.join(restoreUploadDir(slug, uploadId), 'restore-state.json');
  const tmpPath = `${statePath}.${crypto.randomBytes(6).toString('hex')}.tmp`;
  try {
    await fs.writeFile(tmpPath, JSON.stringify(state, null, 2), 'utf-8');
    await fs.rename(tmpPath, statePath);
  } catch (err) {
    // Best-effort cleanup; the original failure is what the caller needs to see.
    await fs.rm(tmpPath, { force: true }).catch(() => {});
    throw err;
  }
 }
 /**
 * POST /instance/:slug/restore/upload?sha256=<hex>
 * Accepts an application/octet-stream upload of a backup archive and writes
 * it to a fresh, randomly named directory under the slug's restores directory.
 * The SHA-256 is computed while streaming and compared once the upload has
 * finished; on a mismatch the upload directory is deleted and 400 is returned.
 *
 * Responses:
 *   200 `{ uploadId, sizeBytes, sha256 }` — archive stored, state is UPLOADED
 *   400 VALIDATION — sha256 query parameter missing or not 64 hex chars
 *   400 SHA256_MISMATCH — received bytes do not hash to the expected value
 *   409 SLUG_BUSY — a backup or restore currently holds the slug lock
 *   Stream/write failures (including the size cap, raised as AgentError 413)
 *   are rethrown after the upload directory has been removed.
 */
 router.post('/instance/:slug/restore/upload', async (req: Request, res: Response) => {
  const slug = param(req, 'slug');
  await getSlugEntry(slug);
  // Refuse early while the slug is busy; no lock is taken for the upload itself.
  if (isSlugLocked(slug, 'restore')) {
    res.status(409).json({ error: 'SLUG_BUSY', message: 'A restore is already in progress for this slug' });
    return;
  }
  if (isSlugLocked(slug, 'backup')) {
    res.status(409).json({ error: 'SLUG_BUSY', message: 'A backup is in progress for this slug' });
    return;
  }
  // Normalise to lowercase so it can be compared with hash.digest('hex') below.
  const expectedSha256 = typeof req.query.sha256 === 'string' ? req.query.sha256.toLowerCase() : undefined;
  if (!expectedSha256 || !/^[a-f0-9]{64}$/.test(expectedSha256)) {
    res.status(400).json({ error: 'VALIDATION', message: 'sha256 query parameter required (64 hex chars)' });
    return;
  }
  // 32 hex chars; this satisfies ID_REGEX, which the other restore routes enforce.
  const uploadId = crypto.randomBytes(16).toString('hex');
  const uploadDir = restoreUploadDir(slug, uploadId);
  await fs.mkdir(uploadDir, { recursive: true });
  const archivePath = path.join(uploadDir, 'archive.tar.gz');
  const hash = crypto.createHash('sha256');
  let bytesWritten = 0;
  // Pass-through transform that counts bytes, enforces the size cap and feeds the hash.
  const hashTransform = new Transform({
    transform(chunk: Buffer, _enc, cb) {
      bytesWritten += chunk.length;
      if (bytesWritten > MAX_RESTORE_UPLOAD_BYTES) {
        // Abort the stream — pipeline() will reject and the catch block below
        // will remove the partial upload directory.
        cb(new AgentError(
          413,
          `Upload exceeds maximum allowed size of ${MAX_RESTORE_UPLOAD_BYTES} bytes`,
          'UPLOAD_TOO_LARGE'
        ));
        return;
      }
      hash.update(chunk);
      cb(null, chunk);
    },
  });
  try {
    const writeStream = createWriteStream(archivePath);
    // Request body -> counter/hasher -> file.
    await pipeline(req, hashTransform, writeStream);
    const sha256 = hash.digest('hex');
    if (sha256 !== expectedSha256) {
      // Integrity failure — nuke the upload
      await fs.rm(uploadDir, { recursive: true, force: true });
      res.status(400).json({
        error: 'SHA256_MISMATCH',
        message: `Expected sha256 ${expectedSha256}, got ${sha256}`,
      });
      return;
    }
    const stats = await fs.stat(archivePath);
    // Persist initial state so the progress endpoint works even before apply
    await writeRestoreState(slug, uploadId, {
      status: 'UPLOADED',
      uploadId,
      startedAt: new Date().toISOString(),
    });
    logger.info(`[restore] ${slug}: uploaded ${bytesWritten} bytes (sha256=${sha256.substring(0, 16)}...) upload_id=${uploadId}`);
    res.json({
      uploadId,
      sizeBytes: stats.size,
      sha256,
    });
  } catch (err) {
    // Stream error or write error — clean up
    try { await fs.rm(uploadDir, { recursive: true, force: true }); } catch { /* ignore */ }
    throw err;
  }
 });
 /**
 * POST /instance/:slug/restore/:uploadId/apply
 * Body: { confirm: true, skipDb?, skipUploads?, skipListmonk?, dryRun? }
 *
 * Runs `scripts/restore.sh --archive <path> --force` in the background while
 * holding the slug's restore lock and records progress in restore-state.json.
 * The CCP polls the progress endpoint for updates.
 *
 * The state is switched to RUNNING only after the lock has been acquired, and
 * the handler answers only once that has happened (or failed). This way a
 * request that loses the lock to a concurrent backup/restore gets 409 instead
 * of a 202 with a state file stuck at RUNNING.
 *
 * Responses:
 *   202 `{ applied, uploadId, options }` — lock held, state is RUNNING
 *   400 INVALID_ID / CONFIRMATION_REQUIRED
 *   404 NOT_FOUND — no uploaded archive for this upload id
 *   409 SLUG_BUSY — another backup or restore holds the slug
 *   500 RESTORE_SCRIPT_MISSING — scripts/restore.sh is not present
 *   Failures before restore.sh could be started are rethrown after the state
 *   has been marked FAILED.
 */
 router.post('/instance/:slug/restore/:uploadId/apply', async (req: Request, res: Response) => {
  const slug = param(req, 'slug');
  const uploadId = param(req, 'uploadId');
  if (!ID_REGEX.test(uploadId)) {
    res.status(400).json({ error: 'INVALID_ID', message: 'Invalid upload id' });
    return;
  }
  const entry = await getSlugEntry(slug);
  const { confirm, skipDb, skipUploads, skipListmonk, dryRun } = req.body ?? {};
  if (confirm !== true) {
    res.status(400).json({ error: 'CONFIRMATION_REQUIRED', message: 'Body must include { confirm: true }' });
    return;
  }
  const uploadDir = restoreUploadDir(slug, uploadId);
  // Path traversal defense
  const resolvedDir = path.resolve(uploadDir);
  const boundary = path.resolve(restoresDirFor(slug));
  if (!resolvedDir.startsWith(boundary + path.sep)) {
    res.status(400).json({ error: 'INVALID_ID', message: 'Invalid upload id' });
    return;
  }
  const archivePath = path.join(uploadDir, 'archive.tar.gz');
  try {
    await fs.access(archivePath);
  } catch {
    res.status(404).json({ error: 'NOT_FOUND', message: 'Upload not found or already applied' });
    return;
  }
  // Verify scripts/restore.sh exists
  const scriptPath = path.join(entry.basePath, 'scripts', 'restore.sh');
  try {
    await fs.access(scriptPath);
  } catch {
    res.status(500).json({ error: 'RESTORE_SCRIPT_MISSING', message: `scripts/restore.sh not found at ${scriptPath}` });
    return;
  }
  // Fast-path busy check. This is only advisory; the authoritative check is
  // the lock acquisition below.
  if (isSlugLocked(slug, 'restore') || isSlugLocked(slug, 'backup')) {
    res.status(409).json({ error: 'SLUG_BUSY', message: 'Slug is busy with backup or restore' });
    return;
  }
  const options = {
    skipDb: !!skipDb,
    skipUploads: !!skipUploads,
    skipListmonk: !!skipListmonk,
    dryRun: !!dryRun,
  };
  // Build restore.sh args (all flags, no user input interpolated into a shell string)
  const args = ['scripts/restore.sh', '--archive', archivePath, '--force'];
  if (options.skipDb) args.push('--skip-db');
  if (options.skipUploads) args.push('--skip-uploads');
  if (options.skipListmonk) args.push('--skip-listmonk');
  if (options.dryRun) args.push('--dry-run');
  const logPath = path.join(uploadDir, 'restore.log');
  // One start timestamp shared by the RUNNING, COMPLETED and FAILED states.
  const startedAt = new Date().toISOString();

  // Resolved from inside the lock once the state has been switched to RUNNING.
  let signalStarted: () => void = () => {};
  const started = new Promise<void>((resolve) => {
    signalStarted = resolve;
  });

  const task = withSlugLock(slug, 'restore', async () => {
    await writeRestoreState(slug, uploadId, {
      status: 'RUNNING',
      uploadId,
      startedAt,
      options,
    });
    signalStarted();

    const logFd = await fs.open(logPath, 'w');
    let exitCode: number;
    try {
      logger.info(`[restore] ${slug}: running ${args.join(' ')} (cwd=${entry.basePath})`);
      exitCode = await new Promise<number>((resolve, reject) => {
        const proc = spawn('bash', args, {
          cwd: entry.basePath,
          env: { ...process.env },
          stdio: ['ignore', 'pipe', 'pipe'],
        });
        proc.stdout.on('data', (chunk) => logFd.write(chunk).catch(() => {}));
        proc.stderr.on('data', (chunk) => logFd.write(chunk).catch(() => {}));
        proc.on('error', reject);
        proc.on('close', (code) => resolve(code ?? 1));
      });
    } finally {
      // Close the log even when spawn itself fails, so the handle is not leaked.
      await logFd.close();
    }

    // Read the tail of the log for the state file
    let logTail = '';
    try {
      const fullLog = await fs.readFile(logPath, 'utf-8');
      logTail = fullLog.split('\n').slice(-80).join('\n');
    } catch { /* ignore */ }
    const state: RestoreState = {
      status: exitCode === 0 ? 'COMPLETED' : 'FAILED',
      uploadId,
      startedAt,
      completedAt: new Date().toISOString(),
      exitCode,
      logTail,
      options,
      ...(exitCode !== 0 ? { errorMessage: `restore.sh exited with code ${exitCode}` } : {}),
    };
    await writeRestoreState(slug, uploadId, state);
    logger.info(`[restore] ${slug}: restore.sh finished with exit ${exitCode}`);
  });

  // Background failure handling. This promise never rejects.
  const background = task.catch(async (err: unknown) => {
    const reason = err instanceof Error ? err.message : String(err);
    logger.error(`[restore] ${slug}: background restore failed: ${reason}`);
    // Lock not acquired: this request never started anything and never touched
    // the state file, so there is nothing to mark as failed.
    if (err instanceof SlugBusyError) return;
    try {
      await writeRestoreState(slug, uploadId, {
        status: 'FAILED',
        uploadId,
        startedAt,
        completedAt: new Date().toISOString(),
        errorMessage: reason,
        options,
      });
    } catch { /* ignore */ }
  });

  // Wait only until the restore has actually started (or failed to start);
  // the restore itself keeps running after the response has been sent.
  try {
    await Promise.race([started, task]);
  } catch (err) {
    if (err instanceof SlugBusyError) {
      res.status(409).json({ error: 'SLUG_BUSY', message: 'Slug is busy with backup or restore' });
      return;
    }
    await background; // make sure the FAILED state is on disk before surfacing the error
    throw err;
  }
  res.status(202).json({ applied: true, uploadId, options });
 });
 /**
 * GET /instance/:slug/restore/:uploadId/progress
 * Responds with the contents of restore-state.json for the upload, 400 when
 * the upload id is malformed, or 404 when no state file can be read.
 */
 router.get('/instance/:slug/restore/:uploadId/progress', async (req: Request, res: Response) => {
  const slug = param(req, 'slug');
  const uploadId = param(req, 'uploadId');
  const idLooksValid = ID_REGEX.test(uploadId);
  if (!idLooksValid) {
    res.status(400).json({ error: 'INVALID_ID', message: 'Invalid upload id' });
    return;
  }
  // Only registered slugs may be queried.
  await getSlugEntry(slug);
  const snapshot = await readRestoreState(slug, uploadId);
  if (snapshot) {
    res.json(snapshot);
    return;
  }
  res.status(404).json({ error: 'NOT_FOUND', message: 'Restore not found' });
 });
 /**
 * DELETE /instance/:slug/restore/:uploadId
 * Deletes the directory of a restore upload. Answers 409 while the recorded
 * state is RUNNING, and 400 when the id is malformed or would resolve outside
 * the slug's restores directory.
 */
 router.delete('/instance/:slug/restore/:uploadId', async (req: Request, res: Response) => {
  const slug = param(req, 'slug');
  const uploadId = param(req, 'uploadId');
  const rejectInvalidId = (): void => {
    res.status(400).json({ error: 'INVALID_ID', message: 'Invalid upload id' });
  };
  if (!ID_REGEX.test(uploadId)) {
    rejectInvalidId();
    return;
  }
  await getSlugEntry(slug);

  // The target must resolve to a location strictly inside the slug's restores directory.
  const targetDir = restoreUploadDir(slug, uploadId);
  const restoresRoot = path.resolve(restoresDirFor(slug));
  const staysInsideRoot = path.resolve(targetDir).startsWith(restoresRoot + path.sep);
  if (!staysInsideRoot) {
    rejectInvalidId();
    return;
  }

  const current = await readRestoreState(slug, uploadId);
  if (current?.status === 'RUNNING') {
    res.status(409).json({ error: 'RESTORE_RUNNING', message: 'Cannot delete a running restore' });
    return;
  }

  // force: true makes this succeed even when the directory is already gone.
  await fs.rm(targetDir, { recursive: true, force: true });
  res.json({ deleted: true });
 });
 export default router;
--- a/changemaker-control-panel/agent/src/routes/registry.routes.ts
+++ b/changemaker-control-panel/agent/src/routes/registry.routes.ts
@ -4,6 +4,13 @@ import { registerSlug, unregisterSlug, listSlugs } from '../services/registry.se
 const router = Router();
 // SECURITY: defense-in-depth slug validation. The CCP enforces ^[a-z0-9-]+$
 // upstream via Zod, but the registry slug is later interpolated into
 // filesystem paths (backupsDirFor, etc.), so we validate independently here.
 // A poisoned registry entry could otherwise let a compromised or buggy CCP
 // escape AGENT_DATA_DIR.
 //
 // Accepts 2–50 characters, each a lowercase ASCII letter, a digit, or a
 // hyphen. '.', '/' and '\' never match, so a slug that passes cannot contain
 // a path separator or a '..' segment. Hyphens are allowed in any position,
 // including first and last.
 const SLUG_RE = /^[a-z0-9-]{2,50}$/;
 // POST /instances/register — Register a slug→basePath mapping
 router.post('/instances/register', async (req: Request, res: Response) => {
  const { slug, basePath, composeProject } = req.body;
@ -11,14 +18,23 @@ router.post('/instances/register', async (req: Request, res: Response) => {
    res.status(400).json({ error: 'VALIDATION', message: 'slug, basePath, and composeProject required' });
    return;
  }
  if (typeof slug !== 'string' || !SLUG_RE.test(slug)) {
    res.status(400).json({ error: 'VALIDATION', message: 'Invalid slug format (expected ^[a-z0-9-]{2,50}$)' });
    return;
  }
  await registerSlug(slug, basePath, composeProject);
  res.json({ registered: slug });
 });
 // DELETE /instances/:slug — Unregister slug
 router.delete('/instances/:slug', async (req: Request, res: Response) => {
-  await unregisterSlug(param(req, 'slug'));
+  const slug = param(req, 'slug');
-  res.json({ unregistered: param(req, 'slug') });
+  if (!SLUG_RE.test(slug)) {
    res.status(400).json({ error: 'VALIDATION', message: 'Invalid slug format' });
    return;
  }
  await unregisterSlug(slug);
  res.json({ unregistered: slug });
 });
 // GET /instances — List all managed slugs
--- a/changemaker-control-panel/agent/src/routes/upgrade.routes.ts
+++ b/changemaker-control-panel/agent/src/routes/upgrade.routes.ts
@ -1,11 +1,12 @@
 import { Router, Request, Response } from 'express';
 import { param } from '../utils/params';
-import { execFile } from 'child_process';
+import { execFile, spawn } from 'child_process';
 import { promisify } from 'util';
 import fs from 'fs/promises';
 import path from 'path';
 import { getSlugEntry } from '../services/registry.service';
 import { logger } from '../utils/logger';
 import { withSlugLock, SlugBusyError, isSlugLocked } from '../services/slug-mutex';
 const execFileAsync = promisify(execFile);
 const router = Router();
@ -13,9 +14,108 @@ const router = Router();
 /** Validate a git branch name — prevent shell injection. */
 const SAFE_BRANCH = /^[a-zA-Z0-9][a-zA-Z0-9_.\/-]{0,99}$/;
-// POST /instance/:slug/upgrade/start — Run upgrade.sh
+/**
 * Max age of an in-progress upgrade (by progress.json mtime) before we
 * consider a previous attempt dead and allow a new one through.
 *
 * SECURITY NOTE: this must be LONGER than the CCP's REMOTE_UPGRADE_TIMEOUT
 * AND longer than any realistic legitimate upgrade duration. The concern is
 * a concurrent-upgrade scenario:
 *   - upgrade.sh is running and legitimately slow (large image pull + DB
 *     migration)
 *   - at 15 min the CCP side times out and marks the row FAILED
 *   - admin clicks "Upgrade" again → CCP's DB check sees no active row
 *   - if this staleness window is <= realistic upgrade time, the second
 *     /upgrade/start call would ALSO pass this check, spawning a second
 *     upgrade.sh process racing against the still-running first one
 *
 * 45 min gives headroom over the 15-min CCP timeout and covers realistic
 * upgrade durations. For a truly bulletproof guard, switch to a PID lock
 * file that verifies the process is still alive.
 */
 const STALE_UPGRADE_MTIME_MS = 45 * 60 * 1000;
 /**
 * Decides, from files under `<basePath>/data/upgrade`, whether an upgrade
 * looks started-but-unfinished.
 *
 * Intended as a fallback for the in-memory slug lock, which does not survive
 * an agent restart. The decision uses modification times only:
 *   - no progress.json                      → false
 *   - result.json at least as new as it     → false (run finished)
 *   - progress.json older than the
 *     STALE_UPGRADE_MTIME_MS window         → false (previous attempt assumed dead)
 *   - anything else                         → true
 *
 * @param basePath Instance root directory.
 * @returns true when an upgrade should be treated as still in progress.
 */
 async function isUpgradeRunningOnDisk(basePath: string): Promise<boolean> {
  const upgradeDir = path.join(basePath, 'data', 'upgrade');

  // Any stat failure (missing file, permissions, …) is treated as "no file".
  const mtimeOrNull = async (file: string): Promise<number | null> => {
    try {
      return (await fs.stat(file)).mtimeMs;
    } catch {
      return null;
    }
  };

  const progressMtime = await mtimeOrNull(path.join(upgradeDir, 'progress.json'));
  if (progressMtime === null) return false;

  const resultMtime = await mtimeOrNull(path.join(upgradeDir, 'result.json'));
  if (resultMtime !== null && resultMtime >= progressMtime) return false;

  const progressAgeMs = Date.now() - progressMtime;
  const isStale = progressAgeMs > STALE_UPGRADE_MTIME_MS;
  return !isStale;
 }
 // POST /instance/:slug/upgrade/check — run scripts/upgrade-check.sh, then
 // return the parsed contents of data/upgrade/status.json.
 //   409 SLUG_BUSY            an upgrade holds the slug lock or looks active on disk
 //   404 SCRIPT_NOT_FOUND     the check script is not accessible
 //   500 STATUS_NOT_AVAILABLE status.json could not be read or parsed
 router.post('/instance/:slug/upgrade/check', async (req: Request, res: Response) => {
  const slug = param(req, 'slug');
  const { basePath } = await getSlugEntry(slug);

  // The in-memory lock is consulted first; the disk probe only runs when no
  // lock is held. The check script writes status.json into data/upgrade/,
  // which could race with a running upgrade.sh writing other files there.
  const upgradeActive =
    isSlugLocked(slug, 'upgrade') || (await isUpgradeRunningOnDisk(basePath));
  if (upgradeActive) {
    res.status(409).json({ error: 'SLUG_BUSY', message: 'An upgrade is currently running' });
    return;
  }

  const checkScript = path.join(basePath, 'scripts', 'upgrade-check.sh');
  const scriptAccessible = await fs.access(checkScript).then(
    () => true,
    () => false
  );
  if (!scriptAccessible) {
    res.status(404).json({ error: 'SCRIPT_NOT_FOUND', message: `upgrade-check.sh not found at ${checkScript}` });
    return;
  }

  // execFile (no shell) with a 60s timeout. A failing script is only logged:
  // it may have written status.json before erroring, so the read below is
  // attempted regardless.
  const execOptions = {
    cwd: basePath,
    timeout: 60_000,
    maxBuffer: 4 * 1024 * 1024,
    env: { ...process.env, COMPOSE_ANSI: 'never' },
  };
  try {
    await execFileAsync('bash', [checkScript], execOptions);
  } catch (err) {
    logger.warn(`[upgrade] ${slug}: upgrade-check.sh failed: ${(err as Error).message}`);
  }

  const statusFile = path.join(basePath, 'data', 'upgrade', 'status.json');
  try {
    const rawStatus = await fs.readFile(statusFile, 'utf-8');
    res.json(JSON.parse(rawStatus));
  } catch {
    res.status(500).json({ error: 'STATUS_NOT_AVAILABLE', message: 'upgrade-check.sh did not produce status.json' });
  }
 });
 // POST /instance/:slug/upgrade/start — Run upgrade.sh in the background
 router.post('/instance/:slug/upgrade/start', async (req: Request, res: Response) => {
-  const entry = await getSlugEntry(param(req, 'slug'));
+  const slug = param(req, 'slug');
  const entry = await getSlugEntry(slug);
  const { skipBackup, useRegistry, branch } = req.body || {};
  // SECURITY: Validate branch name to prevent injection
@ -28,26 +128,64 @@ router.post('/instance/:slug/upgrade/start', async (req: Request, res: Response)
  try {
    await fs.access(scriptPath);
  } catch {
-    res.status(400).json({ error: 'NOT_FOUND', message: 'upgrade.sh not found' });
+    res.status(404).json({ error: 'NOT_FOUND', message: 'upgrade.sh not found' });
    return;
  }
-  // SECURITY: Use execFile with args array — no shell interpolation
+  // Refuse if an upgrade is already running (in-memory or on-disk indicators)
-  const args = ['--api-mode', '--force'];
+  if (isSlugLocked(slug, 'upgrade') || await isUpgradeRunningOnDisk(entry.basePath)) {
    res.status(409).json({ error: 'SLUG_BUSY', message: 'An upgrade is already in progress' });
    return;
  }
  // Backup or restore concurrency: refuse to start an upgrade while either is running
  if (isSlugLocked(slug, 'backup') || isSlugLocked(slug, 'restore')) {
    res.status(409).json({ error: 'SLUG_BUSY', message: 'A backup or restore is currently running' });
    return;
  }
  // Clear stale progress/result files before starting so the on-disk staleness
  // check doesn't think a brand-new upgrade is still finishing.
  const progressPath = path.join(entry.basePath, 'data', 'upgrade', 'progress.json');
  const resultPath = path.join(entry.basePath, 'data', 'upgrade', 'result.json');
  await fs.mkdir(path.dirname(progressPath), { recursive: true });
  await fs.rm(progressPath, { force: true });
  await fs.rm(resultPath, { force: true });
  // SECURITY: Use spawn with args array — no shell interpolation
  const args: string[] = [scriptPath, '--api-mode', '--force'];
  if (skipBackup) args.push('--skip-backup');
  if (useRegistry) args.push('--use-registry');
  if (branch) args.push('--branch', branch);
-  // Fire-and-forget — CCP polls progress
+  // Schedule the background task under the slug lock. Use void so the
-  execFileAsync('bash', [scriptPath, ...args], {
+  // promise doesn't block the response. Errors are caught and logged; the
-    cwd: entry.basePath,
+  // CCP detects them via the absence of a result file or via the timeout.
-    timeout: 600_000,
+  void withSlugLock(slug, 'upgrade', async () => {
-    maxBuffer: 10 * 1024 * 1024,
+    logger.info(`[upgrade] ${slug}: spawning ${args.join(' ')} (cwd=${entry.basePath})`);
    try {
      await new Promise<void>((resolve, reject) => {
        const proc = spawn('bash', args, {
          cwd: entry.basePath,
          env: { ...process.env, COMPOSE_ANSI: 'never' },
          stdio: ['ignore', 'ignore', 'ignore'], // upgrade.sh writes its own logs
        });
        proc.on('error', reject);
        proc.on('close', (code) => {
          if (code === 0) resolve();
          else reject(new Error(`upgrade.sh exited with code ${code}`));
        });
      });
      logger.info(`[upgrade] ${slug}: upgrade.sh completed`);
    } catch (err) {
      logger.error(`[upgrade] ${slug}: ${(err as Error).message}`);
    }
  }).catch((err) => {
-    logger.error(`[upgrade] ${param(req, 'slug')} failed: ${(err as Error).message}`);
+    if (!(err instanceof SlugBusyError)) {
      logger.error(`[upgrade] ${slug}: lock or background error: ${(err as Error).message}`);
    }
  });
-  res.json({ started: true });
+  res.status(202).json({ started: true });
 });
 // GET /instance/:slug/upgrade/progress — Read progress.json
--- a/changemaker-control-panel/agent/src/server.ts
+++ b/changemaker-control-panel/agent/src/server.ts
@ -53,8 +53,24 @@ if (hasCerts()) {
  app.use(errorHandler);
  const server = https.createServer(tlsOptions, app);
-  server.listen(env.AGENT_PORT, () => {
+  server.listen(env.AGENT_PORT, async () => {
    logger.info(`CCP Agent (mTLS) listening on port ${env.AGENT_PORT}`);
    // Auto-register this instance's slug if configured
    if (env.INSTANCE_SLUG && env.INSTANCE_BASE_PATH) {
      const { registerSlug, getSlugEntry } = await import('./services/registry.service');
      try {
        await getSlugEntry(env.INSTANCE_SLUG);
        logger.debug(`[registry] Slug ${env.INSTANCE_SLUG} already registered`);
      } catch {
        // Detect compose project name: use env override, or derive from basePath directory name
        // (Docker Compose default: directory name with special chars stripped)
        const pathMod = await import('path');
        const composeProject = env.COMPOSE_PROJECT
          || pathMod.basename(env.INSTANCE_BASE_PATH).replace(/[^a-zA-Z0-9]/g, '').toLowerCase();
        await registerSlug(env.INSTANCE_SLUG, env.INSTANCE_BASE_PATH, composeProject);
      }
    }
  });
 } else {
  // Pre-approval mode — start HTTP, only health + phone-home polling
--- a/changemaker-control-panel/agent/src/services/slug-mutex.ts
+++ b/changemaker-control-panel/agent/src/services/slug-mutex.ts
@ -0,0 +1,65 @@
 /**
 * Per-slug single-flight mutex.
 *
 * Guards long-running, mutating operations (backup, restore, upgrade) so that
 * two concurrent CCP calls for the same slug can't trample each other.
 *
 * Usage:
 *   await withSlugLock(slug, 'backup', async () => { ... });
 *
 * If a lock is already held for (slug, op), throws SlugBusyError which the
 * route handler should convert to HTTP 409.
 */
 /**
 * Raised when a lock for the same (slug, op) pair is already held.
 * Carries the slug and operation so a caller can report which one was busy.
 */
 export class SlugBusyError extends Error {
  public slug: string;
  public op: string;

  constructor(slug: string, op: string) {
    super(`Slug ${slug} is busy: ${op} already in progress`);
    this.slug = slug;
    this.op = op;
    this.name = 'SlugBusyError';
  }
 }
 type LockKey = string;

 /** Joins slug and operation inside a lock key. */
 const LOCK_KEY_SEPARATOR = '::';

 /** Currently held locks, keyed by `key(slug, op)`. */
 const locks = new Map<LockKey, { op: string; startedAt: number }>();

 /** Builds the map key identifying the lock for (slug, op). */
 function key(slug: string, op: string): LockKey {
  const composed: LockKey = `${slug}${LOCK_KEY_SEPARATOR}${op}`;
  return composed;
 }
 /**
 * Executes `fn` while holding the single-flight lock for (slug, op).
 *
 * The lock is taken before `fn` is invoked and released when `fn` settles,
 * whether it resolves or rejects. There is no queueing: if the lock is
 * already held, the returned promise rejects with SlugBusyError and `fn` is
 * never called.
 *
 * @returns whatever `fn` resolves to.
 */
 export async function withSlugLock<T>(
  slug: string,
  op: string,
  fn: () => Promise<T>
 ): Promise<T> {
  const lockKey = key(slug, op);
  const alreadyHeld = locks.has(lockKey);
  if (alreadyHeld) {
    throw new SlugBusyError(slug, op);
  }

  locks.set(lockKey, { op, startedAt: Date.now() });
  try {
    const outcome = await fn();
    return outcome;
  } finally {
    locks.delete(lockKey);
  }
 }
 /**
 * Reports whether the lock for (slug, op) is held at the moment of the call.
 * This is a point-in-time probe: it neither takes nor reserves the lock.
 */
 export function isSlugLocked(slug: string, op: string): boolean {
  const lockKey = key(slug, op);
  return locks.has(lockKey);
 }
 /**
 * Snapshot of every held lock, for debugging.
 *
 * The slug is recovered from the text before the first '::' in the map key;
 * the op comes from the stored entry. `ageMs` is measured against a single
 * timestamp taken at the start of the call.
 */
 export function listActiveLocks(): Array<{ slug: string; op: string; ageMs: number }> {
  const capturedAt = Date.now();
  const snapshot: Array<{ slug: string; op: string; ageMs: number }> = [];
  for (const [lockKey, held] of locks.entries()) {
    const slugPart = lockKey.split('::')[0];
    snapshot.push({
      slug: slugPart ?? '',
      op: held.op,
      ageMs: capturedAt - held.startedAt,
    });
  }
  return snapshot;
 }
--- a/changemaker-control-panel/api/prisma/migrations/20260410233048_add_instance_restore/migration.sql
+++ b/changemaker-control-panel/api/prisma/migrations/20260410233048_add_instance_restore/migration.sql
@ -0,0 +1,34 @@
 -- CreateEnum
 CREATE TYPE "RestoreStatus" AS ENUM ('PENDING', 'UPLOADING', 'RUNNING', 'COMPLETED', 'FAILED');
 -- AlterEnum
 ALTER TYPE "AuditAction" ADD VALUE 'BACKUP_RESTORE';
 -- CreateTable
 CREATE TABLE "instance_restores" (
    "id" TEXT NOT NULL,
    "instance_id" TEXT NOT NULL,
    "backup_id" TEXT NOT NULL,
    "status" "RestoreStatus" NOT NULL DEFAULT 'PENDING',
    "upload_id" TEXT,
    "progress_json" JSONB,
    "log_tail" TEXT,
    "error_message" TEXT,
    "triggered_by_id" TEXT,
    "started_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "completed_at" TIMESTAMP(3),
    CONSTRAINT "instance_restores_pkey" PRIMARY KEY ("id")
 );
 -- CreateIndex
 CREATE INDEX "instance_restores_instance_id_started_at_idx" ON "instance_restores"("instance_id", "started_at");
 -- CreateIndex
 CREATE INDEX "instance_restores_backup_id_idx" ON "instance_restores"("backup_id");
 -- AddForeignKey
 ALTER TABLE "instance_restores" ADD CONSTRAINT "instance_restores_instance_id_fkey" FOREIGN KEY ("instance_id") REFERENCES "instances"("id") ON DELETE CASCADE ON UPDATE CASCADE;
 -- AddForeignKey
 ALTER TABLE "instance_restores" ADD CONSTRAINT "instance_restores_backup_id_fkey" FOREIGN KEY ("backup_id") REFERENCES "backups"("id") ON DELETE CASCADE ON UPDATE CASCADE;
--- a/changemaker-control-panel/api/prisma/migrations/20260412045433_add_pangolin_subdomain_prefix/migration.sql
+++ b/changemaker-control-panel/api/prisma/migrations/20260412045433_add_pangolin_subdomain_prefix/migration.sql
@ -0,0 +1,2 @@
 -- AlterTable
 ALTER TABLE "instances" ADD COLUMN     "pangolin_subdomain_prefix" TEXT;
--- a/changemaker-control-panel/api/prisma/migrations/20260412155638_add_pangolin_teardown_action/migration.sql
+++ b/changemaker-control-panel/api/prisma/migrations/20260412155638_add_pangolin_teardown_action/migration.sql
@ -0,0 +1,2 @@
 -- AlterEnum
 ALTER TYPE "AuditAction" ADD VALUE 'PANGOLIN_TEARDOWN';
--- a/changemaker-control-panel/api/prisma/schema.prisma
+++ b/changemaker-control-panel/api/prisma/schema.prisma
@ -109,6 +109,7 @@ model Instance {
  pangolinSiteId     String?     @map("pangolin_site_id")
  pangolinNewtId     String?     @map("pangolin_newt_id")
  pangolinNewtSecret String?     @map("pangolin_newt_secret")
  pangolinSubdomainPrefix String? @map("pangolin_subdomain_prefix")
  // SMTP
  smtpHost        String?        @map("smtp_host")
@ -125,6 +126,7 @@ model Instance {
  portAllocations PortAllocation[]
  healthChecks    HealthCheck[]
  backups         Backup[]
  restores        InstanceRestore[]
  auditLogs       AuditLog[]
  upgrades        InstanceUpgrade[]
  events          InstanceEvent[]
@ -196,12 +198,44 @@ model Backup {
  s3Uploaded    Boolean      @default(false) @map("s3_uploaded")
  s3Key         String?      @map("s3_key")
-  instance Instance @relation(fields: [instanceId], references: [id], onDelete: Cascade)
+  instance Instance          @relation(fields: [instanceId], references: [id], onDelete: Cascade)
  restores InstanceRestore[]
  @@index([instanceId, startedAt])
  @@map("backups")
 }
 // ─── Restore ───────────────────────────────────────────────
 // Status values an InstanceRestore row can hold. InstanceRestore.status
 // defaults to PENDING for new rows.
 enum RestoreStatus {
  PENDING
  UPLOADING
  RUNNING
  COMPLETED
  FAILED
 }
 // One restore of a Backup onto an Instance. Stored in "instance_restores".
 model InstanceRestore {
  id             String        @id @default(uuid())
  instanceId     String        @map("instance_id")
  backupId       String        @map("backup_id")
  status         RestoreStatus @default(PENDING)
  // Nullable. NOTE(review): presumably the agent-side upload id used by the
  // agent's restore routes — confirm against restore.service.
  uploadId       String?       @map("upload_id")
  // Nullable free-form JSON; shape is not constrained by the schema.
  progressJson   Json?         @map("progress_json")
  logTail        String?       @map("log_tail")
  errorMessage   String?       @map("error_message")
  // Nullable plain string — no relation is declared for it in this model.
  triggeredById  String?       @map("triggered_by_id")
  startedAt      DateTime      @default(now()) @map("started_at")
  completedAt    DateTime?     @map("completed_at")
  // Both relations cascade: deleting the parent instance or the source
  // backup also deletes its restore rows.
  instance Instance @relation(fields: [instanceId], references: [id], onDelete: Cascade)
  backup   Backup   @relation(fields: [backupId], references: [id], onDelete: Cascade)
  @@index([instanceId, startedAt])
  @@index([backupId])
  @@map("instance_restores")
 }
 // ─── Audit Log ─────────────────────────────────────────────
 enum AuditAction {
@ -215,7 +249,9 @@ enum AuditAction {
  SECRETS_VIEWED
  BACKUP_CREATE
  BACKUP_DELETE
  BACKUP_RESTORE
  PANGOLIN_SETUP
  PANGOLIN_TEARDOWN
  PANGOLIN_SYNC
  AGENT_CONNECT
  AGENT_REGISTER
--- a/changemaker-control-panel/api/src/config/env.ts
+++ b/changemaker-control-panel/api/src/config/env.ts
@ -54,10 +54,11 @@ const envSchema = z.object({
  USE_REGISTRY_IMAGES: z.enum(['true', 'false']).default('true').transform((v) => v === 'true'),
  IMAGE_TAG: z.string().default('latest'),
-  // Pangolin (optional)
+  // Pangolin (optional — for remote tunnel management)
  PANGOLIN_API_URL: z.string().default(''),
  PANGOLIN_API_KEY: z.string().default(''),
  PANGOLIN_ORG_ID: z.string().default(''),
  PANGOLIN_ENDPOINT: z.string().default(''), // Newt WebSocket URL (may differ from API URL)
  // Health checks
  HEALTH_CHECK_INTERVAL_MS: z.coerce.number().default(300_000), // 5 min (0 to disable)
--- a/changemaker-control-panel/api/src/modules/agents/agents.routes.ts
+++ b/changemaker-control-panel/api/src/modules/agents/agents.routes.ts
@ -169,7 +169,7 @@ router.post('/registrations/:id/approve', authenticate, requireRole('SUPER_ADMIN
  });
  // Issue mTLS certificates
-  const certMaterials = await issueAgentCert(instance.id, registration.slug);
+  const certMaterials = await issueAgentCert(instance.id, registration.slug, registration.agentUrl);
  // Mark invite code as used
  const invite = await prisma.agentInviteCode.findUnique({ where: { id: registration.inviteCodeId } });
@ -189,7 +189,7 @@ router.post('/registrations/:id/approve', authenticate, requireRole('SUPER_ADMIN
        caCertPem: certMaterials.caCertPem,
        agentCertPem: certMaterials.agentCertPem,
        agentKeyPem: certMaterials.agentKeyPem,
-        ccpFingerprint: certMaterials.caFingerprint,
+        ccpFingerprint: certMaterials.fingerprint,
      },
    },
  });
--- a/changemaker-control-panel/api/src/modules/instances/instances.routes.ts
+++ b/changemaker-control-panel/api/src/modules/instances/instances.routes.ts
@ -4,11 +4,13 @@ import rateLimit from 'express-rate-limit';
 import { prisma } from '../../lib/prisma';
 import { authenticate, requireRole } from '../../middleware/auth';
 import { validate } from '../../middleware/validate';
-import { createInstanceSchema, updateInstanceSchema, registerInstanceSchema, reconfigureInstanceSchema, configureTunnelSchema, importInstancesSchema } from './instances.schemas';
+import { createInstanceSchema, updateInstanceSchema, registerInstanceSchema, reconfigureInstanceSchema, configureTunnelSchema, importInstancesSchema, startUpgradeSchema, setupRemoteTunnelSchema } from './instances.schemas';
 import * as instancesService from './instances.service';
 import * as healthService from '../../services/health.service';
 import * as backupService from '../../services/backup.service';
 import * as restoreService from '../../services/restore.service';
 import * as upgradeService from '../../services/upgrade.service';
 import * as tunnelService from '../../services/tunnel.service';
 import { discoverInstances } from '../../services/discovery.service';
 const secretsLimiter = rateLimit({
@ -186,6 +188,18 @@ router.delete(
  '/:id/tunnel',
  requireRole('SUPER_ADMIN', 'OPERATOR'),
  async (req: Request, res: Response) => {
    // Branch: remote instances use the CCP's Pangolin API to teardown;
    // local instances use the existing manual removal logic.
    const instance = await prisma.instance.findUnique({ where: { id: req.params.id as string } });
    if (instance?.isRemote && instance.pangolinSiteId) {
      const result = await tunnelService.teardownTunnel(
        req.params.id as string,
        req.user!.id,
        req.ip
      );
      res.json({ data: result });
      return;
    }
    const result = await instancesService.removeTunnel(
      req.params.id as string,
      req.user!.id,
@ -195,6 +209,47 @@ router.delete(
  }
 );
 // POST /:id/tunnel/setup — remote tunnel setup via CCP's Pangolin API
 // credentials. SUPER_ADMIN only; the body is validated against
 // setupRemoteTunnelSchema. Responds 201 with the service result under `data`.
 router.post(
  '/:id/tunnel/setup',
  requireRole('SUPER_ADMIN'),
  validate(setupRemoteTunnelSchema),
  async (req: Request, res: Response) => {
    const subdomainPrefix = (req.body || {}).subdomainPrefix;
    const instanceId = req.params.id as string;
    const actorId = req.user!.id;
    const clientIp = req.ip;

    const data = await tunnelService.setupTunnel(instanceId, { subdomainPrefix }, actorId, clientIp);
    res.status(201).json({ data });
  }
 );
 // GET /:id/tunnel/status — tunnel status (resource matrix) for an instance,
 // local or remote. Open to SUPER_ADMIN and OPERATOR; the service result is
 // returned under `data`.
 router.get(
  '/:id/tunnel/status',
  requireRole('SUPER_ADMIN', 'OPERATOR'),
  async (req: Request, res: Response) => {
    const instanceId = req.params.id as string;
    const data = await tunnelService.getTunnelStatus(instanceId);
    res.json({ data });
  }
 );
 // POST /:id/tunnel/sync — re-sync tunnel resources (idempotent — creates
 // missing, leaves existing). SUPER_ADMIN only; the service result is
 // returned under `data`.
 router.post(
  '/:id/tunnel/sync',
  requireRole('SUPER_ADMIN'),
  async (req: Request, res: Response) => {
    const instanceId = req.params.id as string;
    const actorId = req.user!.id;
    const clientIp = req.ip;

    const data = await tunnelService.syncResources(instanceId, actorId, clientIp);
    res.json({ data });
  }
 );
 // ─── Lifecycle Endpoints ─────────────────────────────────────────────
 router.post(
@ -280,6 +335,7 @@ router.post(
 router.post(
  '/:id/upgrade',
  requireRole('SUPER_ADMIN', 'OPERATOR'),
  validate(startUpgradeSchema),
  async (req: Request, res: Response) => {
    const { skipBackup, useRegistry, branch } = req.body || {};
    const upgrade = await upgradeService.startUpgrade(
@ -356,4 +412,76 @@ router.get(
  }
 );
 // ─── Restores ──────────────────────────────────────────────────────
 /**
 * POST /:id/restore
 * Body: { backupId, options? }
 *
 * Kicks off a restore of `backupId` onto instance `:id` and answers 201 with
 * the created InstanceRestore row right away; progress is polled through
 * GET /:id/restores or GET /:id/restores/:restoreId.
 *
 * Rejections:
 *   400 VALIDATION              backupId missing, empty, or not a string
 *   404 NOT_FOUND               no backup with that id
 *   400 CROSS_INSTANCE_RESTORE  backup belongs to a different instance
 *
 * DESTRUCTIVE: overwrites databases and uploads. Requires SUPER_ADMIN.
 */
 router.post(
  '/:id/restore',
  requireRole('SUPER_ADMIN'),
  async (req: Request, res: Response) => {
    const instanceId = req.params.id as string;
    const { backupId, options } = req.body ?? {};

    // Shared shape for the error bodies produced by this handler.
    const fail = (status: number, code: string, message: string): void => {
      res.status(status).json({ error: { message, code } });
    };

    const hasUsableBackupId = typeof backupId === 'string' && backupId.length > 0;
    if (!hasUsableBackupId) {
      fail(400, 'VALIDATION', 'backupId (string) is required');
      return;
    }

    // Defensive: the backup must exist and belong to this very instance.
    const backup = await prisma.backup.findUnique({ where: { id: backupId } });
    if (!backup) {
      fail(404, 'NOT_FOUND', 'Backup not found');
      return;
    }
    if (backup.instanceId !== instanceId) {
      fail(
        400,
        'CROSS_INSTANCE_RESTORE',
        'Backup does not belong to this instance (cross-instance restore is not supported)'
      );
      return;
    }

    const created = await restoreService.createRestore({
      backupId,
      triggeredById: req.user!.id,
      ipAddress: req.ip,
      options,
    });
    res.status(201).json({ data: created });
  }
 );
 // GET /:id/restores — paginated restore list for an instance.
 // `page` defaults to 1 (minimum 1); `limit` defaults to 50 and is clamped to
 // 1..100. Values that do not parse as integers, or parse to 0, fall back to
 // the default. The service result is returned as the body unchanged.
 router.get(
  '/:id/restores',
  requireRole('SUPER_ADMIN', 'OPERATOR'),
  async (req: Request, res: Response) => {
    const atLeastOne = (raw: unknown, fallback: number): number =>
      Math.max(1, parseInt(raw as string, 10) || fallback);

    const page = atLeastOne(req.query.page, 1);
    const limit = Math.min(100, atLeastOne(req.query.limit, 50));

    const listing = await restoreService.listRestores(req.params.id as string, page, limit);
    res.json(listing);
  }
 );
 // GET /:id/restores/:restoreId — fetch one restore. A restore whose
 // instanceId differs from `:id` is reported as 404 rather than returned.
 router.get(
  '/:id/restores/:restoreId',
  requireRole('SUPER_ADMIN', 'OPERATOR'),
  async (req: Request, res: Response) => {
    const restoreId = req.params.restoreId as string;
    const found = await restoreService.getRestore(restoreId);

    const belongsToInstance = found.instanceId === req.params.id;
    if (belongsToInstance) {
      res.json({ data: found });
      return;
    }
    res.status(404).json({ error: { message: 'Restore not found', code: 'NOT_FOUND' } });
  }
 );
 export default router;
--- a/changemaker-control-panel/api/src/modules/instances/instances.schemas.ts
+++ b/changemaker-control-panel/api/src/modules/instances/instances.schemas.ts
@ -108,9 +108,32 @@ export const importInstancesSchema = z.object({
  instances: z.array(registerInstanceSchema).min(1).max(50),
 });
 // SECURITY: the local `runUpgrade` path interpolates the branch name into a
 // shell command string (exec, not spawn), so the same strict allow-list the
 // agent applies is enforced here as well. The pattern requires an
 // alphanumeric first character (so a name can never begin with `-` and be
 // mistaken for a flag), permits only letters, digits, `_`, `.`, `/` and `-`
 // afterwards, and caps the length at 100 characters.
 const upgradeBranchName = z
  .string()
  .regex(/^[a-zA-Z0-9][a-zA-Z0-9_.\/-]{0,99}$/, 'Invalid branch name');

 /** Request body for POST /:id/upgrade — every field is optional. */
 export const startUpgradeSchema = z.object({
  skipBackup: z.boolean().optional(),
  useRegistry: z.boolean().optional(),
  branch: upgradeBranchName.optional(),
 });
 // Subdomain prefix: 1–50 characters of lowercase letters, digits and hyphens.
 const tunnelSubdomainPrefix = z
  .string()
  .min(1)
  .max(50)
  .regex(/^[a-z0-9-]+$/, 'Prefix must be lowercase alphanumeric with hyphens');

 /** Request body for POST /:id/tunnel/setup — the prefix may be omitted. */
 export const setupRemoteTunnelSchema = z.object({
  subdomainPrefix: tunnelSubdomainPrefix.optional(),
 });
 export type CreateInstanceInput = z.infer<typeof createInstanceSchema>;
 export type UpdateInstanceInput = z.infer<typeof updateInstanceSchema>;
 export type RegisterInstanceInput = z.infer<typeof registerInstanceSchema>;
 export type ReconfigureInstanceInput = z.infer<typeof reconfigureInstanceSchema>;
 export type ConfigureTunnelInput = z.infer<typeof configureTunnelSchema>;
 export type ImportInstancesInput = z.infer<typeof importInstancesSchema>;
 export type StartUpgradeInput = z.infer<typeof startUpgradeSchema>;
--- a/changemaker-control-panel/api/src/server.ts
+++ b/changemaker-control-panel/api/src/server.ts
@ -8,6 +8,12 @@ import { env } from './config/env';
 import { logger } from './utils/logger';
 import { errorHandler } from './middleware/error-handler';
 // BigInt has no toJSON of its own, so JSON.stringify — and therefore
 // res.json() — throws on BigInt values such as Prisma BigInt columns
 // (e.g. Backup.sizeBytes). Install a hook that emits the decimal string.
 const bigIntPrototype = BigInt.prototype as unknown as { toJSON: () => string };
 bigIntPrototype.toJSON = function () {
  return this.toString();
 };
 // Route imports
 import authRoutes from './modules/auth/auth.routes';
 import instanceRoutes from './modules/instances/instances.routes';
--- a/changemaker-control-panel/api/src/services/backup.service.ts
+++ b/changemaker-control-panel/api/src/services/backup.service.ts
@ -1,5 +1,6 @@
 import { Prisma, BackupStatus, AuditAction, InstanceStatus } from '@prisma/client';
 import fs from 'fs/promises';
 import { createReadStream } from 'fs';
 import path from 'path';
 import crypto from 'crypto';
 import { execFile as execFileCb } from 'child_process';
@ -10,6 +11,7 @@ import { AppError } from '../middleware/error-handler';
 import { decryptJson } from '../utils/encryption';
 import * as docker from './docker.service';
 import { logger } from '../utils/logger';
 import { getRemoteDriverForInstance } from './execution-driver';
 const execFile = promisify(execFileCb);
 /**
@ -24,11 +26,16 @@ function assertPathWithinBoundary(filePath: string, boundary: string, label: str
 }
 /**
- * Compute SHA-256 hash of a file.
+ * Compute SHA-256 hash of a file by streaming its contents.
 */
 async function fileHash(filePath: string): Promise<string> {
-  const fileBuffer = await fs.readFile(filePath);
+  return new Promise((resolve, reject) => {
-  return crypto.createHash('sha256').update(fileBuffer).digest('hex');
+    const hash = crypto.createHash('sha256');
    const stream = createReadStream(filePath);
    stream.on('data', (chunk) => hash.update(chunk));
    stream.on('end', () => resolve(hash.digest('hex')));
    stream.on('error', reject);
  });
 }
 /**
@ -52,7 +59,11 @@ export async function createBackup(instanceId: string, userId?: string, ipAddres
    throw new AppError(400, `Cannot backup instance in ${instance.status} state`, 'INVALID_STATE');
  }
-  if ((instance as { isRegistered?: boolean }).isRegistered) {
+  // `isRegistered` + `isRemote` = a remote CCP-managed instance (agent on the
  // far side). `isRegistered` alone (without `isRemote`) would mean a local
  // host-managed instance that CCP doesn't own the compose files for — that
  // case we still can't back up.
  if (instance.isRegistered && !instance.isRemote) {
    throw new AppError(400, 'Backups not managed by CCP for registered instances', 'NOT_MANAGED');
  }
@ -72,9 +83,31 @@ export async function createBackup(instanceId: string, userId?: string, ipAddres
  return backup;
 }
 /**
 * Instance fields the backup routines in this module take as input.
 */
 type BackupInstance = {
  id: string;
  // Appears in remote archive names and as the per-instance sub-directory
  // under BACKUP_STORAGE_PATH.
  slug: string;
  basePath: string;
  composeProject: string;
  encryptedSecrets: string | null;
  // Selects the code path in performBackup: true → performRemoteBackup,
  // false → performLocalBackup.
  isRemote: boolean;
  // Passed to getRemoteDriverForInstance on the remote path; may be null.
  agentUrl: string | null;
 };
 async function performBackup(
  backupId: string,
-  instance: { id: string; slug: string; basePath: string; composeProject: string; encryptedSecrets: string | null },
+  instance: BackupInstance,
  userId?: string,
  ipAddress?: string
 ) {
  if (instance.isRemote) {
    return performRemoteBackup(backupId, instance, userId, ipAddress);
  }
  return performLocalBackup(backupId, instance, userId, ipAddress);
 }
 async function performLocalBackup(
  backupId: string,
  instance: BackupInstance,
  userId?: string,
  ipAddress?: string
 ) {
@ -221,6 +254,168 @@ async function performBackup(
  }
 }
 /**
 * Run a backup on a remote agent and stream the resulting archive to CCP storage.
 *
 * Flow:
 *   1. Tell agent to run scripts/backup.sh → { backupId, sizeBytes, sha256, manifest }
 *   2. Resolve the destination $BACKUP_STORAGE_PATH/{slug}/backup-{slug}-{backupId}.tar.gz
 *      and validate it against the storage boundary BEFORE it is recorded for
 *      cleanup (backupId comes from the agent and is part of the file name)
 *   3. Stream archive from agent to that destination
 *   4. Verify the downloaded size and the local SHA256 match what the agent
 *      reported (defense in depth)
 *   5. Tell agent to delete its local copy (reclaim remote disk, best-effort)
 *   6. Update Backup row as COMPLETED and, when userId is given, write an audit log entry
 *
 * On failure at any step the Backup row is marked FAILED (best-effort), any
 * partial local file is removed and the original error is rethrown. If the
 * remote backup had already been created, the agent-side archive is left in
 * place so the operator can retry the download.
 *
 * @param backupId  id of the Backup row tracking this run
 * @param instance  the remote instance to back up
 * @param userId    when set, an audit log entry is written on success
 * @param ipAddress stored on the audit log entry
 */
 async function performRemoteBackup(
  backupId: string,
  instance: BackupInstance,
  userId?: string,
  ipAddress?: string
 ) {
  // Only ever holds a path that has passed the boundary check, because the
  // catch block below unlinks whatever is stored here.
  let archivePath: string | null = null;
  let agentBackupId: string | null = null;
  try {
    await prisma.backup.update({
      where: { id: backupId },
      data: { status: BackupStatus.IN_PROGRESS },
    });
    const driver = await getRemoteDriverForInstance({
      id: instance.id,
      slug: instance.slug,
      isRemote: instance.isRemote,
      agentUrl: instance.agentUrl,
    });
    // 1. Trigger the backup on the agent (this blocks until backup.sh completes)
    logger.info(`[backup] ${instance.slug}: triggering remote backup via agent`);
    const result = await driver.createBackup();
    agentBackupId = result.backupId;
    logger.info(
      `[backup] ${instance.slug}: agent backup complete — ${result.filename} ` +
      `(${(result.sizeBytes / 1024 / 1024).toFixed(1)} MB, sha256=${result.sha256.substring(0, 16)}...)`
    );
    // 2. Resolve the destination archive path on CCP storage
    const archiveName = `backup-${instance.slug}-${result.backupId}.tar.gz`;
    const destination = path.join(env.BACKUP_STORAGE_PATH, instance.slug, archiveName);
    // Path traversal guard. result.backupId is agent-supplied, so the path is
    // validated first and only then published to `archivePath`; otherwise a
    // rejected (out-of-bounds) path would be unlinked by the cleanup below.
    assertPathWithinBoundary(destination, env.BACKUP_STORAGE_PATH, 'Backup archive');
    archivePath = destination;
    await fs.mkdir(path.dirname(destination), { recursive: true });
    // 3. Stream the archive from the agent to CCP storage
    logger.info(`[backup] ${instance.slug}: streaming archive to ${destination}`);
    const { bytesWritten } = await driver.downloadBackup(result.backupId, destination);
    if (bytesWritten !== result.sizeBytes) {
      throw new Error(
        `Downloaded size ${bytesWritten} does not match agent-reported size ${result.sizeBytes}`
      );
    }
    // 4. Re-hash the downloaded file and compare to the agent-reported hash.
    //
    // SECURITY NOTE: this check authenticates *transmission integrity* only,
    // not content integrity against a malicious agent. Both the file bytes
    // and the expected hash are supplied by the (semi-trusted) agent, so a
    // compromised agent can trivially make this check pass while delivering
    // arbitrary content. The check still catches accidental corruption (bit
    // flips, truncation) and is essentially free.
    //
    // The mTLS channel guarantees that the bytes weren't modified in transit
    // by an outside attacker. The remaining trust gap — "what if the agent
    // itself is compromised?" — must be addressed before Phase B (restore)
    // ships, since restore feeds the archive into pg_restore. Either:
    //   (a) HMAC-sign the hash on the agent with its mTLS private key and
    //       verify on the CCP using the agent cert public key, or
    //   (b) limit restore operations to require an additional out-of-band
    //       admin confirmation step.
    const localSha256 = await fileHash(destination);
    if (localSha256 !== result.sha256) {
      throw new Error(
        `SHA256 mismatch: agent reported ${result.sha256}, local file hashed ${localSha256}`
      );
    }
    // 5. Reclaim disk on the remote agent
    try {
      await driver.deleteBackup(result.backupId);
    } catch (err) {
      logger.warn(
        `[backup] ${instance.slug}: failed to delete remote backup ${result.backupId}: ${(err as Error).message}`
      );
      // Non-fatal — CCP has the archive, remote copy will age out next retention sweep
    }
    // 6. Persist the result. Store sha256 and agentBackupId inside the manifest
    // since we don't have dedicated columns.
    const mergedManifest = {
      ...(result.manifest as Record<string, unknown> | null ?? {}),
      source: 'remote',
      agentBackupId: result.backupId,
      sha256: result.sha256,
      createdAt: result.createdAt,
    };
    await prisma.backup.update({
      where: { id: backupId },
      data: {
        status: BackupStatus.COMPLETED,
        archivePath: destination,
        sizeBytes: BigInt(bytesWritten),
        manifest: mergedManifest as unknown as Prisma.InputJsonValue,
        completedAt: new Date(),
      },
    });
    if (userId) {
      await prisma.auditLog.create({
        data: {
          userId,
          instanceId: instance.id,
          action: AuditAction.BACKUP_CREATE,
          details: {
            backupId,
            archiveName,
            sizeBytes: bytesWritten,
            source: 'remote',
            agentBackupId: result.backupId,
          },
          ipAddress,
        },
      });
    }
    logger.info(
      `[backup] ${instance.slug}: remote backup stored at ${destination} ` +
      `(${(bytesWritten / 1024 / 1024).toFixed(1)} MB)`
    );
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    // Marking the row FAILED is best-effort: if the database write itself
    // fails we still want the cleanup below to run and the ORIGINAL error to
    // reach the caller instead of being replaced by the database error.
    try {
      await prisma.backup.update({
        where: { id: backupId },
        data: {
          status: BackupStatus.FAILED,
          errorMessage: reason,
          completedAt: new Date(),
        },
      });
    } catch (updateErr) {
      logger.warn(
        `[backup] ${instance.slug}: could not mark backup ${backupId} as FAILED: ` +
        `${updateErr instanceof Error ? updateErr.message : String(updateErr)}`
      );
    }
    // Clean up any partial local file; leave the remote copy so retry is possible
    if (archivePath) {
      try { await fs.unlink(archivePath); } catch { /* ignore */ }
    }
    if (agentBackupId) {
      logger.warn(
        `[backup] ${instance.slug}: leaving agent-side backup ${agentBackupId} in place for retry`
      );
    }
    throw err;
  }
 }
 /**
 * Delete a backup (file + DB record).
 */
--- a/changemaker-control-panel/api/src/services/ccp-pangolin.client.ts
+++ b/changemaker-control-panel/api/src/services/ccp-pangolin.client.ts
@ -0,0 +1,368 @@
 /**
 * Pangolin Integration API client for the CCP.
 *
 * Ported from the main CML's pangolin.client.ts. Adapted to:
 * - Accept credentials via constructor (not env singleton)
 * - Be instantiable per-call so the CCP can use its own API token
 *   to manage tunnels for multiple remote instances
 *
 * The CCP never exposes its Pangolin API key to remote instances — it
 * only pushes the resulting Newt credentials via the agent's writeFiles.
 */
 import { logger } from '../utils/logger';
 // ─── Types ─────────────────────────────────────────────────────────
 export interface PangolinSite {
  siteId: string;
  name: string;
  orgId: string;
  niceId: string;
  pubKey?: string;
  subnet?: string;
  megabytesIn?: number;
  megabytesOut?: number;
  lastSeen?: string;
  online?: boolean;
  type?: string;
  address?: string;
 }
 export interface PangolinExitNode {
  exitNodeId: string;
  name: string;
  location?: string;
  region?: string;
  online: boolean;
  capacity?: number;
  latency?: number;
 }
 export interface PangolinResource {
  resourceId: string;
  name: string;
  subdomain?: string;
  fullDomain?: string;
  ssl?: boolean;
  blockAccess?: boolean;
  active?: boolean;
  proxyPort?: number;
  protocol?: string;
  domainBindings?: string[];
  http?: boolean;
  targets?: PangolinTarget[];
 }
 export interface PangolinTarget {
  targetId: string;
  resourceId: string;
  siteId: string;
  ip: string;
  port: number;
  method: string;
  enabled?: boolean;
 }
 export interface PangolinNewt {
  newtId: string;
  secret: string;
  siteId: string;
 }
 export interface PangolinSiteDefaults {
  newtId: string;
  newtSecret: string;
  address: string;
 }
 export interface CreateSitePayload {
  name: string;
  type?: string;
  subnet?: string;
  exitNodeId?: string;
  newtId?: string;
  secret?: string;
  address?: string;
 }
 export interface CreateHttpResourcePayload {
  name: string;
  domainId: string;
  subdomain?: string;
  http: true;
  protocol: 'tcp';
 }
 export interface CreateTargetPayload {
  siteId: string | number;
  ip: string;
  port: number;
  method: 'http' | 'https';
  enabled?: boolean;
 }
 export interface PangolinDomain {
  domainId: string;
  baseDomain: string;
  verified: boolean;
  type?: string;
  failed?: boolean;
  configManaged?: boolean;
 }
 export interface UpdateResourcePayload {
  name?: string;
  subdomain?: string;
  fullDomain?: string;
  ssl?: boolean;
  sso?: boolean;
  active?: boolean;
  blockAccess?: boolean;
  proxyPort?: number;
  protocol?: string;
  domainBindings?: string[];
 }
 export interface UpdateCertificatePayload {
  autoRenew?: boolean;
 }
 export interface PangolinCertificate {
  certId: string;
  domainId: string;
  domain: string;
  status: 'PENDING' | 'ACTIVE' | 'EXPIRED' | 'FAILED';
  issuedAt?: string;
  expiresAt?: string;
  autoRenew?: boolean;
  issuer?: string;
 }
 export interface PangolinConnectedClient {
  clientId: string;
  resourceId: string;
  ipAddress: string;
  connectedAt: string;
  lastSeen: string;
  bytesIn: number;
  bytesOut: number;
  online: boolean;
 }
 // ─── Helpers ───────────────────────────────────────────────────────
 /**
 * Return a loggable copy of a Pangolin request body with the top-level
 * `secret` and `newtSecret` fields masked. Non-object values (including
 * null/undefined) are returned untouched; the input is never mutated.
 */
 function redactSecrets(body: unknown): unknown {
  if (typeof body !== 'object' || body === null) {
    return body;
  }
  // Shallow copy so the caller's payload keeps its real credentials.
  const sanitized: Record<string, unknown> = { ...(body as Record<string, unknown>) };
  for (const field of ['secret', 'newtSecret']) {
    if (field in sanitized) {
      sanitized[field] = '[REDACTED]';
    }
  }
  return sanitized;
 }
 // ─── Client ────────────────────────────────────────────────────────
 /**
 * Thin client for the Pangolin Integration API.
 *
 * Credentials are passed to the constructor so one process can hold several
 * clients. Every call goes through `request()`, which enforces a 15 s
 * timeout, sends the API key as a bearer token, throws on non-2xx responses
 * and unwraps Pangolin's `{ success, data }` envelope.
 *
 * Caller-supplied identifiers are URL-encoded before being placed in a path.
 */
 export class CcpPangolinClient {
  constructor(
    private readonly baseUrl: string,
    private readonly apiKey: string,
    private readonly orgId: string
  ) {}
  /** True when base URL, API key and org id are all non-empty. */
  get configured(): boolean {
    return !!(this.baseUrl && this.apiKey && this.orgId);
  }
  /** Encode a caller-supplied identifier for use as a single URL path segment. */
  private static segment(value: string | number): string {
    return encodeURIComponent(String(value));
  }
  /**
   * Perform one API call.
   *
   * @throws Error when the client is not configured, when the response status
   *         is not ok (message includes status and response text), or when the
   *         15 s timeout aborts the fetch.
   * @returns the unwrapped JSON payload, or `{}` for non-JSON responses.
   */
  private async request<T>(method: string, path: string, body?: unknown): Promise<T> {
    if (!this.configured) {
      throw new Error('Pangolin API not configured. Set PANGOLIN_API_URL, PANGOLIN_API_KEY, PANGOLIN_ORG_ID in CCP .env');
    }
    const url = `${this.baseUrl}${path}`;
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), 15000);
    try {
      logger.debug(`[pangolin] ${method} ${path}${body ? ` body=${JSON.stringify(redactSecrets(body))}` : ''}`);
      const res = await fetch(url, {
        method,
        headers: {
          'Authorization': `Bearer ${this.apiKey}`,
          'Content-Type': 'application/json',
        },
        body: body ? JSON.stringify(body) : undefined,
        signal: controller.signal,
      });
      if (!res.ok) {
        const text = await res.text().catch(() => '');
        throw new Error(`Pangolin API ${method} ${path} returned ${res.status}: ${text}`);
      }
      const contentType = res.headers.get('content-type') || '';
      if (contentType.includes('application/json')) {
        const json = await res.json();
        return this.unwrapResponse<T>(json);
      }
      return {} as T;
    } finally {
      // Always disarm the abort timer, on success and on failure.
      clearTimeout(timeout);
    }
  }
  /** Return `data` from a `{ success, data }` envelope, or the value itself otherwise. */
  private unwrapResponse<T>(json: unknown): T {
    if (json && typeof json === 'object' && !Array.isArray(json)) {
      const obj = json as Record<string, unknown>;
      if ('data' in obj && 'success' in obj) {
        return obj.data as T;
      }
    }
    return json as T;
  }
  // ─── Health ───────────────────────────────────────────────────
  /** Probe the API root with a 5 s timeout. Never throws; any failure yields false. */
  async healthCheck(): Promise<boolean> {
    try {
      const controller = new AbortController();
      const timeout = setTimeout(() => controller.abort(), 5000);
      try {
        const res = await fetch(`${this.baseUrl}/`, {
          headers: { 'Authorization': `Bearer ${this.apiKey}` },
          signal: controller.signal,
        });
        return res.ok;
      } finally {
        clearTimeout(timeout);
      }
    } catch {
      return false;
    }
  }
  // ─── Site Defaults ────────────────────────────────────────────
  /**
   * Fetch the Newt credentials Pangolin proposes for a new site.
   *
   * @throws Error when the response carries no Newt id or secret (including
   *         a null or non-object payload).
   */
  async pickSiteDefaults(): Promise<PangolinSiteDefaults> {
    const res = await this.request<unknown>('GET', `/org/${this.orgId}/pick-site-defaults`);
    // A null / non-object payload must end in the descriptive error below,
    // not in a TypeError from a property read on null.
    const obj = (res && typeof res === 'object' ? res : {}) as Record<string, unknown>;
    const newtId = obj.newtId as string || '';
    const newtSecret = obj.newtSecret as string || obj.secret as string || '';
    const address = obj.clientAddress as string || obj.address as string || '';
    if (!newtId || !newtSecret) {
      throw new Error('Pangolin did not return Newt credentials from pick-site-defaults');
    }
    return { newtId, newtSecret, address };
  }
  // ─── Sites ────────────────────────────────────────────────────
  async listSites(): Promise<PangolinSite[]> {
    const res = await this.request<unknown>('GET', `/org/${this.orgId}/sites`);
    return this.extractArray(res, 'sites', 'listSites');
  }
  async getSite(siteId: string): Promise<PangolinSite> {
    return this.request<PangolinSite>('GET', `/site/${CcpPangolinClient.segment(siteId)}`);
  }
  async createSite(data: CreateSitePayload): Promise<PangolinSite & { newt?: PangolinNewt }> {
    return this.request<PangolinSite & { newt?: PangolinNewt }>('PUT', `/org/${this.orgId}/site`, data);
  }
  async deleteSite(siteId: string): Promise<void> {
    await this.request<void>('DELETE', `/site/${CcpPangolinClient.segment(siteId)}`);
  }
  /**
   * List exit nodes. Best-effort: any failure is logged as a warning and an
   * empty list is returned instead of throwing.
   */
  async listExitNodes(): Promise<PangolinExitNode[]> {
    try {
      const res = await this.request<unknown>('GET', `/org/${this.orgId}/exit-nodes`);
      return this.extractArray(res, 'exitNodes', 'listExitNodes');
    } catch (err) {
      logger.warn(
        `[pangolin] listExitNodes failed, returning empty list: ${err instanceof Error ? err.message : String(err)}`
      );
      return [];
    }
  }
  // ─── Resources ────────────────────────────────────────────────
  async listResources(): Promise<PangolinResource[]> {
    const res = await this.request<unknown>('GET', `/org/${this.orgId}/resources`);
    return this.extractArray(res, 'resources', 'listResources');
  }
  async getResource(resourceId: string): Promise<PangolinResource> {
    return this.request<PangolinResource>('GET', `/resource/${CcpPangolinClient.segment(resourceId)}`);
  }
  async createResource(data: CreateHttpResourcePayload): Promise<PangolinResource> {
    logger.info(`[pangolin] createResource: ${data.name} (subdomain: ${data.subdomain || '(root)'})`);
    return this.request<PangolinResource>('PUT', `/org/${this.orgId}/resource`, data);
  }
  async updateResource(resourceId: string, data: UpdateResourcePayload): Promise<PangolinResource> {
    return this.request<PangolinResource>('POST', `/resource/${CcpPangolinClient.segment(resourceId)}`, data);
  }
  async deleteResource(resourceId: string): Promise<void> {
    await this.request<void>('DELETE', `/resource/${CcpPangolinClient.segment(resourceId)}`);
  }
  // ─── Targets ──────────────────────────────────────────────────
  /**
   * Attach a target to a resource. `siteId` is sent as a number.
   *
   * @throws Error when `siteId` is blank or not numeric — previously such a
   *         value became NaN and was serialised as `"siteId": null`.
   */
  async createTarget(resourceId: string, data: CreateTargetPayload): Promise<PangolinTarget> {
    const rawSiteId = data.siteId;
    const siteId =
      typeof rawSiteId === 'string' && rawSiteId.trim() === '' ? Number.NaN : Number(rawSiteId);
    if (!Number.isFinite(siteId)) {
      throw new Error(`Invalid Pangolin siteId for createTarget: ${JSON.stringify(rawSiteId)}`);
    }
    logger.info(`[pangolin] createTarget: resource=${resourceId}, ip=${data.ip}:${data.port}`);
    const payload = { ...data, siteId };
    return this.request<PangolinTarget>('PUT', `/resource/${CcpPangolinClient.segment(resourceId)}/target`, payload);
  }
  async listTargets(resourceId: string): Promise<PangolinTarget[]> {
    const res = await this.request<unknown>('GET', `/resource/${CcpPangolinClient.segment(resourceId)}/targets`);
    return this.extractArray(res, 'targets', 'listTargets');
  }
  async deleteTarget(targetId: string): Promise<void> {
    await this.request<void>('DELETE', `/target/${CcpPangolinClient.segment(targetId)}`);
  }
  // ─── Domains ──────────────────────────────────────────────────
  async listDomains(): Promise<PangolinDomain[]> {
    const res = await this.request<unknown>('GET', `/org/${this.orgId}/domains`);
    return this.extractArray(res, 'domains', 'listDomains');
  }
  // ─── Certificates ─────────────────────────────────────────────
  async getCertificate(domainId: string, domain: string): Promise<PangolinCertificate> {
    return this.request<PangolinCertificate>(
      'GET',
      `/org/${this.orgId}/certificate/${CcpPangolinClient.segment(domainId)}/${CcpPangolinClient.segment(domain)}`
    );
  }
  async updateCertificate(certId: string, data: UpdateCertificatePayload): Promise<PangolinCertificate> {
    return this.request<PangolinCertificate>('POST', `/certificate/${CcpPangolinClient.segment(certId)}`, data);
  }
  // ─── Clients ──────────────────────────────────────────────────
  async listClients(resourceId: string): Promise<PangolinConnectedClient[]> {
    const res = await this.request<unknown>('GET', `/resource/${CcpPangolinClient.segment(resourceId)}/clients`);
    return this.extractArray(res, 'clients', 'listClients');
  }
  // ─── Helpers ──────────────────────────────────────────────────
  /**
   * Pull a list out of the response shapes handled here: a bare array,
   * `{ [key]: [...] }`, `{ data: { [key]: [...] } }` or `{ data: [...] }`.
   * Logs a warning and returns [] when none of them matches.
   */
  private extractArray<T>(res: unknown, key: string, context: string): T[] {
    if (Array.isArray(res)) return res as T[];
    if (res && typeof res === 'object') {
      const obj = res as Record<string, unknown>;
      if (Array.isArray(obj[key])) return obj[key] as T[];
      if (obj.data && typeof obj.data === 'object') {
        const dataObj = obj.data as Record<string, unknown>;
        if (Array.isArray(dataObj[key])) return dataObj[key] as T[];
      }
      if (Array.isArray(obj.data)) return obj.data as T[];
    }
    logger.warn(`[pangolin] ${context}: could not extract array from response`);
    return [];
  }
 }
--- a/changemaker-control-panel/api/src/services/certificate.service.ts
+++ b/changemaker-control-panel/api/src/services/certificate.service.ts
@ -90,7 +90,7 @@ export async function ensureCA() {
 * Issue a certificate for a remote agent, signed by the CA.
 * Returns the certificate materials (plaintext) for one-time display.
 */
-export async function issueAgentCert(instanceId: string, slug: string) {
+export async function issueAgentCert(instanceId: string, slug: string, agentUrl?: string) {
  const ca = await ensureCA();
  const caKeyPem = decrypt(ca.encryptedKey);
@ -110,12 +110,29 @@ export async function issueAgentCert(instanceId: string, slug: string) {
    await fs.writeFile(caCertFile, ca.certPem);
    await fs.writeFile(serialFile, crypto.randomBytes(16).toString('hex'));
-    // Extensions for server+client auth
+    // Build SAN entries from the agent URL hostname
-    await fs.writeFile(extFile, [
+    const sanEntries: string[] = [];
    if (agentUrl) {
      try {
        const hostname = new URL(agentUrl).hostname;
        // Detect IP vs DNS name
        if (/^\d{1,3}(\.\d{1,3}){3}$/.test(hostname) || hostname.includes(':')) {
          sanEntries.push(`IP:${hostname}`);
        } else {
          sanEntries.push(`DNS:${hostname}`);
        }
      } catch { /* ignore invalid URL */ }
    }
    sanEntries.push(`DNS:${commonName}`);
    // Extensions for server+client auth with SANs
    const extLines = [
      'basicConstraints=CA:FALSE',
      'keyUsage=digitalSignature,keyEncipherment',
      'extendedKeyUsage=serverAuth,clientAuth',
-    ].join('\n'));
+      `subjectAltName=${sanEntries.join(',')}`,
    ];
    await fs.writeFile(extFile, extLines.join('\n'));
    // Generate agent key
    await exec(
--- a/changemaker-control-panel/api/src/services/execution-driver.ts
+++ b/changemaker-control-panel/api/src/services/execution-driver.ts
@ -60,7 +60,20 @@ export async function getDriverForInstance(instance: DriverInstance): Promise<Ex
    const { getLocalDriver } = await import('./local-driver');
    return getLocalDriver();
  }
  return getRemoteDriverForInstance(instance);
 }
 /**
 * Resolve a RemoteDriver for a remote instance. Throws if the instance is
 * local, missing an agent URL, or has no valid mTLS certificate.
 *
 * Use this when you need to call RemoteDriver-specific methods like
 * createBackup() that don't exist on the ExecutionDriver interface.
 */
 export async function getRemoteDriverForInstance(instance: DriverInstance) {
  if (!instance.isRemote) {
    throw new Error(`Instance ${instance.slug} is not remote`);
  }
  if (!instance.agentUrl) {
    throw new Error(`Remote instance ${instance.slug} has no agent URL configured`);
  }
--- a/changemaker-control-panel/api/src/services/remote-driver.ts
+++ b/changemaker-control-panel/api/src/services/remote-driver.ts
@ -1,10 +1,87 @@
 import https from 'https';
 import fs from 'fs';
 import { pipeline } from 'stream/promises';
 import { env } from '../config/env';
 import type { ExecutionDriver } from './execution-driver';
 import { AgentUnreachableError } from './execution-driver';
 import type { ContainerInfo } from './docker.service';
 import { logger } from '../utils/logger';
 export interface AgentBackupResult {
  backupId: string;
  filename: string;
  sizeBytes: number;
  sha256: string;
  manifest: unknown | null;
  createdAt: string;
 }
 export interface AgentBackupListEntry {
  backupId: string;
  filename: string;
  sizeBytes: number;
  createdAt: string;
 }
 export interface AgentRestoreUploadResult {
  uploadId: string;
  sizeBytes: number;
  sha256: string;
 }
 export interface AgentRestoreOptions {
  skipDb?: boolean;
  skipUploads?: boolean;
  skipListmonk?: boolean;
  dryRun?: boolean;
 }
 export interface AgentRestoreState {
  status: 'UPLOADED' | 'RUNNING' | 'COMPLETED' | 'FAILED';
  uploadId: string;
  startedAt: string;
  completedAt?: string;
  exitCode?: number;
  logTail?: string;
  errorMessage?: string;
  options?: AgentRestoreOptions;
 }
 export interface AgentUpdateStatus {
  branch: string;
  currentCommit: string;
  currentMessage?: string;
  remoteCommit: string | null;
  commitsBehind: number;
  changelog: Array<{ hash: string; message: string; date: string; author: string }>;
  checkedAt: string;
  error: string | null;
 }
 export interface AgentUpgradeProgress {
  phase?: number;
  phaseName?: string;
  percentage?: number;
  message?: string;
  timestamp?: string;
 }
 export interface AgentUpgradeResult {
  success: boolean;
  message?: string;
  previousCommit?: string;
  newCommit?: string;
  commitCount?: number;
  durationSeconds?: number;
  warnings?: string[];
 }
 export interface StartAgentUpgradeOptions {
  skipBackup?: boolean;
  useRegistry?: boolean;
  branch?: string;
 }
 interface AgentRequestOptions {
  method: 'GET' | 'POST' | 'DELETE';
  path: string;
@ -261,4 +338,261 @@ export class RemoteDriver implements ExecutionDriver {
      timeoutMs: env.AGENT_LONG_OP_TIMEOUT_MS,
    });
  }
  // ─── Backup Operations ──────────────────────────────────────
  /**
   * Trigger a backup on the remote agent. The agent shells out to scripts/backup.sh
   * and returns metadata for the resulting archive. The archive stays on the
   * agent's disk until downloadBackup() + deleteBackup() are called.
   */
  async createBackup(): Promise<AgentBackupResult> {
    // POST to the agent's backup endpoint for this slug, using the
    // long-operation timeout from env, and hand back the agent's metadata.
    const endpoint = `/instance/${this.slug}/backup`;
    const metadata = await this.request<AgentBackupResult>({
      method: 'POST',
      path: endpoint,
      timeoutMs: env.AGENT_LONG_OP_TIMEOUT_MS,
    });
    return metadata;
  }
  /**
   * List backup archives currently held on the agent for this slug.
   */
  async listAgentBackups(): Promise<AgentBackupListEntry[]> {
    // The agent wraps the list in a `data` envelope; return just the entries.
    const endpoint = `/instance/${this.slug}/backups`;
    const { data } = await this.request<{ data: AgentBackupListEntry[] }>({
      method: 'GET',
      path: endpoint,
    });
    return data;
  }
  /**
   * Delete an archive from the agent's disk. Called after a successful download.
   */
  async deleteBackup(backupId: string): Promise<void> {
    // backupId is URL-encoded so it always stays a single path segment.
    const archiveId = encodeURIComponent(backupId);
    const endpoint = `/instance/${this.slug}/backup/${archiveId}`;
    await this.request({ method: 'DELETE', path: endpoint });
  }
  /**
   * Stream a backup archive from the agent to a local file path.
   * Verifies the Content-Length header matches the bytes written.
   */
  async downloadBackup(backupId: string, destPath: string): Promise<{ bytesWritten: number }> {
    // Resolves with the on-disk size of destPath once the response has been
    // fully written. Rejects with:
    //   - Error when the agent answers with anything other than 2xx,
    //   - Error when the written size differs from Content-Length,
    //   - the stream error when writing/reading fails,
    //   - AgentUnreachableError on request error or timeout.
    // A partially written destPath is NOT removed here; the caller owns cleanup.
    const url = new URL(
      `/instance/${this.slug}/backup/${encodeURIComponent(backupId)}/download`,
      this.agentUrl
    );
    const timeoutMs = env.AGENT_LONG_OP_TIMEOUT_MS;
    return new Promise((resolve, reject) => {
      const req = https.request(
        {
          hostname: url.hostname,
          port: url.port || 7443,
          path: url.pathname + url.search,
          method: 'GET',
          headers: { Accept: 'application/gzip' },
          cert: this.clientCert,
          key: this.clientKey,
          ca: this.caCert,
          rejectUnauthorized: true,
          timeout: timeoutMs,
        },
        async (res) => {
          // Only a 2xx response carries the archive. Anything else (redirects,
          // informational or missing status, 4xx/5xx) must never be written
          // to destPath as if it were a backup.
          const status = res.statusCode ?? 0;
          if (status < 200 || status >= 300) {
            let body = '';
            res.on('data', (c) => {
              // Keep only what the error message will show; drop the rest.
              if (body.length < 500) body += c;
            });
            res.on('error', reject);
            res.on('end', () => reject(new Error(`Agent returned ${res.statusCode}: ${body.substring(0, 500)}`)));
            return;
          }
          const expectedSize = res.headers['content-length']
            ? parseInt(res.headers['content-length'] as string, 10)
            : null;
          try {
            const out = fs.createWriteStream(destPath);
            // pipeline() destroys both streams if either side fails.
            await pipeline(res, out);
            const stats = await fs.promises.stat(destPath);
            if (expectedSize !== null && stats.size !== expectedSize) {
              reject(new Error(`Downloaded size ${stats.size} does not match Content-Length ${expectedSize}`));
              return;
            }
            resolve({ bytesWritten: stats.size });
          } catch (err) {
            reject(err);
          }
        }
      );
      req.on('error', (err) => {
        reject(new AgentUnreachableError(this.agentUrl, err));
      });
      req.on('timeout', () => {
        // 'timeout' only signals idleness; the request has to be torn down explicitly.
        req.destroy();
        reject(new AgentUnreachableError(this.agentUrl, new Error(`Timed out after ${timeoutMs}ms`)));
      });
      req.end();
    });
  }
  // ─── Restore Operations ─────────────────────────────────────
  /**
   * Stream a backup archive from a local path to the agent's upload endpoint.
   * The expected SHA256 is passed as a query parameter and the agent verifies
   * it during ingestion — if it mismatches, the upload is rejected with 400.
   */
  async uploadRestore(
    archivePath: string,
    expectedSha256: string
  ): Promise<AgentRestoreUploadResult> {
    // Streams archivePath as the POST body (Content-Length taken from stat)
    // and resolves with the agent's parsed JSON reply. Rejects with:
    //   - Error carrying the agent's message (or status + body) on status >= 400,
    //   - the JSON parse error when a success reply is not valid JSON,
    //   - the read-stream error when the local file cannot be read,
    //   - AgentUnreachableError on request error or timeout.
    const stats = await fs.promises.stat(archivePath);
    const url = new URL(
      `/instance/${this.slug}/restore/upload?sha256=${encodeURIComponent(expectedSha256)}`,
      this.agentUrl
    );
    const timeoutMs = env.AGENT_LONG_OP_TIMEOUT_MS;
    return new Promise((resolve, reject) => {
      const fileStream = fs.createReadStream(archivePath);
      // pipe() does not close its source when the destination fails, so every
      // network-side failure goes through here to release the file descriptor.
      const fail = (err: Error) => {
        fileStream.destroy();
        reject(err);
      };
      const req = https.request(
        {
          hostname: url.hostname,
          port: url.port || 7443,
          path: url.pathname + url.search,
          method: 'POST',
          headers: {
            'Content-Type': 'application/octet-stream',
            'Content-Length': String(stats.size),
          },
          cert: this.clientCert,
          key: this.clientKey,
          ca: this.caCert,
          rejectUnauthorized: true,
          timeout: timeoutMs,
        },
        (res) => {
          let body = '';
          res.on('data', (c) => (body += c));
          res.on('error', fail);
          res.on('end', () => {
            if (res.statusCode && res.statusCode >= 400) {
              try {
                const err = JSON.parse(body);
                reject(new Error(err.message || `Agent returned ${res.statusCode}`));
              } catch {
                reject(new Error(`Agent returned ${res.statusCode}: ${body.substring(0, 500)}`));
              }
              return;
            }
            try {
              resolve(JSON.parse(body) as AgentRestoreUploadResult);
            } catch (err) {
              reject(err);
            }
          });
        }
      );
      req.on('error', (err) => {
        fail(new AgentUnreachableError(this.agentUrl, err));
      });
      req.on('timeout', () => {
        // 'timeout' only signals idleness; the request has to be torn down explicitly.
        req.destroy();
        fail(new AgentUnreachableError(this.agentUrl, new Error(`Timed out after ${timeoutMs}ms`)));
      });
      fileStream.on('error', (err) => {
        // Local read failure: abort the upload and surface the file error.
        req.destroy();
        reject(err);
      });
      fileStream.pipe(req);
    });
  }
  /**
   * Tell the agent to apply a previously-uploaded restore archive. The agent
   * fires `scripts/restore.sh` in the background and returns immediately.
   * Use `getRestoreProgress()` to poll for completion.
   */
  async applyRestore(uploadId: string, options: AgentRestoreOptions = {}): Promise<void> {
    // The request body always carries `confirm: true`, followed by the
    // caller's restore options.
    const payload = { confirm: true, ...options };
    const endpoint = `/instance/${this.slug}/restore/${encodeURIComponent(uploadId)}/apply`;
    await this.request({ method: 'POST', path: endpoint, body: payload });
  }
  /**
   * Poll the agent for the current state of a restore.
   */
  async getRestoreProgress(uploadId: string): Promise<AgentRestoreState> {
    // Single GET against the progress endpoint of the given upload.
    const endpoint = `/instance/${this.slug}/restore/${encodeURIComponent(uploadId)}/progress`;
    const state = await this.request<AgentRestoreState>({ method: 'GET', path: endpoint });
    return state;
  }
  /**
   * Delete a restore upload dir from the agent's disk. Called after the CCP
   * has finalized the InstanceRestore row.
   */
  async deleteRestoreUpload(uploadId: string): Promise<void> {
    // uploadId is URL-encoded so it always stays a single path segment.
    const upload = encodeURIComponent(uploadId);
    const endpoint = `/instance/${this.slug}/restore/${upload}`;
    await this.request({ method: 'DELETE', path: endpoint });
  }
  // ─── Upgrade Operations ─────────────────────────────────────
  /**
   * Run upgrade-check.sh on the remote and return the parsed status.json.
   */
  async checkForUpdates(): Promise<AgentUpdateStatus> {
    // The check is given 90 seconds before the request times out.
    const endpoint = `/instance/${this.slug}/upgrade/check`;
    const status = await this.request<AgentUpdateStatus>({
      method: 'POST',
      path: endpoint,
      timeoutMs: 90_000,
    });
    return status;
  }
  /**
   * Trigger upgrade.sh --api-mode on the remote. Fire-and-forget; agent
   * spawns the script in the background and returns 202 immediately.
   * Use getUpgradeProgress / getUpgradeResult to track completion.
   */
  async startUpgrade(options: StartAgentUpgradeOptions = {}): Promise<void> {
    // The options object is sent verbatim as the request body; the call is
    // given 30 seconds before the request times out.
    const endpoint = `/instance/${this.slug}/upgrade/start`;
    await this.request({ method: 'POST', path: endpoint, body: options, timeoutMs: 30_000 });
  }
  /**
   * Read the agent's data/upgrade/progress.json. Returns the default zero-state
   * if no progress has been written yet.
   */
  async getUpgradeProgress(): Promise<AgentUpgradeProgress> {
    // Single GET against the upgrade progress endpoint for this slug.
    const endpoint = `/instance/${this.slug}/upgrade/progress`;
    const progress = await this.request<AgentUpgradeProgress>({ method: 'GET', path: endpoint });
    return progress;
  }
  /**
   * Read the agent's data/upgrade/result.json. Throws if no result is yet
   * available; the caller should treat that as "still running".
   */
  async getUpgradeResult(): Promise<AgentUpgradeResult> {
    // Single GET against the upgrade result endpoint; a failing request
    // propagates to the caller unchanged.
    const endpoint = `/instance/${this.slug}/upgrade/result`;
    const outcome = await this.request<AgentUpgradeResult>({ method: 'GET', path: endpoint });
    return outcome;
  }
 }
--- a/changemaker-control-panel/api/src/services/restore.service.ts
+++ b/changemaker-control-panel/api/src/services/restore.service.ts
@ -0,0 +1,376 @@
 import fs from 'fs/promises';
 import path from 'path';
 import crypto from 'crypto';
 import { createReadStream } from 'fs';
 import { Prisma, RestoreStatus, AuditAction, InstanceStatus } from '@prisma/client';
 import { prisma } from '../lib/prisma';
 import { env } from '../config/env';
 import { AppError } from '../middleware/error-handler';
 import { logger } from '../utils/logger';
 import { getRemoteDriverForInstance } from './execution-driver';
 import type { AgentRestoreOptions, AgentRestoreState } from './remote-driver';
 /**
 * Validate that a path is strictly inside the allowed backup storage boundary.
 *
 * Both arguments are resolved to absolute paths and compared with
 * `path.relative`, so the check also works when the boundary resolves to a
 * filesystem root (where appending a separator and prefix-matching would
 * reject every path).
 *
 * @param filePath - Path to validate.
 * @param boundary - Directory the path must live inside.
 * @param label - Prefix used in the error message.
 * @throws AppError 403 (`FORBIDDEN`) when the path is the boundary itself or
 *   lies outside of it.
 */
 function assertPathWithinBoundary(filePath: string, boundary: string, label: string): void {
  const relative = path.relative(path.resolve(boundary), path.resolve(filePath));
  const outside =
    relative === '' || // the boundary directory itself is not a file inside it
    relative === '..' ||
    relative.startsWith(`..${path.sep}`) || // climbs out of the boundary
    path.isAbsolute(relative); // no relative route exists (e.g. another Windows drive)
  if (outside) {
    throw new AppError(403, `${label} path outside allowed directory`, 'FORBIDDEN');
  }
 }
 /**
 * Stream a file through SHA-256 and return the digest as a hex string.
 *
 * The returned promise rejects if the read stream reports an error.
 */
 async function fileHash(filePath: string): Promise<string> {
  const digest = crypto.createHash('sha256');
  // Readable streams are async-iterable; a stream error rejects the loop.
  for await (const piece of createReadStream(filePath)) {
    digest.update(piece);
  }
  return digest.digest('hex');
 }
 // Delay between two consecutive restore-progress polls against the agent.
 const POLL_INTERVAL_MS = 3_000;
 // Total time the poll loop may run before the restore is treated as timed out.
 const POLL_TIMEOUT_MS = 15 * 60 * 1_000; // 15 min
 /**
 * Arguments accepted by createRestore.
 */
 interface StartRestoreArgs {
  // ID of the Backup row whose archive should be restored.
  backupId: string;
  // User recorded on the InstanceRestore row; audit entries are skipped when absent.
  triggeredById?: string;
  // Stored on the audit log entry (null when not supplied).
  ipAddress?: string | null;
  // Passed through to the agent's apply-restore call; defaults to {}.
  options?: AgentRestoreOptions;
 }
 /**
 * Start restoring an instance from one of its backups.
 *
 * Validates the backup and its instance, inserts a PENDING InstanceRestore
 * row, then launches the upload → apply → poll orchestration in the
 * background. The new row is returned right away so an HTTP handler can
 * respond without waiting for the restore to finish.
 *
 * @throws AppError 404 when the backup row or its archive file is missing,
 *   400 when the backup or instance is in the wrong state, 403 when the
 *   archive lies outside the backup storage directory, 501 for non-remote
 *   instances.
 */
 export async function createRestore(args: StartRestoreArgs) {
  const { backupId, triggeredById, ipAddress, options } = args;
  const backup = await prisma.backup.findUnique({
    where: { id: backupId },
    include: { instance: true },
  });
  if (!backup) {
    throw new AppError(404, 'Backup not found', 'NOT_FOUND');
  }
  if (backup.status !== 'COMPLETED') {
    throw new AppError(400, `Backup is ${backup.status}, not COMPLETED`, 'INVALID_STATE');
  }
  const archivePath = backup.archivePath;
  if (!archivePath) {
    throw new AppError(400, 'Backup has no archive path', 'NO_ARCHIVE');
  }
  const target = backup.instance;
  if (target.status !== InstanceStatus.RUNNING) {
    throw new AppError(400, `Cannot restore to instance in ${target.status} state`, 'INVALID_STATE');
  }
  // Only remote restore exists in Phase B. Local restore is intentionally a
  // stub (add a performLocalRestore branch here if it is ever needed); this
  // also rejects registered-but-local (CCP-adopted) instances, which have
  // isRemote=false.
  if (!target.isRemote) {
    throw new AppError(501, 'Local restore is not implemented — Phase B covers remote only', 'NOT_IMPLEMENTED');
  }
  // The archive must sit inside the storage boundary and actually exist.
  assertPathWithinBoundary(archivePath, env.BACKUP_STORAGE_PATH, 'Backup archive');
  try {
    await fs.access(archivePath);
  } catch {
    throw new AppError(404, 'Archive file is missing on disk', 'ARCHIVE_MISSING');
  }
  const restore = await prisma.instanceRestore.create({
    data: {
      instanceId: target.id,
      backupId: backup.id,
      status: RestoreStatus.PENDING,
      triggeredById: triggeredById ?? null,
    },
  });
  // Deliberately not awaited: the orchestration runs in the background and
  // any rejection is only logged.
  void performRemoteRestore(restore.id, archivePath, options ?? {}, triggeredById, ipAddress ?? null)
    .catch((err) => {
      logger.error(`[restore] ${restore.id} failed: ${(err as Error).message}`);
    });
  return restore;
 }
 /**
 * End-to-end remote restore orchestration, implemented by performRemoteRestore
 * (defined further below, after the writeRestoreAuditLog helper).
 *
 * Flow:
 *   1. Compute sha256 of the archive on CCP disk
 *   2. Upload to agent with sha256 query param (agent re-verifies on stream)
 *   3. Apply via agent (shells out to restore.sh --force)
 *   4. Poll progress every 3s until COMPLETED/FAILED or timeout
 *   5. Delete the agent-side upload
 *   6. Update the InstanceRestore row + audit log
 */
 /**
 * Record a BACKUP_RESTORE audit entry for a restore that reached a terminal
 * state. Nothing is written when no triggering user is known.
 *
 * A failure to insert the entry is logged and swallowed, so an audit-log DB
 * error never surfaces to the caller.
 *
 * Used for each terminal outcome:
 *   - 'success'              — the agent reported COMPLETED
 *   - 'agent_failed'         — the agent reported FAILED
 *   - 'orchestration_error'  — error, timeout or unexpected throw on the CCP side
 */
 async function writeRestoreAuditLog(args: {
  restoreId: string;
  instanceId: string;
  backupId: string;
  triggeredById?: string;
  ipAddress?: string | null;
  options: AgentRestoreOptions;
  outcome: 'success' | 'agent_failed' | 'orchestration_error';
  sha256?: string;
  uploadId?: string | null;
  errorMessage?: string;
 }): Promise<void> {
  const { triggeredById, restoreId } = args;
  if (!triggeredById) return;
  // Optional fields are added only when present, in a fixed order.
  const details: Record<string, Prisma.InputJsonValue> = {
    backupId: args.backupId,
    restoreId,
    source: 'remote',
    outcome: args.outcome,
    options: args.options as unknown as Prisma.InputJsonValue,
  };
  if (args.sha256) details.sha256 = args.sha256;
  if (args.uploadId) details.agentUploadId = args.uploadId;
  // Error text is capped at 500 characters.
  if (args.errorMessage) details.errorMessage = args.errorMessage.substring(0, 500);
  try {
    await prisma.auditLog.create({
      data: {
        userId: triggeredById,
        instanceId: args.instanceId,
        action: AuditAction.BACKUP_RESTORE,
        details,
        ipAddress: args.ipAddress ?? null,
      },
    });
  } catch (err) {
    logger.error(`[restore] failed to write audit log for ${restoreId}: ${(err as Error).message}`);
  }
 }
 /**
 * Run a remote restore end to end and record the outcome on the
 * InstanceRestore row.
 *
 * Steps: hash the archive, upload it to the agent, ask the agent to apply it,
 * poll progress until the agent reports COMPLETED/FAILED (or the poll timeout
 * elapses), delete the agent-side upload, then finalize the row and write an
 * audit entry.
 *
 * Expected failures are handled internally: they mark the row FAILED, write
 * an 'orchestration_error' audit entry and attempt to remove the agent-side
 * upload. A failure of the FAILED status write itself is logged and does not
 * prevent the audit entry or the cleanup.
 *
 * @param restoreId - ID of the InstanceRestore row created by the caller.
 * @param archivePath - Path of the backup archive on the CCP disk.
 * @param options - Passed through to the agent's apply call.
 * @param triggeredById - User recorded in the audit log (no entry when absent).
 * @param ipAddress - IP recorded in the audit log.
 */
 async function performRemoteRestore(
  restoreId: string,
  archivePath: string,
  options: AgentRestoreOptions,
  triggeredById?: string,
  ipAddress?: string | null
 ) {
  const restore = await prisma.instanceRestore.findUnique({
    where: { id: restoreId },
    include: { instance: true, backup: true },
  });
  if (!restore) {
    logger.error(`[restore] row ${restoreId} vanished mid-flight`);
    return;
  }
  const instance = restore.instance;
  // Kept outside the try block so the error path can reuse the driver for
  // cleanup instead of acquiring a second one.
  let driver: Awaited<ReturnType<typeof getRemoteDriverForInstance>> | null = null;
  let uploadId: string | null = null;
  let sha256: string | undefined;
  try {
    await prisma.instanceRestore.update({
      where: { id: restoreId },
      data: { status: RestoreStatus.UPLOADING },
    });
    const activeDriver = await getRemoteDriverForInstance({
      id: instance.id,
      slug: instance.slug,
      isRemote: instance.isRemote,
      agentUrl: instance.agentUrl,
    });
    driver = activeDriver;
    // 1. Compute local SHA256 (authoritative — the agent will verify against this).
    // We persist this in the audit log so there's an immutable record of exactly
    // which bytes were restored, useful for post-incident comparison.
    logger.info(`[restore] ${instance.slug}: hashing archive ${path.basename(archivePath)}`);
    sha256 = await fileHash(archivePath);
    // 2. Stream upload to agent
    logger.info(`[restore] ${instance.slug}: uploading archive (sha256=${sha256.substring(0, 16)}...)`);
    const uploadResult = await activeDriver.uploadRestore(archivePath, sha256);
    uploadId = uploadResult.uploadId;
    await prisma.instanceRestore.update({
      where: { id: restoreId },
      data: { uploadId, status: RestoreStatus.RUNNING },
    });
    // 3. Apply
    logger.info(`[restore] ${instance.slug}: applying restore ${uploadId}`);
    await activeDriver.applyRestore(uploadId, options);
    // 4. Poll progress
    const deadline = Date.now() + POLL_TIMEOUT_MS;
    let finalState: AgentRestoreState | null = null;
    while (Date.now() < deadline) {
      await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS));
      try {
        const state = await activeDriver.getRestoreProgress(uploadId);
        // Mirror progress to the DB row so the UI shows updates
        await prisma.instanceRestore.update({
          where: { id: restoreId },
          data: {
            progressJson: state as unknown as Prisma.InputJsonValue,
            logTail: state.logTail ?? null,
          },
        });
        if (state.status === 'COMPLETED' || state.status === 'FAILED') {
          finalState = state;
          break;
        }
      } catch (err) {
        const pollMsg = err instanceof Error ? err.message : String(err);
        logger.warn(`[restore] ${instance.slug}: poll error: ${pollMsg}`);
        // Keep polling — transient network blips shouldn't fail the restore
      }
    }
    if (!finalState) {
      throw new Error(`Restore timed out after ${Math.round(POLL_TIMEOUT_MS / 1000)}s`);
    }
    // 5. Clean up agent-side upload (best effort)
    try {
      await activeDriver.deleteRestoreUpload(uploadId);
    } catch (err) {
      const cleanupMsg = err instanceof Error ? err.message : String(err);
      logger.warn(`[restore] ${instance.slug}: failed to delete agent upload ${uploadId}: ${cleanupMsg}`);
    }
    // 6. Finalize DB row
    if (finalState.status === 'COMPLETED') {
      await prisma.instanceRestore.update({
        where: { id: restoreId },
        data: {
          status: RestoreStatus.COMPLETED,
          progressJson: finalState as unknown as Prisma.InputJsonValue,
          logTail: finalState.logTail ?? null,
          completedAt: new Date(),
        },
      });
      await writeRestoreAuditLog({
        restoreId,
        instanceId: instance.id,
        backupId: restore.backupId,
        triggeredById,
        ipAddress,
        options,
        outcome: 'success',
        sha256,
        uploadId,
      });
      logger.info(`[restore] ${instance.slug}: restore ${restoreId} COMPLETED`);
    } else {
      const errMsg = finalState.errorMessage || `Agent reported FAILED (exit ${finalState.exitCode})`;
      await prisma.instanceRestore.update({
        where: { id: restoreId },
        data: {
          status: RestoreStatus.FAILED,
          progressJson: finalState as unknown as Prisma.InputJsonValue,
          logTail: finalState.logTail ?? null,
          errorMessage: errMsg,
          completedAt: new Date(),
        },
      });
      await writeRestoreAuditLog({
        restoreId,
        instanceId: instance.id,
        backupId: restore.backupId,
        triggeredById,
        ipAddress,
        options,
        outcome: 'agent_failed',
        sha256,
        uploadId,
        errorMessage: errMsg,
      });
      logger.warn(`[restore] ${instance.slug}: restore ${restoreId} FAILED (exit ${finalState.exitCode})`);
    }
  } catch (err) {
    const errMsg = err instanceof Error ? err.message : String(err);
    // Guard the status write: if the database is the thing that is failing,
    // the audit entry and the agent-side cleanup below must still be attempted.
    try {
      await prisma.instanceRestore.update({
        where: { id: restoreId },
        data: {
          status: RestoreStatus.FAILED,
          errorMessage: errMsg,
          completedAt: new Date(),
        },
      });
    } catch (dbErr) {
      const dbMsg = dbErr instanceof Error ? dbErr.message : String(dbErr);
      logger.error(`[restore] ${instance.slug}: could not mark restore ${restoreId} as FAILED: ${dbMsg}`);
    }
    await writeRestoreAuditLog({
      restoreId,
      instanceId: instance.id,
      backupId: restore.backupId,
      triggeredById,
      ipAddress,
      options,
      outcome: 'orchestration_error',
      sha256,
      uploadId,
      errorMessage: errMsg,
    });
    logger.error(`[restore] ${instance.slug}: ${errMsg}`);
    // Best-effort cleanup of the agent upload if we got that far. An upload ID
    // is only ever set after the driver was acquired, so the driver is reused.
    // NOTE(review): on the timeout path the agent may still be applying this
    // upload when it is deleted — confirm the agent tolerates that.
    if (uploadId && driver) {
      try {
        await driver.deleteRestoreUpload(uploadId);
      } catch (cleanupErr) {
        const cleanupMsg = cleanupErr instanceof Error ? cleanupErr.message : String(cleanupErr);
        logger.warn(`[restore] ${instance.slug}: failed to delete agent upload ${uploadId}: ${cleanupMsg}`);
      }
    }
  }
 }
 /**
 * Return one page of restores, newest first, together with the total count.
 *
 * @param instanceId - When given, only restores of that instance are listed.
 * @param page - 1-based page number.
 * @param limit - Page size.
 */
 export async function listRestores(instanceId?: string, page = 1, limit = 50) {
  const filter = instanceId ? { instanceId } : {};
  const offset = (page - 1) * limit;
  const rowsQuery = prisma.instanceRestore.findMany({
    where: filter,
    orderBy: { startedAt: 'desc' },
    skip: offset,
    take: limit,
    include: {
      instance: { select: { id: true, name: true, slug: true } },
      backup: { select: { id: true, archivePath: true, sizeBytes: true } },
    },
  });
  const countQuery = prisma.instanceRestore.count({ where: filter });
  // The page query and the count query are awaited together.
  const [data, total] = await Promise.all([rowsQuery, countQuery]);
  return { data, total, page, limit };
 }
 /**
 * Load one restore, with a summary of its instance and backup attached.
 *
 * @throws AppError 404 (`NOT_FOUND`) when no restore has the given ID.
 */
 export async function getRestore(restoreId: string) {
  const found = await prisma.instanceRestore.findUnique({
    where: { id: restoreId },
    include: {
      instance: { select: { id: true, name: true, slug: true } },
      backup: { select: { id: true, archivePath: true, sizeBytes: true, manifest: true } },
    },
  });
  if (found) return found;
  throw new AppError(404, 'Restore not found', 'NOT_FOUND');
 }
--- a/changemaker-control-panel/api/src/services/tunnel.service.ts
+++ b/changemaker-control-panel/api/src/services/tunnel.service.ts
@ -0,0 +1,599 @@
 /**
 * Remote tunnel management service.
 *
 * Orchestrates Pangolin site/resource/target creation on behalf of remote CML
 * instances, then pushes Newt credentials to the remote host via the mTLS agent.
 * The CCP holds the Pangolin API token centrally — remote instances never touch
 * the Pangolin API themselves.
 */
 import { AuditAction, Prisma } from '@prisma/client';
 import { prisma } from '../lib/prisma';
 import { env } from '../config/env';
 import { AppError } from '../middleware/error-handler';
 import { logger } from '../utils/logger';
 import { getRemoteDriverForInstance } from './execution-driver';
 import {
  CcpPangolinClient,
  type PangolinDomain,
  type PangolinResource,
 } from './ccp-pangolin.client';
 // ─── Resource definitions ──────────────────────────────────────────
 /**
 * Describes one Pangolin resource that may be created for an instance.
 */
 interface ResourceDef {
  // Suffix combined with the instance prefix by fullSubdomain(); '' means the prefix alone.
  subdomain: string;
  // Name given to the Pangolin resource.
  name: string;
  // When true the resource is always created and a creation failure is rethrown.
  required?: boolean;
  // Name of an instance property; the resource is created only when that property is truthy.
  featureFlag?: string;
 }
 // Resources considered by setupTunnel and syncResources, in creation order.
 // Per shouldCreateResource(): `required: true` entries are always created,
 // `featureFlag` entries only when the instance has that flag set, and entries
 // with neither (including `required: false`) are always created but their
 // creation failures are only logged.
 const RESOURCE_DEFINITIONS: ResourceDef[] = [
  { subdomain: 'app',      name: 'Admin GUI',    required: true },
  { subdomain: 'api',      name: 'API',          required: true },
  { subdomain: '',         name: 'Public Site',   required: true },
  { subdomain: 'media',    name: 'Media API',    featureFlag: 'enableMedia' },
  { subdomain: 'db',       name: 'NocoDB',       required: false },
  { subdomain: 'docs',     name: 'Docs',         required: false },
  { subdomain: 'code',     name: 'Code Server',  required: false },
  { subdomain: 'git',      name: 'Gitea',        required: false },
  { subdomain: 'home',     name: 'Homepage',     required: false },
  { subdomain: 'listmonk', name: 'Listmonk',     featureFlag: 'enableListmonk' },
  { subdomain: 'qr',       name: 'Mini QR',      required: false },
  { subdomain: 'draw',     name: 'Excalidraw',   required: false },
  { subdomain: 'vault',    name: 'Vaultwarden',  required: false },
  { subdomain: 'mail',     name: 'MailHog',      required: false },
  { subdomain: 'chat',     name: 'Rocket.Chat',  featureFlag: 'enableChat' },
  { subdomain: 'events',   name: 'Gancio',       featureFlag: 'enableGancio' },
  { subdomain: 'meet',     name: 'Jitsi Meet',   featureFlag: 'enableMeet' },
  { subdomain: 'grafana',  name: 'Grafana',      featureFlag: 'enableMonitoring' },
 ];
 // ─── Helpers ───────────────────────────────────────────────────────
 /**
 * Build a Pangolin API client from the CCP environment.
 *
 * @throws AppError 501 (`PANGOLIN_NOT_CONFIGURED`) when the API URL, API key
 *   or org ID is not set.
 */
 function getPangolinClient(): CcpPangolinClient {
  const { PANGOLIN_API_URL: apiUrl, PANGOLIN_API_KEY: apiKey, PANGOLIN_ORG_ID: orgId } = env;
  if (apiUrl && apiKey && orgId) {
    return new CcpPangolinClient(apiUrl, apiKey, orgId);
  }
  throw new AppError(
    501,
    'Pangolin API not configured on this CCP. Set PANGOLIN_API_URL, PANGOLIN_API_KEY, PANGOLIN_ORG_ID in the CCP .env file.',
    'PANGOLIN_NOT_CONFIGURED'
  );
 }
 /**
 * Join the instance prefix and a resource subdomain with a dash.
 * An empty subdomain (the root site) yields the prefix on its own,
 * e.g. ("ck", "app") → "ck-app" and ("ck", "") → "ck".
 */
 function fullSubdomain(prefix: string, sub: string): string {
  return sub ? `${prefix}-${sub}` : prefix;
 }
 /**
 * Decide whether a resource definition applies to an instance.
 *
 * Required definitions and definitions without a feature flag always apply;
 * flagged definitions apply only when the instance property named by the flag
 * is truthy.
 */
 function shouldCreateResource(
  def: ResourceDef,
  instance: Record<string, unknown>
 ): boolean {
  const { required, featureFlag } = def;
  if (required || !featureFlag) return true;
  return Boolean(instance[featureFlag]);
 }
 /**
 * Find the registered Pangolin base domain that serves an instance domain.
 *
 * The instance domain itself is tried first, then each parent domain from
 * longest to shortest (sub.example.com → example.com). Single-label parents
 * such as "com" are never tried.
 *
 * @throws AppError 400 (`DOMAIN_NOT_FOUND`) when nothing matches; the message
 *   lists the available base domains.
 */
 async function findDomainForInstance(
  client: CcpPangolinClient,
  instanceDomain: string
 ): Promise<PangolinDomain> {
  const domains = await client.listDomains();
  // Candidate base domains in priority order: the exact domain, then parents
  // that still have at least two labels.
  const labels = instanceDomain.split('.');
  const candidates = [instanceDomain];
  for (let start = 1; start < labels.length - 1; start += 1) {
    candidates.push(labels.slice(start).join('.'));
  }
  for (const candidate of candidates) {
    const hit = domains.find((d) => d.baseDomain === candidate);
    if (hit) return hit;
  }
  throw new AppError(
    400,
    `No Pangolin domain matches instance domain "${instanceDomain}". Available: ${domains.map((d) => d.baseDomain).join(', ')}`,
    'DOMAIN_NOT_FOUND'
  );
 }
 // ─── Setup ─────────────────────────────────────────────────────────
 /**
 * Caller-supplied options for setupTunnel.
 */
 export interface SetupTunnelOptions {
  // Prefix for every generated subdomain; setupTunnel falls back to the instance slug.
  subdomainPrefix?: string;
 }
 /**
 * Summary returned by setupTunnel.
 */
 export interface TunnelSetupResult {
  // Pangolin site ID, stringified.
  siteId: string;
  // Newt client ID pushed to the remote .env (the secret is not part of the result).
  newtId: string;
  // Endpoint written to the remote .env as PANGOLIN_ENDPOINT.
  endpoint: string;
  // Length of `resources`.
  resourceCount: number;
  // Resources created by this call plus matching ones that already existed.
  resources: Array<{ subdomain: string; name: string; resourceId: string }>;
 }
 /**
 * Provision a Pangolin tunnel for a remote instance.
 *
 * Everything that can fail without side effects (agent driver, endpoint
 * derivation, domain lookup, resource listing) is resolved BEFORE the Pangolin
 * site is created, so a misconfigured domain no longer leaves an orphaned
 * site behind. After the site exists, the function creates resources and
 * targets, writes the Newt credentials to the remote .env, and stores them on
 * the Instance row. If any of that fails, the site is deleted again (best
 * effort) and the error is rethrown, so a retry starts from a clean slate.
 *
 * Recreating the newt container and writing the audit entry happen last;
 * a composeUp failure is only logged.
 *
 * @param instanceId - Instance to configure; must be remote and not yet have a site.
 * @param options - Optional subdomain prefix (defaults to the instance slug).
 * @param userId - User recorded in the audit log (no entry when absent).
 * @param ipAddress - IP recorded in the audit log.
 * @throws AppError 404 `NOT_FOUND`, 400 `NOT_REMOTE` / `ALREADY_CONFIGURED` /
 *   `DOMAIN_NOT_FOUND`, 501 `PANGOLIN_NOT_CONFIGURED`; errors from creating a
 *   required resource or from the remote agent are rethrown.
 */
 export async function setupTunnel(
  instanceId: string,
  options: SetupTunnelOptions,
  userId?: string,
  ipAddress?: string | null
 ): Promise<TunnelSetupResult> {
  const client = getPangolinClient();
  const instance = await prisma.instance.findUnique({ where: { id: instanceId } });
  if (!instance) throw new AppError(404, 'Instance not found', 'NOT_FOUND');
  if (!instance.isRemote) throw new AppError(400, 'Tunnel setup via Pangolin API is only for remote instances', 'NOT_REMOTE');
  if (instance.pangolinSiteId) {
    throw new AppError(400, 'Tunnel is already configured. Use sync to update resources, or teardown first.', 'ALREADY_CONFIGURED');
  }
  const prefix = options.subdomainPrefix || instance.slug;
  const driver = await getRemoteDriverForInstance({
    id: instance.id,
    slug: instance.slug,
    isRemote: instance.isRemote,
    agentUrl: instance.agentUrl,
  });
  // ── Pre-flight: nothing below this marker mutates Pangolin yet ──
  // The Pangolin endpoint (what Newt connects to) may be different from
  // the API URL. E.g., API = api.bnkserve.org/v1, endpoint = pangolin.bnkserve.org.
  // If PANGOLIN_ENDPOINT is set, use it. Otherwise derive from API URL.
  let endpoint = env.PANGOLIN_ENDPOINT || '';
  if (!endpoint) {
    const endpointUrl = new URL(env.PANGOLIN_API_URL);
    endpoint = `${endpointUrl.protocol}//${endpointUrl.hostname}${endpointUrl.port ? ':' + endpointUrl.port : ''}`;
  }
  // Find the matching domain first: a mismatch must fail before a site exists.
  const domain = await findDomainForInstance(client, instance.domain);
  logger.info(`[tunnel] ${instance.slug}: matched domain ${domain.baseDomain} (id: ${domain.domainId})`);
  const existingResources = await client.listResources();
  // 1. Get Newt credentials
  logger.info(`[tunnel] ${instance.slug}: picking site defaults`);
  const defaults = await client.pickSiteDefaults();
  // 2. Create site
  logger.info(`[tunnel] ${instance.slug}: creating Pangolin site`);
  const site = await client.createSite({
    name: instance.slug,
    type: 'newt',
    newtId: defaults.newtId,
    secret: defaults.newtSecret,
    address: defaults.address,
  });
  const siteId = String(site.siteId);
  const newtId = site.newt?.newtId || defaults.newtId;
  const newtSecret = site.newt?.secret || defaults.newtSecret;
  const createdResources: Array<{ subdomain: string; name: string; resourceId: string }> = [];
  try {
    // 3. Create resources + targets
    for (const def of RESOURCE_DEFINITIONS) {
      if (!shouldCreateResource(def, instance as unknown as Record<string, unknown>)) {
        logger.debug(`[tunnel] ${instance.slug}: skipping ${def.name} (feature not enabled)`);
        continue;
      }
      const sub = fullSubdomain(prefix, def.subdomain);
      // Build the expected full domain so we can do an idempotent check against
      // Pangolin's existing resources. Pangolin returns `fullDomain` not `subdomain`.
      const expectedFullDomain = sub
        ? `${sub}.${domain.baseDomain}`
        : domain.baseDomain;
      // Idempotent: skip if a resource with this fullDomain already exists
      const existing = existingResources.find(
        (r) => r.fullDomain === expectedFullDomain
      );
      if (existing) {
        logger.debug(`[tunnel] ${instance.slug}: resource ${def.name} (${expectedFullDomain}) already exists`);
        createdResources.push({ subdomain: sub, name: def.name, resourceId: String(existing.resourceId) });
        continue;
      }
      try {
        const resourcePayload: Record<string, unknown> = {
          name: def.name,
          domainId: domain.domainId,
          http: true,
          protocol: 'tcp',
        };
        // Root domain: omit subdomain entirely (empty string is rejected by Pangolin)
        if (sub) resourcePayload.subdomain = sub;
        const resource = await client.createResource(resourcePayload as unknown as Parameters<typeof client.createResource>[0]);
        // Make the resource public (no SSO, no access block)
        try {
          await client.updateResource(resource.resourceId, { sso: false, blockAccess: false });
        } catch (err) {
          logger.warn(`[tunnel] ${instance.slug}: failed to make ${def.name} public: ${(err as Error).message}`);
        }
        // Create target pointing to nginx:80 on the remote host
        await client.createTarget(resource.resourceId, {
          siteId: Number(siteId),
          ip: 'nginx',
          port: 80,
          method: 'http',
          enabled: true,
        });
        // Stringified to match the shape used for pre-existing resources above.
        createdResources.push({ subdomain: sub, name: def.name, resourceId: String(resource.resourceId) });
        logger.info(`[tunnel] ${instance.slug}: created resource ${def.name} → ${sub}.${domain.baseDomain}`);
      } catch (err) {
        if (def.required) throw err;
        logger.warn(`[tunnel] ${instance.slug}: failed to create optional resource ${def.name}: ${(err as Error).message}`);
      }
    }
    // 4. Push Newt credentials to remote .env
    logger.info(`[tunnel] ${instance.slug}: pushing Newt credentials to remote .env`);
    // Read current .env, append/replace Pangolin vars
    // NOTE(review): if readEnvFile can return null for an existing file, the
    // rewritten .env will contain only the Pangolin vars — confirm its contract.
    const currentEnv = await driver.readEnvFile('');
    const envContent = buildUpdatedEnv(currentEnv, {
      PANGOLIN_ENDPOINT: endpoint,
      PANGOLIN_SITE_ID: siteId,
      PANGOLIN_NEWT_ID: newtId,
      PANGOLIN_NEWT_SECRET: newtSecret,
    });
    await driver.writeFiles('', [{ relativePath: '.env', content: envContent }]);
    // 5. Persist on Instance row
    await prisma.instance.update({
      where: { id: instanceId },
      data: {
        pangolinEndpoint: endpoint,
        pangolinSiteId: siteId,
        pangolinNewtId: newtId,
        pangolinNewtSecret: newtSecret,
        pangolinSubdomainPrefix: prefix,
      },
    });
  } catch (err) {
    // The site ID was never persisted, so nothing would ever clean this site
    // up and a retry would create a duplicate. Remove it now; teardownTunnel
    // relies on the same call to cascade the site's resources and targets.
    logger.error(`[tunnel] ${instance.slug}: setup failed after site ${siteId} was created, rolling back: ${(err as Error).message}`);
    try {
      await client.deleteSite(siteId);
    } catch (rollbackErr) {
      logger.warn(`[tunnel] ${instance.slug}: rollback deleteSite(${siteId}) failed: ${(rollbackErr as Error).message}`);
    }
    throw err;
  }
  // 6. Recreate Newt container to pick up the new .env vars.
  // `docker compose restart` does NOT re-read .env — it only sends SIGTERM+restart.
  // `docker compose up -d newt` detects env var changes (via ${PANGOLIN_NEWT_ID}
  // expansion in docker-compose.yml) and recreates the container automatically.
  logger.info(`[tunnel] ${instance.slug}: recreating newt container with new credentials`);
  try {
    await driver.composeUp('', '', ['newt']);
  } catch (err) {
    logger.warn(`[tunnel] ${instance.slug}: composeUp(newt) failed: ${(err as Error).message}`);
  }
  // 7. Audit log
  if (userId) {
    await prisma.auditLog.create({
      data: {
        userId,
        instanceId,
        action: AuditAction.PANGOLIN_SETUP,
        details: {
          source: 'remote',
          siteId,
          newtId,
          endpoint,
          resourceCount: createdResources.length,
          subdomainPrefix: prefix,
        } as unknown as Prisma.InputJsonValue,
        ipAddress: ipAddress ?? null,
      },
    });
  }
  logger.info(`[tunnel] ${instance.slug}: tunnel setup complete — ${createdResources.length} resources created`);
  return {
    siteId,
    newtId,
    endpoint,
    resourceCount: createdResources.length,
    resources: createdResources,
  };
 }
 // ─── Sync ──────────────────────────────────────────────────────────
 /**
 * Create any Pangolin resources that are missing for an already-configured
 * tunnel (for example after a feature flag was enabled on the instance).
 *
 * Resources whose full domain already exists in Pangolin are left alone.
 * For each new resource a failure to make it public is only logged, matching
 * setupTunnel, so the target is still created. Aborting at that point would
 * leave a resource without a target that later syncs skip because it already
 * exists.
 *
 * @param instanceId - Instance whose tunnel resources should be synced.
 * @param userId - User recorded in the audit log (no entry when absent).
 * @param ipAddress - IP recorded in the audit log.
 * @returns `{ synced: true, created }` where `created` counts new resources.
 * @throws AppError 404 `NOT_FOUND`, 400 `NO_TUNNEL` / `DOMAIN_NOT_FOUND`,
 *   501 `PANGOLIN_NOT_CONFIGURED`; errors from creating a required resource
 *   are rethrown.
 */
 export async function syncResources(
  instanceId: string,
  userId?: string,
  ipAddress?: string | null
 ) {
  const client = getPangolinClient();
  const instance = await prisma.instance.findUnique({ where: { id: instanceId } });
  if (!instance) throw new AppError(404, 'Instance not found', 'NOT_FOUND');
  if (!instance.pangolinSiteId) throw new AppError(400, 'No tunnel configured', 'NO_TUNNEL');
  const prefix = instance.pangolinSubdomainPrefix || instance.slug;
  const domain = await findDomainForInstance(client, instance.domain);
  const existingResources = await client.listResources();
  const siteId = instance.pangolinSiteId;
  let created = 0;
  for (const def of RESOURCE_DEFINITIONS) {
    if (!shouldCreateResource(def, instance as unknown as Record<string, unknown>)) continue;
    const sub = fullSubdomain(prefix, def.subdomain);
    const expectedFullDomain = sub ? `${sub}.${domain.baseDomain}` : domain.baseDomain;
    const existing = existingResources.find((r) => r.fullDomain === expectedFullDomain);
    if (existing) continue;
    try {
      const resourcePayload: Record<string, unknown> = {
        name: def.name,
        domainId: domain.domainId,
        http: true,
        protocol: 'tcp',
      };
      // Root domain: the subdomain key is omitted rather than sent empty.
      if (sub) resourcePayload.subdomain = sub;
      const resource = await client.createResource(resourcePayload as unknown as Parameters<typeof client.createResource>[0]);
      // Make the resource public (no SSO, no access block). Best effort: the
      // target below must be created even if this call fails.
      try {
        await client.updateResource(resource.resourceId, { sso: false, blockAccess: false });
      } catch (err) {
        logger.warn(`[tunnel] ${instance.slug}: failed to make ${def.name} public: ${(err as Error).message}`);
      }
      // Target points at nginx:80 on the remote host, as in setupTunnel.
      await client.createTarget(resource.resourceId, {
        siteId: Number(siteId),
        ip: 'nginx',
        port: 80,
        method: 'http',
        enabled: true,
      });
      created++;
      logger.info(`[tunnel] ${instance.slug}: sync created ${def.name} (${sub})`);
    } catch (err) {
      if (def.required) throw err;
      logger.warn(`[tunnel] ${instance.slug}: sync failed for ${def.name}: ${(err as Error).message}`);
    }
  }
  if (userId) {
    await prisma.auditLog.create({
      data: {
        userId,
        instanceId,
        action: AuditAction.PANGOLIN_SYNC,
        details: { source: 'remote', created, siteId } as unknown as Prisma.InputJsonValue,
        ipAddress: ipAddress ?? null,
      },
    });
  }
  return { synced: true, created };
 }
 // ─── Teardown ──────────────────────────────────────────────────────
 /**
 * Remove an instance's Pangolin tunnel.
 *
 * Deletes the Pangolin site (failure is only logged), clears the Pangolin
 * fields on the Instance row, and for remote instances removes the Pangolin
 * variables from the remote .env and restarts the compose stack. Remote-side
 * failures are logged and do not abort the teardown. An audit entry is
 * written when a user ID is supplied.
 *
 * @param instanceId - Instance whose tunnel should be removed.
 * @param userId - User recorded in the audit log (no entry when absent).
 * @param ipAddress - IP recorded in the audit log.
 * @returns `{ tornDown: true }`.
 * @throws AppError 404 `NOT_FOUND`, 400 `NO_TUNNEL`, 501 `PANGOLIN_NOT_CONFIGURED`.
 */
 export async function teardownTunnel(
  instanceId: string,
  userId?: string,
  ipAddress?: string | null
 ) {
  const client = getPangolinClient();
  const instance = await prisma.instance.findUnique({ where: { id: instanceId } });
  if (!instance) throw new AppError(404, 'Instance not found', 'NOT_FOUND');
  if (!instance.pangolinSiteId) throw new AppError(400, 'No tunnel configured', 'NO_TUNNEL');
  const siteId = instance.pangolinSiteId;
  // Delete site from Pangolin (cascades resources + targets)
  // NOTE(review): every deleteSite error is treated as "already gone"; a
  // transient API failure would leave the site in Pangolin while the local
  // record of it is cleared below — confirm this is acceptable.
  try {
    await client.deleteSite(siteId);
    logger.info(`[tunnel] ${instance.slug}: deleted Pangolin site ${siteId}`);
  } catch (err) {
    logger.warn(`[tunnel] ${instance.slug}: deleteSite failed (may already be gone): ${(err as Error).message}`);
  }
  // Clear Instance fields
  // NOTE(review): pangolinSubdomainPrefix (set by setupTunnel) is not cleared
  // here — presumably intentional; verify.
  await prisma.instance.update({
    where: { id: instanceId },
    data: {
      pangolinEndpoint: null,
      pangolinSiteId: null,
      pangolinNewtId: null,
      pangolinNewtSecret: null,
    },
  });
  // Push empty Pangolin vars to remote .env
  // (buildUpdatedEnv drops keys whose update value is empty, so the variables
  // are removed from the file rather than written as blanks.)
  if (instance.isRemote) {
    try {
      const driver = await getRemoteDriverForInstance({
        id: instance.id,
        slug: instance.slug,
        isRemote: instance.isRemote,
        agentUrl: instance.agentUrl,
      });
      const currentEnv = await driver.readEnvFile('');
      const envContent = buildUpdatedEnv(currentEnv, {
        PANGOLIN_ENDPOINT: '',
        PANGOLIN_SITE_ID: '',
        PANGOLIN_NEWT_ID: '',
        PANGOLIN_NEWT_SECRET: '',
      });
      await driver.writeFiles('', [{ relativePath: '.env', content: envContent }]);
      // Stop newt container (best effort)
      // NOTE(review): no service list is passed here, unlike the
      // composeUp('', '', ['newt']) call in setupTunnel, so this looks like a
      // stop/start of the whole stack — confirm against the driver's signature.
      try {
        await driver.composeStop('', '');
        await driver.composeUp('', ''); // bring the stack back up; newt is expected not to start without creds
      } catch { /* ignore */ }
    } catch (err) {
      logger.warn(`[tunnel] ${instance.slug}: failed to push empty env to remote: ${(err as Error).message}`);
    }
  }
  // Audit log
  if (userId) {
    await prisma.auditLog.create({
      data: {
        userId,
        instanceId,
        action: AuditAction.PANGOLIN_TEARDOWN,
        details: { source: 'remote', siteId } as unknown as Prisma.InputJsonValue,
        ipAddress: ipAddress ?? null,
      },
    });
  }
  return { tornDown: true };
 }
 // ─── Status ────────────────────────────────────────────────────────
 /**
 * Tunnel state returned by getTunnelStatus.
 */
 export interface TunnelStatus {
  // False when the instance has no Pangolin site ID; all other fields are then absent.
  configured: boolean;
  // Site online flag from Pangolin; only set for remote instances, false when the lookup fails.
  online?: boolean;
  // Stored Pangolin site ID.
  siteId?: string;
  // Stored Pangolin endpoint.
  endpoint?: string;
  // Resources with a target on this site; only set for remote instances.
  resources?: Array<{
    // Full domain with the instance domain suffix stripped where it matches.
    subdomain: string;
    name: string;
    resourceId: string;
    hasTarget: boolean;
    // IP and port of the first target that points at this site.
    targetIp?: string;
    targetPort?: number;
  }>;
 }
 /**
 * Report the tunnel state of an instance.
 *
 * Instances without a site ID are reported as unconfigured. Local instances
 * get their stored values back without any Pangolin API call. For remote
 * instances the site's online flag and the resources that have a target on
 * this site are fetched from Pangolin; failures of those lookups are logged
 * and yield `online: false` / a partial resource list instead of an error.
 *
 * @throws AppError 404 `NOT_FOUND`; 501 `PANGOLIN_NOT_CONFIGURED` for remote
 *   instances when the Pangolin API is not configured.
 */
 export async function getTunnelStatus(instanceId: string): Promise<TunnelStatus> {
  const instance = await prisma.instance.findUnique({ where: { id: instanceId } });
  if (!instance) throw new AppError(404, 'Instance not found', 'NOT_FOUND');
  const storedSiteId = instance.pangolinSiteId;
  if (!storedSiteId) {
    return { configured: false };
  }
  const storedEndpoint = instance.pangolinEndpoint ?? undefined;
  // Local instances: stored values only, no Pangolin API call.
  if (!instance.isRemote) {
    return {
      configured: true,
      siteId: storedSiteId ?? undefined,
      endpoint: storedEndpoint,
    };
  }
  const client = getPangolinClient();
  let online = false;
  try {
    online = (await client.getSite(storedSiteId)).online ?? false;
  } catch (err) {
    logger.warn(`[tunnel] ${instance.slug}: getSite failed: ${(err as Error).message}`);
  }
  const resources: NonNullable<TunnelStatus['resources']> = [];
  try {
    const allResources = await client.listResources();
    const ownSiteId = Number(storedSiteId);
    const domainSuffix = `.${instance.domain}`;
    // A resource counts as ours when one of its targets points at our site.
    // That uses the real Pangolin site association instead of guessing from
    // subdomain names.
    for (const res of allResources) {
      let ownTarget: Awaited<ReturnType<typeof client.listTargets>>[number] | undefined;
      try {
        const targets = await client.listTargets(String(res.resourceId));
        ownTarget = targets.find((t) => Number(t.siteId) === ownSiteId);
      } catch { /* ignore */ }
      if (!ownTarget) continue;
      // Derive a display subdomain from fullDomain.
      const fd = res.fullDomain || '';
      let subdomain: string;
      if (fd.endsWith(domainSuffix)) {
        subdomain = fd.slice(0, -domainSuffix.length);
      } else if (fd === instance.domain) {
        subdomain = '';
      } else {
        subdomain = fd;
      }
      resources.push({
        subdomain,
        name: res.name,
        resourceId: String(res.resourceId),
        hasTarget: true,
        targetIp: ownTarget.ip,
        targetPort: ownTarget.port,
      });
    }
  } catch (err) {
    logger.warn(`[tunnel] ${instance.slug}: listResources failed: ${(err as Error).message}`);
  }
  return {
    configured: true,
    online,
    siteId: storedSiteId ?? undefined,
    endpoint: storedEndpoint,
    resources,
  };
 }
 // ─── .env Helpers ──────────────────────────────────────────────────
 /**
 * Quote a .env value if it contains characters that dotenv parsers interpret:
 *   # (comment), = (separator), whitespace, quotes, backslashes, line breaks.
 * Pangolin-issued UUIDs/base64 secrets typically don't need quoting, but
 * defensive quoting prevents silent corruption if they ever do.
 *
 * Inside the double quotes, backslashes and double quotes are
 * backslash-escaped and both line-break characters are written as the escape
 * sequences \n and \r, so the value always stays on one physical line.
 *
 * NOTE(review): `$` is neither quoted nor escaped; parsers that interpolate
 * variables may expand it — confirm whether any stored value can contain it.
 *
 * @param value - Raw value to serialize.
 * @returns The value unchanged, or a double-quoted and escaped form of it.
 */
 function quoteEnvValue(value: string): string {
  // \s already covers spaces, tabs, \n and \r.
  if (!/[\s#"'\\=]/.test(value)) return value;
  const escaped = value
    .replace(/\\/g, '\\\\') // must run first so later escapes are not doubled
    .replace(/"/g, '\\"')
    .replace(/\n/g, '\\n')
    .replace(/\r/g, '\\r');
  return `"${escaped}"`;
 }
 /**
 * Build an updated .env string by replacing/appending the given key-value pairs.
 * Preserves all existing keys not in the update set, in their original order.
 *
 * An update with an empty value removes that key. Only own properties of
 * `updates` count as updates, so an existing key that happens to share a name
 * with an Object.prototype member (e.g. `constructor`) is preserved as-is.
 *
 * @param currentEnv - Parsed key/value pairs of the existing file, or null
 *   when there is none; with null the result contains only the updates.
 * @param updates - Keys to set (non-empty value) or remove (empty value).
 * @returns File content with one KEY=value line per entry and a trailing newline.
 */
 function buildUpdatedEnv(
  currentEnv: Record<string, string> | null,
  updates: Record<string, string>
 ): string {
  const isUpdated = (key: string): boolean =>
    Object.prototype.hasOwnProperty.call(updates, key);
  const lines: string[] = [];
  const seen = new Set<string>();
  // If we have the current env, reproduce it with replacements
  if (currentEnv) {
    for (const [key, value] of Object.entries(currentEnv)) {
      if (!isUpdated(key)) {
        lines.push(`${key}=${quoteEnvValue(value)}`);
        continue;
      }
      seen.add(key);
      const replacement = updates[key];
      // An empty update value drops the line (removes the var).
      if (replacement) lines.push(`${key}=${quoteEnvValue(replacement)}`);
    }
  }
  // Append new keys not already in the file
  for (const [key, value] of Object.entries(updates)) {
    if (!seen.has(key) && value) {
      lines.push(`${key}=${quoteEnvValue(value)}`);
    }
  }
  return lines.join('\n') + '\n';
 }
--- a/changemaker-control-panel/api/src/services/upgrade.service.ts
+++ b/changemaker-control-panel/api/src/services/upgrade.service.ts
@ -2,14 +2,61 @@ import { exec as execCb } from 'child_process';
 import { promisify } from 'util';
 import fs from 'fs/promises';
 import path from 'path';
-import { UpgradeStatus, AuditAction, InstanceStatus, Prisma } from '@prisma/client';
+import { UpgradeStatus, AuditAction, InstanceStatus, Prisma, Instance } from '@prisma/client';
 import { prisma } from '../lib/prisma';
 import { logger } from '../utils/logger';
 import { createEvent } from './event.service';
 import { getRemoteDriverForInstance } from './execution-driver';
 import type { AgentUpdateStatus } from './remote-driver';
 /**
 * Write an INSTANCE_UPGRADE audit log entry capturing a terminal outcome.
 * Wrapped in try/catch so that an audit-log DB failure cannot mask the
 * underlying upgrade row status update.
 *
 * Called from all three terminal paths (both local and remote):
 *   - 'completed'   — upgrade.sh/agent reported success
 *   - 'failed'      — upgrade.sh/agent reported failure
 *   - 'orchestration_error' — CCP-side exception, timeout, or unreachable agent
 */
 async function writeUpgradeAuditLog(args: {
  upgradeId: string;
  instanceId: string;
  triggeredById: string | null;
  source: 'local' | 'remote';
  outcome: 'completed' | 'failed' | 'orchestration_error';
  previousCommit: string | null;
  newCommit: string | null;
  durationSeconds: number | null;
  errorMessage?: string | null;
 }): Promise<void> {
  const { upgradeId, instanceId, triggeredById, errorMessage } = args;
  // The audit row's userId comes from triggeredById; without one no entry is written.
  if (!triggeredById) return;
  try {
    // Outcome payload stored on the row; errorMessage is only present when one
    // was supplied, and is capped at 500 characters.
    const details = {
      upgradeId,
      source: args.source,
      outcome: args.outcome,
      previousCommit: args.previousCommit,
      newCommit: args.newCommit,
      durationSeconds: args.durationSeconds,
      ...(errorMessage ? { errorMessage: errorMessage.substring(0, 500) } : {}),
    };
    await prisma.auditLog.create({
      data: {
        userId: triggeredById,
        instanceId,
        action: AuditAction.INSTANCE_UPGRADE,
        details: details as unknown as Prisma.InputJsonValue,
      },
    });
  } catch (err) {
    // Best effort: a failed audit write is logged and never propagated to the caller.
    logger.error(`[upgrade] failed to write audit log for ${upgradeId}: ${(err as Error).message}`);
  }
 }
 const exec = promisify(execCb);
-const UPGRADE_TIMEOUT = 600_000; // 10 minutes
+const UPGRADE_TIMEOUT = 600_000; // 10 minutes — local upgrades
 const REMOTE_UPGRADE_TIMEOUT = 15 * 60 * 1000; // 15 minutes — remote (network round trips)
 const PROGRESS_POLL_INTERVAL = 2_000; // 2 seconds
 // ─── Update Check ─────────────────────────────────────────────────
@ -26,13 +73,57 @@ export interface UpdateStatus {
 }
 /**
- * Check for available updates by running upgrade-check.sh in the instance's basePath.
+ * Check for available updates. Branches on instance.isRemote:
- * Falls back to reading an existing status.json if the script isn't available.
+ *   - Local: runs upgrade-check.sh in the instance's basePath and reads status.json
 *   - Remote: calls the agent's POST /upgrade/check endpoint over mTLS
 */
 export async function checkForUpdates(instanceId: string): Promise<UpdateStatus> {
  const target = await prisma.instance.findUnique({ where: { id: instanceId } });
  if (!target) {
    throw new Error('Instance not found');
  }
  // Remote instances are checked through their agent, local ones on this host.
  return target.isRemote ? checkForUpdatesRemote(target) : checkForUpdatesLocal(target);
 }
 /**
 * Remote update check: obtains the remote driver for the instance and asks it
 * for the current update status.
 *
 * Never rejects: if the driver cannot be obtained or the call fails, a warning
 * is logged and a fallback status is returned that is built from the
 * DB-tracked branch/commit, reports zero commits behind, and carries the
 * failure text in `error`.
 */
 async function checkForUpdatesRemote(instance: Instance): Promise<UpdateStatus> {
  const { id, slug, isRemote, agentUrl } = instance;
  try {
    const driver = await getRemoteDriverForInstance({ id, slug, isRemote, agentUrl });
    const agentStatus: AgentUpdateStatus = await driver.checkForUpdates();
    // Copy field by field so only the UpdateStatus fields are exposed.
    const mapped: UpdateStatus = {
      branch: agentStatus.branch,
      currentCommit: agentStatus.currentCommit,
      currentMessage: agentStatus.currentMessage,
      remoteCommit: agentStatus.remoteCommit,
      commitsBehind: agentStatus.commitsBehind,
      changelog: agentStatus.changelog,
      checkedAt: agentStatus.checkedAt,
      error: agentStatus.error,
    };
    return mapped;
  } catch (err) {
    const reason = (err as Error).message;
    logger.warn(`[upgrade] remote check failed for ${slug}: ${reason}`);
    const fallback: UpdateStatus = {
      branch: instance.gitBranch,
      currentCommit: instance.gitCommit || 'unknown',
      remoteCommit: null,
      commitsBehind: 0,
      changelog: [],
      checkedAt: new Date().toISOString(),
      error: `Remote check failed: ${reason}`,
    };
    return fallback;
  }
 }
 async function checkForUpdatesLocal(instance: Instance): Promise<UpdateStatus> {
  const basePath = instance.basePath;
  const statusFile = path.join(basePath, 'data', 'upgrade', 'status.json');
  const scriptPath = path.join(basePath, 'scripts', 'upgrade-check.sh');
@ -119,16 +210,21 @@ export async function startUpgrade(
    throw new Error('An upgrade is already in progress for this instance');
  }
-  // Get current commit for tracking
+  // Get current commit for tracking. For local instances we can read it from
-  let currentCommit: string | null = null;
+  // git directly; for remote instances we either trust the DB-tracked value
-  try {
+  // (set by previous upgrade-check) or leave it null and let upgrade.sh
-    const { stdout } = await exec('git rev-parse --short HEAD', {
+  // report the previous commit in result.json.
-      cwd: instance.basePath,
+  let currentCommit: string | null = instance.gitCommit;
-      timeout: 5_000,
+  if (!instance.isRemote) {
-    });
+    try {
-    currentCommit = stdout.trim();
+      const { stdout } = await exec('git rev-parse --short HEAD', {
-  } catch {
+        cwd: instance.basePath,
-    // Non-critical — may be a release install without .git
+        timeout: 5_000,
      });
      currentCommit = stdout.trim();
    } catch {
      // Non-critical — may be a release install without .git
    }
  }
  const branch = options?.branch || instance.gitBranch;
@ -154,20 +250,222 @@ export async function startUpgrade(
        upgradeId: upgrade.id,
        previousCommit: currentCommit,
        branch,
        source: instance.isRemote ? 'remote' : 'local',
        options: options || {},
      } as unknown as Prisma.InputJsonValue,
      ipAddress,
    },
  });
-  // Fire-and-forget: run the upgrade asynchronously
+  // Fire-and-forget: branch on isRemote
-  runUpgrade(upgrade.id, instance.basePath, instance.slug, options).catch((err) => {
+  if (instance.isRemote) {
-    logger.error(`[upgrade] Upgrade orchestration failed for ${instance.slug}: ${err}`);
+    runRemoteUpgrade(upgrade.id, instance, options).catch((err) => {
-  });
+      logger.error(`[upgrade] Remote upgrade orchestration failed for ${instance.slug}: ${err}`);
    });
  } else {
    runUpgrade(upgrade.id, instance.basePath, instance.slug, options).catch((err) => {
      logger.error(`[upgrade] Upgrade orchestration failed for ${instance.slug}: ${err}`);
    });
  }
  return upgrade;
 }
 /**
 * Async REMOTE upgrade runner.
 *
 * Flow:
 *   1. Get RemoteDriver
 *   2. Mark InstanceUpgrade IN_PROGRESS
 *   3. Tell agent to start upgrade.sh in --api-mode
 *   4. Poll agent /upgrade/progress every 2s, mirror to DB
 *   5. Try /upgrade/result every poll cycle; when present, finalize
 *   6. On timeout (15 min), mark FAILED and create error event
 *
 * Note: there is no shell or filesystem access on the CCP side — everything
 * goes through the mTLS agent. The agent's spawn of upgrade.sh is itself
 * fire-and-forget under a slug mutex.
 */
 async function runRemoteUpgrade(
  upgradeId: string,
  instance: Instance,
  options?: StartUpgradeOptions
 ) {
  const slug = instance.slug;
  // Thrown values are not guaranteed to be Error instances. Normalise them so
  // the failure handler below can never itself throw while reading `.message`
  // (which would leave the upgrade row stuck in IN_PROGRESS).
  const describeError = (e: unknown): string => (e instanceof Error ? e.message : String(e));
  // Set only when the polling deadline expires. Using a flag instead of
  // matching the text "timed out" keeps unrelated agent/network errors that
  // happen to contain that phrase from being reported as an upgrade timeout.
  let timedOut = false;
  try {
    const driver = await getRemoteDriverForInstance({
      id: instance.id,
      slug: instance.slug,
      isRemote: instance.isRemote,
      agentUrl: instance.agentUrl,
    });
    // Mark IN_PROGRESS
    await prisma.instanceUpgrade.update({
      where: { id: upgradeId },
      data: {
        status: UpgradeStatus.IN_PROGRESS,
        progressMessage: 'Starting remote upgrade...',
      },
    });
    // Tell the agent to start. The agent has its own mutex + stale-progress
    // check, so this can return 409 if a previous upgrade is still running.
    logger.info(`[upgrade] ${slug}: triggering remote upgrade.sh start`);
    await driver.startUpgrade({
      skipBackup: options?.skipBackup,
      useRegistry: options?.useRegistry,
      branch: options?.branch,
    });
    // Poll progress + result. We treat /result returning 200 as the signal
    // that upgrade.sh exited (successfully or with code != 0 — the script
    // writes result.json either way in --api-mode).
    const deadline = Date.now() + REMOTE_UPGRADE_TIMEOUT;
    let lastProgress: { phase?: number; phaseName?: string; percentage?: number; message?: string } = {};
    while (Date.now() < deadline) {
      await new Promise((r) => setTimeout(r, PROGRESS_POLL_INTERVAL));
      // Try to fetch the result first; if it exists, we're done
      let result = null;
      try {
        result = await driver.getUpgradeResult();
      } catch {
        // No result yet — keep polling progress
        // NOTE(review): every error is treated as "no result yet", so a
        // persistently failing agent is only surfaced by the deadline.
      }
      if (result) {
        // Final result available — write it and exit
        // Read the row first: its triggeredById/previousCommit feed the audit entry.
        const upgradeRowBefore = await prisma.instanceUpgrade.findUnique({ where: { id: upgradeId } });
        await prisma.instanceUpgrade.update({
          where: { id: upgradeId },
          data: {
            status: result.success ? UpgradeStatus.COMPLETED : UpgradeStatus.FAILED,
            newCommit: result.newCommit || null,
            commitCount: result.commitCount || 0,
            percentage: 100,
            phaseName: 'Complete',
            progressMessage: result.message || 'Upgrade completed',
            durationSeconds: result.durationSeconds || null,
            warnings: result.warnings?.length ? (result.warnings as unknown as Prisma.InputJsonValue) : undefined,
            errorMessage: result.success ? null : (result.message || 'Upgrade failed'),
            completedAt: new Date(),
          },
        });
        // Update Instance.gitCommit if we have a new commit
        if (result.newCommit) {
          await prisma.instance.update({
            where: { id: instance.id },
            data: { gitCommit: result.newCommit },
          });
        }
        if (!result.success) {
          await createEvent(
            instance.id,
            'ERROR',
            'upgrade',
            'Remote upgrade failed',
            result.message || 'The remote upgrade process failed. Check the agent log for details.',
            { upgradeId, source: 'remote', warnings: result.warnings }
          );
        }
        await writeUpgradeAuditLog({
          upgradeId,
          instanceId: instance.id,
          triggeredById: upgradeRowBefore?.triggeredById ?? null,
          source: 'remote',
          outcome: result.success ? 'completed' : 'failed',
          previousCommit: upgradeRowBefore?.previousCommit ?? null,
          newCommit: result.newCommit || null,
          durationSeconds: result.durationSeconds || null,
          errorMessage: result.success ? null : (result.message || 'Upgrade failed'),
        });
        logger.info(`[upgrade] ${slug}: remote upgrade ${result.success ? 'COMPLETED' : 'FAILED'}`);
        return;
      }
      // No result yet — pull progress
      try {
        const progress = await driver.getUpgradeProgress();
        // Only update DB if something actually changed (avoid hot-loop writes)
        if (
          progress.phase !== lastProgress.phase ||
          progress.percentage !== lastProgress.percentage ||
          progress.message !== lastProgress.message
        ) {
          lastProgress = {
            phase: progress.phase,
            phaseName: progress.phaseName,
            percentage: progress.percentage,
            message: progress.message,
          };
          await prisma.instanceUpgrade.update({
            where: { id: upgradeId },
            data: {
              currentPhase: progress.phase || 0,
              phaseName: progress.phaseName || null,
              percentage: progress.percentage || 0,
              progressMessage: progress.message || null,
            },
          });
        }
      } catch (err) {
        // Transient network blip during a long upgrade — keep polling
        logger.debug(`[upgrade] ${slug}: progress poll error: ${describeError(err)}`);
      }
    }
    // Timeout — mark FAILED
    timedOut = true;
    throw new Error(`Remote upgrade timed out after ${Math.round(REMOTE_UPGRADE_TIMEOUT / 60_000)} minutes`);
  } catch (err) {
    // Single failure path for driver errors, DB errors and the deadline above.
    const errorMsg = describeError(err);
    const isTimeout = timedOut;
    const upgradeRowBefore = await prisma.instanceUpgrade.findUnique({ where: { id: upgradeId } });
    await prisma.instanceUpgrade.update({
      where: { id: upgradeId },
      data: {
        status: UpgradeStatus.FAILED,
        errorMessage: isTimeout ? errorMsg : errorMsg.slice(0, 2000),
        progressMessage: 'Failed',
        completedAt: new Date(),
      },
    });
    await createEvent(
      instance.id,
      'ERROR',
      'upgrade',
      isTimeout ? 'Remote upgrade timed out' : 'Remote upgrade failed',
      errorMsg.slice(0, 500),
      { upgradeId, source: 'remote' }
    );
    await writeUpgradeAuditLog({
      upgradeId,
      instanceId: instance.id,
      triggeredById: upgradeRowBefore?.triggeredById ?? null,
      source: 'remote',
      outcome: 'orchestration_error',
      previousCommit: upgradeRowBefore?.previousCommit ?? null,
      newCommit: null,
      durationSeconds: null,
      errorMessage: errorMsg,
    });
    // Don't flip the instance to ERROR state for remote upgrades — the agent
    // health check will reflect the real state on the next poll, and we don't
    // want to mask a recovered instance with stale CCP-side ERROR.
    logger.error(`[upgrade] ${slug}: ${errorMsg}`);
  }
 }
 /**
 * Async upgrade runner. Runs upgrade.sh and polls progress.
 */
@ -271,19 +569,32 @@ async function runUpgrade(
      });
    }
-    if (!result.success) {
+    const upgradeRow = await prisma.instanceUpgrade.findUnique({ where: { id: upgradeId } });
    if (!result.success && upgradeRow) {
      // Create error event
-      const upgrade = await prisma.instanceUpgrade.findUnique({ where: { id: upgradeId } });
+      await createEvent(
-      if (upgrade) {
+        upgradeRow.instanceId,
-        await createEvent(
+        'ERROR',
-          upgrade.instanceId,
+        'upgrade',
-          'ERROR',
+        'Upgrade failed',
-          'upgrade',
+        result.message || 'The upgrade process failed. Check logs for details.',
-          'Upgrade failed',
+        { upgradeId, previousCommit: upgradeRow.previousCommit, warnings: result.warnings }
-          result.message || 'The upgrade process failed. Check logs for details.',
+      );
-          { upgradeId, previousCommit: upgrade.previousCommit, warnings: result.warnings }
+    }
-        );
+
-      }
+    if (upgradeRow) {
      await writeUpgradeAuditLog({
        upgradeId,
        instanceId: upgradeRow.instanceId,
        triggeredById: upgradeRow.triggeredById,
        source: 'local',
        outcome: result.success ? 'completed' : 'failed',
        previousCommit: upgradeRow.previousCommit,
        newCommit: result.newCommit || newCommit,
        durationSeconds: result.durationSeconds || null,
        errorMessage: result.success ? null : (result.message || 'Upgrade failed'),
      });
    }
    logger.info(`[upgrade] ${slug}: Upgrade ${result.success ? 'completed' : 'failed'}`);
@ -327,6 +638,18 @@ async function runUpgrade(
          statusMessage: `Upgrade failed: ${isTimeout ? 'timeout' : errorMsg.slice(0, 200)}`,
        },
      });
      await writeUpgradeAuditLog({
        upgradeId,
        instanceId: upgrade.instanceId,
        triggeredById: upgrade.triggeredById,
        source: 'local',
        outcome: 'orchestration_error',
        previousCommit: upgrade.previousCommit,
        newCommit: null,
        durationSeconds: result.durationSeconds || null,
        errorMessage: errorMsg,
      });
    }
    logger.error(`[upgrade] ${slug}: Upgrade failed: ${errorMsg}`);
--- a/config.sh
+++ b/config.sh
@ -38,6 +38,11 @@ NI_MAPBOX_KEY=""
 NI_MAXMIND_ACCOUNT_ID=""
 NI_MAXMIND_LICENSE_KEY=""
 # CCP (Changemaker Control Panel) registration flags
 NI_CCP_URL=""
 NI_CCP_INVITE_CODE=""
 NI_CCP_AGENT_URL=""
 # --- Arg parser ---
 while [[ $# -gt 0 ]]; do
  case "$1" in
@ -62,6 +67,10 @@ while [[ $# -gt 0 ]]; do
    --mapbox-key)         NI_MAPBOX_KEY="$2"; shift 2 ;;
    --maxmind-account-id) NI_MAXMIND_ACCOUNT_ID="$2"; shift 2 ;;
    --maxmind-license-key) NI_MAXMIND_LICENSE_KEY="$2"; shift 2 ;;
    # CCP (Changemaker Control Panel)
    --ccp-url)            NI_CCP_URL="$2"; shift 2 ;;
    --ccp-invite-code)    NI_CCP_INVITE_CODE="$2"; shift 2 ;;
    --ccp-agent-url)      NI_CCP_AGENT_URL="$2"; shift 2 ;;
    --help|-h)
      echo "Usage: bash config.sh [OPTIONS]"
      echo ""
@ -91,6 +100,11 @@ while [[ $# -gt 0 ]]; do
      echo "  --maxmind-account-id ID MaxMind GeoIP account ID"
      echo "  --maxmind-license-key K MaxMind GeoIP license key"
      echo ""
      echo "CCP (Changemaker Control Panel) — all 3 flags required to register:"
      echo "  --ccp-url URL           CCP server URL (e.g., https://ccp.example.com)"
      echo "  --ccp-invite-code CODE  One-time invite code from CCP"
      echo "  --ccp-agent-url URL     Agent URL the CCP reaches (e.g., https://this-host:7443)"
      echo ""
      echo "Example:"
      echo "  bash config.sh --non-interactive --domain example.org --admin-password MyStr0ngPass123"
      echo "  bash config.sh -y --domain example.org --admin-password MyStr0ngPass123 \\"
@ -798,6 +812,17 @@ configure_features() {
      else
        warn "Set JVB_ADVERTISE_IP in .env before starting Jitsi containers."
      fi
    else
      # Non-interactive: auto-detect public IP for NAT traversal
      local detected_ip
      detected_ip=$(curl -sf --max-time 5 https://ifconfig.me 2>/dev/null || \
                    curl -sf --max-time 5 https://api.ipify.org 2>/dev/null || true)
      if [[ -n "$detected_ip" ]]; then
        update_env_var "JVB_ADVERTISE_IP" "$detected_ip"
        success "JVB advertise IP auto-detected: $detected_ip"
      else
        warn "Could not auto-detect public IP. Set JVB_ADVERTISE_IP in .env before starting Jitsi."
      fi
    fi
  else
    MEET_ENABLED="no"
@ -838,13 +863,6 @@ configure_features() {
    update_env_var "ENABLE_PEOPLE" "false"
  fi
  if prompt_yes_no "Enable Analytics & GeoIP (visitor tracking, geo dashboard)?"; then
    update_env_var "ENABLE_ANALYTICS" "true"
    success "Analytics enabled"
  else
    update_env_var "ENABLE_ANALYTICS" "false"
  fi
  if prompt_yes_no "Enable Docs Comments & Version History (Gitea-backed)?"; then
    update_env_var "GITEA_COMMENTS_ENABLED" "true"
    success "Docs Comments & Version History enabled"
@ -881,8 +899,14 @@ configure_features() {
  fi
  if prompt_yes_no "Enable Monitoring stack (Prometheus, Grafana, Alertmanager, cAdvisor)?" "y"; then
-    update_env_var "COMPOSE_PROFILES" "monitoring"
+    local existing_profiles
-    success "Monitoring enabled (COMPOSE_PROFILES=monitoring)"
+    existing_profiles=$(grep -oP 'COMPOSE_PROFILES=\K.*' "$ENV_FILE" 2>/dev/null || echo "")
    if [[ -z "$existing_profiles" ]]; then
      update_env_var "COMPOSE_PROFILES" "monitoring"
    elif [[ "$existing_profiles" != *"monitoring"* ]]; then
      update_env_var "COMPOSE_PROFILES" "${existing_profiles},monitoring"
    fi
    success "Monitoring enabled (COMPOSE_PROFILES includes monitoring)"
    MONITORING_ENABLED="yes"
  else
    MONITORING_ENABLED="no"
@ -1401,6 +1425,35 @@ pangolin_connect_first_site() {
 configure_control_panel() {
  header "Control Panel Registration"
  # Non-interactive: use --ccp-* flags if all three provided, otherwise skip
  if [[ "$NON_INTERACTIVE" == "true" ]]; then
    if [[ -n "$NI_CCP_URL" && -n "$NI_CCP_INVITE_CODE" && -n "$NI_CCP_AGENT_URL" ]]; then
      update_env_var "ENABLE_CCP_AGENT" "true"
      update_env_var "CCP_URL" "$NI_CCP_URL"
      update_env_var "CCP_INVITE_CODE" "$NI_CCP_INVITE_CODE"
      update_env_var "CCP_AGENT_URL" "$NI_CCP_AGENT_URL"
      # Append ccp-agent to existing profiles (don't clobber monitoring)
      local existing_profiles
      existing_profiles=$(grep -oP 'COMPOSE_PROFILES=\K.*' "$ENV_FILE" 2>/dev/null || echo "")
      if [[ -z "$existing_profiles" ]]; then
        update_env_var "COMPOSE_PROFILES" "ccp-agent"
      elif [[ "$existing_profiles" != *"ccp-agent"* ]]; then
        update_env_var "COMPOSE_PROFILES" "${existing_profiles},ccp-agent"
      fi
      success "CCP registration configured ($NI_CCP_URL)"
    else
      update_env_var "ENABLE_CCP_AGENT" "false"
      if [[ -n "$NI_CCP_URL" || -n "$NI_CCP_INVITE_CODE" || -n "$NI_CCP_AGENT_URL" ]]; then
        warn "CCP registration needs all 3 flags: --ccp-url, --ccp-invite-code, --ccp-agent-url"
      else
        info "Skipping CCP registration (no --ccp-url provided)"
      fi
    fi
    return
  fi
  if prompt_yes_no "Register this instance with a Changemaker Control Panel?"; then
    echo ""
    read -rp "  Enter Control Panel URL (e.g., https://ccp.example.com): " ccp_url
@ -2152,9 +2205,15 @@ main() {
    header "Release Mode Settings"
    update_env_var "IMAGE_TAG" "latest"
    update_env_var "NODE_ENV" "production"
-    # Ensure monitoring is included if user opted in
+    # Ensure monitoring is included if user opted in (preserve existing profiles)
    if [[ "${MONITORING_ENABLED:-no}" == "yes" ]]; then
-      update_env_var "COMPOSE_PROFILES" "monitoring"
+      local existing_profiles
      existing_profiles=$(grep -oP 'COMPOSE_PROFILES=\K.*' "$ENV_FILE" 2>/dev/null || echo "")
      if [[ -z "$existing_profiles" ]]; then
        update_env_var "COMPOSE_PROFILES" "monitoring"
      elif [[ "$existing_profiles" != *"monitoring"* ]]; then
        update_env_var "COMPOSE_PROFILES" "${existing_profiles},monitoring"
      fi
    fi
    success "Set IMAGE_TAG=latest, NODE_ENV=production (pre-built images)"
  fi
--- a/scripts/build-release.sh
+++ b/scripts/build-release.sh
@ -103,7 +103,8 @@ cp "$PROJECT_DIR/api/prisma/init-nocodb-db.sh" "$STAGE_DIR/scripts/"
 cp "$PROJECT_DIR/api/prisma/init-gancio-db.sh" "$STAGE_DIR/scripts/"
 # Runtime scripts
-for script in nocodb-init.sh gitea-init.sh mkdocs-entrypoint.sh backup.sh \
+for script in nocodb-init.sh gitea-init.sh mkdocs-entrypoint.sh \
              backup.sh restore.sh \
              upgrade.sh upgrade-check.sh upgrade-watcher.sh \
              uninstall.sh test-deployment.sh; do
  if [[ -f "$PROJECT_DIR/scripts/$script" ]]; then
--- a/scripts/install.sh
+++ b/scripts/install.sh
@ -294,7 +294,7 @@ if [[ "$START_SERVICES" =~ ^[Yy]$ ]]; then
  info "  Database migrations and seeding run automatically on first boot."
  echo ""
-  CORE_SERVICES=("v2-postgres" "redis" "api" "admin")
+  CORE_SERVICES=("v2-postgres" "redis" "api" "admin" "nginx")
  ELAPSED=0
  ALL_HEALTHY=false
--- a/scripts/upgrade.sh
+++ b/scripts/upgrade.sh
@ -359,9 +359,13 @@ trap on_failure EXIT
 acquire_lock
 load_env
-# Determine branch
+# Determine branch (source mode only — release installs have no git)
 if [[ -z "$BRANCH" ]]; then
-  BRANCH="$(git rev-parse --abbrev-ref HEAD)"
+  if [[ "$INSTALL_MODE" == "release" ]]; then
    BRANCH="release"
  else
    BRANCH="$(git rev-parse --abbrev-ref HEAD)"
  fi
 fi
 # =============================================================================
@ -461,13 +465,15 @@ else
  exit 1
 fi
-# Remote reachable
+# Remote reachable (source mode only — release mode pulls from Gitea API later)
-info "Checking git remote..."
+if [[ "$INSTALL_MODE" == "source" ]]; then
-if timeout 10 git ls-remote origin HEAD &>/dev/null 2>&1; then
+  info "Checking git remote..."
-  success "Git remote reachable"
+  if timeout 10 git ls-remote origin HEAD &>/dev/null 2>&1; then
-else
+    success "Git remote reachable"
-  error "Cannot reach git remote. Check your network or remote configuration."
+  else
-  exit 1
+    error "Cannot reach git remote. Check your network or remote configuration."
    exit 1
  fi
 fi
 # Working directory checks
@ -490,9 +496,16 @@ fi
 success "Disk space: ${AVAILABLE_MB}MB available"
 # Record pre-upgrade state
-PRE_UPGRADE_COMMIT="$(git rev-parse HEAD)"
+if [[ "$INSTALL_MODE" == "source" ]]; then
-PRE_UPGRADE_SHORT="$(git rev-parse --short HEAD)"
+  PRE_UPGRADE_COMMIT="$(git rev-parse HEAD)"
-info "Current commit: $PRE_UPGRADE_SHORT ($(git log -1 --format='%s' HEAD))"
+  PRE_UPGRADE_SHORT="$(git rev-parse --short HEAD)"
  info "Current commit: $PRE_UPGRADE_SHORT ($(git log -1 --format='%s' HEAD))"
 else
  # Release mode: derive "commit" from VERSION file (format: <tag>\n<sha>)
  PRE_UPGRADE_COMMIT="$(head -2 "$PROJECT_DIR/VERSION" 2>/dev/null | tail -1 || echo "release")"
  PRE_UPGRADE_SHORT="$(head -1 "$PROJECT_DIR/VERSION" 2>/dev/null || echo "release")"
  info "Current version: $PRE_UPGRADE_SHORT"
 fi
 info "Target branch: $BRANCH"
 # Record running containers (for restoring monitoring profile later)
@ -502,31 +515,36 @@ if docker ps --format '{{.Names}}' | grep -q 'prometheus-changemaker'; then
  info "Monitoring stack detected (will restart after upgrade)"
 fi
-# Warn about uncommitted changes in project-owned paths
+# Source-mode-only checks: dirty files + upstream commit comparison
-PROJECT_OWNED_PATHS="api/ admin/ docker-compose.yml"
+if [[ "$INSTALL_MODE" == "source" ]]; then
-DIRTY_PROJECT_FILES="$(git diff --name-only HEAD -- $PROJECT_OWNED_PATHS 2>/dev/null || true)"
+  # Warn about uncommitted changes in project-owned paths
-if [[ -n "$DIRTY_PROJECT_FILES" ]]; then
+  PROJECT_OWNED_PATHS="api/ admin/ docker-compose.yml"
-  warn "Uncommitted changes in project-owned files:"
+  DIRTY_PROJECT_FILES="$(git diff --name-only HEAD -- $PROJECT_OWNED_PATHS 2>/dev/null || true)"
-  echo "$DIRTY_PROJECT_FILES" | while read -r f; do echo "    $f"; done
+  if [[ -n "$DIRTY_PROJECT_FILES" ]]; then
-  if [[ "$FORCE" != "true" ]]; then
+    warn "Uncommitted changes in project-owned files:"
-    error "Commit or stash these changes first, or use --force to continue."
+    echo "$DIRTY_PROJECT_FILES" | while read -r f; do echo "    $f"; done
-    exit 1
+    if [[ "$FORCE" != "true" ]]; then
      error "Commit or stash these changes first, or use --force to continue."
      exit 1
    fi
    warn "Continuing with --force (changes will be stashed)"
  fi
  warn "Continuing with --force (changes will be stashed)"
 fi
-# Check for available updates
+  # Check for available updates
-LOCAL_HEAD="$(git rev-parse HEAD)"
+  LOCAL_HEAD="$(git rev-parse HEAD)"
-REMOTE_HEAD="$(git ls-remote origin "$BRANCH" | cut -f1)"
+  REMOTE_HEAD="$(git ls-remote origin "$BRANCH" | cut -f1)"
-if [[ "$LOCAL_HEAD" == "$REMOTE_HEAD" ]]; then
+  if [[ "$LOCAL_HEAD" == "$REMOTE_HEAD" ]]; then
-  info "Already up to date ($PRE_UPGRADE_SHORT). No upstream changes."
+    info "Already up to date ($PRE_UPGRADE_SHORT). No upstream changes."
-  if [[ "$FORCE" != "true" ]]; then
+    if [[ "$FORCE" != "true" ]]; then
-    success "Nothing to upgrade."
+      success "Nothing to upgrade."
-    release_lock
+      release_lock
-    exit 0
+      exit 0
    fi
    warn "Continuing with --force despite no upstream changes."
  fi
  warn "Continuing with --force despite no upstream changes."
 fi
 # Release mode: the upstream-version comparison happens later in the
 # release-mode block (line ~597) which queries the Gitea Releases API.
 # =============================================================================
 # Phase 2: Backup
@ -669,100 +687,105 @@ elif [[ "$DRY_RUN" == "true" ]]; then
  exit 0
 fi
-# Step 0: Save user-modifiable paths before any git operations
+# Source-mode git pull flow. Release mode handles its update via tarball
-save_user_paths
+# download in the block above and skips this entire section.
 if [[ "$INSTALL_MODE" == "source" ]]; then
  # Step 0: Save user-modifiable paths before any git operations
  save_user_paths
-# Step 0b: Clear skip-worktree flags that prevent merge (e.g., repo-data JSON files)
+  # Step 0b: Clear skip-worktree flags that prevent merge (e.g., repo-data JSON files)
-SKIP_WORKTREE_FILES="$(git ls-files -v | grep '^S ' | awk '{print $2}' || true)"
+  SKIP_WORKTREE_FILES="$(git ls-files -v | grep '^S ' | awk '{print $2}' || true)"
-if [[ -n "$SKIP_WORKTREE_FILES" ]]; then
+  if [[ -n "$SKIP_WORKTREE_FILES" ]]; then
-  info "Clearing skip-worktree flags on $(echo "$SKIP_WORKTREE_FILES" | wc -l | xargs) file(s)..."
+    info "Clearing skip-worktree flags on $(echo "$SKIP_WORKTREE_FILES" | wc -l | xargs) file(s)..."
-  echo "$SKIP_WORKTREE_FILES" | xargs git update-index --no-skip-worktree
+    echo "$SKIP_WORKTREE_FILES" | xargs git update-index --no-skip-worktree
-  success "Skip-worktree flags cleared"
+    success "Skip-worktree flags cleared"
 fi
 # Step 0c: Fix Docker-owned directories that block git checkout
 for owned_dir in api/upgrade api/uploads api/configs; do
  if [[ -d "$PROJECT_DIR/$owned_dir" ]] && [[ ! -w "$PROJECT_DIR/$owned_dir" ]]; then
    info "Fixing permissions on $owned_dir..."
    docker run --rm -v "$PROJECT_DIR/$owned_dir:/fix" alpine chown -R "$(id -u):$(id -g)" /fix 2>/dev/null || true
  fi
 done
-# Step 1: Stash user changes if any exist
+  # Step 0c: Fix Docker-owned directories that block git checkout
-HAS_CHANGES=false
+  for owned_dir in api/upgrade api/uploads api/configs; do
-if [[ -n "$(git status --porcelain 2>/dev/null)" ]]; then
+    if [[ -d "$PROJECT_DIR/$owned_dir" ]] && [[ ! -w "$PROJECT_DIR/$owned_dir" ]]; then
-  HAS_CHANGES=true
+      info "Fixing permissions on $owned_dir..."
-  STASH_NAME="upgrade-${TIMESTAMP}"
+      docker run --rm -v "$PROJECT_DIR/$owned_dir:/fix" alpine chown -R "$(id -u):$(id -g)" /fix 2>/dev/null || true
-  info "Stashing local changes as '$STASH_NAME'..."
+    fi
-  git stash push --include-untracked -m "$STASH_NAME"
+  done
  success "Local changes stashed"
 fi
-# Step 3: Pull updates
+  # Step 1: Stash user changes if any exist
-info "Pulling updates from origin/$BRANCH..."
+  HAS_CHANGES=false
-if ! git pull origin "$BRANCH" --no-edit 2>&1; then
+  if [[ -n "$(git status --porcelain 2>/dev/null)" ]]; then
-  error "git pull failed. This may indicate upstream force-push or branch issues."
+    HAS_CHANGES=true
    STASH_NAME="upgrade-${TIMESTAMP}"
    info "Stashing local changes as '$STASH_NAME'..."
    git stash push --include-untracked -m "$STASH_NAME"
    success "Local changes stashed"
  fi
  # Step 3: Pull updates
  info "Pulling updates from origin/$BRANCH..."
  if ! git pull origin "$BRANCH" --no-edit 2>&1; then
    error "git pull failed. This may indicate upstream force-push or branch issues."
    if [[ "$HAS_CHANGES" == "true" ]]; then
      warn "Your stashed changes can be recovered with: git stash pop"
    fi
    exit 1
  fi
  POST_PULL_COMMIT="$(git rev-parse --short HEAD)"
  success "Updated to $POST_PULL_COMMIT"
  # Step 4: Pop stash and handle conflicts
  if [[ "$HAS_CHANGES" == "true" ]]; then
-    warn "Your stashed changes can be recovered with: git stash pop"
+    info "Restoring local changes..."
-  fi
+    if git stash pop 2>&1; then
-  exit 1
+      success "Local changes restored cleanly"
-fi
+    else
      warn "Merge conflicts detected during stash pop"
-POST_PULL_COMMIT="$(git rev-parse --short HEAD)"
+      # Auto-resolve user-modifiable paths by keeping user's version
-success "Updated to $POST_PULL_COMMIT"
+      RESOLVED_COUNT=0
      for user_path in "${USER_PATHS[@]}"; do
        CONFLICTED="$(git diff --name-only --diff-filter=U -- "$user_path" 2>/dev/null || true)"
        if [[ -n "$CONFLICTED" ]]; then
          while IFS= read -r cf; do
            info "  Auto-resolving (keeping yours): $cf"
            git checkout --theirs "$cf" 2>/dev/null || true
            git add "$cf"
            RESOLVED_COUNT=$((RESOLVED_COUNT + 1))
          done < <(echo "$CONFLICTED")
        fi
      done
-# Step 4: Pop stash and handle conflicts
+      # Check if any conflicts remain in project-owned files
-if [[ "$HAS_CHANGES" == "true" ]]; then
+      REMAINING_CONFLICTS="$(git diff --name-only --diff-filter=U 2>/dev/null || true)"
-  info "Restoring local changes..."
+      if [[ -n "$REMAINING_CONFLICTS" ]]; then
-  if git stash pop 2>&1; then
+        error "Unresolved conflicts in project-owned files:"
-    success "Local changes restored cleanly"
+        echo "$REMAINING_CONFLICTS" | while read -r f; do echo "    $f"; done
-  else
+        echo ""
-    warn "Merge conflicts detected during stash pop"
+        error "These files have upstream changes that conflict with your edits."
-
+        error "Resolve manually, then run the upgrade again."
-    # Auto-resolve user-modifiable paths by keeping user's version
+        info "Your pre-upgrade commit: $PRE_UPGRADE_COMMIT"
-    RESOLVED_COUNT=0
+        info "To abort: git merge --abort  OR  git checkout $PRE_UPGRADE_COMMIT"
-    for user_path in "${USER_PATHS[@]}"; do
+        exit 1
      CONFLICTED="$(git diff --name-only --diff-filter=U -- "$user_path" 2>/dev/null || true)"
      if [[ -n "$CONFLICTED" ]]; then
        while IFS= read -r cf; do
          info "  Auto-resolving (keeping yours): $cf"
          git checkout --theirs "$cf" 2>/dev/null || true
          git add "$cf"
          RESOLVED_COUNT=$((RESOLVED_COUNT + 1))
        done < <(echo "$CONFLICTED")
      fi
    done
-    # Check if any conflicts remain in project-owned files
+      if [[ $RESOLVED_COUNT -gt 0 ]]; then
-    REMAINING_CONFLICTS="$(git diff --name-only --diff-filter=U 2>/dev/null || true)"
+        success "Auto-resolved $RESOLVED_COUNT user-modifiable path(s) (kept your versions)"
-    if [[ -n "$REMAINING_CONFLICTS" ]]; then
+      fi
      error "Unresolved conflicts in project-owned files:"
      echo "$REMAINING_CONFLICTS" | while read -r f; do echo "    $f"; done
      echo ""
      error "These files have upstream changes that conflict with your edits."
      error "Resolve manually, then run the upgrade again."
      info "Your pre-upgrade commit: $PRE_UPGRADE_COMMIT"
      info "To abort: git merge --abort  OR  git checkout $PRE_UPGRADE_COMMIT"
      exit 1
    fi
    if [[ $RESOLVED_COUNT -gt 0 ]]; then
      success "Auto-resolved $RESOLVED_COUNT user-modifiable path(s) (kept your versions)"
    fi
  fi
 fi
-# Step 4b: Restore user-modifiable paths (unconditionally overwrites with saved copies)
+  # Step 4b: Restore user-modifiable paths (unconditionally overwrites with saved copies)
-restore_user_paths
+  restore_user_paths
-# Step 4c: Restore any tracked files accidentally deleted by restore_user_paths
+  # Step 4c: Restore any tracked files accidentally deleted by restore_user_paths
-# (can happen when save_user_paths can't read root-owned files in user paths)
+  # (can happen when save_user_paths can't read root-owned files in user paths)
-DELETED_TRACKED="$(git ls-files --deleted 2>/dev/null || true)"
+  DELETED_TRACKED="$(git ls-files --deleted 2>/dev/null || true)"
-if [[ -n "$DELETED_TRACKED" ]]; then
+  if [[ -n "$DELETED_TRACKED" ]]; then
-  info "Restoring $(echo "$DELETED_TRACKED" | wc -l | xargs) tracked file(s) deleted during restore..."
+    info "Restoring $(echo "$DELETED_TRACKED" | wc -l | xargs) tracked file(s) deleted during restore..."
-  echo "$DELETED_TRACKED" | xargs git checkout HEAD -- 2>/dev/null || true
+    echo "$DELETED_TRACKED" | xargs git checkout HEAD -- 2>/dev/null || true
-  success "Tracked files restored from HEAD"
+    success "Tracked files restored from HEAD"
  fi
 fi
 # End of source-mode git pull flow
 # Step 5: Detect new env vars
 info "Checking for new environment variables..."
@ -791,24 +814,30 @@ if [[ -f "$PROJECT_DIR/.env.example" ]] && [[ -f "$PROJECT_DIR/.env" ]]; then
  fi
 fi
-# Step 6: Print update summary
+# Step 6: Print update summary (source mode only — release mode has no commit range)
-COMMIT_RANGE="${PRE_UPGRADE_SHORT}..${POST_PULL_COMMIT}"
+COMMIT_COUNT=0
-COMMIT_COUNT="$(git log --oneline "$PRE_UPGRADE_COMMIT..HEAD" 2>/dev/null | wc -l | xargs)"
+if [[ "$INSTALL_MODE" == "source" ]]; then
-echo ""
+  COMMIT_RANGE="${PRE_UPGRADE_SHORT}..${POST_PULL_COMMIT}"
-info "Update summary: $COMMIT_COUNT commit(s) ($COMMIT_RANGE)"
+  # Use || true and check pipefail-safe to survive git failures
-git log --oneline "$PRE_UPGRADE_COMMIT..HEAD" 2>/dev/null | head -20
+  COMMIT_COUNT="$(git log --oneline "$PRE_UPGRADE_COMMIT..HEAD" 2>/dev/null | wc -l | xargs || echo 0)"
 if [[ "$COMMIT_COUNT" -gt 20 ]]; then
  info "  ... and $((COMMIT_COUNT - 20)) more"
 fi
 # Flag commits that may require manual attention
 BREAKING_COMMITS="$(git log --oneline "$PRE_UPGRADE_COMMIT..HEAD" --grep="BREAKING" --grep="\[manual\]" 2>/dev/null || true)"
 if [[ -n "$BREAKING_COMMITS" ]]; then
  echo ""
-  warn "Commits requiring manual attention:"
+  info "Update summary: $COMMIT_COUNT commit(s) ($COMMIT_RANGE)"
-  echo "$BREAKING_COMMITS" | while read -r line; do
+  git log --oneline "$PRE_UPGRADE_COMMIT..HEAD" 2>/dev/null | head -20 || true
-    echo -e "    ${YELLOW}$line${NC}"
+  if [[ "$COMMIT_COUNT" -gt 20 ]]; then
-  done
+    info "  ... and $((COMMIT_COUNT - 20)) more"
  fi
  # Flag commits that may require manual attention
  BREAKING_COMMITS="$(git log --oneline "$PRE_UPGRADE_COMMIT..HEAD" --grep="BREAKING" --grep="\[manual\]" 2>/dev/null || true)"
  if [[ -n "$BREAKING_COMMITS" ]]; then
    echo ""
    warn "Commits requiring manual attention:"
    echo "$BREAKING_COMMITS" | while read -r line; do
      echo -e "    ${YELLOW}$line${NC}"
    done
  fi
 else
  info "Update summary: ${PRE_UPGRADE_SHORT} → release"
 fi
 # =============================================================================
@ -1135,7 +1164,10 @@ verify_service_health() {
  done
  warn "$name: not responding after ${max_wait}s"
  VERIFY_FAILED=true
-  return 1
+  # Always return 0 — under set -e a non-zero return from this helper would
  # exit the script before write_result runs. The VERIFY_FAILED flag is the
  # signal the caller actually checks.
  return 0
 }
 # API health (with polling — may still be running migrations)
@ -1194,7 +1226,11 @@ fi
 # =============================================================================
 ELAPSED="$(elapsed)"
-FINAL_COMMIT="$(git rev-parse --short HEAD)"
+if [[ "$INSTALL_MODE" == "source" ]]; then
  FINAL_COMMIT="$(git rev-parse --short HEAD)"
 else
  FINAL_COMMIT="$(head -1 "$PROJECT_DIR/VERSION" 2>/dev/null || echo "release")"
 fi
 # Collect warnings for API mode result
 UPGRADE_WARNINGS="[]"
@ -1211,7 +1247,11 @@ echo -e "${BOLD}${GREEN}  Upgrade Complete${NC}"
 echo -e "${BOLD}${GREEN}══════════════════════════════════════════════════${NC}"
 echo ""
 echo -e "  ${BOLD}Previous:${NC}  $PRE_UPGRADE_SHORT"
-echo -e "  ${BOLD}Current:${NC}   $FINAL_COMMIT ($(git log -1 --format='%s' HEAD))"
+if [[ "$INSTALL_MODE" == "source" ]]; then
  echo -e "  ${BOLD}Current:${NC}   $FINAL_COMMIT ($(git log -1 --format='%s' HEAD 2>/dev/null || echo "$FINAL_COMMIT"))"
 else
  echo -e "  ${BOLD}Current:${NC}   $FINAL_COMMIT"
 fi
 echo -e "  ${BOLD}Commits:${NC}   $COMMIT_COUNT"
 echo -e "  ${BOLD}Duration:${NC}  $ELAPSED"
 echo -e "  ${BOLD}Log:${NC}       $LOG_FILE"
		`@ -0,0 +1,2 @@`
							`-- AlterTable`
							`ALTER TABLE "instances" ADD COLUMN "pangolin_subdomain_prefix" TEXT;`
		`@ -0,0 +1,2 @@`
							`-- AlterEnum`
							`ALTER TYPE "AuditAction" ADD VALUE 'PANGOLIN_TEARDOWN';`