CCP restore/tunnel/upgrade + upgrade.sh release-mode fixes + volunteer dashboard polish

- Add instance restore model, routes, and agent backup/restore endpoints
- Add Pangolin tunnel service (subdomain prefix, teardown action, CCP client)
- Add slug mutex for concurrent operation safety in agent
- Expand upgrade service with remote driver orchestration
- Fix upgrade.sh to properly handle release-mode installs (no git operations)
- Add CCP registration flags to config.sh (--ccp-url, --ccp-invite-code, --ccp-agent-url)
- Auto-detect JVB advertise IP in non-interactive mode
- Polish volunteer dashboard ActionStepsList with highlighted step component
- Add ticketed event description field + volunteer dashboard query refinements

Bunker Admin
This commit is contained in:
bunker-admin 2026-04-12 11:09:46 -06:00
parent 29d1f3998a
commit 26ec925d9b
35 changed files with 4191 additions and 329 deletions

View File

@ -10,6 +10,8 @@ import {
LinkOutlined,
CheckSquareOutlined,
CheckCircleFilled,
RightOutlined,
ThunderboltOutlined,
} from '@ant-design/icons';
import { useNavigate } from 'react-router-dom';
import { api } from '@/lib/api';
@ -66,6 +68,97 @@ function resolveStepLink(step: DashboardActionStep): { to: string; external: boo
}
}
/**
 * Prominent "Next Up" card for the first incomplete action step.
 * Self-reportable steps (CUSTOM / VISIT_LINK) get a "Mark as done" button,
 * optionally alongside "Open" when the step resolves to a link; every other
 * kind gets a single "Take Action" navigation button.
 */
function HighlightedStep({
  step,
  onNavigate,
  onSelfReport,
  loading,
}: {
  step: DashboardActionStep;
  onNavigate: (step: DashboardActionStep) => void;
  onSelfReport: (step: DashboardActionStep) => void;
  loading: boolean;
}) {
  // CUSTOM and VISIT_LINK steps are completed by the volunteer self-reporting.
  const selfReportable = step.kind === 'CUSTOM' || step.kind === 'VISIT_LINK';
  const hasLink = resolveStepLink(step) !== null;

  // Action row, computed up front so the JSX below stays flat.
  const actions = selfReportable ? (
    <>
      {hasLink && (
        <Button size="middle" onClick={() => onNavigate(step)} icon={<RightOutlined />}>
          Open
        </Button>
      )}
      <Button
        type="primary"
        size="middle"
        loading={loading}
        onClick={() => onSelfReport(step)}
      >
        Mark as done
      </Button>
    </>
  ) : (
    <Button
      type="primary"
      size="middle"
      icon={<RightOutlined />}
      onClick={() => onNavigate(step)}
      disabled={!hasLink}
    >
      Take Action
    </Button>
  );

  return (
    <div
      style={{
        background: 'linear-gradient(135deg, rgba(52,152,219,0.25) 0%, rgba(41,128,185,0.15) 100%)',
        border: '1px solid rgba(52,152,219,0.3)',
        borderRadius: 8,
        padding: '16px 20px',
        margin: '0 0 2px',
      }}
    >
      {/* "Next Up" banner */}
      <div style={{ display: 'flex', alignItems: 'center', gap: 6, marginBottom: 8 }}>
        <ThunderboltOutlined style={{ fontSize: 12, color: '#3498db' }} />
        <Typography.Text strong style={{ fontSize: 12, color: '#3498db', textTransform: 'uppercase', letterSpacing: 0.5 }}>
          Next Up
        </Typography.Text>
      </div>
      {/* Step icon + label + optional description */}
      <div style={{ display: 'flex', alignItems: 'center', gap: 10, marginBottom: 8 }}>
        <div
          style={{
            width: 32,
            height: 32,
            borderRadius: '50%',
            background: 'rgba(52,152,219,0.25)',
            display: 'flex',
            alignItems: 'center',
            justifyContent: 'center',
            fontSize: 16,
            color: '#3498db',
            flexShrink: 0,
          }}
        >
          {KIND_ICONS[step.kind]}
        </div>
        <div style={{ flex: 1, minWidth: 0 }}>
          <Typography.Text strong style={{ fontSize: 15, display: 'block' }}>
            {step.label}
          </Typography.Text>
          {step.description && (
            <Typography.Text type="secondary" style={{ fontSize: 12, display: 'block', marginTop: 2 }}>
              {step.description}
            </Typography.Text>
          )}
        </div>
      </div>
      <div style={{ display: 'flex', gap: 8, marginTop: 4 }}>{actions}</div>
    </div>
  );
}
export default function ActionStepsList({ campaign, onRefresh }: ActionStepsListProps) {
const navigate = useNavigate();
const { message } = App.useApp();
@ -95,6 +188,8 @@ export default function ActionStepsList({ campaign, onRefresh }: ActionStepsList
};
const sortedSteps = [...campaign.steps].sort((a, b) => a.order - b.order);
const highlightedStep = sortedSteps.find((s) => !s.completed);
const remainingSteps = sortedSteps.filter((s) => s.id !== highlightedStep?.id);
return (
<Card
@ -108,7 +203,18 @@ export default function ActionStepsList({ campaign, onRefresh }: ActionStepsList
</Typography.Text>
}
>
{sortedSteps.map((step, i) => {
{highlightedStep && (
<div style={{ padding: '12px 12px 0' }}>
<HighlightedStep
step={highlightedStep}
onNavigate={handleNavigate}
onSelfReport={handleSelfReport}
loading={completingStepId === highlightedStep.id}
/>
</div>
)}
{remainingSteps.map((step, i) => {
const isSelfReport = step.kind === 'CUSTOM' || step.kind === 'VISIT_LINK';
const canNavigate = resolveStepLink(step) !== null;
@ -119,8 +225,8 @@ export default function ActionStepsList({ campaign, onRefresh }: ActionStepsList
display: 'flex',
alignItems: 'center',
justifyContent: 'space-between',
padding: '12px 20px',
borderTop: i > 0 ? '1px solid rgba(255,255,255,0.04)' : undefined,
padding: '10px 20px',
borderTop: (highlightedStep || i > 0) ? '1px solid rgba(255,255,255,0.04)' : undefined,
opacity: step.completed ? 0.55 : 1,
gap: 12,
}}
@ -128,22 +234,22 @@ export default function ActionStepsList({ campaign, onRefresh }: ActionStepsList
<div style={{ display: 'flex', alignItems: 'center', gap: 10, flex: 1, minWidth: 0 }}>
<div
style={{
width: 26,
height: 26,
width: 24,
height: 24,
borderRadius: '50%',
background: step.completed ? '#52c41a' : 'rgba(52,152,219,0.15)',
background: step.completed ? '#52c41a' : 'rgba(255,255,255,0.06)',
display: 'flex',
alignItems: 'center',
justifyContent: 'center',
fontSize: 13,
fontSize: 12,
flexShrink: 0,
color: step.completed ? '#fff' : 'rgba(255,255,255,0.7)',
color: step.completed ? '#fff' : 'rgba(255,255,255,0.5)',
}}
>
{step.completed ? <CheckCircleFilled /> : KIND_ICONS[step.kind]}
</div>
<div style={{ minWidth: 0 }}>
<Typography.Text strong style={{ fontSize: 12, color: 'rgba(255,255,255,0.45)', display: 'block' }}>
<Typography.Text strong style={{ fontSize: 11, color: 'rgba(255,255,255,0.35)', display: 'block', lineHeight: 1 }}>
{KIND_LABELS[step.kind]}
</Typography.Text>
<Typography.Text
@ -163,7 +269,7 @@ export default function ActionStepsList({ campaign, onRefresh }: ActionStepsList
<div style={{ flexShrink: 0 }}>
{step.completed ? (
<Tag color="success" style={{ margin: 0 }}>Done</Tag>
<Tag color="success" style={{ margin: 0, fontSize: 11 }}>Done</Tag>
) : isSelfReport ? (
<Space size={4}>
{canNavigate && (

View File

@ -7,7 +7,7 @@ import {
import {
PlusOutlined, SearchOutlined, EditOutlined, EyeOutlined, DeleteOutlined,
CheckCircleOutlined, CloseCircleOutlined, CopyOutlined, ScanOutlined,
TagOutlined, VideoCameraOutlined, EnvironmentOutlined,
TagOutlined, VideoCameraOutlined, EnvironmentOutlined, StarOutlined, StarFilled,
} from '@ant-design/icons';
import { api } from '@/lib/api';
import dayjs from 'dayjs';
@ -45,6 +45,7 @@ interface TicketedEvent {
currentAttendees: number;
coverImageUrl: string | null;
organizerName: string | null;
featured: boolean;
ticketTiers: TicketTier[];
_count: { tickets: number; checkIns: number };
createdAt: string;
@ -198,18 +199,55 @@ export default function TicketedEventsPage() {
}
};
// Toggle the "featured on volunteer dashboard" flag for one event.
// Featuring is exclusive: before featuring this event, the flag is cleared
// on every other currently-featured event.
const handleFeature = async (id: string, featured: boolean) => {
  try {
    if (featured) {
      // Clear the flag on all other featured events in parallel first.
      await Promise.all(
        events
          .filter((e) => e.featured && e.id !== id)
          .map((e) => api.put(`/api/ticketed-events/admin/${e.id}`, { featured: false }))
      );
    }
    await api.put(`/api/ticketed-events/admin/${id}`, { featured });
    message.success(featured ? 'Event featured on volunteer dashboard' : 'Event unfeatured');
    fetchEvents();
  } catch {
    message.error('Failed to update featured status');
  }
};
// Copy the public event URL for the given slug to the clipboard.
const copyLink = (slug: string) => {
  const url = `${window.location.origin}/event/${slug}`;
  navigator.clipboard.writeText(url);
  message.success('Link copied');
};
const columns = [
{
title: '',
key: 'featured',
width: 36,
render: (_: unknown, record: TicketedEvent) => (
<Tooltip title={record.featured ? 'Remove from volunteer dashboard' : 'Feature on volunteer dashboard'}>
<Button
type="text"
size="small"
icon={record.featured
? <StarFilled style={{ color: '#faad14' }} />
: <StarOutlined style={{ color: 'rgba(255,255,255,0.25)' }} />}
onClick={(e) => { e.stopPropagation(); handleFeature(record.id, !record.featured); }}
/>
</Tooltip>
),
},
{
title: 'Title',
dataIndex: 'title',
key: 'title',
render: (text: string, record: TicketedEvent) => (
<a onClick={() => navigate(`/app/events/${record.id}`)}>{text}</a>
<Space>
<a onClick={() => navigate(`/app/events/${record.id}`)}>{text}</a>
{record.featured && <Tag color="gold" style={{ fontSize: 11 }}>Featured</Tag>}
</Space>
),
},
{

View File

@ -50,6 +50,7 @@ export const updateEventSchema = z.object({
maxAttendees: z.number().int().positive().nullable().optional(),
organizerName: z.string().max(200).nullable().optional(),
organizerEmail: z.string().email().nullable().optional(),
featured: z.boolean().optional(),
});
export const createTierSchema = z.object({

View File

@ -114,24 +114,31 @@ async function getReferral(userId: string): Promise<DashboardReferral> {
async function getFeaturedEvent(): Promise<DashboardFeaturedEvent | null> {
const today = new Date();
today.setHours(0, 0, 0, 0);
const event = await prisma.ticketedEvent.findFirst({
where: {
featured: true,
status: TicketedEventStatus.PUBLISHED,
date: { gte: today },
},
orderBy: { date: 'asc' },
select: {
slug: true,
title: true,
date: true,
startTime: true,
venueName: true,
coverImageUrl: true,
currentAttendees: true,
maxAttendees: true,
},
});
const eventSelect = {
slug: true,
title: true,
date: true,
startTime: true,
venueName: true,
coverImageUrl: true,
currentAttendees: true,
maxAttendees: true,
} as const;
const baseWhere = { status: TicketedEventStatus.PUBLISHED, date: { gte: today } };
// Prefer admin-featured event; fall back to next upcoming published event
const event =
await prisma.ticketedEvent.findFirst({
where: { ...baseWhere, featured: true },
orderBy: { date: 'asc' },
select: eventSelect,
}) ??
await prisma.ticketedEvent.findFirst({
where: baseWhere,
orderBy: { date: 'asc' },
select: eventSelect,
});
if (!event) return null;
return {
slug: event.slug,

View File

@ -14,7 +14,7 @@ export default function AgentRegistrationsPage() {
const fetchRegistrations = useCallback(async () => {
try {
setLoading(true);
const { data } = await api.get('/api/agents/registrations');
const { data } = await api.get('/agents/registrations');
setRegistrations(data);
} catch {
message.error('Failed to load registrations');
@ -27,7 +27,7 @@ export default function AgentRegistrationsPage() {
const handleApprove = async (id: string) => {
try {
await api.post(`/api/agents/registrations/${id}/approve`);
await api.post(`/agents/registrations/${id}/approve`);
message.success('Registration approved — agent will receive certificates on next poll');
fetchRegistrations();
setDetailModal(null);
@ -39,7 +39,7 @@ export default function AgentRegistrationsPage() {
const handleReject = async (id: string) => {
try {
await api.post(`/api/agents/registrations/${id}/reject`);
await api.post(`/agents/registrations/${id}/reject`);
message.success('Registration rejected');
fetchRegistrations();
setDetailModal(null);

View File

@ -203,8 +203,16 @@ export default function BackupsPage() {
{
title: 'Instance',
dataIndex: 'instance',
width: 160,
render: (inst: BackupRow['instance']) => inst?.name || '-',
width: 180,
render: (inst: BackupRow['instance'], record: BackupRow) => {
const isRemote = record.manifest?.source === 'remote';
return (
<Space size={4}>
<span>{inst?.name || '-'}</span>
{isRemote && <Tag color="blue">remote</Tag>}
</Space>
);
},
},
{
title: 'Status',

View File

@ -44,6 +44,7 @@ import {
WarningOutlined,
CloseCircleOutlined,
InfoCircleOutlined,
UndoOutlined,
} from '@ant-design/icons';
import dayjs from 'dayjs';
import { useNavigate, useParams } from 'react-router-dom';
@ -89,6 +90,16 @@ export default function InstanceDetailPage() {
const [backupsLoading, setBackupsLoading] = useState(false);
const [creatingBackup, setCreatingBackup] = useState(false);
// Restore state
const [restoreModal, setRestoreModal] = useState<{ backup: Backup; typedSlug: string } | null>(null);
const [restoring, setRestoring] = useState(false);
const [activeRestoreId, setActiveRestoreId] = useState<string | null>(null);
const [activeRestoreState, setActiveRestoreState] = useState<{
status: string;
logTail?: string | null;
errorMessage?: string | null;
} | null>(null);
// Feature reconfiguration state
const [featureFlags, setFeatureFlags] = useState<Record<string, boolean>>({});
const [reconfiguring, setReconfiguring] = useState(false);
@ -109,6 +120,18 @@ export default function InstanceDetailPage() {
const [tunnelSaving, setTunnelSaving] = useState(false);
const [tunnelRemoving, setTunnelRemoving] = useState(false);
// Remote tunnel state (Pangolin API managed by CCP)
const [tunnelStatus, setTunnelStatus] = useState<{
configured: boolean;
online?: boolean;
siteId?: string;
endpoint?: string;
resources?: Array<{ subdomain: string; name: string; resourceId: string; hasTarget: boolean; targetIp?: string; targetPort?: number }>;
} | null>(null);
const [tunnelStatusLoading, setTunnelStatusLoading] = useState(false);
const [tunnelSetupRunning, setTunnelSetupRunning] = useState(false);
const [tunnelSyncing, setTunnelSyncing] = useState(false);
// Upgrade state
const [updateStatus, setUpdateStatus] = useState<UpdateStatus | null>(null);
const [checkingUpdate, setCheckingUpdate] = useState(false);
@ -390,6 +413,64 @@ export default function InstanceDetailPage() {
window.open(`/api/backups/${backupId}/download`, '_blank');
};
// Kick off a restore after the user has retyped the instance slug.
// On success we switch to polling mode (activeRestoreId drives the effect below).
const handleRestoreConfirm = async () => {
  if (!restoreModal) return;
  // Destructive-action guard: the typed slug must match exactly.
  if (restoreModal.typedSlug !== instance?.slug) {
    message.error('Typed slug does not match — restore cancelled');
    return;
  }
  setRestoring(true);
  try {
    const payload = { backupId: restoreModal.backup.id };
    const { data } = await api.post(`/instances/${id}/restore`, payload);
    setActiveRestoreId(data.data.id as string);
    setActiveRestoreState({ status: 'PENDING' });
    setRestoreModal(null);
    message.success('Restore started — polling for progress');
  } catch (err: unknown) {
    const e = err as { response?: { data?: { error?: { message?: string } } } };
    message.error(e?.response?.data?.error?.message || 'Failed to start restore');
  } finally {
    setRestoring(false);
  }
};
// Poll the active restore's status every 3s until it completes or fails
useEffect(() => {
  if (!activeRestoreId) return;
  // Drop responses that land after cleanup (unmount or a new restore id).
  let cancelled = false;
  const poll = async () => {
    try {
      const { data } = await api.get(`/instances/${id}/restores/${activeRestoreId}`);
      if (cancelled) return;
      const row = data.data;
      setActiveRestoreState({
        status: row.status,
        logTail: row.logTail,
        errorMessage: row.errorMessage,
      });
      if (row.status === 'COMPLETED') {
        message.success('Restore completed successfully');
        // Clearing the id stops polling via this effect's cleanup on re-run.
        setActiveRestoreId(null);
        fetchBackups();
      } else if (row.status === 'FAILED') {
        message.error(`Restore failed: ${row.errorMessage || 'unknown error'}`);
        setActiveRestoreId(null);
      }
    } catch {
      // keep trying; transient errors are expected during remote restart
    }
  };
  // Fire once immediately, then on a fixed 3-second interval.
  poll();
  const handle = setInterval(poll, 3000);
  return () => {
    cancelled = true;
    clearInterval(handle);
  };
}, [activeRestoreId, id, fetchBackups]);
// Initialize feature flags and tunnel form when instance loads
useEffect(() => {
if (instance) {
@ -508,6 +589,11 @@ export default function InstanceDetailPage() {
const ports = instance.portConfig as Record<string, number>;
const isProvisioning = instance.status === 'PROVISIONING';
const isRegistered = instance.isRegistered;
const isRemote = instance.isRemote;
// A "managed" instance is one CCP can run backup/restore/upgrade on.
// Local CCP-managed and remote (agent-backed) both qualify; only locally-
// adopted registered instances (isRegistered && !isRemote) are unmanaged.
const isManaged = !isRegistered || isRemote;
const canStart = instance.status === 'STOPPED' || instance.status === 'ERROR';
const canStop = instance.status === 'RUNNING' || instance.status === 'ERROR';
const canRestart = instance.status === 'RUNNING';
@ -731,7 +817,7 @@ export default function InstanceDetailPage() {
const backupsTab = (
<div>
{isRegistered && (
{!isManaged && (
<Alert
message="Backups not managed by CCP"
description="This instance was deployed outside the control panel. Use its own backup tools to manage backups."
@ -740,6 +826,15 @@ export default function InstanceDetailPage() {
style={{ marginBottom: 16 }}
/>
)}
{isRemote && (
<Alert
message="Remote instance"
description="Backups and restores run via the remote agent over mTLS. Create Backup triggers scripts/backup.sh on the remote host and streams the archive back to the control panel."
type="info"
showIcon
style={{ marginBottom: 16 }}
/>
)}
<div style={{ marginBottom: 12, display: 'flex', justifyContent: 'space-between' }}>
<Typography.Text type="secondary">
{backups.length} backup{backups.length !== 1 ? 's' : ''}
@ -749,7 +844,7 @@ export default function InstanceDetailPage() {
type="primary"
onClick={handleCreateBackup}
loading={creatingBackup}
disabled={instance.status !== 'RUNNING' || isRegistered}
disabled={instance.status !== 'RUNNING' || !isManaged}
>
Create Backup
</Button>
@ -784,20 +879,36 @@ export default function InstanceDetailPage() {
{
title: 'Size',
dataIndex: 'sizeBytes',
render: (b: number | null) => (b ? `${(b / 1024 / 1024).toFixed(1)} MB` : '-'),
render: (b: number | string | null) => {
if (b == null) return '-';
const n = typeof b === 'string' ? parseInt(b, 10) : b;
return `${(n / 1024 / 1024).toFixed(1)} MB`;
},
},
{
title: 'Actions',
width: 120,
width: 160,
render: (_: unknown, record: Backup) => (
<Space size="small">
{record.status === 'COMPLETED' && (
<Button
icon={<CloudDownloadOutlined />}
size="small"
type="text"
onClick={() => handleDownloadBackup(record.id)}
/>
<>
<Button
icon={<CloudDownloadOutlined />}
size="small"
type="text"
title="Download archive"
onClick={() => handleDownloadBackup(record.id)}
/>
{isManaged && (
<Button
icon={<UndoOutlined />}
size="small"
type="text"
title="Restore this backup (destructive)"
onClick={() => setRestoreModal({ backup: record, typedSlug: '' })}
/>
)}
</>
)}
<Popconfirm
title="Delete this backup?"
@ -1049,7 +1160,73 @@ export default function InstanceDetailPage() {
);
const tunnelConfigured = !!(instance.pangolinEndpoint && instance.pangolinNewtId);
const canConfigureTunnel = !isRegistered && (instance.status === 'RUNNING' || instance.status === 'STOPPED');
const canConfigureTunnel = isManaged && (instance.status === 'RUNNING' || instance.status === 'STOPPED');
// Fetch tunnel status for remote instances
const fetchTunnelStatus = useCallback(async () => {
  // Only remote (agent-backed) instances expose a tunnel status endpoint.
  if (!isRemote) return;
  setTunnelStatusLoading(true);
  try {
    const res = await api.get(`/instances/${id}/tunnel/status`);
    setTunnelStatus(res.data.data);
  } catch {
    // Any failure is treated as "no status available" rather than an error.
    setTunnelStatus(null);
  } finally {
    setTunnelStatusLoading(false);
  }
}, [id, isRemote]);
// Refresh remote tunnel status whenever the Tunnel tab becomes active.
useEffect(() => {
  if (activeTab !== 'tunnel' || !isRemote) return;
  fetchTunnelStatus();
}, [activeTab, isRemote, fetchTunnelStatus]);
// Ask the CCP to create the Pangolin site/resources and push Newt credentials
// to the remote instance. The subdomain prefix defaults to the instance slug.
const handleRemoteTunnelSetup = async (values: { subdomainPrefix?: string }) => {
  setTunnelSetupRunning(true);
  try {
    const subdomainPrefix = values.subdomainPrefix || instance.slug;
    await api.post(`/instances/${id}/tunnel/setup`, { subdomainPrefix });
    message.success('Tunnel setup complete — Newt credentials pushed to remote instance');
    // Re-read both the instance record and the live tunnel status.
    fetchInstance();
    fetchTunnelStatus();
  } catch (err: unknown) {
    const response = (err as { response?: { data?: { error?: { message?: string } } } }).response;
    message.error(response?.data?.error?.message || 'Tunnel setup failed');
  } finally {
    setTunnelSetupRunning(false);
  }
};
// Reconcile Pangolin resources with the instance's expected set; reports how
// many new resources the sync created, then refreshes the status table.
const handleTunnelSync = async () => {
  setTunnelSyncing(true);
  try {
    const res = await api.post(`/instances/${id}/tunnel/sync`);
    message.success(`Sync complete — ${res.data.data.created} new resource(s) created`);
    fetchTunnelStatus();
  } catch (err: unknown) {
    const response = (err as { response?: { data?: { error?: { message?: string } } } }).response;
    message.error(response?.data?.error?.message || 'Sync failed');
  } finally {
    setTunnelSyncing(false);
  }
};
// Delete the Pangolin site (and its resources) for this instance, then clear
// the locally-cached tunnel status and re-read the instance record.
const handleRemoteTunnelTeardown = async () => {
  setTunnelRemoving(true);
  try {
    await api.delete(`/instances/${id}/tunnel`);
    message.success('Tunnel torn down — Pangolin site deleted');
    fetchInstance();
    setTunnelStatus(null);
  } catch (err: unknown) {
    const response = (err as { response?: { data?: { error?: { message?: string } } } }).response;
    message.error(response?.data?.error?.message || 'Teardown failed');
  } finally {
    setTunnelRemoving(false);
  }
};
const handleConfigureTunnel = async (values: { pangolinEndpoint: string; pangolinNewtId: string; pangolinNewtSecret?: string }) => {
setTunnelSaving(true);
@ -1088,9 +1265,111 @@ export default function InstanceDetailPage() {
}
};
const tunnelTab = (
// Tunnel tab for remote (agent-backed) instances: the CCP drives the Pangolin
// API on the instance's behalf. Two states: configured (status + resources +
// teardown) or unconfigured (setup form).
const remoteTunnelTab = (
  <Space direction="vertical" size="large" style={{ width: '100%' }}>
    {tunnelStatus?.configured ? (
      <>
        {/* Live status banner — warning styling when the tunnel is offline */}
        <Alert
          message={`Tunnel active — ${tunnelStatus.online ? 'online' : 'offline'}`}
          description={`Connected to ${tunnelStatus.endpoint || instance.pangolinEndpoint} (site: ${tunnelStatus.siteId})`}
          type={tunnelStatus.online ? 'success' : 'warning'}
          showIcon
          icon={<CloudOutlined />}
        />
        {/* Status fields fall back to the persisted instance record when the
            live status response omits them */}
        <Card title="Current Configuration" size="small">
          <Descriptions bordered column={1}>
            <Descriptions.Item label="Endpoint">
              <Typography.Text copyable>{tunnelStatus.endpoint || instance.pangolinEndpoint}</Typography.Text>
            </Descriptions.Item>
            <Descriptions.Item label="Site ID">
              <Typography.Text copyable>{tunnelStatus.siteId || instance.pangolinSiteId}</Typography.Text>
            </Descriptions.Item>
            <Descriptions.Item label="Newt ID">
              <Typography.Text copyable>{instance.pangolinNewtId}</Typography.Text>
            </Descriptions.Item>
            <Descriptions.Item label="Status">
              <Tag color={tunnelStatus.online ? 'green' : 'orange'}>{tunnelStatus.online ? 'Online' : 'Offline'}</Tag>
            </Descriptions.Item>
          </Descriptions>
        </Card>
        {/* Resource list is only rendered when the site has resources */}
        {tunnelStatus.resources && tunnelStatus.resources.length > 0 && (
          <Card
            title="Resources"
            size="small"
            extra={
              <Space>
                <Button icon={<SyncOutlined />} size="small" onClick={handleTunnelSync} loading={tunnelSyncing}>
                  Sync
                </Button>
                <Button icon={<ReloadOutlined />} size="small" onClick={fetchTunnelStatus} loading={tunnelStatusLoading}>
                  Refresh
                </Button>
              </Space>
            }
          >
            <Table
              dataSource={tunnelStatus.resources}
              rowKey="resourceId"
              size="small"
              pagination={false}
              columns={[
                { title: 'Subdomain', dataIndex: 'subdomain', render: (s: string) => s || '(root)' },
                { title: 'Name', dataIndex: 'name' },
                { title: 'Target', render: (_: unknown, r: { hasTarget: boolean; targetIp?: string; targetPort?: number }) =>
                  r.hasTarget ? `${r.targetIp}:${r.targetPort}` : <Tag color="red">No target</Tag>
                },
              ]}
            />
          </Card>
        )}
        {/* Destructive: deletes the Pangolin site and stops the Newt container */}
        <Popconfirm
          title="Tear down tunnel?"
          description="This will delete the Pangolin site and all resources. The Newt container will be stopped."
          onConfirm={handleRemoteTunnelTeardown}
        >
          <Button danger icon={<DisconnectOutlined />} loading={tunnelRemoving}>
            Teardown Tunnel
          </Button>
        </Popconfirm>
      </>
    ) : (
      <>
        <Alert
          message="No tunnel configured"
          description="The CCP will create a Pangolin site and resources for this instance, push Newt credentials to its .env, and start the tunnel container."
          type="info"
          showIcon
        />
        {/* Setup form — subdomain prefix defaults to the instance slug */}
        <Card title="Setup Tunnel" size="small">
          <Form layout="vertical" onFinish={handleRemoteTunnelSetup}>
            <Form.Item
              name="subdomainPrefix"
              label="Subdomain Prefix"
              initialValue={instance.slug}
              extra={`Resources will be created as <prefix>-app.${instance.domain}, <prefix>-api.${instance.domain}, etc.`}
              rules={[{ required: true }, { pattern: /^[a-z0-9-]+$/, message: 'Lowercase alphanumeric + hyphens only' }]}
            >
              <Input placeholder={instance.slug} />
            </Form.Item>
            <Form.Item style={{ marginBottom: 0 }}>
              <Button type="primary" htmlType="submit" icon={<CloudOutlined />} loading={tunnelSetupRunning}>
                Setup Tunnel
              </Button>
            </Form.Item>
          </Form>
        </Card>
      </>
    )}
  </Space>
);
const localTunnelTab = (
<Space direction="vertical" size="large" style={{ width: '100%' }}>
{!isManaged && (
<Alert
message="Tunnel management is not available for external instances"
description="This instance was deployed outside the control panel. Manage its tunnel configuration directly."
@ -1099,7 +1378,7 @@ export default function InstanceDetailPage() {
/>
)}
{!isRegistered && tunnelConfigured && (
{isManaged && tunnelConfigured && (
<Alert
message={`Tunnel active — connected to ${instance.pangolinEndpoint}`}
type="success"
@ -1108,7 +1387,7 @@ export default function InstanceDetailPage() {
/>
)}
{!isRegistered && !tunnelConfigured && (
{isManaged && !tunnelConfigured && (
<Alert
message="No tunnel configured"
description="Enter your Pangolin Newt credentials below to enable tunnel access for this instance. You can get these from your Pangolin dashboard."
@ -1133,7 +1412,7 @@ export default function InstanceDetailPage() {
</Card>
)}
{canConfigureTunnel && (
{canConfigureTunnel && !isRemote && (
<Card title={tunnelConfigured ? 'Update Tunnel' : 'Enable Tunnel'} size="small">
<Form
form={tunnelForm}
@ -1200,6 +1479,8 @@ export default function InstanceDetailPage() {
</Space>
);
const tunnelTab = isRemote ? remoteTunnelTab : localTunnelTab;
// ─── Updates Tab ──────────────────────────────────────────────
const isUpgrading = currentUpgrade?.status === 'IN_PROGRESS' || currentUpgrade?.status === 'PENDING';
@ -1278,7 +1559,7 @@ export default function InstanceDetailPage() {
)}
{/* Upgrade Action */}
{!isRegistered && (
{isManaged && (
<Card title="Upgrade" size="small">
{isUpgrading && currentUpgrade ? (
<Space direction="vertical" style={{ width: '100%' }}>
@ -1340,7 +1621,7 @@ export default function InstanceDetailPage() {
</Card>
)}
{isRegistered && (
{!isManaged && (
<Alert
message="Upgrades are not managed by CCP for external instances"
description="Run the upgrade script directly on the instance or use its own upgrade mechanism."
@ -1348,6 +1629,14 @@ export default function InstanceDetailPage() {
showIcon
/>
)}
{isRemote && (
<Alert
message="Remote instance"
description="Upgrades run via the remote agent over mTLS. The agent shells out to scripts/upgrade.sh --api-mode and the control panel polls progress every 2s."
type="info"
showIcon
/>
)}
{/* Upgrade History */}
<Card title="Upgrade History" size="small">
@ -1794,6 +2083,108 @@ export default function InstanceDetailPage() {
{ key: 'tunnel', label: 'Tunnel', children: tunnelTab },
]}
/>
{/* Restore confirmation modal (destructive action guard) */}
<Modal
title="Restore backup — destructive"
open={!!restoreModal}
onCancel={() => setRestoreModal(null)}
onOk={handleRestoreConfirm}
okText="Restore"
okButtonProps={{
danger: true,
loading: restoring,
disabled: restoreModal?.typedSlug !== instance.slug,
}}
cancelButtonProps={{ disabled: restoring }}
width={560}
>
<Alert
type="error"
showIcon
message="This will OVERWRITE the instance's databases and uploads"
description="The agent will stop application containers, drop databases, and restore from the selected backup. This cannot be undone without another backup."
style={{ marginBottom: 16 }}
/>
{restoreModal && (
<Descriptions column={1} size="small" bordered style={{ marginBottom: 16 }}>
<Descriptions.Item label="Backup ID">
<code>{restoreModal.backup.id.substring(0, 8)}</code>
</Descriptions.Item>
<Descriptions.Item label="Archive size">
{restoreModal.backup.sizeBytes
? `${(Number(restoreModal.backup.sizeBytes) / 1024 / 1024).toFixed(1)} MB`
: '-'}
</Descriptions.Item>
<Descriptions.Item label="Created">
{restoreModal.backup.completedAt
? dayjs(restoreModal.backup.completedAt).format('YYYY-MM-DD HH:mm')
: '-'}
</Descriptions.Item>
</Descriptions>
)}
<Typography.Paragraph>
Type the instance slug <strong><code>{instance.slug}</code></strong> to confirm:
</Typography.Paragraph>
<Input
value={restoreModal?.typedSlug || ''}
onChange={(e) =>
setRestoreModal((cur) => (cur ? { ...cur, typedSlug: e.target.value } : cur))
}
placeholder={instance.slug}
autoFocus
/>
</Modal>
{/* Active restore progress banner */}
{activeRestoreId && activeRestoreState && (
<Modal
title="Restore in progress"
open
closable={false}
footer={null}
width={640}
>
<Space direction="vertical" style={{ width: '100%' }}>
<div>
<Tag
color={
activeRestoreState.status === 'COMPLETED'
? 'green'
: activeRestoreState.status === 'FAILED'
? 'red'
: 'processing'
}
>
{activeRestoreState.status}
</Tag>
{activeRestoreState.status === 'RUNNING' && (
<Typography.Text type="secondary" style={{ marginLeft: 8 }}>
Agent is running scripts/restore.sh this can take several minutes
</Typography.Text>
)}
</div>
{activeRestoreState.errorMessage && (
<Alert type="error" message={activeRestoreState.errorMessage} showIcon />
)}
{activeRestoreState.logTail && (
<pre
style={{
background: '#1e1e1e',
color: '#d4d4d4',
padding: 12,
maxHeight: 300,
overflow: 'auto',
fontSize: 12,
borderRadius: 4,
}}
>
{activeRestoreState.logTail}
</pre>
)}
</Space>
</Modal>
)}
</div>
);
}

View File

@ -14,7 +14,7 @@ export default function InviteCodesPage() {
const fetchCodes = useCallback(async () => {
try {
setLoading(true);
const { data } = await api.get('/api/invite-codes');
const { data } = await api.get('/invite-codes');
setCodes(data.data || []);
} catch {
message.error('Failed to load invite codes');
@ -28,7 +28,7 @@ export default function InviteCodesPage() {
const handleCreate = async () => {
try {
setCreating(true);
const { data } = await api.post('/api/invite-codes');
const { data } = await api.post('/invite-codes');
message.success(`Invite code created: ${data.code}`);
fetchCodes();
} catch {
@ -40,7 +40,7 @@ export default function InviteCodesPage() {
const handleRevoke = async (id: string) => {
try {
await api.delete(`/api/invite-codes/${id}`);
await api.delete(`/invite-codes/${id}`);
message.success('Invite code revoked');
fetchCodes();
} catch {

View File

@ -26,6 +26,7 @@ const envSchema = z.object({
INSTANCE_SLUG: z.string().default(''),
INSTANCE_DOMAIN: z.string().default(''),
INSTANCE_BASE_PATH: z.string().default(''),
COMPOSE_PROJECT: z.string().default(''),
});
function validateEnv() {

View File

@ -1,105 +1,623 @@
import { Router, Request, Response } from 'express';
import { param } from '../utils/params';
import fs from 'fs/promises';
import path from 'path';
import { exec as execCb } from 'child_process';
import { createReadStream, createWriteStream } from 'fs';
import { pipeline as pipelineCb, Transform } from 'stream';
import { promisify } from 'util';
import * as docker from '../services/docker.service';
import path from 'path';
import crypto from 'crypto';
import { spawn } from 'child_process';
import { getSlugEntry } from '../services/registry.service';
import { env } from '../config/env';
import { logger } from '../utils/logger';
import { withSlugLock, SlugBusyError, isSlugLocked } from '../services/slug-mutex';
import { AgentError } from '../middleware/error-handler';
const pipeline = promisify(pipelineCb);
const exec = promisify(execCb);
const router = Router();
// POST /instance/:slug/backup — Run pg_dump + tar uploads → return backup info
router.post('/instance/:slug/backup', async (req: Request, res: Response) => {
const entry = await getSlugEntry(param(req, 'slug'));
const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
const backupDir = path.join(env.AGENT_DATA_DIR, 'backups', param(req, 'slug'), timestamp);
await fs.mkdir(backupDir, { recursive: true });
// ─── Helpers ──────────────────────────────────────────────────────────
const { pgPassword } = req.body;
// Path-safe identifier pattern — presumably used to validate slug/backup-id
// route params before they touch the filesystem; confirm at the call sites.
const ID_REGEX = /^[a-zA-Z0-9_-]+$/;
// Archive naming scheme shared with scripts/backup.sh:
// changemaker-v2-backup-<timestamp>.tar.gz
const ARCHIVE_PREFIX = 'changemaker-v2-backup-';
const ARCHIVE_SUFFIX = '.tar.gz';
/** Directory under AGENT_DATA_DIR that holds all backup archives for a slug. */
function backupsDirFor(slug: string): string {
  const backupsRoot = path.join(env.AGENT_DATA_DIR, 'backups');
  return path.join(backupsRoot, slug);
}
/** Absolute path of the archive for backup `id` inside the slug's backup dir. */
function archivePathFor(slug: string, id: string): string {
  const filename = ARCHIVE_PREFIX + id + ARCHIVE_SUFFIX;
  return path.join(backupsDirFor(slug), filename);
}
/**
 * Stream a file through SHA-256 and resolve with its hex digest.
 * Streaming keeps memory flat regardless of archive size; rejects on read errors.
 */
async function sha256File(filePath: string): Promise<string> {
  return new Promise((resolve, reject) => {
    const digest = crypto.createHash('sha256');
    createReadStream(filePath)
      .on('error', reject)
      .on('data', (chunk) => digest.update(chunk))
      .on('end', () => resolve(digest.digest('hex')));
  });
}
/**
 * Read the manifest.json out of a backup archive without extracting it.
 * backup.sh stores it at <archive>/changemaker-v2-backup-<ts>/manifest.json.
 * Resolves null on any failure (tar missing, bad archive, unparseable JSON) —
 * callers treat the manifest as best-effort metadata.
 * NOTE(review): `--wildcards` is GNU-tar syntax; assumes the agent host runs
 * GNU tar — confirm for non-Linux hosts.
 */
async function readManifestFromArchive(archivePath: string): Promise<unknown | null> {
  return new Promise((resolve) => {
    const tar = spawn('tar', ['-xzOf', archivePath, '--wildcards', '*/manifest.json'], {
      stdio: ['ignore', 'pipe', 'ignore'],
    });
    const chunks: string[] = [];
    tar.stdout.on('data', (chunk) => chunks.push(chunk.toString('utf-8')));
    // Spawn failure (e.g. tar not installed) degrades to "no manifest".
    tar.on('error', () => resolve(null));
    tar.on('close', (code) => {
      const text = chunks.join('');
      if (code !== 0 || !text.trim()) {
        resolve(null);
        return;
      }
      try {
        resolve(JSON.parse(text));
      } catch {
        resolve(null);
      }
    });
  });
}
/**
 * Derive the backup id from an archive filename, e.g.
 * "changemaker-v2-backup-20260409_143000.tar.gz" → "20260409_143000".
 * Returns null for filenames that don't follow the naming convention.
 */
function idFromFilename(filename: string): string | null {
  const wellFormed = filename.startsWith(ARCHIVE_PREFIX) && filename.endsWith(ARCHIVE_SUFFIX);
  return wellFormed
    ? filename.slice(ARCHIVE_PREFIX.length, filename.length - ARCHIVE_SUFFIX.length)
    : null;
}
// ─── Routes ───────────────────────────────────────────────────────────
/**
* POST /instance/:slug/backup
* Shells out to the remote CML's scripts/backup.sh. Returns archive metadata
* so the CCP can immediately stream it down via the /download endpoint.
*/
router.post('/instance/:slug/backup', async (req: Request, res: Response) => {
const slug = param(req, 'slug');
const entry = await getSlugEntry(slug);
try {
// 1. pg_dump
const dumpFile = path.join(backupDir, 'database.sql');
const dump = await docker.composeExec(
entry.basePath, entry.composeProject,
'v2-postgres',
'pg_dump -U changemaker -d changemaker',
300_000,
pgPassword ? { PGPASSWORD: pgPassword } : undefined
);
await fs.writeFile(dumpFile, dump, 'utf-8');
const result = await withSlugLock(slug, 'backup', async () => {
const backupsDir = backupsDirFor(slug);
await fs.mkdir(backupsDir, { recursive: true });
// Gzip the dump
await exec(`gzip '${dumpFile}'`, { timeout: 120_000 });
// Verify scripts/backup.sh exists
const scriptPath = path.join(entry.basePath, 'scripts', 'backup.sh');
try {
await fs.access(scriptPath);
} catch {
throw new AgentError(500, `scripts/backup.sh not found at ${scriptPath}`, 'BACKUP_SCRIPT_MISSING');
}
// 2. Tar uploads if exists
const uploadsDir = path.join(entry.basePath, 'uploads');
let hasUploads = false;
try {
await fs.access(uploadsDir);
hasUploads = true;
} catch { /* no uploads dir */ }
if (hasUploads) {
await exec(
`tar -czf '${path.join(backupDir, 'uploads.tar.gz')}' -C '${entry.basePath}' uploads`,
{ timeout: 300_000 }
// Snapshot existing archive filenames so we can identify the new one
const existingFiles = new Set(
(await fs.readdir(backupsDir)).filter((f) => f.startsWith(ARCHIVE_PREFIX) && f.endsWith(ARCHIVE_SUFFIX))
);
}
// 3. Create final archive
const archiveName = `backup-${param(req, 'slug')}-${timestamp}.tar.gz`;
const archivePath = path.join(env.AGENT_DATA_DIR, 'backups', archiveName);
await exec(
`tar -czf '${archivePath}' -C '${path.dirname(backupDir)}' '${timestamp}'`,
{ timeout: 300_000 }
);
const logPath = path.join(backupsDir, `backup-${Date.now()}.log`);
const logFd = await fs.open(logPath, 'w');
// Clean up temp dir
await fs.rm(backupDir, { recursive: true, force: true });
// Spawn backup.sh with cwd=basePath so its .env detection works.
// Retention is effectively disabled here — CCP manages retention of
// the streamed-down archives, not the agent's transient copies.
//
// Container names: backup.sh defaults to `changemaker-v2-postgres` and
// `listmonk-db`, which match the main CML's `container_name:` overrides.
// If a deployment has custom naming, the operator can set PG_CONTAINER /
// LISTMONK_PG_CONTAINER in the instance's own .env (backup.sh loads it).
const spawnEnv: NodeJS.ProcessEnv = {
...process.env,
BACKUP_DIR: backupsDir,
RETENTION_DAYS: '36500', // ~100 years; CCP controls retention
};
const stats = await fs.stat(archivePath);
const backupId = timestamp;
logger.info(`[backup] Running scripts/backup.sh for ${slug} (basePath=${entry.basePath})`);
logger.info(`[backup] Created backup for ${param(req, 'slug')}: ${archivePath} (${stats.size} bytes)`);
const exitCode: number = await new Promise((resolve, reject) => {
const proc = spawn('bash', ['scripts/backup.sh'], {
cwd: entry.basePath,
env: spawnEnv,
stdio: ['ignore', 'pipe', 'pipe'],
});
proc.stdout.on('data', (chunk) => logFd.write(chunk).catch(() => {}));
proc.stderr.on('data', (chunk) => logFd.write(chunk).catch(() => {}));
proc.on('error', reject);
proc.on('close', (code) => resolve(code ?? 1));
});
res.json({
backupId,
archivePath,
sizeBytes: stats.size,
timestamp,
await logFd.close();
if (exitCode !== 0) {
// Return the tail of the log so the CCP can display it
let logTail = '';
try {
const fullLog = await fs.readFile(logPath, 'utf-8');
logTail = fullLog.split('\n').slice(-40).join('\n');
} catch { /* ignore */ }
throw new AgentError(500, `backup.sh exited with code ${exitCode}\n${logTail}`, 'BACKUP_FAILED');
}
// Find the new archive
const afterFiles = (await fs.readdir(backupsDir)).filter(
(f) => f.startsWith(ARCHIVE_PREFIX) && f.endsWith(ARCHIVE_SUFFIX)
);
const newFiles = afterFiles.filter((f) => !existingFiles.has(f));
if (newFiles.length === 0) {
throw new AgentError(500, 'backup.sh succeeded but no new archive was created', 'BACKUP_NO_OUTPUT');
}
// Pick the most recently modified (in case of oddities)
newFiles.sort();
const newest = newFiles[newFiles.length - 1] as string;
const archivePath = path.join(backupsDir, newest);
const backupId = idFromFilename(newest);
if (!backupId || !ID_REGEX.test(backupId)) {
throw new AgentError(500, `Unexpected archive filename: ${newest}`, 'BACKUP_NAME_INVALID');
}
const stats = await fs.stat(archivePath);
const sha256 = await sha256File(archivePath);
const manifest = await readManifestFromArchive(archivePath);
// Delete the log file once we know the backup succeeded
try { await fs.unlink(logPath); } catch { /* ignore */ }
logger.info(`[backup] ${slug}: created ${newest} (${stats.size} bytes, sha256=${sha256.substring(0, 16)}...)`);
return {
backupId,
filename: newest,
sizeBytes: stats.size,
sha256,
manifest,
createdAt: stats.mtime.toISOString(),
};
});
res.json(result);
} catch (err) {
// Clean up on failure
try { await fs.rm(backupDir, { recursive: true, force: true }); } catch { /* ignore */ }
if (err instanceof SlugBusyError) {
res.status(409).json({ error: 'SLUG_BUSY', message: err.message });
return;
}
throw err;
}
});
// GET /instance/:slug/backup/:id/download — Stream backup archive
router.get('/instance/:slug/backup/:id/download', async (req: Request, res: Response) => {
const archiveName = `backup-${param(req, 'slug')}-${param(req, 'id')}.tar.gz`;
const archivePath = path.join(env.AGENT_DATA_DIR, 'backups', archiveName);
/**
 * GET /instance/:slug/backups
 * Lists backup archives currently held on the agent for this slug.
 *
 * FIX(review): removed a stray merge-residue line
 * (`await fs.access(archivePath);`) that referenced an undefined variable
 * from the old download route and would not compile.
 */
router.get('/instance/:slug/backups', async (req: Request, res: Response) => {
  const slug = param(req, 'slug');
  await getSlugEntry(slug); // validate slug is registered
  const backupsDir = backupsDirFor(slug);
  let entries: string[] = [];
  try {
    entries = await fs.readdir(backupsDir);
  } catch {
    // No backups directory yet — report an empty list rather than erroring.
    res.json({ data: [] });
    return;
  }
  const results = [];
  for (const filename of entries) {
    const id = idFromFilename(filename);
    // Skip logs / partial files that don't match the archive naming convention
    if (!id) continue;
    try {
      const stats = await fs.stat(path.join(backupsDir, filename));
      results.push({
        backupId: id,
        filename,
        sizeBytes: stats.size,
        createdAt: stats.mtime.toISOString(),
      });
    } catch { /* skip files that vanish between readdir and stat */ }
  }
  // Newest first
  results.sort((a, b) => (a.createdAt < b.createdAt ? 1 : -1));
  res.json({ data: results });
});
/**
 * GET /instance/:slug/backup/:id/download
 * Streams the backup archive (supports Content-Length so the CCP can verify size).
 */
router.get('/instance/:slug/backup/:id/download', async (req: Request, res: Response) => {
  const slug = param(req, 'slug');
  const id = param(req, 'id');
  // Reject ids containing path separators etc. before touching the filesystem.
  if (!ID_REGEX.test(id)) {
    res.status(400).json({ error: 'INVALID_ID', message: 'Invalid backup id' });
    return;
  }
  await getSlugEntry(slug);
  const archivePath = archivePathFor(slug, id);
  try {
    const stats = await fs.stat(archivePath);
    res.setHeader('Content-Type', 'application/gzip');
    res.setHeader('Content-Length', String(stats.size));
    res.setHeader('Content-Disposition', `attachment; filename="${path.basename(archivePath)}"`);
    const stream = createReadStream(archivePath);
    stream.on('error', (err) => {
      logger.error(`[backup] stream error for ${archivePath}: ${err.message}`);
      // Headers not sent yet → report a clean 500; otherwise abort the socket
      // so the client sees a truncated transfer instead of a silent success.
      if (!res.headersSent) res.status(500).end();
      else res.destroy(err);
    });
    stream.pipe(res);
  } catch {
    // stat failed → archive does not exist (or is unreadable)
    res.status(404).json({ error: 'NOT_FOUND', message: 'Backup archive not found' });
    return;
  }
});
const stats = await fs.stat(archivePath);
res.setHeader('Content-Type', 'application/gzip');
res.setHeader('Content-Length', stats.size);
res.setHeader('Content-Disposition', `attachment; filename="${archiveName}"`);
/**
 * DELETE /instance/:slug/backup/:id
 * Deletes the archive from the agent's disk. The CCP calls this after it has
 * successfully streamed the archive to its own storage.
 *
 * FIX(review): removed merge-residue lines (a dynamic `import('fs')` plus
 * old download-route stream code) that referenced `archivePath` before its
 * declaration and would not compile.
 */
router.delete('/instance/:slug/backup/:id', async (req: Request, res: Response) => {
  const slug = param(req, 'slug');
  const id = param(req, 'id');
  if (!ID_REGEX.test(id)) {
    res.status(400).json({ error: 'INVALID_ID', message: 'Invalid backup id' });
    return;
  }
  await getSlugEntry(slug);
  const archivePath = archivePathFor(slug, id);
  // Path traversal defense: ensure the resolved path is still inside the slug's backups dir
  const resolved = path.resolve(archivePath);
  const boundary = path.resolve(backupsDirFor(slug));
  if (!resolved.startsWith(boundary + path.sep)) {
    res.status(400).json({ error: 'INVALID_ID', message: 'Invalid backup id' });
    return;
  }
  try {
    await fs.unlink(archivePath);
    logger.info(`[backup] ${slug}: deleted ${path.basename(archivePath)}`);
    res.json({ deleted: true });
  } catch (err) {
    const code = (err as NodeJS.ErrnoException).code;
    if (code === 'ENOENT') {
      res.status(404).json({ error: 'NOT_FOUND', message: 'Backup archive not found' });
      return;
    }
    throw err;
  }
});
// ─── Restore ──────────────────────────────────────────────────────────
// Hard cap on a single restore upload. The CCP is trusted, but a buggy or
// compromised CCP shouldn't be able to fill the agent's disk in one request.
// 20 GB is well above any realistic Changemaker Lite backup size.
const MAX_RESTORE_UPLOAD_BYTES = 20 * 1024 * 1024 * 1024;

/** Directory under AGENT_DATA_DIR holding restore uploads for a slug. */
function restoresDirFor(slug: string): string {
  return path.join(env.AGENT_DATA_DIR, 'restores', slug);
}

/** Working directory for one upload (archive, restore.log, and state file live here). */
function restoreUploadDir(slug: string, uploadId: string): string {
  return path.join(restoresDirFor(slug), uploadId);
}
/**
 * On-disk state machine for a restore, persisted as restore-state.json in the
 * upload directory so progress reporting survives agent restarts.
 */
interface RestoreState {
  // Lifecycle: UPLOADED → RUNNING → COMPLETED | FAILED
  status: 'UPLOADED' | 'RUNNING' | 'COMPLETED' | 'FAILED';
  uploadId: string;
  startedAt: string;                  // ISO timestamp
  completedAt?: string;               // ISO timestamp; set when COMPLETED/FAILED
  exitCode?: number;                  // restore.sh exit code
  logTail?: string;                   // last lines of restore.log for CCP display
  errorMessage?: string;              // present when status === 'FAILED'
  options?: Record<string, unknown>;  // skipDb/skipUploads/skipListmonk/dryRun flags
}
/** Load restore-state.json for an upload; null if missing or unparsable. */
async function readRestoreState(slug: string, uploadId: string): Promise<RestoreState | null> {
  try {
    const raw = await fs.readFile(
      path.join(restoreUploadDir(slug, uploadId), 'restore-state.json'),
      'utf-8'
    );
    return JSON.parse(raw) as RestoreState;
  } catch {
    return null;
  }
}
/** Persist restore-state.json for an upload (pretty-printed for easier debugging). */
async function writeRestoreState(slug: string, uploadId: string, state: RestoreState): Promise<void> {
  const target = path.join(restoreUploadDir(slug, uploadId), 'restore-state.json');
  const payload = JSON.stringify(state, null, 2);
  await fs.writeFile(target, payload, 'utf-8');
}
/**
 * POST /instance/:slug/restore/upload?sha256=<hex>
 * Accepts an application/octet-stream upload of a backup archive and writes
 * it to the agent's restores directory. Verifies SHA256 as it streams — if
 * the hash doesn't match, the partial file is deleted and we return 400.
 *
 * Returns `{ uploadId, sizeBytes, sha256 }`.
 */
router.post('/instance/:slug/restore/upload', async (req: Request, res: Response) => {
  const slug = param(req, 'slug');
  await getSlugEntry(slug);
  // Refuse while a restore or backup is already running for this slug.
  if (isSlugLocked(slug, 'restore')) {
    res.status(409).json({ error: 'SLUG_BUSY', message: 'A restore is already in progress for this slug' });
    return;
  }
  if (isSlugLocked(slug, 'backup')) {
    res.status(409).json({ error: 'SLUG_BUSY', message: 'A backup is in progress for this slug' });
    return;
  }
  // Caller must declare the expected digest up-front so we can verify inline.
  const expectedSha256 = typeof req.query.sha256 === 'string' ? req.query.sha256.toLowerCase() : undefined;
  if (!expectedSha256 || !/^[a-f0-9]{64}$/.test(expectedSha256)) {
    res.status(400).json({ error: 'VALIDATION', message: 'sha256 query parameter required (64 hex chars)' });
    return;
  }
  // Random upload id — unguessable and filesystem-safe.
  const uploadId = crypto.randomBytes(16).toString('hex');
  const uploadDir = restoreUploadDir(slug, uploadId);
  await fs.mkdir(uploadDir, { recursive: true });
  const archivePath = path.join(uploadDir, 'archive.tar.gz');
  const hash = crypto.createHash('sha256');
  let bytesWritten = 0;
  // Pass-through transform that hashes and size-caps the body as it streams.
  const hashTransform = new Transform({
    transform(chunk: Buffer, _enc, cb) {
      bytesWritten += chunk.length;
      if (bytesWritten > MAX_RESTORE_UPLOAD_BYTES) {
        // Abort the stream — pipeline() will reject and the catch block below
        // will remove the partial upload directory.
        cb(new AgentError(
          413,
          `Upload exceeds maximum allowed size of ${MAX_RESTORE_UPLOAD_BYTES} bytes`,
          'UPLOAD_TOO_LARGE'
        ));
        return;
      }
      hash.update(chunk);
      cb(null, chunk);
    },
  });
  try {
    const writeStream = createWriteStream(archivePath);
    await pipeline(req, hashTransform, writeStream);
    const sha256 = hash.digest('hex');
    if (sha256 !== expectedSha256) {
      // Integrity failure — nuke the upload
      await fs.rm(uploadDir, { recursive: true, force: true });
      res.status(400).json({
        error: 'SHA256_MISMATCH',
        message: `Expected sha256 ${expectedSha256}, got ${sha256}`,
      });
      return;
    }
    const stats = await fs.stat(archivePath);
    // Persist initial state so the progress endpoint works even before apply
    await writeRestoreState(slug, uploadId, {
      status: 'UPLOADED',
      uploadId,
      startedAt: new Date().toISOString(),
    });
    logger.info(`[restore] ${slug}: uploaded ${bytesWritten} bytes (sha256=${sha256.substring(0, 16)}...) upload_id=${uploadId}`);
    res.json({
      uploadId,
      sizeBytes: stats.size,
      sha256,
    });
  } catch (err) {
    // Stream error or write error — clean up
    try { await fs.rm(uploadDir, { recursive: true, force: true }); } catch { /* ignore */ }
    throw err;
  }
});
/**
 * POST /instance/:slug/restore/:uploadId/apply
 * Body: { confirm: true, skipDb?, skipUploads?, skipListmonk?, dryRun? }
 *
 * Fires off `scripts/restore.sh --archive <path> --force` in the background
 * and writes progress to restore-state.json. The CCP polls the progress
 * endpoint for updates. Mutex prevents concurrent restores/backups.
 */
router.post('/instance/:slug/restore/:uploadId/apply', async (req: Request, res: Response) => {
  const slug = param(req, 'slug');
  const uploadId = param(req, 'uploadId');
  if (!ID_REGEX.test(uploadId)) {
    res.status(400).json({ error: 'INVALID_ID', message: 'Invalid upload id' });
    return;
  }
  const entry = await getSlugEntry(slug);
  const { confirm, skipDb, skipUploads, skipListmonk, dryRun } = req.body ?? {};
  // Destructive operation — require an explicit confirmation flag.
  if (confirm !== true) {
    res.status(400).json({ error: 'CONFIRMATION_REQUIRED', message: 'Body must include { confirm: true }' });
    return;
  }
  const uploadDir = restoreUploadDir(slug, uploadId);
  // Path traversal defense
  const resolvedDir = path.resolve(uploadDir);
  const boundary = path.resolve(restoresDirFor(slug));
  if (!resolvedDir.startsWith(boundary + path.sep)) {
    res.status(400).json({ error: 'INVALID_ID', message: 'Invalid upload id' });
    return;
  }
  const archivePath = path.join(uploadDir, 'archive.tar.gz');
  try {
    await fs.access(archivePath);
  } catch {
    res.status(404).json({ error: 'NOT_FOUND', message: 'Upload not found or already applied' });
    return;
  }
  // Verify scripts/restore.sh exists
  const scriptPath = path.join(entry.basePath, 'scripts', 'restore.sh');
  try {
    await fs.access(scriptPath);
  } catch {
    res.status(500).json({ error: 'RESTORE_SCRIPT_MISSING', message: `scripts/restore.sh not found at ${scriptPath}` });
    return;
  }
  // Check mutex state (don't block — tell caller it's busy)
  if (isSlugLocked(slug, 'restore') || isSlugLocked(slug, 'backup')) {
    res.status(409).json({ error: 'SLUG_BUSY', message: 'Slug is busy with backup or restore' });
    return;
  }
  // Fire-and-forget: acquire lock and run in background. Return immediately
  // so CCP can start polling /progress.
  const options = {
    skipDb: !!skipDb,
    skipUploads: !!skipUploads,
    skipListmonk: !!skipListmonk,
    dryRun: !!dryRun,
  };
  await writeRestoreState(slug, uploadId, {
    status: 'RUNNING',
    uploadId,
    startedAt: new Date().toISOString(),
    options,
  });
  // Build restore.sh args (all flags, no user input interpolated into a shell string)
  const args = ['scripts/restore.sh', '--archive', archivePath, '--force'];
  if (options.skipDb) args.push('--skip-db');
  if (options.skipUploads) args.push('--skip-uploads');
  if (options.skipListmonk) args.push('--skip-listmonk');
  if (options.dryRun) args.push('--dry-run');
  const logPath = path.join(uploadDir, 'restore.log');
  // Schedule the background task — don't await inside the handler
  void withSlugLock(slug, 'restore', async () => {
    const logFd = await fs.open(logPath, 'w');
    logger.info(`[restore] ${slug}: running ${args.join(' ')} (cwd=${entry.basePath})`);
    const exitCode: number = await new Promise((resolve, reject) => {
      const proc = spawn('bash', args, {
        cwd: entry.basePath,
        env: { ...process.env },
        stdio: ['ignore', 'pipe', 'pipe'],
      });
      // Tee script output into restore.log; log-write failures are non-fatal
      proc.stdout.on('data', (chunk) => logFd.write(chunk).catch(() => {}));
      proc.stderr.on('data', (chunk) => logFd.write(chunk).catch(() => {}));
      proc.on('error', reject);
      proc.on('close', (code) => resolve(code ?? 1));
    });
    await logFd.close();
    // Read the tail of the log for the state file
    let logTail = '';
    try {
      const fullLog = await fs.readFile(logPath, 'utf-8');
      logTail = fullLog.split('\n').slice(-80).join('\n');
    } catch { /* ignore */ }
    const state: RestoreState = {
      status: exitCode === 0 ? 'COMPLETED' : 'FAILED',
      uploadId,
      // Preserve the original startedAt written by the upload/apply steps
      startedAt: (await readRestoreState(slug, uploadId))?.startedAt || new Date().toISOString(),
      completedAt: new Date().toISOString(),
      exitCode,
      logTail,
      options,
      ...(exitCode !== 0 ? { errorMessage: `restore.sh exited with code ${exitCode}` } : {}),
    };
    await writeRestoreState(slug, uploadId, state);
    logger.info(`[restore] ${slug}: restore.sh finished with exit ${exitCode}`);
  }).catch(async (err) => {
    logger.error(`[restore] ${slug}: background restore failed: ${(err as Error).message}`);
    // If the mutex was the issue, state is already written. Otherwise, mark failed.
    if (!(err instanceof SlugBusyError)) {
      try {
        await writeRestoreState(slug, uploadId, {
          status: 'FAILED',
          uploadId,
          startedAt: new Date().toISOString(),
          completedAt: new Date().toISOString(),
          errorMessage: (err as Error).message,
          options,
        });
      } catch { /* ignore */ }
    }
  });
  // 202: accepted for background processing — CCP polls /progress for outcome.
  res.status(202).json({ applied: true, uploadId, options });
});
/**
 * GET /instance/:slug/restore/:uploadId/progress
 * Returns the current state of a running or completed restore.
 */
router.get('/instance/:slug/restore/:uploadId/progress', async (req: Request, res: Response) => {
  const slug = param(req, 'slug');
  const uploadId = param(req, 'uploadId');
  // Same id hygiene as the other restore endpoints.
  if (!ID_REGEX.test(uploadId)) {
    res.status(400).json({ error: 'INVALID_ID', message: 'Invalid upload id' });
    return;
  }
  await getSlugEntry(slug);
  const state = await readRestoreState(slug, uploadId);
  if (state) {
    res.json(state);
  } else {
    res.status(404).json({ error: 'NOT_FOUND', message: 'Restore not found' });
  }
});
/**
 * DELETE /instance/:slug/restore/:uploadId
 * Removes a restore upload directory. Refuses if a restore is currently running.
 *
 * FIX(review): removed a no-op `try { … } catch (err) { throw err; }` wrapper —
 * rethrowing unchanged is identical to not catching at all.
 */
router.delete('/instance/:slug/restore/:uploadId', async (req: Request, res: Response) => {
  const slug = param(req, 'slug');
  const uploadId = param(req, 'uploadId');
  if (!ID_REGEX.test(uploadId)) {
    res.status(400).json({ error: 'INVALID_ID', message: 'Invalid upload id' });
    return;
  }
  await getSlugEntry(slug);
  const uploadDir = restoreUploadDir(slug, uploadId);
  // Path traversal defense (mirrors the backup delete route)
  const resolvedDir = path.resolve(uploadDir);
  const boundary = path.resolve(restoresDirFor(slug));
  if (!resolvedDir.startsWith(boundary + path.sep)) {
    res.status(400).json({ error: 'INVALID_ID', message: 'Invalid upload id' });
    return;
  }
  const state = await readRestoreState(slug, uploadId);
  if (state?.status === 'RUNNING') {
    res.status(409).json({ error: 'RESTORE_RUNNING', message: 'Cannot delete a running restore' });
    return;
  }
  // `force: true` makes a missing directory a no-op, so deletion is idempotent.
  await fs.rm(uploadDir, { recursive: true, force: true });
  res.json({ deleted: true });
});
export default router;

View File

@ -4,6 +4,13 @@ import { registerSlug, unregisterSlug, listSlugs } from '../services/registry.se
const router = Router();
// SECURITY: defense-in-depth slug validation. The CCP enforces ^[a-z0-9-]+$
// upstream via Zod, but the registry slug is later interpolated into
// filesystem paths (backupsDirFor, etc.), so we validate independently here.
// A poisoned registry entry could otherwise let a compromised or buggy CCP
// escape AGENT_DATA_DIR.
const SLUG_RE = /^[a-z0-9-]{2,50}$/;
// POST /instances/register — Register a slug→basePath mapping
router.post('/instances/register', async (req: Request, res: Response) => {
const { slug, basePath, composeProject } = req.body;
@ -11,14 +18,23 @@ router.post('/instances/register', async (req: Request, res: Response) => {
res.status(400).json({ error: 'VALIDATION', message: 'slug, basePath, and composeProject required' });
return;
}
if (typeof slug !== 'string' || !SLUG_RE.test(slug)) {
res.status(400).json({ error: 'VALIDATION', message: 'Invalid slug format (expected ^[a-z0-9-]{2,50}$)' });
return;
}
await registerSlug(slug, basePath, composeProject);
res.json({ registered: slug });
});
// DELETE /instances/:slug — Unregister slug
// FIX(review): the pre-change handler lines (unregister without slug-format
// validation) were interleaved with the new version; residue removed so only
// the validated path remains.
router.delete('/instances/:slug', async (req: Request, res: Response) => {
  const slug = param(req, 'slug');
  // Defense-in-depth: same slug format check as registration.
  if (!SLUG_RE.test(slug)) {
    res.status(400).json({ error: 'VALIDATION', message: 'Invalid slug format' });
    return;
  }
  await unregisterSlug(slug);
  res.json({ unregistered: slug });
});
// GET /instances — List all managed slugs

View File

@ -1,11 +1,12 @@
import { Router, Request, Response } from 'express';
import { param } from '../utils/params';
import { execFile } from 'child_process';
import { execFile, spawn } from 'child_process';
import { promisify } from 'util';
import fs from 'fs/promises';
import path from 'path';
import { getSlugEntry } from '../services/registry.service';
import { logger } from '../utils/logger';
import { withSlugLock, SlugBusyError, isSlugLocked } from '../services/slug-mutex';
const execFileAsync = promisify(execFile);
const router = Router();
@ -13,9 +14,108 @@ const router = Router();
/** Validate a git branch name — prevent shell injection. */
const SAFE_BRANCH = /^[a-zA-Z0-9][a-zA-Z0-9_.\/-]{0,99}$/;
// POST /instance/:slug/upgrade/start — Run upgrade.sh
/**
* Max age of an in-progress upgrade (by progress.json mtime) before we
* consider a previous attempt dead and allow a new one through.
*
* SECURITY NOTE: this must be LONGER than the CCP's REMOTE_UPGRADE_TIMEOUT
* AND longer than any realistic legitimate upgrade duration. The concern is
* a concurrent-upgrade scenario:
* - upgrade.sh is running and legitimately slow (large image pull + DB
* migration)
* - at 15 min the CCP side times out and marks the row FAILED
* - admin clicks "Upgrade" again CCP's DB check sees no active row
* - if this staleness window is <= realistic upgrade time, the second
* /upgrade/start call would ALSO pass this check, spawning a second
* upgrade.sh process racing against the still-running first one
*
* 45 min gives headroom over the 15-min CCP timeout and covers realistic
* upgrade durations. For a truly bulletproof guard, switch to a PID lock
* file that verifies the process is still alive.
*/
const STALE_UPGRADE_MTIME_MS = 45 * 60 * 1000;

/**
 * Returns true if there's an in-progress upgrade visible on disk.
 *
 * Second-line guard in case the in-memory mutex was lost to an agent restart
 * mid-upgrade: a progress.json that is newer than any result.json and younger
 * than the staleness window means "started but not finished". Anything older
 * is assumed to be a dead attempt and a new upgrade is allowed through.
 */
async function isUpgradeRunningOnDisk(basePath: string): Promise<boolean> {
  const upgradeDir = path.join(basePath, 'data', 'upgrade');

  let progressMtimeMs: number;
  try {
    progressMtimeMs = (await fs.stat(path.join(upgradeDir, 'progress.json'))).mtimeMs;
  } catch {
    // No progress file → nothing was ever started.
    return false;
  }

  try {
    const resultStat = await fs.stat(path.join(upgradeDir, 'result.json'));
    // A result at least as new as the progress file means the run finished.
    if (resultStat.mtimeMs >= progressMtimeMs) return false;
  } catch { /* no result file yet */ }

  // Unfinished and within the staleness window → treat as still running.
  return Date.now() - progressMtimeMs <= STALE_UPGRADE_MTIME_MS;
}
// POST /instance/:slug/upgrade/check — Run upgrade-check.sh and return status.json
router.post('/instance/:slug/upgrade/check', async (req: Request, res: Response) => {
  const slug = param(req, 'slug');
  const entry = await getSlugEntry(slug);
  // Refuse during a running upgrade — check writes status.json which could
  // race with upgrade.sh writing other files in data/upgrade/
  if (isSlugLocked(slug, 'upgrade') || await isUpgradeRunningOnDisk(entry.basePath)) {
    res.status(409).json({ error: 'SLUG_BUSY', message: 'An upgrade is currently running' });
    return;
  }
  const scriptPath = path.join(entry.basePath, 'scripts', 'upgrade-check.sh');
  try {
    await fs.access(scriptPath);
  } catch {
    res.status(404).json({ error: 'SCRIPT_NOT_FOUND', message: `upgrade-check.sh not found at ${scriptPath}` });
    return;
  }
  // Run upgrade-check.sh — it writes data/upgrade/status.json. Use execFile
  // (no shell) and a 60s timeout. Failures are non-fatal: the script may
  // still have written status.json before erroring out, so we always try
  // to read it afterwards.
  try {
    await execFileAsync('bash', [scriptPath], {
      cwd: entry.basePath,
      timeout: 60_000,
      maxBuffer: 4 * 1024 * 1024,
      env: { ...process.env, COMPOSE_ANSI: 'never' },
    });
  } catch (err) {
    logger.warn(`[upgrade] ${slug}: upgrade-check.sh failed: ${(err as Error).message}`);
    // continue — try to read status.json anyway
  }
  const statusPath = path.join(entry.basePath, 'data', 'upgrade', 'status.json');
  try {
    // status.json is written by our own tooling on this host, so it is
    // returned verbatim to the CCP.
    const content = await fs.readFile(statusPath, 'utf-8');
    res.json(JSON.parse(content));
  } catch {
    res.status(500).json({ error: 'STATUS_NOT_AVAILABLE', message: 'upgrade-check.sh did not produce status.json' });
  }
});
// POST /instance/:slug/upgrade/start — Run upgrade.sh in the background
router.post('/instance/:slug/upgrade/start', async (req: Request, res: Response) => {
const entry = await getSlugEntry(param(req, 'slug'));
const slug = param(req, 'slug');
const entry = await getSlugEntry(slug);
const { skipBackup, useRegistry, branch } = req.body || {};
// SECURITY: Validate branch name to prevent injection
@ -28,26 +128,64 @@ router.post('/instance/:slug/upgrade/start', async (req: Request, res: Response)
try {
await fs.access(scriptPath);
} catch {
res.status(400).json({ error: 'NOT_FOUND', message: 'upgrade.sh not found' });
res.status(404).json({ error: 'NOT_FOUND', message: 'upgrade.sh not found' });
return;
}
// SECURITY: Use execFile with args array — no shell interpolation
const args = ['--api-mode', '--force'];
// Refuse if an upgrade is already running (in-memory or on-disk indicators)
if (isSlugLocked(slug, 'upgrade') || await isUpgradeRunningOnDisk(entry.basePath)) {
res.status(409).json({ error: 'SLUG_BUSY', message: 'An upgrade is already in progress' });
return;
}
// Backup or restore concurrency: refuse to start an upgrade while either is running
if (isSlugLocked(slug, 'backup') || isSlugLocked(slug, 'restore')) {
res.status(409).json({ error: 'SLUG_BUSY', message: 'A backup or restore is currently running' });
return;
}
// Clear stale progress/result files before starting so the on-disk staleness
// check doesn't think a brand-new upgrade is still finishing.
const progressPath = path.join(entry.basePath, 'data', 'upgrade', 'progress.json');
const resultPath = path.join(entry.basePath, 'data', 'upgrade', 'result.json');
await fs.mkdir(path.dirname(progressPath), { recursive: true });
await fs.rm(progressPath, { force: true });
await fs.rm(resultPath, { force: true });
// SECURITY: Use spawn with args array — no shell interpolation
const args: string[] = [scriptPath, '--api-mode', '--force'];
if (skipBackup) args.push('--skip-backup');
if (useRegistry) args.push('--use-registry');
if (branch) args.push('--branch', branch);
// Fire-and-forget — CCP polls progress
execFileAsync('bash', [scriptPath, ...args], {
cwd: entry.basePath,
timeout: 600_000,
maxBuffer: 10 * 1024 * 1024,
// Schedule the background task under the slug lock. Use void so the
// promise doesn't block the response. Errors are caught and logged; the
// CCP detects them via the absence of a result file or via the timeout.
void withSlugLock(slug, 'upgrade', async () => {
logger.info(`[upgrade] ${slug}: spawning ${args.join(' ')} (cwd=${entry.basePath})`);
try {
await new Promise<void>((resolve, reject) => {
const proc = spawn('bash', args, {
cwd: entry.basePath,
env: { ...process.env, COMPOSE_ANSI: 'never' },
stdio: ['ignore', 'ignore', 'ignore'], // upgrade.sh writes its own logs
});
proc.on('error', reject);
proc.on('close', (code) => {
if (code === 0) resolve();
else reject(new Error(`upgrade.sh exited with code ${code}`));
});
});
logger.info(`[upgrade] ${slug}: upgrade.sh completed`);
} catch (err) {
logger.error(`[upgrade] ${slug}: ${(err as Error).message}`);
}
}).catch((err) => {
logger.error(`[upgrade] ${param(req, 'slug')} failed: ${(err as Error).message}`);
if (!(err instanceof SlugBusyError)) {
logger.error(`[upgrade] ${slug}: lock or background error: ${(err as Error).message}`);
}
});
res.json({ started: true });
res.status(202).json({ started: true });
});
// GET /instance/:slug/upgrade/progress — Read progress.json

View File

@ -53,8 +53,24 @@ if (hasCerts()) {
app.use(errorHandler);
const server = https.createServer(tlsOptions, app);
server.listen(env.AGENT_PORT, () => {
server.listen(env.AGENT_PORT, async () => {
logger.info(`CCP Agent (mTLS) listening on port ${env.AGENT_PORT}`);
// Auto-register this instance's slug if configured
if (env.INSTANCE_SLUG && env.INSTANCE_BASE_PATH) {
const { registerSlug, getSlugEntry } = await import('./services/registry.service');
try {
await getSlugEntry(env.INSTANCE_SLUG);
logger.debug(`[registry] Slug ${env.INSTANCE_SLUG} already registered`);
} catch {
// Detect compose project name: use env override, or derive from basePath directory name
// (Docker Compose default: directory name with special chars stripped)
const pathMod = await import('path');
const composeProject = env.COMPOSE_PROJECT
|| pathMod.basename(env.INSTANCE_BASE_PATH).replace(/[^a-zA-Z0-9]/g, '').toLowerCase();
await registerSlug(env.INSTANCE_SLUG, env.INSTANCE_BASE_PATH, composeProject);
}
}
});
} else {
// Pre-approval mode — start HTTP, only health + phone-home polling

View File

@ -0,0 +1,65 @@
/**
* Per-slug single-flight mutex.
*
* Guards long-running, mutating operations (backup, restore, upgrade) so that
* two concurrent CCP calls for the same slug can't trample each other.
*
* Usage:
* await withSlugLock(slug, 'backup', async () => { ... });
*
* If a lock is already held for (slug, op), throws SlugBusyError which the
* route handler should convert to HTTP 409.
*/
/** Thrown when a (slug, op) lock is already held; routes map this to HTTP 409. */
export class SlugBusyError extends Error {
  constructor(public slug: string, public op: string) {
    super(`Slug ${slug} is busy: ${op} already in progress`);
    this.name = 'SlugBusyError';
  }
}

type LockKey = string;

/** Active single-flight locks, keyed by "<slug>::<op>". */
const locks = new Map<LockKey, { op: string; startedAt: number }>();

/** Compose the lock-table key for a (slug, op) pair. */
function key(slug: string, op: string): LockKey {
  return [slug, op].join('::');
}

/**
 * Run `fn` while holding the single-flight lock on (slug, op).
 * Rejects with SlugBusyError immediately when another call is already in
 * flight; otherwise the lock is always released once `fn` settles.
 */
export async function withSlugLock<T>(
  slug: string,
  op: string,
  fn: () => Promise<T>
): Promise<T> {
  const lockKey = key(slug, op);
  if (locks.has(lockKey)) throw new SlugBusyError(slug, op);
  locks.set(lockKey, { op, startedAt: Date.now() });
  try {
    return await fn();
  } finally {
    locks.delete(lockKey);
  }
}

/** Whether a lock is currently held for (slug, op). */
export function isSlugLocked(slug: string, op: string): boolean {
  return locks.has(key(slug, op));
}

/** Debug snapshot of all active locks with their ages. */
export function listActiveLocks(): Array<{ slug: string; op: string; ageMs: number }> {
  const now = Date.now();
  const snapshot: Array<{ slug: string; op: string; ageMs: number }> = [];
  for (const [lockKey, info] of locks) {
    snapshot.push({ slug: lockKey.split('::')[0] ?? '', op: info.op, ageMs: now - info.startedAt });
  }
  return snapshot;
}

View File

@ -0,0 +1,34 @@
-- Migration: restore tracking.
-- Adds the RestoreStatus enum, the BACKUP_RESTORE audit action, and the
-- instance_restores table backing the InstanceRestore Prisma model.

-- CreateEnum
CREATE TYPE "RestoreStatus" AS ENUM ('PENDING', 'UPLOADING', 'RUNNING', 'COMPLETED', 'FAILED');

-- AlterEnum
ALTER TYPE "AuditAction" ADD VALUE 'BACKUP_RESTORE';

-- CreateTable
CREATE TABLE "instance_restores" (
    "id" TEXT NOT NULL,
    "instance_id" TEXT NOT NULL,
    "backup_id" TEXT NOT NULL,
    "status" "RestoreStatus" NOT NULL DEFAULT 'PENDING',
    "upload_id" TEXT,
    "progress_json" JSONB,
    "log_tail" TEXT,
    "error_message" TEXT,
    "triggered_by_id" TEXT,
    "started_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "completed_at" TIMESTAMP(3),

    CONSTRAINT "instance_restores_pkey" PRIMARY KEY ("id")
);

-- CreateIndex (list restores per instance, newest first)
CREATE INDEX "instance_restores_instance_id_started_at_idx" ON "instance_restores"("instance_id", "started_at");

-- CreateIndex
CREATE INDEX "instance_restores_backup_id_idx" ON "instance_restores"("backup_id");

-- AddForeignKey (restore rows are deleted with their instance)
ALTER TABLE "instance_restores" ADD CONSTRAINT "instance_restores_instance_id_fkey" FOREIGN KEY ("instance_id") REFERENCES "instances"("id") ON DELETE CASCADE ON UPDATE CASCADE;

-- AddForeignKey (and with their backup)
ALTER TABLE "instance_restores" ADD CONSTRAINT "instance_restores_backup_id_fkey" FOREIGN KEY ("backup_id") REFERENCES "backups"("id") ON DELETE CASCADE ON UPDATE CASCADE;

View File

@ -0,0 +1,2 @@
-- AlterTable: per-instance override for the Pangolin tunnel subdomain label
-- (nullable — absent means the default naming is used).
ALTER TABLE "instances" ADD COLUMN "pangolin_subdomain_prefix" TEXT;

View File

@ -0,0 +1,2 @@
-- AlterEnum: audit action recorded when an instance's Pangolin tunnel is torn down.
ALTER TYPE "AuditAction" ADD VALUE 'PANGOLIN_TEARDOWN';

View File

@ -109,6 +109,7 @@ model Instance {
pangolinSiteId String? @map("pangolin_site_id")
pangolinNewtId String? @map("pangolin_newt_id")
pangolinNewtSecret String? @map("pangolin_newt_secret")
pangolinSubdomainPrefix String? @map("pangolin_subdomain_prefix")
// SMTP
smtpHost String? @map("smtp_host")
@ -125,6 +126,7 @@ model Instance {
portAllocations PortAllocation[]
healthChecks HealthCheck[]
backups Backup[]
restores InstanceRestore[]
auditLogs AuditLog[]
upgrades InstanceUpgrade[]
events InstanceEvent[]
@ -196,12 +198,44 @@ model Backup {
s3Uploaded Boolean @default(false) @map("s3_uploaded")
s3Key String? @map("s3_key")
instance Instance @relation(fields: [instanceId], references: [id], onDelete: Cascade)
instance Instance @relation(fields: [instanceId], references: [id], onDelete: Cascade)
restores InstanceRestore[]
@@index([instanceId, startedAt])
@@map("backups")
}
// ─── Restore ───────────────────────────────────────────────

// Lifecycle of a restore run; terminal states are COMPLETED and FAILED.
enum RestoreStatus {
  PENDING
  UPLOADING
  RUNNING
  COMPLETED
  FAILED
}

// One attempt to restore a Backup onto its Instance. Rows survive the run
// (cascade-deleted only with the instance or the backup) so operators can
// audit past restores via progressJson/logTail/errorMessage.
model InstanceRestore {
  id            String        @id @default(uuid())
  instanceId    String        @map("instance_id")
  backupId      String        @map("backup_id")
  status        RestoreStatus @default(PENDING)
  // NOTE(review): presumably an archive-upload session id — confirm in restore service
  uploadId      String?       @map("upload_id")
  progressJson  Json?         @map("progress_json")
  logTail       String?       @map("log_tail")
  errorMessage  String?       @map("error_message")
  triggeredById String?       @map("triggered_by_id")
  startedAt     DateTime      @default(now()) @map("started_at")
  completedAt   DateTime?     @map("completed_at")

  instance Instance @relation(fields: [instanceId], references: [id], onDelete: Cascade)
  backup   Backup   @relation(fields: [backupId], references: [id], onDelete: Cascade)

  @@index([instanceId, startedAt])
  @@index([backupId])
  @@map("instance_restores")
}
// ─── Audit Log ─────────────────────────────────────────────
enum AuditAction {
@ -215,7 +249,9 @@ enum AuditAction {
SECRETS_VIEWED
BACKUP_CREATE
BACKUP_DELETE
BACKUP_RESTORE
PANGOLIN_SETUP
PANGOLIN_TEARDOWN
PANGOLIN_SYNC
AGENT_CONNECT
AGENT_REGISTER

View File

@ -54,10 +54,11 @@ const envSchema = z.object({
USE_REGISTRY_IMAGES: z.enum(['true', 'false']).default('true').transform((v) => v === 'true'),
IMAGE_TAG: z.string().default('latest'),
// Pangolin (optional)
// Pangolin (optional — for remote tunnel management)
PANGOLIN_API_URL: z.string().default(''),
PANGOLIN_API_KEY: z.string().default(''),
PANGOLIN_ORG_ID: z.string().default(''),
PANGOLIN_ENDPOINT: z.string().default(''), // Newt WebSocket URL (may differ from API URL)
// Health checks
HEALTH_CHECK_INTERVAL_MS: z.coerce.number().default(300_000), // 5 min (0 to disable)

View File

@ -169,7 +169,7 @@ router.post('/registrations/:id/approve', authenticate, requireRole('SUPER_ADMIN
});
// Issue mTLS certificates
const certMaterials = await issueAgentCert(instance.id, registration.slug);
const certMaterials = await issueAgentCert(instance.id, registration.slug, registration.agentUrl);
// Mark invite code as used
const invite = await prisma.agentInviteCode.findUnique({ where: { id: registration.inviteCodeId } });
@ -189,7 +189,7 @@ router.post('/registrations/:id/approve', authenticate, requireRole('SUPER_ADMIN
caCertPem: certMaterials.caCertPem,
agentCertPem: certMaterials.agentCertPem,
agentKeyPem: certMaterials.agentKeyPem,
ccpFingerprint: certMaterials.caFingerprint,
ccpFingerprint: certMaterials.fingerprint,
},
},
});

View File

@ -4,11 +4,13 @@ import rateLimit from 'express-rate-limit';
import { prisma } from '../../lib/prisma';
import { authenticate, requireRole } from '../../middleware/auth';
import { validate } from '../../middleware/validate';
import { createInstanceSchema, updateInstanceSchema, registerInstanceSchema, reconfigureInstanceSchema, configureTunnelSchema, importInstancesSchema } from './instances.schemas';
import { createInstanceSchema, updateInstanceSchema, registerInstanceSchema, reconfigureInstanceSchema, configureTunnelSchema, importInstancesSchema, startUpgradeSchema, setupRemoteTunnelSchema } from './instances.schemas';
import * as instancesService from './instances.service';
import * as healthService from '../../services/health.service';
import * as backupService from '../../services/backup.service';
import * as restoreService from '../../services/restore.service';
import * as upgradeService from '../../services/upgrade.service';
import * as tunnelService from '../../services/tunnel.service';
import { discoverInstances } from '../../services/discovery.service';
const secretsLimiter = rateLimit({
@ -186,6 +188,18 @@ router.delete(
'/:id/tunnel',
requireRole('SUPER_ADMIN', 'OPERATOR'),
async (req: Request, res: Response) => {
// Branch: remote instances use the CCP's Pangolin API to teardown;
// local instances use the existing manual removal logic.
const instance = await prisma.instance.findUnique({ where: { id: req.params.id as string } });
if (instance?.isRemote && instance.pangolinSiteId) {
const result = await tunnelService.teardownTunnel(
req.params.id as string,
req.user!.id,
req.ip
);
res.json({ data: result });
return;
}
const result = await instancesService.removeTunnel(
req.params.id as string,
req.user!.id,
@ -195,6 +209,47 @@ router.delete(
}
);
// Remote tunnel setup via CCP's Pangolin API credentials
router.post(
'/:id/tunnel/setup',
requireRole('SUPER_ADMIN'),
validate(setupRemoteTunnelSchema),
async (req: Request, res: Response) => {
const { subdomainPrefix } = req.body || {};
const result = await tunnelService.setupTunnel(
req.params.id as string,
{ subdomainPrefix },
req.user!.id,
req.ip
);
res.status(201).json({ data: result });
}
);
// Get tunnel status (resource matrix) — works for both local and remote.
router.get(
  '/:id/tunnel/status',
  requireRole('SUPER_ADMIN', 'OPERATOR'),
  async (req: Request, res: Response) => {
    const instanceId = req.params.id as string;
    const status = await tunnelService.getTunnelStatus(instanceId);
    res.json({ data: status });
  }
);
// Re-sync resources (idempotent — creates missing, leaves existing).
router.post(
  '/:id/tunnel/sync',
  requireRole('SUPER_ADMIN'),
  async (req: Request, res: Response) => {
    const instanceId = req.params.id as string;
    const result = await tunnelService.syncResources(instanceId, req.user!.id, req.ip);
    res.json({ data: result });
  }
);
// ─── Lifecycle Endpoints ─────────────────────────────────────────────
router.post(
@ -280,6 +335,7 @@ router.post(
router.post(
'/:id/upgrade',
requireRole('SUPER_ADMIN', 'OPERATOR'),
validate(startUpgradeSchema),
async (req: Request, res: Response) => {
const { skipBackup, useRegistry, branch } = req.body || {};
const upgrade = await upgradeService.startUpgrade(
@ -356,4 +412,76 @@ router.get(
}
);
// ─── Restores ──────────────────────────────────────────────────────
/**
* POST /:id/restore
* Body: { backupId, options? }
* Starts a restore of the given backup onto this instance. Returns the
* InstanceRestore row immediately; caller polls GET /:id/restores or
* GET /:id/restores/:restoreId for status.
*
* DESTRUCTIVE: overwrites databases and uploads. Requires SUPER_ADMIN.
*/
router.post(
'/:id/restore',
requireRole('SUPER_ADMIN'),
async (req: Request, res: Response) => {
const instanceId = req.params.id as string;
const { backupId, options } = req.body ?? {};
if (!backupId || typeof backupId !== 'string') {
res.status(400).json({ error: { message: 'backupId (string) is required', code: 'VALIDATION' } });
return;
}
// Defensive: ensure the backup belongs to this instance
const backup = await prisma.backup.findUnique({ where: { id: backupId } });
if (!backup) {
res.status(404).json({ error: { message: 'Backup not found', code: 'NOT_FOUND' } });
return;
}
if (backup.instanceId !== instanceId) {
res.status(400).json({
error: {
message: 'Backup does not belong to this instance (cross-instance restore is not supported)',
code: 'CROSS_INSTANCE_RESTORE',
},
});
return;
}
const restore = await restoreService.createRestore({
backupId,
triggeredById: req.user!.id,
ipAddress: req.ip,
options,
});
res.status(201).json({ data: restore });
}
);
// List restores for an instance, paginated (page >= 1, 1 <= limit <= 100).
router.get(
  '/:id/restores',
  requireRole('SUPER_ADMIN', 'OPERATOR'),
  async (req: Request, res: Response) => {
    const rawPage = parseInt(req.query.page as string, 10) || 1;
    const rawLimit = parseInt(req.query.limit as string, 10) || 50;
    const page = rawPage < 1 ? 1 : rawPage;
    const limit = rawLimit < 1 ? 1 : rawLimit > 100 ? 100 : rawLimit;
    const result = await restoreService.listRestores(req.params.id as string, page, limit);
    res.json(result);
  }
);
// Fetch a single restore; restores belonging to a different instance are
// reported as 404 rather than leaking their existence.
router.get(
  '/:id/restores/:restoreId',
  requireRole('SUPER_ADMIN', 'OPERATOR'),
  async (req: Request, res: Response) => {
    const restore = await restoreService.getRestore(req.params.restoreId as string);
    if (restore.instanceId === req.params.id) {
      res.json({ data: restore });
      return;
    }
    res.status(404).json({ error: { message: 'Restore not found', code: 'NOT_FOUND' } });
  }
);
export default router;

View File

// Bulk import: between 1 and 50 instance registrations per request.
export const importInstancesSchema = z.object({
  instances: z.array(registerInstanceSchema).min(1).max(50),
});
// SECURITY: branch name is interpolated into a shell command string in the
// local `runUpgrade` path (exec, not spawn), so we must enforce the same
// strict allow-list the agent uses on its own end. This blocks names starting
// with `-` (avoiding flag confusion), shell metachars, and anything exotic.
const BRANCH_NAME_RE = /^[a-zA-Z0-9][a-zA-Z0-9_.\/-]{0,99}$/;

export const startUpgradeSchema = z.object({
  skipBackup: z.boolean().optional(),
  useRegistry: z.boolean().optional(),
  branch: z.string().regex(BRANCH_NAME_RE, 'Invalid branch name').optional(),
});
// Optional subdomain label for the instance's tunnel resources:
// 1-50 chars, lowercase alphanumeric plus hyphens.
export const setupRemoteTunnelSchema = z.object({
  subdomainPrefix: z.string().min(1).max(50)
    .regex(/^[a-z0-9-]+$/, 'Prefix must be lowercase alphanumeric with hyphens')
    .optional(),
});
// Inferred request-body types, one per schema above.
export type CreateInstanceInput = z.infer<typeof createInstanceSchema>;
export type UpdateInstanceInput = z.infer<typeof updateInstanceSchema>;
export type RegisterInstanceInput = z.infer<typeof registerInstanceSchema>;
export type ReconfigureInstanceInput = z.infer<typeof reconfigureInstanceSchema>;
export type ConfigureTunnelInput = z.infer<typeof configureTunnelSchema>;
export type ImportInstancesInput = z.infer<typeof importInstancesSchema>;
export type StartUpgradeInput = z.infer<typeof startUpgradeSchema>;

View File

@ -8,6 +8,12 @@ import { env } from './config/env';
import { logger } from './utils/logger';
import { errorHandler } from './middleware/error-handler';
// BigInt JSON serialization. Prisma's BigInt columns (e.g. Backup.sizeBytes)
// don't have a toJSON method by default, so res.json() throws. Stringify them.
// Use defineProperty instead of plain assignment: the patch stays
// non-enumerable (an assigned property would show up in for..in / spread over
// the prototype) and we avoid casting BigInt.prototype through `unknown`.
Object.defineProperty(BigInt.prototype, 'toJSON', {
  value: function (this: bigint): string {
    return this.toString();
  },
  writable: true,
  configurable: true,
  enumerable: false,
});
// Route imports
import authRoutes from './modules/auth/auth.routes';
import instanceRoutes from './modules/instances/instances.routes';

View File

@ -1,5 +1,6 @@
import { Prisma, BackupStatus, AuditAction, InstanceStatus } from '@prisma/client';
import fs from 'fs/promises';
import { createReadStream } from 'fs';
import path from 'path';
import crypto from 'crypto';
import { execFile as execFileCb } from 'child_process';
@ -10,6 +11,7 @@ import { AppError } from '../middleware/error-handler';
import { decryptJson } from '../utils/encryption';
import * as docker from './docker.service';
import { logger } from '../utils/logger';
import { getRemoteDriverForInstance } from './execution-driver';
const execFile = promisify(execFileCb);
/**
@ -24,11 +26,16 @@ function assertPathWithinBoundary(filePath: string, boundary: string, label: str
}
/**
 * Compute the SHA-256 hash (lowercase hex) of a file by streaming its
 * contents, so large backup archives never have to fit in memory.
 *
 * Note: the rendered diff left both the old buffered body and the new
 * streaming body in this span; only the streaming implementation is kept.
 *
 * @param filePath absolute path of the file to hash
 * @returns hex digest
 * @throws propagates any stream error (e.g. ENOENT) as a rejection
 */
async function fileHash(filePath: string): Promise<string> {
  return new Promise((resolve, reject) => {
    const hash = crypto.createHash('sha256');
    const stream = createReadStream(filePath);
    stream.on('data', (chunk) => hash.update(chunk));
    stream.on('end', () => resolve(hash.digest('hex')));
    stream.on('error', reject);
  });
}
/**
@ -52,7 +59,11 @@ export async function createBackup(instanceId: string, userId?: string, ipAddres
throw new AppError(400, `Cannot backup instance in ${instance.status} state`, 'INVALID_STATE');
}
if ((instance as { isRegistered?: boolean }).isRegistered) {
// `isRegistered` + `isRemote` = a remote CCP-managed instance (agent on the
// far side). `isRegistered` alone (without `isRemote`) would mean a local
// host-managed instance that CCP doesn't own the compose files for — that
// case we still can't back up.
if (instance.isRegistered && !instance.isRemote) {
throw new AppError(400, 'Backups not managed by CCP for registered instances', 'NOT_MANAGED');
}
@ -72,9 +83,31 @@ export async function createBackup(instanceId: string, userId?: string, ipAddres
return backup;
}
// Minimal projection of an Instance row needed by the backup workers.
// `isRemote` selects the worker: true → backup runs on the agent at
// `agentUrl` and is streamed back; false → local compose-based backup.
type BackupInstance = {
  id: string;
  slug: string;
  basePath: string;
  composeProject: string;
  encryptedSecrets: string | null;
  isRemote: boolean;
  agentUrl: string | null;
};
/**
 * Dispatch a backup job to the right worker: remote instances back up via
 * their agent (archive streamed to CCP storage), local instances via the
 * host's compose project.
 *
 * Fix: the rendered diff left a duplicated `instance` parameter line (the
 * pre-refactor inline type alongside the new BackupInstance); only the
 * BackupInstance signature is kept.
 */
async function performBackup(
  backupId: string,
  instance: BackupInstance,
  userId?: string,
  ipAddress?: string
) {
  if (instance.isRemote) {
    return performRemoteBackup(backupId, instance, userId, ipAddress);
  }
  return performLocalBackup(backupId, instance, userId, ipAddress);
}
async function performLocalBackup(
backupId: string,
instance: BackupInstance,
userId?: string,
ipAddress?: string
) {
@ -221,6 +254,168 @@ async function performBackup(
}
}
/**
 * Run a backup on a remote agent and stream the resulting archive to CCP storage.
 *
 * Flow:
 * 1. Tell agent to run scripts/backup.sh → { backupId, sizeBytes, sha256, manifest }
 * 2. Stream archive from agent $BACKUP_STORAGE_PATH/{slug}/backup-{slug}-{backupId}.tar.gz
 * 3. Verify local SHA256 matches what the agent reported (defense in depth)
 * 4. Tell agent to delete its local copy (reclaim remote disk)
 * 5. Update Backup row as COMPLETED
 *
 * On failure at any step after the remote backup was created, we leave the
 * agent-side archive in place so the operator can retry the download.
 *
 * @param backupId  id of the pre-created Backup row driven to COMPLETED/FAILED
 * @param instance  projection of the remote instance (isRemote + agentUrl set)
 * @param userId    optional actor — audit log entry is written only when present
 * @param ipAddress optional actor IP recorded in the audit log
 * @throws rethrows any failure after marking the Backup row FAILED
 */
async function performRemoteBackup(
  backupId: string,
  instance: BackupInstance,
  userId?: string,
  ipAddress?: string
) {
  let archivePath: string | null = null;
  let agentBackupId: string | null = null;
  try {
    await prisma.backup.update({
      where: { id: backupId },
      data: { status: BackupStatus.IN_PROGRESS },
    });

    const driver = await getRemoteDriverForInstance({
      id: instance.id,
      slug: instance.slug,
      isRemote: instance.isRemote,
      agentUrl: instance.agentUrl,
    });

    // 1. Trigger the backup on the agent (this blocks until backup.sh completes)
    logger.info(`[backup] ${instance.slug}: triggering remote backup via agent`);
    const result = await driver.createBackup();
    agentBackupId = result.backupId;
    logger.info(
      `[backup] ${instance.slug}: agent backup complete — ${result.filename} ` +
      `(${(result.sizeBytes / 1024 / 1024).toFixed(1)} MB, sha256=${result.sha256.substring(0, 16)}...)`
    );

    // 2. Resolve the destination archive path on CCP storage
    const archiveName = `backup-${instance.slug}-${result.backupId}.tar.gz`;
    archivePath = path.join(env.BACKUP_STORAGE_PATH, instance.slug, archiveName);
    // Path traversal guard (slug should be safe but better to assert)
    assertPathWithinBoundary(archivePath, env.BACKUP_STORAGE_PATH, 'Backup archive');
    await fs.mkdir(path.dirname(archivePath), { recursive: true });

    // 3. Stream the archive from the agent to CCP storage
    logger.info(`[backup] ${instance.slug}: streaming archive to ${archivePath}`);
    const { bytesWritten } = await driver.downloadBackup(result.backupId, archivePath);
    if (bytesWritten !== result.sizeBytes) {
      throw new Error(
        `Downloaded size ${bytesWritten} does not match agent-reported size ${result.sizeBytes}`
      );
    }

    // 4. Re-hash the downloaded file and compare to the agent-reported hash.
    //
    // SECURITY NOTE: this check authenticates *transmission integrity* only,
    // not content integrity against a malicious agent. Both the file bytes
    // and the expected hash are supplied by the (semi-trusted) agent, so a
    // compromised agent can trivially make this check pass while delivering
    // arbitrary content. The check still catches accidental corruption (bit
    // flips, truncation) and is essentially free.
    //
    // The mTLS channel guarantees that the bytes weren't modified in transit
    // by an outside attacker. The remaining trust gap — "what if the agent
    // itself is compromised?" — must be addressed before Phase B (restore)
    // ships, since restore feeds the archive into pg_restore. Either:
    //   (a) HMAC-sign the hash on the agent with its mTLS private key and
    //       verify on the CCP using the agent cert public key, or
    //   (b) limit restore operations to require an additional out-of-band
    //       admin confirmation step.
    const localSha256 = await fileHash(archivePath);
    if (localSha256 !== result.sha256) {
      throw new Error(
        `SHA256 mismatch: agent reported ${result.sha256}, local file hashed ${localSha256}`
      );
    }

    // 5. Reclaim disk on the remote agent
    try {
      await driver.deleteBackup(result.backupId);
    } catch (err) {
      logger.warn(
        `[backup] ${instance.slug}: failed to delete remote backup ${result.backupId}: ${(err as Error).message}`
      );
      // Non-fatal — CCP has the archive, remote copy will age out next retention sweep
    }

    // 6. Persist the result. Store sha256 and agentBackupId inside the manifest
    //    since we don't have dedicated columns.
    const mergedManifest = {
      ...(result.manifest as Record<string, unknown> | null ?? {}),
      source: 'remote',
      agentBackupId: result.backupId,
      sha256: result.sha256,
      createdAt: result.createdAt,
    };
    await prisma.backup.update({
      where: { id: backupId },
      data: {
        status: BackupStatus.COMPLETED,
        archivePath,
        sizeBytes: BigInt(bytesWritten),
        manifest: mergedManifest as unknown as Prisma.InputJsonValue,
        completedAt: new Date(),
      },
    });

    // Audit only when a human actor triggered the backup.
    if (userId) {
      await prisma.auditLog.create({
        data: {
          userId,
          instanceId: instance.id,
          action: AuditAction.BACKUP_CREATE,
          details: {
            backupId,
            archiveName,
            sizeBytes: bytesWritten,
            source: 'remote',
            agentBackupId: result.backupId,
          },
          ipAddress,
        },
      });
    }

    logger.info(
      `[backup] ${instance.slug}: remote backup stored at ${archivePath} ` +
      `(${(bytesWritten / 1024 / 1024).toFixed(1)} MB)`
    );
  } catch (err) {
    await prisma.backup.update({
      where: { id: backupId },
      data: {
        status: BackupStatus.FAILED,
        errorMessage: (err as Error).message,
        completedAt: new Date(),
      },
    });
    // Clean up any partial local file; leave the remote copy so retry is possible
    if (archivePath) {
      try { await fs.unlink(archivePath); } catch { /* ignore */ }
    }
    if (agentBackupId) {
      logger.warn(
        `[backup] ${instance.slug}: leaving agent-side backup ${agentBackupId} in place for retry`
      );
    }
    throw err;
  }
}
/**
* Delete a backup (file + DB record).
*/

View File

@ -0,0 +1,368 @@
/**
* Pangolin Integration API client for the CCP.
*
* Ported from the main CML's pangolin.client.ts. Adapted to:
* - Accept credentials via constructor (not env singleton)
* - Be instantiable per-call so the CCP can use its own API token
* to manage tunnels for multiple remote instances
*
* The CCP never exposes its Pangolin API key to remote instances — it
* only pushes the resulting Newt credentials via the agent's writeFiles.
*/
import { logger } from '../utils/logger';
// ─── Types ─────────────────────────────────────────────────────────

/** A Pangolin site — one tunnel endpoint (Newt) registered in an org. */
export interface PangolinSite {
  siteId: string;
  name: string;
  orgId: string;
  niceId: string;
  pubKey?: string;
  subnet?: string;
  megabytesIn?: number;
  megabytesOut?: number;
  lastSeen?: string;
  online?: boolean;
  type?: string;
  address?: string;
}

/** An exit node a site can attach to. */
export interface PangolinExitNode {
  exitNodeId: string;
  name: string;
  location?: string;
  region?: string;
  online: boolean;
  capacity?: number;
  latency?: number;
}

/** An exposed resource (typically an HTTP service behind a subdomain). */
export interface PangolinResource {
  resourceId: string;
  name: string;
  subdomain?: string;
  fullDomain?: string;
  ssl?: boolean;
  blockAccess?: boolean;
  active?: boolean;
  proxyPort?: number;
  protocol?: string;
  domainBindings?: string[];
  http?: boolean;
  targets?: PangolinTarget[];
}

/** A backend target (ip:port on a site) that a resource proxies to. */
export interface PangolinTarget {
  targetId: string;
  resourceId: string;
  siteId: string;
  ip: string;
  port: number;
  method: string;
  enabled?: boolean;
}

/** Newt (tunnel client) credentials bound to a site. */
export interface PangolinNewt {
  newtId: string;
  secret: string;
  siteId: string;
}

/** Defaults returned by pick-site-defaults for creating a new site. */
export interface PangolinSiteDefaults {
  newtId: string;
  newtSecret: string;
  address: string;
}

/** Payload for creating a site. */
export interface CreateSitePayload {
  name: string;
  type?: string;
  subnet?: string;
  exitNodeId?: string;
  newtId?: string;
  secret?: string;
  address?: string;
}

/** Payload for creating an HTTP resource (http/tcp are fixed by the API). */
export interface CreateHttpResourcePayload {
  name: string;
  domainId: string;
  subdomain?: string;
  http: true;
  protocol: 'tcp';
}

/** Payload for attaching a backend target to a resource. */
export interface CreateTargetPayload {
  siteId: string | number;
  ip: string;
  port: number;
  method: 'http' | 'https';
  enabled?: boolean;
}

/** A domain registered with the org. */
export interface PangolinDomain {
  domainId: string;
  baseDomain: string;
  verified: boolean;
  type?: string;
  failed?: boolean;
  configManaged?: boolean;
}

/** Partial update payload for a resource; only set fields are changed. */
export interface UpdateResourcePayload {
  name?: string;
  subdomain?: string;
  fullDomain?: string;
  ssl?: boolean;
  sso?: boolean;
  active?: boolean;
  blockAccess?: boolean;
  proxyPort?: number;
  protocol?: string;
  domainBindings?: string[];
}

/** Partial update payload for a certificate. */
export interface UpdateCertificatePayload {
  autoRenew?: boolean;
}

/** A TLS certificate tracked by Pangolin for a domain. */
export interface PangolinCertificate {
  certId: string;
  domainId: string;
  domain: string;
  status: 'PENDING' | 'ACTIVE' | 'EXPIRED' | 'FAILED';
  issuedAt?: string;
  expiresAt?: string;
  autoRenew?: boolean;
  issuer?: string;
}

/** A client connection observed on a resource. */
export interface PangolinConnectedClient {
  clientId: string;
  resourceId: string;
  ipAddress: string;
  connectedAt: string;
  lastSeen: string;
  bytesIn: number;
  bytesOut: number;
  online: boolean;
}
// ─── Helpers ───────────────────────────────────────────────────────

/** Redact credential fields from Pangolin API request bodies before logging. */
function redactSecrets(body: unknown): unknown {
  // Primitives and null pass through untouched.
  if (!body || typeof body !== 'object') return body;
  const clone: Record<string, unknown> = { ...(body as Record<string, unknown>) };
  for (const field of ['secret', 'newtSecret']) {
    if (field in clone) clone[field] = '[REDACTED]';
  }
  return clone;
}
// ─── Client ────────────────────────────────────────────────────────

/**
 * REST client for the Pangolin integration API, scoped to one org.
 * Credentials are passed per instance (no env singleton) so the CCP can
 * manage tunnels for multiple remote instances with its own API token.
 */
export class CcpPangolinClient {
  constructor(
    private baseUrl: string,
    private apiKey: string,
    private orgId: string
  ) {}

  /** True when all three credentials are set; request() refuses to run otherwise. */
  get configured(): boolean {
    return !!(this.baseUrl && this.apiKey && this.orgId);
  }

  /**
   * Perform one HTTP call against the Pangolin API.
   * - 15 s abort timeout
   * - non-2xx → Error carrying status + response text
   * - JSON responses are unwrapped from the { success, data } envelope
   * - non-JSON responses yield an empty object
   */
  private async request<T>(method: string, path: string, body?: unknown): Promise<T> {
    if (!this.configured) {
      throw new Error('Pangolin API not configured. Set PANGOLIN_API_URL, PANGOLIN_API_KEY, PANGOLIN_ORG_ID in CCP .env');
    }
    const url = `${this.baseUrl}${path}`;
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), 15000);
    try {
      // Bodies are redacted before logging so Newt secrets never hit the logs.
      logger.debug(`[pangolin] ${method} ${path}${body ? ` body=${JSON.stringify(redactSecrets(body))}` : ''}`);
      const res = await fetch(url, {
        method,
        headers: {
          'Authorization': `Bearer ${this.apiKey}`,
          'Content-Type': 'application/json',
        },
        body: body ? JSON.stringify(body) : undefined,
        signal: controller.signal,
      });
      if (!res.ok) {
        const text = await res.text().catch(() => '');
        throw new Error(`Pangolin API ${method} ${path} returned ${res.status}: ${text}`);
      }
      const contentType = res.headers.get('content-type') || '';
      if (contentType.includes('application/json')) {
        const json = await res.json();
        return this.unwrapResponse<T>(json);
      }
      return {} as T;
    } finally {
      clearTimeout(timeout);
    }
  }

  /** Pangolin wraps payloads as { success, data }; return `data` when present. */
  private unwrapResponse<T>(json: unknown): T {
    if (json && typeof json === 'object' && !Array.isArray(json)) {
      const obj = json as Record<string, unknown>;
      if ('data' in obj && 'success' in obj) {
        return obj.data as T;
      }
    }
    return json as T;
  }

  // ─── Health ───────────────────────────────────────────────────

  /** Lightweight reachability probe (5 s timeout); returns false instead of throwing. */
  async healthCheck(): Promise<boolean> {
    try {
      const controller = new AbortController();
      const timeout = setTimeout(() => controller.abort(), 5000);
      try {
        const res = await fetch(`${this.baseUrl}/`, {
          headers: { 'Authorization': `Bearer ${this.apiKey}` },
          signal: controller.signal,
        });
        return res.ok;
      } finally {
        clearTimeout(timeout);
      }
    } catch {
      return false;
    }
  }

  // ─── Site Defaults ────────────────────────────────────────────

  /**
   * Fetch fresh Newt credentials + address for a new site. Field names vary
   * across Pangolin versions, hence the fallbacks (secret/newtSecret, etc.).
   */
  async pickSiteDefaults(): Promise<PangolinSiteDefaults> {
    const res = await this.request<unknown>('GET', `/org/${this.orgId}/pick-site-defaults`);
    const obj = res as Record<string, unknown>;
    const newtId = obj.newtId as string || '';
    const newtSecret = obj.newtSecret as string || obj.secret as string || '';
    const address = obj.clientAddress as string || obj.address as string || '';
    if (!newtId || !newtSecret) {
      throw new Error('Pangolin did not return Newt credentials from pick-site-defaults');
    }
    return { newtId, newtSecret, address };
  }

  // ─── Sites ────────────────────────────────────────────────────

  async listSites(): Promise<PangolinSite[]> {
    const res = await this.request<unknown>('GET', `/org/${this.orgId}/sites`);
    return this.extractArray(res, 'sites', 'listSites');
  }

  async getSite(siteId: string): Promise<PangolinSite> {
    return this.request<PangolinSite>('GET', `/site/${siteId}`);
  }

  // PUT (not POST) is Pangolin's create verb for sites/resources/targets.
  async createSite(data: CreateSitePayload): Promise<PangolinSite & { newt?: PangolinNewt }> {
    return this.request<PangolinSite & { newt?: PangolinNewt }>('PUT', `/org/${this.orgId}/site`, data);
  }

  async deleteSite(siteId: string): Promise<void> {
    await this.request<void>('DELETE', `/site/${siteId}`);
  }

  // Best-effort: returns [] when the endpoint is missing or errors.
  async listExitNodes(): Promise<PangolinExitNode[]> {
    try {
      const res = await this.request<unknown>('GET', `/org/${this.orgId}/exit-nodes`);
      return this.extractArray(res, 'exitNodes', 'listExitNodes');
    } catch {
      return [];
    }
  }

  // ─── Resources ────────────────────────────────────────────────

  async listResources(): Promise<PangolinResource[]> {
    const res = await this.request<unknown>('GET', `/org/${this.orgId}/resources`);
    return this.extractArray(res, 'resources', 'listResources');
  }

  async getResource(resourceId: string): Promise<PangolinResource> {
    return this.request<PangolinResource>('GET', `/resource/${resourceId}`);
  }

  async createResource(data: CreateHttpResourcePayload): Promise<PangolinResource> {
    logger.info(`[pangolin] createResource: ${data.name} (subdomain: ${data.subdomain || '(root)'})`);
    return this.request<PangolinResource>('PUT', `/org/${this.orgId}/resource`, data);
  }

  async updateResource(resourceId: string, data: UpdateResourcePayload): Promise<PangolinResource> {
    return this.request<PangolinResource>('POST', `/resource/${resourceId}`, data);
  }

  async deleteResource(resourceId: string): Promise<void> {
    await this.request<void>('DELETE', `/resource/${resourceId}`);
  }

  // ─── Targets ──────────────────────────────────────────────────

  // siteId is coerced to a number — the target endpoint rejects string ids.
  async createTarget(resourceId: string, data: CreateTargetPayload): Promise<PangolinTarget> {
    logger.info(`[pangolin] createTarget: resource=${resourceId}, ip=${data.ip}:${data.port}`);
    const payload = { ...data, siteId: Number(data.siteId) };
    return this.request<PangolinTarget>('PUT', `/resource/${resourceId}/target`, payload);
  }

  async listTargets(resourceId: string): Promise<PangolinTarget[]> {
    const res = await this.request<unknown>('GET', `/resource/${resourceId}/targets`);
    return this.extractArray(res, 'targets', 'listTargets');
  }

  async deleteTarget(targetId: string): Promise<void> {
    await this.request<void>('DELETE', `/target/${targetId}`);
  }

  // ─── Domains ──────────────────────────────────────────────────

  async listDomains(): Promise<PangolinDomain[]> {
    const res = await this.request<unknown>('GET', `/org/${this.orgId}/domains`);
    return this.extractArray(res, 'domains', 'listDomains');
  }

  // ─── Certificates ─────────────────────────────────────────────

  async getCertificate(domainId: string, domain: string): Promise<PangolinCertificate> {
    return this.request<PangolinCertificate>('GET', `/org/${this.orgId}/certificate/${domainId}/${domain}`);
  }

  async updateCertificate(certId: string, data: UpdateCertificatePayload): Promise<PangolinCertificate> {
    return this.request<PangolinCertificate>('POST', `/certificate/${certId}`, data);
  }

  // ─── Clients ──────────────────────────────────────────────────

  async listClients(resourceId: string): Promise<PangolinConnectedClient[]> {
    const res = await this.request<unknown>('GET', `/resource/${resourceId}/clients`);
    return this.extractArray(res, 'clients', 'listClients');
  }

  // ─── Helpers ──────────────────────────────────────────────────

  /**
   * Normalize list responses to an array. Depending on endpoint/version the
   * payload may be a bare array, { [key]: [...] }, { data: { [key]: [...] } },
   * or { data: [...] }; anything else logs a warning and yields [].
   */
  private extractArray<T>(res: unknown, key: string, context: string): T[] {
    if (Array.isArray(res)) return res as T[];
    if (res && typeof res === 'object') {
      const obj = res as Record<string, unknown>;
      if (Array.isArray(obj[key])) return obj[key] as T[];
      if (obj.data && typeof obj.data === 'object') {
        const dataObj = obj.data as Record<string, unknown>;
        if (Array.isArray(dataObj[key])) return dataObj[key] as T[];
      }
      if (Array.isArray(obj.data)) return obj.data as T[];
    }
    logger.warn(`[pangolin] ${context}: could not extract array from response`);
    return [];
  }
}

View File

@ -90,7 +90,7 @@ export async function ensureCA() {
* Issue a certificate for a remote agent, signed by the CA.
* Returns the certificate materials (plaintext) for one-time display.
*/
export async function issueAgentCert(instanceId: string, slug: string) {
export async function issueAgentCert(instanceId: string, slug: string, agentUrl?: string) {
const ca = await ensureCA();
const caKeyPem = decrypt(ca.encryptedKey);
@ -110,12 +110,29 @@ export async function issueAgentCert(instanceId: string, slug: string) {
await fs.writeFile(caCertFile, ca.certPem);
await fs.writeFile(serialFile, crypto.randomBytes(16).toString('hex'));
// Extensions for server+client auth
await fs.writeFile(extFile, [
// Build SAN entries from the agent URL hostname
const sanEntries: string[] = [];
if (agentUrl) {
try {
const hostname = new URL(agentUrl).hostname;
// Detect IP vs DNS name
if (/^\d{1,3}(\.\d{1,3}){3}$/.test(hostname) || hostname.includes(':')) {
sanEntries.push(`IP:${hostname}`);
} else {
sanEntries.push(`DNS:${hostname}`);
}
} catch { /* ignore invalid URL */ }
}
sanEntries.push(`DNS:${commonName}`);
// Extensions for server+client auth with SANs
const extLines = [
'basicConstraints=CA:FALSE',
'keyUsage=digitalSignature,keyEncipherment',
'extendedKeyUsage=serverAuth,clientAuth',
].join('\n'));
`subjectAltName=${sanEntries.join(',')}`,
];
await fs.writeFile(extFile, extLines.join('\n'));
// Generate agent key
await exec(

View File

@ -60,7 +60,20 @@ export async function getDriverForInstance(instance: DriverInstance): Promise<Ex
const { getLocalDriver } = await import('./local-driver');
return getLocalDriver();
}
return getRemoteDriverForInstance(instance);
}
/**
* Resolve a RemoteDriver for a remote instance. Throws if the instance is
* local, missing an agent URL, or has no valid mTLS certificate.
*
* Use this when you need to call RemoteDriver-specific methods like
* createBackup() that don't exist on the ExecutionDriver interface.
*/
export async function getRemoteDriverForInstance(instance: DriverInstance) {
if (!instance.isRemote) {
throw new Error(`Instance ${instance.slug} is not remote`);
}
if (!instance.agentUrl) {
throw new Error(`Remote instance ${instance.slug} has no agent URL configured`);
}

View File

@ -1,10 +1,87 @@
import https from 'https';
import fs from 'fs';
import { pipeline } from 'stream/promises';
import { env } from '../config/env';
import type { ExecutionDriver } from './execution-driver';
import { AgentUnreachableError } from './execution-driver';
import type { ContainerInfo } from './docker.service';
import { logger } from '../utils/logger';
/** Metadata the agent returns after a successful backup run. */
export interface AgentBackupResult {
  backupId: string;         // Agent-side identifier used for download/delete
  filename: string;         // Archive filename on the agent's disk
  sizeBytes: number;        // Archive size in bytes
  sha256: string;           // Hex digest of the archive
  manifest: unknown | null; // Parsed backup manifest, when the agent produced one
  createdAt: string;        // Timestamp string as reported by the agent
}

/** Summary row for one backup archive currently held on the agent. */
export interface AgentBackupListEntry {
  backupId: string;
  filename: string;
  sizeBytes: number;
  createdAt: string;
}

/** Response of the agent's restore upload endpoint. */
export interface AgentRestoreUploadResult {
  uploadId: string;  // Identifier to pass to applyRestore()/getRestoreProgress()
  sizeBytes: number; // Bytes the agent ingested
  sha256: string;    // Digest the agent computed for the uploaded stream
}

/** Flags forwarded to the agent's restore apply call. */
export interface AgentRestoreOptions {
  skipDb?: boolean;
  skipUploads?: boolean;
  skipListmonk?: boolean;
  dryRun?: boolean;
}

/** Progress / terminal state of a restore as reported by the agent. */
export interface AgentRestoreState {
  status: 'UPLOADED' | 'RUNNING' | 'COMPLETED' | 'FAILED';
  uploadId: string;
  startedAt: string;
  completedAt?: string;  // Present once the restore reached a terminal state
  exitCode?: number;     // Exit code of the restore script, when finished
  logTail?: string;      // Last lines of the agent-side restore log
  errorMessage?: string; // Populated when status is FAILED
  options?: AgentRestoreOptions;
}

/** Parsed status.json produced by the agent's upgrade check. */
export interface AgentUpdateStatus {
  branch: string;
  currentCommit: string;
  currentMessage?: string;
  remoteCommit: string | null;
  commitsBehind: number;
  changelog: Array<{ hash: string; message: string; date: string; author: string }>;
  checkedAt: string;
  error: string | null;
}

/** Shape of the agent's data/upgrade/progress.json (all fields optional). */
export interface AgentUpgradeProgress {
  phase?: number;
  phaseName?: string;
  percentage?: number;
  message?: string;
  timestamp?: string;
}

/** Shape of the agent's data/upgrade/result.json. */
export interface AgentUpgradeResult {
  success: boolean;
  message?: string;
  previousCommit?: string;
  newCommit?: string;
  commitCount?: number;
  durationSeconds?: number;
  warnings?: string[];
}

/** Options accepted by startUpgrade(); passed through to the agent verbatim. */
export interface StartAgentUpgradeOptions {
  skipBackup?: boolean;
  useRegistry?: boolean;
  branch?: string;
}
interface AgentRequestOptions {
method: 'GET' | 'POST' | 'DELETE';
path: string;
@ -261,4 +338,261 @@ export class RemoteDriver implements ExecutionDriver {
timeoutMs: env.AGENT_LONG_OP_TIMEOUT_MS,
});
}
// ─── Backup Operations ──────────────────────────────────────
/**
 * Trigger a backup on the remote agent. The agent shells out to
 * scripts/backup.sh and returns metadata for the resulting archive. The
 * archive stays on the agent's disk until downloadBackup() + deleteBackup()
 * are called.
 */
async createBackup(): Promise<AgentBackupResult> {
  const route = `/instance/${this.slug}/backup`;
  return this.request<AgentBackupResult>({
    method: 'POST',
    path: route,
    timeoutMs: env.AGENT_LONG_OP_TIMEOUT_MS,
  });
}
/** List the backup archives currently held on the agent for this slug. */
async listAgentBackups(): Promise<AgentBackupListEntry[]> {
  const { data } = await this.request<{ data: AgentBackupListEntry[] }>({
    method: 'GET',
    path: `/instance/${this.slug}/backups`,
  });
  return data;
}
/** Delete an archive from the agent's disk, after a successful download. */
async deleteBackup(backupId: string): Promise<void> {
  const encodedId = encodeURIComponent(backupId);
  await this.request({
    method: 'DELETE',
    path: `/instance/${this.slug}/backup/${encodedId}`,
  });
}
/**
 * Stream a backup archive from the agent to a local file path.
 * Verifies the Content-Length header matches the bytes written.
 *
 * @param backupId agent-side backup identifier (URL-encoded into the path)
 * @param destPath local filesystem path the archive is written to
 * @returns the number of bytes written to destPath
 * @throws AgentUnreachableError on socket error or timeout
 * @throws Error on HTTP >= 400 or on a Content-Length mismatch
 */
async downloadBackup(backupId: string, destPath: string): Promise<{ bytesWritten: number }> {
  const url = new URL(
    `/instance/${this.slug}/backup/${encodeURIComponent(backupId)}/download`,
    this.agentUrl
  );
  const timeoutMs = env.AGENT_LONG_OP_TIMEOUT_MS;
  return new Promise((resolve, reject) => {
    const req = https.request(
      {
        hostname: url.hostname,
        port: url.port || 7443, // default agent port when the URL carries none
        path: url.pathname + url.search,
        method: 'GET',
        headers: { Accept: 'application/gzip' },
        // mTLS: present our client cert/key and pin the agent CA
        cert: this.clientCert,
        key: this.clientKey,
        ca: this.caCert,
        rejectUnauthorized: true,
        timeout: timeoutMs,
      },
      async (res) => {
        if (res.statusCode && res.statusCode >= 400) {
          // Drain the error body so its text can be included in the rejection
          let body = '';
          res.on('data', (c) => (body += c));
          res.on('end', () => reject(new Error(`Agent returned ${res.statusCode}: ${body.substring(0, 500)}`)));
          return;
        }
        const expectedSize = res.headers['content-length']
          ? parseInt(res.headers['content-length'] as string, 10)
          : null;
        try {
          const out = fs.createWriteStream(destPath);
          await pipeline(res, out);
          const stats = await fs.promises.stat(destPath);
          if (expectedSize !== null && stats.size !== expectedSize) {
            // NOTE(review): the partially-written file is left at destPath on
            // mismatch — callers should clean up or overwrite it.
            reject(new Error(`Downloaded size ${stats.size} does not match Content-Length ${expectedSize}`));
            return;
          }
          resolve({ bytesWritten: stats.size });
        } catch (err) {
          reject(err);
        }
      }
    );
    req.on('error', (err) => {
      reject(new AgentUnreachableError(this.agentUrl, err));
    });
    req.on('timeout', () => {
      req.destroy();
      reject(new AgentUnreachableError(this.agentUrl, new Error(`Timed out after ${timeoutMs}ms`)));
    });
    req.end();
  });
}
// ─── Restore Operations ─────────────────────────────────────
/**
 * Stream a backup archive from a local path to the agent's upload endpoint.
 * The expected SHA256 is passed as a query parameter and the agent verifies
 * it during ingestion — if it mismatches, the upload is rejected with 400.
 *
 * @param archivePath local archive, streamed from disk (never buffered whole)
 * @param expectedSha256 hex digest the agent must re-compute and match
 * @returns the agent's upload descriptor (uploadId for applyRestore())
 * @throws AgentUnreachableError on socket error or timeout
 * @throws Error when the agent responds with HTTP >= 400
 */
async uploadRestore(
  archivePath: string,
  expectedSha256: string
): Promise<AgentRestoreUploadResult> {
  // stat() up front so an exact Content-Length can be sent
  const stats = await fs.promises.stat(archivePath);
  const url = new URL(
    `/instance/${this.slug}/restore/upload?sha256=${encodeURIComponent(expectedSha256)}`,
    this.agentUrl
  );
  const timeoutMs = env.AGENT_LONG_OP_TIMEOUT_MS;
  return new Promise((resolve, reject) => {
    const req = https.request(
      {
        hostname: url.hostname,
        port: url.port || 7443, // default agent port when the URL carries none
        path: url.pathname + url.search,
        method: 'POST',
        headers: {
          'Content-Type': 'application/octet-stream',
          'Content-Length': String(stats.size),
        },
        // mTLS: present our client cert/key and pin the agent CA
        cert: this.clientCert,
        key: this.clientKey,
        ca: this.caCert,
        rejectUnauthorized: true,
        timeout: timeoutMs,
      },
      (res) => {
        let body = '';
        res.on('data', (c) => (body += c));
        res.on('end', () => {
          if (res.statusCode && res.statusCode >= 400) {
            // Prefer the agent's structured message when the body parses as JSON
            try {
              const err = JSON.parse(body);
              reject(new Error(err.message || `Agent returned ${res.statusCode}`));
            } catch {
              reject(new Error(`Agent returned ${res.statusCode}: ${body.substring(0, 500)}`));
            }
            return;
          }
          try {
            resolve(JSON.parse(body) as AgentRestoreUploadResult);
          } catch (err) {
            reject(err);
          }
        });
      }
    );
    req.on('error', (err) => {
      reject(new AgentUnreachableError(this.agentUrl, err));
    });
    req.on('timeout', () => {
      req.destroy();
      reject(new AgentUnreachableError(this.agentUrl, new Error(`Timed out after ${timeoutMs}ms`)));
    });
    // Pipe the archive as the request body; a read error aborts the request
    const fileStream = fs.createReadStream(archivePath);
    fileStream.on('error', (err) => {
      req.destroy();
      reject(err);
    });
    fileStream.pipe(req);
  });
}
/**
 * Tell the agent to apply a previously-uploaded restore archive. Fire-and-
 * forget: the agent launches `scripts/restore.sh` in the background and
 * returns immediately. Poll getRestoreProgress() for completion.
 */
async applyRestore(uploadId: string, options: AgentRestoreOptions = {}): Promise<void> {
  const route = `/instance/${this.slug}/restore/${encodeURIComponent(uploadId)}/apply`;
  const payload = { confirm: true, ...options };
  await this.request({ method: 'POST', path: route, body: payload });
}
/** Poll the agent for the current state of a restore. */
async getRestoreProgress(uploadId: string): Promise<AgentRestoreState> {
  const route = `/instance/${this.slug}/restore/${encodeURIComponent(uploadId)}/progress`;
  return this.request<AgentRestoreState>({ method: 'GET', path: route });
}
/**
 * Remove a restore upload directory from the agent's disk, once the CCP has
 * finalized the corresponding InstanceRestore row.
 */
async deleteRestoreUpload(uploadId: string): Promise<void> {
  const route = `/instance/${this.slug}/restore/${encodeURIComponent(uploadId)}`;
  await this.request({ method: 'DELETE', path: route });
}
// ─── Upgrade Operations ─────────────────────────────────────
/** Run upgrade-check.sh on the remote and return the parsed status.json. */
async checkForUpdates(): Promise<AgentUpdateStatus> {
  const route = `/instance/${this.slug}/upgrade/check`;
  // Generous timeout: the check contacts the upstream git remote
  const timeoutMs = 90_000;
  return this.request<AgentUpdateStatus>({ method: 'POST', path: route, timeoutMs });
}
/**
 * Trigger upgrade.sh --api-mode on the remote. Fire-and-forget; the agent
 * spawns the script in the background and replies immediately (202).
 * Track completion via getUpgradeProgress() / getUpgradeResult().
 */
async startUpgrade(options: StartAgentUpgradeOptions = {}): Promise<void> {
  const route = `/instance/${this.slug}/upgrade/start`;
  await this.request({ method: 'POST', path: route, body: options, timeoutMs: 30_000 });
}
/**
 * Read the agent's data/upgrade/progress.json. Returns the default zero-state
 * if no progress has been written yet.
 */
async getUpgradeProgress(): Promise<AgentUpgradeProgress> {
  const route = `/instance/${this.slug}/upgrade/progress`;
  return this.request<AgentUpgradeProgress>({ method: 'GET', path: route });
}
/**
 * Read the agent's data/upgrade/result.json. Throws if no result is yet
 * available; the caller should treat that as "still running".
 */
async getUpgradeResult(): Promise<AgentUpgradeResult> {
  const route = `/instance/${this.slug}/upgrade/result`;
  return this.request<AgentUpgradeResult>({ method: 'GET', path: route });
}
}

View File

@ -0,0 +1,376 @@
import fs from 'fs/promises';
import path from 'path';
import crypto from 'crypto';
import { createReadStream } from 'fs';
import { Prisma, RestoreStatus, AuditAction, InstanceStatus } from '@prisma/client';
import { prisma } from '../lib/prisma';
import { env } from '../config/env';
import { AppError } from '../middleware/error-handler';
import { logger } from '../utils/logger';
import { getRemoteDriverForInstance } from './execution-driver';
import type { AgentRestoreOptions, AgentRestoreState } from './remote-driver';
/**
 * Throw 403 unless `filePath` resolves to a location strictly inside
 * `boundary`. Guards restore reads against path traversal out of the
 * backup storage directory.
 */
function assertPathWithinBoundary(filePath: string, boundary: string, label: string): void {
  const resolved = path.resolve(filePath);
  const root = path.resolve(boundary);
  const inside = resolved.startsWith(root + path.sep);
  if (!inside) {
    throw new AppError(403, `${label} path outside allowed directory`, 'FORBIDDEN');
  }
}
/**
 * Stream a file through SHA-256 and resolve with the lowercase hex digest.
 */
async function fileHash(filePath: string): Promise<string> {
  const digest = crypto.createHash('sha256');
  const input = createReadStream(filePath);
  return new Promise<string>((resolve, reject) => {
    input.on('error', reject);
    input.on('data', (chunk) => digest.update(chunk));
    input.on('end', () => resolve(digest.digest('hex')));
  });
}
// How often the orchestrator polls the agent for restore progress.
const POLL_INTERVAL_MS = 3_000;
// Abandon the restore if the agent hasn't reached a terminal state by then.
const POLL_TIMEOUT_MS = 15 * 60 * 1_000; // 15 min

/** Arguments accepted by createRestore(). */
interface StartRestoreArgs {
  backupId: string;
  // Acting user for audit logging; when omitted, no audit row is written.
  triggeredById?: string;
  ipAddress?: string | null;
  // Flags forwarded verbatim to the agent's restore apply endpoint.
  options?: AgentRestoreOptions;
}
/**
 * Kick off a restore for the given backup. Validates backup/instance
 * preconditions, creates an InstanceRestore row, and runs the full
 * upload → apply → poll loop asynchronously. Returns the row so the
 * caller (HTTP handler) can respond immediately.
 *
 * @throws AppError 404 when the backup is missing or its archive file is gone
 * @throws AppError 400 when the backup is not COMPLETED, has no archive path,
 *         or the instance is not RUNNING
 * @throws AppError 501 for non-remote instances (Phase B is remote-only)
 * @throws AppError 403 when the archive path escapes BACKUP_STORAGE_PATH
 */
export async function createRestore(args: StartRestoreArgs) {
  const backup = await prisma.backup.findUnique({
    where: { id: args.backupId },
    include: { instance: true },
  });
  if (!backup) {
    throw new AppError(404, 'Backup not found', 'NOT_FOUND');
  }
  if (backup.status !== 'COMPLETED') {
    throw new AppError(400, `Backup is ${backup.status}, not COMPLETED`, 'INVALID_STATE');
  }
  if (!backup.archivePath) {
    throw new AppError(400, 'Backup has no archive path', 'NO_ARCHIVE');
  }
  const instance = backup.instance;
  if (instance.status !== InstanceStatus.RUNNING) {
    throw new AppError(400, `Cannot restore to instance in ${instance.status} state`, 'INVALID_STATE');
  }
  // Phase B only supports remote restore. Local restore is deliberately stubbed
  // — if you need it, add a performLocalRestore branch below. This also covers
  // the registered-but-local case (CCP-adopted instances) since they have
  // isRemote=false.
  if (!instance.isRemote) {
    throw new AppError(501, 'Local restore is not implemented — Phase B covers remote only', 'NOT_IMPLEMENTED');
  }
  // Make sure the archive is where it says it is and inside the boundary
  assertPathWithinBoundary(backup.archivePath, env.BACKUP_STORAGE_PATH, 'Backup archive');
  try {
    await fs.access(backup.archivePath);
  } catch {
    throw new AppError(404, 'Archive file is missing on disk', 'ARCHIVE_MISSING');
  }
  const restore = await prisma.instanceRestore.create({
    data: {
      instanceId: instance.id,
      backupId: backup.id,
      status: RestoreStatus.PENDING,
      triggeredById: args.triggeredById ?? null,
    },
  });
  // Fire-and-forget orchestration. performRemoteRestore records failures on
  // the restore row itself; this catch only prevents an unhandled rejection
  // from a truly unexpected throw.
  performRemoteRestore(restore.id, backup.archivePath, args.options ?? {}, args.triggeredById, args.ipAddress ?? null)
    .catch((err) => {
      logger.error(`[restore] ${restore.id} failed: ${(err as Error).message}`);
    });
  return restore;
}
/**
 * End-to-end remote restore orchestration — implemented by
 * performRemoteRestore() below (writeRestoreAuditLog sits in between).
 *
 * Flow:
 * 1. Compute sha256 of the archive on CCP disk
 * 2. Upload to agent with sha256 query param (agent re-verifies on stream)
 * 3. Apply via agent (shells out to restore.sh --force)
 * 4. Poll progress every 3s until COMPLETED/FAILED or timeout
 * 5. Delete the agent-side upload
 * 6. Update the InstanceRestore row + audit log
 */
/**
 * Write a BACKUP_RESTORE audit log entry. Wrapped in a try/catch so that an
 * audit-log DB failure can never mask the underlying restore status update.
 * No-ops when there is no acting user (userId is presumably required by the
 * AuditLog schema — confirm before enabling system-triggered restores).
 *
 * Called in all three terminal paths:
 * - success (outcome: 'success')
 * - agent reported failure (outcome: 'agent_failed')
 * - orchestration error / timeout / unexpected throw (outcome: 'orchestration_error')
 */
async function writeRestoreAuditLog(args: {
  restoreId: string;
  instanceId: string;
  backupId: string;
  triggeredById?: string;
  ipAddress?: string | null;
  options: AgentRestoreOptions;
  outcome: 'success' | 'agent_failed' | 'orchestration_error';
  sha256?: string;
  uploadId?: string | null;
  errorMessage?: string;
}): Promise<void> {
  if (!args.triggeredById) return;
  try {
    await prisma.auditLog.create({
      data: {
        userId: args.triggeredById,
        instanceId: args.instanceId,
        action: AuditAction.BACKUP_RESTORE,
        details: {
          backupId: args.backupId,
          restoreId: args.restoreId,
          source: 'remote',
          outcome: args.outcome,
          options: args.options as unknown as Prisma.InputJsonValue,
          // Optional fields only spread in when known; errorMessage is
          // truncated to keep audit rows bounded.
          ...(args.sha256 ? { sha256: args.sha256 } : {}),
          ...(args.uploadId ? { agentUploadId: args.uploadId } : {}),
          ...(args.errorMessage ? { errorMessage: args.errorMessage.substring(0, 500) } : {}),
        },
        ipAddress: args.ipAddress ?? null,
      },
    });
  } catch (err) {
    logger.error(`[restore] failed to write audit log for ${args.restoreId}: ${(err as Error).message}`);
  }
}
/**
 * Orchestrate one remote restore end-to-end: hash the local archive, upload
 * it to the agent, trigger the apply, poll until a terminal state (or the
 * 15-minute timeout), clean up the agent-side upload, then finalize the
 * InstanceRestore row and audit log. All failures are recorded on the row;
 * the function itself resolves rather than throwing.
 */
async function performRemoteRestore(
  restoreId: string,
  archivePath: string,
  options: AgentRestoreOptions,
  triggeredById?: string,
  ipAddress?: string | null
) {
  const restore = await prisma.instanceRestore.findUnique({
    where: { id: restoreId },
    include: { instance: true, backup: true },
  });
  if (!restore) {
    logger.error(`[restore] row ${restoreId} vanished mid-flight`);
    return;
  }
  const instance = restore.instance;
  // Tracked across the try/catch so the error path can clean up and audit
  let uploadId: string | null = null;
  let sha256: string | undefined;
  try {
    await prisma.instanceRestore.update({
      where: { id: restoreId },
      data: { status: RestoreStatus.UPLOADING },
    });
    const driver = await getRemoteDriverForInstance({
      id: instance.id,
      slug: instance.slug,
      isRemote: instance.isRemote,
      agentUrl: instance.agentUrl,
    });
    // 1. Compute local SHA256 (authoritative — the agent will verify against this).
    // We persist this in the audit log so there's an immutable record of exactly
    // which bytes were restored, useful for post-incident comparison.
    logger.info(`[restore] ${instance.slug}: hashing archive ${path.basename(archivePath)}`);
    sha256 = await fileHash(archivePath);
    // 2. Stream upload to agent
    logger.info(`[restore] ${instance.slug}: uploading archive (sha256=${sha256.substring(0, 16)}...)`);
    const uploadResult = await driver.uploadRestore(archivePath, sha256);
    uploadId = uploadResult.uploadId;
    await prisma.instanceRestore.update({
      where: { id: restoreId },
      data: { uploadId, status: RestoreStatus.RUNNING },
    });
    // 3. Apply (agent runs restore.sh in the background and returns at once)
    logger.info(`[restore] ${instance.slug}: applying restore ${uploadId}`);
    await driver.applyRestore(uploadId, options);
    // 4. Poll progress until terminal or deadline
    const deadline = Date.now() + POLL_TIMEOUT_MS;
    let finalState: AgentRestoreState | null = null;
    while (Date.now() < deadline) {
      await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS));
      try {
        const state = await driver.getRestoreProgress(uploadId);
        // Mirror progress to the DB row so the UI shows updates
        await prisma.instanceRestore.update({
          where: { id: restoreId },
          data: {
            progressJson: state as unknown as Prisma.InputJsonValue,
            logTail: state.logTail ?? null,
          },
        });
        if (state.status === 'COMPLETED' || state.status === 'FAILED') {
          finalState = state;
          break;
        }
      } catch (err) {
        logger.warn(`[restore] ${instance.slug}: poll error: ${(err as Error).message}`);
        // Keep polling — transient network blips shouldn't fail the restore
      }
    }
    if (!finalState) {
      // Handled by the outer catch: row marked FAILED + audit entry
      throw new Error(`Restore timed out after ${Math.round(POLL_TIMEOUT_MS / 1000)}s`);
    }
    // 5. Clean up agent-side upload (best effort)
    try {
      await driver.deleteRestoreUpload(uploadId);
    } catch (err) {
      logger.warn(`[restore] ${instance.slug}: failed to delete agent upload ${uploadId}: ${(err as Error).message}`);
    }
    // 6. Finalize DB row
    if (finalState.status === 'COMPLETED') {
      await prisma.instanceRestore.update({
        where: { id: restoreId },
        data: {
          status: RestoreStatus.COMPLETED,
          progressJson: finalState as unknown as Prisma.InputJsonValue,
          logTail: finalState.logTail ?? null,
          completedAt: new Date(),
        },
      });
      await writeRestoreAuditLog({
        restoreId,
        instanceId: instance.id,
        backupId: restore.backupId,
        triggeredById,
        ipAddress,
        options,
        outcome: 'success',
        sha256,
        uploadId,
      });
      logger.info(`[restore] ${instance.slug}: restore ${restoreId} COMPLETED`);
    } else {
      const errMsg = finalState.errorMessage || `Agent reported FAILED (exit ${finalState.exitCode})`;
      await prisma.instanceRestore.update({
        where: { id: restoreId },
        data: {
          status: RestoreStatus.FAILED,
          progressJson: finalState as unknown as Prisma.InputJsonValue,
          logTail: finalState.logTail ?? null,
          errorMessage: errMsg,
          completedAt: new Date(),
        },
      });
      await writeRestoreAuditLog({
        restoreId,
        instanceId: instance.id,
        backupId: restore.backupId,
        triggeredById,
        ipAddress,
        options,
        outcome: 'agent_failed',
        sha256,
        uploadId,
        errorMessage: errMsg,
      });
      logger.warn(`[restore] ${instance.slug}: restore ${restoreId} FAILED (exit ${finalState.exitCode})`);
    }
  } catch (err) {
    // Orchestration error: upload/apply threw, polling timed out, or a DB
    // update failed. Record it and attempt agent-side cleanup.
    const errMsg = (err as Error).message;
    await prisma.instanceRestore.update({
      where: { id: restoreId },
      data: {
        status: RestoreStatus.FAILED,
        errorMessage: errMsg,
        completedAt: new Date(),
      },
    });
    await writeRestoreAuditLog({
      restoreId,
      instanceId: instance.id,
      backupId: restore.backupId,
      triggeredById,
      ipAddress,
      options,
      outcome: 'orchestration_error',
      sha256,
      uploadId,
      errorMessage: errMsg,
    });
    logger.error(`[restore] ${restore.instance.slug}: ${errMsg}`);
    // Best-effort cleanup of the agent upload if we got that far
    if (uploadId) {
      try {
        const driver = await getRemoteDriverForInstance({
          id: instance.id,
          slug: instance.slug,
          isRemote: instance.isRemote,
          agentUrl: instance.agentUrl,
        });
        await driver.deleteRestoreUpload(uploadId);
      } catch { /* ignore */ }
    }
  }
}
/**
 * List restores, optionally filtered to one instance, newest first, with
 * simple page/limit pagination. Includes instance and backup summaries.
 */
export async function listRestores(instanceId?: string, page = 1, limit = 50) {
  const where = instanceId ? { instanceId } : {};
  const rowsQuery = prisma.instanceRestore.findMany({
    where,
    orderBy: { startedAt: 'desc' },
    skip: (page - 1) * limit,
    take: limit,
    include: {
      instance: { select: { id: true, name: true, slug: true } },
      backup: { select: { id: true, archivePath: true, sizeBytes: true } },
    },
  });
  const countQuery = prisma.instanceRestore.count({ where });
  const [data, total] = await Promise.all([rowsQuery, countQuery]);
  return { data, total, page, limit };
}
/**
 * Fetch a single restore by ID, with instance and backup summaries.
 * @throws AppError 404 when no such restore exists.
 */
export async function getRestore(restoreId: string) {
  const found = await prisma.instanceRestore.findUnique({
    where: { id: restoreId },
    include: {
      instance: { select: { id: true, name: true, slug: true } },
      backup: { select: { id: true, archivePath: true, sizeBytes: true, manifest: true } },
    },
  });
  if (found) return found;
  throw new AppError(404, 'Restore not found', 'NOT_FOUND');
}

View File

@ -0,0 +1,599 @@
/**
 * Remote tunnel management service.
 *
 * Orchestrates Pangolin site/resource/target creation on behalf of remote CML
 * instances, then pushes Newt credentials to the remote host via the mTLS agent.
 * The CCP holds the Pangolin API token centrally — remote instances never touch
 * the Pangolin API themselves.
 */
import { AuditAction, Prisma } from '@prisma/client';
import { prisma } from '../lib/prisma';
import { env } from '../config/env';
import { AppError } from '../middleware/error-handler';
import { logger } from '../utils/logger';
import { getRemoteDriverForInstance } from './execution-driver';
import {
CcpPangolinClient,
type PangolinDomain,
type PangolinResource,
} from './ccp-pangolin.client';
// ─── Resource definitions ──────────────────────────────────────────

/** One Pangolin resource to expose for an instance. */
interface ResourceDef {
  subdomain: string;    // Service subdomain suffix ('' = root/public site)
  name: string;         // Human-readable resource name shown in Pangolin
  required?: boolean;   // Always created; creation failure aborts setup
  featureFlag?: string; // Instance field gating creation (e.g. 'enableChat')
}

/**
 * Catalogue of resources a CML instance can expose. Required entries always
 * apply; feature-flagged entries only when the instance has the flag truthy;
 * the remaining optional entries are created on a best-effort basis.
 */
const RESOURCE_DEFINITIONS: ResourceDef[] = [
  { subdomain: 'app', name: 'Admin GUI', required: true },
  { subdomain: 'api', name: 'API', required: true },
  { subdomain: '', name: 'Public Site', required: true },
  { subdomain: 'media', name: 'Media API', featureFlag: 'enableMedia' },
  { subdomain: 'db', name: 'NocoDB', required: false },
  { subdomain: 'docs', name: 'Docs', required: false },
  { subdomain: 'code', name: 'Code Server', required: false },
  { subdomain: 'git', name: 'Gitea', required: false },
  { subdomain: 'home', name: 'Homepage', required: false },
  { subdomain: 'listmonk', name: 'Listmonk', featureFlag: 'enableListmonk' },
  { subdomain: 'qr', name: 'Mini QR', required: false },
  { subdomain: 'draw', name: 'Excalidraw', required: false },
  { subdomain: 'vault', name: 'Vaultwarden', required: false },
  { subdomain: 'mail', name: 'MailHog', required: false },
  { subdomain: 'chat', name: 'Rocket.Chat', featureFlag: 'enableChat' },
  { subdomain: 'events', name: 'Gancio', featureFlag: 'enableGancio' },
  { subdomain: 'meet', name: 'Jitsi Meet', featureFlag: 'enableMeet' },
  { subdomain: 'grafana', name: 'Grafana', featureFlag: 'enableMonitoring' },
];
// ─── Helpers ───────────────────────────────────────────────────────
/**
 * Build a Pangolin API client from the CCP environment.
 * @throws AppError 501 when any of the three required env vars is unset.
 */
function getPangolinClient(): CcpPangolinClient {
  const { PANGOLIN_API_URL, PANGOLIN_API_KEY, PANGOLIN_ORG_ID } = env;
  if (PANGOLIN_API_URL && PANGOLIN_API_KEY && PANGOLIN_ORG_ID) {
    return new CcpPangolinClient(PANGOLIN_API_URL, PANGOLIN_API_KEY, PANGOLIN_ORG_ID);
  }
  throw new AppError(
    501,
    'Pangolin API not configured on this CCP. Set PANGOLIN_API_URL, PANGOLIN_API_KEY, PANGOLIN_ORG_ID in the CCP .env file.',
    'PANGOLIN_NOT_CONFIGURED'
  );
}
/**
 * Join the instance prefix with a service subdomain.
 * An empty `sub` denotes the root resource and yields the prefix alone
 * (e.g. "ck"); otherwise the result is "<prefix>-<sub>" (e.g. "ck-app").
 */
function fullSubdomain(prefix: string, sub: string): string {
  return sub ? `${prefix}-${sub}` : prefix;
}
/**
 * Decide whether a resource definition applies to this instance.
 * Required defs always apply; feature-flagged defs apply only when the
 * instance carries a truthy flag; optional defs with no flag always apply.
 */
function shouldCreateResource(
  def: ResourceDef,
  instance: Record<string, unknown>
): boolean {
  if (def.required) return true;
  // `instance` is already typed as Record<string, unknown>; the redundant
  // inline cast the original carried has been dropped.
  if (def.featureFlag) return !!instance[def.featureFlag];
  return true; // optional with no feature flag → always create
}
/**
 * Resolve which registered Pangolin base domain should host an instance's
 * resources: exact match first, then progressively shorter parent domains.
 *
 * @throws AppError 400 when no registered base domain matches.
 */
async function findDomainForInstance(
  client: CcpPangolinClient,
  instanceDomain: string
): Promise<PangolinDomain> {
  const domains = await client.listDomains();
  // Match the instance's domain against registered Pangolin base domains
  // e.g., instance.domain = "cursedknowledge.org" → look for base domain "cursedknowledge.org"
  // or broader: instance.domain = "app.example.com" → look for "example.com"
  const exact = domains.find((d) => d.baseDomain === instanceDomain);
  if (exact) return exact;
  // Try matching parent domain (e.g., sub.example.com → example.com).
  // The bound `i < parts.length - 1` keeps candidates at two labels minimum.
  const parts = instanceDomain.split('.');
  for (let i = 1; i < parts.length - 1; i++) {
    const parent = parts.slice(i).join('.');
    const match = domains.find((d) => d.baseDomain === parent);
    if (match) return match;
  }
  throw new AppError(
    400,
    `No Pangolin domain matches instance domain "${instanceDomain}". Available: ${domains.map((d) => d.baseDomain).join(', ')}`,
    'DOMAIN_NOT_FOUND'
  );
}
// ─── Setup ─────────────────────────────────────────────────────────

/** Options for setupTunnel(). */
export interface SetupTunnelOptions {
  subdomainPrefix?: string; // Defaults to the instance slug when omitted
}

/** Summary returned after a successful tunnel setup. */
export interface TunnelSetupResult {
  siteId: string;
  newtId: string;
  endpoint: string;      // Pangolin endpoint Newt connects to
  resourceCount: number; // Resources created or reused during setup
  resources: Array<{ subdomain: string; name: string; resourceId: string }>;
}
/**
 * Create a Pangolin tunnel for a remote instance end-to-end:
 * pick Newt site defaults → create the site → create one resource + target
 * per applicable RESOURCE_DEFINITIONS entry → push the Newt credentials into
 * the remote .env → persist tunnel fields on the Instance row → recreate the
 * newt container → write an audit log entry.
 *
 * Idempotent at the resource level (existing fullDomains are reused), but
 * refuses to run when a site is already configured — use sync or teardown.
 *
 * Fixes vs. previous revision: removed the dead `envLines` local (its content
 * was never used; the .env write goes through buildUpdatedEnv) and repaired
 * the garbled resource-creation log message.
 *
 * @throws AppError 404 when the instance does not exist
 * @throws AppError 400 when the instance is not remote, a tunnel is already
 *         configured, or no Pangolin domain matches the instance domain
 * @throws AppError 501 when Pangolin is not configured on this CCP
 */
export async function setupTunnel(
  instanceId: string,
  options: SetupTunnelOptions,
  userId?: string,
  ipAddress?: string | null
): Promise<TunnelSetupResult> {
  const client = getPangolinClient();
  const instance = await prisma.instance.findUnique({ where: { id: instanceId } });
  if (!instance) throw new AppError(404, 'Instance not found', 'NOT_FOUND');
  if (!instance.isRemote) throw new AppError(400, 'Tunnel setup via Pangolin API is only for remote instances', 'NOT_REMOTE');
  if (instance.pangolinSiteId) {
    throw new AppError(400, 'Tunnel is already configured. Use sync to update resources, or teardown first.', 'ALREADY_CONFIGURED');
  }
  const prefix = options.subdomainPrefix || instance.slug;
  const driver = await getRemoteDriverForInstance({
    id: instance.id,
    slug: instance.slug,
    isRemote: instance.isRemote,
    agentUrl: instance.agentUrl,
  });
  // 1. Get Newt credentials
  logger.info(`[tunnel] ${instance.slug}: picking site defaults`);
  const defaults = await client.pickSiteDefaults();
  // 2. Create site
  logger.info(`[tunnel] ${instance.slug}: creating Pangolin site`);
  const site = await client.createSite({
    name: instance.slug,
    type: 'newt',
    newtId: defaults.newtId,
    secret: defaults.newtSecret,
    address: defaults.address,
  });
  const siteId = String(site.siteId);
  const newtId = site.newt?.newtId || defaults.newtId;
  const newtSecret = site.newt?.secret || defaults.newtSecret;
  // The Pangolin endpoint (what Newt connects to) may be different from
  // the API URL. E.g., API = api.bnkserve.org/v1, endpoint = pangolin.bnkserve.org.
  // If PANGOLIN_ENDPOINT is set, use it. Otherwise derive from API URL.
  let endpoint = env.PANGOLIN_ENDPOINT || '';
  if (!endpoint) {
    const endpointUrl = new URL(env.PANGOLIN_API_URL);
    endpoint = `${endpointUrl.protocol}//${endpointUrl.hostname}${endpointUrl.port ? ':' + endpointUrl.port : ''}`;
  }
  // 3. Find matching domain
  const domain = await findDomainForInstance(client, instance.domain);
  logger.info(`[tunnel] ${instance.slug}: matched domain ${domain.baseDomain} (id: ${domain.domainId})`);
  // 4. Create resources + targets
  const createdResources: Array<{ subdomain: string; name: string; resourceId: string }> = [];
  const existingResources = await client.listResources();
  for (const def of RESOURCE_DEFINITIONS) {
    if (!shouldCreateResource(def, instance as unknown as Record<string, unknown>)) {
      logger.debug(`[tunnel] ${instance.slug}: skipping ${def.name} (feature not enabled)`);
      continue;
    }
    const sub = fullSubdomain(prefix, def.subdomain);
    // Build the expected full domain so we can do an idempotent check against
    // Pangolin's existing resources. Pangolin returns `fullDomain` not `subdomain`.
    const expectedFullDomain = sub
      ? `${sub}.${domain.baseDomain}`
      : domain.baseDomain;
    // Idempotent: skip if a resource with this fullDomain already exists
    const existing = existingResources.find(
      (r) => r.fullDomain === expectedFullDomain
    );
    if (existing) {
      logger.debug(`[tunnel] ${instance.slug}: resource ${def.name} (${expectedFullDomain}) already exists`);
      createdResources.push({ subdomain: sub, name: def.name, resourceId: String(existing.resourceId) });
      continue;
    }
    try {
      const resourcePayload: Record<string, unknown> = {
        name: def.name,
        domainId: domain.domainId,
        http: true,
        protocol: 'tcp',
      };
      // Root domain: omit subdomain entirely (empty string is rejected by Pangolin)
      if (sub) resourcePayload.subdomain = sub;
      const resource = await client.createResource(resourcePayload as unknown as Parameters<typeof client.createResource>[0]);
      // Make the resource public (no SSO, no access block)
      try {
        await client.updateResource(resource.resourceId, { sso: false, blockAccess: false });
      } catch (err) {
        logger.warn(`[tunnel] ${instance.slug}: failed to make ${def.name} public: ${(err as Error).message}`);
      }
      // Create target pointing to nginx:80 on the remote host
      await client.createTarget(resource.resourceId, {
        siteId: Number(siteId),
        ip: 'nginx',
        port: 80,
        method: 'http',
        enabled: true,
      });
      createdResources.push({ subdomain: sub, name: def.name, resourceId: resource.resourceId });
      logger.info(`[tunnel] ${instance.slug}: created resource ${def.name} -> ${expectedFullDomain}`);
    } catch (err) {
      // Optional resources are best-effort; required ones abort the setup
      if (def.required) throw err;
      logger.warn(`[tunnel] ${instance.slug}: failed to create optional resource ${def.name}: ${(err as Error).message}`);
    }
  }
  // 5. Push Newt credentials to remote .env: read the current file and
  // append/replace the Pangolin vars
  logger.info(`[tunnel] ${instance.slug}: pushing Newt credentials to remote .env`);
  const currentEnv = await driver.readEnvFile('');
  const envContent = buildUpdatedEnv(currentEnv, {
    PANGOLIN_ENDPOINT: endpoint,
    PANGOLIN_SITE_ID: siteId,
    PANGOLIN_NEWT_ID: newtId,
    PANGOLIN_NEWT_SECRET: newtSecret,
  });
  await driver.writeFiles('', [{ relativePath: '.env', content: envContent }]);
  // 6. Persist on Instance row
  await prisma.instance.update({
    where: { id: instanceId },
    data: {
      pangolinEndpoint: endpoint,
      pangolinSiteId: siteId,
      pangolinNewtId: newtId,
      pangolinNewtSecret: newtSecret,
      pangolinSubdomainPrefix: prefix,
    },
  });
  // 7. Recreate Newt container to pick up the new .env vars.
  // `docker compose restart` does NOT re-read .env — it only sends SIGTERM+restart.
  // `docker compose up -d newt` detects env var changes (via ${PANGOLIN_NEWT_ID}
  // expansion in docker-compose.yml) and recreates the container automatically.
  logger.info(`[tunnel] ${instance.slug}: recreating newt container with new credentials`);
  try {
    await driver.composeUp('', '', ['newt']);
  } catch (err) {
    logger.warn(`[tunnel] ${instance.slug}: composeUp(newt) failed: ${(err as Error).message}`);
  }
  // 8. Audit log
  if (userId) {
    await prisma.auditLog.create({
      data: {
        userId,
        instanceId,
        action: AuditAction.PANGOLIN_SETUP,
        details: {
          source: 'remote',
          siteId,
          newtId,
          endpoint,
          resourceCount: createdResources.length,
          subdomainPrefix: prefix,
        } as unknown as Prisma.InputJsonValue,
        ipAddress: ipAddress ?? null,
      },
    });
  }
  logger.info(`[tunnel] ${instance.slug}: tunnel setup complete — ${createdResources.length} resources created`);
  return {
    siteId,
    newtId,
    endpoint,
    resourceCount: createdResources.length,
    resources: createdResources,
  };
}
// ─── Sync ──────────────────────────────────────────────────────────
/**
 * Reconcile the Pangolin resource set for an instance that already has a
 * tunnel: every resource definition that should exist for this instance but
 * has no matching fullDomain in Pangolin is created, with its target pointed
 * at the instance's internal nginx. Existing resources are left untouched.
 *
 * @param instanceId Instance row ID; must have `pangolinSiteId` set.
 * @param userId     Optional actor — when present, a PANGOLIN_SYNC audit row is written.
 * @param ipAddress  Optional actor IP for the audit row.
 * @returns `{ synced: true, created }` where `created` counts new resources.
 * @throws AppError 404 when the instance is missing, 400 when no tunnel is configured.
 */
export async function syncResources(
  instanceId: string,
  userId?: string,
  ipAddress?: string | null
) {
  const pangolin = getPangolinClient();
  const instance = await prisma.instance.findUnique({ where: { id: instanceId } });
  if (!instance) throw new AppError(404, 'Instance not found', 'NOT_FOUND');
  if (!instance.pangolinSiteId) throw new AppError(400, 'No tunnel configured', 'NO_TUNNEL');

  const subdomainPrefix = instance.pangolinSubdomainPrefix || instance.slug;
  const domain = await findDomainForInstance(pangolin, instance.domain);
  const currentResources = await pangolin.listResources();
  const siteId = instance.pangolinSiteId;

  let createdCount = 0;
  for (const definition of RESOURCE_DEFINITIONS) {
    if (!shouldCreateResource(definition, instance as unknown as Record<string, unknown>)) continue;

    const sub = fullSubdomain(subdomainPrefix, definition.subdomain);
    // An empty subdomain means the resource lives on the bare base domain.
    const wantedFullDomain = sub ? `${sub}.${domain.baseDomain}` : domain.baseDomain;
    if (currentResources.some((r) => r.fullDomain === wantedFullDomain)) continue;

    try {
      const payload: Record<string, unknown> = {
        name: definition.name,
        domainId: domain.domainId,
        http: true,
        protocol: 'tcp',
      };
      if (sub) payload.subdomain = sub;
      const resource = await pangolin.createResource(payload as unknown as Parameters<typeof pangolin.createResource>[0]);
      await pangolin.updateResource(resource.resourceId, { sso: false, blockAccess: false });
      await pangolin.createTarget(resource.resourceId, {
        siteId: Number(siteId),
        ip: 'nginx',
        port: 80,
        method: 'http',
        enabled: true,
      });
      createdCount++;
      logger.info(`[tunnel] ${instance.slug}: sync created ${definition.name} (${sub})`);
    } catch (err) {
      // Required resources abort the whole sync; optional ones are best-effort.
      if (definition.required) throw err;
      logger.warn(`[tunnel] ${instance.slug}: sync failed for ${definition.name}: ${(err as Error).message}`);
    }
  }

  if (userId) {
    await prisma.auditLog.create({
      data: {
        userId,
        instanceId,
        action: AuditAction.PANGOLIN_SYNC,
        details: { source: 'remote', created: createdCount, siteId } as unknown as Prisma.InputJsonValue,
        ipAddress: ipAddress ?? null,
      },
    });
  }

  return { synced: true, created: createdCount };
}
// ─── Teardown ──────────────────────────────────────────────────────
/**
 * Tear down an instance's Pangolin tunnel: delete the site (Pangolin cascades
 * resources + targets), null out the credential columns on the Instance row,
 * and — for remote instances — push an .env without the Pangolin vars and
 * bounce the compose stack so the newt container stops using the old creds.
 *
 * Site deletion and the remote .env push are best-effort; failures are logged
 * but do not abort the teardown, so the DB state is always cleared.
 *
 * @param instanceId Instance row ID; must have `pangolinSiteId` set.
 * @param userId     Optional actor — when present, a PANGOLIN_TEARDOWN audit row is written.
 * @param ipAddress  Optional actor IP for the audit row.
 * @returns `{ tornDown: true }` on completion.
 * @throws AppError 404 when the instance is missing, 400 when no tunnel is configured.
 */
export async function teardownTunnel(
  instanceId: string,
  userId?: string,
  ipAddress?: string | null
) {
  const client = getPangolinClient();
  const instance = await prisma.instance.findUnique({ where: { id: instanceId } });
  if (!instance) throw new AppError(404, 'Instance not found', 'NOT_FOUND');
  if (!instance.pangolinSiteId) throw new AppError(400, 'No tunnel configured', 'NO_TUNNEL');
  const siteId = instance.pangolinSiteId;
  // Delete site from Pangolin (cascades resources + targets). Best-effort:
  // the site may already be gone if Pangolin was cleaned up out-of-band.
  try {
    await client.deleteSite(siteId);
    logger.info(`[tunnel] ${instance.slug}: deleted Pangolin site ${siteId}`);
  } catch (err) {
    logger.warn(`[tunnel] ${instance.slug}: deleteSite failed (may already be gone): ${(err as Error).message}`);
  }
  // Clear Instance credential fields.
  // NOTE(review): pangolinSubdomainPrefix is NOT cleared here, although setup
  // sets it — presumably kept so a future re-setup reuses the same prefix;
  // confirm that this asymmetry is intentional.
  await prisma.instance.update({
    where: { id: instanceId },
    data: {
      pangolinEndpoint: null,
      pangolinSiteId: null,
      pangolinNewtId: null,
      pangolinNewtSecret: null,
    },
  });
  // Push empty Pangolin vars to remote .env (buildUpdatedEnv drops keys whose
  // update value is the empty string, so the vars are removed from the file).
  if (instance.isRemote) {
    try {
      const driver = await getRemoteDriverForInstance({
        id: instance.id,
        slug: instance.slug,
        isRemote: instance.isRemote,
        agentUrl: instance.agentUrl,
      });
      const currentEnv = await driver.readEnvFile('');
      const envContent = buildUpdatedEnv(currentEnv, {
        PANGOLIN_ENDPOINT: '',
        PANGOLIN_SITE_ID: '',
        PANGOLIN_NEWT_ID: '',
        PANGOLIN_NEWT_SECRET: '',
      });
      await driver.writeFiles('', [{ relativePath: '.env', content: envContent }]);
      // Stop + restart the stack (best effort). Presumably newt stays down
      // after the restart because its credentials are gone — TODO confirm.
      try {
        await driver.composeStop('', '');
        await driver.composeUp('', ''); // restart everything except newt won't start without creds
      } catch { /* ignore */ }
    } catch (err) {
      logger.warn(`[tunnel] ${instance.slug}: failed to push empty env to remote: ${(err as Error).message}`);
    }
  }
  // Audit log (only when an acting user is known)
  if (userId) {
    await prisma.auditLog.create({
      data: {
        userId,
        instanceId,
        action: AuditAction.PANGOLIN_TEARDOWN,
        details: { source: 'remote', siteId } as unknown as Prisma.InputJsonValue,
        ipAddress: ipAddress ?? null,
      },
    });
  }
  return { tornDown: true };
}
// ─── Status ────────────────────────────────────────────────────────
/**
 * Snapshot of an instance's Pangolin tunnel state as reported by
 * getTunnelStatus(). Only `configured` is always present; the remaining
 * fields are filled in when a tunnel exists (and, for `online`/`resources`,
 * when the Pangolin API could be queried).
 */
export interface TunnelStatus {
  // False when the instance has no pangolinSiteId stored.
  configured: boolean;
  // Whether Pangolin reports the site as online (remote instances only).
  online?: boolean;
  // Stored Pangolin site ID for this instance.
  siteId?: string;
  // Stored Pangolin endpoint URL.
  endpoint?: string;
  // Resources whose targets point at this instance's site.
  resources?: Array<{
    subdomain: string;
    name: string;
    resourceId: string;
    // True when at least one target on the resource matches our site.
    hasTarget: boolean;
    targetIp?: string;
    targetPort?: number;
  }>;
}
/**
 * Report the tunnel status for an instance.
 *
 * - No pangolinSiteId stored → `{ configured: false }`.
 * - Local instance → stored siteId/endpoint only (no Pangolin API calls).
 * - Remote instance → queries Pangolin for site liveness and enumerates the
 *   resources whose targets point at this instance's site. All Pangolin
 *   calls are best-effort: failures are logged and yield partial status
 *   rather than an error.
 *
 * @param instanceId Instance row ID.
 * @throws AppError 404 when the instance does not exist.
 */
export async function getTunnelStatus(instanceId: string): Promise<TunnelStatus> {
  const instance = await prisma.instance.findUnique({ where: { id: instanceId } });
  if (!instance) throw new AppError(404, 'Instance not found', 'NOT_FOUND');
  if (!instance.pangolinSiteId) {
    return { configured: false };
  }
  // For local instances, return stored values without querying Pangolin API
  if (!instance.isRemote) {
    return {
      configured: true,
      siteId: instance.pangolinSiteId ?? undefined,
      endpoint: instance.pangolinEndpoint ?? undefined,
    };
  }
  const client = getPangolinClient();
  let online = false;
  try {
    const site = await client.getSite(instance.pangolinSiteId);
    online = site.online ?? false;
  } catch (err) {
    // Best-effort: an unreachable Pangolin just reports the site as offline.
    logger.warn(`[tunnel] ${instance.slug}: getSite failed: ${(err as Error).message}`);
  }
  const resources: TunnelStatus['resources'] = [];
  try {
    const allResources = await client.listResources();
    const siteIdNum = Number(instance.pangolinSiteId);
    // Filter to resources that have a target pointing to our siteId.
    // This is the most reliable filter since it uses the actual Pangolin
    // site association rather than guessing from subdomain names.
    // NOTE(review): this is one listTargets call per resource (N+1 against
    // the Pangolin API) — acceptable for small fleets, revisit if resource
    // counts grow.
    for (const res of allResources) {
      let hasTarget = false;
      let targetIp: string | undefined;
      let targetPort: number | undefined;
      let belongsToUs = false;
      try {
        const targets = await client.listTargets(String(res.resourceId));
        for (const t of targets) {
          if (Number(t.siteId) === siteIdNum) {
            belongsToUs = true;
            hasTarget = true;
            targetIp = t.ip;
            targetPort = t.port;
            break;
          }
        }
      } catch { /* ignore */ }
      if (belongsToUs) {
        // Extract subdomain from fullDomain for display.
        // NOTE(review): this strips instance.domain, while resources are
        // created under the Pangolin domain's baseDomain — presumably these
        // match; confirm, otherwise the full domain is shown unstripped.
        const fd = res.fullDomain || '';
        const domainSuffix = `.${instance.domain}`;
        const subdomain = fd.endsWith(domainSuffix)
          ? fd.slice(0, -domainSuffix.length)
          : fd === instance.domain ? '' : fd;
        resources.push({
          subdomain,
          name: res.name,
          resourceId: String(res.resourceId),
          hasTarget,
          targetIp,
          targetPort,
        });
      }
    }
  } catch (err) {
    // Best-effort: resource enumeration failure leaves `resources` empty.
    logger.warn(`[tunnel] ${instance.slug}: listResources failed: ${(err as Error).message}`);
  }
  return {
    configured: true,
    online,
    siteId: instance.pangolinSiteId ?? undefined,
    endpoint: instance.pangolinEndpoint ?? undefined,
    resources,
  };
}
// ─── .env Helpers ──────────────────────────────────────────────────
/**
 * Quote a .env value if it contains characters that dotenv parsers interpret:
 * whitespace, # (comment), = (separator), quotes, backslashes, CR/LF.
 * Pangolin-issued UUIDs/base64 secrets typically don't need quoting, but
 * defensive quoting prevents silent corruption if they ever do.
 *
 * Escapes backslash, double quote, LF and CR inside the quoted form.
 * (Previously a bare CR triggered quoting but was written through raw,
 * which could still split/corrupt the line for CRLF-aware parsers.)
 */
function quoteEnvValue(value: string): string {
  if (/[\s#"'\\=\n\r]/.test(value)) {
    const escaped = value
      .replace(/\\/g, '\\\\')
      .replace(/"/g, '\\"')
      .replace(/\n/g, '\\n')
      .replace(/\r/g, '\\r');
    return `"${escaped}"`;
  }
  return value;
}
/**
 * Build an updated .env string by replacing/appending the given key-value pairs.
 * Preserves all existing keys not in the update set, in their original order.
 *
 * @param currentEnv Parsed existing .env (key → value), or null when the file
 *                   is missing/unreadable — then only `updates` are emitted.
 * @param updates    Keys to set. An empty-string value REMOVES the key from
 *                   the output instead of writing `KEY=`.
 * @returns The full .env file content, newline-terminated.
 */
function buildUpdatedEnv(
  currentEnv: Record<string, string> | null,
  updates: Record<string, string>
): string {
  const lines: string[] = [];
  const seen = new Set<string>();
  // If we have the current env, reproduce it with replacements
  if (currentEnv) {
    for (const [key, value] of Object.entries(currentEnv)) {
      if (key in updates) {
        if (updates[key]) lines.push(`${key}=${quoteEnvValue(updates[key]!)}`);
        // If update value is empty, omit the line (remove the var)
        seen.add(key);
      } else {
        lines.push(`${key}=${quoteEnvValue(value)}`);
      }
    }
  }
  // Append new keys not already in the file (empty values are not appended)
  for (const [key, value] of Object.entries(updates)) {
    if (!seen.has(key) && value) {
      lines.push(`${key}=${quoteEnvValue(value)}`);
    }
  }
  return lines.join('\n') + '\n';
}

View File

@ -2,14 +2,61 @@ import { exec as execCb } from 'child_process';
import { promisify } from 'util';
import fs from 'fs/promises';
import path from 'path';
import { UpgradeStatus, AuditAction, InstanceStatus, Prisma } from '@prisma/client';
import { UpgradeStatus, AuditAction, InstanceStatus, Prisma, Instance } from '@prisma/client';
import { prisma } from '../lib/prisma';
import { logger } from '../utils/logger';
import { createEvent } from './event.service';
import { getRemoteDriverForInstance } from './execution-driver';
import type { AgentUpdateStatus } from './remote-driver';
/**
 * Write an INSTANCE_UPGRADE audit log entry capturing a terminal outcome.
 * Wrapped in try/catch so that an audit-log DB failure cannot mask the
 * underlying upgrade row status update.
 *
 * Called from all three terminal paths (both local and remote):
 * - 'completed' — upgrade.sh/agent reported success
 * - 'failed' — upgrade.sh/agent reported failure
 * - 'orchestration_error' — CCP-side exception, timeout, or unreachable agent
 *
 * No-ops when there is no triggering user (nothing to attribute the entry to).
 */
async function writeUpgradeAuditLog(args: {
  upgradeId: string;
  instanceId: string;
  triggeredById: string | null;
  source: 'local' | 'remote';
  outcome: 'completed' | 'failed' | 'orchestration_error';
  previousCommit: string | null;
  newCommit: string | null;
  durationSeconds: number | null;
  errorMessage?: string | null;
}): Promise<void> {
  if (!args.triggeredById) return;

  // Assemble the details payload up front; errorMessage is optional and
  // truncated to keep the JSON column bounded.
  const details: Record<string, unknown> = {
    upgradeId: args.upgradeId,
    source: args.source,
    outcome: args.outcome,
    previousCommit: args.previousCommit,
    newCommit: args.newCommit,
    durationSeconds: args.durationSeconds,
  };
  if (args.errorMessage) {
    details.errorMessage = args.errorMessage.substring(0, 500);
  }

  try {
    await prisma.auditLog.create({
      data: {
        userId: args.triggeredById,
        instanceId: args.instanceId,
        action: AuditAction.INSTANCE_UPGRADE,
        details: details as unknown as Prisma.InputJsonValue,
      },
    });
  } catch (err) {
    // Audit logging must never break the upgrade flow — log and move on.
    logger.error(`[upgrade] failed to write audit log for ${args.upgradeId}: ${(err as Error).message}`);
  }
}
const exec = promisify(execCb);
const UPGRADE_TIMEOUT = 600_000; // 10 minutes
const UPGRADE_TIMEOUT = 600_000; // 10 minutes — local upgrades
const REMOTE_UPGRADE_TIMEOUT = 15 * 60 * 1000; // 15 minutes — remote (network round trips)
const PROGRESS_POLL_INTERVAL = 2_000; // 2 seconds
// ─── Update Check ─────────────────────────────────────────────────
@ -26,13 +73,57 @@ export interface UpdateStatus {
}
/**
* Check for available updates by running upgrade-check.sh in the instance's basePath.
* Falls back to reading an existing status.json if the script isn't available.
* Check for available updates. Branches on instance.isRemote:
* - Local: runs upgrade-check.sh in the instance's basePath and reads status.json
* - Remote: calls the agent's POST /upgrade/check endpoint over mTLS
*/
/**
 * Check for available updates for an instance. Dispatches on isRemote:
 * remote instances are asked through their agent, local instances run the
 * on-disk check script.
 *
 * @throws Error when the instance does not exist.
 */
export async function checkForUpdates(instanceId: string): Promise<UpdateStatus> {
  const instance = await prisma.instance.findUnique({ where: { id: instanceId } });
  if (!instance) throw new Error('Instance not found');
  return instance.isRemote
    ? checkForUpdatesRemote(instance)
    : checkForUpdatesLocal(instance);
}
/**
 * Remote check: ask the agent to run upgrade-check.sh and return its status.json.
 * Any driver/agent failure is converted into an UpdateStatus carrying an
 * `error` string (built from the DB-tracked branch/commit) rather than thrown,
 * so callers always receive a renderable result.
 */
async function checkForUpdatesRemote(instance: Instance): Promise<UpdateStatus> {
  try {
    const driver = await getRemoteDriverForInstance({
      id: instance.id,
      slug: instance.slug,
      isRemote: instance.isRemote,
      agentUrl: instance.agentUrl,
    });
    const agentStatus: AgentUpdateStatus = await driver.checkForUpdates();
    const {
      branch,
      currentCommit,
      currentMessage,
      remoteCommit,
      commitsBehind,
      changelog,
      checkedAt,
      error,
    } = agentStatus;
    return {
      branch,
      currentCommit,
      currentMessage,
      remoteCommit,
      commitsBehind,
      changelog,
      checkedAt,
      error,
    };
  } catch (err) {
    const message = (err as Error).message;
    logger.warn(`[upgrade] remote check failed for ${instance.slug}: ${message}`);
    // Fall back to what the DB knows; signal the failure via `error`.
    return {
      branch: instance.gitBranch,
      currentCommit: instance.gitCommit || 'unknown',
      remoteCommit: null,
      commitsBehind: 0,
      changelog: [],
      checkedAt: new Date().toISOString(),
      error: `Remote check failed: ${message}`,
    };
  }
}
async function checkForUpdatesLocal(instance: Instance): Promise<UpdateStatus> {
const basePath = instance.basePath;
const statusFile = path.join(basePath, 'data', 'upgrade', 'status.json');
const scriptPath = path.join(basePath, 'scripts', 'upgrade-check.sh');
@ -119,16 +210,21 @@ export async function startUpgrade(
throw new Error('An upgrade is already in progress for this instance');
}
// Get current commit for tracking
let currentCommit: string | null = null;
try {
const { stdout } = await exec('git rev-parse --short HEAD', {
cwd: instance.basePath,
timeout: 5_000,
});
currentCommit = stdout.trim();
} catch {
// Non-critical — may be a release install without .git
// Get current commit for tracking. For local instances we can read it from
// git directly; for remote instances we either trust the DB-tracked value
// (set by previous upgrade-check) or leave it null and let upgrade.sh
// report the previous commit in result.json.
let currentCommit: string | null = instance.gitCommit;
if (!instance.isRemote) {
try {
const { stdout } = await exec('git rev-parse --short HEAD', {
cwd: instance.basePath,
timeout: 5_000,
});
currentCommit = stdout.trim();
} catch {
// Non-critical — may be a release install without .git
}
}
const branch = options?.branch || instance.gitBranch;
@ -154,20 +250,222 @@ export async function startUpgrade(
upgradeId: upgrade.id,
previousCommit: currentCommit,
branch,
source: instance.isRemote ? 'remote' : 'local',
options: options || {},
} as unknown as Prisma.InputJsonValue,
ipAddress,
},
});
// Fire-and-forget: run the upgrade asynchronously
runUpgrade(upgrade.id, instance.basePath, instance.slug, options).catch((err) => {
logger.error(`[upgrade] Upgrade orchestration failed for ${instance.slug}: ${err}`);
});
// Fire-and-forget: branch on isRemote
if (instance.isRemote) {
runRemoteUpgrade(upgrade.id, instance, options).catch((err) => {
logger.error(`[upgrade] Remote upgrade orchestration failed for ${instance.slug}: ${err}`);
});
} else {
runUpgrade(upgrade.id, instance.basePath, instance.slug, options).catch((err) => {
logger.error(`[upgrade] Upgrade orchestration failed for ${instance.slug}: ${err}`);
});
}
return upgrade;
}
/**
* Async REMOTE upgrade runner.
*
* Flow:
* 1. Get RemoteDriver
* 2. Mark InstanceUpgrade IN_PROGRESS
* 3. Tell agent to start upgrade.sh in --api-mode
* 4. Poll agent /upgrade/progress every 2s, mirror to DB
* 5. Try /upgrade/result every poll cycle; when present, finalize
* 6. On timeout (15 min), mark FAILED and create error event
*
* Note: there is no shell or filesystem access on the CCP side everything
* goes through the mTLS agent. The agent's spawn of upgrade.sh is itself
* fire-and-forget under a slug mutex.
*/
async function runRemoteUpgrade(
  upgradeId: string,
  instance: Instance,
  options?: StartUpgradeOptions
) {
  const slug = instance.slug;
  try {
    const driver = await getRemoteDriverForInstance({
      id: instance.id,
      slug: instance.slug,
      isRemote: instance.isRemote,
      agentUrl: instance.agentUrl,
    });
    // Mark IN_PROGRESS before asking the agent to start, so the UI reflects
    // the attempt even if the start call itself fails.
    await prisma.instanceUpgrade.update({
      where: { id: upgradeId },
      data: {
        status: UpgradeStatus.IN_PROGRESS,
        progressMessage: 'Starting remote upgrade...',
      },
    });
    // Tell the agent to start. The agent has its own mutex + stale-progress
    // check, so this can return 409 if a previous upgrade is still running
    // (surfaced here as a thrown error → orchestration_error path below).
    logger.info(`[upgrade] ${slug}: triggering remote upgrade.sh start`);
    await driver.startUpgrade({
      skipBackup: options?.skipBackup,
      useRegistry: options?.useRegistry,
      branch: options?.branch,
    });
    // Poll progress + result. We treat /result returning 200 as the signal
    // that upgrade.sh exited (successfully or with code != 0 — the script
    // writes result.json either way in --api-mode).
    const deadline = Date.now() + REMOTE_UPGRADE_TIMEOUT;
    let lastProgress: { phase?: number; phaseName?: string; percentage?: number; message?: string } = {};
    while (Date.now() < deadline) {
      await new Promise((r) => setTimeout(r, PROGRESS_POLL_INTERVAL));
      // Try to fetch the result first; if it exists, we're done
      let result = null;
      try {
        result = await driver.getUpgradeResult();
      } catch {
        // No result yet — keep polling progress
      }
      if (result) {
        // Final result available — finalize the upgrade row and exit.
        // Read the row first so the audit entry can carry triggeredById
        // and previousCommit.
        const upgradeRowBefore = await prisma.instanceUpgrade.findUnique({ where: { id: upgradeId } });
        await prisma.instanceUpgrade.update({
          where: { id: upgradeId },
          data: {
            status: result.success ? UpgradeStatus.COMPLETED : UpgradeStatus.FAILED,
            newCommit: result.newCommit || null,
            commitCount: result.commitCount || 0,
            percentage: 100,
            phaseName: 'Complete',
            progressMessage: result.message || 'Upgrade completed',
            durationSeconds: result.durationSeconds || null,
            warnings: result.warnings?.length ? (result.warnings as unknown as Prisma.InputJsonValue) : undefined,
            errorMessage: result.success ? null : (result.message || 'Upgrade failed'),
            completedAt: new Date(),
          },
        });
        // Update Instance.gitCommit if we have a new commit
        if (result.newCommit) {
          await prisma.instance.update({
            where: { id: instance.id },
            data: { gitCommit: result.newCommit },
          });
        }
        // Failed upgrades also get a visible instance event.
        if (!result.success) {
          await createEvent(
            instance.id,
            'ERROR',
            'upgrade',
            'Remote upgrade failed',
            result.message || 'The remote upgrade process failed. Check the agent log for details.',
            { upgradeId, source: 'remote', warnings: result.warnings }
          );
        }
        await writeUpgradeAuditLog({
          upgradeId,
          instanceId: instance.id,
          triggeredById: upgradeRowBefore?.triggeredById ?? null,
          source: 'remote',
          outcome: result.success ? 'completed' : 'failed',
          previousCommit: upgradeRowBefore?.previousCommit ?? null,
          newCommit: result.newCommit || null,
          durationSeconds: result.durationSeconds || null,
          errorMessage: result.success ? null : (result.message || 'Upgrade failed'),
        });
        logger.info(`[upgrade] ${slug}: remote upgrade ${result.success ? 'COMPLETED' : 'FAILED'}`);
        return;
      }
      // No result yet — pull progress
      try {
        const progress = await driver.getUpgradeProgress();
        // Only update DB if something actually changed (avoid hot-loop writes)
        if (
          progress.phase !== lastProgress.phase ||
          progress.percentage !== lastProgress.percentage ||
          progress.message !== lastProgress.message
        ) {
          lastProgress = {
            phase: progress.phase,
            phaseName: progress.phaseName,
            percentage: progress.percentage,
            message: progress.message,
          };
          await prisma.instanceUpgrade.update({
            where: { id: upgradeId },
            data: {
              currentPhase: progress.phase || 0,
              phaseName: progress.phaseName || null,
              percentage: progress.percentage || 0,
              progressMessage: progress.message || null,
            },
          });
        }
      } catch (err) {
        // Transient network blip during a long upgrade — keep polling
        logger.debug(`[upgrade] ${slug}: progress poll error: ${(err as Error).message}`);
      }
    }
    // Timeout — mark FAILED (handled by the catch below alongside all other
    // orchestration errors)
    throw new Error(`Remote upgrade timed out after ${Math.round(REMOTE_UPGRADE_TIMEOUT / 60_000)} minutes`);
  } catch (err) {
    const errorMsg = (err as Error).message;
    // NOTE(review): timeout detection is by message substring — fragile if
    // the message above is ever reworded; consider a dedicated error class.
    const isTimeout = errorMsg.includes('timed out');
    const upgradeRowBefore = await prisma.instanceUpgrade.findUnique({ where: { id: upgradeId } });
    await prisma.instanceUpgrade.update({
      where: { id: upgradeId },
      data: {
        status: UpgradeStatus.FAILED,
        errorMessage: isTimeout ? errorMsg : errorMsg.slice(0, 2000),
        progressMessage: 'Failed',
        completedAt: new Date(),
      },
    });
    await createEvent(
      instance.id,
      'ERROR',
      'upgrade',
      isTimeout ? 'Remote upgrade timed out' : 'Remote upgrade failed',
      errorMsg.slice(0, 500),
      { upgradeId, source: 'remote' }
    );
    await writeUpgradeAuditLog({
      upgradeId,
      instanceId: instance.id,
      triggeredById: upgradeRowBefore?.triggeredById ?? null,
      source: 'remote',
      outcome: 'orchestration_error',
      previousCommit: upgradeRowBefore?.previousCommit ?? null,
      newCommit: null,
      durationSeconds: null,
      errorMessage: errorMsg,
    });
    // Don't flip the instance to ERROR state for remote upgrades — the agent
    // health check will reflect the real state on the next poll, and we don't
    // want to mask a recovered instance with stale CCP-side ERROR.
    logger.error(`[upgrade] ${slug}: ${errorMsg}`);
  }
}
/**
* Async upgrade runner. Runs upgrade.sh and polls progress.
*/
@ -271,19 +569,32 @@ async function runUpgrade(
});
}
if (!result.success) {
const upgradeRow = await prisma.instanceUpgrade.findUnique({ where: { id: upgradeId } });
if (!result.success && upgradeRow) {
// Create error event
const upgrade = await prisma.instanceUpgrade.findUnique({ where: { id: upgradeId } });
if (upgrade) {
await createEvent(
upgrade.instanceId,
'ERROR',
'upgrade',
'Upgrade failed',
result.message || 'The upgrade process failed. Check logs for details.',
{ upgradeId, previousCommit: upgrade.previousCommit, warnings: result.warnings }
);
}
await createEvent(
upgradeRow.instanceId,
'ERROR',
'upgrade',
'Upgrade failed',
result.message || 'The upgrade process failed. Check logs for details.',
{ upgradeId, previousCommit: upgradeRow.previousCommit, warnings: result.warnings }
);
}
if (upgradeRow) {
await writeUpgradeAuditLog({
upgradeId,
instanceId: upgradeRow.instanceId,
triggeredById: upgradeRow.triggeredById,
source: 'local',
outcome: result.success ? 'completed' : 'failed',
previousCommit: upgradeRow.previousCommit,
newCommit: result.newCommit || newCommit,
durationSeconds: result.durationSeconds || null,
errorMessage: result.success ? null : (result.message || 'Upgrade failed'),
});
}
logger.info(`[upgrade] ${slug}: Upgrade ${result.success ? 'completed' : 'failed'}`);
@ -327,6 +638,18 @@ async function runUpgrade(
statusMessage: `Upgrade failed: ${isTimeout ? 'timeout' : errorMsg.slice(0, 200)}`,
},
});
await writeUpgradeAuditLog({
upgradeId,
instanceId: upgrade.instanceId,
triggeredById: upgrade.triggeredById,
source: 'local',
outcome: 'orchestration_error',
previousCommit: upgrade.previousCommit,
newCommit: null,
durationSeconds: result.durationSeconds || null,
errorMessage: errorMsg,
});
}
logger.error(`[upgrade] ${slug}: Upgrade failed: ${errorMsg}`);

View File

@ -38,6 +38,11 @@ NI_MAPBOX_KEY=""
NI_MAXMIND_ACCOUNT_ID=""
NI_MAXMIND_LICENSE_KEY=""
# CCP (Changemaker Control Panel) registration flags
NI_CCP_URL=""
NI_CCP_INVITE_CODE=""
NI_CCP_AGENT_URL=""
# --- Arg parser ---
while [[ $# -gt 0 ]]; do
case "$1" in
@ -62,6 +67,10 @@ while [[ $# -gt 0 ]]; do
--mapbox-key) NI_MAPBOX_KEY="$2"; shift 2 ;;
--maxmind-account-id) NI_MAXMIND_ACCOUNT_ID="$2"; shift 2 ;;
--maxmind-license-key) NI_MAXMIND_LICENSE_KEY="$2"; shift 2 ;;
# CCP (Changemaker Control Panel)
--ccp-url) NI_CCP_URL="$2"; shift 2 ;;
--ccp-invite-code) NI_CCP_INVITE_CODE="$2"; shift 2 ;;
--ccp-agent-url) NI_CCP_AGENT_URL="$2"; shift 2 ;;
--help|-h)
echo "Usage: bash config.sh [OPTIONS]"
echo ""
@ -91,6 +100,11 @@ while [[ $# -gt 0 ]]; do
echo " --maxmind-account-id ID MaxMind GeoIP account ID"
echo " --maxmind-license-key K MaxMind GeoIP license key"
echo ""
echo "CCP (Changemaker Control Panel) — all 3 flags required to register:"
echo " --ccp-url URL CCP server URL (e.g., https://ccp.example.com)"
echo " --ccp-invite-code CODE One-time invite code from CCP"
echo " --ccp-agent-url URL Agent URL the CCP reaches (e.g., https://this-host:7443)"
echo ""
echo "Example:"
echo " bash config.sh --non-interactive --domain example.org --admin-password MyStr0ngPass123"
echo " bash config.sh -y --domain example.org --admin-password MyStr0ngPass123 \\"
@ -798,6 +812,17 @@ configure_features() {
else
warn "Set JVB_ADVERTISE_IP in .env before starting Jitsi containers."
fi
else
# Non-interactive: auto-detect public IP for NAT traversal
local detected_ip
detected_ip=$(curl -sf --max-time 5 https://ifconfig.me 2>/dev/null || \
curl -sf --max-time 5 https://api.ipify.org 2>/dev/null || true)
if [[ -n "$detected_ip" ]]; then
update_env_var "JVB_ADVERTISE_IP" "$detected_ip"
success "JVB advertise IP auto-detected: $detected_ip"
else
warn "Could not auto-detect public IP. Set JVB_ADVERTISE_IP in .env before starting Jitsi."
fi
fi
else
MEET_ENABLED="no"
@ -838,13 +863,6 @@ configure_features() {
update_env_var "ENABLE_PEOPLE" "false"
fi
if prompt_yes_no "Enable Analytics & GeoIP (visitor tracking, geo dashboard)?"; then
update_env_var "ENABLE_ANALYTICS" "true"
success "Analytics enabled"
else
update_env_var "ENABLE_ANALYTICS" "false"
fi
if prompt_yes_no "Enable Docs Comments & Version History (Gitea-backed)?"; then
update_env_var "GITEA_COMMENTS_ENABLED" "true"
success "Docs Comments & Version History enabled"
@ -881,8 +899,14 @@ configure_features() {
fi
if prompt_yes_no "Enable Monitoring stack (Prometheus, Grafana, Alertmanager, cAdvisor)?" "y"; then
update_env_var "COMPOSE_PROFILES" "monitoring"
success "Monitoring enabled (COMPOSE_PROFILES=monitoring)"
local existing_profiles
existing_profiles=$(grep -oP 'COMPOSE_PROFILES=\K.*' "$ENV_FILE" 2>/dev/null || echo "")
if [[ -z "$existing_profiles" ]]; then
update_env_var "COMPOSE_PROFILES" "monitoring"
elif [[ "$existing_profiles" != *"monitoring"* ]]; then
update_env_var "COMPOSE_PROFILES" "${existing_profiles},monitoring"
fi
success "Monitoring enabled (COMPOSE_PROFILES includes monitoring)"
MONITORING_ENABLED="yes"
else
MONITORING_ENABLED="no"
@ -1401,6 +1425,35 @@ pangolin_connect_first_site() {
configure_control_panel() {
header "Control Panel Registration"
# Non-interactive: use --ccp-* flags if all three provided, otherwise skip
if [[ "$NON_INTERACTIVE" == "true" ]]; then
if [[ -n "$NI_CCP_URL" && -n "$NI_CCP_INVITE_CODE" && -n "$NI_CCP_AGENT_URL" ]]; then
update_env_var "ENABLE_CCP_AGENT" "true"
update_env_var "CCP_URL" "$NI_CCP_URL"
update_env_var "CCP_INVITE_CODE" "$NI_CCP_INVITE_CODE"
update_env_var "CCP_AGENT_URL" "$NI_CCP_AGENT_URL"
# Append ccp-agent to existing profiles (don't clobber monitoring)
local existing_profiles
existing_profiles=$(grep -oP 'COMPOSE_PROFILES=\K.*' "$ENV_FILE" 2>/dev/null || echo "")
if [[ -z "$existing_profiles" ]]; then
update_env_var "COMPOSE_PROFILES" "ccp-agent"
elif [[ "$existing_profiles" != *"ccp-agent"* ]]; then
update_env_var "COMPOSE_PROFILES" "${existing_profiles},ccp-agent"
fi
success "CCP registration configured ($NI_CCP_URL)"
else
update_env_var "ENABLE_CCP_AGENT" "false"
if [[ -n "$NI_CCP_URL" || -n "$NI_CCP_INVITE_CODE" || -n "$NI_CCP_AGENT_URL" ]]; then
warn "CCP registration needs all 3 flags: --ccp-url, --ccp-invite-code, --ccp-agent-url"
else
info "Skipping CCP registration (no --ccp-url provided)"
fi
fi
return
fi
if prompt_yes_no "Register this instance with a Changemaker Control Panel?"; then
echo ""
read -rp " Enter Control Panel URL (e.g., https://ccp.example.com): " ccp_url
@ -2152,9 +2205,15 @@ main() {
header "Release Mode Settings"
update_env_var "IMAGE_TAG" "latest"
update_env_var "NODE_ENV" "production"
# Ensure monitoring is included if user opted in
# Ensure monitoring is included if user opted in (preserve existing profiles)
if [[ "${MONITORING_ENABLED:-no}" == "yes" ]]; then
update_env_var "COMPOSE_PROFILES" "monitoring"
local existing_profiles
existing_profiles=$(grep -oP 'COMPOSE_PROFILES=\K.*' "$ENV_FILE" 2>/dev/null || echo "")
if [[ -z "$existing_profiles" ]]; then
update_env_var "COMPOSE_PROFILES" "monitoring"
elif [[ "$existing_profiles" != *"monitoring"* ]]; then
update_env_var "COMPOSE_PROFILES" "${existing_profiles},monitoring"
fi
fi
success "Set IMAGE_TAG=latest, NODE_ENV=production (pre-built images)"
fi

View File

@ -103,7 +103,8 @@ cp "$PROJECT_DIR/api/prisma/init-nocodb-db.sh" "$STAGE_DIR/scripts/"
cp "$PROJECT_DIR/api/prisma/init-gancio-db.sh" "$STAGE_DIR/scripts/"
# Runtime scripts
for script in nocodb-init.sh gitea-init.sh mkdocs-entrypoint.sh backup.sh \
for script in nocodb-init.sh gitea-init.sh mkdocs-entrypoint.sh \
backup.sh restore.sh \
upgrade.sh upgrade-check.sh upgrade-watcher.sh \
uninstall.sh test-deployment.sh; do
if [[ -f "$PROJECT_DIR/scripts/$script" ]]; then

View File

@ -294,7 +294,7 @@ if [[ "$START_SERVICES" =~ ^[Yy]$ ]]; then
info " Database migrations and seeding run automatically on first boot."
echo ""
CORE_SERVICES=("v2-postgres" "redis" "api" "admin")
CORE_SERVICES=("v2-postgres" "redis" "api" "admin" "nginx")
ELAPSED=0
ALL_HEALTHY=false

View File

@ -359,9 +359,13 @@ trap on_failure EXIT
acquire_lock
load_env
# Determine branch
# Determine branch (source mode only — release installs have no git)
if [[ -z "$BRANCH" ]]; then
BRANCH="$(git rev-parse --abbrev-ref HEAD)"
if [[ "$INSTALL_MODE" == "release" ]]; then
BRANCH="release"
else
BRANCH="$(git rev-parse --abbrev-ref HEAD)"
fi
fi
# =============================================================================
@ -461,13 +465,15 @@ else
exit 1
fi
# Remote reachable
info "Checking git remote..."
if timeout 10 git ls-remote origin HEAD &>/dev/null 2>&1; then
success "Git remote reachable"
else
error "Cannot reach git remote. Check your network or remote configuration."
exit 1
# Remote reachable (source mode only — release mode pulls from Gitea API later)
if [[ "$INSTALL_MODE" == "source" ]]; then
info "Checking git remote..."
if timeout 10 git ls-remote origin HEAD &>/dev/null 2>&1; then
success "Git remote reachable"
else
error "Cannot reach git remote. Check your network or remote configuration."
exit 1
fi
fi
# Working directory checks
@ -490,9 +496,16 @@ fi
success "Disk space: ${AVAILABLE_MB}MB available"
# Record pre-upgrade state
PRE_UPGRADE_COMMIT="$(git rev-parse HEAD)"
PRE_UPGRADE_SHORT="$(git rev-parse --short HEAD)"
info "Current commit: $PRE_UPGRADE_SHORT ($(git log -1 --format='%s' HEAD))"
if [[ "$INSTALL_MODE" == "source" ]]; then
PRE_UPGRADE_COMMIT="$(git rev-parse HEAD)"
PRE_UPGRADE_SHORT="$(git rev-parse --short HEAD)"
info "Current commit: $PRE_UPGRADE_SHORT ($(git log -1 --format='%s' HEAD))"
else
# Release mode: derive "commit" from VERSION file (format: <tag>\n<sha>)
PRE_UPGRADE_COMMIT="$(head -2 "$PROJECT_DIR/VERSION" 2>/dev/null | tail -1 || echo "release")"
PRE_UPGRADE_SHORT="$(head -1 "$PROJECT_DIR/VERSION" 2>/dev/null || echo "release")"
info "Current version: $PRE_UPGRADE_SHORT"
fi
info "Target branch: $BRANCH"
# Record running containers (for restoring monitoring profile later)
@ -502,31 +515,36 @@ if docker ps --format '{{.Names}}' | grep -q 'prometheus-changemaker'; then
info "Monitoring stack detected (will restart after upgrade)"
fi
# Warn about uncommitted changes in project-owned paths
PROJECT_OWNED_PATHS="api/ admin/ docker-compose.yml"
DIRTY_PROJECT_FILES="$(git diff --name-only HEAD -- $PROJECT_OWNED_PATHS 2>/dev/null || true)"
if [[ -n "$DIRTY_PROJECT_FILES" ]]; then
warn "Uncommitted changes in project-owned files:"
echo "$DIRTY_PROJECT_FILES" | while read -r f; do echo " $f"; done
if [[ "$FORCE" != "true" ]]; then
error "Commit or stash these changes first, or use --force to continue."
exit 1
# Source-mode-only checks: dirty files + upstream commit comparison
if [[ "$INSTALL_MODE" == "source" ]]; then
# Warn about uncommitted changes in project-owned paths
PROJECT_OWNED_PATHS="api/ admin/ docker-compose.yml"
DIRTY_PROJECT_FILES="$(git diff --name-only HEAD -- $PROJECT_OWNED_PATHS 2>/dev/null || true)"
if [[ -n "$DIRTY_PROJECT_FILES" ]]; then
warn "Uncommitted changes in project-owned files:"
echo "$DIRTY_PROJECT_FILES" | while read -r f; do echo " $f"; done
if [[ "$FORCE" != "true" ]]; then
error "Commit or stash these changes first, or use --force to continue."
exit 1
fi
warn "Continuing with --force (changes will be stashed)"
fi
warn "Continuing with --force (changes will be stashed)"
fi
# Check for available updates
LOCAL_HEAD="$(git rev-parse HEAD)"
REMOTE_HEAD="$(git ls-remote origin "$BRANCH" | cut -f1)"
if [[ "$LOCAL_HEAD" == "$REMOTE_HEAD" ]]; then
info "Already up to date ($PRE_UPGRADE_SHORT). No upstream changes."
if [[ "$FORCE" != "true" ]]; then
success "Nothing to upgrade."
release_lock
exit 0
# Check for available updates
LOCAL_HEAD="$(git rev-parse HEAD)"
REMOTE_HEAD="$(git ls-remote origin "$BRANCH" | cut -f1)"
if [[ "$LOCAL_HEAD" == "$REMOTE_HEAD" ]]; then
info "Already up to date ($PRE_UPGRADE_SHORT). No upstream changes."
if [[ "$FORCE" != "true" ]]; then
success "Nothing to upgrade."
release_lock
exit 0
fi
warn "Continuing with --force despite no upstream changes."
fi
warn "Continuing with --force despite no upstream changes."
fi
# Release mode: the upstream-version comparison happens later in the
# release-mode block (line ~597) which queries the Gitea Releases API.
# =============================================================================
# Phase 2: Backup
@ -669,100 +687,105 @@ elif [[ "$DRY_RUN" == "true" ]]; then
exit 0
fi
# NOTE(review): flattened-diff residue continues here — each "Step" below
# appears twice, once unguarded (old) and once inside the
# `if [[ "$INSTALL_MODE" == "source" ]]` wrapper (new). Only the guarded copy
# should survive in the real file.
# Step 0: Save user-modifiable paths before any git operations
save_user_paths
# Source-mode git pull flow. Release mode handles its update via tarball
# download in the block above and skips this entire section.
if [[ "$INSTALL_MODE" == "source" ]]; then
# Step 0: Save user-modifiable paths before any git operations
save_user_paths
# Step 0b: Clear skip-worktree flags that prevent merge (e.g., repo-data JSON files)
# `git ls-files -v` prefixes skip-worktree entries with "S "; strip to paths.
SKIP_WORKTREE_FILES="$(git ls-files -v | grep '^S ' | awk '{print $2}' || true)"
if [[ -n "$SKIP_WORKTREE_FILES" ]]; then
info "Clearing skip-worktree flags on $(echo "$SKIP_WORKTREE_FILES" | wc -l | xargs) file(s)..."
echo "$SKIP_WORKTREE_FILES" | xargs git update-index --no-skip-worktree
success "Skip-worktree flags cleared"
fi
# Step 0c: Fix Docker-owned directories that block git checkout
for owned_dir in api/upgrade api/uploads api/configs; do
if [[ -d "$PROJECT_DIR/$owned_dir" ]] && [[ ! -w "$PROJECT_DIR/$owned_dir" ]]; then
info "Fixing permissions on $owned_dir..."
# chown via a throwaway alpine container so this works without host sudo.
docker run --rm -v "$PROJECT_DIR/$owned_dir:/fix" alpine chown -R "$(id -u):$(id -g)" /fix 2>/dev/null || true
# Step 0b: Clear skip-worktree flags that prevent merge (e.g., repo-data JSON files)
SKIP_WORKTREE_FILES="$(git ls-files -v | grep '^S ' | awk '{print $2}' || true)"
if [[ -n "$SKIP_WORKTREE_FILES" ]]; then
info "Clearing skip-worktree flags on $(echo "$SKIP_WORKTREE_FILES" | wc -l | xargs) file(s)..."
echo "$SKIP_WORKTREE_FILES" | xargs git update-index --no-skip-worktree
success "Skip-worktree flags cleared"
fi
done
# Step 1: Stash user changes if any exist
HAS_CHANGES=false
if [[ -n "$(git status --porcelain 2>/dev/null)" ]]; then
HAS_CHANGES=true
STASH_NAME="upgrade-${TIMESTAMP}"
info "Stashing local changes as '$STASH_NAME'..."
git stash push --include-untracked -m "$STASH_NAME"
success "Local changes stashed"
fi
# Step 0c: Fix Docker-owned directories that block git checkout
for owned_dir in api/upgrade api/uploads api/configs; do
if [[ -d "$PROJECT_DIR/$owned_dir" ]] && [[ ! -w "$PROJECT_DIR/$owned_dir" ]]; then
info "Fixing permissions on $owned_dir..."
docker run --rm -v "$PROJECT_DIR/$owned_dir:/fix" alpine chown -R "$(id -u):$(id -g)" /fix 2>/dev/null || true
fi
done
# Step 3: Pull updates
info "Pulling updates from origin/$BRANCH..."
if ! git pull origin "$BRANCH" --no-edit 2>&1; then
error "git pull failed. This may indicate upstream force-push or branch issues."
# Step 1: Stash user changes if any exist
HAS_CHANGES=false
# `git status --porcelain` emits one line per dirty path; non-empty = dirty tree.
if [[ -n "$(git status --porcelain 2>/dev/null)" ]]; then
HAS_CHANGES=true
STASH_NAME="upgrade-${TIMESTAMP}"
info "Stashing local changes as '$STASH_NAME'..."
git stash push --include-untracked -m "$STASH_NAME"
success "Local changes stashed"
fi
# Step 3: Pull updates
info "Pulling updates from origin/$BRANCH..."
if ! git pull origin "$BRANCH" --no-edit 2>&1; then
error "git pull failed. This may indicate upstream force-push or branch issues."
if [[ "$HAS_CHANGES" == "true" ]]; then
warn "Your stashed changes can be recovered with: git stash pop"
fi
exit 1
fi
POST_PULL_COMMIT="$(git rev-parse --short HEAD)"
success "Updated to $POST_PULL_COMMIT"
# Step 4: Pop stash and handle conflicts
if [[ "$HAS_CHANGES" == "true" ]]; then
warn "Your stashed changes can be recovered with: git stash pop"
fi
exit 1
fi
info "Restoring local changes..."
if git stash pop 2>&1; then
success "Local changes restored cleanly"
else
warn "Merge conflicts detected during stash pop"
POST_PULL_COMMIT="$(git rev-parse --short HEAD)"
success "Updated to $POST_PULL_COMMIT"
# Auto-resolve user-modifiable paths by keeping user's version
RESOLVED_COUNT=0
for user_path in "${USER_PATHS[@]}"; do
CONFLICTED="$(git diff --name-only --diff-filter=U -- "$user_path" 2>/dev/null || true)"
if [[ -n "$CONFLICTED" ]]; then
while IFS= read -r cf; do
info " Auto-resolving (keeping yours): $cf"
git checkout --theirs "$cf" 2>/dev/null || true
git add "$cf"
RESOLVED_COUNT=$((RESOLVED_COUNT + 1))
done < <(echo "$CONFLICTED")
fi
done
# Step 4: Pop stash and handle conflicts
if [[ "$HAS_CHANGES" == "true" ]]; then
info "Restoring local changes..."
if git stash pop 2>&1; then
success "Local changes restored cleanly"
else
warn "Merge conflicts detected during stash pop"
# Auto-resolve user-modifiable paths by keeping user's version
RESOLVED_COUNT=0
for user_path in "${USER_PATHS[@]}"; do
# --diff-filter=U lists only unmerged (conflicted) paths under $user_path.
CONFLICTED="$(git diff --name-only --diff-filter=U -- "$user_path" 2>/dev/null || true)"
if [[ -n "$CONFLICTED" ]]; then
while IFS= read -r cf; do
info " Auto-resolving (keeping yours): $cf"
# NOTE(review): during `git stash pop` the stashed (user) side is
# "theirs" — confirm this matches the intended "keep yours" message.
git checkout --theirs "$cf" 2>/dev/null || true
git add "$cf"
RESOLVED_COUNT=$((RESOLVED_COUNT + 1))
done < <(echo "$CONFLICTED")
# Check if any conflicts remain in project-owned files
REMAINING_CONFLICTS="$(git diff --name-only --diff-filter=U 2>/dev/null || true)"
if [[ -n "$REMAINING_CONFLICTS" ]]; then
error "Unresolved conflicts in project-owned files:"
echo "$REMAINING_CONFLICTS" | while read -r f; do echo " $f"; done
echo ""
error "These files have upstream changes that conflict with your edits."
error "Resolve manually, then run the upgrade again."
info "Your pre-upgrade commit: $PRE_UPGRADE_COMMIT"
info "To abort: git merge --abort OR git checkout $PRE_UPGRADE_COMMIT"
exit 1
fi
done
# Check if any conflicts remain in project-owned files
REMAINING_CONFLICTS="$(git diff --name-only --diff-filter=U 2>/dev/null || true)"
if [[ -n "$REMAINING_CONFLICTS" ]]; then
error "Unresolved conflicts in project-owned files:"
echo "$REMAINING_CONFLICTS" | while read -r f; do echo " $f"; done
echo ""
error "These files have upstream changes that conflict with your edits."
error "Resolve manually, then run the upgrade again."
info "Your pre-upgrade commit: $PRE_UPGRADE_COMMIT"
info "To abort: git merge --abort OR git checkout $PRE_UPGRADE_COMMIT"
exit 1
fi
if [[ $RESOLVED_COUNT -gt 0 ]]; then
success "Auto-resolved $RESOLVED_COUNT user-modifiable path(s) (kept your versions)"
if [[ $RESOLVED_COUNT -gt 0 ]]; then
success "Auto-resolved $RESOLVED_COUNT user-modifiable path(s) (kept your versions)"
fi
fi
fi
fi
# Step 4b: Restore user-modifiable paths (unconditionally overwrites with saved copies)
restore_user_paths
# Step 4b: Restore user-modifiable paths (unconditionally overwrites with saved copies)
restore_user_paths
# Step 4c: Restore any tracked files accidentally deleted by restore_user_paths
# (can happen when save_user_paths can't read root-owned files in user paths)
DELETED_TRACKED="$(git ls-files --deleted 2>/dev/null || true)"
if [[ -n "$DELETED_TRACKED" ]]; then
info "Restoring $(echo "$DELETED_TRACKED" | wc -l | xargs) tracked file(s) deleted during restore..."
echo "$DELETED_TRACKED" | xargs git checkout HEAD -- 2>/dev/null || true
success "Tracked files restored from HEAD"
# Step 4c: Restore any tracked files accidentally deleted by restore_user_paths
# (can happen when save_user_paths can't read root-owned files in user paths)
DELETED_TRACKED="$(git ls-files --deleted 2>/dev/null || true)"
if [[ -n "$DELETED_TRACKED" ]]; then
info "Restoring $(echo "$DELETED_TRACKED" | wc -l | xargs) tracked file(s) deleted during restore..."
echo "$DELETED_TRACKED" | xargs git checkout HEAD -- 2>/dev/null || true
success "Tracked files restored from HEAD"
fi
fi
# End of source-mode git pull flow
# Step 5: Detect new env vars
info "Checking for new environment variables..."
@ -791,24 +814,30 @@ if [[ -f "$PROJECT_DIR/.env.example" ]] && [[ -f "$PROJECT_DIR/.env" ]]; then
fi
fi
# NOTE(review): old (unguarded) and new (INSTALL_MODE-guarded) versions of
# "Step 6" are interleaved below — diff-rendering residue; keep only the
# guarded copy in the real file.
# Step 6: Print update summary
COMMIT_RANGE="${PRE_UPGRADE_SHORT}..${POST_PULL_COMMIT}"
COMMIT_COUNT="$(git log --oneline "$PRE_UPGRADE_COMMIT..HEAD" 2>/dev/null | wc -l | xargs)"
echo ""
info "Update summary: $COMMIT_COUNT commit(s) ($COMMIT_RANGE)"
git log --oneline "$PRE_UPGRADE_COMMIT..HEAD" 2>/dev/null | head -20
if [[ "$COMMIT_COUNT" -gt 20 ]]; then
info " ... and $((COMMIT_COUNT - 20)) more"
fi
# Flag commits that may require manual attention
BREAKING_COMMITS="$(git log --oneline "$PRE_UPGRADE_COMMIT..HEAD" --grep="BREAKING" --grep="\[manual\]" 2>/dev/null || true)"
if [[ -n "$BREAKING_COMMITS" ]]; then
# Step 6: Print update summary (source mode only — release mode has no commit range)
# COMMIT_COUNT defaults to 0 so the release-mode branch (and the final
# banner that prints it) always has a value.
COMMIT_COUNT=0
if [[ "$INSTALL_MODE" == "source" ]]; then
COMMIT_RANGE="${PRE_UPGRADE_SHORT}..${POST_PULL_COMMIT}"
# Use || true and check pipefail-safe to survive git failures
COMMIT_COUNT="$(git log --oneline "$PRE_UPGRADE_COMMIT..HEAD" 2>/dev/null | wc -l | xargs || echo 0)"
echo ""
warn "Commits requiring manual attention:"
echo "$BREAKING_COMMITS" | while read -r line; do
echo -e " ${YELLOW}$line${NC}"
done
info "Update summary: $COMMIT_COUNT commit(s) ($COMMIT_RANGE)"
git log --oneline "$PRE_UPGRADE_COMMIT..HEAD" 2>/dev/null | head -20 || true
if [[ "$COMMIT_COUNT" -gt 20 ]]; then
info " ... and $((COMMIT_COUNT - 20)) more"
fi
# Flag commits that may require manual attention
# Two --grep flags are OR'd by git log: matches "BREAKING" or "[manual]".
BREAKING_COMMITS="$(git log --oneline "$PRE_UPGRADE_COMMIT..HEAD" --grep="BREAKING" --grep="\[manual\]" 2>/dev/null || true)"
if [[ -n "$BREAKING_COMMITS" ]]; then
echo ""
warn "Commits requiring manual attention:"
echo "$BREAKING_COMMITS" | while read -r line; do
echo -e " ${YELLOW}$line${NC}"
done
fi
else
info "Update summary: ${PRE_UPGRADE_SHORT} → release"
fi
# =============================================================================
@ -1135,7 +1164,10 @@ verify_service_health() {
# NOTE(review): only the tail of verify_service_health() is visible in this
# hunk; the polling loop that precedes `done` is outside this view.
done
warn "$name: not responding after ${max_wait}s"
VERIFY_FAILED=true
return 1
# Always return 0 — under set -e a non-zero return from this helper would
# exit the script before write_result runs. The VERIFY_FAILED flag is the
# signal the caller actually checks.
return 0
}
# API health (with polling — may still be running migrations)
@ -1194,7 +1226,11 @@ fi
# =============================================================================
ELAPSED="$(elapsed)"
# NOTE(review): the unguarded FINAL_COMMIT assignment below is the old diff
# side; the mode-aware if/else that follows supersedes it.
FINAL_COMMIT="$(git rev-parse --short HEAD)"
if [[ "$INSTALL_MODE" == "source" ]]; then
FINAL_COMMIT="$(git rev-parse --short HEAD)"
else
# Release installs have no git history; use the VERSION file if present.
FINAL_COMMIT="$(head -1 "$PROJECT_DIR/VERSION" 2>/dev/null || echo "release")"
fi
# Collect warnings for API mode result
UPGRADE_WARNINGS="[]"
@ -1211,7 +1247,11 @@ echo -e "${BOLD}${GREEN} Upgrade Complete${NC}"
echo -e "${BOLD}${GREEN}══════════════════════════════════════════════════${NC}"
echo ""
echo -e " ${BOLD}Previous:${NC} $PRE_UPGRADE_SHORT"
echo -e " ${BOLD}Current:${NC} $FINAL_COMMIT ($(git log -1 --format='%s' HEAD))"
if [[ "$INSTALL_MODE" == "source" ]]; then
# Fall back to the bare id if git cannot produce the subject line.
echo -e " ${BOLD}Current:${NC} $FINAL_COMMIT ($(git log -1 --format='%s' HEAD 2>/dev/null || echo "$FINAL_COMMIT"))"
else
echo -e " ${BOLD}Current:${NC} $FINAL_COMMIT"
fi
echo -e " ${BOLD}Commits:${NC} $COMMIT_COUNT"
echo -e " ${BOLD}Duration:${NC} $ELAPSED"
echo -e " ${BOLD}Log:${NC} $LOG_FILE"