From c1d69c3ff920195b45fa59cd6ea99b737cf695a7 Mon Sep 17 00:00:00 2001
From: Theodore Li <theo@sim.ai>
Date: Fri, 5 Jun 2026 15:48:37 -0700
Subject: [PATCH 1/6] improvement(tables): drain rows in batches before
 hard-deleting archived tables

---
 .../background/cleanup-soft-deletes.test.ts   | 49 +++++++++++-
 apps/sim/background/cleanup-soft-deletes.ts   | 78 +++++++++++++++++--
 apps/sim/lib/cleanup/batch-delete.test.ts     | 69 ++++++++++++++++
 apps/sim/lib/cleanup/batch-delete.ts          | 68 +++++++++++++++-
 apps/sim/lib/knowledge/documents/service.ts   |  4 +-
 5 files changed, 258 insertions(+), 10 deletions(-)
 create mode 100644 apps/sim/lib/cleanup/batch-delete.test.ts

diff --git a/apps/sim/background/cleanup-soft-deletes.test.ts b/apps/sim/background/cleanup-soft-deletes.test.ts
index cada41fdf3a..99425ef8a37 100644
--- a/apps/sim/background/cleanup-soft-deletes.test.ts
+++ b/apps/sim/background/cleanup-soft-deletes.test.ts
@@ -6,9 +6,11 @@ import { beforeEach, describe, expect, it, vi } from 'vitest'
 
 const {
   mockBatchDeleteByWorkspaceAndTimestamp,
+  mockDeleteDefinition,
   mockDeleteFileMetadata,
   mockDeleteFiles,
   mockDeleteRowsById,
+  mockDrainRowsByColumn,
   mockIsUsingCloudStorage,
   mockLimit,
   mockOrderBy,
@@ -26,12 +28,15 @@ const {
     leftJoin: vi.fn(() => ({ where: mockWhere })),
   }))
   const mockSelect = vi.fn(() => ({ from: mockFrom }))
+  const mockDeleteDefinition = vi.fn(async () => undefined)
 
   return {
     mockBatchDeleteByWorkspaceAndTimestamp: vi.fn(async () => ({ deleted: 0, failed: 0 })),
+    mockDeleteDefinition,
     mockDeleteFileMetadata: vi.fn(async () => true),
     mockDeleteFiles: vi.fn(async () => ({ deleted: 0, failed: [] as Array<{ key: string }> })),
     mockDeleteRowsById: vi.fn(async () => ({ deleted: 0, failed: 0 })),
+    mockDrainRowsByColumn: vi.fn(async () => ({ deleted: 0, budgetExhausted: false })),
     mockIsUsingCloudStorage: vi.fn(() => true),
     mockLimit,
     mockOrderBy,
@@ -43,7 +48,9 @@ const {
   }
 })
 
-vi.mock('@sim/db', () => ({ db: { select: mockSelect } }))
+vi.mock('@sim/db', () => ({
+  db: { select: mockSelect, delete: vi.fn(() => ({ where: mockDeleteDefinition })) },
+}))
 
 vi.mock('@sim/db/schema', () => {
   const table = (cols: string[]) =>
@@ -58,6 +65,7 @@ vi.mock('@sim/db/schema', () => {
     mcpServers: table(softCols),
     memory: table(softCols),
     userTableDefinitions: table(softCols),
+    userTableRows: table(['id', 'tableId', 'workspaceId']),
     workflow: table(softCols),
     workflowFolder: table(softCols),
     workflowMcpServer: table(softCols),
@@ -93,6 +101,7 @@ vi.mock('@/lib/cleanup/batch-delete', () => ({
     return chunks
   },
   deleteRowsById: mockDeleteRowsById,
+  drainRowsByColumn: mockDrainRowsByColumn,
   selectRowsByIdChunks: mockSelectRowsByIdChunks,
 }))
 
@@ -162,3 +171,41 @@ describe('cleanup soft deletes — orphan KB binding sweep', () => {
     expect(mockDeleteFileMetadata).not.toHaveBeenCalled()
   })
 })
+
+describe('cleanup soft deletes — archived user tables', () => {
+  beforeEach(() => {
+    vi.clearAllMocks()
+    mockIsUsingCloudStorage.mockReturnValue(true)
+    mockLimit.mockResolvedValue([])
+    // selectRowsByIdChunks call order: workflows, file legacy, file multi, doomed tables.
+    mockSelectRowsByIdChunks
+      .mockResolvedValueOnce([])
+      .mockResolvedValueOnce([])
+      .mockResolvedValueOnce([])
+      .mockResolvedValueOnce([{ id: 'tbl-1' }])
+  })
+
+  it('drains rows before deleting the table definition', async () => {
+    mockDrainRowsByColumn.mockResolvedValue({ deleted: 5, budgetExhausted: false })
+
+    await runCleanupSoftDeletes(basePayload)
+
+    expect(mockDrainRowsByColumn).toHaveBeenCalledWith(
+      expect.objectContaining({ matchValue: 'tbl-1' })
+    )
+    expect(mockDeleteDefinition).toHaveBeenCalledTimes(1)
+    // Rows are drained first, then the definition is deleted.
+    expect(mockDrainRowsByColumn.mock.invocationCallOrder[0]).toBeLessThan(
+      mockDeleteDefinition.mock.invocationCallOrder[0]
+    )
+  })
+
+  it('defers the definition delete when the row budget is exhausted', async () => {
+    mockDrainRowsByColumn.mockResolvedValue({ deleted: 200_000, budgetExhausted: true })
+
+    await runCleanupSoftDeletes(basePayload)
+
+    expect(mockDrainRowsByColumn).toHaveBeenCalledTimes(1)
+    expect(mockDeleteDefinition).not.toHaveBeenCalled()
+  })
+})
diff --git a/apps/sim/background/cleanup-soft-deletes.ts b/apps/sim/background/cleanup-soft-deletes.ts
index ae900abc804..a6ed60b4492 100644
--- a/apps/sim/background/cleanup-soft-deletes.ts
+++ b/apps/sim/background/cleanup-soft-deletes.ts
@@ -7,6 +7,7 @@ import {
   mcpServers,
   memory,
   userTableDefinitions,
+  userTableRows,
   workflow,
   workflowFolder,
   workflowMcpServer,
@@ -21,6 +22,7 @@ import {
   batchDeleteByWorkspaceAndTimestamp,
   chunkArray,
   deleteRowsById,
+  drainRowsByColumn,
   selectRowsByIdChunks,
 } from '@/lib/cleanup/batch-delete'
 import { prepareChatCleanup } from '@/lib/cleanup/chat-cleanup'
@@ -148,12 +150,6 @@ const CLEANUP_TARGETS = [
     wsCol: knowledgeBase.workspaceId,
     name: 'knowledgeBase',
   },
-  {
-    table: userTableDefinitions,
-    softDeleteCol: userTableDefinitions.archivedAt,
-    wsCol: userTableDefinitions.workspaceId,
-    name: 'userTableDefinitions',
-  },
   { table: memory, softDeleteCol: memory.deletedAt, wsCol: memory.workspaceId, name: 'memory' },
   {
     table: mcpServers,
@@ -256,6 +252,74 @@ async function cleanupOrphanedKnowledgeBaseBindings(
   return stats
 }
 
+const TABLE_ROW_DRAIN_BATCH_SIZE = 5_000
+/**
+ * Per-run cap on user-table rows drained before deleting their definitions.
+ * Each batch is its own transaction, so this only bounds total job duration; a
+ * table larger than this drains across several cron runs. A definition is only
+ * deleted once its rows are fully drained, so its ON DELETE CASCADE never fires
+ * on a large set.
+ */
+const TABLE_ROW_DRAIN_TOTAL_LIMIT = 1_000_000
+
+/**
+ * Hard-delete archived user tables. Rows are drained in bounded batches first
+ * (each batch cascades its `table_row_executions`), then the definition is
+ * deleted — turning what would be a single multi-million-row cascade into many
+ * small transactions. Tables whose rows can't be fully drained within this
+ * run's budget keep their archived definition for the next run.
+ */
+async function cleanupArchivedUserTables(
+  workspaceIds: string[],
+  retentionDate: Date,
+  label: string
+): Promise<number> {
+  const doomedTables = await selectRowsByIdChunks(workspaceIds, (chunkIds, chunkLimit) =>
+    db
+      .select({ id: userTableDefinitions.id })
+      .from(userTableDefinitions)
+      .where(
+        and(
+          inArray(userTableDefinitions.workspaceId, chunkIds),
+          isNotNull(userTableDefinitions.archivedAt),
+          lt(userTableDefinitions.archivedAt, retentionDate)
+        )
+      )
+      .limit(chunkLimit)
+  )
+
+  if (doomedTables.length === 0) return 0
+
+  let rowBudget = TABLE_ROW_DRAIN_TOTAL_LIMIT
+  let definitionsDeleted = 0
+
+  for (const { id: tableId } of doomedTables) {
+    if (rowBudget <= 0) break
+
+    const drain = await drainRowsByColumn({
+      tableDef: userTableRows,
+      idCol: userTableRows.id,
+      matchCol: userTableRows.tableId,
+      matchValue: tableId,
+      tableName: `${label}/userTableRows`,
+      batchSize: TABLE_ROW_DRAIN_BATCH_SIZE,
+      rowBudget,
+    })
+    rowBudget -= drain.deleted
+
+    // Rows still remain — defer the definition delete so its cascade stays small.
+    if (drain.budgetExhausted) break
+
+    await db.delete(userTableDefinitions).where(eq(userTableDefinitions.id, tableId))
+    definitionsDeleted++
+  }
+
+  logger.info(
+    `[${label}/userTableDefinitions] Deleted ${definitionsDeleted}/${doomedTables.length} archived tables (row budget left: ${rowBudget})`
+  )
+  return definitionsDeleted
+}
+
 export async function runCleanupSoftDeletes(payload: CleanupJobPayload): Promise<void> {
   const startTime = Date.now()
   const { workspaceIds, retentionHours, label } = payload
@@ -350,6 +414,8 @@ export async function runCleanupSoftDeletes(payload: CleanupJobPayload): Promise
     totalDeleted += result.deleted
   }
 
+  totalDeleted += await cleanupArchivedUserTables(workspaceIds, retentionDate, label)
+
   const orphanBindingStats = await cleanupOrphanedKnowledgeBaseBindings(workspaceIds, label)
 
   logger.info(
diff --git a/apps/sim/lib/cleanup/batch-delete.test.ts b/apps/sim/lib/cleanup/batch-delete.test.ts
new file mode 100644
index 00000000000..21394d0c9c9
--- /dev/null
+++ b/apps/sim/lib/cleanup/batch-delete.test.ts
@@ -0,0 +1,69 @@
+/**
+ * @vitest-environment node
+ */
+
+import { dbChainMock, dbChainMockFns } from '@sim/testing'
+import { beforeEach, describe, expect, it, vi } from 'vitest'
+
+vi.mock('@sim/db', () => dbChainMock)
+
+import { drainRowsByColumn } from '@/lib/cleanup/batch-delete'
+
+const baseOpts = {
+  tableDef: {} as never,
+  idCol: 'col.id' as never,
+  matchCol: 'col.tableId' as never,
+  matchValue: 'tbl-1',
+  tableName: 'test/userTableRows',
+}
+
+function returnRows(count: number) {
+  return Array.from({ length: count }, (_, i) => ({ id: `row-${i}` }))
+}
+
+describe('drainRowsByColumn', () => {
+  beforeEach(() => {
+    vi.clearAllMocks()
+  })
+
+  it('drains in batches until a short batch and reports the set exhausted', async () => {
+    dbChainMockFns.returning
+      .mockResolvedValueOnce(returnRows(2))
+      .mockResolvedValueOnce(returnRows(1))
+
+    const result = await drainRowsByColumn({ ...baseOpts, batchSize: 2, rowBudget: 10 })
+
+    expect(result).toEqual({ deleted: 3, budgetExhausted: false })
+    expect(dbChainMockFns.returning).toHaveBeenCalledTimes(2)
+  })
+
+  it('stops at the row budget and reports it exhausted', async () => {
+    dbChainMockFns.returning
+      .mockResolvedValueOnce(returnRows(2))
+      .mockResolvedValueOnce(returnRows(2))
+
+    const result = await drainRowsByColumn({ ...baseOpts, batchSize: 2, rowBudget: 4 })
+
+    expect(result).toEqual({ deleted: 4, budgetExhausted: true })
+    expect(dbChainMockFns.returning).toHaveBeenCalledTimes(2)
+  })
+
+  it('returns immediately when the match set is already empty', async () => {
+    dbChainMockFns.returning.mockResolvedValueOnce([])
+
+    const result = await drainRowsByColumn({ ...baseOpts, batchSize: 2, rowBudget: 10 })
+
+    expect(result).toEqual({ deleted: 0, budgetExhausted: false })
+    expect(dbChainMockFns.returning).toHaveBeenCalledTimes(1)
+  })
+
+  it('bails without throwing when a batch delete fails', async () => {
+    dbChainMockFns.returning
+      .mockResolvedValueOnce(returnRows(2))
+      .mockRejectedValueOnce(new Error('db down'))
+
+    const result = await drainRowsByColumn({ ...baseOpts, batchSize: 2, rowBudget: 10 })
+
+    expect(result).toEqual({ deleted: 2, budgetExhausted: false })
+  })
+})
diff --git a/apps/sim/lib/cleanup/batch-delete.ts b/apps/sim/lib/cleanup/batch-delete.ts
index d523800cd59..6c33ac0c22c 100644
--- a/apps/sim/lib/cleanup/batch-delete.ts
+++ b/apps/sim/lib/cleanup/batch-delete.ts
@@ -1,6 +1,6 @@
 import { db } from '@sim/db'
 import { createLogger } from '@sim/logger'
-import { and, inArray, isNotNull, lt, sql } from 'drizzle-orm'
+import { and, eq, inArray, isNotNull, lt, sql } from 'drizzle-orm'
 import type { PgColumn, PgTable } from 'drizzle-orm/pg-core'
 
 const logger = createLogger('BatchDelete')
@@ -262,3 +262,69 @@ export async function deleteRowsById(
   )
   return result
 }
+
+export interface DrainByColumnOptions {
+  tableDef: PgTable
+  /** Single-column primary key used to batch the delete. */
+  idCol: PgColumn
+  /** Column matched against `matchValue` to scope the drain (e.g. a parent FK). */
+  matchCol: PgColumn
+  matchValue: string
+  tableName: string
+  batchSize?: number
+  /** Max rows to delete in this call across all batches. */
+  rowBudget: number
+}
+
+export interface DrainResult {
+  deleted: number
+  /** True if the budget ran out before the match set was fully drained. */
+  budgetExhausted: boolean
+}
+
+/**
+ * Delete every row matching `matchCol = matchValue` in self-bounded batches,
+ * each its own transaction. Use to empty a large child set before deleting its
+ * parent so the parent's ON DELETE CASCADE fires on a small (or empty) set
+ * instead of millions of rows in one statement.
+ */
+export async function drainRowsByColumn({
+  tableDef,
+  idCol,
+  matchCol,
+  matchValue,
+  tableName,
+  batchSize = DEFAULT_DELETE_CHUNK_SIZE,
+  rowBudget,
+}: DrainByColumnOptions): Promise<DrainResult> {
+  let deleted = 0
+  let remaining = rowBudget
+
+  while (remaining > 0) {
+    const limit = Math.min(batchSize, remaining)
+    const targetIds = db
+      .select({ id: idCol })
+      .from(tableDef)
+      .where(eq(matchCol, matchValue))
+      .limit(limit)
+
+    let batchDeleted: { id: unknown }[]
+    try {
+      batchDeleted = await db
+        .delete(tableDef)
+        .where(inArray(idCol, targetIds))
+        .returning({ id: idCol })
+    } catch (error) {
+      logger.error(`[${tableName}] Drain batch failed for ${matchValue}:`, { error })
+      return { deleted, budgetExhausted: false }
+    }
+
+    deleted += batchDeleted.length
+    remaining -= batchDeleted.length
+
+    // Short batch means the match set is exhausted.
+    if (batchDeleted.length < limit) return { deleted, budgetExhausted: false }
+  }
+
+  return { deleted, budgetExhausted: true }
+}
diff --git a/apps/sim/lib/knowledge/documents/service.ts b/apps/sim/lib/knowledge/documents/service.ts
index 9b4fce29135..d1371673886 100644
--- a/apps/sim/lib/knowledge/documents/service.ts
+++ b/apps/sim/lib/knowledge/documents/service.ts
@@ -97,7 +97,7 @@ async function assertKnowledgeBaseFileUrlsOwnership(
     ...new Set(
       fileUrls
         .map((url) => getKnowledgeBaseStorageKey(url))
-        .filter((key): key is string => key !== null && key.startsWith('kb/'))
+        .filter((key): key is string => key?.startsWith('kb/'))
     ),
   ]
   if (keys.length === 0) {
@@ -1974,7 +1974,7 @@ export async function deleteDocumentStorageFiles(
     ...new Set(
       entries
         .map((entry) => entry.storageKey)
-        .filter((key): key is string => key !== null && key.startsWith('kb/'))
+        .filter((key): key is string => key?.startsWith('kb/'))
     ),
   ]
   const ownerByKey = new Map<string, string | null>()

From d660542a516b7d93d063ab28f3bbd3300f7eee01 Mon Sep 17 00:00:00 2001
From: Theodore Li <theo@sim.ai>
Date: Fri, 5 Jun 2026 15:56:13 -0700
Subject: [PATCH 2/6] fix(tables): defer definition delete unless rows are
 provably drained

---
 apps/sim/background/cleanup-soft-deletes.test.ts |  9 +++++----
 apps/sim/background/cleanup-soft-deletes.ts      |  5 +++--
 apps/sim/lib/cleanup/batch-delete.test.ts        | 16 ++++++++--------
 apps/sim/lib/cleanup/batch-delete.ts             | 16 +++++++++++-----
 4 files changed, 27 insertions(+), 19 deletions(-)

diff --git a/apps/sim/background/cleanup-soft-deletes.test.ts b/apps/sim/background/cleanup-soft-deletes.test.ts
index 99425ef8a37..c4fde4b7dd3 100644
--- a/apps/sim/background/cleanup-soft-deletes.test.ts
+++ b/apps/sim/background/cleanup-soft-deletes.test.ts
@@ -36,7 +36,7 @@ const {
     mockDeleteFileMetadata: vi.fn(async () => true),
     mockDeleteFiles: vi.fn(async () => ({ deleted: 0, failed: [] as Array<{ key: string }> })),
     mockDeleteRowsById: vi.fn(async () => ({ deleted: 0, failed: 0 })),
-    mockDrainRowsByColumn: vi.fn(async () => ({ deleted: 0, budgetExhausted: false })),
+    mockDrainRowsByColumn: vi.fn(async () => ({ deleted: 0, fullyDrained: false })),
     mockIsUsingCloudStorage: vi.fn(() => true),
     mockLimit,
     mockOrderBy,
@@ -186,7 +186,7 @@ describe('cleanup soft deletes — archived user tables', () => {
   })
 
   it('drains rows before deleting the table definition', async () => {
-    mockDrainRowsByColumn.mockResolvedValue({ deleted: 5, budgetExhausted: false })
+    mockDrainRowsByColumn.mockResolvedValue({ deleted: 5, fullyDrained: true })
 
     await runCleanupSoftDeletes(basePayload)
 
@@ -200,8 +200,9 @@ describe('cleanup soft deletes — archived user tables', () => {
     )
   })
 
-  it('defers the definition delete when the row budget is exhausted', async () => {
-    mockDrainRowsByColumn.mockResolvedValue({ deleted: 200_000, budgetExhausted: true })
+  it('defers the definition delete when the drain does not fully complete', async () => {
+    // Budget stop or a drain error both surface as fullyDrained: false.
+    mockDrainRowsByColumn.mockResolvedValue({ deleted: 200_000, fullyDrained: false })
 
     await runCleanupSoftDeletes(basePayload)
 
diff --git a/apps/sim/background/cleanup-soft-deletes.ts b/apps/sim/background/cleanup-soft-deletes.ts
index a6ed60b4492..496ab278767 100644
--- a/apps/sim/background/cleanup-soft-deletes.ts
+++ b/apps/sim/background/cleanup-soft-deletes.ts
@@ -307,8 +307,9 @@ async function cleanupArchivedUserTables(
     })
     rowBudget -= drain.deleted
 
-    // Rows still remain — defer the definition delete so its cascade stays small.
-    if (drain.budgetExhausted) break
+    // Only delete the definition once its rows are provably gone. A budget stop
+    // or a drain error leaves rows behind, so defer to keep the cascade small.
+    if (!drain.fullyDrained) break
 
     await db.delete(userTableDefinitions).where(eq(userTableDefinitions.id, tableId))
     definitionsDeleted++
diff --git a/apps/sim/lib/cleanup/batch-delete.test.ts b/apps/sim/lib/cleanup/batch-delete.test.ts
index 21394d0c9c9..b1a0cf7c7ab 100644
--- a/apps/sim/lib/cleanup/batch-delete.test.ts
+++ b/apps/sim/lib/cleanup/batch-delete.test.ts
@@ -26,44 +26,44 @@ describe('drainRowsByColumn', () => {
     vi.clearAllMocks()
   })
 
-  it('drains in batches until a short batch and reports the set exhausted', async () => {
+  it('drains in batches until a short batch and reports the set fully drained', async () => {
     dbChainMockFns.returning
       .mockResolvedValueOnce(returnRows(2))
       .mockResolvedValueOnce(returnRows(1))
 
     const result = await drainRowsByColumn({ ...baseOpts, batchSize: 2, rowBudget: 10 })
 
-    expect(result).toEqual({ deleted: 3, budgetExhausted: false })
+    expect(result).toEqual({ deleted: 3, fullyDrained: true })
     expect(dbChainMockFns.returning).toHaveBeenCalledTimes(2)
   })
 
-  it('stops at the row budget and reports it exhausted', async () => {
+  it('stops at the row budget and reports the set not fully drained', async () => {
     dbChainMockFns.returning
       .mockResolvedValueOnce(returnRows(2))
       .mockResolvedValueOnce(returnRows(2))
 
     const result = await drainRowsByColumn({ ...baseOpts, batchSize: 2, rowBudget: 4 })
 
-    expect(result).toEqual({ deleted: 4, budgetExhausted: true })
+    expect(result).toEqual({ deleted: 4, fullyDrained: false })
     expect(dbChainMockFns.returning).toHaveBeenCalledTimes(2)
   })
 
-  it('returns immediately when the match set is already empty', async () => {
+  it('reports fully drained immediately when the match set is already empty', async () => {
     dbChainMockFns.returning.mockResolvedValueOnce([])
 
     const result = await drainRowsByColumn({ ...baseOpts, batchSize: 2, rowBudget: 10 })
 
-    expect(result).toEqual({ deleted: 0, budgetExhausted: false })
+    expect(result).toEqual({ deleted: 0, fullyDrained: true })
     expect(dbChainMockFns.returning).toHaveBeenCalledTimes(1)
   })
 
-  it('bails without throwing when a batch delete fails', async () => {
+  it('reports not fully drained on a batch error so the caller defers the cascade', async () => {
     dbChainMockFns.returning
       .mockResolvedValueOnce(returnRows(2))
       .mockRejectedValueOnce(new Error('db down'))
 
     const result = await drainRowsByColumn({ ...baseOpts, batchSize: 2, rowBudget: 10 })
 
-    expect(result).toEqual({ deleted: 2, budgetExhausted: false })
+    expect(result).toEqual({ deleted: 2, fullyDrained: false })
   })
 })
diff --git a/apps/sim/lib/cleanup/batch-delete.ts b/apps/sim/lib/cleanup/batch-delete.ts
index 6c33ac0c22c..737e2cd33c1 100644
--- a/apps/sim/lib/cleanup/batch-delete.ts
+++ b/apps/sim/lib/cleanup/batch-delete.ts
@@ -278,8 +278,13 @@ export interface DrainByColumnOptions {
 
 export interface DrainResult {
   deleted: number
-  /** True if the budget ran out before the match set was fully drained. */
-  budgetExhausted: boolean
+  /**
+   * True only when the match set was confirmed empty (a short final batch).
+   * Budget exhaustion and batch errors both yield `false` — callers must treat
+   * `false` as "rows may remain" and defer any dependent parent-delete (whose
+   * ON DELETE CASCADE would otherwise fire on the leftovers) to a later run.
+   */
+  fullyDrained: boolean
 }
 
 /**
@@ -316,15 +321,16 @@ export async function drainRowsByColumn({
         .returning({ id: idCol })
     } catch (error) {
       logger.error(`[${tableName}] Drain batch failed for ${matchValue}:`, { error })
-      return { deleted, budgetExhausted: false }
+      return { deleted, fullyDrained: false }
     }
 
     deleted += batchDeleted.length
     remaining -= batchDeleted.length
 
     // Short batch means the match set is exhausted.
-    if (batchDeleted.length < limit) return { deleted, budgetExhausted: false }
+    if (batchDeleted.length < limit) return { deleted, fullyDrained: true }
   }
 
-  return { deleted, budgetExhausted: true }
+  // Hit the per-call budget on a full batch — rows may remain.
+  return { deleted, fullyDrained: false }
 }

From 4ad8c0bf93d62a2b87d4940a40aade18b3c7b517 Mon Sep 17 00:00:00 2001
From: Theodore Li <theo@sim.ai>
Date: Fri, 5 Jun 2026 16:03:23 -0700
Subject: [PATCH 3/6] fix(tables): skip an errored table mid-drain instead of
 aborting the whole run

---
 .../background/cleanup-soft-deletes.test.ts   | 26 +++++++++++++++----
 apps/sim/background/cleanup-soft-deletes.ts   | 14 ++++++----
 2 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/apps/sim/background/cleanup-soft-deletes.test.ts b/apps/sim/background/cleanup-soft-deletes.test.ts
index c4fde4b7dd3..d25a92c386a 100644
--- a/apps/sim/background/cleanup-soft-deletes.test.ts
+++ b/apps/sim/background/cleanup-soft-deletes.test.ts
@@ -177,15 +177,16 @@ describe('cleanup soft deletes — archived user tables', () => {
     vi.clearAllMocks()
     mockIsUsingCloudStorage.mockReturnValue(true)
     mockLimit.mockResolvedValue([])
-    // selectRowsByIdChunks call order: workflows, file legacy, file multi, doomed tables.
+    // selectRowsByIdChunks call order: workflows, file legacy, file multi, doomed
+    // tables. Each test queues the doomed-tables result (4th call) itself.
     mockSelectRowsByIdChunks
       .mockResolvedValueOnce([])
       .mockResolvedValueOnce([])
       .mockResolvedValueOnce([])
-      .mockResolvedValueOnce([{ id: 'tbl-1' }])
   })
 
   it('drains rows before deleting the table definition', async () => {
+    mockSelectRowsByIdChunks.mockResolvedValueOnce([{ id: 'tbl-1' }])
     mockDrainRowsByColumn.mockResolvedValue({ deleted: 5, fullyDrained: true })
 
     await runCleanupSoftDeletes(basePayload)
@@ -200,13 +201,28 @@ describe('cleanup soft deletes — archived user tables', () => {
     )
   })
 
-  it('defers the definition delete when the drain does not fully complete', async () => {
-    // Budget stop or a drain error both surface as fullyDrained: false.
-    mockDrainRowsByColumn.mockResolvedValue({ deleted: 200_000, fullyDrained: false })
+  it('defers the definition delete when the budget is exhausted', async () => {
+    mockSelectRowsByIdChunks.mockResolvedValueOnce([{ id: 'tbl-1' }])
+    // Budget stop consumes the whole budget — deleted equals the per-run cap.
+    mockDrainRowsByColumn.mockResolvedValue({ deleted: 1_000_000, fullyDrained: false })
 
     await runCleanupSoftDeletes(basePayload)
 
     expect(mockDrainRowsByColumn).toHaveBeenCalledTimes(1)
     expect(mockDeleteDefinition).not.toHaveBeenCalled()
   })
+
+  it('skips a table that errors mid-drain but keeps cleaning the rest', async () => {
+    mockSelectRowsByIdChunks.mockResolvedValueOnce([{ id: 'tbl-err' }, { id: 'tbl-ok' }])
+    // First table errors mid-drain (budget remains); second drains fully.
+    mockDrainRowsByColumn
+      .mockResolvedValueOnce({ deleted: 2, fullyDrained: false })
+      .mockResolvedValueOnce({ deleted: 5, fullyDrained: true })
+
+    await runCleanupSoftDeletes(basePayload)
+
+    expect(mockDrainRowsByColumn).toHaveBeenCalledTimes(2)
+    // Only the fully-drained table's definition is deleted.
+    expect(mockDeleteDefinition).toHaveBeenCalledTimes(1)
+  })
 })
diff --git a/apps/sim/background/cleanup-soft-deletes.ts b/apps/sim/background/cleanup-soft-deletes.ts
index 496ab278767..eb3fa1fc832 100644
--- a/apps/sim/background/cleanup-soft-deletes.ts
+++ b/apps/sim/background/cleanup-soft-deletes.ts
@@ -307,12 +307,16 @@ async function cleanupArchivedUserTables(
     })
     rowBudget -= drain.deleted
 
-    // Only delete the definition once its rows are provably gone. A budget stop
-    // or a drain error leaves rows behind, so defer to keep the cascade small.
-    if (!drain.fullyDrained) break
+    if (drain.fullyDrained) {
+      await db.delete(userTableDefinitions).where(eq(userTableDefinitions.id, tableId))
+      definitionsDeleted++
+      continue
+    }
 
-    await db.delete(userTableDefinitions).where(eq(userTableDefinitions.id, tableId))
-    definitionsDeleted++
+    // Not fully drained: a budget stop consumes the whole budget, so a positive
+    // remainder means this one table errored mid-drain. Leave its definition for
+    // a later run and keep cleaning the rest; only stop once the budget is spent.
+    if (rowBudget <= 0) break
   }
 
   logger.info(

From ed095a35f6c1c3e577574beb8f96a024f84c2772 Mon Sep 17 00:00:00 2001
From: Theodore Li <theo@sim.ai>
Date: Fri, 5 Jun 2026 16:10:47 -0700
Subject: [PATCH 4/6] fix(tables): probe for leftover rows on budget exhaustion
 to avoid needless deferral

---
 apps/sim/lib/cleanup/batch-delete.test.ts | 19 +++++++++++++++--
 apps/sim/lib/cleanup/batch-delete.ts      | 25 +++++++++++++++++------
 2 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/apps/sim/lib/cleanup/batch-delete.test.ts b/apps/sim/lib/cleanup/batch-delete.test.ts
index b1a0cf7c7ab..105a9e2af00 100644
--- a/apps/sim/lib/cleanup/batch-delete.test.ts
+++ b/apps/sim/lib/cleanup/batch-delete.test.ts
@@ -2,7 +2,7 @@
  * @vitest-environment node
  */
 
-import { dbChainMock, dbChainMockFns } from '@sim/testing'
+import { dbChainMock, dbChainMockFns, resetDbChainMock } from '@sim/testing'
 import { beforeEach, describe, expect, it, vi } from 'vitest'
 
 vi.mock('@sim/db', () => dbChainMock)
@@ -24,6 +24,7 @@ function returnRows(count: number) {
 describe('drainRowsByColumn', () => {
   beforeEach(() => {
     vi.clearAllMocks()
+    resetDbChainMock()
   })
 
   it('drains in batches until a short batch and reports the set fully drained', async () => {
@@ -37,10 +38,12 @@ describe('drainRowsByColumn', () => {
     expect(dbChainMockFns.returning).toHaveBeenCalledTimes(2)
   })
 
-  it('stops at the row budget and reports the set not fully drained', async () => {
+  it('stops at the budget and reports not fully drained when rows remain', async () => {
     dbChainMockFns.returning
       .mockResolvedValueOnce(returnRows(2))
       .mockResolvedValueOnce(returnRows(2))
+    // Existence probe after the budget is spent finds a leftover row.
+    dbChainMockFns.limit.mockResolvedValue([{ id: 'leftover' }])
 
     const result = await drainRowsByColumn({ ...baseOpts, batchSize: 2, rowBudget: 4 })
 
@@ -48,6 +51,18 @@ describe('drainRowsByColumn', () => {
     expect(dbChainMockFns.returning).toHaveBeenCalledTimes(2)
   })
 
+  it('reports fully drained when the budget is hit but the set emptied exactly', async () => {
+    dbChainMockFns.returning
+      .mockResolvedValueOnce(returnRows(2))
+      .mockResolvedValueOnce(returnRows(2))
+    // Existence probe finds nothing remaining.
+    dbChainMockFns.limit.mockResolvedValue([])
+
+    const result = await drainRowsByColumn({ ...baseOpts, batchSize: 2, rowBudget: 4 })
+
+    expect(result).toEqual({ deleted: 4, fullyDrained: true })
+  })
+
   it('reports fully drained immediately when the match set is already empty', async () => {
     dbChainMockFns.returning.mockResolvedValueOnce([])
 
diff --git a/apps/sim/lib/cleanup/batch-delete.ts b/apps/sim/lib/cleanup/batch-delete.ts
index 737e2cd33c1..24aa8554d3b 100644
--- a/apps/sim/lib/cleanup/batch-delete.ts
+++ b/apps/sim/lib/cleanup/batch-delete.ts
@@ -279,10 +279,11 @@ export interface DrainByColumnOptions {
 export interface DrainResult {
   deleted: number
   /**
-   * True only when the match set was confirmed empty (a short final batch).
-   * Budget exhaustion and batch errors both yield `false` — callers must treat
-   * `false` as "rows may remain" and defer any dependent parent-delete (whose
-   * ON DELETE CASCADE would otherwise fire on the leftovers) to a later run.
+   * True only when the match set was confirmed empty — via a short final batch
+   * or an existence probe after the budget was spent. Batch errors yield
+   * `false`; callers must treat `false` as "rows may remain" and defer any
+   * dependent parent-delete (whose ON DELETE CASCADE would otherwise fire on the
+   * leftovers) to a later run.
    */
   fullyDrained: boolean
 }
@@ -331,6 +332,18 @@ export async function drainRowsByColumn({
     if (batchDeleted.length < limit) return { deleted, fullyDrained: true }
   }
 
-  // Hit the per-call budget on a full batch — rows may remain.
-  return { deleted, fullyDrained: false }
+  // Budget hit on a full final batch — rows may or may not remain. A cheap
+  // indexed existence probe disambiguates so a set whose size divides the budget
+  // exactly isn't needlessly deferred to a later run.
+  try {
+    const [leftover] = await db
+      .select({ id: idCol })
+      .from(tableDef)
+      .where(eq(matchCol, matchValue))
+      .limit(1)
+    return { deleted, fullyDrained: !leftover }
+  } catch (error) {
+    logger.error(`[${tableName}] Drain remainder probe failed for ${matchValue}:`, { error })
+    return { deleted, fullyDrained: false }
+  }
 }

From b26cbdae94ab699480a42d0dddeb699340f69551 Mon Sep 17 00:00:00 2001
From: Theodore Li <theo@sim.ai>
Date: Fri, 5 Jun 2026 16:30:08 -0700
Subject: [PATCH 5/6] fix(knowledge): restore null-check type guard broken by
 an unsafe lint autofix

---
 apps/sim/lib/knowledge/documents/service.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/apps/sim/lib/knowledge/documents/service.ts b/apps/sim/lib/knowledge/documents/service.ts
index d1371673886..9b4fce29135 100644
--- a/apps/sim/lib/knowledge/documents/service.ts
+++ b/apps/sim/lib/knowledge/documents/service.ts
@@ -97,7 +97,7 @@ async function assertKnowledgeBaseFileUrlsOwnership(
     ...new Set(
       fileUrls
         .map((url) => getKnowledgeBaseStorageKey(url))
-        .filter((key): key is string => key?.startsWith('kb/'))
+        .filter((key): key is string => key !== null && key.startsWith('kb/'))
     ),
   ]
   if (keys.length === 0) {
@@ -1974,7 +1974,7 @@ export async function deleteDocumentStorageFiles(
     ...new Set(
       entries
         .map((entry) => entry.storageKey)
-        .filter((key): key is string => key?.startsWith('kb/'))
+        .filter((key): key is string => key !== null && key.startsWith('kb/'))
     ),
   ]
   const ownerByKey = new Map<string, string | null>()

From e5d3eb8aca7d01ff76dbccc278b7133c194d1741 Mon Sep 17 00:00:00 2001
From: Theodore Li <theo@sim.ai>
Date: Fri, 5 Jun 2026 17:01:26 -0700
Subject: [PATCH 6/6] fix(tables): guard drain and definition delete against
 concurrent table restore

---
 .../background/cleanup-soft-deletes.test.ts   | 41 ++++++++++++++-----
 apps/sim/background/cleanup-soft-deletes.ts   | 34 +++++++++++++--
 apps/sim/lib/cleanup/batch-delete.ts          | 17 +++++---
 3 files changed, 73 insertions(+), 19 deletions(-)

diff --git a/apps/sim/background/cleanup-soft-deletes.test.ts b/apps/sim/background/cleanup-soft-deletes.test.ts
index d25a92c386a..23a46f6e323 100644
--- a/apps/sim/background/cleanup-soft-deletes.test.ts
+++ b/apps/sim/background/cleanup-soft-deletes.test.ts
@@ -6,7 +6,8 @@ import { beforeEach, describe, expect, it, vi } from 'vitest'
 
 const {
   mockBatchDeleteByWorkspaceAndTimestamp,
-  mockDeleteDefinition,
+  mockDefDeleteReturning,
+  mockDefDeleteWhere,
   mockDeleteFileMetadata,
   mockDeleteFiles,
   mockDeleteRowsById,
@@ -28,11 +29,15 @@ const {
     leftJoin: vi.fn(() => ({ where: mockWhere })),
   }))
   const mockSelect = vi.fn(() => ({ from: mockFrom }))
-  const mockDeleteDefinition = vi.fn(async () => undefined)
+  // Definition delete chain: db.delete(...).where(pred).returning() — captures
+  // the predicate so tests can assert the archivedAt guard.
+  const mockDefDeleteReturning = vi.fn(async () => [{ id: 'def' }])
+  const mockDefDeleteWhere = vi.fn(() => ({ returning: mockDefDeleteReturning }))
 
   return {
     mockBatchDeleteByWorkspaceAndTimestamp: vi.fn(async () => ({ deleted: 0, failed: 0 })),
-    mockDeleteDefinition,
+    mockDefDeleteReturning,
+    mockDefDeleteWhere,
     mockDeleteFileMetadata: vi.fn(async () => true),
     mockDeleteFiles: vi.fn(async () => ({ deleted: 0, failed: [] as Array<{ key: string }> })),
     mockDeleteRowsById: vi.fn(async () => ({ deleted: 0, failed: 0 })),
@@ -49,7 +54,7 @@ const {
 })
 
 vi.mock('@sim/db', () => ({
-  db: { select: mockSelect, delete: vi.fn(() => ({ where: mockDeleteDefinition })) },
+  db: { select: mockSelect, delete: vi.fn(() => ({ where: mockDefDeleteWhere })) },
 }))
 
 vi.mock('@sim/db/schema', () => {
@@ -84,6 +89,7 @@ vi.mock('drizzle-orm', () => ({
   and: vi.fn((...args: unknown[]) => ({ op: 'and', args })),
   asc: vi.fn((column: unknown) => ({ op: 'asc', column })),
   eq: vi.fn((...args: unknown[]) => ({ op: 'eq', args })),
+  exists: vi.fn((query: unknown) => ({ op: 'exists', query })),
   inArray: vi.fn((...args: unknown[]) => ({ op: 'inArray', args })),
   isNotNull: vi.fn((...args: unknown[]) => ({ op: 'isNotNull', args })),
   isNull: vi.fn((...args: unknown[]) => ({ op: 'isNull', args })),
@@ -191,13 +197,16 @@ describe('cleanup soft deletes — archived user tables', () => {
 
     await runCleanupSoftDeletes(basePayload)
 
+    // Drain is gated by a per-batch guard (parent still archived).
     expect(mockDrainRowsByColumn).toHaveBeenCalledWith(
-      expect.objectContaining({ matchValue: 'tbl-1' })
+      expect.objectContaining({ matchValue: 'tbl-1', guard: expect.anything() })
     )
-    expect(mockDeleteDefinition).toHaveBeenCalledTimes(1)
+    expect(mockDefDeleteReturning).toHaveBeenCalledTimes(1)
+    // The definition delete is conditional on the archivedAt guard.
+    expect(JSON.stringify(mockDefDeleteWhere.mock.calls[0][0])).toContain('archivedAt')
     // Rows are drained first, then the definition is deleted.
     expect(mockDrainRowsByColumn.mock.invocationCallOrder[0]).toBeLessThan(
-      mockDeleteDefinition.mock.invocationCallOrder[0]
+      mockDefDeleteReturning.mock.invocationCallOrder[0]
     )
   })
 
@@ -209,7 +218,7 @@ describe('cleanup soft deletes — archived user tables', () => {
     await runCleanupSoftDeletes(basePayload)
 
     expect(mockDrainRowsByColumn).toHaveBeenCalledTimes(1)
-    expect(mockDeleteDefinition).not.toHaveBeenCalled()
+    expect(mockDefDeleteReturning).not.toHaveBeenCalled()
   })
 
   it('skips a table that errors mid-drain but keeps cleaning the rest', async () => {
@@ -222,7 +231,19 @@ describe('cleanup soft deletes — archived user tables', () => {
     await runCleanupSoftDeletes(basePayload)
 
     expect(mockDrainRowsByColumn).toHaveBeenCalledTimes(2)
-    // Only the fully-drained table's definition is deleted.
-    expect(mockDeleteDefinition).toHaveBeenCalledTimes(1)
+    // Only the fully-drained table's definition delete is attempted.
+    expect(mockDefDeleteReturning).toHaveBeenCalledTimes(1)
+  })
+
+  it('does not count a restored table whose conditional definition delete no-ops', async () => {
+    mockSelectRowsByIdChunks.mockResolvedValueOnce([{ id: 'tbl-restored' }])
+    mockDrainRowsByColumn.mockResolvedValue({ deleted: 0, fullyDrained: true })
+    // Restored mid-run: the archivedAt-guarded delete matches no row.
+    mockDefDeleteReturning.mockResolvedValueOnce([])
+
+    await runCleanupSoftDeletes(basePayload)
+
+    // The delete is attempted but conditional, so it safely removes nothing.
+    expect(mockDefDeleteReturning).toHaveBeenCalledTimes(1)
   })
 })
diff --git a/apps/sim/background/cleanup-soft-deletes.ts b/apps/sim/background/cleanup-soft-deletes.ts
index eb3fa1fc832..77fdc823c9e 100644
--- a/apps/sim/background/cleanup-soft-deletes.ts
+++ b/apps/sim/background/cleanup-soft-deletes.ts
@@ -16,7 +16,7 @@ import {
 } from '@sim/db/schema'
 import { createLogger } from '@sim/logger'
 import { task } from '@trigger.dev/sdk'
-import { and, asc, eq, inArray, isNotNull, isNull, lt, sql } from 'drizzle-orm'
+import { and, asc, eq, exists, inArray, isNotNull, isNull, lt, sql } from 'drizzle-orm'
 import type { CleanupJobPayload } from '@/lib/billing/cleanup-dispatcher'
 import {
   batchDeleteByWorkspaceAndTimestamp,
@@ -296,11 +296,28 @@ async function cleanupArchivedUserTables(
   for (const { id: tableId } of doomedTables) {
     if (rowBudget <= 0) break
 
+    // Re-checked atomically on every drain batch: if the table is restored
+    // mid-run (archivedAt cleared) the next batch deletes nothing, so a concurrent
+    // restore can't keep purging an active table's rows.
+    const stillArchived = exists(
+      db
+        .select({ one: sql`1` })
+        .from(userTableDefinitions)
+        .where(
+          and(
+            eq(userTableDefinitions.id, tableId),
+            isNotNull(userTableDefinitions.archivedAt),
+            lt(userTableDefinitions.archivedAt, retentionDate)
+          )
+        )
+    )
+
     const drain = await drainRowsByColumn({
       tableDef: userTableRows,
       idCol: userTableRows.id,
       matchCol: userTableRows.tableId,
       matchValue: tableId,
+      guard: stillArchived,
       tableName: `${label}/userTableRows`,
       batchSize: TABLE_ROW_DRAIN_BATCH_SIZE,
       rowBudget,
@@ -308,8 +325,19 @@ async function cleanupArchivedUserTables(
     rowBudget -= drain.deleted
 
     if (drain.fullyDrained) {
-      await db.delete(userTableDefinitions).where(eq(userTableDefinitions.id, tableId))
-      definitionsDeleted++
+      // Delete the definition only while it is still archived+expired; a restore
+      // makes this a no-op, so an active table is never purged.
+      const deletedDef = await db
+        .delete(userTableDefinitions)
+        .where(
+          and(
+            eq(userTableDefinitions.id, tableId),
+            isNotNull(userTableDefinitions.archivedAt),
+            lt(userTableDefinitions.archivedAt, retentionDate)
+          )
+        )
+        .returning({ id: userTableDefinitions.id })
+      if (deletedDef.length === 1) definitionsDeleted++
       continue
     }
 
diff --git a/apps/sim/lib/cleanup/batch-delete.ts b/apps/sim/lib/cleanup/batch-delete.ts
index 24aa8554d3b..54e1a9a6161 100644
--- a/apps/sim/lib/cleanup/batch-delete.ts
+++ b/apps/sim/lib/cleanup/batch-delete.ts
@@ -1,6 +1,6 @@
 import { db } from '@sim/db'
 import { createLogger } from '@sim/logger'
-import { and, eq, inArray, isNotNull, lt, sql } from 'drizzle-orm'
+import { and, eq, inArray, isNotNull, lt, type SQL, sql } from 'drizzle-orm'
 import type { PgColumn, PgTable } from 'drizzle-orm/pg-core'
 
 const logger = createLogger('BatchDelete')
@@ -274,6 +274,13 @@ export interface DrainByColumnOptions {
   batchSize?: number
   /** Max rows to delete in this call across all batches. */
   rowBudget: number
+  /**
+   * Extra predicate ANDed into each batch's selection. Re-evaluated per batch
+   * (each batch is its own statement), so it can gate the drain on live state —
+   * e.g. "the parent row is still soft-deleted" — and stop deleting as soon as
+   * that state flips (a restore committed between batches).
+   */
+  guard?: SQL
 }
 
 export interface DrainResult {
@@ -302,17 +309,15 @@ export async function drainRowsByColumn({
   tableName,
   batchSize = DEFAULT_DELETE_CHUNK_SIZE,
   rowBudget,
+  guard,
 }: DrainByColumnOptions): Promise<DrainResult> {
   let deleted = 0
   let remaining = rowBudget
+  const matchPredicate = guard ? and(eq(matchCol, matchValue), guard) : eq(matchCol, matchValue)
 
   while (remaining > 0) {
     const limit = Math.min(batchSize, remaining)
-    const targetIds = db
-      .select({ id: idCol })
-      .from(tableDef)
-      .where(eq(matchCol, matchValue))
-      .limit(limit)
+    const targetIds = db.select({ id: idCol }).from(tableDef).where(matchPredicate).limit(limit)
 
     let batchDeleted: { id: unknown }[]
     try {