From c1d69c3ff920195b45fa59cd6ea99b737cf695a7 Mon Sep 17 00:00:00 2001 From: Theodore Li Date: Fri, 5 Jun 2026 15:48:37 -0700 Subject: [PATCH 1/6] improvement(tables): drain rows in batches before hard-deleting archived tables --- .../background/cleanup-soft-deletes.test.ts | 49 +++++++++++- apps/sim/background/cleanup-soft-deletes.ts | 78 +++++++++++++++++-- apps/sim/lib/cleanup/batch-delete.test.ts | 69 ++++++++++++++++ apps/sim/lib/cleanup/batch-delete.ts | 68 +++++++++++++++- apps/sim/lib/knowledge/documents/service.ts | 4 +- 5 files changed, 258 insertions(+), 10 deletions(-) create mode 100644 apps/sim/lib/cleanup/batch-delete.test.ts diff --git a/apps/sim/background/cleanup-soft-deletes.test.ts b/apps/sim/background/cleanup-soft-deletes.test.ts index cada41fdf3a..99425ef8a37 100644 --- a/apps/sim/background/cleanup-soft-deletes.test.ts +++ b/apps/sim/background/cleanup-soft-deletes.test.ts @@ -6,9 +6,11 @@ import { beforeEach, describe, expect, it, vi } from 'vitest' const { mockBatchDeleteByWorkspaceAndTimestamp, + mockDeleteDefinition, mockDeleteFileMetadata, mockDeleteFiles, mockDeleteRowsById, + mockDrainRowsByColumn, mockIsUsingCloudStorage, mockLimit, mockOrderBy, @@ -26,12 +28,15 @@ const { leftJoin: vi.fn(() => ({ where: mockWhere })), })) const mockSelect = vi.fn(() => ({ from: mockFrom })) + const mockDeleteDefinition = vi.fn(async () => undefined) return { mockBatchDeleteByWorkspaceAndTimestamp: vi.fn(async () => ({ deleted: 0, failed: 0 })), + mockDeleteDefinition, mockDeleteFileMetadata: vi.fn(async () => true), mockDeleteFiles: vi.fn(async () => ({ deleted: 0, failed: [] as Array<{ key: string }> })), mockDeleteRowsById: vi.fn(async () => ({ deleted: 0, failed: 0 })), + mockDrainRowsByColumn: vi.fn(async () => ({ deleted: 0, budgetExhausted: false })), mockIsUsingCloudStorage: vi.fn(() => true), mockLimit, mockOrderBy, @@ -43,7 +48,9 @@ const { } }) -vi.mock('@sim/db', () => ({ db: { select: mockSelect } })) +vi.mock('@sim/db', () => ({ + db: { select: mockSelect, delete: vi.fn(() => ({ where: mockDeleteDefinition })) }, +})) vi.mock('@sim/db/schema', () => { const table = (cols: string[]) => @@ -58,6 +65,7 @@ vi.mock('@sim/db/schema', () => { mcpServers: table(softCols), memory: table(softCols), userTableDefinitions: table(softCols), + userTableRows: table(['id', 'tableId', 'workspaceId']), workflow: table(softCols), workflowFolder: table(softCols), workflowMcpServer: table(softCols), @@ -93,6 +101,7 @@ vi.mock('@/lib/cleanup/batch-delete', () => ({ return chunks }, deleteRowsById: mockDeleteRowsById, + drainRowsByColumn: mockDrainRowsByColumn, selectRowsByIdChunks: mockSelectRowsByIdChunks, })) @@ -162,3 +171,41 @@ describe('cleanup soft deletes — orphan KB binding sweep', () => { expect(mockDeleteFileMetadata).not.toHaveBeenCalled() }) }) + +describe('cleanup soft deletes — archived user tables', () => { + beforeEach(() => { + vi.clearAllMocks() + mockIsUsingCloudStorage.mockReturnValue(true) + mockLimit.mockResolvedValue([]) + // selectRowsByIdChunks call order: workflows, file legacy, file multi, doomed tables. + mockSelectRowsByIdChunks + .mockResolvedValueOnce([]) + .mockResolvedValueOnce([]) + .mockResolvedValueOnce([]) + .mockResolvedValueOnce([{ id: 'tbl-1' }]) + }) + + it('drains rows before deleting the table definition', async () => { + mockDrainRowsByColumn.mockResolvedValue({ deleted: 5, budgetExhausted: false }) + + await runCleanupSoftDeletes(basePayload) + + expect(mockDrainRowsByColumn).toHaveBeenCalledWith( + expect.objectContaining({ matchValue: 'tbl-1' }) + ) + expect(mockDeleteDefinition).toHaveBeenCalledTimes(1) + // Rows are drained first, then the definition is deleted. + expect(mockDrainRowsByColumn.mock.invocationCallOrder[0]).toBeLessThan( + mockDeleteDefinition.mock.invocationCallOrder[0] + ) + }) + + it('defers the definition delete when the row budget is exhausted', async () => { + mockDrainRowsByColumn.mockResolvedValue({ deleted: 200_000, budgetExhausted: true }) + + await runCleanupSoftDeletes(basePayload) + + expect(mockDrainRowsByColumn).toHaveBeenCalledTimes(1) + expect(mockDeleteDefinition).not.toHaveBeenCalled() + }) +}) diff --git a/apps/sim/background/cleanup-soft-deletes.ts b/apps/sim/background/cleanup-soft-deletes.ts index ae900abc804..a6ed60b4492 100644 --- a/apps/sim/background/cleanup-soft-deletes.ts +++ b/apps/sim/background/cleanup-soft-deletes.ts @@ -7,6 +7,7 @@ import { mcpServers, memory, userTableDefinitions, + userTableRows, workflow, workflowFolder, workflowMcpServer, @@ -21,6 +22,7 @@ import { batchDeleteByWorkspaceAndTimestamp, chunkArray, deleteRowsById, + drainRowsByColumn, selectRowsByIdChunks, } from '@/lib/cleanup/batch-delete' import { prepareChatCleanup } from '@/lib/cleanup/chat-cleanup' @@ -148,12 +150,6 @@ const CLEANUP_TARGETS = [ wsCol: knowledgeBase.workspaceId, name: 'knowledgeBase', }, - { - table: userTableDefinitions, - softDeleteCol: userTableDefinitions.archivedAt, - wsCol: userTableDefinitions.workspaceId, - name: 'userTableDefinitions', - }, { table: memory, softDeleteCol: memory.deletedAt, wsCol: memory.workspaceId, name: 'memory' }, { table: mcpServers, @@ -256,6 +252,74 @@ async function cleanupOrphanedKnowledgeBaseBindings( return stats } +const TABLE_ROW_DRAIN_BATCH_SIZE = 5_000 +/** + * Per-run cap on user-table rows drained before deleting their definitions. + * Each batch is its own transaction, so this only bounds total job duration; a + * table larger than this drains across several cron runs. A definition is only + * deleted once its rows are fully drained, so its ON DELETE CASCADE never fires + * on a large set. + */ +const TABLE_ROW_DRAIN_TOTAL_LIMIT = 1_000_000 + +/** + * Hard-delete archived user tables. Rows are drained in bounded batches first + * (each batch cascades its `table_row_executions`), then the definition is + * deleted — turning what would be a single multi-million-row cascade into many + * small transactions. Tables whose rows can't be fully drained within this + * run's budget keep their archived definition for the next run. + */ +async function cleanupArchivedUserTables( + workspaceIds: string[], + retentionDate: Date, + label: string +): Promise { + const doomedTables = await selectRowsByIdChunks(workspaceIds, (chunkIds, chunkLimit) => + db + .select({ id: userTableDefinitions.id }) + .from(userTableDefinitions) + .where( + and( + inArray(userTableDefinitions.workspaceId, chunkIds), + isNotNull(userTableDefinitions.archivedAt), + lt(userTableDefinitions.archivedAt, retentionDate) + ) + ) + .limit(chunkLimit) + ) + + if (doomedTables.length === 0) return 0 + + let rowBudget = TABLE_ROW_DRAIN_TOTAL_LIMIT + let definitionsDeleted = 0 + + for (const { id: tableId } of doomedTables) { + if (rowBudget <= 0) break + + const drain = await drainRowsByColumn({ + tableDef: userTableRows, + idCol: userTableRows.id, + matchCol: userTableRows.tableId, + matchValue: tableId, + tableName: `${label}/userTableRows`, + batchSize: TABLE_ROW_DRAIN_BATCH_SIZE, + rowBudget, + }) + rowBudget -= drain.deleted + + // Rows still remain — defer the definition delete so its cascade stays small. + if (drain.budgetExhausted) break + + await db.delete(userTableDefinitions).where(eq(userTableDefinitions.id, tableId)) + definitionsDeleted++ + } + + logger.info( + `[${label}/userTableDefinitions] Deleted ${definitionsDeleted}/${doomedTables.length} archived tables (row budget left: ${rowBudget})` + ) + return definitionsDeleted +} + export async function runCleanupSoftDeletes(payload: CleanupJobPayload): Promise { const startTime = Date.now() const { workspaceIds, retentionHours, label } = payload @@ -350,6 +414,8 @@ export async function runCleanupSoftDeletes(payload: CleanupJobPayload): Promise totalDeleted += result.deleted } + totalDeleted += await cleanupArchivedUserTables(workspaceIds, retentionDate, label) + const orphanBindingStats = await cleanupOrphanedKnowledgeBaseBindings(workspaceIds, label) logger.info( diff --git a/apps/sim/lib/cleanup/batch-delete.test.ts b/apps/sim/lib/cleanup/batch-delete.test.ts new file mode 100644 index 00000000000..21394d0c9c9 --- /dev/null +++ b/apps/sim/lib/cleanup/batch-delete.test.ts @@ -0,0 +1,69 @@ +/** + * @vitest-environment node + */ + +import { dbChainMock, dbChainMockFns } from '@sim/testing' +import { beforeEach, describe, expect, it, vi } from 'vitest' + +vi.mock('@sim/db', () => dbChainMock) + +import { drainRowsByColumn } from '@/lib/cleanup/batch-delete' + +const baseOpts = { + tableDef: {} as never, + idCol: 'col.id' as never, + matchCol: 'col.tableId' as never, + matchValue: 'tbl-1', + tableName: 'test/userTableRows', +} + +function returnRows(count: number) { + return Array.from({ length: count }, (_, i) => ({ id: `row-${i}` })) +} + +describe('drainRowsByColumn', () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + it('drains in batches until a short batch and reports the set exhausted', async () => { + dbChainMockFns.returning + .mockResolvedValueOnce(returnRows(2)) + .mockResolvedValueOnce(returnRows(1)) + + const result = await drainRowsByColumn({ ...baseOpts, batchSize: 2, rowBudget: 10 }) + + expect(result).toEqual({ deleted: 3, budgetExhausted: false }) + expect(dbChainMockFns.returning).toHaveBeenCalledTimes(2) + }) + + it('stops at the row budget and reports it exhausted', async () => { + dbChainMockFns.returning + .mockResolvedValueOnce(returnRows(2)) + .mockResolvedValueOnce(returnRows(2)) + + const result = await drainRowsByColumn({ ...baseOpts, batchSize: 2, rowBudget: 4 }) + + expect(result).toEqual({ deleted: 4, budgetExhausted: true }) + expect(dbChainMockFns.returning).toHaveBeenCalledTimes(2) + }) + + it('returns immediately when the match set is already empty', async () => { + dbChainMockFns.returning.mockResolvedValueOnce([]) + + const result = await drainRowsByColumn({ ...baseOpts, batchSize: 2, rowBudget: 10 }) + + expect(result).toEqual({ deleted: 0, budgetExhausted: false }) + expect(dbChainMockFns.returning).toHaveBeenCalledTimes(1) + }) + + it('bails without throwing when a batch delete fails', async () => { + dbChainMockFns.returning + .mockResolvedValueOnce(returnRows(2)) + .mockRejectedValueOnce(new Error('db down')) + + const result = await drainRowsByColumn({ ...baseOpts, batchSize: 2, rowBudget: 10 }) + + expect(result).toEqual({ deleted: 2, budgetExhausted: false }) + }) +}) diff --git a/apps/sim/lib/cleanup/batch-delete.ts b/apps/sim/lib/cleanup/batch-delete.ts index d523800cd59..6c33ac0c22c 100644 --- a/apps/sim/lib/cleanup/batch-delete.ts +++ b/apps/sim/lib/cleanup/batch-delete.ts @@ -1,6 +1,6 @@ import { db } from '@sim/db' import { createLogger } from '@sim/logger' -import { and, inArray, isNotNull, lt, sql } from 'drizzle-orm' +import { and, eq, inArray, isNotNull, lt, sql } from 'drizzle-orm' import type { PgColumn, PgTable } from 'drizzle-orm/pg-core' const logger = createLogger('BatchDelete') @@ -262,3 +262,69 @@ export async function deleteRowsById( ) return result } + +export interface DrainByColumnOptions { + tableDef: PgTable + /** Single-column primary key used to batch the delete. */ + idCol: PgColumn + /** Column matched against `matchValue` to scope the drain (e.g. a parent FK). */ + matchCol: PgColumn + matchValue: string + tableName: string + batchSize?: number + /** Max rows to delete in this call across all batches. */ + rowBudget: number +} + +export interface DrainResult { + deleted: number + /** True if the budget ran out before the match set was fully drained. */ + budgetExhausted: boolean +} + +/** + * Delete every row matching `matchCol = matchValue` in self-bounded batches, + * each its own transaction. Use to empty a large child set before deleting its + * parent so the parent's ON DELETE CASCADE fires on a small (or empty) set + * instead of millions of rows in one statement. + */ +export async function drainRowsByColumn({ + tableDef, + idCol, + matchCol, + matchValue, + tableName, + batchSize = DEFAULT_DELETE_CHUNK_SIZE, + rowBudget, +}: DrainByColumnOptions): Promise { + let deleted = 0 + let remaining = rowBudget + + while (remaining > 0) { + const limit = Math.min(batchSize, remaining) + const targetIds = db + .select({ id: idCol }) + .from(tableDef) + .where(eq(matchCol, matchValue)) + .limit(limit) + + let batchDeleted: { id: unknown }[] + try { + batchDeleted = await db + .delete(tableDef) + .where(inArray(idCol, targetIds)) + .returning({ id: idCol }) + } catch (error) { + logger.error(`[${tableName}] Drain batch failed for ${matchValue}:`, { error }) + return { deleted, budgetExhausted: false } + } + + deleted += batchDeleted.length + remaining -= batchDeleted.length + + // Short batch means the match set is exhausted. + if (batchDeleted.length < limit) return { deleted, budgetExhausted: false } + } + + return { deleted, budgetExhausted: true } +} diff --git a/apps/sim/lib/knowledge/documents/service.ts b/apps/sim/lib/knowledge/documents/service.ts index 9b4fce29135..d1371673886 100644 --- a/apps/sim/lib/knowledge/documents/service.ts +++ b/apps/sim/lib/knowledge/documents/service.ts @@ -97,7 +97,7 @@ async function assertKnowledgeBaseFileUrlsOwnership( ...new Set( fileUrls .map((url) => getKnowledgeBaseStorageKey(url)) - .filter((key): key is string => key !== null && key.startsWith('kb/')) + .filter((key): key is string => key?.startsWith('kb/')) ), ] if (keys.length === 0) { @@ -1974,7 +1974,7 @@ export async function deleteDocumentStorageFiles( ...new Set( entries .map((entry) => entry.storageKey) - .filter((key): key is string => key !== null && key.startsWith('kb/')) + .filter((key): key is string => key?.startsWith('kb/')) ), ] const ownerByKey = new Map() From d660542a516b7d93d063ab28f3bbd3300f7eee01 Mon Sep 17 00:00:00 2001 From: Theodore Li Date: Fri, 5 Jun 2026 15:56:13 -0700 Subject: [PATCH 2/6] fix(tables): defer definition delete unless rows are provably drained --- apps/sim/background/cleanup-soft-deletes.test.ts | 9 +++++---- apps/sim/background/cleanup-soft-deletes.ts | 5 +++-- apps/sim/lib/cleanup/batch-delete.test.ts | 16 ++++++++-------- apps/sim/lib/cleanup/batch-delete.ts | 16 +++++++++++----- 4 files changed, 27 insertions(+), 19 deletions(-) diff --git a/apps/sim/background/cleanup-soft-deletes.test.ts b/apps/sim/background/cleanup-soft-deletes.test.ts index 99425ef8a37..c4fde4b7dd3 100644 --- a/apps/sim/background/cleanup-soft-deletes.test.ts +++ b/apps/sim/background/cleanup-soft-deletes.test.ts @@ -36,7 +36,7 @@ const { mockDeleteFileMetadata: vi.fn(async () => true), mockDeleteFiles: vi.fn(async () => ({ deleted: 0, failed: [] as Array<{ key: string }> })), mockDeleteRowsById: vi.fn(async () => ({ deleted: 0, failed: 0 })), - mockDrainRowsByColumn: vi.fn(async () => ({ deleted: 0, budgetExhausted: false })), + mockDrainRowsByColumn: vi.fn(async () => ({ deleted: 0, fullyDrained: false })), mockIsUsingCloudStorage: vi.fn(() => true), mockLimit, mockOrderBy, @@ -186,7 +186,7 @@ describe('cleanup soft deletes — archived user tables', () => { }) it('drains rows before deleting the table definition', async () => { - mockDrainRowsByColumn.mockResolvedValue({ deleted: 5, budgetExhausted: false }) + mockDrainRowsByColumn.mockResolvedValue({ deleted: 5, fullyDrained: true }) await runCleanupSoftDeletes(basePayload) @@ -200,8 +200,9 @@ describe('cleanup soft deletes — archived user tables', () => { ) }) - it('defers the definition delete when the row budget is exhausted', async () => { - mockDrainRowsByColumn.mockResolvedValue({ deleted: 200_000, budgetExhausted: true }) + it('defers the definition delete when the drain does not fully complete', async () => { + // Budget stop or a drain error both surface as fullyDrained: false. + mockDrainRowsByColumn.mockResolvedValue({ deleted: 200_000, fullyDrained: false }) await runCleanupSoftDeletes(basePayload) diff --git a/apps/sim/background/cleanup-soft-deletes.ts b/apps/sim/background/cleanup-soft-deletes.ts index a6ed60b4492..496ab278767 100644 --- a/apps/sim/background/cleanup-soft-deletes.ts +++ b/apps/sim/background/cleanup-soft-deletes.ts @@ -307,8 +307,9 @@ async function cleanupArchivedUserTables( }) rowBudget -= drain.deleted - // Rows still remain — defer the definition delete so its cascade stays small. - if (drain.budgetExhausted) break + // Only delete the definition once its rows are provably gone. A budget stop + // or a drain error leaves rows behind, so defer to keep the cascade small. + if (!drain.fullyDrained) break await db.delete(userTableDefinitions).where(eq(userTableDefinitions.id, tableId)) definitionsDeleted++ diff --git a/apps/sim/lib/cleanup/batch-delete.test.ts b/apps/sim/lib/cleanup/batch-delete.test.ts index 21394d0c9c9..b1a0cf7c7ab 100644 --- a/apps/sim/lib/cleanup/batch-delete.test.ts +++ b/apps/sim/lib/cleanup/batch-delete.test.ts @@ -26,44 +26,44 @@ describe('drainRowsByColumn', () => { vi.clearAllMocks() }) - it('drains in batches until a short batch and reports the set exhausted', async () => { + it('drains in batches until a short batch and reports the set fully drained', async () => { dbChainMockFns.returning .mockResolvedValueOnce(returnRows(2)) .mockResolvedValueOnce(returnRows(1)) const result = await drainRowsByColumn({ ...baseOpts, batchSize: 2, rowBudget: 10 }) - expect(result).toEqual({ deleted: 3, budgetExhausted: false }) + expect(result).toEqual({ deleted: 3, fullyDrained: true }) expect(dbChainMockFns.returning).toHaveBeenCalledTimes(2) }) - it('stops at the row budget and reports it exhausted', async () => { + it('stops at the row budget and reports the set not fully drained', async () => { dbChainMockFns.returning .mockResolvedValueOnce(returnRows(2)) .mockResolvedValueOnce(returnRows(2)) const result = await drainRowsByColumn({ ...baseOpts, batchSize: 2, rowBudget: 4 }) - expect(result).toEqual({ deleted: 4, budgetExhausted: true }) + expect(result).toEqual({ deleted: 4, fullyDrained: false }) expect(dbChainMockFns.returning).toHaveBeenCalledTimes(2) }) - it('returns immediately when the match set is already empty', async () => { + it('reports fully drained immediately when the match set is already empty', async () => { dbChainMockFns.returning.mockResolvedValueOnce([]) const result = await drainRowsByColumn({ ...baseOpts, batchSize: 2, rowBudget: 10 }) - expect(result).toEqual({ deleted: 0, budgetExhausted: false }) + expect(result).toEqual({ deleted: 0, fullyDrained: true }) expect(dbChainMockFns.returning).toHaveBeenCalledTimes(1) }) - it('bails without throwing when a batch delete fails', async () => { + it('reports not fully drained on a batch error so the caller defers the cascade', async () => { dbChainMockFns.returning .mockResolvedValueOnce(returnRows(2)) .mockRejectedValueOnce(new Error('db down')) const result = await drainRowsByColumn({ ...baseOpts, batchSize: 2, rowBudget: 10 }) - expect(result).toEqual({ deleted: 2, budgetExhausted: false }) + expect(result).toEqual({ deleted: 2, fullyDrained: false }) }) }) diff --git a/apps/sim/lib/cleanup/batch-delete.ts b/apps/sim/lib/cleanup/batch-delete.ts index 6c33ac0c22c..737e2cd33c1 100644 --- a/apps/sim/lib/cleanup/batch-delete.ts +++ b/apps/sim/lib/cleanup/batch-delete.ts @@ -278,8 +278,13 @@ export interface DrainByColumnOptions { export interface DrainResult { deleted: number - /** True if the budget ran out before the match set was fully drained. */ - budgetExhausted: boolean + /** + * True only when the match set was confirmed empty (a short final batch). + * Budget exhaustion and batch errors both yield `false` — callers must treat + * `false` as "rows may remain" and defer any dependent parent-delete (whose + * ON DELETE CASCADE would otherwise fire on the leftovers) to a later run. + */ + fullyDrained: boolean } /** @@ -316,15 +321,16 @@ export async function drainRowsByColumn({ .returning({ id: idCol }) } catch (error) { logger.error(`[${tableName}] Drain batch failed for ${matchValue}:`, { error }) - return { deleted, budgetExhausted: false } + return { deleted, fullyDrained: false } } deleted += batchDeleted.length remaining -= batchDeleted.length // Short batch means the match set is exhausted. - if (batchDeleted.length < limit) return { deleted, budgetExhausted: false } + if (batchDeleted.length < limit) return { deleted, fullyDrained: true } } - return { deleted, budgetExhausted: true } + // Hit the per-call budget on a full batch — rows may remain. + return { deleted, fullyDrained: false } } From 4ad8c0bf93d62a2b87d4940a40aade18b3c7b517 Mon Sep 17 00:00:00 2001 From: Theodore Li Date: Fri, 5 Jun 2026 16:03:23 -0700 Subject: [PATCH 3/6] fix(tables): skip an errored table mid-drain instead of aborting the whole run --- .../background/cleanup-soft-deletes.test.ts | 26 +++++++++++++++---- apps/sim/background/cleanup-soft-deletes.ts | 14 ++++++---- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/apps/sim/background/cleanup-soft-deletes.test.ts b/apps/sim/background/cleanup-soft-deletes.test.ts index c4fde4b7dd3..d25a92c386a 100644 --- a/apps/sim/background/cleanup-soft-deletes.test.ts +++ b/apps/sim/background/cleanup-soft-deletes.test.ts @@ -177,15 +177,16 @@ describe('cleanup soft deletes — archived user tables', () => { vi.clearAllMocks() mockIsUsingCloudStorage.mockReturnValue(true) mockLimit.mockResolvedValue([]) - // selectRowsByIdChunks call order: workflows, file legacy, file multi, doomed tables. + // selectRowsByIdChunks call order: workflows, file legacy, file multi, doomed + // tables. Each test queues the doomed-tables result (4th call) itself. mockSelectRowsByIdChunks .mockResolvedValueOnce([]) .mockResolvedValueOnce([]) .mockResolvedValueOnce([]) - .mockResolvedValueOnce([{ id: 'tbl-1' }]) }) it('drains rows before deleting the table definition', async () => { + mockSelectRowsByIdChunks.mockResolvedValueOnce([{ id: 'tbl-1' }]) mockDrainRowsByColumn.mockResolvedValue({ deleted: 5, fullyDrained: true }) await runCleanupSoftDeletes(basePayload) @@ -200,13 +201,28 @@ describe('cleanup soft deletes — archived user tables', () => { ) }) - it('defers the definition delete when the drain does not fully complete', async () => { - // Budget stop or a drain error both surface as fullyDrained: false. - mockDrainRowsByColumn.mockResolvedValue({ deleted: 200_000, fullyDrained: false }) + it('defers the definition delete when the budget is exhausted', async () => { + mockSelectRowsByIdChunks.mockResolvedValueOnce([{ id: 'tbl-1' }]) + // Budget stop consumes the whole budget — deleted equals the per-run cap. + mockDrainRowsByColumn.mockResolvedValue({ deleted: 1_000_000, fullyDrained: false }) await runCleanupSoftDeletes(basePayload) expect(mockDrainRowsByColumn).toHaveBeenCalledTimes(1) expect(mockDeleteDefinition).not.toHaveBeenCalled() }) + + it('skips a table that errors mid-drain but keeps cleaning the rest', async () => { + mockSelectRowsByIdChunks.mockResolvedValueOnce([{ id: 'tbl-err' }, { id: 'tbl-ok' }]) + // First table errors mid-drain (budget remains); second drains fully. + mockDrainRowsByColumn + .mockResolvedValueOnce({ deleted: 2, fullyDrained: false }) + .mockResolvedValueOnce({ deleted: 5, fullyDrained: true }) + + await runCleanupSoftDeletes(basePayload) + + expect(mockDrainRowsByColumn).toHaveBeenCalledTimes(2) + // Only the fully-drained table's definition is deleted. + expect(mockDeleteDefinition).toHaveBeenCalledTimes(1) + }) }) diff --git a/apps/sim/background/cleanup-soft-deletes.ts b/apps/sim/background/cleanup-soft-deletes.ts index 496ab278767..eb3fa1fc832 100644 --- a/apps/sim/background/cleanup-soft-deletes.ts +++ b/apps/sim/background/cleanup-soft-deletes.ts @@ -307,12 +307,16 @@ async function cleanupArchivedUserTables( }) rowBudget -= drain.deleted - // Only delete the definition once its rows are provably gone. A budget stop - // or a drain error leaves rows behind, so defer to keep the cascade small. - if (!drain.fullyDrained) break + if (drain.fullyDrained) { + await db.delete(userTableDefinitions).where(eq(userTableDefinitions.id, tableId)) + definitionsDeleted++ + continue + } - await db.delete(userTableDefinitions).where(eq(userTableDefinitions.id, tableId)) - definitionsDeleted++ + // Not fully drained: a budget stop consumes the whole budget, so a positive + // remainder means this one table errored mid-drain. Leave its definition for + // a later run and keep cleaning the rest; only stop once the budget is spent. + if (rowBudget <= 0) break } logger.info( From ed095a35f6c1c3e577574beb8f96a024f84c2772 Mon Sep 17 00:00:00 2001 From: Theodore Li Date: Fri, 5 Jun 2026 16:10:47 -0700 Subject: [PATCH 4/6] fix(tables): probe for leftover rows on budget exhaustion to avoid needless deferral --- apps/sim/lib/cleanup/batch-delete.test.ts | 19 +++++++++++++++-- apps/sim/lib/cleanup/batch-delete.ts | 25 +++++++++++++++++------ 2 files changed, 36 insertions(+), 8 deletions(-) diff --git a/apps/sim/lib/cleanup/batch-delete.test.ts b/apps/sim/lib/cleanup/batch-delete.test.ts index b1a0cf7c7ab..105a9e2af00 100644 --- a/apps/sim/lib/cleanup/batch-delete.test.ts +++ b/apps/sim/lib/cleanup/batch-delete.test.ts @@ -2,7 +2,7 @@ * @vitest-environment node */ -import { dbChainMock, dbChainMockFns } from '@sim/testing' +import { dbChainMock, dbChainMockFns, resetDbChainMock } from '@sim/testing' import { beforeEach, describe, expect, it, vi } from 'vitest' vi.mock('@sim/db', () => dbChainMock) @@ -24,6 +24,7 @@ function returnRows(count: number) { describe('drainRowsByColumn', () => { beforeEach(() => { vi.clearAllMocks() + resetDbChainMock() }) it('drains in batches until a short batch and reports the set fully drained', async () => { @@ -37,10 +38,12 @@ describe('drainRowsByColumn', () => { expect(dbChainMockFns.returning).toHaveBeenCalledTimes(2) }) - it('stops at the row budget and reports the set not fully drained', async () => { + it('stops at the budget and reports not fully drained when rows remain', async () => { dbChainMockFns.returning .mockResolvedValueOnce(returnRows(2)) .mockResolvedValueOnce(returnRows(2)) + // Existence probe after the budget is spent finds a leftover row. + dbChainMockFns.limit.mockResolvedValue([{ id: 'leftover' }]) const result = await drainRowsByColumn({ ...baseOpts, batchSize: 2, rowBudget: 4 }) @@ -48,6 +51,18 @@ describe('drainRowsByColumn', () => { expect(dbChainMockFns.returning).toHaveBeenCalledTimes(2) }) + it('reports fully drained when the budget is hit but the set emptied exactly', async () => { + dbChainMockFns.returning + .mockResolvedValueOnce(returnRows(2)) + .mockResolvedValueOnce(returnRows(2)) + // Existence probe finds nothing remaining. + dbChainMockFns.limit.mockResolvedValue([]) + + const result = await drainRowsByColumn({ ...baseOpts, batchSize: 2, rowBudget: 4 }) + + expect(result).toEqual({ deleted: 4, fullyDrained: true }) + }) + it('reports fully drained immediately when the match set is already empty', async () => { dbChainMockFns.returning.mockResolvedValueOnce([]) diff --git a/apps/sim/lib/cleanup/batch-delete.ts b/apps/sim/lib/cleanup/batch-delete.ts index 737e2cd33c1..24aa8554d3b 100644 --- a/apps/sim/lib/cleanup/batch-delete.ts +++ b/apps/sim/lib/cleanup/batch-delete.ts @@ -279,10 +279,11 @@ export interface DrainByColumnOptions { export interface DrainResult { deleted: number /** - * True only when the match set was confirmed empty (a short final batch). - * Budget exhaustion and batch errors both yield `false` — callers must treat - * `false` as "rows may remain" and defer any dependent parent-delete (whose - * ON DELETE CASCADE would otherwise fire on the leftovers) to a later run. + * True only when the match set was confirmed empty — via a short final batch + * or an existence probe after the budget was spent. Batch errors yield + * `false`; callers must treat `false` as "rows may remain" and defer any + * dependent parent-delete (whose ON DELETE CASCADE would otherwise fire on the + * leftovers) to a later run. */ fullyDrained: boolean } @@ -331,6 +332,18 @@ export async function drainRowsByColumn({ if (batchDeleted.length < limit) return { deleted, fullyDrained: true } } - // Hit the per-call budget on a full batch — rows may remain. - return { deleted, fullyDrained: false } + // Budget hit on a full final batch — rows may or may not remain. A cheap + // indexed existence probe disambiguates so a set whose size divides the budget + // exactly isn't needlessly deferred to a later run. + try { + const [leftover] = await db + .select({ id: idCol }) + .from(tableDef) + .where(eq(matchCol, matchValue)) + .limit(1) + return { deleted, fullyDrained: !leftover } + } catch (error) { + logger.error(`[${tableName}] Drain remainder probe failed for ${matchValue}:`, { error }) + return { deleted, fullyDrained: false } + } } From b26cbdae94ab699480a42d0dddeb699340f69551 Mon Sep 17 00:00:00 2001 From: Theodore Li Date: Fri, 5 Jun 2026 16:30:08 -0700 Subject: [PATCH 5/6] fix(knowledge): restore null-check type guard broken by an unsafe lint autofix --- apps/sim/lib/knowledge/documents/service.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/sim/lib/knowledge/documents/service.ts b/apps/sim/lib/knowledge/documents/service.ts index d1371673886..9b4fce29135 100644 --- a/apps/sim/lib/knowledge/documents/service.ts +++ b/apps/sim/lib/knowledge/documents/service.ts @@ -97,7 +97,7 @@ async function assertKnowledgeBaseFileUrlsOwnership( ...new Set( fileUrls .map((url) => getKnowledgeBaseStorageKey(url)) - .filter((key): key is string => key?.startsWith('kb/')) + .filter((key): key is string => key !== null && key.startsWith('kb/')) ), ] if (keys.length === 0) { @@ -1974,7 +1974,7 @@ export async function deleteDocumentStorageFiles( ...new Set( entries .map((entry) => entry.storageKey) - .filter((key): key is string => key?.startsWith('kb/')) + .filter((key): key is string => key !== null && key.startsWith('kb/')) ), ] const ownerByKey = new Map() From e5d3eb8aca7d01ff76dbccc278b7133c194d1741 Mon Sep 17 00:00:00 2001 From: Theodore Li Date: Fri, 5 Jun 2026 17:01:26 -0700 Subject: [PATCH 6/6] fix(tables): guard drain and definition delete against concurrent table restore --- .../background/cleanup-soft-deletes.test.ts | 41 ++++++++++++++----- apps/sim/background/cleanup-soft-deletes.ts | 34 +++++++++++++-- apps/sim/lib/cleanup/batch-delete.ts | 17 +++++--- 3 files changed, 73 insertions(+), 19 deletions(-) diff --git a/apps/sim/background/cleanup-soft-deletes.test.ts b/apps/sim/background/cleanup-soft-deletes.test.ts index d25a92c386a..23a46f6e323 100644 --- a/apps/sim/background/cleanup-soft-deletes.test.ts +++ b/apps/sim/background/cleanup-soft-deletes.test.ts @@ -6,7 +6,8 @@ import { beforeEach, describe, expect, it, vi } from 'vitest' const { mockBatchDeleteByWorkspaceAndTimestamp, - mockDeleteDefinition, + mockDefDeleteReturning, + mockDefDeleteWhere, mockDeleteFileMetadata, mockDeleteFiles, mockDeleteRowsById, @@ -28,11 +29,15 @@ const { leftJoin: vi.fn(() => ({ where: mockWhere })), })) const mockSelect = vi.fn(() => ({ from: mockFrom })) - const mockDeleteDefinition = vi.fn(async () => undefined) + // Definition delete chain: db.delete(...).where(pred).returning() — captures + // the predicate so tests can assert the archivedAt guard. + const mockDefDeleteReturning = vi.fn(async () => [{ id: 'def' }]) + const mockDefDeleteWhere = vi.fn(() => ({ returning: mockDefDeleteReturning })) return { mockBatchDeleteByWorkspaceAndTimestamp: vi.fn(async () => ({ deleted: 0, failed: 0 })), - mockDeleteDefinition, + mockDefDeleteReturning, + mockDefDeleteWhere, mockDeleteFileMetadata: vi.fn(async () => true), mockDeleteFiles: vi.fn(async () => ({ deleted: 0, failed: [] as Array<{ key: string }> })), mockDeleteRowsById: vi.fn(async () => ({ deleted: 0, failed: 0 })), @@ -49,7 +54,7 @@ const { }) vi.mock('@sim/db', () => ({ - db: { select: mockSelect, delete: vi.fn(() => ({ where: mockDeleteDefinition })) }, + db: { select: mockSelect, delete: vi.fn(() => ({ where: mockDefDeleteWhere })) }, })) vi.mock('@sim/db/schema', () => { @@ -84,6 +89,7 @@ vi.mock('drizzle-orm', () => ({ and: vi.fn((...args: unknown[]) => ({ op: 'and', args })), asc: vi.fn((column: unknown) => ({ op: 'asc', column })), eq: vi.fn((...args: unknown[]) => ({ op: 'eq', args })), + exists: vi.fn((query: unknown) => ({ op: 'exists', query })), inArray: vi.fn((...args: unknown[]) => ({ op: 'inArray', args })), isNotNull: vi.fn((...args: unknown[]) => ({ op: 'isNotNull', args })), isNull: vi.fn((...args: unknown[]) => ({ op: 'isNull', args })), @@ -191,13 +197,16 @@ describe('cleanup soft deletes — archived user tables', () => { await runCleanupSoftDeletes(basePayload) + // Drain is gated by a per-batch guard (parent still archived). expect(mockDrainRowsByColumn).toHaveBeenCalledWith( - expect.objectContaining({ matchValue: 'tbl-1' }) + expect.objectContaining({ matchValue: 'tbl-1', guard: expect.anything() }) ) - expect(mockDeleteDefinition).toHaveBeenCalledTimes(1) + expect(mockDefDeleteReturning).toHaveBeenCalledTimes(1) + // The definition delete is conditional on the archivedAt guard. + expect(JSON.stringify(mockDefDeleteWhere.mock.calls[0][0])).toContain('archivedAt') // Rows are drained first, then the definition is deleted. expect(mockDrainRowsByColumn.mock.invocationCallOrder[0]).toBeLessThan( - mockDeleteDefinition.mock.invocationCallOrder[0] + mockDefDeleteReturning.mock.invocationCallOrder[0] ) }) @@ -209,7 +218,7 @@ describe('cleanup soft deletes — archived user tables', () => { await runCleanupSoftDeletes(basePayload) expect(mockDrainRowsByColumn).toHaveBeenCalledTimes(1) - expect(mockDeleteDefinition).not.toHaveBeenCalled() + expect(mockDefDeleteReturning).not.toHaveBeenCalled() }) it('skips a table that errors mid-drain but keeps cleaning the rest', async () => { @@ -222,7 +231,19 @@ describe('cleanup soft deletes — archived user tables', () => { await runCleanupSoftDeletes(basePayload) expect(mockDrainRowsByColumn).toHaveBeenCalledTimes(2) - // Only the fully-drained table's definition is deleted. - expect(mockDeleteDefinition).toHaveBeenCalledTimes(1) + // Only the fully-drained table's definition delete is attempted. + expect(mockDefDeleteReturning).toHaveBeenCalledTimes(1) + }) + + it('does not count a restored table whose conditional definition delete no-ops', async () => { + mockSelectRowsByIdChunks.mockResolvedValueOnce([{ id: 'tbl-restored' }]) + mockDrainRowsByColumn.mockResolvedValue({ deleted: 0, fullyDrained: true }) + // Restored mid-run: the archivedAt-guarded delete matches no row. + mockDefDeleteReturning.mockResolvedValueOnce([]) + + await runCleanupSoftDeletes(basePayload) + + // The delete is attempted but conditional, so it safely removes nothing. + expect(mockDefDeleteReturning).toHaveBeenCalledTimes(1) }) }) diff --git a/apps/sim/background/cleanup-soft-deletes.ts b/apps/sim/background/cleanup-soft-deletes.ts index eb3fa1fc832..77fdc823c9e 100644 --- a/apps/sim/background/cleanup-soft-deletes.ts +++ b/apps/sim/background/cleanup-soft-deletes.ts @@ -16,7 +16,7 @@ import { } from '@sim/db/schema' import { createLogger } from '@sim/logger' import { task } from '@trigger.dev/sdk' -import { and, asc, eq, inArray, isNotNull, isNull, lt, sql } from 'drizzle-orm' +import { and, asc, eq, exists, inArray, isNotNull, isNull, lt, sql } from 'drizzle-orm' import type { CleanupJobPayload } from '@/lib/billing/cleanup-dispatcher' import { batchDeleteByWorkspaceAndTimestamp, @@ -296,11 +296,28 @@ async function cleanupArchivedUserTables( for (const { id: tableId } of doomedTables) { if (rowBudget <= 0) break + // Re-checked atomically on every drain batch: if the table is restored + // mid-run (archivedAt cleared) the next batch deletes nothing, so a concurrent + // restore can't keep purging an active table's rows. + const stillArchived = exists( + db + .select({ one: sql`1` }) + .from(userTableDefinitions) + .where( + and( + eq(userTableDefinitions.id, tableId), + isNotNull(userTableDefinitions.archivedAt), + lt(userTableDefinitions.archivedAt, retentionDate) + ) + ) + ) + const drain = await drainRowsByColumn({ tableDef: userTableRows, idCol: userTableRows.id, matchCol: userTableRows.tableId, matchValue: tableId, + guard: stillArchived, tableName: `${label}/userTableRows`, batchSize: TABLE_ROW_DRAIN_BATCH_SIZE, rowBudget, @@ -308,8 +325,19 @@ async function cleanupArchivedUserTables( rowBudget -= drain.deleted if (drain.fullyDrained) { - await db.delete(userTableDefinitions).where(eq(userTableDefinitions.id, tableId)) - definitionsDeleted++ + // Delete the definition only while it is still archived+expired; a restore + // makes this a no-op, so an active table is never purged. + const deletedDef = await db + .delete(userTableDefinitions) + .where( + and( + eq(userTableDefinitions.id, tableId), + isNotNull(userTableDefinitions.archivedAt), + lt(userTableDefinitions.archivedAt, retentionDate) + ) + ) + .returning({ id: userTableDefinitions.id }) + if (deletedDef.length === 1) definitionsDeleted++ continue } diff --git a/apps/sim/lib/cleanup/batch-delete.ts b/apps/sim/lib/cleanup/batch-delete.ts index 24aa8554d3b..54e1a9a6161 100644 --- a/apps/sim/lib/cleanup/batch-delete.ts +++ b/apps/sim/lib/cleanup/batch-delete.ts @@ -1,6 +1,6 @@ import { db } from '@sim/db' import { createLogger } from '@sim/logger' -import { and, eq, inArray, isNotNull, lt, sql } from 'drizzle-orm' +import { and, eq, inArray, isNotNull, lt, type SQL, sql } from 'drizzle-orm' import type { PgColumn, PgTable } from 'drizzle-orm/pg-core' const logger = createLogger('BatchDelete') @@ -274,6 +274,13 @@ export interface DrainByColumnOptions { batchSize?: number /** Max rows to delete in this call across all batches. */ rowBudget: number + /** + * Extra predicate ANDed into each batch's selection. Re-evaluated per batch + * (each batch is its own statement), so it can gate the drain on live state — + * e.g. "the parent row is still soft-deleted" — and stop deleting as soon as + * that state flips (a restore committed between batches). + */ + guard?: SQL } export interface DrainResult { @@ -302,17 +309,15 @@ export async function drainRowsByColumn({ tableName, batchSize = DEFAULT_DELETE_CHUNK_SIZE, rowBudget, + guard, }: DrainByColumnOptions): Promise { let deleted = 0 let remaining = rowBudget + const matchPredicate = guard ? and(eq(matchCol, matchValue), guard) : eq(matchCol, matchValue) while (remaining > 0) { const limit = Math.min(batchSize, remaining) - const targetIds = db - .select({ id: idCol }) - .from(tableDef) - .where(eq(matchCol, matchValue)) - .limit(limit) + const targetIds = db.select({ id: idCol }).from(tableDef).where(matchPredicate).limit(limit) let batchDeleted: { id: unknown }[] try {