Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions apps/sim/background/table-export.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ import { runTableExport, type TableExportPayload } from '@/lib/table/export-runn

/**
* Trigger.dev wrapper around `runTableExport`. Retry-safe: a retried attempt regenerates the file
* from scratch (failures clean up their partial upload), and the `table_jobs` ownership gate
* stops a run that lost the job. `medium-1x` — the serialized file is buffered in memory before
* the single-shot storage upload (~hundreds of MB worst case for enterprise 1M-row tables).
* from scratch (failures abort/clean up their partial upload), and the `table_jobs` ownership gate
* stops a run that lost the job. The file streams to storage in bounded multipart chunks (no longer
* buffered whole), so `medium-1x` is now headroom rather than a hard requirement.
*/
export const tableExportTask = task({
id: 'table-export',
Expand Down
19 changes: 14 additions & 5 deletions apps/sim/lib/api/contracts/hotspots.ts
Original file line number Diff line number Diff line change
Expand Up @@ -162,11 +162,20 @@ export const functionExecuteContract = defineRouteContract({
isCustomTool: z.boolean().optional().default(false),
_sandboxFiles: z
.array(
z.object({
path: z.string(),
content: z.string(),
encoding: z.literal('base64').optional(),
})
z.union([
z.object({
type: z.literal('content').optional(),
path: z.string(),
content: z.string(),
encoding: z.literal('base64').optional(),
}),
// Mounted by reference: the sandbox fetches `url` itself (no bytes through the web tier).
z.object({
type: z.literal('url'),
path: z.string(),
url: z.string(),
}),
])
)
.optional(),
}),
Expand Down
201 changes: 201 additions & 0 deletions apps/sim/lib/copilot/tools/handlers/function-execute.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
/**
* @vitest-environment node
*/
import { beforeEach, describe, expect, it, vi } from 'vitest'

// `vi.mock` factories are hoisted above the module imports, so the mock functions they close
// over are created inside `vi.hoisted` to guarantee they exist when those factories run.
const {
mockIsFeatureEnabled,
mockGetTableById,
mockListTables,
mockQueryRows,
mockGetOrCreateTableSnapshot,
mockDownloadFile,
mockGeneratePresignedDownloadUrl,
mockHasCloudStorage,
mockExecuteTool,
} = vi.hoisted(() => ({
mockIsFeatureEnabled: vi.fn(),
mockGetTableById: vi.fn(),
mockListTables: vi.fn(),
mockQueryRows: vi.fn(),
mockGetOrCreateTableSnapshot: vi.fn(),
mockDownloadFile: vi.fn(),
mockGeneratePresignedDownloadUrl: vi.fn(),
mockHasCloudStorage: vi.fn(),
mockExecuteTool: vi.fn(),
}))

// Module stubs for the collaborators of the handler under test. Each factory hands back the
// hoisted mock fns so individual tests can steer return values and assert on calls.
vi.mock('@/lib/core/config/feature-flags', () => ({ isFeatureEnabled: mockIsFeatureEnabled }))
vi.mock('@/lib/table/service', () => ({
getTableById: mockGetTableById,
listTables: mockListTables,
}))
vi.mock('@/lib/table/rows/service', () => ({ queryRows: mockQueryRows }))
// SNAPSHOT_MAX_BYTES is pinned to 500MB here; the over-limit test uses a 600MB snapshot.
vi.mock('@/lib/table/snapshot-cache', () => ({
getOrCreateTableSnapshot: mockGetOrCreateTableSnapshot,
SNAPSHOT_MAX_BYTES: 500 * 1024 * 1024,
}))
vi.mock('@/lib/uploads/core/storage-service', () => ({
downloadFile: mockDownloadFile,
generatePresignedDownloadUrl: mockGeneratePresignedDownloadUrl,
hasCloudStorage: mockHasCloudStorage,
}))
// `executeTool` is the sink: the sandbox files built by the handler are read back from its args.
vi.mock('@/tools', () => ({ executeTool: mockExecuteTool }))
// Workspace-file + VFS surfaces are unused on the tables-only path; stub to avoid heavy loads.
vi.mock('@/lib/uploads/contexts/workspace/workspace-file-manager', () => ({
fetchWorkspaceFileBuffer: vi.fn(),
findWorkspaceFileRecord: vi.fn(),
getSandboxWorkspaceFilePath: vi.fn(),
listWorkspaceFiles: vi.fn(),
}))
vi.mock('@/lib/uploads/contexts/workspace/workspace-file-folder-manager', () => ({
listWorkspaceFileFolders: vi.fn(),
}))
// Minimal stand-ins: path segments are split/joined on '/' with no encoding applied.
vi.mock('@/lib/copilot/vfs/path-utils', () => ({
decodeVfsPathSegments: (p: string) => p.split('/'),
encodeVfsPathSegments: (s: string[]) => s.join('/'),
}))
vi.mock('@/lib/copilot/vfs/workflow-alias-resolver', () => ({
resolveWorkflowAliasForWorkspace: vi.fn().mockResolvedValue(null),
}))
// Alias handling is neutralised: nothing is a plan alias and sandbox paths pass through as-is.
vi.mock('@/lib/copilot/vfs/workflow-aliases', () => ({
isPlanAliasPath: () => false,
workflowAliasSandboxPath: (p: string) => p,
}))

import { executeFunctionExecute } from '@/lib/copilot/tools/handlers/function-execute'

/** Fixture: a 1000-row table owned by workspace `ws_1` with one string column called `name`. */
const nameColumn = { id: 'col_name', name: 'name', type: 'string' }
const table = {
  id: 'tbl_1',
  workspaceId: 'ws_1',
  rowCount: 1000,
  schema: { columns: [nameColumn] },
}

/** Execution context for the calls under test; its workspace matches the fixture table's. */
const context = {
  workspaceId: 'ws_1',
  userId: 'u1',
}

type MountedSandboxFile = { path: string; type?: string; content?: string; url?: string }

/**
 * Reads back the `_sandboxFiles` passed as the second argument of the first `executeTool` call.
 *
 * @returns The mounted files, or an empty array when that call carried no `_sandboxFiles`.
 */
function mountedFiles() {
  const firstCallArgs = mockExecuteTool.mock.calls[0]
  const toolParams = firstCallArgs[1] as { _sandboxFiles?: Array<MountedSandboxFile> }
  const { _sandboxFiles: mounted } = toolParams
  return mounted ?? []
}

/** Feature-flag stub: resolves `true` only when asked about the `table-snapshot-cache` flag. */
const snapshotCacheOn = async (flag: string): Promise<boolean> => flag === 'table-snapshot-cache'

// Covers how `executeFunctionExecute` mounts `inputTables` into the sandbox: the inline CSV path,
// the snapshot path (presigned URL or buffered content), the size limits, and workspace isolation.
describe('executeFunctionExecute table mounts', () => {
// Baseline for every test: flag off, cloud storage available, a one-row query result, and a
// fixed presigned URL. Individual tests override only what they need.
beforeEach(() => {
vi.clearAllMocks()
mockExecuteTool.mockResolvedValue({ success: true })
mockGetTableById.mockResolvedValue(table)
mockIsFeatureEnabled.mockResolvedValue(false)
mockQueryRows.mockResolvedValue({ rows: [{ data: { name: 'Ada' } }] })
mockHasCloudStorage.mockReturnValue(true)
mockGeneratePresignedDownloadUrl.mockResolvedValue('https://s3.example/presigned?sig=abc')
})

it('flag OFF: drains the table inline via queryRows (existing path)', async () => {
await executeFunctionExecute({ inputTables: ['tbl_1'] }, context as never)

expect(mockQueryRows).toHaveBeenCalledTimes(1)
expect(mockGetOrCreateTableSnapshot).not.toHaveBeenCalled()
const files = mountedFiles()
expect(files[0].path).toBe('/home/user/tables/tbl_1.csv')
expect(files[0].content).toBe('name\nAda')
})

it('flag ON + cloud storage: mounts by presigned URL, no bytes through web', async () => {
mockIsFeatureEnabled.mockImplementation(snapshotCacheOn)
mockGetOrCreateTableSnapshot.mockResolvedValue({
key: 'table-snapshots/ws_1/tbl_1/v5.csv',
size: 9,
version: 5,
})

await executeFunctionExecute({ inputTables: ['tbl_1'] }, context as never)

expect(mockGetOrCreateTableSnapshot).toHaveBeenCalledTimes(1)
// Neither the row drain nor a storage download may run on the by-reference path.
expect(mockQueryRows).not.toHaveBeenCalled()
expect(mockDownloadFile).not.toHaveBeenCalled()
expect(mockGeneratePresignedDownloadUrl).toHaveBeenCalledWith(
'table-snapshots/ws_1/tbl_1/v5.csv',
'execution',
expect.any(Number)
)
expect(mountedFiles()[0]).toEqual({
type: 'url',
path: '/home/user/tables/tbl_1.csv',
url: 'https://s3.example/presigned?sig=abc',
})
})

it('flag ON + local storage: falls back to a buffered content mount', async () => {
mockIsFeatureEnabled.mockImplementation(snapshotCacheOn)
mockHasCloudStorage.mockReturnValue(false)
mockGetOrCreateTableSnapshot.mockResolvedValue({
key: 'table-snapshots/ws_1/tbl_1/v5.csv',
size: 9,
version: 5,
})
mockDownloadFile.mockResolvedValue(Buffer.from('name\nAda\n'))

await executeFunctionExecute({ inputTables: ['tbl_1'] }, context as never)

expect(mockGeneratePresignedDownloadUrl).not.toHaveBeenCalled()
expect(mockDownloadFile).toHaveBeenCalledWith(
expect.objectContaining({ key: 'table-snapshots/ws_1/tbl_1/v5.csv', context: 'execution' })
)
const file = mountedFiles()[0]
expect(file.path).toBe('/home/user/tables/tbl_1.csv')
expect(file.content).toBe('name\nAda\n')
// Content mounts are pushed without an explicit `type` discriminator.
expect(file.type).toBeUndefined()
})

it('flag ON but small table stays on the inline path', async () => {
mockIsFeatureEnabled.mockImplementation(snapshotCacheOn)
// 10 rows is below the handler's SNAPSHOT_MIN_ROWS threshold (500).
mockGetTableById.mockResolvedValue({ ...table, rowCount: 10 })

await executeFunctionExecute({ inputTables: ['tbl_1'] }, context as never)

expect(mockGetOrCreateTableSnapshot).not.toHaveBeenCalled()
expect(mockQueryRows).toHaveBeenCalledTimes(1)
})

it('flag ON + cloud: throws when the snapshot exceeds the table mount limit', async () => {
mockIsFeatureEnabled.mockImplementation(snapshotCacheOn)
// 600MB is over the mocked 500MB SNAPSHOT_MAX_BYTES.
mockGetOrCreateTableSnapshot.mockResolvedValue({
key: 'table-snapshots/ws_1/tbl_1/v5.csv',
size: 600 * 1024 * 1024,
version: 5,
})

await expect(
executeFunctionExecute({ inputTables: ['tbl_1'] }, context as never)
).rejects.toThrow(/table mount limit/)
expect(mockGeneratePresignedDownloadUrl).not.toHaveBeenCalled()
})

it('flag ON + local: throws when the snapshot exceeds the per-file mount limit', async () => {
mockIsFeatureEnabled.mockImplementation(snapshotCacheOn)
mockHasCloudStorage.mockReturnValue(false)
// 20MB is over the handler's 10MB MAX_FILE_SIZE; the size check must fire before any download.
mockGetOrCreateTableSnapshot.mockResolvedValue({
key: 'table-snapshots/ws_1/tbl_1/v5.csv',
size: 20 * 1024 * 1024,
version: 5,
})

await expect(
executeFunctionExecute({ inputTables: ['tbl_1'] }, context as never)
).rejects.toThrow(/per-file mount limit/)
expect(mockDownloadFile).not.toHaveBeenCalled()
})

it('rejects a table that belongs to another workspace (tenant isolation)', async () => {
// The table resolves, but under `ws_2` while the caller's context is `ws_1`.
mockGetTableById.mockResolvedValue({ ...table, workspaceId: 'ws_2' })

await expect(
executeFunctionExecute({ inputTables: ['tbl_1'] }, context as never)
).rejects.toThrow(/Input table not found/)
expect(mockGetOrCreateTableSnapshot).not.toHaveBeenCalled()
})
})
87 changes: 74 additions & 13 deletions apps/sim/lib/copilot/tools/handlers/function-execute.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,19 @@ import { isPlanAliasPath, workflowAliasSandboxPath } from '@/lib/copilot/vfs/wor
import { isFeatureEnabled } from '@/lib/core/config/feature-flags'
import { queryRows } from '@/lib/table/rows/service'
import { getTableById, listTables } from '@/lib/table/service'
import { getOrCreateTableSnapshot, SNAPSHOT_MAX_BYTES } from '@/lib/table/snapshot-cache'
import { listWorkspaceFileFolders } from '@/lib/uploads/contexts/workspace/workspace-file-folder-manager'
import {
fetchWorkspaceFileBuffer,
findWorkspaceFileRecord,
getSandboxWorkspaceFilePath,
listWorkspaceFiles,
} from '@/lib/uploads/contexts/workspace/workspace-file-manager'
import {
downloadFile,
generatePresignedDownloadUrl,
hasCloudStorage,
} from '@/lib/uploads/core/storage-service'
import { executeTool as executeAppTool } from '@/tools'
import type { ToolExecutionContext, ToolExecutionResult } from '../../tool-executor/types'

Expand All @@ -21,11 +27,22 @@ const MAX_FILE_SIZE = 10 * 1024 * 1024
/** Ceiling (50MB) on the combined size of files whose bytes are mounted as inline content. */
const MAX_TOTAL_SIZE = 50 * 1024 * 1024
// NOTE(review): presumably the cap on how many files one call may mount — usage is not visible in this hunk; confirm.
const MAX_MOUNTED_FILES = 500

interface SandboxFile {
path: string
content: string
encoding?: 'base64'
}
/**
 * Below this row count a table mounts via the direct inline CSV path — the version-keyed snapshot
 * cache (storage round-trip) only pays off for larger/hot tables. Behind the feature flag either
 * way; this just keeps tiny one-shot tables on the cheaper path.
 *
 * The gate is inclusive: a table takes the snapshot path when `rowCount >= SNAPSHOT_MIN_ROWS`
 * and the `table-snapshot-cache` flag is enabled.
 */
const SNAPSHOT_MIN_ROWS = 500

/**
 * How long, in seconds, the presigned snapshot URL given to the sandbox stays valid (10 minutes).
 * Sized so a large snapshot can be downloaded at sandbox startup; the URL only grants read access
 * to the single version-pinned snapshot object.
 */
const SNAPSHOT_URL_TTL_SECONDS = 10 * 60

/**
 * A file to place in the sandbox at `path`, in one of two forms:
 * - content (`type` omitted or `'content'`): the bytes travel inline in `content`, optionally
 *   flagged as base64 via `encoding`.
 * - `'url'`: only a `url` is carried; no inline bytes are included.
 */
type SandboxFile =
| { type?: 'content'; path: string; content: string; encoding?: 'base64' }
| { type: 'url'; path: string; url: string }

interface CanonicalFileInput {
path: string
Expand Down Expand Up @@ -249,6 +266,7 @@ async function resolveInputFiles(
const tablePathLookup = hasTablePathRefs
? new Map((await listTables(workspaceId)).map((table) => [table.name, table]))
: undefined
const snapshotCacheEnabled = await isFeatureEnabled('table-snapshot-cache')
for (const tableRef of inputTables) {
const tableId =
typeof tableRef === 'string'
Expand All @@ -263,6 +281,56 @@ async function resolveInputFiles(
`Input table not found: "${tableId}". Pass the table id (tbl_...) from tables/{name}/meta.json, or a tables/{name}/meta.json path.`
)
}
const sandboxPath =
typeof tableRef === 'object' && tableRef !== null
? (tableRef as CanonicalTableInput).sandboxPath
: undefined
const mountPath = sandboxPath || `/home/user/tables/${table.id}.csv`

// Large/hot tables mount by reference from a version-keyed CSV snapshot in object storage.
if (snapshotCacheEnabled && table.rowCount >= SNAPSHOT_MIN_ROWS) {
const snapshot = await getOrCreateTableSnapshot(table, 'copilot-fn-exec')

if (hasCloudStorage()) {
// Mount by reference: the sandbox fetches the snapshot straight from storage via a
// presigned URL, so the bytes never pass through the web process — the only ceiling is
// sandbox disk (enforced at materialization by SNAPSHOT_MAX_BYTES).
if (snapshot.size > SNAPSHOT_MAX_BYTES) {
throw new Error(
`Input table "${tableId}" is ${Math.round(snapshot.size / 1024 / 1024)}MB, over the ${SNAPSHOT_MAX_BYTES / 1024 / 1024}MB table mount limit.`
)
}
const url = await generatePresignedDownloadUrl(
snapshot.key,
'execution',
SNAPSHOT_URL_TTL_SECONDS
)
sandboxFiles.push({ type: 'url', path: mountPath, url })
Comment thread
cursor[bot] marked this conversation as resolved.
continue
}

// Local storage: a presigned URL is an app-internal serve path a remote sandbox can't
// reach, so fall back to buffering the bytes through the web process (file-mount guards).
if (snapshot.size > MAX_FILE_SIZE) {
throw new Error(
`Input table "${tableId}" is ${Math.round(snapshot.size / 1024 / 1024)}MB, over the ${MAX_FILE_SIZE / 1024 / 1024}MB per-file mount limit.`
)
}
if (totalSize + snapshot.size > MAX_TOTAL_SIZE) {
throw new Error(
`Mounting "${tableId}" would exceed the ${MAX_TOTAL_SIZE / 1024 / 1024}MB total mount limit. Mount fewer or smaller tables.`
)
}
const buffer = await downloadFile({
key: snapshot.key,
context: 'execution',
maxBytes: MAX_FILE_SIZE,
})
totalSize += buffer.length
sandboxFiles.push({ path: mountPath, content: buffer.toString('utf-8') })
continue
}

Comment thread
TheodoreSpeaks marked this conversation as resolved.
const rows = await queryRows(table, {}, 'copilot-fn-exec')

const allKeys = new Set(table.schema.columns.map((column) => column.name))
Expand Down Expand Up @@ -290,14 +358,7 @@ async function resolveInputFiles(
)
}
const csvContent = csvLines.join('\n')
const sandboxPath =
typeof tableRef === 'object' && tableRef !== null
? (tableRef as CanonicalTableInput).sandboxPath
: undefined
sandboxFiles.push({
path: sandboxPath || `/home/user/tables/${table.id}.csv`,
content: csvContent,
})
sandboxFiles.push({ path: mountPath, content: csvContent })
}
}

Expand Down
1 change: 1 addition & 0 deletions apps/sim/lib/core/config/env.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ export const env = createEnv({
BILLING_ENABLED: z.boolean().optional(), // Enable billing enforcement and usage tracking
FREE_API_DEPLOYMENT_GATE_ENABLED: z.boolean().optional(), // Block free-plan accounts from programmatic execution (API/MCP/A2A/generic webhooks/chat embeds). Requires BILLING_ENABLED. Off by default for dark rollout
TABLES_FRACTIONAL_ORDERING: z.boolean().optional(), // Order table rows by fractional order_key (O(1) insert/delete) instead of integer position
TABLE_SNAPSHOT_CACHE: z.boolean().optional(), // Mount tables into sandboxes by reference via a version-keyed CSV snapshot in object storage instead of draining the whole table into web-process heap

// Table feature limits (per plan). Apply when billing is disabled (free tier defaults) or for billed plans.
FREE_TABLES_LIMIT: z.number().optional(), // Max user tables per workspace on free tier (default: 3)
Expand Down
8 changes: 8 additions & 0 deletions apps/sim/lib/core/config/feature-flags.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,14 @@ const FEATURE_FLAGS = {
'user context — use enabled:true for global rollout rather than per-user targeting.',
fallback: 'MOTHERSHIP_BETA_FEATURES',
},
'table-snapshot-cache': {
description:
'Mount Sim tables into code sandboxes by reference via a version-keyed CSV snapshot in ' +
'object storage (reused across runs until the table mutates) instead of draining the whole ' +
'table into web-process heap. resolveInputFiles evaluates without user context — use ' +
'enabled:true for global rollout rather than per-user targeting.',
fallback: 'TABLE_SNAPSHOT_CACHE',
},
} satisfies Record<string, FeatureFlagDefinition>

/**
Expand Down
Loading
Loading