From bd9bbde7f01d824eeace6b660095d084175a49ee Mon Sep 17 00:00:00 2001 From: Theodore Li Date: Wed, 24 Jun 2026 14:26:19 -0700 Subject: [PATCH 1/2] improvement(sandbox): mount workspace files by presigned URL instead of buffering bytes Files and directories mounted into the function_execute sandbox were downloaded into the web process, re-encoded, and shipped inline. Mirror the table-snapshot path: under cloud storage, presign each file and let the sandbox curl it directly (no web-heap transit). Local storage keeps the buffered fallback. Add a count cap on the inputFiles list and a generous aggregate URL-mount byte ceiling so oversized requests fail fast instead of filling sandbox disk. --- .../tools/handlers/function-execute.test.ts | 128 ++++++++++++++- .../tools/handlers/function-execute.ts | 147 ++++++++++++------ 2 files changed, 223 insertions(+), 52 deletions(-) diff --git a/apps/sim/lib/copilot/tools/handlers/function-execute.test.ts b/apps/sim/lib/copilot/tools/handlers/function-execute.test.ts index b47286a03b2..f195a0f71ef 100644 --- a/apps/sim/lib/copilot/tools/handlers/function-execute.test.ts +++ b/apps/sim/lib/copilot/tools/handlers/function-execute.test.ts @@ -13,6 +13,11 @@ const { mockGeneratePresignedDownloadUrl, mockHasCloudStorage, mockExecuteTool, + mockListWorkspaceFiles, + mockFindWorkspaceFileRecord, + mockFetchWorkspaceFileBuffer, + mockGetSandboxWorkspaceFilePath, + mockListWorkspaceFileFolders, } = vi.hoisted(() => ({ mockIsFeatureEnabled: vi.fn(), mockGetTableById: vi.fn(), @@ -23,6 +28,11 @@ const { mockGeneratePresignedDownloadUrl: vi.fn(), mockHasCloudStorage: vi.fn(), mockExecuteTool: vi.fn(), + mockListWorkspaceFiles: vi.fn(), + mockFindWorkspaceFileRecord: vi.fn(), + mockFetchWorkspaceFileBuffer: vi.fn(), + mockGetSandboxWorkspaceFilePath: vi.fn(), + mockListWorkspaceFileFolders: vi.fn(), })) vi.mock('@/lib/core/config/feature-flags', () => ({ isFeatureEnabled: mockIsFeatureEnabled })) @@ -41,15 +51,14 @@ 
vi.mock('@/lib/uploads/core/storage-service', () => ({ hasCloudStorage: mockHasCloudStorage, })) vi.mock('@/tools', () => ({ executeTool: mockExecuteTool })) -// Workspace-file + VFS surfaces are unused on the tables-only path; stub to avoid heavy loads. vi.mock('@/lib/uploads/contexts/workspace/workspace-file-manager', () => ({ - fetchWorkspaceFileBuffer: vi.fn(), - findWorkspaceFileRecord: vi.fn(), - getSandboxWorkspaceFilePath: vi.fn(), - listWorkspaceFiles: vi.fn(), + fetchWorkspaceFileBuffer: mockFetchWorkspaceFileBuffer, + findWorkspaceFileRecord: mockFindWorkspaceFileRecord, + getSandboxWorkspaceFilePath: mockGetSandboxWorkspaceFilePath, + listWorkspaceFiles: mockListWorkspaceFiles, })) vi.mock('@/lib/uploads/contexts/workspace/workspace-file-folder-manager', () => ({ - listWorkspaceFileFolders: vi.fn(), + listWorkspaceFileFolders: mockListWorkspaceFileFolders, })) vi.mock('@/lib/copilot/vfs/path-utils', () => ({ decodeVfsPathSegments: (p: string) => p.split('/'), @@ -247,3 +256,110 @@ describe('executeFunctionExecute table mounts', () => { expect(mockGetOrCreateTableSnapshot).not.toHaveBeenCalled() }) }) + +const fileRecord = { + id: 'file_1', + workspaceId: 'ws_1', + name: 'data.csv', + key: 'workspace/ws_1/data.csv', + path: '/api/files/serve/workspace%2Fws_1%2Fdata.csv', + size: 100, + type: 'text/csv', + storageContext: 'workspace' as const, +} + +describe('executeFunctionExecute file mounts', () => { + beforeEach(() => { + vi.clearAllMocks() + mockExecuteTool.mockResolvedValue({ success: true }) + mockIsFeatureEnabled.mockResolvedValue(false) + mockHasCloudStorage.mockReturnValue(true) + mockGeneratePresignedDownloadUrl.mockResolvedValue('https://s3.example/file?sig=abc') + mockListWorkspaceFiles.mockResolvedValue([fileRecord]) + mockFindWorkspaceFileRecord.mockReturnValue(fileRecord) + mockGetSandboxWorkspaceFilePath.mockReturnValue('/home/user/files/data.csv') + }) + + it('cloud storage: mounts by presigned URL with the record context, no bytes 
through web', async () => { + await executeFunctionExecute({ inputFiles: ['files/data.csv'] }, context as never) + + expect(mockFetchWorkspaceFileBuffer).not.toHaveBeenCalled() + expect(mockGeneratePresignedDownloadUrl).toHaveBeenCalledWith( + 'workspace/ws_1/data.csv', + 'workspace', + expect.any(Number) + ) + expect(mountedFiles()[0]).toEqual({ + type: 'url', + path: '/home/user/files/data.csv', + url: 'https://s3.example/file?sig=abc', + }) + }) + + it('local storage: falls back to a buffered inline content mount', async () => { + mockHasCloudStorage.mockReturnValue(false) + mockFetchWorkspaceFileBuffer.mockResolvedValue(Buffer.from('name\nAda\n')) + + await executeFunctionExecute({ inputFiles: ['files/data.csv'] }, context as never) + + expect(mockGeneratePresignedDownloadUrl).not.toHaveBeenCalled() + const file = mountedFiles()[0] + expect(file.path).toBe('/home/user/files/data.csv') + expect(file.content).toBe('name\nAda\n') + expect(file.type).toBeUndefined() + }) + + it('cloud storage: throws when a file exceeds the per-file URL mount limit', async () => { + mockFindWorkspaceFileRecord.mockReturnValue({ ...fileRecord, size: 600 * 1024 * 1024 }) + + await expect( + executeFunctionExecute({ inputFiles: ['files/data.csv'] }, context as never) + ).rejects.toThrow(/per-file mount limit/) + expect(mockGeneratePresignedDownloadUrl).not.toHaveBeenCalled() + }) + + it('cloud storage: throws when mounts exceed the aggregate URL mount limit', async () => { + // Each file is at the 500MB per-file cap; the 5th pushes the running total past 2GB. 
+ mockFindWorkspaceFileRecord.mockReturnValue({ ...fileRecord, size: 500 * 1024 * 1024 }) + const paths = Array.from({ length: 5 }, (_, i) => `files/big-${i}.csv`) + + await expect(executeFunctionExecute({ inputFiles: paths }, context as never)).rejects.toThrow( + /total mount limit/ + ) + expect(mockGeneratePresignedDownloadUrl).toHaveBeenCalledTimes(4) + }) + + it('throws when the inputFiles list exceeds the mounted-file count cap', async () => { + const paths = Array.from({ length: 501 }, (_, i) => `files/f-${i}.csv`) + + await expect(executeFunctionExecute({ inputFiles: paths }, context as never)).rejects.toThrow( + /Too many input files/ + ) + expect(mockListWorkspaceFiles).not.toHaveBeenCalled() + }) + + it('cloud storage: mounts each directory descendant by presigned URL', async () => { + mockListWorkspaceFileFolders.mockResolvedValue([{ path: 'Reports' }]) + const descendant = { + ...fileRecord, + name: 'q1.csv', + key: 'workspace/ws_1/q1.csv', + folderPath: 'Reports', + } + mockListWorkspaceFiles.mockResolvedValue([descendant]) + + await executeFunctionExecute({ inputs: { directories: ['files/Reports'] } }, context as never) + + expect(mockFetchWorkspaceFileBuffer).not.toHaveBeenCalled() + expect(mockGeneratePresignedDownloadUrl).toHaveBeenCalledWith( + 'workspace/ws_1/q1.csv', + 'workspace', + expect.any(Number) + ) + expect(mountedFiles()[0]).toEqual({ + type: 'url', + path: '/home/user/files/Reports/q1.csv', + url: 'https://s3.example/file?sig=abc', + }) + }) +}) diff --git a/apps/sim/lib/copilot/tools/handlers/function-execute.ts b/apps/sim/lib/copilot/tools/handlers/function-execute.ts index 2550b63dde1..166769ebc75 100644 --- a/apps/sim/lib/copilot/tools/handlers/function-execute.ts +++ b/apps/sim/lib/copilot/tools/handlers/function-execute.ts @@ -14,6 +14,7 @@ import { findWorkspaceFileRecord, getSandboxWorkspaceFilePath, listWorkspaceFiles, + type WorkspaceFileRecord, } from '@/lib/uploads/contexts/workspace/workspace-file-manager' import { 
downloadFile, @@ -37,15 +38,98 @@ const MAX_MOUNTED_FILES = 500 const SNAPSHOT_MIN_ROWS = 500 /** - * Lifetime of the presigned URL handed to the sandbox to fetch a snapshot. Long enough to download - * a large file at sandbox startup; the URL grants read to only that one version-pinned object. + * Lifetime of a presigned URL handed to the sandbox to fetch a mounted object (table snapshot or + * workspace file). Long enough to download a large file at sandbox startup; the URL grants read to + * only that one object. */ -const SNAPSHOT_URL_TTL_SECONDS = 600 +const MOUNT_URL_TTL_SECONDS = 600 + +/** + * Per-file ceiling for URL-mounted workspace files. The bytes never transit the web process — the + * sandbox curls them straight from storage — so the bound is sandbox disk, not web heap (unlike the + * inline MAX_FILE_SIZE path). + */ +const MOUNT_URL_MAX_BYTES = 500 * 1024 * 1024 + +/** + * Aggregate ceiling across all URL-mounted files in one request. URL mounts bypass the web heap (so + * they don't count against MAX_TOTAL_SIZE), but the sandbox still curls every byte onto its disk — + * this rejects an oversized request up front instead of filling the sandbox disk one slow curl at a + * time. Generous vs MAX_TOTAL_SIZE since the bytes never transit web memory. + */ +const MAX_TOTAL_URL_BYTES = 2 * 1024 * 1024 * 1024 type SandboxFile = | { type?: 'content'; path: string; content: string; encoding?: 'base64' } | { type: 'url'; path: string; url: string } +/** + * Running byte totals for one resolveInputFiles call. `buffered` bytes pass through the web process + * (capped by MAX_TOTAL_SIZE); `url` bytes are curled straight into the sandbox (capped by + * MAX_TOTAL_URL_BYTES). Tracked separately because the two ceilings protect different resources — + * web heap vs sandbox disk. + */ +interface MountedBytes { + buffered: number + url: number +} + +/** + * Mounts a stored workspace file into the sandbox and records its bytes against the running totals. 
+ * With cloud storage the sandbox fetches the bytes itself from a presigned URL (no web-heap transit, + * per-file ceiling MOUNT_URL_MAX_BYTES, aggregate ceiling MAX_TOTAL_URL_BYTES); with local storage a + * presigned URL is an app-internal serve path a remote sandbox can't reach, so we buffer the bytes + * through the web process under the inline MAX_FILE_SIZE / MAX_TOTAL_SIZE guards. + */ +async function pushWorkspaceFileMount( + sandboxFiles: SandboxFile[], + record: WorkspaceFileRecord, + mountPath: string, + mounted: MountedBytes +): Promise<void> { + if (hasCloudStorage()) { + if (record.size > MOUNT_URL_MAX_BYTES) { + throw new Error( + `Input file "${record.name}" is ${Math.round(record.size / 1024 / 1024)}MB, over the ${MOUNT_URL_MAX_BYTES / 1024 / 1024}MB per-file mount limit.` + ) + } + if (mounted.url + record.size > MAX_TOTAL_URL_BYTES) { + throw new Error( + `Mounting "${record.name}" would exceed the ${MAX_TOTAL_URL_BYTES / 1024 / 1024}MB total mount limit. Mount fewer or smaller files.` + ) + } + const url = await generatePresignedDownloadUrl( + record.key, + record.storageContext ?? 'workspace', + MOUNT_URL_TTL_SECONDS + ) + sandboxFiles.push({ type: 'url', path: mountPath, url }) + mounted.url += record.size + return + } + + if (record.size > MAX_FILE_SIZE) { + throw new Error( + `Input file "${record.name}" is ${Math.round(record.size / 1024 / 1024)}MB, over the ${MAX_FILE_SIZE / 1024 / 1024}MB per-file mount limit.` + ) + } + if (mounted.buffered + record.size > MAX_TOTAL_SIZE) { + throw new Error( + `Mounting "${record.name}" would exceed the ${MAX_TOTAL_SIZE / 1024 / 1024}MB total mount limit. Mount fewer or smaller files.` + ) + } + const buffer = await fetchWorkspaceFileBuffer(record) + const isText = /^text\/|application\/json|application\/xml|application\/csv/.test( + record.type || '' + ) + sandboxFiles.push({ + path: mountPath, + content: isText ? buffer.toString('utf-8') : buffer.toString('base64'), + encoding: isText ? 
undefined : 'base64', + }) + mounted.buffered += buffer.length +} + interface CanonicalFileInput { path: string sandboxPath?: string @@ -89,10 +173,15 @@ export async function resolveInputFiles( inputDirectories?: unknown[] ): Promise { const sandboxFiles: SandboxFile[] = [] - let totalSize = 0 + const mounted: MountedBytes = { buffered: 0, url: 0 } const betaEnabled = await isFeatureEnabled('mothership-beta') if (inputFiles?.length && workspaceId) { + if (inputFiles.length > MAX_MOUNTED_FILES) { + throw new Error( + `Too many input files (${inputFiles.length}). Maximum is ${MAX_MOUNTED_FILES}. Mount fewer files.` + ) + } const allFiles = await listWorkspaceFiles(workspaceId, { includeReservedSystemFiles: betaEnabled, }) @@ -124,33 +213,14 @@ export async function resolveInputFiles( `Input file not found: "${filePath}". Pass the exact canonical VFS path copied from glob/read (e.g. "files/Reports/data.csv").` ) } - if (record.size > MAX_FILE_SIZE) { - throw new Error( - `Input file "${filePath}" is ${Math.round(record.size / 1024 / 1024)}MB, over the ${MAX_FILE_SIZE / 1024 / 1024}MB per-file mount limit.` - ) - } - if (totalSize + record.size > MAX_TOTAL_SIZE) { - throw new Error( - `Mounting "${filePath}" would exceed the ${MAX_TOTAL_SIZE / 1024 / 1024}MB total mount limit. Mount fewer or smaller files.` - ) - } - const buffer = await fetchWorkspaceFileBuffer(record) - totalSize += buffer.length - const isText = /^text\/|application\/json|application\/xml|application\/csv/.test( - record.type || '' - ) - const content = isText ? buffer.toString('utf-8') : buffer.toString('base64') const explicitSandboxPath = typeof fileRef === 'object' && fileRef !== null ? (fileRef as CanonicalFileInput).sandboxPath : undefined - sandboxFiles.push({ - path: - explicitSandboxPath || - (alias ? workflowAliasSandboxPath(alias.aliasPath) : getSandboxWorkspaceFilePath(record)), - content, - encoding: isText ? 
undefined : 'base64', - }) + const mountPath = + explicitSandboxPath || + (alias ? workflowAliasSandboxPath(alias.aliasPath) : getSandboxWorkspaceFilePath(record)) + await pushWorkspaceFileMount(sandboxFiles, record, mountPath, mounted) } } @@ -228,17 +298,6 @@ export async function resolveInputFiles( } } for (const record of descendants) { - if (record.size > MAX_FILE_SIZE) { - throw new Error(`Input file exceeds size limit: ${record.name}`) - } - if (totalSize + record.size > MAX_TOTAL_SIZE) { - throw new Error('Total input size limit exceeded while mounting directory') - } - const buffer = await fetchWorkspaceFileBuffer(record) - totalSize += buffer.length - const isText = /^text\/|application\/json|application\/xml|application\/csv/.test( - record.type || '' - ) const relativeFolder = record.folderPath?.slice(folder.path.length).replace(/^\/+/, '') ?? '' const relativePath = alias @@ -246,11 +305,7 @@ export async function resolveInputFiles( [relativeFolder, record.name].filter(Boolean).join('/').split('/') ) : [relativeFolder, record.name].filter(Boolean).join('/') - sandboxFiles.push({ - path: `${mountRoot}/${relativePath}`, - content: isText ? buffer.toString('utf-8') : buffer.toString('base64'), - encoding: isText ? 
undefined : 'base64', - }) + await pushWorkspaceFileMount(sandboxFiles, record, `${mountRoot}/${relativePath}`, mounted) } } } @@ -305,7 +360,7 @@ export async function resolveInputFiles( const url = await generatePresignedDownloadUrl( snapshot.key, 'execution', - SNAPSHOT_URL_TTL_SECONDS + MOUNT_URL_TTL_SECONDS ) sandboxFiles.push({ type: 'url', path: mountPath, url }) continue @@ -318,7 +373,7 @@ export async function resolveInputFiles( `Input table "${tableId}" is ${Math.round(snapshot.size / 1024 / 1024)}MB, over the ${MAX_FILE_SIZE / 1024 / 1024}MB per-file mount limit.` ) } - if (totalSize + snapshot.size > MAX_TOTAL_SIZE) { + if (mounted.buffered + snapshot.size > MAX_TOTAL_SIZE) { throw new Error( `Mounting "${tableId}" would exceed the ${MAX_TOTAL_SIZE / 1024 / 1024}MB total mount limit. Mount fewer or smaller tables.` ) @@ -328,7 +383,7 @@ export async function resolveInputFiles( context: 'execution', maxBytes: MAX_FILE_SIZE, }) - totalSize += buffer.length + mounted.buffered += buffer.length sandboxFiles.push({ path: mountPath, content: buffer.toString('utf-8') }) continue } From 49d2c56a8c5032c87873f7204c839da545b76d62 Mon Sep 17 00:00:00 2001 From: Theodore Li Date: Wed, 24 Jun 2026 14:35:32 -0700 Subject: [PATCH 2/2] improvement(sandbox): use mount path in size-limit errors, display GB, add directory local-fallback test --- .../tools/handlers/function-execute.test.ts | 21 +++++++++++++++++++ .../tools/handlers/function-execute.ts | 8 +++---- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/apps/sim/lib/copilot/tools/handlers/function-execute.test.ts b/apps/sim/lib/copilot/tools/handlers/function-execute.test.ts index f195a0f71ef..f6494b14aa0 100644 --- a/apps/sim/lib/copilot/tools/handlers/function-execute.test.ts +++ b/apps/sim/lib/copilot/tools/handlers/function-execute.test.ts @@ -362,4 +362,25 @@ describe('executeFunctionExecute file mounts', () => { url: 'https://s3.example/file?sig=abc', }) }) + + it('local storage: buffers 
directory descendants via inline content', async () => { + mockHasCloudStorage.mockReturnValue(false) + mockListWorkspaceFileFolders.mockResolvedValue([{ path: 'Reports' }]) + const descendant = { + ...fileRecord, + name: 'q1.csv', + key: 'workspace/ws_1/q1.csv', + folderPath: 'Reports', + } + mockListWorkspaceFiles.mockResolvedValue([descendant]) + mockFetchWorkspaceFileBuffer.mockResolvedValue(Buffer.from('a,b\n1,2\n')) + + await executeFunctionExecute({ inputs: { directories: ['files/Reports'] } }, context as never) + + expect(mockGeneratePresignedDownloadUrl).not.toHaveBeenCalled() + const file = mountedFiles()[0] + expect(file.path).toBe('/home/user/files/Reports/q1.csv') + expect(file.content).toBe('a,b\n1,2\n') + expect(file.type).toBeUndefined() + }) }) diff --git a/apps/sim/lib/copilot/tools/handlers/function-execute.ts b/apps/sim/lib/copilot/tools/handlers/function-execute.ts index 166769ebc75..bc32699d66a 100644 --- a/apps/sim/lib/copilot/tools/handlers/function-execute.ts +++ b/apps/sim/lib/copilot/tools/handlers/function-execute.ts @@ -90,12 +90,12 @@ async function pushWorkspaceFileMount( if (hasCloudStorage()) { if (record.size > MOUNT_URL_MAX_BYTES) { throw new Error( - `Input file "${record.name}" is ${Math.round(record.size / 1024 / 1024)}MB, over the ${MOUNT_URL_MAX_BYTES / 1024 / 1024}MB per-file mount limit.` + `Input file "${mountPath}" is ${Math.round(record.size / 1024 / 1024)}MB, over the ${MOUNT_URL_MAX_BYTES / 1024 / 1024}MB per-file mount limit.` ) } if (mounted.url + record.size > MAX_TOTAL_URL_BYTES) { throw new Error( - `Mounting "${record.name}" would exceed the ${MAX_TOTAL_URL_BYTES / 1024 / 1024}MB total mount limit. Mount fewer or smaller files.` + `Mounting "${mountPath}" would exceed the ${MAX_TOTAL_URL_BYTES / 1024 / 1024 / 1024}GB total mount limit. 
Mount fewer or smaller files.` ) } const url = await generatePresignedDownloadUrl( @@ -110,12 +110,12 @@ async function pushWorkspaceFileMount( if (record.size > MAX_FILE_SIZE) { throw new Error( - `Input file "${record.name}" is ${Math.round(record.size / 1024 / 1024)}MB, over the ${MAX_FILE_SIZE / 1024 / 1024}MB per-file mount limit.` + `Input file "${mountPath}" is ${Math.round(record.size / 1024 / 1024)}MB, over the ${MAX_FILE_SIZE / 1024 / 1024}MB per-file mount limit.` ) } if (mounted.buffered + record.size > MAX_TOTAL_SIZE) { throw new Error( - `Mounting "${record.name}" would exceed the ${MAX_TOTAL_SIZE / 1024 / 1024}MB total mount limit. Mount fewer or smaller files.` + `Mounting "${mountPath}" would exceed the ${MAX_TOTAL_SIZE / 1024 / 1024}MB total mount limit. Mount fewer or smaller files.` ) } const buffer = await fetchWorkspaceFileBuffer(record)