Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 143 additions & 6 deletions apps/sim/lib/copilot/tools/handlers/function-execute.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ const {
mockGeneratePresignedDownloadUrl,
mockHasCloudStorage,
mockExecuteTool,
mockListWorkspaceFiles,
mockFindWorkspaceFileRecord,
mockFetchWorkspaceFileBuffer,
mockGetSandboxWorkspaceFilePath,
mockListWorkspaceFileFolders,
} = vi.hoisted(() => ({
mockIsFeatureEnabled: vi.fn(),
mockGetTableById: vi.fn(),
Expand All @@ -23,6 +28,11 @@ const {
mockGeneratePresignedDownloadUrl: vi.fn(),
mockHasCloudStorage: vi.fn(),
mockExecuteTool: vi.fn(),
mockListWorkspaceFiles: vi.fn(),
mockFindWorkspaceFileRecord: vi.fn(),
mockFetchWorkspaceFileBuffer: vi.fn(),
mockGetSandboxWorkspaceFilePath: vi.fn(),
mockListWorkspaceFileFolders: vi.fn(),
}))

vi.mock('@/lib/core/config/feature-flags', () => ({ isFeatureEnabled: mockIsFeatureEnabled }))
Expand All @@ -41,15 +51,14 @@ vi.mock('@/lib/uploads/core/storage-service', () => ({
hasCloudStorage: mockHasCloudStorage,
}))
vi.mock('@/tools', () => ({ executeTool: mockExecuteTool }))
// Workspace-file + VFS surfaces are unused on the tables-only path; stub to avoid heavy loads.
vi.mock('@/lib/uploads/contexts/workspace/workspace-file-manager', () => ({
fetchWorkspaceFileBuffer: vi.fn(),
findWorkspaceFileRecord: vi.fn(),
getSandboxWorkspaceFilePath: vi.fn(),
listWorkspaceFiles: vi.fn(),
fetchWorkspaceFileBuffer: mockFetchWorkspaceFileBuffer,
findWorkspaceFileRecord: mockFindWorkspaceFileRecord,
getSandboxWorkspaceFilePath: mockGetSandboxWorkspaceFilePath,
listWorkspaceFiles: mockListWorkspaceFiles,
}))
vi.mock('@/lib/uploads/contexts/workspace/workspace-file-folder-manager', () => ({
listWorkspaceFileFolders: vi.fn(),
listWorkspaceFileFolders: mockListWorkspaceFileFolders,
}))
vi.mock('@/lib/copilot/vfs/path-utils', () => ({
decodeVfsPathSegments: (p: string) => p.split('/'),
Expand Down Expand Up @@ -247,3 +256,131 @@ describe('executeFunctionExecute table mounts', () => {
expect(mockGetOrCreateTableSnapshot).not.toHaveBeenCalled()
})
})

/**
 * Baseline workspace-file fixture shared by the file-mount tests. Individual tests spread it and
 * override single fields (size, name, key, folderPath) to model other files.
 */
const fileRecord = {
  id: 'file_1',
  workspaceId: 'ws_1',
  name: 'data.csv',
  // Storage key, and the same key percent-encoded inside the serve path.
  key: 'workspace/ws_1/data.csv',
  path: '/api/files/serve/workspace%2Fws_1%2Fdata.csv',
  size: 100,
  type: 'text/csv',
  storageContext: 'workspace' as const,
}

/**
 * Covers how workspace files and directories are mounted into the sandbox: by presigned URL when
 * cloud storage is available, by buffered inline content otherwise, plus the size/count limits.
 */
describe('executeFunctionExecute file mounts', () => {
  const MIB = 1024 * 1024
  const PRESIGNED_URL = 'https://s3.example/file?sig=abc'

  /** A file living in the "Reports" folder, used by the directory-mount tests. */
  const reportsDescendant = {
    ...fileRecord,
    name: 'q1.csv',
    key: 'workspace/ws_1/q1.csv',
    folderPath: 'Reports',
  }

  /** Invokes the handler under test with the shared context. */
  const execute = (args: Parameters<typeof executeFunctionExecute>[0]) =>
    executeFunctionExecute(args, context as never)

  /** Builds `count` canonical paths of the form `files/<stem>-<i>.csv`. */
  const numberedPaths = (count: number, stem: string) =>
    Array.from({ length: count }, (_, index) => `files/${stem}-${index}.csv`)

  /** Makes the workspace look like it has a "Reports" folder holding one file. */
  const stubReportsFolder = () => {
    mockListWorkspaceFileFolders.mockResolvedValue([{ path: 'Reports' }])
    mockListWorkspaceFiles.mockResolvedValue([reportsDescendant])
  }

  beforeEach(() => {
    vi.clearAllMocks()
    mockExecuteTool.mockResolvedValue({ success: true })
    mockIsFeatureEnabled.mockResolvedValue(false)
    // Default to the cloud-storage path; local-storage tests flip this per test.
    mockHasCloudStorage.mockReturnValue(true)
    mockGeneratePresignedDownloadUrl.mockResolvedValue(PRESIGNED_URL)
    mockListWorkspaceFiles.mockResolvedValue([fileRecord])
    mockFindWorkspaceFileRecord.mockReturnValue(fileRecord)
    mockGetSandboxWorkspaceFilePath.mockReturnValue('/home/user/files/data.csv')
  })

  it('cloud storage: mounts by presigned URL with the record context, no bytes through web', async () => {
    await execute({ inputFiles: ['files/data.csv'] })

    expect(mockFetchWorkspaceFileBuffer).not.toHaveBeenCalled()
    expect(mockGeneratePresignedDownloadUrl).toHaveBeenCalledWith(
      'workspace/ws_1/data.csv',
      'workspace',
      expect.any(Number)
    )
    expect(mountedFiles()[0]).toEqual({
      type: 'url',
      path: '/home/user/files/data.csv',
      url: PRESIGNED_URL,
    })
  })

  it('local storage: falls back to a buffered inline content mount', async () => {
    mockHasCloudStorage.mockReturnValue(false)
    mockFetchWorkspaceFileBuffer.mockResolvedValue(Buffer.from('name\nAda\n'))

    await execute({ inputFiles: ['files/data.csv'] })

    expect(mockGeneratePresignedDownloadUrl).not.toHaveBeenCalled()
    const mount = mountedFiles()[0]
    expect(mount.path).toBe('/home/user/files/data.csv')
    expect(mount.content).toBe('name\nAda\n')
    expect(mount.type).toBeUndefined()
  })

  it('cloud storage: throws when a file exceeds the per-file URL mount limit', async () => {
    mockFindWorkspaceFileRecord.mockReturnValue({ ...fileRecord, size: 600 * MIB })

    const attempt = execute({ inputFiles: ['files/data.csv'] })

    await expect(attempt).rejects.toThrow(/per-file mount limit/)
    expect(mockGeneratePresignedDownloadUrl).not.toHaveBeenCalled()
  })

  it('cloud storage: throws when mounts exceed the aggregate URL mount limit', async () => {
    // Each file is at the 500MB per-file cap; the 5th pushes the running total past 2GB.
    mockFindWorkspaceFileRecord.mockReturnValue({ ...fileRecord, size: 500 * MIB })

    const attempt = execute({ inputFiles: numberedPaths(5, 'big') })

    await expect(attempt).rejects.toThrow(/total mount limit/)
    expect(mockGeneratePresignedDownloadUrl).toHaveBeenCalledTimes(4)
  })

  it('throws when the inputFiles list exceeds the mounted-file count cap', async () => {
    const attempt = execute({ inputFiles: numberedPaths(501, 'f') })

    await expect(attempt).rejects.toThrow(/Too many input files/)
    expect(mockListWorkspaceFiles).not.toHaveBeenCalled()
  })

  it('cloud storage: mounts each directory descendant by presigned URL', async () => {
    stubReportsFolder()

    await execute({ inputs: { directories: ['files/Reports'] } })

    expect(mockFetchWorkspaceFileBuffer).not.toHaveBeenCalled()
    expect(mockGeneratePresignedDownloadUrl).toHaveBeenCalledWith(
      'workspace/ws_1/q1.csv',
      'workspace',
      expect.any(Number)
    )
    expect(mountedFiles()[0]).toEqual({
      type: 'url',
      path: '/home/user/files/Reports/q1.csv',
      url: PRESIGNED_URL,
    })
  })

  it('local storage: buffers directory descendants via inline content', async () => {
    mockHasCloudStorage.mockReturnValue(false)
    stubReportsFolder()
    mockFetchWorkspaceFileBuffer.mockResolvedValue(Buffer.from('a,b\n1,2\n'))

    await execute({ inputs: { directories: ['files/Reports'] } })

    expect(mockGeneratePresignedDownloadUrl).not.toHaveBeenCalled()
    const mount = mountedFiles()[0]
    expect(mount.path).toBe('/home/user/files/Reports/q1.csv')
    expect(mount.content).toBe('a,b\n1,2\n')
    expect(mount.type).toBeUndefined()
  })
})
147 changes: 101 additions & 46 deletions apps/sim/lib/copilot/tools/handlers/function-execute.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import {
findWorkspaceFileRecord,
getSandboxWorkspaceFilePath,
listWorkspaceFiles,
type WorkspaceFileRecord,
} from '@/lib/uploads/contexts/workspace/workspace-file-manager'
import {
downloadFile,
Expand All @@ -37,15 +38,98 @@ const MAX_MOUNTED_FILES = 500
const SNAPSHOT_MIN_ROWS = 500

/**
* Lifetime of the presigned URL handed to the sandbox to fetch a snapshot. Long enough to download
* a large file at sandbox startup; the URL grants read to only that one version-pinned object.
* Lifetime of a presigned URL handed to the sandbox to fetch a mounted object (table snapshot or
* workspace file). Long enough to download a large file at sandbox startup; the URL grants read to
* only that one object.
*/
const SNAPSHOT_URL_TTL_SECONDS = 600
const MOUNT_URL_TTL_SECONDS = 600

/**
 * Largest single workspace file (in bytes) that may be mounted by URL: 500 MiB. URL-mounted bytes
 * are fetched by the sandbox directly from storage rather than passing through the web process, so
 * this limit guards sandbox disk rather than web heap — which is why it is separate from the inline
 * MAX_FILE_SIZE limit.
 */
const MOUNT_URL_MAX_BYTES = 500 * 2 ** 20

/**
 * Combined byte budget for every URL-mounted file in a single request: 2 GiB. URL mounts skip the
 * web heap and therefore are not counted against MAX_TOTAL_SIZE, yet each byte still lands on the
 * sandbox disk. Enforcing a total up front rejects an oversized request before the sandbox starts
 * downloading it piece by piece. The budget is deliberately larger than MAX_TOTAL_SIZE because no
 * web memory is involved.
 */
const MAX_TOTAL_URL_BYTES = 2 * 2 ** 30

/**
 * A file to materialize inside the sandbox at `path`. Two variants, told apart by `type`:
 * - `'url'`: the sandbox downloads the bytes itself from `url`.
 * - `'content'` (or `type` omitted): the bytes are supplied inline in `content`, either as text or,
 *   when `encoding` is `'base64'`, as a base64 string.
 */
type SandboxFile =
  | { type: 'url'; path: string; url: string }
  | { type?: 'content'; path: string; content: string; encoding?: 'base64' }

/**
 * Byte counters accumulated over a single resolveInputFiles call. The two counters are kept apart
 * because each is checked against its own ceiling, and those ceilings protect different resources
 * (web heap vs sandbox disk).
 */
interface MountedBytes {
  /** Bytes that passed through the web process as inline content; capped by MAX_TOTAL_SIZE. */
  buffered: number
  /** Bytes the sandbox fetches directly via presigned URL; capped by MAX_TOTAL_URL_BYTES. */
  url: number
}

/**
 * Appends one stored workspace file to `sandboxFiles` and charges its size to `mounted`.
 *
 * Strategy depends on `hasCloudStorage()`:
 * - Cloud storage: push a `{ type: 'url' }` mount pointing at a presigned download URL, so the
 *   sandbox pulls the bytes itself. Limits: MOUNT_URL_MAX_BYTES per file, MAX_TOTAL_URL_BYTES in
 *   aggregate (tracked in `mounted.url`).
 * - Local storage: a presigned URL would be an app-internal serve path that a remote sandbox can't
 *   reach, so the file is buffered through the web process and pushed as inline content. Limits:
 *   MAX_FILE_SIZE per file, MAX_TOTAL_SIZE in aggregate (tracked in `mounted.buffered`).
 *
 * @param sandboxFiles - Mount list to append to (mutated).
 * @param record - Stored file to mount; its `size` is what the limits are checked against.
 * @param mountPath - Destination path inside the sandbox; also used in error messages.
 * @param mounted - Running byte totals for the current request (mutated).
 * @throws Error when the file is over the per-file limit or would push the running total over the
 *   aggregate limit for the chosen strategy. Both checks run before any URL is signed or any bytes
 *   are fetched.
 */
async function pushWorkspaceFileMount(
  sandboxFiles: SandboxFile[],
  record: WorkspaceFileRecord,
  mountPath: string,
  mounted: MountedBytes
): Promise<void> {
  const mountByUrl = hasCloudStorage()

  // Select the ceilings and the running counter that apply to the chosen strategy.
  const perFileCap = mountByUrl ? MOUNT_URL_MAX_BYTES : MAX_FILE_SIZE
  const aggregateCap = mountByUrl ? MAX_TOTAL_URL_BYTES : MAX_TOTAL_SIZE
  const alreadyMounted = mountByUrl ? mounted.url : mounted.buffered

  if (record.size > perFileCap) {
    throw new Error(
      `Input file "${mountPath}" is ${Math.round(record.size / 1024 / 1024)}MB, over the ${perFileCap / 1024 / 1024}MB per-file mount limit.`
    )
  }

  if (alreadyMounted + record.size > aggregateCap) {
    // The URL budget is reported in GB, the buffered budget in MB.
    const capLabel = mountByUrl
      ? `${MAX_TOTAL_URL_BYTES / 1024 / 1024 / 1024}GB`
      : `${MAX_TOTAL_SIZE / 1024 / 1024}MB`
    throw new Error(
      `Mounting "${mountPath}" would exceed the ${capLabel} total mount limit. Mount fewer or smaller files.`
    )
  }

  if (mountByUrl) {
    const signedUrl = await generatePresignedDownloadUrl(
      record.key,
      record.storageContext ?? 'workspace',
      MOUNT_URL_TTL_SECONDS
    )
    sandboxFiles.push({ type: 'url', path: mountPath, url: signedUrl })
    // Nothing was downloaded here, so the recorded size is the only figure available.
    mounted.url += record.size
    return
  }

  const bytes = await fetchWorkspaceFileBuffer(record)
  // Text-like MIME types are passed through as UTF-8; everything else is base64-encoded.
  const textual = /^text\/|application\/json|application\/xml|application\/csv/.test(
    record.type || ''
  )
  const content = textual ? bytes.toString('utf-8') : bytes.toString('base64')
  sandboxFiles.push({
    path: mountPath,
    content,
    encoding: textual ? undefined : 'base64',
  })
  // Charge the bytes actually read rather than the recorded size.
  mounted.buffered += bytes.length
}

interface CanonicalFileInput {
path: string
sandboxPath?: string
Expand Down Expand Up @@ -89,10 +173,15 @@ export async function resolveInputFiles(
inputDirectories?: unknown[]
): Promise<SandboxFile[]> {
const sandboxFiles: SandboxFile[] = []
let totalSize = 0
const mounted: MountedBytes = { buffered: 0, url: 0 }
const betaEnabled = await isFeatureEnabled('mothership-beta')

if (inputFiles?.length && workspaceId) {
if (inputFiles.length > MAX_MOUNTED_FILES) {
throw new Error(
`Too many input files (${inputFiles.length}). Maximum is ${MAX_MOUNTED_FILES}. Mount fewer files.`
)
}
const allFiles = await listWorkspaceFiles(workspaceId, {
includeReservedSystemFiles: betaEnabled,
})
Expand Down Expand Up @@ -124,33 +213,14 @@ export async function resolveInputFiles(
`Input file not found: "${filePath}". Pass the exact canonical VFS path copied from glob/read (e.g. "files/Reports/data.csv").`
)
}
if (record.size > MAX_FILE_SIZE) {
throw new Error(
`Input file "${filePath}" is ${Math.round(record.size / 1024 / 1024)}MB, over the ${MAX_FILE_SIZE / 1024 / 1024}MB per-file mount limit.`
)
}
if (totalSize + record.size > MAX_TOTAL_SIZE) {
throw new Error(
`Mounting "${filePath}" would exceed the ${MAX_TOTAL_SIZE / 1024 / 1024}MB total mount limit. Mount fewer or smaller files.`
)
}
const buffer = await fetchWorkspaceFileBuffer(record)
totalSize += buffer.length
const isText = /^text\/|application\/json|application\/xml|application\/csv/.test(
record.type || ''
)
const content = isText ? buffer.toString('utf-8') : buffer.toString('base64')
const explicitSandboxPath =
typeof fileRef === 'object' && fileRef !== null
? (fileRef as CanonicalFileInput).sandboxPath
: undefined
sandboxFiles.push({
path:
explicitSandboxPath ||
(alias ? workflowAliasSandboxPath(alias.aliasPath) : getSandboxWorkspaceFilePath(record)),
content,
encoding: isText ? undefined : 'base64',
})
const mountPath =
explicitSandboxPath ||
(alias ? workflowAliasSandboxPath(alias.aliasPath) : getSandboxWorkspaceFilePath(record))
await pushWorkspaceFileMount(sandboxFiles, record, mountPath, mounted)
}
}

Expand Down Expand Up @@ -228,29 +298,14 @@ export async function resolveInputFiles(
}
}
for (const record of descendants) {
if (record.size > MAX_FILE_SIZE) {
throw new Error(`Input file exceeds size limit: ${record.name}`)
}
if (totalSize + record.size > MAX_TOTAL_SIZE) {
throw new Error('Total input size limit exceeded while mounting directory')
}
const buffer = await fetchWorkspaceFileBuffer(record)
totalSize += buffer.length
const isText = /^text\/|application\/json|application\/xml|application\/csv/.test(
record.type || ''
)
const relativeFolder =
record.folderPath?.slice(folder.path.length).replace(/^\/+/, '') ?? ''
const relativePath = alias
? encodeVfsPathSegments(
[relativeFolder, record.name].filter(Boolean).join('/').split('/')
)
: [relativeFolder, record.name].filter(Boolean).join('/')
sandboxFiles.push({
path: `${mountRoot}/${relativePath}`,
content: isText ? buffer.toString('utf-8') : buffer.toString('base64'),
encoding: isText ? undefined : 'base64',
})
await pushWorkspaceFileMount(sandboxFiles, record, `${mountRoot}/${relativePath}`, mounted)
}
}
}
Expand Down Expand Up @@ -305,7 +360,7 @@ export async function resolveInputFiles(
const url = await generatePresignedDownloadUrl(
snapshot.key,
'execution',
SNAPSHOT_URL_TTL_SECONDS
MOUNT_URL_TTL_SECONDS
)
sandboxFiles.push({ type: 'url', path: mountPath, url })
continue
Expand All @@ -318,7 +373,7 @@ export async function resolveInputFiles(
`Input table "${tableId}" is ${Math.round(snapshot.size / 1024 / 1024)}MB, over the ${MAX_FILE_SIZE / 1024 / 1024}MB per-file mount limit.`
)
}
if (totalSize + snapshot.size > MAX_TOTAL_SIZE) {
if (mounted.buffered + snapshot.size > MAX_TOTAL_SIZE) {
throw new Error(
`Mounting "${tableId}" would exceed the ${MAX_TOTAL_SIZE / 1024 / 1024}MB total mount limit. Mount fewer or smaller tables.`
)
Expand All @@ -328,7 +383,7 @@ export async function resolveInputFiles(
context: 'execution',
maxBytes: MAX_FILE_SIZE,
})
totalSize += buffer.length
mounted.buffered += buffer.length
sandboxFiles.push({ path: mountPath, content: buffer.toString('utf-8') })
continue
}
Expand Down
Loading