diff --git a/apps/sim/app/api/files/parse/route.test.ts b/apps/sim/app/api/files/parse/route.test.ts index 8c18422bae3..42a44f6a2d5 100644 --- a/apps/sim/app/api/files/parse/route.test.ts +++ b/apps/sim/app/api/files/parse/route.test.ts @@ -31,6 +31,9 @@ const { mockFsWriteFile, mockJoin, actualPath, + mockFileExistsInWorkspace, + mockListWorkspaceFiles, + mockUploadWorkspaceFile, } = vi.hoisted(() => { // eslint-disable-next-line @typescript-eslint/no-require-imports const actualPath = require('path') as typeof import('path') @@ -49,7 +52,7 @@ const { metadata: { pageCount: 1 }, }), mockFsAccess: vi.fn().mockResolvedValue(undefined), - mockFsStat: vi.fn().mockImplementation(() => ({ isFile: () => true })), + mockFsStat: vi.fn().mockImplementation(() => ({ isFile: () => true, size: 17 })), mockFsReadFile: vi.fn().mockResolvedValue(Buffer.from('test file content')), mockFsWriteFile: vi.fn().mockResolvedValue(undefined), mockJoin: vi.fn((...args: string[]): string => { @@ -59,6 +62,9 @@ const { return actualPath.join(...args) }), actualPath, + mockFileExistsInWorkspace: vi.fn().mockResolvedValue(false), + mockListWorkspaceFiles: vi.fn().mockResolvedValue([]), + mockUploadWorkspaceFile: vi.fn().mockResolvedValue({}), } }) @@ -104,6 +110,12 @@ vi.mock('@/lib/uploads/contexts/execution', () => ({ uploadExecutionFile: vi.fn(), })) +vi.mock('@/lib/uploads/contexts/workspace', () => ({ + fileExistsInWorkspace: mockFileExistsInWorkspace, + listWorkspaceFiles: mockListWorkspaceFiles, + uploadWorkspaceFile: mockUploadWorkspaceFile, +})) + vi.mock('@/lib/uploads/server/metadata', () => ({ getFileMetadataByKey: vi.fn(), })) @@ -175,7 +187,12 @@ describe('File Parse API Route', () => { permissionsMockFns.mockGetUserEntityPermissions.mockResolvedValue({ canView: true }) storageServiceMockFns.mockHasCloudStorage.mockReturnValue(true) storageServiceMockFns.mockDownloadFile.mockResolvedValue(Buffer.from('test file content')) + mockFsStat.mockResolvedValue({ isFile: () => true, size: 17 }) + mockFsReadFile.mockResolvedValue(Buffer.from('test file content')) mockIsSupportedFileType.mockReturnValue(true) + mockFileExistsInWorkspace.mockResolvedValue(false) + mockListWorkspaceFiles.mockResolvedValue([]) + mockUploadWorkspaceFile.mockResolvedValue({}) mockParseFile.mockResolvedValue({ content: 'parsed content', metadata: { pageCount: 1 }, @@ -311,6 +328,123 @@ describe('File Parse API Route', () => { expect(data.results).toHaveLength(2) }) + it('should cap remaining download size while processing multi-file parse results', async () => { + inputValidationMockFns.mockValidateUrlWithDNS.mockResolvedValue({ + isValid: true, + resolvedIP: '203.0.113.10', + }) + inputValidationMockFns.mockSecureFetchWithPinnedIP.mockResolvedValue( + new Response('file content', { + status: 200, + headers: { 'content-type': 'text/plain' }, + }) + ) + + const fourMbContent = 'a'.repeat(4 * 1024 * 1024) + mockParseBuffer + .mockResolvedValueOnce({ + content: fourMbContent, + metadata: { pageCount: 1 }, + }) + .mockResolvedValueOnce({ + content: 'second file', + metadata: { pageCount: 1 }, + }) + + const req = createMockRequest('POST', { + filePath: ['https://example.com/file1.txt', 'https://example.com/file2.txt'], + }) + + const response = await POST(req) + const data = await response.json() + + expect(response.status).toBe(200) + expect(data.results).toHaveLength(2) + expect(inputValidationMockFns.mockSecureFetchWithPinnedIP).toHaveBeenNthCalledWith( + 1, + 'https://example.com/file1.txt', + '203.0.113.10', + expect.objectContaining({ maxResponseBytes: 5 * 1024 * 1024 }) + ) + expect(inputValidationMockFns.mockSecureFetchWithPinnedIP).toHaveBeenNthCalledWith( + 2, + 'https://example.com/file2.txt', + '203.0.113.10', + expect.objectContaining({ maxResponseBytes: 1024 * 1024 }) + ) + }) + + it('should preserve the remaining multi-file cap when an external URL reuses a workspace file', async () => { + inputValidationMockFns.mockValidateUrlWithDNS.mockResolvedValue({ + isValid: true, + resolvedIP: '203.0.113.10', + }) + inputValidationMockFns.mockSecureFetchWithPinnedIP.mockResolvedValue( + new Response('file content', { + status: 200, + headers: { 'content-type': 'text/plain' }, + }) + ) + mockFileExistsInWorkspace.mockResolvedValueOnce(false).mockResolvedValueOnce(true) + mockListWorkspaceFiles.mockResolvedValueOnce([ + { name: 'file2.txt', key: 'workspace-file2.txt' }, + ]) + + mockParseBuffer + .mockResolvedValueOnce({ + content: 'a'.repeat(4 * 1024 * 1024), + metadata: { pageCount: 1 }, + }) + .mockResolvedValueOnce({ + content: 'second file', + metadata: { pageCount: 1 }, + }) + + const req = createMockRequest('POST', { + filePath: ['https://example.com/file1.txt', 'https://example.com/file2.txt'], + workspaceId: 'workspace-id', + }) + + const response = await POST(req) + const data = await response.json() + + expect(response.status).toBe(200) + expect(data.results).toHaveLength(2) + expect(storageServiceMockFns.mockDownloadFile).toHaveBeenCalledWith( + expect.objectContaining({ key: 'workspace-file2.txt', maxBytes: 1024 * 1024 }) + ) + }) + + it('should stop multi-file parsing once the combined parsed output is too large', async () => { + inputValidationMockFns.mockValidateUrlWithDNS.mockResolvedValue({ + isValid: true, + resolvedIP: '203.0.113.10', + }) + inputValidationMockFns.mockSecureFetchWithPinnedIP.mockResolvedValue( + new Response('file content', { + status: 200, + headers: { 'content-type': 'text/plain' }, + }) + ) + + mockParseBuffer.mockResolvedValueOnce({ + content: 'a'.repeat(5 * 1024 * 1024 + 1), + metadata: { pageCount: 1 }, + }) + + const req = createMockRequest('POST', { + filePath: ['https://example.com/file1.txt', 'https://example.com/file2.txt'], + }) + + const response = await POST(req) + const data = await response.json() + + expect(response.status).toBe(413) + expect(data.success).toBe(false) + expect(data.error).toContain('too large') + expect(inputValidationMockFns.mockSecureFetchWithPinnedIP).toHaveBeenCalledTimes(1) + }) + it('should pass custom headers when fetching external URLs', async () => { inputValidationMockFns.mockValidateUrlWithDNS.mockResolvedValue({ isValid: true, @@ -344,6 +478,58 @@ describe('File Parse API Route', () => { ) }) + it('should reject oversized external downloads before reading the body', async () => { + inputValidationMockFns.mockValidateUrlWithDNS.mockResolvedValue({ + isValid: true, + resolvedIP: '203.0.113.10', + }) + inputValidationMockFns.mockSecureFetchWithPinnedIP.mockResolvedValue( + new Response('oversized', { + status: 200, + headers: { 'content-length': '104857601', 'content-type': 'text/plain' }, + }) + ) + + const req = createMockRequest('POST', { + filePath: 'https://example.com/large.txt', + }) + + const response = await POST(req) + const data = await response.json() + + expect(response.status).toBe(200) + expect(data.success).toBe(false) + expect(data.error).toContain('too large') + expect(inputValidationMockFns.mockSecureFetchWithPinnedIP).toHaveBeenCalledWith( + 'https://example.com/large.txt', + '203.0.113.10', + expect.objectContaining({ + maxResponseBytes: 104857600, + }) + ) + }) + + it('should reject oversized local files before materializing them', async () => { + setupFileApiMocks({ + cloudEnabled: false, + storageProvider: 'local', + authenticated: true, + }) + mockFsStat.mockResolvedValue({ isFile: () => true, size: 104857601 }) + + const req = createMockRequest('POST', { + filePath: 'workspace/large.txt', + }) + + const response = await POST(req) + const data = await response.json() + + expect(response.status).toBe(200) + expect(data.success).toBe(false) + expect(data.error).toContain('too large') + expect(mockFsReadFile).not.toHaveBeenCalled() + }) + it('should process execution file URLs with context query param', async () => { setupFileApiMocks({ cloudEnabled: true, diff --git a/apps/sim/app/api/files/parse/route.ts b/apps/sim/app/api/files/parse/route.ts index aa3c3ff93c3..584561a31fc 100644 --- a/apps/sim/app/api/files/parse/route.ts +++ b/apps/sim/app/api/files/parse/route.ts @@ -1,6 +1,6 @@ import { Buffer, isUtf8 } from 'buffer' import { createHash } from 'crypto' -import fsPromises, { readFile } from 'fs/promises' +import fsPromises from 'fs/promises' import path from 'path' import { createLogger } from '@sim/logger' import { getErrorMessage } from '@sim/utils/errors' @@ -15,6 +15,13 @@ import { validateUrlWithDNS, } from '@/lib/core/security/input-validation.server' import { sanitizeUrlForLog } from '@/lib/core/utils/logging' +import { + assertKnownSizeWithinLimit, + DEFAULT_MAX_ERROR_BODY_BYTES, + isPayloadSizeLimitError, + readResponseTextWithLimit, + readResponseToBufferWithLimit, +} from '@/lib/core/utils/stream-limits' import { isSupportedFileType, parseFile } from '@/lib/file-parsers' import { isUsingCloudStorage, type StorageContext, StorageService } from '@/lib/uploads' import { uploadExecutionFile } from '@/lib/uploads/contexts/execution' @@ -41,6 +48,8 @@ const logger = createLogger('FilesParseAPI') const MAX_DOWNLOAD_SIZE_BYTES = 100 * 1024 * 1024 // 100 MB const DOWNLOAD_TIMEOUT_MS = 30000 // 30 seconds +const MAX_FILE_REFERENCE_LENGTH = 4096 +const MAX_MULTI_FILE_PARSE_OUTPUT_BYTES = 5 * 1024 * 1024 const BINARY_EXTENSIONS = new Set(binaryExtensionsList) function isLikelyTextBuffer(fileBuffer: Buffer): boolean { @@ -69,6 +78,10 @@ interface ParseResult { } } +function getContentBytes(content: unknown): number { + return typeof content === 'string' ? Buffer.byteLength(content, 'utf8') : 0 +} + /** * Main API route handler */ @@ -134,49 +147,69 @@ export const POST = withRouteHandler(async (request: NextRequest) => { }) if (Array.isArray(filePath)) { - const results = await Promise.all( - filePath.map(async (singlePath) => { - if (!singlePath || (typeof singlePath === 'string' && singlePath.trim() === '')) { - return { - success: false, - error: 'Empty file path in array', - filePath: singlePath || '', - } - } + const results = [] + let totalOutputBytes = 0 + + for (const singlePath of filePath) { + if (!singlePath || (typeof singlePath === 'string' && singlePath.trim() === '')) { + results.push({ + success: false, + error: 'Empty file path in array', + filePath: singlePath || '', + }) + continue + } - const result = await parseFileSingle( - singlePath, - fileType, - workspaceId, - userId, - executionContext, - headers - ) - if (result.metadata) { - result.metadata.processingTime = Date.now() - startTime - } + const remainingOutputBytes = MAX_MULTI_FILE_PARSE_OUTPUT_BYTES - totalOutputBytes + if (remainingOutputBytes <= 0) { + return parsedOutputTooLargeResponse() + } - if (result.success) { - const displayName = - result.originalName || extractCleanFilename(result.filePath) || 'unknown' - return { - success: true, - output: { - content: result.content, - name: displayName, - fileType: result.metadata?.fileType || 'application/octet-stream', - size: result.metadata?.size || 0, - binary: false, - file: result.userFile, - }, - filePath: result.filePath, - viewerUrl: result.viewerUrl, - } + const result = await parseFileSingle( + singlePath, + fileType, + workspaceId, + userId, + executionContext, + headers, + request.signal, + remainingOutputBytes, + remainingOutputBytes + ) + if (result.metadata) { + result.metadata.processingTime = Date.now() - startTime + } + + if (result.success) { + totalOutputBytes += getContentBytes(result.content) + if (totalOutputBytes > MAX_MULTI_FILE_PARSE_OUTPUT_BYTES) { + return parsedOutputTooLargeResponse() } - return result - }) - ) + const displayName = + result.originalName || extractCleanFilename(result.filePath) || 'unknown' + results.push({ + success: true, + output: { + content: result.content, + name: displayName, + fileType: result.metadata?.fileType || 'application/octet-stream', + size: result.metadata?.size || 0, + binary: false, + file: result.userFile, + }, + filePath: result.filePath, + viewerUrl: result.viewerUrl, + }) + continue + } + + if (result.error?.startsWith('Parsed file output is too large')) { + return parsedOutputTooLargeResponse() + } + + results.push(result) + } return NextResponse.json({ success: true, @@ -190,7 +223,8 @@ export const POST = withRouteHandler(async (request: NextRequest) => { workspaceId, userId, executionContext, - headers + headers, + request.signal ) if (result.metadata) { @@ -237,7 +271,10 @@ async function parseFileSingle( workspaceId: string, userId: string, executionContext?: ExecutionContext, - headers?: Record + headers?: Record, + signal?: AbortSignal, + maxDownloadBytes = MAX_DOWNLOAD_SIZE_BYTES, + maxParsedOutputBytes?: number ): Promise { logger.info('Parsing file:', filePath) @@ -249,6 +286,15 @@ async function parseFileSingle( } } + const referenceValidation = validateFileReferenceShape(filePath) + if (!referenceValidation.isValid) { + return { + success: false, + error: referenceValidation.error || 'Invalid file reference', + filePath, + } + } + const pathValidation = validateFilePath(filePath) if (!pathValidation.isValid) { return { @@ -259,18 +305,120 @@ async function parseFileSingle( } if (isInternalFileUrl(filePath)) { - return handleCloudFile(filePath, fileType, undefined, userId, executionContext) + return handleCloudFile( + filePath, + fileType, + undefined, + userId, + executionContext, + maxDownloadBytes, + maxParsedOutputBytes + ) } if (filePath.startsWith('http://') || filePath.startsWith('https://')) { - return handleExternalUrl(filePath, fileType, workspaceId, userId, executionContext, headers) + return handleExternalUrl( + filePath, + fileType, + workspaceId, + userId, + executionContext, + headers, + signal, + maxDownloadBytes, + maxParsedOutputBytes + ) } if (isUsingCloudStorage()) { - return handleCloudFile(filePath, fileType, undefined, userId, executionContext) + return handleCloudFile( + filePath, + fileType, + undefined, + userId, + executionContext, + maxDownloadBytes, + maxParsedOutputBytes + ) } - return handleLocalFile(filePath, fileType, userId, executionContext) + return handleLocalFile( + filePath, + fileType, + userId, + executionContext, + maxDownloadBytes, + maxParsedOutputBytes + ) +} + +function validateFileReferenceShape(filePath: string): { isValid: boolean; error?: string } { + const trimmed = filePath.trim() + if ( + trimmed.startsWith('http://') || + trimmed.startsWith('https://') || + isInternalFileUrl(trimmed) + ) { + return { isValid: true } + } + + if (trimmed.startsWith('data:')) { + return { + isValid: false, + error: 'File input must be a URL or uploaded file reference, not inline file content', + } + } + + if (filePath.length > MAX_FILE_REFERENCE_LENGTH) { + return { + isValid: false, + error: 'File reference is too long; provide a file URL or upload the file instead', + } + } + + if (/[\x00-\x08\x0B\x0C\x0E-\x1F]/.test(filePath)) { + return { + isValid: false, + error: + 'File reference contains binary content; provide a file URL or upload the file instead', + } + } + + const newlineCount = filePath.match(/\r\n|\r|\n/g)?.length ?? 0 + if (newlineCount > 2) { + return { + isValid: false, + error: + 'File reference looks like inline file content; provide a file URL or upload the file instead', + } + } + + return { isValid: true } +} + +function parsedOutputTooLargeResponse(): NextResponse { + return NextResponse.json( + { + success: false, + error: `Parsed file output is too large to return safely. Maximum combined parsed output is ${prettySize( + MAX_MULTI_FILE_PARSE_OUTPUT_BYTES + )}.`, + }, + { status: 413 } + ) +} + +function getParsedOutputTooLargeMessage(maxBytes: number): string { + return `Parsed file output is too large to return safely. Maximum parsed output is ${prettySize( + maxBytes + )}.` +} + +function assertParsedContentWithinLimit(content: string, maxBytes?: number): string { + if (maxBytes !== undefined) { + assertKnownSizeWithinLimit(Buffer.byteLength(content, 'utf8'), maxBytes, 'parsed file output') + } + return content } /** @@ -311,7 +459,10 @@ async function handleExternalUrl( workspaceId: string, userId: string, executionContext?: ExecutionContext, - headers?: Record + headers?: Record, + signal?: AbortSignal, + maxDownloadBytes = MAX_DOWNLOAD_SIZE_BYTES, + maxParsedOutputBytes?: number ): Promise { try { logger.info('Fetching external URL:', url) @@ -388,29 +539,39 @@ async function handleExternalUrl( if (existingFile) { const storageFilePath = `/api/files/serve/${existingFile.key}` - return handleCloudFile(storageFilePath, fileType, 'workspace', userId, executionContext) + return handleCloudFile( + storageFilePath, + fileType, + 'workspace', + userId, + executionContext, + maxDownloadBytes, + maxParsedOutputBytes + ) } } } const response = await secureFetchWithPinnedIP(url, urlValidation.resolvedIP!, { timeout: DOWNLOAD_TIMEOUT_MS, + maxResponseBytes: maxDownloadBytes, + signal, ...(headers && Object.keys(headers).length > 0 && { headers }), }) if (!response.ok) { + await readResponseTextWithLimit(response, { + maxBytes: DEFAULT_MAX_ERROR_BODY_BYTES, + label: 'file download error response', + signal, + }).catch(() => '') throw new Error(`Failed to fetch URL: ${response.status} ${response.statusText}`) } - const contentLength = response.headers.get('content-length') - if (contentLength && Number.parseInt(contentLength) > MAX_DOWNLOAD_SIZE_BYTES) { - throw new Error(`File too large: ${contentLength} bytes (max: ${MAX_DOWNLOAD_SIZE_BYTES})`) - } - - const buffer = Buffer.from(await response.arrayBuffer()) - - if (buffer.length > MAX_DOWNLOAD_SIZE_BYTES) { - throw new Error(`File too large: ${buffer.length} bytes (max: ${MAX_DOWNLOAD_SIZE_BYTES})`) - } + const buffer = await readResponseToBufferWithLimit(response, { + maxBytes: maxDownloadBytes, + label: 'file download', + signal, + }) logger.info(`Downloaded file from URL: ${url}, size: ${buffer.length} bytes`) @@ -449,13 +610,20 @@ async function handleExternalUrl( let parseResult: ParseResult if (extension === 'pdf') { - parseResult = await handlePdfBuffer(buffer, filename, fileType, url) + parseResult = await handlePdfBuffer(buffer, filename, fileType, url, maxParsedOutputBytes) } else if (extension === 'csv') { - parseResult = await handleCsvBuffer(buffer, filename, fileType, url) + parseResult = await handleCsvBuffer(buffer, filename, fileType, url, maxParsedOutputBytes) } else if (isSupportedFileType(extension)) { - parseResult = await handleGenericTextBuffer(buffer, filename, extension, fileType, url) + parseResult = await handleGenericTextBuffer( + buffer, + filename, + extension, + fileType, + url, + maxParsedOutputBytes + ) } else { - parseResult = handleGenericBuffer(buffer, filename, extension, fileType) + parseResult = handleGenericBuffer(buffer, filename, extension, fileType, maxParsedOutputBytes) } // Attach userFile to the result @@ -466,6 +634,25 @@ async function handleExternalUrl( return parseResult } catch (error) { logger.error(`Error handling external URL ${sanitizeUrlForLog(url)}:`, error) + if (isPayloadSizeLimitError(error)) { + logger.warn('Rejected oversized external file parse payload', { + maxBytes: error.maxBytes, + observedBytes: error.observedBytes, + label: error.label, + url: sanitizeUrlForLog(url), + }) + return { + success: false, + error: + error.label === 'parsed file output' + ? getParsedOutputTooLargeMessage(error.maxBytes) + : `File is too large to parse safely. Maximum supported download size is ${prettySize( + error.maxBytes + )}.`, + filePath: url, + } + } + return { success: false, error: `Error fetching URL: ${(error as Error).message}`, @@ -484,7 +671,9 @@ async function handleCloudFile( fileType: string, explicitContext: string | undefined, userId: string, - executionContext?: ExecutionContext + executionContext?: ExecutionContext, + maxDownloadBytes = MAX_DOWNLOAD_SIZE_BYTES, + maxParsedOutputBytes?: number ): Promise { try { const cloudKey = extractStorageKey(filePath) @@ -524,7 +713,11 @@ async function handleCloudFile( } } - const fileBuffer = await StorageService.downloadFile({ key: cloudKey, context }) + const fileBuffer = await StorageService.downloadFile({ + key: cloudKey, + context, + maxBytes: maxDownloadBytes, + }) logger.info( `Downloaded file from ${context} storage (${explicitContext ? 'explicit' : 'inferred'}): ${cloudKey}, size: ${fileBuffer.length} bytes` ) @@ -582,19 +775,38 @@ async function handleCloudFile( let parseResult: ParseResult if (extension === 'pdf') { - parseResult = await handlePdfBuffer(fileBuffer, filename, fileType, normalizedFilePath) + parseResult = await handlePdfBuffer( + fileBuffer, + filename, + fileType, + normalizedFilePath, + maxParsedOutputBytes + ) } else if (extension === 'csv') { - parseResult = await handleCsvBuffer(fileBuffer, filename, fileType, normalizedFilePath) + parseResult = await handleCsvBuffer( + fileBuffer, + filename, + fileType, + normalizedFilePath, + maxParsedOutputBytes + ) } else if (isSupportedFileType(extension)) { parseResult = await handleGenericTextBuffer( fileBuffer, filename, extension, fileType, - normalizedFilePath + normalizedFilePath, + maxParsedOutputBytes ) } else { - parseResult = handleGenericBuffer(fileBuffer, filename, extension, fileType) + parseResult = handleGenericBuffer( + fileBuffer, + filename, + extension, + fileType, + maxParsedOutputBytes + ) parseResult.filePath = normalizedFilePath } @@ -614,6 +826,25 @@ async function handleCloudFile( logger.error(`Error handling cloud file ${filePath}:`, error) const errorMessage = (error as Error).message + if (isPayloadSizeLimitError(error)) { + logger.warn('Rejected oversized cloud file parse payload', { + maxBytes: error.maxBytes, + observedBytes: error.observedBytes, + label: error.label, + filePath, + }) + return { + success: false, + error: + error.label === 'parsed file output' + ? getParsedOutputTooLargeMessage(error.maxBytes) + : `File is too large to parse safely. Maximum supported download size is ${prettySize( + error.maxBytes + )}.`, + filePath, + } + } + if (errorMessage.includes('Access denied') || errorMessage.includes('Forbidden')) { throw new Error(`Error accessing file from cloud storage: ${errorMessage}`) } @@ -633,14 +864,17 @@ async function handleLocalFile( filePath: string, fileType: string, userId: string, - executionContext?: ExecutionContext + executionContext?: ExecutionContext, + maxDownloadBytes = MAX_DOWNLOAD_SIZE_BYTES, + maxParsedOutputBytes?: number ): Promise { try { - const filename = filePath.split('/').pop() || filePath + const storageKey = isInternalFileUrl(filePath) ? extractStorageKey(filePath) : filePath + const filename = storageKey.split('/').pop() || storageKey - const context = inferContextFromKey(filename) + const context = inferContextFromKey(storageKey) const hasAccess = await verifyFileAccess( - filename, + storageKey, userId, undefined, // customConfig context, // context @@ -656,7 +890,7 @@ async function handleLocalFile( } } - const fullPath = path.join(UPLOAD_DIR_SERVER, filename) + const fullPath = path.join(UPLOAD_DIR_SERVER, storageKey) logger.info('Processing local file:', fullPath) @@ -666,10 +900,12 @@ async function handleLocalFile( throw new Error(`File not found: ${filename}`) } - const result = await parseFile(fullPath) - const stats = await fsPromises.stat(fullPath) - const fileBuffer = await readFile(fullPath) + assertKnownSizeWithinLimit(stats.size, maxDownloadBytes, 'local file') + + const result = await parseFile(fullPath) + const content = assertParsedContentWithinLimit(result.content, maxParsedOutputBytes) + const fileBuffer = await fsPromises.readFile(fullPath) const hash = createHash('md5').update(fileBuffer).digest('hex') const extension = path.extname(filename).toLowerCase().substring(1) @@ -694,7 +930,7 @@ async function handleLocalFile( return { success: true, - content: result.content, + content, filePath, userFile, metadata: { @@ -706,6 +942,25 @@ async function handleLocalFile( } } catch (error) { logger.error(`Error handling local file ${filePath}:`, error) + if (isPayloadSizeLimitError(error)) { + logger.warn('Rejected oversized local file parse payload', { + maxBytes: error.maxBytes, + observedBytes: error.observedBytes, + label: error.label, + filePath, + }) + return { + success: false, + error: + error.label === 'parsed file output' + ? getParsedOutputTooLargeMessage(error.maxBytes) + : `File is too large to parse safely. Maximum supported local file size is ${prettySize( + error.maxBytes + )}.`, + filePath, + } + } + return { success: false, error: `Error processing local file: ${(error as Error).message}`, @@ -721,7 +976,8 @@ async function handlePdfBuffer( fileBuffer: Buffer, filename: string, fileType?: string, - originalPath?: string + originalPath?: string, + maxParsedOutputBytes?: number ): Promise { try { logger.info(`Parsing PDF in memory: ${filename}`) @@ -731,10 +987,11 @@ async function handlePdfBuffer( const content = result.content || createPdfFallbackMessage(result.metadata?.pageCount || 0, fileBuffer.length, originalPath) + const limitedContent = assertParsedContentWithinLimit(content, maxParsedOutputBytes) return { success: true, - content, + content: limitedContent, filePath: originalPath || filename, metadata: { fileType: fileType || 'application/pdf', @@ -744,6 +1001,8 @@ async function handlePdfBuffer( }, } } catch (error) { + if (isPayloadSizeLimitError(error)) throw error + logger.error('Failed to parse PDF in memory:', error) const content = createPdfFailureMessage( @@ -774,7 +1033,8 @@ async function handleCsvBuffer( fileBuffer: Buffer, filename: string, fileType?: string, - originalPath?: string + originalPath?: string, + maxParsedOutputBytes?: number ): Promise { try { logger.info(`Parsing CSV in memory: ${filename}`) @@ -784,7 +1044,7 @@ async function handleCsvBuffer( return { success: true, - content: result.content, + content: assertParsedContentWithinLimit(result.content, maxParsedOutputBytes), filePath: originalPath || filename, metadata: { fileType: fileType || 'text/csv', @@ -794,6 +1054,8 @@ async function handleCsvBuffer( }, } } catch (error) { + if (isPayloadSizeLimitError(error)) throw error + logger.error('Failed to parse CSV in memory:', error) return { success: false, @@ -817,7 +1079,8 @@ async function handleGenericTextBuffer( filename: string, extension: string, fileType?: string, - originalPath?: string + originalPath?: string, + maxParsedOutputBytes?: number ): Promise { try { logger.info(`Parsing text file in memory: ${filename}`) @@ -830,7 +1093,7 @@ async function handleGenericTextBuffer( return { success: true, - content: result.content, + content: assertParsedContentWithinLimit(result.content, maxParsedOutputBytes), filePath: originalPath || filename, metadata: { fileType: fileType || getMimeTypeFromExtension(extension), @@ -841,9 +1104,14 @@ async function handleGenericTextBuffer( } } } catch (parserError) { + if (isPayloadSizeLimitError(parserError)) throw parserError + logger.warn('Specialized parser failed, falling back to generic parsing:', parserError) } + if (maxParsedOutputBytes !== undefined) { + assertKnownSizeWithinLimit(fileBuffer.length, maxParsedOutputBytes, 'parsed file output') + } const content = fileBuffer.toString('utf-8') return { @@ -858,6 +1126,8 @@ async function handleGenericTextBuffer( }, } } catch (error) { + if (isPayloadSizeLimitError(error)) throw error + logger.error('Failed to parse text file in memory:', error) return { success: false, @@ -880,12 +1150,13 @@ function handleGenericBuffer( fileBuffer: Buffer, filename: string, extension: string, - fileType?: string + fileType?: string, + maxParsedOutputBytes?: number ): ParseResult { const normalizedExtension = extension.toLowerCase() const content = !BINARY_EXTENSIONS.has(normalizedExtension) && isLikelyTextBuffer(fileBuffer) - ? fileBuffer.toString('utf-8') + ? assertParsedContentWithinLimit(fileBuffer.toString('utf-8'), maxParsedOutputBytes) : `[Binary ${normalizedExtension.toUpperCase()} file - ${fileBuffer.length} bytes]` return { diff --git a/apps/sim/app/api/files/upload/route.test.ts b/apps/sim/app/api/files/upload/route.test.ts index cf80cbf9b0d..7c7903ce306 100644 --- a/apps/sim/app/api/files/upload/route.test.ts +++ b/apps/sim/app/api/files/upload/route.test.ts @@ -90,6 +90,14 @@ vi.mock('@/lib/uploads', () => ({ vi.mock('@/lib/uploads/core/storage-service', () => storageServiceMock) +vi.mock('@/lib/uploads/shared/types', async (importOriginal) => { + const actual = await importOriginal() + return { + ...actual, + MAX_WORKSPACE_FORMDATA_FILE_SIZE: 1024, + } +}) + vi.mock('@/lib/uploads/setup.server', () => ({ UPLOAD_DIR_SERVER: '/tmp/test-uploads', })) @@ -179,6 +187,13 @@ describe('File Upload API Route', () => { return new File([content], name, { type }) } + const createUploadRequest = (formData: FormData): NextRequest => + new NextRequest('http://localhost:3000/api/files/upload', { + method: 'POST', + headers: { 'content-length': '1024' }, + body: formData, + }) + beforeEach(() => { vi.clearAllMocks() }) @@ -196,10 +211,7 @@ describe('File Upload API Route', () => { const mockFile = createMockFile() const formData = createMockFormData([mockFile]) - const req = new NextRequest('http://localhost:3000/api/files/upload', { - method: 'POST', - body: formData, - }) + const req = createUploadRequest(formData) const response = await POST(req) const data = await response.json() @@ -224,10 +236,7 @@ describe('File Upload API Route', () => { const mockFile = createMockFile() const formData = createMockFormData([mockFile]) - const req = new NextRequest('http://localhost:3000/api/files/upload', { - method: 'POST', - body: formData, - }) + const req = createUploadRequest(formData) const response = await POST(req) const data = await response.json() @@ -253,10 +262,7 @@ describe('File Upload API Route', () => { const mockFile2 = createMockFile('file2.txt', 'text/plain') const formData = createMockFormData([mockFile1, mockFile2]) - const req = new NextRequest('http://localhost:3000/api/files/upload', { - method: 'POST', - body: formData, - }) + const req = createUploadRequest(formData) const response = await POST(req) const data = await response.json() @@ -266,15 +272,44 @@ describe('File Upload API Route', () => { expect(data).toBeDefined() }) + it('rejects oversized workspace uploads before materializing file contents', async () => { + setupFileApiMocks({ + cloudEnabled: false, + storageProvider: 'local', + }) + + const mockFile = createMockFile('large.txt', 'text/plain', 'x'.repeat(1025)) + const arrayBufferSpy = vi.spyOn(mockFile, 'arrayBuffer') + const formData = { + getAll: (name: string) => (name === 'file' ? [mockFile] : []), + get: (name: string) => { + if (name === 'context') return 'workspace' + if (name === 'workspaceId') return 'test-workspace-id' + return null + }, + } as unknown as FormData + + const req = { + formData: async () => formData, + } as unknown as NextRequest + + const response = await POST(req) + const data = await response.json() + + expect(response.status).toBe(413) + expect(data.error).toBe('PayloadSizeLimitError') + expect(data.message).toContain('File exceeds the server upload limit') + expect(data.message).toContain('Use direct upload for larger workspace files') + expect(arrayBufferSpy).not.toHaveBeenCalled() + expect(uploadWorkspaceFile).not.toHaveBeenCalled() + }) + it('should handle missing files', async () => { setupFileApiMocks() const formData = new FormData() - const req = new NextRequest('http://localhost:3000/api/files/upload', { - method: 'POST', - body: formData, - }) + const req = createUploadRequest(formData) const response = await POST(req) const data = await response.json() @@ -295,10 +330,7 @@ describe('File Upload API Route', () => { const mockFile = createMockFile() const formData = createMockFormData([mockFile]) - const req = new NextRequest('http://localhost:3000/api/files/upload', { - method: 'POST', - body: formData, - }) + const req = createUploadRequest(formData) const response = await POST(req) const data = await response.json() @@ -362,6 +394,7 @@ describe('File Upload Security Tests', () => { const req = new Request('http://localhost/api/files/upload', { method: 'POST', + headers: { 'content-length': '1024' }, body: formData, }) @@ -381,6 +414,7 @@ describe('File Upload Security Tests', () => { const req = new Request('http://localhost/api/files/upload', { method: 'POST', + headers: { 'content-length': '1024' }, body: formData, }) @@ -400,6 +434,7 @@ describe('File Upload Security Tests', () => { const req = new Request('http://localhost/api/files/upload', { method: 'POST', + headers: { 'content-length': '1024' }, body: formData, }) @@ -418,6 +453,7 @@ describe('File Upload Security Tests', () => { const req = new Request('http://localhost/api/files/upload', { method: 'POST', + headers: { 'content-length': '1024' }, body: formData, }) @@ -437,6 +473,7 @@ describe('File Upload Security Tests', () => { const req = new Request('http://localhost/api/files/upload', { method: 'POST', + headers: { 'content-length': '1024' }, body: formData, }) @@ -462,6 +499,7 @@ describe('File Upload Security Tests', () => { const req = new Request('http://localhost/api/files/upload', { method: 'POST', + headers: { 'content-length': '1024' }, body: formData, }) @@ -483,6 +521,7 @@ describe('File Upload Security Tests', () => { const req = new Request('http://localhost/api/files/upload', { method: 'POST', + headers: { 'content-length': '1024' }, body: formData, }) diff --git a/apps/sim/app/api/files/upload/route.ts b/apps/sim/app/api/files/upload/route.ts index e1dc599cad7..2261dfe8bd7 100644 --- a/apps/sim/app/api/files/upload/route.ts +++ b/apps/sim/app/api/files/upload/route.ts @@ -10,10 +10,17 @@ import { } from '@/lib/api/contracts/storage-transfer' import { getValidationErrorMessage } from '@/lib/api/server' import { getSession } from '@/lib/auth' +import { + assertContentLengthWithinLimit, + assertKnownSizeWithinLimit, + isPayloadSizeLimitError, + readFileToBufferWithLimit, +} from '@/lib/core/utils/stream-limits' import { withRouteHandler } from '@/lib/core/utils/with-route-handler' import { captureServerEvent } from '@/lib/posthog/server' import type { StorageContext } from '@/lib/uploads/config' import { generateWorkspaceFileKey } from '@/lib/uploads/contexts/workspace/workspace-file-manager' +import { MAX_WORKSPACE_FORMDATA_FILE_SIZE } from '@/lib/uploads/shared/types' import { isImageFileType, resolveFileType } from '@/lib/uploads/utils/file-utils' import { SUPPORTED_ATTACHMENT_EXTENSIONS, @@ -24,6 +31,7 @@ import { getUserEntityPermissions } from '@/lib/workspaces/permissions/utils' import { createErrorResponse, InvalidRequestError } from '@/app/api/files/utils' const ALLOWED_EXTENSIONS = new Set(SUPPORTED_ATTACHMENT_EXTENSIONS) +const MAX_MULTIPART_OVERHEAD_BYTES = 1024 * 1024 function validateFileExtension(filename: string): boolean { const extension = filename.split('.').pop()?.toLowerCase() @@ -42,6 +50,18 @@ export const POST = withRouteHandler(async (request: NextRequest) => { return NextResponse.json({ error: 'Unauthorized' }, { status: 401 }) } + if (request.headers && !request.headers.get('content-length')) { + return NextResponse.json( + { error: 'Content-Length is required for multipart uploads' }, + { status: 411 } + ) + } + assertContentLengthWithinLimit( + request.headers, + MAX_WORKSPACE_FORMDATA_FILE_SIZE + MAX_MULTIPART_OVERHEAD_BYTES, + 'multipart upload body' + ) + const formData = await request.formData() const rawFiles = formData.getAll('file') @@ -50,6 +70,8 @@ export const POST = withRouteHandler(async (request: NextRequest) => { throw new InvalidRequestError('No files provided') } const files = filesResult.data + const totalFileSize = files.reduce((total, file) => total + file.size, 0) + assertKnownSizeWithinLimit(totalFileSize, MAX_WORKSPACE_FORMDATA_FILE_SIZE, 'uploaded files') const formFieldsResult = uploadFilesFormFieldsSchema.safeParse({ workflowId: formData.get('workflowId'), @@ -90,8 +112,10 @@ export const POST = withRouteHandler(async (request: NextRequest) => { ) } - const bytes = await file.arrayBuffer() - const buffer = Buffer.from(bytes) + const buffer = await readFileToBufferWithLimit(file, { + maxBytes: MAX_WORKSPACE_FORMDATA_FILE_SIZE, + label: 'uploaded file', + }) // Handle execution context if (context === 'execution') { @@ -423,6 +447,15 @@ export const POST = withRouteHandler(async (request: NextRequest) => { return NextResponse.json({ files: uploadResults }) } catch (error) { logger.error('Error in file upload:', error) + if (isPayloadSizeLimitError(error)) { + return NextResponse.json( + { + error: 'PayloadSizeLimitError', + message: `File exceeds the server upload limit of ${Math.round(error.maxBytes / (1024 * 1024))}MB. Use direct upload for larger workspace files.`, + }, + { status: 413 } + ) + } return createErrorResponse(error instanceof Error ? error : new Error('File upload failed')) } }) diff --git a/apps/sim/app/api/table/[tableId]/import/route.test.ts b/apps/sim/app/api/table/[tableId]/import/route.test.ts index b821961cb6d..3eec617c519 100644 --- a/apps/sim/app/api/table/[tableId]/import/route.test.ts +++ b/apps/sim/app/api/table/[tableId]/import/route.test.ts @@ -112,6 +112,7 @@ function buildTable(overrides: Partial = {}): TableDefinition { async function callPost(form: FormData, { tableId }: { tableId: string } = { tableId: 'tbl_1' }) { const req = new NextRequest(`http://localhost:3000/api/table/${tableId}/import`, { method: 'POST', + headers: { 'content-length': '1024' }, body: form, }) return POST(req, { params: Promise.resolve({ tableId }) }) @@ -182,6 +183,26 @@ describe('POST /api/table/[tableId]/import', () => { expect(data.error).toMatch(/archived/i) }) + it('returns 413 for oversized CSV files before reading their contents', async () => { + const file = createCsvFile('name,age\nAlice,30') + Object.defineProperty(file, 'size', { + value: 26 * 1024 * 1024, + }) + const arrayBufferSpy = vi.spyOn(file, 'arrayBuffer') + + const req = { + formData: async () => createFormData(file), + } as unknown as NextRequest + + const response = await POST(req, { params: Promise.resolve({ tableId: 'tbl_1' }) }) + expect(response.status).toBe(413) + const data = await response.json() + expect(data.error).toMatch(/CSV import file exceeds maximum size/) + expect(arrayBufferSpy).not.toHaveBeenCalled() + expect(mockBatchInsertRowsWithTx).not.toHaveBeenCalled() + expect(mockReplaceTableRowsWithTx).not.toHaveBeenCalled() + }) + it('returns 400 when the CSV is missing a required column', async () => { const response = await callPost(createFormData(createCsvFile('age\n30'))) expect(response.status).toBe(400) diff --git a/apps/sim/app/api/table/[tableId]/import/route.ts b/apps/sim/app/api/table/[tableId]/import/route.ts index c51cde1b2ab..b30396ed69a 100644 --- a/apps/sim/app/api/table/[tableId]/import/route.ts +++ b/apps/sim/app/api/table/[tableId]/import/route.ts @@ -14,12 +14,18 @@ import { import { getValidationErrorMessage } from '@/lib/api/server' import { checkSessionOrInternalAuth } from '@/lib/auth/hybrid' import { generateRequestId } from '@/lib/core/utils/request' +import { + assertContentLengthWithinLimit, + isPayloadSizeLimitError, + readFileToBufferWithLimit, +} from '@/lib/core/utils/stream-limits' import { withRouteHandler } from '@/lib/core/utils/with-route-handler' import { addTableColumnsWithTx, batchInsertRowsWithTx, buildAutoMapping, CSV_MAX_BATCH_SIZE, + CSV_MAX_FILE_SIZE_BYTES, type CsvHeaderMapping, CsvImportValidationError, coerceRowsForTable, @@ -34,6 +40,7 @@ import { import { accessError, checkAccess } from '@/app/api/table/utils' const logger = createLogger('TableImportCSVExisting') +const MAX_MULTIPART_OVERHEAD_BYTES = 1024 * 1024 interface RouteParams { params: Promise<{ tableId: string }> @@ -49,6 +56,18 @@ export const POST = withRouteHandler(async (request: NextRequest, { params }: Ro return NextResponse.json({ error: 'Authentication required' }, { status: 401 }) } + if (request.headers && !request.headers.get('content-length')) { + return NextResponse.json( + { error: 'Content-Length is required for CSV imports' }, + { status: 411 } + ) + } + assertContentLengthWithinLimit( + request.headers, + CSV_MAX_FILE_SIZE_BYTES + MAX_MULTIPART_OVERHEAD_BYTES, + 'CSV import body' + ) + const formData = await request.formData() const formValidation = csvImportFormSchema.safeParse({ file: formData.get('file'), @@ -59,9 +78,11 @@ export const POST = withRouteHandler(async (request: NextRequest, { params }: Ro const rawCreateColumns = formData.get('createColumns') if (!formValidation.success) { + const message = getValidationErrorMessage(formValidation.error) + const isSizeLimit = message.includes('File exceeds maximum allowed size') return NextResponse.json( - { error: getValidationErrorMessage(formValidation.error) }, - { status: 400 } + { error: isSizeLimit ? 'CSV import file exceeds maximum size' : message }, + { status: isSizeLimit ? 413 : 400 } ) } @@ -125,7 +146,10 @@ export const POST = withRouteHandler(async (request: NextRequest, { params }: Ro createColumns = createColumnsValidation.data } - const buffer = Buffer.from(await file.arrayBuffer()) + const buffer = await readFileToBufferWithLimit(file, { + maxBytes: CSV_MAX_FILE_SIZE_BYTES, + label: 'CSV import file', + }) const delimiter = extensionValidation.data === 'tsv' ? '\t' : ',' const { headers, rows } = await parseCsvBuffer(buffer, delimiter) @@ -343,14 +367,19 @@ export const POST = withRouteHandler(async (request: NextRequest, { params }: Ro const message = toError(error).message logger.error(`[${requestId}] CSV import into existing table failed:`, error) + const isSizeLimitError = + isPayloadSizeLimitError(error) || message.includes('CSV import file exceeds maximum size') const isClientError = message.includes('CSV file has no') || message.includes('already exists') || - message.includes('Invalid column name') + message.includes('Invalid column name') || + isSizeLimitError return NextResponse.json( { error: isClientError ? message : 'Failed to import CSV' }, - { status: isClientError ? 400 : 500 } + { + status: isSizeLimitError ? 413 : isClientError ? 400 : 500, + } ) } }) diff --git a/apps/sim/app/api/table/import-csv/route.test.ts b/apps/sim/app/api/table/import-csv/route.test.ts new file mode 100644 index 00000000000..d635a17c85e --- /dev/null +++ b/apps/sim/app/api/table/import-csv/route.test.ts @@ -0,0 +1,106 @@ +/** + * @vitest-environment node + */ +import { hybridAuthMockFns, permissionsMock, permissionsMockFns } from '@sim/testing' +import type { NextRequest } from 'next/server' +import { beforeEach, describe, expect, it, vi } from 'vitest' + +const { mockCreateTable, mockParseCsvBuffer, mockGetWorkspaceTableLimits } = vi.hoisted(() => ({ + mockCreateTable: vi.fn(), + mockParseCsvBuffer: vi.fn(), + mockGetWorkspaceTableLimits: vi.fn(), +})) + +vi.mock('@sim/utils/id', () => ({ + generateId: vi.fn().mockReturnValue('deadbeefcafef00d'), + generateShortId: vi.fn().mockReturnValue('short-id'), +})) + +vi.mock('@/lib/table', () => ({ + batchInsertRows: vi.fn(), + CSV_MAX_BATCH_SIZE: 1000, + CSV_MAX_FILE_SIZE_BYTES: 25 * 1024 * 1024, + coerceRowsForTable: vi.fn(), + createTable: mockCreateTable, + deleteTable: vi.fn(), + getWorkspaceTableLimits: mockGetWorkspaceTableLimits, + inferSchemaFromCsv: vi.fn(), + parseCsvBuffer: mockParseCsvBuffer, + sanitizeName: vi.fn((name: string) => name), + TABLE_LIMITS: { + MAX_TABLE_NAME_LENGTH: 64, + }, +})) + +vi.mock('@/app/api/table/utils', () => ({ + normalizeColumn: vi.fn((column) => column), +})) + +vi.mock('@/lib/workspaces/permissions/utils', () => permissionsMock) + +import { POST } from '@/app/api/table/import-csv/route' + +function createCsvFile(contents: string, name = 'data.csv', type = 'text/csv'): File { + return new File([contents], name, { type }) +} + +function createFormData(file: File): FormData { + const form = new FormData() + form.append('file', file) + form.append('workspaceId', 'workspace-1') + return form +} + +async function callPost(form: FormData) { + const req = { + formData: async () => form, + } as unknown as NextRequest + return POST(req) +} + +describe('POST /api/table/import-csv', () => { + beforeEach(() => { + vi.clearAllMocks() + hybridAuthMockFns.mockCheckSessionOrInternalAuth.mockResolvedValue({ + success: true, + userId: 'user-1', + authType: 'session', + }) + permissionsMockFns.mockGetUserEntityPermissions.mockResolvedValue('write') + mockGetWorkspaceTableLimits.mockResolvedValue({ + maxRowsPerTable: 1000, + maxTables: 10, + }) + }) + + it('returns 413 for oversized CSV files before reading their contents or creating a table', async () => { + const file = createCsvFile('name,age\nAlice,30') + Object.defineProperty(file, 'size', { + value: 26 * 1024 * 1024, + }) + const arrayBufferSpy = vi.spyOn(file, 'arrayBuffer') + + const response = await callPost(createFormData(file)) + const data = await response.json() + + expect(response.status).toBe(413) + expect(data.error).toMatch(/CSV import file exceeds maximum size/) + expect(arrayBufferSpy).not.toHaveBeenCalled() + expect(mockParseCsvBuffer).not.toHaveBeenCalled() + expect(mockCreateTable).not.toHaveBeenCalled() + }) + + it('requires content-length when request headers are available', async () => { + const req = { + headers: new Headers({ 'transfer-encoding': 'chunked' }), + formData: vi.fn(async () => createFormData(createCsvFile('name\nAlice'))), + } as unknown as NextRequest + + const response = await POST(req) + const data = await response.json() + + expect(response.status).toBe(411) + expect(data.error).toMatch(/Content-Length is required/) + expect(req.formData).not.toHaveBeenCalled() + }) +}) diff --git a/apps/sim/app/api/table/import-csv/route.ts b/apps/sim/app/api/table/import-csv/route.ts index 11951d0cb20..909beccf048 100644 --- a/apps/sim/app/api/table/import-csv/route.ts +++ b/apps/sim/app/api/table/import-csv/route.ts @@ -6,10 +6,16 @@ import { csvExtensionSchema, csvImportFormSchema } from '@/lib/api/contracts/tab import { getValidationErrorMessage } from '@/lib/api/server' import { checkSessionOrInternalAuth } from '@/lib/auth/hybrid' import { generateRequestId } from '@/lib/core/utils/request' +import { + assertContentLengthWithinLimit, + isPayloadSizeLimitError, + readFileToBufferWithLimit, +} from '@/lib/core/utils/stream-limits' import { withRouteHandler } from '@/lib/core/utils/with-route-handler' import { batchInsertRows, CSV_MAX_BATCH_SIZE, + CSV_MAX_FILE_SIZE_BYTES, coerceRowsForTable, createTable, deleteTable, @@ -24,6 +30,7 @@ import { getUserEntityPermissions } from '@/lib/workspaces/permissions/utils' import { normalizeColumn } from '@/app/api/table/utils' const logger = createLogger('TableImportCSV') +const MAX_MULTIPART_OVERHEAD_BYTES = 1024 * 1024 export const POST = withRouteHandler(async (request: NextRequest) => { const requestId = generateRequestId() @@ -34,6 +41,18 @@ export const POST = withRouteHandler(async (request: NextRequest) => { return NextResponse.json({ error: 'Authentication required' }, { status: 401 }) } + if (request.headers && !request.headers.get('content-length')) { + return NextResponse.json( + { error: 'Content-Length is required for CSV imports' }, + { status: 411 } + ) + } + assertContentLengthWithinLimit( + request.headers, + CSV_MAX_FILE_SIZE_BYTES + MAX_MULTIPART_OVERHEAD_BYTES, + 'CSV import body' + ) + const formData = await request.formData() const validation = csvImportFormSchema.safeParse({ file: formData.get('file'), @@ -41,9 +60,11 @@ export const POST = withRouteHandler(async (request: NextRequest) => { }) if (!validation.success) { + const message = getValidationErrorMessage(validation.error) + const isSizeLimit = message.includes('File exceeds maximum allowed size') return NextResponse.json( - { error: getValidationErrorMessage(validation.error) }, - { status: 400 } + { error: isSizeLimit ? 'CSV import file exceeds maximum size' : message }, + { status: isSizeLimit ? 413 : 400 } ) } @@ -63,7 +84,10 @@ export const POST = withRouteHandler(async (request: NextRequest) => { ) } - const buffer = Buffer.from(await file.arrayBuffer()) + const buffer = await readFileToBufferWithLimit(file, { + maxBytes: CSV_MAX_FILE_SIZE_BYTES, + label: 'CSV import file', + }) const delimiter = extensionValidation.data === 'tsv' ? '\t' : ',' const { headers, rows } = await parseCsvBuffer(buffer, delimiter) @@ -132,16 +156,21 @@ export const POST = withRouteHandler(async (request: NextRequest) => { const message = toError(error).message logger.error(`[${requestId}] CSV import failed:`, error) + const isSizeLimitError = + isPayloadSizeLimitError(error) || message.includes('CSV import file exceeds maximum size') const isClientError = message.includes('maximum table limit') || message.includes('CSV file has no') || message.includes('Invalid table name') || message.includes('Invalid schema') || - message.includes('already exists') + message.includes('already exists') || + isSizeLimitError return NextResponse.json( { error: isClientError ? message : 'Failed to import CSV' }, - { status: isClientError ? 400 : 500 } + { + status: isSizeLimitError ? 413 : isClientError ? 400 : 500, + } ) } }) diff --git a/apps/sim/app/api/tools/docusign/route.ts b/apps/sim/app/api/tools/docusign/route.ts index c88878bb73b..b49c1ca18d2 100644 --- a/apps/sim/app/api/tools/docusign/route.ts +++ b/apps/sim/app/api/tools/docusign/route.ts @@ -4,19 +4,50 @@ import { type NextRequest, NextResponse } from 'next/server' import { docusignToolContract } from '@/lib/api/contracts/tools/docusign' import { getValidationErrorMessage, parseRequest } from '@/lib/api/server' import { checkInternalAuth } from '@/lib/auth/hybrid' +import { + assertKnownSizeWithinLimit, + DEFAULT_MAX_ERROR_BODY_BYTES, + isPayloadSizeLimitError, + PayloadSizeLimitError, + readResponseJsonWithLimit, + readResponseTextWithLimit, + readResponseToBufferWithLimit, +} from '@/lib/core/utils/stream-limits' import { withRouteHandler } from '@/lib/core/utils/with-route-handler' +import { uploadExecutionFile } from '@/lib/uploads/contexts/execution' import { FileInputSchema } from '@/lib/uploads/utils/file-schemas' import { processFilesToUserFiles, type RawFileInput } from '@/lib/uploads/utils/file-utils' import { downloadFileFromStorage } from '@/lib/uploads/utils/file-utils.server' import { assertToolFileAccess } from '@/app/api/files/authorization' const logger = createLogger('DocuSignAPI') +const MAX_DOCUSIGN_DOCUMENT_BYTES = 25 * 1024 * 1024 +const MAX_DOCUSIGN_JSON_BYTES = 2 * 1024 * 1024 +const MAX_LEGACY_INLINE_DOCUMENT_BYTES = 7 * 1024 * 1024 interface DocuSignAccountInfo { accountId: string baseUri: string } +async function readDocusignJson( + response: Response, + label: string +): Promise> { + return readResponseJsonWithLimit>(response, { + maxBytes: MAX_DOCUSIGN_JSON_BYTES, + label, + }) +} + +function docusignError(data: Record, fallback: string): string { + return ( + (typeof data.message === 'string' && data.message) || + (typeof data.errorCode === 'string' && data.errorCode) || + fallback + ) +} + /** * Resolves the user's DocuSign account info from their access token * by calling the DocuSign userinfo endpoint. @@ -27,7 +58,10 @@ async function resolveAccount(accessToken: string): Promise }) if (!response.ok) { - const errorText = await response.text() + const errorText = await readResponseTextWithLimit(response, { + maxBytes: DEFAULT_MAX_ERROR_BODY_BYTES, + label: 'DocuSign account error response', + }).catch(() => '') logger.error('Failed to resolve DocuSign account', { status: response.status, error: errorText, @@ -35,10 +69,16 @@ async function resolveAccount(accessToken: string): Promise throw new Error(`Failed to resolve DocuSign account: ${response.status}`) } - const data = await response.json() - const accounts = data.accounts ?? [] + const data = await readDocusignJson(response, 'DocuSign account response') + const accounts = Array.isArray(data.accounts) + ? (data.accounts as Array<{ + is_default?: boolean + base_uri?: string + account_id?: string + }>) + : [] - const defaultAccount = accounts.find((a: { is_default: boolean }) => a.is_default) ?? accounts[0] + const defaultAccount = accounts.find((account) => account.is_default) ?? accounts[0] if (!defaultAccount) { throw new Error('No DocuSign accounts found for this user') } @@ -47,9 +87,13 @@ async function resolveAccount(accessToken: string): Promise if (!baseUri) { throw new Error('DocuSign account is missing base_uri') } + const accountId = defaultAccount.account_id + if (!accountId) { + throw new Error('DocuSign account is missing account_id') + } return { - accountId: defaultAccount.account_id, + accountId, baseUri, } } @@ -110,7 +154,10 @@ export const POST = withRouteHandler(async (request: NextRequest) => { } catch (error) { logger.error('DocuSign API error', { operation, error }) const message = getErrorMessage(error, 'Internal server error') - return NextResponse.json({ success: false, error: message }, { status: 500 }) + return NextResponse.json( + { success: false, error: message }, + { status: isPayloadSizeLimitError(error) ? 413 : 500 } + ) } }) @@ -140,15 +187,29 @@ async function handleSendEnvelope( const userFile = userFiles[0] const denied = await assertToolFileAccess(userFile.key, userId, 'docusign-send', logger) if (denied) return denied - const buffer = await downloadFileFromStorage(userFile, 'docusign-send', logger) + if (userFile.size > MAX_DOCUSIGN_DOCUMENT_BYTES) { + return NextResponse.json( + { success: false, error: 'Document is too large to send through DocuSign' }, + { status: 413 } + ) + } + const buffer = await downloadFileFromStorage(userFile, 'docusign-send', logger, { + maxBytes: MAX_DOCUSIGN_DOCUMENT_BYTES, + }) + assertKnownSizeWithinLimit(buffer.length, MAX_DOCUSIGN_DOCUMENT_BYTES, 'DocuSign document') documentBase64 = buffer.toString('base64') documentName = userFile.name } } catch (fileError) { logger.error('Failed to process file for DocuSign envelope', { fileError }) return NextResponse.json( - { success: false, error: 'Failed to process uploaded file' }, - { status: 400 } + { + success: false, + error: isPayloadSizeLimitError(fileError) + ? getErrorMessage(fileError, 'Document is too large to send through DocuSign') + : 'Failed to process uploaded file', + }, + { status: isPayloadSizeLimitError(fileError) ? 413 : 400 } ) } } @@ -222,11 +283,11 @@ async function handleSendEnvelope( body: JSON.stringify(envelopeBody), }) - const data = await response.json() + const data = await readDocusignJson(response, 'DocuSign send envelope response') if (!response.ok) { logger.error('DocuSign send envelope failed', { data, status: response.status }) return NextResponse.json( - { success: false, error: data.message || data.errorCode || 'Failed to send envelope' }, + { success: false, error: docusignError(data, 'Failed to send envelope') }, { status: response.status } ) } @@ -276,13 +337,13 @@ async function handleCreateFromTemplate( body: JSON.stringify(envelopeBody), }) - const data = await response.json() + const data = await readDocusignJson(response, 'DocuSign create from template response') if (!response.ok) { logger.error('DocuSign create from template failed', { data, status: response.status }) return NextResponse.json( { success: false, - error: data.message || data.errorCode || 'Failed to create envelope from template', + error: docusignError(data, 'Failed to create envelope from template'), }, { status: response.status } ) @@ -305,11 +366,11 @@ async function handleGetEnvelope( `${apiBase}/envelopes/${(envelopeId as string).trim()}?include=recipients,documents`, { headers } ) - const data = await response.json() + const data = await readDocusignJson(response, 'DocuSign envelope response') if (!response.ok) { return NextResponse.json( - { success: false, error: data.message || data.errorCode || 'Failed to get envelope' }, + { success: false, error: docusignError(data, 'Failed to get envelope') }, { status: response.status } ) } @@ -339,11 +400,11 @@ async function handleListEnvelopes( if (params.count) queryParams.append('count', params.count as string) const response = await fetch(`${apiBase}/envelopes?${queryParams}`, { headers }) - const data = await response.json() + const data = await readDocusignJson(response, 'DocuSign envelope list response') if (!response.ok) { return NextResponse.json( - { success: false, error: data.message || data.errorCode || 'Failed to list envelopes' }, + { success: false, error: docusignError(data, 'Failed to list envelopes') }, { status: response.status } ) } @@ -370,10 +431,10 @@ async function handleVoidEnvelope( body: JSON.stringify({ status: 'voided', voidedReason }), }) - const data = await response.json() + const data = await readDocusignJson(response, 'DocuSign void envelope response') if (!response.ok) { return NextResponse.json( - { success: false, error: data.message || data.errorCode || 'Failed to void envelope' }, + { success: false, error: docusignError(data, 'Failed to void envelope') }, { status: response.status } ) } @@ -403,7 +464,10 @@ async function handleDownloadDocument( if (!response.ok) { let errorText = '' try { - errorText = await response.text() + errorText = await readResponseTextWithLimit(response, { + maxBytes: DEFAULT_MAX_ERROR_BODY_BYTES, + label: 'DocuSign document error response', + }) } catch { // ignore } @@ -422,7 +486,37 @@ async function handleDownloadDocument( fileName = filenameMatch[1].replace(/['"]/g, '') } - const buffer = Buffer.from(await response.arrayBuffer()) + const buffer = await readResponseToBufferWithLimit(response, { + maxBytes: MAX_DOCUSIGN_DOCUMENT_BYTES, + label: 'DocuSign document download', + }) + + const workspaceId = typeof params.workspaceId === 'string' ? params.workspaceId : undefined + const workflowId = typeof params.workflowId === 'string' ? params.workflowId : undefined + const executionId = typeof params.executionId === 'string' ? params.executionId : undefined + + if (workspaceId && workflowId && executionId) { + const file = await uploadExecutionFile( + { workspaceId, workflowId, executionId }, + buffer, + fileName, + contentType + ) + return NextResponse.json({ + file, + mimeType: contentType, + fileName, + }) + } + + if (buffer.length > MAX_LEGACY_INLINE_DOCUMENT_BYTES) { + throw new PayloadSizeLimitError({ + label: 'DocuSign legacy inline document', + maxBytes: MAX_LEGACY_INLINE_DOCUMENT_BYTES, + observedBytes: buffer.length, + }) + } + const base64Content = buffer.toString('base64') return NextResponse.json({ base64Content, mimeType: contentType, fileName }) @@ -441,11 +535,11 @@ async function handleListTemplates( const url = queryString ? `${apiBase}/templates?${queryString}` : `${apiBase}/templates` const response = await fetch(url, { headers }) - const data = await response.json() + const data = await readDocusignJson(response, 'DocuSign template list response') if (!response.ok) { return NextResponse.json( - { success: false, error: data.message || data.errorCode || 'Failed to list templates' }, + { success: false, error: docusignError(data, 'Failed to list templates') }, { status: response.status } ) } @@ -466,11 +560,11 @@ async function handleListRecipients( const response = await fetch(`${apiBase}/envelopes/${(envelopeId as string).trim()}/recipients`, { headers, }) - const data = await response.json() + const data = await readDocusignJson(response, 'DocuSign recipients response') if (!response.ok) { return NextResponse.json( - { success: false, error: data.message || data.errorCode || 'Failed to list recipients' }, + { success: false, error: docusignError(data, 'Failed to list recipients') }, { status: response.status } ) } diff --git a/apps/sim/app/api/tools/image/route.ts b/apps/sim/app/api/tools/image/route.ts index d48e5dffd80..b1643542402 100644 --- a/apps/sim/app/api/tools/image/route.ts +++ b/apps/sim/app/api/tools/image/route.ts @@ -21,10 +21,21 @@ import { validateUrlWithDNS, } from '@/lib/core/security/input-validation.server' import { generateRequestId } from '@/lib/core/utils/request' +import { + assertKnownSizeWithinLimit, + consumeOrCancelBody, + DEFAULT_MAX_ERROR_BODY_BYTES, + isPayloadSizeLimitError, + readResponseJsonWithLimit, + readResponseTextWithLimit, + readResponseToBufferWithLimit, +} from '@/lib/core/utils/stream-limits' import { getBaseUrl } from '@/lib/core/utils/urls' import { withRouteHandler } from '@/lib/core/utils/with-route-handler' const logger = createLogger('ImageProxyAPI') +const MAX_IMAGE_BYTES = 25 * 1024 * 1024 +const MAX_IMAGE_JSON_BYTES = Math.ceil((MAX_IMAGE_BYTES * 4) / 3) + 256 * 1024 export const dynamic = 'force-dynamic' export const maxDuration = 600 @@ -116,7 +127,10 @@ export const POST = withRouteHandler(async (request: NextRequest) => { } catch (error) { logger.error(`[${requestId}] Image generation failed:`, error) const errorMessage = getErrorMessage(error, 'Image generation failed') - return NextResponse.json({ error: errorMessage }, { status: 500 }) + return NextResponse.json( + { error: errorMessage }, + { status: isPayloadSizeLimitError(error) ? 413 : 500 } + ) } const storedImage = await storeGeneratedImage(imageResult, body, authResult.userId, requestId) @@ -131,7 +145,10 @@ export const POST = withRouteHandler(async (request: NextRequest) => { } catch (error) { logger.error(`[${requestId}] Image generation route error:`, error) const errorMessage = getErrorMessage(error, 'Unknown error') - return NextResponse.json({ error: errorMessage }, { status: 500 }) + return NextResponse.json( + { error: errorMessage }, + { status: isPayloadSizeLimitError(error) ? 413 : 500 } + ) } }) @@ -172,6 +189,7 @@ export const GET = withRouteHandler(async (request: NextRequest) => { try { const imageResponse = await secureFetchWithPinnedIP(imageUrl, urlValidation.resolvedIP!, { method: 'GET', + maxResponseBytes: MAX_IMAGE_BYTES, headers: { 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36', @@ -186,6 +204,7 @@ export const GET = withRouteHandler(async (request: NextRequest) => { }) if (!imageResponse.ok) { + await consumeOrCancelBody(imageResponse) logger.error(`[${requestId}] Image fetch failed:`, { status: imageResponse.status, statusText: imageResponse.statusText, @@ -197,14 +216,17 @@ export const GET = withRouteHandler(async (request: NextRequest) => { const contentType = imageResponse.headers.get('content-type') || 'image/jpeg' - const imageArrayBuffer = await imageResponse.arrayBuffer() + const imageBuffer = await readResponseToBufferWithLimit(imageResponse, { + maxBytes: MAX_IMAGE_BYTES, + label: 'image proxy response', + }) - if (imageArrayBuffer.byteLength === 0) { + if (imageBuffer.length === 0) { logger.error(`[${requestId}] Empty image received`) return new NextResponse('Empty image received', { status: 404 }) } - return new NextResponse(imageArrayBuffer, { + return new NextResponse(new Uint8Array(imageBuffer), { headers: { 'Content-Type': contentType, 'Access-Control-Allow-Origin': '*', @@ -216,7 +238,7 @@ export const GET = withRouteHandler(async (request: NextRequest) => { logger.error(`[${requestId}] Image proxy error:`, { error: errorMessage }) return new NextResponse(`Failed to proxy image: ${errorMessage}`, { - status: 500, + status: isPayloadSizeLimitError(error) ? 413 : 500, }) } }) @@ -458,9 +480,11 @@ async function bufferFromImageUrl(url: string): Promise<{ buffer: Buffer; conten if (url.startsWith('data:')) { const match = /^data:([^;]+);base64,(.+)$/u.exec(url) if (!match) throw new Error('Invalid data URI image response') + const buffer = Buffer.from(match[2], 'base64') + assertKnownSizeWithinLimit(buffer.length, MAX_IMAGE_BYTES, 'inline image response') return { contentType: match[1], - buffer: Buffer.from(match[2], 'base64'), + buffer, } } @@ -471,15 +495,22 @@ async function bufferFromImageUrl(url: string): Promise<{ buffer: Buffer; conten const imageResponse = await secureFetchWithPinnedIP(url, urlValidation.resolvedIP, { method: 'GET', + maxResponseBytes: MAX_IMAGE_BYTES, }) if (!imageResponse.ok) { - await imageResponse.text().catch(() => {}) + await readResponseTextWithLimit(imageResponse, { + maxBytes: DEFAULT_MAX_ERROR_BODY_BYTES, + label: 'generated image error response', + }).catch(() => '') throw new Error(`Failed to download generated image: ${imageResponse.status}`) } const contentType = imageResponse.headers.get('content-type') || 'image/png' - const arrayBuffer = await imageResponse.arrayBuffer() - return { buffer: Buffer.from(arrayBuffer), contentType } + const buffer = await readResponseToBufferWithLimit(imageResponse, { + maxBytes: MAX_IMAGE_BYTES, + label: 'generated image download', + }) + return { buffer, contentType } } async function generateWithOpenAI( @@ -524,11 +555,17 @@ async function generateWithOpenAI( }) if (!openaiResponse.ok) { - const error = await openaiResponse.text() + const error = await readResponseTextWithLimit(openaiResponse, { + maxBytes: DEFAULT_MAX_ERROR_BODY_BYTES, + label: 'OpenAI image error response', + }) throw new Error(`OpenAI API error: ${openaiResponse.status} - ${error}`) } - const data = (await openaiResponse.json()) as unknown + const data = await readResponseJsonWithLimit(openaiResponse, { + maxBytes: MAX_IMAGE_JSON_BYTES, + label: 'OpenAI image response', + }) if (!isRecord(data)) { throw new Error('Invalid OpenAI image response') } @@ -542,6 +579,7 @@ async function generateWithOpenAI( if (base64Image) { buffer = Buffer.from(base64Image, 'base64') + assertKnownSizeWithinLimit(buffer.length, MAX_IMAGE_BYTES, 'OpenAI image response') } else if (imageUrl) { const downloaded = await bufferFromImageUrl(imageUrl) buffer = downloaded.buffer @@ -611,11 +649,17 @@ async function generateWithGemini( ) if (!geminiResponse.ok) { - const error = await geminiResponse.text() + const error = await readResponseTextWithLimit(geminiResponse, { + maxBytes: DEFAULT_MAX_ERROR_BODY_BYTES, + label: 'Gemini image error response', + }) throw new Error(`Gemini API error: ${geminiResponse.status} - ${error}`) } - const data = (await geminiResponse.json()) as unknown + const data = await readResponseJsonWithLimit(geminiResponse, { + maxBytes: MAX_IMAGE_JSON_BYTES, + label: 'Gemini image response', + }) if (!isRecord(data)) { throw new Error('Invalid Gemini image response') } @@ -650,7 +694,11 @@ async function generateWithGemini( } return { - buffer: Buffer.from(base64Image, 'base64'), + buffer: (() => { + const buffer = Buffer.from(base64Image, 'base64') + assertKnownSizeWithinLimit(buffer.length, MAX_IMAGE_BYTES, 'Gemini image response') + return buffer + })(), contentType, fileName: `gemini-${model}.${extensionFromContentType(contentType)}`, provider: 'gemini', @@ -767,11 +815,17 @@ async function generateWithFalAI( }) if (!createResponse.ok) { - const error = await createResponse.text() + const error = await readResponseTextWithLimit(createResponse, { + maxBytes: DEFAULT_MAX_ERROR_BODY_BYTES, + label: 'Fal.ai create error response', + }) throw new Error(`Fal.ai API error: ${createResponse.status} - ${error}`) } - const createData = (await createResponse.json()) as unknown + const createData = await readResponseJsonWithLimit(createResponse, { + maxBytes: MAX_IMAGE_JSON_BYTES, + label: 'Fal.ai create response', + }) if (!isRecord(createData)) { throw new Error('Invalid Fal.ai queue response') } @@ -804,11 +858,17 @@ async function generateWithFalAI( }) if (!statusResponse.ok) { - await statusResponse.text().catch(() => {}) + await readResponseTextWithLimit(statusResponse, { + maxBytes: DEFAULT_MAX_ERROR_BODY_BYTES, + label: 'Fal.ai status error response', + }).catch(() => '') throw new Error(`Fal.ai status check failed: ${statusResponse.status}`) } - const statusData = (await statusResponse.json()) as unknown + const statusData = await readResponseJsonWithLimit(statusResponse, { + maxBytes: MAX_IMAGE_JSON_BYTES, + label: 'Fal.ai status response', + }) if (!isRecord(statusData)) { throw new Error('Invalid Fal.ai status response') } @@ -830,11 +890,17 @@ async function generateWithFalAI( ) if (!resultResponse.ok) { - await resultResponse.text().catch(() => {}) + await readResponseTextWithLimit(resultResponse, { + maxBytes: DEFAULT_MAX_ERROR_BODY_BYTES, + label: 'Fal.ai result error response', + }).catch(() => '') throw new Error(`Failed to fetch Fal.ai result: ${resultResponse.status}`) } - const resultData = (await resultResponse.json()) as unknown + const resultData = await readResponseJsonWithLimit(resultResponse, { + maxBytes: MAX_IMAGE_JSON_BYTES, + label: 'Fal.ai result response', + }) if (!isRecord(resultData)) { throw new Error('Invalid Fal.ai result response') } diff --git a/apps/sim/app/api/tools/tts/route.ts b/apps/sim/app/api/tools/tts/route.ts index 929e995c1da..366d2ee03ee 100644 --- a/apps/sim/app/api/tools/tts/route.ts +++ b/apps/sim/app/api/tools/tts/route.ts @@ -7,11 +7,16 @@ import { getValidationErrorMessage, parseRequest } from '@/lib/api/server' import { checkInternalAuth } from '@/lib/auth/hybrid' import { DEFAULT_EXECUTION_TIMEOUT_MS } from '@/lib/core/execution-limits' import { validateAlphanumericId } from '@/lib/core/security/input-validation' +import { + isPayloadSizeLimitError, + readResponseToBufferWithLimit, +} from '@/lib/core/utils/stream-limits' import { getBaseUrl } from '@/lib/core/utils/urls' import { withRouteHandler } from '@/lib/core/utils/with-route-handler' import { StorageService } from '@/lib/uploads' const logger = createLogger('ProxyTTSAPI') +const MAX_TTS_AUDIO_BYTES = 25 * 1024 * 1024 export const POST = withRouteHandler(async (request: NextRequest) => { try { @@ -98,14 +103,17 @@ export const POST = withRouteHandler(async (request: NextRequest) => { ) } - const audioBlob = await response.blob() + const audioBuffer = await readResponseToBufferWithLimit(response, { + maxBytes: MAX_TTS_AUDIO_BYTES, + label: 'TTS audio response', + signal: request.signal, + }) - if (audioBlob.size === 0) { + if (audioBuffer.length === 0) { logger.error('Empty audio received from ElevenLabs') return NextResponse.json({ error: 'Empty audio received' }, { status: 422 }) } - const audioBuffer = Buffer.from(await audioBlob.arrayBuffer()) const timestamp = Date.now() // Use execution storage for workflow tool calls, copilot for chat UI @@ -160,7 +168,7 @@ export const POST = withRouteHandler(async (request: NextRequest) => { { error: `Internal Server Error: ${getErrorMessage(error, 'Unknown error')}`, }, - { status: 500 } + { status: isPayloadSizeLimitError(error) ? 413 : 500 } ) } }) diff --git a/apps/sim/app/api/tools/tts/unified/route.ts b/apps/sim/app/api/tools/tts/unified/route.ts index 3b6f0a78707..6b3fc064a00 100644 --- a/apps/sim/app/api/tools/tts/unified/route.ts +++ b/apps/sim/app/api/tools/tts/unified/route.ts @@ -10,6 +10,13 @@ import { import { getValidationErrorMessage, parseRequest, validationErrorResponse } from '@/lib/api/server' import { checkInternalAuth } from '@/lib/auth/hybrid' import { validateAlphanumericId } from '@/lib/core/security/input-validation' +import { + assertKnownSizeWithinLimit, + isPayloadSizeLimitError, + readResponseJsonWithLimit, + readResponseTextWithLimit, + readResponseToBufferWithLimit, +} from '@/lib/core/utils/stream-limits' import { getBaseUrl } from '@/lib/core/utils/urls' import { withRouteHandler } from '@/lib/core/utils/with-route-handler' import { StorageService } from '@/lib/uploads' @@ -26,6 +33,9 @@ import type { import { getFileExtension, getMimeType } from '@/tools/tts/types' const logger = createLogger('TtsUnifiedProxyAPI') +const MAX_TTS_AUDIO_BYTES = 25 * 1024 * 1024 +const MAX_TTS_ERROR_BYTES = 64 * 1024 +const MAX_TTS_JSON_BYTES = Math.ceil((MAX_TTS_AUDIO_BYTES * 4) / 3) + 256 * 1024 export const dynamic = 'force-dynamic' export const maxDuration = 60 // 1 minute @@ -208,7 +218,10 @@ export const POST = withRouteHandler(async (request: NextRequest) => { } catch (error) { logger.error(`[${requestId}] TTS synthesis failed:`, error) const errorMessage = getErrorMessage(error, 'TTS synthesis failed') - return NextResponse.json({ error: errorMessage }, { status: 500 }) + return NextResponse.json( + { error: errorMessage }, + { status: isPayloadSizeLimitError(error) ? 413 : 500 } + ) } const timestamp = Date.now() @@ -277,7 +290,10 @@ export const POST = withRouteHandler(async (request: NextRequest) => { } catch (error) { logger.error(`[${requestId}] TTS unified proxy error:`, error) const errorMessage = getErrorMessage(error, 'Unknown error') - return NextResponse.json({ error: errorMessage }, { status: 500 }) + return NextResponse.json( + { error: errorMessage }, + { status: isPayloadSizeLimitError(error) ? 413 : 500 } + ) } }) @@ -308,8 +324,10 @@ async function synthesizeWithOpenAi( throw new Error(`OpenAI TTS API error: ${errorMessage}`) } - const arrayBuffer = await response.arrayBuffer() - const audioBuffer = Buffer.from(arrayBuffer) + const audioBuffer = await readResponseToBufferWithLimit(response, { + maxBytes: MAX_TTS_AUDIO_BYTES, + label: 'OpenAI TTS audio response', + }) const mimeType = getMimeType(responseFormat) return { @@ -364,8 +382,10 @@ async function synthesizeWithDeepgram( throw new Error(`Deepgram TTS API error: ${errorMessage}`) } - const arrayBuffer = await response.arrayBuffer() - const audioBuffer = Buffer.from(arrayBuffer) + const audioBuffer = await readResponseToBufferWithLimit(response, { + maxBytes: MAX_TTS_AUDIO_BYTES, + label: 'Deepgram TTS audio response', + }) let finalFormat: string = encoding if (container === 'wav') { @@ -430,8 +450,10 @@ async function synthesizeWithElevenLabs( throw new Error(`ElevenLabs TTS API error: ${errorMessage}`) } - const arrayBuffer = await response.arrayBuffer() - const audioBuffer = Buffer.from(arrayBuffer) + const audioBuffer = await readResponseToBufferWithLimit(response, { + maxBytes: MAX_TTS_AUDIO_BYTES, + label: 'ElevenLabs TTS audio response', + }) return { audioBuffer, @@ -523,8 +545,10 @@ async function synthesizeWithCartesia( ) } - const arrayBuffer = await response.arrayBuffer() - const audioBuffer = Buffer.from(arrayBuffer) + const audioBuffer = await readResponseToBufferWithLimit(response, { + maxBytes: MAX_TTS_AUDIO_BYTES, + label: 'Cartesia TTS audio response', + }) const format = outputFormat && typeof outputFormat === 'object' && 'container' in outputFormat @@ -621,7 +645,10 @@ async function synthesizeWithGoogle( throw new Error(`Google Cloud TTS API error: ${errorMessage}`) } - const data = await response.json() + const data = await readResponseJsonWithLimit<{ audioContent?: string }>(response, { + maxBytes: MAX_TTS_JSON_BYTES, + label: 'Google TTS JSON response', + }) const audioContent = data.audioContent if (!audioContent) { @@ -629,6 +656,7 @@ async function synthesizeWithGoogle( } const audioBuffer = Buffer.from(audioContent, 'base64') + assertKnownSizeWithinLimit(audioBuffer.length, MAX_TTS_AUDIO_BYTES, 'Google TTS audio response') const format = audioEncoding.toLowerCase().replace('_', '') const mimeType = getMimeType(format) @@ -706,12 +734,17 @@ async function synthesizeWithAzure( }) if (!response.ok) { - const error = await response.text() + const error = await readResponseTextWithLimit(response, { + maxBytes: MAX_TTS_ERROR_BYTES, + label: 'Azure TTS error response', + }) throw new Error(`Azure TTS API error: ${error || response.statusText}`) } - const arrayBuffer = await response.arrayBuffer() - const audioBuffer = Buffer.from(arrayBuffer) + const audioBuffer = await readResponseToBufferWithLimit(response, { + maxBytes: MAX_TTS_AUDIO_BYTES, + label: 'Azure TTS audio response', + }) const format = outputFormat.includes('mp3') ? 'mp3' : 'wav' const mimeType = getMimeType(format) @@ -773,8 +806,10 @@ async function synthesizeWithPlayHT( throw new Error(`PlayHT TTS API error: ${errorMessage}`) } - const arrayBuffer = await response.arrayBuffer() - const audioBuffer = Buffer.from(arrayBuffer) + const audioBuffer = await readResponseToBufferWithLimit(response, { + maxBytes: MAX_TTS_AUDIO_BYTES, + label: 'PlayHT TTS audio response', + }) const format = outputFormat || 'mp3' const mimeType = getMimeType(format) diff --git a/apps/sim/app/api/tools/video/route.ts b/apps/sim/app/api/tools/video/route.ts index 693a6e192c2..6645718bf4a 100644 --- a/apps/sim/app/api/tools/video/route.ts +++ b/apps/sim/app/api/tools/video/route.ts @@ -7,16 +7,31 @@ import { videoProviders, videoToolContract } from '@/lib/api/contracts/tools/med import { getValidationErrorMessage, parseRequest, validationErrorResponse } from '@/lib/api/server' import { checkInternalAuth } from '@/lib/auth/hybrid' import { getMaxExecutionTimeout } from '@/lib/core/execution-limits' +import { + assertKnownSizeWithinLimit, + isPayloadSizeLimitError, + PayloadSizeLimitError, + readResponseToBufferWithLimit, +} from '@/lib/core/utils/stream-limits' import { withRouteHandler } from '@/lib/core/utils/with-route-handler' import { downloadFileFromStorage } from '@/lib/uploads/utils/file-utils.server' import { assertToolFileAccess } from '@/app/api/files/authorization' import type { UserFile } from '@/executor/types' const logger = createLogger('VideoProxyAPI') +const MAX_VIDEO_OUTPUT_BYTES = 250 * 1024 * 1024 +const MAX_VIDEO_REFERENCE_IMAGE_BYTES = 25 * 1024 * 1024 export const dynamic = 'force-dynamic' export const maxDuration = 600 // 10 minutes for video generation +async function readVideoResponseBuffer(response: Response, label: string): Promise { + return readResponseToBufferWithLimit(response, { + maxBytes: MAX_VIDEO_OUTPUT_BYTES, + label, + }) +} + export const POST = withRouteHandler(async (request: NextRequest) => { const requestId = generateId() logger.info(`[${requestId}] Video generation request started`) @@ -214,7 +229,10 @@ export const POST = withRouteHandler(async (request: NextRequest) => { } catch (error) { logger.error(`[${requestId}] Video generation failed:`, error) const errorMessage = getErrorMessage(error, 'Video generation failed') - return NextResponse.json({ error: errorMessage }, { status: 500 }) + return NextResponse.json( + { error: errorMessage }, + { status: isPayloadSizeLimitError(error) ? 413 : 500 } + ) } const executionContext = @@ -298,7 +316,10 @@ export const POST = withRouteHandler(async (request: NextRequest) => { } catch (error) { logger.error(`[${requestId}] Video proxy error:`, error) const errorMessage = getErrorMessage(error, 'Unknown error') - return NextResponse.json({ error: errorMessage }, { status: 500 }) + return NextResponse.json( + { error: errorMessage }, + { status: isPayloadSizeLimitError(error) ? 413 : 500 } + ) } }) @@ -333,7 +354,21 @@ async function generateWithRunway( } if (visualReference) { - const refBuffer = await downloadFileFromStorage(visualReference, requestId, logger) + if (visualReference.size > MAX_VIDEO_REFERENCE_IMAGE_BYTES) { + throw new PayloadSizeLimitError({ + label: 'video visual reference', + maxBytes: MAX_VIDEO_REFERENCE_IMAGE_BYTES, + observedBytes: visualReference.size, + }) + } + const refBuffer = await downloadFileFromStorage(visualReference, requestId, logger, { + maxBytes: MAX_VIDEO_REFERENCE_IMAGE_BYTES, + }) + assertKnownSizeWithinLimit( + refBuffer.length, + MAX_VIDEO_REFERENCE_IMAGE_BYTES, + 'video visual reference' + ) const refBase64 = refBuffer.toString('base64') createPayload.promptImage = `data:${visualReference.type};base64,${refBase64}` // Use promptImage } @@ -388,9 +423,8 @@ async function generateWithRunway( throw new Error(`Failed to download video: ${videoResponse.status}`) } - const arrayBuffer = await videoResponse.arrayBuffer() return { - buffer: Buffer.from(arrayBuffer), + buffer: await readVideoResponseBuffer(videoResponse, 'Runway video response'), width: dimensions.width, height: dimensions.height, jobId: taskId, @@ -510,9 +544,8 @@ async function generateWithVeo( throw new Error(`Failed to download video: ${videoResponse.status}`) } - const arrayBuffer = await videoResponse.arrayBuffer() return { - buffer: Buffer.from(arrayBuffer), + buffer: await readVideoResponseBuffer(videoResponse, 'Veo video response'), width: dimensions.width, height: dimensions.height, jobId: operationName, @@ -616,9 +649,8 @@ async function generateWithLuma( throw new Error(`Failed to download video: ${videoResponse.status}`) } - const arrayBuffer = await videoResponse.arrayBuffer() return { - buffer: Buffer.from(arrayBuffer), + buffer: await readVideoResponseBuffer(videoResponse, 'Luma video response'), width: dimensions.width, height: dimensions.height, jobId: generationId, @@ -766,9 +798,8 @@ async function generateWithMiniMax( throw new Error(`Failed to download video from URL: ${videoResponse.status}`) } - const arrayBuffer = await videoResponse.arrayBuffer() return { - buffer: Buffer.from(arrayBuffer), + buffer: await readVideoResponseBuffer(videoResponse, 'MiniMax video response'), width: dimensions.width, height: dimensions.height, jobId: taskId, @@ -1212,8 +1243,6 @@ async function generateWithFalAI( throw new Error(`Failed to download video: ${videoResponse.status}`) } - const arrayBuffer = await videoResponse.arrayBuffer() - let width = getNumberProperty(videoOutput, 'width') || 1920 let height = getNumberProperty(videoOutput, 'height') || 1080 @@ -1224,7 +1253,7 @@ async function generateWithFalAI( } return { - buffer: Buffer.from(arrayBuffer), + buffer: await readVideoResponseBuffer(videoResponse, 'Fal.ai video response'), width, height, jobId: requestIdFal, diff --git a/apps/sim/app/api/v1/files/route.ts b/apps/sim/app/api/v1/files/route.ts index a286a655de6..a76528a44d5 100644 --- a/apps/sim/app/api/v1/files/route.ts +++ b/apps/sim/app/api/v1/files/route.ts @@ -5,6 +5,11 @@ import { type NextRequest, NextResponse } from 'next/server' import { v1ListFilesContract, v1UploadFileFormFieldsSchema } from '@/lib/api/contracts/v1/files' import { getValidationErrorMessage, parseRequest } from '@/lib/api/server' import { generateRequestId } from '@/lib/core/utils/request' +import { + assertContentLengthWithinLimit, + isPayloadSizeLimitError, + readFileToBufferWithLimit, +} from '@/lib/core/utils/stream-limits' import { withRouteHandler } from '@/lib/core/utils/with-route-handler' import { FileConflictError, @@ -25,6 +30,7 @@ export const dynamic = 'force-dynamic' export const revalidate = 0 const MAX_FILE_SIZE = 100 * 1024 * 1024 // 100MB +const MAX_MULTIPART_OVERHEAD_BYTES = 1024 * 1024 /** GET /api/v1/files — List all files in a workspace. */ export const GET = withRouteHandler(async (request: NextRequest) => { @@ -83,8 +89,22 @@ export const POST = withRouteHandler(async (request: NextRequest) => { let formData: FormData try { + if (request.headers && !request.headers.get('content-length')) { + return NextResponse.json( + { error: 'Content-Length is required for multipart uploads' }, + { status: 411 } + ) + } + assertContentLengthWithinLimit( + request.headers, + MAX_FILE_SIZE + MAX_MULTIPART_OVERHEAD_BYTES, + 'workspace file upload body' + ) formData = await request.formData() - } catch { + } catch (error) { + if (isPayloadSizeLimitError(error)) { + return NextResponse.json({ error: error.message }, { status: 413 }) + } return NextResponse.json( { error: 'Request body must be valid multipart form data' }, { status: 400 } @@ -117,14 +137,17 @@ export const POST = withRouteHandler(async (request: NextRequest) => { { error: `File size exceeds 100MB limit (${(file.size / (1024 * 1024)).toFixed(2)}MB)`, }, - { status: 400 } + { status: 413 } ) } const accessError = await validateWorkspaceAccess(rateLimit, userId, workspaceId, 'write') if (accessError) return accessError - const buffer = Buffer.from(await file.arrayBuffer()) + const buffer = await readFileToBufferWithLimit(file, { + maxBytes: MAX_FILE_SIZE, + label: 'workspace upload file', + }) const userFile = await uploadWorkspaceFile( workspaceId, @@ -172,6 +195,10 @@ export const POST = withRouteHandler(async (request: NextRequest) => { }, }) } catch (error) { + if (isPayloadSizeLimitError(error)) { + return NextResponse.json({ error: error.message }, { status: 413 }) + } + const errorMessage = getErrorMessage(error, 'Failed to upload file') const isDuplicate = error instanceof FileConflictError || errorMessage.includes('already exists') diff --git a/apps/sim/background/cleanup-logs.ts b/apps/sim/background/cleanup-logs.ts index 090f2e4a729..f990701b0d0 100644 --- a/apps/sim/background/cleanup-logs.ts +++ b/apps/sim/background/cleanup-logs.ts @@ -37,9 +37,10 @@ const REFERENCE_CHECK_KEY_CHUNK_SIZE = 200 */ async function filterLargeValueKeysWithoutRetainedReferences( keys: string[], - deletedLogIds: string[] + deletedLogIds: string[], + workspaceIds: string[] ): Promise { - if (keys.length === 0 || deletedLogIds.length === 0) return [] + if (keys.length === 0 || deletedLogIds.length === 0 || workspaceIds.length === 0) return [] const uniqueKeys = Array.from(new Set(keys)) const referencedKeys = new Set() @@ -49,7 +50,8 @@ async function filterLargeValueKeysWithoutRetainedReferences( SELECT DISTINCT k.key AS key FROM ${workflowExecutionLogs} AS wel, unnest(${keyChunk}::text[]) AS k(key) - WHERE wel.id <> ALL(${deletedLogIds}::text[]) + WHERE wel.workspace_id = ANY(${workspaceIds}::text[]) + AND wel.id <> ALL(${deletedLogIds}::text[]) AND position(k.key in wel.execution_data::text) > 0 `) for (const row of rows) referencedKeys.add(row.key) @@ -120,6 +122,7 @@ async function cleanupWorkflowExecutionLogs( db .select({ id: workflowExecutionLogs.id, + workspaceId: workflowExecutionLogs.workspaceId, executionId: workflowExecutionLogs.executionId, executionData: workflowExecutionLogs.executionData, files: workflowExecutionLogs.files, @@ -142,10 +145,12 @@ async function cleanupWorkflowExecutionLogs( .limit(limit), onBatch: async (rows) => { const deletedLogIds = rows.map((row) => row.id) + const workspaceIds = Array.from(new Set(rows.map((row) => row.workspaceId))) const largeValueKeys = rows.flatMap((row) => collectLargeValueKeys(row.executionData)) const unreferencedLargeValueKeys = await filterLargeValueKeysWithoutRetainedReferences( largeValueKeys, - deletedLogIds + deletedLogIds, + workspaceIds ) for (const row of rows) { diff --git a/apps/sim/blocks/blocks/docusign.ts b/apps/sim/blocks/blocks/docusign.ts index d66c001f37f..40ee34ddd51 100644 --- a/apps/sim/blocks/blocks/docusign.ts +++ b/apps/sim/blocks/blocks/docusign.ts @@ -365,6 +365,7 @@ export const DocuSignBlock: BlockConfig = { type: 'json', description: 'Array of CC recipients (recipientId, name, email, status)', }, + file: { type: 'file', description: 'Stored downloaded document file' }, base64Content: { type: 'string', description: 'Base64-encoded document content' }, mimeType: { type: 'string', description: 'Document MIME type' }, fileName: { type: 'string', description: 'Document file name' }, diff --git a/apps/sim/blocks/blocks/file.test.ts b/apps/sim/blocks/blocks/file.test.ts new file mode 100644 index 00000000000..10f0cd0d31e --- /dev/null +++ b/apps/sim/blocks/blocks/file.test.ts @@ -0,0 +1,43 @@ +import { describe, expect, it } from 'vitest' +import { FileV4Block } from '@/blocks/blocks/file' + +describe('FileV4Block', () => { + const buildParams = FileV4Block.tools.config.params + + it('accepts http and https URLs for fetch', () => { + expect( + buildParams({ + operation: 'file_fetch', + fileUrl: 'https://example.com/image.jpg', + _context: { + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', + }, + }) + ).toMatchObject({ + filePath: 'https://example.com/image.jpg', + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', + }) + }) + + it('rejects inline content for fetch', () => { + expect(() => + buildParams({ + operation: 'file_fetch', + fileUrl: '\u0001\u0002raw jpeg bytes', + }) + ).toThrow('File URL must be a valid http or https URL') + }) + + it('rejects data URLs for fetch', () => { + expect(() => + buildParams({ + operation: 'file_fetch', + fileUrl: 'data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD', + }) + ).toThrow('File URL must be a valid http or https URL') + }) +}) diff --git a/apps/sim/blocks/blocks/file.ts b/apps/sim/blocks/blocks/file.ts index 43904e9816f..9bf80cc48a8 100644 --- a/apps/sim/blocks/blocks/file.ts +++ b/apps/sim/blocks/blocks/file.ts @@ -44,6 +44,23 @@ const resolveFilePathsFromInput = (fileInput: unknown): string[] => { return resolved ? [resolved] : [] } +const resolveHttpFileUrl = (value: unknown): string => { + const fileUrl = typeof value === 'string' ? value.trim() : '' + if (!fileUrl) { + throw new Error('File URL is required') + } + + try { + const parsed = new URL(fileUrl) + if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') { + throw new Error('File URL must use http or https') + } + return fileUrl + } catch { + throw new Error('File URL must be a valid http or https URL') + } +} + export const FileBlock: BlockConfig = { type: 'file', name: 'File (Legacy)', @@ -733,11 +750,7 @@ export const FileV4Block: BlockConfig = { } if (operation === 'file_fetch') { - const fileUrl = typeof params.fileUrl === 'string' ? params.fileUrl.trim() : '' - if (!fileUrl) { - logger.error('No file URL provided') - throw new Error('File URL is required') - } + const fileUrl = resolveHttpFileUrl(params.fileUrl) return { filePath: fileUrl, diff --git a/apps/sim/blocks/blocks/google_slides.ts b/apps/sim/blocks/blocks/google_slides.ts index 2608eb71f13..c3540534bca 100644 --- a/apps/sim/blocks/blocks/google_slides.ts +++ b/apps/sim/blocks/blocks/google_slides.ts @@ -3369,6 +3369,7 @@ Return ONLY the text content - no explanations, no markdown formatting markers, title: { type: 'string', description: 'Presentation title' }, // Export presentation + file: { type: 'file', description: 'Stored exported presentation file' }, contentBase64: { type: 'string', description: 'Base64-encoded exported content' }, mimeType: { type: 'string', description: 'MIME type of the exported content' }, sizeBytes: { type: 'number', description: 'Size of the exported content in bytes' }, diff --git a/apps/sim/blocks/blocks/typeform.ts b/apps/sim/blocks/blocks/typeform.ts index cb707c349b3..b22e1a46f46 100644 --- a/apps/sim/blocks/blocks/typeform.ts +++ b/apps/sim/blocks/blocks/typeform.ts @@ -445,6 +445,7 @@ Do not include any explanations, markdown formatting, or other text outside the message: { type: 'string', description: 'Deletion confirmation message' }, // File operation outputs fileUrl: { type: 'string', description: 'Downloaded file URL' }, + file: { type: 'file', description: 'Downloaded file' }, contentType: { type: 'string', description: 'File content type' }, filename: { type: 'string', description: 'File name' }, // Insights outputs diff --git a/apps/sim/lib/api/contracts/storage-transfer.ts b/apps/sim/lib/api/contracts/storage-transfer.ts index 45e42a7832d..e4a27726227 100644 --- a/apps/sim/lib/api/contracts/storage-transfer.ts +++ b/apps/sim/lib/api/contracts/storage-transfer.ts @@ -301,7 +301,9 @@ export const fileDownloadBodySchema = z export const fileParseBodySchema = z .object({ - filePath: z.union([z.string(), z.array(z.string())]).optional(), + filePath: z + .union([z.string(), z.array(z.string()).max(10, 'At most 10 files can be parsed at once')]) + .optional(), fileType: z.string().optional().default(''), headers: z.record(z.string(), z.string()).optional(), workspaceId: z.string().optional().default(''), diff --git a/apps/sim/lib/billing/cleanup-dispatcher.ts b/apps/sim/lib/billing/cleanup-dispatcher.ts index 23c410ee365..279b04901ef 100644 --- a/apps/sim/lib/billing/cleanup-dispatcher.ts +++ b/apps/sim/lib/billing/cleanup-dispatcher.ts @@ -3,7 +3,7 @@ import type { WorkspaceMode } from '@sim/db/schema' import { organization, workspace } from '@sim/db/schema' import { createLogger } from '@sim/logger' import { tasks } from '@trigger.dev/sdk' -import { eq, isNull } from 'drizzle-orm' +import { and, asc, eq, gt, isNull } from 'drizzle-orm' import { getOrganizationSubscription } from '@/lib/billing/core/billing' import { getHighestPriorityPersonalSubscription } from '@/lib/billing/core/subscription' import { getPlanType, type PlanCategory } from '@/lib/billing/plan-helpers' @@ -18,6 +18,7 @@ const logger = createLogger('RetentionDispatcher') /** Trigger.dev's documented cap on items per `batchTrigger` call (SDK 4.3.1+). */ const BATCH_TRIGGER_CHUNK_SIZE = 1000 +const WORKSPACE_SCOPE_PAGE_SIZE = 500 /** Bounds per-run memory + DB connections regardless of plan size. */ const WORKSPACES_PER_CLEANUP_CHUNK = 500 @@ -63,6 +64,10 @@ const DAY = 24 type PlanResolutionEntry = readonly [string, PlanCategory] +function getCleanupConcurrencyKey(jobType: CleanupJobType): string { + return `cleanup:${jobType}` +} + /** * Single source of truth for cleanup retention: which key each job type reads * from `organization.dataRetentionSettings`, and the default retention (in @@ -84,7 +89,9 @@ export const CLEANUP_CONFIG = { }, } as const satisfies Record -async function listActiveWorkspaceCleanupScopeRows(): Promise { +async function listActiveWorkspaceCleanupScopeRowsPage( + afterId: string | null +): Promise { const rows = await db .select({ id: workspace.id, @@ -95,7 +102,13 @@ async function listActiveWorkspaceCleanupScopeRows(): Promise ({ ...row, @@ -199,64 +212,74 @@ const GLOBAL_HOUSEKEEPING_PLAN: Partial> = 'cleanup-logs': 'free', } -async function buildCleanupChunks(jobType: CleanupJobType): Promise { - const config = CLEANUP_CONFIG[jobType] - const activeRows = await listActiveWorkspaceCleanupScopeRows() - const planByWorkspaceId = await resolvePlanTypesByWorkspaceId(activeRows) +interface BuildCleanupChunksResult { + chunks: CleanupJobPayload[] + workspaceCount: number +} +async function buildCleanupChunks(jobType: CleanupJobType): Promise { + const config = CLEANUP_CONFIG[jobType] const chunks: CleanupJobPayload[] = [] + let workspaceCount = 0 + let afterId: string | null = null - for (const plan of NON_ENTERPRISE_PLANS) { - const retentionHours = config.defaults[plan] - if (retentionHours === null) continue - const workspaceIds = activeRows - .filter((row) => planByWorkspaceId.get(row.id) === plan) - .map((row) => row.id) - if (workspaceIds.length === 0) continue - const planChunks = chunkArray(workspaceIds, WORKSPACES_PER_CLEANUP_CHUNK) - for (const [idx, ws] of planChunks.entries()) { - chunks.push({ - plan, - workspaceIds: ws, - retentionHours, - label: planChunks.length > 1 ? `${plan}/${idx + 1}` : plan, - }) - } - } + while (true) { + const rows = await listActiveWorkspaceCleanupScopeRowsPage(afterId) + if (rows.length === 0) break - for (const row of activeRows) { - if (planByWorkspaceId.get(row.id) !== 'enterprise') continue - const hours = row.organizationSettings?.[config.key] - if (hours == null) continue - chunks.push({ - plan: 'enterprise', - workspaceIds: [row.id], - retentionHours: hours, - label: `enterprise/${row.id}`, - }) - } + afterId = rows[rows.length - 1].id + const planByWorkspaceId = await resolvePlanTypesByWorkspaceId(rows) - const housekeepingPlan = GLOBAL_HOUSEKEEPING_PLAN[jobType] - if (housekeepingPlan) { - const target = chunks.find((c) => c.plan === housekeepingPlan) - if (target) { - target.runGlobalHousekeeping = true - } else if (housekeepingPlan !== 'enterprise') { - // Synthetic empty chunk so housekeeping still fires when the plan has no workspaces. - const retentionHours = config.defaults[housekeepingPlan] - if (retentionHours != null) { + for (const plan of NON_ENTERPRISE_PLANS) { + const retentionHours = config.defaults[plan] + if (retentionHours === null) continue + + const workspaceIds = rows + .filter((row) => planByWorkspaceId.get(row.id) === plan) + .map((row) => row.id) + if (workspaceIds.length === 0) continue + + workspaceCount += workspaceIds.length + const planChunks = chunkArray(workspaceIds, WORKSPACES_PER_CLEANUP_CHUNK) + for (const [idx, ws] of planChunks.entries()) { chunks.push({ - plan: housekeepingPlan, - workspaceIds: [], + plan, + workspaceIds: ws, retentionHours, - label: `${housekeepingPlan}/housekeeping`, - runGlobalHousekeeping: true, + label: planChunks.length > 1 ? `${plan}/${idx + 1}` : plan, }) } } + + for (const row of rows) { + if (planByWorkspaceId.get(row.id) !== 'enterprise') continue + const hours = row.organizationSettings?.[config.key] + if (hours == null) continue + workspaceCount++ + chunks.push({ + plan: 'enterprise', + workspaceIds: [row.id], + retentionHours: hours, + label: `enterprise/${row.id}`, + }) + } } - return chunks + const housekeepingPlan = GLOBAL_HOUSEKEEPING_PLAN[jobType] + if (housekeepingPlan && housekeepingPlan !== 'enterprise') { + const retentionHours = config.defaults[housekeepingPlan] + if (retentionHours != null) { + chunks.push({ + plan: housekeepingPlan, + workspaceIds: [], + retentionHours, + label: `${housekeepingPlan}/housekeeping`, + runGlobalHousekeeping: true, + }) + } + } + + return { chunks, workspaceCount } } /** @@ -270,8 +293,7 @@ export async function dispatchCleanupJobs(jobType: CleanupJobType): Promise<{ chunkCount: number workspaceCount: number }> { - const chunks = await buildCleanupChunks(jobType) - const workspaceCount = chunks.reduce((sum, c) => sum + c.workspaceIds.length, 0) + const { chunks, workspaceCount } = await buildCleanupChunks(jobType) logger.info( `[${jobType}] Dispatching: ${chunks.length} chunk(s) covering ${workspaceCount} workspace(s)` @@ -283,6 +305,30 @@ export async function dispatchCleanupJobs(jobType: CleanupJobType): Promise<{ const jobIds: string[] = [] + const inlineRunner = shouldExecuteInline() ? await buildCleanupRunner(jobType) : undefined + if (inlineRunner) { + let succeeded = 0 + let failed = 0 + + for (const payload of chunks) { + try { + await inlineRunner(payload, new AbortController().signal) + jobIds.push(`inline:${jobType}:${payload.label}`) + succeeded++ + } catch (error) { + failed++ + logger.error(`[${jobType}] Inline cleanup chunk failed:`, { + plan: payload.plan, + label: payload.label, + error, + }) + } + } + + logger.info(`[${jobType}] Inline cleanup chunks: ${succeeded} succeeded, ${failed} failed`) + return { jobIds, jobCount: jobIds.length, chunkCount: chunks.length, workspaceCount } + } + if (isTriggerAvailable()) { for (let i = 0; i < chunks.length; i += BATCH_TRIGGER_CHUNK_SIZE) { const batch = chunks.slice(i, i + BATCH_TRIGGER_CHUNK_SIZE) @@ -292,6 +338,7 @@ export async function dispatchCleanupJobs(jobType: CleanupJobType): Promise<{ payload, options: { tags: [`plan:${payload.plan}`, `jobType:${jobType}`], + concurrencyKey: getCleanupConcurrencyKey(jobType), }, })) ) @@ -300,22 +347,19 @@ export async function dispatchCleanupJobs(jobType: CleanupJobType): Promise<{ return { jobIds, jobCount: jobIds.length, chunkCount: chunks.length, workspaceCount } } - // Fallback: parallel enqueue via abstraction (self-hosted / inline path) - const inlineRunner = shouldExecuteInline() ? await buildCleanupRunner(jobType) : undefined const jobQueue = await getJobQueue() - const results = await Promise.allSettled( - chunks.map((payload) => jobQueue.enqueue(jobType, payload, { runner: inlineRunner })) - ) - let succeeded = 0 let failed = 0 - for (const result of results) { - if (result.status === 'fulfilled') { - jobIds.push(result.value) + for (const payload of chunks) { + try { + const jobId = await jobQueue.enqueue(jobType, payload, { + concurrencyKey: getCleanupConcurrencyKey(jobType), + }) + jobIds.push(jobId) succeeded++ - } else { + } catch (reason) { failed++ - logger.error(`[${jobType}] Failed to enqueue chunk:`, { reason: result.reason }) + logger.error(`[${jobType}] Failed to enqueue chunk:`, { reason }) } } logger.info(`[${jobType}] Chunk enqueue: ${succeeded} succeeded, ${failed} failed`) diff --git a/apps/sim/lib/core/security/input-validation.server.ts b/apps/sim/lib/core/security/input-validation.server.ts index e16bda7c6ea..d76928ade83 100644 --- a/apps/sim/lib/core/security/input-validation.server.ts +++ b/apps/sim/lib/core/security/input-validation.server.ts @@ -7,6 +7,7 @@ import { toError } from '@sim/utils/errors' import * as ipaddr from 'ipaddr.js' import { isHosted } from '@/lib/core/config/feature-flags' import { type ValidationResult, validateExternalUrl } from '@/lib/core/security/input-validation' +import { PayloadSizeLimitError } from '@/lib/core/utils/stream-limits' const logger = createLogger('InputValidation') @@ -263,6 +264,10 @@ function isRedirectStatus(status: number): boolean { return status >= 300 && status < 400 && status !== 304 } +function isRetryableHttpStatus(status: number): boolean { + return status === 429 || (status >= 500 && status <= 599) +} + function resolveRedirectUrl(baseUrl: string, location: string): string { try { return new URL(location, baseUrl).toString() @@ -381,6 +386,37 @@ export async function secureFetchWithPinnedIP( } } + const contentLength = headersRecord['content-length'] + if (typeof maxResponseBytes === 'number' && maxResponseBytes > 0 && contentLength) { + const parsedLength = Number.parseInt(contentLength, 10) + if (Number.isFinite(parsedLength) && parsedLength > maxResponseBytes) { + cleanupAbort() + res.destroy() + req.destroy() + if (isRetryableHttpStatus(statusCode)) { + settledResolve({ + ok: false, + status: statusCode, + statusText: res.statusMessage || '', + headers: new SecureFetchHeaders(headersRecord, setCookieArray), + body: null, + text: async () => '', + json: async () => ({}), + arrayBuffer: async () => new ArrayBuffer(0), + }) + return + } + settledReject( + new PayloadSizeLimitError({ + label: 'response body', + maxBytes: maxResponseBytes, + observedBytes: parsedLength, + }) + ) + return + } + } + let totalBytes = 0 const nodeRes = res const body = new ReadableStream({ @@ -394,7 +430,11 @@ export async function secureFetchWithPinnedIP( ) { cleanupAbort() controller.error( - new Error(`Response exceeded maximum size of ${maxResponseBytes} bytes`) + new PayloadSizeLimitError({ + label: 'response body', + maxBytes: maxResponseBytes, + observedBytes: totalBytes, + }) ) nodeRes.destroy() return diff --git a/apps/sim/lib/core/utils/stream-limits.test.ts b/apps/sim/lib/core/utils/stream-limits.test.ts new file mode 100644 index 00000000000..e7983a20d70 --- /dev/null +++ b/apps/sim/lib/core/utils/stream-limits.test.ts @@ -0,0 +1,189 @@ +/** + * @vitest-environment node + */ + +import { Readable } from 'stream' +import { describe, expect, it, vi } from 'vitest' +import { + assertContentLengthWithinLimit, + PayloadSizeLimitError, + readFileToBufferWithLimit, + readNodeStreamToBufferWithLimit, + readResponseJsonWithLimit, + readResponseTextWithLimit, + readResponseToBufferWithLimit, + readStreamToBufferWithLimit, +} from '@/lib/core/utils/stream-limits' + +function streamFromChunks(chunks: Uint8Array[]): ReadableStream { + let index = 0 + return new ReadableStream({ + pull(controller) { + if (index >= chunks.length) { + controller.close() + return + } + controller.enqueue(chunks[index]) + index += 1 + }, + }) +} + +function headers(contentLength?: string): Headers { + const headers = new Headers() + if (contentLength !== undefined) headers.set('content-length', contentLength) + return headers +} + +describe('stream limits', () => { + it('reads a stream under the limit', async () => { + const buffer = await readStreamToBufferWithLimit( + streamFromChunks([new TextEncoder().encode('hello'), new TextEncoder().encode(' world')]), + { maxBytes: 32, label: 'test payload' } + ) + + expect(buffer.toString('utf-8')).toBe('hello world') + }) + + it('rejects when content-length is over the limit', () => { + expect(() => assertContentLengthWithinLimit(headers('11'), 10, 'download')).toThrow( + PayloadSizeLimitError + ) + }) + + it('cancels response bodies when content-length preflight rejects', async () => { + const cancelSpy = vi.fn() + const body = new ReadableStream({ + cancel: cancelSpy, + }) + + await expect( + readResponseToBufferWithLimit( + { + headers: headers('11'), + body, + }, + { maxBytes: 10, label: 'download' } + ) + ).rejects.toBeInstanceOf(PayloadSizeLimitError) + expect(cancelSpy).toHaveBeenCalled() + }) + + it('allows content-length exactly at the limit', () => { + expect(() => assertContentLengthWithinLimit(headers('10'), 10, 'download')).not.toThrow() + }) + + it('rejects when streamed bytes exceed the limit', async () => { + await expect( + readStreamToBufferWithLimit(streamFromChunks([new Uint8Array(6), new Uint8Array(5)]), { + maxBytes: 10, + label: 'download', + }) + ).rejects.toMatchObject({ + name: 'PayloadSizeLimitError', + maxBytes: 10, + observedBytes: 11, + }) + }) + + it('rejects underreported content-length via streamed byte counting', async () => { + await expect( + readResponseToBufferWithLimit( + { + headers: headers('5'), + body: streamFromChunks([new Uint8Array(6), new Uint8Array(5)]), + }, + { maxBytes: 10, label: 'download' } + ) + ).rejects.toBeInstanceOf(PayloadSizeLimitError) + }) + + it('returns an empty buffer for a missing body', async () => { + const buffer = await readResponseToBufferWithLimit( + { headers: headers('0'), body: null }, + { maxBytes: 10, label: 'empty response' } + ) + + expect(buffer.length).toBe(0) + }) + + it('reads text and JSON responses with limits', async () => { + const text = await readResponseTextWithLimit( + { body: streamFromChunks([new TextEncoder().encode('hello')]) }, + { maxBytes: 10, label: 'text response' } + ) + const json = await readResponseJsonWithLimit<{ ok: boolean }>( + { body: streamFromChunks([new TextEncoder().encode('{"ok":true}')]) }, + { maxBytes: 20, label: 'json response' } + ) + + expect(text).toBe('hello') + expect(json.ok).toBe(true) + }) + + it('prefers arrayBuffer over text for binary response fallbacks', async () => { + const bytes = Uint8Array.from([0, 255, 1, 254]) + const arrayBuffer = vi.fn(async () => bytes.buffer) + const text = vi.fn(async () => 'corrupted') + + const buffer = await readResponseToBufferWithLimit( + { headers: headers(String(bytes.byteLength)), arrayBuffer, text }, + { maxBytes: 10, label: 'binary response' } + ) + + expect(buffer).toEqual(Buffer.from(bytes)) + expect(arrayBuffer).toHaveBeenCalled() + expect(text).not.toHaveBeenCalled() + }) + + it('cancels when the abort signal is already aborted', async () => { + const controller = new AbortController() + controller.abort(new Error('stop')) + const cancelSpy = vi.fn() + const stream = new ReadableStream({ + pull(controller) { + controller.enqueue(new TextEncoder().encode('content')) + }, + cancel: cancelSpy, + }) + + await expect( + readStreamToBufferWithLimit(stream, { + maxBytes: 100, + label: 'abortable', + signal: controller.signal, + }) + ).rejects.toThrow('stop') + expect(cancelSpy).toHaveBeenCalled() + }) + + it('checks file size before materializing a File', async () => { + const file = new File(['hello'], 'small.txt', { type: 'text/plain' }) + const buffer = await readFileToBufferWithLimit(file, { maxBytes: 5, label: 'upload file' }) + + expect(buffer.toString('utf-8')).toBe('hello') + await expect( + readFileToBufferWithLimit(file, { maxBytes: 4, label: 'upload file' }) + ).rejects.toBeInstanceOf(PayloadSizeLimitError) + }) + + it('rechecks materialized file bytes after arrayBuffer', async () => { + const file = { + size: 1, + arrayBuffer: vi.fn(async () => new Uint8Array(6).buffer), + } as unknown as File + + await expect( + readFileToBufferWithLimit(file, { maxBytes: 5, label: 'upload file' }) + ).rejects.toBeInstanceOf(PayloadSizeLimitError) + }) + + it('rejects node streams that exceed the limit', async () => { + await expect( + readNodeStreamToBufferWithLimit(Readable.from([Buffer.alloc(6), Buffer.alloc(5)]), { + maxBytes: 10, + label: 'storage download', + }) + ).rejects.toBeInstanceOf(PayloadSizeLimitError) + }) +}) diff --git a/apps/sim/lib/core/utils/stream-limits.ts b/apps/sim/lib/core/utils/stream-limits.ts new file mode 100644 index 00000000000..078072b8d02 --- /dev/null +++ b/apps/sim/lib/core/utils/stream-limits.ts @@ -0,0 +1,283 @@ +import { toError } from '@sim/utils/errors' + +export const DEFAULT_MAX_ERROR_BODY_BYTES = 64 * 1024 + +export interface PayloadSizeLimitContext { + label: string + maxBytes: number + observedBytes?: number +} + +export class PayloadSizeLimitError extends Error { + readonly label: string + readonly maxBytes: number + readonly observedBytes?: number + + constructor({ label, maxBytes, observedBytes }: PayloadSizeLimitContext) { + super( + observedBytes === undefined + ? `${label} exceeds maximum size of ${maxBytes} bytes` + : `${label} exceeds maximum size of ${maxBytes} bytes (${observedBytes} bytes received)` + ) + this.name = 'PayloadSizeLimitError' + this.label = label + this.maxBytes = maxBytes + this.observedBytes = observedBytes + } +} + +export function isPayloadSizeLimitError(error: unknown): error is PayloadSizeLimitError { + return error instanceof PayloadSizeLimitError +} + +export function assertKnownSizeWithinLimit(size: number, maxBytes: number, label: string): void { + if (Number.isFinite(size) && size > maxBytes) { + throw new PayloadSizeLimitError({ label, maxBytes, observedBytes: size }) + } +} + +function getContentLength( + headers: { get(name: string): string | null } | undefined +): number | null { + const rawLength = headers?.get('content-length') + if (!rawLength) return null + const parsed = Number.parseInt(rawLength, 10) + return Number.isFinite(parsed) && parsed >= 0 ? parsed : null +} + +export function assertContentLengthWithinLimit( + headers: { get(name: string): string | null } | undefined, + maxBytes: number, + label: string +): void { + const contentLength = getContentLength(headers) + if (contentLength !== null) { + assertKnownSizeWithinLimit(contentLength, maxBytes, label) + } +} + +export interface ReadStreamWithLimitOptions { + maxBytes: number + label: string + signal?: AbortSignal + onChunk?: (chunk: Uint8Array, totalBytes: number) => void | Promise +} + +export async function readStreamToBufferWithLimit( + stream: ReadableStream | null | undefined, + options: ReadStreamWithLimitOptions +): Promise { + if (!stream) return Buffer.alloc(0) + + const reader = stream.getReader() + const chunks: Buffer[] = [] + let totalBytes = 0 + + try { + while (true) { + if (options.signal?.aborted) { + await reader.cancel(options.signal.reason).catch(() => {}) + throw toError(options.signal.reason ?? new Error('Aborted')) + } + + const { done, value } = await reader.read() + if (done) break + if (!value) continue + + totalBytes += value.byteLength + if (totalBytes > options.maxBytes) { + await reader.cancel().catch(() => {}) + throw new PayloadSizeLimitError({ + label: options.label, + maxBytes: options.maxBytes, + observedBytes: totalBytes, + }) + } + + await options.onChunk?.(value, totalBytes) + chunks.push(Buffer.from(value)) + } + } finally { + reader.releaseLock() + } + + return Buffer.concat(chunks, totalBytes) +} + +export async function readNodeStreamToBufferWithLimit( + stream: NodeJS.ReadableStream | null | undefined, + options: ReadStreamWithLimitOptions +): Promise { + if (!stream) return Buffer.alloc(0) + + return new Promise((resolve, reject) => { + const chunks: Buffer[] = [] + let totalBytes = 0 + let settled = false + + const finish = (callback: () => void) => { + if (settled) return + settled = true + cleanup() + callback() + } + + const cleanup = () => { + stream.off('data', onData) + stream.off('end', onEnd) + stream.off('error', onError) + options.signal?.removeEventListener('abort', onAbort) + } + + const onAbort = () => { + if ('destroy' in stream && typeof stream.destroy === 'function') { + stream.destroy(toError(options.signal?.reason ?? new Error('Aborted'))) + } + finish(() => reject(toError(options.signal?.reason ?? new Error('Aborted')))) + } + + const onData = (chunk: Buffer | Uint8Array | string) => { + const buffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk) + totalBytes += buffer.byteLength + + if (totalBytes > options.maxBytes) { + if ('destroy' in stream && typeof stream.destroy === 'function') { + stream.destroy() + } + finish(() => + reject( + new PayloadSizeLimitError({ + label: options.label, + maxBytes: options.maxBytes, + observedBytes: totalBytes, + }) + ) + ) + return + } + + void options.onChunk?.(buffer, totalBytes) + chunks.push(buffer) + } + + const onEnd = () => { + finish(() => resolve(Buffer.concat(chunks, totalBytes))) + } + + const onError = (error: unknown) => { + finish(() => reject(error)) + } + + if (options.signal?.aborted) { + onAbort() + return + } + + options.signal?.addEventListener('abort', onAbort, { once: true }) + stream.on('data', onData) + stream.on('end', onEnd) + stream.on('error', onError) + }) +} + +export interface ReadResponseWithLimitOptions extends ReadStreamWithLimitOptions { + headers?: { get(name: string): string | null } + preferTextFallback?: boolean +} + +export async function readResponseToBufferWithLimit( + response: { + headers?: { get(name: string): string | null } + body?: ReadableStream | null + arrayBuffer?: () => Promise + text?: () => Promise + }, + options: ReadResponseWithLimitOptions +): Promise { + try { + assertContentLengthWithinLimit( + response.headers ?? options.headers, + options.maxBytes, + options.label + ) + } catch (error) { + if (isPayloadSizeLimitError(error)) { + await response.body?.cancel(error).catch(() => {}) + } + throw error + } + if (!response.body && options.preferTextFallback && response.text) { + const text = await response.text() + const buffer = Buffer.from(text) + assertKnownSizeWithinLimit(buffer.byteLength, options.maxBytes, options.label) + return buffer + } + if (!response.body && response.arrayBuffer) { + const buffer = Buffer.from(await response.arrayBuffer()) + assertKnownSizeWithinLimit(buffer.byteLength, options.maxBytes, options.label) + if (buffer.byteLength > 0 || !response.text) { + return buffer + } + const text = await response.text() + const textBuffer = Buffer.from(text) + assertKnownSizeWithinLimit(textBuffer.byteLength, options.maxBytes, options.label) + return textBuffer + } + if (!response.body && response.text) { + const text = await response.text() + const buffer = Buffer.from(text) + assertKnownSizeWithinLimit(buffer.byteLength, options.maxBytes, options.label) + return buffer + } + return readStreamToBufferWithLimit(response.body, options) +} + +export async function readResponseTextWithLimit( + response: { + headers?: { get(name: string): string | null } + body?: ReadableStream | null + arrayBuffer?: () => Promise + text?: () => Promise + }, + options: ReadResponseWithLimitOptions +): Promise { + return ( + await readResponseToBufferWithLimit(response, { ...options, preferTextFallback: true }) + ).toString('utf-8') +} + +export async function readResponseJsonWithLimit( + response: { + headers?: { get(name: string): string | null } + body?: ReadableStream | null + }, + options: ReadResponseWithLimitOptions +): Promise { + return JSON.parse(await readResponseTextWithLimit(response, options)) as T +} + +export async function readFileToBufferWithLimit( + file: File, + options: { maxBytes: number; label: string } +): Promise { + assertKnownSizeWithinLimit(file.size, options.maxBytes, options.label) + const buffer = Buffer.from(await file.arrayBuffer()) + assertKnownSizeWithinLimit(buffer.byteLength, options.maxBytes, options.label) + return buffer +} + +export async function consumeOrCancelBody( + response: { body?: ReadableStream | null }, + maxBytes = DEFAULT_MAX_ERROR_BODY_BYTES +): Promise { + if (!response.body) return + + try { + await readStreamToBufferWithLimit(response.body, { + maxBytes, + label: 'response body', + }) + } catch { + await response.body.cancel().catch(() => {}) + } +} diff --git a/apps/sim/lib/execution/payloads/materialization.server.ts b/apps/sim/lib/execution/payloads/materialization.server.ts index 0a7a8e38572..58093bed715 100644 --- a/apps/sim/lib/execution/payloads/materialization.server.ts +++ b/apps/sim/lib/execution/payloads/materialization.server.ts @@ -1,5 +1,6 @@ import { createLogger, type Logger } from '@sim/logger' import { toError } from '@sim/utils/errors' +import { isPayloadSizeLimitError } from '@/lib/core/utils/stream-limits' import { isUserFileWithMetadata } from '@/lib/core/utils/user-file' import { getLargeValueMaterializationError, @@ -132,22 +133,31 @@ export async function readLargeValueRefFromStorage( assertLargeValueRefAccess(ref, options) assertInlineMaterializationSize(ref.size, options.maxBytes) + const maxBytes = options.maxBytes ?? MAX_INLINE_MATERIALIZATION_BYTES try { const { StorageService } = await import('@/lib/uploads') const buffer = await StorageService.downloadFile({ key: ref.key, context: 'execution', + maxBytes, }) - if (buffer.length > (options.maxBytes ?? MAX_INLINE_MATERIALIZATION_BYTES)) { + if (buffer.length > maxBytes) { throw new ExecutionResourceLimitError({ resource: 'execution_payload_bytes', attemptedBytes: buffer.length, - limitBytes: options.maxBytes ?? MAX_INLINE_MATERIALIZATION_BYTES, + limitBytes: maxBytes, }) } return JSON.parse(buffer.toString('utf8')) } catch (error) { + if (isPayloadSizeLimitError(error)) { + throw new ExecutionResourceLimitError({ + resource: 'execution_payload_bytes', + attemptedBytes: error.observedBytes ?? maxBytes + 1, + limitBytes: maxBytes, + }) + } if (error instanceof ExecutionResourceLimitError) { throw error } @@ -280,7 +290,18 @@ export async function readUserFileContent( const log = getLogger(options) const requestId = options.requestId ?? 'unknown' - buffer = await downloadFileFromStorage(file, requestId, log) + try { + buffer = await downloadFileFromStorage(file, requestId, log, { maxBytes: maxSourceBytes }) + } catch (error) { + if (isPayloadSizeLimitError(error)) { + throw new ExecutionResourceLimitError({ + resource: 'execution_payload_bytes', + attemptedBytes: error.observedBytes ?? maxSourceBytes + 1, + limitBytes: maxSourceBytes, + }) + } + throw error + } if (!buffer) { throw new Error(`File content for ${file.name} is unavailable.`) diff --git a/apps/sim/lib/execution/payloads/store.test.ts b/apps/sim/lib/execution/payloads/store.test.ts index 089f8284f07..8b16cb4caa5 100644 --- a/apps/sim/lib/execution/payloads/store.test.ts +++ b/apps/sim/lib/execution/payloads/store.test.ts @@ -2,6 +2,7 @@ * @vitest-environment node */ import { beforeEach, describe, expect, it, vi } from 'vitest' +import { PayloadSizeLimitError } from '@/lib/core/utils/stream-limits' import { cacheLargeValue, clearLargeValueCacheForTests, @@ -429,6 +430,82 @@ describe('large execution payload store', () => { ).rejects.toMatchObject({ code: EXECUTION_RESOURCE_LIMIT_CODE }) }) + it('passes source byte limits into execution file storage downloads', async () => { + const workspaceId = '11111111-1111-4111-8111-111111111111' + const workflowId = '22222222-2222-4222-8222-222222222222' + const executionId = '33333333-3333-4333-8333-333333333333' + mockDownloadFile.mockResolvedValueOnce(Buffer.from('hello', 'utf8')) + + await expect( + readUserFileContent( + { + id: 'file_1', + name: 'hello.txt', + url: `/api/files/serve/execution/${workspaceId}/${workflowId}/${executionId}/hello.txt`, + key: `execution/${workspaceId}/${workflowId}/${executionId}/hello.txt`, + context: 'execution', + size: 5, + type: 'text/plain', + }, + { + workspaceId, + workflowId, + executionId, + userId: 'user-1', + encoding: 'text', + maxSourceBytes: 6, + } + ) + ).resolves.toBe('hello') + + expect(mockDownloadFile).toHaveBeenCalledWith( + expect.objectContaining({ + key: `execution/${workspaceId}/${workflowId}/${executionId}/hello.txt`, + context: 'execution', + maxBytes: 6, + }) + ) + }) + + it('converts storage byte-limit failures into execution resource-limit errors', async () => { + const workspaceId = '11111111-1111-4111-8111-111111111111' + const workflowId = '22222222-2222-4222-8222-222222222222' + const executionId = '33333333-3333-4333-8333-333333333333' + mockDownloadFile.mockRejectedValueOnce( + new PayloadSizeLimitError({ + label: 'storage file download', + maxBytes: 6, + observedBytes: 7, + }) + ) + + await expect( + readUserFileContent( + { + id: 'file_1', + name: 'hello.txt', + url: `/api/files/serve/execution/${workspaceId}/${workflowId}/${executionId}/hello.txt`, + key: `execution/${workspaceId}/${workflowId}/${executionId}/hello.txt`, + context: 'execution', + size: 5, + type: 'text/plain', + }, + { + workspaceId, + workflowId, + executionId, + userId: 'user-1', + encoding: 'text', + maxSourceBytes: 6, + } + ) + ).rejects.toMatchObject({ + code: EXECUTION_RESOURCE_LIMIT_CODE, + attemptedBytes: 7, + limitBytes: 6, + }) + }) + it('allows explicit chunked file reads to slice within the inline cap', async () => { const workspaceId = '11111111-1111-4111-8111-111111111111' const workflowId = '22222222-2222-4222-8222-222222222222' diff --git a/apps/sim/lib/logs/execution/logger.test.ts b/apps/sim/lib/logs/execution/logger.test.ts index 4f4bb7ff739..645168c2328 100644 --- a/apps/sim/lib/logs/execution/logger.test.ts +++ b/apps/sim/lib/logs/execution/logger.test.ts @@ -168,6 +168,97 @@ describe('ExecutionLogger', () => { expect(completedData.hasTraceSpans).toBe(false) expect(completedData.traceSpanCount).toBe(0) }) + + test('summarizes oversized execution data before storage', () => { + const loggerInstance = new ExecutionLogger() as any + const largePayload = 'x'.repeat(220_000) + const executionState = { + blockStates: { + blockA: { + output: { data: largePayload }, + executed: true, + executionTime: 10, + }, + }, + executedBlocks: ['blockA'], + blockLogs: [ + { + blockId: 'blockA', + blockName: 'HTTP', + blockType: 'api', + startedAt: '2025-01-01T00:00:00.000Z', + endedAt: '2025-01-01T00:00:01.000Z', + durationMs: 1000, + success: true, + executionOrder: 1, + input: { url: 'https://example.com/image.jpg', data: largePayload }, + output: { data: largePayload }, + }, + ], + decisions: { router: {}, condition: {} }, + completedLoops: [], + activeExecutionPath: [], + } + + const completedData = loggerInstance.buildCompletedExecutionData({ + traceSpans: [ + { + id: 'workflow-execution', + name: 'Workflow Execution', + type: 'workflow', + duration: 1000, + startTime: '2025-01-01T00:00:00.000Z', + endTime: '2025-01-01T00:00:01.000Z', + status: 'success', + children: [ + { + id: 'blockA-1', + name: 'HTTP', + type: 'api', + duration: 1000, + startTime: '2025-01-01T00:00:00.000Z', + endTime: '2025-01-01T00:00:01.000Z', + status: 'success', + blockId: 'blockA', + executionOrder: 1, + input: { url: 'https://example.com/image.jpg', data: largePayload }, + output: { data: largePayload }, + }, + ], + }, + ], + finalOutput: { data: largePayload }, + executionState, + finalizationPath: 'completed', + executionCost: { + tokens: { input: 0, output: 0, total: 0 }, + models: {}, + }, + }) + + const compacted = loggerInstance.compactExecutionDataForStorage( + completedData, + 'execution-oversized' + ) + const storedBytes = Buffer.byteLength(JSON.stringify(compacted), 'utf8') + + expect(storedBytes).toBeLessThanOrEqual(500 * 1024) + expect(compacted.executionDataTruncated).toBe(true) + expect(compacted.executionState).toBeUndefined() + expect(compacted.executionStateSummary).toEqual({ + executedBlockCount: 1, + blockLogCount: 1, + completedLoopCount: 0, + activeExecutionPathLength: 0, + pendingQueueLength: 0, + }) + expect(compacted.traceSpans?.[0]?.children?.[0]?.input).toEqual({ + _truncated: true, + reason: 'execution_data_size_limit', + originalBytes: expect.any(Number), + summary: 'object with 2 keys', + }) + }) }) describe('file extraction', () => { diff --git a/apps/sim/lib/logs/execution/logger.ts b/apps/sim/lib/logs/execution/logger.ts index 07b7af219bb..1027aed046d 100644 --- a/apps/sim/lib/logs/execution/logger.ts +++ b/apps/sim/lib/logs/execution/logger.ts @@ -48,6 +48,160 @@ const TRIGGER_COUNTER_MAP: Record = { } as const const logger = createLogger('ExecutionLogger') +const MAX_EXECUTION_DATA_BYTES = 500 * 1024 +const MAX_TRACE_IO_BYTES = 8 * 1024 +const MAX_WORKFLOW_VALUE_BYTES = 64 * 1024 +const EXECUTION_LOG_STATEMENT_TIMEOUT_MS = 30_000 +const EXECUTION_LOG_LOCK_TIMEOUT_MS = 3_000 +const EXECUTION_LOG_IDLE_TIMEOUT_MS = 5_000 + +type ExecutionData = WorkflowExecutionLog['executionData'] + +function getJsonByteSize(value: unknown): number | undefined { + try { + const json = JSON.stringify(value) + return json === undefined ? undefined : Buffer.byteLength(json, 'utf8') + } catch { + return undefined + } +} + +function describeValue(value: unknown): string { + if (value === null) return 'null' + if (value === undefined) return 'undefined' + if (Array.isArray(value)) return `array with ${value.length} items` + if (typeof value === 'string') return `string with ${value.length} characters` + if (typeof value === 'object') return `object with ${Object.keys(value).length} keys` + return typeof value +} + +function summarizeValueForExecutionData(value: unknown, maxBytes: number): unknown { + const size = getJsonByteSize(value) + if (size === undefined || size <= maxBytes) { + return value + } + + return { + _truncated: true, + reason: 'execution_data_size_limit', + originalBytes: size, + summary: describeValue(value), + } +} + +function summarizeTextForExecutionData(value: string | undefined): string | undefined { + if (!value) return value + const size = getJsonByteSize(value) + if (size === undefined || size <= MAX_TRACE_IO_BYTES) { + return value + } + return `[Truncated ${size} byte text value due to execution log size limit]` +} + +function summarizeTraceSpansForExecutionData(traceSpans?: TraceSpan[]): TraceSpan[] | undefined { + if (!traceSpans) { + return traceSpans + } + + return traceSpans.map((span) => { + const { input, output, children, thinking, modelToolCalls, ...rest } = span + const summarized: TraceSpan = { ...rest } + + if (input !== undefined) { + summarized.input = summarizeValueForExecutionData(input, MAX_TRACE_IO_BYTES) as Record< + string, + unknown + > + } + if (output !== undefined) { + summarized.output = summarizeValueForExecutionData(output, MAX_TRACE_IO_BYTES) as Record< + string, + unknown + > + } + if (children?.length) { + summarized.children = summarizeTraceSpansForExecutionData(children) + } + if (thinking !== undefined) { + summarized.thinking = summarizeTextForExecutionData(thinking) + } + if ( + modelToolCalls !== undefined && + (getJsonByteSize(modelToolCalls) ?? 0) <= MAX_TRACE_IO_BYTES + ) { + summarized.modelToolCalls = modelToolCalls + } + + return summarized + }) +} + +function summarizeTraceSpansWithoutIo(traceSpans?: TraceSpan[]): TraceSpan[] | undefined { + if (!traceSpans) { + return traceSpans + } + + return traceSpans.map((span) => { + const { + input: _input, + output: _output, + children, + thinking: _thinking, + modelToolCalls: _modelToolCalls, + ...rest + } = span + return { + ...rest, + ...(children?.length ? { children: summarizeTraceSpansWithoutIo(children) } : {}), + } + }) +} + +function summarizeExecutionState(executionState?: SerializableExecutionState) { + if (!executionState) { + return undefined + } + + return { + executedBlockCount: executionState.executedBlocks.length, + blockLogCount: executionState.blockLogs.length, + completedLoopCount: executionState.completedLoops.length, + activeExecutionPathLength: executionState.activeExecutionPath.length, + pendingQueueLength: executionState.pendingQueue?.length ?? 0, + } +} + +function recordStoredByteSize(executionData: ExecutionData): { + executionData: ExecutionData + storedBytes?: number +} { + const firstBytes = getJsonByteSize(executionData) + if (firstBytes === undefined) { + return { executionData } + } + + const withFirstSize = { ...executionData, executionDataStoredBytes: firstBytes } + const secondBytes = getJsonByteSize(withFirstSize) + if (secondBytes === undefined || secondBytes === firstBytes) { + return { executionData: withFirstSize, storedBytes: secondBytes ?? firstBytes } + } + + const withSecondSize = { ...executionData, executionDataStoredBytes: secondBytes } + return { + executionData: withSecondSize, + storedBytes: getJsonByteSize(withSecondSize) ?? secondBytes, + } +} + +async function setExecutionLogWriteTimeouts(trx: Pick): Promise { + await trx.execute( + sql.raw(`SET LOCAL statement_timeout = '${EXECUTION_LOG_STATEMENT_TIMEOUT_MS}ms'`) + ) + await trx.execute(sql.raw(`SET LOCAL lock_timeout = '${EXECUTION_LOG_LOCK_TIMEOUT_MS}ms'`)) + await trx.execute( + sql.raw(`SET LOCAL idle_in_transaction_session_timeout = '${EXECUTION_LOG_IDLE_TIMEOUT_MS}ms'`) + ) +} function countTraceSpans(traceSpans?: TraceSpan[]): number { if (!Array.isArray(traceSpans) || traceSpans.length === 0) { @@ -58,6 +212,133 @@ function countTraceSpans(traceSpans?: TraceSpan[]): number { } export class ExecutionLogger implements IExecutionLoggerService { + private compactExecutionDataForStorage( + executionData: ExecutionData, + executionId: string + ): ExecutionData { + const originalBytes = getJsonByteSize(executionData) + if (originalBytes === undefined || originalBytes <= MAX_EXECUTION_DATA_BYTES) { + return executionData + } + + const { executionState: _executionState, ...executionDataWithoutState } = executionData + const summarized: ExecutionData = { + ...executionDataWithoutState, + traceSpans: summarizeTraceSpansForExecutionData(executionData.traceSpans), + finalOutput: summarizeValueForExecutionData( + executionData.finalOutput, + MAX_WORKFLOW_VALUE_BYTES + ) as BlockOutputData, + executionDataTruncated: true, + executionDataOriginalBytes: originalBytes, + executionDataMaxBytes: MAX_EXECUTION_DATA_BYTES, + executionDataTruncationReason: + 'Execution log exceeded the maximum stored payload size, so large inputs and outputs were summarized.', + } + + if (executionData.workflowInput !== undefined) { + summarized.workflowInput = summarizeValueForExecutionData( + executionData.workflowInput, + MAX_WORKFLOW_VALUE_BYTES + ) + } + + if (executionData.executionState) { + summarized.executionStateSummary = summarizeExecutionState(executionData.executionState) + } + + const summarizedWithSize = recordStoredByteSize(summarized) + if ( + summarizedWithSize.storedBytes !== undefined && + summarizedWithSize.storedBytes <= MAX_EXECUTION_DATA_BYTES + ) { + logger.warn('Summarized oversized workflow execution data before storing log', { + executionId, + originalBytes, + storedBytes: summarizedWithSize.storedBytes, + maxBytes: MAX_EXECUTION_DATA_BYTES, + }) + return summarizedWithSize.executionData + } + + const minimal: ExecutionData = { + ...(executionData.environment ? { environment: executionData.environment } : {}), + ...(executionData.trigger ? { trigger: executionData.trigger } : {}), + ...(executionData.correlation ? { correlation: executionData.correlation } : {}), + ...(executionData.error ? { error: executionData.error } : {}), + ...(executionData.lastStartedBlock + ? { lastStartedBlock: executionData.lastStartedBlock } + : {}), + ...(executionData.lastCompletedBlock + ? { lastCompletedBlock: executionData.lastCompletedBlock } + : {}), + ...(executionData.completionFailure + ? { completionFailure: executionData.completionFailure } + : {}), + ...(executionData.finalizationPath + ? { finalizationPath: executionData.finalizationPath } + : {}), + hasTraceSpans: executionData.hasTraceSpans, + traceSpanCount: executionData.traceSpanCount, + traceSpans: summarizeTraceSpansWithoutIo(executionData.traceSpans), + finalOutput: summarizeValueForExecutionData(executionData.finalOutput, MAX_TRACE_IO_BYTES) as + | BlockOutputData + | undefined, + tokens: executionData.tokens, + models: executionData.models, + executionStateSummary: summarizeExecutionState(executionData.executionState), + executionDataTruncated: true, + executionDataOriginalBytes: originalBytes, + executionDataMaxBytes: MAX_EXECUTION_DATA_BYTES, + executionDataTruncationReason: + 'Execution log exceeded the maximum stored payload size after summarization, so trace payload details were omitted.', + } + + const minimalWithSize = recordStoredByteSize(minimal) + + if ( + minimalWithSize.storedBytes !== undefined && + minimalWithSize.storedBytes > MAX_EXECUTION_DATA_BYTES + ) { + const metadataOnly: ExecutionData = { + hasTraceSpans: executionData.hasTraceSpans, + traceSpanCount: executionData.traceSpanCount, + tokens: executionData.tokens, + models: executionData.models, + executionDataTruncated: true, + executionDataOriginalBytes: originalBytes, + executionDataMaxBytes: MAX_EXECUTION_DATA_BYTES, + executionDataTruncationReason: + 'Execution log exceeded the maximum stored payload size after minimal summarization, so only execution metadata was stored.', + } + + const metadataOnlyWithSize = recordStoredByteSize(metadataOnly) + logger.warn( + 'Stored metadata-only workflow execution data after oversized log summarization', + { + executionId, + originalBytes, + storedBytes: metadataOnlyWithSize.storedBytes, + minimalBytes: minimalWithSize.storedBytes, + summarizedBytes: summarizedWithSize.storedBytes, + maxBytes: MAX_EXECUTION_DATA_BYTES, + } + ) + + return metadataOnlyWithSize.executionData + } + + logger.warn('Stored minimal workflow execution data after oversized log summarization', { + executionId, + originalBytes, + storedBytes: minimalWithSize.storedBytes, + summarizedBytes: summarizedWithSize.storedBytes, + maxBytes: MAX_EXECUTION_DATA_BYTES, + }) + + return minimalWithSize.executionData + } + private buildCompletedExecutionData(params: { existingExecutionData?: WorkflowExecutionLog['executionData'] traceSpans?: TraceSpan[] @@ -336,8 +617,12 @@ export class ExecutionLogger implements IExecutionLoggerService { const filteredTraceSpans = filterForDisplay(mergedTraceSpans) const filteredFinalOutput = filterForDisplay(finalOutput) + const filteredWorkflowInput = + workflowInput !== undefined ? filterForDisplay(workflowInput) : undefined const redactedTraceSpans = redactApiKeys(filteredTraceSpans) const redactedFinalOutput = redactApiKeys(filteredFinalOutput) + const redactedWorkflowInput = + filteredWorkflowInput !== undefined ? redactApiKeys(filteredWorkflowInput) : undefined const executionCost = { total: costSummary.totalCost, @@ -360,30 +645,37 @@ export class ExecutionLogger implements IExecutionLoggerService { ? Math.max(0, Math.round(rawDurationMs)) : 0 - const completedExecutionData = this.buildCompletedExecutionData({ - existingExecutionData, - traceSpans: redactedTraceSpans, - finalOutput: redactedFinalOutput, - finalizationPath, - completionFailure, - executionCost, - executionState, - workflowInput, - }) + const completedExecutionData = this.compactExecutionDataForStorage( + this.buildCompletedExecutionData({ + existingExecutionData, + traceSpans: redactedTraceSpans, + finalOutput: redactedFinalOutput, + finalizationPath, + completionFailure, + executionCost, + executionState, + workflowInput: redactedWorkflowInput, + }), + executionId + ) - const [updatedLog] = await db - .update(workflowExecutionLogs) - .set({ - level, - status, - endedAt: new Date(endedAt), - totalDurationMs: totalDuration, - files: executionFiles.length > 0 ? executionFiles : null, - executionData: completedExecutionData, - cost: executionCost, - }) - .where(eq(workflowExecutionLogs.executionId, executionId)) - .returning() + const [updatedLog] = await db.transaction(async (tx) => { + await setExecutionLogWriteTimeouts(tx) + + return tx + .update(workflowExecutionLogs) + .set({ + level, + status, + endedAt: new Date(endedAt), + totalDurationMs: totalDuration, + files: executionFiles.length > 0 ? executionFiles : null, + executionData: completedExecutionData, + cost: executionCost, + }) + .where(eq(workflowExecutionLogs.executionId, executionId)) + .returning() + }) if (!updatedLog) { throw new Error(`Workflow log not found for execution ${executionId}`) diff --git a/apps/sim/lib/logs/types.ts b/apps/sim/lib/logs/types.ts index 4064b302e3e..f4021439a35 100644 --- a/apps/sim/lib/logs/types.ts +++ b/apps/sim/lib/logs/types.ts @@ -152,6 +152,18 @@ export interface WorkflowExecutionLog { } > executionState?: SerializableExecutionState + executionStateSummary?: { + executedBlockCount: number + blockLogCount: number + completedLoopCount: number + activeExecutionPathLength: number + pendingQueueLength: number + } + executionDataTruncated?: boolean + executionDataOriginalBytes?: number + executionDataStoredBytes?: number + executionDataMaxBytes?: number + executionDataTruncationReason?: string finalOutput?: any workflowInput?: unknown errorDetails?: { diff --git a/apps/sim/lib/table/import.ts b/apps/sim/lib/table/import.ts index 117f4a62310..23566c145d5 100644 --- a/apps/sim/lib/table/import.ts +++ b/apps/sim/lib/table/import.ts @@ -20,8 +20,8 @@ export const CSV_SCHEMA_SAMPLE_SIZE = 100 /** Maximum rows inserted per `batchInsertRows` call during import. */ export const CSV_MAX_BATCH_SIZE = 1000 -/** Maximum CSV/TSV file size accepted by import routes (50 MB). */ -export const CSV_MAX_FILE_SIZE_BYTES = 50 * 1024 * 1024 +/** Maximum CSV/TSV file size accepted by import routes (25 MB). */ +export const CSV_MAX_FILE_SIZE_BYTES = 25 * 1024 * 1024 /** * Error thrown when the user-supplied mapping or CSV does not line up with the diff --git a/apps/sim/lib/uploads/contexts/execution/execution-file-manager.ts b/apps/sim/lib/uploads/contexts/execution/execution-file-manager.ts index 1fb9c435099..59ea7c04b64 100644 --- a/apps/sim/lib/uploads/contexts/execution/execution-file-manager.ts +++ b/apps/sim/lib/uploads/contexts/execution/execution-file-manager.ts @@ -1,5 +1,6 @@ import { createLogger } from '@sim/logger' import { getErrorMessage } from '@sim/utils/errors' +import { isPayloadSizeLimitError } from '@/lib/core/utils/stream-limits' import { isUserFileWithMetadata } from '@/lib/core/utils/user-file' import { StorageService } from '@/lib/uploads' import type { ExecutionContext } from '@/lib/uploads/contexts/execution/utils' @@ -130,13 +131,17 @@ export async function uploadExecutionFile( /** * Download a file from execution-scoped storage */ -export async function downloadExecutionFile(userFile: UserFile): Promise { +export async function downloadExecutionFile( + userFile: UserFile, + options: { maxBytes?: number } = {} +): Promise { logger.info(`Downloading execution file: ${userFile.name}`) try { const fileBuffer = await StorageService.downloadFile({ key: userFile.key, context: 'execution', + ...(options.maxBytes === undefined ? {} : { maxBytes: options.maxBytes }), }) logger.info( @@ -144,6 +149,9 @@ export async function downloadExecutionFile(userFile: UserFile): Promise ) return fileBuffer } catch (error) { + if (isPayloadSizeLimitError(error)) { + throw error + } logger.error(`Failed to download execution file ${userFile.name}:`, error) throw new Error(`Failed to download file: ${getErrorMessage(error, 'Unknown error')}`) } diff --git a/apps/sim/lib/uploads/core/storage-service.ts b/apps/sim/lib/uploads/core/storage-service.ts index 7a5ba092f29..f730d49beae 100644 --- a/apps/sim/lib/uploads/core/storage-service.ts +++ b/apps/sim/lib/uploads/core/storage-service.ts @@ -1,6 +1,7 @@ import { randomBytes } from 'crypto' import { createLogger } from '@sim/logger' import { getErrorMessage } from '@sim/utils/errors' +import { assertKnownSizeWithinLimit } from '@/lib/core/utils/stream-limits' import { getStorageConfig, USE_BLOB_STORAGE, USE_S3_STORAGE } from '@/lib/uploads/config' import type { BlobConfig } from '@/lib/uploads/providers/blob/types' import type { S3Config } from '@/lib/uploads/providers/s3/types' @@ -184,29 +185,40 @@ export async function uploadFile(options: UploadFileOptions): Promise * Download a file from the configured storage provider */ export async function downloadFile(options: DownloadFileOptions): Promise { - const { key, context } = options + const { key, context, maxBytes } = options if (context) { const config = getStorageConfig(context) if (USE_BLOB_STORAGE) { const { downloadFromBlob } = await import('@/lib/uploads/providers/blob/client') - return downloadFromBlob(key, createBlobConfig(config)) + const blobConfig = createBlobConfig(config) + return maxBytes === undefined + ? downloadFromBlob(key, blobConfig) + : downloadFromBlob(key, blobConfig, maxBytes) } if (USE_S3_STORAGE) { const { downloadFromS3 } = await import('@/lib/uploads/providers/s3/client') - return downloadFromS3(key, createS3Config(config)) + const s3Config = createS3Config(config) + return maxBytes === undefined + ? downloadFromS3(key, s3Config) + : downloadFromS3(key, s3Config, maxBytes) } } - const { readFile } = await import('fs/promises') + const { readFile, stat } = await import('fs/promises') const { join } = await import('path') const { UPLOAD_DIR_SERVER } = await import('./setup.server') const safeKey = sanitizeFileKey(key) const filePath = join(UPLOAD_DIR_SERVER, safeKey) + if (maxBytes !== undefined) { + const fileStats = await stat(filePath) + assertKnownSizeWithinLimit(fileStats.size, maxBytes, 'storage download') + } + return readFile(filePath) } diff --git a/apps/sim/lib/uploads/providers/blob/client.test.ts b/apps/sim/lib/uploads/providers/blob/client.test.ts index 484dce6f2aa..7e15a7095c3 100644 --- a/apps/sim/lib/uploads/providers/blob/client.test.ts +++ b/apps/sim/lib/uploads/providers/blob/client.test.ts @@ -144,6 +144,7 @@ describe('Azure Blob Storage Client', () => { callback() } }), + off: vi.fn(() => mockReadableStream), } mockDownload.mockResolvedValueOnce({ @@ -156,6 +157,24 @@ describe('Azure Blob Storage Client', () => { expect(mockDownload).toHaveBeenCalled() expect(result).toEqual(testContent) }) + + it('should destroy the opened stream when content length exceeds the limit', async () => { + const mockDestroy = vi.fn() + const mockReadableStream = { + destroy: mockDestroy, + on: vi.fn(() => mockReadableStream), + } + + mockDownload.mockResolvedValueOnce({ + readableStreamBody: mockReadableStream, + contentLength: 1024, + }) + + await expect(downloadFromBlob('large-file-key', undefined, 10)).rejects.toThrow( + 'storage download exceeds maximum size' + ) + expect(mockDestroy).toHaveBeenCalledWith(expect.any(Error)) + }) }) describe('deleteFromBlob', () => { diff --git a/apps/sim/lib/uploads/providers/blob/client.ts b/apps/sim/lib/uploads/providers/blob/client.ts index ee1c0505661..e329799eed5 100644 --- a/apps/sim/lib/uploads/providers/blob/client.ts +++ b/apps/sim/lib/uploads/providers/blob/client.ts @@ -1,6 +1,10 @@ import type { BlobServiceClient as BlobServiceClientType } from '@azure/storage-blob' import { createLogger } from '@sim/logger' import { generateId } from '@sim/utils/id' +import { + assertKnownSizeWithinLimit, + readNodeStreamToBufferWithLimit, +} from '@/lib/core/utils/stream-limits' import { BLOB_CONFIG } from '@/lib/uploads/config' import type { AzureMultipartPart, @@ -267,7 +271,17 @@ export async function downloadFromBlob(key: string): Promise */ export async function downloadFromBlob(key: string, customConfig: BlobConfig): Promise -export async function downloadFromBlob(key: string, customConfig?: BlobConfig): Promise { +export async function downloadFromBlob( + key: string, + customConfig: BlobConfig, + maxBytes: number +): Promise + +export async function downloadFromBlob( + key: string, + customConfig?: BlobConfig, + maxBytes?: number +): Promise { const { BlobServiceClient, StorageSharedKeyCredential } = await import('@azure/storage-blob') let blobServiceClient: BlobServiceClientType let containerName: string @@ -297,10 +311,32 @@ export async function downloadFromBlob(key: string, customConfig?: BlobConfig): const blockBlobClient = containerClient.getBlockBlobClient(key) const downloadBlockBlobResponse = await blockBlobClient.download() + if (maxBytes !== undefined && downloadBlockBlobResponse.contentLength !== undefined) { + try { + assertKnownSizeWithinLimit( + downloadBlockBlobResponse.contentLength, + maxBytes, + 'storage download' + ) + } catch (error) { + const stream = downloadBlockBlobResponse.readableStreamBody as + | { destroy?: (error?: Error) => void } + | undefined + stream?.destroy?.(error instanceof Error ? error : undefined) + throw error + } + } + if (!downloadBlockBlobResponse.readableStreamBody) { throw new Error('Failed to get readable stream from blob download') } - const downloaded = await streamToBuffer(downloadBlockBlobResponse.readableStreamBody) + const downloaded = await readNodeStreamToBufferWithLimit( + downloadBlockBlobResponse.readableStreamBody, + { + maxBytes: maxBytes ?? Number.MAX_SAFE_INTEGER, + label: 'storage download', + } + ) return downloaded } @@ -402,22 +438,6 @@ export async function deleteFromBlob(key: string, customConfig?: BlobConfig): Pr await blockBlobClient.delete() } -/** - * Helper function to convert a readable stream to a Buffer - */ -async function streamToBuffer(readableStream: NodeJS.ReadableStream): Promise { - return new Promise((resolve, reject) => { - const chunks: Buffer[] = [] - readableStream.on('data', (data) => { - chunks.push(data instanceof Buffer ? data : Buffer.from(data)) - }) - readableStream.on('end', () => { - resolve(Buffer.concat(chunks)) - }) - readableStream.on('error', reject) - }) -} - /** * Derive the deterministic Azure block id for a given part number. * Block ids must be base64-encoded and equal length within an upload; using a diff --git a/apps/sim/lib/uploads/providers/s3/client.test.ts b/apps/sim/lib/uploads/providers/s3/client.test.ts index d30ab5b87e3..ff780b55864 100644 --- a/apps/sim/lib/uploads/providers/s3/client.test.ts +++ b/apps/sim/lib/uploads/providers/s3/client.test.ts @@ -53,9 +53,7 @@ vi.mock('@/lib/core/config/env', () => ({ isTruthy: (value: string | boolean | number | undefined) => typeof value === 'string' ? value.toLowerCase() === 'true' || value === '1' : Boolean(value), isFalsy: (value: string | boolean | number | undefined) => - typeof value === 'string' - ? value.toLowerCase() === 'false' || value === '0' - : value === false, + typeof value === 'string' ? value.toLowerCase() === 'false' || value === '0' : value === false, })) vi.mock('@/lib/uploads/setup', () => ({ @@ -228,6 +226,7 @@ describe('S3 Client', () => { } return mockStream }), + off: vi.fn(() => mockStream), } mockSend.mockResolvedValueOnce({ @@ -257,6 +256,7 @@ describe('S3 Client', () => { } return mockStream }), + off: vi.fn(() => mockStream), } mockSend.mockResolvedValueOnce({ @@ -269,6 +269,25 @@ describe('S3 Client', () => { await expect(downloadFromS3(key)).rejects.toThrow('Stream error') }) + it('should destroy the opened stream when content length exceeds the limit', async () => { + const mockDestroy = vi.fn() + const mockStream = { + destroy: mockDestroy, + on: vi.fn(() => mockStream), + } + + mockSend.mockResolvedValueOnce({ + Body: mockStream, + ContentLength: 1024, + $metadata: { httpStatusCode: 200 }, + }) + + await expect( + downloadFromS3('large-file.txt', { bucket: 'test-bucket', region: 'test-region' }, 10) + ).rejects.toThrow('storage download exceeds maximum size') + expect(mockDestroy).toHaveBeenCalledWith(expect.any(Error)) + }) + it('should handle S3 client errors', async () => { const error = new Error('Download failed') mockSend.mockRejectedValueOnce(error) diff --git a/apps/sim/lib/uploads/providers/s3/client.ts b/apps/sim/lib/uploads/providers/s3/client.ts index f31bd21718d..fe939cb506f 100644 --- a/apps/sim/lib/uploads/providers/s3/client.ts +++ b/apps/sim/lib/uploads/providers/s3/client.ts @@ -14,6 +14,10 @@ import { getSignedUrl } from '@aws-sdk/s3-request-presigner' import { getErrorMessage } from '@sim/utils/errors' import { generateId } from '@sim/utils/id' import { env } from '@/lib/core/config/env' +import { + assertKnownSizeWithinLimit, + readNodeStreamToBufferWithLimit, +} from '@/lib/core/utils/stream-limits' import { S3_CONFIG, S3_KB_CONFIG } from '@/lib/uploads/config' import type { S3Config, @@ -181,7 +185,17 @@ export async function downloadFromS3(key: string): Promise */ export async function downloadFromS3(key: string, customConfig: S3Config): Promise -export async function downloadFromS3(key: string, customConfig?: S3Config): Promise { +export async function downloadFromS3( + key: string, + customConfig: S3Config, + maxBytes: number +): Promise + +export async function downloadFromS3( + key: string, + customConfig?: S3Config, + maxBytes?: number +): Promise { const config = customConfig || { bucket: S3_CONFIG.bucket, region: S3_CONFIG.region } const command = new GetObjectCommand({ @@ -190,13 +204,20 @@ export async function downloadFromS3(key: string, customConfig?: S3Config): Prom }) const response = await getS3Client().send(command) - const stream = response.Body as any + if (maxBytes !== undefined && response.ContentLength !== undefined) { + try { + assertKnownSizeWithinLimit(response.ContentLength, maxBytes, 'storage download') + } catch (error) { + const body = response.Body as { destroy?: (error?: Error) => void } | undefined + body?.destroy?.(error instanceof Error ? error : undefined) + throw error + } + } - return new Promise((resolve, reject) => { - const chunks: Buffer[] = [] - stream.on('data', (chunk: Buffer) => chunks.push(chunk)) - stream.on('end', () => resolve(Buffer.concat(chunks))) - stream.on('error', reject) + const stream = response.Body as NodeJS.ReadableStream + return readNodeStreamToBufferWithLimit(stream, { + maxBytes: maxBytes ?? Number.MAX_SAFE_INTEGER, + label: 'storage download', }) } diff --git a/apps/sim/lib/uploads/shared/types.ts b/apps/sim/lib/uploads/shared/types.ts index bba56ad348e..827c08b0f7a 100644 --- a/apps/sim/lib/uploads/shared/types.ts +++ b/apps/sim/lib/uploads/shared/types.ts @@ -70,6 +70,7 @@ export interface UploadFileOptions { export interface DownloadFileOptions { key: string context?: StorageContext + maxBytes?: number } export interface DeleteFileOptions { diff --git a/apps/sim/lib/uploads/utils/file-utils.server.ts b/apps/sim/lib/uploads/utils/file-utils.server.ts index 5d62fb4be7e..f0fb7a606a6 100644 --- a/apps/sim/lib/uploads/utils/file-utils.server.ts +++ b/apps/sim/lib/uploads/utils/file-utils.server.ts @@ -7,6 +7,11 @@ import { secureFetchWithPinnedIP, validateUrlWithDNS, } from '@/lib/core/security/input-validation.server' +import { + assertKnownSizeWithinLimit, + consumeOrCancelBody, + readResponseToBufferWithLimit, +} from '@/lib/core/utils/stream-limits' import type { StorageContext } from '@/lib/uploads' import { StorageService } from '@/lib/uploads' import { isExecutionFile } from '@/lib/uploads/contexts/execution/utils' @@ -139,14 +144,15 @@ export async function resolveFileInputToUrl( */ export async function downloadFileFromUrl( fileUrl: string, - timeoutMs = getMaxExecutionTimeout() + timeoutMs = getMaxExecutionTimeout(), + maxBytes?: number ): Promise { const { parseInternalFileUrl } = await import('./file-utils') if (isInternalFileUrl(fileUrl)) { const { key, context } = parseInternalFileUrl(fileUrl) const { downloadFile } = await import('@/lib/uploads/core/storage-service') - return downloadFile({ key, context }) + return downloadFile({ key, context, maxBytes }) } const urlValidation = await validateUrlWithDNS(fileUrl, 'fileUrl') @@ -156,13 +162,18 @@ export async function downloadFileFromUrl( const response = await secureFetchWithPinnedIP(fileUrl, urlValidation.resolvedIP!, { timeout: timeoutMs, + maxResponseBytes: maxBytes, }) if (!response.ok) { + await consumeOrCancelBody(response) throw new Error(`Failed to download file: ${response.statusText}`) } - return Buffer.from(await response.arrayBuffer()) + return readResponseToBufferWithLimit(response, { + maxBytes: maxBytes ?? Number.MAX_SAFE_INTEGER, + label: 'file download', + }) } export async function resolveInternalFileUrl( @@ -208,16 +219,20 @@ export async function resolveInternalFileUrl( export async function downloadFileFromStorage( userFile: UserFile, requestId: string, - logger: Logger + logger: Logger, + options: { maxBytes?: number } = {} ): Promise { let buffer: Buffer + if (options.maxBytes !== undefined && userFile.size > options.maxBytes) { + assertKnownSizeWithinLimit(userFile.size, options.maxBytes, 'storage file download') + } if (isExecutionFile(userFile)) { logger.info(`[${requestId}] Downloading from execution storage: ${userFile.key}`) const { downloadExecutionFile } = await import( '@/lib/uploads/contexts/execution/execution-file-manager' ) - buffer = await downloadExecutionFile(userFile) + buffer = await downloadExecutionFile(userFile, { maxBytes: options.maxBytes }) } else if (userFile.key) { const context = (userFile.context as StorageContext) || inferContextFromKey(userFile.key) logger.info( @@ -228,10 +243,15 @@ export async function downloadFileFromStorage( buffer = await downloadFile({ key: userFile.key, context, + maxBytes: options.maxBytes, }) } else { throw new Error('File has no key - cannot download') } + if (options.maxBytes !== undefined) { + assertKnownSizeWithinLimit(buffer.length, options.maxBytes, 'storage file download') + } + return buffer } diff --git a/apps/sim/tools/docusign/download_document.test.ts b/apps/sim/tools/docusign/download_document.test.ts new file mode 100644 index 00000000000..b4f315e5365 --- /dev/null +++ b/apps/sim/tools/docusign/download_document.test.ts @@ -0,0 +1,59 @@ +/** + * @vitest-environment node + */ +import { describe, expect, it } from 'vitest' +import { docusignDownloadDocumentTool } from '@/tools/docusign/download_document' + +describe('DocuSign download document tool', () => { + it('forwards execution context to the internal route', () => { + const body = docusignDownloadDocumentTool.request.body?.({ + accessToken: 'token', + envelopeId: 'envelope-1', + documentId: 'combined', + _context: { + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', + }, + }) + + expect(body).toMatchObject({ + accessToken: 'token', + operation: 'download_document', + envelopeId: 'envelope-1', + documentId: 'combined', + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', + }) + }) + + it('returns file outputs from execution-context downloads', async () => { + const file = { + id: 'file-1', + name: 'signed.pdf', + size: 128, + type: 'application/pdf', + url: '/api/files/serve/execution/file-1', + key: 'execution/workflow/file-1', + context: 'execution', + } + const response = new Response( + JSON.stringify({ + file, + mimeType: 'application/pdf', + fileName: 'signed.pdf', + }), + { status: 200, headers: { 'content-type': 'application/json' } } + ) + + const result = await docusignDownloadDocumentTool.transformResponse?.(response) + + expect(result?.output).toEqual({ + file, + mimeType: 'application/pdf', + fileName: 'signed.pdf', + }) + expect(result?.output.base64Content).toBeUndefined() + }) +}) diff --git a/apps/sim/tools/docusign/download_document.ts b/apps/sim/tools/docusign/download_document.ts index f6325051fbc..0a965b694d0 100644 --- a/apps/sim/tools/docusign/download_document.ts +++ b/apps/sim/tools/docusign/download_document.ts @@ -4,6 +4,19 @@ import type { } from '@/tools/docusign/types' import type { ToolConfig } from '@/tools/types' +function getExecutionContext(params: DocuSignDownloadDocumentParams): { + workspaceId?: string + workflowId?: string + executionId?: string +} { + const context = params._context + return { + workspaceId: typeof context?.workspaceId === 'string' ? context.workspaceId : undefined, + workflowId: typeof context?.workflowId === 'string' ? context.workflowId : undefined, + executionId: typeof context?.executionId === 'string' ? context.executionId : undefined, + } +} + export const docusignDownloadDocumentTool: ToolConfig< DocuSignDownloadDocumentParams, DocuSignDownloadDocumentResponse @@ -44,12 +57,16 @@ export const docusignDownloadDocumentTool: ToolConfig< url: '/api/tools/docusign', method: 'POST', headers: () => ({ 'Content-Type': 'application/json' }), - body: (params) => ({ - accessToken: params.accessToken, - operation: 'download_document', - envelopeId: params.envelopeId, - documentId: params.documentId, - }), + body: (params) => { + const context = getExecutionContext(params) + return { + accessToken: params.accessToken, + operation: 'download_document', + envelopeId: params.envelopeId, + documentId: params.documentId, + ...context, + } + }, }, transformResponse: async (response) => { @@ -60,7 +77,8 @@ export const docusignDownloadDocumentTool: ToolConfig< return { success: true, output: { - base64Content: data.base64Content ?? '', + ...(data.file ? { file: data.file } : {}), + ...(typeof data.base64Content === 'string' ? { base64Content: data.base64Content } : {}), mimeType: data.mimeType ?? 'application/pdf', fileName: data.fileName ?? 'document.pdf', }, @@ -68,7 +86,12 @@ export const docusignDownloadDocumentTool: ToolConfig< }, outputs: { - base64Content: { type: 'string', description: 'Base64-encoded document content' }, + file: { type: 'file', description: 'Stored downloaded document file', optional: true }, + base64Content: { + type: 'string', + description: 'Legacy base64 document content for small downloads', + optional: true, + }, mimeType: { type: 'string', description: 'MIME type of the document' }, fileName: { type: 'string', description: 'Original file name' }, }, diff --git a/apps/sim/tools/docusign/types.ts b/apps/sim/tools/docusign/types.ts index 910687dc938..e7545f3a3f4 100644 --- a/apps/sim/tools/docusign/types.ts +++ b/apps/sim/tools/docusign/types.ts @@ -1,3 +1,4 @@ +import type { UserFile } from '@/executor/types' import type { OutputProperty, ToolResponse } from '@/tools/types' /** Common envelope output properties */ @@ -136,6 +137,7 @@ export interface DocuSignDownloadDocumentParams { accessToken: string envelopeId: string documentId?: string + _context?: Record } export interface DocuSignListTemplatesParams { @@ -208,7 +210,8 @@ export interface DocuSignVoidEnvelopeResponse extends ToolResponse { export interface DocuSignDownloadDocumentResponse extends ToolResponse { output: { - base64Content: string + base64Content?: string + file?: UserFile mimeType: string fileName: string } diff --git a/apps/sim/tools/file/parser.test.ts b/apps/sim/tools/file/parser.test.ts new file mode 100644 index 00000000000..bfdadb2511c --- /dev/null +++ b/apps/sim/tools/file/parser.test.ts @@ -0,0 +1,84 @@ +/** + * @vitest-environment node + */ +import { describe, expect, it } from 'vitest' +import { fileFetchTool, fileParserTool, fileParserV3Tool } from '@/tools/file/parser' + +describe('fileParserTool', () => { + it('propagates parse route failures as tool failures', async () => { + const result = await fileParserTool.transformResponse?.( + Response.json({ + success: false, + error: 'File is too large to parse safely.', + filePath: 'https://example.com/big.pdf', + }) + ) + + expect(result).toMatchObject({ + success: false, + error: 'File is too large to parse safely.', + output: { + files: [], + combinedContent: '', + }, + }) + }) + + it('propagates parse route failures from V3 and file fetch tools', async () => { + const body = { + success: false, + error: 'File is too large to parse safely.', + filePath: 'https://example.com/big.pdf', + } + + await expect(fileParserV3Tool.transformResponse?.(Response.json(body))).resolves.toMatchObject({ + success: false, + error: 'File is too large to parse safely.', + output: { + files: [], + combinedContent: '', + }, + }) + await expect(fileFetchTool.transformResponse?.(Response.json(body))).resolves.toMatchObject({ + success: false, + error: 'File is too large to parse safely.', + output: { + files: [], + combinedContent: '', + }, + }) + }) + + it('omits failed entries from partial multi-file parse results', async () => { + const result = await fileParserTool.transformResponse?.( + Response.json({ + success: true, + results: [ + { + success: false, + error: 'First file failed', + filePath: 'bad.pdf', + }, + { + success: true, + output: { + content: 'ok', + fileType: 'text/plain', + size: 2, + name: 'ok.txt', + binary: false, + }, + }, + ], + }) + ) + + expect(result).toMatchObject({ + success: true, + output: { + files: [{ name: 'ok.txt', content: 'ok' }], + combinedContent: 'ok', + }, + }) + }) +}) diff --git a/apps/sim/tools/file/parser.ts b/apps/sim/tools/file/parser.ts index d98e08de653..18049422c4d 100644 --- a/apps/sim/tools/file/parser.ts +++ b/apps/sim/tools/file/parser.ts @@ -89,14 +89,45 @@ const parseFileParserResponse = async (response: Response): Promise - normalizeFileParseResult(fileResult) + const failedResults = result.results.filter( + (fileResult) => isRecord(fileResult) && fileResult.success === false ) + if (failedResults.length === result.results.length) { + const firstError = failedResults.find( + (fileResult) => isRecord(fileResult) && typeof fileResult.error === 'string' + ) + return { + success: false, + output: { + files: [], + combinedContent: '', + }, + error: + isRecord(firstError) && typeof firstError.error === 'string' + ? firstError.error + : 'Failed to parse files', + } + } + + // Extract individual file results + const fileResults: FileParseResult[] = result.results + .filter((fileResult) => !(isRecord(fileResult) && fileResult.success === false)) + .map((fileResult) => normalizeFileParseResult(fileResult)) // Collect UserFile objects from results const processedFiles: UserFile[] = fileResults @@ -305,6 +336,17 @@ export const fileParserV2Tool: ToolConfig = { const parseFileParserV3Response = async (response: Response): Promise => { const parsed = await parseFileParserResponse(response) + if (!parsed.success) { + return { + success: false, + output: { + files: [], + combinedContent: '', + }, + error: parsed.error, + } + } + const output = parsed.output as FileParserOutputData const files = Array.isArray(output.processedFiles) && output.processedFiles.length > 0 diff --git a/apps/sim/tools/google_slides/export_presentation.test.ts b/apps/sim/tools/google_slides/export_presentation.test.ts new file mode 100644 index 00000000000..6076edb75cb --- /dev/null +++ b/apps/sim/tools/google_slides/export_presentation.test.ts @@ -0,0 +1,81 @@ +/** + * @vitest-environment node + */ +import { beforeEach, describe, expect, it, vi } from 'vitest' + +const { mockUploadExecutionFile } = vi.hoisted(() => ({ + mockUploadExecutionFile: vi.fn(), +})) + +vi.mock('@/lib/uploads/contexts/execution', () => ({ + uploadExecutionFile: mockUploadExecutionFile, +})) + +import { exportPresentationTool } from '@/tools/google_slides/export_presentation' + +describe('Google Slides export presentation tool', () => { + beforeEach(() => { + vi.clearAllMocks() + mockUploadExecutionFile.mockResolvedValue({ + id: 'file-1', + name: 'presentation-1.pdf', + size: 7, + type: 'application/pdf', + url: '/api/files/serve/execution/file-1', + key: 'execution/workflow/file-1', + context: 'execution', + }) + }) + + it('stores exports as execution file references instead of base64', async () => { + const response = new Response('content', { + status: 200, + headers: { 'content-type': 'application/pdf' }, + }) + + const result = await exportPresentationTool.transformResponse?.(response, { + accessToken: 'token', + presentationId: 'presentation-1', + exportFormat: 'PDF', + _context: { + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', + userId: 'user-1', + }, + }) + + expect(mockUploadExecutionFile).toHaveBeenCalledWith( + { workspaceId: 'workspace-1', workflowId: 'workflow-1', executionId: 'execution-1' }, + Buffer.from('content'), + 'presentation-1.pdf', + 'application/pdf', + 'user-1' + ) + expect(result?.output.file).toMatchObject({ + key: 'execution/workflow/file-1', + context: 'execution', + mimeType: 'application/pdf', + }) + expect(result?.output.contentBase64).toBeUndefined() + }) + + it('preserves legacy base64 content when execution context is unavailable', async () => { + const bytes = Uint8Array.from([0, 255, 1, 254]) + const response = new Response(bytes, { + status: 200, + headers: { 'content-type': 'application/pdf' }, + }) + + const result = await exportPresentationTool.transformResponse?.(response, { + accessToken: 'token', + presentationId: 'presentation-1', + exportFormat: 'PDF', + }) + + expect(mockUploadExecutionFile).not.toHaveBeenCalled() + expect(result?.output.file).toBeUndefined() + expect(result?.output.contentBase64).toBe(Buffer.from(bytes).toString('base64')) + expect(result?.output.sizeBytes).toBe(bytes.byteLength) + }) +}) diff --git a/apps/sim/tools/google_slides/export_presentation.ts b/apps/sim/tools/google_slides/export_presentation.ts index 34553149411..d16e6cf1afa 100644 --- a/apps/sim/tools/google_slides/export_presentation.ts +++ b/apps/sim/tools/google_slides/export_presentation.ts @@ -1,4 +1,11 @@ import { createLogger } from '@sim/logger' +import { + PayloadSizeLimitError, + readResponseTextWithLimit, + readResponseToBufferWithLimit, +} from '@/lib/core/utils/stream-limits' +import { uploadExecutionFile } from '@/lib/uploads/contexts/execution' +import type { UserFile } from '@/executor/types' import { presentationUrl } from '@/tools/google_slides/utils' import type { ToolConfig } from '@/tools/types' @@ -8,18 +15,23 @@ interface ExportPresentationParams { accessToken: string presentationId: string exportFormat?: 'PDF' | 'PPTX' | 'ODP' | 'TXT' | 'PNG' | 'JPEG' | 'SVG' + _context?: Record } interface ExportPresentationResponse { success: boolean output: { - contentBase64: string + contentBase64?: string + file?: UserFile & { mimeType?: string } mimeType: string sizeBytes: number metadata: { presentationId: string; url: string; exportFormat: string } } } +const MAX_GOOGLE_SLIDES_EXPORT_BYTES = 10 * 1024 * 1024 +const MAX_LEGACY_INLINE_EXPORT_BYTES = 7 * 1024 * 1024 + const FORMAT_TO_MIME: Record = { PDF: 'application/pdf', PPTX: 'application/vnd.openxmlformats-officedocument.presentationml.presentation', @@ -30,6 +42,25 @@ const FORMAT_TO_MIME: Record = { SVG: 'image/svg+xml', } +function getExecutionContext(params?: ExportPresentationParams): { + context?: { workspaceId: string; workflowId: string; executionId: string } + userId?: string +} { + const context = ( + params as (ExportPresentationParams & { _context?: Record }) | undefined + )?._context + const workspaceId = typeof context?.workspaceId === 'string' ? context.workspaceId : undefined + const workflowId = typeof context?.workflowId === 'string' ? context.workflowId : undefined + const executionId = typeof context?.executionId === 'string' ? context.executionId : undefined + const userId = typeof context?.userId === 'string' ? context.userId : undefined + + if (!workspaceId || !workflowId || !executionId) { + return { userId } + } + + return { context: { workspaceId, workflowId, executionId }, userId } +} + export const exportPresentationTool: ToolConfig< ExportPresentationParams, ExportPresentationResponse @@ -37,7 +68,7 @@ export const exportPresentationTool: ToolConfig< id: 'google_slides_export_presentation', name: 'Export Google Slides Presentation', description: - 'Export a presentation to PDF, PPTX, ODP, TXT, PNG, JPEG, or SVG via the Drive export endpoint. Returns the file content base64-encoded.', + 'Export a presentation to PDF, PPTX, ODP, TXT, PNG, JPEG, or SVG via the Drive export endpoint. Stores the exported file as an execution file when execution context is available.', version: '1.0.0', oauth: { required: true, provider: 'google-drive' }, @@ -83,7 +114,11 @@ export const exportPresentationTool: ToolConfig< if (!response.ok) { let errorMessage = `Failed to export presentation (status ${response.status})` try { - const data = await response.json() + const text = await readResponseTextWithLimit(response, { + maxBytes: 64 * 1024, + label: 'Google Slides export error response', + }) + const data = JSON.parse(text) errorMessage = data.error?.message || errorMessage logger.error('Drive API error during export:', { data }) } catch { @@ -92,19 +127,38 @@ export const exportPresentationTool: ToolConfig< throw new Error(errorMessage) } - const buffer = await response.arrayBuffer() - const contentBase64 = Buffer.from(buffer).toString('base64') + const buffer = await readResponseToBufferWithLimit(response, { + maxBytes: MAX_GOOGLE_SLIDES_EXPORT_BYTES, + label: 'Google Slides export', + }) const presentationId = params?.presentationId?.trim() || '' const format = (params?.exportFormat || 'PDF').toUpperCase() const mime = FORMAT_TO_MIME[format] ?? 'application/octet-stream' + const { context, userId } = getExecutionContext(params) + const filename = `${presentationId || 'presentation'}.${format.toLowerCase()}` + const userFile = context + ? await uploadExecutionFile(context, Buffer.from(buffer), filename, mime, userId) + : undefined + if (!userFile && buffer.length > MAX_LEGACY_INLINE_EXPORT_BYTES) { + throw new PayloadSizeLimitError({ + label: 'Google Slides legacy inline export', + maxBytes: MAX_LEGACY_INLINE_EXPORT_BYTES, + observedBytes: buffer.length, + }) + } + const contentBase64 = + !userFile && buffer.length <= MAX_LEGACY_INLINE_EXPORT_BYTES + ? buffer.toString('base64') + : undefined return { success: true, output: { - contentBase64, + ...(userFile ? { file: { ...userFile, mimeType: mime } } : {}), + ...(contentBase64 ? { contentBase64 } : {}), mimeType: mime, - sizeBytes: buffer.byteLength, + sizeBytes: buffer.length, metadata: { presentationId, url: presentationUrl(presentationId), @@ -115,7 +169,16 @@ export const exportPresentationTool: ToolConfig< }, outputs: { - contentBase64: { type: 'string', description: 'Base64-encoded exported file content' }, + file: { + type: 'file', + description: 'Stored exported presentation file', + optional: true, + }, + contentBase64: { + type: 'string', + description: 'Legacy base64 content field for small exports.', + optional: true, + }, mimeType: { type: 'string', description: 'MIME type of the exported content' }, sizeBytes: { type: 'number', description: 'Size of the exported content in bytes' }, metadata: { diff --git a/apps/sim/tools/http/request.test.ts b/apps/sim/tools/http/request.test.ts index 88c2e9086c7..ad612560595 100644 --- a/apps/sim/tools/http/request.test.ts +++ b/apps/sim/tools/http/request.test.ts @@ -237,6 +237,20 @@ describe('HTTP Request Tool', () => { expect(result.output.headers).toHaveProperty('content-type') }) + it('should reject responses that exceed the workflow data cap', async () => { + const response = new Response('too large', { + status: 200, + headers: { + 'content-type': 'text/plain', + 'content-length': '10485761', + }, + }) + + await expect(requestTool.transformResponse?.(response, {} as any)).rejects.toMatchObject({ + name: 'PayloadSizeLimitError', + }) + }) + it('should handle POST requests with body', async () => { tester.setup({ result: 'success' }) diff --git a/apps/sim/tools/http/request.ts b/apps/sim/tools/http/request.ts index 4472a37faf9..966384f7d0d 100644 --- a/apps/sim/tools/http/request.ts +++ b/apps/sim/tools/http/request.ts @@ -1,8 +1,11 @@ +import { readResponseTextWithLimit } from '@/lib/core/utils/stream-limits' import type { RequestParams, RequestResponse } from '@/tools/http/types' import { getDefaultHeaders, processUrl } from '@/tools/http/utils' import { transformTable } from '@/tools/shared/table' import type { ToolConfig } from '@/tools/types' +const MAX_HTTP_RESPONSE_BODY_BYTES = 10 * 1024 * 1024 + export const requestTool: ToolConfig = { id: 'http_request', name: 'HTTP Request', @@ -158,9 +161,11 @@ export const requestTool: ToolConfig = { headers[key] = value }) - const data = await (contentType.includes('application/json') - ? response.json() - : response.text()) + const responseText = await readResponseTextWithLimit(response, { + maxBytes: MAX_HTTP_RESPONSE_BODY_BYTES, + label: 'HTTP Request response body', + }) + const data = contentType.includes('application/json') ? JSON.parse(responseText) : responseText // Check if this is a proxy response (structured response from /api/proxy) if ( diff --git a/apps/sim/tools/index.test.ts b/apps/sim/tools/index.test.ts index 1a27cc552c8..f014fc72363 100644 --- a/apps/sim/tools/index.test.ts +++ b/apps/sim/tools/index.test.ts @@ -744,6 +744,88 @@ describe('Automatic Internal Route Detection', () => { Object.assign(tools, originalTools) }) + it('should reject internal tool responses that exceed the response body cap', async () => { + const mockTool = { + id: 'test_oversized_internal_tool', + name: 'Test Oversized Internal Tool', + description: 'A test tool with an oversized response', + version: '1.0.0', + params: {}, + request: { + url: '/api/test/oversized', + method: 'GET', + }, + transformResponse: vi.fn().mockResolvedValue({ + success: true, + output: { result: 'should not run' }, + }), + } + + const originalTools = { ...tools } + ;(tools as any).test_oversized_internal_tool = mockTool + + global.fetch = Object.assign( + vi.fn().mockResolvedValue( + new Response('too large', { + status: 200, + headers: { + 'content-length': '10485761', + 'content-type': 'text/plain', + }, + }) + ), + { preconnect: vi.fn() } + ) as typeof fetch + + const result = await executeTool('test_oversized_internal_tool', {}) + + expect(result.success).toBe(false) + expect(result.error).toContain('response size limit exceeded') + expect(mockTool.transformResponse).not.toHaveBeenCalled() + + Object.assign(tools, originalTools) + }) + + it('preserves structured 413 errors from internal tool routes', async () => { + const mockTool = { + id: 'test_internal_route_413_tool', + name: 'Test Internal Route 413 Tool', + description: 'A test tool with a route-produced payload limit error', + version: '1.0.0', + params: {}, + request: { + url: '/api/test/payload-limit', + method: 'GET', + }, + transformResponse: vi.fn().mockResolvedValue({ + success: true, + output: { result: 'should not run' }, + }), + } + + const originalTools = { ...tools } + ;(tools as any).test_internal_route_413_tool = mockTool + + global.fetch = Object.assign( + vi.fn().mockResolvedValue( + new Response(JSON.stringify({ error: 'Generated image exceeds maximum size' }), { + status: 413, + headers: { 'content-type': 'application/json' }, + }) + ), + { preconnect: vi.fn() } + ) as typeof fetch + + const result = await executeTool('test_internal_route_413_tool', {}) + + expect(result.success).toBe(false) + expect(result.error).toContain('Generated image exceeds maximum size') + expect(result.error).not.toContain('Request body size limit exceeded') + expect(mockTool.transformResponse).not.toHaveBeenCalled() + + Object.assign(tools, originalTools) + }) + it('should detect external routes (full URLs) and call directly with SSRF protection', async () => { // This test verifies that external URLs are called directly (not via proxy) // with SSRF protection via secureFetchWithPinnedIP diff --git a/apps/sim/tools/index.ts b/apps/sim/tools/index.ts index de5adb1dc7a..c10788c6378 100644 --- a/apps/sim/tools/index.ts +++ b/apps/sim/tools/index.ts @@ -13,6 +13,10 @@ import { } from '@/lib/core/security/input-validation.server' import { PlatformEvents } from '@/lib/core/telemetry' import { generateRequestId } from '@/lib/core/utils/request' +import { + isPayloadSizeLimitError, + readResponseToBufferWithLimit, +} from '@/lib/core/utils/stream-limits' import { getBaseUrl, getInternalApiBaseUrl } from '@/lib/core/utils/urls' import { isUserFile } from '@/lib/core/utils/user-file' import { SIM_VIA_HEADER, serializeCallChain } from '@/lib/execution/call-chain' @@ -561,6 +565,7 @@ import { normalizeToolId } from '@/tools/normalize' * Next.js 16 has a default middleware/proxy body limit of 10MB. */ const MAX_REQUEST_BODY_SIZE_BYTES = 10 * 1024 * 1024 // 10MB +const MAX_TOOL_RESPONSE_BODY_BYTES = 10 * 1024 * 1024 // 10MB /** * User-friendly error message for body size limit exceeded @@ -568,6 +573,9 @@ const MAX_REQUEST_BODY_SIZE_BYTES = 10 * 1024 * 1024 // 10MB const BODY_SIZE_LIMIT_ERROR_MESSAGE = 'Request body size limit exceeded (10MB). The workflow data is too large to process. Try reducing the size of variables, inputs, or data being passed between blocks.' +const RESPONSE_SIZE_LIMIT_ERROR_MESSAGE = + 'Tool response size limit exceeded (10MB). The response is too large to keep in workflow data. Reduce the response size or return a file reference instead.' + /** * Validates request body size and throws a user-friendly error if exceeded * @param body - The request body string to check @@ -634,6 +642,66 @@ function handleBodySizeLimitError(error: unknown, requestId: string, context: st return false } +function handleResponseSizeLimitError(error: unknown, requestId: string, context: string): boolean { + if (!isPayloadSizeLimitError(error)) return false + + logger.error(`[${requestId}] Response body size limit exceeded for ${context}:`, { + label: error.label, + maxBytes: error.maxBytes, + observedBytes: error.observedBytes, + }) + throw new Error(RESPONSE_SIZE_LIMIT_ERROR_MESSAGE) +} + +function cloneResponseHeaders(headers: Headers | HeadersInit | undefined): Headers { + const clonedHeaders = new Headers() + if (!headers) return clonedHeaders + + if (typeof (headers as Headers).forEach === 'function') { + ;(headers as Headers).forEach((value, key) => { + clonedHeaders.set(key, value) + }) + return clonedHeaders + } + + return new Headers(headers) +} + +async function readToolResponseBody( + response: { + ok?: boolean + headers?: { get(name: string): string | null } + body?: ReadableStream | null + arrayBuffer?: () => Promise + text?: () => Promise + }, + options: { + requestId: string + toolId: string + signal?: AbortSignal + } +): Promise { + try { + return await readResponseToBufferWithLimit(response, { + maxBytes: MAX_TOOL_RESPONSE_BODY_BYTES, + label: `${options.toolId} response body`, + signal: options.signal, + }) + } catch (error) { + if (isPayloadSizeLimitError(error) || response.ok !== false) { + throw error + } + + logger.warn( + `[${options.requestId}] Failed to read non-OK response body for ${options.toolId}`, + { + error: toError(error).message, + } + ) + return Buffer.alloc(0) + } +} + /** * System parameters that should be filtered out when extracting tool arguments * These are internal parameters used by the execution framework, not tool inputs @@ -1299,6 +1367,18 @@ function parseRetryAfterHeader(header: string | null): number { return 0 } +function shouldRetryWithoutReadingBody( + status: number, + headers: { get(name: string): string | null }, + retryConfig: ResolvedRetryConfig | null | undefined, + isLastAttempt: boolean +): boolean { + if (!retryConfig || isLastAttempt || !isRetryableFailure(null, status)) { + return false + } + return parseRetryAfterHeader(headers.get('retry-after')) <= retryConfig.maxDelayMs +} + /** * Execute a tool request directly * Internal routes (/api/...) use regular fetch @@ -1392,6 +1472,7 @@ async function executeToolRequest( let response: Response | undefined let lastError: unknown + const nullBodyStatuses = new Set([101, 204, 205, 304]) for (let attempt = 0; attempt < maxAttempts; attempt++) { const isLastAttempt = attempt === maxAttempts - 1 @@ -1416,12 +1497,39 @@ async function executeToolRequest( } try { - response = await fetch(fullUrl, { + const internalResponse = await fetch(fullUrl, { method: requestParams.method, headers: headers, body: requestParams.body, signal: controller.signal, }) + if ( + nullBodyStatuses.has(internalResponse.status) || + shouldRetryWithoutReadingBody( + internalResponse.status, + internalResponse.headers, + retryConfig, + isLastAttempt + ) + ) { + internalResponse.body?.cancel().catch(() => {}) + response = new Response(null, { + status: internalResponse.status, + statusText: internalResponse.statusText, + headers: cloneResponseHeaders(internalResponse.headers), + }) + } else { + const bodyBuffer = await readToolResponseBody(internalResponse, { + requestId, + toolId, + signal: controller.signal, + }) + response = new Response(new Uint8Array(bodyBuffer), { + status: internalResponse.status, + statusText: internalResponse.statusText, + headers: cloneResponseHeaders(internalResponse.headers), + }) + } } catch (error) { if (error instanceof Error && error.name === 'AbortError') { // Distinguish caller cancellation from local timeout: rethrow the AbortError @@ -1449,21 +1557,34 @@ async function executeToolRequest( headers: headersRecord, body: requestParams.body ?? undefined, timeout: requestParams.timeout, + maxResponseBytes: MAX_TOOL_RESPONSE_BODY_BYTES, signal, }) const responseHeaders = new Headers(secureResponse.headers.toRecord()) - const nullBodyStatuses = new Set([101, 204, 205, 304]) - if (nullBodyStatuses.has(secureResponse.status)) { + if ( + nullBodyStatuses.has(secureResponse.status) || + shouldRetryWithoutReadingBody( + secureResponse.status, + responseHeaders, + retryConfig, + isLastAttempt + ) + ) { + secureResponse.body?.cancel().catch(() => {}) response = new Response(null, { status: secureResponse.status, statusText: secureResponse.statusText, headers: responseHeaders, }) } else { - const bodyBuffer = await secureResponse.arrayBuffer() - response = new Response(bodyBuffer, { + const bodyBuffer = await readToolResponseBody(secureResponse, { + requestId, + toolId, + signal, + }) + response = new Response(new Uint8Array(bodyBuffer), { status: secureResponse.status, statusText: secureResponse.statusText, headers: responseHeaders, @@ -1484,7 +1605,7 @@ async function executeToolRequest( `[${requestId}] Retrying ${toolId} after error (attempt ${attempt + 1}/${maxAttempts})`, { delayMs } ) - await new Promise((r) => setTimeout(r, delayMs)) + await sleep(delayMs) continue } @@ -1517,7 +1638,7 @@ async function executeToolRequest( `[${requestId}] Retrying ${toolId} after HTTP ${response.status} (attempt ${attempt + 1}/${maxAttempts})`, { delayMs } ) - await new Promise((r) => setTimeout(r, delayMs)) + await sleep(delayMs) continue } @@ -1528,29 +1649,18 @@ async function executeToolRequest( throw lastError ?? new Error(`Request failed for ${toolId}`) } - // For non-OK responses, attempt JSON first; if parsing fails, fall back to text if (!response.ok) { - // Check for 413 (Entity Too Large) - body size limit exceeded - if (response.status === 413) { - logger.error(`[${requestId}] Request body too large for ${toolId} (HTTP 413):`, { - status: response.status, - statusText: response.statusText, - }) - throw new Error(BODY_SIZE_LIMIT_ERROR_MESSAGE) - } - let errorData: any try { - errorData = await response.json() - } catch (jsonError) { - // JSON parsing failed, fall back to reading as text for error extraction - logger.warn(`[${requestId}] Response is not JSON for ${toolId}, reading as text`) + const errorText = await response.text() try { - errorData = await response.text() - } catch (textError) { - logger.error(`[${requestId}] Failed to read response body for ${toolId}`) - errorData = null + errorData = JSON.parse(errorText) + } catch { + errorData = errorText } + } catch { + logger.error(`[${requestId}] Failed to read response body for ${toolId}`) + errorData = null } const errorInfo: ErrorInfo = { @@ -1560,6 +1670,20 @@ async function executeToolRequest( } const errorToTransform = createTransformedErrorFromErrorInfo(errorInfo, tool.errorExtractor) + const hasStructuredErrorPayload = + errorData !== null && + typeof errorData === 'object' && + !Array.isArray(errorData) && + ('error' in errorData || 'message' in errorData) + + if (response.status === 413 && !hasStructuredErrorPayload) { + logger.error(`[${requestId}] Request body too large for ${toolId} (HTTP 413):`, { + status: response.status, + statusText: response.statusText, + errorData, + }) + throw new Error(BODY_SIZE_LIMIT_ERROR_MESSAGE) + } logger.error(`[${requestId}] Internal API error for ${toolId}:`, { status: errorInfo.status, @@ -1636,6 +1760,8 @@ async function executeToolRequest( error: undefined, } } catch (error: any) { + handleResponseSizeLimitError(error, requestId, toolId) + // Check if this is a body size limit error and throw user-friendly message handleBodySizeLimitError(error, requestId, toolId) diff --git a/apps/sim/tools/typeform/files.test.ts b/apps/sim/tools/typeform/files.test.ts new file mode 100644 index 00000000000..0bee8b7d767 --- /dev/null +++ b/apps/sim/tools/typeform/files.test.ts @@ -0,0 +1,117 @@ +/** + * @vitest-environment node + */ +import { beforeEach, describe, expect, it, vi } from 'vitest' + +const { mockUploadExecutionFile } = vi.hoisted(() => ({ + mockUploadExecutionFile: vi.fn(), +})) + +vi.mock('@/lib/uploads/contexts/execution', () => ({ + uploadExecutionFile: mockUploadExecutionFile, +})) + +import { filesTool } from '@/tools/typeform/files' + +describe('Typeform files tool', () => { + beforeEach(() => { + vi.clearAllMocks() + mockUploadExecutionFile.mockResolvedValue({ + id: 'file-1', + name: 'upload.pdf', + size: 7, + type: 'application/pdf', + url: '/api/files/serve/execution/file-1', + key: 'execution/workflow/file-1', + context: 'execution', + }) + }) + + it('stores downloaded files as execution file references', async () => { + const response = new Response('content', { + status: 200, + headers: { + 'content-type': 'application/pdf', + 'content-disposition': 'attachment; filename="upload.pdf"', + }, + }) + + const result = await filesTool.transformResponse?.(response, { + formId: 'form-1', + responseId: 'response-1', + fieldId: 'field-1', + filename: 'upload.pdf', + apiKey: 'token', + _context: { + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', + userId: 'user-1', + }, + }) + + expect(mockUploadExecutionFile).toHaveBeenCalledWith( + { workspaceId: 'workspace-1', workflowId: 'workflow-1', executionId: 'execution-1' }, + Buffer.from('content'), + 'upload.pdf', + 'application/pdf', + 'user-1' + ) + expect(result?.output.file).toMatchObject({ + key: 'execution/workflow/file-1', + context: 'execution', + mimeType: 'application/pdf', + }) + expect(result?.output.file).not.toHaveProperty('data') + }) + + it('preserves legacy base64 data when execution context is unavailable', async () => { + const bytes = Uint8Array.from([0, 255, 1, 254]) + const response = new Response(bytes, { + status: 200, + headers: { + 'content-type': 'application/pdf', + 'content-disposition': 'attachment; filename="upload.pdf"', + }, + }) + + const result = await filesTool.transformResponse?.(response, { + formId: 'form-1', + responseId: 'response-1', + fieldId: 'field-1', + filename: 'upload.pdf', + apiKey: 'token', + }) + + expect(mockUploadExecutionFile).not.toHaveBeenCalled() + expect(result?.output.file).toMatchObject({ + name: 'upload.pdf', + mimeType: 'application/pdf', + data: Buffer.from(bytes).toString('base64'), + size: bytes.byteLength, + }) + }) + + it('rejects large downloads when execution context is unavailable', async () => { + const response = new Response(new Uint8Array(8 * 1024 * 1024), { + status: 200, + headers: { + 'content-type': 'application/pdf', + 'content-disposition': 'attachment; filename="upload.pdf"', + }, + }) + + await expect( + filesTool.transformResponse?.(response, { + formId: 'form-1', + responseId: 'response-1', + fieldId: 'field-1', + filename: 'upload.pdf', + apiKey: 'token', + }) + ).rejects.toMatchObject({ + name: 'PayloadSizeLimitError', + label: 'Typeform legacy inline file', + }) + }) +}) diff --git a/apps/sim/tools/typeform/files.ts b/apps/sim/tools/typeform/files.ts index 6b0fe81e2f8..93b1542371b 100644 --- a/apps/sim/tools/typeform/files.ts +++ b/apps/sim/tools/typeform/files.ts @@ -1,6 +1,34 @@ +import { + PayloadSizeLimitError, + readResponseToBufferWithLimit, +} from '@/lib/core/utils/stream-limits' +import { uploadExecutionFile } from '@/lib/uploads/contexts/execution' +import type { UserFile } from '@/executor/types' import type { TypeformFilesParams, TypeformFilesResponse } from '@/tools/typeform/types' import type { ToolConfig } from '@/tools/types' +const MAX_TYPEFORM_FILE_BYTES = 10 * 1024 * 1024 +const MAX_LEGACY_INLINE_FILE_BYTES = 7 * 1024 * 1024 + +function getExecutionContext(params?: TypeformFilesParams): { + context?: { workspaceId: string; workflowId: string; executionId: string } + userId?: string +} { + const context = ( + params as (TypeformFilesParams & { _context?: Record }) | undefined + )?._context + const workspaceId = typeof context?.workspaceId === 'string' ? context.workspaceId : undefined + const workflowId = typeof context?.workflowId === 'string' ? context.workflowId : undefined + const executionId = typeof context?.executionId === 'string' ? context.executionId : undefined + const userId = typeof context?.userId === 'string' ? context.userId : undefined + + if (!workspaceId || !workflowId || !executionId) { + return { userId } + } + + return { context: { workspaceId, workflowId, executionId }, userId } +} + export const filesTool: ToolConfig = { id: 'typeform_files', name: 'Typeform Files', @@ -73,8 +101,10 @@ export const filesTool: ToolConfig = // For file downloads, we get the file directly const contentType = response.headers.get('content-type') || 'application/octet-stream' const contentDisposition = response.headers.get('content-disposition') || '' - const arrayBuffer = await response.arrayBuffer() - const buffer = Buffer.from(arrayBuffer) + const buffer = await readResponseToBufferWithLimit(response, { + maxBytes: MAX_TYPEFORM_FILE_BYTES, + label: 'Typeform file download', + }) // Try to extract filename from content-disposition if possible let filename = '' @@ -106,16 +136,36 @@ export const filesTool: ToolConfig = } } + const { context, userId } = getExecutionContext(params) + let storedFile: (UserFile & { mimeType?: string }) | undefined + + if (context) { + const userFile = await uploadExecutionFile(context, buffer, filename, contentType, userId) + storedFile = { ...userFile, mimeType: contentType } + } + + if (!storedFile && buffer.length > MAX_LEGACY_INLINE_FILE_BYTES) { + throw new PayloadSizeLimitError({ + label: 'Typeform legacy inline file', + maxBytes: MAX_LEGACY_INLINE_FILE_BYTES, + observedBytes: buffer.length, + }) + } + return { success: true, output: { - fileUrl: fileUrl || '', - file: { - name: filename, - mimeType: contentType, - data: buffer.toString('base64'), - size: buffer.length, - }, + fileUrl: storedFile?.url || fileUrl || '', + file: storedFile + ? { + ...storedFile, + } + : { + name: filename, + mimeType: contentType, + data: buffer.toString('base64'), + size: buffer.length, + }, contentType, filename, }, diff --git a/apps/sim/tools/typeform/types.ts b/apps/sim/tools/typeform/types.ts index c6f639a512d..17179226e4d 100644 --- a/apps/sim/tools/typeform/types.ts +++ b/apps/sim/tools/typeform/types.ts @@ -1,3 +1,4 @@ +import type { UserFile } from '@/executor/types' import type { ToolFileData, ToolResponse } from '@/tools/types' export interface TypeformFilesParams { @@ -7,12 +8,13 @@ export interface TypeformFilesParams { filename: string inline?: boolean apiKey: string + _context?: Record } export interface TypeformFilesResponse extends ToolResponse { output: { fileUrl: string - file: ToolFileData + file: (UserFile & { mimeType?: string }) | ToolFileData contentType: string filename: string }