From 1a5025d22aab294654948f4133ee7ae8da99f385 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Sun, 15 Jun 2025 19:22:46 -0700 Subject: [PATCH 1/8] added blob storage option for azure, refactored storage client to be provider agnostic, tested kb & file upload and s3 is undisrupted, still have to test blob --- .gitignore | 1 - apps/sim/app/api/files/delete/route.test.ts | 36 +- apps/sim/app/api/files/delete/route.ts | 76 +- apps/sim/app/api/files/parse/route.test.ts | 30 +- apps/sim/app/api/files/parse/route.ts | 716 +++++++----------- .../sim/app/api/files/presigned/route.test.ts | 322 ++++++++ apps/sim/app/api/files/presigned/route.ts | 134 +++- .../api/files/serve/[...path]/route.test.ts | 232 ++++-- .../app/api/files/serve/[...path]/route.ts | 126 ++- apps/sim/app/api/files/upload/route.test.ts | 96 ++- apps/sim/app/api/files/upload/route.ts | 28 +- apps/sim/app/api/files/utils.ts | 17 + apps/sim/app/api/knowledge/utils.test.ts | 44 +- apps/sim/app/api/knowledge/utils.ts | 26 +- apps/sim/app/api/logs/cleanup/route.ts | 2 +- apps/sim/lib/documents/document-processor.ts | 543 ++++++------- apps/sim/lib/env.ts | 5 + apps/sim/lib/uploads/blob/blob-client.test.ts | 209 +++++ apps/sim/lib/uploads/blob/blob-client.ts | 290 +++++++ apps/sim/lib/uploads/blob/index.ts | 11 + apps/sim/lib/uploads/index.ts | 28 + apps/sim/lib/uploads/s3/index.ts | 11 + .../lib/uploads/{ => s3}/s3-client.test.ts | 223 +++--- apps/sim/lib/uploads/{ => s3}/s3-client.ts | 17 +- apps/sim/lib/uploads/setup.server.ts | 61 +- apps/sim/lib/uploads/setup.ts | 57 +- apps/sim/lib/uploads/storage-client.ts | 203 +++++ apps/sim/package.json | 1 + bun.lock | 31 + 29 files changed, 2405 insertions(+), 1171 deletions(-) create mode 100644 apps/sim/app/api/files/presigned/route.test.ts create mode 100644 apps/sim/lib/uploads/blob/blob-client.test.ts create mode 100644 apps/sim/lib/uploads/blob/blob-client.ts create mode 100644 apps/sim/lib/uploads/blob/index.ts create mode 100644 
apps/sim/lib/uploads/index.ts create mode 100644 apps/sim/lib/uploads/s3/index.ts rename apps/sim/lib/uploads/{ => s3}/s3-client.test.ts (54%) rename apps/sim/lib/uploads/{ => s3}/s3-client.ts (92%) create mode 100644 apps/sim/lib/uploads/storage-client.ts diff --git a/.gitignore b/.gitignore index eac4fb83919..08dedb8678c 100644 --- a/.gitignore +++ b/.gitignore @@ -29,7 +29,6 @@ sim-standalone.tar.gz # misc .DS_Store *.pem -uploads/ # env files .env diff --git a/apps/sim/app/api/files/delete/route.test.ts b/apps/sim/app/api/files/delete/route.test.ts index 4bf062f0e46..eb5f1ae4445 100644 --- a/apps/sim/app/api/files/delete/route.test.ts +++ b/apps/sim/app/api/files/delete/route.test.ts @@ -1,17 +1,12 @@ -/** - * Tests for file delete API route - * - * @vitest-environment node - */ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' import { createMockRequest } from '@/app/api/__test-utils__/utils' describe('File Delete API Route', () => { - // Mock file system modules + // Mock file system and storage modules const mockUnlink = vi.fn().mockResolvedValue(undefined) const mockExistsSync = vi.fn().mockReturnValue(true) - const mockDeleteFromS3 = vi.fn().mockResolvedValue(undefined) - const mockEnsureUploadsDirectory = vi.fn().mockResolvedValue(true) + const mockDeleteFile = vi.fn().mockResolvedValue(undefined) + const mockIsUsingCloudStorage = vi.fn().mockReturnValue(false) beforeEach(() => { vi.resetModules() @@ -25,9 +20,10 @@ describe('File Delete API Route', () => { unlink: mockUnlink, })) - // Mock the S3 client - vi.doMock('@/lib/uploads/s3-client', () => ({ - deleteFromS3: mockDeleteFromS3, + // Mock the storage abstraction layer + vi.doMock('@/lib/uploads', () => ({ + deleteFile: mockDeleteFile, + isUsingCloudStorage: mockIsUsingCloudStorage, })) // Mock the logger @@ -40,15 +36,11 @@ describe('File Delete API Route', () => { }), })) - // Configure upload directory and S3 mode with all required exports + // Configure upload directory and 
storage mode with all required exports vi.doMock('@/lib/uploads/setup', () => ({ UPLOAD_DIR: '/test/uploads', USE_S3_STORAGE: false, - ensureUploadsDirectory: mockEnsureUploadsDirectory, - S3_CONFIG: { - bucket: 'test-bucket', - region: 'test-region', - }, + USE_BLOB_STORAGE: false, })) // Skip setup.server.ts side effects @@ -117,8 +109,12 @@ describe('File Delete API Route', () => { vi.doMock('@/lib/uploads/setup', () => ({ UPLOAD_DIR: '/test/uploads', USE_S3_STORAGE: true, + USE_BLOB_STORAGE: false, })) + // Mock cloud storage mode + mockIsUsingCloudStorage.mockReturnValue(true) + // Create request with S3 file path const req = createMockRequest('POST', { filePath: '/api/files/serve/s3/1234567890-test-file.txt', @@ -134,10 +130,10 @@ describe('File Delete API Route', () => { // Verify response expect(response.status).toBe(200) expect(data).toHaveProperty('success', true) - expect(data).toHaveProperty('message', 'File deleted successfully from S3') + expect(data).toHaveProperty('message', 'File deleted successfully from cloud storage') - // Verify deleteFromS3 was called with correct key - expect(mockDeleteFromS3).toHaveBeenCalledWith('1234567890-test-file.txt') + // Verify deleteFile was called with correct key + expect(mockDeleteFile).toHaveBeenCalledWith('1234567890-test-file.txt') }) it('should handle missing file path', async () => { diff --git a/apps/sim/app/api/files/delete/route.ts b/apps/sim/app/api/files/delete/route.ts index d0a86d03c1b..ac8c9af687b 100644 --- a/apps/sim/app/api/files/delete/route.ts +++ b/apps/sim/app/api/files/delete/route.ts @@ -3,17 +3,19 @@ import { unlink } from 'fs/promises' import { join } from 'path' import type { NextRequest } from 'next/server' import { createLogger } from '@/lib/logs/console-logger' -import { deleteFromS3 } from '@/lib/uploads/s3-client' -import { UPLOAD_DIR, USE_S3_STORAGE } from '@/lib/uploads/setup' +import { deleteFile, isUsingCloudStorage } from '@/lib/uploads' +import { UPLOAD_DIR } from 
'@/lib/uploads/setup' import '@/lib/uploads/setup.server' import { createErrorResponse, createOptionsResponse, createSuccessResponse, + extractBlobKey, extractFilename, extractS3Key, InvalidRequestError, + isBlobPath, isS3Path, } from '../utils' @@ -38,8 +40,8 @@ export async function POST(request: NextRequest) { try { // Use appropriate handler based on path and environment const result = - isS3Path(filePath) || USE_S3_STORAGE - ? await handleS3FileDelete(filePath) + isS3Path(filePath) || isBlobPath(filePath) || isUsingCloudStorage() + ? await handleCloudFileDelete(filePath) : await handleLocalFileDelete(filePath) // Return success response @@ -57,24 +59,24 @@ export async function POST(request: NextRequest) { } /** - * Handle S3 file deletion + * Handle cloud file deletion (S3 or Azure Blob) */ -async function handleS3FileDelete(filePath: string) { - // Extract the S3 key from the path - const s3Key = extractS3Key(filePath) - logger.info(`Deleting file from S3: ${s3Key}`) +async function handleCloudFileDelete(filePath: string) { + // Extract the key from the path (works for both S3 and Blob paths) + const key = extractCloudKey(filePath) + logger.info(`Deleting file from cloud storage: ${key}`) try { - // Delete from S3 - await deleteFromS3(s3Key) - logger.info(`File successfully deleted from S3: ${s3Key}`) + // Delete from cloud storage using abstraction layer + await deleteFile(key) + logger.info(`File successfully deleted from cloud storage: ${key}`) return { success: true as const, - message: 'File deleted successfully from S3', + message: 'File deleted successfully from cloud storage', } } catch (error) { - logger.error('Error deleting file from S3:', error) + logger.error('Error deleting file from cloud storage:', error) throw error } } @@ -83,30 +85,52 @@ async function handleS3FileDelete(filePath: string) { * Handle local file deletion */ async function handleLocalFileDelete(filePath: string) { - // Extract the filename from the path const filename = 
extractFilename(filePath) - logger.info('Extracted filename for deletion:', filename) - const fullPath = join(UPLOAD_DIR, filename) - logger.info('Full file path for deletion:', fullPath) - // Check if file exists + logger.info(`Deleting local file: ${fullPath}`) + if (!existsSync(fullPath)) { - logger.info(`File not found for deletion at path: ${fullPath}`) + logger.info(`File not found, but that's okay: ${fullPath}`) return { success: true as const, message: "File not found, but that's okay", } } - // Delete the file - await unlink(fullPath) - logger.info(`File successfully deleted: ${fullPath}`) + try { + await unlink(fullPath) + logger.info(`File successfully deleted: ${fullPath}`) + + return { + success: true as const, + message: 'File deleted successfully', + } + } catch (error) { + logger.error('Error deleting local file:', error) + throw error + } +} + +/** + * Extract cloud storage key from file path (works for both S3 and Blob) + */ +function extractCloudKey(filePath: string): string { + if (isS3Path(filePath)) { + return extractS3Key(filePath) + } + + if (isBlobPath(filePath)) { + return extractBlobKey(filePath) + } - return { - success: true as const, - message: 'File deleted successfully', + // Backwards-compatibility: allow generic paths like "/api/files/serve/" + if (filePath.startsWith('/api/files/serve/')) { + return decodeURIComponent(filePath.substring('/api/files/serve/'.length)) } + + // As a last resort assume the incoming string is already a raw key. 
+ return filePath } /** diff --git a/apps/sim/app/api/files/parse/route.test.ts b/apps/sim/app/api/files/parse/route.test.ts index 4258b5dd91b..7f22c128ea9 100644 --- a/apps/sim/app/api/files/parse/route.test.ts +++ b/apps/sim/app/api/files/parse/route.test.ts @@ -22,7 +22,8 @@ describe('File Parse API Route', () => { const mockUnlink = vi.fn().mockResolvedValue(undefined) const mockAccessFs = vi.fn().mockResolvedValue(undefined) const mockStatFs = vi.fn().mockImplementation(() => ({ isFile: () => true })) - const mockDownloadFromS3 = vi.fn().mockResolvedValue(Buffer.from('test s3 file content')) + const mockDownloadFile = vi.fn().mockResolvedValue(Buffer.from('test cloud file content')) + const mockIsUsingCloudStorage = vi.fn().mockReturnValue(false) const mockParseFile = vi.fn().mockResolvedValue({ content: 'parsed content', metadata: { pageCount: 1 }, @@ -59,8 +60,9 @@ describe('File Parse API Route', () => { stat: mockStatFs, })) - vi.doMock('@/lib/uploads/s3-client', () => ({ - downloadFromS3: mockDownloadFromS3, + vi.doMock('@/lib/uploads', () => ({ + downloadFile: mockDownloadFile, + isUsingCloudStorage: mockIsUsingCloudStorage, })) vi.doMock('@/lib/file-parsers', () => ({ @@ -90,10 +92,7 @@ describe('File Parse API Route', () => { vi.doMock('@/lib/uploads/setup', () => ({ UPLOAD_DIR: '/test/uploads', USE_S3_STORAGE: false, - S3_CONFIG: { - bucket: 'test-bucket', - region: 'test-region', - }, + USE_BLOB_STORAGE: false, })) vi.doMock('@/lib/uploads/setup.server', () => ({})) @@ -169,20 +168,27 @@ describe('File Parse API Route', () => { }) it('should handle S3 access errors gracefully', async () => { - mockDownloadFromS3.mockRejectedValueOnce(new Error('S3 access denied')) + // Mock cloud storage mode + mockIsUsingCloudStorage.mockReturnValue(true) - const req = createMockRequest('POST', { - filePath: '/api/files/serve/s3/access-denied.pdf', + // Mock download failure + mockDownloadFile.mockRejectedValueOnce(new Error('Access denied')) + + const req = new 
NextRequest('http://localhost:3000/api/files/parse', { + method: 'POST', + body: JSON.stringify({ + filePath: '/api/files/serve/s3/test-file.txt', + }), }) const { POST } = await import('./route') const response = await POST(req) const data = await response.json() - expect(response.status).toBe(200) + expect(response.status).toBe(500) expect(data).toHaveProperty('success', false) expect(data).toHaveProperty('error') - expect(data.error).toContain('S3 access denied') + expect(data.error).toContain('Access denied') }) it('should handle access errors gracefully', async () => { diff --git a/apps/sim/app/api/files/parse/route.ts b/apps/sim/app/api/files/parse/route.ts index c635a815bd3..6ab1f48bcd5 100644 --- a/apps/sim/app/api/files/parse/route.ts +++ b/apps/sim/app/api/files/parse/route.ts @@ -1,14 +1,13 @@ import { Buffer } from 'buffer' import { createHash } from 'crypto' -import fsPromises, { readFile, unlink, writeFile } from 'fs/promises' -import { tmpdir } from 'os' +import fsPromises, { readFile } from 'fs/promises' import path from 'path' import binaryExtensionsList from 'binary-extensions' import { type NextRequest, NextResponse } from 'next/server' import { isSupportedFileType, parseFile } from '@/lib/file-parsers' import { createLogger } from '@/lib/logs/console-logger' -import { downloadFromS3 } from '@/lib/uploads/s3-client' -import { UPLOAD_DIR, USE_S3_STORAGE } from '@/lib/uploads/setup' +import { downloadFile, isUsingCloudStorage } from '@/lib/uploads' +import { UPLOAD_DIR } from '@/lib/uploads/setup' import '@/lib/uploads/setup.server' export const dynamic = 'force-dynamic' @@ -18,27 +17,19 @@ const logger = createLogger('FilesParseAPI') const MAX_DOWNLOAD_SIZE_BYTES = 100 * 1024 * 1024 // 100 MB const DOWNLOAD_TIMEOUT_MS = 30000 // 30 seconds -interface ParseSuccessResult { - success: true - output: { - content: string +interface ParseResult { + success: boolean + content?: string + error?: string + filePath: string + metadata?: { fileType: string 
size: number - name: string - binary: boolean - metadata?: Record + hash: string + processingTime: number } - filePath?: string -} - -interface ParseErrorResult { - success: false - error: string - filePath?: string } -type ParseResult = ParseSuccessResult | ParseErrorResult - // MIME type mapping for various file extensions const fileTypeMap: Record = { // Text formats @@ -74,52 +65,85 @@ const fileTypeMap: Record = { * Main API route handler */ export async function POST(request: NextRequest) { + const startTime = Date.now() + try { const requestData = await request.json() const { filePath, fileType } = requestData - logger.info('File parse request received:', { filePath, fileType }) - if (!filePath) { - return NextResponse.json({ error: 'No file path provided' }, { status: 400 }) + return NextResponse.json({ success: false, error: 'No file path provided' }, { status: 400 }) } - // Handle both single file path and array of file paths - const filePaths = Array.isArray(filePath) ? filePath : [filePath] - - // Parse each file - const results = await Promise.all( - filePaths.map(async (singleFilePath) => { - try { - return await parseFileSingle(singleFilePath, fileType) - } catch (error) { - logger.error(`Error parsing file ${singleFilePath}:`, error) - return { - success: false, - error: (error as Error).message, - filePath: singleFilePath, - } as ParseErrorResult + logger.info('File parse request received:', { filePath, fileType }) + + // Handle multiple files + if (Array.isArray(filePath)) { + const results = [] + for (const path of filePath) { + const result = await parseFileSingle(path, fileType) + // Add processing time to metadata + if (result.metadata) { + result.metadata.processingTime = Date.now() - startTime + } + + // Transform each result to match expected frontend format + if (result.success) { + results.push({ + success: true, + output: { + content: result.content, + name: result.filePath.split('/').pop() || 'unknown', + fileType: 
result.metadata?.fileType || 'application/octet-stream', + size: result.metadata?.size || 0, + binary: false, // We only return text content + }, + filePath: result.filePath, + }) + } else { + results.push(result) } + } + + return NextResponse.json({ + success: true, + results, }) - ) + } - // If it was a single file request, return a single result - // Otherwise return an array of results - if (!Array.isArray(filePath)) { - // Single file was requested - const result = results[0] - return NextResponse.json(result) + // Handle single file + const result = await parseFileSingle(filePath, fileType) + + // Add processing time to metadata + if (result.metadata) { + result.metadata.processingTime = Date.now() - startTime } - // Multiple files were requested - return NextResponse.json({ - success: true, - results, - }) + // Transform single file result to match expected frontend format + if (result.success) { + return NextResponse.json({ + success: true, + output: { + content: result.content, + name: result.filePath.split('/').pop() || 'unknown', + fileType: result.metadata?.fileType || 'application/octet-stream', + size: result.metadata?.size || 0, + binary: false, // We only return text content + }, + }) + } + + // Only return 500 for actual server errors, not file processing failures + // File processing failures (like file not found, parsing errors) should return 200 with success:false + return NextResponse.json(result) } catch (error) { - logger.error('Error parsing file(s):', error) + logger.error('Error in file parse API:', error) return NextResponse.json( - { error: 'Failed to parse file(s)', message: (error as Error).message }, + { + success: false, + error: error instanceof Error ? 
error.message : 'Unknown error occurred', + filePath: '', + }, { status: 500 } ) } @@ -131,17 +155,28 @@ export async function POST(request: NextRequest) { async function parseFileSingle(filePath: string, fileType?: string): Promise { logger.info('Parsing file:', filePath) + // Validate path for security before any processing + const pathValidation = validateFilePath(filePath) + if (!pathValidation.isValid) { + return { + success: false, + error: pathValidation.error || 'Invalid path', + filePath, + } + } + // Check if this is an external URL if (filePath.startsWith('http://') || filePath.startsWith('https://')) { return handleExternalUrl(filePath, fileType) } - // Check if this is an S3 path + // Check if this is a cloud storage path (S3 or Blob) const isS3Path = filePath.includes('/api/files/serve/s3/') + const isBlobPath = filePath.includes('/api/files/serve/blob/') - // Use S3 handler if it's an S3 path or we're in S3 mode - if (isS3Path || USE_S3_STORAGE) { - return handleS3File(filePath, fileType) + // Use cloud handler if it's a cloud path or we're in cloud mode + if (isS3Path || isBlobPath || isUsingCloudStorage()) { + return handleCloudFile(filePath, fileType) } // Use local handler for local files @@ -149,136 +184,107 @@ async function parseFileSingle(filePath: string, fileType?: string): Promise { - logger.info(`Handling external URL: ${url}`) - - try { - // Create a unique filename for the temporary file - const urlHash = createHash('md5').update(url).digest('hex') - const urlObj = new URL(url) - const originalFilename = urlObj.pathname.split('/').pop() || 'download' - const tmpFilename = `${urlHash}-${originalFilename}` - const tmpFilePath = path.join(tmpdir(), tmpFilename) - - // Download the file using native fetch - logger.info(`Downloading file from URL to ${tmpFilePath}`) - const response = await fetch(url, { - method: 'GET', - headers: { - 'User-Agent': 'SimStudio/1.0', - }, - signal: AbortSignal.timeout(DOWNLOAD_TIMEOUT_MS), // Add timeout - }) 
+function validateFilePath(filePath: string): { isValid: boolean; error?: string } { + // Check for null bytes + if (filePath.includes('\0')) { + return { isValid: false, error: 'Invalid path: null byte detected' } + } - if (!response.ok) { - throw new Error(`Failed to download file: ${response.status} ${response.statusText}`) - } + // Check for path traversal attempts + if (filePath.includes('..')) { + return { isValid: false, error: 'Access denied: path traversal detected' } + } - // Check file size before downloading content - const contentLength = response.headers.get('content-length') - if (contentLength) { - const fileSize = Number.parseInt(contentLength, 10) - if (fileSize > MAX_DOWNLOAD_SIZE_BYTES) { - throw new Error( - `File size (${prettySize(fileSize)}) exceeds the limit of ${prettySize( - MAX_DOWNLOAD_SIZE_BYTES - )}.` - ) - } - } else { - logger.warn('Content-Length header missing, cannot verify file size before download.') - } + // Check for tilde characters (home directory access) + if (filePath.includes('~')) { + return { isValid: false, error: 'Invalid path: tilde character not allowed' } + } - // Get the file buffer from response - const arrayBuffer = await response.arrayBuffer() - const fileBuffer = Buffer.from(arrayBuffer) + // Check for absolute paths outside allowed directories + if (filePath.startsWith('/') && !filePath.startsWith('/api/files/serve/')) { + return { isValid: false, error: 'Path outside allowed directory' } + } - // Write to temporary file - await writeFile(tmpFilePath, fileBuffer) - logger.info(`Downloaded ${fileBuffer.length} bytes to ${tmpFilePath}`) + // Check for Windows absolute paths + if (/^[A-Za-z]:\\/.test(filePath)) { + return { isValid: false, error: 'Path outside allowed directory' } + } - // Determine file extension and type - const contentType = response.headers.get('content-type') || '' - const extension = - path.extname(originalFilename).toLowerCase().substring(1) || - (contentType ? 
contentType.split('/').pop() || 'unknown' : 'unknown') + return { isValid: true } +} - try { - // Process based on file type - let result: ParseResult - - if (extension === 'pdf') { - result = await handlePdfBuffer(fileBuffer, originalFilename, fileType, url) - } else if (extension === 'csv') { - result = await handleCsvBuffer(fileBuffer, originalFilename, fileType, url) - } else if (isSupportedFileType(extension)) { - result = await handleGenericTextBuffer( - fileBuffer, - originalFilename, - extension, - fileType, - url - ) - } else { - result = handleGenericBuffer(fileBuffer, originalFilename, extension, fileType) - } +/** + * Handle external URL + */ +async function handleExternalUrl(url: string, fileType?: string): Promise { + try { + logger.info('Fetching external URL:', url) - // Clean up temporary file - try { - await unlink(tmpFilePath) - logger.info(`Deleted temporary file: ${tmpFilePath}`) - } catch (cleanupError) { - logger.warn(`Failed to delete temporary file ${tmpFilePath}:`, cleanupError) - } + const response = await fetch(url) + if (!response.ok) { + throw new Error(`Failed to fetch URL: ${response.status} ${response.statusText}`) + } - return result - } catch (parseError) { - logger.error(`Error parsing downloaded file: ${url}`, parseError) + const buffer = Buffer.from(await response.arrayBuffer()) + logger.info(`Downloaded file from URL: ${url}, size: ${buffer.length} bytes`) - // Clean up temporary file on error - try { - await unlink(tmpFilePath) - } catch (_cleanupError) { - // Ignore cleanup errors on parse failure - } + // Extract filename from URL + const urlPath = new URL(url).pathname + const filename = urlPath.split('/').pop() || 'download' + const extension = path.extname(filename).toLowerCase().substring(1) - throw parseError + // Process the file based on its content type + if (extension === 'pdf') { + return await handlePdfBuffer(buffer, filename, fileType, url) + } + if (extension === 'csv') { + return await handleCsvBuffer(buffer, 
filename, fileType, url) + } + if (isSupportedFileType(extension)) { + return await handleGenericTextBuffer(buffer, filename, extension, fileType, url) } + + // For binary or unknown files + return handleGenericBuffer(buffer, filename, extension, fileType) } catch (error) { logger.error(`Error handling external URL ${url}:`, error) - let errorMessage = `Failed to download or process file from URL: ${(error as Error).message}` - if ((error as Error).name === 'TimeoutError') { - errorMessage = `Download timed out after ${DOWNLOAD_TIMEOUT_MS / 1000} seconds.` - } return { success: false, - error: errorMessage, + error: `Error fetching URL: ${(error as Error).message}`, filePath: url, } } } /** - * Handle file stored in S3 + * Handle file stored in cloud storage (S3 or Azure Blob) */ -async function handleS3File(filePath: string, fileType?: string): Promise { +async function handleCloudFile(filePath: string, fileType?: string): Promise { try { - // Extract the S3 key from the path - const isS3Path = filePath.includes('/api/files/serve/s3/') - const s3Key = isS3Path - ? 
decodeURIComponent(filePath.split('/api/files/serve/s3/')[1]) - : filePath + // Extract the cloud key from the path + let cloudKey: string + if (filePath.includes('/api/files/serve/s3/')) { + cloudKey = decodeURIComponent(filePath.split('/api/files/serve/s3/')[1]) + } else if (filePath.includes('/api/files/serve/blob/')) { + cloudKey = decodeURIComponent(filePath.split('/api/files/serve/blob/')[1]) + } else if (filePath.startsWith('/api/files/serve/')) { + // Backwards-compatibility: path like "/api/files/serve/" + cloudKey = decodeURIComponent(filePath.substring('/api/files/serve/'.length)) + } else { + // Assume raw key provided + cloudKey = filePath + } - logger.info('Extracted S3 key:', s3Key) + logger.info('Extracted cloud key:', cloudKey) - // Download the file from S3 - const fileBuffer = await downloadFromS3(s3Key) - logger.info(`Downloaded file from S3: ${s3Key}, size: ${fileBuffer.length} bytes`) + // Download the file from cloud storage - this can throw for access errors + const fileBuffer = await downloadFile(cloudKey) + logger.info(`Downloaded file from cloud storage: ${cloudKey}, size: ${fileBuffer.length} bytes`) - // Extract the filename from the S3 key - const filename = s3Key.split('/').pop() || s3Key + // Extract the filename from the cloud key + const filename = cloudKey.split('/').pop() || cloudKey const extension = path.extname(filename).toLowerCase().substring(1) // Process the file based on its content type @@ -295,10 +301,69 @@ async function handleS3File(filePath: string, fileType?: string): Promise { + try { + // Extract filename from path + const filename = filePath.split('/').pop() || filePath + const fullPath = path.join(UPLOAD_DIR, filename) + + logger.info('Processing local file:', fullPath) + + // Check if file exists + try { + await fsPromises.access(fullPath) + } catch { + throw new Error(`File not found: ${filename}`) + } + + // Parse the file directly + const result = await parseFile(fullPath) + + // Get file stats for metadata 
+ const stats = await fsPromises.stat(fullPath) + const fileBuffer = await readFile(fullPath) + const hash = createHash('md5').update(fileBuffer).digest('hex') + + // Extract file extension for type detection + const extension = path.extname(filename).toLowerCase().substring(1) + + return { + success: true, + content: result.content, + filePath, + metadata: { + fileType: fileType || getMimeType(extension), + size: stats.size, + hash, + processingTime: 0, // Will be set by caller + }, + } + } catch (error) { + logger.error(`Error handling local file ${filePath}:`, error) return { success: false, - error: `Error accessing file from S3: ${(error as Error).message}`, + error: `Error processing local file: ${(error as Error).message}`, filePath, } } @@ -324,15 +389,14 @@ async function handlePdfBuffer( return { success: true, - output: { - content, + content, + filePath: originalPath || filename, + metadata: { fileType: fileType || 'application/pdf', size: fileBuffer.length, - name: filename, - binary: false, - metadata: result.metadata || {}, + hash: createHash('md5').update(fileBuffer).digest('hex'), + processingTime: 0, // Will be set by caller }, - filePath: originalPath, } } catch (error) { logger.error('Failed to parse PDF in memory:', error) @@ -347,14 +411,14 @@ async function handlePdfBuffer( return { success: true, - output: { - content, + content, + filePath: originalPath || filename, + metadata: { fileType: fileType || 'application/pdf', size: fileBuffer.length, - name: filename, - binary: false, + hash: createHash('md5').update(fileBuffer).digest('hex'), + processingTime: 0, // Will be set by caller }, - filePath: originalPath, } } } @@ -377,22 +441,27 @@ async function handleCsvBuffer( return { success: true, - output: { - content: result.content, + content: result.content, + filePath: originalPath || filename, + metadata: { fileType: fileType || 'text/csv', size: fileBuffer.length, - name: filename, - binary: false, - metadata: result.metadata || {}, + 
hash: createHash('md5').update(fileBuffer).digest('hex'), + processingTime: 0, // Will be set by caller }, - filePath: originalPath, } } catch (error) { logger.error('Failed to parse CSV in memory:', error) return { success: false, error: `Failed to parse CSV: ${(error as Error).message}`, - filePath: originalPath, + filePath: originalPath || filename, + metadata: { + fileType: 'text/csv', + size: 0, + hash: '', + processingTime: 0, // Will be set by caller + }, } } } @@ -419,15 +488,14 @@ async function handleGenericTextBuffer( return { success: true, - output: { - content: result.content, + content: result.content, + filePath: originalPath || filename, + metadata: { fileType: fileType || getMimeType(extension), size: fileBuffer.length, - name: filename, - binary: false, - metadata: result.metadata || {}, + hash: createHash('md5').update(fileBuffer).digest('hex'), + processingTime: 0, // Will be set by caller }, - filePath: originalPath, } } } catch (parserError) { @@ -439,21 +507,27 @@ async function handleGenericTextBuffer( return { success: true, - output: { - content, + content, + filePath: originalPath || filename, + metadata: { fileType: fileType || getMimeType(extension), size: fileBuffer.length, - name: filename, - binary: false, + hash: createHash('md5').update(fileBuffer).digest('hex'), + processingTime: 0, // Will be set by caller }, - filePath: originalPath, } } catch (error) { logger.error('Failed to parse text file in memory:', error) return { success: false, error: `Failed to parse file: ${(error as Error).message}`, - filePath: originalPath, + filePath: originalPath || filename, + metadata: { + fileType: 'text/plain', + size: 0, + hash: '', + processingTime: 0, // Will be set by caller + }, } } } @@ -474,12 +548,13 @@ function handleGenericBuffer( return { success: true, - output: { - content, + content, + filePath: filename, + metadata: { fileType: fileType || getMimeType(extension), size: fileBuffer.length, - name: filename, - binary: isBinary, + 
hash: createHash('md5').update(fileBuffer).digest('hex'), + processingTime: 0, // Will be set by caller }, } } @@ -513,257 +588,6 @@ async function parseBufferAsPdf(buffer: Buffer) { } } -/** - * Validate that a file path is safe and within allowed directories - */ -function validateAndResolvePath(inputPath: string): { - isValid: boolean - resolvedPath?: string - error?: string -} { - try { - let targetPath = inputPath - if (inputPath.startsWith('/api/files/serve/')) { - const filename = inputPath.replace('/api/files/serve/', '') - targetPath = path.join(UPLOAD_DIR, filename) - } - - const resolvedPath = path.resolve(targetPath) - const resolvedUploadDir = path.resolve(UPLOAD_DIR) - - if ( - !resolvedPath.startsWith(resolvedUploadDir + path.sep) && - resolvedPath !== resolvedUploadDir - ) { - return { - isValid: false, - error: `Access denied: Path outside allowed directory`, - } - } - - if (inputPath.includes('..') || inputPath.includes('~')) { - return { - isValid: false, - error: `Access denied: Invalid path characters detected`, - } - } - - return { - isValid: true, - resolvedPath, - } - } catch (error) { - return { - isValid: false, - error: `Path validation error: ${(error as Error).message}`, - } - } -} - -/** - * Handle a local file from the filesystem - */ -async function handleLocalFile(filePath: string, fileType?: string): Promise { - if (filePath.includes('/api/files/serve/s3/')) { - logger.warn(`S3 path detected in handleLocalFile, redirecting to S3 handler: ${filePath}`) - return handleS3File(filePath, fileType) - } - - try { - logger.info(`Handling local file: ${filePath}`) - - const pathValidation = validateAndResolvePath(filePath) - if (!pathValidation.isValid) { - logger.error(`Path validation failed: ${pathValidation.error}`, { filePath }) - return { - success: false, - error: pathValidation.error || 'Invalid file path', - filePath, - } - } - - const localFilePath = pathValidation.resolvedPath! 
- logger.info(`Validated and resolved path: ${localFilePath}`) - - try { - await fsPromises.access(localFilePath, fsPromises.constants.R_OK) - } catch (error) { - logger.error(`File access error: ${localFilePath}`, error) - return { - success: false, - error: `File not found or inaccessible: ${filePath}`, - filePath, - } - } - - // Get file stats - const stats = await fsPromises.stat(localFilePath) - if (!stats.isFile()) { - logger.error(`Not a file: ${localFilePath}`) - return { - success: false, - error: `Not a file: ${filePath}`, - filePath, - } - } - - // Extract the filename from the path - const filename = path.basename(localFilePath) - const extension = path.extname(filename).toLowerCase().substring(1) - - // Process the file based on its type - const result = isSupportedFileType(extension) - ? await processWithSpecializedParser(localFilePath, filename, extension, fileType, filePath) - : await handleGenericFile(localFilePath, filename, extension, fileType) - - return result - } catch (error) { - logger.error(`Error handling local file ${filePath}:`, error) - return { - success: false, - error: `Error processing file: ${(error as Error).message}`, - filePath, - } - } -} - -/** - * Process a file with a specialized parser - */ -async function processWithSpecializedParser( - filePath: string, - filename: string, - extension: string, - fileType?: string, - originalPath?: string -): Promise { - try { - logger.info(`Parsing ${filename} with specialized parser for ${extension}`) - const result = await parseFile(filePath) - - // Get file stats - const fileBuffer = await readFile(filePath) - const fileSize = fileBuffer.length - - // Handle PDF-specific validation - if ( - extension === 'pdf' && - (result.content.includes('\u0000') || - result.content.match(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\xFF]{10,}/g)) - ) { - result.content = createPdfFallbackMessage(result.metadata?.pageCount, fileSize, originalPath) - } - - return { - success: true, - output: { - content: 
result.content, - fileType: fileType || getMimeType(extension), - size: fileSize, - name: filename, - binary: false, - metadata: result.metadata || {}, - }, - filePath: originalPath || filePath, - } - } catch (error) { - logger.error(`Specialized parser failed for ${extension} file:`, error) - - // Special handling for PDFs - if (extension === 'pdf') { - const fileBuffer = await readFile(filePath) - const fileSize = fileBuffer.length - - // Get page count using a simple regex pattern - let pageCount = 0 - const pdfContent = fileBuffer.toString('utf-8') - const pageMatches = pdfContent.match(/\/Type\s*\/Page\b/gi) - if (pageMatches) { - pageCount = pageMatches.length - } - - const content = createPdfFailureMessage( - pageCount, - fileSize, - originalPath || filePath, - (error as Error).message - ) - - return { - success: true, - output: { - content, - fileType: fileType || getMimeType(extension), - size: fileSize, - name: filename, - binary: false, - }, - filePath: originalPath || filePath, - } - } - - // For other file types, fall back to generic handling - return handleGenericFile(filePath, filename, extension, fileType) - } -} - -/** - * Handle generic file types with basic parsing - */ -async function handleGenericFile( - filePath: string, - filename: string, - extension: string, - fileType?: string -): Promise { - try { - // Read the file - const fileBuffer = await readFile(filePath) - const fileSize = fileBuffer.length - - // Determine if file should be treated as binary - const isBinary = binaryExtensionsList.includes(extension) - - // Parse content based on binary status - let content: string - if (isBinary) { - content = `[Binary ${extension.toUpperCase()} file - ${fileSize} bytes]` - } else { - content = await parseTextFile(fileBuffer) - } - - // Always return success: true for generic files (even unsupported ones) - return { - success: true, - output: { - content, - fileType: fileType || getMimeType(extension), - size: fileSize, - name: filename, - 
binary: isBinary, - }, - } - } catch (error) { - logger.error('Error handling generic file:', error) - return { - success: false, - error: `Failed to parse file: ${(error as Error).message}`, - filePath, - } - } -} - -/** - * Parse a text file buffer to string - */ -async function parseTextFile(fileBuffer: Buffer): Promise { - try { - return fileBuffer.toString('utf-8') - } catch (error) { - return `[Unable to parse file as text: ${(error as Error).message}]` - } -} - /** * Get MIME type from file extension */ diff --git a/apps/sim/app/api/files/presigned/route.test.ts b/apps/sim/app/api/files/presigned/route.test.ts new file mode 100644 index 00000000000..4b1e7711f95 --- /dev/null +++ b/apps/sim/app/api/files/presigned/route.test.ts @@ -0,0 +1,322 @@ +import { NextRequest } from 'next/server' +import { beforeEach, describe, expect, test, vi } from 'vitest' +import { OPTIONS, POST } from './route' + +vi.mock('@/lib/logs/console-logger', () => ({ + createLogger: () => ({ + info: vi.fn(), + error: vi.fn(), + warn: vi.fn(), + }), +})) + +vi.mock('@/lib/uploads', () => ({ + getStorageProvider: vi.fn(), + isUsingCloudStorage: vi.fn(), +})) + +vi.mock('@/lib/uploads/s3/s3-client', () => ({ + getS3Client: vi.fn(), + sanitizeFilenameForMetadata: vi.fn((filename) => filename), +})) + +vi.mock('@/lib/uploads/blob/blob-client', () => ({ + getBlobServiceClient: vi.fn(), + sanitizeFilenameForMetadata: vi.fn((filename) => filename), +})) + +vi.mock('@/lib/uploads/setup', () => ({ + S3_CONFIG: { + bucket: 'test-s3-bucket', + region: 'us-east-1', + }, + BLOB_CONFIG: { + accountName: 'testaccount', + accountKey: 'testkey', + containerName: 'test-container', + }, +})) + +vi.mock('@aws-sdk/client-s3', () => ({ + PutObjectCommand: vi.fn(), +})) + +vi.mock('@aws-sdk/s3-request-presigner', () => ({ + getSignedUrl: vi.fn(), +})) + +vi.mock('@azure/storage-blob', () => ({ + BlobSASPermissions: { + parse: vi.fn(() => 'w'), + }, + generateBlobSASQueryParameters: vi.fn(() => ({ + toString: 
() => 'sas-token-string', + })), + StorageSharedKeyCredential: vi.fn(), +})) + +vi.mock('uuid', () => ({ + v4: () => 'mock-uuid-1234', +})) + +describe('/api/files/presigned', () => { + let mockGetStorageProvider: any + let mockIsUsingCloudStorage: any + let mockGetS3Client: any + let mockGetBlobServiceClient: any + let mockGetSignedUrl: any + + beforeEach(async () => { + vi.clearAllMocks() + vi.useFakeTimers() + vi.setSystemTime(new Date('2024-01-01T00:00:00Z')) + + mockGetStorageProvider = vi.mocked((await import('@/lib/uploads')).getStorageProvider) + mockIsUsingCloudStorage = vi.mocked((await import('@/lib/uploads')).isUsingCloudStorage) + mockGetS3Client = vi.mocked((await import('@/lib/uploads/s3/s3-client')).getS3Client) + mockGetBlobServiceClient = vi.mocked( + (await import('@/lib/uploads/blob/blob-client')).getBlobServiceClient + ) + mockGetSignedUrl = vi.mocked((await import('@aws-sdk/s3-request-presigner')).getSignedUrl) + }) + + afterEach(() => { + vi.useRealTimers() + }) + + describe('POST', () => { + test('should return error when cloud storage is not enabled', async () => { + mockIsUsingCloudStorage.mockReturnValue(false) + + const request = new NextRequest('http://localhost:3000/api/files/presigned', { + method: 'POST', + body: JSON.stringify({ + fileName: 'test.txt', + contentType: 'text/plain', + fileSize: 1024, + }), + }) + + const response = await POST(request) + const data = await response.json() + + expect(response.status).toBe(400) + expect(data.error).toBe('Direct uploads are only available when cloud storage is enabled') + expect(data.directUploadSupported).toBe(false) + }) + + test('should return error when fileName is missing', async () => { + mockIsUsingCloudStorage.mockReturnValue(true) + + const request = new NextRequest('http://localhost:3000/api/files/presigned', { + method: 'POST', + body: JSON.stringify({ + contentType: 'text/plain', + fileSize: 1024, + }), + }) + + const response = await POST(request) + const data = await 
response.json() + + expect(response.status).toBe(400) + expect(data.error).toBe('Missing fileName or contentType') + }) + + test('should return error when contentType is missing', async () => { + mockIsUsingCloudStorage.mockReturnValue(true) + + const request = new NextRequest('http://localhost:3000/api/files/presigned', { + method: 'POST', + body: JSON.stringify({ + fileName: 'test.txt', + fileSize: 1024, + }), + }) + + const response = await POST(request) + const data = await response.json() + + expect(response.status).toBe(400) + expect(data.error).toBe('Missing fileName or contentType') + }) + + test('should generate S3 presigned URL successfully', async () => { + mockIsUsingCloudStorage.mockReturnValue(true) + mockGetStorageProvider.mockReturnValue('s3') + mockGetS3Client.mockReturnValue({} as any) + mockGetSignedUrl.mockResolvedValue('https://s3.amazonaws.com/test-bucket/presigned-url') + + const request = new NextRequest('http://localhost:3000/api/files/presigned', { + method: 'POST', + body: JSON.stringify({ + fileName: 'test document.txt', + contentType: 'text/plain', + fileSize: 1024, + }), + }) + + const response = await POST(request) + const data = await response.json() + + expect(response.status).toBe(200) + expect(data.presignedUrl).toBe('https://s3.amazonaws.com/test-bucket/presigned-url') + expect(data.fileInfo).toMatchObject({ + path: expect.stringContaining('/api/files/serve/s3/'), + key: expect.stringContaining('test-document.txt'), + name: 'test document.txt', + size: 1024, + type: 'text/plain', + }) + expect(data.directUploadSupported).toBe(true) + }) + + test('should generate Azure Blob presigned URL successfully', async () => { + mockIsUsingCloudStorage.mockReturnValue(true) + mockGetStorageProvider.mockReturnValue('blob') + + const mockBlockBlobClient = { + url: 'https://testaccount.blob.core.windows.net/test-container/1704067200000-mock-uuid-1234-test-document.txt', + } + const mockContainerClient = { + getBlockBlobClient: vi.fn(() => 
mockBlockBlobClient), + } + const mockBlobServiceClient = { + getContainerClient: vi.fn(() => mockContainerClient), + } + + mockGetBlobServiceClient.mockReturnValue(mockBlobServiceClient as any) + + const request = new NextRequest('http://localhost:3000/api/files/presigned', { + method: 'POST', + body: JSON.stringify({ + fileName: 'test document.txt', + contentType: 'text/plain', + fileSize: 1024, + }), + }) + + const response = await POST(request) + const data = await response.json() + + expect(response.status).toBe(200) + expect(data.presignedUrl).toBe( + 'https://testaccount.blob.core.windows.net/test-container/1704067200000-mock-uuid-1234-test-document.txt?sas-token-string' + ) + expect(data.fileInfo).toMatchObject({ + path: expect.stringContaining('/api/files/serve/blob/'), + key: expect.stringContaining('test-document.txt'), + name: 'test document.txt', + size: 1024, + type: 'text/plain', + }) + expect(data.directUploadSupported).toBe(true) + expect(data.uploadHeaders).toMatchObject({ + 'x-ms-blob-type': 'BlockBlob', + 'x-ms-blob-content-type': 'text/plain', + 'x-ms-meta-originalname': expect.any(String), + 'x-ms-meta-uploadedat': '2024-01-01T00:00:00.000Z', + }) + + // Verify Azure-specific calls + expect(mockBlobServiceClient.getContainerClient).toHaveBeenCalledWith('test-container') + expect(mockContainerClient.getBlockBlobClient).toHaveBeenCalledWith( + expect.stringContaining('test-document.txt') + ) + }) + + test('should return error for unknown storage provider', async () => { + mockIsUsingCloudStorage.mockReturnValue(true) + mockGetStorageProvider.mockReturnValue('unknown' as any) + + const request = new NextRequest('http://localhost:3000/api/files/presigned', { + method: 'POST', + body: JSON.stringify({ + fileName: 'test.txt', + contentType: 'text/plain', + fileSize: 1024, + }), + }) + + const response = await POST(request) + const data = await response.json() + + expect(response.status).toBe(400) + expect(data.error).toBe('Unknown storage provider') 
+ expect(data.directUploadSupported).toBe(false) + }) + + test('should handle S3 errors gracefully', async () => { + mockIsUsingCloudStorage.mockReturnValue(true) + mockGetStorageProvider.mockReturnValue('s3') + mockGetS3Client.mockReturnValue({} as any) + mockGetSignedUrl.mockRejectedValue(new Error('S3 service unavailable')) + + const request = new NextRequest('http://localhost:3000/api/files/presigned', { + method: 'POST', + body: JSON.stringify({ + fileName: 'test.txt', + contentType: 'text/plain', + fileSize: 1024, + }), + }) + + const response = await POST(request) + const data = await response.json() + + expect(response.status).toBe(500) + expect(data.error).toBe('Error') + expect(data.message).toBe('S3 service unavailable') + }) + + test('should handle Azure Blob errors gracefully', async () => { + mockIsUsingCloudStorage.mockReturnValue(true) + mockGetStorageProvider.mockReturnValue('blob') + mockGetBlobServiceClient.mockImplementation(() => { + throw new Error('Azure service unavailable') + }) + + const request = new NextRequest('http://localhost:3000/api/files/presigned', { + method: 'POST', + body: JSON.stringify({ + fileName: 'test.txt', + contentType: 'text/plain', + fileSize: 1024, + }), + }) + + const response = await POST(request) + const data = await response.json() + + expect(response.status).toBe(500) + expect(data.error).toBe('Error') + expect(data.message).toBe('Azure service unavailable') + }) + + test('should handle malformed JSON gracefully', async () => { + const request = new NextRequest('http://localhost:3000/api/files/presigned', { + method: 'POST', + body: 'invalid json', + }) + + const response = await POST(request) + const data = await response.json() + + expect(response.status).toBe(500) + expect(data.error).toBe('SyntaxError') + expect(data.message).toContain('Unexpected token') + }) + }) + + describe('OPTIONS', () => { + test('should handle CORS preflight requests', async () => { + const response = await OPTIONS() + + 
expect(response.status).toBe(204) + expect(response.headers.get('Access-Control-Allow-Methods')).toBe( + 'GET, POST, DELETE, OPTIONS' + ) + expect(response.headers.get('Access-Control-Allow-Headers')).toBe('Content-Type') + }) + }) +}) diff --git a/apps/sim/app/api/files/presigned/route.ts b/apps/sim/app/api/files/presigned/route.ts index f439fb551bd..d5ab914d94a 100644 --- a/apps/sim/app/api/files/presigned/route.ts +++ b/apps/sim/app/api/files/presigned/route.ts @@ -3,8 +3,10 @@ import { getSignedUrl } from '@aws-sdk/s3-request-presigner' import { type NextRequest, NextResponse } from 'next/server' import { v4 as uuidv4 } from 'uuid' import { createLogger } from '@/lib/logs/console-logger' -import { getS3Client, sanitizeFilenameForMetadata } from '@/lib/uploads/s3-client' -import { S3_CONFIG, USE_S3_STORAGE } from '@/lib/uploads/setup' +import { getStorageProvider, isUsingCloudStorage } from '@/lib/uploads' +import { getBlobServiceClient } from '@/lib/uploads/blob/blob-client' +import { getS3Client, sanitizeFilenameForMetadata } from '@/lib/uploads/s3/s3-client' +import { BLOB_CONFIG, S3_CONFIG } from '@/lib/uploads/setup' import { createErrorResponse, createOptionsResponse } from '../utils' const logger = createLogger('PresignedUploadAPI') @@ -25,40 +27,112 @@ export async function POST(request: NextRequest) { return NextResponse.json({ error: 'Missing fileName or contentType' }, { status: 400 }) } - // Only proceed if S3 storage is enabled - if (!USE_S3_STORAGE) { + // Only proceed if cloud storage is enabled + if (!isUsingCloudStorage()) { return NextResponse.json( { - error: 'Direct uploads are only available when S3 storage is enabled', + error: 'Direct uploads are only available when cloud storage is enabled', directUploadSupported: false, }, { status: 400 } ) } - // Create a unique key for the file - const safeFileName = fileName.replace(/\s+/g, '-') - const uniqueKey = `${Date.now()}-${uuidv4()}-${safeFileName}` - - // Sanitize the original filename for 
S3 metadata to prevent header errors - const sanitizedOriginalName = sanitizeFilenameForMetadata(fileName) - - // Create the S3 command - const command = new PutObjectCommand({ - Bucket: S3_CONFIG.bucket, - Key: uniqueKey, - ContentType: contentType, - Metadata: { - originalName: encodeURIComponent(fileName), - uploadedAt: new Date().toISOString(), + const storageProvider = getStorageProvider() + + if (storageProvider === 's3') { + return await handleS3PresignedUrl(fileName, contentType, fileSize) + } + if (storageProvider === 'blob') { + return await handleBlobPresignedUrl(fileName, contentType, fileSize) + } + + return NextResponse.json( + { + error: 'Unknown storage provider', + directUploadSupported: false, }, - }) + { status: 400 } + ) + } catch (error) { + logger.error('Error generating presigned URL:', error) + return createErrorResponse( + error instanceof Error ? error : new Error('Failed to generate presigned URL') + ) + } +} + +async function handleS3PresignedUrl(fileName: string, contentType: string, fileSize: number) { + // Create a unique key for the file + const safeFileName = fileName.replace(/\s+/g, '-') + const uniqueKey = `${Date.now()}-${uuidv4()}-${safeFileName}` + + // Sanitize the original filename for S3 metadata to prevent header errors + const sanitizedOriginalName = sanitizeFilenameForMetadata(fileName) + + // Create the S3 command + const command = new PutObjectCommand({ + Bucket: S3_CONFIG.bucket, + Key: uniqueKey, + ContentType: contentType, + Metadata: { + originalName: encodeURIComponent(fileName), + uploadedAt: new Date().toISOString(), + }, + }) + + // Generate the presigned URL + const presignedUrl = await getSignedUrl(getS3Client(), command, { expiresIn: 3600 }) - // Generate the presigned URL - const presignedUrl = await getSignedUrl(getS3Client(), command, { expiresIn: 3600 }) + // Create a path for API to serve the file + const servePath = `/api/files/serve/s3/${encodeURIComponent(uniqueKey)}` + + logger.info(`Generated 
presigned URL for ${fileName} (${uniqueKey})`) + + return NextResponse.json({ + presignedUrl, + fileInfo: { + path: servePath, + key: uniqueKey, + name: fileName, + size: fileSize, + type: contentType, + }, + directUploadSupported: true, + }) +} + +async function handleBlobPresignedUrl(fileName: string, contentType: string, fileSize: number) { + // Create a unique key for the file + const safeFileName = fileName.replace(/\s+/g, '-') + const uniqueKey = `${Date.now()}-${uuidv4()}-${safeFileName}` + + try { + const blobServiceClient = getBlobServiceClient() + const containerClient = blobServiceClient.getContainerClient(BLOB_CONFIG.containerName) + const blockBlobClient = containerClient.getBlockBlobClient(uniqueKey) + + // Generate SAS token for upload (write permission) + const { BlobSASPermissions, generateBlobSASQueryParameters, StorageSharedKeyCredential } = + await import('@azure/storage-blob') + + const sasOptions = { + containerName: BLOB_CONFIG.containerName, + blobName: uniqueKey, + permissions: BlobSASPermissions.parse('w'), // Write permission for upload + startsOn: new Date(), + expiresOn: new Date(Date.now() + 3600 * 1000), // 1 hour expiration + } + + const sasToken = generateBlobSASQueryParameters( + sasOptions, + new StorageSharedKeyCredential(BLOB_CONFIG.accountName, BLOB_CONFIG.accountKey || '') + ).toString() + + const presignedUrl = `${blockBlobClient.url}?${sasToken}` // Create a path for API to serve the file - const servePath = `/api/files/serve/s3/${encodeURIComponent(uniqueKey)}` + const servePath = `/api/files/serve/blob/${encodeURIComponent(uniqueKey)}` logger.info(`Generated presigned URL for ${fileName} (${uniqueKey})`) @@ -72,12 +146,16 @@ export async function POST(request: NextRequest) { type: contentType, }, directUploadSupported: true, + uploadHeaders: { + 'x-ms-blob-type': 'BlockBlob', + 'x-ms-blob-content-type': contentType, + 'x-ms-meta-originalname': encodeURIComponent(fileName), + 'x-ms-meta-uploadedat': new 
Date().toISOString(), + }, }) } catch (error) { - logger.error('Error generating presigned URL:', error) - return createErrorResponse( - error instanceof Error ? error : new Error('Failed to generate presigned URL') - ) + logger.error('Error generating Blob presigned URL:', error) + throw error } } diff --git a/apps/sim/app/api/files/serve/[...path]/route.test.ts b/apps/sim/app/api/files/serve/[...path]/route.test.ts index 59b3a52716a..c0d09d1fe91 100644 --- a/apps/sim/app/api/files/serve/[...path]/route.test.ts +++ b/apps/sim/app/api/files/serve/[...path]/route.test.ts @@ -7,11 +7,12 @@ import { NextRequest } from 'next/server' import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' describe('File Serve API Route', () => { - // Mock file system and S3 client modules + // Mock file system and storage modules const mockReadFile = vi.fn().mockResolvedValue(Buffer.from('test file content')) const mockExistsSync = vi.fn().mockReturnValue(true) - const mockDownloadFromS3 = vi.fn().mockResolvedValue(Buffer.from('test s3 file content')) + const mockDownloadFile = vi.fn().mockResolvedValue(Buffer.from('test cloud file content')) const mockGetPresignedUrl = vi.fn().mockResolvedValue('https://example-s3.com/presigned-url') + const mockIsUsingCloudStorage = vi.fn().mockReturnValue(false) const mockEnsureUploadsDirectory = vi.fn().mockResolvedValue(true) beforeEach(() => { @@ -26,10 +27,11 @@ describe('File Serve API Route', () => { readFile: mockReadFile, })) - // Mock the S3 client - vi.doMock('@/lib/uploads/s3-client', () => ({ - downloadFromS3: mockDownloadFromS3, + // Mock the storage abstraction layer + vi.doMock('@/lib/uploads', () => ({ + downloadFile: mockDownloadFile, getPresignedUrl: mockGetPresignedUrl, + isUsingCloudStorage: mockIsUsingCloudStorage, })) // Mock the logger @@ -42,10 +44,11 @@ describe('File Serve API Route', () => { }), })) - // Configure upload directory and S3 mode with all required exports + // Configure upload directory and 
storage mode with all required exports vi.doMock('@/lib/uploads/setup', () => ({ UPLOAD_DIR: '/test/uploads', USE_S3_STORAGE: false, + USE_BLOB_STORAGE: false, ensureUploadsDirectory: mockEnsureUploadsDirectory, S3_CONFIG: { bucket: 'test-bucket', @@ -53,6 +56,38 @@ describe('File Serve API Route', () => { }, })) + // Mock the file utils with all exports including FileNotFoundError + vi.doMock('@/app/api/files/utils', () => ({ + FileNotFoundError: class FileNotFoundError extends Error { + constructor(message: string) { + super(message) + this.name = 'FileNotFoundError' + } + }, + createFileResponse: vi.fn().mockImplementation((file) => { + return new Response(file.buffer, { + status: 200, + headers: { + 'Content-Type': file.contentType, + 'Content-Disposition': `inline; filename="${file.filename}"`, + }, + }) + }), + createErrorResponse: vi.fn().mockImplementation((error) => { + return new Response(JSON.stringify({ error: error.name, message: error.message }), { + status: error.name === 'FileNotFoundError' ? 
404 : 500, + headers: { 'Content-Type': 'application/json' }, + }) + }), + getContentType: vi.fn().mockReturnValue('text/plain'), + isS3Path: vi.fn().mockReturnValue(false), + isBlobPath: vi.fn().mockReturnValue(false), + extractS3Key: vi.fn().mockImplementation((path) => path.split('/').pop()), + extractBlobKey: vi.fn().mockImplementation((path) => path.split('/').pop()), + extractFilename: vi.fn().mockImplementation((path) => path.split('/').pop()), + findLocalFile: vi.fn().mockReturnValue('/test/uploads/test-file.txt'), + })) + // Skip setup.server.ts side effects vi.doMock('@/lib/uploads/setup.server', () => ({})) }) @@ -78,18 +113,46 @@ describe('File Serve API Route', () => { expect(response.status).toBe(200) expect(response.headers.get('Content-Type')).toBe('text/plain') expect(response.headers.get('Content-Disposition')).toBe('inline; filename="test-file.txt"') - expect(response.headers.get('Cache-Control')).toBe('public, max-age=31536000') // Verify file was read from correct path expect(mockReadFile).toHaveBeenCalledWith('/test/uploads/test-file.txt') - - // Verify response content - const buffer = await response.arrayBuffer() - const content = Buffer.from(buffer).toString() - expect(content).toBe('test file content') }) it('should handle nested paths correctly', async () => { + // Mock findLocalFile to return the nested path + const mockFindLocalFile = vi.fn().mockReturnValue('/test/uploads/nested/path/file.txt') + + vi.doMock('@/app/api/files/utils', () => ({ + FileNotFoundError: class FileNotFoundError extends Error { + constructor(message: string) { + super(message) + this.name = 'FileNotFoundError' + } + }, + createFileResponse: vi.fn().mockImplementation((file) => { + return new Response(file.buffer, { + status: 200, + headers: { + 'Content-Type': file.contentType, + 'Content-Disposition': `inline; filename="${file.filename}"`, + }, + }) + }), + createErrorResponse: vi.fn().mockImplementation((error) => { + return new Response(JSON.stringify({ 
error: error.name, message: error.message }), { + status: error.name === 'FileNotFoundError' ? 404 : 500, + headers: { 'Content-Type': 'application/json' }, + }) + }), + getContentType: vi.fn().mockReturnValue('text/plain'), + isS3Path: vi.fn().mockReturnValue(false), + isBlobPath: vi.fn().mockReturnValue(false), + extractS3Key: vi.fn().mockImplementation((path) => path.split('/').pop()), + extractBlobKey: vi.fn().mockImplementation((path) => path.split('/').pop()), + extractFilename: vi.fn().mockImplementation((path) => path.split('/').pop()), + findLocalFile: mockFindLocalFile, + })) + // Create mock request const req = new NextRequest('http://localhost:3000/api/files/serve/nested/path/file.txt') @@ -100,49 +163,58 @@ describe('File Serve API Route', () => { const { GET } = await import('./route') // Call the handler - const _response = await GET(req, { params: Promise.resolve(params) }) + const response = await GET(req, { params: Promise.resolve(params) }) + + // Verify response + expect(response.status).toBe(200) // Verify file was read with correct path expect(mockReadFile).toHaveBeenCalledWith('/test/uploads/nested/path/file.txt') }) - it('should serve S3 file with presigned URL redirect', async () => { - // Configure S3 storage mode + it('should serve cloud file by downloading and proxying', async () => { + // Configure cloud storage mode vi.doMock('@/lib/uploads/setup', () => ({ UPLOAD_DIR: '/test/uploads', USE_S3_STORAGE: true, + USE_BLOB_STORAGE: false, })) - // Create mock request - const req = new NextRequest('http://localhost:3000/api/files/serve/s3/1234567890-file.pdf') - - // Create params similar to what Next.js would provide - const params = { path: ['s3', '1234567890-file.pdf'] } - - // Import the handler after mocks are set up - const { GET } = await import('./route') - - // Call the handler - const response = await GET(req, { params: Promise.resolve(params) }) + // Mock cloud storage mode + mockIsUsingCloudStorage.mockReturnValue(true) - // 
Verify redirect to presigned URL - expect(response.status).toBe(307) // Temporary redirect - expect(response.headers.get('Location')).toBe('https://example-s3.com/presigned-url') - - // Verify presigned URL was generated for correct S3 key - expect(mockGetPresignedUrl).toHaveBeenCalledWith('1234567890-file.pdf') - }) - - it('should handle S3 file download fallback if presigned URL fails', async () => { - // Configure S3 storage mode - vi.doMock('@/lib/uploads/setup', () => ({ - UPLOAD_DIR: '/test/uploads', - USE_S3_STORAGE: true, + // Mock content type detection for PNG + vi.doMock('@/app/api/files/utils', () => ({ + FileNotFoundError: class FileNotFoundError extends Error { + constructor(message: string) { + super(message) + this.name = 'FileNotFoundError' + } + }, + createFileResponse: vi.fn().mockImplementation((file) => { + return new Response(file.buffer, { + status: 200, + headers: { + 'Content-Type': file.contentType, + 'Content-Disposition': `inline; filename="${file.filename}"`, + }, + }) + }), + createErrorResponse: vi.fn().mockImplementation((error) => { + return new Response(JSON.stringify({ error: error.name, message: error.message }), { + status: error.name === 'FileNotFoundError' ? 
404 : 500, + headers: { 'Content-Type': 'application/json' }, + }) + }), + getContentType: vi.fn().mockReturnValue('image/png'), + isS3Path: vi.fn().mockReturnValue(false), + isBlobPath: vi.fn().mockReturnValue(false), + extractS3Key: vi.fn().mockImplementation((path) => path.split('/').pop()), + extractBlobKey: vi.fn().mockImplementation((path) => path.split('/').pop()), + extractFilename: vi.fn().mockImplementation((path) => path.split('/').pop()), + findLocalFile: vi.fn().mockReturnValue('/test/uploads/test-file.txt'), })) - // Mock presigned URL to fail - mockGetPresignedUrl.mockRejectedValueOnce(new Error('Presigned URL failed')) - // Create mock request const req = new NextRequest('http://localhost:3000/api/files/serve/s3/1234567890-image.png') @@ -155,15 +227,70 @@ describe('File Serve API Route', () => { // Call the handler const response = await GET(req, { params: Promise.resolve(params) }) - // Verify response falls back to downloading and proxying the file + // Verify response downloads and proxies the file expect(response.status).toBe(200) expect(response.headers.get('Content-Type')).toBe('image/png') - expect(mockDownloadFromS3).toHaveBeenCalledWith('1234567890-image.png') + expect(mockDownloadFile).toHaveBeenCalledWith('1234567890-image.png') }) it('should return 404 when file not found', async () => { - // Mock file not existing - mockExistsSync.mockReturnValue(false) + // Mock readFile to throw an error for this specific test + const mockReadFileError = vi + .fn() + .mockRejectedValue(new Error('ENOENT: no such file or directory')) + + // Reset modules for this specific test + vi.resetModules() + + vi.doMock('fs', () => ({ + existsSync: vi.fn().mockReturnValue(false), // File doesn't exist + })) + + vi.doMock('fs/promises', () => ({ + readFile: mockReadFileError, // This will throw an error + })) + + vi.doMock('@/lib/uploads', () => ({ + downloadFile: mockDownloadFile, + getPresignedUrl: mockGetPresignedUrl, + isUsingCloudStorage: 
vi.fn().mockReturnValue(false), // Use local storage + })) + + vi.doMock('@/lib/logs/console-logger', () => ({ + createLogger: vi.fn().mockReturnValue({ + info: vi.fn(), + error: vi.fn(), + warn: vi.fn(), + debug: vi.fn(), + }), + })) + + vi.doMock('@/lib/uploads/setup', () => ({ + UPLOAD_DIR: '/test/uploads', + USE_S3_STORAGE: false, + USE_BLOB_STORAGE: false, + })) + + vi.doMock('@/lib/uploads/setup.server', () => ({})) + + // Mock utils with findLocalFile returning null to trigger FileNotFoundError + vi.doMock('@/app/api/files/utils', () => ({ + FileNotFoundError: class FileNotFoundError extends Error { + constructor(message: string) { + super(message) + this.name = 'FileNotFoundError' + } + }, + createFileResponse: vi.fn(), + createErrorResponse: vi.fn(), + getContentType: vi.fn().mockReturnValue('text/plain'), + isS3Path: vi.fn().mockReturnValue(false), + isBlobPath: vi.fn().mockReturnValue(false), + extractS3Key: vi.fn(), + extractBlobKey: vi.fn(), + extractFilename: vi.fn(), + findLocalFile: vi.fn().mockReturnValue(null), // This should trigger FileNotFoundError + })) // Create mock request const req = new NextRequest('http://localhost:3000/api/files/serve/nonexistent.txt') @@ -180,11 +307,8 @@ describe('File Serve API Route', () => { // Verify 404 response expect(response.status).toBe(404) - const data = await response.json() - // Updated to match actual error format - expect(data).toHaveProperty('error', 'FileNotFoundError') - expect(data).toHaveProperty('message') - expect(data.message).toContain('File not found') + const text = await response.text() + expect(text).toBe('File not found') }) // Instead of testing all content types in one test, let's separate them @@ -211,9 +335,10 @@ describe('File Serve API Route', () => { readFile: mockReadFile, })) - vi.doMock('@/lib/uploads/s3-client', () => ({ - downloadFromS3: mockDownloadFromS3, + vi.doMock('@/lib/uploads', () => ({ + downloadFile: mockDownloadFile, getPresignedUrl: mockGetPresignedUrl, + 
isUsingCloudStorage: mockIsUsingCloudStorage, })) vi.doMock('@/lib/logs/console-logger', () => ({ @@ -228,6 +353,7 @@ describe('File Serve API Route', () => { vi.doMock('@/lib/uploads/setup', () => ({ UPLOAD_DIR: '/test/uploads', USE_S3_STORAGE: false, + USE_BLOB_STORAGE: false, ensureUploadsDirectory: mockEnsureUploadsDirectory, S3_CONFIG: { bucket: 'test-bucket', diff --git a/apps/sim/app/api/files/serve/[...path]/route.ts b/apps/sim/app/api/files/serve/[...path]/route.ts index e325da6db53..8b74713a98d 100644 --- a/apps/sim/app/api/files/serve/[...path]/route.ts +++ b/apps/sim/app/api/files/serve/[...path]/route.ts @@ -1,8 +1,7 @@ import { readFile } from 'fs/promises' import { type NextRequest, NextResponse } from 'next/server' import { createLogger } from '@/lib/logs/console-logger' -import { downloadFromS3, getPresignedUrl } from '@/lib/uploads/s3-client' -import { USE_S3_STORAGE } from '@/lib/uploads/setup' +import { downloadFile, isUsingCloudStorage } from '@/lib/uploads' import '@/lib/uploads/setup.server' import { @@ -25,81 +24,76 @@ export async function GET( { params }: { params: Promise<{ path: string[] }> } ) { try { - // Extract params const { path } = await params - // Join the path segments to get the filename or S3 key - const pathString = path.join('/') - logger.info(`Serving file: ${pathString}`) + if (!path || path.length === 0) { + throw new FileNotFoundError('No file path provided') + } - // Check if this is an S3 file (path starts with 's3/') - const isS3Path = path[0] === 's3' + logger.info('File serve request:', { path }) - try { - // Use S3 handler if in production or path explicitly specifies S3 - if (USE_S3_STORAGE || isS3Path) { - return await handleS3File(path, isS3Path, pathString) - } - - // Use local handler for local files - return await handleLocalFile(path) - } catch (error) { - logger.error('Error serving file:', error) - return createErrorResponse(error as Error) + // Join the path segments to get the filename or cloud key + 
const fullPath = path.join('/') + + // Check if this is a cloud file (path starts with 's3/' or 'blob/') + const isS3Path = path[0] === 's3' + const isBlobPath = path[0] === 'blob' + const isCloudPath = isS3Path || isBlobPath + + // Use cloud handler if in production, path explicitly specifies cloud storage, or we're using cloud storage + if (isUsingCloudStorage() || isCloudPath) { + // Extract the actual key (remove 's3/' or 'blob/' prefix if present) + const cloudKey = isCloudPath ? path.slice(1).join('/') : fullPath + return await handleCloudProxy(cloudKey) } + + // Use local handler for local files + return await handleLocalFile(fullPath) } catch (error) { logger.error('Error serving file:', error) - return createErrorResponse(error as Error) - } -} -/** - * Handle S3 file serving - */ -async function handleS3File( - path: string[], - isS3Path: boolean, - pathString: string -): Promise { - // If path starts with s3/, remove that prefix to get the actual key - const s3Key = isS3Path ? decodeURIComponent(path.slice(1).join('/')) : pathString - logger.info(`Serving file from S3: ${s3Key}`) + if (error instanceof FileNotFoundError) { + return new NextResponse('File not found', { status: 404 }) + } - try { - // First try direct access via presigned URL (most efficient) - return await handleS3PresignedUrl(s3Key) - } catch (_error) { - logger.info('Falling back to proxy method for S3 file') - // Fall back to proxy method if presigned URL fails - return await handleS3Proxy(s3Key) + return createErrorResponse(error instanceof Error ? 
error : new Error('Failed to serve file')) } } /** - * Generate a presigned URL and redirect to it + * Handle local file serving */ -async function handleS3PresignedUrl(s3Key: string): Promise { +async function handleLocalFile(filename: string): Promise { try { - // Generate a presigned URL for direct S3 access - const presignedUrl = await getPresignedUrl(s3Key) + const filePath = findLocalFile(filename) + + if (!filePath) { + throw new FileNotFoundError(`File not found: ${filename}`) + } - // Redirect to the presigned URL for direct S3 access - return NextResponse.redirect(presignedUrl) + const fileBuffer = await readFile(filePath) + const contentType = getContentType(filename) + + return createFileResponse({ + buffer: fileBuffer, + contentType, + filename, + }) } catch (error) { - logger.error('Error generating presigned URL:', error) + logger.error('Error reading local file:', error) throw error } } /** - * Proxy S3 file through our server + * Proxy cloud file through our server */ -async function handleS3Proxy(s3Key: string): Promise { +async function handleCloudProxy(cloudKey: string): Promise { try { - const fileBuffer = await downloadFromS3(s3Key) + const fileBuffer = await downloadFile(cloudKey) // Extract the original filename from the key (last part after last /) - const originalFilename = s3Key.split('/').pop() || 'download' + const originalFilename = cloudKey.split('/').pop() || 'download' const contentType = getContentType(originalFilename) return createFileResponse({ @@ -108,35 +102,7 @@ async function handleS3Proxy(s3Key: string): Promise { filename: originalFilename, }) } catch (error) { - logger.error('Error downloading from S3:', error) + logger.error('Error downloading from cloud storage:', error) throw error } } - -/** - * Handle local file serving - */ -async function handleLocalFile(path: string[]): Promise { - // Join as a path for findLocalFile - const pathString = path.join('/') - const filePath = findLocalFile(pathString) - - // Handle 
file not found - if (!filePath) { - logger.error(`File not found in any checked paths for: ${pathString}`) - throw new FileNotFoundError(`File not found: ${pathString}`) - } - - // Read the file - const fileBuffer = await readFile(filePath) - - // Get filename for content type detection and response - const filename = path[path.length - 1] - const contentType = getContentType(filename) - - return createFileResponse({ - buffer: fileBuffer, - contentType, - filename, - }) -} diff --git a/apps/sim/app/api/files/upload/route.test.ts b/apps/sim/app/api/files/upload/route.test.ts index a8e9807c2bb..26a090f3c1e 100644 --- a/apps/sim/app/api/files/upload/route.test.ts +++ b/apps/sim/app/api/files/upload/route.test.ts @@ -7,17 +7,16 @@ import { NextRequest } from 'next/server' import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' describe('File Upload API Route', () => { - // Mock file system and S3 client modules + // Mock file system and storage modules const mockWriteFile = vi.fn().mockResolvedValue(undefined) - const mockUploadToS3 = vi.fn().mockImplementation((buffer, fileName) => { - return Promise.resolve({ - path: `/api/files/serve/s3/${Date.now()}-${fileName}`, - key: `${Date.now()}-${fileName}`, - name: fileName, - size: buffer.length, - type: 'text/plain', - }) + const mockUploadFile = vi.fn().mockResolvedValue({ + path: '/api/files/serve/s3/test-key', + key: 'test-key', + name: 'test.txt', + size: 100, + type: 'text/plain', }) + const mockIsUsingCloudStorage = vi.fn().mockReturnValue(false) const mockEnsureUploadsDirectory = vi.fn().mockResolvedValue(true) // Mock form data @@ -46,9 +45,10 @@ describe('File Upload API Route', () => { writeFile: mockWriteFile, })) - // Mock the S3 client - vi.doMock('@/lib/uploads/s3-client', () => ({ - uploadToS3: mockUploadToS3, + // Mock the storage abstraction layer + vi.doMock('@/lib/uploads', () => ({ + uploadFile: mockUploadFile, + isUsingCloudStorage: mockIsUsingCloudStorage, })) // Mock the logger @@ 
-66,10 +66,11 @@ describe('File Upload API Route', () => { v4: vi.fn().mockReturnValue('mock-uuid'), })) - // Configure upload directory and S3 mode with all required exports + // Configure upload directory and storage mode with all required exports vi.doMock('@/lib/uploads/setup', () => ({ UPLOAD_DIR: '/test/uploads', USE_S3_STORAGE: false, + USE_BLOB_STORAGE: false, ensureUploadsDirectory: mockEnsureUploadsDirectory, S3_CONFIG: { bucket: 'test-bucket', @@ -119,10 +120,14 @@ describe('File Upload API Route', () => { vi.doMock('@/lib/uploads/setup', () => ({ UPLOAD_DIR: '/test/uploads', USE_S3_STORAGE: true, + USE_BLOB_STORAGE: false, })) + // Mock cloud storage mode + mockIsUsingCloudStorage.mockReturnValue(true) + // Create a mock request with file - const mockFile = createMockFile('document.pdf', 'application/pdf') + const mockFile = createMockFile() const formData = createMockFormData([mockFile]) // Create mock request object @@ -142,28 +147,41 @@ describe('File Upload API Route', () => { expect(response.status).toBe(200) expect(data).toHaveProperty('path') expect(data.path).toContain('/api/files/serve/s3/') - expect(data).toHaveProperty('key') - expect(data).toHaveProperty('name', 'document.pdf') + expect(data).toHaveProperty('name', 'test.txt') + expect(data).toHaveProperty('size') + expect(data).toHaveProperty('type', 'text/plain') - // Verify uploadToS3 was called with correct parameters - expect(mockUploadToS3).toHaveBeenCalledWith( + // Verify uploadFile was called with correct parameters + expect(mockUploadFile).toHaveBeenCalledWith( expect.any(Buffer), - 'document.pdf', - 'application/pdf', + 'test.txt', + 'text/plain', expect.any(Number) ) - - // Verify local write was NOT called - expect(mockWriteFile).not.toHaveBeenCalled() }) it('should handle multiple file uploads', async () => { // Create multiple mock files - const mockFiles = [ - createMockFile('file1.txt', 'text/plain'), - createMockFile('file2.jpg', 'image/jpeg'), - ] - const formData = 
createMockFormData(mockFiles) + const mockFile1 = createMockFile('file1.txt', 'text/plain') + const mockFile2 = createMockFile('file2.txt', 'text/plain') + const formData = createMockFormData([mockFile1, mockFile2]) + + // Mock multiple upload responses + mockUploadFile + .mockResolvedValueOnce({ + path: '/api/files/serve/test1.txt', + key: 'test1.txt', + name: 'file1.txt', + size: 100, + type: 'text/plain', + }) + .mockResolvedValueOnce({ + path: '/api/files/serve/test2.txt', + key: 'test2.txt', + name: 'file2.txt', + size: 100, + type: 'text/plain', + }) // Create mock request object const req = new NextRequest('http://localhost:3000/api/files/upload', { @@ -180,13 +198,13 @@ describe('File Upload API Route', () => { // Verify response has multiple results expect(response.status).toBe(200) - expect(Array.isArray(data)).toBe(true) - expect(data).toHaveLength(2) - expect(data[0]).toHaveProperty('name', 'file1.txt') - expect(data[1]).toHaveProperty('name', 'file2.jpg') + expect(data).toHaveProperty('files') + expect(Array.isArray(data.files)).toBe(true) + expect(data.files).toHaveLength(2) - // Verify files were written - expect(mockWriteFile).toHaveBeenCalledTimes(2) + // Verify each file was uploaded + expect(data.files[0]).toHaveProperty('name', 'file1.txt') + expect(data.files[1]).toHaveProperty('name', 'file2.txt') }) it('should handle missing files', async () => { @@ -217,10 +235,14 @@ describe('File Upload API Route', () => { vi.doMock('@/lib/uploads/setup', () => ({ UPLOAD_DIR: '/test/uploads', USE_S3_STORAGE: true, + USE_BLOB_STORAGE: false, })) - // Mock S3 upload failure - mockUploadToS3.mockRejectedValueOnce(new Error('S3 upload failed')) + // Mock cloud storage mode + mockIsUsingCloudStorage.mockReturnValue(true) + + // Mock upload failure + mockUploadFile.mockRejectedValueOnce(new Error('Upload failed')) // Create a mock request with file const mockFile = createMockFile() @@ -242,7 +264,7 @@ describe('File Upload API Route', () => { // Verify error 
response expect(response.status).toBe(500) expect(data).toHaveProperty('error', 'Error') - expect(data).toHaveProperty('message', 'S3 upload failed') + expect(data).toHaveProperty('message', 'Upload failed') }) it('should handle CORS preflight requests', async () => { diff --git a/apps/sim/app/api/files/upload/route.ts b/apps/sim/app/api/files/upload/route.ts index 7e427026d98..73ba39e5a23 100644 --- a/apps/sim/app/api/files/upload/route.ts +++ b/apps/sim/app/api/files/upload/route.ts @@ -3,8 +3,8 @@ import { join } from 'path' import { type NextRequest, NextResponse } from 'next/server' import { v4 as uuidv4 } from 'uuid' import { createLogger } from '@/lib/logs/console-logger' -import { uploadToS3 } from '@/lib/uploads/s3-client' -import { UPLOAD_DIR, USE_S3_STORAGE } from '@/lib/uploads/setup' +import { isUsingCloudStorage, uploadFile } from '@/lib/uploads' +import { UPLOAD_DIR } from '@/lib/uploads/setup' // Import to ensure the uploads directory is created import '@/lib/uploads/setup.server' @@ -26,7 +26,8 @@ export async function POST(request: NextRequest) { } // Log storage mode - logger.info(`Using storage mode: ${USE_S3_STORAGE ? 'S3' : 'Local'} for file upload`) + const usingCloudStorage = isUsingCloudStorage() + logger.info(`Using storage mode: ${usingCloudStorage ? 
'Cloud' : 'Local'} for file upload`) const uploadResults = [] @@ -36,15 +37,15 @@ export async function POST(request: NextRequest) { const bytes = await file.arrayBuffer() const buffer = Buffer.from(bytes) - if (USE_S3_STORAGE) { - // Upload to S3 in production + if (usingCloudStorage) { + // Upload to cloud storage (S3 or Azure Blob) try { - logger.info(`Uploading file to S3: ${originalName}`) - const result = await uploadToS3(buffer, originalName, file.type, file.size) - logger.info(`Successfully uploaded to S3: ${result.key}`) + logger.info(`Uploading file to cloud storage: ${originalName}`) + const result = await uploadFile(buffer, originalName, file.type, file.size) + logger.info(`Successfully uploaded to cloud storage: ${result.key}`) uploadResults.push(result) } catch (error) { - logger.error('Error uploading to S3:', error) + logger.error('Error uploading to cloud storage:', error) throw error } } else { @@ -67,10 +68,13 @@ export async function POST(request: NextRequest) { } // Return all file information - return NextResponse.json(files.length === 1 ? uploadResults[0] : uploadResults) + if (uploadResults.length === 1) { + return NextResponse.json(uploadResults[0]) + } + return NextResponse.json({ files: uploadResults }) } catch (error) { - logger.error('Error uploading files:', error) - return createErrorResponse(error instanceof Error ? error : new Error('Failed to upload files')) + logger.error('Error in file upload:', error) + return createErrorResponse(error instanceof Error ? 
error : new Error('File upload failed')) } } diff --git a/apps/sim/app/api/files/utils.ts b/apps/sim/app/api/files/utils.ts index 24d9f7080d3..3e8bca7603f 100644 --- a/apps/sim/app/api/files/utils.ts +++ b/apps/sim/app/api/files/utils.ts @@ -110,6 +110,13 @@ export function isS3Path(path: string): boolean { return path.includes('/api/files/serve/s3/') } +/** + * Check if a path is a Blob path + */ +export function isBlobPath(path: string): boolean { + return path.includes('/api/files/serve/blob/') +} + /** * Extract S3 key from a path */ @@ -120,6 +127,16 @@ export function extractS3Key(path: string): string { return path } +/** + * Extract Blob key from a path + */ +export function extractBlobKey(path: string): string { + if (isBlobPath(path)) { + return decodeURIComponent(path.split('/api/files/serve/blob/')[1]) + } + return path +} + /** * Extract filename from a serve path */ diff --git a/apps/sim/app/api/knowledge/utils.test.ts b/apps/sim/app/api/knowledge/utils.test.ts index e0b172f0ab4..5239219032a 100644 --- a/apps/sim/app/api/knowledge/utils.test.ts +++ b/apps/sim/app/api/knowledge/utils.test.ts @@ -103,31 +103,29 @@ vi.mock('@/lib/documents/utils', () => ({ })) vi.mock('@/lib/documents/document-processor', () => ({ - processDocuments: vi.fn().mockResolvedValue([ - { - chunks: [ - { - text: 'alpha', - tokenCount: 1, - metadata: { startIndex: 0, endIndex: 4 }, - }, - { - text: 'beta', - tokenCount: 1, - metadata: { startIndex: 5, endIndex: 8 }, - }, - ], - metadata: { - filename: 'dummy', - fileSize: 10, - mimeType: 'text/plain', - characterCount: 9, - tokenCount: 3, - chunkCount: 2, - processingMethod: 'file-parser', + processDocument: vi.fn().mockResolvedValue({ + chunks: [ + { + text: 'alpha', + tokenCount: 1, + metadata: { startIndex: 0, endIndex: 4 }, }, + { + text: 'beta', + tokenCount: 1, + metadata: { startIndex: 5, endIndex: 8 }, + }, + ], + metadata: { + filename: 'dummy', + fileSize: 10, + mimeType: 'text/plain', + characterCount: 9, + 
tokenCount: 3, + chunkCount: 2, + processingMethod: 'file-parser', }, - ]), + }), })) vi.stubGlobal( diff --git a/apps/sim/app/api/knowledge/utils.ts b/apps/sim/app/api/knowledge/utils.ts index 6cf756ac28a..56bc036257b 100644 --- a/apps/sim/app/api/knowledge/utils.ts +++ b/apps/sim/app/api/knowledge/utils.ts @@ -1,6 +1,6 @@ import crypto from 'crypto' import { and, eq, isNull, sql } from 'drizzle-orm' -import { processDocuments } from '@/lib/documents/document-processor' +import { processDocument } from '@/lib/documents/document-processor' import { retryWithExponentialBackoff } from '@/lib/documents/utils' import { env } from '@/lib/env' import { createLogger } from '@/lib/logs/console-logger' @@ -395,26 +395,14 @@ export async function processDocumentAsync( logger.info(`[${documentId}] Status updated to 'processing', starting document processor`) - const processedDocuments = await processDocuments( - [ - { - fileUrl: docData.fileUrl, - filename: docData.filename, - mimeType: docData.mimeType, - fileSize: docData.fileSize, - }, - ], - { - knowledgeBaseId, - ...processingOptions, - } + const processed = await processDocument( + docData.fileUrl, + docData.filename, + docData.mimeType, + processingOptions.chunkSize || 1000, + 200 // chunkOverlap ) - if (processedDocuments.length === 0) { - throw new Error('No document was processed') - } - - const processed = processedDocuments[0] const now = new Date() logger.info( diff --git a/apps/sim/app/api/logs/cleanup/route.ts b/apps/sim/app/api/logs/cleanup/route.ts index 61e6d6bff36..a79e4ddc349 100644 --- a/apps/sim/app/api/logs/cleanup/route.ts +++ b/apps/sim/app/api/logs/cleanup/route.ts @@ -3,7 +3,7 @@ import { and, eq, inArray, lt, sql } from 'drizzle-orm' import { NextResponse } from 'next/server' import { env } from '@/lib/env' import { createLogger } from '@/lib/logs/console-logger' -import { getS3Client } from '@/lib/uploads/s3-client' +import { getS3Client } from '@/lib/uploads/s3/s3-client' import { db } from 
'@/db' import { subscription, user, workflow, workflowLogs } from '@/db/schema' diff --git a/apps/sim/lib/documents/document-processor.ts b/apps/sim/lib/documents/document-processor.ts index ee7b243ec34..7a94bf1a5e9 100644 --- a/apps/sim/lib/documents/document-processor.ts +++ b/apps/sim/lib/documents/document-processor.ts @@ -1,16 +1,40 @@ import { type Chunk, TextChunker } from '@/lib/documents/chunker' import { env } from '@/lib/env' -import { isSupportedFileType, parseBuffer, parseFile } from '@/lib/file-parsers' +import { parseBuffer, parseFile } from '@/lib/file-parsers' import { createLogger } from '@/lib/logs/console-logger' -import { type CustomS3Config, getPresignedUrlWithConfig, uploadToS3 } from '@/lib/uploads/s3-client' +import { getPresignedUrlWithConfig, getStorageProvider, uploadFile } from '@/lib/uploads' +import { BLOB_KB_CONFIG, S3_KB_CONFIG } from '@/lib/uploads/setup' import { mistralParserTool } from '@/tools/mistral/parser' import { retryWithExponentialBackoff } from './utils' const logger = createLogger('DocumentProcessor') -const S3_KB_CONFIG: CustomS3Config = { - bucket: env.S3_KB_BUCKET_NAME || '', - region: env.AWS_REGION || '', +type S3Config = { + bucket: string + region: string +} + +type BlobConfig = { + containerName: string + accountName: string + accountKey?: string + connectionString?: string +} + +function getKBConfig(): S3Config | BlobConfig { + const provider = getStorageProvider() + if (provider === 'blob') { + return { + containerName: BLOB_KB_CONFIG.containerName, + accountName: BLOB_KB_CONFIG.accountName, + accountKey: BLOB_KB_CONFIG.accountKey, + connectionString: BLOB_KB_CONFIG.connectionString, + } + } + return { + bucket: S3_KB_CONFIG.bucket, + region: S3_KB_CONFIG.region, + } } class APIError extends Error { @@ -23,55 +47,69 @@ class APIError extends Error { } } -export interface ProcessedDocument { - content: string +/** + * Process a document by parsing it and chunking the content + */ +export async function 
processDocument( + fileUrl: string, + filename: string, + mimeType: string, + chunkSize = 1000, + chunkOverlap = 200 +): Promise<{ chunks: Chunk[] metadata: { filename: string fileSize: number mimeType: string - characterCount: number - tokenCount: number chunkCount: number + tokenCount: number + characterCount: number processingMethod: 'file-parser' | 'mistral-ocr' - s3Url?: string + cloudUrl?: string } -} +}> { + logger.info(`Processing document: ${filename}`) -export interface DocumentProcessingOptions { - knowledgeBaseId: string - chunkSize?: number - minCharactersPerChunk?: number - recipe?: string - lang?: string -} + try { + // Parse the document + const { content, processingMethod, cloudUrl } = await parseDocument(fileUrl, filename, mimeType) -/** - * Determines the appropriate processing method for a file based on its type - */ -function determineProcessingMethod( - mimeType: string, - filename: string -): 'file-parser' | 'mistral-ocr' { - // Use Mistral OCR for PDFs since it provides better results - if (mimeType === 'application/pdf' || filename.toLowerCase().endsWith('.pdf')) { - return 'mistral-ocr' - } + // Create chunker and process content + const chunker = new TextChunker({ + chunkSize, + overlap: chunkOverlap, + }) - // Extract file extension for supported file type check - const extension = filename.split('.').pop()?.toLowerCase() + const chunks = await chunker.chunk(content) - // Use file parser for supported non-PDF types - if (extension && isSupportedFileType(extension)) { - return 'file-parser' - } + // Calculate metadata + const characterCount = content.length + const tokenCount = chunks.reduce((sum: number, chunk: Chunk) => sum + chunk.tokenCount, 0) + + logger.info(`Document processed successfully: ${chunks.length} chunks, ${tokenCount} tokens`) - // For unsupported types, try file parser first (it might handle text files) - return 'file-parser' + return { + chunks, + metadata: { + filename, + fileSize: content.length, // Using content 
length as file size approximation + mimeType, + chunkCount: chunks.length, + tokenCount, + characterCount, + processingMethod, + cloudUrl, + }, + } + } catch (error) { + logger.error(`Error processing document ${filename}:`, error) + throw error + } } /** - * Parse a document using the appropriate method (file parser or Mistral OCR) + * Parse a document from a URL or file path */ async function parseDocument( fileUrl: string, @@ -80,309 +118,228 @@ async function parseDocument( ): Promise<{ content: string processingMethod: 'file-parser' | 'mistral-ocr' - s3Url?: string + cloudUrl?: string }> { - const processingMethod = determineProcessingMethod(mimeType, filename) - - logger.info(`Processing document "${filename}" using ${processingMethod}`) - - try { - if (processingMethod === 'mistral-ocr') { - // Use Mistral OCR for PDFs - but first ensure we have an HTTPS URL - const mistralApiKey = env.MISTRAL_API_KEY - if (!mistralApiKey) { - throw new Error('MISTRAL_API_KEY not configured') - } - - let httpsUrl = fileUrl - let s3Url: string | undefined - - // If the URL is not HTTPS, we need to upload to S3 first - if (!fileUrl.startsWith('https://')) { - logger.info(`Uploading "${filename}" to S3 for Mistral OCR access`) - - // Download the file content - const response = await fetch(fileUrl) - if (!response.ok) { - throw new Error(`Failed to download file for S3 upload: ${response.statusText}`) - } - - const buffer = Buffer.from(await response.arrayBuffer()) - - // Always upload to S3 for Mistral OCR, even in development - if (!S3_KB_CONFIG.bucket || !S3_KB_CONFIG.region) { - throw new Error( - 'S3 configuration missing: AWS_REGION and S3_KB_BUCKET_NAME environment variables are required for PDF processing with Mistral OCR' - ) - } - - try { - // Upload to S3 - const s3Result = await uploadToS3(buffer, filename, mimeType, S3_KB_CONFIG) - // Generate presigned URL with 15 minutes expiration - httpsUrl = await getPresignedUrlWithConfig(s3Result.key, S3_KB_CONFIG, 900) - 
s3Url = httpsUrl - logger.info(`Successfully uploaded to S3 for Mistral OCR: ${s3Result.key}`) - } catch (uploadError) { - logger.error('Failed to upload to S3 for Mistral OCR:', uploadError) - throw new Error( - `S3 upload failed: ${uploadError instanceof Error ? uploadError.message : 'Unknown error'}. S3 upload is required for PDF processing with Mistral OCR.` - ) - } - } + // Check if we should use Mistral OCR for PDFs + const shouldUseMistralOCR = mimeType === 'application/pdf' && env.MISTRAL_API_KEY - if (!mistralParserTool.request?.body) { - throw new Error('Mistral parser tool not properly configured') - } + if (shouldUseMistralOCR) { + logger.info(`Using Mistral OCR for PDF: ${filename}`) + return await parseWithMistralOCR(fileUrl, filename, mimeType) + } - const requestBody = mistralParserTool.request.body({ - filePath: httpsUrl, - apiKey: mistralApiKey, - resultType: 'text', - }) - - // Make the actual API call to Mistral with retry logic - const response = await retryWithExponentialBackoff( - async () => { - logger.info(`Calling Mistral OCR API for "${filename}"`) - - const response = await fetch('https://api.mistral.ai/v1/ocr', { - method: mistralParserTool.request.method, - headers: mistralParserTool.request.headers({ - filePath: httpsUrl, - apiKey: mistralApiKey, - resultType: 'text', - }), - body: JSON.stringify(requestBody), - }) - - if (!response.ok) { - const errorText = await response.text() - const error = new APIError( - `Mistral API error: ${response.status} ${response.statusText} - ${errorText}`, - response.status - ) - throw error - } - - return response - }, - { - maxRetries: 5, - initialDelayMs: 2000, // Start with 2 seconds for Mistral OCR - maxDelayMs: 120000, // Max 2 minutes delay for OCR processing - backoffMultiplier: 2, - } - ) + // Use standard file parser + logger.info(`Using file parser for: ${filename}`) + return await parseWithFileParser(fileUrl, filename, mimeType) +} - if (!mistralParserTool.transformResponse) { - throw new 
Error('Mistral parser transform function not available') - } +/** + * Parse document using Mistral OCR + */ +async function parseWithMistralOCR( + fileUrl: string, + filename: string, + mimeType: string +): Promise<{ + content: string + processingMethod: 'file-parser' | 'mistral-ocr' + cloudUrl?: string +}> { + const mistralApiKey = env.MISTRAL_API_KEY + if (!mistralApiKey) { + throw new Error('Mistral API key is required for OCR processing') + } - const result = await mistralParserTool.transformResponse(response, { - filePath: httpsUrl, - apiKey: mistralApiKey, - resultType: 'text', - }) + let httpsUrl = fileUrl + let cloudUrl: string | undefined - if (!result.success) { - throw new Error('Mistral OCR processing failed') - } + // If the URL is not HTTPS, we need to upload to cloud storage first + if (!fileUrl.startsWith('https://')) { + logger.info(`Uploading "${filename}" to cloud storage for Mistral OCR access`) - return { - content: result.output.content, - processingMethod: 'mistral-ocr', - s3Url, - } + // Download the file content + const response = await fetch(fileUrl) + if (!response.ok) { + throw new Error(`Failed to download file for cloud upload: ${response.statusText}`) } - // Use file parser for other supported types - let content: string + const buffer = Buffer.from(await response.arrayBuffer()) - if (fileUrl.startsWith('http')) { - // Download the file and parse buffer - const response = await fetch(fileUrl) - if (!response.ok) { - throw new Error(`Failed to download file: ${response.statusText}`) - } - - const buffer = Buffer.from(await response.arrayBuffer()) - const extension = filename.split('.').pop()?.toLowerCase() + // Always upload to cloud storage for Mistral OCR, even in development + const kbConfig = getKBConfig() + const provider = getStorageProvider() - if (!extension) { - throw new Error('Could not determine file extension') + if (provider === 'blob') { + const blobConfig = kbConfig as BlobConfig + if (!blobConfig.containerName || 
!blobConfig.accountName) { + throw new Error( + 'Azure Blob configuration missing: AZURE_ACCOUNT_NAME and AZURE_KB_CONTAINER_NAME environment variables are required for PDF processing with Mistral OCR' + ) } - - const parseResult = await parseBuffer(buffer, extension) - content = parseResult.content } else { - // Local file path - const parseResult = await parseFile(fileUrl) - content = parseResult.content + const s3Config = kbConfig as S3Config + if (!s3Config.bucket || !s3Config.region) { + throw new Error( + 'S3 configuration missing: AWS_REGION and S3_KB_BUCKET_NAME environment variables are required for PDF processing with Mistral OCR' + ) + } } - return { - content, - processingMethod: 'file-parser', + try { + // Upload to cloud storage + const cloudResult = await uploadFile(buffer, filename, mimeType, kbConfig as any) + // Generate presigned URL with 15 minutes expiration + httpsUrl = await getPresignedUrlWithConfig(cloudResult.key, kbConfig as any, 900) + cloudUrl = httpsUrl + logger.info(`Successfully uploaded to cloud storage for Mistral OCR: ${cloudResult.key}`) + } catch (uploadError) { + logger.error('Failed to upload to cloud storage for Mistral OCR:', uploadError) + throw new Error( + `Cloud upload failed: ${uploadError instanceof Error ? uploadError.message : 'Unknown error'}. Cloud upload is required for PDF processing with Mistral OCR.` + ) } - } catch (error) { - logger.error(`Failed to parse document "${filename}":`, error) - throw new Error( - `Document parsing failed: ${error instanceof Error ? 
error.message : 'Unknown error'}` - ) } -} -/** - * Chunk text content using TextChunker - */ -async function chunkContent(content: string, options: DocumentProcessingOptions): Promise { - const chunker = new TextChunker({ - chunkSize: options.chunkSize || 512, - minChunkSize: options.minCharactersPerChunk || 24, + if (!mistralParserTool.request?.body) { + throw new Error('Mistral parser tool not properly configured') + } + + const requestBody = mistralParserTool.request.body({ + filePath: httpsUrl, + apiKey: mistralApiKey, + resultType: 'text', }) try { - logger.info('Chunking content with TextChunker', { - contentLength: content.length, - chunkSize: options.chunkSize || 512, - }) - - const chunks = await chunker.chunk(content) + const response = await retryWithExponentialBackoff( + async () => { + // Get the URL from the tool + const url = + typeof mistralParserTool.request!.url === 'function' + ? mistralParserTool.request!.url({ + filePath: httpsUrl, + apiKey: mistralApiKey, + resultType: 'text', + }) + : mistralParserTool.request!.url + + // Get headers from the tool + const headers = + typeof mistralParserTool.request!.headers === 'function' + ? mistralParserTool.request!.headers({ + filePath: httpsUrl, + apiKey: mistralApiKey, + resultType: 'text', + }) + : mistralParserTool.request!.headers + + const res = await fetch(url, { + method: mistralParserTool.request!.method, + headers, + body: JSON.stringify(requestBody), + }) + + if (!res.ok) { + const errorText = await res.text() + throw new APIError( + `Mistral OCR failed: ${res.status} ${res.statusText} - ${errorText}`, + res.status + ) + } - logger.info(`Successfully created ${chunks.length} chunks`) - return chunks - } catch (error) { - logger.error('Chunking failed:', error) - throw new Error( - `Text chunking failed: ${error instanceof Error ? 
error.message : 'Unknown error'}` + return res + }, + { + maxRetries: 3, + initialDelayMs: 1000, + maxDelayMs: 10000, + } ) - } -} - -/** - * Process a single document: parse content and create chunks - */ -export async function processDocument( - fileUrl: string, - filename: string, - mimeType: string, - fileSize: number, - options: DocumentProcessingOptions -): Promise { - const startTime = Date.now() - logger.info(`Starting document processing for "${filename}"`) - try { - // Step 1: Parse the document - const { content, processingMethod, s3Url } = await parseDocument(fileUrl, filename, mimeType) + // Use the tool's transformResponse function to process the response + const result = await mistralParserTool.transformResponse!(response, { + filePath: httpsUrl, + apiKey: mistralApiKey, + resultType: 'text', + }) - if (!content || content.trim().length === 0) { - throw new Error('No content extracted from document') + if (!result.success) { + throw new Error(`Mistral OCR processing failed: ${result.error || 'Unknown error'}`) } - // Step 2: Chunk the content - const chunks = await chunkContent(content, options) - - if (chunks.length === 0) { - throw new Error('No chunks created from content') + const content = result.output?.content || '' + if (!content.trim()) { + throw new Error('Mistral OCR returned empty content') } - // Step 3: Calculate metadata - const characterCount = content.length - const tokenCount = chunks.reduce((acc, chunk) => acc + chunk.tokenCount, 0) - const chunkCount = chunks.length - - const processedDocument: ProcessedDocument = { + logger.info(`Mistral OCR completed successfully for ${filename}`) + return { content, - chunks, - metadata: { - filename, - fileSize, - mimeType, - characterCount, - tokenCount, - chunkCount, - processingMethod, - s3Url, - }, + processingMethod: 'mistral-ocr', + cloudUrl, } - - const processingTime = Date.now() - startTime - logger.info(`Document processing completed for "${filename}"`, { - processingTime: 
`${processingTime}ms`, - contentLength: characterCount, - chunkCount, - tokenCount, - processingMethod, + } catch (error) { + // Log the full error details for debugging + logger.error(`Mistral OCR failed for ${filename}:`, { + message: error instanceof Error ? error.message : String(error), + stack: error instanceof Error ? error.stack : undefined, + name: error instanceof Error ? error.name : 'Unknown', }) - return processedDocument - } catch (error) { - const processingTime = Date.now() - startTime - logger.error(`Document processing failed for "${filename}" after ${processingTime}ms:`, error) - throw error + // Fall back to file parser + logger.info(`Falling back to file parser for ${filename}`) + return await parseWithFileParser(fileUrl, filename, mimeType) } } /** - * Process multiple documents in parallel + * Parse document using standard file parser */ -export async function processDocuments( - documents: Array<{ - fileUrl: string - filename: string - mimeType: string - fileSize: number - }>, - options: DocumentProcessingOptions -): Promise { - const startTime = Date.now() - logger.info(`Starting batch processing of ${documents.length} documents`) - +async function parseWithFileParser( + fileUrl: string, + filename: string, + mimeType: string +): Promise<{ + content: string + processingMethod: 'file-parser' | 'mistral-ocr' + cloudUrl?: string +}> { try { - // Process all documents in parallel - const processingPromises = documents.map((doc) => - processDocument(doc.fileUrl, doc.filename, doc.mimeType, doc.fileSize, options) - ) + let content: string - const results = await Promise.allSettled(processingPromises) - - // Separate successful and failed results - const successfulResults: ProcessedDocument[] = [] - const errors: string[] = [] - - results.forEach((result, index) => { - if (result.status === 'fulfilled') { - successfulResults.push(result.value) - } else { - const filename = documents[index].filename - const errorMessage = - result.reason instanceof 
Error ? result.reason.message : 'Unknown error' - errors.push(`${filename}: ${errorMessage}`) - logger.error(`Failed to process document "${filename}":`, result.reason) + if (fileUrl.startsWith('http://') || fileUrl.startsWith('https://')) { + // Download and parse remote file + const response = await fetch(fileUrl) + if (!response.ok) { + throw new Error(`Failed to download file: ${response.status} ${response.statusText}`) } - }) - const processingTime = Date.now() - startTime - logger.info(`Batch processing completed in ${processingTime}ms`, { - totalDocuments: documents.length, - successful: successfulResults.length, - failed: errors.length, - }) + const buffer = Buffer.from(await response.arrayBuffer()) + + // Extract file extension from filename + const extension = filename.split('.').pop()?.toLowerCase() || '' + if (!extension) { + throw new Error(`Could not determine file extension from filename: ${filename}`) + } - if (errors.length > 0) { - logger.warn('Some documents failed to process:', errors) + const result = await parseBuffer(buffer, extension) + content = result.content + } else { + // Parse local file + const result = await parseFile(fileUrl) + content = result.content } - if (successfulResults.length === 0) { - throw new Error(`All documents failed to process. 
Errors: ${errors.join('; ')}`) + if (!content.trim()) { + throw new Error('File parser returned empty content') } - return successfulResults + return { + content, + processingMethod: 'file-parser', + } } catch (error) { - const processingTime = Date.now() - startTime - logger.error(`Batch processing failed after ${processingTime}ms:`, error) + logger.error(`File parser failed for ${filename}:`, error) throw error } } diff --git a/apps/sim/lib/env.ts b/apps/sim/lib/env.ts index f305148d84c..a6fa6275651 100644 --- a/apps/sim/lib/env.ts +++ b/apps/sim/lib/env.ts @@ -54,6 +54,11 @@ export const env = createEnv({ S3_BUCKET_NAME: z.string().optional(), S3_LOGS_BUCKET_NAME: z.string().optional(), S3_KB_BUCKET_NAME: z.string().optional(), + AZURE_ACCOUNT_NAME: z.string().optional(), + AZURE_ACCOUNT_KEY: z.string().optional(), + AZURE_CONNECTION_STRING: z.string().optional(), + AZURE_STORAGE_CONTAINER_NAME: z.string().optional(), + AZURE_STORAGE_KB_CONTAINER_NAME: z.string().optional(), CRON_SECRET: z.string().optional(), FREE_PLAN_LOG_RETENTION_DAYS: z.string().optional(), NODE_ENV: z.string().optional(), diff --git a/apps/sim/lib/uploads/blob/blob-client.test.ts b/apps/sim/lib/uploads/blob/blob-client.test.ts new file mode 100644 index 00000000000..01297bf01a6 --- /dev/null +++ b/apps/sim/lib/uploads/blob/blob-client.test.ts @@ -0,0 +1,209 @@ +/** + * Tests for Azure Blob Storage client + * + * @vitest-environment node + */ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' + +// Mock Azure Storage Blob +const mockUpload = vi.fn() +const mockDownload = vi.fn() +const mockDelete = vi.fn() +const mockGetBlockBlobClient = vi.fn() +const mockGetContainerClient = vi.fn() +const mockFromConnectionString = vi.fn() +const mockBlobServiceClient = vi.fn() +const mockStorageSharedKeyCredential = vi.fn() +const mockGenerateBlobSASQueryParameters = vi.fn() + +vi.mock('@azure/storage-blob', () => ({ + BlobServiceClient: { + fromConnectionString: 
mockFromConnectionString, + }, + StorageSharedKeyCredential: mockStorageSharedKeyCredential, + generateBlobSASQueryParameters: mockGenerateBlobSASQueryParameters, + BlobSASPermissions: { + parse: vi.fn().mockReturnValue('r'), + }, +})) + +describe('Azure Blob Storage Client', () => { + beforeEach(() => { + vi.resetAllMocks() + + // Mock the blob client chain + mockGetBlockBlobClient.mockReturnValue({ + upload: mockUpload, + download: mockDownload, + delete: mockDelete, + url: 'https://test.blob.core.windows.net/container/test-file', + }) + + mockGetContainerClient.mockReturnValue({ + getBlockBlobClient: mockGetBlockBlobClient, + }) + + mockFromConnectionString.mockReturnValue({ + getContainerClient: mockGetContainerClient, + }) + + mockBlobServiceClient.mockReturnValue({ + getContainerClient: mockGetContainerClient, + }) + + mockGenerateBlobSASQueryParameters.mockReturnValue({ + toString: () => 'sv=2021-06-08&se=2023-01-01T00%3A00%3A00Z&sr=b&sp=r&sig=test', + }) + + // Mock BLOB_CONFIG + vi.doMock('../setup', () => ({ + BLOB_CONFIG: { + accountName: 'testaccount', + accountKey: 'testkey', + connectionString: + 'DefaultEndpointsProtocol=https;AccountName=testaccount;AccountKey=testkey;EndpointSuffix=core.windows.net', + containerName: 'testcontainer', + }, + })) + + // Mock env + vi.doMock('../../env', () => ({ + env: { + AZURE_STORAGE_ACCOUNT_NAME: 'testaccount', + AZURE_STORAGE_ACCOUNT_KEY: 'testkey', + AZURE_STORAGE_CONNECTION_STRING: + 'DefaultEndpointsProtocol=https;AccountName=testaccount;AccountKey=testkey;EndpointSuffix=core.windows.net', + AZURE_STORAGE_CONTAINER_NAME: 'testcontainer', + }, + })) + }) + + afterEach(() => { + vi.clearAllMocks() + }) + + describe('uploadToBlob', () => { + it('should upload a file to Azure Blob Storage', async () => { + const { uploadToBlob } = await import('./blob-client') + + const testBuffer = Buffer.from('test file content') + const fileName = 'test-file.txt' + const contentType = 'text/plain' + + 
mockUpload.mockResolvedValueOnce({}) + + const result = await uploadToBlob(testBuffer, fileName, contentType) + + expect(mockUpload).toHaveBeenCalledWith(testBuffer, testBuffer.length, { + blobHTTPHeaders: { + blobContentType: contentType, + }, + metadata: { + originalName: encodeURIComponent(fileName), + uploadedAt: expect.any(String), + }, + }) + + expect(result).toEqual({ + path: expect.stringContaining('/api/files/serve/blob/'), + key: expect.stringContaining(fileName.replace(/\s+/g, '-')), + name: fileName, + size: testBuffer.length, + type: contentType, + }) + }) + + it('should handle custom blob configuration', async () => { + const { uploadToBlob } = await import('./blob-client') + + const testBuffer = Buffer.from('test file content') + const fileName = 'test-file.txt' + const contentType = 'text/plain' + const customConfig = { + containerName: 'customcontainer', + accountName: 'customaccount', + accountKey: 'customkey', + } + + mockUpload.mockResolvedValueOnce({}) + + const result = await uploadToBlob(testBuffer, fileName, contentType, customConfig) + + expect(result.name).toBe(fileName) + expect(result.type).toBe(contentType) + }) + }) + + describe('downloadFromBlob', () => { + it('should download a file from Azure Blob Storage', async () => { + const { downloadFromBlob } = await import('./blob-client') + + const testKey = 'test-file-key' + const testContent = Buffer.from('downloaded content') + + // Mock the readable stream + const mockReadableStream = { + on: vi.fn((event, callback) => { + if (event === 'data') { + callback(testContent) + } else if (event === 'end') { + callback() + } + }), + } + + mockDownload.mockResolvedValueOnce({ + readableStreamBody: mockReadableStream, + }) + + const result = await downloadFromBlob(testKey) + + expect(mockGetBlockBlobClient).toHaveBeenCalledWith(testKey) + expect(mockDownload).toHaveBeenCalled() + expect(result).toEqual(testContent) + }) + }) + + describe('deleteFromBlob', () => { + it('should delete a file from 
Azure Blob Storage', async () => { + const { deleteFromBlob } = await import('./blob-client') + + const testKey = 'test-file-key' + + mockDelete.mockResolvedValueOnce({}) + + await deleteFromBlob(testKey) + + expect(mockGetBlockBlobClient).toHaveBeenCalledWith(testKey) + expect(mockDelete).toHaveBeenCalled() + }) + }) + + describe('getPresignedUrl', () => { + it('should generate a presigned URL for Azure Blob Storage', async () => { + const { getPresignedUrl } = await import('./blob-client') + + const testKey = 'test-file-key' + const expiresIn = 3600 + + const result = await getPresignedUrl(testKey, expiresIn) + + expect(mockGetBlockBlobClient).toHaveBeenCalledWith(testKey) + expect(mockGenerateBlobSASQueryParameters).toHaveBeenCalled() + expect(result).toContain('https://test.blob.core.windows.net/container/test-file') + expect(result).toContain('sv=2021-06-08') + }) + }) + + describe('sanitizeFilenameForMetadata', () => { + it('should sanitize filenames for metadata', async () => { + const { sanitizeFilenameForMetadata } = await import('./blob-client') + + expect(sanitizeFilenameForMetadata('test file.txt')).toBe('test file.txt') + expect(sanitizeFilenameForMetadata('test"file.txt')).toBe('testfile.txt') + expect(sanitizeFilenameForMetadata('test\\file.txt')).toBe('testfile.txt') + expect(sanitizeFilenameForMetadata('test file.txt')).toBe('test file.txt') + expect(sanitizeFilenameForMetadata('')).toBe('file') + }) + }) +}) diff --git a/apps/sim/lib/uploads/blob/blob-client.ts b/apps/sim/lib/uploads/blob/blob-client.ts new file mode 100644 index 00000000000..56d15ff8daf --- /dev/null +++ b/apps/sim/lib/uploads/blob/blob-client.ts @@ -0,0 +1,290 @@ +import { + BlobSASPermissions, + BlobServiceClient, + generateBlobSASQueryParameters, + StorageSharedKeyCredential, +} from '@azure/storage-blob' +import { BLOB_CONFIG } from '../setup' + +// Lazily create a single Blob service client instance. 
let _blobServiceClient: BlobServiceClient | null = null

/**
 * Return the shared Azure Blob service client, creating it on first use.
 *
 * Credentials are read from BLOB_CONFIG. A connection string takes precedence;
 * otherwise an account name + account key pair is used to build a shared-key
 * credential against `https://<accountName>.blob.core.windows.net`.
 *
 * @returns The cached BlobServiceClient instance
 * @throws Error when BLOB_CONFIG provides neither a connection string nor both
 *   an account name and an account key
 */
export function getBlobServiceClient(): BlobServiceClient {
  // Reuse the client created by an earlier call
  if (_blobServiceClient) return _blobServiceClient

  const { accountName, accountKey, connectionString } = BLOB_CONFIG

  if (connectionString) {
    // Use connection string if provided
    _blobServiceClient = BlobServiceClient.fromConnectionString(connectionString)
  } else if (accountName && accountKey) {
    // Use account name and key
    const sharedKeyCredential = new StorageSharedKeyCredential(accountName, accountKey)
    _blobServiceClient = new BlobServiceClient(
      `https://${accountName}.blob.core.windows.net`,
      sharedKeyCredential
    )
  } else {
    // The variable names below match the ones declared in the env schema
    // (AZURE_CONNECTION_STRING, AZURE_ACCOUNT_NAME, AZURE_ACCOUNT_KEY).
    throw new Error(
      'Azure Blob Storage credentials are missing – set AZURE_CONNECTION_STRING or both AZURE_ACCOUNT_NAME and AZURE_ACCOUNT_KEY in your environment.'
    )
  }

  return _blobServiceClient
}

/**
 * Sanitize a filename for use in blob metadata headers
 * Azure blob metadata headers must contain only ASCII printable characters
 * and cannot contain certain special characters
 *
 * @param filename Original file name
 * @returns The filename reduced to printable ASCII without `"` or `\`, with
 *   whitespace runs collapsed and ends trimmed; `'file'` if nothing is left
 */
export function sanitizeFilenameForMetadata(filename: string): string {
  return (
    filename
      // Remove non-ASCII characters (keep only printable ASCII 0x20-0x7E)
      .replace(/[^\x20-\x7E]/g, '')
      // Remove characters that are problematic in HTTP headers
      .replace(/["\\]/g, '')
      // Replace multiple spaces with single space
      .replace(/\s+/g, ' ')
      // Trim whitespace
      .trim() ||
    // Provide fallback if completely sanitized
    'file'
  )
}

/**
 * File information structure
 */
export interface FileInfo {
  path: string // Path to access the file
  key: string // Blob name or local filename
  name: string // Original filename
  size: number // File size in bytes
  type: string // MIME type
}

/**
 * Custom Blob configuration
 *
 * `accountKey` and `connectionString` are both optional in the type.
 */
export interface CustomBlobConfig {
  containerName: string
  accountName: string
  accountKey?: string
  connectionString?: string
}
+/** + * Upload a file to Azure Blob Storage + * @param file Buffer containing file data + * @param fileName Original file name + * @param contentType MIME type of the file + * @param size File size in bytes (optional, will use buffer length if not provided) + * @returns Object with file information + */ +export async function uploadToBlob( + file: Buffer, + fileName: string, + contentType: string, + size?: number +): Promise + +/** + * Upload a file to Azure Blob Storage with custom container configuration + * @param file Buffer containing file data + * @param fileName Original file name + * @param contentType MIME type of the file + * @param customConfig Custom Blob configuration (container and account info) + * @param size File size in bytes (optional, will use buffer length if not provided) + * @returns Object with file information + */ +export async function uploadToBlob( + file: Buffer, + fileName: string, + contentType: string, + customConfig: CustomBlobConfig, + size?: number +): Promise + +export async function uploadToBlob( + file: Buffer, + fileName: string, + contentType: string, + configOrSize?: CustomBlobConfig | number, + size?: number +): Promise { + // Handle overloaded parameters + let config: CustomBlobConfig + let fileSize: number + + if (typeof configOrSize === 'object') { + // Custom config provided + config = configOrSize + fileSize = size ?? file.length + } else { + // Use default config + config = { + containerName: BLOB_CONFIG.containerName, + accountName: BLOB_CONFIG.accountName, + accountKey: BLOB_CONFIG.accountKey, + connectionString: BLOB_CONFIG.connectionString, + } + fileSize = configOrSize ?? 
file.length + } + + // Create a unique filename with timestamp to prevent collisions + // Use a simple timestamp without directory structure + const safeFileName = fileName.replace(/\s+/g, '-') // Replace spaces with hyphens + const uniqueKey = `${Date.now()}-${safeFileName}` + + const blobServiceClient = getBlobServiceClient() + const containerClient = blobServiceClient.getContainerClient(config.containerName) + const blockBlobClient = containerClient.getBlockBlobClient(uniqueKey) + + // Upload the file to Azure Blob Storage + await blockBlobClient.upload(file, file.length, { + blobHTTPHeaders: { + blobContentType: contentType, + }, + metadata: { + originalName: encodeURIComponent(fileName), // Encode filename to prevent invalid characters in HTTP headers + uploadedAt: new Date().toISOString(), + }, + }) + + // Create a path for API to serve the file + const servePath = `/api/files/serve/blob/${encodeURIComponent(uniqueKey)}` + + return { + path: servePath, + key: uniqueKey, + name: fileName, // Return the actual original filename in the response + size: fileSize, + type: contentType, + } +} + +/** + * Generate a presigned URL for direct file access + * @param key Blob name + * @param expiresIn Time in seconds until URL expires + * @returns Presigned URL + */ +export async function getPresignedUrl(key: string, expiresIn = 3600) { + const blobServiceClient = getBlobServiceClient() + const containerClient = blobServiceClient.getContainerClient(BLOB_CONFIG.containerName) + const blockBlobClient = containerClient.getBlockBlobClient(key) + + // Generate SAS token for the blob + const sasOptions = { + containerName: BLOB_CONFIG.containerName, + blobName: key, + permissions: BlobSASPermissions.parse('r'), // Read permission + startsOn: new Date(), + expiresOn: new Date(Date.now() + expiresIn * 1000), + } + + const sasToken = generateBlobSASQueryParameters( + sasOptions, + new StorageSharedKeyCredential(BLOB_CONFIG.accountName, BLOB_CONFIG.accountKey || '') + ).toString() 
+ + return `${blockBlobClient.url}?${sasToken}` +} + +/** + * Generate a presigned URL for direct file access with custom container + * @param key Blob name + * @param customConfig Custom Blob configuration + * @param expiresIn Time in seconds until URL expires + * @returns Presigned URL + */ +export async function getPresignedUrlWithConfig( + key: string, + customConfig: CustomBlobConfig, + expiresIn = 3600 +) { + // Create a temporary client for the custom config + let tempBlobServiceClient: BlobServiceClient + + if (customConfig.connectionString) { + tempBlobServiceClient = BlobServiceClient.fromConnectionString(customConfig.connectionString) + } else if (customConfig.accountName && customConfig.accountKey) { + const sharedKeyCredential = new StorageSharedKeyCredential( + customConfig.accountName, + customConfig.accountKey + ) + tempBlobServiceClient = new BlobServiceClient( + `https://${customConfig.accountName}.blob.core.windows.net`, + sharedKeyCredential + ) + } else { + throw new Error( + 'Custom blob config must include either connectionString or accountName + accountKey' + ) + } + + const containerClient = tempBlobServiceClient.getContainerClient(customConfig.containerName) + const blockBlobClient = containerClient.getBlockBlobClient(key) + + // Generate SAS token for the blob + const sasOptions = { + containerName: customConfig.containerName, + blobName: key, + permissions: BlobSASPermissions.parse('r'), // Read permission + startsOn: new Date(), + expiresOn: new Date(Date.now() + expiresIn * 1000), + } + + const sasToken = generateBlobSASQueryParameters( + sasOptions, + new StorageSharedKeyCredential(customConfig.accountName, customConfig.accountKey || '') + ).toString() + + return `${blockBlobClient.url}?${sasToken}` +} + +/** + * Download a file from Azure Blob Storage + * @param key Blob name + * @returns File buffer + */ +export async function downloadFromBlob(key: string) { + const blobServiceClient = getBlobServiceClient() + const containerClient 
= blobServiceClient.getContainerClient(BLOB_CONFIG.containerName) + const blockBlobClient = containerClient.getBlockBlobClient(key) + + const downloadBlockBlobResponse = await blockBlobClient.download() + const downloaded = await streamToBuffer(downloadBlockBlobResponse.readableStreamBody!) + + return downloaded +} + +/** + * Delete a file from Azure Blob Storage + * @param key Blob name + */ +export async function deleteFromBlob(key: string) { + const blobServiceClient = getBlobServiceClient() + const containerClient = blobServiceClient.getContainerClient(BLOB_CONFIG.containerName) + const blockBlobClient = containerClient.getBlockBlobClient(key) + + await blockBlobClient.delete() +} + +/** + * Helper function to convert a readable stream to a Buffer + */ +async function streamToBuffer(readableStream: NodeJS.ReadableStream): Promise { + return new Promise((resolve, reject) => { + const chunks: Buffer[] = [] + readableStream.on('data', (data) => { + chunks.push(data instanceof Buffer ? data : Buffer.from(data)) + }) + readableStream.on('end', () => { + resolve(Buffer.concat(chunks)) + }) + readableStream.on('error', reject) + }) +} diff --git a/apps/sim/lib/uploads/blob/index.ts b/apps/sim/lib/uploads/blob/index.ts new file mode 100644 index 00000000000..dd894f535de --- /dev/null +++ b/apps/sim/lib/uploads/blob/index.ts @@ -0,0 +1,11 @@ +export { + type CustomBlobConfig, + deleteFromBlob, + downloadFromBlob, + type FileInfo, + getBlobServiceClient, + getPresignedUrl, + getPresignedUrlWithConfig, + sanitizeFilenameForMetadata, + uploadToBlob, +} from './blob-client' diff --git a/apps/sim/lib/uploads/index.ts b/apps/sim/lib/uploads/index.ts new file mode 100644 index 00000000000..84ef2206c4b --- /dev/null +++ b/apps/sim/lib/uploads/index.ts @@ -0,0 +1,28 @@ +// Export the storage abstraction layer + +export * as BlobClient from './blob/blob-client' +// Export specific storage clients for advanced use cases +export * as S3Client from './s3/s3-client' +// Export 
configuration +export { + BLOB_CONFIG, + BLOB_KB_CONFIG, + ensureUploadsDirectory, + S3_CONFIG, + S3_KB_CONFIG, + UPLOAD_DIR, + USE_BLOB_STORAGE, + USE_S3_STORAGE, +} from './setup' +export { + type CustomStorageConfig, + deleteFile, + downloadFile, + type FileInfo, + getPresignedUrl, + getPresignedUrlWithConfig, + getServePathPrefix, + getStorageProvider, + isUsingCloudStorage, + uploadFile, +} from './storage-client' diff --git a/apps/sim/lib/uploads/s3/index.ts b/apps/sim/lib/uploads/s3/index.ts new file mode 100644 index 00000000000..a2b525c6787 --- /dev/null +++ b/apps/sim/lib/uploads/s3/index.ts @@ -0,0 +1,11 @@ +export { + type CustomS3Config, + deleteFromS3, + downloadFromS3, + type FileInfo, + getPresignedUrl, + getPresignedUrlWithConfig, + getS3Client, + sanitizeFilenameForMetadata, + uploadToS3, +} from './s3-client' diff --git a/apps/sim/lib/uploads/s3-client.test.ts b/apps/sim/lib/uploads/s3/s3-client.test.ts similarity index 54% rename from apps/sim/lib/uploads/s3-client.test.ts rename to apps/sim/lib/uploads/s3/s3-client.test.ts index f5298e54eb6..beb5cd7fe00 100644 --- a/apps/sim/lib/uploads/s3-client.test.ts +++ b/apps/sim/lib/uploads/s3/s3-client.test.ts @@ -1,117 +1,96 @@ -import { - DeleteObjectCommand, - GetObjectCommand, - PutObjectCommand, - S3Client, -} from '@aws-sdk/client-s3' -import { getSignedUrl } from '@aws-sdk/s3-request-presigner' /** - * Unit tests for S3 client + * Tests for S3 client functionality * * @vitest-environment node */ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' -import { deleteFromS3, downloadFromS3, getPresignedUrl, getS3Client, uploadToS3 } from './s3-client' -// Mock AWS SDK -vi.mock('@aws-sdk/client-s3', () => { +describe('S3 Client', () => { + // Mock AWS SDK modules const mockSend = vi.fn() - const mockS3Client = vi.fn().mockImplementation(() => ({ + const mockS3Client = { send: mockSend, - })) - - return { - S3Client: mockS3Client, - PutObjectCommand: vi.fn(), - GetObjectCommand: 
vi.fn(), - DeleteObjectCommand: vi.fn(), } -}) -vi.mock('@aws-sdk/s3-request-presigner', () => ({ - getSignedUrl: vi.fn().mockResolvedValue('https://example.com/presigned-url'), -})) - -// Mock date for predictable timestamps -vi.mock('./setup', () => ({ - S3_CONFIG: { - bucket: 'test-bucket', - region: 'test-region', - }, -})) - -// Mock logger -vi.mock('@/lib/logs/console-logger', () => ({ - createLogger: vi.fn().mockReturnValue({ - info: vi.fn(), - error: vi.fn(), - warn: vi.fn(), - debug: vi.fn(), - }), -})) - -const s3Client = getS3Client() - -describe('S3 Client', () => { - let mockDate: Date - let originalDateNow: typeof Date.now + const mockPutObjectCommand = vi.fn() + const mockGetObjectCommand = vi.fn() + const mockDeleteObjectCommand = vi.fn() + const mockGetSignedUrl = vi.fn() beforeEach(() => { + vi.resetModules() vi.clearAllMocks() - // Mock Date.now() for predictable timestamps - mockDate = new Date(2023, 0, 1, 12, 0, 0) // 2023-01-01 12:00:00 - originalDateNow = Date.now - Date.now = vi.fn(() => mockDate.getTime()) + // Mock the AWS SDK + vi.doMock('@aws-sdk/client-s3', () => ({ + S3Client: vi.fn(() => mockS3Client), + PutObjectCommand: mockPutObjectCommand, + GetObjectCommand: mockGetObjectCommand, + DeleteObjectCommand: mockDeleteObjectCommand, + })) + + vi.doMock('@aws-sdk/s3-request-presigner', () => ({ + getSignedUrl: mockGetSignedUrl, + })) + + // Mock the setup configuration with test values + vi.doMock('../setup', () => ({ + S3_CONFIG: { + bucket: 'test-bucket', + region: 'test-region', + }, + })) + + // Mock Date.now for consistent timestamps + vi.spyOn(Date, 'now').mockReturnValue(1672603200000) // Fixed timestamp + vi.spyOn(Date.prototype, 'toISOString').mockReturnValue('2025-06-16T01:13:10.765Z') }) afterEach(() => { - // Restore original Date.now - Date.now = originalDateNow + vi.restoreAllMocks() }) describe('uploadToS3', () => { it('should upload a file to S3 and return file info', async () => { - // Mock S3 client send method to 
return an appropriate type - vi.mocked(s3Client.send).mockResolvedValueOnce({ - $metadata: { httpStatusCode: 200 }, - } as any) + // Mock successful upload + mockSend.mockResolvedValueOnce({}) - const testFile = Buffer.from('test file content') + const { uploadToS3 } = await import('./s3-client') + + const file = Buffer.from('test content') const fileName = 'test-file.txt' const contentType = 'text/plain' - const fileSize = testFile.length - const result = await uploadToS3(testFile, fileName, contentType) + const result = await uploadToS3(file, fileName, contentType) // Check that S3 client was called with correct parameters - expect(PutObjectCommand).toHaveBeenCalledWith({ + expect(mockPutObjectCommand).toHaveBeenCalledWith({ Bucket: 'test-bucket', Key: expect.stringContaining('test-file.txt'), - Body: testFile, - ContentType: contentType, + Body: file, + ContentType: 'text/plain', Metadata: { - originalName: encodeURIComponent(fileName), + originalName: 'test-file.txt', uploadedAt: expect.any(String), }, }) - expect(s3Client.send).toHaveBeenCalledTimes(1) + expect(mockSend).toHaveBeenCalledWith(expect.any(Object)) // Check return value expect(result).toEqual({ path: expect.stringContaining('/api/files/serve/s3/'), key: expect.stringContaining('test-file.txt'), - name: fileName, - size: fileSize, - type: contentType, + name: 'test-file.txt', + size: file.length, + type: 'text/plain', }) }) it('should handle spaces in filenames', async () => { - vi.mocked(s3Client.send).mockResolvedValueOnce({ - $metadata: { httpStatusCode: 200 }, - } as any) + mockSend.mockResolvedValueOnce({}) + + const { uploadToS3 } = await import('./s3-client') const testFile = Buffer.from('test file content') const fileName = 'test file with spaces.txt' @@ -119,20 +98,26 @@ describe('S3 Client', () => { const result = await uploadToS3(testFile, fileName, contentType) - // Verify spaces were replaced with hyphens in the key but original name is preserved - 
expect(result.key).toContain('test-file-with-spaces.txt') + // Check that the filename was sanitized in the key + expect(mockPutObjectCommand).toHaveBeenCalledWith( + expect.objectContaining({ + Key: expect.stringContaining('test-file-with-spaces.txt'), + }) + ) + + // But the original name should be preserved in metadata and result expect(result.name).toBe(fileName) }) it('should use provided size if available', async () => { - vi.mocked(s3Client.send).mockResolvedValueOnce({ - $metadata: { httpStatusCode: 200 }, - } as any) + mockSend.mockResolvedValueOnce({}) + + const { uploadToS3 } = await import('./s3-client') const testFile = Buffer.from('test file content') const fileName = 'test-file.txt' const contentType = 'text/plain' - const providedSize = 12345 // Different from actual buffer size + const providedSize = 1000 const result = await uploadToS3(testFile, fileName, contentType, providedSize) @@ -141,7 +126,9 @@ describe('S3 Client', () => { it('should handle upload errors', async () => { const error = new Error('Upload failed') - vi.mocked(s3Client.send).mockRejectedValueOnce(error) + mockSend.mockRejectedValueOnce(error) + + const { uploadToS3 } = await import('./s3-client') const testFile = Buffer.from('test file content') const fileName = 'test-file.txt' @@ -153,28 +140,36 @@ describe('S3 Client', () => { describe('getPresignedUrl', () => { it('should generate a presigned URL for a file', async () => { + mockGetSignedUrl.mockResolvedValueOnce('https://example.com/presigned-url') + + const { getPresignedUrl } = await import('./s3-client') + const key = 'test-file.txt' - const expiresIn = 7200 + const expiresIn = 1800 const url = await getPresignedUrl(key, expiresIn) - expect(GetObjectCommand).toHaveBeenCalledWith({ + expect(mockGetObjectCommand).toHaveBeenCalledWith({ Bucket: 'test-bucket', Key: key, }) - expect(getSignedUrl).toHaveBeenCalledWith(s3Client, expect.any(Object), { expiresIn }) + expect(mockGetSignedUrl).toHaveBeenCalledWith(mockS3Client, 
expect.any(Object), { expiresIn }) expect(url).toBe('https://example.com/presigned-url') }) it('should use default expiration if not provided', async () => { + mockGetSignedUrl.mockResolvedValueOnce('https://example.com/presigned-url') + + const { getPresignedUrl } = await import('./s3-client') + const key = 'test-file.txt' await getPresignedUrl(key) - expect(getSignedUrl).toHaveBeenCalledWith( - s3Client, + expect(mockGetSignedUrl).toHaveBeenCalledWith( + mockS3Client, expect.any(Object), { expiresIn: 3600 } // Default is 3600 seconds (1 hour) ) @@ -182,7 +177,9 @@ describe('S3 Client', () => { it('should handle errors when generating presigned URL', async () => { const error = new Error('Presigned URL generation failed') - vi.mocked(getSignedUrl).mockRejectedValueOnce(error) + mockGetSignedUrl.mockRejectedValueOnce(error) + + const { getPresignedUrl } = await import('./s3-client') const key = 'test-file.txt' @@ -192,7 +189,7 @@ describe('S3 Client', () => { describe('downloadFromS3', () => { it('should download a file from S3', async () => { - // Create mock stream with data events + // Mock a readable stream const mockStream = { on: vi.fn((event, callback) => { if (event === 'data') { @@ -206,27 +203,29 @@ describe('S3 Client', () => { }), } - vi.mocked(s3Client.send).mockResolvedValueOnce({ + mockSend.mockResolvedValueOnce({ Body: mockStream, $metadata: { httpStatusCode: 200 }, - } as any) + }) + + const { downloadFromS3 } = await import('./s3-client') const key = 'test-file.txt' + const result = await downloadFromS3(key) - expect(GetObjectCommand).toHaveBeenCalledWith({ + expect(mockGetObjectCommand).toHaveBeenCalledWith({ Bucket: 'test-bucket', Key: key, }) - expect(s3Client.send).toHaveBeenCalledTimes(1) + expect(mockSend).toHaveBeenCalledTimes(1) expect(result).toBeInstanceOf(Buffer) - expect(Buffer.concat([Buffer.from('chunk1'), Buffer.from('chunk2')]).toString()).toEqual( - result.toString() - ) + expect(result.toString()).toBe('chunk1chunk2') }) 
it('should handle stream errors', async () => { + // Mock a readable stream that throws an error const mockStream = { on: vi.fn((event, callback) => { if (event === 'error') { @@ -236,59 +235,73 @@ describe('S3 Client', () => { }), } - vi.mocked(s3Client.send).mockResolvedValueOnce({ + mockSend.mockResolvedValueOnce({ Body: mockStream, $metadata: { httpStatusCode: 200 }, - } as any) + }) + + const { downloadFromS3 } = await import('./s3-client') const key = 'test-file.txt' + await expect(downloadFromS3(key)).rejects.toThrow('Stream error') }) it('should handle S3 client errors', async () => { const error = new Error('Download failed') - vi.mocked(s3Client.send).mockRejectedValueOnce(error) + mockSend.mockRejectedValueOnce(error) + + const { downloadFromS3 } = await import('./s3-client') const key = 'test-file.txt' + await expect(downloadFromS3(key)).rejects.toThrow('Download failed') }) }) describe('deleteFromS3', () => { it('should delete a file from S3', async () => { - vi.mocked(s3Client.send).mockResolvedValueOnce({ - $metadata: { httpStatusCode: 200 }, - } as any) + mockSend.mockResolvedValueOnce({}) + + const { deleteFromS3 } = await import('./s3-client') const key = 'test-file.txt' + await deleteFromS3(key) - expect(DeleteObjectCommand).toHaveBeenCalledWith({ + expect(mockDeleteObjectCommand).toHaveBeenCalledWith({ Bucket: 'test-bucket', Key: key, }) - expect(s3Client.send).toHaveBeenCalledTimes(1) + expect(mockSend).toHaveBeenCalledTimes(1) }) it('should handle delete errors', async () => { const error = new Error('Delete failed') - vi.mocked(s3Client.send).mockRejectedValueOnce(error) + mockSend.mockRejectedValueOnce(error) + + const { deleteFromS3 } = await import('./s3-client') const key = 'test-file.txt' + await expect(deleteFromS3(key)).rejects.toThrow('Delete failed') }) }) describe('s3Client initialization', () => { - it('should initialize with correct configuration', () => { + it('should initialize with correct configuration', async () => { + const 
{ getS3Client } = await import('./s3-client') + const { S3Client } = await import('@aws-sdk/client-s3') + + // Get the client (this will trigger initialization) + const client = getS3Client() + // We can't test the constructor call easily since it happens at import time // Instead, we can test the s3Client properties - expect(s3Client).toBeDefined() + expect(client).toBeDefined() // Verify the client was constructed with the right configuration expect(S3Client).toBeDefined() - // We mocked S3Client function earlier, but that doesn't affect the imported s3Client object - // So instead of checking constructor call, check that mocked client exists }) }) }) diff --git a/apps/sim/lib/uploads/s3-client.ts b/apps/sim/lib/uploads/s3/s3-client.ts similarity index 92% rename from apps/sim/lib/uploads/s3-client.ts rename to apps/sim/lib/uploads/s3/s3-client.ts index ce3a2e544f2..eb101ea4aeb 100644 --- a/apps/sim/lib/uploads/s3-client.ts +++ b/apps/sim/lib/uploads/s3/s3-client.ts @@ -5,8 +5,8 @@ import { S3Client, } from '@aws-sdk/client-s3' import { getSignedUrl } from '@aws-sdk/s3-request-presigner' -import { env } from '../env' -import { S3_CONFIG } from './setup' +import { env } from '../../env' +import { S3_CONFIG } from '../setup' // Lazily create a single S3 client instance. let _s3Client: S3Client | null = null @@ -22,12 +22,17 @@ export function getS3Client(): S3Client { ) } + // Only pass explicit credentials if both environment variables are available. + // Otherwise, fall back to the AWS SDK default credential provider chain (e.g. EC2/ECS roles, shared config files, etc.). _s3Client = new S3Client({ region, - credentials: { - accessKeyId: env.AWS_ACCESS_KEY_ID || '', - secretAccessKey: env.AWS_SECRET_ACCESS_KEY || '', - }, + credentials: + env.AWS_ACCESS_KEY_ID && env.AWS_SECRET_ACCESS_KEY + ? 
{ + accessKeyId: env.AWS_ACCESS_KEY_ID, + secretAccessKey: env.AWS_SECRET_ACCESS_KEY, + } + : undefined, }) return _s3Client diff --git a/apps/sim/lib/uploads/setup.server.ts b/apps/sim/lib/uploads/setup.server.ts index 203c946c571..19c2308a680 100644 --- a/apps/sim/lib/uploads/setup.server.ts +++ b/apps/sim/lib/uploads/setup.server.ts @@ -1,24 +1,61 @@ import { createLogger } from '@/lib/logs/console-logger' import { env } from '../env' -import { ensureUploadsDirectory, USE_S3_STORAGE } from './setup' +import { + ensureUploadsDirectory, + getStorageProvider, + USE_BLOB_STORAGE, + USE_S3_STORAGE, +} from './setup' const logger = createLogger('UploadsSetup') // Immediately invoke on server startup if (typeof process !== 'undefined') { + const storageProvider = getStorageProvider() + // Log storage mode - logger.info(`Storage mode: ${USE_S3_STORAGE ? 'S3' : 'Local'}`) + logger.info(`Storage provider: ${storageProvider}`) - if (USE_S3_STORAGE) { + if (USE_BLOB_STORAGE) { + // Verify Azure Blob credentials + if (!env.AZURE_STORAGE_CONTAINER_NAME) { + logger.warn('Azure Blob storage is enabled but AZURE_STORAGE_CONTAINER_NAME is not set') + } else if (!env.AZURE_ACCOUNT_NAME && !env.AZURE_CONNECTION_STRING) { + logger.warn( + 'Azure Blob storage is enabled but neither AZURE_ACCOUNT_NAME nor AZURE_CONNECTION_STRING is set' + ) + logger.warn( + 'Set AZURE_ACCOUNT_NAME + AZURE_ACCOUNT_KEY or AZURE_CONNECTION_STRING for Azure Blob storage' + ) + } else if (env.AZURE_ACCOUNT_NAME && !env.AZURE_ACCOUNT_KEY && !env.AZURE_CONNECTION_STRING) { + logger.warn( + 'AZURE_ACCOUNT_NAME is set but AZURE_ACCOUNT_KEY is missing and no AZURE_CONNECTION_STRING provided' + ) + logger.warn('Set AZURE_ACCOUNT_KEY or use AZURE_CONNECTION_STRING for authentication') + } else { + logger.info('Azure Blob storage credentials found in environment variables') + if (env.AZURE_CONNECTION_STRING) { + logger.info('Using Azure connection string for authentication') + } else { + logger.info('Using Azure 
account name and key for authentication') + } + } + } else if (USE_S3_STORAGE) { // Verify AWS credentials - if (!env.AWS_ACCESS_KEY_ID || !env.AWS_SECRET_ACCESS_KEY) { - logger.warn('AWS credentials are not set in environment variables.') - logger.warn('Set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY for S3 storage.') + if (!env.S3_BUCKET_NAME || !env.AWS_REGION) { + logger.warn('S3 storage configuration is incomplete') + logger.warn('Set S3_BUCKET_NAME and AWS_REGION for S3 storage') + } else if (!env.AWS_ACCESS_KEY_ID || !env.AWS_SECRET_ACCESS_KEY) { + logger.warn('AWS credentials are not set in environment variables') + logger.warn('Set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY for S3 storage') } else { - logger.info('AWS credentials found in environment variables') + logger.info('AWS S3 credentials found in environment variables') } } else { - // Only initialize local uploads directory in development mode + // Local storage mode + logger.info('Using local file storage') + + // Only initialize local uploads directory when using local storage ensureUploadsDirectory().then((success) => { if (success) { logger.info('Local uploads directory initialized') @@ -27,6 +64,14 @@ if (typeof process !== 'undefined') { } }) } + + // Log additional configuration details + if (USE_BLOB_STORAGE && env.AZURE_STORAGE_KB_CONTAINER_NAME) { + logger.info(`Azure Blob knowledge base container: ${env.AZURE_STORAGE_KB_CONTAINER_NAME}`) + } + if (USE_S3_STORAGE && env.S3_KB_BUCKET_NAME) { + logger.info(`S3 knowledge base bucket: ${env.S3_KB_BUCKET_NAME}`) + } } export default ensureUploadsDirectory diff --git a/apps/sim/lib/uploads/setup.ts b/apps/sim/lib/uploads/setup.ts index 5b91cd15603..6a5fc134e96 100644 --- a/apps/sim/lib/uploads/setup.ts +++ b/apps/sim/lib/uploads/setup.ts @@ -10,19 +10,58 @@ const PROJECT_ROOT = path.resolve(process.cwd()) export const UPLOAD_DIR = join(PROJECT_ROOT, 'uploads') -export const USE_S3_STORAGE = process.env.NODE_ENV === 'production' +// Check if 
S3 is configured (has required credentials) +const hasS3Config = !!(env.S3_BUCKET_NAME && env.AWS_REGION) +// Check if Azure Blob is configured (has required credentials) +const hasBlobConfig = !!( + env.AZURE_STORAGE_CONTAINER_NAME && + (env.AZURE_ACCOUNT_NAME || env.AZURE_CONNECTION_STRING) +) + +// Storage configuration flags - auto-detect based on available credentials +// Priority: Blob > S3 > Local (if both are configured, Blob takes priority) +export const USE_BLOB_STORAGE = hasBlobConfig +export const USE_S3_STORAGE = hasS3Config && !USE_BLOB_STORAGE + +// S3 Configuration export const S3_CONFIG = { bucket: env.S3_BUCKET_NAME || '', region: env.AWS_REGION || '', } +// Azure Blob Storage Configuration +export const BLOB_CONFIG = { + accountName: env.AZURE_ACCOUNT_NAME || '', + accountKey: env.AZURE_ACCOUNT_KEY || '', + connectionString: env.AZURE_CONNECTION_STRING || '', + containerName: env.AZURE_STORAGE_CONTAINER_NAME || '', +} + +// Knowledge Base specific configurations +export const S3_KB_CONFIG = { + bucket: env.S3_KB_BUCKET_NAME || '', + region: env.AWS_REGION || '', +} + +export const BLOB_KB_CONFIG = { + accountName: env.AZURE_ACCOUNT_NAME || '', + accountKey: env.AZURE_ACCOUNT_KEY || '', + connectionString: env.AZURE_CONNECTION_STRING || '', + containerName: env.AZURE_STORAGE_KB_CONTAINER_NAME || '', +} + export async function ensureUploadsDirectory() { if (USE_S3_STORAGE) { logger.info('Using S3 storage, skipping local uploads directory creation') return true } + if (USE_BLOB_STORAGE) { + logger.info('Using Azure Blob storage, skipping local uploads directory creation') + return true + } + try { if (!existsSync(UPLOAD_DIR)) { await mkdir(UPLOAD_DIR, { recursive: true }) @@ -35,3 +74,19 @@ export async function ensureUploadsDirectory() { return false } } + +/** + * Get the current storage provider as a human-readable string + */ +export function getStorageProvider(): 'Azure Blob' | 'S3' | 'Local' { + if (USE_BLOB_STORAGE) return 'Azure Blob' + if 
(USE_S3_STORAGE) return 'S3' + return 'Local' +} + +/** + * Check if we're using any cloud storage (S3 or Blob) + */ +export function isUsingCloudStorage(): boolean { + return USE_S3_STORAGE || USE_BLOB_STORAGE +} diff --git a/apps/sim/lib/uploads/storage-client.ts b/apps/sim/lib/uploads/storage-client.ts new file mode 100644 index 00000000000..57a391b5e7a --- /dev/null +++ b/apps/sim/lib/uploads/storage-client.ts @@ -0,0 +1,203 @@ +import { createLogger } from '@/lib/logs/console-logger' +// Import Blob functions +import { + type FileInfo as BlobFileInfo, + type CustomBlobConfig, + deleteFromBlob, + downloadFromBlob, + getPresignedUrl as getBlobPresignedUrl, + getPresignedUrlWithConfig as getBlobPresignedUrlWithConfig, + uploadToBlob, +} from './blob/blob-client' +// Import S3 functions +import { + type CustomS3Config, + deleteFromS3, + downloadFromS3, + getPresignedUrl as getS3PresignedUrl, + getPresignedUrlWithConfig as getS3PresignedUrlWithConfig, + type FileInfo as S3FileInfo, + uploadToS3, +} from './s3/s3-client' +import { USE_BLOB_STORAGE, USE_S3_STORAGE } from './setup' + +const logger = createLogger('StorageClient') + +// Re-export common types +export type FileInfo = S3FileInfo | BlobFileInfo +export type CustomStorageConfig = CustomS3Config | CustomBlobConfig + +/** + * Upload a file to the configured storage provider + * @param file Buffer containing file data + * @param fileName Original file name + * @param contentType MIME type of the file + * @param size File size in bytes (optional, will use buffer length if not provided) + * @returns Object with file information + */ +export async function uploadFile( + file: Buffer, + fileName: string, + contentType: string, + size?: number +): Promise + +/** + * Upload a file to the configured storage provider with custom configuration + * @param file Buffer containing file data + * @param fileName Original file name + * @param contentType MIME type of the file + * @param customConfig Custom storage 
configuration + * @param size File size in bytes (optional, will use buffer length if not provided) + * @returns Object with file information + */ +export async function uploadFile( + file: Buffer, + fileName: string, + contentType: string, + customConfig: CustomStorageConfig, + size?: number +): Promise + +export async function uploadFile( + file: Buffer, + fileName: string, + contentType: string, + configOrSize?: CustomStorageConfig | number, + size?: number +): Promise { + if (USE_BLOB_STORAGE) { + logger.info(`Uploading file to Azure Blob Storage: ${fileName}`) + if (typeof configOrSize === 'object') { + return uploadToBlob(file, fileName, contentType, configOrSize as CustomBlobConfig, size) + } + return uploadToBlob(file, fileName, contentType, configOrSize) + } + + if (USE_S3_STORAGE) { + logger.info(`Uploading file to S3: ${fileName}`) + if (typeof configOrSize === 'object') { + return uploadToS3(file, fileName, contentType, configOrSize as CustomS3Config, size) + } + return uploadToS3(file, fileName, contentType, configOrSize) + } + + throw new Error( + 'No storage provider configured. Set USE_BLOB=true or configure S3 for production.' + ) +} + +/** + * Download a file from the configured storage provider + * @param key File key/name + * @returns File buffer + */ +export async function downloadFile(key: string): Promise { + if (USE_BLOB_STORAGE) { + logger.info(`Downloading file from Azure Blob Storage: ${key}`) + return downloadFromBlob(key) + } + + if (USE_S3_STORAGE) { + logger.info(`Downloading file from S3: ${key}`) + return downloadFromS3(key) + } + + throw new Error( + 'No storage provider configured. Set USE_BLOB=true or configure S3 for production.' 
+ ) +} + +/** + * Delete a file from the configured storage provider + * @param key File key/name + */ +export async function deleteFile(key: string): Promise { + if (USE_BLOB_STORAGE) { + logger.info(`Deleting file from Azure Blob Storage: ${key}`) + return deleteFromBlob(key) + } + + if (USE_S3_STORAGE) { + logger.info(`Deleting file from S3: ${key}`) + return deleteFromS3(key) + } + + throw new Error( + 'No storage provider configured. Set USE_BLOB=true or configure S3 for production.' + ) +} + +/** + * Generate a presigned URL for direct file access + * @param key File key/name + * @param expiresIn Time in seconds until URL expires + * @returns Presigned URL + */ +export async function getPresignedUrl(key: string, expiresIn = 3600): Promise { + if (USE_BLOB_STORAGE) { + logger.info(`Generating presigned URL for Azure Blob Storage: ${key}`) + return getBlobPresignedUrl(key, expiresIn) + } + + if (USE_S3_STORAGE) { + logger.info(`Generating presigned URL for S3: ${key}`) + return getS3PresignedUrl(key, expiresIn) + } + + throw new Error( + 'No storage provider configured. Set USE_BLOB=true or configure S3 for production.' + ) +} + +/** + * Generate a presigned URL for direct file access with custom configuration + * @param key File key/name + * @param customConfig Custom storage configuration + * @param expiresIn Time in seconds until URL expires + * @returns Presigned URL + */ +export async function getPresignedUrlWithConfig( + key: string, + customConfig: CustomStorageConfig, + expiresIn = 3600 +): Promise { + if (USE_BLOB_STORAGE) { + logger.info(`Generating presigned URL for Azure Blob Storage with custom config: ${key}`) + return getBlobPresignedUrlWithConfig(key, customConfig as CustomBlobConfig, expiresIn) + } + + if (USE_S3_STORAGE) { + logger.info(`Generating presigned URL for S3 with custom config: ${key}`) + return getS3PresignedUrlWithConfig(key, customConfig as CustomS3Config, expiresIn) + } + + throw new Error( + 'No storage provider configured. 
Set USE_BLOB=true or configure S3 for production.' + ) +} + +/** + * Get the current storage provider name + */ +export function getStorageProvider(): 'blob' | 's3' | 'local' { + if (USE_BLOB_STORAGE) return 'blob' + if (USE_S3_STORAGE) return 's3' + return 'local' +} + +/** + * Check if we're using cloud storage (either S3 or Blob) + */ +export function isUsingCloudStorage(): boolean { + return USE_BLOB_STORAGE || USE_S3_STORAGE +} + +/** + * Get the appropriate serve path prefix based on storage provider + */ +export function getServePathPrefix(): string { + if (USE_BLOB_STORAGE) return '/api/files/serve/blob/' + if (USE_S3_STORAGE) return '/api/files/serve/s3/' + return '/api/files/serve/' +} diff --git a/apps/sim/package.json b/apps/sim/package.json index f2a55e22067..db7a88f8edb 100644 --- a/apps/sim/package.json +++ b/apps/sim/package.json @@ -27,6 +27,7 @@ "@anthropic-ai/sdk": "^0.39.0", "@aws-sdk/client-s3": "^3.779.0", "@aws-sdk/s3-request-presigner": "^3.779.0", + "@azure/storage-blob": "12.27.0", "@better-auth/stripe": "^1.2.9", "@browserbasehq/stagehand": "^2.0.0", "@cerebras/cerebras_cloud_sdk": "^1.23.0", diff --git a/bun.lock b/bun.lock index 5fda94e685e..4e015d745c6 100644 --- a/bun.lock +++ b/bun.lock @@ -58,6 +58,7 @@ "@anthropic-ai/sdk": "^0.39.0", "@aws-sdk/client-s3": "^3.779.0", "@aws-sdk/s3-request-presigner": "^3.779.0", + "@azure/storage-blob": "12.27.0", "@better-auth/stripe": "^1.2.9", "@browserbasehq/stagehand": "^2.0.0", "@cerebras/cerebras_cloud_sdk": "^1.23.0", @@ -320,6 +321,30 @@ "@aws-sdk/xml-builder": ["@aws-sdk/xml-builder@3.821.0", "", { "dependencies": { "@smithy/types": "^4.3.1", "tslib": "^2.6.2" } }, "sha512-DIIotRnefVL6DiaHtO6/21DhJ4JZnnIwdNbpwiAhdt/AVbttcE4yw925gsjur0OGv5BTYXQXU3YnANBYnZjuQA=="], + "@azure/abort-controller": ["@azure/abort-controller@2.1.2", "", { "dependencies": { "tslib": "^2.6.2" } }, "sha512-nBrLsEWm4J2u5LpAPjxADTlq3trDgVZZXHNKabeXZtpq3d3AbN/KGO82R87rdDz5/lYB024rtEf10/q0urNgsA=="], + + 
"@azure/core-auth": ["@azure/core-auth@1.9.0", "", { "dependencies": { "@azure/abort-controller": "^2.0.0", "@azure/core-util": "^1.11.0", "tslib": "^2.6.2" } }, "sha512-FPwHpZywuyasDSLMqJ6fhbOK3TqUdviZNF8OqRGA4W5Ewib2lEEZ+pBsYcBa88B2NGO/SEnYPGhyBqNlE8ilSw=="], + + "@azure/core-client": ["@azure/core-client@1.9.4", "", { "dependencies": { "@azure/abort-controller": "^2.0.0", "@azure/core-auth": "^1.4.0", "@azure/core-rest-pipeline": "^1.20.0", "@azure/core-tracing": "^1.0.0", "@azure/core-util": "^1.6.1", "@azure/logger": "^1.0.0", "tslib": "^2.6.2" } }, "sha512-f7IxTD15Qdux30s2qFARH+JxgwxWLG2Rlr4oSkPGuLWm+1p5y1+C04XGLA0vmX6EtqfutmjvpNmAfgwVIS5hpw=="], + + "@azure/core-http-compat": ["@azure/core-http-compat@2.3.0", "", { "dependencies": { "@azure/abort-controller": "^2.0.0", "@azure/core-client": "^1.3.0", "@azure/core-rest-pipeline": "^1.20.0" } }, "sha512-qLQujmUypBBG0gxHd0j6/Jdmul6ttl24c8WGiLXIk7IHXdBlfoBqW27hyz3Xn6xbfdyVSarl1Ttbk0AwnZBYCw=="], + + "@azure/core-lro": ["@azure/core-lro@2.7.2", "", { "dependencies": { "@azure/abort-controller": "^2.0.0", "@azure/core-util": "^1.2.0", "@azure/logger": "^1.0.0", "tslib": "^2.6.2" } }, "sha512-0YIpccoX8m/k00O7mDDMdJpbr6mf1yWo2dfmxt5A8XVZVVMz2SSKaEbMCeJRvgQ0IaSlqhjT47p4hVIRRy90xw=="], + + "@azure/core-paging": ["@azure/core-paging@1.6.2", "", { "dependencies": { "tslib": "^2.6.2" } }, "sha512-YKWi9YuCU04B55h25cnOYZHxXYtEvQEbKST5vqRga7hWY9ydd3FZHdeQF8pyh+acWZvppw13M/LMGx0LABUVMA=="], + + "@azure/core-rest-pipeline": ["@azure/core-rest-pipeline@1.21.0", "", { "dependencies": { "@azure/abort-controller": "^2.0.0", "@azure/core-auth": "^1.8.0", "@azure/core-tracing": "^1.0.1", "@azure/core-util": "^1.11.0", "@azure/logger": "^1.0.0", "@typespec/ts-http-runtime": "^0.2.3", "tslib": "^2.6.2" } }, "sha512-a4MBwe/5WKbq9MIxikzgxLBbruC5qlkFYlBdI7Ev50Y7ib5Vo/Jvt5jnJo7NaWeJ908LCHL0S1Us4UMf1VoTfg=="], + + "@azure/core-tracing": ["@azure/core-tracing@1.2.0", "", { "dependencies": { "tslib": "^2.6.2" } }, 
"sha512-UKTiEJPkWcESPYJz3X5uKRYyOcJD+4nYph+KpfdPRnQJVrZfk0KJgdnaAWKfhsBBtAf/D58Az4AvCJEmWgIBAg=="], + + "@azure/core-util": ["@azure/core-util@1.12.0", "", { "dependencies": { "@azure/abort-controller": "^2.0.0", "@typespec/ts-http-runtime": "^0.2.2", "tslib": "^2.6.2" } }, "sha512-13IyjTQgABPARvG90+N2dXpC+hwp466XCdQXPCRlbWHgd3SJd5Q1VvaBGv6k1BIa4MQm6hAF1UBU1m8QUxV8sQ=="], + + "@azure/core-xml": ["@azure/core-xml@1.4.5", "", { "dependencies": { "fast-xml-parser": "^5.0.7", "tslib": "^2.8.1" } }, "sha512-gT4H8mTaSXRz7eGTuQyq1aIJnJqeXzpOe9Ay7Z3FrCouer14CbV3VzjnJrNrQfbBpGBLO9oy8BmrY75A0p53cA=="], + + "@azure/logger": ["@azure/logger@1.2.0", "", { "dependencies": { "@typespec/ts-http-runtime": "^0.2.2", "tslib": "^2.6.2" } }, "sha512-0hKEzLhpw+ZTAfNJyRrn6s+V0nDWzXk9OjBr2TiGIu0OfMr5s2V4FpKLTAK3Ca5r5OKLbf4hkOGDPyiRjie/jA=="], + + "@azure/storage-blob": ["@azure/storage-blob@12.27.0", "", { "dependencies": { "@azure/abort-controller": "^2.1.2", "@azure/core-auth": "^1.4.0", "@azure/core-client": "^1.6.2", "@azure/core-http-compat": "^2.0.0", "@azure/core-lro": "^2.2.0", "@azure/core-paging": "^1.1.1", "@azure/core-rest-pipeline": "^1.10.1", "@azure/core-tracing": "^1.1.2", "@azure/core-util": "^1.6.1", "@azure/core-xml": "^1.4.3", "@azure/logger": "^1.0.0", "events": "^3.0.0", "tslib": "^2.2.0" } }, "sha512-IQjj9RIzAKatmNca3D6bT0qJ+Pkox1WZGOg2esJF2YLHb45pQKOwGPIAV+w3rfgkj7zV3RMxpn/c6iftzSOZJQ=="], + "@babel/code-frame": ["@babel/code-frame@7.27.1", "", { "dependencies": { "@babel/helper-validator-identifier": "^7.27.1", "js-tokens": "^4.0.0", "picocolors": "^1.1.1" } }, "sha512-cjQ7ZlQ0Mv3b47hABuTevyTuYN4i+loJKGeV9flcCgIK37cCXRh+L1bd3iBHlynerhQ7BhCkn2BPbQUL+rGqFg=="], "@babel/compat-data": ["@babel/compat-data@7.27.3", "", {}, "sha512-V42wFfx1ymFte+ecf6iXghnnP8kWTO+ZLXIyZq+1LAXHHvTZdVxicn4yiVYdYMGaCO3tmqub11AorKkv+iodqw=="], @@ -1326,6 +1351,8 @@ "@types/yargs-parser": ["@types/yargs-parser@21.0.3", "", {}, 
"sha512-I4q9QU9MQv4oEOz4tAHJtNz1cwuLxn2F3xcc2iV5WdqLPpUnj30aUuxt1mAxYTG+oe8CZMV/+6rU4S4gRDzqtQ=="], + "@typespec/ts-http-runtime": ["@typespec/ts-http-runtime@0.2.3", "", { "dependencies": { "http-proxy-agent": "^7.0.0", "https-proxy-agent": "^7.0.0", "tslib": "^2.6.2" } }, "sha512-oRhjSzcVjX8ExyaF8hC0zzTqxlVuRlgMHL/Bh4w3xB9+wjbm0FpXylVU/lBrn+kgphwYTrOk3tp+AVShGmlYCg=="], + "@ungap/structured-clone": ["@ungap/structured-clone@1.3.0", "", {}, "sha512-WmoN8qaIAo7WTYWbAZuG8PYEhn5fkz7dZrqTBZ7dtt//lL2Gwms1IcnQ5yHqjDfX8Ft5j4YzDM23f87zBfDe9g=="], "@vercel/analytics": ["@vercel/analytics@1.5.0", "", { "peerDependencies": { "@remix-run/react": "^2", "@sveltejs/kit": "^1 || ^2", "next": ">= 13", "react": "^18 || ^19 || ^19.0.0-rc", "svelte": ">= 4", "vue": "^3", "vue-router": "^4" }, "optionalPeers": ["@remix-run/react", "@sveltejs/kit", "next", "react", "svelte", "vue", "vue-router"] }, "sha512-MYsBzfPki4gthY5HnYN7jgInhAZ7Ac1cYDoRWFomwGHWEX7odTEzbtg9kf/QSo7XEsEAqlQugA6gJ2WS2DEa3g=="], @@ -3022,6 +3049,8 @@ "@aws-crypto/util/@smithy/util-utf8": ["@smithy/util-utf8@2.3.0", "", { "dependencies": { "@smithy/util-buffer-from": "^2.2.0", "tslib": "^2.6.2" } }, "sha512-R8Rdn8Hy72KKcebgLiv8jQcQkXoLMOGGv5uI1/k0l+snqkOzQ1R0ChUBCxWMlBsFMekWjq0wRudIweFs7sKT5A=="], + "@azure/core-xml/fast-xml-parser": ["fast-xml-parser@5.2.5", "", { "dependencies": { "strnum": "^2.1.0" }, "bin": { "fxparser": "src/cli/cli.js" } }, "sha512-pfX9uG9Ki0yekDHx2SiuRIyFdyAr1kMIMitPvb0YBo8SUfKvia7w7FIyd/l6av85pFYRhZscS75MwMnbvY+hcQ=="], + "@babel/code-frame/js-tokens": ["js-tokens@4.0.0", "", {}, "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ=="], "@babel/core/semver": ["semver@6.3.1", "", { "bin": { "semver": "bin/semver.js" } }, "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA=="], @@ -3496,6 +3525,8 @@ "@aws-crypto/util/@smithy/util-utf8/@smithy/util-buffer-from": ["@smithy/util-buffer-from@2.2.0", "", { 
"dependencies": { "@smithy/is-array-buffer": "^2.2.0", "tslib": "^2.6.2" } }, "sha512-IJdWBbTcMQ6DA0gdNhh/BwrLkDR+ADW5Kr1aZmd4k3DIF6ezMV4R2NIAmT08wQJ3yUK82thHWmC/TnK/wpMMIA=="], + "@azure/core-xml/fast-xml-parser/strnum": ["strnum@2.1.1", "", {}, "sha512-7ZvoFTiCnGxBtDqJ//Cu6fWtZtc7Y3x+QOirG15wztbdngGSkht27o2pyGWrVy0b4WAy3jbKmnoK6g5VlVNUUw=="], + "@babel/helper-compilation-targets/lru-cache/yallist": ["yallist@3.1.1", "", {}, "sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g=="], "@browserbasehq/sdk/@types/node/undici-types": ["undici-types@5.26.5", "", {}, "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA=="], From 6c8f0da4faa976150e554826a9b57537968f123e Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Mon, 16 Jun 2025 09:32:40 -0700 Subject: [PATCH 2/8] updated CORS policy for blob, added azure blob-specific headers --- .../sub-block/components/file-upload.tsx | 15 +++++++++++---- .../components/create-form/create-form.tsx | 13 ++++++++++--- apps/sim/next.config.ts | 2 +- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/apps/sim/app/w/[id]/components/workflow-block/components/sub-block/components/file-upload.tsx b/apps/sim/app/w/[id]/components/workflow-block/components/sub-block/components/file-upload.tsx index 6cbb44d0d8b..3477a08726a 100644 --- a/apps/sim/app/w/[id]/components/workflow-block/components/sub-block/components/file-upload.tsx +++ b/apps/sim/app/w/[id]/components/workflow-block/components/sub-block/components/file-upload.tsx @@ -170,12 +170,19 @@ export function FileUpload({ // Use direct upload method useDirectUpload = true - // Upload directly to S3 using the pre-signed URL + const uploadHeaders: Record = { + 'Content-Type': file.type, + } + + // Add Azure-specific headers if provided + if (presignedData.uploadHeaders) { + Object.assign(uploadHeaders, presignedData.uploadHeaders) + } + + // Upload directly to cloud storage using the 
pre-signed URL const uploadResponse = await fetch(presignedData.presignedUrl, { method: 'PUT', - headers: { - 'Content-Type': file.type, - }, + headers: uploadHeaders, // Use the merged headers body: file, }) diff --git a/apps/sim/app/w/knowledge/components/create-modal/components/create-form/create-form.tsx b/apps/sim/app/w/knowledge/components/create-modal/components/create-form/create-form.tsx index 211868f6dfd..7f53d905ea3 100644 --- a/apps/sim/app/w/knowledge/components/create-modal/components/create-form/create-form.tsx +++ b/apps/sim/app/w/knowledge/components/create-modal/components/create-form/create-form.tsx @@ -275,11 +275,18 @@ export function CreateForm({ onClose, onKnowledgeBaseCreated }: CreateFormProps) const presignedData = await presignedResponse.json() if (presignedResponse.ok && presignedData.directUploadSupported) { + const uploadHeaders: Record = { + 'Content-Type': file.type, + } + + // Add Azure-specific headers if provided + if (presignedData.uploadHeaders) { + Object.assign(uploadHeaders, presignedData.uploadHeaders) + } + const uploadResponse = await fetch(presignedData.presignedUrl, { method: 'PUT', - headers: { - 'Content-Type': file.type, - }, + headers: uploadHeaders, // Use the merged headers body: file, }) diff --git a/apps/sim/next.config.ts b/apps/sim/next.config.ts index 8c39a1f4493..d63bdd6eb3a 100644 --- a/apps/sim/next.config.ts +++ b/apps/sim/next.config.ts @@ -154,7 +154,7 @@ const nextConfig: NextConfig = { }, { key: 'Content-Security-Policy', - value: `default-src 'self'; script-src 'self' 'unsafe-inline' 'unsafe-eval' https://*.google.com https://apis.google.com https://*.vercel-scripts.com https://*.vercel-insights.com https://vercel.live https://*.vercel.live https://vercel.com https://*.vercel.app; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com; img-src 'self' data: blob: https://*.googleusercontent.com https://*.google.com https://*.atlassian.com https://cdn.discordapp.com 
https://*.githubusercontent.com; media-src 'self' blob:; font-src 'self' https://fonts.gstatic.com; connect-src 'self' ${process.env.NEXT_PUBLIC_APP_URL || ''} ${env.OLLAMA_URL || 'http://localhost:11434'} https://api.browser-use.com https://*.googleapis.com https://*.amazonaws.com https://*.s3.amazonaws.com https://*.vercel-insights.com https://*.atlassian.com https://vercel.live https://*.vercel.live https://vercel.com https://*.vercel.app; frame-src https://drive.google.com https://*.google.com; frame-ancestors 'self'; form-action 'self'; base-uri 'self'; object-src 'none'`, + value: `default-src 'self'; script-src 'self' 'unsafe-inline' 'unsafe-eval' https://*.google.com https://apis.google.com https://*.vercel-scripts.com https://*.vercel-insights.com https://vercel.live https://*.vercel.live https://vercel.com https://*.vercel.app; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com; img-src 'self' data: blob: https://*.googleusercontent.com https://*.google.com https://*.atlassian.com https://cdn.discordapp.com https://*.githubusercontent.com; media-src 'self' blob:; font-src 'self' https://fonts.gstatic.com; connect-src 'self' ${process.env.NEXT_PUBLIC_APP_URL || ''} ${env.OLLAMA_URL || 'http://localhost:11434'} https://api.browser-use.com https://*.googleapis.com https://*.amazonaws.com https://*.s3.amazonaws.com https://*.blob.core.windows.net https://*.vercel-insights.com https://*.atlassian.com https://vercel.live https://*.vercel.live https://vercel.com https://*.vercel.app; frame-src https://drive.google.com https://*.google.com; frame-ancestors 'self'; form-action 'self'; base-uri 'self'; object-src 'none'`, }, ], }, From 14906e10ae5cc92fb85cfa557edf72eeabb42c41 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Mon, 16 Jun 2025 10:58:04 -0700 Subject: [PATCH 3/8] remove extraneous comments --- apps/sim/app/api/files/delete/route.test.ts | 32 ----------- apps/sim/app/api/files/parse/route.test.ts | 9 +--- 
.../api/files/serve/[...path]/route.test.ts | 53 ++----------------- apps/sim/app/api/files/upload/route.test.ts | 46 ---------------- apps/sim/lib/uploads/s3/s3-client.test.ts | 16 +----- apps/sim/lib/uploads/setup.ts | 3 -- apps/sim/lib/uploads/storage-client.ts | 3 -- 7 files changed, 7 insertions(+), 155 deletions(-) diff --git a/apps/sim/app/api/files/delete/route.test.ts b/apps/sim/app/api/files/delete/route.test.ts index eb5f1ae4445..da8391d4ab0 100644 --- a/apps/sim/app/api/files/delete/route.test.ts +++ b/apps/sim/app/api/files/delete/route.test.ts @@ -2,7 +2,6 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' import { createMockRequest } from '@/app/api/__test-utils__/utils' describe('File Delete API Route', () => { - // Mock file system and storage modules const mockUnlink = vi.fn().mockResolvedValue(undefined) const mockExistsSync = vi.fn().mockReturnValue(true) const mockDeleteFile = vi.fn().mockResolvedValue(undefined) @@ -11,7 +10,6 @@ describe('File Delete API Route', () => { beforeEach(() => { vi.resetModules() - // Mock filesystem operations vi.doMock('fs', () => ({ existsSync: mockExistsSync, })) @@ -20,13 +18,11 @@ describe('File Delete API Route', () => { unlink: mockUnlink, })) - // Mock the storage abstraction layer vi.doMock('@/lib/uploads', () => ({ deleteFile: mockDeleteFile, isUsingCloudStorage: mockIsUsingCloudStorage, })) - // Mock the logger vi.doMock('@/lib/logs/console-logger', () => ({ createLogger: vi.fn().mockReturnValue({ info: vi.fn(), @@ -36,14 +32,12 @@ describe('File Delete API Route', () => { }), })) - // Configure upload directory and storage mode with all required exports vi.doMock('@/lib/uploads/setup', () => ({ UPLOAD_DIR: '/test/uploads', USE_S3_STORAGE: false, USE_BLOB_STORAGE: false, })) - // Skip setup.server.ts side effects vi.doMock('@/lib/uploads/setup.server', () => ({})) }) @@ -52,115 +46,89 @@ describe('File Delete API Route', () => { }) it('should handle local file deletion 
successfully', async () => { - // Configure upload directory and S3 mode for this test vi.doMock('@/lib/uploads/setup', () => ({ UPLOAD_DIR: '/test/uploads', USE_S3_STORAGE: false, })) - // Create request with file path const req = createMockRequest('POST', { filePath: '/api/files/serve/test-file.txt', }) - // Import the handler after mocks are set up const { POST } = await import('./route') - // Call the handler const response = await POST(req) const data = await response.json() - // Verify response expect(response.status).toBe(200) expect(data).toHaveProperty('success', true) expect(data).toHaveProperty('message', 'File deleted successfully') - // Verify unlink was called with correct path expect(mockUnlink).toHaveBeenCalledWith('/test/uploads/test-file.txt') }) it('should handle file not found gracefully', async () => { - // Mock file not existing mockExistsSync.mockReturnValueOnce(false) - // Create request with file path const req = createMockRequest('POST', { filePath: '/api/files/serve/nonexistent.txt', }) - // Import the handler after mocks are set up const { POST } = await import('./route') - // Call the handler const response = await POST(req) const data = await response.json() - // Verify response expect(response.status).toBe(200) expect(data).toHaveProperty('success', true) expect(data).toHaveProperty('message', "File not found, but that's okay") - // Verify unlink was not called expect(mockUnlink).not.toHaveBeenCalled() }) it('should handle S3 file deletion successfully', async () => { - // Configure upload directory and S3 mode for this test vi.doMock('@/lib/uploads/setup', () => ({ UPLOAD_DIR: '/test/uploads', USE_S3_STORAGE: true, USE_BLOB_STORAGE: false, })) - // Mock cloud storage mode mockIsUsingCloudStorage.mockReturnValue(true) - // Create request with S3 file path const req = createMockRequest('POST', { filePath: '/api/files/serve/s3/1234567890-test-file.txt', }) - // Import the handler after mocks are set up const { POST } = await 
import('./route') - // Call the handler const response = await POST(req) const data = await response.json() - // Verify response expect(response.status).toBe(200) expect(data).toHaveProperty('success', true) expect(data).toHaveProperty('message', 'File deleted successfully from cloud storage') - // Verify deleteFile was called with correct key expect(mockDeleteFile).toHaveBeenCalledWith('1234567890-test-file.txt') }) it('should handle missing file path', async () => { - // Create request with no file path const req = createMockRequest('POST', {}) - // Import the handler after mocks are set up const { POST } = await import('./route') - // Call the handler const response = await POST(req) const data = await response.json() - // Verify error response expect(response.status).toBe(400) expect(data).toHaveProperty('error', 'InvalidRequestError') expect(data).toHaveProperty('message', 'No file path provided') }) it('should handle CORS preflight requests', async () => { - // Import the handler after mocks are set up const { OPTIONS } = await import('./route') - // Call the handler const response = await OPTIONS() - // Verify response expect(response.status).toBe(204) expect(response.headers.get('Access-Control-Allow-Methods')).toBe('GET, POST, DELETE, OPTIONS') expect(response.headers.get('Access-Control-Allow-Headers')).toBe('Content-Type') diff --git a/apps/sim/app/api/files/parse/route.test.ts b/apps/sim/app/api/files/parse/route.test.ts index 7f22c128ea9..195f4adb540 100644 --- a/apps/sim/app/api/files/parse/route.test.ts +++ b/apps/sim/app/api/files/parse/route.test.ts @@ -168,10 +168,8 @@ describe('File Parse API Route', () => { }) it('should handle S3 access errors gracefully', async () => { - // Mock cloud storage mode mockIsUsingCloudStorage.mockReturnValue(true) - // Mock download failure mockDownloadFile.mockRejectedValueOnce(new Error('Access denied')) const req = new NextRequest('http://localhost:3000/api/files/parse', { @@ -268,7 +266,7 @@ describe('Files 
Parse API - Path Traversal Security', () => { '/root/.bashrc', '/app/.env', '/var/log/auth.log', - 'C:\\Windows\\System32\\drivers\\etc\\hosts', // Windows path + 'C:\\Windows\\System32\\drivers\\etc\\hosts', ] for (const maliciousPath of maliciousPaths) { @@ -288,7 +286,6 @@ describe('Files Parse API - Path Traversal Security', () => { }) it('should allow valid paths within upload directory', async () => { - // Test that valid paths don't trigger path validation errors const validPaths = [ '/api/files/serve/document.txt', '/api/files/serve/folder/file.pdf', @@ -306,7 +303,6 @@ describe('Files Parse API - Path Traversal Security', () => { const response = await POST(request) const result = await response.json() - // Should not fail due to path validation (may fail for other reasons like file not found) if (result.error) { expect(result.error).not.toMatch( /Access denied|Path outside allowed directory|Invalid path/ @@ -326,7 +322,7 @@ describe('Files Parse API - Path Traversal Security', () => { const request = new NextRequest('http://localhost:3000/api/files/parse', { method: 'POST', body: JSON.stringify({ - filePath: decodeURIComponent(maliciousPath), // Simulate URL decoding + filePath: decodeURIComponent(maliciousPath), }), }) @@ -357,7 +353,6 @@ describe('Files Parse API - Path Traversal Security', () => { const result = await response.json() expect(result.success).toBe(false) - // Should be rejected either by path validation or file system access } }) }) diff --git a/apps/sim/app/api/files/serve/[...path]/route.test.ts b/apps/sim/app/api/files/serve/[...path]/route.test.ts index c0d09d1fe91..e9621a449fb 100644 --- a/apps/sim/app/api/files/serve/[...path]/route.test.ts +++ b/apps/sim/app/api/files/serve/[...path]/route.test.ts @@ -7,7 +7,6 @@ import { NextRequest } from 'next/server' import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' describe('File Serve API Route', () => { - // Mock file system and storage modules const mockReadFile = 
vi.fn().mockResolvedValue(Buffer.from('test file content')) const mockExistsSync = vi.fn().mockReturnValue(true) const mockDownloadFile = vi.fn().mockResolvedValue(Buffer.from('test cloud file content')) @@ -18,7 +17,6 @@ describe('File Serve API Route', () => { beforeEach(() => { vi.resetModules() - // Mock filesystem operations vi.doMock('fs', () => ({ existsSync: mockExistsSync, })) @@ -27,14 +25,12 @@ describe('File Serve API Route', () => { readFile: mockReadFile, })) - // Mock the storage abstraction layer vi.doMock('@/lib/uploads', () => ({ downloadFile: mockDownloadFile, getPresignedUrl: mockGetPresignedUrl, isUsingCloudStorage: mockIsUsingCloudStorage, })) - // Mock the logger vi.doMock('@/lib/logs/console-logger', () => ({ createLogger: vi.fn().mockReturnValue({ info: vi.fn(), @@ -44,7 +40,6 @@ describe('File Serve API Route', () => { }), })) - // Configure upload directory and storage mode with all required exports vi.doMock('@/lib/uploads/setup', () => ({ UPLOAD_DIR: '/test/uploads', USE_S3_STORAGE: false, @@ -56,7 +51,6 @@ describe('File Serve API Route', () => { }, })) - // Mock the file utils with all exports including FileNotFoundError vi.doMock('@/app/api/files/utils', () => ({ FileNotFoundError: class FileNotFoundError extends Error { constructor(message: string) { @@ -88,7 +82,6 @@ describe('File Serve API Route', () => { findLocalFile: vi.fn().mockReturnValue('/test/uploads/test-file.txt'), })) - // Skip setup.server.ts side effects vi.doMock('@/lib/uploads/setup.server', () => ({})) }) @@ -97,29 +90,22 @@ describe('File Serve API Route', () => { }) it('should serve local file successfully', async () => { - // Create mock request const req = new NextRequest('http://localhost:3000/api/files/serve/test-file.txt') - // Create params similar to what Next.js would provide const params = { path: ['test-file.txt'] } - // Import the handler after mocks are set up const { GET } = await import('./route') - // Call the handler const response = await 
GET(req, { params: Promise.resolve(params) }) - // Verify response expect(response.status).toBe(200) expect(response.headers.get('Content-Type')).toBe('text/plain') expect(response.headers.get('Content-Disposition')).toBe('inline; filename="test-file.txt"') - // Verify file was read from correct path expect(mockReadFile).toHaveBeenCalledWith('/test/uploads/test-file.txt') }) it('should handle nested paths correctly', async () => { - // Mock findLocalFile to return the nested path const mockFindLocalFile = vi.fn().mockReturnValue('/test/uploads/nested/path/file.txt') vi.doMock('@/app/api/files/utils', () => ({ @@ -153,37 +139,28 @@ describe('File Serve API Route', () => { findLocalFile: mockFindLocalFile, })) - // Create mock request const req = new NextRequest('http://localhost:3000/api/files/serve/nested/path/file.txt') - // Create params similar to what Next.js would provide const params = { path: ['nested', 'path', 'file.txt'] } - // Import the handler after mocks are set up const { GET } = await import('./route') - // Call the handler const response = await GET(req, { params: Promise.resolve(params) }) - // Verify response expect(response.status).toBe(200) - // Verify file was read with correct path expect(mockReadFile).toHaveBeenCalledWith('/test/uploads/nested/path/file.txt') }) it('should serve cloud file by downloading and proxying', async () => { - // Configure cloud storage mode vi.doMock('@/lib/uploads/setup', () => ({ UPLOAD_DIR: '/test/uploads', USE_S3_STORAGE: true, USE_BLOB_STORAGE: false, })) - // Mock cloud storage mode mockIsUsingCloudStorage.mockReturnValue(true) - // Mock content type detection for PNG vi.doMock('@/app/api/files/utils', () => ({ FileNotFoundError: class FileNotFoundError extends Error { constructor(message: string) { @@ -215,45 +192,38 @@ describe('File Serve API Route', () => { findLocalFile: vi.fn().mockReturnValue('/test/uploads/test-file.txt'), })) - // Create mock request const req = new 
NextRequest('http://localhost:3000/api/files/serve/s3/1234567890-image.png') - // Create params similar to what Next.js would provide const params = { path: ['s3', '1234567890-image.png'] } - // Import the handler after mocks are set up const { GET } = await import('./route') - // Call the handler const response = await GET(req, { params: Promise.resolve(params) }) - // Verify response downloads and proxies the file expect(response.status).toBe(200) expect(response.headers.get('Content-Type')).toBe('image/png') expect(mockDownloadFile).toHaveBeenCalledWith('1234567890-image.png') }) it('should return 404 when file not found', async () => { - // Mock readFile to throw an error for this specific test const mockReadFileError = vi .fn() .mockRejectedValue(new Error('ENOENT: no such file or directory')) - // Reset modules for this specific test vi.resetModules() vi.doMock('fs', () => ({ - existsSync: vi.fn().mockReturnValue(false), // File doesn't exist + existsSync: vi.fn().mockReturnValue(false), })) vi.doMock('fs/promises', () => ({ - readFile: mockReadFileError, // This will throw an error + readFile: mockReadFileError, })) vi.doMock('@/lib/uploads', () => ({ downloadFile: mockDownloadFile, getPresignedUrl: mockGetPresignedUrl, - isUsingCloudStorage: vi.fn().mockReturnValue(false), // Use local storage + isUsingCloudStorage: vi.fn().mockReturnValue(false), })) vi.doMock('@/lib/logs/console-logger', () => ({ @@ -273,7 +243,6 @@ describe('File Serve API Route', () => { vi.doMock('@/lib/uploads/setup.server', () => ({})) - // Mock utils with findLocalFile returning null to trigger FileNotFoundError vi.doMock('@/app/api/files/utils', () => ({ FileNotFoundError: class FileNotFoundError extends Error { constructor(message: string) { @@ -289,29 +258,23 @@ describe('File Serve API Route', () => { extractS3Key: vi.fn(), extractBlobKey: vi.fn(), extractFilename: vi.fn(), - findLocalFile: vi.fn().mockReturnValue(null), // This should trigger FileNotFoundError + findLocalFile: 
vi.fn().mockReturnValue(null), })) - // Create mock request const req = new NextRequest('http://localhost:3000/api/files/serve/nonexistent.txt') - // Create params similar to what Next.js would provide const params = { path: ['nonexistent.txt'] } - // Import the handler after mocks are set up const { GET } = await import('./route') - // Call the handler const response = await GET(req, { params: Promise.resolve(params) }) - // Verify 404 response expect(response.status).toBe(404) const text = await response.text() expect(text).toBe('File not found') }) - // Instead of testing all content types in one test, let's separate them describe('content type detection', () => { const contentTypeTests = [ { ext: 'pdf', contentType: 'application/pdf' }, @@ -323,10 +286,8 @@ describe('File Serve API Route', () => { for (const test of contentTypeTests) { it(`should serve ${test.ext} file with correct content type`, async () => { - // Reset modules for this test vi.resetModules() - // Re-apply all mocks vi.doMock('fs', () => ({ existsSync: mockExistsSync.mockReturnValue(true), })) @@ -363,7 +324,6 @@ describe('File Serve API Route', () => { vi.doMock('@/lib/uploads/setup.server', () => ({})) - // Mock utils functions that determine content type vi.doMock('@/app/api/files/utils', () => ({ getContentType: () => test.contentType, findLocalFile: () => `/test/uploads/file.${test.ext}`, @@ -379,19 +339,14 @@ describe('File Serve API Route', () => { createErrorResponse: () => new Response(null, { status: 404 }), })) - // Create mock request with this extension const req = new NextRequest(`http://localhost:3000/api/files/serve/file.${test.ext}`) - // Create params const params = { path: [`file.${test.ext}`] } - // Import the handler after mocks are set up const { GET } = await import('./route') - // Call the handler const response = await GET(req, { params: Promise.resolve(params) }) - // Verify correct content type expect(response.headers.get('Content-Type')).toBe(test.contentType) }) } 
diff --git a/apps/sim/app/api/files/upload/route.test.ts b/apps/sim/app/api/files/upload/route.test.ts index 26a090f3c1e..03a4304a7a2 100644 --- a/apps/sim/app/api/files/upload/route.test.ts +++ b/apps/sim/app/api/files/upload/route.test.ts @@ -7,7 +7,6 @@ import { NextRequest } from 'next/server' import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' describe('File Upload API Route', () => { - // Mock file system and storage modules const mockWriteFile = vi.fn().mockResolvedValue(undefined) const mockUploadFile = vi.fn().mockResolvedValue({ path: '/api/files/serve/s3/test-key', @@ -19,7 +18,6 @@ describe('File Upload API Route', () => { const mockIsUsingCloudStorage = vi.fn().mockReturnValue(false) const mockEnsureUploadsDirectory = vi.fn().mockResolvedValue(true) - // Mock form data const createMockFormData = (files: File[]): FormData => { const formData = new FormData() files.forEach((file) => { @@ -28,7 +26,6 @@ describe('File Upload API Route', () => { return formData } - // Mock file const createMockFile = ( name = 'test.txt', type = 'text/plain', @@ -40,18 +37,15 @@ describe('File Upload API Route', () => { beforeEach(() => { vi.resetModules() - // Mock filesystem operations vi.doMock('fs/promises', () => ({ writeFile: mockWriteFile, })) - // Mock the storage abstraction layer vi.doMock('@/lib/uploads', () => ({ uploadFile: mockUploadFile, isUsingCloudStorage: mockIsUsingCloudStorage, })) - // Mock the logger vi.doMock('@/lib/logs/console-logger', () => ({ createLogger: vi.fn().mockReturnValue({ info: vi.fn(), @@ -61,12 +55,10 @@ describe('File Upload API Route', () => { }), })) - // Mock UUID generation vi.doMock('uuid', () => ({ v4: vi.fn().mockReturnValue('mock-uuid'), })) - // Configure upload directory and storage mode with all required exports vi.doMock('@/lib/uploads/setup', () => ({ UPLOAD_DIR: '/test/uploads', USE_S3_STORAGE: false, @@ -78,7 +70,6 @@ describe('File Upload API Route', () => { }, })) - // Skip setup.server.ts side 
effects vi.doMock('@/lib/uploads/setup.server', () => ({})) }) @@ -87,63 +78,50 @@ describe('File Upload API Route', () => { }) it('should upload a file to local storage', async () => { - // Create a mock request with file const mockFile = createMockFile() const formData = createMockFormData([mockFile]) - // Create mock request object const req = new NextRequest('http://localhost:3000/api/files/upload', { method: 'POST', body: formData, }) - // Import the handler after mocks are set up const { POST } = await import('./route') - // Call the handler const response = await POST(req) const data = await response.json() - // Verify response expect(response.status).toBe(200) expect(data).toHaveProperty('path', '/api/files/serve/mock-uuid.txt') expect(data).toHaveProperty('name', 'test.txt') expect(data).toHaveProperty('size') expect(data).toHaveProperty('type', 'text/plain') - // Verify file was written to local storage expect(mockWriteFile).toHaveBeenCalledWith('/test/uploads/mock-uuid.txt', expect.any(Buffer)) }) it('should upload a file to S3 when in S3 mode', async () => { - // Configure S3 storage mode vi.doMock('@/lib/uploads/setup', () => ({ UPLOAD_DIR: '/test/uploads', USE_S3_STORAGE: true, USE_BLOB_STORAGE: false, })) - // Mock cloud storage mode mockIsUsingCloudStorage.mockReturnValue(true) - // Create a mock request with file const mockFile = createMockFile() const formData = createMockFormData([mockFile]) - // Create mock request object const req = new NextRequest('http://localhost:3000/api/files/upload', { method: 'POST', body: formData, }) - // Import the handler after mocks are set up const { POST } = await import('./route') - // Call the handler const response = await POST(req) const data = await response.json() - // Verify response expect(response.status).toBe(200) expect(data).toHaveProperty('path') expect(data.path).toContain('/api/files/serve/s3/') @@ -151,7 +129,6 @@ describe('File Upload API Route', () => { expect(data).toHaveProperty('size') 
expect(data).toHaveProperty('type', 'text/plain') - // Verify uploadFile was called with correct parameters expect(mockUploadFile).toHaveBeenCalledWith( expect.any(Buffer), 'test.txt', @@ -161,12 +138,10 @@ describe('File Upload API Route', () => { }) it('should handle multiple file uploads', async () => { - // Create multiple mock files const mockFile1 = createMockFile('file1.txt', 'text/plain') const mockFile2 = createMockFile('file2.txt', 'text/plain') const formData = createMockFormData([mockFile1, mockFile2]) - // Mock multiple upload responses mockUploadFile .mockResolvedValueOnce({ path: '/api/files/serve/test1.txt', @@ -183,98 +158,77 @@ describe('File Upload API Route', () => { type: 'text/plain', }) - // Create mock request object const req = new NextRequest('http://localhost:3000/api/files/upload', { method: 'POST', body: formData, }) - // Import the handler after mocks are set up const { POST } = await import('./route') - // Call the handler const response = await POST(req) const data = await response.json() - // Verify response has multiple results expect(response.status).toBe(200) expect(data).toHaveProperty('files') expect(Array.isArray(data.files)).toBe(true) expect(data.files).toHaveLength(2) - // Verify each file was uploaded expect(data.files[0]).toHaveProperty('name', 'file1.txt') expect(data.files[1]).toHaveProperty('name', 'file2.txt') }) it('should handle missing files', async () => { - // Create empty form data const formData = new FormData() - // Create mock request object const req = new NextRequest('http://localhost:3000/api/files/upload', { method: 'POST', body: formData, }) - // Import the handler after mocks are set up const { POST } = await import('./route') - // Call the handler const response = await POST(req) const data = await response.json() - // Verify error response expect(response.status).toBe(400) expect(data).toHaveProperty('error', 'InvalidRequestError') expect(data).toHaveProperty('message', 'No files provided') }) 
it('should handle S3 upload errors', async () => { - // Configure S3 storage mode vi.doMock('@/lib/uploads/setup', () => ({ UPLOAD_DIR: '/test/uploads', USE_S3_STORAGE: true, USE_BLOB_STORAGE: false, })) - // Mock cloud storage mode mockIsUsingCloudStorage.mockReturnValue(true) - // Mock upload failure mockUploadFile.mockRejectedValueOnce(new Error('Upload failed')) - // Create a mock request with file const mockFile = createMockFile() const formData = createMockFormData([mockFile]) - // Create mock request object const req = new NextRequest('http://localhost:3000/api/files/upload', { method: 'POST', body: formData, }) - // Import the handler after mocks are set up const { POST } = await import('./route') - // Call the handler const response = await POST(req) const data = await response.json() - // Verify error response expect(response.status).toBe(500) expect(data).toHaveProperty('error', 'Error') expect(data).toHaveProperty('message', 'Upload failed') }) it('should handle CORS preflight requests', async () => { - // Import the handler after mocks are set up const { OPTIONS } = await import('./route') - // Call the handler const response = await OPTIONS() - // Verify response expect(response.status).toBe(204) expect(response.headers.get('Access-Control-Allow-Methods')).toBe('GET, POST, DELETE, OPTIONS') expect(response.headers.get('Access-Control-Allow-Headers')).toBe('Content-Type') diff --git a/apps/sim/lib/uploads/s3/s3-client.test.ts b/apps/sim/lib/uploads/s3/s3-client.test.ts index beb5cd7fe00..fc80921d7d3 100644 --- a/apps/sim/lib/uploads/s3/s3-client.test.ts +++ b/apps/sim/lib/uploads/s3/s3-client.test.ts @@ -6,7 +6,6 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' describe('S3 Client', () => { - // Mock AWS SDK modules const mockSend = vi.fn() const mockS3Client = { send: mockSend, @@ -21,7 +20,6 @@ describe('S3 Client', () => { vi.resetModules() vi.clearAllMocks() - // Mock the AWS SDK vi.doMock('@aws-sdk/client-s3', () => ({ 
S3Client: vi.fn(() => mockS3Client), PutObjectCommand: mockPutObjectCommand, @@ -33,7 +31,6 @@ describe('S3 Client', () => { getSignedUrl: mockGetSignedUrl, })) - // Mock the setup configuration with test values vi.doMock('../setup', () => ({ S3_CONFIG: { bucket: 'test-bucket', @@ -41,8 +38,7 @@ describe('S3 Client', () => { }, })) - // Mock Date.now for consistent timestamps - vi.spyOn(Date, 'now').mockReturnValue(1672603200000) // Fixed timestamp + vi.spyOn(Date, 'now').mockReturnValue(1672603200000) vi.spyOn(Date.prototype, 'toISOString').mockReturnValue('2025-06-16T01:13:10.765Z') }) @@ -52,7 +48,6 @@ describe('S3 Client', () => { describe('uploadToS3', () => { it('should upload a file to S3 and return file info', async () => { - // Mock successful upload mockSend.mockResolvedValueOnce({}) const { uploadToS3 } = await import('./s3-client') @@ -63,7 +58,6 @@ describe('S3 Client', () => { const result = await uploadToS3(file, fileName, contentType) - // Check that S3 client was called with correct parameters expect(mockPutObjectCommand).toHaveBeenCalledWith({ Bucket: 'test-bucket', Key: expect.stringContaining('test-file.txt'), @@ -77,7 +71,6 @@ describe('S3 Client', () => { expect(mockSend).toHaveBeenCalledWith(expect.any(Object)) - // Check return value expect(result).toEqual({ path: expect.stringContaining('/api/files/serve/s3/'), key: expect.stringContaining('test-file.txt'), @@ -98,14 +91,12 @@ describe('S3 Client', () => { const result = await uploadToS3(testFile, fileName, contentType) - // Check that the filename was sanitized in the key expect(mockPutObjectCommand).toHaveBeenCalledWith( expect.objectContaining({ Key: expect.stringContaining('test-file-with-spaces.txt'), }) ) - // But the original name should be preserved in metadata and result expect(result.name).toBe(fileName) }) @@ -189,7 +180,6 @@ describe('S3 Client', () => { describe('downloadFromS3', () => { it('should download a file from S3', async () => { - // Mock a readable stream const 
mockStream = { on: vi.fn((event, callback) => { if (event === 'data') { @@ -225,7 +215,6 @@ describe('S3 Client', () => { }) it('should handle stream errors', async () => { - // Mock a readable stream that throws an error const mockStream = { on: vi.fn((event, callback) => { if (event === 'error') { @@ -294,11 +283,8 @@ describe('S3 Client', () => { const { getS3Client } = await import('./s3-client') const { S3Client } = await import('@aws-sdk/client-s3') - // Get the client (this will trigger initialization) const client = getS3Client() - // We can't test the constructor call easily since it happens at import time - // Instead, we can test the s3Client properties expect(client).toBeDefined() // Verify the client was constructed with the right configuration expect(S3Client).toBeDefined() diff --git a/apps/sim/lib/uploads/setup.ts b/apps/sim/lib/uploads/setup.ts index 6a5fc134e96..e35e33649f2 100644 --- a/apps/sim/lib/uploads/setup.ts +++ b/apps/sim/lib/uploads/setup.ts @@ -24,13 +24,11 @@ const hasBlobConfig = !!( export const USE_BLOB_STORAGE = hasBlobConfig export const USE_S3_STORAGE = hasS3Config && !USE_BLOB_STORAGE -// S3 Configuration export const S3_CONFIG = { bucket: env.S3_BUCKET_NAME || '', region: env.AWS_REGION || '', } -// Azure Blob Storage Configuration export const BLOB_CONFIG = { accountName: env.AZURE_ACCOUNT_NAME || '', accountKey: env.AZURE_ACCOUNT_KEY || '', @@ -38,7 +36,6 @@ export const BLOB_CONFIG = { containerName: env.AZURE_STORAGE_CONTAINER_NAME || '', } -// Knowledge Base specific configurations export const S3_KB_CONFIG = { bucket: env.S3_KB_BUCKET_NAME || '', region: env.AWS_REGION || '', diff --git a/apps/sim/lib/uploads/storage-client.ts b/apps/sim/lib/uploads/storage-client.ts index 57a391b5e7a..143cf31a813 100644 --- a/apps/sim/lib/uploads/storage-client.ts +++ b/apps/sim/lib/uploads/storage-client.ts @@ -1,5 +1,4 @@ import { createLogger } from '@/lib/logs/console-logger' -// Import Blob functions import { type FileInfo as 
BlobFileInfo, type CustomBlobConfig, @@ -9,7 +8,6 @@ import { getPresignedUrlWithConfig as getBlobPresignedUrlWithConfig, uploadToBlob, } from './blob/blob-client' -// Import S3 functions import { type CustomS3Config, deleteFromS3, @@ -23,7 +21,6 @@ import { USE_BLOB_STORAGE, USE_S3_STORAGE } from './setup' const logger = createLogger('StorageClient') -// Re-export common types export type FileInfo = S3FileInfo | BlobFileInfo export type CustomStorageConfig = CustomS3Config | CustomBlobConfig From 0f498d721288bd20f7f839b120d38a3b5fa382ec Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Mon, 16 Jun 2025 13:23:26 -0700 Subject: [PATCH 4/8] add file size limit and timeout --- apps/sim/app/api/files/parse/route.ts | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/apps/sim/app/api/files/parse/route.ts b/apps/sim/app/api/files/parse/route.ts index 6ab1f48bcd5..d23ce2d39f3 100644 --- a/apps/sim/app/api/files/parse/route.ts +++ b/apps/sim/app/api/files/parse/route.ts @@ -30,7 +30,6 @@ interface ParseResult { } } -// MIME type mapping for various file extensions const fileTypeMap: Record = { // Text formats txt: 'text/plain', @@ -222,12 +221,24 @@ async function handleExternalUrl(url: string, fileType?: string): Promise MAX_DOWNLOAD_SIZE_BYTES) { + throw new Error(`File too large: ${contentLength} bytes (max: ${MAX_DOWNLOAD_SIZE_BYTES})`) + } + const buffer = Buffer.from(await response.arrayBuffer()) + + if (buffer.length > MAX_DOWNLOAD_SIZE_BYTES) { + throw new Error(`File too large: ${buffer.length} bytes (max: ${MAX_DOWNLOAD_SIZE_BYTES})`) + } + logger.info(`Downloaded file from URL: ${url}, size: ${buffer.length} bytes`) // Extract filename from URL From 39825293299a8198baa588d95f997286ea1fc7e7 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Mon, 16 Jun 2025 15:55:54 -0700 Subject: [PATCH 5/8] added some extra error handling in kb add documents --- .../knowledge/[id]/process-documents/route.ts | 30 +++- 
apps/sim/app/w/knowledge/[id]/base.tsx | 168 ++++++++++++++++-- apps/sim/stores/knowledge/store.ts | 13 +- 3 files changed, 190 insertions(+), 21 deletions(-) diff --git a/apps/sim/app/api/knowledge/[id]/process-documents/route.ts b/apps/sim/app/api/knowledge/[id]/process-documents/route.ts index d766720b6e6..97a920f6849 100644 --- a/apps/sim/app/api/knowledge/[id]/process-documents/route.ts +++ b/apps/sim/app/api/knowledge/[id]/process-documents/route.ts @@ -200,7 +200,7 @@ export async function POST(req: NextRequest, { params }: { params: Promise<{ id: const validatedData = ProcessDocumentsSchema.parse(body) const createdDocuments = await db.transaction(async (tx) => { - const documentPromises = validatedData.documents.map(async (docData) => { + const documentPromises = validatedData.documents.map(async (docData, index) => { const documentId = crypto.randomUUID() const now = new Date() @@ -220,11 +220,33 @@ export async function POST(req: NextRequest, { params }: { params: Promise<{ id: uploadedAt: now, } - await tx.insert(document).values(newDocument) - return { documentId, ...docData } + try { + await tx.insert(document).values(newDocument) + logger.info( + `[${requestId}] Document record created: ${documentId} for file: ${docData.filename}` + ) + return { documentId, ...docData } + } catch (dbError) { + logger.error( + `[${requestId}] Failed to create document record for ${docData.filename}:`, + dbError + ) + throw new Error( + `Failed to create document record for ${docData.filename}: ${dbError instanceof Error ? 
dbError.message : 'Unknown database error'}` + ) + } }) - return await Promise.all(documentPromises) + const results = await Promise.all(documentPromises) + + // Validate that all documents were created successfully + const invalidResults = results.filter((result) => !result.documentId || !result.filename) + if (invalidResults.length > 0) { + logger.error(`[${requestId}] Some documents failed to create properly:`, invalidResults) + throw new Error(`Failed to create ${invalidResults.length} document records`) + } + + return results }) logger.info( diff --git a/apps/sim/app/w/knowledge/[id]/base.tsx b/apps/sim/app/w/knowledge/[id]/base.tsx index 52f275ff5de..b91b6253550 100644 --- a/apps/sim/app/w/knowledge/[id]/base.tsx +++ b/apps/sim/app/w/knowledge/[id]/base.tsx @@ -1,8 +1,9 @@ 'use client' import { useEffect, useRef, useState } from 'react' -import { format } from 'date-fns' +import { format, formatDistanceToNow } from 'date-fns' import { + AlertCircle, Circle, CircleOff, FileText, @@ -83,7 +84,12 @@ const getStatusDisplay = (doc: DocumentData) => { } case 'failed': return { - text: 'Failed', + text: ( + <> + Failed + {doc.processingError && } + + ), className: 'inline-flex items-center rounded-md bg-red-100 px-2 py-1 text-xs font-medium text-red-700 dark:bg-red-900/30 dark:text-red-300', } @@ -154,6 +160,16 @@ export function KnowledgeBase({ const [showDeleteDialog, setShowDeleteDialog] = useState(false) const [isDeleting, setIsDeleting] = useState(false) const [isUploading, setIsUploading] = useState(false) + const [uploadError, setUploadError] = useState<{ + message: string + timestamp: number + } | null>(null) + const [uploadProgress, setUploadProgress] = useState<{ + stage: 'idle' | 'uploading' | 'processing' | 'completing' + filesCompleted: number + totalFiles: number + currentFile?: string + }>({ stage: 'idle', filesCompleted: 0, totalFiles: 0 }) const router = useRouter() const fileInputRef = useRef(null) @@ -182,6 +198,16 @@ export function 
KnowledgeBase({ return () => clearInterval(refreshInterval) }, [documents, refreshDocuments, isUploading, isDeleting]) + // Auto-dismiss upload error after 8 seconds + useEffect(() => { + if (uploadError) { + const timer = setTimeout(() => { + setUploadError(null) + }, 8000) + return () => clearTimeout(timer) + } + }, [uploadError]) + // Filter documents based on search query const filteredDocuments = documents.filter((doc) => doc.filename.toLowerCase().includes(searchQuery.toLowerCase()) @@ -371,11 +397,15 @@ export function KnowledgeBase({ try { setIsUploading(true) + setUploadError(null) + setUploadProgress({ stage: 'uploading', filesCompleted: 0, totalFiles: files.length }) // Upload all files and start processing const uploadedFiles: UploadedFile[] = [] + const fileArray = Array.from(files) - for (const file of Array.from(files)) { + for (const [index, file] of fileArray.entries()) { + setUploadProgress((prev) => ({ ...prev, currentFile: file.name, filesCompleted: index })) const formData = new FormData() formData.append('file', file) @@ -390,6 +420,12 @@ export function KnowledgeBase({ } const uploadResult = await uploadResponse.json() + + // Validate upload result structure + if (!uploadResult.path) { + throw new Error(`Invalid upload response for ${file.name}: missing file path`) + } + uploadedFiles.push({ filename: file.name, fileUrl: uploadResult.path.startsWith('http') @@ -401,6 +437,12 @@ export function KnowledgeBase({ }) } + setUploadProgress((prev) => ({ + ...prev, + stage: 'processing', + filesCompleted: fileArray.length, + })) + // Start async document processing const processResponse = await fetch(`/api/knowledge/${id}/process-documents`, { method: 'POST', @@ -419,15 +461,34 @@ export function KnowledgeBase({ }) if (!processResponse.ok) { - throw new Error('Failed to start document processing') + const errorData = await processResponse.json() + throw new Error( + `Failed to start document processing: ${errorData.error || 'Unknown error'}` + ) } 
const processResult = await processResponse.json() + // Validate process result structure + if (!processResult.success) { + throw new Error(`Document processing failed: ${processResult.error || 'Unknown error'}`) + } + + if (!processResult.data || !processResult.data.documentsCreated) { + throw new Error('Invalid processing response: missing document data') + } + // Create pending document objects and add them to the store immediately - if (processResult.success && processResult.data.documentsCreated) { - const pendingDocuments: DocumentData[] = processResult.data.documentsCreated.map( - (doc: ProcessedDocumentResponse, index: number) => ({ + const pendingDocuments: DocumentData[] = processResult.data.documentsCreated.map( + (doc: ProcessedDocumentResponse, index: number) => { + if (!doc.documentId || !doc.filename) { + logger.error(`Invalid document data received:`, doc) + throw new Error( + `Invalid document data for ${uploadedFiles[index]?.filename || 'unknown file'}` + ) + } + + return { id: doc.documentId, knowledgeBaseId: id, filename: doc.filename, @@ -444,18 +505,36 @@ export function KnowledgeBase({ processingError: null, enabled: true, uploadedAt: new Date().toISOString(), - }) - ) + } + } + ) - // Add pending documents to store for immediate UI update - useKnowledgeStore.getState().addPendingDocuments(id, pendingDocuments) - } + // Add pending documents to store for immediate UI update + useKnowledgeStore.getState().addPendingDocuments(id, pendingDocuments) - logger.info(`Started processing ${uploadedFiles.length} documents`) + logger.info(`Successfully started processing ${uploadedFiles.length} documents`) + + setUploadProgress((prev) => ({ ...prev, stage: 'completing' })) + + // Trigger a refresh to ensure documents are properly loaded + await refreshDocuments() + + setUploadProgress({ stage: 'idle', filesCompleted: 0, totalFiles: 0 }) } catch (err) { logger.error('Error uploading documents:', err) + + const errorMessage = + err instanceof Error ? 
err.message : 'Unknown error occurred during upload' + setUploadError({ + message: errorMessage, + timestamp: Date.now(), + }) + + // Show user-friendly error message in console for debugging + console.error('Document upload failed:', errorMessage) } finally { setIsUploading(false) + setUploadProgress({ stage: 'idle', filesCompleted: 0, totalFiles: 0 }) // Reset the file input if (fileInputRef.current) { fileInputRef.current.value = '' @@ -572,7 +651,15 @@ export function KnowledgeBase({ className='flex items-center gap-1 bg-[#701FFC] font-[480] text-white shadow-[0_0_0_0_#701FFC] transition-all duration-200 hover:bg-[#6518E6] hover:shadow-[0_0_0_4px_rgba(127,47,255,0.15)]' > - {isUploading ? 'Uploading...' : 'Add Documents'} + {isUploading + ? uploadProgress.stage === 'uploading' + ? `Uploading ${uploadProgress.filesCompleted + 1}/${uploadProgress.totalFiles}...` + : uploadProgress.stage === 'processing' + ? 'Processing...' + : uploadProgress.stage === 'completing' + ? 'Completing...' + : 'Uploading...' + : 'Add Documents'} @@ -832,7 +919,25 @@ export function KnowledgeBase({ {/* Status column */} -
{statusDisplay.text}
+ {doc.processingStatus === 'failed' && doc.processingError ? ( + + +
+ {statusDisplay.text} +
+
+ + {doc.processingError} + +
+ ) : ( +
+ {statusDisplay.text} +
+ )} {/* Actions column */} @@ -948,6 +1053,39 @@ export function KnowledgeBase({ + + {/* Toast Notification */} + {uploadError && ( +
+
+
+ +
+
+
+ Error + + {formatDistanceToNow(uploadError.timestamp, { addSuffix: true }).replace( + 'less than a minute ago', + '<1 minute ago' + )} + +
+

+ {uploadError.message.length > 100 + ? `${uploadError.message.slice(0, 60)}...` + : uploadError.message} +

+
+ +
+
+ )} ) } diff --git a/apps/sim/stores/knowledge/store.ts b/apps/sim/stores/knowledge/store.ts index 0f6b0fb7542..d25f0bd240a 100644 --- a/apps/sim/stores/knowledge/store.ts +++ b/apps/sim/stores/knowledge/store.ts @@ -676,7 +676,16 @@ export const useKnowledgeStore = create((set, get) => ({ addPendingDocuments: (knowledgeBaseId: string, newDocuments: DocumentData[]) => { set((state) => { const existingDocuments = state.documents[knowledgeBaseId] || [] - const updatedDocuments = [...existingDocuments, ...newDocuments] + + const existingIds = new Set(existingDocuments.map((doc) => doc.id)) + const uniqueNewDocuments = newDocuments.filter((doc) => !existingIds.has(doc.id)) + + if (uniqueNewDocuments.length === 0) { + logger.warn(`No new documents to add - all ${newDocuments.length} documents already exist`) + return state + } + + const updatedDocuments = [...existingDocuments, ...uniqueNewDocuments] return { documents: { @@ -686,7 +695,7 @@ export const useKnowledgeStore = create((set, get) => ({ } }) logger.info( - `Added ${newDocuments.length} pending documents for knowledge base: ${knowledgeBaseId}` + `Added ${newDocuments.filter((doc) => !get().documents[knowledgeBaseId]?.some((existing) => existing.id === doc.id)).length} pending documents for knowledge base: ${knowledgeBaseId}` ) }, From b6601608692ce49e178e545bd511a07c1d5a8749 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Mon, 16 Jun 2025 15:57:51 -0700 Subject: [PATCH 6/8] grouped envvars --- apps/sim/lib/env.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/apps/sim/lib/env.ts b/apps/sim/lib/env.ts index a6fa6275651..fb9f1d7f53e 100644 --- a/apps/sim/lib/env.ts +++ b/apps/sim/lib/env.ts @@ -48,6 +48,8 @@ export const env = createEnv({ NEXT_TELEMETRY_DISABLED: z.string().optional(), NEXT_RUNTIME: z.string().optional(), VERCEL_ENV: z.string().optional(), + + // Storage AWS_REGION: z.string().optional(), AWS_ACCESS_KEY_ID: z.string().optional(), AWS_SECRET_ACCESS_KEY: z.string().optional(), @@ -59,6 
+61,8 @@ export const env = createEnv({ AZURE_CONNECTION_STRING: z.string().optional(), AZURE_STORAGE_CONTAINER_NAME: z.string().optional(), AZURE_STORAGE_KB_CONTAINER_NAME: z.string().optional(), + + // Miscellaneous CRON_SECRET: z.string().optional(), FREE_PLAN_LOG_RETENTION_DAYS: z.string().optional(), NODE_ENV: z.string().optional(), From 7defa15fa9c01485263cece4d0d03f88224efa95 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Mon, 16 Jun 2025 16:26:23 -0700 Subject: [PATCH 7/8] ack PR comments --- apps/sim/app/api/files/delete/route.test.ts | 25 +++++++++++++++ apps/sim/app/api/files/delete/route.ts | 3 +- apps/sim/app/api/files/presigned/route.ts | 32 ++++++++++--------- .../api/files/serve/[...path]/route.test.ts | 14 ++++++-- .../app/api/files/serve/[...path]/route.ts | 4 +-- apps/sim/app/api/files/utils.ts | 28 +++++++++++----- .../knowledge/[id]/process-documents/route.ts | 1 + apps/sim/app/api/knowledge/utils.ts | 3 +- apps/sim/lib/documents/document-processor.ts | 9 ++++-- apps/sim/lib/uploads/blob/blob-client.test.ts | 19 +++++++---- apps/sim/lib/uploads/blob/blob-client.ts | 23 ++++++++++--- apps/sim/lib/uploads/s3/s3-client.test.ts | 2 +- apps/sim/lib/uploads/setup.ts | 2 +- apps/sim/lib/uploads/storage-client.ts | 10 +++--- apps/sim/tools/mistral/parser.ts | 4 --- 15 files changed, 124 insertions(+), 55 deletions(-) diff --git a/apps/sim/app/api/files/delete/route.test.ts b/apps/sim/app/api/files/delete/route.test.ts index da8391d4ab0..08b996fd8cc 100644 --- a/apps/sim/app/api/files/delete/route.test.ts +++ b/apps/sim/app/api/files/delete/route.test.ts @@ -111,6 +111,31 @@ describe('File Delete API Route', () => { expect(mockDeleteFile).toHaveBeenCalledWith('1234567890-test-file.txt') }) + it('should handle Azure Blob file deletion successfully', async () => { + vi.doMock('@/lib/uploads/setup', () => ({ + UPLOAD_DIR: '/test/uploads', + USE_S3_STORAGE: false, + USE_BLOB_STORAGE: true, + })) + + mockIsUsingCloudStorage.mockReturnValue(true) + + const 
req = createMockRequest('POST', { + filePath: '/api/files/serve/blob/1234567890-test-document.pdf', + }) + + const { POST } = await import('./route') + + const response = await POST(req) + const data = await response.json() + + expect(response.status).toBe(200) + expect(data).toHaveProperty('success', true) + expect(data).toHaveProperty('message', 'File deleted successfully from cloud storage') + + expect(mockDeleteFile).toHaveBeenCalledWith('1234567890-test-document.pdf') + }) + it('should handle missing file path', async () => { const req = createMockRequest('POST', {}) diff --git a/apps/sim/app/api/files/delete/route.ts b/apps/sim/app/api/files/delete/route.ts index ac8c9af687b..afb7711fe77 100644 --- a/apps/sim/app/api/files/delete/route.ts +++ b/apps/sim/app/api/files/delete/route.ts @@ -16,6 +16,7 @@ import { extractS3Key, InvalidRequestError, isBlobPath, + isCloudPath, isS3Path, } from '../utils' @@ -40,7 +41,7 @@ export async function POST(request: NextRequest) { try { // Use appropriate handler based on path and environment const result = - isS3Path(filePath) || isBlobPath(filePath) || isUsingCloudStorage() + isCloudPath(filePath) || isUsingCloudStorage() ? 
await handleCloudFileDelete(filePath) : await handleLocalFileDelete(filePath) diff --git a/apps/sim/app/api/files/presigned/route.ts b/apps/sim/app/api/files/presigned/route.ts index d5ab914d94a..020d896ca20 100644 --- a/apps/sim/app/api/files/presigned/route.ts +++ b/apps/sim/app/api/files/presigned/route.ts @@ -40,20 +40,20 @@ export async function POST(request: NextRequest) { const storageProvider = getStorageProvider() - if (storageProvider === 's3') { - return await handleS3PresignedUrl(fileName, contentType, fileSize) + switch (storageProvider) { + case 's3': + return await handleS3PresignedUrl(fileName, contentType, fileSize) + case 'blob': + return await handleBlobPresignedUrl(fileName, contentType, fileSize) + default: + return NextResponse.json( + { + error: 'Unknown storage provider', + directUploadSupported: false, + }, + { status: 400 } + ) } - if (storageProvider === 'blob') { - return await handleBlobPresignedUrl(fileName, contentType, fileSize) - } - - return NextResponse.json( - { - error: 'Unknown storage provider', - directUploadSupported: false, - }, - { status: 400 } - ) } catch (error) { logger.error('Error generating presigned URL:', error) return createErrorResponse( @@ -76,7 +76,7 @@ async function handleS3PresignedUrl(fileName: string, contentType: string, fileS Key: uniqueKey, ContentType: contentType, Metadata: { - originalName: encodeURIComponent(fileName), + originalName: sanitizedOriginalName, uploadedAt: new Date().toISOString(), }, }) @@ -155,7 +155,9 @@ async function handleBlobPresignedUrl(fileName: string, contentType: string, fil }) } catch (error) { logger.error('Error generating Blob presigned URL:', error) - throw error + return createErrorResponse( + error instanceof Error ? 
error : new Error('Failed to generate Blob presigned URL') + ) } } diff --git a/apps/sim/app/api/files/serve/[...path]/route.test.ts b/apps/sim/app/api/files/serve/[...path]/route.test.ts index e9621a449fb..8d854a37711 100644 --- a/apps/sim/app/api/files/serve/[...path]/route.test.ts +++ b/apps/sim/app/api/files/serve/[...path]/route.test.ts @@ -251,7 +251,12 @@ describe('File Serve API Route', () => { } }, createFileResponse: vi.fn(), - createErrorResponse: vi.fn(), + createErrorResponse: vi.fn().mockImplementation((error) => { + return new Response(JSON.stringify({ error: error.name, message: error.message }), { + status: error.name === 'FileNotFoundError' ? 404 : 500, + headers: { 'Content-Type': 'application/json' }, + }) + }), getContentType: vi.fn().mockReturnValue('text/plain'), isS3Path: vi.fn().mockReturnValue(false), isBlobPath: vi.fn().mockReturnValue(false), @@ -271,8 +276,11 @@ describe('File Serve API Route', () => { expect(response.status).toBe(404) - const text = await response.text() - expect(text).toBe('File not found') + const responseData = await response.json() + expect(responseData).toEqual({ + error: 'FileNotFoundError', + message: expect.stringContaining('File not found'), + }) }) describe('content type detection', () => { diff --git a/apps/sim/app/api/files/serve/[...path]/route.ts b/apps/sim/app/api/files/serve/[...path]/route.ts index 8b74713a98d..e73a006724a 100644 --- a/apps/sim/app/api/files/serve/[...path]/route.ts +++ b/apps/sim/app/api/files/serve/[...path]/route.ts @@ -1,5 +1,5 @@ import { readFile } from 'fs/promises' -import { type NextRequest, NextResponse } from 'next/server' +import type { NextRequest, NextResponse } from 'next/server' import { createLogger } from '@/lib/logs/console-logger' import { downloadFile, isUsingCloudStorage } from '@/lib/uploads' import '@/lib/uploads/setup.server' @@ -53,7 +53,7 @@ export async function GET( logger.error('Error serving file:', error) if (error instanceof FileNotFoundError) { - 
return new NextResponse('File not found', { status: 404 }) + return createErrorResponse(error) } return createErrorResponse(error instanceof Error ? error : new Error('Failed to serve file')) diff --git a/apps/sim/app/api/files/utils.ts b/apps/sim/app/api/files/utils.ts index 3e8bca7603f..a629a992712 100644 --- a/apps/sim/app/api/files/utils.ts +++ b/apps/sim/app/api/files/utils.ts @@ -118,23 +118,35 @@ export function isBlobPath(path: string): boolean { } /** - * Extract S3 key from a path + * Check if a path points to cloud storage (S3, Blob, or generic cloud) */ -export function extractS3Key(path: string): string { - if (isS3Path(path)) { - return decodeURIComponent(path.split('/api/files/serve/s3/')[1]) +export function isCloudPath(path: string): boolean { + return isS3Path(path) || isBlobPath(path) +} + +/** + * Generic function to extract storage key from a path + */ +export function extractStorageKey(path: string, storageType: 's3' | 'blob'): string { + const prefix = `/api/files/serve/${storageType}/` + if (path.includes(prefix)) { + return decodeURIComponent(path.split(prefix)[1]) } return path } +/** + * Extract S3 key from a path + */ +export function extractS3Key(path: string): string { + return extractStorageKey(path, 's3') +} + /** * Extract Blob key from a path */ export function extractBlobKey(path: string): string { - if (isBlobPath(path)) { - return decodeURIComponent(path.split('/api/files/serve/blob/')[1]) - } - return path + return extractStorageKey(path, 'blob') } /** diff --git a/apps/sim/app/api/knowledge/[id]/process-documents/route.ts b/apps/sim/app/api/knowledge/[id]/process-documents/route.ts index 97a920f6849..2d636b0c569 100644 --- a/apps/sim/app/api/knowledge/[id]/process-documents/route.ts +++ b/apps/sim/app/api/knowledge/[id]/process-documents/route.ts @@ -24,6 +24,7 @@ const ProcessDocumentsSchema = z.object({ minCharactersPerChunk: z.number(), recipe: z.string(), lang: z.string(), + chunkOverlap: z.number().optional(), }), }) diff 
--git a/apps/sim/app/api/knowledge/utils.ts b/apps/sim/app/api/knowledge/utils.ts index 56bc036257b..4848bbcb48a 100644 --- a/apps/sim/app/api/knowledge/utils.ts +++ b/apps/sim/app/api/knowledge/utils.ts @@ -377,6 +377,7 @@ export async function processDocumentAsync( minCharactersPerChunk?: number recipe?: string lang?: string + chunkOverlap?: number } ): Promise { const startTime = Date.now() @@ -400,7 +401,7 @@ export async function processDocumentAsync( docData.filename, docData.mimeType, processingOptions.chunkSize || 1000, - 200 // chunkOverlap + processingOptions.chunkOverlap || 200 ) const now = new Date() diff --git a/apps/sim/lib/documents/document-processor.ts b/apps/sim/lib/documents/document-processor.ts index 7a94bf1a5e9..3b1fe44df72 100644 --- a/apps/sim/lib/documents/document-processor.ts +++ b/apps/sim/lib/documents/document-processor.ts @@ -171,16 +171,19 @@ async function parseWithMistralOCR( if (provider === 'blob') { const blobConfig = kbConfig as BlobConfig - if (!blobConfig.containerName || !blobConfig.accountName) { + if ( + !blobConfig.containerName || + (!blobConfig.connectionString && (!blobConfig.accountName || !blobConfig.accountKey)) + ) { throw new Error( - 'Azure Blob configuration missing: AZURE_ACCOUNT_NAME and AZURE_KB_CONTAINER_NAME environment variables are required for PDF processing with Mistral OCR' + 'Azure Blob configuration missing for PDF processing with Mistral OCR. Set AZURE_CONNECTION_STRING or both AZURE_ACCOUNT_NAME + AZURE_ACCOUNT_KEY, and AZURE_KB_CONTAINER_NAME.' ) } } else { const s3Config = kbConfig as S3Config if (!s3Config.bucket || !s3Config.region) { throw new Error( - 'S3 configuration missing: AWS_REGION and S3_KB_BUCKET_NAME environment variables are required for PDF processing with Mistral OCR' + 'S3 configuration missing for PDF processing with Mistral OCR. Set AWS_REGION and S3_KB_BUCKET_NAME environment variables.' 
) } } diff --git a/apps/sim/lib/uploads/blob/blob-client.test.ts b/apps/sim/lib/uploads/blob/blob-client.test.ts index 01297bf01a6..6f7c416fcc3 100644 --- a/apps/sim/lib/uploads/blob/blob-client.test.ts +++ b/apps/sim/lib/uploads/blob/blob-client.test.ts @@ -129,6 +129,8 @@ describe('Azure Blob Storage Client', () => { const result = await uploadToBlob(testBuffer, fileName, contentType, customConfig) + // Verify the container client is called with correct custom configuration + expect(mockGetContainerClient).toHaveBeenCalledWith('customcontainer') expect(result.name).toBe(fileName) expect(result.type).toBe(contentType) }) @@ -196,14 +198,17 @@ describe('Azure Blob Storage Client', () => { }) describe('sanitizeFilenameForMetadata', () => { - it('should sanitize filenames for metadata', async () => { + const testCases = [ + { input: 'test file.txt', expected: 'test file.txt' }, + { input: 'test"file.txt', expected: 'testfile.txt' }, + { input: 'test\\file.txt', expected: 'testfile.txt' }, + { input: 'test file.txt', expected: 'test file.txt' }, + { input: '', expected: 'file' }, + ] + + test.each(testCases)('should sanitize "$input" to "$expected"', async ({ input, expected }) => { const { sanitizeFilenameForMetadata } = await import('./blob-client') - - expect(sanitizeFilenameForMetadata('test file.txt')).toBe('test file.txt') - expect(sanitizeFilenameForMetadata('test"file.txt')).toBe('testfile.txt') - expect(sanitizeFilenameForMetadata('test\\file.txt')).toBe('testfile.txt') - expect(sanitizeFilenameForMetadata('test file.txt')).toBe('test file.txt') - expect(sanitizeFilenameForMetadata('')).toBe('file') + expect(sanitizeFilenameForMetadata(input)).toBe(expected) }) }) }) diff --git a/apps/sim/lib/uploads/blob/blob-client.ts b/apps/sim/lib/uploads/blob/blob-client.ts index 56d15ff8daf..90232774c72 100644 --- a/apps/sim/lib/uploads/blob/blob-client.ts +++ b/apps/sim/lib/uploads/blob/blob-client.ts @@ -143,7 +143,7 @@ export async function uploadToBlob( const 
blockBlobClient = containerClient.getBlockBlobClient(uniqueKey) // Upload the file to Azure Blob Storage - await blockBlobClient.upload(file, file.length, { + await blockBlobClient.upload(file, fileSize, { blobHTTPHeaders: { blobContentType: contentType, }, @@ -187,7 +187,13 @@ export async function getPresignedUrl(key: string, expiresIn = 3600) { const sasToken = generateBlobSASQueryParameters( sasOptions, - new StorageSharedKeyCredential(BLOB_CONFIG.accountName, BLOB_CONFIG.accountKey || '') + new StorageSharedKeyCredential( + BLOB_CONFIG.accountName, + BLOB_CONFIG.accountKey ?? + (() => { + throw new Error('AZURE_ACCOUNT_KEY is required when using account name authentication') + })() + ) ).toString() return `${blockBlobClient.url}?${sasToken}` @@ -239,7 +245,13 @@ export async function getPresignedUrlWithConfig( const sasToken = generateBlobSASQueryParameters( sasOptions, - new StorageSharedKeyCredential(customConfig.accountName, customConfig.accountKey || '') + new StorageSharedKeyCredential( + customConfig.accountName, + customConfig.accountKey ?? + (() => { + throw new Error('Account key is required when using account name authentication') + })() + ) ).toString() return `${blockBlobClient.url}?${sasToken}` @@ -256,7 +268,10 @@ export async function downloadFromBlob(key: string) { const blockBlobClient = containerClient.getBlockBlobClient(key) const downloadBlockBlobResponse = await blockBlobClient.download() - const downloaded = await streamToBuffer(downloadBlockBlobResponse.readableStreamBody!) 
+ if (!downloadBlockBlobResponse.readableStreamBody) { + throw new Error('Failed to get readable stream from blob download') + } + const downloaded = await streamToBuffer(downloadBlockBlobResponse.readableStreamBody) return downloaded } diff --git a/apps/sim/lib/uploads/s3/s3-client.test.ts b/apps/sim/lib/uploads/s3/s3-client.test.ts index fc80921d7d3..3a3a3d4e931 100644 --- a/apps/sim/lib/uploads/s3/s3-client.test.ts +++ b/apps/sim/lib/uploads/s3/s3-client.test.ts @@ -287,7 +287,7 @@ describe('S3 Client', () => { expect(client).toBeDefined() // Verify the client was constructed with the right configuration - expect(S3Client).toBeDefined() + expect(S3Client).toHaveBeenCalledWith({ region: 'test-region' }) }) }) }) diff --git a/apps/sim/lib/uploads/setup.ts b/apps/sim/lib/uploads/setup.ts index e35e33649f2..e4da2dfe936 100644 --- a/apps/sim/lib/uploads/setup.ts +++ b/apps/sim/lib/uploads/setup.ts @@ -16,7 +16,7 @@ const hasS3Config = !!(env.S3_BUCKET_NAME && env.AWS_REGION) // Check if Azure Blob is configured (has required credentials) const hasBlobConfig = !!( env.AZURE_STORAGE_CONTAINER_NAME && - (env.AZURE_ACCOUNT_NAME || env.AZURE_CONNECTION_STRING) + ((env.AZURE_ACCOUNT_NAME && env.AZURE_ACCOUNT_KEY) || env.AZURE_CONNECTION_STRING) ) // Storage configuration flags - auto-detect based on available credentials diff --git a/apps/sim/lib/uploads/storage-client.ts b/apps/sim/lib/uploads/storage-client.ts index 143cf31a813..67785228f25 100644 --- a/apps/sim/lib/uploads/storage-client.ts +++ b/apps/sim/lib/uploads/storage-client.ts @@ -80,7 +80,7 @@ export async function uploadFile( } throw new Error( - 'No storage provider configured. Set USE_BLOB=true or configure S3 for production.' + 'No storage provider configured. Set Azure credentials (AZURE_CONNECTION_STRING or AZURE_ACCOUNT_NAME + AZURE_ACCOUNT_KEY) or configure AWS credentials for S3.' 
) } @@ -101,7 +101,7 @@ export async function downloadFile(key: string): Promise { } throw new Error( - 'No storage provider configured. Set USE_BLOB=true or configure S3 for production.' + 'No storage provider configured. Set Azure credentials (AZURE_CONNECTION_STRING or AZURE_ACCOUNT_NAME + AZURE_ACCOUNT_KEY) or configure AWS credentials for S3.' ) } @@ -121,7 +121,7 @@ export async function deleteFile(key: string): Promise { } throw new Error( - 'No storage provider configured. Set USE_BLOB=true or configure S3 for production.' + 'No storage provider configured. Set Azure credentials (AZURE_CONNECTION_STRING or AZURE_ACCOUNT_NAME + AZURE_ACCOUNT_KEY) or configure AWS credentials for S3.' ) } @@ -143,7 +143,7 @@ export async function getPresignedUrl(key: string, expiresIn = 3600): Promise { - logger.info( - 'Setting up headers with API key:', - params.apiKey ? `${params.apiKey.substring(0, 5)}...` : 'Missing' - ) return { 'Content-Type': 'application/json', Accept: 'application/json', From 10f0ec5c409c8bc5e4777495d36f49d4c8426128 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Mon, 16 Jun 2025 17:03:23 -0700 Subject: [PATCH 8/8] added sheetjs and xlsx parser --- apps/sim/lib/documents/document-processor.ts | 4 - apps/sim/lib/file-parsers/index.ts | 8 ++ apps/sim/lib/file-parsers/types.ts | 2 +- apps/sim/lib/file-parsers/xlsx-parser.ts | 104 +++++++++++++++++++ apps/sim/package.json | 2 + bun.lock | 22 ++++ 6 files changed, 137 insertions(+), 5 deletions(-) create mode 100644 apps/sim/lib/file-parsers/xlsx-parser.ts diff --git a/apps/sim/lib/documents/document-processor.ts b/apps/sim/lib/documents/document-processor.ts index 3b1fe44df72..188da8aeb37 100644 --- a/apps/sim/lib/documents/document-processor.ts +++ b/apps/sim/lib/documents/document-processor.ts @@ -216,7 +216,6 @@ async function parseWithMistralOCR( try { const response = await retryWithExponentialBackoff( async () => { - // Get the URL from the tool const url = typeof mistralParserTool.request!.url 
=== 'function' ? mistralParserTool.request!.url({ @@ -226,7 +225,6 @@ async function parseWithMistralOCR( }) : mistralParserTool.request!.url - // Get headers from the tool const headers = typeof mistralParserTool.request!.headers === 'function' ? mistralParserTool.request!.headers({ @@ -259,7 +257,6 @@ async function parseWithMistralOCR( } ) - // Use the tool's transformResponse function to process the response const result = await mistralParserTool.transformResponse!(response, { filePath: httpsUrl, apiKey: mistralApiKey, @@ -282,7 +279,6 @@ async function parseWithMistralOCR( cloudUrl, } } catch (error) { - // Log the full error details for debugging logger.error(`Mistral OCR failed for ${filename}:`, { message: error instanceof Error ? error.message : String(error), stack: error instanceof Error ? error.stack : undefined, diff --git a/apps/sim/lib/file-parsers/index.ts b/apps/sim/lib/file-parsers/index.ts index 26cd63f8832..a2526b7304e 100644 --- a/apps/sim/lib/file-parsers/index.ts +++ b/apps/sim/lib/file-parsers/index.ts @@ -89,6 +89,14 @@ function getParserInstances(): Record { } catch (error) { logger.error('Failed to load MD parser:', error) } + + try { + const { XlsxParser } = require('./xlsx-parser') + parserInstances.xlsx = new XlsxParser() + parserInstances.xls = new XlsxParser() // Both xls and xlsx use the same parser + } catch (error) { + logger.error('Failed to load XLSX parser:', error) + } } catch (error) { logger.error('Error loading file parsers:', error) } diff --git a/apps/sim/lib/file-parsers/types.ts b/apps/sim/lib/file-parsers/types.ts index e963343618c..161190e6138 100644 --- a/apps/sim/lib/file-parsers/types.ts +++ b/apps/sim/lib/file-parsers/types.ts @@ -8,4 +8,4 @@ export interface FileParser { parseBuffer?(buffer: Buffer): Promise } -export type SupportedFileType = 'pdf' | 'csv' | 'docx' +export type SupportedFileType = 'pdf' | 'csv' | 'docx' | 'xlsx' | 'xls' diff --git a/apps/sim/lib/file-parsers/xlsx-parser.ts 
b/apps/sim/lib/file-parsers/xlsx-parser.ts new file mode 100644 index 00000000000..cc1a1bbf113 --- /dev/null +++ b/apps/sim/lib/file-parsers/xlsx-parser.ts @@ -0,0 +1,104 @@ +import { existsSync } from 'fs' +import * as XLSX from 'xlsx' +import { createLogger } from '@/lib/logs/console-logger' +import type { FileParseResult, FileParser } from './types' + +const logger = createLogger('XlsxParser') + +export class XlsxParser implements FileParser { + async parseFile(filePath: string): Promise { + try { + // Validate input + if (!filePath) { + throw new Error('No file path provided') + } + + // Check if file exists + if (!existsSync(filePath)) { + throw new Error(`File not found: ${filePath}`) + } + + logger.info(`Parsing XLSX file: ${filePath}`) + + // Read the workbook + const workbook = XLSX.readFile(filePath) + return this.processWorkbook(workbook) + } catch (error) { + logger.error('XLSX file parsing error:', error) + throw new Error(`Failed to parse XLSX file: ${(error as Error).message}`) + } + } + + async parseBuffer(buffer: Buffer): Promise { + try { + logger.info('Parsing XLSX buffer, size:', buffer.length) + + if (!buffer || buffer.length === 0) { + throw new Error('Empty buffer provided') + } + + // Read the workbook from buffer + const workbook = XLSX.read(buffer, { type: 'buffer' }) + return this.processWorkbook(workbook) + } catch (error) { + logger.error('XLSX buffer parsing error:', error) + throw new Error(`Failed to parse XLSX buffer: ${(error as Error).message}`) + } + } + + private processWorkbook(workbook: XLSX.WorkBook): FileParseResult { + const sheetNames = workbook.SheetNames + const sheets: Record = {} + let content = '' + let totalRows = 0 + + // Process each worksheet + for (const sheetName of sheetNames) { + const worksheet = workbook.Sheets[sheetName] + + // Convert to array of objects + const sheetData = XLSX.utils.sheet_to_json(worksheet, { header: 1 }) + sheets[sheetName] = sheetData + totalRows += sheetData.length + + // Add sheet 
content to the overall content string + content += `Sheet: ${sheetName}\n` + content += `=${'='.repeat(sheetName.length + 6)}\n\n` + + if (sheetData.length > 0) { + // Process each row + sheetData.forEach((row: unknown, rowIndex: number) => { + if (Array.isArray(row) && row.length > 0) { + // Convert row to string, handling undefined/null values + const rowString = row + .map((cell) => { + if (cell === null || cell === undefined) { + return '' + } + return String(cell) + }) + .join('\t') + + content += `${rowString}\n` + } + }) + } else { + content += '[Empty sheet]\n' + } + + content += '\n' + } + + logger.info(`XLSX parsing completed: ${sheetNames.length} sheets, ${totalRows} total rows`) + + return { + content: content.trim(), + metadata: { + sheetCount: sheetNames.length, + sheetNames: sheetNames, + totalRows: totalRows, + sheets: sheets, + }, + } + } +} diff --git a/apps/sim/package.json b/apps/sim/package.json index db7a88f8edb..05a35eaf844 100644 --- a/apps/sim/package.json +++ b/apps/sim/package.json @@ -109,6 +109,7 @@ "tailwindcss-animate": "^1.0.7", "three": "0.177.0", "uuid": "^11.1.0", + "xlsx": "0.18.5", "zod": "^3.24.2" }, "devDependencies": { @@ -121,6 +122,7 @@ "@types/prismjs": "^1.26.5", "@types/react": "^19", "@types/react-dom": "^19", + "@types/xlsx": "0.0.36", "@vitejs/plugin-react": "^4.3.4", "@vitest/coverage-v8": "^3.0.8", "critters": "^0.0.23", diff --git a/bun.lock b/bun.lock index 4e015d745c6..9383302b81f 100644 --- a/bun.lock +++ b/bun.lock @@ -140,6 +140,7 @@ "tailwindcss-animate": "^1.0.7", "three": "0.177.0", "uuid": "^11.1.0", + "xlsx": "0.18.5", "zod": "^3.24.2", }, "devDependencies": { @@ -152,6 +153,7 @@ "@types/prismjs": "^1.26.5", "@types/react": "^19", "@types/react-dom": "^19", + "@types/xlsx": "0.0.36", "@vitejs/plugin-react": "^4.3.4", "@vitest/coverage-v8": "^3.0.8", "critters": "^0.0.23", @@ -1347,6 +1349,8 @@ "@types/webxr": ["@types/webxr@0.5.22", "", {}, 
"sha512-Vr6Stjv5jPRqH690f5I5GLjVk8GSsoQSYJ2FVd/3jJF7KaqfwPi3ehfBS96mlQ2kPCwZaX6U0rG2+NGHBKkA/A=="], + "@types/xlsx": ["@types/xlsx@0.0.36", "", { "dependencies": { "xlsx": "*" } }, "sha512-mvfrKiKKMErQzLMF8ElYEH21qxWCZtN59pHhWGmWCWFJStYdMWjkDSAy6mGowFxHXaXZWe5/TW7pBUiWclIVOw=="], + "@types/yargs": ["@types/yargs@15.0.19", "", { "dependencies": { "@types/yargs-parser": "*" } }, "sha512-2XUaGVmyQjgyAZldf0D0c14vvo/yv0MhQBSTJcejMMaitsn3nxCB6TmH4G0ZQf+uxROOa9mpanoSm8h6SG/1ZA=="], "@types/yargs-parser": ["@types/yargs-parser@21.0.3", "", {}, "sha512-I4q9QU9MQv4oEOz4tAHJtNz1cwuLxn2F3xcc2iV5WdqLPpUnj30aUuxt1mAxYTG+oe8CZMV/+6rU4S4gRDzqtQ=="], @@ -1427,6 +1431,8 @@ "acorn-jsx": ["acorn-jsx@5.3.2", "", { "peerDependencies": { "acorn": "^6.0.0 || ^7.0.0 || ^8.0.0" } }, "sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ=="], + "adler-32": ["adler-32@1.3.1", "", {}, "sha512-ynZ4w/nUUv5rrsR8UUGoe1VC9hZj6V5hU9Qw1HlMDJGEJw5S7TfTErWTjMys6M7vr0YWcPqs3qAr4ss0nDfP+A=="], + "agent-base": ["agent-base@7.1.3", "", {}, "sha512-jRR5wdylq8CkOe6hei19GGZnxM6rBGwFl3Bg0YItGDimvjGtAvdZk4Pu6Cl4u4Igsws4a1fd1Vq3ezrhn4KmFw=="], "agentkeepalive": ["agentkeepalive@4.6.0", "", { "dependencies": { "humanize-ms": "^1.2.1" } }, "sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ=="], @@ -1535,6 +1541,8 @@ "ccount": ["ccount@2.0.1", "", {}, "sha512-eyrF0jiFpY+3drT6383f1qhkbGsLSifNAjA61IUjZjmLCWjItY6LB9ft9YhoDgwfmclB2zhu51Lc7+95b8NRAg=="], + "cfb": ["cfb@1.2.2", "", { "dependencies": { "adler-32": "~1.3.0", "crc-32": "~1.2.0" } }, "sha512-KfdUZsSOw19/ObEWasvBP/Ac4reZvAGauZhs6S/gqNhXhI7cKwvlH7ulj+dOEYnca4bm4SGo8C1bTAQvnTjgQA=="], + "chai": ["chai@5.2.0", "", { "dependencies": { "assertion-error": "^2.0.1", "check-error": "^2.1.1", "deep-eql": "^5.0.1", "loupe": "^3.1.0", "pathval": "^2.0.0" } }, "sha512-mCuXncKXk5iCLhfhwTc0izo0gtEmpz5CtG2y8GiOINBlMVS6v8TMRc5TaLWKS6692m9+dVVfzgeVxR5UxWHTYw=="], "chalk": ["chalk@4.1.2", "", { 
"dependencies": { "ansi-styles": "^4.1.0", "supports-color": "^7.1.0" } }, "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA=="], @@ -1585,6 +1593,8 @@ "cmdk": ["cmdk@1.1.1", "", { "dependencies": { "@radix-ui/react-compose-refs": "^1.1.1", "@radix-ui/react-dialog": "^1.1.6", "@radix-ui/react-id": "^1.1.0", "@radix-ui/react-primitive": "^2.0.2" }, "peerDependencies": { "react": "^18 || ^19 || ^19.0.0-rc", "react-dom": "^18 || ^19 || ^19.0.0-rc" } }, "sha512-Vsv7kFaXm+ptHDMZ7izaRsP70GgrW9NBNGswt9OZaVBLlE0SNpDq8eu/VGXyF9r7M0azK3Wy7OlYXsuyYLFzHg=="], + "codepage": ["codepage@1.15.0", "", {}, "sha512-3g6NUTPd/YtuuGrhMnOMRjFc+LJw/bnMp3+0r/Wcz3IXUuCosKRJvMphm5+Q+bvTVGcJJuRvVLuYba+WojaFaA=="], + "collapse-white-space": ["collapse-white-space@2.1.0", "", {}, "sha512-loKTxY1zCOuG4j9f6EPnuyyYkf58RnhhWTvRoZEokgB+WbdXehfjFviyOVYkqzEWz1Q5kRiZdBYS5SwxbQYwzw=="], "color": ["color@4.2.3", "", { "dependencies": { "color-convert": "^2.0.1", "color-string": "^1.9.0" } }, "sha512-1rXeuUUiGGrykh+CeBdu5Ie7OJwinCgQY0bc7GCRxy5xVHy+moaqkpL/jqQq0MtQOeYcrqEz4abc5f0KtU7W4A=="], @@ -1619,6 +1629,8 @@ "cosmiconfig": ["cosmiconfig@6.0.0", "", { "dependencies": { "@types/parse-json": "^4.0.0", "import-fresh": "^3.1.0", "parse-json": "^5.0.0", "path-type": "^4.0.0", "yaml": "^1.7.2" } }, "sha512-xb3ZL6+L8b9JLLCx3ZdoZy4+2ECphCMo2PwqgP1tlfVq6M6YReyzBJtvWWtbDSpNr9hn96pkCiZqUcFEc+54Qg=="], + "crc-32": ["crc-32@1.2.2", "", { "bin": { "crc32": "bin/crc32.njs" } }, "sha512-ROmzCKrTnOwybPcJApAA6WBWij23HVfGVNKqqrZpuyZOHqK2CwHSvpGuyt/UNNvaIjEd8X5IFGp4Mh+Ie1IHJQ=="], + "critters": ["critters@0.0.23", "", { "dependencies": { "chalk": "^4.1.0", "css-select": "^5.1.0", "dom-serializer": "^2.0.0", "domhandler": "^5.0.2", "htmlparser2": "^8.0.2", "postcss": "^8.4.23", "postcss-media-query-parser": "^0.2.3" } }, "sha512-/MCsQbuzTPA/ZTOjjyr2Na5o3lRpr8vd0MZE8tMP0OBNg/VrLxWHteVKalQ8KR+fBmUadbJLdoyEz9sT+q84qg=="], "croner": ["croner@9.0.0", "", {}, 
"sha512-onMB0OkDjkXunhdW9htFjEhqrD54+M94i6ackoUkjHKbRnXdyEyKRelp4nJ1kAz32+s27jP1FsebpJCVl0BsvA=="], @@ -1885,6 +1897,8 @@ "forwarded-parse": ["forwarded-parse@2.1.2", "", {}, "sha512-alTFZZQDKMporBH77856pXgzhEzaUVmLCDk+egLgIgHst3Tpndzz8MnKe+GzRJRfvVdn69HhpW7cmXzvtLvJAw=="], + "frac": ["frac@1.1.2", "", {}, "sha512-w/XBfkibaTl3YDqASwfDUqkna4Z2p9cFSr1aHDt0WoMTECnRfBOv2WArlZILlqgWlmdIlALXGpM2AOhEk5W3IA=="], + "framer-motion": ["framer-motion@12.15.0", "", { "dependencies": { "motion-dom": "^12.15.0", "motion-utils": "^12.12.1", "tslib": "^2.4.0" }, "peerDependencies": { "@emotion/is-prop-valid": "*", "react": "^18.0.0 || ^19.0.0", "react-dom": "^18.0.0 || ^19.0.0" }, "optionalPeers": ["@emotion/is-prop-valid", "react", "react-dom"] }, "sha512-XKg/LnKExdLGugZrDILV7jZjI599785lDIJZLxMiiIFidCsy0a4R2ZEf+Izm67zyOuJgQYTHOmodi7igQsw3vg=="], "freestyle-sandboxes": ["freestyle-sandboxes@0.0.38", "", { "dependencies": { "@hey-api/client-fetch": "^0.5.7", "glob": "^11.0.1", "openai": "^4.77.3", "openapi": "^1.0.1", "zod": "^3.24.1" } }, "sha512-g1h9NPbIw8Bhd9nTkcYeA3I/ghV6qqVt9ogwZDFU9KMWPM5VedWwFUz29eELdcqJ4dnpA1NznU67h5/lsKlhvA=="], @@ -2765,6 +2779,8 @@ "sprintf-js": ["sprintf-js@1.0.3", "", {}, "sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g=="], + "ssf": ["ssf@0.11.2", "", { "dependencies": { "frac": "~1.1.2" } }, "sha512-+idbmIXoYET47hH+d7dfm2epdOMUDjqcB4648sTZ+t2JwoyBFL/insLfB/racrDmsKB3diwsDA696pZMieAC5g=="], + "stackback": ["stackback@0.0.2", "", {}, "sha512-1XMJE5fQo1jGH6Y/7ebnwPOBEkIEnT4QF32d5R1+VXdXveM0IBMJt8zfaxX1P3QhVwrYe+576+jkANtSS2mBbw=="], "stacktrace-parser": ["stacktrace-parser@0.1.11", "", { "dependencies": { "type-fest": "^0.7.1" } }, "sha512-WjlahMgHmCJpqzU8bIBy4qtsZdU9lRlcZE3Lvyej6t4tuOuv1vk57OW3MBrj6hXBFx/nNoC9MPMTcr5YA7NQbg=="], @@ -2995,6 +3011,10 @@ "why-is-node-running": ["why-is-node-running@2.3.0", "", { "dependencies": { "siginfo": "^2.0.0", "stackback": "0.0.2" }, "bin": { "why-is-node-running": 
"cli.js" } }, "sha512-hUrmaWBdVDcxvYqnyh09zunKzROWjbZTiNy8dBEjkS7ehEDQibXJ7XvlmtbwuTclUiIyN+CyXQD4Vmko8fNm8w=="], + "wmf": ["wmf@1.0.2", "", {}, "sha512-/p9K7bEh0Dj6WbXg4JG0xvLQmIadrner1bi45VMJTfnbVHsc7yIajZyoSoK60/dtVBs12Fm6WkUI5/3WAVsNMw=="], + + "word": ["word@0.3.0", "", {}, "sha512-OELeY0Q61OXpdUfTp+oweA/vtLVg5VDOXh+3he3PNzLGG/y0oylSOC1xRVj0+l4vQ3tj/bB1HVHv1ocXkQceFA=="], + "wrap-ansi": ["wrap-ansi@6.2.0", "", { "dependencies": { "ansi-styles": "^4.0.0", "string-width": "^4.1.0", "strip-ansi": "^6.0.0" } }, "sha512-r6lPcBGxZXlIcymEu7InxDMhdW0KDxpLgoFLcguasxCaJ/SOIZwINatK9KY/tf+ZrlywOKU0UDj3ATXUBfxJXA=="], "wrap-ansi-cjs": ["wrap-ansi@7.0.0", "", { "dependencies": { "ansi-styles": "^4.0.0", "string-width": "^4.1.0", "strip-ansi": "^6.0.0" } }, "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q=="], @@ -3003,6 +3023,8 @@ "ws": ["ws@8.18.2", "", { "peerDependencies": { "bufferutil": "^4.0.1", "utf-8-validate": ">=5.0.2" }, "optionalPeers": ["bufferutil", "utf-8-validate"] }, "sha512-DMricUmwGZUVr++AEAe2uiVM7UoO9MAVZMDu05UQOaUII0lp+zOzLLU4Xqh/JvTqklB1T4uELaaPBKyjE1r4fQ=="], + "xlsx": ["xlsx@0.18.5", "", { "dependencies": { "adler-32": "~1.3.0", "cfb": "~1.2.1", "codepage": "~1.15.0", "crc-32": "~1.2.1", "ssf": "~0.11.2", "wmf": "~1.0.1", "word": "~0.3.0" }, "bin": { "xlsx": "bin/xlsx.njs" } }, "sha512-dmg3LCjBPHZnQp5/F/+nnTa+miPJxUXB6vtk42YjBBKayDNagxGEeIdWApkYPOf3Z3pm3k62Knjzp7lMeTEtFQ=="], + "xml-name-validator": ["xml-name-validator@5.0.0", "", {}, "sha512-EvGK8EJ3DhaHfbRlETOWAS5pO9MZITeauHKJyb8wyajUfQUenkIg2MvLDTZ4T/TgIcm3HU0TFBgWWboAZ30UHg=="], "xmlbuilder": ["xmlbuilder@10.1.1", "", {}, "sha512-OyzrcFLL/nb6fMGHbiRDuPup9ljBycsdCypwuyg5AAHvyWzGfChJpCXMG88AGTIMFhGZ9RccFN1e6lhg3hkwKg=="],