diff --git a/apps/sim/app/api/knowledge/[id]/documents/route.test.ts b/apps/sim/app/api/knowledge/[id]/documents/route.test.ts index c27bec231d5..61a702cc721 100644 --- a/apps/sim/app/api/knowledge/[id]/documents/route.test.ts +++ b/apps/sim/app/api/knowledge/[id]/documents/route.test.ts @@ -376,7 +376,7 @@ describe('Knowledge Base Documents API Route', () => { ], processingOptions: { chunkSize: 50, // Invalid: too small - minCharactersPerChunk: 10, // Invalid: too small + minCharactersPerChunk: 0, // Invalid: too small recipe: 'default', lang: 'en', chunkOverlap: 1000, // Invalid: too large diff --git a/apps/sim/app/api/knowledge/[id]/documents/route.ts b/apps/sim/app/api/knowledge/[id]/documents/route.ts index c3b14ac4a79..4c9813a02e2 100644 --- a/apps/sim/app/api/knowledge/[id]/documents/route.ts +++ b/apps/sim/app/api/knowledge/[id]/documents/route.ts @@ -295,7 +295,7 @@ const BulkCreateDocumentsSchema = z.object({ documents: z.array(CreateDocumentSchema), processingOptions: z.object({ chunkSize: z.number().min(100).max(4000), - minCharactersPerChunk: z.number().min(50).max(2000), + minCharactersPerChunk: z.number().min(1).max(2000), recipe: z.string(), lang: z.string(), chunkOverlap: z.number().min(0).max(500), diff --git a/apps/sim/app/api/knowledge/route.test.ts b/apps/sim/app/api/knowledge/route.test.ts index 97218d77c01..0d3d81fcd1a 100644 --- a/apps/sim/app/api/knowledge/route.test.ts +++ b/apps/sim/app/api/knowledge/route.test.ts @@ -168,7 +168,7 @@ describe('Knowledge Base API Route', () => { expect(data.data.embeddingDimension).toBe(1536) expect(data.data.chunkingConfig).toEqual({ maxSize: 1024, - minSize: 100, + minSize: 1, overlap: 200, }) }) diff --git a/apps/sim/app/api/knowledge/route.ts b/apps/sim/app/api/knowledge/route.ts index 0f73ea5bf0e..a4f5b2dd084 100644 --- a/apps/sim/app/api/knowledge/route.ts +++ b/apps/sim/app/api/knowledge/route.ts @@ -18,12 +18,12 @@ const CreateKnowledgeBaseSchema = z.object({ chunkingConfig: z .object({ maxSize: z.number().min(100).max(4000).default(1024), - minSize: z.number().min(50).max(2000).default(100), + minSize: z.number().min(1).max(2000).default(1), overlap: z.number().min(0).max(500).default(200), }) .default({ maxSize: 1024, - minSize: 100, + minSize: 1, overlap: 200, }) .refine((data) => data.minSize < data.maxSize, { diff --git a/apps/sim/app/api/knowledge/utils.ts b/apps/sim/app/api/knowledge/utils.ts index 4e9b1a158b2..448f6f53d10 100644 --- a/apps/sim/app/api/knowledge/utils.ts +++ b/apps/sim/app/api/knowledge/utils.ts @@ -531,7 +531,8 @@ export async function processDocumentAsync( docData.filename, docData.mimeType, processingOptions.chunkSize || 1000, - processingOptions.chunkOverlap || 200 + processingOptions.chunkOverlap || 200, + processingOptions.minCharactersPerChunk || 1 ) const now = new Date() diff --git a/apps/sim/app/workspace/[workspaceId]/knowledge/[id]/components/upload-modal/upload-modal.tsx b/apps/sim/app/workspace/[workspaceId]/knowledge/[id]/components/upload-modal/upload-modal.tsx index ccb474fd94a..0ebb3273bc1 100644 --- a/apps/sim/app/workspace/[workspaceId]/knowledge/[id]/components/upload-modal/upload-modal.tsx +++ b/apps/sim/app/workspace/[workspaceId]/knowledge/[id]/components/upload-modal/upload-modal.tsx @@ -142,7 +142,7 @@ export function UploadModal({ try { await uploadFiles(files, knowledgeBaseId, { chunkSize: chunkingConfig?.maxSize || 1024, - minCharactersPerChunk: chunkingConfig?.minSize || 100, + minCharactersPerChunk: chunkingConfig?.minSize || 1, chunkOverlap: chunkingConfig?.overlap || 200, recipe: 'default', }) diff --git a/apps/sim/app/workspace/[workspaceId]/knowledge/components/create-modal/create-modal.tsx b/apps/sim/app/workspace/[workspaceId]/knowledge/components/create-modal/create-modal.tsx index 5eece7b5af9..273a3c54198 100644 --- a/apps/sim/app/workspace/[workspaceId]/knowledge/components/create-modal/create-modal.tsx +++ b/apps/sim/app/workspace/[workspaceId]/knowledge/components/create-modal/create-modal.tsx @@ -50,7 +50,7 @@ const FormSchema = z description: z.string().max(500, 'Description must be less than 500 characters').optional(), minChunkSize: z .number() - .min(50, 'Min chunk size must be at least 50') + .min(1, 'Min chunk size must be at least 1') .max(2000, 'Min chunk size must be less than 2000'), maxChunkSize: z .number() @@ -115,7 +115,7 @@ export function CreateModal({ open, onOpenChange, onKnowledgeBaseCreated }: Crea defaultValues: { name: '', description: '', - minChunkSize: 100, + minChunkSize: 1, maxChunkSize: 1024, overlapSize: 200, }, @@ -299,7 +299,7 @@ export function CreateModal({ open, onOpenChange, onKnowledgeBaseCreated }: Crea reset({ name: '', description: '', - minChunkSize: 100, + minChunkSize: 1, maxChunkSize: 1024, overlapSize: 200, }) @@ -423,7 +423,7 @@ export function CreateModal({ open, onOpenChange, onKnowledgeBaseCreated }: Crea :"/\\|?*]/.test(documentName)) { throw new Error('Document name contains invalid characters. Avoid: < > : " / \\ | ? *') } - if (!textContent || textContent.length < 10) { - throw new Error('Document content must be at least 10 characters long') + if (!textContent || textContent.length < 1) { + throw new Error('Document content cannot be empty') } if (textContent.length > 1000000) { throw new Error('Document content exceeds maximum size of 1MB') @@ -157,7 +157,7 @@ export const knowledgeCreateDocumentTool: ToolConfig