Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 8 additions & 12 deletions apps/sim/lib/documents/document-processor.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-import { RecursiveChunker } from 'chonkie/cloud'
+import { RecursiveChunker } from 'chonkie'
import type { RecursiveChunk } from 'chonkie/types'
import { env } from '@/lib/env'
import { isSupportedFileType, parseBuffer, parseFile } from '@/lib/file-parsers'
@@ -78,7 +78,11 @@ async function parseDocument(
fileUrl: string,
filename: string,
mimeType: string
-): Promise<{ content: string; processingMethod: 'file-parser' | 'mistral-ocr'; s3Url?: string }> {
+): Promise<{
+content: string
+processingMethod: 'file-parser' | 'mistral-ocr'
+s3Url?: string
+}> {
const processingMethod = determineProcessingMethod(mimeType, filename)

logger.info(`Processing document "${filename}" using ${processingMethod}`)
@@ -237,15 +241,8 @@ async function chunkContent(
content: string,
options: DocumentProcessingOptions
): Promise<RecursiveChunk[]> {
-const apiKey = env.CHONKIE_API_KEY
-if (!apiKey) {
-throw new Error('CHONKIE_API_KEY not configured')
-}
-
-const chunker = new RecursiveChunker(apiKey, {
+const chunker = await RecursiveChunker.create({
chunkSize: options.chunkSize || 512,
-recipe: options.recipe || 'default',
-lang: options.lang || 'en',
minCharactersPerChunk: options.minCharactersPerChunk || 24,
})

@@ -255,7 +252,7 @@ async function chunkContent(
chunkSize: options.chunkSize || 512,
})

-const chunks = await chunker.chunk({ text: content })
+const chunks = await chunker.chunk(content)

logger.info(`Successfully created ${chunks.length} chunks`)
return chunks as RecursiveChunk[]
@@ -266,7 +263,6 @@ async function chunkContent(
)
}
}

/**
* Calculate token count estimation (rough approximation: 4 chars per token)
*/
Expand Down
1 change: 0 additions & 1 deletion apps/sim/lib/env.ts
Original file line number Diff line number Diff line change
@@ -72,7 +72,6 @@ export const env = createEnv({
FREE_PLAN_LOG_RETENTION_DAYS: z.string().optional(),
NODE_ENV: z.string().optional(),
GITHUB_TOKEN: z.string().optional(),
-CHONKIE_API_KEY: z.string().min(1).optional(),
ELEVENLABS_API_KEY: z.string().min(1).optional(),

// OAuth blocks (all optional)
Expand Down