-
Notifications
You must be signed in to change notification settings - Fork 3.5k
feat(knowledge): add Ollama embedding provider support #3714
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: staging
Are you sure you want to change the base?
Changes from 1 commit
255640f
b043bc2
61f05a7
546dd7c
616761d
133f326
2693251
18e7ac2
983efc3
53a1423
0b5d218
606b70b
da36fcd
b9e6ab7
b1e92b8
3698a04
988158e
166a7f3
2f30934
f88e9f9
863e497
546061e
00b3c7d
075b005
ea59193
ee3cc30
e6d0a60
0812f3b
fd8d2b3
5c872c4
4571299
322dc4e
ef84871
d308fe0
aa452f4
8445d7e
185007a
456eaa4
1570b02
0e1dcf7
e2b8189
ea3dd08
24779a7
547de40
7afb708
2cdb519
507cc36
50858d4
5bdfe15
f6d121e
d210669
ff08fb0
5cebdea
2552edc
c6fde92
71b1769
61d7936
1991604
dbabedd
0f42820
9e3d8ce
dec517d
551dff9
952de73
97c0c71
578171c
e89e3c2
c527867
7bcee72
5638d3a
2c8cbb4
08e2b24
045824a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,7 +9,12 @@ import { getStorageMethod, isRedisStorage } from '@/lib/core/storage' | |
| import { processDocument } from '@/lib/knowledge/documents/document-processor' | ||
| import { DocumentProcessingQueue } from '@/lib/knowledge/documents/queue' | ||
| import type { DocumentSortField, SortOrder } from '@/lib/knowledge/documents/types' | ||
| import { generateEmbeddings } from '@/lib/knowledge/embeddings' | ||
| import { | ||
| deleteKBDocumentEmbeddings, | ||
| insertKBEmbeddings, | ||
| parseEmbeddingModel, | ||
| } from '@/lib/knowledge/dynamic-tables' | ||
| import { generateEmbeddings, getOllamaModelContextLength } from '@/lib/knowledge/embeddings' | ||
| import { | ||
| buildUndefinedTagsError, | ||
| parseBooleanValue, | ||
|
|
@@ -410,6 +415,8 @@ export async function processDocumentAsync( | |
| userId: knowledgeBase.userId, | ||
| workspaceId: knowledgeBase.workspaceId, | ||
| chunkingConfig: knowledgeBase.chunkingConfig, | ||
| embeddingModel: knowledgeBase.embeddingModel, | ||
| embeddingDimension: knowledgeBase.embeddingDimension, | ||
| }) | ||
| .from(knowledgeBase) | ||
| .where(eq(knowledgeBase.id, knowledgeBaseId)) | ||
|
|
@@ -430,19 +437,60 @@ export async function processDocumentAsync( | |
|
|
||
| logger.info(`[${documentId}] Status updated to 'processing', starting document processor`) | ||
|
|
||
| const kbConfig = kb[0].chunkingConfig as { maxSize: number; minSize: number; overlap: number } | ||
| const kbConfig = kb[0].chunkingConfig as { | ||
| maxSize: number | ||
| minSize: number | ||
| overlap: number | ||
| ollamaBaseUrl?: string | ||
| } | ||
| const { provider: embeddingProvider, modelName: embeddingModelName } = parseEmbeddingModel( | ||
| kb[0].embeddingModel | ||
| ) | ||
|
|
||
| // For Ollama models, query the model's context length and cap chunk size accordingly. | ||
| // TextChunker uses ratio 3 for Ollama (1 estimated token = 3 chars), but the actual | ||
| // Ollama tokenizer may produce ~1 token per 1-2 chars (especially for PDF text with | ||
| // special characters). We use 30% of context length as the safe estimated-token limit | ||
| // so the resulting character count stays well within the model's actual token limit. | ||
| let effectiveChunkSize = processingOptions.chunkSize ?? kbConfig.maxSize | ||
| let effectiveOverlap = processingOptions.chunkOverlap ?? kbConfig.overlap | ||
| let ollamaContextLength: number | undefined | ||
| if (embeddingProvider === 'ollama') { | ||
| ollamaContextLength = await getOllamaModelContextLength( | ||
| embeddingModelName, | ||
| kbConfig.ollamaBaseUrl | ||
| ) | ||
| const safeChunkSize = Math.floor(ollamaContextLength * 0.3) | ||
| if (effectiveChunkSize > safeChunkSize) { | ||
| logger.info( | ||
| `[${documentId}] Capping chunk size from ${effectiveChunkSize} to ${safeChunkSize} tokens ` + | ||
| `(Ollama model ${embeddingModelName} context length: ${ollamaContextLength})` | ||
| ) | ||
| effectiveChunkSize = safeChunkSize | ||
| } | ||
| // Cap overlap to 20% of effective chunk size so overlap doesn't push chunks over context limit | ||
| const maxOverlap = Math.max(0, Math.floor(effectiveChunkSize * 0.2)) | ||
| if (effectiveOverlap > maxOverlap) { | ||
| logger.info( | ||
| `[${documentId}] Capping chunk overlap from ${effectiveOverlap} to ${maxOverlap} tokens ` + | ||
| `(20% of effective chunk size ${effectiveChunkSize})` | ||
| ) | ||
| effectiveOverlap = maxOverlap | ||
| } | ||
| } | ||
|
|
||
| await withTimeout( | ||
| (async () => { | ||
| const processed = await processDocument( | ||
| docData.fileUrl, | ||
| docData.filename, | ||
| docData.mimeType, | ||
| processingOptions.chunkSize ?? kbConfig.maxSize, | ||
| processingOptions.chunkOverlap ?? kbConfig.overlap, | ||
| effectiveChunkSize, | ||
| effectiveOverlap, | ||
| processingOptions.minCharactersPerChunk ?? kbConfig.minSize, | ||
| kb[0].userId, | ||
| kb[0].workspaceId | ||
| kb[0].workspaceId, | ||
| kb[0].embeddingModel | ||
| ) | ||
|
|
||
| if (processed.chunks.length > LARGE_DOC_CONFIG.MAX_CHUNKS_PER_DOCUMENT) { | ||
|
|
@@ -472,7 +520,13 @@ export async function processDocumentAsync( | |
| const batchNum = Math.floor(i / batchSize) + 1 | ||
|
|
||
| logger.info(`[${documentId}] Processing embedding batch ${batchNum}/${totalBatches}`) | ||
| const batchEmbeddings = await generateEmbeddings(batch, undefined, kb[0].workspaceId) | ||
| const batchEmbeddings = await generateEmbeddings( | ||
| batch, | ||
| kb[0].embeddingModel, | ||
| kb[0].workspaceId, | ||
| kbConfig.ollamaBaseUrl, | ||
| ollamaContextLength | ||
| ) | ||
| for (const emb of batchEmbeddings) { | ||
| embeddings.push(emb) | ||
| } | ||
|
|
@@ -523,7 +577,7 @@ export async function processDocumentAsync( | |
| contentLength: chunk.text.length, | ||
| tokenCount: Math.ceil(chunk.text.length / 4), | ||
| embedding: embeddings[chunkIndex] || null, | ||
| embeddingModel: 'text-embedding-3-small', | ||
| embeddingModel: embeddingModelName, | ||
|
cursor[bot] marked this conversation as resolved.
|
||
| startOffset: chunk.metadata.startIndex, | ||
| endOffset: chunk.metadata.endIndex, | ||
| // Copy text tags from document (7 slots) | ||
|
|
@@ -551,34 +605,37 @@ export async function processDocumentAsync( | |
| updatedAt: now, | ||
| })) | ||
|
|
||
| await db.transaction(async (tx) => { | ||
| if (embeddingRecords.length > 0) { | ||
| await tx.delete(embedding).where(eq(embedding.documentId, documentId)) | ||
|
|
||
| const insertBatchSize = LARGE_DOC_CONFIG.MAX_CHUNKS_PER_BATCH | ||
| const batches: (typeof embeddingRecords)[] = [] | ||
| for (let i = 0; i < embeddingRecords.length; i += insertBatchSize) { | ||
| batches.push(embeddingRecords.slice(i, i + insertBatchSize)) | ||
| } | ||
| if (embeddingRecords.length > 0) { | ||
| logger.info(`[${documentId}] Inserting ${embeddingRecords.length} embeddings`) | ||
|
|
||
| logger.info(`[${documentId}] Inserting ${embeddingRecords.length} embeddings`) | ||
| for (const batch of batches) { | ||
| await tx.insert(embedding).values(batch) | ||
| } | ||
| if (embeddingProvider === 'ollama') { | ||
| // Per-KB table: delete old chunks then bulk-insert new ones | ||
| await deleteKBDocumentEmbeddings(knowledgeBaseId, documentId) | ||
| await insertKBEmbeddings(knowledgeBaseId, embeddingRecords, kb[0].embeddingDimension) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
The Ollama path deletes all existing embeddings for the document and then inserts new ones without a wrapping transaction. If the insert fails after the delete succeeds, the document is left with no embeddings at all. The OpenAI path below correctly wraps both operations in a transaction; the Ollama path should do the same. |
||
| } else { | ||
| // Shared embedding table: delete + insert inside a transaction | ||
| await db.transaction(async (tx) => { | ||
| await tx.delete(embedding).where(eq(embedding.documentId, documentId)) | ||
|
|
||
| const insertBatchSize = LARGE_DOC_CONFIG.MAX_CHUNKS_PER_BATCH | ||
| for (let i = 0; i < embeddingRecords.length; i += insertBatchSize) { | ||
| await tx.insert(embedding).values(embeddingRecords.slice(i, i + insertBatchSize)) | ||
| } | ||
| }) | ||
| } | ||
| } | ||
|
|
||
| await tx | ||
| .update(document) | ||
| .set({ | ||
| chunkCount: processed.metadata.chunkCount, | ||
| tokenCount: processed.metadata.tokenCount, | ||
| characterCount: processed.metadata.characterCount, | ||
| processingStatus: 'completed', | ||
| processingCompletedAt: now, | ||
| processingError: null, | ||
| }) | ||
| .where(eq(document.id, documentId)) | ||
| }) | ||
| await db | ||
| .update(document) | ||
| .set({ | ||
| chunkCount: processed.metadata.chunkCount, | ||
| tokenCount: processed.metadata.tokenCount, | ||
| characterCount: processed.metadata.characterCount, | ||
| processingStatus: 'completed', | ||
| processingCompletedAt: now, | ||
| processingError: null, | ||
| }) | ||
| .where(eq(document.id, documentId)) | ||
|
cursor[bot] marked this conversation as resolved.
Outdated
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
In the original code, both the embedding inserts and the document status update ran inside a single transaction. For the OpenAI path, if the transaction (embedding inserts) succeeds but the subsequent status update fails, the document stays permanently in the 'processing' state even though its embeddings were written. This is a regression for the OpenAI path that was introduced by the refactor to support the Ollama code path. The status update should be moved back inside the transaction for the OpenAI path. |
||
| })(), | ||
| TIMEOUTS.OVERALL_PROCESSING, | ||
| 'Document processing' | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.