Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix(knowledge): simplify embedding billing — use calculateCost, return modelName

- Use calculateCost() from @/providers/utils instead of inline formula, consistent
  with how LLM billing works throughout the platform
- Return modelName from GenerateEmbeddingsResult so billing uses the actual model
  (handles custom Azure deployments) instead of a hardcoded fallback string
- Fix docs-chunker.ts empty-path fallback to satisfy full GenerateEmbeddingsResult type
  • Loading branch information
waleedlatif1 committed Apr 3, 2026
commit cad0c2ccbc4f0e81941b8285fea72544dc5945a9
7 changes: 6 additions & 1 deletion apps/sim/lib/chunkers/docs-chunker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,12 @@ export class DocsChunker {
const { embeddings } =
textChunks.length > 0
? await generateEmbeddings(textChunks)
: { embeddings: [] as number[][] }
: {
embeddings: [] as number[][],
totalTokens: 0,
isBYOK: false,
modelName: 'text-embedding-3-small',
}
const embeddingModel = 'text-embedding-3-small'

const chunks: DocChunk[] = []
Expand Down
20 changes: 14 additions & 6 deletions apps/sim/lib/knowledge/documents/service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ import type { ProcessedDocumentTags } from '@/lib/knowledge/types'
import { deleteFile } from '@/lib/uploads/core/storage-service'
import { extractStorageKey } from '@/lib/uploads/utils/file-utils'
import type { DocumentProcessingPayload } from '@/background/knowledge-processing'
import { getEmbeddingModelPricing } from '@/providers/models'
import { calculateCost } from '@/providers/utils'

const logger = createLogger('DocumentService')

Expand Down Expand Up @@ -464,6 +464,7 @@ export async function processDocumentAsync(

let totalEmbeddingTokens = 0
let embeddingIsBYOK = false
let embeddingModelName = 'text-embedding-3-small'

await withTimeout(
(async () => {
Expand Down Expand Up @@ -509,12 +510,14 @@ export async function processDocumentAsync(
embeddings: batchEmbeddings,
totalTokens: batchTokens,
isBYOK,
modelName,
} = await generateEmbeddings(batch, undefined, kb[0].workspaceId)
for (const emb of batchEmbeddings) {
embeddings.push(emb)
}
totalEmbeddingTokens += batchTokens
embeddingIsBYOK = isBYOK
embeddingModelName = modelName
}
}

Expand Down Expand Up @@ -652,18 +655,23 @@ export async function processDocumentAsync(

if (!embeddingIsBYOK && totalEmbeddingTokens > 0 && kb[0].userId) {
try {
const embeddingModel = 'text-embedding-3-small'
const pricing = getEmbeddingModelPricing(embeddingModel)
if (pricing) {
const cost = (totalEmbeddingTokens / 1_000_000) * pricing.input * getCostMultiplier()
const costMultiplier = getCostMultiplier()
const { total: cost } = calculateCost(
embeddingModelName,
totalEmbeddingTokens,
0,
false,
costMultiplier
)
if (cost > 0) {
Comment thread
waleedlatif1 marked this conversation as resolved.
await recordUsage({
userId: kb[0].userId,
workspaceId: kb[0].workspaceId ?? undefined,
entries: [
{
category: 'model',
source: 'knowledge-base',
description: embeddingModel,
description: embeddingModelName,
cost,
metadata: { inputTokens: totalEmbeddingTokens, outputTokens: 0 },
},
Expand Down
12 changes: 9 additions & 3 deletions apps/sim/lib/knowledge/embeddings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@
const data: EmbeddingAPIResponse = await response.json()
return {
embeddings: data.data.map((item) => item.embedding),
totalTokens: data.usage.total_tokens,

Check failure on line 153 in apps/sim/lib/knowledge/embeddings.ts

View workflow job for this annotation

GitHub Actions / Test and Build / Test and Build

app/api/knowledge/search/utils.test.ts > Knowledge Search Utils > generateSearchEmbedding > should include correct request body for OpenAI

TypeError: Cannot read properties of undefined (reading 'total_tokens') ❯ maxRetries lib/knowledge/embeddings.ts:153:33 ❯ Module.generateSearchEmbedding lib/knowledge/embeddings.ts:257:26 ❯ app/api/knowledge/search/utils.test.ts:398:7

Check failure on line 153 in apps/sim/lib/knowledge/embeddings.ts

View workflow job for this annotation

GitHub Actions / Test and Build / Test and Build

app/api/knowledge/search/utils.test.ts > Knowledge Search Utils > generateSearchEmbedding > should include correct request body for Azure OpenAI

TypeError: Cannot read properties of undefined (reading 'total_tokens') ❯ maxRetries lib/knowledge/embeddings.ts:153:33 ❯ Module.generateSearchEmbedding lib/knowledge/embeddings.ts:257:26 ❯ app/api/knowledge/search/utils.test.ts:367:7

Check failure on line 153 in apps/sim/lib/knowledge/embeddings.ts

View workflow job for this annotation

GitHub Actions / Test and Build / Test and Build

app/api/knowledge/search/utils.test.ts > Knowledge Search Utils > generateSearchEmbedding > should use custom model name when provided in Azure config

TypeError: Cannot read properties of undefined (reading 'total_tokens') ❯ maxRetries lib/knowledge/embeddings.ts:153:33 ❯ Module.generateSearchEmbedding lib/knowledge/embeddings.ts:257:26 ❯ app/api/knowledge/search/utils.test.ts:284:7

Check failure on line 153 in apps/sim/lib/knowledge/embeddings.ts

View workflow job for this annotation

GitHub Actions / Test and Build / Test and Build

app/api/knowledge/search/utils.test.ts > Knowledge Search Utils > generateSearchEmbedding > should use default API version when not provided in Azure config

TypeError: Cannot read properties of undefined (reading 'total_tokens') ❯ maxRetries lib/knowledge/embeddings.ts:153:33 ❯ Module.generateSearchEmbedding lib/knowledge/embeddings.ts:257:26 ❯ app/api/knowledge/search/utils.test.ts:254:7

Check failure on line 153 in apps/sim/lib/knowledge/embeddings.ts

View workflow job for this annotation

GitHub Actions / Test and Build / Test and Build

app/api/knowledge/search/utils.test.ts > Knowledge Search Utils > generateSearchEmbedding > should fallback to OpenAI when no KB Azure config provided

TypeError: Cannot read properties of undefined (reading 'total_tokens') ❯ maxRetries lib/knowledge/embeddings.ts:153:33 ❯ Module.generateSearchEmbedding lib/knowledge/embeddings.ts:257:26 ❯ app/api/knowledge/search/utils.test.ts:220:22

Check failure on line 153 in apps/sim/lib/knowledge/embeddings.ts

View workflow job for this annotation

GitHub Actions / Test and Build / Test and Build

app/api/knowledge/search/utils.test.ts > Knowledge Search Utils > generateSearchEmbedding > should use Azure OpenAI when KB-specific config is provided

TypeError: Cannot read properties of undefined (reading 'total_tokens') ❯ maxRetries lib/knowledge/embeddings.ts:153:33 ❯ Module.generateSearchEmbedding lib/knowledge/embeddings.ts:257:26 ❯ app/api/knowledge/search/utils.test.ts:189:22
}
},
{
Expand Down Expand Up @@ -193,12 +193,13 @@
embeddings: number[][]
totalTokens: number
isBYOK: boolean
modelName: string
}

/**
* Generate embeddings for multiple texts with token-aware batching and parallel processing.
* Returns embeddings alongside the actual token count from the API and whether a BYOK key was used.
* Callers should use `totalTokens` and `isBYOK` to record billing via `recordUsage`.
* Returns embeddings alongside actual token count, model name, and whether a workspace BYOK key
* was used (vs. the platform's shared key) — enabling callers to make correct billing decisions.
*/
export async function generateEmbeddings(
texts: string[],
Expand Down Expand Up @@ -231,7 +232,12 @@
totalTokens += batch.totalTokens
}

return { embeddings: allEmbeddings, totalTokens, isBYOK: config.isBYOK }
return {
embeddings: allEmbeddings,
totalTokens,
isBYOK: config.isBYOK,
modelName: config.modelName,
}
}

/**
Expand Down
Loading