Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix(knowledge): simplify embedding billing — use calculateCost, return modelName

- Use calculateCost() from @/providers/utils instead of inline formula, consistent
  with how LLM billing works throughout the platform
- Return modelName from GenerateEmbeddingsResult so billing uses the actual model
  (handles custom Azure deployments) instead of a hardcoded fallback string
- Fix docs-chunker.ts empty-path fallback to satisfy full GenerateEmbeddingsResult type
  • Loading branch information
waleedlatif1 committed Apr 3, 2026
commit cad0c2ccbc4f0e81941b8285fea72544dc5945a9
7 changes: 6 additions & 1 deletion apps/sim/lib/chunkers/docs-chunker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,12 @@ export class DocsChunker {
const { embeddings } =
textChunks.length > 0
? await generateEmbeddings(textChunks)
: { embeddings: [] as number[][] }
: {
embeddings: [] as number[][],
totalTokens: 0,
isBYOK: false,
modelName: 'text-embedding-3-small',
}
const embeddingModel = 'text-embedding-3-small'

const chunks: DocChunk[] = []
Expand Down
20 changes: 14 additions & 6 deletions apps/sim/lib/knowledge/documents/service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ import type { ProcessedDocumentTags } from '@/lib/knowledge/types'
import { deleteFile } from '@/lib/uploads/core/storage-service'
import { extractStorageKey } from '@/lib/uploads/utils/file-utils'
import type { DocumentProcessingPayload } from '@/background/knowledge-processing'
import { getEmbeddingModelPricing } from '@/providers/models'
import { calculateCost } from '@/providers/utils'

const logger = createLogger('DocumentService')

Expand Down Expand Up @@ -464,6 +464,7 @@ export async function processDocumentAsync(

let totalEmbeddingTokens = 0
let embeddingIsBYOK = false
let embeddingModelName = 'text-embedding-3-small'

await withTimeout(
(async () => {
Expand Down Expand Up @@ -509,12 +510,14 @@ export async function processDocumentAsync(
embeddings: batchEmbeddings,
totalTokens: batchTokens,
isBYOK,
modelName,
} = await generateEmbeddings(batch, undefined, kb[0].workspaceId)
for (const emb of batchEmbeddings) {
embeddings.push(emb)
}
totalEmbeddingTokens += batchTokens
embeddingIsBYOK = isBYOK
embeddingModelName = modelName
}
}

Expand Down Expand Up @@ -652,18 +655,23 @@ export async function processDocumentAsync(

if (!embeddingIsBYOK && totalEmbeddingTokens > 0 && kb[0].userId) {
try {
const embeddingModel = 'text-embedding-3-small'
const pricing = getEmbeddingModelPricing(embeddingModel)
if (pricing) {
const cost = (totalEmbeddingTokens / 1_000_000) * pricing.input * getCostMultiplier()
const costMultiplier = getCostMultiplier()
const { total: cost } = calculateCost(
embeddingModelName,
totalEmbeddingTokens,
0,
false,
costMultiplier
)
if (cost > 0) {
Comment thread
waleedlatif1 marked this conversation as resolved.
await recordUsage({
userId: kb[0].userId,
workspaceId: kb[0].workspaceId ?? undefined,
entries: [
{
category: 'model',
source: 'knowledge-base',
description: embeddingModel,
description: embeddingModelName,
cost,
metadata: { inputTokens: totalEmbeddingTokens, outputTokens: 0 },
},
Expand Down
12 changes: 9 additions & 3 deletions apps/sim/lib/knowledge/embeddings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@
const data: EmbeddingAPIResponse = await response.json()
return {
embeddings: data.data.map((item) => item.embedding),
totalTokens: data.usage.total_tokens,

Check failure on line 153 in apps/sim/lib/knowledge/embeddings.ts

View workflow job for this annotation

GitHub Actions / Test and Build / Test and Build

app/api/knowledge/search/utils.test.ts > Knowledge Search Utils > generateSearchEmbedding > should include correct request body for OpenAI

TypeError: Cannot read properties of undefined (reading 'total_tokens') ❯ maxRetries lib/knowledge/embeddings.ts:153:33 ❯ Module.generateSearchEmbedding lib/knowledge/embeddings.ts:257:26 ❯ app/api/knowledge/search/utils.test.ts:398:7

Check failure on line 153 in apps/sim/lib/knowledge/embeddings.ts

View workflow job for this annotation

GitHub Actions / Test and Build / Test and Build

app/api/knowledge/search/utils.test.ts > Knowledge Search Utils > generateSearchEmbedding > should include correct request body for Azure OpenAI

TypeError: Cannot read properties of undefined (reading 'total_tokens') ❯ maxRetries lib/knowledge/embeddings.ts:153:33 ❯ Module.generateSearchEmbedding lib/knowledge/embeddings.ts:257:26 ❯ app/api/knowledge/search/utils.test.ts:367:7

Check failure on line 153 in apps/sim/lib/knowledge/embeddings.ts

View workflow job for this annotation

GitHub Actions / Test and Build / Test and Build

app/api/knowledge/search/utils.test.ts > Knowledge Search Utils > generateSearchEmbedding > should use custom model name when provided in Azure config

TypeError: Cannot read properties of undefined (reading 'total_tokens') ❯ maxRetries lib/knowledge/embeddings.ts:153:33 ❯ Module.generateSearchEmbedding lib/knowledge/embeddings.ts:257:26 ❯ app/api/knowledge/search/utils.test.ts:284:7

Check failure on line 153 in apps/sim/lib/knowledge/embeddings.ts

View workflow job for this annotation

GitHub Actions / Test and Build / Test and Build

app/api/knowledge/search/utils.test.ts > Knowledge Search Utils > generateSearchEmbedding > should use default API version when not provided in Azure config

TypeError: Cannot read properties of undefined (reading 'total_tokens') ❯ maxRetries lib/knowledge/embeddings.ts:153:33 ❯ Module.generateSearchEmbedding lib/knowledge/embeddings.ts:257:26 ❯ app/api/knowledge/search/utils.test.ts:254:7

Check failure on line 153 in apps/sim/lib/knowledge/embeddings.ts

View workflow job for this annotation

GitHub Actions / Test and Build / Test and Build

app/api/knowledge/search/utils.test.ts > Knowledge Search Utils > generateSearchEmbedding > should fallback to OpenAI when no KB Azure config provided

TypeError: Cannot read properties of undefined (reading 'total_tokens') ❯ maxRetries lib/knowledge/embeddings.ts:153:33 ❯ Module.generateSearchEmbedding lib/knowledge/embeddings.ts:257:26 ❯ app/api/knowledge/search/utils.test.ts:220:22

Check failure on line 153 in apps/sim/lib/knowledge/embeddings.ts

View workflow job for this annotation

GitHub Actions / Test and Build / Test and Build

app/api/knowledge/search/utils.test.ts > Knowledge Search Utils > generateSearchEmbedding > should use Azure OpenAI when KB-specific config is provided

TypeError: Cannot read properties of undefined (reading 'total_tokens') ❯ maxRetries lib/knowledge/embeddings.ts:153:33 ❯ Module.generateSearchEmbedding lib/knowledge/embeddings.ts:257:26 ❯ app/api/knowledge/search/utils.test.ts:189:22
}
},
{
Expand Down Expand Up @@ -193,12 +193,13 @@
embeddings: number[][]
totalTokens: number
isBYOK: boolean
modelName: string
}

/**
* Generate embeddings for multiple texts with token-aware batching and parallel processing.
* Returns embeddings alongside the actual token count from the API and whether a BYOK key was used.
* Callers should use `totalTokens` and `isBYOK` to record billing via `recordUsage`.
* Returns embeddings alongside actual token count, model name, and whether a workspace BYOK key
* was used (vs. the platform's shared key) — enabling callers to make correct billing decisions.
*/
export async function generateEmbeddings(
texts: string[],
Expand Down Expand Up @@ -231,7 +232,12 @@
totalTokens += batch.totalTokens
}

return { embeddings: allEmbeddings, totalTokens, isBYOK: config.isBYOK }
return {
embeddings: allEmbeddings,
totalTokens,
isBYOK: config.isBYOK,
modelName: config.modelName,
}
}

/**
Expand Down
Loading