fix(knowledge): infer MIME type from file extension in create/upsert tools

Both create_document and upsert_document forced a .txt extension and the text/plain MIME type regardless of the document name. The tools now infer the MIME type from the file extension (html, htm, md, csv, json, yaml, yml, xml, txt) and only default to .txt when no extension is given; a name with an unrecognized extension is kept as-is and sent as text/plain.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
simstudioai · waleedlatif1 · Mar 18, 2026 · Mar 18, 2026 · Mar 18, 2026 · Mar 18, 2026
commit daeaea6059dfeedde365bbc77f8afdffa188acde
diff --git a/apps/sim/tools/knowledge/create_document.ts b/apps/sim/tools/knowledge/create_document.ts
@@ -1,4 +1,7 @@
-import type { KnowledgeCreateDocumentResponse } from '@/tools/knowledge/types'
+import {
+  inferDocumentFileInfo,
+  type KnowledgeCreateDocumentResponse,
+} from '@/tools/knowledge/types'
 import { enrichKBTagsSchema } from '@/tools/schema-enrichers'
 import { formatDocumentTagsForAPI, parseDocumentTags } from '@/tools/shared/tags'
 import type { ToolConfig } from '@/tools/types'
@@ -75,18 +78,18 @@ export const knowledgeCreateDocumentTool: ToolConfig<any, KnowledgeCreateDocumen
           ? Buffer.from(textContent, 'utf8').toString('base64')
           : btoa(String.fromCharCode(...utf8Bytes))
 
-      const dataUri = `data:text/plain;base64,${base64Content}`
+      const { filename, mimeType } = inferDocumentFileInfo(documentName)
+      const dataUri = `data:${mimeType};base64,${base64Content}`
 
-      // Parse document tags from various formats (object, array, JSON string)
       const parsedTags = parseDocumentTags(params.documentTags)
       const tagData = formatDocumentTagsForAPI(parsedTags)
 
       const documents = [
         {
-          filename: documentName.endsWith('.txt') ? documentName : `${documentName}.txt`,
+          filename,
           fileUrl: dataUri,
           fileSize: contentBytes,
-          mimeType: 'text/plain',
+          mimeType,
           ...tagData,
         },
       ]

diff --git a/apps/sim/tools/knowledge/types.ts b/apps/sim/tools/knowledge/types.ts
@@ -1,3 +1,38 @@
+/** MIME type by lowercase, dot-less extension; null-prototype so no key is ever inherited. */
+const EXTENSION_MIME_MAP: Record<string, string> = Object.assign(Object.create(null), {
+  html: 'text/html',
+  htm: 'text/html',
+  md: 'text/markdown',
+  csv: 'text/csv',
+  json: 'application/json',
+  yaml: 'application/x-yaml',
+  yml: 'application/x-yaml',
+  xml: 'application/xml',
+  txt: 'text/plain',
+})
+
+/** Infers MIME type from a file extension (case-insensitive, no dot); `text/plain` if unknown. */
+export function getMimeTypeFromExtension(ext: string): string {
+  // With no prototype on the map, 'constructor' or '__proto__' miss and fall back like any other.
+  return EXTENSION_MIME_MAP[ext.toLowerCase()] ?? 'text/plain'
+}
+
+/**
+ * Derives the filename and MIME type for a document name. If the last `.` sits past index 0,
+ * the name is kept and typed by the text after it; otherwise `.txt` is appended (`text/plain`).
+ */
+export function inferDocumentFileInfo(documentName: string): {
+  filename: string
+  mimeType: string
+} {
+  const lastDot = documentName.lastIndexOf('.')
+  if (lastDot <= 0) {
+    return { filename: `${documentName}.txt`, mimeType: 'text/plain' }
+  }
+  const extension = documentName.slice(lastDot + 1).toLowerCase()
+  return { filename: documentName, mimeType: getMimeTypeFromExtension(extension) }
+}
+
 export interface KnowledgeSearchResult {
   documentId: string
   documentName: string

diff --git a/apps/sim/tools/knowledge/upsert_document.ts b/apps/sim/tools/knowledge/upsert_document.ts
@@ -1,6 +1,7 @@
-import type {
-  KnowledgeUpsertDocumentParams,
-  KnowledgeUpsertDocumentResponse,
+import {
+  inferDocumentFileInfo,
+  type KnowledgeUpsertDocumentParams,
+  type KnowledgeUpsertDocumentResponse,
 } from '@/tools/knowledge/types'
 import { enrichKBTagsSchema } from '@/tools/schema-enrichers'
 import { formatDocumentTagsForAPI, parseDocumentTags } from '@/tools/shared/tags'
@@ -94,18 +95,17 @@ export const knowledgeUpsertDocumentTool: ToolConfig<
         base64Content = btoa(binary)
       }
 
-      const dataUri = `data:text/plain;base64,${base64Content}`
+      const { filename, mimeType } = inferDocumentFileInfo(documentName)
+      const dataUri = `data:${mimeType};base64,${base64Content}`
 
       const parsedTags = parseDocumentTags(params.documentTags)
       const tagData = formatDocumentTagsForAPI(parsedTags)
 
-      const filename = documentName.endsWith('.txt') ? documentName : `${documentName}.txt`
-
       const requestBody: Record<string, unknown> = {
         filename,
         fileUrl: dataUri,
         fileSize: contentBytes,
-        mimeType: 'text/plain',
+        mimeType,
         ...tagData,
         processingOptions: {
           chunkSize: 1024,