Skip to content
Merged
Next Next commit
fix(knowledge): infer MIME type from file extension in create/upsert tools

Both create_document and upsert_document forced a .txt extension and the
text/plain MIME type regardless of the document name. The tools now
infer the MIME type from the file extension (html, htm, md, csv, json,
yaml, yml, xml, txt) and append .txt only when no extension is given.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
  • Loading branch information
waleedlatif1 and claude committed Mar 18, 2026
commit daeaea6059dfeedde365bbc77f8afdffa188acde
13 changes: 8 additions & 5 deletions apps/sim/tools/knowledge/create_document.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
import type { KnowledgeCreateDocumentResponse } from '@/tools/knowledge/types'
import {
inferDocumentFileInfo,
type KnowledgeCreateDocumentResponse,
} from '@/tools/knowledge/types'
import { enrichKBTagsSchema } from '@/tools/schema-enrichers'
import { formatDocumentTagsForAPI, parseDocumentTags } from '@/tools/shared/tags'
import type { ToolConfig } from '@/tools/types'
Expand Down Expand Up @@ -75,18 +78,18 @@ export const knowledgeCreateDocumentTool: ToolConfig<any, KnowledgeCreateDocumen
? Buffer.from(textContent, 'utf8').toString('base64')
: btoa(String.fromCharCode(...utf8Bytes))

const dataUri = `data:text/plain;base64,${base64Content}`
const { filename, mimeType } = inferDocumentFileInfo(documentName)
const dataUri = `data:${mimeType};base64,${base64Content}`

// Parse document tags from various formats (object, array, JSON string)
const parsedTags = parseDocumentTags(params.documentTags)
const tagData = formatDocumentTagsForAPI(parsedTags)

const documents = [
{
filename: documentName.endsWith('.txt') ? documentName : `${documentName}.txt`,
filename,
fileUrl: dataUri,
fileSize: contentBytes,
mimeType: 'text/plain',
mimeType,
...tagData,
},
]
Expand Down
35 changes: 35 additions & 0 deletions apps/sim/tools/knowledge/types.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,38 @@
/**
 * Lowercase file extension (without the leading dot) mapped to the MIME type
 * reported for documents created from inline text content.
 */
const EXTENSION_MIME_MAP: Record<string, string> = {
  html: 'text/html',
  htm: 'text/html',
  md: 'text/markdown',
  csv: 'text/csv',
  json: 'application/json',
  yaml: 'application/x-yaml',
  yml: 'application/x-yaml',
  xml: 'application/xml',
  txt: 'text/plain',
}

/**
 * Infers a MIME type from a file extension.
 *
 * @param ext - File extension without the leading dot; matched case-insensitively.
 * @returns The mapped MIME type, or `text/plain` when the extension is not in the map.
 */
export function getMimeTypeFromExtension(ext: string): string {
  const key = ext.toLowerCase()
  // Only consult own properties: a bare index lookup on an object literal would
  // also resolve inherited keys such as `constructor` or `__proto__` and return
  // a non-string value instead of falling back to `text/plain`.
  const mimeType = Object.prototype.hasOwnProperty.call(EXTENSION_MIME_MAP, key)
    ? EXTENSION_MIME_MAP[key]
    : undefined
  return mimeType ?? 'text/plain'
}

/**
 * Derives the filename and MIME type to store for a document from its name.
 *
 * The extension is the text after the last `.` in the name. A dot at index 0
 * (for example `.env`) is not treated as an extension separator, and a name
 * that ends with `.` is treated as having no extension.
 *
 * @param documentName - Document name as supplied by the caller.
 * @returns The name unchanged plus the MIME type inferred from its extension
 *   when an extension is present; otherwise the name with a `.txt` extension
 *   and `text/plain`.
 */
export function inferDocumentFileInfo(documentName: string): {
  filename: string
  mimeType: string
} {
  const dotIndex = documentName.lastIndexOf('.')
  const ext = dotIndex > 0 ? documentName.slice(dotIndex + 1) : ''

  if (ext !== '') {
    // getMimeTypeFromExtension lowercases its argument, so no normalization is needed here.
    return { filename: documentName, mimeType: getMimeTypeFromExtension(ext) }
  }

  // Reaching this point with dotIndex > 0 means the name ends with '.', so reuse
  // that dot instead of producing a doubled one (e.g. `notes.` -> `notes.txt`).
  const separator = dotIndex > 0 ? '' : '.'
  return { filename: `${documentName}${separator}txt`, mimeType: 'text/plain' }
}

export interface KnowledgeSearchResult {
documentId: string
documentName: string
Expand Down
14 changes: 7 additions & 7 deletions apps/sim/tools/knowledge/upsert_document.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import type {
KnowledgeUpsertDocumentParams,
KnowledgeUpsertDocumentResponse,
import {
inferDocumentFileInfo,
type KnowledgeUpsertDocumentParams,
type KnowledgeUpsertDocumentResponse,
} from '@/tools/knowledge/types'
import { enrichKBTagsSchema } from '@/tools/schema-enrichers'
import { formatDocumentTagsForAPI, parseDocumentTags } from '@/tools/shared/tags'
Expand Down Expand Up @@ -94,18 +95,17 @@ export const knowledgeUpsertDocumentTool: ToolConfig<
base64Content = btoa(binary)
}

const dataUri = `data:text/plain;base64,${base64Content}`
const { filename, mimeType } = inferDocumentFileInfo(documentName)
const dataUri = `data:${mimeType};base64,${base64Content}`

const parsedTags = parseDocumentTags(params.documentTags)
const tagData = formatDocumentTagsForAPI(parsedTags)

const filename = documentName.endsWith('.txt') ? documentName : `${documentName}.txt`

const requestBody: Record<string, unknown> = {
filename,
fileUrl: dataUri,
fileSize: contentBytes,
mimeType: 'text/plain',
mimeType,
...tagData,
processingOptions: {
chunkSize: 1024,
Expand Down
Loading