Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
feat(knowledge): add upsert document operation to Knowledge block
Add a "Create or Update" (upsert) document capability that finds an
existing document by ID or filename, deletes it if found, then creates
a new document and queues re-processing. Includes new tool, API route,
block wiring, and typed interfaces.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
  • Loading branch information
waleedlatif1 and claude committed Mar 18, 2026
commit 8bf63fe6673dd37c8ecb0c6fcfcd032cfc73e850
234 changes: 234 additions & 0 deletions apps/sim/app/api/knowledge/[id]/documents/upsert/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,234 @@
import { randomUUID } from 'crypto'
import { db } from '@sim/db'
import { document } from '@sim/db/schema'
import { createLogger } from '@sim/logger'
import { and, eq, isNull } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { z } from 'zod'
import { AuditAction, AuditResourceType, recordAudit } from '@/lib/audit/log'
import { checkSessionOrInternalAuth } from '@/lib/auth/hybrid'
import {
createDocumentRecords,
deleteDocument,
getProcessingConfig,
processDocumentsWithQueue,
} from '@/lib/knowledge/documents/service'
import { authorizeWorkflowByWorkspacePermission } from '@/lib/workflows/utils'
import { checkKnowledgeBaseWriteAccess } from '@/app/api/knowledge/utils'

const logger = createLogger('DocumentUpsertAPI')

const UpsertDocumentSchema = z.object({
documentId: z.string().optional(),
filename: z.string().min(1, 'Filename is required'),
fileUrl: z.string().min(1, 'File URL is required'),
fileSize: z.number().min(1, 'File size must be greater than 0'),
mimeType: z.string().min(1, 'MIME type is required'),
documentTagsData: z.string().optional(),
processingOptions: z.object({
chunkSize: z.number().min(100).max(4000),
minCharactersPerChunk: z.number().min(1).max(2000),
recipe: z.string(),
lang: z.string(),
chunkOverlap: z.number().min(0).max(500),
}),
workflowId: z.string().optional(),
})

export async function POST(req: NextRequest, { params }: { params: Promise<{ id: string }> }) {
const requestId = randomUUID().slice(0, 8)
const { id: knowledgeBaseId } = await params

try {
const body = await req.json()

logger.info(`[${requestId}] Knowledge base document upsert request`, {
knowledgeBaseId,
hasDocumentId: !!body.documentId,
filename: body.filename,
})

const auth = await checkSessionOrInternalAuth(req, { requireWorkflowId: false })
if (!auth.success || !auth.userId) {
logger.warn(`[${requestId}] Authentication failed: ${auth.error || 'Unauthorized'}`)
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}
const userId = auth.userId

if (body.workflowId) {
const authorization = await authorizeWorkflowByWorkspacePermission({
workflowId: body.workflowId,
userId,
action: 'write',
})
if (!authorization.allowed) {
return NextResponse.json(
{ error: authorization.message || 'Access denied' },
{ status: authorization.status }
)
}
}
Comment thread
waleedlatif1 marked this conversation as resolved.
Outdated

const accessCheck = await checkKnowledgeBaseWriteAccess(knowledgeBaseId, userId)

if (!accessCheck.hasAccess) {
if ('notFound' in accessCheck && accessCheck.notFound) {
logger.warn(`[${requestId}] Knowledge base not found: ${knowledgeBaseId}`)
return NextResponse.json({ error: 'Knowledge base not found' }, { status: 404 })
}
logger.warn(
`[${requestId}] User ${userId} attempted to upsert document in unauthorized knowledge base ${knowledgeBaseId}`
)
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}

const validatedData = UpsertDocumentSchema.parse(body)

let existingDocumentId: string | null = null
let isUpdate = false

if (validatedData.documentId) {
const existingDoc = await db
.select({ id: document.id })
.from(document)
.where(
and(
eq(document.id, validatedData.documentId),
eq(document.knowledgeBaseId, knowledgeBaseId),
isNull(document.deletedAt)
)
)
.limit(1)

if (existingDoc.length > 0) {
existingDocumentId = existingDoc[0].id
}
}

if (!existingDocumentId) {
const docsByFilename = await db
.select({ id: document.id })
.from(document)
.where(
and(
eq(document.filename, validatedData.filename),
eq(document.knowledgeBaseId, knowledgeBaseId),
isNull(document.deletedAt)
)
)
.limit(1)

if (docsByFilename.length > 0) {
existingDocumentId = docsByFilename[0].id
}
}
Comment thread
waleedlatif1 marked this conversation as resolved.
Outdated
Comment thread
waleedlatif1 marked this conversation as resolved.
Outdated

if (existingDocumentId) {
isUpdate = true
logger.info(
`[${requestId}] Found existing document ${existingDocumentId}, deleting before re-creation`
)
await deleteDocument(existingDocumentId, requestId)
}

const createdDocuments = await createDocumentRecords(
[
{
filename: validatedData.filename,
fileUrl: validatedData.fileUrl,
fileSize: validatedData.fileSize,
mimeType: validatedData.mimeType,
...(validatedData.documentTagsData && {
documentTagsData: validatedData.documentTagsData,
}),
},
],
knowledgeBaseId,
requestId
)
Comment thread
waleedlatif1 marked this conversation as resolved.

const firstDocument = createdDocuments[0]

processDocumentsWithQueue(
createdDocuments,
knowledgeBaseId,
validatedData.processingOptions,
requestId
).catch((error: unknown) => {
logger.error(`[${requestId}] Critical error in document processing pipeline:`, error)
})

try {
const { PlatformEvents } = await import('@/lib/core/telemetry')
PlatformEvents.knowledgeBaseDocumentsUploaded({
knowledgeBaseId,
documentsCount: 1,
uploadType: 'single',
chunkSize: validatedData.processingOptions.chunkSize,
recipe: validatedData.processingOptions.recipe,
})
} catch (_e) {
// Silently fail
}

recordAudit({
workspaceId: accessCheck.knowledgeBase?.workspaceId ?? null,
actorId: userId,
actorName: auth.userName,
actorEmail: auth.userEmail,
action: isUpdate ? AuditAction.DOCUMENT_UPDATED : AuditAction.DOCUMENT_UPLOADED,
resourceType: AuditResourceType.DOCUMENT,
resourceId: knowledgeBaseId,
resourceName: validatedData.filename,
description: isUpdate
? `Upserted (replaced) document "${validatedData.filename}" in knowledge base "${knowledgeBaseId}"`
: `Upserted (created) document "${validatedData.filename}" in knowledge base "${knowledgeBaseId}"`,
metadata: {
fileName: validatedData.filename,
previousDocumentId: existingDocumentId,
isUpdate,
},
request: req,
})

return NextResponse.json({
success: true,
data: {
documentsCreated: [
{
documentId: firstDocument.documentId,
filename: firstDocument.filename,
status: 'pending',
},
],
isUpdate,
previousDocumentId: existingDocumentId,
processingMethod: 'background',
processingConfig: {
maxConcurrentDocuments: getProcessingConfig().maxConcurrentDocuments,
batchSize: getProcessingConfig().batchSize,
},
},
})
} catch (error) {
if (error instanceof z.ZodError) {
logger.warn(`[${requestId}] Invalid upsert request data`, { errors: error.errors })
return NextResponse.json(
{ error: 'Invalid request data', details: error.errors },
{ status: 400 }
)
}

logger.error(`[${requestId}] Error upserting document`, error)

const errorMessage = error instanceof Error ? error.message : 'Failed to upsert document'
const isStorageLimitError =
errorMessage.includes('Storage limit exceeded') || errorMessage.includes('storage limit')
const isMissingKnowledgeBase = errorMessage === 'Knowledge base not found'

return NextResponse.json(
{ error: errorMessage },
{ status: isMissingKnowledgeBase ? 404 : isStorageLimitError ? 413 : 500 }
)
}
}
38 changes: 37 additions & 1 deletion apps/sim/blocks/blocks/knowledge.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ export const KnowledgeBlock: BlockConfig = {
{ label: 'List Documents', id: 'list_documents' },
{ label: 'Get Document', id: 'get_document' },
{ label: 'Create Document', id: 'create_document' },
{ label: 'Upsert Document', id: 'upsert_document' },
{ label: 'Delete Document', id: 'delete_document' },
{ label: 'List Chunks', id: 'list_chunks' },
{ label: 'Upload Chunk', id: 'upload_chunk' },
Expand Down Expand Up @@ -198,7 +199,33 @@ export const KnowledgeBlock: BlockConfig = {
title: 'Document Tags',
type: 'document-tag-entry',
dependsOn: ['knowledgeBaseSelector'],
condition: { field: 'operation', value: 'create_document' },
condition: { field: 'operation', value: ['create_document', 'upsert_document'] },
},

// --- Upsert Document ---
{
id: 'name',
title: 'Document Name',
type: 'short-input',
placeholder: 'Enter document name',
required: true,
condition: { field: 'operation', value: 'upsert_document' },
},
{
id: 'content',
title: 'Document Content',
type: 'long-input',
placeholder: 'Enter the document content',
rows: 6,
required: true,
condition: { field: 'operation', value: 'upsert_document' },
},
{
id: 'upsertDocumentId',
title: 'Document ID (Optional)',
type: 'short-input',
placeholder: 'Enter existing document ID to update (or leave empty to match by name)',
condition: { field: 'operation', value: 'upsert_document' },
},
Comment thread
waleedlatif1 marked this conversation as resolved.
Outdated
Comment thread
waleedlatif1 marked this conversation as resolved.

// --- Update Chunk / Delete Chunk ---
Expand Down Expand Up @@ -264,6 +291,7 @@ export const KnowledgeBlock: BlockConfig = {
'knowledge_search',
'knowledge_upload_chunk',
'knowledge_create_document',
'knowledge_upsert_document',
'knowledge_list_tags',
'knowledge_list_documents',
'knowledge_get_document',
Expand All @@ -284,6 +312,8 @@ export const KnowledgeBlock: BlockConfig = {
return 'knowledge_upload_chunk'
case 'create_document':
return 'knowledge_create_document'
case 'upsert_document':
return 'knowledge_upsert_document'
case 'list_tags':
return 'knowledge_list_tags'
case 'list_documents':
Expand Down Expand Up @@ -355,6 +385,11 @@ export const KnowledgeBlock: BlockConfig = {
if (params.chunkEnabledFilter) params.enabled = params.chunkEnabledFilter
}

// Map upsert sub-block field to tool param
if (params.operation === 'upsert_document' && params.upsertDocumentId) {
params.documentId = String(params.upsertDocumentId).trim()
}

// Convert enabled dropdown string to boolean for update_chunk
if (params.operation === 'update_chunk' && typeof params.enabled === 'string') {
params.enabled = params.enabled === 'true'
Expand Down Expand Up @@ -382,6 +417,7 @@ export const KnowledgeBlock: BlockConfig = {
documentTags: { type: 'string', description: 'Document tags' },
chunkSearch: { type: 'string', description: 'Search filter for chunks' },
chunkEnabledFilter: { type: 'string', description: 'Filter chunks by enabled status' },
upsertDocumentId: { type: 'string', description: 'Document ID for upsert operation' },
connectorId: { type: 'string', description: 'Connector identifier' },
},
outputs: {
Expand Down
2 changes: 2 additions & 0 deletions apps/sim/tools/knowledge/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import { knowledgeSearchTool } from '@/tools/knowledge/search'
import { knowledgeTriggerSyncTool } from '@/tools/knowledge/trigger_sync'
import { knowledgeUpdateChunkTool } from '@/tools/knowledge/update_chunk'
import { knowledgeUploadChunkTool } from '@/tools/knowledge/upload_chunk'
import { knowledgeUpsertDocumentTool } from '@/tools/knowledge/upsert_document'

export {
knowledgeSearchTool,
Expand All @@ -26,4 +27,5 @@ export {
knowledgeListConnectorsTool,
knowledgeGetConnectorTool,
knowledgeTriggerSyncTool,
knowledgeUpsertDocumentTool,
}
30 changes: 30 additions & 0 deletions apps/sim/tools/knowledge/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -286,3 +286,33 @@ export interface KnowledgeTriggerSyncResponse {
}
error?: string
}

export interface KnowledgeUpsertDocumentParams {
knowledgeBaseId: string
name: string
content: string
documentId?: string
documentTags?: Record<string, unknown>
_context?: { workflowId?: string }
}

export interface KnowledgeUpsertDocumentResult {
documentId: string
documentName: string
type: string
enabled: boolean
isUpdate: boolean
previousDocumentId: string | null
createdAt: string
updatedAt: string
}

export interface KnowledgeUpsertDocumentResponse {
success: boolean
output: {
data: KnowledgeUpsertDocumentResult
message: string
documentId: string
}
error?: string
}
Loading
Loading