From ab41b14e7ae0367aa45ade1323e73f1ca1c115a7 Mon Sep 17 00:00:00 2001 From: waleed Date: Mon, 22 Jun 2026 12:47:28 -0700 Subject: [PATCH 1/6] feat(providers): add Sakana AI provider with Fugu models OpenAI-compatible provider at https://api.sakana.ai/v1 (bearer auth). Registers fugu (fast default) and fugu-ultra (reasoning flagship), both 1M context. BYOK-only, never hosted/auto-billed. Streaming, tool loop, and response_format supported; attachments mirror deepseek (unsupported in the current adapter). --- apps/sim/components/icons.tsx | 10 + apps/sim/lib/tokenization/constants.ts | 5 + apps/sim/providers/attachments.ts | 6 +- apps/sim/providers/models.test.ts | 32 ++ apps/sim/providers/models.ts | 42 ++ apps/sim/providers/registry.ts | 2 + apps/sim/providers/sakana/index.ts | 546 +++++++++++++++++++++++++ apps/sim/providers/sakana/utils.ts | 14 + apps/sim/providers/types.ts | 1 + apps/sim/providers/utils.ts | 1 + 10 files changed, 658 insertions(+), 1 deletion(-) create mode 100644 apps/sim/providers/sakana/index.ts create mode 100644 apps/sim/providers/sakana/utils.ts diff --git a/apps/sim/components/icons.tsx b/apps/sim/components/icons.tsx index c920a30428..9f8bd94fe9 100644 --- a/apps/sim/components/icons.tsx +++ b/apps/sim/components/icons.tsx @@ -3439,6 +3439,16 @@ export const DeepseekIcon = (props: SVGProps) => ( ) +export const SakanaIcon = (props: SVGProps) => ( + + Sakana AI + + +) + export function GeminiIcon(props: SVGProps) { const id = useId() const gradientId = `gemini_gradient_${id}` diff --git a/apps/sim/lib/tokenization/constants.ts b/apps/sim/lib/tokenization/constants.ts index a10b1995da..484a397f84 100644 --- a/apps/sim/lib/tokenization/constants.ts +++ b/apps/sim/lib/tokenization/constants.ts @@ -56,6 +56,11 @@ export const TOKENIZATION_CONFIG = { confidence: 'medium', supportedMethods: ['heuristic', 'fallback'], }, + sakana: { + avgCharsPerToken: 4, + confidence: 'medium', + supportedMethods: ['heuristic', 'fallback'], + }, ollama: { 
avgCharsPerToken: 4, confidence: 'low', diff --git a/apps/sim/providers/attachments.ts b/apps/sim/providers/attachments.ts index 6be9fb6b91..b87307ea81 100644 --- a/apps/sim/providers/attachments.ts +++ b/apps/sim/providers/attachments.ts @@ -35,6 +35,7 @@ export type AttachmentProvider = | 'xai' | 'deepseek' | 'cerebras' + | 'sakana' export interface PreparedProviderAttachment { file: UserFile @@ -118,7 +119,7 @@ const BEDROCK_DOCUMENT_FORMATS = new Set([ const BEDROCK_IMAGE_FORMATS = new Set(['png', 'jpeg', 'jpg', 'gif', 'webp']) const BEDROCK_VIDEO_FORMATS = new Set(['mp4', 'mov', 'mkv', 'webm']) -const UNSUPPORTED_FILE_PROVIDERS = new Set(['deepseek', 'cerebras']) +const UNSUPPORTED_FILE_PROVIDERS = new Set(['deepseek', 'cerebras', 'sakana']) const PROVIDER_SUPPORTED_LABELS: Record = { openai: 'images and documents through the Responses API input_image/input_file parts', @@ -137,6 +138,7 @@ const PROVIDER_SUPPORTED_LABELS: Record = { xai: 'images through image_url message parts on Grok vision models', deepseek: 'no file attachments in the current API adapter', cerebras: 'no file attachments in the current API adapter', + sakana: 'no file attachments in the current API adapter', } export function getAttachmentProvider(providerId: ProviderId | string): AttachmentProvider | null { @@ -156,6 +158,7 @@ export function getAttachmentProvider(providerId: ProviderId | string): Attachme if (providerId === 'xai') return 'xai' if (providerId === 'deepseek') return 'deepseek' if (providerId === 'cerebras') return 'cerebras' + if (providerId === 'sakana') return 'sakana' return null } @@ -303,6 +306,7 @@ function isMimeTypeSupportedByProvider( return isImageMimeType(mimeType) case 'deepseek': case 'cerebras': + case 'sakana': return false default: { const _exhaustive: never = provider diff --git a/apps/sim/providers/models.test.ts b/apps/sim/providers/models.test.ts index ca9af8a07c..b3b16b54bc 100644 --- a/apps/sim/providers/models.test.ts +++ 
b/apps/sim/providers/models.test.ts @@ -102,3 +102,35 @@ describe('orderModelIdsByReleaseDate', () => { expect([...ordered].sort()).toEqual([...input].sort()) }) }) + +describe('sakana provider definition', () => { + const sakana = PROVIDER_DEFINITIONS.sakana + + it('is registered with fugu as the default model', () => { + expect(sakana).toBeDefined() + expect(sakana.id).toBe('sakana') + expect(sakana.defaultModel).toBe('fugu') + expect(sakana.modelPatterns).toEqual([/^fugu/]) + }) + + it('exposes fugu and fugu-ultra with a 1M context window', () => { + expect(sakana.models.map((m) => m.id)).toEqual(['fugu', 'fugu-ultra']) + for (const model of sakana.models) { + expect(model.contextWindow).toBe(1000000) + } + }) + + it('prices both models at the documented fugu-ultra ceiling', () => { + for (const model of sakana.models) { + expect(model.pricing.input).toBe(5) + expect(model.pricing.output).toBe(30) + expect(model.pricing.cachedInput).toBe(0.5) + } + }) + + it('routes bare fugu model IDs to the sakana provider', () => { + const baseModels = getBaseModelProviders() + expect(baseModels.fugu).toBe('sakana') + expect(baseModels['fugu-ultra']).toBe('sakana') + }) +}) diff --git a/apps/sim/providers/models.ts b/apps/sim/providers/models.ts index 99aaf203cf..3666033af1 100644 --- a/apps/sim/providers/models.ts +++ b/apps/sim/providers/models.ts @@ -23,6 +23,7 @@ import { OllamaIcon, OpenAIIcon, OpenRouterIcon, + SakanaIcon, TogetherIcon, VertexIcon, VllmIcon, @@ -2197,6 +2198,47 @@ export const PROVIDER_DEFINITIONS: Record = { }, ], }, + sakana: { + id: 'sakana', + name: 'Sakana AI', + description: "Sakana AI's Fugu multi-agent models via an OpenAI-compatible API", + defaultModel: 'fugu', + modelPatterns: [/^fugu/], + icon: SakanaIcon, + color: '#E60000', + capabilities: { + temperature: { min: 0, max: 2 }, + toolUsageControl: true, + }, + models: [ + { + id: 'fugu', + pricing: { + input: 5, + cachedInput: 0.5, + output: 30, + updatedAt: '2026-06-22', + }, + 
capabilities: {}, + contextWindow: 1000000, + releaseDate: '2026-06-15', + speedOptimized: true, + }, + { + id: 'fugu-ultra', + pricing: { + input: 5, + cachedInput: 0.5, + output: 30, + updatedAt: '2026-06-22', + }, + capabilities: {}, + contextWindow: 1000000, + releaseDate: '2026-06-15', + recommended: true, + }, + ], + }, mistral: { id: 'mistral', name: 'Mistral AI', diff --git a/apps/sim/providers/registry.ts b/apps/sim/providers/registry.ts index 5e65e92796..cb7d1a9cd0 100644 --- a/apps/sim/providers/registry.ts +++ b/apps/sim/providers/registry.ts @@ -16,6 +16,7 @@ import { ollamaProvider } from '@/providers/ollama' import { ollamaCloudProvider } from '@/providers/ollama-cloud' import { openaiProvider } from '@/providers/openai' import { openRouterProvider } from '@/providers/openrouter' +import { sakanaProvider } from '@/providers/sakana' import { togetherProvider } from '@/providers/together' import type { ProviderConfig, ProviderId } from '@/providers/types' import { vertexProvider } from '@/providers/vertex' @@ -34,6 +35,7 @@ const providerRegistry: Record = { xai: xAIProvider, cerebras: cerebrasProvider, groq: groqProvider, + sakana: sakanaProvider, vllm: vllmProvider, litellm: litellmProvider, mistral: mistralProvider, diff --git a/apps/sim/providers/sakana/index.ts b/apps/sim/providers/sakana/index.ts new file mode 100644 index 0000000000..e6580a3011 --- /dev/null +++ b/apps/sim/providers/sakana/index.ts @@ -0,0 +1,546 @@ +import { createLogger } from '@sim/logger' +import { getErrorMessage, toError } from '@sim/utils/errors' +import OpenAI from 'openai' +import type { StreamingExecution } from '@/executor/types' +import { MAX_TOOL_ITERATIONS } from '@/providers' +import { formatMessagesForProvider } from '@/providers/attachments' +import { getProviderDefaultModel, getProviderModels } from '@/providers/models' +import { createReadableStreamFromSakanaStream } from '@/providers/sakana/utils' +import { createStreamingExecution } from 
'@/providers/streaming-execution' +import { adaptOpenAIChatToolSchema } from '@/providers/tool-schema-adapter' +import { enrichLastModelSegmentFromChatCompletions } from '@/providers/trace-enrichment' +import type { + ProviderConfig, + ProviderRequest, + ProviderResponse, + TimeSegment, +} from '@/providers/types' +import { ProviderError } from '@/providers/types' +import { + calculateCost, + prepareToolExecution, + prepareToolsWithUsageControl, + sumToolCosts, + trackForcedToolUsage, +} from '@/providers/utils' +import { executeTool } from '@/tools' + +const logger = createLogger('SakanaProvider') + +const SAKANA_BASE_URL = 'https://api.sakana.ai/v1' + +export const sakanaProvider: ProviderConfig = { + id: 'sakana', + name: 'Sakana AI', + description: "Sakana AI's Fugu multi-agent models via an OpenAI-compatible API", + version: '1.0.0', + models: getProviderModels('sakana'), + defaultModel: getProviderDefaultModel('sakana'), + + executeRequest: async ( + request: ProviderRequest + ): Promise<ProviderResponse | StreamingExecution> => { + if (!request.apiKey) { + throw new Error('API key is required for Sakana AI') + } + + const providerStartTime = Date.now() + const providerStartTimeISO = new Date(providerStartTime).toISOString() + + try { + const sakana = new OpenAI({ + apiKey: request.apiKey, + baseURL: SAKANA_BASE_URL, + }) + + const allMessages = [] + + if (request.systemPrompt) { + allMessages.push({ + role: 'system', + content: request.systemPrompt, + }) + } + + if (request.context) { + allMessages.push({ + role: 'user', + content: request.context, + }) + } + + if (request.messages) { + allMessages.push(...request.messages) + } + const formattedMessages = formatMessagesForProvider(allMessages, 'sakana') + + const tools = request.tools?.length + ? 
request.tools.map((tool) => adaptOpenAIChatToolSchema(tool)) + : undefined + + const payload: any = { + model: request.model, + messages: formattedMessages, + } + + if (request.temperature !== undefined) payload.temperature = request.temperature + if (request.maxTokens != null) payload.max_completion_tokens = request.maxTokens + + if (request.responseFormat) { + payload.response_format = { + type: 'json_schema', + json_schema: { + name: request.responseFormat.name || 'response_schema', + schema: request.responseFormat.schema || request.responseFormat, + strict: request.responseFormat.strict !== false, + }, + } + } + + let preparedTools: ReturnType | null = null + + if (tools?.length) { + preparedTools = prepareToolsWithUsageControl(tools, request.tools, logger, 'openai') + const { tools: filteredTools, toolChoice } = preparedTools + + if (filteredTools?.length && toolChoice) { + payload.tools = filteredTools + payload.tool_choice = toolChoice + + logger.info('Sakana request configuration:', { + toolCount: filteredTools.length, + toolChoice: + typeof toolChoice === 'string' + ? toolChoice + : toolChoice.type === 'function' + ? `force:${toolChoice.function.name}` + : 'unknown', + model: request.model, + }) + } + } + + if (request.stream && (!tools || tools.length === 0)) { + logger.info('Using streaming response for Sakana request (no tools)') + + const streamResponse = await sakana.chat.completions.create( + { + ...payload, + stream: true, + }, + request.abortSignal ? 
{ signal: request.abortSignal } : undefined + ) + + const streamingResult = createStreamingExecution({ + model: request.model, + providerStartTime, + providerStartTimeISO, + timing: { kind: 'simple', segmentName: request.model }, + initialTokens: { input: 0, output: 0, total: 0 }, + initialCost: { input: 0, output: 0, total: 0 }, + isStreaming: true, + createStream: ({ output }) => + createReadableStreamFromSakanaStream(streamResponse as any, (content, usage) => { + output.content = content + output.tokens = { + input: usage.prompt_tokens, + output: usage.completion_tokens, + total: usage.total_tokens, + } + + const costResult = calculateCost( + request.model, + usage.prompt_tokens, + usage.completion_tokens + ) + output.cost = { + input: costResult.input, + output: costResult.output, + total: costResult.total, + } + }), + }) + + return streamingResult + } + + const initialCallTime = Date.now() + const originalToolChoice = payload.tool_choice + const forcedTools = preparedTools?.forcedTools || [] + let usedForcedTools: string[] = [] + + let currentResponse = await sakana.chat.completions.create( + payload, + request.abortSignal ? 
{ signal: request.abortSignal } : undefined + ) + const firstResponseTime = Date.now() - initialCallTime + + let content = currentResponse.choices[0]?.message?.content || '' + + const tokens = { + input: currentResponse.usage?.prompt_tokens || 0, + output: currentResponse.usage?.completion_tokens || 0, + total: currentResponse.usage?.total_tokens || 0, + } + const toolCalls = [] + const toolResults: Record[] = [] + const currentMessages = [...formattedMessages] + let iterationCount = 0 + let hasUsedForcedTool = false + let modelTime = firstResponseTime + let toolsTime = 0 + + const timeSegments: TimeSegment[] = [ + { + type: 'model', + name: request.model, + startTime: initialCallTime, + endTime: initialCallTime + firstResponseTime, + duration: firstResponseTime, + }, + ] + + if ( + typeof originalToolChoice === 'object' && + currentResponse.choices[0]?.message?.tool_calls + ) { + const toolCallsResponse = currentResponse.choices[0].message.tool_calls + const result = trackForcedToolUsage( + toolCallsResponse, + originalToolChoice, + logger, + 'openai', + forcedTools, + usedForcedTools + ) + hasUsedForcedTool = result.hasUsedForcedTool + usedForcedTools = result.usedForcedTools + } + + try { + while (iterationCount < MAX_TOOL_ITERATIONS) { + if (currentResponse.choices[0]?.message?.content) { + content = currentResponse.choices[0].message.content + } + + const toolCallsInResponse = currentResponse.choices[0]?.message?.tool_calls + + enrichLastModelSegmentFromChatCompletions( + timeSegments, + currentResponse, + toolCallsInResponse, + { model: request.model, provider: 'sakana' } + ) + + if (!toolCallsInResponse || toolCallsInResponse.length === 0) { + break + } + + const toolsStartTime = Date.now() + + const toolExecutionPromises = toolCallsInResponse.map(async (toolCall) => { + const toolCallStartTime = Date.now() + const toolName = toolCall.function.name + + try { + const toolArgs = JSON.parse(toolCall.function.arguments) + const tool = request.tools?.find((t) => 
t.id === toolName) + + if (!tool) return null + + const { toolParams, executionParams } = prepareToolExecution(tool, toolArgs, request) + const result = await executeTool(toolName, executionParams, { + signal: request.abortSignal, + }) + const toolCallEndTime = Date.now() + + return { + toolCall, + toolName, + toolParams, + result, + startTime: toolCallStartTime, + endTime: toolCallEndTime, + duration: toolCallEndTime - toolCallStartTime, + } + } catch (error) { + const toolCallEndTime = Date.now() + logger.error('Error processing tool call:', { error, toolName }) + + return { + toolCall, + toolName, + toolParams: {}, + result: { + success: false, + output: undefined, + error: getErrorMessage(error, 'Tool execution failed'), + }, + startTime: toolCallStartTime, + endTime: toolCallEndTime, + duration: toolCallEndTime - toolCallStartTime, + } + } + }) + + const executionResults = await Promise.allSettled(toolExecutionPromises) + + currentMessages.push({ + role: 'assistant', + content: null, + tool_calls: toolCallsInResponse.map((tc) => ({ + id: tc.id, + type: 'function', + function: { + name: tc.function.name, + arguments: tc.function.arguments, + }, + })), + }) + + for (const settledResult of executionResults) { + if (settledResult.status === 'rejected' || !settledResult.value) continue + + const { toolCall, toolName, toolParams, result, startTime, endTime, duration } = + settledResult.value + + timeSegments.push({ + type: 'tool', + name: toolName, + startTime: startTime, + endTime: endTime, + duration: duration, + toolCallId: toolCall.id, + }) + + let resultContent: any + if (result.success && result.output) { + toolResults.push(result.output) + resultContent = result.output + } else { + resultContent = { + error: true, + message: result.error || 'Tool execution failed', + tool: toolName, + } + } + + toolCalls.push({ + name: toolName, + arguments: toolParams, + startTime: new Date(startTime).toISOString(), + endTime: new Date(endTime).toISOString(), + duration: 
duration, + result: resultContent, + success: result.success, + }) + + currentMessages.push({ + role: 'tool', + tool_call_id: toolCall.id, + content: JSON.stringify(resultContent), + }) + } + + const thisToolsTime = Date.now() - toolsStartTime + toolsTime += thisToolsTime + + const nextPayload = { + ...payload, + messages: currentMessages, + } + + if ( + typeof originalToolChoice === 'object' && + hasUsedForcedTool && + forcedTools.length > 0 + ) { + const remainingTools = forcedTools.filter((tool) => !usedForcedTools.includes(tool)) + + if (remainingTools.length > 0) { + nextPayload.tool_choice = { + type: 'function', + function: { name: remainingTools[0] }, + } + logger.info(`Forcing next tool: ${remainingTools[0]}`) + } else { + nextPayload.tool_choice = 'auto' + logger.info('All forced tools have been used, switching to auto tool_choice') + } + } + + const nextModelStartTime = Date.now() + currentResponse = await sakana.chat.completions.create( + nextPayload, + request.abortSignal ? 
{ signal: request.abortSignal } : undefined + ) + + if ( + typeof nextPayload.tool_choice === 'object' && + currentResponse.choices[0]?.message?.tool_calls + ) { + const toolCallsResponse = currentResponse.choices[0].message.tool_calls + const result = trackForcedToolUsage( + toolCallsResponse, + nextPayload.tool_choice, + logger, + 'openai', + forcedTools, + usedForcedTools + ) + hasUsedForcedTool = result.hasUsedForcedTool + usedForcedTools = result.usedForcedTools + } + + const nextModelEndTime = Date.now() + const thisModelTime = nextModelEndTime - nextModelStartTime + + timeSegments.push({ + type: 'model', + name: request.model, + startTime: nextModelStartTime, + endTime: nextModelEndTime, + duration: thisModelTime, + }) + + modelTime += thisModelTime + + if (currentResponse.choices[0]?.message?.content) { + content = currentResponse.choices[0].message.content + } + + if (currentResponse.usage) { + tokens.input += currentResponse.usage.prompt_tokens || 0 + tokens.output += currentResponse.usage.completion_tokens || 0 + tokens.total += currentResponse.usage.total_tokens || 0 + } + + iterationCount++ + } + + if (iterationCount === MAX_TOOL_ITERATIONS) { + enrichLastModelSegmentFromChatCompletions( + timeSegments, + currentResponse, + currentResponse.choices[0]?.message?.tool_calls, + { model: request.model, provider: 'sakana' } + ) + } + } catch (error) { + logger.error('Error in Sakana request:', { error }) + } + + const providerEndTime = Date.now() + const providerEndTimeISO = new Date(providerEndTime).toISOString() + const totalDuration = providerEndTime - providerStartTime + + if (request.stream) { + logger.info('Using streaming for final Sakana response after tool processing') + + const streamingPayload = { + ...payload, + messages: currentMessages, + tool_choice: 'auto', + stream: true, + } + + const streamResponse = await sakana.chat.completions.create( + streamingPayload, + request.abortSignal ? 
{ signal: request.abortSignal } : undefined + ) + + const accumulatedCost = calculateCost(request.model, tokens.input, tokens.output) + + const streamingResult = createStreamingExecution({ + model: request.model, + providerStartTime, + providerStartTimeISO, + timing: { + kind: 'accumulated', + modelTime, + toolsTime, + firstResponseTime, + iterations: iterationCount + 1, + timeSegments, + }, + initialTokens: { + input: tokens.input, + output: tokens.output, + total: tokens.total, + }, + initialCost: { + input: accumulatedCost.input, + output: accumulatedCost.output, + toolCost: undefined as number | undefined, + total: accumulatedCost.total, + }, + toolCalls: + toolCalls.length > 0 + ? { + list: toolCalls, + count: toolCalls.length, + } + : undefined, + isStreaming: true, + createStream: ({ output }) => + createReadableStreamFromSakanaStream(streamResponse as any, (content, usage) => { + output.content = content + output.tokens = { + input: tokens.input + usage.prompt_tokens, + output: tokens.output + usage.completion_tokens, + total: tokens.total + usage.total_tokens, + } + + const streamCost = calculateCost( + request.model, + usage.prompt_tokens, + usage.completion_tokens + ) + const tc = sumToolCosts(toolResults) + output.cost = { + input: accumulatedCost.input + streamCost.input, + output: accumulatedCost.output + streamCost.output, + toolCost: tc || undefined, + total: accumulatedCost.total + streamCost.total + tc, + } + }), + }) + + return streamingResult + } + + return { + content, + model: request.model, + tokens, + toolCalls: toolCalls.length > 0 ? toolCalls : undefined, + toolResults: toolResults.length > 0 ? 
toolResults : undefined, + timing: { + startTime: providerStartTimeISO, + endTime: providerEndTimeISO, + duration: totalDuration, + modelTime: modelTime, + toolsTime: toolsTime, + firstResponseTime: firstResponseTime, + iterations: iterationCount + 1, + timeSegments: timeSegments, + }, + } + } catch (error) { + const providerEndTime = Date.now() + const providerEndTimeISO = new Date(providerEndTime).toISOString() + const totalDuration = providerEndTime - providerStartTime + + logger.error('Error in Sakana request:', { + error, + duration: totalDuration, + }) + + throw new ProviderError(toError(error).message, { + startTime: providerStartTimeISO, + endTime: providerEndTimeISO, + duration: totalDuration, + }) + } + }, +} diff --git a/apps/sim/providers/sakana/utils.ts b/apps/sim/providers/sakana/utils.ts new file mode 100644 index 0000000000..ede98301a1 --- /dev/null +++ b/apps/sim/providers/sakana/utils.ts @@ -0,0 +1,14 @@ +import type { ChatCompletionChunk } from 'openai/resources/chat/completions' +import type { CompletionUsage } from 'openai/resources/completions' +import { createOpenAICompatibleStream } from '@/providers/utils' + +/** + * Creates a ReadableStream from a Sakana AI streaming response. + * Uses the shared OpenAI-compatible streaming utility. 
+ */ +export function createReadableStreamFromSakanaStream( + sakanaStream: AsyncIterable<ChatCompletionChunk>, + onComplete?: (content: string, usage: CompletionUsage) => void +): ReadableStream { + return createOpenAICompatibleStream(sakanaStream, 'Sakana', onComplete) +} diff --git a/apps/sim/providers/types.ts b/apps/sim/providers/types.ts index f5ab7a812a..d13c236977 100644 --- a/apps/sim/providers/types.ts +++ b/apps/sim/providers/types.ts @@ -11,6 +11,7 @@ export type ProviderId = | 'xai' | 'cerebras' | 'groq' + | 'sakana' | 'mistral' | 'ollama' | 'ollama-cloud' diff --git a/apps/sim/providers/utils.ts b/apps/sim/providers/utils.ts index 2c22c865e4..9d8e5dce84 100644 --- a/apps/sim/providers/utils.ts +++ b/apps/sim/providers/utils.ts @@ -151,6 +151,7 @@ export const providers: Record = { xai: buildProviderMetadata('xai'), cerebras: buildProviderMetadata('cerebras'), groq: buildProviderMetadata('groq'), + sakana: buildProviderMetadata('sakana'), mistral: buildProviderMetadata('mistral'), bedrock: buildProviderMetadata('bedrock'), openrouter: buildProviderMetadata('openrouter'), From bd422f8aa8f51f6201492018e981541a5034c918 Mon Sep 17 00:00:00 2001 From: waleed Date: Mon, 22 Jun 2026 12:59:32 -0700 Subject: [PATCH 2/6] fix(providers): defer Sakana structured output until after tool loop OpenAI-compatible backends reject a request carrying both response_format and active tools/tool_choice. Mirror the LiteLLM pattern: withhold the JSON schema while tools are active and apply it on a final tool-free call (tool_choice: none) for both streaming and non-streaming paths. 
--- apps/sim/providers/sakana/index.ts | 93 +++++++++++++++++++++++++----- 1 file changed, 78 insertions(+), 15 deletions(-) diff --git a/apps/sim/providers/sakana/index.ts b/apps/sim/providers/sakana/index.ts index e6580a3011..4f018a460d 100644 --- a/apps/sim/providers/sakana/index.ts +++ b/apps/sim/providers/sakana/index.ts @@ -86,18 +86,19 @@ export const sakanaProvider: ProviderConfig = { if (request.temperature !== undefined) payload.temperature = request.temperature if (request.maxTokens != null) payload.max_completion_tokens = request.maxTokens - if (request.responseFormat) { - payload.response_format = { - type: 'json_schema', - json_schema: { - name: request.responseFormat.name || 'response_schema', - schema: request.responseFormat.schema || request.responseFormat, - strict: request.responseFormat.strict !== false, - }, - } - } + const responseFormatPayload = request.responseFormat + ? { + type: 'json_schema' as const, + json_schema: { + name: request.responseFormat.name || 'response_schema', + schema: request.responseFormat.schema || request.responseFormat, + strict: request.responseFormat.strict !== false, + }, + } + : undefined let preparedTools: ReturnType | null = null + let hasActiveTools = false if (tools?.length) { preparedTools = prepareToolsWithUsageControl(tools, request.tools, logger, 'openai') @@ -106,6 +107,7 @@ export const sakanaProvider: ProviderConfig = { if (filteredTools?.length && toolChoice) { payload.tools = filteredTools payload.tool_choice = toolChoice + hasActiveTools = true logger.info('Sakana request configuration:', { toolCount: filteredTools.length, @@ -120,6 +122,14 @@ export const sakanaProvider: ProviderConfig = { } } + // Structured output and tool calling cannot be sent together — OpenAI-compatible + // backends reject a request that carries both `response_format` and active + // `tools`/`tool_choice`. Defer the schema until after the tool loop completes. 
+ const deferResponseFormat = !!responseFormatPayload && hasActiveTools + if (responseFormatPayload && !deferResponseFormat) { + payload.response_format = responseFormatPayload + } + if (request.stream && (!tools || tools.length === 0)) { logger.info('Using streaming response for Sakana request (no tools)') @@ -430,19 +440,20 @@ export const sakanaProvider: ProviderConfig = { logger.error('Error in Sakana request:', { error }) } - const providerEndTime = Date.now() - const providerEndTimeISO = new Date(providerEndTime).toISOString() - const totalDuration = providerEndTime - providerStartTime - if (request.stream) { logger.info('Using streaming for final Sakana response after tool processing') - const streamingPayload = { + const streamingPayload: any = { ...payload, messages: currentMessages, tool_choice: 'auto', stream: true, } + if (deferResponseFormat && responseFormatPayload) { + streamingPayload.response_format = responseFormatPayload + streamingPayload.tool_choice = 'none' + streamingPayload.parallel_tool_calls = false + } const streamResponse = await sakana.chat.completions.create( streamingPayload, @@ -509,6 +520,58 @@ export const sakanaProvider: ProviderConfig = { return streamingResult } + // Tools were active, so `response_format` was withheld from the loop. Make one final + // tool-free call to obtain the structured response now that the tool work is done. + if (deferResponseFormat && responseFormatPayload) { + logger.info('Applying deferred JSON schema response format after tool processing') + + const finalFormatStartTime = Date.now() + const finalPayload: any = { + ...payload, + messages: currentMessages, + response_format: responseFormatPayload, + tool_choice: 'none', + parallel_tool_calls: false, + } + + currentResponse = await sakana.chat.completions.create( + finalPayload, + request.abortSignal ? 
{ signal: request.abortSignal } : undefined + ) + + const finalFormatEndTime = Date.now() + timeSegments.push({ + type: 'model', + name: request.model, + startTime: finalFormatStartTime, + endTime: finalFormatEndTime, + duration: finalFormatEndTime - finalFormatStartTime, + }) + modelTime += finalFormatEndTime - finalFormatStartTime + + const formattedContent = currentResponse.choices[0]?.message?.content + if (formattedContent) { + content = formattedContent + } + + if (currentResponse.usage) { + tokens.input += currentResponse.usage.prompt_tokens || 0 + tokens.output += currentResponse.usage.completion_tokens || 0 + tokens.total += currentResponse.usage.total_tokens || 0 + } + + enrichLastModelSegmentFromChatCompletions( + timeSegments, + currentResponse, + currentResponse.choices[0]?.message?.tool_calls, + { model: request.model, provider: 'sakana' } + ) + } + + const providerEndTime = Date.now() + const providerEndTimeISO = new Date(providerEndTime).toISOString() + const totalDuration = providerEndTime - providerStartTime + return { content, model: request.model, From 8d4acbabfd1021da708ec16ec5afaebe1665b42b Mon Sep 17 00:00:00 2001 From: waleed Date: Mon, 22 Jun 2026 13:06:01 -0700 Subject: [PATCH 3/6] fix(providers): harden Sakana tool-loop error + final-stream tool_choice - Rethrow tool-loop failures instead of swallowing them, so a failed run surfaces as a ProviderError rather than a partial success (matches LiteLLM). - Force tool_choice: 'none' on the post-tool streaming pass so the model cannot emit fresh tool calls that the text-only stream adapter would drop. 
--- apps/sim/providers/sakana/index.ts | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/apps/sim/providers/sakana/index.ts b/apps/sim/providers/sakana/index.ts index 4f018a460d..1f20903e3b 100644 --- a/apps/sim/providers/sakana/index.ts +++ b/apps/sim/providers/sakana/index.ts @@ -438,20 +438,23 @@ export const sakanaProvider: ProviderConfig = { } } catch (error) { logger.error('Error in Sakana request:', { error }) + throw error } if (request.stream) { logger.info('Using streaming for final Sakana response after tool processing') + // The tool loop is complete: this final pass only produces the textual answer. + // Force `tool_choice: 'none'` so the model cannot emit fresh tool calls that the + // text-only stream adapter would silently drop. const streamingPayload: any = { ...payload, messages: currentMessages, - tool_choice: 'auto', + tool_choice: 'none', stream: true, } if (deferResponseFormat && responseFormatPayload) { streamingPayload.response_format = responseFormatPayload - streamingPayload.tool_choice = 'none' streamingPayload.parallel_tool_calls = false } From 3a24274b82bbe7971cf849d711817a8167a56123 Mon Sep 17 00:00:00 2001 From: waleed Date: Mon, 22 Jun 2026 13:13:45 -0700 Subject: [PATCH 4/6] fix(providers): Sakana streaming usage + filtered-tools stream guard - Pass stream_options: { include_usage: true } on both streaming calls so token/cost data is captured (the shared OpenAI-compatible stream helper only fills usage from chunk usage, which the API omits without the flag). - Include !hasActiveTools in the early-stream guard so requests whose tools are all filtered out (e.g. usageControl 'none') still take the fast streaming path instead of the tool-loop path. Mirrors LiteLLM. 
--- apps/sim/providers/sakana/index.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/apps/sim/providers/sakana/index.ts b/apps/sim/providers/sakana/index.ts index 1f20903e3b..99ec3d6774 100644 --- a/apps/sim/providers/sakana/index.ts +++ b/apps/sim/providers/sakana/index.ts @@ -130,13 +130,14 @@ export const sakanaProvider: ProviderConfig = { payload.response_format = responseFormatPayload } - if (request.stream && (!tools || tools.length === 0)) { + if (request.stream && (!tools || tools.length === 0 || !hasActiveTools)) { logger.info('Using streaming response for Sakana request (no tools)') const streamResponse = await sakana.chat.completions.create( { ...payload, stream: true, + stream_options: { include_usage: true }, }, request.abortSignal ? { signal: request.abortSignal } : undefined ) @@ -452,6 +453,7 @@ export const sakanaProvider: ProviderConfig = { messages: currentMessages, tool_choice: 'none', stream: true, + stream_options: { include_usage: true }, } if (deferResponseFormat && responseFormatPayload) { streamingPayload.response_format = responseFormatPayload From 2ff83bc17ac068cd0988989b00f145437667b801 Mon Sep 17 00:00:00 2001 From: waleed Date: Mon, 22 Jun 2026 13:21:29 -0700 Subject: [PATCH 5/6] fix(providers): answer every Sakana tool_call to keep message history valid An assistant message lists all tool_calls, so a call for an unconfigured tool must still get a matching `tool` response or the next request violates the OpenAI message contract. Emit an error tool-result for unknown tools instead of dropping them. 
--- apps/sim/providers/sakana/index.ts | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/apps/sim/providers/sakana/index.ts b/apps/sim/providers/sakana/index.ts index 99ec3d6774..3988d3e96d 100644 --- a/apps/sim/providers/sakana/index.ts +++ b/apps/sim/providers/sakana/index.ts @@ -257,7 +257,25 @@ export const sakanaProvider: ProviderConfig = { const toolArgs = JSON.parse(toolCall.function.arguments) const tool = request.tools?.find((t) => t.id === toolName) - if (!tool) return null + // Every tool_call in the assistant message must be answered by a matching + // `tool` message, or the next request violates the OpenAI message contract. + // Emit an error result for an unknown tool rather than dropping it. + if (!tool) { + const toolCallEndTime = Date.now() + return { + toolCall, + toolName, + toolParams: {}, + result: { + success: false, + output: undefined, + error: `Tool "${toolName}" is not available`, + }, + startTime: toolCallStartTime, + endTime: toolCallEndTime, + duration: toolCallEndTime - toolCallStartTime, + } + } const { toolParams, executionParams } = prepareToolExecution(tool, toolArgs, request) const result = await executeTool(toolName, executionParams, { From ee72a69552e6f14ffd90130b7ffeb07923798bd7 Mon Sep 17 00:00:00 2001 From: waleed Date: Mon, 22 Jun 2026 13:34:28 -0700 Subject: [PATCH 6/6] test(session): de-flake SessionProvider normal-load test flush() only drained microtasks, so the query->render update occasionally lost the race and ctx.data was still null after the flush budget. Yield one macrotask tick per flush so React Query's notifyManager and deferred renders settle deterministically. Verified across repeated local runs. 
--- apps/sim/app/_shell/providers/session-provider.test.tsx | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/apps/sim/app/_shell/providers/session-provider.test.tsx b/apps/sim/app/_shell/providers/session-provider.test.tsx index 9e02703f05..4b0a2b68b8 100644 --- a/apps/sim/app/_shell/providers/session-provider.test.tsx +++ b/apps/sim/app/_shell/providers/session-provider.test.tsx @@ -107,12 +107,18 @@ function renderProvider(): Harness { } } -/** Flush pending microtasks inside an act() boundary. */ +/** + * Flush pending work inside an act() boundary. Drains the microtask queue and + * then yields one macrotask tick, so React Query's notifyManager (which can + * schedule observer notifications on a timer) and any deferred renders settle + * deterministically — microtask-only flushing raced the query→render update. + */ async function flush() { await act(async () => { await Promise.resolve() await Promise.resolve() await Promise.resolve() + await new Promise((resolve) => setTimeout(resolve, 0)) }) }