From 768ef9458a020d6cd052eda54fe3c5cbf5f789b6 Mon Sep 17 00:00:00 2001 From: waleed Date: Thu, 11 Jun 2026 19:18:58 -0700 Subject: [PATCH 1/4] fix(providers): correct pricing, deprecations, and capabilities across model catalog --- apps/sim/providers/models.ts | 372 ++++++++++++++++++------------- apps/sim/providers/utils.test.ts | 14 +- 2 files changed, 223 insertions(+), 163 deletions(-) diff --git a/apps/sim/providers/models.ts b/apps/sim/providers/models.ts index 58dba7431b..b0caf31aff 100644 --- a/apps/sim/providers/models.ts +++ b/apps/sim/providers/models.ts @@ -208,7 +208,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 2.0, cachedInput: 0.5, output: 8.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 2 }, @@ -223,7 +223,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 0.4, cachedInput: 0.1, output: 1.6, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 2 }, @@ -238,7 +238,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 0.1, cachedInput: 0.025, output: 0.4, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 2 }, @@ -296,7 +296,7 @@ export const PROVIDER_DEFINITIONS: Record = { pricing: { input: 30.0, output: 180.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { reasoningEffort: { @@ -313,7 +313,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 2.5, cachedInput: 0.25, output: 15.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { reasoningEffort: { @@ -333,7 +333,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 0.75, cachedInput: 0.075, output: 4.5, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { reasoningEffort: { @@ -353,7 +353,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 0.2, cachedInput: 0.02, output: 1.25, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, 
capabilities: { reasoningEffort: { @@ -374,7 +374,7 @@ export const PROVIDER_DEFINITIONS: Record = { pricing: { input: 21.0, output: 168.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { reasoningEffort: { @@ -391,7 +391,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 1.75, cachedInput: 0.175, output: 14.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { reasoningEffort: { @@ -412,7 +412,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 1.25, cachedInput: 0.125, output: 10.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { reasoningEffort: { @@ -432,7 +432,7 @@ export const PROVIDER_DEFINITIONS: Record = { pricing: { input: 15.0, output: 120.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { reasoningEffort: { @@ -441,7 +441,7 @@ export const PROVIDER_DEFINITIONS: Record = { maxOutputTokens: 272000, }, contextWindow: 400000, - releaseDate: '2025-08-07', + releaseDate: '2025-10-06', }, { id: 'gpt-5', @@ -449,7 +449,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 1.25, cachedInput: 0.125, output: 10.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { reasoningEffort: { @@ -469,7 +469,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 0.25, cachedInput: 0.025, output: 2.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { reasoningEffort: { @@ -489,7 +489,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 0.05, cachedInput: 0.005, output: 0.4, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { reasoningEffort: { @@ -509,7 +509,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 1.25, cachedInput: 0.125, output: 10.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 2 }, @@ -517,6 +517,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 128000, releaseDate: '2025-08-07', + deprecated: 
true, }, // o-series reasoning models { @@ -525,7 +526,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 1.1, cachedInput: 0.275, output: 4.4, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { reasoningEffort: { @@ -535,13 +536,14 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 200000, releaseDate: '2025-04-16', + deprecated: true, }, { id: 'o3-pro', pricing: { input: 20.0, output: 80.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { maxOutputTokens: 100000, @@ -555,7 +557,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 2, cachedInput: 0.5, output: 8, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { reasoningEffort: { @@ -572,7 +574,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 1.1, cachedInput: 0.55, output: 4.4, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { reasoningEffort: { @@ -589,7 +591,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 15.0, cachedInput: 7.5, output: 60, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { reasoningEffort: { @@ -607,7 +609,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 2.5, cachedInput: 1.25, output: 10.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 2 }, @@ -666,7 +668,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, }, contextWindow: 1000000, - releaseDate: '2026-05-27', + releaseDate: '2026-05-28', recommended: true, }, { @@ -694,7 +696,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 5.0, cachedInput: 0.5, output: 25.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -714,7 +716,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 3.0, cachedInput: 0.3, output: 15.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -735,7 +737,7 @@ export const 
PROVIDER_DEFINITIONS: Record = { input: 5.0, cachedInput: 0.5, output: 25.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -755,11 +757,10 @@ export const PROVIDER_DEFINITIONS: Record = { input: 15.0, cachedInput: 1.5, output: 75.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, - nativeStructuredOutputs: true, maxOutputTokens: 32000, thinking: { levels: ['low', 'medium', 'high'], @@ -768,6 +769,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 200000, releaseDate: '2025-08-05', + deprecated: true, }, { id: 'claude-opus-4-0', @@ -775,7 +777,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 15.0, cachedInput: 1.5, output: 75.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -787,6 +789,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 200000, releaseDate: '2025-05-22', + deprecated: true, }, { id: 'claude-sonnet-4-5', @@ -794,7 +797,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 3.0, cachedInput: 0.3, output: 15.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -805,7 +808,7 @@ export const PROVIDER_DEFINITIONS: Record = { default: 'high', }, }, - contextWindow: 1000000, + contextWindow: 200000, releaseDate: '2025-09-29', }, { @@ -814,7 +817,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 3.0, cachedInput: 0.3, output: 15.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -824,8 +827,9 @@ export const PROVIDER_DEFINITIONS: Record = { default: 'high', }, }, - contextWindow: 1000000, + contextWindow: 200000, releaseDate: '2025-05-22', + deprecated: true, }, { id: 'claude-haiku-4-5', @@ -833,7 +837,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 1.0, cachedInput: 0.1, output: 5.0, - 
updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -884,13 +888,13 @@ export const PROVIDER_DEFINITIONS: Record = { input: 2.5, cachedInput: 1.25, output: 10.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 2 }, }, contextWindow: 128000, - releaseDate: '2024-05-13', + releaseDate: '2024-11-20', }, { id: 'azure/gpt-5.4', @@ -898,11 +902,11 @@ export const PROVIDER_DEFINITIONS: Record = { input: 2.5, cachedInput: 0.25, output: 15.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { reasoningEffort: { - values: ['none', 'low', 'medium', 'high', 'xhigh'], + values: ['none', 'low', 'medium', 'high'], }, verbosity: { values: ['low', 'medium', 'high'], @@ -918,11 +922,11 @@ export const PROVIDER_DEFINITIONS: Record = { input: 0.75, cachedInput: 0.075, output: 4.5, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { reasoningEffort: { - values: ['none', 'low', 'medium', 'high', 'xhigh'], + values: ['none', 'low', 'medium', 'high'], }, verbosity: { values: ['low', 'medium', 'high'], @@ -938,11 +942,11 @@ export const PROVIDER_DEFINITIONS: Record = { input: 0.2, cachedInput: 0.02, output: 1.25, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { reasoningEffort: { - values: ['none', 'low', 'medium', 'high', 'xhigh'], + values: ['none', 'low', 'medium', 'high'], }, verbosity: { values: ['low', 'medium', 'high'], @@ -958,11 +962,11 @@ export const PROVIDER_DEFINITIONS: Record = { input: 1.75, cachedInput: 0.175, output: 14.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { reasoningEffort: { - values: ['none', 'low', 'medium', 'high', 'xhigh'], + values: ['none', 'low', 'medium', 'high'], }, verbosity: { values: ['low', 'medium', 'high'], @@ -978,7 +982,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 1.25, cachedInput: 0.125, output: 10.0, - updatedAt: '2026-04-01', + 
updatedAt: '2026-06-11', }, capabilities: { reasoningEffort: { @@ -998,7 +1002,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 1.25, cachedInput: 0.125, output: 10.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { reasoningEffort: { @@ -1011,6 +1015,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 400000, releaseDate: '2025-11-12', + deprecated: true, }, { id: 'azure/gpt-5', @@ -1018,7 +1023,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 1.25, cachedInput: 0.125, output: 10.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { reasoningEffort: { @@ -1038,7 +1043,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 0.25, cachedInput: 0.025, output: 2.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { reasoningEffort: { @@ -1058,7 +1063,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 0.05, cachedInput: 0.005, output: 0.4, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { reasoningEffort: { @@ -1073,15 +1078,16 @@ export const PROVIDER_DEFINITIONS: Record = { releaseDate: '2025-08-07', }, { - id: 'azure/gpt-5-chat-latest', + id: 'azure/gpt-5-chat', pricing: { input: 1.25, cachedInput: 0.125, output: 10.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 2 }, + maxOutputTokens: 16384, }, contextWindow: 128000, releaseDate: '2025-08-07', @@ -1092,7 +1098,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 2, cachedInput: 0.5, output: 8, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { reasoningEffort: { @@ -1109,7 +1115,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 1.1, cachedInput: 0.275, output: 4.4, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { reasoningEffort: { @@ -1126,7 +1132,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 2.0, cachedInput: 0.5, output: 8.0, - updatedAt: '2026-04-01', 
+ updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 2 }, @@ -1141,7 +1147,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 0.4, cachedInput: 0.1, output: 1.6, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 2 }, @@ -1156,7 +1162,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 0.1, cachedInput: 0.025, output: 0.4, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 2 }, @@ -1175,7 +1181,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, capabilities: {}, contextWindow: 200000, - releaseDate: '2025-04-14', + releaseDate: '2025-05-19', }, ], }, @@ -1197,7 +1203,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 5.0, cachedInput: 0.5, output: 25.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -1208,7 +1214,7 @@ export const PROVIDER_DEFINITIONS: Record = { default: 'high', }, }, - contextWindow: 200000, + contextWindow: 1000000, releaseDate: '2026-02-05', }, { @@ -1217,7 +1223,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 5.0, cachedInput: 0.5, output: 25.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -1237,7 +1243,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 3.0, cachedInput: 0.3, output: 15.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -1257,7 +1263,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 15.0, cachedInput: 1.5, output: 75.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -1270,6 +1276,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 200000, releaseDate: '2025-08-05', + deprecated: true, }, { id: 'azure-anthropic/claude-haiku-4-5', @@ -1277,7 +1284,7 @@ export const PROVIDER_DEFINITIONS: Record = { 
input: 1.0, cachedInput: 0.1, output: 5.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -1331,12 +1338,12 @@ export const PROVIDER_DEFINITIONS: Record = { input: 2.0, cachedInput: 0.2, output: 12.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 2 }, thinking: { - levels: ['minimal', 'low', 'medium', 'high'], + levels: ['low', 'medium', 'high'], default: 'high', }, maxOutputTokens: 65536, @@ -1345,12 +1352,12 @@ export const PROVIDER_DEFINITIONS: Record = { releaseDate: '2026-02-19', }, { - id: 'gemini-3.1-flash-lite-preview', + id: 'gemini-3.1-flash-lite', pricing: { input: 0.25, cachedInput: 0.025, output: 1.5, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 2 }, @@ -1369,7 +1376,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 0.5, cachedInput: 0.05, output: 3.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 2 }, @@ -1379,7 +1386,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, maxOutputTokens: 65536, }, - contextWindow: 1000000, + contextWindow: 1048576, releaseDate: '2025-12-17', }, { @@ -1388,7 +1395,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 1.25, cachedInput: 0.125, output: 10.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 2 }, @@ -1403,7 +1410,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 0.3, cachedInput: 0.03, output: 2.5, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 2 }, @@ -1418,7 +1425,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 0.1, cachedInput: 0.01, output: 0.4, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 2 }, @@ -1434,7 +1441,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 0.1, cachedInput: 
0.025, output: 0.4, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 2 }, @@ -1442,13 +1449,14 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 1048576, releaseDate: '2025-02-05', + deprecated: true, }, { id: 'gemini-2.0-flash-lite', pricing: { input: 0.075, output: 0.3, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 2 }, @@ -1456,20 +1464,22 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 1048576, releaseDate: '2025-02-25', + deprecated: true, }, { id: 'deep-research-pro-preview-12-2025', pricing: { input: 2.0, - output: 2.0, - updatedAt: '2026-04-01', + cachedInput: 0.2, + output: 12.0, + updatedAt: '2026-06-11', }, capabilities: { deepResearch: true, memory: false, maxOutputTokens: 65536, }, - contextWindow: 1000000, + contextWindow: 1048576, releaseDate: '2025-12-11', }, ], @@ -1500,6 +1510,7 @@ export const PROVIDER_DEFINITIONS: Record = { levels: ['minimal', 'low', 'medium', 'high'], default: 'medium', }, + maxOutputTokens: 65536, }, contextWindow: 1048576, releaseDate: '2026-05-19', @@ -1510,25 +1521,26 @@ export const PROVIDER_DEFINITIONS: Record = { input: 2.0, cachedInput: 0.2, output: 12.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 2 }, thinking: { - levels: ['minimal', 'low', 'medium', 'high'], + levels: ['low', 'medium', 'high'], default: 'high', }, + maxOutputTokens: 65536, }, contextWindow: 1048576, releaseDate: '2026-02-19', }, { - id: 'vertex/gemini-3.1-flash-lite-preview', + id: 'vertex/gemini-3.1-flash-lite', pricing: { input: 0.25, cachedInput: 0.025, output: 1.5, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 2 }, @@ -1536,6 +1548,7 @@ export const PROVIDER_DEFINITIONS: Record = { levels: ['minimal', 'low', 'medium', 'high'], default: 'minimal', }, + maxOutputTokens: 65536, }, contextWindow: 
1048576, releaseDate: '2026-03-03', @@ -1557,6 +1570,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 1000000, releaseDate: '2025-11-18', + deprecated: true, }, { id: 'vertex/gemini-3-flash-preview', @@ -1564,7 +1578,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 0.5, cachedInput: 0.05, output: 3.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 2 }, @@ -1572,8 +1586,9 @@ export const PROVIDER_DEFINITIONS: Record = { levels: ['minimal', 'low', 'medium', 'high'], default: 'high', }, + maxOutputTokens: 65536, }, - contextWindow: 1000000, + contextWindow: 1048576, releaseDate: '2025-12-17', }, { @@ -1582,10 +1597,11 @@ export const PROVIDER_DEFINITIONS: Record = { input: 1.25, cachedInput: 0.125, output: 10.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 2 }, + maxOutputTokens: 65536, }, contextWindow: 1048576, releaseDate: '2025-03-25', @@ -1596,10 +1612,11 @@ export const PROVIDER_DEFINITIONS: Record = { input: 0.3, cachedInput: 0.03, output: 2.5, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 2 }, + maxOutputTokens: 65536, }, contextWindow: 1048576, releaseDate: '2025-05-20', @@ -1610,10 +1627,11 @@ export const PROVIDER_DEFINITIONS: Record = { input: 0.1, cachedInput: 0.01, output: 0.4, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 2 }, + maxOutputTokens: 65536, }, contextWindow: 1048576, releaseDate: '2025-06-17', @@ -1631,6 +1649,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 1048576, releaseDate: '2025-02-05', + deprecated: true, }, { id: 'vertex/gemini-2.0-flash-lite', @@ -1644,19 +1663,22 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 1048576, releaseDate: '2025-02-25', + deprecated: true, }, { id: 'vertex/deep-research-pro-preview-12-2025', pricing: { input: 2.0, - 
output: 2.0, - updatedAt: '2026-04-01', + cachedInput: 0.2, + output: 12.0, + updatedAt: '2026-06-11', }, capabilities: { deepResearch: true, memory: false, + maxOutputTokens: 65536, }, - contextWindow: 1000000, + contextWindow: 1048576, releaseDate: '2025-12-11', }, ], @@ -1676,13 +1698,13 @@ export const PROVIDER_DEFINITIONS: Record = { { id: 'deepseek-chat', pricing: { - input: 0.28, - cachedInput: 0.028, - output: 0.42, - updatedAt: '2026-04-01', + input: 0.14, + cachedInput: 0.0028, + output: 0.28, + updatedAt: '2026-06-11', }, capabilities: {}, - contextWindow: 128000, + contextWindow: 1000000, releaseDate: '2024-12-26', }, { @@ -1698,6 +1720,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 128000, releaseDate: '2024-12-26', + deprecated: true, }, { id: 'deepseek-r1', @@ -1710,17 +1733,18 @@ export const PROVIDER_DEFINITIONS: Record = { capabilities: {}, contextWindow: 128000, releaseDate: '2025-01-20', + deprecated: true, }, { id: 'deepseek-reasoner', pricing: { - input: 0.28, - cachedInput: 0.028, - output: 0.42, - updatedAt: '2026-04-01', + input: 0.14, + cachedInput: 0.0028, + output: 0.28, + updatedAt: '2026-06-11', }, capabilities: {}, - contextWindow: 128000, + contextWindow: 1000000, releaseDate: '2025-01-20', }, ], @@ -1729,7 +1753,7 @@ export const PROVIDER_DEFINITIONS: Record = { id: 'xai', name: 'xAI', description: "xAI's Grok models", - defaultModel: 'grok-4-latest', + defaultModel: 'grok-4.3', modelPatterns: [/^grok/], icon: xAIIcon, color: '#555555', @@ -1765,6 +1789,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 256000, releaseDate: '2025-07-09', + deprecated: true, }, { id: 'grok-4-0709', @@ -1779,6 +1804,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 256000, releaseDate: '2025-07-09', + deprecated: true, }, { id: 'grok-4-1-fast-reasoning', @@ -1793,6 +1819,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 2000000, releaseDate: '2025-11-19', + deprecated: true, 
}, { id: 'grok-4-1-fast-non-reasoning', @@ -1807,6 +1834,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 2000000, releaseDate: '2025-11-19', + deprecated: true, }, { id: 'grok-4-fast-reasoning', @@ -1821,6 +1849,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 2000000, releaseDate: '2025-09-19', + deprecated: true, }, { id: 'grok-4-fast-non-reasoning', @@ -1835,6 +1864,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 2000000, releaseDate: '2025-09-19', + deprecated: true, }, { id: 'grok-code-fast-1', @@ -1849,47 +1879,48 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 256000, releaseDate: '2025-08-28', + deprecated: true, }, { id: 'grok-4.20-0309-reasoning', pricing: { - input: 2.0, + input: 1.25, cachedInput: 0.2, - output: 6.0, - updatedAt: '2026-04-01', + output: 2.5, + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, }, - contextWindow: 2000000, + contextWindow: 1000000, releaseDate: '2026-03-10', }, { id: 'grok-4.20-0309-non-reasoning', pricing: { - input: 2.0, + input: 1.25, cachedInput: 0.2, - output: 6.0, - updatedAt: '2026-04-01', + output: 2.5, + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, }, - contextWindow: 2000000, + contextWindow: 1000000, releaseDate: '2026-03-10', }, { id: 'grok-4.20-multi-agent-0309', pricing: { - input: 2.0, + input: 1.25, cachedInput: 0.2, - output: 6.0, - updatedAt: '2026-04-01', + output: 2.5, + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, }, - contextWindow: 2000000, + contextWindow: 1000000, releaseDate: '2026-03-10', }, { @@ -1905,6 +1936,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 131072, releaseDate: '2025-02-17', + deprecated: true, }, { id: 'grok-3-fast-latest', @@ -1919,6 +1951,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 131072, releaseDate: '2025-02-17', + deprecated: true, }, ], }, @@ -1939,7 
+1972,7 @@ export const PROVIDER_DEFINITIONS: Record = { pricing: { input: 0.35, output: 0.75, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: {}, contextWindow: 131072, @@ -1955,6 +1988,7 @@ export const PROVIDER_DEFINITIONS: Record = { capabilities: {}, contextWindow: 32768, releaseDate: '2024-08-27', + deprecated: true, }, { id: 'cerebras/qwen-3-235b-a22b-instruct-2507', @@ -1966,13 +2000,14 @@ export const PROVIDER_DEFINITIONS: Record = { capabilities: {}, contextWindow: 131072, releaseDate: '2025-07-29', + deprecated: true, }, { id: 'cerebras/zai-glm-4.7', pricing: { input: 2.25, output: 2.75, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: {}, contextWindow: 131072, @@ -1996,8 +2031,9 @@ export const PROVIDER_DEFINITIONS: Record = { id: 'groq/openai/gpt-oss-120b', pricing: { input: 0.15, + cachedInput: 0.075, output: 0.6, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: {}, contextWindow: 131072, @@ -2007,8 +2043,9 @@ export const PROVIDER_DEFINITIONS: Record = { id: 'groq/openai/gpt-oss-20b', pricing: { input: 0.075, + cachedInput: 0.0375, output: 0.3, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: {}, contextWindow: 131072, @@ -2018,8 +2055,9 @@ export const PROVIDER_DEFINITIONS: Record = { id: 'groq/openai/gpt-oss-safeguard-20b', pricing: { input: 0.075, + cachedInput: 0.0375, output: 0.3, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: {}, contextWindow: 131072, @@ -2030,7 +2068,7 @@ export const PROVIDER_DEFINITIONS: Record = { pricing: { input: 0.29, output: 0.59, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: {}, contextWindow: 131072, @@ -2041,7 +2079,7 @@ export const PROVIDER_DEFINITIONS: Record = { pricing: { input: 0.05, output: 0.08, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: {}, contextWindow: 131072, @@ -2052,7 +2090,7 @@ export const PROVIDER_DEFINITIONS: Record = { pricing: { 
input: 0.59, output: 0.79, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: {}, contextWindow: 131072, @@ -2063,7 +2101,7 @@ export const PROVIDER_DEFINITIONS: Record = { pricing: { input: 0.11, output: 0.34, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: {}, contextWindow: 131072, @@ -2079,6 +2117,7 @@ export const PROVIDER_DEFINITIONS: Record = { capabilities: {}, contextWindow: 262144, releaseDate: '2025-09-05', + deprecated: true, }, ], }, @@ -2106,7 +2145,7 @@ export const PROVIDER_DEFINITIONS: Record = { pricing: { input: 0.5, output: 1.5, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -2119,7 +2158,7 @@ export const PROVIDER_DEFINITIONS: Record = { pricing: { input: 0.5, output: 1.5, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -2132,7 +2171,7 @@ export const PROVIDER_DEFINITIONS: Record = { pricing: { input: 0.15, output: 0.6, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -2145,7 +2184,7 @@ export const PROVIDER_DEFINITIONS: Record = { pricing: { input: 0.4, output: 2.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -2165,32 +2204,33 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 128000, releaseDate: '2024-11-18', + deprecated: true, }, { id: 'magistral-medium-latest', pricing: { input: 2.0, output: 5.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, }, contextWindow: 128000, - releaseDate: '2025-06-10', + releaseDate: '2025-09-18', }, { id: 'magistral-medium-2509', pricing: { input: 2.0, output: 5.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, }, contextWindow: 128000, - releaseDate: '2025-09-17', + releaseDate: '2025-09-18', 
}, { id: 'magistral-small-latest', @@ -2203,7 +2243,8 @@ export const PROVIDER_DEFINITIONS: Record = { temperature: { min: 0, max: 1 }, }, contextWindow: 128000, - releaseDate: '2025-06-10', + releaseDate: '2025-09-18', + deprecated: true, }, { id: 'magistral-small-2509', @@ -2216,14 +2257,15 @@ export const PROVIDER_DEFINITIONS: Record = { temperature: { min: 0, max: 1 }, }, contextWindow: 128000, - releaseDate: '2025-09-17', + releaseDate: '2025-09-18', + deprecated: true, }, { id: 'mistral-medium-latest', pricing: { input: 0.4, output: 2.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -2236,7 +2278,7 @@ export const PROVIDER_DEFINITIONS: Record = { pricing: { input: 0.4, output: 2.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -2249,7 +2291,7 @@ export const PROVIDER_DEFINITIONS: Record = { pricing: { input: 0.4, output: 2.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -2262,7 +2304,7 @@ export const PROVIDER_DEFINITIONS: Record = { pricing: { input: 0.15, output: 0.6, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -2282,13 +2324,14 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 128000, releaseDate: '2025-06-20', + deprecated: true, }, { id: 'open-mistral-nemo', pricing: { input: 0.15, output: 0.15, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -2301,7 +2344,7 @@ export const PROVIDER_DEFINITIONS: Record = { pricing: { input: 0.3, output: 0.9, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -2314,7 +2357,7 @@ export const PROVIDER_DEFINITIONS: Record = { pricing: { input: 0.3, output: 0.9, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { 
temperature: { min: 0, max: 1 }, @@ -2327,13 +2370,13 @@ export const PROVIDER_DEFINITIONS: Record = { pricing: { input: 0.4, output: 2.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, }, contextWindow: 256000, - releaseDate: '2025-05-21', + releaseDate: '2025-12-09', }, { id: 'devstral-small-latest', @@ -2346,7 +2389,8 @@ export const PROVIDER_DEFINITIONS: Record = { temperature: { min: 0, max: 1 }, }, contextWindow: 256000, - releaseDate: '2025-07-10', + releaseDate: '2025-12-09', + deprecated: true, }, { id: 'devstral-small-2507', @@ -2360,6 +2404,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 128000, releaseDate: '2025-07-10', + deprecated: true, }, { id: 'devstral-medium-2507', @@ -2373,13 +2418,14 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 128000, releaseDate: '2025-07-10', + deprecated: true, }, { id: 'ministral-14b-latest', pricing: { input: 0.2, output: 0.2, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -2392,7 +2438,7 @@ export const PROVIDER_DEFINITIONS: Record = { pricing: { input: 0.2, output: 0.2, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -2405,20 +2451,20 @@ export const PROVIDER_DEFINITIONS: Record = { pricing: { input: 0.15, output: 0.15, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, }, contextWindow: 256000, - releaseDate: '2024-10-16', + releaseDate: '2025-12-02', }, { id: 'ministral-8b-2512', pricing: { input: 0.15, output: 0.15, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -2431,20 +2477,20 @@ export const PROVIDER_DEFINITIONS: Record = { pricing: { input: 0.1, output: 0.1, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, }, 
contextWindow: 256000, - releaseDate: '2024-10-16', + releaseDate: '2025-12-02', }, { id: 'ministral-3b-2512', pricing: { input: 0.1, output: 0.1, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -2536,7 +2582,7 @@ export const PROVIDER_DEFINITIONS: Record = { capabilities: { temperature: { min: 0, max: 1 }, nativeStructuredOutputs: true, - maxOutputTokens: 64000, + maxOutputTokens: 32768, }, contextWindow: 200000, releaseDate: '2025-08-05', @@ -2553,6 +2599,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 1000000, releaseDate: '2025-12-02', + deprecated: true, }, { id: 'bedrock/amazon.nova-2-lite-v1:0', @@ -2571,14 +2618,15 @@ export const PROVIDER_DEFINITIONS: Record = { id: 'bedrock/amazon.nova-premier-v1:0', pricing: { input: 2.5, - output: 10.0, - updatedAt: '2026-04-01', + output: 12.5, + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, }, contextWindow: 1000000, releaseDate: '2025-04-30', + deprecated: true, }, { id: 'bedrock/amazon.nova-pro-v1:0', @@ -2642,7 +2690,7 @@ export const PROVIDER_DEFINITIONS: Record = { capabilities: { temperature: { min: 0, max: 1 }, }, - contextWindow: 3500000, + contextWindow: 10000000, releaseDate: '2025-04-05', }, { @@ -2670,6 +2718,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 128000, releaseDate: '2024-09-25', + deprecated: true, }, { id: 'bedrock/meta.llama3-2-11b-instruct-v1:0', @@ -2683,6 +2732,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 128000, releaseDate: '2024-09-25', + deprecated: true, }, { id: 'bedrock/meta.llama3-2-3b-instruct-v1:0', @@ -2696,6 +2746,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 128000, releaseDate: '2024-09-25', + deprecated: true, }, { id: 'bedrock/meta.llama3-2-1b-instruct-v1:0', @@ -2709,6 +2760,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 128000, releaseDate: '2024-09-25', + deprecated: 
true, }, { id: 'bedrock/meta.llama3-1-405b-instruct-v1:0', @@ -2721,6 +2773,7 @@ export const PROVIDER_DEFINITIONS: Record = { temperature: { min: 0, max: 1 }, }, contextWindow: 128000, + deprecated: true, }, { id: 'bedrock/meta.llama3-1-70b-instruct-v1:0', @@ -2749,15 +2802,15 @@ export const PROVIDER_DEFINITIONS: Record = { { id: 'bedrock/mistral.mistral-large-3-675b-instruct', pricing: { - input: 2.0, - output: 6.0, - updatedAt: '2026-04-01', + input: 0.5, + output: 1.5, + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, maxOutputTokens: 32768, }, - contextWindow: 128000, + contextWindow: 256000, }, { id: 'bedrock/mistral.mistral-large-2411-v1:0', @@ -2770,6 +2823,7 @@ export const PROVIDER_DEFINITIONS: Record = { temperature: { min: 0, max: 1 }, }, contextWindow: 128000, + deprecated: true, }, { id: 'bedrock/mistral.mistral-large-2407-v1:0', @@ -2782,6 +2836,7 @@ export const PROVIDER_DEFINITIONS: Record = { temperature: { min: 0, max: 1 }, }, contextWindow: 128000, + deprecated: true, }, { id: 'bedrock/mistral.pixtral-large-2502-v1:0', @@ -2871,6 +2926,7 @@ export const PROVIDER_DEFINITIONS: Record = { temperature: { min: 0, max: 1 }, }, contextWindow: 32000, + deprecated: true, }, { id: 'bedrock/cohere.command-r-plus-v1:0', @@ -2883,6 +2939,7 @@ export const PROVIDER_DEFINITIONS: Record = { temperature: { min: 0, max: 1 }, }, contextWindow: 128000, + deprecated: true, }, { id: 'bedrock/cohere.command-r-v1:0', @@ -2895,6 +2952,7 @@ export const PROVIDER_DEFINITIONS: Record = { temperature: { min: 0, max: 1 }, }, contextWindow: 128000, + deprecated: true, }, ], }, diff --git a/apps/sim/providers/utils.test.ts b/apps/sim/providers/utils.test.ts index be01f0946c..f4612ccae8 100644 --- a/apps/sim/providers/utils.test.ts +++ b/apps/sim/providers/utils.test.ts @@ -193,7 +193,7 @@ describe('Model Capabilities', () => { 'gpt-4.1-mini', 'gpt-4.1-nano', 'gpt-5-chat-latest', - 'azure/gpt-5-chat-latest', + 'azure/gpt-5-chat', 
'gemini-2.5-flash', 'claude-sonnet-4-0', 'claude-opus-4-0', @@ -258,7 +258,7 @@ describe('Model Capabilities', () => { 'gpt-4o', 'azure/gpt-4o', 'gpt-5-chat-latest', - 'azure/gpt-5-chat-latest', + 'azure/gpt-5-chat', 'gemini-2.5-pro', 'gemini-2.5-flash', 'deepseek-v3', @@ -481,7 +481,7 @@ describe('Model Capabilities', () => { expect(MODELS_WITH_REASONING_EFFORT).toContain('azure/o4-mini') expect(MODELS_WITH_REASONING_EFFORT).not.toContain('gpt-5-chat-latest') - expect(MODELS_WITH_REASONING_EFFORT).not.toContain('azure/gpt-5-chat-latest') + expect(MODELS_WITH_REASONING_EFFORT).not.toContain('azure/gpt-5-chat') expect(MODELS_WITH_REASONING_EFFORT).not.toContain('gpt-4o') expect(MODELS_WITH_REASONING_EFFORT).not.toContain('claude-sonnet-4-0') @@ -506,7 +506,7 @@ describe('Model Capabilities', () => { expect(MODELS_WITH_VERBOSITY).toContain('azure/gpt-5.2') expect(MODELS_WITH_VERBOSITY).not.toContain('gpt-5-chat-latest') - expect(MODELS_WITH_VERBOSITY).not.toContain('azure/gpt-5-chat-latest') + expect(MODELS_WITH_VERBOSITY).not.toContain('azure/gpt-5-chat') expect(MODELS_WITH_VERBOSITY).not.toContain('o1') expect(MODELS_WITH_VERBOSITY).not.toContain('o3') @@ -603,7 +603,9 @@ describe('Model Capabilities', () => { const values = getReasoningEffortValuesForModel('azure/gpt-5.2') expect(values).toBeDefined() expect(values).not.toContain('minimal') - expect(values).toContain('xhigh') + expect(values).toContain('none') + expect(values).toContain('high') + expect(values).not.toContain('xhigh') }) }) @@ -713,7 +715,7 @@ describe('Max Output Tokens', () => { it.concurrent('should return published max for Bedrock Claude Opus 4.1', () => { expect(getMaxOutputTokensForModel('bedrock/anthropic.claude-opus-4-1-20250805-v1:0')).toBe( - 64000 + 32768 ) }) From 47e8baf347df6f040b24500a42530b44d85b208e Mon Sep 17 00:00:00 2001 From: waleed Date: Thu, 11 Jun 2026 19:56:33 -0700 Subject: [PATCH 2/4] fix(providers): apply full re-validation pass across model catalog with per-provider 
justification docs --- apps/sim/app/(landing)/models/utils.test.ts | 2 +- apps/sim/blocks/blocks/agent.ts | 22 ++ apps/sim/providers/bedrock/utils.test.ts | 46 +++ apps/sim/providers/bedrock/utils.ts | 27 +- apps/sim/providers/models.ts | 417 +++++++++++--------- apps/sim/providers/utils.test.ts | 39 +- apps/sim/providers/utils.ts | 8 +- docs/models/anthropic.md | 232 +++++++++++ docs/models/azure.md | 258 ++++++++++++ docs/models/bedrock.md | 226 +++++++++++ docs/models/deepseek-cerebras.md | 189 +++++++++ docs/models/embeddings-rerank-dynamic.md | 75 ++++ docs/models/google.md | 184 +++++++++ docs/models/groq.md | 157 ++++++++ docs/models/mistral.md | 305 ++++++++++++++ docs/models/openai.md | 338 ++++++++++++++++ docs/models/vertex.md | 212 ++++++++++ docs/models/xai.md | 91 +++++ 18 files changed, 2629 insertions(+), 199 deletions(-) create mode 100644 apps/sim/providers/bedrock/utils.test.ts create mode 100644 docs/models/anthropic.md create mode 100644 docs/models/azure.md create mode 100644 docs/models/bedrock.md create mode 100644 docs/models/deepseek-cerebras.md create mode 100644 docs/models/embeddings-rerank-dynamic.md create mode 100644 docs/models/google.md create mode 100644 docs/models/groq.md create mode 100644 docs/models/mistral.md create mode 100644 docs/models/openai.md create mode 100644 docs/models/vertex.md create mode 100644 docs/models/xai.md diff --git a/apps/sim/app/(landing)/models/utils.test.ts b/apps/sim/app/(landing)/models/utils.test.ts index 894c74500c..05c8d88cca 100644 --- a/apps/sim/app/(landing)/models/utils.test.ts +++ b/apps/sim/app/(landing)/models/utils.test.ts @@ -38,7 +38,7 @@ describe('model catalog capability facts', () => { it.concurrent('keeps best-for copy for clearly differentiated models only', () => { const researchModel = getModelBySlug('google', 'deep-research-pro-preview-12-2025') - const generalModel = getModelBySlug('xai', 'grok-4-latest') + const generalModel = getModelBySlug('mistral', 
'mistral-medium-latest') expect(researchModel).not.toBeNull() expect(generalModel).not.toBeNull() diff --git a/apps/sim/blocks/blocks/agent.ts b/apps/sim/blocks/blocks/agent.ts index bb106f9fdc..afa55b44db 100644 --- a/apps/sim/blocks/blocks/agent.ts +++ b/apps/sim/blocks/blocks/agent.ts @@ -412,6 +412,28 @@ Return ONLY the JSON array.`, })(), }), }, + { + id: 'temperature', + title: 'Temperature', + type: 'slider', + min: 0, + max: 1.5, + defaultValue: 0.3, + mode: 'advanced', + condition: () => ({ + field: 'model', + value: (() => { + const deepResearch = new Set(MODELS_WITH_DEEP_RESEARCH.map((m) => m.toLowerCase())) + const allModels = Object.keys(getBaseModelProviders()) + return allModels.filter( + (model) => + supportsTemperature(model) && + getMaxTemperature(model) === 1.5 && + !deepResearch.has(model.toLowerCase()) + ) + })(), + }), + }, { id: 'temperature', title: 'Temperature', diff --git a/apps/sim/providers/bedrock/utils.test.ts b/apps/sim/providers/bedrock/utils.test.ts new file mode 100644 index 0000000000..a667d61412 --- /dev/null +++ b/apps/sim/providers/bedrock/utils.test.ts @@ -0,0 +1,46 @@ +/** + * @vitest-environment node + */ +import { describe, expect, it } from 'vitest' +import { getBedrockInferenceProfileId } from '@/providers/bedrock/utils' + +describe('getBedrockInferenceProfileId', () => { + it.concurrent('prefixes geo inference profile for models that require it', () => { + expect( + getBedrockInferenceProfileId('bedrock/anthropic.claude-sonnet-4-5-20250929-v1:0', 'us-east-1') + ).toBe('us.anthropic.claude-sonnet-4-5-20250929-v1:0') + expect(getBedrockInferenceProfileId('bedrock/amazon.nova-pro-v1:0', 'eu-west-1')).toBe( + 'eu.amazon.nova-pro-v1:0' + ) + expect( + getBedrockInferenceProfileId('bedrock/meta.llama4-scout-17b-instruct-v1:0', 'us-west-2') + ).toBe('us.meta.llama4-scout-17b-instruct-v1:0') + }) + + it.concurrent('returns already-prefixed inference profile IDs unchanged', () => { + expect( + 
getBedrockInferenceProfileId('us.anthropic.claude-sonnet-4-5-20250929-v1:0', 'us-east-1') + ).toBe('us.anthropic.claude-sonnet-4-5-20250929-v1:0') + expect(getBedrockInferenceProfileId('global.amazon.nova-2-lite-v1:0', 'us-east-1')).toBe( + 'global.amazon.nova-2-lite-v1:0' + ) + }) + + it.concurrent('returns the bare model ID for models without geo profile support', () => { + expect( + getBedrockInferenceProfileId('bedrock/mistral.mistral-large-3-675b-instruct', 'us-east-1') + ).toBe('mistral.mistral-large-3-675b-instruct') + expect( + getBedrockInferenceProfileId('bedrock/mistral.ministral-3-8b-instruct', 'eu-west-1') + ).toBe('mistral.ministral-3-8b-instruct') + expect(getBedrockInferenceProfileId('bedrock/cohere.command-r-plus-v1:0', 'us-east-1')).toBe( + 'cohere.command-r-plus-v1:0' + ) + expect( + getBedrockInferenceProfileId('bedrock/mistral.mixtral-8x7b-instruct-v0:1', 'ap-southeast-1') + ).toBe('mistral.mixtral-8x7b-instruct-v0:1') + expect( + getBedrockInferenceProfileId('bedrock/amazon.titan-text-premier-v1:0', 'us-east-1') + ).toBe('amazon.titan-text-premier-v1:0') + }) +}) diff --git a/apps/sim/providers/bedrock/utils.ts b/apps/sim/providers/bedrock/utils.ts index 401c264c0c..a385ffd053 100644 --- a/apps/sim/providers/bedrock/utils.ts +++ b/apps/sim/providers/bedrock/utils.ts @@ -81,10 +81,31 @@ export function generateToolUseId(toolName: string): string { return `${truncatedName}${suffix}` } +/** + * Models whose AWS model cards state geo/cross-region inference profiles are + * not supported ("Geo inference ID: Not supported"). These must be invoked + * with the bare in-region model ID — prefixing them with a geo profile + * (e.g. us.mistral...) produces an invalid model identifier. 
+ */ +const GEO_PROFILE_UNSUPPORTED_MODEL_IDS = new Set([ + 'mistral.mistral-large-3-675b-instruct', + 'mistral.mistral-large-2411-v1:0', + 'mistral.mistral-large-2407-v1:0', + 'mistral.magistral-small-2509', + 'mistral.ministral-3-14b-instruct', + 'mistral.ministral-3-8b-instruct', + 'mistral.ministral-3-3b-instruct', + 'mistral.mixtral-8x7b-instruct-v0:1', + 'amazon.titan-text-premier-v1:0', + 'cohere.command-r-v1:0', + 'cohere.command-r-plus-v1:0', +]) + /** * Converts a model ID to the Bedrock inference profile format. * AWS Bedrock requires inference profile IDs (e.g., us.anthropic.claude-...) - * for on-demand invocation of newer models. + * for on-demand invocation of newer models, while some models only accept + * the bare in-region model ID. * * @param modelId - The model ID (e.g., "bedrock/anthropic.claude-sonnet-4-5-20250929-v1:0") * @param region - The AWS region (e.g., "us-east-1") @@ -97,6 +118,10 @@ export function getBedrockInferenceProfileId(modelId: string, region: string): s return baseModelId } + if (GEO_PROFILE_UNSUPPORTED_MODEL_IDS.has(baseModelId)) { + return baseModelId + } + let inferencePrefix: string if (region.startsWith('us-gov-')) { inferencePrefix = 'us-gov' diff --git a/apps/sim/providers/models.ts b/apps/sim/providers/models.ts index b0caf31aff..988074e6a6 100644 --- a/apps/sim/providers/models.ts +++ b/apps/sim/providers/models.ts @@ -110,7 +110,7 @@ export const PROVIDER_DEFINITIONS: Record = { color: '#EF2CC1', isReseller: true, capabilities: { - temperature: { min: 0, max: 2 }, + temperature: { min: 0, max: 1 }, toolUsageControl: true, }, contextInformationAvailable: false, @@ -157,7 +157,7 @@ export const PROVIDER_DEFINITIONS: Record = { isReseller: true, capabilities: { temperature: { min: 0, max: 2 }, - toolUsageControl: true, + toolUsageControl: false, }, contextInformationAvailable: false, models: [], @@ -246,6 +246,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 1047576, releaseDate: '2025-04-14', + 
deprecated: true, }, // GPT-5.5 family { @@ -253,15 +254,12 @@ export const PROVIDER_DEFINITIONS: Record = { pricing: { input: 30.0, output: 180.0, - updatedAt: '2026-04-23', + updatedAt: '2026-06-11', }, capabilities: { nativeStructuredOutputs: true, reasoningEffort: { - values: ['none', 'low', 'medium', 'high', 'xhigh'], - }, - verbosity: { - values: ['low', 'medium', 'high'], + values: ['medium', 'high', 'xhigh'], }, maxOutputTokens: 128000, }, @@ -274,7 +272,7 @@ export const PROVIDER_DEFINITIONS: Record = { input: 5.0, cachedInput: 0.5, output: 30.0, - updatedAt: '2026-04-23', + updatedAt: '2026-06-11', }, capabilities: { nativeStructuredOutputs: true, @@ -424,7 +422,7 @@ export const PROVIDER_DEFINITIONS: Record = { maxOutputTokens: 128000, }, contextWindow: 400000, - releaseDate: '2025-11-12', + releaseDate: '2025-11-13', }, // GPT-5 family { @@ -567,6 +565,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 200000, releaseDate: '2025-04-16', + deprecated: true, }, { id: 'o3-mini', @@ -584,6 +583,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 200000, releaseDate: '2025-01-31', + deprecated: true, }, { id: 'o1', @@ -600,7 +600,8 @@ export const PROVIDER_DEFINITIONS: Record = { maxOutputTokens: 100000, }, contextWindow: 200000, - releaseDate: '2024-12-05', + releaseDate: '2024-12-17', + deprecated: true, }, // Legacy { @@ -642,6 +643,7 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: '2026-06-09', }, capabilities: { + nativeStructuredOutputs: true, maxOutputTokens: 128000, thinking: { levels: ['low', 'medium', 'high', 'xhigh', 'max'], @@ -865,7 +867,7 @@ export const PROVIDER_DEFINITIONS: Record = { maxOutputTokens: 4096, }, contextWindow: 200000, - releaseDate: '2024-03-07', + releaseDate: '2024-03-13', deprecated: true, }, ], @@ -892,9 +894,11 @@ export const PROVIDER_DEFINITIONS: Record = { }, capabilities: { temperature: { min: 0, max: 2 }, + maxOutputTokens: 16384, }, contextWindow: 128000, releaseDate: 
'2024-11-20', + deprecated: true, }, { id: 'azure/gpt-5.4', @@ -906,7 +910,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, capabilities: { reasoningEffort: { - values: ['none', 'low', 'medium', 'high'], + values: ['low', 'medium', 'high'], }, verbosity: { values: ['low', 'medium', 'high'], @@ -926,7 +930,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, capabilities: { reasoningEffort: { - values: ['none', 'low', 'medium', 'high'], + values: ['low', 'medium', 'high'], }, verbosity: { values: ['low', 'medium', 'high'], @@ -946,7 +950,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, capabilities: { reasoningEffort: { - values: ['none', 'low', 'medium', 'high'], + values: ['low', 'medium', 'high'], }, verbosity: { values: ['low', 'medium', 'high'], @@ -994,7 +998,7 @@ export const PROVIDER_DEFINITIONS: Record = { maxOutputTokens: 128000, }, contextWindow: 400000, - releaseDate: '2025-11-12', + releaseDate: '2025-11-13', }, { id: 'azure/gpt-5.1-codex', @@ -1014,8 +1018,7 @@ export const PROVIDER_DEFINITIONS: Record = { maxOutputTokens: 128000, }, contextWindow: 400000, - releaseDate: '2025-11-12', - deprecated: true, + releaseDate: '2025-11-13', }, { id: 'azure/gpt-5', @@ -1267,7 +1270,6 @@ export const PROVIDER_DEFINITIONS: Record = { }, capabilities: { temperature: { min: 0, max: 1 }, - nativeStructuredOutputs: true, maxOutputTokens: 32000, thinking: { levels: ['low', 'medium', 'high'], @@ -1368,7 +1370,8 @@ export const PROVIDER_DEFINITIONS: Record = { maxOutputTokens: 65536, }, contextWindow: 1048576, - releaseDate: '2026-03-03', + releaseDate: '2026-05-07', + speedOptimized: true, }, { id: 'gemini-3-flash-preview', @@ -1388,6 +1391,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 1048576, releaseDate: '2025-12-17', + deprecated: true, }, { id: 'gemini-2.5-pro', @@ -1514,6 +1518,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 1048576, releaseDate: '2026-05-19', + recommended: true, }, { id: 
'vertex/gemini-3.1-pro-preview', @@ -1551,7 +1556,8 @@ export const PROVIDER_DEFINITIONS: Record = { maxOutputTokens: 65536, }, contextWindow: 1048576, - releaseDate: '2026-03-03', + releaseDate: '2026-05-07', + speedOptimized: true, }, { id: 'vertex/gemini-3-pro-preview', @@ -1601,7 +1607,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, capabilities: { temperature: { min: 0, max: 2 }, - maxOutputTokens: 65536, + maxOutputTokens: 65535, }, contextWindow: 1048576, releaseDate: '2025-03-25', @@ -1616,7 +1622,7 @@ export const PROVIDER_DEFINITIONS: Record = { }, capabilities: { temperature: { min: 0, max: 2 }, - maxOutputTokens: 65536, + maxOutputTokens: 65535, }, contextWindow: 1048576, releaseDate: '2025-05-20', @@ -1631,18 +1637,19 @@ export const PROVIDER_DEFINITIONS: Record = { }, capabilities: { temperature: { min: 0, max: 2 }, - maxOutputTokens: 65536, + maxOutputTokens: 65535, }, contextWindow: 1048576, releaseDate: '2025-06-17', + speedOptimized: true, }, { id: 'vertex/gemini-2.0-flash', pricing: { - input: 0.1, + input: 0.15, cachedInput: 0.025, - output: 0.4, - updatedAt: '2026-04-01', + output: 0.6, + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 2 }, @@ -1703,7 +1710,9 @@ export const PROVIDER_DEFINITIONS: Record = { output: 0.28, updatedAt: '2026-06-11', }, - capabilities: {}, + capabilities: { + temperature: { min: 0, max: 2 }, + }, contextWindow: 1000000, releaseDate: '2024-12-26', }, @@ -1770,7 +1779,7 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: '2026-05-05', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 2 }, }, contextWindow: 1000000, releaseDate: '2026-04-30', @@ -1779,103 +1788,103 @@ export const PROVIDER_DEFINITIONS: Record = { { id: 'grok-4-latest', pricing: { - input: 3.0, - cachedInput: 0.75, - output: 15.0, - updatedAt: '2026-04-01', + input: 1.25, + cachedInput: 0.2, + output: 2.5, + updatedAt: '2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 
}, + temperature: { min: 0, max: 2 }, }, - contextWindow: 256000, + contextWindow: 1000000, releaseDate: '2025-07-09', deprecated: true, }, { id: 'grok-4-0709', pricing: { - input: 3.0, - cachedInput: 0.75, - output: 15.0, - updatedAt: '2026-04-01', + input: 1.25, + cachedInput: 0.2, + output: 2.5, + updatedAt: '2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 2 }, }, - contextWindow: 256000, + contextWindow: 1000000, releaseDate: '2025-07-09', deprecated: true, }, { id: 'grok-4-1-fast-reasoning', pricing: { - input: 0.2, - cachedInput: 0.05, - output: 0.5, - updatedAt: '2026-04-01', + input: 1.25, + cachedInput: 0.2, + output: 2.5, + updatedAt: '2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 2 }, }, - contextWindow: 2000000, + contextWindow: 1000000, releaseDate: '2025-11-19', deprecated: true, }, { id: 'grok-4-1-fast-non-reasoning', pricing: { - input: 0.2, - cachedInput: 0.05, - output: 0.5, - updatedAt: '2026-04-01', + input: 1.25, + cachedInput: 0.2, + output: 2.5, + updatedAt: '2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 2 }, }, - contextWindow: 2000000, + contextWindow: 1000000, releaseDate: '2025-11-19', deprecated: true, }, { id: 'grok-4-fast-reasoning', pricing: { - input: 0.2, - cachedInput: 0.05, - output: 0.5, - updatedAt: '2026-04-01', + input: 1.25, + cachedInput: 0.2, + output: 2.5, + updatedAt: '2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 2 }, }, - contextWindow: 2000000, + contextWindow: 1000000, releaseDate: '2025-09-19', deprecated: true, }, { id: 'grok-4-fast-non-reasoning', pricing: { - input: 0.2, - cachedInput: 0.05, - output: 0.5, - updatedAt: '2026-04-01', + input: 1.25, + cachedInput: 0.2, + output: 2.5, + updatedAt: '2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 2 }, }, - contextWindow: 
2000000, + contextWindow: 1000000, releaseDate: '2025-09-19', deprecated: true, }, { id: 'grok-code-fast-1', pricing: { - input: 0.2, - cachedInput: 0.02, - output: 1.5, - updatedAt: '2026-04-01', + input: 1.0, + cachedInput: 0.2, + output: 2.0, + updatedAt: '2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 2 }, }, contextWindow: 256000, releaseDate: '2025-08-28', @@ -1890,7 +1899,7 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: '2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 2 }, }, contextWindow: 1000000, releaseDate: '2026-03-10', @@ -1904,7 +1913,7 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: '2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 2 }, }, contextWindow: 1000000, releaseDate: '2026-03-10', @@ -1918,7 +1927,7 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: '2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 2 }, }, contextWindow: 1000000, releaseDate: '2026-03-10', @@ -1926,30 +1935,30 @@ export const PROVIDER_DEFINITIONS: Record = { { id: 'grok-3-latest', pricing: { - input: 3.0, - cachedInput: 0.75, - output: 15.0, - updatedAt: '2026-04-01', + input: 1.25, + cachedInput: 0.2, + output: 2.5, + updatedAt: '2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 2 }, }, - contextWindow: 131072, + contextWindow: 1000000, releaseDate: '2025-02-17', deprecated: true, }, { id: 'grok-3-fast-latest', pricing: { - input: 5.0, - cachedInput: 0.75, - output: 25.0, - updatedAt: '2026-04-01', + input: 1.25, + cachedInput: 0.2, + output: 2.5, + updatedAt: '2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 2 }, }, - contextWindow: 131072, + contextWindow: 1000000, releaseDate: '2025-02-17', deprecated: true, }, @@ -1974,7 +1983,10 @@ export const 
PROVIDER_DEFINITIONS: Record = { output: 0.75, updatedAt: '2026-06-11', }, - capabilities: {}, + capabilities: { + temperature: { min: 0, max: 2 }, + maxOutputTokens: 40000, + }, contextWindow: 131072, releaseDate: '2025-08-05', }, @@ -2009,7 +2021,10 @@ export const PROVIDER_DEFINITIONS: Record = { output: 2.75, updatedAt: '2026-06-11', }, - capabilities: {}, + capabilities: { + temperature: { min: 0, max: 2 }, + maxOutputTokens: 40000, + }, contextWindow: 131072, releaseDate: '2025-12-22', }, @@ -2024,6 +2039,7 @@ export const PROVIDER_DEFINITIONS: Record = { icon: GroqIcon, color: '#F55036', capabilities: { + temperature: { min: 0, max: 2 }, toolUsageControl: true, }, models: [ @@ -2035,9 +2051,12 @@ export const PROVIDER_DEFINITIONS: Record = { output: 0.6, updatedAt: '2026-06-11', }, - capabilities: {}, + capabilities: { + maxOutputTokens: 65536, + }, contextWindow: 131072, releaseDate: '2025-08-05', + recommended: true, }, { id: 'groq/openai/gpt-oss-20b', @@ -2047,7 +2066,9 @@ export const PROVIDER_DEFINITIONS: Record = { output: 0.3, updatedAt: '2026-06-11', }, - capabilities: {}, + capabilities: { + maxOutputTokens: 65536, + }, contextWindow: 131072, releaseDate: '2025-08-05', }, @@ -2059,7 +2080,9 @@ export const PROVIDER_DEFINITIONS: Record = { output: 0.3, updatedAt: '2026-06-11', }, - capabilities: {}, + capabilities: { + maxOutputTokens: 65536, + }, contextWindow: 131072, releaseDate: '2025-10-29', }, @@ -2070,7 +2093,9 @@ export const PROVIDER_DEFINITIONS: Record = { output: 0.59, updatedAt: '2026-06-11', }, - capabilities: {}, + capabilities: { + maxOutputTokens: 40960, + }, contextWindow: 131072, releaseDate: '2025-04-29', }, @@ -2081,9 +2106,12 @@ export const PROVIDER_DEFINITIONS: Record = { output: 0.08, updatedAt: '2026-06-11', }, - capabilities: {}, + capabilities: { + maxOutputTokens: 131072, + }, contextWindow: 131072, releaseDate: '2024-07-23', + speedOptimized: true, }, { id: 'groq/llama-3.3-70b-versatile', @@ -2092,7 +2120,9 @@ export 
const PROVIDER_DEFINITIONS: Record = { output: 0.79, updatedAt: '2026-06-11', }, - capabilities: {}, + capabilities: { + maxOutputTokens: 32768, + }, contextWindow: 131072, releaseDate: '2024-12-06', }, @@ -2103,7 +2133,9 @@ export const PROVIDER_DEFINITIONS: Record = { output: 0.34, updatedAt: '2026-06-11', }, - capabilities: {}, + capabilities: { + maxOutputTokens: 8192, + }, contextWindow: 131072, releaseDate: '2025-04-05', }, @@ -2148,10 +2180,11 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: '2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 1.5 }, }, contextWindow: 256000, releaseDate: '2025-12-02', + recommended: true, }, { id: 'mistral-large-2512', @@ -2161,7 +2194,7 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: '2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 1.5 }, }, contextWindow: 256000, releaseDate: '2025-12-02', @@ -2174,7 +2207,7 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: '2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 1.5 }, }, contextWindow: 256000, releaseDate: '2026-03-16', @@ -2187,7 +2220,7 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: '2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 1.5 }, }, contextWindow: 256000, releaseDate: '2025-12-09', @@ -2200,7 +2233,7 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: '2026-04-01', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 1.5 }, }, contextWindow: 128000, releaseDate: '2024-11-18', @@ -2214,7 +2247,7 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: '2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 1.5 }, }, contextWindow: 128000, releaseDate: '2025-09-18', @@ -2227,7 +2260,7 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: 
'2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 1.5 }, }, contextWindow: 128000, releaseDate: '2025-09-18', @@ -2240,7 +2273,7 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: '2026-04-01', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 1.5 }, }, contextWindow: 128000, releaseDate: '2025-09-18', @@ -2254,7 +2287,7 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: '2026-04-01', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 1.5 }, }, contextWindow: 128000, releaseDate: '2025-09-18', @@ -2268,7 +2301,7 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: '2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 1.5 }, }, contextWindow: 128000, releaseDate: '2025-08-12', @@ -2281,7 +2314,7 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: '2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 1.5 }, }, contextWindow: 128000, releaseDate: '2025-08-12', @@ -2294,7 +2327,7 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: '2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 1.5 }, }, contextWindow: 128000, releaseDate: '2025-05-07', @@ -2307,7 +2340,7 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: '2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 1.5 }, }, contextWindow: 256000, releaseDate: '2026-03-16', @@ -2320,7 +2353,7 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: '2026-04-01', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 1.5 }, }, contextWindow: 128000, releaseDate: '2025-06-20', @@ -2334,7 +2367,7 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: '2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 1.5 
}, }, contextWindow: 128000, releaseDate: '2024-07-18', @@ -2347,7 +2380,7 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: '2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 1.5 }, }, contextWindow: 128000, releaseDate: '2025-07-30', @@ -2360,7 +2393,7 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: '2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 1.5 }, }, contextWindow: 128000, releaseDate: '2025-07-30', @@ -2373,7 +2406,7 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: '2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 1.5 }, }, contextWindow: 256000, releaseDate: '2025-12-09', @@ -2386,7 +2419,7 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: '2026-04-01', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 1.5 }, }, contextWindow: 256000, releaseDate: '2025-12-09', @@ -2400,7 +2433,7 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: '2026-04-01', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 1.5 }, }, contextWindow: 128000, releaseDate: '2025-07-10', @@ -2414,7 +2447,7 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: '2026-04-01', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 1.5 }, }, contextWindow: 128000, releaseDate: '2025-07-10', @@ -2428,10 +2461,11 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: '2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 1.5 }, }, contextWindow: 256000, releaseDate: '2025-12-02', + speedOptimized: true, }, { id: 'ministral-14b-2512', @@ -2441,10 +2475,11 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: '2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 1.5 }, }, contextWindow: 256000, releaseDate: 
'2025-12-02', + speedOptimized: true, }, { id: 'ministral-8b-latest', @@ -2454,10 +2489,11 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: '2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 1.5 }, }, contextWindow: 256000, releaseDate: '2025-12-02', + speedOptimized: true, }, { id: 'ministral-8b-2512', @@ -2467,10 +2503,11 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: '2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 1.5 }, }, contextWindow: 256000, releaseDate: '2025-12-02', + speedOptimized: true, }, { id: 'ministral-3b-latest', @@ -2480,10 +2517,11 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: '2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 1.5 }, }, contextWindow: 256000, releaseDate: '2025-12-02', + speedOptimized: true, }, { id: 'ministral-3b-2512', @@ -2493,10 +2531,11 @@ export const PROVIDER_DEFINITIONS: Record = { updatedAt: '2026-06-11', }, capabilities: { - temperature: { min: 0, max: 1 }, + temperature: { min: 0, max: 1.5 }, }, contextWindow: 256000, releaseDate: '2025-12-02', + speedOptimized: true, }, ], }, @@ -2531,8 +2570,9 @@ export const PROVIDER_DEFINITIONS: Record = { id: 'bedrock/anthropic.claude-opus-4-5-20251101-v1:0', pricing: { input: 5.0, + cachedInput: 0.5, output: 25.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -2546,8 +2586,9 @@ export const PROVIDER_DEFINITIONS: Record = { id: 'bedrock/anthropic.claude-sonnet-4-5-20250929-v1:0', pricing: { input: 3.0, + cachedInput: 0.3, output: 15.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -2556,13 +2597,15 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 200000, releaseDate: '2025-09-29', + recommended: true, }, { id: 
'bedrock/anthropic.claude-haiku-4-5-20251001-v1:0', pricing: { input: 1.0, + cachedInput: 0.1, output: 5.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -2571,18 +2614,20 @@ export const PROVIDER_DEFINITIONS: Record = { }, contextWindow: 200000, releaseDate: '2025-10-15', + speedOptimized: true, }, { id: 'bedrock/anthropic.claude-opus-4-1-20250805-v1:0', pricing: { input: 15.0, + cachedInput: 1.5, output: 75.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, nativeStructuredOutputs: true, - maxOutputTokens: 32768, + maxOutputTokens: 32000, }, contextWindow: 200000, releaseDate: '2025-08-05', @@ -2590,9 +2635,9 @@ export const PROVIDER_DEFINITIONS: Record = { { id: 'bedrock/amazon.nova-2-pro-v1:0', pricing: { - input: 1.0, - output: 4.0, - updatedAt: '2026-04-01', + input: 1.375, + output: 11.0, + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -2604,12 +2649,14 @@ export const PROVIDER_DEFINITIONS: Record = { { id: 'bedrock/amazon.nova-2-lite-v1:0', pricing: { - input: 0.08, - output: 0.32, - updatedAt: '2026-04-01', + input: 0.33, + cachedInput: 0.0825, + output: 2.75, + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, + maxOutputTokens: 64000, }, contextWindow: 1000000, releaseDate: '2025-12-02', @@ -2618,6 +2665,7 @@ export const PROVIDER_DEFINITIONS: Record = { id: 'bedrock/amazon.nova-premier-v1:0', pricing: { input: 2.5, + cachedInput: 0.625, output: 12.5, updatedAt: '2026-06-11', }, @@ -2632,11 +2680,13 @@ export const PROVIDER_DEFINITIONS: Record = { id: 'bedrock/amazon.nova-pro-v1:0', pricing: { input: 0.8, + cachedInput: 0.2, output: 3.2, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, + maxOutputTokens: 5120, }, contextWindow: 300000, releaseDate: '2024-12-03', @@ -2645,11 +2695,13 @@ export const PROVIDER_DEFINITIONS: 
Record = { id: 'bedrock/amazon.nova-lite-v1:0', pricing: { input: 0.06, + cachedInput: 0.015, output: 0.24, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, + maxOutputTokens: 5120, }, contextWindow: 300000, releaseDate: '2024-12-03', @@ -2658,24 +2710,28 @@ export const PROVIDER_DEFINITIONS: Record = { id: 'bedrock/amazon.nova-micro-v1:0', pricing: { input: 0.035, + cachedInput: 0.00875, output: 0.14, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, + maxOutputTokens: 5120, }, contextWindow: 128000, releaseDate: '2024-12-03', + speedOptimized: true, }, { id: 'bedrock/meta.llama4-maverick-17b-instruct-v1:0', pricing: { input: 0.24, output: 0.97, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, + maxOutputTokens: 8192, }, contextWindow: 1000000, releaseDate: '2025-04-05', @@ -2683,12 +2739,13 @@ export const PROVIDER_DEFINITIONS: Record = { { id: 'bedrock/meta.llama4-scout-17b-instruct-v1:0', pricing: { - input: 0.18, - output: 0.72, - updatedAt: '2026-04-01', + input: 0.17, + output: 0.66, + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, + maxOutputTokens: 8192, }, contextWindow: 10000000, releaseDate: '2025-04-05', @@ -2698,10 +2755,11 @@ export const PROVIDER_DEFINITIONS: Record = { pricing: { input: 0.72, output: 0.72, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, + maxOutputTokens: 4096, }, contextWindow: 128000, releaseDate: '2024-12-06', @@ -2709,9 +2767,9 @@ export const PROVIDER_DEFINITIONS: Record = { { id: 'bedrock/meta.llama3-2-90b-instruct-v1:0', pricing: { - input: 2.0, - output: 2.0, - updatedAt: '2026-04-01', + input: 0.72, + output: 0.72, + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -2725,7 +2783,7 @@ export const PROVIDER_DEFINITIONS: Record = { pricing: { 
input: 0.16, output: 0.16, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -2739,7 +2797,7 @@ export const PROVIDER_DEFINITIONS: Record = { pricing: { input: 0.15, output: 0.15, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -2753,7 +2811,7 @@ export const PROVIDER_DEFINITIONS: Record = { pricing: { input: 0.1, output: 0.1, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -2765,9 +2823,9 @@ export const PROVIDER_DEFINITIONS: Record = { { id: 'bedrock/meta.llama3-1-405b-instruct-v1:0', pricing: { - input: 5.32, - output: 16.0, - updatedAt: '2026-04-01', + input: 2.4, + output: 2.4, + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -2778,26 +2836,30 @@ export const PROVIDER_DEFINITIONS: Record = { { id: 'bedrock/meta.llama3-1-70b-instruct-v1:0', pricing: { - input: 2.65, - output: 3.5, - updatedAt: '2026-04-01', + input: 0.72, + output: 0.72, + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, + maxOutputTokens: 4096, }, contextWindow: 128000, + releaseDate: '2024-07-23', }, { id: 'bedrock/meta.llama3-1-8b-instruct-v1:0', pricing: { - input: 0.3, - output: 0.6, - updatedAt: '2026-04-01', + input: 0.22, + output: 0.22, + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, + maxOutputTokens: 4096, }, contextWindow: 128000, + releaseDate: '2024-07-23', }, { id: 'bedrock/mistral.mistral-large-3-675b-instruct', @@ -2811,6 +2873,7 @@ export const PROVIDER_DEFINITIONS: Record = { maxOutputTokens: 32768, }, contextWindow: 256000, + releaseDate: '2025-12-02', }, { id: 'bedrock/mistral.mistral-large-2411-v1:0', @@ -2828,9 +2891,9 @@ export const PROVIDER_DEFINITIONS: Record = { { id: 'bedrock/mistral.mistral-large-2407-v1:0', pricing: { - input: 4.0, - output: 12.0, - updatedAt: '2026-04-01', + input: 2.0, + 
output: 6.0, + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -2843,7 +2906,7 @@ export const PROVIDER_DEFINITIONS: Record = { pricing: { input: 2.0, output: 6.0, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -2856,7 +2919,7 @@ export const PROVIDER_DEFINITIONS: Record = { pricing: { input: 0.5, output: 1.5, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, @@ -2869,49 +2932,53 @@ export const PROVIDER_DEFINITIONS: Record = { pricing: { input: 0.2, output: 0.2, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, maxOutputTokens: 8192, }, contextWindow: 128000, + releaseDate: '2025-12-02', }, { id: 'bedrock/mistral.ministral-3-8b-instruct', pricing: { - input: 0.1, - output: 0.1, - updatedAt: '2026-04-01', + input: 0.15, + output: 0.15, + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, maxOutputTokens: 8192, }, contextWindow: 128000, + releaseDate: '2025-12-02', }, { id: 'bedrock/mistral.ministral-3-3b-instruct', pricing: { - input: 0.04, - output: 0.04, - updatedAt: '2026-04-01', + input: 0.1, + output: 0.1, + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, maxOutputTokens: 8192, }, contextWindow: 128000, + releaseDate: '2025-12-02', }, { id: 'bedrock/mistral.mixtral-8x7b-instruct-v0:1', pricing: { input: 0.45, output: 0.7, - updatedAt: '2026-04-01', + updatedAt: '2026-06-11', }, capabilities: { temperature: { min: 0, max: 1 }, + maxOutputTokens: 4096, }, contextWindow: 32000, }, @@ -3122,23 +3189,11 @@ export function getModelsWithTemperatureSupport(): string[] { return models } -export function getModelsWithTempRange01(): string[] { - const models: string[] = [] - for (const provider of Object.values(PROVIDER_DEFINITIONS)) { - for (const model of provider.models) { - if 
(model.capabilities.temperature?.max === 1) { - models.push(model.id) - } - } - } - return models -} - -export function getModelsWithTempRange02(): string[] { +export function getModelsWithTemperatureRange(max: number): string[] { const models: string[] = [] for (const provider of Object.values(PROVIDER_DEFINITIONS)) { for (const model of provider.models) { - if (model.capabilities.temperature?.max === 2) { + if (model.capabilities.temperature?.max === max) { models.push(model.id) } } diff --git a/apps/sim/providers/utils.test.ts b/apps/sim/providers/utils.test.ts index f4612ccae8..46d414cbb6 100644 --- a/apps/sim/providers/utils.test.ts +++ b/apps/sim/providers/utils.test.ts @@ -25,6 +25,7 @@ import { isProviderBlacklisted, MODELS_TEMP_RANGE_0_1, MODELS_TEMP_RANGE_0_2, + MODELS_TEMP_RANGE_0_15, MODELS_WITH_REASONING_EFFORT, MODELS_WITH_TEMPERATURE_SUPPORT, MODELS_WITH_THINKING, @@ -200,6 +201,9 @@ describe('Model Capabilities', () => { 'grok-3-latest', 'grok-3-fast-latest', 'deepseek-v3', + 'deepseek-chat', + 'groq/meta-llama/llama-4-scout-17b-16e-instruct', + 'mistral-large-latest', ] for (const model of supportedModels) { @@ -211,14 +215,12 @@ describe('Model Capabilities', () => { const unsupportedModels = [ 'unsupported-model', 'cerebras/llama-3.3-70b', - 'groq/meta-llama/llama-4-scout-17b-16e-instruct', 'o1', 'o3', 'o4-mini', 'azure/o3', 'azure/o4-mini', 'deepseek-r1', - 'deepseek-chat', 'azure/model-router', 'gpt-5.1', 'azure/gpt-5.1', @@ -262,6 +264,10 @@ describe('Model Capabilities', () => { 'gemini-2.5-pro', 'gemini-2.5-flash', 'deepseek-v3', + 'deepseek-chat', + 'grok-3-latest', + 'grok-3-fast-latest', + 'groq/meta-llama/llama-4-scout-17b-16e-instruct', ] for (const model of modelsRange02) { @@ -270,22 +276,24 @@ describe('Model Capabilities', () => { }) it.concurrent('should return 1 for models with temperature range 0-1', () => { - const modelsRange01 = [ - 'claude-sonnet-4-0', - 'claude-opus-4-0', - 'grok-3-latest', - 'grok-3-fast-latest', - ] + 
const modelsRange01 = ['claude-sonnet-4-0', 'claude-opus-4-0'] for (const model of modelsRange01) { expect(getMaxTemperature(model)).toBe(1) } }) + it.concurrent('should return 1.5 for models with temperature range 0-1.5', () => { + const modelsRange015 = ['mistral-large-latest', 'mistral-small-latest', 'codestral-latest'] + + for (const model of modelsRange015) { + expect(getMaxTemperature(model)).toBe(1.5) + } + }) + it.concurrent('should return undefined for models that do not support temperature', () => { expect(getMaxTemperature('unsupported-model')).toBeUndefined() expect(getMaxTemperature('cerebras/llama-3.3-70b')).toBeUndefined() - expect(getMaxTemperature('groq/meta-llama/llama-4-scout-17b-16e-instruct')).toBeUndefined() expect(getMaxTemperature('o1')).toBeUndefined() expect(getMaxTemperature('o3')).toBeUndefined() expect(getMaxTemperature('o4-mini')).toBeUndefined() @@ -428,12 +436,13 @@ describe('Model Capabilities', () => { expect(MODELS_TEMP_RANGE_0_2).toContain('gpt-4o') expect(MODELS_TEMP_RANGE_0_2).toContain('gemini-2.5-flash') expect(MODELS_TEMP_RANGE_0_2).toContain('deepseek-v3') + expect(MODELS_TEMP_RANGE_0_2).toContain('grok-3-latest') expect(MODELS_TEMP_RANGE_0_2).not.toContain('claude-sonnet-4-0') }) it.concurrent('should have correct models in MODELS_TEMP_RANGE_0_1', () => { expect(MODELS_TEMP_RANGE_0_1).toContain('claude-sonnet-4-0') - expect(MODELS_TEMP_RANGE_0_1).toContain('grok-3-latest') + expect(MODELS_TEMP_RANGE_0_1).not.toContain('grok-3-latest') expect(MODELS_TEMP_RANGE_0_1).not.toContain('gpt-4o') }) @@ -449,7 +458,9 @@ describe('Model Capabilities', () => { 'should combine both temperature ranges in MODELS_WITH_TEMPERATURE_SUPPORT', () => { expect(MODELS_WITH_TEMPERATURE_SUPPORT.length).toBe( - MODELS_TEMP_RANGE_0_2.length + MODELS_TEMP_RANGE_0_1.length + MODELS_TEMP_RANGE_0_2.length + + MODELS_TEMP_RANGE_0_15.length + + MODELS_TEMP_RANGE_0_1.length ) expect(MODELS_WITH_TEMPERATURE_SUPPORT).toContain('gpt-4o') 
expect(MODELS_WITH_TEMPERATURE_SUPPORT).toContain('claude-sonnet-4-0') @@ -538,6 +549,7 @@ describe('Model Capabilities', () => { (m) => m.includes('gpt-5') && !m.includes('chat-latest') && + !m.includes('gpt-5.5-pro') && !m.includes('gpt-5.4-pro') && !m.includes('gpt-5.2-pro') && !m.includes('gpt-5-pro') @@ -547,6 +559,9 @@ describe('Model Capabilities', () => { ) expect(gpt5ModelsWithReasoningEffort.sort()).toEqual(gpt5ModelsWithVerbosity.sort()) + expect(MODELS_WITH_REASONING_EFFORT).toContain('gpt-5.5-pro') + expect(MODELS_WITH_VERBOSITY).not.toContain('gpt-5.5-pro') + expect(MODELS_WITH_REASONING_EFFORT).toContain('gpt-5.4-pro') expect(MODELS_WITH_VERBOSITY).not.toContain('gpt-5.4-pro') @@ -715,7 +730,7 @@ describe('Max Output Tokens', () => { it.concurrent('should return published max for Bedrock Claude Opus 4.1', () => { expect(getMaxOutputTokensForModel('bedrock/anthropic.claude-opus-4-1-20250805-v1:0')).toBe( - 32768 + 32000 ) }) diff --git a/apps/sim/providers/utils.ts b/apps/sim/providers/utils.ts index b2021817b6..c584261b4e 100644 --- a/apps/sim/providers/utils.ts +++ b/apps/sim/providers/utils.ts @@ -28,9 +28,8 @@ import { getModelsWithDeepResearch, getModelsWithoutMemory, getModelsWithReasoningEffort, + getModelsWithTemperatureRange, getModelsWithTemperatureSupport, - getModelsWithTempRange01, - getModelsWithTempRange02, getModelsWithThinking, getModelsWithVerbosity, getProviderDefaultModel as getProviderDefaultModelFromDefinitions, @@ -1167,8 +1166,9 @@ export function trackForcedToolUsage( } } -export const MODELS_TEMP_RANGE_0_2 = getModelsWithTempRange02() -export const MODELS_TEMP_RANGE_0_1 = getModelsWithTempRange01() +export const MODELS_TEMP_RANGE_0_2 = getModelsWithTemperatureRange(2) +export const MODELS_TEMP_RANGE_0_15 = getModelsWithTemperatureRange(1.5) +export const MODELS_TEMP_RANGE_0_1 = getModelsWithTemperatureRange(1) export const MODELS_WITH_TEMPERATURE_SUPPORT = getModelsWithTemperatureSupport() export const 
MODELS_WITH_REASONING_EFFORT = getModelsWithReasoningEffort() export const MODELS_WITH_VERBOSITY = getModelsWithVerbosity() diff --git a/docs/models/anthropic.md b/docs/models/anthropic.md new file mode 100644 index 0000000000..81dad60b26 --- /dev/null +++ b/docs/models/anthropic.md @@ -0,0 +1,232 @@ +# Anthropic Provider Model Validation — Justification Doc + +- **Date:** 2026-06-11 +- **Scope:** `anthropic` provider block in `apps/sim/providers/models.ts` (12 models), re-verified after PR #4990 +- **Method:** Live WebFetch of official Anthropic docs (platform.claude.com), secondary pricing source (OpenRouter), Anthropic news posts via web search for launch dates, plus `rg` verification that every capability flag is actually consumed by provider code (`apps/sim/providers/anthropic/core.ts`, `apps/sim/providers/models.ts`, `apps/sim/providers/utils.ts`). +- **Primary sources:** + - Models overview: https://platform.claude.com/docs/en/about-claude/models/overview + - Pricing: https://platform.claude.com/docs/en/about-claude/pricing + - Deprecations: https://platform.claude.com/docs/en/about-claude/model-deprecations + - Effort: https://platform.claude.com/docs/en/build-with-claude/effort + - Structured outputs: https://platform.claude.com/docs/en/build-with-claude/structured-outputs + - Computer use: https://platform.claude.com/docs/en/agents-and-tools/tool-use/computer-use-tool + - Messages API: https://platform.claude.com/docs/en/api/messages + - Secondary pricing: https://openrouter.ai/provider/anthropic + - Launch dates: https://www.anthropic.com/news/claude-4 , https://www.anthropic.com/news/claude-3-haiku + +**Verdict key:** ✓ = verified against live docs · ⚠ = recommended change · ◆ = intentional deviation (documented) · ◇ = unverifiable from live docs (reason given) + +--- + +## How capability fields are consumed (code verification) + +| Field | Consumer | Behavior | +|---|---|---| +| `thinking.levels` / `thinking.default` | `core.ts` `buildThinkingConfig()` 
via `getThinkingCapability()` | Level must be in `levels` or thinking is skipped. Fable 5 / Opus 4.8 / 4.7 / 4.6 / Sonnet 4.6 (`supportsAdaptiveThinking()`) → `thinking: {type: 'adaptive'}` + `output_config: {effort: <level>}`. All other models → `thinking: {type: 'enabled', budget_tokens}` with low=2048 / medium=8192 / high=32768 (so `xhigh`/`max` must never appear on a budget-tokens model — `THINKING_BUDGET_TOKENS` has no entry and config would be dropped). | +| `temperature` | payload construction in `core.ts` | Presence of `capabilities.temperature` allows the param; omitted on a model means Sim never sends it. Stripped when thinking enabled (thinking incompatible with temperature). | +| `nativeStructuredOutputs` | `models.ts:3393` (`getModelsWithNativeStructuredOutputs`-style helper) consumed by `core.ts` | With flag → native `output_format`/`output_config` JSON-schema path; without → `generateSchemaInstructions()` prompt-injection fallback. | +| `computerUse` | `models.ts:3167` `getComputerUseModels()` → `providers/utils.ts:143` `computerUseModels` | Gates Sim's computer-use path per provider. **No Anthropic model currently sets it.** | +| `contextWindow` / `maxOutputTokens` / `pricing` | cost calculation, token clamping, UI | Straight passthrough. Sim does **not** send any `context-1m-*` beta header (`rg 'context-1m' apps/sim/providers/anthropic/` → no matches), so `contextWindow` must reflect the no-beta-header window. | +| `reasoningEffort` / `verbosity` | **not consumed** by the Anthropic provider (OpenAI-family fields) | Correctly absent from all Anthropic entries. 
| + +--- + +## Per-model field verification + +### claude-fable-5 + +| Field | Value | Source | Verdict | +|---|---|---|---| +| pricing.input | 10.0 | Pricing doc ($10/MTok); OpenRouter $10/M | ✓ | +| pricing.cachedInput | 1.0 | Pricing doc cache hit $1/MTok (0.1×) | ✓ | +| pricing.output | 50.0 | Pricing doc $50/MTok; OpenRouter $50/M | ✓ | +| capabilities.temperature | absent | Deprecations doc: sampling params 400 on Opus 4.7 and later; Fable 5 rejects `temperature`/`top_p`/`top_k` | ✓ | +| capabilities.nativeStructuredOutputs | **absent** | Structured-outputs doc: "generally available … for **Claude Fable 5**, Claude Mythos 5, Claude Opus 4.8, …" | ⚠ **should be `true`** — Fable 5 is in the GA list; current absence routes Fable 5 through the prompt-injection fallback instead of native JSON-schema output | +| capabilities.maxOutputTokens | 128000 | Models overview: Max output 128k | ✓ | +| thinking.levels | low–xhigh–max | Effort doc: `max` available on Fable 5; `xhigh` available on Fable 5; low/medium/high universal | ✓ | +| thinking.default | high | Effort doc: default is `high` | ✓ | +| contextWindow | 1000000 | Models overview: 1M tokens (default, no beta header) | ✓ | +| releaseDate | 2026-06-09 | Models overview: "generally available … beginning June 9, 2026" | ✓ | +| (no deprecated flag) | — | Active | ✓ | + +Note: Fable 5's thinking is always-on; Sim's adaptive path (`thinking: {type:'adaptive'}` + effort) is the documented-correct call shape. The `'none'` sentinel omits the `thinking` param, which on Fable 5 means adaptive-by-default rather than disabled — acceptable (explicit `disabled` would 400). 
+ +### claude-opus-4-8 + +| Field | Value | Source | Verdict | +|---|---|---|---| +| pricing.input / cachedInput / output | 5.0 / 0.5 / 25.0 | Pricing doc $5 / $0.50 cache-hit / $25; OpenRouter $5/$25 | ✓ | +| pricing.updatedAt | 2026-05-28 | bumped in PR #4990 | ✓ | +| temperature | absent | Deprecations doc: 400 on Opus 4.7 and later, "including Claude Opus 4.8" | ✓ | +| nativeStructuredOutputs | true | Structured-outputs doc GA list | ✓ | +| maxOutputTokens | 128000 | Models overview | ✓ | +| thinking.levels | low–xhigh–max | Effort doc: `xhigh` and `max` available on Opus 4.8 | ✓ | +| thinking.default | high | Effort doc: "The default is `high` on all surfaces" | ✓ | +| contextWindow | 1000000 | Models overview: 1M (standard pricing, no long-context premium) | ✓ | +| releaseDate | 2026-05-28 | Deprecations doc: tentative retirement "Not sooner than May 28, **2027**" (release + 1 yr convention) — confirms the PR #4990 correction | ✓ changed this pass (PR #4990), re-verified | +| recommended | true | Sim product choice; consistent with docs' "most capable Opus-tier model" | ◆ product decision | + +### claude-opus-4-7 + +| Field | Value | Source | Verdict | +|---|---|---|---| +| pricing | 5.0 / 0.5 / 25.0 (updatedAt 2026-04-16) | Pricing doc; OpenRouter $5/$25 | ✓ | +| temperature | absent | Deprecations doc: 400 on Opus 4.7+ | ✓ | +| nativeStructuredOutputs | true | Structured-outputs doc GA list | ✓ | +| maxOutputTokens | 128000 | Models overview (legacy table) | ✓ | +| thinking.levels | low–xhigh–max | Effort doc: `xhigh` introduced with 4.7; `max` available | ✓ | +| contextWindow | 1000000 | Models overview legacy table: 1M | ✓ | +| releaseDate | 2026-04-16 | Deprecations doc: "Not sooner than April 16, 2027" | ✓ | + +### claude-opus-4-6 + +| Field | Value | Source | Verdict | +|---|---|---|---| +| pricing | 5.0 / 0.5 / 25.0 (updatedAt 2026-06-11) | Pricing doc; OpenRouter $5/$25 | ✓ | +| temperature {0,1} | present | Sampling-param removal is "Opus 4.7 and 
later" — Opus 4.6 still accepts `temperature` (0.0–1.0 per Messages API) | ✓ | +| nativeStructuredOutputs | true | Structured-outputs doc GA list | ✓ | +| maxOutputTokens | 128000 | Models overview legacy table | ✓ | +| thinking.levels | low/medium/high/**max** (no xhigh) | Effort doc: `max` on Opus 4.6 ✓; `xhigh` only on Fable 5 / Opus 4.8 / 4.7 — correctly excluded | ✓ | +| contextWindow | 1000000 | Models overview legacy table: 1M | ✓ | +| releaseDate | 2026-02-05 | Deprecations doc: "Not sooner than February 5, 2027" | ✓ | + +### claude-sonnet-4-6 + +| Field | Value | Source | Verdict | +|---|---|---|---| +| pricing | 3.0 / 0.3 / 15.0 (updatedAt 2026-06-11) | Pricing doc $3 / $0.30 / $15; OpenRouter $3/$15 | ✓ | +| temperature {0,1} | present | Sonnet 4.6 is not in the "Opus 4.7 and later" sampling-param removal; temperature 0.0–1.0 valid | ✓ | +| nativeStructuredOutputs | true | Structured-outputs doc GA list | ✓ | +| maxOutputTokens | 64000 | Models overview: 64k | ✓ | +| thinking.levels | low/medium/high/**max** (no xhigh) | Effort doc: `max` available on Sonnet 4.6; `xhigh` is NOT (Fable 5 / Opus 4.8 / 4.7 only) | ✓ | +| contextWindow | 1000000 | Models overview: 1M, no beta header required; "Long context pricing": full 1M at standard pricing on Sonnet 4.6 | ✓ | +| releaseDate | 2026-02-17 | Deprecations doc: "Not sooner than February 17, 2027" | ✓ | +| recommended | true | Sim product choice ("best combination of speed and intelligence") | ◆ product decision | + +### claude-opus-4-5 + +| Field | Value | Source | Verdict | +|---|---|---|---| +| pricing | 5.0 / 0.5 / 25.0 (updatedAt 2026-06-11) | Pricing doc; OpenRouter $5/$25 | ✓ | +| temperature {0,1} | present | ≤ 4.6-era model; accepted | ✓ | +| nativeStructuredOutputs | true | Structured-outputs doc GA list ("Claude Opus 4.5") | ✓ | +| maxOutputTokens | 64000 | Models overview legacy table | ✓ | +| thinking.levels | low/medium/high | Effort doc: Opus 4.5 supports effort but neither `max` nor `xhigh`. 
Sim's code path for 4.5 uses `budget_tokens` (not effort) — levels map to budget tiers; same three levels are valid either way | ✓ | +| contextWindow | 200000 | Models overview legacy table: 200k | ✓ | +| releaseDate | 2025-11-24 | Deprecations doc: "Not sooner than November 24, 2026"; anthropic.com/news/claude-opus-4-5 (Nov 24, 2025) | ✓ | + +### claude-opus-4-1 + +| Field | Value | Source | Verdict | +|---|---|---|---| +| pricing | 15.0 / 1.5 / 75.0 (updatedAt 2026-06-11) | Pricing doc $15 / $1.50 / $75; OpenRouter $15/$75 | ✓ | +| temperature {0,1} | present | pre-4.7 model; accepted | ✓ | +| nativeStructuredOutputs | **removed in PR #4990** | Structured-outputs doc GA list does **not** include Opus 4.1 | ✓ changed this pass (PR #4990), re-verified correct | +| maxOutputTokens | 32000 | Models overview legacy table: 32k | ✓ | +| thinking.levels | low/medium/high | budget_tokens model; extended thinking supported | ✓ | +| contextWindow | 200000 | Models overview legacy table | ✓ | +| releaseDate | 2025-08-05 | Snapshot `claude-opus-4-1-20250805`; launched Aug 5, 2025 | ✓ | +| deprecated | true | Deprecations doc: deprecated June 5, 2026; retires Aug 5, 2026 → migrate to claude-opus-4-8 | ✓ changed this pass (PR #4990), re-verified | + +### claude-opus-4-0 + +| Field | Value | Source | Verdict | +|---|---|---|---| +| pricing | 15.0 / 1.5 / 75.0 (updatedAt 2026-06-11) | Pricing doc ("Claude Opus 4 (deprecated)"); OpenRouter $15/$75 | ✓ | +| temperature {0,1} | present | pre-4.7; accepted | ✓ | +| nativeStructuredOutputs | absent | Not in structured-outputs GA list | ✓ | +| maxOutputTokens | 32000 | Models overview legacy table | ✓ | +| thinking.levels | low/medium/high | budget_tokens model | ✓ | +| contextWindow | 200000 | Models overview legacy table | ✓ | +| releaseDate | 2025-05-22 | **Open question (a) resolved:** Claude 4 (Opus 4 + Sonnet 4) launched **May 22, 2025** (anthropic.com/news/claude-4). 
The `20250514` in the full ID is the snapshot date, not the launch date. Repo convention uses launch dates (cf. haiku-4-5: launch 2025-10-15 vs snapshot 20251001) | ✓ — **no change recommended** | +| deprecated | true | Deprecations doc: deprecated Apr 14, 2026; retires June 15, 2026 → claude-opus-4-8 | ✓ changed this pass (PR #4990), re-verified | + +### claude-sonnet-4-5 + +| Field | Value | Source | Verdict | +|---|---|---|---| +| pricing | 3.0 / 0.3 / 15.0 (updatedAt 2026-06-11) | Pricing doc; OpenRouter $3/$15 | ✓ | +| temperature {0,1} | present | pre-4.7; accepted | ✓ | +| nativeStructuredOutputs | true | Structured-outputs doc GA list ("Claude Sonnet 4.5") | ✓ | +| maxOutputTokens | 64000 | Models overview legacy table | ✓ | +| thinking.levels | low/medium/high | Effort doc: effort errors on Sonnet 4.5 — Sim correctly routes it through budget_tokens; no max/xhigh | ✓ | +| contextWindow | 200000 | **Open question (e) resolved:** Models overview legacy table lists Sonnet 4.5 at **200k**. 
The historical 1M for Sonnet 4.5 required the `context-1m` beta header, which Sim does not send (`rg 'context-1m'` → no matches in `apps/sim/providers/anthropic/`) | ✓ changed this pass (PR #4990, 1000000 → 200000), re-verified correct | +| releaseDate | 2025-09-29 | Snapshot `claude-sonnet-4-5-20250929`; launched Sep 29, 2025 | ✓ | + +### claude-sonnet-4-0 + +| Field | Value | Source | Verdict | +|---|---|---|---| +| pricing | 3.0 / 0.3 / 15.0 (updatedAt 2026-06-11) | Pricing doc ("Claude Sonnet 4 (deprecated)"); OpenRouter $3/$15 | ✓ | +| temperature {0,1} | present | pre-4.7; accepted | ✓ | +| nativeStructuredOutputs | absent | Not in structured-outputs GA list | ✓ | +| maxOutputTokens | 64000 | Models overview legacy table: 64k | ✓ | +| thinking.levels | low/medium/high | budget_tokens model | ✓ | +| contextWindow | 200000 | Models overview legacy table: 200k; same `context-1m` beta-header reasoning as Sonnet 4.5 | ✓ changed this pass (PR #4990), re-verified correct | +| releaseDate | 2025-05-22 | Claude 4 launch May 22, 2025 (see opus-4-0) — no change | ✓ | +| deprecated | true | Deprecations doc: deprecated Apr 14, 2026; retires June 15, 2026 → claude-sonnet-4-6 | ✓ changed this pass (PR #4990), re-verified | + +### claude-haiku-4-5 + +| Field | Value | Source | Verdict | +|---|---|---|---| +| pricing | 1.0 / 0.1 / 5.0 (updatedAt 2026-06-11) | Pricing doc $1 / $0.10 / $5; OpenRouter $1/$5 | ✓ | +| temperature {0,1} | present | pre-4.7; accepted | ✓ | +| nativeStructuredOutputs | true | Structured-outputs doc GA list | ✓ | +| maxOutputTokens | 64000 | Models overview: 64k | ✓ | +| thinking.levels | low/medium/high | Effort doc: effort errors on Haiku 4.5; extended thinking (budget_tokens) supported — Sim routes via budget_tokens | ✓ | +| contextWindow | 200000 | Models overview: 200k | ✓ | +| releaseDate | 2025-10-15 | Launch Oct 15, 2025 (deprecations doc: retirement "Not sooner than October 15, 2026"); snapshot is `20251001` — repo correctly uses the launch 
date | ✓ | +| speedOptimized | true | Sim-internal flag; docs: "The fastest model" | ◆ Sim-internal, consistent | + +### claude-3-haiku-20240307 + +| Field | Value | Source | Verdict | +|---|---|---|---| +| pricing.input / output | 0.25 / 1.25 (updatedAt 2026-04-01) | ◇ No longer listed on the live pricing page (only retired Haiku 3.5 remains) or OpenRouter — model is retired. Values match Anthropic's historical published pricing ($0.25/$1.25) | ◇ unverifiable live; historically consistent — leave as-is | +| pricing.cachedInput | 0.03 | ◇ Historical cache-hit pricing for Claude 3 Haiku was $0.03/MTok (slightly above the 0.1× convention) | ◇ unverifiable live; historically consistent | +| temperature {0,1} | present | Claude 3-era; accepted (model no longer serves requests anyway) | ✓ (moot) | +| maxOutputTokens | 4096 | Historical Claude 3 Haiku max output | ◇ unverifiable live; historically consistent | +| no thinking capability | absent | Claude 3 Haiku has no extended thinking | ✓ | +| contextWindow | 200000 | Historical Claude 3 family window | ◇ unverifiable live; historically consistent | +| releaseDate | 2024-03-07 | Claude 3 Haiku GA was **March 13, 2024** (anthropic.com/news/claude-3-haiku); `20240307` is the snapshot date. 
Repo convention elsewhere uses launch dates | ⚠ optional: `2024-03-07` → `2024-03-13` (cosmetic; model is retired) | +| deprecated | true | Deprecations doc: **Retired April 20, 2026** ("Requests to retired models will fail") | ◆ see open question (b) below | + +--- + +## Changes made in this pass (PR #4990) — all re-verified correct + +| Change | Verification | +|---|---| +| opus-4-8 releaseDate → 2026-05-28 | Deprecations doc retirement floor "May 28, 2027" (release + 1 yr) ✓ | +| deprecated:true on opus-4-1 | Deprecated 2026-06-05, retires 2026-08-05 ✓ | +| deprecated:true on opus-4-0, sonnet-4-0 | Deprecated 2026-04-14, retire 2026-06-15 ✓ | +| sonnet-4-5 & sonnet-4-0 contextWindow 1000000 → 200000 | Models overview legacy table: both 200k. The 1M window on these models was beta-header-gated (`context-1m`); Sim never sends that header ✓ | +| removed nativeStructuredOutputs from opus-4-1 | Opus 4.1 absent from structured-outputs GA list ✓ | +| updatedAt bumps | informational ✓ | + +## Recommended fixes from THIS validation + +1. **claude-fable-5: add `nativeStructuredOutputs: true`.** Structured-outputs doc explicitly lists Claude Fable 5 as GA. Without the flag, Sim falls back to prompt-injected schema instructions for Fable 5 instead of the native JSON-schema output path — weaker guarantees on the flagship model. +2. *(optional, cosmetic)* **claude-3-haiku-20240307: releaseDate `2024-03-07` → `2024-03-13`.** Repo convention is launch date (not snapshot date); GA was March 13, 2024. Low value since the model is retired. + +## Deliberately not changed + +- **`computerUse` on Anthropic models (open question c).** Anthropic documents computer-use support (beta) for: Opus 4.8 / 4.7 / 4.6 / 4.5 + Sonnet 4.6 (header `computer-use-2025-11-24`) and Sonnet 4.5, Haiku 4.5, Opus 4.1, Sonnet 4, Opus 4 (header `computer-use-2025-01-24`). 
**Claude Fable 5 is NOT in the documented list.** The flag IS consumed (`getComputerUseModels()` → `providers/utils.ts` `computerUseModels`), so setting it would light up Sim's computer-use path for these models — a feature-enablement/product decision (beta headers, screenshot plumbing, UX), not a data correction. Left unchanged; documented here for whoever owns that decision. +- **opus-4-0 / sonnet-4-0 releaseDate `2025-05-22` (open question a).** Confirmed correct: Claude 4 launched May 22, 2025; `20250514` is the snapshot suffix, not the launch date. +- **claude-3-haiku-20240307 entry kept (open question b).** The model was retired 2026-04-20 — live requests now fail. Recommendation: **keep the entry with `deprecated: true`** rather than delete. Removing it would break saved workflows that reference the model ID (model lookup, pricing for historical logs, UI rendering of old runs). The schema has no `retired` field; if one is ever added, this model is the first candidate. Runtime failures surface from Anthropic's API as clear 404s, which is an acceptable failure mode for a retired model. +- **`recommended` flags (opus-4-8, sonnet-4-6) and `speedOptimized` (haiku-4-5)** — Sim product/UI decisions, consistent with docs positioning; not doc-verifiable facts. +- **`defaultModel: 'claude-sonnet-4-6'`** — active, recommended model; valid product choice. +- **Thinking level lists for budget-tokens models (opus-4-5, sonnet-4-5, sonnet-4-0, opus-4-1, opus-4-0, haiku-4-5).** Their `low/medium/high` are Sim-defined budget tiers (2048/8192/32768 budget_tokens), not API effort levels — internally consistent with `THINKING_BUDGET_TOKENS` in `core.ts`. Note Opus 4.5 does support the API `effort` param (low/medium/high) per the effort doc, but Sim routes it through budget_tokens (`supportsAdaptiveThinking()` excludes 4.5); that is a code-path choice in `core.ts`, not a models.ts data error, and the level list is valid under either interpretation. 
+ +## Open question (d) resolution — thinking levels & temperature boundary + +- `xhigh`: Fable 5, Opus 4.8, Opus 4.7 only (effort doc). Repo ✓. +- `max`: Fable 5, Opus 4.8, Opus 4.7, Opus 4.6, Sonnet 4.6 (effort doc; **not** Opus 4.5 / Sonnet 4.5 / Haiku 4.5). Repo ✓ — including Sonnet 4.6 `max`, verified. +- Effort default `high` on all supporting models (effort doc: "Setting effort to high produces exactly the same behavior as omitting the parameter"). Repo `default: 'high'` ✓. +- Temperature boundary: deprecations doc — `temperature`/`top_p`/`top_k` return 400 on **Opus 4.7 and later (incl. Opus 4.8) and Fable 5**; still valid (0.0–1.0, default 1.0 per Messages API) on Opus 4.6, Sonnet 4.6, and everything earlier. Repo: temperature absent exactly on fable-5 / opus-4-8 / opus-4-7, present `{min:0, max:1}` on opus-4-6 / sonnet-4-6 and all older models ✓. + +## Unverifiable + +- **claude-3-haiku-20240307 pricing, contextWindow (200k), maxOutputTokens (4096):** the model is retired and has been removed from the live pricing/overview pages and OpenRouter. Values match Anthropic's historical published specs; no contradiction found. No change recommended. +- **Exact cache-write pricing is not modeled** (Sim's schema has only `cachedInput` = cache read). Live docs confirm cache reads = 0.1× input for every current model, matching all `cachedInput` values. 5-min/1-hour write premiums (1.25× / 2×) are not representable in the current schema — noting for completeness, not a defect. diff --git a/docs/models/azure.md b/docs/models/azure.md new file mode 100644 index 0000000000..03f5dfd72d --- /dev/null +++ b/docs/models/azure.md @@ -0,0 +1,258 @@ +# Azure OpenAI & Azure Anthropic model validation + +**Date:** 2026-06-11 +**Scope:** `azure-openai` block (17 models) and `azure-anthropic` block (5 models) in `apps/sim/providers/models.ts`. Final exhaustive re-validation following PR #4990. 
+ +## Method + +Every field was checked against live primary sources fetched on 2026-06-11: + +1. **Specs (context window, max output, version dates, API support, lifecycle):** + - https://learn.microsoft.com/en-us/azure/ai-foundry/foundry-models/concepts/models-sold-directly-by-azure (doc updated 2026-06-05) + - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/reasoning (reasoning effort / verbosity feature matrix, doc updated 2026-06-05) + - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/model-retirements (lifecycle policy + gpt-4o dates) + - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/model-router and .../concepts/model-router + - https://learn.microsoft.com/en-us/azure/foundry/foundry-models/how-to/use-foundry-models-claude (doc updated 2026-06-11) + - https://platform.claude.com/docs/en/build-with-claude/claude-in-microsoft-foundry + - https://platform.claude.com/docs/en/about-claude/pricing + - https://platform.claude.com/docs/en/about-claude/models/overview + - https://platform.claude.com/docs/en/build-with-claude/structured-outputs +2. **Azure OpenAI pricing:** Azure Retail Prices API (`https://prices.azure.com/api/retail/prices?$filter=serviceName eq 'Foundry Models' and contains(meterName,'...')`). All quoted prices are the **Global Standard** ("Gl"/"glbl") meters, normalized to USD per 1M tokens. The marketing pricing page times out; the Retail Prices API is authoritative for billed meters. +3. **Provider implementation:** `apps/sim/providers/azure-openai/index.ts` (API dispatch), `apps/sim/providers/azure-anthropic/index.ts` (Messages API via `@anthropic-ai/sdk` against `{endpoint}/anthropic`). + +Sim convention notes: `pricing.cachedInput` = cache-read price; `releaseDate` for `azure/*` entries = the Azure model **version date** (convention set in PR #4990 with gpt-4o → 2024-11-20 and model-router → 2025-05-19). 
+ +--- + +## Block: `azure-openai` (defaultModel: `azure/gpt-4o`) + +### azure/gpt-4o + +| Field | Current value | Source / evidence | Verdict | +| --- | --- | --- | --- | +| pricing.input | 2.5 | Retail API `gpt 4o 1120 Inp glbl` = 0.0025/1K = $2.50/1M | OK | +| pricing.cachedInput | 1.25 | Retail API `gpt 4o 1120 cached Inp glbl` = 0.00125/1K = $1.25/1M | **OK — VERIFIED** (open question b resolved) | +| pricing.output | 10.0 | Retail API `gpt 4o 1120 Outp glbl` = 0.01/1K = $10/1M | OK | +| temperature 0–2 | yes | Standard chat model; reasoning-model parameter restrictions don't apply | OK | +| maxOutputTokens | **(absent)** | models-sold-directly: gpt-4o (2024-11-20) "Input: 128,000 / Output: 16,384" | **FIX: add `maxOutputTokens: 16384`** | +| contextWindow | 128000 | same row | OK | +| releaseDate | 2024-11-20 | Azure version `2024-11-20` (PR #4990 change re-verified) | OK | +| deprecated | (absent) | model-retirements: versions 2024-05-13 / 2024-08-06 **retired 2026-03-31** (auto-upgraded to gpt-5.1); version 2024-11-20 "retires **2026-10-01**" | **RECOMMEND `deprecated: true`** — firm retirement date within ~3.7 months. NOTE: gpt-4o is the `azure-openai` `defaultModel`; changing the default (e.g. to azure/gpt-5.1 per Azure's own auto-upgrade path) is a product decision — documented only, not assumed. | + +### azure/gpt-5.4 + +| Field | Current value | Source / evidence | Verdict | +| --- | --- | --- | --- | +| pricing | 2.5 / 0.25 / 15.0 | Retail API `5.4 inp Gl` 2.5, `5.4 cd inp Gl` 0.25, `5.4 opt Gl` 15.0 | OK | +| reasoningEffort | none, low, medium, high | reasoning doc footnote 7 enumerates `'none'` support as exactly: gpt-5.2, gpt-5.1, gpt-5.1-codex, gpt-5.1-codex-max, gpt-5.1-codex-mini — **gpt-5.4 family is not listed** | **FIX: drop `'none'`** → `['low','medium','high']` (open question c resolved). PR #4990's removal of `'xhigh'` re-verified correct: footnote 6 — xhigh is gpt-5.1-codex-max only. 
| +| verbosity | low, medium, high | reasoning doc "NEW GPT-5 reasoning features": verbosity options low/medium/high for GPT-5 series | OK | +| maxOutputTokens | 128000 | models-sold-directly: gpt-5.4 (2026-03-05) output 128,000 | OK | +| contextWindow | 1050000 | same row: 1,050,000 (Input 922,000 / Output 128,000) | OK | +| releaseDate | 2026-03-05 | Azure version `2026-03-05` | OK | + +Pricing limitation: a long-context tier exists (`5.4 longco inp Gl` $5.0 / `longco cd inp Gl` $0.5 / `longco opt Gl` $22.5) for requests beyond the standard context threshold. The flat pricing schema cannot express tiered pricing; standard-tier rates are recorded. + +### azure/gpt-5.4-mini + +| Field | Current value | Source / evidence | Verdict | +| --- | --- | --- | --- | +| pricing | 0.75 / 0.075 / 4.5 | Retail API `5.4 mini Inp Gl` 0.75, `cd Inp Gl` 0.075, `Opt Gl` 4.5 | OK | +| reasoningEffort | none, low, medium, high | footnote 7 (see gpt-5.4) | **FIX: drop `'none'`** | +| verbosity | low, medium, high | GPT-5 series verbosity | OK | +| maxOutputTokens / contextWindow | 128000 / 400000 | models-sold-directly: gpt-5.4-mini (2026-03-17) 400,000 (272k in / 128k out) | OK | +| releaseDate | 2026-03-17 | Azure version `2026-03-17` | OK | + +### azure/gpt-5.4-nano + +| Field | Current value | Source / evidence | Verdict | +| --- | --- | --- | --- | +| pricing | 0.2 / 0.02 / 1.25 | Retail API `5.4 nano Inp Gl` 0.2, `cd Inp Gl` 0.02, `Opt Gl` 1.25 | OK | +| reasoningEffort | none, low, medium, high | footnote 7 (see gpt-5.4) | **FIX: drop `'none'`** | +| verbosity | low, medium, high | GPT-5 series verbosity | OK | +| maxOutputTokens / contextWindow | 128000 / 400000 | models-sold-directly: gpt-5.4-nano (2026-03-17) | OK | +| releaseDate | 2026-03-17 | Azure version `2026-03-17` | OK | + +### azure/gpt-5.2 + +| Field | Current value | Source / evidence | Verdict | +| --- | --- | --- | --- | +| pricing | 1.75 / 0.175 / 14.0 | Retail API `GPT 5.2 inp Gl` 1.75, `cd inp Gl` 0.175, `opt 
Gl` 14.0 | OK | +| reasoningEffort | none, low, medium, high | footnote 7 explicitly lists gpt-5.2 as supporting `'none'`; `'xhigh'` removal (PR #4990) correct — codex-max only; `'minimal'` correctly absent ("not supported with gpt-5.1 or greater") | OK | +| verbosity | low, medium, high | GPT-5 series verbosity | OK | +| maxOutputTokens / contextWindow | 128000 / 400000 | models-sold-directly: gpt-5.2 (2025-12-11) | OK | +| releaseDate | 2025-12-11 | Azure version `2025-12-11` | OK | + +### azure/gpt-5.1 + +| Field | Current value | Source / evidence | Verdict | +| --- | --- | --- | --- | +| pricing | 1.25 / 0.125 / 10.0 | Retail API `GPT 5.1 inp Gl` 1.25, `cd inp Gl` 0.125, `opt Gl` 10.0 | OK | +| reasoningEffort | none, low, medium, high | footnote 7 lists gpt-5.1 (also: `reasoning_effort` defaults to `none` on 5.1); `'minimal'` correctly absent | OK | +| verbosity | low, medium, high | GPT-5 series verbosity | OK | +| maxOutputTokens / contextWindow | 128000 / 400000 | models-sold-directly: gpt-5.1 | OK | +| releaseDate | 2025-11-12 | Azure version is **2025-11-13** in both the models table and the reasoning feature matrix | **FIX: → 2025-11-13** (per PR #4990's own convention of using the Azure version date, cf. 
gpt-4o 2024-11-20, model-router 2025-05-19) | + +### azure/gpt-5.1-codex + +| Field | Current value | Source / evidence | Verdict | +| --- | --- | --- | --- | +| pricing | 1.25 / 0.125 / 10.0 | Retail API `5.1 codex inp Gl` 1.25, `cd inp Gl` 0.125, `opt Gl` 10.0 | OK | +| reasoningEffort | none, low, medium, high | footnote 7 lists gpt-5.1-codex | OK | +| verbosity | low, medium, high | GPT-5 series | OK | +| maxOutputTokens / contextWindow | 128000 / 400000 | models-sold-directly: gpt-5.1-codex | OK | +| releaseDate | 2025-11-12 | Azure version `2025-11-13` | **FIX: → 2025-11-13** | +| deprecated | true (PR #4990 stopgap) | See ruling below | **RECOMMEND: KEEP entry, REVERT `deprecated: true`** | + +**Ruling on open question (a):** Responses-API-only status **confirmed** — models-sold-directly lists gpt-5.1-codex as "Responses API only", and the reasoning feature matrix shows Chat Completions = not supported. **However, the premise that it "never worked through Sim" is false.** `apps/sim/providers/azure-openai/index.ts` dispatches by endpoint shape: a full chat-completions URL → Chat Completions; a full responses URL → Responses; **the default path (plain resource base URL) constructs `{endpoint}/openai/v1/responses` and calls the Responses API** (lines ~743–765). So gpt-5.1-codex works for any user configured with a base endpoint or responses URL — the majority configuration. Azure itself has not deprecated the model (GA, "Access is no longer restricted"). Therefore: **KEEP the entry and revert `deprecated: true`**. The only genuinely broken configuration is a user-supplied chat-completions endpoint URL; that is an endpoint-configuration limitation, not a model lifecycle state, and `deprecated` (which signals retirement to users) is the wrong tool for it. 
+ +### azure/gpt-5 · azure/gpt-5-mini · azure/gpt-5-nano + +| Field | gpt-5 | gpt-5-mini | gpt-5-nano | Source / evidence | Verdict | +| --- | --- | --- | --- | --- | --- | +| pricing in/cached/out | 1.25 / 0.125 / 10.0 | 0.25 / 0.025 / 2.0 | 0.05 / 0.005 / 0.4 | Retail API `GPT 5 [Mini\|Nano] [Inpt\|cchd Inpt\|outpt] Glbl` — exact matches all three | OK | +| reasoningEffort | minimal, low, medium, high | same | same | reasoning doc: "`minimal` is only supported with the original GPT-5 reasoning models"; `'none'` correctly absent (not in footnote 7); `'xhigh'` correctly absent | OK | +| verbosity | low/medium/high | same | same | GPT-5 series | OK | +| maxOutputTokens / contextWindow | 128000 / 400000 | same | same | models-sold-directly: all three 400,000 (272k/128k) | OK | +| releaseDate | 2025-08-07 | 2025-08-07 | 2025-08-07 | Azure version `2025-08-07` | OK | + +### azure/gpt-5-chat + +| Field | Current value | Source / evidence | Verdict | +| --- | --- | --- | --- | +| id (deployable name) | `gpt-5-chat` | models-sold-directly lists `gpt-5-chat` (Preview), versions 2025-08-07 and 2025-10-03 — **exact name confirmed**; PR #4990 rename from `gpt-5-chat-latest` re-verified correct. 
Note: OpenAI's first-party `gpt-5-chat-latest`-style continuously-updated alias maps to a *different* Foundry product (`gpt-chat-latest`, now GPT-5.5 Instant) — our entry correctly tracks the deployable `gpt-5-chat` (open question e resolved) | OK | +| pricing | 1.25 / 0.125 / 10.0 | Retail API `GPT 5 Chat [Inpt\|cchd Inpt\|outpt] Glbl` = 1.25 / 0.125 / 10.0 | OK | +| temperature 0–2 | yes | gpt-5-chat is a non-reasoning chat model (temperature restriction applies to gpt-5.1-chat and later, which we do not list) | OK | +| maxOutputTokens | 16384 | models-sold-directly: 128,000 / **16,384** (PR #4990 addition re-verified) | OK | +| contextWindow | 128000 | same row | OK | +| releaseDate | 2025-08-07 | Azure version `2025-08-07` (a `2025-10-03` revision also exists; the original version date is kept) | OK | +| lifecycle | not marked | **Preview** on Azure. Preview lifecycle = "not sooner than" retirement, force-upgrade or 30-day-notice retirement, "not recommended for production". No retirement date currently announced → no `deprecated` flag warranted | OK (documented) | + +### azure/o3 · azure/o4-mini + +| Field | o3 | o4-mini | Source / evidence | Verdict | +| --- | --- | --- | --- | --- | +| pricing | 2 / 0.5 / 8 | 1.1 / 0.275 / 4.4 | Retail API `o3 0416` 0.002/0.0005/0.008 per 1K; `o4-mini 0416` 0.0011/0.000275/0.0044 per 1K | OK | +| reasoningEffort | low, medium, high | low, medium, high | reasoning doc: "low, medium, or high for all reasoning models except o1-mini"; o-series matrix has no none/minimal/xhigh | OK | +| verbosity | (absent) | (absent) | verbosity is a GPT-5-series-only parameter | OK | +| maxOutputTokens / contextWindow | 100000 / 200000 | 100000 / 200000 | models-sold-directly o-series: Input 200,000 / Output 100,000 | OK | +| releaseDate | 2025-04-16 | 2025-04-16 | Azure version `2025-04-16` for both | OK | + +### azure/gpt-4.1 · azure/gpt-4.1-mini · azure/gpt-4.1-nano + +| Field | 4.1 | 4.1-mini | 4.1-nano | Source / evidence | Verdict | +| 
--- | --- | --- | --- | --- | --- | +| pricing | 2.0 / 0.5 / 8.0 | 0.4 / 0.1 / 1.6 | 0.1 / 0.025 / 0.4 | Retail API `gpt 4.1 [mini\|nano] [Inp\|cached Inp\|Outp] glbl` — exact matches all three | OK | +| temperature 0–2 | yes | yes | yes | non-reasoning models | OK | +| maxOutputTokens | 32768 | 32768 | 32768 | models-sold-directly: 32,768 | OK | +| contextWindow | 1047576 | 1047576 | 1047576 | models-sold-directly: 1,047,576 (global standard; lower for regional standard/batch — global is the right representation) | OK | +| releaseDate | 2025-04-14 | 2025-04-14 | 2025-04-14 | Azure version `2025-04-14` | OK | + +### azure/model-router + +| Field | Current value | Source / evidence | Verdict | +| --- | --- | --- | --- | +| pricing | 2.0 / 0.5 / 8.0 | No `model-router` meter exists in the Retail Prices API (searched `Router`/`Rtr`/`rtr` under serviceName 'Foundry Models' and productName across all services — only Communication Services "Job Router" exists). Concepts page: "Model router usage is charged for input prompts at the rate listed on the pricing page"; how-to evaluation section: "Account for the **router markup on input tokens** plus the underlying model's input and output pricing." The reported $0.14/1M router markup could not be confirmed from any fetchable source (only the timing-out marketing page carries the number). | **KEEP as documented proxy** (open question d resolved — see below) | +| capabilities | {} (no reasoningEffort) | Router accepts `reasoning_effort` since version 2025-11-18 and forwards it; but our pinned version semantics are 2025-05-19 (gpt-4.1-family + o4-mini routing, none of which take temperature uniformly — temp/top_p silently dropped for o-series). 
Empty capabilities is the safest representation | OK | +| contextWindow | 200000 | models-sold-directly footnote: "Context window: 200,000" — the limit of the smallest underlying model; larger prompts succeed only if routed to a compatible model | OK | +| maxOutputTokens | (absent) | "max output tokens varies" (16,384–128,000 depending on routed model) — correctly unset | OK | +| releaseDate | 2025-05-19 | Original version `2025-05-19` confirmed (versions: 2025-05-19, 2025-08-07, 2025-11-18 latest); PR #4990 change re-verified | OK | + +**Pricing decision (open question d):** True billing = per-input-token router markup + the routed model's own input/output rates, which varies per request. The flat `{input, cachedInput, output}` schema cannot express this. The current 2.0/0.5/8.0 equals the gpt-4.1 rates — gpt-4.1 is the flagship of the 2025-05-19 routed set (gpt-4.1/-mini/-nano + o4-mini) and is the most expensive model in that set (o4-mini bills 1.1/0.275/4.4; gpt-4.1-mini and gpt-4.1-nano are cheaper still), so it is a conservative (slightly pessimistic) proxy for cost estimation. **Keep 2.0/0.5/8.0.** This is a documented schema limitation, not a verified Azure price; cost estimates for model-router workloads in Sim are approximations. + +--- + +## Block: `azure-anthropic` (defaultModel: `azure-anthropic/claude-sonnet-4-5`) + +Pricing basis: platform.claude.com Claude-in-Microsoft-Foundry doc — "Pricing for Claude in the Microsoft Marketplace uses Anthropic's standard API pricing." So azure-anthropic pricing == Anthropic first-party pricing (open question f, pricing half, resolved). `cachedInput` maps to Anthropic "Cache Hits & Refreshes" (0.1× input). All five models are **(preview)** on Foundry; Foundry "follows the Claude API lifecycle schedule". 
+ +### azure-anthropic/claude-opus-4-6 + +| Field | Current value | Source / evidence | Verdict | +| --- | --- | --- | --- | +| pricing | 5.0 / 0.5 / 25.0 | Anthropic pricing: Opus 4.6 $5 in / $0.50 cache read / $25 out | OK | +| contextWindow | 1000000 | MS Foundry Claude doc: opus-4-6 "1M / 128K"; Anthropic Foundry doc: "Claude Fable 5, Claude Opus 4.7, Claude Opus 4.6, and Claude Sonnet 4.6 have a 1M-token context window on Microsoft Foundry"; Anthropic models overview: 1M. PR #4990 change re-verified. Long context is at **standard pricing** (Anthropic long-context pricing section), so no tiered-pricing concern | OK | +| maxOutputTokens | 128000 | both MS and Anthropic sources: 128K | OK | +| thinking levels | low, medium, high, max (default high) | MS Foundry Claude doc: effort supports low/medium/high, "also max for Opus 4.8, Opus 4.7, **Opus 4.6**, and Sonnet 4.6" | OK | +| nativeStructuredOutputs | true | Anthropic structured-outputs doc: Opus 4.6 supported (GA) | OK | +| temperature 0–1 | yes | Anthropic Messages API range | OK | +| releaseDate | 2026-02-05 | Not stated in any fetched doc (dateless model ID). 
Consistent with Opus 4.6 launch timeframe (early Feb 2026); convention = announcement date | Unverifiable (plausible, kept) | + +### azure-anthropic/claude-opus-4-5 + +| Field | Current value | Source / evidence | Verdict | +| --- | --- | --- | --- | +| pricing | 5.0 / 0.5 / 25.0 | Anthropic pricing: Opus 4.5 $5 / $0.50 / $25 | OK | +| contextWindow / maxOutputTokens | 200000 / 64000 | MS doc "200K / 64K"; Anthropic overview 200k / 64k | OK | +| thinking | low, medium, high | extended thinking; `max` effort not supported on 4.5-generation | OK | +| nativeStructuredOutputs | true | Anthropic structured-outputs doc: Opus 4.5 supported | OK | +| releaseDate | 2025-11-24 | Anthropic launch date (snapshot ID claude-opus-4-5-20251101; announcement 2025-11-24 — announcement-date convention) | OK | + +### azure-anthropic/claude-sonnet-4-5 + +| Field | Current value | Source / evidence | Verdict | +| --- | --- | --- | --- | +| pricing | 3.0 / 0.3 / 15.0 | Anthropic pricing: Sonnet 4.5 $3 / $0.30 / $15 | OK | +| contextWindow / maxOutputTokens | 200000 / 64000 | MS doc "200K / 64K"; Anthropic overview. 
Note: the Sonnet 4.5 **1M-context beta** on Foundry retires after 2026-04-30 (already past) — 200000 is correct | OK | +| thinking | low, medium, high | extended thinking | OK | +| nativeStructuredOutputs | true | Anthropic structured-outputs doc: Sonnet 4.5 supported | OK | +| releaseDate | 2025-09-29 | snapshot claude-sonnet-4-5-20250929 | OK | + +### azure-anthropic/claude-opus-4-1 + +| Field | Current value | Source / evidence | Verdict | +| --- | --- | --- | --- | +| pricing | 15.0 / 1.5 / 75.0 | Anthropic pricing: Opus 4.1 $15 / $1.50 / $75 | OK | +| contextWindow / maxOutputTokens | 200000 / 32000 | MS doc "200K / 32K"; Anthropic overview 200k / 32k | OK | +| thinking | low, medium, high | extended thinking | OK | +| nativeStructuredOutputs | **true** | Anthropic structured-outputs doc supported-model list **excludes Opus 4.1** (Fable 5, Mythos 5/Preview, Opus 4.8/4.7/4.6/4.5, Sonnet 4.6/4.5, Haiku 4.5 only). The first-party `anthropic` block's `claude-opus-4-1` entry correctly omits it (models.ts ~line 762). With this flag set, Sim sends the `structured-outputs-2025-11-13` beta header and `output_format` to a model that doesn't support it | **FIX: remove `nativeStructuredOutputs`** | +| deprecated | true | Anthropic Foundry doc model table: "Claude Opus 4.1 — Deprecated. **Retiring August 5, 2026**"; Anthropic pricing page marks it deprecated. 
PR #4990 change re-verified correct | OK | +| releaseDate | 2025-08-05 | snapshot claude-opus-4-1-20250805 | OK | + +### azure-anthropic/claude-haiku-4-5 + +| Field | Current value | Source / evidence | Verdict | +| --- | --- | --- | --- | +| pricing | 1.0 / 0.1 / 5.0 | Anthropic pricing: Haiku 4.5 $1 / $0.10 / $5 | OK | +| contextWindow / maxOutputTokens | 200000 / 64000 | MS doc "200K / 64K"; Anthropic overview | OK | +| thinking | low, medium, high | extended thinking | OK | +| nativeStructuredOutputs | true | Anthropic structured-outputs doc: Haiku 4.5 supported | OK | +| releaseDate | 2025-10-15 | Anthropic launch date (snapshot claude-haiku-4-5-20251001; announcement 2025-10-15 — announcement-date convention) | OK | + +--- + +## Changes made in PR #4990 — re-verification results + +| PR #4990 change | Verdict | +| --- | --- | +| Drop `'xhigh'` from azure/gpt-5.4, 5.4-mini, 5.4-nano, gpt-5.2 | **Correct** — `xhigh` is gpt-5.1-codex-max only (reasoning doc footnote 6) | +| `deprecated: true` on azure/gpt-5.1-codex | **Premise partially wrong** — Responses-API-only confirmed, but Sim's azure provider defaults to the Responses API; recommend reverting (see entry) | +| `deprecated: true` on azure-anthropic/claude-opus-4-1 | **Correct** — retiring 2026-08-05 | +| Rename azure/gpt-5-chat-latest → azure/gpt-5-chat + maxOutputTokens 16384 | **Correct** | +| azure/gpt-4o releaseDate → 2024-11-20 | **Correct** | +| azure/model-router releaseDate → 2025-05-19 | **Correct** | +| azure-anthropic/claude-opus-4-6 contextWindow → 1000000 | **Correct** | +| updatedAt bumps to 2026-06-11 | OK (azure/model-router still 2026-04-01; acceptable since its pricing is an unverifiable proxy) | + +## Recommended fixes from this pass (not applied — doc only) + +1. `azure/gpt-5.4`, `azure/gpt-5.4-mini`, `azure/gpt-5.4-nano`: reasoningEffort drop `'none'` → `['low','medium','high']` (reasoning doc footnote 7 enumerates 'none' support and excludes the 5.4 family). +2. 
`azure/gpt-4o`: add `maxOutputTokens: 16384`. +3. `azure/gpt-4o`: add `deprecated: true` (retires 2026-10-01). **Product caveat:** it is the block's `defaultModel`; the default-model change is a product decision, not made here. +4. `azure/gpt-5.1` and `azure/gpt-5.1-codex`: releaseDate `2025-11-12` → `2025-11-13` (Azure version date convention). +5. `azure/gpt-5.1-codex`: **KEEP entry; revert `deprecated: true`** (works through Sim's default Responses-API path; Azure lifecycle is GA, not deprecated). +6. `azure-anthropic/claude-opus-4-1`: remove `nativeStructuredOutputs: true` (unsupported model; matches first-party anthropic entry). + +## Deliberately not changed + +- **azure/model-router pricing 2.0/0.5/8.0** — kept as a documented gpt-4.1-rate proxy; real billing (input-token router markup + routed model rates) is unrepresentable in the flat pricing schema, and no router meter exists in the Retail Prices API to anchor a different number. +- **azure/gpt-5-chat Preview status** — no `deprecated` flag: no retirement date has been announced for this Preview model, so flagging it would misrepresent its lifecycle. +- **gpt-5.4 long-context pricing tier** (5.0/0.5/22.5 "longco" meters) — schema cannot express tiered pricing; standard-tier rates kept. +- **gpt-4.1 contextWindow 1,047,576** — global-standard figure kept although regional standard (300,000) and batch (128,000) deployments are lower; Sim assumes global standard. +- **azure-anthropic releaseDates using announcement dates** (opus-4-5 2025-11-24, haiku-4-5 2025-10-15) rather than snapshot dates (20251101, 20251001) — consistent existing convention across the file. +- **Missing newer models** (out of scope, noted for follow-up): Azure now offers `gpt-5.5` (GA, 2026-04-24, 1.05M ctx), `gpt-chat-latest`, `gpt-5.4-pro`, `gpt-5.3-codex`/`gpt-5.3-chat`, `gpt-5.2-codex`/`gpt-5.2-chat`; Foundry Claude now offers `claude-fable-5`, `claude-opus-4-8`, `claude-opus-4-7`, `claude-sonnet-4-6` (1M ctx GA). 
+ +## Unverifiable + +- **model-router pricing** — no retail meter; the $0.14/1M router-markup figure appears only on the timing-out marketing pricing page and could not be confirmed. +- **azure-anthropic/claude-opus-4-6 releaseDate 2026-02-05** — no fetched source states the launch date (dateless model ID); plausible and consistent with Opus 4.6-era documentation, kept as-is. +- **Azure-side rate-limit/quota values** — not modeled in the schema; not validated. diff --git a/docs/models/bedrock.md b/docs/models/bedrock.md new file mode 100644 index 0000000000..eff34fc335 --- /dev/null +++ b/docs/models/bedrock.md @@ -0,0 +1,226 @@ +# Bedrock provider validation — `apps/sim/providers/models.ts` + +- **Date:** 2026-06-11 (final exhaustive pass; re-verifies PR #4990) +- **Scope:** all 32 `bedrock/*` model entries +- **Method:** every fact below traced to a live source fetched today: + - **AWS Pricing API** (authoritative for token prices): `https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonBedrock/current/us-east-1/index.json` (1.37 MB, Last-Modified 2026-06-11) and the `us-west-2` offer file. Prices are per 1K tokens in the offer; converted ×1000 to per-1M below. Claude 4.x, Cohere, and Mistral Large 24.11 have **no SKUs** in the Pricing API (marketplace-billed / absent). + - **AWS model cards:** `docs.aws.amazon.com/bedrock/latest/userguide/model-card--.html` (authoritative for geo/global inference IDs, context window, max output, lifecycle, prompt caching). + - **Lifecycle:** `docs.aws.amazon.com/bedrock/latest/userguide/model-lifecycle.html` (Legacy/EOL table). + - **Anthropic:** `platform.claude.com/docs/en/about-claude/pricing` and `.../models/overview` (Claude prices, cache rates, max output, Bedrock geo premium). + - **AWS what's-new** for the Nova Premier GA date and Nova 2 announcements. 
+ +--- + +## GEO-PROFILE TABLE (deliverable for `getBedrockInferenceProfileId`) + +Source: each model card's Programmatic Access table ("Geo inference ID" / "Global inference ID" columns). `geo` = inference profile required/available (the bare ID is generally **not** invokable on-demand for these, except where noted); `bare` = card lists "Not supported" for both Geo and Global — must invoke with the plain model ID. + +| model id suffix | verdict | profiles on card | +|---|---|---| +| anthropic.claude-opus-4-5-20251101-v1:0 | **geo** (REQUIRED) | `us.`, `eu.` + `global.` (no apac/au/jp) | +| anthropic.claude-sonnet-4-5-20250929-v1:0 | **geo** (REQUIRED) | `us.`, `eu.`, `au.`, `jp.` + `global.` (no `apac.`) | +| anthropic.claude-haiku-4-5-20251001-v1:0 | **geo** (REQUIRED in most regions) | `us.`, `eu.`, `au.`, `jp.` + `global.` (no `apac.`; in-region only us-east-1/eu-north-1/eu-west-1/ap-northeast-1/ap-southeast-4) | +| anthropic.claude-opus-4-1-20250805-v1:0 | **geo** (REQUIRED) | `us.` only; global NOT supported | +| amazon.nova-2-pro-v1:0 | **unknown** (no card; ID does not exist on Bedrock — real preview ID is `amazon.nova-2-pro-preview-20251202-v1:0`, served via geo/global profiles per cloudprice `apac.amazon.nova-2-pro-preview-…`) | +| amazon.nova-2-lite-v1:0 | **geo** (REQUIRED) | `us.`, `eu.`, `jp.` + `global.` (no `apac.`) | +| amazon.nova-premier-v1:0 | **geo** (REQUIRED) | `us.` only; global NOT supported | +| amazon.nova-pro-v1:0 | **geo** | `us.`, `eu.` (no apac/global; in-region exists in us-east-1 and a few others) | +| amazon.nova-lite-v1:0 | **geo** | `us.`, `eu.` (no apac/global) | +| amazon.nova-micro-v1:0 | **geo** | `us.`, `eu.` (no apac/global) | +| meta.llama4-maverick-17b-instruct-v1:0 | **geo** (REQUIRED) | `us.` only | +| meta.llama4-scout-17b-instruct-v1:0 | **geo** (REQUIRED) | `us.` only | +| meta.llama3-3-70b-instruct-v1:0 | **geo** | `us.` only | +| meta.llama3-2-90b-instruct-v1:0 | **geo** (REQUIRED) | `us.` only | +| 
meta.llama3-2-11b-instruct-v1:0 | **geo** (REQUIRED) | `us.` only | +| meta.llama3-2-3b-instruct-v1:0 | **geo** (REQUIRED) | `us.`, `eu.` | +| meta.llama3-2-1b-instruct-v1:0 | **geo** (REQUIRED) | `us.`, `eu.` | +| meta.llama3-1-405b-instruct-v1:0 | **geo** | `us.` only (in-region only us-west-2) | +| meta.llama3-1-70b-instruct-v1:0 | **geo** | `us.` only (in-region only us-west-2) | +| meta.llama3-1-8b-instruct-v1:0 | **geo** | `us.` only (in-region only us-west-2) | +| mistral.mistral-large-3-675b-instruct | **bare** | Geo: Not supported; Global: Not supported (in-region, 11 regions) | +| mistral.mistral-large-2411-v1:0 | **bare** (phantom — see below; the Mistral Large card covers only `mistral-large-2402-v1:0`, bare) | +| mistral.mistral-large-2407-v1:0 | **bare** (no card; on-demand SKUs exist in us-west-2; the 2402 card shows Geo/Global Not supported — same family, in-region only) | +| mistral.pixtral-large-2502-v1:0 | **geo** (REQUIRED) | `us.`, `eu.` | +| mistral.magistral-small-2509 | **bare** | Geo: Not supported; Global: Not supported | +| mistral.ministral-3-14b-instruct | **bare** | Geo: Not supported; Global: Not supported | +| mistral.ministral-3-8b-instruct | **bare** | Geo: Not supported; Global: Not supported | +| mistral.ministral-3-3b-instruct | **bare** | Geo: Not supported; Global: Not supported (card "Ministral 3B" confirms this exact ID) | +| mistral.mixtral-8x7b-instruct-v0:1 | **bare** | Geo: Not supported; Global: Not supported | +| amazon.titan-text-premier-v1:0 | **bare** | model card removed from docs; historically in-region only, never had inference profiles | +| cohere.command-r-v1:0 | **bare** | card: Geo Not supported; Global Not supported | +| cohere.command-r-plus-v1:0 | **bare** | card: Geo Not supported; Global Not supported | + +Implications for `apps/sim/providers/bedrock/utils.ts` (`getBedrockInferenceProfileId`): + +1. 
All `mistral.*` IDs **except** `mistral.pixtral-large-2502-v1:0`, all `cohere.*` IDs, and `amazon.titan-text-premier-v1:0` must be passed through **unprefixed**. Today the function prefixes everything → `ValidationException` for these 11 models (8 Mistral, 2 Cohere, 1 Titan — every `bare` row in the table above). +2. The blanket `ap-*/me-* → apac` mapping is wrong for every model in this list: **no bedrock-provider model has an `apac.` profile**. Claude Sonnet/Haiku 4.5 use `au.`/`jp.` (or `global.`); Nova 2 Lite has `jp.`; everything else is `us.`/`eu.` only. +3. `eu.` is only valid for: claude opus/sonnet/haiku 4.5, nova-2-lite, nova pro/lite/micro, llama3-2-3b/1b, pixtral-large. For the rest (opus-4-1, nova-premier, all other llamas) only `us.` exists — an `eu-*` region request currently produces a nonexistent `eu.` profile ID. + +--- + +## Per-model verification + +Prices are USD per 1M tokens, **standard on-demand, us-east-1** (us-west-2 where us-east-1 has no SKU). "Pricing API" = the offer file above, fetched 2026-06-11. + +### Anthropic (no Pricing API SKUs — verified against Anthropic pricing page; Bedrock bills Anthropic list prices) + +| model | field | repo | verified | source | verdict | +|---|---|---|---|---|---| +| claude-opus-4-5 | input/output | 5 / 25 | 5 / 25 | Anthropic pricing | OK | +| | cachedInput | — | 0.50 (0.1× input; Bedrock card: caching Yes, min 4096 tok) | Anthropic pricing + card | **ADD** | +| | maxOutputTokens | 64000 | 64K | card + Anthropic overview | OK | +| | contextWindow | 200000 | 200K | card | OK | +| | releaseDate | 2025-11-24 | Nov 24 2025 | card | OK | +| claude-sonnet-4-5 | input/output | 3 / 15 | 3 / 15 | Anthropic pricing | OK | +| | cachedInput | — | 0.30 | Anthropic pricing + card (caching Yes) | **ADD** | +| | maxOutputTokens / ctx | 64000 / 200000 | 64K / 200K | card | OK | +| | releaseDate | 2025-09-29 | card says Sep 30 2025; Anthropic launch Sep 29 2025 | keep repo (matches upstream launch) | +| | recommended | — | provider default model | models.ts convention | **ADD 
`recommended: true`** | +| claude-haiku-4-5 | input/output | 1 / 5 | 1 / 5 | Anthropic pricing | OK | +| | cachedInput | — | 0.10 | Anthropic pricing + card (caching Yes) | **ADD** | +| | maxOutputTokens / ctx | 64000 / 200000 | 64K / 200K | card | OK | +| | releaseDate | 2025-10-15 | card says Oct 16 2025; Anthropic launch Oct 15 2025 | keep repo | +| | speedOptimized | — | "the fastest model with near-frontier intelligence" | Anthropic overview | **ADD `speedOptimized: true`** | +| claude-opus-4-1 | input/output | 15 / 75 | 15 / 75 | Anthropic pricing | OK | +| | cachedInput | — | 1.50 | Anthropic pricing + card (caching Yes, 5m TTL only) | **ADD** | +| | maxOutputTokens | 32768 | **32K = 32000** (card "32K"; Anthropic overview "32k tokens") | **FIX 32768 → 32000** (32768 would exceed the documented cap) | +| | ctx / releaseDate / lifecycle | 200000 / 2025-08-05 / active | 200K / Aug 05 2025 / Active on Bedrock (deprecated on first-party API, retire 2026-08-05 — Bedrock lifecycle independent) | OK | + +**Geo premium (open question d):** Anthropic's pricing page states regional/multi-region endpoints carry a **10% premium over global** for Sonnet 4.5, Haiku 4.5, Opus 4.5 "and all future models" (earlier models keep existing pricing). Sim always builds geo profiles, so real spend on these three is 1.1× the table values. **Decision: keep base prices and document** — (a) the Pricing API exposes no Claude SKUs to anchor a geo-specific number, (b) repo convention is provider list price, (c) baking 1.1× would overbill if/when the provider routes `global.`. Revisit if Sim adds `global.` routing. + +### Amazon Nova (Pricing API us-east-1) + +| model | field | repo | verified | verdict | +|---|---|---|---|---| +| nova-2-pro | input/output | 1.0 / 4.0 | **1.375 / 11.0** (`USE1-Nova2.0Pro-text-input-tokens` 0.001375, `-text-output-tokens` 0.011; global cross-region 1.25/10.0) | **FIX**. 
Note: cloudprice lists 2.19/17.50 for an apac preview profile — AWS Pricing API wins | +| | identity | `amazon.nova-2-pro-v1:0` | no model card; not in catalog; real ID is `amazon.nova-2-pro-preview-20251202-v1:0` (preview, Nova Forge early access, per AWS re:Invent 2025 what's-new + cloudprice/getmaxim) | entry is a **phantom ID**; `deprecated: true` (PR #4990) keeps it hidden — acceptable; longer-term remove or migrate to the preview ID | +| nova-2-lite | input/output | 0.08 / 0.32 | **0.33 / 2.75** (`USE1-Nova2.0Lite-input-tokens` 0.00033, `-output-tokens` 0.00275) | **FIX** — resolves open question (a): repo was wrong AND the secondaries' 0.30/2.50 is the *global cross-region* price (`-cross-region-global` SKUs), not the geo/in-region price Sim pays | +| | cachedInput | — | **0.0825** (`-cache-read-input-token-count` 0.0000825; cache write $0) | **ADD** | +| | maxOutputTokens | — | 64K (card) | **ADD 64000** | +| | ctx / releaseDate / lifecycle | 1000000 / 2025-12-02 / active | 1M / Dec 02 2025 / Active; geo us/eu/jp + global | OK | +| nova-premier | input/output | 2.5 / 12.5 | 2.50 / 12.50 (`USE1-NovaPremier-*`) | OK (PR #4990 fix confirmed) | +| | cachedInput | — | 0.625 (`-cache-read` 0.000625) | **ADD** (model is Legacy but still billable until EOL 2026-09-14) | +| | deprecated | true | Legacy 2026-03-13, EOL 2026-09-14 (lifecycle page + card) | OK | +| | maxOutputTokens | — | 25K (card) | skip per instruction (deprecated); documented only | +| | releaseDate | 2025-04-30 | GA announced Apr 30 2025 (aws.amazon.com what's-new 2025/04 "Amazon Nova Premier… generally available"); card shows "Oct 31 2025" which conflicts with AWS's own GA announcement and the lifecycle history — treated as a card-metadata anomaly | **keep 2025-04-30** | +| nova-pro | input/output | 0.8 / 3.2 | 0.80 / 3.20 | OK (question b resolved) | +| | cachedInput | — | 0.20 | **ADD** | +| | maxOutputTokens | — | 5K (card) | **ADD 5120** (Nova "5K" cap; trackers/openrouter report 5,120) | +| 
| ctx | 300000 | 300K | OK; releaseDate repo 2024-12-03 (re:Invent announce) vs card Dec 05 2024 — keep repo, documented | +| nova-lite | input/output | 0.06 / 0.24 | 0.06 / 0.24 | OK | +| | cachedInput | — | 0.015 | **ADD** | +| | maxOutputTokens | — | 5K | **ADD 5120** | +| nova-micro | input/output | 0.035 / 0.14 | 0.035 / 0.14 | OK | +| | cachedInput | — | 0.00875 | **ADD** | +| | maxOutputTokens | — | 5K | **ADD 5120** | +| | speedOptimized | — | card: "Amazon's fastest text-only model, optimized for speed and low cost" | **ADD `speedOptimized: true`** | + +### Meta (Pricing API; all cards report max output 4K for 3.x, 8K for Llama 4) + +| model | field | repo | verified | verdict | +|---|---|---|---|---| +| llama4-maverick | input/output | 0.24 / 0.97 | 0.24 / 0.97 | OK | +| | maxOutputTokens | — | 8K (card) | **ADD 8192** | +| | ctx / date / lifecycle | 1M / 2025-04-05 / active | 1M / Apr 05 2025 / Active | OK | +| llama4-scout | input/output | 0.18 / 0.72 | **0.17 / 0.66** (`USE1-Llama4-Scout-17B-*` 0.00017 / 0.00066) | **FIX** | +| | maxOutputTokens | — | 8K | **ADD 8192** | +| | ctx | 10000000 | 10M (card) | OK (PR #4990 fix confirmed) | +| llama3-3-70b | input/output | 0.72 / 0.72 | 0.72 / 0.72 | OK | +| | lifecycle | active | **Active** (card; absent from Legacy table) — question (g) | OK | +| | maxOutputTokens | — | 4K | **ADD 4096** | +| llama3-2-90b | input/output | 2.0 / 2.0 | **0.72 / 0.72** (`USE1-Llama3-2-90B-*`) | **FIX** (deprecated but still billable until EOL 2026-07-07) | +| | deprecated | true | Legacy, EOL Jul 7 2026 | OK | +| llama3-2-11b | input/output | 0.16 / 0.16 | 0.16 / 0.16; Legacy EOL 2026-07-07 | OK | +| llama3-2-3b | input/output | 0.15 / 0.15 | 0.15 / 0.15; Legacy | OK | +| llama3-2-1b | input/output | 0.10 / 0.10 | 0.10 / 0.10; Legacy | OK | +| llama3-1-405b | input/output | 5.32 / 16.0 | **2.40 / 2.40** (`USW2-Llama3-1-405B-*` 0.0024; us-east-1 has only batch SKUs at 1.20) | **FIX** (deprecated, Legacy EOL 2026-07-07, but 
the repo price was ~2.2× (input) / ~6.7× (output) too high) | +| llama3-1-70b | input/output | 2.65 / 3.5 | **0.72 / 0.72** (`USE1-Llama3-1-70B-*`; the 2.65 figure resembles no AWS SKU — latency-optimized variant is a separate SKU) | **FIX** | +| | lifecycle | active | **Active** (card) — question (g) | OK | +| | maxOutputTokens / releaseDate | — / — | 4K / Jul 23 2024 | **ADD 4096, 2024-07-23** | +| llama3-1-8b | input/output | 0.3 / 0.6 | **0.22 / 0.22** (`USE1-Llama3-1-8B-*`) | **FIX** | +| | lifecycle | active | **Active** (card) | OK | +| | maxOutputTokens / releaseDate | — / — | 4K / Jul 23 2024 | **ADD 4096, 2024-07-23** | + +### Mistral AI (Pricing API + cards) + +| model | field | repo | verified | verdict | +|---|---|---|---|---| +| mistral-large-3-675b | input/output | 0.5 / 1.5 | 0.50 / 1.50 (`USE1-Mistral-Large-3-675b-Instruct-*`) | OK (PR #4990 confirmed) | +| | ctx / maxOutput | 256000 / 32768 | 256K / 32K (card) | OK | +| | releaseDate | — | Dec 2 2025 (card) | **ADD 2025-12-02** | +| | caching | — | card: prompt caching **Yes** (bedrock-runtime), but no cache-read SKU in Pricing API → rate unpublished | no `cachedInput` (documented) | +| mistral-large-2411 | input/output | 2.0 / 6.0 | **UNVERIFIABLE — model appears not to exist on Bedrock**: no model card (Mistral card index has only "Mistral Large" = 2402 and "Mistral Large 3"), no Pricing API SKU in us-east-1 or us-west-2, not in lifecycle table | keep price; entry is already `deprecated: true` (hidden); recommend follow-up removal | +| mistral-large-2407 | input/output | 4.0 / 12.0 | **2.00 / 6.00** (`USW2-MistralLarge2407-*` 0.002/0.006; us-west-2 only).
The 4/12 figure belongs to *Mistral Large 2402* (`USE1-MistralLarge-*` = 0.004/0.012) — repo had the two swapped | **FIX** (deprecated but billable) | +| pixtral-large-2502 | input/output | 2.0 / 6.0 | 2.00 / 6.00 (`USE1-PixtralLarge2502-*`) | OK (question b resolved) | +| | ctx / maxOutput / lifecycle | 128000 / 16384 / active | 128K / 16K / Active | OK | +| magistral-small-2509 | input/output | 0.5 / 1.5 | 0.50 / 1.50 | OK | +| | ctx / maxOutput / lifecycle | 128000 / 40000 / active | 128K / 40K / Active (card launch "Sep 2025", no day — no releaseDate added) | OK | +| ministral-3-14b | input/output | 0.2 / 0.2 | 0.20 / 0.20 (`USE1-Ministral-3-14b-Instruct-*`) | OK | +| | maxOutput / releaseDate | 8192 / — | 8K / Dec 2 2025 | **ADD 2025-12-02** | +| | caching | — | card shows no prompt-caching row → unconfirmed | no `cachedInput` | +| ministral-3-8b | input/output | 0.1 / 0.1 | **0.15 / 0.15** (`USE1-Ministral-3-8b-Instruct-*` 0.00015) | **FIX**; **ADD releaseDate 2025-12-02** | +| ministral-3-3b | input/output | 0.04 / 0.04 | **0.10 / 0.10** (`USE1-Ministral-3-3b-Instruct-*` 0.0001) | **FIX**; **ADD releaseDate 2025-12-02** (card "Ministral 3B" confirms ID `mistral.ministral-3-3b-instruct`, 128K ctx, 8K out, Active) | +| mixtral-8x7b | input/output | 0.45 / 0.7 | 0.45 / 0.70 (`USE1-Mixtral8x7B-*`) | OK (question b resolved) | +| | ctx / lifecycle | 32000 / active | 32K / Active | OK | +| | maxOutputTokens | — | 4K (card) | **ADD 4096** | + +### Amazon Titan / Cohere + +| model | field | repo | verified | verdict | +|---|---|---|---|---| +| titan-text-premier | input/output | 0.5 / 1.5 | 0.50 / 1.50 (`USE1-TitanText-Premier-*`, attribute `titanModel: "Titan Text G1 Premier"`) | OK | +| | deprecated | true | model card **removed** from the model-cards index (only Titan embeddings/image cards remain); absent from the Legacy table (which excludes models already past EOL) | OK — keep deprecated | +| cohere command-r | input/output | 0.5 / 1.5 | not in Pricing API 
(marketplace-billed); matches long-standing AWS list price | UNVERIFIABLE via Pricing API — keep | +| | deprecated | true | Legacy 2026-02-19, EOL 2026-08-19 (lifecycle + card) | OK | +| cohere command-r-plus | input/output | 3.0 / 15.0 | not in Pricing API; matches long-standing AWS list price | UNVERIFIABLE — keep | +| | deprecated | true | Legacy 2026-02-19, EOL 2026-08-19 | OK | + +--- + +## Changes made in this pass (fix list for models.ts — to be applied by the follow-up code change) + +Pricing (all `updatedAt` → `2026-06-11`): + +1. `bedrock/amazon.nova-2-pro-v1:0`: input 1.0 → 1.375, output 4.0 → 11.0 (Pricing API `USE1-Nova2.0Pro-text-*`) +2. `bedrock/amazon.nova-2-lite-v1:0`: input 0.08 → 0.33, output 0.32 → 2.75 (Pricing API `USE1-Nova2.0Lite-*`) +3. `bedrock/meta.llama4-scout-17b-instruct-v1:0`: input 0.18 → 0.17, output 0.72 → 0.66 +4. `bedrock/meta.llama3-2-90b-instruct-v1:0`: 2.0/2.0 → 0.72/0.72 +5. `bedrock/meta.llama3-1-405b-instruct-v1:0`: 5.32/16.0 → 2.40/2.40 (USW2 on-demand) +6. `bedrock/meta.llama3-1-70b-instruct-v1:0`: 2.65/3.5 → 0.72/0.72 +7. `bedrock/meta.llama3-1-8b-instruct-v1:0`: 0.3/0.6 → 0.22/0.22 +8. `bedrock/mistral.mistral-large-2407-v1:0`: 4.0/12.0 → 2.0/6.0 (USW2 `MistralLarge2407`) +9. `bedrock/mistral.ministral-3-8b-instruct`: 0.1/0.1 → 0.15/0.15 +10. `bedrock/mistral.ministral-3-3b-instruct`: 0.04/0.04 → 0.10/0.10 + +cachedInput additions (cache-read rate): + +11. claude-opus-4-5: 0.5; claude-sonnet-4-5: 0.3; claude-haiku-4-5: 0.1; claude-opus-4-1: 1.5 (Anthropic pricing 0.1× input; Bedrock cards confirm caching) +12. nova-2-lite: 0.0825; nova-premier: 0.625; nova-pro: 0.2; nova-lite: 0.015; nova-micro: 0.00875 (Pricing API cache-read SKUs; Nova cache writes are $0) + +maxOutputTokens: + +13. claude-opus-4-1: 32768 → 32000 (Anthropic overview "32k"; Bedrock card "32K") +14. nova-2-lite: add 64000; nova-pro/lite/micro: add 5120 each +15. 
llama4-maverick/scout: add 8192 each; llama3-3-70b, llama3-1-70b, llama3-1-8b: add 4096 each; mixtral-8x7b: add 4096 + +Flags / metadata: + +16. claude-sonnet-4-5: add `recommended: true` (bedrock default model; matches other providers' convention) +17. claude-haiku-4-5 and nova-micro: add `speedOptimized: true` (Anthropic "fastest model"; card "Amazon's fastest text-only model"). Ruled **against** `speedOptimized` on nova-2-lite — its card positions it as cost-efficient multimodal, not the speed tier. +18. releaseDate additions: mistral-large-3 `2025-12-02`; ministral-3-14b/8b/3b `2025-12-02`; llama3-1-70b/8b `2024-07-23` + +## Deliberately not changed + +- **Claude 4.5-gen geo premium (q. d):** kept base list prices; Sim's geo-profile routing actually bills 1.1× for opus/sonnet/haiku 4.5 per Anthropic's pricing page. Documented here rather than baked in (no AWS SKU to anchor; would overstate global-endpoint cost; consistent with list-price convention). +- **Release-date nits (q. h):** sonnet-4-5 `2025-09-29` and haiku-4-5 `2025-10-15` kept (Anthropic launch dates; Bedrock cards say +1 day). nova pro/lite/micro `2024-12-03` kept (re:Invent announcement; cards say Dec 05). nova-premier `2025-04-30` kept — AWS what's-new confirms GA Apr 30 2025; the card's "Oct 31 2025" contradicts AWS's own announcement. +- **Deprecated models' maxOutputTokens** (nova-premier 25K, llama3-2 4K, command-r/r+ 4K, mistral-large-2407 4K): per instruction, not added. +- **All deprecated flags from PR #4990 re-verified correct:** nova-premier, llama3-2 ×4, llama3-1-405b, command-r/r+ (Legacy with EOL dates on the lifecycle page), titan-text-premier (card removed from catalog), mistral-large-2411/2407 (absent from catalog). llama3-1-70b/8b and llama3-3-70b confirmed **Active** — correctly not deprecated. 
+- **mistral-large-3 / magistral / ministral-14b `cachedInput`:** Large 3 card says caching is supported but no cache-read SKU exists in the Pricing API; ministral-14b card shows no caching row. No invented numbers. +- **`bedrock/amazon.nova-2-pro-v1:0` and `bedrock/mistral.mistral-large-2411-v1:0` entries kept** (both `deprecated: true`, hidden): the former's real Bedrock ID is `amazon.nova-2-pro-preview-20251202-v1:0` (preview), the latter appears to have never shipped on Bedrock. Recommend a follow-up PR to remove/rename — out of scope for a validation pass. + +## Unverifiable + +- **cohere.command-r-v1:0 / command-r-plus-v1:0 prices** (0.5/1.5, 3/15): absent from the Pricing API (marketplace-billed); match the long-standing published AWS rates; models are Legacy. Kept as-is. +- **mistral-large-2411 price** (2/6): no SKU, no card; phantom entry (see above). +- **nova-2-pro geo-profile support**: no card; preview ID served via profiles per third-party trackers only. +- **Mistral Large 3 cache-read rate**: caching supported per card; rate unpublished. diff --git a/docs/models/deepseek-cerebras.md b/docs/models/deepseek-cerebras.md new file mode 100644 index 0000000000..33f9927459 --- /dev/null +++ b/docs/models/deepseek-cerebras.md @@ -0,0 +1,189 @@ +# Model Validation: `deepseek` & `cerebras` — apps/sim/providers/models.ts + +- **Date:** 2026-06-11 +- **Scope:** Final exhaustive re-validation after PR #4990 (deepseek-chat/reasoner repricing + 1M ctx, deprecation flags on deepseek-v3/r1 and cerebras llama3.1-8b/qwen-3-235b) +- **Method:** Live WebFetch of provider docs (primary), OpenRouter/ArtificialAnalysis/aggregators (secondary), DeepSeek news archive for release dates, `rg` of provider code to confirm capability consumption. Provider docs win on conflicts. 
+ +## Sources + +| Source | URL | +|---|---| +| DeepSeek pricing (primary) | https://api-docs.deepseek.com/quick_start/pricing | +| DeepSeek list-models (primary) | https://api-docs.deepseek.com/api/list-models | +| DeepSeek chat-completion API ref (primary) | https://api-docs.deepseek.com/api/create-chat-completion | +| DeepSeek reasoning guide (primary) | https://api-docs.deepseek.com/guides/reasoning_model | +| DeepSeek V3 announcement | https://api-docs.deepseek.com/news/news1226 | +| DeepSeek R1 announcement | https://api-docs.deepseek.com/news/news250120 | +| DeepSeek V4 preview announcement | https://api-docs.deepseek.com/news/news260424 | +| Cerebras models overview (primary) | https://inference-docs.cerebras.ai/models/overview | +| Cerebras gpt-oss model page (primary) | https://inference-docs.cerebras.ai/models/openai-oss | +| Cerebras zai-glm-4.7 model page (primary) | https://inference-docs.cerebras.ai/models/zai-glm-47 | +| Cerebras deprecations (primary) | https://inference-docs.cerebras.ai/support/deprecation | +| Cerebras chat-completions API ref (primary) | https://inference-docs.cerebras.ai/api-reference/chat-completions | +| OpenRouter deepseek-v4-flash (secondary) | https://openrouter.ai/deepseek/deepseek-v4-flash | +| OpenRouter GLM 4.7 (secondary) | https://openrouter.ai/z-ai/glm-4.7 | +| ArtificialAnalysis gpt-oss-120b providers (secondary) | https://artificialanalysis.ai/models/gpt-oss-120b/providers | +| aimodelapis Cerebras GLM-4.7 (secondary) | https://aimodelapis.com/providers/cerebras/cerebras-zai-glm-4-7 | +| Cerebras GLM-4.7 launch blog (secondary) | https://www.cerebras.ai/blog/glm-4-7 | + +## Code-consumption checks + +- `rg "temperature" apps/sim/providers/deepseek/ apps/sim/providers/cerebras/`: + - `deepseek/index.ts:89` — `if (request.temperature !== undefined) payload.temperature = request.temperature` + - `cerebras/index.ts:85` — `if (request.temperature !== undefined) payload.temperature = request.temperature` + - Both 
providers forward temperature when set; a `temperature` capability in models.ts is what surfaces the slider (`getMaxTempFromDefinitions` in `providers/utils.ts`). With `capabilities: {}` the slider is hidden even though the API accepts the param. +- No `reasoningEffort`, `verbosity`, `thinking`, `nativeStructuredOutputs`, or `computerUse` handling exists in either provider implementation — do **not** add those capabilities even though Cerebras documents `reasoning_effort` (not consumed by code). +- `maxOutputTokens` is a supported capability field (`models.ts:42`) consumed by `providers/index.ts` — safe to recommend. + +--- + +## DeepSeek + +### Alias status (Open Question a) + +**Confirmed.** DeepSeek pricing page: "The model names `deepseek-chat` and `deepseek-reasoner` will be deprecated on **2026/07/24 15:59 UTC**." They correspond to the **non-thinking** and **thinking** modes of `deepseek-v4-flash` respectively. The list-models API now returns only `deepseek-v4-flash` and `deepseek-v4-pro`. Until 2026-07-24 the aliases remain valid API ids, so keeping them non-deprecated in models.ts is correct **for now** — they must be flipped to `deprecated: true` (or removed) by 2026-07-24. + +**Recommendation (separate work, not part of this pass):** add `deepseek-v4-flash` (input $0.14 / cached $0.0028 / output $0.28, ctx 1M, max output 384K, released 2026-04-24) and `deepseek-v4-pro` (input $0.435 / cached $0.003625 / output $0.87, ctx 1M, max output 384K) as first-class entries before the 2026-07-24 alias retirement, then deprecate the aliases. 
+ +### deepseek-chat + +| Field | Current value | Verified value | Source | Verdict | +|---|---|---|---|---| +| id valid | `deepseek-chat` | Valid alias until 2026-07-24 15:59 UTC (→ v4-flash non-thinking) | pricing page | OK | +| pricing.input | 0.14 | $0.14/M (cache miss) | pricing page | OK | +| pricing.cachedInput | 0.0028 | $0.0028/M (cache hit) | pricing page | OK | +| pricing.output | 0.28 | $0.28/M | pricing page | OK | +| pricing.updatedAt | 2026-06-11 | — | — | OK | +| contextWindow | 1000000 | 1M tokens | pricing page | OK | +| capabilities.temperature | *(absent)* | Supported, range 0–2, default 1 ("What sampling temperature to use, between 0 and 2…") — applies to non-thinking mode | create-chat-completion API ref | **FIX: add `temperature: { min: 0, max: 2 }`** (code at `deepseek/index.ts:89` consumes it) | +| capabilities.maxOutputTokens | *(unset)* | Conflict: pricing page says 384K max output for v4-flash; reasoning guide (thinking mode) says default 32K / max 64K | pricing page vs reasoning guide | Leave unset — see "Deliberately not changed" | +| releaseDate | 2024-12-26 | V3 announcement 2024-12-26 (date the alias pointed to V3); alias now points to v4-flash (released 2026-04-24) | news1226, news260424 | OK (alias semantics — keep original anchor) | +| deprecated | *(absent)* | Alias still live | pricing page | OK until 2026-07-24 | + +### deepseek-v3 + +| Field | Current value | Verified value | Source | Verdict | +|---|---|---|---|---| +| id valid | `deepseek-v3` | **Not** a valid API id (list-models returns only v4-flash/v4-pro; never a documented API id — API ids were deepseek-chat/reasoner) | list-models | OK as `deprecated: true` | +| deprecated | true | Correct | list-models | OK | +| pricing | 0.28 / 0.028 / 0.42 (updatedAt 2026-04-01) | Historical V3.x pricing; model unpurchasable, frozen values acceptable | — | OK (legacy) | +| contextWindow | 128000 | Historical 128K | — | OK (legacy) | +| releaseDate | 2024-12-26 | DeepSeek-V3 
announced 2024-12-26 | news1226 | **Verified** | + +### deepseek-r1 + +| Field | Current value | Verified value | Source | Verdict | +|---|---|---|---|---| +| id valid | `deepseek-r1` | **Not** a valid API id (R1 was accessed as `deepseek-reasoner`) | list-models, news250120 | OK as `deprecated: true` | +| deprecated | true | Correct | list-models | OK | +| pricing | 0.55 / 0.14 / 2.19 | Matches original R1 launch pricing ($0.14 hit / $0.55 miss / $2.19 out) | news250120 | **Verified** (legacy, frozen) | +| contextWindow | 128000 | Historical | — | OK (legacy) | +| releaseDate | 2025-01-20 | R1 announced 2025-01-20 | news250120 | **Verified** | + +### deepseek-reasoner + +| Field | Current value | Verified value | Source | Verdict | +|---|---|---|---|---| +| id valid | `deepseek-reasoner` | Valid alias until 2026-07-24 15:59 UTC (→ v4-flash thinking) | pricing page | OK | +| pricing.input / cachedInput / output | 0.14 / 0.0028 / 0.28 | $0.14 / $0.0028 / $0.28 (same v4-flash pricing, both modes) | pricing page | OK | +| pricing.updatedAt | 2026-06-11 | — | — | OK | +| contextWindow | 1000000 | 1M | pricing page | OK | +| capabilities | `{}` (no temperature) | Reasoning guide: `temperature`, `top_p`, `presence_penalty`, `frequency_penalty`, `logprobs`, `top_logprobs` **not supported** — "will not trigger an error but will also have no effect" | reasoning guide | OK — must NOT add temperature | +| capabilities.maxOutputTokens | *(unset)* | Conflict (384K vs 32K/64K) | see below | Leave unset | +| releaseDate | 2025-01-20 | `model=deepseek-reasoner` introduced with R1 release 2025-01-20 | news250120 ("Use DeepSeek-R1 by setting model=deepseek-reasoner") | **Verified** | + +### maxOutputTokens conflict (Open Question a) + +- Pricing page (current, v4-flash): **384K max output**. +- Reasoning guide (deepseek-reasoner page): **default 32K, max 64K** — appears not yet updated for V4 (still reflects R1-era limits). 
+- The aliases map to v4-flash modes, so 384K is *probably* correct, but DeepSeek's own docs disagree with each other and the reasoning guide is the page specific to `deepseek-reasoner`. **Resolution: leave `maxOutputTokens` unset on both aliases** (current state) and set 384000 on the future `deepseek-v4-flash`/`deepseek-v4-pro` entries, where the pricing page is unambiguous. + +### Secondary-source pricing (DeepSeek) + +OpenRouter lists deepseek-v4-flash at **$0.098 in / $0.196 out** — exactly 70% of official $0.14/$0.28, i.e. the OpenRouter **−30% promo is still present**. Per policy, provider docs win: $0.14 / $0.0028 / $0.28 stands. OpenRouter confirms 1M context and the 2026-04-24 release date. + +--- + +## Cerebras + +### Deprecations (confirmed) + +Cerebras deprecation page lists **llama3.1-8b** and **qwen-3-235b-a22b-instruct-2507** as deprecated **2026-05-27**, recommended replacement "GPT OSS 120B". Neither appears on the models overview anymore. `deprecated: true` on both entries (PR #4990) is correct. 
+ +### cerebras/gpt-oss-120b + +| Field | Current value | Verified value | Source | Verdict | +|---|---|---|---|---| +| id valid | `gpt-oss-120b` (after `cerebras/` strip at `cerebras/index.ts:82`) | Production model | models overview, model page | OK | +| pricing.input | 0.35 | $0.35/M | model page (live 2026-06-11) | OK | +| pricing.output | 0.75 | $0.75/M | model page | OK | +| pricing.updatedAt | 2026-06-11 | — | — | OK | +| contextWindow | 131072 | 131k (paid tiers; free tier 65k) | model page | OK (paid tier, consistent with repo convention) | +| capabilities.maxOutputTokens | *(unset)* | 40k paid tiers (32k free) | model page | **FIX: add `maxOutputTokens: 40000`** (paid tier, matching paid-tier ctx) | +| capabilities.temperature | *(absent)* | Cerebras chat-completions API: "sampling temperature to use, between 0 and 2.0" | API reference | **FIX: add `temperature: { min: 0, max: 2 }`** (code at `cerebras/index.ts:85` consumes it) | +| releaseDate | 2025-08-05 | gpt-oss released 2025-08-05; Cerebras day-one launch | cerebras.ai blog "OpenAI GPT OSS 120B Runs Fastest on Cerebras", techintelpro | **Verified** | + +Secondary-source note: several aggregators (crackedaiengineering, ArtificialAnalysis blended $0.39) still show launch-era pricing **$0.25/$0.69** and 33K max output. The live Cerebras model page (fetched today) says $0.35/$0.75 and 40k paid-tier max output — provider docs win; aggregators are stale. 
+ +### cerebras/llama3.1-8b + +| Field | Current value | Verified value | Source | Verdict | +|---|---|---|---|---| +| deprecated | true | Deprecated 2026-05-27, migrate to GPT OSS 120B | deprecation page | **Verified** | +| pricing | 0.10 / 0.10 (frozen 2026-04-01) | Unpurchasable; frozen legacy values | — | OK (legacy) | +| contextWindow | 32768 | Historical | — | OK (legacy) | +| releaseDate | 2024-08-27 | Consistent with Cerebras Inference launch (2024-08-27); not re-verified against a live page this pass | — | Plausible / not re-verified (deprecated model, low stakes) | + +### cerebras/qwen-3-235b-a22b-instruct-2507 + +| Field | Current value | Verified value | Source | Verdict | +|---|---|---|---|---| +| deprecated | true | Deprecated 2026-05-27, migrate to GPT OSS 120B | deprecation page | **Verified** | +| pricing | 0.6 / 1.2 (frozen 2026-04-01) | Unpurchasable; frozen legacy values | — | OK (legacy) | +| contextWindow | 131072 | Historical | — | OK (legacy) | +| releaseDate | 2025-07-29 | Could not verify the exact Cerebras availability date | — | **Unverifiable** (deprecated model; leave as-is) | + +### cerebras/zai-glm-4.7 + +| Field | Current value | Verified value | Source | Verdict | +|---|---|---|---|---| +| id valid | `zai-glm-4.7` | Preview model on overview | models overview, model page | OK | +| pricing.input | 2.25 | $2.25/M | model page; confirmed by aimodelapis (secondary) | OK | +| pricing.output | 2.75 | $2.75/M | model page; aimodelapis | OK | +| pricing.updatedAt | 2026-06-11 | — | — | OK | +| contextWindow | 131072 | 131k paid tiers (free 64k) | model page; aimodelapis (131,000) | OK | +| capabilities.maxOutputTokens | *(unset)* | 40k tokens (both tiers) | model page; aimodelapis (40,000) | **FIX: add `maxOutputTokens: 40000`** | +| capabilities.temperature | *(absent)* | API-wide param, 0–2.0 | API reference | **FIX: add `temperature: { min: 0, max: 2 }`** | +| releaseDate | 2025-12-22 | GLM-4.7 released 2025-12-22 (OpenRouter "Dec 22, 
2025"; PR Newswire; Cerebras same-day launch blog) | multiple | **Verified** | + +--- + +## Changes made in this pass (PR #4990) — all re-verified correct + +1. `deepseek-chat` & `deepseek-reasoner` repriced to $0.14 / $0.0028 cached / $0.28 — matches v4-flash pricing they now alias. ✅ +2. `deepseek-chat` & `deepseek-reasoner` contextWindow → 1,000,000 — matches v4-flash 1M default. ✅ +3. `deprecated: true` on `deepseek-v3` and `deepseek-r1` — neither is a valid API id (list-models returns only v4-flash/v4-pro). ✅ +4. `deprecated: true` on `cerebras/llama3.1-8b` and `cerebras/qwen-3-235b-a22b-instruct-2507` — Cerebras deprecation page, 2026-05-27. ✅ +5. `pricing.updatedAt: 2026-06-11` bumps on the four live-model entries. ✅ + +## Outstanding fixes recommended (not applied — doc-only pass) + +1. `deepseek-chat`: add `capabilities.temperature: { min: 0, max: 2 }` — API ref documents temperature 0–2 (default 1) for chat completions; non-thinking mode honors it; `deepseek/index.ts:89` forwards it. Currently the empty `capabilities` hides Sim's temperature slider for a model that supports it. +2. `cerebras/gpt-oss-120b`: add `capabilities.temperature: { min: 0, max: 2 }` and `capabilities.maxOutputTokens: 40000`. +3. `cerebras/zai-glm-4.7`: add `capabilities.temperature: { min: 0, max: 2 }` and `capabilities.maxOutputTokens: 40000`. + +## Deliberately not changed + +- **`deepseek-reasoner` capabilities stay `{}`** — reasoning guide explicitly lists temperature as unsupported/no-effect in thinking mode. +- **`deepseek-chat`/`deepseek-reasoner` not marked deprecated** — valid aliases until 2026-07-24 15:59 UTC. Calendar item: deprecate (and add v4-flash/v4-pro entries) before that date. +- **`maxOutputTokens` left unset on both DeepSeek aliases** — DeepSeek docs self-conflict (pricing page: 384K for v4-flash; reasoning guide: 32K default / 64K max for deepseek-reasoner). Set 384000 only on future first-class `deepseek-v4-*` entries where the pricing page is unambiguous. 
+- **Legacy pricing/ctx on the four deprecated entries** (deepseek-v3, deepseek-r1, llama3.1-8b, qwen-3-235b) — frozen historical values on unpurchasable models; R1 values cross-checked against the original announcement. +- **No `reasoningEffort` capability for Cerebras** despite the model pages documenting `reasoning_effort` — `cerebras/index.ts` does not consume it (capability additions must be backed by docs AND code). +- **OpenRouter −30% DeepSeek promo pricing ($0.098/$0.196) ignored** — provider docs win. +- **deepseek-chat releaseDate kept at 2024-12-26** — anchor is the V3 announcement; the id predates V3 and now aliases v4-flash (2026-04-24); any value is a judgment call for an alias, so the existing anchor is retained. + +## Unverifiable + +- `cerebras/qwen-3-235b-a22b-instruct-2507` releaseDate 2025-07-29 — no live source found for the exact Cerebras availability date (model delisted). Left as-is. +- `cerebras/llama3.1-8b` releaseDate 2024-08-27 — consistent with the known Cerebras Inference launch date but not re-verified against a live page this pass (model delisted). +- Cerebras temperature **default** value — API ref documents the 0–2.0 range but not a default. diff --git a/docs/models/embeddings-rerank-dynamic.md b/docs/models/embeddings-rerank-dynamic.md new file mode 100644 index 0000000000..46ef6b6967 --- /dev/null +++ b/docs/models/embeddings-rerank-dynamic.md @@ -0,0 +1,75 @@ +# Validation: EMBEDDING_MODEL_PRICING, RERANK_MODEL_PRICING, and dynamic providers + +- **Date:** 2026-06-11 +- **File validated:** `apps/sim/providers/models.ts` (`EMBEDDING_MODEL_PRICING` ~L3289, `RERANK_MODEL_PRICING` ~L3320, dynamic provider definitions ~L87–191, L2503–2515, update functions ~L3190–3287) +- **Method:** Every numeric claim checked via live WebFetch against the provider's first-party docs, with at least one secondary tracker where available. WebSearch used as fallback when a page truncated. No edits were made to `models.ts`. 
+- **Primary sources:** + - OpenAI: `developers.openai.com/api/docs/models/text-embedding-3-small` / `.../text-embedding-3-large` / `.../text-embedding-ada-002` (the aggregate pricing page truncates before the embeddings table; per-model pages carry the prices) + - Google: `ai.google.dev/gemini-api/docs/pricing` + - Cohere: `cohere.com/pricing` (Model Vault only — per-search API pricing not rendered), `docs.cohere.com/docs/how-does-cohere-pricing-work` (confirms rerank is billed per search, no numbers), `docs.cohere.com/docs/rerank` (model list) + - Secondary trackers: Vercel AI Gateway (`vercel.com/ai-gateway/models/rerank-v4-pro`, `.../rerank-v4-fast`), eesel.ai Cohere pricing guide, metacto.com Cohere pricing deep dive, cloudprice.net, TokenMix/costgoat (OpenAI embeddings) + - Provider API docs: `docs.fireworks.ai/api-reference/post-chatcompletions`, `docs.together.ai/reference/chat-completions`, `openrouter.ai/docs` parameters reference, `docs.ollama.com/api/openai-compatibility`, `docs.baseten.co/development/model-apis/overview` + +## EMBEDDING_MODEL_PRICING + +| Entry | Field | Value in code | Verified value | Source | Verdict | +|---|---|---|---|---|---| +| `text-embedding-3-small` | input | $0.02 / 1M | $0.02 / 1M | developers.openai.com model page; TokenMix secondary | CORRECT | +| `text-embedding-3-small` | output | $0.00 | n/a (embeddings bill input only) | OpenAI docs | CORRECT | +| `text-embedding-3-large` | input | $0.13 / 1M | $0.13 / 1M | developers.openai.com model page; TokenMix secondary | CORRECT | +| `text-embedding-3-large` | output | $0.00 | n/a | OpenAI docs | CORRECT | +| `text-embedding-ada-002` | input | $0.10 / 1M | $0.10 / 1M | developers.openai.com model page; search secondary | CORRECT | +| `text-embedding-ada-002` | output | $0.00 | n/a | OpenAI docs | CORRECT | +| `gemini-embedding-001` | input | $0.15 / 1M | $0.15 / 1M (paid tier, standard; batch is $0.075) | ai.google.dev/gemini-api/docs/pricing | CORRECT | +| 
`gemini-embedding-001` | output | $0.00 | n/a | Google docs | CORRECT | + +## RERANK_MODEL_PRICING (per search unit = 1 query × ≤100 docs) + +| Entry | Value in code | Verified value | Source | Verdict | +|---|---|---|---|---| +| `rerank-v4.0-pro` | $0.0025 / search | $2.50 / 1k searches ($0.0025) | Vercel AI Gateway rerank-v4-pro page ("$2.5/K, billed per search query"); eesel.ai ("$0.0025 / search") | CORRECT | +| `rerank-v4.0-fast` | $0.002 / search | $2.00 / 1k searches ($0.002) | Vercel AI Gateway rerank-v4-fast page ("$2/K"); eesel.ai ("$0.002 / search") | CORRECT | +| `rerank-v3.5` | $0.002 / search | $2.00 / 1k searches ($0.002) Cohere direct & Bedrock | metacto ("$2.00 per 1,000 searches"); cloudprice.net ($0.0020/unit, Cohere + Bedrock rows agree) | CORRECT | + +Notes: + +- `cohere.com/pricing` currently only renders Model Vault (dedicated instance) hourly pricing; the per-search API table is JS-rendered and not fetchable. `docs.cohere.com/docs/how-does-cohere-pricing-work` confirms rerank is "priced based on the quantity of searches" (per-search, not per-token), which validates the `perSearchUnit` modeling and the ≤100-doc cap comment in the code. +- Conflicting source resolved: OpenRouter lists `cohere/rerank-v3.5` at $0.001/search, but that is OpenRouter's reseller price, not Cohere first-party. Sim calls Cohere directly, so $0.002 stands. +- Cohere also offers `rerank-english-v3.0` and `rerank-multilingual-v3.0`; Sim does not expose them, so no entries are needed. + +## Dynamic providers (provider-level config sanity pass) + +All eight have empty static `models: []` populated at runtime via `update*Models()` (pricing zeroed, `updatedAt` set to today — intentional for BYOK/reseller providers). `modelPatterns` prefixes match each provider's `update*` function and prefix-stripping in the provider implementations. 
+ +| Provider | Config checked | Verdict | +|---|---|---| +| `fireworks` | temp 0–2, toolUsageControl true, pattern `/^fireworks\//` | CORRECT — Fireworks docs: temperature "between 0 and 2", full `tool_choice` support (`none`/`auto`/`required`/named) | +| `together` | temp 0–2, toolUsageControl true, pattern `/^together\//` | **DISCREPANCY** — Together's own API reference documents temperature as "a decimal number from 0-1"; `tool_choice` supported. Sim declares max 2. Flagged below; not changed in this pass | +| `baseten` | temp 0–2, toolUsageControl true, pattern `/^baseten\//` | SANE — Model APIs are OpenAI-compatible (docs.baseten.co); exact temp bounds not published, 0–2 follows the OpenAI convention | +| `openrouter` | temp 0–2, toolUsageControl true, pattern `/^openrouter\//` | CORRECT — OpenRouter docs: temperature 0.0–2.0, default 1.0 | +| `ollama-cloud` | temp 0–2, toolUsageControl **true**, pattern `/^ollama-cloud\//` | **QUESTIONABLE** — Ollama's OpenAI-compat layer (same API at `ollama.com/v1`) explicitly lists `tool_choice` as unsupported, and Sim's own shared core (`apps/sim/providers/ollama/core.ts:140-147`) degrades forced tool selection to `auto` with a warning. Local `ollama` correctly sets `toolUsageControl: false`; `ollama-cloud: true` is inconsistent. Flagged below; not changed in this pass | +| `vllm` | temp 0–2, toolUsageControl true, `defaultModel: 'vllm/generic'`, pattern `/^vllm\//` | SANE — vLLM's OpenAI-compatible server accepts temperature ≥0 (no hard cap of 2); 0–2 is a reasonable UI cap. 
`vllm/generic` matches the pattern and is the documented placeholder (only other reference is the vllm provider test) | +| `litellm` | temp 0–2, toolUsageControl true, pattern `/^litellm\//` | SANE — proxy passthrough; effective bounds depend on the upstream model, 0–2 is the OpenAI-convention cap | +| `ollama` (local) | toolUsageControl false ("does not support tool_choice"), no temp block, `modelPatterns: []` | CORRECT — docs.ollama.com OpenAI-compatibility page lists `tool_choice` as unsupported (temperature is supported); empty patterns are intentional since local model names are arbitrary and matched via the providers store | + +## `gemini` vs `google` provider key + +- `PROVIDER_DEFINITIONS` contains only `google` (L1303, `defaultModel: 'gemini-2.5-pro'`, patterns `/^gemini/`, `/^deep-research/`). There is no `gemini` registry key, and nothing calls `getProviderModels('gemini')` — all callers use `'google'` (models.ts L3163, `apps/sim/providers/google/index.ts:21`). +- `apps/sim/providers/gemini/` exists but is **not a provider**: it holds only `core.ts`/`types.ts` (shared Gemini execution logic consumed by both the `google` and `vertex` providers). No `index.ts`, not registered in `registry.ts`. +- The only `'gemini'` string key is the rotating-API-key namespace: `apps/sim/providers/utils.ts:891` maps provider `google` → `getRotatingApiKey('gemini')`, matching the `GEMINI_API_KEY_*` env convention in `apps/sim/lib/core/config/api-keys.ts`. Intentional; nothing structurally odd. + +## Changes made in this pass + +None. All `EMBEDDING_MODEL_PRICING` and `RERANK_MODEL_PRICING` values verified correct; instructions prohibited edits to `models.ts`. + +## Deliberately not changed + +- **`together` temperature max 2 vs documented 0–1:** Together's API reference documents 0–1, but the endpoint is OpenAI-compatible and tolerantly accepts higher values in practice; tightening to `max: 1` would change UI slider behavior for existing workflows. 
Left for a deliberate follow-up decision. +- **`ollama-cloud` `toolUsageControl: true`:** inconsistent with local `ollama: false` and with Ollama's documented lack of `tool_choice`. Runtime is already safe (shared core degrades forced selection to `auto` with a warning), so this only mis-advertises a capability in the UI. Left for follow-up. +- Dynamic-model zero pricing (`input: 0, output: 0`) in all `update*Models()` functions — intentional for BYOK/reseller providers where Sim doesn't bill model usage. + +## Unverifiable + +- **Cohere first-party per-search price page:** `cohere.com/pricing`'s API pricing table does not render server-side; per-search numbers were confirmed via two independent secondary trackers per model plus Cohere docs confirming the per-search billing unit. +- **Baseten and LiteLLM exact temperature bounds:** neither publishes a numeric range (OpenAI-compatible passthrough); 0–2 judged sane by convention rather than verified. +- **vLLM upper temperature bound:** vLLM accepts temperatures above 2; the 0–2 cap is a UI choice, not a provider-documented limit. diff --git a/docs/models/google.md b/docs/models/google.md new file mode 100644 index 0000000000..215ea82402 --- /dev/null +++ b/docs/models/google.md @@ -0,0 +1,184 @@ +# Google Provider Model Validation — Final Pass + +- **Date:** 2026-06-11 +- **Scope:** `google` block in `apps/sim/providers/models.ts` (10 models), re-verifying everything including changes landed in PR #4990 +- **Method:** Live WebFetch of ai.google.dev (models overview, per-model pages, pricing, thinking, deprecations, changelog, generate-content API reference) and cloud.google.com Vertex AI pricing; OpenRouter as secondary pricing source; WebSearch for GA dates. Google docs treated as authoritative where sources conflict. 
+- **Primary sources:** + - https://ai.google.dev/gemini-api/docs/models (+ per-model pages) + - https://ai.google.dev/gemini-api/docs/pricing + - https://ai.google.dev/gemini-api/docs/thinking + - https://ai.google.dev/gemini-api/docs/deprecations + - https://ai.google.dev/gemini-api/docs/changelog + - https://ai.google.dev/gemini-api/docs/interactions/deep-research + - https://ai.google.dev/api/generate-content (GenerationConfig) + - https://cloud.google.com/vertex-ai/generative-ai/pricing ("Gemini Deep Research Agent" row) + - OpenRouter model pages (secondary pricing) + +## Provider-level checks + +| Check | Result | +|---|---| +| Capability consumption in `apps/sim/providers/gemini/` | Only `thinking` is consumed: `request.thinkingLevel` → `mapToThinkingLevel` → `thinkingConfig` (`gemini/core.ts:955-961`). No references to `reasoningEffort`, `verbosity`, `nativeStructuredOutputs`, or `computerUse`. Declaring `thinking.levels`/`default` per model is the only capability surface that affects requests. | +| `temperature: { min: 0, max: 2 }` | **Verified.** GenerationConfig documents temperature range [0.0, 2.0] (https://ai.google.dev/api/generate-content). Note Google recommends keeping 1.0 default on Gemini 3 models, but 0–2 is the accepted API range. Verdict: correct on all entries. | +| 2.5-series entries have no `thinking` capability | **Correct by design.** Gemini 2.5 uses `thinkingBudget`, not `thinkingLevel` (https://ai.google.dev/gemini-api/docs/thinking). Our provider only sends `thinkingConfig` when a level is selected, so omitting `thinking` on 2.5 entries is right. 
| + +## Per-model verification + +### gemini-3.5-flash + +| Field | Our value | Source | Verdict | +|---|---|---|---| +| id | `gemini-3.5-flash` (stable/GA) | docs/models, model page | OK | +| pricing.input | 1.5 | docs/pricing ($1.50); Vertex ($1.50 global); OpenRouter ($1.50) | OK | +| pricing.cachedInput | 0.15 | docs/pricing ($0.15); Vertex ($0.15) | OK | +| pricing.output | 9.0 | docs/pricing ($9.00); Vertex ($9.00); OpenRouter ($9.00) | OK | +| thinking.levels | minimal/low/medium/high | docs/thinking | OK | +| thinking.default | medium | docs/thinking ("Default: medium"); OpenRouter ("defaults to medium thinking effort") | OK | +| maxOutputTokens | 65536 | model page (65,536) | OK | +| contextWindow | 1048576 | model page (1,048,576) | OK | +| releaseDate | 2026-05-19 | changelog: "May 19, 2026 — Released `gemini-3.5-flash`, the generally available (GA) version" | OK | +| recommended | true | Google's flagship recommendation; replacement target for 2.0-flash and 3-flash-preview | OK | + +### gemini-3.1-pro-preview + +| Field | Our value | Source | Verdict | +|---|---|---|---| +| id | `gemini-3.1-pro-preview` | docs/models, model page | OK | +| pricing.input | 2.0 | docs/pricing ($2.00 ≤200k; $4.00 >200k); OpenRouter ($2) | OK (base tier; see "Deliberately not changed") | +| pricing.cachedInput | 0.2 | docs/pricing ($0.20 ≤200k) | OK | +| pricing.output | 12.0 | docs/pricing ($12.00 ≤200k; $18.00 >200k); OpenRouter ($12) | OK | +| thinking.levels | low/medium/high (no minimal — PR #4990 change) | docs/thinking: "Supported levels: low, medium, high"; "Thinking cannot be disabled" | OK — #4990 change re-confirmed | +| thinking.default | high | docs/thinking ("Default: high (dynamic)") | OK | +| maxOutputTokens | 65536 | model page | OK | +| contextWindow | 1048576 | model page (1,048,576) | OK | +| releaseDate | 2026-02-19 | changelog: "Feb 19, 2026 — Released Gemini 3.1 Pro Preview" | OK | + +### gemini-3.1-flash-lite + +| Field | Our value | Source | Verdict 
| +|---|---|---|---| +| id | `gemini-3.1-flash-lite` (stable — PR #4990 rename) | docs/models lists stable; `gemini-3.1-flash-lite-preview` marked "Shut down" (May 25, 2026 per deprecations) | OK — rename re-confirmed | +| pricing.input | 0.25 | docs/pricing ($0.25 text); Vertex ($0.25 global); OpenRouter ($0.25) | OK | +| pricing.cachedInput | 0.025 | docs/pricing ($0.025); Vertex ($0.025) | OK | +| pricing.output | 1.5 | docs/pricing ($1.50); Vertex ($1.50); OpenRouter ($1.50) | OK | +| thinking.levels | minimal/low/medium/high | docs/thinking; OpenRouter ("full thinking levels (minimal, low, medium, high)") | OK | +| thinking.default | minimal | docs/thinking: "Default: minimal" — Google's documented API default for this model **is** `minimal`, so our value matches the API default (the earlier report that the API default is 'high' is not supported by current docs). Also aligns with our cost-saving intent. | OK | +| maxOutputTokens | 65536 | model page (65,536) | OK | +| contextWindow | 1048576 | model page (1,048,576) | OK | +| releaseDate | **2026-03-03 — STALE.** That is the preview's release date. GA changelog: "May 7, 2026 — Released `gemini-3.1-flash-lite`, the generally available (GA) version"; Google Cloud blog GA announcement published 2026-05-08. Changelog (Gemini API source of truth) wins. | changelog; cloud.google.com blog "Gemini 3.1 Flash-Lite is now generally available" | **FIX → 2026-05-07** | +| speedOptimized | (absent) | Model page: "optimized for low-latency, cost-effective" high-volume tasks; Google blog: "fastest and most cost-efficient Gemini 3 series model". Precedent: `gemini-2.5-flash-lite` carries `speedOptimized: true` and Google's models page calls 2.5-flash-lite "the fastest and most budget-friendly" of its generation — 3.1-flash-lite holds the same position in the Gemini 3 generation. 
| **FIX → add `speedOptimized: true`** | + +### gemini-3-flash-preview + +| Field | Our value | Source | Verdict | +|---|---|---|---| +| id | `gemini-3-flash-preview` | docs/models, model page | OK | +| pricing.input | 0.5 | docs/pricing ($0.50 text); OpenRouter ($0.50) | OK | +| pricing.cachedInput | 0.05 | docs/pricing ($0.05) | OK | +| pricing.output | 3.0 | docs/pricing ($3.00); OpenRouter ($3.00) | OK | +| thinking.levels | minimal/low/medium/high | docs/thinking | OK | +| thinking.default | high | docs/thinking ("Default: high (dynamic)") | OK | +| maxOutputTokens | 65536 | model page | OK | +| contextWindow | 1048576 (PR #4990 change) | model page (1,048,576); OpenRouter (1M) | OK — #4990 change re-confirmed | +| releaseDate | 2025-12-17 | changelog: "Dec 17, 2025 — Launched Gemini 3 Flash Preview"; OpenRouter | OK | +| deprecated | (absent) | docs/deprecations lists `gemini-3-flash-preview` in the deprecation table with recommended replacement `gemini-3.5-flash`, **no shutdown date announced yet**. (The model's own page still renders as an active preview — the deprecations table is the authoritative lifecycle source.) 
| **FIX → add `deprecated: true`** | + +### gemini-2.5-pro + +| Field | Our value | Source | Verdict | +|---|---|---|---| +| pricing.input | 1.25 | docs/pricing ($1.25 ≤200k); OpenRouter ($1.25) | OK (base tier) | +| pricing.cachedInput | 0.125 | docs/pricing ($0.125 ≤200k) | OK | +| pricing.output | 10.0 | docs/pricing ($10.00 ≤200k); OpenRouter ($10) | OK | +| maxOutputTokens | 65536 | longstanding model-page value | OK | +| contextWindow | 1048576 | OpenRouter (1M); longstanding model-page value | OK | +| releaseDate | 2025-03-25 | preview launch date (GA was 2025-06-17); repo convention uses first availability | OK | + +### gemini-2.5-flash + +| Field | Our value | Source | Verdict | +|---|---|---|---| +| pricing.input | 0.3 | docs/pricing ($0.30 text) | OK | +| pricing.cachedInput | 0.03 | docs/pricing ($0.03) | OK | +| pricing.output | 2.5 | docs/pricing ($2.50) | OK | +| maxOutputTokens / contextWindow | 65536 / 1048576 | longstanding model-page values | OK | +| releaseDate | 2025-05-20 | I/O 2025 preview launch | OK | + +### gemini-2.5-flash-lite + +| Field | Our value | Source | Verdict | +|---|---|---|---| +| pricing.input | 0.1 | docs/pricing ($0.10 text) | OK | +| pricing.cachedInput | 0.01 | docs/pricing ($0.01) | OK | +| pricing.output | 0.4 | docs/pricing ($0.40) | OK | +| maxOutputTokens / contextWindow | 65536 / 1048576 | longstanding model-page values | OK | +| releaseDate | 2025-06-17 | launch announcement | OK | +| speedOptimized | true | docs/models: "fastest and most budget-friendly multimodal model" | OK | + +### gemini-2.0-flash (deprecated) + +| Field | Our value | Source | Verdict | +|---|---|---|---| +| deprecated | true (PR #4990 change) | docs/deprecations: shutdown June 1, 2026; changelog: "now shut down"; docs/pricing marks "(deprecated; shutdown June 1, 2026)". Replacement: gemini-3.5-flash. | OK — #4990 change re-confirmed. Entry retained intentionally for saved-workflow history. 
| +| pricing | input 0.1 / cachedInput 0.025 / output 0.4 | docs/pricing (still published) | OK | +| maxOutputTokens / contextWindow | 8192 / 1048576 | historical model-page values | OK | +| releaseDate | 2025-02-05 | GA announcement | OK | + +### gemini-2.0-flash-lite (deprecated) + +| Field | Our value | Source | Verdict | +|---|---|---|---| +| deprecated | true (PR #4990 change) | docs/deprecations: shutdown June 1, 2026. Replacement: gemini-3.1-flash-lite. | OK — re-confirmed; retained for history | +| pricing | input 0.075 / output 0.3 (no cachedInput — caching was never priced for this SKU) | docs/pricing | OK | +| maxOutputTokens / contextWindow | 8192 / 1048576 | historical model-page values | OK | +| releaseDate | 2025-02-25 | GA announcement | OK | + +### deep-research-pro-preview-12-2025 + +| Field | Our value | Source | Verdict | +|---|---|---|---| +| id | `deep-research-pro-preview-12-2025` | model page https://ai.google.dev/gemini-api/docs/models/deep-research-pro-preview-12-2025 (Interactions API) | OK | +| pricing.input | 2.0 (PR #4990) | Vertex AI pricing, "Gemini Deep Research Agent": $2/1M input | OK — re-confirmed | +| pricing.cachedInput | 0.2 (PR #4990) | Vertex AI pricing: $0.2/1M cached input | OK — re-confirmed | +| pricing.output | 12.0 (PR #4990, was 2.0) | Vertex AI pricing: $12/1M output (response and reasoning). Consistent with underlying Gemini 3.1 Pro rates ($2/$0.2/$12). 
| OK — re-confirmed | +| capabilities | deepResearch: true, memory: false | model page (agentic researcher; Interactions API) | OK | +| maxOutputTokens | 65536 | model page (65,536) | OK | +| contextWindow | 1048576 (PR #4990) | model page (1,048,576) | OK — re-confirmed | +| releaseDate | 2025-12-11 | model page only says "December 2025"; exact day not published in fetched docs | Unverifiable to the day; month consistent — keep | +| Lifecycle | Not listed on docs/deprecations; no shutdown announced | docs/deprecations | OK to keep | + +**Recommendation (documented only, no entries added):** Google introduced `deep-research-preview-04-2026` and `deep-research-max-preview-04-2026` on 2026-04-21 (changelog; https://ai.google.dev/gemini-api/docs/interactions/deep-research). The Deep Research interactions doc now leads with these SKUs and prices them per-task (~$1–3 / ~$3–7). A follow-up should evaluate adding them once per-token pricing is published; `deep-research-pro-preview-12-2025` remains documented and un-deprecated, so no change now. + +## Changes made in this pass + +None to `models.ts` (per task rules — the fix list is given under "Recommended fixes (not applied)" below). This document is the only artifact. + +## Re-confirmed PR #4990 changes + +1. `gemini-3.1-flash-lite-preview` → `gemini-3.1-flash-lite` rename — preview slug shut down 2026-05-25 (deprecations page); stable listed on docs/models. +2. `gemini-3.1-pro-preview` thinking.levels without `minimal` — docs/thinking lists low/medium/high only; "thinking cannot be disabled". +3. `gemini-3-flash-preview` contextWindow 1048576 — model page. +4. `deprecated: true` on gemini-2.0-flash and gemini-2.0-flash-lite — shut down 2026-06-01 (deprecations + changelog). +5. Deep Research output 12.0, cachedInput 0.2, contextWindow 1048576 — Vertex pricing row + model page. + +## Recommended fixes (not applied) + +1. 
`gemini-3.1-flash-lite`: `releaseDate` `2026-03-03` → `2026-05-07` — current value is the preview's release date; GA released May 7, 2026 per Gemini API changelog (Cloud blog announcement published May 8, 2026; changelog wins as the API source of truth). +2. `gemini-3.1-flash-lite`: add `speedOptimized: true` — Google positions it as the fastest, most cost-efficient Gemini 3 model (model page, GA blog); matches the precedent set by `gemini-2.5-flash-lite`. +3. `gemini-3-flash-preview`: add `deprecated: true` — formally listed on https://ai.google.dev/gemini-api/docs/deprecations with replacement `gemini-3.5-flash` (no shutdown date announced yet). + +## Deliberately not changed + +- **`gemini-3.1-flash-lite` thinking.default `minimal`** — matches Google's documented default for this model (docs/thinking: "Default: minimal") and is also our intended cost-saving default. No conflict. +- **Tiered pricing (`gemini-3.1-pro-preview`, `gemini-2.5-pro`)** — we model the ≤200k-token base tier ($2/$12 and $1.25/$10). The >200k tier ($4/$18 and $2.50/$15) is not representable in the flat pricing schema; base tier is the established repo convention. +- **Audio input pricing** (flash models have higher audio-input rates, e.g. 3.1-flash-lite $0.50 audio) — schema models text-input pricing only; convention. +- **gemini-2.0-flash / -flash-lite entries kept despite shutdown** — `deprecated: true` retained instead of deletion so saved workflows referencing them keep rendering history correctly. +- **Deep Research newer SKUs not added** — per-task preview pricing only; documented as a follow-up recommendation above. +- **`gemini-2.5-pro` releaseDate 2025-03-25** — preview-launch date; repo convention is first availability, not GA (2025-06-17). +- **`updatedAt: 2026-06-11`** on all entries — accurate as of this validation. 
+ +## Unverifiable + +- **deep-research-pro-preview-12-2025 exact release day (2025-12-11)** — Google docs only state "December 2025"; the day-level value could not be confirmed or refuted. Month consistent; left as-is. +- **2.5-series maxOutputTokens (65,536) and 2.0-series limits (8,192 / 1,048,576)** — not re-fetched per-model in this pass; values match longstanding Google model-page specs and were unchanged by PR #4990. OpenRouter corroborates 1M context for 2.5-pro. +- **Gemini API pricing page for Deep Research** — the ai.google.dev pricing page does not list the 12-2025 SKU (it now points at the 04-2026 per-task estimates); per-token verification rests on the Vertex AI "Gemini Deep Research Agent" row alone (single — but official Google — source). diff --git a/docs/models/groq.md b/docs/models/groq.md new file mode 100644 index 0000000000..b4bc6905d4 --- /dev/null +++ b/docs/models/groq.md @@ -0,0 +1,157 @@ +# Groq Provider Validation — Final Pass + +**Date:** 2026-06-11 +**Scope:** `groq` provider block in `apps/sim/providers/models.ts` (8 models). Re-verifies everything, including the changes landed in PR #4990 (kimi `deprecated: true`, gpt-oss `cachedInput`, `updatedAt` bumps). + +## Sources & Method + +| Source | What it verified | +|---|---| +| `https://api.groq.com/openai/v1/models` (live, authenticated with local dev key) | Active model list, `context_window`, `max_completion_tokens`, `created` timestamps. Groq's own per-model doc pages render their spec tables client-side from this same data ("Loading model information..." in static HTML), so the API is the authoritative equivalent of the per-model pages. 
| +| `https://groq.com/pricing` (live fetch) | All input/cached-input/output rates | +| `https://console.groq.com/docs/prompt-caching` (live fetch) | Caching-supported model list, 50% cached-token discount | +| `https://console.groq.com/docs/deprecations` (live fetch) | kimi shutdown, qwen3-32b status | +| `https://console.groq.com/docs/models` + per-model `.md` pages (live fetch) | Featured/flagship positioning, context-window prose, model-card positioning | +| Groq OpenAPI spec embedded in `console.groq.com/docs/model/*` HTML | `temperature` parameter bounds (`minimum: 0, maximum: 2`) | +| OpenRouter `GET /api/v1/models/:author/:slug/endpoints` Groq rows (secondary) | Pricing cross-check, `max_completion_tokens` cross-check | +| WebSearch (Meta blog coverage, Moonshot K2-0905 announcement coverage) | Upstream release dates | + +Rule applied: where Groq's own sources conflict with secondary sources, Groq wins. + +## Per-Model Verification + +### groq/openai/gpt-oss-120b + +| Field | Repo value | Verified value | Source | Verdict | +|---|---|---|---|---| +| pricing.input | 0.15 | $0.15/M | groq.com/pricing; OpenRouter Groq row 0.00000015 | OK | +| pricing.cachedInput | 0.075 | $0.075/M | groq.com/pricing (explicit cached column); prompt-caching doc 50% rule; OpenRouter 0.000000075 | OK (PR #4990 change confirmed) | +| pricing.output | 0.6 | $0.60/M | groq.com/pricing; OpenRouter | OK | +| contextWindow | 131072 | 131072 | api.groq.com/openai/v1/models; model card "131K context window" | OK | +| capabilities.maxOutputTokens | — (absent) | 65536 | api.groq.com/openai/v1/models `max_completion_tokens`; OpenRouter agrees | **FIX: add 65536** | +| releaseDate | 2025-08-05 | 2025-08-05 | Groq API `created` = 1754408224 → 2025-08-05 UTC | OK | +| recommended | — (absent) | should be `true` | console.groq.com/docs/models features it as "OpenAI's flagship open-weight language model" (~500 t/s); deprecations page names `openai/gpt-oss-120b` as the recommended replacement (incl. 
for kimi-k2-instruct-0905) | **FIX: add `recommended: true`** | +| deprecated | — | active | live API `active: true`; not on deprecations page | OK | + +### groq/openai/gpt-oss-20b + +| Field | Repo value | Verified value | Source | Verdict | +|---|---|---|---|---| +| pricing.input | 0.075 | $0.075/M | groq.com/pricing; OpenRouter | OK | +| pricing.cachedInput | 0.0375 | $0.0375/M | groq.com/pricing (explicit); OpenRouter 0.0000000375 | OK (PR #4990 confirmed) | +| pricing.output | 0.3 | $0.30/M | groq.com/pricing; OpenRouter | OK | +| contextWindow | 131072 | 131072 | Groq API; model card "up to 131K" | OK | +| capabilities.maxOutputTokens | — | 65536 | Groq API `max_completion_tokens`; OpenRouter agrees | **FIX: add 65536** | +| releaseDate | 2025-08-05 | 2025-08-05 | Groq API `created` = 1754407957 → 2025-08-05 UTC | OK | +| deprecated | — | active | live API; deprecations page | OK | + +### groq/openai/gpt-oss-safeguard-20b + +| Field | Repo value | Verified value | Source | Verdict | +|---|---|---|---|---| +| pricing.input | 0.075 | $0.075/M | groq.com/pricing | OK | +| pricing.cachedInput | 0.0375 | $0.0375/M | prompt-caching doc lists this model as caching-supported with "50% discount for cached input tokens" → 0.075 × 0.5 = 0.0375. Pricing page shows no cached column for this row; OpenRouter shows $0.037/M (rounding). Groq's caching doc wins. 
| OK (PR #4990 confirmed) | +| pricing.output | 0.3 | $0.30/M | groq.com/pricing | OK | +| contextWindow | 131072 | 131072 | Groq API | OK | +| capabilities.maxOutputTokens | — | 65536 | Groq API `max_completion_tokens`; OpenRouter agrees | **FIX: add 65536** | +| releaseDate | 2025-10-29 | 2025-10-29 | Groq API `created` = 1761708789 → 2025-10-29 UTC | OK | +| deprecated | — | active | live API; deprecations page | OK | + +### groq/qwen/qwen3-32b + +| Field | Repo value | Verified value | Source | Verdict | +|---|---|---|---|---| +| pricing.input | 0.29 | $0.29/M | groq.com/pricing; OpenRouter | OK | +| pricing.cachedInput | — | none on Groq | Not in prompt-caching supported list (gpt-oss ×3 only); no cached column on pricing page. OpenRouter shows a 50% `input_cache_read` ($0.145) — Groq docs win; do not add. | OK (absent) | +| pricing.output | 0.59 | $0.59/M | groq.com/pricing; OpenRouter | OK | +| contextWindow | 131072 | 131072 | Groq API | OK | +| capabilities.maxOutputTokens | — | 40960 | Groq API `max_completion_tokens`; OpenRouter agrees | **FIX: add 40960** | +| releaseDate | 2025-04-29 | 2025-04-29 | Upstream Qwen3 family launch (field is "first publicly released"). Groq endpoint `created` is 2025-05-28 (when Groq added it) — repo convention uses upstream release. 
| OK | +| deprecated | — | **not deprecated** | `active: true` in live API; absent from deprecations page (appears there only as a *replacement* for mistral-saba-24b / qwen-qwq-32b) | OK — confirmed still active (open question f) | + +### groq/llama-3.1-8b-instant + +| Field | Repo value | Verified value | Source | Verdict | +|---|---|---|---|---| +| pricing.input | 0.05 | $0.05/M | groq.com/pricing; OpenRouter | OK | +| pricing.output | 0.08 | $0.08/M | groq.com/pricing; OpenRouter | OK | +| pricing.cachedInput | — | none on Groq | Not in caching-supported list; no cached column on pricing page (OpenRouter's $0.025 row not honored — Groq wins) | OK (absent) | +| contextWindow | 131072 | 131072 | Groq API | OK | +| capabilities.maxOutputTokens | — | 131072 | Groq API `max_completion_tokens` = 131072 (full window); OpenRouter agrees | **FIX: add 131072** | +| releaseDate | 2024-07-23 | 2024-07-23 | Meta released Llama 3.1 (8B/70B/405B) on 2024-07-23 (ai.meta.com/blog/meta-llama-3-1, press coverage dated 2024-07-23). Groq API `created` (2023-09-03) is a placeholder shared with whisper entries and predates Llama 3.1 — not meaningful. | OK — verified (open question g) | +| speedOptimized | — (absent) | should be `true` | Groq's speed-tier "-instant" naming; model card positions it for "Real-Time Applications … requiring instant responses and high throughput"; cheapest text model in the lineup. Matches repo precedent (claude-3-haiku, gemini-2.0-flash). 
| **FIX: add `speedOptimized: true`** | +| deprecated | — | active | live API; deprecations page (it is a replacement target, not deprecated) | OK | + +### groq/llama-3.3-70b-versatile + +| Field | Repo value | Verified value | Source | Verdict | +|---|---|---|---|---| +| pricing.input | 0.59 | $0.59/M | groq.com/pricing; OpenRouter | OK | +| pricing.output | 0.79 | $0.79/M | groq.com/pricing; OpenRouter | OK | +| contextWindow | 131072 | 131072 | Groq API | OK | +| capabilities.maxOutputTokens | — | 32768 | Groq API `max_completion_tokens`; OpenRouter agrees | **FIX: add 32768** | +| releaseDate | 2024-12-06 | 2024-12-06 | Groq API `created` = 1733447754 → 2024-12-06 UTC, matching Meta's Llama 3.3 launch day | OK — verified (open question g) | +| deprecated | — | active | live API; deprecations page (replacement target for several retired models) | OK | + +### groq/meta-llama/llama-4-scout-17b-16e-instruct + +| Field | Repo value | Verified value | Source | Verdict | +|---|---|---|---|---| +| pricing.input | 0.11 | $0.11/M | groq.com/pricing; OpenRouter | OK | +| pricing.output | 0.34 | $0.34/M | groq.com/pricing; OpenRouter | OK | +| contextWindow | 131072 | 131072 | Groq API | OK | +| capabilities.maxOutputTokens | — | 8192 | Groq API `max_completion_tokens`; OpenRouter agrees | **FIX: add 8192** | +| releaseDate | 2025-04-05 | 2025-04-05 | Groq API `created` = 1743874824 → 2025-04-05 UTC (Meta Llama 4 launch day) | OK | +| deprecated | — | active | live API; deprecations page | OK | + +### groq/moonshotai/kimi-k2-instruct-0905 + +| Field | Repo value | Verified value | Source | Verdict | +|---|---|---|---|---| +| deprecated | true | shut down | console.groq.com/docs/deprecations: shutdown **04/15/26**, replacement `openai/gpt-oss-120b`; model entirely absent from the live `/v1/models` response | OK (PR #4990 change confirmed — open question regarding shutdown resolved) | +| pricing.input | 1.0 | $1.00/M | groq.com/pricing (row still present); OpenRouter | OK | 
+| pricing.output | 3.0 | $3.00/M | groq.com/pricing; OpenRouter | OK | +| pricing.cachedInput | — | conflicting | groq.com/pricing still shows $0.50 cached, but the prompt-caching doc's supported list contains only the 3 gpt-oss models, and the model is removed from the API. Conflicting Groq sources + shut-down model → not added (see "Deliberately not changed"). | OK (absent) | +| contextWindow | 262144 | 262144 | Moonshot K2-0905 announcement ("context length expanded from 128K to 256K"); Groq model card description "256K context"; OpenRouter Groq row 262144. Live Groq API no longer lists the model. | OK | +| capabilities.maxOutputTokens | — | 16384 (OpenRouter only) | Only source is OpenRouter; model is gone from Groq's API and its doc-page spec table cannot be rendered. Cannot confirm from Groq's own docs → **skipped** per validation rules. | Not added (unverifiable from Groq) | +| releaseDate | 2025-09-05 | 2025-09-05 | Moonshot AI announced K2-Instruct-0905 on September 5, 2025 (aibase coverage; simonwillison.net 2025-09-06; the `0905` suffix) | OK — verified (open question g) | +| pricing.updatedAt | 2026-04-01 | — | Prices re-checked today and unchanged; model is shut down, so no bump needed | OK | + +## Provider-Level Capability: temperature + +**Recommendation: add `temperature: { min: 0, max: 2 }` to the groq provider `capabilities`.** + +- Groq's OpenAPI spec (embedded in console.groq.com docs pages, chat-completions `temperature`): "What sampling temperature to use, between 0 and 2", `"minimum": 0, "maximum": 2`. +- `apps/sim/providers/groq/index.ts:82` already forwards it: `if (request.temperature !== undefined) payload.temperature = request.temperature` — so the only thing missing is the capability flag; today Sim hides the temperature slider for every Groq model while the provider would happily accept the value. 
+- Precedent: `fireworks` (models.ts:97), `together` (models.ts:113), and `baseten` (models.ts:129) all declare `temperature: { min: 0, max: 2 }` at the provider level for the same OpenAI-compatible 0–2 range. + +**Test impact** (`apps/sim/providers/utils.test.ts`): +- ~line 214: `'groq/meta-llama/llama-4-scout-17b-16e-instruct'` must be removed from the `unsupportedModels` list in the `supportsTemperature` → false test (it will now return `true`; move it to the supported list). +- ~line 288: `expect(getMaxTemperature('groq/meta-llama/llama-4-scout-17b-16e-instruct')).toBeUndefined()` must change to expect `2` (move into the "range 0-2" group). + +## Changes made in this pass + +None to `models.ts` (per instructions — doc only). The fix list below is the recommended diff. + +1. `groq` provider capabilities: add `temperature: { min: 0, max: 2 }` (+ update the two utils.test.ts assertions above). +2. `groq/openai/gpt-oss-120b`: `capabilities: {}` → `capabilities: { maxOutputTokens: 65536 }`; add `recommended: true`. +3. `groq/openai/gpt-oss-20b`: add `maxOutputTokens: 65536`. +4. `groq/openai/gpt-oss-safeguard-20b`: add `maxOutputTokens: 65536`. +5. `groq/qwen/qwen3-32b`: add `maxOutputTokens: 40960`. +6. `groq/llama-3.1-8b-instant`: add `maxOutputTokens: 131072`; add `speedOptimized: true`. +7. `groq/llama-3.3-70b-versatile`: add `maxOutputTokens: 32768`. +8. `groq/meta-llama/llama-4-scout-17b-16e-instruct`: add `maxOutputTokens: 8192`. + +## Deliberately not changed + +- **kimi-k2-instruct-0905 `cachedInput`**: groq.com/pricing still shows $0.50 cached, but the canonical prompt-caching doc's supported-model list is exactly the three gpt-oss models, and the model is shut down (absent from the live API since the 2026-04-15 shutdown). Conflicting Groq sources for a decommissioned model — adding a cached rate would be dead config. Reconciliation: the pricing-page row is residual for a removed model; the caching doc never listed kimi. 
+- **kimi-k2-instruct-0905 `maxOutputTokens`**: 16384 is OpenRouter-only; cannot be confirmed from Groq's own docs/API (model removed). Skipped per validation rules. +- **`cachedInput` on qwen3-32b / llama-3.1-8b-instant**: OpenRouter's Groq endpoints advertise 50% `input_cache_read` rates, but Groq's prompt-caching doc explicitly limits caching support to the three gpt-oss models and the pricing page shows no cached column for them. Groq docs win. Re-check if Groq's promised caching rollout ("more models soon") lands. +- **All pricing, contextWindow, releaseDate values**: verified correct as-is (including all PR #4990 changes — kimi `deprecated: true`, the three gpt-oss `cachedInput` rates, and `updatedAt: '2026-06-11'` bumps). +- **kimi `pricing.updatedAt: '2026-04-01'`**: prices unchanged and model shut down; no bump needed. +- **`defaultModel: 'groq/llama-3.3-70b-versatile'`**: still active and reasonable; changing the default is a product decision, not a validation finding. + +## Unverifiable + +- **kimi-k2-instruct-0905 `maxOutputTokens` (16384)** — Groq removed the model from its API and the doc page's spec table no longer renders; only OpenRouter attests it. +- Nothing else: every other field was confirmed against at least one Groq-owned source (live `/v1/models` API, groq.com/pricing, prompt-caching doc, deprecations doc, or embedded OpenAPI spec), with OpenRouter as a corroborating secondary on pricing and token limits. diff --git a/docs/models/mistral.md b/docs/models/mistral.md new file mode 100644 index 0000000000..26b236e099 --- /dev/null +++ b/docs/models/mistral.md @@ -0,0 +1,305 @@ +# Mistral Provider Validation — Final Pass + +- **Date:** 2026-06-11 +- **Scope:** All 27 entries of the `mistral` provider block in `apps/sim/providers/models.ts` (lines ~2124–2501), re-verifying everything including the changes landed in PR #4990 (7 deprecations, 8 releaseDate fixes, updatedAt bumps). 
+- **Method:** Live fetches of Mistral docs (model overview, model cards, pricing page, prompt-caching guide), direct download + grep of the canonical OpenAPI spec, and — decisively — the **server-side model-card source data** in `mistralai/platform-docs-public` (`src/schema/models/models/*.ts`, shallow-cloned at `main` on 2026-06-11). These TypeScript data files are what docs.mistral.ai renders into the model cards, and they carry `apiNames` (alias mappings), prices, context lengths, release dates, and `deprecationDate`/`retirementDate` metadata that the rendered pages omit. OpenRouter used as the secondary pricing source. + +## Sources + +| Source | URL | +|---|---| +| Models overview | https://docs.mistral.ai/getting-started/models/models_overview | +| Pricing page | https://mistral.ai/pricing | +| Model cards | https://docs.mistral.ai/models/model-cards/<slug> (slugs cited per model below) | +| Model-card source data (authoritative) | https://github.com/mistralai/platform-docs-public — `src/schema/models/models/*.ts` @ `main`, 2026-06-11 | +| OpenAPI spec | https://raw.githubusercontent.com/mistralai/platform-docs-public/main/openapi.yaml | +| Prompt caching guide | https://docs.mistral.ai/studio-api/conversations/advanced/prompt-caching | +| OpenRouter (secondary pricing) | https://openrouter.ai/mistralai/<slug> | + +Below, "data file" = the model's source file in `src/schema/models/models/`. + +--- + +## Per-model verification + +### mistral-large-latest / mistral-large-2512 (Mistral Large 3, 25.12) + +Data file: `mistral-large-3-25-12.ts`. Model card: `/models/model-cards/mistral-large-3-25-12`. 
+ +| Field | Ours | Verified value | Source | Verdict | +|---|---|---|---|---| +| pricing input/output | 0.5 / 1.5 | $0.5 / $1.5 per 1M | Data file, model card, pricing page ("Mistral Large 3: $0.5 / $1.5"), OpenRouter `mistral-large-2512` ($0.50/$1.50) | ✓ | +| contextWindow | 256000 | 256k | Data file `contextLength: '256k'`; OpenRouter shows 262K (same window, binary units) | ✓ | +| releaseDate | 2025-12-02 | 2025-12-02 | Data file `releaseDate: '2025-12-02'` | ✓ | +| alias | latest → 2512 | `apiNames: ['mistral-large-2512', 'mistral-large-latest']` | Data file | ✓ | +| status | active | `status: 'Active'` | Data file | ✓ | +| temperature | {0, 1} | spec allows {0, **1.5**} | OpenAPI `ChatCompletionRequest.temperature` | ✗ see Changes | +| recommended | (absent) | provider default, flagship | — | ✗ see Changes | + +Note: an initial pricing-page fetch summarized Large 3 as $2/$6; a verbatim re-fetch showed that was a summarization error — the literal row is "$0.5 / $1.5 /M tokens". $2/$6 is the legacy mistral-large-2411 price. + +### mistral-small-2603 / mistral-small-latest (Mistral Small 4, 26.03) — CONFLICT RULING + +Data file: `mistral-small-4-0-26-03.ts`. Model card: `/models/model-cards/mistral-small-4-0-26-03`. + +| Field | Ours | Verified value | Source | Verdict | +|---|---|---|---|---| +| pricing input/output | 0.15 / 0.6 | **$0.15 / $0.6** (ruling below) | Data file (`price: 0.15` / `price: 0.6`), model card, OpenRouter `mistral-small-2603` ($0.15/$0.60) | ✓ KEEP | +| contextWindow | 256000 | 256k | Data file | ✓ | +| releaseDate | 2026-03-16 | 2026-03-16 | Data file | ✓ | +| alias | latest → 2603 | `apiNames: ['mistral-small-2603', 'mistral-small-latest']` | Data file | ✓ | +| status | active | `status: 'Active'` | Data file | ✓ | + +**Ruling on the open price conflict (question a):** mistral.ai/pricing again printed "$0.1 / $0.3" for Mistral Small 4 (verbatim re-fetch, third consistent reading). 
But three independent confirmations say $0.15/$0.6: (1) the model card, (2) the model-card **source data file** from which docs.mistral.ai renders its model cards, and (3) OpenRouter's Mistral endpoint, which mirrors what Mistral actually charges resellers. $0.1/$0.3 is exactly the price of the predecessor Mistral Small 3.2 (`mistral-small-2506`, verified below), so the pricing-page row is almost certainly a stale carry-over from Small 3.x, not a price cut. **Final value: 0.15 / 0.6 — no change.** Re-check if the pricing page row persists alongside an official price-cut announcement. + +### devstral-2512 / devstral-latest (Devstral 2, 25.12) + +Data file: `devstral-2-25-12.ts`. + +| Field | Ours | Verified value | Source | Verdict | +|---|---|---|---|---| +| pricing input/output | 0.4 / 2.0 | $0.4 / $2 | Data file, pricing page ("Devstral 2: $0.4 / $2"), OpenRouter `devstral-2512` ($0.40/$2.00) | ✓ | +| contextWindow | 256000 | 256k | Data file | ✓ | +| releaseDate | 2025-12-09 | 2025-12-09 | Data file | ✓ | +| alias | devstral-latest → 2512 | `apiNames: ['devstral-2512', 'devstral-latest', 'devstral-medium-latest']` | Data file | ✓ (note: `devstral-medium-latest` is a third alias we don't list — fine) | +| status | active | `status: 'Active'` | Data file | ✓ | + +### mistral-large-2411 (deprecated) + +Data file: `mistral-large-2-1-24-11.ts`. + +| Field | Ours | Verified value | Source | Verdict | +|---|---|---|---|---| +| pricing input/output | 2.0 / 6.0 | $2.0 / $6.0 | Data file (previously unverifiable — now confirmed) | ✓ | +| contextWindow | 128000 | 128k | Data file | ✓ | +| releaseDate | 2024-11-18 | 2024-11-18 | Data file | ✓ | +| deprecated | true | `status: 'Deprecated'`, deprecationDate 2026-02-27, retirementDate 2026-05-31 (already retired) | Data file metadata | ✓ | + +### magistral-medium-latest / magistral-medium-2509 + +Data file: `magistral-medium-1-2-25-09.ts`. 
+ +| Field | Ours | Verified value | Source | Verdict | +|---|---|---|---|---| +| pricing input/output | 2.0 / 5.0 | $2.0 / $5.0 | Data file, pricing page ("Magistral Medium: $2 / $5") | ✓ (OpenRouter: not listed — single-family source) | +| contextWindow | 128000 | 128k | Data file | ✓ | +| releaseDate | 2025-09-18 | 2025-09-18 | Data file (PR #4990 fix confirmed) | ✓ | +| alias | latest → 2509 | `apiNames: ['magistral-medium-2509', 'magistral-medium-latest']` | Data file | ✓ | +| status | active | `status: 'Active'` | Data file | ✓ | + +Note: Magistral is a reasoning model (`output: ['reasoning', 'text']`); see "Deliberately not changed" re `reasoning_effort`. + +### magistral-small-latest / magistral-small-2509 (deprecated) + +Data file: `magistral-small-1-2-25-09.ts`. + +| Field | Ours | Verified value | Source | Verdict | +|---|---|---|---|---| +| pricing input/output | 0.5 / 1.5 | $0.5 / $1.5 | Data file, pricing page | ✓ | +| contextWindow | 128000 | 128k | Data file | ✓ | +| releaseDate | 2025-09-18 | 2025-09-18 | Data file (PR #4990 fix confirmed) | ✓ | +| alias | small-latest → 2509 | `apiNames: ['magistral-small-2509', 'magistral-small-latest']` | Data file | ✓ | +| deprecated | true | `status: 'Deprecated'`, deprecationDate 2026-04-30 (past), retirementDate 2026-07-31, replacement "Mistral Small 4" | Data file metadata | ✓ | + +### mistral-medium-latest / mistral-medium-2508 (Mistral Medium 3.1) + +Data file: `mistral-medium-3-1-25-08.ts`. 
+ +| Field | Ours | Verified value | Source | Verdict | +|---|---|---|---|---| +| pricing input/output | 0.4 / 2.0 | $0.4 / $2.0 | Data file | ✓ | +| contextWindow | 128000 | 128k | Data file | ✓ | +| releaseDate | 2025-08-12 | 2025-08-12 | Data file | ✓ | +| alias | latest → 2508 | `apiNames: ['mistral-medium-2508', 'mistral-medium-latest']` | Data file | ✓ — **`mistral-medium-latest` still maps to 2508, NOT to Medium 3.5** (3.5 has its own apiNames, see below) | +| status | active | `status: 'Active'` | Data file | ✓ | + +### mistral-medium-2505 (Mistral Medium 3) + +Data file: `mistral-medium-3-25-05.ts`. + +| Field | Ours | Verified value | Source | Verdict | +|---|---|---|---|---| +| pricing input/output | 0.4 / 2.0 | $0.4 / $2.0 | Data file | ✓ | +| contextWindow | 128000 | 128k | Data file | ✓ | +| releaseDate | 2025-05-07 | 2025-05-07 | Data file | ✓ | +| status | active (no flag) | `status: 'Active'` — not deprecated despite age | Data file | ✓ | + +### mistral-small-2506 (Mistral Small 3.2, deprecated) + +Data file: `mistral-small-3-2-25-06.ts`. + +| Field | Ours | Verified value | Source | Verdict | +|---|---|---|---|---| +| pricing input/output | 0.1 / 0.3 | $0.1 / $0.3 | Data file (previously unverifiable — now confirmed) | ✓ | +| contextWindow | 128000 | 128k | Data file | ✓ | +| releaseDate | 2025-06-20 | 2025-06-20 | Data file | ✓ | +| deprecated | true | `status: 'Deprecated'`, deprecationDate 2026-04-30 (past), retirementDate 2026-07-31 | Data file metadata | ✓ | + +### open-mistral-nemo + +Data file: `mistral-nemo-12b-24-07.ts`. 
+ +| Field | Ours | Verified value | Source | Verdict | +|---|---|---|---|---| +| pricing input/output | 0.15 / 0.15 | $0.15 / $0.15 | Data file, pricing page ("Mistral NeMo: $0.15 / $0.15") | ✓ | +| contextWindow | 128000 | 128k | Data file | ✓ | +| releaseDate | 2024-07-18 | 2024-07-18 | Data file | ✓ | +| status | active (no flag) | `status: 'Active'` — still active | Data file | ✓ | + +### codestral-latest / codestral-2508 + +Data file: `codestral-25-08.ts`. Model card: `/models/model-cards/codestral-25-08`. + +| Field | Ours | Verified value | Source | Verdict | +|---|---|---|---|---| +| pricing input/output | 0.3 / 0.9 | $0.3 / $0.9 | Data file, model card, pricing page, OpenRouter `codestral-2508` ($0.30/$0.90) | ✓ | +| contextWindow | 128000 | 128k per Mistral docs (data file + live model card). OpenRouter claims 256K — **Mistral docs win**, keep 128000 | Data file, model card | ✓ | +| releaseDate | 2025-07-30 | 2025-07-30 | Data file | ✓ | +| alias | latest → 2508 | `apiNames: ['codestral-2508', 'codestral-latest']` | Data file | ✓ | +| status | active | `status: 'Active'` | Data file | ✓ | + +### devstral-small-latest (Devstral Small 2, 25.12, deprecated) + +Data file: `devstral-small-2-25-12.ts`. + +| Field | Ours | Verified value | Source | Verdict | +|---|---|---|---|---| +| pricing input/output | 0.1 / 0.3 | $0.1 / $0.3 | Data file | ✓ | +| contextWindow | 256000 | 256k | Data file | ✓ | +| releaseDate | 2025-12-09 | 2025-12-09 | Data file (PR #4990 fix confirmed) | ✓ | +| alias | — | `apiNames: ['labs-devstral-small-2512', 'devstral-small-latest']` | Data file | ✓ | +| deprecated | true | `status: 'Deprecated'`, deprecationDate 2026-02-27, retirementDate 2026-03-31 (already retired), replacement "Devstral 2" | Data file metadata | ✓ | + +### devstral-small-2507 (deprecated) + +Data file: `devstral-small-1-1-25-07.ts`. 
+ +| Field | Ours | Verified value | Source | Verdict | +|---|---|---|---|---| +| pricing input/output | 0.1 / 0.3 | $0.1 / $0.3 | Data file (previously unverifiable — now confirmed) | ✓ | +| contextWindow | 128000 | 128k | Data file | ✓ | +| releaseDate | 2025-07-10 | 2025-07-10 | Data file | ✓ | +| deprecated | true | `status: 'Deprecated'`, deprecationDate 2026-02-27, retirementDate 2026-05-31 (already retired) | Data file metadata | ✓ | + +### devstral-medium-2507 (deprecated) + +Data file: `devstral-medium-1-0-25-07.ts`. + +| Field | Ours | Verified value | Source | Verdict | +|---|---|---|---|---| +| pricing input/output | 0.4 / 2.0 | $0.4 / $2.0 | Data file (previously unverifiable — now confirmed) | ✓ | +| contextWindow | 128000 | 128k | Data file | ✓ | +| releaseDate | 2025-07-10 | 2025-07-10 | Data file | ✓ | +| deprecated | true | `status: 'Deprecated'`, deprecationDate 2026-02-27, retirementDate 2026-05-31 (already retired) | Data file metadata | ✓ | + +### ministral-14b-latest / ministral-14b-2512 (Ministral 3 14B) + +Data file: `ministral-3-14b-25-12.ts`. Model card: `/models/model-cards/ministral-3-14b-25-12`. + +| Field | Ours | Verified value | Source | Verdict | +|---|---|---|---|---| +| pricing input/output | 0.2 / 0.2 | $0.2 / $0.2 | Data file, pricing page, OpenRouter `ministral-14b-2512` ($0.20/$0.20) | ✓ | +| contextWindow | 256000 | 256k | Data file | ✓ | +| releaseDate | 2025-12-02 | 2025-12-02 | Data file | ✓ | +| alias | latest → 2512 | `apiNames: ['ministral-14b-2512', 'ministral-14b-latest']` | Data file | ✓ | +| status | active | `status: 'Active'` | Data file | ✓ | +| speedOptimized | (absent) | edge/low-latency tier | — | ✗ see Changes | + +### ministral-8b-latest / ministral-8b-2512 (Ministral 3 8B) + +Data file: `ministral-3-8b-25-12.ts`. 
+ +| Field | Ours | Verified value | Source | Verdict | +|---|---|---|---|---| +| pricing input/output | 0.15 / 0.15 | $0.15 / $0.15 | Data file, pricing page | ✓ | +| contextWindow | 256000 | 256k | Data file | ✓ | +| releaseDate | 2025-12-02 | 2025-12-02 | Data file (PR #4990 fix confirmed) | ✓ | +| alias | latest → 2512 | `apiNames: ['ministral-8b-2512', 'ministral-8b-latest']` | Data file | ✓ | +| speedOptimized | (absent) | edge/low-latency tier | — | ✗ see Changes | + +### ministral-3b-latest / ministral-3b-2512 (Ministral 3 3B) + +Data file: `ministral-3-3b-25-12.ts`. + +| Field | Ours | Verified value | Source | Verdict | +|---|---|---|---|---| +| pricing input/output | 0.1 / 0.1 | $0.1 / $0.1 | Data file, pricing page | ✓ | +| contextWindow | 256000 | 256k | Data file | ✓ | +| releaseDate | 2025-12-02 | 2025-12-02 | Data file (PR #4990 fix confirmed) | ✓ | +| alias | latest → 2512 | `apiNames: ['ministral-3b-2512', 'ministral-3b-latest']` | Data file | ✓ | +| speedOptimized | (absent) | edge/low-latency tier | — | ✗ see Changes | + +--- + +## Provider-wide checks + +### Temperature bounds (question e) — DISCREPANCY FOUND + +The live OpenAPI spec's `ChatCompletionRequest.temperature` (openapi.yaml, schema at line 11988, property at 11997) is: + +```yaml +temperature: + anyOf: + - type: number + maximum: 1.5 + minimum: 0 +``` + +with the description "we recommend between 0.0 and 0.7". So the chat-completions endpoint — the one Sim's provider calls (`https://api.mistral.ai/v1` + `chat.completions.create`) — accepts **0–1.5, not 0–1**. The 0–1 bound exists in the spec only on `CompletionArgs` (line ~8103), which is the **conversations/agents API**'s white-listed argument schema, not chat completions; that is likely where the earlier "max 1" belief came from. Verdict: our `{min: 0, max: 1}` is overly restrictive — users cannot select 1.0–1.5, which the API supports. Recommended fix: `max: 1.5` on all 27 entries. 
+ +### Prompt caching (question b) — NOT WIRED, cachedInput NOT added + +- OpenAPI spec: `prompt_cache_key` exists on `ChatCompletionRequest` (line 12134), `FIMCompletionRequest` (12362), and `AgentsCompletionRequest` (13841): "A cache key to enable prompt caching. When provided, the API will attempt to reuse previously computed tokens... Cached tokens are billed at 10% of the standard input token price." +- Prompt-caching guide confirms caching is **opt-in**: "Set the same `prompt_cache_key` on requests that are likely to share a prefix"; 64-token block granularity; hits reported via `usage.prompt_tokens_details.cached_tokens`. +- Sim's provider (`apps/sim/providers/mistral/index.ts`) forwards only `temperature` and `max_tokens` (plus messages/tools/response_format). It does **not** send `prompt_cache_key`, so no Sim request can ever produce cached tokens. + +**Ruling: caching is opt-in, Sim does not opt in → adding `cachedInput` would be dead data. Not changed.** Recommended follow-up: wire `prompt_cache_key` in the Mistral provider (e.g. keyed per workflow execution/conversation), read `usage.prompt_tokens_details.cached_tokens`, then add `cachedInput = 0.1 × input` to all active entries (large 0.05, small 0.015, devstral 0.04, magistral-medium 0.2, medium 0.04, nemo 0.015, codestral 0.03, ministral-14b 0.02, ministral-8b 0.015, ministral-3b 0.01). + +### recommended / speedOptimized (question c) — BOTH JUSTIFIED + +- `recommended: true` on **mistral-large-latest**: it is the provider's `defaultModel`, Mistral's flagship generalist (Large 3), actively maintained, and the provider currently has zero recommended entries (every other major provider block marks its flagship). Justified. 
+- `speedOptimized: true` on the **ministral tier** (14b/8b/3b, `-latest` and `-2512`, 6 entries): Ministral 3 is Mistral's edge/low-latency family ("les Ministraux" — edge models), the smallest and cheapest tier, directly analogous to the existing `speedOptimized` entries in models.ts (gpt-5-mini-class at line ~369, Haiku at line ~853). Justified. + +### Alias map (question g) — ALL CONFIRMED + +| Alias | Expected | Data-file `apiNames` | Verdict | +|---|---|---|---| +| mistral-large-latest | mistral-large-2512 | ✓ | ✓ | +| mistral-small-latest | mistral-small-2603 | ✓ | ✓ | +| codestral-latest | codestral-2508 | ✓ | ✓ | +| devstral-latest | devstral-2512 | ✓ (also `devstral-medium-latest`) | ✓ | +| devstral-small-latest | labs-devstral-small-2512 (Devstral Small 2) | ✓ | ✓ | +| magistral-medium-latest | magistral-medium-2509 | ✓ | ✓ | +| magistral-small-latest | magistral-small-2509 | ✓ | ✓ | +| mistral-medium-latest | mistral-medium-2508 (NOT Medium 3.5) | ✓ | ✓ | +| ministral-14b/8b/3b-latest | ministral-*-2512 | ✓ | ✓ | + +--- + +## Changes made in this pass + +None to `models.ts` (per instructions, this pass writes only this document). The PR #4990 changes (7 deprecations, 8 releaseDate fixes) are all **confirmed correct** against the model-card source data. + +**Recommended fixes (the fix list):** + +1. `mistral-large-latest`: add `recommended: true` — provider default + flagship; provider has zero recommended entries. +2. `ministral-14b-latest`, `ministral-14b-2512`, `ministral-8b-latest`, `ministral-8b-2512`, `ministral-3b-latest`, `ministral-3b-2512`: add `speedOptimized: true` — edge/low-latency tier, consistent with gpt-mini/haiku precedent. +3. All 27 entries: `capabilities.temperature.max` 1 → **1.5** — OpenAPI `ChatCompletionRequest.temperature.maximum: 1.5`. (The 0–1 bound belongs to the conversations-API `CompletionArgs`, not chat completions. 
If the team prefers a conservative UI cap closer to Mistral's recommended sampling range (0.0–0.7) instead of the API bound, keep 1 — but then document that choice; it does not match the endpoint Sim calls.) + +## Deliberately not changed + +- **mistral-small-2603 / mistral-small-latest pricing stays 0.15/0.6** — final ruling on the standing conflict: model card + model-card source data + OpenRouter all say $0.15/$0.6; only the marketing pricing page says $0.1/$0.3, which exactly equals the predecessor Small 3.2 price and is judged a stale row, not a price cut. +- **No `cachedInput` on any entry** — Mistral caching is opt-in via `prompt_cache_key` and Sim's provider does not send it; adding prices would be dead data. Requires provider wiring first (recommended follow-up above). +- **`mistral-medium-2505` left active** — `status: 'Active'` in source data, no deprecation metadata despite Medium 3.1/3.5 existing. +- **`open-mistral-nemo` left active** — still `status: 'Active'`. +- **codestral contextWindow stays 128000** — OpenRouter claims 256K but both the live model card and the source data say 128k; Mistral docs win. +- **`updatedAt: '2026-04-01'` left on deprecated entries** — their prices were verified unchanged; only active entries were bumped in PR #4990 and that remains coherent. +- **Reasoning params not wired** — spec exposes `reasoning_effort` (`high`/`none`) on `ChatCompletionRequest` (line 12119; `prompt_mode` is deprecated in its favor). Sim doesn't forward it, so no capabilities change; note for a future Magistral reasoning integration. +- **mistral-medium-3-5 NOT added in this pass** (documented as a recommended addition, question d): Mistral Medium 3.5 — `apiNames: ['mistral-medium-3-5', 'mistral-medium-3']`, released **2026-04-28**, **$1.5 / $7.5** per 1M (data file `mistral-medium-3-5-26-04.ts` + pricing page agree), **256k** context, Active, "frontier-class multimodal model optimized for agentic and coding". 
Matches existing `/^mistral/` modelPattern, so adding the entry is sufficient. Note its id does not follow the `-YYMM` date-suffix convention used by the other ids (e.g. `-2512`, `-2603`) — both apiNames could be listed if desired. + +## Unverifiable + +Nothing remains strictly unverifiable. The four previously-unverifiable legacy prices (mistral-large-2411 2.0/6.0, mistral-small-2506 0.1/0.3, devstral-small-2507 0.1/0.3, devstral-medium-2507 0.4/2.0) are now **confirmed** via the model-card source data files. Caveats: + +- `magistral-medium-2509` pricing has no independent second source (not listed on OpenRouter); verified only within the Mistral doc family (data file + pricing page, which agree). +- The Mistral Small 4 pricing-page row ($0.1/$0.3) remains in live contradiction with the model card; ruling above. Re-check on the next pass. diff --git a/docs/models/openai.md b/docs/models/openai.md new file mode 100644 index 0000000000..a1d81edb5b --- /dev/null +++ b/docs/models/openai.md @@ -0,0 +1,338 @@ +# OpenAI Provider Block — Final Validation & Justification + +**Validation date:** 2026-06-11 +**Scope:** `openai` provider block in `apps/sim/providers/models.ts` (23 models), including changes landed in PR #4990. +**Method:** Live WebFetch of every individual model page on `developers.openai.com/api/docs/models/`, the pricing page, the reasoning guide, the GPT-5.5 usage guide, the deprecations page, and the API reference; secondary pricing cross-checks against OpenRouter. All claims below were fetched live this session. Provider docs win over secondary sources. 
+ +**Sources:** + +- Pricing: https://developers.openai.com/api/docs/pricing (only lists current gpt-5.5/5.4 families; per-model pricing taken from individual model pages) +- Model pages: `https://developers.openai.com/api/docs/models/` (fetched for all 23 ids) +- Reasoning guide: https://developers.openai.com/api/docs/guides/reasoning +- GPT-5.5 usage guide: https://developers.openai.com/api/docs/guides/latest-model +- Deprecations: https://developers.openai.com/api/docs/deprecations +- GPT-5.5 launch: https://openai.com/index/introducing-gpt-5-5/ (via search; release 2026-04-23, API availability 2026-04-24) +- Secondary pricing: https://openrouter.ai/openai/gpt-5.5, /gpt-5.5-pro, /gpt-5.4, /gpt-5.2, /o3, /gpt-4o — all consistent with provider docs + +**Flag consumption check** (`rg` over `apps/sim/providers/openai/`): `reasoningEffort` and `verbosity` are consumed in `apps/sim/providers/openai/core.ts` (sent as `reasoning.effort` / `text.verbosity` on the Responses API). `nativeStructuredOutputs` is NOT consumed by the provider runtime — its only consumer is the landing models page (`apps/sim/app/(landing)/models/utils.ts`), so it is display-only metadata. `thinking` / `computerUse` are not used by the OpenAI provider. + +Pricing is USD per 1M tokens throughout. "MP" = the model's own docs page (`developers.openai.com/api/docs/models/`). 
+ +--- + +## Per-model verification + +### gpt-4.1 + +| Field | Value | Source | Verdict | +|---|---|---|---| +| input / cachedInput / output | 2.0 / 0.5 / 8.0 | MP gpt-4.1 | ✓ verified | +| updatedAt | 2026-06-11 | this validation | ✓ verified today | +| contextWindow | 1,047,576 | MP: "1,047,576 tokens" | ✓ verified | +| maxOutputTokens | 32,768 | MP | ✓ verified | +| temperature 0–2 | present | non-reasoning chat model; standard OpenAI sampling range | ✓ correct by convention (docs do not enumerate the range; 0–2 is the API-wide bound) | +| releaseDate | 2025-04-14 | MP snapshot `gpt-4.1-2025-04-14` | ✓ verified | +| deprecated | absent | deprecations page does not list gpt-4.1 base | ✓ verified active ("Default", "Smartest non-reasoning model") | + +### gpt-4.1-mini + +| Field | Value | Source | Verdict | +|---|---|---|---| +| input / cachedInput / output | 0.4 / 0.1 / 1.6 | MP gpt-4.1-mini | ✓ verified | +| updatedAt | 2026-06-11 | this validation | ✓ | +| contextWindow / maxOutputTokens | 1,047,576 / 32,768 | MP | ✓ verified | +| temperature 0–2 | present | convention (non-reasoning) | ✓ | +| releaseDate | 2025-04-14 | MP snapshot `gpt-4.1-mini-2025-04-14` | ✓ verified | +| deprecated | absent | not on deprecations page | ✓ verified | + +### gpt-4.1-nano + +| Field | Value | Source | Verdict | +|---|---|---|---| +| input / cachedInput / output | 0.1 / 0.025 / 0.4 | MP gpt-4.1-nano | ✓ verified | +| updatedAt | 2026-06-11 | this validation | ✓ | +| contextWindow / maxOutputTokens | 1,047,576 / 32,768 | MP | ✓ verified | +| temperature 0–2 | present | convention | ✓ | +| releaseDate | 2025-04-14 | MP (snapshot `gpt-4.1-nano-2025-04-14`, now marked deprecated) | ✓ verified | +| deprecated | **absent — should be `true`** | deprecations page: shutdown **2026-10-23**, replacement gpt-5.4-nano; MP also recommends "starting with GPT-5 nano" | **FIX: add `deprecated: true`** | + +### gpt-5.5-pro + +| Field | Value | Source | Verdict | +|---|---|---|---| +| input / 
output | 30.0 / 180.0 | MP + pricing page + OpenRouter | ✓ verified (two sources) | +| cachedInput | absent | MP: "GPT-5.5 Pro does not offer a cached input discount" | ✓ verified correct omission | +| updatedAt | **2026-04-23 — stale** | pricing re-verified 2026-06-11 this session | **FIX: bump to 2026-06-11** (PR #4990 claimed to bump all entries but missed this one) | +| contextWindow | 1,050,000 | MP: "1,050,000 context window" | ✓ verified | +| maxOutputTokens | 128,000 | MP | ✓ verified | +| nativeStructuredOutputs | true | MP: "Structured outputs: Supported" | ✓ verified (display-only flag) | +| reasoningEffort | **['none','low','medium','high','xhigh'] — wrong** | see Open Question (a) below | **FIX: change to `['medium','high','xhigh']`** | +| verbosity | **present — should be removed** | see Open Question (b) below | **FIX: remove `verbosity` block** | +| releaseDate | 2026-04-23 | MP snapshot `gpt-5.5-pro-2026-04-23` | ✓ verified | +| deprecated | absent | no deprecation notes on MP | ✓ verified | + +**Open Question (a) — resolved.** The gpt-5.5-pro model page does NOT enumerate reasoning effort values (fetched twice, explicitly asked for any sentence containing "effort" — the page contains no `reasoning.effort` enumeration). The reasoning guide says values are model-dependent and "check the relevant model page". Direct documentation for the siblings is explicit: gpt-5.4-pro MP — "supports reasoning.effort: medium, high, xhigh"; gpt-5.2-pro MP — "supports reasoning.effort: medium, high, xhigh"; gpt-5-pro MP — "defaults to (and only supports) reasoning.effort: high". Every pro-tier model that documents the parameter excludes `none` and `low` — the pro tier exists to "use more compute to think harder" (gpt-5.5-pro MP), making `none`/`low` incoherent with the product. The most defensible value set is **`['medium','high','xhigh']`**, matching both documented pro siblings. 
The current `['none','low','medium','high','xhigh']` appears copied from non-pro gpt-5.5 and is backed by no source. + +**Open Question (b) — resolved.** Not documented. The gpt-5.5-pro page does not mention `verbosity` (explicitly checked). No pro-tier model page (gpt-5.4-pro, gpt-5.2-pro, gpt-5-pro) documents verbosity, and the GPT-5.5 usage guide discusses `text.verbosity` only for gpt-5.5. Since `verbosity` is runtime-consumed (`core.ts` sends `text.verbosity` to the API), advertising it on a model that may reject it is a live failure risk. **Remove the verbosity block from gpt-5.5-pro.** + +### gpt-5.5 + +| Field | Value | Source | Verdict | +|---|---|---|---| +| input / cachedInput / output | 5.0 / 0.5 / 30.0 | MP + pricing page + OpenRouter | ✓ verified (two sources) | +| updatedAt | **2026-04-23 — stale** | re-verified 2026-06-11 | **FIX: bump to 2026-06-11** (missed by PR #4990) | +| contextWindow / maxOutputTokens | 1,050,000 / 128,000 | MP | ✓ verified | +| nativeStructuredOutputs | true | MP: structured outputs supported | ✓ verified | +| reasoningEffort ['none','low','medium','high','xhigh'] | present | MP: "Reasoning.effort supports: none, low, medium (default), high and xhigh" | ✓ verified verbatim | +| verbosity ['low','medium','high'] | present | GPT-5.5 usage guide documents `text.verbosity` (recommends `low` for concise) | ✓ verified | +| releaseDate | 2026-04-23 | announcement 2026-04-23 (openai.com/index/introducing-gpt-5-5/, TechCrunch); pro sibling snapshot is `-2026-04-23` | ✓ verified (note: API availability was 2026-04-24; snapshot naming uses 04-23) | +| recommended | true | flagship per OpenAI ("latest GPT-5.5" is the recommended upgrade target on gpt-5.2/gpt-5/o3 pages) | ✓ intentional, docs-consistent | + +### gpt-5.4-pro + +| Field | Value | Source | Verdict | +|---|---|---|---| +| input / output | 30.0 / 180.0 | MP + pricing page | ✓ verified (note: MP — ">272K input tokens are priced at 2x input and 1.5x output"; the flat-rate model 
in `models.ts` cannot express this; under-bills long-context pro calls — pre-existing limitation, see Unverifiable/limitations) | +| cachedInput | absent | pricing page shows no cached rate for pro | ✓ verified | +| updatedAt | 2026-06-11 | this validation | ✓ | +| contextWindow / maxOutputTokens | 1,050,000 / 128,000 | MP | ✓ verified | +| reasoningEffort ['medium','high','xhigh'] | present | MP: "supports reasoning.effort: medium, high, xhigh" | ✓ verified verbatim | +| verbosity | absent | not documented for pro | ✓ correct omission | +| releaseDate | 2026-03-05 | gpt-5.4 snapshot `gpt-5.4-2026-03-05`; same launch | ✓ verified | +| deprecated | absent | none | ✓ | + +### gpt-5.4 + +| Field | Value | Source | Verdict | +|---|---|---|---| +| input / cachedInput / output | 2.5 / 0.25 / 15.0 | MP + pricing page + OpenRouter | ✓ verified (two sources) | +| updatedAt | 2026-06-11 | this validation | ✓ | +| contextWindow / maxOutputTokens | 1,050,000 / 128,000 | MP | ✓ verified | +| reasoningEffort ['none','low','medium','high','xhigh'] | present | MP: "Reasoning.effort supports: none (default), low, medium, high and xhigh" | ✓ verified verbatim | +| verbosity ['low','medium','high'] | present | not on MP; carried from GPT-5-line `text.verbosity` parameter (documented in usage guide / help center for the GPT-5 family) | ✓ kept — see "Deliberately not changed" | +| releaseDate | 2026-03-05 | MP snapshot `gpt-5.4-2026-03-05` | ✓ verified | + +### gpt-5.4-mini + +| Field | Value | Source | Verdict | +|---|---|---|---| +| input / cachedInput / output | 0.75 / 0.075 / 4.5 | MP + pricing page | ✓ verified | +| updatedAt | 2026-06-11 | this validation | ✓ | +| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified | +| reasoningEffort ['none','low','medium','high','xhigh'] | present | gpt-5.4 family per search-confirmed docs: "gpt-5.4, gpt-5.4-mini, and gpt-5.4-nano support none, low, medium, high, and xhigh" | ✓ verified | +| verbosity | present | family 
convention | ✓ kept | +| releaseDate | 2026-03-17 | MP snapshot `gpt-5.4-mini-2026-03-17` | ✓ verified | + +### gpt-5.4-nano + +| Field | Value | Source | Verdict | +|---|---|---|---| +| input / cachedInput / output | 0.2 / 0.02 / 1.25 | MP + pricing page | ✓ verified | +| updatedAt | 2026-06-11 | this validation | ✓ | +| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified | +| reasoningEffort / verbosity | as gpt-5.4-mini | same family docs | ✓ verified / kept | +| releaseDate | 2026-03-17 | MP snapshot `gpt-5.4-nano-2026-03-17` | ✓ verified | +| speedOptimized | true | MP: "cheapest GPT-5.4-class model", optimized for classification/extraction/sub-agents | ✓ intentional repo flag, consistent with docs | + +### gpt-5.2-pro + +| Field | Value | Source | Verdict | +|---|---|---|---| +| input / output | 21.0 / 168.0 | MP | ✓ verified | +| cachedInput | absent | MP shows none | ✓ | +| updatedAt | 2026-06-11 | this validation | ✓ | +| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified | +| reasoningEffort ['medium','high','xhigh'] | present | MP: "supports reasoning.effort: medium, high, xhigh" | ✓ verified verbatim | +| releaseDate | 2025-12-11 | MP snapshot `gpt-5.2-pro-2025-12-11` | ✓ verified | +| deprecated | absent | MP recommends upgrading to gpt-5.5-pro but no shutdown date on deprecations page | ✓ verified (soft-superseded, not deprecated) | + +### gpt-5.2 + +| Field | Value | Source | Verdict | +|---|---|---|---| +| input / cachedInput / output | 1.75 / 0.175 / 14.0 | MP + OpenRouter | ✓ verified (two sources) | +| updatedAt | 2026-06-11 | this validation | ✓ | +| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified | +| reasoningEffort ['none','low','medium','high','xhigh'] | present | MP: "none (default), low, medium, high and xhigh" | ✓ verified verbatim | +| verbosity | present | family convention | ✓ kept | +| releaseDate | 2025-12-11 | MP snapshot `gpt-5.2-2025-12-11` | ✓ verified | +| deprecated | 
absent | superseded by 5.5 but no shutdown (only `gpt-5.2-chat-latest` has one) | ✓ verified | + +### gpt-5.1 + +| Field | Value | Source | Verdict | +|---|---|---|---| +| input / cachedInput / output | 1.25 / 0.125 / 10.0 | MP | ✓ verified | +| updatedAt | 2026-06-11 | this validation | ✓ | +| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified | +| reasoningEffort ['none','low','medium','high'] | present | MP: "Reasoning.effort supports: none (default), low, medium, and high" (no xhigh) | ✓ verified verbatim | +| verbosity | present | family convention | ✓ kept | +| releaseDate | **2025-11-12** | MP snapshot is `gpt-5.1-2025-11-13` | **FIX: → 2025-11-13.** Repo convention everywhere else in this block is snapshot date (gpt-5-pro 10-06, gpt-5.2 12-11, gpt-4.1 04-14, o3-pro 06-10, …). 2025-11-12 is the announcement date; the API snapshot is 11-13 | + +### gpt-5-pro + +| Field | Value | Source | Verdict | +|---|---|---|---| +| input / output | 15.0 / 120.0 | MP | ✓ verified | +| cachedInput | absent | MP shows none | ✓ | +| updatedAt | 2026-06-11 | this validation | ✓ | +| contextWindow | 400,000 | MP | ✓ verified | +| maxOutputTokens | 272,000 | MP: "272,000 max output tokens" | ✓ verified (yes, it really is larger than the rest of the family) | +| reasoningEffort ['high'] | present | MP: "defaults to (and only supports) `reasoning.effort: high`" | ✓ verified verbatim | +| releaseDate | 2025-10-06 | MP snapshot `gpt-5-pro-2025-10-06` | ✓ verified — **PR #4990's change confirmed correct** | +| deprecated | absent | none listed | ✓ | + +### gpt-5 + +| Field | Value | Source | Verdict | +|---|---|---|---| +| input / cachedInput / output | 1.25 / 0.125 / 10.0 | MP | ✓ verified | +| updatedAt | 2026-06-11 | this validation | ✓ | +| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified | +| reasoningEffort ['minimal','low','medium','high'] | present | MP: "minimal, low, medium, and high"; reasoning guide confirms `minimal` introduced 
with GPT-5 | ✓ verified verbatim | +| verbosity | present | verbosity launched with GPT-5 | ✓ verified | +| releaseDate | 2025-08-07 | MP snapshot `gpt-5-2025-08-07` | ✓ verified | +| deprecated | absent | MP: "We recommend using the latest GPT-5.5" but no shutdown date — deprecations page: "not explicitly listed as deprecated" | ✓ verified (superseded, not deprecated) | + +### gpt-5-mini + +| Field | Value | Source | Verdict | +|---|---|---|---| +| input / cachedInput / output | 0.25 / 0.025 / 2.0 | MP | ✓ verified | +| updatedAt | 2026-06-11 | this validation | ✓ | +| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified | +| reasoningEffort / verbosity | gpt-5 family values | GPT-5 family launch docs | ✓ verified | +| releaseDate | 2025-08-07 | MP snapshot `gpt-5-mini-2025-08-07` | ✓ verified | + +### gpt-5-nano + +| Field | Value | Source | Verdict | +|---|---|---|---| +| input / cachedInput / output | 0.05 / 0.005 / 0.4 | MP | ✓ verified | +| updatedAt | 2026-06-11 | this validation | ✓ | +| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified | +| reasoningEffort / verbosity | gpt-5 family values | family docs | ✓ verified | +| releaseDate | 2025-08-07 | MP snapshot `gpt-5-nano-2025-08-07` | ✓ verified | + +### gpt-5-chat-latest + +| Field | Value | Source | Verdict | +|---|---|---|---| +| input / cachedInput / output | 1.25 / 0.125 / 10.0 | MP | ✓ verified | +| updatedAt | 2026-06-11 | this validation | ✓ | +| contextWindow / maxOutputTokens | 128,000 / 16,384 | MP | ✓ verified | +| temperature 0–2 | present | non-reasoning chat snapshot | ✓ convention | +| releaseDate | 2025-08-07 | GPT-5 launch snapshot | ✓ verified | +| deprecated | true | **deprecations page: shutdown 2026-07-23, replacement gpt-5.5** | ✓ verified — **PR #4990's change confirmed correct and now formally docs-backed** | + +### o4-mini + +| Field | Value | Source | Verdict | +|---|---|---|---| +| input / cachedInput / output | 1.1 / 0.275 / 4.4 | MP | ✓ 
verified | +| updatedAt | 2026-06-11 | this validation | ✓ | +| contextWindow / maxOutputTokens | 200,000 / 100,000 | MP | ✓ verified | +| reasoningEffort ['low','medium','high'] | present | see Open Question (c) below | ✓ verified | +| releaseDate | 2025-04-16 | MP snapshot `o4-mini-2025-04-16` | ✓ verified | +| deprecated | true | deprecations page: shutdown **2026-10-23**, replacement gpt-5.4-mini; MP: snapshot Deprecated, "succeeded by GPT-5 mini" | ✓ verified — **PR #4990's change confirmed correct** | + +### o3-pro + +| Field | Value | Source | Verdict | +|---|---|---|---| +| input / output | 20.0 / 80.0 | MP | ✓ verified | +| cachedInput | absent | MP shows none | ✓ | +| updatedAt | 2026-06-11 | this validation | ✓ | +| contextWindow / maxOutputTokens | 200,000 / 100,000 | MP | ✓ verified | +| reasoningEffort | absent | MP: "Reasoning: Highest", no effort enum documented (pro pattern: fixed high effort) | ✓ correct omission | +| releaseDate | 2025-06-10 | MP snapshot `o3-pro-2025-06-10` | ✓ verified | +| deprecated | absent | deprecations page does not list o3-pro (only o3/o3-mini) | ✓ verified — note the oddity that base o3 is scheduled for shutdown while o3-pro is not; evidence-based, leave as is | + +### o3 + +| Field | Value | Source | Verdict | +|---|---|---|---| +| input / cachedInput / output | 2 / 0.5 / 8 | MP + OpenRouter ($2/$8) | ✓ verified (two sources) | +| updatedAt | 2026-06-11 | this validation | ✓ | +| contextWindow / maxOutputTokens | 200,000 / 100,000 | MP | ✓ verified | +| reasoningEffort ['low','medium','high'] | present | Open Question (c) | ✓ verified | +| releaseDate | 2025-04-16 | MP snapshot `o3-2025-04-16` | ✓ verified | +| deprecated | **absent — should be `true`** | **deprecations page: shutdown 2026-10-23**, replacement gpt-5.5-pro; MP: "superseded by GPT-5" | **FIX: add `deprecated: true`** | + +### o3-mini + +| Field | Value | Source | Verdict | +|---|---|---|---| +| input / cachedInput / output | 1.1 / 0.55 / 4.4 | MP (note: 
cachedInput 0.55 differs from o4-mini's 0.275 — both verified correct per their MPs) | ✓ verified | +| updatedAt | 2026-06-11 | this validation | ✓ | +| contextWindow / maxOutputTokens | 200,000 / 100,000 | MP | ✓ verified | +| reasoningEffort ['low','medium','high'] | present | o3-mini launch post: "three reasoning effort options—low, medium, and high" | ✓ verified explicitly | +| releaseDate | 2025-01-31 | MP snapshot `o3-mini-2025-01-31` | ✓ verified | +| deprecated | **absent — should be `true`** | **deprecations page: shutdown 2026-10-23**, replacement gpt-5.5; MP: snapshot marked deprecated | **FIX: add `deprecated: true`** | + +### o1 + +| Field | Value | Source | Verdict | +|---|---|---|---| +| input / cachedInput / output | 15.0 / 7.5 / 60 | MP | ✓ verified | +| updatedAt | 2026-06-11 | this validation | ✓ | +| contextWindow / maxOutputTokens | 200,000 / 100,000 | MP | ✓ verified | +| reasoningEffort ['low','medium','high'] | present | Open Question (c) | ✓ verified | +| releaseDate | **2024-12-05** | MP snapshot is `o1-2024-12-17` | **FIX (minor): → 2024-12-17** for snapshot-date consistency. 2024-12-05 is the ChatGPT launch; the API snapshot (the convention used by every other entry in this block) is 12-17 | +| deprecated | **absent — recommend `true`** | MP: sole snapshot `o1-2024-12-17` explicitly "Deprecated"; described as "Previous full o-series reasoning model". Base alias not on the deprecations shutdown table (only o1-preview/o1-mini, already shut down) | **FIX (recommended): add `deprecated: true`** — weaker evidence than o3/o3-mini (no shutdown date for the alias), but its only snapshot is deprecated and every other o-series peer is deprecated | + +**Open Question (c) — resolved.** The current model pages no longer enumerate `reasoning_effort` for the o-series, and the Responses API reference page content does not surface the enum inline. 
The reasoning guide states: "Supported values are model-dependent and can include `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`... check the relevant model page." Best available evidence: (1) o3-mini launch post (openai.com/index/openai-o3-mini/) explicitly: "three reasoning effort options—low, medium, and high"; (2) the API changelog notes `reasoning_effort` was added for o1 models with those three values; (3) `none`/`minimal`/`xhigh` were introduced with the GPT-5 line and were never back-ported to o-series. **`['low','medium','high']` for o1, o3, o3-mini, o4-mini is confirmed — no change.** + +### gpt-4o + +| Field | Value | Source | Verdict | +|---|---|---|---| +| input / cachedInput / output | 2.5 / 1.25 / 10.0 | MP + OpenRouter ($2.50/$10) | ✓ verified (two sources) | +| updatedAt | 2026-06-11 | this validation | ✓ | +| contextWindow / maxOutputTokens | 128,000 / 16,384 | MP | ✓ verified | +| temperature 0–2 | present | convention | ✓ | +| releaseDate | 2024-05-13 | MP snapshot `gpt-4o-2024-05-13`; OpenRouter "released May 13, 2024" | ✓ verified | +| deprecated | true | see Open Question (d) | ✓ verified — and now docs-backed | + +**Open Question (d) — resolved, better than expected.** The brief said gpt-4o is "active per OpenAI" and `deprecated: true` is a deliberate steering decision. The live deprecations page now shows **gpt-4o: shutdown 2026-10-23, replacement gpt-5.5**. So `deprecated: true` is no longer just an intentional product deviation — it is officially correct. Keep, no caveat needed. + +--- + +## Open Question (e) — `defaultModel: 'gpt-4.1'` + +OpenAI's flagship is gpt-5.5 (announcement 2026-04-23; the gpt-5.2/gpt-5/o3 pages all point at "the latest GPT-5.5"). gpt-4.1 remains active (it is OpenAI's "smartest non-reasoning model" and is not on the deprecations page), so the current default is not broken — it is a cheap, fast, temperature-supporting non-reasoning default, which is a defensible UX choice for new blocks. 
**Recommendation:** consider `defaultModel: 'gpt-5.5'` (or `gpt-5.4-mini` for a cost-conscious reasoning default) to match the flagship, but this is a **product decision**, not a correctness fix — not included in the machine-applicable list. + +--- + +## Changes made in this pass (recommended to apply now) + +1. **gpt-5.5-pro** — `reasoningEffort.values`: `['none','low','medium','high','xhigh']` → `['medium','high','xhigh']`. Undocumented on its own page; both documented pro siblings (gpt-5.4-pro, gpt-5.2-pro) enumerate exactly `medium, high, xhigh`; pro tier semantics exclude none/low. Sending `reasoning.effort: 'none'` to a pro model risks a 400. +2. **gpt-5.5-pro** — remove the `verbosity` block. Not documented for any pro model; the provider sends `text.verbosity` at runtime, so advertising it is a live API-error risk. +3. **gpt-5.5-pro** — `pricing.updatedAt`: `2026-04-23` → `2026-06-11` (re-verified today; PR #4990 missed this entry despite claiming an all-entry bump). +4. **gpt-5.5** — `pricing.updatedAt`: `2026-04-23` → `2026-06-11` (same). +5. **o3** — add `deprecated: true` (official shutdown 2026-10-23). +6. **o3-mini** — add `deprecated: true` (official shutdown 2026-10-23). +7. **gpt-4.1-nano** — add `deprecated: true` (official shutdown 2026-10-23, replacement gpt-5.4-nano). +8. **o1** — add `deprecated: true` (sole snapshot `o1-2024-12-17` marked Deprecated; "previous" o-series model; recommended, slightly weaker evidence). +9. **gpt-5.1** — `releaseDate`: `2025-11-12` → `2025-11-13` (snapshot `gpt-5.1-2025-11-13`; snapshot-date convention). +10. **o1** — `releaseDate`: `2024-12-05` → `2024-12-17` (snapshot `o1-2024-12-17`; snapshot-date convention; minor). + +## Deliberately not changed + +- **gpt-4o `deprecated: true`** — originally an intentional steering decision; now officially correct (shutdown 2026-10-23). Keep. 
+- **gpt-5-chat-latest / o4-mini `deprecated: true`** (PR #4990) — both confirmed by the deprecations page (2026-07-23 and 2026-10-23 shutdowns). Keep. +- **`defaultModel: 'gpt-4.1'`** — product decision; gpt-4.1 is active. Flagged for product review (gpt-5.5 is the flagship), not a correctness fix. +- **`verbosity` on non-pro gpt-5.x models (gpt-5.4/-mini/-nano, gpt-5.2, gpt-5.1, gpt-5 family)** — current model pages don't enumerate it per-model, but `text.verbosity` is a documented GPT-5-line parameter (GPT-5 launch; GPT-5.5 usage guide; OpenAI help center) and the provider has been sending it without errors. Keep. +- **`temperature {0,2}` on gpt-4.1 family, gpt-4o, gpt-5-chat-latest** — model pages don't state sampling ranges; 0–2 is the documented API-wide range for non-reasoning chat models. Correct by convention. +- **o3-pro not deprecated** — the deprecations page lists o3 and o3-mini but not o3-pro. Odd but evidence-based; leave. +- **gpt-5.2 / gpt-5 / gpt-5.2-pro not deprecated** — docs say "superseded / recommend GPT-5.5" but list no shutdown; superseded ≠ deprecated. Leave. +- **`recommended: true` on gpt-5.5 and `speedOptimized: true` on gpt-5.4-nano** — repo-internal flags, consistent with docs positioning. +- **o3-mini `cachedInput: 0.55` vs o4-mini `0.275`** — looks like a typo but both verified correct on their respective model pages. + +## Unverifiable / known limitations + +- **gpt-5.5-pro effort values** — no official enumeration exists anywhere fetched (model page, reasoning guide, usage guide, OpenRouter). The `['medium','high','xhigh']` recommendation is an inference from documented siblings — the strongest available evidence, but flagged as not directly documented. If OpenAI later publishes the enum, re-verify. +- **gpt-5.4-pro long-context surcharge** — MP states prompts >272K input tokens bill at 2x input / 1.5x output. 
The flat `pricing` shape in `models.ts` cannot represent tiered pricing; cost estimates for very long pro prompts will be low. Pre-existing schema limitation, out of scope here. +- **gpt-5.5 release date 04-23 vs API availability 04-24** — announcement and snapshot say 2026-04-23; press coverage says API access opened 2026-04-24. Kept 2026-04-23 (snapshot wins). +- **Verbosity enum per non-flagship model** — `['low','medium','high']` is documented at the parameter level, not re-enumerated on each model page. +- **`nativeStructuredOutputs`** — only gpt-5.5/gpt-5.5-pro carry it though most listed models support structured outputs; flag is display-only (landing page), so under-reporting is cosmetic, not functional. Left as is. diff --git a/docs/models/vertex.md b/docs/models/vertex.md new file mode 100644 index 0000000000..8e8da6ed1a --- /dev/null +++ b/docs/models/vertex.md @@ -0,0 +1,212 @@ +# Vertex AI provider — model validation (`models.ts` lines ~1487–1685) + +- **Date:** 2026-06-11 (final exhaustive pass, re-verifying PR #4990 changes) +- **Method:** Live WebFetch of Google pricing/model/changelog pages; Google Cloud doc pages render nav-only to fetchers, so Vertex-specific specs were verified via Context7 MCP (`/websites/cloud_google_vertex-ai`, `/websites/cloud_google_gemini-enterprise-agent-platform`) and WebSearch fallback, per the validate-model skill. Two-source rule applied to pricing (Vertex pricing page + Gemini API pricing page / OpenRouter / CloudPrice). 
+- **Primary sources:** + - https://cloud.google.com/vertex-ai/generative-ai/pricing (rendered fully — all pricing below) + - https://ai.google.dev/gemini-api/docs/pricing (cross-check; global-endpoint prices identical for 2.5/3.x) + - https://ai.google.dev/gemini-api/docs/models/gemini-3.5-flash, …/gemini-3.1-pro-preview, …/gemini-3.1-flash-lite, …/gemini-3-flash-preview, …/gemini-2.5-pro (token limits) + - https://ai.google.dev/gemini-api/docs/thinking (thinking levels/defaults) + - https://ai.google.dev/gemini-api/docs/changelog (lifecycle dates) + - https://deepmind.google/models/model-cards/gemini-3-5-flash/ (3.5 Flash card) + - Vertex docs via Context7: `…/models/gemini/2-5-pro` ("maximum output token limit of 65,535"), `…/migrate/migrate-palm-to-gemini`, `…/learn/model-versioning`, `…/learn/locations` + - https://blog.google/technology/developers/deep-research-agent-gemini-api/ (2025-12-11), https://blog.google/innovation-and-ai/models-and-research/gemini-models/gemini-3-1-flash-lite/ (2026-03-03) +- **Provider implementation:** `apps/sim/providers/vertex/index.ts` contains no capability handling itself — it delegates to `executeGeminiRequest` in `apps/sim/providers/gemini/core.ts`, which consumes `request.thinkingLevel` (core.ts:955–961, sent only when user explicitly selects a level) and `request.maxTokens` (core.ts:934). `thinking`, `temperature`, and `maxOutputTokens` flags are live; the global `maxOutputTokens` fallback is 4096 (models.ts:865), which is why PR #4990 added explicit caps. 
+ +--- + +## Per-model validation + +### vertex/gemini-3.5-flash + +| Field | Repo | Live docs | Source | Verdict | +|---|---|---|---|---| +| id | `gemini-3.5-flash` (GA 2026-05-19) | `gemini-3.5-flash` | ai.google.dev changelog ("Released `gemini-3.5-flash`… GA" 2026-05-19) | ✓ | +| input | 1.5 | $1.50 (global) | Vertex pricing + Gemini API pricing + OpenRouter | ✓ (3 sources) | +| cachedInput | 0.15 | $0.15 | Vertex pricing + Gemini API pricing | ✓ | +| output | 9.0 | $9.00 | Vertex pricing + Gemini API pricing + OpenRouter | ✓ | +| contextWindow | 1048576 | 1,048,576 | ai.google.dev/gemini-api/docs/models/gemini-3.5-flash; DeepMind card "1M" | ✓ | +| maxOutputTokens | 65536 | 65,536 | ai.google.dev model page ("64K" on DeepMind card) | ✓ | +| thinking | minimal/low/medium/high, default medium | minimal, low, medium, high; default medium | ai.google.dev/gemini-api/docs/thinking; OpenRouter ("defaults to medium thinking effort") | ✓ | +| releaseDate | 2026-05-19 | "Published 19 May 2026" | DeepMind model card + changelog | ✓ | +| recommended | absent | — | google provider entry has `recommended: true` on the same model | 🔵 add (see fixes) | + +Note: Vertex introduces **non-global endpoint pricing (+10%: $1.65 / $9.90 / $0.165) effective 2026-07-01**; our entries model global pricing. See operational caveats. 
+ +### vertex/gemini-3.1-pro-preview + +| Field | Repo | Live docs | Source | Verdict | +|---|---|---|---|---| +| id | `gemini-3.1-pro-preview` | `gemini-3.1-pro-preview` | ai.google.dev/gemini-api/docs/models/gemini-3.1-pro-preview | ✓ | +| input | 2.0 | $2 (≤200k); $4 (>200k) | Vertex pricing + Gemini API pricing | ✓ (≤200k tier; >200k tier not modeled — see caveats) | +| cachedInput | 0.2 | $0.20 (≤200k); $0.40 (>200k) | same | ✓ | +| output | 12.0 | $12 (≤200k input); $18 (>200k) | same | ✓ | +| contextWindow | 1048576 | 1,048,576 | ai.google.dev model page; Vertex release notes "1M token context window" | ✓ | +| maxOutputTokens | 65536 | 65,536 | ai.google.dev model page | ✓ | +| thinking | low/medium/high, default high | low, medium, high; default high (Dynamic); **minimal not supported** | ai.google.dev/gemini-api/docs/thinking | ✓ (PR #4990 drop of 'minimal' confirmed correct) | +| releaseDate | 2026-02-19 | 2026-02-19 | blog.google gemini-3-1-pro; github.blog changelog 2026-02-19 | ✓ | + +**Operational caveat (open question f):** Google documents `gemini-3.1-pro-preview` as **global-endpoint-only on Vertex AI** (Vertex `learn/locations` lists it under global-endpoint models; third-party migration guides state regional endpoints don't serve it). `apps/sim/providers/vertex/index.ts:34` resolves location as `request.vertexLocation || env.VERTEX_LOCATION || 'us-central1'` — with the default `us-central1`, requests to this model will fail with model-not-found. Users must set `vertexLocation` / `VERTEX_LOCATION` to `global`. No code change made (per instructions); documented here. 
+ +### vertex/gemini-3.1-flash-lite + +| Field | Repo | Live docs | Source | Verdict | +|---|---|---|---|---| +| id | `gemini-3.1-flash-lite` (renamed from `-preview` in PR #4990) | stable id `gemini-3.1-flash-lite`; preview id shut down on Gemini API 2026-05-25; Vertex preview-alias discontinuation 2026-07-09 | ai.google.dev changelog ("Released `gemini-3.1-flash-lite`… GA" 2026-05-07; preview "shut down" 2026-05-25); cloud.google.com blog "Gemini 3.1 Flash-Lite is now generally available" | ✓ rename confirmed correct | +| input | 0.25 | $0.25 (global, text) | Vertex pricing + Gemini API pricing | ✓ | +| cachedInput | 0.025 | $0.025 | same | ✓ | +| output | 1.5 | $1.50 | same + blog.google launch post | ✓ | +| contextWindow | 1048576 | 1,048,576 | ai.google.dev/gemini-api/docs/models/gemini-3.1-flash-lite | ✓ | +| maxOutputTokens | 65536 | 65,536 | same | ✓ | +| thinking levels | minimal/low/medium/high | minimal "Supported (Default)", low, medium, high | ai.google.dev/gemini-api/docs/thinking (3.1 Flash-Lite row; the "Not supported" row is 3.1 **Pro**) | ✓ — orchestrator re-fetched the thinking doc and corrected this report's initial misreading | +| thinking default | 'minimal' | minimal ("Supported (Default)") | same | ✓ | +| releaseDate | 2026-05-07 | stable GA 2026-05-07 (preview launch was 2026-03-03) | ai.google.dev changelog | ✓ changed this pass to the GA date | +| speedOptimized | absent | "our most cost-effective model yet", lowest-latency tier | blog.google launch post | 🔵 add (see fixes) | + +**Open question (c) resolved:** the preview→stable rename is right (preview already shut down on the Gemini API 2026-05-25; Vertex alias discontinues 2026-07-09). This report initially claimed `minimal` is rejected on 3.1 Flash-Lite — that was a misreading of the thinking-levels table (the "Not supported" cell belongs to 3.1 **Pro**). 
The orchestrator re-fetched ai.google.dev/gemini-api/docs/thinking, which states for Gemini 3.1 Flash-Lite: minimal "Supported (Default)", plus low/medium/high. The repo's `levels: ['minimal','low','medium','high'], default: 'minimal'` is correct and was left unchanged. + +### vertex/gemini-3-pro-preview (deprecated) + +| Field | Repo | Live docs | Source | Verdict | +|---|---|---|---|---| +| deprecated | true | Gemini API shut down 2026-03-09 (`gemini-3-pro-preview` now aliases `gemini-3.1-pro-preview`); Vertex discontinuation 2026-03-26 | ai.google.dev changelog; Vertex deprecations (via third-party migration guides citing Google's table) | ✓ deprecated:true confirmed correct | +| pricing 2.0/0.2/12.0 | — | current pricing page no longer lists text Gemini 3 Pro (only "Gemini 3 Pro Image") | cloud.google.com/vertex-ai/generative-ai/pricing | ⚠️ historical values, unverifiable from current page; acceptable on a deprecated entry | +| contextWindow | 1000000 | launch materials said "1M token context window" | Vertex release notes | ⚠️ 1,000,000 vs sibling models' 1,048,576; left as-is (deprecated) | +| thinking | low/medium/high, default high | consistent with 3.x Pro line (no minimal) | ai.google.dev/gemini-api/docs/thinking (3.1-pro row) | ✓ | +| releaseDate | 2025-11-18 | 2025-11-18 | blog.google gemini-3; github.blog 2025-11-18; axios 2025-11-18 | ✓ | + +Note: since the id now auto-redirects to 3.1 Pro on Google's side, calls may silently serve 3.1 Pro; `deprecated: true` steering users away is the right call. 
+ +### vertex/gemini-3-flash-preview + +| Field | Repo | Live docs | Source | Verdict | +|---|---|---|---|---| +| id | `gemini-3-flash-preview` | `gemini-3-flash-preview` | ai.google.dev/gemini-api/docs/models/gemini-3-flash-preview | ✓ | +| input / cachedInput / output | 0.5 / 0.05 / 3.0 | $0.50 / $0.05 / $3.00 | Vertex pricing + Gemini API pricing + TechCrunch | ✓ | +| contextWindow | 1048576 (PR #4990 change) | 1,048,576 | ai.google.dev model page | ✓ change confirmed | +| maxOutputTokens | 65536 | 65,536 | same | ✓ | +| thinking | minimal/low/medium/high, default high | minimal, low, medium, high; default high (Dynamic) | ai.google.dev/gemini-api/docs/thinking | ✓ | +| releaseDate | 2025-12-17 | 2025-12-17 | techcrunch.com 2025/12/17; 9to5google 2025/12/17; blog.google | ✓ | + +### vertex/gemini-2.5-pro + +| Field | Repo | Live docs | Source | Verdict | +|---|---|---|---|---| +| input | 1.25 | $1.25 (≤200k); $2.50 (>200k) | Vertex pricing + Gemini API pricing | ✓ (≤200k tier) | +| cachedInput | 0.125 | Vertex page displays "$0.13" (rounded); Gemini API exact "$0.125" | both pricing pages | ✓ (0.125 is exact value) | +| output | 10.0 | $10 (≤200k); $15 (>200k) | same | ✓ | +| contextWindow | 1048576 | 1,048,576 | Vertex `models/gemini/2-5-pro` (via Context7) + ai.google.dev | ✓ | +| maxOutputTokens | **65536** | **Vertex: 65,535** ("maximum output token limit of 65,535"); Gemini API page: 65,536 | docs.cloud.google.com/…/models/gemini/2-5-pro (via Context7); ai.google.dev/gemini-api/docs/models/gemini-2.5-pro | ✗ 🟡 — platforms disagree; this is the **Vertex** entry, so Vertex's 65,535 wins | +| releaseDate | 2025-03-25 | 2.5 Pro Experimental announced 2025-03-25 | blog.google gemini-model-thinking-updates-march-2025; siliconangle 2025/03/25 | ✓ | +| deprecated | absent | retirement on Vertex extended to **2026-10-16** | Vertex release notes (via gcpstudyhub summary of release-notes) | ✓ correctly NOT deprecated today — see (d) below | + +### 
vertex/gemini-2.5-flash + +| Field | Repo | Live docs | Source | Verdict | +|---|---|---|---|---| +| input / cachedInput / output | 0.3 / 0.03 / 2.5 | $0.30 / $0.03 / $2.50 | Vertex pricing + Gemini API pricing | ✓ | +| contextWindow | 1048576 | 1,048,576 | Vertex `models/gemini/2-5-flash` (via Context7) | ✓ | +| maxOutputTokens | **65536** | **Vertex: 65,535** ("default output token limit of 65,535") | docs.cloud.google.com/…/models/gemini/2-5-flash (via Context7); also migrate-palm-to-gemini doc ("2.5 Pro and 2.5 Flash… output context length of 65,535") | ✗ 🟡 | +| releaseDate | 2025-05-20 | preview launched 2025-04-17 on Gemini API; I/O announcement 2025-05-20/21; Vertex GA June 2025 | ai.google.dev changelog; Google I/O coverage | ⚠️ date is the I/O announcement; preview predates it. Left as-is (convention ambiguity, not a factual error) | +| deprecated | absent | retires 2026-10-16 | as above | ✓ not deprecated today | + +### vertex/gemini-2.5-flash-lite + +| Field | Repo | Live docs | Source | Verdict | +|---|---|---|---|---| +| input / cachedInput / output | 0.1 / 0.01 / 0.4 | $0.10 / $0.01 / $0.40 | Vertex pricing + Gemini API pricing | ✓ | +| contextWindow | 1048576 | 1,048,576 | Vertex `models/gemini/2-5-flash-lite` | ✓ | +| maxOutputTokens | **65536** | **65,535** | Vertex 2-5-flash-lite doc / Oracle OCI mirror of Google spec (websearch confirmation: "maximum output for Gemini 2.5 Flash-Lite is 65,535 tokens") | ✗ 🟡 | +| releaseDate | 2025-06-17 | 2.5 family GA + Flash-Lite preview announced 2025-06-17 | cloud.google.com blog "Gemini 2.5 Updates: Flash/Pro GA, SFT, Flash-Lite on Vertex AI" | ✓ | +| speedOptimized | absent | smallest/fastest 2.5 tier | google provider entry has `speedOptimized: true` (models.ts:1436) | 🔵 add (see fixes) | +| deprecated | absent | retires 2026-10-16 | as above | ✓ not deprecated today | + +### vertex/gemini-2.0-flash (deprecated) + +| Field | Repo | Live docs | Source | Verdict | +|---|---|---|---|---| +| deprecated | true 
| discontinued on Vertex **2026-06-01** (model serving + Provisioned Throughput) | github.com/firebase/extensions/issues/2607; Vertex model-versioning doc ("as of March 6, 2026 … only available for existing customers") | ✓ PR #4990 change confirmed | +| input | **0.1** | **$0.15** (Vertex token-based row, text) | cloud.google.com/vertex-ai/generative-ai/pricing | ✗ 🟡 repo carries Gemini API pricing ($0.10), not Vertex's | +| output | **0.4** | **$0.60** | same | ✗ 🟡 | +| cachedInput | 0.025 | not listed on Vertex pricing page (that's the Gemini API cache price) | same | ❓ UNVERIFIED on Vertex | +| maxOutputTokens | absent (falls back to 4096) | 8,192 ("output context length of 8,192 tokens by default") | Vertex migrate-palm-to-gemini doc | 🔵 google entry has 8192; add for parity (low priority, discontinued) | +| contextWindow | 1048576 | 1,048,576 | same doc | ✓ | +| releaseDate | 2025-02-05 | GA on Vertex 2025-02-05 | blog.google gemini-model-updates-february-2025; developers.googleblog.com | ✓ | + +### vertex/gemini-2.0-flash-lite (deprecated) + +| Field | Repo | Live docs | Source | Verdict | +|---|---|---|---|---| +| deprecated | true | discontinued on Vertex 2026-06-01 | same sources as 2.0-flash | ✓ | +| input / output | 0.075 / 0.3 | $0.075 / $0.30 | Vertex pricing page | ✓ | +| cachedInput | omitted | none listed | same | ✓ correctly omitted | +| maxOutputTokens | absent | 8,192 default | Vertex migrate doc | 🔵 parity suggestion (low priority) | +| releaseDate | 2025-02-25 | preview 2025-02-05; exact 2025-02-25 GA date not found in fetched pages | _attempted: blog.google, Vertex release notes_ | ❓ UNVERIFIED (plausible — GA followed preview by ~3 weeks; deprecated, left as-is) | + +### vertex/deep-research-pro-preview-12-2025 + +| Field | Repo | Live docs | Source | Verdict | +|---|---|---|---|---| +| id | `deep-research-pro-preview-12-2025` | Vertex pricing page has a "Gemini Deep Research Agent" row but no id; id appears on third-party Vertex trackers 
(CloudPrice `vertex_ai/deep-research-pro-preview-12-2025`); Gemini API changelog confirms Deep Research Agent preview launch 2025-12-11 but its docs now list `deep-research-preview-04-2026` / `deep-research-max-preview-04-2026` | cloud.google.com pricing; cloudprice.net; ai.google.dev/gemini-api/docs/deep-research + changelog | ⚠️ id verified only via secondary sources; **no announced shutdown of the 12-2025 id** — but Google has shipped 04-2026 successors on the Gemini API (watch item) | +| input | 2.0 | $2 | Vertex pricing page "Gemini Deep Research Agent" + CloudPrice | ✓ (open question a: pricing confirmed) | +| cachedInput | 0.2 | $0.20 | Vertex pricing page (CloudPrice omits cached) | ✓ | +| output | 12.0 | $12 | Vertex pricing page + CloudPrice | ✓ (PR #4990 output 12.0 confirmed) | +| contextWindow | 1048576 | **conflict**: CloudPrice says "66K tokens" context / "33K tokens" max output; underlying model is Gemini 3 Pro (1M ctx); no Google doc states the agent's window; launch blog only says it "handles large context gracefully" | cloudprice.net/models/vertex_ai/deep-research-pro-preview-12-2025; blog.google deep-research post; ai.google.dev/gemini-api/docs/deep-research (lists no token limits for any version) | ❓ UNVERIFIED — conflict NOT resolvable from Google docs (they publish no limits for the agent). 
1048576 is an inference from the Gemini 3 Pro core; CloudPrice's 66K/33K (≈65,536/32,768) may reflect the agent's actual per-task envelope | +| maxOutputTokens | 65536 | no Google figure; CloudPrice says 33K | same | ❓ UNVERIFIED | +| capabilities deepResearch / memory:false | true / false | it is a managed autonomous research agent; multi-turn memory not offered in preview | blog.google + ai.google.dev/gemini-api/docs/deep-research | ✓ reasonable | +| releaseDate | 2025-12-11 | "Published December 11, 2025"; changelog: "Launched the Gemini Deep Research Agent in preview" 2025-12-11 | blog.google deep-research-agent-gemini-api; ai.google.dev changelog | ✓ | + +--- + +## Changes made in this pass (PR #4990) — re-verification verdicts + +| PR #4990 change | Verdict | +|---|---| +| Rename `vertex/gemini-3.1-flash-lite-preview` → `vertex/gemini-3.1-flash-lite` | ✓ correct — stable id GA 2026-05-07; preview shut down on Gemini API 2026-05-25; Vertex alias discontinues 2026-07-09 | +| Drop `'minimal'` from 3.1-pro-preview thinking.levels | ✓ correct — thinking docs: minimal "Not supported" on 3.1 Pro | +| `deprecated: true` on gemini-3-pro-preview | ✓ correct — shut down (Gemini API 2026-03-09; Vertex 2026-03-26) | +| `deprecated: true` on both 2.0 models | ✓ correct — discontinued 2026-06-01 | +| deep-research output → 12.0, cachedInput 0.2 | ✓ correct — Vertex pricing page row | +| deep-research ctx 1048576 + maxOutputTokens 65536 | ❓ remains unverifiable; CloudPrice conflict (66K/33K) unresolved — Google publishes no limits for the agent | +| maxOutputTokens 65536 on 3.5-flash / 3.1-pro / 3.1-flash-lite / 3-flash | ✓ correct — all four documented at 65,536 on their Gemini API model pages | +| maxOutputTokens 65536 on 2.5-pro / 2.5-flash / 2.5-flash-lite | ✗ off-by-one for Vertex — Vertex docs say **65,535** (Gemini API pages say 65,536; platforms genuinely disagree; Vertex entry should carry the Vertex value) | +| gemini-3-flash-preview ctx → 1048576 | ✓ correct | +| 
updatedAt bumps to 2026-06-11 | ✓ all pricing values verified current today | + +## Recommended fixes (final disposition) + +Rejected by orchestrator re-verification (not applied): +1. ~~`vertex/gemini-3.1-flash-lite` thinking.levels / default change~~ — the thinking doc confirms minimal IS supported and is the default on 3.1 Flash-Lite; the report's initial reading was wrong. No change made (google entry likewise untouched). + +Applied (warning — platform-correct values): +3. `vertex/gemini-2.5-pro`: `maxOutputTokens` 65536 → 65535 (Vertex model doc) +4. `vertex/gemini-2.5-flash`: `maxOutputTokens` 65536 → 65535 (Vertex model doc) +5. `vertex/gemini-2.5-flash-lite`: `maxOutputTokens` 65536 → 65535 (Vertex model doc) +6. `vertex/gemini-2.0-flash`: `input` 0.1 → 0.15, `output` 0.4 → 0.6 (Vertex pricing page; repo carries Gemini API prices). `cachedInput: 0.025` is unverified on Vertex — consider removing. Low urgency (model discontinued). + +Applied (suggestions): +7. `vertex/gemini-3.5-flash`: add `recommended: true` — parity with the google entry; vertex provider currently has no recommended model +8. `vertex/gemini-2.5-flash-lite`: add `speedOptimized: true` — parity with google entry (models.ts:1436) +9. `vertex/gemini-3.1-flash-lite`: add `speedOptimized: true` — "most cost-effective model yet" / lowest-latency tier (blog.google); apply to the google entry too for consistency +10. (optional) both vertex 2.0 entries: add `maxOutputTokens: 8192` for parity with google entries (Vertex docs: 8,192 default) — cosmetic, models discontinued + +Also applied: `releaseDate` 2026-03-03 → 2026-05-07 on both the vertex and google `gemini-3.1-flash-lite` entries (GA date per the Gemini API changelog). Item 10 (maxOutputTokens on discontinued 2.0 entries) was skipped as cosmetic; `cachedInput` on vertex/gemini-2.0-flash was kept (Gemini API documented the rate; no Vertex contradiction found). 
+ ## Deliberately not changed + +- **2.5 Pro / Flash / Flash-Lite not marked deprecated (open question d):** Vertex retirement is 2026-10-16 (extended from June 2026; Google says the final date will be confirmed with ≥6 months notice once Gemini 3 is GA). They are fully supported today; `deprecated: true` would prematurely hide working models. Recommendation: revisit ~2026-09 (calendar item), keep undeprecated now. Note `defaultModel: 'vertex/gemini-2.5-pro'` (models.ts:1491) will need a new default before retirement — consider moving to `vertex/gemini-3.5-flash` when `recommended` is added. +- **>200k-token pricing tiers (3.1-pro, 2.5-pro)** are not modeled — `pricing` is a flat structure; entries carry the ≤200k tier. Pre-existing, consistent with the google provider. +- **Non-global endpoint surcharge (effective 2026-07-01):** Vertex adds +10% pricing for non-global endpoints on 3.x models ($1.65/$9.90 for 3.5-flash, etc.). Our default location is `us-central1` (non-global), so billed cost may exceed modeled cost starting July 1. Entries keep global pricing (the canonical published rate); flagged for ops awareness. +- **`vertex/gemini-3-pro-preview` pricing/ctx left as historical** — model discontinued and absent from the current pricing page; `deprecated: true` is the user-facing protection. +- **releaseDate conventions:** 2.5-flash 2025-05-20 (I/O) kept despite an earlier 2025-04-17 Gemini-API preview; this matches the repo's "first public launch announcement" convention. 3.1-flash-lite is no longer kept at its preview date: its releaseDate was moved from 2026-03-03 (preview announcement) to 2026-05-07 (stable GA), as recorded in the "Also applied" note under Recommended fixes. +- **deep-research id not migrated** to the newer `deep-research-preview-04-2026` family — no announced shutdown of `deep-research-pro-preview-12-2025`, and the Vertex pricing row still matches it. Watch item for the next pass. 
+ +## Unverifiable + +| Item | Attempted sources | Notes | +|---|---|---| +| `vertex/deep-research-pro-preview-12-2025` `contextWindow: 1048576` and `maxOutputTokens: 65536` | cloud.google.com pricing (no limits), ai.google.dev/gemini-api/docs/deep-research (lists only 04-2026 versions, no limits), blog.google launch post (no numbers), cloudprice.net (claims 66K ctx / 33K out) | Conflict NOT resolved: Google publishes no token limits for the agent. CloudPrice's 66K/33K (~65,536/32,768) is the only concrete figure and contradicts the repo's 1M. Current values are an inference from the Gemini 3 Pro core. Ask Google docs or test live before changing. | +| Vertex-side model id for the Deep Research Agent | Vertex pricing page (row name only), Vertex docs (nav-only render), Context7 | Only third-party trackers tie `deep-research-pro-preview-12-2025` to Vertex. | +| `vertex/gemini-2.0-flash` `cachedInput: 0.025` | Vertex pricing page (no cached row for 2.0) | $0.025 is the Gemini API cache price. Discontinued model; consider dropping the field. | +| `vertex/gemini-2.0-flash-lite` `releaseDate: 2025-02-25` | blog.google Feb 2025 post (preview 2025-02-05), Vertex release notes (nav-only) | Exact GA date not found this session; plausible, left as-is. | +| Vertex 3-pro-preview discontinuation date 2026-03-26 (exact) | Vertex deprecations page (nav-only), third-party migration guides | Gemini API shutdown 2026-03-09 is confirmed by the changelog; the Vertex-specific 03-26 date comes from secondary sources citing Google's deprecations table. Either way `deprecated: true` is correct. | diff --git a/docs/models/xai.md b/docs/models/xai.md new file mode 100644 index 0000000000..1fd8d159f4 --- /dev/null +++ b/docs/models/xai.md @@ -0,0 +1,91 @@ +# xAI Provider Validation — models.ts + +- **Date:** 2026-06-11 +- **Scope:** `xai` provider block in `apps/sim/providers/models.ts` (~lines 1752–1956), 13 models + provider config. 
Final re-verification after PR #4990 (deprecation flags, grok-4.20 repricing $2/$6 → $1.25/$2.50 and 2M → 1M, defaultModel → grok-4.3). +- **Method:** Live WebFetch of xAI docs (primary source, wins all conflicts); OpenRouter as secondary pricing source; WebSearch for release-date pinning; `rg` audit of `apps/sim/providers/xai/` for parameter wiring. +- **Sources:** + - https://docs.x.ai/developers/models (model listing + pricing) + - https://docs.x.ai/developers/models/grok-4.3, .../grok-4.20-0309-reasoning, .../grok-4.20-0309-non-reasoning, .../grok-4.20-multi-agent-0309, .../grok-build-0.1, .../grok-3, .../grok-3-fast, .../grok-4 (per-model pages) + - https://docs.x.ai/developers/migration/may-15-retirement (retirement/redirect table) + - https://docs.x.ai/developers/rest-api-reference/inference/chat (parameter ranges) + - https://docs.x.ai/developers/model-capabilities/text/reasoning (reasoning_effort semantics) + - https://openrouter.ai/x-ai/grok-4.3, https://openrouter.ai/x-ai/grok-4.20 (secondary) + +## Provider config + +| Field | Repo value | Source | Verdict | +|---|---|---|---| +| `defaultModel` | `grok-4.3` | docs.x.ai/developers/models — grok-4.3 is the current flagship ("most intelligent and fastest"); all retired slugs redirect to it | CORRECT (PR #4990 change re-verified) | +| `modelPatterns` | `/^grok/` | All current model ids start with `grok` | CORRECT | + +## Active models + +### grok-4.3 + +| Field | Repo value | Source value | Source | Verdict | +|---|---|---|---|---| +| input | 1.25 | $1.25 / 1M | docs.x.ai/developers/models/grok-4.3; OpenRouter agrees ($1.25) | CORRECT | +| cachedInput | 0.2 | $0.20 / 1M | docs.x.ai/developers/models/grok-4.3 | CORRECT | +| output | 2.5 | $2.50 / 1M | docs.x.ai/developers/models/grok-4.3; OpenRouter agrees ($2.50) | CORRECT | +| contextWindow | 1000000 | 1,000,000 tokens | docs.x.ai per-model page; OpenRouter agrees (1M, "no output token limit") | CORRECT | +| releaseDate | 2026-04-30 | April 30, 2026 | 
OpenRouter created date; consistent with xAI announcement timeline | CORRECT | +| temperature.max | 2 (fixed this pass, was 1) | 0–2 | docs.x.ai chat REST reference: "between 0 and 2" | ✓ after fix | +| recommended | true | flagship model | docs.x.ai | CORRECT | + +Caveat: OpenRouter notes grok-4.3 requests exceeding 200k total tokens bill at a higher tier. xAI's own pricing tables show flat $1.25/$2.50; Sim's pricing model is flat, so the base tier is recorded. No change. + +### grok-4.20-0309-reasoning / grok-4.20-0309-non-reasoning / grok-4.20-multi-agent-0309 + +All three per-model pages were fetched individually; all three show identical numbers (multi-agent is NOT priced differently): + +| Field | Repo value | Source value | Source | Verdict | +|---|---|---|---|---| +| input | 1.25 | $1.25 / 1M | all three per-model pages | CORRECT (PR #4990 reprice re-verified) | +| cachedInput | 0.2 | $0.20 / 1M | all three per-model pages | CORRECT | +| output | 2.5 | $2.50 / 1M | all three per-model pages | CORRECT | +| contextWindow | 1000000 | 1,000,000 tokens | all three per-model pages | CORRECT — see conflict note | +| releaseDate | 2026-03-10 | API availability March 10, 2026 | WebSearch (xAI API made Grok 4.20 + multi-agent available 2026-03-10; `0309` slug = March 9 snapshot) | CORRECT (secondary-source verified) | +| temperature.max | 2 (fixed this pass, was 1) | 0–2 | docs.x.ai chat REST reference | ✓ after fix | + +**1M vs 2M conflict resolved:** OpenRouter (x-ai/grok-4.20) lists 2M context; xAI's three per-model pages each state "Context window: 1,000,000 tokens". Press coverage attributes the larger window to "agent modes" (consumer-side), not the API. xAI docs win → **1M confirmed, keep**. (OpenRouter's created date of 2026-03-31 is its listing date, not the API release.) 
+ +## Deprecated models (9 entries) + +Retirement source: docs.x.ai/developers/migration/may-15-retirement — "After May 15, 2026 at 12:00 PM PT, requests to the retired model slugs will automatically redirect" and bill at the redirect target's rates. Today (2026-06-11) is past that date: the redirects are live. The per-model docs pages for the legacy slugs (`grok-4`, `grok-4-0709`, `grok-3`, `grok-3-fast`) now resolve to the grok-4.3 page showing $1.25/$0.20/$2.50 — direct confirmation that the slugs are aliases billing at target rates. + +| Model id | Redirect target (source: may-15-retirement page) | `deprecated: true` verdict | +|---|---|---| +| grok-4-latest | grok-4.3 (alias of grok-4-0709; per-model page now resolves to grok-4.3) | CORRECT | +| grok-4-0709 | grok-4.3 (reasoning_effort low) — explicitly listed | CORRECT | +| grok-4-1-fast-reasoning | grok-4.3 (low) — explicitly listed | CORRECT | +| grok-4-1-fast-non-reasoning | grok-4.3 (none) — explicitly listed | CORRECT | +| grok-4-fast-reasoning | grok-4.3 (low) — explicitly listed | CORRECT | +| grok-4-fast-non-reasoning | grok-4.3 (none) — explicitly listed | CORRECT | +| grok-code-fast-1 | grok-build-0.1 — explicitly listed | CORRECT | +| grok-3-latest | grok-4.3 (none) — `grok-3` explicitly listed; `-latest` is its alias | CORRECT | +| grok-3-fast-latest | grok-4.3 — not on the May-15 table by name, but docs.x.ai/developers/models/grok-3-fast now resolves to the grok-4.3 page with grok-4.3 pricing | CORRECT | + +Legacy pricing fields on these entries ($3/$15 for grok-4 family and grok-3, $5/$25 for grok-3-fast, $0.20/$0.50 fast families, $0.20/$1.50 grok-code-fast-1) match the rates these models historically carried, but xAI no longer publishes them — they are unverifiable against live docs and, more importantly, **no longer what calls cost**. 
+ **Recommendation (one clear position):** reprice the deprecated entries to their redirect targets' rates — the 8 grok-4.3-redirected slugs to $1.25 / $0.20 cached / $2.50, and grok-code-fast-1 to grok-build-0.1's $1.00 / $0.20 cached / $2.00. Rationale: Sim computes execution cost at run time from the current `models.ts` values and stores the result in execution logs; past log rows are unaffected by a reprice, so nothing historical is lost. Meanwhile any workflow still pointed at a retired slug bills at redirect rates today, so the legacy numbers overestimate live costs by up to 6× (grok-4-latest: $15 vs $2.50 output). This is docs-backed (the retirement page states the redirect billing explicitly). **Disposition: APPLIED in this pass** — the 8 grok-4.3 redirects now carry $1.25 / $0.20 cached / $2.50 with `contextWindow: 1000000`, and grok-code-fast-1 carries grok-build-0.1's $1.00 / $0.20 cached / $2.00 (256k unchanged). + +## Changes made in this pass + +Applied to `models.ts` in this pass: + +- **all 13 xai entries: `capabilities.temperature.max` 1 → 2.** The xAI chat REST reference documents `temperature` as "between 0 and 2" (same range OpenAI uses). The repo UI uses this for slider bounds, so the previous `max: 1` artificially halved the usable range. Source: https://docs.x.ai/developers/rest-api-reference/inference/chat +- **9 deprecated entries repriced to their redirect targets' rates** — see the recommendation and disposition above. + +Changes from PR #4990 re-verified and confirmed correct: 9 deprecation flags, grok-4.20 trio reprice to $1.25/$2.50 with 1M context, defaultModel grok-4.3. + +## Deliberately not changed + +- **grok-4.3 `reasoningEffort` capability flag — not added.** The REST reference and reasoning docs confirm grok-4.3 supports `reasoning_effort` with `none` / `low` (default) / `medium` / `high` ("Only supported by grok-4.3"). 
However, `apps/sim/providers/xai/index.ts` forwards only `temperature` (verified by rg: single hit at line 101, `basePayload.temperature`); no `reasoning_effort` wiring exists, so the capability flag would be dead metadata. **Recommended follow-up:** wire `reasoning_effort` in the xai provider, then add the capability flag to grok-4.3. Note for that follow-up: per the reasoning docs, `presence_penalty`, `frequency_penalty`, and `stop` cannot be combined with reasoning, and grok-4.20-multi-agent uses a different control (`reasoning.effort`: low/medium/high/xhigh, controlling agent count, not reasoning depth). + +- **grok-build-0.1 — not added.** grok-code-fast-1's successor: $1.00 input / $0.20 cached / $2.00 output, 256k context, "xAI's fast coding model trained specifically for agentic coding" (docs.x.ai/developers/models/grok-build-0.1). Recommended addition; adding models is separate work from validation. +- **grok-4.3 tiered >200k-token pricing — not modeled.** Sim's pricing schema is flat; base tier recorded (and xAI's own table is flat). + +## Unverifiable + +- **Original (pre-retirement) pricing of the 9 deprecated entries** — xAI docs no longer publish historical rates; values match known historical pricing but cannot be confirmed against a live source. +- **Release dates of deprecated entries** (2025-07-09, 2025-11-19, 2025-09-19, 2025-08-28, 2025-02-17) — consistent with historical announcements/slugs (e.g. `grok-4-0709`), not republished on live docs. +- **grok-4.3 / grok-4.20 official release dates on xAI docs** — per-model pages omit release dates. grok-4.3: 2026-04-30 corroborated by OpenRouter. grok-4.20: 2026-03-10 corroborated by secondary reporting of xAI API availability plus the `0309` snapshot slug; treated as verified-by-secondary-source. 
From 025f84b9e0dfa584e3fc03b794c30668a89ed7cc Mon Sep 17 00:00:00 2001 From: waleed Date: Thu, 11 Jun 2026 20:06:40 -0700 Subject: [PATCH 3/4] chore(providers): keep model validation logs local, not in the repo --- docs/models/anthropic.md | 232 ---------------- docs/models/azure.md | 258 ----------------- docs/models/bedrock.md | 226 --------------- docs/models/deepseek-cerebras.md | 189 ------------- docs/models/embeddings-rerank-dynamic.md | 75 ----- docs/models/google.md | 184 ------------ docs/models/groq.md | 157 ----------- docs/models/mistral.md | 305 -------------------- docs/models/openai.md | 338 ----------------------- docs/models/vertex.md | 212 -------------- docs/models/xai.md | 91 ------ 11 files changed, 2267 deletions(-) delete mode 100644 docs/models/anthropic.md delete mode 100644 docs/models/azure.md delete mode 100644 docs/models/bedrock.md delete mode 100644 docs/models/deepseek-cerebras.md delete mode 100644 docs/models/embeddings-rerank-dynamic.md delete mode 100644 docs/models/google.md delete mode 100644 docs/models/groq.md delete mode 100644 docs/models/mistral.md delete mode 100644 docs/models/openai.md delete mode 100644 docs/models/vertex.md delete mode 100644 docs/models/xai.md diff --git a/docs/models/anthropic.md b/docs/models/anthropic.md deleted file mode 100644 index 81dad60b26..0000000000 --- a/docs/models/anthropic.md +++ /dev/null @@ -1,232 +0,0 @@ -# Anthropic Provider Model Validation — Justification Doc - -- **Date:** 2026-06-11 -- **Scope:** `anthropic` provider block in `apps/sim/providers/models.ts` (12 models), re-verified after PR #4990 -- **Method:** Live WebFetch of official Anthropic docs (platform.claude.com), secondary pricing source (OpenRouter), Anthropic news posts via web search for launch dates, plus `rg` verification that every capability flag is actually consumed by provider code (`apps/sim/providers/anthropic/core.ts`, `apps/sim/providers/models.ts`, `apps/sim/providers/utils.ts`). 
-- **Primary sources:** - - Models overview: https://platform.claude.com/docs/en/about-claude/models/overview - - Pricing: https://platform.claude.com/docs/en/about-claude/pricing - - Deprecations: https://platform.claude.com/docs/en/about-claude/model-deprecations - - Effort: https://platform.claude.com/docs/en/build-with-claude/effort - - Structured outputs: https://platform.claude.com/docs/en/build-with-claude/structured-outputs - - Computer use: https://platform.claude.com/docs/en/agents-and-tools/tool-use/computer-use-tool - - Messages API: https://platform.claude.com/docs/en/api/messages - - Secondary pricing: https://openrouter.ai/provider/anthropic - - Launch dates: https://www.anthropic.com/news/claude-4 , https://www.anthropic.com/news/claude-3-haiku - -**Verdict key:** ✓ = verified against live docs · ⚠ = recommended change · ◆ = intentional deviation (documented) · ◇ = unverifiable from live docs (reason given) - ---- - -## How capability fields are consumed (code verification) - -| Field | Consumer | Behavior | -|---|---|---| -| `thinking.levels` / `thinking.default` | `core.ts` `buildThinkingConfig()` via `getThinkingCapability()` | Level must be in `levels` or thinking is skipped. Fable 5 / Opus 4.8 / 4.7 / 4.6 / Sonnet 4.6 (`supportsAdaptiveThinking()`) → `thinking: {type: 'adaptive'}` + `output_config: {effort: <level>}`. All other models → `thinking: {type: 'enabled', budget_tokens}` with low=2048 / medium=8192 / high=32768 (so `xhigh`/`max` must never appear on a budget-tokens model — `THINKING_BUDGET_TOKENS` has no entry and config would be dropped). | -| `temperature` | payload construction in `core.ts` | Presence of `capabilities.temperature` allows the param; omitted on a model means Sim never sends it. Stripped when thinking enabled (thinking incompatible with temperature). 
| -| `nativeStructuredOutputs` | `models.ts:3393` (`getModelsWithNativeStructuredOutputs`-style helper) consumed by `core.ts` | With flag → native `output_format`/`output_config` JSON-schema path; without → `generateSchemaInstructions()` prompt-injection fallback. | -| `computerUse` | `models.ts:3167` `getComputerUseModels()` → `providers/utils.ts:143` `computerUseModels` | Gates Sim's computer-use path per provider. **No Anthropic model currently sets it.** | -| `contextWindow` / `maxOutputTokens` / `pricing` | cost calculation, token clamping, UI | Straight passthrough. Sim does **not** send any `context-1m-*` beta header (`rg 'context-1m' apps/sim/providers/anthropic/` → no matches), so `contextWindow` must reflect the no-beta-header window. | -| `reasoningEffort` / `verbosity` | **not consumed** by the Anthropic provider (OpenAI-family fields) | Correctly absent from all Anthropic entries. | - ---- - -## Per-model field verification - -### claude-fable-5 - -| Field | Value | Source | Verdict | -|---|---|---|---| -| pricing.input | 10.0 | Pricing doc ($10/MTok); OpenRouter $10/M | ✓ | -| pricing.cachedInput | 1.0 | Pricing doc cache hit $1/MTok (0.1×) | ✓ | -| pricing.output | 50.0 | Pricing doc $50/MTok; OpenRouter $50/M | ✓ | -| capabilities.temperature | absent | Deprecations doc: sampling params 400 on Opus 4.7 and later; Fable 5 rejects `temperature`/`top_p`/`top_k` | ✓ | -| capabilities.nativeStructuredOutputs | **absent** | Structured-outputs doc: "generally available … for **Claude Fable 5**, Claude Mythos 5, Claude Opus 4.8, …" | ⚠ **should be `true`** — Fable 5 is in the GA list; current absence routes Fable 5 through the prompt-injection fallback instead of native JSON-schema output | -| capabilities.maxOutputTokens | 128000 | Models overview: Max output 128k | ✓ | -| thinking.levels | low–xhigh–max | Effort doc: `max` available on Fable 5; `xhigh` available on Fable 5; low/medium/high universal | ✓ | -| thinking.default | high | Effort doc: default 
is `high` | ✓ | -| contextWindow | 1000000 | Models overview: 1M tokens (default, no beta header) | ✓ | -| releaseDate | 2026-06-09 | Models overview: "generally available … beginning June 9, 2026" | ✓ | -| (no deprecated flag) | — | Active | ✓ | - -Note: Fable 5's thinking is always-on; Sim's adaptive path (`thinking: {type:'adaptive'}` + effort) is the documented-correct call shape. The `'none'` sentinel omits the `thinking` param, which on Fable 5 means adaptive-by-default rather than disabled — acceptable (explicit `disabled` would 400). - -### claude-opus-4-8 - -| Field | Value | Source | Verdict | -|---|---|---|---| -| pricing.input / cachedInput / output | 5.0 / 0.5 / 25.0 | Pricing doc $5 / $0.50 cache-hit / $25; OpenRouter $5/$25 | ✓ | -| pricing.updatedAt | 2026-05-28 | bumped in PR #4990 | ✓ | -| temperature | absent | Deprecations doc: 400 on Opus 4.7 and later, "including Claude Opus 4.8" | ✓ | -| nativeStructuredOutputs | true | Structured-outputs doc GA list | ✓ | -| maxOutputTokens | 128000 | Models overview | ✓ | -| thinking.levels | low–xhigh–max | Effort doc: `xhigh` and `max` available on Opus 4.8 | ✓ | -| thinking.default | high | Effort doc: "The default is `high` on all surfaces" | ✓ | -| contextWindow | 1000000 | Models overview: 1M (standard pricing, no long-context premium) | ✓ | -| releaseDate | 2026-05-28 | Deprecations doc: tentative retirement "Not sooner than May 28, **2027**" (release + 1 yr convention) — confirms the PR #4990 correction | ✓ changed this pass (PR #4990), re-verified | -| recommended | true | Sim product choice; consistent with docs' "most capable Opus-tier model" | ◆ product decision | - -### claude-opus-4-7 - -| Field | Value | Source | Verdict | -|---|---|---|---| -| pricing | 5.0 / 0.5 / 25.0 (updatedAt 2026-04-16) | Pricing doc; OpenRouter $5/$25 | ✓ | -| temperature | absent | Deprecations doc: 400 on Opus 4.7+ | ✓ | -| nativeStructuredOutputs | true | Structured-outputs doc GA list | ✓ | -| maxOutputTokens | 
128000 | Models overview (legacy table) | ✓ | -| thinking.levels | low–xhigh–max | Effort doc: `xhigh` introduced with 4.7; `max` available | ✓ | -| contextWindow | 1000000 | Models overview legacy table: 1M | ✓ | -| releaseDate | 2026-04-16 | Deprecations doc: "Not sooner than April 16, 2027" | ✓ | - -### claude-opus-4-6 - -| Field | Value | Source | Verdict | -|---|---|---|---| -| pricing | 5.0 / 0.5 / 25.0 (updatedAt 2026-06-11) | Pricing doc; OpenRouter $5/$25 | ✓ | -| temperature {0,1} | present | Sampling-param removal is "Opus 4.7 and later" — Opus 4.6 still accepts `temperature` (0.0–1.0 per Messages API) | ✓ | -| nativeStructuredOutputs | true | Structured-outputs doc GA list | ✓ | -| maxOutputTokens | 128000 | Models overview legacy table | ✓ | -| thinking.levels | low/medium/high/**max** (no xhigh) | Effort doc: `max` on Opus 4.6 ✓; `xhigh` only on Fable 5 / Opus 4.8 / 4.7 — correctly excluded | ✓ | -| contextWindow | 1000000 | Models overview legacy table: 1M | ✓ | -| releaseDate | 2026-02-05 | Deprecations doc: "Not sooner than February 5, 2027" | ✓ | - -### claude-sonnet-4-6 - -| Field | Value | Source | Verdict | -|---|---|---|---| -| pricing | 3.0 / 0.3 / 15.0 (updatedAt 2026-06-11) | Pricing doc $3 / $0.30 / $15; OpenRouter $3/$15 | ✓ | -| temperature {0,1} | present | Sonnet 4.6 is not in the "Opus 4.7 and later" sampling-param removal; temperature 0.0–1.0 valid | ✓ | -| nativeStructuredOutputs | true | Structured-outputs doc GA list | ✓ | -| maxOutputTokens | 64000 | Models overview: 64k | ✓ | -| thinking.levels | low/medium/high/**max** (no xhigh) | Effort doc: `max` available on Sonnet 4.6; `xhigh` is NOT (Fable 5 / Opus 4.8 / 4.7 only) | ✓ | -| contextWindow | 1000000 | Models overview: 1M, no beta header required; "Long context pricing": full 1M at standard pricing on Sonnet 4.6 | ✓ | -| releaseDate | 2026-02-17 | Deprecations doc: "Not sooner than February 17, 2027" | ✓ | -| recommended | true | Sim product choice ("best combination of speed 
and intelligence") | ◆ product decision | - -### claude-opus-4-5 - -| Field | Value | Source | Verdict | -|---|---|---|---| -| pricing | 5.0 / 0.5 / 25.0 (updatedAt 2026-06-11) | Pricing doc; OpenRouter $5/$25 | ✓ | -| temperature {0,1} | present | ≤ 4.6-era model; accepted | ✓ | -| nativeStructuredOutputs | true | Structured-outputs doc GA list ("Claude Opus 4.5") | ✓ | -| maxOutputTokens | 64000 | Models overview legacy table | ✓ | -| thinking.levels | low/medium/high | Effort doc: Opus 4.5 supports effort but neither `max` nor `xhigh`. Sim's code path for 4.5 uses `budget_tokens` (not effort) — levels map to budget tiers; same three levels are valid either way | ✓ | -| contextWindow | 200000 | Models overview legacy table: 200k | ✓ | -| releaseDate | 2025-11-24 | Deprecations doc: "Not sooner than November 24, 2026"; anthropic.com/news/claude-opus-4-5 (Nov 24, 2025) | ✓ | - -### claude-opus-4-1 - -| Field | Value | Source | Verdict | -|---|---|---|---| -| pricing | 15.0 / 1.5 / 75.0 (updatedAt 2026-06-11) | Pricing doc $15 / $1.50 / $75; OpenRouter $15/$75 | ✓ | -| temperature {0,1} | present | pre-4.7 model; accepted | ✓ | -| nativeStructuredOutputs | **removed in PR #4990** | Structured-outputs doc GA list does **not** include Opus 4.1 | ✓ changed this pass (PR #4990), re-verified correct | -| maxOutputTokens | 32000 | Models overview legacy table: 32k | ✓ | -| thinking.levels | low/medium/high | budget_tokens model; extended thinking supported | ✓ | -| contextWindow | 200000 | Models overview legacy table | ✓ | -| releaseDate | 2025-08-05 | Snapshot `claude-opus-4-1-20250805`; launched Aug 5, 2025 | ✓ | -| deprecated | true | Deprecations doc: deprecated June 5, 2026; retires Aug 5, 2026 → migrate to claude-opus-4-8 | ✓ changed this pass (PR #4990), re-verified | - -### claude-opus-4-0 - -| Field | Value | Source | Verdict | -|---|---|---|---| -| pricing | 15.0 / 1.5 / 75.0 (updatedAt 2026-06-11) | Pricing doc ("Claude Opus 4 (deprecated)"); OpenRouter 
$15/$75 | ✓ | -| temperature {0,1} | present | pre-4.7; accepted | ✓ | -| nativeStructuredOutputs | absent | Not in structured-outputs GA list | ✓ | -| maxOutputTokens | 32000 | Models overview legacy table | ✓ | -| thinking.levels | low/medium/high | budget_tokens model | ✓ | -| contextWindow | 200000 | Models overview legacy table | ✓ | -| releaseDate | 2025-05-22 | **Open question (a) resolved:** Claude 4 (Opus 4 + Sonnet 4) launched **May 22, 2025** (anthropic.com/news/claude-4). The `20250514` in the full ID is the snapshot date, not the launch date. Repo convention uses launch dates (cf. haiku-4-5: launch 2025-10-15 vs snapshot 20251001) | ✓ — **no change recommended** | -| deprecated | true | Deprecations doc: deprecated Apr 14, 2026; retires June 15, 2026 → claude-opus-4-8 | ✓ changed this pass (PR #4990), re-verified | - -### claude-sonnet-4-5 - -| Field | Value | Source | Verdict | -|---|---|---|---| -| pricing | 3.0 / 0.3 / 15.0 (updatedAt 2026-06-11) | Pricing doc; OpenRouter $3/$15 | ✓ | -| temperature {0,1} | present | pre-4.7; accepted | ✓ | -| nativeStructuredOutputs | true | Structured-outputs doc GA list ("Claude Sonnet 4.5") | ✓ | -| maxOutputTokens | 64000 | Models overview legacy table | ✓ | -| thinking.levels | low/medium/high | Effort doc: effort errors on Sonnet 4.5 — Sim correctly routes it through budget_tokens; no max/xhigh | ✓ | -| contextWindow | 200000 | **Open question (e) resolved:** Models overview legacy table lists Sonnet 4.5 at **200k**. 
The historical 1M for Sonnet 4.5 required the `context-1m` beta header, which Sim does not send (`rg 'context-1m'` → no matches in `apps/sim/providers/anthropic/`) | ✓ changed this pass (PR #4990, 1000000 → 200000), re-verified correct | -| releaseDate | 2025-09-29 | Snapshot `claude-sonnet-4-5-20250929`; launched Sep 29, 2025 | ✓ | - -### claude-sonnet-4-0 - -| Field | Value | Source | Verdict | -|---|---|---|---| -| pricing | 3.0 / 0.3 / 15.0 (updatedAt 2026-06-11) | Pricing doc ("Claude Sonnet 4 (deprecated)"); OpenRouter $3/$15 | ✓ | -| temperature {0,1} | present | pre-4.7; accepted | ✓ | -| nativeStructuredOutputs | absent | Not in structured-outputs GA list | ✓ | -| maxOutputTokens | 64000 | Models overview legacy table: 64k | ✓ | -| thinking.levels | low/medium/high | budget_tokens model | ✓ | -| contextWindow | 200000 | Models overview legacy table: 200k; same `context-1m` beta-header reasoning as Sonnet 4.5 | ✓ changed this pass (PR #4990), re-verified correct | -| releaseDate | 2025-05-22 | Claude 4 launch May 22, 2025 (see opus-4-0) — no change | ✓ | -| deprecated | true | Deprecations doc: deprecated Apr 14, 2026; retires June 15, 2026 → claude-sonnet-4-6 | ✓ changed this pass (PR #4990), re-verified | - -### claude-haiku-4-5 - -| Field | Value | Source | Verdict | -|---|---|---|---| -| pricing | 1.0 / 0.1 / 5.0 (updatedAt 2026-06-11) | Pricing doc $1 / $0.10 / $5; OpenRouter $1/$5 | ✓ | -| temperature {0,1} | present | pre-4.7; accepted | ✓ | -| nativeStructuredOutputs | true | Structured-outputs doc GA list | ✓ | -| maxOutputTokens | 64000 | Models overview: 64k | ✓ | -| thinking.levels | low/medium/high | Effort doc: effort errors on Haiku 4.5; extended thinking (budget_tokens) supported — Sim routes via budget_tokens | ✓ | -| contextWindow | 200000 | Models overview: 200k | ✓ | -| releaseDate | 2025-10-15 | Launch Oct 15, 2025 (deprecations doc: retirement "Not sooner than October 15, 2026"); snapshot is `20251001` — repo correctly uses the launch 
date | ✓ | -| speedOptimized | true | Sim-internal flag; docs: "The fastest model" | ◆ Sim-internal, consistent | - -### claude-3-haiku-20240307 - -| Field | Value | Source | Verdict | -|---|---|---|---| -| pricing.input / output | 0.25 / 1.25 (updatedAt 2026-04-01) | ◇ No longer listed on the live pricing page (only retired Haiku 3.5 remains) or OpenRouter — model is retired. Values match Anthropic's historical published pricing ($0.25/$1.25) | ◇ unverifiable live; historically consistent — leave as-is | -| pricing.cachedInput | 0.03 | ◇ Historical cache-hit pricing for Claude 3 Haiku was $0.03/MTok (slightly above the 0.1× convention) | ◇ unverifiable live; historically consistent | -| temperature {0,1} | present | Claude 3-era; accepted (model no longer serves requests anyway) | ✓ (moot) | -| maxOutputTokens | 4096 | Historical Claude 3 Haiku max output | ◇ unverifiable live; historically consistent | -| no thinking capability | absent | Claude 3 Haiku has no extended thinking | ✓ | -| contextWindow | 200000 | Historical Claude 3 family window | ◇ unverifiable live; historically consistent | -| releaseDate | 2024-03-07 | Claude 3 Haiku GA was **March 13, 2024** (anthropic.com/news/claude-3-haiku); `20240307` is the snapshot date. 
Repo convention elsewhere uses launch dates | ⚠ optional: `2024-03-07` → `2024-03-13` (cosmetic; model is retired) | -| deprecated | true | Deprecations doc: **Retired April 20, 2026** ("Requests to retired models will fail") | ◆ see open question (b) below | - ---- - -## Changes made in this pass (PR #4990) — all re-verified correct - -| Change | Verification | -|---|---| -| opus-4-8 releaseDate → 2026-05-28 | Deprecations doc retirement floor "May 28, 2027" (release + 1 yr) ✓ | -| deprecated:true on opus-4-1 | Deprecated 2026-06-05, retires 2026-08-05 ✓ | -| deprecated:true on opus-4-0, sonnet-4-0 | Deprecated 2026-04-14, retire 2026-06-15 ✓ | -| sonnet-4-5 & sonnet-4-0 contextWindow 1000000 → 200000 | Models overview legacy table: both 200k. The 1M window on these models was beta-header-gated (`context-1m`); Sim never sends that header ✓ | -| removed nativeStructuredOutputs from opus-4-1 | Opus 4.1 absent from structured-outputs GA list ✓ | -| updatedAt bumps | informational ✓ | - -## Recommended fixes from THIS validation - -1. **claude-fable-5: add `nativeStructuredOutputs: true`.** Structured-outputs doc explicitly lists Claude Fable 5 as GA. Without the flag, Sim falls back to prompt-injected schema instructions for Fable 5 instead of the native JSON-schema output path — weaker guarantees on the flagship model. -2. *(optional, cosmetic)* **claude-3-haiku-20240307: releaseDate `2024-03-07` → `2024-03-13`.** Repo convention is launch date (not snapshot date); GA was March 13, 2024. Low value since the model is retired. - -## Deliberately not changed - -- **`computerUse` on Anthropic models (open question c).** Anthropic documents computer-use support (beta) for: Opus 4.8 / 4.7 / 4.6 / 4.5 + Sonnet 4.6 (header `computer-use-2025-11-24`) and Sonnet 4.5, Haiku 4.5, Opus 4.1, Sonnet 4, Opus 4 (header `computer-use-2025-01-24`). 
**Claude Fable 5 is NOT in the documented list.** The flag IS consumed (`getComputerUseModels()` → `providers/utils.ts` `computerUseModels`), so setting it would light up Sim's computer-use path for these models — a feature-enablement/product decision (beta headers, screenshot plumbing, UX), not a data correction. Left unchanged; documented here for whoever owns that decision. -- **opus-4-0 / sonnet-4-0 releaseDate `2025-05-22` (open question a).** Confirmed correct: Claude 4 launched May 22, 2025; `20250514` is the snapshot suffix, not the launch date. -- **claude-3-haiku-20240307 entry kept (open question b).** The model was retired 2026-04-20 — live requests now fail. Recommendation: **keep the entry with `deprecated: true`** rather than delete. Removing it would break saved workflows that reference the model ID (model lookup, pricing for historical logs, UI rendering of old runs). The schema has no `retired` field; if one is ever added, this model is the first candidate. Runtime failures surface from Anthropic's API as clear 404s, which is an acceptable failure mode for a retired model. -- **`recommended` flags (opus-4-8, sonnet-4-6) and `speedOptimized` (haiku-4-5)** — Sim product/UI decisions, consistent with docs positioning; not doc-verifiable facts. -- **`defaultModel: 'claude-sonnet-4-6'`** — active, recommended model; valid product choice. -- **Thinking level lists for budget-tokens models (opus-4-5, sonnet-4-5, sonnet-4-0, opus-4-1, opus-4-0, haiku-4-5).** Their `low/medium/high` are Sim-defined budget tiers (2048/8192/32768 budget_tokens), not API effort levels — internally consistent with `THINKING_BUDGET_TOKENS` in `core.ts`. Note Opus 4.5 does support the API `effort` param (low/medium/high) per the effort doc, but Sim routes it through budget_tokens (`supportsAdaptiveThinking()` excludes 4.5); that is a code-path choice in `core.ts`, not a models.ts data error, and the level list is valid under either interpretation. 
- -## Open question (d) resolution — thinking levels & temperature boundary - -- `xhigh`: Fable 5, Opus 4.8, Opus 4.7 only (effort doc). Repo ✓. -- `max`: Fable 5, Opus 4.8, Opus 4.7, Opus 4.6, Sonnet 4.6 (effort doc; **not** Opus 4.5 / Sonnet 4.5 / Haiku 4.5). Repo ✓ — including Sonnet 4.6 `max`, verified. -- Effort default `high` on all supporting models (effort doc: "Setting effort to high produces exactly the same behavior as omitting the parameter"). Repo `default: 'high'` ✓. -- Temperature boundary: deprecations doc — `temperature`/`top_p`/`top_k` return 400 on **Opus 4.7 and later (incl. Opus 4.8) and Fable 5**; still valid (0.0–1.0, default 1.0 per Messages API) on Opus 4.6, Sonnet 4.6, and everything earlier. Repo: temperature absent exactly on fable-5 / opus-4-8 / opus-4-7, present `{min:0, max:1}` on opus-4-6 / sonnet-4-6 and all older models ✓. - -## Unverifiable - -- **claude-3-haiku-20240307 pricing, contextWindow (200k), maxOutputTokens (4096):** the model is retired and has been removed from the live pricing/overview pages and OpenRouter. Values match Anthropic's historical published specs; no contradiction found. No change recommended. -- **Exact cache-write pricing is not modeled** (Sim's schema has only `cachedInput` = cache read). Live docs confirm cache reads = 0.1× input for every current model, matching all `cachedInput` values. 5-min/1-hour write premiums (1.25× / 2×) are not representable in the current schema — noting for completeness, not a defect. diff --git a/docs/models/azure.md b/docs/models/azure.md deleted file mode 100644 index 03f5dfd72d..0000000000 --- a/docs/models/azure.md +++ /dev/null @@ -1,258 +0,0 @@ -# Azure OpenAI & Azure Anthropic model validation - -**Date:** 2026-06-11 -**Scope:** `azure-openai` block (17 models) and `azure-anthropic` block (5 models) in `apps/sim/providers/models.ts`. Final exhaustive re-validation following PR #4990. 
- -## Method - -Every field was checked against live primary sources fetched on 2026-06-11: - -1. **Specs (context window, max output, version dates, API support, lifecycle):** - - https://learn.microsoft.com/en-us/azure/ai-foundry/foundry-models/concepts/models-sold-directly-by-azure (doc updated 2026-06-05) - - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/reasoning (reasoning effort / verbosity feature matrix, doc updated 2026-06-05) - - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/model-retirements (lifecycle policy + gpt-4o dates) - - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/model-router and .../concepts/model-router - - https://learn.microsoft.com/en-us/azure/foundry/foundry-models/how-to/use-foundry-models-claude (doc updated 2026-06-11) - - https://platform.claude.com/docs/en/build-with-claude/claude-in-microsoft-foundry - - https://platform.claude.com/docs/en/about-claude/pricing - - https://platform.claude.com/docs/en/about-claude/models/overview - - https://platform.claude.com/docs/en/build-with-claude/structured-outputs -2. **Azure OpenAI pricing:** Azure Retail Prices API (`https://prices.azure.com/api/retail/prices?$filter=serviceName eq 'Foundry Models' and contains(meterName,'...')`). All quoted prices are the **Global Standard** ("Gl"/"glbl") meters, normalized to USD per 1M tokens. The marketing pricing page times out; the Retail Prices API is authoritative for billed meters. -3. **Provider implementation:** `apps/sim/providers/azure-openai/index.ts` (API dispatch), `apps/sim/providers/azure-anthropic/index.ts` (Messages API via `@anthropic-ai/sdk` against `{endpoint}/anthropic`). - -Sim convention notes: `pricing.cachedInput` = cache-read price; `releaseDate` for `azure/*` entries = the Azure model **version date** (convention set in PR #4990 with gpt-4o → 2024-11-20 and model-router → 2025-05-19). 
- ---- - -## Block: `azure-openai` (defaultModel: `azure/gpt-4o`) - -### azure/gpt-4o - -| Field | Current value | Source / evidence | Verdict | -| --- | --- | --- | --- | -| pricing.input | 2.5 | Retail API `gpt 4o 1120 Inp glbl` = 0.0025/1K = $2.50/1M | OK | -| pricing.cachedInput | 1.25 | Retail API `gpt 4o 1120 cached Inp glbl` = 0.00125/1K = $1.25/1M | **OK — VERIFIED** (open question b resolved) | -| pricing.output | 10.0 | Retail API `gpt 4o 1120 Outp glbl` = 0.01/1K = $10/1M | OK | -| temperature 0–2 | yes | Standard chat model; reasoning-model parameter restrictions don't apply | OK | -| maxOutputTokens | **(absent)** | models-sold-directly: gpt-4o (2024-11-20) "Input: 128,000 / Output: 16,384" | **FIX: add `maxOutputTokens: 16384`** | -| contextWindow | 128000 | same row | OK | -| releaseDate | 2024-11-20 | Azure version `2024-11-20` (PR #4990 change re-verified) | OK | -| deprecated | (absent) | model-retirements: versions 2024-05-13 / 2024-08-06 **retired 2026-03-31** (auto-upgraded to gpt-5.1); version 2024-11-20 "retires **2026-10-01**" | **RECOMMEND `deprecated: true`** — firm retirement date within ~3.7 months. NOTE: gpt-4o is the `azure-openai` `defaultModel`; changing the default (e.g. to azure/gpt-5.1 per Azure's own auto-upgrade path) is a product decision — documented only, not assumed. | - -### azure/gpt-5.4 - -| Field | Current value | Source / evidence | Verdict | -| --- | --- | --- | --- | -| pricing | 2.5 / 0.25 / 15.0 | Retail API `5.4 inp Gl` 2.5, `5.4 cd inp Gl` 0.25, `5.4 opt Gl` 15.0 | OK | -| reasoningEffort | none, low, medium, high | reasoning doc footnote 7 enumerates `'none'` support as exactly: gpt-5.2, gpt-5.1, gpt-5.1-codex, gpt-5.1-codex-max, gpt-5.1-codex-mini — **gpt-5.4 family is not listed** | **FIX: drop `'none'`** → `['low','medium','high']` (open question c resolved). PR #4990's removal of `'xhigh'` re-verified correct: footnote 6 — xhigh is gpt-5.1-codex-max only. 
| -| verbosity | low, medium, high | reasoning doc "NEW GPT-5 reasoning features": verbosity options low/medium/high for GPT-5 series | OK | -| maxOutputTokens | 128000 | models-sold-directly: gpt-5.4 (2026-03-05) output 128,000 | OK | -| contextWindow | 1050000 | same row: 1,050,000 (Input 922,000 / Output 128,000) | OK | -| releaseDate | 2026-03-05 | Azure version `2026-03-05` | OK | - -Pricing limitation: a long-context tier exists (`5.4 longco inp Gl` $5.0 / `longco cd inp Gl` $0.5 / `longco opt Gl` $22.5) for requests beyond the standard context threshold. The flat pricing schema cannot express tiered pricing; standard-tier rates are recorded. - -### azure/gpt-5.4-mini - -| Field | Current value | Source / evidence | Verdict | -| --- | --- | --- | --- | -| pricing | 0.75 / 0.075 / 4.5 | Retail API `5.4 mini Inp Gl` 0.75, `cd Inp Gl` 0.075, `Opt Gl` 4.5 | OK | -| reasoningEffort | none, low, medium, high | footnote 7 (see gpt-5.4) | **FIX: drop `'none'`** | -| verbosity | low, medium, high | GPT-5 series verbosity | OK | -| maxOutputTokens / contextWindow | 128000 / 400000 | models-sold-directly: gpt-5.4-mini (2026-03-17) 400,000 (272k in / 128k out) | OK | -| releaseDate | 2026-03-17 | Azure version `2026-03-17` | OK | - -### azure/gpt-5.4-nano - -| Field | Current value | Source / evidence | Verdict | -| --- | --- | --- | --- | -| pricing | 0.2 / 0.02 / 1.25 | Retail API `5.4 nano Inp Gl` 0.2, `cd Inp Gl` 0.02, `Opt Gl` 1.25 | OK | -| reasoningEffort | none, low, medium, high | footnote 7 (see gpt-5.4) | **FIX: drop `'none'`** | -| verbosity | low, medium, high | GPT-5 series verbosity | OK | -| maxOutputTokens / contextWindow | 128000 / 400000 | models-sold-directly: gpt-5.4-nano (2026-03-17) | OK | -| releaseDate | 2026-03-17 | Azure version `2026-03-17` | OK | - -### azure/gpt-5.2 - -| Field | Current value | Source / evidence | Verdict | -| --- | --- | --- | --- | -| pricing | 1.75 / 0.175 / 14.0 | Retail API `GPT 5.2 inp Gl` 1.75, `cd inp Gl` 0.175, `opt 
Gl` 14.0 | OK | -| reasoningEffort | none, low, medium, high | footnote 7 explicitly lists gpt-5.2 as supporting `'none'`; `'xhigh'` removal (PR #4990) correct — codex-max only; `'minimal'` correctly absent ("not supported with gpt-5.1 or greater") | OK | -| verbosity | low, medium, high | GPT-5 series verbosity | OK | -| maxOutputTokens / contextWindow | 128000 / 400000 | models-sold-directly: gpt-5.2 (2025-12-11) | OK | -| releaseDate | 2025-12-11 | Azure version `2025-12-11` | OK | - -### azure/gpt-5.1 - -| Field | Current value | Source / evidence | Verdict | -| --- | --- | --- | --- | -| pricing | 1.25 / 0.125 / 10.0 | Retail API `GPT 5.1 inp Gl` 1.25, `cd inp Gl` 0.125, `opt Gl` 10.0 | OK | -| reasoningEffort | none, low, medium, high | footnote 7 lists gpt-5.1 (also: `reasoning_effort` defaults to `none` on 5.1); `'minimal'` correctly absent | OK | -| verbosity | low, medium, high | GPT-5 series verbosity | OK | -| maxOutputTokens / contextWindow | 128000 / 400000 | models-sold-directly: gpt-5.1 | OK | -| releaseDate | 2025-11-12 | Azure version is **2025-11-13** in both the models table and the reasoning feature matrix | **FIX: → 2025-11-13** (per PR #4990's own convention of using the Azure version date, cf. 
gpt-4o 2024-11-20, model-router 2025-05-19) | - -### azure/gpt-5.1-codex - -| Field | Current value | Source / evidence | Verdict | -| --- | --- | --- | --- | -| pricing | 1.25 / 0.125 / 10.0 | Retail API `5.1 codex inp Gl` 1.25, `cd inp Gl` 0.125, `opt Gl` 10.0 | OK | -| reasoningEffort | none, low, medium, high | footnote 7 lists gpt-5.1-codex | OK | -| verbosity | low, medium, high | GPT-5 series | OK | -| maxOutputTokens / contextWindow | 128000 / 400000 | models-sold-directly: gpt-5.1-codex | OK | -| releaseDate | 2025-11-12 | Azure version `2025-11-13` | **FIX: → 2025-11-13** | -| deprecated | true (PR #4990 stopgap) | See ruling below | **RECOMMEND: KEEP entry, REVERT `deprecated: true`** | - -**Ruling on open question (a):** Responses-API-only status **confirmed** — models-sold-directly lists gpt-5.1-codex as "Responses API only", and the reasoning feature matrix shows Chat Completions = not supported. **However, the premise that it "never worked through Sim" is false.** `apps/sim/providers/azure-openai/index.ts` dispatches by endpoint shape: a full chat-completions URL → Chat Completions; a full responses URL → Responses; **the default path (plain resource base URL) constructs `{endpoint}/openai/v1/responses` and calls the Responses API** (lines ~743–765). So gpt-5.1-codex works for any user configured with a base endpoint or responses URL — the majority configuration. Azure itself has not deprecated the model (GA, "Access is no longer restricted"). Therefore: **KEEP the entry and revert `deprecated: true`**. The only genuinely broken configuration is a user-supplied chat-completions endpoint URL; that is an endpoint-configuration limitation, not a model lifecycle state, and `deprecated` (which signals retirement to users) is the wrong tool for it. 
- -### azure/gpt-5 · azure/gpt-5-mini · azure/gpt-5-nano - -| Field | gpt-5 | gpt-5-mini | gpt-5-nano | Source / evidence | Verdict | -| --- | --- | --- | --- | --- | --- | -| pricing in/cached/out | 1.25 / 0.125 / 10.0 | 0.25 / 0.025 / 2.0 | 0.05 / 0.005 / 0.4 | Retail API `GPT 5 [Mini\|Nano] [Inpt\|cchd Inpt\|outpt] Glbl` — exact matches all three | OK | -| reasoningEffort | minimal, low, medium, high | same | same | reasoning doc: "`minimal` is only supported with the original GPT-5 reasoning models"; `'none'` correctly absent (not in footnote 7); `'xhigh'` correctly absent | OK | -| verbosity | low/medium/high | same | same | GPT-5 series | OK | -| maxOutputTokens / contextWindow | 128000 / 400000 | same | same | models-sold-directly: all three 400,000 (272k/128k) | OK | -| releaseDate | 2025-08-07 | 2025-08-07 | 2025-08-07 | Azure version `2025-08-07` | OK | - -### azure/gpt-5-chat - -| Field | Current value | Source / evidence | Verdict | -| --- | --- | --- | --- | -| id (deployable name) | `gpt-5-chat` | models-sold-directly lists `gpt-5-chat` (Preview), versions 2025-08-07 and 2025-10-03 — **exact name confirmed**; PR #4990 rename from `gpt-5-chat-latest` re-verified correct. 
Note: OpenAI's first-party `gpt-5-chat-latest`-style continuously-updated alias maps to a *different* Foundry product (`gpt-chat-latest`, now GPT-5.5 Instant) — our entry correctly tracks the deployable `gpt-5-chat` (open question e resolved) | OK | -| pricing | 1.25 / 0.125 / 10.0 | Retail API `GPT 5 Chat [Inpt\|cchd Inpt\|outpt] Glbl` = 1.25 / 0.125 / 10.0 | OK | -| temperature 0–2 | yes | gpt-5-chat is a non-reasoning chat model (temperature restriction applies to gpt-5.1-chat and later, which we do not list) | OK | -| maxOutputTokens | 16384 | models-sold-directly: 128,000 / **16,384** (PR #4990 addition re-verified) | OK | -| contextWindow | 128000 | same row | OK | -| releaseDate | 2025-08-07 | Azure version `2025-08-07` (a `2025-10-03` revision also exists; the original version date is kept) | OK | -| lifecycle | not marked | **Preview** on Azure. Preview lifecycle = "not sooner than" retirement, force-upgrade or 30-day-notice retirement, "not recommended for production". No retirement date currently announced → no `deprecated` flag warranted | OK (documented) | - -### azure/o3 · azure/o4-mini - -| Field | o3 | o4-mini | Source / evidence | Verdict | -| --- | --- | --- | --- | --- | -| pricing | 2 / 0.5 / 8 | 1.1 / 0.275 / 4.4 | Retail API `o3 0416` 0.002/0.0005/0.008 per 1K; `o4-mini 0416` 0.0011/0.000275/0.0044 per 1K | OK | -| reasoningEffort | low, medium, high | low, medium, high | reasoning doc: "low, medium, or high for all reasoning models except o1-mini"; o-series matrix has no none/minimal/xhigh | OK | -| verbosity | (absent) | (absent) | verbosity is a GPT-5-series-only parameter | OK | -| maxOutputTokens / contextWindow | 100000 / 200000 | 100000 / 200000 | models-sold-directly o-series: Input 200,000 / Output 100,000 | OK | -| releaseDate | 2025-04-16 | 2025-04-16 | Azure version `2025-04-16` for both | OK | - -### azure/gpt-4.1 · azure/gpt-4.1-mini · azure/gpt-4.1-nano - -| Field | 4.1 | 4.1-mini | 4.1-nano | Source / evidence | Verdict | -| 
--- | --- | --- | --- | --- | --- | -| pricing | 2.0 / 0.5 / 8.0 | 0.4 / 0.1 / 1.6 | 0.1 / 0.025 / 0.4 | Retail API `gpt 4.1 [mini\|nano] [Inp\|cached Inp\|Outp] glbl` — exact matches all three | OK | -| temperature 0–2 | yes | yes | yes | non-reasoning models | OK | -| maxOutputTokens | 32768 | 32768 | 32768 | models-sold-directly: 32,768 | OK | -| contextWindow | 1047576 | 1047576 | 1047576 | models-sold-directly: 1,047,576 (global standard; lower for regional standard/batch — global is the right representation) | OK | -| releaseDate | 2025-04-14 | 2025-04-14 | 2025-04-14 | Azure version `2025-04-14` | OK | - -### azure/model-router - -| Field | Current value | Source / evidence | Verdict | -| --- | --- | --- | --- | -| pricing | 2.0 / 0.5 / 8.0 | No `model-router` meter exists in the Retail Prices API (searched `Router`/`Rtr`/`rtr` under serviceName 'Foundry Models' and productName across all services — only Communication Services "Job Router" exists). Concepts page: "Model router usage is charged for input prompts at the rate listed on the pricing page"; how-to evaluation section: "Account for the **router markup on input tokens** plus the underlying model's input and output pricing." The reported $0.14/1M router markup could not be confirmed from any fetchable source (only the timing-out marketing page carries the number). | **KEEP as documented proxy** (open question d resolved — see below) | -| capabilities | {} (no reasoningEffort) | Router accepts `reasoning_effort` since version 2025-11-18 and forwards it; but our pinned version semantics are 2025-05-19 (gpt-4.1-family + o4-mini routing, none of which take temperature uniformly — temp/top_p silently dropped for o-series). 
Empty capabilities is the safest representation | OK | -| contextWindow | 200000 | models-sold-directly footnote: "Context window: 200,000" — the limit of the smallest underlying model; larger prompts succeed only if routed to a compatible model | OK | -| maxOutputTokens | (absent) | "max output tokens varies" (16,384–128,000 depending on routed model) — correctly unset | OK | -| releaseDate | 2025-05-19 | Original version `2025-05-19` confirmed (versions: 2025-05-19, 2025-08-07, 2025-11-18 latest); PR #4990 change re-verified | OK | - -**Pricing decision (open question d):** True billing = per-input-token router markup + the routed model's own input/output rates, which vary per request. The flat `{input, cachedInput, output}` schema cannot express this. The current 2.0/0.5/8.0 equals the gpt-4.1 rates — gpt-4.1 is the flagship of the 2025-05-19 routed set (gpt-4.1/-mini/-nano + o4-mini) and is the most expensive model in that set (o4-mini, the only reasoning model routed, is cheaper at 1.1/0.275/4.4), so it is a conservative (slightly pessimistic) proxy for cost estimation. **Keep 2.0/0.5/8.0.** This is a documented schema limitation, not a verified Azure price; cost estimates for model-router workloads in Sim are approximations. - ---- - -## Block: `azure-anthropic` (defaultModel: `azure-anthropic/claude-sonnet-4-5`) - -Pricing basis: platform.claude.com Claude-in-Microsoft-Foundry doc — "Pricing for Claude in the Microsoft Marketplace uses Anthropic's standard API pricing." So azure-anthropic pricing == Anthropic first-party pricing (open question f, pricing half, resolved). `cachedInput` maps to Anthropic "Cache Hits & Refreshes" (0.1× input). All five models are **(preview)** on Foundry; Foundry "follows the Claude API lifecycle schedule". 
- -### azure-anthropic/claude-opus-4-6 - -| Field | Current value | Source / evidence | Verdict | -| --- | --- | --- | --- | -| pricing | 5.0 / 0.5 / 25.0 | Anthropic pricing: Opus 4.6 $5 in / $0.50 cache read / $25 out | OK | -| contextWindow | 1000000 | MS Foundry Claude doc: opus-4-6 "1M / 128K"; Anthropic Foundry doc: "Claude Fable 5, Claude Opus 4.7, Claude Opus 4.6, and Claude Sonnet 4.6 have a 1M-token context window on Microsoft Foundry"; Anthropic models overview: 1M. PR #4990 change re-verified. Long context is at **standard pricing** (Anthropic long-context pricing section), so no tiered-pricing concern | OK | -| maxOutputTokens | 128000 | both MS and Anthropic sources: 128K | OK | -| thinking levels | low, medium, high, max (default high) | MS Foundry Claude doc: effort supports low/medium/high, "also max for Opus 4.8, Opus 4.7, **Opus 4.6**, and Sonnet 4.6" | OK | -| nativeStructuredOutputs | true | Anthropic structured-outputs doc: Opus 4.6 supported (GA) | OK | -| temperature 0–1 | yes | Anthropic Messages API range | OK | -| releaseDate | 2026-02-05 | Not stated in any fetched doc (dateless model ID). 
Consistent with Opus 4.6 launch timeframe (early Feb 2026); convention = announcement date | Unverifiable (plausible, kept) | - -### azure-anthropic/claude-opus-4-5 - -| Field | Current value | Source / evidence | Verdict | -| --- | --- | --- | --- | -| pricing | 5.0 / 0.5 / 25.0 | Anthropic pricing: Opus 4.5 $5 / $0.50 / $25 | OK | -| contextWindow / maxOutputTokens | 200000 / 64000 | MS doc "200K / 64K"; Anthropic overview 200k / 64k | OK | -| thinking | low, medium, high | extended thinking; `max` effort not supported on 4.5-generation | OK | -| nativeStructuredOutputs | true | Anthropic structured-outputs doc: Opus 4.5 supported | OK | -| releaseDate | 2025-11-24 | Anthropic launch date (snapshot ID claude-opus-4-5-20251101; announcement 2025-11-24 — announcement-date convention) | OK | - -### azure-anthropic/claude-sonnet-4-5 - -| Field | Current value | Source / evidence | Verdict | -| --- | --- | --- | --- | -| pricing | 3.0 / 0.3 / 15.0 | Anthropic pricing: Sonnet 4.5 $3 / $0.30 / $15 | OK | -| contextWindow / maxOutputTokens | 200000 / 64000 | MS doc "200K / 64K"; Anthropic overview. 
Note: the Sonnet 4.5 **1M-context beta** on Foundry was retired after 2026-04-30 (already past as of this validation on 2026-06-11) — 200000 is correct | OK | -| thinking | low, medium, high | extended thinking | OK | -| nativeStructuredOutputs | true | Anthropic structured-outputs doc: Sonnet 4.5 supported | OK | -| releaseDate | 2025-09-29 | snapshot claude-sonnet-4-5-20250929 | OK | - -### azure-anthropic/claude-opus-4-1 - -| Field | Current value | Source / evidence | Verdict | -| --- | --- | --- | --- | -| pricing | 15.0 / 1.5 / 75.0 | Anthropic pricing: Opus 4.1 $15 / $1.50 / $75 | OK | -| contextWindow / maxOutputTokens | 200000 / 32000 | MS doc "200K / 32K"; Anthropic overview 200k / 32k | OK | -| thinking | low, medium, high | extended thinking | OK | -| nativeStructuredOutputs | **true** | Anthropic structured-outputs doc supported-model list **excludes Opus 4.1** (Fable 5, Mythos 5/Preview, Opus 4.8/4.7/4.6/4.5, Sonnet 4.6/4.5, Haiku 4.5 only). The first-party `anthropic` block's `claude-opus-4-1` entry correctly omits it (models.ts ~line 762). With this flag set, Sim sends the `structured-outputs-2025-11-13` beta header and `output_format` to a model that doesn't support it | **FIX: remove `nativeStructuredOutputs`** | -| deprecated | true | Anthropic Foundry doc model table: "Claude Opus 4.1 — Deprecated. **Retiring August 5, 2026**"; Anthropic pricing page marks it deprecated. 
PR #4990 change re-verified correct | OK | -| releaseDate | 2025-08-05 | snapshot claude-opus-4-1-20250805 | OK | - -### azure-anthropic/claude-haiku-4-5 - -| Field | Current value | Source / evidence | Verdict | -| --- | --- | --- | --- | -| pricing | 1.0 / 0.1 / 5.0 | Anthropic pricing: Haiku 4.5 $1 / $0.10 / $5 | OK | -| contextWindow / maxOutputTokens | 200000 / 64000 | MS doc "200K / 64K"; Anthropic overview | OK | -| thinking | low, medium, high | extended thinking | OK | -| nativeStructuredOutputs | true | Anthropic structured-outputs doc: Haiku 4.5 supported | OK | -| releaseDate | 2025-10-15 | Anthropic launch date (snapshot claude-haiku-4-5-20251001; announcement 2025-10-15 — announcement-date convention) | OK | - ---- - -## Changes made in PR #4990 — re-verification results - -| PR #4990 change | Verdict | -| --- | --- | -| Drop `'xhigh'` from azure/gpt-5.4, 5.4-mini, 5.4-nano, gpt-5.2 | **Correct** — `xhigh` is gpt-5.1-codex-max only (reasoning doc footnote 6) | -| `deprecated: true` on azure/gpt-5.1-codex | **Premise partially wrong** — Responses-API-only confirmed, but Sim's azure provider defaults to the Responses API; recommend reverting (see entry) | -| `deprecated: true` on azure-anthropic/claude-opus-4-1 | **Correct** — retiring 2026-08-05 | -| Rename azure/gpt-5-chat-latest → azure/gpt-5-chat + maxOutputTokens 16384 | **Correct** | -| azure/gpt-4o releaseDate → 2024-11-20 | **Correct** | -| azure/model-router releaseDate → 2025-05-19 | **Correct** | -| azure-anthropic/claude-opus-4-6 contextWindow → 1000000 | **Correct** | -| updatedAt bumps to 2026-06-11 | OK (azure/model-router still 2026-04-01; acceptable since its pricing is an unverifiable proxy) | - -## Recommended fixes from this pass (not applied — doc only) - -1. `azure/gpt-5.4`, `azure/gpt-5.4-mini`, `azure/gpt-5.4-nano`: reasoningEffort drop `'none'` → `['low','medium','high']` (reasoning doc footnote 7 enumerates 'none' support and excludes the 5.4 family). -2. 
`azure/gpt-4o`: add `maxOutputTokens: 16384`. -3. `azure/gpt-4o`: add `deprecated: true` (retires 2026-10-01). **Product caveat:** it is the block's `defaultModel`; the default-model change is a product decision, not made here. -4. `azure/gpt-5.1` and `azure/gpt-5.1-codex`: releaseDate `2025-11-12` → `2025-11-13` (Azure version date convention). -5. `azure/gpt-5.1-codex`: **KEEP entry; revert `deprecated: true`** (works through Sim's default Responses-API path; Azure lifecycle is GA, not deprecated). -6. `azure-anthropic/claude-opus-4-1`: remove `nativeStructuredOutputs: true` (unsupported model; matches first-party anthropic entry). - -## Deliberately not changed - -- **azure/model-router pricing 2.0/0.5/8.0** — kept as a documented gpt-4.1-rate proxy; real billing (input-token router markup + routed model rates) is unrepresentable in the flat pricing schema, and no router meter exists in the Retail Prices API to anchor a different number. -- **azure/gpt-5-chat Preview status** — no `deprecated` flag: Preview models have no announced retirement; flagging would misrepresent lifecycle. -- **gpt-5.4 long-context pricing tier** (5.0/0.5/22.5 "longco" meters) — schema cannot express tiered pricing; standard-tier rates kept. -- **gpt-4.1 contextWindow 1,047,576** — global-standard figure kept although regional standard (300,000) and batch (128,000) deployments are lower; Sim assumes global standard. -- **azure-anthropic releaseDates using announcement dates** (opus-4-5 2025-11-24, haiku-4-5 2025-10-15) rather than snapshot dates (20251101, 20251001) — consistent existing convention across the file. -- **Missing newer models** (out of scope, noted for follow-up): Azure now offers `gpt-5.5` (GA, 2026-04-24, 1.05M ctx), `gpt-chat-latest`, `gpt-5.4-pro`, `gpt-5.3-codex`/`gpt-5.3-chat`, `gpt-5.2-codex`/`gpt-5.2-chat`; Foundry Claude now offers `claude-fable-5`, `claude-opus-4-8`, `claude-opus-4-7`, `claude-sonnet-4-6` (1M ctx GA). 
- -## Unverifiable - -- **model-router pricing** — no retail meter; the $0.14/1M router-markup figure appears only on the timing-out marketing pricing page and could not be confirmed. -- **azure-anthropic/claude-opus-4-6 releaseDate 2026-02-05** — no fetched source states the launch date (dateless model ID); plausible and consistent with Opus 4.6-era documentation, kept as-is. -- **Azure-side rate-limit/quota values** — not modeled in the schema; not validated. diff --git a/docs/models/bedrock.md b/docs/models/bedrock.md deleted file mode 100644 index eff34fc335..0000000000 --- a/docs/models/bedrock.md +++ /dev/null @@ -1,226 +0,0 @@ -# Bedrock provider validation — `apps/sim/providers/models.ts` - -- **Date:** 2026-06-11 (final exhaustive pass; re-verifies PR #4990) -- **Scope:** all 32 `bedrock/*` model entries -- **Method:** every fact below traced to a live source fetched today: - - **AWS Pricing API** (authoritative for token prices): `https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonBedrock/current/us-east-1/index.json` (1.37 MB, Last-Modified 2026-06-11) and the `us-west-2` offer file. Prices are per 1K tokens in the offer; converted ×1000 to per-1M below. Claude 4.x, Cohere, and Mistral Large 24.11 have **no SKUs** in the Pricing API (marketplace-billed / absent). - - **AWS model cards:** `docs.aws.amazon.com/bedrock/latest/userguide/model-card--.html` (authoritative for geo/global inference IDs, context window, max output, lifecycle, prompt caching). - - **Lifecycle:** `docs.aws.amazon.com/bedrock/latest/userguide/model-lifecycle.html` (Legacy/EOL table). - - **Anthropic:** `platform.claude.com/docs/en/about-claude/pricing` and `.../models/overview` (Claude prices, cache rates, max output, Bedrock geo premium). - - **AWS what's-new** for the Nova Premier GA date and Nova 2 announcements. 
- ---- - -## GEO-PROFILE TABLE (deliverable for `getBedrockInferenceProfileId`) - -Source: each model card's Programmatic Access table ("Geo inference ID" / "Global inference ID" columns). `geo` = inference profile required/available (the bare ID is generally **not** invokable on-demand for these, except where noted); `bare` = card lists "Not supported" for both Geo and Global — must invoke with the plain model ID. - -| model id suffix | verdict | profiles on card | -|---|---|---| -| anthropic.claude-opus-4-5-20251101-v1:0 | **geo** (REQUIRED) | `us.`, `eu.` + `global.` (no apac/au/jp) | -| anthropic.claude-sonnet-4-5-20250929-v1:0 | **geo** (REQUIRED) | `us.`, `eu.`, `au.`, `jp.` + `global.` (no `apac.`) | -| anthropic.claude-haiku-4-5-20251001-v1:0 | **geo** (REQUIRED in most regions) | `us.`, `eu.`, `au.`, `jp.` + `global.` (no `apac.`; in-region only us-east-1/eu-north-1/eu-west-1/ap-northeast-1/ap-southeast-4) | -| anthropic.claude-opus-4-1-20250805-v1:0 | **geo** (REQUIRED) | `us.` only; global NOT supported | -| amazon.nova-2-pro-v1:0 | **unknown** (no card; ID does not exist on Bedrock — real preview ID is `amazon.nova-2-pro-preview-20251202-v1:0`, served via geo/global profiles per cloudprice `apac.amazon.nova-2-pro-preview-…`) | -| amazon.nova-2-lite-v1:0 | **geo** (REQUIRED) | `us.`, `eu.`, `jp.` + `global.` (no `apac.`) | -| amazon.nova-premier-v1:0 | **geo** (REQUIRED) | `us.` only; global NOT supported | -| amazon.nova-pro-v1:0 | **geo** | `us.`, `eu.` (no apac/global; in-region exists in us-east-1 and a few others) | -| amazon.nova-lite-v1:0 | **geo** | `us.`, `eu.` (no apac/global) | -| amazon.nova-micro-v1:0 | **geo** | `us.`, `eu.` (no apac/global) | -| meta.llama4-maverick-17b-instruct-v1:0 | **geo** (REQUIRED) | `us.` only | -| meta.llama4-scout-17b-instruct-v1:0 | **geo** (REQUIRED) | `us.` only | -| meta.llama3-3-70b-instruct-v1:0 | **geo** | `us.` only | -| meta.llama3-2-90b-instruct-v1:0 | **geo** (REQUIRED) | `us.` only | -| 
meta.llama3-2-11b-instruct-v1:0 | **geo** (REQUIRED) | `us.` only | -| meta.llama3-2-3b-instruct-v1:0 | **geo** (REQUIRED) | `us.`, `eu.` | -| meta.llama3-2-1b-instruct-v1:0 | **geo** (REQUIRED) | `us.`, `eu.` | -| meta.llama3-1-405b-instruct-v1:0 | **geo** | `us.` only (in-region only us-west-2) | -| meta.llama3-1-70b-instruct-v1:0 | **geo** | `us.` only (in-region only us-west-2) | -| meta.llama3-1-8b-instruct-v1:0 | **geo** | `us.` only (in-region only us-west-2) | -| mistral.mistral-large-3-675b-instruct | **bare** | Geo: Not supported; Global: Not supported (in-region, 11 regions) | -| mistral.mistral-large-2411-v1:0 | **bare** (phantom — see below; the Mistral Large card covers only `mistral-large-2402-v1:0`, bare) | -| mistral.mistral-large-2407-v1:0 | **bare** (no card; on-demand SKUs exist in us-west-2; the 2402 card shows Geo/Global Not supported — same family, in-region only) | -| mistral.pixtral-large-2502-v1:0 | **geo** (REQUIRED) | `us.`, `eu.` | -| mistral.magistral-small-2509 | **bare** | Geo: Not supported; Global: Not supported | -| mistral.ministral-3-14b-instruct | **bare** | Geo: Not supported; Global: Not supported | -| mistral.ministral-3-8b-instruct | **bare** | Geo: Not supported; Global: Not supported | -| mistral.ministral-3-3b-instruct | **bare** | Geo: Not supported; Global: Not supported (card "Ministral 3B" confirms this exact ID) | -| mistral.mixtral-8x7b-instruct-v0:1 | **bare** | Geo: Not supported; Global: Not supported | -| amazon.titan-text-premier-v1:0 | **bare** | model card removed from docs; historically in-region only, never had inference profiles | -| cohere.command-r-v1:0 | **bare** | card: Geo Not supported; Global Not supported | -| cohere.command-r-plus-v1:0 | **bare** | card: Geo Not supported; Global Not supported | - -Implications for `apps/sim/providers/bedrock/utils.ts` (`getBedrockInferenceProfileId`): - -1. 
All `mistral.*` IDs **except** `mistral.pixtral-large-2502-v1:0`, all `cohere.*` IDs, and `amazon.titan-text-premier-v1:0` must be passed through **unprefixed**. Today the function prefixes everything → `ValidationException` for these 11 models. -2. The blanket `ap-*/me-* → apac` mapping is wrong for every model in this list: **no bedrock-provider model has an `apac.` profile**. Claude Sonnet/Haiku 4.5 use `au.`/`jp.` (or `global.`); Nova 2 Lite has `jp.`; everything else is `us.`/`eu.` only. -3. `eu.` is only valid for: claude opus/sonnet/haiku 4.5, nova-2-lite, nova pro/lite/micro, llama3-2-3b/1b, pixtral-large. For the rest (opus-4-1, nova-premier, all other llamas) only `us.` exists — an `eu-*` region request currently produces a nonexistent `eu.` profile ID. - ---- - -## Per-model verification - -Prices are USD per 1M tokens, **standard on-demand, us-east-1** (us-west-2 where us-east-1 has no SKU). "Pricing API" = the offer file above, fetched 2026-06-11. - -### Anthropic (no Pricing API SKUs — verified against Anthropic pricing page; Bedrock bills Anthropic list prices) - -| model | field | repo | verified | source | verdict | -|---|---|---|---|---|---| -| claude-opus-4-5 | input/output | 5 / 25 | 5 / 25 | Anthropic pricing | OK | -| | cachedInput | — | 0.50 (0.1× input; Bedrock card: caching Yes, min 4096 tok) | Anthropic pricing + card | **ADD** | -| | maxOutputTokens | 64000 | 64K | card + Anthropic overview | OK | -| | contextWindow | 200000 | 200K | card | OK | -| | releaseDate | 2025-11-24 | Nov 24 2025 | card | OK | -| claude-sonnet-4-5 | input/output | 3 / 15 | 3 / 15 | Anthropic pricing | OK | -| | cachedInput | — | 0.30 | Anthropic pricing + card (caching Yes) | **ADD** | -| | maxOutputTokens / ctx | 64000 / 200000 | 64K / 200K | card | OK | -| | releaseDate | 2025-09-29 | card says Sep 30 2025; Anthropic launch Sep 29 2025 | keep repo (matches upstream launch) | -| | recommended | — | provider default model | models.ts convention | **ADD
`recommended: true`** | -| claude-haiku-4-5 | input/output | 1 / 5 | 1 / 5 | Anthropic pricing | OK | -| | cachedInput | — | 0.10 | Anthropic pricing + card (caching Yes) | **ADD** | -| | maxOutputTokens / ctx | 64000 / 200000 | 64K / 200K | card | OK | -| | releaseDate | 2025-10-15 | card says Oct 16 2025; Anthropic launch Oct 15 2025 | keep repo | -| | speedOptimized | — | "the fastest model with near-frontier intelligence" | Anthropic overview | **ADD `speedOptimized: true`** | -| claude-opus-4-1 | input/output | 15 / 75 | 15 / 75 | Anthropic pricing | OK | -| | cachedInput | — | 1.50 | Anthropic pricing + card (caching Yes, 5m TTL only) | **ADD** | -| | maxOutputTokens | 32768 | **32K = 32000** (card "32K"; Anthropic overview "32k tokens") | **FIX 32768 → 32000** (32768 would exceed the documented cap) | -| | ctx / releaseDate / lifecycle | 200000 / 2025-08-05 / active | 200K / Aug 05 2025 / Active on Bedrock (deprecated on first-party API, retire 2026-08-05 — Bedrock lifecycle independent) | OK | - -**Geo premium (open question d):** Anthropic's pricing page states regional/multi-region endpoints carry a **10% premium over global** for Sonnet 4.5, Haiku 4.5, Opus 4.5 "and all future models" (earlier models keep existing pricing). Sim always builds geo profiles, so real spend on these three is 1.1× the table values. **Decision: keep base prices and document** — (a) the Pricing API exposes no Claude SKUs to anchor a geo-specific number, (b) repo convention is provider list price, (c) baking 1.1× would overbill if/when the provider routes `global.`. Revisit if Sim adds `global.` routing. - -### Amazon Nova (Pricing API us-east-1) - -| model | field | repo | verified | verdict | -|---|---|---|---|---| -| nova-2-pro | input/output | 1.0 / 4.0 | **1.375 / 11.0** (`USE1-Nova2.0Pro-text-input-tokens` 0.001375, `-text-output-tokens` 0.011; global cross-region 1.25/10.0) | **FIX**. 
Note: cloudprice lists 2.19/17.50 for an apac preview profile — AWS Pricing API wins | -| | identity | `amazon.nova-2-pro-v1:0` | no model card; not in catalog; real ID is `amazon.nova-2-pro-preview-20251202-v1:0` (preview, Nova Forge early access, per AWS re:Invent 2025 what's-new + cloudprice/getmaxim) | entry is a **phantom ID**; `deprecated: true` (PR #4990) keeps it hidden — acceptable; longer-term remove or migrate to the preview ID | -| nova-2-lite | input/output | 0.08 / 0.32 | **0.33 / 2.75** (`USE1-Nova2.0Lite-input-tokens` 0.00033, `-output-tokens` 0.00275) | **FIX** — resolves open question (a): repo was wrong AND the secondaries' 0.30/2.50 is the *global cross-region* price (`-cross-region-global` SKUs), not the geo/in-region price Sim pays | -| | cachedInput | — | **0.0825** (`-cache-read-input-token-count` 0.0000825; cache write $0) | **ADD** | -| | maxOutputTokens | — | 64K (card) | **ADD 64000** | -| | ctx / releaseDate / lifecycle | 1000000 / 2025-12-02 / active | 1M / Dec 02 2025 / Active; geo us/eu/jp + global | OK | -| nova-premier | input/output | 2.5 / 12.5 | 2.50 / 12.50 (`USE1-NovaPremier-*`) | OK (PR #4990 fix confirmed) | -| | cachedInput | — | 0.625 (`-cache-read` 0.000625) | **ADD** (model is Legacy but still billable until EOL 2026-09-14) | -| | deprecated | true | Legacy 2026-03-13, EOL 2026-09-14 (lifecycle page + card) | OK | -| | maxOutputTokens | — | 25K (card) | skip per instruction (deprecated); documented only | -| | releaseDate | 2025-04-30 | GA announced Apr 30 2025 (aws.amazon.com what's-new 2025/04 "Amazon Nova Premier… generally available"); card shows "Oct 31 2025" which conflicts with AWS's own GA announcement and the lifecycle history — treated as a card-metadata anomaly | **keep 2025-04-30** | -| nova-pro | input/output | 0.8 / 3.2 | 0.80 / 3.20 | OK (question b resolved) | -| | cachedInput | — | 0.20 | **ADD** | -| | maxOutputTokens | — | 5K (card) | **ADD 5120** (Nova "5K" cap; trackers/openrouter report 5,120) | -| 
| ctx | 300000 | 300K | OK; releaseDate repo 2024-12-03 (re:Invent announce) vs card Dec 05 2024 — keep repo, documented | -| nova-lite | input/output | 0.06 / 0.24 | 0.06 / 0.24 | OK | -| | cachedInput | — | 0.015 | **ADD** | -| | maxOutputTokens | — | 5K | **ADD 5120** | -| nova-micro | input/output | 0.035 / 0.14 | 0.035 / 0.14 | OK | -| | cachedInput | — | 0.00875 | **ADD** | -| | maxOutputTokens | — | 5K | **ADD 5120** | -| | speedOptimized | — | card: "Amazon's fastest text-only model, optimized for speed and low cost" | **ADD `speedOptimized: true`** | - -### Meta (Pricing API; all cards report max output 4K for 3.x, 8K for Llama 4) - -| model | field | repo | verified | verdict | -|---|---|---|---|---| -| llama4-maverick | input/output | 0.24 / 0.97 | 0.24 / 0.97 | OK | -| | maxOutputTokens | — | 8K (card) | **ADD 8192** | -| | ctx / date / lifecycle | 1M / 2025-04-05 / active | 1M / Apr 05 2025 / Active | OK | -| llama4-scout | input/output | 0.18 / 0.72 | **0.17 / 0.66** (`USE1-Llama4-Scout-17B-*` 0.00017 / 0.00066) | **FIX** | -| | maxOutputTokens | — | 8K | **ADD 8192** | -| | ctx | 10000000 | 10M (card) | OK (PR #4990 fix confirmed) | -| llama3-3-70b | input/output | 0.72 / 0.72 | 0.72 / 0.72 | OK | -| | lifecycle | active | **Active** (card; absent from Legacy table) — question (g) | OK | -| | maxOutputTokens | — | 4K | **ADD 4096** | -| llama3-2-90b | input/output | 2.0 / 2.0 | **0.72 / 0.72** (`USE1-Llama3-2-90B-*`) | **FIX** (deprecated but still billable until EOL 2026-07-07) | -| | deprecated | true | Legacy, EOL Jul 7 2026 | OK | -| llama3-2-11b | input/output | 0.16 / 0.16 | 0.16 / 0.16; Legacy EOL 2026-07-07 | OK | -| llama3-2-3b | input/output | 0.15 / 0.15 | 0.15 / 0.15; Legacy | OK | -| llama3-2-1b | input/output | 0.10 / 0.10 | 0.10 / 0.10; Legacy | OK | -| llama3-1-405b | input/output | 5.32 / 16.0 | **2.40 / 2.40** (`USW2-Llama3-1-405B-*` 0.0024; us-east-1 has only batch SKUs at 1.20) | **FIX** (deprecated, Legacy EOL 2026-07-07, but 
the repo price was ~2.2× (input) and ~6.7× (output) too high) | -| llama3-1-70b | input/output | 2.65 / 3.5 | **0.72 / 0.72** (`USE1-Llama3-1-70B-*`; the 2.65 figure resembles no AWS SKU — latency-optimized variant is a separate SKU) | **FIX** | -| | lifecycle | active | **Active** (card) — question (g) | OK | -| | maxOutputTokens / releaseDate | — / — | 4K / Jul 23 2024 | **ADD 4096, 2024-07-23** | -| llama3-1-8b | input/output | 0.3 / 0.6 | **0.22 / 0.22** (`USE1-Llama3-1-8B-*`) | **FIX** | -| | lifecycle | active | **Active** (card) | OK | -| | maxOutputTokens / releaseDate | — / — | 4K / Jul 23 2024 | **ADD 4096, 2024-07-23** | - -### Mistral AI (Pricing API + cards) - -| model | field | repo | verified | verdict | -|---|---|---|---|---| -| mistral-large-3-675b | input/output | 0.5 / 1.5 | 0.50 / 1.50 (`USE1-Mistral-Large-3-675b-Instruct-*`) | OK (PR #4990 confirmed) | -| | ctx / maxOutput | 256000 / 32768 | 256K / 32K (card) | OK | -| | releaseDate | — | Dec 2 2025 (card) | **ADD 2025-12-02** | -| | caching | — | card: prompt caching **Yes** (bedrock-runtime), but no cache-read SKU in Pricing API → rate unpublished | no `cachedInput` (documented) | -| mistral-large-2411 | input/output | 2.0 / 6.0 | **UNVERIFIABLE — model appears not to exist on Bedrock**: no model card (Mistral card index has only "Mistral Large" = 2402 and "Mistral Large 3"), no Pricing API SKU in us-east-1 or us-west-2, not in lifecycle table | keep price; entry is already `deprecated: true` (hidden); recommend follow-up removal | -| mistral-large-2407 | input/output | 4.0 / 12.0 | **2.00 / 6.00** (`USW2-MistralLarge2407-*` 0.002/0.006; us-west-2 only).
The 4/12 figure belongs to *Mistral Large 2402* (`USE1-MistralLarge-*` = 0.004/0.012) — repo had the two swapped | **FIX** (deprecated but billable) | -| pixtral-large-2502 | input/output | 2.0 / 6.0 | 2.00 / 6.00 (`USE1-PixtralLarge2502-*`) | OK (question b resolved) | -| | ctx / maxOutput / lifecycle | 128000 / 16384 / active | 128K / 16K / Active | OK | -| magistral-small-2509 | input/output | 0.5 / 1.5 | 0.50 / 1.50 | OK | -| | ctx / maxOutput / lifecycle | 128000 / 40000 / active | 128K / 40K / Active (card launch "Sep 2025", no day — no releaseDate added) | OK | -| ministral-3-14b | input/output | 0.2 / 0.2 | 0.20 / 0.20 (`USE1-Ministral-3-14b-Instruct-*`) | OK | -| | maxOutput / releaseDate | 8192 / — | 8K / Dec 2 2025 | **ADD 2025-12-02** | -| | caching | — | card shows no prompt-caching row → unconfirmed | no `cachedInput` | -| ministral-3-8b | input/output | 0.1 / 0.1 | **0.15 / 0.15** (`USE1-Ministral-3-8b-Instruct-*` 0.00015) | **FIX**; **ADD releaseDate 2025-12-02** | -| ministral-3-3b | input/output | 0.04 / 0.04 | **0.10 / 0.10** (`USE1-Ministral-3-3b-Instruct-*` 0.0001) | **FIX**; **ADD releaseDate 2025-12-02** (card "Ministral 3B" confirms ID `mistral.ministral-3-3b-instruct`, 128K ctx, 8K out, Active) | -| mixtral-8x7b | input/output | 0.45 / 0.7 | 0.45 / 0.70 (`USE1-Mixtral8x7B-*`) | OK (question b resolved) | -| | ctx / lifecycle | 32000 / active | 32K / Active | OK | -| | maxOutputTokens | — | 4K (card) | **ADD 4096** | - -### Amazon Titan / Cohere - -| model | field | repo | verified | verdict | -|---|---|---|---|---| -| titan-text-premier | input/output | 0.5 / 1.5 | 0.50 / 1.50 (`USE1-TitanText-Premier-*`, attribute `titanModel: "Titan Text G1 Premier"`) | OK | -| | deprecated | true | model card **removed** from the model-cards index (only Titan embeddings/image cards remain); absent from the Legacy table (which excludes models already past EOL) | OK — keep deprecated | -| cohere command-r | input/output | 0.5 / 1.5 | not in Pricing API 
(marketplace-billed); matches long-standing AWS list price | UNVERIFIABLE via Pricing API — keep | -| | deprecated | true | Legacy 2026-02-19, EOL 2026-08-19 (lifecycle + card) | OK | -| cohere command-r-plus | input/output | 3.0 / 15.0 | not in Pricing API; matches long-standing AWS list price | UNVERIFIABLE — keep | -| | deprecated | true | Legacy 2026-02-19, EOL 2026-08-19 | OK | - ---- - -## Changes made in this pass (fix list for models.ts — to be applied by the follow-up code change) - -Pricing (all `updatedAt` → `2026-06-11`): - -1. `bedrock/amazon.nova-2-pro-v1:0`: input 1.0 → 1.375, output 4.0 → 11.0 (Pricing API `USE1-Nova2.0Pro-text-*`) -2. `bedrock/amazon.nova-2-lite-v1:0`: input 0.08 → 0.33, output 0.32 → 2.75 (Pricing API `USE1-Nova2.0Lite-*`) -3. `bedrock/meta.llama4-scout-17b-instruct-v1:0`: input 0.18 → 0.17, output 0.72 → 0.66 -4. `bedrock/meta.llama3-2-90b-instruct-v1:0`: 2.0/2.0 → 0.72/0.72 -5. `bedrock/meta.llama3-1-405b-instruct-v1:0`: 5.32/16.0 → 2.40/2.40 (USW2 on-demand) -6. `bedrock/meta.llama3-1-70b-instruct-v1:0`: 2.65/3.5 → 0.72/0.72 -7. `bedrock/meta.llama3-1-8b-instruct-v1:0`: 0.3/0.6 → 0.22/0.22 -8. `bedrock/mistral.mistral-large-2407-v1:0`: 4.0/12.0 → 2.0/6.0 (USW2 `MistralLarge2407`) -9. `bedrock/mistral.ministral-3-8b-instruct`: 0.1/0.1 → 0.15/0.15 -10. `bedrock/mistral.ministral-3-3b-instruct`: 0.04/0.04 → 0.10/0.10 - -cachedInput additions (cache-read rate): - -11. claude-opus-4-5: 0.5; claude-sonnet-4-5: 0.3; claude-haiku-4-5: 0.1; claude-opus-4-1: 1.5 (Anthropic pricing 0.1× input; Bedrock cards confirm caching) -12. nova-2-lite: 0.0825; nova-premier: 0.625; nova-pro: 0.2; nova-lite: 0.015; nova-micro: 0.00875 (Pricing API cache-read SKUs; Nova cache writes are $0) - -maxOutputTokens: - -13. claude-opus-4-1: 32768 → 32000 (Anthropic overview "32k"; Bedrock card "32K") -14. nova-2-lite: add 64000; nova-pro/lite/micro: add 5120 each -15. 
llama4-maverick/scout: add 8192 each; llama3-3-70b, llama3-1-70b, llama3-1-8b: add 4096 each; mixtral-8x7b: add 4096 - -Flags / metadata: - -16. claude-sonnet-4-5: add `recommended: true` (bedrock default model; matches other providers' convention) -17. claude-haiku-4-5 and nova-micro: add `speedOptimized: true` (Anthropic "fastest model"; card "Amazon's fastest text-only model"). Ruled **against** `speedOptimized` on nova-2-lite — its card positions it as cost-efficient multimodal, not the speed tier. -18. releaseDate additions: mistral-large-3 `2025-12-02`; ministral-3-14b/8b/3b `2025-12-02`; llama3-1-70b/8b `2024-07-23` - -## Deliberately not changed - -- **Claude 4.5-gen geo premium (q. d):** kept base list prices; Sim's geo-profile routing actually bills 1.1× for opus/sonnet/haiku 4.5 per Anthropic's pricing page. Documented here rather than baked in (no AWS SKU to anchor; would overstate global-endpoint cost; consistent with list-price convention). -- **Release-date nits (q. h):** sonnet-4-5 `2025-09-29` and haiku-4-5 `2025-10-15` kept (Anthropic launch dates; Bedrock cards say +1 day). nova pro/lite/micro `2024-12-03` kept (re:Invent announcement; cards say Dec 05). nova-premier `2025-04-30` kept — AWS what's-new confirms GA Apr 30 2025; the card's "Oct 31 2025" contradicts AWS's own announcement. -- **Deprecated models' maxOutputTokens** (nova-premier 25K, llama3-2 4K, command-r/r+ 4K, mistral-large-2407 4K): per instruction, not added. -- **All deprecated flags from PR #4990 re-verified correct:** nova-premier, llama3-2 ×4, llama3-1-405b, command-r/r+ (Legacy with EOL dates on the lifecycle page), titan-text-premier (card removed from catalog), mistral-large-2411/2407 (absent from catalog). llama3-1-70b/8b and llama3-3-70b confirmed **Active** — correctly not deprecated. 
-- **mistral-large-3 / magistral / ministral-14b `cachedInput`:** Large 3 card says caching is supported but no cache-read SKU exists in the Pricing API; ministral-14b card shows no caching row. No invented numbers. -- **`bedrock/amazon.nova-2-pro-v1:0` and `bedrock/mistral.mistral-large-2411-v1:0` entries kept** (both `deprecated: true`, hidden): the former's real Bedrock ID is `amazon.nova-2-pro-preview-20251202-v1:0` (preview), the latter appears to have never shipped on Bedrock. Recommend a follow-up PR to remove/rename — out of scope for a validation pass. - -## Unverifiable - -- **cohere.command-r-v1:0 / command-r-plus-v1:0 prices** (0.5/1.5, 3/15): absent from the Pricing API (marketplace-billed); match the long-standing published AWS rates; models are Legacy. Kept as-is. -- **mistral-large-2411 price** (2/6): no SKU, no card; phantom entry (see above). -- **nova-2-pro geo-profile support**: no card; preview ID served via profiles per third-party trackers only. -- **Mistral Large 3 cache-read rate**: caching supported per card; rate unpublished. diff --git a/docs/models/deepseek-cerebras.md b/docs/models/deepseek-cerebras.md deleted file mode 100644 index 33f9927459..0000000000 --- a/docs/models/deepseek-cerebras.md +++ /dev/null @@ -1,189 +0,0 @@ -# Model Validation: `deepseek` & `cerebras` — apps/sim/providers/models.ts - -- **Date:** 2026-06-11 -- **Scope:** Final exhaustive re-validation after PR #4990 (deepseek-chat/reasoner repricing + 1M ctx, deprecation flags on deepseek-v3/r1 and cerebras llama3.1-8b/qwen-3-235b) -- **Method:** Live WebFetch of provider docs (primary), OpenRouter/ArtificialAnalysis/aggregators (secondary), DeepSeek news archive for release dates, `rg` of provider code to confirm capability consumption. Provider docs win on conflicts. 
- -## Sources - -| Source | URL | -|---|---| -| DeepSeek pricing (primary) | https://api-docs.deepseek.com/quick_start/pricing | -| DeepSeek list-models (primary) | https://api-docs.deepseek.com/api/list-models | -| DeepSeek chat-completion API ref (primary) | https://api-docs.deepseek.com/api/create-chat-completion | -| DeepSeek reasoning guide (primary) | https://api-docs.deepseek.com/guides/reasoning_model | -| DeepSeek V3 announcement | https://api-docs.deepseek.com/news/news1226 | -| DeepSeek R1 announcement | https://api-docs.deepseek.com/news/news250120 | -| DeepSeek V4 preview announcement | https://api-docs.deepseek.com/news/news260424 | -| Cerebras models overview (primary) | https://inference-docs.cerebras.ai/models/overview | -| Cerebras gpt-oss model page (primary) | https://inference-docs.cerebras.ai/models/openai-oss | -| Cerebras zai-glm-4.7 model page (primary) | https://inference-docs.cerebras.ai/models/zai-glm-47 | -| Cerebras deprecations (primary) | https://inference-docs.cerebras.ai/support/deprecation | -| Cerebras chat-completions API ref (primary) | https://inference-docs.cerebras.ai/api-reference/chat-completions | -| OpenRouter deepseek-v4-flash (secondary) | https://openrouter.ai/deepseek/deepseek-v4-flash | -| OpenRouter GLM 4.7 (secondary) | https://openrouter.ai/z-ai/glm-4.7 | -| ArtificialAnalysis gpt-oss-120b providers (secondary) | https://artificialanalysis.ai/models/gpt-oss-120b/providers | -| aimodelapis Cerebras GLM-4.7 (secondary) | https://aimodelapis.com/providers/cerebras/cerebras-zai-glm-4-7 | -| Cerebras GLM-4.7 launch blog (secondary) | https://www.cerebras.ai/blog/glm-4-7 | - -## Code-consumption checks - -- `rg "temperature" apps/sim/providers/deepseek/ apps/sim/providers/cerebras/`: - - `deepseek/index.ts:89` — `if (request.temperature !== undefined) payload.temperature = request.temperature` - - `cerebras/index.ts:85` — `if (request.temperature !== undefined) payload.temperature = request.temperature` - - Both 
providers forward temperature when set; a `temperature` capability in models.ts is what surfaces the slider (`getMaxTempFromDefinitions` in `providers/utils.ts`). With `capabilities: {}` the slider is hidden even though the API accepts the param. -- No `reasoningEffort`, `verbosity`, `thinking`, `nativeStructuredOutputs`, or `computerUse` handling exists in either provider implementation — do **not** add those capabilities even though Cerebras documents `reasoning_effort` (not consumed by code). -- `maxOutputTokens` is a supported capability field (`models.ts:42`) consumed by `providers/index.ts` — safe to recommend. - ---- - -## DeepSeek - -### Alias status (Open Question a) - -**Confirmed.** DeepSeek pricing page: "The model names `deepseek-chat` and `deepseek-reasoner` will be deprecated on **2026/07/24 15:59 UTC**." They correspond to the **non-thinking** and **thinking** modes of `deepseek-v4-flash` respectively. The list-models API now returns only `deepseek-v4-flash` and `deepseek-v4-pro`. Until 2026-07-24 the aliases remain valid API ids, so keeping them non-deprecated in models.ts is correct **for now** — they must be flipped to `deprecated: true` (or removed) by 2026-07-24. - -**Recommendation (separate work, not part of this pass):** add `deepseek-v4-flash` (input $0.14 / cached $0.0028 / output $0.28, ctx 1M, max output 384K, released 2026-04-24) and `deepseek-v4-pro` (input $0.435 / cached $0.003625 / output $0.87, ctx 1M, max output 384K) as first-class entries before the 2026-07-24 alias retirement, then deprecate the aliases. 
- -### deepseek-chat - -| Field | Current value | Verified value | Source | Verdict | -|---|---|---|---|---| -| id valid | `deepseek-chat` | Valid alias until 2026-07-24 15:59 UTC (→ v4-flash non-thinking) | pricing page | OK | -| pricing.input | 0.14 | $0.14/M (cache miss) | pricing page | OK | -| pricing.cachedInput | 0.0028 | $0.0028/M (cache hit) | pricing page | OK | -| pricing.output | 0.28 | $0.28/M | pricing page | OK | -| pricing.updatedAt | 2026-06-11 | — | — | OK | -| contextWindow | 1000000 | 1M tokens | pricing page | OK | -| capabilities.temperature | *(absent)* | Supported, range 0–2, default 1 ("What sampling temperature to use, between 0 and 2…") — applies to non-thinking mode | create-chat-completion API ref | **FIX: add `temperature: { min: 0, max: 2 }`** (code at `deepseek/index.ts:89` consumes it) | -| capabilities.maxOutputTokens | *(unset)* | Conflict: pricing page says 384K max output for v4-flash; reasoning guide (thinking mode) says default 32K / max 64K | pricing page vs reasoning guide | Leave unset — see "Deliberately not changed" | -| releaseDate | 2024-12-26 | V3 announcement 2024-12-26 (date the alias pointed to V3); alias now points to v4-flash (released 2026-04-24) | news1226, news260424 | OK (alias semantics — keep original anchor) | -| deprecated | *(absent)* | Alias still live | pricing page | OK until 2026-07-24 | - -### deepseek-v3 - -| Field | Current value | Verified value | Source | Verdict | -|---|---|---|---|---| -| id valid | `deepseek-v3` | **Not** a valid API id (list-models returns only v4-flash/v4-pro; never a documented API id — API ids were deepseek-chat/reasoner) | list-models | OK as `deprecated: true` | -| deprecated | true | Correct | list-models | OK | -| pricing | 0.28 / 0.028 / 0.42 (updatedAt 2026-04-01) | Historical V3.x pricing; model unpurchasable, frozen values acceptable | — | OK (legacy) | -| contextWindow | 128000 | Historical 128K | — | OK (legacy) | -| releaseDate | 2024-12-26 | DeepSeek-V3 
announced 2024-12-26 | news1226 | **Verified** | - -### deepseek-r1 - -| Field | Current value | Verified value | Source | Verdict | -|---|---|---|---|---| -| id valid | `deepseek-r1` | **Not** a valid API id (R1 was accessed as `deepseek-reasoner`) | list-models, news250120 | OK as `deprecated: true` | -| deprecated | true | Correct | list-models | OK | -| pricing | 0.55 / 0.14 / 2.19 | Matches original R1 launch pricing ($0.14 hit / $0.55 miss / $2.19 out) | news250120 | **Verified** (legacy, frozen) | -| contextWindow | 128000 | Historical | — | OK (legacy) | -| releaseDate | 2025-01-20 | R1 announced 2025-01-20 | news250120 | **Verified** | - -### deepseek-reasoner - -| Field | Current value | Verified value | Source | Verdict | -|---|---|---|---|---| -| id valid | `deepseek-reasoner` | Valid alias until 2026-07-24 15:59 UTC (→ v4-flash thinking) | pricing page | OK | -| pricing.input / cachedInput / output | 0.14 / 0.0028 / 0.28 | $0.14 / $0.0028 / $0.28 (same v4-flash pricing, both modes) | pricing page | OK | -| pricing.updatedAt | 2026-06-11 | — | — | OK | -| contextWindow | 1000000 | 1M | pricing page | OK | -| capabilities | `{}` (no temperature) | Reasoning guide: `temperature`, `top_p`, `presence_penalty`, `frequency_penalty`, `logprobs`, `top_logprobs` **not supported** — "will not trigger an error but will also have no effect" | reasoning guide | OK — must NOT add temperature | -| capabilities.maxOutputTokens | *(unset)* | Conflict (384K vs 32K/64K) | see below | Leave unset | -| releaseDate | 2025-01-20 | `model=deepseek-reasoner` introduced with R1 release 2025-01-20 | news250120 ("Use DeepSeek-R1 by setting model=deepseek-reasoner") | **Verified** | - -### maxOutputTokens conflict (Open Question a) - -- Pricing page (current, v4-flash): **384K max output**. -- Reasoning guide (deepseek-reasoner page): **default 32K, max 64K** — appears not yet updated for V4 (still reflects R1-era limits). 
-- The aliases map to v4-flash modes, so 384K is *probably* correct, but DeepSeek's own docs disagree with each other and the reasoning guide is the page specific to `deepseek-reasoner`. **Resolution: leave `maxOutputTokens` unset on both aliases** (current state) and set 384000 on the future `deepseek-v4-flash`/`deepseek-v4-pro` entries, where the pricing page is unambiguous. - -### Secondary-source pricing (DeepSeek) - -OpenRouter lists deepseek-v4-flash at **$0.098 in / $0.196 out** — exactly 70% of official $0.14/$0.28, i.e. the OpenRouter **−30% promo is still present**. Per policy, provider docs win: $0.14 / $0.0028 / $0.28 stands. OpenRouter confirms 1M context and the 2026-04-24 release date. - ---- - -## Cerebras - -### Deprecations (confirmed) - -Cerebras deprecation page lists **llama3.1-8b** and **qwen-3-235b-a22b-instruct-2507** as deprecated **2026-05-27**, recommended replacement "GPT OSS 120B". Neither appears on the models overview anymore. `deprecated: true` on both entries (PR #4990) is correct. 
- -### cerebras/gpt-oss-120b - -| Field | Current value | Verified value | Source | Verdict | -|---|---|---|---|---| -| id valid | `gpt-oss-120b` (after `cerebras/` strip at `cerebras/index.ts:82`) | Production model | models overview, model page | OK | -| pricing.input | 0.35 | $0.35/M | model page (live 2026-06-11) | OK | -| pricing.output | 0.75 | $0.75/M | model page | OK | -| pricing.updatedAt | 2026-06-11 | — | — | OK | -| contextWindow | 131072 | 131k (paid tiers; free tier 65k) | model page | OK (paid tier, consistent with repo convention) | -| capabilities.maxOutputTokens | *(unset)* | 40k paid tiers (32k free) | model page | **FIX: add `maxOutputTokens: 40000`** (paid tier, matching paid-tier ctx) | -| capabilities.temperature | *(absent)* | Cerebras chat-completions API: "sampling temperature to use, between 0 and 2.0" | API reference | **FIX: add `temperature: { min: 0, max: 2 }`** (code at `cerebras/index.ts:85` consumes it) | -| releaseDate | 2025-08-05 | gpt-oss released 2025-08-05; Cerebras day-one launch | cerebras.ai blog "OpenAI GPT OSS 120B Runs Fastest on Cerebras", techintelpro | **Verified** | - -Secondary-source note: several aggregators (crackedaiengineering, ArtificialAnalysis blended $0.39) still show launch-era pricing **$0.25/$0.69** and 33K max output. The live Cerebras model page (fetched today) says $0.35/$0.75 and 40k paid-tier max output — provider docs win; aggregators are stale. 
- -### cerebras/llama3.1-8b - -| Field | Current value | Verified value | Source | Verdict | -|---|---|---|---|---| -| deprecated | true | Deprecated 2026-05-27, migrate to GPT OSS 120B | deprecation page | **Verified** | -| pricing | 0.10 / 0.10 (frozen 2026-04-01) | Unpurchasable; frozen legacy values | — | OK (legacy) | -| contextWindow | 32768 | Historical | — | OK (legacy) | -| releaseDate | 2024-08-27 | Consistent with Cerebras Inference launch (2024-08-27); not re-verified against a live page this pass | — | Plausible / not re-verified (deprecated model, low stakes) | - -### cerebras/qwen-3-235b-a22b-instruct-2507 - -| Field | Current value | Verified value | Source | Verdict | -|---|---|---|---|---| -| deprecated | true | Deprecated 2026-05-27, migrate to GPT OSS 120B | deprecation page | **Verified** | -| pricing | 0.6 / 1.2 (frozen 2026-04-01) | Unpurchasable; frozen legacy values | — | OK (legacy) | -| contextWindow | 131072 | Historical | — | OK (legacy) | -| releaseDate | 2025-07-29 | Could not verify the exact Cerebras availability date | — | **Unverifiable** (deprecated model; leave as-is) | - -### cerebras/zai-glm-4.7 - -| Field | Current value | Verified value | Source | Verdict | -|---|---|---|---|---| -| id valid | `zai-glm-4.7` | Preview model on overview | models overview, model page | OK | -| pricing.input | 2.25 | $2.25/M | model page; confirmed by aimodelapis (secondary) | OK | -| pricing.output | 2.75 | $2.75/M | model page; aimodelapis | OK | -| pricing.updatedAt | 2026-06-11 | — | — | OK | -| contextWindow | 131072 | 131k paid tiers (free 64k) | model page; aimodelapis (131,000) | OK | -| capabilities.maxOutputTokens | *(unset)* | 40k tokens (both tiers) | model page; aimodelapis (40,000) | **FIX: add `maxOutputTokens: 40000`** | -| capabilities.temperature | *(absent)* | API-wide param, 0–2.0 | API reference | **FIX: add `temperature: { min: 0, max: 2 }`** | -| releaseDate | 2025-12-22 | GLM-4.7 released 2025-12-22 (OpenRouter "Dec 22, 
2025"; PR Newswire; Cerebras same-day launch blog) | multiple | **Verified** | - ---- - -## Changes made in this pass (PR #4990) — all re-verified correct - -1. `deepseek-chat` & `deepseek-reasoner` repriced to $0.14 / $0.0028 cached / $0.28 — matches v4-flash pricing they now alias. ✅ -2. `deepseek-chat` & `deepseek-reasoner` contextWindow → 1,000,000 — matches v4-flash 1M default. ✅ -3. `deprecated: true` on `deepseek-v3` and `deepseek-r1` — neither is a valid API id (list-models returns only v4-flash/v4-pro). ✅ -4. `deprecated: true` on `cerebras/llama3.1-8b` and `cerebras/qwen-3-235b-a22b-instruct-2507` — Cerebras deprecation page, 2026-05-27. ✅ -5. `pricing.updatedAt: 2026-06-11` bumps on the four live-model entries. ✅ - -## Outstanding fixes recommended (not applied — doc-only pass) - -1. `deepseek-chat`: add `capabilities.temperature: { min: 0, max: 2 }` — API ref documents temperature 0–2 (default 1) for chat completions; non-thinking mode honors it; `deepseek/index.ts:89` forwards it. Currently the empty `capabilities` hides Sim's temperature slider for a model that supports it. -2. `cerebras/gpt-oss-120b`: add `capabilities.temperature: { min: 0, max: 2 }` and `capabilities.maxOutputTokens: 40000`. -3. `cerebras/zai-glm-4.7`: add `capabilities.temperature: { min: 0, max: 2 }` and `capabilities.maxOutputTokens: 40000`. - -## Deliberately not changed - -- **`deepseek-reasoner` capabilities stay `{}`** — reasoning guide explicitly lists temperature as unsupported/no-effect in thinking mode. -- **`deepseek-chat`/`deepseek-reasoner` not marked deprecated** — valid aliases until 2026-07-24 15:59 UTC. Calendar item: deprecate (and add v4-flash/v4-pro entries) before that date. -- **`maxOutputTokens` left unset on both DeepSeek aliases** — DeepSeek docs self-conflict (pricing page: 384K for v4-flash; reasoning guide: 32K default / 64K max for deepseek-reasoner). Set 384000 only on future first-class `deepseek-v4-*` entries where the pricing page is unambiguous. 
-- **Legacy pricing/ctx on the four deprecated entries** (deepseek-v3, deepseek-r1, llama3.1-8b, qwen-3-235b) — frozen historical values on unpurchasable models; R1 values cross-checked against the original announcement. -- **No `reasoningEffort` capability for Cerebras** despite the model pages documenting `reasoning_effort` — `cerebras/index.ts` does not consume it (capability additions must be backed by docs AND code). -- **OpenRouter −30% DeepSeek promo pricing ($0.098/$0.196) ignored** — provider docs win. -- **deepseek-chat releaseDate kept at 2024-12-26** — anchor is the V3 announcement; the id predates V3 and now aliases v4-flash (2026-04-24); any value is a judgment call for an alias, so the existing anchor is retained. - -## Unverifiable - -- `cerebras/qwen-3-235b-a22b-instruct-2507` releaseDate 2025-07-29 — no live source found for the exact Cerebras availability date (model delisted). Left as-is. -- `cerebras/llama3.1-8b` releaseDate 2024-08-27 — consistent with the known Cerebras Inference launch date but not re-verified against a live page this pass (model delisted). -- Cerebras temperature **default** value — API ref documents the 0–2.0 range but not a default. diff --git a/docs/models/embeddings-rerank-dynamic.md b/docs/models/embeddings-rerank-dynamic.md deleted file mode 100644 index 46ef6b6967..0000000000 --- a/docs/models/embeddings-rerank-dynamic.md +++ /dev/null @@ -1,75 +0,0 @@ -# Validation: EMBEDDING_MODEL_PRICING, RERANK_MODEL_PRICING, and dynamic providers - -- **Date:** 2026-06-11 -- **File validated:** `apps/sim/providers/models.ts` (`EMBEDDING_MODEL_PRICING` ~L3289, `RERANK_MODEL_PRICING` ~L3320, dynamic provider definitions ~L87–191, L2503–2515, update functions ~L3190–3287) -- **Method:** Every numeric claim checked via live WebFetch against the provider's first-party docs, with at least one secondary tracker where available. WebSearch used as fallback when a page truncated. No edits were made to `models.ts`. 
-- **Primary sources:** - - OpenAI: `developers.openai.com/api/docs/models/text-embedding-3-small` / `.../text-embedding-3-large` / `.../text-embedding-ada-002` (the aggregate pricing page truncates before the embeddings table; per-model pages carry the prices) - - Google: `ai.google.dev/gemini-api/docs/pricing` - - Cohere: `cohere.com/pricing` (Model Vault only — per-search API pricing not rendered), `docs.cohere.com/docs/how-does-cohere-pricing-work` (confirms rerank is billed per search, no numbers), `docs.cohere.com/docs/rerank` (model list) - - Secondary trackers: Vercel AI Gateway (`vercel.com/ai-gateway/models/rerank-v4-pro`, `.../rerank-v4-fast`), eesel.ai Cohere pricing guide, metacto.com Cohere pricing deep dive, cloudprice.net, TokenMix/costgoat (OpenAI embeddings) - - Provider API docs: `docs.fireworks.ai/api-reference/post-chatcompletions`, `docs.together.ai/reference/chat-completions`, `openrouter.ai/docs` parameters reference, `docs.ollama.com/api/openai-compatibility`, `docs.baseten.co/development/model-apis/overview` - -## EMBEDDING_MODEL_PRICING - -| Entry | Field | Value in code | Verified value | Source | Verdict | -|---|---|---|---|---|---| -| `text-embedding-3-small` | input | $0.02 / 1M | $0.02 / 1M | developers.openai.com model page; TokenMix secondary | CORRECT | -| `text-embedding-3-small` | output | $0.00 | n/a (embeddings bill input only) | OpenAI docs | CORRECT | -| `text-embedding-3-large` | input | $0.13 / 1M | $0.13 / 1M | developers.openai.com model page; TokenMix secondary | CORRECT | -| `text-embedding-3-large` | output | $0.00 | n/a | OpenAI docs | CORRECT | -| `text-embedding-ada-002` | input | $0.10 / 1M | $0.10 / 1M | developers.openai.com model page; search secondary | CORRECT | -| `text-embedding-ada-002` | output | $0.00 | n/a | OpenAI docs | CORRECT | -| `gemini-embedding-001` | input | $0.15 / 1M | $0.15 / 1M (paid tier, standard; batch is $0.075) | ai.google.dev/gemini-api/docs/pricing | CORRECT | -| 
`gemini-embedding-001` | output | $0.00 | n/a | Google docs | CORRECT | - -## RERANK_MODEL_PRICING (per search unit = 1 query × ≤100 docs) - -| Entry | Value in code | Verified value | Source | Verdict | -|---|---|---|---|---| -| `rerank-v4.0-pro` | $0.0025 / search | $2.50 / 1k searches ($0.0025) | Vercel AI Gateway rerank-v4-pro page ("$2.5/K, billed per search query"); eesel.ai ("$0.0025 / search") | CORRECT | -| `rerank-v4.0-fast` | $0.002 / search | $2.00 / 1k searches ($0.002) | Vercel AI Gateway rerank-v4-fast page ("$2/K"); eesel.ai ("$0.002 / search") | CORRECT | -| `rerank-v3.5` | $0.002 / search | $2.00 / 1k searches ($0.002) Cohere direct & Bedrock | metacto ("$2.00 per 1,000 searches"); cloudprice.net ($0.0020/unit, Cohere + Bedrock rows agree) | CORRECT | - -Notes: - -- `cohere.com/pricing` currently only renders Model Vault (dedicated instance) hourly pricing; the per-search API table is JS-rendered and not fetchable. `docs.cohere.com/docs/how-does-cohere-pricing-work` confirms rerank is "priced based on the quantity of searches" (per-search, not per-token), which validates the `perSearchUnit` modeling and the ≤100-doc cap comment in the code. -- Conflicting source resolved: OpenRouter lists `cohere/rerank-v3.5` at $0.001/search, but that is OpenRouter's reseller price, not Cohere first-party. Sim calls Cohere directly, so $0.002 stands. -- Cohere also offers `rerank-english-v3.0` and `rerank-multilingual-v3.0`; Sim does not expose them, so no entries are needed. - -## Dynamic providers (provider-level config sanity pass) - -All eight have empty static `models: []` populated at runtime via `update*Models()` (pricing zeroed, `updatedAt` set to today — intentional for BYOK/reseller providers). `modelPatterns` prefixes match each provider's `update*` function and prefix-stripping in the provider implementations. 
- -| Provider | Config checked | Verdict | -|---|---|---| -| `fireworks` | temp 0–2, toolUsageControl true, pattern `/^fireworks\//` | CORRECT — Fireworks docs: temperature "between 0 and 2", full `tool_choice` support (`none`/`auto`/`required`/named) | -| `together` | temp 0–2, toolUsageControl true, pattern `/^together\//` | **DISCREPANCY** — Together's own API reference documents temperature as "a decimal number from 0-1"; `tool_choice` supported. Sim declares max 2. Flagged below; not changed in this pass | -| `baseten` | temp 0–2, toolUsageControl true, pattern `/^baseten\//` | SANE — Model APIs are OpenAI-compatible (docs.baseten.co); exact temp bounds not published, 0–2 follows the OpenAI convention | -| `openrouter` | temp 0–2, toolUsageControl true, pattern `/^openrouter\//` | CORRECT — OpenRouter docs: temperature 0.0–2.0, default 1.0 | -| `ollama-cloud` | temp 0–2, toolUsageControl **true**, pattern `/^ollama-cloud\//` | **QUESTIONABLE** — Ollama's OpenAI-compat layer (same API at `ollama.com/v1`) explicitly lists `tool_choice` as unsupported, and Sim's own shared core (`apps/sim/providers/ollama/core.ts:140-147`) degrades forced tool selection to `auto` with a warning. Local `ollama` correctly sets `toolUsageControl: false`; `ollama-cloud: true` is inconsistent. Flagged below; not changed in this pass | -| `vllm` | temp 0–2, toolUsageControl true, `defaultModel: 'vllm/generic'`, pattern `/^vllm\//` | SANE — vLLM's OpenAI-compatible server accepts temperature ≥0 (no hard cap of 2); 0–2 is a reasonable UI cap. 
`vllm/generic` matches the pattern and is the documented placeholder (only other reference is the vllm provider test) | -| `litellm` | temp 0–2, toolUsageControl true, pattern `/^litellm\//` | SANE — proxy passthrough; effective bounds depend on the upstream model, 0–2 is the OpenAI-convention cap | -| `ollama` (local) | toolUsageControl false ("does not support tool_choice"), no temp block, `modelPatterns: []` | CORRECT — docs.ollama.com OpenAI-compatibility page lists `tool_choice` as unsupported (temperature is supported); empty patterns are intentional since local model names are arbitrary and matched via the providers store | - -## `gemini` vs `google` provider key - -- `PROVIDER_DEFINITIONS` contains only `google` (L1303, `defaultModel: 'gemini-2.5-pro'`, patterns `/^gemini/`, `/^deep-research/`). There is no `gemini` registry key, and nothing calls `getProviderModels('gemini')` — all callers use `'google'` (models.ts L3163, `apps/sim/providers/google/index.ts:21`). -- `apps/sim/providers/gemini/` exists but is **not a provider**: it holds only `core.ts`/`types.ts` (shared Gemini execution logic consumed by both the `google` and `vertex` providers). No `index.ts`, not registered in `registry.ts`. -- The only `'gemini'` string key is the rotating-API-key namespace: `apps/sim/providers/utils.ts:891` maps provider `google` → `getRotatingApiKey('gemini')`, matching the `GEMINI_API_KEY_*` env convention in `apps/sim/lib/core/config/api-keys.ts`. Intentional; nothing structurally odd. - -## Changes made in this pass - -None. All `EMBEDDING_MODEL_PRICING` and `RERANK_MODEL_PRICING` values verified correct; instructions prohibited edits to `models.ts`. - -## Deliberately not changed - -- **`together` temperature max 2 vs documented 0–1:** Together's API reference documents 0–1, but the endpoint is OpenAI-compatible and tolerantly accepts higher values in practice; tightening to `max: 1` would change UI slider behavior for existing workflows. 
Left for a deliberate follow-up decision. -- **`ollama-cloud` `toolUsageControl: true`:** inconsistent with local `ollama: false` and with Ollama's documented lack of `tool_choice`. Runtime is already safe (shared core degrades forced selection to `auto` with a warning), so this only mis-advertises a capability in the UI. Left for follow-up. -- Dynamic-model zero pricing (`input: 0, output: 0`) in all `update*Models()` functions — intentional for BYOK/reseller providers where Sim doesn't bill model usage. - -## Unverifiable - -- **Cohere first-party per-search price page:** `cohere.com/pricing`'s API pricing table does not render server-side; per-search numbers were confirmed via two independent secondary trackers per model plus Cohere docs confirming the per-search billing unit. -- **Baseten and LiteLLM exact temperature bounds:** neither publishes a numeric range (OpenAI-compatible passthrough); 0–2 judged sane by convention rather than verified. -- **vLLM upper temperature bound:** vLLM accepts temperatures above 2; the 0–2 cap is a UI choice, not a provider-documented limit. diff --git a/docs/models/google.md b/docs/models/google.md deleted file mode 100644 index 215ea82402..0000000000 --- a/docs/models/google.md +++ /dev/null @@ -1,184 +0,0 @@ -# Google Provider Model Validation — Final Pass - -- **Date:** 2026-06-11 -- **Scope:** `google` block in `apps/sim/providers/models.ts` (10 models), re-verifying everything including changes landed in PR #4990 -- **Method:** Live WebFetch of ai.google.dev (models overview, per-model pages, pricing, thinking, deprecations, changelog, generate-content API reference) and cloud.google.com Vertex AI pricing; OpenRouter as secondary pricing source; WebSearch for GA dates. Google docs treated as authoritative where sources conflict. 
-- **Primary sources:** - - https://ai.google.dev/gemini-api/docs/models (+ per-model pages) - - https://ai.google.dev/gemini-api/docs/pricing - - https://ai.google.dev/gemini-api/docs/thinking - - https://ai.google.dev/gemini-api/docs/deprecations - - https://ai.google.dev/gemini-api/docs/changelog - - https://ai.google.dev/gemini-api/docs/interactions/deep-research - - https://ai.google.dev/api/generate-content (GenerationConfig) - - https://cloud.google.com/vertex-ai/generative-ai/pricing ("Gemini Deep Research Agent" row) - - OpenRouter model pages (secondary pricing) - -## Provider-level checks - -| Check | Result | -|---|---| -| Capability consumption in `apps/sim/providers/gemini/` | Only `thinking` is consumed: `request.thinkingLevel` → `mapToThinkingLevel` → `thinkingConfig` (`gemini/core.ts:955-961`). No references to `reasoningEffort`, `verbosity`, `nativeStructuredOutputs`, or `computerUse`. Declaring `thinking.levels`/`default` per model is the only capability surface that affects requests. | -| `temperature: { min: 0, max: 2 }` | **Verified.** GenerationConfig documents temperature range [0.0, 2.0] (https://ai.google.dev/api/generate-content). Note Google recommends keeping 1.0 default on Gemini 3 models, but 0–2 is the accepted API range. Verdict: correct on all entries. | -| 2.5-series entries have no `thinking` capability | **Correct by design.** Gemini 2.5 uses `thinkingBudget`, not `thinkingLevel` (https://ai.google.dev/gemini-api/docs/thinking). Our provider only sends `thinkingConfig` when a level is selected, so omitting `thinking` on 2.5 entries is right. 
| - -## Per-model verification - -### gemini-3.5-flash - -| Field | Our value | Source | Verdict | -|---|---|---|---| -| id | `gemini-3.5-flash` (stable/GA) | docs/models, model page | OK | -| pricing.input | 1.5 | docs/pricing ($1.50); Vertex ($1.50 global); OpenRouter ($1.50) | OK | -| pricing.cachedInput | 0.15 | docs/pricing ($0.15); Vertex ($0.15) | OK | -| pricing.output | 9.0 | docs/pricing ($9.00); Vertex ($9.00); OpenRouter ($9.00) | OK | -| thinking.levels | minimal/low/medium/high | docs/thinking | OK | -| thinking.default | medium | docs/thinking ("Default: medium"); OpenRouter ("defaults to medium thinking effort") | OK | -| maxOutputTokens | 65536 | model page (65,536) | OK | -| contextWindow | 1048576 | model page (1,048,576) | OK | -| releaseDate | 2026-05-19 | changelog: "May 19, 2026 — Released `gemini-3.5-flash`, the generally available (GA) version" | OK | -| recommended | true | Google's flagship recommendation; replacement target for 2.0-flash and 3-flash-preview | OK | - -### gemini-3.1-pro-preview - -| Field | Our value | Source | Verdict | -|---|---|---|---| -| id | `gemini-3.1-pro-preview` | docs/models, model page | OK | -| pricing.input | 2.0 | docs/pricing ($2.00 ≤200k; $4.00 >200k); OpenRouter ($2) | OK (base tier; see "Deliberately not changed") | -| pricing.cachedInput | 0.2 | docs/pricing ($0.20 ≤200k) | OK | -| pricing.output | 12.0 | docs/pricing ($12.00 ≤200k; $18.00 >200k); OpenRouter ($12) | OK | -| thinking.levels | low/medium/high (no minimal — PR #4990 change) | docs/thinking: "Supported levels: low, medium, high"; "Thinking cannot be disabled" | OK — #4990 change re-confirmed | -| thinking.default | high | docs/thinking ("Default: high (dynamic)") | OK | -| maxOutputTokens | 65536 | model page | OK | -| contextWindow | 1048576 | model page (1,048,576) | OK | -| releaseDate | 2026-02-19 | changelog: "Feb 19, 2026 — Released Gemini 3.1 Pro Preview" | OK | - -### gemini-3.1-flash-lite - -| Field | Our value | Source | Verdict 
| -|---|---|---|---| -| id | `gemini-3.1-flash-lite` (stable — PR #4990 rename) | docs/models lists stable; `gemini-3.1-flash-lite-preview` marked "Shut down" (May 25, 2026 per deprecations) | OK — rename re-confirmed | -| pricing.input | 0.25 | docs/pricing ($0.25 text); Vertex ($0.25 global); OpenRouter ($0.25) | OK | -| pricing.cachedInput | 0.025 | docs/pricing ($0.025); Vertex ($0.025) | OK | -| pricing.output | 1.5 | docs/pricing ($1.50); Vertex ($1.50); OpenRouter ($1.50) | OK | -| thinking.levels | minimal/low/medium/high | docs/thinking; OpenRouter ("full thinking levels (minimal, low, medium, high)") | OK | -| thinking.default | minimal | docs/thinking: "Default: minimal" — Google's documented API default for this model **is** `minimal`, so our value matches the API default (the earlier report that the API default is 'high' is not supported by current docs). Also aligns with our cost-saving intent. | OK | -| maxOutputTokens | 65536 | model page (65,536) | OK | -| contextWindow | 1048576 | model page (1,048,576) | OK | -| releaseDate | **2026-03-03 — STALE.** That is the preview's release date. GA changelog: "May 7, 2026 — Released `gemini-3.1-flash-lite`, the generally available (GA) version"; Google Cloud blog GA announcement published 2026-05-08. Changelog (Gemini API source of truth) wins. | changelog; cloud.google.com blog "Gemini 3.1 Flash-Lite is now generally available" | **FIX → 2026-05-07** | -| speedOptimized | (absent) | Model page: "optimized for low-latency, cost-effective" high-volume tasks; Google blog: "fastest and most cost-efficient Gemini 3 series model". Precedent: `gemini-2.5-flash-lite` carries `speedOptimized: true` and Google's models page calls 2.5-flash-lite "the fastest and most budget-friendly" of its generation — 3.1-flash-lite holds the same position in the Gemini 3 generation. 
| **FIX → add `speedOptimized: true`** | - -### gemini-3-flash-preview - -| Field | Our value | Source | Verdict | -|---|---|---|---| -| id | `gemini-3-flash-preview` | docs/models, model page | OK | -| pricing.input | 0.5 | docs/pricing ($0.50 text); OpenRouter ($0.50) | OK | -| pricing.cachedInput | 0.05 | docs/pricing ($0.05) | OK | -| pricing.output | 3.0 | docs/pricing ($3.00); OpenRouter ($3.00) | OK | -| thinking.levels | minimal/low/medium/high | docs/thinking | OK | -| thinking.default | high | docs/thinking ("Default: high (dynamic)") | OK | -| maxOutputTokens | 65536 | model page | OK | -| contextWindow | 1048576 (PR #4990 change) | model page (1,048,576); OpenRouter (1M) | OK — #4990 change re-confirmed | -| releaseDate | 2025-12-17 | changelog: "Dec 17, 2025 — Launched Gemini 3 Flash Preview"; OpenRouter | OK | -| deprecated | (absent) | docs/deprecations lists `gemini-3-flash-preview` in the deprecation table with recommended replacement `gemini-3.5-flash`, **no shutdown date announced yet**. (The model's own page still renders as an active preview — the deprecations table is the authoritative lifecycle source.) 
| **FIX → add `deprecated: true`** | - -### gemini-2.5-pro - -| Field | Our value | Source | Verdict | -|---|---|---|---| -| pricing.input | 1.25 | docs/pricing ($1.25 ≤200k); OpenRouter ($1.25) | OK (base tier) | -| pricing.cachedInput | 0.125 | docs/pricing ($0.125 ≤200k) | OK | -| pricing.output | 10.0 | docs/pricing ($10.00 ≤200k); OpenRouter ($10) | OK | -| maxOutputTokens | 65536 | longstanding model-page value | OK | -| contextWindow | 1048576 | OpenRouter (1M); longstanding model-page value | OK | -| releaseDate | 2025-03-25 | preview launch date (GA was 2025-06-17); repo convention uses first availability | OK | - -### gemini-2.5-flash - -| Field | Our value | Source | Verdict | -|---|---|---|---| -| pricing.input | 0.3 | docs/pricing ($0.30 text) | OK | -| pricing.cachedInput | 0.03 | docs/pricing ($0.03) | OK | -| pricing.output | 2.5 | docs/pricing ($2.50) | OK | -| maxOutputTokens / contextWindow | 65536 / 1048576 | longstanding model-page values | OK | -| releaseDate | 2025-05-20 | I/O 2025 preview launch | OK | - -### gemini-2.5-flash-lite - -| Field | Our value | Source | Verdict | -|---|---|---|---| -| pricing.input | 0.1 | docs/pricing ($0.10 text) | OK | -| pricing.cachedInput | 0.01 | docs/pricing ($0.01) | OK | -| pricing.output | 0.4 | docs/pricing ($0.40) | OK | -| maxOutputTokens / contextWindow | 65536 / 1048576 | longstanding model-page values | OK | -| releaseDate | 2025-06-17 | launch announcement | OK | -| speedOptimized | true | docs/models: "fastest and most budget-friendly multimodal model" | OK | - -### gemini-2.0-flash (deprecated) - -| Field | Our value | Source | Verdict | -|---|---|---|---| -| deprecated | true (PR #4990 change) | docs/deprecations: shutdown June 1, 2026; changelog: "now shut down"; docs/pricing marks "(deprecated; shutdown June 1, 2026)". Replacement: gemini-3.5-flash. | OK — #4990 change re-confirmed. Entry retained intentionally for saved-workflow history. 
| -| pricing | input 0.1 / cachedInput 0.025 / output 0.4 | docs/pricing (still published) | OK | -| maxOutputTokens / contextWindow | 8192 / 1048576 | historical model-page values | OK | -| releaseDate | 2025-02-05 | GA announcement | OK | - -### gemini-2.0-flash-lite (deprecated) - -| Field | Our value | Source | Verdict | -|---|---|---|---| -| deprecated | true (PR #4990 change) | docs/deprecations: shutdown June 1, 2026. Replacement: gemini-3.1-flash-lite. | OK — re-confirmed; retained for history | -| pricing | input 0.075 / output 0.3 (no cachedInput — caching was never priced for this SKU) | docs/pricing | OK | -| maxOutputTokens / contextWindow | 8192 / 1048576 | historical model-page values | OK | -| releaseDate | 2025-02-25 | GA announcement | OK | - -### deep-research-pro-preview-12-2025 - -| Field | Our value | Source | Verdict | -|---|---|---|---| -| id | `deep-research-pro-preview-12-2025` | model page https://ai.google.dev/gemini-api/docs/models/deep-research-pro-preview-12-2025 (Interactions API) | OK | -| pricing.input | 2.0 (PR #4990) | Vertex AI pricing, "Gemini Deep Research Agent": $2/1M input | OK — re-confirmed | -| pricing.cachedInput | 0.2 (PR #4990) | Vertex AI pricing: $0.2/1M cached input | OK — re-confirmed | -| pricing.output | 12.0 (PR #4990, was 2.0) | Vertex AI pricing: $12/1M output (response and reasoning). Consistent with underlying Gemini 3.1 Pro rates ($2/$0.2/$12). 
| OK — re-confirmed | -| capabilities | deepResearch: true, memory: false | model page (agentic researcher; Interactions API) | OK | -| maxOutputTokens | 65536 | model page (65,536) | OK | -| contextWindow | 1048576 (PR #4990) | model page (1,048,576) | OK — re-confirmed | -| releaseDate | 2025-12-11 | model page only says "December 2025"; exact day not published in fetched docs | Unverifiable to the day; month consistent — keep | -| Lifecycle | Not listed on docs/deprecations; no shutdown announced | docs/deprecations | OK to keep | - -**Recommendation (documented only, no entries added):** Google introduced `deep-research-preview-04-2026` and `deep-research-max-preview-04-2026` on 2026-04-21 (changelog; https://ai.google.dev/gemini-api/docs/interactions/deep-research). The Deep Research interactions doc now leads with these SKUs and prices them per-task (~$1–3 / ~$3–7). A follow-up should evaluate adding them once per-token pricing is published; `deep-research-pro-preview-12-2025` remains documented and un-deprecated, so no change now. - -## Changes made in this pass - -None to `models.ts` (per task rules — fix list reported separately). This document is the only artifact. - -## Re-confirmed PR #4990 changes - -1. `gemini-3.1-flash-lite-preview` → `gemini-3.1-flash-lite` rename — preview slug shut down 2026-05-25 (deprecations page); stable listed on docs/models. -2. `gemini-3.1-pro-preview` thinking.levels without `minimal` — docs/thinking lists low/medium/high only; "thinking cannot be disabled". -3. `gemini-3-flash-preview` contextWindow 1048576 — model page. -4. `deprecated: true` on gemini-2.0-flash and gemini-2.0-flash-lite — shut down 2026-06-01 (deprecations + changelog). -5. Deep Research output 12.0, cachedInput 0.2, contextWindow 1048576 — Vertex pricing row + model page. - -## Recommended fixes (not applied) - -1. 
`gemini-3.1-flash-lite`: `releaseDate` `2026-03-03` → `2026-05-07` — current value is the preview's release date; GA released May 7, 2026 per Gemini API changelog (Cloud blog announcement published May 8, 2026; changelog wins as the API source of truth). -2. `gemini-3.1-flash-lite`: add `speedOptimized: true` — Google positions it as the fastest, most cost-efficient Gemini 3 model (model page, GA blog); matches the precedent set by `gemini-2.5-flash-lite`. -3. `gemini-3-flash-preview`: add `deprecated: true` — formally listed on https://ai.google.dev/gemini-api/docs/deprecations with replacement `gemini-3.5-flash` (no shutdown date announced yet). - -## Deliberately not changed - -- **`gemini-3.1-flash-lite` thinking.default `minimal`** — matches Google's documented default for this model (docs/thinking: "Default: minimal") and is also our intended cost-saving default. No conflict. -- **Tiered pricing (`gemini-3.1-pro-preview`, `gemini-2.5-pro`)** — we model the ≤200k-token base tier ($2/$12 and $1.25/$10). The >200k tier ($4/$18 and $2.50/$15) is not representable in the flat pricing schema; base tier is the established repo convention. -- **Audio input pricing** (flash models have higher audio-input rates, e.g. 3.1-flash-lite $0.50 audio) — schema models text-input pricing only; convention. -- **gemini-2.0-flash / -flash-lite entries kept despite shutdown** — `deprecated: true` retained instead of deletion so saved workflows referencing them keep rendering history correctly. -- **Deep Research newer SKUs not added** — per-task preview pricing only; documented as a follow-up recommendation above. -- **`gemini-2.5-pro` releaseDate 2025-03-25** — preview-launch date; repo convention is first availability, not GA (2025-06-17). -- **`updatedAt: 2026-06-11`** on all entries — accurate as of this validation. 
- -## Unverifiable - -- **deep-research-pro-preview-12-2025 exact release day (2025-12-11)** — Google docs only state "December 2025"; the day-level value could not be confirmed or refuted. Month consistent; left as-is. -- **2.5-series maxOutputTokens (65,536) and 2.0-series limits (8,192 / 1,048,576)** — not re-fetched per-model in this pass; values match longstanding Google model-page specs and were unchanged by PR #4990. OpenRouter corroborates 1M context for 2.5-pro. -- **Gemini API pricing page for Deep Research** — the ai.google.dev pricing page does not list the 12-2025 SKU (it now points at the 04-2026 per-task estimates); per-token verification rests on the Vertex AI "Gemini Deep Research Agent" row alone (single — but official Google — source). diff --git a/docs/models/groq.md b/docs/models/groq.md deleted file mode 100644 index b4bc6905d4..0000000000 --- a/docs/models/groq.md +++ /dev/null @@ -1,157 +0,0 @@ -# Groq Provider Validation — Final Pass - -**Date:** 2026-06-11 -**Scope:** `groq` provider block in `apps/sim/providers/models.ts` (8 models). Re-verifies everything, including the changes landed in PR #4990 (kimi `deprecated: true`, gpt-oss `cachedInput`, `updatedAt` bumps). - -## Sources & Method - -| Source | What it verified | -|---|---| -| `https://api.groq.com/openai/v1/models` (live, authenticated with local dev key) | Active model list, `context_window`, `max_completion_tokens`, `created` timestamps. Groq's own per-model doc pages render their spec tables client-side from this same data ("Loading model information..." in static HTML), so the API is the authoritative equivalent of the per-model pages. 
| -| `https://groq.com/pricing` (live fetch) | All input/cached-input/output rates | -| `https://console.groq.com/docs/prompt-caching` (live fetch) | Caching-supported model list, 50% cached-token discount | -| `https://console.groq.com/docs/deprecations` (live fetch) | kimi shutdown, qwen3-32b status | -| `https://console.groq.com/docs/models` + per-model `.md` pages (live fetch) | Featured/flagship positioning, context-window prose, model-card positioning | -| Groq OpenAPI spec embedded in `console.groq.com/docs/model/*` HTML | `temperature` parameter bounds (`minimum: 0, maximum: 2`) | -| OpenRouter `GET /api/v1/models/{author}/{slug}/endpoints` Groq rows (secondary) | Pricing cross-check, `max_completion_tokens` cross-check | -| WebSearch (Meta blog coverage, Moonshot K2-0905 announcement coverage) | Upstream release dates | - -Rule applied: where Groq's own sources conflict with secondary sources, Groq wins. - -## Per-Model Verification - -### groq/openai/gpt-oss-120b - -| Field | Repo value | Verified value | Source | Verdict | -|---|---|---|---|---| -| pricing.input | 0.15 | $0.15/M | groq.com/pricing; OpenRouter Groq row 0.00000015 | OK | -| pricing.cachedInput | 0.075 | $0.075/M | groq.com/pricing (explicit cached column); prompt-caching doc 50% rule; OpenRouter 0.000000075 | OK (PR #4990 change confirmed) | -| pricing.output | 0.6 | $0.60/M | groq.com/pricing; OpenRouter | OK | -| contextWindow | 131072 | 131072 | api.groq.com/openai/v1/models; model card "131K context window" | OK | -| capabilities.maxOutputTokens | — (absent) | 65536 | api.groq.com/openai/v1/models `max_completion_tokens`; OpenRouter agrees | **FIX: add 65536** | -| releaseDate | 2025-08-05 | 2025-08-05 | Groq API `created` = 1754408224 → 2025-08-05 UTC | OK | -| recommended | — (absent) | should be `true` | console.groq.com/docs/models features it as "OpenAI's flagship open-weight language model" (~500 t/s); deprecations page names `openai/gpt-oss-120b` as the recommended replacement (incl. 
for kimi-k2-instruct-0905) | **FIX: add `recommended: true`** | -| deprecated | — | active | live API `active: true`; not on deprecations page | OK | - -### groq/openai/gpt-oss-20b - -| Field | Repo value | Verified value | Source | Verdict | -|---|---|---|---|---| -| pricing.input | 0.075 | $0.075/M | groq.com/pricing; OpenRouter | OK | -| pricing.cachedInput | 0.0375 | $0.0375/M | groq.com/pricing (explicit); OpenRouter 0.0000000375 | OK (PR #4990 confirmed) | -| pricing.output | 0.3 | $0.30/M | groq.com/pricing; OpenRouter | OK | -| contextWindow | 131072 | 131072 | Groq API; model card "up to 131K" | OK | -| capabilities.maxOutputTokens | — | 65536 | Groq API `max_completion_tokens`; OpenRouter agrees | **FIX: add 65536** | -| releaseDate | 2025-08-05 | 2025-08-05 | Groq API `created` = 1754407957 → 2025-08-05 UTC | OK | -| deprecated | — | active | live API; deprecations page | OK | - -### groq/openai/gpt-oss-safeguard-20b - -| Field | Repo value | Verified value | Source | Verdict | -|---|---|---|---|---| -| pricing.input | 0.075 | $0.075/M | groq.com/pricing | OK | -| pricing.cachedInput | 0.0375 | $0.0375/M | prompt-caching doc lists this model as caching-supported with "50% discount for cached input tokens" → 0.075 × 0.5 = 0.0375. Pricing page shows no cached column for this row; OpenRouter shows $0.037/M (rounding). Groq's caching doc wins. 
| OK (PR #4990 confirmed) | -| pricing.output | 0.3 | $0.30/M | groq.com/pricing | OK | -| contextWindow | 131072 | 131072 | Groq API | OK | -| capabilities.maxOutputTokens | — | 65536 | Groq API `max_completion_tokens`; OpenRouter agrees | **FIX: add 65536** | -| releaseDate | 2025-10-29 | 2025-10-29 | Groq API `created` = 1761708789 → 2025-10-29 UTC | OK | -| deprecated | — | active | live API; deprecations page | OK | - -### groq/qwen/qwen3-32b - -| Field | Repo value | Verified value | Source | Verdict | -|---|---|---|---|---| -| pricing.input | 0.29 | $0.29/M | groq.com/pricing; OpenRouter | OK | -| pricing.cachedInput | — | none on Groq | Not in prompt-caching supported list (gpt-oss ×3 only); no cached column on pricing page. OpenRouter shows a 50% `input_cache_read` ($0.145) — Groq docs win; do not add. | OK (absent) | -| pricing.output | 0.59 | $0.59/M | groq.com/pricing; OpenRouter | OK | -| contextWindow | 131072 | 131072 | Groq API | OK | -| capabilities.maxOutputTokens | — | 40960 | Groq API `max_completion_tokens`; OpenRouter agrees | **FIX: add 40960** | -| releaseDate | 2025-04-29 | 2025-04-29 | Upstream Qwen3 family launch (field is "first publicly released"). Groq endpoint `created` is 2025-05-28 (when Groq added it) — repo convention uses upstream release. 
| OK | -| deprecated | — | **not deprecated** | `active: true` in live API; absent from deprecations page (appears there only as a *replacement* for mistral-saba-24b / qwen-qwq-32b) | OK — confirmed still active (open question f) | - -### groq/llama-3.1-8b-instant - -| Field | Repo value | Verified value | Source | Verdict | -|---|---|---|---|---| -| pricing.input | 0.05 | $0.05/M | groq.com/pricing; OpenRouter | OK | -| pricing.output | 0.08 | $0.08/M | groq.com/pricing; OpenRouter | OK | -| pricing.cachedInput | — | none on Groq | Not in caching-supported list; no cached column on pricing page (OpenRouter's $0.025 row not honored — Groq wins) | OK (absent) | -| contextWindow | 131072 | 131072 | Groq API | OK | -| capabilities.maxOutputTokens | — | 131072 | Groq API `max_completion_tokens` = 131072 (full window); OpenRouter agrees | **FIX: add 131072** | -| releaseDate | 2024-07-23 | 2024-07-23 | Meta released Llama 3.1 (8B/70B/405B) on 2024-07-23 (ai.meta.com/blog/meta-llama-3-1, press coverage dated 2024-07-23). Groq API `created` (2023-09-03) is a placeholder shared with whisper entries and predates Llama 3.1 — not meaningful. | OK — verified (open question g) | -| speedOptimized | — (absent) | should be `true` | Groq's speed-tier "-instant" naming; model card positions it for "Real-Time Applications … requiring instant responses and high throughput"; cheapest text model in the lineup. Matches repo precedent (claude-3-haiku, gemini-2.0-flash). 
| **FIX: add `speedOptimized: true`** | -| deprecated | — | active | live API; deprecations page (it is a replacement target, not deprecated) | OK | - -### groq/llama-3.3-70b-versatile - -| Field | Repo value | Verified value | Source | Verdict | -|---|---|---|---|---| -| pricing.input | 0.59 | $0.59/M | groq.com/pricing; OpenRouter | OK | -| pricing.output | 0.79 | $0.79/M | groq.com/pricing; OpenRouter | OK | -| contextWindow | 131072 | 131072 | Groq API | OK | -| capabilities.maxOutputTokens | — | 32768 | Groq API `max_completion_tokens`; OpenRouter agrees | **FIX: add 32768** | -| releaseDate | 2024-12-06 | 2024-12-06 | Groq API `created` = 1733447754 → 2024-12-06 UTC, matching Meta's Llama 3.3 launch day | OK — verified (open question g) | -| deprecated | — | active | live API; deprecations page (replacement target for several retired models) | OK | - -### groq/meta-llama/llama-4-scout-17b-16e-instruct - -| Field | Repo value | Verified value | Source | Verdict | -|---|---|---|---|---| -| pricing.input | 0.11 | $0.11/M | groq.com/pricing; OpenRouter | OK | -| pricing.output | 0.34 | $0.34/M | groq.com/pricing; OpenRouter | OK | -| contextWindow | 131072 | 131072 | Groq API | OK | -| capabilities.maxOutputTokens | — | 8192 | Groq API `max_completion_tokens`; OpenRouter agrees | **FIX: add 8192** | -| releaseDate | 2025-04-05 | 2025-04-05 | Groq API `created` = 1743874824 → 2025-04-05 UTC (Meta Llama 4 launch day) | OK | -| deprecated | — | active | live API; deprecations page | OK | - -### groq/moonshotai/kimi-k2-instruct-0905 - -| Field | Repo value | Verified value | Source | Verdict | -|---|---|---|---|---| -| deprecated | true | shut down | console.groq.com/docs/deprecations: shutdown **04/15/26**, replacement `openai/gpt-oss-120b`; model entirely absent from the live `/v1/models` response | OK (PR #4990 change confirmed — open question regarding shutdown resolved) | -| pricing.input | 1.0 | $1.00/M | groq.com/pricing (row still present); OpenRouter | OK | 
-| pricing.output | 3.0 | $3.00/M | groq.com/pricing; OpenRouter | OK | -| pricing.cachedInput | — | conflicting | groq.com/pricing still shows $0.50 cached, but the prompt-caching doc's supported list contains only the 3 gpt-oss models, and the model is removed from the API. Conflicting Groq sources + shut-down model → not added (see "Deliberately not changed"). | OK (absent) | -| contextWindow | 262144 | 262144 | Moonshot K2-0905 announcement ("context length expanded from 128K to 256K"); Groq model card description "256K context"; OpenRouter Groq row 262144. Live Groq API no longer lists the model. | OK | -| capabilities.maxOutputTokens | — | 16384 (OpenRouter only) | Only source is OpenRouter; model is gone from Groq's API and its doc-page spec table cannot be rendered. Cannot confirm from Groq's own docs → **skipped** per validation rules. | Not added (unverifiable from Groq) | -| releaseDate | 2025-09-05 | 2025-09-05 | Moonshot AI announced K2-Instruct-0905 on September 5, 2025 (aibase coverage; simonwillison.net 2025-09-06; the `0905` suffix) | OK — verified (open question g) | -| pricing.updatedAt | 2026-04-01 | — | Prices re-checked today and unchanged; model is shut down, so no bump needed | OK | - -## Provider-Level Capability: temperature - -**Recommendation: add `temperature: { min: 0, max: 2 }` to the groq provider `capabilities`.** - -- Groq's OpenAPI spec (embedded in console.groq.com docs pages, chat-completions `temperature`): "What sampling temperature to use, between 0 and 2", `"minimum": 0, "maximum": 2`. -- `apps/sim/providers/groq/index.ts:82` already forwards it: `if (request.temperature !== undefined) payload.temperature = request.temperature` — so the only thing missing is the capability flag; today Sim hides the temperature slider for every Groq model while the provider would happily accept the value. 
-- Precedent: `fireworks` (models.ts:97), `together` (models.ts:113), and `baseten` (models.ts:129) all declare `temperature: { min: 0, max: 2 }` at the provider level for the same OpenAI-compatible 0–2 range. - -**Test impact** (`apps/sim/providers/utils.test.ts`): -- ~line 214: `'groq/meta-llama/llama-4-scout-17b-16e-instruct'` must be removed from the `unsupportedModels` list in the `supportsTemperature` → false test (it will now return `true`; move it to the supported list). -- ~line 288: `expect(getMaxTemperature('groq/meta-llama/llama-4-scout-17b-16e-instruct')).toBeUndefined()` must change to expect `2` (move into the "range 0-2" group). - -## Changes made in this pass - -None to `models.ts` (per instructions — doc only). The fix list below is the recommended diff. - -1. `groq` provider capabilities: add `temperature: { min: 0, max: 2 }` (+ update the two utils.test.ts assertions above). -2. `groq/openai/gpt-oss-120b`: `capabilities: {}` → `capabilities: { maxOutputTokens: 65536 }`; add `recommended: true`. -3. `groq/openai/gpt-oss-20b`: add `maxOutputTokens: 65536`. -4. `groq/openai/gpt-oss-safeguard-20b`: add `maxOutputTokens: 65536`. -5. `groq/qwen/qwen3-32b`: add `maxOutputTokens: 40960`. -6. `groq/llama-3.1-8b-instant`: add `maxOutputTokens: 131072`; add `speedOptimized: true`. -7. `groq/llama-3.3-70b-versatile`: add `maxOutputTokens: 32768`. -8. `groq/meta-llama/llama-4-scout-17b-16e-instruct`: add `maxOutputTokens: 8192`. - -## Deliberately not changed - -- **kimi-k2-instruct-0905 `cachedInput`**: groq.com/pricing still shows $0.50 cached, but the canonical prompt-caching doc's supported-model list is exactly the three gpt-oss models, and the model is shut down (absent from the live API since the 2026-04-15 shutdown). Conflicting Groq sources for a decommissioned model — adding a cached rate would be dead config. Reconciliation: the pricing-page row is residual for a removed model; the caching doc never listed kimi. 
-- **kimi-k2-instruct-0905 `maxOutputTokens`**: 16384 is OpenRouter-only; cannot be confirmed from Groq's own docs/API (model removed). Skipped per validation rules. -- **`cachedInput` on qwen3-32b / llama-3.1-8b-instant**: OpenRouter's Groq endpoints advertise 50% `input_cache_read` rates, but Groq's prompt-caching doc explicitly limits caching support to the three gpt-oss models and the pricing page shows no cached column for them. Groq docs win. Re-check if Groq's promised caching rollout ("more models soon") lands. -- **All pricing, contextWindow, releaseDate values**: verified correct as-is (including all PR #4990 changes — kimi `deprecated: true`, the three gpt-oss `cachedInput` rates, and `updatedAt: '2026-06-11'` bumps). -- **kimi `pricing.updatedAt: '2026-04-01'`**: prices unchanged and model shut down; no bump needed. -- **`defaultModel: 'groq/llama-3.3-70b-versatile'`**: still active and reasonable; changing the default is a product decision, not a validation finding. - -## Unverifiable - -- **kimi-k2-instruct-0905 `maxOutputTokens` (16384)** — Groq removed the model from its API and the doc page's spec table no longer renders; only OpenRouter attests it. -- Nothing else: every other field was confirmed against at least one Groq-owned source (live `/v1/models` API, groq.com/pricing, prompt-caching doc, deprecations doc, or embedded OpenAPI spec), with OpenRouter as a corroborating secondary on pricing and token limits. diff --git a/docs/models/mistral.md b/docs/models/mistral.md deleted file mode 100644 index 26b236e099..0000000000 --- a/docs/models/mistral.md +++ /dev/null @@ -1,305 +0,0 @@ -# Mistral Provider Validation — Final Pass - -- **Date:** 2026-06-11 -- **Scope:** All 27 entries of the `mistral` provider block in `apps/sim/providers/models.ts` (lines ~2124–2501), re-verifying everything including the changes landed in PR #4990 (7 deprecations, 8 releaseDate fixes, updatedAt bumps). 
-- **Method:** Live fetches of Mistral docs (model overview, model cards, pricing page, prompt-caching guide), direct download + grep of the canonical OpenAPI spec, and — decisively — the **server-side model-card source data** in `mistralai/platform-docs-public` (`src/schema/models/models/*.ts`, shallow-cloned at `main` on 2026-06-11). These TypeScript data files are what docs.mistral.ai renders into the model cards, and they carry `apiNames` (alias mappings), prices, context lengths, release dates, and `deprecationDate`/`retirementDate` metadata that the rendered pages omit. OpenRouter used as the secondary pricing source. - -## Sources - -| Source | URL | -|---|---| -| Models overview | https://docs.mistral.ai/getting-started/models/models_overview | -| Pricing page | https://mistral.ai/pricing | -| Model cards | https://docs.mistral.ai/models/model-cards/<slug> (slugs cited per model below) | -| Model-card source data (authoritative) | https://github.com/mistralai/platform-docs-public — `src/schema/models/models/*.ts` @ `main`, 2026-06-11 | -| OpenAPI spec | https://raw.githubusercontent.com/mistralai/platform-docs-public/main/openapi.yaml | -| Prompt caching guide | https://docs.mistral.ai/studio-api/conversations/advanced/prompt-caching | -| OpenRouter (secondary pricing) | https://openrouter.ai/mistralai/<slug> | - -Below, "data file" = the model's source file in `src/schema/models/models/`. - ---- - -## Per-model verification - -### mistral-large-latest / mistral-large-2512 (Mistral Large 3, 25.12) - -Data file: `mistral-large-3-25-12.ts`. Model card: `/models/model-cards/mistral-large-3-25-12`. 
- -| Field | Ours | Verified value | Source | Verdict | -|---|---|---|---|---| -| pricing input/output | 0.5 / 1.5 | $0.5 / $1.5 per 1M | Data file, model card, pricing page ("Mistral Large 3: $0.5 / $1.5"), OpenRouter `mistral-large-2512` ($0.50/$1.50) | ✓ | -| contextWindow | 256000 | 256k | Data file `contextLength: '256k'`; OpenRouter shows 262K (same window, binary units) | ✓ | -| releaseDate | 2025-12-02 | 2025-12-02 | Data file `releaseDate: '2025-12-02'` | ✓ | -| alias | latest → 2512 | `apiNames: ['mistral-large-2512', 'mistral-large-latest']` | Data file | ✓ | -| status | active | `status: 'Active'` | Data file | ✓ | -| temperature | {0, 1} | spec allows {0, **1.5**} | OpenAPI `ChatCompletionRequest.temperature` | ✗ see Changes | -| recommended | (absent) | provider default, flagship | — | ✗ see Changes | - -Note: an initial pricing-page fetch summarized Large 3 as $2/$6; a verbatim re-fetch showed that was a summarization error — the literal row is "$0.5 / $1.5 /M tokens". $2/$6 is the legacy mistral-large-2411 price. - -### mistral-small-2603 / mistral-small-latest (Mistral Small 4, 26.03) — CONFLICT RULING - -Data file: `mistral-small-4-0-26-03.ts`. Model card: `/models/model-cards/mistral-small-4-0-26-03`. - -| Field | Ours | Verified value | Source | Verdict | -|---|---|---|---|---| -| pricing input/output | 0.15 / 0.6 | **$0.15 / $0.6** (ruling below) | Data file (`price: 0.15` / `price: 0.6`), model card, OpenRouter `mistral-small-2603` ($0.15/$0.60) | ✓ KEEP | -| contextWindow | 256000 | 256k | Data file | ✓ | -| releaseDate | 2026-03-16 | 2026-03-16 | Data file | ✓ | -| alias | latest → 2603 | `apiNames: ['mistral-small-2603', 'mistral-small-latest']` | Data file | ✓ | -| status | active | `status: 'Active'` | Data file | ✓ | - -**Ruling on the open price conflict (question a):** mistral.ai/pricing again printed "$0.1 / $0.3" for Mistral Small 4 (verbatim re-fetch, third consistent reading). 
But three independent confirmations say $0.15/$0.6: (1) the model card, (2) the model-card **source data file** that drives docs billing-side documentation, and (3) OpenRouter's Mistral endpoint, which mirrors what Mistral actually charges resellers. $0.1/$0.3 is exactly the price of the predecessor Mistral Small 3.2 (`mistral-small-2506`, verified below), so the pricing-page row is almost certainly a stale carry-over from Small 3.x, not a price cut. **Final value: 0.15 / 0.6 — no change.** Re-check if the pricing page row persists alongside an official price-cut announcement. - -### devstral-2512 / devstral-latest (Devstral 2, 25.12) - -Data file: `devstral-2-25-12.ts`. - -| Field | Ours | Verified value | Source | Verdict | -|---|---|---|---|---| -| pricing input/output | 0.4 / 2.0 | $0.4 / $2 | Data file, pricing page ("Devstral 2: $0.4 / $2"), OpenRouter `devstral-2512` ($0.40/$2.00) | ✓ | -| contextWindow | 256000 | 256k | Data file | ✓ | -| releaseDate | 2025-12-09 | 2025-12-09 | Data file | ✓ | -| alias | devstral-latest → 2512 | `apiNames: ['devstral-2512', 'devstral-latest', 'devstral-medium-latest']` | Data file | ✓ (note: `devstral-medium-latest` is a third alias we don't list — fine) | -| status | active | `status: 'Active'` | Data file | ✓ | - -### mistral-large-2411 (deprecated) - -Data file: `mistral-large-2-1-24-11.ts`. - -| Field | Ours | Verified value | Source | Verdict | -|---|---|---|---|---| -| pricing input/output | 2.0 / 6.0 | $2.0 / $6.0 | Data file (previously unverifiable — now confirmed) | ✓ | -| contextWindow | 128000 | 128k | Data file | ✓ | -| releaseDate | 2024-11-18 | 2024-11-18 | Data file | ✓ | -| deprecated | true | `status: 'Deprecated'`, deprecationDate 2026-02-27, retirementDate 2026-05-31 (already retired) | Data file metadata | ✓ | - -### magistral-medium-latest / magistral-medium-2509 - -Data file: `magistral-medium-1-2-25-09.ts`. 
- -| Field | Ours | Verified value | Source | Verdict | -|---|---|---|---|---| -| pricing input/output | 2.0 / 5.0 | $2.0 / $5.0 | Data file, pricing page ("Magistral Medium: $2 / $5") | ✓ (OpenRouter: not listed — single-family source) | -| contextWindow | 128000 | 128k | Data file | ✓ | -| releaseDate | 2025-09-18 | 2025-09-18 | Data file (PR #4990 fix confirmed) | ✓ | -| alias | latest → 2509 | `apiNames: ['magistral-medium-2509', 'magistral-medium-latest']` | Data file | ✓ | -| status | active | `status: 'Active'` | Data file | ✓ | - -Note: Magistral is a reasoning model (`output: ['reasoning', 'text']`); see "Deliberately not changed" re `reasoning_effort`. - -### magistral-small-latest / magistral-small-2509 (deprecated) - -Data file: `magistral-small-1-2-25-09.ts`. - -| Field | Ours | Verified value | Source | Verdict | -|---|---|---|---|---| -| pricing input/output | 0.5 / 1.5 | $0.5 / $1.5 | Data file, pricing page | ✓ | -| contextWindow | 128000 | 128k | Data file | ✓ | -| releaseDate | 2025-09-18 | 2025-09-18 | Data file (PR #4990 fix confirmed) | ✓ | -| alias | small-latest → 2509 | `apiNames: ['magistral-small-2509', 'magistral-small-latest']` | Data file | ✓ | -| deprecated | true | `status: 'Deprecated'`, deprecationDate 2026-04-30 (past), retirementDate 2026-07-31, replacement "Mistral Small 4" | Data file metadata | ✓ | - -### mistral-medium-latest / mistral-medium-2508 (Mistral Medium 3.1) - -Data file: `mistral-medium-3-1-25-08.ts`. 
- -| Field | Ours | Verified value | Source | Verdict | -|---|---|---|---|---| -| pricing input/output | 0.4 / 2.0 | $0.4 / $2.0 | Data file | ✓ | -| contextWindow | 128000 | 128k | Data file | ✓ | -| releaseDate | 2025-08-12 | 2025-08-12 | Data file | ✓ | -| alias | latest → 2508 | `apiNames: ['mistral-medium-2508', 'mistral-medium-latest']` | Data file | ✓ — **`mistral-medium-latest` still maps to 2508, NOT to Medium 3.5** (3.5 has its own apiNames, see below) | -| status | active | `status: 'Active'` | Data file | ✓ | - -### mistral-medium-2505 (Mistral Medium 3) - -Data file: `mistral-medium-3-25-05.ts`. - -| Field | Ours | Verified value | Source | Verdict | -|---|---|---|---|---| -| pricing input/output | 0.4 / 2.0 | $0.4 / $2.0 | Data file | ✓ | -| contextWindow | 128000 | 128k | Data file | ✓ | -| releaseDate | 2025-05-07 | 2025-05-07 | Data file | ✓ | -| status | active (no flag) | `status: 'Active'` — not deprecated despite age | Data file | ✓ | - -### mistral-small-2506 (Mistral Small 3.2, deprecated) - -Data file: `mistral-small-3-2-25-06.ts`. - -| Field | Ours | Verified value | Source | Verdict | -|---|---|---|---|---| -| pricing input/output | 0.1 / 0.3 | $0.1 / $0.3 | Data file (previously unverifiable — now confirmed) | ✓ | -| contextWindow | 128000 | 128k | Data file | ✓ | -| releaseDate | 2025-06-20 | 2025-06-20 | Data file | ✓ | -| deprecated | true | `status: 'Deprecated'`, deprecationDate 2026-04-30 (past), retirementDate 2026-07-31 | Data file metadata | ✓ | - -### open-mistral-nemo - -Data file: `mistral-nemo-12b-24-07.ts`. 
- -| Field | Ours | Verified value | Source | Verdict | -|---|---|---|---|---| -| pricing input/output | 0.15 / 0.15 | $0.15 / $0.15 | Data file, pricing page ("Mistral NeMo: $0.15 / $0.15") | ✓ | -| contextWindow | 128000 | 128k | Data file | ✓ | -| releaseDate | 2024-07-18 | 2024-07-18 | Data file | ✓ | -| status | active (no flag) | `status: 'Active'` — still active | Data file | ✓ | - -### codestral-latest / codestral-2508 - -Data file: `codestral-25-08.ts`. Model card: `/models/model-cards/codestral-25-08`. - -| Field | Ours | Verified value | Source | Verdict | -|---|---|---|---|---| -| pricing input/output | 0.3 / 0.9 | $0.3 / $0.9 | Data file, model card, pricing page, OpenRouter `codestral-2508` ($0.30/$0.90) | ✓ | -| contextWindow | 128000 | 128k per Mistral docs (data file + live model card). OpenRouter claims 256K — **Mistral docs win**, keep 128000 | Data file, model card | ✓ | -| releaseDate | 2025-07-30 | 2025-07-30 | Data file | ✓ | -| alias | latest → 2508 | `apiNames: ['codestral-2508', 'codestral-latest']` | Data file | ✓ | -| status | active | `status: 'Active'` | Data file | ✓ | - -### devstral-small-latest (Devstral Small 2, 25.12, deprecated) - -Data file: `devstral-small-2-25-12.ts`. - -| Field | Ours | Verified value | Source | Verdict | -|---|---|---|---|---| -| pricing input/output | 0.1 / 0.3 | $0.1 / $0.3 | Data file | ✓ | -| contextWindow | 256000 | 256k | Data file | ✓ | -| releaseDate | 2025-12-09 | 2025-12-09 | Data file (PR #4990 fix confirmed) | ✓ | -| alias | — | `apiNames: ['labs-devstral-small-2512', 'devstral-small-latest']` | Data file | ✓ | -| deprecated | true | `status: 'Deprecated'`, deprecationDate 2026-02-27, retirementDate 2026-03-31 (already retired), replacement "Devstral 2" | Data file metadata | ✓ | - -### devstral-small-2507 (deprecated) - -Data file: `devstral-small-1-1-25-07.ts`. 
- -| Field | Ours | Verified value | Source | Verdict | -|---|---|---|---|---| -| pricing input/output | 0.1 / 0.3 | $0.1 / $0.3 | Data file (previously unverifiable — now confirmed) | ✓ | -| contextWindow | 128000 | 128k | Data file | ✓ | -| releaseDate | 2025-07-10 | 2025-07-10 | Data file | ✓ | -| deprecated | true | `status: 'Deprecated'`, deprecationDate 2026-02-27, retirementDate 2026-05-31 (already retired) | Data file metadata | ✓ | - -### devstral-medium-2507 (deprecated) - -Data file: `devstral-medium-1-0-25-07.ts`. - -| Field | Ours | Verified value | Source | Verdict | -|---|---|---|---|---| -| pricing input/output | 0.4 / 2.0 | $0.4 / $2.0 | Data file (previously unverifiable — now confirmed) | ✓ | -| contextWindow | 128000 | 128k | Data file | ✓ | -| releaseDate | 2025-07-10 | 2025-07-10 | Data file | ✓ | -| deprecated | true | `status: 'Deprecated'`, deprecationDate 2026-02-27, retirementDate 2026-05-31 (already retired) | Data file metadata | ✓ | - -### ministral-14b-latest / ministral-14b-2512 (Ministral 3 14B) - -Data file: `ministral-3-14b-25-12.ts`. Model card: `/models/model-cards/ministral-3-14b-25-12`. - -| Field | Ours | Verified value | Source | Verdict | -|---|---|---|---|---| -| pricing input/output | 0.2 / 0.2 | $0.2 / $0.2 | Data file, pricing page, OpenRouter `ministral-14b-2512` ($0.20/$0.20) | ✓ | -| contextWindow | 256000 | 256k | Data file | ✓ | -| releaseDate | 2025-12-02 | 2025-12-02 | Data file | ✓ | -| alias | latest → 2512 | `apiNames: ['ministral-14b-2512', 'ministral-14b-latest']` | Data file | ✓ | -| status | active | `status: 'Active'` | Data file | ✓ | -| speedOptimized | (absent) | edge/low-latency tier | — | ✗ see Changes | - -### ministral-8b-latest / ministral-8b-2512 (Ministral 3 8B) - -Data file: `ministral-3-8b-25-12.ts`. 
- -| Field | Ours | Verified value | Source | Verdict | -|---|---|---|---|---| -| pricing input/output | 0.15 / 0.15 | $0.15 / $0.15 | Data file, pricing page | ✓ | -| contextWindow | 256000 | 256k | Data file | ✓ | -| releaseDate | 2025-12-02 | 2025-12-02 | Data file (PR #4990 fix confirmed) | ✓ | -| alias | latest → 2512 | `apiNames: ['ministral-8b-2512', 'ministral-8b-latest']` | Data file | ✓ | -| speedOptimized | (absent) | edge/low-latency tier | — | ✗ see Changes | - -### ministral-3b-latest / ministral-3b-2512 (Ministral 3 3B) - -Data file: `ministral-3-3b-25-12.ts`. - -| Field | Ours | Verified value | Source | Verdict | -|---|---|---|---|---| -| pricing input/output | 0.1 / 0.1 | $0.1 / $0.1 | Data file, pricing page | ✓ | -| contextWindow | 256000 | 256k | Data file | ✓ | -| releaseDate | 2025-12-02 | 2025-12-02 | Data file (PR #4990 fix confirmed) | ✓ | -| alias | latest → 2512 | `apiNames: ['ministral-3b-2512', 'ministral-3b-latest']` | Data file | ✓ | -| speedOptimized | (absent) | edge/low-latency tier | — | ✗ see Changes | - ---- - -## Provider-wide checks - -### Temperature bounds (question e) — DISCREPANCY FOUND - -The live OpenAPI spec's `ChatCompletionRequest.temperature` (openapi.yaml, schema at line 11988, property at 11997) is: - -```yaml -temperature: - anyOf: - - type: number - maximum: 1.5 - minimum: 0 -``` - -with the description "we recommend between 0.0 and 0.7". So the chat-completions endpoint — the one Sim's provider calls (`https://api.mistral.ai/v1` + `chat.completions.create`) — accepts **0–1.5, not 0–1**. The 0–1 bound exists in the spec only on `CompletionArgs` (line ~8103), which is the **conversations/agents API**'s white-listed argument schema, not chat completions; that is likely where the earlier "max 1" belief came from. Verdict: our `{min: 0, max: 1}` is overly restrictive — users cannot select 1.0–1.5, which the API supports. Recommended fix: `max: 1.5` on all 27 entries. 
- -### Prompt caching (question b) — NOT WIRED, cachedInput NOT added - -- OpenAPI spec: `prompt_cache_key` exists on `ChatCompletionRequest` (line 12134), `FIMCompletionRequest` (12362), and `AgentsCompletionRequest` (13841): "A cache key to enable prompt caching. When provided, the API will attempt to reuse previously computed tokens... Cached tokens are billed at 10% of the standard input token price." -- Prompt-caching guide confirms caching is **opt-in**: "Set the same `prompt_cache_key` on requests that are likely to share a prefix"; 64-token block granularity; hits reported via `usage.prompt_tokens_details.cached_tokens`. -- Sim's provider (`apps/sim/providers/mistral/index.ts`) forwards only `temperature` and `max_tokens` (plus messages/tools/response_format). It does **not** send `prompt_cache_key`, so no Sim request can ever produce cached tokens. - -**Ruling: caching is opt-in, Sim does not opt in → adding `cachedInput` would be dead data. Not changed.** Recommended follow-up: wire `prompt_cache_key` in the Mistral provider (e.g. keyed per workflow execution/conversation), read `usage.prompt_tokens_details.cached_tokens`, then add `cachedInput = 0.1 × input` to all active entries (large 0.05, small 0.015, devstral 0.04, magistral-medium 0.2, medium 0.04, nemo 0.015, codestral 0.03, ministral-14b 0.02, ministral-8b 0.015, ministral-3b 0.01). - -### recommended / speedOptimized (question c) — BOTH JUSTIFIED - -- `recommended: true` on **mistral-large-latest**: it is the provider's `defaultModel`, Mistral's flagship generalist (Large 3), actively maintained, and the provider currently has zero recommended entries (every other major provider block marks its flagship). Justified. 
-- `speedOptimized: true` on the **ministral tier** (14b/8b/3b, `-latest` and `-2512`, 6 entries): Ministral 3 is Mistral's edge/low-latency family ("les Ministraux" — edge models), the smallest and cheapest tier, directly analogous to the existing `speedOptimized` entries in models.ts (gpt-5-mini-class at line ~369, Haiku at line ~853). Justified. - -### Alias map (question g) — ALL CONFIRMED - -| Alias | Expected | Data-file `apiNames` | Verdict | -|---|---|---|---| -| mistral-large-latest | mistral-large-2512 | ✓ | ✓ | -| mistral-small-latest | mistral-small-2603 | ✓ | ✓ | -| codestral-latest | codestral-2508 | ✓ | ✓ | -| devstral-latest | devstral-2512 | ✓ (also `devstral-medium-latest`) | ✓ | -| devstral-small-latest | labs-devstral-small-2512 (Devstral Small 2) | ✓ | ✓ | -| magistral-medium-latest | magistral-medium-2509 | ✓ | ✓ | -| magistral-small-latest | magistral-small-2509 | ✓ | ✓ | -| mistral-medium-latest | mistral-medium-2508 (NOT Medium 3.5) | ✓ | ✓ | -| ministral-14b/8b/3b-latest | ministral-*-2512 | ✓ | ✓ | - ---- - -## Changes made in this pass - -None to `models.ts` (per instructions, this pass writes only this document). The PR #4990 changes (7 deprecations, 8 releaseDate fixes) are all **confirmed correct** against the model-card source data. - -**Recommended fixes (the fix list):** - -1. `mistral-large-latest`: add `recommended: true` — provider default + flagship; provider has zero recommended entries. -2. `ministral-14b-latest`, `ministral-14b-2512`, `ministral-8b-latest`, `ministral-8b-2512`, `ministral-3b-latest`, `ministral-3b-2512`: add `speedOptimized: true` — edge/low-latency tier, consistent with gpt-mini/haiku precedent. -3. All 27 entries: `capabilities.temperature.max` 1 → **1.5** — OpenAPI `ChatCompletionRequest.temperature.maximum: 1.5`. (The 0–1 bound belongs to the conversations-API `CompletionArgs`, not chat completions. 
If the team prefers to cap the UI at Mistral's recommended sampling range instead of the API bound, keep 1 — but then document that choice; it does not match the endpoint Sim calls.) - -## Deliberately not changed - -- **mistral-small-2603 / mistral-small-latest pricing stays 0.15/0.6** — final ruling on the standing conflict: model card + model-card source data + OpenRouter all say $0.15/$0.6; only the marketing pricing page says $0.1/$0.3, which exactly equals the predecessor Small 3.2 price and is judged a stale row, not a price cut. -- **No `cachedInput` on any entry** — Mistral caching is opt-in via `prompt_cache_key` and Sim's provider does not send it; adding prices would be dead data. Requires provider wiring first (recommended follow-up above). -- **`mistral-medium-2505` left active** — `status: 'Active'` in source data, no deprecation metadata despite Medium 3.1/3.5 existing. -- **`open-mistral-nemo` left active** — still `status: 'Active'`. -- **codestral contextWindow stays 128000** — OpenRouter claims 256K but both the live model card and the source data say 128k; Mistral docs win. -- **`updatedAt: '2026-04-01'` left on deprecated entries** — their prices were verified unchanged; only active entries were bumped in PR #4990 and that remains coherent. -- **Reasoning params not wired** — spec exposes `reasoning_effort` (`high`/`none`) on `ChatCompletionRequest` (line 12119; `prompt_mode` is deprecated in its favor). Sim doesn't forward it, so no capabilities change; note for a future Magistral reasoning integration. -- **mistral-medium-3-5 NOT added in this pass** (documented as a recommended addition, question d): Mistral Medium 3.5 — `apiNames: ['mistral-medium-3-5', 'mistral-medium-3']`, released **2026-04-28**, **$1.5 / $7.5** per 1M (data file `mistral-medium-3-5-26-04.ts` + pricing page agree), **256k** context, Active, "frontier-class multimodal model optimized for agentic and coding". 
Matches existing `/^mistral/` modelPattern, so adding the entry is sufficient. Note its id does not follow the `-MMYY` convention — both apiNames could be listed if desired. - -## Unverifiable - -Nothing remains strictly unverifiable. The four previously-unverifiable legacy prices (mistral-large-2411 2.0/6.0, mistral-small-2506 0.1/0.3, devstral-small-2507 0.1/0.3, devstral-medium-2507 0.4/2.0) are now **confirmed** via the model-card source data files. Caveats: - -- `magistral-medium-2509` pricing has no independent second source (not listed on OpenRouter); verified only within the Mistral doc family (data file + pricing page, which agree). -- The Mistral Small 4 pricing-page row ($0.1/$0.3) remains in live contradiction with the model card; ruling above. Re-check on the next pass. diff --git a/docs/models/openai.md b/docs/models/openai.md deleted file mode 100644 index a1d81edb5b..0000000000 --- a/docs/models/openai.md +++ /dev/null @@ -1,338 +0,0 @@ -# OpenAI Provider Block — Final Validation & Justification - -**Validation date:** 2026-06-11 -**Scope:** `openai` provider block in `apps/sim/providers/models.ts` (23 models), including changes landed in PR #4990. -**Method:** Live WebFetch of every individual model page on `developers.openai.com/api/docs/models/`, the pricing page, the reasoning guide, the GPT-5.5 usage guide, the deprecations page, and the API reference; secondary pricing cross-checks against OpenRouter. All claims below were fetched live this session. Provider docs win over secondary sources. 
- -**Sources:** - -- Pricing: https://developers.openai.com/api/docs/pricing (only lists current gpt-5.5/5.4 families; per-model pricing taken from individual model pages) -- Model pages: `https://developers.openai.com/api/docs/models/` (fetched for all 23 ids) -- Reasoning guide: https://developers.openai.com/api/docs/guides/reasoning -- GPT-5.5 usage guide: https://developers.openai.com/api/docs/guides/latest-model -- Deprecations: https://developers.openai.com/api/docs/deprecations -- GPT-5.5 launch: https://openai.com/index/introducing-gpt-5-5/ (via search; release 2026-04-23, API availability 2026-04-24) -- Secondary pricing: https://openrouter.ai/openai/gpt-5.5, /gpt-5.5-pro, /gpt-5.4, /gpt-5.2, /o3, /gpt-4o — all consistent with provider docs - -**Flag consumption check** (`rg` over `apps/sim/providers/openai/`): `reasoningEffort` and `verbosity` are consumed in `apps/sim/providers/openai/core.ts` (sent as `reasoning.effort` / `text.verbosity` on the Responses API). `nativeStructuredOutputs` is NOT consumed by the provider runtime — its only consumer is the landing models page (`apps/sim/app/(landing)/models/utils.ts`), so it is display-only metadata. `thinking` / `computerUse` are not used by the OpenAI provider. - -Pricing is USD per 1M tokens throughout. "MP" = the model's own docs page (`developers.openai.com/api/docs/models/`). 
- ---- - -## Per-model verification - -### gpt-4.1 - -| Field | Value | Source | Verdict | -|---|---|---|---| -| input / cachedInput / output | 2.0 / 0.5 / 8.0 | MP gpt-4.1 | ✓ verified | -| updatedAt | 2026-06-11 | this validation | ✓ verified today | -| contextWindow | 1,047,576 | MP: "1,047,576 tokens" | ✓ verified | -| maxOutputTokens | 32,768 | MP | ✓ verified | -| temperature 0–2 | present | non-reasoning chat model; standard OpenAI sampling range | ✓ correct by convention (docs do not enumerate the range; 0–2 is the API-wide bound) | -| releaseDate | 2025-04-14 | MP snapshot `gpt-4.1-2025-04-14` | ✓ verified | -| deprecated | absent | deprecations page does not list gpt-4.1 base | ✓ verified active ("Default", "Smartest non-reasoning model") | - -### gpt-4.1-mini - -| Field | Value | Source | Verdict | -|---|---|---|---| -| input / cachedInput / output | 0.4 / 0.1 / 1.6 | MP gpt-4.1-mini | ✓ verified | -| updatedAt | 2026-06-11 | this validation | ✓ | -| contextWindow / maxOutputTokens | 1,047,576 / 32,768 | MP | ✓ verified | -| temperature 0–2 | present | convention (non-reasoning) | ✓ | -| releaseDate | 2025-04-14 | MP snapshot `gpt-4.1-mini-2025-04-14` | ✓ verified | -| deprecated | absent | not on deprecations page | ✓ verified | - -### gpt-4.1-nano - -| Field | Value | Source | Verdict | -|---|---|---|---| -| input / cachedInput / output | 0.1 / 0.025 / 0.4 | MP gpt-4.1-nano | ✓ verified | -| updatedAt | 2026-06-11 | this validation | ✓ | -| contextWindow / maxOutputTokens | 1,047,576 / 32,768 | MP | ✓ verified | -| temperature 0–2 | present | convention | ✓ | -| releaseDate | 2025-04-14 | MP (snapshot `gpt-4.1-nano-2025-04-14`, now marked deprecated) | ✓ verified | -| deprecated | **absent — should be `true`** | deprecations page: shutdown **2026-10-23**, replacement gpt-5.4-nano; MP also recommends "starting with GPT-5 nano" | **FIX: add `deprecated: true`** | - -### gpt-5.5-pro - -| Field | Value | Source | Verdict | -|---|---|---|---| -| input / 
output | 30.0 / 180.0 | MP + pricing page + OpenRouter | ✓ verified (two sources) | -| cachedInput | absent | MP: "GPT-5.5 Pro does not offer a cached input discount" | ✓ verified correct omission | -| updatedAt | **2026-04-23 — stale** | pricing re-verified 2026-06-11 this session | **FIX: bump to 2026-06-11** (PR #4990 claimed to bump all entries but missed this one) | -| contextWindow | 1,050,000 | MP: "1,050,000 context window" | ✓ verified | -| maxOutputTokens | 128,000 | MP | ✓ verified | -| nativeStructuredOutputs | true | MP: "Structured outputs: Supported" | ✓ verified (display-only flag) | -| reasoningEffort | **['none','low','medium','high','xhigh'] — wrong** | see Open Question (a) below | **FIX: change to `['medium','high','xhigh']`** | -| verbosity | **present — should be removed** | see Open Question (b) below | **FIX: remove `verbosity` block** | -| releaseDate | 2026-04-23 | MP snapshot `gpt-5.5-pro-2026-04-23` | ✓ verified | -| deprecated | absent | no deprecation notes on MP | ✓ verified | - -**Open Question (a) — resolved.** The gpt-5.5-pro model page does NOT enumerate reasoning effort values (fetched twice, explicitly asked for any sentence containing "effort" — the page contains no `reasoning.effort` enumeration). The reasoning guide says values are model-dependent and "check the relevant model page". Direct documentation for the siblings is explicit: gpt-5.4-pro MP — "supports reasoning.effort: medium, high, xhigh"; gpt-5.2-pro MP — "supports reasoning.effort: medium, high, xhigh"; gpt-5-pro MP — "defaults to (and only supports) reasoning.effort: high". Every pro-tier model that documents the parameter excludes `none` and `low` — the pro tier exists to "use more compute to think harder" (gpt-5.5-pro MP), making `none`/`low` incoherent with the product. The most defensible value set is **`['medium','high','xhigh']`**, matching both documented pro siblings. 
The current `['none','low','medium','high','xhigh']` appears copied from non-pro gpt-5.5 and is backed by no source. - -**Open Question (b) — resolved.** Not documented. The gpt-5.5-pro page does not mention `verbosity` (explicitly checked). No pro-tier model page (gpt-5.4-pro, gpt-5.2-pro, gpt-5-pro) documents verbosity, and the GPT-5.5 usage guide discusses `text.verbosity` only for gpt-5.5. Since `verbosity` is runtime-consumed (`core.ts` sends `text.verbosity` to the API), advertising it on a model that may reject it is a live failure risk. **Remove the verbosity block from gpt-5.5-pro.** - -### gpt-5.5 - -| Field | Value | Source | Verdict | -|---|---|---|---| -| input / cachedInput / output | 5.0 / 0.5 / 30.0 | MP + pricing page + OpenRouter | ✓ verified (two sources) | -| updatedAt | **2026-04-23 — stale** | re-verified 2026-06-11 | **FIX: bump to 2026-06-11** (missed by PR #4990) | -| contextWindow / maxOutputTokens | 1,050,000 / 128,000 | MP | ✓ verified | -| nativeStructuredOutputs | true | MP: structured outputs supported | ✓ verified | -| reasoningEffort ['none','low','medium','high','xhigh'] | present | MP: "Reasoning.effort supports: none, low, medium (default), high and xhigh" | ✓ verified verbatim | -| verbosity ['low','medium','high'] | present | GPT-5.5 usage guide documents `text.verbosity` (recommends `low` for concise) | ✓ verified | -| releaseDate | 2026-04-23 | announcement 2026-04-23 (openai.com/index/introducing-gpt-5-5/, TechCrunch); pro sibling snapshot is `-2026-04-23` | ✓ verified (note: API availability was 2026-04-24; snapshot naming uses 04-23) | -| recommended | true | flagship per OpenAI ("latest GPT-5.5" is the recommended upgrade target on gpt-5.2/gpt-5/o3 pages) | ✓ intentional, docs-consistent | - -### gpt-5.4-pro - -| Field | Value | Source | Verdict | -|---|---|---|---| -| input / output | 30.0 / 180.0 | MP + pricing page | ✓ verified (note: MP — ">272K input tokens are priced at 2x input and 1.5x output"; the flat-rate model 
in `models.ts` cannot express this; under-bills long-context pro calls — pre-existing limitation, see Unverifiable/limitations) | -| cachedInput | absent | pricing page shows no cached rate for pro | ✓ verified | -| updatedAt | 2026-06-11 | this validation | ✓ | -| contextWindow / maxOutputTokens | 1,050,000 / 128,000 | MP | ✓ verified | -| reasoningEffort ['medium','high','xhigh'] | present | MP: "supports reasoning.effort: medium, high, xhigh" | ✓ verified verbatim | -| verbosity | absent | not documented for pro | ✓ correct omission | -| releaseDate | 2026-03-05 | gpt-5.4 snapshot `gpt-5.4-2026-03-05`; same launch | ✓ verified | -| deprecated | absent | none | ✓ | - -### gpt-5.4 - -| Field | Value | Source | Verdict | -|---|---|---|---| -| input / cachedInput / output | 2.5 / 0.25 / 15.0 | MP + pricing page + OpenRouter | ✓ verified (two sources) | -| updatedAt | 2026-06-11 | this validation | ✓ | -| contextWindow / maxOutputTokens | 1,050,000 / 128,000 | MP | ✓ verified | -| reasoningEffort ['none','low','medium','high','xhigh'] | present | MP: "Reasoning.effort supports: none (default), low, medium, high and xhigh" | ✓ verified verbatim | -| verbosity ['low','medium','high'] | present | not on MP; carried from GPT-5-line `text.verbosity` parameter (documented in usage guide / help center for the GPT-5 family) | ✓ kept — see "Deliberately not changed" | -| releaseDate | 2026-03-05 | MP snapshot `gpt-5.4-2026-03-05` | ✓ verified | - -### gpt-5.4-mini - -| Field | Value | Source | Verdict | -|---|---|---|---| -| input / cachedInput / output | 0.75 / 0.075 / 4.5 | MP + pricing page | ✓ verified | -| updatedAt | 2026-06-11 | this validation | ✓ | -| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified | -| reasoningEffort ['none','low','medium','high','xhigh'] | present | gpt-5.4 family per search-confirmed docs: "gpt-5.4, gpt-5.4-mini, and gpt-5.4-nano support none, low, medium, high, and xhigh" | ✓ verified | -| verbosity | present | family 
convention | ✓ kept | -| releaseDate | 2026-03-17 | MP snapshot `gpt-5.4-mini-2026-03-17` | ✓ verified | - -### gpt-5.4-nano - -| Field | Value | Source | Verdict | -|---|---|---|---| -| input / cachedInput / output | 0.2 / 0.02 / 1.25 | MP + pricing page | ✓ verified | -| updatedAt | 2026-06-11 | this validation | ✓ | -| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified | -| reasoningEffort / verbosity | as gpt-5.4-mini | same family docs | ✓ verified / kept | -| releaseDate | 2026-03-17 | MP snapshot `gpt-5.4-nano-2026-03-17` | ✓ verified | -| speedOptimized | true | MP: "cheapest GPT-5.4-class model", optimized for classification/extraction/sub-agents | ✓ intentional repo flag, consistent with docs | - -### gpt-5.2-pro - -| Field | Value | Source | Verdict | -|---|---|---|---| -| input / output | 21.0 / 168.0 | MP | ✓ verified | -| cachedInput | absent | MP shows none | ✓ | -| updatedAt | 2026-06-11 | this validation | ✓ | -| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified | -| reasoningEffort ['medium','high','xhigh'] | present | MP: "supports reasoning.effort: medium, high, xhigh" | ✓ verified verbatim | -| releaseDate | 2025-12-11 | MP snapshot `gpt-5.2-pro-2025-12-11` | ✓ verified | -| deprecated | absent | MP recommends upgrading to gpt-5.5-pro but no shutdown date on deprecations page | ✓ verified (soft-superseded, not deprecated) | - -### gpt-5.2 - -| Field | Value | Source | Verdict | -|---|---|---|---| -| input / cachedInput / output | 1.75 / 0.175 / 14.0 | MP + OpenRouter | ✓ verified (two sources) | -| updatedAt | 2026-06-11 | this validation | ✓ | -| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified | -| reasoningEffort ['none','low','medium','high','xhigh'] | present | MP: "none (default), low, medium, high and xhigh" | ✓ verified verbatim | -| verbosity | present | family convention | ✓ kept | -| releaseDate | 2025-12-11 | MP snapshot `gpt-5.2-2025-12-11` | ✓ verified | -| deprecated | 
absent | superseded by 5.5 but no shutdown (only `gpt-5.2-chat-latest` has one) | ✓ verified | - -### gpt-5.1 - -| Field | Value | Source | Verdict | -|---|---|---|---| -| input / cachedInput / output | 1.25 / 0.125 / 10.0 | MP | ✓ verified | -| updatedAt | 2026-06-11 | this validation | ✓ | -| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified | -| reasoningEffort ['none','low','medium','high'] | present | MP: "Reasoning.effort supports: none (default), low, medium, and high" (no xhigh) | ✓ verified verbatim | -| verbosity | present | family convention | ✓ kept | -| releaseDate | **2025-11-12** | MP snapshot is `gpt-5.1-2025-11-13` | **FIX: → 2025-11-13.** Repo convention everywhere else in this block is snapshot date (gpt-5-pro 10-06, gpt-5.2 12-11, gpt-4.1 04-14, o3-pro 06-10, …). 2025-11-12 is the announcement date; the API snapshot is 11-13 | - -### gpt-5-pro - -| Field | Value | Source | Verdict | -|---|---|---|---| -| input / output | 15.0 / 120.0 | MP | ✓ verified | -| cachedInput | absent | MP shows none | ✓ | -| updatedAt | 2026-06-11 | this validation | ✓ | -| contextWindow | 400,000 | MP | ✓ verified | -| maxOutputTokens | 272,000 | MP: "272,000 max output tokens" | ✓ verified (yes, it really is larger than the rest of the family) | -| reasoningEffort ['high'] | present | MP: "defaults to (and only supports) `reasoning.effort: high`" | ✓ verified verbatim | -| releaseDate | 2025-10-06 | MP snapshot `gpt-5-pro-2025-10-06` | ✓ verified — **PR #4990's change confirmed correct** | -| deprecated | absent | none listed | ✓ | - -### gpt-5 - -| Field | Value | Source | Verdict | -|---|---|---|---| -| input / cachedInput / output | 1.25 / 0.125 / 10.0 | MP | ✓ verified | -| updatedAt | 2026-06-11 | this validation | ✓ | -| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified | -| reasoningEffort ['minimal','low','medium','high'] | present | MP: "minimal, low, medium, and high"; reasoning guide confirms `minimal` introduced 
with GPT-5 | ✓ verified verbatim | -| verbosity | present | verbosity launched with GPT-5 | ✓ verified | -| releaseDate | 2025-08-07 | MP snapshot `gpt-5-2025-08-07` | ✓ verified | -| deprecated | absent | MP: "We recommend using the latest GPT-5.5" but no shutdown date — deprecations page: "not explicitly listed as deprecated" | ✓ verified (superseded, not deprecated) | - -### gpt-5-mini - -| Field | Value | Source | Verdict | -|---|---|---|---| -| input / cachedInput / output | 0.25 / 0.025 / 2.0 | MP | ✓ verified | -| updatedAt | 2026-06-11 | this validation | ✓ | -| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified | -| reasoningEffort / verbosity | gpt-5 family values | GPT-5 family launch docs | ✓ verified | -| releaseDate | 2025-08-07 | MP snapshot `gpt-5-mini-2025-08-07` | ✓ verified | - -### gpt-5-nano - -| Field | Value | Source | Verdict | -|---|---|---|---| -| input / cachedInput / output | 0.05 / 0.005 / 0.4 | MP | ✓ verified | -| updatedAt | 2026-06-11 | this validation | ✓ | -| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified | -| reasoningEffort / verbosity | gpt-5 family values | family docs | ✓ verified | -| releaseDate | 2025-08-07 | MP snapshot `gpt-5-nano-2025-08-07` | ✓ verified | - -### gpt-5-chat-latest - -| Field | Value | Source | Verdict | -|---|---|---|---| -| input / cachedInput / output | 1.25 / 0.125 / 10.0 | MP | ✓ verified | -| updatedAt | 2026-06-11 | this validation | ✓ | -| contextWindow / maxOutputTokens | 128,000 / 16,384 | MP | ✓ verified | -| temperature 0–2 | present | non-reasoning chat snapshot | ✓ convention | -| releaseDate | 2025-08-07 | GPT-5 launch snapshot | ✓ verified | -| deprecated | true | **deprecations page: shutdown 2026-07-23, replacement gpt-5.5** | ✓ verified — **PR #4990's change confirmed correct and now formally docs-backed** | - -### o4-mini - -| Field | Value | Source | Verdict | -|---|---|---|---| -| input / cachedInput / output | 1.1 / 0.275 / 4.4 | MP | ✓ 
verified | -| updatedAt | 2026-06-11 | this validation | ✓ | -| contextWindow / maxOutputTokens | 200,000 / 100,000 | MP | ✓ verified | -| reasoningEffort ['low','medium','high'] | present | see Open Question (c) below | ✓ verified | -| releaseDate | 2025-04-16 | MP snapshot `o4-mini-2025-04-16` | ✓ verified | -| deprecated | true | deprecations page: shutdown **2026-10-23**, replacement gpt-5.4-mini; MP: snapshot Deprecated, "succeeded by GPT-5 mini" | ✓ verified — **PR #4990's change confirmed correct** | - -### o3-pro - -| Field | Value | Source | Verdict | -|---|---|---|---| -| input / output | 20.0 / 80.0 | MP | ✓ verified | -| cachedInput | absent | MP shows none | ✓ | -| updatedAt | 2026-06-11 | this validation | ✓ | -| contextWindow / maxOutputTokens | 200,000 / 100,000 | MP | ✓ verified | -| reasoningEffort | absent | MP: "Reasoning: Highest", no effort enum documented (pro pattern: fixed high effort) | ✓ correct omission | -| releaseDate | 2025-06-10 | MP snapshot `o3-pro-2025-06-10` | ✓ verified | -| deprecated | absent | deprecations page does not list o3-pro (only o3/o3-mini) | ✓ verified — note the oddity that base o3 is scheduled for shutdown while o3-pro is not; evidence-based, leave as is | - -### o3 - -| Field | Value | Source | Verdict | -|---|---|---|---| -| input / cachedInput / output | 2 / 0.5 / 8 | MP + OpenRouter ($2/$8) | ✓ verified (two sources) | -| updatedAt | 2026-06-11 | this validation | ✓ | -| contextWindow / maxOutputTokens | 200,000 / 100,000 | MP | ✓ verified | -| reasoningEffort ['low','medium','high'] | present | Open Question (c) | ✓ verified | -| releaseDate | 2025-04-16 | MP snapshot `o3-2025-04-16` | ✓ verified | -| deprecated | **absent — should be `true`** | **deprecations page: shutdown 2026-10-23**, replacement gpt-5.5-pro; MP: "superseded by GPT-5" | **FIX: add `deprecated: true`** | - -### o3-mini - -| Field | Value | Source | Verdict | -|---|---|---|---| -| input / cachedInput / output | 1.1 / 0.55 / 4.4 | MP (note: 
cachedInput 0.55 differs from o4-mini's 0.275 — both verified correct per their MPs) | ✓ verified | -| updatedAt | 2026-06-11 | this validation | ✓ | -| contextWindow / maxOutputTokens | 200,000 / 100,000 | MP | ✓ verified | -| reasoningEffort ['low','medium','high'] | present | o3-mini launch post: "three reasoning effort options—low, medium, and high" | ✓ verified explicitly | -| releaseDate | 2025-01-31 | MP snapshot `o3-mini-2025-01-31` | ✓ verified | -| deprecated | **absent — should be `true`** | **deprecations page: shutdown 2026-10-23**, replacement gpt-5.5; MP: snapshot marked deprecated | **FIX: add `deprecated: true`** | - -### o1 - -| Field | Value | Source | Verdict | -|---|---|---|---| -| input / cachedInput / output | 15.0 / 7.5 / 60 | MP | ✓ verified | -| updatedAt | 2026-06-11 | this validation | ✓ | -| contextWindow / maxOutputTokens | 200,000 / 100,000 | MP | ✓ verified | -| reasoningEffort ['low','medium','high'] | present | Open Question (c) | ✓ verified | -| releaseDate | **2024-12-05** | MP snapshot is `o1-2024-12-17` | **FIX (minor): → 2024-12-17** for snapshot-date consistency. 2024-12-05 is the ChatGPT launch; the API snapshot (the convention used by every other entry in this block) is 12-17 | -| deprecated | **absent — recommend `true`** | MP: sole snapshot `o1-2024-12-17` explicitly "Deprecated"; described as "Previous full o-series reasoning model". Base alias not on the deprecations shutdown table (only o1-preview/o1-mini, already shut down) | **FIX (recommended): add `deprecated: true`** — weaker evidence than o3/o3-mini (no shutdown date for the alias), but its only snapshot is deprecated and every other o-series peer except o3-pro is either already deprecated (o4-mini) or scheduled for shutdown (o3, o3-mini) | - -**Open Question (c) — resolved.** The current model pages no longer enumerate `reasoning_effort` for the o-series, and the Responses API reference page content does not surface the enum inline. 
The reasoning guide states: "Supported values are model-dependent and can include `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`... check the relevant model page." Best available evidence: (1) o3-mini launch post (openai.com/index/openai-o3-mini/) explicitly: "three reasoning effort options—low, medium, and high"; (2) the API changelog notes `reasoning_effort` was added for o1 models with those three values; (3) `none`/`minimal`/`xhigh` were introduced with the GPT-5 line and were never back-ported to o-series. **`['low','medium','high']` for o1, o3, o3-mini, o4-mini is confirmed — no change.** - -### gpt-4o - -| Field | Value | Source | Verdict | -|---|---|---|---| -| input / cachedInput / output | 2.5 / 1.25 / 10.0 | MP + OpenRouter ($2.50/$10) | ✓ verified (two sources) | -| updatedAt | 2026-06-11 | this validation | ✓ | -| contextWindow / maxOutputTokens | 128,000 / 16,384 | MP | ✓ verified | -| temperature 0–2 | present | convention | ✓ | -| releaseDate | 2024-05-13 | MP snapshot `gpt-4o-2024-05-13`; OpenRouter "released May 13, 2024" | ✓ verified | -| deprecated | true | see Open Question (d) | ✓ verified — and now docs-backed | - -**Open Question (d) — resolved, better than expected.** The brief said gpt-4o is "active per OpenAI" and `deprecated: true` is a deliberate steering decision. The live deprecations page now shows **gpt-4o: shutdown 2026-10-23, replacement gpt-5.5**. So `deprecated: true` is no longer just an intentional product deviation — it is officially correct. Keep, no caveat needed. - ---- - -## Open Question (e) — `defaultModel: 'gpt-4.1'` - -OpenAI's flagship is gpt-5.5 (announcement 2026-04-23; the gpt-5.2/gpt-5/o3 pages all point at "the latest GPT-5.5"). gpt-4.1 remains active (it is OpenAI's "smartest non-reasoning model" and is not on the deprecations page), so the current default is not broken — it is a cheap, fast, temperature-supporting non-reasoning default, which is a defensible UX choice for new blocks. 
**Recommendation:** consider `defaultModel: 'gpt-5.5'` (or `gpt-5.4-mini` for a cost-conscious reasoning default) to match the flagship, but this is a **product decision**, not a correctness fix — not included in the machine-applicable list. - ---- - -## Changes made in this pass (recommended to apply now) - -1. **gpt-5.5-pro** — `reasoningEffort.values`: `['none','low','medium','high','xhigh']` → `['medium','high','xhigh']`. Undocumented on its own page; both documented pro siblings (gpt-5.4-pro, gpt-5.2-pro) enumerate exactly `medium, high, xhigh`; pro tier semantics exclude none/low. Sending `reasoning.effort: 'none'` to a pro model risks a 400. -2. **gpt-5.5-pro** — remove the `verbosity` block. Not documented for any pro model; the provider sends `text.verbosity` at runtime, so advertising it is a live API-error risk. -3. **gpt-5.5-pro** — `pricing.updatedAt`: `2026-04-23` → `2026-06-11` (re-verified today; PR #4990 missed this entry despite claiming an all-entry bump). -4. **gpt-5.5** — `pricing.updatedAt`: `2026-04-23` → `2026-06-11` (same). -5. **o3** — add `deprecated: true` (official shutdown 2026-10-23). -6. **o3-mini** — add `deprecated: true` (official shutdown 2026-10-23). -7. **gpt-4.1-nano** — add `deprecated: true` (official shutdown 2026-10-23, replacement gpt-5.4-nano). -8. **o1** — add `deprecated: true` (sole snapshot `o1-2024-12-17` marked Deprecated; "previous" o-series model; recommended, slightly weaker evidence). -9. **gpt-5.1** — `releaseDate`: `2025-11-12` → `2025-11-13` (snapshot `gpt-5.1-2025-11-13`; snapshot-date convention). -10. **o1** — `releaseDate`: `2024-12-05` → `2024-12-17` (snapshot `o1-2024-12-17`; snapshot-date convention; minor). - -## Deliberately not changed - -- **gpt-4o `deprecated: true`** — originally an intentional steering decision; now officially correct (shutdown 2026-10-23). Keep. 
-- **gpt-5-chat-latest / o4-mini `deprecated: true`** (PR #4990) — both confirmed by the deprecations page (2026-07-23 and 2026-10-23 shutdowns). Keep. -- **`defaultModel: 'gpt-4.1'`** — product decision; gpt-4.1 is active. Flagged for product review (gpt-5.5 is the flagship), not a correctness fix. -- **`verbosity` on non-pro gpt-5.x models (gpt-5.4/-mini/-nano, gpt-5.2, gpt-5.1, gpt-5 family)** — current model pages don't enumerate it per-model, but `text.verbosity` is a documented GPT-5-line parameter (GPT-5 launch; GPT-5.5 usage guide; OpenAI help center) and the provider has been sending it without errors. Keep. -- **`temperature {0,2}` on gpt-4.1 family, gpt-4o, gpt-5-chat-latest** — model pages don't state sampling ranges; 0–2 is the documented API-wide range for non-reasoning chat models. Correct by convention. -- **o3-pro not deprecated** — the deprecations page lists o3 and o3-mini but not o3-pro. Odd but evidence-based; leave. -- **gpt-5.2 / gpt-5 / gpt-5.2-pro not deprecated** — docs say "superseded / recommend GPT-5.5" but list no shutdown; superseded ≠ deprecated. Leave. -- **`recommended: true` on gpt-5.5 and `speedOptimized: true` on gpt-5.4-nano** — repo-internal flags, consistent with docs positioning. -- **o3-mini `cachedInput: 0.55` vs o4-mini `0.275`** — looks like a typo but both verified correct on their respective model pages. - -## Unverifiable / known limitations - -- **gpt-5.5-pro effort values** — no official enumeration exists anywhere fetched (model page, reasoning guide, usage guide, OpenRouter). The `['medium','high','xhigh']` recommendation is an inference from documented siblings — the strongest available evidence, but flagged as not directly documented. If OpenAI later publishes the enum, re-verify. -- **gpt-5.4-pro long-context surcharge** — MP states prompts >272K input tokens bill at 2x input / 1.5x output. 
The flat `pricing` shape in `models.ts` cannot represent tiered pricing; cost estimates for very long pro prompts will be low. Pre-existing schema limitation, out of scope here. -- **gpt-5.5 release date 04-23 vs API availability 04-24** — announcement and snapshot say 2026-04-23; press coverage says API access opened 2026-04-24. Kept 2026-04-23 (snapshot wins). -- **Verbosity enum per non-flagship model** — `['low','medium','high']` is documented at the parameter level, not re-enumerated on each model page. -- **`nativeStructuredOutputs`** — only gpt-5.5/gpt-5.5-pro carry it though most listed models support structured outputs; flag is display-only (landing page), so under-reporting is cosmetic, not functional. Left as is. diff --git a/docs/models/vertex.md b/docs/models/vertex.md deleted file mode 100644 index 8e8da6ed1a..0000000000 --- a/docs/models/vertex.md +++ /dev/null @@ -1,212 +0,0 @@ -# Vertex AI provider — model validation (`models.ts` lines ~1487–1685) - -- **Date:** 2026-06-11 (final exhaustive pass, re-verifying PR #4990 changes) -- **Method:** Live WebFetch of Google pricing/model/changelog pages; Google Cloud doc pages render nav-only to fetchers, so Vertex-specific specs were verified via Context7 MCP (`/websites/cloud_google_vertex-ai`, `/websites/cloud_google_gemini-enterprise-agent-platform`) and WebSearch fallback, per the validate-model skill. Two-source rule applied to pricing (Vertex pricing page + Gemini API pricing page / OpenRouter / CloudPrice). 
-- **Primary sources:** - - https://cloud.google.com/vertex-ai/generative-ai/pricing (rendered fully — all pricing below) - - https://ai.google.dev/gemini-api/docs/pricing (cross-check; global-endpoint prices identical for 2.5/3.x) - - https://ai.google.dev/gemini-api/docs/models/gemini-3.5-flash, …/gemini-3.1-pro-preview, …/gemini-3.1-flash-lite, …/gemini-3-flash-preview, …/gemini-2.5-pro (token limits) - - https://ai.google.dev/gemini-api/docs/thinking (thinking levels/defaults) - - https://ai.google.dev/gemini-api/docs/changelog (lifecycle dates) - - https://deepmind.google/models/model-cards/gemini-3-5-flash/ (3.5 Flash card) - - Vertex docs via Context7: `…/models/gemini/2-5-pro` ("maximum output token limit of 65,535"), `…/migrate/migrate-palm-to-gemini`, `…/learn/model-versioning`, `…/learn/locations` - - https://blog.google/technology/developers/deep-research-agent-gemini-api/ (2025-12-11), https://blog.google/innovation-and-ai/models-and-research/gemini-models/gemini-3-1-flash-lite/ (2026-03-03) -- **Provider implementation:** `apps/sim/providers/vertex/index.ts` contains no capability handling itself — it delegates to `executeGeminiRequest` in `apps/sim/providers/gemini/core.ts`, which consumes `request.thinkingLevel` (core.ts:955–961, sent only when user explicitly selects a level) and `request.maxTokens` (core.ts:934). `thinking`, `temperature`, and `maxOutputTokens` flags are live; the global `maxOutputTokens` fallback is 4096 (models.ts:865), which is why PR #4990 added explicit caps. 
- ---- - -## Per-model validation - -### vertex/gemini-3.5-flash - -| Field | Repo | Live docs | Source | Verdict | -|---|---|---|---|---| -| id | `gemini-3.5-flash` (GA 2026-05-19) | `gemini-3.5-flash` | ai.google.dev changelog ("Released `gemini-3.5-flash`… GA" 2026-05-19) | ✓ | -| input | 1.5 | $1.50 (global) | Vertex pricing + Gemini API pricing + OpenRouter | ✓ (3 sources) | -| cachedInput | 0.15 | $0.15 | Vertex pricing + Gemini API pricing | ✓ | -| output | 9.0 | $9.00 | Vertex pricing + Gemini API pricing + OpenRouter | ✓ | -| contextWindow | 1048576 | 1,048,576 | ai.google.dev/gemini-api/docs/models/gemini-3.5-flash; DeepMind card "1M" | ✓ | -| maxOutputTokens | 65536 | 65,536 | ai.google.dev model page ("64K" on DeepMind card) | ✓ | -| thinking | minimal/low/medium/high, default medium | minimal, low, medium, high; default medium | ai.google.dev/gemini-api/docs/thinking; OpenRouter ("defaults to medium thinking effort") | ✓ | -| releaseDate | 2026-05-19 | "Published 19 May 2026" | DeepMind model card + changelog | ✓ | -| recommended | absent | — | google provider entry has `recommended: true` on the same model | 🔵 add (see fixes) | - -Note: Vertex introduces **non-global endpoint pricing (+10%: $1.65 / $9.90 / $0.165) effective 2026-07-01**; our entries model global pricing. See operational caveats. 
- -### vertex/gemini-3.1-pro-preview - -| Field | Repo | Live docs | Source | Verdict | -|---|---|---|---|---| -| id | `gemini-3.1-pro-preview` | `gemini-3.1-pro-preview` | ai.google.dev/gemini-api/docs/models/gemini-3.1-pro-preview | ✓ | -| input | 2.0 | $2 (≤200k); $4 (>200k) | Vertex pricing + Gemini API pricing | ✓ (≤200k tier; >200k tier not modeled — see caveats) | -| cachedInput | 0.2 | $0.20 (≤200k); $0.40 (>200k) | same | ✓ | -| output | 12.0 | $12 (≤200k input); $18 (>200k) | same | ✓ | -| contextWindow | 1048576 | 1,048,576 | ai.google.dev model page; Vertex release notes "1M token context window" | ✓ | -| maxOutputTokens | 65536 | 65,536 | ai.google.dev model page | ✓ | -| thinking | low/medium/high, default high | low, medium, high; default high (Dynamic); **minimal not supported** | ai.google.dev/gemini-api/docs/thinking | ✓ (PR #4990 drop of 'minimal' confirmed correct) | -| releaseDate | 2026-02-19 | 2026-02-19 | blog.google gemini-3-1-pro; github.blog changelog 2026-02-19 | ✓ | - -**Operational caveat (open question f):** Google documents `gemini-3.1-pro-preview` as **global-endpoint-only on Vertex AI** (Vertex `learn/locations` lists it under global-endpoint models; third-party migration guides state regional endpoints don't serve it). `apps/sim/providers/vertex/index.ts:34` resolves location as `request.vertexLocation || env.VERTEX_LOCATION || 'us-central1'` — with the default `us-central1`, requests to this model will fail with model-not-found. Users must set `vertexLocation` / `VERTEX_LOCATION` to `global`. No code change made (per instructions); documented here. 
- -### vertex/gemini-3.1-flash-lite - -| Field | Repo | Live docs | Source | Verdict | -|---|---|---|---|---| -| id | `gemini-3.1-flash-lite` (renamed from `-preview` in PR #4990) | stable id `gemini-3.1-flash-lite`; preview id shut down on Gemini API 2026-05-25; Vertex preview-alias discontinuation 2026-07-09 | ai.google.dev changelog ("Released `gemini-3.1-flash-lite`… GA" 2026-05-07; preview "shut down" 2026-05-25); cloud.google.com blog "Gemini 3.1 Flash-Lite is now generally available" | ✓ rename confirmed correct | -| input | 0.25 | $0.25 (global, text) | Vertex pricing + Gemini API pricing | ✓ | -| cachedInput | 0.025 | $0.025 | same | ✓ | -| output | 1.5 | $1.50 | same + blog.google launch post | ✓ | -| contextWindow | 1048576 | 1,048,576 | ai.google.dev/gemini-api/docs/models/gemini-3.1-flash-lite | ✓ | -| maxOutputTokens | 65536 | 65,536 | same | ✓ | -| thinking levels | minimal/low/medium/high | minimal "Supported (Default)", low, medium, high | ai.google.dev/gemini-api/docs/thinking (3.1 Flash-Lite row; the "Not supported" row is 3.1 **Pro**) | ✓ — orchestrator re-fetched the thinking doc and corrected this report's initial misreading | -| thinking default | 'minimal' | minimal ("Supported (Default)") | same | ✓ | -| releaseDate | 2026-05-07 | stable GA 2026-05-07 (preview launch was 2026-03-03) | ai.google.dev changelog | ✓ changed this pass to the GA date | -| speedOptimized | absent | "our most cost-effective model yet", lowest-latency tier | blog.google launch post | 🔵 add (see fixes) | - -**Open question (c) resolved:** the preview→stable rename is right (preview already shut down on the Gemini API 2026-05-25; Vertex alias discontinues 2026-07-09). This report initially claimed `minimal` is rejected on 3.1 Flash-Lite — that was a misreading of the thinking-levels table (the "Not supported" cell belongs to 3.1 **Pro**). 
The orchestrator re-fetched ai.google.dev/gemini-api/docs/thinking, which states for Gemini 3.1 Flash-Lite: minimal "Supported (Default)", plus low/medium/high. The repo's `levels: ['minimal','low','medium','high'], default: 'minimal'` is correct and was left unchanged. - -### vertex/gemini-3-pro-preview (deprecated) - -| Field | Repo | Live docs | Source | Verdict | -|---|---|---|---|---| -| deprecated | true | Gemini API shut down 2026-03-09 (`gemini-3-pro-preview` now aliases `gemini-3.1-pro-preview`); Vertex discontinuation 2026-03-26 | ai.google.dev changelog; Vertex deprecations (via third-party migration guides citing Google's table) | ✓ deprecated:true confirmed correct | -| pricing 2.0/0.2/12.0 | — | current pricing page no longer lists text Gemini 3 Pro (only "Gemini 3 Pro Image") | cloud.google.com/vertex-ai/generative-ai/pricing | ⚠️ historical values, unverifiable from current page; acceptable on a deprecated entry | -| contextWindow | 1000000 | launch materials said "1M token context window" | Vertex release notes | ⚠️ 1,000,000 vs sibling models' 1,048,576; left as-is (deprecated) | -| thinking | low/medium/high, default high | consistent with 3.x Pro line (no minimal) | ai.google.dev/gemini-api/docs/thinking (3.1-pro row) | ✓ | -| releaseDate | 2025-11-18 | 2025-11-18 | blog.google gemini-3; github.blog 2025-11-18; axios 2025-11-18 | ✓ | - -Note: since the id now auto-redirects to 3.1 Pro on Google's side, calls may silently serve 3.1 Pro; `deprecated: true` steering users away is the right call. 
- -### vertex/gemini-3-flash-preview - -| Field | Repo | Live docs | Source | Verdict | -|---|---|---|---|---| -| id | `gemini-3-flash-preview` | `gemini-3-flash-preview` | ai.google.dev/gemini-api/docs/models/gemini-3-flash-preview | ✓ | -| input / cachedInput / output | 0.5 / 0.05 / 3.0 | $0.50 / $0.05 / $3.00 | Vertex pricing + Gemini API pricing + TechCrunch | ✓ | -| contextWindow | 1048576 (PR #4990 change) | 1,048,576 | ai.google.dev model page | ✓ change confirmed | -| maxOutputTokens | 65536 | 65,536 | same | ✓ | -| thinking | minimal/low/medium/high, default high | minimal, low, medium, high; default high (Dynamic) | ai.google.dev/gemini-api/docs/thinking | ✓ | -| releaseDate | 2025-12-17 | 2025-12-17 | techcrunch.com 2025/12/17; 9to5google 2025/12/17; blog.google | ✓ | - -### vertex/gemini-2.5-pro - -| Field | Repo | Live docs | Source | Verdict | -|---|---|---|---|---| -| input | 1.25 | $1.25 (≤200k); $2.50 (>200k) | Vertex pricing + Gemini API pricing | ✓ (≤200k tier) | -| cachedInput | 0.125 | Vertex page displays "$0.13" (rounded); Gemini API exact "$0.125" | both pricing pages | ✓ (0.125 is exact value) | -| output | 10.0 | $10 (≤200k); $15 (>200k) | same | ✓ | -| contextWindow | 1048576 | 1,048,576 | Vertex `models/gemini/2-5-pro` (via Context7) + ai.google.dev | ✓ | -| maxOutputTokens | **65536** | **Vertex: 65,535** ("maximum output token limit of 65,535"); Gemini API page: 65,536 | docs.cloud.google.com/…/models/gemini/2-5-pro (via Context7); ai.google.dev/gemini-api/docs/models/gemini-2.5-pro | ✗ 🟡 — platforms disagree; this is the **Vertex** entry, so Vertex's 65,535 wins | -| releaseDate | 2025-03-25 | 2.5 Pro Experimental announced 2025-03-25 | blog.google gemini-model-thinking-updates-march-2025; siliconangle 2025/03/25 | ✓ | -| deprecated | absent | retirement on Vertex extended to **2026-10-16** | Vertex release notes (via gcpstudyhub summary of release-notes) | ✓ correctly NOT deprecated today — see (d) below | - -### 
vertex/gemini-2.5-flash - -| Field | Repo | Live docs | Source | Verdict | -|---|---|---|---|---| -| input / cachedInput / output | 0.3 / 0.03 / 2.5 | $0.30 / $0.03 / $2.50 | Vertex pricing + Gemini API pricing | ✓ | -| contextWindow | 1048576 | 1,048,576 | Vertex `models/gemini/2-5-flash` (via Context7) | ✓ | -| maxOutputTokens | **65536** | **Vertex: 65,535** ("default output token limit of 65,535") | docs.cloud.google.com/…/models/gemini/2-5-flash (via Context7); also migrate-palm-to-gemini doc ("2.5 Pro and 2.5 Flash… output context length of 65,535") | ✗ 🟡 | -| releaseDate | 2025-05-20 | preview launched 2025-04-17 on Gemini API; I/O announcement 2025-05-20/21; Vertex GA June 2025 | ai.google.dev changelog; Google I/O coverage | ⚠️ date is the I/O announcement; preview predates it. Left as-is (convention ambiguity, not a factual error) | -| deprecated | absent | retires 2026-10-16 | as above | ✓ not deprecated today | - -### vertex/gemini-2.5-flash-lite - -| Field | Repo | Live docs | Source | Verdict | -|---|---|---|---|---| -| input / cachedInput / output | 0.1 / 0.01 / 0.4 | $0.10 / $0.01 / $0.40 | Vertex pricing + Gemini API pricing | ✓ | -| contextWindow | 1048576 | 1,048,576 | Vertex `models/gemini/2-5-flash-lite` | ✓ | -| maxOutputTokens | **65536** | **65,535** | Vertex 2-5-flash-lite doc / Oracle OCI mirror of Google spec (websearch confirmation: "maximum output for Gemini 2.5 Flash-Lite is 65,535 tokens") | ✗ 🟡 | -| releaseDate | 2025-06-17 | 2.5 family GA + Flash-Lite preview announced 2025-06-17 | cloud.google.com blog "Gemini 2.5 Updates: Flash/Pro GA, SFT, Flash-Lite on Vertex AI" | ✓ | -| speedOptimized | absent | smallest/fastest 2.5 tier | google provider entry has `speedOptimized: true` (models.ts:1436) | 🔵 add (see fixes) | -| deprecated | absent | retires 2026-10-16 | as above | ✓ not deprecated today | - -### vertex/gemini-2.0-flash (deprecated) - -| Field | Repo | Live docs | Source | Verdict | -|---|---|---|---|---| -| deprecated | true 
| discontinued on Vertex **2026-06-01** (model serving + Provisioned Throughput) | github.com/firebase/extensions/issues/2607; Vertex model-versioning doc ("as of March 6, 2026 … only available for existing customers") | ✓ PR #4990 change confirmed | -| input | **0.1** | **$0.15** (Vertex token-based row, text) | cloud.google.com/vertex-ai/generative-ai/pricing | ✗ 🟡 repo carries Gemini API pricing ($0.10), not Vertex's | -| output | **0.4** | **$0.60** | same | ✗ 🟡 | -| cachedInput | 0.025 | not listed on Vertex pricing page (that's the Gemini API cache price) | same | ❓ UNVERIFIED on Vertex | -| maxOutputTokens | absent (falls back 4096) | 8,192 ("output context length of 8,192 tokens by default") | Vertex migrate-palm-to-gemini doc | 🔵 google entry has 8192; add for parity (low priority, discontinued) | -| contextWindow | 1048576 | 1,048,576 | same doc | ✓ | -| releaseDate | 2025-02-05 | GA on Vertex 2025-02-05 | blog.google gemini-model-updates-february-2025; developers.googleblog.com | ✓ | - -### vertex/gemini-2.0-flash-lite (deprecated) - -| Field | Repo | Live docs | Source | Verdict | -|---|---|---|---|---| -| deprecated | true | discontinued on Vertex 2026-06-01 | same sources as 2.0-flash | ✓ | -| input / output | 0.075 / 0.3 | $0.075 / $0.30 | Vertex pricing page | ✓ | -| cachedInput | omitted | none listed | same | ✓ correctly omitted | -| maxOutputTokens | absent | 8,192 default | Vertex migrate doc | 🔵 parity suggestion (low priority) | -| releaseDate | 2025-02-25 | preview 2025-02-05; exact 2025-02-25 GA date not found in fetched pages | _attempted: blog.google, Vertex release notes_ | ❓ UNVERIFIED (plausible — GA followed preview by ~3 weeks; deprecated, left as-is) | - -### vertex/deep-research-pro-preview-12-2025 - -| Field | Repo | Live docs | Source | Verdict | -|---|---|---|---|---| -| id | `deep-research-pro-preview-12-2025` | Vertex pricing page has a "Gemini Deep Research Agent" row but no id; id appears on third-party Vertex trackers 
(CloudPrice `vertex_ai/deep-research-pro-preview-12-2025`); Gemini API changelog confirms Deep Research Agent preview launch 2025-12-11 but its docs now list `deep-research-preview-04-2026` / `deep-research-max-preview-04-2026` | cloud.google.com pricing; cloudprice.net; ai.google.dev/gemini-api/docs/deep-research + changelog | ⚠️ id verified only via secondary sources; **no announced shutdown of the 12-2025 id** — but Google has shipped 04-2026 successors on the Gemini API (watch item) | -| input | 2.0 | $2 | Vertex pricing page "Gemini Deep Research Agent" + CloudPrice | ✓ (open question a: pricing confirmed) | -| cachedInput | 0.2 | $0.20 | Vertex pricing page (CloudPrice omits cached) | ✓ | -| output | 12.0 | $12 | Vertex pricing page + CloudPrice | ✓ (PR #4990 output 12.0 confirmed) | -| contextWindow | 1048576 | **conflict**: CloudPrice says "66K tokens" context / "33K tokens" max output; underlying model is Gemini 3 Pro (1M ctx); no Google doc states the agent's window; launch blog only says it "handles large context gracefully" | cloudprice.net/models/vertex_ai/deep-research-pro-preview-12-2025; blog.google deep-research post; ai.google.dev/gemini-api/docs/deep-research (lists no token limits for any version) | ❓ UNVERIFIED — conflict NOT resolvable from Google docs (they publish no limits for the agent). 
1048576 is an inference from the Gemini 3 Pro core; CloudPrice's 66K/33K (≈65,536/32,768) may reflect the agent's actual per-task envelope | -| maxOutputTokens | 65536 | no Google figure; CloudPrice says 33K | same | ❓ UNVERIFIED | -| capabilities deepResearch / memory:false | true / false | it is a managed autonomous research agent; multi-turn memory not offered in preview | blog.google + ai.google.dev/gemini-api/docs/deep-research | ✓ reasonable | -| releaseDate | 2025-12-11 | "Published December 11, 2025"; changelog: "Launched the Gemini Deep Research Agent in preview" 2025-12-11 | blog.google deep-research-agent-gemini-api; ai.google.dev changelog | ✓ | - ---- - -## Changes made in this pass (PR #4990) — re-verification verdicts - -| PR #4990 change | Verdict | -|---|---| -| Rename `vertex/gemini-3.1-flash-lite-preview` → `vertex/gemini-3.1-flash-lite` | ✓ correct — stable id GA 2026-05-07; preview shut down on Gemini API 2026-05-25; Vertex alias discontinues 2026-07-09 | -| Drop `'minimal'` from 3.1-pro-preview thinking.levels | ✓ correct — thinking docs: minimal "Not supported" on 3.1 Pro | -| `deprecated: true` on gemini-3-pro-preview | ✓ correct — shut down (Gemini API 2026-03-09; Vertex 2026-03-26) | -| `deprecated: true` on both 2.0 models | ✓ correct — discontinued 2026-06-01 | -| deep-research output → 12.0, cachedInput 0.2 | ✓ correct — Vertex pricing page row | -| deep-research ctx 1048576 + maxOutputTokens 65536 | ❓ remains unverifiable; CloudPrice conflict (66K/33K) unresolved — Google publishes no limits for the agent | -| maxOutputTokens 65536 on 3.5-flash / 3.1-pro / 3.1-flash-lite / 3-flash | ✓ correct — all four documented at 65,536 on their Gemini API model pages | -| maxOutputTokens 65536 on 2.5-pro / 2.5-flash / 2.5-flash-lite | ✗ off-by-one for Vertex — Vertex docs say **65,535** (Gemini API pages say 65,536; platforms genuinely disagree; Vertex entry should carry the Vertex value) | -| gemini-3-flash-preview ctx → 1048576 | ✓ correct | -| 
updatedAt bumps to 2026-06-11 | ✓ all pricing values verified current today | - -## Recommended fixes (final disposition) - -Rejected by orchestrator re-verification (not applied): -1. ~~`vertex/gemini-3.1-flash-lite` thinking.levels / default change~~ — the thinking doc confirms minimal IS supported and is the default on 3.1 Flash-Lite; the report's initial reading was wrong. No change made (google entry likewise untouched). - -Applied (warning — platform-correct values): -3. `vertex/gemini-2.5-pro`: `maxOutputTokens` 65536 → 65535 (Vertex model doc) -4. `vertex/gemini-2.5-flash`: `maxOutputTokens` 65536 → 65535 (Vertex model doc) -5. `vertex/gemini-2.5-flash-lite`: `maxOutputTokens` 65536 → 65535 (Vertex model doc) -6. `vertex/gemini-2.0-flash`: `input` 0.1 → 0.15, `output` 0.4 → 0.6 (Vertex pricing page; repo carries Gemini API prices). `cachedInput: 0.025` is unverified on Vertex — consider removing. Low urgency (model discontinued). - -Applied (suggestions): -7. `vertex/gemini-3.5-flash`: add `recommended: true` — parity with the google entry; vertex provider currently has no recommended model -8. `vertex/gemini-2.5-flash-lite`: add `speedOptimized: true` — parity with google entry (models.ts:1436) -9. `vertex/gemini-3.1-flash-lite`: add `speedOptimized: true` — "most cost-effective model yet" / lowest-latency tier (blog.google); apply to the google entry too for consistency -10. (optional) both vertex 2.0 entries: add `maxOutputTokens: 8192` for parity with google entries (Vertex docs: 8,192 default) — cosmetic, models discontinued - -Also applied: `releaseDate` 2026-03-03 → 2026-05-07 on both the vertex and google `gemini-3.1-flash-lite` entries (GA date per the Gemini API changelog). Item 10 (maxOutputTokens on discontinued 2.0 entries) was skipped as cosmetic; `cachedInput` on vertex/gemini-2.0-flash was kept (Gemini API documented the rate; no Vertex contradiction found). 
- -## Deliberately not changed - -- **2.5 Pro / Flash / Flash-Lite not marked deprecated (open question d):** Vertex retirement is 2026-10-16 (extended from June 2026; Google says the final date will be confirmed with ≥6 months notice once Gemini 3 is GA). They are fully supported today; `deprecated: true` would prematurely hide working models. Recommendation: revisit ~2026-09 (calendar item), keep undeprecated now. Note `defaultModel: 'vertex/gemini-2.5-pro'` (models.ts:1491) will need a new default before retirement — consider moving to `vertex/gemini-3.5-flash` when `recommended` is added. -- **>200k-token pricing tiers (3.1-pro, 2.5-pro)** are not modeled — `pricing` is a flat structure; entries carry the ≤200k tier. Pre-existing, consistent with the google provider. -- **Non-global endpoint surcharge (effective 2026-07-01):** Vertex adds +10% pricing for non-global endpoints on 3.x models ($1.65/$9.90 for 3.5-flash, etc.). Our default location is `us-central1` (non-global), so billed cost may exceed modeled cost starting July 1. Entries keep global pricing (the canonical published rate); flagged for ops awareness. -- **`vertex/gemini-3-pro-preview` pricing/ctx left as historical** — model discontinued and absent from the current pricing page; `deprecated: true` is the user-facing protection. -- **releaseDate conventions:** 2.5-flash 2025-05-20 (I/O) kept despite an earlier 2025-04-17 Gemini-API preview; 3.1-flash-lite 2026-03-03 (preview announcement) kept despite 2026-05-07 stable GA. Both match the repo's "first public launch announcement" convention. -- **deep-research id not migrated** to the newer `deep-research-preview-04-2026` family — no announced shutdown of `deep-research-pro-preview-12-2025`, and the Vertex pricing row still matches it. Watch item for the next pass. 
- -## Unverifiable - -| Item | Attempted sources | Notes | -|---|---|---| -| `vertex/deep-research-pro-preview-12-2025` `contextWindow: 1048576` and `maxOutputTokens: 65536` | cloud.google.com pricing (no limits), ai.google.dev/gemini-api/docs/deep-research (lists only 04-2026 versions, no limits), blog.google launch post (no numbers), cloudprice.net (claims 66K ctx / 33K out) | Conflict NOT resolved: Google publishes no token limits for the agent. CloudPrice's 66K/33K (~65,536/32,768) is the only concrete figure and contradicts the repo's 1M. Current values are an inference from the Gemini 3 Pro core. Ask Google docs or test live before changing. | -| Vertex-side model id for the Deep Research Agent | Vertex pricing page (row name only), Vertex docs (nav-only render), Context7 | Only third-party trackers tie `deep-research-pro-preview-12-2025` to Vertex. | -| `vertex/gemini-2.0-flash` `cachedInput: 0.025` | Vertex pricing page (no cached row for 2.0) | $0.025 is the Gemini API cache price. Discontinued model; consider dropping the field. | -| `vertex/gemini-2.0-flash-lite` `releaseDate: 2025-02-25` | blog.google Feb 2025 post (preview 2025-02-05), Vertex release notes (nav-only) | Exact GA date not found this session; plausible, left as-is. | -| Vertex 3-pro-preview discontinuation date 2026-03-26 (exact) | Vertex deprecations page (nav-only), third-party migration guides | Gemini API shutdown 2026-03-09 is confirmed by the changelog; the Vertex-specific 03-26 date comes from secondary sources citing Google's deprecations table. Either way `deprecated: true` is correct. | diff --git a/docs/models/xai.md b/docs/models/xai.md deleted file mode 100644 index 1fd8d159f4..0000000000 --- a/docs/models/xai.md +++ /dev/null @@ -1,91 +0,0 @@ -# xAI Provider Validation — models.ts - -- **Date:** 2026-06-11 -- **Scope:** `xai` provider block in `apps/sim/providers/models.ts` (~lines 1752–1956), 13 models + provider config. 
Final re-verification after PR #4990 (deprecation flags, grok-4.20 repricing $2/$6 → $1.25/$2.50 and 2M → 1M, defaultModel → grok-4.3). -- **Method:** Live WebFetch of xAI docs (primary source, wins all conflicts); OpenRouter as secondary pricing source; WebSearch for release-date pinning; `rg` audit of `apps/sim/providers/xai/` for parameter wiring. -- **Sources:** - - https://docs.x.ai/developers/models (model listing + pricing) - - https://docs.x.ai/developers/models/grok-4.3, .../grok-4.20-0309-reasoning, .../grok-4.20-0309-non-reasoning, .../grok-4.20-multi-agent-0309, .../grok-build-0.1, .../grok-3, .../grok-3-fast, .../grok-4 (per-model pages) - - https://docs.x.ai/developers/migration/may-15-retirement (retirement/redirect table) - - https://docs.x.ai/developers/rest-api-reference/inference/chat (parameter ranges) - - https://docs.x.ai/developers/model-capabilities/text/reasoning (reasoning_effort semantics) - - https://openrouter.ai/x-ai/grok-4.3, https://openrouter.ai/x-ai/grok-4.20 (secondary) - -## Provider config - -| Field | Repo value | Source | Verdict | -|---|---|---|---| -| `defaultModel` | `grok-4.3` | docs.x.ai/developers/models — grok-4.3 is the current flagship ("most intelligent and fastest"); all retired slugs redirect to it | CORRECT (PR #4990 change re-verified) | -| `modelPatterns` | `/^grok/` | All current model ids start with `grok` | CORRECT | - -## Active models - -### grok-4.3 - -| Field | Repo value | Source value | Source | Verdict | -|---|---|---|---|---| -| input | 1.25 | $1.25 / 1M | docs.x.ai/developers/models/grok-4.3; OpenRouter agrees ($1.25) | CORRECT | -| cachedInput | 0.2 | $0.20 / 1M | docs.x.ai/developers/models/grok-4.3 | CORRECT | -| output | 2.5 | $2.50 / 1M | docs.x.ai/developers/models/grok-4.3; OpenRouter agrees ($2.50) | CORRECT | -| contextWindow | 1000000 | 1,000,000 tokens | docs.x.ai per-model page; OpenRouter agrees (1M, "no output token limit") | CORRECT | -| releaseDate | 2026-04-30 | April 30, 2026 | 
OpenRouter created date; consistent with xAI announcement timeline | CORRECT | -| temperature.max | 2 (fixed this pass, was 1) | 0–2 | docs.x.ai chat REST reference: "between 0 and 2" | ✓ after fix | -| recommended | true | flagship model | docs.x.ai | CORRECT | - -Caveat: OpenRouter notes grok-4.3 requests exceeding 200k total tokens bill at a higher tier. xAI's own pricing tables show flat $1.25/$2.50; Sim's pricing model is flat, so the base tier is recorded. No change. - -### grok-4.20-0309-reasoning / grok-4.20-0309-non-reasoning / grok-4.20-multi-agent-0309 - -All three per-model pages were fetched individually; all three show identical numbers (multi-agent is NOT priced differently): - -| Field | Repo value | Source value | Source | Verdict | -|---|---|---|---|---| -| input | 1.25 | $1.25 / 1M | all three per-model pages | CORRECT (PR #4990 reprice re-verified) | -| cachedInput | 0.2 | $0.20 / 1M | all three per-model pages | CORRECT | -| output | 2.5 | $2.50 / 1M | all three per-model pages | CORRECT | -| contextWindow | 1000000 | 1,000,000 tokens | all three per-model pages | CORRECT — see conflict note | -| releaseDate | 2026-03-10 | API availability March 10, 2026 | WebSearch (xAI API made Grok 4.20 + multi-agent available 2026-03-10; `0309` slug = March 9 snapshot) | CORRECT (secondary-source verified) | -| temperature.max | 2 (fixed this pass, was 1) | 0–2 | docs.x.ai chat REST reference | ✓ after fix | - -**1M vs 2M conflict resolved:** OpenRouter (x-ai/grok-4.20) lists 2M context; xAI's three per-model pages each state "Context window: 1,000,000 tokens". Press coverage attributes the larger window to "agent modes" (consumer-side), not the API. xAI docs win → **1M confirmed, keep**. (OpenRouter's created date of 2026-03-31 is its listing date, not the API release.) 
- -## Deprecated models (9 entries) - -Retirement source: docs.x.ai/developers/migration/may-15-retirement — "After May 15, 2026 at 12:00 PM PT, requests to the retired model slugs will automatically redirect" and bill at the redirect target's rates. Today (2026-06-11) is past that date: the redirects are live. The per-model docs pages for the legacy slugs (`grok-4`, `grok-4-0709`, `grok-3`, `grok-3-fast`) now resolve to the grok-4.3 page showing $1.25/$0.20/$2.50 — direct confirmation that the slugs are aliases billing at target rates. - -| Model id | Redirect target (source: may-15-retirement page) | `deprecated: true` verdict | -|---|---|---| -| grok-4-latest | grok-4.3 (alias of grok-4-0709; per-model page now resolves to grok-4.3) | CORRECT | -| grok-4-0709 | grok-4.3 (reasoning_effort low) — explicitly listed | CORRECT | -| grok-4-1-fast-reasoning | grok-4.3 (low) — explicitly listed | CORRECT | -| grok-4-1-fast-non-reasoning | grok-4.3 (none) — explicitly listed | CORRECT | -| grok-4-fast-reasoning | grok-4.3 (low) — explicitly listed | CORRECT | -| grok-4-fast-non-reasoning | grok-4.3 (none) — explicitly listed | CORRECT | -| grok-code-fast-1 | grok-build-0.1 — explicitly listed | CORRECT | -| grok-3-latest | grok-4.3 (none) — `grok-3` explicitly listed; `-latest` is its alias | CORRECT | -| grok-3-fast-latest | grok-4.3 — not on the May-15 table by name, but docs.x.ai/developers/models/grok-3-fast now resolves to the grok-4.3 page with grok-4.3 pricing | CORRECT | - -Legacy pricing fields on these entries ($3/$15 for grok-4 family and grok-3, $5/$25 for grok-3-fast, $0.20/$0.50 fast families, $0.20/$1.50 grok-code-fast-1) match the rates these models historically carried, but xAI no longer publishes them — they are unverifiable against live docs and, more importantly, **no longer what calls cost**. 
- -**Recommendation (one clear position):** reprice the deprecated entries to their redirect targets' rates — the 8 grok-4.3-redirected slugs to $1.25 / $0.20 cached / $2.50, and grok-code-fast-1 to grok-build-0.1's $1.00 / $0.20 cached / $2.00. Rationale: Sim computes execution cost at run time from the current `models.ts` values and stores the result in execution logs; past log rows are unaffected by a reprice, so nothing historical is lost. Meanwhile any workflow still pointed at a retired slug bills at redirect rates today, so the legacy numbers overestimate live costs by up to 6× (grok-4-latest: $15 vs $2.50 output). This is docs-backed (the retirement page states the redirect billing explicitly). **Disposition: APPLIED in this pass** — the 8 grok-4.3 redirects now carry $1.25 / $0.20 cached / $2.50 with `contextWindow: 1000000`, and grok-code-fast-1 carries grok-build-0.1's $1.00 / $0.20 cached / $2.00 (256k unchanged). - -## Changes made in this pass - -None to `models.ts` (per instructions, this pass writes only this justification doc). The verified pending fix: - -- **all 13 xai entries: `capabilities.temperature.max` 1 → 2.** The xAI chat REST reference documents `temperature` as "between 0 and 2" (same range OpenAI uses). The repo UI uses this for slider bounds, so the current `max: 1` artificially halves the usable range. Source: https://docs.x.ai/developers/rest-api-reference/inference/chat - -Changes from PR #4990 re-verified and confirmed correct: 9 deprecation flags, grok-4.20 trio reprice to $1.25/$2.50 with 1M context, defaultModel grok-4.3. - -## Deliberately not changed - -- **grok-4.3 `reasoningEffort` capability flag — not added.** The REST reference and reasoning docs confirm grok-4.3 supports `reasoning_effort` with `none` / `low` (default) / `medium` / `high` ("Only supported by grok-4.3"). 
However, `apps/sim/providers/xai/index.ts` forwards only `temperature` (verified by rg: single hit at line 101, `basePayload.temperature`); no `reasoning_effort` wiring exists, so the capability flag would be dead metadata. **Recommended follow-up:** wire `reasoning_effort` in the xai provider, then add the capability flag to grok-4.3. Note for that follow-up: per the reasoning docs, `presence_penalty`, `frequency_penalty`, and `stop` cannot be combined with reasoning, and grok-4.20-multi-agent uses a different control (`reasoning.effort`: low/medium/high/xhigh, controlling agent count, not reasoning depth). - -- **grok-build-0.1 — not added.** grok-code-fast-1's successor: $1.00 input / $0.20 cached / $2.00 output, 256k context, "xAI's fast coding model trained specifically for agentic coding" (docs.x.ai/developers/models/grok-build-0.1). Recommended addition; adding models is separate work from validation. -- **grok-4.3 tiered >200k-token pricing — not modeled.** Sim's pricing schema is flat; base tier recorded (and xAI's own table is flat). - -## Unverifiable - -- **Original (pre-retirement) pricing of the 9 deprecated entries** — xAI docs no longer publish historical rates; values match known historical pricing but cannot be confirmed against a live source. -- **Release dates of deprecated entries** (2025-07-09, 2025-11-19, 2025-09-19, 2025-08-28, 2025-02-17) — consistent with historical announcements/slugs (e.g. `grok-4-0709`), not republished on live docs. -- **grok-4.3 / grok-4.20 official release dates on xAI docs** — per-model pages omit release dates. grok-4.3: 2026-04-30 corroborated by OpenRouter. grok-4.20: 2026-03-10 corroborated by secondary reporting of xAI API availability plus the `0309` snapshot slug; treated as verified-by-secondary-source. 
From ddadc30c9db51d8ca60b92610d450b3fb46f021a Mon Sep 17 00:00:00 2001 From: waleed Date: Thu, 11 Jun 2026 20:10:02 -0700 Subject: [PATCH 4/4] fix(providers): default azure-openai to gpt-5.4 instead of deprecated gpt-4o --- apps/sim/providers/models.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/sim/providers/models.ts b/apps/sim/providers/models.ts index 988074e6a6..b48805dec2 100644 --- a/apps/sim/providers/models.ts +++ b/apps/sim/providers/models.ts @@ -876,7 +876,7 @@ export const PROVIDER_DEFINITIONS: Record = { id: 'azure-openai', name: 'Azure OpenAI', description: 'Microsoft Azure OpenAI Service models', - defaultModel: 'azure/gpt-4o', + defaultModel: 'azure/gpt-5.4', modelPatterns: [/^azure\//], capabilities: { toolUsageControl: true,