From 768ef9458a020d6cd052eda54fe3c5cbf5f789b6 Mon Sep 17 00:00:00 2001
From: waleed <walif6@gmail.com>
Date: Thu, 11 Jun 2026 19:18:58 -0700
Subject: [PATCH 1/4] fix(providers): correct pricing, deprecations, and
 capabilities across model catalog

---
 apps/sim/providers/models.ts     | 372 ++++++++++++++++++-------------
 apps/sim/providers/utils.test.ts |  14 +-
 2 files changed, 223 insertions(+), 163 deletions(-)

diff --git a/apps/sim/providers/models.ts b/apps/sim/providers/models.ts
index 58dba7431b..b0caf31aff 100644
--- a/apps/sim/providers/models.ts
+++ b/apps/sim/providers/models.ts
@@ -208,7 +208,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 2.0,
           cachedInput: 0.5,
           output: 8.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 2 },
@@ -223,7 +223,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 0.4,
           cachedInput: 0.1,
           output: 1.6,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 2 },
@@ -238,7 +238,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 0.1,
           cachedInput: 0.025,
           output: 0.4,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 2 },
@@ -296,7 +296,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         pricing: {
           input: 30.0,
           output: 180.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           reasoningEffort: {
@@ -313,7 +313,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 2.5,
           cachedInput: 0.25,
           output: 15.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           reasoningEffort: {
@@ -333,7 +333,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 0.75,
           cachedInput: 0.075,
           output: 4.5,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           reasoningEffort: {
@@ -353,7 +353,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 0.2,
           cachedInput: 0.02,
           output: 1.25,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           reasoningEffort: {
@@ -374,7 +374,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         pricing: {
           input: 21.0,
           output: 168.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           reasoningEffort: {
@@ -391,7 +391,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 1.75,
           cachedInput: 0.175,
           output: 14.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           reasoningEffort: {
@@ -412,7 +412,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 1.25,
           cachedInput: 0.125,
           output: 10.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           reasoningEffort: {
@@ -432,7 +432,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         pricing: {
           input: 15.0,
           output: 120.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           reasoningEffort: {
@@ -441,7 +441,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           maxOutputTokens: 272000,
         },
         contextWindow: 400000,
-        releaseDate: '2025-08-07',
+        releaseDate: '2025-10-06',
       },
       {
         id: 'gpt-5',
@@ -449,7 +449,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 1.25,
           cachedInput: 0.125,
           output: 10.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           reasoningEffort: {
@@ -469,7 +469,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 0.25,
           cachedInput: 0.025,
           output: 2.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           reasoningEffort: {
@@ -489,7 +489,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 0.05,
           cachedInput: 0.005,
           output: 0.4,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           reasoningEffort: {
@@ -509,7 +509,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 1.25,
           cachedInput: 0.125,
           output: 10.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 2 },
@@ -517,6 +517,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 128000,
         releaseDate: '2025-08-07',
+        deprecated: true,
       },
       // o-series reasoning models
       {
@@ -525,7 +526,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 1.1,
           cachedInput: 0.275,
           output: 4.4,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           reasoningEffort: {
@@ -535,13 +536,14 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 200000,
         releaseDate: '2025-04-16',
+        deprecated: true,
       },
       {
         id: 'o3-pro',
         pricing: {
           input: 20.0,
           output: 80.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           maxOutputTokens: 100000,
@@ -555,7 +557,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 2,
           cachedInput: 0.5,
           output: 8,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           reasoningEffort: {
@@ -572,7 +574,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 1.1,
           cachedInput: 0.55,
           output: 4.4,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           reasoningEffort: {
@@ -589,7 +591,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 15.0,
           cachedInput: 7.5,
           output: 60,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           reasoningEffort: {
@@ -607,7 +609,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 2.5,
           cachedInput: 1.25,
           output: 10.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 2 },
@@ -666,7 +668,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           },
         },
         contextWindow: 1000000,
-        releaseDate: '2026-05-27',
+        releaseDate: '2026-05-28',
         recommended: true,
       },
       {
@@ -694,7 +696,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 5.0,
           cachedInput: 0.5,
           output: 25.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -714,7 +716,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 3.0,
           cachedInput: 0.3,
           output: 15.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -735,7 +737,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 5.0,
           cachedInput: 0.5,
           output: 25.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -755,11 +757,10 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 15.0,
           cachedInput: 1.5,
           output: 75.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
-          nativeStructuredOutputs: true,
           maxOutputTokens: 32000,
           thinking: {
             levels: ['low', 'medium', 'high'],
@@ -768,6 +769,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 200000,
         releaseDate: '2025-08-05',
+        deprecated: true,
       },
       {
         id: 'claude-opus-4-0',
@@ -775,7 +777,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 15.0,
           cachedInput: 1.5,
           output: 75.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -787,6 +789,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 200000,
         releaseDate: '2025-05-22',
+        deprecated: true,
       },
       {
         id: 'claude-sonnet-4-5',
@@ -794,7 +797,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 3.0,
           cachedInput: 0.3,
           output: 15.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -805,7 +808,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
             default: 'high',
           },
         },
-        contextWindow: 1000000,
+        contextWindow: 200000,
         releaseDate: '2025-09-29',
       },
       {
@@ -814,7 +817,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 3.0,
           cachedInput: 0.3,
           output: 15.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -824,8 +827,9 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
             default: 'high',
           },
         },
-        contextWindow: 1000000,
+        contextWindow: 200000,
         releaseDate: '2025-05-22',
+        deprecated: true,
       },
       {
         id: 'claude-haiku-4-5',
@@ -833,7 +837,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 1.0,
           cachedInput: 0.1,
           output: 5.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -884,13 +888,13 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 2.5,
           cachedInput: 1.25,
           output: 10.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 2 },
         },
         contextWindow: 128000,
-        releaseDate: '2024-05-13',
+        releaseDate: '2024-11-20',
       },
       {
         id: 'azure/gpt-5.4',
@@ -898,11 +902,11 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 2.5,
           cachedInput: 0.25,
           output: 15.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           reasoningEffort: {
-            values: ['none', 'low', 'medium', 'high', 'xhigh'],
+            values: ['none', 'low', 'medium', 'high'],
           },
           verbosity: {
             values: ['low', 'medium', 'high'],
@@ -918,11 +922,11 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 0.75,
           cachedInput: 0.075,
           output: 4.5,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           reasoningEffort: {
-            values: ['none', 'low', 'medium', 'high', 'xhigh'],
+            values: ['none', 'low', 'medium', 'high'],
           },
           verbosity: {
             values: ['low', 'medium', 'high'],
@@ -938,11 +942,11 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 0.2,
           cachedInput: 0.02,
           output: 1.25,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           reasoningEffort: {
-            values: ['none', 'low', 'medium', 'high', 'xhigh'],
+            values: ['none', 'low', 'medium', 'high'],
           },
           verbosity: {
             values: ['low', 'medium', 'high'],
@@ -958,11 +962,11 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 1.75,
           cachedInput: 0.175,
           output: 14.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           reasoningEffort: {
-            values: ['none', 'low', 'medium', 'high', 'xhigh'],
+            values: ['none', 'low', 'medium', 'high'],
           },
           verbosity: {
             values: ['low', 'medium', 'high'],
@@ -978,7 +982,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 1.25,
           cachedInput: 0.125,
           output: 10.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           reasoningEffort: {
@@ -998,7 +1002,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 1.25,
           cachedInput: 0.125,
           output: 10.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           reasoningEffort: {
@@ -1011,6 +1015,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 400000,
         releaseDate: '2025-11-12',
+        deprecated: true,
       },
       {
         id: 'azure/gpt-5',
@@ -1018,7 +1023,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 1.25,
           cachedInput: 0.125,
           output: 10.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           reasoningEffort: {
@@ -1038,7 +1043,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 0.25,
           cachedInput: 0.025,
           output: 2.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           reasoningEffort: {
@@ -1058,7 +1063,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 0.05,
           cachedInput: 0.005,
           output: 0.4,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           reasoningEffort: {
@@ -1073,15 +1078,16 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         releaseDate: '2025-08-07',
       },
       {
-        id: 'azure/gpt-5-chat-latest',
+        id: 'azure/gpt-5-chat',
         pricing: {
           input: 1.25,
           cachedInput: 0.125,
           output: 10.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 2 },
+          maxOutputTokens: 16384,
         },
         contextWindow: 128000,
         releaseDate: '2025-08-07',
@@ -1092,7 +1098,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 2,
           cachedInput: 0.5,
           output: 8,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           reasoningEffort: {
@@ -1109,7 +1115,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 1.1,
           cachedInput: 0.275,
           output: 4.4,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           reasoningEffort: {
@@ -1126,7 +1132,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 2.0,
           cachedInput: 0.5,
           output: 8.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 2 },
@@ -1141,7 +1147,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 0.4,
           cachedInput: 0.1,
           output: 1.6,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 2 },
@@ -1156,7 +1162,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 0.1,
           cachedInput: 0.025,
           output: 0.4,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 2 },
@@ -1175,7 +1181,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         capabilities: {},
         contextWindow: 200000,
-        releaseDate: '2025-04-14',
+        releaseDate: '2025-05-19',
       },
     ],
   },
@@ -1197,7 +1203,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 5.0,
           cachedInput: 0.5,
           output: 25.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -1208,7 +1214,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
             default: 'high',
           },
         },
-        contextWindow: 200000,
+        contextWindow: 1000000,
         releaseDate: '2026-02-05',
       },
       {
@@ -1217,7 +1223,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 5.0,
           cachedInput: 0.5,
           output: 25.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -1237,7 +1243,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 3.0,
           cachedInput: 0.3,
           output: 15.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -1257,7 +1263,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 15.0,
           cachedInput: 1.5,
           output: 75.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -1270,6 +1276,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 200000,
         releaseDate: '2025-08-05',
+        deprecated: true,
       },
       {
         id: 'azure-anthropic/claude-haiku-4-5',
@@ -1277,7 +1284,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 1.0,
           cachedInput: 0.1,
           output: 5.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -1331,12 +1338,12 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 2.0,
           cachedInput: 0.2,
           output: 12.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 2 },
           thinking: {
-            levels: ['minimal', 'low', 'medium', 'high'],
+            levels: ['low', 'medium', 'high'],
             default: 'high',
           },
           maxOutputTokens: 65536,
@@ -1345,12 +1352,12 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         releaseDate: '2026-02-19',
       },
       {
-        id: 'gemini-3.1-flash-lite-preview',
+        id: 'gemini-3.1-flash-lite',
         pricing: {
           input: 0.25,
           cachedInput: 0.025,
           output: 1.5,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 2 },
@@ -1369,7 +1376,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 0.5,
           cachedInput: 0.05,
           output: 3.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 2 },
@@ -1379,7 +1386,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           },
           maxOutputTokens: 65536,
         },
-        contextWindow: 1000000,
+        contextWindow: 1048576,
         releaseDate: '2025-12-17',
       },
       {
@@ -1388,7 +1395,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 1.25,
           cachedInput: 0.125,
           output: 10.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 2 },
@@ -1403,7 +1410,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 0.3,
           cachedInput: 0.03,
           output: 2.5,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 2 },
@@ -1418,7 +1425,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 0.1,
           cachedInput: 0.01,
           output: 0.4,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 2 },
@@ -1434,7 +1441,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 0.1,
           cachedInput: 0.025,
           output: 0.4,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 2 },
@@ -1442,13 +1449,14 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 1048576,
         releaseDate: '2025-02-05',
+        deprecated: true,
       },
       {
         id: 'gemini-2.0-flash-lite',
         pricing: {
           input: 0.075,
           output: 0.3,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 2 },
@@ -1456,20 +1464,22 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 1048576,
         releaseDate: '2025-02-25',
+        deprecated: true,
       },
       {
         id: 'deep-research-pro-preview-12-2025',
         pricing: {
           input: 2.0,
-          output: 2.0,
-          updatedAt: '2026-04-01',
+          cachedInput: 0.2,
+          output: 12.0,
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           deepResearch: true,
           memory: false,
           maxOutputTokens: 65536,
         },
-        contextWindow: 1000000,
+        contextWindow: 1048576,
         releaseDate: '2025-12-11',
       },
     ],
@@ -1500,6 +1510,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
             levels: ['minimal', 'low', 'medium', 'high'],
             default: 'medium',
           },
+          maxOutputTokens: 65536,
         },
         contextWindow: 1048576,
         releaseDate: '2026-05-19',
@@ -1510,25 +1521,26 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 2.0,
           cachedInput: 0.2,
           output: 12.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 2 },
           thinking: {
-            levels: ['minimal', 'low', 'medium', 'high'],
+            levels: ['low', 'medium', 'high'],
             default: 'high',
           },
+          maxOutputTokens: 65536,
         },
         contextWindow: 1048576,
         releaseDate: '2026-02-19',
       },
       {
-        id: 'vertex/gemini-3.1-flash-lite-preview',
+        id: 'vertex/gemini-3.1-flash-lite',
         pricing: {
           input: 0.25,
           cachedInput: 0.025,
           output: 1.5,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 2 },
@@ -1536,6 +1548,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
             levels: ['minimal', 'low', 'medium', 'high'],
             default: 'minimal',
           },
+          maxOutputTokens: 65536,
         },
         contextWindow: 1048576,
         releaseDate: '2026-03-03',
@@ -1557,6 +1570,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 1000000,
         releaseDate: '2025-11-18',
+        deprecated: true,
       },
       {
         id: 'vertex/gemini-3-flash-preview',
@@ -1564,7 +1578,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 0.5,
           cachedInput: 0.05,
           output: 3.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 2 },
@@ -1572,8 +1586,9 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
             levels: ['minimal', 'low', 'medium', 'high'],
             default: 'high',
           },
+          maxOutputTokens: 65536,
         },
-        contextWindow: 1000000,
+        contextWindow: 1048576,
         releaseDate: '2025-12-17',
       },
       {
@@ -1582,10 +1597,11 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 1.25,
           cachedInput: 0.125,
           output: 10.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 2 },
+          maxOutputTokens: 65536,
         },
         contextWindow: 1048576,
         releaseDate: '2025-03-25',
@@ -1596,10 +1612,11 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 0.3,
           cachedInput: 0.03,
           output: 2.5,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 2 },
+          maxOutputTokens: 65536,
         },
         contextWindow: 1048576,
         releaseDate: '2025-05-20',
@@ -1610,10 +1627,11 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 0.1,
           cachedInput: 0.01,
           output: 0.4,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 2 },
+          maxOutputTokens: 65536,
         },
         contextWindow: 1048576,
         releaseDate: '2025-06-17',
@@ -1631,6 +1649,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 1048576,
         releaseDate: '2025-02-05',
+        deprecated: true,
       },
       {
         id: 'vertex/gemini-2.0-flash-lite',
@@ -1644,19 +1663,22 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 1048576,
         releaseDate: '2025-02-25',
+        deprecated: true,
       },
       {
         id: 'vertex/deep-research-pro-preview-12-2025',
         pricing: {
           input: 2.0,
-          output: 2.0,
-          updatedAt: '2026-04-01',
+          cachedInput: 0.2,
+          output: 12.0,
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           deepResearch: true,
           memory: false,
+          maxOutputTokens: 65536,
         },
-        contextWindow: 1000000,
+        contextWindow: 1048576,
         releaseDate: '2025-12-11',
       },
     ],
@@ -1676,13 +1698,13 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
       {
         id: 'deepseek-chat',
         pricing: {
-          input: 0.28,
-          cachedInput: 0.028,
-          output: 0.42,
-          updatedAt: '2026-04-01',
+          input: 0.14,
+          cachedInput: 0.0028,
+          output: 0.28,
+          updatedAt: '2026-06-11',
         },
         capabilities: {},
-        contextWindow: 128000,
+        contextWindow: 1000000,
         releaseDate: '2024-12-26',
       },
       {
@@ -1698,6 +1720,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 128000,
         releaseDate: '2024-12-26',
+        deprecated: true,
       },
       {
         id: 'deepseek-r1',
@@ -1710,17 +1733,18 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         capabilities: {},
         contextWindow: 128000,
         releaseDate: '2025-01-20',
+        deprecated: true,
       },
       {
         id: 'deepseek-reasoner',
         pricing: {
-          input: 0.28,
-          cachedInput: 0.028,
-          output: 0.42,
-          updatedAt: '2026-04-01',
+          input: 0.14,
+          cachedInput: 0.0028,
+          output: 0.28,
+          updatedAt: '2026-06-11',
         },
         capabilities: {},
-        contextWindow: 128000,
+        contextWindow: 1000000,
         releaseDate: '2025-01-20',
       },
     ],
@@ -1729,7 +1753,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     id: 'xai',
     name: 'xAI',
     description: "xAI's Grok models",
-    defaultModel: 'grok-4-latest',
+    defaultModel: 'grok-4.3',
     modelPatterns: [/^grok/],
     icon: xAIIcon,
     color: '#555555',
@@ -1765,6 +1789,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 256000,
         releaseDate: '2025-07-09',
+        deprecated: true,
       },
       {
         id: 'grok-4-0709',
@@ -1779,6 +1804,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 256000,
         releaseDate: '2025-07-09',
+        deprecated: true,
       },
       {
         id: 'grok-4-1-fast-reasoning',
@@ -1793,6 +1819,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 2000000,
         releaseDate: '2025-11-19',
+        deprecated: true,
       },
       {
         id: 'grok-4-1-fast-non-reasoning',
@@ -1807,6 +1834,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 2000000,
         releaseDate: '2025-11-19',
+        deprecated: true,
       },
       {
         id: 'grok-4-fast-reasoning',
@@ -1821,6 +1849,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 2000000,
         releaseDate: '2025-09-19',
+        deprecated: true,
       },
       {
         id: 'grok-4-fast-non-reasoning',
@@ -1835,6 +1864,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 2000000,
         releaseDate: '2025-09-19',
+        deprecated: true,
       },
       {
         id: 'grok-code-fast-1',
@@ -1849,47 +1879,48 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 256000,
         releaseDate: '2025-08-28',
+        deprecated: true,
       },
       {
         id: 'grok-4.20-0309-reasoning',
         pricing: {
-          input: 2.0,
+          input: 1.25,
           cachedInput: 0.2,
-          output: 6.0,
-          updatedAt: '2026-04-01',
+          output: 2.5,
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
         },
-        contextWindow: 2000000,
+        contextWindow: 1000000,
         releaseDate: '2026-03-10',
       },
       {
         id: 'grok-4.20-0309-non-reasoning',
         pricing: {
-          input: 2.0,
+          input: 1.25,
           cachedInput: 0.2,
-          output: 6.0,
-          updatedAt: '2026-04-01',
+          output: 2.5,
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
         },
-        contextWindow: 2000000,
+        contextWindow: 1000000,
         releaseDate: '2026-03-10',
       },
       {
         id: 'grok-4.20-multi-agent-0309',
         pricing: {
-          input: 2.0,
+          input: 1.25,
           cachedInput: 0.2,
-          output: 6.0,
-          updatedAt: '2026-04-01',
+          output: 2.5,
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
         },
-        contextWindow: 2000000,
+        contextWindow: 1000000,
         releaseDate: '2026-03-10',
       },
       {
@@ -1905,6 +1936,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 131072,
         releaseDate: '2025-02-17',
+        deprecated: true,
       },
       {
         id: 'grok-3-fast-latest',
@@ -1919,6 +1951,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 131072,
         releaseDate: '2025-02-17',
+        deprecated: true,
       },
     ],
   },
@@ -1939,7 +1972,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         pricing: {
           input: 0.35,
           output: 0.75,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {},
         contextWindow: 131072,
@@ -1955,6 +1988,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         capabilities: {},
         contextWindow: 32768,
         releaseDate: '2024-08-27',
+        deprecated: true,
       },
       {
         id: 'cerebras/qwen-3-235b-a22b-instruct-2507',
@@ -1966,13 +2000,14 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         capabilities: {},
         contextWindow: 131072,
         releaseDate: '2025-07-29',
+        deprecated: true,
       },
       {
         id: 'cerebras/zai-glm-4.7',
         pricing: {
           input: 2.25,
           output: 2.75,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {},
         contextWindow: 131072,
@@ -1996,8 +2031,9 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         id: 'groq/openai/gpt-oss-120b',
         pricing: {
           input: 0.15,
+          cachedInput: 0.075,
           output: 0.6,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {},
         contextWindow: 131072,
@@ -2007,8 +2043,9 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         id: 'groq/openai/gpt-oss-20b',
         pricing: {
           input: 0.075,
+          cachedInput: 0.0375,
           output: 0.3,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {},
         contextWindow: 131072,
@@ -2018,8 +2055,9 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         id: 'groq/openai/gpt-oss-safeguard-20b',
         pricing: {
           input: 0.075,
+          cachedInput: 0.0375,
           output: 0.3,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {},
         contextWindow: 131072,
@@ -2030,7 +2068,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         pricing: {
           input: 0.29,
           output: 0.59,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {},
         contextWindow: 131072,
@@ -2041,7 +2079,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         pricing: {
           input: 0.05,
           output: 0.08,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {},
         contextWindow: 131072,
@@ -2052,7 +2090,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         pricing: {
           input: 0.59,
           output: 0.79,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {},
         contextWindow: 131072,
@@ -2063,7 +2101,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         pricing: {
           input: 0.11,
           output: 0.34,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {},
         contextWindow: 131072,
@@ -2079,6 +2117,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         capabilities: {},
         contextWindow: 262144,
         releaseDate: '2025-09-05',
+        deprecated: true,
       },
     ],
   },
@@ -2106,7 +2145,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         pricing: {
           input: 0.5,
           output: 1.5,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -2119,7 +2158,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         pricing: {
           input: 0.5,
           output: 1.5,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -2132,7 +2171,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         pricing: {
           input: 0.15,
           output: 0.6,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -2145,7 +2184,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         pricing: {
           input: 0.4,
           output: 2.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -2165,32 +2204,33 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 128000,
         releaseDate: '2024-11-18',
+        deprecated: true,
       },
       {
         id: 'magistral-medium-latest',
         pricing: {
           input: 2.0,
           output: 5.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
         },
         contextWindow: 128000,
-        releaseDate: '2025-06-10',
+        releaseDate: '2025-09-18',
       },
       {
         id: 'magistral-medium-2509',
         pricing: {
           input: 2.0,
           output: 5.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
         },
         contextWindow: 128000,
-        releaseDate: '2025-09-17',
+        releaseDate: '2025-09-18',
       },
       {
         id: 'magistral-small-latest',
@@ -2203,7 +2243,8 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           temperature: { min: 0, max: 1 },
         },
         contextWindow: 128000,
-        releaseDate: '2025-06-10',
+        releaseDate: '2025-09-18',
+        deprecated: true,
       },
       {
         id: 'magistral-small-2509',
@@ -2216,14 +2257,15 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           temperature: { min: 0, max: 1 },
         },
         contextWindow: 128000,
-        releaseDate: '2025-09-17',
+        releaseDate: '2025-09-18',
+        deprecated: true,
       },
       {
         id: 'mistral-medium-latest',
         pricing: {
           input: 0.4,
           output: 2.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -2236,7 +2278,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         pricing: {
           input: 0.4,
           output: 2.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -2249,7 +2291,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         pricing: {
           input: 0.4,
           output: 2.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -2262,7 +2304,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         pricing: {
           input: 0.15,
           output: 0.6,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -2282,13 +2324,14 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 128000,
         releaseDate: '2025-06-20',
+        deprecated: true,
       },
       {
         id: 'open-mistral-nemo',
         pricing: {
           input: 0.15,
           output: 0.15,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -2301,7 +2344,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         pricing: {
           input: 0.3,
           output: 0.9,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -2314,7 +2357,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         pricing: {
           input: 0.3,
           output: 0.9,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -2327,13 +2370,13 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         pricing: {
           input: 0.4,
           output: 2.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
         },
         contextWindow: 256000,
-        releaseDate: '2025-05-21',
+        releaseDate: '2025-12-09',
       },
       {
         id: 'devstral-small-latest',
@@ -2346,7 +2389,8 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           temperature: { min: 0, max: 1 },
         },
         contextWindow: 256000,
-        releaseDate: '2025-07-10',
+        releaseDate: '2025-12-09',
+        deprecated: true,
       },
       {
         id: 'devstral-small-2507',
@@ -2360,6 +2404,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 128000,
         releaseDate: '2025-07-10',
+        deprecated: true,
       },
       {
         id: 'devstral-medium-2507',
@@ -2373,13 +2418,14 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 128000,
         releaseDate: '2025-07-10',
+        deprecated: true,
       },
       {
         id: 'ministral-14b-latest',
         pricing: {
           input: 0.2,
           output: 0.2,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -2392,7 +2438,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         pricing: {
           input: 0.2,
           output: 0.2,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -2405,20 +2451,20 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         pricing: {
           input: 0.15,
           output: 0.15,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
         },
         contextWindow: 256000,
-        releaseDate: '2024-10-16',
+        releaseDate: '2025-12-02',
       },
       {
         id: 'ministral-8b-2512',
         pricing: {
           input: 0.15,
           output: 0.15,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -2431,20 +2477,20 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         pricing: {
           input: 0.1,
           output: 0.1,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
         },
         contextWindow: 256000,
-        releaseDate: '2024-10-16',
+        releaseDate: '2025-12-02',
       },
       {
         id: 'ministral-3b-2512',
         pricing: {
           input: 0.1,
           output: 0.1,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -2536,7 +2582,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         capabilities: {
           temperature: { min: 0, max: 1 },
           nativeStructuredOutputs: true,
-          maxOutputTokens: 64000,
+          maxOutputTokens: 32768,
         },
         contextWindow: 200000,
         releaseDate: '2025-08-05',
@@ -2553,6 +2599,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 1000000,
         releaseDate: '2025-12-02',
+        deprecated: true,
       },
       {
         id: 'bedrock/amazon.nova-2-lite-v1:0',
@@ -2571,14 +2618,15 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         id: 'bedrock/amazon.nova-premier-v1:0',
         pricing: {
           input: 2.5,
-          output: 10.0,
-          updatedAt: '2026-04-01',
+          output: 12.5,
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
         },
         contextWindow: 1000000,
         releaseDate: '2025-04-30',
+        deprecated: true,
       },
       {
         id: 'bedrock/amazon.nova-pro-v1:0',
@@ -2642,7 +2690,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         capabilities: {
           temperature: { min: 0, max: 1 },
         },
-        contextWindow: 3500000,
+        contextWindow: 10000000,
         releaseDate: '2025-04-05',
       },
       {
@@ -2670,6 +2718,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 128000,
         releaseDate: '2024-09-25',
+        deprecated: true,
       },
       {
         id: 'bedrock/meta.llama3-2-11b-instruct-v1:0',
@@ -2683,6 +2732,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 128000,
         releaseDate: '2024-09-25',
+        deprecated: true,
       },
       {
         id: 'bedrock/meta.llama3-2-3b-instruct-v1:0',
@@ -2696,6 +2746,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 128000,
         releaseDate: '2024-09-25',
+        deprecated: true,
       },
       {
         id: 'bedrock/meta.llama3-2-1b-instruct-v1:0',
@@ -2709,6 +2760,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 128000,
         releaseDate: '2024-09-25',
+        deprecated: true,
       },
       {
         id: 'bedrock/meta.llama3-1-405b-instruct-v1:0',
@@ -2721,6 +2773,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           temperature: { min: 0, max: 1 },
         },
         contextWindow: 128000,
+        deprecated: true,
       },
       {
         id: 'bedrock/meta.llama3-1-70b-instruct-v1:0',
@@ -2749,15 +2802,15 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
       {
         id: 'bedrock/mistral.mistral-large-3-675b-instruct',
         pricing: {
-          input: 2.0,
-          output: 6.0,
-          updatedAt: '2026-04-01',
+          input: 0.5,
+          output: 1.5,
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
           maxOutputTokens: 32768,
         },
-        contextWindow: 128000,
+        contextWindow: 256000,
       },
       {
         id: 'bedrock/mistral.mistral-large-2411-v1:0',
@@ -2770,6 +2823,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           temperature: { min: 0, max: 1 },
         },
         contextWindow: 128000,
+        deprecated: true,
       },
       {
         id: 'bedrock/mistral.mistral-large-2407-v1:0',
@@ -2782,6 +2836,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           temperature: { min: 0, max: 1 },
         },
         contextWindow: 128000,
+        deprecated: true,
       },
       {
         id: 'bedrock/mistral.pixtral-large-2502-v1:0',
@@ -2871,6 +2926,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           temperature: { min: 0, max: 1 },
         },
         contextWindow: 32000,
+        deprecated: true,
       },
       {
         id: 'bedrock/cohere.command-r-plus-v1:0',
@@ -2883,6 +2939,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           temperature: { min: 0, max: 1 },
         },
         contextWindow: 128000,
+        deprecated: true,
       },
       {
         id: 'bedrock/cohere.command-r-v1:0',
@@ -2895,6 +2952,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           temperature: { min: 0, max: 1 },
         },
         contextWindow: 128000,
+        deprecated: true,
       },
     ],
   },
diff --git a/apps/sim/providers/utils.test.ts b/apps/sim/providers/utils.test.ts
index be01f0946c..f4612ccae8 100644
--- a/apps/sim/providers/utils.test.ts
+++ b/apps/sim/providers/utils.test.ts
@@ -193,7 +193,7 @@ describe('Model Capabilities', () => {
         'gpt-4.1-mini',
         'gpt-4.1-nano',
         'gpt-5-chat-latest',
-        'azure/gpt-5-chat-latest',
+        'azure/gpt-5-chat',
         'gemini-2.5-flash',
         'claude-sonnet-4-0',
         'claude-opus-4-0',
@@ -258,7 +258,7 @@ describe('Model Capabilities', () => {
         'gpt-4o',
         'azure/gpt-4o',
         'gpt-5-chat-latest',
-        'azure/gpt-5-chat-latest',
+        'azure/gpt-5-chat',
         'gemini-2.5-pro',
         'gemini-2.5-flash',
         'deepseek-v3',
@@ -481,7 +481,7 @@ describe('Model Capabilities', () => {
       expect(MODELS_WITH_REASONING_EFFORT).toContain('azure/o4-mini')
 
       expect(MODELS_WITH_REASONING_EFFORT).not.toContain('gpt-5-chat-latest')
-      expect(MODELS_WITH_REASONING_EFFORT).not.toContain('azure/gpt-5-chat-latest')
+      expect(MODELS_WITH_REASONING_EFFORT).not.toContain('azure/gpt-5-chat')
 
       expect(MODELS_WITH_REASONING_EFFORT).not.toContain('gpt-4o')
       expect(MODELS_WITH_REASONING_EFFORT).not.toContain('claude-sonnet-4-0')
@@ -506,7 +506,7 @@ describe('Model Capabilities', () => {
       expect(MODELS_WITH_VERBOSITY).toContain('azure/gpt-5.2')
 
       expect(MODELS_WITH_VERBOSITY).not.toContain('gpt-5-chat-latest')
-      expect(MODELS_WITH_VERBOSITY).not.toContain('azure/gpt-5-chat-latest')
+      expect(MODELS_WITH_VERBOSITY).not.toContain('azure/gpt-5-chat')
 
       expect(MODELS_WITH_VERBOSITY).not.toContain('o1')
       expect(MODELS_WITH_VERBOSITY).not.toContain('o3')
@@ -603,7 +603,9 @@ describe('Model Capabilities', () => {
       const values = getReasoningEffortValuesForModel('azure/gpt-5.2')
       expect(values).toBeDefined()
       expect(values).not.toContain('minimal')
-      expect(values).toContain('xhigh')
+      expect(values).toContain('none')
+      expect(values).toContain('high')
+      expect(values).not.toContain('xhigh')
     })
   })
 
@@ -713,7 +715,7 @@ describe('Max Output Tokens', () => {
 
     it.concurrent('should return published max for Bedrock Claude Opus 4.1', () => {
       expect(getMaxOutputTokensForModel('bedrock/anthropic.claude-opus-4-1-20250805-v1:0')).toBe(
-        64000
+        32768
       )
     })
 

From 47e8baf347df6f040b24500a42530b44d85b208e Mon Sep 17 00:00:00 2001
From: waleed <walif6@gmail.com>
Date: Thu, 11 Jun 2026 19:56:33 -0700
Subject: [PATCH 2/4] fix(providers): apply full re-validation pass across
 model catalog with per-provider justification docs

---
 apps/sim/app/(landing)/models/utils.test.ts |   2 +-
 apps/sim/blocks/blocks/agent.ts             |  22 ++
 apps/sim/providers/bedrock/utils.test.ts    |  46 +++
 apps/sim/providers/bedrock/utils.ts         |  27 +-
 apps/sim/providers/models.ts                | 417 +++++++++++---------
 apps/sim/providers/utils.test.ts            |  39 +-
 apps/sim/providers/utils.ts                 |   8 +-
 docs/models/anthropic.md                    | 232 +++++++++++
 docs/models/azure.md                        | 258 ++++++++++++
 docs/models/bedrock.md                      | 226 +++++++++++
 docs/models/deepseek-cerebras.md            | 189 +++++++++
 docs/models/embeddings-rerank-dynamic.md    |  75 ++++
 docs/models/google.md                       | 184 +++++++++
 docs/models/groq.md                         | 157 ++++++++
 docs/models/mistral.md                      | 305 ++++++++++++++
 docs/models/openai.md                       | 338 ++++++++++++++++
 docs/models/vertex.md                       | 212 ++++++++++
 docs/models/xai.md                          |  91 +++++
 18 files changed, 2629 insertions(+), 199 deletions(-)
 create mode 100644 apps/sim/providers/bedrock/utils.test.ts
 create mode 100644 docs/models/anthropic.md
 create mode 100644 docs/models/azure.md
 create mode 100644 docs/models/bedrock.md
 create mode 100644 docs/models/deepseek-cerebras.md
 create mode 100644 docs/models/embeddings-rerank-dynamic.md
 create mode 100644 docs/models/google.md
 create mode 100644 docs/models/groq.md
 create mode 100644 docs/models/mistral.md
 create mode 100644 docs/models/openai.md
 create mode 100644 docs/models/vertex.md
 create mode 100644 docs/models/xai.md

diff --git a/apps/sim/app/(landing)/models/utils.test.ts b/apps/sim/app/(landing)/models/utils.test.ts
index 894c74500c..05c8d88cca 100644
--- a/apps/sim/app/(landing)/models/utils.test.ts
+++ b/apps/sim/app/(landing)/models/utils.test.ts
@@ -38,7 +38,7 @@ describe('model catalog capability facts', () => {
 
   it.concurrent('keeps best-for copy for clearly differentiated models only', () => {
     const researchModel = getModelBySlug('google', 'deep-research-pro-preview-12-2025')
-    const generalModel = getModelBySlug('xai', 'grok-4-latest')
+    const generalModel = getModelBySlug('mistral', 'mistral-medium-latest')
 
     expect(researchModel).not.toBeNull()
     expect(generalModel).not.toBeNull()
diff --git a/apps/sim/blocks/blocks/agent.ts b/apps/sim/blocks/blocks/agent.ts
index bb106f9fdc..afa55b44db 100644
--- a/apps/sim/blocks/blocks/agent.ts
+++ b/apps/sim/blocks/blocks/agent.ts
@@ -412,6 +412,28 @@ Return ONLY the JSON array.`,
         })(),
       }),
     },
+    {
+      id: 'temperature',
+      title: 'Temperature',
+      type: 'slider',
+      min: 0,
+      max: 1.5,
+      defaultValue: 0.3,
+      mode: 'advanced',
+      condition: () => ({
+        field: 'model',
+        value: (() => {
+          const deepResearch = new Set(MODELS_WITH_DEEP_RESEARCH.map((m) => m.toLowerCase()))
+          const allModels = Object.keys(getBaseModelProviders())
+          return allModels.filter(
+            (model) =>
+              supportsTemperature(model) &&
+              getMaxTemperature(model) === 1.5 &&
+              !deepResearch.has(model.toLowerCase())
+          )
+        })(),
+      }),
+    },
     {
       id: 'temperature',
       title: 'Temperature',
diff --git a/apps/sim/providers/bedrock/utils.test.ts b/apps/sim/providers/bedrock/utils.test.ts
new file mode 100644
index 0000000000..a667d61412
--- /dev/null
+++ b/apps/sim/providers/bedrock/utils.test.ts
@@ -0,0 +1,46 @@
+/**
+ * @vitest-environment node
+ */
+import { describe, expect, it } from 'vitest'
+import { getBedrockInferenceProfileId } from '@/providers/bedrock/utils'
+
+describe('getBedrockInferenceProfileId', () => {
+  it.concurrent('prefixes geo inference profile for models that require it', () => {
+    expect(
+      getBedrockInferenceProfileId('bedrock/anthropic.claude-sonnet-4-5-20250929-v1:0', 'us-east-1')
+    ).toBe('us.anthropic.claude-sonnet-4-5-20250929-v1:0')
+    expect(getBedrockInferenceProfileId('bedrock/amazon.nova-pro-v1:0', 'eu-west-1')).toBe(
+      'eu.amazon.nova-pro-v1:0'
+    )
+    expect(
+      getBedrockInferenceProfileId('bedrock/meta.llama4-scout-17b-instruct-v1:0', 'us-west-2')
+    ).toBe('us.meta.llama4-scout-17b-instruct-v1:0')
+  })
+
+  it.concurrent('returns already-prefixed inference profile IDs unchanged', () => {
+    expect(
+      getBedrockInferenceProfileId('us.anthropic.claude-sonnet-4-5-20250929-v1:0', 'us-east-1')
+    ).toBe('us.anthropic.claude-sonnet-4-5-20250929-v1:0')
+    expect(getBedrockInferenceProfileId('global.amazon.nova-2-lite-v1:0', 'us-east-1')).toBe(
+      'global.amazon.nova-2-lite-v1:0'
+    )
+  })
+
+  it.concurrent('returns the bare model ID for models without geo profile support', () => {
+    expect(
+      getBedrockInferenceProfileId('bedrock/mistral.mistral-large-3-675b-instruct', 'us-east-1')
+    ).toBe('mistral.mistral-large-3-675b-instruct')
+    expect(
+      getBedrockInferenceProfileId('bedrock/mistral.ministral-3-8b-instruct', 'eu-west-1')
+    ).toBe('mistral.ministral-3-8b-instruct')
+    expect(getBedrockInferenceProfileId('bedrock/cohere.command-r-plus-v1:0', 'us-east-1')).toBe(
+      'cohere.command-r-plus-v1:0'
+    )
+    expect(
+      getBedrockInferenceProfileId('bedrock/mistral.mixtral-8x7b-instruct-v0:1', 'ap-southeast-1')
+    ).toBe('mistral.mixtral-8x7b-instruct-v0:1')
+    expect(
+      getBedrockInferenceProfileId('bedrock/amazon.titan-text-premier-v1:0', 'us-east-1')
+    ).toBe('amazon.titan-text-premier-v1:0')
+  })
+})
diff --git a/apps/sim/providers/bedrock/utils.ts b/apps/sim/providers/bedrock/utils.ts
index 401c264c0c..a385ffd053 100644
--- a/apps/sim/providers/bedrock/utils.ts
+++ b/apps/sim/providers/bedrock/utils.ts
@@ -81,10 +81,31 @@ export function generateToolUseId(toolName: string): string {
   return `${truncatedName}${suffix}`
 }
 
+/**
+ * Models whose AWS model cards state geo/cross-region inference profiles are
+ * not supported ("Geo inference ID: Not supported"). These must be invoked
+ * with the bare in-region model ID — prefixing them with a geo profile
+ * (e.g. us.mistral...) produces an invalid model identifier.
+ */
+const GEO_PROFILE_UNSUPPORTED_MODEL_IDS = new Set([
+  'mistral.mistral-large-3-675b-instruct',
+  'mistral.mistral-large-2411-v1:0',
+  'mistral.mistral-large-2407-v1:0',
+  'mistral.magistral-small-2509',
+  'mistral.ministral-3-14b-instruct',
+  'mistral.ministral-3-8b-instruct',
+  'mistral.ministral-3-3b-instruct',
+  'mistral.mixtral-8x7b-instruct-v0:1',
+  'amazon.titan-text-premier-v1:0',
+  'cohere.command-r-v1:0',
+  'cohere.command-r-plus-v1:0',
+])
+
 /**
  * Converts a model ID to the Bedrock inference profile format.
  * AWS Bedrock requires inference profile IDs (e.g., us.anthropic.claude-...)
- * for on-demand invocation of newer models.
+ * for on-demand invocation of newer models, while some models only accept
+ * the bare in-region model ID.
  *
  * @param modelId - The model ID (e.g., "bedrock/anthropic.claude-sonnet-4-5-20250929-v1:0")
  * @param region - The AWS region (e.g., "us-east-1")
@@ -97,6 +118,10 @@ export function getBedrockInferenceProfileId(modelId: string, region: string): s
     return baseModelId
   }
 
+  if (GEO_PROFILE_UNSUPPORTED_MODEL_IDS.has(baseModelId)) {
+    return baseModelId
+  }
+
   let inferencePrefix: string
   if (region.startsWith('us-gov-')) {
     inferencePrefix = 'us-gov'
diff --git a/apps/sim/providers/models.ts b/apps/sim/providers/models.ts
index b0caf31aff..988074e6a6 100644
--- a/apps/sim/providers/models.ts
+++ b/apps/sim/providers/models.ts
@@ -110,7 +110,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     color: '#EF2CC1',
     isReseller: true,
     capabilities: {
-      temperature: { min: 0, max: 2 },
+      temperature: { min: 0, max: 1 },
       toolUsageControl: true,
     },
     contextInformationAvailable: false,
@@ -157,7 +157,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     isReseller: true,
     capabilities: {
       temperature: { min: 0, max: 2 },
-      toolUsageControl: true,
+      toolUsageControl: false,
     },
     contextInformationAvailable: false,
     models: [],
@@ -246,6 +246,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 1047576,
         releaseDate: '2025-04-14',
+        deprecated: true,
       },
       // GPT-5.5 family
       {
@@ -253,15 +254,12 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         pricing: {
           input: 30.0,
           output: 180.0,
-          updatedAt: '2026-04-23',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           nativeStructuredOutputs: true,
           reasoningEffort: {
-            values: ['none', 'low', 'medium', 'high', 'xhigh'],
-          },
-          verbosity: {
-            values: ['low', 'medium', 'high'],
+            values: ['medium', 'high', 'xhigh'],
           },
           maxOutputTokens: 128000,
         },
@@ -274,7 +272,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           input: 5.0,
           cachedInput: 0.5,
           output: 30.0,
-          updatedAt: '2026-04-23',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           nativeStructuredOutputs: true,
@@ -424,7 +422,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           maxOutputTokens: 128000,
         },
         contextWindow: 400000,
-        releaseDate: '2025-11-12',
+        releaseDate: '2025-11-13',
       },
       // GPT-5 family
       {
@@ -567,6 +565,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 200000,
         releaseDate: '2025-04-16',
+        deprecated: true,
       },
       {
         id: 'o3-mini',
@@ -584,6 +583,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 200000,
         releaseDate: '2025-01-31',
+        deprecated: true,
       },
       {
         id: 'o1',
@@ -600,7 +600,8 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           maxOutputTokens: 100000,
         },
         contextWindow: 200000,
-        releaseDate: '2024-12-05',
+        releaseDate: '2024-12-17',
+        deprecated: true,
       },
       // Legacy
       {
@@ -642,6 +643,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-06-09',
         },
         capabilities: {
+          nativeStructuredOutputs: true,
           maxOutputTokens: 128000,
           thinking: {
             levels: ['low', 'medium', 'high', 'xhigh', 'max'],
@@ -865,7 +867,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           maxOutputTokens: 4096,
         },
         contextWindow: 200000,
-        releaseDate: '2024-03-07',
+        releaseDate: '2024-03-13',
         deprecated: true,
       },
     ],
@@ -892,9 +894,11 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         capabilities: {
           temperature: { min: 0, max: 2 },
+          maxOutputTokens: 16384,
         },
         contextWindow: 128000,
         releaseDate: '2024-11-20',
+        deprecated: true,
       },
       {
         id: 'azure/gpt-5.4',
@@ -906,7 +910,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         capabilities: {
           reasoningEffort: {
-            values: ['none', 'low', 'medium', 'high'],
+            values: ['low', 'medium', 'high'],
           },
           verbosity: {
             values: ['low', 'medium', 'high'],
@@ -926,7 +930,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         capabilities: {
           reasoningEffort: {
-            values: ['none', 'low', 'medium', 'high'],
+            values: ['low', 'medium', 'high'],
           },
           verbosity: {
             values: ['low', 'medium', 'high'],
@@ -946,7 +950,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         capabilities: {
           reasoningEffort: {
-            values: ['none', 'low', 'medium', 'high'],
+            values: ['low', 'medium', 'high'],
           },
           verbosity: {
             values: ['low', 'medium', 'high'],
@@ -994,7 +998,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           maxOutputTokens: 128000,
         },
         contextWindow: 400000,
-        releaseDate: '2025-11-12',
+        releaseDate: '2025-11-13',
       },
       {
         id: 'azure/gpt-5.1-codex',
@@ -1014,8 +1018,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           maxOutputTokens: 128000,
         },
         contextWindow: 400000,
-        releaseDate: '2025-11-12',
-        deprecated: true,
+        releaseDate: '2025-11-13',
       },
       {
         id: 'azure/gpt-5',
@@ -1267,7 +1270,6 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
-          nativeStructuredOutputs: true,
           maxOutputTokens: 32000,
           thinking: {
             levels: ['low', 'medium', 'high'],
@@ -1368,7 +1370,8 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           maxOutputTokens: 65536,
         },
         contextWindow: 1048576,
-        releaseDate: '2026-03-03',
+        releaseDate: '2026-05-07',
+        speedOptimized: true,
       },
       {
         id: 'gemini-3-flash-preview',
@@ -1388,6 +1391,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 1048576,
         releaseDate: '2025-12-17',
+        deprecated: true,
       },
       {
         id: 'gemini-2.5-pro',
@@ -1514,6 +1518,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 1048576,
         releaseDate: '2026-05-19',
+        recommended: true,
       },
       {
         id: 'vertex/gemini-3.1-pro-preview',
@@ -1551,7 +1556,8 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           maxOutputTokens: 65536,
         },
         contextWindow: 1048576,
-        releaseDate: '2026-03-03',
+        releaseDate: '2026-05-07',
+        speedOptimized: true,
       },
       {
         id: 'vertex/gemini-3-pro-preview',
@@ -1601,7 +1607,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         capabilities: {
           temperature: { min: 0, max: 2 },
-          maxOutputTokens: 65536,
+          maxOutputTokens: 65535,
         },
         contextWindow: 1048576,
         releaseDate: '2025-03-25',
@@ -1616,7 +1622,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         capabilities: {
           temperature: { min: 0, max: 2 },
-          maxOutputTokens: 65536,
+          maxOutputTokens: 65535,
         },
         contextWindow: 1048576,
         releaseDate: '2025-05-20',
@@ -1631,18 +1637,19 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         capabilities: {
           temperature: { min: 0, max: 2 },
-          maxOutputTokens: 65536,
+          maxOutputTokens: 65535,
         },
         contextWindow: 1048576,
         releaseDate: '2025-06-17',
+        speedOptimized: true,
       },
       {
         id: 'vertex/gemini-2.0-flash',
         pricing: {
-          input: 0.1,
+          input: 0.15,
           cachedInput: 0.025,
-          output: 0.4,
-          updatedAt: '2026-04-01',
+          output: 0.6,
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 2 },
@@ -1703,7 +1710,9 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           output: 0.28,
           updatedAt: '2026-06-11',
         },
-        capabilities: {},
+        capabilities: {
+          temperature: { min: 0, max: 2 },
+        },
         contextWindow: 1000000,
         releaseDate: '2024-12-26',
       },
@@ -1770,7 +1779,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-05-05',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 2 },
         },
         contextWindow: 1000000,
         releaseDate: '2026-04-30',
@@ -1779,103 +1788,103 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
       {
         id: 'grok-4-latest',
         pricing: {
-          input: 3.0,
-          cachedInput: 0.75,
-          output: 15.0,
-          updatedAt: '2026-04-01',
+          input: 1.25,
+          cachedInput: 0.2,
+          output: 2.5,
+          updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 2 },
         },
-        contextWindow: 256000,
+        contextWindow: 1000000,
         releaseDate: '2025-07-09',
         deprecated: true,
       },
       {
         id: 'grok-4-0709',
         pricing: {
-          input: 3.0,
-          cachedInput: 0.75,
-          output: 15.0,
-          updatedAt: '2026-04-01',
+          input: 1.25,
+          cachedInput: 0.2,
+          output: 2.5,
+          updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 2 },
         },
-        contextWindow: 256000,
+        contextWindow: 1000000,
         releaseDate: '2025-07-09',
         deprecated: true,
       },
       {
         id: 'grok-4-1-fast-reasoning',
         pricing: {
-          input: 0.2,
-          cachedInput: 0.05,
-          output: 0.5,
-          updatedAt: '2026-04-01',
+          input: 1.25,
+          cachedInput: 0.2,
+          output: 2.5,
+          updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 2 },
         },
-        contextWindow: 2000000,
+        contextWindow: 1000000,
         releaseDate: '2025-11-19',
         deprecated: true,
       },
       {
         id: 'grok-4-1-fast-non-reasoning',
         pricing: {
-          input: 0.2,
-          cachedInput: 0.05,
-          output: 0.5,
-          updatedAt: '2026-04-01',
+          input: 1.25,
+          cachedInput: 0.2,
+          output: 2.5,
+          updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 2 },
         },
-        contextWindow: 2000000,
+        contextWindow: 1000000,
         releaseDate: '2025-11-19',
         deprecated: true,
       },
       {
         id: 'grok-4-fast-reasoning',
         pricing: {
-          input: 0.2,
-          cachedInput: 0.05,
-          output: 0.5,
-          updatedAt: '2026-04-01',
+          input: 1.25,
+          cachedInput: 0.2,
+          output: 2.5,
+          updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 2 },
         },
-        contextWindow: 2000000,
+        contextWindow: 1000000,
         releaseDate: '2025-09-19',
         deprecated: true,
       },
       {
         id: 'grok-4-fast-non-reasoning',
         pricing: {
-          input: 0.2,
-          cachedInput: 0.05,
-          output: 0.5,
-          updatedAt: '2026-04-01',
+          input: 1.25,
+          cachedInput: 0.2,
+          output: 2.5,
+          updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 2 },
         },
-        contextWindow: 2000000,
+        contextWindow: 1000000,
         releaseDate: '2025-09-19',
         deprecated: true,
       },
       {
         id: 'grok-code-fast-1',
         pricing: {
-          input: 0.2,
-          cachedInput: 0.02,
-          output: 1.5,
-          updatedAt: '2026-04-01',
+          input: 1.0,
+          cachedInput: 0.2,
+          output: 2.0,
+          updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 2 },
         },
         contextWindow: 256000,
         releaseDate: '2025-08-28',
@@ -1890,7 +1899,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 2 },
         },
         contextWindow: 1000000,
         releaseDate: '2026-03-10',
@@ -1904,7 +1913,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 2 },
         },
         contextWindow: 1000000,
         releaseDate: '2026-03-10',
@@ -1918,7 +1927,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 2 },
         },
         contextWindow: 1000000,
         releaseDate: '2026-03-10',
@@ -1926,30 +1935,30 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
       {
         id: 'grok-3-latest',
         pricing: {
-          input: 3.0,
-          cachedInput: 0.75,
-          output: 15.0,
-          updatedAt: '2026-04-01',
+          input: 1.25,
+          cachedInput: 0.2,
+          output: 2.5,
+          updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 2 },
         },
-        contextWindow: 131072,
+        contextWindow: 1000000,
         releaseDate: '2025-02-17',
         deprecated: true,
       },
       {
         id: 'grok-3-fast-latest',
         pricing: {
-          input: 5.0,
-          cachedInput: 0.75,
-          output: 25.0,
-          updatedAt: '2026-04-01',
+          input: 1.25,
+          cachedInput: 0.2,
+          output: 2.5,
+          updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 2 },
         },
-        contextWindow: 131072,
+        contextWindow: 1000000,
         releaseDate: '2025-02-17',
         deprecated: true,
       },
@@ -1974,7 +1983,10 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           output: 0.75,
           updatedAt: '2026-06-11',
         },
-        capabilities: {},
+        capabilities: {
+          temperature: { min: 0, max: 2 },
+          maxOutputTokens: 40000,
+        },
         contextWindow: 131072,
         releaseDate: '2025-08-05',
       },
@@ -2009,7 +2021,10 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           output: 2.75,
           updatedAt: '2026-06-11',
         },
-        capabilities: {},
+        capabilities: {
+          temperature: { min: 0, max: 2 },
+          maxOutputTokens: 40000,
+        },
         contextWindow: 131072,
         releaseDate: '2025-12-22',
       },
@@ -2024,6 +2039,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     icon: GroqIcon,
     color: '#F55036',
     capabilities: {
+      temperature: { min: 0, max: 2 },
       toolUsageControl: true,
     },
     models: [
@@ -2035,9 +2051,12 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           output: 0.6,
           updatedAt: '2026-06-11',
         },
-        capabilities: {},
+        capabilities: {
+          maxOutputTokens: 65536,
+        },
         contextWindow: 131072,
         releaseDate: '2025-08-05',
+        recommended: true,
       },
       {
         id: 'groq/openai/gpt-oss-20b',
@@ -2047,7 +2066,9 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           output: 0.3,
           updatedAt: '2026-06-11',
         },
-        capabilities: {},
+        capabilities: {
+          maxOutputTokens: 65536,
+        },
         contextWindow: 131072,
         releaseDate: '2025-08-05',
       },
@@ -2059,7 +2080,9 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           output: 0.3,
           updatedAt: '2026-06-11',
         },
-        capabilities: {},
+        capabilities: {
+          maxOutputTokens: 65536,
+        },
         contextWindow: 131072,
         releaseDate: '2025-10-29',
       },
@@ -2070,7 +2093,9 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           output: 0.59,
           updatedAt: '2026-06-11',
         },
-        capabilities: {},
+        capabilities: {
+          maxOutputTokens: 40960,
+        },
         contextWindow: 131072,
         releaseDate: '2025-04-29',
       },
@@ -2081,9 +2106,12 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           output: 0.08,
           updatedAt: '2026-06-11',
         },
-        capabilities: {},
+        capabilities: {
+          maxOutputTokens: 131072,
+        },
         contextWindow: 131072,
         releaseDate: '2024-07-23',
+        speedOptimized: true,
       },
       {
         id: 'groq/llama-3.3-70b-versatile',
@@ -2092,7 +2120,9 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           output: 0.79,
           updatedAt: '2026-06-11',
         },
-        capabilities: {},
+        capabilities: {
+          maxOutputTokens: 32768,
+        },
         contextWindow: 131072,
         releaseDate: '2024-12-06',
       },
@@ -2103,7 +2133,9 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           output: 0.34,
           updatedAt: '2026-06-11',
         },
-        capabilities: {},
+        capabilities: {
+          maxOutputTokens: 8192,
+        },
         contextWindow: 131072,
         releaseDate: '2025-04-05',
       },
@@ -2148,10 +2180,11 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 1.5 },
         },
         contextWindow: 256000,
         releaseDate: '2025-12-02',
+        recommended: true,
       },
       {
         id: 'mistral-large-2512',
@@ -2161,7 +2194,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 1.5 },
         },
         contextWindow: 256000,
         releaseDate: '2025-12-02',
@@ -2174,7 +2207,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 1.5 },
         },
         contextWindow: 256000,
         releaseDate: '2026-03-16',
@@ -2187,7 +2220,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 1.5 },
         },
         contextWindow: 256000,
         releaseDate: '2025-12-09',
@@ -2200,7 +2233,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-04-01',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 1.5 },
         },
         contextWindow: 128000,
         releaseDate: '2024-11-18',
@@ -2214,7 +2247,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 1.5 },
         },
         contextWindow: 128000,
         releaseDate: '2025-09-18',
@@ -2227,7 +2260,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 1.5 },
         },
         contextWindow: 128000,
         releaseDate: '2025-09-18',
@@ -2240,7 +2273,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-04-01',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 1.5 },
         },
         contextWindow: 128000,
         releaseDate: '2025-09-18',
@@ -2254,7 +2287,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-04-01',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 1.5 },
         },
         contextWindow: 128000,
         releaseDate: '2025-09-18',
@@ -2268,7 +2301,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 1.5 },
         },
         contextWindow: 128000,
         releaseDate: '2025-08-12',
@@ -2281,7 +2314,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 1.5 },
         },
         contextWindow: 128000,
         releaseDate: '2025-08-12',
@@ -2294,7 +2327,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 1.5 },
         },
         contextWindow: 128000,
         releaseDate: '2025-05-07',
@@ -2307,7 +2340,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 1.5 },
         },
         contextWindow: 256000,
         releaseDate: '2026-03-16',
@@ -2320,7 +2353,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-04-01',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 1.5 },
         },
         contextWindow: 128000,
         releaseDate: '2025-06-20',
@@ -2334,7 +2367,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 1.5 },
         },
         contextWindow: 128000,
         releaseDate: '2024-07-18',
@@ -2347,7 +2380,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 1.5 },
         },
         contextWindow: 128000,
         releaseDate: '2025-07-30',
@@ -2360,7 +2393,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 1.5 },
         },
         contextWindow: 128000,
         releaseDate: '2025-07-30',
@@ -2373,7 +2406,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 1.5 },
         },
         contextWindow: 256000,
         releaseDate: '2025-12-09',
@@ -2386,7 +2419,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-04-01',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 1.5 },
         },
         contextWindow: 256000,
         releaseDate: '2025-12-09',
@@ -2400,7 +2433,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-04-01',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 1.5 },
         },
         contextWindow: 128000,
         releaseDate: '2025-07-10',
@@ -2414,7 +2447,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-04-01',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 1.5 },
         },
         contextWindow: 128000,
         releaseDate: '2025-07-10',
@@ -2428,10 +2461,11 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 1.5 },
         },
         contextWindow: 256000,
         releaseDate: '2025-12-02',
+        speedOptimized: true,
       },
       {
         id: 'ministral-14b-2512',
@@ -2441,10 +2475,11 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 1.5 },
         },
         contextWindow: 256000,
         releaseDate: '2025-12-02',
+        speedOptimized: true,
       },
       {
         id: 'ministral-8b-latest',
@@ -2454,10 +2489,11 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 1.5 },
         },
         contextWindow: 256000,
         releaseDate: '2025-12-02',
+        speedOptimized: true,
       },
       {
         id: 'ministral-8b-2512',
@@ -2467,10 +2503,11 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 1.5 },
         },
         contextWindow: 256000,
         releaseDate: '2025-12-02',
+        speedOptimized: true,
       },
       {
         id: 'ministral-3b-latest',
@@ -2480,10 +2517,11 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 1.5 },
         },
         contextWindow: 256000,
         releaseDate: '2025-12-02',
+        speedOptimized: true,
       },
       {
         id: 'ministral-3b-2512',
@@ -2493,10 +2531,11 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           updatedAt: '2026-06-11',
         },
         capabilities: {
-          temperature: { min: 0, max: 1 },
+          temperature: { min: 0, max: 1.5 },
         },
         contextWindow: 256000,
         releaseDate: '2025-12-02',
+        speedOptimized: true,
       },
     ],
   },
@@ -2531,8 +2570,9 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         id: 'bedrock/anthropic.claude-opus-4-5-20251101-v1:0',
         pricing: {
           input: 5.0,
+          cachedInput: 0.5,
           output: 25.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -2546,8 +2586,9 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         id: 'bedrock/anthropic.claude-sonnet-4-5-20250929-v1:0',
         pricing: {
           input: 3.0,
+          cachedInput: 0.3,
           output: 15.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -2556,13 +2597,15 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 200000,
         releaseDate: '2025-09-29',
+        recommended: true,
       },
       {
         id: 'bedrock/anthropic.claude-haiku-4-5-20251001-v1:0',
         pricing: {
           input: 1.0,
+          cachedInput: 0.1,
           output: 5.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -2571,18 +2614,20 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         },
         contextWindow: 200000,
         releaseDate: '2025-10-15',
+        speedOptimized: true,
       },
       {
         id: 'bedrock/anthropic.claude-opus-4-1-20250805-v1:0',
         pricing: {
           input: 15.0,
+          cachedInput: 1.5,
           output: 75.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
           nativeStructuredOutputs: true,
-          maxOutputTokens: 32768,
+          maxOutputTokens: 32000,
         },
         contextWindow: 200000,
         releaseDate: '2025-08-05',
@@ -2590,9 +2635,9 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
       {
         id: 'bedrock/amazon.nova-2-pro-v1:0',
         pricing: {
-          input: 1.0,
-          output: 4.0,
-          updatedAt: '2026-04-01',
+          input: 1.375,
+          output: 11.0,
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -2604,12 +2649,14 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
       {
         id: 'bedrock/amazon.nova-2-lite-v1:0',
         pricing: {
-          input: 0.08,
-          output: 0.32,
-          updatedAt: '2026-04-01',
+          input: 0.33,
+          cachedInput: 0.0825,
+          output: 2.75,
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
+          maxOutputTokens: 64000,
         },
         contextWindow: 1000000,
         releaseDate: '2025-12-02',
@@ -2618,6 +2665,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         id: 'bedrock/amazon.nova-premier-v1:0',
         pricing: {
           input: 2.5,
+          cachedInput: 0.625,
           output: 12.5,
           updatedAt: '2026-06-11',
         },
@@ -2632,11 +2680,13 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         id: 'bedrock/amazon.nova-pro-v1:0',
         pricing: {
           input: 0.8,
+          cachedInput: 0.2,
           output: 3.2,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
+          maxOutputTokens: 5120,
         },
         contextWindow: 300000,
         releaseDate: '2024-12-03',
@@ -2645,11 +2695,13 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         id: 'bedrock/amazon.nova-lite-v1:0',
         pricing: {
           input: 0.06,
+          cachedInput: 0.015,
           output: 0.24,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
+          maxOutputTokens: 5120,
         },
         contextWindow: 300000,
         releaseDate: '2024-12-03',
@@ -2658,24 +2710,28 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         id: 'bedrock/amazon.nova-micro-v1:0',
         pricing: {
           input: 0.035,
+          cachedInput: 0.00875,
           output: 0.14,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
+          maxOutputTokens: 5120,
         },
         contextWindow: 128000,
         releaseDate: '2024-12-03',
+        speedOptimized: true,
       },
       {
         id: 'bedrock/meta.llama4-maverick-17b-instruct-v1:0',
         pricing: {
           input: 0.24,
           output: 0.97,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
+          maxOutputTokens: 8192,
         },
         contextWindow: 1000000,
         releaseDate: '2025-04-05',
@@ -2683,12 +2739,13 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
       {
         id: 'bedrock/meta.llama4-scout-17b-instruct-v1:0',
         pricing: {
-          input: 0.18,
-          output: 0.72,
-          updatedAt: '2026-04-01',
+          input: 0.17,
+          output: 0.66,
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
+          maxOutputTokens: 8192,
         },
         contextWindow: 10000000,
         releaseDate: '2025-04-05',
@@ -2698,10 +2755,11 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         pricing: {
           input: 0.72,
           output: 0.72,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
+          maxOutputTokens: 4096,
         },
         contextWindow: 128000,
         releaseDate: '2024-12-06',
@@ -2709,9 +2767,9 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
       {
         id: 'bedrock/meta.llama3-2-90b-instruct-v1:0',
         pricing: {
-          input: 2.0,
-          output: 2.0,
-          updatedAt: '2026-04-01',
+          input: 0.72,
+          output: 0.72,
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -2725,7 +2783,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         pricing: {
           input: 0.16,
           output: 0.16,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -2739,7 +2797,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         pricing: {
           input: 0.15,
           output: 0.15,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -2753,7 +2811,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         pricing: {
           input: 0.1,
           output: 0.1,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -2765,9 +2823,9 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
       {
         id: 'bedrock/meta.llama3-1-405b-instruct-v1:0',
         pricing: {
-          input: 5.32,
-          output: 16.0,
-          updatedAt: '2026-04-01',
+          input: 2.4,
+          output: 2.4,
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -2778,26 +2836,30 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
       {
         id: 'bedrock/meta.llama3-1-70b-instruct-v1:0',
         pricing: {
-          input: 2.65,
-          output: 3.5,
-          updatedAt: '2026-04-01',
+          input: 0.72,
+          output: 0.72,
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
+          maxOutputTokens: 4096,
         },
         contextWindow: 128000,
+        releaseDate: '2024-07-23',
       },
       {
         id: 'bedrock/meta.llama3-1-8b-instruct-v1:0',
         pricing: {
-          input: 0.3,
-          output: 0.6,
-          updatedAt: '2026-04-01',
+          input: 0.22,
+          output: 0.22,
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
+          maxOutputTokens: 4096,
         },
         contextWindow: 128000,
+        releaseDate: '2024-07-23',
       },
       {
         id: 'bedrock/mistral.mistral-large-3-675b-instruct',
@@ -2811,6 +2873,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
           maxOutputTokens: 32768,
         },
         contextWindow: 256000,
+        releaseDate: '2025-12-02',
       },
       {
         id: 'bedrock/mistral.mistral-large-2411-v1:0',
@@ -2828,9 +2891,9 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
       {
         id: 'bedrock/mistral.mistral-large-2407-v1:0',
         pricing: {
-          input: 4.0,
-          output: 12.0,
-          updatedAt: '2026-04-01',
+          input: 2.0,
+          output: 6.0,
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -2843,7 +2906,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         pricing: {
           input: 2.0,
           output: 6.0,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -2856,7 +2919,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         pricing: {
           input: 0.5,
           output: 1.5,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
@@ -2869,49 +2932,53 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
         pricing: {
           input: 0.2,
           output: 0.2,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
           maxOutputTokens: 8192,
         },
         contextWindow: 128000,
+        releaseDate: '2025-12-02',
       },
       {
         id: 'bedrock/mistral.ministral-3-8b-instruct',
         pricing: {
-          input: 0.1,
-          output: 0.1,
-          updatedAt: '2026-04-01',
+          input: 0.15,
+          output: 0.15,
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
           maxOutputTokens: 8192,
         },
         contextWindow: 128000,
+        releaseDate: '2025-12-02',
       },
       {
         id: 'bedrock/mistral.ministral-3-3b-instruct',
         pricing: {
-          input: 0.04,
-          output: 0.04,
-          updatedAt: '2026-04-01',
+          input: 0.1,
+          output: 0.1,
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
           maxOutputTokens: 8192,
         },
         contextWindow: 128000,
+        releaseDate: '2025-12-02',
       },
       {
         id: 'bedrock/mistral.mixtral-8x7b-instruct-v0:1',
         pricing: {
           input: 0.45,
           output: 0.7,
-          updatedAt: '2026-04-01',
+          updatedAt: '2026-06-11',
         },
         capabilities: {
           temperature: { min: 0, max: 1 },
+          maxOutputTokens: 4096,
         },
         contextWindow: 32000,
       },
@@ -3122,23 +3189,11 @@ export function getModelsWithTemperatureSupport(): string[] {
   return models
 }
 
-export function getModelsWithTempRange01(): string[] {
-  const models: string[] = []
-  for (const provider of Object.values(PROVIDER_DEFINITIONS)) {
-    for (const model of provider.models) {
-      if (model.capabilities.temperature?.max === 1) {
-        models.push(model.id)
-      }
-    }
-  }
-  return models
-}
-
-export function getModelsWithTempRange02(): string[] {
+export function getModelsWithTemperatureRange(max: number): string[] {
   const models: string[] = []
   for (const provider of Object.values(PROVIDER_DEFINITIONS)) {
     for (const model of provider.models) {
-      if (model.capabilities.temperature?.max === 2) {
+      if (model.capabilities.temperature?.max === max) {
         models.push(model.id)
       }
     }
diff --git a/apps/sim/providers/utils.test.ts b/apps/sim/providers/utils.test.ts
index f4612ccae8..46d414cbb6 100644
--- a/apps/sim/providers/utils.test.ts
+++ b/apps/sim/providers/utils.test.ts
@@ -25,6 +25,7 @@ import {
   isProviderBlacklisted,
   MODELS_TEMP_RANGE_0_1,
   MODELS_TEMP_RANGE_0_2,
+  MODELS_TEMP_RANGE_0_15,
   MODELS_WITH_REASONING_EFFORT,
   MODELS_WITH_TEMPERATURE_SUPPORT,
   MODELS_WITH_THINKING,
@@ -200,6 +201,9 @@ describe('Model Capabilities', () => {
         'grok-3-latest',
         'grok-3-fast-latest',
         'deepseek-v3',
+        'deepseek-chat',
+        'groq/meta-llama/llama-4-scout-17b-16e-instruct',
+        'mistral-large-latest',
       ]
 
       for (const model of supportedModels) {
@@ -211,14 +215,12 @@ describe('Model Capabilities', () => {
       const unsupportedModels = [
         'unsupported-model',
         'cerebras/llama-3.3-70b',
-        'groq/meta-llama/llama-4-scout-17b-16e-instruct',
         'o1',
         'o3',
         'o4-mini',
         'azure/o3',
         'azure/o4-mini',
         'deepseek-r1',
-        'deepseek-chat',
         'azure/model-router',
         'gpt-5.1',
         'azure/gpt-5.1',
@@ -262,6 +264,10 @@ describe('Model Capabilities', () => {
         'gemini-2.5-pro',
         'gemini-2.5-flash',
         'deepseek-v3',
+        'deepseek-chat',
+        'grok-3-latest',
+        'grok-3-fast-latest',
+        'groq/meta-llama/llama-4-scout-17b-16e-instruct',
       ]
 
       for (const model of modelsRange02) {
@@ -270,22 +276,24 @@ describe('Model Capabilities', () => {
     })
 
     it.concurrent('should return 1 for models with temperature range 0-1', () => {
-      const modelsRange01 = [
-        'claude-sonnet-4-0',
-        'claude-opus-4-0',
-        'grok-3-latest',
-        'grok-3-fast-latest',
-      ]
+      const modelsRange01 = ['claude-sonnet-4-0', 'claude-opus-4-0']
 
       for (const model of modelsRange01) {
         expect(getMaxTemperature(model)).toBe(1)
       }
     })
 
+    it.concurrent('should return 1.5 for models with temperature range 0-1.5', () => {
+      const modelsRange015 = ['mistral-large-latest', 'mistral-small-latest', 'codestral-latest']
+
+      for (const model of modelsRange015) {
+        expect(getMaxTemperature(model)).toBe(1.5)
+      }
+    })
+
     it.concurrent('should return undefined for models that do not support temperature', () => {
       expect(getMaxTemperature('unsupported-model')).toBeUndefined()
       expect(getMaxTemperature('cerebras/llama-3.3-70b')).toBeUndefined()
-      expect(getMaxTemperature('groq/meta-llama/llama-4-scout-17b-16e-instruct')).toBeUndefined()
       expect(getMaxTemperature('o1')).toBeUndefined()
       expect(getMaxTemperature('o3')).toBeUndefined()
       expect(getMaxTemperature('o4-mini')).toBeUndefined()
@@ -428,12 +436,13 @@ describe('Model Capabilities', () => {
       expect(MODELS_TEMP_RANGE_0_2).toContain('gpt-4o')
       expect(MODELS_TEMP_RANGE_0_2).toContain('gemini-2.5-flash')
       expect(MODELS_TEMP_RANGE_0_2).toContain('deepseek-v3')
+      expect(MODELS_TEMP_RANGE_0_2).toContain('grok-3-latest')
       expect(MODELS_TEMP_RANGE_0_2).not.toContain('claude-sonnet-4-0')
     })
 
     it.concurrent('should have correct models in MODELS_TEMP_RANGE_0_1', () => {
       expect(MODELS_TEMP_RANGE_0_1).toContain('claude-sonnet-4-0')
-      expect(MODELS_TEMP_RANGE_0_1).toContain('grok-3-latest')
+      expect(MODELS_TEMP_RANGE_0_1).not.toContain('grok-3-latest')
       expect(MODELS_TEMP_RANGE_0_1).not.toContain('gpt-4o')
     })
 
@@ -449,7 +458,9 @@ describe('Model Capabilities', () => {
       'should combine both temperature ranges in MODELS_WITH_TEMPERATURE_SUPPORT',
       () => {
         expect(MODELS_WITH_TEMPERATURE_SUPPORT.length).toBe(
-          MODELS_TEMP_RANGE_0_2.length + MODELS_TEMP_RANGE_0_1.length
+          MODELS_TEMP_RANGE_0_2.length +
+            MODELS_TEMP_RANGE_0_15.length +
+            MODELS_TEMP_RANGE_0_1.length
         )
         expect(MODELS_WITH_TEMPERATURE_SUPPORT).toContain('gpt-4o')
         expect(MODELS_WITH_TEMPERATURE_SUPPORT).toContain('claude-sonnet-4-0')
@@ -538,6 +549,7 @@ describe('Model Capabilities', () => {
         (m) =>
           m.includes('gpt-5') &&
           !m.includes('chat-latest') &&
+          !m.includes('gpt-5.5-pro') &&
           !m.includes('gpt-5.4-pro') &&
           !m.includes('gpt-5.2-pro') &&
           !m.includes('gpt-5-pro')
@@ -547,6 +559,9 @@ describe('Model Capabilities', () => {
       )
       expect(gpt5ModelsWithReasoningEffort.sort()).toEqual(gpt5ModelsWithVerbosity.sort())
 
+      expect(MODELS_WITH_REASONING_EFFORT).toContain('gpt-5.5-pro')
+      expect(MODELS_WITH_VERBOSITY).not.toContain('gpt-5.5-pro')
+
       expect(MODELS_WITH_REASONING_EFFORT).toContain('gpt-5.4-pro')
       expect(MODELS_WITH_VERBOSITY).not.toContain('gpt-5.4-pro')
 
@@ -715,7 +730,7 @@ describe('Max Output Tokens', () => {
 
     it.concurrent('should return published max for Bedrock Claude Opus 4.1', () => {
       expect(getMaxOutputTokensForModel('bedrock/anthropic.claude-opus-4-1-20250805-v1:0')).toBe(
-        32768
+        32000
       )
     })
 
diff --git a/apps/sim/providers/utils.ts b/apps/sim/providers/utils.ts
index b2021817b6..c584261b4e 100644
--- a/apps/sim/providers/utils.ts
+++ b/apps/sim/providers/utils.ts
@@ -28,9 +28,8 @@ import {
   getModelsWithDeepResearch,
   getModelsWithoutMemory,
   getModelsWithReasoningEffort,
+  getModelsWithTemperatureRange,
   getModelsWithTemperatureSupport,
-  getModelsWithTempRange01,
-  getModelsWithTempRange02,
   getModelsWithThinking,
   getModelsWithVerbosity,
   getProviderDefaultModel as getProviderDefaultModelFromDefinitions,
@@ -1167,8 +1166,9 @@ export function trackForcedToolUsage(
   }
 }
 
-export const MODELS_TEMP_RANGE_0_2 = getModelsWithTempRange02()
-export const MODELS_TEMP_RANGE_0_1 = getModelsWithTempRange01()
+export const MODELS_TEMP_RANGE_0_2 = getModelsWithTemperatureRange(2)
+export const MODELS_TEMP_RANGE_0_15 = getModelsWithTemperatureRange(1.5)
+export const MODELS_TEMP_RANGE_0_1 = getModelsWithTemperatureRange(1)
 export const MODELS_WITH_TEMPERATURE_SUPPORT = getModelsWithTemperatureSupport()
 export const MODELS_WITH_REASONING_EFFORT = getModelsWithReasoningEffort()
 export const MODELS_WITH_VERBOSITY = getModelsWithVerbosity()
diff --git a/docs/models/anthropic.md b/docs/models/anthropic.md
new file mode 100644
index 0000000000..81dad60b26
--- /dev/null
+++ b/docs/models/anthropic.md
@@ -0,0 +1,232 @@
+# Anthropic Provider Model Validation — Justification Doc
+
+- **Date:** 2026-06-11
+- **Scope:** `anthropic` provider block in `apps/sim/providers/models.ts` (12 models), re-verified after PR #4990
+- **Method:** Live WebFetch of official Anthropic docs (platform.claude.com), secondary pricing source (OpenRouter), Anthropic news posts via web search for launch dates, plus `rg` verification that every capability flag is actually consumed by provider code (`apps/sim/providers/anthropic/core.ts`, `apps/sim/providers/models.ts`, `apps/sim/providers/utils.ts`).
+- **Primary sources:**
+  - Models overview: https://platform.claude.com/docs/en/about-claude/models/overview
+  - Pricing: https://platform.claude.com/docs/en/about-claude/pricing
+  - Deprecations: https://platform.claude.com/docs/en/about-claude/model-deprecations
+  - Effort: https://platform.claude.com/docs/en/build-with-claude/effort
+  - Structured outputs: https://platform.claude.com/docs/en/build-with-claude/structured-outputs
+  - Computer use: https://platform.claude.com/docs/en/agents-and-tools/tool-use/computer-use-tool
+  - Messages API: https://platform.claude.com/docs/en/api/messages
+  - Secondary pricing: https://openrouter.ai/provider/anthropic
+  - Launch dates: https://www.anthropic.com/news/claude-4 , https://www.anthropic.com/news/claude-3-haiku
+
+**Verdict key:** ✓ = verified against live docs · ⚠ = recommended change · ◆ = intentional deviation (documented) · ◇ = unverifiable from live docs (reason given)
+
+---
+
+## How capability fields are consumed (code verification)
+
+| Field | Consumer | Behavior |
+|---|---|---|
+| `thinking.levels` / `thinking.default` | `core.ts` `buildThinkingConfig()` via `getThinkingCapability()` | Level must be in `levels` or thinking is skipped. Fable 5 / Opus 4.8 / 4.7 / 4.6 / Sonnet 4.6 (`supportsAdaptiveThinking()`) → `thinking: {type: 'adaptive'}` + `output_config: {effort: <level>}`. All other models → `thinking: {type: 'enabled', budget_tokens}` with low=2048 / medium=8192 / high=32768 (so `xhigh`/`max` must never appear on a budget-tokens model — `THINKING_BUDGET_TOKENS` has no entry for those levels, so the thinking config would be dropped). |
+| `temperature` | payload construction in `core.ts` | Presence of `capabilities.temperature` allows the param; omitted on a model means Sim never sends it. Stripped when thinking enabled (thinking incompatible with temperature). |
+| `nativeStructuredOutputs` | `models.ts:3393` (`getModelsWithNativeStructuredOutputs`-style helper) consumed by `core.ts` | With flag → native `output_format`/`output_config` JSON-schema path; without → `generateSchemaInstructions()` prompt-injection fallback. |
+| `computerUse` | `models.ts:3167` `getComputerUseModels()` → `providers/utils.ts:143` `computerUseModels` | Gates Sim's computer-use path per provider. **No Anthropic model currently sets it.** |
+| `contextWindow` / `maxOutputTokens` / `pricing` | cost calculation, token clamping, UI | Straight passthrough. Sim does **not** send any `context-1m-*` beta header (`rg 'context-1m' apps/sim/providers/anthropic/` → no matches), so `contextWindow` must reflect the no-beta-header window. |
+| `reasoningEffort` / `verbosity` | **not consumed** by the Anthropic provider (OpenAI-family fields) | Correctly absent from all Anthropic entries. |
+
+---
+
+## Per-model field verification
+
+### claude-fable-5
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| pricing.input | 10.0 | Pricing doc ($10/MTok); OpenRouter $10/M | ✓ |
+| pricing.cachedInput | 1.0 | Pricing doc cache hit $1/MTok (0.1×) | ✓ |
+| pricing.output | 50.0 | Pricing doc $50/MTok; OpenRouter $50/M | ✓ |
+| capabilities.temperature | absent | Deprecations doc: sampling params 400 on Opus 4.7 and later; Fable 5 rejects `temperature`/`top_p`/`top_k` | ✓ |
+| capabilities.nativeStructuredOutputs | **absent** | Structured-outputs doc: "generally available … for **Claude Fable 5**, Claude Mythos 5, Claude Opus 4.8, …" | ⚠ **should be `true`** — Fable 5 is in the GA list; current absence routes Fable 5 through the prompt-injection fallback instead of native JSON-schema output |
+| capabilities.maxOutputTokens | 128000 | Models overview: Max output 128k | ✓ |
+| thinking.levels | low/medium/high/xhigh/max | Effort doc: `max` available on Fable 5; `xhigh` available on Fable 5; low/medium/high universal | ✓ |
+| thinking.default | high | Effort doc: default is `high` | ✓ |
+| contextWindow | 1000000 | Models overview: 1M tokens (default, no beta header) | ✓ |
+| releaseDate | 2026-06-09 | Models overview: "generally available … beginning June 9, 2026" | ✓ |
+| (no deprecated flag) | — | Active | ✓ |
+
+Note: Fable 5's thinking is always-on; Sim's adaptive path (`thinking: {type:'adaptive'}` + effort) is the documented-correct call shape. The `'none'` sentinel omits the `thinking` param, which on Fable 5 means adaptive-by-default rather than disabled — acceptable (explicit `disabled` would 400).
+
+### claude-opus-4-8
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| pricing.input / cachedInput / output | 5.0 / 0.5 / 25.0 | Pricing doc $5 / $0.50 cache-hit / $25; OpenRouter $5/$25 | ✓ |
+| pricing.updatedAt | 2026-05-28 | bumped in PR #4990 | ✓ |
+| temperature | absent | Deprecations doc: 400 on Opus 4.7 and later, "including Claude Opus 4.8" | ✓ |
+| nativeStructuredOutputs | true | Structured-outputs doc GA list | ✓ |
+| maxOutputTokens | 128000 | Models overview | ✓ |
+| thinking.levels | low/medium/high/xhigh/max | Effort doc: `xhigh` and `max` available on Opus 4.8 | ✓ |
+| thinking.default | high | Effort doc: "The default is `high` on all surfaces" | ✓ |
+| contextWindow | 1000000 | Models overview: 1M (standard pricing, no long-context premium) | ✓ |
+| releaseDate | 2026-05-28 | Deprecations doc: tentative retirement "Not sooner than May 28, **2027**" (release + 1 yr convention) — confirms the PR #4990 correction | ✓ changed this pass (PR #4990), re-verified |
+| recommended | true | Sim product choice; consistent with docs' "most capable Opus-tier model" | ◆ product decision |
+
+### claude-opus-4-7
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| pricing | 5.0 / 0.5 / 25.0 (updatedAt 2026-04-16) | Pricing doc; OpenRouter $5/$25 | ✓ |
+| temperature | absent | Deprecations doc: 400 on Opus 4.7+ | ✓ |
+| nativeStructuredOutputs | true | Structured-outputs doc GA list | ✓ |
+| maxOutputTokens | 128000 | Models overview (legacy table) | ✓ |
+| thinking.levels | low/medium/high/xhigh/max | Effort doc: `xhigh` introduced with 4.7; `max` available | ✓ |
+| contextWindow | 1000000 | Models overview legacy table: 1M | ✓ |
+| releaseDate | 2026-04-16 | Deprecations doc: "Not sooner than April 16, 2027" | ✓ |
+
+### claude-opus-4-6
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| pricing | 5.0 / 0.5 / 25.0 (updatedAt 2026-06-11) | Pricing doc; OpenRouter $5/$25 | ✓ |
+| temperature {0,1} | present | Sampling-param removal is "Opus 4.7 and later" — Opus 4.6 still accepts `temperature` (0.0–1.0 per Messages API) | ✓ |
+| nativeStructuredOutputs | true | Structured-outputs doc GA list | ✓ |
+| maxOutputTokens | 128000 | Models overview legacy table | ✓ |
+| thinking.levels | low/medium/high/**max** (no xhigh) | Effort doc: `max` on Opus 4.6 ✓; `xhigh` only on Fable 5 / Opus 4.8 / 4.7 — correctly excluded | ✓ |
+| contextWindow | 1000000 | Models overview legacy table: 1M | ✓ |
+| releaseDate | 2026-02-05 | Deprecations doc: "Not sooner than February 5, 2027" | ✓ |
+
+### claude-sonnet-4-6
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| pricing | 3.0 / 0.3 / 15.0 (updatedAt 2026-06-11) | Pricing doc $3 / $0.30 / $15; OpenRouter $3/$15 | ✓ |
+| temperature {0,1} | present | Sonnet 4.6 is not in the "Opus 4.7 and later" sampling-param removal; temperature 0.0–1.0 valid | ✓ |
+| nativeStructuredOutputs | true | Structured-outputs doc GA list | ✓ |
+| maxOutputTokens | 64000 | Models overview: 64k | ✓ |
+| thinking.levels | low/medium/high/**max** (no xhigh) | Effort doc: `max` available on Sonnet 4.6; `xhigh` is NOT (Fable 5 / Opus 4.8 / 4.7 only) | ✓ |
+| contextWindow | 1000000 | Models overview: 1M, no beta header required; "Long context pricing": full 1M at standard pricing on Sonnet 4.6 | ✓ |
+| releaseDate | 2026-02-17 | Deprecations doc: "Not sooner than February 17, 2027" | ✓ |
+| recommended | true | Sim product choice ("best combination of speed and intelligence") | ◆ product decision |
+
+### claude-opus-4-5
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| pricing | 5.0 / 0.5 / 25.0 (updatedAt 2026-06-11) | Pricing doc; OpenRouter $5/$25 | ✓ |
+| temperature {0,1} | present | pre-4.7 model; accepted | ✓ |
+| nativeStructuredOutputs | true | Structured-outputs doc GA list ("Claude Opus 4.5") | ✓ |
+| maxOutputTokens | 64000 | Models overview legacy table | ✓ |
+| thinking.levels | low/medium/high | Effort doc: Opus 4.5 supports effort but neither `max` nor `xhigh`. Sim's code path for 4.5 uses `budget_tokens` (not effort) — levels map to budget tiers; same three levels are valid either way | ✓ |
+| contextWindow | 200000 | Models overview legacy table: 200k | ✓ |
+| releaseDate | 2025-11-24 | Deprecations doc: "Not sooner than November 24, 2026"; anthropic.com/news/claude-opus-4-5 (Nov 24, 2025) | ✓ |
+
+### claude-opus-4-1
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| pricing | 15.0 / 1.5 / 75.0 (updatedAt 2026-06-11) | Pricing doc $15 / $1.50 / $75; OpenRouter $15/$75 | ✓ |
+| temperature {0,1} | present | pre-4.7 model; accepted | ✓ |
+| nativeStructuredOutputs | **removed in PR #4990** | Structured-outputs doc GA list does **not** include Opus 4.1 | ✓ changed this pass (PR #4990), re-verified correct |
+| maxOutputTokens | 32000 | Models overview legacy table: 32k | ✓ |
+| thinking.levels | low/medium/high | budget_tokens model; extended thinking supported | ✓ |
+| contextWindow | 200000 | Models overview legacy table | ✓ |
+| releaseDate | 2025-08-05 | Snapshot `claude-opus-4-1-20250805`; launched Aug 5, 2025 | ✓ |
+| deprecated | true | Deprecations doc: deprecated June 5, 2026; retires Aug 5, 2026 → migrate to claude-opus-4-8 | ✓ changed this pass (PR #4990), re-verified |
+
+### claude-opus-4-0
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| pricing | 15.0 / 1.5 / 75.0 (updatedAt 2026-06-11) | Pricing doc ("Claude Opus 4 (deprecated)"); OpenRouter $15/$75 | ✓ |
+| temperature {0,1} | present | pre-4.7; accepted | ✓ |
+| nativeStructuredOutputs | absent | Not in structured-outputs GA list | ✓ |
+| maxOutputTokens | 32000 | Models overview legacy table | ✓ |
+| thinking.levels | low/medium/high | budget_tokens model | ✓ |
+| contextWindow | 200000 | Models overview legacy table | ✓ |
+| releaseDate | 2025-05-22 | **Open question (a) resolved:** Claude 4 (Opus 4 + Sonnet 4) launched **May 22, 2025** (anthropic.com/news/claude-4). The `20250514` in the full ID is the snapshot date, not the launch date. Repo convention uses launch dates (cf. haiku-4-5: launch 2025-10-15 vs snapshot 20251001) | ✓ — **no change recommended** |
+| deprecated | true | Deprecations doc: deprecated Apr 14, 2026; retires June 15, 2026 → claude-opus-4-8 | ✓ changed this pass (PR #4990), re-verified |
+
+### claude-sonnet-4-5
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| pricing | 3.0 / 0.3 / 15.0 (updatedAt 2026-06-11) | Pricing doc; OpenRouter $3/$15 | ✓ |
+| temperature {0,1} | present | pre-4.7; accepted | ✓ |
+| nativeStructuredOutputs | true | Structured-outputs doc GA list ("Claude Sonnet 4.5") | ✓ |
+| maxOutputTokens | 64000 | Models overview legacy table | ✓ |
+| thinking.levels | low/medium/high | Effort doc: effort errors on Sonnet 4.5 — Sim correctly routes it through budget_tokens; no max/xhigh | ✓ |
+| contextWindow | 200000 | **Open question (e) resolved:** Models overview legacy table lists Sonnet 4.5 at **200k**. The historical 1M for Sonnet 4.5 required the `context-1m` beta header, which Sim does not send (`rg 'context-1m'` → no matches in `apps/sim/providers/anthropic/`) | ✓ changed this pass (PR #4990, 1000000 → 200000), re-verified correct |
+| releaseDate | 2025-09-29 | Snapshot `claude-sonnet-4-5-20250929`; launched Sep 29, 2025 | ✓ |
+
+### claude-sonnet-4-0
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| pricing | 3.0 / 0.3 / 15.0 (updatedAt 2026-06-11) | Pricing doc ("Claude Sonnet 4 (deprecated)"); OpenRouter $3/$15 | ✓ |
+| temperature {0,1} | present | pre-4.7; accepted | ✓ |
+| nativeStructuredOutputs | absent | Not in structured-outputs GA list | ✓ |
+| maxOutputTokens | 64000 | Models overview legacy table: 64k | ✓ |
+| thinking.levels | low/medium/high | budget_tokens model | ✓ |
+| contextWindow | 200000 | Models overview legacy table: 200k; same `context-1m` beta-header reasoning as Sonnet 4.5 | ✓ changed this pass (PR #4990), re-verified correct |
+| releaseDate | 2025-05-22 | Claude 4 launch May 22, 2025 (see opus-4-0) — no change | ✓ |
+| deprecated | true | Deprecations doc: deprecated Apr 14, 2026; retires June 15, 2026 → claude-sonnet-4-6 | ✓ changed this pass (PR #4990), re-verified |
+
+### claude-haiku-4-5
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| pricing | 1.0 / 0.1 / 5.0 (updatedAt 2026-06-11) | Pricing doc $1 / $0.10 / $5; OpenRouter $1/$5 | ✓ |
+| temperature {0,1} | present | pre-4.7; accepted | ✓ |
+| nativeStructuredOutputs | true | Structured-outputs doc GA list | ✓ |
+| maxOutputTokens | 64000 | Models overview: 64k | ✓ |
+| thinking.levels | low/medium/high | Effort doc: effort errors on Haiku 4.5; extended thinking (budget_tokens) supported — Sim routes via budget_tokens | ✓ |
+| contextWindow | 200000 | Models overview: 200k | ✓ |
+| releaseDate | 2025-10-15 | Launch Oct 15, 2025 (deprecations doc: retirement "Not sooner than October 15, 2026"); snapshot is `20251001` — repo correctly uses the launch date | ✓ |
+| speedOptimized | true | Sim-internal flag; docs: "The fastest model" | ◆ Sim-internal, consistent |
+
+### claude-3-haiku-20240307
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| pricing.input / output | 0.25 / 1.25 (updatedAt 2026-04-01) | ◇ Listed neither on the live pricing page (only retired Haiku 3.5 remains there) nor on OpenRouter — the model is retired. Values match Anthropic's historically published pricing ($0.25/$1.25) | ◇ unverifiable live; historically consistent — leave as-is |
+| pricing.cachedInput | 0.03 | ◇ Historical cache-hit pricing for Claude 3 Haiku was $0.03/MTok (slightly above the 0.1× convention) | ◇ unverifiable live; historically consistent |
+| temperature {0,1} | present | Claude 3-era; accepted (model no longer serves requests anyway) | ✓ (moot) |
+| maxOutputTokens | 4096 | Historical Claude 3 Haiku max output | ◇ unverifiable live; historically consistent |
+| no thinking capability | absent | Claude 3 Haiku has no extended thinking | ✓ |
+| contextWindow | 200000 | Historical Claude 3 family window | ◇ unverifiable live; historically consistent |
+| releaseDate | 2024-03-07 | Claude 3 Haiku GA was **March 13, 2024** (anthropic.com/news/claude-3-haiku); `20240307` is the snapshot date. Repo convention elsewhere uses launch dates | ⚠ optional: `2024-03-07` → `2024-03-13` (cosmetic; model is retired) |
+| deprecated | true | Deprecations doc: **Retired April 20, 2026** ("Requests to retired models will fail") | ◆ see open question (b) below |
+
+---
+
+## Changes made in this pass (PR #4990) — all re-verified correct
+
+| Change | Verification |
+|---|---|
+| opus-4-8 releaseDate → 2026-05-28 | Deprecations doc retirement floor "May 28, 2027" (release + 1 yr) ✓ |
+| deprecated:true on opus-4-1 | Deprecated 2026-06-05, retires 2026-08-05 ✓ |
+| deprecated:true on opus-4-0, sonnet-4-0 | Deprecated 2026-04-14, retire 2026-06-15 ✓ |
+| sonnet-4-5 & sonnet-4-0 contextWindow 1000000 → 200000 | Models overview legacy table: both 200k. The 1M window on these models was beta-header-gated (`context-1m`); Sim never sends that header ✓ |
+| removed nativeStructuredOutputs from opus-4-1 | Opus 4.1 absent from structured-outputs GA list ✓ |
+| updatedAt bumps | informational ✓ |
+
+## Recommended fixes from THIS validation
+
+1. **claude-fable-5: add `nativeStructuredOutputs: true`.** Structured-outputs doc explicitly lists Claude Fable 5 as GA. Without the flag, Sim falls back to prompt-injected schema instructions for Fable 5 instead of the native JSON-schema output path — weaker guarantees on the flagship model.
+2. *(optional, cosmetic)* **claude-3-haiku-20240307: releaseDate `2024-03-07` → `2024-03-13`.** Repo convention is launch date (not snapshot date); GA was March 13, 2024. Low value since the model is retired.
+
+## Deliberately not changed
+
+- **`computerUse` on Anthropic models (open question c).** Anthropic documents computer-use support (beta) for: Opus 4.8 / 4.7 / 4.6 / 4.5 + Sonnet 4.6 (header `computer-use-2025-11-24`) and Sonnet 4.5, Haiku 4.5, Opus 4.1, Sonnet 4, Opus 4 (header `computer-use-2025-01-24`). **Claude Fable 5 is NOT in the documented list.** The flag IS consumed (`getComputerUseModels()` → `providers/utils.ts` `computerUseModels`), so setting it would light up Sim's computer-use path for these models — a feature-enablement/product decision (beta headers, screenshot plumbing, UX), not a data correction. Left unchanged; documented here for whoever owns that decision.
+- **opus-4-0 / sonnet-4-0 releaseDate `2025-05-22` (open question a).** Confirmed correct: Claude 4 launched May 22, 2025; `20250514` is the snapshot suffix, not the launch date.
+- **claude-3-haiku-20240307 entry kept (open question b).** The model was retired 2026-04-20 — live requests now fail. Recommendation: **keep the entry with `deprecated: true`** rather than delete. Removing it would break saved workflows that reference the model ID (model lookup, pricing for historical logs, UI rendering of old runs). The schema has no `retired` field; if one is ever added, this model is the first candidate. Runtime failures surface from Anthropic's API as clear 404s, which is an acceptable failure mode for a retired model.
+- **`recommended` flags (opus-4-8, sonnet-4-6) and `speedOptimized` (haiku-4-5)** — Sim product/UI decisions, consistent with docs positioning; not doc-verifiable facts.
+- **`defaultModel: 'claude-sonnet-4-6'`** — active, recommended model; valid product choice.
+- **Thinking level lists for budget-tokens models (opus-4-5, sonnet-4-5, sonnet-4-0, opus-4-1, opus-4-0, haiku-4-5).** Their `low/medium/high` are Sim-defined budget tiers (2048/8192/32768 budget_tokens), not API effort levels — internally consistent with `THINKING_BUDGET_TOKENS` in `core.ts`. Note Opus 4.5 does support the API `effort` param (low/medium/high) per the effort doc, but Sim routes it through budget_tokens (`supportsAdaptiveThinking()` excludes 4.5); that is a code-path choice in `core.ts`, not a models.ts data error, and the level list is valid under either interpretation.
+
+## Open question (d) resolution — thinking levels & temperature boundary
+
+- `xhigh`: Fable 5, Opus 4.8, Opus 4.7 only (effort doc). Repo ✓.
+- `max`: Fable 5, Opus 4.8, Opus 4.7, Opus 4.6, Sonnet 4.6 (effort doc; **not** Opus 4.5 / Sonnet 4.5 / Haiku 4.5). Repo ✓ — including Sonnet 4.6 `max`, verified.
+- Effort default `high` on all supporting models (effort doc: "Setting effort to high produces exactly the same behavior as omitting the parameter"). Repo `default: 'high'` ✓.
+- Temperature boundary: deprecations doc — `temperature`/`top_p`/`top_k` return 400 on **Opus 4.7 and later (incl. Opus 4.8) and Fable 5**; still valid (0.0–1.0, default 1.0 per Messages API) on Opus 4.6, Sonnet 4.6, and everything earlier. Repo: temperature absent exactly on fable-5 / opus-4-8 / opus-4-7, present `{min:0, max:1}` on opus-4-6 / sonnet-4-6 and all older models ✓.
+
+## Unverifiable
+
+- **claude-3-haiku-20240307 pricing, contextWindow (200k), maxOutputTokens (4096):** the model is retired and has been removed from the live pricing/overview pages and OpenRouter. Values match Anthropic's historical published specs; no contradiction found. No change recommended.
+- **Exact cache-write pricing is not modeled** (Sim's schema has only `cachedInput` = cache read). Live docs confirm cache reads = 0.1× input for every current model, matching all `cachedInput` values. 5-min/1-hour write premiums (1.25× / 2×) are not representable in the current schema — noting for completeness, not a defect.
diff --git a/docs/models/azure.md b/docs/models/azure.md
new file mode 100644
index 0000000000..03f5dfd72d
--- /dev/null
+++ b/docs/models/azure.md
@@ -0,0 +1,258 @@
+# Azure OpenAI & Azure Anthropic model validation
+
+**Date:** 2026-06-11
+**Scope:** `azure-openai` block (17 models) and `azure-anthropic` block (5 models) in `apps/sim/providers/models.ts`. Final exhaustive re-validation following PR #4990.
+
+## Method
+
+Every field was checked against live primary sources fetched on 2026-06-11:
+
+1. **Specs (context window, max output, version dates, API support, lifecycle):**
+   - https://learn.microsoft.com/en-us/azure/ai-foundry/foundry-models/concepts/models-sold-directly-by-azure (doc updated 2026-06-05)
+   - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/reasoning (reasoning effort / verbosity feature matrix, doc updated 2026-06-05)
+   - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/model-retirements (lifecycle policy + gpt-4o dates)
+   - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/model-router and .../concepts/model-router
+   - https://learn.microsoft.com/en-us/azure/foundry/foundry-models/how-to/use-foundry-models-claude (doc updated 2026-06-11)
+   - https://platform.claude.com/docs/en/build-with-claude/claude-in-microsoft-foundry
+   - https://platform.claude.com/docs/en/about-claude/pricing
+   - https://platform.claude.com/docs/en/about-claude/models/overview
+   - https://platform.claude.com/docs/en/build-with-claude/structured-outputs
+2. **Azure OpenAI pricing:** Azure Retail Prices API (`https://prices.azure.com/api/retail/prices?$filter=serviceName eq 'Foundry Models' and contains(meterName,'...')`). All quoted prices are the **Global Standard** ("Gl"/"glbl") meters, normalized to USD per 1M tokens. The marketing pricing page times out; the Retail Prices API is authoritative for billed meters.
+3. **Provider implementation:** `apps/sim/providers/azure-openai/index.ts` (API dispatch), `apps/sim/providers/azure-anthropic/index.ts` (Messages API via `@anthropic-ai/sdk` against `{endpoint}/anthropic`).
+
+Sim convention notes: `pricing.cachedInput` = cache-read price; `releaseDate` for `azure/*` entries = the Azure model **version date** (convention set in PR #4990 with gpt-4o → 2024-11-20 and model-router → 2025-05-19).
+
+---
+
+## Block: `azure-openai` (defaultModel: `azure/gpt-4o`)
+
+### azure/gpt-4o
+
+| Field | Current value | Source / evidence | Verdict |
+| --- | --- | --- | --- |
+| pricing.input | 2.5 | Retail API `gpt 4o 1120 Inp glbl` = 0.0025/1K = $2.50/1M | OK |
+| pricing.cachedInput | 1.25 | Retail API `gpt 4o 1120 cached Inp glbl` = 0.00125/1K = $1.25/1M | **OK — VERIFIED** (open question b resolved) |
+| pricing.output | 10.0 | Retail API `gpt 4o 1120 Outp glbl` = 0.01/1K = $10/1M | OK |
+| temperature 0–2 | yes | Standard chat model; reasoning-model parameter restrictions don't apply | OK |
+| maxOutputTokens | **(absent)** | models-sold-directly: gpt-4o (2024-11-20) "Input: 128,000 / Output: 16,384" | **FIX: add `maxOutputTokens: 16384`** |
+| contextWindow | 128000 | same row | OK |
+| releaseDate | 2024-11-20 | Azure version `2024-11-20` (PR #4990 change re-verified) | OK |
+| deprecated | (absent) | model-retirements: versions 2024-05-13 / 2024-08-06 **retired 2026-03-31** (auto-upgraded to gpt-5.1); version 2024-11-20 "retires **2026-10-01**" | **RECOMMEND `deprecated: true`** — firm retirement date within ~3.7 months. NOTE: gpt-4o is the `azure-openai` `defaultModel`; changing the default (e.g. to azure/gpt-5.1 per Azure's own auto-upgrade path) is a product decision — documented only, not assumed. |
+
+### azure/gpt-5.4
+
+| Field | Current value | Source / evidence | Verdict |
+| --- | --- | --- | --- |
+| pricing | 2.5 / 0.25 / 15.0 | Retail API `5.4 inp Gl` 2.5, `5.4 cd inp Gl` 0.25, `5.4 opt Gl` 15.0 | OK |
+| reasoningEffort | none, low, medium, high | reasoning doc footnote 7 enumerates `'none'` support as exactly: gpt-5.2, gpt-5.1, gpt-5.1-codex, gpt-5.1-codex-max, gpt-5.1-codex-mini — **gpt-5.4 family is not listed** | **FIX: drop `'none'`** → `['low','medium','high']` (open question c resolved). PR #4990's removal of `'xhigh'` re-verified correct: footnote 6 — xhigh is gpt-5.1-codex-max only. |
+| verbosity | low, medium, high | reasoning doc "NEW GPT-5 reasoning features": verbosity options low/medium/high for GPT-5 series | OK |
+| maxOutputTokens | 128000 | models-sold-directly: gpt-5.4 (2026-03-05) output 128,000 | OK |
+| contextWindow | 1050000 | same row: 1,050,000 (Input 922,000 / Output 128,000) | OK |
+| releaseDate | 2026-03-05 | Azure version `2026-03-05` | OK |
+
+Pricing limitation: a long-context tier exists (`5.4 longco inp Gl` $5.0 / `longco cd inp Gl` $0.5 / `longco opt Gl` $22.5) for requests beyond the standard context threshold. The flat pricing schema cannot express tiered pricing; standard-tier rates are recorded.
+
+### azure/gpt-5.4-mini
+
+| Field | Current value | Source / evidence | Verdict |
+| --- | --- | --- | --- |
+| pricing | 0.75 / 0.075 / 4.5 | Retail API `5.4 mini Inp Gl` 0.75, `cd Inp Gl` 0.075, `Opt Gl` 4.5 | OK |
+| reasoningEffort | none, low, medium, high | footnote 7 (see gpt-5.4) | **FIX: drop `'none'`** |
+| verbosity | low, medium, high | GPT-5 series verbosity | OK |
+| maxOutputTokens / contextWindow | 128000 / 400000 | models-sold-directly: gpt-5.4-mini (2026-03-17) 400,000 (272k in / 128k out) | OK |
+| releaseDate | 2026-03-17 | Azure version `2026-03-17` | OK |
+
+### azure/gpt-5.4-nano
+
+| Field | Current value | Source / evidence | Verdict |
+| --- | --- | --- | --- |
+| pricing | 0.2 / 0.02 / 1.25 | Retail API `5.4 nano Inp Gl` 0.2, `cd Inp Gl` 0.02, `Opt Gl` 1.25 | OK |
+| reasoningEffort | none, low, medium, high | footnote 7 (see gpt-5.4) | **FIX: drop `'none'`** |
+| verbosity | low, medium, high | GPT-5 series verbosity | OK |
+| maxOutputTokens / contextWindow | 128000 / 400000 | models-sold-directly: gpt-5.4-nano (2026-03-17) | OK |
+| releaseDate | 2026-03-17 | Azure version `2026-03-17` | OK |
+
+### azure/gpt-5.2
+
+| Field | Current value | Source / evidence | Verdict |
+| --- | --- | --- | --- |
+| pricing | 1.75 / 0.175 / 14.0 | Retail API `GPT 5.2 inp Gl` 1.75, `cd inp Gl` 0.175, `opt Gl` 14.0 | OK |
+| reasoningEffort | none, low, medium, high | footnote 7 explicitly lists gpt-5.2 as supporting `'none'`; `'xhigh'` removal (PR #4990) correct — codex-max only; `'minimal'` correctly absent ("not supported with gpt-5.1 or greater") | OK |
+| verbosity | low, medium, high | GPT-5 series verbosity | OK |
+| maxOutputTokens / contextWindow | 128000 / 400000 | models-sold-directly: gpt-5.2 (2025-12-11) | OK |
+| releaseDate | 2025-12-11 | Azure version `2025-12-11` | OK |
+
+### azure/gpt-5.1
+
+| Field | Current value | Source / evidence | Verdict |
+| --- | --- | --- | --- |
+| pricing | 1.25 / 0.125 / 10.0 | Retail API `GPT 5.1 inp Gl` 1.25, `cd inp Gl` 0.125, `opt Gl` 10.0 | OK |
+| reasoningEffort | none, low, medium, high | footnote 7 lists gpt-5.1 (also: `reasoning_effort` defaults to `none` on 5.1); `'minimal'` correctly absent | OK |
+| verbosity | low, medium, high | GPT-5 series verbosity | OK |
+| maxOutputTokens / contextWindow | 128000 / 400000 | models-sold-directly: gpt-5.1 | OK |
+| releaseDate | 2025-11-12 | Azure version is **2025-11-13** in both the models table and the reasoning feature matrix | **FIX: → 2025-11-13** (per PR #4990's own convention of using the Azure version date, cf. gpt-4o 2024-11-20, model-router 2025-05-19) |
+
+### azure/gpt-5.1-codex
+
+| Field | Current value | Source / evidence | Verdict |
+| --- | --- | --- | --- |
+| pricing | 1.25 / 0.125 / 10.0 | Retail API `5.1 codex inp Gl` 1.25, `cd inp Gl` 0.125, `opt Gl` 10.0 | OK |
+| reasoningEffort | none, low, medium, high | footnote 7 lists gpt-5.1-codex | OK |
+| verbosity | low, medium, high | GPT-5 series | OK |
+| maxOutputTokens / contextWindow | 128000 / 400000 | models-sold-directly: gpt-5.1-codex | OK |
+| releaseDate | 2025-11-12 | Azure version `2025-11-13` | **FIX: → 2025-11-13** |
+| deprecated | true (PR #4990 stopgap) | See ruling below | **RECOMMEND: KEEP entry, REVERT `deprecated: true`** |
+
+**Ruling on open question (a):** Responses-API-only status **confirmed** — models-sold-directly lists gpt-5.1-codex as "Responses API only", and the reasoning feature matrix shows Chat Completions = not supported. **However, the premise that it "never worked through Sim" is false.** `apps/sim/providers/azure-openai/index.ts` dispatches by endpoint shape: a full chat-completions URL → Chat Completions; a full responses URL → Responses; **the default path (plain resource base URL) constructs `{endpoint}/openai/v1/responses` and calls the Responses API** (lines ~743–765). So gpt-5.1-codex works for any user configured with a base endpoint or responses URL — the majority configuration. Azure itself has not deprecated the model (GA, "Access is no longer restricted"). Therefore: **KEEP the entry and revert `deprecated: true`**. The only genuinely broken configuration is a user-supplied chat-completions endpoint URL; that is an endpoint-configuration limitation, not a model lifecycle state, and `deprecated` (which signals retirement to users) is the wrong tool for it.
+
+### azure/gpt-5 · azure/gpt-5-mini · azure/gpt-5-nano
+
+| Field | gpt-5 | gpt-5-mini | gpt-5-nano | Source / evidence | Verdict |
+| --- | --- | --- | --- | --- | --- |
+| pricing in/cached/out | 1.25 / 0.125 / 10.0 | 0.25 / 0.025 / 2.0 | 0.05 / 0.005 / 0.4 | Retail API `GPT 5 [Mini\|Nano] [Inpt\|cchd Inpt\|outpt] Glbl` — exact matches all three | OK |
+| reasoningEffort | minimal, low, medium, high | same | same | reasoning doc: "`minimal` is only supported with the original GPT-5 reasoning models"; `'none'` correctly absent (not in footnote 7); `'xhigh'` correctly absent | OK |
+| verbosity | low/medium/high | same | same | GPT-5 series | OK |
+| maxOutputTokens / contextWindow | 128000 / 400000 | same | same | models-sold-directly: all three 400,000 (272k/128k) | OK |
+| releaseDate | 2025-08-07 | 2025-08-07 | 2025-08-07 | Azure version `2025-08-07` | OK |
+
+### azure/gpt-5-chat
+
+| Field | Current value | Source / evidence | Verdict |
+| --- | --- | --- | --- |
+| id (deployable name) | `gpt-5-chat` | models-sold-directly lists `gpt-5-chat` (Preview), versions 2025-08-07 and 2025-10-03 — **exact name confirmed**; PR #4990 rename from `gpt-5-chat-latest` re-verified correct. Note: OpenAI's first-party `gpt-5-chat-latest`-style continuously-updated alias maps to a *different* Foundry product (`gpt-chat-latest`, now GPT-5.5 Instant) — our entry correctly tracks the deployable `gpt-5-chat` (open question e resolved) | OK |
+| pricing | 1.25 / 0.125 / 10.0 | Retail API `GPT 5 Chat [Inpt\|cchd Inpt\|outpt] Glbl` = 1.25 / 0.125 / 10.0 | OK |
+| temperature 0–2 | yes | gpt-5-chat is a non-reasoning chat model (temperature restriction applies to gpt-5.1-chat and later, which we do not list) | OK |
+| maxOutputTokens | 16384 | models-sold-directly: 128,000 / **16,384** (PR #4990 addition re-verified) | OK |
+| contextWindow | 128000 | same row | OK |
+| releaseDate | 2025-08-07 | Azure version `2025-08-07` (a `2025-10-03` revision also exists; the original version date is kept) | OK |
+| lifecycle | not marked | **Preview** on Azure. Preview lifecycle terms: "not sooner than" retirement dates, force-upgrade or retirement on 30 days' notice, and "not recommended for production". No retirement date currently announced → no `deprecated` flag warranted | OK (documented) |
+
+### azure/o3 · azure/o4-mini
+
+| Field | o3 | o4-mini | Source / evidence | Verdict |
+| --- | --- | --- | --- | --- |
+| pricing | 2 / 0.5 / 8 | 1.1 / 0.275 / 4.4 | Retail API `o3 0416` 0.002/0.0005/0.008 per 1K; `o4-mini 0416` 0.0011/0.000275/0.0044 per 1K | OK |
+| reasoningEffort | low, medium, high | low, medium, high | reasoning doc: "low, medium, or high for all reasoning models except o1-mini"; o-series matrix has no none/minimal/xhigh | OK |
+| verbosity | (absent) | (absent) | verbosity is a GPT-5-series-only parameter | OK |
+| maxOutputTokens / contextWindow | 100000 / 200000 | 100000 / 200000 | models-sold-directly o-series: Input 200,000 / Output 100,000 | OK |
+| releaseDate | 2025-04-16 | 2025-04-16 | Azure version `2025-04-16` for both | OK |
+
+### azure/gpt-4.1 · azure/gpt-4.1-mini · azure/gpt-4.1-nano
+
+| Field | 4.1 | 4.1-mini | 4.1-nano | Source / evidence | Verdict |
+| --- | --- | --- | --- | --- | --- |
+| pricing | 2.0 / 0.5 / 8.0 | 0.4 / 0.1 / 1.6 | 0.1 / 0.025 / 0.4 | Retail API `gpt 4.1 [mini\|nano] [Inp\|cached Inp\|Outp] glbl` — exact matches all three | OK |
+| temperature 0–2 | yes | yes | yes | non-reasoning models | OK |
+| maxOutputTokens | 32768 | 32768 | 32768 | models-sold-directly: 32,768 | OK |
+| contextWindow | 1047576 | 1047576 | 1047576 | models-sold-directly: 1,047,576 (global standard; lower for regional standard/batch — global is the right representation) | OK |
+| releaseDate | 2025-04-14 | 2025-04-14 | 2025-04-14 | Azure version `2025-04-14` | OK |
+
+### azure/model-router
+
+| Field | Current value | Source / evidence | Verdict |
+| --- | --- | --- | --- |
+| pricing | 2.0 / 0.5 / 8.0 | No `model-router` meter exists in the Retail Prices API (searched `Router`/`Rtr`/`rtr` under serviceName 'Foundry Models' and productName across all services — only Communication Services "Job Router" exists). Concepts page: "Model router usage is charged for input prompts at the rate listed on the pricing page"; how-to evaluation section: "Account for the **router markup on input tokens** plus the underlying model's input and output pricing." The reported $0.14/1M router markup could not be confirmed from any fetchable source (only the timing-out marketing page carries the number). | **KEEP as documented proxy** (open question d resolved — see below) |
+| capabilities | {} (no reasoningEffort) | Router accepts `reasoning_effort` since version 2025-11-18 and forwards it, but our pinned version semantics are 2025-05-19 (gpt-4.1-family + o4-mini routing, a set that does not accept temperature uniformly — temp/top_p are silently dropped for o-series). Empty capabilities is the safest representation | OK |
+| contextWindow | 200000 | models-sold-directly footnote: "Context window: 200,000" — the limit of the smallest underlying model; larger prompts succeed only if routed to a compatible model | OK |
+| maxOutputTokens | (absent) | "max output tokens varies" (16,384–128,000 depending on routed model) — correctly unset | OK |
+| releaseDate | 2025-05-19 | Original version `2025-05-19` confirmed (versions: 2025-05-19, 2025-08-07, 2025-11-18 latest); PR #4990 change re-verified | OK |
+
+**Pricing decision (open question d):** True billing = per-input-token router markup + the routed model's own input/output rates, which vary per request. The flat `{input, cachedInput, output}` schema cannot express this. The current 2.0/0.5/8.0 equals the gpt-4.1 rates — gpt-4.1 is the flagship of the 2025-05-19 routed set (gpt-4.1/-mini/-nano + o4-mini) and sits at the cost ceiling of that set (o4-mini, the next most expensive, is 1.1/0.275/4.4), so it is a conservative (slightly pessimistic) proxy for cost estimation. **Keep 2.0/0.5/8.0.** This is a documented schema limitation, not a verified Azure price; cost estimates for model-router workloads in Sim are approximations.
+
+---
+
+## Block: `azure-anthropic` (defaultModel: `azure-anthropic/claude-sonnet-4-5`)
+
+Pricing basis: platform.claude.com Claude-in-Microsoft-Foundry doc — "Pricing for Claude in the Microsoft Marketplace uses Anthropic's standard API pricing." So azure-anthropic pricing == Anthropic first-party pricing (open question f, pricing half, resolved). `cachedInput` maps to Anthropic "Cache Hits & Refreshes" (0.1× input). All five models are **(preview)** on Foundry; Foundry "follows the Claude API lifecycle schedule".
+
+### azure-anthropic/claude-opus-4-6
+
+| Field | Current value | Source / evidence | Verdict |
+| --- | --- | --- | --- |
+| pricing | 5.0 / 0.5 / 25.0 | Anthropic pricing: Opus 4.6 $5 in / $0.50 cache read / $25 out | OK |
+| contextWindow | 1000000 | MS Foundry Claude doc: opus-4-6 "1M / 128K"; Anthropic Foundry doc: "Claude Fable 5, Claude Opus 4.7, Claude Opus 4.6, and Claude Sonnet 4.6 have a 1M-token context window on Microsoft Foundry"; Anthropic models overview: 1M. PR #4990 change re-verified. Long context is at **standard pricing** (Anthropic long-context pricing section), so no tiered-pricing concern | OK |
+| maxOutputTokens | 128000 | both MS and Anthropic sources: 128K | OK |
+| thinking levels | low, medium, high, max (default high) | MS Foundry Claude doc: effort supports low/medium/high, "also max for Opus 4.8, Opus 4.7, **Opus 4.6**, and Sonnet 4.6" | OK |
+| nativeStructuredOutputs | true | Anthropic structured-outputs doc: Opus 4.6 supported (GA) | OK |
+| temperature 0–1 | yes | Anthropic Messages API range | OK |
+| releaseDate | 2026-02-05 | Not stated in any fetched doc (dateless model ID). Consistent with Opus 4.6 launch timeframe (early Feb 2026); convention = announcement date | Unverifiable (plausible, kept) |
+
+### azure-anthropic/claude-opus-4-5
+
+| Field | Current value | Source / evidence | Verdict |
+| --- | --- | --- | --- |
+| pricing | 5.0 / 0.5 / 25.0 | Anthropic pricing: Opus 4.5 $5 / $0.50 / $25 | OK |
+| contextWindow / maxOutputTokens | 200000 / 64000 | MS doc "200K / 64K"; Anthropic overview 200k / 64k | OK |
+| thinking | low, medium, high | extended thinking; `max` effort not supported on 4.5-generation | OK |
+| nativeStructuredOutputs | true | Anthropic structured-outputs doc: Opus 4.5 supported | OK |
+| releaseDate | 2025-11-24 | Anthropic launch date (snapshot ID claude-opus-4-5-20251101; announcement 2025-11-24 — announcement-date convention) | OK |
+
+### azure-anthropic/claude-sonnet-4-5
+
+| Field | Current value | Source / evidence | Verdict |
+| --- | --- | --- | --- |
+| pricing | 3.0 / 0.3 / 15.0 | Anthropic pricing: Sonnet 4.5 $3 / $0.30 / $15 | OK |
+| contextWindow / maxOutputTokens | 200000 / 64000 | MS doc "200K / 64K"; Anthropic overview. Note: the Sonnet 4.5 **1M-context beta** on Foundry was scheduled to retire after 2026-04-30 (a date already past) — 200000 is correct | OK |
+| thinking | low, medium, high | extended thinking | OK |
+| nativeStructuredOutputs | true | Anthropic structured-outputs doc: Sonnet 4.5 supported | OK |
+| releaseDate | 2025-09-29 | snapshot claude-sonnet-4-5-20250929 | OK |
+
+### azure-anthropic/claude-opus-4-1
+
+| Field | Current value | Source / evidence | Verdict |
+| --- | --- | --- | --- |
+| pricing | 15.0 / 1.5 / 75.0 | Anthropic pricing: Opus 4.1 $15 / $1.50 / $75 | OK |
+| contextWindow / maxOutputTokens | 200000 / 32000 | MS doc "200K / 32K"; Anthropic overview 200k / 32k | OK |
+| thinking | low, medium, high | extended thinking | OK |
+| nativeStructuredOutputs | **true** | Anthropic structured-outputs doc supported-model list **excludes Opus 4.1** (Fable 5, Mythos 5/Preview, Opus 4.8/4.7/4.6/4.5, Sonnet 4.6/4.5, Haiku 4.5 only). The first-party `anthropic` block's `claude-opus-4-1` entry correctly omits it (models.ts ~line 762). With this flag set, Sim sends the `structured-outputs-2025-11-13` beta header and `output_format` to a model that doesn't support it | **FIX: remove `nativeStructuredOutputs`** |
+| deprecated | true | Anthropic Foundry doc model table: "Claude Opus 4.1 — Deprecated. **Retiring August 5, 2026**"; Anthropic pricing page marks it deprecated. PR #4990 change re-verified correct | OK |
+| releaseDate | 2025-08-05 | snapshot claude-opus-4-1-20250805 | OK |
+
+### azure-anthropic/claude-haiku-4-5
+
+| Field | Current value | Source / evidence | Verdict |
+| --- | --- | --- | --- |
+| pricing | 1.0 / 0.1 / 5.0 | Anthropic pricing: Haiku 4.5 $1 / $0.10 / $5 | OK |
+| contextWindow / maxOutputTokens | 200000 / 64000 | MS doc "200K / 64K"; Anthropic overview | OK |
+| thinking | low, medium, high | extended thinking | OK |
+| nativeStructuredOutputs | true | Anthropic structured-outputs doc: Haiku 4.5 supported | OK |
+| releaseDate | 2025-10-15 | Anthropic launch date (snapshot claude-haiku-4-5-20251001; announcement 2025-10-15 — announcement-date convention) | OK |
+
+---
+
+## Changes made in PR #4990 — re-verification results
+
+| PR #4990 change | Verdict |
+| --- | --- |
+| Drop `'xhigh'` from azure/gpt-5.4, 5.4-mini, 5.4-nano, gpt-5.2 | **Correct** — `xhigh` is gpt-5.1-codex-max only (reasoning doc footnote 6) |
+| `deprecated: true` on azure/gpt-5.1-codex | **Premise partially wrong** — Responses-API-only confirmed, but Sim's azure provider defaults to the Responses API; recommend reverting (see entry) |
+| `deprecated: true` on azure-anthropic/claude-opus-4-1 | **Correct** — retiring 2026-08-05 |
+| Rename azure/gpt-5-chat-latest → azure/gpt-5-chat + maxOutputTokens 16384 | **Correct** |
+| azure/gpt-4o releaseDate → 2024-11-20 | **Correct** |
+| azure/model-router releaseDate → 2025-05-19 | **Correct** |
+| azure-anthropic/claude-opus-4-6 contextWindow → 1000000 | **Correct** |
+| updatedAt bumps to 2026-06-11 | OK (azure/model-router still 2026-04-01; acceptable since its pricing is an unverifiable proxy) |
+
+## Recommended fixes from this pass (not applied — doc only)
+
+1. `azure/gpt-5.4`, `azure/gpt-5.4-mini`, `azure/gpt-5.4-nano`: reasoningEffort drop `'none'` → `['low','medium','high']` (reasoning doc footnote 7 enumerates 'none' support and excludes the 5.4 family).
+2. `azure/gpt-4o`: add `maxOutputTokens: 16384`.
+3. `azure/gpt-4o`: add `deprecated: true` (retires 2026-10-01). **Product caveat:** it is the block's `defaultModel`; the default-model change is a product decision, not made here.
+4. `azure/gpt-5.1` and `azure/gpt-5.1-codex`: releaseDate `2025-11-12` → `2025-11-13` (Azure version date convention).
+5. `azure/gpt-5.1-codex`: **KEEP entry; revert `deprecated: true`** (works through Sim's default Responses-API path; Azure lifecycle is GA, not deprecated).
+6. `azure-anthropic/claude-opus-4-1`: remove `nativeStructuredOutputs: true` (unsupported model; matches first-party anthropic entry).
+
+## Deliberately not changed
+
+- **azure/model-router pricing 2.0/0.5/8.0** — kept as a documented gpt-4.1-rate proxy; real billing (input-token router markup + routed model rates) is unrepresentable in the flat pricing schema, and no router meter exists in the Retail Prices API to anchor a different number.
+- **azure/gpt-5-chat Preview status** — no `deprecated` flag: Preview models have no announced retirement; flagging would misrepresent lifecycle.
+- **gpt-5.4 long-context pricing tier** (5.0/0.5/22.5 "longco" meters) — schema cannot express tiered pricing; standard-tier rates kept.
+- **gpt-4.1 contextWindow 1,047,576** — global-standard figure kept although regional standard (300,000) and batch (128,000) deployments are lower; Sim assumes global standard.
+- **azure-anthropic releaseDates using announcement dates** (opus-4-5 2025-11-24, haiku-4-5 2025-10-15) rather than snapshot dates (20251101, 20251001) — consistent existing convention across the file.
+- **Missing newer models** (out of scope, noted for follow-up): Azure now offers `gpt-5.5` (GA, 2026-04-24, 1.05M ctx), `gpt-chat-latest`, `gpt-5.4-pro`, `gpt-5.3-codex`/`gpt-5.3-chat`, `gpt-5.2-codex`/`gpt-5.2-chat`; Foundry Claude now offers `claude-fable-5`, `claude-opus-4-8`, `claude-opus-4-7`, `claude-sonnet-4-6` (1M ctx GA).
+
+## Unverifiable
+
+- **model-router pricing** — no retail meter; the $0.14/1M router-markup figure appears only on the timing-out marketing pricing page and could not be confirmed.
+- **azure-anthropic/claude-opus-4-6 releaseDate 2026-02-05** — no fetched source states the launch date (dateless model ID); plausible and consistent with Opus 4.6-era documentation, kept as-is.
+- **Azure-side rate-limit/quota values** — not modeled in the schema; not validated.
diff --git a/docs/models/bedrock.md b/docs/models/bedrock.md
new file mode 100644
index 0000000000..eff34fc335
--- /dev/null
+++ b/docs/models/bedrock.md
@@ -0,0 +1,226 @@
+# Bedrock provider validation — `apps/sim/providers/models.ts`
+
+- **Date:** 2026-06-11 (final exhaustive pass; re-verifies PR #4990)
+- **Scope:** all 32 `bedrock/*` model entries
+- **Method:** every fact below traced to a live source fetched today:
+  - **AWS Pricing API** (authoritative for token prices): `https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonBedrock/current/us-east-1/index.json` (1.37 MB, Last-Modified 2026-06-11) and the `us-west-2` offer file. Prices are per 1K tokens in the offer; converted ×1000 to per-1M below. Claude 4.x, Cohere, and Mistral Large 24.11 have **no SKUs** in the Pricing API (marketplace-billed / absent).
+  - **AWS model cards:** `docs.aws.amazon.com/bedrock/latest/userguide/model-card-<provider>-<model>.html` (authoritative for geo/global inference IDs, context window, max output, lifecycle, prompt caching).
+  - **Lifecycle:** `docs.aws.amazon.com/bedrock/latest/userguide/model-lifecycle.html` (Legacy/EOL table).
+  - **Anthropic:** `platform.claude.com/docs/en/about-claude/pricing` and `.../models/overview` (Claude prices, cache rates, max output, Bedrock geo premium).
+  - **AWS what's-new** for the Nova Premier GA date and Nova 2 announcements.
+
+---
+
+## GEO-PROFILE TABLE (deliverable for `getBedrockInferenceProfileId`)
+
+Source: each model card's Programmatic Access table ("Geo inference ID" / "Global inference ID" columns). `geo` = inference profile required/available (the bare ID is generally **not** invokable on-demand for these, except where noted); `bare` = card lists "Not supported" for both Geo and Global — must invoke with the plain model ID.
+
+| model id suffix | verdict | profiles on card |
+|---|---|---|
+| anthropic.claude-opus-4-5-20251101-v1:0 | **geo** (REQUIRED) | `us.`, `eu.` + `global.` (no apac/au/jp) |
+| anthropic.claude-sonnet-4-5-20250929-v1:0 | **geo** (REQUIRED) | `us.`, `eu.`, `au.`, `jp.` + `global.` (no `apac.`) |
+| anthropic.claude-haiku-4-5-20251001-v1:0 | **geo** (REQUIRED in most regions) | `us.`, `eu.`, `au.`, `jp.` + `global.` (no `apac.`; in-region only us-east-1/eu-north-1/eu-west-1/ap-northeast-1/ap-southeast-4) |
+| anthropic.claude-opus-4-1-20250805-v1:0 | **geo** (REQUIRED) | `us.` only; global NOT supported |
+| amazon.nova-2-pro-v1:0 | **unknown** | no card; ID does not exist on Bedrock — real preview ID is `amazon.nova-2-pro-preview-20251202-v1:0`, served via geo/global profiles per cloudprice `apac.amazon.nova-2-pro-preview-…` |
+| amazon.nova-2-lite-v1:0 | **geo** (REQUIRED) | `us.`, `eu.`, `jp.` + `global.` (no `apac.`) |
+| amazon.nova-premier-v1:0 | **geo** (REQUIRED) | `us.` only; global NOT supported |
+| amazon.nova-pro-v1:0 | **geo** | `us.`, `eu.` (no apac/global; in-region exists in us-east-1 and a few others) |
+| amazon.nova-lite-v1:0 | **geo** | `us.`, `eu.` (no apac/global) |
+| amazon.nova-micro-v1:0 | **geo** | `us.`, `eu.` (no apac/global) |
+| meta.llama4-maverick-17b-instruct-v1:0 | **geo** (REQUIRED) | `us.` only |
+| meta.llama4-scout-17b-instruct-v1:0 | **geo** (REQUIRED) | `us.` only |
+| meta.llama3-3-70b-instruct-v1:0 | **geo** | `us.` only |
+| meta.llama3-2-90b-instruct-v1:0 | **geo** (REQUIRED) | `us.` only |
+| meta.llama3-2-11b-instruct-v1:0 | **geo** (REQUIRED) | `us.` only |
+| meta.llama3-2-3b-instruct-v1:0 | **geo** (REQUIRED) | `us.`, `eu.` |
+| meta.llama3-2-1b-instruct-v1:0 | **geo** (REQUIRED) | `us.`, `eu.` |
+| meta.llama3-1-405b-instruct-v1:0 | **geo** | `us.` only (in-region only us-west-2) |
+| meta.llama3-1-70b-instruct-v1:0 | **geo** | `us.` only (in-region only us-west-2) |
+| meta.llama3-1-8b-instruct-v1:0 | **geo** | `us.` only (in-region only us-west-2) |
+| mistral.mistral-large-3-675b-instruct | **bare** | Geo: Not supported; Global: Not supported (in-region, 11 regions) |
+| mistral.mistral-large-2411-v1:0 | **bare** | phantom — see the Mistral AI table below; the Mistral Large card covers only `mistral-large-2402-v1:0`, bare |
+| mistral.mistral-large-2407-v1:0 | **bare** | no card; on-demand SKUs exist in us-west-2; the 2402 card shows Geo/Global Not supported — same family, in-region only |
+| mistral.pixtral-large-2502-v1:0 | **geo** (REQUIRED) | `us.`, `eu.` |
+| mistral.magistral-small-2509 | **bare** | Geo: Not supported; Global: Not supported |
+| mistral.ministral-3-14b-instruct | **bare** | Geo: Not supported; Global: Not supported |
+| mistral.ministral-3-8b-instruct | **bare** | Geo: Not supported; Global: Not supported |
+| mistral.ministral-3-3b-instruct | **bare** | Geo: Not supported; Global: Not supported (card "Ministral 3B" confirms this exact ID) |
+| mistral.mixtral-8x7b-instruct-v0:1 | **bare** | Geo: Not supported; Global: Not supported |
+| amazon.titan-text-premier-v1:0 | **bare** | model card removed from docs; historically in-region only, never had inference profiles |
+| cohere.command-r-v1:0 | **bare** | card: Geo Not supported; Global Not supported |
+| cohere.command-r-plus-v1:0 | **bare** | card: Geo Not supported; Global Not supported |
+
+Implications for `apps/sim/providers/bedrock/utils.ts` (`getBedrockInferenceProfileId`):
+
+1. All `mistral.*` IDs **except** `mistral.pixtral-large-2502-v1:0`, all `cohere.*` IDs, and `amazon.titan-text-premier-v1:0` must be passed through **unprefixed**. Today the function prefixes everything → `ValidationException` for these 11 models (8 Mistral, 2 Cohere, 1 Titan — every **bare** row in the table above).
+2. The blanket `ap-*/me-* → apac` mapping is wrong for every model in this list: **no bedrock-provider model has an `apac.` profile**. Claude Sonnet/Haiku 4.5 use `au.`/`jp.` (or `global.`); Nova 2 Lite has `jp.`; everything else is `us.`/`eu.` only.
+3. `eu.` is only valid for: claude opus/sonnet/haiku 4.5, nova-2-lite, nova pro/lite/micro, llama3-2-3b/1b, pixtral-large. For the rest (opus-4-1, nova-premier, all other llamas) only `us.` exists — an `eu-*` region request currently produces a nonexistent `eu.` profile ID.
+
+---
+
+## Per-model verification
+
+Prices are USD per 1M tokens, **standard on-demand, us-east-1** (us-west-2 where us-east-1 has no SKU). "Pricing API" = the offer file above, fetched 2026-06-11.
+
+### Anthropic (no Pricing API SKUs — verified against Anthropic pricing page; Bedrock bills Anthropic list prices)
+
+| model | field | repo | verified | source | verdict |
+|---|---|---|---|---|---|
+| claude-opus-4-5 | input/output | 5 / 25 | 5 / 25 | Anthropic pricing | OK |
+| | cachedInput | — | 0.50 (0.1× input; Bedrock card: caching Yes, min 4096 tok) | Anthropic pricing + card | **ADD** |
+| | maxOutputTokens | 64000 | 64K | card + Anthropic overview | OK |
+| | contextWindow | 200000 | 200K | card | OK |
+| | releaseDate | 2025-11-24 | Nov 24 2025 | card | OK |
+| claude-sonnet-4-5 | input/output | 3 / 15 | 3 / 15 | Anthropic pricing | OK |
+| | cachedInput | — | 0.30 | Anthropic pricing + card (caching Yes) | **ADD** |
+| | maxOutputTokens / ctx | 64000 / 200000 | 64K / 200K | card | OK |
+| | releaseDate | 2025-09-29 | card says Sep 30 2025; Anthropic launch Sep 29 2025 | card + Anthropic | keep repo (matches upstream launch) |
+| | recommended | — | provider default model | models.ts convention | **ADD `recommended: true`** |
+| claude-haiku-4-5 | input/output | 1 / 5 | 1 / 5 | Anthropic pricing | OK |
+| | cachedInput | — | 0.10 | Anthropic pricing + card (caching Yes) | **ADD** |
+| | maxOutputTokens / ctx | 64000 / 200000 | 64K / 200K | card | OK |
+| | releaseDate | 2025-10-15 | card says Oct 16 2025; Anthropic launch Oct 15 2025 | card + Anthropic | keep repo |
+| | speedOptimized | — | "the fastest model with near-frontier intelligence" | Anthropic overview | **ADD `speedOptimized: true`** |
+| claude-opus-4-1 | input/output | 15 / 75 | 15 / 75 | Anthropic pricing | OK |
+| | cachedInput | — | 1.50 | Anthropic pricing + card (caching Yes, 5m TTL only) | **ADD** |
+| | maxOutputTokens | 32768 | **32K = 32000** (card "32K"; Anthropic overview "32k tokens") | card + Anthropic overview | **FIX 32768 → 32000** (32768 would exceed the documented cap) |
+| | ctx / releaseDate / lifecycle | 200000 / 2025-08-05 / active | 200K / Aug 05 2025 / Active on Bedrock (deprecated on first-party API, retire 2026-08-05 — Bedrock lifecycle independent) | card + Anthropic | OK |
+
+**Geo premium (open question d):** Anthropic's pricing page states regional/multi-region endpoints carry a **10% premium over global** for Sonnet 4.5, Haiku 4.5, Opus 4.5 "and all future models" (earlier models keep existing pricing). Sim always builds geo profiles, so real spend on these three is 1.1× the table values. **Decision: keep base prices and document** — (a) the Pricing API exposes no Claude SKUs to anchor a geo-specific number, (b) repo convention is provider list price, (c) baking 1.1× would overbill if/when the provider routes `global.`. Revisit if Sim adds `global.` routing.
+
+### Amazon Nova (Pricing API us-east-1)
+
+| model | field | repo | verified | verdict |
+|---|---|---|---|---|
+| nova-2-pro | input/output | 1.0 / 4.0 | **1.375 / 11.0** (`USE1-Nova2.0Pro-text-input-tokens` 0.001375, `-text-output-tokens` 0.011; global cross-region 1.25/10.0) | **FIX**. Note: cloudprice lists 2.19/17.50 for an apac preview profile — AWS Pricing API wins |
+| | identity | `amazon.nova-2-pro-v1:0` | no model card; not in catalog; real ID is `amazon.nova-2-pro-preview-20251202-v1:0` (preview, Nova Forge early access, per AWS re:Invent 2025 what's-new + cloudprice/getmaxim) | entry is a **phantom ID**; `deprecated: true` (PR #4990) keeps it hidden — acceptable; longer-term remove or migrate to the preview ID |
+| nova-2-lite | input/output | 0.08 / 0.32 | **0.33 / 2.75** (`USE1-Nova2.0Lite-input-tokens` 0.00033, `-output-tokens` 0.00275) | **FIX** — resolves open question (a): repo was wrong AND the secondaries' 0.30/2.50 is the *global cross-region* price (`-cross-region-global` SKUs), not the geo/in-region price Sim pays |
+| | cachedInput | — | **0.0825** (`-cache-read-input-token-count` 0.0000825; cache write $0) | **ADD** |
+| | maxOutputTokens | — | 64K (card) | **ADD 64000** |
+| | ctx / releaseDate / lifecycle | 1000000 / 2025-12-02 / active | 1M / Dec 02 2025 / Active; geo us/eu/jp + global | OK |
+| nova-premier | input/output | 2.5 / 12.5 | 2.50 / 12.50 (`USE1-NovaPremier-*`) | OK (PR #4990 fix confirmed) |
+| | cachedInput | — | 0.625 (`-cache-read` 0.000625) | **ADD** (model is Legacy but still billable until EOL 2026-09-14) |
+| | deprecated | true | Legacy 2026-03-13, EOL 2026-09-14 (lifecycle page + card) | OK |
+| | maxOutputTokens | — | 25K (card) | skip per instruction (deprecated); documented only |
+| | releaseDate | 2025-04-30 | GA announced Apr 30 2025 (aws.amazon.com what's-new 2025/04 "Amazon Nova Premier… generally available"); card shows "Oct 31 2025" which conflicts with AWS's own GA announcement and the lifecycle history — treated as a card-metadata anomaly | **keep 2025-04-30** |
+| nova-pro | input/output | 0.8 / 3.2 | 0.80 / 3.20 | OK (question b resolved) |
+| | cachedInput | — | 0.20 | **ADD** |
+| | maxOutputTokens | — | 5K (card) | **ADD 5120** (Nova "5K" cap; trackers/openrouter report 5,120) |
+| | ctx | 300000 | 300K | OK; releaseDate repo 2024-12-03 (re:Invent announce) vs card Dec 05 2024 — keep repo, documented |
+| nova-lite | input/output | 0.06 / 0.24 | 0.06 / 0.24 | OK |
+| | cachedInput | — | 0.015 | **ADD** |
+| | maxOutputTokens | — | 5K | **ADD 5120** |
+| nova-micro | input/output | 0.035 / 0.14 | 0.035 / 0.14 | OK |
+| | cachedInput | — | 0.00875 | **ADD** |
+| | maxOutputTokens | — | 5K | **ADD 5120** |
+| | speedOptimized | — | card: "Amazon's fastest text-only model, optimized for speed and low cost" | **ADD `speedOptimized: true`** |
+
+### Meta (Pricing API; all cards report max output 4K for 3.x, 8K for Llama 4)
+
+| model | field | repo | verified | verdict |
+|---|---|---|---|---|
+| llama4-maverick | input/output | 0.24 / 0.97 | 0.24 / 0.97 | OK |
+| | maxOutputTokens | — | 8K (card) | **ADD 8192** |
+| | ctx / date / lifecycle | 1M / 2025-04-05 / active | 1M / Apr 05 2025 / Active | OK |
+| llama4-scout | input/output | 0.18 / 0.72 | **0.17 / 0.66** (`USE1-Llama4-Scout-17B-*` 0.00017 / 0.00066) | **FIX** |
+| | maxOutputTokens | — | 8K | **ADD 8192** |
+| | ctx | 10000000 | 10M (card) | OK (PR #4990 fix confirmed) |
+| llama3-3-70b | input/output | 0.72 / 0.72 | 0.72 / 0.72 | OK |
+| | lifecycle | active | **Active** (card; absent from Legacy table) — question (g) | OK |
+| | maxOutputTokens | — | 4K | **ADD 4096** |
+| llama3-2-90b | input/output | 2.0 / 2.0 | **0.72 / 0.72** (`USE1-Llama3-2-90B-*`) | **FIX** (deprecated but still billable until EOL 2026-07-07) |
+| | deprecated | true | Legacy, EOL Jul 7 2026 | OK |
+| llama3-2-11b | input/output | 0.16 / 0.16 | 0.16 / 0.16; Legacy EOL 2026-07-07 | OK |
+| llama3-2-3b | input/output | 0.15 / 0.15 | 0.15 / 0.15; Legacy | OK |
+| llama3-2-1b | input/output | 0.10 / 0.10 | 0.10 / 0.10; Legacy | OK |
+| llama3-1-405b | input/output | 5.32 / 16.0 | **2.40 / 2.40** (`USW2-Llama3-1-405B-*` 0.0024; us-east-1 has only batch SKUs at 1.20) | **FIX** (deprecated, Legacy EOL 2026-07-07, but the repo price was far off: input ~2.2× and output ~6.7× the on-demand rate) |
+| llama3-1-70b | input/output | 2.65 / 3.5 | **0.72 / 0.72** (`USE1-Llama3-1-70B-*`; the 2.65 figure resembles no AWS SKU — latency-optimized variant is a separate SKU) | **FIX** |
+| | lifecycle | active | **Active** (card) — question (g) | OK |
+| | maxOutputTokens / releaseDate | — / — | 4K / Jul 23 2024 | **ADD 4096, 2024-07-23** |
+| llama3-1-8b | input/output | 0.3 / 0.6 | **0.22 / 0.22** (`USE1-Llama3-1-8B-*`) | **FIX** |
+| | lifecycle | active | **Active** (card) | OK |
+| | maxOutputTokens / releaseDate | — / — | 4K / Jul 23 2024 | **ADD 4096, 2024-07-23** |
+
+### Mistral AI (Pricing API + cards)
+
+| model | field | repo | verified | verdict |
+|---|---|---|---|---|
+| mistral-large-3-675b | input/output | 0.5 / 1.5 | 0.50 / 1.50 (`USE1-Mistral-Large-3-675b-Instruct-*`) | OK (PR #4990 confirmed) |
+| | ctx / maxOutput | 256000 / 32768 | 256K / 32K (card) | OK |
+| | releaseDate | — | Dec 2 2025 (card) | **ADD 2025-12-02** |
+| | caching | — | card: prompt caching **Yes** (bedrock-runtime), but no cache-read SKU in Pricing API → rate unpublishable | no `cachedInput` (documented) |
+| mistral-large-2411 | input/output | 2.0 / 6.0 | **UNVERIFIABLE — model appears not to exist on Bedrock**: no model card (Mistral card index has only "Mistral Large" = 2402 and "Mistral Large 3"), no Pricing API SKU in us-east-1 or us-west-2, not in lifecycle table | keep price; entry is already `deprecated: true` (hidden); recommend follow-up removal |
+| mistral-large-2407 | input/output | 4.0 / 12.0 | **2.00 / 6.00** (`USW2-MistralLarge2407-*` 0.002/0.006; us-west-2 only). The 4/12 figure belongs to *Mistral Large 2402* (`USE1-MistralLarge-*` = 0.004/0.012) — repo had the two swapped | **FIX** (deprecated but billable) |
+| pixtral-large-2502 | input/output | 2.0 / 6.0 | 2.00 / 6.00 (`USE1-PixtralLarge2502-*`) | OK (question b resolved) |
+| | ctx / maxOutput / lifecycle | 128000 / 16384 / active | 128K / 16K / Active | OK |
+| magistral-small-2509 | input/output | 0.5 / 1.5 | 0.50 / 1.50 | OK |
+| | ctx / maxOutput / lifecycle | 128000 / 40000 / active | 128K / 40K / Active (card launch "Sep 2025", no day — no releaseDate added) | OK |
+| ministral-3-14b | input/output | 0.2 / 0.2 | 0.20 / 0.20 (`USE1-Ministral-3-14b-Instruct-*`) | OK |
+| | maxOutput / releaseDate | 8192 / — | 8K / Dec 2 2025 | **ADD 2025-12-02** |
+| | caching | — | card shows no prompt-caching row → unconfirmed | no `cachedInput` |
+| ministral-3-8b | input/output | 0.1 / 0.1 | **0.15 / 0.15** (`USE1-Ministral-3-8b-Instruct-*` 0.00015) | **FIX**; **ADD releaseDate 2025-12-02** |
+| ministral-3-3b | input/output | 0.04 / 0.04 | **0.10 / 0.10** (`USE1-Ministral-3-3b-Instruct-*` 0.0001) | **FIX**; **ADD releaseDate 2025-12-02** (card "Ministral 3B" confirms ID `mistral.ministral-3-3b-instruct`, 128K ctx, 8K out, Active) |
+| mixtral-8x7b | input/output | 0.45 / 0.7 | 0.45 / 0.70 (`USE1-Mixtral8x7B-*`) | OK (question b resolved) |
+| | ctx / lifecycle | 32000 / active | 32K / Active | OK |
+| | maxOutputTokens | — | 4K (card) | **ADD 4096** |
+
+### Amazon Titan / Cohere
+
+| model | field | repo | verified | verdict |
+|---|---|---|---|---|
+| titan-text-premier | input/output | 0.5 / 1.5 | 0.50 / 1.50 (`USE1-TitanText-Premier-*`, attribute `titanModel: "Titan Text G1 Premier"`) | OK |
+| | deprecated | true | model card **removed** from the model-cards index (only Titan embeddings/image cards remain); absent from the Legacy table (which excludes models already past EOL) | OK — keep deprecated |
+| cohere command-r | input/output | 0.5 / 1.5 | not in Pricing API (marketplace-billed); matches long-standing AWS list price | UNVERIFIABLE via Pricing API — keep |
+| | deprecated | true | Legacy 2026-02-19, EOL 2026-08-19 (lifecycle + card) | OK |
+| cohere command-r-plus | input/output | 3.0 / 15.0 | not in Pricing API; matches long-standing AWS list price | UNVERIFIABLE — keep |
+| | deprecated | true | Legacy 2026-02-19, EOL 2026-08-19 | OK |
+
+---
+
+## Changes made in this pass (fix list for models.ts — to be applied by the follow-up code change)
+
+Pricing (all `updatedAt` → `2026-06-11`):
+
+1. `bedrock/amazon.nova-2-pro-v1:0`: input 1.0 → 1.375, output 4.0 → 11.0 (Pricing API `USE1-Nova2.0Pro-text-*`)
+2. `bedrock/amazon.nova-2-lite-v1:0`: input 0.08 → 0.33, output 0.32 → 2.75 (Pricing API `USE1-Nova2.0Lite-*`)
+3. `bedrock/meta.llama4-scout-17b-instruct-v1:0`: input 0.18 → 0.17, output 0.72 → 0.66
+4. `bedrock/meta.llama3-2-90b-instruct-v1:0`: 2.0/2.0 → 0.72/0.72
+5. `bedrock/meta.llama3-1-405b-instruct-v1:0`: 5.32/16.0 → 2.40/2.40 (USW2 on-demand)
+6. `bedrock/meta.llama3-1-70b-instruct-v1:0`: 2.65/3.5 → 0.72/0.72
+7. `bedrock/meta.llama3-1-8b-instruct-v1:0`: 0.3/0.6 → 0.22/0.22
+8. `bedrock/mistral.mistral-large-2407-v1:0`: 4.0/12.0 → 2.0/6.0 (USW2 `MistralLarge2407`)
+9. `bedrock/mistral.ministral-3-8b-instruct`: 0.1/0.1 → 0.15/0.15
+10. `bedrock/mistral.ministral-3-3b-instruct`: 0.04/0.04 → 0.10/0.10
+
+cachedInput additions (cache-read rate):
+
+11. claude-opus-4-5: 0.5; claude-sonnet-4-5: 0.3; claude-haiku-4-5: 0.1; claude-opus-4-1: 1.5 (Anthropic pricing 0.1× input; Bedrock cards confirm caching)
+12. nova-2-lite: 0.0825; nova-premier: 0.625; nova-pro: 0.2; nova-lite: 0.015; nova-micro: 0.00875 (Pricing API cache-read SKUs; Nova cache writes are $0)
+
+maxOutputTokens:
+
+13. claude-opus-4-1: 32768 → 32000 (Anthropic overview "32k"; Bedrock card "32K")
+14. nova-2-lite: add 64000; nova-pro/lite/micro: add 5120 each
+15. llama4-maverick/scout: add 8192 each; llama3-3-70b, llama3-1-70b, llama3-1-8b: add 4096 each; mixtral-8x7b: add 4096
+
+Flags / metadata:
+
+16. claude-sonnet-4-5: add `recommended: true` (bedrock default model; matches other providers' convention)
+17. claude-haiku-4-5 and nova-micro: add `speedOptimized: true` (Anthropic "fastest model"; card "Amazon's fastest text-only model"). Ruled **against** `speedOptimized` on nova-2-lite — its card positions it as cost-efficient multimodal, not the speed tier.
+18. releaseDate additions: mistral-large-3 `2025-12-02`; ministral-3-14b/8b/3b `2025-12-02`; llama3-1-70b/8b `2024-07-23`
+
+## Deliberately not changed
+
+- **Claude 4.5-gen geo premium (q. d):** kept base list prices; Sim's geo-profile routing actually bills 1.1× for opus/sonnet/haiku 4.5 per Anthropic's pricing page. Documented here rather than baked in (no AWS SKU to anchor; would overstate global-endpoint cost; consistent with list-price convention).
+- **Release-date nits (q. h):** sonnet-4-5 `2025-09-29` and haiku-4-5 `2025-10-15` kept (Anthropic launch dates; Bedrock cards say +1 day). nova pro/lite/micro `2024-12-03` kept (re:Invent announcement; cards say Dec 05). nova-premier `2025-04-30` kept — AWS what's-new confirms GA Apr 30 2025; the card's "Oct 31 2025" contradicts AWS's own announcement.
+- **Deprecated models' maxOutputTokens** (nova-premier 25K, llama3-2 4K, command-r/r+ 4K, mistral-large-2407 4K): per instruction, not added.
+- **All deprecated flags from PR #4990 re-verified correct:** nova-premier, llama3-2 ×4, llama3-1-405b, command-r/r+ (Legacy with EOL dates on the lifecycle page), titan-text-premier (card removed from catalog), mistral-large-2411/2407 (absent from catalog). llama3-1-70b/8b and llama3-3-70b confirmed **Active** — correctly not deprecated.
+- **mistral-large-3 / magistral / ministral-14b `cachedInput`:** Large 3 card says caching is supported but no cache-read SKU exists in the Pricing API; ministral-14b card shows no caching row. No invented numbers.
+- **`bedrock/amazon.nova-2-pro-v1:0` and `bedrock/mistral.mistral-large-2411-v1:0` entries kept** (both `deprecated: true`, hidden): the former's real Bedrock ID is `amazon.nova-2-pro-preview-20251202-v1:0` (preview), the latter appears to have never shipped on Bedrock. Recommend a follow-up PR to remove/rename — out of scope for a validation pass.
+
+## Unverifiable
+
+- **cohere.command-r-v1:0 / command-r-plus-v1:0 prices** (0.5/1.5, 3/15): absent from the Pricing API (marketplace-billed); match the long-standing published AWS rates; models are Legacy. Kept as-is.
+- **mistral-large-2411 price** (2/6): no SKU, no card; phantom entry (see above).
+- **nova-2-pro geo-profile support**: no card; preview ID served via profiles per third-party trackers only.
+- **Mistral Large 3 cache-read rate**: caching supported per card; rate unpublished.
diff --git a/docs/models/deepseek-cerebras.md b/docs/models/deepseek-cerebras.md
new file mode 100644
index 0000000000..33f9927459
--- /dev/null
+++ b/docs/models/deepseek-cerebras.md
@@ -0,0 +1,189 @@
+# Model Validation: `deepseek` & `cerebras` — apps/sim/providers/models.ts
+
+- **Date:** 2026-06-11
+- **Scope:** Final exhaustive re-validation after PR #4990 (deepseek-chat/reasoner repricing + 1M ctx, deprecation flags on deepseek-v3/r1 and cerebras llama3.1-8b/qwen-3-235b)
+- **Method:** Live WebFetch of provider docs (primary), OpenRouter/ArtificialAnalysis/aggregators (secondary), DeepSeek news archive for release dates, `rg` of provider code to confirm capability consumption. Provider docs win on conflicts.
+
+## Sources
+
+| Source | URL |
+|---|---|
+| DeepSeek pricing (primary) | https://api-docs.deepseek.com/quick_start/pricing |
+| DeepSeek list-models (primary) | https://api-docs.deepseek.com/api/list-models |
+| DeepSeek chat-completion API ref (primary) | https://api-docs.deepseek.com/api/create-chat-completion |
+| DeepSeek reasoning guide (primary) | https://api-docs.deepseek.com/guides/reasoning_model |
+| DeepSeek V3 announcement | https://api-docs.deepseek.com/news/news1226 |
+| DeepSeek R1 announcement | https://api-docs.deepseek.com/news/news250120 |
+| DeepSeek V4 preview announcement | https://api-docs.deepseek.com/news/news260424 |
+| Cerebras models overview (primary) | https://inference-docs.cerebras.ai/models/overview |
+| Cerebras gpt-oss model page (primary) | https://inference-docs.cerebras.ai/models/openai-oss |
+| Cerebras zai-glm-4.7 model page (primary) | https://inference-docs.cerebras.ai/models/zai-glm-47 |
+| Cerebras deprecations (primary) | https://inference-docs.cerebras.ai/support/deprecation |
+| Cerebras chat-completions API ref (primary) | https://inference-docs.cerebras.ai/api-reference/chat-completions |
+| OpenRouter deepseek-v4-flash (secondary) | https://openrouter.ai/deepseek/deepseek-v4-flash |
+| OpenRouter GLM 4.7 (secondary) | https://openrouter.ai/z-ai/glm-4.7 |
+| ArtificialAnalysis gpt-oss-120b providers (secondary) | https://artificialanalysis.ai/models/gpt-oss-120b/providers |
+| aimodelapis Cerebras GLM-4.7 (secondary) | https://aimodelapis.com/providers/cerebras/cerebras-zai-glm-4-7 |
+| Cerebras GLM-4.7 launch blog (secondary) | https://www.cerebras.ai/blog/glm-4-7 |
+
+## Code-consumption checks
+
+- `rg "temperature" apps/sim/providers/deepseek/ apps/sim/providers/cerebras/`:
+  - `deepseek/index.ts:89` — `if (request.temperature !== undefined) payload.temperature = request.temperature`
+  - `cerebras/index.ts:85` — `if (request.temperature !== undefined) payload.temperature = request.temperature`
+  - Both providers forward temperature when set; a `temperature` capability in models.ts is what surfaces the slider (`getMaxTempFromDefinitions` in `providers/utils.ts`). With `capabilities: {}` the slider is hidden even though the API accepts the param.
+- No `reasoningEffort`, `verbosity`, `thinking`, `nativeStructuredOutputs`, or `computerUse` handling exists in either provider implementation — do **not** add those capabilities even though Cerebras documents `reasoning_effort` (not consumed by code).
+- `maxOutputTokens` is a supported capability field (`models.ts:42`) consumed by `providers/index.ts` — safe to recommend.
+
+---
+
+## DeepSeek
+
+### Alias status (Open Question a)
+
+**Confirmed.** DeepSeek pricing page: "The model names `deepseek-chat` and `deepseek-reasoner` will be deprecated on **2026/07/24 15:59 UTC**." They correspond to the **non-thinking** and **thinking** modes of `deepseek-v4-flash` respectively. The list-models API now returns only `deepseek-v4-flash` and `deepseek-v4-pro`. Until 2026-07-24 the aliases remain valid API ids, so keeping them non-deprecated in models.ts is correct **for now** — they must be flipped to `deprecated: true` (or removed) by 2026-07-24.
+
+**Recommendation (separate work, not part of this pass):** add `deepseek-v4-flash` (input $0.14 / cached $0.0028 / output $0.28, ctx 1M, max output 384K, released 2026-04-24) and `deepseek-v4-pro` (input $0.435 / cached $0.003625 / output $0.87, ctx 1M, max output 384K) as first-class entries before the 2026-07-24 alias retirement, then deprecate the aliases.
+
+### deepseek-chat
+
+| Field | Current value | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| id valid | `deepseek-chat` | Valid alias until 2026-07-24 15:59 UTC (→ v4-flash non-thinking) | pricing page | OK |
+| pricing.input | 0.14 | $0.14/M (cache miss) | pricing page | OK |
+| pricing.cachedInput | 0.0028 | $0.0028/M (cache hit) | pricing page | OK |
+| pricing.output | 0.28 | $0.28/M | pricing page | OK |
+| pricing.updatedAt | 2026-06-11 | — | — | OK |
+| contextWindow | 1000000 | 1M tokens | pricing page | OK |
+| capabilities.temperature | *(absent)* | Supported, range 0–2, default 1 ("What sampling temperature to use, between 0 and 2…") — applies to non-thinking mode | create-chat-completion API ref | **FIX: add `temperature: { min: 0, max: 2 }`** (code at `deepseek/index.ts:89` consumes it) |
+| capabilities.maxOutputTokens | *(unset)* | Conflict: pricing page says 384K max output for v4-flash; reasoning guide (thinking mode) says default 32K / max 64K | pricing page vs reasoning guide | Leave unset — see "Deliberately not changed" |
+| releaseDate | 2024-12-26 | V3 announcement 2024-12-26 (date the alias pointed to V3); alias now points to v4-flash (released 2026-04-24) | news1226, news260424 | OK (alias semantics — keep original anchor) |
+| deprecated | *(absent)* | Alias still live | pricing page | OK until 2026-07-24 |
+
+### deepseek-v3
+
+| Field | Current value | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| id valid | `deepseek-v3` | **Not** a valid API id (list-models returns only v4-flash/v4-pro; never a documented API id — API ids were deepseek-chat/reasoner) | list-models | OK as `deprecated: true` |
+| deprecated | true | Correct | list-models | OK |
+| pricing | 0.28 / 0.028 / 0.42 (updatedAt 2026-04-01) | Historical V3.x pricing; model unpurchasable, frozen values acceptable | — | OK (legacy) |
+| contextWindow | 128000 | Historical 128K | — | OK (legacy) |
+| releaseDate | 2024-12-26 | DeepSeek-V3 announced 2024-12-26 | news1226 | **Verified** |
+
+### deepseek-r1
+
+| Field | Current value | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| id valid | `deepseek-r1` | **Not** a valid API id (R1 was accessed as `deepseek-reasoner`) | list-models, news250120 | OK as `deprecated: true` |
+| deprecated | true | Correct | list-models | OK |
+| pricing | 0.55 / 0.14 / 2.19 | Matches original R1 launch pricing ($0.14 hit / $0.55 miss / $2.19 out) | news250120 | **Verified** (legacy, frozen) |
+| contextWindow | 128000 | Historical | — | OK (legacy) |
+| releaseDate | 2025-01-20 | R1 announced 2025-01-20 | news250120 | **Verified** |
+
+### deepseek-reasoner
+
+| Field | Current value | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| id valid | `deepseek-reasoner` | Valid alias until 2026-07-24 15:59 UTC (→ v4-flash thinking) | pricing page | OK |
+| pricing.input / cachedInput / output | 0.14 / 0.0028 / 0.28 | $0.14 / $0.0028 / $0.28 (same v4-flash pricing, both modes) | pricing page | OK |
+| pricing.updatedAt | 2026-06-11 | — | — | OK |
+| contextWindow | 1000000 | 1M | pricing page | OK |
+| capabilities | `{}` (no temperature) | Reasoning guide: `temperature`, `top_p`, `presence_penalty`, `frequency_penalty`, `logprobs`, `top_logprobs` **not supported** — "will not trigger an error but will also have no effect" | reasoning guide | OK — must NOT add temperature |
+| capabilities.maxOutputTokens | *(unset)* | Conflict (384K vs 32K/64K) | see below | Leave unset |
+| releaseDate | 2025-01-20 | `model=deepseek-reasoner` introduced with R1 release 2025-01-20 | news250120 ("Use DeepSeek-R1 by setting model=deepseek-reasoner") | **Verified** |
+
+### maxOutputTokens conflict (Open Question a)
+
+- Pricing page (current, v4-flash): **384K max output**.
+- Reasoning guide (deepseek-reasoner page): **default 32K, max 64K** — appears not yet updated for V4 (still reflects R1-era limits).
+- The aliases map to v4-flash modes, so 384K is *probably* correct, but DeepSeek's own docs disagree with each other and the reasoning guide is the page specific to `deepseek-reasoner`. **Resolution: leave `maxOutputTokens` unset on both aliases** (current state) and set 384000 on the future `deepseek-v4-flash`/`deepseek-v4-pro` entries, where the pricing page is unambiguous.
+
+### Secondary-source pricing (DeepSeek)
+
+OpenRouter lists deepseek-v4-flash at **$0.098 in / $0.196 out** — exactly 70% of official $0.14/$0.28, i.e. the OpenRouter **−30% promo is still present**. Per policy, provider docs win: $0.14 / $0.0028 / $0.28 stands. OpenRouter confirms 1M context and the 2026-04-24 release date.
+
+---
+
+## Cerebras
+
+### Deprecations (confirmed)
+
+Cerebras deprecation page lists **llama3.1-8b** and **qwen-3-235b-a22b-instruct-2507** as deprecated **2026-05-27**, recommended replacement "GPT OSS 120B". Neither appears on the models overview anymore. `deprecated: true` on both entries (PR #4990) is correct.
+
+### cerebras/gpt-oss-120b
+
+| Field | Current value | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| id valid | `gpt-oss-120b` (after `cerebras/` strip at `cerebras/index.ts:82`) | Production model | models overview, model page | OK |
+| pricing.input | 0.35 | $0.35/M | model page (live 2026-06-11) | OK |
+| pricing.output | 0.75 | $0.75/M | model page | OK |
+| pricing.updatedAt | 2026-06-11 | — | — | OK |
+| contextWindow | 131072 | 131k (paid tiers; free tier 65k) | model page | OK (paid tier, consistent with repo convention) |
+| capabilities.maxOutputTokens | *(unset)* | 40k paid tiers (32k free) | model page | **FIX: add `maxOutputTokens: 40000`** (paid tier, matching paid-tier ctx) |
+| capabilities.temperature | *(absent)* | Cerebras chat-completions API: "sampling temperature to use, between 0 and 2.0" | API reference | **FIX: add `temperature: { min: 0, max: 2 }`** (code at `cerebras/index.ts:85` consumes it) |
+| releaseDate | 2025-08-05 | gpt-oss released 2025-08-05; Cerebras day-one launch | cerebras.ai blog "OpenAI GPT OSS 120B Runs Fastest on Cerebras", techintelpro | **Verified** |
+
+Secondary-source note: several aggregators (crackedaiengineering, ArtificialAnalysis blended $0.39) still show launch-era pricing **$0.25/$0.69** and 33K max output. The live Cerebras model page (fetched today) says $0.35/$0.75 and 40k paid-tier max output — provider docs win; aggregators are stale.
+
+### cerebras/llama3.1-8b
+
+| Field | Current value | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| deprecated | true | Deprecated 2026-05-27, migrate to GPT OSS 120B | deprecation page | **Verified** |
+| pricing | 0.10 / 0.10 (frozen 2026-04-01) | Unpurchasable; frozen legacy values | — | OK (legacy) |
+| contextWindow | 32768 | Historical | — | OK (legacy) |
+| releaseDate | 2024-08-27 | Consistent with Cerebras Inference launch (2024-08-27); not re-verified against a live page this pass | — | Plausible / not re-verified (deprecated model, low stakes) |
+
+### cerebras/qwen-3-235b-a22b-instruct-2507
+
+| Field | Current value | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| deprecated | true | Deprecated 2026-05-27, migrate to GPT OSS 120B | deprecation page | **Verified** |
+| pricing | 0.6 / 1.2 (frozen 2026-04-01) | Unpurchasable; frozen legacy values | — | OK (legacy) |
+| contextWindow | 131072 | Historical | — | OK (legacy) |
+| releaseDate | 2025-07-29 | Could not verify the exact Cerebras availability date | — | **Unverifiable** (deprecated model; leave as-is) |
+
+### cerebras/zai-glm-4.7
+
+| Field | Current value | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| id valid | `zai-glm-4.7` | Preview model on overview | models overview, model page | OK |
+| pricing.input | 2.25 | $2.25/M | model page; confirmed by aimodelapis (secondary) | OK |
+| pricing.output | 2.75 | $2.75/M | model page; aimodelapis | OK |
+| pricing.updatedAt | 2026-06-11 | — | — | OK |
+| contextWindow | 131072 | 131k paid tiers (free 64k) | model page; aimodelapis (131,000) | OK |
+| capabilities.maxOutputTokens | *(unset)* | 40k tokens (both tiers) | model page; aimodelapis (40,000) | **FIX: add `maxOutputTokens: 40000`** |
+| capabilities.temperature | *(absent)* | API-wide param, 0–2.0 | API reference | **FIX: add `temperature: { min: 0, max: 2 }`** |
+| releaseDate | 2025-12-22 | GLM-4.7 released 2025-12-22 (OpenRouter "Dec 22, 2025"; PR Newswire; Cerebras same-day launch blog) | multiple | **Verified** |
+
+---
+
+## Changes made in this pass (PR #4990) — all re-verified correct
+
+1. `deepseek-chat` & `deepseek-reasoner` repriced to $0.14 / $0.0028 cached / $0.28 — matches the pricing of v4-flash, which both ids now alias. ✅
+2. `deepseek-chat` & `deepseek-reasoner` contextWindow → 1,000,000 — matches v4-flash 1M default. ✅
+3. `deprecated: true` on `deepseek-v3` and `deepseek-r1` — neither is a valid API id (list-models returns only v4-flash/v4-pro). ✅
+4. `deprecated: true` on `cerebras/llama3.1-8b` and `cerebras/qwen-3-235b-a22b-instruct-2507` — Cerebras deprecation page, 2026-05-27. ✅
+5. `pricing.updatedAt: 2026-06-11` bumps on the four live-model entries. ✅
+
+## Outstanding fixes recommended (not applied — doc-only pass)
+
+1. `deepseek-chat`: add `capabilities.temperature: { min: 0, max: 2 }` — API ref documents temperature 0–2 (default 1) for chat completions; non-thinking mode honors it; `deepseek/index.ts:89` forwards it. Currently the empty `capabilities` hides Sim's temperature slider for a model that supports it.
+2. `cerebras/gpt-oss-120b`: add `capabilities.temperature: { min: 0, max: 2 }` and `capabilities.maxOutputTokens: 40000`.
+3. `cerebras/zai-glm-4.7`: add `capabilities.temperature: { min: 0, max: 2 }` and `capabilities.maxOutputTokens: 40000`.
+
+## Deliberately not changed
+
+- **`deepseek-reasoner` capabilities stay `{}`** — reasoning guide explicitly lists temperature as unsupported/no-effect in thinking mode.
+- **`deepseek-chat`/`deepseek-reasoner` not marked deprecated** — valid aliases until 2026-07-24 15:59 UTC. Calendar item: deprecate (and add v4-flash/v4-pro entries) before that date.
+- **`maxOutputTokens` left unset on both DeepSeek aliases** — DeepSeek's own docs contradict each other (pricing page: 384K for v4-flash; reasoning guide: 32K default / 64K max for deepseek-reasoner). Set 384000 only on future first-class `deepseek-v4-*` entries where the pricing page is unambiguous.
+- **Legacy pricing/ctx on the four deprecated entries** (deepseek-v3, deepseek-r1, llama3.1-8b, qwen-3-235b) — frozen historical values on unpurchasable models; R1 values cross-checked against the original announcement.
+- **No `reasoningEffort` capability for Cerebras** despite the model pages documenting `reasoning_effort` — `cerebras/index.ts` does not consume it (capability additions must be backed by docs AND code).
+- **OpenRouter −30% DeepSeek promo pricing ($0.098/$0.196) ignored** — provider docs win.
+- **deepseek-chat releaseDate kept at 2024-12-26** — anchor is the V3 announcement; the id predates V3 and now aliases v4-flash (2026-04-24); any value is a judgment call for an alias, so the existing anchor is retained.
+
+## Unverifiable
+
+- `cerebras/qwen-3-235b-a22b-instruct-2507` releaseDate 2025-07-29 — no live source found for the exact Cerebras availability date (model delisted). Left as-is.
+- `cerebras/llama3.1-8b` releaseDate 2024-08-27 — consistent with the known Cerebras Inference launch date but not re-verified against a live page this pass (model delisted).
+- Cerebras temperature **default** value — API ref documents the 0–2.0 range but not a default.
diff --git a/docs/models/embeddings-rerank-dynamic.md b/docs/models/embeddings-rerank-dynamic.md
new file mode 100644
index 0000000000..46ef6b6967
--- /dev/null
+++ b/docs/models/embeddings-rerank-dynamic.md
@@ -0,0 +1,75 @@
+# Validation: EMBEDDING_MODEL_PRICING, RERANK_MODEL_PRICING, and dynamic providers
+
+- **Date:** 2026-06-11
+- **File validated:** `apps/sim/providers/models.ts` (`EMBEDDING_MODEL_PRICING` ~L3289, `RERANK_MODEL_PRICING` ~L3320, dynamic provider definitions ~L87–191, L2503–2515, update functions ~L3190–3287)
+- **Method:** Every numeric claim checked via live WebFetch against the provider's first-party docs where they render, with at least one secondary tracker where available (Cohere per-search prices could only be confirmed via secondary trackers — see "Unverifiable"). WebSearch used as a fallback when a page was truncated. No edits were made to `models.ts`.
+- **Primary sources:**
+  - OpenAI: `developers.openai.com/api/docs/models/text-embedding-3-small` / `.../text-embedding-3-large` / `.../text-embedding-ada-002` (the aggregate pricing page truncates before the embeddings table; per-model pages carry the prices)
+  - Google: `ai.google.dev/gemini-api/docs/pricing`
+  - Cohere: `cohere.com/pricing` (Model Vault only — per-search API pricing not rendered), `docs.cohere.com/docs/how-does-cohere-pricing-work` (confirms rerank is billed per search, no numbers), `docs.cohere.com/docs/rerank` (model list)
+  - Secondary trackers: Vercel AI Gateway (`vercel.com/ai-gateway/models/rerank-v4-pro`, `.../rerank-v4-fast`), eesel.ai Cohere pricing guide, metacto.com Cohere pricing deep dive, cloudprice.net, TokenMix/costgoat (OpenAI embeddings)
+  - Provider API docs: `docs.fireworks.ai/api-reference/post-chatcompletions`, `docs.together.ai/reference/chat-completions`, `openrouter.ai/docs` parameters reference, `docs.ollama.com/api/openai-compatibility`, `docs.baseten.co/development/model-apis/overview`
+
+## EMBEDDING_MODEL_PRICING
+
+| Entry | Field | Value in code | Verified value | Source | Verdict |
+|---|---|---|---|---|---|
+| `text-embedding-3-small` | input | $0.02 / 1M | $0.02 / 1M | developers.openai.com model page; TokenMix secondary | CORRECT |
+| `text-embedding-3-small` | output | $0.00 | n/a (embeddings bill input only) | OpenAI docs | CORRECT |
+| `text-embedding-3-large` | input | $0.13 / 1M | $0.13 / 1M | developers.openai.com model page; TokenMix secondary | CORRECT |
+| `text-embedding-3-large` | output | $0.00 | n/a | OpenAI docs | CORRECT |
+| `text-embedding-ada-002` | input | $0.10 / 1M | $0.10 / 1M | developers.openai.com model page; search secondary | CORRECT |
+| `text-embedding-ada-002` | output | $0.00 | n/a | OpenAI docs | CORRECT |
+| `gemini-embedding-001` | input | $0.15 / 1M | $0.15 / 1M (paid tier, standard; batch is $0.075) | ai.google.dev/gemini-api/docs/pricing | CORRECT |
+| `gemini-embedding-001` | output | $0.00 | n/a | Google docs | CORRECT |
+
+## RERANK_MODEL_PRICING (per search unit = 1 query × ≤100 docs)
+
+| Entry | Value in code | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| `rerank-v4.0-pro` | $0.0025 / search | $2.50 / 1k searches ($0.0025) | Vercel AI Gateway rerank-v4-pro page ("$2.5/K, billed per search query"); eesel.ai ("$0.0025 / search") | CORRECT |
+| `rerank-v4.0-fast` | $0.002 / search | $2.00 / 1k searches ($0.002) | Vercel AI Gateway rerank-v4-fast page ("$2/K"); eesel.ai ("$0.002 / search") | CORRECT |
+| `rerank-v3.5` | $0.002 / search | $2.00 / 1k searches ($0.002) Cohere direct & Bedrock | metacto ("$2.00 per 1,000 searches"); cloudprice.net ($0.0020/unit, Cohere + Bedrock rows agree) | CORRECT |
+
+Notes:
+
+- `cohere.com/pricing` currently only renders Model Vault (dedicated instance) hourly pricing; the per-search API table is JS-rendered and not fetchable. `docs.cohere.com/docs/how-does-cohere-pricing-work` confirms rerank is "priced based on the quantity of searches" (per-search, not per-token), which validates the `perSearchUnit` modeling and the ≤100-doc cap comment in the code.
+- Conflicting source resolved: OpenRouter lists `cohere/rerank-v3.5` at $0.001/search, but that is OpenRouter's reseller price, not Cohere first-party. Sim calls Cohere directly, so $0.002 stands.
+- Cohere also offers `rerank-english-v3.0` and `rerank-multilingual-v3.0`; Sim does not expose them, so no entries are needed.
+
+## Dynamic providers (provider-level config sanity pass)
+
+All eight have empty static `models: []` populated at runtime via `update*Models()` (pricing zeroed, `updatedAt` set to today — intentional for BYOK/reseller providers). `modelPatterns` prefixes match each provider's `update*` function and prefix-stripping in the provider implementations.
+
+| Provider | Config checked | Verdict |
+|---|---|---|
+| `fireworks` | temp 0–2, toolUsageControl true, pattern `/^fireworks\//` | CORRECT — Fireworks docs: temperature "between 0 and 2", full `tool_choice` support (`none`/`auto`/`required`/named) |
+| `together` | temp 0–2, toolUsageControl true, pattern `/^together\//` | **DISCREPANCY** — Together's own API reference documents temperature as "a decimal number from 0-1"; `tool_choice` supported. Sim declares max 2. Flagged below; not changed in this pass |
+| `baseten` | temp 0–2, toolUsageControl true, pattern `/^baseten\//` | SANE — Model APIs are OpenAI-compatible (docs.baseten.co); exact temp bounds not published, 0–2 follows the OpenAI convention |
+| `openrouter` | temp 0–2, toolUsageControl true, pattern `/^openrouter\//` | CORRECT — OpenRouter docs: temperature 0.0–2.0, default 1.0 |
+| `ollama-cloud` | temp 0–2, toolUsageControl **true**, pattern `/^ollama-cloud\//` | **QUESTIONABLE** — Ollama's OpenAI-compat layer (same API at `ollama.com/v1`) explicitly lists `tool_choice` as unsupported, and Sim's own shared core (`apps/sim/providers/ollama/core.ts:140-147`) degrades forced tool selection to `auto` with a warning. Local `ollama` correctly sets `toolUsageControl: false`; `ollama-cloud: true` is inconsistent. Flagged below; not changed in this pass |
+| `vllm` | temp 0–2, toolUsageControl true, `defaultModel: 'vllm/generic'`, pattern `/^vllm\//` | SANE — vLLM's OpenAI-compatible server accepts temperature ≥0 (no hard cap of 2); 0–2 is a reasonable UI cap. `vllm/generic` matches the pattern and is the documented placeholder (only other reference is the vllm provider test) |
+| `litellm` | temp 0–2, toolUsageControl true, pattern `/^litellm\//` | SANE — proxy passthrough; effective bounds depend on the upstream model, 0–2 is the OpenAI-convention cap |
+| `ollama` (local) | toolUsageControl false ("does not support tool_choice"), no temp block, `modelPatterns: []` | CORRECT — docs.ollama.com OpenAI-compatibility page lists `tool_choice` as unsupported (temperature is supported); empty patterns are intentional since local model names are arbitrary and matched via the providers store |
+
+## `gemini` vs `google` provider key
+
+- `PROVIDER_DEFINITIONS` contains only `google` (L1303, `defaultModel: 'gemini-2.5-pro'`, patterns `/^gemini/`, `/^deep-research/`). There is no `gemini` registry key, and nothing calls `getProviderModels('gemini')` — all callers use `'google'` (models.ts L3163, `apps/sim/providers/google/index.ts:21`).
+- `apps/sim/providers/gemini/` exists but is **not a provider**: it holds only `core.ts`/`types.ts` (shared Gemini execution logic consumed by both the `google` and `vertex` providers). No `index.ts`, not registered in `registry.ts`.
+- The only `'gemini'` string key is the rotating-API-key namespace: `apps/sim/providers/utils.ts:891` maps provider `google` → `getRotatingApiKey('gemini')`, matching the `GEMINI_API_KEY_*` env convention in `apps/sim/lib/core/config/api-keys.ts`. Intentional; nothing structurally odd.
+
+## Changes made in this pass
+
+None. All `EMBEDDING_MODEL_PRICING` and `RERANK_MODEL_PRICING` values verified correct; instructions prohibited edits to `models.ts`.
+
+## Deliberately not changed
+
+- **`together` temperature max 2 vs documented 0–1:** Together's API reference documents 0–1, but the endpoint is OpenAI-compatible and tolerantly accepts higher values in practice; tightening to `max: 1` would change UI slider behavior for existing workflows. Left for a deliberate follow-up decision.
+- **`ollama-cloud` `toolUsageControl: true`:** inconsistent with local `ollama: false` and with Ollama's documented lack of `tool_choice`. Runtime is already safe (shared core degrades forced selection to `auto` with a warning), so this only mis-advertises a capability in the UI. Left for follow-up.
+- Dynamic-model zero pricing (`input: 0, output: 0`) in all `update*Models()` functions — intentional for BYOK/reseller providers where Sim doesn't bill model usage.
+
+## Unverifiable
+
+- **Cohere first-party per-search price page:** `cohere.com/pricing`'s API pricing table does not render server-side; per-search numbers were confirmed via two independent secondary trackers per model plus Cohere docs confirming the per-search billing unit.
+- **Baseten and LiteLLM exact temperature bounds:** neither publishes a numeric range (OpenAI-compatible passthrough); 0–2 judged sane by convention rather than verified.
+- **vLLM upper temperature bound:** vLLM accepts temperatures above 2; the 0–2 cap is a UI choice, not a provider-documented limit.
diff --git a/docs/models/google.md b/docs/models/google.md
new file mode 100644
index 0000000000..215ea82402
--- /dev/null
+++ b/docs/models/google.md
@@ -0,0 +1,184 @@
+# Google Provider Model Validation — Final Pass
+
+- **Date:** 2026-06-11
+- **Scope:** `google` block in `apps/sim/providers/models.ts` (10 models), re-verifying everything including changes landed in PR #4990
+- **Method:** Live WebFetch of ai.google.dev (models overview, per-model pages, pricing, thinking, deprecations, changelog, generate-content API reference) and cloud.google.com Vertex AI pricing; OpenRouter as secondary pricing source; WebSearch for GA dates. Google docs treated as authoritative where sources conflict.
+- **Primary sources:**
+  - https://ai.google.dev/gemini-api/docs/models (+ per-model pages)
+  - https://ai.google.dev/gemini-api/docs/pricing
+  - https://ai.google.dev/gemini-api/docs/thinking
+  - https://ai.google.dev/gemini-api/docs/deprecations
+  - https://ai.google.dev/gemini-api/docs/changelog
+  - https://ai.google.dev/gemini-api/docs/interactions/deep-research
+  - https://ai.google.dev/api/generate-content (GenerationConfig)
+  - https://cloud.google.com/vertex-ai/generative-ai/pricing ("Gemini Deep Research Agent" row)
+  - OpenRouter model pages (secondary pricing)
+
+## Provider-level checks
+
+| Check | Result |
+|---|---|
+| Capability consumption in `apps/sim/providers/gemini/` | Only `thinking` is consumed: `request.thinkingLevel` → `mapToThinkingLevel` → `thinkingConfig` (`gemini/core.ts:955-961`). No references to `reasoningEffort`, `verbosity`, `nativeStructuredOutputs`, or `computerUse`. Declaring `thinking.levels`/`default` per model is the only capability surface that affects requests. |
+| `temperature: { min: 0, max: 2 }` | **Verified.** GenerationConfig documents temperature range [0.0, 2.0] (https://ai.google.dev/api/generate-content). Note that Google recommends keeping the default of 1.0 on Gemini 3 models, but 0–2 is the accepted API range. Verdict: correct on all entries. |
+| 2.5-series entries have no `thinking` capability | **Correct by design.** Gemini 2.5 uses `thinkingBudget`, not `thinkingLevel` (https://ai.google.dev/gemini-api/docs/thinking). Our provider only sends `thinkingConfig` when a level is selected, so omitting `thinking` on 2.5 entries is right. |
+
+## Per-model verification
+
+### gemini-3.5-flash
+
+| Field | Our value | Source | Verdict |
+|---|---|---|---|
+| id | `gemini-3.5-flash` (stable/GA) | docs/models, model page | OK |
+| pricing.input | 1.5 | docs/pricing ($1.50); Vertex ($1.50 global); OpenRouter ($1.50) | OK |
+| pricing.cachedInput | 0.15 | docs/pricing ($0.15); Vertex ($0.15) | OK |
+| pricing.output | 9.0 | docs/pricing ($9.00); Vertex ($9.00); OpenRouter ($9.00) | OK |
+| thinking.levels | minimal/low/medium/high | docs/thinking | OK |
+| thinking.default | medium | docs/thinking ("Default: medium"); OpenRouter ("defaults to medium thinking effort") | OK |
+| maxOutputTokens | 65536 | model page (65,536) | OK |
+| contextWindow | 1048576 | model page (1,048,576) | OK |
+| releaseDate | 2026-05-19 | changelog: "May 19, 2026 — Released `gemini-3.5-flash`, the generally available (GA) version" | OK |
+| recommended | true | Google's flagship recommendation; replacement target for 2.0-flash and 3-flash-preview | OK |
+
+### gemini-3.1-pro-preview
+
+| Field | Our value | Source | Verdict |
+|---|---|---|---|
+| id | `gemini-3.1-pro-preview` | docs/models, model page | OK |
+| pricing.input | 2.0 | docs/pricing ($2.00 ≤200k; $4.00 >200k); OpenRouter ($2) | OK (base tier; see "Deliberately not changed") |
+| pricing.cachedInput | 0.2 | docs/pricing ($0.20 ≤200k) | OK |
+| pricing.output | 12.0 | docs/pricing ($12.00 ≤200k; $18.00 >200k); OpenRouter ($12) | OK |
+| thinking.levels | low/medium/high (no minimal — PR #4990 change) | docs/thinking: "Supported levels: low, medium, high"; "Thinking cannot be disabled" | OK — #4990 change re-confirmed |
+| thinking.default | high | docs/thinking ("Default: high (dynamic)") | OK |
+| maxOutputTokens | 65536 | model page | OK |
+| contextWindow | 1048576 | model page (1,048,576) | OK |
+| releaseDate | 2026-02-19 | changelog: "Feb 19, 2026 — Released Gemini 3.1 Pro Preview" | OK |
+
+### gemini-3.1-flash-lite
+
+| Field | Our value | Source | Verdict |
+|---|---|---|---|
+| id | `gemini-3.1-flash-lite` (stable — PR #4990 rename) | docs/models lists stable; `gemini-3.1-flash-lite-preview` marked "Shut down" (May 25, 2026 per deprecations) | OK — rename re-confirmed |
+| pricing.input | 0.25 | docs/pricing ($0.25 text); Vertex ($0.25 global); OpenRouter ($0.25) | OK |
+| pricing.cachedInput | 0.025 | docs/pricing ($0.025); Vertex ($0.025) | OK |
+| pricing.output | 1.5 | docs/pricing ($1.50); Vertex ($1.50); OpenRouter ($1.50) | OK |
+| thinking.levels | minimal/low/medium/high | docs/thinking; OpenRouter ("full thinking levels (minimal, low, medium, high)") | OK |
+| thinking.default | minimal | docs/thinking: "Default: minimal" — Google's documented API default for this model **is** `minimal`, so our value matches the API default (the earlier report that the API default is 'high' is not supported by current docs). Also aligns with our cost-saving intent. | OK |
+| maxOutputTokens | 65536 | model page (65,536) | OK |
+| contextWindow | 1048576 | model page (1,048,576) | OK |
+| releaseDate | 2026-03-03 — **STALE**: that is the preview's release date | changelog: "May 7, 2026 — Released `gemini-3.1-flash-lite`, the generally available (GA) version"; cloud.google.com blog "Gemini 3.1 Flash-Lite is now generally available" (GA announcement published 2026-05-08). Changelog (Gemini API source of truth) wins. | **FIX → 2026-05-07** |
+| speedOptimized | (absent) | Model page: "optimized for low-latency, cost-effective" high-volume tasks; Google blog: "fastest and most cost-efficient Gemini 3 series model". Precedent: `gemini-2.5-flash-lite` carries `speedOptimized: true` and Google's models page calls 2.5-flash-lite "the fastest and most budget-friendly" of its generation — 3.1-flash-lite holds the same position in the Gemini 3 generation. | **FIX → add `speedOptimized: true`** |
+
+### gemini-3-flash-preview
+
+| Field | Our value | Source | Verdict |
+|---|---|---|---|
+| id | `gemini-3-flash-preview` | docs/models, model page | OK |
+| pricing.input | 0.5 | docs/pricing ($0.50 text); OpenRouter ($0.50) | OK |
+| pricing.cachedInput | 0.05 | docs/pricing ($0.05) | OK |
+| pricing.output | 3.0 | docs/pricing ($3.00); OpenRouter ($3.00) | OK |
+| thinking.levels | minimal/low/medium/high | docs/thinking | OK |
+| thinking.default | high | docs/thinking ("Default: high (dynamic)") | OK |
+| maxOutputTokens | 65536 | model page | OK |
+| contextWindow | 1048576 (PR #4990 change) | model page (1,048,576); OpenRouter (1M) | OK — #4990 change re-confirmed |
+| releaseDate | 2025-12-17 | changelog: "Dec 17, 2025 — Launched Gemini 3 Flash Preview"; OpenRouter | OK |
+| deprecated | (absent) | docs/deprecations lists `gemini-3-flash-preview` in the deprecation table with recommended replacement `gemini-3.5-flash`, **no shutdown date announced yet**. (The model's own page still renders as an active preview — the deprecations table is the authoritative lifecycle source.) | **FIX → add `deprecated: true`** |
+
+### gemini-2.5-pro
+
+| Field | Our value | Source | Verdict |
+|---|---|---|---|
+| pricing.input | 1.25 | docs/pricing ($1.25 ≤200k); OpenRouter ($1.25) | OK (base tier) |
+| pricing.cachedInput | 0.125 | docs/pricing ($0.125 ≤200k) | OK |
+| pricing.output | 10.0 | docs/pricing ($10.00 ≤200k); OpenRouter ($10) | OK |
+| maxOutputTokens | 65536 | longstanding model-page value | OK |
+| contextWindow | 1048576 | OpenRouter (1M); longstanding model-page value | OK |
+| releaseDate | 2025-03-25 | preview launch date (GA was 2025-06-17); repo convention uses first availability | OK |
+
+### gemini-2.5-flash
+
+| Field | Our value | Source | Verdict |
+|---|---|---|---|
+| pricing.input | 0.3 | docs/pricing ($0.30 text) | OK |
+| pricing.cachedInput | 0.03 | docs/pricing ($0.03) | OK |
+| pricing.output | 2.5 | docs/pricing ($2.50) | OK |
+| maxOutputTokens / contextWindow | 65536 / 1048576 | longstanding model-page values | OK |
+| releaseDate | 2025-05-20 | I/O 2025 preview launch | OK |
+
+### gemini-2.5-flash-lite
+
+| Field | Our value | Source | Verdict |
+|---|---|---|---|
+| pricing.input | 0.1 | docs/pricing ($0.10 text) | OK |
+| pricing.cachedInput | 0.01 | docs/pricing ($0.01) | OK |
+| pricing.output | 0.4 | docs/pricing ($0.40) | OK |
+| maxOutputTokens / contextWindow | 65536 / 1048576 | longstanding model-page values | OK |
+| releaseDate | 2025-06-17 | launch announcement | OK |
+| speedOptimized | true | docs/models: "fastest and most budget-friendly multimodal model" | OK |
+
+### gemini-2.0-flash (deprecated)
+
+| Field | Our value | Source | Verdict |
+|---|---|---|---|
+| deprecated | true (PR #4990 change) | docs/deprecations: shutdown June 1, 2026; changelog: "now shut down"; docs/pricing marks "(deprecated; shutdown June 1, 2026)". Replacement: gemini-3.5-flash. | OK — #4990 change re-confirmed. Entry retained intentionally for saved-workflow history. |
+| pricing | input 0.1 / cachedInput 0.025 / output 0.4 | docs/pricing (still published) | OK |
+| maxOutputTokens / contextWindow | 8192 / 1048576 | historical model-page values | OK |
+| releaseDate | 2025-02-05 | GA announcement | OK |
+
+### gemini-2.0-flash-lite (deprecated)
+
+| Field | Our value | Source | Verdict |
+|---|---|---|---|
+| deprecated | true (PR #4990 change) | docs/deprecations: shutdown June 1, 2026. Replacement: gemini-3.1-flash-lite. | OK — re-confirmed; retained for history |
+| pricing | input 0.075 / output 0.3 (no cachedInput — caching was never priced for this SKU) | docs/pricing | OK |
+| maxOutputTokens / contextWindow | 8192 / 1048576 | historical model-page values | OK |
+| releaseDate | 2025-02-25 | GA announcement | OK |
+
+### deep-research-pro-preview-12-2025
+
+| Field | Our value | Source | Verdict |
+|---|---|---|---|
+| id | `deep-research-pro-preview-12-2025` | model page https://ai.google.dev/gemini-api/docs/models/deep-research-pro-preview-12-2025 (Interactions API) | OK |
+| pricing.input | 2.0 (PR #4990) | Vertex AI pricing, "Gemini Deep Research Agent": $2/1M input | OK — re-confirmed |
+| pricing.cachedInput | 0.2 (PR #4990) | Vertex AI pricing: $0.2/1M cached input | OK — re-confirmed |
+| pricing.output | 12.0 (PR #4990, was 2.0) | Vertex AI pricing: $12/1M output (response and reasoning). Consistent with underlying Gemini 3.1 Pro rates ($2/$0.2/$12). | OK — re-confirmed |
+| capabilities | deepResearch: true, memory: false | model page (agentic researcher; Interactions API) | OK |
+| maxOutputTokens | 65536 | model page (65,536) | OK |
+| contextWindow | 1048576 (PR #4990) | model page (1,048,576) | OK — re-confirmed |
+| releaseDate | 2025-12-11 | model page only says "December 2025"; exact day not published in fetched docs | Unverifiable to the day; month consistent — keep |
+| Lifecycle | Not listed on docs/deprecations; no shutdown announced | docs/deprecations | OK to keep |
+
+**Recommendation (documented only, no entries added):** Google introduced `deep-research-preview-04-2026` and `deep-research-max-preview-04-2026` on 2026-04-21 (changelog; https://ai.google.dev/gemini-api/docs/interactions/deep-research). The Deep Research interactions doc now leads with these SKUs and prices them per-task (~$1–3 / ~$3–7). A follow-up should evaluate adding them once per-token pricing is published; `deep-research-pro-preview-12-2025` remains documented and un-deprecated, so no change now.
+
+## Changes made in this pass
+
+None to `models.ts` (per task rules — fix list reported separately). This document is the only artifact.
+
+## Re-confirmed PR #4990 changes
+
+1. `gemini-3.1-flash-lite-preview` → `gemini-3.1-flash-lite` rename — preview slug shut down 2026-05-25 (deprecations page); stable listed on docs/models.
+2. `gemini-3.1-pro-preview` thinking.levels without `minimal` — docs/thinking lists low/medium/high only; "thinking cannot be disabled".
+3. `gemini-3-flash-preview` contextWindow 1048576 — model page.
+4. `deprecated: true` on gemini-2.0-flash and gemini-2.0-flash-lite — shut down 2026-06-01 (deprecations + changelog).
+5. Deep Research output 12.0, cachedInput 0.2, contextWindow 1048576 — Vertex pricing row + model page.
+
+## Recommended fixes (not applied)
+
+1. `gemini-3.1-flash-lite`: `releaseDate` `2026-03-03` → `2026-05-07` — current value is the preview's release date; GA released May 7, 2026 per Gemini API changelog (Cloud blog announcement published May 8, 2026; changelog wins as the API source of truth).
+2. `gemini-3.1-flash-lite`: add `speedOptimized: true` — Google positions it as the fastest, most cost-efficient Gemini 3 model (model page, GA blog); matches the precedent set by `gemini-2.5-flash-lite`.
+3. `gemini-3-flash-preview`: add `deprecated: true` — formally listed on https://ai.google.dev/gemini-api/docs/deprecations with replacement `gemini-3.5-flash` (no shutdown date announced yet).
+
+## Deliberately not changed
+
+- **`gemini-3.1-flash-lite` thinking.default `minimal`** — matches Google's documented default for this model (docs/thinking: "Default: minimal") and is also our intended cost-saving default. No conflict.
+- **Tiered pricing (`gemini-3.1-pro-preview`, `gemini-2.5-pro`)** — we model the ≤200k-token base tier ($2/$12 and $1.25/$10). The >200k tier ($4/$18 and $2.50/$15) is not representable in the flat pricing schema; base tier is the established repo convention.
+- **Audio input pricing** (flash models have higher audio-input rates, e.g. 3.1-flash-lite $0.50 audio) — schema models text-input pricing only; convention.
+- **gemini-2.0-flash / -flash-lite entries kept despite shutdown** — `deprecated: true` retained instead of deletion so saved workflows referencing them keep rendering history correctly.
+- **Deep Research newer SKUs not added** — per-task preview pricing only; documented as a follow-up recommendation above.
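+- **`gemini-2.5-pro` releaseDate 2025-03-25** — preview-launch date; repo convention is first availability, not GA (2025-06-17). NOTE(review): recommended fix 1 moves `gemini-3.1-flash-lite` from its preview date to its GA date, which departs from this convention — confirm which rule applies to an id renamed from preview to stable.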
+- **`updatedAt: 2026-06-11`** on all entries — accurate as of this validation.
+
+## Unverifiable
+
+- **deep-research-pro-preview-12-2025 exact release day (2025-12-11)** — Google docs only state "December 2025"; the day-level value could not be confirmed or refuted. Month consistent; left as-is.
+- **2.5-series maxOutputTokens (65,536) and 2.0-series limits (8,192 / 1,048,576)** — not re-fetched per-model in this pass; values match longstanding Google model-page specs and were unchanged by PR #4990. OpenRouter corroborates 1M context for 2.5-pro.
+- **Gemini API pricing page for Deep Research** — the ai.google.dev pricing page does not list the 12-2025 SKU (it now points at the 04-2026 per-task estimates); per-token verification rests on the Vertex AI "Gemini Deep Research Agent" row alone (single — but official Google — source).
diff --git a/docs/models/groq.md b/docs/models/groq.md
new file mode 100644
index 0000000000..b4bc6905d4
--- /dev/null
+++ b/docs/models/groq.md
@@ -0,0 +1,157 @@
+# Groq Provider Validation — Final Pass
+
+**Date:** 2026-06-11
+**Scope:** `groq` provider block in `apps/sim/providers/models.ts` (8 models). Re-verifies everything, including the changes landed in PR #4990 (kimi `deprecated: true`, gpt-oss `cachedInput`, `updatedAt` bumps).
+
+## Sources & Method
+
+| Source | What it verified |
+|---|---|
+| `https://api.groq.com/openai/v1/models` (live, authenticated with local dev key) | Active model list, `context_window`, `max_completion_tokens`, `created` timestamps. Groq's own per-model doc pages render their spec tables client-side from this same data ("Loading model information..." in static HTML), so the API is the authoritative equivalent of the per-model pages. |
+| `https://groq.com/pricing` (live fetch) | All input/cached-input/output rates |
+| `https://console.groq.com/docs/prompt-caching` (live fetch) | Caching-supported model list, 50% cached-token discount |
+| `https://console.groq.com/docs/deprecations` (live fetch) | kimi shutdown, qwen3-32b status |
+| `https://console.groq.com/docs/models` + per-model `.md` pages (live fetch) | Featured/flagship positioning, context-window prose, model-card positioning |
+| Groq OpenAPI spec embedded in `console.groq.com/docs/model/*` HTML | `temperature` parameter bounds (`minimum: 0, maximum: 2`) |
+| OpenRouter `GET /api/v1/models/<slug>/endpoints` Groq rows (secondary) | Pricing cross-check, `max_completion_tokens` cross-check |
+| WebSearch (Meta blog coverage, Moonshot K2-0905 announcement coverage) | Upstream release dates |
+
+Rule applied: where Groq's own sources conflict with secondary sources, Groq wins.
+
+## Per-Model Verification
+
+### groq/openai/gpt-oss-120b
+
+| Field | Repo value | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| pricing.input | 0.15 | $0.15/M | groq.com/pricing; OpenRouter Groq row 0.00000015 | OK |
+| pricing.cachedInput | 0.075 | $0.075/M | groq.com/pricing (explicit cached column); prompt-caching doc 50% rule; OpenRouter 0.000000075 | OK (PR #4990 change confirmed) |
+| pricing.output | 0.6 | $0.60/M | groq.com/pricing; OpenRouter | OK |
+| contextWindow | 131072 | 131072 | api.groq.com/openai/v1/models; model card "131K context window" | OK |
+| capabilities.maxOutputTokens | — (absent) | 65536 | api.groq.com/openai/v1/models `max_completion_tokens`; OpenRouter agrees | **FIX: add 65536** |
+| releaseDate | 2025-08-05 | 2025-08-05 | Groq API `created` = 1754408224 → 2025-08-05 UTC | OK |
+| recommended | — (absent) | should be `true` | console.groq.com/docs/models features it as "OpenAI's flagship open-weight language model" (~500 t/s); deprecations page names `openai/gpt-oss-120b` as the recommended replacement (incl. for kimi-k2-instruct-0905) | **FIX: add `recommended: true`** |
+| deprecated | — | active | live API `active: true`; not on deprecations page | OK |
+
+### groq/openai/gpt-oss-20b
+
+| Field | Repo value | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| pricing.input | 0.075 | $0.075/M | groq.com/pricing; OpenRouter | OK |
+| pricing.cachedInput | 0.0375 | $0.0375/M | groq.com/pricing (explicit); OpenRouter 0.0000000375 | OK (PR #4990 confirmed) |
+| pricing.output | 0.3 | $0.30/M | groq.com/pricing; OpenRouter | OK |
+| contextWindow | 131072 | 131072 | Groq API; model card "up to 131K" | OK |
+| capabilities.maxOutputTokens | — | 65536 | Groq API `max_completion_tokens`; OpenRouter agrees | **FIX: add 65536** |
+| releaseDate | 2025-08-05 | 2025-08-05 | Groq API `created` = 1754407957 → 2025-08-05 UTC | OK |
+| deprecated | — | active | live API; deprecations page | OK |
+
+### groq/openai/gpt-oss-safeguard-20b
+
+| Field | Repo value | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| pricing.input | 0.075 | $0.075/M | groq.com/pricing | OK |
+| pricing.cachedInput | 0.0375 | $0.0375/M | prompt-caching doc lists this model as caching-supported with "50% discount for cached input tokens" → 0.075 × 0.5 = 0.0375. Pricing page shows no cached column for this row; OpenRouter shows $0.037/M (rounding). Groq's caching doc wins. | OK (PR #4990 confirmed) |
+| pricing.output | 0.3 | $0.30/M | groq.com/pricing | OK |
+| contextWindow | 131072 | 131072 | Groq API | OK |
+| capabilities.maxOutputTokens | — | 65536 | Groq API `max_completion_tokens`; OpenRouter agrees | **FIX: add 65536** |
+| releaseDate | 2025-10-29 | 2025-10-29 | Groq API `created` = 1761708789 → 2025-10-29 UTC | OK |
+| deprecated | — | active | live API; deprecations page | OK |
+
+### groq/qwen/qwen3-32b
+
+| Field | Repo value | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| pricing.input | 0.29 | $0.29/M | groq.com/pricing; OpenRouter | OK |
+| pricing.cachedInput | — | none on Groq | Not in prompt-caching supported list (gpt-oss ×3 only); no cached column on pricing page. OpenRouter shows a 50% `input_cache_read` ($0.145) — Groq docs win; do not add. | OK (absent) |
+| pricing.output | 0.59 | $0.59/M | groq.com/pricing; OpenRouter | OK |
+| contextWindow | 131072 | 131072 | Groq API | OK |
+| capabilities.maxOutputTokens | — | 40960 | Groq API `max_completion_tokens`; OpenRouter agrees | **FIX: add 40960** |
+| releaseDate | 2025-04-29 | 2025-04-29 | Upstream Qwen3 family launch (field is "first publicly released"). Groq endpoint `created` is 2025-05-28 (when Groq added it) — repo convention uses upstream release. | OK |
+| deprecated | — | **not deprecated** | `active: true` in live API; absent from deprecations page (appears there only as a *replacement* for mistral-saba-24b / qwen-qwq-32b) | OK — confirmed still active (open question f) |
+
+### groq/llama-3.1-8b-instant
+
+| Field | Repo value | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| pricing.input | 0.05 | $0.05/M | groq.com/pricing; OpenRouter | OK |
+| pricing.output | 0.08 | $0.08/M | groq.com/pricing; OpenRouter | OK |
+| pricing.cachedInput | — | none on Groq | Not in caching-supported list; no cached column on pricing page (OpenRouter's $0.025 row not honored — Groq wins) | OK (absent) |
+| contextWindow | 131072 | 131072 | Groq API | OK |
+| capabilities.maxOutputTokens | — | 131072 | Groq API `max_completion_tokens` = 131072 (full window); OpenRouter agrees | **FIX: add 131072** |
+| releaseDate | 2024-07-23 | 2024-07-23 | Meta released Llama 3.1 (8B/70B/405B) on 2024-07-23 (ai.meta.com/blog/meta-llama-3-1, press coverage dated 2024-07-23). Groq API `created` (2023-09-03) is a placeholder shared with whisper entries and predates Llama 3.1 — not meaningful. | OK — verified (open question g) |
+| speedOptimized | — (absent) | should be `true` | Groq's speed-tier "-instant" naming; model card positions it for "Real-Time Applications … requiring instant responses and high throughput"; cheapest text model in the lineup. Matches repo precedent (claude-3-haiku, gemini-2.0-flash). | **FIX: add `speedOptimized: true`** |
+| deprecated | — | active | live API; deprecations page (it is a replacement target, not deprecated) | OK |
+
+### groq/llama-3.3-70b-versatile
+
+| Field | Repo value | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| pricing.input | 0.59 | $0.59/M | groq.com/pricing; OpenRouter | OK |
+| pricing.output | 0.79 | $0.79/M | groq.com/pricing; OpenRouter | OK |
+| contextWindow | 131072 | 131072 | Groq API | OK |
+| capabilities.maxOutputTokens | — | 32768 | Groq API `max_completion_tokens`; OpenRouter agrees | **FIX: add 32768** |
+| releaseDate | 2024-12-06 | 2024-12-06 | Groq API `created` = 1733447754 → 2024-12-06 UTC, matching Meta's Llama 3.3 launch day | OK — verified (open question g) |
+| deprecated | — | active | live API; deprecations page (replacement target for several retired models) | OK |
+
+### groq/meta-llama/llama-4-scout-17b-16e-instruct
+
+| Field | Repo value | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| pricing.input | 0.11 | $0.11/M | groq.com/pricing; OpenRouter | OK |
+| pricing.output | 0.34 | $0.34/M | groq.com/pricing; OpenRouter | OK |
+| contextWindow | 131072 | 131072 | Groq API | OK |
+| capabilities.maxOutputTokens | — | 8192 | Groq API `max_completion_tokens`; OpenRouter agrees | **FIX: add 8192** |
+| releaseDate | 2025-04-05 | 2025-04-05 | Groq API `created` = 1743874824 → 2025-04-05 UTC (Meta Llama 4 launch day) | OK |
+| deprecated | — | active | live API; deprecations page | OK |
+
+### groq/moonshotai/kimi-k2-instruct-0905
+
+| Field | Repo value | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| deprecated | true | shut down | console.groq.com/docs/deprecations: shutdown **2026-04-15**, replacement `openai/gpt-oss-120b`; model entirely absent from the live `/v1/models` response | OK (PR #4990 change confirmed — open question regarding shutdown resolved) |
+| pricing.input | 1.0 | $1.00/M | groq.com/pricing (row still present); OpenRouter | OK |
+| pricing.output | 3.0 | $3.00/M | groq.com/pricing; OpenRouter | OK |
+| pricing.cachedInput | — | conflicting | groq.com/pricing still shows $0.50 cached, but the prompt-caching doc's supported list contains only the 3 gpt-oss models, and the model is removed from the API. Conflicting Groq sources + shut-down model → not added (see "Deliberately not changed"). | OK (absent) |
+| contextWindow | 262144 | 262144 | Moonshot K2-0905 announcement ("context length expanded from 128K to 256K"); Groq model card description "256K context"; OpenRouter Groq row 262144. Live Groq API no longer lists the model. | OK |
+| capabilities.maxOutputTokens | — | 16384 (OpenRouter only) | Only source is OpenRouter; model is gone from Groq's API and its doc-page spec table cannot be rendered. Cannot confirm from Groq's own docs → **skipped** per validation rules. | Not added (unverifiable from Groq) |
+| releaseDate | 2025-09-05 | 2025-09-05 | Moonshot AI announced K2-Instruct-0905 on September 5, 2025 (aibase coverage; simonwillison.net 2025-09-06; the `0905` suffix) | OK — verified (open question g) |
+| pricing.updatedAt | 2026-04-01 | — | Prices re-checked today and unchanged; model is shut down, so no bump needed | OK |
+
+## Provider-Level Capability: temperature
+
+**Recommendation: add `temperature: { min: 0, max: 2 }` to the groq provider `capabilities`.**
+
+- Groq's OpenAPI spec (embedded in console.groq.com docs pages, chat-completions `temperature`): "What sampling temperature to use, between 0 and 2", `"minimum": 0, "maximum": 2`.
+- `apps/sim/providers/groq/index.ts:82` already forwards it: `if (request.temperature !== undefined) payload.temperature = request.temperature` — so the only thing missing is the capability flag; today Sim hides the temperature slider for every Groq model while the provider would happily accept the value.
+- Precedent: `fireworks` (models.ts:97), `together` (models.ts:113), and `baseten` (models.ts:129) all declare `temperature: { min: 0, max: 2 }` at the provider level for the same OpenAI-compatible 0–2 range.
+
+**Test impact** (`apps/sim/providers/utils.test.ts`):
+- ~line 214: `'groq/meta-llama/llama-4-scout-17b-16e-instruct'` must be removed from the `unsupportedModels` list in the `supportsTemperature` → false test (it will now return `true`; move it to the supported list).
+- ~line 288: `expect(getMaxTemperature('groq/meta-llama/llama-4-scout-17b-16e-instruct')).toBeUndefined()` must change to expect `2` (move into the "range 0-2" group).
+
+## Changes made in this pass
+
+None to `models.ts` (per instructions — doc only). The fix list below is the recommended diff.
+
+1. `groq` provider capabilities: add `temperature: { min: 0, max: 2 }` (+ update the two utils.test.ts assertions above).
+2. `groq/openai/gpt-oss-120b`: `capabilities: {}` → `capabilities: { maxOutputTokens: 65536 }`; add `recommended: true`.
+3. `groq/openai/gpt-oss-20b`: add `maxOutputTokens: 65536`.
+4. `groq/openai/gpt-oss-safeguard-20b`: add `maxOutputTokens: 65536`.
+5. `groq/qwen/qwen3-32b`: add `maxOutputTokens: 40960`.
+6. `groq/llama-3.1-8b-instant`: add `maxOutputTokens: 131072`; add `speedOptimized: true`.
+7. `groq/llama-3.3-70b-versatile`: add `maxOutputTokens: 32768`.
+8. `groq/meta-llama/llama-4-scout-17b-16e-instruct`: add `maxOutputTokens: 8192`.
+
+## Deliberately not changed
+
+- **kimi-k2-instruct-0905 `cachedInput`**: groq.com/pricing still shows $0.50 cached, but the canonical prompt-caching doc's supported-model list is exactly the three gpt-oss models, and the model is shut down (absent from the live API since the 2026-04-15 shutdown). Conflicting Groq sources for a decommissioned model — adding a cached rate would be dead config. Reconciliation: the pricing-page row is residual for a removed model; the caching doc never listed kimi.
+- **kimi-k2-instruct-0905 `maxOutputTokens`**: 16384 is OpenRouter-only; cannot be confirmed from Groq's own docs/API (model removed). Skipped per validation rules.
+- **`cachedInput` on qwen3-32b / llama-3.1-8b-instant**: OpenRouter's Groq endpoints advertise 50% `input_cache_read` rates, but Groq's prompt-caching doc explicitly limits caching support to the three gpt-oss models and the pricing page shows no cached column for them. Groq docs win. Re-check if Groq's promised caching rollout ("more models soon") lands.
+- **All pricing, contextWindow, releaseDate values**: verified correct as-is (including all PR #4990 changes — kimi `deprecated: true`, the three gpt-oss `cachedInput` rates, and `updatedAt: '2026-06-11'` bumps).
+- **kimi `pricing.updatedAt: '2026-04-01'`**: prices unchanged and model shut down; no bump needed.
+- **`defaultModel: 'groq/llama-3.3-70b-versatile'`**: still active and reasonable; changing the default is a product decision, not a validation finding.
+
+## Unverifiable
+
+- **kimi-k2-instruct-0905 `maxOutputTokens` (16384)** — Groq removed the model from its API and the doc page's spec table no longer renders; only OpenRouter attests it.
+- Nothing else: every other field was confirmed against at least one Groq-owned source (live `/v1/models` API, groq.com/pricing, prompt-caching doc, deprecations doc, or embedded OpenAPI spec), with OpenRouter as a corroborating secondary on pricing and token limits.
diff --git a/docs/models/mistral.md b/docs/models/mistral.md
new file mode 100644
index 0000000000..26b236e099
--- /dev/null
+++ b/docs/models/mistral.md
@@ -0,0 +1,305 @@
+# Mistral Provider Validation — Final Pass
+
+- **Date:** 2026-06-11
+- **Scope:** All 27 entries of the `mistral` provider block in `apps/sim/providers/models.ts` (lines ~2124–2501), re-verifying everything including the changes landed in PR #4990 (7 deprecations, 8 releaseDate fixes, updatedAt bumps).
+- **Method:** Live fetches of Mistral docs (model overview, model cards, pricing page, prompt-caching guide), direct download + grep of the canonical OpenAPI spec, and — decisively — the **server-side model-card source data** in `mistralai/platform-docs-public` (`src/schema/models/models/*.ts`, shallow-cloned at `main` on 2026-06-11). These TypeScript data files are what docs.mistral.ai renders into the model cards, and they carry `apiNames` (alias mappings), prices, context lengths, release dates, and `deprecationDate`/`retirementDate` metadata that the rendered pages omit. OpenRouter used as the secondary pricing source.
+
+## Sources
+
+| Source | URL |
+|---|---|
+| Models overview | https://docs.mistral.ai/getting-started/models/models_overview |
+| Pricing page | https://mistral.ai/pricing |
+| Model cards | https://docs.mistral.ai/models/model-cards/&lt;slug&gt; (slugs cited per model below) |
+| Model-card source data (authoritative) | https://github.com/mistralai/platform-docs-public — `src/schema/models/models/*.ts` @ `main`, 2026-06-11 |
+| OpenAPI spec | https://raw.githubusercontent.com/mistralai/platform-docs-public/main/openapi.yaml |
+| Prompt caching guide | https://docs.mistral.ai/studio-api/conversations/advanced/prompt-caching |
+| OpenRouter (secondary pricing) | https://openrouter.ai/mistralai/&lt;slug&gt; |
+
+Below, "data file" = the model's source file in `src/schema/models/models/`.
+
+---
+
+## Per-model verification
+
+### mistral-large-latest / mistral-large-2512 (Mistral Large 3, 25.12)
+
+Data file: `mistral-large-3-25-12.ts`. Model card: `/models/model-cards/mistral-large-3-25-12`.
+
+| Field | Ours | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| pricing input/output | 0.5 / 1.5 | $0.5 / $1.5 per 1M | Data file, model card, pricing page ("Mistral Large 3: $0.5 / $1.5"), OpenRouter `mistral-large-2512` ($0.50/$1.50) | ✓ |
+| contextWindow | 256000 | 256k | Data file `contextLength: '256k'`; OpenRouter shows 262K (same window, binary units) | ✓ |
+| releaseDate | 2025-12-02 | 2025-12-02 | Data file `releaseDate: '2025-12-02'` | ✓ |
+| alias | latest → 2512 | `apiNames: ['mistral-large-2512', 'mistral-large-latest']` | Data file | ✓ |
+| status | active | `status: 'Active'` | Data file | ✓ |
+| temperature | {0, 1} | spec allows {0, **1.5**} | OpenAPI `ChatCompletionRequest.temperature` | ✗ see Changes |
+| recommended | (absent) | provider default, flagship | — | ✗ see Changes |
+
+Note: an initial pricing-page fetch summarized Large 3 as $2/$6; a verbatim re-fetch showed that was a summarization error — the literal row is "$0.5 / $1.5 /M tokens". $2/$6 is the legacy mistral-large-2411 price.
+
+### mistral-small-2603 / mistral-small-latest (Mistral Small 4, 26.03) — CONFLICT RULING
+
+Data file: `mistral-small-4-0-26-03.ts`. Model card: `/models/model-cards/mistral-small-4-0-26-03`.
+
+| Field | Ours | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| pricing input/output | 0.15 / 0.6 | **$0.15 / $0.6** (ruling below) | Data file (`price: 0.15` / `price: 0.6`), model card, OpenRouter `mistral-small-2603` ($0.15/$0.60) | ✓ KEEP |
+| contextWindow | 256000 | 256k | Data file | ✓ |
+| releaseDate | 2026-03-16 | 2026-03-16 | Data file | ✓ |
+| alias | latest → 2603 | `apiNames: ['mistral-small-2603', 'mistral-small-latest']` | Data file | ✓ |
+| status | active | `status: 'Active'` | Data file | ✓ |
+
+**Ruling on the open price conflict (question a):** mistral.ai/pricing again printed "$0.1 / $0.3" for Mistral Small 4 (verbatim re-fetch, third consistent reading). But three independent confirmations say $0.15/$0.6: (1) the model card, (2) the model-card **source data file** that docs.mistral.ai renders into the model cards, and (3) OpenRouter's Mistral endpoint, which mirrors what Mistral actually charges resellers. $0.1/$0.3 is exactly the price of the predecessor Mistral Small 3.2 (`mistral-small-2506`, verified below), so the pricing-page row is almost certainly a stale carry-over from Small 3.x, not a price cut. **Final value: 0.15 / 0.6 — no change.** Re-check if the pricing page row persists alongside an official price-cut announcement.
+
+### devstral-2512 / devstral-latest (Devstral 2, 25.12)
+
+Data file: `devstral-2-25-12.ts`.
+
+| Field | Ours | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| pricing input/output | 0.4 / 2.0 | $0.4 / $2 | Data file, pricing page ("Devstral 2: $0.4 / $2"), OpenRouter `devstral-2512` ($0.40/$2.00) | ✓ |
+| contextWindow | 256000 | 256k | Data file | ✓ |
+| releaseDate | 2025-12-09 | 2025-12-09 | Data file | ✓ |
+| alias | devstral-latest → 2512 | `apiNames: ['devstral-2512', 'devstral-latest', 'devstral-medium-latest']` | Data file | ✓ (note: `devstral-medium-latest` is a third alias we don't list — fine) |
+| status | active | `status: 'Active'` | Data file | ✓ |
+
+### mistral-large-2411 (deprecated)
+
+Data file: `mistral-large-2-1-24-11.ts`.
+
+| Field | Ours | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| pricing input/output | 2.0 / 6.0 | $2.0 / $6.0 | Data file (previously unverifiable — now confirmed) | ✓ |
+| contextWindow | 128000 | 128k | Data file | ✓ |
+| releaseDate | 2024-11-18 | 2024-11-18 | Data file | ✓ |
+| deprecated | true | `status: 'Deprecated'`, deprecationDate 2026-02-27, retirementDate 2026-05-31 (already retired) | Data file metadata | ✓ |
+
+### magistral-medium-latest / magistral-medium-2509
+
+Data file: `magistral-medium-1-2-25-09.ts`.
+
+| Field | Ours | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| pricing input/output | 2.0 / 5.0 | $2.0 / $5.0 | Data file, pricing page ("Magistral Medium: $2 / $5") | ✓ (OpenRouter: not listed — single-family source) |
+| contextWindow | 128000 | 128k | Data file | ✓ |
+| releaseDate | 2025-09-18 | 2025-09-18 | Data file (PR #4990 fix confirmed) | ✓ |
+| alias | latest → 2509 | `apiNames: ['magistral-medium-2509', 'magistral-medium-latest']` | Data file | ✓ |
+| status | active | `status: 'Active'` | Data file | ✓ |
+
+Note: Magistral is a reasoning model (`output: ['reasoning', 'text']`); see "Deliberately not changed" re `reasoning_effort`.
+
+### magistral-small-latest / magistral-small-2509 (deprecated)
+
+Data file: `magistral-small-1-2-25-09.ts`.
+
+| Field | Ours | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| pricing input/output | 0.5 / 1.5 | $0.5 / $1.5 | Data file, pricing page | ✓ |
+| contextWindow | 128000 | 128k | Data file | ✓ |
+| releaseDate | 2025-09-18 | 2025-09-18 | Data file (PR #4990 fix confirmed) | ✓ |
+| alias | small-latest → 2509 | `apiNames: ['magistral-small-2509', 'magistral-small-latest']` | Data file | ✓ |
+| deprecated | true | `status: 'Deprecated'`, deprecationDate 2026-04-30 (past), retirementDate 2026-07-31, replacement "Mistral Small 4" | Data file metadata | ✓ |
+
+### mistral-medium-latest / mistral-medium-2508 (Mistral Medium 3.1)
+
+Data file: `mistral-medium-3-1-25-08.ts`.
+
+| Field | Ours | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| pricing input/output | 0.4 / 2.0 | $0.4 / $2.0 | Data file | ✓ |
+| contextWindow | 128000 | 128k | Data file | ✓ |
+| releaseDate | 2025-08-12 | 2025-08-12 | Data file | ✓ |
+| alias | latest → 2508 | `apiNames: ['mistral-medium-2508', 'mistral-medium-latest']` | Data file | ✓ — **`mistral-medium-latest` still maps to 2508, NOT to Medium 3.5** (3.5 has its own apiNames, see below) |
+| status | active | `status: 'Active'` | Data file | ✓ |
+
+### mistral-medium-2505 (Mistral Medium 3)
+
+Data file: `mistral-medium-3-25-05.ts`.
+
+| Field | Ours | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| pricing input/output | 0.4 / 2.0 | $0.4 / $2.0 | Data file | ✓ |
+| contextWindow | 128000 | 128k | Data file | ✓ |
+| releaseDate | 2025-05-07 | 2025-05-07 | Data file | ✓ |
+| status | active (no flag) | `status: 'Active'` — not deprecated despite age | Data file | ✓ |
+
+### mistral-small-2506 (Mistral Small 3.2, deprecated)
+
+Data file: `mistral-small-3-2-25-06.ts`.
+
+| Field | Ours | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| pricing input/output | 0.1 / 0.3 | $0.1 / $0.3 | Data file (previously unverifiable — now confirmed) | ✓ |
+| contextWindow | 128000 | 128k | Data file | ✓ |
+| releaseDate | 2025-06-20 | 2025-06-20 | Data file | ✓ |
+| deprecated | true | `status: 'Deprecated'`, deprecationDate 2026-04-30 (past), retirementDate 2026-07-31 | Data file metadata | ✓ |
+
+### open-mistral-nemo
+
+Data file: `mistral-nemo-12b-24-07.ts`.
+
+| Field | Ours | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| pricing input/output | 0.15 / 0.15 | $0.15 / $0.15 | Data file, pricing page ("Mistral NeMo: $0.15 / $0.15") | ✓ |
+| contextWindow | 128000 | 128k | Data file | ✓ |
+| releaseDate | 2024-07-18 | 2024-07-18 | Data file | ✓ |
+| status | active (no flag) | `status: 'Active'` — still active | Data file | ✓ |
+
+### codestral-latest / codestral-2508
+
+Data file: `codestral-25-08.ts`. Model card: `/models/model-cards/codestral-25-08`.
+
+| Field | Ours | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| pricing input/output | 0.3 / 0.9 | $0.3 / $0.9 | Data file, model card, pricing page, OpenRouter `codestral-2508` ($0.30/$0.90) | ✓ |
+| contextWindow | 128000 | 128k per Mistral docs (data file + live model card). OpenRouter claims 256K — **Mistral docs win**, keep 128000 | Data file, model card | ✓ |
+| releaseDate | 2025-07-30 | 2025-07-30 | Data file | ✓ |
+| alias | latest → 2508 | `apiNames: ['codestral-2508', 'codestral-latest']` | Data file | ✓ |
+| status | active | `status: 'Active'` | Data file | ✓ |
+
+### devstral-small-latest (Devstral Small 2, 25.12, deprecated)
+
+Data file: `devstral-small-2-25-12.ts`.
+
+| Field | Ours | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| pricing input/output | 0.1 / 0.3 | $0.1 / $0.3 | Data file | ✓ |
+| contextWindow | 256000 | 256k | Data file | ✓ |
+| releaseDate | 2025-12-09 | 2025-12-09 | Data file (PR #4990 fix confirmed) | ✓ |
+| alias | — | `apiNames: ['labs-devstral-small-2512', 'devstral-small-latest']` | Data file | ✓ |
+| deprecated | true | `status: 'Deprecated'`, deprecationDate 2026-02-27, retirementDate 2026-03-31 (already retired), replacement "Devstral 2" | Data file metadata | ✓ |
+
+### devstral-small-2507 (deprecated)
+
+Data file: `devstral-small-1-1-25-07.ts`.
+
+| Field | Ours | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| pricing input/output | 0.1 / 0.3 | $0.1 / $0.3 | Data file (previously unverifiable — now confirmed) | ✓ |
+| contextWindow | 128000 | 128k | Data file | ✓ |
+| releaseDate | 2025-07-10 | 2025-07-10 | Data file | ✓ |
+| deprecated | true | `status: 'Deprecated'`, deprecationDate 2026-02-27, retirementDate 2026-05-31 (already retired) | Data file metadata | ✓ |
+
+### devstral-medium-2507 (deprecated)
+
+Data file: `devstral-medium-1-0-25-07.ts`.
+
+| Field | Ours | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| pricing input/output | 0.4 / 2.0 | $0.4 / $2.0 | Data file (previously unverifiable — now confirmed) | ✓ |
+| contextWindow | 128000 | 128k | Data file | ✓ |
+| releaseDate | 2025-07-10 | 2025-07-10 | Data file | ✓ |
+| deprecated | true | `status: 'Deprecated'`, deprecationDate 2026-02-27, retirementDate 2026-05-31 (already retired) | Data file metadata | ✓ |
+
+### ministral-14b-latest / ministral-14b-2512 (Ministral 3 14B)
+
+Data file: `ministral-3-14b-25-12.ts`. Model card: `/models/model-cards/ministral-3-14b-25-12`.
+
+| Field | Ours | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| pricing input/output | 0.2 / 0.2 | $0.2 / $0.2 | Data file, pricing page, OpenRouter `ministral-14b-2512` ($0.20/$0.20) | ✓ |
+| contextWindow | 256000 | 256k | Data file | ✓ |
+| releaseDate | 2025-12-02 | 2025-12-02 | Data file | ✓ |
+| alias | latest → 2512 | `apiNames: ['ministral-14b-2512', 'ministral-14b-latest']` | Data file | ✓ |
+| status | active | `status: 'Active'` | Data file | ✓ |
+| speedOptimized | (absent) | edge/low-latency tier | — | ✗ see Changes |
+
+### ministral-8b-latest / ministral-8b-2512 (Ministral 3 8B)
+
+Data file: `ministral-3-8b-25-12.ts`.
+
+| Field | Ours | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| pricing input/output | 0.15 / 0.15 | $0.15 / $0.15 | Data file, pricing page | ✓ |
+| contextWindow | 256000 | 256k | Data file | ✓ |
+| releaseDate | 2025-12-02 | 2025-12-02 | Data file (PR #4990 fix confirmed) | ✓ |
+| alias | latest → 2512 | `apiNames: ['ministral-8b-2512', 'ministral-8b-latest']` | Data file | ✓ |
+| speedOptimized | (absent) | edge/low-latency tier | — | ✗ see Changes |
+
+### ministral-3b-latest / ministral-3b-2512 (Ministral 3 3B)
+
+Data file: `ministral-3-3b-25-12.ts`.
+
+| Field | Ours | Verified value | Source | Verdict |
+|---|---|---|---|---|
+| pricing input/output | 0.1 / 0.1 | $0.1 / $0.1 | Data file, pricing page | ✓ |
+| contextWindow | 256000 | 256k | Data file | ✓ |
+| releaseDate | 2025-12-02 | 2025-12-02 | Data file (PR #4990 fix confirmed) | ✓ |
+| alias | latest → 2512 | `apiNames: ['ministral-3b-2512', 'ministral-3b-latest']` | Data file | ✓ |
+| speedOptimized | (absent) | edge/low-latency tier | — | ✗ see Changes |
+
+---
+
+## Provider-wide checks
+
+### Temperature bounds (question e) — DISCREPANCY FOUND
+
+The live OpenAPI spec's `ChatCompletionRequest.temperature` (openapi.yaml, schema at line 11988, property at 11997) is:
+
+```yaml
+temperature:
+  anyOf:
+  - type: number
+    maximum: 1.5
+    minimum: 0
+```
+
+with the description "we recommend between 0.0 and 0.7". So the chat-completions endpoint — the one Sim's provider calls (`https://api.mistral.ai/v1` + `chat.completions.create`) — accepts **0–1.5, not 0–1**. The 0–1 bound exists in the spec only on `CompletionArgs` (line ~8103), which is the **conversations/agents API**'s white-listed argument schema, not chat completions; that is likely where the earlier "max 1" belief came from. Verdict: our `{min: 0, max: 1}` is overly restrictive — users cannot select values above 1.0 (up to 1.5), which the API supports. Recommended fix: `max: 1.5` on all 27 entries.
+
+### Prompt caching (question b) — NOT WIRED, cachedInput NOT added
+
+- OpenAPI spec: `prompt_cache_key` exists on `ChatCompletionRequest` (line 12134), `FIMCompletionRequest` (12362), and `AgentsCompletionRequest` (13841): "A cache key to enable prompt caching. When provided, the API will attempt to reuse previously computed tokens... Cached tokens are billed at 10% of the standard input token price."
+- Prompt-caching guide confirms caching is **opt-in**: "Set the same `prompt_cache_key` on requests that are likely to share a prefix"; 64-token block granularity; hits reported via `usage.prompt_tokens_details.cached_tokens`.
+- Sim's provider (`apps/sim/providers/mistral/index.ts`) forwards only `temperature` and `max_tokens` (plus messages/tools/response_format). It does **not** send `prompt_cache_key`, so no Sim request can ever produce cached tokens.
+
+**Ruling: caching is opt-in, Sim does not opt in → adding `cachedInput` would be dead data. Not changed.** Recommended follow-up: wire `prompt_cache_key` in the Mistral provider (e.g. keyed per workflow execution/conversation), read `usage.prompt_tokens_details.cached_tokens`, then add `cachedInput = 0.1 × input` to all active entries (large 0.05, small 0.015, devstral 0.04, magistral-medium 0.2, medium 0.04, nemo 0.015, codestral 0.03, ministral-14b 0.02, ministral-8b 0.015, ministral-3b 0.01).
+
+### recommended / speedOptimized (question c) — BOTH JUSTIFIED
+
+- `recommended: true` on **mistral-large-latest**: it is the provider's `defaultModel`, Mistral's flagship generalist (Large 3), actively maintained, and the provider currently has zero recommended entries (every other major provider block marks its flagship). Justified.
+- `speedOptimized: true` on the **ministral tier** (14b/8b/3b, `-latest` and `-2512`, 6 entries): Ministral 3 is Mistral's edge/low-latency family ("les Ministraux" — edge models), the smallest and cheapest tier, directly analogous to the existing `speedOptimized` entries in models.ts (gpt-5-mini-class at line ~369, Haiku at line ~853). Justified.
+
+### Alias map (question g) — ALL CONFIRMED
+
+| Alias | Expected | Data-file `apiNames` | Verdict |
+|---|---|---|---|
+| mistral-large-latest | mistral-large-2512 | ✓ | ✓ |
+| mistral-small-latest | mistral-small-2603 | ✓ | ✓ |
+| codestral-latest | codestral-2508 | ✓ | ✓ |
+| devstral-latest | devstral-2512 | ✓ (also `devstral-medium-latest`) | ✓ |
+| devstral-small-latest | labs-devstral-small-2512 (Devstral Small 2) | ✓ | ✓ |
+| magistral-medium-latest | magistral-medium-2509 | ✓ | ✓ |
+| magistral-small-latest | magistral-small-2509 | ✓ | ✓ |
+| mistral-medium-latest | mistral-medium-2508 (NOT Medium 3.5) | ✓ | ✓ |
+| ministral-14b/8b/3b-latest | ministral-*-2512 | ✓ | ✓ |
+
+---
+
+## Changes made in this pass
+
+None to `models.ts` (per instructions, this pass writes only this document). The PR #4990 changes (7 deprecations, 8 releaseDate fixes) are all **confirmed correct** against the model-card source data.
+
+**Recommended fixes (the fix list):**
+
+1. `mistral-large-latest`: add `recommended: true` — provider default + flagship; provider has zero recommended entries.
+2. `ministral-14b-latest`, `ministral-14b-2512`, `ministral-8b-latest`, `ministral-8b-2512`, `ministral-3b-latest`, `ministral-3b-2512`: add `speedOptimized: true` — edge/low-latency tier, consistent with gpt-mini/haiku precedent.
+3. All 27 entries: `capabilities.temperature.max` 1 → **1.5** — OpenAPI `ChatCompletionRequest.temperature.maximum: 1.5`. (The 0–1 bound belongs to the conversations-API `CompletionArgs`, not chat completions. If the team prefers to cap the UI at Mistral's recommended sampling range instead of the API bound, keep 1 — but then document that choice; it does not match the endpoint Sim calls.)
+
+## Deliberately not changed
+
+- **mistral-small-2603 / mistral-small-latest pricing stays 0.15/0.6** — final ruling on the standing conflict: model card + model-card source data + OpenRouter all say $0.15/$0.6; only the marketing pricing page says $0.1/$0.3, which exactly equals the predecessor Small 3.2 price and is judged a stale row, not a price cut.
+- **No `cachedInput` on any entry** — Mistral caching is opt-in via `prompt_cache_key` and Sim's provider does not send it; adding prices would be dead data. Requires provider wiring first (recommended follow-up above).
+- **`mistral-medium-2505` left active** — `status: 'Active'` in source data, no deprecation metadata despite Medium 3.1/3.5 existing.
+- **`open-mistral-nemo` left active** — still `status: 'Active'`.
+- **codestral contextWindow stays 128000** — OpenRouter claims 256K but both the live model card and the source data say 128k; Mistral docs win.
+- **`updatedAt: '2026-04-01'` left on deprecated entries** — their prices were verified unchanged; only active entries were bumped in PR #4990 and that remains coherent.
+- **Reasoning params not wired** — spec exposes `reasoning_effort` (`high`/`none`) on `ChatCompletionRequest` (line 12119; `prompt_mode` is deprecated in its favor). Sim doesn't forward it, so no capabilities change; note for a future Magistral reasoning integration.
+- **mistral-medium-3-5 NOT added in this pass** (documented as a recommended addition, question d): Mistral Medium 3.5 — `apiNames: ['mistral-medium-3-5', 'mistral-medium-3']`, released **2026-04-28**, **$1.5 / $7.5** per 1M (data file `mistral-medium-3-5-26-04.ts` + pricing page agree), **256k** context, Active, "frontier-class multimodal model optimized for agentic and coding". Matches existing `/^mistral/` modelPattern, so adding the entry is sufficient. Note its id does not follow the `-YYMM` date-suffix convention (e.g. `-2512`) — both apiNames could be listed if desired.
+
+## Unverifiable
+
+Nothing remains strictly unverifiable. The four previously-unverifiable legacy prices (mistral-large-2411 2.0/6.0, mistral-small-2506 0.1/0.3, devstral-small-2507 0.1/0.3, devstral-medium-2507 0.4/2.0) are now **confirmed** via the model-card source data files. Caveats:
+
+- `magistral-medium-2509` pricing has no independent second source (not listed on OpenRouter); verified only within the Mistral doc family (data file + pricing page, which agree).
+- The Mistral Small 4 pricing-page row ($0.1/$0.3) remains in live contradiction with the model card; ruling above. Re-check on the next pass.
diff --git a/docs/models/openai.md b/docs/models/openai.md
new file mode 100644
index 0000000000..a1d81edb5b
--- /dev/null
+++ b/docs/models/openai.md
@@ -0,0 +1,338 @@
+# OpenAI Provider Block — Final Validation & Justification
+
+**Validation date:** 2026-06-11
+**Scope:** `openai` provider block in `apps/sim/providers/models.ts` (23 models), including changes landed in PR #4990.
+**Method:** Live WebFetch of every individual model page on `developers.openai.com/api/docs/models/<id>`, the pricing page, the reasoning guide, the GPT-5.5 usage guide, the deprecations page, and the API reference; secondary pricing cross-checks against OpenRouter. All claims below were fetched live this session. Provider docs win over secondary sources.
+
+**Sources:**
+
+- Pricing: https://developers.openai.com/api/docs/pricing (only lists current gpt-5.5/5.4 families; per-model pricing taken from individual model pages)
+- Model pages: `https://developers.openai.com/api/docs/models/<model-id>` (fetched for all 23 ids)
+- Reasoning guide: https://developers.openai.com/api/docs/guides/reasoning
+- GPT-5.5 usage guide: https://developers.openai.com/api/docs/guides/latest-model
+- Deprecations: https://developers.openai.com/api/docs/deprecations
+- GPT-5.5 launch: https://openai.com/index/introducing-gpt-5-5/ (via search; release 2026-04-23, API availability 2026-04-24)
+- Secondary pricing: https://openrouter.ai/openai/gpt-5.5, /gpt-5.5-pro, /gpt-5.4, /gpt-5.2, /o3, /gpt-4o — all consistent with provider docs
+
+**Flag consumption check** (`rg` over `apps/sim/providers/openai/`): `reasoningEffort` and `verbosity` are consumed in `apps/sim/providers/openai/core.ts` (sent as `reasoning.effort` / `text.verbosity` on the Responses API). `nativeStructuredOutputs` is NOT consumed by the provider runtime — its only consumer is the landing models page (`apps/sim/app/(landing)/models/utils.ts`), so it is display-only metadata. `thinking` / `computerUse` are not used by the OpenAI provider.
+
+Pricing is USD per 1M tokens throughout. "MP" = the model's own docs page (`developers.openai.com/api/docs/models/<id>`).
+
+---
+
+## Per-model verification
+
+### gpt-4.1
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| input / cachedInput / output | 2.0 / 0.5 / 8.0 | MP gpt-4.1 | ✓ verified |
+| updatedAt | 2026-06-11 | this validation | ✓ verified today |
+| contextWindow | 1,047,576 | MP: "1,047,576 tokens" | ✓ verified |
+| maxOutputTokens | 32,768 | MP | ✓ verified |
+| temperature 0–2 | present | non-reasoning chat model; standard OpenAI sampling range | ✓ correct by convention (docs do not enumerate the range; 0–2 is the API-wide bound) |
+| releaseDate | 2025-04-14 | MP snapshot `gpt-4.1-2025-04-14` | ✓ verified |
+| deprecated | absent | deprecations page does not list gpt-4.1 base | ✓ verified active ("Default", "Smartest non-reasoning model") |
+
+### gpt-4.1-mini
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| input / cachedInput / output | 0.4 / 0.1 / 1.6 | MP gpt-4.1-mini | ✓ verified |
+| updatedAt | 2026-06-11 | this validation | ✓ |
+| contextWindow / maxOutputTokens | 1,047,576 / 32,768 | MP | ✓ verified |
+| temperature 0–2 | present | convention (non-reasoning) | ✓ |
+| releaseDate | 2025-04-14 | MP snapshot `gpt-4.1-mini-2025-04-14` | ✓ verified |
+| deprecated | absent | not on deprecations page | ✓ verified |
+
+### gpt-4.1-nano
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| input / cachedInput / output | 0.1 / 0.025 / 0.4 | MP gpt-4.1-nano | ✓ verified |
+| updatedAt | 2026-06-11 | this validation | ✓ |
+| contextWindow / maxOutputTokens | 1,047,576 / 32,768 | MP | ✓ verified |
+| temperature 0–2 | present | convention | ✓ |
+| releaseDate | 2025-04-14 | MP (snapshot `gpt-4.1-nano-2025-04-14`, now marked deprecated) | ✓ verified |
+| deprecated | **absent — should be `true`** | deprecations page: shutdown **2026-10-23**, replacement gpt-5.4-nano; MP also recommends "starting with GPT-5 nano" | **FIX: add `deprecated: true`** |
+
+### gpt-5.5-pro
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| input / output | 30.0 / 180.0 | MP + pricing page + OpenRouter | ✓ verified (two sources) |
+| cachedInput | absent | MP: "GPT-5.5 Pro does not offer a cached input discount" | ✓ verified correct omission |
+| updatedAt | **2026-04-23 — stale** | pricing re-verified 2026-06-11 this session | **FIX: bump to 2026-06-11** (PR #4990 claimed to bump all entries but missed this one) |
+| contextWindow | 1,050,000 | MP: "1,050,000 context window" | ✓ verified |
+| maxOutputTokens | 128,000 | MP | ✓ verified |
+| nativeStructuredOutputs | true | MP: "Structured outputs: Supported" | ✓ verified (display-only flag) |
+| reasoningEffort | **['none','low','medium','high','xhigh'] — wrong** | see Open Question (a) below | **FIX: change to `['medium','high','xhigh']`** |
+| verbosity | **present — should be removed** | see Open Question (b) below | **FIX: remove `verbosity` block** |
+| releaseDate | 2026-04-23 | MP snapshot `gpt-5.5-pro-2026-04-23` | ✓ verified |
+| deprecated | absent | no deprecation notes on MP | ✓ verified |
+
+**Open Question (a) — resolved.** The gpt-5.5-pro model page does NOT enumerate reasoning effort values (fetched twice, explicitly asked for any sentence containing "effort" — the page contains no `reasoning.effort` enumeration). The reasoning guide says values are model-dependent and "check the relevant model page". Direct documentation for the siblings is explicit: gpt-5.4-pro MP — "supports reasoning.effort: medium, high, xhigh"; gpt-5.2-pro MP — "supports reasoning.effort: medium, high, xhigh"; gpt-5-pro MP — "defaults to (and only supports) reasoning.effort: high". Every pro-tier model that documents the parameter excludes `none` and `low` — the pro tier exists to "use more compute to think harder" (gpt-5.5-pro MP), making `none`/`low` incoherent with the product. The most defensible value set is **`['medium','high','xhigh']`**, matching both documented pro siblings. The current `['none','low','medium','high','xhigh']` appears copied from non-pro gpt-5.5 and is backed by no source.
+
+**Open Question (b) — resolved.** Not documented. The gpt-5.5-pro page does not mention `verbosity` (explicitly checked). No pro-tier model page (gpt-5.4-pro, gpt-5.2-pro, gpt-5-pro) documents verbosity, and the GPT-5.5 usage guide discusses `text.verbosity` only for gpt-5.5. Since `verbosity` is runtime-consumed (`core.ts` sends `text.verbosity` to the API), advertising it on a model that may reject it is a live failure risk. **Remove the verbosity block from gpt-5.5-pro.**
+
+### gpt-5.5
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| input / cachedInput / output | 5.0 / 0.5 / 30.0 | MP + pricing page + OpenRouter | ✓ verified (two sources) |
+| updatedAt | **2026-04-23 — stale** | re-verified 2026-06-11 | **FIX: bump to 2026-06-11** (missed by PR #4990) |
+| contextWindow / maxOutputTokens | 1,050,000 / 128,000 | MP | ✓ verified |
+| nativeStructuredOutputs | true | MP: structured outputs supported | ✓ verified |
+| reasoningEffort ['none','low','medium','high','xhigh'] | present | MP: "Reasoning.effort supports: none, low, medium (default), high and xhigh" | ✓ verified verbatim |
+| verbosity ['low','medium','high'] | present | GPT-5.5 usage guide documents `text.verbosity` (recommends `low` for concise) | ✓ verified |
+| releaseDate | 2026-04-23 | announcement 2026-04-23 (openai.com/index/introducing-gpt-5-5/, TechCrunch); pro sibling snapshot is `-2026-04-23` | ✓ verified (note: API availability was 2026-04-24; snapshot naming uses 04-23) |
+| recommended | true | flagship per OpenAI ("latest GPT-5.5" is the recommended upgrade target on gpt-5.2/gpt-5/o3 pages) | ✓ intentional, docs-consistent |
+
+### gpt-5.4-pro
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| input / output | 30.0 / 180.0 | MP + pricing page | ✓ verified (note: MP — ">272K input tokens are priced at 2x input and 1.5x output"; the flat `pricing` shape in `models.ts` cannot express this, so long-context pro calls are under-billed — pre-existing limitation, see "Unverifiable / known limitations") |
+| cachedInput | absent | pricing page shows no cached rate for pro | ✓ verified |
+| updatedAt | 2026-06-11 | this validation | ✓ |
+| contextWindow / maxOutputTokens | 1,050,000 / 128,000 | MP | ✓ verified |
+| reasoningEffort ['medium','high','xhigh'] | present | MP: "supports reasoning.effort: medium, high, xhigh" | ✓ verified verbatim |
+| verbosity | absent | not documented for pro | ✓ correct omission |
+| releaseDate | 2026-03-05 | gpt-5.4 snapshot `gpt-5.4-2026-03-05`; same launch | ✓ verified |
+| deprecated | absent | none | ✓ |
+
+### gpt-5.4
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| input / cachedInput / output | 2.5 / 0.25 / 15.0 | MP + pricing page + OpenRouter | ✓ verified (two sources) |
+| updatedAt | 2026-06-11 | this validation | ✓ |
+| contextWindow / maxOutputTokens | 1,050,000 / 128,000 | MP | ✓ verified |
+| reasoningEffort ['none','low','medium','high','xhigh'] | present | MP: "Reasoning.effort supports: none (default), low, medium, high and xhigh" | ✓ verified verbatim |
+| verbosity ['low','medium','high'] | present | not on MP; carried from GPT-5-line `text.verbosity` parameter (documented in usage guide / help center for the GPT-5 family) | ✓ kept — see "Deliberately not changed" |
+| releaseDate | 2026-03-05 | MP snapshot `gpt-5.4-2026-03-05` | ✓ verified |
+
+### gpt-5.4-mini
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| input / cachedInput / output | 0.75 / 0.075 / 4.5 | MP + pricing page | ✓ verified |
+| updatedAt | 2026-06-11 | this validation | ✓ |
+| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified |
+| reasoningEffort ['none','low','medium','high','xhigh'] | present | gpt-5.4 family per search-confirmed docs: "gpt-5.4, gpt-5.4-mini, and gpt-5.4-nano support none, low, medium, high, and xhigh" | ✓ verified |
+| verbosity | present | family convention | ✓ kept |
+| releaseDate | 2026-03-17 | MP snapshot `gpt-5.4-mini-2026-03-17` | ✓ verified |
+
+### gpt-5.4-nano
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| input / cachedInput / output | 0.2 / 0.02 / 1.25 | MP + pricing page | ✓ verified |
+| updatedAt | 2026-06-11 | this validation | ✓ |
+| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified |
+| reasoningEffort / verbosity | as gpt-5.4-mini | same family docs | ✓ verified / kept |
+| releaseDate | 2026-03-17 | MP snapshot `gpt-5.4-nano-2026-03-17` | ✓ verified |
+| speedOptimized | true | MP: "cheapest GPT-5.4-class model", optimized for classification/extraction/sub-agents | ✓ intentional repo flag, consistent with docs |
+
+### gpt-5.2-pro
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| input / output | 21.0 / 168.0 | MP | ✓ verified |
+| cachedInput | absent | MP shows none | ✓ |
+| updatedAt | 2026-06-11 | this validation | ✓ |
+| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified |
+| reasoningEffort ['medium','high','xhigh'] | present | MP: "supports reasoning.effort: medium, high, xhigh" | ✓ verified verbatim |
+| releaseDate | 2025-12-11 | MP snapshot `gpt-5.2-pro-2025-12-11` | ✓ verified |
+| deprecated | absent | MP recommends upgrading to gpt-5.5-pro but no shutdown date on deprecations page | ✓ verified (soft-superseded, not deprecated) |
+
+### gpt-5.2
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| input / cachedInput / output | 1.75 / 0.175 / 14.0 | MP + OpenRouter | ✓ verified (two sources) |
+| updatedAt | 2026-06-11 | this validation | ✓ |
+| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified |
+| reasoningEffort ['none','low','medium','high','xhigh'] | present | MP: "none (default), low, medium, high and xhigh" | ✓ verified verbatim |
+| verbosity | present | family convention | ✓ kept |
+| releaseDate | 2025-12-11 | MP snapshot `gpt-5.2-2025-12-11` | ✓ verified |
+| deprecated | absent | superseded by 5.5 but no shutdown (only `gpt-5.2-chat-latest` has one) | ✓ verified |
+
+### gpt-5.1
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| input / cachedInput / output | 1.25 / 0.125 / 10.0 | MP | ✓ verified |
+| updatedAt | 2026-06-11 | this validation | ✓ |
+| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified |
+| reasoningEffort ['none','low','medium','high'] | present | MP: "Reasoning.effort supports: none (default), low, medium, and high" (no xhigh) | ✓ verified verbatim |
+| verbosity | present | family convention | ✓ kept |
+| releaseDate | **2025-11-12** | MP snapshot is `gpt-5.1-2025-11-13` | **FIX: → 2025-11-13.** Repo convention everywhere else in this block is snapshot date (gpt-5-pro 10-06, gpt-5.2 12-11, gpt-4.1 04-14, o3-pro 06-10, …). 2025-11-12 is the announcement date; the API snapshot is 11-13 |
+
+### gpt-5-pro
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| input / output | 15.0 / 120.0 | MP | ✓ verified |
+| cachedInput | absent | MP shows none | ✓ |
+| updatedAt | 2026-06-11 | this validation | ✓ |
+| contextWindow | 400,000 | MP | ✓ verified |
+| maxOutputTokens | 272,000 | MP: "272,000 max output tokens" | ✓ verified (yes, it really is larger than the rest of the family) |
+| reasoningEffort ['high'] | present | MP: "defaults to (and only supports) `reasoning.effort: high`" | ✓ verified verbatim |
+| releaseDate | 2025-10-06 | MP snapshot `gpt-5-pro-2025-10-06` | ✓ verified — **PR #4990's change confirmed correct** |
+| deprecated | absent | none listed | ✓ |
+
+### gpt-5
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| input / cachedInput / output | 1.25 / 0.125 / 10.0 | MP | ✓ verified |
+| updatedAt | 2026-06-11 | this validation | ✓ |
+| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified |
+| reasoningEffort ['minimal','low','medium','high'] | present | MP: "minimal, low, medium, and high"; reasoning guide confirms `minimal` introduced with GPT-5 | ✓ verified verbatim |
+| verbosity | present | verbosity launched with GPT-5 | ✓ verified |
+| releaseDate | 2025-08-07 | MP snapshot `gpt-5-2025-08-07` | ✓ verified |
+| deprecated | absent | MP: "We recommend using the latest GPT-5.5" but no shutdown date — deprecations page: "not explicitly listed as deprecated" | ✓ verified (superseded, not deprecated) |
+
+### gpt-5-mini
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| input / cachedInput / output | 0.25 / 0.025 / 2.0 | MP | ✓ verified |
+| updatedAt | 2026-06-11 | this validation | ✓ |
+| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified |
+| reasoningEffort / verbosity | gpt-5 family values | GPT-5 family launch docs | ✓ verified |
+| releaseDate | 2025-08-07 | MP snapshot `gpt-5-mini-2025-08-07` | ✓ verified |
+
+### gpt-5-nano
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| input / cachedInput / output | 0.05 / 0.005 / 0.4 | MP | ✓ verified |
+| updatedAt | 2026-06-11 | this validation | ✓ |
+| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified |
+| reasoningEffort / verbosity | gpt-5 family values | family docs | ✓ verified |
+| releaseDate | 2025-08-07 | MP snapshot `gpt-5-nano-2025-08-07` | ✓ verified |
+
+### gpt-5-chat-latest
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| input / cachedInput / output | 1.25 / 0.125 / 10.0 | MP | ✓ verified |
+| updatedAt | 2026-06-11 | this validation | ✓ |
+| contextWindow / maxOutputTokens | 128,000 / 16,384 | MP | ✓ verified |
+| temperature 0–2 | present | non-reasoning chat snapshot | ✓ convention |
+| releaseDate | 2025-08-07 | GPT-5 launch snapshot | ✓ verified |
+| deprecated | true | **deprecations page: shutdown 2026-07-23, replacement gpt-5.5** | ✓ verified — **PR #4990's change confirmed correct and now formally docs-backed** |
+
+### o4-mini
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| input / cachedInput / output | 1.1 / 0.275 / 4.4 | MP | ✓ verified |
+| updatedAt | 2026-06-11 | this validation | ✓ |
+| contextWindow / maxOutputTokens | 200,000 / 100,000 | MP | ✓ verified |
+| reasoningEffort ['low','medium','high'] | present | see Open Question (c) below | ✓ verified |
+| releaseDate | 2025-04-16 | MP snapshot `o4-mini-2025-04-16` | ✓ verified |
+| deprecated | true | deprecations page: shutdown **2026-10-23**, replacement gpt-5.4-mini; MP: snapshot Deprecated, "succeeded by GPT-5 mini" | ✓ verified — **PR #4990's change confirmed correct** |
+
+### o3-pro
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| input / output | 20.0 / 80.0 | MP | ✓ verified |
+| cachedInput | absent | MP shows none | ✓ |
+| updatedAt | 2026-06-11 | this validation | ✓ |
+| contextWindow / maxOutputTokens | 200,000 / 100,000 | MP | ✓ verified |
+| reasoningEffort | absent | MP: "Reasoning: Highest", no effort enum documented (pro pattern: fixed high effort) | ✓ correct omission |
+| releaseDate | 2025-06-10 | MP snapshot `o3-pro-2025-06-10` | ✓ verified |
+| deprecated | absent | deprecations page does not list o3-pro (only o3/o3-mini) | ✓ verified — note the oddity that base o3 is scheduled for shutdown while o3-pro is not; evidence-based, leave as is |
+
+### o3
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| input / cachedInput / output | 2 / 0.5 / 8 | MP + OpenRouter ($2/$8) | ✓ verified (two sources) |
+| updatedAt | 2026-06-11 | this validation | ✓ |
+| contextWindow / maxOutputTokens | 200,000 / 100,000 | MP | ✓ verified |
+| reasoningEffort ['low','medium','high'] | present | Open Question (c) | ✓ verified |
+| releaseDate | 2025-04-16 | MP snapshot `o3-2025-04-16` | ✓ verified |
+| deprecated | **absent — should be `true`** | **deprecations page: shutdown 2026-10-23**, replacement gpt-5.5-pro; MP: "superseded by GPT-5" | **FIX: add `deprecated: true`** |
+
+### o3-mini
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| input / cachedInput / output | 1.1 / 0.55 / 4.4 | MP (note: cachedInput 0.55 differs from o4-mini's 0.275 — both verified correct per their MPs) | ✓ verified |
+| updatedAt | 2026-06-11 | this validation | ✓ |
+| contextWindow / maxOutputTokens | 200,000 / 100,000 | MP | ✓ verified |
+| reasoningEffort ['low','medium','high'] | present | o3-mini launch post: "three reasoning effort options—low, medium, and high" | ✓ verified explicitly |
+| releaseDate | 2025-01-31 | MP snapshot `o3-mini-2025-01-31` | ✓ verified |
+| deprecated | **absent — should be `true`** | **deprecations page: shutdown 2026-10-23**, replacement gpt-5.5; MP: snapshot marked deprecated | **FIX: add `deprecated: true`** |
+
+### o1
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| input / cachedInput / output | 15.0 / 7.5 / 60 | MP | ✓ verified |
+| updatedAt | 2026-06-11 | this validation | ✓ |
+| contextWindow / maxOutputTokens | 200,000 / 100,000 | MP | ✓ verified |
+| reasoningEffort ['low','medium','high'] | present | Open Question (c) | ✓ verified |
+| releaseDate | **2024-12-05** | MP snapshot is `o1-2024-12-17` | **FIX (minor): → 2024-12-17** for snapshot-date consistency. 2024-12-05 is the ChatGPT launch; the API snapshot (the convention used by every other entry in this block) is 12-17 |
+| deprecated | **absent — recommend `true`** | MP: sole snapshot `o1-2024-12-17` explicitly "Deprecated"; described as "Previous full o-series reasoning model". Base alias not on the deprecations shutdown table (only o1-preview/o1-mini, already shut down) | **FIX (recommended): add `deprecated: true`** — weaker evidence than o3/o3-mini (no shutdown date for the alias), but its only snapshot is deprecated and every non-pro o-series peer (o3, o3-mini, o4-mini) has an official shutdown date (o3-pro is the one o-series peer not listed) |
+
+**Open Question (c) — resolved.** The current model pages no longer enumerate `reasoning_effort` for the o-series, and the Responses API reference page content does not surface the enum inline. The reasoning guide states: "Supported values are model-dependent and can include `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`... check the relevant model page." Best available evidence: (1) o3-mini launch post (openai.com/index/openai-o3-mini/) explicitly: "three reasoning effort options—low, medium, and high"; (2) the API changelog notes `reasoning_effort` was added for o1 models with those three values; (3) `none`/`minimal`/`xhigh` were introduced with the GPT-5 line and were never back-ported to o-series. **`['low','medium','high']` for o1, o3, o3-mini, o4-mini is confirmed — no change.**
+
+### gpt-4o
+
+| Field | Value | Source | Verdict |
+|---|---|---|---|
+| input / cachedInput / output | 2.5 / 1.25 / 10.0 | MP + OpenRouter ($2.50/$10) | ✓ verified (two sources) |
+| updatedAt | 2026-06-11 | this validation | ✓ |
+| contextWindow / maxOutputTokens | 128,000 / 16,384 | MP | ✓ verified |
+| temperature 0–2 | present | convention | ✓ |
+| releaseDate | 2024-05-13 | MP snapshot `gpt-4o-2024-05-13`; OpenRouter "released May 13, 2024" | ✓ verified |
+| deprecated | true | see Open Question (d) | ✓ verified — and now docs-backed |
+
+**Open Question (d) — resolved, better than expected.** The brief said gpt-4o is "active per OpenAI" and `deprecated: true` is a deliberate steering decision. The live deprecations page now shows **gpt-4o: shutdown 2026-10-23, replacement gpt-5.5**. So `deprecated: true` is no longer just an intentional product deviation — it is officially correct. Keep, no caveat needed.
+
+---
+
+## Open Question (e) — `defaultModel: 'gpt-4.1'`
+
+OpenAI's flagship is gpt-5.5 (announcement 2026-04-23; the gpt-5.2/gpt-5/o3 pages all point at "the latest GPT-5.5"). gpt-4.1 remains active (it is OpenAI's "smartest non-reasoning model" and is not on the deprecations page), so the current default is not broken — it is a cheap, fast, temperature-supporting non-reasoning default, which is a defensible UX choice for new blocks. **Recommendation:** consider `defaultModel: 'gpt-5.5'` (or `gpt-5.4-mini` for a cost-conscious reasoning default) to match the flagship, but this is a **product decision**, not a correctness fix, so it is not included in the machine-applicable change list below.
+
+---
+
+## Changes made in this pass (recommended to apply now)
+
+1. **gpt-5.5-pro** — `reasoningEffort.values`: `['none','low','medium','high','xhigh']` → `['medium','high','xhigh']`. Undocumented on its own page; both documented pro siblings (gpt-5.4-pro, gpt-5.2-pro) enumerate exactly `medium, high, xhigh`; pro tier semantics exclude none/low. Sending `reasoning.effort: 'none'` to a pro model risks a 400.
+2. **gpt-5.5-pro** — remove the `verbosity` block. Not documented for any pro model; the provider sends `text.verbosity` at runtime, so advertising it is a live API-error risk.
+3. **gpt-5.5-pro** — `pricing.updatedAt`: `2026-04-23` → `2026-06-11` (re-verified today; PR #4990 missed this entry despite claiming an all-entry bump).
+4. **gpt-5.5** — `pricing.updatedAt`: `2026-04-23` → `2026-06-11` (same).
+5. **o3** — add `deprecated: true` (official shutdown 2026-10-23).
+6. **o3-mini** — add `deprecated: true` (official shutdown 2026-10-23).
+7. **gpt-4.1-nano** — add `deprecated: true` (official shutdown 2026-10-23, replacement gpt-5.4-nano).
+8. **o1** — add `deprecated: true` (sole snapshot `o1-2024-12-17` marked Deprecated; "previous" o-series model; recommended, slightly weaker evidence).
+9. **gpt-5.1** — `releaseDate`: `2025-11-12` → `2025-11-13` (snapshot `gpt-5.1-2025-11-13`; snapshot-date convention).
+10. **o1** — `releaseDate`: `2024-12-05` → `2024-12-17` (snapshot `o1-2024-12-17`; snapshot-date convention; minor).
+
+## Deliberately not changed
+
+- **gpt-4o `deprecated: true`** — originally an intentional steering decision; now officially correct (shutdown 2026-10-23). Keep.
+- **gpt-5-chat-latest / o4-mini `deprecated: true`** (PR #4990) — both confirmed by the deprecations page (2026-07-23 and 2026-10-23 shutdowns). Keep.
+- **`defaultModel: 'gpt-4.1'`** — product decision; gpt-4.1 is active. Flagged for product review (gpt-5.5 is the flagship), not a correctness fix.
+- **`verbosity` on non-pro gpt-5.x models (gpt-5.4/-mini/-nano, gpt-5.2, gpt-5.1, gpt-5 family)** — current model pages don't enumerate it per-model, but `text.verbosity` is a documented GPT-5-line parameter (GPT-5 launch; GPT-5.5 usage guide; OpenAI help center) and the provider has been sending it without errors. Keep.
+- **`temperature {0,2}` on gpt-4.1 family, gpt-4o, gpt-5-chat-latest** — model pages don't state sampling ranges; 0–2 is the documented API-wide range for non-reasoning chat models. Correct by convention.
+- **o3-pro not deprecated** — the deprecations page lists o3 and o3-mini but not o3-pro. Odd but evidence-based; leave.
+- **gpt-5.2 / gpt-5 / gpt-5.2-pro not deprecated** — docs say "superseded / recommend GPT-5.5" but list no shutdown; superseded ≠ deprecated. Leave.
+- **`recommended: true` on gpt-5.5 and `speedOptimized: true` on gpt-5.4-nano** — repo-internal flags, consistent with docs positioning.
+- **o3-mini `cachedInput: 0.55` vs o4-mini `0.275`** — looks like a typo but both verified correct on their respective model pages.
+
+## Unverifiable / known limitations
+
+- **gpt-5.5-pro effort values** — no official enumeration exists anywhere fetched (model page, reasoning guide, usage guide, OpenRouter). The `['medium','high','xhigh']` recommendation is an inference from documented siblings — the strongest available evidence, but flagged as not directly documented. If OpenAI later publishes the enum, re-verify.
+- **gpt-5.4-pro long-context surcharge** — MP states prompts >272K input tokens bill at 2x input / 1.5x output. The flat `pricing` shape in `models.ts` cannot represent tiered pricing; cost estimates for very long pro prompts will be low. Pre-existing schema limitation, out of scope here.
+- **gpt-5.5 release date 04-23 vs API availability 04-24** — announcement and snapshot say 2026-04-23; press coverage says API access opened 2026-04-24. Kept 2026-04-23 (snapshot wins).
+- **Verbosity enum per non-flagship model** — `['low','medium','high']` is documented at the parameter level, not re-enumerated on each model page.
+- **`nativeStructuredOutputs`** — only gpt-5.5/gpt-5.5-pro carry it though most listed models support structured outputs; flag is display-only (landing page), so under-reporting is cosmetic, not functional. Left as is.
diff --git a/docs/models/vertex.md b/docs/models/vertex.md
new file mode 100644
index 0000000000..8e8da6ed1a
--- /dev/null
+++ b/docs/models/vertex.md
@@ -0,0 +1,212 @@
+# Vertex AI provider — model validation (`models.ts` lines ~1487–1685)
+
+- **Date:** 2026-06-11 (final exhaustive pass, re-verifying PR #4990 changes)
+- **Method:** Live WebFetch of Google pricing/model/changelog pages; Google Cloud doc pages render nav-only to fetchers, so Vertex-specific specs were verified via Context7 MCP (`/websites/cloud_google_vertex-ai`, `/websites/cloud_google_gemini-enterprise-agent-platform`) and WebSearch fallback, per the validate-model skill. Two-source rule applied to pricing (Vertex pricing page + Gemini API pricing page / OpenRouter / CloudPrice).
+- **Primary sources:**
+  - https://cloud.google.com/vertex-ai/generative-ai/pricing (rendered fully — all pricing below)
+  - https://ai.google.dev/gemini-api/docs/pricing (cross-check; global-endpoint prices identical for 2.5/3.x)
+  - https://ai.google.dev/gemini-api/docs/models/gemini-3.5-flash, …/gemini-3.1-pro-preview, …/gemini-3.1-flash-lite, …/gemini-3-flash-preview, …/gemini-2.5-pro (token limits)
+  - https://ai.google.dev/gemini-api/docs/thinking (thinking levels/defaults)
+  - https://ai.google.dev/gemini-api/docs/changelog (lifecycle dates)
+  - https://deepmind.google/models/model-cards/gemini-3-5-flash/ (3.5 Flash card)
+  - Vertex docs via Context7: `…/models/gemini/2-5-pro` ("maximum output token limit of 65,535"), `…/migrate/migrate-palm-to-gemini`, `…/learn/model-versioning`, `…/learn/locations`
+  - https://blog.google/technology/developers/deep-research-agent-gemini-api/ (2025-12-11), https://blog.google/innovation-and-ai/models-and-research/gemini-models/gemini-3-1-flash-lite/ (2026-03-03)
+- **Provider implementation:** `apps/sim/providers/vertex/index.ts` contains no capability handling itself — it delegates to `executeGeminiRequest` in `apps/sim/providers/gemini/core.ts`, which consumes `request.thinkingLevel` (core.ts:955–961, sent only when user explicitly selects a level) and `request.maxTokens` (core.ts:934). `thinking`, `temperature`, and `maxOutputTokens` flags are live; the global `maxOutputTokens` fallback is 4096 (models.ts:865), which is why PR #4990 added explicit caps.
+
+---
+
+## Per-model validation
+
+### vertex/gemini-3.5-flash
+
+| Field | Repo | Live docs | Source | Verdict |
+|---|---|---|---|---|
+| id | `gemini-3.5-flash` (GA 2026-05-19) | `gemini-3.5-flash` | ai.google.dev changelog ("Released `gemini-3.5-flash`… GA" 2026-05-19) | ✓ |
+| input | 1.5 | $1.50 (global) | Vertex pricing + Gemini API pricing + OpenRouter | ✓ (3 sources) |
+| cachedInput | 0.15 | $0.15 | Vertex pricing + Gemini API pricing | ✓ |
+| output | 9.0 | $9.00 | Vertex pricing + Gemini API pricing + OpenRouter | ✓ |
+| contextWindow | 1048576 | 1,048,576 | ai.google.dev/gemini-api/docs/models/gemini-3.5-flash; DeepMind card "1M" | ✓ |
+| maxOutputTokens | 65536 | 65,536 | ai.google.dev model page ("64K" on DeepMind card) | ✓ |
+| thinking | minimal/low/medium/high, default medium | minimal, low, medium, high; default medium | ai.google.dev/gemini-api/docs/thinking; OpenRouter ("defaults to medium thinking effort") | ✓ |
+| releaseDate | 2026-05-19 | "Published 19 May 2026" | DeepMind model card + changelog | ✓ |
+| recommended | absent | — | google provider entry has `recommended: true` on the same model | 🔵 add (see fixes) |
+
+Note: Vertex introduces **non-global endpoint pricing (+10%: $1.65 / $9.90 / $0.165) effective 2026-07-01**; our entries model global pricing. See operational caveats.
+
+### vertex/gemini-3.1-pro-preview
+
+| Field | Repo | Live docs | Source | Verdict |
+|---|---|---|---|---|
+| id | `gemini-3.1-pro-preview` | `gemini-3.1-pro-preview` | ai.google.dev/gemini-api/docs/models/gemini-3.1-pro-preview | ✓ |
+| input | 2.0 | $2 (≤200k); $4 (>200k) | Vertex pricing + Gemini API pricing | ✓ (≤200k tier; >200k tier not modeled — see caveats) |
+| cachedInput | 0.2 | $0.20 (≤200k); $0.40 (>200k) | same | ✓ |
+| output | 12.0 | $12 (≤200k input); $18 (>200k) | same | ✓ |
+| contextWindow | 1048576 | 1,048,576 | ai.google.dev model page; Vertex release notes "1M token context window" | ✓ |
+| maxOutputTokens | 65536 | 65,536 | ai.google.dev model page | ✓ |
+| thinking | low/medium/high, default high | low, medium, high; default high (Dynamic); **minimal not supported** | ai.google.dev/gemini-api/docs/thinking | ✓ (PR #4990 drop of 'minimal' confirmed correct) |
+| releaseDate | 2026-02-19 | 2026-02-19 | blog.google gemini-3-1-pro; github.blog changelog 2026-02-19 | ✓ |
+
+**Operational caveat (open question f):** Google documents `gemini-3.1-pro-preview` as **global-endpoint-only on Vertex AI** (Vertex `learn/locations` lists it under global-endpoint models; third-party migration guides state regional endpoints don't serve it). `apps/sim/providers/vertex/index.ts:34` resolves location as `request.vertexLocation || env.VERTEX_LOCATION || 'us-central1'` — with the default `us-central1`, requests to this model will fail with model-not-found. Users must set `vertexLocation` / `VERTEX_LOCATION` to `global`. No code change made (per instructions); documented here.
+
+### vertex/gemini-3.1-flash-lite
+
+| Field | Repo | Live docs | Source | Verdict |
+|---|---|---|---|---|
+| id | `gemini-3.1-flash-lite` (renamed from `-preview` in PR #4990) | stable id `gemini-3.1-flash-lite`; preview id shut down on Gemini API 2026-05-25; Vertex preview-alias discontinuation 2026-07-09 | ai.google.dev changelog ("Released `gemini-3.1-flash-lite`… GA" 2026-05-07; preview "shut down" 2026-05-25); cloud.google.com blog "Gemini 3.1 Flash-Lite is now generally available" | ✓ rename confirmed correct |
+| input | 0.25 | $0.25 (global, text) | Vertex pricing + Gemini API pricing | ✓ |
+| cachedInput | 0.025 | $0.025 | same | ✓ |
+| output | 1.5 | $1.50 | same + blog.google launch post | ✓ |
+| contextWindow | 1048576 | 1,048,576 | ai.google.dev/gemini-api/docs/models/gemini-3.1-flash-lite | ✓ |
+| maxOutputTokens | 65536 | 65,536 | same | ✓ |
+| thinking levels | minimal/low/medium/high | minimal "Supported (Default)", low, medium, high | ai.google.dev/gemini-api/docs/thinking (3.1 Flash-Lite row; the "Not supported" row is 3.1 **Pro**) | ✓ — orchestrator re-fetched the thinking doc and corrected this report's initial misreading |
+| thinking default | 'minimal' | minimal ("Supported (Default)") | same | ✓ |
+| releaseDate | 2026-05-07 | stable GA 2026-05-07 (preview launch was 2026-03-03) | ai.google.dev changelog | ✓ changed this pass to the GA date |
+| speedOptimized | absent | "our most cost-effective model yet", lowest-latency tier | blog.google launch post | 🔵 add (see fixes) |
+
+**Open question (c) resolved:** the preview→stable rename is right (preview already shut down on the Gemini API 2026-05-25; Vertex alias discontinues 2026-07-09). This report initially claimed `minimal` is rejected on 3.1 Flash-Lite — that was a misreading of the thinking-levels table (the "Not supported" cell belongs to 3.1 **Pro**). The orchestrator re-fetched ai.google.dev/gemini-api/docs/thinking, which states for Gemini 3.1 Flash-Lite: minimal "Supported (Default)", plus low/medium/high. The repo's `levels: ['minimal','low','medium','high'], default: 'minimal'` is correct and was left unchanged.
+
+### vertex/gemini-3-pro-preview (deprecated)
+
+| Field | Repo | Live docs | Source | Verdict |
+|---|---|---|---|---|
+| deprecated | true | Gemini API shut down 2026-03-09 (`gemini-3-pro-preview` now aliases `gemini-3.1-pro-preview`); Vertex discontinuation 2026-03-26 | ai.google.dev changelog; Vertex deprecations (via third-party migration guides citing Google's table) | ✓ deprecated:true confirmed correct |
+| pricing 2.0/0.2/12.0 | — | current pricing page no longer lists text Gemini 3 Pro (only "Gemini 3 Pro Image") | cloud.google.com/vertex-ai/generative-ai/pricing | ⚠️ historical values, unverifiable from current page; acceptable on a deprecated entry |
+| contextWindow | 1000000 | launch materials said "1M token context window" | Vertex release notes | ⚠️ 1,000,000 vs sibling models' 1,048,576; left as-is (deprecated) |
+| thinking | low/medium/high, default high | consistent with 3.x Pro line (no minimal) | ai.google.dev/gemini-api/docs/thinking (3.1-pro row) | ✓ |
+| releaseDate | 2025-11-18 | 2025-11-18 | blog.google gemini-3; github.blog 2025-11-18; axios 2025-11-18 | ✓ |
+
+Note: since the id now auto-redirects to 3.1 Pro on Google's side, calls may silently serve 3.1 Pro; `deprecated: true` steering users away is the right call.
+
+### vertex/gemini-3-flash-preview
+
+| Field | Repo | Live docs | Source | Verdict |
+|---|---|---|---|---|
+| id | `gemini-3-flash-preview` | `gemini-3-flash-preview` | ai.google.dev/gemini-api/docs/models/gemini-3-flash-preview | ✓ |
+| input / cachedInput / output | 0.5 / 0.05 / 3.0 | $0.50 / $0.05 / $3.00 | Vertex pricing + Gemini API pricing + TechCrunch | ✓ |
+| contextWindow | 1048576 (PR #4990 change) | 1,048,576 | ai.google.dev model page | ✓ change confirmed |
+| maxOutputTokens | 65536 | 65,536 | same | ✓ |
+| thinking | minimal/low/medium/high, default high | minimal, low, medium, high; default high (Dynamic) | ai.google.dev/gemini-api/docs/thinking | ✓ |
+| releaseDate | 2025-12-17 | 2025-12-17 | techcrunch.com 2025/12/17; 9to5google 2025/12/17; blog.google | ✓ |
+
+### vertex/gemini-2.5-pro
+
+| Field | Repo | Live docs | Source | Verdict |
+|---|---|---|---|---|
+| input | 1.25 | $1.25 (≤200k); $2.50 (>200k) | Vertex pricing + Gemini API pricing | ✓ (≤200k tier) |
+| cachedInput | 0.125 | Vertex page displays "$0.13" (rounded); Gemini API exact "$0.125" | both pricing pages | ✓ (0.125 is exact value) |
+| output | 10.0 | $10 (≤200k); $15 (>200k) | same | ✓ |
+| contextWindow | 1048576 | 1,048,576 | Vertex `models/gemini/2-5-pro` (via Context7) + ai.google.dev | ✓ |
+| maxOutputTokens | **65536** | **Vertex: 65,535** ("maximum output token limit of 65,535"); Gemini API page: 65,536 | docs.cloud.google.com/…/models/gemini/2-5-pro (via Context7); ai.google.dev/gemini-api/docs/models/gemini-2.5-pro | ✗ 🟡 — platforms disagree; this is the **Vertex** entry, so Vertex's 65,535 wins |
+| releaseDate | 2025-03-25 | 2.5 Pro Experimental announced 2025-03-25 | blog.google gemini-model-thinking-updates-march-2025; siliconangle 2025/03/25 | ✓ |
+| deprecated | absent | retirement on Vertex extended to **2026-10-16** | Vertex release notes (via gcpstudyhub summary of release-notes) | ✓ correctly NOT deprecated today — see (d) below |
+
+### vertex/gemini-2.5-flash
+
+| Field | Repo | Live docs | Source | Verdict |
+|---|---|---|---|---|
+| input / cachedInput / output | 0.3 / 0.03 / 2.5 | $0.30 / $0.03 / $2.50 | Vertex pricing + Gemini API pricing | ✓ |
+| contextWindow | 1048576 | 1,048,576 | Vertex `models/gemini/2-5-flash` (via Context7) | ✓ |
+| maxOutputTokens | **65536** | **Vertex: 65,535** ("default output token limit of 65,535") | docs.cloud.google.com/…/models/gemini/2-5-flash (via Context7); also migrate-palm-to-gemini doc ("2.5 Pro and 2.5 Flash… output context length of 65,535") | ✗ 🟡 |
+| releaseDate | 2025-05-20 | preview launched 2025-04-17 on Gemini API; I/O announcement 2025-05-20/21; Vertex GA June 2025 | ai.google.dev changelog; Google I/O coverage | ⚠️ date is the I/O announcement; preview predates it. Left as-is (convention ambiguity, not a factual error) |
+| deprecated | absent | retires 2026-10-16 | as above | ✓ not deprecated today |
+
+### vertex/gemini-2.5-flash-lite
+
+| Field | Repo | Live docs | Source | Verdict |
+|---|---|---|---|---|
+| input / cachedInput / output | 0.1 / 0.01 / 0.4 | $0.10 / $0.01 / $0.40 | Vertex pricing + Gemini API pricing | ✓ |
+| contextWindow | 1048576 | 1,048,576 | Vertex `models/gemini/2-5-flash-lite` | ✓ |
+| maxOutputTokens | **65536** | **65,535** | Vertex 2-5-flash-lite doc / Oracle OCI mirror of Google spec (websearch confirmation: "maximum output for Gemini 2.5 Flash-Lite is 65,535 tokens") | ✗ 🟡 |
+| releaseDate | 2025-06-17 | 2.5 family GA + Flash-Lite preview announced 2025-06-17 | cloud.google.com blog "Gemini 2.5 Updates: Flash/Pro GA, SFT, Flash-Lite on Vertex AI" | ✓ |
+| speedOptimized | absent | smallest/fastest 2.5 tier | google provider entry has `speedOptimized: true` (models.ts:1436) | 🔵 add (see fixes) |
+| deprecated | absent | retires 2026-10-16 | as above | ✓ not deprecated today |
+
+### vertex/gemini-2.0-flash (deprecated)
+
+| Field | Repo | Live docs | Source | Verdict |
+|---|---|---|---|---|
+| deprecated | true | discontinued on Vertex **2026-06-01** (model serving + Provisioned Throughput) | github.com/firebase/extensions/issues/2607; Vertex model-versioning doc ("as of March 6, 2026 … only available for existing customers") | ✓ PR #4990 change confirmed |
+| input | **0.1** | **$0.15** (Vertex token-based row, text) | cloud.google.com/vertex-ai/generative-ai/pricing | ✗ 🟡 repo carries Gemini API pricing ($0.10), not Vertex's |
+| output | **0.4** | **$0.60** | same | ✗ 🟡 |
+| cachedInput | 0.025 | not listed on Vertex pricing page (that's the Gemini API cache price) | same | ❓ UNVERIFIED on Vertex |
+| maxOutputTokens | absent (falls back 4096) | 8,192 ("output context length of 8,192 tokens by default") | Vertex migrate-palm-to-gemini doc | 🔵 google entry has 8192; add for parity (low priority, discontinued) |
+| contextWindow | 1048576 | 1,048,576 | same doc | ✓ |
+| releaseDate | 2025-02-05 | GA on Vertex 2025-02-05 | blog.google gemini-model-updates-february-2025; developers.googleblog.com | ✓ |
+
+### vertex/gemini-2.0-flash-lite (deprecated)
+
+| Field | Repo | Live docs | Source | Verdict |
+|---|---|---|---|---|
+| deprecated | true | discontinued on Vertex 2026-06-01 | same sources as 2.0-flash | ✓ |
+| input / output | 0.075 / 0.3 | $0.075 / $0.30 | Vertex pricing page | ✓ |
+| cachedInput | omitted | none listed | same | ✓ correctly omitted |
+| maxOutputTokens | absent | 8,192 default | Vertex migrate doc | 🔵 parity suggestion (low priority) |
+| releaseDate | 2025-02-25 | preview 2025-02-05; exact 2025-02-25 GA date not found in fetched pages | _attempted: blog.google, Vertex release notes_ | ❓ UNVERIFIED (plausible — GA followed preview by ~3 weeks; deprecated, left as-is) |
+
+### vertex/deep-research-pro-preview-12-2025
+
+| Field | Repo | Live docs | Source | Verdict |
+|---|---|---|---|---|
+| id | `deep-research-pro-preview-12-2025` | Vertex pricing page has a "Gemini Deep Research Agent" row but no id; id appears on third-party Vertex trackers (CloudPrice `vertex_ai/deep-research-pro-preview-12-2025`); Gemini API changelog confirms Deep Research Agent preview launch 2025-12-11 but its docs now list `deep-research-preview-04-2026` / `deep-research-max-preview-04-2026` | cloud.google.com pricing; cloudprice.net; ai.google.dev/gemini-api/docs/deep-research + changelog | ⚠️ id verified only via secondary sources; **no announced shutdown of the 12-2025 id** — but Google has shipped 04-2026 successors on the Gemini API (watch item) |
+| input | 2.0 | $2 | Vertex pricing page "Gemini Deep Research Agent" + CloudPrice | ✓ (open question a: pricing confirmed) |
+| cachedInput | 0.2 | $0.20 | Vertex pricing page (CloudPrice omits cached) | ✓ |
+| output | 12.0 | $12 | Vertex pricing page + CloudPrice | ✓ (PR #4990 output 12.0 confirmed) |
+| contextWindow | 1048576 | **conflict**: CloudPrice says "66K tokens" context / "33K tokens" max output; underlying model is Gemini 3 Pro (1M ctx); no Google doc states the agent's window; launch blog only says it "handles large context gracefully" | cloudprice.net/models/vertex_ai/deep-research-pro-preview-12-2025; blog.google deep-research post; ai.google.dev/gemini-api/docs/deep-research (lists no token limits for any version) | ❓ UNVERIFIED — conflict NOT resolvable from Google docs (they publish no limits for the agent). 1048576 is an inference from the Gemini 3 Pro core; CloudPrice's 66K/33K (≈65,536/32,768) may reflect the agent's actual per-task envelope |
+| maxOutputTokens | 65536 | no Google figure; CloudPrice says 33K | same | ❓ UNVERIFIED |
+| capabilities deepResearch / memory:false | true / false | it is a managed autonomous research agent; multi-turn memory not offered in preview | blog.google + ai.google.dev/gemini-api/docs/deep-research | ✓ reasonable |
+| releaseDate | 2025-12-11 | "Published December 11, 2025"; changelog: "Launched the Gemini Deep Research Agent in preview" 2025-12-11 | blog.google deep-research-agent-gemini-api; ai.google.dev changelog | ✓ |
+
+---
+
+## Changes made in this pass (PR #4990) — re-verification verdicts
+
+| PR #4990 change | Verdict |
+|---|---|
+| Rename `vertex/gemini-3.1-flash-lite-preview` → `vertex/gemini-3.1-flash-lite` | ✓ correct — stable id GA 2026-05-07; preview shut down on Gemini API 2026-05-25; Vertex alias discontinues 2026-07-09 |
+| Drop `'minimal'` from 3.1-pro-preview thinking.levels | ✓ correct — thinking docs: minimal "Not supported" on 3.1 Pro |
+| `deprecated: true` on gemini-3-pro-preview | ✓ correct — shut down (Gemini API 2026-03-09; Vertex 2026-03-26) |
+| `deprecated: true` on both 2.0 models | ✓ correct — discontinued 2026-06-01 |
+| deep-research output → 12.0, cachedInput 0.2 | ✓ correct — Vertex pricing page row |
+| deep-research ctx 1048576 + maxOutputTokens 65536 | ❓ remains unverifiable; CloudPrice conflict (66K/33K) unresolved — Google publishes no limits for the agent |
+| maxOutputTokens 65536 on 3.5-flash / 3.1-pro / 3.1-flash-lite / 3-flash | ✓ correct — all four documented at 65,536 on their Gemini API model pages |
+| maxOutputTokens 65536 on 2.5-pro / 2.5-flash / 2.5-flash-lite | ✗ off-by-one for Vertex — Vertex docs say **65,535** (Gemini API pages say 65,536; platforms genuinely disagree; Vertex entry should carry the Vertex value) |
+| gemini-3-flash-preview ctx → 1048576 | ✓ correct |
+| updatedAt bumps to 2026-06-11 | ✓ all pricing values verified current today |
+
+## Recommended fixes (final disposition)
+
+Rejected by orchestrator re-verification (not applied):
+1. ~~`vertex/gemini-3.1-flash-lite` thinking.levels / default change~~ — the thinking doc confirms minimal IS supported and is the default on 3.1 Flash-Lite; the report's initial reading was wrong. No change made (google entry likewise untouched).
+
+Applied (warning — platform-correct values):
+3. `vertex/gemini-2.5-pro`: `maxOutputTokens` 65536 → 65535 (Vertex model doc)
+4. `vertex/gemini-2.5-flash`: `maxOutputTokens` 65536 → 65535 (Vertex model doc)
+5. `vertex/gemini-2.5-flash-lite`: `maxOutputTokens` 65536 → 65535 (Vertex model doc)
+6. `vertex/gemini-2.0-flash`: `input` 0.1 → 0.15, `output` 0.4 → 0.6 (Vertex pricing page; repo carries Gemini API prices). `cachedInput: 0.025` is unverified on Vertex — consider removing. Low urgency (model discontinued).
+
+Applied (suggestions):
+7. `vertex/gemini-3.5-flash`: add `recommended: true` — parity with the google entry; vertex provider currently has no recommended model
+8. `vertex/gemini-2.5-flash-lite`: add `speedOptimized: true` — parity with google entry (models.ts:1436)
+9. `vertex/gemini-3.1-flash-lite`: add `speedOptimized: true` — "most cost-effective model yet" / lowest-latency tier (blog.google); apply to the google entry too for consistency
+10. (optional; **not applied** — skipped as cosmetic, see the note below) both vertex 2.0 entries: add `maxOutputTokens: 8192` for parity with google entries (Vertex docs: 8,192 default) — cosmetic, models discontinued
+
+Also applied: `releaseDate` 2026-03-03 → 2026-05-07 on both the vertex and google `gemini-3.1-flash-lite` entries (GA date per the Gemini API changelog). Item 10 (maxOutputTokens on discontinued 2.0 entries) was skipped as cosmetic; `cachedInput` on vertex/gemini-2.0-flash was kept (Gemini API documented the rate; no Vertex contradiction found).
+
+## Deliberately not changed
+
+- **2.5 Pro / Flash / Flash-Lite not marked deprecated (open question d):** Vertex retirement is 2026-10-16 (extended from June 2026; Google says the final date will be confirmed with ≥6 months notice once Gemini 3 is GA). They are fully supported today; `deprecated: true` would prematurely hide working models. Recommendation: revisit ~2026-09 (calendar item), keep undeprecated now. Note `defaultModel: 'vertex/gemini-2.5-pro'` (models.ts:1491) will need a new default before retirement — consider moving to `vertex/gemini-3.5-flash`, which is the entry that receives `recommended: true` (fix 7 above).
+- **>200k-token pricing tiers (3.1-pro, 2.5-pro)** are not modeled — `pricing` is a flat structure; entries carry the ≤200k tier. Pre-existing, consistent with the google provider.
+- **Non-global endpoint surcharge (effective 2026-07-01):** Vertex adds +10% pricing for non-global endpoints on 3.x models ($1.65/$9.90 for 3.5-flash, etc.). Our default location is `us-central1` (non-global), so billed cost may exceed modeled cost starting July 1. Entries keep global pricing (the canonical published rate); flagged for ops awareness.
+- **`vertex/gemini-3-pro-preview` pricing/ctx left as historical** — model discontinued and absent from the current pricing page; `deprecated: true` is the user-facing protection.
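+- **releaseDate conventions:** 2.5-flash 2025-05-20 (I/O) kept despite an earlier 2025-04-17 Gemini-API preview, matching the repo's "first public launch announcement" convention. Exception: 3.1-flash-lite was initially going to stay at 2026-03-03 (preview announcement) under the same convention, but was ultimately changed to the 2026-05-07 stable GA date on both the vertex and google entries (see "Also applied" under Recommended fixes).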
+- **deep-research id not migrated** to the newer `deep-research-preview-04-2026` family — no announced shutdown of `deep-research-pro-preview-12-2025`, and the Vertex pricing row still matches it. Watch item for the next pass.
+
+## Unverifiable
+
+| Item | Attempted sources | Notes |
+|---|---|---|
+| `vertex/deep-research-pro-preview-12-2025` `contextWindow: 1048576` and `maxOutputTokens: 65536` | cloud.google.com pricing (no limits), ai.google.dev/gemini-api/docs/deep-research (lists only 04-2026 versions, no limits), blog.google launch post (no numbers), cloudprice.net (claims 66K ctx / 33K out) | Conflict NOT resolved: Google publishes no token limits for the agent. CloudPrice's 66K/33K (~65,536/32,768) is the only concrete figure and contradicts the repo's 1M. Current values are an inference from the Gemini 3 Pro core. Ask Google docs or test live before changing. |
+| Vertex-side model id for the Deep Research Agent | Vertex pricing page (row name only), Vertex docs (nav-only render), Context7 | Only third-party trackers tie `deep-research-pro-preview-12-2025` to Vertex. |
+| `vertex/gemini-2.0-flash` `cachedInput: 0.025` | Vertex pricing page (no cached row for 2.0) | $0.025 is the Gemini API cache price. Discontinued model; consider dropping the field. |
+| `vertex/gemini-2.0-flash-lite` `releaseDate: 2025-02-25` | blog.google Feb 2025 post (preview 2025-02-05), Vertex release notes (nav-only) | Exact GA date not found this session; plausible, left as-is. |
+| Vertex 3-pro-preview discontinuation date 2026-03-26 (exact) | Vertex deprecations page (nav-only), third-party migration guides | Gemini API shutdown 2026-03-09 is confirmed by the changelog; the Vertex-specific 03-26 date comes from secondary sources citing Google's deprecations table. Either way `deprecated: true` is correct. |
diff --git a/docs/models/xai.md b/docs/models/xai.md
new file mode 100644
index 0000000000..1fd8d159f4
--- /dev/null
+++ b/docs/models/xai.md
@@ -0,0 +1,91 @@
+# xAI Provider Validation — models.ts
+
+- **Date:** 2026-06-11
+- **Scope:** `xai` provider block in `apps/sim/providers/models.ts` (~lines 1752–1956), 13 models + provider config. Final re-verification after PR #4990 (deprecation flags, grok-4.20 repricing $2/$6 → $1.25/$2.50 and 2M → 1M, defaultModel → grok-4.3).
+- **Method:** Live WebFetch of xAI docs (primary source, wins all conflicts); OpenRouter as secondary pricing source; WebSearch for release-date pinning; `rg` audit of `apps/sim/providers/xai/` for parameter wiring.
+- **Sources:**
+  - https://docs.x.ai/developers/models (model listing + pricing)
+  - https://docs.x.ai/developers/models/grok-4.3, .../grok-4.20-0309-reasoning, .../grok-4.20-0309-non-reasoning, .../grok-4.20-multi-agent-0309, .../grok-build-0.1, .../grok-3, .../grok-3-fast, .../grok-4 (per-model pages)
+  - https://docs.x.ai/developers/migration/may-15-retirement (retirement/redirect table)
+  - https://docs.x.ai/developers/rest-api-reference/inference/chat (parameter ranges)
+  - https://docs.x.ai/developers/model-capabilities/text/reasoning (reasoning_effort semantics)
+  - https://openrouter.ai/x-ai/grok-4.3, https://openrouter.ai/x-ai/grok-4.20 (secondary)
+
+## Provider config
+
+| Field | Repo value | Source | Verdict |
+|---|---|---|---|
+| `defaultModel` | `grok-4.3` | docs.x.ai/developers/models — grok-4.3 is the current flagship ("most intelligent and fastest"); all retired slugs redirect to it | CORRECT (PR #4990 change re-verified) |
+| `modelPatterns` | `/^grok/` | All current model ids start with `grok` | CORRECT |
+
+## Active models
+
+### grok-4.3
+
+| Field | Repo value | Source value | Source | Verdict |
+|---|---|---|---|---|
+| input | 1.25 | $1.25 / 1M | docs.x.ai/developers/models/grok-4.3; OpenRouter agrees ($1.25) | CORRECT |
+| cachedInput | 0.2 | $0.20 / 1M | docs.x.ai/developers/models/grok-4.3 | CORRECT |
+| output | 2.5 | $2.50 / 1M | docs.x.ai/developers/models/grok-4.3; OpenRouter agrees ($2.50) | CORRECT |
+| contextWindow | 1000000 | 1,000,000 tokens | docs.x.ai per-model page; OpenRouter agrees (1M, "no output token limit") | CORRECT |
+| releaseDate | 2026-04-30 | April 30, 2026 | OpenRouter created date; consistent with xAI announcement timeline | CORRECT |
+| temperature.max | 2 (fixed this pass, was 1) | 0–2 | docs.x.ai chat REST reference: "between 0 and 2" | ✓ after fix |
+| recommended | true | flagship model | docs.x.ai | CORRECT |
+
+Caveat: OpenRouter notes grok-4.3 requests exceeding 200k total tokens bill at a higher tier. xAI's own pricing tables show flat $1.25/$2.50; Sim's pricing model is flat, so the base tier is recorded. No change.
+
+### grok-4.20-0309-reasoning / grok-4.20-0309-non-reasoning / grok-4.20-multi-agent-0309
+
+All three per-model pages were fetched individually; all three show identical numbers (multi-agent is NOT priced differently):
+
+| Field | Repo value | Source value | Source | Verdict |
+|---|---|---|---|---|
+| input | 1.25 | $1.25 / 1M | all three per-model pages | CORRECT (PR #4990 reprice re-verified) |
+| cachedInput | 0.2 | $0.20 / 1M | all three per-model pages | CORRECT |
+| output | 2.5 | $2.50 / 1M | all three per-model pages | CORRECT |
+| contextWindow | 1000000 | 1,000,000 tokens | all three per-model pages | CORRECT — see conflict note |
+| releaseDate | 2026-03-10 | API availability March 10, 2026 | WebSearch (xAI API made Grok 4.20 + multi-agent available 2026-03-10; `0309` slug = March 9 snapshot) | CORRECT (secondary-source verified) |
+| temperature.max | 2 (fixed this pass, was 1) | 0–2 | docs.x.ai chat REST reference | ✓ after fix |
+
+**1M vs 2M conflict resolved:** OpenRouter (x-ai/grok-4.20) lists 2M context; xAI's three per-model pages each state "Context window: 1,000,000 tokens". Press coverage attributes the larger window to "agent modes" (consumer-side), not the API. xAI docs win → **1M confirmed, keep**. (OpenRouter's created date of 2026-03-31 is its listing date, not the API release.)
+
+## Deprecated models (9 entries)
+
+Retirement source: docs.x.ai/developers/migration/may-15-retirement — "After May 15, 2026 at 12:00 PM PT, requests to the retired model slugs will automatically redirect" and bill at the redirect target's rates. Today (2026-06-11) is past that date: the redirects are live. The per-model docs pages for the legacy slugs (`grok-4`, `grok-4-0709`, `grok-3`, `grok-3-fast`) now resolve to the grok-4.3 page showing $1.25/$0.20/$2.50 — direct confirmation that the slugs are aliases billing at target rates.
+
+| Model id | Redirect target (source: may-15-retirement page) | `deprecated: true` verdict |
+|---|---|---|
+| grok-4-latest | grok-4.3 (alias of grok-4-0709; per-model page now resolves to grok-4.3) | CORRECT |
+| grok-4-0709 | grok-4.3 (reasoning_effort low) — explicitly listed | CORRECT |
+| grok-4-1-fast-reasoning | grok-4.3 (low) — explicitly listed | CORRECT |
+| grok-4-1-fast-non-reasoning | grok-4.3 (none) — explicitly listed | CORRECT |
+| grok-4-fast-reasoning | grok-4.3 (low) — explicitly listed | CORRECT |
+| grok-4-fast-non-reasoning | grok-4.3 (none) — explicitly listed | CORRECT |
+| grok-code-fast-1 | grok-build-0.1 — explicitly listed | CORRECT |
+| grok-3-latest | grok-4.3 (none) — `grok-3` explicitly listed; `-latest` is its alias | CORRECT |
+| grok-3-fast-latest | grok-4.3 — not on the May-15 table by name, but docs.x.ai/developers/models/grok-3-fast now resolves to the grok-4.3 page with grok-4.3 pricing | CORRECT |
+
+Legacy pricing fields on these entries ($3/$15 for grok-4 family and grok-3, $5/$25 for grok-3-fast, $0.20/$0.50 fast families, $0.20/$1.50 grok-code-fast-1) match the rates these models historically carried, but xAI no longer publishes them — they are unverifiable against live docs and, more importantly, **no longer what calls cost**.
+
+**Recommendation (one clear position):** reprice the deprecated entries to their redirect targets' rates — the 8 grok-4.3-redirected slugs to $1.25 / $0.20 cached / $2.50, and grok-code-fast-1 to grok-build-0.1's $1.00 / $0.20 cached / $2.00. Rationale: Sim computes execution cost at run time from the current `models.ts` values and stores the result in execution logs; past log rows are unaffected by a reprice, so nothing historical is lost. Meanwhile any workflow still pointed at a retired slug bills at redirect rates today, so the legacy numbers overestimate live costs by up to 6× (grok-4-latest: $15 vs $2.50 output). This is docs-backed (the retirement page states the redirect billing explicitly). **Disposition: APPLIED in this pass** — the 8 grok-4.3 redirects now carry $1.25 / $0.20 cached / $2.50 with `contextWindow: 1000000`, and grok-code-fast-1 carries grok-build-0.1's $1.00 / $0.20 cached / $2.00 (256k unchanged).
+
+## Changes made in this pass
+
+Initially none to `models.ts` (per instructions, this pass was scoped to writing only this justification doc). Per the final dispositions recorded above, two changes were then applied: the deprecated-entry reprice to redirect-target rates (see the recommendation in the previous section), and the verified temperature fix:
+
+- **all 13 xai entries: `capabilities.temperature.max` 1 → 2.** The xAI chat REST reference documents `temperature` as "between 0 and 2" (same range OpenAI uses). The repo UI uses this for slider bounds, so the previous `max: 1` artificially halved the usable range. Source: https://docs.x.ai/developers/rest-api-reference/inference/chat
+
+Changes from PR #4990 re-verified and confirmed correct: 9 deprecation flags, grok-4.20 trio reprice to $1.25/$2.50 with 1M context, defaultModel grok-4.3.
+
+## Deliberately not changed
+
+- **grok-4.3 `reasoningEffort` capability flag — not added.** The REST reference and reasoning docs confirm grok-4.3 supports `reasoning_effort` with `none` / `low` (default) / `medium` / `high` ("Only supported by grok-4.3"). However, `apps/sim/providers/xai/index.ts` forwards only `temperature` (verified by rg: single hit at line 101, `basePayload.temperature`); no `reasoning_effort` wiring exists, so the capability flag would be dead metadata. **Recommended follow-up:** wire `reasoning_effort` in the xai provider, then add the capability flag to grok-4.3. Note for that follow-up: per the reasoning docs, `presence_penalty`, `frequency_penalty`, and `stop` cannot be combined with reasoning, and grok-4.20-multi-agent uses a different control (`reasoning.effort`: low/medium/high/xhigh, controlling agent count, not reasoning depth).
+
+- **grok-build-0.1 — not added.** grok-code-fast-1's successor: $1.00 input / $0.20 cached / $2.00 output, 256k context, "xAI's fast coding model trained specifically for agentic coding" (docs.x.ai/developers/models/grok-build-0.1). Recommended addition; adding models is separate work from validation.
+- **grok-4.3 tiered >200k-token pricing — not modeled.** Sim's pricing schema is flat; base tier recorded (and xAI's own table is flat).
+
+## Unverifiable
+
+- **Original (pre-retirement) pricing of the 9 deprecated entries** — xAI docs no longer publish historical rates; values match known historical pricing but cannot be confirmed against a live source.
+- **Release dates of deprecated entries** (2025-07-09, 2025-11-19, 2025-09-19, 2025-08-28, 2025-02-17) — consistent with historical announcements/slugs (e.g. `grok-4-0709`), not republished on live docs.
+- **grok-4.3 / grok-4.20 official release dates on xAI docs** — per-model pages omit release dates. grok-4.3: 2026-04-30 corroborated by OpenRouter. grok-4.20: 2026-03-10 corroborated by secondary reporting of xAI API availability plus the `0309` snapshot slug; treated as verified-by-secondary-source.

From 025f84b9e0dfa584e3fc03b794c30668a89ed7cc Mon Sep 17 00:00:00 2001
From: waleed <walif6@gmail.com>
Date: Thu, 11 Jun 2026 20:06:40 -0700
Subject: [PATCH 3/4] chore(providers): keep model validation logs local, not
 in the repo

---
 docs/models/anthropic.md                 | 232 ----------------
 docs/models/azure.md                     | 258 -----------------
 docs/models/bedrock.md                   | 226 ---------------
 docs/models/deepseek-cerebras.md         | 189 -------------
 docs/models/embeddings-rerank-dynamic.md |  75 -----
 docs/models/google.md                    | 184 ------------
 docs/models/groq.md                      | 157 -----------
 docs/models/mistral.md                   | 305 --------------------
 docs/models/openai.md                    | 338 -----------------------
 docs/models/vertex.md                    | 212 --------------
 docs/models/xai.md                       |  91 ------
 11 files changed, 2267 deletions(-)
 delete mode 100644 docs/models/anthropic.md
 delete mode 100644 docs/models/azure.md
 delete mode 100644 docs/models/bedrock.md
 delete mode 100644 docs/models/deepseek-cerebras.md
 delete mode 100644 docs/models/embeddings-rerank-dynamic.md
 delete mode 100644 docs/models/google.md
 delete mode 100644 docs/models/groq.md
 delete mode 100644 docs/models/mistral.md
 delete mode 100644 docs/models/openai.md
 delete mode 100644 docs/models/vertex.md
 delete mode 100644 docs/models/xai.md

diff --git a/docs/models/anthropic.md b/docs/models/anthropic.md
deleted file mode 100644
index 81dad60b26..0000000000
--- a/docs/models/anthropic.md
+++ /dev/null
@@ -1,232 +0,0 @@
-# Anthropic Provider Model Validation — Justification Doc
-
-- **Date:** 2026-06-11
-- **Scope:** `anthropic` provider block in `apps/sim/providers/models.ts` (12 models), re-verified after PR #4990
-- **Method:** Live WebFetch of official Anthropic docs (platform.claude.com), secondary pricing source (OpenRouter), Anthropic news posts via web search for launch dates, plus `rg` verification that every capability flag is actually consumed by provider code (`apps/sim/providers/anthropic/core.ts`, `apps/sim/providers/models.ts`, `apps/sim/providers/utils.ts`).
-- **Primary sources:**
-  - Models overview: https://platform.claude.com/docs/en/about-claude/models/overview
-  - Pricing: https://platform.claude.com/docs/en/about-claude/pricing
-  - Deprecations: https://platform.claude.com/docs/en/about-claude/model-deprecations
-  - Effort: https://platform.claude.com/docs/en/build-with-claude/effort
-  - Structured outputs: https://platform.claude.com/docs/en/build-with-claude/structured-outputs
-  - Computer use: https://platform.claude.com/docs/en/agents-and-tools/tool-use/computer-use-tool
-  - Messages API: https://platform.claude.com/docs/en/api/messages
-  - Secondary pricing: https://openrouter.ai/provider/anthropic
-  - Launch dates: https://www.anthropic.com/news/claude-4 , https://www.anthropic.com/news/claude-3-haiku
-
-**Verdict key:** ✓ = verified against live docs · ⚠ = recommended change · ◆ = intentional deviation (documented) · ◇ = unverifiable from live docs (reason given)
-
----
-
-## How capability fields are consumed (code verification)
-
-| Field | Consumer | Behavior |
-|---|---|---|
-| `thinking.levels` / `thinking.default` | `core.ts` `buildThinkingConfig()` via `getThinkingCapability()` | Level must be in `levels` or thinking is skipped. Fable 5 / Opus 4.8 / 4.7 / 4.6 / Sonnet 4.6 (`supportsAdaptiveThinking()`) → `thinking: {type: 'adaptive'}` + `output_config: {effort: <level>}`. All other models → `thinking: {type: 'enabled', budget_tokens}` with low=2048 / medium=8192 / high=32768 (so `xhigh`/`max` must never appear on a budget-tokens model — `THINKING_BUDGET_TOKENS` has no entry and config would be dropped). |
-| `temperature` | payload construction in `core.ts` | Presence of `capabilities.temperature` allows the param; omitted on a model means Sim never sends it. Stripped when thinking enabled (thinking incompatible with temperature). |
-| `nativeStructuredOutputs` | `models.ts:3393` (`getModelsWithNativeStructuredOutputs`-style helper) consumed by `core.ts` | With flag → native `output_format`/`output_config` JSON-schema path; without → `generateSchemaInstructions()` prompt-injection fallback. |
-| `computerUse` | `models.ts:3167` `getComputerUseModels()` → `providers/utils.ts:143` `computerUseModels` | Gates Sim's computer-use path per provider. **No Anthropic model currently sets it.** |
-| `contextWindow` / `maxOutputTokens` / `pricing` | cost calculation, token clamping, UI | Straight passthrough. Sim does **not** send any `context-1m-*` beta header (`rg 'context-1m' apps/sim/providers/anthropic/` → no matches), so `contextWindow` must reflect the no-beta-header window. |
-| `reasoningEffort` / `verbosity` | **not consumed** by the Anthropic provider (OpenAI-family fields) | Correctly absent from all Anthropic entries. |
-
----
-
-## Per-model field verification
-
-### claude-fable-5
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| pricing.input | 10.0 | Pricing doc ($10/MTok); OpenRouter $10/M | ✓ |
-| pricing.cachedInput | 1.0 | Pricing doc cache hit $1/MTok (0.1×) | ✓ |
-| pricing.output | 50.0 | Pricing doc $50/MTok; OpenRouter $50/M | ✓ |
-| capabilities.temperature | absent | Deprecations doc: sampling params 400 on Opus 4.7 and later; Fable 5 rejects `temperature`/`top_p`/`top_k` | ✓ |
-| capabilities.nativeStructuredOutputs | **absent** | Structured-outputs doc: "generally available … for **Claude Fable 5**, Claude Mythos 5, Claude Opus 4.8, …" | ⚠ **should be `true`** — Fable 5 is in the GA list; current absence routes Fable 5 through the prompt-injection fallback instead of native JSON-schema output |
-| capabilities.maxOutputTokens | 128000 | Models overview: Max output 128k | ✓ |
-| thinking.levels | low/medium/high/xhigh/max | Effort doc: `xhigh` and `max` available on Fable 5; low/medium/high universal | ✓ |
-| thinking.default | high | Effort doc: default is `high` | ✓ |
-| contextWindow | 1000000 | Models overview: 1M tokens (default, no beta header) | ✓ |
-| releaseDate | 2026-06-09 | Models overview: "generally available … beginning June 9, 2026" | ✓ |
-| (no deprecated flag) | — | Active | ✓ |
-
-Note: Fable 5's thinking is always-on; Sim's adaptive path (`thinking: {type:'adaptive'}` + effort) is the documented-correct call shape. The `'none'` sentinel omits the `thinking` param, which on Fable 5 means adaptive-by-default rather than disabled — acceptable (explicit `disabled` would 400).
-
-### claude-opus-4-8
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| pricing.input / cachedInput / output | 5.0 / 0.5 / 25.0 | Pricing doc $5 / $0.50 cache-hit / $25; OpenRouter $5/$25 | ✓ |
-| pricing.updatedAt | 2026-05-28 | bumped in PR #4990 | ✓ |
-| temperature | absent | Deprecations doc: 400 on Opus 4.7 and later, "including Claude Opus 4.8" | ✓ |
-| nativeStructuredOutputs | true | Structured-outputs doc GA list | ✓ |
-| maxOutputTokens | 128000 | Models overview | ✓ |
-| thinking.levels | low/medium/high/xhigh/max | Effort doc: `xhigh` and `max` available on Opus 4.8 | ✓ |
-| thinking.default | high | Effort doc: "The default is `high` on all surfaces" | ✓ |
-| contextWindow | 1000000 | Models overview: 1M (standard pricing, no long-context premium) | ✓ |
-| releaseDate | 2026-05-28 | Deprecations doc: tentative retirement "Not sooner than May 28, **2027**" (release + 1 yr convention) — confirms the PR #4990 correction | ✓ changed this pass (PR #4990), re-verified |
-| recommended | true | Sim product choice; consistent with docs' "most capable Opus-tier model" | ◆ product decision |
-
-### claude-opus-4-7
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| pricing | 5.0 / 0.5 / 25.0 (updatedAt 2026-04-16) | Pricing doc; OpenRouter $5/$25 | ✓ |
-| temperature | absent | Deprecations doc: 400 on Opus 4.7+ | ✓ |
-| nativeStructuredOutputs | true | Structured-outputs doc GA list | ✓ |
-| maxOutputTokens | 128000 | Models overview (legacy table) | ✓ |
-| thinking.levels | low/medium/high/xhigh/max | Effort doc: `xhigh` introduced with 4.7; `max` available | ✓ |
-| contextWindow | 1000000 | Models overview legacy table: 1M | ✓ |
-| releaseDate | 2026-04-16 | Deprecations doc: "Not sooner than April 16, 2027" | ✓ |
-
-### claude-opus-4-6
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| pricing | 5.0 / 0.5 / 25.0 (updatedAt 2026-06-11) | Pricing doc; OpenRouter $5/$25 | ✓ |
-| temperature {0,1} | present | Sampling-param removal is "Opus 4.7 and later" — Opus 4.6 still accepts `temperature` (0.0–1.0 per Messages API) | ✓ |
-| nativeStructuredOutputs | true | Structured-outputs doc GA list | ✓ |
-| maxOutputTokens | 128000 | Models overview legacy table | ✓ |
-| thinking.levels | low/medium/high/**max** (no xhigh) | Effort doc: `max` on Opus 4.6 ✓; `xhigh` only on Fable 5 / Opus 4.8 / 4.7 — correctly excluded | ✓ |
-| contextWindow | 1000000 | Models overview legacy table: 1M | ✓ |
-| releaseDate | 2026-02-05 | Deprecations doc: "Not sooner than February 5, 2027" | ✓ |
-
-### claude-sonnet-4-6
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| pricing | 3.0 / 0.3 / 15.0 (updatedAt 2026-06-11) | Pricing doc $3 / $0.30 / $15; OpenRouter $3/$15 | ✓ |
-| temperature {0,1} | present | Sonnet 4.6 is not in the "Opus 4.7 and later" sampling-param removal; temperature 0.0–1.0 valid | ✓ |
-| nativeStructuredOutputs | true | Structured-outputs doc GA list | ✓ |
-| maxOutputTokens | 64000 | Models overview: 64k | ✓ |
-| thinking.levels | low/medium/high/**max** (no xhigh) | Effort doc: `max` available on Sonnet 4.6; `xhigh` is NOT (Fable 5 / Opus 4.8 / 4.7 only) | ✓ |
-| contextWindow | 1000000 | Models overview: 1M, no beta header required; "Long context pricing": full 1M at standard pricing on Sonnet 4.6 | ✓ |
-| releaseDate | 2026-02-17 | Deprecations doc: "Not sooner than February 17, 2027" | ✓ |
-| recommended | true | Sim product choice ("best combination of speed and intelligence") | ◆ product decision |
-
-### claude-opus-4-5
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| pricing | 5.0 / 0.5 / 25.0 (updatedAt 2026-06-11) | Pricing doc; OpenRouter $5/$25 | ✓ |
-| temperature {0,1} | present | ≤ 4.6-era model; accepted | ✓ |
-| nativeStructuredOutputs | true | Structured-outputs doc GA list ("Claude Opus 4.5") | ✓ |
-| maxOutputTokens | 64000 | Models overview legacy table | ✓ |
-| thinking.levels | low/medium/high | Effort doc: Opus 4.5 supports effort but neither `max` nor `xhigh`. Sim's code path for 4.5 uses `budget_tokens` (not effort) — levels map to budget tiers; same three levels are valid either way | ✓ |
-| contextWindow | 200000 | Models overview legacy table: 200k | ✓ |
-| releaseDate | 2025-11-24 | Deprecations doc: "Not sooner than November 24, 2026"; anthropic.com/news/claude-opus-4-5 (Nov 24, 2025) | ✓ |
-
-### claude-opus-4-1
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| pricing | 15.0 / 1.5 / 75.0 (updatedAt 2026-06-11) | Pricing doc $15 / $1.50 / $75; OpenRouter $15/$75 | ✓ |
-| temperature {0,1} | present | pre-4.7 model; accepted | ✓ |
-| nativeStructuredOutputs | **removed in PR #4990** | Structured-outputs doc GA list does **not** include Opus 4.1 | ✓ changed this pass (PR #4990), re-verified correct |
-| maxOutputTokens | 32000 | Models overview legacy table: 32k | ✓ |
-| thinking.levels | low/medium/high | budget_tokens model; extended thinking supported | ✓ |
-| contextWindow | 200000 | Models overview legacy table | ✓ |
-| releaseDate | 2025-08-05 | Snapshot `claude-opus-4-1-20250805`; launched Aug 5, 2025 | ✓ |
-| deprecated | true | Deprecations doc: deprecated June 5, 2026; retires Aug 5, 2026 → migrate to claude-opus-4-8 | ✓ changed this pass (PR #4990), re-verified |
-
-### claude-opus-4-0
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| pricing | 15.0 / 1.5 / 75.0 (updatedAt 2026-06-11) | Pricing doc ("Claude Opus 4 (deprecated)"); OpenRouter $15/$75 | ✓ |
-| temperature {0,1} | present | pre-4.7; accepted | ✓ |
-| nativeStructuredOutputs | absent | Not in structured-outputs GA list | ✓ |
-| maxOutputTokens | 32000 | Models overview legacy table | ✓ |
-| thinking.levels | low/medium/high | budget_tokens model | ✓ |
-| contextWindow | 200000 | Models overview legacy table | ✓ |
-| releaseDate | 2025-05-22 | **Open question (a) resolved:** Claude 4 (Opus 4 + Sonnet 4) launched **May 22, 2025** (anthropic.com/news/claude-4). The `20250514` in the full ID is the snapshot date, not the launch date. Repo convention uses launch dates (cf. haiku-4-5: launch 2025-10-15 vs snapshot 20251001) | ✓ — **no change recommended** |
-| deprecated | true | Deprecations doc: deprecated Apr 14, 2026; retires June 15, 2026 → claude-opus-4-8 | ✓ changed this pass (PR #4990), re-verified |
-
-### claude-sonnet-4-5
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| pricing | 3.0 / 0.3 / 15.0 (updatedAt 2026-06-11) | Pricing doc; OpenRouter $3/$15 | ✓ |
-| temperature {0,1} | present | pre-4.7; accepted | ✓ |
-| nativeStructuredOutputs | true | Structured-outputs doc GA list ("Claude Sonnet 4.5") | ✓ |
-| maxOutputTokens | 64000 | Models overview legacy table | ✓ |
-| thinking.levels | low/medium/high | Effort doc: effort errors on Sonnet 4.5 — Sim correctly routes it through budget_tokens; no max/xhigh | ✓ |
-| contextWindow | 200000 | **Open question (e) resolved:** Models overview legacy table lists Sonnet 4.5 at **200k**. The historical 1M for Sonnet 4.5 required the `context-1m` beta header, which Sim does not send (`rg 'context-1m'` → no matches in `apps/sim/providers/anthropic/`) | ✓ changed this pass (PR #4990, 1000000 → 200000), re-verified correct |
-| releaseDate | 2025-09-29 | Snapshot `claude-sonnet-4-5-20250929`; launched Sep 29, 2025 | ✓ |
-
-### claude-sonnet-4-0
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| pricing | 3.0 / 0.3 / 15.0 (updatedAt 2026-06-11) | Pricing doc ("Claude Sonnet 4 (deprecated)"); OpenRouter $3/$15 | ✓ |
-| temperature {0,1} | present | pre-4.7; accepted | ✓ |
-| nativeStructuredOutputs | absent | Not in structured-outputs GA list | ✓ |
-| maxOutputTokens | 64000 | Models overview legacy table: 64k | ✓ |
-| thinking.levels | low/medium/high | budget_tokens model | ✓ |
-| contextWindow | 200000 | Models overview legacy table: 200k; same `context-1m` beta-header reasoning as Sonnet 4.5 | ✓ changed this pass (PR #4990), re-verified correct |
-| releaseDate | 2025-05-22 | Claude 4 launch May 22, 2025 (see opus-4-0) — no change | ✓ |
-| deprecated | true | Deprecations doc: deprecated Apr 14, 2026; retires June 15, 2026 → claude-sonnet-4-6 | ✓ changed this pass (PR #4990), re-verified |
-
-### claude-haiku-4-5
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| pricing | 1.0 / 0.1 / 5.0 (updatedAt 2026-06-11) | Pricing doc $1 / $0.10 / $5; OpenRouter $1/$5 | ✓ |
-| temperature {0,1} | present | pre-4.7; accepted | ✓ |
-| nativeStructuredOutputs | true | Structured-outputs doc GA list | ✓ |
-| maxOutputTokens | 64000 | Models overview: 64k | ✓ |
-| thinking.levels | low/medium/high | Effort doc: effort errors on Haiku 4.5; extended thinking (budget_tokens) supported — Sim routes via budget_tokens | ✓ |
-| contextWindow | 200000 | Models overview: 200k | ✓ |
-| releaseDate | 2025-10-15 | Launch Oct 15, 2025 (deprecations doc: retirement "Not sooner than October 15, 2026"); snapshot is `20251001` — repo correctly uses the launch date | ✓ |
-| speedOptimized | true | Sim-internal flag; docs: "The fastest model" | ◆ Sim-internal, consistent |
-
-### claude-3-haiku-20240307
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| pricing.input / output | 0.25 / 1.25 (updatedAt 2026-04-01) | ◇ No longer listed on the live pricing page (only retired Haiku 3.5 remains) or OpenRouter — model is retired. Values match Anthropic's historical published pricing ($0.25/$1.25) | ◇ unverifiable live; historically consistent — leave as-is |
-| pricing.cachedInput | 0.03 | ◇ Historical cache-hit pricing for Claude 3 Haiku was $0.03/MTok (slightly above the 0.1× convention) | ◇ unverifiable live; historically consistent |
-| temperature {0,1} | present | Claude 3-era; accepted (model no longer serves requests anyway) | ✓ (moot) |
-| maxOutputTokens | 4096 | Historical Claude 3 Haiku max output | ◇ unverifiable live; historically consistent |
-| no thinking capability | absent | Claude 3 Haiku has no extended thinking | ✓ |
-| contextWindow | 200000 | Historical Claude 3 family window | ◇ unverifiable live; historically consistent |
-| releaseDate | 2024-03-07 | Claude 3 Haiku GA was **March 13, 2024** (anthropic.com/news/claude-3-haiku); `20240307` is the snapshot date. Repo convention elsewhere uses launch dates | ⚠ optional: `2024-03-07` → `2024-03-13` (cosmetic; model is retired) |
-| deprecated | true | Deprecations doc: **Retired April 20, 2026** ("Requests to retired models will fail") | ◆ see open question (b) below |
-
----
-
-## Changes made in this pass (PR #4990) — all re-verified correct
-
-| Change | Verification |
-|---|---|
-| opus-4-8 releaseDate → 2026-05-28 | Deprecations doc retirement floor "May 28, 2027" (release + 1 yr) ✓ |
-| deprecated:true on opus-4-1 | Deprecated 2026-06-05, retires 2026-08-05 ✓ |
-| deprecated:true on opus-4-0, sonnet-4-0 | Deprecated 2026-04-14, retire 2026-06-15 ✓ |
-| sonnet-4-5 & sonnet-4-0 contextWindow 1000000 → 200000 | Models overview legacy table: both 200k. The 1M window on these models was beta-header-gated (`context-1m`); Sim never sends that header ✓ |
-| removed nativeStructuredOutputs from opus-4-1 | Opus 4.1 absent from structured-outputs GA list ✓ |
-| updatedAt bumps | informational ✓ |
-
-## Recommended fixes from THIS validation
-
-1. **claude-fable-5: add `nativeStructuredOutputs: true`.** Structured-outputs doc explicitly lists Claude Fable 5 as GA. Without the flag, Sim falls back to prompt-injected schema instructions for Fable 5 instead of the native JSON-schema output path — weaker guarantees on the flagship model.
-2. *(optional, cosmetic)* **claude-3-haiku-20240307: releaseDate `2024-03-07` → `2024-03-13`.** Repo convention is launch date (not snapshot date); GA was March 13, 2024. Low value since the model is retired.
-
-## Deliberately not changed
-
-- **`computerUse` on Anthropic models (open question c).** Anthropic documents computer-use support (beta) for: Opus 4.8 / 4.7 / 4.6 / 4.5 + Sonnet 4.6 (header `computer-use-2025-11-24`) and Sonnet 4.5, Haiku 4.5, Opus 4.1, Sonnet 4, Opus 4 (header `computer-use-2025-01-24`). **Claude Fable 5 is NOT in the documented list.** The flag IS consumed (`getComputerUseModels()` → `providers/utils.ts` `computerUseModels`), so setting it would light up Sim's computer-use path for these models — a feature-enablement/product decision (beta headers, screenshot plumbing, UX), not a data correction. Left unchanged; documented here for whoever owns that decision.
-- **opus-4-0 / sonnet-4-0 releaseDate `2025-05-22` (open question a).** Confirmed correct: Claude 4 launched May 22, 2025; `20250514` is the snapshot suffix, not the launch date.
-- **claude-3-haiku-20240307 entry kept (open question b).** The model was retired 2026-04-20 — live requests now fail. Recommendation: **keep the entry with `deprecated: true`** rather than delete. Removing it would break saved workflows that reference the model ID (model lookup, pricing for historical logs, UI rendering of old runs). The schema has no `retired` field; if one is ever added, this model is the first candidate. Runtime failures surface from Anthropic's API as clear 404s, which is an acceptable failure mode for a retired model.
-- **`recommended` flags (opus-4-8, sonnet-4-6) and `speedOptimized` (haiku-4-5)** — Sim product/UI decisions, consistent with docs positioning; not doc-verifiable facts.
-- **`defaultModel: 'claude-sonnet-4-6'`** — active, recommended model; valid product choice.
-- **Thinking level lists for budget-tokens models (opus-4-5, sonnet-4-5, sonnet-4-0, opus-4-1, opus-4-0, haiku-4-5).** Their `low/medium/high` are Sim-defined budget tiers (2048/8192/32768 budget_tokens), not API effort levels — internally consistent with `THINKING_BUDGET_TOKENS` in `core.ts`. Note Opus 4.5 does support the API `effort` param (low/medium/high) per the effort doc, but Sim routes it through budget_tokens (`supportsAdaptiveThinking()` excludes 4.5); that is a code-path choice in `core.ts`, not a models.ts data error, and the level list is valid under either interpretation.
-
-## Open question (d) resolution — thinking levels & temperature boundary
-
-- `xhigh`: Fable 5, Opus 4.8, Opus 4.7 only (effort doc). Repo ✓.
-- `max`: Fable 5, Opus 4.8, Opus 4.7, Opus 4.6, Sonnet 4.6 (effort doc; **not** Opus 4.5 / Sonnet 4.5 / Haiku 4.5). Repo ✓ — including Sonnet 4.6 `max`, verified.
-- Effort default `high` on all supporting models (effort doc: "Setting effort to high produces exactly the same behavior as omitting the parameter"). Repo `default: 'high'` ✓.
-- Temperature boundary: deprecations doc — `temperature`/`top_p`/`top_k` return 400 on **Opus 4.7 and later (incl. Opus 4.8) and Fable 5**; still valid (0.0–1.0, default 1.0 per Messages API) on Opus 4.6, Sonnet 4.6, and everything earlier. Repo: temperature absent exactly on fable-5 / opus-4-8 / opus-4-7, present `{min:0, max:1}` on opus-4-6 / sonnet-4-6 and all older models ✓.
-
-## Unverifiable
-
-- **claude-3-haiku-20240307 pricing, contextWindow (200k), maxOutputTokens (4096):** the model is retired and has been removed from the live pricing/overview pages and OpenRouter. Values match Anthropic's historical published specs; no contradiction found. No change recommended.
-- **Exact cache-write pricing is not modeled** (Sim's schema has only `cachedInput` = cache read). Live docs confirm cache reads = 0.1× input for every current model, matching every `cachedInput` value except the retired claude-3-haiku-20240307 (historical $0.03, slightly above 0.1×). 5-min/1-hour write premiums (1.25× / 2×) are not representable in the current schema — noting for completeness, not a defect.
diff --git a/docs/models/azure.md b/docs/models/azure.md
deleted file mode 100644
index 03f5dfd72d..0000000000
--- a/docs/models/azure.md
+++ /dev/null
@@ -1,258 +0,0 @@
-# Azure OpenAI & Azure Anthropic model validation
-
-**Date:** 2026-06-11
-**Scope:** `azure-openai` block (17 models) and `azure-anthropic` block (5 models) in `apps/sim/providers/models.ts`. Final exhaustive re-validation following PR #4990.
-
-## Method
-
-Every field was checked against live primary sources fetched on 2026-06-11:
-
-1. **Specs (context window, max output, version dates, API support, lifecycle):**
-   - https://learn.microsoft.com/en-us/azure/ai-foundry/foundry-models/concepts/models-sold-directly-by-azure (doc updated 2026-06-05)
-   - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/reasoning (reasoning effort / verbosity feature matrix, doc updated 2026-06-05)
-   - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/model-retirements (lifecycle policy + gpt-4o dates)
-   - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/model-router and .../concepts/model-router
-   - https://learn.microsoft.com/en-us/azure/foundry/foundry-models/how-to/use-foundry-models-claude (doc updated 2026-06-11)
-   - https://platform.claude.com/docs/en/build-with-claude/claude-in-microsoft-foundry
-   - https://platform.claude.com/docs/en/about-claude/pricing
-   - https://platform.claude.com/docs/en/about-claude/models/overview
-   - https://platform.claude.com/docs/en/build-with-claude/structured-outputs
-2. **Azure OpenAI pricing:** Azure Retail Prices API (`https://prices.azure.com/api/retail/prices?$filter=serviceName eq 'Foundry Models' and contains(meterName,'...')`). All quoted prices are the **Global Standard** ("Gl"/"glbl") meters, normalized to USD per 1M tokens. The marketing pricing page times out; the Retail Prices API is authoritative for billed meters.
-3. **Provider implementation:** `apps/sim/providers/azure-openai/index.ts` (API dispatch), `apps/sim/providers/azure-anthropic/index.ts` (Messages API via `@anthropic-ai/sdk` against `{endpoint}/anthropic`).
-
-Sim convention notes: `pricing.cachedInput` = cache-read price; `releaseDate` for `azure/*` entries = the Azure model **version date** (convention set in PR #4990 with gpt-4o → 2024-11-20 and model-router → 2025-05-19).
-
----
-
-## Block: `azure-openai` (defaultModel: `azure/gpt-4o`)
-
-### azure/gpt-4o
-
-| Field | Current value | Source / evidence | Verdict |
-| --- | --- | --- | --- |
-| pricing.input | 2.5 | Retail API `gpt 4o 1120 Inp glbl` = 0.0025/1K = $2.50/1M | OK |
-| pricing.cachedInput | 1.25 | Retail API `gpt 4o 1120 cached Inp glbl` = 0.00125/1K = $1.25/1M | **OK — VERIFIED** (open question b resolved) |
-| pricing.output | 10.0 | Retail API `gpt 4o 1120 Outp glbl` = 0.01/1K = $10/1M | OK |
-| temperature 0–2 | yes | Standard chat model; reasoning-model parameter restrictions don't apply | OK |
-| maxOutputTokens | **(absent)** | models-sold-directly: gpt-4o (2024-11-20) "Input: 128,000 / Output: 16,384" | **FIX: add `maxOutputTokens: 16384`** |
-| contextWindow | 128000 | same row | OK |
-| releaseDate | 2024-11-20 | Azure version `2024-11-20` (PR #4990 change re-verified) | OK |
-| deprecated | (absent) | model-retirements: versions 2024-05-13 / 2024-08-06 **retired 2026-03-31** (auto-upgraded to gpt-5.1); version 2024-11-20 "retires **2026-10-01**" | **RECOMMEND `deprecated: true`** — firm retirement date within ~3.7 months. NOTE: gpt-4o is the `azure-openai` `defaultModel`; changing the default (e.g. to azure/gpt-5.1 per Azure's own auto-upgrade path) is a product decision — documented only, not assumed. |
-
-### azure/gpt-5.4
-
-| Field | Current value | Source / evidence | Verdict |
-| --- | --- | --- | --- |
-| pricing | 2.5 / 0.25 / 15.0 | Retail API `5.4 inp Gl` 2.5, `5.4 cd inp Gl` 0.25, `5.4 opt Gl` 15.0 | OK |
-| reasoningEffort | none, low, medium, high | reasoning doc footnote 7 enumerates `'none'` support as exactly: gpt-5.2, gpt-5.1, gpt-5.1-codex, gpt-5.1-codex-max, gpt-5.1-codex-mini — **gpt-5.4 family is not listed** | **FIX: drop `'none'`** → `['low','medium','high']` (open question c resolved). PR #4990's removal of `'xhigh'` re-verified correct: footnote 6 — xhigh is gpt-5.1-codex-max only. |
-| verbosity | low, medium, high | reasoning doc "NEW GPT-5 reasoning features": verbosity options low/medium/high for GPT-5 series | OK |
-| maxOutputTokens | 128000 | models-sold-directly: gpt-5.4 (2026-03-05) output 128,000 | OK |
-| contextWindow | 1050000 | same row: 1,050,000 (Input 922,000 / Output 128,000) | OK |
-| releaseDate | 2026-03-05 | Azure version `2026-03-05` | OK |
-
-Pricing limitation: a long-context tier exists (`5.4 longco inp Gl` $5.0 / `longco cd inp Gl` $0.5 / `longco opt Gl` $22.5) for requests beyond the standard context threshold. The flat pricing schema cannot express tiered pricing; standard-tier rates are recorded.
-
-### azure/gpt-5.4-mini
-
-| Field | Current value | Source / evidence | Verdict |
-| --- | --- | --- | --- |
-| pricing | 0.75 / 0.075 / 4.5 | Retail API `5.4 mini Inp Gl` 0.75, `cd Inp Gl` 0.075, `Opt Gl` 4.5 | OK |
-| reasoningEffort | none, low, medium, high | footnote 7 (see gpt-5.4) | **FIX: drop `'none'`** |
-| verbosity | low, medium, high | GPT-5 series verbosity | OK |
-| maxOutputTokens / contextWindow | 128000 / 400000 | models-sold-directly: gpt-5.4-mini (2026-03-17) 400,000 (272k in / 128k out) | OK |
-| releaseDate | 2026-03-17 | Azure version `2026-03-17` | OK |
-
-### azure/gpt-5.4-nano
-
-| Field | Current value | Source / evidence | Verdict |
-| --- | --- | --- | --- |
-| pricing | 0.2 / 0.02 / 1.25 | Retail API `5.4 nano Inp Gl` 0.2, `cd Inp Gl` 0.02, `Opt Gl` 1.25 | OK |
-| reasoningEffort | none, low, medium, high | footnote 7 (see gpt-5.4) | **FIX: drop `'none'`** |
-| verbosity | low, medium, high | GPT-5 series verbosity | OK |
-| maxOutputTokens / contextWindow | 128000 / 400000 | models-sold-directly: gpt-5.4-nano (2026-03-17) | OK |
-| releaseDate | 2026-03-17 | Azure version `2026-03-17` | OK |
-
-### azure/gpt-5.2
-
-| Field | Current value | Source / evidence | Verdict |
-| --- | --- | --- | --- |
-| pricing | 1.75 / 0.175 / 14.0 | Retail API `GPT 5.2 inp Gl` 1.75, `cd inp Gl` 0.175, `opt Gl` 14.0 | OK |
-| reasoningEffort | none, low, medium, high | footnote 7 explicitly lists gpt-5.2 as supporting `'none'`; `'xhigh'` removal (PR #4990) correct — codex-max only; `'minimal'` correctly absent ("not supported with gpt-5.1 or greater") | OK |
-| verbosity | low, medium, high | GPT-5 series verbosity | OK |
-| maxOutputTokens / contextWindow | 128000 / 400000 | models-sold-directly: gpt-5.2 (2025-12-11) | OK |
-| releaseDate | 2025-12-11 | Azure version `2025-12-11` | OK |
-
-### azure/gpt-5.1
-
-| Field | Current value | Source / evidence | Verdict |
-| --- | --- | --- | --- |
-| pricing | 1.25 / 0.125 / 10.0 | Retail API `GPT 5.1 inp Gl` 1.25, `cd inp Gl` 0.125, `opt Gl` 10.0 | OK |
-| reasoningEffort | none, low, medium, high | footnote 7 lists gpt-5.1 (also: `reasoning_effort` defaults to `none` on 5.1); `'minimal'` correctly absent | OK |
-| verbosity | low, medium, high | GPT-5 series verbosity | OK |
-| maxOutputTokens / contextWindow | 128000 / 400000 | models-sold-directly: gpt-5.1 | OK |
-| releaseDate | 2025-11-12 | Azure version is **2025-11-13** in both the models table and the reasoning feature matrix | **FIX: → 2025-11-13** (per PR #4990's own convention of using the Azure version date, cf. gpt-4o 2024-11-20, model-router 2025-05-19) |
-
-### azure/gpt-5.1-codex
-
-| Field | Current value | Source / evidence | Verdict |
-| --- | --- | --- | --- |
-| pricing | 1.25 / 0.125 / 10.0 | Retail API `5.1 codex inp Gl` 1.25, `cd inp Gl` 0.125, `opt Gl` 10.0 | OK |
-| reasoningEffort | none, low, medium, high | footnote 7 lists gpt-5.1-codex | OK |
-| verbosity | low, medium, high | GPT-5 series | OK |
-| maxOutputTokens / contextWindow | 128000 / 400000 | models-sold-directly: gpt-5.1-codex | OK |
-| releaseDate | 2025-11-12 | Azure version `2025-11-13` | **FIX: → 2025-11-13** |
-| deprecated | true (PR #4990 stopgap) | See ruling below | **RECOMMEND: KEEP entry, REVERT `deprecated: true`** |
-
-**Ruling on open question (a):** Responses-API-only status **confirmed** — models-sold-directly lists gpt-5.1-codex as "Responses API only", and the reasoning feature matrix shows Chat Completions = not supported. **However, the premise that it "never worked through Sim" is false.** `apps/sim/providers/azure-openai/index.ts` dispatches by endpoint shape: a full chat-completions URL → Chat Completions; a full responses URL → Responses; **the default path (plain resource base URL) constructs `{endpoint}/openai/v1/responses` and calls the Responses API** (lines ~743–765). So gpt-5.1-codex works for any user configured with a base endpoint or responses URL — the majority configuration. Azure itself has not deprecated the model (GA, "Access is no longer restricted"). Therefore: **KEEP the entry and revert `deprecated: true`**. The only genuinely broken configuration is a user-supplied chat-completions endpoint URL; that is an endpoint-configuration limitation, not a model lifecycle state, and `deprecated` (which signals retirement to users) is the wrong tool for it.
-
-### azure/gpt-5 · azure/gpt-5-mini · azure/gpt-5-nano
-
-| Field | gpt-5 | gpt-5-mini | gpt-5-nano | Source / evidence | Verdict |
-| --- | --- | --- | --- | --- | --- |
-| pricing in/cached/out | 1.25 / 0.125 / 10.0 | 0.25 / 0.025 / 2.0 | 0.05 / 0.005 / 0.4 | Retail API `GPT 5 [Mini\|Nano] [Inpt\|cchd Inpt\|outpt] Glbl` — exact matches all three | OK |
-| reasoningEffort | minimal, low, medium, high | same | same | reasoning doc: "`minimal` is only supported with the original GPT-5 reasoning models"; `'none'` correctly absent (not in footnote 7); `'xhigh'` correctly absent | OK |
-| verbosity | low/medium/high | same | same | GPT-5 series | OK |
-| maxOutputTokens / contextWindow | 128000 / 400000 | same | same | models-sold-directly: all three 400,000 (272k/128k) | OK |
-| releaseDate | 2025-08-07 | 2025-08-07 | 2025-08-07 | Azure version `2025-08-07` | OK |
-
-### azure/gpt-5-chat
-
-| Field | Current value | Source / evidence | Verdict |
-| --- | --- | --- | --- |
-| id (deployable name) | `gpt-5-chat` | models-sold-directly lists `gpt-5-chat` (Preview), versions 2025-08-07 and 2025-10-03 — **exact name confirmed**; PR #4990 rename from `gpt-5-chat-latest` re-verified correct. Note: OpenAI's first-party `gpt-5-chat-latest`-style continuously-updated alias maps to a *different* Foundry product (`gpt-chat-latest`, now GPT-5.5 Instant) — our entry correctly tracks the deployable `gpt-5-chat` (open question e resolved) | OK |
-| pricing | 1.25 / 0.125 / 10.0 | Retail API `GPT 5 Chat [Inpt\|cchd Inpt\|outpt] Glbl` = 1.25 / 0.125 / 10.0 | OK |
-| temperature 0–2 | yes | gpt-5-chat is a non-reasoning chat model (temperature restriction applies to gpt-5.1-chat and later, which we do not list) | OK |
-| maxOutputTokens | 16384 | models-sold-directly: 128,000 / **16,384** (PR #4990 addition re-verified) | OK |
-| contextWindow | 128000 | same row | OK |
-| releaseDate | 2025-08-07 | Azure version `2025-08-07` (a `2025-10-03` revision also exists; the original version date is kept) | OK |
-| lifecycle | not marked | **Preview** on Azure. Preview lifecycle = "not sooner than" retirement, force-upgrade or 30-day-notice retirement, "not recommended for production". No retirement date currently announced → no `deprecated` flag warranted | OK (documented) |
-
-### azure/o3 · azure/o4-mini
-
-| Field | o3 | o4-mini | Source / evidence | Verdict |
-| --- | --- | --- | --- | --- |
-| pricing | 2 / 0.5 / 8 | 1.1 / 0.275 / 4.4 | Retail API `o3 0416` 0.002/0.0005/0.008 per 1K; `o4-mini 0416` 0.0011/0.000275/0.0044 per 1K | OK |
-| reasoningEffort | low, medium, high | low, medium, high | reasoning doc: "low, medium, or high for all reasoning models except o1-mini"; o-series matrix has no none/minimal/xhigh | OK |
-| verbosity | (absent) | (absent) | verbosity is a GPT-5-series-only parameter | OK |
-| maxOutputTokens / contextWindow | 100000 / 200000 | 100000 / 200000 | models-sold-directly o-series: Input 200,000 / Output 100,000 | OK |
-| releaseDate | 2025-04-16 | 2025-04-16 | Azure version `2025-04-16` for both | OK |
-
-### azure/gpt-4.1 · azure/gpt-4.1-mini · azure/gpt-4.1-nano
-
-| Field | 4.1 | 4.1-mini | 4.1-nano | Source / evidence | Verdict |
-| --- | --- | --- | --- | --- | --- |
-| pricing | 2.0 / 0.5 / 8.0 | 0.4 / 0.1 / 1.6 | 0.1 / 0.025 / 0.4 | Retail API `gpt 4.1 [mini\|nano] [Inp\|cached Inp\|Outp] glbl` — exact matches all three | OK |
-| temperature 0–2 | yes | yes | yes | non-reasoning models | OK |
-| maxOutputTokens | 32768 | 32768 | 32768 | models-sold-directly: 32,768 | OK |
-| contextWindow | 1047576 | 1047576 | 1047576 | models-sold-directly: 1,047,576 (global standard; lower for regional standard/batch — global is the right representation) | OK |
-| releaseDate | 2025-04-14 | 2025-04-14 | 2025-04-14 | Azure version `2025-04-14` | OK |
-
-### azure/model-router
-
-| Field | Current value | Source / evidence | Verdict |
-| --- | --- | --- | --- |
-| pricing | 2.0 / 0.5 / 8.0 | No `model-router` meter exists in the Retail Prices API (searched `Router`/`Rtr`/`rtr` under serviceName 'Foundry Models' and productName across all services — only Communication Services "Job Router" exists). Concepts page: "Model router usage is charged for input prompts at the rate listed on the pricing page"; how-to evaluation section: "Account for the **router markup on input tokens** plus the underlying model's input and output pricing." The reported $0.14/1M router markup could not be confirmed from any fetchable source (only the timing-out marketing page carries the number). | **KEEP as documented proxy** (open question d resolved — see below) |
-| capabilities | {} (no reasoningEffort) | Router has accepted `reasoning_effort` since version 2025-11-18 and forwards it, but our pinned version semantics are 2025-05-19 (gpt-4.1-family + o4-mini routing, which does not accept temperature uniformly — temp/top_p are silently dropped for o-series). Empty capabilities is the safest representation | OK |
-| contextWindow | 200000 | models-sold-directly footnote: "Context window: 200,000" — the limit of the smallest underlying model; larger prompts succeed only if routed to a compatible model | OK |
-| maxOutputTokens | (absent) | "max output tokens varies" (16,384–128,000 depending on routed model) — correctly unset | OK |
-| releaseDate | 2025-05-19 | Original version `2025-05-19` confirmed (versions: 2025-05-19, 2025-08-07, 2025-11-18 latest); PR #4990 change re-verified | OK |
-
-**Pricing decision (open question d):** True billing = per-input-token router markup + the routed model's own input/output rates, which varies per request. The flat `{input, cachedInput, output}` schema cannot express this. The current 2.0/0.5/8.0 equals the gpt-4.1 rates — gpt-4.1 is the flagship of the 2025-05-19 routed set (gpt-4.1/-mini/-nano + o4-mini) and sits at the cost ceiling of that set (o4-mini, at 1.1/0.275/4.4, and the mini/nano variants are all cheaper), so it is a conservative (slightly pessimistic) proxy for cost estimation. **Keep 2.0/0.5/8.0.** This is a documented schema limitation, not a verified Azure price; cost estimates for model-router workloads in Sim are approximations.
-
----
-
-## Block: `azure-anthropic` (defaultModel: `azure-anthropic/claude-sonnet-4-5`)
-
-Pricing basis: platform.claude.com Claude-in-Microsoft-Foundry doc — "Pricing for Claude in the Microsoft Marketplace uses Anthropic's standard API pricing." So azure-anthropic pricing == Anthropic first-party pricing (open question f, pricing half, resolved). `cachedInput` maps to Anthropic "Cache Hits & Refreshes" (0.1× input). All five models are **(preview)** on Foundry; Foundry "follows the Claude API lifecycle schedule".
-
-### azure-anthropic/claude-opus-4-6
-
-| Field | Current value | Source / evidence | Verdict |
-| --- | --- | --- | --- |
-| pricing | 5.0 / 0.5 / 25.0 | Anthropic pricing: Opus 4.6 $5 in / $0.50 cache read / $25 out | OK |
-| contextWindow | 1000000 | MS Foundry Claude doc: opus-4-6 "1M / 128K"; Anthropic Foundry doc: "Claude Fable 5, Claude Opus 4.7, Claude Opus 4.6, and Claude Sonnet 4.6 have a 1M-token context window on Microsoft Foundry"; Anthropic models overview: 1M. PR #4990 change re-verified. Long context is at **standard pricing** (Anthropic long-context pricing section), so no tiered-pricing concern | OK |
-| maxOutputTokens | 128000 | both MS and Anthropic sources: 128K | OK |
-| thinking levels | low, medium, high, max (default high) | MS Foundry Claude doc: effort supports low/medium/high, "also max for Opus 4.8, Opus 4.7, **Opus 4.6**, and Sonnet 4.6" | OK |
-| nativeStructuredOutputs | true | Anthropic structured-outputs doc: Opus 4.6 supported (GA) | OK |
-| temperature 0–1 | yes | Anthropic Messages API range | OK |
-| releaseDate | 2026-02-05 | Not stated in any fetched doc (dateless model ID). Consistent with Opus 4.6 launch timeframe (early Feb 2026); convention = announcement date | Unverifiable (plausible, kept) |
-
-### azure-anthropic/claude-opus-4-5
-
-| Field | Current value | Source / evidence | Verdict |
-| --- | --- | --- | --- |
-| pricing | 5.0 / 0.5 / 25.0 | Anthropic pricing: Opus 4.5 $5 / $0.50 / $25 | OK |
-| contextWindow / maxOutputTokens | 200000 / 64000 | MS doc "200K / 64K"; Anthropic overview 200k / 64k | OK |
-| thinking | low, medium, high | extended thinking; `max` effort not supported on 4.5-generation | OK |
-| nativeStructuredOutputs | true | Anthropic structured-outputs doc: Opus 4.5 supported | OK |
-| releaseDate | 2025-11-24 | Anthropic launch date (snapshot ID claude-opus-4-5-20251101; announcement 2025-11-24 — announcement-date convention) | OK |
-
-### azure-anthropic/claude-sonnet-4-5
-
-| Field | Current value | Source / evidence | Verdict |
-| --- | --- | --- | --- |
-| pricing | 3.0 / 0.3 / 15.0 | Anthropic pricing: Sonnet 4.5 $3 / $0.30 / $15 | OK |
-| contextWindow / maxOutputTokens | 200000 / 64000 | MS doc "200K / 64K"; Anthropic overview. Note: the Sonnet 4.5 **1M-context beta** on Foundry retires after 2026-04-30 (already past) — 200000 is correct | OK |
-| thinking | low, medium, high | extended thinking | OK |
-| nativeStructuredOutputs | true | Anthropic structured-outputs doc: Sonnet 4.5 supported | OK |
-| releaseDate | 2025-09-29 | snapshot claude-sonnet-4-5-20250929 | OK |
-
-### azure-anthropic/claude-opus-4-1
-
-| Field | Current value | Source / evidence | Verdict |
-| --- | --- | --- | --- |
-| pricing | 15.0 / 1.5 / 75.0 | Anthropic pricing: Opus 4.1 $15 / $1.50 / $75 | OK |
-| contextWindow / maxOutputTokens | 200000 / 32000 | MS doc "200K / 32K"; Anthropic overview 200k / 32k | OK |
-| thinking | low, medium, high | extended thinking | OK |
-| nativeStructuredOutputs | **true** | Anthropic structured-outputs doc supported-model list **excludes Opus 4.1** (Fable 5, Mythos 5/Preview, Opus 4.8/4.7/4.6/4.5, Sonnet 4.6/4.5, Haiku 4.5 only). The first-party `anthropic` block's `claude-opus-4-1` entry correctly omits it (models.ts ~line 762). With this flag set, Sim sends the `structured-outputs-2025-11-13` beta header and `output_format` to a model that doesn't support it | **FIX: remove `nativeStructuredOutputs`** |
-| deprecated | true | Anthropic Foundry doc model table: "Claude Opus 4.1 — Deprecated. **Retiring August 5, 2026**"; Anthropic pricing page marks it deprecated. PR #4990 change re-verified correct | OK |
-| releaseDate | 2025-08-05 | snapshot claude-opus-4-1-20250805 | OK |
-
-### azure-anthropic/claude-haiku-4-5
-
-| Field | Current value | Source / evidence | Verdict |
-| --- | --- | --- | --- |
-| pricing | 1.0 / 0.1 / 5.0 | Anthropic pricing: Haiku 4.5 $1 / $0.10 / $5 | OK |
-| contextWindow / maxOutputTokens | 200000 / 64000 | MS doc "200K / 64K"; Anthropic overview | OK |
-| thinking | low, medium, high | extended thinking | OK |
-| nativeStructuredOutputs | true | Anthropic structured-outputs doc: Haiku 4.5 supported | OK |
-| releaseDate | 2025-10-15 | Anthropic launch date (snapshot claude-haiku-4-5-20251001; announcement 2025-10-15 — announcement-date convention) | OK |
-
----
-
-## Changes made in PR #4990 — re-verification results
-
-| PR #4990 change | Verdict |
-| --- | --- |
-| Drop `'xhigh'` from azure/gpt-5.4, 5.4-mini, 5.4-nano, gpt-5.2 | **Correct** — `xhigh` is gpt-5.1-codex-max only (reasoning doc footnote 6) |
-| `deprecated: true` on azure/gpt-5.1-codex | **Premise partially wrong** — Responses-API-only confirmed, but Sim's azure provider defaults to the Responses API; recommend reverting (see entry) |
-| `deprecated: true` on azure-anthropic/claude-opus-4-1 | **Correct** — retiring 2026-08-05 |
-| Rename azure/gpt-5-chat-latest → azure/gpt-5-chat + maxOutputTokens 16384 | **Correct** |
-| azure/gpt-4o releaseDate → 2024-11-20 | **Correct** |
-| azure/model-router releaseDate → 2025-05-19 | **Correct** |
-| azure-anthropic/claude-opus-4-6 contextWindow → 1000000 | **Correct** |
-| updatedAt bumps to 2026-06-11 | OK (azure/model-router still 2026-04-01; acceptable since its pricing is an unverifiable proxy) |
-
-## Recommended fixes from this pass (not applied — doc only)
-
-1. `azure/gpt-5.4`, `azure/gpt-5.4-mini`, `azure/gpt-5.4-nano`: reasoningEffort drop `'none'` → `['low','medium','high']` (reasoning doc footnote 7 enumerates 'none' support and excludes the 5.4 family).
-2. `azure/gpt-4o`: add `maxOutputTokens: 16384`.
-3. `azure/gpt-4o`: add `deprecated: true` (retires 2026-10-01). **Product caveat:** it is the block's `defaultModel`; the default-model change is a product decision, not made here.
-4. `azure/gpt-5.1` and `azure/gpt-5.1-codex`: releaseDate `2025-11-12` → `2025-11-13` (Azure version date convention).
-5. `azure/gpt-5.1-codex`: **KEEP entry; revert `deprecated: true`** (works through Sim's default Responses-API path; Azure lifecycle is GA, not deprecated).
-6. `azure-anthropic/claude-opus-4-1`: remove `nativeStructuredOutputs: true` (unsupported model; matches first-party anthropic entry).
-
-## Deliberately not changed
-
-- **azure/model-router pricing 2.0/0.5/8.0** — kept as a documented gpt-4.1-rate proxy; real billing (input-token router markup + routed model rates) is unrepresentable in the flat pricing schema, and no router meter exists in the Retail Prices API to anchor a different number.
-- **azure/gpt-5-chat Preview status** — no `deprecated` flag: Preview models have no announced retirement; flagging would misrepresent lifecycle.
-- **gpt-5.4 long-context pricing tier** (5.0/0.5/22.5 "longco" meters) — schema cannot express tiered pricing; standard-tier rates kept.
-- **gpt-4.1 contextWindow 1,047,576** — global-standard figure kept although regional standard (300,000) and batch (128,000) deployments are lower; Sim assumes global standard.
-- **azure-anthropic releaseDates using announcement dates** (opus-4-5 2025-11-24, haiku-4-5 2025-10-15) rather than snapshot dates (20251101, 20251001) — consistent existing convention across the file.
-- **Missing newer models** (out of scope, noted for follow-up): Azure now offers `gpt-5.5` (GA, 2026-04-24, 1.05M ctx), `gpt-chat-latest`, `gpt-5.4-pro`, `gpt-5.3-codex`/`gpt-5.3-chat`, `gpt-5.2-codex`/`gpt-5.2-chat`; Foundry Claude now offers `claude-fable-5`, `claude-opus-4-8`, `claude-opus-4-7`, `claude-sonnet-4-6` (1M ctx GA).
-
-## Unverifiable
-
-- **model-router pricing** — no retail meter; the $0.14/1M router-markup figure appears only on the timing-out marketing pricing page and could not be confirmed.
-- **azure-anthropic/claude-opus-4-6 releaseDate 2026-02-05** — no fetched source states the launch date (dateless model ID); plausible and consistent with Opus 4.6-era documentation, kept as-is.
-- **Azure-side rate-limit/quota values** — not modeled in the schema; not validated.
diff --git a/docs/models/bedrock.md b/docs/models/bedrock.md
deleted file mode 100644
index eff34fc335..0000000000
--- a/docs/models/bedrock.md
+++ /dev/null
@@ -1,226 +0,0 @@
-# Bedrock provider validation — `apps/sim/providers/models.ts`
-
-- **Date:** 2026-06-11 (final exhaustive pass; re-verifies PR #4990)
-- **Scope:** all 32 `bedrock/*` model entries
-- **Method:** every fact below traced to a live source fetched today:
-  - **AWS Pricing API** (authoritative for token prices): `https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonBedrock/current/us-east-1/index.json` (1.37 MB, Last-Modified 2026-06-11) and the `us-west-2` offer file. Prices are per 1K tokens in the offer; converted ×1000 to per-1M below. Claude 4.x, Cohere, and Mistral Large 24.11 have **no SKUs** in the Pricing API (marketplace-billed / absent).
-  - **AWS model cards:** `docs.aws.amazon.com/bedrock/latest/userguide/model-card-<provider>-<model>.html` (authoritative for geo/global inference IDs, context window, max output, lifecycle, prompt caching).
-  - **Lifecycle:** `docs.aws.amazon.com/bedrock/latest/userguide/model-lifecycle.html` (Legacy/EOL table).
-  - **Anthropic:** `platform.claude.com/docs/en/about-claude/pricing` and `.../models/overview` (Claude prices, cache rates, max output, Bedrock geo premium).
-  - **AWS what's-new** for the Nova Premier GA date and Nova 2 announcements.
-
----
-
-## GEO-PROFILE TABLE (deliverable for `getBedrockInferenceProfileId`)
-
-Source: each model card's Programmatic Access table ("Geo inference ID" / "Global inference ID" columns). `geo` = inference profile required/available (the bare ID is generally **not** invokable on-demand for these, except where noted); `bare` = card lists "Not supported" for both Geo and Global — must invoke with the plain model ID.
-
-| model id suffix | verdict | profiles on card |
-|---|---|---|
-| anthropic.claude-opus-4-5-20251101-v1:0 | **geo** (REQUIRED) | `us.`, `eu.` + `global.` (no apac/au/jp) |
-| anthropic.claude-sonnet-4-5-20250929-v1:0 | **geo** (REQUIRED) | `us.`, `eu.`, `au.`, `jp.` + `global.` (no `apac.`) |
-| anthropic.claude-haiku-4-5-20251001-v1:0 | **geo** (REQUIRED in most regions) | `us.`, `eu.`, `au.`, `jp.` + `global.` (no `apac.`; in-region only us-east-1/eu-north-1/eu-west-1/ap-northeast-1/ap-southeast-4) |
-| anthropic.claude-opus-4-1-20250805-v1:0 | **geo** (REQUIRED) | `us.` only; global NOT supported |
-| amazon.nova-2-pro-v1:0 | **unknown** | no card; ID does not exist on Bedrock — real preview ID is `amazon.nova-2-pro-preview-20251202-v1:0`, served via geo/global profiles per cloudprice (`apac.amazon.nova-2-pro-preview-…`) |
-| amazon.nova-2-lite-v1:0 | **geo** (REQUIRED) | `us.`, `eu.`, `jp.` + `global.` (no `apac.`) |
-| amazon.nova-premier-v1:0 | **geo** (REQUIRED) | `us.` only; global NOT supported |
-| amazon.nova-pro-v1:0 | **geo** | `us.`, `eu.` (no apac/global; in-region exists in us-east-1 and a few others) |
-| amazon.nova-lite-v1:0 | **geo** | `us.`, `eu.` (no apac/global) |
-| amazon.nova-micro-v1:0 | **geo** | `us.`, `eu.` (no apac/global) |
-| meta.llama4-maverick-17b-instruct-v1:0 | **geo** (REQUIRED) | `us.` only |
-| meta.llama4-scout-17b-instruct-v1:0 | **geo** (REQUIRED) | `us.` only |
-| meta.llama3-3-70b-instruct-v1:0 | **geo** | `us.` only |
-| meta.llama3-2-90b-instruct-v1:0 | **geo** (REQUIRED) | `us.` only |
-| meta.llama3-2-11b-instruct-v1:0 | **geo** (REQUIRED) | `us.` only |
-| meta.llama3-2-3b-instruct-v1:0 | **geo** (REQUIRED) | `us.`, `eu.` |
-| meta.llama3-2-1b-instruct-v1:0 | **geo** (REQUIRED) | `us.`, `eu.` |
-| meta.llama3-1-405b-instruct-v1:0 | **geo** | `us.` only (in-region only us-west-2) |
-| meta.llama3-1-70b-instruct-v1:0 | **geo** | `us.` only (in-region only us-west-2) |
-| meta.llama3-1-8b-instruct-v1:0 | **geo** | `us.` only (in-region only us-west-2) |
-| mistral.mistral-large-3-675b-instruct | **bare** | Geo: Not supported; Global: Not supported (in-region, 11 regions) |
-| mistral.mistral-large-2411-v1:0 | **bare** | phantom — see below; the Mistral Large card covers only `mistral-large-2402-v1:0`, bare |
-| mistral.mistral-large-2407-v1:0 | **bare** | no card; on-demand SKUs exist in us-west-2; the 2402 card shows Geo/Global Not supported — same family, in-region only |
-| mistral.pixtral-large-2502-v1:0 | **geo** (REQUIRED) | `us.`, `eu.` |
-| mistral.magistral-small-2509 | **bare** | Geo: Not supported; Global: Not supported |
-| mistral.ministral-3-14b-instruct | **bare** | Geo: Not supported; Global: Not supported |
-| mistral.ministral-3-8b-instruct | **bare** | Geo: Not supported; Global: Not supported |
-| mistral.ministral-3-3b-instruct | **bare** | Geo: Not supported; Global: Not supported (card "Ministral 3B" confirms this exact ID) |
-| mistral.mixtral-8x7b-instruct-v0:1 | **bare** | Geo: Not supported; Global: Not supported |
-| amazon.titan-text-premier-v1:0 | **bare** | model card removed from docs; historically in-region only, never had inference profiles |
-| cohere.command-r-v1:0 | **bare** | card: Geo Not supported; Global Not supported |
-| cohere.command-r-plus-v1:0 | **bare** | card: Geo Not supported; Global Not supported |
-
-Implications for `apps/sim/providers/bedrock/utils.ts` (`getBedrockInferenceProfileId`):
-
-1. All `mistral.*` IDs **except** `mistral.pixtral-large-2502-v1:0`, all `cohere.*` IDs, and `amazon.titan-text-premier-v1:0` must be passed through **unprefixed**. Today the function prefixes everything → `ValidationException` for these 11 models (8 Mistral, 2 Cohere, 1 Titan).
-2. The blanket `ap-*/me-* → apac` mapping is wrong for every model in this list: **no bedrock-provider model has an `apac.` profile**. Claude Sonnet/Haiku 4.5 use `au.`/`jp.` (or `global.`); Nova 2 Lite has `jp.`; everything else is `us.`/`eu.` only.
-3. `eu.` is only valid for: claude opus/sonnet/haiku 4.5, nova-2-lite, nova pro/lite/micro, llama3-2-3b/1b, pixtral-large. For the rest (opus-4-1, nova-premier, all other llamas) only `us.` exists — an `eu-*` region request currently produces a nonexistent `eu.` profile ID.
-
----
-
-## Per-model verification
-
-Prices are USD per 1M tokens, **standard on-demand, us-east-1** (us-west-2 where us-east-1 has no SKU). "Pricing API" = the offer file above, fetched 2026-06-11.
-
-### Anthropic (no Pricing API SKUs — verified against Anthropic pricing page; Bedrock bills Anthropic list prices)
-
-| model | field | repo | verified | source | verdict |
-|---|---|---|---|---|---|
-| claude-opus-4-5 | input/output | 5 / 25 | 5 / 25 | Anthropic pricing | OK |
-| | cachedInput | — | 0.50 (0.1× input; Bedrock card: caching Yes, min 4096 tok) | Anthropic pricing + card | **ADD** |
-| | maxOutputTokens | 64000 | 64K | card + Anthropic overview | OK |
-| | contextWindow | 200000 | 200K | card | OK |
-| | releaseDate | 2025-11-24 | Nov 24 2025 | card | OK |
-| claude-sonnet-4-5 | input/output | 3 / 15 | 3 / 15 | Anthropic pricing | OK |
-| | cachedInput | — | 0.30 | Anthropic pricing + card (caching Yes) | **ADD** |
-| | maxOutputTokens / ctx | 64000 / 200000 | 64K / 200K | card | OK |
-| | releaseDate | 2025-09-29 | Sep 30 2025 (card); Sep 29 2025 (Anthropic launch) | card + Anthropic | keep repo (matches upstream launch) |
-| | recommended | — | provider default model | models.ts convention | **ADD `recommended: true`** |
-| claude-haiku-4-5 | input/output | 1 / 5 | 1 / 5 | Anthropic pricing | OK |
-| | cachedInput | — | 0.10 | Anthropic pricing + card (caching Yes) | **ADD** |
-| | maxOutputTokens / ctx | 64000 / 200000 | 64K / 200K | card | OK |
-| | releaseDate | 2025-10-15 | Oct 16 2025 (card); Oct 15 2025 (Anthropic launch) | card + Anthropic | keep repo |
-| | speedOptimized | — | "the fastest model with near-frontier intelligence" | Anthropic overview | **ADD `speedOptimized: true`** |
-| claude-opus-4-1 | input/output | 15 / 75 | 15 / 75 | Anthropic pricing | OK |
-| | cachedInput | — | 1.50 | Anthropic pricing + card (caching Yes, 5m TTL only) | **ADD** |
-| | maxOutputTokens | 32768 | **32K = 32000** | card "32K"; Anthropic overview "32k tokens" | **FIX 32768 → 32000** (32768 would exceed the documented cap) |
-| | ctx / releaseDate / lifecycle | 200000 / 2025-08-05 / active | 200K / Aug 05 2025 / Active on Bedrock (deprecated on first-party API, retire 2026-08-05 — Bedrock lifecycle independent) | card | OK |
-
-**Geo premium (open question d):** Anthropic's pricing page states regional/multi-region endpoints carry a **10% premium over global** for Sonnet 4.5, Haiku 4.5, Opus 4.5 "and all future models" (earlier models keep existing pricing). Sim always builds geo profiles, so real spend on these three is 1.1× the table values. **Decision: keep base prices and document** — (a) the Pricing API exposes no Claude SKUs to anchor a geo-specific number, (b) repo convention is provider list price, (c) baking 1.1× would overbill if/when the provider routes `global.`. Revisit if Sim adds `global.` routing.
-
-### Amazon Nova (Pricing API us-east-1)
-
-| model | field | repo | verified | verdict |
-|---|---|---|---|---|
-| nova-2-pro | input/output | 1.0 / 4.0 | **1.375 / 11.0** (`USE1-Nova2.0Pro-text-input-tokens` 0.001375, `-text-output-tokens` 0.011; global cross-region 1.25/10.0) | **FIX**. Note: cloudprice lists 2.19/17.50 for an apac preview profile — AWS Pricing API wins |
-| | identity | `amazon.nova-2-pro-v1:0` | no model card; not in catalog; real ID is `amazon.nova-2-pro-preview-20251202-v1:0` (preview, Nova Forge early access, per AWS re:Invent 2025 what's-new + cloudprice/getmaxim) | entry is a **phantom ID**; `deprecated: true` (PR #4990) keeps it hidden — acceptable; longer-term remove or migrate to the preview ID |
-| nova-2-lite | input/output | 0.08 / 0.32 | **0.33 / 2.75** (`USE1-Nova2.0Lite-input-tokens` 0.00033, `-output-tokens` 0.00275) | **FIX** — resolves open question (a): repo was wrong AND the secondaries' 0.30/2.50 is the *global cross-region* price (`-cross-region-global` SKUs), not the geo/in-region price Sim pays |
-| | cachedInput | — | **0.0825** (`-cache-read-input-token-count` 0.0000825; cache write $0) | **ADD** |
-| | maxOutputTokens | — | 64K (card) | **ADD 64000** |
-| | ctx / releaseDate / lifecycle | 1000000 / 2025-12-02 / active | 1M / Dec 02 2025 / Active; geo us/eu/jp + global | OK |
-| nova-premier | input/output | 2.5 / 12.5 | 2.50 / 12.50 (`USE1-NovaPremier-*`) | OK (PR #4990 fix confirmed) |
-| | cachedInput | — | 0.625 (`-cache-read` 0.000625) | **ADD** (model is Legacy but still billable until EOL 2026-09-14) |
-| | deprecated | true | Legacy 2026-03-13, EOL 2026-09-14 (lifecycle page + card) | OK |
-| | maxOutputTokens | — | 25K (card) | skip per instruction (deprecated); documented only |
-| | releaseDate | 2025-04-30 | GA announced Apr 30 2025 (aws.amazon.com what's-new 2025/04 "Amazon Nova Premier… generally available"); card shows "Oct 31 2025" which conflicts with AWS's own GA announcement and the lifecycle history — treated as a card-metadata anomaly | **keep 2025-04-30** |
-| nova-pro | input/output | 0.8 / 3.2 | 0.80 / 3.20 | OK (question b resolved) |
-| | cachedInput | — | 0.20 | **ADD** |
-| | maxOutputTokens | — | 5K (card) | **ADD 5120** (Nova "5K" cap; trackers/openrouter report 5,120) |
-| | ctx | 300000 | 300K | OK; releaseDate repo 2024-12-03 (re:Invent announce) vs card Dec 05 2024 — keep repo, documented |
-| nova-lite | input/output | 0.06 / 0.24 | 0.06 / 0.24 | OK |
-| | cachedInput | — | 0.015 | **ADD** |
-| | maxOutputTokens | — | 5K | **ADD 5120** |
-| nova-micro | input/output | 0.035 / 0.14 | 0.035 / 0.14 | OK |
-| | cachedInput | — | 0.00875 | **ADD** |
-| | maxOutputTokens | — | 5K | **ADD 5120** |
-| | speedOptimized | — | card: "Amazon's fastest text-only model, optimized for speed and low cost" | **ADD `speedOptimized: true`** |
-
-### Meta (Pricing API; all cards report max output 4K for 3.x, 8K for Llama 4)
-
-| model | field | repo | verified | verdict |
-|---|---|---|---|---|
-| llama4-maverick | input/output | 0.24 / 0.97 | 0.24 / 0.97 | OK |
-| | maxOutputTokens | — | 8K (card) | **ADD 8192** |
-| | ctx / date / lifecycle | 1M / 2025-04-05 / active | 1M / Apr 05 2025 / Active | OK |
-| llama4-scout | input/output | 0.18 / 0.72 | **0.17 / 0.66** (`USE1-Llama4-Scout-17B-*` 0.00017 / 0.00066) | **FIX** |
-| | maxOutputTokens | — | 8K | **ADD 8192** |
-| | ctx | 10000000 | 10M (card) | OK (PR #4990 fix confirmed) |
-| llama3-3-70b | input/output | 0.72 / 0.72 | 0.72 / 0.72 | OK |
-| | lifecycle | active | **Active** (card; absent from Legacy table) — question (g) | OK |
-| | maxOutputTokens | — | 4K | **ADD 4096** |
-| llama3-2-90b | input/output | 2.0 / 2.0 | **0.72 / 0.72** (`USE1-Llama3-2-90B-*`) | **FIX** (deprecated but still billable until EOL 2026-07-07) |
-| | deprecated | true | Legacy, EOL Jul 7 2026 | OK |
-| llama3-2-11b | input/output | 0.16 / 0.16 | 0.16 / 0.16; Legacy EOL 2026-07-07 | OK |
-| llama3-2-3b | input/output | 0.15 / 0.15 | 0.15 / 0.15; Legacy | OK |
-| llama3-2-1b | input/output | 0.10 / 0.10 | 0.10 / 0.10; Legacy | OK |
-| llama3-1-405b | input/output | 5.32 / 16.0 | **2.40 / 2.40** (`USW2-Llama3-1-405B-*` 0.0024; us-east-1 has only batch SKUs at 1.20) | **FIX** (deprecated, Legacy EOL 2026-07-07, but price was ~5× off) |
-| llama3-1-70b | input/output | 2.65 / 3.5 | **0.72 / 0.72** (`USE1-Llama3-1-70B-*`; the 2.65 figure resembles no AWS SKU — latency-optimized variant is a separate SKU) | **FIX** |
-| | lifecycle | active | **Active** (card) — question (g) | OK |
-| | maxOutputTokens / releaseDate | — / — | 4K / Jul 23 2024 | **ADD 4096, 2024-07-23** |
-| llama3-1-8b | input/output | 0.3 / 0.6 | **0.22 / 0.22** (`USE1-Llama3-1-8B-*`) | **FIX** |
-| | lifecycle | active | **Active** (card) | OK |
-| | maxOutputTokens / releaseDate | — / — | 4K / Jul 23 2024 | **ADD 4096, 2024-07-23** |
-
-### Mistral AI (Pricing API + cards)
-
-| model | field | repo | verified | verdict |
-|---|---|---|---|---|
-| mistral-large-3-675b | input/output | 0.5 / 1.5 | 0.50 / 1.50 (`USE1-Mistral-Large-3-675b-Instruct-*`) | OK (PR #4990 confirmed) |
-| | ctx / maxOutput | 256000 / 32768 | 256K / 32K (card) | OK |
-| | releaseDate | — | Dec 2 2025 (card) | **ADD 2025-12-02** |
-| | caching | — | card: prompt caching **Yes** (bedrock-runtime), but no cache-read SKU in Pricing API → rate unpublishable | no `cachedInput` (documented) |
-| mistral-large-2411 | input/output | 2.0 / 6.0 | **UNVERIFIABLE — model appears not to exist on Bedrock**: no model card (Mistral card index has only "Mistral Large" = 2402 and "Mistral Large 3"), no Pricing API SKU in us-east-1 or us-west-2, not in lifecycle table | keep price; entry is already `deprecated: true` (hidden); recommend follow-up removal |
-| mistral-large-2407 | input/output | 4.0 / 12.0 | **2.00 / 6.00** (`USW2-MistralLarge2407-*` 0.002/0.006; us-west-2 only). The 4/12 figure belongs to *Mistral Large 2402* (`USE1-MistralLarge-*` = 0.004/0.012) — repo had the two swapped | **FIX** (deprecated but billable) |
-| pixtral-large-2502 | input/output | 2.0 / 6.0 | 2.00 / 6.00 (`USE1-PixtralLarge2502-*`) | OK (question b resolved) |
-| | ctx / maxOutput / lifecycle | 128000 / 16384 / active | 128K / 16K / Active | OK |
-| magistral-small-2509 | input/output | 0.5 / 1.5 | 0.50 / 1.50 | OK |
-| | ctx / maxOutput / lifecycle | 128000 / 40000 / active | 128K / 40K / Active (card launch "Sep 2025", no day — no releaseDate added) | OK |
-| ministral-3-14b | input/output | 0.2 / 0.2 | 0.20 / 0.20 (`USE1-Ministral-3-14b-Instruct-*`) | OK |
-| | maxOutput / releaseDate | 8192 / — | 8K / Dec 2 2025 | **ADD 2025-12-02** |
-| | caching | — | card shows no prompt-caching row → unconfirmed | no `cachedInput` |
-| ministral-3-8b | input/output | 0.1 / 0.1 | **0.15 / 0.15** (`USE1-Ministral-3-8b-Instruct-*` 0.00015) | **FIX**; **ADD releaseDate 2025-12-02** |
-| ministral-3-3b | input/output | 0.04 / 0.04 | **0.10 / 0.10** (`USE1-Ministral-3-3b-Instruct-*` 0.0001) | **FIX**; **ADD releaseDate 2025-12-02** (card "Ministral 3B" confirms ID `mistral.ministral-3-3b-instruct`, 128K ctx, 8K out, Active) |
-| mixtral-8x7b | input/output | 0.45 / 0.7 | 0.45 / 0.70 (`USE1-Mixtral8x7B-*`) | OK (question b resolved) |
-| | ctx / lifecycle | 32000 / active | 32K / Active | OK |
-| | maxOutputTokens | — | 4K (card) | **ADD 4096** |
-
-### Amazon Titan / Cohere
-
-| model | field | repo | verified | verdict |
-|---|---|---|---|---|
-| titan-text-premier | input/output | 0.5 / 1.5 | 0.50 / 1.50 (`USE1-TitanText-Premier-*`, attribute `titanModel: "Titan Text G1 Premier"`) | OK |
-| | deprecated | true | model card **removed** from the model-cards index (only Titan embeddings/image cards remain); absent from the Legacy table (which excludes models already past EOL) | OK — keep deprecated |
-| cohere command-r | input/output | 0.5 / 1.5 | not in Pricing API (marketplace-billed); matches long-standing AWS list price | UNVERIFIABLE via Pricing API — keep |
-| | deprecated | true | Legacy 2026-02-19, EOL 2026-08-19 (lifecycle + card) | OK |
-| cohere command-r-plus | input/output | 3.0 / 15.0 | not in Pricing API; matches long-standing AWS list price | UNVERIFIABLE — keep |
-| | deprecated | true | Legacy 2026-02-19, EOL 2026-08-19 | OK |
-
----
-
-## Changes made in this pass (fix list for models.ts — to be applied by the follow-up code change)
-
-Pricing (all `updatedAt` → `2026-06-11`):
-
-1. `bedrock/amazon.nova-2-pro-v1:0`: input 1.0 → 1.375, output 4.0 → 11.0 (Pricing API `USE1-Nova2.0Pro-text-*`)
-2. `bedrock/amazon.nova-2-lite-v1:0`: input 0.08 → 0.33, output 0.32 → 2.75 (Pricing API `USE1-Nova2.0Lite-*`)
-3. `bedrock/meta.llama4-scout-17b-instruct-v1:0`: input 0.18 → 0.17, output 0.72 → 0.66
-4. `bedrock/meta.llama3-2-90b-instruct-v1:0`: 2.0/2.0 → 0.72/0.72
-5. `bedrock/meta.llama3-1-405b-instruct-v1:0`: 5.32/16.0 → 2.40/2.40 (USW2 on-demand)
-6. `bedrock/meta.llama3-1-70b-instruct-v1:0`: 2.65/3.5 → 0.72/0.72
-7. `bedrock/meta.llama3-1-8b-instruct-v1:0`: 0.3/0.6 → 0.22/0.22
-8. `bedrock/mistral.mistral-large-2407-v1:0`: 4.0/12.0 → 2.0/6.0 (USW2 `MistralLarge2407`)
-9. `bedrock/mistral.ministral-3-8b-instruct`: 0.1/0.1 → 0.15/0.15
-10. `bedrock/mistral.ministral-3-3b-instruct`: 0.04/0.04 → 0.10/0.10
-
-cachedInput additions (cache-read rate):
-
-11. claude-opus-4-5: 0.5; claude-sonnet-4-5: 0.3; claude-haiku-4-5: 0.1; claude-opus-4-1: 1.5 (Anthropic pricing 0.1× input; Bedrock cards confirm caching)
-12. nova-2-lite: 0.0825; nova-premier: 0.625; nova-pro: 0.2; nova-lite: 0.015; nova-micro: 0.00875 (Pricing API cache-read SKUs; Nova cache writes are $0)
-
-maxOutputTokens:
-
-13. claude-opus-4-1: 32768 → 32000 (Anthropic overview "32k"; Bedrock card "32K")
-14. nova-2-lite: add 64000; nova-pro/lite/micro: add 5120 each
-15. llama4-maverick/scout: add 8192 each; llama3-3-70b, llama3-1-70b, llama3-1-8b: add 4096 each; mixtral-8x7b: add 4096
-
-Flags / metadata:
-
-16. claude-sonnet-4-5: add `recommended: true` (bedrock default model; matches other providers' convention)
-17. claude-haiku-4-5 and nova-micro: add `speedOptimized: true` (Anthropic "fastest model"; card "Amazon's fastest text-only model"). Ruled **against** `speedOptimized` on nova-2-lite — its card positions it as cost-efficient multimodal, not the speed tier.
-18. releaseDate additions: mistral-large-3 `2025-12-02`; ministral-3-14b/8b/3b `2025-12-02`; llama3-1-70b/8b `2024-07-23`
-
-## Deliberately not changed
-
-- **Claude 4.5-gen geo premium (q. d):** kept base list prices; Sim's geo-profile routing actually bills 1.1× for opus/sonnet/haiku 4.5 per Anthropic's pricing page. Documented here rather than baked in (no AWS SKU to anchor; would overstate global-endpoint cost; consistent with list-price convention).
-- **Release-date nits (q. h):** sonnet-4-5 `2025-09-29` and haiku-4-5 `2025-10-15` kept (Anthropic launch dates; Bedrock cards say +1 day). nova pro/lite/micro `2024-12-03` kept (re:Invent announcement; cards say Dec 05). nova-premier `2025-04-30` kept — AWS what's-new confirms GA Apr 30 2025; the card's "Oct 31 2025" contradicts AWS's own announcement.
-- **Deprecated models' maxOutputTokens** (nova-premier 25K, llama3-2 4K, command-r/r+ 4K, mistral-large-2407 4K): per instruction, not added.
-- **All deprecated flags from PR #4990 re-verified correct:** nova-premier, llama3-2 ×4, llama3-1-405b, command-r/r+ (Legacy with EOL dates on the lifecycle page), titan-text-premier (card removed from catalog), mistral-large-2411/2407 (absent from catalog). llama3-1-70b/8b and llama3-3-70b confirmed **Active** — correctly not deprecated.
-- **mistral-large-3 / magistral / ministral-14b `cachedInput`:** Large 3 card says caching is supported but no cache-read SKU exists in the Pricing API; ministral-14b card shows no caching row. No invented numbers.
-- **`bedrock/amazon.nova-2-pro-v1:0` and `bedrock/mistral.mistral-large-2411-v1:0` entries kept** (both `deprecated: true`, hidden): the former's real Bedrock ID is `amazon.nova-2-pro-preview-20251202-v1:0` (preview), the latter appears to have never shipped on Bedrock. Recommend a follow-up PR to remove/rename — out of scope for a validation pass.
-
-## Unverifiable
-
-- **cohere.command-r-v1:0 / command-r-plus-v1:0 prices** (0.5/1.5, 3/15): absent from the Pricing API (marketplace-billed); match the long-standing published AWS rates; models are Legacy. Kept as-is.
-- **mistral-large-2411 price** (2/6): no SKU, no card; phantom entry (see above).
-- **nova-2-pro geo-profile support**: no card; preview ID served via profiles per third-party trackers only.
-- **Mistral Large 3 cache-read rate**: caching supported per card; rate unpublished.
diff --git a/docs/models/deepseek-cerebras.md b/docs/models/deepseek-cerebras.md
deleted file mode 100644
index 33f9927459..0000000000
--- a/docs/models/deepseek-cerebras.md
+++ /dev/null
@@ -1,189 +0,0 @@
-# Model Validation: `deepseek` & `cerebras` — apps/sim/providers/models.ts
-
-- **Date:** 2026-06-11
-- **Scope:** Final exhaustive re-validation after PR #4990 (deepseek-chat/reasoner repricing + 1M ctx, deprecation flags on deepseek-v3/r1 and cerebras llama3.1-8b/qwen-3-235b)
-- **Method:** Live WebFetch of provider docs (primary), OpenRouter/ArtificialAnalysis/aggregators (secondary), DeepSeek news archive for release dates, `rg` of provider code to confirm capability consumption. Provider docs win on conflicts.
-
-## Sources
-
-| Source | URL |
-|---|---|
-| DeepSeek pricing (primary) | https://api-docs.deepseek.com/quick_start/pricing |
-| DeepSeek list-models (primary) | https://api-docs.deepseek.com/api/list-models |
-| DeepSeek chat-completion API ref (primary) | https://api-docs.deepseek.com/api/create-chat-completion |
-| DeepSeek reasoning guide (primary) | https://api-docs.deepseek.com/guides/reasoning_model |
-| DeepSeek V3 announcement | https://api-docs.deepseek.com/news/news1226 |
-| DeepSeek R1 announcement | https://api-docs.deepseek.com/news/news250120 |
-| DeepSeek V4 preview announcement | https://api-docs.deepseek.com/news/news260424 |
-| Cerebras models overview (primary) | https://inference-docs.cerebras.ai/models/overview |
-| Cerebras gpt-oss model page (primary) | https://inference-docs.cerebras.ai/models/openai-oss |
-| Cerebras zai-glm-4.7 model page (primary) | https://inference-docs.cerebras.ai/models/zai-glm-47 |
-| Cerebras deprecations (primary) | https://inference-docs.cerebras.ai/support/deprecation |
-| Cerebras chat-completions API ref (primary) | https://inference-docs.cerebras.ai/api-reference/chat-completions |
-| OpenRouter deepseek-v4-flash (secondary) | https://openrouter.ai/deepseek/deepseek-v4-flash |
-| OpenRouter GLM 4.7 (secondary) | https://openrouter.ai/z-ai/glm-4.7 |
-| ArtificialAnalysis gpt-oss-120b providers (secondary) | https://artificialanalysis.ai/models/gpt-oss-120b/providers |
-| aimodelapis Cerebras GLM-4.7 (secondary) | https://aimodelapis.com/providers/cerebras/cerebras-zai-glm-4-7 |
-| Cerebras GLM-4.7 launch blog (secondary) | https://www.cerebras.ai/blog/glm-4-7 |
-
-## Code-consumption checks
-
-- `rg "temperature" apps/sim/providers/deepseek/ apps/sim/providers/cerebras/`:
-  - `deepseek/index.ts:89` — `if (request.temperature !== undefined) payload.temperature = request.temperature`
-  - `cerebras/index.ts:85` — `if (request.temperature !== undefined) payload.temperature = request.temperature`
-  - Both providers forward temperature when set; a `temperature` capability in models.ts is what surfaces the slider (`getMaxTempFromDefinitions` in `providers/utils.ts`). With `capabilities: {}` the slider is hidden even though the API accepts the param.
-- No `reasoningEffort`, `verbosity`, `thinking`, `nativeStructuredOutputs`, or `computerUse` handling exists in either provider implementation — do **not** add those capabilities even though Cerebras documents `reasoning_effort` (not consumed by code).
-- `maxOutputTokens` is a supported capability field (`models.ts:42`) consumed by `providers/index.ts` — safe to recommend.
-
----
-
-## DeepSeek
-
-### Alias status (Open Question a)
-
-**Confirmed.** DeepSeek pricing page: "The model names `deepseek-chat` and `deepseek-reasoner` will be deprecated on **2026/07/24 15:59 UTC**." They correspond to the **non-thinking** and **thinking** modes of `deepseek-v4-flash` respectively. The list-models API now returns only `deepseek-v4-flash` and `deepseek-v4-pro`. Until 2026-07-24 the aliases remain valid API ids, so keeping them non-deprecated in models.ts is correct **for now** — they must be flipped to `deprecated: true` (or removed) by 2026-07-24.
-
-**Recommendation (separate work, not part of this pass):** add `deepseek-v4-flash` (input $0.14 / cached $0.0028 / output $0.28, ctx 1M, max output 384K, released 2026-04-24) and `deepseek-v4-pro` (input $0.435 / cached $0.003625 / output $0.87, ctx 1M, max output 384K) as first-class entries before the 2026-07-24 alias retirement, then deprecate the aliases.
-
-### deepseek-chat
-
-| Field | Current value | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| id valid | `deepseek-chat` | Valid alias until 2026-07-24 15:59 UTC (→ v4-flash non-thinking) | pricing page | OK |
-| pricing.input | 0.14 | $0.14/M (cache miss) | pricing page | OK |
-| pricing.cachedInput | 0.0028 | $0.0028/M (cache hit) | pricing page | OK |
-| pricing.output | 0.28 | $0.28/M | pricing page | OK |
-| pricing.updatedAt | 2026-06-11 | — | — | OK |
-| contextWindow | 1000000 | 1M tokens | pricing page | OK |
-| capabilities.temperature | *(absent)* | Supported, range 0–2, default 1 ("What sampling temperature to use, between 0 and 2…") — applies to non-thinking mode | create-chat-completion API ref | **FIX: add `temperature: { min: 0, max: 2 }`** (code at `deepseek/index.ts:89` consumes it) |
-| capabilities.maxOutputTokens | *(unset)* | Conflict: pricing page says 384K max output for v4-flash; reasoning guide (thinking mode) says default 32K / max 64K | pricing page vs reasoning guide | Leave unset — see "Deliberately not changed" |
-| releaseDate | 2024-12-26 | V3 announcement 2024-12-26 (the date the alias began pointing to V3); the alias now points to v4-flash (released 2026-04-24) | news1226, news260424 | OK (alias semantics — keep original anchor) |
-| deprecated | *(absent)* | Alias still live | pricing page | OK until 2026-07-24 |
-
-### deepseek-v3
-
-| Field | Current value | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| id valid | `deepseek-v3` | **Not** a valid API id (list-models returns only v4-flash/v4-pro; never a documented API id — API ids were deepseek-chat/reasoner) | list-models | OK as `deprecated: true` |
-| deprecated | true | Correct | list-models | OK |
-| pricing | 0.28 / 0.028 / 0.42 (updatedAt 2026-04-01) | Historical V3.x pricing; model unpurchasable, frozen values acceptable | — | OK (legacy) |
-| contextWindow | 128000 | Historical 128K | — | OK (legacy) |
-| releaseDate | 2024-12-26 | DeepSeek-V3 announced 2024-12-26 | news1226 | **Verified** |
-
-### deepseek-r1
-
-| Field | Current value | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| id valid | `deepseek-r1` | **Not** a valid API id (R1 was accessed as `deepseek-reasoner`) | list-models, news250120 | OK as `deprecated: true` |
-| deprecated | true | Correct | list-models | OK |
-| pricing | 0.55 / 0.14 / 2.19 | Matches original R1 launch pricing ($0.14 hit / $0.55 miss / $2.19 out) | news250120 | **Verified** (legacy, frozen) |
-| contextWindow | 128000 | Historical | — | OK (legacy) |
-| releaseDate | 2025-01-20 | R1 announced 2025-01-20 | news250120 | **Verified** |
-
-### deepseek-reasoner
-
-| Field | Current value | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| id valid | `deepseek-reasoner` | Valid alias until 2026-07-24 15:59 UTC (→ v4-flash thinking) | pricing page | OK |
-| pricing.input / cachedInput / output | 0.14 / 0.0028 / 0.28 | $0.14 / $0.0028 / $0.28 (same v4-flash pricing, both modes) | pricing page | OK |
-| pricing.updatedAt | 2026-06-11 | — | — | OK |
-| contextWindow | 1000000 | 1M | pricing page | OK |
-| capabilities | `{}` (no temperature) | Reasoning guide: `temperature`, `top_p`, `presence_penalty`, `frequency_penalty`, `logprobs`, `top_logprobs` **not supported** — "will not trigger an error but will also have no effect" | reasoning guide | OK — must NOT add temperature |
-| capabilities.maxOutputTokens | *(unset)* | Conflict (384K vs 32K/64K) | see below | Leave unset |
-| releaseDate | 2025-01-20 | `model=deepseek-reasoner` introduced with R1 release 2025-01-20 | news250120 ("Use DeepSeek-R1 by setting model=deepseek-reasoner") | **Verified** |
-
-### maxOutputTokens conflict (Open Question a)
-
-- Pricing page (current, v4-flash): **384K max output**.
-- Reasoning guide (deepseek-reasoner page): **default 32K, max 64K** — appears not yet updated for V4 (still reflects R1-era limits).
-- The aliases map to v4-flash modes, so 384K is *probably* correct, but DeepSeek's own docs disagree with each other and the reasoning guide is the page specific to `deepseek-reasoner`. **Resolution: leave `maxOutputTokens` unset on both aliases** (current state) and set 384000 on the future `deepseek-v4-flash`/`deepseek-v4-pro` entries, where the pricing page is unambiguous.
-
-### Secondary-source pricing (DeepSeek)
-
-OpenRouter lists deepseek-v4-flash at **$0.098 in / $0.196 out** — exactly 70% of official $0.14/$0.28, i.e. the OpenRouter **−30% promo is still present**. Per policy, provider docs win: $0.14 / $0.0028 / $0.28 stands. OpenRouter confirms 1M context and the 2026-04-24 release date.
-
----
-
-## Cerebras
-
-### Deprecations (confirmed)
-
-Cerebras deprecation page lists **llama3.1-8b** and **qwen-3-235b-a22b-instruct-2507** as deprecated **2026-05-27**, recommended replacement "GPT OSS 120B". Neither appears on the models overview anymore. `deprecated: true` on both entries (PR #4990) is correct.
-
-### cerebras/gpt-oss-120b
-
-| Field | Current value | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| id valid | `gpt-oss-120b` (after `cerebras/` strip at `cerebras/index.ts:82`) | Production model | models overview, model page | OK |
-| pricing.input | 0.35 | $0.35/M | model page (live 2026-06-11) | OK |
-| pricing.output | 0.75 | $0.75/M | model page | OK |
-| pricing.updatedAt | 2026-06-11 | — | — | OK |
-| contextWindow | 131072 | 131k (paid tiers; free tier 65k) | model page | OK (paid tier, consistent with repo convention) |
-| capabilities.maxOutputTokens | *(unset)* | 40k paid tiers (32k free) | model page | **FIX: add `maxOutputTokens: 40000`** (paid tier, matching paid-tier ctx) |
-| capabilities.temperature | *(absent)* | Cerebras chat-completions API: "sampling temperature to use, between 0 and 2.0" | API reference | **FIX: add `temperature: { min: 0, max: 2 }`** (code at `cerebras/index.ts:85` consumes it) |
-| releaseDate | 2025-08-05 | gpt-oss released 2025-08-05; Cerebras day-one launch | cerebras.ai blog "OpenAI GPT OSS 120B Runs Fastest on Cerebras", techintelpro | **Verified** |
-
-Secondary-source note: several aggregators (crackedaiengineering, ArtificialAnalysis blended $0.39) still show launch-era pricing **$0.25/$0.69** and 33K max output. The live Cerebras model page (fetched today) says $0.35/$0.75 and 40k paid-tier max output — provider docs win; aggregators are stale.
-
-### cerebras/llama3.1-8b
-
-| Field | Current value | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| deprecated | true | Deprecated 2026-05-27, migrate to GPT OSS 120B | deprecation page | **Verified** |
-| pricing | 0.10 / 0.10 (frozen 2026-04-01) | Unpurchasable; frozen legacy values | — | OK (legacy) |
-| contextWindow | 32768 | Historical | — | OK (legacy) |
-| releaseDate | 2024-08-27 | Consistent with Cerebras Inference launch (2024-08-27); not re-verified against a live page this pass | — | Plausible / not re-verified (deprecated model, low stakes) |
-
-### cerebras/qwen-3-235b-a22b-instruct-2507
-
-| Field | Current value | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| deprecated | true | Deprecated 2026-05-27, migrate to GPT OSS 120B | deprecation page | **Verified** |
-| pricing | 0.6 / 1.2 (frozen 2026-04-01) | Unpurchasable; frozen legacy values | — | OK (legacy) |
-| contextWindow | 131072 | Historical | — | OK (legacy) |
-| releaseDate | 2025-07-29 | Could not verify the exact Cerebras availability date | — | **Unverifiable** (deprecated model; leave as-is) |
-
-### cerebras/zai-glm-4.7
-
-| Field | Current value | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| id valid | `zai-glm-4.7` | Preview model on overview | models overview, model page | OK |
-| pricing.input | 2.25 | $2.25/M | model page; confirmed by aimodelapis (secondary) | OK |
-| pricing.output | 2.75 | $2.75/M | model page; aimodelapis | OK |
-| pricing.updatedAt | 2026-06-11 | — | — | OK |
-| contextWindow | 131072 | 131k paid tiers (free 64k) | model page; aimodelapis (131,000) | OK |
-| capabilities.maxOutputTokens | *(unset)* | 40k tokens (both tiers) | model page; aimodelapis (40,000) | **FIX: add `maxOutputTokens: 40000`** |
-| capabilities.temperature | *(absent)* | API-wide param, 0–2.0 | API reference | **FIX: add `temperature: { min: 0, max: 2 }`** |
-| releaseDate | 2025-12-22 | GLM-4.7 released 2025-12-22 (OpenRouter "Dec 22, 2025"; PR Newswire; Cerebras same-day launch blog) | multiple | **Verified** |
-
----
-
-## Changes landed in PR #4990 — all re-verified correct in this pass
-
-1. `deepseek-chat` & `deepseek-reasoner` repriced to $0.14 / $0.0028 cached / $0.28 — matches v4-flash pricing they now alias. ✅
-2. `deepseek-chat` & `deepseek-reasoner` contextWindow → 1,000,000 — matches v4-flash 1M default. ✅
-3. `deprecated: true` on `deepseek-v3` and `deepseek-r1` — neither is a valid API id (list-models returns only v4-flash/v4-pro). ✅
-4. `deprecated: true` on `cerebras/llama3.1-8b` and `cerebras/qwen-3-235b-a22b-instruct-2507` — Cerebras deprecation page, 2026-05-27. ✅
-5. `pricing.updatedAt: 2026-06-11` bumps on the four live-model entries. ✅
-
-## Outstanding fixes recommended (not applied — doc-only pass)
-
-1. `deepseek-chat`: add `capabilities.temperature: { min: 0, max: 2 }` — API ref documents temperature 0–2 (default 1) for chat completions; non-thinking mode honors it; `deepseek/index.ts:89` forwards it. Currently the empty `capabilities` hides Sim's temperature slider for a model that supports it.
-2. `cerebras/gpt-oss-120b`: add `capabilities.temperature: { min: 0, max: 2 }` and `capabilities.maxOutputTokens: 40000`.
-3. `cerebras/zai-glm-4.7`: add `capabilities.temperature: { min: 0, max: 2 }` and `capabilities.maxOutputTokens: 40000`.
-
-## Deliberately not changed
-
-- **`deepseek-reasoner` capabilities stay `{}`** — reasoning guide explicitly lists temperature as unsupported/no-effect in thinking mode.
-- **`deepseek-chat`/`deepseek-reasoner` not marked deprecated** — valid aliases until 2026-07-24 15:59 UTC. Calendar item: deprecate (and add v4-flash/v4-pro entries) before that date.
-- **`maxOutputTokens` left unset on both DeepSeek aliases** — DeepSeek docs self-conflict (pricing page: 384K for v4-flash; reasoning guide: 32K default / 64K max for deepseek-reasoner). Set 384000 only on future first-class `deepseek-v4-*` entries where the pricing page is unambiguous.
-- **Legacy pricing/ctx on the four deprecated entries** (deepseek-v3, deepseek-r1, llama3.1-8b, qwen-3-235b) — frozen historical values on unpurchasable models; R1 values cross-checked against the original announcement.
-- **No `reasoningEffort` capability for Cerebras** despite the model pages documenting `reasoning_effort` — `cerebras/index.ts` does not consume it (capability additions must be backed by docs AND code).
-- **OpenRouter −30% DeepSeek promo pricing ($0.098/$0.196) ignored** — provider docs win.
-- **deepseek-chat releaseDate kept at 2024-12-26** — anchor is the V3 announcement; the id predates V3 and now aliases v4-flash (2026-04-24); any value is a judgment call for an alias, so the existing anchor is retained.
-
-## Unverifiable
-
-- `cerebras/qwen-3-235b-a22b-instruct-2507` releaseDate 2025-07-29 — no live source found for the exact Cerebras availability date (model delisted). Left as-is.
-- `cerebras/llama3.1-8b` releaseDate 2024-08-27 — consistent with the known Cerebras Inference launch date but not re-verified against a live page this pass (model delisted).
-- Cerebras temperature **default** value — API ref documents the 0–2.0 range but not a default.
diff --git a/docs/models/embeddings-rerank-dynamic.md b/docs/models/embeddings-rerank-dynamic.md
deleted file mode 100644
index 46ef6b6967..0000000000
--- a/docs/models/embeddings-rerank-dynamic.md
+++ /dev/null
@@ -1,75 +0,0 @@
-# Validation: EMBEDDING_MODEL_PRICING, RERANK_MODEL_PRICING, and dynamic providers
-
-- **Date:** 2026-06-11
-- **File validated:** `apps/sim/providers/models.ts` (`EMBEDDING_MODEL_PRICING` ~L3289, `RERANK_MODEL_PRICING` ~L3320, dynamic provider definitions ~L87–191, L2503–2515, update functions ~L3190–3287)
-- **Method:** Every numeric claim checked via live WebFetch against the provider's first-party docs, with at least one secondary tracker where available. WebSearch used as fallback when a page truncated. No edits were made to `models.ts`.
-- **Primary sources:**
-  - OpenAI: `developers.openai.com/api/docs/models/text-embedding-3-small` / `.../text-embedding-3-large` / `.../text-embedding-ada-002` (the aggregate pricing page truncates before the embeddings table; per-model pages carry the prices)
-  - Google: `ai.google.dev/gemini-api/docs/pricing`
-  - Cohere: `cohere.com/pricing` (Model Vault only — per-search API pricing not rendered), `docs.cohere.com/docs/how-does-cohere-pricing-work` (confirms rerank is billed per search, no numbers), `docs.cohere.com/docs/rerank` (model list)
-  - Secondary trackers: Vercel AI Gateway (`vercel.com/ai-gateway/models/rerank-v4-pro`, `.../rerank-v4-fast`), eesel.ai Cohere pricing guide, metacto.com Cohere pricing deep dive, cloudprice.net, TokenMix/costgoat (OpenAI embeddings)
-  - Provider API docs: `docs.fireworks.ai/api-reference/post-chatcompletions`, `docs.together.ai/reference/chat-completions`, `openrouter.ai/docs` parameters reference, `docs.ollama.com/api/openai-compatibility`, `docs.baseten.co/development/model-apis/overview`
-
-## EMBEDDING_MODEL_PRICING
-
-| Entry | Field | Value in code | Verified value | Source | Verdict |
-|---|---|---|---|---|---|
-| `text-embedding-3-small` | input | $0.02 / 1M | $0.02 / 1M | developers.openai.com model page; TokenMix secondary | CORRECT |
-| `text-embedding-3-small` | output | $0.00 | n/a (embeddings bill input only) | OpenAI docs | CORRECT |
-| `text-embedding-3-large` | input | $0.13 / 1M | $0.13 / 1M | developers.openai.com model page; TokenMix secondary | CORRECT |
-| `text-embedding-3-large` | output | $0.00 | n/a | OpenAI docs | CORRECT |
-| `text-embedding-ada-002` | input | $0.10 / 1M | $0.10 / 1M | developers.openai.com model page; search secondary | CORRECT |
-| `text-embedding-ada-002` | output | $0.00 | n/a | OpenAI docs | CORRECT |
-| `gemini-embedding-001` | input | $0.15 / 1M | $0.15 / 1M (paid tier, standard; batch is $0.075) | ai.google.dev/gemini-api/docs/pricing | CORRECT |
-| `gemini-embedding-001` | output | $0.00 | n/a | Google docs | CORRECT |
-
-## RERANK_MODEL_PRICING (per search unit = 1 query × ≤100 docs)
-
-| Entry | Value in code | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| `rerank-v4.0-pro` | $0.0025 / search | $2.50 / 1k searches ($0.0025) | Vercel AI Gateway rerank-v4-pro page ("$2.5/K, billed per search query"); eesel.ai ("$0.0025 / search") | CORRECT |
-| `rerank-v4.0-fast` | $0.002 / search | $2.00 / 1k searches ($0.002) | Vercel AI Gateway rerank-v4-fast page ("$2/K"); eesel.ai ("$0.002 / search") | CORRECT |
-| `rerank-v3.5` | $0.002 / search | $2.00 / 1k searches ($0.002) Cohere direct & Bedrock | metacto ("$2.00 per 1,000 searches"); cloudprice.net ($0.0020/unit, Cohere + Bedrock rows agree) | CORRECT |
-
-Notes:
-
-- `cohere.com/pricing` currently only renders Model Vault (dedicated instance) hourly pricing; the per-search API table is JS-rendered and not fetchable. `docs.cohere.com/docs/how-does-cohere-pricing-work` confirms rerank is "priced based on the quantity of searches" (per-search, not per-token), which validates the `perSearchUnit` modeling and the ≤100-doc cap comment in the code.
-- Conflicting source resolved: OpenRouter lists `cohere/rerank-v3.5` at $0.001/search, but that is OpenRouter's reseller price, not Cohere first-party. Sim calls Cohere directly, so $0.002 stands.
-- Cohere also offers `rerank-english-v3.0` and `rerank-multilingual-v3.0`; Sim does not expose them, so no entries are needed.
-
-## Dynamic providers (provider-level config sanity pass)
-
-All eight have empty static `models: []` populated at runtime via `update*Models()` (pricing zeroed, `updatedAt` set to today — intentional for BYOK/reseller providers). `modelPatterns` prefixes match each provider's `update*` function and prefix-stripping in the provider implementations.
-
-| Provider | Config checked | Verdict |
-|---|---|---|
-| `fireworks` | temp 0–2, toolUsageControl true, pattern `/^fireworks\//` | CORRECT — Fireworks docs: temperature "between 0 and 2", full `tool_choice` support (`none`/`auto`/`required`/named) |
-| `together` | temp 0–2, toolUsageControl true, pattern `/^together\//` | **DISCREPANCY** — Together's own API reference documents temperature as "a decimal number from 0-1"; `tool_choice` supported. Sim declares max 2. Flagged below; not changed in this pass |
-| `baseten` | temp 0–2, toolUsageControl true, pattern `/^baseten\//` | SANE — Model APIs are OpenAI-compatible (docs.baseten.co); exact temp bounds not published, 0–2 follows the OpenAI convention |
-| `openrouter` | temp 0–2, toolUsageControl true, pattern `/^openrouter\//` | CORRECT — OpenRouter docs: temperature 0.0–2.0, default 1.0 |
-| `ollama-cloud` | temp 0–2, toolUsageControl **true**, pattern `/^ollama-cloud\//` | **QUESTIONABLE** — Ollama's OpenAI-compat layer (same API at `ollama.com/v1`) explicitly lists `tool_choice` as unsupported, and Sim's own shared core (`apps/sim/providers/ollama/core.ts:140-147`) degrades forced tool selection to `auto` with a warning. Local `ollama` correctly sets `toolUsageControl: false`; `ollama-cloud: true` is inconsistent. Flagged below; not changed in this pass |
-| `vllm` | temp 0–2, toolUsageControl true, `defaultModel: 'vllm/generic'`, pattern `/^vllm\//` | SANE — vLLM's OpenAI-compatible server accepts temperature ≥0 (no hard cap of 2); 0–2 is a reasonable UI cap. `vllm/generic` matches the pattern and is the documented placeholder (only other reference is the vllm provider test) |
-| `litellm` | temp 0–2, toolUsageControl true, pattern `/^litellm\//` | SANE — proxy passthrough; effective bounds depend on the upstream model, 0–2 is the OpenAI-convention cap |
-| `ollama` (local) | toolUsageControl false ("does not support tool_choice"), no temp block, `modelPatterns: []` | CORRECT — docs.ollama.com OpenAI-compatibility page lists `tool_choice` as unsupported (temperature is supported); empty patterns are intentional since local model names are arbitrary and matched via the providers store |
-
-## `gemini` vs `google` provider key
-
-- `PROVIDER_DEFINITIONS` contains only `google` (L1303, `defaultModel: 'gemini-2.5-pro'`, patterns `/^gemini/`, `/^deep-research/`). There is no `gemini` registry key, and nothing calls `getProviderModels('gemini')` — all callers use `'google'` (models.ts L3163, `apps/sim/providers/google/index.ts:21`).
-- `apps/sim/providers/gemini/` exists but is **not a provider**: it holds only `core.ts`/`types.ts` (shared Gemini execution logic consumed by both the `google` and `vertex` providers). No `index.ts`, not registered in `registry.ts`.
-- The only `'gemini'` string key is the rotating-API-key namespace: `apps/sim/providers/utils.ts:891` maps provider `google` → `getRotatingApiKey('gemini')`, matching the `GEMINI_API_KEY_*` env convention in `apps/sim/lib/core/config/api-keys.ts`. Intentional; nothing structurally odd.
-
-## Changes made in this pass
-
-None. All `EMBEDDING_MODEL_PRICING` and `RERANK_MODEL_PRICING` values verified correct; instructions prohibited edits to `models.ts`.
-
-## Deliberately not changed
-
-- **`together` temperature max 2 vs documented 0–1:** Together's API reference documents 0–1, but the endpoint is OpenAI-compatible and accepts higher values in practice; tightening to `max: 1` would change UI slider behavior for existing workflows. Left for a deliberate follow-up decision.
-- **`ollama-cloud` `toolUsageControl: true`:** inconsistent with local `ollama: false` and with Ollama's documented lack of `tool_choice`. Runtime is already safe (shared core degrades forced selection to `auto` with a warning), so this only mis-advertises a capability in the UI. Left for follow-up.
-- Dynamic-model zero pricing (`input: 0, output: 0`) in all `update*Models()` functions — intentional for BYOK/reseller providers where Sim doesn't bill model usage.
-
-## Unverifiable
-
-- **Cohere first-party per-search price page:** `cohere.com/pricing`'s API pricing table does not render server-side; per-search numbers were confirmed via two independent secondary trackers per model plus Cohere docs confirming the per-search billing unit.
-- **Baseten and LiteLLM exact temperature bounds:** neither publishes a numeric range (OpenAI-compatible passthrough); 0–2 judged sane by convention rather than verified.
-- **vLLM upper temperature bound:** vLLM accepts temperatures above 2; the 0–2 cap is a UI choice, not a provider-documented limit.
diff --git a/docs/models/google.md b/docs/models/google.md
deleted file mode 100644
index 215ea82402..0000000000
--- a/docs/models/google.md
+++ /dev/null
@@ -1,184 +0,0 @@
-# Google Provider Model Validation — Final Pass
-
-- **Date:** 2026-06-11
-- **Scope:** `google` block in `apps/sim/providers/models.ts` (10 models), re-verifying everything including changes landed in PR #4990
-- **Method:** Live WebFetch of ai.google.dev (models overview, per-model pages, pricing, thinking, deprecations, changelog, generate-content API reference) and cloud.google.com Vertex AI pricing; OpenRouter as secondary pricing source; WebSearch for GA dates. Google docs treated as authoritative where sources conflict.
-- **Primary sources:**
-  - https://ai.google.dev/gemini-api/docs/models (+ per-model pages)
-  - https://ai.google.dev/gemini-api/docs/pricing
-  - https://ai.google.dev/gemini-api/docs/thinking
-  - https://ai.google.dev/gemini-api/docs/deprecations
-  - https://ai.google.dev/gemini-api/docs/changelog
-  - https://ai.google.dev/gemini-api/docs/interactions/deep-research
-  - https://ai.google.dev/api/generate-content (GenerationConfig)
-  - https://cloud.google.com/vertex-ai/generative-ai/pricing ("Gemini Deep Research Agent" row)
-  - OpenRouter model pages (secondary pricing)
-
-## Provider-level checks
-
-| Check | Result |
-|---|---|
-| Capability consumption in `apps/sim/providers/gemini/` | Only `thinking` is consumed: `request.thinkingLevel` → `mapToThinkingLevel` → `thinkingConfig` (`gemini/core.ts:955-961`). No references to `reasoningEffort`, `verbosity`, `nativeStructuredOutputs`, or `computerUse`. Declaring `thinking.levels`/`default` per model is the only capability surface that affects requests. |
-| `temperature: { min: 0, max: 2 }` | **Verified.** GenerationConfig documents temperature range [0.0, 2.0] (https://ai.google.dev/api/generate-content). Note Google recommends keeping 1.0 default on Gemini 3 models, but 0–2 is the accepted API range. Verdict: correct on all entries. |
-| 2.5-series entries have no `thinking` capability | **Correct by design.** Gemini 2.5 uses `thinkingBudget`, not `thinkingLevel` (https://ai.google.dev/gemini-api/docs/thinking). Our provider only sends `thinkingConfig` when a level is selected, so omitting `thinking` on 2.5 entries is right. |
-
-## Per-model verification
-
-### gemini-3.5-flash
-
-| Field | Our value | Source | Verdict |
-|---|---|---|---|
-| id | `gemini-3.5-flash` (stable/GA) | docs/models, model page | OK |
-| pricing.input | 1.5 | docs/pricing ($1.50); Vertex ($1.50 global); OpenRouter ($1.50) | OK |
-| pricing.cachedInput | 0.15 | docs/pricing ($0.15); Vertex ($0.15) | OK |
-| pricing.output | 9.0 | docs/pricing ($9.00); Vertex ($9.00); OpenRouter ($9.00) | OK |
-| thinking.levels | minimal/low/medium/high | docs/thinking | OK |
-| thinking.default | medium | docs/thinking ("Default: medium"); OpenRouter ("defaults to medium thinking effort") | OK |
-| maxOutputTokens | 65536 | model page (65,536) | OK |
-| contextWindow | 1048576 | model page (1,048,576) | OK |
-| releaseDate | 2026-05-19 | changelog: "May 19, 2026 — Released `gemini-3.5-flash`, the generally available (GA) version" | OK |
-| recommended | true | Google's flagship recommendation; replacement target for 2.0-flash and 3-flash-preview | OK |
-
-### gemini-3.1-pro-preview
-
-| Field | Our value | Source | Verdict |
-|---|---|---|---|
-| id | `gemini-3.1-pro-preview` | docs/models, model page | OK |
-| pricing.input | 2.0 | docs/pricing ($2.00 ≤200k; $4.00 >200k); OpenRouter ($2) | OK (base tier; see "Deliberately not changed") |
-| pricing.cachedInput | 0.2 | docs/pricing ($0.20 ≤200k) | OK |
-| pricing.output | 12.0 | docs/pricing ($12.00 ≤200k; $18.00 >200k); OpenRouter ($12) | OK |
-| thinking.levels | low/medium/high (no minimal — PR #4990 change) | docs/thinking: "Supported levels: low, medium, high"; "Thinking cannot be disabled" | OK — #4990 change re-confirmed |
-| thinking.default | high | docs/thinking ("Default: high (dynamic)") | OK |
-| maxOutputTokens | 65536 | model page | OK |
-| contextWindow | 1048576 | model page (1,048,576) | OK |
-| releaseDate | 2026-02-19 | changelog: "Feb 19, 2026 — Released Gemini 3.1 Pro Preview" | OK |
-
-### gemini-3.1-flash-lite
-
-| Field | Our value | Source | Verdict |
-|---|---|---|---|
-| id | `gemini-3.1-flash-lite` (stable — PR #4990 rename) | docs/models lists stable; `gemini-3.1-flash-lite-preview` marked "Shut down" (May 25, 2026 per deprecations) | OK — rename re-confirmed |
-| pricing.input | 0.25 | docs/pricing ($0.25 text); Vertex ($0.25 global); OpenRouter ($0.25) | OK |
-| pricing.cachedInput | 0.025 | docs/pricing ($0.025); Vertex ($0.025) | OK |
-| pricing.output | 1.5 | docs/pricing ($1.50); Vertex ($1.50); OpenRouter ($1.50) | OK |
-| thinking.levels | minimal/low/medium/high | docs/thinking; OpenRouter ("full thinking levels (minimal, low, medium, high)") | OK |
-| thinking.default | minimal | docs/thinking: "Default: minimal" — Google's documented API default for this model **is** `minimal`, so our value matches the API default (the earlier report that the API default is 'high' is not supported by current docs). Also aligns with our cost-saving intent. | OK |
-| maxOutputTokens | 65536 | model page (65,536) | OK |
-| contextWindow | 1048576 | model page (1,048,576) | OK |
-| releaseDate | **2026-03-03 — STALE.** That is the preview's release date. GA changelog: "May 7, 2026 — Released `gemini-3.1-flash-lite`, the generally available (GA) version"; Google Cloud blog GA announcement published 2026-05-08. Changelog (Gemini API source of truth) wins. | changelog; cloud.google.com blog "Gemini 3.1 Flash-Lite is now generally available" | **FIX → 2026-05-07** |
-| speedOptimized | (absent) | Model page: "optimized for low-latency, cost-effective" high-volume tasks; Google blog: "fastest and most cost-efficient Gemini 3 series model". Precedent: `gemini-2.5-flash-lite` carries `speedOptimized: true` and Google's models page calls 2.5-flash-lite "the fastest and most budget-friendly" of its generation — 3.1-flash-lite holds the same position in the Gemini 3 generation. | **FIX → add `speedOptimized: true`** |
-
-### gemini-3-flash-preview
-
-| Field | Our value | Source | Verdict |
-|---|---|---|---|
-| id | `gemini-3-flash-preview` | docs/models, model page | OK |
-| pricing.input | 0.5 | docs/pricing ($0.50 text); OpenRouter ($0.50) | OK |
-| pricing.cachedInput | 0.05 | docs/pricing ($0.05) | OK |
-| pricing.output | 3.0 | docs/pricing ($3.00); OpenRouter ($3.00) | OK |
-| thinking.levels | minimal/low/medium/high | docs/thinking | OK |
-| thinking.default | high | docs/thinking ("Default: high (dynamic)") | OK |
-| maxOutputTokens | 65536 | model page | OK |
-| contextWindow | 1048576 (PR #4990 change) | model page (1,048,576); OpenRouter (1M) | OK — #4990 change re-confirmed |
-| releaseDate | 2025-12-17 | changelog: "Dec 17, 2025 — Launched Gemini 3 Flash Preview"; OpenRouter | OK |
-| deprecated | (absent) | docs/deprecations lists `gemini-3-flash-preview` in the deprecation table with recommended replacement `gemini-3.5-flash`, **no shutdown date announced yet**. (The model's own page still renders as an active preview — the deprecations table is the authoritative lifecycle source.) | **FIX → add `deprecated: true`** |
-
-### gemini-2.5-pro
-
-| Field | Our value | Source | Verdict |
-|---|---|---|---|
-| pricing.input | 1.25 | docs/pricing ($1.25 ≤200k); OpenRouter ($1.25) | OK (base tier) |
-| pricing.cachedInput | 0.125 | docs/pricing ($0.125 ≤200k) | OK |
-| pricing.output | 10.0 | docs/pricing ($10.00 ≤200k); OpenRouter ($10) | OK |
-| maxOutputTokens | 65536 | longstanding model-page value | OK |
-| contextWindow | 1048576 | OpenRouter (1M); longstanding model-page value | OK |
-| releaseDate | 2025-03-25 | preview launch date (GA was 2025-06-17); repo convention uses first availability | OK |
-
-### gemini-2.5-flash
-
-| Field | Our value | Source | Verdict |
-|---|---|---|---|
-| pricing.input | 0.3 | docs/pricing ($0.30 text) | OK |
-| pricing.cachedInput | 0.03 | docs/pricing ($0.03) | OK |
-| pricing.output | 2.5 | docs/pricing ($2.50) | OK |
-| maxOutputTokens / contextWindow | 65536 / 1048576 | longstanding model-page values | OK |
-| releaseDate | 2025-05-20 | I/O 2025 preview launch | OK |
-
-### gemini-2.5-flash-lite
-
-| Field | Our value | Source | Verdict |
-|---|---|---|---|
-| pricing.input | 0.1 | docs/pricing ($0.10 text) | OK |
-| pricing.cachedInput | 0.01 | docs/pricing ($0.01) | OK |
-| pricing.output | 0.4 | docs/pricing ($0.40) | OK |
-| maxOutputTokens / contextWindow | 65536 / 1048576 | longstanding model-page values | OK |
-| releaseDate | 2025-06-17 | launch announcement | OK |
-| speedOptimized | true | docs/models: "fastest and most budget-friendly multimodal model" | OK |
-
-### gemini-2.0-flash (deprecated)
-
-| Field | Our value | Source | Verdict |
-|---|---|---|---|
-| deprecated | true (PR #4990 change) | docs/deprecations: shutdown June 1, 2026; changelog: "now shut down"; docs/pricing marks "(deprecated; shutdown June 1, 2026)". Replacement: gemini-3.5-flash. | OK — #4990 change re-confirmed. Entry retained intentionally for saved-workflow history. |
-| pricing | input 0.1 / cachedInput 0.025 / output 0.4 | docs/pricing (still published) | OK |
-| maxOutputTokens / contextWindow | 8192 / 1048576 | historical model-page values | OK |
-| releaseDate | 2025-02-05 | GA announcement | OK |
-
-### gemini-2.0-flash-lite (deprecated)
-
-| Field | Our value | Source | Verdict |
-|---|---|---|---|
-| deprecated | true (PR #4990 change) | docs/deprecations: shutdown June 1, 2026. Replacement: gemini-3.1-flash-lite. | OK — re-confirmed; retained for history |
-| pricing | input 0.075 / output 0.3 (no cachedInput — caching was never priced for this SKU) | docs/pricing | OK |
-| maxOutputTokens / contextWindow | 8192 / 1048576 | historical model-page values | OK |
-| releaseDate | 2025-02-25 | GA announcement | OK |
-
-### deep-research-pro-preview-12-2025
-
-| Field | Our value | Source | Verdict |
-|---|---|---|---|
-| id | `deep-research-pro-preview-12-2025` | model page https://ai.google.dev/gemini-api/docs/models/deep-research-pro-preview-12-2025 (Interactions API) | OK |
-| pricing.input | 2.0 (PR #4990) | Vertex AI pricing, "Gemini Deep Research Agent": $2/1M input | OK — re-confirmed |
-| pricing.cachedInput | 0.2 (PR #4990) | Vertex AI pricing: $0.2/1M cached input | OK — re-confirmed |
-| pricing.output | 12.0 (PR #4990, was 2.0) | Vertex AI pricing: $12/1M output (response and reasoning). Consistent with underlying Gemini 3.1 Pro rates ($2/$0.2/$12). | OK — re-confirmed |
-| capabilities | deepResearch: true, memory: false | model page (agentic researcher; Interactions API) | OK |
-| maxOutputTokens | 65536 | model page (65,536) | OK |
-| contextWindow | 1048576 (PR #4990) | model page (1,048,576) | OK — re-confirmed |
-| releaseDate | 2025-12-11 | model page only says "December 2025"; exact day not published in fetched docs | Unverifiable to the day; month consistent — keep |
-| Lifecycle | Not listed on docs/deprecations; no shutdown announced | docs/deprecations | OK to keep |
-
-**Recommendation (documented only, no entries added):** Google introduced `deep-research-preview-04-2026` and `deep-research-max-preview-04-2026` on 2026-04-21 (changelog; https://ai.google.dev/gemini-api/docs/interactions/deep-research). The Deep Research interactions doc now leads with these SKUs and prices them per-task (~$1–3 / ~$3–7). A follow-up should evaluate adding them once per-token pricing is published; `deep-research-pro-preview-12-2025` remains documented and un-deprecated, so no change now.
-
-## Changes made in this pass
-
-None to `models.ts` (per task rules — fix list reported separately). This document is the only artifact.
-
-## Re-confirmed PR #4990 changes
-
-1. `gemini-3.1-flash-lite-preview` → `gemini-3.1-flash-lite` rename — preview slug shut down 2026-05-25 (deprecations page); stable listed on docs/models.
-2. `gemini-3.1-pro-preview` thinking.levels without `minimal` — docs/thinking lists low/medium/high only; "thinking cannot be disabled".
-3. `gemini-3-flash-preview` contextWindow 1048576 — model page.
-4. `deprecated: true` on gemini-2.0-flash and gemini-2.0-flash-lite — shut down 2026-06-01 (deprecations + changelog).
-5. Deep Research output 12.0, cachedInput 0.2, contextWindow 1048576 — Vertex pricing row + model page.
-
-## Recommended fixes (not applied)
-
-1. `gemini-3.1-flash-lite`: `releaseDate` `2026-03-03` → `2026-05-07` — current value is the preview's release date; GA released May 7, 2026 per Gemini API changelog (Cloud blog announcement published May 8, 2026; changelog wins as the API source of truth).
-2. `gemini-3.1-flash-lite`: add `speedOptimized: true` — Google positions it as the fastest, most cost-efficient Gemini 3 model (model page, GA blog); matches the precedent set by `gemini-2.5-flash-lite`.
-3. `gemini-3-flash-preview`: add `deprecated: true` — formally listed on https://ai.google.dev/gemini-api/docs/deprecations with replacement `gemini-3.5-flash` (no shutdown date announced yet).
-
-## Deliberately not changed
-
-- **`gemini-3.1-flash-lite` thinking.default `minimal`** — matches Google's documented default for this model (docs/thinking: "Default: minimal") and is also our intended cost-saving default. No conflict.
-- **Tiered pricing (`gemini-3.1-pro-preview`, `gemini-2.5-pro`)** — we model the ≤200k-token base tier ($2/$12 and $1.25/$10). The >200k tier ($4/$18 and $2.50/$15) is not representable in the flat pricing schema; base tier is the established repo convention.
-- **Audio input pricing** (flash models have higher audio-input rates, e.g. 3.1-flash-lite $0.50 audio) — schema models text-input pricing only; convention.
-- **gemini-2.0-flash / -flash-lite entries kept despite shutdown** — `deprecated: true` retained instead of deletion so saved workflows referencing them keep rendering history correctly.
-- **Deep Research newer SKUs not added** — per-task preview pricing only; documented as a follow-up recommendation above.
-- **`gemini-2.5-pro` releaseDate 2025-03-25** — preview-launch date; repo convention is first availability, not GA (2025-06-17).
-- **`updatedAt: 2026-06-11`** on all entries — accurate as of this validation.
-
-## Unverifiable
-
-- **deep-research-pro-preview-12-2025 exact release day (2025-12-11)** — Google docs only state "December 2025"; the day-level value could not be confirmed or refuted. Month consistent; left as-is.
-- **2.5-series maxOutputTokens (65,536) and 2.0-series limits (8,192 / 1,048,576)** — not re-fetched per-model in this pass; values match longstanding Google model-page specs and were unchanged by PR #4990. OpenRouter corroborates 1M context for 2.5-pro.
-- **Gemini API pricing page for Deep Research** — the ai.google.dev pricing page does not list the 12-2025 SKU (it now points at the 04-2026 per-task estimates); per-token verification rests on the Vertex AI "Gemini Deep Research Agent" row alone (single — but official Google — source).
diff --git a/docs/models/groq.md b/docs/models/groq.md
deleted file mode 100644
index b4bc6905d4..0000000000
--- a/docs/models/groq.md
+++ /dev/null
@@ -1,157 +0,0 @@
-# Groq Provider Validation — Final Pass
-
-**Date:** 2026-06-11
-**Scope:** `groq` provider block in `apps/sim/providers/models.ts` (8 models). Re-verifies everything, including the changes landed in PR #4990 (kimi `deprecated: true`, gpt-oss `cachedInput`, `updatedAt` bumps).
-
-## Sources & Method
-
-| Source | What it verified |
-|---|---|
-| `https://api.groq.com/openai/v1/models` (live, authenticated with local dev key) | Active model list, `context_window`, `max_completion_tokens`, `created` timestamps. Groq's own per-model doc pages render their spec tables client-side from this same data ("Loading model information..." in static HTML), so the API is the authoritative equivalent of the per-model pages. |
-| `https://groq.com/pricing` (live fetch) | All input/cached-input/output rates |
-| `https://console.groq.com/docs/prompt-caching` (live fetch) | Caching-supported model list, 50% cached-token discount |
-| `https://console.groq.com/docs/deprecations` (live fetch) | kimi shutdown, qwen3-32b status |
-| `https://console.groq.com/docs/models` + per-model `.md` pages (live fetch) | Featured/flagship positioning, context-window prose, model-card positioning |
-| Groq OpenAPI spec embedded in `console.groq.com/docs/model/*` HTML | `temperature` parameter bounds (`minimum: 0, maximum: 2`) |
-| OpenRouter `GET /api/v1/models/<slug>/endpoints` Groq rows (secondary) | Pricing cross-check, `max_completion_tokens` cross-check |
-| WebSearch (Meta blog coverage, Moonshot K2-0905 announcement coverage) | Upstream release dates |
-
-Rule applied: where Groq's own sources conflict with secondary sources, Groq wins.
-
-## Per-Model Verification
-
-### groq/openai/gpt-oss-120b
-
-| Field | Repo value | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| pricing.input | 0.15 | $0.15/M | groq.com/pricing; OpenRouter Groq row 0.00000015 | OK |
-| pricing.cachedInput | 0.075 | $0.075/M | groq.com/pricing (explicit cached column); prompt-caching doc 50% rule; OpenRouter 0.000000075 | OK (PR #4990 change confirmed) |
-| pricing.output | 0.6 | $0.60/M | groq.com/pricing; OpenRouter | OK |
-| contextWindow | 131072 | 131072 | api.groq.com/openai/v1/models; model card "131K context window" | OK |
-| capabilities.maxOutputTokens | — (absent) | 65536 | api.groq.com/openai/v1/models `max_completion_tokens`; OpenRouter agrees | **FIX: add 65536** |
-| releaseDate | 2025-08-05 | 2025-08-05 | Groq API `created` = 1754408224 → 2025-08-05 UTC | OK |
-| recommended | — (absent) | should be `true` | console.groq.com/docs/models features it as "OpenAI's flagship open-weight language model" (~500 t/s); deprecations page names `openai/gpt-oss-120b` as the recommended replacement (incl. for kimi-k2-instruct-0905) | **FIX: add `recommended: true`** |
-| deprecated | — | active | live API `active: true`; not on deprecations page | OK |
-
-### groq/openai/gpt-oss-20b
-
-| Field | Repo value | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| pricing.input | 0.075 | $0.075/M | groq.com/pricing; OpenRouter | OK |
-| pricing.cachedInput | 0.0375 | $0.0375/M | groq.com/pricing (explicit); OpenRouter 0.0000000375 | OK (PR #4990 confirmed) |
-| pricing.output | 0.3 | $0.30/M | groq.com/pricing; OpenRouter | OK |
-| contextWindow | 131072 | 131072 | Groq API; model card "up to 131K" | OK |
-| capabilities.maxOutputTokens | — | 65536 | Groq API `max_completion_tokens`; OpenRouter agrees | **FIX: add 65536** |
-| releaseDate | 2025-08-05 | 2025-08-05 | Groq API `created` = 1754407957 → 2025-08-05 UTC | OK |
-| deprecated | — | active | live API; deprecations page | OK |
-
-### groq/openai/gpt-oss-safeguard-20b
-
-| Field | Repo value | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| pricing.input | 0.075 | $0.075/M | groq.com/pricing | OK |
-| pricing.cachedInput | 0.0375 | $0.0375/M | prompt-caching doc lists this model as caching-supported with "50% discount for cached input tokens" → 0.075 × 0.5 = 0.0375. Pricing page shows no cached column for this row; OpenRouter shows $0.037/M (rounding). Groq's caching doc wins. | OK (PR #4990 confirmed) |
-| pricing.output | 0.3 | $0.30/M | groq.com/pricing | OK |
-| contextWindow | 131072 | 131072 | Groq API | OK |
-| capabilities.maxOutputTokens | — | 65536 | Groq API `max_completion_tokens`; OpenRouter agrees | **FIX: add 65536** |
-| releaseDate | 2025-10-29 | 2025-10-29 | Groq API `created` = 1761708789 → 2025-10-29 UTC | OK |
-| deprecated | — | active | live API; deprecations page | OK |
-
-### groq/qwen/qwen3-32b
-
-| Field | Repo value | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| pricing.input | 0.29 | $0.29/M | groq.com/pricing; OpenRouter | OK |
-| pricing.cachedInput | — | none on Groq | Not in prompt-caching supported list (gpt-oss ×3 only); no cached column on pricing page. OpenRouter shows a 50% `input_cache_read` ($0.145) — Groq docs win; do not add. | OK (absent) |
-| pricing.output | 0.59 | $0.59/M | groq.com/pricing; OpenRouter | OK |
-| contextWindow | 131072 | 131072 | Groq API | OK |
-| capabilities.maxOutputTokens | — | 40960 | Groq API `max_completion_tokens`; OpenRouter agrees | **FIX: add 40960** |
-| releaseDate | 2025-04-29 | 2025-04-29 | Upstream Qwen3 family launch (field is "first publicly released"). Groq endpoint `created` is 2025-05-28 (when Groq added it) — repo convention uses upstream release. | OK |
-| deprecated | — | **not deprecated** | `active: true` in live API; absent from deprecations page (appears there only as a *replacement* for mistral-saba-24b / qwen-qwq-32b) | OK — confirmed still active (open question f) |
-
-### groq/llama-3.1-8b-instant
-
-| Field | Repo value | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| pricing.input | 0.05 | $0.05/M | groq.com/pricing; OpenRouter | OK |
-| pricing.output | 0.08 | $0.08/M | groq.com/pricing; OpenRouter | OK |
-| pricing.cachedInput | — | none on Groq | Not in caching-supported list; no cached column on pricing page (OpenRouter's $0.025 row not honored — Groq wins) | OK (absent) |
-| contextWindow | 131072 | 131072 | Groq API | OK |
-| capabilities.maxOutputTokens | — | 131072 | Groq API `max_completion_tokens` = 131072 (full window); OpenRouter agrees | **FIX: add 131072** |
-| releaseDate | 2024-07-23 | 2024-07-23 | Meta released Llama 3.1 (8B/70B/405B) on 2024-07-23 (ai.meta.com/blog/meta-llama-3-1, press coverage dated 2024-07-23). Groq API `created` (2023-09-03) is a placeholder shared with whisper entries and predates Llama 3.1 — not meaningful. | OK — verified (open question g) |
-| speedOptimized | — (absent) | should be `true` | Groq's speed-tier "-instant" naming; model card positions it for "Real-Time Applications … requiring instant responses and high throughput"; cheapest text model in the lineup. Matches repo precedent (claude-3-haiku, gemini-2.0-flash). | **FIX: add `speedOptimized: true`** |
-| deprecated | — | active | live API; deprecations page (it is a replacement target, not deprecated) | OK |
-
-### groq/llama-3.3-70b-versatile
-
-| Field | Repo value | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| pricing.input | 0.59 | $0.59/M | groq.com/pricing; OpenRouter | OK |
-| pricing.output | 0.79 | $0.79/M | groq.com/pricing; OpenRouter | OK |
-| contextWindow | 131072 | 131072 | Groq API | OK |
-| capabilities.maxOutputTokens | — | 32768 | Groq API `max_completion_tokens`; OpenRouter agrees | **FIX: add 32768** |
-| releaseDate | 2024-12-06 | 2024-12-06 | Groq API `created` = 1733447754 → 2024-12-06 UTC, matching Meta's Llama 3.3 launch day | OK — verified (open question g) |
-| deprecated | — | active | live API; deprecations page (replacement target for several retired models) | OK |
-
-### groq/meta-llama/llama-4-scout-17b-16e-instruct
-
-| Field | Repo value | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| pricing.input | 0.11 | $0.11/M | groq.com/pricing; OpenRouter | OK |
-| pricing.output | 0.34 | $0.34/M | groq.com/pricing; OpenRouter | OK |
-| contextWindow | 131072 | 131072 | Groq API | OK |
-| capabilities.maxOutputTokens | — | 8192 | Groq API `max_completion_tokens`; OpenRouter agrees | **FIX: add 8192** |
-| releaseDate | 2025-04-05 | 2025-04-05 | Groq API `created` = 1743874824 → 2025-04-05 UTC (Meta Llama 4 launch day) | OK |
-| deprecated | — | active | live API; deprecations page | OK |
-
-### groq/moonshotai/kimi-k2-instruct-0905
-
-| Field | Repo value | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| deprecated | true | shut down | console.groq.com/docs/deprecations: shutdown **04/15/26**, replacement `openai/gpt-oss-120b`; model entirely absent from the live `/v1/models` response | OK (PR #4990 change confirmed — open question regarding shutdown resolved) |
-| pricing.input | 1.0 | $1.00/M | groq.com/pricing (row still present); OpenRouter | OK |
-| pricing.output | 3.0 | $3.00/M | groq.com/pricing; OpenRouter | OK |
-| pricing.cachedInput | — | conflicting | groq.com/pricing still shows $0.50 cached, but the prompt-caching doc's supported list contains only the 3 gpt-oss models, and the model is removed from the API. Conflicting Groq sources + shut-down model → not added (see "Deliberately not changed"). | OK (absent) |
-| contextWindow | 262144 | 262144 | Moonshot K2-0905 announcement ("context length expanded from 128K to 256K"); Groq model card description "256K context"; OpenRouter Groq row 262144. Live Groq API no longer lists the model. | OK |
-| capabilities.maxOutputTokens | — | 16384 (OpenRouter only) | Only source is OpenRouter; model is gone from Groq's API and its doc-page spec table cannot be rendered. Cannot confirm from Groq's own docs → **skipped** per validation rules. | Not added (unverifiable from Groq) |
-| releaseDate | 2025-09-05 | 2025-09-05 | Moonshot AI announced K2-Instruct-0905 on September 5, 2025 (aibase coverage; simonwillison.net 2025-09-06; the `0905` suffix) | OK — verified (open question g) |
-| pricing.updatedAt | 2026-04-01 | — | Prices re-checked today and unchanged; model is shut down, so no bump needed | OK |
-
-## Provider-Level Capability: temperature
-
-**Recommendation: add `temperature: { min: 0, max: 2 }` to the groq provider `capabilities`.**
-
-- Groq's OpenAPI spec (embedded in console.groq.com docs pages, chat-completions `temperature`): "What sampling temperature to use, between 0 and 2", `"minimum": 0, "maximum": 2`.
-- `apps/sim/providers/groq/index.ts:82` already forwards it: `if (request.temperature !== undefined) payload.temperature = request.temperature` — so the only thing missing is the capability flag; today Sim hides the temperature slider for every Groq model while the provider would happily accept the value.
-- Precedent: `fireworks` (models.ts:97), `together` (models.ts:113), and `baseten` (models.ts:129) all declare `temperature: { min: 0, max: 2 }` at the provider level for the same OpenAI-compatible 0–2 range.
-
-**Test impact** (`apps/sim/providers/utils.test.ts`):
-- ~line 214: `'groq/meta-llama/llama-4-scout-17b-16e-instruct'` must be removed from the `unsupportedModels` list in the `supportsTemperature` → false test (it will now return `true`; move it to the supported list).
-- ~line 288: `expect(getMaxTemperature('groq/meta-llama/llama-4-scout-17b-16e-instruct')).toBeUndefined()` must change to expect `2` (move into the "range 0-2" group).
-
-## Changes made in this pass
-
-None to `models.ts` (per instructions — doc only). The fix list below is the recommended diff.
-
-1. `groq` provider capabilities: add `temperature: { min: 0, max: 2 }` (+ update the two utils.test.ts assertions above).
-2. `groq/openai/gpt-oss-120b`: `capabilities: {}` → `capabilities: { maxOutputTokens: 65536 }`; add `recommended: true`.
-3. `groq/openai/gpt-oss-20b`: add `maxOutputTokens: 65536`.
-4. `groq/openai/gpt-oss-safeguard-20b`: add `maxOutputTokens: 65536`.
-5. `groq/qwen/qwen3-32b`: add `maxOutputTokens: 40960`.
-6. `groq/llama-3.1-8b-instant`: add `maxOutputTokens: 131072`; add `speedOptimized: true`.
-7. `groq/llama-3.3-70b-versatile`: add `maxOutputTokens: 32768`.
-8. `groq/meta-llama/llama-4-scout-17b-16e-instruct`: add `maxOutputTokens: 8192`.
-
-## Deliberately not changed
-
-- **kimi-k2-instruct-0905 `cachedInput`**: groq.com/pricing still shows $0.50 cached, but the canonical prompt-caching doc's supported-model list is exactly the three gpt-oss models, and the model is shut down (absent from the live API since the 2026-04-15 shutdown). Conflicting Groq sources for a decommissioned model — adding a cached rate would be dead config. Reconciliation: the pricing-page row is residual for a removed model; the caching doc never listed kimi.
-- **kimi-k2-instruct-0905 `maxOutputTokens`**: 16384 is OpenRouter-only; cannot be confirmed from Groq's own docs/API (model removed). Skipped per validation rules.
-- **`cachedInput` on qwen3-32b / llama-3.1-8b-instant**: OpenRouter's Groq endpoints advertise 50% `input_cache_read` rates, but Groq's prompt-caching doc explicitly limits caching support to the three gpt-oss models and the pricing page shows no cached column for them. Groq docs win. Re-check if Groq's promised caching rollout ("more models soon") lands.
-- **All pricing, contextWindow, releaseDate values**: verified correct as-is (including all PR #4990 changes — kimi `deprecated: true`, the three gpt-oss `cachedInput` rates, and `updatedAt: '2026-06-11'` bumps).
-- **kimi `pricing.updatedAt: '2026-04-01'`**: prices unchanged and model shut down; no bump needed.
-- **`defaultModel: 'groq/llama-3.3-70b-versatile'`**: still active and reasonable; changing the default is a product decision, not a validation finding.
-
-## Unverifiable
-
-- **kimi-k2-instruct-0905 `maxOutputTokens` (16384)** — Groq removed the model from its API and the doc page's spec table no longer renders; only OpenRouter attests it.
-- Nothing else: every other field was confirmed against at least one Groq-owned source (live `/v1/models` API, groq.com/pricing, prompt-caching doc, deprecations doc, or embedded OpenAPI spec), with OpenRouter as a corroborating secondary on pricing and token limits.
diff --git a/docs/models/mistral.md b/docs/models/mistral.md
deleted file mode 100644
index 26b236e099..0000000000
--- a/docs/models/mistral.md
+++ /dev/null
@@ -1,305 +0,0 @@
-# Mistral Provider Validation — Final Pass
-
-- **Date:** 2026-06-11
-- **Scope:** All 27 entries of the `mistral` provider block in `apps/sim/providers/models.ts` (lines ~2124–2501), re-verifying everything including the changes landed in PR #4990 (7 deprecations, 8 releaseDate fixes, updatedAt bumps).
-- **Method:** Live fetches of Mistral docs (model overview, model cards, pricing page, prompt-caching guide), direct download + grep of the canonical OpenAPI spec, and — decisively — the **server-side model-card source data** in `mistralai/platform-docs-public` (`src/schema/models/models/*.ts`, shallow-cloned at `main` on 2026-06-11). These TypeScript data files are what docs.mistral.ai renders into the model cards, and they carry `apiNames` (alias mappings), prices, context lengths, release dates, and `deprecationDate`/`retirementDate` metadata that the rendered pages omit. OpenRouter used as the secondary pricing source.
-
-## Sources
-
-| Source | URL |
-|---|---|
-| Models overview | https://docs.mistral.ai/getting-started/models/models_overview |
-| Pricing page | https://mistral.ai/pricing |
-| Model cards | https://docs.mistral.ai/models/model-cards/&lt;slug&gt; (slugs cited per model below) |
-| Model-card source data (authoritative) | https://github.com/mistralai/platform-docs-public — `src/schema/models/models/*.ts` @ `main`, 2026-06-11 |
-| OpenAPI spec | https://raw.githubusercontent.com/mistralai/platform-docs-public/main/openapi.yaml |
-| Prompt caching guide | https://docs.mistral.ai/studio-api/conversations/advanced/prompt-caching |
-| OpenRouter (secondary pricing) | https://openrouter.ai/mistralai/&lt;slug&gt; |
-
-Below, "data file" = the model's source file in `src/schema/models/models/`.
-
----
-
-## Per-model verification
-
-### mistral-large-latest / mistral-large-2512 (Mistral Large 3, 25.12)
-
-Data file: `mistral-large-3-25-12.ts`. Model card: `/models/model-cards/mistral-large-3-25-12`.
-
-| Field | Ours | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| pricing input/output | 0.5 / 1.5 | $0.5 / $1.5 per 1M | Data file, model card, pricing page ("Mistral Large 3: $0.5 / $1.5"), OpenRouter `mistral-large-2512` ($0.50/$1.50) | ✓ |
-| contextWindow | 256000 | 256k | Data file `contextLength: '256k'`; OpenRouter shows 262K (same window, binary units) | ✓ |
-| releaseDate | 2025-12-02 | 2025-12-02 | Data file `releaseDate: '2025-12-02'` | ✓ |
-| alias | latest → 2512 | `apiNames: ['mistral-large-2512', 'mistral-large-latest']` | Data file | ✓ |
-| status | active | `status: 'Active'` | Data file | ✓ |
-| temperature | {0, 1} | spec allows {0, **1.5**} | OpenAPI `ChatCompletionRequest.temperature` | ✗ see Changes |
-| recommended | (absent) | provider default, flagship | — | ✗ see Changes |
-
-Note: an initial pricing-page fetch summarized Large 3 as $2/$6; a verbatim re-fetch showed that was a summarization error — the literal row is "$0.5 / $1.5 /M tokens". $2/$6 is the legacy mistral-large-2411 price.
-
-### mistral-small-2603 / mistral-small-latest (Mistral Small 4, 26.03) — CONFLICT RULING
-
-Data file: `mistral-small-4-0-26-03.ts`. Model card: `/models/model-cards/mistral-small-4-0-26-03`.
-
-| Field | Ours | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| pricing input/output | 0.15 / 0.6 | **$0.15 / $0.6** (ruling below) | Data file (`price: 0.15` / `price: 0.6`), model card, OpenRouter `mistral-small-2603` ($0.15/$0.60) | ✓ KEEP |
-| contextWindow | 256000 | 256k | Data file | ✓ |
-| releaseDate | 2026-03-16 | 2026-03-16 | Data file | ✓ |
-| alias | latest → 2603 | `apiNames: ['mistral-small-2603', 'mistral-small-latest']` | Data file | ✓ |
-| status | active | `status: 'Active'` | Data file | ✓ |
-
-**Ruling on the open price conflict (question a):** mistral.ai/pricing again printed "$0.1 / $0.3" for Mistral Small 4 (verbatim re-fetch, third consistent reading). But three independent confirmations say $0.15/$0.6: (1) the model card, (2) the model-card **source data file** that drives the docs' billing-side documentation, and (3) OpenRouter's Mistral endpoint, which mirrors what Mistral actually charges resellers. $0.1/$0.3 is exactly the price of the predecessor Mistral Small 3.2 (`mistral-small-2506`, verified below), so the pricing-page row is almost certainly a stale carry-over from Small 3.x, not a price cut. **Final value: 0.15 / 0.6 — no change.** Re-check if the pricing page row persists alongside an official price-cut announcement.
-
-### devstral-2512 / devstral-latest (Devstral 2, 25.12)
-
-Data file: `devstral-2-25-12.ts`.
-
-| Field | Ours | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| pricing input/output | 0.4 / 2.0 | $0.4 / $2 | Data file, pricing page ("Devstral 2: $0.4 / $2"), OpenRouter `devstral-2512` ($0.40/$2.00) | ✓ |
-| contextWindow | 256000 | 256k | Data file | ✓ |
-| releaseDate | 2025-12-09 | 2025-12-09 | Data file | ✓ |
-| alias | devstral-latest → 2512 | `apiNames: ['devstral-2512', 'devstral-latest', 'devstral-medium-latest']` | Data file | ✓ (note: `devstral-medium-latest` is a third alias we don't list — fine) |
-| status | active | `status: 'Active'` | Data file | ✓ |
-
-### mistral-large-2411 (deprecated)
-
-Data file: `mistral-large-2-1-24-11.ts`.
-
-| Field | Ours | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| pricing input/output | 2.0 / 6.0 | $2.0 / $6.0 | Data file (previously unverifiable — now confirmed) | ✓ |
-| contextWindow | 128000 | 128k | Data file | ✓ |
-| releaseDate | 2024-11-18 | 2024-11-18 | Data file | ✓ |
-| deprecated | true | `status: 'Deprecated'`, deprecationDate 2026-02-27, retirementDate 2026-05-31 (already retired) | Data file metadata | ✓ |
-
-### magistral-medium-latest / magistral-medium-2509
-
-Data file: `magistral-medium-1-2-25-09.ts`.
-
-| Field | Ours | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| pricing input/output | 2.0 / 5.0 | $2.0 / $5.0 | Data file, pricing page ("Magistral Medium: $2 / $5") | ✓ (OpenRouter: not listed — single-family source) |
-| contextWindow | 128000 | 128k | Data file | ✓ |
-| releaseDate | 2025-09-18 | 2025-09-18 | Data file (PR #4990 fix confirmed) | ✓ |
-| alias | latest → 2509 | `apiNames: ['magistral-medium-2509', 'magistral-medium-latest']` | Data file | ✓ |
-| status | active | `status: 'Active'` | Data file | ✓ |
-
-Note: Magistral is a reasoning model (`output: ['reasoning', 'text']`); see "Deliberately not changed" re `reasoning_effort`.
-
-### magistral-small-latest / magistral-small-2509 (deprecated)
-
-Data file: `magistral-small-1-2-25-09.ts`.
-
-| Field | Ours | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| pricing input/output | 0.5 / 1.5 | $0.5 / $1.5 | Data file, pricing page | ✓ |
-| contextWindow | 128000 | 128k | Data file | ✓ |
-| releaseDate | 2025-09-18 | 2025-09-18 | Data file (PR #4990 fix confirmed) | ✓ |
-| alias | small-latest → 2509 | `apiNames: ['magistral-small-2509', 'magistral-small-latest']` | Data file | ✓ |
-| deprecated | true | `status: 'Deprecated'`, deprecationDate 2026-04-30 (past), retirementDate 2026-07-31, replacement "Mistral Small 4" | Data file metadata | ✓ |
-
-### mistral-medium-latest / mistral-medium-2508 (Mistral Medium 3.1)
-
-Data file: `mistral-medium-3-1-25-08.ts`.
-
-| Field | Ours | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| pricing input/output | 0.4 / 2.0 | $0.4 / $2.0 | Data file | ✓ |
-| contextWindow | 128000 | 128k | Data file | ✓ |
-| releaseDate | 2025-08-12 | 2025-08-12 | Data file | ✓ |
-| alias | latest → 2508 | `apiNames: ['mistral-medium-2508', 'mistral-medium-latest']` | Data file | ✓ — **`mistral-medium-latest` still maps to 2508, NOT to Medium 3.5** (3.5 has its own apiNames, see below) |
-| status | active | `status: 'Active'` | Data file | ✓ |
-
-### mistral-medium-2505 (Mistral Medium 3)
-
-Data file: `mistral-medium-3-25-05.ts`.
-
-| Field | Ours | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| pricing input/output | 0.4 / 2.0 | $0.4 / $2.0 | Data file | ✓ |
-| contextWindow | 128000 | 128k | Data file | ✓ |
-| releaseDate | 2025-05-07 | 2025-05-07 | Data file | ✓ |
-| status | active (no flag) | `status: 'Active'` — not deprecated despite age | Data file | ✓ |
-
-### mistral-small-2506 (Mistral Small 3.2, deprecated)
-
-Data file: `mistral-small-3-2-25-06.ts`.
-
-| Field | Ours | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| pricing input/output | 0.1 / 0.3 | $0.1 / $0.3 | Data file (previously unverifiable — now confirmed) | ✓ |
-| contextWindow | 128000 | 128k | Data file | ✓ |
-| releaseDate | 2025-06-20 | 2025-06-20 | Data file | ✓ |
-| deprecated | true | `status: 'Deprecated'`, deprecationDate 2026-04-30 (past), retirementDate 2026-07-31 | Data file metadata | ✓ |
-
-### open-mistral-nemo
-
-Data file: `mistral-nemo-12b-24-07.ts`.
-
-| Field | Ours | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| pricing input/output | 0.15 / 0.15 | $0.15 / $0.15 | Data file, pricing page ("Mistral NeMo: $0.15 / $0.15") | ✓ |
-| contextWindow | 128000 | 128k | Data file | ✓ |
-| releaseDate | 2024-07-18 | 2024-07-18 | Data file | ✓ |
-| status | active (no flag) | `status: 'Active'` — still active | Data file | ✓ |
-
-### codestral-latest / codestral-2508
-
-Data file: `codestral-25-08.ts`. Model card: `/models/model-cards/codestral-25-08`.
-
-| Field | Ours | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| pricing input/output | 0.3 / 0.9 | $0.3 / $0.9 | Data file, model card, pricing page, OpenRouter `codestral-2508` ($0.30/$0.90) | ✓ |
-| contextWindow | 128000 | 128k per Mistral docs (data file + live model card). OpenRouter claims 256K — **Mistral docs win**, keep 128000 | Data file, model card | ✓ |
-| releaseDate | 2025-07-30 | 2025-07-30 | Data file | ✓ |
-| alias | latest → 2508 | `apiNames: ['codestral-2508', 'codestral-latest']` | Data file | ✓ |
-| status | active | `status: 'Active'` | Data file | ✓ |
-
-### devstral-small-latest (Devstral Small 2, 25.12, deprecated)
-
-Data file: `devstral-small-2-25-12.ts`.
-
-| Field | Ours | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| pricing input/output | 0.1 / 0.3 | $0.1 / $0.3 | Data file | ✓ |
-| contextWindow | 256000 | 256k | Data file | ✓ |
-| releaseDate | 2025-12-09 | 2025-12-09 | Data file (PR #4990 fix confirmed) | ✓ |
-| alias | — | `apiNames: ['labs-devstral-small-2512', 'devstral-small-latest']` | Data file | ✓ |
-| deprecated | true | `status: 'Deprecated'`, deprecationDate 2026-02-27, retirementDate 2026-03-31 (already retired), replacement "Devstral 2" | Data file metadata | ✓ |
-
-### devstral-small-2507 (deprecated)
-
-Data file: `devstral-small-1-1-25-07.ts`.
-
-| Field | Ours | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| pricing input/output | 0.1 / 0.3 | $0.1 / $0.3 | Data file (previously unverifiable — now confirmed) | ✓ |
-| contextWindow | 128000 | 128k | Data file | ✓ |
-| releaseDate | 2025-07-10 | 2025-07-10 | Data file | ✓ |
-| deprecated | true | `status: 'Deprecated'`, deprecationDate 2026-02-27, retirementDate 2026-05-31 (already retired) | Data file metadata | ✓ |
-
-### devstral-medium-2507 (deprecated)
-
-Data file: `devstral-medium-1-0-25-07.ts`.
-
-| Field | Ours | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| pricing input/output | 0.4 / 2.0 | $0.4 / $2.0 | Data file (previously unverifiable — now confirmed) | ✓ |
-| contextWindow | 128000 | 128k | Data file | ✓ |
-| releaseDate | 2025-07-10 | 2025-07-10 | Data file | ✓ |
-| deprecated | true | `status: 'Deprecated'`, deprecationDate 2026-02-27, retirementDate 2026-05-31 (already retired) | Data file metadata | ✓ |
-
-### ministral-14b-latest / ministral-14b-2512 (Ministral 3 14B)
-
-Data file: `ministral-3-14b-25-12.ts`. Model card: `/models/model-cards/ministral-3-14b-25-12`.
-
-| Field | Ours | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| pricing input/output | 0.2 / 0.2 | $0.2 / $0.2 | Data file, pricing page, OpenRouter `ministral-14b-2512` ($0.20/$0.20) | ✓ |
-| contextWindow | 256000 | 256k | Data file | ✓ |
-| releaseDate | 2025-12-02 | 2025-12-02 | Data file | ✓ |
-| alias | latest → 2512 | `apiNames: ['ministral-14b-2512', 'ministral-14b-latest']` | Data file | ✓ |
-| status | active | `status: 'Active'` | Data file | ✓ |
-| speedOptimized | (absent) | edge/low-latency tier | — | ✗ see Changes |
-
-### ministral-8b-latest / ministral-8b-2512 (Ministral 3 8B)
-
-Data file: `ministral-3-8b-25-12.ts`.
-
-| Field | Ours | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| pricing input/output | 0.15 / 0.15 | $0.15 / $0.15 | Data file, pricing page | ✓ |
-| contextWindow | 256000 | 256k | Data file | ✓ |
-| releaseDate | 2025-12-02 | 2025-12-02 | Data file (PR #4990 fix confirmed) | ✓ |
-| alias | latest → 2512 | `apiNames: ['ministral-8b-2512', 'ministral-8b-latest']` | Data file | ✓ |
-| speedOptimized | (absent) | edge/low-latency tier | — | ✗ see Changes |
-
-### ministral-3b-latest / ministral-3b-2512 (Ministral 3 3B)
-
-Data file: `ministral-3-3b-25-12.ts`.
-
-| Field | Ours | Verified value | Source | Verdict |
-|---|---|---|---|---|
-| pricing input/output | 0.1 / 0.1 | $0.1 / $0.1 | Data file, pricing page | ✓ |
-| contextWindow | 256000 | 256k | Data file | ✓ |
-| releaseDate | 2025-12-02 | 2025-12-02 | Data file (PR #4990 fix confirmed) | ✓ |
-| alias | latest → 2512 | `apiNames: ['ministral-3b-2512', 'ministral-3b-latest']` | Data file | ✓ |
-| speedOptimized | (absent) | edge/low-latency tier | — | ✗ see Changes |
-
----
-
-## Provider-wide checks
-
-### Temperature bounds (question e) — DISCREPANCY FOUND
-
-The live OpenAPI spec's `ChatCompletionRequest.temperature` (openapi.yaml, schema at line 11988, property at 11997) is:
-
-```yaml
-temperature:
-  anyOf:
-  - type: number
-    maximum: 1.5
-    minimum: 0
-```
-
-with the description "we recommend between 0.0 and 0.7". So the chat-completions endpoint — the one Sim's provider calls (`https://api.mistral.ai/v1` + `chat.completions.create`) — accepts **0–1.5, not 0–1**. The 0–1 bound exists in the spec only on `CompletionArgs` (line ~8103), which is the **conversations/agents API**'s white-listed argument schema, not chat completions; that is likely where the earlier "max 1" belief came from. Verdict: our `{min: 0, max: 1}` is overly restrictive — users cannot select values above 1.0 (up to 1.5), which the API supports. Recommended fix: `max: 1.5` on all 27 entries.
-
-### Prompt caching (question b) — NOT WIRED, cachedInput NOT added
-
-- OpenAPI spec: `prompt_cache_key` exists on `ChatCompletionRequest` (line 12134), `FIMCompletionRequest` (12362), and `AgentsCompletionRequest` (13841): "A cache key to enable prompt caching. When provided, the API will attempt to reuse previously computed tokens... Cached tokens are billed at 10% of the standard input token price."
-- Prompt-caching guide confirms caching is **opt-in**: "Set the same `prompt_cache_key` on requests that are likely to share a prefix"; 64-token block granularity; hits reported via `usage.prompt_tokens_details.cached_tokens`.
-- Sim's provider (`apps/sim/providers/mistral/index.ts`) forwards only `temperature` and `max_tokens` (plus messages/tools/response_format). It does **not** send `prompt_cache_key`, so no Sim request can ever produce cached tokens.
-
-**Ruling: caching is opt-in, Sim does not opt in → adding `cachedInput` would be dead data. Not changed.** Recommended follow-up: wire `prompt_cache_key` in the Mistral provider (e.g. keyed per workflow execution/conversation), read `usage.prompt_tokens_details.cached_tokens`, then add `cachedInput = 0.1 × input` to all active entries (large 0.05, small 0.015, devstral 0.04, magistral-medium 0.2, medium 0.04, nemo 0.015, codestral 0.03, ministral-14b 0.02, ministral-8b 0.015, ministral-3b 0.01).
-
-### recommended / speedOptimized (question c) — BOTH JUSTIFIED
-
-- `recommended: true` on **mistral-large-latest**: it is the provider's `defaultModel`, Mistral's flagship generalist (Large 3), actively maintained, and the provider currently has zero recommended entries (every other major provider block marks its flagship). Justified.
-- `speedOptimized: true` on the **ministral tier** (14b/8b/3b, `-latest` and `-2512`, 6 entries): Ministral 3 is Mistral's edge/low-latency family ("les Ministraux" — edge models), the smallest and cheapest tier, directly analogous to the existing `speedOptimized` entries in models.ts (gpt-5-mini-class at line ~369, Haiku at line ~853). Justified.
-
-### Alias map (question g) — ALL CONFIRMED
-
-| Alias | Expected | Data-file `apiNames` | Verdict |
-|---|---|---|---|
-| mistral-large-latest | mistral-large-2512 | ✓ | ✓ |
-| mistral-small-latest | mistral-small-2603 | ✓ | ✓ |
-| codestral-latest | codestral-2508 | ✓ | ✓ |
-| devstral-latest | devstral-2512 | ✓ (also `devstral-medium-latest`) | ✓ |
-| devstral-small-latest | labs-devstral-small-2512 (Devstral Small 2) | ✓ | ✓ |
-| magistral-medium-latest | magistral-medium-2509 | ✓ | ✓ |
-| magistral-small-latest | magistral-small-2509 | ✓ | ✓ |
-| mistral-medium-latest | mistral-medium-2508 (NOT Medium 3.5) | ✓ | ✓ |
-| ministral-14b/8b/3b-latest | ministral-*-2512 | ✓ | ✓ |
-
----
-
-## Changes made in this pass
-
-None to `models.ts` (per instructions, this pass writes only this document). The PR #4990 changes (7 deprecations, 8 releaseDate fixes) are all **confirmed correct** against the model-card source data.
-
-**Recommended fixes (the fix list):**
-
-1. `mistral-large-latest`: add `recommended: true` — provider default + flagship; provider has zero recommended entries.
-2. `ministral-14b-latest`, `ministral-14b-2512`, `ministral-8b-latest`, `ministral-8b-2512`, `ministral-3b-latest`, `ministral-3b-2512`: add `speedOptimized: true` — edge/low-latency tier, consistent with gpt-mini/haiku precedent.
-3. All 27 entries: `capabilities.temperature.max` 1 → **1.5** — OpenAPI `ChatCompletionRequest.temperature.maximum: 1.5`. (The 0–1 bound belongs to the conversations-API `CompletionArgs`, not chat completions. If the team prefers to cap the UI at Mistral's recommended sampling range instead of the API bound, keep 1 — but then document that choice; it does not match the endpoint Sim calls.)
-
-## Deliberately not changed
-
-- **mistral-small-2603 / mistral-small-latest pricing stays 0.15/0.6** — final ruling on the standing conflict: model card + model-card source data + OpenRouter all say $0.15/$0.6; only the marketing pricing page says $0.1/$0.3, which exactly equals the predecessor Small 3.2 price and is judged a stale row, not a price cut.
-- **No `cachedInput` on any entry** — Mistral caching is opt-in via `prompt_cache_key` and Sim's provider does not send it; adding prices would be dead data. Requires provider wiring first (recommended follow-up above).
-- **`mistral-medium-2505` left active** — `status: 'Active'` in source data, no deprecation metadata despite Medium 3.1/3.5 existing.
-- **`open-mistral-nemo` left active** — still `status: 'Active'`.
-- **codestral contextWindow stays 128000** — OpenRouter claims 256K but both the live model card and the source data say 128k; Mistral docs win.
-- **`updatedAt: '2026-04-01'` left on deprecated entries** — their prices were verified unchanged; only active entries were bumped in PR #4990 and that remains coherent.
-- **Reasoning params not wired** — spec exposes `reasoning_effort` (`high`/`none`) on `ChatCompletionRequest` (line 12119; `prompt_mode` is deprecated in its favor). Sim doesn't forward it, so no capabilities change; note for a future Magistral reasoning integration.
-- **mistral-medium-3-5 NOT added in this pass** (documented as a recommended addition, question d): Mistral Medium 3.5 — `apiNames: ['mistral-medium-3-5', 'mistral-medium-3']`, released **2026-04-28**, **$1.5 / $7.5** per 1M (data file `mistral-medium-3-5-26-04.ts` + pricing page agree), **256k** context, Active, "frontier-class multimodal model optimized for agentic and coding". Matches existing `/^mistral/` modelPattern, so adding the entry is sufficient. Note its id does not follow the `-YYMM` convention (e.g. `-2512`, `-2603`) — both apiNames could be listed if desired.
-
-## Unverifiable
-
-Nothing remains strictly unverifiable. The four previously-unverifiable legacy prices (mistral-large-2411 2.0/6.0, mistral-small-2506 0.1/0.3, devstral-small-2507 0.1/0.3, devstral-medium-2507 0.4/2.0) are now **confirmed** via the model-card source data files. Caveats:
-
-- `magistral-medium-2509` pricing has no independent second source (not listed on OpenRouter); verified only within the Mistral doc family (data file + pricing page, which agree).
-- The Mistral Small 4 pricing-page row ($0.1/$0.3) remains in live contradiction with the model card; ruling above. Re-check on the next pass.
diff --git a/docs/models/openai.md b/docs/models/openai.md
deleted file mode 100644
index a1d81edb5b..0000000000
--- a/docs/models/openai.md
+++ /dev/null
@@ -1,338 +0,0 @@
-# OpenAI Provider Block — Final Validation & Justification
-
-**Validation date:** 2026-06-11
-**Scope:** `openai` provider block in `apps/sim/providers/models.ts` (23 models), including changes landed in PR #4990.
-**Method:** Live WebFetch of every individual model page on `developers.openai.com/api/docs/models/<id>`, the pricing page, the reasoning guide, the GPT-5.5 usage guide, the deprecations page, and the API reference; secondary pricing cross-checks against OpenRouter. All claims below were fetched live this session. Provider docs win over secondary sources.
-
-**Sources:**
-
-- Pricing: https://developers.openai.com/api/docs/pricing (only lists current gpt-5.5/5.4 families; per-model pricing taken from individual model pages)
-- Model pages: `https://developers.openai.com/api/docs/models/<model-id>` (fetched for all 23 ids)
-- Reasoning guide: https://developers.openai.com/api/docs/guides/reasoning
-- GPT-5.5 usage guide: https://developers.openai.com/api/docs/guides/latest-model
-- Deprecations: https://developers.openai.com/api/docs/deprecations
-- GPT-5.5 launch: https://openai.com/index/introducing-gpt-5-5/ (via search; release 2026-04-23, API availability 2026-04-24)
-- Secondary pricing: https://openrouter.ai/openai/gpt-5.5, /gpt-5.5-pro, /gpt-5.4, /gpt-5.2, /o3, /gpt-4o — all consistent with provider docs
-
-**Flag consumption check** (`rg` over `apps/sim/providers/openai/`): `reasoningEffort` and `verbosity` are consumed in `apps/sim/providers/openai/core.ts` (sent as `reasoning.effort` / `text.verbosity` on the Responses API). `nativeStructuredOutputs` is NOT consumed by the provider runtime — its only consumer is the landing models page (`apps/sim/app/(landing)/models/utils.ts`), so it is display-only metadata. `thinking` / `computerUse` are not used by the OpenAI provider.
-
-Pricing is USD per 1M tokens throughout. "MP" = the model's own docs page (`developers.openai.com/api/docs/models/<id>`).
-
----
-
-## Per-model verification
-
-### gpt-4.1
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| input / cachedInput / output | 2.0 / 0.5 / 8.0 | MP gpt-4.1 | ✓ verified |
-| updatedAt | 2026-06-11 | this validation | ✓ verified today |
-| contextWindow | 1,047,576 | MP: "1,047,576 tokens" | ✓ verified |
-| maxOutputTokens | 32,768 | MP | ✓ verified |
-| temperature 0–2 | present | non-reasoning chat model; standard OpenAI sampling range | ✓ correct by convention (docs do not enumerate the range; 0–2 is the API-wide bound) |
-| releaseDate | 2025-04-14 | MP snapshot `gpt-4.1-2025-04-14` | ✓ verified |
-| deprecated | absent | deprecations page does not list gpt-4.1 base | ✓ verified active ("Default", "Smartest non-reasoning model") |
-
-### gpt-4.1-mini
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| input / cachedInput / output | 0.4 / 0.1 / 1.6 | MP gpt-4.1-mini | ✓ verified |
-| updatedAt | 2026-06-11 | this validation | ✓ |
-| contextWindow / maxOutputTokens | 1,047,576 / 32,768 | MP | ✓ verified |
-| temperature 0–2 | present | convention (non-reasoning) | ✓ |
-| releaseDate | 2025-04-14 | MP snapshot `gpt-4.1-mini-2025-04-14` | ✓ verified |
-| deprecated | absent | not on deprecations page | ✓ verified |
-
-### gpt-4.1-nano
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| input / cachedInput / output | 0.1 / 0.025 / 0.4 | MP gpt-4.1-nano | ✓ verified |
-| updatedAt | 2026-06-11 | this validation | ✓ |
-| contextWindow / maxOutputTokens | 1,047,576 / 32,768 | MP | ✓ verified |
-| temperature 0–2 | present | convention | ✓ |
-| releaseDate | 2025-04-14 | MP (snapshot `gpt-4.1-nano-2025-04-14`, now marked deprecated) | ✓ verified |
-| deprecated | **absent — should be `true`** | deprecations page: shutdown **2026-10-23**, replacement gpt-5.4-nano; MP also recommends "starting with GPT-5 nano" | **FIX: add `deprecated: true`** |
-
-### gpt-5.5-pro
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| input / output | 30.0 / 180.0 | MP + pricing page + OpenRouter | ✓ verified (two sources) |
-| cachedInput | absent | MP: "GPT-5.5 Pro does not offer a cached input discount" | ✓ verified correct omission |
-| updatedAt | **2026-04-23 — stale** | pricing re-verified 2026-06-11 this session | **FIX: bump to 2026-06-11** (PR #4990 claimed to bump all entries but missed this one) |
-| contextWindow | 1,050,000 | MP: "1,050,000 context window" | ✓ verified |
-| maxOutputTokens | 128,000 | MP | ✓ verified |
-| nativeStructuredOutputs | true | MP: "Structured outputs: Supported" | ✓ verified (display-only flag) |
-| reasoningEffort | **['none','low','medium','high','xhigh'] — wrong** | see Open Question (a) below | **FIX: change to `['medium','high','xhigh']`** |
-| verbosity | **present — should be removed** | see Open Question (b) below | **FIX: remove `verbosity` block** |
-| releaseDate | 2026-04-23 | MP snapshot `gpt-5.5-pro-2026-04-23` | ✓ verified |
-| deprecated | absent | no deprecation notes on MP | ✓ verified |
-
-**Open Question (a) — resolved.** The gpt-5.5-pro model page does NOT enumerate reasoning effort values (fetched twice, explicitly asked for any sentence containing "effort" — the page contains no `reasoning.effort` enumeration). The reasoning guide says values are model-dependent and "check the relevant model page". Direct documentation for the siblings is explicit: gpt-5.4-pro MP — "supports reasoning.effort: medium, high, xhigh"; gpt-5.2-pro MP — "supports reasoning.effort: medium, high, xhigh"; gpt-5-pro MP — "defaults to (and only supports) reasoning.effort: high". Every pro-tier model that documents the parameter excludes `none` and `low` — the pro tier exists to "use more compute to think harder" (gpt-5.5-pro MP), making `none`/`low` incoherent with the product. The most defensible value set is **`['medium','high','xhigh']`**, matching both documented pro siblings. The current `['none','low','medium','high','xhigh']` appears copied from non-pro gpt-5.5 and is backed by no source.
-
-**Open Question (b) — resolved.** Not documented. The gpt-5.5-pro page does not mention `verbosity` (explicitly checked). No pro-tier model page (gpt-5.4-pro, gpt-5.2-pro, gpt-5-pro) documents verbosity, and the GPT-5.5 usage guide discusses `text.verbosity` only for gpt-5.5. Since `verbosity` is runtime-consumed (`core.ts` sends `text.verbosity` to the API), advertising it on a model that may reject it is a live failure risk. **Remove the verbosity block from gpt-5.5-pro.**
-
-### gpt-5.5
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| input / cachedInput / output | 5.0 / 0.5 / 30.0 | MP + pricing page + OpenRouter | ✓ verified (two sources) |
-| updatedAt | **2026-04-23 — stale** | re-verified 2026-06-11 | **FIX: bump to 2026-06-11** (missed by PR #4990) |
-| contextWindow / maxOutputTokens | 1,050,000 / 128,000 | MP | ✓ verified |
-| nativeStructuredOutputs | true | MP: structured outputs supported | ✓ verified |
-| reasoningEffort ['none','low','medium','high','xhigh'] | present | MP: "Reasoning.effort supports: none, low, medium (default), high and xhigh" | ✓ verified verbatim |
-| verbosity ['low','medium','high'] | present | GPT-5.5 usage guide documents `text.verbosity` (recommends `low` for concise) | ✓ verified |
-| releaseDate | 2026-04-23 | announcement 2026-04-23 (openai.com/index/introducing-gpt-5-5/, TechCrunch); pro sibling snapshot is `-2026-04-23` | ✓ verified (note: API availability was 2026-04-24; snapshot naming uses 04-23) |
-| recommended | true | flagship per OpenAI ("latest GPT-5.5" is the recommended upgrade target on gpt-5.2/gpt-5/o3 pages) | ✓ intentional, docs-consistent |
-
-### gpt-5.4-pro
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| input / output | 30.0 / 180.0 | MP + pricing page | ✓ verified (note: MP — ">272K input tokens are priced at 2x input and 1.5x output"; the flat-rate model in `models.ts` cannot express this; under-bills long-context pro calls — pre-existing limitation, see Unverifiable/limitations) |
-| cachedInput | absent | pricing page shows no cached rate for pro | ✓ verified |
-| updatedAt | 2026-06-11 | this validation | ✓ |
-| contextWindow / maxOutputTokens | 1,050,000 / 128,000 | MP | ✓ verified |
-| reasoningEffort ['medium','high','xhigh'] | present | MP: "supports reasoning.effort: medium, high, xhigh" | ✓ verified verbatim |
-| verbosity | absent | not documented for pro | ✓ correct omission |
-| releaseDate | 2026-03-05 | gpt-5.4 snapshot `gpt-5.4-2026-03-05`; same launch | ✓ verified |
-| deprecated | absent | none | ✓ |
-
-### gpt-5.4
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| input / cachedInput / output | 2.5 / 0.25 / 15.0 | MP + pricing page + OpenRouter | ✓ verified (two sources) |
-| updatedAt | 2026-06-11 | this validation | ✓ |
-| contextWindow / maxOutputTokens | 1,050,000 / 128,000 | MP | ✓ verified |
-| reasoningEffort ['none','low','medium','high','xhigh'] | present | MP: "Reasoning.effort supports: none (default), low, medium, high and xhigh" | ✓ verified verbatim |
-| verbosity ['low','medium','high'] | present | not on MP; carried from GPT-5-line `text.verbosity` parameter (documented in usage guide / help center for the GPT-5 family) | ✓ kept — see "Deliberately not changed" |
-| releaseDate | 2026-03-05 | MP snapshot `gpt-5.4-2026-03-05` | ✓ verified |
-
-### gpt-5.4-mini
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| input / cachedInput / output | 0.75 / 0.075 / 4.5 | MP + pricing page | ✓ verified |
-| updatedAt | 2026-06-11 | this validation | ✓ |
-| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified |
-| reasoningEffort ['none','low','medium','high','xhigh'] | present | gpt-5.4 family per search-confirmed docs: "gpt-5.4, gpt-5.4-mini, and gpt-5.4-nano support none, low, medium, high, and xhigh" | ✓ verified |
-| verbosity | present | family convention | ✓ kept |
-| releaseDate | 2026-03-17 | MP snapshot `gpt-5.4-mini-2026-03-17` | ✓ verified |
-
-### gpt-5.4-nano
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| input / cachedInput / output | 0.2 / 0.02 / 1.25 | MP + pricing page | ✓ verified |
-| updatedAt | 2026-06-11 | this validation | ✓ |
-| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified |
-| reasoningEffort / verbosity | as gpt-5.4-mini | same family docs | ✓ verified / kept |
-| releaseDate | 2026-03-17 | MP snapshot `gpt-5.4-nano-2026-03-17` | ✓ verified |
-| speedOptimized | true | MP: "cheapest GPT-5.4-class model", optimized for classification/extraction/sub-agents | ✓ intentional repo flag, consistent with docs |
-
-### gpt-5.2-pro
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| input / output | 21.0 / 168.0 | MP | ✓ verified |
-| cachedInput | absent | MP shows none | ✓ |
-| updatedAt | 2026-06-11 | this validation | ✓ |
-| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified |
-| reasoningEffort ['medium','high','xhigh'] | present | MP: "supports reasoning.effort: medium, high, xhigh" | ✓ verified verbatim |
-| releaseDate | 2025-12-11 | MP snapshot `gpt-5.2-pro-2025-12-11` | ✓ verified |
-| deprecated | absent | MP recommends upgrading to gpt-5.5-pro but no shutdown date on deprecations page | ✓ verified (soft-superseded, not deprecated) |
-
-### gpt-5.2
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| input / cachedInput / output | 1.75 / 0.175 / 14.0 | MP + OpenRouter | ✓ verified (two sources) |
-| updatedAt | 2026-06-11 | this validation | ✓ |
-| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified |
-| reasoningEffort ['none','low','medium','high','xhigh'] | present | MP: "none (default), low, medium, high and xhigh" | ✓ verified verbatim |
-| verbosity | present | family convention | ✓ kept |
-| releaseDate | 2025-12-11 | MP snapshot `gpt-5.2-2025-12-11` | ✓ verified |
-| deprecated | absent | superseded by 5.5 but no shutdown (only `gpt-5.2-chat-latest` has one) | ✓ verified |
-
-### gpt-5.1
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| input / cachedInput / output | 1.25 / 0.125 / 10.0 | MP | ✓ verified |
-| updatedAt | 2026-06-11 | this validation | ✓ |
-| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified |
-| reasoningEffort ['none','low','medium','high'] | present | MP: "Reasoning.effort supports: none (default), low, medium, and high" (no xhigh) | ✓ verified verbatim |
-| verbosity | present | family convention | ✓ kept |
-| releaseDate | **2025-11-12** | MP snapshot is `gpt-5.1-2025-11-13` | **FIX: → 2025-11-13.** Repo convention everywhere else in this block is snapshot date (gpt-5-pro 10-06, gpt-5.2 12-11, gpt-4.1 04-14, o3-pro 06-10, …). 2025-11-12 is the announcement date; the API snapshot is 11-13 |
-
-### gpt-5-pro
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| input / output | 15.0 / 120.0 | MP | ✓ verified |
-| cachedInput | absent | MP shows none | ✓ |
-| updatedAt | 2026-06-11 | this validation | ✓ |
-| contextWindow | 400,000 | MP | ✓ verified |
-| maxOutputTokens | 272,000 | MP: "272,000 max output tokens" | ✓ verified (yes, it really is larger than the rest of the family) |
-| reasoningEffort ['high'] | present | MP: "defaults to (and only supports) `reasoning.effort: high`" | ✓ verified verbatim |
-| releaseDate | 2025-10-06 | MP snapshot `gpt-5-pro-2025-10-06` | ✓ verified — **PR #4990's change confirmed correct** |
-| deprecated | absent | none listed | ✓ |
-
-### gpt-5
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| input / cachedInput / output | 1.25 / 0.125 / 10.0 | MP | ✓ verified |
-| updatedAt | 2026-06-11 | this validation | ✓ |
-| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified |
-| reasoningEffort ['minimal','low','medium','high'] | present | MP: "minimal, low, medium, and high"; reasoning guide confirms `minimal` introduced with GPT-5 | ✓ verified verbatim |
-| verbosity | present | verbosity launched with GPT-5 | ✓ verified |
-| releaseDate | 2025-08-07 | MP snapshot `gpt-5-2025-08-07` | ✓ verified |
-| deprecated | absent | MP: "We recommend using the latest GPT-5.5" but no shutdown date — deprecations page: "not explicitly listed as deprecated" | ✓ verified (superseded, not deprecated) |
-
-### gpt-5-mini
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| input / cachedInput / output | 0.25 / 0.025 / 2.0 | MP | ✓ verified |
-| updatedAt | 2026-06-11 | this validation | ✓ |
-| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified |
-| reasoningEffort / verbosity | gpt-5 family values | GPT-5 family launch docs | ✓ verified |
-| releaseDate | 2025-08-07 | MP snapshot `gpt-5-mini-2025-08-07` | ✓ verified |
-
-### gpt-5-nano
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| input / cachedInput / output | 0.05 / 0.005 / 0.4 | MP | ✓ verified |
-| updatedAt | 2026-06-11 | this validation | ✓ |
-| contextWindow / maxOutputTokens | 400,000 / 128,000 | MP | ✓ verified |
-| reasoningEffort / verbosity | gpt-5 family values | family docs | ✓ verified |
-| releaseDate | 2025-08-07 | MP snapshot `gpt-5-nano-2025-08-07` | ✓ verified |
-
-### gpt-5-chat-latest
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| input / cachedInput / output | 1.25 / 0.125 / 10.0 | MP | ✓ verified |
-| updatedAt | 2026-06-11 | this validation | ✓ |
-| contextWindow / maxOutputTokens | 128,000 / 16,384 | MP | ✓ verified |
-| temperature 0–2 | present | non-reasoning chat snapshot | ✓ convention |
-| releaseDate | 2025-08-07 | GPT-5 launch snapshot | ✓ verified |
-| deprecated | true | **deprecations page: shutdown 2026-07-23, replacement gpt-5.5** | ✓ verified — **PR #4990's change confirmed correct and now formally docs-backed** |
-
-### o4-mini
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| input / cachedInput / output | 1.1 / 0.275 / 4.4 | MP | ✓ verified |
-| updatedAt | 2026-06-11 | this validation | ✓ |
-| contextWindow / maxOutputTokens | 200,000 / 100,000 | MP | ✓ verified |
-| reasoningEffort ['low','medium','high'] | present | see Open Question (c) below | ✓ verified |
-| releaseDate | 2025-04-16 | MP snapshot `o4-mini-2025-04-16` | ✓ verified |
-| deprecated | true | deprecations page: shutdown **2026-10-23**, replacement gpt-5.4-mini; MP: snapshot Deprecated, "succeeded by GPT-5 mini" | ✓ verified — **PR #4990's change confirmed correct** |
-
-### o3-pro
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| input / output | 20.0 / 80.0 | MP | ✓ verified |
-| cachedInput | absent | MP shows none | ✓ |
-| updatedAt | 2026-06-11 | this validation | ✓ |
-| contextWindow / maxOutputTokens | 200,000 / 100,000 | MP | ✓ verified |
-| reasoningEffort | absent | MP: "Reasoning: Highest", no effort enum documented (pro pattern: fixed high effort) | ✓ correct omission |
-| releaseDate | 2025-06-10 | MP snapshot `o3-pro-2025-06-10` | ✓ verified |
-| deprecated | absent | deprecations page does not list o3-pro (only o3/o3-mini) | ✓ verified — note the oddity that base o3 is scheduled for shutdown while o3-pro is not; evidence-based, leave as is |
-
-### o3
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| input / cachedInput / output | 2 / 0.5 / 8 | MP + OpenRouter ($2/$8) | ✓ verified (two sources) |
-| updatedAt | 2026-06-11 | this validation | ✓ |
-| contextWindow / maxOutputTokens | 200,000 / 100,000 | MP | ✓ verified |
-| reasoningEffort ['low','medium','high'] | present | Open Question (c) | ✓ verified |
-| releaseDate | 2025-04-16 | MP snapshot `o3-2025-04-16` | ✓ verified |
-| deprecated | **absent — should be `true`** | **deprecations page: shutdown 2026-10-23**, replacement gpt-5.5-pro; MP: "superseded by GPT-5" | **FIX: add `deprecated: true`** |
-
-### o3-mini
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| input / cachedInput / output | 1.1 / 0.55 / 4.4 | MP (note: cachedInput 0.55 differs from o4-mini's 0.275 — both verified correct per their MPs) | ✓ verified |
-| updatedAt | 2026-06-11 | this validation | ✓ |
-| contextWindow / maxOutputTokens | 200,000 / 100,000 | MP | ✓ verified |
-| reasoningEffort ['low','medium','high'] | present | o3-mini launch post: "three reasoning effort options—low, medium, and high" | ✓ verified explicitly |
-| releaseDate | 2025-01-31 | MP snapshot `o3-mini-2025-01-31` | ✓ verified |
-| deprecated | **absent — should be `true`** | **deprecations page: shutdown 2026-10-23**, replacement gpt-5.5; MP: snapshot marked deprecated | **FIX: add `deprecated: true`** |
-
-### o1
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| input / cachedInput / output | 15.0 / 7.5 / 60 | MP | ✓ verified |
-| updatedAt | 2026-06-11 | this validation | ✓ |
-| contextWindow / maxOutputTokens | 200,000 / 100,000 | MP | ✓ verified |
-| reasoningEffort ['low','medium','high'] | present | Open Question (c) | ✓ verified |
-| releaseDate | **2024-12-05** | MP snapshot is `o1-2024-12-17` | **FIX (minor): → 2024-12-17** for snapshot-date consistency. 2024-12-05 is the ChatGPT launch; the API snapshot (the convention used by every other entry in this block) is 12-17 |
-| deprecated | **absent — recommend `true`** | MP: sole snapshot `o1-2024-12-17` explicitly "Deprecated"; described as "Previous full o-series reasoning model". Base alias not on the deprecations shutdown table (only o1-preview/o1-mini, already shut down) | **FIX (recommended): add `deprecated: true`** — weaker evidence than o3/o3-mini (no shutdown date for the alias), but its only snapshot is deprecated and every other o-series peer is deprecated |
-
-**Open Question (c) — resolved.** The current model pages no longer enumerate `reasoning_effort` for the o-series, and the Responses API reference page content does not surface the enum inline. The reasoning guide states: "Supported values are model-dependent and can include `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`... check the relevant model page." Best available evidence: (1) o3-mini launch post (openai.com/index/openai-o3-mini/) explicitly: "three reasoning effort options—low, medium, and high"; (2) the API changelog notes `reasoning_effort` was added for o1 models with those three values; (3) `none`/`minimal`/`xhigh` were introduced with the GPT-5 line and were never back-ported to o-series. **`['low','medium','high']` for o1, o3, o3-mini, o4-mini is confirmed — no change.**
-
-### gpt-4o
-
-| Field | Value | Source | Verdict |
-|---|---|---|---|
-| input / cachedInput / output | 2.5 / 1.25 / 10.0 | MP + OpenRouter ($2.50/$10) | ✓ verified (two sources) |
-| updatedAt | 2026-06-11 | this validation | ✓ |
-| contextWindow / maxOutputTokens | 128,000 / 16,384 | MP | ✓ verified |
-| temperature 0–2 | present | convention | ✓ |
-| releaseDate | 2024-05-13 | MP snapshot `gpt-4o-2024-05-13`; OpenRouter "released May 13, 2024" | ✓ verified |
-| deprecated | true | see Open Question (d) | ✓ verified — and now docs-backed |
-
-**Open Question (d) — resolved, better than expected.** The brief said gpt-4o is "active per OpenAI" and `deprecated: true` is a deliberate steering decision. The live deprecations page now shows **gpt-4o: shutdown 2026-10-23, replacement gpt-5.5**. So `deprecated: true` is no longer just an intentional product deviation — it is officially correct. Keep, no caveat needed.
-
----
-
-## Open Question (e) — `defaultModel: 'gpt-4.1'`
-
-OpenAI's flagship is gpt-5.5 (announcement 2026-04-23; the gpt-5.2/gpt-5/o3 pages all point at "the latest GPT-5.5"). gpt-4.1 remains active (it is OpenAI's "smartest non-reasoning model" and is not on the deprecations page), so the current default is not broken — it is a cheap, fast, temperature-supporting non-reasoning default, which is a defensible UX choice for new blocks. **Recommendation:** consider `defaultModel: 'gpt-5.5'` (or `gpt-5.4-mini` for a cost-conscious reasoning default) to match the flagship, but this is a **product decision**, not a correctness fix — not included in the machine-applicable list.
-
----
-
-## Changes made in this pass (recommended to apply now)
-
-1. **gpt-5.5-pro** — `reasoningEffort.values`: `['none','low','medium','high','xhigh']` → `['medium','high','xhigh']`. Undocumented on its own page; both documented pro siblings (gpt-5.4-pro, gpt-5.2-pro) enumerate exactly `medium, high, xhigh`; pro tier semantics exclude none/low. Sending `reasoning.effort: 'none'` to a pro model risks a 400.
-2. **gpt-5.5-pro** — remove the `verbosity` block. Not documented for any pro model; the provider sends `text.verbosity` at runtime, so advertising it is a live API-error risk.
-3. **gpt-5.5-pro** — `pricing.updatedAt`: `2026-04-23` → `2026-06-11` (re-verified today; PR #4990 missed this entry despite claiming an all-entry bump).
-4. **gpt-5.5** — `pricing.updatedAt`: `2026-04-23` → `2026-06-11` (same).
-5. **o3** — add `deprecated: true` (official shutdown 2026-10-23).
-6. **o3-mini** — add `deprecated: true` (official shutdown 2026-10-23).
-7. **gpt-4.1-nano** — add `deprecated: true` (official shutdown 2026-10-23, replacement gpt-5.4-nano).
-8. **o1** — add `deprecated: true` (sole snapshot `o1-2024-12-17` marked Deprecated; "previous" o-series model; recommended, slightly weaker evidence).
-9. **gpt-5.1** — `releaseDate`: `2025-11-12` → `2025-11-13` (snapshot `gpt-5.1-2025-11-13`; snapshot-date convention).
-10. **o1** — `releaseDate`: `2024-12-05` → `2024-12-17` (snapshot `o1-2024-12-17`; snapshot-date convention; minor).
-
-## Deliberately not changed
-
-- **gpt-4o `deprecated: true`** — originally an intentional steering decision; now officially correct (shutdown 2026-10-23). Keep.
-- **gpt-5-chat-latest / o4-mini `deprecated: true`** (PR #4990) — both confirmed by the deprecations page (2026-07-23 and 2026-10-23 shutdowns). Keep.
-- **`defaultModel: 'gpt-4.1'`** — product decision; gpt-4.1 is active. Flagged for product review (gpt-5.5 is the flagship), not a correctness fix.
-- **`verbosity` on non-pro gpt-5.x models (gpt-5.4/-mini/-nano, gpt-5.2, gpt-5.1, gpt-5 family)** — current model pages don't enumerate it per-model, but `text.verbosity` is a documented GPT-5-line parameter (GPT-5 launch; GPT-5.5 usage guide; OpenAI help center) and the provider has been sending it without errors. Keep.
-- **`temperature {0,2}` on gpt-4.1 family, gpt-4o, gpt-5-chat-latest** — model pages don't state sampling ranges; 0–2 is the documented API-wide range for non-reasoning chat models. Correct by convention.
-- **o3-pro not deprecated** — the deprecations page lists o3 and o3-mini but not o3-pro. Odd but evidence-based; leave.
-- **gpt-5.2 / gpt-5 / gpt-5.2-pro not deprecated** — docs say "superseded / recommend GPT-5.5" but list no shutdown; superseded ≠ deprecated. Leave.
-- **`recommended: true` on gpt-5.5 and `speedOptimized: true` on gpt-5.4-nano** — repo-internal flags, consistent with docs positioning.
-- **o3-mini `cachedInput: 0.55` vs o4-mini `0.275`** — looks like a typo but both verified correct on their respective model pages.
-
-## Unverifiable / known limitations
-
-- **gpt-5.5-pro effort values** — no official enumeration exists anywhere fetched (model page, reasoning guide, usage guide, OpenRouter). The `['medium','high','xhigh']` recommendation is an inference from documented siblings — the strongest available evidence, but flagged as not directly documented. If OpenAI later publishes the enum, re-verify.
-- **gpt-5.4-pro long-context surcharge** — MP states prompts >272K input tokens bill at 2x input / 1.5x output. The flat `pricing` shape in `models.ts` cannot represent tiered pricing; cost estimates for very long pro prompts will be low. Pre-existing schema limitation, out of scope here.
-- **gpt-5.5 release date 04-23 vs API availability 04-24** — announcement and snapshot say 2026-04-23; press coverage says API access opened 2026-04-24. Kept 2026-04-23 (snapshot wins).
-- **Verbosity enum per non-flagship model** — `['low','medium','high']` is documented at the parameter level, not re-enumerated on each model page.
-- **`nativeStructuredOutputs`** — only gpt-5.5/gpt-5.5-pro carry it though most listed models support structured outputs; flag is display-only (landing page), so under-reporting is cosmetic, not functional. Left as is.
diff --git a/docs/models/vertex.md b/docs/models/vertex.md
deleted file mode 100644
index 8e8da6ed1a..0000000000
--- a/docs/models/vertex.md
+++ /dev/null
@@ -1,212 +0,0 @@
-# Vertex AI provider — model validation (`models.ts` lines ~1487–1685)
-
-- **Date:** 2026-06-11 (final exhaustive pass, re-verifying PR #4990 changes)
-- **Method:** Live WebFetch of Google pricing/model/changelog pages; Google Cloud doc pages render nav-only to fetchers, so Vertex-specific specs were verified via Context7 MCP (`/websites/cloud_google_vertex-ai`, `/websites/cloud_google_gemini-enterprise-agent-platform`) and WebSearch fallback, per the validate-model skill. Two-source rule applied to pricing (Vertex pricing page + Gemini API pricing page / OpenRouter / CloudPrice).
-- **Primary sources:**
-  - https://cloud.google.com/vertex-ai/generative-ai/pricing (rendered fully — all pricing below)
-  - https://ai.google.dev/gemini-api/docs/pricing (cross-check; global-endpoint prices identical for 2.5/3.x)
-  - https://ai.google.dev/gemini-api/docs/models/gemini-3.5-flash, …/gemini-3.1-pro-preview, …/gemini-3.1-flash-lite, …/gemini-3-flash-preview, …/gemini-2.5-pro (token limits)
-  - https://ai.google.dev/gemini-api/docs/thinking (thinking levels/defaults)
-  - https://ai.google.dev/gemini-api/docs/changelog (lifecycle dates)
-  - https://deepmind.google/models/model-cards/gemini-3-5-flash/ (3.5 Flash card)
-  - Vertex docs via Context7: `…/models/gemini/2-5-pro` ("maximum output token limit of 65,535"), `…/migrate/migrate-palm-to-gemini`, `…/learn/model-versioning`, `…/learn/locations`
-  - https://blog.google/technology/developers/deep-research-agent-gemini-api/ (2025-12-11), https://blog.google/innovation-and-ai/models-and-research/gemini-models/gemini-3-1-flash-lite/ (2026-03-03)
-- **Provider implementation:** `apps/sim/providers/vertex/index.ts` contains no capability handling itself — it delegates to `executeGeminiRequest` in `apps/sim/providers/gemini/core.ts`, which consumes `request.thinkingLevel` (core.ts:955–961, sent only when user explicitly selects a level) and `request.maxTokens` (core.ts:934). `thinking`, `temperature`, and `maxOutputTokens` flags are live; the global `maxOutputTokens` fallback is 4096 (models.ts:865), which is why PR #4990 added explicit caps.
-
----
-
-## Per-model validation
-
-### vertex/gemini-3.5-flash
-
-| Field | Repo | Live docs | Source | Verdict |
-|---|---|---|---|---|
-| id | `gemini-3.5-flash` (GA 2026-05-19) | `gemini-3.5-flash` | ai.google.dev changelog ("Released `gemini-3.5-flash`… GA" 2026-05-19) | ✓ |
-| input | 1.5 | $1.50 (global) | Vertex pricing + Gemini API pricing + OpenRouter | ✓ (3 sources) |
-| cachedInput | 0.15 | $0.15 | Vertex pricing + Gemini API pricing | ✓ |
-| output | 9.0 | $9.00 | Vertex pricing + Gemini API pricing + OpenRouter | ✓ |
-| contextWindow | 1048576 | 1,048,576 | ai.google.dev/gemini-api/docs/models/gemini-3.5-flash; DeepMind card "1M" | ✓ |
-| maxOutputTokens | 65536 | 65,536 | ai.google.dev model page ("64K" on DeepMind card) | ✓ |
-| thinking | minimal/low/medium/high, default medium | minimal, low, medium, high; default medium | ai.google.dev/gemini-api/docs/thinking; OpenRouter ("defaults to medium thinking effort") | ✓ |
-| releaseDate | 2026-05-19 | "Published 19 May 2026" | DeepMind model card + changelog | ✓ |
-| recommended | absent | — | google provider entry has `recommended: true` on the same model | 🔵 add (see fixes) |
-
-Note: Vertex introduces **non-global endpoint pricing (+10%: $1.65 / $9.90 / $0.165) effective 2026-07-01**; our entries model global pricing. See operational caveats.
-
-### vertex/gemini-3.1-pro-preview
-
-| Field | Repo | Live docs | Source | Verdict |
-|---|---|---|---|---|
-| id | `gemini-3.1-pro-preview` | `gemini-3.1-pro-preview` | ai.google.dev/gemini-api/docs/models/gemini-3.1-pro-preview | ✓ |
-| input | 2.0 | $2 (≤200k); $4 (>200k) | Vertex pricing + Gemini API pricing | ✓ (≤200k tier; >200k tier not modeled — see caveats) |
-| cachedInput | 0.2 | $0.20 (≤200k); $0.40 (>200k) | same | ✓ |
-| output | 12.0 | $12 (≤200k input); $18 (>200k) | same | ✓ |
-| contextWindow | 1048576 | 1,048,576 | ai.google.dev model page; Vertex release notes "1M token context window" | ✓ |
-| maxOutputTokens | 65536 | 65,536 | ai.google.dev model page | ✓ |
-| thinking | low/medium/high, default high | low, medium, high; default high (Dynamic); **minimal not supported** | ai.google.dev/gemini-api/docs/thinking | ✓ (PR #4990 drop of 'minimal' confirmed correct) |
-| releaseDate | 2026-02-19 | 2026-02-19 | blog.google gemini-3-1-pro; github.blog changelog 2026-02-19 | ✓ |
-
-**Operational caveat (open question f):** Google documents `gemini-3.1-pro-preview` as **global-endpoint-only on Vertex AI** (Vertex `learn/locations` lists it under global-endpoint models; third-party migration guides state regional endpoints don't serve it). `apps/sim/providers/vertex/index.ts:34` resolves location as `request.vertexLocation || env.VERTEX_LOCATION || 'us-central1'` — with the default `us-central1`, requests to this model will fail with model-not-found. Users must set `vertexLocation` / `VERTEX_LOCATION` to `global`. No code change made (per instructions); documented here.
-
-### vertex/gemini-3.1-flash-lite
-
-| Field | Repo | Live docs | Source | Verdict |
-|---|---|---|---|---|
-| id | `gemini-3.1-flash-lite` (renamed from `-preview` in PR #4990) | stable id `gemini-3.1-flash-lite`; preview id shut down on Gemini API 2026-05-25; Vertex preview-alias discontinuation 2026-07-09 | ai.google.dev changelog ("Released `gemini-3.1-flash-lite`… GA" 2026-05-07; preview "shut down" 2026-05-25); cloud.google.com blog "Gemini 3.1 Flash-Lite is now generally available" | ✓ rename confirmed correct |
-| input | 0.25 | $0.25 (global, text) | Vertex pricing + Gemini API pricing | ✓ |
-| cachedInput | 0.025 | $0.025 | same | ✓ |
-| output | 1.5 | $1.50 | same + blog.google launch post | ✓ |
-| contextWindow | 1048576 | 1,048,576 | ai.google.dev/gemini-api/docs/models/gemini-3.1-flash-lite | ✓ |
-| maxOutputTokens | 65536 | 65,536 | same | ✓ |
-| thinking levels | minimal/low/medium/high | minimal "Supported (Default)", low, medium, high | ai.google.dev/gemini-api/docs/thinking (3.1 Flash-Lite row; the "Not supported" row is 3.1 **Pro**) | ✓ — orchestrator re-fetched the thinking doc and corrected this report's initial misreading |
-| thinking default | 'minimal' | minimal ("Supported (Default)") | same | ✓ |
-| releaseDate | 2026-05-07 | stable GA 2026-05-07 (preview launch was 2026-03-03) | ai.google.dev changelog | ✓ changed this pass to the GA date |
-| speedOptimized | absent | "our most cost-effective model yet", lowest-latency tier | blog.google launch post | 🔵 add (see fixes) |
-
-**Open question (c) resolved:** the preview→stable rename is right (preview already shut down on the Gemini API 2026-05-25; Vertex alias discontinues 2026-07-09). This report initially claimed `minimal` is rejected on 3.1 Flash-Lite — that was a misreading of the thinking-levels table (the "Not supported" cell belongs to 3.1 **Pro**). The orchestrator re-fetched ai.google.dev/gemini-api/docs/thinking, which states for Gemini 3.1 Flash-Lite: minimal "Supported (Default)", plus low/medium/high. The repo's `levels: ['minimal','low','medium','high'], default: 'minimal'` is correct and was left unchanged.
-
-### vertex/gemini-3-pro-preview (deprecated)
-
-| Field | Repo | Live docs | Source | Verdict |
-|---|---|---|---|---|
-| deprecated | true | Gemini API shut down 2026-03-09 (`gemini-3-pro-preview` now aliases `gemini-3.1-pro-preview`); Vertex discontinuation 2026-03-26 | ai.google.dev changelog; Vertex deprecations (via third-party migration guides citing Google's table) | ✓ deprecated:true confirmed correct |
-| pricing 2.0/0.2/12.0 | — | current pricing page no longer lists text Gemini 3 Pro (only "Gemini 3 Pro Image") | cloud.google.com/vertex-ai/generative-ai/pricing | ⚠️ historical values, unverifiable from current page; acceptable on a deprecated entry |
-| contextWindow | 1000000 | launch materials said "1M token context window" | Vertex release notes | ⚠️ 1,000,000 vs sibling models' 1,048,576; left as-is (deprecated) |
-| thinking | low/medium/high, default high | consistent with 3.x Pro line (no minimal) | ai.google.dev/gemini-api/docs/thinking (3.1-pro row) | ✓ |
-| releaseDate | 2025-11-18 | 2025-11-18 | blog.google gemini-3; github.blog 2025-11-18; axios 2025-11-18 | ✓ |
-
-Note: since the id now auto-redirects to 3.1 Pro on Google's side, calls may silently serve 3.1 Pro; `deprecated: true` steering users away is the right call.
-
-### vertex/gemini-3-flash-preview
-
-| Field | Repo | Live docs | Source | Verdict |
-|---|---|---|---|---|
-| id | `gemini-3-flash-preview` | `gemini-3-flash-preview` | ai.google.dev/gemini-api/docs/models/gemini-3-flash-preview | ✓ |
-| input / cachedInput / output | 0.5 / 0.05 / 3.0 | $0.50 / $0.05 / $3.00 | Vertex pricing + Gemini API pricing + TechCrunch | ✓ |
-| contextWindow | 1048576 (PR #4990 change) | 1,048,576 | ai.google.dev model page | ✓ change confirmed |
-| maxOutputTokens | 65536 | 65,536 | same | ✓ |
-| thinking | minimal/low/medium/high, default high | minimal, low, medium, high; default high (Dynamic) | ai.google.dev/gemini-api/docs/thinking | ✓ |
-| releaseDate | 2025-12-17 | 2025-12-17 | techcrunch.com 2025/12/17; 9to5google 2025/12/17; blog.google | ✓ |
-
-### vertex/gemini-2.5-pro
-
-| Field | Repo | Live docs | Source | Verdict |
-|---|---|---|---|---|
-| input | 1.25 | $1.25 (≤200k); $2.50 (>200k) | Vertex pricing + Gemini API pricing | ✓ (≤200k tier) |
-| cachedInput | 0.125 | Vertex page displays "$0.13" (rounded); Gemini API exact "$0.125" | both pricing pages | ✓ (0.125 is exact value) |
-| output | 10.0 | $10 (≤200k); $15 (>200k) | same | ✓ |
-| contextWindow | 1048576 | 1,048,576 | Vertex `models/gemini/2-5-pro` (via Context7) + ai.google.dev | ✓ |
-| maxOutputTokens | **65536** | **Vertex: 65,535** ("maximum output token limit of 65,535"); Gemini API page: 65,536 | docs.cloud.google.com/…/models/gemini/2-5-pro (via Context7); ai.google.dev/gemini-api/docs/models/gemini-2.5-pro | ✗ 🟡 — platforms disagree; this is the **Vertex** entry, so Vertex's 65,535 wins |
-| releaseDate | 2025-03-25 | 2.5 Pro Experimental announced 2025-03-25 | blog.google gemini-model-thinking-updates-march-2025; siliconangle 2025/03/25 | ✓ |
-| deprecated | absent | retirement on Vertex extended to **2026-10-16** | Vertex release notes (via gcpstudyhub summary of release-notes) | ✓ correctly NOT deprecated today — see (d) below |
-
-### vertex/gemini-2.5-flash
-
-| Field | Repo | Live docs | Source | Verdict |
-|---|---|---|---|---|
-| input / cachedInput / output | 0.3 / 0.03 / 2.5 | $0.30 / $0.03 / $2.50 | Vertex pricing + Gemini API pricing | ✓ |
-| contextWindow | 1048576 | 1,048,576 | Vertex `models/gemini/2-5-flash` (via Context7) | ✓ |
-| maxOutputTokens | **65536** | **Vertex: 65,535** ("default output token limit of 65,535") | docs.cloud.google.com/…/models/gemini/2-5-flash (via Context7); also migrate-palm-to-gemini doc ("2.5 Pro and 2.5 Flash… output context length of 65,535") | ✗ 🟡 |
-| releaseDate | 2025-05-20 | preview launched 2025-04-17 on Gemini API; I/O announcement 2025-05-20/21; Vertex GA June 2025 | ai.google.dev changelog; Google I/O coverage | ⚠️ date is the I/O announcement; preview predates it. Left as-is (convention ambiguity, not a factual error) |
-| deprecated | absent | retires 2026-10-16 | as above | ✓ not deprecated today |
-
-### vertex/gemini-2.5-flash-lite
-
-| Field | Repo | Live docs | Source | Verdict |
-|---|---|---|---|---|
-| input / cachedInput / output | 0.1 / 0.01 / 0.4 | $0.10 / $0.01 / $0.40 | Vertex pricing + Gemini API pricing | ✓ |
-| contextWindow | 1048576 | 1,048,576 | Vertex `models/gemini/2-5-flash-lite` | ✓ |
-| maxOutputTokens | **65536** | **65,535** | Vertex 2-5-flash-lite doc / Oracle OCI mirror of Google spec (websearch confirmation: "maximum output for Gemini 2.5 Flash-Lite is 65,535 tokens") | ✗ 🟡 |
-| releaseDate | 2025-06-17 | 2.5 family GA + Flash-Lite preview announced 2025-06-17 | cloud.google.com blog "Gemini 2.5 Updates: Flash/Pro GA, SFT, Flash-Lite on Vertex AI" | ✓ |
-| speedOptimized | absent | smallest/fastest 2.5 tier | google provider entry has `speedOptimized: true` (models.ts:1436) | 🔵 add (see fixes) |
-| deprecated | absent | retires 2026-10-16 | as above | ✓ not deprecated today |
-
-### vertex/gemini-2.0-flash (deprecated)
-
-| Field | Repo | Live docs | Source | Verdict |
-|---|---|---|---|---|
-| deprecated | true | discontinued on Vertex **2026-06-01** (model serving + Provisioned Throughput) | github.com/firebase/extensions/issues/2607; Vertex model-versioning doc ("as of March 6, 2026 … only available for existing customers") | ✓ PR #4990 change confirmed |
-| input | **0.1** | **$0.15** (Vertex token-based row, text) | cloud.google.com/vertex-ai/generative-ai/pricing | ✗ 🟡 repo carries Gemini API pricing ($0.10), not Vertex's |
-| output | **0.4** | **$0.60** | same | ✗ 🟡 |
-| cachedInput | 0.025 | not listed on Vertex pricing page (that's the Gemini API cache price) | same | ❓ UNVERIFIED on Vertex |
-| maxOutputTokens | absent (falls back to 4096) | 8,192 ("output context length of 8,192 tokens by default") | Vertex migrate-palm-to-gemini doc | 🔵 google entry has 8192; add for parity (low priority, discontinued) |
-| contextWindow | 1048576 | 1,048,576 | same doc | ✓ |
-| releaseDate | 2025-02-05 | GA on Vertex 2025-02-05 | blog.google gemini-model-updates-february-2025; developers.googleblog.com | ✓ |
-
-### vertex/gemini-2.0-flash-lite (deprecated)
-
-| Field | Repo | Live docs | Source | Verdict |
-|---|---|---|---|---|
-| deprecated | true | discontinued on Vertex 2026-06-01 | same sources as 2.0-flash | ✓ |
-| input / output | 0.075 / 0.3 | $0.075 / $0.30 | Vertex pricing page | ✓ |
-| cachedInput | omitted | none listed | same | ✓ correctly omitted |
-| maxOutputTokens | absent | 8,192 default | Vertex migrate doc | 🔵 parity suggestion (low priority) |
-| releaseDate | 2025-02-25 | preview 2025-02-05; exact 2025-02-25 GA date not found in fetched pages | _attempted: blog.google, Vertex release notes_ | ❓ UNVERIFIED (plausible — GA followed preview by ~3 weeks; deprecated, left as-is) |
-
-### vertex/deep-research-pro-preview-12-2025
-
-| Field | Repo | Live docs | Source | Verdict |
-|---|---|---|---|---|
-| id | `deep-research-pro-preview-12-2025` | Vertex pricing page has a "Gemini Deep Research Agent" row but no id; id appears on third-party Vertex trackers (CloudPrice `vertex_ai/deep-research-pro-preview-12-2025`); Gemini API changelog confirms Deep Research Agent preview launch 2025-12-11 but its docs now list `deep-research-preview-04-2026` / `deep-research-max-preview-04-2026` | cloud.google.com pricing; cloudprice.net; ai.google.dev/gemini-api/docs/deep-research + changelog | ⚠️ id verified only via secondary sources; **no announced shutdown of the 12-2025 id** — but Google has shipped 04-2026 successors on the Gemini API (watch item) |
-| input | 2.0 | $2 | Vertex pricing page "Gemini Deep Research Agent" + CloudPrice | ✓ (open question a: pricing confirmed) |
-| cachedInput | 0.2 | $0.20 | Vertex pricing page (CloudPrice omits cached) | ✓ |
-| output | 12.0 | $12 | Vertex pricing page + CloudPrice | ✓ (PR #4990 output 12.0 confirmed) |
-| contextWindow | 1048576 | **conflict**: CloudPrice says "66K tokens" context / "33K tokens" max output; underlying model is Gemini 3 Pro (1M ctx); no Google doc states the agent's window; launch blog only says it "handles large context gracefully" | cloudprice.net/models/vertex_ai/deep-research-pro-preview-12-2025; blog.google deep-research post; ai.google.dev/gemini-api/docs/deep-research (lists no token limits for any version) | ❓ UNVERIFIED — conflict NOT resolvable from Google docs (they publish no limits for the agent). 1048576 is an inference from the Gemini 3 Pro core; CloudPrice's 66K/33K (≈65,536/32,768) may reflect the agent's actual per-task envelope |
-| maxOutputTokens | 65536 | no Google figure; CloudPrice says 33K | same | ❓ UNVERIFIED |
-| capabilities deepResearch / memory:false | true / false | it is a managed autonomous research agent; multi-turn memory not offered in preview | blog.google + ai.google.dev/gemini-api/docs/deep-research | ✓ reasonable |
-| releaseDate | 2025-12-11 | "Published December 11, 2025"; changelog: "Launched the Gemini Deep Research Agent in preview" 2025-12-11 | blog.google deep-research-agent-gemini-api; ai.google.dev changelog | ✓ |
-
----
-
-## Changes made in this pass (PR #4990) — re-verification verdicts
-
-| PR #4990 change | Verdict |
-|---|---|
-| Rename `vertex/gemini-3.1-flash-lite-preview` → `vertex/gemini-3.1-flash-lite` | ✓ correct — stable id GA 2026-05-07; preview shut down on Gemini API 2026-05-25; Vertex alias discontinues 2026-07-09 |
-| Drop `'minimal'` from 3.1-pro-preview thinking.levels | ✓ correct — thinking docs: minimal "Not supported" on 3.1 Pro |
-| `deprecated: true` on gemini-3-pro-preview | ✓ correct — shut down (Gemini API 2026-03-09; Vertex 2026-03-26) |
-| `deprecated: true` on both 2.0 models | ✓ correct — discontinued 2026-06-01 |
-| deep-research output → 12.0, cachedInput 0.2 | ✓ correct — Vertex pricing page row |
-| deep-research ctx 1048576 + maxOutputTokens 65536 | ❓ remains unverifiable; CloudPrice conflict (66K/33K) unresolved — Google publishes no limits for the agent |
-| maxOutputTokens 65536 on 3.5-flash / 3.1-pro / 3.1-flash-lite / 3-flash | ✓ correct — all four documented at 65,536 on their Gemini API model pages |
-| maxOutputTokens 65536 on 2.5-pro / 2.5-flash / 2.5-flash-lite | ✗ off-by-one for Vertex — Vertex docs say **65,535** (Gemini API pages say 65,536; platforms genuinely disagree; Vertex entry should carry the Vertex value) |
-| gemini-3-flash-preview ctx → 1048576 | ✓ correct |
-| updatedAt bumps to 2026-06-11 | ✓ all pricing values verified current today |
-
-## Recommended fixes (final disposition)
-
-Rejected by orchestrator re-verification (not applied):
-1. ~~`vertex/gemini-3.1-flash-lite` thinking.levels / default change~~ — the thinking doc confirms minimal IS supported and is the default on 3.1 Flash-Lite; the report's initial reading was wrong. No change made (google entry likewise untouched).
-
-Applied (warning — platform-correct values):
-3. `vertex/gemini-2.5-pro`: `maxOutputTokens` 65536 → 65535 (Vertex model doc)
-4. `vertex/gemini-2.5-flash`: `maxOutputTokens` 65536 → 65535 (Vertex model doc)
-5. `vertex/gemini-2.5-flash-lite`: `maxOutputTokens` 65536 → 65535 (Vertex model doc)
-6. `vertex/gemini-2.0-flash`: `input` 0.1 → 0.15, `output` 0.4 → 0.6 (Vertex pricing page; repo carries Gemini API prices). `cachedInput: 0.025` is unverified on Vertex — consider removing. Low urgency (model discontinued).
-
-Applied (suggestions):
-7. `vertex/gemini-3.5-flash`: add `recommended: true` — parity with the google entry; vertex provider currently has no recommended model
-8. `vertex/gemini-2.5-flash-lite`: add `speedOptimized: true` — parity with google entry (models.ts:1436)
-9. `vertex/gemini-3.1-flash-lite`: add `speedOptimized: true` — "most cost-effective model yet" / lowest-latency tier (blog.google); apply to the google entry too for consistency
-10. (optional) both vertex 2.0 entries: add `maxOutputTokens: 8192` for parity with google entries (Vertex docs: 8,192 default) — cosmetic, models discontinued
-
-Also applied: `releaseDate` 2026-03-03 → 2026-05-07 on both the vertex and google `gemini-3.1-flash-lite` entries (GA date per the Gemini API changelog). Item 10 (maxOutputTokens on discontinued 2.0 entries) was skipped as cosmetic; `cachedInput` on vertex/gemini-2.0-flash was kept (Gemini API documented the rate; no Vertex contradiction found).
-
-## Deliberately not changed
-
-- **2.5 Pro / Flash / Flash-Lite not marked deprecated (open question d):** Vertex retirement is 2026-10-16 (extended from June 2026; Google says the final date will be confirmed with ≥6 months notice once Gemini 3 is GA). They are fully supported today; `deprecated: true` would prematurely hide working models. Recommendation: revisit ~2026-09 (calendar item), keep undeprecated now. Note `defaultModel: 'vertex/gemini-2.5-pro'` (models.ts:1491) will need a new default before retirement — consider moving to `vertex/gemini-3.5-flash` when `recommended` is added.
-- **>200k-token pricing tiers (3.1-pro, 2.5-pro)** are not modeled — `pricing` is a flat structure; entries carry the ≤200k tier. Pre-existing, consistent with the google provider.
-- **Non-global endpoint surcharge (effective 2026-07-01):** Vertex adds +10% pricing for non-global endpoints on 3.x models ($1.65/$9.90 for 3.5-flash, etc.). Our default location is `us-central1` (non-global), so billed cost may exceed modeled cost starting July 1. Entries keep global pricing (the canonical published rate); flagged for ops awareness.
-- **`vertex/gemini-3-pro-preview` pricing/ctx left as historical** — model discontinued and absent from the current pricing page; `deprecated: true` is the user-facing protection.
-- **releaseDate conventions:** 2.5-flash 2025-05-20 (I/O) kept despite an earlier 2025-04-17 Gemini-API preview, matching the repo's "first public launch announcement" convention. 3.1-flash-lite is the exception: its 2026-03-03 (preview announcement) date was replaced with the 2026-05-07 stable GA date, as recorded under "Recommended fixes (final disposition)".
-- **deep-research id not migrated** to the newer `deep-research-preview-04-2026` family — no announced shutdown of `deep-research-pro-preview-12-2025`, and the Vertex pricing row still matches it. Watch item for the next pass.
-
-## Unverifiable
-
-| Item | Attempted sources | Notes |
-|---|---|---|
-| `vertex/deep-research-pro-preview-12-2025` `contextWindow: 1048576` and `maxOutputTokens: 65536` | cloud.google.com pricing (no limits), ai.google.dev/gemini-api/docs/deep-research (lists only 04-2026 versions, no limits), blog.google launch post (no numbers), cloudprice.net (claims 66K ctx / 33K out) | Conflict NOT resolved: Google publishes no token limits for the agent. CloudPrice's 66K/33K (~65,536/32,768) is the only concrete figure and contradicts the repo's 1M. Current values are an inference from the Gemini 3 Pro core. Ask Google docs or test live before changing. |
-| Vertex-side model id for the Deep Research Agent | Vertex pricing page (row name only), Vertex docs (nav-only render), Context7 | Only third-party trackers tie `deep-research-pro-preview-12-2025` to Vertex. |
-| `vertex/gemini-2.0-flash` `cachedInput: 0.025` | Vertex pricing page (no cached row for 2.0) | $0.025 is the Gemini API cache price. Discontinued model; consider dropping the field. |
-| `vertex/gemini-2.0-flash-lite` `releaseDate: 2025-02-25` | blog.google Feb 2025 post (preview 2025-02-05), Vertex release notes (nav-only) | Exact GA date not found this session; plausible, left as-is. |
-| Vertex 3-pro-preview discontinuation date 2026-03-26 (exact) | Vertex deprecations page (nav-only), third-party migration guides | Gemini API shutdown 2026-03-09 is confirmed by the changelog; the Vertex-specific 03-26 date comes from secondary sources citing Google's deprecations table. Either way `deprecated: true` is correct. |
diff --git a/docs/models/xai.md b/docs/models/xai.md
deleted file mode 100644
index 1fd8d159f4..0000000000
--- a/docs/models/xai.md
+++ /dev/null
@@ -1,91 +0,0 @@
-# xAI Provider Validation — models.ts
-
-- **Date:** 2026-06-11
-- **Scope:** `xai` provider block in `apps/sim/providers/models.ts` (~lines 1752–1956), 13 models + provider config. Final re-verification after PR #4990 (deprecation flags, grok-4.20 repricing $2/$6 → $1.25/$2.50 and 2M → 1M, defaultModel → grok-4.3).
-- **Method:** Live WebFetch of xAI docs (primary source, wins all conflicts); OpenRouter as secondary pricing source; WebSearch for release-date pinning; `rg` audit of `apps/sim/providers/xai/` for parameter wiring.
-- **Sources:**
-  - https://docs.x.ai/developers/models (model listing + pricing)
-  - https://docs.x.ai/developers/models/grok-4.3, .../grok-4.20-0309-reasoning, .../grok-4.20-0309-non-reasoning, .../grok-4.20-multi-agent-0309, .../grok-build-0.1, .../grok-3, .../grok-3-fast, .../grok-4 (per-model pages)
-  - https://docs.x.ai/developers/migration/may-15-retirement (retirement/redirect table)
-  - https://docs.x.ai/developers/rest-api-reference/inference/chat (parameter ranges)
-  - https://docs.x.ai/developers/model-capabilities/text/reasoning (reasoning_effort semantics)
-  - https://openrouter.ai/x-ai/grok-4.3, https://openrouter.ai/x-ai/grok-4.20 (secondary)
-
-## Provider config
-
-| Field | Repo value | Source | Verdict |
-|---|---|---|---|
-| `defaultModel` | `grok-4.3` | docs.x.ai/developers/models — grok-4.3 is the current flagship ("most intelligent and fastest"); all retired slugs redirect to it | CORRECT (PR #4990 change re-verified) |
-| `modelPatterns` | `/^grok/` | All current model ids start with `grok` | CORRECT |
-
-## Active models
-
-### grok-4.3
-
-| Field | Repo value | Source value | Source | Verdict |
-|---|---|---|---|---|
-| input | 1.25 | $1.25 / 1M | docs.x.ai/developers/models/grok-4.3; OpenRouter agrees ($1.25) | CORRECT |
-| cachedInput | 0.2 | $0.20 / 1M | docs.x.ai/developers/models/grok-4.3 | CORRECT |
-| output | 2.5 | $2.50 / 1M | docs.x.ai/developers/models/grok-4.3; OpenRouter agrees ($2.50) | CORRECT |
-| contextWindow | 1000000 | 1,000,000 tokens | docs.x.ai per-model page; OpenRouter agrees (1M, "no output token limit") | CORRECT |
-| releaseDate | 2026-04-30 | April 30, 2026 | OpenRouter created date; consistent with xAI announcement timeline | CORRECT |
-| temperature.max | 2 (fixed this pass, was 1) | 0–2 | docs.x.ai chat REST reference: "between 0 and 2" | ✓ after fix |
-| recommended | true | flagship model | docs.x.ai | CORRECT |
-
-Caveat: OpenRouter notes grok-4.3 requests exceeding 200k total tokens bill at a higher tier. xAI's own pricing tables show flat $1.25/$2.50; Sim's pricing model is flat, so the base tier is recorded. No change.
-
-### grok-4.20-0309-reasoning / grok-4.20-0309-non-reasoning / grok-4.20-multi-agent-0309
-
-All three per-model pages were fetched individually; all three show identical numbers (multi-agent is NOT priced differently):
-
-| Field | Repo value | Source value | Source | Verdict |
-|---|---|---|---|---|
-| input | 1.25 | $1.25 / 1M | all three per-model pages | CORRECT (PR #4990 reprice re-verified) |
-| cachedInput | 0.2 | $0.20 / 1M | all three per-model pages | CORRECT |
-| output | 2.5 | $2.50 / 1M | all three per-model pages | CORRECT |
-| contextWindow | 1000000 | 1,000,000 tokens | all three per-model pages | CORRECT — see conflict note |
-| releaseDate | 2026-03-10 | API availability March 10, 2026 | WebSearch (xAI API made Grok 4.20 + multi-agent available 2026-03-10; `0309` slug = March 9 snapshot) | CORRECT (secondary-source verified) |
-| temperature.max | 2 (fixed this pass, was 1) | 0–2 | docs.x.ai chat REST reference | ✓ after fix |
-
-**1M vs 2M conflict resolved:** OpenRouter (x-ai/grok-4.20) lists 2M context; xAI's three per-model pages each state "Context window: 1,000,000 tokens". Press coverage attributes the larger window to "agent modes" (consumer-side), not the API. xAI docs win → **1M confirmed, keep**. (OpenRouter's created date of 2026-03-31 is its listing date, not the API release.)
-
-## Deprecated models (9 entries)
-
-Retirement source: docs.x.ai/developers/migration/may-15-retirement — "After May 15, 2026 at 12:00 PM PT, requests to the retired model slugs will automatically redirect" and bill at the redirect target's rates. Today (2026-06-11) is past that date: the redirects are live. The per-model docs pages for the legacy slugs (`grok-4`, `grok-4-0709`, `grok-3`, `grok-3-fast`) now resolve to the grok-4.3 page showing $1.25/$0.20/$2.50 — direct confirmation that the slugs are aliases billing at target rates.
-
-| Model id | Redirect target (source: may-15-retirement page) | `deprecated: true` verdict |
-|---|---|---|
-| grok-4-latest | grok-4.3 (alias of grok-4-0709; per-model page now resolves to grok-4.3) | CORRECT |
-| grok-4-0709 | grok-4.3 (reasoning_effort low) — explicitly listed | CORRECT |
-| grok-4-1-fast-reasoning | grok-4.3 (low) — explicitly listed | CORRECT |
-| grok-4-1-fast-non-reasoning | grok-4.3 (none) — explicitly listed | CORRECT |
-| grok-4-fast-reasoning | grok-4.3 (low) — explicitly listed | CORRECT |
-| grok-4-fast-non-reasoning | grok-4.3 (none) — explicitly listed | CORRECT |
-| grok-code-fast-1 | grok-build-0.1 — explicitly listed | CORRECT |
-| grok-3-latest | grok-4.3 (none) — `grok-3` explicitly listed; `-latest` is its alias | CORRECT |
-| grok-3-fast-latest | grok-4.3 — not on the May-15 table by name, but docs.x.ai/developers/models/grok-3-fast now resolves to the grok-4.3 page with grok-4.3 pricing | CORRECT |
-
-Legacy pricing fields on these entries ($3/$15 for grok-4 family and grok-3, $5/$25 for grok-3-fast, $0.20/$0.50 fast families, $0.20/$1.50 grok-code-fast-1) match the rates these models historically carried, but xAI no longer publishes them — they are unverifiable against live docs and, more importantly, **no longer what calls cost**.
-
-**Recommendation (one clear position):** reprice the deprecated entries to their redirect targets' rates — the 8 grok-4.3-redirected slugs to $1.25 / $0.20 cached / $2.50, and grok-code-fast-1 to grok-build-0.1's $1.00 / $0.20 cached / $2.00. Rationale: Sim computes execution cost at run time from the current `models.ts` values and stores the result in execution logs; past log rows are unaffected by a reprice, so nothing historical is lost. Meanwhile any workflow still pointed at a retired slug bills at redirect rates today, so the legacy numbers overestimate live costs by up to 10× (grok-3-fast-latest: $25 vs $2.50 output; grok-4-latest: $15 vs $2.50 output is 6×). This is docs-backed (the retirement page states the redirect billing explicitly). **Disposition: APPLIED in this pass** — the 8 grok-4.3 redirects now carry $1.25 / $0.20 cached / $2.50 with `contextWindow: 1000000`, and grok-code-fast-1 carries grok-build-0.1's $1.00 / $0.20 cached / $2.00 (256k unchanged).
-
-## Changes made in this pass
-
-Applied to `models.ts` in this pass, in addition to the deprecated-entry reprice recorded under "Deprecated models (9 entries)":
-
-- **all 13 xai entries: `capabilities.temperature.max` 1 → 2.** The xAI chat REST reference documents `temperature` as "between 0 and 2" (same range OpenAI uses). The repo UI uses this for slider bounds, so the previous `max: 1` artificially halved the usable range. Source: https://docs.x.ai/developers/rest-api-reference/inference/chat
-
-Changes from PR #4990 re-verified and confirmed correct: 9 deprecation flags, grok-4.20 trio reprice to $1.25/$2.50 with 1M context, defaultModel grok-4.3.
-
-## Deliberately not changed
-
-- **grok-4.3 `reasoningEffort` capability flag — not added.** The REST reference and reasoning docs confirm grok-4.3 supports `reasoning_effort` with `none` / `low` (default) / `medium` / `high` ("Only supported by grok-4.3"). However, `apps/sim/providers/xai/index.ts` forwards only `temperature` (verified by rg: single hit at line 101, `basePayload.temperature`); no `reasoning_effort` wiring exists, so the capability flag would be dead metadata. **Recommended follow-up:** wire `reasoning_effort` in the xai provider, then add the capability flag to grok-4.3. Note for that follow-up: per the reasoning docs, `presence_penalty`, `frequency_penalty`, and `stop` cannot be combined with reasoning, and grok-4.20-multi-agent uses a different control (`reasoning.effort`: low/medium/high/xhigh, controlling agent count, not reasoning depth).
-
-- **grok-build-0.1 — not added.** grok-code-fast-1's successor: $1.00 input / $0.20 cached / $2.00 output, 256k context, "xAI's fast coding model trained specifically for agentic coding" (docs.x.ai/developers/models/grok-build-0.1). Recommended addition; adding models is separate work from validation.
-- **grok-4.3 tiered >200k-token pricing — not modeled.** Sim's pricing schema is flat; base tier recorded (and xAI's own table is flat).
-
-## Unverifiable
-
-- **Original (pre-retirement) pricing of the 9 deprecated entries** — xAI docs no longer publish historical rates; values match known historical pricing but cannot be confirmed against a live source.
-- **Release dates of deprecated entries** (2025-07-09, 2025-11-19, 2025-09-19, 2025-08-28, 2025-02-17) — consistent with historical announcements/slugs (e.g. `grok-4-0709`), not republished on live docs.
-- **grok-4.3 / grok-4.20 official release dates on xAI docs** — per-model pages omit release dates. grok-4.3: 2026-04-30 corroborated by OpenRouter. grok-4.20: 2026-03-10 corroborated by secondary reporting of xAI API availability plus the `0309` snapshot slug; treated as verified-by-secondary-source.

From ddadc30c9db51d8ca60b92610d450b3fb46f021a Mon Sep 17 00:00:00 2001
From: waleed <walif6@gmail.com>
Date: Thu, 11 Jun 2026 20:10:02 -0700
Subject: [PATCH 4/4] fix(providers): default azure-openai to gpt-5.4 instead
 of deprecated gpt-4o

---
 apps/sim/providers/models.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/apps/sim/providers/models.ts b/apps/sim/providers/models.ts
index 988074e6a6..b48805dec2 100644
--- a/apps/sim/providers/models.ts
+++ b/apps/sim/providers/models.ts
@@ -876,7 +876,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     id: 'azure-openai',
     name: 'Azure OpenAI',
     description: 'Microsoft Azure OpenAI Service models',
-    defaultModel: 'azure/gpt-4o',
+    defaultModel: 'azure/gpt-5.4',
     modelPatterns: [/^azure\//],
     capabilities: {
       toolUsageControl: true,