forked from openai/codex
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodel-utils.ts
More file actions
196 lines (176 loc) · 7.13 KB
/
model-utils.ts
File metadata and controls
196 lines (176 loc) · 7.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
import type { ResponseItem } from "openai/resources/responses/responses.mjs";
import { approximateTokensUsed } from "./approximate-tokens-used.js";
import { getApiKey } from "./config.js";
import { type SupportedModelId, openAiModelInfo } from "./model-info.js";
import { createOpenAIClient } from "./openai-client.js";
// Upper bound (in milliseconds) that isModelSupportedForResponses waits for the
// model list; when it elapses the model is treated as supported.
const MODEL_LIST_TIMEOUT_MS = 2_000; // 2 seconds
// Model ids that isModelSupportedForResponses accepts immediately, without
// fetching the list of available models.
export const RECOMMENDED_MODELS: Array<string> = ["o4-mini", "o3"];
/**
 * Retrieves the ids of the models exposed by `provider`.
 *
 * Each call issues a fresh `models.list()` request through the client built
 * by `createOpenAIClient`; nothing is cached in this function.
 *
 * @param provider Provider name, passed to `getApiKey` and `createOpenAIClient`.
 * @returns The model ids in sorted order, or an empty array when creating the
 *          client or listing the models fails.
 * @throws Error when no API key is configured for `provider`.
 */
async function fetchModels(provider: string): Promise<Array<string>> {
  // Without an API key there is no way to query the provider.
  if (!getApiKey(provider)) {
    throw new Error("No API key configured for provider: " + provider);
  }

  // Some ids arrive prefixed with "models/" (seen with Gemini); strip it.
  const prefix = "models/";

  try {
    const client = createOpenAIClient({ provider });
    const page = await client.models.list();
    const ids: Array<string> = [];

    for await (const entry of page as AsyncIterable<{ id?: string }>) {
      // Skip entries that do not carry a string id.
      if (!entry || typeof entry.id !== "string") {
        continue;
      }
      const id = entry.id;
      ids.push(id.startsWith(prefix) ? id.slice(prefix.length) : id);
    }

    ids.sort();
    return ids;
  } catch {
    // Best effort: any failure while listing yields an empty list.
    return [];
  }
}
/**
 * Returns the list of models available from the given provider.
 *
 * The provider name is lower-cased before it is handed to `fetchModels`.
 */
export async function getAvailableModels(
  provider: string,
): Promise<Array<string>> {
  const normalizedProvider = provider.toLowerCase();
  return fetchModels(normalizedProvider);
}
/**
 * Checks whether `model` is present in the list returned by
 * {@link getAvailableModels} for `provider`.
 *
 * The check is deliberately lenient so that it never blocks start-up. It
 * resolves to `true` when:
 *   • `model` is not a string, or is empty / whitespace only;
 *   • the (trimmed) model is one of `RECOMMENDED_MODELS`;
 *   • the model list is empty, which is also what the
 *     `MODEL_LIST_TIMEOUT_MS` timeout produces;
 *   • fetching the model list fails.
 *
 * @param provider Provider whose model list is consulted.
 * @param model    Model identifier to verify; surrounding whitespace is ignored.
 * @returns `false` only when a non-empty model list was obtained and it does
 *          not contain the trimmed model name.
 */
export async function isModelSupportedForResponses(
  provider: string,
  model: string | undefined | null,
): Promise<boolean> {
  if (typeof model !== "string") {
    return true;
  }

  // Use the trimmed name for every comparison so that stray whitespace cannot
  // change the outcome.
  const requested = model.trim();
  if (requested === "" || RECOMMENDED_MODELS.includes(requested)) {
    return true;
  }

  let timer: ReturnType<typeof setTimeout> | undefined;
  try {
    const models = await Promise.race<Array<string>>([
      getAvailableModels(provider),
      new Promise<Array<string>>((resolve) => {
        timer = setTimeout(() => resolve([]), MODEL_LIST_TIMEOUT_MS);
      }),
    ]);

    // If the timeout fired we get an empty list → treat as supported to avoid
    // false negatives.
    if (models.length === 0) {
      return true;
    }

    return models.includes(requested);
  } catch {
    // Network or library failure → don't block start-up.
    return true;
  } finally {
    // Cancel the pending timeout once the race is settled; otherwise the live
    // timer keeps the event loop busy for up to MODEL_LIST_TIMEOUT_MS.
    clearTimeout(timer);
  }
}
/**
 * Returns the maximum context length (in tokens) for a given model.
 *
 * Resolution order:
 *   1. Models registered in `openAiModelInfo` use their `maxContextLength`.
 *   2. Otherwise the name is scanned for size hints of the form `<N>k`
 *      (e.g. "32k", "128k"); the largest hint wins and maps to `N * 1000`.
 *   3. Otherwise 128000 is returned.
 *
 * @param model Model identifier.
 * @returns The context length in tokens; always a positive number for models
 *          that are not in the registry.
 */
export function maxTokensForModel(model: string): number {
  // Own-property lookup: the `in` operator would also match inherited keys
  // such as "toString" and yield `undefined` below.
  if (Object.prototype.hasOwnProperty.call(openAiModelInfo, model)) {
    return openAiModelInfo[model as SupportedModelId].maxContextLength;
  }

  // Fallback heuristic for models not in the registry. The lookbehind makes
  // sure a hint is read as a whole number, so "128k" is not mistaken for "8k"
  // and "64k" is not mistaken for "4k".
  const hints = model.toLowerCase().match(/(?<![\d.])\d+k/g) ?? [];
  const largest = hints.reduce(
    (best, hint) => Math.max(best, Number.parseInt(hint, 10) * 1000),
    0,
  );
  if (largest > 0 && Number.isSafeInteger(largest)) {
    return largest;
  }

  return 128000; // Default to 128k for any other model.
}
/**
 * Computes how much of a model's context window is still free, as a
 * percentage.
 *
 * @param items Conversation items whose token usage is estimated with
 *              `approximateTokensUsed`.
 * @param model Model whose limit is looked up with `maxTokensForModel`.
 * @returns Percentage of the limit that is unused; the unused amount is
 *          clamped at 0, so exceeding the limit yields 0.
 */
export function calculateContextPercentRemaining(
  items: Array<ResponseItem>,
  model: string,
): number {
  const tokensUsed = approximateTokensUsed(items);
  const contextLimit = maxTokensForModel(model);
  // Never report a negative amount when usage exceeds the limit.
  const tokensLeft = Math.max(0, contextLimit - tokensUsed);
  const fractionLeft = tokensLeft / contextLimit;
  return fractionLeft * 100;
}
/**
 * Type guard that narrows a {@link ResponseItem} to a user-authored message.
 *
 * An item qualifies when its `type` is the literal "message" and its `role`
 * is "user". Messages with any other role, and all non-message items, are
 * rejected. The de-duplication logic in this file relies on this helper to
 * detect user messages.
 */
function isUserMessage(
  item: ResponseItem,
): item is ResponseItem & { type: "message"; role: "user"; content: unknown } {
  if (item.type !== "message") {
    return false;
  }
  // `role` is read through a loose shape rather than the item's own type.
  const { role } = item as { role?: string };
  return role === "user";
}
/**
 * Removes duplicate {@link ResponseItem}s from a list while preserving order.
 *
 * Two independent rules are applied to every item, in this order:
 *   1. Items carrying a non-empty string `id` are kept only the *first* time
 *      that `id` is seen.
 *   2. A user message is dropped when the most recently kept item is also a
 *      user message whose `content` serialises to the same JSON. Only
 *      *adjacent* repeats are collapsed, so the same question asked again
 *      later in the conversation is kept.
 *
 * Rule 2 exists because user messages do not necessarily carry an `id`, so
 * rule 1 alone cannot catch a message submitted twice in a row.
 *
 * @param items Items in their original order; the array is not modified.
 * @returns A new array containing the retained items.
 */
export function uniqueById(items: Array<ResponseItem>): Array<ResponseItem> {
  const knownIds = new Set<string>();
  const kept: Array<ResponseItem> = [];

  for (const current of items) {
    // Rule 1: the first occurrence of an id wins.
    const id = current.id;
    if (typeof id === "string" && id.length > 0) {
      if (knownIds.has(id)) {
        continue;
      }
      knownIds.add(id);
    }

    // Rule 2: collapse a user message that repeats the one kept just before
    // it. Comparing the JSON form of `content` is enough for these payloads.
    const last = kept.length > 0 ? kept[kept.length - 1] : undefined;
    const repeatsLastUserMessage =
      last !== undefined &&
      isUserMessage(current) &&
      isUserMessage(last) &&
      JSON.stringify(last.content) === JSON.stringify(current.content);

    if (!repeatsLastUserMessage) {
      kept.push(current);
    }
  }

  return kept;
}