From 5e36725c14a2fef39d0e41e182d37dd89766c31d Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Mon, 15 Jun 2026 17:47:39 +0100 Subject: [PATCH 1/7] feat(webapp): split Models into Your models and Model library tabs --- .server-changes/models-page-usage-tabs.md | 6 + .../app/assets/icons/AiProviderIcons.tsx | 34 +- .../components/primitives/UsageSparkline.tsx | 115 +++++ .../v3/ModelRegistryPresenter.server.ts | 238 +++++++++- .../route.tsx | 413 ++++++++++++++++-- 5 files changed, 754 insertions(+), 52 deletions(-) create mode 100644 .server-changes/models-page-usage-tabs.md create mode 100644 apps/webapp/app/components/primitives/UsageSparkline.tsx diff --git a/.server-changes/models-page-usage-tabs.md b/.server-changes/models-page-usage-tabs.md new file mode 100644 index 00000000000..da2f4f2fda8 --- /dev/null +++ b/.server-changes/models-page-usage-tabs.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: feature +--- + +The Models page now has a Your models tab showing your project's model usage (cost, calls, latency, and trend sparklines over a selectable time range) alongside the full model library, which is ordered by provider relevance and release date. diff --git a/apps/webapp/app/assets/icons/AiProviderIcons.tsx b/apps/webapp/app/assets/icons/AiProviderIcons.tsx index 85a01b98d63..2be3fe38ed7 100644 --- a/apps/webapp/app/assets/icons/AiProviderIcons.tsx +++ b/apps/webapp/app/assets/icons/AiProviderIcons.tsx @@ -46,8 +46,8 @@ export function LlamaIcon({ className }: IconProps) { xmlns="http://www.w3.org/2000/svg" > @@ -58,10 +58,10 @@ export function LlamaIcon({ className }: IconProps) { export function DeepseekIcon({ className }: IconProps) { return ( - + @@ -99,8 +99,8 @@ export function PerplexityIcon({ className }: IconProps) { return ( @@ -112,32 +112,32 @@ export function CerebrasIcon({ className }: IconProps) { return ( diff --git a/apps/webapp/app/components/primitives/UsageSparkline.tsx b/apps/webapp/app/components/primitives/UsageSparkline.tsx new file mode 100644 index 00000000000..553dc4fc641 --- /dev/null +++ b/apps/webapp/app/components/primitives/UsageSparkline.tsx @@ -0,0 +1,115 @@ +import { + Bar, + BarChart, + ReferenceLine, + ResponsiveContainer, + Tooltip, + YAxis, + type TooltipProps, +} from "recharts"; +import { cn } from "~/utils/cn"; +import { formatDateTime } from "./DateTime"; +import { Header3 } from "./Headers"; +import TooltipPortal from "./TooltipPortal"; + +type UsageDatum = { date: Date; count: number }; + +type UnitLabel = { singular: string; plural: string }; + +export type UsageSparklineProps = { + /** Trailing 24 hourly buckets; the last entry is the most recent hour. */ + data?: number[]; + /** Bar colour. Defaults to blue. */ + color?: string; + /** Unit shown in the tooltip (e.g. calls, tokens). */ + unitLabel?: UnitLabel; + /** Format the trailing total. Defaults to `toLocaleString`. */ + formatTotal?: (total: number) => string; + /** Class for the trailing total label. */ + totalClassName?: string; +}; + +/** + * Inline 24h sparkline for list rows. Renders a small bar chart plus a trailing + * total, or an em-dash when there's no data. Shared by the prompts and models + * lists — keep it presentational (the caller supplies the zero-filled buckets). + */ +export function UsageSparkline({ + data, + color = "#3B82F6", + unitLabel = { singular: "call", plural: "calls" }, + formatTotal, + totalClassName = "text-blue-400", +}: UsageSparklineProps) { + if (!data || data.every((v) => v === 0)) { + return ; + } + + const total = data.reduce((a, b) => a + b, 0); + const max = Math.max(...data); + + // Map the 24-bucket array to dated points so the tooltip can show the + // hour each bar represents. Bucket i is `23 - i` hours before now. + const now = new Date(); + const chartData: UsageDatum[] = data.map((count, i) => ({ + date: new Date(now.getTime() - (data.length - 1 - i) * 3600_000), + count, + })); + + return ( +
+
+ + + + } + allowEscapeViewBox={{ x: true, y: true }} + wrapperStyle={{ zIndex: 1000 }} + animationDuration={0} + /> + + + {max > 0 && ( + + )} + + +
+ + {formatTotal ? formatTotal(total) : total.toLocaleString()} + +
+ ); +} + +function UsageSparklineTooltip({ + active, + payload, + unitLabel, +}: TooltipProps & { unitLabel: UnitLabel }) { + if (!active || !payload || payload.length === 0) return null; + const entry = payload[0].payload as UsageDatum; + const date = entry.date instanceof Date ? entry.date : new Date(entry.date); + const formattedDate = formatDateTime(date, "UTC", [], false, true); + return ( + +
+ {formattedDate} +
+ {entry.count.toLocaleString()}{" "} + + {entry.count === 1 ? unitLabel.singular : unitLabel.plural} + +
+
+
+ ); +} diff --git a/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts b/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts index 16a0aa75046..fddb92d897d 100644 --- a/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts @@ -52,6 +52,64 @@ export function formatModelId(provider: string, modelName: string): string { return `${provider}:${modelName}`; } +/** + * Hardcoded provider display priority (most relevant first). Providers not in + * this list fall back to alphabetical order after the listed ones. Within a + * provider, models are always sorted by release date (newest first). + */ +const PROVIDER_IMPORTANCE = [ + "anthropic", + "openai", + "google", + "xai", + "meta", + "mistral", + "deepseek", +]; + +function providerRank(provider: string): number { + const index = PROVIDER_IMPORTANCE.indexOf(provider); + return index === -1 ? PROVIDER_IMPORTANCE.length : index; +} + +/** + * Pick a sparkline bucket size (in seconds) for a given range so the rendered + * sparkline stays a readable ~24-52 bars. Tuned for the small inline charts in + * the "Your models" list — coarser than the full-size dashboard charts. + */ +function sparklineBucketSeconds(rangeMs: number): number { + const MIN = 60; + const HOUR = 3600; + const DAY = 86400; + const ms = (s: number) => s * 1000; + if (rangeMs <= ms(HOUR)) return 2 * MIN; + if (rangeMs <= ms(3 * HOUR)) return 5 * MIN; + if (rangeMs <= ms(6 * HOUR)) return 15 * MIN; + if (rangeMs <= ms(DAY)) return HOUR; + if (rangeMs <= ms(3 * DAY)) return 2 * HOUR; + if (rangeMs <= ms(7 * DAY)) return 6 * HOUR; + if (rangeMs <= ms(14 * DAY)) return 12 * HOUR; + if (rangeMs <= ms(30 * DAY)) return DAY; + if (rangeMs <= ms(90 * DAY)) return 3 * DAY; + return 7 * DAY; +} + +/** + * Generate the ordered bucket-start keys for [from, to] at the given interval, + * epoch-aligned in UTC to exactly match ClickHouse's + * `toStartOfInterval(col, INTERVAL n SECOND)` output strings ("YYYY-MM-DD HH:MM:SS"). + */ +function sparklineBucketKeys(from: Date, to: Date, intervalSeconds: number): string[] { + const intervalMs = intervalSeconds * 1000; + const start = Math.floor(from.getTime() / intervalMs) * intervalMs; + const end = Math.floor(to.getTime() / intervalMs) * intervalMs; + const keys: string[] = []; + for (let t = start; t <= end; t += intervalMs) { + keys.push(new Date(t).toISOString().slice(0, 19).replace("T", " ")); + } + return keys; +} + // --- Types --- export type ModelCatalogItem = { @@ -162,6 +220,17 @@ export type PopularModel = { ttfcP50: number; }; +/** A model with usage in a specific project/environment (the "Your models" list). */ +export type ProjectModelUsageItem = { + responseModel: string; + genAiSystem: string; + calls: number; + totalCost: number; + totalTokens: number; + avgTtfc: number; + avgTps: number; +}; + // --- ClickHouse schemas for user metrics --- const UserMetricsSummaryRow = z.object({ @@ -179,6 +248,22 @@ const UserTaskBreakdownRow = z.object({ cost: z.coerce.number(), }); +const ProjectModelUsageRow = z.object({ + response_model: z.string(), + gen_ai_system: z.string(), + calls: z.coerce.number(), + total_cost: z.coerce.number(), + total_tokens: z.coerce.number(), + avg_ttfc: z.coerce.number(), + avg_tps: z.coerce.number(), +}); + +const ModelSparklineRow = z.object({ + response_model: z.string(), + bucket: z.string(), + val: z.coerce.number(), +}); + // --- Presenter --- export class ModelRegistryPresenter extends BasePresenter { @@ -296,7 +381,12 @@ export class ModelRegistryPresenter extends BasePresenter { } return Array.from(groups.entries()) - .sort(([a], [b]) => a.localeCompare(b)) + .sort(([a], [b]) => { + const rankA = providerRank(a); + const rankB = providerRank(b); + if (rankA !== rankB) return rankA - rankB; + return a.localeCompare(b); + }) .map(([provider, models]) => ({ provider, models: models.sort((a, b) => { @@ -549,4 +639,150 @@ export class ModelRegistryPresenter extends BasePresenter { ttfcP50: r.ttfc_p50, })); } + + /** + * Models that had usage in a specific project/environment over the window, + * with aggregate metrics. This is the tenant-scoped "Your models" list (as + * opposed to the cross-tenant getPopularModels). + */ + async getProjectModelUsage( + projectId: string, + environmentId: string, + startTime: Date, + endTime: Date + ): Promise { + const queryFn = this.clickhouse.reader.query({ + name: "modelRegistryProjectUsage", + query: ` + SELECT + response_model, + any(gen_ai_system) AS gen_ai_system, + count() AS calls, + sum(total_cost) AS total_cost, + sum(total_tokens) AS total_tokens, + round(avg(ms_to_first_chunk), 1) AS avg_ttfc, + round(avg(tokens_per_second), 1) AS avg_tps + FROM trigger_dev.llm_metrics_v1 + WHERE project_id = {projectId: String} + AND environment_id = {environmentId: String} + AND start_time >= {startTime: String} + AND start_time <= {endTime: String} + AND response_model != '' + GROUP BY response_model + ORDER BY calls DESC + LIMIT 100 + `, + params: z.object({ + projectId: z.string(), + environmentId: z.string(), + startTime: z.string(), + endTime: z.string(), + }), + schema: ProjectModelUsageRow, + }); + + const [error, rows] = await queryFn({ + projectId, + environmentId, + startTime: formatDateForCH(startTime), + endTime: formatDateForCH(endTime), + }); + + if (error || !rows) return []; + + return rows.map((r) => ({ + responseModel: r.response_model, + genAiSystem: r.gen_ai_system, + calls: r.calls, + totalCost: r.total_cost, + totalTokens: r.total_tokens, + avgTtfc: r.avg_ttfc, + avgTps: r.avg_tps, + })); + } + + /** + * Call-count and total-token sparklines per response_model over [from, to], + * matching the window the "Your models" charts and table use. The bucket size + * adapts to the range (see sparklineBucketSeconds) so a sparkline stays a + * readable ~24-52 bars regardless of the selected period. Zero-filled. + */ + async getModelUsageSparklines( + environmentId: string, + responseModels: string[], + from: Date, + to: Date + ): Promise<{ calls: Record; tokens: Record }> { + if (responseModels.length === 0) return { calls: {}, tokens: {} }; + + const intervalSeconds = sparklineBucketSeconds(to.getTime() - from.getTime()); + const bucketKeys = sparklineBucketKeys(from, to, intervalSeconds); + + // intervalSeconds is a server-derived integer from a fixed ladder, so it's + // safe to inline. Epoch-aligned SECOND buckets match the JS keys above. + const buildQuery = (valueExpr: string, name: string) => + this.clickhouse.reader.query({ + name, + query: ` + SELECT + response_model, + toStartOfInterval(start_time, INTERVAL ${intervalSeconds} SECOND) AS bucket, + ${valueExpr} AS val + FROM trigger_dev.llm_metrics_v1 + WHERE environment_id = {environmentId: String} + AND response_model IN {responseModels: Array(String)} + AND start_time >= {startTime: String} + AND start_time <= {endTime: String} + GROUP BY response_model, bucket + ORDER BY response_model, bucket + `, + params: z.object({ + environmentId: z.string(), + responseModels: z.array(z.string()), + startTime: z.string(), + endTime: z.string(), + }), + schema: ModelSparklineRow, + }); + + const queryParams = { + environmentId, + responseModels, + startTime: formatDateForCH(from), + endTime: formatDateForCH(to), + }; + + const [callsResult, tokensResult] = await Promise.all([ + buildQuery("count()", "modelCallSparklines")(queryParams), + buildQuery("sum(total_tokens)", "modelTokenSparklines")(queryParams), + ]); + + return { + calls: this.#buildSparklineMap(callsResult, responseModels, bucketKeys), + tokens: this.#buildSparklineMap(tokensResult, responseModels, bucketKeys), + }; + } + + /** Convert a sparkline query result to a zero-filled bucket map. */ + #buildSparklineMap( + queryResult: + | [Error, null] + | [null, { response_model: string; bucket: string; val: number }[]], + keys: string[], + bucketKeys: string[] + ): Record { + const [error, rows] = queryResult; + if (error || !rows) return {}; + + const rowMap = new Map(); + for (const row of rows) { + rowMap.set(`${row.response_model}|${row.bucket}`, row.val); + } + + const result: Record = {}; + for (const key of keys) { + result[key] = bucketKeys.map((b) => rowMap.get(`${key}|${b}`) ?? 0); + } + return result; + } } diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx index 8785c9a2dc2..943d9ae221f 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx @@ -1,11 +1,17 @@ import { AdjustmentsHorizontalIcon, + ArrowTopRightOnSquareIcon, CheckIcon, CubeIcon, XMarkIcon, } from "@heroicons/react/20/solid"; import * as Ariakit from "@ariakit/react"; -import { Form, type MetaFunction, useFetcher } from "@remix-run/react"; +import { + Form, + type MetaFunction, + type ShouldRevalidateFunctionArgs, + useFetcher, +} from "@remix-run/react"; import { type LoaderFunctionArgs } from "@remix-run/server-runtime"; import { AnimatePresence, motion } from "framer-motion"; import { useEffect, useMemo, useRef, useState } from "react"; @@ -27,7 +33,7 @@ import { InlineCode } from "~/components/code/InlineCode"; import { PageBody, PageContainer } from "~/components/layout/AppLayout"; import { AppliedFilter } from "~/components/primitives/AppliedFilter"; import { Badge } from "~/components/primitives/Badge"; -import { Button } from "~/components/primitives/Buttons"; +import { Button, LinkButton } from "~/components/primitives/Buttons"; import { Callout } from "~/components/primitives/Callout"; import { Checkbox } from "~/components/primitives/Checkbox"; import { DateTime } from "~/components/primitives/DateTime"; @@ -61,7 +67,13 @@ import { TableRow, } from "~/components/primitives/Table"; import { TabButton, TabContainer } from "~/components/primitives/Tabs"; -import { appliedSummary } from "~/components/runs/v3/SharedFilters"; +import { + appliedSummary, + TimeFilter, + type TimeFilterApplyValues, + timeFilterFromTo, +} from "~/components/runs/v3/SharedFilters"; +import { parseFiniteInt } from "~/utils/searchParams"; import { useSearchParams } from "~/hooks/useSearchParam"; import { useShortcutKeys } from "~/hooks/useShortcutKeys"; import { useOptimisticLocation } from "~/hooks/useOptimisticLocation"; @@ -71,6 +83,7 @@ import { type ModelCatalogItem, type ModelComparisonItem, type PopularModel, + type ProjectModelUsageItem, ModelRegistryPresenter, } from "~/presenters/v3/ModelRegistryPresenter.server"; import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; @@ -78,7 +91,7 @@ import { requireUserId } from "~/services/session.server"; import { useEnvironment } from "~/hooks/useEnvironment"; import { useOrganization } from "~/hooks/useOrganizations"; import { useProject } from "~/hooks/useProject"; -import { EnvironmentParamSchema, v3ModelComparePath } from "~/utils/pathBuilder"; +import { EnvironmentParamSchema, v3BuiltInDashboardPath, v3ModelComparePath } from "~/utils/pathBuilder"; import { formatModelPrice, formatTokenCount, @@ -88,6 +101,7 @@ import { } from "~/utils/modelFormatters"; import { formatNumberCompact } from "~/utils/numberFormatter"; import { Spinner } from "~/components/primitives/Spinner"; +import { UsageSparkline } from "~/components/primitives/UsageSparkline"; import { MetricWidget } from "~/routes/resources.metric"; import type { QueryWidgetConfig } from "~/components/metrics/QueryWidget"; @@ -116,9 +130,27 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { const presenter = new ModelRegistryPresenter(clickhouse); const catalog = await presenter.getModelCatalog(); - const now = new Date(); - const sevenDaysAgo = new Date(now.getTime() - 7 * 24 * 60 * 60 * 1000); - const popularModels = await presenter.getPopularModels(sevenDaysAgo, now, 50); + // Shared time range for the "Your models" tab (charts, usage table, sparklines). + // Mirrors the agent detail page: URL-driven period / from / to via TimeFilter. + const url = new URL(request.url); + const period = url.searchParams.get("period") ?? undefined; + const from = parseFiniteInt(url.searchParams.get("from")); + const to = parseFiniteInt(url.searchParams.get("to")); + const time = timeFilterFromTo({ period, from, to, defaultPeriod: "7d" }); + + // popularModels = cross-tenant aggregate (powers the library's p50 TTFC column). + // projectUsage = tenant-scoped models with usage in this env (the "Your models" tab). + const [popularModels, projectUsage] = await Promise.all([ + presenter.getPopularModels(time.from, time.to, 50), + presenter.getProjectModelUsage(project.id, environment.id, time.from, time.to), + ]); + + const usageSparklines = await presenter.getModelUsageSparklines( + environment.id, + projectUsage.map((u) => u.responseModel), + time.from, + time.to + ); const allProviders = catalog.map((g) => g.provider); const allFeatures = Array.from( @@ -128,6 +160,8 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { return typedjson({ catalog, popularModels, + projectUsage, + usageSparklines, allProviders, allFeatures, organizationId: project.organizationId, @@ -136,6 +170,26 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { }); }; +export function shouldRevalidate({ + currentUrl, + nextUrl, + defaultShouldRevalidate, +}: ShouldRevalidateFunctionArgs) { + // The active tab is persisted in the URL (?tab=), but no loader data depends + // on it — so switching tabs must not refetch. Any other param change (period, + // from/to, …) revalidates as normal. + const normalize = (url: URL) => { + const params = new URLSearchParams(url.search); + params.delete("tab"); + params.sort(); + return params.toString(); + }; + if (normalize(currentUrl) === normalize(nextUrl)) { + return false; + } + return defaultShouldRevalidate; +} + const providerIcons: Record JSX.Element> = { openai: OpenAIIcon, anthropic: AnthropicIcon, @@ -154,6 +208,16 @@ function providerIcon(slug: string) { return ; } +const NEW_MODEL_WINDOW_DAYS = 7; + +/** True if the model was released within the last NEW_MODEL_WINDOW_DAYS. */ +function isNewModel(releaseDate: string | null): boolean { + if (!releaseDate) return false; + const released = new Date(releaseDate).getTime(); + if (Number.isNaN(released)) return false; + return Date.now() - released <= NEW_MODEL_WINDOW_DAYS * 24 * 60 * 60 * 1000; +} + // --- Filter Components --- const providerShortcut = { key: "p" }; @@ -468,7 +532,10 @@ function ModelsList({ /> - {model.displayId} + + {model.displayId} + {isNewModel(model.releaseDate) && New} + @@ -768,14 +835,16 @@ function chartConfig(opts: { xAxisColumn: string; yAxisColumns: string[]; aggregation?: "sum" | "avg"; + stacked?: boolean; + groupByColumn?: string | null; }): QueryWidgetConfig { return { type: "chart", chartType: opts.chartType, xAxisColumn: opts.xAxisColumn, yAxisColumns: opts.yAxisColumns, - groupByColumn: null, - stacked: false, + groupByColumn: opts.groupByColumn ?? null, + stacked: opts.stacked ?? false, sortByColumn: null, sortDirection: "asc", aggregation: opts.aggregation ?? "sum", @@ -784,17 +853,21 @@ function chartConfig(opts: { type DetailTab = "overview" | "usage"; +type ModelsTab = "yours" | "library"; + function ModelDetailPanel({ model, organizationId, projectId, environmentId, + aiMetricsBasePath, onClose, }: { model: ModelCatalogItem; organizationId: string; projectId: string; environmentId: string; + aiMetricsBasePath: string; onClose: () => void; }) { const [tab, setTab] = useState("overview"); @@ -840,6 +913,7 @@ function ModelDetailPanel({ organizationId={organizationId} projectId={projectId} environmentId={environmentId} + aiMetricsBasePath={aiMetricsBasePath} /> )} @@ -947,28 +1021,61 @@ function DetailYourUsageTab({ organizationId, projectId, environmentId, + aiMetricsBasePath, }: { modelName: string; organizationId: string; projectId: string; environmentId: string; + aiMetricsBasePath: string; }) { + // Inspector-local range, independent of the page-level "Your models" range. + const [range, setRange] = useState({ period: "7d" }); + const widgetProps = { organizationId, projectId, environmentId, scope: "environment" as const, - period: "7d", - from: null, - to: null, + period: range.from && range.to ? null : range.period ?? "7d", + from: range.from ?? null, + to: range.to ?? null, }; + // Deep-link to the AI metrics dashboard pre-filtered to this model, carrying + // the inspector's current range so the dashboard opens on the same window. + const dashboardParams = new URLSearchParams({ models: modelName }); + if (range.from && range.to) { + dashboardParams.set("from", range.from); + dashboardParams.set("to", range.to); + } else if (range.period) { + dashboardParams.set("period", range.period); + } + const aiMetricsHref = `${aiMetricsBasePath}?${dashboardParams.toString()}`; + return (
+
+ + + View in AI metrics + +
; + tokenSparklines: Record; + organizationId: string; + projectId: string; + environmentId: string; + period: string | null; + from: string | null; + to: string | null; + modelLookup: Map; + selectedModelId: string | null; + onSelectModel: (model: ModelCatalogItem) => void; + onGoToLibrary: () => void; +}) { + // Drive the charts off the same URL-selected range as the table + sparklines. + // period and from/to are mutually exclusive (TimeFilter enforces this). + const widgetProps = { + organizationId, + projectId, + environmentId, + scope: "environment" as const, + period: from && to ? null : period ?? "7d", + from, + to, + }; + + return ( +
+
+
+ +
+
+ +
+
+ +
+
+ +
+ {usage.length === 0 ? ( +
+

+ No model usage in this environment yet. Models you call from your tasks will appear here + with usage metrics. +

+ +
+ ) : ( + + + + Model + Provider + Calls + Cost + Avg TTFC + Avg tokens/sec + Calls trend + Tokens trend + + + + {usage.map((u) => { + const catalogItem = modelLookup.get(u.responseModel); + const provider = catalogItem?.provider ?? u.genAiSystem; + const displayId = catalogItem?.displayId ?? `${provider}:${u.responseModel}`; + const select = catalogItem ? () => onSelectModel(catalogItem) : undefined; + return ( + + + {displayId} + + + + {providerIcon(provider)} + {formatProviderName(provider)} + + + + {formatNumberCompact(u.calls)} + + + {formatModelCost(u.totalCost)} + + + {u.avgTtfc > 0 ? `${u.avgTtfc.toFixed(0)}ms` : "—"} + + + {u.avgTps > 0 ? u.avgTps.toFixed(0) : "—"} + + + + + + formatNumberCompact(t)} + totalClassName="text-emerald-400" + /> + + + ); + })} + +
+ )} +
+
+ ); +} + // --- Main Page --- export default function ModelsPage() { const { catalog, popularModels, + projectUsage, + usageSparklines, allProviders, allFeatures, organizationId, projectId, environmentId, } = useTypedLoaderData(); - const { values: searchValues, value: searchValue } = useSearchParams(); + const organization = useOrganization(); + const project = useProject(); + const environment = useEnvironment(); + const aiMetricsBasePath = v3BuiltInDashboardPath(organization, project, environment, "llm"); + const { values: searchValues, value: searchValue, replace } = useSearchParams(); const search = searchValue("search") ?? ""; const selectedProviders = searchValues("providers"); const selectedFeatures = searchValues("features"); + const periodParam = searchValue("period") ?? null; + const fromParam = searchValue("from") ?? null; + const toParam = searchValue("to") ?? null; + // Active tab is persisted in the URL (?tab=) so it survives refresh and is + // shareable. Defaults to "yours" when there's usage, else "library". + const tabParam = searchValue("tab"); + const view: ModelsTab = + tabParam === "library" + ? "library" + : tabParam === "yours" + ? "yours" + : projectUsage.length > 0 + ? "yours" + : "library"; + const setView = (next: ModelsTab) => replace({ tab: next }); const [compareSet, setCompareSet] = useState>(new Set()); const [showAllDetails, setShowAllDetails] = useState(false); const [compareOpen, setCompareOpen] = useState(false); @@ -1117,6 +1405,19 @@ export default function ModelsPage() { const compareModels = useMemo(() => Array.from(compareSet), [compareSet]); const allModels = useMemo(() => catalog.flatMap((g) => g.models), [catalog]); + // Resolve a used response_model (base or dated variant) to its catalog card, + // so a "Your models" row can open the same detail inspector as the library. + const modelLookup = useMemo(() => { + const map = new Map(); + for (const model of allModels) { + map.set(model.modelName, model); + for (const variant of model.variants) { + map.set(variant.modelName, model); + } + } + return map; + }, [allModels]); + return ( @@ -1126,24 +1427,67 @@ export default function ModelsPage() {
- setCompareOpen(true)} - showAllDetails={showAllDetails} - onToggleAllDetails={(checked) => setShowAllDetails(checked)} - /> - +
+ + setView("yours")} + > + Your models + + setView("library")} + > + Model library + + + {view === "yours" && ( +
+ +
+ )} +
+ {view === "yours" ? ( + setView("library")} + /> + ) : ( +
+ setCompareOpen(true)} + showAllDetails={showAllDetails} + onToggleAllDetails={(checked) => setShowAllDetails(checked)} + /> + +
+ )}
setSelectedModel(null)} /> )} From 57dd835c79e05fadb6e32be8f10138a5e4916d67 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Mon, 15 Jun 2026 18:00:54 +0100 Subject: [PATCH 2/7] fix(webapp): revalidate Models loader on project/env path change --- .../route.tsx | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx index 943d9ae221f..ca4144c9e2e 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx @@ -176,15 +176,19 @@ export function shouldRevalidate({ defaultShouldRevalidate, }: ShouldRevalidateFunctionArgs) { // The active tab is persisted in the URL (?tab=), but no loader data depends - // on it — so switching tabs must not refetch. Any other param change (period, - // from/to, …) revalidates as normal. + // on it — so switching tabs must not refetch. Any other change (a different + // project/environment in the path, or a period/from/to param) revalidates as + // normal, since the loader data is scoped to the path params + time range. const normalize = (url: URL) => { const params = new URLSearchParams(url.search); params.delete("tab"); params.sort(); return params.toString(); }; - if (normalize(currentUrl) === normalize(nextUrl)) { + if ( + currentUrl.pathname === nextUrl.pathname && + normalize(currentUrl) === normalize(nextUrl) + ) { return false; } return defaultShouldRevalidate; From 610ea59eceb39259acfcd1edc164f8e153191066 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Mon, 15 Jun 2026 18:15:35 +0100 Subject: [PATCH 3/7] fix(webapp): label model sparkline tooltips with their real bucket times The Your models sparklines use dynamic bucket sizes (6h at 7d, etc.), but the tooltip assumed hourly buckets and showed wrong dates. Thread the bucket interval and start through so each bar is labelled correctly. Also pin the library tab cross-tenant p50 TTFC column to a fixed 7-day window so it no longer follows the Your models time selector. --- .../components/primitives/UsageSparkline.tsx | 18 +++++++++++---- .../v3/ModelRegistryPresenter.server.ts | 21 ++++++++++++++--- .../route.tsx | 23 ++++++++++++++++--- 3 files changed, 51 insertions(+), 11 deletions(-) diff --git a/apps/webapp/app/components/primitives/UsageSparkline.tsx b/apps/webapp/app/components/primitives/UsageSparkline.tsx index 553dc4fc641..2ffc1936a1d 100644 --- a/apps/webapp/app/components/primitives/UsageSparkline.tsx +++ b/apps/webapp/app/components/primitives/UsageSparkline.tsx @@ -17,8 +17,12 @@ type UsageDatum = { date: Date; count: number }; type UnitLabel = { singular: string; plural: string }; export type UsageSparklineProps = { - /** Trailing 24 hourly buckets; the last entry is the most recent hour. */ + /** Equal-width time buckets, oldest first. */ data?: number[]; + /** Epoch ms of the first bucket's start. When omitted, the last bucket is anchored to now. */ + bucketStartMs?: number; + /** Width of each bucket in ms. Defaults to one hour. */ + bucketIntervalMs?: number; /** Bar colour. Defaults to blue. */ color?: string; /** Unit shown in the tooltip (e.g. calls, tokens). */ @@ -36,6 +40,8 @@ export type UsageSparklineProps = { */ export function UsageSparkline({ data, + bucketStartMs, + bucketIntervalMs, color = "#3B82F6", unitLabel = { singular: "call", plural: "calls" }, formatTotal, @@ -48,11 +54,13 @@ export function UsageSparkline({ const total = data.reduce((a, b) => a + b, 0); const max = Math.max(...data); - // Map the 24-bucket array to dated points so the tooltip can show the - // hour each bar represents. Bucket i is `23 - i` hours before now. - const now = new Date(); + // Map each bucket to a dated point so the tooltip can show the window it + // represents. Buckets are `intervalMs` wide; if the caller didn't pass the + // first bucket's start, anchor the last bucket to now (hourly default). + const intervalMs = bucketIntervalMs ?? 3600_000; + const startMs = bucketStartMs ?? Date.now() - (data.length - 1) * intervalMs; const chartData: UsageDatum[] = data.map((count, i) => ({ - date: new Date(now.getTime() - (data.length - 1 - i) * 3600_000), + date: new Date(startMs + i * intervalMs), count, })); diff --git a/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts b/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts index fddb92d897d..011a9ff8a12 100644 --- a/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts @@ -712,10 +712,23 @@ export class ModelRegistryPresenter extends BasePresenter { responseModels: string[], from: Date, to: Date - ): Promise<{ calls: Record; tokens: Record }> { - if (responseModels.length === 0) return { calls: {}, tokens: {} }; - + ): Promise<{ + calls: Record; + tokens: Record; + bucketIntervalMs: number; + bucketStartMs: number; + }> { const intervalSeconds = sparklineBucketSeconds(to.getTime() - from.getTime()); + const intervalMs = intervalSeconds * 1000; + // Epoch-aligned start of the first bucket, matching sparklineBucketKeys and + // ClickHouse toStartOfInterval. Returned so the sparkline tooltip can label + // each bar with its true time rather than assuming hourly buckets. + const bucketStartMs = Math.floor(from.getTime() / intervalMs) * intervalMs; + + if (responseModels.length === 0) { + return { calls: {}, tokens: {}, bucketIntervalMs: intervalMs, bucketStartMs }; + } + const bucketKeys = sparklineBucketKeys(from, to, intervalSeconds); // intervalSeconds is a server-derived integer from a fixed ladder, so it's @@ -760,6 +773,8 @@ export class ModelRegistryPresenter extends BasePresenter { return { calls: this.#buildSparklineMap(callsResult, responseModels, bucketKeys), tokens: this.#buildSparklineMap(tokensResult, responseModels, bucketKeys), + bucketIntervalMs: intervalMs, + bucketStartMs, }; } diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx index ca4144c9e2e..a3530223a28 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx @@ -138,10 +138,15 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { const to = parseFiniteInt(url.searchParams.get("to")); const time = timeFilterFromTo({ period, from, to, defaultPeriod: "7d" }); - // popularModels = cross-tenant aggregate (powers the library's p50 TTFC column). + // popularModels powers the library tab's cross-tenant p50 TTFC column — a + // stable "typical latency" reference, so it always uses a fixed 7-day window + // independent of the Your models time selector (the library tab has none). + const popularTo = new Date(); + const popularFrom = new Date(popularTo.getTime() - 7 * 24 * 60 * 60 * 1000); + // projectUsage = tenant-scoped models with usage in this env (the "Your models" tab). const [popularModels, projectUsage] = await Promise.all([ - presenter.getPopularModels(time.from, time.to, 50), + presenter.getPopularModels(popularFrom, popularTo, 50), presenter.getProjectModelUsage(project.id, environment.id, time.from, time.to), ]); @@ -1172,6 +1177,8 @@ function YourModelsTab({ usage, callSparklines, tokenSparklines, + bucketStartMs, + bucketIntervalMs, organizationId, projectId, environmentId, @@ -1186,6 +1193,8 @@ function YourModelsTab({ usage: ProjectModelUsageItem[]; callSparklines: Record; tokenSparklines: Record; + bucketStartMs: number; + bucketIntervalMs: number; organizationId: string; projectId: string; environmentId: string; @@ -1304,11 +1313,17 @@ function YourModelsTab({ {u.avgTps > 0 ? u.avgTps.toFixed(0) : "—"} - + formatNumberCompact(t)} @@ -1459,6 +1474,8 @@ export default function ModelsPage() { usage={projectUsage} callSparklines={usageSparklines.calls} tokenSparklines={usageSparklines.tokens} + bucketStartMs={usageSparklines.bucketStartMs} + bucketIntervalMs={usageSparklines.bucketIntervalMs} organizationId={organizationId} projectId={projectId} environmentId={environmentId} From b5a7a5696392d654eef84c377978f3413248f126 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Mon, 15 Jun 2026 18:47:35 +0100 Subject: [PATCH 4/7] fix(webapp): tidy Your models tab spacing and enlarge the charts --- .../route.tsx | 40 +++++++++++-------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx index a3530223a28..daac075ad2f 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx @@ -1219,9 +1219,9 @@ function YourModelsTab({ }; return ( -
-
-
+
+
+
-
+
-
+
) : ( - +
- Model - Provider - Calls - Cost - Avg TTFC - Avg tokens/sec - Calls trend - Tokens trend + Model + Provider + + Calls + + + Cost + + + Avg TTFC + + + Avg tokens/sec + + Calls trend + Tokens trend @@ -1446,8 +1454,8 @@ export default function ModelsPage() {
-
- +
+ Date: Tue, 16 Jun 2026 00:00:28 +0100 Subject: [PATCH 5/7] feat(webapp): add prompt-cache metrics to Models and AI metrics Your models gets a cache-savings column and per-model cached-tokens and cache-hit-rate views; the AI metrics dashboard gets a caching section (hit rate, cached tokens, estimated savings, hit rate by model). Also makes the Your models charts all time-series for consistency. --- .server-changes/models-page-usage-tabs.md | 2 +- .../presenters/v3/BuiltInDashboards.server.ts | 54 +++++++++++++-- .../v3/ModelRegistryPresenter.server.ts | 17 ++++- .../route.tsx | 66 +++++++++++++++---- 4 files changed, 121 insertions(+), 18 deletions(-) diff --git a/.server-changes/models-page-usage-tabs.md b/.server-changes/models-page-usage-tabs.md index da2f4f2fda8..6b37b45dd20 100644 --- a/.server-changes/models-page-usage-tabs.md +++ b/.server-changes/models-page-usage-tabs.md @@ -3,4 +3,4 @@ area: webapp type: feature --- -The Models page now has a Your models tab showing your project's model usage (cost, calls, latency, and trend sparklines over a selectable time range) alongside the full model library, which is ordered by provider relevance and release date. +The Models page now has a Your models tab showing your project's model usage (cost, calls, latency, prompt-cache savings, and trend sparklines over a selectable time range) alongside the full model library, ordered by provider relevance and release date. The AI metrics dashboard also gains a caching section with cache hit rate, cached tokens, and estimated savings. diff --git a/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts b/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts index 06b5ee2d406..03561ee7e20 100644 --- a/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts +++ b/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts @@ -252,8 +252,13 @@ const llmDashboard: BuiltInDashboard = { { i: "llm-cost-user", x: 6, y: 92, w: 6, h: 13 }, // Efficiency section { i: "llm-title-efficiency", x: 0, y: 105, w: 12, h: 2, minH: 2, maxH: 2 }, - { i: "llm-cost-operation", x: 0, y: 107, w: 6, h: 13 }, - { i: "llm-cache-util", x: 6, y: 107, w: 6, h: 13 }, + { i: "llm-cost-operation", x: 0, y: 107, w: 12, h: 13 }, + // Caching section + { i: "llm-title-caching", x: 0, y: 120, w: 12, h: 2, minH: 2, maxH: 2 }, + { i: "llm-cache-hit", x: 0, y: 122, w: 6, h: 13 }, + { i: "llm-cache-tokens", x: 6, y: 122, w: 6, h: 13 }, + { i: "llm-cache-savings", x: 0, y: 135, w: 6, h: 13 }, + { i: "llm-cache-by-model", x: 6, y: 135, w: 6, h: 13 }, ], widgets: { "llm-cost": { @@ -487,10 +492,11 @@ const llmDashboard: BuiltInDashboard = { aggregation: "sum", }, }, - "llm-cache-util": { - title: "Cache utilization", + "llm-title-caching": { title: "Caching", query: "", display: { type: "title" } }, + "llm-cache-hit": { + title: "Cache hit rate over time", query: - "SELECT\r\n timeBucket(),\r\n round(countIf(cached_read_tokens > 0) * 100.0 / count(), 1) AS cache_hit_pct,\r\n round(avg(cached_read_tokens), 0) AS avg_cached_tokens\r\nFROM\r\n llm_metrics\r\nGROUP BY\r\n timeBucket\r\nORDER BY\r\n timeBucket", + "SELECT timeBucket(), round(sum(cached_read_tokens) * 100.0 / (sum(input_tokens) + sum(cached_read_tokens)), 1) AS cache_hit_pct FROM llm_metrics GROUP BY timeBucket ORDER BY timeBucket", display: { type: "chart", chartType: "line", @@ -503,6 +509,44 @@ const llmDashboard: BuiltInDashboard = { aggregation: "avg", }, }, + "llm-cache-tokens": { + title: "Cached tokens over time", + query: + "SELECT timeBucket(), sum(cached_read_tokens) AS cache_reads, sum(cache_creation_tokens) AS cache_writes FROM llm_metrics GROUP BY timeBucket ORDER BY timeBucket", + display: { + type: "chart", + chartType: "bar", + xAxisColumn: "timebucket", + yAxisColumns: ["cache_reads", "cache_writes"], + groupByColumn: null, + stacked: true, + sortByColumn: null, + sortDirection: "asc", + aggregation: "sum", + }, + }, + "llm-cache-savings": { + title: "Cache savings over time", + query: + "SELECT timeBucket(), round(sum(cached_read_tokens) * (sum(input_cost) / (sum(input_tokens) + 1)) - sum(cached_read_cost), 4) AS cache_savings FROM llm_metrics WHERE cached_read_tokens > 0 GROUP BY timeBucket ORDER BY timeBucket", + display: { + type: "chart", + chartType: "bar", + xAxisColumn: "timebucket", + yAxisColumns: ["cache_savings"], + groupByColumn: null, + stacked: false, + sortByColumn: null, + sortDirection: "asc", + aggregation: "sum", + }, + }, + "llm-cache-by-model": { + title: "Cache hit rate by model", + query: + "SELECT response_model, round(sum(cached_read_tokens) * 100.0 / (sum(input_tokens) + sum(cached_read_tokens)), 1) AS cache_hit_pct, sum(cached_read_tokens) AS cached_tokens FROM llm_metrics GROUP BY response_model ORDER BY cached_tokens DESC LIMIT 20", + display: { type: "table", prettyFormatting: true, sorting: [] }, + }, }, }, }; diff --git a/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts b/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts index 011a9ff8a12..b8565b87011 100644 --- a/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts @@ -229,6 +229,12 @@ export type ProjectModelUsageItem = { totalTokens: number; avgTtfc: number; avgTps: number; + /** Input tokens (used as the denominator for the cache read rate). */ + inputTokens: number; + /** Input tokens served from the provider's prompt cache. */ + cachedReadTokens: number; + /** Actual (discounted) cost of those cached read tokens. */ + cachedReadCost: number; }; // --- ClickHouse schemas for user metrics --- @@ -256,6 +262,9 @@ const ProjectModelUsageRow = z.object({ total_tokens: z.coerce.number(), avg_ttfc: z.coerce.number(), avg_tps: z.coerce.number(), + input_tokens: z.coerce.number(), + cached_read_tokens: z.coerce.number(), + cached_read_cost: z.coerce.number(), }); const ModelSparklineRow = z.object({ @@ -661,7 +670,10 @@ export class ModelRegistryPresenter extends BasePresenter { sum(total_cost) AS total_cost, sum(total_tokens) AS total_tokens, round(avg(ms_to_first_chunk), 1) AS avg_ttfc, - round(avg(tokens_per_second), 1) AS avg_tps + round(avg(tokens_per_second), 1) AS avg_tps, + sum(input_tokens) AS input_tokens, + sum(usage_details['input_cached_tokens']) AS cached_read_tokens, + sum(cost_details['input_cached_tokens']) AS cached_read_cost FROM trigger_dev.llm_metrics_v1 WHERE project_id = {projectId: String} AND environment_id = {environmentId: String} @@ -698,6 +710,9 @@ export class ModelRegistryPresenter extends BasePresenter { totalTokens: r.total_tokens, avgTtfc: r.avg_ttfc, avgTps: r.avg_tps, + inputTokens: r.input_tokens, + cachedReadTokens: r.cached_read_tokens, + cachedReadCost: r.cached_read_cost, })); } diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx index daac075ad2f..a3a60b88e37 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx @@ -1125,6 +1125,17 @@ function DetailYourUsageTab({ {...widgetProps} />
+
+ +
+
+ +
@@ -1270,22 +1297,25 @@ function YourModelsTab({
- Model - Provider - + Model + Provider + Calls - + Cost + Cache savings + + Avg TTFC - + Avg tokens/sec - Calls trend - Tokens trend + Calls trend + Tokens trend @@ -1294,6 +1324,13 @@ function YourModelsTab({ const provider = catalogItem?.provider ?? u.genAiSystem; const displayId = catalogItem?.displayId ?? `${provider}:${u.responseModel}`; const select = catalogItem ? () => onSelectModel(catalogItem) : undefined; + // Savings = cached reads valued at the normal input rate minus what + // they actually cost. Needs the model's input price from the catalog. + const inputPrice = catalogItem?.inputPrice ?? null; + const cacheSavings = + inputPrice != null && u.cachedReadTokens > 0 + ? Math.max(0, u.cachedReadTokens * inputPrice - u.cachedReadCost) + : null; return ( {formatModelCost(u.totalCost)} + + {cacheSavings != null ? formatModelCost(cacheSavings) : "—"} + {u.avgTtfc > 0 ? `${u.avgTtfc.toFixed(0)}ms` : "—"} From adf068416bacb10923a88a25fce2b568c186ed9e Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Tue, 16 Jun 2026 10:02:30 +0100 Subject: [PATCH 6/7] fix(webapp): harden cache metric and sparkline queries The cache hit-rate and savings queries divided by zero for models with no cached tokens, surfacing NaN or empty widgets; they now return 0 via ifNull/nullIf. Model usage sparklines bucketed on a timezone-dependent DateTime string, which could misalign bars with the charts above them; they now key on toUnixTimestamp so buckets line up regardless of the ClickHouse server timezone. --- .../presenters/v3/BuiltInDashboards.server.ts | 6 +++--- .../v3/ModelRegistryPresenter.server.ts | 19 ++++++++++--------- .../route.tsx | 2 +- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts b/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts index 03561ee7e20..c8e74e30f6e 100644 --- a/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts +++ b/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts @@ -496,7 +496,7 @@ const llmDashboard: BuiltInDashboard = { "llm-cache-hit": { title: "Cache hit rate over time", query: - "SELECT timeBucket(), round(sum(cached_read_tokens) * 100.0 / (sum(input_tokens) + sum(cached_read_tokens)), 1) AS cache_hit_pct FROM llm_metrics GROUP BY timeBucket ORDER BY timeBucket", + "SELECT timeBucket(), round(ifNull(sum(cached_read_tokens) * 100.0 / nullIf(sum(input_tokens) + sum(cached_read_tokens), 0), 0), 1) AS cache_hit_pct FROM llm_metrics GROUP BY timeBucket ORDER BY timeBucket", display: { type: "chart", chartType: "line", @@ -528,7 +528,7 @@ const llmDashboard: BuiltInDashboard = { "llm-cache-savings": { title: "Cache savings over time", query: - "SELECT timeBucket(), round(sum(cached_read_tokens) * (sum(input_cost) / (sum(input_tokens) + 1)) - sum(cached_read_cost), 4) AS cache_savings FROM llm_metrics WHERE cached_read_tokens > 0 GROUP BY timeBucket ORDER BY timeBucket", + "SELECT timeBucket(), round(ifNull(sum(cached_read_tokens) * (sum(input_cost) / nullIf(sum(input_tokens), 0)) - sum(cached_read_cost), 0), 4) AS cache_savings FROM llm_metrics WHERE cached_read_tokens > 0 GROUP BY timeBucket ORDER BY timeBucket", display: { type: "chart", chartType: "bar", @@ -544,7 +544,7 @@ const llmDashboard: BuiltInDashboard = { "llm-cache-by-model": { title: "Cache hit rate by model", query: - "SELECT response_model, round(sum(cached_read_tokens) * 100.0 / (sum(input_tokens) + sum(cached_read_tokens)), 1) AS cache_hit_pct, sum(cached_read_tokens) AS cached_tokens FROM llm_metrics GROUP BY response_model ORDER BY cached_tokens DESC LIMIT 20", + "SELECT response_model, round(ifNull(sum(cached_read_tokens) * 100.0 / nullIf(sum(input_tokens) + sum(cached_read_tokens), 0), 0), 1) AS cache_hit_pct, sum(cached_read_tokens) AS cached_tokens FROM llm_metrics GROUP BY response_model ORDER BY cached_tokens DESC LIMIT 20", display: { type: "table", prettyFormatting: true, sorting: [] }, }, }, diff --git a/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts b/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts index b8565b87011..364a5a58c00 100644 --- a/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts @@ -96,16 +96,17 @@ function sparklineBucketSeconds(rangeMs: number): number { /** * Generate the ordered bucket-start keys for [from, to] at the given interval, - * epoch-aligned in UTC to exactly match ClickHouse's - * `toStartOfInterval(col, INTERVAL n SECOND)` output strings ("YYYY-MM-DD HH:MM:SS"). + * as epoch seconds to match ClickHouse's + * `toUnixTimestamp(toStartOfInterval(col, INTERVAL n SECOND))` — timezone-independent + * (a raw DateTime string would depend on the ClickHouse server timezone). */ -function sparklineBucketKeys(from: Date, to: Date, intervalSeconds: number): string[] { +function sparklineBucketKeys(from: Date, to: Date, intervalSeconds: number): number[] { const intervalMs = intervalSeconds * 1000; const start = Math.floor(from.getTime() / intervalMs) * intervalMs; const end = Math.floor(to.getTime() / intervalMs) * intervalMs; - const keys: string[] = []; + const keys: number[] = []; for (let t = start; t <= end; t += intervalMs) { - keys.push(new Date(t).toISOString().slice(0, 19).replace("T", " ")); + keys.push(t / 1000); } return keys; } @@ -269,7 +270,7 @@ const ProjectModelUsageRow = z.object({ const ModelSparklineRow = z.object({ response_model: z.string(), - bucket: z.string(), + bucket: z.coerce.number(), val: z.coerce.number(), }); @@ -754,7 +755,7 @@ export class ModelRegistryPresenter extends BasePresenter { query: ` SELECT response_model, - toStartOfInterval(start_time, INTERVAL ${intervalSeconds} SECOND) AS bucket, + toUnixTimestamp(toStartOfInterval(start_time, INTERVAL ${intervalSeconds} SECOND)) AS bucket, ${valueExpr} AS val FROM trigger_dev.llm_metrics_v1 WHERE environment_id = {environmentId: String} @@ -797,9 +798,9 @@ export class ModelRegistryPresenter extends BasePresenter { #buildSparklineMap( queryResult: | [Error, null] - | [null, { response_model: string; bucket: string; val: number }[]], + | [null, { response_model: string; bucket: number; val: number }[]], keys: string[], - bucketKeys: string[] + bucketKeys: number[] ): Record { const [error, rows] = queryResult; if (error || !rows) return {}; diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx index a3a60b88e37..aa6155b1a65 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx @@ -1171,7 +1171,7 @@ function DetailYourUsageTab({ Date: Tue, 16 Jun 2026 10:45:49 +0100 Subject: [PATCH 7/7] fix(webapp,llm-model-catalog): stop double-counting cached input tokens input_tokens is the total prompt count, inclusive of cache-read and cache-creation tokens. The cost pipeline charged the full input count at the input price and then added a separate cache line, so cached tokens were billed twice (e.g. ~2.4x on OpenAI), and the cache hit-rate metric divided cached reads by input + cached, understating the rate. Charge the input price only on the fresh (non-cached) remainder, resolve cache prices across provider alias keys (falling back to input price so cache tokens are never free), and compute the hit rate as cached / input. --- .../llm-cost-cached-token-double-charge.md | 6 + .../presenters/v3/BuiltInDashboards.server.ts | 6 +- .../route.tsx | 2 +- .../llm-model-catalog/src/registry.test.ts | 105 +++++++++++++++++- .../llm-model-catalog/src/registry.ts | 62 +++++++++++ 5 files changed, 173 insertions(+), 8 deletions(-) create mode 100644 .server-changes/llm-cost-cached-token-double-charge.md diff --git a/.server-changes/llm-cost-cached-token-double-charge.md b/.server-changes/llm-cost-cached-token-double-charge.md new file mode 100644 index 00000000000..c34b52de7a4 --- /dev/null +++ b/.server-changes/llm-cost-cached-token-double-charge.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: fix +--- + +LLM cost no longer double-counts cached input tokens. Prompt-cache reads and writes are now billed once at their cache rate instead of also being charged at the full input price, so cost and cache hit-rate figures on the AI metrics dashboard and Models page are accurate. diff --git a/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts b/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts index c8e74e30f6e..4f62fc00b86 100644 --- a/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts +++ b/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts @@ -496,7 +496,7 @@ const llmDashboard: BuiltInDashboard = { "llm-cache-hit": { title: "Cache hit rate over time", query: - "SELECT timeBucket(), round(ifNull(sum(cached_read_tokens) * 100.0 / nullIf(sum(input_tokens) + sum(cached_read_tokens), 0), 0), 1) AS cache_hit_pct FROM llm_metrics GROUP BY timeBucket ORDER BY timeBucket", + "SELECT timeBucket(), round(ifNull(sum(cached_read_tokens) * 100.0 / nullIf(sum(input_tokens), 0), 0), 1) AS cache_hit_pct FROM llm_metrics GROUP BY timeBucket ORDER BY timeBucket", display: { type: "chart", chartType: "line", @@ -528,7 +528,7 @@ const llmDashboard: BuiltInDashboard = { "llm-cache-savings": { title: "Cache savings over time", query: - "SELECT timeBucket(), round(ifNull(sum(cached_read_tokens) * (sum(input_cost) / nullIf(sum(input_tokens), 0)) - sum(cached_read_cost), 0), 4) AS cache_savings FROM llm_metrics WHERE cached_read_tokens > 0 GROUP BY timeBucket ORDER BY timeBucket", + "SELECT timeBucket(), round(ifNull(sum(cached_read_tokens) * (sum(input_cost) / nullIf(sum(input_tokens) - sum(cached_read_tokens) - sum(cache_creation_tokens), 0)) - sum(cached_read_cost), 0), 4) AS cache_savings FROM llm_metrics WHERE cached_read_tokens > 0 GROUP BY timeBucket ORDER BY timeBucket", display: { type: "chart", chartType: "bar", @@ -544,7 +544,7 @@ const llmDashboard: BuiltInDashboard = { "llm-cache-by-model": { title: "Cache hit rate by model", query: - "SELECT response_model, round(ifNull(sum(cached_read_tokens) * 100.0 / nullIf(sum(input_tokens) + sum(cached_read_tokens), 0), 0), 1) AS cache_hit_pct, sum(cached_read_tokens) AS cached_tokens FROM llm_metrics GROUP BY response_model ORDER BY cached_tokens DESC LIMIT 20", + "SELECT response_model, round(ifNull(sum(cached_read_tokens) * 100.0 / nullIf(sum(input_tokens), 0), 0), 1) AS cache_hit_pct, sum(cached_read_tokens) AS cached_tokens FROM llm_metrics GROUP BY response_model ORDER BY cached_tokens DESC LIMIT 20", display: { type: "table", prettyFormatting: true, sorting: [] }, }, }, diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx index aa6155b1a65..a412311badc 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx @@ -1171,7 +1171,7 @@ function DetailYourUsageTab({ { let registry: TestableRegistry; beforeEach(() => { registry = new TestableRegistry(null as any); - registry.loadPatterns([gpt4o, claudeSonnet]); + registry.loadPatterns([gpt4o, claudeSonnet, claudeWithCache, noCachePrice]); }); describe("match", () => { @@ -129,7 +176,10 @@ describe("ModelPricingRegistry", () => { expect(result!.totalCost).toBeCloseTo(0.0035); }); - it("should include cached token costs", () => { + it("should include cached token costs and charge input only on the fresh portion", () => { + // input_tokens (500) is inclusive of the 200 cached read tokens, so the input price + // applies to the 300 fresh tokens and the cache price to the 200 cached tokens — the + // cached tokens must not be billed twice. const result = registry.calculateCost("gpt-4o", { input: 500, output: 50, @@ -137,10 +187,57 @@ describe("ModelPricingRegistry", () => { }); expect(result).not.toBeNull(); - expect(result!.costDetails["input"]).toBeCloseTo(0.00125); // 500 * 0.0000025 + expect(result!.costDetails["input"]).toBeCloseTo(0.00075); // (500 - 200) * 0.0000025 expect(result!.costDetails["output"]).toBeCloseTo(0.0005); // 50 * 0.00001 expect(result!.costDetails["input_cached_tokens"]).toBeCloseTo(0.00025); // 200 * 0.00000125 - expect(result!.totalCost).toBeCloseTo(0.002); + expect(result!.totalCost).toBeCloseTo(0.0015); + }); + + it("should not double-charge cache creation tokens (subset of input)", () => { + // input (1000) is inclusive of both the 400 cache-read and 300 cache-creation tokens. + const result = registry.calculateCost("claude-with-cache", { + input: 1000, + output: 100, + input_cached_tokens: 400, + cache_creation_input_tokens: 300, + }); + + expect(result).not.toBeNull(); + // fresh input = 1000 - 400 - 300 = 300 + expect(result!.costDetails["input"]).toBeCloseTo(0.0009); // 300 * 0.000003 + expect(result!.costDetails["input_cached_tokens"]).toBeCloseTo(0.00012); // 400 * 0.0000003 + expect(result!.costDetails["cache_creation_input_tokens"]).toBeCloseTo(0.001125); // 300 * 0.00000375 + expect(result!.costDetails["output"]).toBeCloseTo(0.0015); // 100 * 0.000015 + // 0.0009 + 0.00012 + 0.001125 + 0.0015 + expect(result!.totalCost).toBeCloseTo(0.003645); + }); + + it("should apply the cache-read discount when priced under a provider alias key", () => { + // The usage is normalized to `input_cached_tokens` but this model prices cache reads + // under `cache_read_input_tokens` — the discount must still apply. + const result = registry.calculateCost("claude-with-cache", { + input: 1000, + input_cached_tokens: 400, + }); + + expect(result).not.toBeNull(); + expect(result!.costDetails["input"]).toBeCloseTo(0.0018); // (1000 - 400) * 0.000003 + expect(result!.costDetails["input_cached_tokens"]).toBeCloseTo(0.00012); // 400 * 0.0000003 + expect(result!.totalCost).toBeCloseTo(0.00192); + }); + + it("should fall back to the input price for cache tokens when no cache price exists", () => { + // no-cache-price model has only input/output prices; cached tokens must still be billed + // (at the input price) — never free, never double-charged. Total equals input * price. + const result = registry.calculateCost("no-cache-price", { + input: 1000, + input_cached_tokens: 400, + }); + + expect(result).not.toBeNull(); + expect(result!.costDetails["input"]).toBeCloseTo(0.0018); // (1000 - 400) * 0.000003 + expect(result!.costDetails["input_cached_tokens"]).toBeCloseTo(0.0012); // 400 * 0.000003 + expect(result!.totalCost).toBeCloseTo(0.003); // 1000 * 0.000003 — unchanged from no-cache behavior }); it("should return null for unknown model", () => { diff --git a/internal-packages/llm-model-catalog/src/registry.ts b/internal-packages/llm-model-catalog/src/registry.ts index 80da40ba980..841234561cf 100644 --- a/internal-packages/llm-model-catalog/src/registry.ts +++ b/internal-packages/llm-model-catalog/src/registry.ts @@ -147,7 +147,69 @@ export class ModelPricingRegistry { const costDetails: Record = {}; let totalCost = 0; + // `input_tokens` (the "input" usage value) is the TOTAL prompt token count and is + // inclusive of cache-read and cache-creation tokens — providers report it that way and + // the AI SDK passes it through (verified: total_tokens == input + output, never the + // sum of the decomposed parts). Cache reads/writes are therefore a SUBSET of input, not + // additional to it. Charging the full input count at the input price AND charging a + // separate cache line double-counts those tokens, so the input price must apply only to + // the fresh (non-cached) remainder. + const priceByType = new Map(tier.prices.map((p) => [p.usageType, p.price])); + const resolvePrice = (aliases: string[]): number | undefined => { + for (const alias of aliases) { + const price = priceByType.get(alias); + if (price !== undefined) return price; + } + return undefined; + }; + + const inputPrice = resolvePrice(["input", "input_tokens"]) ?? 0; + const cacheReadTokens = usageDetails["input_cached_tokens"] ?? 0; + const cacheCreationTokens = usageDetails["cache_creation_input_tokens"] ?? 0; + + // Providers price cache reads/writes under provider-specific keys, but our usage details + // normalize them to `input_cached_tokens` / `cache_creation_input_tokens`. Resolve the + // matching price across the known aliases, falling back to the input price so cache tokens + // are never billed for free and never dropped when a model lacks a dedicated cache price. + const cacheReadPrice = + resolvePrice(["input_cached_tokens", "input_cache_read", "cache_read_input_tokens"]) ?? + inputPrice; + const cacheCreationPrice = + resolvePrice([ + "cache_creation_input_tokens", + "input_cache_creation", + "input_cache_creation_5m", + ]) ?? inputPrice; + + const totalInputTokens = usageDetails["input"] ?? usageDetails["input_tokens"] ?? 0; + const freshInputTokens = Math.max(0, totalInputTokens - cacheReadTokens - cacheCreationTokens); + + const addCost = (usageType: string, tokenCount: number, price: number) => { + if (tokenCount <= 0 || price <= 0) return; + const cost = tokenCount * price; + costDetails[usageType] = (costDetails[usageType] ?? 0) + cost; + totalCost += cost; + }; + + addCost("input", freshInputTokens, inputPrice); + addCost("input_cached_tokens", cacheReadTokens, cacheReadPrice); + addCost("cache_creation_input_tokens", cacheCreationTokens, cacheCreationPrice); + + // Charge every remaining usage type generically. The input + cache types are handled + // above (and their alias keys skipped here) so they are never charged twice. + const handledUsageTypes = new Set([ + "input", + "input_tokens", + "input_cached_tokens", + "input_cache_read", + "cache_read_input_tokens", + "cache_creation_input_tokens", + "input_cache_creation", + "input_cache_creation_5m", + "input_cache_creation_1h", + ]); for (const priceEntry of tier.prices) { + if (handledUsageTypes.has(priceEntry.usageType)) continue; const tokenCount = usageDetails[priceEntry.usageType] ?? 0; if (tokenCount === 0) continue; const cost = tokenCount * priceEntry.price;