From 5e36725c14a2fef39d0e41e182d37dd89766c31d Mon Sep 17 00:00:00 2001
From: Eric Allam <eallam@icloud.com>
Date: Mon, 15 Jun 2026 17:47:39 +0100
Subject: [PATCH 1/7] feat(webapp): split Models into Your models and Model
 library tabs

---
 .server-changes/models-page-usage-tabs.md     |   6 +
 .../app/assets/icons/AiProviderIcons.tsx      |  34 +-
 .../components/primitives/UsageSparkline.tsx  | 115 +++++
 .../v3/ModelRegistryPresenter.server.ts       | 238 +++++++++-
 .../route.tsx                                 | 413 ++++++++++++++++--
 5 files changed, 754 insertions(+), 52 deletions(-)
 create mode 100644 .server-changes/models-page-usage-tabs.md
 create mode 100644 apps/webapp/app/components/primitives/UsageSparkline.tsx
diff --git a/.server-changes/models-page-usage-tabs.md b/.server-changes/models-page-usage-tabs.md
new file mode 100644
index 00000000000..da2f4f2fda8
--- /dev/null
+++ b/.server-changes/models-page-usage-tabs.md
@@ -0,0 +1,6 @@
+---
+area: webapp
+type: feature
+---
+
+The Models page now has a "Your models" tab showing your project's model usage (cost, calls, latency, and trend sparklines over a selectable time range) alongside a "Model library" tab with the full model library, which is ordered by provider relevance and release date.
diff --git a/apps/webapp/app/assets/icons/AiProviderIcons.tsx b/apps/webapp/app/assets/icons/AiProviderIcons.tsx
index 85a01b98d63..2be3fe38ed7 100644
--- a/apps/webapp/app/assets/icons/AiProviderIcons.tsx
+++ b/apps/webapp/app/assets/icons/AiProviderIcons.tsx
@@ -46,8 +46,8 @@ export function LlamaIcon({ className }: IconProps) {
       xmlns="http://www.w3.org/2000/svg"
     >
       <path
-        fill-rule="evenodd"
-        clip-rule="evenodd"
+        fillRule="evenodd"
+        clipRule="evenodd"
         d="M3.4485 2C4.406 2 5.2065 2.466 6.1635 3.688L6.3045 3.5015C6.3995 3.3785 6.496 3.2595 6.5945 3.1465L6.751 2.9715C7.294 2.394 7.896 2 8.6125 2C9.249 2 9.847 2.2785 10.358 2.758L10.467 2.8645C11.332 3.747 11.9255 5.2195 11.9935 6.8775L11.999 7.0735L12 7.1985C12 7.949 11.86 8.578 11.591 9.0485L11.521 9.1635L11.467 9.24C11.3165 9.45 11.135 9.619 10.924 9.7445L10.7915 9.8155L10.748 9.8355C10.6986 9.85749 10.6482 9.87718 10.597 9.8945C10.3825 9.96542 10.1579 10.0006 9.932 9.9985C9.67 9.9985 9.434 9.965 9.213 9.891C8.906 9.789 8.6315 9.611 8.35 9.333L8.2365 9.2155C7.86 8.8095 7.4695 8.2275 6.99 7.4225L6.275 6.2175L6.003 5.77L5.12 7.335L4.9485 7.631C3.7985 9.578 3.1135 10 2.178 10C1.573 10 1.0755 9.79 0.71 9.409L0.626 9.317C0.384 9.0305 0.2075 8.6615 0.1045 8.2225L0.071 8.0625C0.0323456 7.84982 0.00961585 7.63456 0.003 7.4185L0 7.234C0.001 6.8615 0.03 6.489 0.087 6.119L0.137 5.8325C0.286 5.0675 0.551 4.3535 0.905 3.754L1.0095 3.584C1.598 2.669 2.404 2.0575 3.317 2.004L3.4485 2ZM3.432 3.3075L3.3315 3.3125C2.9165 3.354 2.5285 3.649 2.2055 4.101L2.1365 4.2005L2.1315 4.2095C1.7965 4.718 1.539 5.3985 1.4035 6.132L1.4015 6.143C1.33323 6.5148 1.29859 6.89199 1.298 7.27L1.299 7.364C1.301 7.454 1.3075 7.544 1.319 7.634L1.3405 7.7795C1.3865 8.031 1.469 8.2335 1.5835 8.3835L1.642 8.452C1.7935 8.6135 1.991 8.698 2.227 8.698C2.777 8.698 3.125 8.36 4.075 6.8775L5.1625 5.1775L5.3895 4.827L5.32 4.728C4.555 3.65 4.042 3.3075 3.432 3.3075ZM8.53 3.0315L8.442 3.035C8.1245 3.059 7.8305 3.2145 7.532 3.5015L7.434 3.6005C7.2145 3.8315 6.9905 4.1325 6.7505 4.504L6.8835 4.703C6.9735 4.84 7.0645 4.983 7.1585 5.132L7.305 5.3695L8.003 6.537L8.3505 7.094C8.642 7.557 8.8655 7.894 9.0545 8.135L9.161 8.266C9.302 8.429 9.4255 8.536 9.5495 8.6025L9.6005 8.6275C9.714 8.6775 9.829 8.6965 9.9595 8.6965C10.0475 8.6975 10.1345 8.685 10.2185 8.66C10.3875 8.608 10.5235 8.5 10.625 8.3415L10.6725 8.26L10.711 8.179C10.808 7.9495 10.856 7.649 10.856 7.2865L10.853 7.062C10.813 5.6265 10.384 4.376 9.753 
3.663L9.665 3.5685C9.33 3.227 8.943 3.0315 8.53 3.0315Z"
         fill="currentColor"
       />
@@ -58,10 +58,10 @@ export function LlamaIcon({ className }: IconProps) {
 export function DeepseekIcon({ className }: IconProps) {
   return (
     <svg className={className} viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
-      <g clip-path="url(#clip0_20374_57805)">
+      <g clipPath="url(#clip0_20374_57805)">
         <path
-          fill-rule="evenodd"
-          clip-rule="evenodd"
+          fillRule="evenodd"
+          clipRule="evenodd"
           d="M23.7479 4.48176C23.4939 4.35776 23.384 4.59476 23.236 4.71576C23.185 4.75476 23.142 4.80576 23.099 4.85176C22.727 5.24876 22.293 5.50876 21.726 5.47776C20.897 5.43176 20.189 5.69176 19.563 6.32576C19.43 5.54376 18.988 5.07776 18.316 4.77776C17.964 4.62176 17.608 4.46676 17.361 4.12776C17.189 3.88676 17.142 3.61776 17.056 3.35376C17.001 3.19376 16.946 3.03076 16.763 3.00376C16.563 2.97276 16.4849 3.13976 16.4069 3.27976C16.094 3.85176 15.9729 4.48176 15.9849 5.11976C16.0119 6.55576 16.618 7.69976 17.823 8.51276C17.96 8.60576 17.995 8.69976 17.952 8.83576C17.87 9.11576 17.772 9.38776 17.686 9.66876C17.631 9.84776 17.549 9.88576 17.357 9.80876C16.7082 9.52995 16.1189 9.12939 15.6209 8.62876C14.7639 7.80076 13.9899 6.88676 13.0239 6.17076C12.8001 6.00537 12.5703 5.84827 12.3349 5.69976C11.3499 4.74276 12.4649 3.95676 12.7229 3.86376C12.9929 3.76576 12.8159 3.43176 11.9439 3.43576C11.0719 3.43976 10.2739 3.73076 9.25695 4.11976C9.10582 4.17767 8.95033 4.22348 8.79195 4.25676C7.84158 4.07769 6.8696 4.0433 5.90895 4.15476C4.02395 4.36476 2.51895 5.25676 1.41195 6.77776C0.0819496 8.60576 -0.23105 10.6838 0.15195 12.8498C0.55495 15.1338 1.72095 17.0248 3.51195 18.5028C5.36995 20.0358 7.50895 20.7868 9.94995 20.6428C11.4319 20.5578 13.0829 20.3588 14.9439 18.7828C15.4139 19.0168 15.906 19.1098 16.724 19.1798C17.354 19.2388 17.96 19.1498 18.429 19.0518C19.164 18.8958 19.1129 18.2148 18.8479 18.0908C16.693 17.0868 17.166 17.4958 16.735 17.1648C17.831 15.8688 19.481 14.5228 20.127 10.1618C20.177 9.81476 20.134 9.59676 20.127 9.31676C20.123 9.14676 20.162 9.07976 20.357 9.06076C20.898 9.00463 21.4228 8.84327 21.902 8.58576C23.298 7.82276 23.862 6.57076 23.995 5.06876C24.015 4.83876 23.9909 4.60276 23.7479 4.48176ZM11.5809 17.9998C9.49195 16.3578 8.47895 15.8168 8.06095 15.8398C7.66895 15.8638 7.73995 16.3108 7.82595 16.6028C7.91595 16.8908 8.03295 17.0888 8.19695 17.3418C8.31095 17.5088 8.38895 17.7578 8.08395 17.9448C7.41095 18.3608 6.24195 17.8048 6.18695 
17.7778C4.82595 16.9758 3.68695 15.9178 2.88595 14.4708C2.11195 13.0778 1.66195 11.5838 1.58795 9.98876C1.56795 9.60276 1.68095 9.46676 2.06495 9.39676C2.56906 9.30029 3.08558 9.28711 3.59395 9.35776C5.72595 9.66976 7.53995 10.6228 9.06195 12.1318C9.92995 12.9918 10.5869 14.0188 11.2639 15.0228C11.9839 16.0888 12.7579 17.1048 13.7439 17.9368C14.0919 18.2288 14.3689 18.4508 14.6349 18.6138C13.8329 18.7038 12.4949 18.7238 11.5809 17.9998ZM12.5809 11.5598C12.5808 11.5101 12.5927 11.4611 12.6157 11.4171C12.6387 11.373 12.672 11.3353 12.7129 11.307C12.7538 11.2787 12.8009 11.2609 12.8502 11.2549C12.8995 11.2489 12.9495 11.2551 12.9959 11.2728C13.0551 11.294 13.1062 11.3331 13.142 11.3848C13.1779 11.4364 13.1967 11.4979 13.1959 11.5608C13.1961 11.6014 13.1881 11.6416 13.1726 11.6791C13.157 11.7166 13.1341 11.7506 13.1053 11.7792C13.0764 11.8078 13.0422 11.8303 13.0045 11.8455C12.9669 11.8607 12.9266 11.8683 12.8859 11.8678C12.8457 11.8679 12.8057 11.86 12.7685 11.8445C12.7313 11.829 12.6976 11.8063 12.6693 11.7776C12.641 11.7489 12.6186 11.7149 12.6037 11.6775C12.5887 11.6401 12.5803 11.6 12.5809 11.5598ZM15.6909 13.1558C15.4909 13.2368 15.2919 13.3068 15.1009 13.3158C14.8136 13.3258 14.5316 13.236 14.3029 13.0618C14.0289 12.8318 13.8329 12.7038 13.7509 12.3038C13.7227 12.1083 13.7281 11.9094 13.7669 11.7158C13.8369 11.3888 13.7589 11.1788 13.5279 10.9888C13.3409 10.8328 13.1019 10.7898 12.8399 10.7898C12.7502 10.7845 12.6631 10.7578 12.5859 10.7118C12.4759 10.6578 12.3859 10.5218 12.4719 10.3538C12.4999 10.2998 12.6319 10.1678 12.6639 10.1438C13.0199 9.94176 13.4309 10.0078 13.8099 10.1598C14.1619 10.3038 14.4279 10.5678 14.8109 10.9418C15.2019 11.3928 15.2729 11.5178 15.4959 11.8558C15.6719 12.1208 15.8319 12.3928 15.9409 12.7038C16.0079 12.8988 15.9219 13.0578 15.6909 13.1558Z"
           fill="currentColor"
         />
@@ -99,8 +99,8 @@ export function PerplexityIcon({ className }: IconProps) {
   return (
     <svg className={className} viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
       <path
-        fill-rule="evenodd"
-        clip-rule="evenodd"
+        fillRule="evenodd"
+        clipRule="evenodd"
         d="M18.4875 2V8.06H20.75V16.6833H18.3042V22L12.44 16.8383V21.9592H11.5308V16.8325L5.66 22V16.6125H3.25V7.99H5.65333V2L11.5308 7.41167V2.15833H12.4392V7.56667L18.4875 2ZM12.44 9.53667V15.6358L17.395 19.9975V14.0333L12.44 9.53667ZM11.5242 9.47L6.56917 13.9683V19.9975L11.5242 15.6358V9.47083V9.47ZM18.3042 15.7867H19.8408V8.9575H13.2167L18.3042 13.5742V15.7867ZM10.8192 8.88667H4.15833V15.7158H5.65833V13.5692L10.8183 8.88583L10.8192 8.88667ZM6.5625 4.06333V7.98833H10.825L6.5625 4.06333ZM17.5783 4.06333L13.3158 7.98833H17.5783V4.06333Z"
         fill="currentColor"
       />
@@ -112,32 +112,32 @@ export function CerebrasIcon({ className }: IconProps) {
   return (
     <svg className={className} viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
       <path
-        fill-rule="evenodd"
-        clip-rule="evenodd"
+        fillRule="evenodd"
+        clipRule="evenodd"
         d="M11.6535 20.6834C10.4382 20.6834 9.28717 20.4401 8.23625 20.0036C6.66345 19.3453 5.31944 18.2433 4.36862 16.8551C3.4178 15.4669 2.86732 13.7997 2.86732 11.9964C2.86732 10.7943 3.11039 9.65655 3.56078 8.61182C4.22564 7.0519 5.3409 5.72809 6.7421 4.7907C8.1433 3.85331 9.83047 3.30948 11.6535 3.30948V2C10.2594 2 8.92972 2.27907 7.71437 2.78712C5.8985 3.54562 4.35432 4.81217 3.26767 6.40788C2.17386 8.00356 1.5376 9.92844 1.5376 11.9964C1.5376 13.3774 1.82356 14.6941 2.33114 15.8891C3.09609 17.6851 4.38291 19.2093 5.99144 20.2898C7.60713 21.3703 9.55167 22 11.6463 22V20.6834H11.6535Z"
         fill="currentColor"
       />
       <path
-        fill-rule="evenodd"
-        clip-rule="evenodd"
+        fillRule="evenodd"
+        clipRule="evenodd"
         d="M7.14949 17.3272C6.32735 16.6403 5.71253 15.8102 5.2979 14.9014C4.88323 13.9927 4.66877 13.0124 4.66877 12.0249C4.66877 11.2378 4.8046 10.4506 5.06911 9.69931C5.34079 8.94794 5.74113 8.23241 6.29159 7.58122C6.9779 6.7655 7.81433 6.15012 8.72225 5.73508C9.63021 5.32005 10.6239 5.11254 11.6105 5.11254C12.3969 5.11254 13.1904 5.24849 13.9411 5.51325C14.6989 5.78516 15.4138 6.18588 16.0643 6.7297L16.9151 5.72077C16.143 5.07676 15.2851 4.59018 14.3843 4.27533C13.4835 3.95332 12.547 3.7959 11.6105 3.7959C10.4309 3.7959 9.25846 4.04634 8.17178 4.54009C7.08514 5.03382 6.09141 5.77087 5.27643 6.73687C4.62587 7.50967 4.14689 8.36119 3.82518 9.25563C3.50347 10.1501 3.34619 11.0875 3.34619 12.0249C3.34619 13.1984 3.59641 14.3719 4.08969 15.4524C4.58298 16.5329 5.32649 17.5276 6.29876 18.3362L7.14949 17.3272Z"
         fill="currentColor"
       />
       <path
-        fill-rule="evenodd"
-        clip-rule="evenodd"
+        fillRule="evenodd"
+        clipRule="evenodd"
         d="M9.18714 16.4758C8.32211 16.0179 7.64298 15.3524 7.17829 14.5724C6.7136 13.7925 6.47052 12.8909 6.47052 11.9821C6.47052 11.1807 6.65641 10.3721 7.06388 9.62074C7.52144 8.75492 8.19345 8.08228 8.97983 7.6243C9.76624 7.1592 10.667 6.9159 11.5821 6.9159C12.3828 6.9159 13.1978 7.10194 13.9556 7.50269L14.5704 6.33631C13.6196 5.83541 12.5901 5.59212 11.5749 5.59927C10.424 5.59927 9.28725 5.90697 8.30069 6.48655C7.31412 7.06618 6.46339 7.92487 5.89146 9.00535C5.39101 9.95706 5.14795 10.9803 5.14795 11.9821C5.14795 13.127 5.45536 14.2576 6.04159 15.2379C6.62782 16.2254 7.48568 17.0626 8.57236 17.635L9.18714 16.4758Z"
         fill="currentColor"
       />
       <path
-        fill-rule="evenodd"
-        clip-rule="evenodd"
+        fillRule="evenodd"
+        clipRule="evenodd"
         d="M11.6608 15.2165C11.2104 15.2165 10.7815 15.1235 10.3955 14.9589C9.80924 14.7156 9.31596 14.3005 8.96564 13.7782C8.61536 13.2558 8.40804 12.6333 8.40804 11.9606C8.40804 11.5098 8.50095 11.0805 8.66537 10.6941C8.90845 10.1145 9.32309 9.61359 9.84496 9.26297C10.3669 8.91235 10.9888 8.70484 11.6608 8.70484V7.38818C11.0317 7.38818 10.4312 7.517 9.88072 7.74597C9.05858 8.09659 8.36511 8.66905 7.87183 9.39892C7.37142 10.136 7.08545 11.0233 7.08545 11.9678C7.08545 12.5975 7.21412 13.1986 7.4429 13.7496C7.79322 14.5725 8.37228 15.2666 9.10147 15.7603C9.83067 16.2469 10.71 16.5331 11.6608 16.5331V15.2165Z"
         fill="currentColor"
       />
       <path
-        fill-rule="evenodd"
-        clip-rule="evenodd"
+        fillRule="evenodd"
+        clipRule="evenodd"
         d="M12.7332 10.9234C12.5831 10.766 12.4187 10.6372 12.2542 10.5442C12.0898 10.4511 11.9183 10.401 11.7395 10.401C11.4965 10.401 11.2891 10.444 11.0961 10.5299C10.9102 10.6157 10.7458 10.7302 10.61 10.8805C10.4741 11.0236 10.374 11.1953 10.3026 11.3814C10.2311 11.5674 10.2025 11.7678 10.2025 11.9681C10.2025 12.1685 10.2382 12.3689 10.3026 12.5549C10.374 12.7409 10.4741 12.9127 10.61 13.0558C10.7458 13.1989 10.9031 13.3206 11.0961 13.4064C11.282 13.4923 11.4965 13.5352 11.7395 13.5352C11.9397 13.5352 12.1327 13.4923 12.3043 13.4136C12.4759 13.3277 12.626 13.2061 12.7475 13.0486L13.6197 13.986C13.491 14.1148 13.3409 14.2293 13.1693 14.3223C12.9978 14.4154 12.8262 14.4941 12.6546 14.5513C12.483 14.6086 12.3114 14.6515 12.1542 14.673C11.9969 14.7016 11.8539 14.7087 11.7395 14.7087C11.3463 14.7087 10.9746 14.6443 10.6314 14.5155C10.2811 14.3868 9.98084 14.2007 9.73064 13.9574C9.47326 13.7213 9.27311 13.4279 9.12298 13.0916C8.97285 12.7553 8.90137 12.376 8.90137 11.9681C8.90137 11.5531 8.97285 11.181 9.12298 10.8447C9.27311 10.5084 9.47326 10.2222 9.73064 9.97887C9.98801 9.74274 10.2883 9.55669 10.6314 9.42075C10.9817 9.29193 11.3535 9.22754 11.7395 9.22754C12.0755 9.22754 12.4115 9.29193 12.7475 9.42075C13.0835 9.54953 13.3838 9.7499 13.634 10.0218L12.7332 10.9234Z"
         fill="currentColor"
       />
diff --git a/apps/webapp/app/components/primitives/UsageSparkline.tsx b/apps/webapp/app/components/primitives/UsageSparkline.tsx
new file mode 100644
index 00000000000..553dc4fc641
--- /dev/null
+++ b/apps/webapp/app/components/primitives/UsageSparkline.tsx
@@ -0,0 +1,115 @@
+import {
+  Bar,
+  BarChart,
+  ReferenceLine,
+  ResponsiveContainer,
+  Tooltip,
+  YAxis,
+  type TooltipProps,
+} from "recharts";
+import { cn } from "~/utils/cn";
+import { formatDateTime } from "./DateTime";
+import { Header3 } from "./Headers";
+import TooltipPortal from "./TooltipPortal";
+
+type UsageDatum = { date: Date; count: number }; // one bar: its tooltip date and value
+
+type UnitLabel = { singular: string; plural: string }; // singular is used when count === 1
+
+export type UsageSparklineProps = {
+  /** Usage buckets, oldest first; the tooltip dates them one hour apart, ending now. */
+  data?: number[];
+  /** Bar colour. Defaults to blue (`#3B82F6`). */
+  color?: string;
+  /** Unit shown in the tooltip (e.g. calls, tokens). Defaults to call/calls. */
+  unitLabel?: UnitLabel;
+  /** Format the trailing total. Defaults to `toLocaleString`. */
+  formatTotal?: (total: number) => string;
+  /** Class for the trailing total label. Defaults to `text-blue-400`. */
+  totalClassName?: string;
+};
+
+/**
+ * Inline sparkline for list rows. Renders a small bar chart plus a trailing
+ * total, or an en dash when `data` is missing or all zero. Shared by the prompts
+ * and models lists — keep it presentational (the caller supplies the buckets).
+ */
+export function UsageSparkline({
+  data,
+  color = "#3B82F6",
+  unitLabel = { singular: "call", plural: "calls" },
+  formatTotal,
+  totalClassName = "text-blue-400",
+}: UsageSparklineProps) {
+  if (!data || data.every((v) => v === 0)) {
+    return <span className="text-text-dimmed">–</span>;
+  }
+
+  const total = data.reduce((a, b) => a + b, 0);
+  const max = Math.max(...data);
+
+  // Date each bar for the tooltip: bucket i is `data.length - 1 - i` hours before
+  // now. NOTE(review): assumes hourly buckets — confirm callers never pass others.
+  const now = new Date();
+  const chartData: UsageDatum[] = data.map((count, i) => ({
+    date: new Date(now.getTime() - (data.length - 1 - i) * 3600_000),
+    count,
+  }));
+
+  return (
+    <div className="flex items-start gap-2">
+      <div className="h-6 w-[7rem] rounded-sm">
+        <ResponsiveContainer width="100%" height="100%">
+          <BarChart data={chartData} margin={{ top: 0, right: 0, left: 0, bottom: 0 }}>
+            <YAxis domain={[0, max || 1]} hide />
+            <Tooltip
+              cursor={{ fill: "rgba(255, 255, 255, 0.06)" }}
+              content={<UsageSparklineTooltip unitLabel={unitLabel} />}
+              allowEscapeViewBox={{ x: true, y: true }}
+              wrapperStyle={{ zIndex: 1000 }}
+              animationDuration={0}
+            />
+            <Bar
+              dataKey="count"
+              fill={color}
+              strokeWidth={0}
+              isAnimationActive={false}
+              minPointSize={1}
+            />
+            <ReferenceLine y={0} stroke="#2C3034" strokeWidth={1} />
+            {max > 0 && (
+              <ReferenceLine y={max} stroke="#4D525B" strokeDasharray="4 4" strokeWidth={1} />
+            )}
+          </BarChart>
+        </ResponsiveContainer>
+      </div>
+      <span className={cn("-mt-1 text-xs tabular-nums", totalClassName)}>
+        {formatTotal ? formatTotal(total) : total.toLocaleString()}
+      </span>
+    </div>
+  );
+}
+
+function UsageSparklineTooltip({
+  active,
+  payload,
+  unitLabel,
+}: TooltipProps<number, string> & { unitLabel: UnitLabel }) {
+  if (!active || !payload || payload.length === 0) return null; // nothing hovered
+  const datum = payload[0].payload as UsageDatum;
+  const when = datum.date instanceof Date ? datum.date : new Date(datum.date);
+  const unit = datum.count === 1 ? unitLabel.singular : unitLabel.plural;
+  return (
+    <TooltipPortal active={active}>
+      <div className="rounded-sm border border-grid-bright bg-background-dimmed px-3 py-2">
+        <Header3 className="border-b border-b-charcoal-650 pb-2">
+          {formatDateTime(when, "UTC", [], false, true)}
+        </Header3>
+        <div className="mt-2 text-xs text-text-bright">
+          <span className="tabular-nums">{datum.count.toLocaleString()}</span>{" "}
+          <span className="text-text-dimmed">{unit}</span>
+        </div>
+      </div>
+    </TooltipPortal>
+  );
+}
diff --git a/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts b/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts
index 16a0aa75046..fddb92d897d 100644
--- a/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts
+++ b/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts
@@ -52,6 +52,64 @@ export function formatModelId(provider: string, modelName: string): string {
   return `${provider}:${modelName}`;
 }
 
+/**
+ * Hardcoded provider display priority (most relevant first), consumed by
+ * `providerRank`. Providers not listed here sort alphabetically after the listed
+ * ones. Within a provider, models are sorted by release date (newest first).
+ */
+const PROVIDER_IMPORTANCE = [
+  "anthropic",
+  "openai",
+  "google",
+  "xai",
+  "meta",
+  "mistral",
+  "deepseek",
+];
+
+function providerRank(provider: string): number {
+  const position = PROVIDER_IMPORTANCE.indexOf(provider);
+  return position >= 0 ? position : PROVIDER_IMPORTANCE.length; // unlisted providers rank last
+}
+
+/**
+ * Choose the sparkline bucket width, in seconds, for a time range given in
+ * milliseconds. Wider ranges get coarser buckets so the small inline charts in
+ * the "Your models" list stay readable; ranges over 90 days use weekly buckets.
+ */
+function sparklineBucketSeconds(rangeMs: number): number {
+  const MINUTE = 60;
+  const HOUR = 60 * MINUTE;
+  const DAY = 24 * HOUR;
+  const fitsWithin = (seconds: number) => rangeMs <= seconds * 1000;
+  if (fitsWithin(HOUR)) return 2 * MINUTE;
+  if (fitsWithin(3 * HOUR)) return 5 * MINUTE;
+  if (fitsWithin(6 * HOUR)) return 15 * MINUTE;
+  if (fitsWithin(DAY)) return HOUR;
+  if (fitsWithin(3 * DAY)) return 2 * HOUR;
+  if (fitsWithin(7 * DAY)) return 6 * HOUR;
+  if (fitsWithin(14 * DAY)) return 12 * HOUR;
+  if (fitsWithin(30 * DAY)) return DAY;
+  if (fitsWithin(90 * DAY)) return 3 * DAY;
+  return 7 * DAY;
+}
+
+/**
+ * List the bucket-start keys covering [from, to], oldest first. Starts are floored
+ * to epoch-aligned multiples of the interval and rendered in UTC as
+ * "YYYY-MM-DD HH:MM:SS" to match ClickHouse `toStartOfInterval(col, INTERVAL n SECOND)`.
+ */
+function sparklineBucketKeys(from: Date, to: Date, intervalSeconds: number): string[] {
+  const stepMs = intervalSeconds * 1000;
+  const alignDown = (date: Date) => Math.floor(date.getTime() / stepMs) * stepMs;
+  const lastBucketMs = alignDown(to);
+  const bucketKeys: string[] = [];
+  for (let cursor = alignDown(from); cursor <= lastBucketMs; cursor += stepMs) {
+    bucketKeys.push(new Date(cursor).toISOString().slice(0, 19).replace("T", " "));
+  }
+  return bucketKeys;
+}
+
 // --- Types ---
 
 export type ModelCatalogItem = {
@@ -162,6 +220,17 @@ export type PopularModel = {
   ttfcP50: number;
 };
 
+/** A model with usage in a specific project/environment (the "Your models" list). */
+export type ProjectModelUsageItem = {
+  responseModel: string;
+  genAiSystem: string; // an arbitrary gen_ai_system seen for this model (`any(...)`)
+  calls: number; // number of llm_metrics_v1 rows in the window
+  totalCost: number;
+  totalTokens: number;
+  avgTtfc: number; // avg(ms_to_first_chunk), rounded to 1 decimal
+  avgTps: number; // avg(tokens_per_second), rounded to 1 decimal
+};
+
 // --- ClickHouse schemas for user metrics ---
 
 const UserMetricsSummaryRow = z.object({
@@ -179,6 +248,22 @@ const UserTaskBreakdownRow = z.object({
   cost: z.coerce.number(),
 });
 
+const ProjectModelUsageRow = z.object({
+  response_model: z.string(),
+  gen_ai_system: z.string(),
+  calls: z.coerce.number(), // coerced: presumably ClickHouse may send numerics as strings — confirm
+  total_cost: z.coerce.number(),
+  total_tokens: z.coerce.number(),
+  avg_ttfc: z.coerce.number(),
+  avg_tps: z.coerce.number(),
+});
+
+const ModelSparklineRow = z.object({
+  response_model: z.string(),
+  bucket: z.string(), // looked up against sparklineBucketKeys() output, so formats must match
+  val: z.coerce.number(), // count() or sum(total_tokens), depending on the query
+});
+
 // --- Presenter ---
 
 export class ModelRegistryPresenter extends BasePresenter {
@@ -296,7 +381,12 @@ export class ModelRegistryPresenter extends BasePresenter {
     }
 
     return Array.from(groups.entries())
-      .sort(([a], [b]) => a.localeCompare(b))
+      .sort(([a], [b]) => {
+        const rankA = providerRank(a);
+        const rankB = providerRank(b);
+        if (rankA !== rankB) return rankA - rankB;
+        return a.localeCompare(b);
+      })
       .map(([provider, models]) => ({
         provider,
         models: models.sort((a, b) => {
@@ -549,4 +639,150 @@ export class ModelRegistryPresenter extends BasePresenter {
       ttfcP50: r.ttfc_p50,
     }));
   }
+
+  /**
+   * Models with usage in a specific project/environment within [startTime, endTime],
+   * with aggregate metrics, most-called first (capped at 100). Tenant-scoped "Your
+   * models" list, unlike the cross-tenant getPopularModels. Returns [] on query error.
+   */
+  async getProjectModelUsage(
+    projectId: string,
+    environmentId: string,
+    startTime: Date,
+    endTime: Date
+  ): Promise<ProjectModelUsageItem[]> {
+    const queryFn = this.clickhouse.reader.query({
+      name: "modelRegistryProjectUsage",
+      query: `
+        SELECT
+          response_model,
+          any(gen_ai_system) AS gen_ai_system,
+          count() AS calls,
+          sum(total_cost) AS total_cost,
+          sum(total_tokens) AS total_tokens,
+          round(avg(ms_to_first_chunk), 1) AS avg_ttfc,
+          round(avg(tokens_per_second), 1) AS avg_tps
+        FROM trigger_dev.llm_metrics_v1
+        WHERE project_id = {projectId: String}
+          AND environment_id = {environmentId: String}
+          AND start_time >= {startTime: String}
+          AND start_time <= {endTime: String}
+          AND response_model != ''
+        GROUP BY response_model
+        ORDER BY calls DESC
+        LIMIT 100
+      `,
+      params: z.object({
+        projectId: z.string(),
+        environmentId: z.string(),
+        startTime: z.string(),
+        endTime: z.string(),
+      }),
+      schema: ProjectModelUsageRow,
+    });
+
+    const [error, rows] = await queryFn({
+      projectId,
+      environmentId,
+      startTime: formatDateForCH(startTime),
+      endTime: formatDateForCH(endTime),
+    });
+
+    if (error || !rows) return []; // best-effort: a failed query renders as "no usage"
+
+    return rows.map((r) => ({
+      responseModel: r.response_model,
+      genAiSystem: r.gen_ai_system,
+      calls: r.calls,
+      totalCost: r.total_cost,
+      totalTokens: r.total_tokens,
+      avgTtfc: r.avg_ttfc,
+      avgTps: r.avg_tps,
+    }));
+  }
+
+  /**
+   * Call-count and total-token sparklines per response_model over [from, to] for
+   * one environment, matching the window the "Your models" charts and table use.
+   * Bucket size adapts to the range (see sparklineBucketSeconds). Each array is
+   * zero-filled, oldest bucket first; a failed query yields `{}` for that metric.
+   */
+  async getModelUsageSparklines(
+    environmentId: string,
+    responseModels: string[],
+    from: Date,
+    to: Date
+  ): Promise<{ calls: Record<string, number[]>; tokens: Record<string, number[]> }> {
+    if (responseModels.length === 0) return { calls: {}, tokens: {} };
+
+    const intervalSeconds = sparklineBucketSeconds(to.getTime() - from.getTime());
+    const bucketKeys = sparklineBucketKeys(from, to, intervalSeconds);
+
+    // intervalSeconds comes from sparklineBucketSeconds' fixed ladder (never user
+    // input), so inlining it is safe. SECOND intervals are meant to match bucketKeys.
+    const buildQuery = (valueExpr: string, name: string) =>
+      this.clickhouse.reader.query({
+        name,
+        query: `
+          SELECT
+            response_model,
+            toStartOfInterval(start_time, INTERVAL ${intervalSeconds} SECOND) AS bucket,
+            ${valueExpr} AS val
+          FROM trigger_dev.llm_metrics_v1
+          WHERE environment_id = {environmentId: String}
+            AND response_model IN {responseModels: Array(String)}
+            AND start_time >= {startTime: String}
+            AND start_time <= {endTime: String}
+          GROUP BY response_model, bucket
+          ORDER BY response_model, bucket
+        `,
+        params: z.object({
+          environmentId: z.string(),
+          responseModels: z.array(z.string()),
+          startTime: z.string(),
+          endTime: z.string(),
+        }),
+        schema: ModelSparklineRow,
+      });
+
+    const queryParams = {
+      environmentId,
+      responseModels,
+      startTime: formatDateForCH(from),
+      endTime: formatDateForCH(to),
+    };
+
+    const [callsResult, tokensResult] = await Promise.all([
+      buildQuery("count()", "modelCallSparklines")(queryParams),
+      buildQuery("sum(total_tokens)", "modelTokenSparklines")(queryParams),
+    ]);
+
+    return {
+      calls: this.#buildSparklineMap(callsResult, responseModels, bucketKeys),
+      tokens: this.#buildSparklineMap(tokensResult, responseModels, bucketKeys),
+    };
+  }
+
+  /** Pivot sparkline rows into `{ model: values[] }`, zero-filling missing buckets. */
+  #buildSparklineMap(
+    queryResult:
+      | [Error, null]
+      | [null, { response_model: string; bucket: string; val: number }[]],
+    keys: string[],
+    bucketKeys: string[]
+  ): Record<string, number[]> {
+    const [queryError, sparklineRows] = queryResult;
+    if (queryError || !sparklineRows) return {};
+
+    // Index values by "model|bucket" so each cell below is a single lookup.
+    const valueByCell = new Map(
+      sparklineRows.map((r) => [`${r.response_model}|${r.bucket}`, r.val] as const)
+    );
+
+    const sparklines: Record<string, number[]> = {};
+    for (const model of keys) {
+      sparklines[model] = bucketKeys.map((bucket) => valueByCell.get(`${model}|${bucket}`) ?? 0);
+    }
+    return sparklines;
+  }
 }
diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx
index 8785c9a2dc2..943d9ae221f 100644
--- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx
+++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx
@@ -1,11 +1,17 @@
 import {
   AdjustmentsHorizontalIcon,
+  ArrowTopRightOnSquareIcon,
   CheckIcon,
   CubeIcon,
   XMarkIcon,
 } from "@heroicons/react/20/solid";
 import * as Ariakit from "@ariakit/react";
-import { Form, type MetaFunction, useFetcher } from "@remix-run/react";
+import {
+  Form,
+  type MetaFunction,
+  type ShouldRevalidateFunctionArgs,
+  useFetcher,
+} from "@remix-run/react";
 import { type LoaderFunctionArgs } from "@remix-run/server-runtime";
 import { AnimatePresence, motion } from "framer-motion";
 import { useEffect, useMemo, useRef, useState } from "react";
@@ -27,7 +33,7 @@ import { InlineCode } from "~/components/code/InlineCode";
 import { PageBody, PageContainer } from "~/components/layout/AppLayout";
 import { AppliedFilter } from "~/components/primitives/AppliedFilter";
 import { Badge } from "~/components/primitives/Badge";
-import { Button } from "~/components/primitives/Buttons";
+import { Button, LinkButton } from "~/components/primitives/Buttons";
 import { Callout } from "~/components/primitives/Callout";
 import { Checkbox } from "~/components/primitives/Checkbox";
 import { DateTime } from "~/components/primitives/DateTime";
@@ -61,7 +67,13 @@ import {
   TableRow,
 } from "~/components/primitives/Table";
 import { TabButton, TabContainer } from "~/components/primitives/Tabs";
-import { appliedSummary } from "~/components/runs/v3/SharedFilters";
+import {
+  appliedSummary,
+  TimeFilter,
+  type TimeFilterApplyValues,
+  timeFilterFromTo,
+} from "~/components/runs/v3/SharedFilters";
+import { parseFiniteInt } from "~/utils/searchParams";
 import { useSearchParams } from "~/hooks/useSearchParam";
 import { useShortcutKeys } from "~/hooks/useShortcutKeys";
 import { useOptimisticLocation } from "~/hooks/useOptimisticLocation";
@@ -71,6 +83,7 @@ import {
   type ModelCatalogItem,
   type ModelComparisonItem,
   type PopularModel,
+  type ProjectModelUsageItem,
   ModelRegistryPresenter,
 } from "~/presenters/v3/ModelRegistryPresenter.server";
 import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server";
@@ -78,7 +91,7 @@ import { requireUserId } from "~/services/session.server";
 import { useEnvironment } from "~/hooks/useEnvironment";
 import { useOrganization } from "~/hooks/useOrganizations";
 import { useProject } from "~/hooks/useProject";
-import { EnvironmentParamSchema, v3ModelComparePath } from "~/utils/pathBuilder";
+import { EnvironmentParamSchema, v3BuiltInDashboardPath, v3ModelComparePath } from "~/utils/pathBuilder";
 import {
   formatModelPrice,
   formatTokenCount,
@@ -88,6 +101,7 @@ import {
 } from "~/utils/modelFormatters";
 import { formatNumberCompact } from "~/utils/numberFormatter";
 import { Spinner } from "~/components/primitives/Spinner";
+import { UsageSparkline } from "~/components/primitives/UsageSparkline";
 import { MetricWidget } from "~/routes/resources.metric";
 import type { QueryWidgetConfig } from "~/components/metrics/QueryWidget";
 
@@ -116,9 +130,27 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => {
   const presenter = new ModelRegistryPresenter(clickhouse);
   const catalog = await presenter.getModelCatalog();
 
-  const now = new Date();
-  const sevenDaysAgo = new Date(now.getTime() - 7 * 24 * 60 * 60 * 1000);
-  const popularModels = await presenter.getPopularModels(sevenDaysAgo, now, 50);
+  // Shared time range for the "Your models" tab (charts, usage table, sparklines).
+  // Mirrors the agent detail page: URL-driven period / from / to via TimeFilter.
+  const url = new URL(request.url);
+  const period = url.searchParams.get("period") ?? undefined;
+  const from = parseFiniteInt(url.searchParams.get("from"));
+  const to = parseFiniteInt(url.searchParams.get("to"));
+  const time = timeFilterFromTo({ period, from, to, defaultPeriod: "7d" });
+
+  // popularModels = cross-tenant aggregate (powers the library's p50 TTFC column).
+  // projectUsage = tenant-scoped models with usage in this env (the "Your models" tab).
+  const [popularModels, projectUsage] = await Promise.all([
+    presenter.getPopularModels(time.from, time.to, 50),
+    presenter.getProjectModelUsage(project.id, environment.id, time.from, time.to),
+  ]);
+
+  const usageSparklines = await presenter.getModelUsageSparklines(
+    environment.id,
+    projectUsage.map((u) => u.responseModel),
+    time.from,
+    time.to
+  );
 
   const allProviders = catalog.map((g) => g.provider);
   const allFeatures = Array.from(
@@ -128,6 +160,8 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => {
   return typedjson({
     catalog,
     popularModels,
+    projectUsage,
+    usageSparklines,
     allProviders,
     allFeatures,
     organizationId: project.organizationId,
@@ -136,6 +170,26 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => {
   });
 };
 
+export function shouldRevalidate({
+  currentUrl,
+  nextUrl,
+  defaultShouldRevalidate,
+}: ShouldRevalidateFunctionArgs) {
+  // The active tab is persisted in the URL (?tab=), but no loader data depends
+  // on it — so switching tabs must not refetch. Any other param change (period,
+  // from/to, …) revalidates as normal.
+  const normalize = (url: URL) => {
+    const params = new URLSearchParams(url.search);
+    params.delete("tab");
+    params.sort();
+    return params.toString();
+  };
+  if (normalize(currentUrl) === normalize(nextUrl)) {
+    return false;
+  }
+  return defaultShouldRevalidate;
+}
+
 const providerIcons: Record<string, (props: { className?: string }) => JSX.Element> = {
   openai: OpenAIIcon,
   anthropic: AnthropicIcon,
@@ -154,6 +208,16 @@ function providerIcon(slug: string) {
   return <Icon className="size-4 text-text-dimmed" />;
 }
 
+const NEW_MODEL_WINDOW_DAYS = 7;
+
+/** True if the model was released within the last NEW_MODEL_WINDOW_DAYS (never for future dates). */
+function isNewModel(releaseDate: string | null): boolean {
+  if (!releaseDate) return false;
+  const ageMs = Date.now() - new Date(releaseDate).getTime();
+  // An unparseable date yields NaN, which fails both comparisons and is rejected too.
+  return ageMs >= 0 && ageMs <= NEW_MODEL_WINDOW_DAYS * 24 * 60 * 60 * 1000;
+}
+
 // --- Filter Components ---
 
 const providerShortcut = { key: "p" };
@@ -468,7 +532,10 @@ function ModelsList({
                 />
               </TableCell>
               <TableCell onClick={select} isTabbableCell>
-                {model.displayId}
+                <span className="flex items-center gap-2">
+                  {model.displayId}
+                  {isNewModel(model.releaseDate) && <Badge variant="outline-rounded">New</Badge>}
+                </span>
               </TableCell>
               <TableCell onClick={select}>
                 <span className="flex items-center gap-1.5">
@@ -768,14 +835,16 @@ function chartConfig(opts: {
   xAxisColumn: string;
   yAxisColumns: string[];
   aggregation?: "sum" | "avg";
+  stacked?: boolean;
+  groupByColumn?: string | null;
 }): QueryWidgetConfig {
   return {
     type: "chart",
     chartType: opts.chartType,
     xAxisColumn: opts.xAxisColumn,
     yAxisColumns: opts.yAxisColumns,
-    groupByColumn: null,
-    stacked: false,
+    groupByColumn: opts.groupByColumn ?? null,
+    stacked: opts.stacked ?? false,
     sortByColumn: null,
     sortDirection: "asc",
     aggregation: opts.aggregation ?? "sum",
@@ -784,17 +853,21 @@ function chartConfig(opts: {
 
 type DetailTab = "overview" | "usage";
 
+type ModelsTab = "yours" | "library";
+
 function ModelDetailPanel({
   model,
   organizationId,
   projectId,
   environmentId,
+  aiMetricsBasePath,
   onClose,
 }: {
   model: ModelCatalogItem;
   organizationId: string;
   projectId: string;
   environmentId: string;
+  aiMetricsBasePath: string;
   onClose: () => void;
 }) {
   const [tab, setTab] = useState<DetailTab>("overview");
@@ -840,6 +913,7 @@ function ModelDetailPanel({
             organizationId={organizationId}
             projectId={projectId}
             environmentId={environmentId}
+            aiMetricsBasePath={aiMetricsBasePath}
           />
         )}
       </div>
@@ -947,28 +1021,61 @@ function DetailYourUsageTab({
   organizationId,
   projectId,
   environmentId,
+  aiMetricsBasePath,
 }: {
   modelName: string;
   organizationId: string;
   projectId: string;
   environmentId: string;
+  aiMetricsBasePath: string;
 }) {
+  // Inspector-local range, independent of the page-level "Your models" range.
+  const [range, setRange] = useState<TimeFilterApplyValues>({ period: "7d" });
+
   const widgetProps = {
     organizationId,
     projectId,
     environmentId,
     scope: "environment" as const,
-    period: "7d",
-    from: null,
-    to: null,
+    period: range.from && range.to ? null : range.period ?? "7d",
+    from: range.from ?? null,
+    to: range.to ?? null,
   };
 
+  // Deep-link to the AI metrics dashboard pre-filtered to this model, carrying
+  // the inspector's current range so the dashboard opens on the same window.
+  const dashboardParams = new URLSearchParams({ models: modelName });
+  if (range.from && range.to) {
+    dashboardParams.set("from", range.from);
+    dashboardParams.set("to", range.to);
+  } else if (range.period) {
+    dashboardParams.set("period", range.period);
+  }
+  const aiMetricsHref = `${aiMetricsBasePath}?${dashboardParams.toString()}`;
+
   return (
     <div className="flex flex-col gap-3 py-3">
+      <div className="flex items-center justify-between gap-2">
+        <TimeFilter
+          defaultPeriod="7d"
+          labelName="Period"
+          period={range.period}
+          from={range.from}
+          to={range.to}
+          onValueChange={setRange}
+        />
+        <LinkButton
+          to={aiMetricsHref}
+          variant="secondary/small"
+          TrailingIcon={ArrowTopRightOnSquareIcon}
+        >
+          View in AI metrics
+        </LinkButton>
+      </div>
       <div className="h-[120px]">
         <MetricWidget
           widgetKey={`${modelName}-user-calls`}
-          title="Total calls (7d)"
+          title="Total calls"
           query={`SELECT count() AS total_calls FROM llm_metrics WHERE response_model = '${escapeTSQL(
             modelName
           )}'`}
@@ -979,7 +1086,7 @@ function DetailYourUsageTab({
       <div className="h-[120px]">
         <MetricWidget
           widgetKey={`${modelName}-user-cost`}
-          title="Total cost (7d)"
+          title="Total cost"
           query={`SELECT sum(total_cost) AS total_cost FROM llm_metrics WHERE response_model = '${escapeTSQL(
             modelName
           )}'`}
@@ -1055,23 +1162,204 @@ function DetailYourUsageTab({
   );
 }
 
+// --- Your Models Tab ---
+
+function YourModelsTab({
+  usage,
+  callSparklines,
+  tokenSparklines,
+  organizationId,
+  projectId,
+  environmentId,
+  period,
+  from,
+  to,
+  modelLookup,
+  selectedModelId,
+  onSelectModel,
+  onGoToLibrary,
+}: {
+  usage: ProjectModelUsageItem[];
+  callSparklines: Record<string, number[]>;
+  tokenSparklines: Record<string, number[]>;
+  organizationId: string;
+  projectId: string;
+  environmentId: string;
+  period: string | null;
+  from: string | null;
+  to: string | null;
+  modelLookup: Map<string, ModelCatalogItem>;
+  selectedModelId: string | null;
+  onSelectModel: (model: ModelCatalogItem) => void;
+  onGoToLibrary: () => void;
+}) {
+  // Drive the charts off the same URL-selected range as the table + sparklines.
+  // period and from/to are mutually exclusive (TimeFilter enforces this).
+  const widgetProps = {
+    organizationId,
+    projectId,
+    environmentId,
+    scope: "environment" as const,
+    period: from && to ? null : period ?? "7d",
+    from,
+    to,
+  };
+
+  return (
+    <div className="overflow-y-auto p-3 scrollbar-thin scrollbar-track-transparent scrollbar-thumb-charcoal-600">
+      <div className="grid grid-cols-1 gap-3 lg:grid-cols-3">
+        <div className="h-[260px]">
+          <MetricWidget
+            widgetKey="your-models-cost-time"
+            title="Cost over time"
+            query={`SELECT timeBucket(), sum(total_cost) AS cost FROM llm_metrics GROUP BY timeBucket ORDER BY timeBucket`}
+            config={chartConfig({ chartType: "bar", xAxisColumn: "timebucket", yAxisColumns: ["cost"] })}
+            {...widgetProps}
+          />
+        </div>
+        <div className="h-[260px]">
+          <MetricWidget
+            widgetKey="your-models-tokens-time"
+            title="Tokens over time"
+            query={`SELECT timeBucket(), sum(input_tokens) AS input_tokens, sum(output_tokens) AS output_tokens FROM llm_metrics GROUP BY timeBucket ORDER BY timeBucket`}
+            config={chartConfig({
+              chartType: "bar",
+              xAxisColumn: "timebucket",
+              yAxisColumns: ["input_tokens", "output_tokens"],
+              stacked: true,
+            })}
+            {...widgetProps}
+          />
+        </div>
+        <div className="h-[260px]">
+          <MetricWidget
+            widgetKey="your-models-calls-by-model"
+            title="Calls by model"
+            query={`SELECT response_model, count() AS calls FROM llm_metrics GROUP BY response_model ORDER BY calls DESC LIMIT 10`}
+            config={chartConfig({ chartType: "bar", xAxisColumn: "response_model", yAxisColumns: ["calls"] })}
+            {...widgetProps}
+          />
+        </div>
+      </div>
+
+      <div className="mt-4">
+        {usage.length === 0 ? (
+          <div className="flex flex-col items-center justify-center gap-3 py-12">
+            <p className="max-w-md text-center text-sm text-text-dimmed">
+              No model usage in this environment yet. Models you call from your tasks will appear here
+              with usage metrics.
+            </p>
+            <Button variant="secondary/small" onClick={onGoToLibrary}>
+              Browse the model library
+            </Button>
+          </div>
+        ) : (
+          <Table>
+            <TableHeader>
+              <TableRow>
+                <TableHeaderCell>Model</TableHeaderCell>
+                <TableHeaderCell>Provider</TableHeaderCell>
+                <TableHeaderCell alignment="right">Calls</TableHeaderCell>
+                <TableHeaderCell alignment="right">Cost</TableHeaderCell>
+                <TableHeaderCell alignment="right">Avg TTFC</TableHeaderCell>
+                <TableHeaderCell alignment="right">Avg tokens/sec</TableHeaderCell>
+                <TableHeaderCell>Calls trend</TableHeaderCell>
+                <TableHeaderCell>Tokens trend</TableHeaderCell>
+              </TableRow>
+            </TableHeader>
+            <TableBody>
+              {usage.map((u) => {
+                const catalogItem = modelLookup.get(u.responseModel);
+                const provider = catalogItem?.provider ?? u.genAiSystem;
+                const displayId = catalogItem?.displayId ?? `${provider}:${u.responseModel}`;
+                const select = catalogItem ? () => onSelectModel(catalogItem) : undefined;
+                return (
+                  <TableRow
+                    key={u.responseModel}
+                    isSelected={!!catalogItem && selectedModelId === catalogItem.friendlyId}
+                  >
+                    <TableCell onClick={select} isTabbableCell={!!select}>
+                      {displayId}
+                    </TableCell>
+                    <TableCell onClick={select}>
+                      <span className="flex items-center gap-1.5">
+                        {providerIcon(provider)}
+                        {formatProviderName(provider)}
+                      </span>
+                    </TableCell>
+                    <TableCell onClick={select} alignment="right" className="tabular-nums">
+                      {formatNumberCompact(u.calls)}
+                    </TableCell>
+                    <TableCell onClick={select} alignment="right" className="tabular-nums">
+                      {formatModelCost(u.totalCost)}
+                    </TableCell>
+                    <TableCell onClick={select} alignment="right" className="tabular-nums">
+                      {u.avgTtfc > 0 ? `${u.avgTtfc.toFixed(0)}ms` : "—"}
+                    </TableCell>
+                    <TableCell onClick={select} alignment="right" className="tabular-nums">
+                      {u.avgTps > 0 ? u.avgTps.toFixed(0) : "—"}
+                    </TableCell>
+                    <TableCell onClick={select}>
+                      <UsageSparkline data={callSparklines[u.responseModel]} />
+                    </TableCell>
+                    <TableCell onClick={select}>
+                      <UsageSparkline
+                        data={tokenSparklines[u.responseModel]}
+                        color="#10B981"
+                        unitLabel={{ singular: "token", plural: "tokens" }}
+                        formatTotal={(t) => formatNumberCompact(t)}
+                        totalClassName="text-emerald-400"
+                      />
+                    </TableCell>
+                  </TableRow>
+                );
+              })}
+            </TableBody>
+          </Table>
+        )}
+      </div>
+    </div>
+  );
+}
+
 // --- Main Page ---
 
 export default function ModelsPage() {
   const {
     catalog,
     popularModels,
+    projectUsage,
+    usageSparklines,
     allProviders,
     allFeatures,
     organizationId,
     projectId,
     environmentId,
   } = useTypedLoaderData<typeof loader>();
-  const { values: searchValues, value: searchValue } = useSearchParams();
+  const organization = useOrganization();
+  const project = useProject();
+  const environment = useEnvironment();
+  const aiMetricsBasePath = v3BuiltInDashboardPath(organization, project, environment, "llm");
+  const { values: searchValues, value: searchValue, replace } = useSearchParams();
 
   const search = searchValue("search") ?? "";
   const selectedProviders = searchValues("providers");
   const selectedFeatures = searchValues("features");
+  const periodParam = searchValue("period") ?? null;
+  const fromParam = searchValue("from") ?? null;
+  const toParam = searchValue("to") ?? null;
+  // Active tab is persisted in the URL (?tab=) so it survives refresh and is
+  // shareable. Defaults to "yours" when there's usage, else "library".
+  const tabParam = searchValue("tab");
+  const view: ModelsTab =
+    tabParam === "library"
+      ? "library"
+      : tabParam === "yours"
+      ? "yours"
+      : projectUsage.length > 0
+      ? "yours"
+      : "library";
+  const setView = (next: ModelsTab) => replace({ tab: next });
   const [compareSet, setCompareSet] = useState<Set<string>>(new Set());
   const [showAllDetails, setShowAllDetails] = useState(false);
   const [compareOpen, setCompareOpen] = useState(false);
@@ -1117,6 +1405,19 @@ export default function ModelsPage() {
   const compareModels = useMemo(() => Array.from(compareSet), [compareSet]);
   const allModels = useMemo(() => catalog.flatMap((g) => g.models), [catalog]);
 
+  // Resolve a used response_model (base or dated variant) to its catalog card,
+  // so a "Your models" row can open the same detail inspector as the library.
+  const modelLookup = useMemo(() => {
+    const map = new Map<string, ModelCatalogItem>();
+    for (const model of allModels) {
+      map.set(model.modelName, model);
+      for (const variant of model.variants) {
+        map.set(variant.modelName, model);
+      }
+    }
+    return map;
+  }, [allModels]);
+
   return (
     <PageContainer>
       <NavBar>
@@ -1126,24 +1427,67 @@ export default function ModelsPage() {
         <ResizablePanelGroup orientation="horizontal" className="max-h-full">
           <ResizablePanel id="models-main" min="100px">
             <div className="grid h-full max-h-full grid-rows-[auto_1fr] overflow-hidden">
-              <FiltersBar
-                allProviders={allProviders}
-                allFeatures={allFeatures}
-                compareSet={compareSet}
-                onCompare={() => setCompareOpen(true)}
-                showAllDetails={showAllDetails}
-                onToggleAllDetails={(checked) => setShowAllDetails(checked)}
-              />
-              <ModelsList
-                models={filteredModels}
-                popularMap={popularMap}
-                compareSet={compareSet}
-                onToggleCompare={toggleCompare}
-                showAllDetails={showAllDetails}
-                allFeatures={allFeatures}
-                selectedModelId={selectedModel?.friendlyId ?? null}
-                onSelectModel={setSelectedModel}
-              />
+              <div className="flex h-fit items-center justify-between gap-2 border-b border-grid-bright px-3 pt-1.5">
+                <TabContainer>
+                  <TabButton
+                    isActive={view === "yours"}
+                    layoutId="models-page-tabs"
+                    onClick={() => setView("yours")}
+                  >
+                    Your models
+                  </TabButton>
+                  <TabButton
+                    isActive={view === "library"}
+                    layoutId="models-page-tabs"
+                    onClick={() => setView("library")}
+                  >
+                    Model library
+                  </TabButton>
+                </TabContainer>
+                {view === "yours" && (
+                  <div className="pb-1.5">
+                    <TimeFilter defaultPeriod="7d" labelName="Period" shortcut={{ key: "t" }} />
+                  </div>
+                )}
+              </div>
+              {view === "yours" ? (
+                <YourModelsTab
+                  usage={projectUsage}
+                  callSparklines={usageSparklines.calls}
+                  tokenSparklines={usageSparklines.tokens}
+                  organizationId={organizationId}
+                  projectId={projectId}
+                  environmentId={environmentId}
+                  period={periodParam}
+                  from={fromParam}
+                  to={toParam}
+                  modelLookup={modelLookup}
+                  selectedModelId={selectedModel?.friendlyId ?? null}
+                  onSelectModel={setSelectedModel}
+                  onGoToLibrary={() => setView("library")}
+                />
+              ) : (
+                <div className="grid h-full max-h-full grid-rows-[auto_1fr] overflow-hidden">
+                  <FiltersBar
+                    allProviders={allProviders}
+                    allFeatures={allFeatures}
+                    compareSet={compareSet}
+                    onCompare={() => setCompareOpen(true)}
+                    showAllDetails={showAllDetails}
+                    onToggleAllDetails={(checked) => setShowAllDetails(checked)}
+                  />
+                  <ModelsList
+                    models={filteredModels}
+                    popularMap={popularMap}
+                    compareSet={compareSet}
+                    onToggleCompare={toggleCompare}
+                    showAllDetails={showAllDetails}
+                    allFeatures={allFeatures}
+                    selectedModelId={selectedModel?.friendlyId ?? null}
+                    onSelectModel={setSelectedModel}
+                  />
+                </div>
+              )}
             </div>
           </ResizablePanel>
           <ResizableHandle
@@ -1172,6 +1516,7 @@ export default function ModelsPage() {
                   organizationId={organizationId}
                   projectId={projectId}
                   environmentId={environmentId}
+                  aiMetricsBasePath={aiMetricsBasePath}
                   onClose={() => setSelectedModel(null)}
                 />
               )}

From 57dd835c79e05fadb6e32be8f10138a5e4916d67 Mon Sep 17 00:00:00 2001
From: Eric Allam <eallam@icloud.com>
Date: Mon, 15 Jun 2026 18:00:54 +0100
Subject: [PATCH 2/7] fix(webapp): revalidate Models loader on project/env path
 change

---
 .../route.tsx                                          | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx
index 943d9ae221f..ca4144c9e2e 100644
--- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx
+++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx
@@ -176,15 +176,19 @@ export function shouldRevalidate({
   defaultShouldRevalidate,
 }: ShouldRevalidateFunctionArgs) {
   // The active tab is persisted in the URL (?tab=), but no loader data depends
-  // on it — so switching tabs must not refetch. Any other param change (period,
-  // from/to, …) revalidates as normal.
+  // on it — so switching tabs must not refetch. Any other change (a different
+  // project/environment in the path, or a period/from/to param) revalidates as
+  // normal, since the loader data is scoped to the path params + time range.
   const normalize = (url: URL) => {
     const params = new URLSearchParams(url.search);
     params.delete("tab");
     params.sort();
     return params.toString();
   };
-  if (normalize(currentUrl) === normalize(nextUrl)) {
+  if (
+    currentUrl.pathname === nextUrl.pathname &&
+    normalize(currentUrl) === normalize(nextUrl)
+  ) {
     return false;
   }
   return defaultShouldRevalidate;

From 610ea59eceb39259acfcd1edc164f8e153191066 Mon Sep 17 00:00:00 2001
From: Eric Allam <eallam@icloud.com>
Date: Mon, 15 Jun 2026 18:15:35 +0100
Subject: [PATCH 3/7] fix(webapp): label model sparkline tooltips with their
 real bucket times

The "Your models" sparklines use dynamic bucket sizes (e.g. 6h buckets for a
7d range), but the tooltip assumed hourly buckets and showed the wrong dates.
Thread the bucket interval and the first bucket's start through to the
sparkline so each bar is labelled with its real time.

Also pin the library tab's cross-tenant p50 TTFC column to a fixed 7-day window
so it no longer follows the "Your models" time selector.
---
 .../components/primitives/UsageSparkline.tsx  | 18 +++++++++++----
 .../v3/ModelRegistryPresenter.server.ts       | 21 ++++++++++++++---
 .../route.tsx                                 | 23 ++++++++++++++++---
 3 files changed, 51 insertions(+), 11 deletions(-)

diff --git a/apps/webapp/app/components/primitives/UsageSparkline.tsx b/apps/webapp/app/components/primitives/UsageSparkline.tsx
index 553dc4fc641..2ffc1936a1d 100644
--- a/apps/webapp/app/components/primitives/UsageSparkline.tsx
+++ b/apps/webapp/app/components/primitives/UsageSparkline.tsx
@@ -17,8 +17,12 @@ type UsageDatum = { date: Date; count: number };
 type UnitLabel = { singular: string; plural: string };
 
 export type UsageSparklineProps = {
-  /** Trailing 24 hourly buckets; the last entry is the most recent hour. */
+  /** Equal-width time buckets, oldest first. */
   data?: number[];
+  /** Epoch ms of the first bucket's start. When omitted, the last bucket is anchored to now. */
+  bucketStartMs?: number;
+  /** Width of each bucket in ms. Defaults to one hour. */
+  bucketIntervalMs?: number;
   /** Bar colour. Defaults to blue. */
   color?: string;
   /** Unit shown in the tooltip (e.g. calls, tokens). */
@@ -36,6 +40,8 @@ export type UsageSparklineProps = {
  */
 export function UsageSparkline({
   data,
+  bucketStartMs,
+  bucketIntervalMs,
   color = "#3B82F6",
   unitLabel = { singular: "call", plural: "calls" },
   formatTotal,
@@ -48,11 +54,13 @@ export function UsageSparkline({
   const total = data.reduce((a, b) => a + b, 0);
   const max = Math.max(...data);
 
-  // Map the 24-bucket array to dated points so the tooltip can show the
-  // hour each bar represents. Bucket i is `23 - i` hours before now.
-  const now = new Date();
+  // Map each bucket to a dated point so the tooltip can show the window it
+  // represents. Buckets are `intervalMs` wide; if the caller didn't pass the
+  // first bucket's start, anchor the last bucket to now (hourly default).
+  const intervalMs = bucketIntervalMs ?? 3600_000;
+  const startMs = bucketStartMs ?? Date.now() - (data.length - 1) * intervalMs;
   const chartData: UsageDatum[] = data.map((count, i) => ({
-    date: new Date(now.getTime() - (data.length - 1 - i) * 3600_000),
+    date: new Date(startMs + i * intervalMs),
     count,
   }));
 
diff --git a/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts b/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts
index fddb92d897d..011a9ff8a12 100644
--- a/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts
+++ b/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts
@@ -712,10 +712,23 @@ export class ModelRegistryPresenter extends BasePresenter {
     responseModels: string[],
     from: Date,
     to: Date
-  ): Promise<{ calls: Record<string, number[]>; tokens: Record<string, number[]> }> {
-    if (responseModels.length === 0) return { calls: {}, tokens: {} };
-
+  ): Promise<{
+    calls: Record<string, number[]>;
+    tokens: Record<string, number[]>;
+    bucketIntervalMs: number;
+    bucketStartMs: number;
+  }> {
     const intervalSeconds = sparklineBucketSeconds(to.getTime() - from.getTime());
+    const intervalMs = intervalSeconds * 1000;
+    // Epoch-aligned start of the first bucket, matching sparklineBucketKeys and
+    // ClickHouse toStartOfInterval. Returned so the sparkline tooltip can label
+    // each bar with its true time rather than assuming hourly buckets.
+    const bucketStartMs = Math.floor(from.getTime() / intervalMs) * intervalMs;
+
+    if (responseModels.length === 0) {
+      return { calls: {}, tokens: {}, bucketIntervalMs: intervalMs, bucketStartMs };
+    }
+
     const bucketKeys = sparklineBucketKeys(from, to, intervalSeconds);
 
     // intervalSeconds is a server-derived integer from a fixed ladder, so it's
@@ -760,6 +773,8 @@ export class ModelRegistryPresenter extends BasePresenter {
     return {
       calls: this.#buildSparklineMap(callsResult, responseModels, bucketKeys),
       tokens: this.#buildSparklineMap(tokensResult, responseModels, bucketKeys),
+      bucketIntervalMs: intervalMs,
+      bucketStartMs,
     };
   }
 
diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx
index ca4144c9e2e..a3530223a28 100644
--- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx
+++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx
@@ -138,10 +138,15 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => {
   const to = parseFiniteInt(url.searchParams.get("to"));
   const time = timeFilterFromTo({ period, from, to, defaultPeriod: "7d" });
 
-  // popularModels = cross-tenant aggregate (powers the library's p50 TTFC column).
+  // popularModels powers the library tab's cross-tenant p50 TTFC column — a
+  // stable "typical latency" reference, so it always uses a fixed 7-day window
+  // independent of the Your models time selector (the library tab has none).
+  const popularTo = new Date();
+  const popularFrom = new Date(popularTo.getTime() - 7 * 24 * 60 * 60 * 1000);
+
   // projectUsage = tenant-scoped models with usage in this env (the "Your models" tab).
   const [popularModels, projectUsage] = await Promise.all([
-    presenter.getPopularModels(time.from, time.to, 50),
+    presenter.getPopularModels(popularFrom, popularTo, 50),
     presenter.getProjectModelUsage(project.id, environment.id, time.from, time.to),
   ]);
 
@@ -1172,6 +1177,8 @@ function YourModelsTab({
   usage,
   callSparklines,
   tokenSparklines,
+  bucketStartMs,
+  bucketIntervalMs,
   organizationId,
   projectId,
   environmentId,
@@ -1186,6 +1193,8 @@ function YourModelsTab({
   usage: ProjectModelUsageItem[];
   callSparklines: Record<string, number[]>;
   tokenSparklines: Record<string, number[]>;
+  bucketStartMs: number;
+  bucketIntervalMs: number;
   organizationId: string;
   projectId: string;
   environmentId: string;
@@ -1304,11 +1313,17 @@ function YourModelsTab({
                       {u.avgTps > 0 ? u.avgTps.toFixed(0) : "—"}
                     </TableCell>
                     <TableCell onClick={select}>
-                      <UsageSparkline data={callSparklines[u.responseModel]} />
+                      <UsageSparkline
+                        data={callSparklines[u.responseModel]}
+                        bucketStartMs={bucketStartMs}
+                        bucketIntervalMs={bucketIntervalMs}
+                      />
                     </TableCell>
                     <TableCell onClick={select}>
                       <UsageSparkline
                         data={tokenSparklines[u.responseModel]}
+                        bucketStartMs={bucketStartMs}
+                        bucketIntervalMs={bucketIntervalMs}
                         color="#10B981"
                         unitLabel={{ singular: "token", plural: "tokens" }}
                         formatTotal={(t) => formatNumberCompact(t)}
@@ -1459,6 +1474,8 @@ export default function ModelsPage() {
                   usage={projectUsage}
                   callSparklines={usageSparklines.calls}
                   tokenSparklines={usageSparklines.tokens}
+                  bucketStartMs={usageSparklines.bucketStartMs}
+                  bucketIntervalMs={usageSparklines.bucketIntervalMs}
                   organizationId={organizationId}
                   projectId={projectId}
                   environmentId={environmentId}

From b5a7a5696392d654eef84c377978f3413248f126 Mon Sep 17 00:00:00 2001
From: Eric Allam <eallam@icloud.com>
Date: Mon, 15 Jun 2026 18:47:35 +0100
Subject: [PATCH 4/7] fix(webapp): tidy Your models tab spacing and enlarge the
 charts

---
 .../route.tsx                                 | 40 +++++++++++--------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx
index a3530223a28..daac075ad2f 100644
--- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx
+++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx
@@ -1219,9 +1219,9 @@ function YourModelsTab({
   };
 
   return (
-    <div className="overflow-y-auto p-3 scrollbar-thin scrollbar-track-transparent scrollbar-thumb-charcoal-600">
-      <div className="grid grid-cols-1 gap-3 lg:grid-cols-3">
-        <div className="h-[260px]">
+    <div className="overflow-y-auto py-3 scrollbar-thin scrollbar-track-transparent scrollbar-thumb-charcoal-600">
+      <div className="grid grid-cols-1 gap-3 px-3 lg:grid-cols-3">
+        <div className="h-[312px]">
           <MetricWidget
             widgetKey="your-models-cost-time"
             title="Cost over time"
@@ -1230,7 +1230,7 @@ function YourModelsTab({
             {...widgetProps}
           />
         </div>
-        <div className="h-[260px]">
+        <div className="h-[312px]">
           <MetricWidget
             widgetKey="your-models-tokens-time"
             title="Tokens over time"
@@ -1244,7 +1244,7 @@ function YourModelsTab({
             {...widgetProps}
           />
         </div>
-        <div className="h-[260px]">
+        <div className="h-[312px]">
           <MetricWidget
             widgetKey="your-models-calls-by-model"
             title="Calls by model"
@@ -1267,17 +1267,25 @@ function YourModelsTab({
             </Button>
           </div>
         ) : (
-          <Table>
+          <Table className="table-fixed">
             <TableHeader>
               <TableRow>
-                <TableHeaderCell>Model</TableHeaderCell>
-                <TableHeaderCell>Provider</TableHeaderCell>
-                <TableHeaderCell alignment="right">Calls</TableHeaderCell>
-                <TableHeaderCell alignment="right">Cost</TableHeaderCell>
-                <TableHeaderCell alignment="right">Avg TTFC</TableHeaderCell>
-                <TableHeaderCell alignment="right">Avg tokens/sec</TableHeaderCell>
-                <TableHeaderCell>Calls trend</TableHeaderCell>
-                <TableHeaderCell>Tokens trend</TableHeaderCell>
+                <TableHeaderCell className="w-[20%]">Model</TableHeaderCell>
+                <TableHeaderCell className="w-[13%]">Provider</TableHeaderCell>
+                <TableHeaderCell className="w-[9%]" alignment="right">
+                  Calls
+                </TableHeaderCell>
+                <TableHeaderCell className="w-[9%]" alignment="right">
+                  Cost
+                </TableHeaderCell>
+                <TableHeaderCell className="w-[10%]" alignment="right">
+                  Avg TTFC
+                </TableHeaderCell>
+                <TableHeaderCell className="w-[12%]" alignment="right">
+                  Avg tokens/sec
+                </TableHeaderCell>
+                <TableHeaderCell className="w-[13.5%]">Calls trend</TableHeaderCell>
+                <TableHeaderCell className="w-[13.5%]">Tokens trend</TableHeaderCell>
               </TableRow>
             </TableHeader>
             <TableBody>
@@ -1446,8 +1454,8 @@ export default function ModelsPage() {
         <ResizablePanelGroup orientation="horizontal" className="max-h-full">
           <ResizablePanel id="models-main" min="100px">
             <div className="grid h-full max-h-full grid-rows-[auto_1fr] overflow-hidden">
-              <div className="flex h-fit items-center justify-between gap-2 border-b border-grid-bright px-3 pt-1.5">
-                <TabContainer>
+              <div className="flex h-fit items-center justify-between gap-2 border-b border-grid-bright pl-3 pr-1.5 pt-1.5">
+                <TabContainer className="-mb-px">
                   <TabButton
                     isActive={view === "yours"}
                     layoutId="models-page-tabs"

From e372c8bfa0d67f8c0fda96e11a9a153bfbc725ce Mon Sep 17 00:00:00 2001
From: Eric Allam <eallam@icloud.com>
Date: Tue, 16 Jun 2026 00:00:28 +0100
Subject: [PATCH 5/7] feat(webapp): add prompt-cache metrics to Models and AI
 metrics

The Your models tab gets a cache-savings column and per-model
cached-tokens and cache-hit-rate views; the AI metrics dashboard gets a
caching section (hit rate, cached tokens, estimated savings, hit rate by
model). Also makes all of the Your models charts time-series for
consistency.
---
 .server-changes/models-page-usage-tabs.md     |  2 +-
 .../presenters/v3/BuiltInDashboards.server.ts | 54 +++++++++++++--
 .../v3/ModelRegistryPresenter.server.ts       | 17 ++++-
 .../route.tsx                                 | 66 +++++++++++++++----
 4 files changed, 121 insertions(+), 18 deletions(-)

diff --git a/.server-changes/models-page-usage-tabs.md b/.server-changes/models-page-usage-tabs.md
index da2f4f2fda8..6b37b45dd20 100644
--- a/.server-changes/models-page-usage-tabs.md
+++ b/.server-changes/models-page-usage-tabs.md
@@ -3,4 +3,4 @@ area: webapp
 type: feature
 ---
 
-The Models page now has a Your models tab showing your project's model usage (cost, calls, latency, and trend sparklines over a selectable time range) alongside the full model library, which is ordered by provider relevance and release date.
+The Models page now has a Your models tab showing your project's model usage (cost, calls, latency, prompt-cache savings, and trend sparklines over a selectable time range) alongside the full model library, ordered by provider relevance and release date. The AI metrics dashboard also gains a caching section with cache hit rate, cached tokens, and estimated savings.
diff --git a/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts b/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts
index 06b5ee2d406..03561ee7e20 100644
--- a/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts
+++ b/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts
@@ -252,8 +252,13 @@ const llmDashboard: BuiltInDashboard = {
       { i: "llm-cost-user", x: 6, y: 92, w: 6, h: 13 },
       // Efficiency section
       { i: "llm-title-efficiency", x: 0, y: 105, w: 12, h: 2, minH: 2, maxH: 2 },
-      { i: "llm-cost-operation", x: 0, y: 107, w: 6, h: 13 },
-      { i: "llm-cache-util", x: 6, y: 107, w: 6, h: 13 },
+      { i: "llm-cost-operation", x: 0, y: 107, w: 12, h: 13 },
+      // Caching section
+      { i: "llm-title-caching", x: 0, y: 120, w: 12, h: 2, minH: 2, maxH: 2 },
+      { i: "llm-cache-hit", x: 0, y: 122, w: 6, h: 13 },
+      { i: "llm-cache-tokens", x: 6, y: 122, w: 6, h: 13 },
+      { i: "llm-cache-savings", x: 0, y: 135, w: 6, h: 13 },
+      { i: "llm-cache-by-model", x: 6, y: 135, w: 6, h: 13 },
     ],
     widgets: {
       "llm-cost": {
@@ -487,10 +492,11 @@ const llmDashboard: BuiltInDashboard = {
           aggregation: "sum",
         },
       },
-      "llm-cache-util": {
-        title: "Cache utilization",
+      "llm-title-caching": { title: "Caching", query: "", display: { type: "title" } },
+      "llm-cache-hit": {
+        title: "Cache hit rate over time",
         query:
-          "SELECT\r\n  timeBucket(),\r\n  round(countIf(cached_read_tokens > 0) * 100.0 / count(), 1) AS cache_hit_pct,\r\n  round(avg(cached_read_tokens), 0) AS avg_cached_tokens\r\nFROM\r\n  llm_metrics\r\nGROUP BY\r\n  timeBucket\r\nORDER BY\r\n  timeBucket",
+          "SELECT timeBucket(), round(sum(cached_read_tokens) * 100.0 / (sum(input_tokens) + sum(cached_read_tokens)), 1) AS cache_hit_pct FROM llm_metrics GROUP BY timeBucket ORDER BY timeBucket",
         display: {
           type: "chart",
           chartType: "line",
@@ -503,6 +509,44 @@ const llmDashboard: BuiltInDashboard = {
           aggregation: "avg",
         },
       },
+      "llm-cache-tokens": {
+        title: "Cached tokens over time",
+        query:
+          "SELECT timeBucket(), sum(cached_read_tokens) AS cache_reads, sum(cache_creation_tokens) AS cache_writes FROM llm_metrics GROUP BY timeBucket ORDER BY timeBucket",
+        display: {
+          type: "chart",
+          chartType: "bar",
+          xAxisColumn: "timebucket",
+          yAxisColumns: ["cache_reads", "cache_writes"],
+          groupByColumn: null,
+          stacked: true,
+          sortByColumn: null,
+          sortDirection: "asc",
+          aggregation: "sum",
+        },
+      },
+      "llm-cache-savings": {
+        title: "Cache savings over time",
+        query:
+          "SELECT timeBucket(), round(sum(cached_read_tokens) * (sum(input_cost) / (sum(input_tokens) + 1)) - sum(cached_read_cost), 4) AS cache_savings FROM llm_metrics WHERE cached_read_tokens > 0 GROUP BY timeBucket ORDER BY timeBucket",
+        display: {
+          type: "chart",
+          chartType: "bar",
+          xAxisColumn: "timebucket",
+          yAxisColumns: ["cache_savings"],
+          groupByColumn: null,
+          stacked: false,
+          sortByColumn: null,
+          sortDirection: "asc",
+          aggregation: "sum",
+        },
+      },
+      "llm-cache-by-model": {
+        title: "Cache hit rate by model",
+        query:
+          "SELECT response_model, round(sum(cached_read_tokens) * 100.0 / (sum(input_tokens) + sum(cached_read_tokens)), 1) AS cache_hit_pct, sum(cached_read_tokens) AS cached_tokens FROM llm_metrics GROUP BY response_model ORDER BY cached_tokens DESC LIMIT 20",
+        display: { type: "table", prettyFormatting: true, sorting: [] },
+      },
     },
   },
 };
diff --git a/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts b/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts
index 011a9ff8a12..b8565b87011 100644
--- a/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts
+++ b/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts
@@ -229,6 +229,12 @@ export type ProjectModelUsageItem = {
   totalTokens: number;
   avgTtfc: number;
   avgTps: number;
+  /** Input tokens (used as the denominator for the cache read rate). */
+  inputTokens: number;
+  /** Input tokens served from the provider's prompt cache. */
+  cachedReadTokens: number;
+  /** Actual (discounted) cost of those cached read tokens. */
+  cachedReadCost: number;
 };
 
 // --- ClickHouse schemas for user metrics ---
@@ -256,6 +262,9 @@ const ProjectModelUsageRow = z.object({
   total_tokens: z.coerce.number(),
   avg_ttfc: z.coerce.number(),
   avg_tps: z.coerce.number(),
+  input_tokens: z.coerce.number(),
+  cached_read_tokens: z.coerce.number(),
+  cached_read_cost: z.coerce.number(),
 });
 
 const ModelSparklineRow = z.object({
@@ -661,7 +670,10 @@ export class ModelRegistryPresenter extends BasePresenter {
           sum(total_cost) AS total_cost,
           sum(total_tokens) AS total_tokens,
           round(avg(ms_to_first_chunk), 1) AS avg_ttfc,
-          round(avg(tokens_per_second), 1) AS avg_tps
+          round(avg(tokens_per_second), 1) AS avg_tps,
+          sum(input_tokens) AS input_tokens,
+          sum(usage_details['input_cached_tokens']) AS cached_read_tokens,
+          sum(cost_details['input_cached_tokens']) AS cached_read_cost
         FROM trigger_dev.llm_metrics_v1
         WHERE project_id = {projectId: String}
           AND environment_id = {environmentId: String}
@@ -698,6 +710,9 @@ export class ModelRegistryPresenter extends BasePresenter {
       totalTokens: r.total_tokens,
       avgTtfc: r.avg_ttfc,
       avgTps: r.avg_tps,
+      inputTokens: r.input_tokens,
+      cachedReadTokens: r.cached_read_tokens,
+      cachedReadCost: r.cached_read_cost,
     }));
   }
 
diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx
index daac075ad2f..a3a60b88e37 100644
--- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx
+++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx
@@ -1125,6 +1125,17 @@ function DetailYourUsageTab({
           {...widgetProps}
         />
       </div>
+      <div className="h-[120px]">
+        <MetricWidget
+          widgetKey={`${modelName}-user-cached-tokens`}
+          title="Cached tokens"
+          query={`SELECT sum(cached_read_tokens) AS cached_tokens FROM llm_metrics WHERE response_model = '${escapeTSQL(
+            modelName
+          )}'`}
+          config={bignumberConfig("cached_tokens", { aggregation: "sum", abbreviate: true })}
+          {...widgetProps}
+        />
+      </div>
 
       <div className="h-[400px]">
         <MetricWidget
@@ -1156,6 +1167,22 @@ function DetailYourUsageTab({
           {...widgetProps}
         />
       </div>
+      <div className="h-[400px]">
+        <MetricWidget
+          widgetKey={`${modelName}-user-cache-hit`}
+          title="Cache hit rate over time"
+          query={`SELECT timeBucket(), round(sum(cached_read_tokens) * 100.0 / (sum(input_tokens) + sum(cached_read_tokens)), 1) AS cache_hit_pct FROM llm_metrics WHERE response_model = '${escapeTSQL(
+            modelName
+          )}' GROUP BY timeBucket ORDER BY timeBucket`}
+          config={chartConfig({
+            chartType: "line",
+            xAxisColumn: "timebucket",
+            yAxisColumns: ["cache_hit_pct"],
+            aggregation: "avg",
+          })}
+          {...widgetProps}
+        />
+      </div>
       <div className="h-[400px]">
         <MetricWidget
           widgetKey={`${modelName}-user-tasks`}
@@ -1246,10 +1273,10 @@ function YourModelsTab({
         </div>
         <div className="h-[312px]">
           <MetricWidget
-            widgetKey="your-models-calls-by-model"
-            title="Calls by model"
-            query={`SELECT response_model, count() AS calls FROM llm_metrics GROUP BY response_model ORDER BY calls DESC LIMIT 10`}
-            config={chartConfig({ chartType: "bar", xAxisColumn: "response_model", yAxisColumns: ["calls"] })}
+            widgetKey="your-models-calls-over-time"
+            title="Calls over time"
+            query={`SELECT timeBucket(), count() AS calls FROM llm_metrics GROUP BY timeBucket ORDER BY timeBucket`}
+            config={chartConfig({ chartType: "bar", xAxisColumn: "timebucket", yAxisColumns: ["calls"] })}
             {...widgetProps}
           />
         </div>
@@ -1270,22 +1297,25 @@ function YourModelsTab({
           <Table className="table-fixed">
             <TableHeader>
               <TableRow>
-                <TableHeaderCell className="w-[20%]">Model</TableHeaderCell>
-                <TableHeaderCell className="w-[13%]">Provider</TableHeaderCell>
-                <TableHeaderCell className="w-[9%]" alignment="right">
+                <TableHeaderCell className="w-[18%]">Model</TableHeaderCell>
+                <TableHeaderCell className="w-[12%]">Provider</TableHeaderCell>
+                <TableHeaderCell className="w-[8%]" alignment="right">
                   Calls
                 </TableHeaderCell>
-                <TableHeaderCell className="w-[9%]" alignment="right">
+                <TableHeaderCell className="w-[8%]" alignment="right">
                   Cost
                 </TableHeaderCell>
                 <TableHeaderCell className="w-[10%]" alignment="right">
+                  Cache savings
+                </TableHeaderCell>
+                <TableHeaderCell className="w-[9%]" alignment="right">
                   Avg TTFC
                 </TableHeaderCell>
-                <TableHeaderCell className="w-[12%]" alignment="right">
+                <TableHeaderCell className="w-[11%]" alignment="right">
                   Avg tokens/sec
                 </TableHeaderCell>
-                <TableHeaderCell className="w-[13.5%]">Calls trend</TableHeaderCell>
-                <TableHeaderCell className="w-[13.5%]">Tokens trend</TableHeaderCell>
+                <TableHeaderCell className="w-[12%]">Calls trend</TableHeaderCell>
+                <TableHeaderCell className="w-[12%]">Tokens trend</TableHeaderCell>
               </TableRow>
             </TableHeader>
             <TableBody>
@@ -1294,6 +1324,13 @@ function YourModelsTab({
                 const provider = catalogItem?.provider ?? u.genAiSystem;
                 const displayId = catalogItem?.displayId ?? `${provider}:${u.responseModel}`;
                 const select = catalogItem ? () => onSelectModel(catalogItem) : undefined;
+                // Savings = cached reads valued at the normal input rate minus what
+                // they actually cost. Needs the model's input price from the catalog.
+                const inputPrice = catalogItem?.inputPrice ?? null;
+                const cacheSavings =
+                  inputPrice != null && u.cachedReadTokens > 0
+                    ? Math.max(0, u.cachedReadTokens * inputPrice - u.cachedReadCost)
+                    : null;
                 return (
                   <TableRow
                     key={u.responseModel}
@@ -1314,6 +1351,13 @@ function YourModelsTab({
                     <TableCell onClick={select} alignment="right" className="tabular-nums">
                       {formatModelCost(u.totalCost)}
                     </TableCell>
+                    <TableCell
+                      onClick={select}
+                      alignment="right"
+                      className="tabular-nums text-emerald-400/80"
+                    >
+                      {cacheSavings != null ? formatModelCost(cacheSavings) : "—"}
+                    </TableCell>
                     <TableCell onClick={select} alignment="right" className="tabular-nums">
                       {u.avgTtfc > 0 ? `${u.avgTtfc.toFixed(0)}ms` : "—"}
                     </TableCell>

From adf068416bacb10923a88a25fce2b568c186ed9e Mon Sep 17 00:00:00 2001
From: Eric Allam <eallam@icloud.com>
Date: Tue, 16 Jun 2026 10:02:30 +0100
Subject: [PATCH 6/7] fix(webapp): harden cache metric and sparkline queries

The cache hit-rate queries divided by zero whenever the summed token
denominator was 0 (no input tokens in the bucket or for the model),
surfacing NaN or empty widgets, and the savings query avoided that only
by padding its denominator with + 1; they now all return 0 via
ifNull/nullIf. Model usage sparklines bucketed on a timezone-dependent
DateTime string, which could misalign bars with the charts above them;
they now key on toUnixTimestamp so buckets line up regardless of the
ClickHouse server timezone.
---
 .../presenters/v3/BuiltInDashboards.server.ts |  6 +++---
 .../v3/ModelRegistryPresenter.server.ts       | 19 ++++++++++---------
 .../route.tsx                                 |  2 +-
 3 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts b/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts
index 03561ee7e20..c8e74e30f6e 100644
--- a/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts
+++ b/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts
@@ -496,7 +496,7 @@ const llmDashboard: BuiltInDashboard = {
       "llm-cache-hit": {
         title: "Cache hit rate over time",
         query:
-          "SELECT timeBucket(), round(sum(cached_read_tokens) * 100.0 / (sum(input_tokens) + sum(cached_read_tokens)), 1) AS cache_hit_pct FROM llm_metrics GROUP BY timeBucket ORDER BY timeBucket",
+          "SELECT timeBucket(), round(ifNull(sum(cached_read_tokens) * 100.0 / nullIf(sum(input_tokens) + sum(cached_read_tokens), 0), 0), 1) AS cache_hit_pct FROM llm_metrics GROUP BY timeBucket ORDER BY timeBucket",
         display: {
           type: "chart",
           chartType: "line",
@@ -528,7 +528,7 @@ const llmDashboard: BuiltInDashboard = {
       "llm-cache-savings": {
         title: "Cache savings over time",
         query:
-          "SELECT timeBucket(), round(sum(cached_read_tokens) * (sum(input_cost) / (sum(input_tokens) + 1)) - sum(cached_read_cost), 4) AS cache_savings FROM llm_metrics WHERE cached_read_tokens > 0 GROUP BY timeBucket ORDER BY timeBucket",
+          "SELECT timeBucket(), round(ifNull(sum(cached_read_tokens) * (sum(input_cost) / nullIf(sum(input_tokens), 0)) - sum(cached_read_cost), 0), 4) AS cache_savings FROM llm_metrics WHERE cached_read_tokens > 0 GROUP BY timeBucket ORDER BY timeBucket",
         display: {
           type: "chart",
           chartType: "bar",
@@ -544,7 +544,7 @@ const llmDashboard: BuiltInDashboard = {
       "llm-cache-by-model": {
         title: "Cache hit rate by model",
         query:
-          "SELECT response_model, round(sum(cached_read_tokens) * 100.0 / (sum(input_tokens) + sum(cached_read_tokens)), 1) AS cache_hit_pct, sum(cached_read_tokens) AS cached_tokens FROM llm_metrics GROUP BY response_model ORDER BY cached_tokens DESC LIMIT 20",
+          "SELECT response_model, round(ifNull(sum(cached_read_tokens) * 100.0 / nullIf(sum(input_tokens) + sum(cached_read_tokens), 0), 0), 1) AS cache_hit_pct, sum(cached_read_tokens) AS cached_tokens FROM llm_metrics GROUP BY response_model ORDER BY cached_tokens DESC LIMIT 20",
         display: { type: "table", prettyFormatting: true, sorting: [] },
       },
     },
diff --git a/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts b/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts
index b8565b87011..364a5a58c00 100644
--- a/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts
+++ b/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts
@@ -96,16 +96,17 @@ function sparklineBucketSeconds(rangeMs: number): number {
 
 /**
  * Generate the ordered bucket-start keys for [from, to] at the given interval,
- * epoch-aligned in UTC to exactly match ClickHouse's
- * `toStartOfInterval(col, INTERVAL n SECOND)` output strings ("YYYY-MM-DD HH:MM:SS").
+ * as epoch seconds to match ClickHouse's
+ * `toUnixTimestamp(toStartOfInterval(col, INTERVAL n SECOND))` — timezone-independent
+ * (a raw DateTime string would depend on the ClickHouse server timezone).
  */
-function sparklineBucketKeys(from: Date, to: Date, intervalSeconds: number): string[] {
+function sparklineBucketKeys(from: Date, to: Date, intervalSeconds: number): number[] {
   const intervalMs = intervalSeconds * 1000;
   const start = Math.floor(from.getTime() / intervalMs) * intervalMs;
   const end = Math.floor(to.getTime() / intervalMs) * intervalMs;
-  const keys: string[] = [];
+  const keys: number[] = [];
   for (let t = start; t <= end; t += intervalMs) {
-    keys.push(new Date(t).toISOString().slice(0, 19).replace("T", " "));
+    keys.push(t / 1000);
   }
   return keys;
 }
@@ -269,7 +270,7 @@ const ProjectModelUsageRow = z.object({
 
 const ModelSparklineRow = z.object({
   response_model: z.string(),
-  bucket: z.string(),
+  bucket: z.coerce.number(),
   val: z.coerce.number(),
 });
 
@@ -754,7 +755,7 @@ export class ModelRegistryPresenter extends BasePresenter {
         query: `
           SELECT
             response_model,
-            toStartOfInterval(start_time, INTERVAL ${intervalSeconds} SECOND) AS bucket,
+            toUnixTimestamp(toStartOfInterval(start_time, INTERVAL ${intervalSeconds} SECOND)) AS bucket,
             ${valueExpr} AS val
           FROM trigger_dev.llm_metrics_v1
           WHERE environment_id = {environmentId: String}
@@ -797,9 +798,9 @@ export class ModelRegistryPresenter extends BasePresenter {
   #buildSparklineMap(
     queryResult:
       | [Error, null]
-      | [null, { response_model: string; bucket: string; val: number }[]],
+      | [null, { response_model: string; bucket: number; val: number }[]],
     keys: string[],
-    bucketKeys: string[]
+    bucketKeys: number[]
   ): Record<string, number[]> {
     const [error, rows] = queryResult;
     if (error || !rows) return {};
diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx
index a3a60b88e37..aa6155b1a65 100644
--- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx
+++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx
@@ -1171,7 +1171,7 @@ function DetailYourUsageTab({
         <MetricWidget
           widgetKey={`${modelName}-user-cache-hit`}
           title="Cache hit rate over time"
-          query={`SELECT timeBucket(), round(sum(cached_read_tokens) * 100.0 / (sum(input_tokens) + sum(cached_read_tokens)), 1) AS cache_hit_pct FROM llm_metrics WHERE response_model = '${escapeTSQL(
+          query={`SELECT timeBucket(), round(ifNull(sum(cached_read_tokens) * 100.0 / nullIf(sum(input_tokens) + sum(cached_read_tokens), 0), 0), 1) AS cache_hit_pct FROM llm_metrics WHERE response_model = '${escapeTSQL(
             modelName
           )}' GROUP BY timeBucket ORDER BY timeBucket`}
           config={chartConfig({

From e7a728ffd9e38ff1b6a065203771937cfd8b1766 Mon Sep 17 00:00:00 2001
From: Eric Allam <eallam@icloud.com>
Date: Tue, 16 Jun 2026 10:45:49 +0100
Subject: [PATCH 7/7] fix(webapp,llm-model-catalog): stop double-counting
 cached input tokens

input_tokens is the total prompt count, inclusive of cache-read and
cache-creation tokens. The cost pipeline charged the full input count at
the input price and then added a separate cache line, so cached tokens
were billed twice (e.g. ~2.4x on OpenAI), and the cache hit-rate metric
divided cached reads by input + cached, understating the rate. Charge
the input price only on the fresh (non-cached) remainder, resolve cache
prices across provider alias keys (falling back to input price so cache
tokens are never free), and compute the hit rate as cached / input.
---
 .../llm-cost-cached-token-double-charge.md    |   6 +
 .../presenters/v3/BuiltInDashboards.server.ts |   6 +-
 .../route.tsx                                 |   2 +-
 .../llm-model-catalog/src/registry.test.ts    | 105 +++++++++++++++++-
 .../llm-model-catalog/src/registry.ts         |  62 +++++++++++
 5 files changed, 173 insertions(+), 8 deletions(-)
 create mode 100644 .server-changes/llm-cost-cached-token-double-charge.md

diff --git a/.server-changes/llm-cost-cached-token-double-charge.md b/.server-changes/llm-cost-cached-token-double-charge.md
new file mode 100644
index 00000000000..c34b52de7a4
--- /dev/null
+++ b/.server-changes/llm-cost-cached-token-double-charge.md
@@ -0,0 +1,6 @@
+---
+area: webapp
+type: fix
+---
+
+LLM cost no longer double-counts cached input tokens. Prompt-cache reads and writes are now billed once at their cache rate instead of also being charged at the full input price, so cost and cache hit-rate figures on the AI metrics dashboard and Models page are accurate.
diff --git a/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts b/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts
index c8e74e30f6e..4f62fc00b86 100644
--- a/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts
+++ b/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts
@@ -496,7 +496,7 @@ const llmDashboard: BuiltInDashboard = {
       "llm-cache-hit": {
         title: "Cache hit rate over time",
         query:
-          "SELECT timeBucket(), round(ifNull(sum(cached_read_tokens) * 100.0 / nullIf(sum(input_tokens) + sum(cached_read_tokens), 0), 0), 1) AS cache_hit_pct FROM llm_metrics GROUP BY timeBucket ORDER BY timeBucket",
+          "SELECT timeBucket(), round(ifNull(sum(cached_read_tokens) * 100.0 / nullIf(sum(input_tokens), 0), 0), 1) AS cache_hit_pct FROM llm_metrics GROUP BY timeBucket ORDER BY timeBucket",
         display: {
           type: "chart",
           chartType: "line",
@@ -528,7 +528,7 @@ const llmDashboard: BuiltInDashboard = {
       "llm-cache-savings": {
         title: "Cache savings over time",
         query:
-          "SELECT timeBucket(), round(ifNull(sum(cached_read_tokens) * (sum(input_cost) / nullIf(sum(input_tokens), 0)) - sum(cached_read_cost), 0), 4) AS cache_savings FROM llm_metrics WHERE cached_read_tokens > 0 GROUP BY timeBucket ORDER BY timeBucket",
+          "SELECT timeBucket(), round(ifNull(sum(cached_read_tokens) * (sum(input_cost) / nullIf(sum(input_tokens) - sum(cached_read_tokens) - sum(cache_creation_tokens), 0)) - sum(cached_read_cost), 0), 4) AS cache_savings FROM llm_metrics WHERE cached_read_tokens > 0 GROUP BY timeBucket ORDER BY timeBucket",
         display: {
           type: "chart",
           chartType: "bar",
@@ -544,7 +544,7 @@ const llmDashboard: BuiltInDashboard = {
       "llm-cache-by-model": {
         title: "Cache hit rate by model",
         query:
-          "SELECT response_model, round(ifNull(sum(cached_read_tokens) * 100.0 / nullIf(sum(input_tokens) + sum(cached_read_tokens), 0), 0), 1) AS cache_hit_pct, sum(cached_read_tokens) AS cached_tokens FROM llm_metrics GROUP BY response_model ORDER BY cached_tokens DESC LIMIT 20",
+          "SELECT response_model, round(ifNull(sum(cached_read_tokens) * 100.0 / nullIf(sum(input_tokens), 0), 0), 1) AS cache_hit_pct, sum(cached_read_tokens) AS cached_tokens FROM llm_metrics GROUP BY response_model ORDER BY cached_tokens DESC LIMIT 20",
         display: { type: "table", prettyFormatting: true, sorting: [] },
       },
     },
diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx
index aa6155b1a65..a412311badc 100644
--- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx
+++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx
@@ -1171,7 +1171,7 @@ function DetailYourUsageTab({
         <MetricWidget
           widgetKey={`${modelName}-user-cache-hit`}
           title="Cache hit rate over time"
-          query={`SELECT timeBucket(), round(ifNull(sum(cached_read_tokens) * 100.0 / nullIf(sum(input_tokens) + sum(cached_read_tokens), 0), 0), 1) AS cache_hit_pct FROM llm_metrics WHERE response_model = '${escapeTSQL(
+          query={`SELECT timeBucket(), round(ifNull(sum(cached_read_tokens) * 100.0 / nullIf(sum(input_tokens), 0), 0), 1) AS cache_hit_pct FROM llm_metrics WHERE response_model = '${escapeTSQL(
             modelName
           )}' GROUP BY timeBucket ORDER BY timeBucket`}
           config={chartConfig({
diff --git a/internal-packages/llm-model-catalog/src/registry.test.ts b/internal-packages/llm-model-catalog/src/registry.test.ts
index 679c8c4cfcf..349ba2622e6 100644
--- a/internal-packages/llm-model-catalog/src/registry.test.ts
+++ b/internal-packages/llm-model-catalog/src/registry.test.ts
@@ -69,12 +69,59 @@ const claudeSonnet: LlmModelWithPricing = {
   ],
 };
 
+// Prices cache reads under the Anthropic-style alias `cache_read_input_tokens` (not
+// `input_cached_tokens`) plus a cache-creation price, to exercise alias resolution.
+const claudeWithCache: LlmModelWithPricing = {
+  id: "model-claude-with-cache",
+  friendlyId: "llm_model_claude_with_cache",
+  modelName: "claude-with-cache",
+  matchPattern: "^claude-with-cache$",
+  startDate: null,
+  pricingTiers: [
+    {
+      id: "tier-claude-with-cache",
+      name: "Standard",
+      isDefault: true,
+      priority: 0,
+      conditions: [],
+      prices: [
+        { usageType: "input", price: 0.000003 },
+        { usageType: "output", price: 0.000015 },
+        { usageType: "cache_read_input_tokens", price: 0.0000003 },
+        { usageType: "cache_creation_input_tokens", price: 0.00000375 },
+      ],
+    },
+  ],
+};
+
+// No cache prices at all — cached tokens should fall back to the input price.
+const noCachePrice: LlmModelWithPricing = {
+  id: "model-no-cache-price",
+  friendlyId: "llm_model_no_cache_price",
+  modelName: "no-cache-price",
+  matchPattern: "^no-cache-price$",
+  startDate: null,
+  pricingTiers: [
+    {
+      id: "tier-no-cache-price",
+      name: "Standard",
+      isDefault: true,
+      priority: 0,
+      conditions: [],
+      prices: [
+        { usageType: "input", price: 0.000003 },
+        { usageType: "output", price: 0.000015 },
+      ],
+    },
+  ],
+};
+
 describe("ModelPricingRegistry", () => {
   let registry: TestableRegistry;
 
   beforeEach(() => {
     registry = new TestableRegistry(null as any);
-    registry.loadPatterns([gpt4o, claudeSonnet]);
+    registry.loadPatterns([gpt4o, claudeSonnet, claudeWithCache, noCachePrice]);
   });
 
   describe("match", () => {
@@ -129,7 +176,10 @@ describe("ModelPricingRegistry", () => {
       expect(result!.totalCost).toBeCloseTo(0.0035);
     });
 
-    it("should include cached token costs", () => {
+    it("should include cached token costs and charge input only on the fresh portion", () => {
+      // input_tokens (500) is inclusive of the 200 cached read tokens, so the input price
+      // applies to the 300 fresh tokens and the cache price to the 200 cached tokens — the
+      // cached tokens must not be billed twice.
       const result = registry.calculateCost("gpt-4o", {
         input: 500,
         output: 50,
@@ -137,10 +187,57 @@ describe("ModelPricingRegistry", () => {
       });
 
       expect(result).not.toBeNull();
-      expect(result!.costDetails["input"]).toBeCloseTo(0.00125); // 500 * 0.0000025
+      expect(result!.costDetails["input"]).toBeCloseTo(0.00075); // (500 - 200) * 0.0000025
       expect(result!.costDetails["output"]).toBeCloseTo(0.0005); // 50 * 0.00001
       expect(result!.costDetails["input_cached_tokens"]).toBeCloseTo(0.00025); // 200 * 0.00000125
-      expect(result!.totalCost).toBeCloseTo(0.002);
+      expect(result!.totalCost).toBeCloseTo(0.0015);
+    });
+
+    it("should not double-charge cache creation tokens (subset of input)", () => {
+      // input (1000) is inclusive of both the 400 cache-read and 300 cache-creation tokens.
+      const result = registry.calculateCost("claude-with-cache", {
+        input: 1000,
+        output: 100,
+        input_cached_tokens: 400,
+        cache_creation_input_tokens: 300,
+      });
+
+      expect(result).not.toBeNull();
+      // fresh input = 1000 - 400 - 300 = 300. Precision 9: the default (±0.005) is too loose.
+      expect(result!.costDetails["input"]).toBeCloseTo(0.0009, 9); // 300 * 0.000003
+      expect(result!.costDetails["input_cached_tokens"]).toBeCloseTo(0.00012, 9); // 400 * 0.0000003
+      expect(result!.costDetails["cache_creation_input_tokens"]).toBeCloseTo(0.001125, 9); // 300 * 0.00000375
+      expect(result!.costDetails["output"]).toBeCloseTo(0.0015, 9); // 100 * 0.000015
+      // 0.0009 + 0.00012 + 0.001125 + 0.0015
+      expect(result!.totalCost).toBeCloseTo(0.003645, 9);
+    });
+
+    it("should apply the cache-read discount when priced under a provider alias key", () => {
+      // The usage is normalized to `input_cached_tokens` but this model prices cache reads
+      // under `cache_read_input_tokens` — the discount must still apply (checked to 9 digits).
+      const result = registry.calculateCost("claude-with-cache", {
+        input: 1000,
+        input_cached_tokens: 400,
+      });
+
+      expect(result).not.toBeNull();
+      expect(result!.costDetails["input"]).toBeCloseTo(0.0018, 9); // (1000 - 400) * 0.000003
+      expect(result!.costDetails["input_cached_tokens"]).toBeCloseTo(0.00012, 9); // 400 * 0.0000003
+      expect(result!.totalCost).toBeCloseTo(0.00192, 9);
+    });
+
+    it("should fall back to the input price for cache tokens when no cache price exists", () => {
+      // no-cache-price model has only input/output prices; cached tokens must still be billed
+      // (at the input price) — never free, never double-charged. Checked to 9 decimal digits.
+      const result = registry.calculateCost("no-cache-price", {
+        input: 1000,
+        input_cached_tokens: 400,
+      });
+
+      expect(result).not.toBeNull();
+      expect(result!.costDetails["input"]).toBeCloseTo(0.0018, 9); // (1000 - 400) * 0.000003
+      expect(result!.costDetails["input_cached_tokens"]).toBeCloseTo(0.0012, 9); // 400 * 0.000003
+      expect(result!.totalCost).toBeCloseTo(0.003, 9); // 1000 * 0.000003 — unchanged from no-cache behavior
     });
 
     it("should return null for unknown model", () => {
diff --git a/internal-packages/llm-model-catalog/src/registry.ts b/internal-packages/llm-model-catalog/src/registry.ts
index 80da40ba980..841234561cf 100644
--- a/internal-packages/llm-model-catalog/src/registry.ts
+++ b/internal-packages/llm-model-catalog/src/registry.ts
@@ -147,7 +147,69 @@ export class ModelPricingRegistry {
     const costDetails: Record<string, number> = {};
     let totalCost = 0;
 
+    // `input_tokens` (the "input" usage value) is the TOTAL prompt token count and is
+    // inclusive of cache-read and cache-creation tokens — providers report it that way and
+    // the AI SDK passes it through (verified: total_tokens == input + output, never the
+    // sum of the decomposed parts). Cache reads/writes are therefore a SUBSET of input, not
+    // additional to it. Charging the full input count at the input price AND charging a
+    // separate cache line double-counts those tokens, so the input price must apply only to
+    // the fresh (non-cached) remainder.
+    const priceByType = new Map(tier.prices.map((p) => [p.usageType, p.price]));
+    const resolvePrice = (aliases: string[]): number | undefined => {
+      for (const alias of aliases) {
+        const price = priceByType.get(alias);
+        if (price !== undefined) return price;
+      }
+      return undefined;
+    };
+
+    const inputPrice = resolvePrice(["input", "input_tokens"]) ?? 0;
+    const cacheReadTokens = usageDetails["input_cached_tokens"] ?? 0;
+    const cacheCreationTokens = usageDetails["cache_creation_input_tokens"] ?? 0;
+
+    // Providers price cache reads/writes under provider-specific keys, but our usage details
+    // normalize them to `input_cached_tokens` / `cache_creation_input_tokens`. Resolve the
+    // matching price across the known aliases, falling back to the input price so cache tokens
+    // are never billed for free and never dropped when a model lacks a dedicated cache price.
+    const cacheReadPrice =
+      resolvePrice(["input_cached_tokens", "input_cache_read", "cache_read_input_tokens"]) ??
+      inputPrice;
+    const cacheCreationPrice =
+      resolvePrice([
+        "cache_creation_input_tokens",
+        "input_cache_creation",
+        "input_cache_creation_5m",
+      ]) ?? inputPrice;
+
+    const totalInputTokens = usageDetails["input"] ?? usageDetails["input_tokens"] ?? 0;
+    const freshInputTokens = Math.max(0, totalInputTokens - cacheReadTokens - cacheCreationTokens);
+
+    const addCost = (usageType: string, tokenCount: number, price: number) => {
+      if (tokenCount <= 0 || price <= 0) return;
+      const cost = tokenCount * price;
+      costDetails[usageType] = (costDetails[usageType] ?? 0) + cost;
+      totalCost += cost;
+    };
+
+    addCost("input", freshInputTokens, inputPrice);
+    addCost("input_cached_tokens", cacheReadTokens, cacheReadPrice);
+    addCost("cache_creation_input_tokens", cacheCreationTokens, cacheCreationPrice);
+
+    // Charge the remaining usage types generically; input/cache keys (handled above) are skipped
+    // to avoid double charging. NOTE(review): `input_cache_creation_1h` is skipped yet unpriced.
+    const handledUsageTypes = new Set([
+      "input",
+      "input_tokens",
+      "input_cached_tokens",
+      "input_cache_read",
+      "cache_read_input_tokens",
+      "cache_creation_input_tokens",
+      "input_cache_creation",
+      "input_cache_creation_5m",
+      "input_cache_creation_1h",
+    ]);
     for (const priceEntry of tier.prices) {
+      if (handledUsageTypes.has(priceEntry.usageType)) continue;
       const tokenCount = usageDetails[priceEntry.usageType] ?? 0;
       if (tokenCount === 0) continue;
       const cost = tokenCount * priceEntry.price;