Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
35f980a
feat(coderd): rank chat workspace templates
jaaydenh May 7, 2026
f8882fc
test(coderd/database): cover workspace usage authz
jaaydenh May 7, 2026
34c7fac
fix(coderd): address template ranking feedback
jaaydenh May 15, 2026
b8ae639
fix(coderd): follow up on template ranking feedback
jaaydenh May 15, 2026
ee89c16
Merge branch 'main' into workspace-bdje
jaaydenh May 18, 2026
15acaf5
Merge remote-tracking branch 'origin/main' into merge-workspace-bdje-…
jaaydenh May 27, 2026
0fcc777
Merge branch 'main' into workspace-bdje
jaaydenh May 27, 2026
4d357de
fix(coderd): address template ranking feedback
jaaydenh Jun 1, 2026
f8e7819
Merge branch 'main' into workspace-bdje
jaaydenh Jun 1, 2026
9d1ee50
fix(coderd/x/chatd): require template selection on weak matches
jaaydenh Jun 1, 2026
4c20d3f
fix(coderd): rank chat templates by frecency score
jaaydenh Jun 4, 2026
dfc293d
fix(coderd): address template ranking review feedback
jaaydenh Jun 5, 2026
5a7641f
fix(coderd/x/chatd/chattool): add list_templates auth error context
jaaydenh Jun 5, 2026
42fed81
test(coderd): update chat system prompt expectation
jaaydenh Jun 5, 2026
58f73db
Merge branch 'main' into workspace-bdje
jaaydenh Jun 9, 2026
d7fe770
chore: update comments
jaaydenh Jun 9, 2026
95f7beb
test(coderd/database/dbauthz): cover template ranking signals deny path
jaaydenh Jun 10, 2026
71eca18
refactor(coderd/x/chatd/chattool): simplify templateQueryScore tier m…
jaaydenh Jun 10, 2026
36a6be6
refactor: replace list_templates selection hints with a next_step ins…
jaaydenh Jun 11, 2026
19ce6aa
test(coderd): cover template recommendation edge cases from review
jaaydenh Jun 11, 2026
fcec96b
docs: add list_templates request and response example
jaaydenh Jun 12, 2026
9e38592
docs: state the exact deleted-workspace weight in template ranking
jaaydenh Jun 12, 2026
4f6ac74
docs: detail query relevance tiers and the affinity formula
jaaydenh Jun 12, 2026
9a91632
Merge remote-tracking branch 'origin/main' into merge-conflicts-annk
jaaydenh Jun 17, 2026
3017d21
feat(coderd/x/chatd): add list_templates ranking telemetry
jaaydenh Jun 17, 2026
137ccf9
fix: address list_templates ranking telemetry review feedback
jaaydenh Jun 18, 2026
b7d60a0
Merge branch 'main' into chatd-list-templates-telemetry
jaaydenh Jun 18, 2026
ab4fb75
fix(coderd/x/chatd): address round 2 telemetry review nits
jaaydenh Jun 18, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion coderd/x/chatd/chatd.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,11 @@ type Server struct {
chatWorker *chatWorker
messagePartBuffer *messagepartbuffer.Buffer
streamSyncPoller *streamSyncPoller
recordingSem chan struct{}
// templateRecommendations tracks list_templates recommendations to classify
// the template a later create_workspace builds. In-memory and best-effort;
// see chattool.RecommendationTracker.
templateRecommendations *chattool.RecommendationTracker
recordingSem chan struct{}

aibridgeTransportFactory *atomic.Pointer[aibridge.TransportFactory]
aiGatewayRoutingEnabled bool
Expand Down Expand Up @@ -3447,6 +3451,7 @@ func New(ps pubsub.Pubsub, cfg Config) *Server {
panic("chatd: create chat worker: " + err.Error())
}
p.chatWorker = chatWorker
p.templateRecommendations = chattool.NewRecommendationTracker(clk, 0, 0)

//nolint:gocritic // The chat processor uses a scoped chatd context.
ctx = dbauthz.AsChatd(ctx)
Expand Down Expand Up @@ -4202,6 +4207,9 @@ func (p *Server) appendRootChatTools(
Logger: p.logger,
Clock: p.clock,
AllowedTemplateIDs: p.chatTemplateAllowlist,
ChatID: opts.chat.ID,
Metrics: p.metrics,
Recommendations: p.templateRecommendations,
}),
chattool.ReadTemplate(p.db, opts.chat.OrganizationID, chattool.ReadTemplateOptions{
OwnerID: opts.chat.OwnerID,
Expand All @@ -4216,6 +4224,8 @@ func (p *Server) appendRootChatTools(
OnChatUpdated: onChatUpdated,
Logger: p.logger,
AllowedTemplateIDs: p.chatTemplateAllowlist,
Metrics: p.metrics,
Recommendations: p.templateRecommendations,
}),
chattool.StartWorkspace(p.db, opts.chat.ID, chattool.StartWorkspaceOptions{
OwnerID: opts.chat.OwnerID,
Expand Down
69 changes: 69 additions & 0 deletions coderd/x/chatd/chatloop/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ type Metrics struct {
StepsTotal *prometheus.CounterVec
StreamRetriesTotal *prometheus.CounterVec
StreamBufferDroppedTotal prometheus.Counter

ListTemplatesOutcomeTotal *prometheus.CounterVec
ListTemplatesSignalsFailuresTotal prometheus.Counter
ListTemplatesAffinityGap *prometheus.HistogramVec
TemplateRecommendationFollowupTotal *prometheus.CounterVec
}

// NewMetrics creates a new Metrics instance registered with the
Expand Down Expand Up @@ -109,6 +114,31 @@ func NewMetrics(reg prometheus.Registerer) *Metrics {
Name: "stream_buffer_dropped_total",
Help: "Number of chat stream buffer events dropped due to the per-chat buffer cap.",
}),
ListTemplatesOutcomeTotal: factory.NewCounterVec(prometheus.CounterOpts{
Namespace: metricsNamespace,
Subsystem: metricsSubsystem,
Name: "list_templates_outcome_total",
Help: "Total list_templates calls by recommendation outcome (recommended, ask_user, no_matches, no_templates).",
}, []string{"outcome"}),
ListTemplatesSignalsFailuresTotal: factory.NewCounter(prometheus.CounterOpts{
Namespace: metricsNamespace,
Subsystem: metricsSubsystem,
Name: "list_templates_signals_failures_total",
Help: "Total list_templates calls where ranking signals failed to load, degrading the result toward asking the user.",
}),
ListTemplatesAffinityGap: factory.NewHistogramVec(prometheus.HistogramOpts{
Namespace: metricsNamespace,
Subsystem: metricsSubsystem,
Name: "list_templates_affinity_gap",
Help: "Affinity score gap between the top two candidates when affinity is the deciding signal, labeled by whether a recommendation was made.",
Buckets: prometheus.ExponentialBuckets(0.1, 2, 9), // 0.1 .. 25.6
}, []string{"recommended"}),
TemplateRecommendationFollowupTotal: factory.NewCounterVec(prometheus.CounterOpts{
Namespace: metricsNamespace,
Subsystem: metricsSubsystem,
Name: "template_recommendation_followup_total",
Help: "Total create_workspace calls by how the chosen template related to the prior list_templates recommendation (accepted_recommendation, overrode_with_listed_template, created_listed_without_recommendation, created_unlisted_template, no_recent_list_templates).",
}, []string{"outcome"}),
}
}

Expand Down Expand Up @@ -165,6 +195,45 @@ func (m *Metrics) RecordToolError(provider, model, toolLabel string) {
m.ToolErrorsTotal.WithLabelValues(provider, model, toolLabel).Inc()
}

// RecordListTemplatesOutcome bumps the list_templates_outcome_total series
// whose "outcome" label equals the supplied recommendation outcome. Calling
// it on a nil *Metrics does nothing.
func (m *Metrics) RecordListTemplatesOutcome(outcome string) {
	if m != nil {
		counter := m.ListTemplatesOutcomeTotal.WithLabelValues(outcome)
		counter.Inc()
	}
}

// RecordListTemplatesSignalsFailure adds one to the
// list_templates_signals_failures_total counter. Calling it on a nil
// *Metrics does nothing.
func (m *Metrics) RecordListTemplatesSignalsFailure() {
	if m != nil {
		m.ListTemplatesSignalsFailuresTotal.Inc()
	}
}

// RecordListTemplatesAffinityGap feeds gap, the affinity difference between
// the two highest-ranked list_templates candidates, into the affinity-gap
// histogram. The observation lands in the series whose label is the string
// form of recommended ("true" or "false").
//
// The value is observed as given: callers are expected to record only when
// affinity is the deciding signal, so that the gap is non-negative and
// meaningful. Calling it on a nil *Metrics does nothing.
func (m *Metrics) RecordListTemplatesAffinityGap(recommended bool, gap float64) {
	if m != nil {
		label := strconv.FormatBool(recommended)
		m.ListTemplatesAffinityGap.WithLabelValues(label).Observe(gap)
	}
}

// RecordTemplateRecommendationFollowup bumps the
// template_recommendation_followup_total series whose "outcome" label
// describes how a create_workspace call related to the earlier
// list_templates recommendation. Calling it on a nil *Metrics does nothing.
func (m *Metrics) RecordTemplateRecommendationFollowup(outcome string) {
	if m != nil {
		counter := m.TemplateRecommendationFollowupTotal.WithLabelValues(outcome)
		counter.Inc()
	}
}

// RecordStreamBufferDropped increments stream_buffer_dropped_total
// once per dropped event. No-op when m is nil.
func (m *Metrics) RecordStreamBufferDropped() {
Expand Down
40 changes: 39 additions & 1 deletion coderd/x/chatd/chattool/createworkspace.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,16 @@ type AgentConnFunc func(
agentID uuid.UUID,
) (workspacesdk.AgentConn, func(), error)

// CreateWorkspaceOptions configures the create_workspace tool.
// CreateWorkspaceMetrics is the telemetry sink used by the create_workspace
// tool. It is implemented by *chatloop.Metrics and declared here (rather than
// imported) because chatloop imports chattool, so chattool must not import
// chatloop.
type CreateWorkspaceMetrics interface {
	// RecordTemplateRecommendationFollowup records one create_workspace
	// call, labeled by how the chosen template related to the prior
	// list_templates recommendation.
	RecordTemplateRecommendationFollowup(outcome string)
}

// CreateWorkspaceOptions configures the create_workspace tool. Metrics and
// Recommendations are optional telemetry hooks that classify how the chosen
// template related to the prior list_templates recommendation for this chat.
type CreateWorkspaceOptions struct {
OwnerID uuid.UUID
CreateFn CreateWorkspaceFn
Expand All @@ -72,6 +81,8 @@ type CreateWorkspaceOptions struct {
OnChatUpdated func(database.Chat)
Logger slog.Logger
AllowedTemplateIDs func() map[uuid.UUID]bool
Metrics CreateWorkspaceMetrics
Recommendations *RecommendationTracker
}

type createWorkspaceArgs struct {
Expand Down Expand Up @@ -277,6 +288,10 @@ func CreateWorkspace(db database.Store, organizationID, chatID uuid.UUID, option
options.OnChatUpdated(updatedChat)
}

// Only genuine creations reach here; the idempotent
// existing-workspace path returns earlier.
recordRecommendationFollowup(ctx, options, chatID, workspace.ID, templateID)

// Wait for the build to complete and the agent to
// come online so subsequent tools can use the
// workspace immediately.
Expand Down Expand Up @@ -362,6 +377,29 @@ type existingWorkspaceResult struct {
Err error
}

// recordRecommendationFollowup classifies how a freshly created workspace's
// template related to the prior list_templates recommendation for the chat and
// records it as a metric plus a structured log. Classification is best-effort:
// it degrades to "no_recent_list_templates" when no in-memory record exists
// (restart, replica handoff, expiry, or list_templates was never called).
func recordRecommendationFollowup(
ctx context.Context,
options CreateWorkspaceOptions,
chatID, workspaceID, templateID uuid.UUID,
) {
followup := options.Recommendations.Classify(chatID, templateID)
if options.Metrics != nil {
options.Metrics.RecordTemplateRecommendationFollowup(followup)
}
options.Logger.Info(ctx, "create_workspace recommendation follow-up",
Comment thread
jaaydenh marked this conversation as resolved.
slog.F("chat_id", chatID),
slog.F("owner_id", options.OwnerID),
slog.F("workspace_id", workspaceID),
slog.F("template_id", templateID),
slog.F("recommendation_followup", followup),
)
}

// checkExistingWorkspace checks whether the given chat
// already has a usable workspace. Returns an
// existingWorkspaceResult with Done set when the caller should
Expand Down
65 changes: 65 additions & 0 deletions coderd/x/chatd/chattool/createworkspace_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2100,3 +2100,68 @@ func TestCreateWorkspace_WithPresetAndParams(t *testing.T) {
require.Equal(t, "region", capturedReq.RichParameterValues[0].Name)
require.Equal(t, "us-east", capturedReq.RichParameterValues[0].Value)
}

// fakeCreateWorkspaceMetrics is a test double for CreateWorkspaceMetrics. It
// keeps every follow-up outcome it is handed, in call order, so tests can
// assert on the exact sequence. It performs no locking.
type fakeCreateWorkspaceMetrics struct {
	// followups holds the recorded outcomes, oldest first.
	followups []string
}

// RecordTemplateRecommendationFollowup appends outcome to the recorded list.
func (f *fakeCreateWorkspaceMetrics) RecordTemplateRecommendationFollowup(outcome string) {
	f.followups = append(f.followups, outcome)
}

// TestCreateWorkspace_RecordsRecommendationFollowup verifies the create path
// classifies the chosen template against a prior list_templates recommendation
// shared through the tracker, and records the follow-up metric.
Comment thread
jaaydenh marked this conversation as resolved.
func TestCreateWorkspace_RecordsRecommendationFollowup(t *testing.T) {
t.Parallel()

ctrl := gomock.NewController(t)
db := newCreateWorkspaceMockStore(ctrl)

// Fresh random IDs keep this test independent of any other fixture.
ownerID := uuid.New()
orgID := uuid.New()
chatID := uuid.New()
templateID := uuid.New()
workspaceID := uuid.New()

// Store calls the create path is expected to make for this chat, owner,
// template, and the workspace returned by createFn below.
db.EXPECT().GetChatByID(gomock.Any(), chatID).Return(database.Chat{ID: chatID}, nil)
db.EXPECT().UpdateChatWorkspaceBinding(gomock.Any(), gomock.Any()).Return(database.Chat{ID: chatID}, nil)
db.EXPECT().GetAuthorizationUserRoles(gomock.Any(), ownerID).Return(database.GetAuthorizationUserRolesRow{
ID: ownerID, Roles: []string{}, Groups: []string{}, Status: database.UserStatusActive,
}, nil)
db.EXPECT().GetTemplateByID(gomock.Any(), templateID).Return(database.Template{
ID: templateID, OrganizationID: orgID,
}, nil)
db.EXPECT().GetChatWorkspaceTTL(gomock.Any()).Return("0s", nil)
// Empty agent list short-circuits the agent-ready wait.
db.EXPECT().GetWorkspaceAgentsInLatestBuildByWorkspaceID(gomock.Any(), workspaceID).
Return([]database.WorkspaceAgent{}, nil)

// A nil-build workspace skips the build-completion wait.
createFn := func(_ context.Context, _ uuid.UUID, req codersdk.CreateWorkspaceRequest) (codersdk.Workspace, error) {
return codersdk.Workspace{ID: workspaceID, Name: req.Name, OwnerName: "testuser"}, nil
}

// Seed the tracker so the chat already has a recommendation for templateID.
tracker := NewRecommendationTracker(nil, 0, 0)
tracker.Record(chatID, templateID, []uuid.UUID{templateID}, 1)
metrics := &fakeCreateWorkspaceMetrics{}

// The tool under test shares the seeded tracker and the fake metrics sink.
tool := CreateWorkspace(db, orgID, chatID, CreateWorkspaceOptions{
OwnerID: ownerID,
CreateFn: createFn,
WorkspaceMu: &sync.Mutex{},
Logger: slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}),
Metrics: metrics,
Recommendations: tracker,
})

// Ask for exactly the template that was seeded as the recommendation.
input := fmt.Sprintf(`{"template_id":%q}`, templateID.String())
resp, err := tool.Run(context.Background(), fantasy.ToolCall{ID: "call-1", Name: "create_workspace", Input: input})
require.NoError(t, err)
require.False(t, resp.IsError)

// Exactly one follow-up must be recorded, classified as accepted.
require.Equal(t, []string{recommendationFollowupAccepted}, metrics.followups)
// The recommendation was consumed: a repeat classification finds nothing.
require.Equal(t, recommendationFollowupNoRecord, tracker.Classify(chatID, templateID))
}
Loading
Loading