From 35f980a47712cf5056b83c8a0df437e68850e899 Mon Sep 17 00:00:00 2001 From: Jaayden Halko Date: Thu, 7 May 2026 12:06:06 +0000 Subject: [PATCH 01/21] feat(coderd): rank chat workspace templates --- coderd/database/dbauthz/dbauthz.go | 13 + coderd/database/dbmetrics/querymetrics.go | 8 + coderd/database/dbmock/dbmock.go | 15 + coderd/database/querier.go | 1 + coderd/database/queries.sql.go | 54 +++ coderd/database/queries/workspaces.sql | 18 + coderd/x/chatd/chattool/createworkspace.go | 15 +- coderd/x/chatd/chattool/listtemplates.go | 339 +++++++++++++++--- coderd/x/chatd/chattool/listtemplates_test.go | 266 ++++++++++++++ coderd/x/chatd/chattool/readtemplate.go | 11 +- coderd/x/chatd/prompt.go | 8 + 11 files changed, 682 insertions(+), 66 deletions(-) diff --git a/coderd/database/dbauthz/dbauthz.go b/coderd/database/dbauthz/dbauthz.go index 9badded7e03e4..ac795a8d27360 100644 --- a/coderd/database/dbauthz/dbauthz.go +++ b/coderd/database/dbauthz/dbauthz.go @@ -5077,6 +5077,19 @@ func (q *querier) GetWorkspaceUniqueOwnerCountByTemplateIDs(ctx context.Context, return q.db.GetWorkspaceUniqueOwnerCountByTemplateIDs(ctx, templateIDs) } +func (q *querier) GetWorkspaceUsageGroupedByTemplateIDForOwner(ctx context.Context, arg database.GetWorkspaceUsageGroupedByTemplateIDForOwnerParams) ([]database.GetWorkspaceUsageGroupedByTemplateIDForOwnerRow, error) { + obj := rbac.ResourceWorkspace.WithOwner(arg.OwnerID.String()) + if arg.OrganizationID != uuid.Nil { + obj = obj.InOrg(arg.OrganizationID) + } else { + obj = obj.AnyOrganization() + } + if err := q.authorizeContext(ctx, policy.ActionRead, obj); err != nil { + return nil, err + } + return q.db.GetWorkspaceUsageGroupedByTemplateIDForOwner(ctx, arg) +} + func (q *querier) GetWorkspaces(ctx context.Context, arg database.GetWorkspacesParams) ([]database.GetWorkspacesRow, error) { prep, err := prepareSQLFilter(ctx, q.auth, policy.ActionRead, rbac.ResourceWorkspace.Type) if err != nil { diff --git 
a/coderd/database/dbmetrics/querymetrics.go b/coderd/database/dbmetrics/querymetrics.go index 125e86b2a4c6f..1d46b092fd121 100644 --- a/coderd/database/dbmetrics/querymetrics.go +++ b/coderd/database/dbmetrics/querymetrics.go @@ -3440,6 +3440,14 @@ func (m queryMetricsStore) GetWorkspaceUniqueOwnerCountByTemplateIDs(ctx context return r0, r1 } +func (m queryMetricsStore) GetWorkspaceUsageGroupedByTemplateIDForOwner(ctx context.Context, arg database.GetWorkspaceUsageGroupedByTemplateIDForOwnerParams) ([]database.GetWorkspaceUsageGroupedByTemplateIDForOwnerRow, error) { + start := time.Now() + r0, r1 := m.s.GetWorkspaceUsageGroupedByTemplateIDForOwner(ctx, arg) + m.queryLatencies.WithLabelValues("GetWorkspaceUsageGroupedByTemplateIDForOwner").Observe(time.Since(start).Seconds()) + m.queryCounts.WithLabelValues(httpmw.ExtractHTTPRoute(ctx), httpmw.ExtractHTTPMethod(ctx), "GetWorkspaceUsageGroupedByTemplateIDForOwner").Inc() + return r0, r1 +} + func (m queryMetricsStore) GetWorkspaces(ctx context.Context, arg database.GetWorkspacesParams) ([]database.GetWorkspacesRow, error) { start := time.Now() r0, r1 := m.s.GetWorkspaces(ctx, arg) diff --git a/coderd/database/dbmock/dbmock.go b/coderd/database/dbmock/dbmock.go index bfb29d8559b00..2e0fcd42d417b 100644 --- a/coderd/database/dbmock/dbmock.go +++ b/coderd/database/dbmock/dbmock.go @@ -6437,6 +6437,21 @@ func (mr *MockStoreMockRecorder) GetWorkspaceUniqueOwnerCountByTemplateIDs(ctx, return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetWorkspaceUniqueOwnerCountByTemplateIDs", reflect.TypeOf((*MockStore)(nil).GetWorkspaceUniqueOwnerCountByTemplateIDs), ctx, templateIds) } +// GetWorkspaceUsageGroupedByTemplateIDForOwner mocks base method. 
+func (m *MockStore) GetWorkspaceUsageGroupedByTemplateIDForOwner(ctx context.Context, arg database.GetWorkspaceUsageGroupedByTemplateIDForOwnerParams) ([]database.GetWorkspaceUsageGroupedByTemplateIDForOwnerRow, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetWorkspaceUsageGroupedByTemplateIDForOwner", ctx, arg) + ret0, _ := ret[0].([]database.GetWorkspaceUsageGroupedByTemplateIDForOwnerRow) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetWorkspaceUsageGroupedByTemplateIDForOwner indicates an expected call of GetWorkspaceUsageGroupedByTemplateIDForOwner. +func (mr *MockStoreMockRecorder) GetWorkspaceUsageGroupedByTemplateIDForOwner(ctx, arg any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetWorkspaceUsageGroupedByTemplateIDForOwner", reflect.TypeOf((*MockStore)(nil).GetWorkspaceUsageGroupedByTemplateIDForOwner), ctx, arg) +} + // GetWorkspaces mocks base method. func (m *MockStore) GetWorkspaces(ctx context.Context, arg database.GetWorkspacesParams) ([]database.GetWorkspacesRow, error) { m.ctrl.T.Helper() diff --git a/coderd/database/querier.go b/coderd/database/querier.go index 795c9a7af19be..02b8212c70f03 100644 --- a/coderd/database/querier.go +++ b/coderd/database/querier.go @@ -833,6 +833,7 @@ type sqlcQuerier interface { GetWorkspaceResourcesByJobIDs(ctx context.Context, ids []uuid.UUID) ([]WorkspaceResource, error) GetWorkspaceResourcesCreatedAfter(ctx context.Context, createdAt time.Time) ([]WorkspaceResource, error) GetWorkspaceUniqueOwnerCountByTemplateIDs(ctx context.Context, templateIds []uuid.UUID) ([]GetWorkspaceUniqueOwnerCountByTemplateIDsRow, error) + GetWorkspaceUsageGroupedByTemplateIDForOwner(ctx context.Context, arg GetWorkspaceUsageGroupedByTemplateIDForOwnerParams) ([]GetWorkspaceUsageGroupedByTemplateIDForOwnerRow, error) // build_params is used to filter by build parameters if present. 
// It has to be a CTE because the set returning function 'unnest' cannot // be used in a WHERE clause. diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index c18e34bb19c28..cbefcfff43efd 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -32389,6 +32389,60 @@ func (q *sqlQuerier) GetWorkspaceUniqueOwnerCountByTemplateIDs(ctx context.Conte return items, nil } +const getWorkspaceUsageGroupedByTemplateIDForOwner = `-- name: GetWorkspaceUsageGroupedByTemplateIDForOwner :many +SELECT + template_id, + COUNT(*) AS workspace_count, + MAX(last_used_at)::timestamptz AS last_used_at +FROM + workspaces +WHERE + owner_id = $1 + AND deleted = false + AND CASE + WHEN $2 :: uuid != '00000000-0000-0000-0000-000000000000'::uuid THEN + organization_id = $2 + ELSE true + END + AND template_id = ANY($3 :: uuid[]) +GROUP BY template_id +` + +type GetWorkspaceUsageGroupedByTemplateIDForOwnerParams struct { + OwnerID uuid.UUID `db:"owner_id" json:"owner_id"` + OrganizationID uuid.UUID `db:"organization_id" json:"organization_id"` + TemplateIDs []uuid.UUID `db:"template_ids" json:"template_ids"` +} + +type GetWorkspaceUsageGroupedByTemplateIDForOwnerRow struct { + TemplateID uuid.UUID `db:"template_id" json:"template_id"` + WorkspaceCount int64 `db:"workspace_count" json:"workspace_count"` + LastUsedAt time.Time `db:"last_used_at" json:"last_used_at"` +} + +func (q *sqlQuerier) GetWorkspaceUsageGroupedByTemplateIDForOwner(ctx context.Context, arg GetWorkspaceUsageGroupedByTemplateIDForOwnerParams) ([]GetWorkspaceUsageGroupedByTemplateIDForOwnerRow, error) { + rows, err := q.db.QueryContext(ctx, getWorkspaceUsageGroupedByTemplateIDForOwner, arg.OwnerID, arg.OrganizationID, pq.Array(arg.TemplateIDs)) + if err != nil { + return nil, err + } + defer rows.Close() + var items []GetWorkspaceUsageGroupedByTemplateIDForOwnerRow + for rows.Next() { + var i GetWorkspaceUsageGroupedByTemplateIDForOwnerRow + if err := rows.Scan(&i.TemplateID, 
&i.WorkspaceCount, &i.LastUsedAt); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + const getWorkspaces = `-- name: GetWorkspaces :many WITH build_params AS ( diff --git a/coderd/database/queries/workspaces.sql b/coderd/database/queries/workspaces.sql index 5269ea8fba524..972f4a542712b 100644 --- a/coderd/database/queries/workspaces.sql +++ b/coderd/database/queries/workspaces.sql @@ -497,6 +497,24 @@ LEFT JOIN workspaces ON workspaces.template_id = templates.id AND workspaces.del WHERE templates.id = ANY(@template_ids :: uuid[]) GROUP BY templates.id; +-- name: GetWorkspaceUsageGroupedByTemplateIDForOwner :many +SELECT + template_id, + COUNT(*) AS workspace_count, + MAX(last_used_at)::timestamptz AS last_used_at +FROM + workspaces +WHERE + owner_id = @owner_id + AND deleted = false + AND CASE + WHEN @organization_id :: uuid != '00000000-0000-0000-0000-000000000000'::uuid THEN + organization_id = @organization_id + ELSE true + END + AND template_id = ANY(@template_ids :: uuid[]) +GROUP BY template_id; + -- name: InsertWorkspace :one INSERT INTO workspaces ( diff --git a/coderd/x/chatd/chattool/createworkspace.go b/coderd/x/chatd/chattool/createworkspace.go index 4a95a6e639aa1..eb58328c62446 100644 --- a/coderd/x/chatd/chattool/createworkspace.go +++ b/coderd/x/chatd/chattool/createworkspace.go @@ -73,10 +73,10 @@ type CreateWorkspaceOptions struct { } type createWorkspaceArgs struct { - TemplateID string `json:"template_id" description:"The UUIDv4 of the template to create the workspace from. Obtain this from list_templates."` + TemplateID string `json:"template_id" description:"The UUIDv4 of the template to create the workspace from. Obtain this from list_templates recommended_template_id or a ranked template."` Name string `json:"name,omitempty" description:"The name of the workspace to create. 
If not provided, a random name will be generated."` - Parameters map[string]string `json:"parameters,omitempty" description:"Key-value pairs of template parameters to use when creating the workspace. Obtain available parameters from read_template."` - PresetID string `json:"preset_id,omitempty" description:"The UUIDv4 of a template version preset to use. Obtain available presets from read_template. When provided, the preset's parameters are applied automatically and the workspace may claim a prebuilt instance for faster startup."` + Parameters map[string]string `json:"parameters,omitempty" description:"Key-value pairs of template parameters to use when creating the workspace. Obtain available parameters from read_template when needed."` + PresetID string `json:"preset_id,omitempty" description:"The UUIDv4 of a template version preset to use. Obtain available presets from read_template when needed. When provided, the preset's parameters are applied automatically and the workspace may claim a prebuilt instance for faster startup."` } // CreateWorkspace returns a tool that creates a new workspace from a @@ -89,10 +89,11 @@ func CreateWorkspace(db database.Store, organizationID, chatID uuid.UUID, option return fantasy.NewAgentTool( "create_workspace", "Create a new workspace from a template. Requires a "+ - "template_id (from list_templates). Optionally provide "+ - "a name and parameter values (from read_template). "+ - "If no name is given, one will be generated. "+ - "Provide a preset_id (from read_template) to apply "+ + "template_id from list_templates. Use recommended_template_id "+ + "or rank 1 when list_templates reports a confident choice. "+ + "Optionally provide a name and parameter values from "+ + "read_template. If no name is given, one will be generated. "+ + "Provide a preset_id from read_template to apply "+ "preset parameters and potentially claim a prebuilt "+ "workspace for faster startup. "+ "This tool is idempotent. 
If the chat already has a "+ diff --git a/coderd/x/chatd/chattool/listtemplates.go b/coderd/x/chatd/chattool/listtemplates.go index 3c6d31c1b02dd..55abb3ee4b93d 100644 --- a/coderd/x/chatd/chattool/listtemplates.go +++ b/coderd/x/chatd/chattool/listtemplates.go @@ -7,6 +7,7 @@ import ( "maps" "slices" "strings" + "time" "charm.land/fantasy" "github.com/google/uuid" @@ -20,6 +21,13 @@ import ( const listTemplatesPageSize = 10 +const ( + listTemplatesHintOnlyAvailable = "only_available_template" + listTemplatesHintHighConfidence = "high_confidence_recommendation" + listTemplatesHintAmbiguous = "ambiguous_top_matches" + listTemplatesHintNoConfidence = "no_confident_match" +) + // ListTemplatesOptions configures the list_templates tool. type ListTemplatesOptions struct { OwnerID uuid.UUID @@ -27,23 +35,39 @@ type ListTemplatesOptions struct { } type listTemplatesArgs struct { - Query string `json:"query,omitempty" description:"Optional text to filter templates by name or description."` - Page int `json:"page,omitempty" description:"Page number for pagination (starts at 1). Each page returns up to 10 templates."` + Query string `json:"query,omitempty" description:"Optional text to filter templates by name, display name, or description."` + Page int `json:"page,omitempty" description:"Page number for pagination (starts at 1). Each page returns up to 10 ranked templates."` +} + +type rankedTemplate struct { + Template database.Template + QueryScore int + ActiveDevelopers int64 + Usage templateUsage + Rank int +} + +type templateUsage struct { + WorkspaceCount int64 + LastUsedAt time.Time } // ListTemplates returns a tool that lists available workspace templates. // The agent uses this to discover templates before creating a workspace. -// Results are ordered by number of active developers (most popular first) -// and paginated at 10 per page. +// Results are ranked before pagination using query relevance, current-user +// usage, and organization-wide popularity. 
// db must not be nil. func ListTemplates(db database.Store, organizationID uuid.UUID, options ListTemplatesOptions) fantasy.AgentTool { return fantasy.NewAgentTool( "list_templates", - "List available workspace templates. Optionally filter by a "+ - "search query matching template name or description. "+ - "Use this to find a template before creating a workspace. "+ - "Results are ordered by number of active developers (most popular first). "+ - "Returns 10 per page. Use the page parameter to paginate through results.", + "List available workspace templates as a ranked shortlist. "+ + "Optionally provide a search query matching template name, "+ + "display name, or description. Use recommended_template_id "+ + "or rank 1 as the default choice when selection_hint is "+ + "only_available_template or high_confidence_recommendation. "+ + "Do not paginate unless the returned templates do not fit the "+ + "request, selection_hint reports ambiguity or no confident match, "+ + "or the user asked to browse templates. Returns 10 per page.", func(ctx context.Context, args listTemplatesArgs, _ fantasy.ToolCall) (fantasy.ToolResponse, error) { ctx, err := asOwner(ctx, db, options.OwnerID) if err != nil { @@ -58,10 +82,6 @@ func ListTemplates(db database.Store, organizationID uuid.UUID, options ListTemp Valid: true, }, } - query := strings.TrimSpace(args.Query) - if query != "" { - filterParams.FuzzyName = query - } var allowlist map[uuid.UUID]bool if options.AllowedTemplateIDs != nil { @@ -75,32 +95,35 @@ func ListTemplates(db database.Store, organizationID uuid.UUID, options ListTemp return fantasy.NewTextErrorResponse(err.Error()), nil } - // Look up active developer counts so we can sort by popularity. 
- templateIDs := make([]uuid.UUID, len(templates)) - for i, t := range templates { - templateIDs[i] = t.ID + query := strings.TrimSpace(args.Query) + visibleTemplateCount := len(templates) + ranked := candidateRankedTemplates(templates, query) + + templateIDs := make([]uuid.UUID, len(ranked)) + for i, t := range ranked { + templateIDs[i] = t.Template.ID } - ownerCounts := make(map[uuid.UUID]int64) - if len(templateIDs) > 0 { - rows, countErr := db.GetWorkspaceUniqueOwnerCountByTemplateIDs(ctx, templateIDs) - - if countErr == nil { - for _, row := range rows { - ownerCounts[row.TemplateID] = row.UniqueOwnersSum - } - } + ownerCounts := loadTemplateActiveDeveloperCounts(ctx, db, templateIDs) + usageByTemplate := loadTemplateUsage( + ctx, db, options.OwnerID, organizationID, templateIDs, + ) + + for i := range ranked { + ranked[i].ActiveDevelopers = ownerCounts[ranked[i].Template.ID] + ranked[i].Usage = usageByTemplate[ranked[i].Template.ID] } - // Sort by active developer count descending. - slices.SortStableFunc(templates, func(a, b database.Template) int { - return cmp.Compare(ownerCounts[b.ID], ownerCounts[a.ID]) - }) + rankTemplates(ranked, query) + selectionHint, recommendedID, recommendationReason := selectTemplateRecommendation( + ranked, visibleTemplateCount, + ) + // Paginate. 
page := args.Page if page < 1 { page = 1 } - totalCount := len(templates) + totalCount := len(ranked) totalPages := (totalCount + listTemplatesPageSize - 1) / listTemplatesPageSize if totalPages == 0 { totalPages = 1 @@ -113,38 +136,246 @@ func ListTemplates(db database.Store, organizationID uuid.UUID, options ListTemp if end > totalCount { end = totalCount } - pageTemplates := templates[start:end] + pageTemplates := ranked[start:end] items := make([]map[string]any, 0, len(pageTemplates)) for _, t := range pageTemplates { - item := map[string]any{ - "id": t.ID.String(), - "name": t.Name, - "organization_id": t.OrganizationID.String(), - } - if display := strings.TrimSpace(t.DisplayName); display != "" { - item["display_name"] = display - } - if desc := strings.TrimSpace(t.Description); desc != "" { - item["description"] = truncateRunes(desc, 200) - } - if count, ok := ownerCounts[t.ID]; ok && count > 0 { - item["active_developers"] = count - } - items = append(items, item) + items = append(items, templateItem(t, recommendedID)) } - return toolResponse(map[string]any{ - "templates": items, - "count": len(items), - "page": page, - "total_pages": totalPages, - "total_count": totalCount, - }), nil + result := map[string]any{ + "templates": items, + "count": len(items), + "page": page, + "total_pages": totalPages, + "total_count": totalCount, + "selection_hint": selectionHint, + "recommendation_reason": recommendationReason, + } + if recommendedID != uuid.Nil { + result["recommended_template_id"] = recommendedID.String() + } + return toolResponse(result), nil }, ) } +func candidateRankedTemplates(templates []database.Template, query string) []rankedTemplate { + ranked := make([]rankedTemplate, 0, len(templates)) + for _, t := range templates { + queryScore := templateQueryScore(t, query) + if query != "" && queryScore == 0 { + continue + } + ranked = append(ranked, rankedTemplate{ + Template: t, + QueryScore: queryScore, + }) + } + return ranked +} + +func 
loadTemplateActiveDeveloperCounts( + ctx context.Context, + db database.Store, + templateIDs []uuid.UUID, +) map[uuid.UUID]int64 { + ownerCounts := make(map[uuid.UUID]int64) + if len(templateIDs) == 0 { + return ownerCounts + } + + // Templates are already filtered with the owner's permissions. The + // aggregate count query requires system read because it spans workspace + // owners, but it only receives IDs the owner can already see. + rows, err := db.GetWorkspaceUniqueOwnerCountByTemplateIDs(dbauthz.AsSystemRestricted(ctx), templateIDs) //nolint:gocritic // see above + if err != nil { + return ownerCounts + } + for _, row := range rows { + ownerCounts[row.TemplateID] = row.UniqueOwnersSum + } + return ownerCounts +} + +func loadTemplateUsage( + ctx context.Context, + db database.Store, + ownerID uuid.UUID, + organizationID uuid.UUID, + templateIDs []uuid.UUID, +) map[uuid.UUID]templateUsage { + usageByTemplate := make(map[uuid.UUID]templateUsage) + if ownerID == uuid.Nil || len(templateIDs) == 0 { + return usageByTemplate + } + + rows, err := db.GetWorkspaceUsageGroupedByTemplateIDForOwner(ctx, database.GetWorkspaceUsageGroupedByTemplateIDForOwnerParams{ + OwnerID: ownerID, + OrganizationID: organizationID, + TemplateIDs: templateIDs, + }) + if err != nil { + return usageByTemplate + } + for _, row := range rows { + usageByTemplate[row.TemplateID] = templateUsage{ + WorkspaceCount: row.WorkspaceCount, + LastUsedAt: row.LastUsedAt, + } + } + return usageByTemplate +} + +func rankTemplates(ranked []rankedTemplate, query string) { + slices.SortStableFunc(ranked, func(a, b rankedTemplate) int { + if query != "" { + if c := cmp.Compare(b.QueryScore, a.QueryScore); c != 0 { + return c + } + } + if c := cmp.Compare(b.Usage.WorkspaceCount, a.Usage.WorkspaceCount); c != 0 { + return c + } + if c := b.Usage.LastUsedAt.Compare(a.Usage.LastUsedAt); c != 0 { + return c + } + if c := cmp.Compare(b.ActiveDevelopers, a.ActiveDevelopers); c != 0 { + return c + } + if c := 
strings.Compare(a.Template.Name, b.Template.Name); c != 0 { + return c + } + return strings.Compare(a.Template.ID.String(), b.Template.ID.String()) + }) + + for i := range ranked { + ranked[i].Rank = i + 1 + } +} + +func selectTemplateRecommendation( + ranked []rankedTemplate, + visibleTemplateCount int, +) (string, uuid.UUID, string) { + if len(ranked) == 0 { + return listTemplatesHintNoConfidence, uuid.Nil, "no_matching_templates" + } + + top := ranked[0] + if visibleTemplateCount == 1 && len(ranked) == 1 { + return listTemplatesHintOnlyAvailable, top.Template.ID, "only_available_template" + } + if !templateHasRankingSignal(top) { + return listTemplatesHintNoConfidence, uuid.Nil, "no_ranking_signal" + } + if len(ranked) > 1 && templatesAreAmbiguous(top, ranked[1]) { + return listTemplatesHintAmbiguous, uuid.Nil, "top_templates_are_ambiguous" + } + return listTemplatesHintHighConfidence, top.Template.ID, rankReason(top) +} + +func templatesAreAmbiguous(a, b rankedTemplate) bool { + return a.QueryScore == b.QueryScore && + a.Usage.WorkspaceCount == b.Usage.WorkspaceCount && + a.Usage.LastUsedAt.Equal(b.Usage.LastUsedAt) && + a.ActiveDevelopers == b.ActiveDevelopers +} + +func templateHasRankingSignal(t rankedTemplate) bool { + return t.QueryScore > 0 || t.Usage.WorkspaceCount > 0 || t.ActiveDevelopers > 0 +} + +func templateItem(t rankedTemplate, recommendedID uuid.UUID) map[string]any { + item := map[string]any{ + "id": t.Template.ID.String(), + "name": t.Template.Name, + "organization_id": t.Template.OrganizationID.String(), + "rank": t.Rank, + "rank_reason": rankReason(t), + } + if display := strings.TrimSpace(t.Template.DisplayName); display != "" { + item["display_name"] = display + } + if desc := strings.TrimSpace(t.Template.Description); desc != "" { + item["description"] = truncateRunes(desc, 200) + } + if t.ActiveDevelopers > 0 { + item["active_developers"] = t.ActiveDevelopers + } + if t.Usage.WorkspaceCount > 0 { + item["your_workspace_count"] = 
t.Usage.WorkspaceCount + item["last_used_by_you"] = t.Usage.LastUsedAt.Format(time.RFC3339Nano) + } + if t.Template.ID == recommendedID { + item["recommended"] = true + } + return item +} + +func rankReason(t rankedTemplate) string { + switch { + case t.QueryScore > 0 && t.Usage.WorkspaceCount > 0: + return "matches_query_and_used_by_you" + case t.QueryScore > 0: + return "matches_query" + case t.Usage.WorkspaceCount > 0: + return "used_by_you" + case t.ActiveDevelopers > 0: + return "popular_in_org" + default: + return "ordered_by_name" + } +} + +func templateQueryScore(t database.Template, query string) int { + query = normalizeTemplateSearch(query) + if query == "" { + return 0 + } + + queryCompact := compactTemplateSearch(query) + for _, field := range []string{t.Name, t.DisplayName} { + field = normalizeTemplateSearch(field) + if field == "" { + continue + } + if field == query || compactTemplateSearch(field) == queryCompact { + return 4 + } + } + for _, field := range []string{t.Name, t.DisplayName} { + field = normalizeTemplateSearch(field) + if field == "" { + continue + } + if strings.HasPrefix(field, query) || strings.HasPrefix(compactTemplateSearch(field), queryCompact) { + return 3 + } + } + for _, field := range []string{t.Name, t.DisplayName} { + field = normalizeTemplateSearch(field) + if field == "" { + continue + } + if strings.Contains(field, query) || strings.Contains(compactTemplateSearch(field), queryCompact) { + return 2 + } + } + if strings.Contains(normalizeTemplateSearch(t.Description), query) { + return 1 + } + return 0 +} + +func normalizeTemplateSearch(value string) string { + return strings.ToLower(strings.TrimSpace(value)) +} + +func compactTemplateSearch(value string) string { + return strings.ReplaceAll(value, " ", "") +} + // asOwner sets up a dbauthz context for the given owner so that // subsequent database calls are scoped to what that user can access. 
func asOwner(ctx context.Context, db database.Store, ownerID uuid.UUID) (context.Context, error) { diff --git a/coderd/x/chatd/chattool/listtemplates_test.go b/coderd/x/chatd/chattool/listtemplates_test.go index 0cf25d2c432d3..66c2cf6e8e4d4 100644 --- a/coderd/x/chatd/chattool/listtemplates_test.go +++ b/coderd/x/chatd/chattool/listtemplates_test.go @@ -3,7 +3,9 @@ package chattool_test import ( "context" "encoding/json" + "fmt" "testing" + "time" "charm.land/fantasy" "github.com/google/uuid" @@ -121,6 +123,231 @@ func TestListTemplates_OrganizationFilter(t *testing.T) { }) } +func TestListTemplates_QueryMatchesDisplayNameAndDescription(t *testing.T) { + t.Parallel() + ctx := testutil.Context(t, testutil.WaitShort) + db, _ := dbtestutil.NewDB(t) + user := dbgen.User(t, db, database.User{}) + org := dbgen.Organization(t, db, database.Organization{}) + _ = dbgen.OrganizationMember(t, db, database.OrganizationMember{ + UserID: user.ID, + OrganizationID: org.ID, + }) + + displayTemplate := dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + Name: "data-science", + DisplayName: "Data Science Lab", + }) + descriptionTemplate := dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + Name: "node-general", + Description: "A JavaScript and TypeScript workspace.", + }) + _ = dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + Name: "unrelated", + Description: "A plain Linux workspace.", + }) + + tool := chattool.ListTemplates(db, org.ID, chattool.ListTemplatesOptions{ + OwnerID: user.ID, + }) + + result := runListTemplates(ctx, t, tool, `{"query":"Data Science"}`) + templates := listTemplateItems(t, result) + require.Len(t, templates, 1) + require.Equal(t, displayTemplate.ID.String(), templates[0]["id"]) + require.Equal(t, "high_confidence_recommendation", result["selection_hint"]) + require.Equal(t, displayTemplate.ID.String(), result["recommended_template_id"]) 
+ require.Equal(t, "matches_query", templates[0]["rank_reason"]) + + result = runListTemplates(ctx, t, tool, `{"query":"TypeScript"}`) + templates = listTemplateItems(t, result) + require.Len(t, templates, 1) + require.Equal(t, descriptionTemplate.ID.String(), templates[0]["id"]) + require.Equal(t, "high_confidence_recommendation", result["selection_hint"]) + require.Equal(t, descriptionTemplate.ID.String(), result["recommended_template_id"]) +} + +func TestListTemplates_RanksAllCandidatesBeforePagination(t *testing.T) { + t.Parallel() + ctx := testutil.Context(t, testutil.WaitShort) + db, _ := dbtestutil.NewDB(t) + user := dbgen.User(t, db, database.User{}) + org := dbgen.Organization(t, db, database.Organization{}) + _ = dbgen.OrganizationMember(t, db, database.OrganizationMember{ + UserID: user.ID, + OrganizationID: org.ID, + }) + + var target database.Template + for i := range 11 { + tpl := dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + Name: fmt.Sprintf("template-%02d", i), + }) + if i == 10 { + target = tpl + } + } + dbgen.Workspace(t, db, database.WorkspaceTable{ + OwnerID: user.ID, + OrganizationID: org.ID, + TemplateID: target.ID, + LastUsedAt: time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC), + }) + + tool := chattool.ListTemplates(db, org.ID, chattool.ListTemplatesOptions{ + OwnerID: user.ID, + }) + result := runListTemplates(ctx, t, tool, `{}`) + templates := listTemplateItems(t, result) + require.Len(t, templates, 10) + require.Equal(t, float64(11), result["total_count"]) + require.Equal(t, float64(2), result["total_pages"]) + require.Equal(t, target.ID.String(), templates[0]["id"]) + require.Equal(t, float64(1), templates[0]["rank"]) + require.Equal(t, float64(1), templates[0]["your_workspace_count"]) + require.NotEmpty(t, templates[0]["last_used_by_you"]) + require.Equal(t, true, templates[0]["recommended"]) + require.Equal(t, "used_by_you", templates[0]["rank_reason"]) + require.Equal(t, 
"high_confidence_recommendation", result["selection_hint"]) + require.Equal(t, target.ID.String(), result["recommended_template_id"]) +} + +func TestListTemplates_QueryRelevanceOutranksPersonalUsage(t *testing.T) { + t.Parallel() + ctx := testutil.Context(t, testutil.WaitShort) + db, _ := dbtestutil.NewDB(t) + user := dbgen.User(t, db, database.User{}) + org := dbgen.Organization(t, db, database.Organization{}) + _ = dbgen.OrganizationMember(t, db, database.OrganizationMember{ + UserID: user.ID, + OrganizationID: org.ID, + }) + + target := dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + Name: "python-gpu", + Description: "GPU workspace.", + }) + used := dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + Name: "generic-dev", + Description: "Python-capable general environment.", + }) + dbgen.Workspace(t, db, database.WorkspaceTable{ + OwnerID: user.ID, + OrganizationID: org.ID, + TemplateID: used.ID, + LastUsedAt: time.Date(2026, 5, 2, 12, 0, 0, 0, time.UTC), + }) + + tool := chattool.ListTemplates(db, org.ID, chattool.ListTemplatesOptions{ + OwnerID: user.ID, + }) + result := runListTemplates(ctx, t, tool, `{"query":"python"}`) + templates := listTemplateItems(t, result) + require.Len(t, templates, 2) + require.Equal(t, target.ID.String(), templates[0]["id"]) + require.Equal(t, used.ID.String(), templates[1]["id"]) + require.Equal(t, "matches_query", templates[0]["rank_reason"]) + require.Equal(t, "matches_query_and_used_by_you", templates[1]["rank_reason"]) + require.Equal(t, "high_confidence_recommendation", result["selection_hint"]) + require.Equal(t, target.ID.String(), result["recommended_template_id"]) +} + +func TestListTemplates_OrgPopularityFallback(t *testing.T) { + t.Parallel() + ctx := testutil.Context(t, testutil.WaitShort) + db, _ := dbtestutil.NewDB(t) + user := dbgen.User(t, db, database.User{}) + org := dbgen.Organization(t, db, database.Organization{}) + _ = 
dbgen.OrganizationMember(t, db, database.OrganizationMember{ + UserID: user.ID, + OrganizationID: org.ID, + }) + + popular := dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + Name: "popular-template", + }) + lessPopular := dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + Name: "less-popular-template", + }) + for range 2 { + otherUser := dbgen.User(t, db, database.User{}) + dbgen.Workspace(t, db, database.WorkspaceTable{ + OwnerID: otherUser.ID, + OrganizationID: org.ID, + TemplateID: popular.ID, + }) + } + otherUser := dbgen.User(t, db, database.User{}) + dbgen.Workspace(t, db, database.WorkspaceTable{ + OwnerID: otherUser.ID, + OrganizationID: org.ID, + TemplateID: lessPopular.ID, + }) + + tool := chattool.ListTemplates(db, org.ID, chattool.ListTemplatesOptions{ + OwnerID: user.ID, + }) + result := runListTemplates(ctx, t, tool, `{}`) + templates := listTemplateItems(t, result) + require.Len(t, templates, 2) + require.Equal(t, popular.ID.String(), templates[0]["id"]) + require.Equal(t, float64(2), templates[0]["active_developers"]) + require.Equal(t, "popular_in_org", templates[0]["rank_reason"]) + require.Equal(t, "high_confidence_recommendation", result["selection_hint"]) + require.Equal(t, popular.ID.String(), result["recommended_template_id"]) +} + +func TestListTemplates_AmbiguousTopMatches(t *testing.T) { + t.Parallel() + ctx := testutil.Context(t, testutil.WaitShort) + db, _ := dbtestutil.NewDB(t) + user := dbgen.User(t, db, database.User{}) + org := dbgen.Organization(t, db, database.Organization{}) + _ = dbgen.OrganizationMember(t, db, database.OrganizationMember{ + UserID: user.ID, + OrganizationID: org.ID, + }) + + _ = dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + Name: "go-alpha", + }) + _ = dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + Name: "go-beta", + }) + + tool := 
chattool.ListTemplates(db, org.ID, chattool.ListTemplatesOptions{ + OwnerID: user.ID, + }) + result := runListTemplates(ctx, t, tool, `{"query":"go"}`) + templates := listTemplateItems(t, result) + require.Len(t, templates, 2) + require.Equal(t, "ambiguous_top_matches", result["selection_hint"]) + _, ok := result["recommended_template_id"] + require.False(t, ok) + _, ok = templates[0]["recommended"] + require.False(t, ok) +} + //nolint:tparallel,paralleltest // Subtests share a single DB and run sequentially. func TestTemplateAllowlistEnforcement(t *testing.T) { t.Parallel() @@ -187,6 +414,10 @@ func TestTemplateAllowlistEnforcement(t *testing.T) { require.Len(t, templates, 1) m := templates[0].(map[string]any) require.Equal(t, t1.ID.String(), m["id"].(string)) + require.Equal(t, "only_available_template", result["selection_hint"]) + require.Equal(t, t1.ID.String(), result["recommended_template_id"]) + require.Equal(t, true, m["recommended"]) + require.Equal(t, float64(1), m["rank"]) }) t.Run("NoMatches", func(t *testing.T) { @@ -301,3 +532,38 @@ func TestTemplateAllowlistEnforcement(t *testing.T) { }) }) } + +func runListTemplates( + ctx context.Context, + t *testing.T, + tool fantasy.AgentTool, + input string, +) map[string]any { + t.Helper() + + resp, err := tool.Run(ctx, fantasy.ToolCall{ + ID: uuid.NewString(), + Name: "list_templates", + Input: input, + }) + require.NoError(t, err) + require.False(t, resp.IsError) + + var result map[string]any + require.NoError(t, json.Unmarshal([]byte(resp.Content), &result)) + return result +} + +func listTemplateItems(t *testing.T, result map[string]any) []map[string]any { + t.Helper() + + rawTemplates, ok := result["templates"].([]any) + require.True(t, ok) + templates := make([]map[string]any, 0, len(rawTemplates)) + for _, raw := range rawTemplates { + template, ok := raw.(map[string]any) + require.True(t, ok) + templates = append(templates, template) + } + return templates +} diff --git 
a/coderd/x/chatd/chattool/readtemplate.go b/coderd/x/chatd/chattool/readtemplate.go index 09179237babc6..b790e2c4d1c17 100644 --- a/coderd/x/chatd/chattool/readtemplate.go +++ b/coderd/x/chatd/chattool/readtemplate.go @@ -19,20 +19,21 @@ type ReadTemplateOptions struct { } type readTemplateArgs struct { - TemplateID string `json:"template_id" description:"The UUIDv4 of the template to read details for. Obtain this from list_templates."` + TemplateID string `json:"template_id" description:"The UUIDv4 of the template to read details for. Obtain this from list_templates recommended_template_id or a ranked template."` } // ReadTemplate returns a tool that retrieves details about a specific -// template, including its configurable rich parameters. The agent -// uses this after list_templates and before create_workspace. +// template, including its configurable rich parameters. The agent uses +// this after list_templates when it needs parameters or presets before +// create_workspace. // db must not be nil. func ReadTemplate(db database.Store, organizationID uuid.UUID, options ReadTemplateOptions) fantasy.AgentTool { return fantasy.NewAgentTool( "read_template", "Get details about a workspace template, including its "+ "configurable parameters and available presets. Use this "+ - "after finding a template with list_templates and before "+ - "creating a workspace with create_workspace.", + "after list_templates when you need required parameter "+ + "details or preset IDs before create_workspace.", func(ctx context.Context, args readTemplateArgs, _ fantasy.ToolCall) (fantasy.ToolResponse, error) { templateIDStr := strings.TrimSpace(args.TemplateID) if templateIDStr == "" { diff --git a/coderd/x/chatd/prompt.go b/coderd/x/chatd/prompt.go index 23c42fcb9b027..c295ccfacec3f 100644 --- a/coderd/x/chatd/prompt.go +++ b/coderd/x/chatd/prompt.go @@ -82,6 +82,14 @@ Do not start with clarifying questions if the codebase or tools can answer them. 
Ask the minimum number of questions needed to define the scope together. + +When no workspace is attached and you need to create one: +- Call list_templates with concise search terms from the user's task when the task suggests a language, framework, image, or environment. +- Treat recommended_template_id, or rank 1 when selection_hint is only_available_template or high_confidence_recommendation, as the default template unless the user asked for a different template. +- Do not paginate unless selection_hint is ambiguous_top_matches or no_confident_match, no returned template fits the request, or the user asked to browse or compare templates. +- Call read_template before create_workspace when you need parameter names, required parameter values, or preset IDs. Otherwise use create_workspace with the selected template_id and defaults. + + Propose a plan when: - The task is too ambiguous to implement with confidence. From f8882fce4e6ceb4d6f0423f9d4c2f790ee288c0e Mon Sep 17 00:00:00 2001 From: Jaayden Halko Date: Thu, 7 May 2026 12:21:39 +0000 Subject: [PATCH 02/21] test(coderd/database): cover workspace usage authz --- coderd/database/dbauthz/dbauthz_test.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/coderd/database/dbauthz/dbauthz_test.go b/coderd/database/dbauthz/dbauthz_test.go index 795a0e6641690..fe688d28c57da 100644 --- a/coderd/database/dbauthz/dbauthz_test.go +++ b/coderd/database/dbauthz/dbauthz_test.go @@ -4764,6 +4764,15 @@ func (s *MethodTestSuite) TestSystemFunctions() { dbm.EXPECT().GetWorkspaceUniqueOwnerCountByTemplateIDs(gomock.Any(), ids).Return([]database.GetWorkspaceUniqueOwnerCountByTemplateIDsRow{}, nil).AnyTimes() check.Args(ids).Asserts(rbac.ResourceSystem, policy.ActionRead) })) + s.Run("GetWorkspaceUsageGroupedByTemplateIDForOwner", s.Mocked(func(dbm *dbmock.MockStore, _ *gofakeit.Faker, check *expects) { + arg := database.GetWorkspaceUsageGroupedByTemplateIDForOwnerParams{ + OwnerID: uuid.New(), + OrganizationID: uuid.New(), + 
TemplateIDs: []uuid.UUID{uuid.New()}, + } + dbm.EXPECT().GetWorkspaceUsageGroupedByTemplateIDForOwner(gomock.Any(), arg).Return([]database.GetWorkspaceUsageGroupedByTemplateIDForOwnerRow{}, nil).AnyTimes() + check.Args(arg).Asserts(rbac.ResourceWorkspace.WithOwner(arg.OwnerID.String()).InOrg(arg.OrganizationID), policy.ActionRead) + })) s.Run("GetWorkspaceAgentScriptsByAgentIDs", s.Mocked(func(dbm *dbmock.MockStore, _ *gofakeit.Faker, check *expects) { ids := []uuid.UUID{uuid.New()} dbm.EXPECT().GetWorkspaceAgentScriptsByAgentIDs(gomock.Any(), ids).Return([]database.GetWorkspaceAgentScriptsByAgentIDsRow{}, nil).AnyTimes() From 34c7facc78a79e7c24e2892f1c42e45976e70943 Mon Sep 17 00:00:00 2001 From: Jaayden Halko Date: Fri, 15 May 2026 10:57:49 +0000 Subject: [PATCH 03/21] fix(coderd): address template ranking feedback --- coderd/database/dbauthz/dbauthz.go | 4 +- coderd/database/dbauthz/dbauthz_test.go | 18 +- coderd/database/dbmetrics/querymetrics.go | 8 +- coderd/database/dbmock/dbmock.go | 14 +- coderd/database/models.go | 2 +- coderd/database/querier.go | 4 +- coderd/database/queries.sql.go | 21 +- coderd/database/queries/workspaces.sql | 2 +- coderd/x/chatd/chatd.go | 1 + coderd/x/chatd/chattool/listtemplates.go | 210 +++++++++++++----- coderd/x/chatd/chattool/listtemplates_test.go | 210 +++++++++++++++++- 11 files changed, 392 insertions(+), 102 deletions(-) diff --git a/coderd/database/dbauthz/dbauthz.go b/coderd/database/dbauthz/dbauthz.go index ac795a8d27360..9a660961cc792 100644 --- a/coderd/database/dbauthz/dbauthz.go +++ b/coderd/database/dbauthz/dbauthz.go @@ -5077,7 +5077,7 @@ func (q *querier) GetWorkspaceUniqueOwnerCountByTemplateIDs(ctx context.Context, return q.db.GetWorkspaceUniqueOwnerCountByTemplateIDs(ctx, templateIDs) } -func (q *querier) GetWorkspaceUsageGroupedByTemplateIDForOwner(ctx context.Context, arg database.GetWorkspaceUsageGroupedByTemplateIDForOwnerParams) ([]database.GetWorkspaceUsageGroupedByTemplateIDForOwnerRow, error) { +func 
(q *querier) GetWorkspaceUsageGroupedByTemplateIDByOwnerID(ctx context.Context, arg database.GetWorkspaceUsageGroupedByTemplateIDByOwnerIDParams) ([]database.GetWorkspaceUsageGroupedByTemplateIDByOwnerIDRow, error) { obj := rbac.ResourceWorkspace.WithOwner(arg.OwnerID.String()) if arg.OrganizationID != uuid.Nil { obj = obj.InOrg(arg.OrganizationID) @@ -5087,7 +5087,7 @@ func (q *querier) GetWorkspaceUsageGroupedByTemplateIDForOwner(ctx context.Conte if err := q.authorizeContext(ctx, policy.ActionRead, obj); err != nil { return nil, err } - return q.db.GetWorkspaceUsageGroupedByTemplateIDForOwner(ctx, arg) + return q.db.GetWorkspaceUsageGroupedByTemplateIDByOwnerID(ctx, arg) } func (q *querier) GetWorkspaces(ctx context.Context, arg database.GetWorkspacesParams) ([]database.GetWorkspacesRow, error) { diff --git a/coderd/database/dbauthz/dbauthz_test.go b/coderd/database/dbauthz/dbauthz_test.go index fe688d28c57da..86cceca4704bc 100644 --- a/coderd/database/dbauthz/dbauthz_test.go +++ b/coderd/database/dbauthz/dbauthz_test.go @@ -3110,6 +3110,15 @@ func (s *MethodTestSuite) TestWorkspace() { // No asserts here because SQLFilter. 
check.Args(ws.OwnerID, emptyPreparedAuthorized{}).Asserts() })) + s.Run("GetWorkspaceUsageGroupedByTemplateIDByOwnerID", s.Mocked(func(dbm *dbmock.MockStore, _ *gofakeit.Faker, check *expects) { + arg := database.GetWorkspaceUsageGroupedByTemplateIDByOwnerIDParams{ + OwnerID: uuid.New(), + OrganizationID: uuid.New(), + TemplateIDs: []uuid.UUID{uuid.New()}, + } + dbm.EXPECT().GetWorkspaceUsageGroupedByTemplateIDByOwnerID(gomock.Any(), arg).Return([]database.GetWorkspaceUsageGroupedByTemplateIDByOwnerIDRow{}, nil).AnyTimes() + check.Args(arg).Asserts(rbac.ResourceWorkspace.WithOwner(arg.OwnerID.String()).InOrg(arg.OrganizationID), policy.ActionRead) + })) s.Run("GetWorkspaceACLByID", s.Mocked(func(dbM *dbmock.MockStore, faker *gofakeit.Faker, check *expects) { ws := testutil.Fake(s.T(), faker, database.Workspace{}) dbM.EXPECT().GetWorkspaceByID(gomock.Any(), ws.ID).Return(ws, nil).AnyTimes() @@ -4764,15 +4773,6 @@ func (s *MethodTestSuite) TestSystemFunctions() { dbm.EXPECT().GetWorkspaceUniqueOwnerCountByTemplateIDs(gomock.Any(), ids).Return([]database.GetWorkspaceUniqueOwnerCountByTemplateIDsRow{}, nil).AnyTimes() check.Args(ids).Asserts(rbac.ResourceSystem, policy.ActionRead) })) - s.Run("GetWorkspaceUsageGroupedByTemplateIDForOwner", s.Mocked(func(dbm *dbmock.MockStore, _ *gofakeit.Faker, check *expects) { - arg := database.GetWorkspaceUsageGroupedByTemplateIDForOwnerParams{ - OwnerID: uuid.New(), - OrganizationID: uuid.New(), - TemplateIDs: []uuid.UUID{uuid.New()}, - } - dbm.EXPECT().GetWorkspaceUsageGroupedByTemplateIDForOwner(gomock.Any(), arg).Return([]database.GetWorkspaceUsageGroupedByTemplateIDForOwnerRow{}, nil).AnyTimes() - check.Args(arg).Asserts(rbac.ResourceWorkspace.WithOwner(arg.OwnerID.String()).InOrg(arg.OrganizationID), policy.ActionRead) - })) s.Run("GetWorkspaceAgentScriptsByAgentIDs", s.Mocked(func(dbm *dbmock.MockStore, _ *gofakeit.Faker, check *expects) { ids := []uuid.UUID{uuid.New()} 
dbm.EXPECT().GetWorkspaceAgentScriptsByAgentIDs(gomock.Any(), ids).Return([]database.GetWorkspaceAgentScriptsByAgentIDsRow{}, nil).AnyTimes() diff --git a/coderd/database/dbmetrics/querymetrics.go b/coderd/database/dbmetrics/querymetrics.go index 1d46b092fd121..a444dc588c3c1 100644 --- a/coderd/database/dbmetrics/querymetrics.go +++ b/coderd/database/dbmetrics/querymetrics.go @@ -3440,11 +3440,11 @@ func (m queryMetricsStore) GetWorkspaceUniqueOwnerCountByTemplateIDs(ctx context return r0, r1 } -func (m queryMetricsStore) GetWorkspaceUsageGroupedByTemplateIDForOwner(ctx context.Context, arg database.GetWorkspaceUsageGroupedByTemplateIDForOwnerParams) ([]database.GetWorkspaceUsageGroupedByTemplateIDForOwnerRow, error) { +func (m queryMetricsStore) GetWorkspaceUsageGroupedByTemplateIDByOwnerID(ctx context.Context, arg database.GetWorkspaceUsageGroupedByTemplateIDByOwnerIDParams) ([]database.GetWorkspaceUsageGroupedByTemplateIDByOwnerIDRow, error) { start := time.Now() - r0, r1 := m.s.GetWorkspaceUsageGroupedByTemplateIDForOwner(ctx, arg) - m.queryLatencies.WithLabelValues("GetWorkspaceUsageGroupedByTemplateIDForOwner").Observe(time.Since(start).Seconds()) - m.queryCounts.WithLabelValues(httpmw.ExtractHTTPRoute(ctx), httpmw.ExtractHTTPMethod(ctx), "GetWorkspaceUsageGroupedByTemplateIDForOwner").Inc() + r0, r1 := m.s.GetWorkspaceUsageGroupedByTemplateIDByOwnerID(ctx, arg) + m.queryLatencies.WithLabelValues("GetWorkspaceUsageGroupedByTemplateIDByOwnerID").Observe(time.Since(start).Seconds()) + m.queryCounts.WithLabelValues(httpmw.ExtractHTTPRoute(ctx), httpmw.ExtractHTTPMethod(ctx), "GetWorkspaceUsageGroupedByTemplateIDByOwnerID").Inc() return r0, r1 } diff --git a/coderd/database/dbmock/dbmock.go b/coderd/database/dbmock/dbmock.go index 2e0fcd42d417b..66b042c5f5757 100644 --- a/coderd/database/dbmock/dbmock.go +++ b/coderd/database/dbmock/dbmock.go @@ -6437,19 +6437,19 @@ func (mr *MockStoreMockRecorder) GetWorkspaceUniqueOwnerCountByTemplateIDs(ctx, return 
mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetWorkspaceUniqueOwnerCountByTemplateIDs", reflect.TypeOf((*MockStore)(nil).GetWorkspaceUniqueOwnerCountByTemplateIDs), ctx, templateIds) } -// GetWorkspaceUsageGroupedByTemplateIDForOwner mocks base method. -func (m *MockStore) GetWorkspaceUsageGroupedByTemplateIDForOwner(ctx context.Context, arg database.GetWorkspaceUsageGroupedByTemplateIDForOwnerParams) ([]database.GetWorkspaceUsageGroupedByTemplateIDForOwnerRow, error) { +// GetWorkspaceUsageGroupedByTemplateIDByOwnerID mocks base method. +func (m *MockStore) GetWorkspaceUsageGroupedByTemplateIDByOwnerID(ctx context.Context, arg database.GetWorkspaceUsageGroupedByTemplateIDByOwnerIDParams) ([]database.GetWorkspaceUsageGroupedByTemplateIDByOwnerIDRow, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "GetWorkspaceUsageGroupedByTemplateIDForOwner", ctx, arg) - ret0, _ := ret[0].([]database.GetWorkspaceUsageGroupedByTemplateIDForOwnerRow) + ret := m.ctrl.Call(m, "GetWorkspaceUsageGroupedByTemplateIDByOwnerID", ctx, arg) + ret0, _ := ret[0].([]database.GetWorkspaceUsageGroupedByTemplateIDByOwnerIDRow) ret1, _ := ret[1].(error) return ret0, ret1 } -// GetWorkspaceUsageGroupedByTemplateIDForOwner indicates an expected call of GetWorkspaceUsageGroupedByTemplateIDForOwner. -func (mr *MockStoreMockRecorder) GetWorkspaceUsageGroupedByTemplateIDForOwner(ctx, arg any) *gomock.Call { +// GetWorkspaceUsageGroupedByTemplateIDByOwnerID indicates an expected call of GetWorkspaceUsageGroupedByTemplateIDByOwnerID. 
+func (mr *MockStoreMockRecorder) GetWorkspaceUsageGroupedByTemplateIDByOwnerID(ctx, arg any) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetWorkspaceUsageGroupedByTemplateIDForOwner", reflect.TypeOf((*MockStore)(nil).GetWorkspaceUsageGroupedByTemplateIDForOwner), ctx, arg) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetWorkspaceUsageGroupedByTemplateIDByOwnerID", reflect.TypeOf((*MockStore)(nil).GetWorkspaceUsageGroupedByTemplateIDByOwnerID), ctx, arg) } // GetWorkspaces mocks base method. diff --git a/coderd/database/models.go b/coderd/database/models.go index 65e6d5a1420eb..5dc17bd690571 100644 --- a/coderd/database/models.go +++ b/coderd/database/models.go @@ -1,6 +1,6 @@ // Code generated by sqlc. DO NOT EDIT. // versions: -// sqlc v1.30.0 +// sqlc v1.31.1 package database diff --git a/coderd/database/querier.go b/coderd/database/querier.go index 02b8212c70f03..8b98cc243f480 100644 --- a/coderd/database/querier.go +++ b/coderd/database/querier.go @@ -1,6 +1,6 @@ // Code generated by sqlc. DO NOT EDIT. // versions: -// sqlc v1.30.0 +// sqlc v1.31.1 package database @@ -833,7 +833,7 @@ type sqlcQuerier interface { GetWorkspaceResourcesByJobIDs(ctx context.Context, ids []uuid.UUID) ([]WorkspaceResource, error) GetWorkspaceResourcesCreatedAfter(ctx context.Context, createdAt time.Time) ([]WorkspaceResource, error) GetWorkspaceUniqueOwnerCountByTemplateIDs(ctx context.Context, templateIds []uuid.UUID) ([]GetWorkspaceUniqueOwnerCountByTemplateIDsRow, error) - GetWorkspaceUsageGroupedByTemplateIDForOwner(ctx context.Context, arg GetWorkspaceUsageGroupedByTemplateIDForOwnerParams) ([]GetWorkspaceUsageGroupedByTemplateIDForOwnerRow, error) + GetWorkspaceUsageGroupedByTemplateIDByOwnerID(ctx context.Context, arg GetWorkspaceUsageGroupedByTemplateIDByOwnerIDParams) ([]GetWorkspaceUsageGroupedByTemplateIDByOwnerIDRow, error) // build_params is used to filter by build parameters if present. 
// It has to be a CTE because the set returning function 'unnest' cannot // be used in a WHERE clause. diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index cbefcfff43efd..c19a11529cb35 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -1,6 +1,6 @@ // Code generated by sqlc. DO NOT EDIT. // versions: -// sqlc v1.30.0 +// sqlc v1.31.1 package database @@ -12850,8 +12850,9 @@ RETURNING id func (q *sqlQuerier) DeleteLicense(ctx context.Context, id int32) (int32, error) { row := q.db.QueryRowContext(ctx, deleteLicense, id) - err := row.Scan(&id) - return id, err + var id_2 int32 + err := row.Scan(&id_2) + return id_2, err } const getLicenseByID = `-- name: GetLicenseByID :one @@ -32389,7 +32390,7 @@ func (q *sqlQuerier) GetWorkspaceUniqueOwnerCountByTemplateIDs(ctx context.Conte return items, nil } -const getWorkspaceUsageGroupedByTemplateIDForOwner = `-- name: GetWorkspaceUsageGroupedByTemplateIDForOwner :many +const getWorkspaceUsageGroupedByTemplateIDByOwnerID = `-- name: GetWorkspaceUsageGroupedByTemplateIDByOwnerID :many SELECT template_id, COUNT(*) AS workspace_count, @@ -32408,27 +32409,27 @@ WHERE GROUP BY template_id ` -type GetWorkspaceUsageGroupedByTemplateIDForOwnerParams struct { +type GetWorkspaceUsageGroupedByTemplateIDByOwnerIDParams struct { OwnerID uuid.UUID `db:"owner_id" json:"owner_id"` OrganizationID uuid.UUID `db:"organization_id" json:"organization_id"` TemplateIDs []uuid.UUID `db:"template_ids" json:"template_ids"` } -type GetWorkspaceUsageGroupedByTemplateIDForOwnerRow struct { +type GetWorkspaceUsageGroupedByTemplateIDByOwnerIDRow struct { TemplateID uuid.UUID `db:"template_id" json:"template_id"` WorkspaceCount int64 `db:"workspace_count" json:"workspace_count"` LastUsedAt time.Time `db:"last_used_at" json:"last_used_at"` } -func (q *sqlQuerier) GetWorkspaceUsageGroupedByTemplateIDForOwner(ctx context.Context, arg GetWorkspaceUsageGroupedByTemplateIDForOwnerParams) 
([]GetWorkspaceUsageGroupedByTemplateIDForOwnerRow, error) { - rows, err := q.db.QueryContext(ctx, getWorkspaceUsageGroupedByTemplateIDForOwner, arg.OwnerID, arg.OrganizationID, pq.Array(arg.TemplateIDs)) +func (q *sqlQuerier) GetWorkspaceUsageGroupedByTemplateIDByOwnerID(ctx context.Context, arg GetWorkspaceUsageGroupedByTemplateIDByOwnerIDParams) ([]GetWorkspaceUsageGroupedByTemplateIDByOwnerIDRow, error) { + rows, err := q.db.QueryContext(ctx, getWorkspaceUsageGroupedByTemplateIDByOwnerID, arg.OwnerID, arg.OrganizationID, pq.Array(arg.TemplateIDs)) if err != nil { return nil, err } defer rows.Close() - var items []GetWorkspaceUsageGroupedByTemplateIDForOwnerRow + var items []GetWorkspaceUsageGroupedByTemplateIDByOwnerIDRow for rows.Next() { - var i GetWorkspaceUsageGroupedByTemplateIDForOwnerRow + var i GetWorkspaceUsageGroupedByTemplateIDByOwnerIDRow if err := rows.Scan(&i.TemplateID, &i.WorkspaceCount, &i.LastUsedAt); err != nil { return nil, err } diff --git a/coderd/database/queries/workspaces.sql b/coderd/database/queries/workspaces.sql index 972f4a542712b..c860b7b0afe50 100644 --- a/coderd/database/queries/workspaces.sql +++ b/coderd/database/queries/workspaces.sql @@ -497,7 +497,7 @@ LEFT JOIN workspaces ON workspaces.template_id = templates.id AND workspaces.del WHERE templates.id = ANY(@template_ids :: uuid[]) GROUP BY templates.id; --- name: GetWorkspaceUsageGroupedByTemplateIDForOwner :many +-- name: GetWorkspaceUsageGroupedByTemplateIDByOwnerID :many SELECT template_id, COUNT(*) AS workspace_count, diff --git a/coderd/x/chatd/chatd.go b/coderd/x/chatd/chatd.go index ff551bb76f6da..de5d59934ea9d 100644 --- a/coderd/x/chatd/chatd.go +++ b/coderd/x/chatd/chatd.go @@ -5952,6 +5952,7 @@ func (p *Server) appendRootChatTools( tools = append(tools, chattool.ListTemplates(p.db, opts.chat.OrganizationID, chattool.ListTemplatesOptions{ OwnerID: opts.chat.OwnerID, + Logger: p.logger, AllowedTemplateIDs: p.chatTemplateAllowlist, }), chattool.ReadTemplate(p.db, 
opts.chat.OrganizationID, chattool.ReadTemplateOptions{ diff --git a/coderd/x/chatd/chattool/listtemplates.go b/coderd/x/chatd/chattool/listtemplates.go index 55abb3ee4b93d..6d50e7b3a8e47 100644 --- a/coderd/x/chatd/chattool/listtemplates.go +++ b/coderd/x/chatd/chattool/listtemplates.go @@ -4,6 +4,7 @@ import ( "cmp" "context" "database/sql" + "errors" "maps" "slices" "strings" @@ -13,13 +14,20 @@ import ( "github.com/google/uuid" "golang.org/x/xerrors" + "cdr.dev/slog/v3" "github.com/coder/coder/v2/coderd/database" "github.com/coder/coder/v2/coderd/database/dbauthz" "github.com/coder/coder/v2/coderd/httpmw" "github.com/coder/coder/v2/coderd/rbac" ) -const listTemplatesPageSize = 10 +const ( + listTemplatesPageSize = 10 + + listTemplatesMinPersonalWorkspacesForRecommendation = 2 + listTemplatesMinActiveDevelopersForRecommendation = 2 + listTemplatesRecentUsageWindow = 90 * 24 * time.Hour +) const ( listTemplatesHintOnlyAvailable = "only_available_template" @@ -28,9 +36,17 @@ const ( listTemplatesHintNoConfidence = "no_confident_match" ) +const ( + queryScoreExactName = 4 + queryScoreNamePrefix = 3 + queryScoreNameContains = 2 + queryScoreDescriptionMatch = 1 +) + // ListTemplatesOptions configures the list_templates tool. type ListTemplatesOptions struct { OwnerID uuid.UUID + Logger slog.Logger AllowedTemplateIDs func() map[uuid.UUID]bool } @@ -52,6 +68,13 @@ type templateUsage struct { LastUsedAt time.Time } +type templateRankSignals struct { + QueryScore int + WorkspaceCount int64 + LastUsedAtUnixNano int64 + ActiveDevelopers int64 +} + // ListTemplates returns a tool that lists available workspace templates. // The agent uses this to discover templates before creating a workspace. 
// Results are ranked before pagination using query relevance, current-user @@ -97,16 +120,30 @@ func ListTemplates(db database.Store, organizationID uuid.UUID, options ListTemp query := strings.TrimSpace(args.Query) visibleTemplateCount := len(templates) - ranked := candidateRankedTemplates(templates, query) + ranked := scoreTemplateCandidates(templates, query) templateIDs := make([]uuid.UUID, len(ranked)) for i, t := range ranked { templateIDs[i] = t.Template.ID } - ownerCounts := loadTemplateActiveDeveloperCounts(ctx, db, templateIDs) - usageByTemplate := loadTemplateUsage( + ownerCounts, ownerCountsErr := loadTemplateActiveDeveloperCounts(ctx, db, templateIDs) + if ownerCountsErr != nil { + options.Logger.Warn(ctx, "failed to load template active developer counts", + slog.F("template_count", len(templateIDs)), + slog.Error(ownerCountsErr), + ) + } + usageByTemplate, usageErr := loadTemplateUsage( ctx, db, options.OwnerID, organizationID, templateIDs, ) + if usageErr != nil { + options.Logger.Warn(ctx, "failed to load template usage", + slog.F("owner_id", options.OwnerID), + slog.F("organization_id", organizationID), + slog.F("template_count", len(templateIDs)), + slog.Error(usageErr), + ) + } for i := range ranked { ranked[i].ActiveDevelopers = ownerCounts[ranked[i].Template.ID] @@ -115,7 +152,10 @@ func ListTemplates(db database.Store, organizationID uuid.UUID, options ListTemp rankTemplates(ranked, query) selectionHint, recommendedID, recommendationReason := selectTemplateRecommendation( - ranked, visibleTemplateCount, + ranked, + visibleTemplateCount, + errors.Join(ownerCountsErr, usageErr), + time.Now(), ) // Paginate. 
@@ -144,13 +184,14 @@ func ListTemplates(db database.Store, organizationID uuid.UUID, options ListTemp } result := map[string]any{ - "templates": items, - "count": len(items), - "page": page, - "total_pages": totalPages, - "total_count": totalCount, - "selection_hint": selectionHint, - "recommendation_reason": recommendationReason, + "templates": items, + "count": len(items), + "page": page, + "total_pages": totalPages, + "total_count": totalCount, + "available_template_count": visibleTemplateCount, + "selection_hint": selectionHint, + "recommendation_reason": recommendationReason, } if recommendedID != uuid.Nil { result["recommended_template_id"] = recommendedID.String() @@ -160,29 +201,29 @@ func ListTemplates(db database.Store, organizationID uuid.UUID, options ListTemp ) } -func candidateRankedTemplates(templates []database.Template, query string) []rankedTemplate { - ranked := make([]rankedTemplate, 0, len(templates)) +func scoreTemplateCandidates(templates []database.Template, query string) []rankedTemplate { + candidates := make([]rankedTemplate, 0, len(templates)) for _, t := range templates { queryScore := templateQueryScore(t, query) if query != "" && queryScore == 0 { continue } - ranked = append(ranked, rankedTemplate{ + candidates = append(candidates, rankedTemplate{ Template: t, QueryScore: queryScore, }) } - return ranked + return candidates } func loadTemplateActiveDeveloperCounts( ctx context.Context, db database.Store, templateIDs []uuid.UUID, -) map[uuid.UUID]int64 { +) (map[uuid.UUID]int64, error) { ownerCounts := make(map[uuid.UUID]int64) if len(templateIDs) == 0 { - return ownerCounts + return ownerCounts, nil } // Templates are already filtered with the owner's permissions. The @@ -190,12 +231,12 @@ func loadTemplateActiveDeveloperCounts( // owners, but it only receives IDs the owner can already see. 
rows, err := db.GetWorkspaceUniqueOwnerCountByTemplateIDs(dbauthz.AsSystemRestricted(ctx), templateIDs) //nolint:gocritic // see above if err != nil { - return ownerCounts + return ownerCounts, err } for _, row := range rows { ownerCounts[row.TemplateID] = row.UniqueOwnersSum } - return ownerCounts + return ownerCounts, nil } func loadTemplateUsage( @@ -204,19 +245,19 @@ func loadTemplateUsage( ownerID uuid.UUID, organizationID uuid.UUID, templateIDs []uuid.UUID, -) map[uuid.UUID]templateUsage { +) (map[uuid.UUID]templateUsage, error) { usageByTemplate := make(map[uuid.UUID]templateUsage) if ownerID == uuid.Nil || len(templateIDs) == 0 { - return usageByTemplate + return usageByTemplate, nil } - rows, err := db.GetWorkspaceUsageGroupedByTemplateIDForOwner(ctx, database.GetWorkspaceUsageGroupedByTemplateIDForOwnerParams{ + rows, err := db.GetWorkspaceUsageGroupedByTemplateIDByOwnerID(ctx, database.GetWorkspaceUsageGroupedByTemplateIDByOwnerIDParams{ OwnerID: ownerID, OrganizationID: organizationID, TemplateIDs: templateIDs, }) if err != nil { - return usageByTemplate + return usageByTemplate, err } for _, row := range rows { usageByTemplate[row.TemplateID] = templateUsage{ @@ -224,29 +265,22 @@ func loadTemplateUsage( LastUsedAt: row.LastUsedAt, } } - return usageByTemplate + return usageByTemplate, nil } func rankTemplates(ranked []rankedTemplate, query string) { slices.SortStableFunc(ranked, func(a, b rankedTemplate) int { - if query != "" { - if c := cmp.Compare(b.QueryScore, a.QueryScore); c != 0 { - return c - } - } - if c := cmp.Compare(b.Usage.WorkspaceCount, a.Usage.WorkspaceCount); c != 0 { + if c := compareTemplateRankSignals( + templateRankSignalsFor(a), + templateRankSignalsFor(b), + query, + ); c != 0 { return c } - if c := b.Usage.LastUsedAt.Compare(a.Usage.LastUsedAt); c != 0 { + if c := cmp.Compare(a.Template.Name, b.Template.Name); c != 0 { return c } - if c := cmp.Compare(b.ActiveDevelopers, a.ActiveDevelopers); c != 0 { - return c - } - if c := 
strings.Compare(a.Template.Name, b.Template.Name); c != 0 { - return c - } - return strings.Compare(a.Template.ID.String(), b.Template.ID.String()) + return cmp.Compare(a.Template.ID.String(), b.Template.ID.String()) }) for i := range ranked { @@ -254,15 +288,51 @@ func rankTemplates(ranked []rankedTemplate, query string) { } } +func templateRankSignalsFor(t rankedTemplate) templateRankSignals { + return templateRankSignals{ + QueryScore: t.QueryScore, + WorkspaceCount: t.Usage.WorkspaceCount, + LastUsedAtUnixNano: templateRankTime(t.Usage.LastUsedAt), + ActiveDevelopers: t.ActiveDevelopers, + } +} + +func templateRankTime(t time.Time) int64 { + if t.IsZero() { + return 0 + } + return t.UnixNano() +} + +func compareTemplateRankSignals(a, b templateRankSignals, query string) int { + if query != "" { + if c := cmp.Compare(b.QueryScore, a.QueryScore); c != 0 { + return c + } + } + if c := cmp.Compare(b.WorkspaceCount, a.WorkspaceCount); c != 0 { + return c + } + if c := cmp.Compare(b.LastUsedAtUnixNano, a.LastUsedAtUnixNano); c != 0 { + return c + } + return cmp.Compare(b.ActiveDevelopers, a.ActiveDevelopers) +} + func selectTemplateRecommendation( ranked []rankedTemplate, visibleTemplateCount int, + rankingSignalsErr error, + now time.Time, ) (string, uuid.UUID, string) { if len(ranked) == 0 { return listTemplatesHintNoConfidence, uuid.Nil, "no_matching_templates" } top := ranked[0] + if rankingSignalsErr != nil { + return listTemplatesHintNoConfidence, uuid.Nil, "ranking_signals_unavailable" + } if visibleTemplateCount == 1 && len(ranked) == 1 { return listTemplatesHintOnlyAvailable, top.Template.ID, "only_available_template" } @@ -272,27 +342,44 @@ func selectTemplateRecommendation( if len(ranked) > 1 && templatesAreAmbiguous(top, ranked[1]) { return listTemplatesHintAmbiguous, uuid.Nil, "top_templates_are_ambiguous" } - return listTemplatesHintHighConfidence, top.Template.ID, rankReason(top) + if !templateHasConfidentRankingSignal(top, now) { + return 
listTemplatesHintNoConfidence, uuid.Nil, "weak_ranking_signal" + } + return listTemplatesHintHighConfidence, top.Template.ID, relevanceSignals(top) } func templatesAreAmbiguous(a, b rankedTemplate) bool { - return a.QueryScore == b.QueryScore && - a.Usage.WorkspaceCount == b.Usage.WorkspaceCount && - a.Usage.LastUsedAt.Equal(b.Usage.LastUsedAt) && - a.ActiveDevelopers == b.ActiveDevelopers + return templateRankSignalsFor(a) == templateRankSignalsFor(b) } func templateHasRankingSignal(t rankedTemplate) bool { - return t.QueryScore > 0 || t.Usage.WorkspaceCount > 0 || t.ActiveDevelopers > 0 + signals := templateRankSignalsFor(t) + return signals.QueryScore > 0 || signals.WorkspaceCount > 0 || signals.ActiveDevelopers > 0 +} + +func templateHasConfidentRankingSignal(t rankedTemplate, now time.Time) bool { + signals := templateRankSignalsFor(t) + if signals.QueryScore > 0 { + return true + } + if signals.WorkspaceCount >= listTemplatesMinPersonalWorkspacesForRecommendation { + return true + } + if signals.WorkspaceCount > 0 && + !t.Usage.LastUsedAt.IsZero() && + now.Sub(t.Usage.LastUsedAt) <= listTemplatesRecentUsageWindow { + return true + } + return signals.ActiveDevelopers >= listTemplatesMinActiveDevelopersForRecommendation } func templateItem(t rankedTemplate, recommendedID uuid.UUID) map[string]any { item := map[string]any{ - "id": t.Template.ID.String(), - "name": t.Template.Name, - "organization_id": t.Template.OrganizationID.String(), - "rank": t.Rank, - "rank_reason": rankReason(t), + "id": t.Template.ID.String(), + "name": t.Template.Name, + "organization_id": t.Template.OrganizationID.String(), + "rank": t.Rank, + "relevance_signals": relevanceSignals(t), } if display := strings.TrimSpace(t.Template.DisplayName); display != "" { item["display_name"] = display @@ -313,15 +400,16 @@ func templateItem(t rankedTemplate, recommendedID uuid.UUID) map[string]any { return item } -func rankReason(t rankedTemplate) string { +func relevanceSignals(t rankedTemplate) 
string { + signals := templateRankSignalsFor(t) switch { - case t.QueryScore > 0 && t.Usage.WorkspaceCount > 0: + case signals.QueryScore > 0 && signals.WorkspaceCount > 0: return "matches_query_and_used_by_you" - case t.QueryScore > 0: + case signals.QueryScore > 0: return "matches_query" - case t.Usage.WorkspaceCount > 0: + case signals.WorkspaceCount > 0: return "used_by_you" - case t.ActiveDevelopers > 0: + case signals.ActiveDevelopers > 0: return "popular_in_org" default: return "ordered_by_name" @@ -341,7 +429,7 @@ func templateQueryScore(t database.Template, query string) int { continue } if field == query || compactTemplateSearch(field) == queryCompact { - return 4 + return queryScoreExactName } } for _, field := range []string{t.Name, t.DisplayName} { @@ -350,7 +438,7 @@ func templateQueryScore(t database.Template, query string) int { continue } if strings.HasPrefix(field, query) || strings.HasPrefix(compactTemplateSearch(field), queryCompact) { - return 3 + return queryScoreNamePrefix } } for _, field := range []string{t.Name, t.DisplayName} { @@ -359,11 +447,11 @@ func templateQueryScore(t database.Template, query string) int { continue } if strings.Contains(field, query) || strings.Contains(compactTemplateSearch(field), queryCompact) { - return 2 + return queryScoreNameContains } } if strings.Contains(normalizeTemplateSearch(t.Description), query) { - return 1 + return queryScoreDescriptionMatch } return 0 } @@ -372,8 +460,10 @@ func normalizeTemplateSearch(value string) string { return strings.ToLower(strings.TrimSpace(value)) } +var templateSearchCompactReplacer = strings.NewReplacer(" ", "", "-", "", "_", "") + func compactTemplateSearch(value string) string { - return strings.ReplaceAll(value, " ", "") + return templateSearchCompactReplacer.Replace(value) } // asOwner sets up a dbauthz context for the given owner so that diff --git a/coderd/x/chatd/chattool/listtemplates_test.go b/coderd/x/chatd/chattool/listtemplates_test.go index 
66c2cf6e8e4d4..5dd1417da9f98 100644 --- a/coderd/x/chatd/chattool/listtemplates_test.go +++ b/coderd/x/chatd/chattool/listtemplates_test.go @@ -84,6 +84,10 @@ func TestListTemplates_OrganizationFilter(t *testing.T) { require.NoError(t, json.Unmarshal([]byte(resp.Content), &result)) templates := result["templates"].([]any) require.Len(t, templates, 2) + require.Equal(t, "no_confident_match", result["selection_hint"]) + require.Equal(t, "no_ranking_signal", result["recommendation_reason"]) + _, ok := result["recommended_template_id"] + require.False(t, ok) }) t.Run("ReadTemplate_CrossOrgRejected", func(t *testing.T) { @@ -163,7 +167,7 @@ func TestListTemplates_QueryMatchesDisplayNameAndDescription(t *testing.T) { require.Equal(t, displayTemplate.ID.String(), templates[0]["id"]) require.Equal(t, "high_confidence_recommendation", result["selection_hint"]) require.Equal(t, displayTemplate.ID.String(), result["recommended_template_id"]) - require.Equal(t, "matches_query", templates[0]["rank_reason"]) + require.Equal(t, "matches_query", templates[0]["relevance_signals"]) result = runListTemplates(ctx, t, tool, `{"query":"TypeScript"}`) templates = listTemplateItems(t, result) @@ -171,6 +175,69 @@ func TestListTemplates_QueryMatchesDisplayNameAndDescription(t *testing.T) { require.Equal(t, descriptionTemplate.ID.String(), templates[0]["id"]) require.Equal(t, "high_confidence_recommendation", result["selection_hint"]) require.Equal(t, descriptionTemplate.ID.String(), result["recommended_template_id"]) + + result = runListTemplates(ctx, t, tool, `{"query":"does-not-exist"}`) + templates = listTemplateItems(t, result) + require.Empty(t, templates) + require.Equal(t, float64(0), result["total_count"]) + require.Equal(t, float64(3), result["available_template_count"]) + require.Equal(t, "no_confident_match", result["selection_hint"]) + require.Equal(t, "no_matching_templates", result["recommendation_reason"]) +} + +func TestListTemplates_QueryScoreTiers(t *testing.T) { + 
t.Parallel() + ctx := testutil.Context(t, testutil.WaitShort) + db, _ := dbtestutil.NewDB(t) + user := dbgen.User(t, db, database.User{}) + org := dbgen.Organization(t, db, database.Organization{}) + _ = dbgen.OrganizationMember(t, db, database.OrganizationMember{ + UserID: user.ID, + OrganizationID: org.ID, + }) + + exact := dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + Name: "python", + }) + prefix := dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + Name: "python-alpha", + }) + contains := dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + Name: "go-python", + }) + description := dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + Name: "generic-dev", + Description: "Python-capable general environment.", + }) + + tool := chattool.ListTemplates(db, org.ID, chattool.ListTemplatesOptions{ + OwnerID: user.ID, + }) + result := runListTemplates(ctx, t, tool, `{"query":"python"}`) + templates := listTemplateItems(t, result) + require.Len(t, templates, 4) + require.Equal(t, exact.ID.String(), templates[0]["id"]) + require.Equal(t, prefix.ID.String(), templates[1]["id"]) + require.Equal(t, contains.ID.String(), templates[2]["id"]) + require.Equal(t, description.ID.String(), templates[3]["id"]) + + hyphenated := dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + Name: "python-gpu", + }) + result = runListTemplates(ctx, t, tool, `{"query":"python gpu"}`) + templates = listTemplateItems(t, result) + require.Len(t, templates, 1) + require.Equal(t, hyphenated.ID.String(), templates[0]["id"]) } func TestListTemplates_RanksAllCandidatesBeforePagination(t *testing.T) { @@ -199,7 +266,7 @@ func TestListTemplates_RanksAllCandidatesBeforePagination(t *testing.T) { OwnerID: user.ID, OrganizationID: org.ID, TemplateID: target.ID, - LastUsedAt: time.Date(2026, 5, 1, 12, 0, 0, 0, 
time.UTC), + LastUsedAt: time.Now().Add(-time.Hour), }) tool := chattool.ListTemplates(db, org.ID, chattool.ListTemplatesOptions{ @@ -215,7 +282,7 @@ func TestListTemplates_RanksAllCandidatesBeforePagination(t *testing.T) { require.Equal(t, float64(1), templates[0]["your_workspace_count"]) require.NotEmpty(t, templates[0]["last_used_by_you"]) require.Equal(t, true, templates[0]["recommended"]) - require.Equal(t, "used_by_you", templates[0]["rank_reason"]) + require.Equal(t, "used_by_you", templates[0]["relevance_signals"]) require.Equal(t, "high_confidence_recommendation", result["selection_hint"]) require.Equal(t, target.ID.String(), result["recommended_template_id"]) } @@ -258,12 +325,53 @@ func TestListTemplates_QueryRelevanceOutranksPersonalUsage(t *testing.T) { require.Len(t, templates, 2) require.Equal(t, target.ID.String(), templates[0]["id"]) require.Equal(t, used.ID.String(), templates[1]["id"]) - require.Equal(t, "matches_query", templates[0]["rank_reason"]) - require.Equal(t, "matches_query_and_used_by_you", templates[1]["rank_reason"]) + require.Equal(t, "matches_query", templates[0]["relevance_signals"]) + require.Equal(t, "matches_query_and_used_by_you", templates[1]["relevance_signals"]) require.Equal(t, "high_confidence_recommendation", result["selection_hint"]) require.Equal(t, target.ID.String(), result["recommended_template_id"]) } +func TestListTemplates_PersonalUsageBreaksEqualQueryScoreTie(t *testing.T) { + t.Parallel() + ctx := testutil.Context(t, testutil.WaitShort) + db, _ := dbtestutil.NewDB(t) + user := dbgen.User(t, db, database.User{}) + org := dbgen.Organization(t, db, database.Organization{}) + _ = dbgen.OrganizationMember(t, db, database.OrganizationMember{ + UserID: user.ID, + OrganizationID: org.ID, + }) + + unused := dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + Name: "python-alpha", + }) + used := dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, 
+ Name: "python-beta", + }) + dbgen.Workspace(t, db, database.WorkspaceTable{ + OwnerID: user.ID, + OrganizationID: org.ID, + TemplateID: used.ID, + LastUsedAt: time.Now().Add(-time.Hour), + }) + + tool := chattool.ListTemplates(db, org.ID, chattool.ListTemplatesOptions{ + OwnerID: user.ID, + }) + result := runListTemplates(ctx, t, tool, `{"query":"python"}`) + templates := listTemplateItems(t, result) + require.Len(t, templates, 2) + require.Equal(t, used.ID.String(), templates[0]["id"]) + require.Equal(t, unused.ID.String(), templates[1]["id"]) + require.Equal(t, "matches_query_and_used_by_you", templates[0]["relevance_signals"]) + require.Equal(t, "high_confidence_recommendation", result["selection_hint"]) + require.Equal(t, used.ID.String(), result["recommended_template_id"]) +} + func TestListTemplates_OrgPopularityFallback(t *testing.T) { t.Parallel() ctx := testutil.Context(t, testutil.WaitShort) @@ -308,11 +416,97 @@ func TestListTemplates_OrgPopularityFallback(t *testing.T) { require.Len(t, templates, 2) require.Equal(t, popular.ID.String(), templates[0]["id"]) require.Equal(t, float64(2), templates[0]["active_developers"]) - require.Equal(t, "popular_in_org", templates[0]["rank_reason"]) + require.Equal(t, "popular_in_org", templates[0]["relevance_signals"]) require.Equal(t, "high_confidence_recommendation", result["selection_hint"]) require.Equal(t, popular.ID.String(), result["recommended_template_id"]) } +func TestListTemplates_WeakOrgPopularityDoesNotRecommend(t *testing.T) { + t.Parallel() + ctx := testutil.Context(t, testutil.WaitShort) + db, _ := dbtestutil.NewDB(t) + user := dbgen.User(t, db, database.User{}) + org := dbgen.Organization(t, db, database.Organization{}) + _ = dbgen.OrganizationMember(t, db, database.OrganizationMember{ + UserID: user.ID, + OrganizationID: org.ID, + }) + + usedByOne := dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + Name: "used-by-one", + }) + unused := dbgen.Template(t, db, 
database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + Name: "unused", + }) + otherUser := dbgen.User(t, db, database.User{}) + dbgen.Workspace(t, db, database.WorkspaceTable{ + OwnerID: otherUser.ID, + OrganizationID: org.ID, + TemplateID: usedByOne.ID, + }) + + tool := chattool.ListTemplates(db, org.ID, chattool.ListTemplatesOptions{ + OwnerID: user.ID, + }) + result := runListTemplates(ctx, t, tool, `{}`) + templates := listTemplateItems(t, result) + require.Len(t, templates, 2) + require.Equal(t, usedByOne.ID.String(), templates[0]["id"]) + require.Equal(t, unused.ID.String(), templates[1]["id"]) + require.Equal(t, float64(1), templates[0]["active_developers"]) + require.Equal(t, "no_confident_match", result["selection_hint"]) + require.Equal(t, "weak_ranking_signal", result["recommendation_reason"]) + _, ok := result["recommended_template_id"] + require.False(t, ok) +} + +func TestListTemplates_StalePersonalUsageDoesNotRecommend(t *testing.T) { + t.Parallel() + ctx := testutil.Context(t, testutil.WaitShort) + db, _ := dbtestutil.NewDB(t) + user := dbgen.User(t, db, database.User{}) + org := dbgen.Organization(t, db, database.Organization{}) + _ = dbgen.OrganizationMember(t, db, database.OrganizationMember{ + UserID: user.ID, + OrganizationID: org.ID, + }) + + oldUsage := dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + Name: "old-usage", + }) + unused := dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + Name: "unused", + }) + dbgen.Workspace(t, db, database.WorkspaceTable{ + OwnerID: user.ID, + OrganizationID: org.ID, + TemplateID: oldUsage.ID, + LastUsedAt: time.Now().Add(-180 * 24 * time.Hour), + }) + + tool := chattool.ListTemplates(db, org.ID, chattool.ListTemplatesOptions{ + OwnerID: user.ID, + }) + result := runListTemplates(ctx, t, tool, `{}`) + templates := listTemplateItems(t, result) + require.Len(t, templates, 2) + require.Equal(t, oldUsage.ID.String(), 
templates[0]["id"]) + require.Equal(t, unused.ID.String(), templates[1]["id"]) + require.Equal(t, float64(1), templates[0]["your_workspace_count"]) + require.Equal(t, "no_confident_match", result["selection_hint"]) + require.Equal(t, "weak_ranking_signal", result["recommendation_reason"]) + _, ok := result["recommended_template_id"] + require.False(t, ok) +} + func TestListTemplates_AmbiguousTopMatches(t *testing.T) { t.Parallel() ctx := testutil.Context(t, testutil.WaitShort) @@ -432,6 +626,10 @@ func TestTemplateAllowlistEnforcement(t *testing.T) { require.NoError(t, json.Unmarshal([]byte(resp.Content), &result)) templates := result["templates"].([]any) require.Empty(t, templates) + require.Equal(t, "no_confident_match", result["selection_hint"]) + require.Equal(t, "no_matching_templates", result["recommendation_reason"]) + _, ok := result["recommended_template_id"] + require.False(t, ok) }) }) From b8ae6390e81071ff40d2b2f515b285f028a8b5c4 Mon Sep 17 00:00:00 2001 From: Jaayden Halko Date: Fri, 15 May 2026 15:14:49 +0000 Subject: [PATCH 04/21] fix(coderd): follow up on template ranking feedback --- coderd/x/chatd/chatd.go | 1 + coderd/x/chatd/chattool/listtemplates.go | 28 ++++++-- .../chattool/listtemplates_internal_test.go | 57 ++++++++++++++++ coderd/x/chatd/chattool/listtemplates_test.go | 65 ++++++++++++++++++- 4 files changed, 145 insertions(+), 6 deletions(-) create mode 100644 coderd/x/chatd/chattool/listtemplates_internal_test.go diff --git a/coderd/x/chatd/chatd.go b/coderd/x/chatd/chatd.go index de5d59934ea9d..b9d76cdfe5aae 100644 --- a/coderd/x/chatd/chatd.go +++ b/coderd/x/chatd/chatd.go @@ -5953,6 +5953,7 @@ func (p *Server) appendRootChatTools( chattool.ListTemplates(p.db, opts.chat.OrganizationID, chattool.ListTemplatesOptions{ OwnerID: opts.chat.OwnerID, Logger: p.logger, + Clock: p.clock, AllowedTemplateIDs: p.chatTemplateAllowlist, }), chattool.ReadTemplate(p.db, opts.chat.OrganizationID, chattool.ReadTemplateOptions{ diff --git 
a/coderd/x/chatd/chattool/listtemplates.go b/coderd/x/chatd/chattool/listtemplates.go index 6d50e7b3a8e47..e909b41f964cc 100644 --- a/coderd/x/chatd/chattool/listtemplates.go +++ b/coderd/x/chatd/chattool/listtemplates.go @@ -19,6 +19,7 @@ import ( "github.com/coder/coder/v2/coderd/database/dbauthz" "github.com/coder/coder/v2/coderd/httpmw" "github.com/coder/coder/v2/coderd/rbac" + "github.com/coder/quartz" ) const ( @@ -47,6 +48,7 @@ const ( type ListTemplatesOptions struct { OwnerID uuid.UUID Logger slog.Logger + Clock quartz.Clock AllowedTemplateIDs func() map[uuid.UUID]bool } @@ -81,6 +83,11 @@ type templateRankSignals struct { // usage, and organization-wide popularity. // db must not be nil. func ListTemplates(db database.Store, organizationID uuid.UUID, options ListTemplatesOptions) fantasy.AgentTool { + clock := options.Clock + if clock == nil { + clock = quartz.NewReal() + } + return fantasy.NewAgentTool( "list_templates", "List available workspace templates as a ranked shortlist. "+ @@ -155,7 +162,7 @@ func ListTemplates(db database.Store, organizationID uuid.UUID, options ListTemp ranked, visibleTemplateCount, errors.Join(ownerCountsErr, usageErr), - time.Now(), + clock.Now(), ) // Paginate. 
@@ -330,12 +337,15 @@ func selectTemplateRecommendation( } top := ranked[0] - if rankingSignalsErr != nil { - return listTemplatesHintNoConfidence, uuid.Nil, "ranking_signals_unavailable" - } if visibleTemplateCount == 1 && len(ranked) == 1 { return listTemplatesHintOnlyAvailable, top.Template.ID, "only_available_template" } + if rankingSignalsErr != nil { + if templateHasDecisiveQuerySignal(ranked) { + return listTemplatesHintHighConfidence, top.Template.ID, relevanceSignals(top) + } + return listTemplatesHintNoConfidence, uuid.Nil, "ranking_signals_unavailable" + } if !templateHasRankingSignal(top) { return listTemplatesHintNoConfidence, uuid.Nil, "no_ranking_signal" } @@ -357,6 +367,13 @@ func templateHasRankingSignal(t rankedTemplate) bool { return signals.QueryScore > 0 || signals.WorkspaceCount > 0 || signals.ActiveDevelopers > 0 } +func templateHasDecisiveQuerySignal(ranked []rankedTemplate) bool { + if len(ranked) == 0 || ranked[0].QueryScore == 0 { + return false + } + return len(ranked) == 1 || ranked[0].QueryScore > ranked[1].QueryScore +} + func templateHasConfidentRankingSignal(t rankedTemplate, now time.Time) bool { signals := templateRankSignalsFor(t) if signals.QueryScore > 0 { @@ -450,7 +467,8 @@ func templateQueryScore(t database.Template, query string) int { return queryScoreNameContains } } - if strings.Contains(normalizeTemplateSearch(t.Description), query) { + desc := normalizeTemplateSearch(t.Description) + if strings.Contains(desc, query) || strings.Contains(compactTemplateSearch(desc), queryCompact) { return queryScoreDescriptionMatch } return 0 diff --git a/coderd/x/chatd/chattool/listtemplates_internal_test.go b/coderd/x/chatd/chattool/listtemplates_internal_test.go new file mode 100644 index 0000000000000..655c275b9ed4b --- /dev/null +++ b/coderd/x/chatd/chattool/listtemplates_internal_test.go @@ -0,0 +1,57 @@ +package chattool + +import ( + "testing" + "time" + + "github.com/google/uuid" + "github.com/stretchr/testify/require" + 
"golang.org/x/xerrors" + + "github.com/coder/coder/v2/coderd/database" +) + +func TestSelectTemplateRecommendationRankingSignalsUnavailable(t *testing.T) { + t.Parallel() + + enrichmentErr := xerrors.New("enrichment failed") + now := time.Date(2026, 5, 15, 12, 0, 0, 0, time.UTC) + + onlyTemplateID := uuid.New() + hint, recommendedID, reason := selectTemplateRecommendation( + []rankedTemplate{{Template: database.Template{ID: onlyTemplateID}}}, + 1, + enrichmentErr, + now, + ) + require.Equal(t, listTemplatesHintOnlyAvailable, hint) + require.Equal(t, onlyTemplateID, recommendedID) + require.Equal(t, "only_available_template", reason) + + topID := uuid.New() + hint, recommendedID, reason = selectTemplateRecommendation( + []rankedTemplate{ + {Template: database.Template{ID: topID}, QueryScore: queryScoreExactName}, + {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreDescriptionMatch}, + }, + 2, + enrichmentErr, + now, + ) + require.Equal(t, listTemplatesHintHighConfidence, hint) + require.Equal(t, topID, recommendedID) + require.Equal(t, "matches_query", reason) + + hint, recommendedID, reason = selectTemplateRecommendation( + []rankedTemplate{ + {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreNamePrefix}, + {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreNamePrefix}, + }, + 2, + enrichmentErr, + now, + ) + require.Equal(t, listTemplatesHintNoConfidence, hint) + require.Equal(t, uuid.Nil, recommendedID) + require.Equal(t, "ranking_signals_unavailable", reason) +} diff --git a/coderd/x/chatd/chattool/listtemplates_test.go b/coderd/x/chatd/chattool/listtemplates_test.go index 5dd1417da9f98..04d066012b10f 100644 --- a/coderd/x/chatd/chattool/listtemplates_test.go +++ b/coderd/x/chatd/chattool/listtemplates_test.go @@ -17,6 +17,7 @@ import ( "github.com/coder/coder/v2/coderd/x/chatd/chattool" "github.com/coder/coder/v2/codersdk" "github.com/coder/coder/v2/testutil" + "github.com/coder/quartz" ) func 
TestListTemplates_OrganizationFilter(t *testing.T) { @@ -238,6 +239,17 @@ func TestListTemplates_QueryScoreTiers(t *testing.T) { templates = listTemplateItems(t, result) require.Len(t, templates, 1) require.Equal(t, hyphenated.ID.String(), templates[0]["id"]) + + descriptionHyphenated := dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + Name: "ml-tools", + Description: "Includes machine-learning libraries.", + }) + result = runListTemplates(ctx, t, tool, `{"query":"machine learning"}`) + templates = listTemplateItems(t, result) + require.Len(t, templates, 1) + require.Equal(t, descriptionHyphenated.ID.String(), templates[0]["id"]) } func TestListTemplates_RanksAllCandidatesBeforePagination(t *testing.T) { @@ -467,6 +479,9 @@ func TestListTemplates_WeakOrgPopularityDoesNotRecommend(t *testing.T) { func TestListTemplates_StalePersonalUsageDoesNotRecommend(t *testing.T) { t.Parallel() ctx := testutil.Context(t, testutil.WaitShort) + clock := quartz.NewMock(t) + now := time.Date(2026, 5, 15, 12, 0, 0, 0, time.UTC) + clock.Set(now).MustWait(ctx) db, _ := dbtestutil.NewDB(t) user := dbgen.User(t, db, database.User{}) org := dbgen.Organization(t, db, database.Organization{}) @@ -489,11 +504,12 @@ func TestListTemplates_StalePersonalUsageDoesNotRecommend(t *testing.T) { OwnerID: user.ID, OrganizationID: org.ID, TemplateID: oldUsage.ID, - LastUsedAt: time.Now().Add(-180 * 24 * time.Hour), + LastUsedAt: now.Add(-180 * 24 * time.Hour), }) tool := chattool.ListTemplates(db, org.ID, chattool.ListTemplatesOptions{ OwnerID: user.ID, + Clock: clock, }) result := runListTemplates(ctx, t, tool, `{}`) templates := listTemplateItems(t, result) @@ -507,6 +523,53 @@ func TestListTemplates_StalePersonalUsageDoesNotRecommend(t *testing.T) { require.False(t, ok) } +func TestListTemplates_PersonalUsageCountRecommendsStaleTemplate(t *testing.T) { + t.Parallel() + ctx := testutil.Context(t, testutil.WaitShort) + clock := quartz.NewMock(t) + now := 
time.Date(2026, 5, 15, 12, 0, 0, 0, time.UTC) + clock.Set(now).MustWait(ctx) + db, _ := dbtestutil.NewDB(t) + user := dbgen.User(t, db, database.User{}) + org := dbgen.Organization(t, db, database.Organization{}) + _ = dbgen.OrganizationMember(t, db, database.OrganizationMember{ + UserID: user.ID, + OrganizationID: org.ID, + }) + + staleUsage := dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + Name: "stale-usage", + }) + unused := dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + Name: "unused", + }) + for range 2 { + dbgen.Workspace(t, db, database.WorkspaceTable{ + OwnerID: user.ID, + OrganizationID: org.ID, + TemplateID: staleUsage.ID, + LastUsedAt: now.Add(-180 * 24 * time.Hour), + }) + } + + tool := chattool.ListTemplates(db, org.ID, chattool.ListTemplatesOptions{ + OwnerID: user.ID, + Clock: clock, + }) + result := runListTemplates(ctx, t, tool, `{}`) + templates := listTemplateItems(t, result) + require.Len(t, templates, 2) + require.Equal(t, staleUsage.ID.String(), templates[0]["id"]) + require.Equal(t, unused.ID.String(), templates[1]["id"]) + require.Equal(t, float64(2), templates[0]["your_workspace_count"]) + require.Equal(t, "high_confidence_recommendation", result["selection_hint"]) + require.Equal(t, staleUsage.ID.String(), result["recommended_template_id"]) +} + func TestListTemplates_AmbiguousTopMatches(t *testing.T) { t.Parallel() ctx := testutil.Context(t, testutil.WaitShort) From 4d357dea9f21cf6ee022c31b96f0f7c65810696b Mon Sep 17 00:00:00 2001 From: Jaayden Halko Date: Mon, 1 Jun 2026 10:48:52 +0000 Subject: [PATCH 05/21] fix(coderd): address template ranking feedback --- coderd/workspaces_test.go | 10 ++++- coderd/x/chatd/chattool/listtemplates.go | 43 ++++++++++++++----- .../chattool/listtemplates_internal_test.go | 43 +++++++++++++++++-- coderd/x/chatd/chattool/listtemplates_test.go | 16 ++++++- 4 files changed, 97 insertions(+), 15 deletions(-) diff --git 
a/coderd/workspaces_test.go b/coderd/workspaces_test.go index b03253b76ba6a..557da1441771f 100644 --- a/coderd/workspaces_test.go +++ b/coderd/workspaces_test.go @@ -4587,7 +4587,15 @@ func TestWorkspaceDormant(t *testing.T) { require.NoError(t, err) // Should be able to stop a workspace while it is dormant. - coderdtest.MustTransitionWorkspace(t, client, workspace.ID, codersdk.WorkspaceTransitionStart, codersdk.WorkspaceTransitionStop) + workspace = coderdtest.MustTransitionWorkspace(t, client, workspace.ID, codersdk.WorkspaceTransitionStart, codersdk.WorkspaceTransitionStop) + testutil.Eventually(ctx, t, func(context.Context) bool { + return auditor.Contains(t, database.AuditLog{ + ResourceID: workspace.LatestBuild.ID, + ResourceType: database.ResourceTypeWorkspaceBuild, + Action: database.AuditActionStop, + StatusCode: http.StatusOK, + }) + }, testutil.IntervalFast) // Reset the auditor auditor.ResetLogs() diff --git a/coderd/x/chatd/chattool/listtemplates.go b/coderd/x/chatd/chattool/listtemplates.go index e909b41f964cc..26881fe5fd03a 100644 --- a/coderd/x/chatd/chattool/listtemplates.go +++ b/coderd/x/chatd/chattool/listtemplates.go @@ -4,7 +4,6 @@ import ( "cmp" "context" "database/sql" - "errors" "maps" "slices" "strings" @@ -77,6 +76,15 @@ type templateRankSignals struct { ActiveDevelopers int64 } +type templateRankingSignalErrors struct { + ActiveDeveloperCounts error + Usage error +} + +func (e templateRankingSignalErrors) hasAny() bool { + return e.ActiveDeveloperCounts != nil || e.Usage != nil +} + // ListTemplates returns a tool that lists available workspace templates. // The agent uses this to discover templates before creating a workspace. 
// Results are ranked before pagination using query relevance, current-user @@ -161,7 +169,10 @@ func ListTemplates(db database.Store, organizationID uuid.UUID, options ListTemp selectionHint, recommendedID, recommendationReason := selectTemplateRecommendation( ranked, visibleTemplateCount, - errors.Join(ownerCountsErr, usageErr), + templateRankingSignalErrors{ + ActiveDeveloperCounts: ownerCountsErr, + Usage: usageErr, + }, clock.Now(), ) @@ -329,7 +340,7 @@ func compareTemplateRankSignals(a, b templateRankSignals, query string) int { func selectTemplateRecommendation( ranked []rankedTemplate, visibleTemplateCount int, - rankingSignalsErr error, + rankingSignalErrors templateRankingSignalErrors, now time.Time, ) (string, uuid.UUID, string) { if len(ranked) == 0 { @@ -340,10 +351,15 @@ func selectTemplateRecommendation( if visibleTemplateCount == 1 && len(ranked) == 1 { return listTemplatesHintOnlyAvailable, top.Template.ID, "only_available_template" } - if rankingSignalsErr != nil { + if rankingSignalErrors.hasAny() { if templateHasDecisiveQuerySignal(ranked) { return listTemplatesHintHighConfidence, top.Template.ID, relevanceSignals(top) } + if rankingSignalErrors.Usage == nil && + templateHasConfidentPersonalUsageSignal(top, now) && + (len(ranked) == 1 || !templatesAreAmbiguous(top, ranked[1])) { + return listTemplatesHintHighConfidence, top.Template.ID, relevanceSignals(top) + } return listTemplatesHintNoConfidence, uuid.Nil, "ranking_signals_unavailable" } if !templateHasRankingSignal(top) { @@ -374,17 +390,21 @@ func templateHasDecisiveQuerySignal(ranked []rankedTemplate) bool { return len(ranked) == 1 || ranked[0].QueryScore > ranked[1].QueryScore } +func templateHasConfidentPersonalUsageSignal(t rankedTemplate, now time.Time) bool { + if t.Usage.WorkspaceCount >= listTemplatesMinPersonalWorkspacesForRecommendation { + return true + } + return t.Usage.WorkspaceCount > 0 && + !t.Usage.LastUsedAt.IsZero() && + now.Sub(t.Usage.LastUsedAt) <= 
listTemplatesRecentUsageWindow +} + func templateHasConfidentRankingSignal(t rankedTemplate, now time.Time) bool { signals := templateRankSignalsFor(t) if signals.QueryScore > 0 { return true } - if signals.WorkspaceCount >= listTemplatesMinPersonalWorkspacesForRecommendation { - return true - } - if signals.WorkspaceCount > 0 && - !t.Usage.LastUsedAt.IsZero() && - now.Sub(t.Usage.LastUsedAt) <= listTemplatesRecentUsageWindow { + if templateHasConfidentPersonalUsageSignal(t, now) { return true } return signals.ActiveDevelopers >= listTemplatesMinActiveDevelopersForRecommendation @@ -440,6 +460,9 @@ func templateQueryScore(t database.Template, query string) int { } queryCompact := compactTemplateSearch(query) + if queryCompact == "" { + return 0 + } for _, field := range []string{t.Name, t.DisplayName} { field = normalizeTemplateSearch(field) if field == "" { diff --git a/coderd/x/chatd/chattool/listtemplates_internal_test.go b/coderd/x/chatd/chattool/listtemplates_internal_test.go index 655c275b9ed4b..6ee20a28b9e03 100644 --- a/coderd/x/chatd/chattool/listtemplates_internal_test.go +++ b/coderd/x/chatd/chattool/listtemplates_internal_test.go @@ -15,13 +15,17 @@ func TestSelectTemplateRecommendationRankingSignalsUnavailable(t *testing.T) { t.Parallel() enrichmentErr := xerrors.New("enrichment failed") + enrichmentErrors := templateRankingSignalErrors{ + ActiveDeveloperCounts: enrichmentErr, + Usage: enrichmentErr, + } now := time.Date(2026, 5, 15, 12, 0, 0, 0, time.UTC) onlyTemplateID := uuid.New() hint, recommendedID, reason := selectTemplateRecommendation( []rankedTemplate{{Template: database.Template{ID: onlyTemplateID}}}, 1, - enrichmentErr, + enrichmentErrors, now, ) require.Equal(t, listTemplatesHintOnlyAvailable, hint) @@ -35,20 +39,53 @@ func TestSelectTemplateRecommendationRankingSignalsUnavailable(t *testing.T) { {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreDescriptionMatch}, }, 2, - enrichmentErr, + enrichmentErrors, now, ) 
require.Equal(t, listTemplatesHintHighConfidence, hint) require.Equal(t, topID, recommendedID) require.Equal(t, "matches_query", reason) + personalUsageID := uuid.New() + hint, recommendedID, reason = selectTemplateRecommendation( + []rankedTemplate{ + { + Template: database.Template{ID: personalUsageID}, + Usage: templateUsage{ + WorkspaceCount: 3, + LastUsedAt: now.Add(-180 * 24 * time.Hour), + }, + }, + {Template: database.Template{ID: uuid.New()}}, + }, + 2, + templateRankingSignalErrors{ActiveDeveloperCounts: enrichmentErr}, + now, + ) + require.Equal(t, listTemplatesHintHighConfidence, hint) + require.Equal(t, personalUsageID, recommendedID) + require.Equal(t, "used_by_you", reason) + + hint, recommendedID, reason = selectTemplateRecommendation( + []rankedTemplate{ + {Template: database.Template{ID: uuid.New()}, ActiveDevelopers: 2}, + {Template: database.Template{ID: uuid.New()}}, + }, + 2, + templateRankingSignalErrors{Usage: enrichmentErr}, + now, + ) + require.Equal(t, listTemplatesHintNoConfidence, hint) + require.Equal(t, uuid.Nil, recommendedID) + require.Equal(t, "ranking_signals_unavailable", reason) + hint, recommendedID, reason = selectTemplateRecommendation( []rankedTemplate{ {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreNamePrefix}, {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreNamePrefix}, }, 2, - enrichmentErr, + enrichmentErrors, now, ) require.Equal(t, listTemplatesHintNoConfidence, hint) diff --git a/coderd/x/chatd/chattool/listtemplates_test.go b/coderd/x/chatd/chattool/listtemplates_test.go index 04d066012b10f..f656831326300 100644 --- a/coderd/x/chatd/chattool/listtemplates_test.go +++ b/coderd/x/chatd/chattool/listtemplates_test.go @@ -142,7 +142,7 @@ func TestListTemplates_QueryMatchesDisplayNameAndDescription(t *testing.T) { displayTemplate := dbgen.Template(t, db, database.Template{ OrganizationID: org.ID, CreatedBy: user.ID, - Name: "data-science", + Name: "tpl-42", DisplayName: "Data 
Science Lab", }) descriptionTemplate := dbgen.Template(t, db, database.Template{ @@ -177,6 +177,14 @@ func TestListTemplates_QueryMatchesDisplayNameAndDescription(t *testing.T) { require.Equal(t, "high_confidence_recommendation", result["selection_hint"]) require.Equal(t, descriptionTemplate.ID.String(), result["recommended_template_id"]) + result = runListTemplates(ctx, t, tool, `{"query":"-"}`) + templates = listTemplateItems(t, result) + require.Empty(t, templates) + require.Equal(t, float64(0), result["total_count"]) + require.Equal(t, float64(3), result["available_template_count"]) + require.Equal(t, "no_confident_match", result["selection_hint"]) + require.Equal(t, "no_matching_templates", result["recommendation_reason"]) + result = runListTemplates(ctx, t, tool, `{"query":"does-not-exist"}`) templates = listTemplateItems(t, result) require.Empty(t, templates) @@ -297,6 +305,12 @@ func TestListTemplates_RanksAllCandidatesBeforePagination(t *testing.T) { require.Equal(t, "used_by_you", templates[0]["relevance_signals"]) require.Equal(t, "high_confidence_recommendation", result["selection_hint"]) require.Equal(t, target.ID.String(), result["recommended_template_id"]) + + result = runListTemplates(ctx, t, tool, `{"page":2}`) + templates = listTemplateItems(t, result) + require.Len(t, templates, 1) + require.Equal(t, float64(2), result["page"]) + require.Equal(t, float64(11), templates[0]["rank"]) } func TestListTemplates_QueryRelevanceOutranksPersonalUsage(t *testing.T) { From 9d1ee5096db2d1fd7c15aabeb3a390173d703dc0 Mon Sep 17 00:00:00 2001 From: Jaayden Halko Date: Mon, 1 Jun 2026 13:47:25 +0000 Subject: [PATCH 06/21] fix(coderd/x/chatd): require template selection on weak matches --- coderd/x/chatd/chatd_test.go | 174 ++++++++++++++++++ coderd/x/chatd/chattool/createworkspace.go | 4 + coderd/x/chatd/chattool/listtemplates.go | 9 + coderd/x/chatd/chattool/listtemplates_test.go | 3 + coderd/x/chatd/prompt.go | 1 + 5 files changed, 191 insertions(+) diff --git 
a/coderd/x/chatd/chatd_test.go b/coderd/x/chatd/chatd_test.go index 9e67c1023015b..bbfc2dc23e4a2 100644 --- a/coderd/x/chatd/chatd_test.go +++ b/coderd/x/chatd/chatd_test.go @@ -8577,6 +8577,180 @@ func TestChatTemplateAllowlistEnforcement(t *testing.T) { "create_workspace for blocked template should be rejected") } +func TestChatAsksUserWhenListTemplatesRequiresSelection(t *testing.T) { + t.Parallel() + + ctx := testutil.Context(t, testutil.WaitLong) + db, ps := dbtestutil.NewDB(t) + + var tplCode, tplDocker database.Template + var callCount atomic.Int32 + var sawHardRule atomic.Bool + var sawSelectionRequiredResult atomic.Bool + + openAIURL := chattest.NewOpenAI(t, func(req *chattest.OpenAIRequest) chattest.OpenAIResponse { + if !req.Stream { + return chattest.OpenAINonStreamingResponse("title") + } + + switch callCount.Add(1) { + case 1: + promptAndTools := string(req.RawBody) + for _, message := range req.Messages { + promptAndTools += "\n" + message.Content + } + if strings.Contains(promptAndTools, "If user_selection_required is true") && + strings.Contains(promptAndTools, "do not call create_workspace") { + sawHardRule.Store(true) + } + return chattest.OpenAIStreamingResponse( + chattest.OpenAIToolCallChunk("list_templates", `{}`), + ) + case 2: + if listTemplatesResultRequiresUserSelection(req.Messages) { + sawSelectionRequiredResult.Store(true) + return chattest.OpenAIStreamingResponse( + chattest.OpenAITextChunks( + "I found two templates, typescript-alpha and Docker Containers. 
Which template should I use?", + )..., + ) + } + + return chattest.OpenAIStreamingResponse( + chattest.OpenAIToolCallChunk("create_workspace", + fmt.Sprintf(`{"template_id":%q}`, tplCode.ID.String())), + ) + default: + return chattest.OpenAIStreamingResponse( + chattest.OpenAITextChunks("Done.")..., + ) + } + }) + + user, org, model := seedChatDependenciesWithProvider(t, db, "openai-compat", openAIURL) + tplCode = dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + Name: "code-2", + DisplayName: "typescript-alpha", + Description: "this is a long description", + }) + tplDocker = dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + Name: "docker", + DisplayName: "Docker Containers", + Description: "Provision Docker containers as Coder workspaces", + }) + + server := newActiveTestServer(t, db, ps, func(cfg *chatd.Config) { + cfg.CreateWorkspace = func( + context.Context, + uuid.UUID, + codersdk.CreateWorkspaceRequest, + ) (codersdk.Workspace, error) { + t.Error("create_workspace should not be called when list_templates requires user selection") + return codersdk.Workspace{}, xerrors.New("unexpected create_workspace call") + } + }) + + chat, err := server.CreateChat(ctx, chatd.CreateOptions{ + OrganizationID: org.ID, + OwnerID: user.ID, + Title: "ask-template-selection-test", + ModelConfigID: model.ID, + InitialUserContent: []codersdk.ChatMessagePart{ + codersdk.ChatMessageText("Create a workspace."), + }, + }) + require.NoError(t, err) + + var chatResult database.Chat + require.Eventually(t, func() bool { + got, getErr := db.GetChatByID(ctx, chat.ID) + if getErr != nil { + return false + } + chatResult = got + return got.Status == database.ChatStatusWaiting || got.Status == database.ChatStatusError + }, testutil.WaitLong, testutil.IntervalFast) + + if chatResult.Status == database.ChatStatusError { + require.FailNowf(t, "chat run failed", "last_error=%q", 
chatLastErrorMessage(chatResult.LastError)) + } + + require.True(t, sawHardRule.Load(), "model request should include the user-selection hard rule") + require.True(t, sawSelectionRequiredResult.Load(), "model should receive a list_templates result requiring user selection") + + messages, err := db.GetChatMessagesByChatID(ctx, database.GetChatMessagesByChatIDParams{ + ChatID: chat.ID, + AfterID: 0, + }) + require.NoError(t, err) + + var listTemplatesResult map[string]any + var assistantText string + var sawCreateWorkspaceResult bool + for _, message := range messages { + parts, parseErr := chatprompt.ParseContent(message) + require.NoError(t, parseErr) + for _, part := range parts { + switch { + case part.Type == codersdk.ChatMessagePartTypeToolResult && part.ToolName == "list_templates": + require.NoError(t, json.Unmarshal(part.Result, &listTemplatesResult)) + case part.Type == codersdk.ChatMessagePartTypeToolResult && part.ToolName == "create_workspace": + sawCreateWorkspaceResult = true + case message.Role == database.ChatMessageRoleAssistant && part.Type == codersdk.ChatMessagePartTypeText: + assistantText += part.Text + } + } + } + + require.NotNil(t, listTemplatesResult, "expected list_templates tool result") + require.Equal(t, "no_confident_match", listTemplatesResult["selection_hint"]) + require.Equal(t, "no_ranking_signal", listTemplatesResult["recommendation_reason"]) + require.Equal(t, true, listTemplatesResult["user_selection_required"]) + require.NotContains(t, listTemplatesResult, "recommended_template_id") + require.Contains(t, listTemplatesResult["templates"], any(map[string]any{ + "id": tplCode.ID.String(), + "name": "code-2", + "organization_id": org.ID.String(), + "display_name": "typescript-alpha", + "description": "this is a long description", + "rank": float64(1), + "relevance_signals": "ordered_by_name", + })) + require.Contains(t, listTemplatesResult["templates"], any(map[string]any{ + "id": tplDocker.ID.String(), + "name": "docker", + 
"organization_id": org.ID.String(), + "display_name": "Docker Containers", + "description": "Provision Docker containers as Coder workspaces", + "rank": float64(2), + "relevance_signals": "ordered_by_name", + })) + require.False(t, sawCreateWorkspaceResult, "agent should ask instead of calling create_workspace") + require.Contains(t, assistantText, "Which template should I use?") +} + +func listTemplatesResultRequiresUserSelection(messages []chattest.OpenAIMessage) bool { + for _, message := range messages { + if message.Role != "tool" || !json.Valid([]byte(message.Content)) { + continue + } + + var result map[string]any + if err := json.Unmarshal([]byte(message.Content), &result); err != nil { + continue + } + required, _ := result["user_selection_required"].(bool) + if result["selection_hint"] == "no_confident_match" && required { + return true + } + } + return false +} + // TestSignalWakeImmediateAcquisition verifies that CreateChat triggers // immediate processing via signalWake without waiting for the polling // ticker to fire. The ticker interval is set to an hour so it never diff --git a/coderd/x/chatd/chattool/createworkspace.go b/coderd/x/chatd/chattool/createworkspace.go index 78460b53452ba..010ce9a19dedd 100644 --- a/coderd/x/chatd/chattool/createworkspace.go +++ b/coderd/x/chatd/chattool/createworkspace.go @@ -96,6 +96,10 @@ func CreateWorkspace(db database.Store, organizationID, chatID uuid.UUID, option "context, provider tools, or external MCP tools. Requires a "+ "template_id from list_templates. Use recommended_template_id "+ "or rank 1 when list_templates reports a confident choice. "+ + "If list_templates returned user_selection_required true, "+ + "or selection_hint no_confident_match or ambiguous_top_matches, "+ + "do not call create_workspace. Ask the user to choose a "+ + "template unless the user already explicitly selected one. "+ "Optionally provide a name and parameter values from "+ "read_template. 
If no name is given, one will be generated. "+ "Provide a preset_id from read_template to apply "+ diff --git a/coderd/x/chatd/chattool/listtemplates.go b/coderd/x/chatd/chattool/listtemplates.go index 26881fe5fd03a..0e7c127b6eb35 100644 --- a/coderd/x/chatd/chattool/listtemplates.go +++ b/coderd/x/chatd/chattool/listtemplates.go @@ -103,6 +103,10 @@ func ListTemplates(db database.Store, organizationID uuid.UUID, options ListTemp "display name, or description. Use recommended_template_id "+ "or rank 1 as the default choice when selection_hint is "+ "only_available_template or high_confidence_recommendation. "+ + "If user_selection_required is true, or selection_hint is "+ + "no_confident_match or ambiguous_top_matches, do not call "+ + "create_workspace. Ask the user to choose a template unless "+ + "the user already explicitly selected one. "+ "Do not paginate unless the returned templates do not fit the "+ "request, selection_hint reports ambiguity or no confident match, "+ "or the user asked to browse templates. 
Returns 10 per page.", @@ -210,6 +214,7 @@ func ListTemplates(db database.Store, organizationID uuid.UUID, options ListTemp "available_template_count": visibleTemplateCount, "selection_hint": selectionHint, "recommendation_reason": recommendationReason, + "user_selection_required": userSelectionRequired(selectionHint), } if recommendedID != uuid.Nil { result["recommended_template_id"] = recommendedID.String() @@ -374,6 +379,10 @@ func selectTemplateRecommendation( return listTemplatesHintHighConfidence, top.Template.ID, relevanceSignals(top) } +func userSelectionRequired(selectionHint string) bool { + return selectionHint == listTemplatesHintAmbiguous || selectionHint == listTemplatesHintNoConfidence +} + func templatesAreAmbiguous(a, b rankedTemplate) bool { return templateRankSignalsFor(a) == templateRankSignalsFor(b) } diff --git a/coderd/x/chatd/chattool/listtemplates_test.go b/coderd/x/chatd/chattool/listtemplates_test.go index f656831326300..cfae13aa214e6 100644 --- a/coderd/x/chatd/chattool/listtemplates_test.go +++ b/coderd/x/chatd/chattool/listtemplates_test.go @@ -167,6 +167,7 @@ func TestListTemplates_QueryMatchesDisplayNameAndDescription(t *testing.T) { require.Len(t, templates, 1) require.Equal(t, displayTemplate.ID.String(), templates[0]["id"]) require.Equal(t, "high_confidence_recommendation", result["selection_hint"]) + require.Equal(t, false, result["user_selection_required"]) require.Equal(t, displayTemplate.ID.String(), result["recommended_template_id"]) require.Equal(t, "matches_query", templates[0]["relevance_signals"]) @@ -183,6 +184,7 @@ func TestListTemplates_QueryMatchesDisplayNameAndDescription(t *testing.T) { require.Equal(t, float64(0), result["total_count"]) require.Equal(t, float64(3), result["available_template_count"]) require.Equal(t, "no_confident_match", result["selection_hint"]) + require.Equal(t, true, result["user_selection_required"]) require.Equal(t, "no_matching_templates", result["recommendation_reason"]) result = 
runListTemplates(ctx, t, tool, `{"query":"does-not-exist"}`) @@ -191,6 +193,7 @@ func TestListTemplates_QueryMatchesDisplayNameAndDescription(t *testing.T) { require.Equal(t, float64(0), result["total_count"]) require.Equal(t, float64(3), result["available_template_count"]) require.Equal(t, "no_confident_match", result["selection_hint"]) + require.Equal(t, true, result["user_selection_required"]) require.Equal(t, "no_matching_templates", result["recommendation_reason"]) } diff --git a/coderd/x/chatd/prompt.go b/coderd/x/chatd/prompt.go index f02006a9aa121..1254ca6e8d3dd 100644 --- a/coderd/x/chatd/prompt.go +++ b/coderd/x/chatd/prompt.go @@ -108,6 +108,7 @@ Ask the minimum number of questions needed to define the scope together. When no workspace is attached and you need to create one: - Call list_templates with concise search terms from the user's task when the task suggests a language, framework, image, or environment. - Treat recommended_template_id, or rank 1 when selection_hint is only_available_template or high_confidence_recommendation, as the default template unless the user asked for a different template. +- If user_selection_required is true, or selection_hint is no_confident_match or ambiguous_top_matches, do not call create_workspace. Ask the user to choose a template unless the user already explicitly selected one. - Do not paginate unless selection_hint is ambiguous_top_matches or no_confident_match, no returned template fits the request, or the user asked to browse or compare templates. - Call read_template before create_workspace when you need parameter names, required parameter values, or preset IDs. Otherwise use create_workspace with the selected template_id and defaults. 
From 4c20d3f9d0cc08750c4cdc78b8c59b320efb532b Mon Sep 17 00:00:00 2001 From: Jaayden Halko Date: Thu, 4 Jun 2026 12:43:22 +0000 Subject: [PATCH 07/21] fix(coderd): rank chat templates by frecency score Replace the lexicographic template-ranking comparator in list_templates with a frecency score (frequency discounted by recency), per reviewer feedback. - Add GetTemplateRankingSignalsByOwnerID, returning the user's recent active and recently-deleted workspace counts, last usage, and the count of distinct active developers in the org. Recently-deleted workspaces now contribute (recovering history the deleted=false filter discarded), scoped to a lookback window, and the prebuilds system user is excluded from the org popularity count. Replaces GetWorkspaceUsageGroupedByTemplateIDByOwnerID. - Compute the affinity score in Go (Wp*(active + Wd*deleted)*0.5^(age/half_life) + Wo*ln(1+org_devs)) because sqlc cannot reliably compile the parameterized decay expression; the query returns the raw signals. Weights, half-life, and lookback are explicit constants. - Recommendation confidence is now a single score comparison: a decisive query match recommends on its own, otherwise the top score must clear a floor derived from the active-developer minimum and lead the runner-up by a derived margin. Stale-but-frequent usage no longer recommends. - Replace the AsSystemRestricted call for the cross-user org count with a narrow dbauthz wrapper checking workspace-owner read plus a template-metadata read. - Clarify list_templates/read_template guidance in the detached prompt. 
--- coderd/database/dbauthz/dbauthz.go | 40 +- coderd/database/dbauthz/dbauthz_test.go | 21 +- coderd/database/dbmetrics/querymetrics.go | 16 +- coderd/database/dbmock/dbmock.go | 30 +- coderd/database/querier.go | 11 +- coderd/database/queries.sql.go | 172 ++++++--- coderd/database/queries/workspaces.sql | 78 +++- coderd/x/chatd/chattool/listtemplates.go | 363 +++++++++--------- .../chattool/listtemplates_internal_test.go | 250 ++++++++---- coderd/x/chatd/chattool/listtemplates_test.go | 146 ++++++- coderd/x/chatd/prompt.go | 2 +- 11 files changed, 759 insertions(+), 370 deletions(-) diff --git a/coderd/database/dbauthz/dbauthz.go b/coderd/database/dbauthz/dbauthz.go index ca6e0203c6e7b..11b6074696750 100644 --- a/coderd/database/dbauthz/dbauthz.go +++ b/coderd/database/dbauthz/dbauthz.go @@ -4362,6 +4362,33 @@ func (q *querier) GetTemplatePresetsWithPrebuilds(ctx context.Context, templateI return q.db.GetTemplatePresetsWithPrebuilds(ctx, templateID) } +func (q *querier) GetTemplateRankingSignalsByOwnerID(ctx context.Context, arg database.GetTemplateRankingSignalsByOwnerIDParams) ([]database.GetTemplateRankingSignalsByOwnerIDRow, error) { + // The personal frecency signal reads the owner's own workspaces, which a + // user can always read. + workspaceObj := rbac.ResourceWorkspace.WithOwner(arg.OwnerID.String()) + if arg.OrganizationID != uuid.Nil { + workspaceObj = workspaceObj.InOrg(arg.OrganizationID) + } else { + workspaceObj = workspaceObj.AnyOrganization() + } + if err := q.authorizeContext(ctx, policy.ActionRead, workspaceObj); err != nil { + return nil, err + } + // The org-popularity signal is a cross-user COUNT(DISTINCT owner_id) that we + // treat as template popularity metadata, not as permission to read other + // users' workspaces. Callers only ever pass template IDs already authorized + // via GetTemplatesWithFilter, so a template read check is the minimal, + // intentional authorization here instead of a system escalation. 
+ templateObj := rbac.ResourceTemplate.AnyOrganization() + if arg.OrganizationID != uuid.Nil { + templateObj = rbac.ResourceTemplate.InOrg(arg.OrganizationID) + } + if err := q.authorizeContext(ctx, policy.ActionRead, templateObj); err != nil { + return nil, err + } + return q.db.GetTemplateRankingSignalsByOwnerID(ctx, arg) +} + func (q *querier) GetTemplateUsageStats(ctx context.Context, arg database.GetTemplateUsageStatsParams) ([]database.TemplateUsageStat, error) { if err := q.authorizeTemplateInsights(ctx, arg.TemplateIDs); err != nil { return nil, err @@ -5394,19 +5421,6 @@ func (q *querier) GetWorkspaceUniqueOwnerCountByTemplateIDs(ctx context.Context, return q.db.GetWorkspaceUniqueOwnerCountByTemplateIDs(ctx, templateIDs) } -func (q *querier) GetWorkspaceUsageGroupedByTemplateIDByOwnerID(ctx context.Context, arg database.GetWorkspaceUsageGroupedByTemplateIDByOwnerIDParams) ([]database.GetWorkspaceUsageGroupedByTemplateIDByOwnerIDRow, error) { - obj := rbac.ResourceWorkspace.WithOwner(arg.OwnerID.String()) - if arg.OrganizationID != uuid.Nil { - obj = obj.InOrg(arg.OrganizationID) - } else { - obj = obj.AnyOrganization() - } - if err := q.authorizeContext(ctx, policy.ActionRead, obj); err != nil { - return nil, err - } - return q.db.GetWorkspaceUsageGroupedByTemplateIDByOwnerID(ctx, arg) -} - func (q *querier) GetWorkspaces(ctx context.Context, arg database.GetWorkspacesParams) ([]database.GetWorkspacesRow, error) { prep, err := prepareSQLFilter(ctx, q.auth, policy.ActionRead, rbac.ResourceWorkspace.Type) if err != nil { diff --git a/coderd/database/dbauthz/dbauthz_test.go b/coderd/database/dbauthz/dbauthz_test.go index 63b650adda7f4..d9a61495f0db9 100644 --- a/coderd/database/dbauthz/dbauthz_test.go +++ b/coderd/database/dbauthz/dbauthz_test.go @@ -3373,14 +3373,27 @@ func (s *MethodTestSuite) TestWorkspace() { // No asserts here because SQLFilter. 
check.Args(ws.OwnerID, emptyPreparedAuthorized{}).Asserts() })) - s.Run("GetWorkspaceUsageGroupedByTemplateIDByOwnerID", s.Mocked(func(dbm *dbmock.MockStore, _ *gofakeit.Faker, check *expects) { - arg := database.GetWorkspaceUsageGroupedByTemplateIDByOwnerIDParams{ + s.Run("GetTemplateRankingSignalsByOwnerID", s.Mocked(func(dbm *dbmock.MockStore, _ *gofakeit.Faker, check *expects) { + argOrg := database.GetTemplateRankingSignalsByOwnerIDParams{ OwnerID: uuid.New(), OrganizationID: uuid.New(), TemplateIDs: []uuid.UUID{uuid.New()}, } - dbm.EXPECT().GetWorkspaceUsageGroupedByTemplateIDByOwnerID(gomock.Any(), arg).Return([]database.GetWorkspaceUsageGroupedByTemplateIDByOwnerIDRow{}, nil).AnyTimes() - check.Args(arg).Asserts(rbac.ResourceWorkspace.WithOwner(arg.OwnerID.String()).InOrg(arg.OrganizationID), policy.ActionRead) + dbm.EXPECT().GetTemplateRankingSignalsByOwnerID(gomock.Any(), argOrg).Return([]database.GetTemplateRankingSignalsByOwnerIDRow{}, nil).AnyTimes() + check.Args(argOrg).Asserts( + rbac.ResourceWorkspace.WithOwner(argOrg.OwnerID.String()).InOrg(argOrg.OrganizationID), policy.ActionRead, + rbac.ResourceTemplate.InOrg(argOrg.OrganizationID), policy.ActionRead, + ) + + argNoOrg := database.GetTemplateRankingSignalsByOwnerIDParams{ + OwnerID: uuid.New(), + TemplateIDs: []uuid.UUID{uuid.New()}, + } + dbm.EXPECT().GetTemplateRankingSignalsByOwnerID(gomock.Any(), argNoOrg).Return([]database.GetTemplateRankingSignalsByOwnerIDRow{}, nil).AnyTimes() + check.Args(argNoOrg).Asserts( + rbac.ResourceWorkspace.WithOwner(argNoOrg.OwnerID.String()).AnyOrganization(), policy.ActionRead, + rbac.ResourceTemplate.AnyOrganization(), policy.ActionRead, + ) })) s.Run("GetWorkspaceACLByID", s.Mocked(func(dbM *dbmock.MockStore, faker *gofakeit.Faker, check *expects) { ws := testutil.Fake(s.T(), faker, database.Workspace{}) diff --git a/coderd/database/dbmetrics/querymetrics.go b/coderd/database/dbmetrics/querymetrics.go index 765bf0abc9c68..7e91bf68ead98 100644 --- 
a/coderd/database/dbmetrics/querymetrics.go +++ b/coderd/database/dbmetrics/querymetrics.go @@ -2793,6 +2793,14 @@ func (m queryMetricsStore) GetTemplatePresetsWithPrebuilds(ctx context.Context, return r0, r1 } +func (m queryMetricsStore) GetTemplateRankingSignalsByOwnerID(ctx context.Context, arg database.GetTemplateRankingSignalsByOwnerIDParams) ([]database.GetTemplateRankingSignalsByOwnerIDRow, error) { + start := time.Now() + r0, r1 := m.s.GetTemplateRankingSignalsByOwnerID(ctx, arg) + m.queryLatencies.WithLabelValues("GetTemplateRankingSignalsByOwnerID").Observe(time.Since(start).Seconds()) + m.queryCounts.WithLabelValues(httpmw.ExtractHTTPRoute(ctx), httpmw.ExtractHTTPMethod(ctx), "GetTemplateRankingSignalsByOwnerID").Inc() + return r0, r1 +} + func (m queryMetricsStore) GetTemplateUsageStats(ctx context.Context, arg database.GetTemplateUsageStatsParams) ([]database.TemplateUsageStat, error) { start := time.Now() r0, r1 := m.s.GetTemplateUsageStats(ctx, arg) @@ -3641,14 +3649,6 @@ func (m queryMetricsStore) GetWorkspaceUniqueOwnerCountByTemplateIDs(ctx context return r0, r1 } -func (m queryMetricsStore) GetWorkspaceUsageGroupedByTemplateIDByOwnerID(ctx context.Context, arg database.GetWorkspaceUsageGroupedByTemplateIDByOwnerIDParams) ([]database.GetWorkspaceUsageGroupedByTemplateIDByOwnerIDRow, error) { - start := time.Now() - r0, r1 := m.s.GetWorkspaceUsageGroupedByTemplateIDByOwnerID(ctx, arg) - m.queryLatencies.WithLabelValues("GetWorkspaceUsageGroupedByTemplateIDByOwnerID").Observe(time.Since(start).Seconds()) - m.queryCounts.WithLabelValues(httpmw.ExtractHTTPRoute(ctx), httpmw.ExtractHTTPMethod(ctx), "GetWorkspaceUsageGroupedByTemplateIDByOwnerID").Inc() - return r0, r1 -} - func (m queryMetricsStore) GetWorkspaces(ctx context.Context, arg database.GetWorkspacesParams) ([]database.GetWorkspacesRow, error) { start := time.Now() r0, r1 := m.s.GetWorkspaces(ctx, arg) diff --git a/coderd/database/dbmock/dbmock.go b/coderd/database/dbmock/dbmock.go index 
081ca4462d1da..72af46e76346c 100644 --- a/coderd/database/dbmock/dbmock.go +++ b/coderd/database/dbmock/dbmock.go @@ -5220,6 +5220,21 @@ func (mr *MockStoreMockRecorder) GetTemplatePresetsWithPrebuilds(ctx, templateID return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetTemplatePresetsWithPrebuilds", reflect.TypeOf((*MockStore)(nil).GetTemplatePresetsWithPrebuilds), ctx, templateID) } +// GetTemplateRankingSignalsByOwnerID mocks base method. +func (m *MockStore) GetTemplateRankingSignalsByOwnerID(ctx context.Context, arg database.GetTemplateRankingSignalsByOwnerIDParams) ([]database.GetTemplateRankingSignalsByOwnerIDRow, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetTemplateRankingSignalsByOwnerID", ctx, arg) + ret0, _ := ret[0].([]database.GetTemplateRankingSignalsByOwnerIDRow) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetTemplateRankingSignalsByOwnerID indicates an expected call of GetTemplateRankingSignalsByOwnerID. +func (mr *MockStoreMockRecorder) GetTemplateRankingSignalsByOwnerID(ctx, arg any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetTemplateRankingSignalsByOwnerID", reflect.TypeOf((*MockStore)(nil).GetTemplateRankingSignalsByOwnerID), ctx, arg) +} + // GetTemplateUsageStats mocks base method. func (m *MockStore) GetTemplateUsageStats(ctx context.Context, arg database.GetTemplateUsageStatsParams) ([]database.TemplateUsageStat, error) { m.ctrl.T.Helper() @@ -6825,21 +6840,6 @@ func (mr *MockStoreMockRecorder) GetWorkspaceUniqueOwnerCountByTemplateIDs(ctx, return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetWorkspaceUniqueOwnerCountByTemplateIDs", reflect.TypeOf((*MockStore)(nil).GetWorkspaceUniqueOwnerCountByTemplateIDs), ctx, templateIds) } -// GetWorkspaceUsageGroupedByTemplateIDByOwnerID mocks base method. 
-func (m *MockStore) GetWorkspaceUsageGroupedByTemplateIDByOwnerID(ctx context.Context, arg database.GetWorkspaceUsageGroupedByTemplateIDByOwnerIDParams) ([]database.GetWorkspaceUsageGroupedByTemplateIDByOwnerIDRow, error) { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "GetWorkspaceUsageGroupedByTemplateIDByOwnerID", ctx, arg) - ret0, _ := ret[0].([]database.GetWorkspaceUsageGroupedByTemplateIDByOwnerIDRow) - ret1, _ := ret[1].(error) - return ret0, ret1 -} - -// GetWorkspaceUsageGroupedByTemplateIDByOwnerID indicates an expected call of GetWorkspaceUsageGroupedByTemplateIDByOwnerID. -func (mr *MockStoreMockRecorder) GetWorkspaceUsageGroupedByTemplateIDByOwnerID(ctx, arg any) *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetWorkspaceUsageGroupedByTemplateIDByOwnerID", reflect.TypeOf((*MockStore)(nil).GetWorkspaceUsageGroupedByTemplateIDByOwnerID), ctx, arg) -} - // GetWorkspaces mocks base method. func (m *MockStore) GetWorkspaces(ctx context.Context, arg database.GetWorkspacesParams) ([]database.GetWorkspacesRow, error) { m.ctrl.T.Helper() diff --git a/coderd/database/querier.go b/coderd/database/querier.go index 12592fe61e59e..20a555526f820 100644 --- a/coderd/database/querier.go +++ b/coderd/database/querier.go @@ -716,6 +716,16 @@ type sqlcQuerier interface { // It also returns the number of desired instances for each preset. // If template_id is specified, only template versions associated with that template will be returned. GetTemplatePresetsWithPrebuilds(ctx context.Context, templateID uuid.NullUUID) ([]GetTemplatePresetsWithPrebuildsRow, error) + // GetTemplateRankingSignalsByOwnerID returns the raw ranking signals for the + // given templates relative to a single owner: how many active and recently + // deleted workspaces the owner used within the lookback window, when the + // template was last used, and how many distinct developers in the organization + // currently have a non-deleted workspace on it. 
The affinity score itself is + // computed in Go (see listtemplates.go); the parameterized recency-decay math + // cannot be expressed through sqlc reliably, so this query returns the exact + // raw signals the score is built from. The lookback window is applied with a + // caller-computed cutoff timestamp. + GetTemplateRankingSignalsByOwnerID(ctx context.Context, arg GetTemplateRankingSignalsByOwnerIDParams) ([]GetTemplateRankingSignalsByOwnerIDRow, error) GetTemplateUsageStats(ctx context.Context, arg GetTemplateUsageStatsParams) ([]TemplateUsageStat, error) GetTemplateVersionByID(ctx context.Context, id uuid.UUID) (TemplateVersion, error) GetTemplateVersionByJobID(ctx context.Context, jobID uuid.UUID) (TemplateVersion, error) @@ -901,7 +911,6 @@ type sqlcQuerier interface { GetWorkspaceResourcesByJobIDs(ctx context.Context, ids []uuid.UUID) ([]WorkspaceResource, error) GetWorkspaceResourcesCreatedAfter(ctx context.Context, createdAt time.Time) ([]WorkspaceResource, error) GetWorkspaceUniqueOwnerCountByTemplateIDs(ctx context.Context, templateIds []uuid.UUID) ([]GetWorkspaceUniqueOwnerCountByTemplateIDsRow, error) - GetWorkspaceUsageGroupedByTemplateIDByOwnerID(ctx context.Context, arg GetWorkspaceUsageGroupedByTemplateIDByOwnerIDParams) ([]GetWorkspaceUsageGroupedByTemplateIDByOwnerIDRow, error) // build_params is used to filter by build parameters if present. // It has to be a CTE because the set returning function 'unnest' cannot // be used in a WHERE clause. 
diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index 65e066f25b52f..b0acd5db7b4f8 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -34767,6 +34767,124 @@ func (q *sqlQuerier) GetRegularWorkspaceCreateMetrics(ctx context.Context) ([]Ge return items, nil } +const getTemplateRankingSignalsByOwnerID = `-- name: GetTemplateRankingSignalsByOwnerID :many +WITH org_usage AS ( + -- org_usage measures how many distinct developers currently have a + -- non-deleted workspace on each template. The prebuilds system user is + -- excluded so unclaimed prebuilds do not inflate popularity. + SELECT + w.template_id, + COUNT(DISTINCT w.owner_id) AS org_devs + FROM + workspaces w + WHERE + w.template_id = ANY($1 :: uuid[]) + AND NOT w.deleted + AND w.owner_id != $2 :: uuid + AND CASE + WHEN $3 :: uuid != '00000000-0000-0000-0000-000000000000' :: uuid THEN + w.organization_id = $3 + ELSE true + END + GROUP BY + w.template_id +), +user_usage AS ( + -- user_usage counts workspaces owned by the requesting user within the + -- lookback window, splitting active from recently deleted so deleted + -- history can be counted at reduced weight. The window is keyed on + -- last_used_at. 
+ SELECT + w.template_id, + COUNT(*) FILTER (WHERE NOT w.deleted) AS active_count, + COUNT(*) FILTER (WHERE w.deleted) AS deleted_recent_count, + MAX(w.last_used_at) :: timestamptz AS last_used_at + FROM + workspaces w + WHERE + w.owner_id = $4 + AND w.template_id = ANY($1 :: uuid[]) + AND w.last_used_at > $5 :: timestamptz + AND CASE + WHEN $3 :: uuid != '00000000-0000-0000-0000-000000000000' :: uuid THEN + w.organization_id = $3 + ELSE true + END + GROUP BY + w.template_id +) +SELECT + t.template_id :: uuid AS template_id, + COALESCE(u.active_count, 0) :: bigint AS active_count, + COALESCE(u.deleted_recent_count, 0) :: bigint AS deleted_recent_count, + u.last_used_at, + COALESCE(o.org_devs, 0) :: bigint AS org_devs +FROM + unnest($1 :: uuid[]) AS t(template_id) +LEFT JOIN user_usage u ON u.template_id = t.template_id +LEFT JOIN org_usage o ON o.template_id = t.template_id +` + +type GetTemplateRankingSignalsByOwnerIDParams struct { + TemplateIDs []uuid.UUID `db:"template_ids" json:"template_ids"` + PrebuildsUserID uuid.UUID `db:"prebuilds_user_id" json:"prebuilds_user_id"` + OrganizationID uuid.UUID `db:"organization_id" json:"organization_id"` + OwnerID uuid.UUID `db:"owner_id" json:"owner_id"` + LookbackCutoff time.Time `db:"lookback_cutoff" json:"lookback_cutoff"` +} + +type GetTemplateRankingSignalsByOwnerIDRow struct { + TemplateID uuid.UUID `db:"template_id" json:"template_id"` + ActiveCount int64 `db:"active_count" json:"active_count"` + DeletedRecentCount int64 `db:"deleted_recent_count" json:"deleted_recent_count"` + LastUsedAt sql.NullTime `db:"last_used_at" json:"last_used_at"` + OrgDevs int64 `db:"org_devs" json:"org_devs"` +} + +// GetTemplateRankingSignalsByOwnerID returns the raw ranking signals for the +// given templates relative to a single owner: how many active and recently +// deleted workspaces the owner used within the lookback window, when the +// template was last used, and how many distinct developers in the organization +// currently 
have a non-deleted workspace on it. The affinity score itself is +// computed in Go (see listtemplates.go); the parameterized recency-decay math +// cannot be expressed through sqlc reliably, so this query returns the exact +// raw signals the score is built from. The lookback window is applied with a +// caller-computed cutoff timestamp. +func (q *sqlQuerier) GetTemplateRankingSignalsByOwnerID(ctx context.Context, arg GetTemplateRankingSignalsByOwnerIDParams) ([]GetTemplateRankingSignalsByOwnerIDRow, error) { + rows, err := q.db.QueryContext(ctx, getTemplateRankingSignalsByOwnerID, + pq.Array(arg.TemplateIDs), + arg.PrebuildsUserID, + arg.OrganizationID, + arg.OwnerID, + arg.LookbackCutoff, + ) + if err != nil { + return nil, err + } + defer rows.Close() + var items []GetTemplateRankingSignalsByOwnerIDRow + for rows.Next() { + var i GetTemplateRankingSignalsByOwnerIDRow + if err := rows.Scan( + &i.TemplateID, + &i.ActiveCount, + &i.DeletedRecentCount, + &i.LastUsedAt, + &i.OrgDevs, + ); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + const getWorkspaceACLByID = `-- name: GetWorkspaceACLByID :one SELECT group_acl as groups, @@ -35146,60 +35264,6 @@ func (q *sqlQuerier) GetWorkspaceUniqueOwnerCountByTemplateIDs(ctx context.Conte return items, nil } -const getWorkspaceUsageGroupedByTemplateIDByOwnerID = `-- name: GetWorkspaceUsageGroupedByTemplateIDByOwnerID :many -SELECT - template_id, - COUNT(*) AS workspace_count, - MAX(last_used_at)::timestamptz AS last_used_at -FROM - workspaces -WHERE - owner_id = $1 - AND deleted = false - AND CASE - WHEN $2 :: uuid != '00000000-0000-0000-0000-000000000000'::uuid THEN - organization_id = $2 - ELSE true - END - AND template_id = ANY($3 :: uuid[]) -GROUP BY template_id -` - -type GetWorkspaceUsageGroupedByTemplateIDByOwnerIDParams struct { - OwnerID uuid.UUID 
`db:"owner_id" json:"owner_id"` - OrganizationID uuid.UUID `db:"organization_id" json:"organization_id"` - TemplateIDs []uuid.UUID `db:"template_ids" json:"template_ids"` -} - -type GetWorkspaceUsageGroupedByTemplateIDByOwnerIDRow struct { - TemplateID uuid.UUID `db:"template_id" json:"template_id"` - WorkspaceCount int64 `db:"workspace_count" json:"workspace_count"` - LastUsedAt time.Time `db:"last_used_at" json:"last_used_at"` -} - -func (q *sqlQuerier) GetWorkspaceUsageGroupedByTemplateIDByOwnerID(ctx context.Context, arg GetWorkspaceUsageGroupedByTemplateIDByOwnerIDParams) ([]GetWorkspaceUsageGroupedByTemplateIDByOwnerIDRow, error) { - rows, err := q.db.QueryContext(ctx, getWorkspaceUsageGroupedByTemplateIDByOwnerID, arg.OwnerID, arg.OrganizationID, pq.Array(arg.TemplateIDs)) - if err != nil { - return nil, err - } - defer rows.Close() - var items []GetWorkspaceUsageGroupedByTemplateIDByOwnerIDRow - for rows.Next() { - var i GetWorkspaceUsageGroupedByTemplateIDByOwnerIDRow - if err := rows.Scan(&i.TemplateID, &i.WorkspaceCount, &i.LastUsedAt); err != nil { - return nil, err - } - items = append(items, i) - } - if err := rows.Close(); err != nil { - return nil, err - } - if err := rows.Err(); err != nil { - return nil, err - } - return items, nil -} - const getWorkspaces = `-- name: GetWorkspaces :many WITH build_params AS ( diff --git a/coderd/database/queries/workspaces.sql b/coderd/database/queries/workspaces.sql index c860b7b0afe50..225e6cacdcaff 100644 --- a/coderd/database/queries/workspaces.sql +++ b/coderd/database/queries/workspaces.sql @@ -497,23 +497,71 @@ LEFT JOIN workspaces ON workspaces.template_id = templates.id AND workspaces.del WHERE templates.id = ANY(@template_ids :: uuid[]) GROUP BY templates.id; --- name: GetWorkspaceUsageGroupedByTemplateIDByOwnerID :many +-- name: GetTemplateRankingSignalsByOwnerID :many +-- GetTemplateRankingSignalsByOwnerID returns the raw ranking signals for the +-- given templates relative to a single owner: how many 
active and recently +-- deleted workspaces the owner used within the lookback window, when the +-- template was last used, and how many distinct developers in the organization +-- currently have a non-deleted workspace on it. The affinity score itself is +-- computed in Go (see listtemplates.go); the parameterized recency-decay math +-- cannot be expressed through sqlc reliably, so this query returns the exact +-- raw signals the score is built from. The lookback window is applied with a +-- caller-computed cutoff timestamp. +WITH org_usage AS ( + -- org_usage measures how many distinct developers currently have a + -- non-deleted workspace on each template. The prebuilds system user is + -- excluded so unclaimed prebuilds do not inflate popularity. + SELECT + w.template_id, + COUNT(DISTINCT w.owner_id) AS org_devs + FROM + workspaces w + WHERE + w.template_id = ANY(@template_ids :: uuid[]) + AND NOT w.deleted + AND w.owner_id != @prebuilds_user_id :: uuid + AND CASE + WHEN @organization_id :: uuid != '00000000-0000-0000-0000-000000000000' :: uuid THEN + w.organization_id = @organization_id + ELSE true + END + GROUP BY + w.template_id +), +user_usage AS ( + -- user_usage counts workspaces owned by the requesting user within the + -- lookback window, splitting active from recently deleted so deleted + -- history can be counted at reduced weight. The window is keyed on + -- last_used_at. 
+ SELECT + w.template_id, + COUNT(*) FILTER (WHERE NOT w.deleted) AS active_count, + COUNT(*) FILTER (WHERE w.deleted) AS deleted_recent_count, + MAX(w.last_used_at) :: timestamptz AS last_used_at + FROM + workspaces w + WHERE + w.owner_id = @owner_id + AND w.template_id = ANY(@template_ids :: uuid[]) + AND w.last_used_at > @lookback_cutoff :: timestamptz + AND CASE + WHEN @organization_id :: uuid != '00000000-0000-0000-0000-000000000000' :: uuid THEN + w.organization_id = @organization_id + ELSE true + END + GROUP BY + w.template_id +) SELECT - template_id, - COUNT(*) AS workspace_count, - MAX(last_used_at)::timestamptz AS last_used_at + t.template_id :: uuid AS template_id, + COALESCE(u.active_count, 0) :: bigint AS active_count, + COALESCE(u.deleted_recent_count, 0) :: bigint AS deleted_recent_count, + u.last_used_at, + COALESCE(o.org_devs, 0) :: bigint AS org_devs FROM - workspaces -WHERE - owner_id = @owner_id - AND deleted = false - AND CASE - WHEN @organization_id :: uuid != '00000000-0000-0000-0000-000000000000'::uuid THEN - organization_id = @organization_id - ELSE true - END - AND template_id = ANY(@template_ids :: uuid[]) -GROUP BY template_id; + unnest(@template_ids :: uuid[]) AS t(template_id) +LEFT JOIN user_usage u ON u.template_id = t.template_id +LEFT JOIN org_usage o ON o.template_id = t.template_id; -- name: InsertWorkspace :one INSERT INTO diff --git a/coderd/x/chatd/chattool/listtemplates.go b/coderd/x/chatd/chattool/listtemplates.go index 0e7c127b6eb35..715d2261608ae 100644 --- a/coderd/x/chatd/chattool/listtemplates.go +++ b/coderd/x/chatd/chattool/listtemplates.go @@ -5,6 +5,7 @@ import ( "context" "database/sql" "maps" + "math" "slices" "strings" "time" @@ -24,11 +25,55 @@ import ( const ( listTemplatesPageSize = 10 - listTemplatesMinPersonalWorkspacesForRecommendation = 2 - listTemplatesMinActiveDevelopersForRecommendation = 2 - listTemplatesRecentUsageWindow = 90 * 24 * time.Hour + // listTemplatesMinActiveDevelopersForRecommendation is 
the organization + // popularity floor: a template needs at least this many active developers + // before organization popularity on its own is a confident recommendation. + listTemplatesMinActiveDevelopersForRecommendation = 2 + + // The following constants parameterize the affinity score, a "frecency" + // signal (frequency discounted by recency). The personal term is the count + // of the user's recent workspaces (active plus a fraction of + // recently-deleted) multiplied by a recency decay; the organization term is + // a log-scaled active-developer count. Only the ratio of the personal to + // organization weight matters. They are deliberately explicit so the + // ranking can be calibrated as ranking-quality signal accrues. + // + // The score is computed in Go (computeAffinityScore) rather than SQL + // because sqlc cannot reliably compile the parameterized decay expression; + // see GetTemplateRankingSignalsByOwnerID. Keeping the score and the + // confidence thresholds in the same place also avoids Postgres-versus-Go + // floating-point differences at confidence boundaries. + listTemplatesLookbackDays = 60 + listTemplatesHalfLife = 14 * 24 * time.Hour + listTemplatesPersonalWeight = 10.0 + listTemplatesOrgWeight = 1.0 + listTemplatesDeletedWeight = 0.5 ) +var ( + // minConfidentAffinityScore preserves today's floor: organization + // popularity alone is confident once a template reaches the active-developer + // minimum. math.Log1p(n) == ln(1+n) is exactly the organization term of the + // affinity score, so the threshold and the score stay float-consistent. + minConfidentAffinityScore = listTemplatesOrgWeight * math.Log1p(listTemplatesMinActiveDevelopersForRecommendation) + + // minConfidentGap requires rank 1 to lead rank 2 by at least the score + // difference between "min" and "min-1" active developers before + // recommending when both clear the floor. 
It is derived, not tuned, so + // "2 developers versus 1" still recommends while "16 versus 15" does not. + minConfidentGap = listTemplatesOrgWeight * (math.Log1p(listTemplatesMinActiveDevelopersForRecommendation) - math.Log1p(listTemplatesMinActiveDevelopersForRecommendation-1)) +) + +// affinityScoreEpsilon absorbs floating-point rounding so a score sitting +// exactly on a threshold boundary counts as meeting it. +const affinityScoreEpsilon = 1e-9 + +// affinityScoreAtLeast reports whether score meets threshold within the +// comparison epsilon. +func affinityScoreAtLeast(score, threshold float64) bool { + return score >= threshold-affinityScoreEpsilon +} + const ( listTemplatesHintOnlyAvailable = "only_available_template" listTemplatesHintHighConfidence = "high_confidence_recommendation" @@ -57,32 +102,30 @@ type listTemplatesArgs struct { } type rankedTemplate struct { - Template database.Template - QueryScore int - ActiveDevelopers int64 - Usage templateUsage - Rank int + Template database.Template + QueryScore int + Signals templateRankingSignals + AffinityScore float64 + Rank int } -type templateUsage struct { - WorkspaceCount int64 - LastUsedAt time.Time +// templateRankingSignals holds the raw, per-template ranking inputs returned by +// GetTemplateRankingSignalsByOwnerID. ActiveCount and DeletedRecentCount are the +// user's in-window workspace counts; LastUsedAt is the most recent usage within +// the window (zero when there is none); OrgDevs is the count of distinct active +// developers in the organization. 
+type templateRankingSignals struct { + ActiveCount int64 + DeletedRecentCount int64 + LastUsedAt time.Time + OrgDevs int64 } -type templateRankSignals struct { - QueryScore int - WorkspaceCount int64 - LastUsedAtUnixNano int64 - ActiveDevelopers int64 -} - -type templateRankingSignalErrors struct { - ActiveDeveloperCounts error - Usage error -} - -func (e templateRankingSignalErrors) hasAny() bool { - return e.ActiveDeveloperCounts != nil || e.Usage != nil +// hasPersonalUsage reports whether the user used the template within the +// lookback window, counting recently-deleted workspaces so deleted history is +// still treated as personal usage. +func (s templateRankingSignals) hasPersonalUsage() bool { + return s.ActiveCount+s.DeletedRecentCount > 0 } // ListTemplates returns a tool that lists available workspace templates. @@ -145,39 +188,29 @@ func ListTemplates(db database.Store, organizationID uuid.UUID, options ListTemp for i, t := range ranked { templateIDs[i] = t.Template.ID } - ownerCounts, ownerCountsErr := loadTemplateActiveDeveloperCounts(ctx, db, templateIDs) - if ownerCountsErr != nil { - options.Logger.Warn(ctx, "failed to load template active developer counts", - slog.F("template_count", len(templateIDs)), - slog.Error(ownerCountsErr), - ) - } - usageByTemplate, usageErr := loadTemplateUsage( - ctx, db, options.OwnerID, organizationID, templateIDs, + now := clock.Now() + signalsByTemplate, signalsErr := loadTemplateRankingSignals( + ctx, db, options.OwnerID, organizationID, templateIDs, now, ) - if usageErr != nil { - options.Logger.Warn(ctx, "failed to load template usage", + if signalsErr != nil { + options.Logger.Warn(ctx, "failed to load template ranking signals", slog.F("owner_id", options.OwnerID), slog.F("organization_id", organizationID), slog.F("template_count", len(templateIDs)), - slog.Error(usageErr), + slog.Error(signalsErr), ) } for i := range ranked { - ranked[i].ActiveDevelopers = ownerCounts[ranked[i].Template.ID] - ranked[i].Usage 
= usageByTemplate[ranked[i].Template.ID] + ranked[i].Signals = signalsByTemplate[ranked[i].Template.ID] + ranked[i].AffinityScore = computeAffinityScore(ranked[i].Signals, now) } rankTemplates(ranked, query) selectionHint, recommendedID, recommendationReason := selectTemplateRecommendation( ranked, visibleTemplateCount, - templateRankingSignalErrors{ - ActiveDeveloperCounts: ownerCountsErr, - Usage: usageErr, - }, - clock.Now(), + signalsErr, ) // Paginate. @@ -239,65 +272,78 @@ func scoreTemplateCandidates(templates []database.Template, query string) []rank return candidates } -func loadTemplateActiveDeveloperCounts( +func loadTemplateRankingSignals( ctx context.Context, db database.Store, + ownerID uuid.UUID, + organizationID uuid.UUID, templateIDs []uuid.UUID, -) (map[uuid.UUID]int64, error) { - ownerCounts := make(map[uuid.UUID]int64) + now time.Time, +) (map[uuid.UUID]templateRankingSignals, error) { + signals := make(map[uuid.UUID]templateRankingSignals) if len(templateIDs) == 0 { - return ownerCounts, nil - } - - // Templates are already filtered with the owner's permissions. The - // aggregate count query requires system read because it spans workspace - // owners, but it only receives IDs the owner can already see. - rows, err := db.GetWorkspaceUniqueOwnerCountByTemplateIDs(dbauthz.AsSystemRestricted(ctx), templateIDs) //nolint:gocritic // see above + return signals, nil + } + + // The templates were already authorized with the owner's permissions by + // GetTemplatesWithFilter. GetTemplateRankingSignalsByOwnerID authorizes the + // owner reading their own workspaces plus a template-metadata read for the + // cross-user popularity count, so no system escalation is needed here. 
+ rows, err := db.GetTemplateRankingSignalsByOwnerID(ctx, database.GetTemplateRankingSignalsByOwnerIDParams{ + TemplateIDs: templateIDs, + OwnerID: ownerID, + OrganizationID: organizationID, + PrebuildsUserID: database.PrebuildsSystemUserID, + LookbackCutoff: now.Add(-listTemplatesLookbackDays * 24 * time.Hour), + }) if err != nil { - return ownerCounts, err + return signals, err } for _, row := range rows { - ownerCounts[row.TemplateID] = row.UniqueOwnersSum + s := templateRankingSignals{ + ActiveCount: row.ActiveCount, + DeletedRecentCount: row.DeletedRecentCount, + OrgDevs: row.OrgDevs, + } + if row.LastUsedAt.Valid { + s.LastUsedAt = row.LastUsedAt.Time + } + signals[row.TemplateID] = s } - return ownerCounts, nil + return signals, nil } -func loadTemplateUsage( - ctx context.Context, - db database.Store, - ownerID uuid.UUID, - organizationID uuid.UUID, - templateIDs []uuid.UUID, -) (map[uuid.UUID]templateUsage, error) { - usageByTemplate := make(map[uuid.UUID]templateUsage) - if ownerID == uuid.Nil || len(templateIDs) == 0 { - return usageByTemplate, nil - } - - rows, err := db.GetWorkspaceUsageGroupedByTemplateIDByOwnerID(ctx, database.GetWorkspaceUsageGroupedByTemplateIDByOwnerIDParams{ - OwnerID: ownerID, - OrganizationID: organizationID, - TemplateIDs: templateIDs, - }) - if err != nil { - return usageByTemplate, err - } - for _, row := range rows { - usageByTemplate[row.TemplateID] = templateUsage{ - WorkspaceCount: row.WorkspaceCount, - LastUsedAt: row.LastUsedAt, +// computeAffinityScore folds the raw signals into a single "frecency" score: +// the personal workspace count (active plus a fraction of recently-deleted) +// multiplied by a recency decay, plus a log-scaled organization-popularity +// term. When the user has no in-window usage the personal term is zero and the +// score collapses to organization popularity. 
+func computeAffinityScore(s templateRankingSignals, now time.Time) float64 { + personal := 0.0 + if !s.LastUsedAt.IsZero() { + count := float64(s.ActiveCount) + listTemplatesDeletedWeight*float64(s.DeletedRecentCount) + age := now.Sub(s.LastUsedAt) + if age < 0 { + age = 0 } + decay := math.Pow(0.5, float64(age)/float64(listTemplatesHalfLife)) + personal = listTemplatesPersonalWeight * count * decay } - return usageByTemplate, nil + org := listTemplatesOrgWeight * math.Log1p(float64(s.OrgDevs)) + return personal + org } +// rankTemplates orders templates by query relevance first (only when a query is +// present), then by affinity score, with template name and ID as deterministic +// tiebreakers. func rankTemplates(ranked []rankedTemplate, query string) { slices.SortStableFunc(ranked, func(a, b rankedTemplate) int { - if c := compareTemplateRankSignals( - templateRankSignalsFor(a), - templateRankSignalsFor(b), - query, - ); c != 0 { + if query != "" { + if c := cmp.Compare(b.QueryScore, a.QueryScore); c != 0 { + return c + } + } + if c := cmp.Compare(b.AffinityScore, a.AffinityScore); c != 0 { return c } if c := cmp.Compare(a.Template.Name, b.Template.Name); c != 0 { @@ -311,42 +357,15 @@ func rankTemplates(ranked []rankedTemplate, query string) { } } -func templateRankSignalsFor(t rankedTemplate) templateRankSignals { - return templateRankSignals{ - QueryScore: t.QueryScore, - WorkspaceCount: t.Usage.WorkspaceCount, - LastUsedAtUnixNano: templateRankTime(t.Usage.LastUsedAt), - ActiveDevelopers: t.ActiveDevelopers, - } -} - -func templateRankTime(t time.Time) int64 { - if t.IsZero() { - return 0 - } - return t.UnixNano() -} - -func compareTemplateRankSignals(a, b templateRankSignals, query string) int { - if query != "" { - if c := cmp.Compare(b.QueryScore, a.QueryScore); c != 0 { - return c - } - } - if c := cmp.Compare(b.WorkspaceCount, a.WorkspaceCount); c != 0 { - return c - } - if c := cmp.Compare(b.LastUsedAtUnixNano, a.LastUsedAtUnixNano); c != 0 { - return 
c - } - return cmp.Compare(b.ActiveDevelopers, a.ActiveDevelopers) -} - +// selectTemplateRecommendation decides whether to recommend the top-ranked +// template or ask the user to choose. Query relevance is the primary signal: a +// decisive query match recommends on its own. Otherwise confidence comes from +// the affinity score, which must clear a floor and lead the runner-up by a +// margin before recommending. func selectTemplateRecommendation( ranked []rankedTemplate, visibleTemplateCount int, - rankingSignalErrors templateRankingSignalErrors, - now time.Time, + rankingSignalsErr error, ) (string, uuid.UUID, string) { if len(ranked) == 0 { return listTemplatesHintNoConfidence, uuid.Nil, "no_matching_templates" @@ -356,26 +375,41 @@ func selectTemplateRecommendation( if visibleTemplateCount == 1 && len(ranked) == 1 { return listTemplatesHintOnlyAvailable, top.Template.ID, "only_available_template" } - if rankingSignalErrors.hasAny() { - if templateHasDecisiveQuerySignal(ranked) { - return listTemplatesHintHighConfidence, top.Template.ID, relevanceSignals(top) - } - if rankingSignalErrors.Usage == nil && - templateHasConfidentPersonalUsageSignal(top, now) && - (len(ranked) == 1 || !templatesAreAmbiguous(top, ranked[1])) { - return listTemplatesHintHighConfidence, top.Template.ID, relevanceSignals(top) - } - return listTemplatesHintNoConfidence, uuid.Nil, "ranking_signals_unavailable" + + // A decisive query match (strictly outscoring the runner-up, or the only + // match) is a confident recommendation on its own, even when the affinity + // signals failed to load. 
+ if top.QueryScore > 0 && (len(ranked) == 1 || top.QueryScore > ranked[1].QueryScore) { + return listTemplatesHintHighConfidence, top.Template.ID, relevanceSignals(top) } - if !templateHasRankingSignal(top) { - return listTemplatesHintNoConfidence, uuid.Nil, "no_ranking_signal" + + // Without a decisive query tier the affinity score decides confidence, so an + // unreliable (failed) signal load means we must ask the user. + if rankingSignalsErr != nil { + return listTemplatesHintNoConfidence, uuid.Nil, "ranking_signals_unavailable" } - if len(ranked) > 1 && templatesAreAmbiguous(top, ranked[1]) { + + // Query present but the top two tie on relevance: break the tie with the + // affinity score when the gap is clear, otherwise ask the user. + if top.QueryScore > 0 { + if len(ranked) > 1 && affinityScoreAtLeast(top.AffinityScore-ranked[1].AffinityScore, minConfidentGap) { + return listTemplatesHintHighConfidence, top.Template.ID, relevanceSignals(top) + } return listTemplatesHintAmbiguous, uuid.Nil, "top_templates_are_ambiguous" } - if !templateHasConfidentRankingSignal(top, now) { + + // No query: recommend purely on the affinity score. 
+ if !affinityScoreAtLeast(top.AffinityScore, minConfidentAffinityScore) { + if top.AffinityScore <= 0 { + return listTemplatesHintNoConfidence, uuid.Nil, "no_ranking_signal" + } return listTemplatesHintNoConfidence, uuid.Nil, "weak_ranking_signal" } + if len(ranked) > 1 && + affinityScoreAtLeast(ranked[1].AffinityScore, minConfidentAffinityScore) && + !affinityScoreAtLeast(top.AffinityScore-ranked[1].AffinityScore, minConfidentGap) { + return listTemplatesHintAmbiguous, uuid.Nil, "top_templates_are_ambiguous" + } return listTemplatesHintHighConfidence, top.Template.ID, relevanceSignals(top) } @@ -383,42 +417,6 @@ func userSelectionRequired(selectionHint string) bool { return selectionHint == listTemplatesHintAmbiguous || selectionHint == listTemplatesHintNoConfidence } -func templatesAreAmbiguous(a, b rankedTemplate) bool { - return templateRankSignalsFor(a) == templateRankSignalsFor(b) -} - -func templateHasRankingSignal(t rankedTemplate) bool { - signals := templateRankSignalsFor(t) - return signals.QueryScore > 0 || signals.WorkspaceCount > 0 || signals.ActiveDevelopers > 0 -} - -func templateHasDecisiveQuerySignal(ranked []rankedTemplate) bool { - if len(ranked) == 0 || ranked[0].QueryScore == 0 { - return false - } - return len(ranked) == 1 || ranked[0].QueryScore > ranked[1].QueryScore -} - -func templateHasConfidentPersonalUsageSignal(t rankedTemplate, now time.Time) bool { - if t.Usage.WorkspaceCount >= listTemplatesMinPersonalWorkspacesForRecommendation { - return true - } - return t.Usage.WorkspaceCount > 0 && - !t.Usage.LastUsedAt.IsZero() && - now.Sub(t.Usage.LastUsedAt) <= listTemplatesRecentUsageWindow -} - -func templateHasConfidentRankingSignal(t rankedTemplate, now time.Time) bool { - signals := templateRankSignalsFor(t) - if signals.QueryScore > 0 { - return true - } - if templateHasConfidentPersonalUsageSignal(t, now) { - return true - } - return signals.ActiveDevelopers >= listTemplatesMinActiveDevelopersForRecommendation -} - func 
templateItem(t rankedTemplate, recommendedID uuid.UUID) map[string]any { item := map[string]any{ "id": t.Template.ID.String(), @@ -433,12 +431,16 @@ func templateItem(t rankedTemplate, recommendedID uuid.UUID) map[string]any { if desc := strings.TrimSpace(t.Template.Description); desc != "" { item["description"] = truncateRunes(desc, 200) } - if t.ActiveDevelopers > 0 { - item["active_developers"] = t.ActiveDevelopers + if t.Signals.OrgDevs > 0 { + item["active_developers"] = t.Signals.OrgDevs } - if t.Usage.WorkspaceCount > 0 { - item["your_workspace_count"] = t.Usage.WorkspaceCount - item["last_used_by_you"] = t.Usage.LastUsedAt.Format(time.RFC3339Nano) + // your_workspace_count exposes only active workspaces so deleted history is + // not surfaced to the model, though it still contributes to the score. + if t.Signals.ActiveCount > 0 { + item["your_workspace_count"] = t.Signals.ActiveCount + if !t.Signals.LastUsedAt.IsZero() { + item["last_used_by_you"] = t.Signals.LastUsedAt.Format(time.RFC3339Nano) + } } if t.Template.ID == recommendedID { item["recommended"] = true @@ -447,15 +449,16 @@ func templateItem(t rankedTemplate, recommendedID uuid.UUID) map[string]any { } func relevanceSignals(t rankedTemplate) string { - signals := templateRankSignalsFor(t) + hasQuery := t.QueryScore > 0 + hasPersonal := t.Signals.hasPersonalUsage() switch { - case signals.QueryScore > 0 && signals.WorkspaceCount > 0: + case hasQuery && hasPersonal: return "matches_query_and_used_by_you" - case signals.QueryScore > 0: + case hasQuery: return "matches_query" - case signals.WorkspaceCount > 0: + case hasPersonal: return "used_by_you" - case signals.ActiveDevelopers > 0: + case t.Signals.OrgDevs > 0: return "popular_in_org" default: return "ordered_by_name" diff --git a/coderd/x/chatd/chattool/listtemplates_internal_test.go b/coderd/x/chatd/chattool/listtemplates_internal_test.go index 6ee20a28b9e03..4dc9479608fed 100644 --- a/coderd/x/chatd/chattool/listtemplates_internal_test.go +++ 
b/coderd/x/chatd/chattool/listtemplates_internal_test.go @@ -1,6 +1,7 @@ package chattool import ( + "math" "testing" "time" @@ -11,84 +12,183 @@ import ( "github.com/coder/coder/v2/coderd/database" ) -func TestSelectTemplateRecommendationRankingSignalsUnavailable(t *testing.T) { +func TestComputeAffinityScore(t *testing.T) { t.Parallel() - enrichmentErr := xerrors.New("enrichment failed") - enrichmentErrors := templateRankingSignalErrors{ - ActiveDeveloperCounts: enrichmentErr, - Usage: enrichmentErr, - } now := time.Date(2026, 5, 15, 12, 0, 0, 0, time.UTC) - onlyTemplateID := uuid.New() - hint, recommendedID, reason := selectTemplateRecommendation( - []rankedTemplate{{Template: database.Template{ID: onlyTemplateID}}}, - 1, - enrichmentErrors, - now, - ) - require.Equal(t, listTemplatesHintOnlyAvailable, hint) - require.Equal(t, onlyTemplateID, recommendedID) - require.Equal(t, "only_available_template", reason) - - topID := uuid.New() - hint, recommendedID, reason = selectTemplateRecommendation( - []rankedTemplate{ - {Template: database.Template{ID: topID}, QueryScore: queryScoreExactName}, - {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreDescriptionMatch}, - }, - 2, - enrichmentErrors, - now, - ) - require.Equal(t, listTemplatesHintHighConfidence, hint) - require.Equal(t, topID, recommendedID) - require.Equal(t, "matches_query", reason) - - personalUsageID := uuid.New() - hint, recommendedID, reason = selectTemplateRecommendation( - []rankedTemplate{ - { - Template: database.Template{ID: personalUsageID}, - Usage: templateUsage{ - WorkspaceCount: 3, - LastUsedAt: now.Add(-180 * 24 * time.Hour), - }, - }, - {Template: database.Template{ID: uuid.New()}}, - }, - 2, - templateRankingSignalErrors{ActiveDeveloperCounts: enrichmentErr}, - now, - ) - require.Equal(t, listTemplatesHintHighConfidence, hint) - require.Equal(t, personalUsageID, recommendedID) - require.Equal(t, "used_by_you", reason) - - hint, recommendedID, reason = 
selectTemplateRecommendation( - []rankedTemplate{ - {Template: database.Template{ID: uuid.New()}, ActiveDevelopers: 2}, - {Template: database.Template{ID: uuid.New()}}, - }, - 2, - templateRankingSignalErrors{Usage: enrichmentErr}, - now, - ) - require.Equal(t, listTemplatesHintNoConfidence, hint) - require.Equal(t, uuid.Nil, recommendedID) - require.Equal(t, "ranking_signals_unavailable", reason) - - hint, recommendedID, reason = selectTemplateRecommendation( - []rankedTemplate{ - {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreNamePrefix}, - {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreNamePrefix}, - }, - 2, - enrichmentErrors, - now, + // No signals at all scores zero. + require.Zero(t, computeAffinityScore(templateRankingSignals{}, now)) + + // With no personal usage the score collapses to the log-scaled org term. + orgOnly := computeAffinityScore(templateRankingSignals{OrgDevs: 3}, now) + require.InDelta(t, listTemplatesOrgWeight*math.Log1p(3), orgOnly, 1e-9) + + // Org popularity is monotonic in the developer count. + require.Greater(t, + computeAffinityScore(templateRankingSignals{OrgDevs: 3}, now), + computeAffinityScore(templateRankingSignals{OrgDevs: 1}, now), ) - require.Equal(t, listTemplatesHintNoConfidence, hint) - require.Equal(t, uuid.Nil, recommendedID) - require.Equal(t, "ranking_signals_unavailable", reason) + + // Recency decay: the same usage counts more when it is more recent. + recent := computeAffinityScore(templateRankingSignals{ActiveCount: 2, LastUsedAt: now.Add(-1 * 24 * time.Hour)}, now) + stale := computeAffinityScore(templateRankingSignals{ActiveCount: 2, LastUsedAt: now.Add(-30 * 24 * time.Hour)}, now) + require.Greater(t, recent, stale) + + // Deleted workspaces contribute at reduced weight, so the same number of + // active workspaces outscores deleted ones. 
+ last := now.Add(-1 * time.Hour) + activeOnly := computeAffinityScore(templateRankingSignals{ActiveCount: 2, LastUsedAt: last}, now) + deletedOnly := computeAffinityScore(templateRankingSignals{DeletedRecentCount: 2, LastUsedAt: last}, now) + require.Greater(t, activeOnly, deletedOnly) + require.Greater(t, deletedOnly, 0.0) + + // A future last_used_at clamps the age to zero rather than amplifying. + future := computeAffinityScore(templateRankingSignals{ActiveCount: 1, LastUsedAt: now.Add(time.Hour)}, now) + atNow := computeAffinityScore(templateRankingSignals{ActiveCount: 1, LastUsedAt: now}, now) + require.InDelta(t, atNow, future, 1e-9) +} + +func TestSelectTemplateRecommendation(t *testing.T) { + t.Parallel() + + loadErr := xerrors.New("signals failed to load") + + t.Run("NoMatches", func(t *testing.T) { + t.Parallel() + hint, id, reason := selectTemplateRecommendation(nil, 0, nil) + require.Equal(t, listTemplatesHintNoConfidence, hint) + require.Equal(t, uuid.Nil, id) + require.Equal(t, "no_matching_templates", reason) + }) + + t.Run("OnlyAvailable", func(t *testing.T) { + t.Parallel() + only := uuid.New() + hint, id, reason := selectTemplateRecommendation( + []rankedTemplate{{Template: database.Template{ID: only}}}, 1, loadErr, + ) + require.Equal(t, listTemplatesHintOnlyAvailable, hint) + require.Equal(t, only, id) + require.Equal(t, "only_available_template", reason) + }) + + t.Run("DecisiveQueryRecommendsEvenWithLoadError", func(t *testing.T) { + t.Parallel() + top := uuid.New() + for _, err := range []error{nil, loadErr} { + hint, id, reason := selectTemplateRecommendation( + []rankedTemplate{ + {Template: database.Template{ID: top}, QueryScore: queryScoreExactName}, + {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreDescriptionMatch}, + }, 2, err, + ) + require.Equal(t, listTemplatesHintHighConfidence, hint) + require.Equal(t, top, id) + require.Equal(t, "matches_query", reason) + } + }) + + t.Run("QueryTieBrokenByAffinityGap", func(t 
*testing.T) { + t.Parallel() + top := uuid.New() + hint, id, reason := selectTemplateRecommendation( + []rankedTemplate{ + {Template: database.Template{ID: top}, QueryScore: queryScoreNamePrefix, AffinityScore: 10, Signals: templateRankingSignals{ActiveCount: 1}}, + {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreNamePrefix, AffinityScore: 0}, + }, 2, nil, + ) + require.Equal(t, listTemplatesHintHighConfidence, hint) + require.Equal(t, top, id) + require.Equal(t, "matches_query_and_used_by_you", reason) + }) + + t.Run("QueryTieWithSmallGapIsAmbiguous", func(t *testing.T) { + t.Parallel() + hint, id, _ := selectTemplateRecommendation( + []rankedTemplate{ + {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreNamePrefix, AffinityScore: 0.1}, + {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreNamePrefix, AffinityScore: 0}, + }, 2, nil, + ) + require.Equal(t, listTemplatesHintAmbiguous, hint) + require.Equal(t, uuid.Nil, id) + }) + + t.Run("QueryTieWithLoadErrorIsUnavailable", func(t *testing.T) { + t.Parallel() + hint, id, reason := selectTemplateRecommendation( + []rankedTemplate{ + {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreNamePrefix}, + {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreNamePrefix}, + }, 2, loadErr, + ) + require.Equal(t, listTemplatesHintNoConfidence, hint) + require.Equal(t, uuid.Nil, id) + require.Equal(t, "ranking_signals_unavailable", reason) + }) + + t.Run("NoQueryNoSignal", func(t *testing.T) { + t.Parallel() + hint, _, reason := selectTemplateRecommendation( + []rankedTemplate{ + {Template: database.Template{ID: uuid.New()}}, + {Template: database.Template{ID: uuid.New()}}, + }, 2, nil, + ) + require.Equal(t, listTemplatesHintNoConfidence, hint) + require.Equal(t, "no_ranking_signal", reason) + }) + + t.Run("NoQueryWeakSignalBelowFloor", func(t *testing.T) { + t.Parallel() + // One active developer scores ln(2), below the ln(3) floor. 
+ hint, _, reason := selectTemplateRecommendation( + []rankedTemplate{ + {Template: database.Template{ID: uuid.New()}, AffinityScore: math.Log1p(1), Signals: templateRankingSignals{OrgDevs: 1}}, + {Template: database.Template{ID: uuid.New()}, AffinityScore: 0}, + }, 2, nil, + ) + require.Equal(t, listTemplatesHintNoConfidence, hint) + require.Equal(t, "weak_ranking_signal", reason) + }) + + t.Run("NoQueryConfidentWhenLeadsRunnerUp", func(t *testing.T) { + t.Parallel() + top := uuid.New() + hint, id, reason := selectTemplateRecommendation( + []rankedTemplate{ + {Template: database.Template{ID: top}, AffinityScore: math.Log1p(3), Signals: templateRankingSignals{OrgDevs: 3}}, + {Template: database.Template{ID: uuid.New()}, AffinityScore: math.Log1p(1), Signals: templateRankingSignals{OrgDevs: 1}}, + }, 2, nil, + ) + require.Equal(t, listTemplatesHintHighConfidence, hint) + require.Equal(t, top, id) + require.Equal(t, "popular_in_org", reason) + }) + + t.Run("NoQueryAmbiguousWhenBothClearFloorAndClose", func(t *testing.T) { + t.Parallel() + hint, id, _ := selectTemplateRecommendation( + []rankedTemplate{ + {Template: database.Template{ID: uuid.New()}, AffinityScore: 1.20, Signals: templateRankingSignals{OrgDevs: 2}}, + {Template: database.Template{ID: uuid.New()}, AffinityScore: 1.15, Signals: templateRankingSignals{OrgDevs: 2}}, + }, 2, nil, + ) + require.Equal(t, listTemplatesHintAmbiguous, hint) + require.Equal(t, uuid.Nil, id) + }) + + t.Run("NoQueryLoadErrorIsUnavailable", func(t *testing.T) { + t.Parallel() + hint, id, reason := selectTemplateRecommendation( + []rankedTemplate{ + {Template: database.Template{ID: uuid.New()}, AffinityScore: math.Log1p(3), Signals: templateRankingSignals{OrgDevs: 3}}, + {Template: database.Template{ID: uuid.New()}}, + }, 2, loadErr, + ) + require.Equal(t, listTemplatesHintNoConfidence, hint) + require.Equal(t, uuid.Nil, id) + require.Equal(t, "ranking_signals_unavailable", reason) + }) } diff --git 
a/coderd/x/chatd/chattool/listtemplates_test.go b/coderd/x/chatd/chattool/listtemplates_test.go index cfae13aa214e6..cd2c290af1176 100644 --- a/coderd/x/chatd/chattool/listtemplates_test.go +++ b/coderd/x/chatd/chattool/listtemplates_test.go @@ -343,7 +343,7 @@ func TestListTemplates_QueryRelevanceOutranksPersonalUsage(t *testing.T) { OwnerID: user.ID, OrganizationID: org.ID, TemplateID: used.ID, - LastUsedAt: time.Date(2026, 5, 2, 12, 0, 0, 0, time.UTC), + LastUsedAt: time.Now().Add(-14 * 24 * time.Hour), }) tool := chattool.ListTemplates(db, org.ID, chattool.ListTemplatesOptions{ @@ -533,14 +533,17 @@ func TestListTemplates_StalePersonalUsageDoesNotRecommend(t *testing.T) { require.Len(t, templates, 2) require.Equal(t, oldUsage.ID.String(), templates[0]["id"]) require.Equal(t, unused.ID.String(), templates[1]["id"]) - require.Equal(t, float64(1), templates[0]["your_workspace_count"]) + // The 180-day-old workspace is outside the 60-day lookback window, so it no + // longer counts as in-window personal usage. + _, hasCount := templates[0]["your_workspace_count"] + require.False(t, hasCount) require.Equal(t, "no_confident_match", result["selection_hint"]) require.Equal(t, "weak_ranking_signal", result["recommendation_reason"]) _, ok := result["recommended_template_id"] require.False(t, ok) } -func TestListTemplates_PersonalUsageCountRecommendsStaleTemplate(t *testing.T) { +func TestListTemplates_StaleFrequentPersonalUsageDoesNotRecommend(t *testing.T) { t.Parallel() ctx := testutil.Context(t, testutil.WaitShort) clock := quartz.NewMock(t) @@ -564,6 +567,9 @@ func TestListTemplates_PersonalUsageCountRecommendsStaleTemplate(t *testing.T) { CreatedBy: user.ID, Name: "unused", }) + // Two workspaces used 180 days ago. Frequency no longer dominates recency: + // usage outside the lookback window decays out of the personal signal, so a + // frequently-but-stalely-used template is no longer a confident match. 
for range 2 { dbgen.Workspace(t, db, database.WorkspaceTable{ OwnerID: user.ID, @@ -582,9 +588,64 @@ func TestListTemplates_PersonalUsageCountRecommendsStaleTemplate(t *testing.T) { require.Len(t, templates, 2) require.Equal(t, staleUsage.ID.String(), templates[0]["id"]) require.Equal(t, unused.ID.String(), templates[1]["id"]) + require.Equal(t, "no_confident_match", result["selection_hint"]) + require.Equal(t, "weak_ranking_signal", result["recommendation_reason"]) + _, ok := result["recommended_template_id"] + require.False(t, ok) + // The stale workspaces fall outside the lookback window, so no in-window + // personal count is surfaced. + _, hasCount := templates[0]["your_workspace_count"] + require.False(t, hasCount) +} + +func TestListTemplates_RecentPersonalUsageRecommends(t *testing.T) { + t.Parallel() + ctx := testutil.Context(t, testutil.WaitShort) + clock := quartz.NewMock(t) + now := time.Date(2026, 5, 15, 12, 0, 0, 0, time.UTC) + clock.Set(now).MustWait(ctx) + db, _ := dbtestutil.NewDB(t) + user := dbgen.User(t, db, database.User{}) + org := dbgen.Organization(t, db, database.Organization{}) + _ = dbgen.OrganizationMember(t, db, database.OrganizationMember{ + UserID: user.ID, + OrganizationID: org.ID, + }) + + recentUsage := dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + Name: "recent-usage", + }) + unused := dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + Name: "unused", + }) + // Two workspaces used two days ago. Recent, in-window usage is a confident + // signal: this is the frecency improvement over the old count-only ranking. 
+ for range 2 { + dbgen.Workspace(t, db, database.WorkspaceTable{ + OwnerID: user.ID, + OrganizationID: org.ID, + TemplateID: recentUsage.ID, + LastUsedAt: now.Add(-2 * 24 * time.Hour), + }) + } + + tool := chattool.ListTemplates(db, org.ID, chattool.ListTemplatesOptions{ + OwnerID: user.ID, + Clock: clock, + }) + result := runListTemplates(ctx, t, tool, `{}`) + templates := listTemplateItems(t, result) + require.Len(t, templates, 2) + require.Equal(t, recentUsage.ID.String(), templates[0]["id"]) + require.Equal(t, unused.ID.String(), templates[1]["id"]) require.Equal(t, float64(2), templates[0]["your_workspace_count"]) + require.Equal(t, "used_by_you", templates[0]["relevance_signals"]) require.Equal(t, "high_confidence_recommendation", result["selection_hint"]) - require.Equal(t, staleUsage.ID.String(), result["recommended_template_id"]) + require.Equal(t, recentUsage.ID.String(), result["recommended_template_id"]) } func TestListTemplates_AmbiguousTopMatches(t *testing.T) { @@ -811,6 +872,83 @@ func TestTemplateAllowlistEnforcement(t *testing.T) { }) } +// TestGetTemplateRankingSignalsByOwnerID exercises the raw SQL signals query: +// the lookback window, the active/deleted split, and excluding the prebuilds +// system user from the organization developer count. 
+func TestGetTemplateRankingSignalsByOwnerID(t *testing.T) { + t.Parallel() + ctx := testutil.Context(t, testutil.WaitShort) + db, _ := dbtestutil.NewDB(t) + + now := time.Now() + lookbackCutoff := now.Add(-60 * 24 * time.Hour) + + user := dbgen.User(t, db, database.User{}) + otherUser := dbgen.User(t, db, database.User{}) + org := dbgen.Organization(t, db, database.Organization{}) + for _, u := range []uuid.UUID{user.ID, otherUser.ID} { + _ = dbgen.OrganizationMember(t, db, database.OrganizationMember{UserID: u, OrganizationID: org.ID}) + } + + used := dbgen.Template(t, db, database.Template{OrganizationID: org.ID, CreatedBy: user.ID, Name: "used"}) + unused := dbgen.Template(t, db, database.Template{OrganizationID: org.ID, CreatedBy: user.ID, Name: "unused"}) + + // Active, in-window workspace for the requesting user. + dbgen.Workspace(t, db, database.WorkspaceTable{ + OwnerID: user.ID, OrganizationID: org.ID, TemplateID: used.ID, + LastUsedAt: now.Add(-2 * 24 * time.Hour), + }) + // Recently-deleted, in-window workspace for the requesting user. + dbgen.Workspace(t, db, database.WorkspaceTable{ + OwnerID: user.ID, OrganizationID: org.ID, TemplateID: used.ID, + LastUsedAt: now.Add(-3 * 24 * time.Hour), Deleted: true, + }) + // Non-deleted but outside the lookback window: it must not count toward the + // in-window active count, though it still keeps the user in the org count. + dbgen.Workspace(t, db, database.WorkspaceTable{ + OwnerID: user.ID, OrganizationID: org.ID, TemplateID: used.ID, + LastUsedAt: now.Add(-90 * 24 * time.Hour), + }) + // Another developer's active workspace contributes to org popularity. + dbgen.Workspace(t, db, database.WorkspaceTable{ + OwnerID: otherUser.ID, OrganizationID: org.ID, TemplateID: used.ID, + LastUsedAt: now.Add(-1 * 24 * time.Hour), + }) + // The prebuilds system user must be excluded from the org developer count. 
+ dbgen.Workspace(t, db, database.WorkspaceTable{ + OwnerID: database.PrebuildsSystemUserID, OrganizationID: org.ID, TemplateID: used.ID, + LastUsedAt: now.Add(-1 * 24 * time.Hour), + }) + + rows, err := db.GetTemplateRankingSignalsByOwnerID(ctx, database.GetTemplateRankingSignalsByOwnerIDParams{ + TemplateIDs: []uuid.UUID{used.ID, unused.ID}, + OwnerID: user.ID, + OrganizationID: org.ID, + PrebuildsUserID: database.PrebuildsSystemUserID, + LookbackCutoff: lookbackCutoff, + }) + require.NoError(t, err) + + byTemplate := make(map[uuid.UUID]database.GetTemplateRankingSignalsByOwnerIDRow, len(rows)) + for _, row := range rows { + byTemplate[row.TemplateID] = row + } + // The unnest LEFT JOIN returns a row for every requested template. + require.Len(t, byTemplate, 2) + + usedRow := byTemplate[used.ID] + require.Equal(t, int64(1), usedRow.ActiveCount, "only the in-window active workspace counts") + require.Equal(t, int64(1), usedRow.DeletedRecentCount, "the in-window deleted workspace counts") + require.Equal(t, int64(2), usedRow.OrgDevs, "user and otherUser count; prebuilds user is excluded") + require.True(t, usedRow.LastUsedAt.Valid) + + unusedRow := byTemplate[unused.ID] + require.Equal(t, int64(0), unusedRow.ActiveCount) + require.Equal(t, int64(0), unusedRow.DeletedRecentCount) + require.Equal(t, int64(0), unusedRow.OrgDevs) + require.False(t, unusedRow.LastUsedAt.Valid) +} + func runListTemplates( ctx context.Context, t *testing.T, diff --git a/coderd/x/chatd/prompt.go b/coderd/x/chatd/prompt.go index 1254ca6e8d3dd..0fb503032b407 100644 --- a/coderd/x/chatd/prompt.go +++ b/coderd/x/chatd/prompt.go @@ -9,7 +9,7 @@ Do not create or start a workspace by default. 
Many requests can be completed us Workspace tools such as execute, read_file, write_file, and edit_files require an attached workspace.` const workspaceDetachedAwareness = workspaceDetachedAwarenessBase + ` Only call create_workspace or start_workspace when the user explicitly asks for a workspace-backed task, or when the task cannot be completed without inspecting, editing, or running files in a workspace. -If a workspace is needed, use list_templates and read_template as needed before create_workspace.` +If a workspace is needed, use list_templates before create_workspace. Call read_template only when you need template parameter or preset details.` const workspaceDetachedNoCreateAwareness = workspaceDetachedAwarenessBase + ` This delegated chat cannot create or start a workspace. If workspace-backed work is required, report that need to the parent agent instead of trying workspace tools.` From dfc293d07c54b75a598393731b1a5f95995dd145 Mon Sep 17 00:00:00 2001 From: Jaayden Halko Date: Fri, 5 Jun 2026 11:26:14 +0000 Subject: [PATCH 08/21] fix(coderd): address template ranking review feedback Address the latest PR review feedback for frecency-based template ranking: - Authorize template ranking signals against the exact candidate template IDs using the same prepared-filter semantics as GetTemplatesWithFilter, so users with template ACL access keep ranking signals without broad org-wide template read. - Split the dbauthz mock coverage for org-scoped and any-organization calls; normalize duplicate subtest suffixes in the dbauthz method-test harness. - Surface deleted-only personal usage evidence in list_templates output with a recently-deleted count and last-used timestamp. - Assert the raw SQL query returns the maximum last_used_at value. - Clarify detached prompt guidance for user_selection_required and conditional read_template usage. - Document ListTemplatesOptions requirements and defaults. 
--- coderd/database/dbauthz/dbauthz.go | 33 ++++++++--- coderd/database/dbauthz/dbauthz_test.go | 30 +++++----- coderd/database/dbauthz/setup_test.go | 3 + coderd/x/chatd/chattool/listtemplates.go | 15 +++-- coderd/x/chatd/chattool/listtemplates_test.go | 55 ++++++++++++++++++- coderd/x/chatd/prompt.go | 2 +- 6 files changed, 108 insertions(+), 30 deletions(-) diff --git a/coderd/database/dbauthz/dbauthz.go b/coderd/database/dbauthz/dbauthz.go index 11b6074696750..1af2a21395e17 100644 --- a/coderd/database/dbauthz/dbauthz.go +++ b/coderd/database/dbauthz/dbauthz.go @@ -4377,14 +4377,31 @@ func (q *querier) GetTemplateRankingSignalsByOwnerID(ctx context.Context, arg da // The org-popularity signal is a cross-user COUNT(DISTINCT owner_id) that we // treat as template popularity metadata, not as permission to read other // users' workspaces. Callers only ever pass template IDs already authorized - // via GetTemplatesWithFilter, so a template read check is the minimal, - // intentional authorization here instead of a system escalation. - templateObj := rbac.ResourceTemplate.AnyOrganization() - if arg.OrganizationID != uuid.Nil { - templateObj = rbac.ResourceTemplate.InOrg(arg.OrganizationID) - } - if err := q.authorizeContext(ctx, policy.ActionRead, templateObj); err != nil { - return nil, err + // via GetTemplatesWithFilter, and we verify those exact IDs here with the + // same prepared-filter semantics so ACL-only template readers keep their + // ranking signals without requiring broad org-wide template read. 
+ if len(arg.TemplateIDs) > 0 { + prep, err := prepareSQLFilter(ctx, q.auth, policy.ActionRead, rbac.ResourceTemplate.Type) + if err != nil { + return nil, xerrors.Errorf("(dev error) prepare sql filter: %w", err) + } + authorizedTemplates, err := q.db.GetAuthorizedTemplates(ctx, database.GetTemplatesWithFilterParams{ + Deleted: false, + OrganizationID: arg.OrganizationID, + IDs: arg.TemplateIDs, + }, prep) + if err != nil { + return nil, err + } + authorizedIDs := make(map[uuid.UUID]struct{}, len(authorizedTemplates)) + for _, template := range authorizedTemplates { + authorizedIDs[template.ID] = struct{}{} + } + for _, templateID := range arg.TemplateIDs { + if _, ok := authorizedIDs[templateID]; !ok { + return nil, NotAuthorizedError{Err: xerrors.Errorf("not authorized to read template %s", templateID)} + } + } } return q.db.GetTemplateRankingSignalsByOwnerID(ctx, arg) } diff --git a/coderd/database/dbauthz/dbauthz_test.go b/coderd/database/dbauthz/dbauthz_test.go index d9a61495f0db9..cd57e5c0d5503 100644 --- a/coderd/database/dbauthz/dbauthz_test.go +++ b/coderd/database/dbauthz/dbauthz_test.go @@ -3374,26 +3374,30 @@ func (s *MethodTestSuite) TestWorkspace() { check.Args(ws.OwnerID, emptyPreparedAuthorized{}).Asserts() })) s.Run("GetTemplateRankingSignalsByOwnerID", s.Mocked(func(dbm *dbmock.MockStore, _ *gofakeit.Faker, check *expects) { - argOrg := database.GetTemplateRankingSignalsByOwnerIDParams{ + arg := database.GetTemplateRankingSignalsByOwnerIDParams{ OwnerID: uuid.New(), OrganizationID: uuid.New(), TemplateIDs: []uuid.UUID{uuid.New()}, } - dbm.EXPECT().GetTemplateRankingSignalsByOwnerID(gomock.Any(), argOrg).Return([]database.GetTemplateRankingSignalsByOwnerIDRow{}, nil).AnyTimes() - check.Args(argOrg).Asserts( - rbac.ResourceWorkspace.WithOwner(argOrg.OwnerID.String()).InOrg(argOrg.OrganizationID), policy.ActionRead, - rbac.ResourceTemplate.InOrg(argOrg.OrganizationID), policy.ActionRead, - ) - - argNoOrg := 
database.GetTemplateRankingSignalsByOwnerIDParams{ + dbm.EXPECT().GetAuthorizedTemplates(gomock.Any(), database.GetTemplatesWithFilterParams{ + Deleted: false, + OrganizationID: arg.OrganizationID, + IDs: arg.TemplateIDs, + }, gomock.Any()).Return([]database.Template{{ID: arg.TemplateIDs[0]}}, nil).AnyTimes() + dbm.EXPECT().GetTemplateRankingSignalsByOwnerID(gomock.Any(), arg).Return([]database.GetTemplateRankingSignalsByOwnerIDRow{}, nil).AnyTimes() + check.Args(arg).Asserts(rbac.ResourceWorkspace.WithOwner(arg.OwnerID.String()).InOrg(arg.OrganizationID), policy.ActionRead) + })) + s.Run("GetTemplateRankingSignalsByOwnerID", s.Mocked(func(dbm *dbmock.MockStore, _ *gofakeit.Faker, check *expects) { + arg := database.GetTemplateRankingSignalsByOwnerIDParams{ OwnerID: uuid.New(), TemplateIDs: []uuid.UUID{uuid.New()}, } - dbm.EXPECT().GetTemplateRankingSignalsByOwnerID(gomock.Any(), argNoOrg).Return([]database.GetTemplateRankingSignalsByOwnerIDRow{}, nil).AnyTimes() - check.Args(argNoOrg).Asserts( - rbac.ResourceWorkspace.WithOwner(argNoOrg.OwnerID.String()).AnyOrganization(), policy.ActionRead, - rbac.ResourceTemplate.AnyOrganization(), policy.ActionRead, - ) + dbm.EXPECT().GetAuthorizedTemplates(gomock.Any(), database.GetTemplatesWithFilterParams{ + Deleted: false, + IDs: arg.TemplateIDs, + }, gomock.Any()).Return([]database.Template{{ID: arg.TemplateIDs[0]}}, nil).AnyTimes() + dbm.EXPECT().GetTemplateRankingSignalsByOwnerID(gomock.Any(), arg).Return([]database.GetTemplateRankingSignalsByOwnerIDRow{}, nil).AnyTimes() + check.Args(arg).Asserts(rbac.ResourceWorkspace.WithOwner(arg.OwnerID.String()).AnyOrganization(), policy.ActionRead) })) s.Run("GetWorkspaceACLByID", s.Mocked(func(dbM *dbmock.MockStore, faker *gofakeit.Faker, check *expects) { ws := testutil.Fake(s.T(), faker, database.Workspace{}) diff --git a/coderd/database/dbauthz/setup_test.go b/coderd/database/dbauthz/setup_test.go index bab2cac91cf12..5bf5163b91e20 100644 --- 
a/coderd/database/dbauthz/setup_test.go +++ b/coderd/database/dbauthz/setup_test.go @@ -192,6 +192,9 @@ func (s *MethodTestSuite) SubtestWithDB(db database.Store, testCaseF func(db dat testName := s.T().Name() names := strings.Split(testName, "/") methodName := names[len(names)-1] + if baseMethodName, _, ok := strings.Cut(methodName, "#"); ok { + methodName = baseMethodName + } s.methodAccounting[methodName]++ fakeAuthorizer := &coderdtest.FakeAuthorizer{} diff --git a/coderd/x/chatd/chattool/listtemplates.go b/coderd/x/chatd/chattool/listtemplates.go index 715d2261608ae..1f6b85fe2bcec 100644 --- a/coderd/x/chatd/chattool/listtemplates.go +++ b/coderd/x/chatd/chattool/listtemplates.go @@ -88,7 +88,9 @@ const ( queryScoreDescriptionMatch = 1 ) -// ListTemplatesOptions configures the list_templates tool. +// ListTemplatesOptions configures the list_templates tool. OwnerID is required. +// Logger may be zero-valued; Clock defaults to a real clock when nil. +// AllowedTemplateIDs optionally restricts which templates can be returned. type ListTemplatesOptions struct { OwnerID uuid.UUID Logger slog.Logger @@ -434,13 +436,14 @@ func templateItem(t rankedTemplate, recommendedID uuid.UUID) map[string]any { if t.Signals.OrgDevs > 0 { item["active_developers"] = t.Signals.OrgDevs } - // your_workspace_count exposes only active workspaces so deleted history is - // not surfaced to the model, though it still contributes to the score. 
if t.Signals.ActiveCount > 0 { item["your_workspace_count"] = t.Signals.ActiveCount - if !t.Signals.LastUsedAt.IsZero() { - item["last_used_by_you"] = t.Signals.LastUsedAt.Format(time.RFC3339Nano) - } + } + if t.Signals.DeletedRecentCount > 0 { + item["your_recently_deleted_workspace_count"] = t.Signals.DeletedRecentCount + } + if t.Signals.hasPersonalUsage() && !t.Signals.LastUsedAt.IsZero() { + item["last_used_by_you"] = t.Signals.LastUsedAt.Format(time.RFC3339Nano) } if t.Template.ID == recommendedID { item["recommended"] = true diff --git a/coderd/x/chatd/chattool/listtemplates_test.go b/coderd/x/chatd/chattool/listtemplates_test.go index cd2c290af1176..f957cb5e9e73c 100644 --- a/coderd/x/chatd/chattool/listtemplates_test.go +++ b/coderd/x/chatd/chattool/listtemplates_test.go @@ -648,6 +648,54 @@ func TestListTemplates_RecentPersonalUsageRecommends(t *testing.T) { require.Equal(t, recentUsage.ID.String(), result["recommended_template_id"]) } +func TestListTemplates_DeletedRecentPersonalUsageShowsEvidence(t *testing.T) { + t.Parallel() + ctx := testutil.Context(t, testutil.WaitShort) + clock := quartz.NewMock(t) + now := time.Date(2026, 5, 15, 12, 0, 0, 0, time.UTC) + clock.Set(now).MustWait(ctx) + db, _ := dbtestutil.NewDB(t) + user := dbgen.User(t, db, database.User{}) + org := dbgen.Organization(t, db, database.Organization{}) + _ = dbgen.OrganizationMember(t, db, database.OrganizationMember{ + UserID: user.ID, + OrganizationID: org.ID, + }) + + deletedUsage := dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + Name: "deleted-usage", + }) + unused := dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + Name: "unused", + }) + dbgen.Workspace(t, db, database.WorkspaceTable{ + OwnerID: user.ID, + OrganizationID: org.ID, + TemplateID: deletedUsage.ID, + LastUsedAt: now.Add(-2 * 24 * time.Hour), + Deleted: true, + }) + + tool := chattool.ListTemplates(db, org.ID, 
chattool.ListTemplatesOptions{ + OwnerID: user.ID, + Clock: clock, + }) + result := runListTemplates(ctx, t, tool, `{}`) + templates := listTemplateItems(t, result) + require.Len(t, templates, 2) + require.Equal(t, deletedUsage.ID.String(), templates[0]["id"]) + require.Equal(t, unused.ID.String(), templates[1]["id"]) + require.Equal(t, "used_by_you", templates[0]["relevance_signals"]) + require.Equal(t, float64(1), templates[0]["your_recently_deleted_workspace_count"]) + require.NotEmpty(t, templates[0]["last_used_by_you"]) + _, hasActiveCount := templates[0]["your_workspace_count"] + require.False(t, hasActiveCount) +} + func TestListTemplates_AmbiguousTopMatches(t *testing.T) { t.Parallel() ctx := testutil.Context(t, testutil.WaitShort) @@ -893,15 +941,17 @@ func TestGetTemplateRankingSignalsByOwnerID(t *testing.T) { used := dbgen.Template(t, db, database.Template{OrganizationID: org.ID, CreatedBy: user.ID, Name: "used"}) unused := dbgen.Template(t, db, database.Template{OrganizationID: org.ID, CreatedBy: user.ID, Name: "unused"}) + activeLastUsedAt := now.Add(-2 * 24 * time.Hour) + deletedLastUsedAt := now.Add(-3 * 24 * time.Hour) // Active, in-window workspace for the requesting user. dbgen.Workspace(t, db, database.WorkspaceTable{ OwnerID: user.ID, OrganizationID: org.ID, TemplateID: used.ID, - LastUsedAt: now.Add(-2 * 24 * time.Hour), + LastUsedAt: activeLastUsedAt, }) // Recently-deleted, in-window workspace for the requesting user. dbgen.Workspace(t, db, database.WorkspaceTable{ OwnerID: user.ID, OrganizationID: org.ID, TemplateID: used.ID, - LastUsedAt: now.Add(-3 * 24 * time.Hour), Deleted: true, + LastUsedAt: deletedLastUsedAt, Deleted: true, }) // Non-deleted but outside the lookback window: it must not count toward the // in-window active count, though it still keeps the user in the org count. 
@@ -941,6 +991,7 @@ func TestGetTemplateRankingSignalsByOwnerID(t *testing.T) { require.Equal(t, int64(1), usedRow.DeletedRecentCount, "the in-window deleted workspace counts") require.Equal(t, int64(2), usedRow.OrgDevs, "user and otherUser count; prebuilds user is excluded") require.True(t, usedRow.LastUsedAt.Valid) + require.WithinDuration(t, activeLastUsedAt, usedRow.LastUsedAt.Time, time.Microsecond) unusedRow := byTemplate[unused.ID] require.Equal(t, int64(0), unusedRow.ActiveCount) diff --git a/coderd/x/chatd/prompt.go b/coderd/x/chatd/prompt.go index 0fb503032b407..d5ccedd7c3157 100644 --- a/coderd/x/chatd/prompt.go +++ b/coderd/x/chatd/prompt.go @@ -9,7 +9,7 @@ Do not create or start a workspace by default. Many requests can be completed us Workspace tools such as execute, read_file, write_file, and edit_files require an attached workspace.` const workspaceDetachedAwareness = workspaceDetachedAwarenessBase + ` Only call create_workspace or start_workspace when the user explicitly asks for a workspace-backed task, or when the task cannot be completed without inspecting, editing, or running files in a workspace. -If a workspace is needed, use list_templates before create_workspace. Call read_template only when you need template parameter or preset details.` +If a workspace is needed, use list_templates before create_workspace. If list_templates returns user_selection_required or a no_confident_match or ambiguous_top_matches selection_hint, ask the user to choose before create_workspace. Call read_template only when you need template parameter or preset details.` const workspaceDetachedNoCreateAwareness = workspaceDetachedAwarenessBase + ` This delegated chat cannot create or start a workspace. 
If workspace-backed work is required, report that need to the parent agent instead of trying workspace tools.` From 5a7641ff662e0683346ee2800a47d360c00a509a Mon Sep 17 00:00:00 2001 From: Jaayden Halko Date: Fri, 5 Jun 2026 11:52:00 +0000 Subject: [PATCH 09/21] fix(coderd/x/chatd/chattool): add list_templates auth error context Address CRF-26 by adding list_templates operation context to the user-facing asOwner authorization error response. --- coderd/x/chatd/chattool/listtemplates.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/coderd/x/chatd/chattool/listtemplates.go b/coderd/x/chatd/chattool/listtemplates.go index 1f6b85fe2bcec..480bd237c7270 100644 --- a/coderd/x/chatd/chattool/listtemplates.go +++ b/coderd/x/chatd/chattool/listtemplates.go @@ -158,7 +158,7 @@ func ListTemplates(db database.Store, organizationID uuid.UUID, options ListTemp func(ctx context.Context, args listTemplatesArgs, _ fantasy.ToolCall) (fantasy.ToolResponse, error) { ctx, err := asOwner(ctx, db, options.OwnerID) if err != nil { - return fantasy.NewTextErrorResponse(err.Error()), nil + return fantasy.NewTextErrorResponse(xerrors.Errorf("authorize list_templates owner: %w", err).Error()), nil } filterParams := database.GetTemplatesWithFilterParams{ From 42fed81df5a3e98082ff714a417bb3d08e50b603 Mon Sep 17 00:00:00 2001 From: Jaayden Halko Date: Fri, 5 Jun 2026 12:18:48 +0000 Subject: [PATCH 10/21] test(coderd): update chat system prompt expectation Keep TestChatSystemPrompt's detached workspace awareness expectation in sync with the updated list_templates guidance. --- coderd/exp_chats_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/coderd/exp_chats_test.go b/coderd/exp_chats_test.go index fdbc1160d8ac8..0d93978a1e807 100644 --- a/coderd/exp_chats_test.go +++ b/coderd/exp_chats_test.go @@ -10631,7 +10631,7 @@ func TestChatSystemPrompt(t *testing.T) { const workspaceAwareness = `No workspace is attached to this chat yet. 
Do not create or start a workspace by default. Many requests can be completed using the conversation, provider tools such as web_search when available, or configured external MCP tools. Workspace tools such as execute, read_file, write_file, and edit_files require an attached workspace. Only call create_workspace or start_workspace when the user explicitly asks for a workspace-backed task, or when the task cannot be completed without inspecting, editing, or running files in a workspace. -If a workspace is needed, use list_templates and read_template as needed before create_workspace.` +If a workspace is needed, use list_templates before create_workspace. If list_templates returns user_selection_required or a no_confident_match or ambiguous_top_matches selection_hint, ask the user to choose before create_workspace. Call read_template only when you need template parameter or preset details.` updateChatSystemPrompt := func(t *testing.T, ctx context.Context, req codersdk.UpdateChatSystemPromptRequest) { t.Helper() From d7fe770829dad0fb2598a05b91d3222380c3f2c8 Mon Sep 17 00:00:00 2001 From: Jaayden Halko Date: Tue, 9 Jun 2026 13:53:25 +0000 Subject: [PATCH 11/21] chore: update comments --- coderd/database/querier.go | 7 ++++--- coderd/database/queries.sql.go | 7 ++++--- coderd/database/queries/workspaces.sql | 7 ++++--- coderd/x/chatd/chattool/listtemplates.go | 11 ++++++----- 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/coderd/database/querier.go b/coderd/database/querier.go index 284ae0a74827e..f958e00893126 100644 --- a/coderd/database/querier.go +++ b/coderd/database/querier.go @@ -731,9 +731,10 @@ type sqlcQuerier interface { // deleted workspaces the owner used within the lookback window, when the // template was last used, and how many distinct developers in the organization // currently have a non-deleted workspace on it. 
The affinity score itself is - // computed in Go (see listtemplates.go); the parameterized recency-decay math - // cannot be expressed through sqlc reliably, so this query returns the exact - // raw signals the score is built from. The lookback window is applied with a + // computed in Go (see listtemplates.go) because sqlc type inference is fragile + // around complex parameterized expressions unless inputs are explicitly cast + // and nested selects are kept simple. This query returns the exact raw signals + // the score is built from. The lookback window is applied with a // caller-computed cutoff timestamp. GetTemplateRankingSignalsByOwnerID(ctx context.Context, arg GetTemplateRankingSignalsByOwnerIDParams) ([]GetTemplateRankingSignalsByOwnerIDRow, error) GetTemplateUsageStats(ctx context.Context, arg GetTemplateUsageStatsParams) ([]TemplateUsageStat, error) diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index 9bb42acae010e..369ec1fc703fe 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -35120,9 +35120,10 @@ type GetTemplateRankingSignalsByOwnerIDRow struct { // deleted workspaces the owner used within the lookback window, when the // template was last used, and how many distinct developers in the organization // currently have a non-deleted workspace on it. The affinity score itself is -// computed in Go (see listtemplates.go); the parameterized recency-decay math -// cannot be expressed through sqlc reliably, so this query returns the exact -// raw signals the score is built from. The lookback window is applied with a +// computed in Go (see listtemplates.go) because sqlc type inference is fragile +// around complex parameterized expressions unless inputs are explicitly cast +// and nested selects are kept simple. This query returns the exact raw signals +// the score is built from. The lookback window is applied with a // caller-computed cutoff timestamp. 
func (q *sqlQuerier) GetTemplateRankingSignalsByOwnerID(ctx context.Context, arg GetTemplateRankingSignalsByOwnerIDParams) ([]GetTemplateRankingSignalsByOwnerIDRow, error) { rows, err := q.db.QueryContext(ctx, getTemplateRankingSignalsByOwnerID, diff --git a/coderd/database/queries/workspaces.sql b/coderd/database/queries/workspaces.sql index 225e6cacdcaff..f615b27cd6a1c 100644 --- a/coderd/database/queries/workspaces.sql +++ b/coderd/database/queries/workspaces.sql @@ -503,9 +503,10 @@ GROUP BY templates.id; -- deleted workspaces the owner used within the lookback window, when the -- template was last used, and how many distinct developers in the organization -- currently have a non-deleted workspace on it. The affinity score itself is --- computed in Go (see listtemplates.go); the parameterized recency-decay math --- cannot be expressed through sqlc reliably, so this query returns the exact --- raw signals the score is built from. The lookback window is applied with a +-- computed in Go (see listtemplates.go) because sqlc type inference is fragile +-- around complex parameterized expressions unless inputs are explicitly cast +-- and nested selects are kept simple. This query returns the exact raw signals +-- the score is built from. The lookback window is applied with a -- caller-computed cutoff timestamp. WITH org_usage AS ( -- org_usage measures how many distinct developers currently have a diff --git a/coderd/x/chatd/chattool/listtemplates.go b/coderd/x/chatd/chattool/listtemplates.go index 480bd237c7270..0babf600fcaf9 100644 --- a/coderd/x/chatd/chattool/listtemplates.go +++ b/coderd/x/chatd/chattool/listtemplates.go @@ -38,11 +38,12 @@ const ( // organization weight matters. They are deliberately explicit so the // ranking can be calibrated as ranking-quality signal accrues. 
// - // The score is computed in Go (computeAffinityScore) rather than SQL - // because sqlc cannot reliably compile the parameterized decay expression; - // see GetTemplateRankingSignalsByOwnerID. Keeping the score and the - // confidence thresholds in the same place also avoids Postgres-versus-Go - // floating-point differences at confidence boundaries. + // The score is computed in Go (computeAffinityScore) rather than SQL because + // sqlc type inference is fragile around complex parameterized expressions + // unless inputs are explicitly cast and nested selects are kept simple; see + // GetTemplateRankingSignalsByOwnerID. Keeping the score and the confidence + // thresholds in the same place also avoids Postgres-versus-Go floating-point + // differences at confidence boundaries. listTemplatesLookbackDays = 60 listTemplatesHalfLife = 14 * 24 * time.Hour listTemplatesPersonalWeight = 10.0 From 95f7beb9eeb907af2d1ad80d3d5ee03e1f806af6 Mon Sep 17 00:00:00 2001 From: Jaayden Halko Date: Wed, 10 Jun 2026 07:27:58 +0000 Subject: [PATCH 12/21] test(coderd/database/dbauthz): cover template ranking signals deny path --- coderd/database/dbauthz/dbauthz_test.go | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/coderd/database/dbauthz/dbauthz_test.go b/coderd/database/dbauthz/dbauthz_test.go index 0c5b6f5be9ac9..d66732e0faed5 100644 --- a/coderd/database/dbauthz/dbauthz_test.go +++ b/coderd/database/dbauthz/dbauthz_test.go @@ -3418,6 +3418,25 @@ func (s *MethodTestSuite) TestWorkspace() { dbm.EXPECT().GetTemplateRankingSignalsByOwnerID(gomock.Any(), arg).Return([]database.GetTemplateRankingSignalsByOwnerIDRow{}, nil).AnyTimes() check.Args(arg).Asserts(rbac.ResourceWorkspace.WithOwner(arg.OwnerID.String()).AnyOrganization(), policy.ActionRead) })) + s.Run("GetTemplateRankingSignalsByOwnerID", s.Mocked(func(dbm *dbmock.MockStore, _ *gofakeit.Faker, check *expects) { + // Deny path: when any requested template is missing from the + // authorized set, the 
call is rejected and the underlying query is + // never reached. No expectation is registered for + // GetTemplateRankingSignalsByOwnerID, so reaching it fails the test. + arg := database.GetTemplateRankingSignalsByOwnerIDParams{ + OwnerID: uuid.New(), + OrganizationID: uuid.New(), + TemplateIDs: []uuid.UUID{uuid.New(), uuid.New()}, + } + dbm.EXPECT().GetAuthorizedTemplates(gomock.Any(), database.GetTemplatesWithFilterParams{ + Deleted: false, + OrganizationID: arg.OrganizationID, + IDs: arg.TemplateIDs, + }, gomock.Any()).Return([]database.Template{{ID: arg.TemplateIDs[0]}}, nil).AnyTimes() + check.Args(arg). + Asserts(rbac.ResourceWorkspace.WithOwner(arg.OwnerID.String()).InOrg(arg.OrganizationID), policy.ActionRead). + Errors(dbauthz.NotAuthorizedError{Err: xerrors.Errorf("not authorized to read template %s", arg.TemplateIDs[1])}) + })) s.Run("GetWorkspaceACLByID", s.Mocked(func(dbM *dbmock.MockStore, faker *gofakeit.Faker, check *expects) { ws := testutil.Fake(s.T(), faker, database.Workspace{}) dbM.EXPECT().GetWorkspaceByID(gomock.Any(), ws.ID).Return(ws, nil).AnyTimes() From 71eca18be56d67b19d22bd836fbfe17eca0482fe Mon Sep 17 00:00:00 2001 From: Jaayden Halko Date: Wed, 10 Jun 2026 07:38:10 +0000 Subject: [PATCH 13/21] refactor(coderd/x/chatd/chattool): simplify templateQueryScore tier matching --- coderd/x/chatd/chattool/listtemplates.go | 56 ++++++++++++------------ 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/coderd/x/chatd/chattool/listtemplates.go b/coderd/x/chatd/chattool/listtemplates.go index 0babf600fcaf9..e159b69b694b4 100644 --- a/coderd/x/chatd/chattool/listtemplates.go +++ b/coderd/x/chatd/chattool/listtemplates.go @@ -471,40 +471,17 @@ func relevanceSignals(t rankedTemplate) string { func templateQueryScore(t database.Template, query string) int { query = normalizeTemplateSearch(query) - if query == "" { - return 0 - } - queryCompact := compactTemplateSearch(query) - if queryCompact == "" { + if query == "" || queryCompact == 
"" { return 0 } + + best := 0 for _, field := range []string{t.Name, t.DisplayName} { - field = normalizeTemplateSearch(field) - if field == "" { - continue - } - if field == query || compactTemplateSearch(field) == queryCompact { - return queryScoreExactName - } + best = max(best, nameQueryScore(field, query, queryCompact)) } - for _, field := range []string{t.Name, t.DisplayName} { - field = normalizeTemplateSearch(field) - if field == "" { - continue - } - if strings.HasPrefix(field, query) || strings.HasPrefix(compactTemplateSearch(field), queryCompact) { - return queryScoreNamePrefix - } - } - for _, field := range []string{t.Name, t.DisplayName} { - field = normalizeTemplateSearch(field) - if field == "" { - continue - } - if strings.Contains(field, query) || strings.Contains(compactTemplateSearch(field), queryCompact) { - return queryScoreNameContains - } + if best > 0 { + return best } desc := normalizeTemplateSearch(t.Description) if strings.Contains(desc, query) || strings.Contains(compactTemplateSearch(desc), queryCompact) { @@ -513,6 +490,27 @@ func templateQueryScore(t database.Template, query string) int { return 0 } +// nameQueryScore returns the relevance tier of a single name-like field: +// exact match outranks prefix match, which outranks substring match, on +// either the normalized or compact form. Returns 0 when the field does not +// match. 
+func nameQueryScore(field, query, queryCompact string) int { + field = normalizeTemplateSearch(field) + if field == "" { + return 0 + } + fieldCompact := compactTemplateSearch(field) + switch { + case field == query || fieldCompact == queryCompact: + return queryScoreExactName + case strings.HasPrefix(field, query) || strings.HasPrefix(fieldCompact, queryCompact): + return queryScoreNamePrefix + case strings.Contains(field, query) || strings.Contains(fieldCompact, queryCompact): + return queryScoreNameContains + } + return 0 +} + func normalizeTemplateSearch(value string) string { return strings.ToLower(strings.TrimSpace(value)) } From 36a6be6b58243ab71bd4f504b474bfa9c92fd11a Mon Sep 17 00:00:00 2001 From: Jaayden Halko Date: Thu, 11 Jun 2026 10:57:19 +0000 Subject: [PATCH 14/21] refactor: replace list_templates selection hints with a next_step instruction list_templates previously taught the model a four-value selection_hint enum, an eleven-value recommendation_reason, per-item relevance_signals, and a derived user_selection_required flag, re-explained in two tool descriptions and two prompt sections. The ranked output now carries a single fixed next_step instruction at the point of use plus an optional recommended_template_id, and the per-item payload keeps only raw usage evidence. Ranking, confidence thresholds, and authorization are unchanged. Prompts reference the field through chattool.NextStepField so they cannot drift. Documents how agent tool calls work, including template selection and workspace creation, in docs/ai-coder/agents/tools. 
--- coderd/database/dbauthz/dbauthz.go | 11 +- coderd/database/dbauthz/dbauthz_test.go | 6 +- coderd/database/dbauthz/setup_test.go | 1 + coderd/database/querier.go | 15 +- coderd/database/queries.sql.go | 26 +- coderd/database/queries/workspaces.sql | 26 +- coderd/exp_chats_test.go | 2 +- coderd/x/chatd/chatd_test.go | 38 +-- coderd/x/chatd/chattool/createworkspace.go | 22 +- coderd/x/chatd/chattool/listtemplates.go | 254 ++++++------------ .../chattool/listtemplates_internal_test.go | 68 ++--- coderd/x/chatd/chattool/listtemplates_test.go | 87 ++---- coderd/x/chatd/chattool/readtemplate.go | 2 +- coderd/x/chatd/prompt.go | 11 +- docs/ai-coder/agents/tools/index.md | 156 +++++++++++ docs/manifest.json | 6 + 16 files changed, 361 insertions(+), 370 deletions(-) create mode 100644 docs/ai-coder/agents/tools/index.md diff --git a/coderd/database/dbauthz/dbauthz.go b/coderd/database/dbauthz/dbauthz.go index 6fe38428877fd..f368bcf1dfbcc 100644 --- a/coderd/database/dbauthz/dbauthz.go +++ b/coderd/database/dbauthz/dbauthz.go @@ -4397,8 +4397,7 @@ func (q *querier) GetTemplatePresetsWithPrebuilds(ctx context.Context, templateI } func (q *querier) GetTemplateRankingSignalsByOwnerID(ctx context.Context, arg database.GetTemplateRankingSignalsByOwnerIDParams) ([]database.GetTemplateRankingSignalsByOwnerIDRow, error) { - // The personal frecency signal reads the owner's own workspaces, which a - // user can always read. + // The personal signal reads only the owner's own workspaces. 
workspaceObj := rbac.ResourceWorkspace.WithOwner(arg.OwnerID.String()) if arg.OrganizationID != uuid.Nil { workspaceObj = workspaceObj.InOrg(arg.OrganizationID) @@ -4408,12 +4407,8 @@ func (q *querier) GetTemplateRankingSignalsByOwnerID(ctx context.Context, arg da if err := q.authorizeContext(ctx, policy.ActionRead, workspaceObj); err != nil { return nil, err } - // The org-popularity signal is a cross-user COUNT(DISTINCT owner_id) that we - // treat as template popularity metadata, not as permission to read other - // users' workspaces. Callers only ever pass template IDs already authorized - // via GetTemplatesWithFilter, and we verify those exact IDs here with the - // same prepared-filter semantics so ACL-only template readers keep their - // ranking signals without requiring broad org-wide template read. + // The cross-user popularity count is template metadata, not workspace + // reads, so it only requires read access to every requested template. if len(arg.TemplateIDs) > 0 { prep, err := prepareSQLFilter(ctx, q.auth, policy.ActionRead, rbac.ResourceTemplate.Type) if err != nil { diff --git a/coderd/database/dbauthz/dbauthz_test.go b/coderd/database/dbauthz/dbauthz_test.go index d66732e0faed5..3ef389fdefb41 100644 --- a/coderd/database/dbauthz/dbauthz_test.go +++ b/coderd/database/dbauthz/dbauthz_test.go @@ -3419,10 +3419,8 @@ func (s *MethodTestSuite) TestWorkspace() { check.Args(arg).Asserts(rbac.ResourceWorkspace.WithOwner(arg.OwnerID.String()).AnyOrganization(), policy.ActionRead) })) s.Run("GetTemplateRankingSignalsByOwnerID", s.Mocked(func(dbm *dbmock.MockStore, _ *gofakeit.Faker, check *expects) { - // Deny path: when any requested template is missing from the - // authorized set, the call is rejected and the underlying query is - // never reached. No expectation is registered for - // GetTemplateRankingSignalsByOwnerID, so reaching it fails the test. 
+ // Deny path: an unauthorized template ID rejects the call before the + // query runs (no query expectation is registered). arg := database.GetTemplateRankingSignalsByOwnerIDParams{ OwnerID: uuid.New(), OrganizationID: uuid.New(), diff --git a/coderd/database/dbauthz/setup_test.go b/coderd/database/dbauthz/setup_test.go index 5bf5163b91e20..be99ee7eeca1b 100644 --- a/coderd/database/dbauthz/setup_test.go +++ b/coderd/database/dbauthz/setup_test.go @@ -192,6 +192,7 @@ func (s *MethodTestSuite) SubtestWithDB(db database.Store, testCaseF func(db dat testName := s.T().Name() names := strings.Split(testName, "/") methodName := names[len(names)-1] + // Repeated subtests get "#NN" suffixes; count them under the base method. if baseMethodName, _, ok := strings.Cut(methodName, "#"); ok { methodName = baseMethodName } diff --git a/coderd/database/querier.go b/coderd/database/querier.go index f958e00893126..05834833e4c60 100644 --- a/coderd/database/querier.go +++ b/coderd/database/querier.go @@ -726,16 +726,11 @@ type sqlcQuerier interface { // It also returns the number of desired instances for each preset. // If template_id is specified, only template versions associated with that template will be returned. GetTemplatePresetsWithPrebuilds(ctx context.Context, templateID uuid.NullUUID) ([]GetTemplatePresetsWithPrebuildsRow, error) - // GetTemplateRankingSignalsByOwnerID returns the raw ranking signals for the - // given templates relative to a single owner: how many active and recently - // deleted workspaces the owner used within the lookback window, when the - // template was last used, and how many distinct developers in the organization - // currently have a non-deleted workspace on it. The affinity score itself is - // computed in Go (see listtemplates.go) because sqlc type inference is fragile - // around complex parameterized expressions unless inputs are explicitly cast - // and nested selects are kept simple. 
This query returns the exact raw signals - // the score is built from. The lookback window is applied with a - // caller-computed cutoff timestamp. + // GetTemplateRankingSignalsByOwnerID returns raw template-ranking signals for + // one owner: in-window active and recently-deleted workspace counts, the last + // in-window usage, and distinct active developers per template. The affinity + // score is computed in Go (see listtemplates.go) because sqlc type inference + // is fragile around complex parameterized expressions. GetTemplateRankingSignalsByOwnerID(ctx context.Context, arg GetTemplateRankingSignalsByOwnerIDParams) ([]GetTemplateRankingSignalsByOwnerIDRow, error) GetTemplateUsageStats(ctx context.Context, arg GetTemplateUsageStatsParams) ([]TemplateUsageStat, error) GetTemplateVersionByID(ctx context.Context, id uuid.UUID) (TemplateVersion, error) diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index 369ec1fc703fe..6fb24b3007095 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -35043,9 +35043,8 @@ func (q *sqlQuerier) GetRegularWorkspaceCreateMetrics(ctx context.Context) ([]Ge const getTemplateRankingSignalsByOwnerID = `-- name: GetTemplateRankingSignalsByOwnerID :many WITH org_usage AS ( - -- org_usage measures how many distinct developers currently have a - -- non-deleted workspace on each template. The prebuilds system user is - -- excluded so unclaimed prebuilds do not inflate popularity. + -- Distinct developers with a non-deleted workspace; the prebuilds system + -- user is excluded so unclaimed prebuilds do not inflate popularity. SELECT w.template_id, COUNT(DISTINCT w.owner_id) AS org_devs @@ -35064,10 +35063,8 @@ WITH org_usage AS ( w.template_id ), user_usage AS ( - -- user_usage counts workspaces owned by the requesting user within the - -- lookback window, splitting active from recently deleted so deleted - -- history can be counted at reduced weight. 
The window is keyed on - -- last_used_at. + -- The owner's workspaces used within the lookback window, split into + -- active and recently-deleted counts. SELECT w.template_id, COUNT(*) FILTER (WHERE NOT w.deleted) AS active_count, @@ -35115,16 +35112,11 @@ type GetTemplateRankingSignalsByOwnerIDRow struct { OrgDevs int64 `db:"org_devs" json:"org_devs"` } -// GetTemplateRankingSignalsByOwnerID returns the raw ranking signals for the -// given templates relative to a single owner: how many active and recently -// deleted workspaces the owner used within the lookback window, when the -// template was last used, and how many distinct developers in the organization -// currently have a non-deleted workspace on it. The affinity score itself is -// computed in Go (see listtemplates.go) because sqlc type inference is fragile -// around complex parameterized expressions unless inputs are explicitly cast -// and nested selects are kept simple. This query returns the exact raw signals -// the score is built from. The lookback window is applied with a -// caller-computed cutoff timestamp. +// GetTemplateRankingSignalsByOwnerID returns raw template-ranking signals for +// one owner: in-window active and recently-deleted workspace counts, the last +// in-window usage, and distinct active developers per template. The affinity +// score is computed in Go (see listtemplates.go) because sqlc type inference +// is fragile around complex parameterized expressions. 
func (q *sqlQuerier) GetTemplateRankingSignalsByOwnerID(ctx context.Context, arg GetTemplateRankingSignalsByOwnerIDParams) ([]GetTemplateRankingSignalsByOwnerIDRow, error) { rows, err := q.db.QueryContext(ctx, getTemplateRankingSignalsByOwnerID, pq.Array(arg.TemplateIDs), diff --git a/coderd/database/queries/workspaces.sql b/coderd/database/queries/workspaces.sql index f615b27cd6a1c..7d6c64aaba36e 100644 --- a/coderd/database/queries/workspaces.sql +++ b/coderd/database/queries/workspaces.sql @@ -498,20 +498,14 @@ WHERE templates.id = ANY(@template_ids :: uuid[]) GROUP BY templates.id; -- name: GetTemplateRankingSignalsByOwnerID :many --- GetTemplateRankingSignalsByOwnerID returns the raw ranking signals for the --- given templates relative to a single owner: how many active and recently --- deleted workspaces the owner used within the lookback window, when the --- template was last used, and how many distinct developers in the organization --- currently have a non-deleted workspace on it. The affinity score itself is --- computed in Go (see listtemplates.go) because sqlc type inference is fragile --- around complex parameterized expressions unless inputs are explicitly cast --- and nested selects are kept simple. This query returns the exact raw signals --- the score is built from. The lookback window is applied with a --- caller-computed cutoff timestamp. +-- GetTemplateRankingSignalsByOwnerID returns raw template-ranking signals for +-- one owner: in-window active and recently-deleted workspace counts, the last +-- in-window usage, and distinct active developers per template. The affinity +-- score is computed in Go (see listtemplates.go) because sqlc type inference +-- is fragile around complex parameterized expressions. WITH org_usage AS ( - -- org_usage measures how many distinct developers currently have a - -- non-deleted workspace on each template. The prebuilds system user is - -- excluded so unclaimed prebuilds do not inflate popularity. 
+ -- Distinct developers with a non-deleted workspace; the prebuilds system + -- user is excluded so unclaimed prebuilds do not inflate popularity. SELECT w.template_id, COUNT(DISTINCT w.owner_id) AS org_devs @@ -530,10 +524,8 @@ WITH org_usage AS ( w.template_id ), user_usage AS ( - -- user_usage counts workspaces owned by the requesting user within the - -- lookback window, splitting active from recently deleted so deleted - -- history can be counted at reduced weight. The window is keyed on - -- last_used_at. + -- The owner's workspaces used within the lookback window, split into + -- active and recently-deleted counts. SELECT w.template_id, COUNT(*) FILTER (WHERE NOT w.deleted) AS active_count, diff --git a/coderd/exp_chats_test.go b/coderd/exp_chats_test.go index f7d4c206347f5..94f4cdca305cd 100644 --- a/coderd/exp_chats_test.go +++ b/coderd/exp_chats_test.go @@ -10817,7 +10817,7 @@ func TestChatSystemPrompt(t *testing.T) { const workspaceAwareness = `No workspace is attached to this chat yet. Do not create or start a workspace by default. Many requests can be completed using the conversation, provider tools such as web_search when available, or configured external MCP tools. Workspace tools such as execute, read_file, write_file, and edit_files require an attached workspace. Only call create_workspace or start_workspace when the user explicitly asks for a workspace-backed task, or when the task cannot be completed without inspecting, editing, or running files in a workspace. -If a workspace is needed, use list_templates before create_workspace. If list_templates returns user_selection_required or a no_confident_match or ambiguous_top_matches selection_hint, ask the user to choose before create_workspace. Call read_template only when you need template parameter or preset details.` +If a workspace is needed, use list_templates before create_workspace and follow its next_step. 
Call read_template only when you need template parameter or preset details.` updateChatSystemPrompt := func(t *testing.T, ctx context.Context, req codersdk.UpdateChatSystemPromptRequest) { t.Helper() diff --git a/coderd/x/chatd/chatd_test.go b/coderd/x/chatd/chatd_test.go index 517df302ca180..cb5432f4ee72b 100644 --- a/coderd/x/chatd/chatd_test.go +++ b/coderd/x/chatd/chatd_test.go @@ -8586,7 +8586,7 @@ func TestChatAsksUserWhenListTemplatesRequiresSelection(t *testing.T) { var tplCode, tplDocker database.Template var callCount atomic.Int32 - var sawHardRule atomic.Bool + var sawSelectionRule atomic.Bool var sawSelectionRequiredResult atomic.Bool openAIURL := chattest.NewOpenAI(t, func(req *chattest.OpenAIRequest) chattest.OpenAIResponse { @@ -8600,9 +8600,8 @@ func TestChatAsksUserWhenListTemplatesRequiresSelection(t *testing.T) { for _, message := range req.Messages { promptAndTools += "\n" + message.Content } - if strings.Contains(promptAndTools, "If user_selection_required is true") && - strings.Contains(promptAndTools, "do not call create_workspace") { - sawHardRule.Store(true) + if strings.Contains(promptAndTools, "follow its next_step") { + sawSelectionRule.Store(true) } return chattest.OpenAIStreamingResponse( chattest.OpenAIToolCallChunk("list_templates", `{}`), @@ -8680,7 +8679,7 @@ func TestChatAsksUserWhenListTemplatesRequiresSelection(t *testing.T) { require.FailNowf(t, "chat run failed", "last_error=%q", chatLastErrorMessage(chatResult.LastError)) } - require.True(t, sawHardRule.Load(), "model request should include the user-selection hard rule") + require.True(t, sawSelectionRule.Load(), "model request should include the next_step selection rule") require.True(t, sawSelectionRequiredResult.Load(), "model should receive a list_templates result requiring user selection") messages, err := db.GetChatMessagesByChatID(ctx, database.GetChatMessagesByChatIDParams{ @@ -8708,27 +8707,19 @@ func TestChatAsksUserWhenListTemplatesRequiresSelection(t *testing.T) { 
} require.NotNil(t, listTemplatesResult, "expected list_templates tool result") - require.Equal(t, "no_confident_match", listTemplatesResult["selection_hint"]) - require.Equal(t, "no_ranking_signal", listTemplatesResult["recommendation_reason"]) - require.Equal(t, true, listTemplatesResult["user_selection_required"]) + require.Equal(t, chattool.NextStepAskUser, listTemplatesResult["next_step"]) require.NotContains(t, listTemplatesResult, "recommended_template_id") require.Contains(t, listTemplatesResult["templates"], any(map[string]any{ - "id": tplCode.ID.String(), - "name": "code-2", - "organization_id": org.ID.String(), - "display_name": "typescript-alpha", - "description": "this is a long description", - "rank": float64(1), - "relevance_signals": "ordered_by_name", + "id": tplCode.ID.String(), + "name": "code-2", + "display_name": "typescript-alpha", + "description": "this is a long description", })) require.Contains(t, listTemplatesResult["templates"], any(map[string]any{ - "id": tplDocker.ID.String(), - "name": "docker", - "organization_id": org.ID.String(), - "display_name": "Docker Containers", - "description": "Provision Docker containers as Coder workspaces", - "rank": float64(2), - "relevance_signals": "ordered_by_name", + "id": tplDocker.ID.String(), + "name": "docker", + "display_name": "Docker Containers", + "description": "Provision Docker containers as Coder workspaces", })) require.False(t, sawCreateWorkspaceResult, "agent should ask instead of calling create_workspace") require.Contains(t, assistantText, "Which template should I use?") @@ -8744,8 +8735,7 @@ func listTemplatesResultRequiresUserSelection(messages []chattest.OpenAIMessage) if err := json.Unmarshal([]byte(message.Content), &result); err != nil { continue } - required, _ := result["user_selection_required"].(bool) - if result["selection_hint"] == "no_confident_match" && required { + if result["next_step"] == chattool.NextStepAskUser { return true } } diff --git 
a/coderd/x/chatd/chattool/createworkspace.go b/coderd/x/chatd/chattool/createworkspace.go index 48794551fb69b..b47f1aa778a0f 100644 --- a/coderd/x/chatd/chattool/createworkspace.go +++ b/coderd/x/chatd/chattool/createworkspace.go @@ -75,7 +75,7 @@ type CreateWorkspaceOptions struct { } type createWorkspaceArgs struct { - TemplateID string `json:"template_id" description:"The UUIDv4 of the template to create the workspace from. Obtain this from list_templates recommended_template_id or a ranked template."` + TemplateID string `json:"template_id" description:"The UUIDv4 of the template to create the workspace from. Obtain this from list_templates."` Name string `json:"name,omitempty" description:"The name of the workspace to create. If not provided, a random name will be generated."` Parameters map[string]string `json:"parameters,omitempty" description:"Key-value pairs of template parameters to use when creating the workspace. Obtain available parameters from read_template when needed."` PresetID string `json:"preset_id,omitempty" description:"The UUIDv4 of a template version preset to use. Obtain available presets from read_template when needed. When provided, the preset's parameters are applied automatically and the workspace may claim a prebuilt instance for faster startup."` @@ -95,20 +95,12 @@ func CreateWorkspace(db database.Store, organizationID, chatID uuid.UUID, option "or when the user explicitly asks for one. Do not use this as a "+ "default first step for requests answerable from conversation "+ "context, provider tools, or external MCP tools. Requires a "+ - "template_id from list_templates. Use recommended_template_id "+ - "or rank 1 when list_templates reports a confident choice. "+ - "If list_templates returned user_selection_required true, "+ - "or selection_hint no_confident_match or ambiguous_top_matches, "+ - "do not call create_workspace. Ask the user to choose a "+ - "template unless the user already explicitly selected one. 
"+ - "Optionally provide a name and parameter values from "+ - "read_template. If no name is given, one will be generated. "+ - "Provide a preset_id from read_template to apply "+ - "preset parameters and potentially claim a prebuilt "+ - "workspace for faster startup. "+ - "This tool is idempotent. If the chat already has a "+ - "workspace that is building or running, the existing "+ - "workspace is returned.", + "template_id from list_templates; follow its "+NextStepField+" "+ + "before calling. Optionally provide a name (one is generated if "+ + "omitted), parameter values, and a preset_id from read_template "+ + "to apply preset parameters and potentially claim a prebuilt "+ + "workspace for faster startup. Idempotent: if the chat already "+ + "has a workspace building or running, it is returned.", func(ctx context.Context, args createWorkspaceArgs, _ fantasy.ToolCall) (fantasy.ToolResponse, error) { if options.CreateFn == nil { return fantasy.NewTextErrorResponse("workspace creator is not configured"), nil diff --git a/coderd/x/chatd/chattool/listtemplates.go b/coderd/x/chatd/chattool/listtemplates.go index e159b69b694b4..7cfca268e66b7 100644 --- a/coderd/x/chatd/chattool/listtemplates.go +++ b/coderd/x/chatd/chattool/listtemplates.go @@ -25,25 +25,15 @@ import ( const ( listTemplatesPageSize = 10 - // listTemplatesMinActiveDevelopersForRecommendation is the organization - // popularity floor: a template needs at least this many active developers - // before organization popularity on its own is a confident recommendation. + // Minimum active developers before organization popularity alone is a + // confident recommendation. listTemplatesMinActiveDevelopersForRecommendation = 2 - // The following constants parameterize the affinity score, a "frecency" - // signal (frequency discounted by recency). 
The personal term is the count - // of the user's recent workspaces (active plus a fraction of - // recently-deleted) multiplied by a recency decay; the organization term is - // a log-scaled active-developer count. Only the ratio of the personal to - // organization weight matters. They are deliberately explicit so the - // ranking can be calibrated as ranking-quality signal accrues. - // - // The score is computed in Go (computeAffinityScore) rather than SQL because - // sqlc type inference is fragile around complex parameterized expressions - // unless inputs are explicitly cast and nested selects are kept simple; see - // GetTemplateRankingSignalsByOwnerID. Keeping the score and the confidence - // thresholds in the same place also avoids Postgres-versus-Go floating-point - // differences at confidence boundaries. + // Affinity ("frecency") parameters: recency-decayed personal usage plus + // log-scaled organization popularity. Computed in Go rather than SQL so + // the score and its confidence thresholds share float semantics, and + // because sqlc type inference is fragile around complex parameterized + // expressions. listTemplatesLookbackDays = 60 listTemplatesHalfLife = 14 * 24 * time.Hour listTemplatesPersonalWeight = 10.0 @@ -52,34 +42,33 @@ const ( ) var ( - // minConfidentAffinityScore preserves today's floor: organization - // popularity alone is confident once a template reaches the active-developer - // minimum. math.Log1p(n) == ln(1+n) is exactly the organization term of the - // affinity score, so the threshold and the score stay float-consistent. + // Confidence floor: organization popularity alone is confident at the + // active-developer minimum. minConfidentAffinityScore = listTemplatesOrgWeight * math.Log1p(listTemplatesMinActiveDevelopersForRecommendation) - // minConfidentGap requires rank 1 to lead rank 2 by at least the score - // difference between "min" and "min-1" active developers before - // recommending when both clear the floor. 
It is derived, not tuned, so - // "2 developers versus 1" still recommends while "16 versus 15" does not. + // Required rank-1 lead over rank 2, derived so "2 developers versus 1" + // recommends while "16 versus 15" does not. minConfidentGap = listTemplatesOrgWeight * (math.Log1p(listTemplatesMinActiveDevelopersForRecommendation) - math.Log1p(listTemplatesMinActiveDevelopersForRecommendation-1)) ) -// affinityScoreEpsilon absorbs floating-point rounding so a score sitting -// exactly on a threshold boundary counts as meeting it. +// affinityScoreEpsilon absorbs float rounding at threshold boundaries. const affinityScoreEpsilon = 1e-9 -// affinityScoreAtLeast reports whether score meets threshold within the -// comparison epsilon. func affinityScoreAtLeast(score, threshold float64) bool { return score >= threshold-affinityScoreEpsilon } +// NextStepField is the list_templates result field carrying the instruction +// the model should follow next. Tool descriptions and prompts reference it +// by name. +const NextStepField = "next_step" + +// Next-step instructions returned with every list_templates result. const ( - listTemplatesHintOnlyAvailable = "only_available_template" - listTemplatesHintHighConfidence = "high_confidence_recommendation" - listTemplatesHintAmbiguous = "ambiguous_top_matches" - listTemplatesHintNoConfidence = "no_confident_match" + NextStepUseRecommended = "Use recommended_template_id with create_workspace. Call read_template first only if you need parameter or preset details." + NextStepAskUser = "Do not call create_workspace yet. Ask the user to choose a template, unless they already named one." + NextStepNoMatches = "No templates matched the query. Retry without a query or ask the user." + NextStepNoTemplates = "No templates are available to this chat. Inform the user." ) const ( @@ -89,9 +78,9 @@ const ( queryScoreDescriptionMatch = 1 ) -// ListTemplatesOptions configures the list_templates tool. OwnerID is required. 
-// Logger may be zero-valued; Clock defaults to a real clock when nil. -// AllowedTemplateIDs optionally restricts which templates can be returned. +// ListTemplatesOptions configures the list_templates tool. OwnerID is +// required; Clock defaults to a real clock when nil. AllowedTemplateIDs +// optionally restricts which templates can be returned. type ListTemplatesOptions struct { OwnerID uuid.UUID Logger slog.Logger @@ -101,7 +90,7 @@ type ListTemplatesOptions struct { type listTemplatesArgs struct { Query string `json:"query,omitempty" description:"Optional text to filter templates by name, display name, or description."` - Page int `json:"page,omitempty" description:"Page number for pagination (starts at 1). Each page returns up to 10 ranked templates."` + Page int `json:"page,omitempty" description:"Page number (starts at 1)."` } type rankedTemplate struct { @@ -109,14 +98,10 @@ type rankedTemplate struct { QueryScore int Signals templateRankingSignals AffinityScore float64 - Rank int } -// templateRankingSignals holds the raw, per-template ranking inputs returned by -// GetTemplateRankingSignalsByOwnerID. ActiveCount and DeletedRecentCount are the -// user's in-window workspace counts; LastUsedAt is the most recent usage within -// the window (zero when there is none); OrgDevs is the count of distinct active -// developers in the organization. +// templateRankingSignals holds the per-template ranking inputs returned by +// GetTemplateRankingSignalsByOwnerID. type templateRankingSignals struct { ActiveCount int64 DeletedRecentCount int64 @@ -124,18 +109,9 @@ type templateRankingSignals struct { OrgDevs int64 } -// hasPersonalUsage reports whether the user used the template within the -// lookback window, counting recently-deleted workspaces so deleted history is -// still treated as personal usage. 
-func (s templateRankingSignals) hasPersonalUsage() bool { - return s.ActiveCount+s.DeletedRecentCount > 0 -} - -// ListTemplates returns a tool that lists available workspace templates. -// The agent uses this to discover templates before creating a workspace. -// Results are ranked before pagination using query relevance, current-user -// usage, and organization-wide popularity. -// db must not be nil. +// ListTemplates returns a tool that lists workspace templates as a ranked +// shortlist, ordered by query relevance, the user's recent usage, and +// organization popularity. db must not be nil. func ListTemplates(db database.Store, organizationID uuid.UUID, options ListTemplatesOptions) fantasy.AgentTool { clock := options.Clock if clock == nil { @@ -144,18 +120,10 @@ func ListTemplates(db database.Store, organizationID uuid.UUID, options ListTemp return fantasy.NewAgentTool( "list_templates", - "List available workspace templates as a ranked shortlist. "+ - "Optionally provide a search query matching template name, "+ - "display name, or description. Use recommended_template_id "+ - "or rank 1 as the default choice when selection_hint is "+ - "only_available_template or high_confidence_recommendation. "+ - "If user_selection_required is true, or selection_hint is "+ - "no_confident_match or ambiguous_top_matches, do not call "+ - "create_workspace. Ask the user to choose a template unless "+ - "the user already explicitly selected one. "+ - "Do not paginate unless the returned templates do not fit the "+ - "request, selection_hint reports ambiguity or no confident match, "+ - "or the user asked to browse templates. Returns 10 per page.", + "List workspace templates as a ranked shortlist, optionally filtered "+ + "by a query matching template name, display name, or description. "+ + "Follow the "+NextStepField+" field in the result. 
Returns 10 per "+ + "page; fetch next_page only when no listed template fits the request.", func(ctx context.Context, args listTemplatesArgs, _ fantasy.ToolCall) (fantasy.ToolResponse, error) { ctx, err := asOwner(ctx, db, options.OwnerID) if err != nil { @@ -210,47 +178,32 @@ func ListTemplates(db database.Store, organizationID uuid.UUID, options ListTemp } rankTemplates(ranked, query) - selectionHint, recommendedID, recommendationReason := selectTemplateRecommendation( + recommendedID, nextStep := selectTemplateRecommendation( ranked, visibleTemplateCount, signalsErr, ) - // Paginate. page := args.Page if page < 1 { page = 1 } totalCount := len(ranked) - totalPages := (totalCount + listTemplatesPageSize - 1) / listTemplatesPageSize - if totalPages == 0 { - totalPages = 1 - } - start := (page - 1) * listTemplatesPageSize - end := start + listTemplatesPageSize - if start > totalCount { - start = totalCount - } - if end > totalCount { - end = totalCount - } - pageTemplates := ranked[start:end] + start := min((page-1)*listTemplatesPageSize, totalCount) + end := min(start+listTemplatesPageSize, totalCount) - items := make([]map[string]any, 0, len(pageTemplates)) - for _, t := range pageTemplates { - items = append(items, templateItem(t, recommendedID)) + items := make([]map[string]any, 0, end-start) + for _, t := range ranked[start:end] { + items = append(items, templateItem(t)) } result := map[string]any{ - "templates": items, - "count": len(items), - "page": page, - "total_pages": totalPages, - "total_count": totalCount, - "available_template_count": visibleTemplateCount, - "selection_hint": selectionHint, - "recommendation_reason": recommendationReason, - "user_selection_required": userSelectionRequired(selectionHint), + "templates": items, + "page": page, + NextStepField: nextStep, + } + if end < totalCount { + result["next_page"] = page + 1 } if recommendedID != uuid.Nil { result["recommended_template_id"] = recommendedID.String() @@ -288,10 +241,8 @@ func 
loadTemplateRankingSignals( return signals, nil } - // The templates were already authorized with the owner's permissions by - // GetTemplatesWithFilter. GetTemplateRankingSignalsByOwnerID authorizes the - // owner reading their own workspaces plus a template-metadata read for the - // cross-user popularity count, so no system escalation is needed here. + // Runs with the owner's permissions; no system escalation. See the + // dbauthz GetTemplateRankingSignalsByOwnerID authorization notes. rows, err := db.GetTemplateRankingSignalsByOwnerID(ctx, database.GetTemplateRankingSignalsByOwnerIDParams{ TemplateIDs: templateIDs, OwnerID: ownerID, @@ -317,10 +268,7 @@ func loadTemplateRankingSignals( } // computeAffinityScore folds the raw signals into a single "frecency" score: -// the personal workspace count (active plus a fraction of recently-deleted) -// multiplied by a recency decay, plus a log-scaled organization-popularity -// term. When the user has no in-window usage the personal term is zero and the -// score collapses to organization popularity. +// recency-decayed personal usage plus log-scaled organization popularity. func computeAffinityScore(s templateRankingSignals, now time.Time) float64 { personal := 0.0 if !s.LastUsedAt.IsZero() { @@ -336,9 +284,8 @@ func computeAffinityScore(s templateRankingSignals, now time.Time) float64 { return personal + org } -// rankTemplates orders templates by query relevance first (only when a query is -// present), then by affinity score, with template name and ID as deterministic -// tiebreakers. +// rankTemplates orders by query relevance (when a query is present), then +// affinity score, then name and ID for determinism. 
func rankTemplates(ranked []rankedTemplate, query string) { slices.SortStableFunc(ranked, func(a, b rankedTemplate) int { if query != "" { @@ -354,79 +301,64 @@ func rankTemplates(ranked []rankedTemplate, query string) { } return cmp.Compare(a.Template.ID.String(), b.Template.ID.String()) }) - - for i := range ranked { - ranked[i].Rank = i + 1 - } } -// selectTemplateRecommendation decides whether to recommend the top-ranked -// template or ask the user to choose. Query relevance is the primary signal: a -// decisive query match recommends on its own. Otherwise confidence comes from -// the affinity score, which must clear a floor and lead the runner-up by a -// margin before recommending. +// selectTemplateRecommendation returns the recommended template (uuid.Nil for +// none) and the next-step instruction. A decisive query match recommends on +// its own; otherwise the affinity score must clear a floor and lead the +// runner-up by a margin. func selectTemplateRecommendation( ranked []rankedTemplate, visibleTemplateCount int, rankingSignalsErr error, -) (string, uuid.UUID, string) { +) (uuid.UUID, string) { if len(ranked) == 0 { - return listTemplatesHintNoConfidence, uuid.Nil, "no_matching_templates" + if visibleTemplateCount == 0 { + return uuid.Nil, NextStepNoTemplates + } + return uuid.Nil, NextStepNoMatches } top := ranked[0] if visibleTemplateCount == 1 && len(ranked) == 1 { - return listTemplatesHintOnlyAvailable, top.Template.ID, "only_available_template" + return top.Template.ID, NextStepUseRecommended } - // A decisive query match (strictly outscoring the runner-up, or the only - // match) is a confident recommendation on its own, even when the affinity - // signals failed to load. + // A decisive query match recommends even when signals failed to load. 
if top.QueryScore > 0 && (len(ranked) == 1 || top.QueryScore > ranked[1].QueryScore) { - return listTemplatesHintHighConfidence, top.Template.ID, relevanceSignals(top) + return top.Template.ID, NextStepUseRecommended } - // Without a decisive query tier the affinity score decides confidence, so an - // unreliable (failed) signal load means we must ask the user. + // Beyond a decisive query match, confidence comes from the affinity + // score, so a failed signal load means asking the user. if rankingSignalsErr != nil { - return listTemplatesHintNoConfidence, uuid.Nil, "ranking_signals_unavailable" + return uuid.Nil, NextStepAskUser } - // Query present but the top two tie on relevance: break the tie with the - // affinity score when the gap is clear, otherwise ask the user. + // Query tie: break it with a clear affinity gap. if top.QueryScore > 0 { if len(ranked) > 1 && affinityScoreAtLeast(top.AffinityScore-ranked[1].AffinityScore, minConfidentGap) { - return listTemplatesHintHighConfidence, top.Template.ID, relevanceSignals(top) + return top.Template.ID, NextStepUseRecommended } - return listTemplatesHintAmbiguous, uuid.Nil, "top_templates_are_ambiguous" + return uuid.Nil, NextStepAskUser } - // No query: recommend purely on the affinity score. + // No query: the affinity score alone decides. 
if !affinityScoreAtLeast(top.AffinityScore, minConfidentAffinityScore) { - if top.AffinityScore <= 0 { - return listTemplatesHintNoConfidence, uuid.Nil, "no_ranking_signal" - } - return listTemplatesHintNoConfidence, uuid.Nil, "weak_ranking_signal" + return uuid.Nil, NextStepAskUser } if len(ranked) > 1 && affinityScoreAtLeast(ranked[1].AffinityScore, minConfidentAffinityScore) && !affinityScoreAtLeast(top.AffinityScore-ranked[1].AffinityScore, minConfidentGap) { - return listTemplatesHintAmbiguous, uuid.Nil, "top_templates_are_ambiguous" + return uuid.Nil, NextStepAskUser } - return listTemplatesHintHighConfidence, top.Template.ID, relevanceSignals(top) -} - -func userSelectionRequired(selectionHint string) bool { - return selectionHint == listTemplatesHintAmbiguous || selectionHint == listTemplatesHintNoConfidence + return top.Template.ID, NextStepUseRecommended } -func templateItem(t rankedTemplate, recommendedID uuid.UUID) map[string]any { +func templateItem(t rankedTemplate) map[string]any { item := map[string]any{ - "id": t.Template.ID.String(), - "name": t.Template.Name, - "organization_id": t.Template.OrganizationID.String(), - "rank": t.Rank, - "relevance_signals": relevanceSignals(t), + "id": t.Template.ID.String(), + "name": t.Template.Name, } if display := strings.TrimSpace(t.Template.DisplayName); display != "" { item["display_name"] = display @@ -440,35 +372,12 @@ func templateItem(t rankedTemplate, recommendedID uuid.UUID) map[string]any { if t.Signals.ActiveCount > 0 { item["your_workspace_count"] = t.Signals.ActiveCount } - if t.Signals.DeletedRecentCount > 0 { - item["your_recently_deleted_workspace_count"] = t.Signals.DeletedRecentCount - } - if t.Signals.hasPersonalUsage() && !t.Signals.LastUsedAt.IsZero() { + if !t.Signals.LastUsedAt.IsZero() { item["last_used_by_you"] = t.Signals.LastUsedAt.Format(time.RFC3339Nano) } - if t.Template.ID == recommendedID { - item["recommended"] = true - } return item } -func relevanceSignals(t rankedTemplate) 
string { - hasQuery := t.QueryScore > 0 - hasPersonal := t.Signals.hasPersonalUsage() - switch { - case hasQuery && hasPersonal: - return "matches_query_and_used_by_you" - case hasQuery: - return "matches_query" - case hasPersonal: - return "used_by_you" - case t.Signals.OrgDevs > 0: - return "popular_in_org" - default: - return "ordered_by_name" - } -} - func templateQueryScore(t database.Template, query string) int { query = normalizeTemplateSearch(query) queryCompact := compactTemplateSearch(query) @@ -491,9 +400,7 @@ func templateQueryScore(t database.Template, query string) int { } // nameQueryScore returns the relevance tier of a single name-like field: -// exact match outranks prefix match, which outranks substring match, on -// either the normalized or compact form. Returns 0 when the field does not -// match. +// exact match outranks prefix match, which outranks substring match. func nameQueryScore(field, query, queryCompact string) int { field = normalizeTemplateSearch(field) if field == "" { @@ -521,8 +428,7 @@ func compactTemplateSearch(value string) string { return templateSearchCompactReplacer.Replace(value) } -// asOwner sets up a dbauthz context for the given owner so that -// subsequent database calls are scoped to what that user can access. +// asOwner sets up a dbauthz context scoped to what the owner can access. 
func asOwner(ctx context.Context, db database.Store, ownerID uuid.UUID) (context.Context, error) { actor, _, err := httpmw.UserRBACSubject(ctx, db, ownerID, rbac.ScopeAll) if err != nil { diff --git a/coderd/x/chatd/chattool/listtemplates_internal_test.go b/coderd/x/chatd/chattool/listtemplates_internal_test.go index 4dc9479608fed..190dbf1ce4eb6 100644 --- a/coderd/x/chatd/chattool/listtemplates_internal_test.go +++ b/coderd/x/chatd/chattool/listtemplates_internal_test.go @@ -54,141 +54,141 @@ func TestSelectTemplateRecommendation(t *testing.T) { loadErr := xerrors.New("signals failed to load") - t.Run("NoMatches", func(t *testing.T) { + t.Run("NoTemplatesAvailable", func(t *testing.T) { t.Parallel() - hint, id, reason := selectTemplateRecommendation(nil, 0, nil) - require.Equal(t, listTemplatesHintNoConfidence, hint) + id, next := selectTemplateRecommendation(nil, 0, nil) require.Equal(t, uuid.Nil, id) - require.Equal(t, "no_matching_templates", reason) + require.Equal(t, NextStepNoTemplates, next) + }) + + t.Run("QueryFiltersEverything", func(t *testing.T) { + t.Parallel() + id, next := selectTemplateRecommendation(nil, 2, nil) + require.Equal(t, uuid.Nil, id) + require.Equal(t, NextStepNoMatches, next) }) t.Run("OnlyAvailable", func(t *testing.T) { t.Parallel() only := uuid.New() - hint, id, reason := selectTemplateRecommendation( + id, next := selectTemplateRecommendation( []rankedTemplate{{Template: database.Template{ID: only}}}, 1, loadErr, ) - require.Equal(t, listTemplatesHintOnlyAvailable, hint) require.Equal(t, only, id) - require.Equal(t, "only_available_template", reason) + require.Equal(t, NextStepUseRecommended, next) }) t.Run("DecisiveQueryRecommendsEvenWithLoadError", func(t *testing.T) { t.Parallel() top := uuid.New() for _, err := range []error{nil, loadErr} { - hint, id, reason := selectTemplateRecommendation( + id, next := selectTemplateRecommendation( []rankedTemplate{ {Template: database.Template{ID: top}, QueryScore: queryScoreExactName}, 
{Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreDescriptionMatch}, }, 2, err, ) - require.Equal(t, listTemplatesHintHighConfidence, hint) require.Equal(t, top, id) - require.Equal(t, "matches_query", reason) + require.Equal(t, NextStepUseRecommended, next) } }) t.Run("QueryTieBrokenByAffinityGap", func(t *testing.T) { t.Parallel() top := uuid.New() - hint, id, reason := selectTemplateRecommendation( + id, next := selectTemplateRecommendation( []rankedTemplate{ {Template: database.Template{ID: top}, QueryScore: queryScoreNamePrefix, AffinityScore: 10, Signals: templateRankingSignals{ActiveCount: 1}}, {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreNamePrefix, AffinityScore: 0}, }, 2, nil, ) - require.Equal(t, listTemplatesHintHighConfidence, hint) require.Equal(t, top, id) - require.Equal(t, "matches_query_and_used_by_you", reason) + require.Equal(t, NextStepUseRecommended, next) }) t.Run("QueryTieWithSmallGapIsAmbiguous", func(t *testing.T) { t.Parallel() - hint, id, _ := selectTemplateRecommendation( + id, next := selectTemplateRecommendation( []rankedTemplate{ {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreNamePrefix, AffinityScore: 0.1}, {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreNamePrefix, AffinityScore: 0}, }, 2, nil, ) - require.Equal(t, listTemplatesHintAmbiguous, hint) require.Equal(t, uuid.Nil, id) + require.Equal(t, NextStepAskUser, next) }) - t.Run("QueryTieWithLoadErrorIsUnavailable", func(t *testing.T) { + t.Run("QueryTieWithLoadErrorAsksUser", func(t *testing.T) { t.Parallel() - hint, id, reason := selectTemplateRecommendation( + id, next := selectTemplateRecommendation( []rankedTemplate{ {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreNamePrefix}, {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreNamePrefix}, }, 2, loadErr, ) - require.Equal(t, listTemplatesHintNoConfidence, hint) require.Equal(t, uuid.Nil, id) - 
require.Equal(t, "ranking_signals_unavailable", reason) + require.Equal(t, NextStepAskUser, next) }) t.Run("NoQueryNoSignal", func(t *testing.T) { t.Parallel() - hint, _, reason := selectTemplateRecommendation( + id, next := selectTemplateRecommendation( []rankedTemplate{ {Template: database.Template{ID: uuid.New()}}, {Template: database.Template{ID: uuid.New()}}, }, 2, nil, ) - require.Equal(t, listTemplatesHintNoConfidence, hint) - require.Equal(t, "no_ranking_signal", reason) + require.Equal(t, uuid.Nil, id) + require.Equal(t, NextStepAskUser, next) }) t.Run("NoQueryWeakSignalBelowFloor", func(t *testing.T) { t.Parallel() // One active developer scores ln(2), below the ln(3) floor. - hint, _, reason := selectTemplateRecommendation( + id, next := selectTemplateRecommendation( []rankedTemplate{ {Template: database.Template{ID: uuid.New()}, AffinityScore: math.Log1p(1), Signals: templateRankingSignals{OrgDevs: 1}}, {Template: database.Template{ID: uuid.New()}, AffinityScore: 0}, }, 2, nil, ) - require.Equal(t, listTemplatesHintNoConfidence, hint) - require.Equal(t, "weak_ranking_signal", reason) + require.Equal(t, uuid.Nil, id) + require.Equal(t, NextStepAskUser, next) }) t.Run("NoQueryConfidentWhenLeadsRunnerUp", func(t *testing.T) { t.Parallel() top := uuid.New() - hint, id, reason := selectTemplateRecommendation( + id, next := selectTemplateRecommendation( []rankedTemplate{ {Template: database.Template{ID: top}, AffinityScore: math.Log1p(3), Signals: templateRankingSignals{OrgDevs: 3}}, {Template: database.Template{ID: uuid.New()}, AffinityScore: math.Log1p(1), Signals: templateRankingSignals{OrgDevs: 1}}, }, 2, nil, ) - require.Equal(t, listTemplatesHintHighConfidence, hint) require.Equal(t, top, id) - require.Equal(t, "popular_in_org", reason) + require.Equal(t, NextStepUseRecommended, next) }) t.Run("NoQueryAmbiguousWhenBothClearFloorAndClose", func(t *testing.T) { t.Parallel() - hint, id, _ := selectTemplateRecommendation( + id, next := 
selectTemplateRecommendation( []rankedTemplate{ {Template: database.Template{ID: uuid.New()}, AffinityScore: 1.20, Signals: templateRankingSignals{OrgDevs: 2}}, {Template: database.Template{ID: uuid.New()}, AffinityScore: 1.15, Signals: templateRankingSignals{OrgDevs: 2}}, }, 2, nil, ) - require.Equal(t, listTemplatesHintAmbiguous, hint) require.Equal(t, uuid.Nil, id) + require.Equal(t, NextStepAskUser, next) }) - t.Run("NoQueryLoadErrorIsUnavailable", func(t *testing.T) { + t.Run("NoQueryLoadErrorAsksUser", func(t *testing.T) { t.Parallel() - hint, id, reason := selectTemplateRecommendation( + id, next := selectTemplateRecommendation( []rankedTemplate{ {Template: database.Template{ID: uuid.New()}, AffinityScore: math.Log1p(3), Signals: templateRankingSignals{OrgDevs: 3}}, {Template: database.Template{ID: uuid.New()}}, }, 2, loadErr, ) - require.Equal(t, listTemplatesHintNoConfidence, hint) require.Equal(t, uuid.Nil, id) - require.Equal(t, "ranking_signals_unavailable", reason) + require.Equal(t, NextStepAskUser, next) }) } diff --git a/coderd/x/chatd/chattool/listtemplates_test.go b/coderd/x/chatd/chattool/listtemplates_test.go index f957cb5e9e73c..52cf119f335a2 100644 --- a/coderd/x/chatd/chattool/listtemplates_test.go +++ b/coderd/x/chatd/chattool/listtemplates_test.go @@ -85,8 +85,7 @@ func TestListTemplates_OrganizationFilter(t *testing.T) { require.NoError(t, json.Unmarshal([]byte(resp.Content), &result)) templates := result["templates"].([]any) require.Len(t, templates, 2) - require.Equal(t, "no_confident_match", result["selection_hint"]) - require.Equal(t, "no_ranking_signal", result["recommendation_reason"]) + require.Equal(t, chattool.NextStepAskUser, result["next_step"]) _, ok := result["recommended_template_id"] require.False(t, ok) }) @@ -166,35 +165,29 @@ func TestListTemplates_QueryMatchesDisplayNameAndDescription(t *testing.T) { templates := listTemplateItems(t, result) require.Len(t, templates, 1) require.Equal(t, displayTemplate.ID.String(), 
templates[0]["id"]) - require.Equal(t, "high_confidence_recommendation", result["selection_hint"]) - require.Equal(t, false, result["user_selection_required"]) + require.Equal(t, chattool.NextStepUseRecommended, result["next_step"]) require.Equal(t, displayTemplate.ID.String(), result["recommended_template_id"]) - require.Equal(t, "matches_query", templates[0]["relevance_signals"]) result = runListTemplates(ctx, t, tool, `{"query":"TypeScript"}`) templates = listTemplateItems(t, result) require.Len(t, templates, 1) require.Equal(t, descriptionTemplate.ID.String(), templates[0]["id"]) - require.Equal(t, "high_confidence_recommendation", result["selection_hint"]) + require.Equal(t, chattool.NextStepUseRecommended, result["next_step"]) require.Equal(t, descriptionTemplate.ID.String(), result["recommended_template_id"]) result = runListTemplates(ctx, t, tool, `{"query":"-"}`) templates = listTemplateItems(t, result) require.Empty(t, templates) - require.Equal(t, float64(0), result["total_count"]) - require.Equal(t, float64(3), result["available_template_count"]) - require.Equal(t, "no_confident_match", result["selection_hint"]) - require.Equal(t, true, result["user_selection_required"]) - require.Equal(t, "no_matching_templates", result["recommendation_reason"]) + require.Equal(t, chattool.NextStepNoMatches, result["next_step"]) + _, ok := result["recommended_template_id"] + require.False(t, ok) result = runListTemplates(ctx, t, tool, `{"query":"does-not-exist"}`) templates = listTemplateItems(t, result) require.Empty(t, templates) - require.Equal(t, float64(0), result["total_count"]) - require.Equal(t, float64(3), result["available_template_count"]) - require.Equal(t, "no_confident_match", result["selection_hint"]) - require.Equal(t, true, result["user_selection_required"]) - require.Equal(t, "no_matching_templates", result["recommendation_reason"]) + require.Equal(t, chattool.NextStepNoMatches, result["next_step"]) + _, ok = result["recommended_template_id"] + 
require.False(t, ok) } func TestListTemplates_QueryScoreTiers(t *testing.T) { @@ -298,22 +291,20 @@ func TestListTemplates_RanksAllCandidatesBeforePagination(t *testing.T) { result := runListTemplates(ctx, t, tool, `{}`) templates := listTemplateItems(t, result) require.Len(t, templates, 10) - require.Equal(t, float64(11), result["total_count"]) - require.Equal(t, float64(2), result["total_pages"]) + require.Equal(t, float64(1), result["page"]) + require.Equal(t, float64(2), result["next_page"]) require.Equal(t, target.ID.String(), templates[0]["id"]) - require.Equal(t, float64(1), templates[0]["rank"]) require.Equal(t, float64(1), templates[0]["your_workspace_count"]) require.NotEmpty(t, templates[0]["last_used_by_you"]) - require.Equal(t, true, templates[0]["recommended"]) - require.Equal(t, "used_by_you", templates[0]["relevance_signals"]) - require.Equal(t, "high_confidence_recommendation", result["selection_hint"]) + require.Equal(t, chattool.NextStepUseRecommended, result["next_step"]) require.Equal(t, target.ID.String(), result["recommended_template_id"]) result = runListTemplates(ctx, t, tool, `{"page":2}`) templates = listTemplateItems(t, result) require.Len(t, templates, 1) require.Equal(t, float64(2), result["page"]) - require.Equal(t, float64(11), templates[0]["rank"]) + _, hasNextPage := result["next_page"] + require.False(t, hasNextPage) } func TestListTemplates_QueryRelevanceOutranksPersonalUsage(t *testing.T) { @@ -354,9 +345,7 @@ func TestListTemplates_QueryRelevanceOutranksPersonalUsage(t *testing.T) { require.Len(t, templates, 2) require.Equal(t, target.ID.String(), templates[0]["id"]) require.Equal(t, used.ID.String(), templates[1]["id"]) - require.Equal(t, "matches_query", templates[0]["relevance_signals"]) - require.Equal(t, "matches_query_and_used_by_you", templates[1]["relevance_signals"]) - require.Equal(t, "high_confidence_recommendation", result["selection_hint"]) + require.Equal(t, chattool.NextStepUseRecommended, result["next_step"]) 
require.Equal(t, target.ID.String(), result["recommended_template_id"]) } @@ -396,8 +385,7 @@ func TestListTemplates_PersonalUsageBreaksEqualQueryScoreTie(t *testing.T) { require.Len(t, templates, 2) require.Equal(t, used.ID.String(), templates[0]["id"]) require.Equal(t, unused.ID.String(), templates[1]["id"]) - require.Equal(t, "matches_query_and_used_by_you", templates[0]["relevance_signals"]) - require.Equal(t, "high_confidence_recommendation", result["selection_hint"]) + require.Equal(t, chattool.NextStepUseRecommended, result["next_step"]) require.Equal(t, used.ID.String(), result["recommended_template_id"]) } @@ -445,8 +433,7 @@ func TestListTemplates_OrgPopularityFallback(t *testing.T) { require.Len(t, templates, 2) require.Equal(t, popular.ID.String(), templates[0]["id"]) require.Equal(t, float64(2), templates[0]["active_developers"]) - require.Equal(t, "popular_in_org", templates[0]["relevance_signals"]) - require.Equal(t, "high_confidence_recommendation", result["selection_hint"]) + require.Equal(t, chattool.NextStepUseRecommended, result["next_step"]) require.Equal(t, popular.ID.String(), result["recommended_template_id"]) } @@ -487,8 +474,7 @@ func TestListTemplates_WeakOrgPopularityDoesNotRecommend(t *testing.T) { require.Equal(t, usedByOne.ID.String(), templates[0]["id"]) require.Equal(t, unused.ID.String(), templates[1]["id"]) require.Equal(t, float64(1), templates[0]["active_developers"]) - require.Equal(t, "no_confident_match", result["selection_hint"]) - require.Equal(t, "weak_ranking_signal", result["recommendation_reason"]) + require.Equal(t, chattool.NextStepAskUser, result["next_step"]) _, ok := result["recommended_template_id"] require.False(t, ok) } @@ -533,12 +519,10 @@ func TestListTemplates_StalePersonalUsageDoesNotRecommend(t *testing.T) { require.Len(t, templates, 2) require.Equal(t, oldUsage.ID.String(), templates[0]["id"]) require.Equal(t, unused.ID.String(), templates[1]["id"]) - // The 180-day-old workspace is outside the 60-day 
lookback window, so it no - // longer counts as in-window personal usage. + // 180 days old is outside the 60-day lookback window. _, hasCount := templates[0]["your_workspace_count"] require.False(t, hasCount) - require.Equal(t, "no_confident_match", result["selection_hint"]) - require.Equal(t, "weak_ranking_signal", result["recommendation_reason"]) + require.Equal(t, chattool.NextStepAskUser, result["next_step"]) _, ok := result["recommended_template_id"] require.False(t, ok) } @@ -567,9 +551,7 @@ func TestListTemplates_StaleFrequentPersonalUsageDoesNotRecommend(t *testing.T) CreatedBy: user.ID, Name: "unused", }) - // Two workspaces used 180 days ago. Frequency no longer dominates recency: - // usage outside the lookback window decays out of the personal signal, so a - // frequently-but-stalely-used template is no longer a confident match. + // Stale usage decays out of the personal signal despite its frequency. for range 2 { dbgen.Workspace(t, db, database.WorkspaceTable{ OwnerID: user.ID, @@ -588,12 +570,9 @@ func TestListTemplates_StaleFrequentPersonalUsageDoesNotRecommend(t *testing.T) require.Len(t, templates, 2) require.Equal(t, staleUsage.ID.String(), templates[0]["id"]) require.Equal(t, unused.ID.String(), templates[1]["id"]) - require.Equal(t, "no_confident_match", result["selection_hint"]) - require.Equal(t, "weak_ranking_signal", result["recommendation_reason"]) + require.Equal(t, chattool.NextStepAskUser, result["next_step"]) _, ok := result["recommended_template_id"] require.False(t, ok) - // The stale workspaces fall outside the lookback window, so no in-window - // personal count is surfaced. _, hasCount := templates[0]["your_workspace_count"] require.False(t, hasCount) } @@ -622,8 +601,7 @@ func TestListTemplates_RecentPersonalUsageRecommends(t *testing.T) { CreatedBy: user.ID, Name: "unused", }) - // Two workspaces used two days ago. 
Recent, in-window usage is a confident - // signal: this is the frecency improvement over the old count-only ranking. + // Recent in-window usage is a confident signal. for range 2 { dbgen.Workspace(t, db, database.WorkspaceTable{ OwnerID: user.ID, @@ -643,8 +621,7 @@ func TestListTemplates_RecentPersonalUsageRecommends(t *testing.T) { require.Equal(t, recentUsage.ID.String(), templates[0]["id"]) require.Equal(t, unused.ID.String(), templates[1]["id"]) require.Equal(t, float64(2), templates[0]["your_workspace_count"]) - require.Equal(t, "used_by_you", templates[0]["relevance_signals"]) - require.Equal(t, "high_confidence_recommendation", result["selection_hint"]) + require.Equal(t, chattool.NextStepUseRecommended, result["next_step"]) require.Equal(t, recentUsage.ID.String(), result["recommended_template_id"]) } @@ -689,8 +666,6 @@ func TestListTemplates_DeletedRecentPersonalUsageShowsEvidence(t *testing.T) { require.Len(t, templates, 2) require.Equal(t, deletedUsage.ID.String(), templates[0]["id"]) require.Equal(t, unused.ID.String(), templates[1]["id"]) - require.Equal(t, "used_by_you", templates[0]["relevance_signals"]) - require.Equal(t, float64(1), templates[0]["your_recently_deleted_workspace_count"]) require.NotEmpty(t, templates[0]["last_used_by_you"]) _, hasActiveCount := templates[0]["your_workspace_count"] require.False(t, hasActiveCount) @@ -724,11 +699,9 @@ func TestListTemplates_AmbiguousTopMatches(t *testing.T) { result := runListTemplates(ctx, t, tool, `{"query":"go"}`) templates := listTemplateItems(t, result) require.Len(t, templates, 2) - require.Equal(t, "ambiguous_top_matches", result["selection_hint"]) + require.Equal(t, chattool.NextStepAskUser, result["next_step"]) _, ok := result["recommended_template_id"] require.False(t, ok) - _, ok = templates[0]["recommended"] - require.False(t, ok) } //nolint:tparallel,paralleltest // Subtests share a single DB and run sequentially. 
@@ -797,10 +770,8 @@ func TestTemplateAllowlistEnforcement(t *testing.T) { require.Len(t, templates, 1) m := templates[0].(map[string]any) require.Equal(t, t1.ID.String(), m["id"].(string)) - require.Equal(t, "only_available_template", result["selection_hint"]) + require.Equal(t, chattool.NextStepUseRecommended, result["next_step"]) require.Equal(t, t1.ID.String(), result["recommended_template_id"]) - require.Equal(t, true, m["recommended"]) - require.Equal(t, float64(1), m["rank"]) }) t.Run("NoMatches", func(t *testing.T) { @@ -815,8 +786,7 @@ func TestTemplateAllowlistEnforcement(t *testing.T) { require.NoError(t, json.Unmarshal([]byte(resp.Content), &result)) templates := result["templates"].([]any) require.Empty(t, templates) - require.Equal(t, "no_confident_match", result["selection_hint"]) - require.Equal(t, "no_matching_templates", result["recommendation_reason"]) + require.Equal(t, chattool.NextStepNoTemplates, result["next_step"]) _, ok := result["recommended_template_id"] require.False(t, ok) }) @@ -953,8 +923,7 @@ func TestGetTemplateRankingSignalsByOwnerID(t *testing.T) { OwnerID: user.ID, OrganizationID: org.ID, TemplateID: used.ID, LastUsedAt: deletedLastUsedAt, Deleted: true, }) - // Non-deleted but outside the lookback window: it must not count toward the - // in-window active count, though it still keeps the user in the org count. + // Outside the lookback window: excluded from in-window counts, still an org dev. 
dbgen.Workspace(t, db, database.WorkspaceTable{ OwnerID: user.ID, OrganizationID: org.ID, TemplateID: used.ID, LastUsedAt: now.Add(-90 * 24 * time.Hour), diff --git a/coderd/x/chatd/chattool/readtemplate.go b/coderd/x/chatd/chattool/readtemplate.go index b790e2c4d1c17..984f0f5ba05c1 100644 --- a/coderd/x/chatd/chattool/readtemplate.go +++ b/coderd/x/chatd/chattool/readtemplate.go @@ -19,7 +19,7 @@ type ReadTemplateOptions struct { } type readTemplateArgs struct { - TemplateID string `json:"template_id" description:"The UUIDv4 of the template to read details for. Obtain this from list_templates recommended_template_id or a ranked template."` + TemplateID string `json:"template_id" description:"The UUIDv4 of the template to read details for. Obtain this from list_templates."` } // ReadTemplate returns a tool that retrieves details about a specific diff --git a/coderd/x/chatd/prompt.go b/coderd/x/chatd/prompt.go index d5ccedd7c3157..76b6940448659 100644 --- a/coderd/x/chatd/prompt.go +++ b/coderd/x/chatd/prompt.go @@ -1,5 +1,7 @@ package chatd +import "github.com/coder/coder/v2/coderd/x/chatd/chattool" + const defaultSystemPromptPlanPathBlockPlaceholder = "{{CODER_CHAT_PLAN_FILE_PATH_BLOCK}}" const workspaceAttachedAwareness = "This chat is attached to a workspace. You can use workspace tools like execute, read_file, write_file, etc." @@ -9,7 +11,7 @@ Do not create or start a workspace by default. Many requests can be completed us Workspace tools such as execute, read_file, write_file, and edit_files require an attached workspace.` const workspaceDetachedAwareness = workspaceDetachedAwarenessBase + ` Only call create_workspace or start_workspace when the user explicitly asks for a workspace-backed task, or when the task cannot be completed without inspecting, editing, or running files in a workspace. -If a workspace is needed, use list_templates before create_workspace. 
If list_templates returns user_selection_required or a no_confident_match or ambiguous_top_matches selection_hint, ask the user to choose before create_workspace. Call read_template only when you need template parameter or preset details.` +If a workspace is needed, use list_templates before create_workspace and follow its ` + chattool.NextStepField + `. Call read_template only when you need template parameter or preset details.` const workspaceDetachedNoCreateAwareness = workspaceDetachedAwarenessBase + ` This delegated chat cannot create or start a workspace. If workspace-backed work is required, report that need to the parent agent instead of trying workspace tools.` @@ -106,11 +108,8 @@ Ask the minimum number of questions needed to define the scope together. When no workspace is attached and you need to create one: -- Call list_templates with concise search terms from the user's task when the task suggests a language, framework, image, or environment. -- Treat recommended_template_id, or rank 1 when selection_hint is only_available_template or high_confidence_recommendation, as the default template unless the user asked for a different template. -- If user_selection_required is true, or selection_hint is no_confident_match or ambiguous_top_matches, do not call create_workspace. Ask the user to choose a template unless the user already explicitly selected one. -- Do not paginate unless selection_hint is ambiguous_top_matches or no_confident_match, no returned template fits the request, or the user asked to browse or compare templates. -- Call read_template before create_workspace when you need parameter names, required parameter values, or preset IDs. Otherwise use create_workspace with the selected template_id and defaults. +- Call list_templates with concise search terms from the user's task, then follow its ` + chattool.NextStepField + `: use the recommended template, or ask the user to choose when none is recommended. 
+- Call read_template only when you need parameter or preset details before create_workspace. diff --git a/docs/ai-coder/agents/tools/index.md b/docs/ai-coder/agents/tools/index.md new file mode 100644 index 0000000000000..2febdeed04b2a --- /dev/null +++ b/docs/ai-coder/agents/tools/index.md @@ -0,0 +1,156 @@ +# Tools + +Coder Agents completes work by calling tools: structured functions the agent +invokes during a chat to gather context and take action, such as listing +templates, reading files, running commands, or creating a workspace. + +This page explains how tool calls work and documents the tools the agent uses +to select a template and create a workspace. + +## How tool calls work + +Each turn of a conversation follows the same loop: + +1. You send a message. +2. The model decides whether it needs a tool and calls it with structured + arguments. For example, `list_templates` with `{"query": "docker"}`. +3. Coder executes the tool in the control plane using your identity and + permissions, and returns a JSON result to the model. +4. The model uses the result to decide what to do next: call another tool or + reply to you. + +Tool calls and their results are visible in the chat timeline, so you can +always inspect what the agent did and why. + +Two properties hold for every built-in tool: + +- **Your permissions apply.** Tools run with the chat owner's RBAC identity. + The agent cannot list templates or create workspaces that you could not + access yourself. +- **Results carry instructions.** Where a decision matters, the tool result + includes a `next_step` field containing a fixed instruction for the agent, + such as asking you to choose between similar templates. This keeps agent + behavior consistent without relying on long prompt rules. + +## Workspace creation tools + +A chat starts without a workspace, and many requests are answered without +one. 
When the agent needs compute (to read files, run commands, or edit +code), it provisions a workspace using three tools: + +| Tool | Purpose | +|--------------------|-----------------------------------------------------------| +| `list_templates` | Rank available templates and recommend one when confident | +| `read_template` | Read a template's parameters and presets | +| `create_workspace` | Create the workspace from a chosen template | + +Administrators can restrict which templates these tools can see with the +[template allowlist](../platform-controls/template-optimization.md#restrict-available-templates). + +### list_templates + +`list_templates` returns a ranked shortlist of the active, non-deprecated +templates in the chat's organization, so the agent can pick a template the +same way a colleague would: prefer what matches the request, what you already +use, and what the rest of the organization uses. + +#### Arguments + +| Argument | Type | Description | +|----------|---------|-------------------------------------------------------------------------------| +| `query` | string | Optional text matched against template names, display names, and descriptions | +| `page` | integer | Optional page number, starting at 1. Each page holds 10 templates | + +#### How templates are ranked + +Templates are ordered by three signals, in priority order: + +1. **Query relevance.** When a query is provided, an exact name match ranks + above a name prefix match, then a name substring match, then a description + match. Matching is case-insensitive and ignores spaces, hyphens, and + underscores, so `python gpu` matches `python-gpu`. Templates that do not + match the query at all are excluded. +2. **Your recent usage.** Workspaces you used in the last 60 days count + toward a template's score. Recent usage counts more than old usage (the + weight halves every 14 days), and recently deleted workspaces count at + reduced weight. +3. 
**Organization popularity.** The number of developers with an active + workspace on the template. Prebuilt workspaces that have not been claimed + are excluded so they do not inflate popularity. + +#### Recommendation and next_step + +Beyond the ranked list, the result tells the agent what to do next: + +- `recommended_template_id` is present only when the top template is a clear + winner: it is the only available template, it decisively matches the query, + or your usage and organization usage clearly favor it over the runner-up. +- `next_step` is always present and contains one of four fixed instructions: + +| Situation | `next_step` instruction for the agent | +|-------------------------------------|-------------------------------------------------------| +| A template is recommended | Use `recommended_template_id` with `create_workspace` | +| Top templates are too close to call | Ask you to choose a template | +| The query matched nothing | Retry without a query or ask you | +| No templates are available | Inform you that no templates are available | + +When the agent is told to ask, it presents the choices instead of guessing. +You can always override the recommendation by naming a template yourself. + +#### Result + +```json +{ + "templates": [ + { + "id": "0f9ab36e-43f6-4d8a-b3e6-6803d9a06f72", + "name": "docker", + "display_name": "Docker", + "description": "Provision Docker containers as Coder workspaces.", + "active_developers": 14, + "your_workspace_count": 2, + "last_used_by_you": "2026-06-09T10:04:18.123456Z" + } + ], + "page": 1, + "recommended_template_id": "0f9ab36e-43f6-4d8a-b3e6-6803d9a06f72", + "next_step": "Use recommended_template_id with create_workspace. Call read_template first only if you need parameter or preset details." +} +``` + +Each template includes evidence for its position: `active_developers`, +`your_workspace_count`, and `last_used_by_you` appear when they are non-zero. +`next_page` is present only when more results exist. 
+ +### read_template + +`read_template` reads one template's details: its configurable parameters +(name, type, default, options, and validation rules) and its presets, +including preset parameter values and whether prebuilt workspaces are +available for faster startup. + +The agent calls it only when needed, typically when a template has required +parameters or when a preset should be applied. For templates that work with +defaults, the agent skips straight to `create_workspace`. + +### create_workspace + +`create_workspace` provisions a workspace from a template and waits for it to +become ready, then attaches it to the chat. + +| Argument | Type | Description | +|---------------|--------|------------------------------------------------------------------------------------| +| `template_id` | string | Required. The template UUID from `list_templates` | +| `name` | string | Optional workspace name. One is generated if omitted | +| `parameters` | object | Optional template parameter values from `read_template` | +| `preset_id` | string | Optional preset UUID. Applies preset parameters and may claim a prebuilt workspace | + +Guardrails: + +- The agent is instructed to create a workspace only when the task requires + one or when you explicitly ask for it, and to follow the `next_step` from + `list_templates`, which means asking you first when no template was + recommended. +- The tool is idempotent: if the chat already has a workspace building or + running, that workspace is returned instead of creating a duplicate. +- Templates outside the administrator's allowlist are rejected. 
diff --git a/docs/manifest.json b/docs/manifest.json index b6137b8c34b25..ce1ce4a211efc 100644 --- a/docs/manifest.json +++ b/docs/manifest.json @@ -1024,6 +1024,12 @@ "path": "./ai-coder/agents/models.md", "state": ["beta"] }, + { + "title": "Tools", + "description": "How Coder Agents uses tool calls to select templates and create workspaces", + "path": "./ai-coder/agents/tools/index.md", + "state": ["beta"] + }, { "title": "Platform Controls", "description": "How platform teams control agent behavior, models, and policies", From 19ce6aac42b3bfb0d8bbbefe1ca6ed4635142edc Mon Sep 17 00:00:00 2001 From: Jaayden Halko Date: Thu, 11 Jun 2026 13:09:23 +0000 Subject: [PATCH 15/21] test(coderd): cover template recommendation edge cases from review Asserts the recommendation fires for recent deleted-only personal usage (CRF-28), adds the both-above-floor-with-large-gap recommendation case (CRF-29), and rewords the affinity-score comments to drop the inaccurate sqlc fragility claim; the score lives in Go so the ranking policy and its confidence thresholds stay in one place. --- coderd/database/querier.go | 4 ++-- coderd/database/queries.sql.go | 4 ++-- coderd/database/queries/workspaces.sql | 4 ++-- coderd/x/chatd/chattool/listtemplates.go | 6 ++---- .../x/chatd/chattool/listtemplates_internal_test.go | 13 +++++++++++++ coderd/x/chatd/chattool/listtemplates_test.go | 3 +++ 6 files changed, 24 insertions(+), 10 deletions(-) diff --git a/coderd/database/querier.go b/coderd/database/querier.go index 05834833e4c60..382fe5eb528d4 100644 --- a/coderd/database/querier.go +++ b/coderd/database/querier.go @@ -729,8 +729,8 @@ type sqlcQuerier interface { // GetTemplateRankingSignalsByOwnerID returns raw template-ranking signals for // one owner: in-window active and recently-deleted workspace counts, the last // in-window usage, and distinct active developers per template. 
The affinity - // score is computed in Go (see listtemplates.go) because sqlc type inference - // is fragile around complex parameterized expressions. + // score is computed in Go (see listtemplates.go) so the ranking policy and + // its confidence thresholds live in one place. GetTemplateRankingSignalsByOwnerID(ctx context.Context, arg GetTemplateRankingSignalsByOwnerIDParams) ([]GetTemplateRankingSignalsByOwnerIDRow, error) GetTemplateUsageStats(ctx context.Context, arg GetTemplateUsageStatsParams) ([]TemplateUsageStat, error) GetTemplateVersionByID(ctx context.Context, id uuid.UUID) (TemplateVersion, error) diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index 6fb24b3007095..18bf1f862ec88 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -35115,8 +35115,8 @@ type GetTemplateRankingSignalsByOwnerIDRow struct { // GetTemplateRankingSignalsByOwnerID returns raw template-ranking signals for // one owner: in-window active and recently-deleted workspace counts, the last // in-window usage, and distinct active developers per template. The affinity -// score is computed in Go (see listtemplates.go) because sqlc type inference -// is fragile around complex parameterized expressions. +// score is computed in Go (see listtemplates.go) so the ranking policy and +// its confidence thresholds live in one place. 
func (q *sqlQuerier) GetTemplateRankingSignalsByOwnerID(ctx context.Context, arg GetTemplateRankingSignalsByOwnerIDParams) ([]GetTemplateRankingSignalsByOwnerIDRow, error) { rows, err := q.db.QueryContext(ctx, getTemplateRankingSignalsByOwnerID, pq.Array(arg.TemplateIDs), diff --git a/coderd/database/queries/workspaces.sql b/coderd/database/queries/workspaces.sql index 7d6c64aaba36e..af5e38acf652c 100644 --- a/coderd/database/queries/workspaces.sql +++ b/coderd/database/queries/workspaces.sql @@ -501,8 +501,8 @@ GROUP BY templates.id; -- GetTemplateRankingSignalsByOwnerID returns raw template-ranking signals for -- one owner: in-window active and recently-deleted workspace counts, the last -- in-window usage, and distinct active developers per template. The affinity --- score is computed in Go (see listtemplates.go) because sqlc type inference --- is fragile around complex parameterized expressions. +-- score is computed in Go (see listtemplates.go) so the ranking policy and +-- its confidence thresholds live in one place. WITH org_usage AS ( -- Distinct developers with a non-deleted workspace; the prebuilds system -- user is excluded so unclaimed prebuilds do not inflate popularity. diff --git a/coderd/x/chatd/chattool/listtemplates.go b/coderd/x/chatd/chattool/listtemplates.go index 7cfca268e66b7..09828b0b6b1b3 100644 --- a/coderd/x/chatd/chattool/listtemplates.go +++ b/coderd/x/chatd/chattool/listtemplates.go @@ -30,10 +30,8 @@ const ( listTemplatesMinActiveDevelopersForRecommendation = 2 // Affinity ("frecency") parameters: recency-decayed personal usage plus - // log-scaled organization popularity. Computed in Go rather than SQL so - // the score and its confidence thresholds share float semantics, and - // because sqlc type inference is fragile around complex parameterized - // expressions. + // log-scaled organization popularity. The score is computed in Go so the + // ranking policy and its confidence thresholds live in one place. 
listTemplatesLookbackDays = 60 listTemplatesHalfLife = 14 * 24 * time.Hour listTemplatesPersonalWeight = 10.0 diff --git a/coderd/x/chatd/chattool/listtemplates_internal_test.go b/coderd/x/chatd/chattool/listtemplates_internal_test.go index 190dbf1ce4eb6..af7674e1f9194 100644 --- a/coderd/x/chatd/chattool/listtemplates_internal_test.go +++ b/coderd/x/chatd/chattool/listtemplates_internal_test.go @@ -180,6 +180,19 @@ func TestSelectTemplateRecommendation(t *testing.T) { require.Equal(t, NextStepAskUser, next) }) + t.Run("NoQueryConfidentWhenBothClearFloorWithLargeGap", func(t *testing.T) { + t.Parallel() + top := uuid.New() + id, next := selectTemplateRecommendation( + []rankedTemplate{ + {Template: database.Template{ID: top}, AffinityScore: 2.0, Signals: templateRankingSignals{OrgDevs: 6}}, + {Template: database.Template{ID: uuid.New()}, AffinityScore: 1.2, Signals: templateRankingSignals{OrgDevs: 2}}, + }, 2, nil, + ) + require.Equal(t, top, id) + require.Equal(t, NextStepUseRecommended, next) + }) + t.Run("NoQueryLoadErrorAsksUser", func(t *testing.T) { t.Parallel() id, next := selectTemplateRecommendation( diff --git a/coderd/x/chatd/chattool/listtemplates_test.go b/coderd/x/chatd/chattool/listtemplates_test.go index 52cf119f335a2..060589ba90363 100644 --- a/coderd/x/chatd/chattool/listtemplates_test.go +++ b/coderd/x/chatd/chattool/listtemplates_test.go @@ -669,6 +669,9 @@ func TestListTemplates_DeletedRecentPersonalUsageShowsEvidence(t *testing.T) { require.NotEmpty(t, templates[0]["last_used_by_you"]) _, hasActiveCount := templates[0]["your_workspace_count"] require.False(t, hasActiveCount) + // Recent deleted usage alone clears the confidence floor. 
+ require.Equal(t, chattool.NextStepUseRecommended, result["next_step"]) + require.Equal(t, deletedUsage.ID.String(), result["recommended_template_id"]) } func TestListTemplates_AmbiguousTopMatches(t *testing.T) { From fcec96bce4bf1e55c4e91dff33393864755d314a Mon Sep 17 00:00:00 2001 From: Jaayden Halko Date: Fri, 12 Jun 2026 09:59:21 +0000 Subject: [PATCH 16/21] docs: add list_templates request and response example Shows a concrete query and ranked two-template response in the agent tools doc, and clarifies that templates are scored independently with only raw evidence fields returned. --- docs/ai-coder/agents/tools/index.md | 36 ++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/docs/ai-coder/agents/tools/index.md b/docs/ai-coder/agents/tools/index.md index 2febdeed04b2a..ad88e78c5dc0b 100644 --- a/docs/ai-coder/agents/tools/index.md +++ b/docs/ai-coder/agents/tools/index.md @@ -78,6 +78,12 @@ Templates are ordered by three signals, in priority order: workspace on the template. Prebuilt workspaces that have not been claimed are excluded so they do not inflate popularity. +Each template is scored independently from its own signals; one template's +usage never affects another's score. The score is internal and only +determines ordering and the recommendation. The response carries the raw +evidence instead: `active_developers`, `your_workspace_count`, and +`last_used_by_you` appear on each template when they are non-zero. + #### Recommendation and next_step Beyond the ranked list, the result tells the agent what to do next: @@ -97,7 +103,20 @@ Beyond the ranked list, the result tells the agent what to do next: When the agent is told to ask, it presents the choices instead of guessing. You can always override the recommendation by naming a template yourself. -#### Result +#### Example + +A user asks for "a workspace to debug a Docker image". 
The agent extracts a +search term and calls `list_templates` with: + +```json +{ + "query": "docker" +} +``` + +The response lists matching templates best-first. Here `docker` is an exact +name match while `docker-gpu` is only a prefix match, so the top template is +a clear winner and is recommended: ```json { @@ -110,6 +129,13 @@ You can always override the recommendation by naming a template yourself. "active_developers": 14, "your_workspace_count": 2, "last_used_by_you": "2026-06-09T10:04:18.123456Z" + }, + { + "id": "8d2cf2a1-55b0-4c4e-9a3f-41be8a5cd1f0", + "name": "docker-gpu", + "display_name": "Docker GPU", + "description": "Docker workspaces with NVIDIA GPU access.", + "active_developers": 3 } ], "page": 1, @@ -118,8 +144,12 @@ You can always override the recommendation by naming a template yourself. } ``` -Each template includes evidence for its position: `active_developers`, -`your_workspace_count`, and `last_used_by_you` appear when they are non-zero. +The agent follows `next_step` and calls `create_workspace` with the +recommended template. Had the two templates tied (for example, both plain +substring matches with similar usage), the response would omit +`recommended_template_id` and `next_step` would instruct the agent to ask +you to choose. + `next_page` is present only when more results exist. ### read_template From 9e385920d4d5b100f82c1f19ac8b81f64bc8b681 Mon Sep 17 00:00:00 2001 From: Jaayden Halko Date: Fri, 12 Jun 2026 10:06:21 +0000 Subject: [PATCH 17/21] docs: state the exact deleted-workspace weight in template ranking --- docs/ai-coder/agents/tools/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ai-coder/agents/tools/index.md b/docs/ai-coder/agents/tools/index.md index ad88e78c5dc0b..3560994f61e84 100644 --- a/docs/ai-coder/agents/tools/index.md +++ b/docs/ai-coder/agents/tools/index.md @@ -72,8 +72,8 @@ Templates are ordered by three signals, in priority order: match the query at all are excluded. 2. 
**Your recent usage.** Workspaces you used in the last 60 days count toward a template's score. Recent usage counts more than old usage (the - weight halves every 14 days), and recently deleted workspaces count at - reduced weight. + weight halves every 14 days), and a recently deleted workspace counts + half as much as an active one. 3. **Organization popularity.** The number of developers with an active workspace on the template. Prebuilt workspaces that have not been claimed are excluded so they do not inflate popularity. From 4f6ac741cc2bc7752a51cb5a3ab541e917a6bc35 Mon Sep 17 00:00:00 2001 From: Jaayden Halko Date: Fri, 12 Jun 2026 10:11:06 +0000 Subject: [PATCH 18/21] docs: detail query relevance tiers and the affinity formula --- docs/ai-coder/agents/tools/index.md | 75 ++++++++++++++++++++--------- 1 file changed, 53 insertions(+), 22 deletions(-) diff --git a/docs/ai-coder/agents/tools/index.md b/docs/ai-coder/agents/tools/index.md index 3560994f61e84..f4eab0cba7e65 100644 --- a/docs/ai-coder/agents/tools/index.md +++ b/docs/ai-coder/agents/tools/index.md @@ -63,34 +63,65 @@ use, and what the rest of the organization uses. #### How templates are ranked -Templates are ordered by three signals, in priority order: - -1. **Query relevance.** When a query is provided, an exact name match ranks - above a name prefix match, then a name substring match, then a description - match. Matching is case-insensitive and ignores spaces, hyphens, and - underscores, so `python gpu` matches `python-gpu`. Templates that do not - match the query at all are excluded. -2. **Your recent usage.** Workspaces you used in the last 60 days count - toward a template's score. Recent usage counts more than old usage (the - weight halves every 14 days), and a recently deleted workspace counts - half as much as an active one. -3. **Organization popularity.** The number of developers with an active - workspace on the template. 
Prebuilt workspaces that have not been claimed - are excluded so they do not inflate popularity. - -Each template is scored independently from its own signals; one template's -usage never affects another's score. The score is internal and only -determines ordering and the recommendation. The response carries the raw -evidence instead: `active_developers`, `your_workspace_count`, and -`last_used_by_you` appear on each template when they are non-zero. +Templates are ordered by query relevance first (when a query is provided), +then by an affinity score computed from your recent usage and organization +popularity. Name and ID break any remaining ties so the order is stable. + +##### Query relevance tiers + +When a query is provided, each template receives the highest tier that any +of its fields matches: + +| Tier | Match | +|------|-----------------------------------------------------| +| 4 | The name or display name equals the query | +| 3 | The name or display name starts with the query | +| 2 | The name or display name contains the query | +| 1 | The description contains the query | +| 0 | No match. The template is excluded from the results | + +The description is checked only when neither the name nor the display name +matched. Matching is case-insensitive and ignores spaces, hyphens, and +underscores, so `python gpu` matches `python-gpu`. A higher tier always +outranks a lower tier, regardless of usage. + +##### Affinity score + +Within a relevance tier, or when no query is given, templates are ordered by +an affinity score: + +```text +affinity = 10 x (active + 0.5 x deleted) x 0.5^(days_since_last_use / 14) + + ln(1 + active_developers) +``` + +- `active`: your workspaces on the template used in the last 60 days. +- `deleted`: your recently deleted workspaces used in the last 60 days, + counted at half the weight of active ones. +- `days_since_last_use`: days since you last used the template. 
The personal + term halves every 14 days and is zero when you have no usage in the last + 60 days. +- `active_developers`: developers in the organization with an active + workspace on the template, excluding unclaimed prebuilt workspaces. + +Personal usage carries ten times the weight of organization popularity, and +popularity grows logarithmically so heavily used templates do not drown out +your own history. Each template is scored independently from its own +signals; one template's usage never affects another's score. The score is +internal and only determines ordering and the recommendation. The response +carries the raw evidence instead: `active_developers`, +`your_workspace_count`, and `last_used_by_you` appear on each template when +they are non-zero. #### Recommendation and next_step Beyond the ranked list, the result tells the agent what to do next: - `recommended_template_id` is present only when the top template is a clear - winner: it is the only available template, it decisively matches the query, - or your usage and organization usage clearly favor it over the runner-up. + winner: it is the only available template, it matches the query at a + strictly higher tier than every other match, or its affinity score reaches + at least the score of two active developers and leads the runner-up by a + clear margin. - `next_step` is always present and contains one of four fixed instructions: | Situation | `next_step` instruction for the agent | From 3017d21d08cdbf55ef855a95919a262024a6d969 Mon Sep 17 00:00:00 2001 From: Jaayden Halko Date: Wed, 17 Jun 2026 13:01:29 +0000 Subject: [PATCH 19/21] feat(coderd/x/chatd): add list_templates ranking telemetry Add observability for the list_templates ranking decision and its downstream acceptance. Tier 1: a structured decision log per call with correlation IDs, ranking inputs (scores, gap, thresholds), outcome, and reason. The raw query text is never logged, only its presence and length. 
Tier 2: Prometheus metrics for recommendation outcome, signal-load failures, and the affinity gap between the top two candidates. Tier 3: an in-memory, best-effort RecommendationTracker correlates a recommendation with the template create_workspace later builds, recorded as template_recommendation_followup_total plus a log. The affinity score stays internal to the response; the telemetry surfaces it in logs and metrics without exposing it to the model. Regenerates the Prometheus metrics docs for the new metrics. --- coderd/x/chatd/chatd.go | 12 +- coderd/x/chatd/chatloop/metrics.go | 70 +++++++ coderd/x/chatd/chattool/createworkspace.go | 41 +++- .../chattool/createworkspace_internal_test.go | 65 ++++++ coderd/x/chatd/chattool/listtemplates.go | 193 ++++++++++++++++-- .../chattool/listtemplates_internal_test.go | 26 +-- .../listtemplates_telemetry_internal_test.go | 189 +++++++++++++++++ coderd/x/chatd/chattool/listtemplates_test.go | 55 +++++ .../x/chatd/chattool/recommendationtracker.go | 170 +++++++++++++++ .../recommendationtracker_internal_test.go | 110 ++++++++++ docs/admin/integrations/prometheus.md | 4 + scripts/metricsdocgen/generated_metrics | 12 ++ 12 files changed, 917 insertions(+), 30 deletions(-) create mode 100644 coderd/x/chatd/chattool/listtemplates_telemetry_internal_test.go create mode 100644 coderd/x/chatd/chattool/recommendationtracker.go create mode 100644 coderd/x/chatd/chattool/recommendationtracker_internal_test.go diff --git a/coderd/x/chatd/chatd.go b/coderd/x/chatd/chatd.go index 0a16dc874f526..97482ce66004b 100644 --- a/coderd/x/chatd/chatd.go +++ b/coderd/x/chatd/chatd.go @@ -204,7 +204,11 @@ type Server struct { chatWorker *chatWorker messagePartBuffer *messagepartbuffer.Buffer streamSyncPoller *streamSyncPoller - recordingSem chan struct{} + // templateRecommendations correlates a list_templates recommendation with + // the template a later create_workspace builds, for acceptance telemetry. 
+ // In-memory and best-effort (see chattool.RecommendationTracker). + templateRecommendations *chattool.RecommendationTracker + recordingSem chan struct{} aibridgeTransportFactory *atomic.Pointer[aibridge.TransportFactory] aiGatewayRoutingEnabled bool @@ -3445,6 +3449,7 @@ func New(ps pubsub.Pubsub, cfg Config) *Server { panic("chatd: create chat worker: " + err.Error()) } p.chatWorker = chatWorker + p.templateRecommendations = chattool.NewRecommendationTracker(clk, 0, 0) //nolint:gocritic // The chat processor uses a scoped chatd context. ctx = dbauthz.AsChatd(ctx) @@ -4193,6 +4198,9 @@ func (p *Server) appendRootChatTools( Logger: p.logger, Clock: p.clock, AllowedTemplateIDs: p.chatTemplateAllowlist, + ChatID: opts.chat.ID, + Metrics: p.metrics, + Recommendations: p.templateRecommendations, }), chattool.ReadTemplate(p.db, opts.chat.OrganizationID, chattool.ReadTemplateOptions{ OwnerID: opts.chat.OwnerID, @@ -4207,6 +4215,8 @@ func (p *Server) appendRootChatTools( OnChatUpdated: onChatUpdated, Logger: p.logger, AllowedTemplateIDs: p.chatTemplateAllowlist, + Metrics: p.metrics, + Recommendations: p.templateRecommendations, }), chattool.StartWorkspace(p.db, opts.chat.ID, chattool.StartWorkspaceOptions{ OwnerID: opts.chat.OwnerID, diff --git a/coderd/x/chatd/chatloop/metrics.go b/coderd/x/chatd/chatloop/metrics.go index 6f13663017b97..ac58bc559e7cc 100644 --- a/coderd/x/chatd/chatloop/metrics.go +++ b/coderd/x/chatd/chatloop/metrics.go @@ -38,6 +38,12 @@ type Metrics struct { StepsTotal *prometheus.CounterVec StreamRetriesTotal *prometheus.CounterVec StreamBufferDroppedTotal prometheus.Counter + + // list_templates ranking telemetry. 
+ ListTemplatesOutcomeTotal *prometheus.CounterVec + ListTemplatesSignalsFailuresTotal prometheus.Counter + ListTemplatesAffinityGap *prometheus.HistogramVec + TemplateRecommendationFollowupTotal *prometheus.CounterVec } // NewMetrics creates a new Metrics instance registered with the @@ -109,6 +115,31 @@ func NewMetrics(reg prometheus.Registerer) *Metrics { Name: "stream_buffer_dropped_total", Help: "Number of chat stream buffer events dropped due to the per-chat buffer cap.", }), + ListTemplatesOutcomeTotal: factory.NewCounterVec(prometheus.CounterOpts{ + Namespace: metricsNamespace, + Subsystem: metricsSubsystem, + Name: "list_templates_outcome_total", + Help: "Total list_templates calls by recommendation outcome (recommended, ask_user, no_matches, no_templates).", + }, []string{"outcome"}), + ListTemplatesSignalsFailuresTotal: factory.NewCounter(prometheus.CounterOpts{ + Namespace: metricsNamespace, + Subsystem: metricsSubsystem, + Name: "list_templates_signals_failures_total", + Help: "Total list_templates calls where ranking signals failed to load, degrading the result toward asking the user.", + }), + ListTemplatesAffinityGap: factory.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: metricsNamespace, + Subsystem: metricsSubsystem, + Name: "list_templates_affinity_gap", + Help: "Affinity score gap between the top two candidates when affinity is the deciding signal, labeled by whether a recommendation was made.", + Buckets: prometheus.ExponentialBuckets(0.1, 2, 9), // 0.1 .. 
25.6 + }, []string{"recommended"}), + TemplateRecommendationFollowupTotal: factory.NewCounterVec(prometheus.CounterOpts{ + Namespace: metricsNamespace, + Subsystem: metricsSubsystem, + Name: "template_recommendation_followup_total", + Help: "Total create_workspace calls by how the chosen template related to the prior list_templates recommendation.", + }, []string{"outcome"}), } } @@ -165,6 +196,45 @@ func (m *Metrics) RecordToolError(provider, model, toolLabel string) { m.ToolErrorsTotal.WithLabelValues(provider, model, toolLabel).Inc() } +// RecordListTemplatesOutcome increments list_templates_outcome_total for the +// given recommendation outcome. No-op when m is nil. +func (m *Metrics) RecordListTemplatesOutcome(outcome string) { + if m == nil { + return + } + m.ListTemplatesOutcomeTotal.WithLabelValues(outcome).Inc() +} + +// RecordListTemplatesSignalsFailure increments +// list_templates_signals_failures_total. No-op when m is nil. +func (m *Metrics) RecordListTemplatesSignalsFailure() { + if m == nil { + return + } + m.ListTemplatesSignalsFailuresTotal.Inc() +} + +// RecordListTemplatesAffinityGap observes the affinity gap between the top two +// list_templates candidates, labeled by whether a recommendation was made. +// Callers must only record when affinity is the deciding signal so the gap is +// non-negative and meaningful. No-op when m is nil. +func (m *Metrics) RecordListTemplatesAffinityGap(recommended bool, gap float64) { + if m == nil { + return + } + m.ListTemplatesAffinityGap.WithLabelValues(strconv.FormatBool(recommended)).Observe(gap) +} + +// RecordTemplateRecommendationFollowup increments +// template_recommendation_followup_total for how a create_workspace call +// related to the prior list_templates recommendation. No-op when m is nil. 
+func (m *Metrics) RecordTemplateRecommendationFollowup(outcome string) { + if m == nil { + return + } + m.TemplateRecommendationFollowupTotal.WithLabelValues(outcome).Inc() +} + // RecordStreamBufferDropped increments stream_buffer_dropped_total // once per dropped event. No-op when m is nil. func (m *Metrics) RecordStreamBufferDropped() { diff --git a/coderd/x/chatd/chattool/createworkspace.go b/coderd/x/chatd/chattool/createworkspace.go index a20db20ac5d39..c47435d05d0c8 100644 --- a/coderd/x/chatd/chattool/createworkspace.go +++ b/coderd/x/chatd/chattool/createworkspace.go @@ -62,7 +62,16 @@ type AgentConnFunc func( agentID uuid.UUID, ) (workspacesdk.AgentConn, func(), error) -// CreateWorkspaceOptions configures the create_workspace tool. +// CreateWorkspaceMetrics records create_workspace telemetry. It is implemented +// by *chatloop.Metrics and declared here (rather than imported) because +// chatloop imports chattool, so chattool must not import chatloop. +type CreateWorkspaceMetrics interface { + RecordTemplateRecommendationFollowup(outcome string) +} + +// CreateWorkspaceOptions configures the create_workspace tool. Metrics and +// Recommendations are optional telemetry hooks that classify how the chosen +// template related to the prior list_templates recommendation for this chat. type CreateWorkspaceOptions struct { OwnerID uuid.UUID CreateFn CreateWorkspaceFn @@ -72,6 +81,8 @@ type CreateWorkspaceOptions struct { OnChatUpdated func(database.Chat) Logger slog.Logger AllowedTemplateIDs func() map[uuid.UUID]bool + Metrics CreateWorkspaceMetrics + Recommendations *RecommendationTracker } type createWorkspaceArgs struct { @@ -277,6 +288,12 @@ func CreateWorkspace(db database.Store, organizationID, chatID uuid.UUID, option options.OnChatUpdated(updatedChat) } + // Tier 3 acceptance telemetry: classify how this freshly + // created workspace's template related to the prior + // list_templates recommendation. 
Only genuine creations reach + // here; the idempotent existing-workspace path returns earlier. + recordRecommendationFollowup(ctx, options, chatID, workspace.ID, templateID) + // Wait for the build to complete and the agent to // come online so subsequent tools can use the // workspace immediately. @@ -362,6 +379,28 @@ type existingWorkspaceResult struct { Err error } +// recordRecommendationFollowup classifies how a freshly created workspace's +// template related to the prior list_templates recommendation for the chat and +// records it as a metric plus a structured log. Classification is best-effort: +// it degrades to "no_recent_list_templates" when no in-memory record exists +// (restart, replica handoff, expiry, or list_templates was never called). +func recordRecommendationFollowup( + ctx context.Context, + options CreateWorkspaceOptions, + chatID, workspaceID, templateID uuid.UUID, +) { + followup := options.Recommendations.Classify(chatID, templateID) + if options.Metrics != nil { + options.Metrics.RecordTemplateRecommendationFollowup(followup) + } + options.Logger.Info(ctx, "create_workspace recommendation follow-up", + slog.F("chat_id", chatID), + slog.F("workspace_id", workspaceID), + slog.F("template_id", templateID), + slog.F("recommendation_followup", followup), + ) +} + // checkExistingWorkspace checks whether the given chat // already has a usable workspace. 
Returns an // existingWorkspaceResult with Done set when the caller should diff --git a/coderd/x/chatd/chattool/createworkspace_internal_test.go b/coderd/x/chatd/chattool/createworkspace_internal_test.go index 13f009d6686d8..960a36c97e924 100644 --- a/coderd/x/chatd/chattool/createworkspace_internal_test.go +++ b/coderd/x/chatd/chattool/createworkspace_internal_test.go @@ -2100,3 +2100,68 @@ func TestCreateWorkspace_WithPresetAndParams(t *testing.T) { require.Equal(t, "region", capturedReq.RichParameterValues[0].Name) require.Equal(t, "us-east", capturedReq.RichParameterValues[0].Value) } + +type fakeCreateWorkspaceMetrics struct { + followups []string +} + +func (m *fakeCreateWorkspaceMetrics) RecordTemplateRecommendationFollowup(outcome string) { + m.followups = append(m.followups, outcome) +} + +// TestCreateWorkspace_RecordsRecommendationFollowup verifies the create path +// classifies the chosen template against a prior list_templates recommendation +// shared through the tracker, and records the follow-up metric. +func TestCreateWorkspace_RecordsRecommendationFollowup(t *testing.T) { + t.Parallel() + + ctrl := gomock.NewController(t) + db := newCreateWorkspaceMockStore(ctrl) + + ownerID := uuid.New() + orgID := uuid.New() + chatID := uuid.New() + templateID := uuid.New() + workspaceID := uuid.New() + + db.EXPECT().GetChatByID(gomock.Any(), chatID).Return(database.Chat{ID: chatID}, nil) + db.EXPECT().UpdateChatWorkspaceBinding(gomock.Any(), gomock.Any()).Return(database.Chat{ID: chatID}, nil) + db.EXPECT().GetAuthorizationUserRoles(gomock.Any(), ownerID).Return(database.GetAuthorizationUserRolesRow{ + ID: ownerID, Roles: []string{}, Groups: []string{}, Status: database.UserStatusActive, + }, nil) + db.EXPECT().GetTemplateByID(gomock.Any(), templateID).Return(database.Template{ + ID: templateID, OrganizationID: orgID, + }, nil) + db.EXPECT().GetChatWorkspaceTTL(gomock.Any()).Return("0s", nil) + // Empty agent list short-circuits the agent-ready wait. 
+ db.EXPECT().GetWorkspaceAgentsInLatestBuildByWorkspaceID(gomock.Any(), workspaceID). + Return([]database.WorkspaceAgent{}, nil) + + // A nil-build workspace skips the build-completion wait. + createFn := func(_ context.Context, _ uuid.UUID, req codersdk.CreateWorkspaceRequest) (codersdk.Workspace, error) { + return codersdk.Workspace{ID: workspaceID, Name: req.Name, OwnerName: "testuser"}, nil + } + + // Seed the tracker so the chat already has a recommendation for templateID. + tracker := NewRecommendationTracker(nil, 0, 0) + tracker.Record(chatID, templateID, []uuid.UUID{templateID}) + metrics := &fakeCreateWorkspaceMetrics{} + + tool := CreateWorkspace(db, orgID, chatID, CreateWorkspaceOptions{ + OwnerID: ownerID, + CreateFn: createFn, + WorkspaceMu: &sync.Mutex{}, + Logger: slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}), + Metrics: metrics, + Recommendations: tracker, + }) + + input := fmt.Sprintf(`{"template_id":%q}`, templateID.String()) + resp, err := tool.Run(context.Background(), fantasy.ToolCall{ID: "call-1", Name: "create_workspace", Input: input}) + require.NoError(t, err) + require.False(t, resp.IsError) + + require.Equal(t, []string{recommendationFollowupAccepted}, metrics.followups) + // The recommendation was consumed: a repeat classification finds nothing. + require.Equal(t, recommendationFollowupNoRecord, tracker.Classify(chatID, templateID)) +} diff --git a/coderd/x/chatd/chattool/listtemplates.go b/coderd/x/chatd/chattool/listtemplates.go index 09828b0b6b1b3..4910d0773aedc 100644 --- a/coderd/x/chatd/chattool/listtemplates.go +++ b/coderd/x/chatd/chattool/listtemplates.go @@ -76,14 +76,60 @@ const ( queryScoreDescriptionMatch = 1 ) +// listTemplatesRankingVersion identifies the ranking policy (scoring formula +// and confidence thresholds) for telemetry. Bump it whenever the policy +// changes so recorded decisions can be segmented by version. 
+const listTemplatesRankingVersion = 1 + +// list_templates outcomes are the label values for +// list_templates_outcome_total and the "outcome" field of the decision log. +const ( + listTemplatesOutcomeRecommended = "recommended" + listTemplatesOutcomeAskUser = "ask_user" + listTemplatesOutcomeNoMatches = "no_matches" + listTemplatesOutcomeNoTemplates = "no_templates" +) + +// Recommendation reasons explain which branch produced the outcome. They are +// recorded in the decision log (not as a metric label) so the "why" survives +// without reconstructing it from the raw scores. +const ( + recommendationReasonNoTemplates = "no_templates" + recommendationReasonNoMatches = "no_matches" + recommendationReasonOnlyAvailable = "only_available_template" + recommendationReasonDecisiveQuery = "decisive_query_match" + recommendationReasonSignalsUnavailable = "signals_unavailable" + recommendationReasonQueryTieConfident = "query_tie_broken_by_affinity" + recommendationReasonQueryTieAmbiguous = "ambiguous_query_tie" + recommendationReasonAffinityConfident = "affinity_confident" + recommendationReasonAffinityLow = "affinity_below_floor" + recommendationReasonAffinityAmbiguous = "ambiguous_affinity" +) + +// ListTemplatesMetrics records list_templates ranking telemetry. It is +// implemented by *chatloop.Metrics and declared here (rather than imported) +// because chatloop imports chattool, so chattool must not import chatloop. +type ListTemplatesMetrics interface { + RecordListTemplatesOutcome(outcome string) + RecordListTemplatesSignalsFailure() + RecordListTemplatesAffinityGap(recommended bool, gap float64) +} + // ListTemplatesOptions configures the list_templates tool. OwnerID is // required; Clock defaults to a real clock when nil. AllowedTemplateIDs -// optionally restricts which templates can be returned. +// optionally restricts which templates can be returned. 
ChatID, Metrics, and +// Recommendations are optional telemetry hooks: ChatID correlates a result +// with a later create_workspace call, Metrics records aggregate ranking +// outcomes, and Recommendations remembers the result for follow-up +// classification. type ListTemplatesOptions struct { OwnerID uuid.UUID Logger slog.Logger Clock quartz.Clock AllowedTemplateIDs func() map[uuid.UUID]bool + ChatID uuid.UUID + Metrics ListTemplatesMetrics + Recommendations *RecommendationTracker } type listTemplatesArgs struct { @@ -122,7 +168,7 @@ func ListTemplates(db database.Store, organizationID uuid.UUID, options ListTemp "by a query matching template name, display name, or description. "+ "Follow the "+NextStepField+" field in the result. Returns 10 per "+ "page; fetch next_page only when no listed template fits the request.", - func(ctx context.Context, args listTemplatesArgs, _ fantasy.ToolCall) (fantasy.ToolResponse, error) { + func(ctx context.Context, args listTemplatesArgs, toolCall fantasy.ToolCall) (fantasy.ToolResponse, error) { ctx, err := asOwner(ctx, db, options.OwnerID) if err != nil { return fantasy.NewTextErrorResponse(xerrors.Errorf("authorize list_templates owner: %w", err).Error()), nil @@ -176,7 +222,7 @@ func ListTemplates(db database.Store, organizationID uuid.UUID, options ListTemp } rankTemplates(ranked, query) - recommendedID, nextStep := selectTemplateRecommendation( + recommendedID, nextStep, reason := selectTemplateRecommendation( ranked, visibleTemplateCount, signalsErr, @@ -191,10 +237,26 @@ func ListTemplates(db database.Store, organizationID uuid.UUID, options ListTemp end := min(start+listTemplatesPageSize, totalCount) items := make([]map[string]any, 0, end-start) + pageTemplateIDs := make([]uuid.UUID, 0, end-start) for _, t := range ranked[start:end] { items = append(items, templateItem(t)) + pageTemplateIDs = append(pageTemplateIDs, t.Template.ID) } + recordListTemplatesTelemetry(ctx, options, toolCall.ID, organizationID, 
listTemplatesTelemetry{ + query: query, + page: page, + visibleTemplateCount: visibleTemplateCount, + candidateCount: totalCount, + returnedCount: len(items), + ranked: ranked, + recommendedID: recommendedID, + nextStep: nextStep, + reason: reason, + signalsErr: signalsErr, + }) + options.Recommendations.Record(options.ChatID, recommendedID, pageTemplateIDs) + result := map[string]any{ "templates": items, "page": page, @@ -302,55 +364,156 @@ func rankTemplates(ranked []rankedTemplate, query string) { } // selectTemplateRecommendation returns the recommended template (uuid.Nil for -// none) and the next-step instruction. A decisive query match recommends on +// none), the next-step instruction, and a reason identifying which branch +// decided the outcome (for telemetry). A decisive query match recommends on // its own; otherwise the affinity score must clear a floor and lead the // runner-up by a margin. func selectTemplateRecommendation( ranked []rankedTemplate, visibleTemplateCount int, rankingSignalsErr error, -) (uuid.UUID, string) { +) (recommendedID uuid.UUID, nextStep string, reason string) { if len(ranked) == 0 { if visibleTemplateCount == 0 { - return uuid.Nil, NextStepNoTemplates + return uuid.Nil, NextStepNoTemplates, recommendationReasonNoTemplates } - return uuid.Nil, NextStepNoMatches + return uuid.Nil, NextStepNoMatches, recommendationReasonNoMatches } top := ranked[0] if visibleTemplateCount == 1 && len(ranked) == 1 { - return top.Template.ID, NextStepUseRecommended + return top.Template.ID, NextStepUseRecommended, recommendationReasonOnlyAvailable } // A decisive query match recommends even when signals failed to load. 
if top.QueryScore > 0 && (len(ranked) == 1 || top.QueryScore > ranked[1].QueryScore) { - return top.Template.ID, NextStepUseRecommended + return top.Template.ID, NextStepUseRecommended, recommendationReasonDecisiveQuery } // Beyond a decisive query match, confidence comes from the affinity // score, so a failed signal load means asking the user. if rankingSignalsErr != nil { - return uuid.Nil, NextStepAskUser + return uuid.Nil, NextStepAskUser, recommendationReasonSignalsUnavailable } // Query tie: break it with a clear affinity gap. if top.QueryScore > 0 { if len(ranked) > 1 && affinityScoreAtLeast(top.AffinityScore-ranked[1].AffinityScore, minConfidentGap) { - return top.Template.ID, NextStepUseRecommended + return top.Template.ID, NextStepUseRecommended, recommendationReasonQueryTieConfident } - return uuid.Nil, NextStepAskUser + return uuid.Nil, NextStepAskUser, recommendationReasonQueryTieAmbiguous } // No query: the affinity score alone decides. if !affinityScoreAtLeast(top.AffinityScore, minConfidentAffinityScore) { - return uuid.Nil, NextStepAskUser + return uuid.Nil, NextStepAskUser, recommendationReasonAffinityLow } if len(ranked) > 1 && affinityScoreAtLeast(ranked[1].AffinityScore, minConfidentAffinityScore) && !affinityScoreAtLeast(top.AffinityScore-ranked[1].AffinityScore, minConfidentGap) { - return uuid.Nil, NextStepAskUser + return uuid.Nil, NextStepAskUser, recommendationReasonAffinityAmbiguous + } + return top.Template.ID, NextStepUseRecommended, recommendationReasonAffinityConfident +} + +// listTemplatesOutcome maps a next-step instruction to its telemetry outcome. 
+func listTemplatesOutcome(nextStep string) string { + switch nextStep { + case NextStepNoTemplates: + return listTemplatesOutcomeNoTemplates + case NextStepNoMatches: + return listTemplatesOutcomeNoMatches + case NextStepUseRecommended: + return listTemplatesOutcomeRecommended + default: + return listTemplatesOutcomeAskUser + } +} + +// listTemplatesTelemetry carries the data recorded for one list_templates call: +// aggregate ranking metrics plus the inputs for a structured decision log. +type listTemplatesTelemetry struct { + query string + page int + visibleTemplateCount int + candidateCount int + returnedCount int + ranked []rankedTemplate + recommendedID uuid.UUID + nextStep string + reason string + signalsErr error +} + +// recordListTemplatesTelemetry records the aggregate ranking metrics (Tier 2) +// and emits the structured decision log (Tier 1). The raw user query text is +// never logged: only its presence and length are, to avoid leaking task +// content. The affinity score is not exposed to the model; the log carries the +// scores, gap, and thresholds so a decision is reconstructable. +func recordListTemplatesTelemetry( + ctx context.Context, + options ListTemplatesOptions, + toolCallID string, + organizationID uuid.UUID, + t listTemplatesTelemetry, +) { + outcome := listTemplatesOutcome(t.nextStep) + recommended := t.recommendedID != uuid.Nil + + if options.Metrics != nil { + options.Metrics.RecordListTemplatesOutcome(outcome) + if t.signalsErr != nil { + options.Metrics.RecordListTemplatesSignalsFailure() + } + // The affinity gap is only meaningful when affinity is the deciding + // signal: no query, or the top two share the same query tier. In those + // cases the sort guarantees a non-negative gap.
+ if len(t.ranked) > 1 { + top, runner := t.ranked[0], t.ranked[1] + if t.query == "" || top.QueryScore == runner.QueryScore { + options.Metrics.RecordListTemplatesAffinityGap(recommended, top.AffinityScore-runner.AffinityScore) + } + } + } + + fields := []slog.Field{ + slog.F("tool_call_id", toolCallID), + slog.F("chat_id", options.ChatID), + slog.F("owner_id", options.OwnerID), + slog.F("organization_id", organizationID), + slog.F("query_present", t.query != ""), + slog.F("query_len", len(t.query)), + slog.F("page", t.page), + slog.F("visible_template_count", t.visibleTemplateCount), + slog.F("candidate_count", t.candidateCount), + slog.F("returned_count", t.returnedCount), + slog.F("outcome", outcome), + slog.F("recommendation_reason", t.reason), + slog.F("signals_load_failed", t.signalsErr != nil), + slog.F("ranking_version", listTemplatesRankingVersion), + slog.F("min_confident_affinity_score", minConfidentAffinityScore), + slog.F("min_confident_gap", minConfidentGap), + } + if recommended { + fields = append(fields, slog.F("recommended_template_id", t.recommendedID)) + } + if len(t.ranked) > 0 { + top := t.ranked[0] + fields = append(fields, + slog.F("top_template_id", top.Template.ID), + slog.F("top_query_score", top.QueryScore), + slog.F("top_affinity_score", top.AffinityScore), + ) + } + if len(t.ranked) > 1 { + runner := t.ranked[1] + fields = append(fields, + slog.F("runner_up_query_score", runner.QueryScore), + slog.F("runner_up_affinity_score", runner.AffinityScore), + slog.F("affinity_gap", t.ranked[0].AffinityScore-runner.AffinityScore), + ) } - return top.Template.ID, NextStepUseRecommended + options.Logger.Info(ctx, "list_templates decision", fields...) 
} func templateItem(t rankedTemplate) map[string]any { diff --git a/coderd/x/chatd/chattool/listtemplates_internal_test.go b/coderd/x/chatd/chattool/listtemplates_internal_test.go index af7674e1f9194..53cfae59fc646 100644 --- a/coderd/x/chatd/chattool/listtemplates_internal_test.go +++ b/coderd/x/chatd/chattool/listtemplates_internal_test.go @@ -56,14 +56,14 @@ func TestSelectTemplateRecommendation(t *testing.T) { t.Run("NoTemplatesAvailable", func(t *testing.T) { t.Parallel() - id, next := selectTemplateRecommendation(nil, 0, nil) + id, next, _ := selectTemplateRecommendation(nil, 0, nil) require.Equal(t, uuid.Nil, id) require.Equal(t, NextStepNoTemplates, next) }) t.Run("QueryFiltersEverything", func(t *testing.T) { t.Parallel() - id, next := selectTemplateRecommendation(nil, 2, nil) + id, next, _ := selectTemplateRecommendation(nil, 2, nil) require.Equal(t, uuid.Nil, id) require.Equal(t, NextStepNoMatches, next) }) @@ -71,7 +71,7 @@ func TestSelectTemplateRecommendation(t *testing.T) { t.Run("OnlyAvailable", func(t *testing.T) { t.Parallel() only := uuid.New() - id, next := selectTemplateRecommendation( + id, next, _ := selectTemplateRecommendation( []rankedTemplate{{Template: database.Template{ID: only}}}, 1, loadErr, ) require.Equal(t, only, id) @@ -82,7 +82,7 @@ func TestSelectTemplateRecommendation(t *testing.T) { t.Parallel() top := uuid.New() for _, err := range []error{nil, loadErr} { - id, next := selectTemplateRecommendation( + id, next, _ := selectTemplateRecommendation( []rankedTemplate{ {Template: database.Template{ID: top}, QueryScore: queryScoreExactName}, {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreDescriptionMatch}, @@ -96,7 +96,7 @@ func TestSelectTemplateRecommendation(t *testing.T) { t.Run("QueryTieBrokenByAffinityGap", func(t *testing.T) { t.Parallel() top := uuid.New() - id, next := selectTemplateRecommendation( + id, next, _ := selectTemplateRecommendation( []rankedTemplate{ {Template: database.Template{ID: top}, 
QueryScore: queryScoreNamePrefix, AffinityScore: 10, Signals: templateRankingSignals{ActiveCount: 1}}, {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreNamePrefix, AffinityScore: 0}, @@ -108,7 +108,7 @@ func TestSelectTemplateRecommendation(t *testing.T) { t.Run("QueryTieWithSmallGapIsAmbiguous", func(t *testing.T) { t.Parallel() - id, next := selectTemplateRecommendation( + id, next, _ := selectTemplateRecommendation( []rankedTemplate{ {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreNamePrefix, AffinityScore: 0.1}, {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreNamePrefix, AffinityScore: 0}, @@ -120,7 +120,7 @@ func TestSelectTemplateRecommendation(t *testing.T) { t.Run("QueryTieWithLoadErrorAsksUser", func(t *testing.T) { t.Parallel() - id, next := selectTemplateRecommendation( + id, next, _ := selectTemplateRecommendation( []rankedTemplate{ {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreNamePrefix}, {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreNamePrefix}, @@ -132,7 +132,7 @@ func TestSelectTemplateRecommendation(t *testing.T) { t.Run("NoQueryNoSignal", func(t *testing.T) { t.Parallel() - id, next := selectTemplateRecommendation( + id, next, _ := selectTemplateRecommendation( []rankedTemplate{ {Template: database.Template{ID: uuid.New()}}, {Template: database.Template{ID: uuid.New()}}, @@ -145,7 +145,7 @@ func TestSelectTemplateRecommendation(t *testing.T) { t.Run("NoQueryWeakSignalBelowFloor", func(t *testing.T) { t.Parallel() // One active developer scores ln(2), below the ln(3) floor. 
- id, next := selectTemplateRecommendation( + id, next, _ := selectTemplateRecommendation( []rankedTemplate{ {Template: database.Template{ID: uuid.New()}, AffinityScore: math.Log1p(1), Signals: templateRankingSignals{OrgDevs: 1}}, {Template: database.Template{ID: uuid.New()}, AffinityScore: 0}, @@ -158,7 +158,7 @@ func TestSelectTemplateRecommendation(t *testing.T) { t.Run("NoQueryConfidentWhenLeadsRunnerUp", func(t *testing.T) { t.Parallel() top := uuid.New() - id, next := selectTemplateRecommendation( + id, next, _ := selectTemplateRecommendation( []rankedTemplate{ {Template: database.Template{ID: top}, AffinityScore: math.Log1p(3), Signals: templateRankingSignals{OrgDevs: 3}}, {Template: database.Template{ID: uuid.New()}, AffinityScore: math.Log1p(1), Signals: templateRankingSignals{OrgDevs: 1}}, @@ -170,7 +170,7 @@ func TestSelectTemplateRecommendation(t *testing.T) { t.Run("NoQueryAmbiguousWhenBothClearFloorAndClose", func(t *testing.T) { t.Parallel() - id, next := selectTemplateRecommendation( + id, next, _ := selectTemplateRecommendation( []rankedTemplate{ {Template: database.Template{ID: uuid.New()}, AffinityScore: 1.20, Signals: templateRankingSignals{OrgDevs: 2}}, {Template: database.Template{ID: uuid.New()}, AffinityScore: 1.15, Signals: templateRankingSignals{OrgDevs: 2}}, @@ -183,7 +183,7 @@ func TestSelectTemplateRecommendation(t *testing.T) { t.Run("NoQueryConfidentWhenBothClearFloorWithLargeGap", func(t *testing.T) { t.Parallel() top := uuid.New() - id, next := selectTemplateRecommendation( + id, next, _ := selectTemplateRecommendation( []rankedTemplate{ {Template: database.Template{ID: top}, AffinityScore: 2.0, Signals: templateRankingSignals{OrgDevs: 6}}, {Template: database.Template{ID: uuid.New()}, AffinityScore: 1.2, Signals: templateRankingSignals{OrgDevs: 2}}, @@ -195,7 +195,7 @@ func TestSelectTemplateRecommendation(t *testing.T) { t.Run("NoQueryLoadErrorAsksUser", func(t *testing.T) { t.Parallel() - id, next := selectTemplateRecommendation( 
+ id, next, _ := selectTemplateRecommendation( []rankedTemplate{ {Template: database.Template{ID: uuid.New()}, AffinityScore: math.Log1p(3), Signals: templateRankingSignals{OrgDevs: 3}}, {Template: database.Template{ID: uuid.New()}}, diff --git a/coderd/x/chatd/chattool/listtemplates_telemetry_internal_test.go b/coderd/x/chatd/chattool/listtemplates_telemetry_internal_test.go new file mode 100644 index 0000000000000..eb91278430802 --- /dev/null +++ b/coderd/x/chatd/chattool/listtemplates_telemetry_internal_test.go @@ -0,0 +1,189 @@ +package chattool + +import ( + "context" + "testing" + + "github.com/google/uuid" + "github.com/stretchr/testify/require" + "golang.org/x/xerrors" + + "github.com/coder/coder/v2/coderd/database" +) + +type gapObservation struct { + recommended bool + gap float64 +} + +type fakeListTemplatesMetrics struct { + outcomes []string + signalsFailures int + gaps []gapObservation +} + +func (m *fakeListTemplatesMetrics) RecordListTemplatesOutcome(outcome string) { + m.outcomes = append(m.outcomes, outcome) +} + +func (m *fakeListTemplatesMetrics) RecordListTemplatesSignalsFailure() { + m.signalsFailures++ +} + +func (m *fakeListTemplatesMetrics) RecordListTemplatesAffinityGap(recommended bool, gap float64) { + m.gaps = append(m.gaps, gapObservation{recommended: recommended, gap: gap}) +} + +func rankedWith(queryScore int, affinity float64) rankedTemplate { + return rankedTemplate{ + Template: database.Template{ID: uuid.New()}, + QueryScore: queryScore, + AffinityScore: affinity, + } +} + +func TestRecordListTemplatesTelemetry_Metrics(t *testing.T) { + t.Parallel() + + t.Run("OutcomeAndSignalsFailure", func(t *testing.T) { + t.Parallel() + m := &fakeListTemplatesMetrics{} + recordListTemplatesTelemetry(context.Background(), ListTemplatesOptions{Metrics: m}, "tc", uuid.New(), listTemplatesTelemetry{ + ranked: []rankedTemplate{rankedWith(0, 0)}, + nextStep: NextStepAskUser, + reason: recommendationReasonAffinityLow, + signalsErr: 
xerrors.New("boom"), + }) + require.Equal(t, []string{listTemplatesOutcomeAskUser}, m.outcomes) + require.Equal(t, 1, m.signalsFailures) + }) + + t.Run("GapRecordedWhenNoQuery", func(t *testing.T) { + t.Parallel() + m := &fakeListTemplatesMetrics{} + recordListTemplatesTelemetry(context.Background(), ListTemplatesOptions{Metrics: m}, "tc", uuid.New(), listTemplatesTelemetry{ + ranked: []rankedTemplate{rankedWith(0, 5), rankedWith(0, 2)}, + recommendedID: uuid.New(), + nextStep: NextStepUseRecommended, + }) + require.Len(t, m.gaps, 1) + require.True(t, m.gaps[0].recommended) + require.InDelta(t, 3.0, m.gaps[0].gap, 1e-9) + require.Zero(t, m.signalsFailures) + }) + + t.Run("GapRecordedWhenEqualQueryTier", func(t *testing.T) { + t.Parallel() + m := &fakeListTemplatesMetrics{} + recordListTemplatesTelemetry(context.Background(), ListTemplatesOptions{Metrics: m}, "tc", uuid.New(), listTemplatesTelemetry{ + query: "py", + ranked: []rankedTemplate{rankedWith(queryScoreNamePrefix, 5), rankedWith(queryScoreNamePrefix, 1)}, + nextStep: NextStepAskUser, + }) + require.Len(t, m.gaps, 1) + require.False(t, m.gaps[0].recommended) + require.InDelta(t, 4.0, m.gaps[0].gap, 1e-9) + }) + + t.Run("GapSkippedWhenQueryTierDecides", func(t *testing.T) { + t.Parallel() + m := &fakeListTemplatesMetrics{} + // Different query tiers: the order is decided by relevance, not + // affinity, so the affinity gap would be misleading. 
+ recordListTemplatesTelemetry(context.Background(), ListTemplatesOptions{Metrics: m}, "tc", uuid.New(), listTemplatesTelemetry{ + query: "py", + ranked: []rankedTemplate{rankedWith(queryScoreExactName, 0), rankedWith(queryScoreDescriptionMatch, 9)}, + recommendedID: uuid.New(), + nextStep: NextStepUseRecommended, + }) + require.Empty(t, m.gaps) + }) + + t.Run("GapSkippedWhenSingleCandidate", func(t *testing.T) { + t.Parallel() + m := &fakeListTemplatesMetrics{} + recordListTemplatesTelemetry(context.Background(), ListTemplatesOptions{Metrics: m}, "tc", uuid.New(), listTemplatesTelemetry{ + ranked: []rankedTemplate{rankedWith(0, 5)}, + recommendedID: uuid.New(), + nextStep: NextStepUseRecommended, + }) + require.Empty(t, m.gaps) + }) + + t.Run("NilMetricsDoesNotPanic", func(t *testing.T) { + t.Parallel() + recordListTemplatesTelemetry(context.Background(), ListTemplatesOptions{}, "tc", uuid.New(), listTemplatesTelemetry{ + ranked: []rankedTemplate{rankedWith(0, 0)}, + nextStep: NextStepAskUser, + }) + }) +} + +func TestSelectTemplateRecommendation_Reasons(t *testing.T) { + t.Parallel() + + loadErr := xerrors.New("signals failed to load") + + t.Run("Outcomes", func(t *testing.T) { + t.Parallel() + _, _, reason := selectTemplateRecommendation(nil, 0, nil) + require.Equal(t, recommendationReasonNoTemplates, reason) + + _, _, reason = selectTemplateRecommendation(nil, 2, nil) + require.Equal(t, recommendationReasonNoMatches, reason) + + _, _, reason = selectTemplateRecommendation( + []rankedTemplate{{Template: database.Template{ID: uuid.New()}}}, 1, loadErr, + ) + require.Equal(t, recommendationReasonOnlyAvailable, reason) + }) + + t.Run("Query", func(t *testing.T) { + t.Parallel() + _, _, reason := selectTemplateRecommendation([]rankedTemplate{ + {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreExactName}, + {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreDescriptionMatch}, + }, 2, nil) + require.Equal(t, 
recommendationReasonDecisiveQuery, reason) + + // A failed signal load past a decisive query falls back to asking. + _, _, reason = selectTemplateRecommendation([]rankedTemplate{ + {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreNamePrefix}, + {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreNamePrefix}, + }, 2, loadErr) + require.Equal(t, recommendationReasonSignalsUnavailable, reason) + + _, _, reason = selectTemplateRecommendation([]rankedTemplate{ + {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreNamePrefix, AffinityScore: 10, Signals: templateRankingSignals{ActiveCount: 1}}, + {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreNamePrefix, AffinityScore: 0}, + }, 2, nil) + require.Equal(t, recommendationReasonQueryTieConfident, reason) + + _, _, reason = selectTemplateRecommendation([]rankedTemplate{ + {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreNamePrefix, AffinityScore: 0.1}, + {Template: database.Template{ID: uuid.New()}, QueryScore: queryScoreNamePrefix, AffinityScore: 0}, + }, 2, nil) + require.Equal(t, recommendationReasonQueryTieAmbiguous, reason) + }) + + t.Run("Affinity", func(t *testing.T) { + t.Parallel() + _, _, reason := selectTemplateRecommendation([]rankedTemplate{ + {Template: database.Template{ID: uuid.New()}, AffinityScore: 0.1}, + {Template: database.Template{ID: uuid.New()}, AffinityScore: 0}, + }, 2, nil) + require.Equal(t, recommendationReasonAffinityLow, reason) + + _, _, reason = selectTemplateRecommendation([]rankedTemplate{ + {Template: database.Template{ID: uuid.New()}, AffinityScore: 1.20, Signals: templateRankingSignals{OrgDevs: 2}}, + {Template: database.Template{ID: uuid.New()}, AffinityScore: 1.15, Signals: templateRankingSignals{OrgDevs: 2}}, + }, 2, nil) + require.Equal(t, recommendationReasonAffinityAmbiguous, reason) + + _, _, reason = selectTemplateRecommendation([]rankedTemplate{ + {Template: database.Template{ID: 
uuid.New()}, AffinityScore: 2.0, Signals: templateRankingSignals{OrgDevs: 6}}, + {Template: database.Template{ID: uuid.New()}, AffinityScore: 1.2, Signals: templateRankingSignals{OrgDevs: 2}}, + }, 2, nil) + require.Equal(t, recommendationReasonAffinityConfident, reason) + }) +} diff --git a/coderd/x/chatd/chattool/listtemplates_test.go b/coderd/x/chatd/chattool/listtemplates_test.go index 060589ba90363..d7f4bcc53e028 100644 --- a/coderd/x/chatd/chattool/listtemplates_test.go +++ b/coderd/x/chatd/chattool/listtemplates_test.go @@ -1006,3 +1006,58 @@ func listTemplateItems(t *testing.T, result map[string]any) []map[string]any { } return templates } + +type fakeListTemplatesMetrics struct { + outcomes []string + signalsFailures int + gaps int +} + +func (m *fakeListTemplatesMetrics) RecordListTemplatesOutcome(outcome string) { + m.outcomes = append(m.outcomes, outcome) +} +func (m *fakeListTemplatesMetrics) RecordListTemplatesSignalsFailure() { m.signalsFailures++ } +func (m *fakeListTemplatesMetrics) RecordListTemplatesAffinityGap(bool, float64) { + m.gaps++ +} + +// TestListTemplates_RecordsTelemetry exercises the handler's telemetry wiring +// end to end: the outcome metric fires and the per-chat recommendation is +// recorded so a later create_workspace can classify it. 
+func TestListTemplates_RecordsTelemetry(t *testing.T) { + t.Parallel() + + ctx := testutil.Context(t, testutil.WaitShort) + db, _ := dbtestutil.NewDB(t) + user := dbgen.User(t, db, database.User{}) + org := dbgen.Organization(t, db, database.Organization{}) + _ = dbgen.OrganizationMember(t, db, database.OrganizationMember{UserID: user.ID, OrganizationID: org.ID}) + tmpl := dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + Name: "only-template", + }) + + chatID := uuid.New() + metrics := &fakeListTemplatesMetrics{} + tracker := chattool.NewRecommendationTracker(nil, 0, 0) + + tool := chattool.ListTemplates(db, org.ID, chattool.ListTemplatesOptions{ + OwnerID: user.ID, + ChatID: chatID, + Metrics: metrics, + Recommendations: tracker, + }) + + resp, err := tool.Run(ctx, fantasy.ToolCall{ID: "lt-1", Name: "list_templates", Input: "{}"}) + require.NoError(t, err) + require.False(t, resp.IsError) + + // The lone template is recommended, so the outcome metric records it. + require.Equal(t, []string{"recommended"}, metrics.outcomes) + require.Zero(t, metrics.signalsFailures) + + // The recommendation was recorded for the chat: classifying the chosen + // template as the recommended one yields an acceptance. + require.Equal(t, "accepted_recommendation", tracker.Classify(chatID, tmpl.ID)) +} diff --git a/coderd/x/chatd/chattool/recommendationtracker.go b/coderd/x/chatd/chattool/recommendationtracker.go new file mode 100644 index 0000000000000..6a0799aace6da --- /dev/null +++ b/coderd/x/chatd/chattool/recommendationtracker.go @@ -0,0 +1,170 @@ +package chattool + +import ( + "sync" + "time" + + "github.com/google/uuid" + + "github.com/coder/quartz" +) + +// Recommendation follow-up outcomes describe how a create_workspace call +// related to the most recent list_templates recommendation for the same chat. +// They are the label values for template_recommendation_followup_total. 
+const ( + // recommendationFollowupAccepted: the chosen template is the one + // list_templates recommended. + recommendationFollowupAccepted = "accepted_recommendation" + // recommendationFollowupOverrodeListed: a recommendation existed but the + // agent built a different template that was still on the shown page. + recommendationFollowupOverrodeListed = "overrode_with_listed_template" + // recommendationFollowupListedNoRec: no recommendation was made, and the + // agent built a template from the shown page. + recommendationFollowupListedNoRec = "created_listed_without_recommendation" + // recommendationFollowupUnlisted: the agent built a template that was not + // on the shown page (e.g. user named it, or an older list result). + recommendationFollowupUnlisted = "created_unlisted_template" + // recommendationFollowupNoRecord: no fresh list_templates result is known + // for the chat (restart, replica handoff, expiry, or none was called). + recommendationFollowupNoRecord = "no_recent_list_templates" +) + +const ( + // defaultRecommendationTTL bounds how long a recorded recommendation stays + // eligible for follow-up classification. + defaultRecommendationTTL = 30 * time.Minute + // defaultRecommendationMaxEntries bounds the tracker's memory footprint. + defaultRecommendationMaxEntries = 4096 +) + +// RecommendationTracker correlates the most recent list_templates result per +// chat with the template that create_workspace later builds, so we can measure +// whether the agent followed the recommendation. +// +// State is in-memory and best-effort: it is lost on restart and is not shared +// across replicas, so a follow-up handled elsewhere classifies as +// "no_recent_list_templates". The durable source of truth for offline analysis +// is the persisted chat transcript (the list_templates result and the +// create_workspace call) plus the chats.workspace_id binding; this tracker +// exists only to surface a live, aggregate acceptance signal. 
+type RecommendationTracker struct { + clock quartz.Clock + ttl time.Duration + maxEntries int + + mu sync.Mutex + entries map[uuid.UUID]recommendationEntry +} + +type recommendationEntry struct { + recommendedID uuid.UUID + listed map[uuid.UUID]struct{} + recordedAt time.Time +} + +// NewRecommendationTracker constructs a tracker. A nil clock defaults to a real +// clock; non-positive ttl or maxEntries fall back to defaults. +func NewRecommendationTracker(clock quartz.Clock, ttl time.Duration, maxEntries int) *RecommendationTracker { + if clock == nil { + clock = quartz.NewReal() + } + if ttl <= 0 { + ttl = defaultRecommendationTTL + } + if maxEntries <= 0 { + maxEntries = defaultRecommendationMaxEntries + } + return &RecommendationTracker{ + clock: clock, + ttl: ttl, + maxEntries: maxEntries, + entries: make(map[uuid.UUID]recommendationEntry), + } +} + +// Record stores the latest list_templates outcome for a chat. recommendedID may +// be uuid.Nil when no template was recommended. listedIDs are the template IDs +// shown on the returned page (what the agent actually saw). No-op when t is nil +// or chatID is uuid.Nil. +func (t *RecommendationTracker) Record(chatID, recommendedID uuid.UUID, listedIDs []uuid.UUID) { + if t == nil || chatID == uuid.Nil { + return + } + listed := make(map[uuid.UUID]struct{}, len(listedIDs)) + for _, id := range listedIDs { + if id != uuid.Nil { + listed[id] = struct{}{} + } + } + now := t.clock.Now() + + t.mu.Lock() + defer t.mu.Unlock() + t.evictLocked(now) + t.entries[chatID] = recommendationEntry{ + recommendedID: recommendedID, + listed: listed, + recordedAt: now, + } +} + +// Classify consumes the recorded recommendation for a chat and reports how the +// chosen template relates to it. The entry is removed so a single creation is +// counted once. Returns recommendationFollowupNoRecord when t is nil, chatID is +// uuid.Nil, or no fresh record exists. 
+func (t *RecommendationTracker) Classify(chatID, chosenID uuid.UUID) string { + if t == nil || chatID == uuid.Nil { + return recommendationFollowupNoRecord + } + now := t.clock.Now() + + t.mu.Lock() + defer t.mu.Unlock() + entry, ok := t.entries[chatID] + if !ok { + return recommendationFollowupNoRecord + } + delete(t.entries, chatID) + if now.Sub(entry.recordedAt) > t.ttl { + return recommendationFollowupNoRecord + } + + _, listed := entry.listed[chosenID] + switch { + case entry.recommendedID != uuid.Nil && chosenID == entry.recommendedID: + return recommendationFollowupAccepted + case listed && entry.recommendedID != uuid.Nil: + return recommendationFollowupOverrodeListed + case listed: + return recommendationFollowupListedNoRec + default: + return recommendationFollowupUnlisted + } +} + +// evictLocked drops expired entries and, if still at capacity, the oldest +// remaining entry to make room for one more. Callers must hold t.mu. +func (t *RecommendationTracker) evictLocked(now time.Time) { + for id, e := range t.entries { + if now.Sub(e.recordedAt) > t.ttl { + delete(t.entries, id) + } + } + if len(t.entries) < t.maxEntries { + return + } + var ( + oldestID uuid.UUID + oldestAt time.Time + found bool + ) + for id, e := range t.entries { + if !found || e.recordedAt.Before(oldestAt) { + oldestID, oldestAt, found = id, e.recordedAt, true + } + } + if found { + delete(t.entries, oldestID) + } +} diff --git a/coderd/x/chatd/chattool/recommendationtracker_internal_test.go b/coderd/x/chatd/chattool/recommendationtracker_internal_test.go new file mode 100644 index 0000000000000..d55269314093a --- /dev/null +++ b/coderd/x/chatd/chattool/recommendationtracker_internal_test.go @@ -0,0 +1,110 @@ +package chattool + +import ( + "testing" + "time" + + "github.com/google/uuid" + "github.com/stretchr/testify/require" + + "github.com/coder/quartz" +) + +func TestRecommendationTracker_Classify(t *testing.T) { + t.Parallel() + + t.Run("AcceptedRecommendation", func(t 
*testing.T) { + t.Parallel() + tr := NewRecommendationTracker(quartz.NewMock(t), 0, 0) + chat, rec, other := uuid.New(), uuid.New(), uuid.New() + tr.Record(chat, rec, []uuid.UUID{rec, other}) + require.Equal(t, recommendationFollowupAccepted, tr.Classify(chat, rec)) + }) + + t.Run("OverrodeWithListedTemplate", func(t *testing.T) { + t.Parallel() + tr := NewRecommendationTracker(quartz.NewMock(t), 0, 0) + chat, rec, other := uuid.New(), uuid.New(), uuid.New() + tr.Record(chat, rec, []uuid.UUID{rec, other}) + require.Equal(t, recommendationFollowupOverrodeListed, tr.Classify(chat, other)) + }) + + t.Run("ListedWithoutRecommendation", func(t *testing.T) { + t.Parallel() + tr := NewRecommendationTracker(quartz.NewMock(t), 0, 0) + chat, listed := uuid.New(), uuid.New() + // uuid.Nil recommendation: list_templates returned templates but + // recommended none. + tr.Record(chat, uuid.Nil, []uuid.UUID{listed}) + require.Equal(t, recommendationFollowupListedNoRec, tr.Classify(chat, listed)) + }) + + t.Run("UnlistedTemplate", func(t *testing.T) { + t.Parallel() + tr := NewRecommendationTracker(quartz.NewMock(t), 0, 0) + chat, rec, unlisted := uuid.New(), uuid.New(), uuid.New() + tr.Record(chat, rec, []uuid.UUID{rec}) + require.Equal(t, recommendationFollowupUnlisted, tr.Classify(chat, unlisted)) + }) + + t.Run("NoRecord", func(t *testing.T) { + t.Parallel() + tr := NewRecommendationTracker(quartz.NewMock(t), 0, 0) + require.Equal(t, recommendationFollowupNoRecord, tr.Classify(uuid.New(), uuid.New())) + }) + + t.Run("ConsumedOnce", func(t *testing.T) { + t.Parallel() + tr := NewRecommendationTracker(quartz.NewMock(t), 0, 0) + chat, rec := uuid.New(), uuid.New() + tr.Record(chat, rec, []uuid.UUID{rec}) + require.Equal(t, recommendationFollowupAccepted, tr.Classify(chat, rec)) + // A second classification finds nothing: the entry was consumed. 
+ require.Equal(t, recommendationFollowupNoRecord, tr.Classify(chat, rec)) + }) + + t.Run("ExpiredByTTL", func(t *testing.T) { + t.Parallel() + clock := quartz.NewMock(t) + tr := NewRecommendationTracker(clock, time.Minute, 0) + chat, rec := uuid.New(), uuid.New() + tr.Record(chat, rec, []uuid.UUID{rec}) + clock.Advance(time.Minute + time.Second) + require.Equal(t, recommendationFollowupNoRecord, tr.Classify(chat, rec)) + }) + + t.Run("NilTrackerAndNilChat", func(t *testing.T) { + t.Parallel() + var tr *RecommendationTracker + require.Equal(t, recommendationFollowupNoRecord, tr.Classify(uuid.New(), uuid.New())) + // Record on a nil tracker or with a nil chat ID must not panic. + tr.Record(uuid.New(), uuid.New(), nil) + live := NewRecommendationTracker(quartz.NewMock(t), 0, 0) + live.Record(uuid.Nil, uuid.New(), nil) + require.Equal(t, recommendationFollowupNoRecord, live.Classify(uuid.Nil, uuid.New())) + }) +} + +func TestRecommendationTracker_EvictsOldestAtCapacity(t *testing.T) { + t.Parallel() + + clock := quartz.NewMock(t) + const maxEntries = 3 + tr := NewRecommendationTracker(clock, time.Hour, maxEntries) + + // Record the oldest entry, then advance so later entries are strictly + // newer, filling capacity beyond maxEntries. + oldest := uuid.New() + oldestRec := uuid.New() + tr.Record(oldest, oldestRec, []uuid.UUID{oldestRec}) + + for i := 0; i < maxEntries; i++ { + clock.Advance(time.Second) + chat, rec := uuid.New(), uuid.New() + tr.Record(chat, rec, []uuid.UUID{rec}) + } + + // The oldest entry was evicted to keep the map bounded; newer entries + // within TTL remain classifiable. + require.Equal(t, recommendationFollowupNoRecord, tr.Classify(oldest, oldestRec)) +} diff --git a/docs/admin/integrations/prometheus.md b/docs/admin/integrations/prometheus.md index 8df2126633d1f..8eda66a5112e9 100644 --- a/docs/admin/integrations/prometheus.md +++ b/docs/admin/integrations/prometheus.md @@ -212,11 +212,15 @@ deployment. 
They will always be available from the agent. | `coderd_chat_auto_archive_records_archived_total` | counter | Total number of chats archived by the auto-archive job (counting both roots and cascaded children). | | | `coderd_chatd_chats` | gauge | Number of chats being processed, by state. | `state` | | `coderd_chatd_compaction_total` | counter | Total compaction outcomes (only recorded when compaction was triggered or failed). | `model` `provider` `result` | +| `coderd_chatd_list_templates_affinity_gap` | histogram | Affinity score gap between the top two candidates when affinity is the deciding signal, labeled by whether a recommendation was made. | `recommended` | +| `coderd_chatd_list_templates_outcome_total` | counter | Total list_templates calls by recommendation outcome (recommended, ask_user, no_matches, no_templates). | `outcome` | +| `coderd_chatd_list_templates_signals_failures_total` | counter | Total list_templates calls where ranking signals failed to load, degrading the result toward asking the user. | | | `coderd_chatd_message_count` | histogram | Number of messages in the prompt per LLM request. | `model` `provider` | | `coderd_chatd_prompt_size_bytes` | histogram | Estimated byte size of the prompt per LLM request. | `model` `provider` | | `coderd_chatd_steps_total` | counter | Total agentic loop steps across all chats. | `model` `provider` | | `coderd_chatd_stream_buffer_dropped_total` | counter | Number of chat stream buffer events dropped due to the per-chat buffer cap. | | | `coderd_chatd_stream_retries_total` | counter | Total LLM stream retries. | `chain_broken` `kind` `model` `provider` | +| `coderd_chatd_template_recommendation_followup_total` | counter | Total create_workspace calls by how the chosen template related to the prior list_templates recommendation. | `outcome` | | `coderd_chatd_tool_errors_total` | counter | Total tool calls that returned an error result. 
| `model` `provider` `tool_name` | | `coderd_chatd_tool_result_size_bytes` | histogram | Size in bytes of each tool execution result. | `model` `provider` `tool_name` | | `coderd_chatd_ttft_seconds` | histogram | Time-to-first-token: wall time from LLM request to first streamed chunk. | `model` `provider` | diff --git a/scripts/metricsdocgen/generated_metrics b/scripts/metricsdocgen/generated_metrics index 76d25ef341ade..2fa0f357593be 100644 --- a/scripts/metricsdocgen/generated_metrics +++ b/scripts/metricsdocgen/generated_metrics @@ -238,6 +238,15 @@ coderd_chatd_chats{state=""} 0 # HELP coderd_chatd_compaction_total Total compaction outcomes (only recorded when compaction was triggered or failed). # TYPE coderd_chatd_compaction_total counter coderd_chatd_compaction_total{provider="",model="",result=""} 0 +# HELP coderd_chatd_list_templates_affinity_gap Affinity score gap between the top two candidates when affinity is the deciding signal, labeled by whether a recommendation was made. +# TYPE coderd_chatd_list_templates_affinity_gap histogram +coderd_chatd_list_templates_affinity_gap{recommended=""} 0 +# HELP coderd_chatd_list_templates_outcome_total Total list_templates calls by recommendation outcome (recommended, ask_user, no_matches, no_templates). +# TYPE coderd_chatd_list_templates_outcome_total counter +coderd_chatd_list_templates_outcome_total{outcome=""} 0 +# HELP coderd_chatd_list_templates_signals_failures_total Total list_templates calls where ranking signals failed to load, degrading the result toward asking the user. +# TYPE coderd_chatd_list_templates_signals_failures_total counter +coderd_chatd_list_templates_signals_failures_total 0 # HELP coderd_chatd_message_count Number of messages in the prompt per LLM request. # TYPE coderd_chatd_message_count histogram coderd_chatd_message_count{provider="",model=""} 0 @@ -253,6 +262,9 @@ coderd_chatd_stream_buffer_dropped_total 0 # HELP coderd_chatd_stream_retries_total Total LLM stream retries. 
# TYPE coderd_chatd_stream_retries_total counter coderd_chatd_stream_retries_total{provider="",model="",kind="",chain_broken=""} 0 +# HELP coderd_chatd_template_recommendation_followup_total Total create_workspace calls by how the chosen template related to the prior list_templates recommendation. +# TYPE coderd_chatd_template_recommendation_followup_total counter +coderd_chatd_template_recommendation_followup_total{outcome=""} 0 # HELP coderd_chatd_tool_errors_total Total tool calls that returned an error result. # TYPE coderd_chatd_tool_errors_total counter coderd_chatd_tool_errors_total{provider="",model="",tool_name=""} 0 From 137ccf945708df9423a63b7d9b2055691bebdee7 Mon Sep 17 00:00:00 2001 From: Jaayden Halko Date: Thu, 18 Jun 2026 06:00:28 +0000 Subject: [PATCH 20/21] fix: address list_templates ranking telemetry review feedback Resolve reviewer findings on the telemetry PR: - RecommendationTracker.Record accumulates listed template IDs across pages of the same result, so a follow-up build from a later page is classified as listed rather than unlisted (CRF-1). - list_templates records the affinity gap only when ranking signals loaded; a failed load leaves affinity scores at zero and would pollute the histogram (Codex P2). - listTemplatesOutcome maps NextStepAskUser explicitly and routes unknown next steps to an "unknown" bucket (CRF-4). - template_recommendation_followup_total help text enumerates its outcome values; metrics docs regenerated (CRF-5). - create_workspace follow-up log includes owner_id (CRF-6). - Rename recommendationFollowupListedNoRec to recommendationFollowupListedNoRecommendation (CRF-7). - Drop "Tier" labels from comments and tighten the tracker field comment (CRF-3, CRF-10). - Eviction test asserts a newer entry survives and uses range-over-int; add coverage for cross-page accumulation and the unknown outcome (CRF-2, CRF-8). 
--- coderd/x/chatd/chatd.go | 6 +-- coderd/x/chatd/chatloop/metrics.go | 2 +- coderd/x/chatd/chattool/createworkspace.go | 7 ++- .../chattool/createworkspace_internal_test.go | 2 +- coderd/x/chatd/chattool/listtemplates.go | 28 ++++++++---- .../listtemplates_telemetry_internal_test.go | 26 +++++++++++ .../x/chatd/chattool/recommendationtracker.go | 42 +++++++++++++----- .../recommendationtracker_internal_test.go | 44 +++++++++++++------ docs/admin/integrations/prometheus.md | 2 +- scripts/metricsdocgen/generated_metrics | 2 +- 10 files changed, 116 insertions(+), 45 deletions(-) diff --git a/coderd/x/chatd/chatd.go b/coderd/x/chatd/chatd.go index 97482ce66004b..cc15b6cd6e387 100644 --- a/coderd/x/chatd/chatd.go +++ b/coderd/x/chatd/chatd.go @@ -204,9 +204,9 @@ type Server struct { chatWorker *chatWorker messagePartBuffer *messagepartbuffer.Buffer streamSyncPoller *streamSyncPoller - // templateRecommendations correlates a list_templates recommendation with - // the template a later create_workspace builds, for acceptance telemetry. - // In-memory and best-effort (see chattool.RecommendationTracker). + // templateRecommendations tracks list_templates recommendations to classify + // the template a later create_workspace builds. In-memory and best-effort; + // see chattool.RecommendationTracker. 
templateRecommendations *chattool.RecommendationTracker recordingSem chan struct{} diff --git a/coderd/x/chatd/chatloop/metrics.go b/coderd/x/chatd/chatloop/metrics.go index ac58bc559e7cc..5d44e138358a4 100644 --- a/coderd/x/chatd/chatloop/metrics.go +++ b/coderd/x/chatd/chatloop/metrics.go @@ -138,7 +138,7 @@ func NewMetrics(reg prometheus.Registerer) *Metrics { Namespace: metricsNamespace, Subsystem: metricsSubsystem, Name: "template_recommendation_followup_total", - Help: "Total create_workspace calls by how the chosen template related to the prior list_templates recommendation.", + Help: "Total create_workspace calls by how the chosen template related to the prior list_templates recommendation (accepted_recommendation, overrode_with_listed_template, created_listed_without_recommendation, created_unlisted_template, no_recent_list_templates).", }, []string{"outcome"}), } } diff --git a/coderd/x/chatd/chattool/createworkspace.go b/coderd/x/chatd/chattool/createworkspace.go index c47435d05d0c8..0ea3b2faa2d65 100644 --- a/coderd/x/chatd/chattool/createworkspace.go +++ b/coderd/x/chatd/chattool/createworkspace.go @@ -288,10 +288,8 @@ func CreateWorkspace(db database.Store, organizationID, chatID uuid.UUID, option options.OnChatUpdated(updatedChat) } - // Tier 3 acceptance telemetry: classify how this freshly - // created workspace's template related to the prior - // list_templates recommendation. Only genuine creations reach - // here; the idempotent existing-workspace path returns earlier. + // Only genuine creations reach here; the idempotent + // existing-workspace path returns earlier. 
recordRecommendationFollowup(ctx, options, chatID, workspace.ID, templateID) // Wait for the build to complete and the agent to @@ -395,6 +393,7 @@ func recordRecommendationFollowup( } options.Logger.Info(ctx, "create_workspace recommendation follow-up", slog.F("chat_id", chatID), + slog.F("owner_id", options.OwnerID), slog.F("workspace_id", workspaceID), slog.F("template_id", templateID), slog.F("recommendation_followup", followup), diff --git a/coderd/x/chatd/chattool/createworkspace_internal_test.go b/coderd/x/chatd/chattool/createworkspace_internal_test.go index 960a36c97e924..10230b4e521db 100644 --- a/coderd/x/chatd/chattool/createworkspace_internal_test.go +++ b/coderd/x/chatd/chattool/createworkspace_internal_test.go @@ -2144,7 +2144,7 @@ func TestCreateWorkspace_RecordsRecommendationFollowup(t *testing.T) { // Seed the tracker so the chat already has a recommendation for templateID. tracker := NewRecommendationTracker(nil, 0, 0) - tracker.Record(chatID, templateID, []uuid.UUID{templateID}) + tracker.Record(chatID, templateID, []uuid.UUID{templateID}, 1) metrics := &fakeCreateWorkspaceMetrics{} tool := CreateWorkspace(db, orgID, chatID, CreateWorkspaceOptions{ diff --git a/coderd/x/chatd/chattool/listtemplates.go b/coderd/x/chatd/chattool/listtemplates.go index 4910d0773aedc..7244d988bc946 100644 --- a/coderd/x/chatd/chattool/listtemplates.go +++ b/coderd/x/chatd/chattool/listtemplates.go @@ -88,6 +88,11 @@ const ( listTemplatesOutcomeAskUser = "ask_user" listTemplatesOutcomeNoMatches = "no_matches" listTemplatesOutcomeNoTemplates = "no_templates" + // listTemplatesOutcomeUnknown is a defensive bucket for a next-step value + // that listTemplatesOutcome does not map. It should never appear in + // practice; an increment signals a new NextStep* constant that was not + // wired into the outcome mapping. + listTemplatesOutcomeUnknown = "unknown" ) // Recommendation reasons explain which branch produced the outcome. 
They are @@ -255,7 +260,7 @@ func ListTemplates(db database.Store, organizationID uuid.UUID, options ListTemp reason: reason, signalsErr: signalsErr, }) - options.Recommendations.Record(options.ChatID, recommendedID, pageTemplateIDs) + options.Recommendations.Record(options.ChatID, recommendedID, pageTemplateIDs, page) result := map[string]any{ "templates": items, @@ -425,8 +430,10 @@ func listTemplatesOutcome(nextStep string) string { return listTemplatesOutcomeNoMatches case NextStepUseRecommended: return listTemplatesOutcomeRecommended - default: + case NextStepAskUser: return listTemplatesOutcomeAskUser + default: + return listTemplatesOutcomeUnknown } } @@ -445,11 +452,11 @@ type listTemplatesTelemetry struct { signalsErr error } -// recordListTemplatesTelemetry records the aggregate ranking metrics (Tier 2) -// and emits the structured decision log (Tier 1). The raw user query text is -// never logged: only its presence and length are, to avoid leaking task -// content. The affinity score itself stays internal; the log carries the -// inputs (scores, gap, thresholds) so a decision is reconstructable. +// recordListTemplatesTelemetry records the aggregate ranking metrics and emits +// the structured decision log. The raw user query text is never logged: only +// its presence and length are, to avoid leaking task content. The affinity +// score itself stays internal; the log carries the inputs (scores, gap, +// thresholds) so a decision is reconstructable. func recordListTemplatesTelemetry( ctx context.Context, options ListTemplatesOptions, @@ -467,8 +474,11 @@ func recordListTemplatesTelemetry( } // The affinity gap is only meaningful when affinity is the deciding // signal: no query, or the top two share the same query tier. In those - // cases the sort guarantees a non-negative gap. - if len(t.ranked) > 1 { + // cases the sort guarantees a non-negative gap. 
A failed signal load + // leaves every affinity score at its zero default and forces an + // ask_user outcome, so recording the gap then would only add + // meaningless zero samples; the signals-failure counter covers it. + if t.signalsErr == nil && len(t.ranked) > 1 { top, runner := t.ranked[0], t.ranked[1] if t.query == "" || top.QueryScore == runner.QueryScore { options.Metrics.RecordListTemplatesAffinityGap(recommended, top.AffinityScore-runner.AffinityScore) diff --git a/coderd/x/chatd/chattool/listtemplates_telemetry_internal_test.go b/coderd/x/chatd/chattool/listtemplates_telemetry_internal_test.go index eb91278430802..89297b92b51e6 100644 --- a/coderd/x/chatd/chattool/listtemplates_telemetry_internal_test.go +++ b/coderd/x/chatd/chattool/listtemplates_telemetry_internal_test.go @@ -110,6 +110,21 @@ func TestRecordListTemplatesTelemetry_Metrics(t *testing.T) { require.Empty(t, m.gaps) }) + t.Run("GapSkippedWhenSignalsFailed", func(t *testing.T) { + t.Parallel() + m := &fakeListTemplatesMetrics{} + // Signals failed to load, so affinity scores are meaningless zero + // defaults; only the signals-failure counter should move. 
+ recordListTemplatesTelemetry(context.Background(), ListTemplatesOptions{Metrics: m}, "tc", uuid.New(), listTemplatesTelemetry{ + ranked: []rankedTemplate{rankedWith(0, 0), rankedWith(0, 0)}, + nextStep: NextStepAskUser, + reason: recommendationReasonSignalsUnavailable, + signalsErr: xerrors.New("boom"), + }) + require.Empty(t, m.gaps) + require.Equal(t, 1, m.signalsFailures) + }) + t.Run("NilMetricsDoesNotPanic", func(t *testing.T) { t.Parallel() recordListTemplatesTelemetry(context.Background(), ListTemplatesOptions{}, "tc", uuid.New(), listTemplatesTelemetry{ @@ -119,6 +134,17 @@ func TestRecordListTemplatesTelemetry_Metrics(t *testing.T) { }) } +func TestListTemplatesOutcome(t *testing.T) { + t.Parallel() + require.Equal(t, listTemplatesOutcomeNoTemplates, listTemplatesOutcome(NextStepNoTemplates)) + require.Equal(t, listTemplatesOutcomeNoMatches, listTemplatesOutcome(NextStepNoMatches)) + require.Equal(t, listTemplatesOutcomeRecommended, listTemplatesOutcome(NextStepUseRecommended)) + require.Equal(t, listTemplatesOutcomeAskUser, listTemplatesOutcome(NextStepAskUser)) + // An unmapped next-step value falls into the defensive unknown bucket + // instead of silently reporting ask_user. + require.Equal(t, listTemplatesOutcomeUnknown, listTemplatesOutcome("some-future-next-step")) +} + func TestSelectTemplateRecommendation_Reasons(t *testing.T) { t.Parallel() diff --git a/coderd/x/chatd/chattool/recommendationtracker.go b/coderd/x/chatd/chattool/recommendationtracker.go index 6a0799aace6da..244fa3d2912ca 100644 --- a/coderd/x/chatd/chattool/recommendationtracker.go +++ b/coderd/x/chatd/chattool/recommendationtracker.go @@ -19,9 +19,9 @@ const ( // recommendationFollowupOverrodeListed: a recommendation existed but the // agent built a different template that was still on the shown page. 
recommendationFollowupOverrodeListed = "overrode_with_listed_template" - // recommendationFollowupListedNoRec: no recommendation was made, and the - // agent built a template from the shown page. - recommendationFollowupListedNoRec = "created_listed_without_recommendation" + // recommendationFollowupListedNoRecommendation: no recommendation was made, + // and the agent built a template from the shown page. + recommendationFollowupListedNoRecommendation = "created_listed_without_recommendation" // recommendationFollowupUnlisted: the agent built a template that was not // on the shown page (e.g. user named it, or an older list result). recommendationFollowupUnlisted = "created_unlisted_template" @@ -85,22 +85,42 @@ func NewRecommendationTracker(clock quartz.Clock, ttl time.Duration, maxEntries // Record stores the latest list_templates outcome for a chat. recommendedID may // be uuid.Nil when no template was recommended. listedIDs are the template IDs -// shown on the returned page (what the agent actually saw). No-op when t is nil -// or chatID is uuid.Nil. -func (t *RecommendationTracker) Record(chatID, recommendedID uuid.UUID, listedIDs []uuid.UUID) { +// shown on the returned page (what the agent actually saw). page is the 1-based +// page number: page 1 starts a fresh record, while later pages of the same +// result accumulate their listed IDs so a follow-up build from any shown page +// still classifies as "listed" rather than "unlisted". No-op when t is nil or +// chatID is uuid.Nil. +func (t *RecommendationTracker) Record(chatID, recommendedID uuid.UUID, listedIDs []uuid.UUID, page int) { if t == nil || chatID == uuid.Nil { return } + now := t.clock.Now() + + t.mu.Lock() + defer t.mu.Unlock() + + // Later pages of the same result continue an existing record, so union + // their listed IDs instead of overwriting. Page 1, a missing entry, or a + // changed recommendation starts fresh. 
+ if page > 1 { + if entry, ok := t.entries[chatID]; ok && entry.recommendedID == recommendedID { + for _, id := range listedIDs { + if id != uuid.Nil { + entry.listed[id] = struct{}{} + } + } + entry.recordedAt = now + t.entries[chatID] = entry + return + } + } + listed := make(map[uuid.UUID]struct{}, len(listedIDs)) for _, id := range listedIDs { if id != uuid.Nil { listed[id] = struct{}{} } } - now := t.clock.Now() - - t.mu.Lock() - defer t.mu.Unlock() t.evictLocked(now) t.entries[chatID] = recommendationEntry{ recommendedID: recommendedID, @@ -137,7 +157,7 @@ func (t *RecommendationTracker) Classify(chatID, chosenID uuid.UUID) string { case listed && entry.recommendedID != uuid.Nil: return recommendationFollowupOverrodeListed case listed: - return recommendationFollowupListedNoRec + return recommendationFollowupListedNoRecommendation default: return recommendationFollowupUnlisted } diff --git a/coderd/x/chatd/chattool/recommendationtracker_internal_test.go b/coderd/x/chatd/chattool/recommendationtracker_internal_test.go index d55269314093a..353a0b9c06dde 100644 --- a/coderd/x/chatd/chattool/recommendationtracker_internal_test.go +++ b/coderd/x/chatd/chattool/recommendationtracker_internal_test.go @@ -17,7 +17,7 @@ func TestRecommendationTracker_Classify(t *testing.T) { t.Parallel() tr := NewRecommendationTracker(quartz.NewMock(t), 0, 0) chat, rec, other := uuid.New(), uuid.New(), uuid.New() - tr.Record(chat, rec, []uuid.UUID{rec, other}) + tr.Record(chat, rec, []uuid.UUID{rec, other}, 1) require.Equal(t, recommendationFollowupAccepted, tr.Classify(chat, rec)) }) @@ -25,25 +25,38 @@ func TestRecommendationTracker_Classify(t *testing.T) { t.Parallel() tr := NewRecommendationTracker(quartz.NewMock(t), 0, 0) chat, rec, other := uuid.New(), uuid.New(), uuid.New() - tr.Record(chat, rec, []uuid.UUID{rec, other}) + tr.Record(chat, rec, []uuid.UUID{rec, other}, 1) require.Equal(t, recommendationFollowupOverrodeListed, tr.Classify(chat, other)) }) + 
t.Run("AccumulatesListedAcrossPages", func(t *testing.T) { + t.Parallel() + tr := NewRecommendationTracker(quartz.NewMock(t), 0, 0) + chat, rec := uuid.New(), uuid.New() + page1, page2 := uuid.New(), uuid.New() + // Page 1 seeds the record; a later page of the same result accumulates + // its listed IDs instead of replacing page 1's, so a build of a + // page-1 template is still "listed" rather than "unlisted". + tr.Record(chat, rec, []uuid.UUID{rec, page1}, 1) + tr.Record(chat, rec, []uuid.UUID{page2}, 2) + require.Equal(t, recommendationFollowupOverrodeListed, tr.Classify(chat, page1)) + }) + t.Run("ListedWithoutRecommendation", func(t *testing.T) { t.Parallel() tr := NewRecommendationTracker(quartz.NewMock(t), 0, 0) chat, listed := uuid.New(), uuid.New() // uuid.Nil recommendation: list_templates returned templates but // recommended none. - tr.Record(chat, uuid.Nil, []uuid.UUID{listed}) - require.Equal(t, recommendationFollowupListedNoRec, tr.Classify(chat, listed)) + tr.Record(chat, uuid.Nil, []uuid.UUID{listed}, 1) + require.Equal(t, recommendationFollowupListedNoRecommendation, tr.Classify(chat, listed)) }) t.Run("UnlistedTemplate", func(t *testing.T) { t.Parallel() tr := NewRecommendationTracker(quartz.NewMock(t), 0, 0) chat, rec, unlisted := uuid.New(), uuid.New(), uuid.New() - tr.Record(chat, rec, []uuid.UUID{rec}) + tr.Record(chat, rec, []uuid.UUID{rec}, 1) require.Equal(t, recommendationFollowupUnlisted, tr.Classify(chat, unlisted)) }) @@ -57,7 +70,7 @@ func TestRecommendationTracker_Classify(t *testing.T) { t.Parallel() tr := NewRecommendationTracker(quartz.NewMock(t), 0, 0) chat, rec := uuid.New(), uuid.New() - tr.Record(chat, rec, []uuid.UUID{rec}) + tr.Record(chat, rec, []uuid.UUID{rec}, 1) require.Equal(t, recommendationFollowupAccepted, tr.Classify(chat, rec)) // A second classification finds nothing: the entry was consumed. 
require.Equal(t, recommendationFollowupNoRecord, tr.Classify(chat, rec)) @@ -68,7 +81,7 @@ func TestRecommendationTracker_Classify(t *testing.T) { clock := quartz.NewMock(t) tr := NewRecommendationTracker(clock, time.Minute, 0) chat, rec := uuid.New(), uuid.New() - tr.Record(chat, rec, []uuid.UUID{rec}) + tr.Record(chat, rec, []uuid.UUID{rec}, 1) clock.Advance(time.Minute + time.Second) require.Equal(t, recommendationFollowupNoRecord, tr.Classify(chat, rec)) }) @@ -78,9 +91,9 @@ func TestRecommendationTracker_Classify(t *testing.T) { var tr *RecommendationTracker require.Equal(t, recommendationFollowupNoRecord, tr.Classify(uuid.New(), uuid.New())) // Record on a nil tracker or with a nil chat ID must not panic. - tr.Record(uuid.New(), uuid.New(), nil) + tr.Record(uuid.New(), uuid.New(), nil, 1) live := NewRecommendationTracker(quartz.NewMock(t), 0, 0) - live.Record(uuid.Nil, uuid.New(), nil) + live.Record(uuid.Nil, uuid.New(), nil, 1) require.Equal(t, recommendationFollowupNoRecord, live.Classify(uuid.Nil, uuid.New())) }) } @@ -96,15 +109,18 @@ func TestRecommendationTracker_EvictsOldestAtCapacity(t *testing.T) { // newer, filling capacity beyond maxEntries. oldest := uuid.New() oldestRec := uuid.New() - tr.Record(oldest, oldestRec, []uuid.UUID{oldestRec}) + tr.Record(oldest, oldestRec, []uuid.UUID{oldestRec}, 1) - for i := 0; i < maxEntries; i++ { + var survivor, survivorRec uuid.UUID + for range maxEntries { clock.Advance(time.Second) chat, rec := uuid.New(), uuid.New() - tr.Record(chat, rec, []uuid.UUID{rec}) + tr.Record(chat, rec, []uuid.UUID{rec}, 1) + survivor, survivorRec = chat, rec } - // The oldest entry was evicted to keep the map bounded; newer entries - // within TTL remain classifiable. + // The oldest entry was evicted to keep the map bounded, while a newer + // entry recorded within TTL remains classifiable. 
require.Equal(t, recommendationFollowupNoRecord, tr.Classify(oldest, oldestRec)) + require.Equal(t, recommendationFollowupAccepted, tr.Classify(survivor, survivorRec)) } diff --git a/docs/admin/integrations/prometheus.md b/docs/admin/integrations/prometheus.md index 8eda66a5112e9..84efe8508b63d 100644 --- a/docs/admin/integrations/prometheus.md +++ b/docs/admin/integrations/prometheus.md @@ -220,7 +220,7 @@ deployment. They will always be available from the agent. | `coderd_chatd_steps_total` | counter | Total agentic loop steps across all chats. | `model` `provider` | | `coderd_chatd_stream_buffer_dropped_total` | counter | Number of chat stream buffer events dropped due to the per-chat buffer cap. | | | `coderd_chatd_stream_retries_total` | counter | Total LLM stream retries. | `chain_broken` `kind` `model` `provider` | -| `coderd_chatd_template_recommendation_followup_total` | counter | Total create_workspace calls by how the chosen template related to the prior list_templates recommendation. | `outcome` | +| `coderd_chatd_template_recommendation_followup_total` | counter | Total create_workspace calls by how the chosen template related to the prior list_templates recommendation (accepted_recommendation, overrode_with_listed_template, created_listed_without_recommendation, created_unlisted_template, no_recent_list_templates). | `outcome` | | `coderd_chatd_tool_errors_total` | counter | Total tool calls that returned an error result. | `model` `provider` `tool_name` | | `coderd_chatd_tool_result_size_bytes` | histogram | Size in bytes of each tool execution result. | `model` `provider` `tool_name` | | `coderd_chatd_ttft_seconds` | histogram | Time-to-first-token: wall time from LLM request to first streamed chunk. 
| `model` `provider` | diff --git a/scripts/metricsdocgen/generated_metrics b/scripts/metricsdocgen/generated_metrics index 2fa0f357593be..9029d13096303 100644 --- a/scripts/metricsdocgen/generated_metrics +++ b/scripts/metricsdocgen/generated_metrics @@ -262,7 +262,7 @@ coderd_chatd_stream_buffer_dropped_total 0 # HELP coderd_chatd_stream_retries_total Total LLM stream retries. # TYPE coderd_chatd_stream_retries_total counter coderd_chatd_stream_retries_total{provider="",model="",kind="",chain_broken=""} 0 -# HELP coderd_chatd_template_recommendation_followup_total Total create_workspace calls by how the chosen template related to the prior list_templates recommendation. +# HELP coderd_chatd_template_recommendation_followup_total Total create_workspace calls by how the chosen template related to the prior list_templates recommendation (accepted_recommendation, overrode_with_listed_template, created_listed_without_recommendation, created_unlisted_template, no_recent_list_templates). # TYPE coderd_chatd_template_recommendation_followup_total counter coderd_chatd_template_recommendation_followup_total{outcome=""} 0 # HELP coderd_chatd_tool_errors_total Total tool calls that returned an error result. From ab4fb755816a692baed3d118f75272287f141652 Mon Sep 17 00:00:00 2001 From: Jaayden Halko Date: Thu, 18 Jun 2026 12:34:13 +0000 Subject: [PATCH 21/21] fix(coderd/x/chatd): address round 2 telemetry review nits - Reword recordListTemplatesTelemetry doc: the affinity score is "not included in the tool result shown to the model" rather than "stays internal", which misread as not logged (CRF-13). - Drop the redundant "list_templates ranking telemetry" section heading in the Metrics struct; the field names already convey it (CRF-14). - Document the intentional query-tie recommendation rationale: a query match is the baseline confidence signal there, so affinity only breaks the tie and need not clear the no-query affinity floor (Codex P2). 
--- coderd/x/chatd/chatloop/metrics.go | 1 - coderd/x/chatd/chattool/listtemplates.go | 10 +++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/coderd/x/chatd/chatloop/metrics.go b/coderd/x/chatd/chatloop/metrics.go index 5d44e138358a4..94b7fb42dbd02 100644 --- a/coderd/x/chatd/chatloop/metrics.go +++ b/coderd/x/chatd/chatloop/metrics.go @@ -39,7 +39,6 @@ type Metrics struct { StreamRetriesTotal *prometheus.CounterVec StreamBufferDroppedTotal prometheus.Counter - // list_templates ranking telemetry. ListTemplatesOutcomeTotal *prometheus.CounterVec ListTemplatesSignalsFailuresTotal prometheus.Counter ListTemplatesAffinityGap *prometheus.HistogramVec diff --git a/coderd/x/chatd/chattool/listtemplates.go b/coderd/x/chatd/chattool/listtemplates.go index 7244d988bc946..69e96f1a11e7d 100644 --- a/coderd/x/chatd/chattool/listtemplates.go +++ b/coderd/x/chatd/chattool/listtemplates.go @@ -401,7 +401,11 @@ func selectTemplateRecommendation( return uuid.Nil, NextStepAskUser, recommendationReasonSignalsUnavailable } - // Query tie: break it with a clear affinity gap. + // Query tie: both candidates matched the query at the same relevance tier, + // so the query itself is the baseline confidence signal and affinity only + // breaks the tie. A clear affinity gap is enough here; unlike the no-query + // branch below, the top score need not clear minConfidentAffinityScore on + // its own. if top.QueryScore > 0 { if len(ranked) > 1 && affinityScoreAtLeast(top.AffinityScore-ranked[1].AffinityScore, minConfidentGap) { return top.Template.ID, NextStepUseRecommended, recommendationReasonQueryTieConfident @@ -455,8 +459,8 @@ type listTemplatesTelemetry struct { // recordListTemplatesTelemetry records the aggregate ranking metrics and emits // the structured decision log. The raw user query text is never logged: only // its presence and length are, to avoid leaking task content. 
The affinity -// score itself stays internal; the log carries the inputs (scores, gap, -// thresholds) so a decision is reconstructable. +// score is not included in the tool result shown to the model; the log records +// the inputs (scores, gap, thresholds) so a decision is reconstructable. func recordListTemplatesTelemetry( ctx context.Context, options ListTemplatesOptions,