From dd9a1ee04b7a7e2bb6058da2aa21c9c267323948 Mon Sep 17 00:00:00 2001 From: Kyle Carberry Date: Mon, 15 Jun 2026 22:19:22 +0000 Subject: [PATCH 1/2] feat: integrate agent context snapshots into chats Makes the chat context foundation (#26385) live. Agent context pushes now hydrate bound chats and mark them dirty on drift, chat creation pins the agent's latest snapshot, the experimental chat API reports the pinned context state, and a new refresh endpoint re-pins the latest snapshot and clears the dirty marker. - coderd/x/chatd/context_hydration.go: HydrateAndMarkChatsDirty (push-time hydrate + dirty fan-out, publishes context_dirty after commit), hydrateChatContextOnCreate (create-time pin, best-effort), and RefreshChatContext (re-pin latest snapshot + clear dirty). - coderd/workspaceagentsrpc.go: wire the chat daemon in as the agentapi ContextDirtyMarker, nil-guarded so a disabled daemon stays a pure write path. - codersdk + db2sdk: expose Chat.Context (*ChatContext) carrying dirty state and snapshot error; re-add the context_dirty watch event. - coderd: PUT /api/experimental/chats/{chat}/context refresh endpoint. context_dirty_resources stays NULL and the live per-turn context pull is unchanged. Adds an end-to-end test where an echo-provisioned agent pushes an initial snapshot (hydrate), re-pushes a different hash (drift -> dirty), the API reports dirty, and refresh clears it. --- coderd/apidoc/docs.go | 65 ++++++++- coderd/apidoc/swagger.json | 63 ++++++++- coderd/coderd.go | 1 + coderd/database/db2sdk/db2sdk.go | 13 ++ coderd/database/db2sdk/db2sdk_test.go | 5 + coderd/exp_chats.go | 32 +++++ coderd/workspaceagentsrpc.go | 3 + coderd/x/chatd/chatd.go | 5 + coderd/x/chatd/context_hydration.go | 151 +++++++++++++++++++++ coderd/x/chatd/context_integration_test.go | 124 +++++++++++++++++ codersdk/chats.go | 41 +++++- docs/reference/api/chats.md | 97 +++++++++++++ docs/reference/api/schemas.md | 40 +++++- site/src/api/typesGenerated.ts | 31 +++++ 14 files changed, 662 insertions(+), 9 deletions(-) create mode 100644 coderd/x/chatd/context_hydration.go create mode 100644 coderd/x/chatd/context_integration_test.go diff --git a/coderd/apidoc/docs.go b/coderd/apidoc/docs.go index 78f7af39c040b..79cd7d837c4bd 100644 --- a/coderd/apidoc/docs.go +++ b/coderd/apidoc/docs.go @@ -549,6 +549,39 @@ const docTemplate = `{ } } }, + "/api/experimental/chats/{chat}/context": { + "put": { + "description": "Experimental: this endpoint is subject to change.", + "tags": [ + "Chats" + ], + "summary": "Refresh chat context", + "operationId": "refresh-chat-context", + "parameters": [ + { + "type": "string", + "format": "uuid", + "description": "Chat ID", + "name": "chat", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/codersdk.Chat" + } + } + }, + "security": [ + { + "CoderSessionToken": [] + } + ] + } + }, "/api/experimental/chats/{chat}/diff": { "get": { "description": "Experimental: this endpoint is subject to change.", @@ -16387,6 +16420,14 @@ const docTemplate = `{ "client_type": { "$ref": "#/definitions/codersdk.ChatClientType" }, + "context": { + "description": "Context reports the chat's pinned workspace-context state and\nwhether it has drifted from the agent's latest pushed snapshot.\nNil when the chat has no pinned context yet.", + "allOf": [ + { + "$ref": "#/definitions/codersdk.ChatContext" + } + ] + }, "created_at": { "type": "string", "format": "date-time" @@ -16542,6 +16583,24 @@ const docTemplate = `{ } } }, + "codersdk.ChatContext": { + "type": "object", + "properties": { + "dirty": { + "description": "Dirty is true when the agent's latest snapshot hash differs from the\nchat's pinned hash.", + "type": "boolean" + }, + "dirty_since": { + "description": "DirtySince is when drift was first detected; nil when not dirty.", + "type": "string", + "format": "date-time" + }, + "error": { + "description": "Error is the snapshot-level error copied from the pinned snapshot\n(empty when healthy).", + "type": "string" + } + } + }, "codersdk.ChatDiffContents": { "type": "object", "properties": { @@ -17481,7 +17540,8 @@ const docTemplate = `{ "created", "deleted", "diff_status_change", - "action_required" + "action_required", + "context_dirty" ], "x-enum-varnames": [ "ChatWatchEventKindStatusChange", @@ -17490,7 +17550,8 @@ const docTemplate = `{ "ChatWatchEventKindCreated", "ChatWatchEventKindDeleted", "ChatWatchEventKindDiffStatusChange", - "ChatWatchEventKindActionRequired" + "ChatWatchEventKindActionRequired", + "ChatWatchEventKindContextDirty" ] }, "codersdk.ConnectionLatency": { diff --git a/coderd/apidoc/swagger.json b/coderd/apidoc/swagger.json index 10677d22b6b4b..5c26c1df24868 100644 --- a/coderd/apidoc/swagger.json +++ b/coderd/apidoc/swagger.json @@ -482,6 +482,37 @@ } } }, + "/api/experimental/chats/{chat}/context": { + "put": { + "description": "Experimental: this endpoint is subject to change.", + "tags": ["Chats"], + "summary": "Refresh chat context", + "operationId": "refresh-chat-context", + "parameters": [ + { + "type": "string", + "format": "uuid", + "description": "Chat ID", + "name": "chat", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/codersdk.Chat" + } + } + }, + "security": [ + { + "CoderSessionToken": [] + } + ] + } + }, "/api/experimental/chats/{chat}/diff": { "get": { "description": "Experimental: this endpoint is subject to change.", @@ -14707,6 +14738,14 @@ "client_type": { "$ref": "#/definitions/codersdk.ChatClientType" }, + "context": { + "description": "Context reports the chat's pinned workspace-context state and\nwhether it has drifted from the agent's latest pushed snapshot.\nNil when the chat has no pinned context yet.", + "allOf": [ + { + "$ref": "#/definitions/codersdk.ChatContext" + } + ] + }, "created_at": { "type": "string", "format": "date-time" @@ -14850,6 +14889,24 @@ } } }, + "codersdk.ChatContext": { + "type": "object", + "properties": { + "dirty": { + "description": "Dirty is true when the agent's latest snapshot hash differs from the\nchat's pinned hash.", + "type": "boolean" + }, + "dirty_since": { + "description": "DirtySince is when drift was first detected; nil when not dirty.", + "type": "string", + "format": "date-time" + }, + "error": { + "description": "Error is the snapshot-level error copied from the pinned snapshot\n(empty when healthy).", + "type": "string" + } + } + }, "codersdk.ChatDiffContents": { "type": "object", "properties": { @@ -15759,7 +15816,8 @@ "created", "deleted", "diff_status_change", - "action_required" + "action_required", + "context_dirty" ], "x-enum-varnames": [ "ChatWatchEventKindStatusChange", @@ -15768,7 +15826,8 @@ "ChatWatchEventKindCreated", "ChatWatchEventKindDeleted", "ChatWatchEventKindDiffStatusChange", - "ChatWatchEventKindActionRequired" + "ChatWatchEventKindActionRequired", + "ChatWatchEventKindContextDirty" ] }, "codersdk.ConnectionLatency": { diff --git a/coderd/coderd.go b/coderd/coderd.go index d492dfc96067b..810a7ef49ae0c 100644 --- a/coderd/coderd.go +++ b/coderd/coderd.go @@ -1360,6 +1360,7 @@ func New(options *Options) *API { r.Post("/title/regenerate", api.regenerateChatTitle) r.Post("/title/propose", api.proposeChatTitle) r.Get("/diff", api.getChatDiffContents) + r.Put("/context", api.refreshChatContext) r.Route("/queue/{queuedMessage}", func(r chi.Router) { r.Delete("/", api.deleteChatQueuedMessage) r.Post("/promote", api.promoteChatQueuedMessage) diff --git a/coderd/database/db2sdk/db2sdk.go b/coderd/database/db2sdk/db2sdk.go index c4203ff2cc9b3..94039fcc6660b 100644 --- a/coderd/database/db2sdk/db2sdk.go +++ b/coderd/database/db2sdk/db2sdk.go @@ -1755,6 +1755,19 @@ func Chat(c database.Chat, diffStatus *database.ChatDiffStatus, files []database chat.LastInjectedContext = parts } } + // Report pinned-context state when the chat is context-tracked + // (has a pinned hash), dirty, or carries a snapshot error. + if len(c.ContextAggregateHash) > 0 || c.ContextDirtySince.Valid || c.ContextError != "" { + chatContext := &codersdk.ChatContext{ + Dirty: c.ContextDirtySince.Valid, + Error: c.ContextError, + } + if c.ContextDirtySince.Valid { + dirtySince := c.ContextDirtySince.Time + chatContext.DirtySince = &dirtySince + } + chat.Context = chatContext + } return chat } diff --git a/coderd/database/db2sdk/db2sdk_test.go b/coderd/database/db2sdk/db2sdk_test.go index 284cdd88d121c..bd82c2b5b3567 100644 --- a/coderd/database/db2sdk/db2sdk_test.go +++ b/coderd/database/db2sdk/db2sdk_test.go @@ -731,6 +731,11 @@ func TestChat_AllFieldsPopulated(t *testing.T) { RawMessage: json.RawMessage(`[{"name":"tool1","description":"test tool","inputSchema":{"type":"object"}}]`), Valid: true, }, + // Pinned-context columns drive codersdk.Chat.Context. Set all of + // them so the converted sub-struct's fields are non-zero too. + ContextAggregateHash: []byte{0x01, 0x02, 0x03}, + ContextDirtySince: sql.NullTime{Time: now, Valid: true}, + ContextError: "context boom", } // Only ChatID is needed here. This test checks that // Chat.DiffStatus is non-nil, not that every DiffStatus diff --git a/coderd/exp_chats.go b/coderd/exp_chats.go index 49967ada83ba7..84089005bec60 100644 --- a/coderd/exp_chats.go +++ b/coderd/exp_chats.go @@ -2618,6 +2618,38 @@ func (api *API) applyChatTitleUpdate( return updatedChat, false } +// refreshChatContext re-pins a chat to its agent's latest context snapshot +// and clears the dirty marker. +// +// @Summary Refresh chat context +// @ID refresh-chat-context +// @Security CoderSessionToken +// @Tags Chats +// @Param chat path string true "Chat ID" format(uuid) +// @Success 200 {object} codersdk.Chat +// @Router /api/experimental/chats/{chat}/context [put] +// @Description Experimental: this endpoint is subject to change. +func (api *API) refreshChatContext(rw http.ResponseWriter, r *http.Request) { + ctx := r.Context() + chat := httpmw.ChatParam(r) + + if !api.Authorize(r, policy.ActionUpdate, chat.RBACObject()) { + httpapi.ResourceNotFound(rw) + return + } + + updated, err := api.chatDaemon.RefreshChatContext(ctx, chat) + if err != nil { + httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ + Message: "Internal error refreshing chat context.", + Detail: err.Error(), + }) + return + } + + httpapi.Write(ctx, rw, http.StatusOK, db2sdk.Chat(updated, nil, nil)) +} + // patchChat updates a chat resource. Supports updating labels, // workspace binding, archiving, pinning, and pinned-chat ordering. // diff --git a/coderd/workspaceagentsrpc.go b/coderd/workspaceagentsrpc.go index 22b33b91f15a0..d01dec9cb8f26 100644 --- a/coderd/workspaceagentsrpc.go +++ b/coderd/workspaceagentsrpc.go @@ -180,6 +180,9 @@ func (api *API) workspaceAgentRPC(rw http.ResponseWriter, r *http.Request) { // Optional: UpdateAgentMetricsFn: api.UpdateAgentMetrics, + // chatDaemon is always constructed (only its worker is gated), so + // this is non-nil; agentapi treats a nil marker as "chatd absent". + ContextDirtyMarker: api.chatDaemon, }, workspace, workspaceAgent) streamID := tailnet.StreamID{ diff --git a/coderd/x/chatd/chatd.go b/coderd/x/chatd/chatd.go index af448f23c3c2d..7ebc124e79ffe 100644 --- a/coderd/x/chatd/chatd.go +++ b/coderd/x/chatd/chatd.go @@ -1457,6 +1457,11 @@ func (p *Server) CreateChat(ctx context.Context, opts CreateOptions) (database.C // committed and emitted its own state-machine notifications. The // watch endpoint is maintained separately from chatstate notifications. p.publishChatPubsubEvent(chat, codersdk.ChatWatchEventKindCreated, nil) + + // Pin the chat to the agent's latest context snapshot if one exists. + // Best-effort: a chat created before its agent has pushed is hydrated + // by that agent's next push. + p.hydrateChatContextOnCreate(ctx, chat) return chat, nil } diff --git a/coderd/x/chatd/context_hydration.go b/coderd/x/chatd/context_hydration.go new file mode 100644 index 0000000000000..79c7b39e8ed29 --- /dev/null +++ b/coderd/x/chatd/context_hydration.go @@ -0,0 +1,151 @@ +package chatd + +import ( + "context" + "database/sql" + "errors" + "time" + + "github.com/google/uuid" + "golang.org/x/xerrors" + + "cdr.dev/slog/v3" + "github.com/coder/coder/v2/coderd/database" + "github.com/coder/coder/v2/coderd/database/dbauthz" + "github.com/coder/coder/v2/codersdk" +) + +// HydrateAndMarkChatsDirty implements agentapi.ContextDirtyMarker. It runs +// inside the PushContextState transaction: it stamps the pushed snapshot hash +// on chats for the agent that have not been hydrated yet (no dirty event), +// then flips already-pinned chats whose hash differs to dirty. It returns a +// callback that publishes the dirty watch events; the caller invokes it only +// after the transaction commits. +// +// The pinned hash on dirtied chats is intentionally left unchanged; the +// refresh endpoint re-pins it. The writes run as the chatd system subject +// because an agent does not own the chats bound to it. +func (p *Server) HydrateAndMarkChatsDirty(ctx context.Context, tx database.Store, agentID uuid.UUID, aggregateHash []byte, snapshotError string, now time.Time) (func(), error) { + //nolint:gocritic // An agent does not own the chats bound to it, so the + // daemon subject stamps and dirties them. + ctx = dbauthz.AsChatd(ctx) + + // Chats created before the agent finished its initial push land with a + // NULL pinned hash. Stamp them now so they start clean. This is their + // first hydration, so no dirty event is emitted. + if err := tx.HydrateAgentChatsContext(ctx, database.HydrateAgentChatsContextParams{ + AgentID: agentID, + AggregateHash: aggregateHash, + ContextError: snapshotError, + }); err != nil { + return nil, xerrors.Errorf("hydrate agent chats context: %w", err) + } + + flipped, err := tx.MarkChatsContextDirtyByAgent(ctx, database.MarkChatsContextDirtyByAgentParams{ + AgentID: agentID, + AggregateHash: aggregateHash, + DirtySince: sql.NullTime{Time: now, Valid: true}, + }) + if err != nil { + return nil, xerrors.Errorf("mark chats context dirty: %w", err) + } + if len(flipped) == 0 { + return func() {}, nil + } + + // Resolve the expanded chat rows for the flipped IDs so the watch + // events carry a complete chat payload. Read within the same tx for a + // consistent view of the chats just flipped. + active, err := tx.GetActiveChatsByAgentID(ctx, agentID) + if err != nil { + return nil, xerrors.Errorf("get active chats by agent: %w", err) + } + flippedIDs := make(map[uuid.UUID]struct{}, len(flipped)) + for _, f := range flipped { + flippedIDs[f.ID] = struct{}{} + } + dirtyChats := make([]database.Chat, 0, len(flipped)) + for _, chat := range active { + if _, ok := flippedIDs[chat.ID]; ok { + dirtyChats = append(dirtyChats, chat) + } + } + + return func() { + for _, chat := range dirtyChats { + p.publishChatPubsubEvent(chat, codersdk.ChatWatchEventKindContextDirty, nil) + } + }, nil +} + +// hydrateChatContextOnCreate pins a newly created chat to its agent's latest +// context snapshot when one already exists. Best-effort: a chat whose agent +// has not pushed yet is hydrated later by that agent's next push. Failures +// are logged and swallowed so they never block chat creation; the columns +// are dark and do not affect chat behavior. +func (p *Server) hydrateChatContextOnCreate(ctx context.Context, chat database.Chat) { + if !chat.AgentID.Valid { + return + } + //nolint:gocritic // Chatd is internal, not a user; it reads the agent + // snapshot and stamps the chat as the daemon subject. + ctx = dbauthz.AsChatd(ctx) + snapshot, err := p.db.GetLatestWorkspaceAgentContextSnapshot(ctx, chat.AgentID.UUID) + switch { + case errors.Is(err, sql.ErrNoRows): + return + case err != nil: + p.logger.Warn(ctx, "hydrate chat context on create: get latest snapshot", + slog.F("chat_id", chat.ID), slog.Error(err)) + return + } + if err := p.db.SetChatContextSnapshot(ctx, database.SetChatContextSnapshotParams{ + ID: chat.ID, + AggregateHash: snapshot.AggregateHash, + ContextError: snapshot.SnapshotError, + }); err != nil { + p.logger.Warn(ctx, "hydrate chat context on create: set snapshot", + slog.F("chat_id", chat.ID), slog.Error(err)) + } +} + +// RefreshChatContext re-pins a chat to its agent's latest context snapshot and +// clears the dirty marker. It backs PUT /chats/{chat}/context (no body). A +// chat with no bound agent, or whose agent has no snapshot, simply has its +// pinned hash and dirty marker cleared. +func (p *Server) RefreshChatContext(ctx context.Context, chat database.Chat) (database.Chat, error) { + //nolint:gocritic // Chatd is internal, not a user; it re-pins the chat to + // the agent snapshot as the daemon subject. + ctx = dbauthz.AsChatd(ctx) + + var ( + aggregateHash []byte + snapshotError string + ) + if chat.AgentID.Valid { + snapshot, err := p.db.GetLatestWorkspaceAgentContextSnapshot(ctx, chat.AgentID.UUID) + switch { + case errors.Is(err, sql.ErrNoRows): + // No snapshot yet; clear the pinned hash and dirty marker. + case err != nil: + return database.Chat{}, xerrors.Errorf("get latest snapshot: %w", err) + default: + aggregateHash = snapshot.AggregateHash + snapshotError = snapshot.SnapshotError + } + } + + if err := p.db.SetChatContextSnapshot(ctx, database.SetChatContextSnapshotParams{ + ID: chat.ID, + AggregateHash: aggregateHash, + ContextError: snapshotError, + }); err != nil { + return database.Chat{}, xerrors.Errorf("set chat context snapshot: %w", err) + } + + updated, err := p.db.GetChatByID(ctx, chat.ID) + if err != nil { + return database.Chat{}, xerrors.Errorf("get chat after refresh: %w", err) + } + return updated, nil +} diff --git a/coderd/x/chatd/context_integration_test.go b/coderd/x/chatd/context_integration_test.go new file mode 100644 index 0000000000000..c12154e399252 --- /dev/null +++ b/coderd/x/chatd/context_integration_test.go @@ -0,0 +1,124 @@ +package chatd_test + +import ( + "testing" + + "github.com/google/uuid" + "github.com/stretchr/testify/require" + + agentproto "github.com/coder/coder/v2/agent/proto" + "github.com/coder/coder/v2/coderd/coderdtest" + "github.com/coder/coder/v2/coderd/database" + "github.com/coder/coder/v2/coderd/database/dbgen" + "github.com/coder/coder/v2/codersdk" + "github.com/coder/coder/v2/codersdk/agentsdk" + "github.com/coder/coder/v2/provisioner/echo" + "github.com/coder/coder/v2/testutil" +) + +// TestChatContextDirtyFromAgentPush is an end-to-end check of the chat +// context integration. An echo-provisioned workspace agent pushes a context +// snapshot that hydrates a bound chat; a later push with a different hash +// marks the chat dirty; the experimental API reports the dirty state; and the +// refresh endpoint re-pins the latest snapshot and clears it. +func TestChatContextDirtyFromAgentPush(t *testing.T) { + t.Parallel() + + ctx := testutil.Context(t, testutil.WaitLong) + client, db := coderdtest.NewWithDatabase(t, &coderdtest.Options{ + DeploymentValues: directChatRoutingDeploymentValues(t), + IncludeProvisionerDaemon: true, + }) + user := coderdtest.CreateFirstUser(t, client) + expClient := codersdk.NewExperimentalClient(client) + + // Build a workspace with an agent via the echo provisioner. + agentToken := uuid.NewString() + version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{ + Parse: echo.ParseComplete, + ProvisionPlan: echo.PlanComplete, + ProvisionApply: echo.ApplyComplete, + ProvisionGraph: echo.ProvisionGraphWithAgent(agentToken), + }) + coderdtest.AwaitTemplateVersionJobCompleted(t, client, version.ID) + template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID) + workspace := coderdtest.CreateWorkspace(t, client, template.ID) + coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, workspace.LatestBuild.ID) + + ws, err := client.Workspace(ctx, workspace.ID) + require.NoError(t, err) + require.Len(t, ws.LatestBuild.Resources, 1) + require.Len(t, ws.LatestBuild.Resources[0].Agents, 1) + agentID := ws.LatestBuild.Resources[0].Agents[0].ID + + // A chat bound to the agent. In production agent_id is set lazily during + // a workspace turn (chatd.persistBuildAgentBinding); bind it directly here + // so the test exercises the context flow rather than turn resolution. + // dbgen.ChatModelConfig provisions an AI provider as needed so the chat's + // last_model_config_id foreign key is satisfied. + model := dbgen.ChatModelConfig(t, db, database.ChatModelConfig{}) + chat := dbgen.Chat(t, db, database.Chat{ + OrganizationID: user.OrganizationID, + OwnerID: user.UserID, + WorkspaceID: uuid.NullUUID{UUID: workspace.ID, Valid: true}, + AgentID: uuid.NullUUID{UUID: agentID, Valid: true}, + LastModelConfigID: model.ID, + Status: database.ChatStatusWaiting, + }) + + // Before any push there is no pinned context. + got, err := expClient.GetChat(ctx, chat.ID) + require.NoError(t, err) + require.Nil(t, got.Context, "no pinned context before the first push") + + // Connect as the agent and push the initial snapshot. The push runs the + // hydrate/dirty fan-out synchronously inside its transaction, so the chat + // reflects the change by the time the RPC returns. + agentClient := agentsdk.New(client.URL, agentsdk.WithFixedToken(agentToken)) + aAPI, _, err := agentClient.ConnectRPC210(ctx) + require.NoError(t, err) + defer func() { _ = aAPI.DRPCConn().Close() }() + + hashA := []byte{0x01, 0x02, 0x03} + resp, err := aAPI.PushContextState(ctx, &agentproto.PushContextStateRequest{ + Version: 1, + Initial: true, + AggregateHash: hashA, + }) + require.NoError(t, err) + require.True(t, resp.GetAccepted()) + + // The initial push hydrates the chat to a clean (not dirty) context. + got, err = expClient.GetChat(ctx, chat.ID) + require.NoError(t, err) + require.NotNil(t, got.Context, "chat should be hydrated after the initial push") + require.False(t, got.Context.Dirty, "initial hydration is clean") + require.Nil(t, got.Context.DirtySince) + + // The agent refreshes its context and pushes a different hash, which + // drifts from the pinned hash and marks the chat dirty. + hashB := []byte{0x04, 0x05, 0x06} + resp, err = aAPI.PushContextState(ctx, &agentproto.PushContextStateRequest{ + Version: 2, + AggregateHash: hashB, + }) + require.NoError(t, err) + require.True(t, resp.GetAccepted()) + + got, err = expClient.GetChat(ctx, chat.ID) + require.NoError(t, err) + require.NotNil(t, got.Context) + require.True(t, got.Context.Dirty, "drift should mark the chat dirty") + require.NotNil(t, got.Context.DirtySince) + + // Refreshing re-pins the latest snapshot and clears the dirty marker. + refreshed, err := expClient.RefreshChatContext(ctx, chat.ID) + require.NoError(t, err) + require.NotNil(t, refreshed.Context) + require.False(t, refreshed.Context.Dirty, "refresh clears the dirty marker") + + got, err = expClient.GetChat(ctx, chat.ID) + require.NoError(t, err) + require.NotNil(t, got.Context) + require.False(t, got.Context.Dirty) +} diff --git a/codersdk/chats.go b/codersdk/chats.go index f07c1889be550..06ccdc1e5c40e 100644 --- a/codersdk/chats.go +++ b/codersdk/chats.go @@ -142,8 +142,12 @@ type Chat struct { // is updated only when context changes, on first workspace // attach or agent change. LastInjectedContext []ChatMessagePart `json:"last_injected_context,omitempty"` - Warnings []string `json:"warnings,omitempty"` - ClientType ChatClientType `json:"client_type"` + // Context reports the chat's pinned workspace-context state and + // whether it has drifted from the agent's latest pushed snapshot. + // Nil when the chat has no pinned context yet. + Context *ChatContext `json:"context,omitempty"` + Warnings []string `json:"warnings,omitempty"` + ClientType ChatClientType `json:"client_type"` // Children holds child (subagent) chats nested under this root // chat. Always initialized to an empty slice so the JSON field // is present as []. Child chats cannot create their own @@ -152,6 +156,20 @@ type Chat struct { Children []Chat `json:"children"` } +// ChatContext reports a chat's pinned workspace context and whether it has +// drifted from the agent's latest pushed snapshot. The chat stays usable +// when dirty; refreshing re-pins it to the latest snapshot. +type ChatContext struct { + // Dirty is true when the agent's latest snapshot hash differs from the + // chat's pinned hash. + Dirty bool `json:"dirty"` + // DirtySince is when drift was first detected; nil when not dirty. + DirtySince *time.Time `json:"dirty_since,omitempty" format:"date-time"` + // Error is the snapshot-level error copied from the pinned snapshot + // (empty when healthy). + Error string `json:"error,omitempty"` +} + // ChatFileMetadata contains lightweight metadata about a file // associated with a chat, excluding the file content itself. type ChatFileMetadata struct { @@ -1692,6 +1710,10 @@ const ( ChatWatchEventKindDeleted ChatWatchEventKind = "deleted" ChatWatchEventKindDiffStatusChange ChatWatchEventKind = "diff_status_change" ChatWatchEventKindActionRequired ChatWatchEventKind = "action_required" + // ChatWatchEventKindContextDirty signals that the chat's pinned + // workspace context drifted from the agent's latest pushed snapshot. + // The chat stays usable; a refresh re-pins it to the latest snapshot. + ChatWatchEventKindContextDirty ChatWatchEventKind = "context_dirty" ) // ChatWatchEvent represents an event from the global chat watch stream. @@ -3086,6 +3108,21 @@ func (c *ExperimentalClient) GetChat(ctx context.Context, chatID uuid.UUID) (Cha return chat, json.NewDecoder(res.Body).Decode(&chat) } +// RefreshChatContext re-pins the chat to its agent's latest context snapshot +// and clears the dirty marker. The request takes no body. +func (c *ExperimentalClient) RefreshChatContext(ctx context.Context, chatID uuid.UUID) (Chat, error) { + res, err := c.Request(ctx, http.MethodPut, fmt.Sprintf("/api/experimental/chats/%s/context", chatID), nil) + if err != nil { + return Chat{}, err + } + defer res.Body.Close() + if res.StatusCode != http.StatusOK { + return Chat{}, ReadBodyAsError(res) + } + var chat Chat + return chat, json.NewDecoder(res.Body).Decode(&chat) +} + func (c *ExperimentalClient) GetChatACL(ctx context.Context, chatID uuid.UUID) (ChatACL, error) { res, err := c.Request(ctx, http.MethodGet, fmt.Sprintf("/api/experimental/chats/%s/acl", chatID), nil) if err != nil { diff --git a/docs/reference/api/chats.md b/docs/reference/api/chats.md index 7047299d3f7ea..602a191db5c66 100644 --- a/docs/reference/api/chats.md +++ b/docs/reference/api/chats.md @@ -36,6 +36,11 @@ Experimental: this endpoint is subject to change. {} ], "client_type": "ui", + "context": { + "dirty": true, + "dirty_since": "2019-08-24T14:15:22Z", + "error": "string" + }, "created_at": "2019-08-24T14:15:22Z", "diff_status": { "additions": 0, @@ -189,6 +194,10 @@ Status Code **200** | `» build_id` | string(uuid) | false | | | | `» children` | [codersdk.Chat](schemas.md#codersdkchat) | false | | Children holds child (subagent) chats nested under this root chat. Always initialized to an empty slice so the JSON field is present as []. Child chats cannot create their own subagents, so nesting depth is capped at 1 and this slice is always empty for child chats. | | `» client_type` | [codersdk.ChatClientType](schemas.md#codersdkchatclienttype) | false | | | +| `» context` | [codersdk.ChatContext](schemas.md#codersdkchatcontext) | false | | Context reports the chat's pinned workspace-context state and whether it has drifted from the agent's latest pushed snapshot. Nil when the chat has no pinned context yet. | +| `»» dirty` | boolean | false | | Dirty is true when the agent's latest snapshot hash differs from the chat's pinned hash. | +| `»» dirty_since` | string(date-time) | false | | Dirty since is when drift was first detected; nil when not dirty. | +| `»» error` | string | false | | Error is the snapshot-level error copied from the pinned snapshot (empty when healthy). | | `» created_at` | string(date-time) | false | | | | `» diff_status` | [codersdk.ChatDiffStatus](schemas.md#codersdkchatdiffstatus) | false | | | | `»» additions` | integer | false | | | @@ -382,6 +391,11 @@ Experimental: this endpoint is subject to change. "build_id": "bfb1f3fa-bf7b-43a5-9e0b-26cc050e44cb", "children": [], "client_type": "ui", + "context": { + "dirty": true, + "dirty_since": "2019-08-24T14:15:22Z", + "error": "string" + }, "created_at": "2019-08-24T14:15:22Z", "diff_status": { "additions": 0, @@ -516,6 +530,11 @@ Experimental: this endpoint is subject to change. } ], "client_type": "ui", + "context": { + "dirty": true, + "dirty_since": "2019-08-24T14:15:22Z", + "error": "string" + }, "created_at": "2019-08-24T14:15:22Z", "diff_status": { "additions": 0, @@ -801,6 +820,11 @@ Experimental: this endpoint is subject to change. {} ], "client_type": "ui", + "context": { + "dirty": true, + "dirty_since": "2019-08-24T14:15:22Z", + "error": "string" + }, "created_at": "2019-08-24T14:15:22Z", "diff_status": { "additions": 0, @@ -989,6 +1013,11 @@ Experimental: this endpoint is subject to change. "build_id": "bfb1f3fa-bf7b-43a5-9e0b-26cc050e44cb", "children": [], "client_type": "ui", + "context": { + "dirty": true, + "dirty_since": "2019-08-24T14:15:22Z", + "error": "string" + }, "created_at": "2019-08-24T14:15:22Z", "diff_status": { "additions": 0, @@ -1123,6 +1152,11 @@ Experimental: this endpoint is subject to change. } ], "client_type": "ui", + "context": { + "dirty": true, + "dirty_since": "2019-08-24T14:15:22Z", + "error": "string" + }, "created_at": "2019-08-24T14:15:22Z", "diff_status": { "additions": 0, @@ -1311,6 +1345,39 @@ Experimental: this endpoint is subject to change. To perform this operation, you must be authenticated. [Learn more](authentication.md). +## Refresh chat context + +### Code samples + +```shell +# Example request using curl +curl -X PUT http://coder-server:8080/api/experimental/chats/{chat}/context \ + -H 'Accept: */*' \ + -H 'Coder-Session-Token: API_KEY' +``` + +`PUT /api/experimental/chats/{chat}/context` + +Experimental: this endpoint is subject to change. + +### Parameters + +| Name | In | Type | Required | Description | +|--------|------|--------------|----------|-------------| +| `chat` | path | string(uuid) | true | Chat ID | + +### Example responses + +> 200 Response + +### Responses + +| Status | Meaning | Description | Schema | +|--------|---------------------------------------------------------|-------------|------------------------------------------| +| 200 | [OK](https://tools.ietf.org/html/rfc7231#section-6.3.1) | OK | [codersdk.Chat](schemas.md#codersdkchat) | + +To perform this operation, you must be authenticated. [Learn more](authentication.md). + ## Get chat diff contents ### Code samples @@ -1392,6 +1459,11 @@ Experimental: this endpoint is subject to change. "build_id": "bfb1f3fa-bf7b-43a5-9e0b-26cc050e44cb", "children": [], "client_type": "ui", + "context": { + "dirty": true, + "dirty_since": "2019-08-24T14:15:22Z", + "error": "string" + }, "created_at": "2019-08-24T14:15:22Z", "diff_status": { "additions": 0, @@ -1526,6 +1598,11 @@ Experimental: this endpoint is subject to change. } ], "client_type": "ui", + "context": { + "dirty": true, + "dirty_since": "2019-08-24T14:15:22Z", + "error": "string" + }, "created_at": "2019-08-24T14:15:22Z", "diff_status": { "additions": 0, @@ -2316,6 +2393,11 @@ Experimental: this endpoint is subject to change. "build_id": "bfb1f3fa-bf7b-43a5-9e0b-26cc050e44cb", "children": [], "client_type": "ui", + "context": { + "dirty": true, + "dirty_since": "2019-08-24T14:15:22Z", + "error": "string" + }, "created_at": "2019-08-24T14:15:22Z", "diff_status": { "additions": 0, @@ -2450,6 +2532,11 @@ Experimental: this endpoint is subject to change. } ], "client_type": "ui", + "context": { + "dirty": true, + "dirty_since": "2019-08-24T14:15:22Z", + "error": "string" + }, "created_at": "2019-08-24T14:15:22Z", "diff_status": { "additions": 0, @@ -2998,6 +3085,11 @@ Experimental: this endpoint is subject to change. "build_id": "bfb1f3fa-bf7b-43a5-9e0b-26cc050e44cb", "children": [], "client_type": "ui", + "context": { + "dirty": true, + "dirty_since": "2019-08-24T14:15:22Z", + "error": "string" + }, "created_at": "2019-08-24T14:15:22Z", "diff_status": { "additions": 0, @@ -3132,6 +3224,11 @@ Experimental: this endpoint is subject to change. } ], "client_type": "ui", + "context": { + "dirty": true, + "dirty_since": "2019-08-24T14:15:22Z", + "error": "string" + }, "created_at": "2019-08-24T14:15:22Z", "diff_status": { "additions": 0, diff --git a/docs/reference/api/schemas.md b/docs/reference/api/schemas.md index 28e697142358f..e5ef5a65b36ab 100644 --- a/docs/reference/api/schemas.md +++ b/docs/reference/api/schemas.md @@ -1925,6 +1925,11 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in "build_id": "bfb1f3fa-bf7b-43a5-9e0b-26cc050e44cb", "children": [], "client_type": "ui", + "context": { + "dirty": true, + "dirty_since": "2019-08-24T14:15:22Z", + "error": "string" + }, "created_at": "2019-08-24T14:15:22Z", "diff_status": { "additions": 0, @@ -2059,6 +2064,11 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in } ], "client_type": "ui", + "context": { + "dirty": true, + "dirty_since": "2019-08-24T14:15:22Z", + "error": "string" + }, "created_at": "2019-08-24T14:15:22Z", "diff_status": { "additions": 0, @@ -2202,6 +2212,7 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in | `build_id` | string | false | | | | `children` | array of [codersdk.Chat](#codersdkchat) | false | | Children holds child (subagent) chats nested under this root chat. Always initialized to an empty slice so the JSON field is present as []. Child chats cannot create their own subagents, so nesting depth is capped at 1 and this slice is always empty for child chats. | | `client_type` | [codersdk.ChatClientType](#codersdkchatclienttype) | false | | | +| `context` | [codersdk.ChatContext](#codersdkchatcontext) | false | | Context reports the chat's pinned workspace-context state and whether it has drifted from the agent's latest pushed snapshot. Nil when the chat has no pinned context yet. | | `created_at` | string | false | | | | `diff_status` | [codersdk.ChatDiffStatus](#codersdkchatdiffstatus) | false | | | | `files` | array of [codersdk.ChatFileMetadata](#codersdkchatfilemetadata) | false | | | @@ -2327,6 +2338,24 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in | `acquire_batch_size` | integer | false | | | | `debug_logging_enabled` | boolean | false | | | +## codersdk.ChatContext + +```json +{ + "dirty": true, + "dirty_since": "2019-08-24T14:15:22Z", + "error": "string" +} +``` + +### Properties + +| Name | Type | Required | Restrictions | Description | +|---------------|---------|----------|--------------|------------------------------------------------------------------------------------------| +| `dirty` | boolean | false | | Dirty is true when the agent's latest snapshot hash differs from the chat's pinned hash. | +| `dirty_since` | string | false | | Dirty since is when drift was first detected; nil when not dirty. | +| `error` | string | false | | Error is the snapshot-level error copied from the pinned snapshot (empty when healthy). | + ## codersdk.ChatDiffContents ```json @@ -3748,6 +3777,11 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in {} ], "client_type": "ui", + "context": { + "dirty": true, + "dirty_since": "2019-08-24T14:15:22Z", + "error": "string" + }, "created_at": "2019-08-24T14:15:22Z", "diff_status": { "additions": 0, @@ -3909,9 +3943,9 @@ AuthorizationObject can represent a "set" of objects, such as: all workspaces in #### Enumerated Values -| Value(s) | -|------------------------------------------------------------------------------------------------------------------| -| `action_required`, `created`, `deleted`, `diff_status_change`, `status_change`, `summary_change`, `title_change` | +| Value(s) | +|-----------------------------------------------------------------------------------------------------------------------------------| +| `action_required`, `context_dirty`, `created`, `deleted`, `diff_status_change`, `status_change`, `summary_change`, `title_change` | ## codersdk.ConnectionLatency diff --git a/site/src/api/typesGenerated.ts b/site/src/api/typesGenerated.ts index f63f476f7d777..39105492a5479 100644 --- a/site/src/api/typesGenerated.ts +++ b/site/src/api/typesGenerated.ts @@ -1503,6 +1503,12 @@ export interface Chat { * attach or agent change. */ readonly last_injected_context?: readonly ChatMessagePart[]; + /** + * Context reports the chat's pinned workspace-context state and + * whether it has drifted from the agent's latest pushed snapshot. + * Nil when the chat has no pinned context yet. + */ + readonly context?: ChatContext; readonly warnings?: readonly string[]; readonly client_type: ChatClientType; /** @@ -1587,6 +1593,29 @@ export interface ChatConfig { readonly ai_gateway_routing_enabled: boolean; } +// From codersdk/chats.go +/** + * ChatContext reports a chat's pinned workspace context and whether it has + * drifted from the agent's latest pushed snapshot. The chat stays usable + * when dirty; refreshing re-pins it to the latest snapshot. + */ +export interface ChatContext { + /** + * Dirty is true when the agent's latest snapshot hash differs from the + * chat's pinned hash. + */ + readonly dirty: boolean; + /** + * DirtySince is when drift was first detected; nil when not dirty. + */ + readonly dirty_since?: string; + /** + * Error is the snapshot-level error copied from the pinned snapshot + * (empty when healthy). + */ + readonly error?: string; +} + // From codersdk/chats.go export interface ChatContextFilePart { readonly type: "context-file"; @@ -3037,6 +3066,7 @@ export interface ChatWatchEvent { // From codersdk/chats.go export type ChatWatchEventKind = | "action_required" + | "context_dirty" | "created" | "deleted" | "diff_status_change" @@ -3046,6 +3076,7 @@ export type ChatWatchEventKind = export const ChatWatchEventKinds: ChatWatchEventKind[] = [ "action_required", + "context_dirty", "created", "deleted", "diff_status_change", From bedf763fcd0a622684817df885434bbb133e216a Mon Sep 17 00:00:00 2001 From: Kyle Carberry Date: Tue, 16 Jun 2026 00:31:19 +0000 Subject: [PATCH 2/2] fix(chatd): address deep-review feedback on chat context - RefreshChatContext now reads the snapshot and re-pins in one repeatable-read transaction, closing a TOCTOU window where a concurrent push could leave the chat pinned to a stale hash with the dirty marker cleared (CRF-4). - hydrateChatContextOnCreate pins via the NULL-guarded HydrateAgentChatsContext so a concurrent push that already hydrated the chat is not clobbered (CRF-6). - The dirty fan-out resolves only the dirtied chats after commit instead of scanning every active chat for the agent (CRF-10), and reuses publishChatPubsubEvents (CRF-15). - Extract a shared latestAgentSnapshot helper (CRF-11), rename flipped->dirtied (CRF-14), align the no-op callback contract with its doc (CRF-13), and trim restated comments (CRF-12). - Add @Produce json to the refresh endpoint (CRF-1). - Tests: exercise the snapshot-error path end-to-end, assert an agent-less chat is untouched by the fan-out, and re-push the pinned hash to prove refresh advanced the pin (CRF-7/8/9); add an internal test for create-time hydration (CRF-2). --- coderd/agentapi/context.go | 2 +- coderd/apidoc/docs.go | 3 + coderd/apidoc/swagger.json | 1 + coderd/exp_chats.go | 1 + coderd/x/chatd/context_hydration.go | 160 ++++++---- .../chatd/context_hydration_internal_test.go | 81 +++++ coderd/x/chatd/context_integration_test.go | 50 ++- docs/reference/api/chats.md | 292 +++++++++++++++++- 8 files changed, 515 insertions(+), 75 deletions(-) create mode 100644 coderd/x/chatd/context_hydration_internal_test.go diff --git a/coderd/agentapi/context.go b/coderd/agentapi/context.go index 605635915a19d..f6dd69e4e3824 100644 --- a/coderd/agentapi/context.go +++ b/coderd/agentapi/context.go @@ -80,7 +80,7 @@ type ContextDirtyMarker interface { // already-pinned chats whose hash differs from aggregateHash. It // returns a callback that publishes the resulting dirty watch events; // the caller invokes it only after the transaction commits. The - // callback is nil when nothing transitioned to dirty. + // callback is a no-op when nothing transitioned to dirty. HydrateAndMarkChatsDirty(ctx context.Context, tx database.Store, agentID uuid.UUID, aggregateHash []byte, snapshotError string, now time.Time) (publishDirty func(), err error) } diff --git a/coderd/apidoc/docs.go b/coderd/apidoc/docs.go index 79cd7d837c4bd..d6a520054e5ae 100644 --- a/coderd/apidoc/docs.go +++ b/coderd/apidoc/docs.go @@ -552,6 +552,9 @@ const docTemplate = `{ "/api/experimental/chats/{chat}/context": { "put": { "description": "Experimental: this endpoint is subject to change.", + "produces": [ + "application/json" + ], "tags": [ "Chats" ], diff --git a/coderd/apidoc/swagger.json b/coderd/apidoc/swagger.json index 5c26c1df24868..f25b007cdebd7 100644 --- a/coderd/apidoc/swagger.json +++ b/coderd/apidoc/swagger.json @@ -485,6 +485,7 @@ "/api/experimental/chats/{chat}/context": { "put": { "description": "Experimental: this endpoint is subject to change.", + "produces": ["application/json"], "tags": ["Chats"], "summary": "Refresh chat context", "operationId": "refresh-chat-context", diff --git a/coderd/exp_chats.go b/coderd/exp_chats.go index 84089005bec60..37a29763d9005 100644 --- a/coderd/exp_chats.go +++ b/coderd/exp_chats.go @@ -2625,6 +2625,7 @@ func (api *API) applyChatTitleUpdate( // @ID refresh-chat-context // @Security CoderSessionToken // @Tags Chats +// @Produce json // @Param chat path string true "Chat ID" format(uuid) // @Success 200 {object} codersdk.Chat // @Router /api/experimental/chats/{chat}/context [put] diff --git a/coderd/x/chatd/context_hydration.go b/coderd/x/chatd/context_hydration.go index 79c7b39e8ed29..e1026e9658207 100644 --- a/coderd/x/chatd/context_hydration.go +++ b/coderd/x/chatd/context_hydration.go @@ -15,24 +15,39 @@ import ( "github.com/coder/coder/v2/codersdk" ) +// latestAgentSnapshot returns the agent's most recent pinned context snapshot. +// ok is false (with a nil error) when the agent has not pushed a snapshot yet. +// It takes a Store so callers can run it against either the daemon database or +// an open transaction. +func latestAgentSnapshot(ctx context.Context, db database.Store, agentID uuid.UUID) (aggregateHash []byte, snapshotError string, ok bool, err error) { + snapshot, err := db.GetLatestWorkspaceAgentContextSnapshot(ctx, agentID) + switch { + case errors.Is(err, sql.ErrNoRows): + return nil, "", false, nil + case err != nil: + return nil, "", false, xerrors.Errorf("get latest snapshot: %w", err) + default: + return snapshot.AggregateHash, snapshot.SnapshotError, true, nil + } +} + // HydrateAndMarkChatsDirty implements agentapi.ContextDirtyMarker. It runs // inside the PushContextState transaction: it stamps the pushed snapshot hash // on chats for the agent that have not been hydrated yet (no dirty event), // then flips already-pinned chats whose hash differs to dirty. It returns a // callback that publishes the dirty watch events; the caller invokes it only -// after the transaction commits. +// after the transaction commits, and the callback is a no-op when nothing +// transitioned to dirty. // // The pinned hash on dirtied chats is intentionally left unchanged; the -// refresh endpoint re-pins it. The writes run as the chatd system subject -// because an agent does not own the chats bound to it. +// refresh endpoint re-pins it. func (p *Server) HydrateAndMarkChatsDirty(ctx context.Context, tx database.Store, agentID uuid.UUID, aggregateHash []byte, snapshotError string, now time.Time) (func(), error) { - //nolint:gocritic // An agent does not own the chats bound to it, so the - // daemon subject stamps and dirties them. + //nolint:gocritic // An agent does not own the chats bound to it. ctx = dbauthz.AsChatd(ctx) - // Chats created before the agent finished its initial push land with a - // NULL pinned hash. Stamp them now so they start clean. This is their - // first hydration, so no dirty event is emitted. + // Chats created before the agent's first push land with a NULL pinned + // hash. Stamp them now so they start clean; this is their first + // hydration, so no dirty event is emitted. if err := tx.HydrateAgentChatsContext(ctx, database.HydrateAgentChatsContextParams{ AgentID: agentID, AggregateHash: aggregateHash, @@ -41,7 +56,7 @@ func (p *Server) HydrateAndMarkChatsDirty(ctx context.Context, tx database.Store return nil, xerrors.Errorf("hydrate agent chats context: %w", err) } - flipped, err := tx.MarkChatsContextDirtyByAgent(ctx, database.MarkChatsContextDirtyByAgentParams{ + dirtied, err := tx.MarkChatsContextDirtyByAgent(ctx, database.MarkChatsContextDirtyByAgentParams{ AgentID: agentID, AggregateHash: aggregateHash, DirtySince: sql.NullTime{Time: now, Valid: true}, @@ -49,62 +64,62 @@ func (p *Server) HydrateAndMarkChatsDirty(ctx context.Context, tx database.Store if err != nil { return nil, xerrors.Errorf("mark chats context dirty: %w", err) } - if len(flipped) == 0 { + if len(dirtied) == 0 { return func() {}, nil } - // Resolve the expanded chat rows for the flipped IDs so the watch - // events carry a complete chat payload. Read within the same tx for a - // consistent view of the chats just flipped. - active, err := tx.GetActiveChatsByAgentID(ctx, agentID) - if err != nil { - return nil, xerrors.Errorf("get active chats by agent: %w", err) - } - flippedIDs := make(map[uuid.UUID]struct{}, len(flipped)) - for _, f := range flipped { - flippedIDs[f.ID] = struct{}{} - } - dirtyChats := make([]database.Chat, 0, len(flipped)) - for _, chat := range active { - if _, ok := flippedIDs[chat.ID]; ok { - dirtyChats = append(dirtyChats, chat) - } + dirtiedIDs := make([]uuid.UUID, len(dirtied)) + for i, d := range dirtied { + dirtiedIDs[i] = d.ID } + // The publish runs after commit, so resolve the dirtied chats then, + // reading only the transitioned IDs rather than scanning every active + // chat for the agent. return func() { - for _, chat := range dirtyChats { - p.publishChatPubsubEvent(chat, codersdk.ChatWatchEventKindContextDirty, nil) + dirtyChats := make([]database.Chat, 0, len(dirtiedIDs)) + for _, id := range dirtiedIDs { + chat, err := p.db.GetChatByID(ctx, id) + if err != nil { + p.logger.Warn(ctx, "publish context dirty: get chat", + slog.F("chat_id", id), slog.Error(err)) + continue + } + dirtyChats = append(dirtyChats, chat) } + p.publishChatPubsubEvents(dirtyChats, codersdk.ChatWatchEventKindContextDirty) }, nil } // hydrateChatContextOnCreate pins a newly created chat to its agent's latest // context snapshot when one already exists. Best-effort: a chat whose agent // has not pushed yet is hydrated later by that agent's next push. Failures -// are logged and swallowed so they never block chat creation; the columns -// are dark and do not affect chat behavior. +// are logged and swallowed so they never block chat creation. +// +// It stamps via the NULL-guarded HydrateAgentChatsContext so a concurrent +// push that already hydrated the chat is not clobbered with a stale hash. func (p *Server) hydrateChatContextOnCreate(ctx context.Context, chat database.Chat) { if !chat.AgentID.Valid { return } - //nolint:gocritic // Chatd is internal, not a user; it reads the agent - // snapshot and stamps the chat as the daemon subject. + //nolint:gocritic // Chatd stamps chats it does not own as the daemon subject. ctx = dbauthz.AsChatd(ctx) - snapshot, err := p.db.GetLatestWorkspaceAgentContextSnapshot(ctx, chat.AgentID.UUID) - switch { - case errors.Is(err, sql.ErrNoRows): - return - case err != nil: + + aggregateHash, snapshotError, ok, err := latestAgentSnapshot(ctx, p.db, chat.AgentID.UUID) + if err != nil { p.logger.Warn(ctx, "hydrate chat context on create: get latest snapshot", slog.F("chat_id", chat.ID), slog.Error(err)) return } - if err := p.db.SetChatContextSnapshot(ctx, database.SetChatContextSnapshotParams{ - ID: chat.ID, - AggregateHash: snapshot.AggregateHash, - ContextError: snapshot.SnapshotError, + if !ok { + return + } + if err := p.db.HydrateAgentChatsContext(ctx, database.HydrateAgentChatsContextParams{ + AgentID: chat.AgentID.UUID, + AggregateHash: aggregateHash, + ContextError: snapshotError, }); err != nil { - p.logger.Warn(ctx, "hydrate chat context on create: set snapshot", + p.logger.Warn(ctx, "hydrate chat context on create: stamp chats", slog.F("chat_id", chat.ID), slog.Error(err)) } } @@ -113,39 +128,48 @@ func (p *Server) hydrateChatContextOnCreate(ctx context.Context, chat database.C // clears the dirty marker. It backs PUT /chats/{chat}/context (no body). A // chat with no bound agent, or whose agent has no snapshot, simply has its // pinned hash and dirty marker cleared. +// +// The snapshot read and the re-pin run in one repeatable-read transaction so a +// concurrent push cannot land between them and leave the chat pinned to a +// stale hash with the dirty marker cleared. func (p *Server) RefreshChatContext(ctx context.Context, chat database.Chat) (database.Chat, error) { - //nolint:gocritic // Chatd is internal, not a user; it re-pins the chat to - // the agent snapshot as the daemon subject. + //nolint:gocritic // Chatd re-pins the chat as the daemon subject. ctx = dbauthz.AsChatd(ctx) - var ( - aggregateHash []byte - snapshotError string - ) - if chat.AgentID.Valid { - snapshot, err := p.db.GetLatestWorkspaceAgentContextSnapshot(ctx, chat.AgentID.UUID) - switch { - case errors.Is(err, sql.ErrNoRows): - // No snapshot yet; clear the pinned hash and dirty marker. - case err != nil: - return database.Chat{}, xerrors.Errorf("get latest snapshot: %w", err) - default: - aggregateHash = snapshot.AggregateHash - snapshotError = snapshot.SnapshotError + var updated database.Chat + err := database.ReadModifyUpdate(p.db, func(tx database.Store) error { + var ( + aggregateHash []byte + snapshotError string + ) + if chat.AgentID.Valid { + hash, snapErr, ok, err := latestAgentSnapshot(ctx, tx, chat.AgentID.UUID) + if err != nil { + return err + } + if ok { + aggregateHash = hash + snapshotError = snapErr + } } - } - if err := p.db.SetChatContextSnapshot(ctx, database.SetChatContextSnapshotParams{ - ID: chat.ID, - AggregateHash: aggregateHash, - ContextError: snapshotError, - }); err != nil { - return database.Chat{}, xerrors.Errorf("set chat context snapshot: %w", err) - } + if err := tx.SetChatContextSnapshot(ctx, database.SetChatContextSnapshotParams{ + ID: chat.ID, + AggregateHash: aggregateHash, + ContextError: snapshotError, + }); err != nil { + return xerrors.Errorf("set chat context snapshot: %w", err) + } - updated, err := p.db.GetChatByID(ctx, chat.ID) + got, err := tx.GetChatByID(ctx, chat.ID) + if err != nil { + return xerrors.Errorf("get chat after refresh: %w", err) + } + updated = got + return nil + }) if err != nil { - return database.Chat{}, xerrors.Errorf("get chat after refresh: %w", err) + return database.Chat{}, err } return updated, nil } diff --git a/coderd/x/chatd/context_hydration_internal_test.go b/coderd/x/chatd/context_hydration_internal_test.go new file mode 100644 index 0000000000000..90f315de97055 --- /dev/null +++ b/coderd/x/chatd/context_hydration_internal_test.go @@ -0,0 +1,81 @@ +package chatd + +import ( + "database/sql" + "testing" + + "github.com/google/uuid" + "go.uber.org/mock/gomock" + + "cdr.dev/slog/v3/sloggers/slogtest" + "github.com/coder/coder/v2/coderd/database" + "github.com/coder/coder/v2/coderd/database/dbmock" + "github.com/coder/coder/v2/testutil" +) + +// TestHydrateChatContextOnCreate covers the create-time pinning path, which the +// end-to-end test cannot reach: chats there are inserted directly, bypassing +// CreateChat. It pins to the agent's latest snapshot via the NULL-guarded +// HydrateAgentChatsContext so a concurrent push is never clobbered, and is a +// best-effort no-op when there is no agent or no snapshot. +func TestHydrateChatContextOnCreate(t *testing.T) { + t.Parallel() + + t.Run("PinsWhenSnapshotExists", func(t *testing.T) { + t.Parallel() + ctx := testutil.Context(t, testutil.WaitShort) + ctrl := gomock.NewController(t) + db := dbmock.NewMockStore(ctrl) + server := &Server{db: db, logger: slogtest.Make(t, nil)} + + agentID := uuid.New() + chat := database.Chat{ID: uuid.New(), AgentID: uuid.NullUUID{UUID: agentID, Valid: true}} + snapshot := database.WorkspaceAgentContextSnapshot{ + WorkspaceAgentID: agentID, + AggregateHash: []byte{0x0a, 0x0b}, + SnapshotError: "one source failed", + } + + db.EXPECT().GetLatestWorkspaceAgentContextSnapshot(gomock.Any(), agentID). + Return(snapshot, nil) + // The guarded agent-scoped stamp, not an unconditional SetChatContextSnapshot, + // so a concurrent push that already hydrated the chat wins. + db.EXPECT().HydrateAgentChatsContext(gomock.Any(), database.HydrateAgentChatsContextParams{ + AgentID: agentID, + AggregateHash: snapshot.AggregateHash, + ContextError: snapshot.SnapshotError, + }).Return(nil) + + server.hydrateChatContextOnCreate(ctx, chat) + }) + + t.Run("SkipsWhenAgentless", func(t *testing.T) { + t.Parallel() + ctx := testutil.Context(t, testutil.WaitShort) + ctrl := gomock.NewController(t) + // No EXPECT calls: a chat with no agent must touch the database zero times. + db := dbmock.NewMockStore(ctrl) + server := &Server{db: db, logger: slogtest.Make(t, nil)} + + server.hydrateChatContextOnCreate(ctx, database.Chat{ID: uuid.New()}) + }) + + t.Run("SkipsWhenNoSnapshot", func(t *testing.T) { + t.Parallel() + ctx := testutil.Context(t, testutil.WaitShort) + ctrl := gomock.NewController(t) + db := dbmock.NewMockStore(ctrl) + server := &Server{db: db, logger: slogtest.Make(t, nil)} + + agentID := uuid.New() + // ErrNoRows means the agent has not pushed yet; no stamp is written + // (HydrateAgentChatsContext has no EXPECT, so a call would fail the test). + db.EXPECT().GetLatestWorkspaceAgentContextSnapshot(gomock.Any(), agentID). + Return(database.WorkspaceAgentContextSnapshot{}, sql.ErrNoRows) + + server.hydrateChatContextOnCreate(ctx, database.Chat{ + ID: uuid.New(), + AgentID: uuid.NullUUID{UUID: agentID, Valid: true}, + }) + }) +} diff --git a/coderd/x/chatd/context_integration_test.go b/coderd/x/chatd/context_integration_test.go index c12154e399252..1e8062526dd29 100644 --- a/coderd/x/chatd/context_integration_test.go +++ b/coderd/x/chatd/context_integration_test.go @@ -19,8 +19,10 @@ import ( // TestChatContextDirtyFromAgentPush is an end-to-end check of the chat // context integration. An echo-provisioned workspace agent pushes a context // snapshot that hydrates a bound chat; a later push with a different hash -// marks the chat dirty; the experimental API reports the dirty state; and the -// refresh endpoint re-pins the latest snapshot and clears it. +// marks the chat dirty; the experimental API reports the dirty state and the +// snapshot error; the refresh endpoint re-pins the latest snapshot and clears +// it; and a re-push of the now-pinned hash stays clean. A second chat bound to +// no agent stays untouched throughout, guarding the agent-scoped queries. func TestChatContextDirtyFromAgentPush(t *testing.T) { t.Parallel() @@ -66,11 +68,29 @@ func TestChatContextDirtyFromAgentPush(t *testing.T) { Status: database.ChatStatusWaiting, }) + // An unrelated chat bound to no agent. The hydrate and dirty queries + // scope by agent_id, so this chat must stay untouched by every push + // below; it guards against the scoping clause silently breaking. + otherChat := dbgen.Chat(t, db, database.Chat{ + OrganizationID: user.OrganizationID, + OwnerID: user.UserID, + LastModelConfigID: model.ID, + Status: database.ChatStatusWaiting, + }) + // Before any push there is no pinned context. got, err := expClient.GetChat(ctx, chat.ID) require.NoError(t, err) require.Nil(t, got.Context, "no pinned context before the first push") + requireChatContextNil := func(id uuid.UUID, msg string) { + t.Helper() + unrelated, err := expClient.GetChat(ctx, id) + require.NoError(t, err) + require.Nil(t, unrelated.Context, msg) + } + requireChatContextNil(otherChat.ID, "agent-less chat has no pinned context") + // Connect as the agent and push the initial snapshot. The push runs the // hydrate/dirty fan-out synchronously inside its transaction, so the chat // reflects the change by the time the RPC returns. @@ -95,12 +115,15 @@ func TestChatContextDirtyFromAgentPush(t *testing.T) { require.False(t, got.Context.Dirty, "initial hydration is clean") require.Nil(t, got.Context.DirtySince) - // The agent refreshes its context and pushes a different hash, which - // drifts from the pinned hash and marks the chat dirty. + // The agent refreshes its context and pushes a different hash carrying a + // snapshot-level error, which drifts from the pinned hash and marks the + // chat dirty. hashB := []byte{0x04, 0x05, 0x06} + const snapshotError = "two sources failed to resolve" resp, err = aAPI.PushContextState(ctx, &agentproto.PushContextStateRequest{ Version: 2, AggregateHash: hashB, + SnapshotError: snapshotError, }) require.NoError(t, err) require.True(t, resp.GetAccepted()) @@ -110,15 +133,32 @@ func TestChatContextDirtyFromAgentPush(t *testing.T) { require.NotNil(t, got.Context) require.True(t, got.Context.Dirty, "drift should mark the chat dirty") require.NotNil(t, got.Context.DirtySince) + requireChatContextNil(otherChat.ID, "agent-less chat unaffected by the dirty fan-out") - // Refreshing re-pins the latest snapshot and clears the dirty marker. + // Refreshing re-pins the latest snapshot (hash and error) and clears the + // dirty marker. refreshed, err := expClient.RefreshChatContext(ctx, chat.ID) require.NoError(t, err) require.NotNil(t, refreshed.Context) require.False(t, refreshed.Context.Dirty, "refresh clears the dirty marker") + require.Equal(t, snapshotError, refreshed.Context.Error, "refresh re-pins the snapshot error") got, err = expClient.GetChat(ctx, chat.ID) require.NoError(t, err) require.NotNil(t, got.Context) require.False(t, got.Context.Dirty) + + // Re-pushing the now-pinned hash proves the refresh advanced the pin to + // hashB: a matching hash must not re-dirty the chat. + resp, err = aAPI.PushContextState(ctx, &agentproto.PushContextStateRequest{ + Version: 3, + AggregateHash: hashB, + }) + require.NoError(t, err) + require.True(t, resp.GetAccepted()) + + got, err = expClient.GetChat(ctx, chat.ID) + require.NoError(t, err) + require.NotNil(t, got.Context) + require.False(t, got.Context.Dirty, "re-push of the pinned hash stays clean") } diff --git a/docs/reference/api/chats.md b/docs/reference/api/chats.md index 602a191db5c66..f3597c44627ca 100644 --- a/docs/reference/api/chats.md +++ b/docs/reference/api/chats.md @@ -1352,7 +1352,7 @@ To perform this operation, you must be authenticated. [Learn more](authenticatio ```shell # Example request using curl curl -X PUT http://coder-server:8080/api/experimental/chats/{chat}/context \ - -H 'Accept: */*' \ + -H 'Accept: application/json' \ -H 'Coder-Session-Token: API_KEY' ``` @@ -1370,6 +1370,296 @@ Experimental: this endpoint is subject to change. > 200 Response +```json +{ + "agent_id": "2b1e3b65-2c04-4fa2-a2d7-467901e98978", + "archived": true, + "build_id": "bfb1f3fa-bf7b-43a5-9e0b-26cc050e44cb", + "children": [ + { + "agent_id": "2b1e3b65-2c04-4fa2-a2d7-467901e98978", + "archived": true, + "build_id": "bfb1f3fa-bf7b-43a5-9e0b-26cc050e44cb", + "children": [], + "client_type": "ui", + "context": { + "dirty": true, + "dirty_since": "2019-08-24T14:15:22Z", + "error": "string" + }, + "created_at": "2019-08-24T14:15:22Z", + "diff_status": { + "additions": 0, + "approved": true, + "author_avatar_url": "string", + "author_login": "string", + "base_branch": "string", + "changed_files": 0, + "changes_requested": true, + "chat_id": "efc9fe20-a1e5-4a8c-9c48-f1b30c1e4f86", + "commits": 0, + "deletions": 0, + "head_branch": "string", + "pr_number": 0, + "pull_request_draft": true, + "pull_request_state": "string", + "pull_request_title": "string", + "refreshed_at": "2019-08-24T14:15:22Z", + "reviewer_count": 0, + "stale_at": "2019-08-24T14:15:22Z", + "url": "string" + }, + "files": [ + { + "created_at": "2019-08-24T14:15:22Z", + "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", + "mime_type": "string", + "name": "string", + "organization_id": "7c60d51f-b44e-4682-87d6-449835ea4de6", + "owner_id": "8826ee2e-7933-4665-aef2-2393f84a0d05" + } + ], + "has_unread": true, + "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", + "labels": { + "property1": "string", + "property2": "string" + }, + "last_error": { + "detail": "string", + "kind": "generic", + "message": "string", + "provider": "string", + "retryable": true, + "status_code": 0 + }, + "last_injected_context": [ + { + "args": [ + 0 + ], + "args_delta": "string", + "completed_at": "2019-08-24T14:15:22Z", + "content": "string", + "context_file_agent_id": { + "uuid": "string", + "valid": true + }, + "context_file_content": "string", + "context_file_directory": "string", + "context_file_os": "string", + "context_file_path": "string", + "context_file_skill_meta_file": "string", + "context_file_truncated": true, + "created_at": "2019-08-24T14:15:22Z", + "data": [ + 0 + ], + "end_line": 0, + "file_id": { + "uuid": "string", + "valid": true + }, + "file_name": "string", + "is_error": true, + "is_media": true, + "mcp_server_config_id": { + "uuid": "string", + "valid": true + }, + "media_type": "string", + "name": "string", + "parsed_commands": [ + [ + "string" + ] + ], + "provider_executed": true, + "provider_metadata": [ + 0 + ], + "result": [ + 0 + ], + "result_delta": "string", + "result_reset": true, + "signature": "string", + "skill_description": "string", + "skill_dir": "string", + "skill_name": "string", + "source_id": "string", + "start_line": 0, + "text": "string", + "title": "string", + "tool_call_id": "string", + "tool_name": "string", + "type": "text", + "url": "string" + } + ], + "last_model_config_id": "30ebb95f-c255-4759-9429-89aa4ec1554c", + "last_turn_summary": "string", + "mcp_server_ids": [ + "497f6eca-6276-4993-bfeb-53cbbbba6f08" + ], + "organization_id": "7c60d51f-b44e-4682-87d6-449835ea4de6", + "owner_id": "8826ee2e-7933-4665-aef2-2393f84a0d05", + "owner_name": "string", + "owner_username": "string", + "parent_chat_id": "c3609ee6-3b11-4a93-b9ae-e4fabcc99359", + "pin_order": 0, + "plan_mode": "plan", + "root_chat_id": "2898031c-fdce-4e3e-8c53-4481dd42fcd7", + "shared": true, + "status": "waiting", + "title": "string", + "updated_at": "2019-08-24T14:15:22Z", + "warnings": [ + "string" + ], + "workspace_id": "0967198e-ec7b-4c6b-b4d3-f71244cadbe9" + } + ], + "client_type": "ui", + "context": { + "dirty": true, + "dirty_since": "2019-08-24T14:15:22Z", + "error": "string" + }, + "created_at": "2019-08-24T14:15:22Z", + "diff_status": { + "additions": 0, + "approved": true, + "author_avatar_url": "string", + "author_login": "string", + "base_branch": "string", + "changed_files": 0, + "changes_requested": true, + "chat_id": "efc9fe20-a1e5-4a8c-9c48-f1b30c1e4f86", + "commits": 0, + "deletions": 0, + "head_branch": "string", + "pr_number": 0, + "pull_request_draft": true, + "pull_request_state": "string", + "pull_request_title": "string", + "refreshed_at": "2019-08-24T14:15:22Z", + "reviewer_count": 0, + "stale_at": "2019-08-24T14:15:22Z", + "url": "string" + }, + "files": [ + { + "created_at": "2019-08-24T14:15:22Z", + "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", + "mime_type": "string", + "name": "string", + "organization_id": "7c60d51f-b44e-4682-87d6-449835ea4de6", + "owner_id": "8826ee2e-7933-4665-aef2-2393f84a0d05" + } + ], + "has_unread": true, + "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", + "labels": { + "property1": "string", + "property2": "string" + }, + "last_error": { + "detail": "string", + "kind": "generic", + "message": "string", + "provider": "string", + "retryable": true, + "status_code": 0 + }, + "last_injected_context": [ + { + "args": [ + 0 + ], + "args_delta": "string", + "completed_at": "2019-08-24T14:15:22Z", + "content": "string", + "context_file_agent_id": { + "uuid": "string", + "valid": true + }, + "context_file_content": "string", + "context_file_directory": "string", + "context_file_os": "string", + "context_file_path": "string", + "context_file_skill_meta_file": "string", + "context_file_truncated": true, + "created_at": "2019-08-24T14:15:22Z", + "data": [ + 0 + ], + "end_line": 0, + "file_id": { + "uuid": "string", + "valid": true + }, + "file_name": "string", + "is_error": true, + "is_media": true, + "mcp_server_config_id": { + "uuid": "string", + "valid": true + }, + "media_type": "string", + "name": "string", + "parsed_commands": [ + [ + "string" + ] + ], + "provider_executed": true, + "provider_metadata": [ + 0 + ], + "result": [ + 0 + ], + "result_delta": "string", + "result_reset": true, + "signature": "string", + "skill_description": "string", + "skill_dir": "string", + "skill_name": "string", + "source_id": "string", + "start_line": 0, + "text": "string", + "title": "string", + "tool_call_id": "string", + "tool_name": "string", + "type": "text", + "url": "string" + } + ], + "last_model_config_id": "30ebb95f-c255-4759-9429-89aa4ec1554c", + "last_turn_summary": "string", + "mcp_server_ids": [ + "497f6eca-6276-4993-bfeb-53cbbbba6f08" + ], + "organization_id": "7c60d51f-b44e-4682-87d6-449835ea4de6", + "owner_id": "8826ee2e-7933-4665-aef2-2393f84a0d05", + "owner_name": "string", + "owner_username": "string", + "parent_chat_id": "c3609ee6-3b11-4a93-b9ae-e4fabcc99359", + "pin_order": 0, + "plan_mode": "plan", + "root_chat_id": "2898031c-fdce-4e3e-8c53-4481dd42fcd7", + "shared": true, + "status": "waiting", + "title": "string", + "updated_at": "2019-08-24T14:15:22Z", + "warnings": [ + "string" + ], + "workspace_id": "0967198e-ec7b-4c6b-b4d3-f71244cadbe9" +} +``` + ### Responses | Status | Meaning | Description | Schema |