-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Expand file tree
/
Copy pathchatdebug.sql
More file actions
308 lines (294 loc) · 11.5 KB
/
chatdebug.sql
File metadata and controls
308 lines (294 loc) · 11.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
-- updated_at is the retention clock used by DeleteOldChatDebugRuns.
-- Set it on every write to keep retention semantics correct.
-- name: InsertChatDebugRun :one
INSERT INTO chat_debug_runs (
chat_id,
root_chat_id,
parent_chat_id,
model_config_id,
trigger_message_id,
history_tip_message_id,
kind,
status,
provider,
model,
summary,
started_at,
updated_at,
finished_at
)
VALUES (
@chat_id::uuid,
sqlc.narg('root_chat_id')::uuid,
sqlc.narg('parent_chat_id')::uuid,
sqlc.narg('model_config_id')::uuid,
sqlc.narg('trigger_message_id')::bigint,
sqlc.narg('history_tip_message_id')::bigint,
@kind::text,
@status::text,
sqlc.narg('provider')::text,
sqlc.narg('model')::text,
COALESCE(sqlc.narg('summary')::jsonb, '{}'::jsonb),
COALESCE(sqlc.narg('started_at')::timestamptz, NOW()),
COALESCE(sqlc.narg('updated_at')::timestamptz, NOW()),
sqlc.narg('finished_at')::timestamptz
)
RETURNING *;
-- name: UpdateChatDebugRun :one
-- Uses COALESCE so that passing NULL from Go means "keep the
-- existing value." This is intentional: debug rows follow a
-- write-once-finalize pattern where fields are set at creation
-- or finalization and never cleared back to NULL. The @now
-- parameter keeps updated_at under the caller's clock.
-- updated_at is also the retention clock used by DeleteOldChatDebugRuns.
--
-- finished_at is enforced as write-once at the SQL level: once
-- populated it cannot be overwritten by a later call. Callers
-- that issue a summary or status refresh after the run has
-- already finalized therefore cannot corrupt the original
-- completion timestamp, which keeps duration and ordering
-- calculations stable regardless of how many times the row is
-- updated.
UPDATE chat_debug_runs
SET
root_chat_id = COALESCE(sqlc.narg('root_chat_id')::uuid, root_chat_id),
parent_chat_id = COALESCE(sqlc.narg('parent_chat_id')::uuid, parent_chat_id),
model_config_id = COALESCE(sqlc.narg('model_config_id')::uuid, model_config_id),
trigger_message_id = COALESCE(sqlc.narg('trigger_message_id')::bigint, trigger_message_id),
history_tip_message_id = COALESCE(sqlc.narg('history_tip_message_id')::bigint, history_tip_message_id),
status = COALESCE(sqlc.narg('status')::text, status),
provider = COALESCE(sqlc.narg('provider')::text, provider),
model = COALESCE(sqlc.narg('model')::text, model),
summary = COALESCE(sqlc.narg('summary')::jsonb, summary),
finished_at = COALESCE(finished_at, sqlc.narg('finished_at')::timestamptz),
updated_at = @now::timestamptz
WHERE id = @id::uuid
AND chat_id = @chat_id::uuid
RETURNING *;
-- name: InsertChatDebugStep :one
-- The CTE atomically locks the parent run via UPDATE, bumps its
-- updated_at (eliminating a separate TouchChatDebugRunUpdatedAt
-- call), and enforces the finalization guard: if the run is already
-- finished, the UPDATE returns zero rows, the INSERT gets no source
-- rows, and sql.ErrNoRows is returned. The UPDATE also serializes
-- with concurrent FinalizeStale under READ COMMITTED isolation.
WITH locked_run AS (
UPDATE chat_debug_runs
SET updated_at = COALESCE(sqlc.narg('updated_at')::timestamptz, NOW())
WHERE id = @run_id::uuid
AND chat_id = @chat_id::uuid
AND finished_at IS NULL
RETURNING chat_id
)
INSERT INTO chat_debug_steps (
run_id,
chat_id,
step_number,
operation,
status,
history_tip_message_id,
assistant_message_id,
normalized_request,
normalized_response,
usage,
attempts,
error,
metadata,
started_at,
updated_at,
finished_at
)
SELECT
@run_id::uuid,
locked_run.chat_id,
@step_number::int,
@operation::text,
@status::text,
sqlc.narg('history_tip_message_id')::bigint,
sqlc.narg('assistant_message_id')::bigint,
COALESCE(sqlc.narg('normalized_request')::jsonb, '{}'::jsonb),
sqlc.narg('normalized_response')::jsonb,
sqlc.narg('usage')::jsonb,
COALESCE(sqlc.narg('attempts')::jsonb, '[]'::jsonb),
sqlc.narg('error')::jsonb,
COALESCE(sqlc.narg('metadata')::jsonb, '{}'::jsonb),
COALESCE(sqlc.narg('started_at')::timestamptz, NOW()),
COALESCE(sqlc.narg('updated_at')::timestamptz, NOW()),
sqlc.narg('finished_at')::timestamptz
FROM locked_run
RETURNING *;
-- name: UpdateChatDebugStep :one
-- Uses COALESCE so that passing NULL from Go means "keep the
-- existing value." This is intentional: debug rows follow a
-- write-once-finalize pattern where fields are set at creation
-- or finalization and never cleared back to NULL. The @now
-- parameter keeps updated_at under the caller's clock, matching
-- the injectable quartz.Clock used by FinalizeStale sweeps.
UPDATE chat_debug_steps
SET
status = COALESCE(sqlc.narg('status')::text, status),
history_tip_message_id = COALESCE(sqlc.narg('history_tip_message_id')::bigint, history_tip_message_id),
assistant_message_id = COALESCE(sqlc.narg('assistant_message_id')::bigint, assistant_message_id),
normalized_request = COALESCE(sqlc.narg('normalized_request')::jsonb, normalized_request),
normalized_response = COALESCE(sqlc.narg('normalized_response')::jsonb, normalized_response),
usage = COALESCE(sqlc.narg('usage')::jsonb, usage),
attempts = COALESCE(sqlc.narg('attempts')::jsonb, attempts),
error = COALESCE(sqlc.narg('error')::jsonb, error),
metadata = COALESCE(sqlc.narg('metadata')::jsonb, metadata),
finished_at = COALESCE(sqlc.narg('finished_at')::timestamptz, finished_at),
updated_at = @now::timestamptz
WHERE id = @id::uuid
AND chat_id = @chat_id::uuid
RETURNING *;
-- name: TouchChatDebugRunUpdatedAt :exec
-- Overrides updated_at on the parent run without touching any
-- other column. Used by tests that need to stamp a run with a
-- specific timestamp after the InsertChatDebugStep CTE has
-- already bumped it to NOW(), so stale-row finalization paths
-- can be exercised deterministically. The chatdebug service
-- itself does not call this: heartbeats go through
-- TouchChatDebugStepAndRun, and step creation updates the parent
-- run via the InsertChatDebugStep CTE.
UPDATE chat_debug_runs
SET updated_at = @now::timestamptz
WHERE id = @id::uuid
AND chat_id = @chat_id::uuid;
-- name: TouchChatDebugStepAndRun :exec
-- Atomically bumps updated_at on both the step and its parent run
-- in a single statement. This prevents FinalizeStale from
-- interleaving between the two touches and finalizing a run whose
-- step heartbeat was just written.
--
-- The step UPDATE joins through touched_run (via FROM) and reads
-- its RETURNING rows. Per the PostgreSQL WITH semantics, RETURNING
-- is the only way to communicate values between a data-modifying
-- CTE and the main query, and consuming those rows forces the run
-- UPDATE to complete before the step UPDATE. That matches the
-- lock order used by FinalizeStaleChatDebugRows and avoids a
-- deadlock between concurrent heartbeats and stale sweeps. The
-- join also constrains the step update to the specified run so a
-- mismatched (run_id, step_id) pair cannot silently refresh an
-- unrelated step.
WITH touched_run AS (
UPDATE chat_debug_runs
SET updated_at = @now::timestamptz
WHERE id = @run_id::uuid
AND chat_id = @chat_id::uuid
RETURNING id, chat_id
)
UPDATE chat_debug_steps
SET updated_at = @now::timestamptz
FROM touched_run
WHERE chat_debug_steps.id = @step_id::uuid
AND chat_debug_steps.run_id = touched_run.id
AND chat_debug_steps.chat_id = touched_run.chat_id;
-- name: GetChatDebugRunsByChatID :many
-- Returns the most recent debug runs for a chat, ordered newest-first.
-- Callers must supply an explicit limit to avoid unbounded result sets.
SELECT *
FROM chat_debug_runs
WHERE chat_id = @chat_id::uuid
ORDER BY started_at DESC, id DESC
LIMIT @limit_val::int;
-- name: GetChatDebugRunByID :one
SELECT *
FROM chat_debug_runs
WHERE id = @id::uuid;
-- name: GetChatDebugStepsByRunID :many
SELECT *
FROM chat_debug_steps
WHERE run_id = @run_id::uuid
ORDER BY step_number ASC, started_at ASC;
-- name: DeleteChatDebugDataByChatID :execrows
-- The started_before bound prevents retried cleanup from deleting
-- runs created by a replacement turn that races ahead of the retry
-- window (for example, after an unarchive races with a pending
-- archive-cleanup retry).
DELETE FROM chat_debug_runs
WHERE chat_id = @chat_id::uuid
AND started_at < @started_before::timestamptz;
-- name: DeleteChatDebugDataAfterMessageID :execrows
-- Deletes debug runs (and their cascaded steps) whose message IDs
-- exceed the cutoff. The started_before bound prevents retried
-- cleanup from deleting runs created by a replacement turn that
-- raced ahead of the retry window.
WITH affected_runs AS (
SELECT DISTINCT run.id
FROM chat_debug_runs run
WHERE run.chat_id = @chat_id::uuid
AND run.started_at < @started_before::timestamptz
AND (
run.history_tip_message_id > @message_id::bigint
OR run.trigger_message_id > @message_id::bigint
)
UNION
SELECT DISTINCT step.run_id AS id
FROM chat_debug_steps step
JOIN chat_debug_runs run ON run.id = step.run_id
AND run.chat_id = step.chat_id
WHERE step.chat_id = @chat_id::uuid
AND run.started_at < @started_before::timestamptz
AND (
step.assistant_message_id > @message_id::bigint
OR step.history_tip_message_id > @message_id::bigint
)
)
DELETE FROM chat_debug_runs
WHERE chat_id = @chat_id::uuid
AND id IN (SELECT id FROM affected_runs);
-- updated_at is the retention clock, so the window starts after the run
-- stops being written to.
-- Intentionally no finished_at IS NOT NULL guard: abandoned in-flight rows
-- older than the cutoff are also purged.
-- name: DeleteOldChatDebugRuns :execrows
WITH deletable AS (
SELECT id, chat_id
FROM chat_debug_runs
WHERE updated_at < @before_time::timestamptz
ORDER BY updated_at ASC
LIMIT @limit_count::int
)
DELETE FROM chat_debug_runs
USING deletable
WHERE chat_debug_runs.id = deletable.id
AND chat_debug_runs.chat_id = deletable.chat_id;
-- name: FinalizeStaleChatDebugRows :one
-- Marks orphaned in-progress rows as interrupted so they do not stay
-- in a non-terminal state forever. The NOT IN list must match the
-- terminal statuses defined by ChatDebugStatus in codersdk/chats.go.
--
-- The steps CTE also catches steps whose parent run was just finalized
-- (via run_id IN), because PostgreSQL data-modifying CTEs share the
-- same snapshot and cannot see each other's row updates. Without this,
-- a step with a recent updated_at would survive its run's finalization
-- and remain in 'in_progress' state permanently.
--
-- @now is the caller's clock timestamp so that mock-clock tests stay
-- consistent with the @updated_before cutoff.
WITH finalized_runs AS (
UPDATE chat_debug_runs
SET
status = 'interrupted',
updated_at = @now::timestamptz,
finished_at = @now::timestamptz
WHERE updated_at < @updated_before::timestamptz
AND finished_at IS NULL
AND status NOT IN ('completed', 'error', 'interrupted')
RETURNING id
), finalized_steps AS (
UPDATE chat_debug_steps
SET
status = 'interrupted',
updated_at = @now::timestamptz,
finished_at = @now::timestamptz
WHERE (
updated_at < @updated_before::timestamptz
OR run_id IN (SELECT id FROM finalized_runs)
)
AND finished_at IS NULL
AND status NOT IN ('completed', 'error', 'interrupted')
RETURNING 1
)
SELECT
(SELECT COUNT(*) FROM finalized_runs)::bigint AS runs_finalized,
(SELECT COUNT(*) FROM finalized_steps)::bigint AS steps_finalized;