forked from coder/coder
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathinsights.sql
More file actions
384 lines (369 loc) · 14.7 KB
/
insights.sql
File metadata and controls
384 lines (369 loc) · 14.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
-- name: GetUserLatencyInsights :many
-- GetUserLatencyInsights returns the median and 95th percentile connection
-- latency that users have experienced. The result can be filtered on
-- template_ids, meaning only user data from workspaces based on those templates
-- will be included.
SELECT
workspace_agent_stats.user_id,
users.username,
users.avatar_url,
array_agg(DISTINCT template_id)::uuid[] AS template_ids,
coalesce((PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY connection_median_latency_ms)), -1)::FLOAT AS workspace_connection_latency_50,
coalesce((PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY connection_median_latency_ms)), -1)::FLOAT AS workspace_connection_latency_95
FROM workspace_agent_stats
JOIN users ON (users.id = workspace_agent_stats.user_id)
WHERE
workspace_agent_stats.created_at >= @start_time
AND workspace_agent_stats.created_at < @end_time
AND workspace_agent_stats.connection_median_latency_ms > 0
AND workspace_agent_stats.connection_count > 0
AND CASE WHEN COALESCE(array_length(@template_ids::uuid[], 1), 0) > 0 THEN template_id = ANY(@template_ids::uuid[]) ELSE TRUE END
GROUP BY workspace_agent_stats.user_id, users.username, users.avatar_url
ORDER BY user_id ASC;
-- name: GetUserActivityInsights :many
-- GetUserActivityInsights returns the ranking with top active users.
-- The result can be filtered on template_ids, meaning only user data from workspaces
-- based on those templates will be included.
-- Note: When selecting data from multiple templates or the entire deployment,
-- be aware that it may lead to an increase in "usage" numbers (cumulative). In such cases,
-- users may be counted multiple times for the same time interval if they have used multiple templates
-- simultaneously.
WITH app_stats AS (
SELECT
s.start_time,
was.user_id,
w.template_id,
60 as seconds
FROM workspace_app_stats was
JOIN workspaces w ON (
w.id = was.workspace_id
AND CASE WHEN COALESCE(array_length(@template_ids::uuid[], 1), 0) > 0 THEN w.template_id = ANY(@template_ids::uuid[]) ELSE TRUE END
)
-- This table contains both 1 minute entries and >1 minute entries,
-- to calculate this with our uniqueness constraints, we generate series
-- for the longer intervals.
CROSS JOIN LATERAL generate_series(
date_trunc('minute', was.session_started_at),
-- Subtract 1 microsecond to avoid creating an extra series.
date_trunc('minute', was.session_ended_at - '1 microsecond'::interval),
'1 minute'::interval
) s(start_time)
WHERE
s.start_time >= @start_time::timestamptz
-- Subtract one minute because the series only contains the start time.
AND s.start_time < (@end_time::timestamptz) - '1 minute'::interval
GROUP BY s.start_time, w.template_id, was.user_id
), session_stats AS (
SELECT
date_trunc('minute', was.created_at) as start_time,
was.user_id,
was.template_id,
CASE WHEN
SUM(was.session_count_vscode) > 0 OR
SUM(was.session_count_jetbrains) > 0 OR
SUM(was.session_count_reconnecting_pty) > 0 OR
SUM(was.session_count_ssh) > 0
THEN 60 ELSE 0 END as seconds
FROM workspace_agent_stats was
WHERE
was.created_at >= @start_time::timestamptz
AND was.created_at < @end_time::timestamptz
AND was.connection_count > 0
AND CASE WHEN COALESCE(array_length(@template_ids::uuid[], 1), 0) > 0 THEN was.template_id = ANY(@template_ids::uuid[]) ELSE TRUE END
GROUP BY date_trunc('minute', was.created_at), was.user_id, was.template_id
), combined_stats AS (
SELECT
user_id,
template_id,
start_time,
seconds
FROM app_stats
UNION
SELECT
user_id,
template_id,
start_time,
seconds
FROM session_stats
)
SELECT
users.id as user_id,
users.username,
users.avatar_url,
array_agg(DISTINCT template_id)::uuid[] AS template_ids,
SUM(seconds) AS usage_seconds
FROM combined_stats
JOIN users ON (users.id = combined_stats.user_id)
GROUP BY users.id, username, avatar_url
ORDER BY user_id ASC;
-- name: GetTemplateInsights :one
-- GetTemplateInsights has a granularity of 5 minutes where if a session/app was
-- in use during a minute, we will add 5 minutes to the total usage for that
-- session/app (per user).
WITH agent_stats_by_interval_and_user AS (
SELECT
date_trunc('minute', was.created_at),
was.user_id,
array_agg(was.template_id) AS template_ids,
CASE WHEN SUM(was.session_count_vscode) > 0 THEN 60 ELSE 0 END AS usage_vscode_seconds,
CASE WHEN SUM(was.session_count_jetbrains) > 0 THEN 60 ELSE 0 END AS usage_jetbrains_seconds,
CASE WHEN SUM(was.session_count_reconnecting_pty) > 0 THEN 60 ELSE 0 END AS usage_reconnecting_pty_seconds,
CASE WHEN SUM(was.session_count_ssh) > 0 THEN 60 ELSE 0 END AS usage_ssh_seconds
FROM workspace_agent_stats was
WHERE
was.created_at >= @start_time::timestamptz
AND was.created_at < @end_time::timestamptz
AND was.connection_count > 0
AND CASE WHEN COALESCE(array_length(@template_ids::uuid[], 1), 0) > 0 THEN was.template_id = ANY(@template_ids::uuid[]) ELSE TRUE END
GROUP BY date_trunc('minute', was.created_at), was.user_id
), template_ids AS (
SELECT array_agg(DISTINCT template_id) AS ids
FROM agent_stats_by_interval_and_user, unnest(template_ids) template_id
WHERE template_id IS NOT NULL
)
SELECT
COALESCE((SELECT ids FROM template_ids), '{}')::uuid[] AS template_ids,
-- Return IDs so we can combine this with GetTemplateAppInsights.
COALESCE(array_agg(DISTINCT user_id), '{}')::uuid[] AS active_user_ids,
COALESCE(SUM(usage_vscode_seconds), 0)::bigint AS usage_vscode_seconds,
COALESCE(SUM(usage_jetbrains_seconds), 0)::bigint AS usage_jetbrains_seconds,
COALESCE(SUM(usage_reconnecting_pty_seconds), 0)::bigint AS usage_reconnecting_pty_seconds,
COALESCE(SUM(usage_ssh_seconds), 0)::bigint AS usage_ssh_seconds
FROM agent_stats_by_interval_and_user;
-- name: GetTemplateInsightsByTemplate :many
WITH agent_stats_by_interval_and_user AS (
SELECT
date_trunc('minute', was.created_at) AS created_at_trunc,
was.template_id,
was.user_id,
CASE WHEN SUM(was.session_count_vscode) > 0 THEN 60 ELSE 0 END AS usage_vscode_seconds,
CASE WHEN SUM(was.session_count_jetbrains) > 0 THEN 60 ELSE 0 END AS usage_jetbrains_seconds,
CASE WHEN SUM(was.session_count_reconnecting_pty) > 0 THEN 60 ELSE 0 END AS usage_reconnecting_pty_seconds,
CASE WHEN SUM(was.session_count_ssh) > 0 THEN 60 ELSE 0 END AS usage_ssh_seconds
FROM workspace_agent_stats was
WHERE
was.created_at >= @start_time::timestamptz
AND was.created_at < @end_time::timestamptz
AND was.connection_count > 0
GROUP BY created_at_trunc, was.template_id, was.user_id
)
SELECT
template_id,
COALESCE(COUNT(DISTINCT user_id))::bigint AS active_users,
COALESCE(SUM(usage_vscode_seconds), 0)::bigint AS usage_vscode_seconds,
COALESCE(SUM(usage_jetbrains_seconds), 0)::bigint AS usage_jetbrains_seconds,
COALESCE(SUM(usage_reconnecting_pty_seconds), 0)::bigint AS usage_reconnecting_pty_seconds,
COALESCE(SUM(usage_ssh_seconds), 0)::bigint AS usage_ssh_seconds
FROM agent_stats_by_interval_and_user
GROUP BY template_id;
-- name: GetTemplateAppInsights :many
-- GetTemplateAppInsights returns the aggregate usage of each app in a given
-- timeframe. The result can be filtered on template_ids, meaning only user data
-- from workspaces based on those templates will be included.
WITH app_stats_by_user_and_agent AS (
SELECT
s.start_time,
60 as seconds,
w.template_id,
was.user_id,
was.agent_id,
was.access_method,
was.slug_or_port,
wa.display_name,
wa.icon,
(wa.slug IS NOT NULL)::boolean AS is_app
FROM workspace_app_stats was
JOIN workspaces w ON (
w.id = was.workspace_id
AND CASE WHEN COALESCE(array_length(@template_ids::uuid[], 1), 0) > 0 THEN w.template_id = ANY(@template_ids::uuid[]) ELSE TRUE END
)
-- We do a left join here because we want to include user IDs that have used
-- e.g. ports when counting active users.
LEFT JOIN workspace_apps wa ON (
wa.agent_id = was.agent_id
AND wa.slug = was.slug_or_port
)
-- This table contains both 1 minute entries and >1 minute entries,
-- to calculate this with our uniqueness constraints, we generate series
-- for the longer intervals.
CROSS JOIN LATERAL generate_series(
date_trunc('minute', was.session_started_at),
-- Subtract 1 microsecond to avoid creating an extra series.
date_trunc('minute', was.session_ended_at - '1 microsecond'::interval),
'1 minute'::interval
) s(start_time)
WHERE
s.start_time >= @start_time::timestamptz
-- Subtract one minute because the series only contains the start time.
AND s.start_time < (@end_time::timestamptz) - '1 minute'::interval
GROUP BY s.start_time, w.template_id, was.user_id, was.agent_id, was.access_method, was.slug_or_port, wa.display_name, wa.icon, wa.slug
)
SELECT
array_agg(DISTINCT template_id)::uuid[] AS template_ids,
-- Return IDs so we can combine this with GetTemplateInsights.
array_agg(DISTINCT user_id)::uuid[] AS active_user_ids,
access_method,
slug_or_port,
display_name,
icon,
is_app,
SUM(seconds) AS usage_seconds
FROM app_stats_by_user_and_agent
GROUP BY access_method, slug_or_port, display_name, icon, is_app;
-- name: GetTemplateAppInsightsByTemplate :many
WITH app_stats_by_user_and_agent AS (
SELECT
s.start_time,
60 as seconds,
w.template_id,
was.user_id,
was.agent_id,
was.slug_or_port,
wa.display_name,
(wa.slug IS NOT NULL)::boolean AS is_app
FROM workspace_app_stats was
JOIN workspaces w ON (
w.id = was.workspace_id
)
-- We do a left join here because we want to include user IDs that have used
-- e.g. ports when counting active users.
LEFT JOIN workspace_apps wa ON (
wa.agent_id = was.agent_id
AND wa.slug = was.slug_or_port
)
-- This table contains both 1 minute entries and >1 minute entries,
-- to calculate this with our uniqueness constraints, we generate series
-- for the longer intervals.
CROSS JOIN LATERAL generate_series(
date_trunc('minute', was.session_started_at),
-- Subtract 1 microsecond to avoid creating an extra series.
date_trunc('minute', was.session_ended_at - '1 microsecond'::interval),
'1 minute'::interval
) s(start_time)
WHERE
s.start_time >= @start_time::timestamptz
-- Subtract one minute because the series only contains the start time.
AND s.start_time < (@end_time::timestamptz) - '1 minute'::interval
GROUP BY s.start_time, w.template_id, was.user_id, was.agent_id, was.slug_or_port, wa.display_name, wa.slug
)
SELECT
template_id,
display_name,
slug_or_port,
COALESCE(COUNT(DISTINCT user_id))::bigint AS active_users,
SUM(seconds) AS usage_seconds
FROM app_stats_by_user_and_agent
WHERE is_app IS TRUE
GROUP BY template_id, display_name, slug_or_port;
-- name: GetTemplateInsightsByInterval :many
-- GetTemplateInsightsByInterval returns all intervals between start and end
-- time, if end time is a partial interval, it will be included in the results and
-- that interval will be shorter than a full one. If there is no data for a selected
-- interval/template, it will be included in the results with 0 active users.
WITH ts AS (
SELECT
d::timestamptz AS from_,
CASE
WHEN (d::timestamptz + (@interval_days::int || ' day')::interval) <= @end_time::timestamptz
THEN (d::timestamptz + (@interval_days::int || ' day')::interval)
ELSE @end_time::timestamptz
END AS to_
FROM
-- Subtract 1 microsecond from end_time to avoid including the next interval in the results.
generate_series(@start_time::timestamptz, (@end_time::timestamptz) - '1 microsecond'::interval, (@interval_days::int || ' day')::interval) AS d
), unflattened_usage_by_interval AS (
-- We select data from both workspace agent stats and workspace app stats to
-- get a complete picture of usage. This matches how usage is calculated by
-- the combination of GetTemplateInsights and GetTemplateAppInsights. We use
-- a union all to avoid a costly distinct operation.
--
-- Note that one query must perform a left join so that all intervals are
-- present at least once.
SELECT
ts.*,
was.template_id,
was.user_id
FROM ts
LEFT JOIN workspace_agent_stats was ON (
was.created_at >= ts.from_
AND was.created_at < ts.to_
AND was.connection_count > 0
AND CASE WHEN COALESCE(array_length(@template_ids::uuid[], 1), 0) > 0 THEN was.template_id = ANY(@template_ids::uuid[]) ELSE TRUE END
)
GROUP BY ts.from_, ts.to_, was.template_id, was.user_id
UNION ALL
SELECT
ts.*,
w.template_id,
was.user_id
FROM ts
JOIN workspace_app_stats was ON (
(was.session_started_at >= ts.from_ AND was.session_started_at < ts.to_)
OR (was.session_ended_at > ts.from_ AND was.session_ended_at < ts.to_)
OR (was.session_started_at < ts.from_ AND was.session_ended_at >= ts.to_)
)
JOIN workspaces w ON (
w.id = was.workspace_id
AND CASE WHEN COALESCE(array_length(@template_ids::uuid[], 1), 0) > 0 THEN w.template_id = ANY(@template_ids::uuid[]) ELSE TRUE END
)
GROUP BY ts.from_, ts.to_, w.template_id, was.user_id
)
SELECT
from_ AS start_time,
to_ AS end_time,
array_remove(array_agg(DISTINCT template_id), NULL)::uuid[] AS template_ids,
COUNT(DISTINCT user_id) AS active_users
FROM unflattened_usage_by_interval
GROUP BY from_, to_;
-- name: GetTemplateParameterInsights :many
-- GetTemplateParameterInsights does for each template in a given timeframe,
-- look for the latest workspace build (for every workspace) that has been
-- created in the timeframe and return the aggregate usage counts of parameter
-- values.
WITH latest_workspace_builds AS (
SELECT
wb.id,
wbmax.template_id,
wb.template_version_id
FROM (
SELECT
tv.template_id, wbmax.workspace_id, MAX(wbmax.build_number) as max_build_number
FROM workspace_builds wbmax
JOIN template_versions tv ON (tv.id = wbmax.template_version_id)
WHERE
wbmax.created_at >= @start_time::timestamptz
AND wbmax.created_at < @end_time::timestamptz
AND CASE WHEN COALESCE(array_length(@template_ids::uuid[], 1), 0) > 0 THEN tv.template_id = ANY(@template_ids::uuid[]) ELSE TRUE END
GROUP BY tv.template_id, wbmax.workspace_id
) wbmax
JOIN workspace_builds wb ON (
wb.workspace_id = wbmax.workspace_id
AND wb.build_number = wbmax.max_build_number
)
), unique_template_params AS (
SELECT
ROW_NUMBER() OVER () AS num,
array_agg(DISTINCT wb.template_id)::uuid[] AS template_ids,
array_agg(wb.id)::uuid[] AS workspace_build_ids,
tvp.name,
tvp.type,
tvp.display_name,
tvp.description,
tvp.options
FROM latest_workspace_builds wb
JOIN template_version_parameters tvp ON (tvp.template_version_id = wb.template_version_id)
GROUP BY tvp.name, tvp.type, tvp.display_name, tvp.description, tvp.options
)
SELECT
utp.num,
utp.template_ids,
utp.name,
utp.type,
utp.display_name,
utp.description,
utp.options,
wbp.value,
COUNT(wbp.value) AS count
FROM unique_template_params utp
JOIN workspace_build_parameters wbp ON (utp.workspace_build_ids @> ARRAY[wbp.workspace_build_id] AND utp.name = wbp.name)
GROUP BY utp.num, utp.template_ids, utp.name, utp.type, utp.display_name, utp.description, utp.options, wbp.value;