Skip to content

Commit 04b0253

Browse files
authored
feat: add Prometheus metrics for license warnings and errors (#21749)
Fixes: coder/internal#767

Adds two new Prometheus metrics for license health monitoring:

- `coderd_license_warnings` - count of active license warnings
- `coderd_license_errors` - count of active license errors

Metrics endpoint after startup of a deployment with license enabled:

```
...
# HELP coderd_license_errors The number of active license errors.
# TYPE coderd_license_errors gauge
coderd_license_errors 0
...
# HELP coderd_license_warnings The number of active license warnings.
# TYPE coderd_license_warnings gauge
coderd_license_warnings 0
...
```
1 parent 06e3961 commit 04b0253

6 files changed

Lines changed: 162 additions & 8 deletions

File tree

coderd/entitlements/entitlements.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,12 @@ func (l *Set) Errors() []string {
162162
return slices.Clone(l.entitlements.Errors)
163163
}
164164

165+
// Warnings returns a cloned slice of the current entitlement warnings.
// The clone is taken while holding the read lock on entitlementsMu.
func (l *Set) Warnings() []string {
166+
l.entitlementsMu.RLock()
167+
defer l.entitlementsMu.RUnlock()
168+
return slices.Clone(l.entitlements.Warnings)
169+
}
170+
165171
func (l *Set) HasLicense() bool {
166172
l.entitlementsMu.RLock()
167173
defer l.entitlementsMu.RUnlock()

docs/admin/integrations/prometheus.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,8 +147,10 @@ deployment. They will always be available from the agent.
147147
| `coderd_insights_parameters` | gauge | The parameter usage per template. | `parameter_name` `parameter_type` `parameter_value` `template_name` |
148148
| `coderd_insights_templates_active_users` | gauge | The number of active users of the template. | `template_name` |
149149
| `coderd_license_active_users` | gauge | The number of active users. | |
150+
| `coderd_license_errors` | gauge | The number of active license errors. | |
150151
| `coderd_license_limit_users` | gauge | The user seats limit based on the active Coder license. | |
151152
| `coderd_license_user_limit_enabled` | gauge | Returns 1 if the current license enforces the user limit. | |
153+
| `coderd_license_warnings` | gauge | The number of active license warnings. | |
152154
| `coderd_metrics_collector_agents_execution_seconds` | histogram | Histogram for duration of agents metrics collection in seconds. | |
153155
| `coderd_oauth2_external_requests_rate_limit` | gauge | The total number of allowed requests per interval. | `name` `resource` |
154156
| `coderd_oauth2_external_requests_rate_limit_next_reset_unix` | gauge | Unix timestamp of the next interval | `name` `resource` |

enterprise/coderd/license/metricscollector.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@ var (
1111
activeUsersDesc = prometheus.NewDesc("coderd_license_active_users", "The number of active users.", nil, nil)
1212
limitUsersDesc = prometheus.NewDesc("coderd_license_limit_users", "The user seats limit based on the active Coder license.", nil, nil)
1313
userLimitEnabledDesc = prometheus.NewDesc("coderd_license_user_limit_enabled", "Returns 1 if the current license enforces the user limit.", nil, nil)
14+
15+
// Metrics for license warnings and errors.
16+
licenseWarningsDesc = prometheus.NewDesc("coderd_license_warnings", "The number of active license warnings.", nil, nil)
17+
licenseErrorsDesc = prometheus.NewDesc("coderd_license_errors", "The number of active license errors.", nil, nil)
1418
)
1519

1620
type MetricsCollector struct {
@@ -23,9 +27,19 @@ func (*MetricsCollector) Describe(descCh chan<- *prometheus.Desc) {
2327
descCh <- activeUsersDesc
2428
descCh <- limitUsersDesc
2529
descCh <- userLimitEnabledDesc
30+
descCh <- licenseWarningsDesc
31+
descCh <- licenseErrorsDesc
2632
}
2733

2834
// Collect sends metrics to metricsCh by delegating to collectUserLimit and
// then to collectWarningsAndErrors, in that order.
func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) {
35+
// Collect user limit metrics.
36+
mc.collectUserLimit(metricsCh)
37+
38+
// Collect license warnings and errors metrics.
39+
mc.collectWarningsAndErrors(metricsCh)
40+
}
41+
42+
func (mc *MetricsCollector) collectUserLimit(metricsCh chan<- prometheus.Metric) {
2943
userLimitEntitlement, ok := mc.Entitlements.Feature(codersdk.FeatureUserLimit)
3044
if !ok {
3145
return
@@ -45,3 +59,11 @@ func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) {
4559
metricsCh <- prometheus.MustNewConstMetric(limitUsersDesc, prometheus.GaugeValue, float64(*userLimitEntitlement.Limit))
4660
}
4761
}
62+
63+
// collectWarningsAndErrors sends two gauge metrics on metricsCh: the number of
// entries returned by Entitlements.Warnings() (licenseWarningsDesc) and the
// number returned by Entitlements.Errors() (licenseErrorsDesc). Both metrics
// are sent on every call, including when the counts are zero.
// NOTE(review): the local name `errors` matches the standard library package
// name and would shadow it if this file ever imports "errors" — consider
// renaming.
func (mc *MetricsCollector) collectWarningsAndErrors(metricsCh chan<- prometheus.Metric) {
64+
warnings := mc.Entitlements.Warnings()
65+
errors := mc.Entitlements.Errors()
66+
67+
metricsCh <- prometheus.MustNewConstMetric(licenseWarningsDesc, prometheus.GaugeValue, float64(len(warnings)))
68+
metricsCh <- prometheus.MustNewConstMetric(licenseErrorsDesc, prometheus.GaugeValue, float64(len(errors)))
69+
}

enterprise/coderd/license/metricscollector_test.go

Lines changed: 123 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77

88
"github.com/aws/smithy-go/ptr"
99
"github.com/prometheus/client_golang/prometheus"
10+
prometheus_client "github.com/prometheus/client_model/go"
1011
"github.com/stretchr/testify/require"
1112

1213
"github.com/coder/coder/v2/coderd/entitlements"
@@ -48,16 +49,131 @@ func TestCollectLicenseMetrics(t *testing.T) {
4849
err = json.Unmarshal(goldenFile, &golden)
4950
require.NoError(t, err)
5051

51-
collected := map[string]int{}
52+
for name, expected := range golden {
53+
actual, ok := findMetric(metrics, name)
54+
require.True(t, ok, "metric %s not found", name)
55+
require.Equal(t, expected, actual, "metric %s", name)
56+
}
57+
}
58+
59+
// TestCollectLicenseMetrics_WarningsAndErrors checks that the
// coderd_license_warnings and coderd_license_errors gauges report the number
// of warning and error strings set on the collector's entitlements. Each
// subtest builds its own registry and MetricsCollector and runs in parallel.
// NOTE(review): the result of registry.Register(&sut) is discarded in every
// subtest; presumably a registration failure would only surface later as a
// missing metric — consider asserting on it (or using MustRegister).
func TestCollectLicenseMetrics_WarningsAndErrors(t *testing.T) {
60+
t.Parallel()
61+
62+
// Fresh entitlements with nothing set: both gauges must exist and be zero.
t.Run("NoWarningsOrErrors", func(t *testing.T) {
63+
t.Parallel()
64+
65+
registry := prometheus.NewRegistry()
66+
var sut license.MetricsCollector
67+
sut.Entitlements = entitlements.New()
68+
69+
registry.Register(&sut)
70+
71+
metrics, err := registry.Gather()
72+
require.NoError(t, err)
73+
74+
warnings, ok := findMetric(metrics, "coderd_license_warnings")
75+
require.True(t, ok)
76+
require.Zero(t, warnings)
77+
78+
errors, ok := findMetric(metrics, "coderd_license_errors")
79+
require.True(t, ok)
80+
require.Zero(t, errors)
81+
})
82+
83+
// Two warnings and no errors: warnings gauge is 2, errors gauge is 0.
t.Run("WithWarnings", func(t *testing.T) {
84+
t.Parallel()
85+
86+
registry := prometheus.NewRegistry()
87+
var sut license.MetricsCollector
88+
sut.Entitlements = entitlements.New()
89+
sut.Entitlements.Modify(func(entitlements *codersdk.Entitlements) {
90+
entitlements.Warnings = []string{
91+
"License expires in 30 days",
92+
"User limit is at 90% capacity",
93+
}
94+
})
95+
96+
registry.Register(&sut)
97+
98+
metrics, err := registry.Gather()
99+
require.NoError(t, err)
100+
101+
warnings, ok := findMetric(metrics, "coderd_license_warnings")
102+
require.True(t, ok)
103+
require.Equal(t, 2, warnings)
104+
105+
errors, ok := findMetric(metrics, "coderd_license_errors")
106+
require.True(t, ok)
107+
require.Zero(t, errors)
108+
})
109+
110+
// One error and no warnings: errors gauge is 1, warnings gauge is 0.
t.Run("WithErrors", func(t *testing.T) {
111+
t.Parallel()
112+
113+
registry := prometheus.NewRegistry()
114+
var sut license.MetricsCollector
115+
sut.Entitlements = entitlements.New()
116+
sut.Entitlements.Modify(func(entitlements *codersdk.Entitlements) {
117+
entitlements.Errors = []string{
118+
"License has expired",
119+
}
120+
})
121+
122+
registry.Register(&sut)
123+
124+
metrics, err := registry.Gather()
125+
require.NoError(t, err)
126+
127+
warnings, ok := findMetric(metrics, "coderd_license_warnings")
128+
require.True(t, ok)
129+
require.Zero(t, warnings)
130+
131+
errors, ok := findMetric(metrics, "coderd_license_errors")
132+
require.True(t, ok)
133+
require.Equal(t, 1, errors)
134+
})
135+
136+
// Three warnings and two errors: each gauge reports its own count.
t.Run("WithBothWarningsAndErrors", func(t *testing.T) {
137+
t.Parallel()
138+
139+
registry := prometheus.NewRegistry()
140+
var sut license.MetricsCollector
141+
sut.Entitlements = entitlements.New()
142+
sut.Entitlements.Modify(func(entitlements *codersdk.Entitlements) {
143+
entitlements.Warnings = []string{
144+
"License expires in 7 days",
145+
"User limit is at 95% capacity",
146+
"Feature X is deprecated",
147+
}
148+
entitlements.Errors = []string{
149+
"Invalid license signature",
150+
"License UUID mismatch",
151+
}
152+
})
153+
154+
registry.Register(&sut)
155+
156+
metrics, err := registry.Gather()
157+
require.NoError(t, err)
158+
159+
warnings, ok := findMetric(metrics, "coderd_license_warnings")
160+
require.True(t, ok)
161+
require.Equal(t, 3, warnings)
162+
163+
errors, ok := findMetric(metrics, "coderd_license_errors")
164+
require.True(t, ok)
165+
require.Equal(t, 2, errors)
166+
})
167+
}
168+
169+
// findMetric searches the gathered metric families for one whose name equals
// name and returns the gauge value of its first sample converted to int (the
// conversion drops any fractional part), together with true. It returns
// (0, false) when no family with that name contains a sample.
170+
func findMetric(metrics []*prometheus_client.MetricFamily, name string) (int, bool) {
52171
for _, metric := range metrics {
53-
switch metric.GetName() {
54-
case "coderd_license_active_users", "coderd_license_limit_users", "coderd_license_user_limit_enabled":
172+
if metric.GetName() == name {
55173
for _, m := range metric.Metric {
56-
collected[metric.GetName()] = int(m.Gauge.GetValue())
174+
return int(m.Gauge.GetValue()), true
57175
}
58-
default:
59-
require.FailNowf(t, "unexpected metric collected", "metric: %s", metric.GetName())
60176
}
61177
}
62-
require.EqualValues(t, golden, collected)
178+
return 0, false
63179
}
Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
{
22
"coderd_license_active_users": 4,
33
"coderd_license_limit_users": 7,
4-
"coderd_license_user_limit_enabled": 1
4+
"coderd_license_user_limit_enabled": 1,
5+
"coderd_license_warnings": 0,
6+
"coderd_license_errors": 0
57
}

scripts/metricsdocgen/metrics

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -978,3 +978,9 @@ coderd_agentapi_metadata_flushed_total 71
978978
# HELP coderd_agentapi_metadata_publish_errors_total Total number of metadata batch pubsub publish calls that have resulted in an error.
979979
# TYPE coderd_agentapi_metadata_publish_errors_total counter
980980
coderd_agentapi_metadata_publish_errors_total 0
981+
# HELP coderd_license_warnings The number of active license warnings.
982+
# TYPE coderd_license_warnings gauge
983+
coderd_license_warnings 0
984+
# HELP coderd_license_errors The number of active license errors.
985+
# TYPE coderd_license_errors gauge
986+
coderd_license_errors 0

0 commit comments

Comments
 (0)