Skip to content

Commit 2b4ec9a

Browse files
feat(go): Implement metrics and tracing for http and grpc servers (feast-dev#5925)
* feat(go): implement metrics and tracing for http and grpc servers Signed-off-by: Luis Azofra Begara <luis.azofra@cabify.com> * fix(server): improve metrics, config, and shutdown logic Signed-off-by: Luis Azofra Begara <luis.azofra@cabify.com> * chore: update go.sum after rebase Signed-off-by: Luis Azofra Begara <luis.azofra@cabify.com> * docs: improve README instructions for metrics and tracing Signed-off-by: Luis Azofra Begara <luis.azofra@cabify.com> * fix(server): resolve potential deadlock during shutdown Signed-off-by: Luis Azofra Begara <luis.azofra@cabify.com> --------- Signed-off-by: Luis Azofra Begara <luis.azofra@cabify.com>
1 parent 114b7db commit 2b4ec9a

7 files changed

Lines changed: 322 additions & 56 deletions

File tree

go.mod

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,15 @@ require (
1111
github.com/aws/aws-sdk-go-v2/config v1.29.14
1212
github.com/aws/aws-sdk-go-v2/service/dynamodb v1.43.3
1313
github.com/aws/aws-sdk-go-v2/service/s3 v1.79.3
14+
github.com/cabify/gotoprom v1.1.0
1415
github.com/ghodss/yaml v1.0.0
1516
github.com/go-sql-driver/mysql v1.8.1
1617
github.com/golang/protobuf v1.5.4
1718
github.com/google/uuid v1.6.0
19+
github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.1.0
1820
github.com/mattn/go-sqlite3 v1.14.23
1921
github.com/pkg/errors v0.9.1
22+
github.com/prometheus/client_golang v1.23.2
2023
github.com/redis/go-redis/v9 v9.6.1
2124
github.com/roberson-io/mmh3 v0.0.0-20190729202758-fdfce3ba6225
2225
github.com/rs/zerolog v1.33.0
@@ -64,6 +67,7 @@ require (
6467
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.1 // indirect
6568
github.com/aws/aws-sdk-go-v2/service/sts v1.33.19 // indirect
6669
github.com/aws/smithy-go v1.22.2 // indirect
70+
github.com/beorn7/perks v1.0.1 // indirect
6771
github.com/cenkalti/backoff/v5 v5.0.3 // indirect
6872
github.com/cespare/xxhash/v2 v2.3.0 // indirect
6973
github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443 // indirect
@@ -81,6 +85,7 @@ require (
8185
github.com/google/s2a-go v0.1.9 // indirect
8286
github.com/googleapis/enterprise-certificate-proxy v0.3.7 // indirect
8387
github.com/googleapis/gax-go/v2 v2.15.0 // indirect
88+
github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.1.0 // indirect
8489
github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 // indirect
8590
github.com/jackc/pgpassfile v1.0.0 // indirect
8691
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
@@ -93,9 +98,13 @@ require (
9398
github.com/mattn/go-isatty v0.0.20 // indirect
9499
github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect
95100
github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 // indirect
101+
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
96102
github.com/pierrec/lz4/v4 v4.1.21 // indirect
97103
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
98104
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
105+
github.com/prometheus/client_model v0.6.2 // indirect
106+
github.com/prometheus/common v0.66.1 // indirect
107+
github.com/prometheus/procfs v0.16.1 // indirect
99108
github.com/spiffe/go-spiffe/v2 v2.5.0 // indirect
100109
github.com/stretchr/objx v0.5.2 // indirect
101110
github.com/zeebo/errs v1.4.0 // indirect
@@ -107,6 +116,7 @@ require (
107116
go.opentelemetry.io/otel/metric v1.38.0 // indirect
108117
go.opentelemetry.io/otel/sdk/metric v1.38.0 // indirect
109118
go.opentelemetry.io/proto/otlp v1.7.1 // indirect
119+
go.yaml.in/yaml/v2 v2.4.2 // indirect
110120
golang.org/x/crypto v0.45.0 // indirect
111121
golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 // indirect
112122
golang.org/x/mod v0.29.0 // indirect

go.sum

Lines changed: 61 additions & 9 deletions
Large diffs are not rendered by default.

go/README.md

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,27 @@ To build and run the Go Feature Server locally, create a feature_store.yaml file
66

77
```bash
88
go build -o feast-go ./go/main.go
9-
# start the http server
10-
./feast-go --type=http --port=8080
9+
# start the http server (metrics on port 9090 by default)
10+
./feast-go --type=http --port=8080 --metrics-port=9090
1111
# or start the gRPC server
12-
#./feast-go --type=grpc --port=[your-choice]
12+
#./feast-go --type=grpc --port=[your-choice] --metrics-port=9091
1313
```
14+
## Prometheus Metrics
15+
The server exposes Prometheus metrics at the `/metrics` endpoint on a dedicated port (default `:9090`).
16+
- **HTTP Mode**: Metrics server runs on port `9090` (configurable via `-metrics-port`).
17+
- **gRPC Mode**: Metrics server runs on port `9090` (configurable via `-metrics-port`).
18+
19+
Key metrics include:
20+
- `http_request_duration_seconds`: Histogram of response latency.
21+
- `http_requests_total`: Counter of HTTP requests by status, method, and path.
22+
- Standard Go and Process metrics.
23+
24+
A `/health` endpoint is available on the main application port (default `:8080`) for readiness probes.
25+
1426

1527
## OTEL based observability
1628
The OS-level environment variable `ENABLE_OTEL_TRACING` (a string, either `"true"` or `"false"`) is used to enable or disable this service (tracing only).
29+
You can also configure the service name using the `OTEL_SERVICE_NAME` environment variable (defaults to "FeastGoFeatureServer").
1730

1831
The default exporter URL is "http://localhost:4318". The default protocol for sending data to the collector is **HTTP**. Please refer to the following two docs about the configuration of the OTEL exporter:
1932
1. https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter/
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
package metrics
2+
3+
import (
4+
"reflect"
5+
"time"
6+
7+
"github.com/cabify/gotoprom"
8+
"github.com/cabify/gotoprom/prometheusvanilla"
9+
"github.com/prometheus/client_golang/prometheus"
10+
)
11+
12+
var HttpMetrics struct {
13+
Duration func(HttpLabels) TimeHistogram `name:"http_request_duration_seconds" help:"Time taken to serve HTTP requests" buckets:".005,.01,.025,.05,.1,.25,.5,1,2.5,5,10"`
14+
15+
RequestsTotal func(HttpLabels) prometheus.Counter `name:"http_requests_total" help:"Total number of HTTP requests"`
16+
}
17+
18+
// HttpLabels is the label set attached to every metric in HttpMetrics.
// The `label` tags give the label names used on the exported metrics.
type HttpLabels struct {
	// Method is exported as the "method" label.
	Method string `label:"method"`
	// Status is exported as the "status" label.
	Status int `label:"status"`
	// Path is exported as the "path" label.
	Path string `label:"path"`
}
23+
24+
func init() {
25+
gotoprom.MustAddBuilder(TimeHistogramType, RegisterTimeHistogram)
26+
gotoprom.MustInit(&HttpMetrics, "feast")
27+
}
28+
29+
var (
30+
TimeHistogramType = reflect.TypeOf((*TimeHistogram)(nil)).Elem()
31+
)
32+
33+
34+
func RegisterTimeHistogram(name, help, namespace string, labelNames []string, tag reflect.StructTag) (func(prometheus.Labels) interface{}, prometheus.Collector, error) {
35+
f, collector, err := prometheusvanilla.BuildHistogram(name, help, namespace, labelNames, tag)
36+
if err != nil {
37+
return nil, nil, err
38+
}
39+
40+
return func(labels prometheus.Labels) interface{} {
41+
return timeHistogramAdapter{Histogram: f(labels).(prometheus.Histogram)}
42+
}, collector, nil
43+
}
44+
45+
// TimeHistogram offers the basic prometheus.Histogram functionality
46+
type TimeHistogram interface {
47+
prometheus.Histogram
48+
// Duration observes the duration in seconds
49+
Duration(duration time.Duration)
50+
// Since observes the duration in seconds since the time point provided
51+
Since(time.Time)
52+
}
53+
54+
type timeHistogramAdapter struct {
55+
prometheus.Histogram
56+
}
57+
58+
// Duration observes the duration in seconds
59+
func (to timeHistogramAdapter) Duration(duration time.Duration) {
60+
to.Observe(duration.Seconds())
61+
}
62+
63+
// Since observes the duration in seconds since the time point provided
64+
func (to timeHistogramAdapter) Since(duration time.Time) {
65+
to.Duration(time.Since(duration))
66+
}

go/internal/feast/server/http_server.go

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ import (
1717
prototypes "github.com/feast-dev/feast/go/protos/feast/types"
1818
"github.com/feast-dev/feast/go/types"
1919
"github.com/rs/zerolog/log"
20+
21+
"github.com/feast-dev/feast/go/internal/feast/metrics"
2022
)
2123

2224
type httpServer struct {
@@ -335,10 +337,55 @@ func recoverMiddleware(next http.Handler) http.Handler {
335337
})
336338
}
337339

340+
// statusWriter wraps an http.ResponseWriter and remembers the final status
// code of the response so that middleware can report it after the handler
// has returned.
type statusWriter struct {
	http.ResponseWriter
	// status is the recorded final status code; 0 means no final status has
	// been written yet.
	status int
}

// WriteHeader forwards the status code to the wrapped writer and records it
// if it is the first final status of the response.
//
// Informational 1xx codes (for example 103 Early Hints) are forwarded but not
// recorded: net/http allows any number of them before the real status, so
// recording one would hide the final code. 101 Switching Protocols is the
// exception and is recorded, because no further status follows it.
func (w *statusWriter) WriteHeader(status int) {
	informational := status >= 100 && status < 200 && status != http.StatusSwitchingProtocols
	if w.status == 0 && !informational {
		w.status = status
	}
	w.ResponseWriter.WriteHeader(status)
}

// Write forwards the body bytes to the wrapped writer. Writing a body without
// an explicit final WriteHeader implies 200 OK, so that is what gets recorded.
func (w *statusWriter) Write(b []byte) (int, error) {
	if w.status == 0 {
		w.status = http.StatusOK
	}
	return w.ResponseWriter.Write(b)
}

// Unwrap returns the wrapped writer so that http.ResponseController can reach
// optional features (flushing, deadlines, hijacking) that this wrapper does
// not re-export itself.
func (w *statusWriter) Unwrap() http.ResponseWriter {
	return w.ResponseWriter
}
359+
360+
func metricsMiddleware(next http.Handler) http.Handler {
361+
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
362+
t0 := time.Now()
363+
sw := &statusWriter{ResponseWriter: w}
364+
next.ServeHTTP(sw, r)
365+
duration := time.Since(t0)
366+
367+
if sw.status == 0 {
368+
sw.status = 200
369+
}
370+
371+
metrics.HttpMetrics.Duration(metrics.HttpLabels{
372+
Method: r.Method,
373+
Status: sw.status,
374+
Path: r.URL.Path,
375+
}).Duration(duration)
376+
377+
metrics.HttpMetrics.RequestsTotal(metrics.HttpLabels{
378+
Method: r.Method,
379+
Status: sw.status,
380+
Path: r.URL.Path,
381+
}).Inc()
382+
})
383+
}
384+
338385
func (s *httpServer) Serve(host string, port int) error {
339386
mux := http.NewServeMux()
340-
mux.Handle("/get-online-features", recoverMiddleware(http.HandlerFunc(s.getOnlineFeatures)))
341-
mux.HandleFunc("/health", healthCheckHandler)
387+
mux.Handle("/get-online-features", metricsMiddleware(recoverMiddleware(http.HandlerFunc(s.getOnlineFeatures))))
388+
mux.Handle("/health", metricsMiddleware(http.HandlerFunc(healthCheckHandler)))
342389
s.server = &http.Server{Addr: fmt.Sprintf("%s:%d", host, port), Handler: mux, ReadTimeout: 5 * time.Second, WriteTimeout: 10 * time.Second, IdleTimeout: 15 * time.Second}
343390
err := s.server.ListenAndServe()
344391
// Don't return the error if it's caused by graceful shutdown using Stop()

0 commit comments

Comments
 (0)