Skip to content

Commit 4adccf9

Browse files
Merge pull request wso2#1129 from renuka-fernando/health
Gateway Health Status
2 parents 05090e6 + 4c3f000 commit 4adccf9

29 files changed

Lines changed: 503 additions & 83 deletions

File tree

.github/workflows/operator-integration-test.yml

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -737,17 +737,15 @@ jobs:
737737
podLabels: {}
738738
priorityClassName: ""
739739
livenessProbe:
740-
httpGet:
741-
path: /server_info
742-
port: envoy-admin
740+
exec:
741+
command: ["health-check.sh"]
743742
initialDelaySeconds: 30
744743
periodSeconds: 10
745744
timeoutSeconds: 5
746745
failureThreshold: 3
747746
readinessProbe:
748-
httpGet:
749-
path: /server_info
750-
port: envoy-admin
747+
exec:
748+
command: ["health-check.sh"]
751749
initialDelaySeconds: 10
752750
periodSeconds: 5
753751
timeoutSeconds: 3

gateway/docker-compose.yaml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@ services:
3434
- APIP_GW_CONTROLLER_STORAGE_SQLITE_PATH=./data/gateway.db
3535
- APIP_GW_CONTROLLER_LOGGING_LEVEL=info
3636
- APIP_GW_CONTROLLER_METRICS_PORT=9091
37-
- APIP_GW_REGISTRATION_TOKEN=${GATEWAY_REGISTRATION_TOKEN:-}
38-
- APIP_GW_CONTROLLER_CONTROLPLANE_HOST=${GATEWAY_CONTROLPLANE_HOST:-host.docker.internal:9243}
37+
- APIP_GW_GATEWAY_REGISTRATION_TOKEN=${GATEWAY_REGISTRATION_TOKEN:-}
38+
- APIP_GW_CONTROLPLANE_HOST=${GATEWAY_CONTROLPLANE_HOST:-host.docker.internal:9243}
3939
volumes:
4040
- controller-data:/app/data
4141
- ./configs/config.toml:/etc/gateway-controller/config.toml:ro
@@ -65,6 +65,12 @@ services:
6565
- GATEWAY_CONTROLLER_HOST=gateway-controller
6666
- MOESIF_KEY=${MOESIF_KEY:-}
6767
- LOG_LEVEL=info
68+
healthcheck:
69+
test: ["CMD", "health-check.sh"]
70+
interval: 5s
71+
timeout: 3s
72+
retries: 10
73+
start_period: 15s
6874
volumes:
6975
- ./configs/config.toml:/etc/policy-engine/config.toml:ro
7076
networks:

gateway/gateway-controller/api/openapi-admin.yaml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,24 @@ paths:
4141
schema:
4242
$ref: "#/components/schemas/ErrorResponse"
4343

44+
/health:
45+
get:
46+
summary: Health check
47+
description: |
48+
Returns the health status of the gateway controller.
49+
This endpoint is not subject to IP whitelist restrictions so that
50+
Docker and Kubernetes health probes can reach it.
51+
operationId: getHealth
52+
tags:
53+
- System
54+
responses:
55+
"200":
56+
description: Gateway controller is healthy
57+
content:
58+
application/json:
59+
schema:
60+
$ref: "#/components/schemas/HealthResponse"
61+
4462
/xds_sync_status:
4563
get:
4664
summary: Get xDS policy sync status
@@ -172,6 +190,17 @@ components:
172190
xds_sync:
173191
$ref: "#/components/schemas/ConfigDumpXDSSync"
174192

193+
HealthResponse:
194+
type: object
195+
properties:
196+
status:
197+
type: string
198+
description: Health status ("healthy")
199+
timestamp:
200+
type: string
201+
format: date-time
202+
description: Timestamp of the health check
203+
175204
XDSSyncStatusResponse:
176205
type: object
177206
properties:

gateway/gateway-controller/cmd/controller/main.go

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -197,8 +197,12 @@ func main() {
197197
}
198198
cancel()
199199

200+
// Create channels to detect when router and policy engine first connect
201+
routerConnected := make(chan struct{})
202+
policyEngineConnected := make(chan struct{})
203+
200204
// Start xDS gRPC server with SDS support
201-
xdsServer := xds.NewServer(snapshotManager, sdsSecretManager, cfg.Controller.Server.XDSPort, log)
205+
xdsServer := xds.NewServer(snapshotManager, sdsSecretManager, cfg.Controller.Server.XDSPort, log, routerConnected)
202206
go func() {
203207
if err := xdsServer.Start(); err != nil {
204208
log.Error("xDS server failed", slog.Any("error", err))
@@ -275,7 +279,9 @@ func main() {
275279
cancel()
276280

277281
// Start policy xDS server in a separate goroutine
278-
var serverOpts []policyxds.ServerOption
282+
serverOpts := []policyxds.ServerOption{
283+
policyxds.WithOnFirstConnect(policyEngineConnected),
284+
}
279285
if cfg.Controller.PolicyServer.TLS.Enabled {
280286
serverOpts = append(serverOpts, policyxds.WithTLS(
281287
cfg.Controller.PolicyServer.TLS.CertFile,
@@ -412,6 +418,21 @@ func main() {
412418

413419
log.Info("Gateway Controller started successfully")
414420

421+
// Print banner when both router and policy engine have connected
422+
go func() {
423+
<-routerConnected
424+
<-policyEngineConnected
425+
fmt.Println()
426+
fmt.Println()
427+
fmt.Println("============================================================")
428+
fmt.Println()
429+
fmt.Println(" API Platform Gateway Started")
430+
fmt.Println()
431+
fmt.Println("============================================================")
432+
fmt.Println()
433+
fmt.Println()
434+
}()
435+
415436
// Wait for interrupt signal
416437
quit := make(chan os.Signal, 1)
417438
signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM)

gateway/gateway-controller/pkg/adminapi/generated/generated.go

Lines changed: 30 additions & 19 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

gateway/gateway-controller/pkg/adminserver/server.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"log/slog"
88
"net"
99
"net/http"
10+
"time"
1011

1112
adminapi "github.com/wso2/api-platform/gateway/gateway-controller/pkg/adminapi/generated"
1213
"github.com/wso2/api-platform/gateway/gateway-controller/pkg/config"
@@ -37,6 +38,8 @@ func NewServer(cfg *config.AdminServerConfig, apiServer apiServer, logger *slog.
3738

3839
mux.Handle("/config_dump", ipWhitelistMiddleware(cfg.AllowedIPs, http.HandlerFunc(s.handleConfigDump)))
3940
mux.Handle("/xds_sync_status", ipWhitelistMiddleware(cfg.AllowedIPs, http.HandlerFunc(s.handleXDSSyncStatus)))
41+
// Health endpoint is registered without IP whitelist so Docker/k8s health probes can reach it
42+
mux.Handle("/health", http.HandlerFunc(s.handleHealth))
4043

4144
s.httpSrv = &http.Server{
4245
Addr: fmt.Sprintf(":%d", cfg.Port),
@@ -93,6 +96,22 @@ func (s *Server) handleXDSSyncStatus(w http.ResponseWriter, r *http.Request) {
9396
_ = json.NewEncoder(w).Encode(resp)
9497
}
9598

99+
func (s *Server) handleHealth(w http.ResponseWriter, r *http.Request) {
100+
if r.Method != http.MethodGet {
101+
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
102+
return
103+
}
104+
105+
resp := map[string]string{
106+
"status": "healthy",
107+
"timestamp": time.Now().UTC().Format(time.RFC3339),
108+
}
109+
110+
w.Header().Set("Content-Type", "application/json")
111+
w.WriteHeader(http.StatusOK)
112+
_ = json.NewEncoder(w).Encode(resp)
113+
}
114+
96115
func ipWhitelistMiddleware(allowedIPs []string, next http.Handler) http.Handler {
97116
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
98117
clientIP := extractClientIP(r)

gateway/gateway-controller/pkg/adminserver/server_test.go

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,48 @@ func TestAdminServer_MethodNotAllowed(t *testing.T) {
100100
assert.Equal(t, http.StatusMethodNotAllowed, rr.Code)
101101
}
102102

103+
func TestAdminServer_HealthHandler(t *testing.T) {
104+
stub := &stubAPIServer{}
105+
s := NewServer(&config.AdminServerConfig{Port: 9092, AllowedIPs: []string{"*"}}, stub, slog.Default())
106+
107+
req := httptest.NewRequest(http.MethodGet, "/health", nil)
108+
req.RemoteAddr = "127.0.0.1:12345"
109+
rr := httptest.NewRecorder()
110+
111+
s.httpSrv.Handler.ServeHTTP(rr, req)
112+
assert.Equal(t, http.StatusOK, rr.Code)
113+
114+
var body map[string]string
115+
assert.NoError(t, json.NewDecoder(rr.Body).Decode(&body))
116+
assert.Equal(t, "healthy", body["status"])
117+
assert.NotEmpty(t, body["timestamp"])
118+
}
119+
120+
func TestAdminServer_HealthHandler_MethodNotAllowed(t *testing.T) {
121+
stub := &stubAPIServer{}
122+
s := NewServer(&config.AdminServerConfig{Port: 9092, AllowedIPs: []string{"*"}}, stub, slog.Default())
123+
124+
req := httptest.NewRequest(http.MethodPost, "/health", nil)
125+
req.RemoteAddr = "127.0.0.1:12345"
126+
rr := httptest.NewRecorder()
127+
128+
s.httpSrv.Handler.ServeHTTP(rr, req)
129+
assert.Equal(t, http.StatusMethodNotAllowed, rr.Code)
130+
}
131+
132+
func TestAdminServer_HealthHandler_NoIPWhitelist(t *testing.T) {
133+
stub := &stubAPIServer{}
134+
// Restrict IPs to only 127.0.0.1 — health should still be accessible from other IPs
135+
s := NewServer(&config.AdminServerConfig{Port: 9092, AllowedIPs: []string{"127.0.0.1"}}, stub, slog.Default())
136+
137+
req := httptest.NewRequest(http.MethodGet, "/health", nil)
138+
req.RemoteAddr = "192.168.1.10:12345"
139+
rr := httptest.NewRecorder()
140+
141+
s.httpSrv.Handler.ServeHTTP(rr, req)
142+
assert.Equal(t, http.StatusOK, rr.Code)
143+
}
144+
103145
func TestIsIPAllowed(t *testing.T) {
104146
assert.True(t, isIPAllowed("127.0.0.1", []string{"*"}))
105147
assert.True(t, isIPAllowed("127.0.0.1", []string{"0.0.0.0/0"}))

gateway/gateway-controller/pkg/policyxds/server.go

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"fmt"
2424
"log/slog"
2525
"net"
26+
"sync"
2627
"time"
2728

2829
"github.com/wso2/api-platform/gateway/gateway-controller/pkg/apikeyxds"
@@ -45,6 +46,7 @@ type Server struct {
4546
lazyResourceSnapshotMgr *lazyresourcexds.LazyResourceSnapshotManager
4647
port int
4748
tlsConfig *TLSConfig
49+
onFirstConnect chan struct{}
4850
logger *slog.Logger
4951
}
5052

@@ -69,6 +71,13 @@ func WithTLS(certFile, keyFile string) ServerOption {
6971
}
7072
}
7173

74+
// WithOnFirstConnect sets a channel that will be closed when the first xDS client connects
75+
func WithOnFirstConnect(ch chan struct{}) ServerOption {
76+
return func(s *Server) {
77+
s.onFirstConnect = ch
78+
}
79+
}
80+
7281
// NewServer creates a new policy xDS server
7382
func NewServer(snapshotManager *SnapshotManager, apiKeySnapshotMgr *apikeyxds.APIKeySnapshotManager, lazyResourceSnapshotMgr *lazyresourcexds.LazyResourceSnapshotManager, port int, logger *slog.Logger, opts ...ServerOption) *Server {
7483
s := &Server{
@@ -118,7 +127,11 @@ func NewServer(snapshotManager *SnapshotManager, apiKeySnapshotMgr *apikeyxds.AP
118127
lazyResourceCache := lazyResourceSnapshotMgr.GetCache()
119128
combinedCache := NewCombinedCache(policyCache, apiKeyCache, lazyResourceCache, logger)
120129

121-
callbacks := &serverCallbacks{logger: logger}
130+
callbacks := &serverCallbacks{
131+
logger: logger,
132+
activeStreams: make(map[int64]bool),
133+
onFirstConnect: s.onFirstConnect,
134+
}
122135
xdsServer := server.NewServer(context.Background(), combinedCache, callbacks)
123136

124137
// Register ADS (Aggregated Discovery Service) for policy distribution
@@ -160,7 +173,11 @@ func (s *Server) Stop() {
160173

161174
// serverCallbacks implements xDS server callbacks for logging and debugging
162175
type serverCallbacks struct {
163-
logger *slog.Logger
176+
logger *slog.Logger
177+
activeStreams map[int64]bool
178+
activeStreamsMu sync.Mutex
179+
onFirstConnect chan struct{}
180+
firstConnectOnce sync.Once
164181
}
165182

166183
// OnStreamOpen is called when a new stream is opened
@@ -176,6 +193,10 @@ func (cb *serverCallbacks) OnStreamClosed(streamID int64, node *core.Node) {
176193
cb.logger.Info("Policy xDS stream closed",
177194
slog.Int64("stream_id", streamID),
178195
slog.String("node_id", node.GetId()))
196+
197+
cb.activeStreamsMu.Lock()
198+
defer cb.activeStreamsMu.Unlock()
199+
delete(cb.activeStreams, streamID)
179200
}
180201

181202
// OnStreamRequest is called when a discovery request is received
@@ -185,6 +206,17 @@ func (cb *serverCallbacks) OnStreamRequest(streamID int64, req *discoverygrpc.Di
185206
slog.String("type_url", req.GetTypeUrl()),
186207
slog.String("version", req.GetVersionInfo()),
187208
slog.Any("resource_names", req.GetResourceNames()))
209+
210+
cb.activeStreamsMu.Lock()
211+
defer cb.activeStreamsMu.Unlock()
212+
213+
if _, exists := cb.activeStreams[streamID]; !exists {
214+
cb.activeStreams[streamID] = true
215+
if cb.onFirstConnect != nil {
216+
cb.firstConnectOnce.Do(func() { close(cb.onFirstConnect) })
217+
}
218+
}
219+
188220
return nil
189221
}
190222

gateway/gateway-controller/pkg/policyxds/server_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ func TestServerCallbacks_OnStreamOpen(t *testing.T) {
4040

4141
func TestServerCallbacks_OnStreamClosed(t *testing.T) {
4242
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
43-
cb := &serverCallbacks{logger: logger}
43+
cb := &serverCallbacks{logger: logger, activeStreams: make(map[int64]bool)}
4444

4545
node := &core.Node{Id: "test-node-id"}
4646

@@ -50,7 +50,7 @@ func TestServerCallbacks_OnStreamClosed(t *testing.T) {
5050

5151
func TestServerCallbacks_OnStreamRequest(t *testing.T) {
5252
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
53-
cb := &serverCallbacks{logger: logger}
53+
cb := &serverCallbacks{logger: logger, activeStreams: make(map[int64]bool)}
5454

5555
req := &discoverygrpc.DiscoveryRequest{
5656
TypeUrl: "test-type-url",

0 commit comments

Comments
 (0)