Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .claude/settings.local.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
{
"permissions": {
"allow": [
"Bash(podman run:*)"
"Bash(podman run:*)",
"Bash(git stash:*)",
"Bash(make lint:*)",
"Bash(ls:*)",
"Bash(git add:*)"
]
}
}
12 changes: 6 additions & 6 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -344,9 +344,9 @@ Serves the hyperfleet REST API with full authentication, database connectivity,
- `--ocm-debug` - Enable OCM API debug logging

- **Monitoring & Health Checks:**
- `--health-check-server-bindaddress` - Health check server address (default: "localhost:8083")
- `--enable-health-check-https` - Enable HTTPS for health check server
- `--metrics-server-bindaddress` - Metrics server address (default: "localhost:8080")
- `--health-server-bindaddress` - Health endpoints server address (default: "localhost:8080")
- `--enable-health-https` - Enable HTTPS for health server
- `--metrics-server-bindaddress` - Metrics endpoint server address (default: "localhost:9090")
- `--enable-metrics-https` - Enable HTTPS for metrics server

- **Performance Tuning:**
Expand Down Expand Up @@ -686,8 +686,8 @@ The server is configured in cmd/hyperfleet/server/:

**Ports**:
- `8000` - Main API server
- `8080` - Metrics endpoint
- `8083` - Health check endpoint
- `8080` - Health endpoints (`/healthz`, `/readyz`)
- `9090` - Metrics endpoint (`/metrics`)

**Middleware Chain**:
1. Request logging
Expand Down Expand Up @@ -774,7 +774,7 @@ The API is designed to be stateless and horizontally scalable:

**Health Checks**: `GET /healthz` (liveness) and `GET /readyz` (readiness, returns 200 OK when database is accessible) on port 8080

**Metrics**: Prometheus metrics available at `/metrics`
**Metrics**: Prometheus metrics available at `/metrics` (port 9090)

## References

Expand Down
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,9 @@ The service starts on `localhost:8000`:
- **REST API**: `http://localhost:8000/api/hyperfleet/v1/`
- **OpenAPI spec**: `http://localhost:8000/api/hyperfleet/v1/openapi`
- **Swagger UI**: `http://localhost:8000/api/hyperfleet/v1/openapi.html`
- **Health check**: `http://localhost:8083/healthcheck`
- **Metrics**: `http://localhost:8080/metrics`
- **Liveness probe**: `http://localhost:8080/healthz`
- **Readiness probe**: `http://localhost:8080/readyz`
- **Metrics**: `http://localhost:9090/metrics`

```bash
# Test the API
Expand Down
16 changes: 8 additions & 8 deletions charts/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,17 +53,17 @@ spec:
args:
- serve
- --api-server-bindaddress={{ .Values.server.bindAddress | default ":8000" }}
- --health-check-server-bindaddress={{ .Values.server.healthBindAddress | default ":8083" }}
- --metrics-server-bindaddress={{ .Values.server.metricsBindAddress | default ":8080" }}
- --health-server-bindaddress={{ .Values.server.healthBindAddress | default ":8080" }}
- --metrics-server-bindaddress={{ .Values.server.metricsBindAddress | default ":9090" }}
ports:
- name: http
containerPort: 8000
protocol: TCP
- name: health
containerPort: 8083
containerPort: 8080
protocol: TCP
- name: metrics
containerPort: 8080
containerPort: 9090
protocol: TCP
env:
{{- if .Values.auth.jwksUrl }}
Expand All @@ -82,15 +82,15 @@ spec:
{{- end }}
livenessProbe:
httpGet:
path: /healthcheck
path: /healthz
port: health
initialDelaySeconds: 30
periodSeconds: 10
initialDelaySeconds: 15
periodSeconds: 20
timeoutSeconds: 5
failureThreshold: 3
readinessProbe:
httpGet:
path: /healthcheck
path: /readyz
port: health
initialDelaySeconds: 5
periodSeconds: 5
Expand Down
4 changes: 2 additions & 2 deletions charts/templates/service.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ spec:
targetPort: http
protocol: TCP
name: http
- port: 8083
- port: 8080
targetPort: health
protocol: TCP
name: health
- port: 8080
- port: 9090
targetPort: metrics
protocol: TCP
name: metrics
Expand Down
4 changes: 2 additions & 2 deletions charts/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ fullnameOverride: ""
# Use ":PORT" format to bind to all interfaces (required for Kubernetes)
server:
bindAddress: ":8000"
healthBindAddress: ":8083"
metricsBindAddress: ":8080"
healthBindAddress: ":8080"
metricsBindAddress: ":9090"

serviceAccount:
# Specifies whether a service account should be created
Expand Down
29 changes: 24 additions & 5 deletions cmd/hyperfleet-api/servecmd/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/openshift-hyperfleet/hyperfleet-api/cmd/hyperfleet-api/server"
"github.com/openshift-hyperfleet/hyperfleet-api/pkg/api"
"github.com/openshift-hyperfleet/hyperfleet-api/pkg/db/db_session"
"github.com/openshift-hyperfleet/hyperfleet-api/pkg/health"
"github.com/openshift-hyperfleet/hyperfleet-api/pkg/logger"
"github.com/openshift-hyperfleet/hyperfleet-api/pkg/telemetry"
)
Expand Down Expand Up @@ -79,17 +80,30 @@ func runServe(cmd *cobra.Command, args []string) {
metricsServer := server.NewMetricsServer()
go metricsServer.Start()

healthcheckServer := server.NewHealthCheckServer()
go healthcheckServer.Start()
healthServer := server.NewHealthServer()
go healthServer.Start()

// Wait for health server to be listening before marking as ready
if notifier, ok := healthServer.(server.ListenNotifier); ok {
<-notifier.NotifyListening()
}

// Mark application as ready to receive traffic
health.GetReadinessState().SetReady()
logger.Info(ctx, "Application ready to receive traffic")

sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
<-sigChan

logger.Info(ctx, "Shutdown signal received, starting graceful shutdown...")

if err := healthcheckServer.Stop(); err != nil {
logger.WithError(ctx, err).Error("Failed to stop healthcheck server")
// Mark application as not ready (returns 503 on /readyz)
health.GetReadinessState().SetShuttingDown()
logger.Info(ctx, "Marked as not ready, draining in-flight requests...")

if err := healthServer.Stop(); err != nil {
logger.WithError(ctx, err).Error("Failed to stop health server")
}
if err := apiServer.Stop(); err != nil {
logger.WithError(ctx, err).Error("Failed to stop API server")
Expand All @@ -99,11 +113,16 @@ func runServe(cmd *cobra.Command, args []string) {
}

if tp != nil {
if err := telemetry.Shutdown(context.Background(), tp); err != nil {
shutdownCtx, cancel := context.WithTimeout(context.Background(), environments.Environment().Config.Health.ShutdownTimeout)
defer cancel()
if err := telemetry.Shutdown(shutdownCtx, tp); err != nil {
logger.WithError(ctx, err).Error("Failed to shutdown OpenTelemetry")
}
}

// Close database connections
environments.Environment().Teardown()

logger.Info(ctx, "Graceful shutdown completed")
}

Expand Down
99 changes: 99 additions & 0 deletions cmd/hyperfleet-api/server/health_server.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
package server

import (
"context"
"fmt"
"net"
"net/http"
"time"

"github.com/gorilla/mux"

"github.com/openshift-hyperfleet/hyperfleet-api/pkg/api"
"github.com/openshift-hyperfleet/hyperfleet-api/pkg/health"
"github.com/openshift-hyperfleet/hyperfleet-api/pkg/logger"
)

// NewHealthServer builds the server that exposes the liveness (/healthz) and
// readiness (/readyz) endpoints on the configured health bind address.
//
// Both routes accept GET only; any other path is answered by api.SendNotFound.
// The returned server is not started: callers run Start (or Listen + Serve)
// and later Stop it.
func NewHealthServer() Server {
	// Bound how long a client may take to send its request headers so that a
	// slow or stalled connection cannot hold the probe port open indefinitely.
	const readHeaderTimeout = 10 * time.Second

	router := mux.NewRouter()
	router.NotFoundHandler = http.HandlerFunc(api.SendNotFound)

	// health endpoints (HyperFleet standard)
	healthHandler := health.NewHandler(env().Database.SessionFactory)
	router.HandleFunc("/healthz", healthHandler.LivenessHandler).Methods(http.MethodGet)
	router.HandleFunc("/readyz", healthHandler.ReadinessHandler).Methods(http.MethodGet)

	return &healthServer{
		httpServer: &http.Server{
			Addr:              env().Config.Health.BindAddress,
			Handler:           router,
			ReadHeaderTimeout: readHeaderTimeout,
		},
		shutdownTimeout: env().Config.Health.ShutdownTimeout,
		// Closed by Start once the listener has been created.
		listening: make(chan struct{}),
	}
}

// healthServer serves the liveness and readiness endpoints on their own
// HTTP server, separate from the API and metrics servers.
type healthServer struct {
	// httpServer is the underlying HTTP server; its Addr is the bind address.
	httpServer *http.Server
	// shutdownTimeout bounds how long Stop waits for Shutdown to complete.
	shutdownTimeout time.Duration
	// listening is closed by Start once the listener has been created.
	listening chan struct{}
}

// Compile-time interface checks. ListenNotifier is discovered by callers via
// a runtime type assertion, so assert it here to catch signature drift early.
var (
	_ Server         = (*healthServer)(nil)
	_ ListenNotifier = (*healthServer)(nil)
)

// Listen opens a TCP listener on the address configured on the underlying
// HTTP server and returns it together with any error from net.Listen.
func (s *healthServer) Listen() (net.Listener, error) {
	addr := s.httpServer.Addr
	return net.Listen("tcp", addr)
}

// Serve runs the health HTTP server on the given listener until it stops.
//
// When HTTPS is enabled for the health server, the certificate and key files
// from the main server configuration are used; a missing file is reported
// through check before serving is attempted. Termination with
// http.ErrServerClosed (or no error) is logged as a normal shutdown; any other
// error is handed to check.
func (s *healthServer) Serve(listener net.Listener) {
	ctx := context.Background()
	var serveErr error

	if !env().Config.Health.EnableHTTPS {
		logger.With(ctx, logger.FieldBindAddress, env().Config.Health.BindAddress).Info("Serving Health without TLS")
		serveErr = s.httpServer.Serve(listener)
	} else {
		if env().Config.Server.HTTPSCertFile == "" || env().Config.Server.HTTPSKeyFile == "" {
			check(
				fmt.Errorf("unspecified required --https-cert-file, --https-key-file"),
				"Can't start https server",
			)
		}

		logger.With(ctx, logger.FieldBindAddress, env().Config.Health.BindAddress).Info("Serving Health with TLS")
		serveErr = s.httpServer.ServeTLS(
			listener,
			env().Config.Server.HTTPSCertFile,
			env().Config.Server.HTTPSKeyFile,
		)
	}

	// A nil error or ErrServerClosed means the server was stopped on purpose.
	if serveErr == nil || serveErr == http.ErrServerClosed {
		logger.Info(ctx, "Health server terminated")
		return
	}
	check(serveErr, "Health server terminated with errors")
}

// Start binds the listener and then serves on it, blocking until the server
// stops. Once the listener exists, the listening channel is closed so that
// callers waiting on NotifyListening can proceed. A listen failure is reported
// through check and the server is not started.
func (s *healthServer) Start() {
	ln, listenErr := s.Listen()
	if listenErr != nil {
		check(listenErr, "Failed to create health server listener")
		return
	}

	// The socket is bound at this point; announce it before blocking in Serve.
	close(s.listening)
	s.Serve(ln)
}

// NotifyListening returns a channel that is closed when the server is listening.
// Start closes it right after the listener is created and before it begins
// serving, so a receive on the returned channel blocks until that point.
func (s *healthServer) NotifyListening() <-chan struct{} {
return s.listening
}

// Stop gracefully shuts down the health server, waiting at most
// shutdownTimeout for http.Server.Shutdown to finish. It returns whatever
// error Shutdown reports.
//
// It uses a pointer receiver like every other healthServer method, so the
// struct is never copied.
func (s *healthServer) Stop() error {
	ctx, cancel := context.WithTimeout(context.Background(), s.shutdownTimeout)
	defer cancel()
	return s.httpServer.Shutdown(ctx)
}
87 changes: 0 additions & 87 deletions cmd/hyperfleet-api/server/healthcheck_server.go

This file was deleted.

2 changes: 1 addition & 1 deletion cmd/hyperfleet-api/server/metrics_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ func NewMetricsServer() Server {
mainRouter := mux.NewRouter()
mainRouter.NotFoundHandler = http.HandlerFunc(api.SendNotFound)

// metrics endpoint
// metrics endpoint only (health endpoints moved to health_server.go on port 8080)
prometheusMetricsHandler := handlers.NewPrometheusMetricsHandler()
mainRouter.Handle("/metrics", prometheusMetricsHandler.Handler())

Expand Down
6 changes: 6 additions & 0 deletions cmd/hyperfleet-api/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ type Server interface {
Serve(net.Listener)
}

// ListenNotifier is an optional interface that servers can implement
// to signal when they are ready to accept connections.
// Callers discover it with a type assertion on a Server value.
type ListenNotifier interface {
// NotifyListening returns a channel that callers receive from to wait
// until the server is listening.
NotifyListening() <-chan struct{}
}

func removeTrailingSlash(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
r.URL.Path = strings.TrimSuffix(r.URL.Path, "/")
Expand Down
Loading