win 110902ad4b feat(health): split liveness and readiness probes
Add HealthService with Liveness (no-op) and Readiness (DB+Redis ping
with per-component timeout) checks. Expose three endpoints:

- /healthz : new liveness endpoint, zero-dependency, always 200
- /ready   : new readiness endpoint, returns 503 with details on dep
             failure; suitable for K8s readinessProbe and load balancers
- /health  : preserved for backward compatibility, equivalent to
             /healthz

Switch primary docker-compose healthcheck to /ready so the container
is only marked healthy once DB+Redis are reachable. Standalone/dev/
local compose files keep /health to avoid disrupting existing setups.

Tests: unit tests cover liveness, readiness with both deps healthy,
each dep failing independently, and per-component timeout enforcement.
2026-04-28 23:39:50 +08:00

62 lines
2.0 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package routes
import (
"context"
"net/http"
"time"
"github.com/Wei-Shaw/sub2api/internal/service"
"github.com/gin-gonic/gin"
)
// readinessHandlerTimeout caps how long the readiness endpoint may take to
// answer a caller. HealthService applies its own per-component timeout
// internally, so this outer bound only needs to be comfortably generous.
const readinessHandlerTimeout time.Duration = 3 * time.Second
// RegisterCommonRoutes registers the shared, non-business routes (health
// checks, setup status, and similar) on the given engine.
//
// The health endpoints are layered by meaning:
//   - /healthz : liveness probe. Always answers 200 with {"status": "ok"};
//     intended for container/process liveness checks.
//   - /ready   : readiness probe. Answers 503 when the readiness report is not
//     OK (per the original notes, the report covers DB and Redis).
//   - /health  : legacy endpoint, identical to /healthz, kept for backward
//     compatibility.
//
// The "business health score" shown on the dashboard is served separately by
// ops_health_score and has nothing to do with these routes.
func RegisterCommonRoutes(r *gin.Engine, healthService *service.HealthService) {
	// Liveness only proves that the process is able to respond. The value
	// returned by Liveness is deliberately discarded: the reply is always 200.
	alive := func(c *gin.Context) {
		_ = healthService.Liveness()
		c.JSON(http.StatusOK, gin.H{"status": "ok"})
	}
	// "/health" is retained so older docker-compose healthchecks keep working.
	for _, path := range []string{"/healthz", "/health"} {
		r.GET(path, alive)
	}

	// Readiness inspects the critical dependencies. On failure the reply is a
	// 503 that still carries the full report, which helps troubleshooting.
	r.GET("/ready", func(c *gin.Context) {
		probeCtx, stop := context.WithTimeout(c.Request.Context(), readinessHandlerTimeout)
		defer stop()

		result := healthService.Readiness(probeCtx)
		if result.OK {
			c.JSON(http.StatusOK, result)
			return
		}
		c.JSON(http.StatusServiceUnavailable, result)
	})

	// Claude Code telemetry batches are ignored; acknowledge them with a 200.
	r.POST("/api/event_logging/batch", func(c *gin.Context) {
		c.Status(http.StatusOK)
	})

	// Setup status endpoint (always reports needs_setup: false in normal mode).
	// The frontend polls it to detect that the service restarted after setup.
	r.GET("/setup/status", func(c *gin.Context) {
		setupState := gin.H{
			"needs_setup": false,
			"step":        "completed",
		}
		c.JSON(http.StatusOK, gin.H{"code": 0, "data": setupState})
	})
}