Merge pull request #2548 from Arron196/fix/sla-exclude-capacity-errors
fix: 统一 Ops SLA 与请求错误统计口径
This commit is contained in:
commit
e318376e88
@ -325,6 +325,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
|
||||
selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, reqModel, fs.FailedAccountIDs, "", int64(0)) // Gemini 不使用会话限制
|
||||
if err != nil {
|
||||
if len(fs.FailedAccountIDs) == 0 {
|
||||
markOpsRoutingCapacityLimitedIfNoAvailable(c, err)
|
||||
reqLog.Warn("gateway.select_account_no_available",
|
||||
zap.String("model", reqModel),
|
||||
zap.Int64p("group_id", apiKey.GroupID),
|
||||
@ -374,6 +375,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
|
||||
accountReleaseFunc := selection.ReleaseFunc
|
||||
if !selection.Acquired {
|
||||
if selection.WaitPlan == nil {
|
||||
markOpsRoutingCapacityLimited(c)
|
||||
reqLog.Warn("gateway.select_account_no_slot_no_wait_plan",
|
||||
zap.Int64("account_id", account.ID),
|
||||
zap.String("model", reqModel),
|
||||
@ -566,6 +568,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
|
||||
selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), currentAPIKey.GroupID, sessionKey, reqModel, fs.FailedAccountIDs, parsedReq.MetadataUserID, subject.UserID)
|
||||
if err != nil {
|
||||
if len(fs.FailedAccountIDs) == 0 {
|
||||
markOpsRoutingCapacityLimitedIfNoAvailable(c, err)
|
||||
reqLog.Warn("gateway.select_account_no_available",
|
||||
zap.String("model", reqModel),
|
||||
zap.Int64p("group_id", currentAPIKey.GroupID),
|
||||
@ -626,6 +629,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
|
||||
accountReleaseFunc := selection.ReleaseFunc
|
||||
if !selection.Acquired {
|
||||
if selection.WaitPlan == nil {
|
||||
markOpsRoutingCapacityLimited(c)
|
||||
reqLog.Warn("gateway.select_account_no_slot_no_wait_plan",
|
||||
zap.Int64("account_id", account.ID),
|
||||
zap.String("model", reqModel),
|
||||
@ -1542,6 +1546,7 @@ func (h *GatewayHandler) CountTokens(c *gin.Context) {
|
||||
account, err := h.gatewayService.SelectAccountForModel(c.Request.Context(), apiKey.GroupID, sessionHash, parsedReq.Model)
|
||||
if err != nil {
|
||||
reqLog.Warn("gateway.count_tokens_select_account_failed", zap.Error(err))
|
||||
markOpsRoutingCapacityLimitedIfNoAvailable(c, err)
|
||||
h.errorResponse(c, http.StatusServiceUnavailable, "api_error", "Service temporarily unavailable")
|
||||
return
|
||||
}
|
||||
|
||||
@ -169,6 +169,7 @@ func (h *GatewayHandler) ChatCompletions(c *gin.Context) {
|
||||
selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionHash, reqModel, fs.FailedAccountIDs, "", int64(0))
|
||||
if err != nil {
|
||||
if len(fs.FailedAccountIDs) == 0 {
|
||||
markOpsRoutingCapacityLimitedIfNoAvailable(c, err)
|
||||
h.chatCompletionsErrorResponse(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error())
|
||||
return
|
||||
}
|
||||
@ -194,6 +195,7 @@ func (h *GatewayHandler) ChatCompletions(c *gin.Context) {
|
||||
accountReleaseFunc := selection.ReleaseFunc
|
||||
if !selection.Acquired {
|
||||
if selection.WaitPlan == nil {
|
||||
markOpsRoutingCapacityLimited(c)
|
||||
h.chatCompletionsErrorResponse(c, http.StatusServiceUnavailable, "api_error", "No available accounts")
|
||||
return
|
||||
}
|
||||
|
||||
@ -174,6 +174,7 @@ func (h *GatewayHandler) Responses(c *gin.Context) {
|
||||
selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionHash, reqModel, fs.FailedAccountIDs, "", int64(0))
|
||||
if err != nil {
|
||||
if len(fs.FailedAccountIDs) == 0 {
|
||||
markOpsRoutingCapacityLimitedIfNoAvailable(c, err)
|
||||
h.responsesErrorResponse(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error())
|
||||
return
|
||||
}
|
||||
@ -199,6 +200,7 @@ func (h *GatewayHandler) Responses(c *gin.Context) {
|
||||
accountReleaseFunc := selection.ReleaseFunc
|
||||
if !selection.Acquired {
|
||||
if selection.WaitPlan == nil {
|
||||
markOpsRoutingCapacityLimited(c)
|
||||
h.responsesErrorResponse(c, http.StatusServiceUnavailable, "api_error", "No available accounts")
|
||||
return
|
||||
}
|
||||
|
||||
@ -61,6 +61,7 @@ func (h *GatewayHandler) GeminiV1BetaListModels(c *gin.Context) {
|
||||
c.JSON(http.StatusOK, gemini.FallbackModelsList())
|
||||
return
|
||||
}
|
||||
markOpsRoutingCapacityLimitedIfNoAvailable(c, err)
|
||||
googleError(c, http.StatusServiceUnavailable, "No available Gemini accounts: "+err.Error())
|
||||
return
|
||||
}
|
||||
@ -113,6 +114,7 @@ func (h *GatewayHandler) GeminiV1BetaGetModel(c *gin.Context) {
|
||||
c.JSON(http.StatusOK, gemini.FallbackModel(modelName))
|
||||
return
|
||||
}
|
||||
markOpsRoutingCapacityLimitedIfNoAvailable(c, err)
|
||||
googleError(c, http.StatusServiceUnavailable, "No available Gemini accounts: "+err.Error())
|
||||
return
|
||||
}
|
||||
@ -372,6 +374,7 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) {
|
||||
selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, modelName, fs.FailedAccountIDs, "", int64(0)) // Gemini 不使用会话限制
|
||||
if err != nil {
|
||||
if len(fs.FailedAccountIDs) == 0 {
|
||||
markOpsRoutingCapacityLimitedIfNoAvailable(c, err)
|
||||
googleError(c, http.StatusServiceUnavailable, "No available Gemini accounts: "+err.Error())
|
||||
return
|
||||
}
|
||||
@ -419,6 +422,7 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) {
|
||||
accountReleaseFunc := selection.ReleaseFunc
|
||||
if !selection.Acquired {
|
||||
if selection.WaitPlan == nil {
|
||||
markOpsRoutingCapacityLimited(c)
|
||||
googleError(c, http.StatusServiceUnavailable, "No available Gemini accounts")
|
||||
return
|
||||
}
|
||||
|
||||
@ -143,6 +143,7 @@ func (h *OpenAIGatewayHandler) ChatCompletions(c *gin.Context) {
|
||||
zap.Int("excluded_account_count", len(failedAccountIDs)),
|
||||
)
|
||||
if len(failedAccountIDs) == 0 {
|
||||
markOpsRoutingCapacityLimitedIfNoAvailable(c, err)
|
||||
h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "Service temporarily unavailable", streamStarted)
|
||||
return
|
||||
} else {
|
||||
@ -155,6 +156,7 @@ func (h *OpenAIGatewayHandler) ChatCompletions(c *gin.Context) {
|
||||
}
|
||||
}
|
||||
if selection == nil || selection.Account == nil {
|
||||
markOpsRoutingCapacityLimited(c)
|
||||
h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts", streamStarted)
|
||||
return
|
||||
}
|
||||
|
||||
@ -282,6 +282,7 @@ func (h *OpenAIGatewayHandler) Responses(c *gin.Context) {
|
||||
zap.Int("excluded_account_count", len(failedAccountIDs)),
|
||||
)
|
||||
if len(failedAccountIDs) == 0 {
|
||||
markOpsRoutingCapacityLimitedIfNoAvailable(c, err)
|
||||
if errors.Is(err, service.ErrNoAvailableCompactAccounts) {
|
||||
h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "compact_not_supported", "No available OpenAI accounts support /responses/compact", streamStarted)
|
||||
return
|
||||
@ -297,6 +298,7 @@ func (h *OpenAIGatewayHandler) Responses(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
if selection == nil || selection.Account == nil {
|
||||
markOpsRoutingCapacityLimited(c)
|
||||
h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts", streamStarted)
|
||||
return
|
||||
}
|
||||
@ -677,6 +679,7 @@ func (h *OpenAIGatewayHandler) Messages(c *gin.Context) {
|
||||
)
|
||||
if len(failedAccountIDs) == 0 {
|
||||
if err != nil {
|
||||
markOpsRoutingCapacityLimitedIfNoAvailable(c, err)
|
||||
h.anthropicStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "Service temporarily unavailable", streamStarted)
|
||||
return
|
||||
}
|
||||
@ -690,6 +693,7 @@ func (h *OpenAIGatewayHandler) Messages(c *gin.Context) {
|
||||
}
|
||||
}
|
||||
if selection == nil || selection.Account == nil {
|
||||
markOpsRoutingCapacityLimited(c)
|
||||
h.anthropicStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts", streamStarted)
|
||||
return
|
||||
}
|
||||
@ -992,6 +996,7 @@ func (h *OpenAIGatewayHandler) acquireResponsesAccountSlot(
|
||||
reqLog *zap.Logger,
|
||||
) (func(), bool) {
|
||||
if selection == nil || selection.Account == nil {
|
||||
markOpsRoutingCapacityLimited(c)
|
||||
h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts", *streamStarted)
|
||||
return nil, false
|
||||
}
|
||||
@ -1002,6 +1007,7 @@ func (h *OpenAIGatewayHandler) acquireResponsesAccountSlot(
|
||||
return wrapReleaseOnDone(ctx, selection.ReleaseFunc), true
|
||||
}
|
||||
if selection.WaitPlan == nil {
|
||||
markOpsRoutingCapacityLimited(c)
|
||||
h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts", *streamStarted)
|
||||
return nil, false
|
||||
}
|
||||
|
||||
@ -157,6 +157,7 @@ func (h *OpenAIGatewayHandler) Images(c *gin.Context) {
|
||||
zap.Int("excluded_account_count", len(failedAccountIDs)),
|
||||
)
|
||||
if len(failedAccountIDs) == 0 {
|
||||
markOpsRoutingCapacityLimitedIfNoAvailable(c, err)
|
||||
h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available compatible accounts", streamStarted)
|
||||
return
|
||||
}
|
||||
@ -168,6 +169,7 @@ func (h *OpenAIGatewayHandler) Images(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
if selection == nil || selection.Account == nil {
|
||||
markOpsRoutingCapacityLimited(c)
|
||||
h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available compatible accounts", streamStarted)
|
||||
return
|
||||
}
|
||||
|
||||
@ -4,6 +4,7 @@ import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"log"
|
||||
"runtime"
|
||||
"runtime/debug"
|
||||
@ -22,10 +23,11 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
opsModelKey = "ops_model"
|
||||
opsStreamKey = "ops_stream"
|
||||
opsRequestBodyKey = "ops_request_body"
|
||||
opsAccountIDKey = "ops_account_id"
|
||||
opsModelKey = "ops_model"
|
||||
opsStreamKey = "ops_stream"
|
||||
opsRequestBodyKey = "ops_request_body"
|
||||
opsAccountIDKey = "ops_account_id"
|
||||
opsRoutingCapacityLimitedKey = "ops_routing_capacity_limited"
|
||||
|
||||
opsUpstreamModelKey = "ops_upstream_model"
|
||||
opsRequestTypeKey = "ops_request_type"
|
||||
@ -45,6 +47,8 @@ const (
|
||||
opsCodeSubscriptionNotFound = "SUBSCRIPTION_NOT_FOUND"
|
||||
opsCodeSubscriptionInvalid = "SUBSCRIPTION_INVALID"
|
||||
opsCodeUserInactive = "USER_INACTIVE"
|
||||
opsCodeInvalidAPIKey = "INVALID_API_KEY"
|
||||
opsCodeAPIKeyRequired = "API_KEY_REQUIRED"
|
||||
)
|
||||
|
||||
const (
|
||||
@ -393,6 +397,42 @@ func setOpsSelectedAccount(c *gin.Context, accountID int64, platform ...string)
|
||||
}
|
||||
}
|
||||
|
||||
func markOpsRoutingCapacityLimited(c *gin.Context) {
|
||||
if c == nil {
|
||||
return
|
||||
}
|
||||
c.Set(opsRoutingCapacityLimitedKey, true)
|
||||
}
|
||||
|
||||
func markOpsRoutingCapacityLimitedIfNoAvailable(c *gin.Context, err error) {
|
||||
if !isOpsNoAvailableAccountError(err) {
|
||||
return
|
||||
}
|
||||
markOpsRoutingCapacityLimited(c)
|
||||
}
|
||||
|
||||
func isOpsRoutingCapacityLimited(c *gin.Context) bool {
|
||||
if c == nil {
|
||||
return false
|
||||
}
|
||||
v, ok := c.Get(opsRoutingCapacityLimitedKey)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
marked, _ := v.(bool)
|
||||
return marked
|
||||
}
|
||||
|
||||
func isOpsNoAvailableAccountError(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
if errors.Is(err, service.ErrNoAvailableAccounts) || errors.Is(err, service.ErrNoAvailableCompactAccounts) {
|
||||
return true
|
||||
}
|
||||
return isOpsNoAvailableAccountMessage(err.Error())
|
||||
}
|
||||
|
||||
type opsCaptureWriter struct {
|
||||
gin.ResponseWriter
|
||||
limit int
|
||||
@ -775,11 +815,7 @@ func OpsErrorLoggerMiddleware(ops *service.OpsService) gin.HandlerFunc {
|
||||
|
||||
normalizedType := normalizeOpsErrorType(parsed.ErrorType, parsed.Code)
|
||||
|
||||
phase := classifyOpsPhase(normalizedType, parsed.Message, parsed.Code)
|
||||
isBusinessLimited := classifyOpsIsBusinessLimited(normalizedType, phase, parsed.Code, status, parsed.Message)
|
||||
|
||||
errorOwner := classifyOpsErrorOwner(phase, parsed.Message)
|
||||
errorSource := classifyOpsErrorSource(phase, parsed.Message)
|
||||
phase, isBusinessLimited, errorOwner, errorSource := classifyOpsErrorLog(c, normalizedType, parsed.Message, parsed.Code, status)
|
||||
|
||||
entry := &service.OpsInsertErrorLogInput{
|
||||
RequestID: requestID,
|
||||
@ -1114,6 +1150,9 @@ func classifyOpsPhase(errType, message, code string) string {
|
||||
msg := strings.ToLower(message)
|
||||
// Standardized phases: request|auth|routing|upstream|network|internal
|
||||
// Map billing/concurrency/response => request; scheduling => routing.
|
||||
if isOpsClientAuthError(code, msg) {
|
||||
return "auth"
|
||||
}
|
||||
switch strings.TrimSpace(code) {
|
||||
case opsCodeInsufficientBalance, opsCodeUsageLimitExceeded, opsCodeSubscriptionNotFound, opsCodeSubscriptionInvalid:
|
||||
return "request"
|
||||
@ -1134,7 +1173,7 @@ func classifyOpsPhase(errType, message, code string) string {
|
||||
case "upstream_error", "overloaded_error":
|
||||
return "upstream"
|
||||
case "api_error":
|
||||
if strings.Contains(msg, opsErrNoAvailableAccounts) {
|
||||
if isOpsNoAvailableAccountMessage(msg) {
|
||||
return "routing"
|
||||
}
|
||||
return "internal"
|
||||
@ -1178,7 +1217,27 @@ func classifyOpsIsRetryable(errType string, statusCode int) bool {
|
||||
}
|
||||
}
|
||||
|
||||
func classifyOpsIsBusinessLimited(errType, phase, code string, status int, message string) bool {
|
||||
func classifyOpsErrorLog(c *gin.Context, errType, message, code string, status int) (phase string, isBusinessLimited bool, errorOwner string, errorSource string) {
|
||||
phase = classifyOpsPhase(errType, message, code)
|
||||
routingCapacityLimited := isOpsRoutingCapacityLimited(c)
|
||||
upstreamError := hasOpsUpstreamErrorContext(c)
|
||||
if upstreamError && !routingCapacityLimited {
|
||||
phase = "upstream"
|
||||
}
|
||||
if routingCapacityLimited {
|
||||
phase = "routing"
|
||||
}
|
||||
localClientAuthError := !upstreamError && phase == "auth" && isOpsClientAuthError(code, strings.ToLower(message))
|
||||
isBusinessLimited = routingCapacityLimited || classifyOpsIsBusinessLimited(errType, phase, code, status, message, localClientAuthError)
|
||||
errorOwner = classifyOpsErrorOwner(phase, message)
|
||||
errorSource = classifyOpsErrorSource(phase, message)
|
||||
return phase, isBusinessLimited, errorOwner, errorSource
|
||||
}
|
||||
|
||||
func classifyOpsIsBusinessLimited(errType, phase, code string, status int, message string, localClientAuthError ...bool) bool {
|
||||
if len(localClientAuthError) > 0 && localClientAuthError[0] {
|
||||
return true
|
||||
}
|
||||
switch strings.TrimSpace(code) {
|
||||
case opsCodeInsufficientBalance, opsCodeUsageLimitExceeded, opsCodeSubscriptionNotFound, opsCodeSubscriptionInvalid, opsCodeUserInactive:
|
||||
return true
|
||||
@ -1195,6 +1254,47 @@ func classifyOpsIsBusinessLimited(errType, phase, code string, status int, messa
|
||||
return false
|
||||
}
|
||||
|
||||
func isOpsClientAuthError(code string, msg string) bool {
|
||||
switch strings.TrimSpace(code) {
|
||||
case opsCodeInvalidAPIKey, opsCodeAPIKeyRequired:
|
||||
return true
|
||||
}
|
||||
return strings.Contains(msg, "invalid api key") || strings.Contains(msg, "api key is required")
|
||||
}
|
||||
|
||||
func hasOpsUpstreamErrorContext(c *gin.Context) bool {
|
||||
if c == nil {
|
||||
return false
|
||||
}
|
||||
if v, ok := c.Get(service.OpsUpstreamStatusCodeKey); ok {
|
||||
switch code := v.(type) {
|
||||
case int:
|
||||
if code > 0 {
|
||||
return true
|
||||
}
|
||||
case int64:
|
||||
if code > 0 {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
if v, ok := c.Get(service.OpsUpstreamErrorsKey); ok {
|
||||
if events, ok := v.([]*service.OpsUpstreamErrorEvent); ok && len(events) > 0 {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func isOpsNoAvailableAccountMessage(message string) bool {
|
||||
msg := strings.ToLower(message)
|
||||
return strings.Contains(msg, opsErrNoAvailableAccounts) ||
|
||||
strings.Contains(msg, "no available account") ||
|
||||
strings.Contains(msg, "no available gemini accounts") ||
|
||||
strings.Contains(msg, "no available openai accounts") ||
|
||||
strings.Contains(msg, "no available compatible accounts")
|
||||
}
|
||||
|
||||
func classifyOpsErrorOwner(phase string, message string) string {
|
||||
// Standardized owners: client|provider|platform
|
||||
switch phase {
|
||||
|
||||
@ -275,6 +275,187 @@ func TestNormalizeOpsErrorType(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestClassifyOpsNoAvailableAccountsExcludedFromSLA(t *testing.T) {
|
||||
const message = "No available accounts"
|
||||
gin.SetMode(gin.TestMode)
|
||||
rec := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(rec)
|
||||
|
||||
markOpsRoutingCapacityLimited(c)
|
||||
|
||||
errType := normalizeOpsErrorType("api_error", "")
|
||||
phase, isBusinessLimited, errorOwner, errorSource := classifyOpsErrorLog(c, errType, message, "", http.StatusServiceUnavailable)
|
||||
|
||||
require.Equal(t, "api_error", errType)
|
||||
require.Equal(t, "routing", phase)
|
||||
require.True(t, isBusinessLimited)
|
||||
require.Equal(t, "platform", errorOwner)
|
||||
require.Equal(t, "gateway", errorSource)
|
||||
}
|
||||
|
||||
func TestClassifyOpsRoutingCapacityMarkerExcludesMaskedSelectionFailureFromSLA(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
rec := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(rec)
|
||||
|
||||
markOpsRoutingCapacityLimited(c)
|
||||
|
||||
phase, isBusinessLimited, errorOwner, errorSource := classifyOpsErrorLog(
|
||||
c,
|
||||
"api_error",
|
||||
"Service temporarily unavailable",
|
||||
"",
|
||||
http.StatusServiceUnavailable,
|
||||
)
|
||||
|
||||
require.Equal(t, "routing", phase)
|
||||
require.True(t, isBusinessLimited)
|
||||
require.Equal(t, "platform", errorOwner)
|
||||
require.Equal(t, "gateway", errorSource)
|
||||
}
|
||||
|
||||
func TestClassifyOpsAuthClientErrorsExcludedFromSLA(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
errType string
|
||||
message string
|
||||
code string
|
||||
status int
|
||||
}{
|
||||
{
|
||||
name: "standard invalid API key",
|
||||
errType: "api_error",
|
||||
message: "Invalid API key",
|
||||
code: "INVALID_API_KEY",
|
||||
status: http.StatusUnauthorized,
|
||||
},
|
||||
{
|
||||
name: "standard missing API key",
|
||||
errType: "api_error",
|
||||
message: "API key is required in Authorization header (Bearer scheme), x-api-key header, or x-goog-api-key header",
|
||||
code: "API_KEY_REQUIRED",
|
||||
status: http.StatusUnauthorized,
|
||||
},
|
||||
{
|
||||
name: "google invalid API key",
|
||||
errType: "api_error",
|
||||
message: "Invalid API key",
|
||||
code: "401",
|
||||
status: http.StatusUnauthorized,
|
||||
},
|
||||
{
|
||||
name: "google missing API key",
|
||||
errType: "api_error",
|
||||
message: "API key is required",
|
||||
code: "401",
|
||||
status: http.StatusUnauthorized,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
rec := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(rec)
|
||||
|
||||
errType := normalizeOpsErrorType(tt.errType, tt.code)
|
||||
phase, isBusinessLimited, errorOwner, errorSource := classifyOpsErrorLog(c, errType, tt.message, tt.code, tt.status)
|
||||
|
||||
require.Equal(t, "api_error", errType)
|
||||
require.Equal(t, "auth", phase)
|
||||
require.True(t, isBusinessLimited)
|
||||
require.Equal(t, "client", errorOwner)
|
||||
require.Equal(t, "client_request", errorSource)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestClassifyOpsUnsupportedModelExcludedFromSLA(t *testing.T) {
|
||||
tests := []string{
|
||||
"No available accounts: no available accounts supporting model: made-up-model",
|
||||
"No available accounts: no available OpenAI accounts supporting model: made-up-model",
|
||||
"No available Gemini accounts: no available Gemini accounts supporting model: made-up-model",
|
||||
"No available accounts: no available accounts supporting model: made-up-model (channel pricing restriction)",
|
||||
}
|
||||
|
||||
for _, message := range tests {
|
||||
t.Run(message, func(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
rec := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(rec)
|
||||
markOpsRoutingCapacityLimited(c)
|
||||
|
||||
errType := normalizeOpsErrorType("api_error", "")
|
||||
phase, isBusinessLimited, errorOwner, errorSource := classifyOpsErrorLog(c, errType, message, "", http.StatusServiceUnavailable)
|
||||
|
||||
require.Equal(t, "api_error", errType)
|
||||
require.Equal(t, "routing", phase)
|
||||
require.True(t, isBusinessLimited)
|
||||
require.Equal(t, "platform", errorOwner)
|
||||
require.Equal(t, "gateway", errorSource)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestClassifyOpsUnmarkedNoAvailableTextStillCountsForSLA(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
rec := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(rec)
|
||||
|
||||
phase, isBusinessLimited, errorOwner, errorSource := classifyOpsErrorLog(
|
||||
c,
|
||||
"api_error",
|
||||
"No available accounts",
|
||||
"",
|
||||
http.StatusServiceUnavailable,
|
||||
)
|
||||
|
||||
require.Equal(t, "routing", phase)
|
||||
require.False(t, isBusinessLimited)
|
||||
require.Equal(t, "platform", errorOwner)
|
||||
require.Equal(t, "gateway", errorSource)
|
||||
}
|
||||
|
||||
func TestClassifyOpsUpstreamAuthTextStillCountsForSLA(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
rec := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(rec)
|
||||
service.SetOpsUpstreamError(c, http.StatusUnauthorized, "Invalid API key", "")
|
||||
|
||||
phase, isBusinessLimited, errorOwner, errorSource := classifyOpsErrorLog(
|
||||
c,
|
||||
"api_error",
|
||||
"Invalid API key",
|
||||
"401",
|
||||
http.StatusUnauthorized,
|
||||
)
|
||||
|
||||
require.Equal(t, "upstream", phase)
|
||||
require.False(t, isBusinessLimited)
|
||||
require.Equal(t, "provider", errorOwner)
|
||||
require.Equal(t, "upstream_http", errorSource)
|
||||
}
|
||||
|
||||
func TestClassifyOpsUpstreamNoAvailableTextStillCountsForSLA(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
rec := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(rec)
|
||||
service.SetOpsUpstreamError(c, http.StatusServiceUnavailable, "No available accounts", "")
|
||||
|
||||
phase, isBusinessLimited, errorOwner, errorSource := classifyOpsErrorLog(
|
||||
c,
|
||||
"api_error",
|
||||
"No available accounts",
|
||||
"",
|
||||
http.StatusServiceUnavailable,
|
||||
)
|
||||
|
||||
require.Equal(t, "upstream", phase)
|
||||
require.False(t, isBusinessLimited)
|
||||
require.Equal(t, "provider", errorOwner)
|
||||
require.Equal(t, "upstream_http", errorSource)
|
||||
}
|
||||
|
||||
func TestSetOpsEndpointContext_SetsContextKeys(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
rec := httptest.NewRecorder()
|
||||
|
||||
@ -5053,7 +5053,7 @@ export default {
|
||||
switchRateTrend: 'Trend of account switches / total requests over the last 5 hours (avg switches).',
|
||||
latencyHistogram: 'Request duration distribution (ms) for successful requests.',
|
||||
errorTrend: 'Error counts over time (SLA scope excludes business limits; upstream excludes 429/529).',
|
||||
errorDistribution: 'Error distribution by status code.',
|
||||
errorDistribution: 'Error distribution by status code (SLA scope, excluding business limits).',
|
||||
goroutines:
|
||||
'Number of Go runtime goroutines (lightweight threads). There is no absolute "safe" number—use your historical baseline. Heuristic: <2k is common; 2k–8k watch; >8k plus rising queue/latency often suggests blocking/leaks.',
|
||||
cpu: 'CPU usage percentage, showing system processor load.',
|
||||
|
||||
@ -5216,7 +5216,7 @@ export default {
|
||||
switchRateTrend: '近5小时内账号切换次数 / 请求总数的趋势(平均切换次数)。',
|
||||
latencyHistogram: '成功请求的请求时长分布(毫秒)。',
|
||||
errorTrend: '错误趋势(SLA 口径排除业务限制;上游错误率排除 429/529)。',
|
||||
errorDistribution: '按状态码统计的错误分布。',
|
||||
errorDistribution: '按状态码统计的错误分布(SLA 口径,排除业务限制)。',
|
||||
upstreamErrors: '上游服务返回的错误,包括API提供商的错误响应(排除429/529限流错误)。',
|
||||
goroutines:
|
||||
'Go 运行时的协程数量(轻量级线程)。没有绝对"安全值",建议以历史基线为准。经验参考:<2000 常见;2000-8000 需关注;>8000 且伴随队列上升时,优先排查阻塞/泄漏。',
|
||||
|
||||
@ -30,7 +30,11 @@ const colors = computed(() => ({
|
||||
text: isDarkMode.value ? '#9ca3af' : '#6b7280'
|
||||
}))
|
||||
|
||||
const hasData = computed(() => (props.data?.total ?? 0) > 0)
|
||||
const totalSlaErrors = computed(() =>
|
||||
(props.data?.items ?? []).reduce((total, item) => total + Number(item.sla || 0), 0)
|
||||
)
|
||||
|
||||
const hasData = computed(() => totalSlaErrors.value > 0)
|
||||
|
||||
const state = computed<ChartState>(() => {
|
||||
if (hasData.value) return 'ready'
|
||||
@ -54,7 +58,7 @@ const categories = computed<ErrorCategory[]>(() => {
|
||||
|
||||
for (const item of props.data.items || []) {
|
||||
const code = Number(item.status_code || 0)
|
||||
const count = Number(item.total || 0)
|
||||
const count = Number(item.sla || 0)
|
||||
if (!Number.isFinite(code) || !Number.isFinite(count)) continue
|
||||
|
||||
if ([502, 503, 504].includes(code)) upstream += count
|
||||
|
||||
@ -45,9 +45,7 @@ const colors = computed(() => ({
|
||||
text: isDarkMode.value ? '#9ca3af' : '#6b7280'
|
||||
}))
|
||||
|
||||
const totalRequestErrors = computed(() =>
|
||||
sumNumbers(props.points.map((p) => (p.error_count_sla ?? 0) + (p.business_limited_count ?? 0)))
|
||||
)
|
||||
const totalRequestErrors = computed(() => sumNumbers(props.points.map((p) => p.error_count_sla ?? 0)))
|
||||
|
||||
const totalUpstreamErrors = computed(() =>
|
||||
sumNumbers(
|
||||
|
||||
@ -0,0 +1,147 @@
|
||||
import { mount } from '@vue/test-utils'
|
||||
import { describe, expect, it, vi } from 'vitest'
|
||||
import { defineComponent } from 'vue'
|
||||
import OpsErrorDistributionChart from '../OpsErrorDistributionChart.vue'
|
||||
import OpsErrorTrendChart from '../OpsErrorTrendChart.vue'
|
||||
|
||||
// Replace chart.js with inert placeholders: the components under test only
// need the named exports to exist and Chart.register to be callable.
vi.mock('chart.js', () => {
  const placeholderExports = Object.fromEntries(
    [
      'ArcElement',
      'CategoryScale',
      'Filler',
      'Legend',
      'LineElement',
      'LinearScale',
      'PointElement',
      'Title',
      'Tooltip',
    ].map((exportName) => [exportName, {}])
  )

  return {
    Chart: { register: vi.fn() },
    ...placeholderExports,
  }
})
|
||||
|
||||
// Replace the vue-chartjs chart components with lightweight stubs that accept
// the same `data` / `options` props, so tests can inspect what was passed in
// without rendering a real canvas.
vi.mock('vue-chartjs', async () => {
  const { defineComponent } = await import('vue')

  // Builds a stub chart component rendering a single marker <div>.
  const makeChartStub = (name: string, cssClass: string) =>
    defineComponent({
      name,
      props: {
        data: { type: Object, required: true },
        options: { type: Object, default: () => ({}) },
      },
      template: `<div class="${cssClass}" />`,
    })

  return {
    Doughnut: makeChartStub('Doughnut', 'doughnut-stub'),
    Line: makeChartStub('LineChartStub', 'line-stub'),
  }
})
|
||||
|
||||
// Minimal stand-ins for the formatter helpers used by the charts.
vi.mock('../../utils/opsFormatters', () => ({
  // Echo the raw date string; a missing date becomes an empty label.
  formatHistoryLabel: (date: string | undefined) => date ?? '',
  // Sum only finite numbers, ignoring null/undefined/NaN/Infinity entries.
  sumNumbers: (values: Array<number | null | undefined>) => {
    let total = 0
    for (const value of values) {
      if (typeof value === 'number' && Number.isFinite(value)) {
        total += value
      }
    }
    return total
  },
}))
|
||||
|
||||
// Keep the real vue-i18n module but make useI18n().t return the key itself,
// so assertions can compare against translation keys.
vi.mock('vue-i18n', async (importOriginal) => {
  const original = await importOriginal<typeof import('vue-i18n')>()
  const echoKey = (key: string) => key

  return {
    ...original,
    useI18n: () => ({ t: echoKey }),
  }
})
|
||||
|
||||
// Prop definition shared by the stubs below: an optional string, empty by default.
const optionalStringProp = { type: String, default: '' }

// Stand-in for HelpTooltip: accepts `content` and renders a marker <span>.
const HelpTooltipStub = defineComponent({
  name: 'HelpTooltip',
  props: { content: optionalStringProp },
  template: '<span class="help-tooltip-stub" />',
})

// Stand-in for EmptyState: accepts `title` / `description` and renders a
// marker <div> that tests can look up by class.
const EmptyStateStub = defineComponent({
  name: 'EmptyState',
  props: {
    title: optionalStringProp,
    description: optionalStringProp,
  },
  template: '<div class="empty-state-stub" />',
})

// Mount options shared by every test in this file.
const globalStubs = {
  stubs: {
    HelpTooltip: HelpTooltipStub,
    EmptyState: EmptyStateStub,
  },
}
|
||||
|
||||
describe('Ops SLA-scoped error charts', () => {
  // The distribution chart must count the `sla` field only, so business-limited
  // errors do not show up in the request error distribution.
  it('错误分布图按 SLA 错误数统计,不把业务限制错误算进请求错误分布', () => {
    const data = {
      total: 10,
      items: [
        { status_code: 400, total: 7, sla: 2, business_limited: 5 },
        { status_code: 503, total: 3, sla: 0, business_limited: 3 },
      ],
    }

    const wrapper = mount(OpsErrorDistributionChart, {
      props: { loading: false, data },
      global: globalStubs,
    })

    const doughnut = wrapper.findComponent({ name: 'Doughnut' })
    expect(doughnut.exists()).toBe(true)

    const expectedChartData = {
      labels: ['admin.ops.client'],
      datasets: [{ data: [2] }],
    }
    expect(doughnut.props('data')).toMatchObject(expectedChartData)
  })

  // With only business-limited errors there is nothing in SLA scope, so the
  // chart is replaced by the empty state.
  it('错误分布图在只有业务限制错误时显示为空态', () => {
    const data = {
      total: 4,
      items: [{ status_code: 500, total: 4, sla: 0, business_limited: 4 }],
    }

    const wrapper = mount(OpsErrorDistributionChart, {
      props: { loading: false, data },
      global: globalStubs,
    })

    expect(wrapper.findComponent({ name: 'Doughnut' }).exists()).toBe(false)
    expect(wrapper.find('.empty-state-stub').exists()).toBe(true)
  })

  // The request-error details button stays disabled when every error in the
  // window is business-limited (error_count_sla is 0).
  it('错误趋势图的请求错误详情按钮只按 SLA 错误启用', () => {
    const points = [
      {
        bucket_start: '2026-05-18T00:00:00Z',
        error_count_total: 5,
        business_limited_count: 5,
        error_count_sla: 0,
        upstream_error_count_excl_429_529: 0,
        upstream_429_count: 0,
        upstream_529_count: 0,
      },
    ]

    const wrapper = mount(OpsErrorTrendChart, {
      props: { loading: false, timeRange: '1h', points },
      global: globalStubs,
    })

    // The first button in the component is the request-errors details button.
    const [requestErrorsButton] = wrapper.findAll('button')
    expect(requestErrorsButton.attributes('disabled')).toBeDefined()
  })
})
|
||||
Loading…
x
Reference in New Issue
Block a user