diff --git a/backend/internal/handler/gateway_handler.go b/backend/internal/handler/gateway_handler.go index 65836a7e..35de1bff 100644 --- a/backend/internal/handler/gateway_handler.go +++ b/backend/internal/handler/gateway_handler.go @@ -325,6 +325,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) { selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, reqModel, fs.FailedAccountIDs, "", int64(0)) // Gemini 不使用会话限制 if err != nil { if len(fs.FailedAccountIDs) == 0 { + markOpsRoutingCapacityLimitedIfNoAvailable(c, err) reqLog.Warn("gateway.select_account_no_available", zap.String("model", reqModel), zap.Int64p("group_id", apiKey.GroupID), @@ -374,6 +375,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) { accountReleaseFunc := selection.ReleaseFunc if !selection.Acquired { if selection.WaitPlan == nil { + markOpsRoutingCapacityLimited(c) reqLog.Warn("gateway.select_account_no_slot_no_wait_plan", zap.Int64("account_id", account.ID), zap.String("model", reqModel), @@ -566,6 +568,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) { selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), currentAPIKey.GroupID, sessionKey, reqModel, fs.FailedAccountIDs, parsedReq.MetadataUserID, subject.UserID) if err != nil { if len(fs.FailedAccountIDs) == 0 { + markOpsRoutingCapacityLimitedIfNoAvailable(c, err) reqLog.Warn("gateway.select_account_no_available", zap.String("model", reqModel), zap.Int64p("group_id", currentAPIKey.GroupID), @@ -626,6 +629,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) { accountReleaseFunc := selection.ReleaseFunc if !selection.Acquired { if selection.WaitPlan == nil { + markOpsRoutingCapacityLimited(c) reqLog.Warn("gateway.select_account_no_slot_no_wait_plan", zap.Int64("account_id", account.ID), zap.String("model", reqModel), @@ -1542,6 +1546,7 @@ func (h *GatewayHandler) CountTokens(c *gin.Context) { account, err := h.gatewayService.SelectAccountForModel(c.Request.Context(), apiKey.GroupID, sessionHash, parsedReq.Model) if err != nil { reqLog.Warn("gateway.count_tokens_select_account_failed", zap.Error(err)) + markOpsRoutingCapacityLimitedIfNoAvailable(c, err) h.errorResponse(c, http.StatusServiceUnavailable, "api_error", "Service temporarily unavailable") return } diff --git a/backend/internal/handler/gateway_handler_chat_completions.go b/backend/internal/handler/gateway_handler_chat_completions.go index c6b73190..00c8ac37 100644 --- a/backend/internal/handler/gateway_handler_chat_completions.go +++ b/backend/internal/handler/gateway_handler_chat_completions.go @@ -169,6 +169,7 @@ func (h *GatewayHandler) ChatCompletions(c *gin.Context) { selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionHash, reqModel, fs.FailedAccountIDs, "", int64(0)) if err != nil { if len(fs.FailedAccountIDs) == 0 { + markOpsRoutingCapacityLimitedIfNoAvailable(c, err) h.chatCompletionsErrorResponse(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error()) return } @@ -194,6 +195,7 @@ func (h *GatewayHandler) ChatCompletions(c *gin.Context) { accountReleaseFunc := selection.ReleaseFunc if !selection.Acquired { if selection.WaitPlan == nil { + markOpsRoutingCapacityLimited(c) h.chatCompletionsErrorResponse(c, http.StatusServiceUnavailable, "api_error", "No available accounts") return } diff --git a/backend/internal/handler/gateway_handler_responses.go b/backend/internal/handler/gateway_handler_responses.go index a97f572d..b8a2af8e 100644 --- a/backend/internal/handler/gateway_handler_responses.go +++ b/backend/internal/handler/gateway_handler_responses.go @@ -174,6 +174,7 @@ func (h *GatewayHandler) Responses(c *gin.Context) { selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionHash, reqModel, fs.FailedAccountIDs, "", int64(0)) if err != nil { if len(fs.FailedAccountIDs) == 0 { + markOpsRoutingCapacityLimitedIfNoAvailable(c, err) h.responsesErrorResponse(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error()) return } @@ -199,6 +200,7 @@ func (h *GatewayHandler) Responses(c *gin.Context) { accountReleaseFunc := selection.ReleaseFunc if !selection.Acquired { if selection.WaitPlan == nil { + markOpsRoutingCapacityLimited(c) h.responsesErrorResponse(c, http.StatusServiceUnavailable, "api_error", "No available accounts") return } diff --git a/backend/internal/handler/gemini_v1beta_handler.go b/backend/internal/handler/gemini_v1beta_handler.go index 90ebe9ec..3395eeec 100644 --- a/backend/internal/handler/gemini_v1beta_handler.go +++ b/backend/internal/handler/gemini_v1beta_handler.go @@ -61,6 +61,7 @@ func (h *GatewayHandler) GeminiV1BetaListModels(c *gin.Context) { c.JSON(http.StatusOK, gemini.FallbackModelsList()) return } + markOpsRoutingCapacityLimitedIfNoAvailable(c, err) googleError(c, http.StatusServiceUnavailable, "No available Gemini accounts: "+err.Error()) return } @@ -113,6 +114,7 @@ func (h *GatewayHandler) GeminiV1BetaGetModel(c *gin.Context) { c.JSON(http.StatusOK, gemini.FallbackModel(modelName)) return } + markOpsRoutingCapacityLimitedIfNoAvailable(c, err) googleError(c, http.StatusServiceUnavailable, "No available Gemini accounts: "+err.Error()) return } @@ -372,6 +374,7 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) { selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, modelName, fs.FailedAccountIDs, "", int64(0)) // Gemini 不使用会话限制 if err != nil { if len(fs.FailedAccountIDs) == 0 { + markOpsRoutingCapacityLimitedIfNoAvailable(c, err) googleError(c, http.StatusServiceUnavailable, "No available Gemini accounts: "+err.Error()) return } @@ -419,6 +422,7 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) { accountReleaseFunc := selection.ReleaseFunc if !selection.Acquired { if selection.WaitPlan == nil { + markOpsRoutingCapacityLimited(c) googleError(c, http.StatusServiceUnavailable, "No available Gemini accounts") return } diff --git a/backend/internal/handler/openai_chat_completions.go b/backend/internal/handler/openai_chat_completions.go index de384710..c85cd35d 100644 --- a/backend/internal/handler/openai_chat_completions.go +++ b/backend/internal/handler/openai_chat_completions.go @@ -143,6 +143,7 @@ func (h *OpenAIGatewayHandler) ChatCompletions(c *gin.Context) { zap.Int("excluded_account_count", len(failedAccountIDs)), ) if len(failedAccountIDs) == 0 { + markOpsRoutingCapacityLimitedIfNoAvailable(c, err) h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "Service temporarily unavailable", streamStarted) return } else { @@ -155,6 +156,7 @@ func (h *OpenAIGatewayHandler) ChatCompletions(c *gin.Context) { } } if selection == nil || selection.Account == nil { + markOpsRoutingCapacityLimited(c) h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts", streamStarted) return } diff --git a/backend/internal/handler/openai_gateway_handler.go b/backend/internal/handler/openai_gateway_handler.go index 6b07b7ba..dcd737af 100644 --- a/backend/internal/handler/openai_gateway_handler.go +++ b/backend/internal/handler/openai_gateway_handler.go @@ -282,6 +282,7 @@ func (h *OpenAIGatewayHandler) Responses(c *gin.Context) { zap.Int("excluded_account_count", len(failedAccountIDs)), ) if len(failedAccountIDs) == 0 { + markOpsRoutingCapacityLimitedIfNoAvailable(c, err) if errors.Is(err, service.ErrNoAvailableCompactAccounts) { h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "compact_not_supported", "No available OpenAI accounts support /responses/compact", streamStarted) return @@ -297,6 +298,7 @@ func (h *OpenAIGatewayHandler) Responses(c *gin.Context) { return } if selection == nil || selection.Account == nil { + markOpsRoutingCapacityLimited(c) h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts", streamStarted) return } @@ -677,6 +679,7 @@ func (h *OpenAIGatewayHandler) Messages(c *gin.Context) { ) if len(failedAccountIDs) == 0 { if err != nil { + markOpsRoutingCapacityLimitedIfNoAvailable(c, err) h.anthropicStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "Service temporarily unavailable", streamStarted) return } @@ -690,6 +693,7 @@ func (h *OpenAIGatewayHandler) Messages(c *gin.Context) { } } if selection == nil || selection.Account == nil { + markOpsRoutingCapacityLimited(c) h.anthropicStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts", streamStarted) return } @@ -992,6 +996,7 @@ func (h *OpenAIGatewayHandler) acquireResponsesAccountSlot( reqLog *zap.Logger, ) (func(), bool) { if selection == nil || selection.Account == nil { + markOpsRoutingCapacityLimited(c) h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts", *streamStarted) return nil, false } @@ -1002,6 +1007,7 @@ func (h *OpenAIGatewayHandler) acquireResponsesAccountSlot( return wrapReleaseOnDone(ctx, selection.ReleaseFunc), true } if selection.WaitPlan == nil { + markOpsRoutingCapacityLimited(c) h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts", *streamStarted) return nil, false } diff --git a/backend/internal/handler/openai_images.go b/backend/internal/handler/openai_images.go index 08a6b6e8..be19a035 100644 --- a/backend/internal/handler/openai_images.go +++ b/backend/internal/handler/openai_images.go @@ -157,6 +157,7 @@ func (h *OpenAIGatewayHandler) Images(c *gin.Context) { zap.Int("excluded_account_count", len(failedAccountIDs)), ) if len(failedAccountIDs) == 0 { + markOpsRoutingCapacityLimitedIfNoAvailable(c, err) h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available compatible accounts", streamStarted) return } @@ -168,6 +169,7 @@ func (h *OpenAIGatewayHandler) Images(c *gin.Context) { return } if selection == nil || selection.Account == nil { + markOpsRoutingCapacityLimited(c) h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available compatible accounts", streamStarted) return } diff --git a/backend/internal/handler/ops_error_logger.go b/backend/internal/handler/ops_error_logger.go index 93554912..398124cc 100644 --- a/backend/internal/handler/ops_error_logger.go +++ b/backend/internal/handler/ops_error_logger.go @@ -4,6 +4,7 @@ import ( "bytes" "context" "encoding/json" + "errors" "log" "runtime" "runtime/debug" @@ -22,10 +23,11 @@ import ( ) const ( - opsModelKey = "ops_model" - opsStreamKey = "ops_stream" - opsRequestBodyKey = "ops_request_body" - opsAccountIDKey = "ops_account_id" + opsModelKey = "ops_model" + opsStreamKey = "ops_stream" + opsRequestBodyKey = "ops_request_body" + opsAccountIDKey = "ops_account_id" + opsRoutingCapacityLimitedKey = "ops_routing_capacity_limited" opsUpstreamModelKey = "ops_upstream_model" opsRequestTypeKey = "ops_request_type" @@ -45,6 +47,8 @@ const ( opsCodeSubscriptionNotFound = "SUBSCRIPTION_NOT_FOUND" opsCodeSubscriptionInvalid = "SUBSCRIPTION_INVALID" opsCodeUserInactive = "USER_INACTIVE" + opsCodeInvalidAPIKey = "INVALID_API_KEY" + opsCodeAPIKeyRequired = "API_KEY_REQUIRED" ) const ( @@ -393,6 +397,42 @@ func setOpsSelectedAccount(c *gin.Context, accountID int64, platform ...string) } } +func markOpsRoutingCapacityLimited(c *gin.Context) { + if c == nil { + return + } + c.Set(opsRoutingCapacityLimitedKey, true) +} + +func markOpsRoutingCapacityLimitedIfNoAvailable(c *gin.Context, err error) { + if !isOpsNoAvailableAccountError(err) { + return + } + markOpsRoutingCapacityLimited(c) +} + +func isOpsRoutingCapacityLimited(c *gin.Context) bool { + if c == nil { + return false + } + v, ok := c.Get(opsRoutingCapacityLimitedKey) + if !ok { + return false + } + marked, _ := v.(bool) + return marked +} + +func isOpsNoAvailableAccountError(err error) bool { + if err == nil { + return false + } + if errors.Is(err, service.ErrNoAvailableAccounts) || errors.Is(err, service.ErrNoAvailableCompactAccounts) { + return true + } + return isOpsNoAvailableAccountMessage(err.Error()) +} + type opsCaptureWriter struct { gin.ResponseWriter limit int @@ -775,11 +815,7 @@ func OpsErrorLoggerMiddleware(ops *service.OpsService) gin.HandlerFunc { normalizedType := normalizeOpsErrorType(parsed.ErrorType, parsed.Code) - phase := classifyOpsPhase(normalizedType, parsed.Message, parsed.Code) - isBusinessLimited := classifyOpsIsBusinessLimited(normalizedType, phase, parsed.Code, status, parsed.Message) - - errorOwner := classifyOpsErrorOwner(phase, parsed.Message) - errorSource := classifyOpsErrorSource(phase, parsed.Message) + phase, isBusinessLimited, errorOwner, errorSource := classifyOpsErrorLog(c, normalizedType, parsed.Message, parsed.Code, status) entry := &service.OpsInsertErrorLogInput{ RequestID: requestID, @@ -1114,6 +1150,9 @@ func classifyOpsPhase(errType, message, code string) string { msg := strings.ToLower(message) // Standardized phases: request|auth|routing|upstream|network|internal // Map billing/concurrency/response => request; scheduling => routing. + if isOpsClientAuthError(code, msg) { + return "auth" + } switch strings.TrimSpace(code) { case opsCodeInsufficientBalance, opsCodeUsageLimitExceeded, opsCodeSubscriptionNotFound, opsCodeSubscriptionInvalid: return "request" @@ -1134,7 +1173,7 @@ func classifyOpsPhase(errType, message, code string) string { case "upstream_error", "overloaded_error": return "upstream" case "api_error": - if strings.Contains(msg, opsErrNoAvailableAccounts) { + if isOpsNoAvailableAccountMessage(msg) { return "routing" } return "internal" @@ -1178,7 +1217,27 @@ func classifyOpsIsRetryable(errType string, statusCode int) bool { } } -func classifyOpsIsBusinessLimited(errType, phase, code string, status int, message string) bool { +func classifyOpsErrorLog(c *gin.Context, errType, message, code string, status int) (phase string, isBusinessLimited bool, errorOwner string, errorSource string) { + phase = classifyOpsPhase(errType, message, code) + routingCapacityLimited := isOpsRoutingCapacityLimited(c) + upstreamError := hasOpsUpstreamErrorContext(c) + if upstreamError && !routingCapacityLimited { + phase = "upstream" + } + if routingCapacityLimited { + phase = "routing" + } + localClientAuthError := !upstreamError && phase == "auth" && isOpsClientAuthError(code, strings.ToLower(message)) + isBusinessLimited = routingCapacityLimited || classifyOpsIsBusinessLimited(errType, phase, code, status, message, localClientAuthError) + errorOwner = classifyOpsErrorOwner(phase, message) + errorSource = classifyOpsErrorSource(phase, message) + return phase, isBusinessLimited, errorOwner, errorSource +} + +func classifyOpsIsBusinessLimited(errType, phase, code string, status int, message string, localClientAuthError ...bool) bool { + if len(localClientAuthError) > 0 && localClientAuthError[0] { + return true + } switch strings.TrimSpace(code) { case opsCodeInsufficientBalance, opsCodeUsageLimitExceeded, opsCodeSubscriptionNotFound, opsCodeSubscriptionInvalid, opsCodeUserInactive: return true @@ -1195,6 +1254,47 @@ func classifyOpsIsBusinessLimited(errType, phase, code string, status int, messa return false } +func isOpsClientAuthError(code string, msg string) bool { + switch strings.TrimSpace(code) { + case opsCodeInvalidAPIKey, opsCodeAPIKeyRequired: + return true + } + return strings.Contains(msg, "invalid api key") || strings.Contains(msg, "api key is required") +} + +func hasOpsUpstreamErrorContext(c *gin.Context) bool { + if c == nil { + return false + } + if v, ok := c.Get(service.OpsUpstreamStatusCodeKey); ok { + switch code := v.(type) { + case int: + if code > 0 { + return true + } + case int64: + if code > 0 { + return true + } + } + } + if v, ok := c.Get(service.OpsUpstreamErrorsKey); ok { + if events, ok := v.([]*service.OpsUpstreamErrorEvent); ok && len(events) > 0 { + return true + } + } + return false +} + +func isOpsNoAvailableAccountMessage(message string) bool { + msg := strings.ToLower(message) + return strings.Contains(msg, opsErrNoAvailableAccounts) || + strings.Contains(msg, "no available account") || + strings.Contains(msg, "no available gemini accounts") || + strings.Contains(msg, "no available openai accounts") || + strings.Contains(msg, "no available compatible accounts") +} + func classifyOpsErrorOwner(phase string, message string) string { // Standardized owners: client|provider|platform switch phase { diff --git a/backend/internal/handler/ops_error_logger_test.go b/backend/internal/handler/ops_error_logger_test.go index 6ae45110..e1df03cc 100644 --- a/backend/internal/handler/ops_error_logger_test.go +++ b/backend/internal/handler/ops_error_logger_test.go @@ -275,6 +275,187 @@ func TestNormalizeOpsErrorType(t *testing.T) { } } +func TestClassifyOpsNoAvailableAccountsExcludedFromSLA(t *testing.T) { + const message = "No available accounts" + gin.SetMode(gin.TestMode) + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + + markOpsRoutingCapacityLimited(c) + + errType := normalizeOpsErrorType("api_error", "") + phase, isBusinessLimited, errorOwner, errorSource := classifyOpsErrorLog(c, errType, message, "", http.StatusServiceUnavailable) + + require.Equal(t, "api_error", errType) + require.Equal(t, "routing", phase) + require.True(t, isBusinessLimited) + require.Equal(t, "platform", errorOwner) + require.Equal(t, "gateway", errorSource) +} + +func TestClassifyOpsRoutingCapacityMarkerExcludesMaskedSelectionFailureFromSLA(t *testing.T) { + gin.SetMode(gin.TestMode) + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + + markOpsRoutingCapacityLimited(c) + + phase, isBusinessLimited, errorOwner, errorSource := classifyOpsErrorLog( + c, + "api_error", + "Service temporarily unavailable", + "", + http.StatusServiceUnavailable, + ) + + require.Equal(t, "routing", phase) + require.True(t, isBusinessLimited) + require.Equal(t, "platform", errorOwner) + require.Equal(t, "gateway", errorSource) +} + +func TestClassifyOpsAuthClientErrorsExcludedFromSLA(t *testing.T) { + tests := []struct { + name string + errType string + message string + code string + status int + }{ + { + name: "standard invalid API key", + errType: "api_error", + message: "Invalid API key", + code: "INVALID_API_KEY", + status: http.StatusUnauthorized, + }, + { + name: "standard missing API key", + errType: "api_error", + message: "API key is required in Authorization header (Bearer scheme), x-api-key header, or x-goog-api-key header", + code: "API_KEY_REQUIRED", + status: http.StatusUnauthorized, + }, + { + name: "google invalid API key", + errType: "api_error", + message: "Invalid API key", + code: "401", + status: http.StatusUnauthorized, + }, + { + name: "google missing API key", + errType: "api_error", + message: "API key is required", + code: "401", + status: http.StatusUnauthorized, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gin.SetMode(gin.TestMode) + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + + errType := normalizeOpsErrorType(tt.errType, tt.code) + phase, isBusinessLimited, errorOwner, errorSource := classifyOpsErrorLog(c, errType, tt.message, tt.code, tt.status) + + require.Equal(t, "api_error", errType) + require.Equal(t, "auth", phase) + require.True(t, isBusinessLimited) + require.Equal(t, "client", errorOwner) + require.Equal(t, "client_request", errorSource) + }) + } +} + +func TestClassifyOpsUnsupportedModelExcludedFromSLA(t *testing.T) { + tests := []string{ + "No available accounts: no available accounts supporting model: made-up-model", + "No available accounts: no available OpenAI accounts supporting model: made-up-model", + "No available Gemini accounts: no available Gemini accounts supporting model: made-up-model", + "No available accounts: no available accounts supporting model: made-up-model (channel pricing restriction)", + } + + for _, message := range tests { + t.Run(message, func(t *testing.T) { + gin.SetMode(gin.TestMode) + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + markOpsRoutingCapacityLimited(c) + + errType := normalizeOpsErrorType("api_error", "") + phase, isBusinessLimited, errorOwner, errorSource := classifyOpsErrorLog(c, errType, message, "", http.StatusServiceUnavailable) + + require.Equal(t, "api_error", errType) + require.Equal(t, "routing", phase) + require.True(t, isBusinessLimited) + require.Equal(t, "platform", errorOwner) + require.Equal(t, "gateway", errorSource) + }) + } +} + +func TestClassifyOpsUnmarkedNoAvailableTextStillCountsForSLA(t *testing.T) { + gin.SetMode(gin.TestMode) + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + + phase, isBusinessLimited, errorOwner, errorSource := classifyOpsErrorLog( + c, + "api_error", + "No available accounts", + "", + http.StatusServiceUnavailable, + ) + + require.Equal(t, "routing", phase) + require.False(t, isBusinessLimited) + require.Equal(t, "platform", errorOwner) + require.Equal(t, "gateway", errorSource) +} + +func TestClassifyOpsUpstreamAuthTextStillCountsForSLA(t *testing.T) { + gin.SetMode(gin.TestMode) + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + service.SetOpsUpstreamError(c, http.StatusUnauthorized, "Invalid API key", "") + + phase, isBusinessLimited, errorOwner, errorSource := classifyOpsErrorLog( + c, + "api_error", + "Invalid API key", + "401", + http.StatusUnauthorized, + ) + + require.Equal(t, "upstream", phase) + require.False(t, isBusinessLimited) + require.Equal(t, "provider", errorOwner) + require.Equal(t, "upstream_http", errorSource) +} + +func TestClassifyOpsUpstreamNoAvailableTextStillCountsForSLA(t *testing.T) { + gin.SetMode(gin.TestMode) + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + service.SetOpsUpstreamError(c, http.StatusServiceUnavailable, "No available accounts", "") + + phase, isBusinessLimited, errorOwner, errorSource := classifyOpsErrorLog( + c, + "api_error", + "No available accounts", + "", + http.StatusServiceUnavailable, + ) + + require.Equal(t, "upstream", phase) + require.False(t, isBusinessLimited) + require.Equal(t, "provider", errorOwner) + require.Equal(t, "upstream_http", errorSource) +} + func TestSetOpsEndpointContext_SetsContextKeys(t *testing.T) { gin.SetMode(gin.TestMode) rec := httptest.NewRecorder() diff --git a/frontend/src/i18n/locales/en.ts b/frontend/src/i18n/locales/en.ts index 02d044ef..b7c10dda 100644 --- a/frontend/src/i18n/locales/en.ts +++ b/frontend/src/i18n/locales/en.ts @@ -5053,7 +5053,7 @@ export default { switchRateTrend: 'Trend of account switches / total requests over the last 5 hours (avg switches).', latencyHistogram: 'Request duration distribution (ms) for successful requests.', errorTrend: 'Error counts over time (SLA scope excludes business limits; upstream excludes 429/529).', - errorDistribution: 'Error distribution by status code.', + errorDistribution: 'Error distribution by status code (SLA scope, excluding business limits).', goroutines: 'Number of Go runtime goroutines (lightweight threads). There is no absolute "safe" number—use your historical baseline. Heuristic: <2k is common; 2k–8k watch; >8k plus rising queue/latency often suggests blocking/leaks.', cpu: 'CPU usage percentage, showing system processor load.', diff --git a/frontend/src/i18n/locales/zh.ts b/frontend/src/i18n/locales/zh.ts index 687c2df6..bd38c73f 100644 --- a/frontend/src/i18n/locales/zh.ts +++ b/frontend/src/i18n/locales/zh.ts @@ -5216,7 +5216,7 @@ export default { switchRateTrend: '近5小时内账号切换次数 / 请求总数的趋势(平均切换次数)。', latencyHistogram: '成功请求的请求时长分布(毫秒)。', errorTrend: '错误趋势(SLA 口径排除业务限制;上游错误率排除 429/529)。', - errorDistribution: '按状态码统计的错误分布。', + errorDistribution: '按状态码统计的错误分布(SLA 口径,排除业务限制)。', upstreamErrors: '上游服务返回的错误,包括API提供商的错误响应(排除429/529限流错误)。', goroutines: 'Go 运行时的协程数量(轻量级线程)。没有绝对"安全值",建议以历史基线为准。经验参考:<2000 常见;2000-8000 需关注;>8000 且伴随队列上升时,优先排查阻塞/泄漏。', diff --git a/frontend/src/views/admin/ops/components/OpsErrorDistributionChart.vue b/frontend/src/views/admin/ops/components/OpsErrorDistributionChart.vue index a52b5442..ad7ce074 100644 --- a/frontend/src/views/admin/ops/components/OpsErrorDistributionChart.vue +++ b/frontend/src/views/admin/ops/components/OpsErrorDistributionChart.vue @@ -30,7 +30,11 @@ const colors = computed(() => ({ text: isDarkMode.value ? '#9ca3af' : '#6b7280' })) -const hasData = computed(() => (props.data?.total ?? 0) > 0) +const totalSlaErrors = computed(() => + (props.data?.items ?? []).reduce((total, item) => total + Number(item.sla || 0), 0) +) + +const hasData = computed(() => totalSlaErrors.value > 0) const state = computed(() => { if (hasData.value) return 'ready' @@ -54,7 +58,7 @@ const categories = computed(() => { for (const item of props.data.items || []) { const code = Number(item.status_code || 0) - const count = Number(item.total || 0) + const count = Number(item.sla || 0) if (!Number.isFinite(code) || !Number.isFinite(count)) continue if ([502, 503, 504].includes(code)) upstream += count diff --git a/frontend/src/views/admin/ops/components/OpsErrorTrendChart.vue b/frontend/src/views/admin/ops/components/OpsErrorTrendChart.vue index 088dc317..6e07926f 100644 --- a/frontend/src/views/admin/ops/components/OpsErrorTrendChart.vue +++ b/frontend/src/views/admin/ops/components/OpsErrorTrendChart.vue @@ -45,9 +45,7 @@ const colors = computed(() => ({ text: isDarkMode.value ? '#9ca3af' : '#6b7280' })) -const totalRequestErrors = computed(() => - sumNumbers(props.points.map((p) => (p.error_count_sla ?? 0) + (p.business_limited_count ?? 0))) -) +const totalRequestErrors = computed(() => sumNumbers(props.points.map((p) => p.error_count_sla ?? 0))) const totalUpstreamErrors = computed(() => sumNumbers( diff --git a/frontend/src/views/admin/ops/components/__tests__/OpsErrorScopeCharts.spec.ts b/frontend/src/views/admin/ops/components/__tests__/OpsErrorScopeCharts.spec.ts new file mode 100644 index 00000000..b7a6590f --- /dev/null +++ b/frontend/src/views/admin/ops/components/__tests__/OpsErrorScopeCharts.spec.ts @@ -0,0 +1,147 @@ +import { mount } from '@vue/test-utils' +import { describe, expect, it, vi } from 'vitest' +import { defineComponent } from 'vue' +import OpsErrorDistributionChart from '../OpsErrorDistributionChart.vue' +import OpsErrorTrendChart from '../OpsErrorTrendChart.vue' + +vi.mock('chart.js', () => ({ + Chart: { register: vi.fn() }, + ArcElement: {}, + CategoryScale: {}, + Filler: {}, + Legend: {}, + LineElement: {}, + LinearScale: {}, + PointElement: {}, + Title: {}, + Tooltip: {}, +})) + +vi.mock('vue-chartjs', async () => { + const { defineComponent } = await import('vue') + + return { + Doughnut: defineComponent({ + name: 'Doughnut', + props: { + data: { type: Object, required: true }, + options: { type: Object, default: () => ({}) }, + }, + template: '
', + }), + Line: defineComponent({ + name: 'LineChartStub', + props: { + data: { type: Object, required: true }, + options: { type: Object, default: () => ({}) }, + }, + template: '
', + }), + } +}) + +vi.mock('../../utils/opsFormatters', () => ({ + formatHistoryLabel: (date: string | undefined) => date ?? '', + sumNumbers: (values: Array) => + values.reduce((total, value) => total + (typeof value === 'number' && Number.isFinite(value) ? value : 0), 0), +})) + +vi.mock('vue-i18n', async (importOriginal) => { + const actual = await importOriginal() + + return { + ...actual, + useI18n: () => ({ + t: (key: string) => key, + }), + } +}) + +const HelpTooltipStub = defineComponent({ + name: 'HelpTooltip', + props: { + content: { type: String, default: '' }, + }, + template: '', +}) + +const EmptyStateStub = defineComponent({ + name: 'EmptyState', + props: { + title: { type: String, default: '' }, + description: { type: String, default: '' }, + }, + template: '
', +}) + +const globalStubs = { + stubs: { + HelpTooltip: HelpTooltipStub, + EmptyState: EmptyStateStub, + }, +} + +describe('Ops SLA-scoped error charts', () => { + it('错误分布图按 SLA 错误数统计,不把业务限制错误算进请求错误分布', () => { + const wrapper = mount(OpsErrorDistributionChart, { + props: { + loading: false, + data: { + total: 10, + items: [ + { status_code: 400, total: 7, sla: 2, business_limited: 5 }, + { status_code: 503, total: 3, sla: 0, business_limited: 3 }, + ], + }, + }, + global: globalStubs, + }) + + const doughnut = wrapper.findComponent({ name: 'Doughnut' }) + expect(doughnut.exists()).toBe(true) + expect(doughnut.props('data')).toMatchObject({ + labels: ['admin.ops.client'], + datasets: [{ data: [2] }], + }) + }) + + it('错误分布图在只有业务限制错误时显示为空态', () => { + const wrapper = mount(OpsErrorDistributionChart, { + props: { + loading: false, + data: { + total: 4, + items: [{ status_code: 500, total: 4, sla: 0, business_limited: 4 }], + }, + }, + global: globalStubs, + }) + + expect(wrapper.findComponent({ name: 'Doughnut' }).exists()).toBe(false) + expect(wrapper.find('.empty-state-stub').exists()).toBe(true) + }) + + it('错误趋势图的请求错误详情按钮只按 SLA 错误启用', () => { + const wrapper = mount(OpsErrorTrendChart, { + props: { + loading: false, + timeRange: '1h', + points: [ + { + bucket_start: '2026-05-18T00:00:00Z', + error_count_total: 5, + business_limited_count: 5, + error_count_sla: 0, + upstream_error_count_excl_429_529: 0, + upstream_429_count: 0, + upstream_529_count: 0, + }, + ], + }, + global: globalStubs, + }) + + const requestErrorsButton = wrapper.findAll('button')[0] + expect(requestErrorsButton.attributes('disabled')).toBeDefined() + }) +})