Merge pull request #2548 from Arron196/fix/sla-exclude-capacity-errors

fix: 统一 Ops SLA 与请求错误统计口径
This commit is contained in:
Wesley Liddick 2026-05-19 09:29:22 +08:00 committed by GitHub
commit e318376e88
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 471 additions and 18 deletions

View File

@ -325,6 +325,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, reqModel, fs.FailedAccountIDs, "", int64(0)) // Gemini 不使用会话限制
if err != nil {
if len(fs.FailedAccountIDs) == 0 {
markOpsRoutingCapacityLimitedIfNoAvailable(c, err)
reqLog.Warn("gateway.select_account_no_available",
zap.String("model", reqModel),
zap.Int64p("group_id", apiKey.GroupID),
@ -374,6 +375,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
accountReleaseFunc := selection.ReleaseFunc
if !selection.Acquired {
if selection.WaitPlan == nil {
markOpsRoutingCapacityLimited(c)
reqLog.Warn("gateway.select_account_no_slot_no_wait_plan",
zap.Int64("account_id", account.ID),
zap.String("model", reqModel),
@ -566,6 +568,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), currentAPIKey.GroupID, sessionKey, reqModel, fs.FailedAccountIDs, parsedReq.MetadataUserID, subject.UserID)
if err != nil {
if len(fs.FailedAccountIDs) == 0 {
markOpsRoutingCapacityLimitedIfNoAvailable(c, err)
reqLog.Warn("gateway.select_account_no_available",
zap.String("model", reqModel),
zap.Int64p("group_id", currentAPIKey.GroupID),
@ -626,6 +629,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
accountReleaseFunc := selection.ReleaseFunc
if !selection.Acquired {
if selection.WaitPlan == nil {
markOpsRoutingCapacityLimited(c)
reqLog.Warn("gateway.select_account_no_slot_no_wait_plan",
zap.Int64("account_id", account.ID),
zap.String("model", reqModel),
@ -1542,6 +1546,7 @@ func (h *GatewayHandler) CountTokens(c *gin.Context) {
account, err := h.gatewayService.SelectAccountForModel(c.Request.Context(), apiKey.GroupID, sessionHash, parsedReq.Model)
if err != nil {
reqLog.Warn("gateway.count_tokens_select_account_failed", zap.Error(err))
markOpsRoutingCapacityLimitedIfNoAvailable(c, err)
h.errorResponse(c, http.StatusServiceUnavailable, "api_error", "Service temporarily unavailable")
return
}

View File

@ -169,6 +169,7 @@ func (h *GatewayHandler) ChatCompletions(c *gin.Context) {
selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionHash, reqModel, fs.FailedAccountIDs, "", int64(0))
if err != nil {
if len(fs.FailedAccountIDs) == 0 {
markOpsRoutingCapacityLimitedIfNoAvailable(c, err)
h.chatCompletionsErrorResponse(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error())
return
}
@ -194,6 +195,7 @@ func (h *GatewayHandler) ChatCompletions(c *gin.Context) {
accountReleaseFunc := selection.ReleaseFunc
if !selection.Acquired {
if selection.WaitPlan == nil {
markOpsRoutingCapacityLimited(c)
h.chatCompletionsErrorResponse(c, http.StatusServiceUnavailable, "api_error", "No available accounts")
return
}

View File

@ -174,6 +174,7 @@ func (h *GatewayHandler) Responses(c *gin.Context) {
selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionHash, reqModel, fs.FailedAccountIDs, "", int64(0))
if err != nil {
if len(fs.FailedAccountIDs) == 0 {
markOpsRoutingCapacityLimitedIfNoAvailable(c, err)
h.responsesErrorResponse(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error())
return
}
@ -199,6 +200,7 @@ func (h *GatewayHandler) Responses(c *gin.Context) {
accountReleaseFunc := selection.ReleaseFunc
if !selection.Acquired {
if selection.WaitPlan == nil {
markOpsRoutingCapacityLimited(c)
h.responsesErrorResponse(c, http.StatusServiceUnavailable, "api_error", "No available accounts")
return
}

View File

@ -61,6 +61,7 @@ func (h *GatewayHandler) GeminiV1BetaListModels(c *gin.Context) {
c.JSON(http.StatusOK, gemini.FallbackModelsList())
return
}
markOpsRoutingCapacityLimitedIfNoAvailable(c, err)
googleError(c, http.StatusServiceUnavailable, "No available Gemini accounts: "+err.Error())
return
}
@ -113,6 +114,7 @@ func (h *GatewayHandler) GeminiV1BetaGetModel(c *gin.Context) {
c.JSON(http.StatusOK, gemini.FallbackModel(modelName))
return
}
markOpsRoutingCapacityLimitedIfNoAvailable(c, err)
googleError(c, http.StatusServiceUnavailable, "No available Gemini accounts: "+err.Error())
return
}
@ -372,6 +374,7 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) {
selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, modelName, fs.FailedAccountIDs, "", int64(0)) // Gemini 不使用会话限制
if err != nil {
if len(fs.FailedAccountIDs) == 0 {
markOpsRoutingCapacityLimitedIfNoAvailable(c, err)
googleError(c, http.StatusServiceUnavailable, "No available Gemini accounts: "+err.Error())
return
}
@ -419,6 +422,7 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) {
accountReleaseFunc := selection.ReleaseFunc
if !selection.Acquired {
if selection.WaitPlan == nil {
markOpsRoutingCapacityLimited(c)
googleError(c, http.StatusServiceUnavailable, "No available Gemini accounts")
return
}

View File

@ -143,6 +143,7 @@ func (h *OpenAIGatewayHandler) ChatCompletions(c *gin.Context) {
zap.Int("excluded_account_count", len(failedAccountIDs)),
)
if len(failedAccountIDs) == 0 {
markOpsRoutingCapacityLimitedIfNoAvailable(c, err)
h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "Service temporarily unavailable", streamStarted)
return
} else {
@ -155,6 +156,7 @@ func (h *OpenAIGatewayHandler) ChatCompletions(c *gin.Context) {
}
}
if selection == nil || selection.Account == nil {
markOpsRoutingCapacityLimited(c)
h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts", streamStarted)
return
}

View File

@ -282,6 +282,7 @@ func (h *OpenAIGatewayHandler) Responses(c *gin.Context) {
zap.Int("excluded_account_count", len(failedAccountIDs)),
)
if len(failedAccountIDs) == 0 {
markOpsRoutingCapacityLimitedIfNoAvailable(c, err)
if errors.Is(err, service.ErrNoAvailableCompactAccounts) {
h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "compact_not_supported", "No available OpenAI accounts support /responses/compact", streamStarted)
return
@ -297,6 +298,7 @@ func (h *OpenAIGatewayHandler) Responses(c *gin.Context) {
return
}
if selection == nil || selection.Account == nil {
markOpsRoutingCapacityLimited(c)
h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts", streamStarted)
return
}
@ -677,6 +679,7 @@ func (h *OpenAIGatewayHandler) Messages(c *gin.Context) {
)
if len(failedAccountIDs) == 0 {
if err != nil {
markOpsRoutingCapacityLimitedIfNoAvailable(c, err)
h.anthropicStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "Service temporarily unavailable", streamStarted)
return
}
@ -690,6 +693,7 @@ func (h *OpenAIGatewayHandler) Messages(c *gin.Context) {
}
}
if selection == nil || selection.Account == nil {
markOpsRoutingCapacityLimited(c)
h.anthropicStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts", streamStarted)
return
}
@ -992,6 +996,7 @@ func (h *OpenAIGatewayHandler) acquireResponsesAccountSlot(
reqLog *zap.Logger,
) (func(), bool) {
if selection == nil || selection.Account == nil {
markOpsRoutingCapacityLimited(c)
h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts", *streamStarted)
return nil, false
}
@ -1002,6 +1007,7 @@ func (h *OpenAIGatewayHandler) acquireResponsesAccountSlot(
return wrapReleaseOnDone(ctx, selection.ReleaseFunc), true
}
if selection.WaitPlan == nil {
markOpsRoutingCapacityLimited(c)
h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts", *streamStarted)
return nil, false
}

View File

@ -157,6 +157,7 @@ func (h *OpenAIGatewayHandler) Images(c *gin.Context) {
zap.Int("excluded_account_count", len(failedAccountIDs)),
)
if len(failedAccountIDs) == 0 {
markOpsRoutingCapacityLimitedIfNoAvailable(c, err)
h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available compatible accounts", streamStarted)
return
}
@ -168,6 +169,7 @@ func (h *OpenAIGatewayHandler) Images(c *gin.Context) {
return
}
if selection == nil || selection.Account == nil {
markOpsRoutingCapacityLimited(c)
h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available compatible accounts", streamStarted)
return
}

View File

@ -4,6 +4,7 @@ import (
"bytes"
"context"
"encoding/json"
"errors"
"log"
"runtime"
"runtime/debug"
@ -22,10 +23,11 @@ import (
)
const (
opsModelKey = "ops_model"
opsStreamKey = "ops_stream"
opsRequestBodyKey = "ops_request_body"
opsAccountIDKey = "ops_account_id"
opsModelKey = "ops_model"
opsStreamKey = "ops_stream"
opsRequestBodyKey = "ops_request_body"
opsAccountIDKey = "ops_account_id"
opsRoutingCapacityLimitedKey = "ops_routing_capacity_limited"
opsUpstreamModelKey = "ops_upstream_model"
opsRequestTypeKey = "ops_request_type"
@ -45,6 +47,8 @@ const (
opsCodeSubscriptionNotFound = "SUBSCRIPTION_NOT_FOUND"
opsCodeSubscriptionInvalid = "SUBSCRIPTION_INVALID"
opsCodeUserInactive = "USER_INACTIVE"
opsCodeInvalidAPIKey = "INVALID_API_KEY"
opsCodeAPIKeyRequired = "API_KEY_REQUIRED"
)
const (
@ -393,6 +397,42 @@ func setOpsSelectedAccount(c *gin.Context, accountID int64, platform ...string)
}
}
// markOpsRoutingCapacityLimited stores true under opsRoutingCapacityLimitedKey
// on the request context so that later ops error classification can see that
// the request was rejected by account routing. A nil context is ignored.
func markOpsRoutingCapacityLimited(c *gin.Context) {
	if c != nil {
		c.Set(opsRoutingCapacityLimitedKey, true)
	}
}
// markOpsRoutingCapacityLimitedIfNoAvailable sets the routing-capacity marker
// on c only when err is recognised by isOpsNoAvailableAccountError; any other
// error (including nil) leaves the context untouched.
func markOpsRoutingCapacityLimitedIfNoAvailable(c *gin.Context, err error) {
	if isOpsNoAvailableAccountError(err) {
		markOpsRoutingCapacityLimited(c)
	}
}
// isOpsRoutingCapacityLimited reports whether the routing-capacity marker is
// set to true on c. It returns false for a nil context, a missing key, or a
// stored value that is not a bool.
func isOpsRoutingCapacityLimited(c *gin.Context) bool {
	if c == nil {
		return false
	}
	if raw, found := c.Get(opsRoutingCapacityLimitedKey); found {
		// A failed assertion yields flag == false, so non-bool values read as "not marked".
		flag, isBool := raw.(bool)
		return isBool && flag
	}
	return false
}
// isOpsNoAvailableAccountError reports whether err represents a "no available
// account" selection failure. It first checks the error chain for
// service.ErrNoAvailableAccounts / service.ErrNoAvailableCompactAccounts and
// otherwise falls back to matching the error text via
// isOpsNoAvailableAccountMessage. A nil error is never a match.
func isOpsNoAvailableAccountError(err error) bool {
	switch {
	case err == nil:
		return false
	case errors.Is(err, service.ErrNoAvailableAccounts),
		errors.Is(err, service.ErrNoAvailableCompactAccounts):
		return true
	default:
		return isOpsNoAvailableAccountMessage(err.Error())
	}
}
type opsCaptureWriter struct {
gin.ResponseWriter
limit int
@ -775,11 +815,7 @@ func OpsErrorLoggerMiddleware(ops *service.OpsService) gin.HandlerFunc {
normalizedType := normalizeOpsErrorType(parsed.ErrorType, parsed.Code)
phase := classifyOpsPhase(normalizedType, parsed.Message, parsed.Code)
isBusinessLimited := classifyOpsIsBusinessLimited(normalizedType, phase, parsed.Code, status, parsed.Message)
errorOwner := classifyOpsErrorOwner(phase, parsed.Message)
errorSource := classifyOpsErrorSource(phase, parsed.Message)
phase, isBusinessLimited, errorOwner, errorSource := classifyOpsErrorLog(c, normalizedType, parsed.Message, parsed.Code, status)
entry := &service.OpsInsertErrorLogInput{
RequestID: requestID,
@ -1114,6 +1150,9 @@ func classifyOpsPhase(errType, message, code string) string {
msg := strings.ToLower(message)
// Standardized phases: request|auth|routing|upstream|network|internal
// Map billing/concurrency/response => request; scheduling => routing.
if isOpsClientAuthError(code, msg) {
return "auth"
}
switch strings.TrimSpace(code) {
case opsCodeInsufficientBalance, opsCodeUsageLimitExceeded, opsCodeSubscriptionNotFound, opsCodeSubscriptionInvalid:
return "request"
@ -1134,7 +1173,7 @@ func classifyOpsPhase(errType, message, code string) string {
case "upstream_error", "overloaded_error":
return "upstream"
case "api_error":
if strings.Contains(msg, opsErrNoAvailableAccounts) {
if isOpsNoAvailableAccountMessage(msg) {
return "routing"
}
return "internal"
@ -1178,7 +1217,27 @@ func classifyOpsIsRetryable(errType string, statusCode int) bool {
}
}
func classifyOpsIsBusinessLimited(errType, phase, code string, status int, message string) bool {
// classifyOpsErrorLog derives the phase, business-limited flag, owner and
// source recorded for an ops error log entry.
//
// The phase starts from classifyOpsPhase and is then overridden by request
// context: the routing-capacity marker forces "routing", otherwise recorded
// upstream error context forces "upstream". An error is business-limited when
// the routing-capacity marker is set, or when classifyOpsIsBusinessLimited
// says so (it is also told whether this is a locally produced client auth
// error, i.e. phase "auth" with no upstream error context).
func classifyOpsErrorLog(c *gin.Context, errType, message, code string, status int) (phase string, isBusinessLimited bool, errorOwner string, errorSource string) {
	phase = classifyOpsPhase(errType, message, code)
	capacityLimited := isOpsRoutingCapacityLimited(c)
	fromUpstream := hasOpsUpstreamErrorContext(c)

	// The routing marker takes precedence over upstream context.
	switch {
	case capacityLimited:
		phase = "routing"
	case fromUpstream:
		phase = "upstream"
	}

	if capacityLimited {
		isBusinessLimited = true
	} else {
		localAuth := !fromUpstream && phase == "auth" && isOpsClientAuthError(code, strings.ToLower(message))
		isBusinessLimited = classifyOpsIsBusinessLimited(errType, phase, code, status, message, localAuth)
	}

	errorOwner = classifyOpsErrorOwner(phase, message)
	errorSource = classifyOpsErrorSource(phase, message)
	return phase, isBusinessLimited, errorOwner, errorSource
}
func classifyOpsIsBusinessLimited(errType, phase, code string, status int, message string, localClientAuthError ...bool) bool {
if len(localClientAuthError) > 0 && localClientAuthError[0] {
return true
}
switch strings.TrimSpace(code) {
case opsCodeInsufficientBalance, opsCodeUsageLimitExceeded, opsCodeSubscriptionNotFound, opsCodeSubscriptionInvalid, opsCodeUserInactive:
return true
@ -1195,6 +1254,47 @@ func classifyOpsIsBusinessLimited(errType, phase, code string, status int, messa
return false
}
// isOpsClientAuthError reports whether an error looks like a client API-key
// problem: either the trimmed code equals opsCodeInvalidAPIKey or
// opsCodeAPIKeyRequired, or msg contains one of the known key-error phrases.
//
// The phrase comparison is case-sensitive against lowercase literals, so msg
// must already be lowercased by the caller for the text match to work.
func isOpsClientAuthError(code string, msg string) bool {
	trimmed := strings.TrimSpace(code)
	if trimmed == opsCodeInvalidAPIKey || trimmed == opsCodeAPIKeyRequired {
		return true
	}
	for _, phrase := range [...]string{"invalid api key", "api key is required"} {
		if strings.Contains(msg, phrase) {
			return true
		}
	}
	return false
}
// hasOpsUpstreamErrorContext reports whether the request context carries
// evidence of an upstream error: a positive int/int64 stored under
// service.OpsUpstreamStatusCodeKey, or a non-empty
// []*service.OpsUpstreamErrorEvent stored under service.OpsUpstreamErrorsKey.
// A nil context yields false.
func hasOpsUpstreamErrorContext(c *gin.Context) bool {
	if c == nil {
		return false
	}

	if raw, found := c.Get(service.OpsUpstreamStatusCodeKey); found {
		// Values of any other type leave upstreamStatus at zero and are ignored.
		var upstreamStatus int64
		switch n := raw.(type) {
		case int:
			upstreamStatus = int64(n)
		case int64:
			upstreamStatus = n
		}
		if upstreamStatus > 0 {
			return true
		}
	}

	raw, found := c.Get(service.OpsUpstreamErrorsKey)
	if !found {
		return false
	}
	events, isEvents := raw.([]*service.OpsUpstreamErrorEvent)
	return isEvents && len(events) > 0
}
// isOpsNoAvailableAccountMessage reports whether message, compared
// case-insensitively, contains any of the known "no available account(s)"
// phrases (including the Gemini, OpenAI and "compatible accounts" variants).
func isOpsNoAvailableAccountMessage(message string) bool {
	lowered := strings.ToLower(message)
	phrases := []string{
		opsErrNoAvailableAccounts,
		"no available account",
		"no available gemini accounts",
		"no available openai accounts",
		"no available compatible accounts",
	}
	for _, phrase := range phrases {
		if strings.Contains(lowered, phrase) {
			return true
		}
	}
	return false
}
func classifyOpsErrorOwner(phase string, message string) string {
// Standardized owners: client|provider|platform
switch phase {

View File

@ -275,6 +275,187 @@ func TestNormalizeOpsErrorType(t *testing.T) {
}
}
// TestClassifyOpsNoAvailableAccountsExcludedFromSLA checks that a 503
// "No available accounts" error on a context carrying the routing-capacity
// marker is classified as a business-limited routing error owned by the
// platform/gateway.
func TestClassifyOpsNoAvailableAccountsExcludedFromSLA(t *testing.T) {
	gin.SetMode(gin.TestMode)
	ctx, _ := gin.CreateTestContext(httptest.NewRecorder())
	markOpsRoutingCapacityLimited(ctx)

	const errMessage = "No available accounts"
	normalizedType := normalizeOpsErrorType("api_error", "")
	gotPhase, gotLimited, gotOwner, gotSource := classifyOpsErrorLog(ctx, normalizedType, errMessage, "", http.StatusServiceUnavailable)

	require.Equal(t, "api_error", normalizedType)
	require.Equal(t, "routing", gotPhase)
	require.True(t, gotLimited)
	require.Equal(t, "platform", gotOwner)
	require.Equal(t, "gateway", gotSource)
}
// TestClassifyOpsRoutingCapacityMarkerExcludesMaskedSelectionFailureFromSLA
// checks that the routing-capacity marker alone is enough to classify an
// error as business-limited routing, even when the client-facing message
// ("Service temporarily unavailable") does not mention accounts at all.
func TestClassifyOpsRoutingCapacityMarkerExcludesMaskedSelectionFailureFromSLA(t *testing.T) {
	gin.SetMode(gin.TestMode)
	ctx, _ := gin.CreateTestContext(httptest.NewRecorder())
	markOpsRoutingCapacityLimited(ctx)

	const maskedMessage = "Service temporarily unavailable"
	gotPhase, gotLimited, gotOwner, gotSource := classifyOpsErrorLog(ctx, "api_error", maskedMessage, "", http.StatusServiceUnavailable)

	require.Equal(t, "routing", gotPhase)
	require.True(t, gotLimited)
	require.Equal(t, "platform", gotOwner)
	require.Equal(t, "gateway", gotSource)
}
// TestClassifyOpsAuthClientErrorsExcludedFromSLA checks that locally produced
// API-key errors (no upstream context on the request) are classified as
// business-limited "auth" errors attributed to the client, both when the
// standard error codes are present and when only the message text identifies
// them (the "401" code cases).
func TestClassifyOpsAuthClientErrorsExcludedFromSLA(t *testing.T) {
	type authCase struct {
		name    string
		errType string
		message string
		code    string
		status  int
	}

	cases := []authCase{
		{"standard invalid API key", "api_error", "Invalid API key", "INVALID_API_KEY", http.StatusUnauthorized},
		{"standard missing API key", "api_error", "API key is required in Authorization header (Bearer scheme), x-api-key header, or x-goog-api-key header", "API_KEY_REQUIRED", http.StatusUnauthorized},
		{"google invalid API key", "api_error", "Invalid API key", "401", http.StatusUnauthorized},
		{"google missing API key", "api_error", "API key is required", "401", http.StatusUnauthorized},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			gin.SetMode(gin.TestMode)
			ctx, _ := gin.CreateTestContext(httptest.NewRecorder())

			normalizedType := normalizeOpsErrorType(tc.errType, tc.code)
			gotPhase, gotLimited, gotOwner, gotSource := classifyOpsErrorLog(ctx, normalizedType, tc.message, tc.code, tc.status)

			require.Equal(t, "api_error", normalizedType)
			require.Equal(t, "auth", gotPhase)
			require.True(t, gotLimited)
			require.Equal(t, "client", gotOwner)
			require.Equal(t, "client_request", gotSource)
		})
	}
}
// TestClassifyOpsUnsupportedModelExcludedFromSLA checks that the various
// "no available accounts supporting model" messages, on a context carrying the
// routing-capacity marker, are classified as business-limited routing errors
// owned by the platform/gateway. Each message doubles as the subtest name.
func TestClassifyOpsUnsupportedModelExcludedFromSLA(t *testing.T) {
	unsupportedModelMessages := []string{
		"No available accounts: no available accounts supporting model: made-up-model",
		"No available accounts: no available OpenAI accounts supporting model: made-up-model",
		"No available Gemini accounts: no available Gemini accounts supporting model: made-up-model",
		"No available accounts: no available accounts supporting model: made-up-model (channel pricing restriction)",
	}

	for _, errMessage := range unsupportedModelMessages {
		t.Run(errMessage, func(t *testing.T) {
			gin.SetMode(gin.TestMode)
			ctx, _ := gin.CreateTestContext(httptest.NewRecorder())
			markOpsRoutingCapacityLimited(ctx)

			normalizedType := normalizeOpsErrorType("api_error", "")
			gotPhase, gotLimited, gotOwner, gotSource := classifyOpsErrorLog(ctx, normalizedType, errMessage, "", http.StatusServiceUnavailable)

			require.Equal(t, "api_error", normalizedType)
			require.Equal(t, "routing", gotPhase)
			require.True(t, gotLimited)
			require.Equal(t, "platform", gotOwner)
			require.Equal(t, "gateway", gotSource)
		})
	}
}
// TestClassifyOpsUnmarkedNoAvailableTextStillCountsForSLA checks that message
// text alone does not make an error business-limited: without the
// routing-capacity marker, "No available accounts" is still phased as routing
// but is NOT flagged as business-limited.
func TestClassifyOpsUnmarkedNoAvailableTextStillCountsForSLA(t *testing.T) {
	gin.SetMode(gin.TestMode)
	ctx, _ := gin.CreateTestContext(httptest.NewRecorder())

	const errMessage = "No available accounts"
	gotPhase, gotLimited, gotOwner, gotSource := classifyOpsErrorLog(ctx, "api_error", errMessage, "", http.StatusServiceUnavailable)

	require.Equal(t, "routing", gotPhase)
	require.False(t, gotLimited)
	require.Equal(t, "platform", gotOwner)
	require.Equal(t, "gateway", gotSource)
}
// TestClassifyOpsUpstreamAuthTextStillCountsForSLA checks that an
// "Invalid API key" 401 is not treated as a client auth error once an upstream
// error has been recorded on the context via service.SetOpsUpstreamError: it
// is phased as upstream, attributed to the provider, and not business-limited.
func TestClassifyOpsUpstreamAuthTextStillCountsForSLA(t *testing.T) {
	gin.SetMode(gin.TestMode)
	ctx, _ := gin.CreateTestContext(httptest.NewRecorder())
	service.SetOpsUpstreamError(ctx, http.StatusUnauthorized, "Invalid API key", "")

	const errMessage = "Invalid API key"
	gotPhase, gotLimited, gotOwner, gotSource := classifyOpsErrorLog(ctx, "api_error", errMessage, "401", http.StatusUnauthorized)

	require.Equal(t, "upstream", gotPhase)
	require.False(t, gotLimited)
	require.Equal(t, "provider", gotOwner)
	require.Equal(t, "upstream_http", gotSource)
}
// TestClassifyOpsUpstreamNoAvailableTextStillCountsForSLA checks that
// "No available accounts" text is not treated as a routing-capacity error when
// an upstream error has been recorded on the context via
// service.SetOpsUpstreamError and no routing marker is set: it is phased as
// upstream, attributed to the provider, and not business-limited.
func TestClassifyOpsUpstreamNoAvailableTextStillCountsForSLA(t *testing.T) {
	gin.SetMode(gin.TestMode)
	ctx, _ := gin.CreateTestContext(httptest.NewRecorder())
	service.SetOpsUpstreamError(ctx, http.StatusServiceUnavailable, "No available accounts", "")

	const errMessage = "No available accounts"
	gotPhase, gotLimited, gotOwner, gotSource := classifyOpsErrorLog(ctx, "api_error", errMessage, "", http.StatusServiceUnavailable)

	require.Equal(t, "upstream", gotPhase)
	require.False(t, gotLimited)
	require.Equal(t, "provider", gotOwner)
	require.Equal(t, "upstream_http", gotSource)
}
func TestSetOpsEndpointContext_SetsContextKeys(t *testing.T) {
gin.SetMode(gin.TestMode)
rec := httptest.NewRecorder()

View File

@ -5053,7 +5053,7 @@ export default {
switchRateTrend: 'Trend of account switches / total requests over the last 5 hours (avg switches).',
latencyHistogram: 'Request duration distribution (ms) for successful requests.',
errorTrend: 'Error counts over time (SLA scope excludes business limits; upstream excludes 429/529).',
errorDistribution: 'Error distribution by status code.',
errorDistribution: 'Error distribution by status code (SLA scope, excluding business limits).',
goroutines:
'Number of Go runtime goroutines (lightweight threads). There is no absolute "safe" number—use your historical baseline. Heuristic: <2k is common; 2k–8k watch; >8k plus rising queue/latency often suggests blocking/leaks.',
cpu: 'CPU usage percentage, showing system processor load.',

View File

@ -5216,7 +5216,7 @@ export default {
switchRateTrend: '近5小时内账号切换次数 / 请求总数的趋势(平均切换次数)。',
latencyHistogram: '成功请求的请求时长分布(毫秒)。',
errorTrend: '错误趋势SLA 口径排除业务限制;上游错误率排除 429/529。',
errorDistribution: '按状态码统计的错误分布。',
errorDistribution: '按状态码统计的错误分布(SLA 口径,排除业务限制)。',
upstreamErrors: '上游服务返回的错误包括API提供商的错误响应排除429/529限流错误。',
goroutines:
'Go 运行时的协程数量(轻量级线程)。没有绝对"安全值",建议以历史基线为准。经验参考:<2000 常见2000-8000 需关注;>8000 且伴随队列上升时,优先排查阻塞/泄漏。',

View File

@ -30,7 +30,11 @@ const colors = computed(() => ({
text: isDarkMode.value ? '#9ca3af' : '#6b7280'
}))
const hasData = computed(() => (props.data?.total ?? 0) > 0)
const totalSlaErrors = computed(() =>
(props.data?.items ?? []).reduce((total, item) => total + Number(item.sla || 0), 0)
)
const hasData = computed(() => totalSlaErrors.value > 0)
const state = computed<ChartState>(() => {
if (hasData.value) return 'ready'
@ -54,7 +58,7 @@ const categories = computed<ErrorCategory[]>(() => {
for (const item of props.data.items || []) {
const code = Number(item.status_code || 0)
const count = Number(item.total || 0)
const count = Number(item.sla || 0)
if (!Number.isFinite(code) || !Number.isFinite(count)) continue
if ([502, 503, 504].includes(code)) upstream += count

View File

@ -45,9 +45,7 @@ const colors = computed(() => ({
text: isDarkMode.value ? '#9ca3af' : '#6b7280'
}))
const totalRequestErrors = computed(() =>
sumNumbers(props.points.map((p) => (p.error_count_sla ?? 0) + (p.business_limited_count ?? 0)))
)
const totalRequestErrors = computed(() => sumNumbers(props.points.map((p) => p.error_count_sla ?? 0)))
const totalUpstreamErrors = computed(() =>
sumNumbers(

View File

@ -0,0 +1,147 @@
import { mount } from '@vue/test-utils'
import { describe, expect, it, vi } from 'vitest'
import { defineComponent } from 'vue'
import OpsErrorDistributionChart from '../OpsErrorDistributionChart.vue'
import OpsErrorTrendChart from '../OpsErrorTrendChart.vue'
// Replace chart.js with inert placeholders: Chart.register becomes a spy and
// every registered element/plugin is an empty object.
vi.mock('chart.js', () => ({
Chart: { register: vi.fn() },
ArcElement: {},
CategoryScale: {},
Filler: {},
Legend: {},
LineElement: {},
LinearScale: {},
PointElement: {},
Title: {},
Tooltip: {},
}))
// Replace the vue-chartjs chart components with stubs that accept the same
// `data` / `options` props and render only a marker <div>, so tests can read
// the props a chart would have received.
vi.mock('vue-chartjs', async () => {
const { defineComponent } = await import('vue')
return {
Doughnut: defineComponent({
name: 'Doughnut',
props: {
data: { type: Object, required: true },
options: { type: Object, default: () => ({}) },
},
template: '<div class="doughnut-stub" />',
}),
Line: defineComponent({
name: 'LineChartStub',
props: {
data: { type: Object, required: true },
options: { type: Object, default: () => ({}) },
},
template: '<div class="line-stub" />',
}),
}
})
// Minimal stand-ins for the formatter helpers: formatHistoryLabel echoes the
// date (or '' when undefined) and sumNumbers adds up only finite numbers.
vi.mock('../../utils/opsFormatters', () => ({
formatHistoryLabel: (date: string | undefined) => date ?? '',
sumNumbers: (values: Array<number | null | undefined>) =>
values.reduce<number>((total, value) => total + (typeof value === 'number' && Number.isFinite(value) ? value : 0), 0),
}))
// Keep the real vue-i18n exports but make useI18n().t return the key itself,
// so assertions can match on translation keys.
vi.mock('vue-i18n', async (importOriginal) => {
const actual = await importOriginal<typeof import('vue-i18n')>()
return {
...actual,
useI18n: () => ({
t: (key: string) => key,
}),
}
})
// Stub for the HelpTooltip component: accepts `content` and renders a marker <span>.
const HelpTooltipStub = defineComponent({
name: 'HelpTooltip',
props: {
content: { type: String, default: '' },
},
template: '<span class="help-tooltip-stub" />',
})
// Stub for the EmptyState component: accepts `title` / `description` and
// renders a marker <div> that tests look up via `.empty-state-stub`.
const EmptyStateStub = defineComponent({
name: 'EmptyState',
props: {
title: { type: String, default: '' },
description: { type: String, default: '' },
},
template: '<div class="empty-state-stub" />',
})
// Shared `global` mount option that swaps in the two stubs above.
const globalStubs = {
stubs: {
HelpTooltip: HelpTooltipStub,
EmptyState: EmptyStateStub,
},
}
// Verifies that the ops error charts count only SLA-scope errors (`sla` /
// `error_count_sla`) and ignore business-limited errors.
describe('Ops SLA-scoped error charts', () => {
// Distribution chart: only the `sla` counts feed the doughnut. The 400 item
// contributes 2 (its 5 business-limited errors are ignored) and the 503 item,
// with sla 0, contributes nothing, leaving a single client category.
it('错误分布图按 SLA 错误数统计,不把业务限制错误算进请求错误分布', () => {
const wrapper = mount(OpsErrorDistributionChart, {
props: {
loading: false,
data: {
total: 10,
items: [
{ status_code: 400, total: 7, sla: 2, business_limited: 5 },
{ status_code: 503, total: 3, sla: 0, business_limited: 3 },
],
},
},
global: globalStubs,
})
const doughnut = wrapper.findComponent({ name: 'Doughnut' })
expect(doughnut.exists()).toBe(true)
expect(doughnut.props('data')).toMatchObject({
labels: ['admin.ops.client'],
datasets: [{ data: [2] }],
})
})
// Distribution chart: when every error is business-limited (sla sums to 0)
// the chart is not rendered and the empty state is shown, despite total > 0.
it('错误分布图在只有业务限制错误时显示为空态', () => {
const wrapper = mount(OpsErrorDistributionChart, {
props: {
loading: false,
data: {
total: 4,
items: [{ status_code: 500, total: 4, sla: 0, business_limited: 4 }],
},
},
global: globalStubs,
})
expect(wrapper.findComponent({ name: 'Doughnut' }).exists()).toBe(false)
expect(wrapper.find('.empty-state-stub').exists()).toBe(true)
})
// Trend chart: the first button (the request-errors details button, per the
// test title) must be disabled when error_count_sla is 0, even though
// business_limited_count is non-zero.
it('错误趋势图的请求错误详情按钮只按 SLA 错误启用', () => {
const wrapper = mount(OpsErrorTrendChart, {
props: {
loading: false,
timeRange: '1h',
points: [
{
bucket_start: '2026-05-18T00:00:00Z',
error_count_total: 5,
business_limited_count: 5,
error_count_sla: 0,
upstream_error_count_excl_429_529: 0,
upstream_429_count: 0,
upstream_529_count: 0,
},
],
},
global: globalStubs,
})
const requestErrorsButton = wrapper.findAll('button')[0]
expect(requestErrorsButton.attributes('disabled')).toBeDefined()
})
})