diff --git a/backend/internal/handler/ops_error_logger.go b/backend/internal/handler/ops_error_logger.go index 398124cc..c8803aab 100644 --- a/backend/internal/handler/ops_error_logger.go +++ b/backend/internal/handler/ops_error_logger.go @@ -1220,15 +1220,19 @@ func classifyOpsIsRetryable(errType string, statusCode int) bool { func classifyOpsErrorLog(c *gin.Context, errType, message, code string, status int) (phase string, isBusinessLimited bool, errorOwner string, errorSource string) { phase = classifyOpsPhase(errType, message, code) routingCapacityLimited := isOpsRoutingCapacityLimited(c) + clientBusinessLimited := service.HasOpsClientBusinessLimited(c) upstreamError := hasOpsUpstreamErrorContext(c) if upstreamError && !routingCapacityLimited { phase = "upstream" } + if clientBusinessLimited && !upstreamError && !routingCapacityLimited { + phase = "auth" + } if routingCapacityLimited { phase = "routing" } localClientAuthError := !upstreamError && phase == "auth" && isOpsClientAuthError(code, strings.ToLower(message)) - isBusinessLimited = routingCapacityLimited || classifyOpsIsBusinessLimited(errType, phase, code, status, message, localClientAuthError) + isBusinessLimited = routingCapacityLimited || clientBusinessLimited || classifyOpsIsBusinessLimited(errType, phase, code, status, message, localClientAuthError) errorOwner = classifyOpsErrorOwner(phase, message) errorSource = classifyOpsErrorSource(phase, message) return phase, isBusinessLimited, errorOwner, errorSource diff --git a/backend/internal/handler/ops_error_logger_test.go b/backend/internal/handler/ops_error_logger_test.go index e1df03cc..2a141fdf 100644 --- a/backend/internal/handler/ops_error_logger_test.go +++ b/backend/internal/handler/ops_error_logger_test.go @@ -370,6 +370,37 @@ func TestClassifyOpsAuthClientErrorsExcludedFromSLA(t *testing.T) { } } +func TestClassifyOpsIPRestrictionAccessDeniedExcludedFromSLA(t *testing.T) { + gin.SetMode(gin.TestMode) + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + service.MarkOpsClientBusinessLimited(c, service.OpsClientBusinessLimitedReasonIPRestriction) + + errType := normalizeOpsErrorType("api_error", "ACCESS_DENIED") + phase, isBusinessLimited, errorOwner, errorSource := classifyOpsErrorLog(c, errType, "Access denied", "ACCESS_DENIED", http.StatusForbidden) + + require.Equal(t, "api_error", errType) + require.Equal(t, "auth", phase) + require.True(t, isBusinessLimited) + require.Equal(t, "client", errorOwner) + require.Equal(t, "client_request", errorSource) +} + +func TestClassifyOpsOtherErrorsStillCountForSLA(t *testing.T) { + gin.SetMode(gin.TestMode) + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + + errType := normalizeOpsErrorType("api_error", "INTERNAL_ERROR") + phase, isBusinessLimited, errorOwner, errorSource := classifyOpsErrorLog(c, errType, "Failed to validate API key", "INTERNAL_ERROR", http.StatusInternalServerError) + + require.Equal(t, "api_error", errType) + require.Equal(t, "internal", phase) + require.False(t, isBusinessLimited) + require.Equal(t, "platform", errorOwner) + require.Equal(t, "gateway", errorSource) +} + func TestClassifyOpsUnsupportedModelExcludedFromSLA(t *testing.T) { tests := []string{ "No available accounts: no available accounts supporting model: made-up-model", diff --git a/backend/internal/server/middleware/api_key_auth.go b/backend/internal/server/middleware/api_key_auth.go index 972c1eaf..c15f534e 100644 --- a/backend/internal/server/middleware/api_key_auth.go +++ b/backend/internal/server/middleware/api_key_auth.go @@ -92,6 +92,7 @@ func apiKeyAuthWithSubscription(apiKeyService *service.APIKeyService, subscripti clientIP := ip.GetTrustedClientIP(c) allowed, _ := ip.CheckIPRestrictionWithCompiledRules(clientIP, apiKey.CompiledIPWhitelist, apiKey.CompiledIPBlacklist) if !allowed { + service.MarkOpsClientBusinessLimited(c, service.OpsClientBusinessLimitedReasonIPRestriction) AbortWithError(c, 403, "ACCESS_DENIED", "Access denied") return } diff --git a/backend/internal/server/middleware/api_key_auth_test.go b/backend/internal/server/middleware/api_key_auth_test.go index 4a4ab0f9..d6760d8d 100644 --- a/backend/internal/server/middleware/api_key_auth_test.go +++ b/backend/internal/server/middleware/api_key_auth_test.go @@ -333,6 +333,15 @@ func TestAPIKeyAuthIPRestrictionDoesNotTrustSpoofedForwardHeaders(t *testing.T) apiKeyService := service.NewAPIKeyService(apiKeyRepo, nil, nil, nil, nil, nil, cfg) router := gin.New() require.NoError(t, router.SetTrustedProxies(nil)) + var markedBusinessLimited bool + var businessLimitedReason string + router.Use(func(c *gin.Context) { + c.Next() + markedBusinessLimited = service.HasOpsClientBusinessLimited(c) + if v, ok := c.Get(service.OpsClientBusinessLimitedReasonKey); ok { + businessLimitedReason, _ = v.(string) + } + }) router.Use(gin.HandlerFunc(NewAPIKeyAuthMiddleware(apiKeyService, nil, cfg))) router.GET("/t", func(c *gin.Context) { c.JSON(http.StatusOK, gin.H{"ok": true}) @@ -349,6 +358,8 @@ func TestAPIKeyAuthIPRestrictionDoesNotTrustSpoofedForwardHeaders(t *testing.T) require.Equal(t, http.StatusForbidden, w.Code) require.Contains(t, w.Body.String(), "ACCESS_DENIED") + require.True(t, markedBusinessLimited) + require.Equal(t, service.OpsClientBusinessLimitedReasonIPRestriction, businessLimitedReason) } func TestAPIKeyAuthTouchesLastUsedOnSuccess(t *testing.T) { diff --git a/backend/internal/service/ops_upstream_context.go b/backend/internal/service/ops_upstream_context.go index 05d444e1..5c8ac5a6 100644 --- a/backend/internal/service/ops_upstream_context.go +++ b/backend/internal/service/ops_upstream_context.go @@ -36,6 +36,12 @@ const ( // OpsSkipPassthroughKey 由 applyErrorPassthroughRule 在命中 skip_monitoring=true 的规则时设置。 // ops_error_logger 中间件检查此 key,为 true 时跳过错误记录。 OpsSkipPassthroughKey = "ops_skip_passthrough" + + // Client-side configuration denials should remain visible in ops_error_logs, + // but should be excluded from SLA/error-rate calculations. + OpsClientBusinessLimitedKey = "ops_client_business_limited" + OpsClientBusinessLimitedReasonKey = "ops_client_business_limited_reason" + OpsClientBusinessLimitedReasonIPRestriction = "api_key_ip_restriction" ) func setOpsUpstreamRequestBody(c *gin.Context, body []byte) { @@ -53,6 +59,28 @@ func SetOpsLatencyMs(c *gin.Context, key string, value int64) { c.Set(key, value) } +func MarkOpsClientBusinessLimited(c *gin.Context, reason string) { + if c == nil { + return + } + c.Set(OpsClientBusinessLimitedKey, true) + if reason = strings.TrimSpace(reason); reason != "" { + c.Set(OpsClientBusinessLimitedReasonKey, reason) + } +} + +func HasOpsClientBusinessLimited(c *gin.Context) bool { + if c == nil { + return false + } + v, ok := c.Get(OpsClientBusinessLimitedKey) + if !ok { + return false + } + marked, _ := v.(bool) + return marked +} + // SetOpsUpstreamError is the exported wrapper for setOpsUpstreamError, used by // handler-layer code (e.g. failover-exhausted paths) that needs to record the // original upstream status code before mapping it to a client-facing code.