From bec1e2b6978e390af4bdc097d28d787c2425d5bb Mon Sep 17 00:00:00 2001 From: name <136912576+is7Qin@users.noreply.github.com> Date: Sat, 16 May 2026 19:40:23 +0800 Subject: [PATCH] =?UTF-8?q?fix(openai):=20=E6=B0=B8=E4=B9=85=E7=A6=81?= =?UTF-8?q?=E7=94=A8=E7=BC=BA=E5=A4=B1=20refresh=5Ftoken=20=E7=9A=84=20OAu?= =?UTF-8?q?th=20=E8=B4=A6=E5=8F=B7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit token_provider 在 expires_at 已过且 refresh_token 缺失时,仅返回 error,未做任何降级。 HandleUpstreamError 的 OAuth 401 分支也只走 10min 冷却,不区分账号是否具备刷新能力。 两条路径相加导致缺 refresh_token 的账号被反复选中、每次都在 token 阶段失败,对用户呈现持续 502。 token_provider.GetAccessToken: 命中"过期且无 refresh_token"时调用 SetError 永久禁用并清缓存, 依赖 background context 避免请求 ctx 提前结束影响落库。 ratelimit_service 401 OAuth 分支:refresh_token 为空时直接 SetError,不再写 expires_at、 不再 SetTempUnschedulable,缓存失效保留。RT 账号路径完全不动。 新增/调整测试覆盖两条路径,旧测试为 RT 路径补足 refresh_token 字段以保留原意图。 --- .../internal/service/openai_token_provider.go | 39 ++++++++++++- .../service/openai_token_provider_test.go | 31 ++++++++++ backend/internal/service/ratelimit_service.go | 11 ++++ .../service/ratelimit_service_401_test.go | 56 ++++++++++++++++++- 4 files changed, 135 insertions(+), 2 deletions(-) diff --git a/backend/internal/service/openai_token_provider.go b/backend/internal/service/openai_token_provider.go index a680d451..5b55d200 100644 --- a/backend/internal/service/openai_token_provider.go +++ b/backend/internal/service/openai_token_provider.go @@ -154,7 +154,11 @@ func (p *OpenAITokenProvider) GetAccessToken(ctx context.Context, account *Accou needsRefresh := expiresAt == nil || time.Until(*expiresAt) <= openAITokenRefreshSkew if needsRefresh && strings.TrimSpace(account.GetOpenAIRefreshToken()) == "" { if expiresAt != nil && !time.Now().Before(*expiresAt) { - return "", errors.New("openai access_token expired and refresh_token is missing") + const reason = "openai access_token expired and refresh_token is missing" + // 永久故障:缺失 refresh_token 时账号无法自愈,必须立即从调度池剔除, + // 否则会被反复选中、每次都在 token 阶段直接返回错误,对用户呈现持续 502。 + p.disableAccountMissingRefreshToken(account, reason) + return "", errors.New(reason) } needsRefresh = false } @@ -261,6 +265,39 @@ func (p *OpenAITokenProvider) GetAccessToken(ctx context.Context, account *Accou return accessToken, nil } +// disableAccountMissingRefreshToken 在请求路径上发现 OpenAI OAuth 账号 +// 凭证已过期且 refresh_token 缺失时,将账号标记为 error 状态。 +// 这是一种永久性故障:仅靠后续请求或 TokenRefreshService 不会自愈 +// (NeedsRefresh 也会因 refresh_token 为空直接跳过), +// 必须主动剔除以避免账号被持续选中导致用户端反复 502。 +// 使用 background context 是因为请求 context 可能很快结束。 +func (p *OpenAITokenProvider) disableAccountMissingRefreshToken(account *Account, reason string) { + if p == nil || p.accountRepo == nil || account == nil { + return + } + bgCtx := context.Background() + if err := p.accountRepo.SetError(bgCtx, account.ID, reason); err != nil { + slog.Warn("openai_token_provider.set_error_failed", + "account_id", account.ID, + "error", err, + ) + return + } + if p.tokenCache != nil { + cacheKey := OpenAITokenCacheKey(account) + if err := p.tokenCache.DeleteAccessToken(bgCtx, cacheKey); err != nil { + slog.Warn("openai_token_provider.cache_delete_failed", + "account_id", account.ID, + "error", err, + ) + } + } + slog.Warn("openai_token_provider.account_disabled_missing_refresh_token", + "account_id", account.ID, + "reason", reason, + ) +} + func (p *OpenAITokenProvider) waitForTokenAfterLockRace(ctx context.Context, cacheKey string) (string, error) { wait := openAILockInitialWait totalWaitMs := int64(0) diff --git a/backend/internal/service/openai_token_provider_test.go b/backend/internal/service/openai_token_provider_test.go index 4b69db8a..df2f0f3e 100644 --- a/backend/internal/service/openai_token_provider_test.go +++ b/backend/internal/service/openai_token_provider_test.go @@ -930,3 +930,34 @@ func TestOpenAITokenProvider_RuntimeMetrics_LockAcquireFailure(t *testing.T) { require.GreaterOrEqual(t, metrics.LockAcquireFailure, int64(1)) require.GreaterOrEqual(t, metrics.RefreshRequests, int64(1)) } + +func TestOpenAITokenProvider_NoRefreshTokenExpired_DisablesAccount(t *testing.T) { + cache := newOpenAITokenCacheStub() + repo := &rateLimitAccountRepoStub{} + + expiresAt := time.Now().Add(-time.Minute).UTC().Format(time.RFC3339) + account := &Account{ + ID: 2881, + Platform: PlatformOpenAI, + Type: AccountTypeOAuth, + Credentials: map[string]any{ + "access_token": "expired-access-token", + "expires_at": expiresAt, + }, + } + + cacheKey := OpenAITokenCacheKey(account) + cache.tokens[cacheKey] = "stale-cached-token" + // Force the provider past the cache hit branch. + cache.getErr = errors.New("simulated cache miss") + + provider := NewOpenAITokenProvider(repo, cache, nil) + + token, err := provider.GetAccessToken(context.Background(), account) + require.Error(t, err) + require.Empty(t, token) + require.Contains(t, err.Error(), "refresh_token is missing") + + require.Equal(t, 1, repo.setErrorCalls, "account should be disabled via SetError exactly once") + require.Contains(t, repo.lastErrorMsg, "refresh_token is missing") +} diff --git a/backend/internal/service/ratelimit_service.go b/backend/internal/service/ratelimit_service.go index 19c45a5a..892d9aca 100644 --- a/backend/internal/service/ratelimit_service.go +++ b/backend/internal/service/ratelimit_service.go @@ -209,6 +209,17 @@ func (s *RateLimitService) HandleUpstreamError(ctx context.Context, account *Acc slog.Warn("oauth_401_invalidate_cache_failed", "account_id", account.ID, "error", err) } } + // 缺少 refresh_token 的 OAuth 账号无法在冷却期内自愈(后台刷新服务也会跳过), + // 直接走 SetError 永久禁用,避免冷却结束后再被选中产生一发无意义的 502。 + if strings.TrimSpace(account.GetCredential("refresh_token")) == "" { + msg := "Authentication failed (401): refresh_token missing, cannot recover" + if upstreamMsg != "" { + msg = "OAuth 401 (no refresh_token): " + upstreamMsg + } + s.handleAuthError(ctx, account, msg) + shouldDisable = true + break + } // 2. 设置 expires_at 为当前时间,强制下次请求刷新 token if account.Credentials == nil { account.Credentials = make(map[string]any) diff --git a/backend/internal/service/ratelimit_service_401_test.go b/backend/internal/service/ratelimit_service_401_test.go index 73b7849f..a964775e 100644 --- a/backend/internal/service/ratelimit_service_401_test.go +++ b/backend/internal/service/ratelimit_service_401_test.go @@ -85,6 +85,7 @@ func TestRateLimitService_HandleUpstreamError_OAuth401SetsTempUnschedulable(t *t Platform: PlatformGemini, Type: AccountTypeOAuth, Credentials: map[string]any{ + "refresh_token": "rt-100", "temp_unschedulable_enabled": true, "temp_unschedulable_rules": []any{ map[string]any{ @@ -138,6 +139,9 @@ func TestRateLimitService_HandleUpstreamError_OAuth401InvalidatorError(t *testin ID: 101, Platform: PlatformOpenAI, Type: AccountTypeOAuth, + Credentials: map[string]any{ + "refresh_token": "rt-101", + }, } shouldDisable := service.HandleUpstreamError(context.Background(), account, 401, http.Header{}, []byte("unauthorized")) @@ -175,7 +179,8 @@ func TestRateLimitService_HandleUpstreamError_OAuth401UsesCredentialsUpdater(t * Platform: PlatformOpenAI, Type: AccountTypeOAuth, Credentials: map[string]any{ - "access_token": "token", + "access_token": "token", + "refresh_token": "rt-103", }, } @@ -185,3 +190,52 @@ func TestRateLimitService_HandleUpstreamError_OAuth401UsesCredentialsUpdater(t * require.Equal(t, 1, repo.updateCredentialsCalls) require.NotEmpty(t, repo.lastCredentials["expires_at"]) } + +// 缺少 refresh_token 的 OAuth 账号 401 应直接 SetError 永久禁用, +// 不再走 10 分钟冷却(冷却期内无人能刷新它,结束后还会被选中再 502 一次)。 +func TestRateLimitService_HandleUpstreamError_OAuth401NoRefreshTokenSetsError(t *testing.T) { + t.Run("openai_no_refresh_token", func(t *testing.T) { + repo := &rateLimitAccountRepoStub{} + invalidator := &tokenCacheInvalidatorRecorder{} + service := NewRateLimitService(repo, nil, &config.Config{}, nil, nil) + service.SetTokenCacheInvalidator(invalidator) + account := &Account{ + ID: 2881, + Platform: PlatformOpenAI, + Type: AccountTypeOAuth, + Credentials: map[string]any{ + "access_token": "expired-at", + // no refresh_token + }, + } + + shouldDisable := service.HandleUpstreamError(context.Background(), account, 401, http.Header{}, []byte("unauthorized")) + + require.True(t, shouldDisable) + require.Equal(t, 1, repo.setErrorCalls, "AT-only OAuth 401 must SetError") + require.Equal(t, 0, repo.tempCalls, "AT-only OAuth 401 must NOT temp-unschedule") + require.Equal(t, 0, repo.updateCredentialsCalls, "no point forcing expires_at when refresh is impossible") + require.Contains(t, repo.lastErrorMsg, "refresh_token missing") + require.Len(t, invalidator.accounts, 1, "cache should still be invalidated") + }) + + t.Run("openai_blank_refresh_token_treated_as_missing", func(t *testing.T) { + repo := &rateLimitAccountRepoStub{} + service := NewRateLimitService(repo, nil, &config.Config{}, nil, nil) + account := &Account{ + ID: 2882, + Platform: PlatformOpenAI, + Type: AccountTypeOAuth, + Credentials: map[string]any{ + "access_token": "expired-at", + "refresh_token": " ", + }, + } + + shouldDisable := service.HandleUpstreamError(context.Background(), account, 401, http.Header{}, []byte("unauthorized")) + + require.True(t, shouldDisable) + require.Equal(t, 1, repo.setErrorCalls) + require.Equal(t, 0, repo.tempCalls) + }) +}