diff --git a/backend/internal/service/ratelimit_service.go b/backend/internal/service/ratelimit_service.go index d12824ec..ecbd86d1 100644 --- a/backend/internal/service/ratelimit_service.go +++ b/backend/internal/service/ratelimit_service.go @@ -248,17 +248,15 @@ func (s *RateLimitService) HandleUpstreamError(ctx context.Context, account *Acc shouldDisable = true break } - // 2. 设置 expires_at 为当前时间,强制下次请求刷新 token - if account.Credentials == nil { - account.Credentials = make(map[string]any) - } - account.Credentials["expires_at"] = time.Now().Format(time.RFC3339) - if err := persistAccountCredentials(ctx, s.accountRepo, account, account.Credentials); err != nil { - slog.Warn("oauth_401_force_refresh_update_failed", "account_id", account.ID, "error", err) - } else { - slog.Info("oauth_401_force_refresh_set", "account_id", account.ID, "platform", account.Platform) - } - // 3. 临时不可调度,替代 SetError(保持 status=active 让刷新服务能拾取) + // 2. 临时不可调度,替代 SetError(保持 status=active 让刷新服务能拾取) + // 注意:此处不再写回 account.Credentials/expires_at。 + // 原实现使用请求开始时的 account 快照整列覆盖 credentials JSONB(见 + // persistAccountCredentials → accountRepository.UpdateCredentials → SetCredentials), + // 在另一个 worker 刚刷新完 refresh_token 的窄窗口内会把新 refresh_token 回滚为旧值, + // 导致下一周期用旧 refresh_token 调上游拿到 invalid_grant 后, + // tryRecoverFromRefreshRace 重读 DB 发现 currentRT == usedRT 也救不回来,账号被错误 disable。 + // 这里仅依赖 InvalidateToken + SetTempUnschedulable 让账号在冷却期内不被调度, + // 冷却结束后由 token_provider 的 NeedsRefresh / token_refresh_service 走带分布式锁的正路刷新。 msg := "Authentication failed (401): invalid or expired credentials" if upstreamMsg != "" { msg = "OAuth 401: " + upstreamMsg diff --git a/backend/internal/service/ratelimit_service_401_test.go b/backend/internal/service/ratelimit_service_401_test.go index a964775e..873aaf33 100644 --- a/backend/internal/service/ratelimit_service_401_test.go +++ b/backend/internal/service/ratelimit_service_401_test.go @@ -129,7 +129,10 @@ func TestRateLimitService_HandleUpstreamError_OAuth401SetsTempUnschedulable(t *t } // TestRateLimitService_HandleUpstreamError_OAuth401InvalidatorError -// OpenAI OAuth 401 缓存失效出错时仍走 temp_unschedulable +// OpenAI OAuth 401 缓存失效出错时仍走 temp_unschedulable。 +// 注意:401 handler 不再回写 credentials(避免请求开始时的快照整列覆盖 DB +// 把另一个 worker 刚刷新出来的新 refresh_token 回滚为旧值), +// 因此 updateCredentialsCalls 应当为 0。 func TestRateLimitService_HandleUpstreamError_OAuth401InvalidatorError(t *testing.T) { repo := &rateLimitAccountRepoStub{} invalidator := &tokenCacheInvalidatorRecorder{err: errors.New("boom")} @@ -149,7 +152,7 @@ func TestRateLimitService_HandleUpstreamError_OAuth401InvalidatorError(t *testin require.True(t, shouldDisable) require.Equal(t, 0, repo.setErrorCalls) require.Equal(t, 1, repo.tempCalls) - require.Equal(t, 1, repo.updateCredentialsCalls) + require.Equal(t, 0, repo.updateCredentialsCalls) require.Len(t, invalidator.accounts, 1) } @@ -171,7 +174,12 @@ func TestRateLimitService_HandleUpstreamError_NonOAuth401(t *testing.T) { require.Empty(t, invalidator.accounts) } -func TestRateLimitService_HandleUpstreamError_OAuth401UsesCredentialsUpdater(t *testing.T) { +// TestRateLimitService_HandleUpstreamError_OAuth401DoesNotOverwriteCredentials +// 回归测试:确保 401 handler 不再使用请求开始时的 account 快照写回 credentials。 +// 原实现会通过 persistAccountCredentials → UpdateCredentials → SetCredentials +// 整列覆盖 credentials JSONB,在另一个 worker 刚刷新完 refresh_token 的窄窗口内 +// 会把新 refresh_token 回滚为快照中的旧值,导致下一周期拿 invalid_grant 被错误 disable。 +func TestRateLimitService_HandleUpstreamError_OAuth401DoesNotOverwriteCredentials(t *testing.T) { repo := &rateLimitAccountRepoStub{} service := NewRateLimitService(repo, nil, &config.Config{}, nil, nil) account := &Account{ @@ -187,8 +195,9 @@ func TestRateLimitService_HandleUpstreamError_OAuth401UsesCredentialsUpdater(t * shouldDisable := service.HandleUpstreamError(context.Background(), account, 401, http.Header{}, []byte("unauthorized")) require.True(t, shouldDisable) - require.Equal(t, 1, repo.updateCredentialsCalls) - require.NotEmpty(t, repo.lastCredentials["expires_at"]) + require.Equal(t, 0, repo.updateCredentialsCalls, "401 handler must not write credentials back from the request-start snapshot") + require.Equal(t, 1, repo.tempCalls, "401 handler should still set temp-unschedulable cooldown") + require.Nil(t, repo.lastCredentials, "no credentials should have been persisted") } // 缺少 refresh_token 的 OAuth 账号 401 应直接 SetError 永久禁用,