From 3953dc9ce40e795637f48b2cc0bf4ddb489b2238 Mon Sep 17 00:00:00 2001 From: KnowSky404 Date: Thu, 30 Apr 2026 10:19:59 +0800 Subject: [PATCH 01/28] fix: add OpenAI compact bulk edit fields --- .../account/BulkEditAccountModal.vue | 151 +++++++++++++++++- .../__tests__/BulkEditAccountModal.spec.ts | 38 +++++ 2 files changed, 184 insertions(+), 5 deletions(-) diff --git a/frontend/src/components/account/BulkEditAccountModal.vue b/frontend/src/components/account/BulkEditAccountModal.vue index 05016a6d..c8d53220 100644 --- a/frontend/src/components/account/BulkEditAccountModal.vue +++ b/frontend/src/components/account/BulkEditAccountModal.vue @@ -779,6 +779,110 @@ + +
+
+
+ +

+ {{ t('admin.accounts.openai.compactModeDesc') }} +

+
+ +
+
+ +
+
+
+
+ + + + +
+
+ +
+
+
@@ -989,7 +1093,7 @@ import { ref, watch, computed } from 'vue' import { useI18n } from 'vue-i18n' import { useAppStore } from '@/stores/app' import { adminAPI } from '@/api/admin' -import type { Proxy as ProxyConfig, AdminGroup, AccountPlatform, AccountType } from '@/types' +import type { Proxy as ProxyConfig, AdminGroup, AccountPlatform, AccountType, OpenAICompactMode } from '@/types' import BaseDialog from '@/components/common/BaseDialog.vue' import ConfirmDialog from '@/components/common/ConfirmDialog.vue' import Select from '@/components/common/Select.vue' @@ -1115,6 +1219,8 @@ const enableOpenAIPassthrough = ref(false) const enableOpenAIWSMode = ref(false) const enableOpenAIAPIKeyWSMode = ref(false) const enableCodexCLIOnly = ref(false) +const enableOpenAICompactMode = ref(false) +const enableOpenAICompactModelMapping = ref(false) const enableRpmLimit = ref(false) // State - field values @@ -1140,6 +1246,8 @@ const openaiPassthroughEnabled = ref(false) const openaiOAuthResponsesWebSocketV2Mode = ref(OPENAI_WS_MODE_OFF) const openaiAPIKeyResponsesWebSocketV2Mode = ref(OPENAI_WS_MODE_OFF) const codexCLIOnlyEnabled = ref(false) +const openAICompactMode = ref('auto') +const openAICompactModelMappings = ref([]) const rpmLimitEnabled = ref(false) const bulkBaseRpm = ref(null) const bulkRpmStrategy = ref<'tiered' | 'sticky_exempt'>('tiered') @@ -1178,6 +1286,11 @@ const openAIWSModeOptions = computed(() => [ { value: OPENAI_WS_MODE_CTX_POOL, label: t('admin.accounts.openai.wsModeCtxPool') }, { value: OPENAI_WS_MODE_PASSTHROUGH, label: t('admin.accounts.openai.wsModePassthrough') } ]) +const openAICompactModeOptions = computed(() => [ + { value: 'auto', label: t('admin.accounts.openai.compactModeAuto') }, + { value: 'force_on', label: t('admin.accounts.openai.compactModeForceOn') }, + { value: 'force_off', label: t('admin.accounts.openai.compactModeForceOff') } +]) const openAIWSModeConcurrencyHintKey = computed(() => resolveOpenAIWSModeConcurrencyHintKey(openaiOAuthResponsesWebSocketV2Mode.value) ) @@ -1194,6 +1307,14 @@ const removeModelMapping = (index: number) => { modelMappings.value.splice(index, 1) } +const addOpenAICompactModelMapping = () => { + openAICompactModelMappings.value.push({ from: '', to: '' }) +} + +const removeOpenAICompactModelMapping = (index: number) => { + openAICompactModelMappings.value.splice(index, 1) +} + const addPresetMapping = (from: string, to: string) => { const exists = modelMappings.value.some((m) => m.from === from) if (exists) { @@ -1262,6 +1383,10 @@ const buildModelMappingObject = (): Record | null => { ) } +const buildOpenAICompactModelMapping = (): Record | null => { + return buildModelMappingPayload('mapping', [], openAICompactModelMappings.value) +} + const buildUpdatePayload = (): Record | null => { const updates: Record = {} const credentials: Record = {} @@ -1350,10 +1475,6 @@ const buildUpdatePayload = (): Record | null => { credentialsChanged = true } - if (credentialsChanged) { - updates.credentials = credentials - } - if (enableOpenAIWSMode.value) { const extra = ensureExtra() extra.openai_oauth_responses_websockets_v2_mode = openaiOAuthResponsesWebSocketV2Mode.value @@ -1375,6 +1496,16 @@ const buildUpdatePayload = (): Record | null => { extra.codex_cli_only = codexCLIOnlyEnabled.value } + if (enableOpenAICompactMode.value) { + const extra = ensureExtra() + extra.openai_compact_mode = openAICompactMode.value + } + + if (enableOpenAICompactModelMapping.value) { + credentials.compact_model_mapping = buildOpenAICompactModelMapping() ?? {} + credentialsChanged = true + } + // RPM limit settings (写入 extra 字段) if (enableRpmLimit.value) { const extra = ensureExtra() @@ -1402,6 +1533,10 @@ const buildUpdatePayload = (): Record | null => { umqExtra.user_msg_queue_enabled = false // 清理旧字段(JSONB merge) } + if (credentialsChanged) { + updates.credentials = credentials + } + return Object.keys(updates).length > 0 ? updates : null } @@ -1467,6 +1602,8 @@ const handleSubmit = async () => { enableOpenAIWSMode.value || enableOpenAIAPIKeyWSMode.value || enableCodexCLIOnly.value || + enableOpenAICompactMode.value || + enableOpenAICompactModelMapping.value || enableRpmLimit.value || userMsgQueueMode.value !== null @@ -1567,6 +1704,8 @@ watch( enableOpenAIWSMode.value = false enableOpenAIAPIKeyWSMode.value = false enableCodexCLIOnly.value = false + enableOpenAICompactMode.value = false + enableOpenAICompactModelMapping.value = false enableRpmLimit.value = false // Reset all values @@ -1588,6 +1727,8 @@ watch( openaiOAuthResponsesWebSocketV2Mode.value = OPENAI_WS_MODE_OFF openaiAPIKeyResponsesWebSocketV2Mode.value = OPENAI_WS_MODE_OFF codexCLIOnlyEnabled.value = false + openAICompactMode.value = 'auto' + openAICompactModelMappings.value = [] rpmLimitEnabled.value = false bulkBaseRpm.value = null bulkRpmStrategy.value = 'tiered' diff --git a/frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts b/frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts index 50d170da..caa307fc 100644 --- a/frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts +++ b/frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts @@ -217,6 +217,44 @@ describe('BulkEditAccountModal', () => { }) }) + it('筛选 OpenAI 账号批量编辑应提交 Compact 模式和专属模型映射', async () => { + const wrapper = mountModal({ + accountIds: [], + selectedPlatforms: [], + selectedTypes: [], + target: { + mode: 'filtered', + filters: { platform: 'openai' }, + previewCount: 12, + selectedPlatforms: ['openai'], + selectedTypes: ['oauth', 'apikey'] + } + }) + + await wrapper.get('#bulk-edit-openai-compact-mode-enabled').setValue(true) + await wrapper.get('[data-testid="bulk-edit-openai-compact-mode-select"]').setValue('force_on') + await wrapper.get('#bulk-edit-openai-compact-model-mapping-enabled').setValue(true) + await wrapper.get('[data-testid="bulk-edit-openai-compact-model-mapping-add"]').trigger('click') + const inputs = wrapper.findAll('[data-testid="bulk-edit-openai-compact-model-mapping-input"]') + await inputs[0].setValue('gpt-5.4') + await inputs[1].setValue('gpt-5.4-openai-compact') + await wrapper.get('#bulk-edit-account-form').trigger('submit.prevent') + await flushPromises() + + expect(adminAPI.accounts.bulkUpdate).toHaveBeenCalledTimes(1) + expect(adminAPI.accounts.bulkUpdate).toHaveBeenCalledWith({ + filters: { platform: 'openai' }, + extra: { + openai_compact_mode: 'force_on' + }, + credentials: { + compact_model_mapping: { + 'gpt-5.4': 'gpt-5.4-openai-compact' + } + } + }) + }) + it('OpenAI 账号批量编辑可关闭自动透传', async () => { const wrapper = mountModal({ selectedPlatforms: ['openai'], From 4b904c887cc51003077ee01aba5f6c8429848718 Mon Sep 17 00:00:00 2001 From: gaoren002 Date: Thu, 30 Apr 2026 03:01:39 +0000 Subject: [PATCH 02/28] fix(rate-limit): make 429 fallback cooldown configurable --- backend/internal/config/config.go | 6 +- .../internal/handler/admin/setting_handler.go | 52 ++++++ backend/internal/handler/dto/settings.go | 6 + backend/internal/server/routes/admin.go | 3 + backend/internal/service/domain_constants.go | 3 + .../service/rate_limit_429_cooldown_test.go | 114 +++++++++++++ backend/internal/service/ratelimit_service.go | 63 ++++++-- backend/internal/service/setting_service.go | 49 ++++++ backend/internal/service/settings_view.go | 16 ++ deploy/config.example.yaml | 4 + frontend/src/api/admin/settings.ts | 26 +++ frontend/src/i18n/locales/en.ts | 10 ++ frontend/src/i18n/locales/zh.ts | 10 ++ frontend/src/views/admin/SettingsView.vue | 150 ++++++++++++++++++ .../admin/__tests__/SettingsView.spec.ts | 20 +++ 15 files changed, 520 insertions(+), 12 deletions(-) create mode 100644 backend/internal/service/rate_limit_429_cooldown_test.go diff --git a/backend/internal/config/config.go b/backend/internal/config/config.go index 87263db0..316ae9e2 100644 --- a/backend/internal/config/config.go +++ b/backend/internal/config/config.go @@ -1093,8 +1093,9 @@ type DefaultConfig struct { } type RateLimitConfig struct { - OverloadCooldownMinutes int `mapstructure:"overload_cooldown_minutes"` // 529过载冷却时间(分钟) - OAuth401CooldownMinutes int `mapstructure:"oauth_401_cooldown_minutes"` // OAuth 401临时不可调度冷却(分钟) + OverloadCooldownMinutes int `mapstructure:"overload_cooldown_minutes"` // 529过载冷却时间(分钟) + RateLimit429CooldownSeconds int `mapstructure:"rate_limit_429_cooldown_seconds"` // 429无重置时间时的默认回避时间(秒) + OAuth401CooldownMinutes int `mapstructure:"oauth_401_cooldown_minutes"` // OAuth 401临时不可调度冷却(分钟) } // APIKeyAuthCacheConfig API Key 认证缓存配置 @@ -1554,6 +1555,7 @@ func setDefaults() { // RateLimit viper.SetDefault("rate_limit.overload_cooldown_minutes", 10) + viper.SetDefault("rate_limit.rate_limit_429_cooldown_seconds", 5) viper.SetDefault("rate_limit.oauth_401_cooldown_minutes", 10) // Pricing - 从 model-price-repo 同步模型定价和上下文窗口数据(固定到 commit,避免分支漂移) diff --git a/backend/internal/handler/admin/setting_handler.go b/backend/internal/handler/admin/setting_handler.go index d6580191..1afaa7a2 100644 --- a/backend/internal/handler/admin/setting_handler.go +++ b/backend/internal/handler/admin/setting_handler.go @@ -2450,6 +2450,58 @@ func (h *SettingHandler) UpdateOverloadCooldownSettings(c *gin.Context) { }) } +// GetRateLimit429CooldownSettings 获取429默认回避配置 +// GET /api/v1/admin/settings/rate-limit-429-cooldown +func (h *SettingHandler) GetRateLimit429CooldownSettings(c *gin.Context) { + settings, err := h.settingService.GetRateLimit429CooldownSettings(c.Request.Context()) + if err != nil { + response.ErrorFrom(c, err) + return + } + + response.Success(c, dto.RateLimit429CooldownSettings{ + Enabled: settings.Enabled, + CooldownSeconds: settings.CooldownSeconds, + }) +} + +// UpdateRateLimit429CooldownSettingsRequest 更新429默认回避配置请求 +type UpdateRateLimit429CooldownSettingsRequest struct { + Enabled bool `json:"enabled"` + CooldownSeconds int `json:"cooldown_seconds"` +} + +// UpdateRateLimit429CooldownSettings 更新429默认回避配置 +// PUT /api/v1/admin/settings/rate-limit-429-cooldown +func (h *SettingHandler) UpdateRateLimit429CooldownSettings(c *gin.Context) { + var req UpdateRateLimit429CooldownSettingsRequest + if err := c.ShouldBindJSON(&req); err != nil { + response.BadRequest(c, "Invalid request: "+err.Error()) + return + } + + settings := &service.RateLimit429CooldownSettings{ + Enabled: req.Enabled, + CooldownSeconds: req.CooldownSeconds, + } + + if err := h.settingService.SetRateLimit429CooldownSettings(c.Request.Context(), settings); err != nil { + response.BadRequest(c, err.Error()) + return + } + + updatedSettings, err := h.settingService.GetRateLimit429CooldownSettings(c.Request.Context()) + if err != nil { + response.ErrorFrom(c, err) + return + } + + response.Success(c, dto.RateLimit429CooldownSettings{ + Enabled: updatedSettings.Enabled, + CooldownSeconds: updatedSettings.CooldownSeconds, + }) +} + // GetStreamTimeoutSettings 获取流超时处理配置 // GET /api/v1/admin/settings/stream-timeout func (h *SettingHandler) GetStreamTimeoutSettings(c *gin.Context) { diff --git a/backend/internal/handler/dto/settings.go b/backend/internal/handler/dto/settings.go index b865d703..551c0124 100644 --- a/backend/internal/handler/dto/settings.go +++ b/backend/internal/handler/dto/settings.go @@ -263,6 +263,12 @@ type OverloadCooldownSettings struct { CooldownMinutes int `json:"cooldown_minutes"` } +// RateLimit429CooldownSettings 429默认回避配置 DTO +type RateLimit429CooldownSettings struct { + Enabled bool `json:"enabled"` + CooldownSeconds int `json:"cooldown_seconds"` +} + // StreamTimeoutSettings 流超时处理配置 DTO type StreamTimeoutSettings struct { Enabled bool `json:"enabled"` diff --git a/backend/internal/server/routes/admin.go b/backend/internal/server/routes/admin.go index 1c786f50..0a6a2962 100644 --- a/backend/internal/server/routes/admin.go +++ b/backend/internal/server/routes/admin.go @@ -408,6 +408,9 @@ func registerSettingsRoutes(admin *gin.RouterGroup, h *handler.Handlers) { // 529过载冷却配置 adminSettings.GET("/overload-cooldown", h.Admin.Setting.GetOverloadCooldownSettings) adminSettings.PUT("/overload-cooldown", h.Admin.Setting.UpdateOverloadCooldownSettings) + // 429默认回避配置 + adminSettings.GET("/rate-limit-429-cooldown", h.Admin.Setting.GetRateLimit429CooldownSettings) + adminSettings.PUT("/rate-limit-429-cooldown", h.Admin.Setting.UpdateRateLimit429CooldownSettings) // 流超时处理配置 adminSettings.GET("/stream-timeout", h.Admin.Setting.GetStreamTimeoutSettings) adminSettings.PUT("/stream-timeout", h.Admin.Setting.UpdateStreamTimeoutSettings) diff --git a/backend/internal/service/domain_constants.go b/backend/internal/service/domain_constants.go index bddcf6ab..6eb901b3 100644 --- a/backend/internal/service/domain_constants.go +++ b/backend/internal/service/domain_constants.go @@ -286,6 +286,9 @@ const ( // SettingKeyOverloadCooldownSettings stores JSON config for 529 overload cooldown handling. SettingKeyOverloadCooldownSettings = "overload_cooldown_settings" + // SettingKeyRateLimit429CooldownSettings stores JSON config for 429 fallback cooldown handling. + SettingKeyRateLimit429CooldownSettings = "rate_limit_429_cooldown_settings" + // ========================= // Stream Timeout Handling // ========================= diff --git a/backend/internal/service/rate_limit_429_cooldown_test.go b/backend/internal/service/rate_limit_429_cooldown_test.go new file mode 100644 index 00000000..35206454 --- /dev/null +++ b/backend/internal/service/rate_limit_429_cooldown_test.go @@ -0,0 +1,114 @@ +//go:build unit + +package service + +import ( + "context" + "encoding/json" + "net/http" + "testing" + "time" + + "github.com/Wei-Shaw/sub2api/internal/config" + "github.com/stretchr/testify/require" +) + +type rateLimit429AccountRepoStub struct { + mockAccountRepoForGemini + rateLimitCalls int + lastRateLimitID int64 + lastRateLimitReset time.Time +} + +func (r *rateLimit429AccountRepoStub) SetRateLimited(_ context.Context, id int64, resetAt time.Time) error { + r.rateLimitCalls++ + r.lastRateLimitID = id + r.lastRateLimitReset = resetAt + return nil +} + +func TestGetRateLimit429CooldownSettings_DefaultsWhenNotSet(t *testing.T) { + repo := newMockSettingRepo() + svc := NewSettingService(repo, &config.Config{}) + + settings, err := svc.GetRateLimit429CooldownSettings(context.Background()) + require.NoError(t, err) + require.True(t, settings.Enabled) + require.Equal(t, 5, settings.CooldownSeconds) +} + +func TestGetRateLimit429CooldownSettings_ReadsFromDB(t *testing.T) { + repo := newMockSettingRepo() + data, _ := json.Marshal(RateLimit429CooldownSettings{Enabled: false, CooldownSeconds: 12}) + repo.data[SettingKeyRateLimit429CooldownSettings] = string(data) + svc := NewSettingService(repo, &config.Config{}) + + settings, err := svc.GetRateLimit429CooldownSettings(context.Background()) + require.NoError(t, err) + require.False(t, settings.Enabled) + require.Equal(t, 12, settings.CooldownSeconds) +} + +func TestSetRateLimit429CooldownSettings_EnabledRejectsOutOfRange(t *testing.T) { + svc := NewSettingService(newMockSettingRepo(), &config.Config{}) + + for _, seconds := range []int{0, -1, 7201, 99999} { + err := svc.SetRateLimit429CooldownSettings(context.Background(), &RateLimit429CooldownSettings{ + Enabled: true, CooldownSeconds: seconds, + }) + require.Error(t, err, "should reject enabled=true + cooldown_seconds=%d", seconds) + require.Contains(t, err.Error(), "cooldown_seconds must be between 1-7200") + } +} + +func TestHandle429_FallbackUsesDBSeconds(t *testing.T) { + accountRepo := &rateLimit429AccountRepoStub{} + settingRepo := newMockSettingRepo() + data, _ := json.Marshal(RateLimit429CooldownSettings{Enabled: true, CooldownSeconds: 12}) + settingRepo.data[SettingKeyRateLimit429CooldownSettings] = string(data) + + settingSvc := NewSettingService(settingRepo, &config.Config{}) + svc := NewRateLimitService(accountRepo, nil, &config.Config{}, nil, nil) + svc.SetSettingService(settingSvc) + + account := &Account{ID: 42, Platform: PlatformOpenAI, Type: AccountTypeOAuth} + before := time.Now() + svc.handle429(context.Background(), account, http.Header{}, []byte(`{"error":{"type":"rate_limit_error","message":"slow down"}}`)) + after := time.Now() + + require.Equal(t, 1, accountRepo.rateLimitCalls) + require.Equal(t, int64(42), accountRepo.lastRateLimitID) + require.True(t, !accountRepo.lastRateLimitReset.Before(before.Add(12*time.Second)) && !accountRepo.lastRateLimitReset.After(after.Add(12*time.Second))) +} + +func TestHandle429_FallbackDisabledSkipsLocalMark(t *testing.T) { + accountRepo := &rateLimit429AccountRepoStub{} + settingRepo := newMockSettingRepo() + data, _ := json.Marshal(RateLimit429CooldownSettings{Enabled: false, CooldownSeconds: 12}) + settingRepo.data[SettingKeyRateLimit429CooldownSettings] = string(data) + + settingSvc := NewSettingService(settingRepo, &config.Config{}) + svc := NewRateLimitService(accountRepo, nil, &config.Config{}, nil, nil) + svc.SetSettingService(settingSvc) + + account := &Account{ID: 43, Platform: PlatformOpenAI, Type: AccountTypeOAuth} + svc.handle429(context.Background(), account, http.Header{}, []byte(`{"error":{"type":"rate_limit_error","message":"slow down"}}`)) + + require.Zero(t, accountRepo.rateLimitCalls) +} + +func TestHandle429_FallbackUsesConfigSecondsWhenSettingServiceMissing(t *testing.T) { + accountRepo := &rateLimit429AccountRepoStub{} + cfg := &config.Config{} + cfg.RateLimit.RateLimit429CooldownSeconds = 9 + svc := NewRateLimitService(accountRepo, nil, cfg, nil, nil) + + account := &Account{ID: 44, Platform: PlatformGemini, Type: AccountTypeAPIKey} + before := time.Now() + svc.handle429(context.Background(), account, http.Header{}, []byte(`{"error":{"message":"slow down"}}`)) + after := time.Now() + + require.Equal(t, 1, accountRepo.rateLimitCalls) + require.Equal(t, int64(44), accountRepo.lastRateLimitID) + require.True(t, !accountRepo.lastRateLimitReset.Before(before.Add(9*time.Second)) && !accountRepo.lastRateLimitReset.After(after.Add(9*time.Second))) +} diff --git a/backend/internal/service/ratelimit_service.go b/backend/internal/service/ratelimit_service.go index 9344de47..293fc528 100644 --- a/backend/internal/service/ratelimit_service.go +++ b/backend/internal/service/ratelimit_service.go @@ -55,6 +55,11 @@ type geminiUsageTotalsBatchProvider interface { const geminiPrecheckCacheTTL = time.Minute +const ( + defaultRateLimit429CooldownSeconds = 5 + maxRateLimit429CooldownSeconds = 7200 +) + const ( openAI403CooldownMinutesDefault = 10 openAI403DisableThreshold = 3 @@ -891,12 +896,8 @@ func (s *RateLimitService) handle429(ctx context.Context, account *Account, head return } - // 其他平台:没有重置时间,使用默认5分钟 - resetAt := time.Now().Add(5 * time.Minute) - slog.Warn("rate_limit_no_reset_time", "account_id", account.ID, "platform", account.Platform, "using_default", "5m") - if err := s.accountRepo.SetRateLimited(ctx, account.ID, resetAt); err != nil { - slog.Warn("rate_limit_set_failed", "account_id", account.ID, "error", err) - } + // 其他平台:没有重置时间,使用可配置的秒级默认回避,避免误伤长时间不可调度。 + s.apply429FallbackRateLimit(ctx, account, "no_reset_time") return } @@ -904,10 +905,7 @@ func (s *RateLimitService) handle429(ctx context.Context, account *Account, head ts, err := strconv.ParseInt(resetTimestamp, 10, 64) if err != nil { slog.Warn("rate_limit_reset_parse_failed", "reset_timestamp", resetTimestamp, "error", err) - resetAt := time.Now().Add(5 * time.Minute) - if err := s.accountRepo.SetRateLimited(ctx, account.ID, resetAt); err != nil { - slog.Warn("rate_limit_set_failed", "account_id", account.ID, "error", err) - } + s.apply429FallbackRateLimit(ctx, account, "reset_parse_failed") return } @@ -929,6 +927,51 @@ func (s *RateLimitService) handle429(ctx context.Context, account *Account, head slog.Info("account_rate_limited", "account_id", account.ID, "reset_at", resetAt) } +func (s *RateLimitService) apply429FallbackRateLimit(ctx context.Context, account *Account, reason string) { + cooldown, enabled := s.get429FallbackCooldown(ctx, account) + if !enabled { + slog.Info("rate_limit_429_fallback_ignored", "account_id", account.ID, "platform", account.Platform, "reason", reason) + return + } + + resetAt := time.Now().Add(cooldown) + slog.Warn("rate_limit_429_fallback_used", "account_id", account.ID, "platform", account.Platform, "reason", reason, "using_default", cooldown.String()) + if err := s.accountRepo.SetRateLimited(ctx, account.ID, resetAt); err != nil { + slog.Warn("rate_limit_set_failed", "account_id", account.ID, "error", err) + } +} + +func (s *RateLimitService) get429FallbackCooldown(ctx context.Context, account *Account) (time.Duration, bool) { + if s.settingService != nil { + settings, err := s.settingService.GetRateLimit429CooldownSettings(ctx) + if err == nil && settings != nil { + if !settings.Enabled { + return 0, false + } + seconds := clampRateLimit429CooldownSeconds(settings.CooldownSeconds) + return time.Duration(seconds) * time.Second, true + } + slog.Warn("rate_limit_429_settings_read_failed", "account_id", account.ID, "error", err) + } + + seconds := defaultRateLimit429CooldownSeconds + if s.cfg != nil && s.cfg.RateLimit.RateLimit429CooldownSeconds > 0 { + seconds = s.cfg.RateLimit.RateLimit429CooldownSeconds + } + seconds = clampRateLimit429CooldownSeconds(seconds) + return time.Duration(seconds) * time.Second, true +} + +func clampRateLimit429CooldownSeconds(seconds int) int { + if seconds < 1 { + return 1 + } + if seconds > maxRateLimit429CooldownSeconds { + return maxRateLimit429CooldownSeconds + } + return seconds +} + // calculateOpenAI429ResetTime 从 OpenAI 429 响应头计算正确的重置时间 // 返回 nil 表示无法从响应头中确定重置时间 func calculateOpenAI429ResetTime(headers http.Header) *time.Time { diff --git a/backend/internal/service/setting_service.go b/backend/internal/service/setting_service.go index 966b4b84..c0cd9902 100644 --- a/backend/internal/service/setting_service.go +++ b/backend/internal/service/setting_service.go @@ -2748,6 +2748,55 @@ func (s *SettingService) SetOverloadCooldownSettings(ctx context.Context, settin return s.settingRepo.Set(ctx, SettingKeyOverloadCooldownSettings, string(data)) } +// GetRateLimit429CooldownSettings 获取429默认回避配置 +func (s *SettingService) GetRateLimit429CooldownSettings(ctx context.Context) (*RateLimit429CooldownSettings, error) { + value, err := s.settingRepo.GetValue(ctx, SettingKeyRateLimit429CooldownSettings) + if err != nil { + if errors.Is(err, ErrSettingNotFound) { + return DefaultRateLimit429CooldownSettings(), nil + } + return nil, fmt.Errorf("get 429 cooldown settings: %w", err) + } + if value == "" { + return DefaultRateLimit429CooldownSettings(), nil + } + + var settings RateLimit429CooldownSettings + if err := json.Unmarshal([]byte(value), &settings); err != nil { + return DefaultRateLimit429CooldownSettings(), nil + } + + if settings.CooldownSeconds < 1 { + settings.CooldownSeconds = 1 + } + if settings.CooldownSeconds > 7200 { + settings.CooldownSeconds = 7200 + } + + return &settings, nil +} + +// SetRateLimit429CooldownSettings 设置429默认回避配置 +func (s *SettingService) SetRateLimit429CooldownSettings(ctx context.Context, settings *RateLimit429CooldownSettings) error { + if settings == nil { + return fmt.Errorf("settings cannot be nil") + } + + if settings.CooldownSeconds < 1 || settings.CooldownSeconds > 7200 { + if settings.Enabled { + return fmt.Errorf("cooldown_seconds must be between 1-7200") + } + settings.CooldownSeconds = 5 + } + + data, err := json.Marshal(settings) + if err != nil { + return fmt.Errorf("marshal 429 cooldown settings: %w", err) + } + + return s.settingRepo.Set(ctx, SettingKeyRateLimit429CooldownSettings, string(data)) +} + // GetOIDCConnectOAuthConfig 返回用于登录的“最终生效” OIDC 配置。 // // 优先级: diff --git a/backend/internal/service/settings_view.go b/backend/internal/service/settings_view.go index c0962ff0..bc81d4ac 100644 --- a/backend/internal/service/settings_view.go +++ b/backend/internal/service/settings_view.go @@ -380,6 +380,14 @@ type OverloadCooldownSettings struct { CooldownMinutes int `json:"cooldown_minutes"` } +// RateLimit429CooldownSettings 429默认回避配置 +type RateLimit429CooldownSettings struct { + // Enabled 是否在无法解析上游重置时间时应用默认429回避 + Enabled bool `json:"enabled"` + // CooldownSeconds 默认回避时长(秒) + CooldownSeconds int `json:"cooldown_seconds"` +} + // DefaultOverloadCooldownSettings 返回默认的过载冷却配置(启用,10分钟) func DefaultOverloadCooldownSettings() *OverloadCooldownSettings { return &OverloadCooldownSettings{ @@ -388,6 +396,14 @@ func DefaultOverloadCooldownSettings() *OverloadCooldownSettings { } } +// DefaultRateLimit429CooldownSettings 返回默认的429回避配置(启用,5秒) +func DefaultRateLimit429CooldownSettings() *RateLimit429CooldownSettings { + return &RateLimit429CooldownSettings{ + Enabled: true, + CooldownSeconds: 5, + } +} + // DefaultBetaPolicySettings 返回默认的 Beta 策略配置 func DefaultBetaPolicySettings() *BetaPolicySettings { return &BetaPolicySettings{ diff --git a/deploy/config.example.yaml b/deploy/config.example.yaml index dfc363b5..01760657 100644 --- a/deploy/config.example.yaml +++ b/deploy/config.example.yaml @@ -924,6 +924,10 @@ rate_limit: # 上游返回 529(过载)时的冷却时间(分钟) overload_cooldown_minutes: 10 + # Default cooldown time (in seconds) when upstream returns 429 without a reset time + # 上游返回 429 且无明确重置时间时的默认回避时间(秒) + rate_limit_429_cooldown_seconds: 5 + # ============================================================================= # Pricing Data Source (Optional) # 定价数据源(可选) diff --git a/frontend/src/api/admin/settings.ts b/frontend/src/api/admin/settings.ts index e8ab6af5..f20ed8fb 100644 --- a/frontend/src/api/admin/settings.ts +++ b/frontend/src/api/admin/settings.ts @@ -803,6 +803,30 @@ export async function updateOverloadCooldownSettings( return data; } +// ==================== 429 Rate Limit Cooldown Settings ==================== + +export interface RateLimit429CooldownSettings { + enabled: boolean; + cooldown_seconds: number; +} + +export async function getRateLimit429CooldownSettings(): Promise { + const { data } = await apiClient.get( + "/admin/settings/rate-limit-429-cooldown", + ); + return data; +} + +export async function updateRateLimit429CooldownSettings( + settings: RateLimit429CooldownSettings, +): Promise { + const { data } = await apiClient.put( + "/admin/settings/rate-limit-429-cooldown", + settings, + ); + return data; +} + // ==================== Stream Timeout Settings ==================== /** @@ -1022,6 +1046,8 @@ export const settingsAPI = { deleteAdminApiKey, getOverloadCooldownSettings, updateOverloadCooldownSettings, + getRateLimit429CooldownSettings, + updateRateLimit429CooldownSettings, getStreamTimeoutSettings, updateStreamTimeoutSettings, getRectifierSettings, diff --git a/frontend/src/i18n/locales/en.ts b/frontend/src/i18n/locales/en.ts index 0425955f..93ada241 100644 --- a/frontend/src/i18n/locales/en.ts +++ b/frontend/src/i18n/locales/en.ts @@ -5484,6 +5484,16 @@ export default { saved: 'Overload cooldown settings saved', saveFailed: 'Failed to save overload cooldown settings' }, + rateLimit429Cooldown: { + title: '429 Default Cooldown', + description: 'Configure the default account cooldown when upstream returns 429 without an explicit reset time', + enabled: 'Enable 429 Default Cooldown', + enabledHint: 'Pause account scheduling when a 429 has no reset time, then auto-recover after cooldown', + cooldownSeconds: 'Cooldown Duration (seconds)', + cooldownSecondsHint: 'Default cooldown duration (1-7200 seconds); explicit upstream reset times still take precedence', + saved: '429 default cooldown settings saved', + saveFailed: 'Failed to save 429 default cooldown settings' + }, streamTimeout: { title: 'Stream Timeout Handling', description: 'Configure account handling strategy when upstream response times out', diff --git a/frontend/src/i18n/locales/zh.ts b/frontend/src/i18n/locales/zh.ts index a8656a7b..e0189350 100644 --- a/frontend/src/i18n/locales/zh.ts +++ b/frontend/src/i18n/locales/zh.ts @@ -5644,6 +5644,16 @@ export default { saved: '过载冷却设置保存成功', saveFailed: '保存过载冷却设置失败' }, + rateLimit429Cooldown: { + title: '429 默认回避', + description: '配置上游返回 429 且没有明确重置时间时的默认账号回避策略', + enabled: '启用 429 默认回避', + enabledHint: '收到无重置时间的 429 时暂停该账号调度,冷却后自动恢复', + cooldownSeconds: '回避时长(秒)', + cooldownSecondsHint: '默认回避持续时间(1-7200 秒);上游返回明确 reset 时仍优先使用上游时间', + saved: '429 默认回避设置保存成功', + saveFailed: '保存 429 默认回避设置失败' + }, streamTimeout: { title: '流超时处理', description: '配置上游响应超时时的账户处理策略,避免问题账户持续被选中', diff --git a/frontend/src/views/admin/SettingsView.vue b/frontend/src/views/admin/SettingsView.vue index ad0587b8..55e69820 100644 --- a/frontend/src/views/admin/SettingsView.vue +++ b/frontend/src/views/admin/SettingsView.vue @@ -291,6 +291,113 @@
+ +
+
+

+ {{ t("admin.settings.rateLimit429Cooldown.title") }} +

+

+ {{ t("admin.settings.rateLimit429Cooldown.description") }} +

+
+
+
+
+ {{ t("common.loading") }} +
+ + +
+
+
{ loadSubscriptionGroups(); loadAdminApiKey(); loadOverloadCooldownSettings(); + loadRateLimit429CooldownSettings(); loadStreamTimeoutSettings(); loadRectifierSettings(); loadBetaPolicySettings(); diff --git a/frontend/src/views/admin/__tests__/SettingsView.spec.ts b/frontend/src/views/admin/__tests__/SettingsView.spec.ts index 239c474e..9144649c 100644 --- a/frontend/src/views/admin/__tests__/SettingsView.spec.ts +++ b/frontend/src/views/admin/__tests__/SettingsView.spec.ts @@ -11,6 +11,8 @@ const { updateWebSearchEmulationConfig, getAdminApiKey, getOverloadCooldownSettings, + getRateLimit429CooldownSettings, + updateRateLimit429CooldownSettings, getStreamTimeoutSettings, getRectifierSettings, getBetaPolicySettings, @@ -31,6 +33,8 @@ const { updateWebSearchEmulationConfig: vi.fn(), getAdminApiKey: vi.fn(), getOverloadCooldownSettings: vi.fn(), + getRateLimit429CooldownSettings: vi.fn(), + updateRateLimit429CooldownSettings: vi.fn(), getStreamTimeoutSettings: vi.fn(), getRectifierSettings: vi.fn(), getBetaPolicySettings: vi.fn(), @@ -57,6 +61,8 @@ vi.mock("@/api", () => ({ updateWebSearchEmulationConfig, getAdminApiKey, getOverloadCooldownSettings, + getRateLimit429CooldownSettings, + updateRateLimit429CooldownSettings, getStreamTimeoutSettings, getRectifierSettings, getBetaPolicySettings, @@ -453,6 +459,8 @@ describe("admin SettingsView payment visible method controls", () => { updateWebSearchEmulationConfig.mockReset(); getAdminApiKey.mockReset(); getOverloadCooldownSettings.mockReset(); + getRateLimit429CooldownSettings.mockReset(); + updateRateLimit429CooldownSettings.mockReset(); getStreamTimeoutSettings.mockReset(); getRectifierSettings.mockReset(); getBetaPolicySettings.mockReset(); @@ -489,6 +497,11 @@ describe("admin SettingsView payment visible method controls", () => { enabled: true, cooldown_minutes: 10, }); + getRateLimit429CooldownSettings.mockResolvedValue({ + enabled: true, + cooldown_seconds: 5, + }); + updateRateLimit429CooldownSettings.mockImplementation(async (payload) => payload); getStreamTimeoutSettings.mockResolvedValue({ enabled: true, action: "temp_unsched", @@ -669,6 +682,8 @@ describe("admin SettingsView wechat connect controls", () => { updateWebSearchEmulationConfig.mockReset(); getAdminApiKey.mockReset(); getOverloadCooldownSettings.mockReset(); + getRateLimit429CooldownSettings.mockReset(); + updateRateLimit429CooldownSettings.mockReset(); getStreamTimeoutSettings.mockReset(); getRectifierSettings.mockReset(); getBetaPolicySettings.mockReset(); @@ -708,6 +723,11 @@ describe("admin SettingsView wechat connect controls", () => { enabled: true, cooldown_minutes: 10, }); + getRateLimit429CooldownSettings.mockResolvedValue({ + enabled: true, + cooldown_seconds: 5, + }); + updateRateLimit429CooldownSettings.mockImplementation(async (payload) => payload); getStreamTimeoutSettings.mockResolvedValue({ enabled: true, action: "temp_unsched", From 4e4cc809711a3dd734bd19d086676f235094e533 Mon Sep 17 00:00:00 2001 From: alfadb-bot Date: Thu, 30 Apr 2026 19:25:45 +0800 Subject: [PATCH 03/28] fix(openai-gateway): route APIKey accounts to /v1/chat/completions when upstream lacks Responses API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OpenAI APIKey accounts with base_url pointing to third-party OpenAI-compatible upstreams (DeepSeek, Kimi, GLM, Qwen, etc.) were failing because the gateway unconditionally converted Chat Completions requests to Responses format and forwarded to {base_url}/v1/responses, which only exists on OpenAI's official endpoint. Detection-based routing: - Probe upstream capability on account create/update via a minimal POST to /v1/responses; HTTP 404/405 means 'unsupported', any other response means 'supported'. - Persist result as accounts.extra.openai_responses_supported (bool). - ForwardAsChatCompletions branches at function entry: APIKey accounts with explicit support=false go through new forwardAsRawChatCompletions which passthrough-forwards CC body to /v1/chat/completions without protocol conversion. Default behavior for accounts without the marker preserves the legacy 'always Responses' path — existing OpenAI APIKey accounts that were working before this change continue to work without modification (the 'reality is evidence' principle: an account that has been running implies upstream capability). Probe is fired async after Create / Update / BatchCreate; failures only log, never block the admin flow. BulkUpdate omitted (low signal of base_url changes; can be added if needed). Implementation: - New pkg internal/pkg/openai_compat: marker key + ShouldUseResponsesAPI - New service file openai_apikey_responses_probe.go: probe + persist - New service file openai_gateway_chat_completions_raw.go: CC pass-through - Account test endpoint short-circuits with explicit message for probed-unsupported accounts (full CC test path is a TODO) Zero schema changes, zero migrations, zero frontend changes, zero wire modifications — all wired through existing AccountTestService injection. Closes: DeepSeek-OpenAI account (id=128) production failure --- .../internal/handler/admin/account_handler.go | 32 ++ .../pkg/openai_compat/upstream_capability.go | 75 ++++ .../openai_compat/upstream_capability_test.go | 55 +++ .../internal/service/account_test_service.go | 10 + .../service/openai_apikey_responses_probe.go | 149 +++++++ .../openai_gateway_chat_completions.go | 22 +- .../openai_gateway_chat_completions_raw.go | 368 ++++++++++++++++++ 7 files changed, 708 insertions(+), 3 deletions(-) create mode 100644 backend/internal/pkg/openai_compat/upstream_capability.go create mode 100644 backend/internal/pkg/openai_compat/upstream_capability_test.go create mode 100644 backend/internal/service/openai_apikey_responses_probe.go create mode 100644 backend/internal/service/openai_gateway_chat_completions_raw.go diff --git a/backend/internal/handler/admin/account_handler.go b/backend/internal/handler/admin/account_handler.go index 2d00ccc6..c93fff7b 100644 --- a/backend/internal/handler/admin/account_handler.go +++ b/backend/internal/handler/admin/account_handler.go @@ -637,9 +637,39 @@ func (h *AccountHandler) Update(c *gin.Context) { return } + // OpenAI APIKey: credentials 修改后重新探测上游能力(base_url/api_key 可能变更)。 + // 异步执行,探测失败不影响账号更新响应。 + if len(req.Credentials) > 0 { + h.scheduleOpenAIResponsesProbe(account) + } + response.Success(c, h.buildAccountResponseWithRuntime(c.Request.Context(), account)) } +// scheduleOpenAIResponsesProbe 异步触发 OpenAI APIKey 账号的 Responses API 能力探测。 +// +// 仅对 platform=openai && type=apikey 账号生效;其他账号无操作。 +// 探测本身在 goroutine 中执行(会发一次 HTTP 请求到上游),不会阻塞 +// 当前请求。探测错误仅记录日志,不向上下文传播:探测失败时标记保持缺失, +// 网关会按"现状即证据"默认走 Responses。 +func (h *AccountHandler) scheduleOpenAIResponsesProbe(account *service.Account) { + if account == nil || account.Platform != service.PlatformOpenAI || account.Type != service.AccountTypeAPIKey { + return + } + if h.accountTestService == nil { + return + } + accountID := account.ID + go func() { + defer func() { + if r := recover(); r != nil { + slog.Error("openai_responses_probe_panic", "account_id", accountID, "recover", r) + } + }() + h.accountTestService.ProbeOpenAIAPIKeyResponsesSupport(context.Background(), accountID) + }() +} + // Delete handles deleting an account // DELETE /api/v1/admin/accounts/:id func (h *AccountHandler) Delete(c *gin.Context) { @@ -1231,6 +1261,8 @@ func (h *AccountHandler) BatchCreate(c *gin.Context) { openaiPrivacyAccounts = append(openaiPrivacyAccounts, account) } } + // OpenAI APIKey 账号异步探测 /v1/responses 能力。 + h.scheduleOpenAIResponsesProbe(account) success++ results = append(results, gin.H{ "name": item.Name, diff --git a/backend/internal/pkg/openai_compat/upstream_capability.go b/backend/internal/pkg/openai_compat/upstream_capability.go new file mode 100644 index 00000000..ff05afe5 --- /dev/null +++ b/backend/internal/pkg/openai_compat/upstream_capability.go @@ -0,0 +1,75 @@ +// Package openai_compat 提供 OpenAI 协议族在不同上游间的能力差异判定工具。 +// +// 背景:sub2api 的 OpenAI APIKey 账号通过 base_url 接入多种第三方 OpenAI 兼容上游 +// (DeepSeek、Kimi、GLM、Qwen 等)。这些上游普遍只支持 /v1/chat/completions, +// 不存在 /v1/responses 端点。但网关历史代码无差别走 CC→Responses 转换并打到 +// /v1/responses,导致兼容上游 404。 +// +// 本包提供基于"账号探测标记"的能力判定,配合 +// internal/service/openai_apikey_responses_probe.go 在创建/修改账号时一次性 +// 探测并落标。 +// +// 设计取舍: +// - 不维护静态 host 白名单——避免新增厂商时必须改代码(讨论沉淀于 +// pensieve/short-term/knowledge/upstream-capability-detection-design-tradeoffs) +// - 标记缺失时默认 true(即"走 Responses"),保持与重构前老代码完全一致的存量 +// 账号行为("现状即证据"原则;详见 +// pensieve/short-term/maxims/preserve-existing-runtime-behavior-when-replacing-logic-in-stateful-systems) +package openai_compat + +// AccountResponsesSupport 描述账号上游对 OpenAI Responses API 的支持状态。 +// +// 仅用于 platform=openai + type=apikey 的账号;其他账号类型不应调用本包判定。 +type AccountResponsesSupport int + +const ( + // ResponsesSupportUnknown 表示账号尚未完成能力探测(extra 字段缺失)。 + // 上游路由层应按"现状即证据"原则默认走 Responses,保持与重构前一致。 + ResponsesSupportUnknown AccountResponsesSupport = iota + + // ResponsesSupportYes 探测确认上游支持 /v1/responses。 + ResponsesSupportYes + + // ResponsesSupportNo 探测确认上游不支持 /v1/responses,应走 + // /v1/chat/completions 直转路径。 + ResponsesSupportNo +) + +// ExtraKeyResponsesSupported 是 accounts.extra JSON 中存储探测结果的键名。 +// 值类型为 bool:true=支持、false=不支持、键缺失=未探测。 +const ExtraKeyResponsesSupported = "openai_responses_supported" + +// ResolveResponsesSupport 从账号的 extra map 中读取探测标记。 +// +// 标记缺失或类型不匹配时返回 ResponsesSupportUnknown——调用方应按 +// "未探测=保留旧行为=走 Responses" 处理(参见 ShouldUseResponsesAPI)。 +func ResolveResponsesSupport(extra map[string]any) AccountResponsesSupport { + if extra == nil { + return ResponsesSupportUnknown + } + v, ok := extra[ExtraKeyResponsesSupported] + if !ok { + return ResponsesSupportUnknown + } + supported, ok := v.(bool) + if !ok { + return ResponsesSupportUnknown + } + if supported { + return ResponsesSupportYes + } + return ResponsesSupportNo +} + +// ShouldUseResponsesAPI 判断 OpenAI APIKey 账号的入站 /v1/chat/completions 请求 +// 是否应走"CC→Responses 转换 + 上游 /v1/responses"路径。 +// +// 返回 true 的两种情况: +// 1. 账号已探测确认支持 Responses +// 2. 账号未探测(标记缺失)——按"现状即证据"原则保留旧行为 +// +// 仅当账号已探测且确认不支持时返回 false,此时调用方应走 CC 直转路径 +// (详见 internal/service/openai_gateway_chat_completions_raw.go)。 +func ShouldUseResponsesAPI(extra map[string]any) bool { + return ResolveResponsesSupport(extra) != ResponsesSupportNo +} diff --git a/backend/internal/pkg/openai_compat/upstream_capability_test.go b/backend/internal/pkg/openai_compat/upstream_capability_test.go new file mode 100644 index 00000000..d650daa4 --- /dev/null +++ b/backend/internal/pkg/openai_compat/upstream_capability_test.go @@ -0,0 +1,55 @@ +package openai_compat + +import "testing" + +func TestResolveResponsesSupport(t *testing.T) { + tests := []struct { + name string + extra map[string]any + want AccountResponsesSupport + }{ + {"nil extra", nil, ResponsesSupportUnknown}, + {"empty extra", map[string]any{}, ResponsesSupportUnknown}, + {"key missing", map[string]any{"other": "value"}, ResponsesSupportUnknown}, + {"value true", map[string]any{ExtraKeyResponsesSupported: true}, ResponsesSupportYes}, + {"value false", map[string]any{ExtraKeyResponsesSupported: false}, ResponsesSupportNo}, + {"value wrong type string", map[string]any{ExtraKeyResponsesSupported: "true"}, ResponsesSupportUnknown}, + {"value wrong type number", map[string]any{ExtraKeyResponsesSupported: 1}, ResponsesSupportUnknown}, + {"value nil", map[string]any{ExtraKeyResponsesSupported: nil}, ResponsesSupportUnknown}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := ResolveResponsesSupport(tc.extra) + if got != tc.want { + t.Errorf("ResolveResponsesSupport(%v) = %v, want %v", tc.extra, got, tc.want) + } + }) + } +} + +func TestShouldUseResponsesAPI(t *testing.T) { + tests := []struct { + name string + extra map[string]any + want bool + }{ + // 关键不变量:未探测必须返回 true(保留旧行为) + {"unknown defaults to true (preserve old behavior)", nil, true}, + {"unknown empty defaults to true", map[string]any{}, true}, + {"unknown wrong type defaults to true", map[string]any{ExtraKeyResponsesSupported: "yes"}, true}, + + // 已探测:标记决定 + {"explicitly supported", map[string]any{ExtraKeyResponsesSupported: true}, true}, + {"explicitly unsupported", map[string]any{ExtraKeyResponsesSupported: false}, false}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := ShouldUseResponsesAPI(tc.extra) + if got != tc.want { + t.Errorf("ShouldUseResponsesAPI(%v) = %v, want %v", tc.extra, got, tc.want) + } + }) + } +} diff --git a/backend/internal/service/account_test_service.go b/backend/internal/service/account_test_service.go index 391e7475..572a12b1 100644 --- a/backend/internal/service/account_test_service.go +++ b/backend/internal/service/account_test_service.go @@ -21,6 +21,7 @@ import ( "github.com/Wei-Shaw/sub2api/internal/pkg/claude" "github.com/Wei-Shaw/sub2api/internal/pkg/geminicli" "github.com/Wei-Shaw/sub2api/internal/pkg/openai" + "github.com/Wei-Shaw/sub2api/internal/pkg/openai_compat" "github.com/Wei-Shaw/sub2api/internal/util/urlvalidator" "github.com/gin-gonic/gin" "github.com/google/uuid" @@ -554,6 +555,15 @@ func (s *AccountTestService) testOpenAIAccountConnection(c *gin.Context, account if err != nil { return s.sendErrorAndEnd(c, fmt.Sprintf("Invalid base URL: %s", err.Error())) } + // 账号已被探测为不支持 Responses(如 DeepSeek/Kimi 等)时,丢出明确提示。 + // 账号本身可用(网关会走 CC 直转),仅测试入口需要补齐 CC SSE 处理逻辑。 + // TODO:实现 CC 格式的账号测试路径(需专门的 CC SSE handler)。 + if !openai_compat.ShouldUseResponsesAPI(account.Extra) { + return s.sendErrorAndEnd(c, + "账号已被探测为不支持 OpenAI Responses API(如 DeepSeek/Kimi 等三方兼容上游),"+ + "账号本身可正常使用,但当前测试接口仅支持 Responses API 路径。请直接通过实际 API 调用验证。", + ) + } apiURL = strings.TrimSuffix(normalizedBaseURL, "/") + "/responses" } else { return s.sendErrorAndEnd(c, fmt.Sprintf("Unsupported account type: %s", account.Type)) diff --git a/backend/internal/service/openai_apikey_responses_probe.go b/backend/internal/service/openai_apikey_responses_probe.go new file mode 100644 index 00000000..1bddcf50 --- /dev/null +++ b/backend/internal/service/openai_apikey_responses_probe.go @@ -0,0 +1,149 @@ +package service + +import ( + "bytes" + "context" + "encoding/json" + "io" + "net/http" + "strings" + "time" + + "github.com/Wei-Shaw/sub2api/internal/pkg/logger" + "github.com/Wei-Shaw/sub2api/internal/pkg/openai" + "github.com/Wei-Shaw/sub2api/internal/pkg/openai_compat" +) + +// openaiResponsesProbeTimeout 是探测请求的超时时长。 +// 探测必须快速失败——超时不应阻塞账号创建/更新流程。 +const openaiResponsesProbeTimeout = 8 * time.Second + +// openaiResponsesProbePayload 是探测使用的最小 Responses 请求体。 +// 仅作能力探测,不期望响应内容质量;Stream=false 减少 SSE 解析开销。 +// +// 注意:探测的目标是区分"端点存在"与"端点不存在"——只要上游返回非 404 的 +// 4xx/5xx(如 400 invalid_request_error / 401 unauthorized / 422 等), +// 都视为"端点存在 → 支持 Responses"。仅 404 / 405 视为"端点不存在"。 +func openaiResponsesProbePayload(modelID string) []byte { + if strings.TrimSpace(modelID) == "" { + modelID = openai.DefaultTestModel + } + body, _ := json.Marshal(map[string]any{ + "model": modelID, + "input": []map[string]any{ + { + "role": "user", + "content": []map[string]any{ + {"type": "input_text", "text": "hi"}, + }, + }, + }, + "instructions": openai.DefaultInstructions, + "stream": false, + }) + return body +} + +// ProbeOpenAIAPIKeyResponsesSupport 探测 OpenAI APIKey 账号上游是否支持 +// /v1/responses 端点,并将结果持久化到 accounts.extra.openai_responses_supported。 +// +// 调用时机:账号创建/更新后,且仅当 platform=openai && type=apikey 时。 +// +// 探测策略(参见包文档 internal/pkg/openai_compat): +// - 上游 404 / 405 → 不支持,写 false +// - 上游 2xx / 其他 4xx(401/422/400 等)/ 5xx → 支持,写 true +// - 网络层失败(连接错误、超时)→ 不写标记,保持 unknown +// (后续请求仍按"现状即证据"默认走 Responses) +// +// 该方法是幂等的:重复调用会以最新探测结果覆盖标记。 +// +// 关于失败处理:探测本身的失败不应阻塞账号创建——账号能创建/更新成功就够了, +// 探测结果只影响后续路由优化。所有错误都仅记录日志,不向调用方传播。 +func (s *AccountTestService) ProbeOpenAIAPIKeyResponsesSupport(ctx context.Context, accountID int64) { + account, err := s.accountRepo.GetByID(ctx, accountID) + if err != nil { + logger.LegacyPrintf("service.openai_probe", "probe_load_account_failed: account_id=%d err=%v", accountID, err) + return + } + if account.Platform != PlatformOpenAI || account.Type != AccountTypeAPIKey { + // 仅 OpenAI APIKey 账号需要探测;其他账号类型无能力差异。 + return + } + + apiKey := account.GetOpenAIApiKey() + if apiKey == "" { + logger.LegacyPrintf("service.openai_probe", "probe_skip_no_apikey: account_id=%d", accountID) + return + } + baseURL := account.GetOpenAIBaseURL() + if baseURL == "" { + baseURL = "https://api.openai.com" + } + normalizedBaseURL, err := s.validateUpstreamBaseURL(baseURL) + if err != nil { + logger.LegacyPrintf("service.openai_probe", "probe_invalid_baseurl: account_id=%d base_url=%q err=%v", accountID, baseURL, err) + return + } + + probeURL := strings.TrimSuffix(normalizedBaseURL, "/") + "/responses" + + probeCtx, cancel := context.WithTimeout(ctx, openaiResponsesProbeTimeout) + defer cancel() + + req, err := http.NewRequestWithContext(probeCtx, http.MethodPost, probeURL, bytes.NewReader(openaiResponsesProbePayload(""))) + if err != nil { + logger.LegacyPrintf("service.openai_probe", "probe_build_request_failed: account_id=%d err=%v", accountID, err) + return + } + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", "Bearer "+apiKey) + req.Header.Set("Accept", "application/json") + + proxyURL := "" + if account.ProxyID != nil && account.Proxy != nil { + proxyURL = account.Proxy.URL() + } + + resp, err := s.httpUpstream.DoWithTLS(req, proxyURL, account.ID, account.Concurrency, s.tlsFPProfileService.ResolveTLSProfile(account)) + if err != nil { + // 网络层失败:不写标记,保持 unknown,下次重试或由网关 fallback 处理 + logger.LegacyPrintf("service.openai_probe", "probe_request_failed: account_id=%d url=%s err=%v", accountID, probeURL, err) + return + } + defer func() { + _, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, 1<<20)) + _ = resp.Body.Close() + }() + + supported := isResponsesEndpointSupportedByStatus(resp.StatusCode) + + if err := s.accountRepo.UpdateExtra(ctx, accountID, map[string]any{ + openai_compat.ExtraKeyResponsesSupported: supported, + }); err != nil { + logger.LegacyPrintf("service.openai_probe", "probe_persist_failed: account_id=%d supported=%v err=%v", accountID, supported, err) + return + } + + logger.LegacyPrintf("service.openai_probe", + "probe_done: account_id=%d base_url=%s status=%d supported=%v", + accountID, normalizedBaseURL, resp.StatusCode, supported, + ) +} + +// isResponsesEndpointSupportedByStatus 根据探测响应的 HTTP 状态码判定上游 +// 是否暴露 /v1/responses 端点。 +// +// 关键观察:第三方 OpenAI 兼容上游(DeepSeek/Kimi 等)对未知端点统一返回 404 +// 或 405;而 OpenAI 官方/有 Responses 实现的上游会因为请求体最简(缺字段) +// 返回 400/422 等业务错误,但端点本身存在。 +// +// 因此:仅 404 和 405 视为"端点不存在",其他 status 视为"端点存在"。 +// +// 5xx 也视为"端点存在"——上游偶发故障不应误判为不支持。 +func isResponsesEndpointSupportedByStatus(status int) bool { + switch status { + case http.StatusNotFound, http.StatusMethodNotAllowed: + return false + } + return true +} diff --git a/backend/internal/service/openai_gateway_chat_completions.go b/backend/internal/service/openai_gateway_chat_completions.go index 5822ae4c..f88a9d46 100644 --- a/backend/internal/service/openai_gateway_chat_completions.go +++ b/backend/internal/service/openai_gateway_chat_completions.go @@ -14,6 +14,7 @@ import ( "github.com/Wei-Shaw/sub2api/internal/pkg/apicompat" "github.com/Wei-Shaw/sub2api/internal/pkg/logger" + "github.com/Wei-Shaw/sub2api/internal/pkg/openai_compat" "github.com/Wei-Shaw/sub2api/internal/util/responseheaders" "github.com/gin-gonic/gin" "github.com/tidwall/gjson" @@ -39,9 +40,18 @@ var cursorResponsesUnsupportedFields = []string{ // ForwardAsChatCompletions accepts a Chat Completions request body, converts it // to OpenAI Responses API format, forwards to the OpenAI upstream, and converts -// the response back to Chat Completions format. All account types (OAuth and API -// Key) go through the Responses API conversion path since the upstream only -// exposes the /v1/responses endpoint. +// the response back to Chat Completions format. +// +// 历史背景:该函数原本对所有 OpenAI 账号无差别走 CC→Responses 转换 + /v1/responses +// 端点——这在 OAuth(ChatGPT 内部 API 仅支持 Responses)和官方 APIKey 账号上是 +// 正确的,但 sub2api 接入 DeepSeek/Kimi/GLM 等第三方 OpenAI 兼容上游后假设破裂: +// 这些上游普遍只支持 /v1/chat/completions,无 /v1/responses 端点。 +// +// 当前路由策略(基于账号探测标记,详见 openai_compat.ShouldUseResponsesAPI): +// - APIKey 账号 + 探测确认不支持 Responses → 走 forwardAsRawChatCompletions +// 直转上游 /v1/chat/completions,不做协议转换 +// - 其他所有情况(OAuth、APIKey 探测确认支持、未探测)→ 走原有 CC→Responses +// 转换路径(保留旧行为,存量未探测账号零兼容破坏) func (s *OpenAIGatewayService) ForwardAsChatCompletions( ctx context.Context, c *gin.Context, @@ -50,6 +60,12 @@ func (s *OpenAIGatewayService) ForwardAsChatCompletions( promptCacheKey string, defaultMappedModel string, ) (*OpenAIForwardResult, error) { + // 入口分流:APIKey 账号 + 已探测且确认上游不支持 Responses,走 CC 直转。 + // 标记缺失(未探测)按"现状即证据"原则继续走下方原 Responses 转换路径。 + if account.Type == AccountTypeAPIKey && !openai_compat.ShouldUseResponsesAPI(account.Extra) { + return s.forwardAsRawChatCompletions(ctx, c, account, body, defaultMappedModel) + } + startTime := time.Now() // 1. Parse Chat Completions request diff --git a/backend/internal/service/openai_gateway_chat_completions_raw.go b/backend/internal/service/openai_gateway_chat_completions_raw.go new file mode 100644 index 00000000..e9a1a0b1 --- /dev/null +++ b/backend/internal/service/openai_gateway_chat_completions_raw.go @@ -0,0 +1,368 @@ +package service + +import ( + "bufio" + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "strings" + "time" + + "github.com/Wei-Shaw/sub2api/internal/pkg/apicompat" + "github.com/Wei-Shaw/sub2api/internal/pkg/logger" + "github.com/Wei-Shaw/sub2api/internal/util/responseheaders" + "github.com/gin-gonic/gin" + "github.com/tidwall/gjson" + "go.uber.org/zap" +) + +// forwardAsRawChatCompletions 直转客户端的 Chat Completions 请求到上游 +// `{base_url}/v1/chat/completions`,**不**做 CC↔Responses 协议转换。 +// +// 适用场景:account.platform=openai && account.type=apikey && 上游已被探测确认 +// 不支持 /v1/responses 端点(如 DeepSeek/Kimi/GLM/Qwen 等第三方 OpenAI 兼容上游)。 +// +// 与 ForwardAsChatCompletions 的关键差异: +// +// - 不调用 apicompat.ChatCompletionsToResponses,body 仅做模型 ID 改写 +// - 上游 URL 拼到 /v1/chat/completions 而非 /v1/responses +// - 流式响应 SSE 直接透传给客户端(上游 chunk 已是 CC 格式) +// - 非流式响应 JSON 直接透传,仅按需提取 usage +// - 不应用 codex OAuth transform(APIKey 路径无 OAuth) +// - 不注入 prompt_cache_key(OAuth 专属机制) +// +// 调用入口:openai_gateway_chat_completions.go::ForwardAsChatCompletions +// 在函数顶部按 openai_compat.ShouldUseResponsesAPI 分流。 +func (s *OpenAIGatewayService) forwardAsRawChatCompletions( + ctx context.Context, + c *gin.Context, + account *Account, + body []byte, + defaultMappedModel string, +) (*OpenAIForwardResult, error) { + startTime := time.Now() + + // 1. Parse minimal fields needed for routing/billing + originalModel := gjson.GetBytes(body, "model").String() + if originalModel == "" { + writeChatCompletionsError(c, http.StatusBadRequest, "invalid_request_error", "model is required") + return nil, fmt.Errorf("missing model in request") + } + clientStream := gjson.GetBytes(body, "stream").Bool() + includeUsage := gjson.GetBytes(body, "stream_options.include_usage").Bool() + + // 2. Resolve model mapping (same as ForwardAsChatCompletions) + billingModel := resolveOpenAIForwardModel(account, originalModel, defaultMappedModel) + upstreamModel := normalizeOpenAIModelForUpstream(account, billingModel) + + // 3. Rewrite model in body (no protocol conversion) + upstreamBody := body + if upstreamModel != originalModel { + upstreamBody = ReplaceModelInBody(body, upstreamModel) + } + + // 4. Apply OpenAI fast policy on the CC body + updatedBody, policyErr := s.applyOpenAIFastPolicyToBody(ctx, account, upstreamModel, upstreamBody) + if policyErr != nil { + var blocked *OpenAIFastBlockedError + if errors.As(policyErr, &blocked) { + writeChatCompletionsError(c, http.StatusForbidden, "permission_error", blocked.Message) + } + return nil, policyErr + } + upstreamBody = updatedBody + + logger.L().Debug("openai chat_completions raw: forwarding without protocol conversion", + zap.Int64("account_id", account.ID), + zap.String("original_model", originalModel), + zap.String("billing_model", billingModel), + zap.String("upstream_model", upstreamModel), + zap.Bool("stream", clientStream), + ) + + // 5. Build upstream request + apiKey := account.GetOpenAIApiKey() + if apiKey == "" { + return nil, fmt.Errorf("account %d missing api_key", account.ID) + } + baseURL := account.GetOpenAIBaseURL() + if baseURL == "" { + baseURL = "https://api.openai.com" + } + validatedURL, err := s.validateUpstreamBaseURL(baseURL) + if err != nil { + return nil, fmt.Errorf("invalid base_url: %w", err) + } + targetURL := buildOpenAIChatCompletionsURL(validatedURL) + + upstreamReq, err := http.NewRequestWithContext(ctx, http.MethodPost, targetURL, bytes.NewReader(upstreamBody)) + if err != nil { + return nil, fmt.Errorf("build upstream request: %w", err) + } + upstreamReq.Header.Set("Content-Type", "application/json") + upstreamReq.Header.Set("Authorization", "Bearer "+apiKey) + if clientStream { + upstreamReq.Header.Set("Accept", "text/event-stream") + } else { + upstreamReq.Header.Set("Accept", "application/json") + } + + // Whitelist passthrough headers (subset of openaiAllowedHeaders relevant to CC). + for key, values := range c.Request.Header { + lowerKey := strings.ToLower(key) + if openaiAllowedHeaders[lowerKey] { + for _, v := range values { + upstreamReq.Header.Add(key, v) + } + } + } + customUA := account.GetOpenAIUserAgent() + if customUA != "" { + upstreamReq.Header.Set("user-agent", customUA) + } + + // 6. Send request + proxyURL := "" + if account.Proxy != nil { + proxyURL = account.Proxy.URL() + } + resp, err := s.httpUpstream.Do(upstreamReq, proxyURL, account.ID, account.Concurrency) + if err != nil { + safeErr := sanitizeUpstreamErrorMessage(err.Error()) + setOpsUpstreamError(c, 0, safeErr, "") + appendOpsUpstreamError(c, OpsUpstreamErrorEvent{ + Platform: account.Platform, + AccountID: account.ID, + AccountName: account.Name, + UpstreamStatusCode: 0, + Kind: "request_error", + Message: safeErr, + }) + writeChatCompletionsError(c, http.StatusBadGateway, "upstream_error", "Upstream request failed") + return nil, fmt.Errorf("upstream request failed: %s", safeErr) + } + defer func() { _ = resp.Body.Close() }() + + // 7. Handle error response with failover + if resp.StatusCode >= 400 { + respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 2<<20)) + _ = resp.Body.Close() + resp.Body = io.NopCloser(bytes.NewReader(respBody)) + + upstreamMsg := strings.TrimSpace(extractUpstreamErrorMessage(respBody)) + upstreamMsg = sanitizeUpstreamErrorMessage(upstreamMsg) + if s.shouldFailoverOpenAIUpstreamResponse(resp.StatusCode, upstreamMsg, respBody) { + upstreamDetail := "" + if s.cfg != nil && s.cfg.Gateway.LogUpstreamErrorBody { + maxBytes := s.cfg.Gateway.LogUpstreamErrorBodyMaxBytes + if maxBytes <= 0 { + maxBytes = 2048 + } + upstreamDetail = truncateString(string(respBody), maxBytes) + } + appendOpsUpstreamError(c, OpsUpstreamErrorEvent{ + Platform: account.Platform, + AccountID: account.ID, + AccountName: account.Name, + UpstreamStatusCode: resp.StatusCode, + UpstreamRequestID: resp.Header.Get("x-request-id"), + Kind: "failover", + Message: upstreamMsg, + Detail: upstreamDetail, + }) + if s.rateLimitService != nil { + s.rateLimitService.HandleUpstreamError(ctx, account, resp.StatusCode, resp.Header, respBody) + } + return nil, &UpstreamFailoverError{ + StatusCode: resp.StatusCode, + ResponseBody: respBody, + RetryableOnSameAccount: account.IsPoolMode() && (isPoolModeRetryableStatus(resp.StatusCode) || isOpenAITransientProcessingError(resp.StatusCode, upstreamMsg, respBody)), + } + } + return s.handleChatCompletionsErrorResponse(resp, c, account) + } + + // 8. Forward response + if clientStream { + return s.streamRawChatCompletions(c, resp, originalModel, billingModel, upstreamModel, includeUsage, startTime) + } + return s.bufferRawChatCompletions(c, resp, originalModel, billingModel, upstreamModel, startTime) +} + +// streamRawChatCompletions 透传上游 CC SSE 流到客户端,并提取 usage(包括 +// 末尾 [DONE] 之前的 chunk 中的 usage 字段,按 OpenAI CC 协议)。 +func (s *OpenAIGatewayService) streamRawChatCompletions( + c *gin.Context, + resp *http.Response, + originalModel string, + billingModel string, + upstreamModel string, + includeUsage bool, + startTime time.Time, +) (*OpenAIForwardResult, error) { + requestID := resp.Header.Get("x-request-id") + + if s.responseHeaderFilter != nil { + responseheaders.WriteFilteredHeaders(c.Writer.Header(), resp.Header, s.responseHeaderFilter) + } + c.Writer.Header().Set("Content-Type", "text/event-stream") + c.Writer.Header().Set("Cache-Control", "no-cache") + c.Writer.Header().Set("Connection", "keep-alive") + c.Writer.Header().Set("X-Accel-Buffering", "no") + c.Writer.WriteHeader(http.StatusOK) + + scanner := bufio.NewScanner(resp.Body) + maxLineSize := defaultMaxLineSize + if s.cfg != nil && s.cfg.Gateway.MaxLineSize > 0 { + maxLineSize = s.cfg.Gateway.MaxLineSize + } + scanner.Buffer(make([]byte, 0, 64*1024), maxLineSize) + + var usage OpenAIUsage + var firstTokenMs *int + + for scanner.Scan() { + line := scanner.Text() + // Direct passthrough: write each line + blank line separator + if _, werr := c.Writer.WriteString(line + "\n"); werr != nil { + logger.L().Debug("openai chat_completions raw: client write failed", + zap.Error(werr), + zap.String("request_id", requestID), + ) + break + } + if line == "" { + c.Writer.Flush() + continue + } + c.Writer.Flush() + + // Track first token timing on first non-empty data line + if firstTokenMs == nil && strings.HasPrefix(line, "data: ") && line != "data: [DONE]" { + elapsed := int(time.Since(startTime).Milliseconds()) + firstTokenMs = &elapsed + } + + // Extract usage from any chunk that carries it (CC streams typically put + // usage in the final chunk before [DONE], but may also appear elsewhere). + if strings.HasPrefix(line, "data: ") && line != "data: [DONE]" { + payload := line[6:] + if u := extractCCStreamUsage(payload); u != nil { + usage = *u + } + } + } + + if err := scanner.Err(); err != nil { + if !errors.Is(err, context.Canceled) && !errors.Is(err, context.DeadlineExceeded) { + logger.L().Warn("openai chat_completions raw: stream read error", + zap.Error(err), + zap.String("request_id", requestID), + ) + } + } + + _ = includeUsage // CC 协议下 usage 是否包含由客户端的 stream_options 决定,上游会自行处理;我们仅做提取。 + + return &OpenAIForwardResult{ + RequestID: requestID, + Usage: usage, + Model: originalModel, + BillingModel: billingModel, + UpstreamModel: upstreamModel, + Stream: true, + Duration: time.Since(startTime), + FirstTokenMs: firstTokenMs, + }, nil +} + +// bufferRawChatCompletions 透传上游 CC 非流式 JSON 响应。 +func (s *OpenAIGatewayService) bufferRawChatCompletions( + c *gin.Context, + resp *http.Response, + originalModel string, + billingModel string, + upstreamModel string, + startTime time.Time, +) (*OpenAIForwardResult, error) { + requestID := resp.Header.Get("x-request-id") + + respBody, err := io.ReadAll(io.LimitReader(resp.Body, 32<<20)) + if err != nil { + writeChatCompletionsError(c, http.StatusBadGateway, "api_error", "Failed to read upstream response") + return nil, fmt.Errorf("read upstream body: %w", err) + } + + var ccResp apicompat.ChatCompletionsResponse + var usage OpenAIUsage + if err := json.Unmarshal(respBody, &ccResp); err == nil && ccResp.Usage != nil { + usage = OpenAIUsage{ + InputTokens: ccResp.Usage.PromptTokens, + OutputTokens: ccResp.Usage.CompletionTokens, + } + if ccResp.Usage.PromptTokensDetails != nil { + usage.CacheReadInputTokens = ccResp.Usage.PromptTokensDetails.CachedTokens + } + } + + if s.responseHeaderFilter != nil { + responseheaders.WriteFilteredHeaders(c.Writer.Header(), resp.Header, s.responseHeaderFilter) + } + if ct := resp.Header.Get("Content-Type"); ct != "" { + c.Writer.Header().Set("Content-Type", ct) + } else { + c.Writer.Header().Set("Content-Type", "application/json") + } + c.Writer.WriteHeader(http.StatusOK) + _, _ = c.Writer.Write(respBody) + + return &OpenAIForwardResult{ + RequestID: requestID, + Usage: usage, + Model: originalModel, + BillingModel: billingModel, + UpstreamModel: upstreamModel, + Stream: false, + Duration: time.Since(startTime), + }, nil +} + +// extractCCStreamUsage 从单个 CC 流式 chunk 的 payload 中提取 usage 字段。 +// CC 协议中 usage 仅出现在末尾 chunk(且仅当客户端请求 stream_options.include_usage +// 时),但上游可能在多个 chunk 中重复——总是用最新值。 +func extractCCStreamUsage(payload string) *OpenAIUsage { + usageResult := gjson.Get(payload, "usage") + if !usageResult.Exists() || !usageResult.IsObject() { + return nil + } + u := OpenAIUsage{ + InputTokens: int(gjson.Get(payload, "usage.prompt_tokens").Int()), + OutputTokens: int(gjson.Get(payload, "usage.completion_tokens").Int()), + } + if cached := gjson.Get(payload, "usage.prompt_tokens_details.cached_tokens"); cached.Exists() { + u.CacheReadInputTokens = int(cached.Int()) + } + return &u +} + +// buildOpenAIChatCompletionsURL 拼接上游 Chat Completions 端点 URL。 +// +// - base 已是 /chat/completions:原样返回 +// - base 以 /v1 结尾:追加 /chat/completions +// - 其他情况:追加 /v1/chat/completions +// +// 与 buildOpenAIResponsesURL 是姐妹函数。 +func buildOpenAIChatCompletionsURL(base string) string { + normalized := strings.TrimRight(strings.TrimSpace(base), "/") + if strings.HasSuffix(normalized, "/chat/completions") { + return normalized + } + if strings.HasSuffix(normalized, "/v1") { + return normalized + "/chat/completions" + } + return normalized + "/v1/chat/completions" +} From 4d145300c3b7e810d5736ea73ffd6eabd2c24ff2 Mon Sep 17 00:00:00 2001 From: alfadb-bot Date: Thu, 30 Apr 2026 20:16:44 +0800 Subject: [PATCH 04/28] fixup! fix(openai-gateway): route APIKey accounts to /v1/chat/completions when upstream lacks Responses API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address self-review findings: R7: Use a narrow per-trust-domain header allowlist for CC raw forwarding. The previously reused openaiAllowedHeaders contains Codex client-only headers (originator/session_id/x-codex-turn-state/x-codex-turn-metadata/conversation_id) that must not leak to third-party OpenAI-compatible upstreams (DeepSeek/Kimi/ GLM/Qwen). Strict upstreams may 400 with 'unknown parameter'; lenient ones silently pollute their request statistics. New openaiCCRawAllowedHeaders only allows generic HTTP headers (accept-language, user-agent); content-type/ authorization/accept are set explicitly by callers. R4: Drop the dead includeUsage parameter from streamRawChatCompletions. The CC pass-through path doesn't need to inspect the client's stream_options flag — the upstream handles it and we only extract usage when it appears in chunks. Killing the unused parameter removes a misleading 'parameter read but discarded' code smell. Sediment refs: - pensieve/short-term/maxims/dont-reuse-shared-headers-whitelist-across-different-upstream-trust-domains - pensieve/short-term/knowledge/openai-gateway-shared-state-quirks - pensieve/short-term/pipelines/run-when-self-reviewing-forwarder-implementation --- .../openai_gateway_chat_completions_raw.go | 33 +++++++++++++++---- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/backend/internal/service/openai_gateway_chat_completions_raw.go b/backend/internal/service/openai_gateway_chat_completions_raw.go index e9a1a0b1..6c7eec41 100644 --- a/backend/internal/service/openai_gateway_chat_completions_raw.go +++ b/backend/internal/service/openai_gateway_chat_completions_raw.go @@ -20,6 +20,25 @@ import ( "go.uber.org/zap" ) +// openaiCCRawAllowedHeaders 是 CC 直转路径专用的客户端 header 透传白名单。 +// +// **关键**:不能复用 openaiAllowedHeaders——后者含 Codex 客户端专属 header +// (originator / session_id / x-codex-turn-state / x-codex-turn-metadata / conversation_id), +// 这些在 ChatGPT OAuth 上游是必需的,但透传给 DeepSeek/Kimi/GLM 等第三方 +// OpenAI 兼容上游会造成: +// - 完全忽略(多数友好厂商)——隐性污染上游统计 +// - 400 "unknown parameter"(严格上游)——可见错误 +// +// 这里仅放行通用 HTTP header;content-type / authorization / accept 由上下文 +// 显式设置,不依赖透传。 +// +// 参见决策记录: +// pensieve/short-term/maxims/dont-reuse-shared-headers-whitelist-across-different-upstream-trust-domains +var openaiCCRawAllowedHeaders = map[string]bool{ + "accept-language": true, + "user-agent": true, +} + // forwardAsRawChatCompletions 直转客户端的 Chat Completions 请求到上游 // `{base_url}/v1/chat/completions`,**不**做 CC↔Responses 协议转换。 // @@ -53,7 +72,6 @@ func (s *OpenAIGatewayService) forwardAsRawChatCompletions( return nil, fmt.Errorf("missing model in request") } clientStream := gjson.GetBytes(body, "stream").Bool() - includeUsage := gjson.GetBytes(body, "stream_options.include_usage").Bool() // 2. Resolve model mapping (same as ForwardAsChatCompletions) billingModel := resolveOpenAIForwardModel(account, originalModel, defaultMappedModel) @@ -111,10 +129,10 @@ func (s *OpenAIGatewayService) forwardAsRawChatCompletions( upstreamReq.Header.Set("Accept", "application/json") } - // Whitelist passthrough headers (subset of openaiAllowedHeaders relevant to CC). + // 透传白名单中的客户端 header。详见 openaiCCRawAllowedHeaders 的设计说明。 for key, values := range c.Request.Header { lowerKey := strings.ToLower(key) - if openaiAllowedHeaders[lowerKey] { + if openaiCCRawAllowedHeaders[lowerKey] { for _, v := range values { upstreamReq.Header.Add(key, v) } @@ -188,20 +206,23 @@ func (s *OpenAIGatewayService) forwardAsRawChatCompletions( // 8. Forward response if clientStream { - return s.streamRawChatCompletions(c, resp, originalModel, billingModel, upstreamModel, includeUsage, startTime) + return s.streamRawChatCompletions(c, resp, originalModel, billingModel, upstreamModel, startTime) } return s.bufferRawChatCompletions(c, resp, originalModel, billingModel, upstreamModel, startTime) } // streamRawChatCompletions 透传上游 CC SSE 流到客户端,并提取 usage(包括 // 末尾 [DONE] 之前的 chunk 中的 usage 字段,按 OpenAI CC 协议)。 +// +// usage 字段仅在客户端请求 stream_options.include_usage=true 时出现于上游响应中。 +// 本函数不检查客户端的请求 flag——上游会自行处理,我们仅在上游响应 +// chunk 中出现 usage 时提取。 func (s *OpenAIGatewayService) streamRawChatCompletions( c *gin.Context, resp *http.Response, originalModel string, billingModel string, upstreamModel string, - includeUsage bool, startTime time.Time, ) (*OpenAIForwardResult, error) { requestID := resp.Header.Get("x-request-id") @@ -266,8 +287,6 @@ func (s *OpenAIGatewayService) streamRawChatCompletions( } } - _ = includeUsage // CC 协议下 usage 是否包含由客户端的 stream_options 决定,上游会自行处理;我们仅做提取。 - return &OpenAIForwardResult{ RequestID: requestID, Usage: usage, From adf01ac8804c6acceba1763b836706c027097aef Mon Sep 17 00:00:00 2001 From: alfadb Date: Thu, 30 Apr 2026 21:46:46 +0800 Subject: [PATCH 05/28] =?UTF-8?q?fix(openai-gateway):=20address=20PR=20rev?= =?UTF-8?q?iew=20=E2=80=94=20probe=20URL=20/v1=20prefix,=20Create=20trigge?= =?UTF-8?q?r,=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix four issues flagged by copilot-pull-request-reviewer on PR #2143: 1. Probe URL missing /v1 prefix (openai_apikey_responses_probe.go) Replaced bare TrimSuffix + "/responses" with buildOpenAIResponsesURL(), which handles bare domain → /v1/responses correctly. Affected: - ProbeOpenAIAPIKeyResponsesSupport (probe URL) - TestAccount endpoint (apiURL for APIKey accounts) 2. Create endpoint not triggering probe (account_handler.go) Capture created account from idempotent closure and call scheduleOpenAIResponsesProbe after success, same pattern as BatchCreate and Update. 3. Tests (openai_gateway_chat_completions_raw_test.go) Added TestBuildOpenAIChatCompletionsURL (7 cases covering bare domain, /v1 suffix, trailing slash, third-party domains, whitespace) and TestBuildOpenAIResponsesURL_ProbeURL (6 cases locking the probe URL construction for bare-domain inputs). All unit tests pass; go build ./cmd/server/ clean. --- .../internal/handler/admin/account_handler.go | 8 +++ .../internal/service/account_test_service.go | 2 +- .../service/openai_apikey_responses_probe.go | 2 +- ...penai_gateway_chat_completions_raw_test.go | 67 +++++++++++++++++++ 4 files changed, 77 insertions(+), 2 deletions(-) create mode 100644 backend/internal/service/openai_gateway_chat_completions_raw_test.go diff --git a/backend/internal/handler/admin/account_handler.go b/backend/internal/handler/admin/account_handler.go index c93fff7b..ffab74d6 100644 --- a/backend/internal/handler/admin/account_handler.go +++ b/backend/internal/handler/admin/account_handler.go @@ -528,6 +528,10 @@ func (h *AccountHandler) Create(c *gin.Context) { // 确定是否跳过混合渠道检查 skipCheck := req.ConfirmMixedChannelRisk != nil && *req.ConfirmMixedChannelRisk + // 捕获闭包内创建的账号引用,用于创建成功后触发异步探测。 + // 幂等重放时闭包不会执行 → createdAccount 为 nil → 不重复调度。 + var createdAccount *service.Account + result, err := executeAdminIdempotent(c, "admin.accounts.create", req, service.DefaultWriteIdempotencyTTL(), func(ctx context.Context) (any, error) { account, execErr := h.adminService.CreateAccount(ctx, &service.CreateAccountInput{ Name: req.Name, @@ -549,6 +553,7 @@ func (h *AccountHandler) Create(c *gin.Context) { if execErr != nil { return nil, execErr } + createdAccount = account // Antigravity OAuth: 新账号直接设置隐私 h.adminService.ForceAntigravityPrivacy(ctx, account) // OpenAI OAuth: 新账号直接设置隐私 @@ -577,6 +582,9 @@ func (h *AccountHandler) Create(c *gin.Context) { if result != nil && result.Replayed { c.Header("X-Idempotency-Replayed", "true") } + // OpenAI APIKey 账号创建后异步探测上游 /v1/responses 能力。 + // 探测失败不影响账号创建响应。 + h.scheduleOpenAIResponsesProbe(createdAccount) response.Success(c, result.Data) } diff --git a/backend/internal/service/account_test_service.go b/backend/internal/service/account_test_service.go index 572a12b1..ddb4343a 100644 --- a/backend/internal/service/account_test_service.go +++ b/backend/internal/service/account_test_service.go @@ -564,7 +564,7 @@ func (s *AccountTestService) testOpenAIAccountConnection(c *gin.Context, account "账号本身可正常使用,但当前测试接口仅支持 Responses API 路径。请直接通过实际 API 调用验证。", ) } - apiURL = strings.TrimSuffix(normalizedBaseURL, "/") + "/responses" + apiURL = buildOpenAIResponsesURL(normalizedBaseURL) } else { return s.sendErrorAndEnd(c, fmt.Sprintf("Unsupported account type: %s", account.Type)) } diff --git a/backend/internal/service/openai_apikey_responses_probe.go b/backend/internal/service/openai_apikey_responses_probe.go index 1bddcf50..a4eb9252 100644 --- a/backend/internal/service/openai_apikey_responses_probe.go +++ b/backend/internal/service/openai_apikey_responses_probe.go @@ -85,7 +85,7 @@ func (s *AccountTestService) ProbeOpenAIAPIKeyResponsesSupport(ctx context.Conte return } - probeURL := strings.TrimSuffix(normalizedBaseURL, "/") + "/responses" + probeURL := buildOpenAIResponsesURL(normalizedBaseURL) probeCtx, cancel := context.WithTimeout(ctx, openaiResponsesProbeTimeout) defer cancel() diff --git a/backend/internal/service/openai_gateway_chat_completions_raw_test.go b/backend/internal/service/openai_gateway_chat_completions_raw_test.go new file mode 100644 index 00000000..01013837 --- /dev/null +++ b/backend/internal/service/openai_gateway_chat_completions_raw_test.go @@ -0,0 +1,67 @@ +//go:build unit + +package service + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestBuildOpenAIChatCompletionsURL(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + base string + want string + }{ + // 已是 /chat/completions:原样返回 + {"already chat/completions", "https://api.openai.com/v1/chat/completions", "https://api.openai.com/v1/chat/completions"}, + // 以 /v1 结尾:追加 /chat/completions + {"bare /v1", "https://api.openai.com/v1", "https://api.openai.com/v1/chat/completions"}, + // 其他情况:追加 /v1/chat/completions + {"bare domain", "https://api.openai.com", "https://api.openai.com/v1/chat/completions"}, + {"domain with trailing slash", "https://api.openai.com/", "https://api.openai.com/v1/chat/completions"}, + // 第三方上游常见形式 + {"third-party bare domain", "https://api.deepseek.com", "https://api.deepseek.com/v1/chat/completions"}, + {"third-party with path prefix", "https://api.gptgod.online/api", "https://api.gptgod.online/api/v1/chat/completions"}, + // 带空白字符 + {"whitespace trimmed", " https://api.openai.com/v1 ", "https://api.openai.com/v1/chat/completions"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got := buildOpenAIChatCompletionsURL(tt.base) + require.Equal(t, tt.want, got) + }) + } +} + +// TestBuildOpenAIResponsesURL_ProbeURL 锁定 probe/测试端点使用的 URL 构建逻辑, +// 确保 buildOpenAIResponsesURL 对标准 OpenAI base_url 格式均拼出 `/v1/responses`。 +func TestBuildOpenAIResponsesURL_ProbeURL(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + base string + want string + }{ + {"bare domain", "https://api.openai.com", "https://api.openai.com/v1/responses"}, + {"domain trailing slash", "https://api.openai.com/", "https://api.openai.com/v1/responses"}, + {"bare /v1", "https://api.openai.com/v1", "https://api.openai.com/v1/responses"}, + {"already /responses", "https://api.openai.com/v1/responses", "https://api.openai.com/v1/responses"}, + {"third-party bare domain", "https://api.deepseek.com", "https://api.deepseek.com/v1/responses"}, + {"only domain, no scheme", "api.gptgod.online", "api.gptgod.online/v1/responses"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got := buildOpenAIResponsesURL(tt.base) + require.Equal(t, tt.want, got) + }) + } +} From 57099a6af6d89bbd995be6d7c8b04f79932de6a8 Mon Sep 17 00:00:00 2001 From: alfadb Date: Sat, 2 May 2026 10:22:16 +0800 Subject: [PATCH 06/28] fix(openai-gateway): extract reasoning_effort in raw Chat Completions path The forwardAsRawChatCompletions path (used when APIKey accounts target upstreams that don't support Responses API, e.g. DeepSeek) was missing reasoning_effort and service_tier extraction, causing all reasoning effort values to be silently dropped. Extract both from the raw Chat Completions body before forwarding, and propagate them through streamRawChatCompletions / bufferRawChatCompletions to the OpenAIForwardResult. --- .../openai_gateway_chat_completions_raw.go | 46 ++++++++++++------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/backend/internal/service/openai_gateway_chat_completions_raw.go b/backend/internal/service/openai_gateway_chat_completions_raw.go index 6c7eec41..9535395f 100644 --- a/backend/internal/service/openai_gateway_chat_completions_raw.go +++ b/backend/internal/service/openai_gateway_chat_completions_raw.go @@ -73,6 +73,10 @@ func (s *OpenAIGatewayService) forwardAsRawChatCompletions( } clientStream := gjson.GetBytes(body, "stream").Bool() + // 1b. Extract reasoning effort and service tier from the raw body before any transformation. + reasoningEffort := extractOpenAIReasoningEffortFromBody(body, originalModel) + serviceTier := extractOpenAIServiceTierFromBody(body) + // 2. Resolve model mapping (same as ForwardAsChatCompletions) billingModel := resolveOpenAIForwardModel(account, originalModel, defaultMappedModel) upstreamModel := normalizeOpenAIModelForUpstream(account, billingModel) @@ -206,9 +210,9 @@ func (s *OpenAIGatewayService) forwardAsRawChatCompletions( // 8. Forward response if clientStream { - return s.streamRawChatCompletions(c, resp, originalModel, billingModel, upstreamModel, startTime) + return s.streamRawChatCompletions(c, resp, originalModel, billingModel, upstreamModel, reasoningEffort, serviceTier, startTime) } - return s.bufferRawChatCompletions(c, resp, originalModel, billingModel, upstreamModel, startTime) + return s.bufferRawChatCompletions(c, resp, originalModel, billingModel, upstreamModel, reasoningEffort, serviceTier, startTime) } // streamRawChatCompletions 透传上游 CC SSE 流到客户端,并提取 usage(包括 @@ -223,6 +227,8 @@ func (s *OpenAIGatewayService) streamRawChatCompletions( originalModel string, billingModel string, upstreamModel string, + reasoningEffort *string, + serviceTier *string, startTime time.Time, ) (*OpenAIForwardResult, error) { requestID := resp.Header.Get("x-request-id") @@ -288,14 +294,16 @@ func (s *OpenAIGatewayService) streamRawChatCompletions( } return &OpenAIForwardResult{ - RequestID: requestID, - Usage: usage, - Model: originalModel, - BillingModel: billingModel, - UpstreamModel: upstreamModel, - Stream: true, - Duration: time.Since(startTime), - FirstTokenMs: firstTokenMs, + RequestID: requestID, + Usage: usage, + Model: originalModel, + BillingModel: billingModel, + UpstreamModel: upstreamModel, + ReasoningEffort: reasoningEffort, + ServiceTier: serviceTier, + Stream: true, + Duration: time.Since(startTime), + FirstTokenMs: firstTokenMs, }, nil } @@ -306,6 +314,8 @@ func (s *OpenAIGatewayService) bufferRawChatCompletions( originalModel string, billingModel string, upstreamModel string, + reasoningEffort *string, + serviceTier *string, startTime time.Time, ) (*OpenAIForwardResult, error) { requestID := resp.Header.Get("x-request-id") @@ -340,13 +350,15 @@ func (s *OpenAIGatewayService) bufferRawChatCompletions( _, _ = c.Writer.Write(respBody) return &OpenAIForwardResult{ - RequestID: requestID, - Usage: usage, - Model: originalModel, - BillingModel: billingModel, - UpstreamModel: upstreamModel, - Stream: false, - Duration: time.Since(startTime), + RequestID: requestID, + Usage: usage, + Model: originalModel, + BillingModel: billingModel, + UpstreamModel: upstreamModel, + ReasoningEffort: reasoningEffort, + ServiceTier: serviceTier, + Stream: false, + Duration: time.Since(startTime), }, nil } From e736de1ed91c6f08a4e2eaf55252a920f52ef3b6 Mon Sep 17 00:00:00 2001 From: alfadb Date: Sat, 2 May 2026 10:31:57 +0800 Subject: [PATCH 07/28] fix(handler): log correct upstream endpoint for raw CC path DeriveUpstreamEndpoint hard-codes /v1/responses for PlatformOpenAI, but APIKey accounts probed to not support Responses API are forwarded directly to /v1/chat/completions via forwardAsRawChatCompletions. Add resolveRawCCUpstreamEndpoint which returns /v1/chat/completions when the account's extra.openai_responses_supported is explicitly false. --- .../internal/handler/openai_chat_completions.go | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/backend/internal/handler/openai_chat_completions.go b/backend/internal/handler/openai_chat_completions.go index f395970a..7fd24f97 100644 --- a/backend/internal/handler/openai_chat_completions.go +++ b/backend/internal/handler/openai_chat_completions.go @@ -10,6 +10,7 @@ import ( pkghttputil "github.com/Wei-Shaw/sub2api/internal/pkg/httputil" "github.com/Wei-Shaw/sub2api/internal/pkg/ip" "github.com/Wei-Shaw/sub2api/internal/pkg/logger" + "github.com/Wei-Shaw/sub2api/internal/pkg/openai_compat" middleware2 "github.com/Wei-Shaw/sub2api/internal/server/middleware" "github.com/Wei-Shaw/sub2api/internal/service" "github.com/gin-gonic/gin" @@ -276,7 +277,7 @@ func (h *OpenAIGatewayHandler) ChatCompletions(c *gin.Context) { Account: account, Subscription: subscription, InboundEndpoint: GetInboundEndpoint(c), - UpstreamEndpoint: GetUpstreamEndpoint(c, account.Platform), + UpstreamEndpoint: resolveRawCCUpstreamEndpoint(c, account), UserAgent: userAgent, IPAddress: clientIP, APIKeyService: h.apiKeyService, @@ -299,3 +300,16 @@ func (h *OpenAIGatewayHandler) ChatCompletions(c *gin.Context) { return } } + +// resolveRawCCUpstreamEndpoint returns the actual upstream endpoint for +// OpenAI Chat Completions requests. For APIKey accounts whose upstream +// has been probed to not support the Responses API, the request is +// forwarded directly to /v1/chat/completions — not through the default +// CC→Responses conversion path. +func resolveRawCCUpstreamEndpoint(c *gin.Context, account *service.Account) string { + if account != nil && account.Type == service.AccountTypeAPIKey && + !openai_compat.ShouldUseResponsesAPI(account.Extra) { + return "/v1/chat/completions" + } + return GetUpstreamEndpoint(c, account.Platform) +} From f2f6bc6c045dcb9d0f556a4a288639559d2aadc9 Mon Sep 17 00:00:00 2001 From: Derek Date: Sat, 2 May 2026 23:57:10 +0800 Subject: [PATCH 08/28] =?UTF-8?q?feat:=20Select=20=E5=92=8C=20GroupSelecto?= =?UTF-8?q?r=20=E7=BB=84=E4=BB=B6=E6=94=AF=E6=8C=81=E8=87=AA=E5=8A=A8?= =?UTF-8?q?=E6=90=9C=E7=B4=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 当选项数量 > 5 时自动启用搜索过滤,无需修改任何使用处代码。 - Select.vue: searchable 默认值改为 'auto',内部自动判断 - GroupSelector.vue: 新增 searchable prop 和搜索输入框 --- .../src/components/common/GroupSelector.vue | 58 +++++++++++++++---- frontend/src/components/common/Select.vue | 15 +++-- 2 files changed, 57 insertions(+), 16 deletions(-) diff --git a/frontend/src/components/common/GroupSelector.vue b/frontend/src/components/common/GroupSelector.vue index 582b6f0b..e5980ef5 100644 --- a/frontend/src/components/common/GroupSelector.vue +++ b/frontend/src/components/common/GroupSelector.vue @@ -5,7 +5,24 @@ {{ t('common.selectedCount', { count: modelValue.length }) }}
+ + +
+