Merge pull request #2873 from wucm667/feat/account-quota-threshold-auto-pause

feat(account): 支持按 5h/7d 用量阈值自动暂停账号调度
This commit is contained in:
Wesley Liddick 2026-05-29 15:40:33 +08:00 committed by GitHub
commit f68d351158
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
20 changed files with 1150 additions and 27 deletions

View File

@ -195,7 +195,7 @@ func initializeApplication(buildInfo handler.BuildInfo) (*Application, error) {
gatewayService := service.NewGatewayService(accountRepository, groupRepository, usageLogRepository, usageBillingRepository, userRepository, userSubscriptionRepository, userGroupRateRepository, gatewayCache, configConfig, schedulerSnapshotService, concurrencyService, billingService, rateLimitService, billingCacheService, identityService, httpUpstream, deferredService, claudeTokenProvider, sessionLimitCache, rpmCache, digestSessionStore, settingService, tlsFingerprintProfileService, channelService, modelPricingResolver, balanceNotifyService, serviceUserPlatformQuotaRepository)
geminiMessagesCompatService := service.NewGeminiMessagesCompatService(accountRepository, groupRepository, gatewayCache, schedulerSnapshotService, geminiTokenProvider, rateLimitService, httpUpstream, antigravityGatewayService, configConfig)
opsSystemLogSink := service.ProvideOpsSystemLogSink(opsRepository)
opsService := service.NewOpsService(opsRepository, settingRepository, configConfig, accountRepository, userRepository, concurrencyService, gatewayService, openAIGatewayService, geminiMessagesCompatService, antigravityGatewayService, opsSystemLogSink)
opsService := service.ProvideOpsService(opsRepository, settingRepository, configConfig, accountRepository, userRepository, concurrencyService, gatewayService, openAIGatewayService, geminiMessagesCompatService, antigravityGatewayService, opsSystemLogSink, settingService)
encryptionKey, err := payment.ProvideEncryptionKey(configConfig)
if err != nil {
return nil, err

View File

@ -548,6 +548,17 @@ func filterSchedulerExtra(extra map[string]any) map[string]any {
"openai_ws_force_http",
"openai_responses_mode",
"openai_responses_supported",
"codex_5h_used_percent",
"codex_7d_used_percent",
"codex_5h_reset_at",
"codex_7d_reset_at",
"codex_5h_reset_after_seconds",
"codex_7d_reset_after_seconds",
"codex_usage_updated_at",
"auto_pause_5h_threshold",
"auto_pause_7d_threshold",
"auto_pause_5h_disabled",
"auto_pause_7d_disabled",
}
filtered := make(map[string]any)
for _, key := range keys {

View File

@ -75,3 +75,36 @@ func TestBuildSchedulerMetadataAccount_KeepsSlimGroupMembership(t *testing.T) {
require.Equal(t, int64(11), got.AccountGroups[1].GroupID)
require.Nil(t, got.Groups)
}
func TestBuildSchedulerMetadataAccount_KeepsQuotaAutoPauseFields(t *testing.T) {
account := service.Account{
ID: 88,
Extra: map[string]any{
"codex_5h_used_percent": 12.34,
"codex_7d_used_percent": 56.78,
"codex_5h_reset_at": "2026-05-29T10:00:00Z",
"codex_7d_reset_at": "2026-06-01T10:00:00Z",
"codex_5h_reset_after_seconds": 300,
"codex_7d_reset_after_seconds": 600,
"codex_usage_updated_at": "2026-05-29T09:00:00Z",
"auto_pause_5h_threshold": 0.95,
"auto_pause_7d_threshold": 0.96,
"auto_pause_5h_disabled": true,
"auto_pause_7d_disabled": false,
},
}
got := buildSchedulerMetadataAccount(account)
require.Equal(t, 12.34, got.Extra["codex_5h_used_percent"])
require.Equal(t, 56.78, got.Extra["codex_7d_used_percent"])
require.Equal(t, "2026-05-29T10:00:00Z", got.Extra["codex_5h_reset_at"])
require.Equal(t, "2026-06-01T10:00:00Z", got.Extra["codex_7d_reset_at"])
require.Equal(t, 300, got.Extra["codex_5h_reset_after_seconds"])
require.Equal(t, 600, got.Extra["codex_7d_reset_after_seconds"])
require.Equal(t, "2026-05-29T09:00:00Z", got.Extra["codex_usage_updated_at"])
require.Equal(t, 0.95, got.Extra["auto_pause_5h_threshold"])
require.Equal(t, 0.96, got.Extra["auto_pause_7d_threshold"])
require.Equal(t, true, got.Extra["auto_pause_5h_disabled"])
require.Equal(t, false, got.Extra["auto_pause_7d_disabled"])
}

View File

@ -370,7 +370,6 @@ func (s *defaultOpenAIAccountScheduler) selectBySessionHash(
_ = s.service.deleteStickySessionAccountID(ctx, req.GroupID, sessionHash)
return nil, nil
}
result, acquireErr := s.service.tryAcquireAccountSlot(ctx, accountID, account.Concurrency)
if acquireErr == nil && result != nil && result.Acquired {
_ = s.service.refreshStickySessionTTL(ctx, req.GroupID, sessionHash, s.service.openAIWSSessionStickyTTL())
@ -975,6 +974,13 @@ func (s *defaultOpenAIAccountScheduler) isAccountRequestCompatible(ctx context.C
if s != nil && s.service != nil && s.service.isOpenAIAccountRuntimeBlocked(account) {
return false
}
// Quota auto-pause must be evaluated during the initial filter too. Without it the
// TopK candidate pool can be filled with paused accounts and the later fresh/DB
// rechecks won't reach healthy accounts that fell outside TopK — manifesting as
// "no available accounts" even though healthy ones exist.
if paused, _ := shouldAutoPauseOpenAIAccountByQuota(ctx, account); paused {
return false
}
if req.RequestedModel != "" && !account.IsModelSupported(req.RequestedModel) {
return false
}
@ -1154,6 +1160,7 @@ func (s *OpenAIGatewayService) selectAccountWithScheduler(
requiredImageCapability OpenAIImagesCapability,
requireCompact bool,
) (*AccountSelectionResult, OpenAIAccountScheduleDecision, error) {
ctx = s.withOpenAIQuotaAutoPauseContext(ctx)
decision := OpenAIAccountScheduleDecision{}
scheduler := s.getOpenAIAccountScheduler(ctx)
if scheduler == nil {

View File

@ -691,6 +691,224 @@ func TestOpenAIGatewayService_SelectAccountWithScheduler_SessionStickyRateLimite
require.Equal(t, openAIAccountScheduleLayerLoadBalance, decision.Layer)
}
// TestOpenAIGatewayService_SelectAccountForModelWithExclusions_AutoPauseBy5hThreshold
// checks that an account whose 5h usage (95%) has reached its own 0.95 threshold is
// passed over, so the Priority-5 account without quota data is selected instead.
func TestOpenAIGatewayService_SelectAccountForModelWithExclusions_AutoPauseBy5hThreshold(t *testing.T) {
	overQuota := Account{
		ID:          35001,
		Platform:    PlatformOpenAI,
		Type:        AccountTypeAPIKey,
		Status:      StatusActive,
		Schedulable: true,
		Concurrency: 1,
		Priority:    0,
		Extra: map[string]any{
			"codex_5h_used_percent":   95.0,
			"auto_pause_5h_threshold": 0.95,
		},
	}
	fallback := Account{
		ID:          35002,
		Platform:    PlatformOpenAI,
		Type:        AccountTypeAPIKey,
		Status:      StatusActive,
		Schedulable: true,
		Concurrency: 1,
		Priority:    5,
	}
	repo := schedulerTestOpenAIAccountRepo{accounts: []Account{overQuota, fallback}}
	svc := &OpenAIGatewayService{accountRepo: repo, cfg: &config.Config{}}

	selected, err := svc.SelectAccountForModelWithExclusions(context.Background(), nil, "", "gpt-5.1", nil)

	require.NoError(t, err)
	require.NotNil(t, selected)
	require.Equal(t, int64(35002), selected.ID)
}
// TestOpenAIGatewayService_SelectAccountForModelWithExclusions_AllowsBelow5hThreshold
// checks that an account at 80% 5h usage with a 0.95 threshold is not paused: the
// Priority-0 account is still the one selected.
func TestOpenAIGatewayService_SelectAccountForModelWithExclusions_AllowsBelow5hThreshold(t *testing.T) {
	underQuota := Account{
		ID:          35101,
		Platform:    PlatformOpenAI,
		Type:        AccountTypeAPIKey,
		Status:      StatusActive,
		Schedulable: true,
		Concurrency: 1,
		Priority:    0,
		Extra: map[string]any{
			"codex_5h_used_percent":   80.0,
			"auto_pause_5h_threshold": 0.95,
		},
	}
	fallback := Account{
		ID:          35102,
		Platform:    PlatformOpenAI,
		Type:        AccountTypeAPIKey,
		Status:      StatusActive,
		Schedulable: true,
		Concurrency: 1,
		Priority:    5,
	}
	repo := schedulerTestOpenAIAccountRepo{accounts: []Account{underQuota, fallback}}
	svc := &OpenAIGatewayService{accountRepo: repo, cfg: &config.Config{}}

	selected, err := svc.SelectAccountForModelWithExclusions(context.Background(), nil, "", "gpt-5.1", nil)

	require.NoError(t, err)
	require.NotNil(t, selected)
	require.Equal(t, int64(35101), selected.ID)
}
// TestOpenAIGatewayService_SelectAccountForModelWithExclusions_AutoPauseBy7dThreshold
// is the 7d counterpart of the 5h threshold test: 95% usage in the 7d window against
// a 0.95 per-account threshold must divert selection to the Priority-5 account.
func TestOpenAIGatewayService_SelectAccountForModelWithExclusions_AutoPauseBy7dThreshold(t *testing.T) {
	overQuota := Account{
		ID:          35201,
		Platform:    PlatformOpenAI,
		Type:        AccountTypeAPIKey,
		Status:      StatusActive,
		Schedulable: true,
		Concurrency: 1,
		Priority:    0,
		Extra: map[string]any{
			"codex_7d_used_percent":   95.0,
			"auto_pause_7d_threshold": 0.95,
		},
	}
	fallback := Account{
		ID:          35202,
		Platform:    PlatformOpenAI,
		Type:        AccountTypeAPIKey,
		Status:      StatusActive,
		Schedulable: true,
		Concurrency: 1,
		Priority:    5,
	}
	repo := schedulerTestOpenAIAccountRepo{accounts: []Account{overQuota, fallback}}
	svc := &OpenAIGatewayService{accountRepo: repo, cfg: &config.Config{}}

	selected, err := svc.SelectAccountForModelWithExclusions(context.Background(), nil, "", "gpt-5.1", nil)

	require.NoError(t, err)
	require.NotNil(t, selected)
	require.Equal(t, int64(35202), selected.ID)
}
// TestOpenAIGatewayService_SelectAccountForModelWithExclusions_UnconfiguredThresholdKeepsLegacyBehavior
// checks that high usage alone (99% in both windows) never pauses an account when
// neither a per-account threshold nor a global default is configured.
func TestOpenAIGatewayService_SelectAccountForModelWithExclusions_UnconfiguredThresholdKeepsLegacyBehavior(t *testing.T) {
	heavyUse := Account{
		ID:          35301,
		Platform:    PlatformOpenAI,
		Type:        AccountTypeAPIKey,
		Status:      StatusActive,
		Schedulable: true,
		Concurrency: 1,
		Priority:    0,
		Extra: map[string]any{
			"codex_5h_used_percent": 99.0,
			"codex_7d_used_percent": 99.0,
		},
	}
	fallback := Account{
		ID:          35302,
		Platform:    PlatformOpenAI,
		Type:        AccountTypeAPIKey,
		Status:      StatusActive,
		Schedulable: true,
		Concurrency: 1,
		Priority:    5,
	}
	repo := schedulerTestOpenAIAccountRepo{accounts: []Account{heavyUse, fallback}}
	svc := &OpenAIGatewayService{accountRepo: repo, cfg: &config.Config{}}

	selected, err := svc.SelectAccountForModelWithExclusions(context.Background(), nil, "", "gpt-5.1", nil)

	require.NoError(t, err)
	require.NotNil(t, selected)
	require.Equal(t, int64(35301), selected.ID)
}
func TestOpenAIGatewayService_SelectAccountForModelWithExclusions_UsesGlobalDefaultThreshold(t *testing.T) {
ctx := withOpenAIQuotaAutoPauseSettings(context.Background(), OpsOpenAIAccountQuotaAutoPauseSettings{DefaultThreshold5h: 0.95})
primary := Account{
ID: 35401,
Platform: PlatformOpenAI,
Type: AccountTypeAPIKey,
Status: StatusActive,
Schedulable: true,
Concurrency: 1,
Priority: 0,
Extra: map[string]any{
"codex_5h_used_percent": 95.0,
},
}
secondary := Account{ID: 35402, Platform: PlatformOpenAI, Type: AccountTypeAPIKey, Status: StatusActive, Schedulable: true, Concurrency: 1, Priority: 5}
svc := &OpenAIGatewayService{accountRepo: schedulerTestOpenAIAccountRepo{accounts: []Account{primary, secondary}}, cfg: &config.Config{}}
account, err := svc.SelectAccountForModelWithExclusions(ctx, nil, "", "gpt-5.1", nil)
require.NoError(t, err)
require.NotNil(t, account)
require.Equal(t, int64(35402), account.ID)
}
// Regression: a per-account explicit-disable flag exempts the account from auto-pause
// even when a global default threshold is set. Without this, "leave threshold blank"
// silently falls back to global default and admins have no way to whitelist a single
// account.
func TestOpenAIGatewayService_SelectAccountForModelWithExclusions_PerAccountDisableOverridesGlobalDefault(t *testing.T) {
ctx := withOpenAIQuotaAutoPauseSettings(context.Background(), OpsOpenAIAccountQuotaAutoPauseSettings{DefaultThreshold5h: 0.95})
// Account has high usage AND no per-account threshold (would normally fall back to
// the global default and get paused), but the explicit disable flag is set.
primary := Account{
ID: 35701,
Platform: PlatformOpenAI,
Type: AccountTypeAPIKey,
Status: StatusActive,
Schedulable: true,
Concurrency: 1,
Priority: 0,
Extra: map[string]any{
"codex_5h_used_percent": 99.0,
"auto_pause_5h_disabled": true,
},
}
secondary := Account{ID: 35702, Platform: PlatformOpenAI, Type: AccountTypeAPIKey, Status: StatusActive, Schedulable: true, Concurrency: 1, Priority: 5}
svc := &OpenAIGatewayService{accountRepo: schedulerTestOpenAIAccountRepo{accounts: []Account{primary, secondary}}, cfg: &config.Config{}}
account, err := svc.SelectAccountForModelWithExclusions(ctx, nil, "", "gpt-5.1", nil)
require.NoError(t, err)
require.NotNil(t, account)
require.Equal(t, int64(35701), account.ID)
}
// TestOpenAIGatewayService_SelectAccountForModelWithExclusions_PerWindowDisableScoped
// checks that the disable flag is scoped to a single window: with 5h auto-pause
// disabled, a configured 7d threshold must still pause the account.
func TestOpenAIGatewayService_SelectAccountForModelWithExclusions_PerWindowDisableScoped(t *testing.T) {
	pausedBy7d := Account{
		ID:          35801,
		Platform:    PlatformOpenAI,
		Type:        AccountTypeAPIKey,
		Status:      StatusActive,
		Schedulable: true,
		Concurrency: 1,
		Priority:    0,
		Extra: map[string]any{
			"codex_5h_used_percent":   99.0,
			"codex_7d_used_percent":   99.0,
			"auto_pause_5h_disabled":  true,
			"auto_pause_7d_threshold": 0.95,
		},
	}
	fallback := Account{
		ID:          35802,
		Platform:    PlatformOpenAI,
		Type:        AccountTypeAPIKey,
		Status:      StatusActive,
		Schedulable: true,
		Concurrency: 1,
		Priority:    5,
	}
	repo := schedulerTestOpenAIAccountRepo{accounts: []Account{pausedBy7d, fallback}}
	svc := &OpenAIGatewayService{accountRepo: repo, cfg: &config.Config{}}

	selected, err := svc.SelectAccountForModelWithExclusions(context.Background(), nil, "", "gpt-5.1", nil)

	require.NoError(t, err)
	require.NotNil(t, selected)
	require.Equal(t, int64(35802), selected.ID, "7d auto-pause must still fire even though 5h is disabled")
}
// TestOpenAIGatewayService_SelectAccountForModelWithExclusions_StaleUsageWindowResetSkipsPause
// checks the stale-snapshot guard. Usage is above the threshold, but the recorded
// reset time is already in the past, so the cached percentage describes a window
// that has rolled over and must not keep the account paused.
func TestOpenAIGatewayService_SelectAccountForModelWithExclusions_StaleUsageWindowResetSkipsPause(t *testing.T) {
	// One minute ago: the 5h window has already reset.
	expiredResetAt := time.Now().Add(-time.Minute).Format(time.RFC3339)

	staleSnapshot := Account{
		ID:          35501,
		Platform:    PlatformOpenAI,
		Type:        AccountTypeAPIKey,
		Status:      StatusActive,
		Schedulable: true,
		Concurrency: 1,
		Priority:    0,
		Extra: map[string]any{
			"codex_5h_used_percent":   99.0,
			"auto_pause_5h_threshold": 0.95,
			"codex_5h_reset_at":       expiredResetAt,
		},
	}
	fallback := Account{
		ID:          35502,
		Platform:    PlatformOpenAI,
		Type:        AccountTypeAPIKey,
		Status:      StatusActive,
		Schedulable: true,
		Concurrency: 1,
		Priority:    5,
	}
	repo := schedulerTestOpenAIAccountRepo{accounts: []Account{staleSnapshot, fallback}}
	svc := &OpenAIGatewayService{accountRepo: repo, cfg: &config.Config{}}

	selected, err := svc.SelectAccountForModelWithExclusions(context.Background(), nil, "", "gpt-5.1", nil)

	require.NoError(t, err)
	require.NotNil(t, selected)
	require.Equal(t, int64(35501), selected.ID)
}
// TestOpenAIGatewayService_SelectAccountForModelWithExclusions_FreshUsageWindowStillPauses
// is the counterpart of the stale-window test: the reset time is still an hour
// away, so the over-threshold snapshot is current and the account stays paused.
func TestOpenAIGatewayService_SelectAccountForModelWithExclusions_FreshUsageWindowStillPauses(t *testing.T) {
	// One hour ahead: the 5h window has not reset yet.
	pendingResetAt := time.Now().Add(time.Hour).Format(time.RFC3339)

	freshSnapshot := Account{
		ID:          35601,
		Platform:    PlatformOpenAI,
		Type:        AccountTypeAPIKey,
		Status:      StatusActive,
		Schedulable: true,
		Concurrency: 1,
		Priority:    0,
		Extra: map[string]any{
			"codex_5h_used_percent":   99.0,
			"auto_pause_5h_threshold": 0.95,
			"codex_5h_reset_at":       pendingResetAt,
		},
	}
	fallback := Account{
		ID:          35602,
		Platform:    PlatformOpenAI,
		Type:        AccountTypeAPIKey,
		Status:      StatusActive,
		Schedulable: true,
		Concurrency: 1,
		Priority:    5,
	}
	repo := schedulerTestOpenAIAccountRepo{accounts: []Account{freshSnapshot, fallback}}
	svc := &OpenAIGatewayService{accountRepo: repo, cfg: &config.Config{}}

	selected, err := svc.SelectAccountForModelWithExclusions(context.Background(), nil, "", "gpt-5.1", nil)

	require.NoError(t, err)
	require.NotNil(t, selected)
	require.Equal(t, int64(35602), selected.ID)
}
func TestOpenAIGatewayService_SelectAccountForModelWithExclusions_SkipsFreshlyRateLimitedSnapshotCandidate(t *testing.T) {
ctx := context.Background()
groupID := int64(10102)
@ -1238,6 +1456,85 @@ func TestOpenAIGatewayService_SelectAccountWithScheduler_LoadBalanceTopKFallback
}
}
// TestOpenAIGatewayService_SelectAccountWithScheduler_LoadBalanceTopKExcludesQuotaPaused
// is a regression test for the scheduler's initial candidate filter. If
// quota-paused accounts were admitted, they could fill the TopK pool and push
// healthy accounts out of it, producing "no available accounts" even though a
// healthy account exists. The paused account must be dropped before TopK is applied.
func TestOpenAIGatewayService_SelectAccountWithScheduler_LoadBalanceTopKExcludesQuotaPaused(t *testing.T) {
	const (
		pausedID  = 37001
		healthyID = 37002
	)
	groupID := int64(110)

	pool := []Account{
		{
			ID:          pausedID,
			Platform:    PlatformOpenAI,
			Type:        AccountTypeAPIKey,
			Status:      StatusActive,
			Schedulable: true,
			Concurrency: 1,
			Priority:    0,
			Extra: map[string]any{
				"codex_5h_used_percent":   96.0,
				"auto_pause_5h_threshold": 0.95,
			},
		},
		{
			ID:          healthyID,
			Platform:    PlatformOpenAI,
			Type:        AccountTypeAPIKey,
			Status:      StatusActive,
			Schedulable: true,
			Concurrency: 1,
			Priority:    5,
		},
	}

	cfg := &config.Config{}
	// TopK=1 makes the bug fatal: a paused account in the pool would crowd out the
	// healthy one entirely.
	cfg.Gateway.OpenAIWS.LBTopK = 1
	cfg.Gateway.OpenAIWS.SchedulerScoreWeights.Priority = 0.4
	cfg.Gateway.OpenAIWS.SchedulerScoreWeights.Load = 1.0
	cfg.Gateway.OpenAIWS.SchedulerScoreWeights.Queue = 1.0

	// Both accounts report the same load; only the healthy one can acquire a slot.
	loadCache := schedulerTestConcurrencyCache{
		loadMap: map[int64]*AccountLoadInfo{
			pausedID:  {AccountID: pausedID, LoadRate: 5, WaitingCount: 0},
			healthyID: {AccountID: healthyID, LoadRate: 5, WaitingCount: 0},
		},
		acquireResults: map[int64]bool{
			healthyID: true,
		},
	}

	svc := &OpenAIGatewayService{
		accountRepo:        schedulerTestOpenAIAccountRepo{accounts: pool},
		cache:              &schedulerTestGatewayCache{},
		cfg:                cfg,
		rateLimitService:   newOpenAIAdvancedSchedulerRateLimitService("true"),
		concurrencyService: NewConcurrencyService(loadCache),
	}

	selection, decision, err := svc.SelectAccountWithScheduler(
		context.Background(),
		&groupID,
		"",
		"",
		"gpt-5.1",
		nil,
		OpenAIUpstreamTransportAny,
		false,
	)

	require.NoError(t, err)
	require.NotNil(t, selection)
	require.NotNil(t, selection.Account)
	require.Equal(t, int64(healthyID), selection.Account.ID)
	require.Equal(t, openAIAccountScheduleLayerLoadBalance, decision.Layer)
	// Exactly one candidate: the paused account never entered the pool.
	require.Equal(t, 1, decision.CandidateCount)

	if release := selection.ReleaseFunc; release != nil {
		release()
	}
}
func TestOpenAIGatewayService_OpenAIAccountSchedulerMetrics(t *testing.T) {
ctx := context.Background()
groupID := int64(12)

View File

@ -1290,7 +1290,7 @@ func (s *OpenAIGatewayService) SelectAccountForModel(ctx context.Context, groupI
// SelectAccountForModelWithExclusions selects an account supporting the requested model while excluding specified accounts.
// SelectAccountForModelWithExclusions 选择支持指定模型的账号,同时排除指定的账号。
func (s *OpenAIGatewayService) SelectAccountForModelWithExclusions(ctx context.Context, groupID *int64, sessionHash string, requestedModel string, excludedIDs map[int64]struct{}) (*Account, error) {
return s.selectAccountForModelWithExclusions(ctx, groupID, sessionHash, requestedModel, excludedIDs, false, 0, "")
return s.selectAccountForModelWithExclusions(s.withOpenAIQuotaAutoPauseContext(ctx), groupID, sessionHash, requestedModel, excludedIDs, false, 0, "")
}
// noAvailableOpenAISelectionError builds the standard "no account available" error
@ -1327,6 +1327,17 @@ func isOpenAIAccountEligibleForRequest(ctx context.Context, account *Account, re
if account == nil || !account.IsOpenAI() || !account.IsSchedulableForModelWithContext(ctx, requestedModel) {
return false
}
if paused, reason := shouldAutoPauseOpenAIAccountByQuota(ctx, account); paused {
// Debug level: this fires per-candidate on the scheduling hot path, so Info
// would amplify into log spam once several accounts cross the threshold.
slog.Debug("account_auto_paused_by_quota",
"account_id", account.ID,
"window", reason.window,
"threshold", reason.threshold,
"utilization", reason.utilization,
)
return false
}
if requestedModel != "" && !account.IsModelSupported(requestedModel) {
return false
}
@ -1339,6 +1350,201 @@ func isOpenAIAccountEligibleForRequest(ctx context.Context, account *Account, re
return true
}
// openAIQuotaAutoPauseDecision describes why shouldAutoPauseOpenAIAccountByQuota
// decided to pause an account. The zero value is returned when no pause applies.
type openAIQuotaAutoPauseDecision struct {
// window names the usage window that triggered the pause: "5h" or "7d".
window string
// threshold is the effective pause threshold for that window, as a 0..1 ratio.
threshold float64
// utilization is the observed usage ratio (used percent / 100) that met or
// exceeded threshold.
utilization float64
}
// shouldAutoPauseOpenAIAccountByQuota reports whether account should be skipped by
// scheduling because its Codex usage in the 5h or 7d window has reached the
// effective auto-pause threshold.
//
// The 5h window is evaluated before the 7d window; the first one that trips is
// described in the returned decision. A nil or non-OpenAI account is never paused.
// When the result is false the decision is the zero value.
func shouldAutoPauseOpenAIAccountByQuota(ctx context.Context, account *Account) (bool, openAIQuotaAutoPauseDecision) {
	var noPause openAIQuotaAutoPauseDecision
	if account == nil || !account.IsOpenAI() {
		return false, noPause
	}

	// The per-window disable flags take precedence over any threshold, including
	// the global default. A blank per-account threshold means "use the global
	// default", so these flags are the only way to exempt a single account (or a
	// single window of it) once a global default exists.
	skip5h := resolveAccountExtraBool(account.Extra, "auto_pause_5h_disabled")
	skip7d := resolveAccountExtraBool(account.Extra, "auto_pause_7d_disabled")
	limit5h, limit7d := resolveOpenAIQuotaAutoPauseThresholds(ctx, account)
	checkedAt := time.Now()

	windows := [...]struct {
		name     string
		disabled bool
		limit    float64
	}{
		{name: "5h", disabled: skip5h, limit: limit5h},
		{name: "7d", disabled: skip7d, limit: limit7d},
	}
	for _, w := range windows {
		if active := !w.disabled && w.limit > 0; !active {
			continue
		}
		used, ok := resolveOpenAIQuotaUtilization(account.Extra, w.name, checkedAt)
		if ok && used >= w.limit {
			return true, openAIQuotaAutoPauseDecision{window: w.name, threshold: w.limit, utilization: used}
		}
	}
	return false, noPause
}
// resolveAccountExtraBool reads a bool-like value stored under key in an account's
// extra map, tolerating the shapes JSON decoding may produce:
//
//   - bool: returned as-is;
//   - string: parsed with strconv.ParseBool after trimming whitespace, so
//     "true"/"false"/"1"/"0"/"t"/"f" are accepted and anything else is false;
//   - float64, float32, int, int64: true when non-zero;
//   - json.Number: true when non-zero, whether written as an integer ("1") or in
//     fractional/exponent form ("1.0", "1e0").
//
// A nil or empty map, a missing key, a nil value, or any other type yields false.
func resolveAccountExtraBool(extra map[string]any, key string) bool {
	// Indexing a nil map is safe and yields a nil interface, which matches no case
	// below, so missing keys and explicit nulls both fall through to false.
	switch v := extra[key].(type) {
	case bool:
		return v
	case string:
		parsed, err := strconv.ParseBool(strings.TrimSpace(v))
		return err == nil && parsed
	case float64:
		return v != 0
	case float32:
		return v != 0
	case int:
		return v != 0
	case int64:
		return v != 0
	case json.Number:
		if i, err := v.Int64(); err == nil {
			return i != 0
		}
		// Int64 rejects fractional and exponent notation. Fall back to Float64 so
		// that "1.0" agrees with how a float64 1.0 is treated above.
		if f, err := v.Float64(); err == nil {
			return f != 0
		}
	}
	return false
}
// resolveOpenAIQuotaAutoPauseThresholds returns the effective auto-pause thresholds
// for the 5h and 7d windows, in that order.
//
// A per-account value from account.Extra ("auto_pause_5h_threshold" /
// "auto_pause_7d_threshold") is used when it is positive after clamp01. Any window
// without a positive per-account value falls back to the global default carried in
// ctx, also passed through clamp01. A result that is not positive means no
// threshold applies to that window.
func resolveOpenAIQuotaAutoPauseThresholds(ctx context.Context, account *Account) (float64, float64) {
	own5h, _ := resolveAccountExtraNumber(account.Extra, "auto_pause_5h_threshold")
	own7d, _ := resolveAccountExtraNumber(account.Extra, "auto_pause_7d_threshold")
	limit5h, limit7d := clamp01(own5h), clamp01(own7d)

	// The context lookup is only needed when at least one window is not covered by
	// a per-account threshold.
	if !(limit5h > 0 && limit7d > 0) {
		defaults := openAIQuotaAutoPauseSettingsFromContext(ctx)
		if limit5h <= 0 {
			limit5h = clamp01(defaults.DefaultThreshold5h)
		}
		if limit7d <= 0 {
			limit7d = clamp01(defaults.DefaultThreshold7d)
		}
	}
	return limit5h, limit7d
}
// resolveAccountExtraNumber returns the first value among keys that is present in
// extra and can be read as a float64.
//
// Keys are tried in the order given. A key is skipped when it is missing, holds
// nil, holds an unsupported type, or holds a string/json.Number that does not
// parse. Supported types are float64, float32, int, int64, json.Number, and
// string (whitespace-trimmed, parsed with strconv.ParseFloat). The second result
// is false when no key yields a number.
func resolveAccountExtraNumber(extra map[string]any, keys ...string) (float64, bool) {
	if len(extra) == 0 {
		return 0, false
	}
	for _, key := range keys {
		raw, present := extra[key]
		if !present || raw == nil {
			continue
		}

		var (
			number float64
			err    error
		)
		switch typed := raw.(type) {
		case float64:
			number = typed
		case float32:
			number = float64(typed)
		case int:
			number = float64(typed)
		case int64:
			number = float64(typed)
		case json.Number:
			number, err = typed.Float64()
		case string:
			number, err = strconv.ParseFloat(strings.TrimSpace(typed), 64)
		default:
			// Unsupported type: try the next key.
			continue
		}
		if err == nil {
			return number, true
		}
	}
	return 0, false
}
// resolveOpenAIQuotaUtilization converts the cached used-percent of a Codex usage
// window ("5h" or "7d") into a utilization ratio (percent / 100).
//
// The second result is false when there is nothing usable to pause on: either no
// positive used-percent is recorded, or the window's reset time has already
// passed, which makes the cached percentage stale. The stale check exists because
// a paused account receives no traffic, so its snapshot would not be refreshed
// from upstream responses and an old percentage could otherwise keep the account
// paused after the real window reset.
func resolveOpenAIQuotaUtilization(extra map[string]any, window string, now time.Time) (float64, bool) {
	percent := readOpenAIQuotaUsedPercent(extra, window)
	if percent <= 0 || openAIQuotaWindowReset(extra, window, now) {
		return 0, false
	}
	return percent / 100, true
}
// openAIQuotaWindowReset reports whether the reset time of the given Codex usage
// window is at or before now.
//
// The absolute timestamp codex_<window>_reset_at is used when it is present and
// parses. Otherwise the reset time is derived from
// codex_<window>_reset_after_seconds added to codex_usage_updated_at; if that
// anchor is missing or unparsable, now is used as the anchor, so a positive
// offset yields false. With no reset information at all the result is false.
func openAIQuotaWindowReset(extra map[string]any, window string, now time.Time) bool {
	if len(extra) == 0 {
		return false
	}
	prefix := "codex_" + window

	if raw, found := extra[prefix+"_reset_at"]; found {
		if deadline, err := parseTime(fmt.Sprint(raw)); err == nil {
			return !now.Before(deadline)
		}
	}

	seconds := parseExtraInt(extra[prefix+"_reset_after_seconds"])
	if seconds <= 0 {
		return false
	}

	anchor := now
	if raw, found := extra["codex_usage_updated_at"]; found {
		if stamp, err := parseTime(fmt.Sprint(raw)); err == nil {
			anchor = stamp
		}
	}
	deadline := anchor.Add(time.Duration(seconds) * time.Second)
	return !now.Before(deadline)
}
// readOpenAIQuotaUsedPercent returns the value stored under
// codex_<window>_used_percent in extra, or 0 when the map is empty or the key
// holds no readable number.
func readOpenAIQuotaUsedPercent(extra map[string]any, window string) float64 {
	if len(extra) == 0 {
		return 0
	}
	percent, found := resolveAccountExtraNumber(extra, "codex_"+window+"_used_percent")
	if !found {
		return 0
	}
	return percent
}
// openAIQuotaAutoPauseCtxKey is the context key under which
// withOpenAIQuotaAutoPauseSettings stores the quota auto-pause settings. Using an
// unexported empty struct type keeps the key from colliding with keys defined by
// other packages.
type openAIQuotaAutoPauseCtxKey struct{}
// withOpenAIQuotaAutoPauseSettings returns a context derived from ctx that carries
// settings, retrievable with openAIQuotaAutoPauseSettingsFromContext. A nil ctx is
// replaced by context.Background().
func withOpenAIQuotaAutoPauseSettings(ctx context.Context, settings OpsOpenAIAccountQuotaAutoPauseSettings) context.Context {
	parent := ctx
	if parent == nil {
		parent = context.Background()
	}
	return context.WithValue(parent, openAIQuotaAutoPauseCtxKey{}, settings)
}
// openAIQuotaAutoPauseSettingsFromContext returns the quota auto-pause settings
// stored in ctx by withOpenAIQuotaAutoPauseSettings. It returns the zero value when
// ctx is nil or carries no such settings.
func openAIQuotaAutoPauseSettingsFromContext(ctx context.Context) OpsOpenAIAccountQuotaAutoPauseSettings {
	if ctx != nil {
		if stored, ok := ctx.Value(openAIQuotaAutoPauseCtxKey{}).(OpsOpenAIAccountQuotaAutoPauseSettings); ok {
			return stored
		}
	}
	return OpsOpenAIAccountQuotaAutoPauseSettings{}
}
// withOpenAIQuotaAutoPauseContext attaches the settings returned by
// s.settingService.GetOpenAIQuotaAutoPauseSettings to ctx so that the per-account
// eligibility checks can read the global default thresholds from the context.
// When the receiver or its settingService is nil, ctx is returned unchanged.
func (s *OpenAIGatewayService) withOpenAIQuotaAutoPauseContext(ctx context.Context) context.Context {
	if s != nil && s.settingService != nil {
		current := s.settingService.GetOpenAIQuotaAutoPauseSettings(ctx)
		return withOpenAIQuotaAutoPauseSettings(ctx, current)
	}
	return ctx
}
// prioritizeOpenAICompactAccounts re-orders a slice so that accounts with known
// compact support are tried first, followed by unknown, then explicitly unsupported.
// The relative order within each tier is preserved.
@ -1587,7 +1793,7 @@ func (s *OpenAIGatewayService) isBetterAccount(candidate, current *Account) bool
// SelectAccountWithLoadAwareness selects an account with load-awareness and wait plan.
func (s *OpenAIGatewayService) SelectAccountWithLoadAwareness(ctx context.Context, groupID *int64, sessionHash string, requestedModel string, excludedIDs map[int64]struct{}) (*AccountSelectionResult, error) {
return s.selectAccountWithLoadAwareness(ctx, groupID, sessionHash, requestedModel, excludedIDs, false, "")
return s.selectAccountWithLoadAwareness(s.withOpenAIQuotaAutoPauseContext(ctx), groupID, sessionHash, requestedModel, excludedIDs, false, "")
}
func (s *OpenAIGatewayService) selectAccountWithLoadAwareness(ctx context.Context, groupID *int64, sessionHash string, requestedModel string, excludedIDs map[int64]struct{}, requireCompact bool, requiredCapability OpenAIEndpointCapability) (*AccountSelectionResult, error) {

View File

@ -48,6 +48,46 @@ func TestOpenAIGatewayService_SelectAccountByPreviousResponseID_Hit(t *testing.T
}
}
// TestOpenAIGatewayService_SelectAccountByPreviousResponseID_QuotaAutoPausedMiss
// checks the previous_response_id sticky path against quota auto-pause. An account
// over its 5h threshold must not be returned for a bound response ID, yet the
// binding itself must be kept so the chain can resume on the same account after
// the window resets.
func TestOpenAIGatewayService_SelectAccountByPreviousResponseID_QuotaAutoPausedMiss(t *testing.T) {
	const responseID = "resp_prev_quota"
	ctx := context.Background()
	groupID := int64(23)

	overQuota := Account{
		ID:          77,
		Platform:    PlatformOpenAI,
		Type:        AccountTypeAPIKey,
		Status:      StatusActive,
		Schedulable: true,
		Concurrency: 2,
		Extra: map[string]any{
			"openai_apikey_responses_websockets_v2_enabled": true,
			"codex_5h_used_percent":                         96.0,
			"auto_pause_5h_threshold":                       0.95,
		},
	}

	gatewayCache := &stubGatewayCache{}
	stateStore := NewOpenAIWSStateStore(gatewayCache)
	svc := &OpenAIGatewayService{
		accountRepo:        stubOpenAIAccountRepo{accounts: []Account{overQuota}},
		cache:              gatewayCache,
		cfg:                newOpenAIWSV2TestConfig(),
		concurrencyService: NewConcurrencyService(stubConcurrencyCache{}),
		openaiWSStateStore: stateStore,
	}

	require.NoError(t, stateStore.BindResponseAccount(ctx, groupID, responseID, overQuota.ID, time.Hour))

	selection, err := svc.SelectAccountByPreviousResponseID(ctx, &groupID, responseID, "gpt-5.1", nil, false)
	require.NoError(t, err)
	require.Nil(t, selection, "超过 5h 配额阈值的账号不应继续命中 previous_response_id 粘连")

	// Auto-pause is transient, so the binding must survive the miss.
	boundID, lookupErr := stateStore.GetResponseAccount(ctx, groupID, responseID)
	require.NoError(t, lookupErr)
	require.Equal(t, overQuota.ID, boundID)
}
func TestOpenAIGatewayService_SelectAccountByPreviousResponseID_RateLimitedMiss(t *testing.T) {
ctx := context.Background()
groupID := int64(23)

View File

@ -4060,6 +4060,13 @@ func (s *OpenAIGatewayService) selectAccountByPreviousResponseIDForCapability(
if !account.SupportsOpenAIEndpointCapability(requiredCapability) {
return nil, nil
}
// Quota auto-pause must also gate the previous_response_id sticky path; otherwise an
// account over its 5h/7d threshold keeps serving the same response chain even though
// normal scheduling skips it. Pause is transient, so fall through to normal scheduling
// without deleting the binding (the window may reset before the next turn).
if paused, _ := shouldAutoPauseOpenAIAccountByQuota(ctx, account); paused {
return nil, nil
}
if s.schedulerSnapshot != nil && s.accountRepo != nil {
latest, latestErr := s.accountRepo.GetByID(ctx, account.ID)
if latestErr != nil || latest == nil {
@ -4076,6 +4083,9 @@ func (s *OpenAIGatewayService) selectAccountByPreviousResponseIDForCapability(
if !latest.SupportsOpenAIEndpointCapability(requiredCapability) {
return nil, nil
}
if paused, _ := shouldAutoPauseOpenAIAccountByQuota(ctx, latest); paused {
return nil, nil
}
if s.isOpenAIAccountRuntimeBlocked(latest) {
_ = store.DeleteResponseAccount(ctx, derefGroupID(groupID), responseID)
return nil, nil

View File

@ -41,6 +41,11 @@ type OpsService struct {
// cleanupReloader 由 wire 在 OpsCleanupService 构造完成后通过 SetCleanupReloader 注入。
// 解耦避免 OpsService -> OpsCleanupService 的硬依赖cleanup 也读 settings会循环
cleanupReloader CleanupReloader
// quotaAutoPauseSink 由 wire 注入(通常是 SettingService.SetOpenAIQuotaAutoPauseSettings
// UpdateOpsAdvancedSettings 写入新配置后调用,把最新的 quota auto-pause 全局默认阈值
// 立即同步到调度热路径读取的内存缓存,避免下次请求才能感知新值。
quotaAutoPauseSink func(OpsOpenAIAccountQuotaAutoPauseSettings)
}
// CleanupReloader 由 OpsCleanupService 实现。
@ -57,6 +62,16 @@ func (s *OpsService) SetCleanupReloader(r CleanupReloader) {
s.cleanupReloader = r
}
// SetOpenAIQuotaAutoPauseSettingsSink installs the callback that receives the
// latest quota auto-pause global default thresholds. It is injected by wire, and
// UpdateOpsAdvancedSettings invokes it after persisting new settings so the
// in-memory cache read by the scheduling hot path is updated right away.
//
// Like SetCleanupReloader, a callback is used for decoupling: it keeps OpsService
// from holding a *SettingService, which would introduce a circular dependency.
// Calling this on a nil receiver is a no-op.
func (s *OpsService) SetOpenAIQuotaAutoPauseSettingsSink(sink func(OpsOpenAIAccountQuotaAutoPauseSettings)) {
	if s != nil {
		s.quotaAutoPauseSink = sink
	}
}
func NewOpsService(
opsRepo OpsRepository,
settingRepo SettingRepository,

View File

@ -369,6 +369,7 @@ func defaultOpsAdvancedSettings() *OpsAdvancedSettings {
Aggregation: OpsAggregationSettings{
AggregationEnabled: false,
},
OpenAIAccountQuotaAutoPause: OpsOpenAIAccountQuotaAutoPauseSettings{},
IgnoreCountTokensErrors: true, // count_tokens 404 是预期行为,默认忽略
IgnoreContextCanceled: true, // Default to true - client disconnects are not errors
IgnoreNoAvailableAccounts: false, // Default to false - this is a real routing issue
@ -384,6 +385,8 @@ func normalizeOpsAdvancedSettings(cfg *OpsAdvancedSettings) {
if cfg == nil {
return
}
cfg.OpenAIAccountQuotaAutoPause.DefaultThreshold5h = clampOpsQuotaAutoPauseThreshold(cfg.OpenAIAccountQuotaAutoPause.DefaultThreshold5h)
cfg.OpenAIAccountQuotaAutoPause.DefaultThreshold7d = clampOpsQuotaAutoPauseThreshold(cfg.OpenAIAccountQuotaAutoPause.DefaultThreshold7d)
cfg.DataRetention.CleanupSchedule = strings.TrimSpace(cfg.DataRetention.CleanupSchedule)
if cfg.DataRetention.CleanupSchedule == "" {
cfg.DataRetention.CleanupSchedule = opsCleanupDefaultSchedule
@ -405,6 +408,16 @@ func normalizeOpsAdvancedSettings(cfg *OpsAdvancedSettings) {
}
}
// clampOpsQuotaAutoPauseThreshold normalizes a configured auto-pause threshold to
// the range [0, 1]: zero and negative values become 0, values above 1 become 1,
// and anything in between is returned unchanged.
func clampOpsQuotaAutoPauseThreshold(value float64) float64 {
	switch {
	case value <= 0:
		return 0
	case value > 1:
		return 1
	default:
		return value
	}
}
func validateOpsAdvancedSettings(cfg *OpsAdvancedSettings) error {
if cfg == nil {
return errors.New("invalid config")
@ -477,6 +490,12 @@ func (s *OpsService) UpdateOpsAdvancedSettings(ctx context.Context, cfg *OpsAdva
if err := s.settingRepo.Set(ctx, SettingKeyOpsAdvancedSettings, string(raw)); err != nil {
return nil, err
}
// Push the new quota auto-pause settings straight into the in-memory cache that
// the OpenAI scheduling hot path reads, so the next request observes the new value
// without waiting for the background refresher's TTL.
if s.quotaAutoPauseSink != nil {
s.quotaAutoPauseSink(cfg.OpenAIAccountQuotaAutoPause)
}
// notify cleanup service to reload schedule/enabled.
if s.cleanupReloader != nil {

View File

@ -4,6 +4,9 @@ import (
"context"
"encoding/json"
"testing"
"time"
"github.com/Wei-Shaw/sub2api/internal/config"
)
func TestGetOpsAdvancedSettings_DefaultHidesOpenAITokenStats(t *testing.T) {
@ -95,3 +98,64 @@ func TestGetOpsAdvancedSettings_BackfillsNewDisplayFlagsFromDefaults(t *testing.
t.Fatalf("DisplayAlertEvents = false, want true default backfill")
}
}
// TestGetOpenAIQuotaAutoPauseSettings_ReadsDefaultsFromOpsAdvancedSettings verifies
// that the thresholds persisted under SettingKeyOpsAdvancedSettings are surfaced by
// the synchronous warm-up and then served from the cache by the regular getter.
func TestGetOpenAIQuotaAutoPauseSettings_ReadsDefaultsFromOpsAdvancedSettings(t *testing.T) {
	ctx := context.Background()
	stub := newRuntimeSettingRepoStub()
	stub.values[SettingKeyOpsAdvancedSettings] = `{"openai_account_quota_auto_pause":{"default_threshold_5h":0.95,"default_threshold_7d":0.9}}`
	svc := NewSettingService(stub, &config.Config{})

	// The getter never blocks and refreshes asynchronously, so the test uses the
	// synchronous Warm entry point to populate the cache deterministically.
	warmed := svc.WarmOpenAIQuotaAutoPauseSettings(ctx)
	if got := warmed.DefaultThreshold5h; got != 0.95 {
		t.Fatalf("DefaultThreshold5h = %v, want 0.95", got)
	}
	if got := warmed.DefaultThreshold7d; got != 0.9 {
		t.Fatalf("DefaultThreshold7d = %v, want 0.9", got)
	}

	// With the cache warm, the getter must return the very same values straight
	// from memory.
	cached := svc.GetOpenAIQuotaAutoPauseSettings(ctx)
	if !(cached.DefaultThreshold5h == 0.95 && cached.DefaultThreshold7d == 0.9) {
		t.Fatalf("cached read = %+v, want {0.95, 0.9}", cached)
	}
}
// TestGetOpenAIQuotaAutoPauseSettings_ColdCacheNonBlocking pins the hot-path
// contract for an empty cache: the getter returns zero defaults right away instead
// of waiting for the repository, leaving the population to the async refresh.
func TestGetOpenAIQuotaAutoPauseSettings_ColdCacheNonBlocking(t *testing.T) {
	stub := newRuntimeSettingRepoStub()
	stub.values[SettingKeyOpsAdvancedSettings] = `{"openai_account_quota_auto_pause":{"default_threshold_5h":0.7}}`
	svc := NewSettingService(stub, &config.Config{})

	begin := time.Now()
	got := svc.GetOpenAIQuotaAutoPauseSettings(context.Background())
	if took := time.Since(begin); took > 50*time.Millisecond {
		t.Fatalf("cold-cache Get must be non-blocking, took %v", took)
	}

	// Nothing was cached when the call was made, so the returned snapshot is the
	// zero value regardless of what the background refresh stores afterwards.
	if got.DefaultThreshold5h != 0 || got.DefaultThreshold7d != 0 {
		t.Fatalf("cold-cache Get = %+v, want zeroes", got)
	}
}
// TestSetOpenAIQuotaAutoPauseSettings_VisibleImmediately checks that a direct cache
// write (the path used after ops advanced settings are saved) is observed by the
// next read.
func TestSetOpenAIQuotaAutoPauseSettings_VisibleImmediately(t *testing.T) {
	svc := NewSettingService(newRuntimeSettingRepoStub(), &config.Config{})

	want := OpsOpenAIAccountQuotaAutoPauseSettings{
		DefaultThreshold5h: 0.88,
		DefaultThreshold7d: 0.77,
	}
	svc.SetOpenAIQuotaAutoPauseSettings(want)

	got := svc.GetOpenAIQuotaAutoPauseSettings(context.Background())
	if !(got.DefaultThreshold5h == 0.88 && got.DefaultThreshold7d == 0.77) {
		t.Fatalf("after Set, Get = %+v, want {0.88, 0.77}", got)
	}
}

View File

@ -92,17 +92,23 @@ type OpsAlertRuntimeSettings struct {
// OpsAdvancedSettings stores advanced ops configuration (data retention, aggregation).
type OpsAdvancedSettings struct {
DataRetention OpsDataRetentionSettings `json:"data_retention"`
Aggregation OpsAggregationSettings `json:"aggregation"`
IgnoreCountTokensErrors bool `json:"ignore_count_tokens_errors"`
IgnoreContextCanceled bool `json:"ignore_context_canceled"`
IgnoreNoAvailableAccounts bool `json:"ignore_no_available_accounts"`
IgnoreInvalidApiKeyErrors bool `json:"ignore_invalid_api_key_errors"`
IgnoreInsufficientBalanceErrors bool `json:"ignore_insufficient_balance_errors"`
DisplayOpenAITokenStats bool `json:"display_openai_token_stats"`
DisplayAlertEvents bool `json:"display_alert_events"`
AutoRefreshEnabled bool `json:"auto_refresh_enabled"`
AutoRefreshIntervalSec int `json:"auto_refresh_interval_seconds"`
DataRetention OpsDataRetentionSettings `json:"data_retention"`
Aggregation OpsAggregationSettings `json:"aggregation"`
OpenAIAccountQuotaAutoPause OpsOpenAIAccountQuotaAutoPauseSettings `json:"openai_account_quota_auto_pause"`
IgnoreCountTokensErrors bool `json:"ignore_count_tokens_errors"`
IgnoreContextCanceled bool `json:"ignore_context_canceled"`
IgnoreNoAvailableAccounts bool `json:"ignore_no_available_accounts"`
IgnoreInvalidApiKeyErrors bool `json:"ignore_invalid_api_key_errors"`
IgnoreInsufficientBalanceErrors bool `json:"ignore_insufficient_balance_errors"`
DisplayOpenAITokenStats bool `json:"display_openai_token_stats"`
DisplayAlertEvents bool `json:"display_alert_events"`
AutoRefreshEnabled bool `json:"auto_refresh_enabled"`
AutoRefreshIntervalSec int `json:"auto_refresh_interval_seconds"`
}
// OpsOpenAIAccountQuotaAutoPauseSettings holds the global default usage thresholds
// for OpenAI account quota auto-pause. Values are fractions; normalizeOpsAdvancedSettings
// clamps both fields to the range [0, 1]. A value of 0 means no global default is
// configured for that window.
type OpsOpenAIAccountQuotaAutoPauseSettings struct {
// DefaultThreshold5h is the default threshold for the 5h usage window.
DefaultThreshold5h float64 `json:"default_threshold_5h"`
// DefaultThreshold7d is the default threshold for the 7d usage window.
DefaultThreshold7d float64 `json:"default_threshold_7d"`
}
type OpsDataRetentionSettings struct {

View File

@ -137,6 +137,11 @@ type cachedOpenAICodexUserAgent struct {
expiresAt int64 // unix nano
}
// cachedOpenAIQuotaAutoPauseSettings is the in-memory cache entry for the quota
// auto-pause settings; entries are replaced wholesale rather than mutated.
type cachedOpenAIQuotaAutoPauseSettings struct {
// settings is the cached snapshot handed back to readers.
settings OpsOpenAIAccountQuotaAutoPauseSettings
// expiresAt is the freshness deadline in Unix nanoseconds; readers treat the
// entry as stale once time.Now().UnixNano() reaches it (0 marks it stale at once).
expiresAt int64
}
const openAICodexUserAgentCacheTTL = 60 * time.Second
const openAICodexUserAgentErrorTTL = 5 * time.Second
const openAICodexUserAgentDBTimeout = 5 * time.Second
@ -152,6 +157,12 @@ const openAIAllowCodexPluginCacheTTL = 60 * time.Second
const openAIAllowCodexPluginErrorTTL = 5 * time.Second
const openAIAllowCodexPluginDBTimeout = 5 * time.Second
// openAIQuotaAutoPauseSettingsCacheTTL is how long a successfully loaded or
// explicitly set quota auto-pause cache entry is considered fresh.
const openAIQuotaAutoPauseSettingsCacheTTL = 60 * time.Second
// openAIQuotaAutoPauseSettingsErrorTTL is the shorter freshness window used when
// the settings read fails with an error other than ErrSettingNotFound.
const openAIQuotaAutoPauseSettingsErrorTTL = 5 * time.Second
// openAIQuotaAutoPauseSettingsDBTimeout bounds the repository read performed by
// a refresh.
const openAIQuotaAutoPauseSettingsDBTimeout = 5 * time.Second
// openAIQuotaAutoPauseSettingsRefreshKey is the singleflight key that dedupes
// concurrent background refreshes.
const openAIQuotaAutoPauseSettingsRefreshKey = "openai_quota_auto_pause_settings"
// DefaultSubscriptionGroupReader validates group references used by default subscriptions.
type DefaultSubscriptionGroupReader interface {
GetByID(ctx context.Context, id int64) (*Group, error)
@ -176,6 +187,15 @@ type SettingService struct {
openAICodexUASF singleflight.Group
openAIAllowCodexPluginCache atomic.Value // *cachedOpenAIAllowCodexPlugin
openAIAllowCodexPluginSF singleflight.Group
// openAIQuotaAutoPauseSettingsCache holds the most recently observed quota auto-pause
// settings. GetOpenAIQuotaAutoPauseSettings reads this atomic.Value on the request hot
// path without ever blocking on the DB; when the cached entry expires, a background
// goroutine refreshes it via openAIQuotaAutoPauseSettingsSF (stale-while-revalidate).
// This per-service field also gives tests natural isolation — each SettingService
// instance owns its own cache, no shared package-level state.
openAIQuotaAutoPauseSettingsCache atomic.Value // *cachedOpenAIQuotaAutoPauseSettings
openAIQuotaAutoPauseSettingsSF singleflight.Group
}
// DefaultPlatformQuotaSetting 单 platform 三档限额nil = 沿用上层0 = 显式禁用;>0 = 上限)
@ -2027,6 +2047,17 @@ func (s *SettingService) refreshCachedSettings(settings *SystemSettings) {
enabled: settings.OpenAIAdvancedSchedulerEnabled,
expiresAt: time.Now().Add(openAIAdvancedSchedulerSettingCacheTTL).UnixNano(),
})
// Invalidate the quota auto-pause cache and let the next read trigger a fresh load.
// We can't know from here whether ops_advanced_settings was also touched, so be
// defensive: store an expired entry — GetOpenAIQuotaAutoPauseSettings will serve
// stale and kick off an async refresh, never blocking the request that follows.
s.openAIQuotaAutoPauseSettingsSF.Forget(openAIQuotaAutoPauseSettingsRefreshKey)
if cached, _ := s.openAIQuotaAutoPauseSettingsCache.Load().(*cachedOpenAIQuotaAutoPauseSettings); cached != nil {
s.openAIQuotaAutoPauseSettingsCache.Store(&cachedOpenAIQuotaAutoPauseSettings{
settings: cached.settings,
expiresAt: 0,
})
}
if s.cfg != nil {
s.cfg.SetTrustForwardedIPForAPIKeyACL(settings.APIKeyACLTrustForwardedIP)
}
@ -4448,6 +4479,106 @@ func (s *SettingService) GetClaudeCodeVersionBounds(ctx context.Context) (min, m
return b.min, b.max
}
// GetOpenAIQuotaAutoPauseSettings returns the global default quota auto-pause
// settings without ever waiting on the repository, which makes it safe to call on
// the OpenAI scheduling hot path.
//
// Behaviour by cache state:
//   - fresh entry: its settings are returned directly;
//   - stale entry: the stale settings are returned and a background refresh is
//     started (stale-while-revalidate);
//   - no entry yet: zero defaults are returned and the same background refresh is
//     started, so a later call sees the loaded value.
//
// The ctx argument is not used for the background refresh; callers that need the
// persisted value synchronously should use WarmOpenAIQuotaAutoPauseSettings.
func (s *SettingService) GetOpenAIQuotaAutoPauseSettings(ctx context.Context) OpsOpenAIAccountQuotaAutoPauseSettings {
	var result OpsOpenAIAccountQuotaAutoPauseSettings
	if s == nil {
		return result
	}

	entry, _ := s.openAIQuotaAutoPauseSettingsCache.Load().(*cachedOpenAIQuotaAutoPauseSettings)
	if entry != nil {
		result = entry.settings
		if time.Now().UnixNano() < entry.expiresAt {
			return result
		}
	}

	// Fire-and-forget refresh: DoChan collapses concurrent refreshes under one key,
	// and the returned channel is intentionally dropped because the outcome is
	// published through the atomic cache instead.
	refresh := func() (any, error) {
		s.refreshOpenAIQuotaAutoPauseSettings(context.Background())
		return nil, nil
	}
	s.openAIQuotaAutoPauseSettingsSF.DoChan(openAIQuotaAutoPauseSettingsRefreshKey, refresh)

	// Either the stale snapshot or, when nothing was cached, the zero value.
	return result
}
// WarmOpenAIQuotaAutoPauseSettings performs a synchronous refresh of the quota
// auto-pause cache and returns whatever the cache holds afterwards. It is meant for
// startup warm-up and for tests that need a deterministic read right after
// constructing the service. A nil receiver, or a cache that is still empty after
// the refresh, yields zero defaults.
func (s *SettingService) WarmOpenAIQuotaAutoPauseSettings(ctx context.Context) OpsOpenAIAccountQuotaAutoPauseSettings {
	if s == nil {
		return OpsOpenAIAccountQuotaAutoPauseSettings{}
	}
	s.refreshOpenAIQuotaAutoPauseSettings(ctx)
	if entry, _ := s.openAIQuotaAutoPauseSettingsCache.Load().(*cachedOpenAIQuotaAutoPauseSettings); entry != nil {
		return entry.settings
	}
	return OpsOpenAIAccountQuotaAutoPauseSettings{}
}
// refreshOpenAIQuotaAutoPauseSettings reads the value stored under
// SettingKeyOpsAdvancedSettings and publishes its quota auto-pause section to the
// in-memory cache.
//
// Outcomes:
//   - read succeeds: the decoded (or default, when the value is blank) thresholds
//     are cached with the regular TTL;
//   - ErrSettingNotFound: zero defaults are cached with the regular TTL;
//   - any other error: the previously cached settings (or zero defaults if none)
//     are re-cached with the shorter error TTL so the next refresh comes sooner.
//
// The repository call runs under its own timeout and is detached from the caller's
// cancellation, keeping refresh latency predictable regardless of the caller.
// It is a no-op when the receiver or its repository is nil.
func (s *SettingService) refreshOpenAIQuotaAutoPauseSettings(ctx context.Context) {
	if s == nil || s.settingRepo == nil {
		return
	}
	dbCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), openAIQuotaAutoPauseSettingsDBTimeout)
	defer cancel()

	settings := OpsOpenAIAccountQuotaAutoPauseSettings{}
	ttl := openAIQuotaAutoPauseSettingsCacheTTL

	raw, err := s.settingRepo.GetValue(dbCtx, SettingKeyOpsAdvancedSettings)
	switch {
	case err == nil:
		cfg := defaultOpsAdvancedSettings()
		if strings.TrimSpace(raw) != "" {
			if jsonErr := json.Unmarshal([]byte(raw), cfg); jsonErr == nil {
				normalizeOpsAdvancedSettings(cfg)
			}
		}
		settings = cfg.OpenAIAccountQuotaAutoPause
		// json.Unmarshal keeps decoding the remaining fields after a type mismatch
		// and still returns an error, so cfg can hold freshly decoded thresholds
		// that skipped normalization above. Clamp unconditionally (the clamp is
		// idempotent) so an out-of-range value can never reach the scheduler.
		settings.DefaultThreshold5h = clampOpsQuotaAutoPauseThreshold(settings.DefaultThreshold5h)
		settings.DefaultThreshold7d = clampOpsQuotaAutoPauseThreshold(settings.DefaultThreshold7d)
	case !errors.Is(err, ErrSettingNotFound):
		// Real error: keep serving the prior value but retry sooner.
		if prior, _ := s.openAIQuotaAutoPauseSettingsCache.Load().(*cachedOpenAIQuotaAutoPauseSettings); prior != nil {
			settings = prior.settings
		}
		ttl = openAIQuotaAutoPauseSettingsErrorTTL
	}

	s.openAIQuotaAutoPauseSettingsCache.Store(&cachedOpenAIQuotaAutoPauseSettings{
		settings:  settings,
		expiresAt: time.Now().Add(ttl).UnixNano(),
	})
}
// SetOpenAIQuotaAutoPauseSettings replaces the cached quota auto-pause settings
// with the supplied value and marks the entry fresh for the regular cache TTL.
// Settings-write paths call it so the next read observes the new value without
// waiting for a background refresh. Calling it on a nil receiver does nothing.
func (s *SettingService) SetOpenAIQuotaAutoPauseSettings(settings OpsOpenAIAccountQuotaAutoPauseSettings) {
	if s == nil {
		return
	}
	deadline := time.Now().Add(openAIQuotaAutoPauseSettingsCacheTTL).UnixNano()
	entry := &cachedOpenAIQuotaAutoPauseSettings{
		settings:  settings,
		expiresAt: deadline,
	}
	s.openAIQuotaAutoPauseSettingsCache.Store(entry)
}
// GetRectifierSettings 获取请求整流器配置
func (s *SettingService) GetRectifierSettings(ctx context.Context) (*RectifierSettings, error) {
value, err := s.settingRepo.GetValue(ctx, SettingKeyRectifierSettings)

View File

@ -396,6 +396,46 @@ func ProvideBackupService(
return svc
}
// ProvideOpsService is the wire provider for OpsService. It builds the service via
// NewOpsService and, when a SettingService is supplied, registers that service's
// SetOpenAIQuotaAutoPauseSettings as the quota auto-pause sink. OpsService thus
// never holds a *SettingService itself (the same callback approach as
// SetCleanupReloader), yet writes to the ops advanced settings reach the scheduler
// hot-path cache immediately.
func ProvideOpsService(
	opsRepo OpsRepository,
	settingRepo SettingRepository,
	cfg *config.Config,
	accountRepo AccountRepository,
	userRepo UserRepository,
	concurrencyService *ConcurrencyService,
	gatewayService *GatewayService,
	openAIGatewayService *OpenAIGatewayService,
	geminiCompatService *GeminiMessagesCompatService,
	antigravityGatewayService *AntigravityGatewayService,
	systemLogSink *OpsSystemLogSink,
	settingService *SettingService,
) *OpsService {
	svc := NewOpsService(
		opsRepo,
		settingRepo,
		cfg,
		accountRepo,
		userRepo,
		concurrencyService,
		gatewayService,
		openAIGatewayService,
		geminiCompatService,
		antigravityGatewayService,
		systemLogSink,
	)
	if settingService == nil {
		return svc
	}

	svc.SetOpenAIQuotaAutoPauseSettingsSink(settingService.SetOpenAIQuotaAutoPauseSettings)
	// Best-effort synchronous warm-up so the first scheduled request after process
	// start sees a populated cache instead of zero defaults; the result is not
	// needed here.
	settingService.WarmOpenAIQuotaAutoPauseSettings(context.Background())
	return svc
}
// ProvideSettingService wires SettingService with group reader and proxy repo.
func ProvideSettingService(settingRepo SettingRepository, groupRepo GroupRepository, proxyRepo ProxyRepository, cfg *config.Config) *SettingService {
svc := NewSettingService(settingRepo, cfg)
@ -481,7 +521,7 @@ var ProviderSet = wire.NewSet(
NewDataManagementService,
ProvideBackupService,
ProvideOpsSystemLogSink,
NewOpsService,
ProvideOpsService,
ProvideOpsMetricsCollector,
ProvideOpsAggregationService,
ProvideOpsAlertEvaluatorService,

View File

@ -778,9 +778,15 @@ export interface OpsAlertRuntimeSettings {
thresholds: OpsMetricThresholds // 指标阈值配置
}
// Global default thresholds for OpenAI account quota auto-pause, as sent to and
// received from the backend. Values are fractions in the range 0 to 1 (not percentages).
export interface OpsOpenAIAccountQuotaAutoPauseSettings {
default_threshold_5h: number // Fraction 0 to 1; 0 means no global default 5h threshold
default_threshold_7d: number // Fraction 0 to 1; 0 means no global default 7d threshold
}
export interface OpsAdvancedSettings {
data_retention: OpsDataRetentionSettings
aggregation: OpsAggregationSettings
openai_account_quota_auto_pause: OpsOpenAIAccountQuotaAutoPauseSettings
ignore_count_tokens_errors: boolean
ignore_context_canceled: boolean
ignore_no_available_accounts: boolean

View File

@ -1787,6 +1787,84 @@
</div>
</div>
<div
v-if="account?.platform === 'openai'"
class="border-t border-gray-200 pt-4 dark:border-dark-600 space-y-4"
>
<div class="space-y-2">
<div class="flex items-center justify-between">
<label class="input-label mb-0">{{ t('admin.accounts.autoPause5hDisabled') }}</label>
<button
type="button"
@click="autoPause5hDisabled = !autoPause5hDisabled"
:class="[
'relative inline-flex h-6 w-11 flex-shrink-0 cursor-pointer rounded-full border-2 border-transparent transition-colors duration-200 ease-in-out focus:outline-none focus:ring-2 focus:ring-primary-500 focus:ring-offset-2',
autoPause5hDisabled ? 'bg-primary-600' : 'bg-gray-200 dark:bg-dark-600'
]"
data-testid="auto-pause-5h-disabled"
>
<span
:class="[
'pointer-events-none inline-block h-5 w-5 transform rounded-full bg-white shadow ring-0 transition duration-200 ease-in-out',
autoPause5hDisabled ? 'translate-x-5' : 'translate-x-0'
]"
/>
</button>
</div>
<p class="input-hint">{{ t('admin.accounts.autoPauseDisabledHint') }}</p>
</div>
<div>
<label class="input-label">{{ t('admin.accounts.autoPause5hThreshold') }}</label>
<input
v-model.number="autoPause5hThreshold"
type="number"
min="0"
max="100"
step="0.1"
class="input"
:disabled="autoPause5hDisabled"
data-testid="auto-pause-5h-threshold"
/>
<p class="input-hint">{{ t('admin.accounts.autoPauseThresholdHint') }}</p>
</div>
<div class="space-y-2">
<div class="flex items-center justify-between">
<label class="input-label mb-0">{{ t('admin.accounts.autoPause7dDisabled') }}</label>
<button
type="button"
@click="autoPause7dDisabled = !autoPause7dDisabled"
:class="[
'relative inline-flex h-6 w-11 flex-shrink-0 cursor-pointer rounded-full border-2 border-transparent transition-colors duration-200 ease-in-out focus:outline-none focus:ring-2 focus:ring-primary-500 focus:ring-offset-2',
autoPause7dDisabled ? 'bg-primary-600' : 'bg-gray-200 dark:bg-dark-600'
]"
data-testid="auto-pause-7d-disabled"
>
<span
:class="[
'pointer-events-none inline-block h-5 w-5 transform rounded-full bg-white shadow ring-0 transition duration-200 ease-in-out',
autoPause7dDisabled ? 'translate-x-5' : 'translate-x-0'
]"
/>
</button>
</div>
<p class="input-hint">{{ t('admin.accounts.autoPauseDisabledHint') }}</p>
</div>
<div>
<label class="input-label">{{ t('admin.accounts.autoPause7dThreshold') }}</label>
<input
v-model.number="autoPause7dThreshold"
type="number"
min="0"
max="100"
step="0.1"
class="input"
:disabled="autoPause7dDisabled"
data-testid="auto-pause-7d-threshold"
/>
<p class="input-hint">{{ t('admin.accounts.autoPauseThresholdHint') }}</p>
</div>
</div>
<!-- 配额控制 (Anthropic OAuth/SetupToken: 亲和 + 窗口费用 + 会话 + RPM ) -->
<div
v-if="account?.platform === 'anthropic' && (account?.type === 'oauth' || account?.type === 'setup-token')"
@ -2447,6 +2525,10 @@ const selectedErrorCodes = ref<number[]>([])
const customErrorCodeInput = ref<number | null>(null)
const interceptWarmupRequests = ref(false)
const autoPauseOnExpired = ref(false)
const autoPause5hThreshold = ref<number | null>(null)
const autoPause7dThreshold = ref<number | null>(null)
const autoPause5hDisabled = ref(false)
const autoPause7dDisabled = ref(false)
const mixedScheduling = ref(false) // For antigravity accounts: enable mixed scheduling
const allowOverages = ref(false) // For antigravity accounts: enable AI Credits overages
const antigravityModelRestrictionMode = ref<'whitelist' | 'mapping'>('whitelist')
@ -2862,9 +2944,13 @@ const syncFormFromAccount = (newAccount: Account | null) => {
// Load mixed scheduling setting (only for antigravity accounts)
mixedScheduling.value = false
allowOverages.value = false
const extra = newAccount.extra as Record<string, unknown> | undefined
mixedScheduling.value = extra?.mixed_scheduling === true
allowOverages.value = extra?.allow_overages === true
const extra = newAccount.extra as Record<string, unknown> | undefined
mixedScheduling.value = extra?.mixed_scheduling === true
allowOverages.value = extra?.allow_overages === true
autoPause5hThreshold.value = typeof extra?.auto_pause_5h_threshold === 'number' ? extra.auto_pause_5h_threshold * 100 : null
autoPause7dThreshold.value = typeof extra?.auto_pause_7d_threshold === 'number' ? extra.auto_pause_7d_threshold * 100 : null
autoPause5hDisabled.value = extra?.auto_pause_5h_disabled === true
autoPause7dDisabled.value = extra?.auto_pause_7d_disabled === true
// Load OpenAI passthrough toggle (OpenAI OAuth/API Key)
openaiPassthroughEnabled.value = false
@ -3987,9 +4073,9 @@ const handleSubmit = async () => {
}
// For OpenAI OAuth/API Key accounts, handle passthrough mode in extra
if (props.account.platform === 'openai' && (props.account.type === 'oauth' || props.account.type === 'apikey')) {
const currentExtra = (props.account.extra as Record<string, unknown>) || {}
const newExtra: Record<string, unknown> = { ...currentExtra }
if (props.account.platform === 'openai' && (props.account.type === 'oauth' || props.account.type === 'apikey')) {
const currentExtra = (props.account.extra as Record<string, unknown>) || {}
const newExtra: Record<string, unknown> = { ...currentExtra }
const hadCodexCLIOnlyEnabled = currentExtra.codex_cli_only === true
if (props.account.type === 'oauth') {
newExtra.openai_oauth_responses_websockets_v2_mode = openaiOAuthResponsesWebSocketV2Mode.value
@ -4011,15 +4097,35 @@ const handleSubmit = async () => {
} else {
newExtra.openai_compact_mode = openAICompactMode.value
}
if (props.account.type === 'apikey') {
if (props.account.type === 'apikey') {
if (!openAITextGenerationCapabilityEnabled.value || openAIResponsesMode.value === 'auto') {
delete newExtra.openai_responses_mode
} else {
newExtra.openai_responses_mode = openAIResponsesMode.value
}
}
}
if (autoPause5hThreshold.value != null && autoPause5hThreshold.value > 0) {
newExtra.auto_pause_5h_threshold = autoPause5hThreshold.value / 100
} else {
delete newExtra.auto_pause_5h_threshold
}
if (autoPause7dThreshold.value != null && autoPause7dThreshold.value > 0) {
newExtra.auto_pause_7d_threshold = autoPause7dThreshold.value / 100
} else {
delete newExtra.auto_pause_7d_threshold
}
if (autoPause5hDisabled.value) {
newExtra.auto_pause_5h_disabled = true
} else {
delete newExtra.auto_pause_5h_disabled
}
if (autoPause7dDisabled.value) {
newExtra.auto_pause_7d_disabled = true
} else {
delete newExtra.auto_pause_7d_disabled
}
delete newExtra.codex_image_generation_bridge_enabled
delete newExtra.codex_image_generation_bridge_enabled
if (codexImageGenerationBridgeMode.value === 'inherit') {
delete newExtra.codex_image_generation_bridge
} else {

View File

@ -330,6 +330,49 @@ describe('EditAccountModal', () => {
])
})
// The threshold inputs are edited as percentages, while the submitted `extra`
// payload is expected to carry fractions (95 -> 0.95, 96 -> 0.96).
it('submits OpenAI quota auto-pause thresholds in extra', async () => {
const account = buildAccount()
// Start from an account that already has per-account thresholds stored.
account.extra = {
auto_pause_5h_threshold: 0.9,
auto_pause_7d_threshold: 0.8
}
updateAccountMock.mockReset()
checkMixedChannelRiskMock.mockReset()
checkMixedChannelRiskMock.mockResolvedValue({ has_risk: false })
updateAccountMock.mockResolvedValue(account)
const wrapper = mountModal(account)
// Overwrite both thresholds through the form inputs, then submit.
await wrapper.get('[data-testid="auto-pause-5h-threshold"]').setValue('95')
await wrapper.get('[data-testid="auto-pause-7d-threshold"]').setValue('96')
await wrapper.get('form#edit-account-form').trigger('submit.prevent')
expect(updateAccountMock).toHaveBeenCalledTimes(1)
// The second argument of the update call is the payload whose `extra` is inspected.
expect(updateAccountMock.mock.calls[0]?.[1]?.extra?.auto_pause_5h_threshold).toBe(0.95)
expect(updateAccountMock.mock.calls[0]?.[1]?.extra?.auto_pause_7d_threshold).toBe(0.96)
})
it('submits OpenAI quota auto-pause disable flag in extra', async () => {
// Toggling the per-account disable flag must persist as auto_pause_5h_disabled
// so an admin can exempt one account from auto-pause even when a global default
// threshold is configured (otherwise leaving the threshold blank would silently
// fall back to the global default).
const account = buildAccount()
updateAccountMock.mockReset()
checkMixedChannelRiskMock.mockReset()
checkMixedChannelRiskMock.mockResolvedValue({ has_risk: false })
updateAccountMock.mockResolvedValue(account)
const wrapper = mountModal(account)
// Only the 5h toggle is clicked; the 7d toggle is left untouched.
await wrapper.get('[data-testid="auto-pause-5h-disabled"]').trigger('click')
await wrapper.get('form#edit-account-form').trigger('submit.prevent')
expect(updateAccountMock).toHaveBeenCalledTimes(1)
expect(updateAccountMock.mock.calls[0]?.[1]?.extra?.auto_pause_5h_disabled).toBe(true)
// The untouched 7d flag must be absent from the payload rather than sent as false.
expect(updateAccountMock.mock.calls[0]?.[1]?.extra?.auto_pause_7d_disabled).toBeUndefined()
})
it('keeps at least one OpenAI APIKey endpoint capability selected', async () => {
const account = buildAccount()
updateAccountMock.mockReset()

View File

@ -3475,6 +3475,12 @@ export default {
'When enabled, warmup requests like title generation will return mock responses without consuming upstream tokens',
autoPauseOnExpired: 'Auto Pause On Expired',
autoPauseOnExpiredDesc: 'When enabled, the account will auto pause scheduling after it expires',
autoPause5hThreshold: '5h Usage Threshold (%)',
autoPause7dThreshold: '7d Usage Threshold (%)',
autoPauseThresholdHint: 'Leave empty or set 0 to use the global default threshold (configured in Ops settings); set a value to override the global default. Reaching the threshold only skips the account during scheduling and does not modify schedulable.',
autoPause5hDisabled: 'Disable 5h auto-pause',
autoPause7dDisabled: 'Disable 7d auto-pause',
autoPauseDisabledHint: 'When enabled, this account is never auto-paused (even if a global default threshold is configured).',
// Quota control (Anthropic OAuth/SetupToken only)
quotaControl: {
title: 'Quota Control',
@ -5190,6 +5196,11 @@ export default {
aggregation: 'Pre-aggregation Tasks',
enableAggregation: 'Enable Pre-aggregation',
aggregationHint: 'Pre-aggregation improves query performance for long time windows',
openaiQuotaAutoPause: 'OpenAI Account Quota Auto-pause',
openaiQuotaAutoPauseHint: 'When an OpenAI account reaches its 5h / 7d usage threshold, the scheduler skips it automatically and resumes once the window rolls over. Per-account thresholds take precedence over this global default.',
openaiQuotaAutoPauseDefault5h: 'Default 5h usage threshold (%)',
openaiQuotaAutoPauseDefault7d: 'Default 7d usage threshold (%)',
openaiQuotaAutoPauseThresholdHint: 'Value 0-100; leave blank or 0 to disable the global default threshold.',
errorFiltering: 'Error Filtering',
ignoreCountTokensErrors: 'Ignore count_tokens errors',
ignoreCountTokensErrorsHint: 'When enabled, errors from count_tokens requests will not be written to the error log.',
@ -5220,7 +5231,8 @@ export default {
slaMinPercentRange: 'SLA minimum percentage must be between 0 and 100',
ttftP99MaxRange: 'TTFT P99 maximum must be a number ≥ 0',
requestErrorRateMaxRange: 'Request error rate maximum must be between 0 and 100',
upstreamErrorRateMaxRange: 'Upstream error rate maximum must be between 0 and 100'
upstreamErrorRateMaxRange: 'Upstream error rate maximum must be between 0 and 100',
openaiQuotaAutoPauseRange: 'OpenAI quota auto-pause threshold must be between 0 and 100'
}
},
concurrency: {

View File

@ -3613,6 +3613,12 @@ export default {
interceptWarmupRequestsDesc: '启用后,标题生成等预热请求将返回 mock 响应,不消耗上游 token',
autoPauseOnExpired: '过期自动暂停调度',
autoPauseOnExpiredDesc: '启用后,账号过期将自动暂停调度',
autoPause5hThreshold: '5h 用量阈值(%)',
autoPause7dThreshold: '7d 用量阈值(%)',
autoPauseThresholdHint: '留空或填 0 表示使用全局默认阈值(在运维设置中配置);填具体值则覆盖全局默认。达到阈值后仅在调度时跳过账号,不修改 schedulable。',
autoPause5hDisabled: '禁用 5h 自动暂停',
autoPause7dDisabled: '禁用 7d 自动暂停',
autoPauseDisabledHint: '开启后该账号永不进入自动暂停(即使全局默认阈值已配置)。',
// Quota control (Anthropic OAuth/SetupToken only)
quotaControl: {
title: '配额控制',
@ -5349,6 +5355,11 @@ export default {
aggregation: '预聚合任务',
enableAggregation: '启用预聚合任务',
aggregationHint: '预聚合可提升长时间窗口查询性能',
openaiQuotaAutoPause: 'OpenAI 账号配额自动暂停',
openaiQuotaAutoPauseHint: '当 OpenAI 账号 5h / 7d 用量达到阈值时,调度会自动跳过该账号;窗口滚动后自动恢复。账号级阈值优先于此全局默认值。',
openaiQuotaAutoPauseDefault5h: '默认 5h 用量阈值 (%)',
openaiQuotaAutoPauseDefault7d: '默认 7d 用量阈值 (%)',
openaiQuotaAutoPauseThresholdHint: '取值 0-100留空或 0 表示不启用全局默认阈值。',
errorFiltering: '错误过滤',
ignoreCountTokensErrors: '忽略 count_tokens 错误',
ignoreCountTokensErrorsHint: '启用后count_tokens 请求的错误将不会写入错误日志。',
@ -5380,7 +5391,8 @@ export default {
slaMinPercentRange: 'SLA最低百分比必须在0-100之间',
ttftP99MaxRange: 'TTFT P99最大值必须大于等于0',
requestErrorRateMaxRange: '请求错误率最大值必须在0-100之间',
upstreamErrorRateMaxRange: '上游错误率最大值必须在0-100之间'
upstreamErrorRateMaxRange: '上游错误率最大值必须在0-100之间',
openaiQuotaAutoPauseRange: 'OpenAI 配额自动暂停阈值必须在 0-100 之间'
}
},
concurrency: {

View File

@ -50,6 +50,10 @@ async function loadAllSettings() {
runtimeSettings.value = runtime
emailConfig.value = email
advancedSettings.value = advanced
// payload
if (advancedSettings.value && !advancedSettings.value.openai_account_quota_auto_pause) {
advancedSettings.value.openai_account_quota_auto_pause = { default_threshold_5h: 0, default_threshold_7d: 0 }
}
// 使
if (thresholds && Object.keys(thresholds).length > 0) {
metricThresholds.value = {
@ -119,6 +123,28 @@ function removeRecipient(target: 'alert' | 'report', email: string) {
if (idx >= 0) list.splice(idx, 1)
}
// OpenAI quota auto-pause, 5h window: the settings object stores a fraction (0 to 1)
// while the input shows a percentage (0 to 100). Reading rounds to one decimal place
// and yields null for an unset or non-positive threshold; writing stores
// percentage / 100, or 0 when the input is empty or non-positive.
const quotaAutoPause5hPercent = computed<number | null>({
  get() {
    const fraction = advancedSettings.value?.openai_account_quota_auto_pause?.default_threshold_5h
    if (!fraction || !(fraction > 0)) {
      return null
    }
    return Math.round(fraction * 1000) / 10
  },
  set(val) {
    const quota = advancedSettings.value?.openai_account_quota_auto_pause
    if (!quota) return
    let fraction = 0
    if (val != null && val > 0) {
      fraction = val / 100
    }
    quota.default_threshold_5h = fraction
  }
})
// OpenAI quota auto-pause, 7d window: percentage view (0 to 100) over the stored
// fraction (0 to 1). Reading rounds to one decimal place and yields null for an
// unset or non-positive threshold; writing stores percentage / 100, or 0 when the
// input is empty or non-positive.
const quotaAutoPause7dPercent = computed<number | null>({
  get() {
    const fraction = advancedSettings.value?.openai_account_quota_auto_pause?.default_threshold_7d
    if (!fraction || !(fraction > 0)) {
      return null
    }
    return Math.round(fraction * 1000) / 10
  },
  set(val) {
    const quota = advancedSettings.value?.openai_account_quota_auto_pause
    if (!quota) return
    let fraction = 0
    if (val != null && val > 0) {
      fraction = val / 100
    }
    quota.default_threshold_7d = fraction
  }
})
//
const validation = computed(() => {
const errors: string[] = []
@ -145,6 +171,11 @@ const validation = computed(() => {
if (hourly_metrics_retention_days < 0 || hourly_metrics_retention_days > 365) {
errors.push(t('admin.ops.settings.validation.retentionDaysRange'))
}
const { default_threshold_5h, default_threshold_7d } = advancedSettings.value.openai_account_quota_auto_pause
if (default_threshold_5h < 0 || default_threshold_5h > 1 || default_threshold_7d < 0 || default_threshold_7d > 1) {
errors.push(t('admin.ops.settings.validation.openaiQuotaAutoPauseRange'))
}
}
//
@ -473,6 +504,40 @@ async function saveAllSettings() {
</div>
</div>
<!-- OpenAI 账号配额自动暂停全局默认阈值 -->
<div class="space-y-3">
<h5 class="text-xs font-semibold text-gray-700 dark:text-gray-300">{{ t('admin.ops.settings.openaiQuotaAutoPause') }}</h5>
<p class="text-xs text-gray-500">{{ t('admin.ops.settings.openaiQuotaAutoPauseHint') }}</p>
<div class="grid grid-cols-1 gap-4 md:grid-cols-2">
<div>
<label class="input-label">{{ t('admin.ops.settings.openaiQuotaAutoPauseDefault5h') }}</label>
<input
v-model.number="quotaAutoPause5hPercent"
type="number"
min="0"
max="100"
step="0.1"
class="input"
data-testid="ops-quota-auto-pause-5h"
/>
</div>
<div>
<label class="input-label">{{ t('admin.ops.settings.openaiQuotaAutoPauseDefault7d') }}</label>
<input
v-model.number="quotaAutoPause7dPercent"
type="number"
min="0"
max="100"
step="0.1"
class="input"
data-testid="ops-quota-auto-pause-7d"
/>
</div>
</div>
<p class="text-xs text-gray-500">{{ t('admin.ops.settings.openaiQuotaAutoPauseThresholdHint') }}</p>
</div>
<!-- Error Filtering -->
<div class="space-y-3">
<h5 class="text-xs font-semibold text-gray-700 dark:text-gray-300">{{ t('admin.ops.settings.errorFiltering') }}</h5>