Merge pull request #2873 from wucm667/feat/account-quota-threshold-auto-pause
feat(account): 支持按 5h/7d 用量阈值自动暂停账号调度
This commit is contained in:
commit
f68d351158
@ -195,7 +195,7 @@ func initializeApplication(buildInfo handler.BuildInfo) (*Application, error) {
|
||||
gatewayService := service.NewGatewayService(accountRepository, groupRepository, usageLogRepository, usageBillingRepository, userRepository, userSubscriptionRepository, userGroupRateRepository, gatewayCache, configConfig, schedulerSnapshotService, concurrencyService, billingService, rateLimitService, billingCacheService, identityService, httpUpstream, deferredService, claudeTokenProvider, sessionLimitCache, rpmCache, digestSessionStore, settingService, tlsFingerprintProfileService, channelService, modelPricingResolver, balanceNotifyService, serviceUserPlatformQuotaRepository)
|
||||
geminiMessagesCompatService := service.NewGeminiMessagesCompatService(accountRepository, groupRepository, gatewayCache, schedulerSnapshotService, geminiTokenProvider, rateLimitService, httpUpstream, antigravityGatewayService, configConfig)
|
||||
opsSystemLogSink := service.ProvideOpsSystemLogSink(opsRepository)
|
||||
opsService := service.NewOpsService(opsRepository, settingRepository, configConfig, accountRepository, userRepository, concurrencyService, gatewayService, openAIGatewayService, geminiMessagesCompatService, antigravityGatewayService, opsSystemLogSink)
|
||||
opsService := service.ProvideOpsService(opsRepository, settingRepository, configConfig, accountRepository, userRepository, concurrencyService, gatewayService, openAIGatewayService, geminiMessagesCompatService, antigravityGatewayService, opsSystemLogSink, settingService)
|
||||
encryptionKey, err := payment.ProvideEncryptionKey(configConfig)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
||||
@ -548,6 +548,17 @@ func filterSchedulerExtra(extra map[string]any) map[string]any {
|
||||
"openai_ws_force_http",
|
||||
"openai_responses_mode",
|
||||
"openai_responses_supported",
|
||||
"codex_5h_used_percent",
|
||||
"codex_7d_used_percent",
|
||||
"codex_5h_reset_at",
|
||||
"codex_7d_reset_at",
|
||||
"codex_5h_reset_after_seconds",
|
||||
"codex_7d_reset_after_seconds",
|
||||
"codex_usage_updated_at",
|
||||
"auto_pause_5h_threshold",
|
||||
"auto_pause_7d_threshold",
|
||||
"auto_pause_5h_disabled",
|
||||
"auto_pause_7d_disabled",
|
||||
}
|
||||
filtered := make(map[string]any)
|
||||
for _, key := range keys {
|
||||
|
||||
@ -75,3 +75,36 @@ func TestBuildSchedulerMetadataAccount_KeepsSlimGroupMembership(t *testing.T) {
|
||||
require.Equal(t, int64(11), got.AccountGroups[1].GroupID)
|
||||
require.Nil(t, got.Groups)
|
||||
}
|
||||
|
||||
func TestBuildSchedulerMetadataAccount_KeepsQuotaAutoPauseFields(t *testing.T) {
|
||||
account := service.Account{
|
||||
ID: 88,
|
||||
Extra: map[string]any{
|
||||
"codex_5h_used_percent": 12.34,
|
||||
"codex_7d_used_percent": 56.78,
|
||||
"codex_5h_reset_at": "2026-05-29T10:00:00Z",
|
||||
"codex_7d_reset_at": "2026-06-01T10:00:00Z",
|
||||
"codex_5h_reset_after_seconds": 300,
|
||||
"codex_7d_reset_after_seconds": 600,
|
||||
"codex_usage_updated_at": "2026-05-29T09:00:00Z",
|
||||
"auto_pause_5h_threshold": 0.95,
|
||||
"auto_pause_7d_threshold": 0.96,
|
||||
"auto_pause_5h_disabled": true,
|
||||
"auto_pause_7d_disabled": false,
|
||||
},
|
||||
}
|
||||
|
||||
got := buildSchedulerMetadataAccount(account)
|
||||
|
||||
require.Equal(t, 12.34, got.Extra["codex_5h_used_percent"])
|
||||
require.Equal(t, 56.78, got.Extra["codex_7d_used_percent"])
|
||||
require.Equal(t, "2026-05-29T10:00:00Z", got.Extra["codex_5h_reset_at"])
|
||||
require.Equal(t, "2026-06-01T10:00:00Z", got.Extra["codex_7d_reset_at"])
|
||||
require.Equal(t, 300, got.Extra["codex_5h_reset_after_seconds"])
|
||||
require.Equal(t, 600, got.Extra["codex_7d_reset_after_seconds"])
|
||||
require.Equal(t, "2026-05-29T09:00:00Z", got.Extra["codex_usage_updated_at"])
|
||||
require.Equal(t, 0.95, got.Extra["auto_pause_5h_threshold"])
|
||||
require.Equal(t, 0.96, got.Extra["auto_pause_7d_threshold"])
|
||||
require.Equal(t, true, got.Extra["auto_pause_5h_disabled"])
|
||||
require.Equal(t, false, got.Extra["auto_pause_7d_disabled"])
|
||||
}
|
||||
|
||||
@ -370,7 +370,6 @@ func (s *defaultOpenAIAccountScheduler) selectBySessionHash(
|
||||
_ = s.service.deleteStickySessionAccountID(ctx, req.GroupID, sessionHash)
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
result, acquireErr := s.service.tryAcquireAccountSlot(ctx, accountID, account.Concurrency)
|
||||
if acquireErr == nil && result != nil && result.Acquired {
|
||||
_ = s.service.refreshStickySessionTTL(ctx, req.GroupID, sessionHash, s.service.openAIWSSessionStickyTTL())
|
||||
@ -975,6 +974,13 @@ func (s *defaultOpenAIAccountScheduler) isAccountRequestCompatible(ctx context.C
|
||||
if s != nil && s.service != nil && s.service.isOpenAIAccountRuntimeBlocked(account) {
|
||||
return false
|
||||
}
|
||||
// Quota auto-pause must be evaluated during the initial filter too. Without it the
|
||||
// TopK candidate pool can be filled with paused accounts and the later fresh/DB
|
||||
// rechecks won't reach healthy accounts that fell outside TopK — manifesting as
|
||||
// "no available accounts" even though healthy ones exist.
|
||||
if paused, _ := shouldAutoPauseOpenAIAccountByQuota(ctx, account); paused {
|
||||
return false
|
||||
}
|
||||
if req.RequestedModel != "" && !account.IsModelSupported(req.RequestedModel) {
|
||||
return false
|
||||
}
|
||||
@ -1154,6 +1160,7 @@ func (s *OpenAIGatewayService) selectAccountWithScheduler(
|
||||
requiredImageCapability OpenAIImagesCapability,
|
||||
requireCompact bool,
|
||||
) (*AccountSelectionResult, OpenAIAccountScheduleDecision, error) {
|
||||
ctx = s.withOpenAIQuotaAutoPauseContext(ctx)
|
||||
decision := OpenAIAccountScheduleDecision{}
|
||||
scheduler := s.getOpenAIAccountScheduler(ctx)
|
||||
if scheduler == nil {
|
||||
|
||||
@ -691,6 +691,224 @@ func TestOpenAIGatewayService_SelectAccountWithScheduler_SessionStickyRateLimite
|
||||
require.Equal(t, openAIAccountScheduleLayerLoadBalance, decision.Layer)
|
||||
}
|
||||
|
||||
func TestOpenAIGatewayService_SelectAccountForModelWithExclusions_AutoPauseBy5hThreshold(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
primary := Account{
|
||||
ID: 35001,
|
||||
Platform: PlatformOpenAI,
|
||||
Type: AccountTypeAPIKey,
|
||||
Status: StatusActive,
|
||||
Schedulable: true,
|
||||
Concurrency: 1,
|
||||
Priority: 0,
|
||||
Extra: map[string]any{
|
||||
"codex_5h_used_percent": 95.0,
|
||||
"auto_pause_5h_threshold": 0.95,
|
||||
},
|
||||
}
|
||||
secondary := Account{ID: 35002, Platform: PlatformOpenAI, Type: AccountTypeAPIKey, Status: StatusActive, Schedulable: true, Concurrency: 1, Priority: 5}
|
||||
svc := &OpenAIGatewayService{accountRepo: schedulerTestOpenAIAccountRepo{accounts: []Account{primary, secondary}}, cfg: &config.Config{}}
|
||||
|
||||
account, err := svc.SelectAccountForModelWithExclusions(ctx, nil, "", "gpt-5.1", nil)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, account)
|
||||
require.Equal(t, int64(35002), account.ID)
|
||||
}
|
||||
|
||||
func TestOpenAIGatewayService_SelectAccountForModelWithExclusions_AllowsBelow5hThreshold(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
primary := Account{
|
||||
ID: 35101,
|
||||
Platform: PlatformOpenAI,
|
||||
Type: AccountTypeAPIKey,
|
||||
Status: StatusActive,
|
||||
Schedulable: true,
|
||||
Concurrency: 1,
|
||||
Priority: 0,
|
||||
Extra: map[string]any{
|
||||
"codex_5h_used_percent": 80.0,
|
||||
"auto_pause_5h_threshold": 0.95,
|
||||
},
|
||||
}
|
||||
secondary := Account{ID: 35102, Platform: PlatformOpenAI, Type: AccountTypeAPIKey, Status: StatusActive, Schedulable: true, Concurrency: 1, Priority: 5}
|
||||
svc := &OpenAIGatewayService{accountRepo: schedulerTestOpenAIAccountRepo{accounts: []Account{primary, secondary}}, cfg: &config.Config{}}
|
||||
|
||||
account, err := svc.SelectAccountForModelWithExclusions(ctx, nil, "", "gpt-5.1", nil)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, account)
|
||||
require.Equal(t, int64(35101), account.ID)
|
||||
}
|
||||
|
||||
func TestOpenAIGatewayService_SelectAccountForModelWithExclusions_AutoPauseBy7dThreshold(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
primary := Account{
|
||||
ID: 35201,
|
||||
Platform: PlatformOpenAI,
|
||||
Type: AccountTypeAPIKey,
|
||||
Status: StatusActive,
|
||||
Schedulable: true,
|
||||
Concurrency: 1,
|
||||
Priority: 0,
|
||||
Extra: map[string]any{
|
||||
"codex_7d_used_percent": 95.0,
|
||||
"auto_pause_7d_threshold": 0.95,
|
||||
},
|
||||
}
|
||||
secondary := Account{ID: 35202, Platform: PlatformOpenAI, Type: AccountTypeAPIKey, Status: StatusActive, Schedulable: true, Concurrency: 1, Priority: 5}
|
||||
svc := &OpenAIGatewayService{accountRepo: schedulerTestOpenAIAccountRepo{accounts: []Account{primary, secondary}}, cfg: &config.Config{}}
|
||||
|
||||
account, err := svc.SelectAccountForModelWithExclusions(ctx, nil, "", "gpt-5.1", nil)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, account)
|
||||
require.Equal(t, int64(35202), account.ID)
|
||||
}
|
||||
|
||||
func TestOpenAIGatewayService_SelectAccountForModelWithExclusions_UnconfiguredThresholdKeepsLegacyBehavior(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
primary := Account{ID: 35301, Platform: PlatformOpenAI, Type: AccountTypeAPIKey, Status: StatusActive, Schedulable: true, Concurrency: 1, Priority: 0, Extra: map[string]any{"codex_5h_used_percent": 99.0, "codex_7d_used_percent": 99.0}}
|
||||
secondary := Account{ID: 35302, Platform: PlatformOpenAI, Type: AccountTypeAPIKey, Status: StatusActive, Schedulable: true, Concurrency: 1, Priority: 5}
|
||||
svc := &OpenAIGatewayService{accountRepo: schedulerTestOpenAIAccountRepo{accounts: []Account{primary, secondary}}, cfg: &config.Config{}}
|
||||
|
||||
account, err := svc.SelectAccountForModelWithExclusions(ctx, nil, "", "gpt-5.1", nil)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, account)
|
||||
require.Equal(t, int64(35301), account.ID)
|
||||
}
|
||||
|
||||
func TestOpenAIGatewayService_SelectAccountForModelWithExclusions_UsesGlobalDefaultThreshold(t *testing.T) {
|
||||
ctx := withOpenAIQuotaAutoPauseSettings(context.Background(), OpsOpenAIAccountQuotaAutoPauseSettings{DefaultThreshold5h: 0.95})
|
||||
primary := Account{
|
||||
ID: 35401,
|
||||
Platform: PlatformOpenAI,
|
||||
Type: AccountTypeAPIKey,
|
||||
Status: StatusActive,
|
||||
Schedulable: true,
|
||||
Concurrency: 1,
|
||||
Priority: 0,
|
||||
Extra: map[string]any{
|
||||
"codex_5h_used_percent": 95.0,
|
||||
},
|
||||
}
|
||||
secondary := Account{ID: 35402, Platform: PlatformOpenAI, Type: AccountTypeAPIKey, Status: StatusActive, Schedulable: true, Concurrency: 1, Priority: 5}
|
||||
svc := &OpenAIGatewayService{accountRepo: schedulerTestOpenAIAccountRepo{accounts: []Account{primary, secondary}}, cfg: &config.Config{}}
|
||||
|
||||
account, err := svc.SelectAccountForModelWithExclusions(ctx, nil, "", "gpt-5.1", nil)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, account)
|
||||
require.Equal(t, int64(35402), account.ID)
|
||||
}
|
||||
|
||||
// Regression: a per-account explicit-disable flag exempts the account from auto-pause
|
||||
// even when a global default threshold is set. Without this, "leave threshold blank"
|
||||
// silently falls back to global default and admins have no way to whitelist a single
|
||||
// account.
|
||||
func TestOpenAIGatewayService_SelectAccountForModelWithExclusions_PerAccountDisableOverridesGlobalDefault(t *testing.T) {
|
||||
ctx := withOpenAIQuotaAutoPauseSettings(context.Background(), OpsOpenAIAccountQuotaAutoPauseSettings{DefaultThreshold5h: 0.95})
|
||||
// Account has high usage AND no per-account threshold (would normally fall back to
|
||||
// the global default and get paused), but the explicit disable flag is set.
|
||||
primary := Account{
|
||||
ID: 35701,
|
||||
Platform: PlatformOpenAI,
|
||||
Type: AccountTypeAPIKey,
|
||||
Status: StatusActive,
|
||||
Schedulable: true,
|
||||
Concurrency: 1,
|
||||
Priority: 0,
|
||||
Extra: map[string]any{
|
||||
"codex_5h_used_percent": 99.0,
|
||||
"auto_pause_5h_disabled": true,
|
||||
},
|
||||
}
|
||||
secondary := Account{ID: 35702, Platform: PlatformOpenAI, Type: AccountTypeAPIKey, Status: StatusActive, Schedulable: true, Concurrency: 1, Priority: 5}
|
||||
svc := &OpenAIGatewayService{accountRepo: schedulerTestOpenAIAccountRepo{accounts: []Account{primary, secondary}}, cfg: &config.Config{}}
|
||||
|
||||
account, err := svc.SelectAccountForModelWithExclusions(ctx, nil, "", "gpt-5.1", nil)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, account)
|
||||
require.Equal(t, int64(35701), account.ID)
|
||||
}
|
||||
|
||||
// Disable is per-window: disabling only 5h must still allow 7d auto-pause to fire.
|
||||
func TestOpenAIGatewayService_SelectAccountForModelWithExclusions_PerWindowDisableScoped(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
primary := Account{
|
||||
ID: 35801,
|
||||
Platform: PlatformOpenAI,
|
||||
Type: AccountTypeAPIKey,
|
||||
Status: StatusActive,
|
||||
Schedulable: true,
|
||||
Concurrency: 1,
|
||||
Priority: 0,
|
||||
Extra: map[string]any{
|
||||
"codex_5h_used_percent": 99.0,
|
||||
"codex_7d_used_percent": 99.0,
|
||||
"auto_pause_5h_disabled": true,
|
||||
"auto_pause_7d_threshold": 0.95,
|
||||
},
|
||||
}
|
||||
secondary := Account{ID: 35802, Platform: PlatformOpenAI, Type: AccountTypeAPIKey, Status: StatusActive, Schedulable: true, Concurrency: 1, Priority: 5}
|
||||
svc := &OpenAIGatewayService{accountRepo: schedulerTestOpenAIAccountRepo{accounts: []Account{primary, secondary}}, cfg: &config.Config{}}
|
||||
|
||||
account, err := svc.SelectAccountForModelWithExclusions(ctx, nil, "", "gpt-5.1", nil)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, account)
|
||||
require.Equal(t, int64(35802), account.ID, "7d auto-pause must still fire even though 5h is disabled")
|
||||
}
|
||||
|
||||
func TestOpenAIGatewayService_SelectAccountForModelWithExclusions_StaleUsageWindowResetSkipsPause(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
// Usage is over threshold but the window's reset time has already passed, so the
|
||||
// cached percentage is stale (the real window rolled over) and the account must NOT
|
||||
// stay paused — otherwise it could be skipped forever with no traffic to refresh it.
|
||||
primary := Account{
|
||||
ID: 35501,
|
||||
Platform: PlatformOpenAI,
|
||||
Type: AccountTypeAPIKey,
|
||||
Status: StatusActive,
|
||||
Schedulable: true,
|
||||
Concurrency: 1,
|
||||
Priority: 0,
|
||||
Extra: map[string]any{
|
||||
"codex_5h_used_percent": 99.0,
|
||||
"auto_pause_5h_threshold": 0.95,
|
||||
"codex_5h_reset_at": time.Now().Add(-time.Minute).Format(time.RFC3339),
|
||||
},
|
||||
}
|
||||
secondary := Account{ID: 35502, Platform: PlatformOpenAI, Type: AccountTypeAPIKey, Status: StatusActive, Schedulable: true, Concurrency: 1, Priority: 5}
|
||||
svc := &OpenAIGatewayService{accountRepo: schedulerTestOpenAIAccountRepo{accounts: []Account{primary, secondary}}, cfg: &config.Config{}}
|
||||
|
||||
account, err := svc.SelectAccountForModelWithExclusions(ctx, nil, "", "gpt-5.1", nil)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, account)
|
||||
require.Equal(t, int64(35501), account.ID)
|
||||
}
|
||||
|
||||
func TestOpenAIGatewayService_SelectAccountForModelWithExclusions_FreshUsageWindowStillPauses(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
// Same as above but the window has not reset yet, so the account stays paused.
|
||||
primary := Account{
|
||||
ID: 35601,
|
||||
Platform: PlatformOpenAI,
|
||||
Type: AccountTypeAPIKey,
|
||||
Status: StatusActive,
|
||||
Schedulable: true,
|
||||
Concurrency: 1,
|
||||
Priority: 0,
|
||||
Extra: map[string]any{
|
||||
"codex_5h_used_percent": 99.0,
|
||||
"auto_pause_5h_threshold": 0.95,
|
||||
"codex_5h_reset_at": time.Now().Add(time.Hour).Format(time.RFC3339),
|
||||
},
|
||||
}
|
||||
secondary := Account{ID: 35602, Platform: PlatformOpenAI, Type: AccountTypeAPIKey, Status: StatusActive, Schedulable: true, Concurrency: 1, Priority: 5}
|
||||
svc := &OpenAIGatewayService{accountRepo: schedulerTestOpenAIAccountRepo{accounts: []Account{primary, secondary}}, cfg: &config.Config{}}
|
||||
|
||||
account, err := svc.SelectAccountForModelWithExclusions(ctx, nil, "", "gpt-5.1", nil)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, account)
|
||||
require.Equal(t, int64(35602), account.ID)
|
||||
}
|
||||
|
||||
func TestOpenAIGatewayService_SelectAccountForModelWithExclusions_SkipsFreshlyRateLimitedSnapshotCandidate(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
groupID := int64(10102)
|
||||
@ -1238,6 +1456,85 @@ func TestOpenAIGatewayService_SelectAccountWithScheduler_LoadBalanceTopKFallback
|
||||
}
|
||||
}
|
||||
|
||||
// Regression: TopK initial filter must drop quota-auto-paused accounts. Otherwise
|
||||
// the candidate pool is filled with paused accounts, healthy accounts fall outside
|
||||
// TopK, and the scheduler returns "no available accounts" even though healthy ones
|
||||
// exist.
|
||||
func TestOpenAIGatewayService_SelectAccountWithScheduler_LoadBalanceTopKExcludesQuotaPaused(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
groupID := int64(110)
|
||||
accounts := []Account{
|
||||
{
|
||||
ID: 37001,
|
||||
Platform: PlatformOpenAI,
|
||||
Type: AccountTypeAPIKey,
|
||||
Status: StatusActive,
|
||||
Schedulable: true,
|
||||
Concurrency: 1,
|
||||
Priority: 0,
|
||||
Extra: map[string]any{
|
||||
"codex_5h_used_percent": 96.0,
|
||||
"auto_pause_5h_threshold": 0.95,
|
||||
},
|
||||
},
|
||||
{
|
||||
ID: 37002,
|
||||
Platform: PlatformOpenAI,
|
||||
Type: AccountTypeAPIKey,
|
||||
Status: StatusActive,
|
||||
Schedulable: true,
|
||||
Concurrency: 1,
|
||||
Priority: 5,
|
||||
},
|
||||
}
|
||||
|
||||
cfg := &config.Config{}
|
||||
cfg.Gateway.OpenAIWS.LBTopK = 1 // TopK=1 makes the bug fatal: paused account would crowd out the healthy one entirely
|
||||
cfg.Gateway.OpenAIWS.SchedulerScoreWeights.Priority = 0.4
|
||||
cfg.Gateway.OpenAIWS.SchedulerScoreWeights.Load = 1.0
|
||||
cfg.Gateway.OpenAIWS.SchedulerScoreWeights.Queue = 1.0
|
||||
|
||||
concurrencyCache := schedulerTestConcurrencyCache{
|
||||
loadMap: map[int64]*AccountLoadInfo{
|
||||
37001: {AccountID: 37001, LoadRate: 5, WaitingCount: 0},
|
||||
37002: {AccountID: 37002, LoadRate: 5, WaitingCount: 0},
|
||||
},
|
||||
acquireResults: map[int64]bool{
|
||||
37002: true,
|
||||
},
|
||||
}
|
||||
|
||||
svc := &OpenAIGatewayService{
|
||||
accountRepo: schedulerTestOpenAIAccountRepo{accounts: accounts},
|
||||
cache: &schedulerTestGatewayCache{},
|
||||
cfg: cfg,
|
||||
rateLimitService: newOpenAIAdvancedSchedulerRateLimitService("true"),
|
||||
concurrencyService: NewConcurrencyService(concurrencyCache),
|
||||
}
|
||||
|
||||
selection, decision, err := svc.SelectAccountWithScheduler(
|
||||
ctx,
|
||||
&groupID,
|
||||
"",
|
||||
"",
|
||||
"gpt-5.1",
|
||||
nil,
|
||||
OpenAIUpstreamTransportAny,
|
||||
false,
|
||||
)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, selection)
|
||||
require.NotNil(t, selection.Account)
|
||||
require.Equal(t, int64(37002), selection.Account.ID)
|
||||
require.Equal(t, openAIAccountScheduleLayerLoadBalance, decision.Layer)
|
||||
// Only the healthy account should ever enter the candidate pool; the paused one
|
||||
// must be filtered out at the initial-filter stage.
|
||||
require.Equal(t, 1, decision.CandidateCount)
|
||||
if selection.ReleaseFunc != nil {
|
||||
selection.ReleaseFunc()
|
||||
}
|
||||
}
|
||||
|
||||
func TestOpenAIGatewayService_OpenAIAccountSchedulerMetrics(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
groupID := int64(12)
|
||||
|
||||
@ -1290,7 +1290,7 @@ func (s *OpenAIGatewayService) SelectAccountForModel(ctx context.Context, groupI
|
||||
// SelectAccountForModelWithExclusions selects an account supporting the requested model while excluding specified accounts.
|
||||
// SelectAccountForModelWithExclusions 选择支持指定模型的账号,同时排除指定的账号。
|
||||
func (s *OpenAIGatewayService) SelectAccountForModelWithExclusions(ctx context.Context, groupID *int64, sessionHash string, requestedModel string, excludedIDs map[int64]struct{}) (*Account, error) {
|
||||
return s.selectAccountForModelWithExclusions(ctx, groupID, sessionHash, requestedModel, excludedIDs, false, 0, "")
|
||||
return s.selectAccountForModelWithExclusions(s.withOpenAIQuotaAutoPauseContext(ctx), groupID, sessionHash, requestedModel, excludedIDs, false, 0, "")
|
||||
}
|
||||
|
||||
// noAvailableOpenAISelectionError builds the standard "no account available" error
|
||||
@ -1327,6 +1327,17 @@ func isOpenAIAccountEligibleForRequest(ctx context.Context, account *Account, re
|
||||
if account == nil || !account.IsOpenAI() || !account.IsSchedulableForModelWithContext(ctx, requestedModel) {
|
||||
return false
|
||||
}
|
||||
if paused, reason := shouldAutoPauseOpenAIAccountByQuota(ctx, account); paused {
|
||||
// Debug level: this fires per-candidate on the scheduling hot path, so Info
|
||||
// would amplify into log spam once several accounts cross the threshold.
|
||||
slog.Debug("account_auto_paused_by_quota",
|
||||
"account_id", account.ID,
|
||||
"window", reason.window,
|
||||
"threshold", reason.threshold,
|
||||
"utilization", reason.utilization,
|
||||
)
|
||||
return false
|
||||
}
|
||||
if requestedModel != "" && !account.IsModelSupported(requestedModel) {
|
||||
return false
|
||||
}
|
||||
@ -1339,6 +1350,201 @@ func isOpenAIAccountEligibleForRequest(ctx context.Context, account *Account, re
|
||||
return true
|
||||
}
|
||||
|
||||
// openAIQuotaAutoPauseDecision describes why an account was auto-paused by
// quota. The zero value means "no pause".
type openAIQuotaAutoPauseDecision struct {
	// window names the usage window that tripped the pause ("5h" or "7d").
	window string
	// threshold is the effective pause threshold and utilization the observed
	// usage ratio that reached it; both are fractions, not percentages.
	threshold, utilization float64
}
|
||||
|
||||
func shouldAutoPauseOpenAIAccountByQuota(ctx context.Context, account *Account) (bool, openAIQuotaAutoPauseDecision) {
|
||||
if account == nil || !account.IsOpenAI() {
|
||||
return false, openAIQuotaAutoPauseDecision{}
|
||||
}
|
||||
// Per-account explicit-disable flags must take precedence over the global default.
|
||||
// Without these, leaving the account threshold blank means "use global default",
|
||||
// so an admin has no way to exempt a single account from auto-pause once a global
|
||||
// default exists. The disable flag is per-window so an account can opt out of
|
||||
// only 5h or only 7d auto-pause.
|
||||
disabled5h := resolveAccountExtraBool(account.Extra, "auto_pause_5h_disabled")
|
||||
disabled7d := resolveAccountExtraBool(account.Extra, "auto_pause_7d_disabled")
|
||||
threshold5h, threshold7d := resolveOpenAIQuotaAutoPauseThresholds(ctx, account)
|
||||
now := time.Now()
|
||||
if !disabled5h && threshold5h > 0 {
|
||||
if utilization, ok := resolveOpenAIQuotaUtilization(account.Extra, "5h", now); ok && utilization >= threshold5h {
|
||||
return true, openAIQuotaAutoPauseDecision{window: "5h", threshold: threshold5h, utilization: utilization}
|
||||
}
|
||||
}
|
||||
if !disabled7d && threshold7d > 0 {
|
||||
if utilization, ok := resolveOpenAIQuotaUtilization(account.Extra, "7d", now); ok && utilization >= threshold7d {
|
||||
return true, openAIQuotaAutoPauseDecision{window: "7d", threshold: threshold7d, utilization: utilization}
|
||||
}
|
||||
}
|
||||
return false, openAIQuotaAutoPauseDecision{}
|
||||
}
|
||||
|
||||
// resolveAccountExtraBool reads a bool-like value stored under key in the
// account extra map. It tolerates the shapes JSON decoding may produce:
//
//   - bool: returned as is
//   - string: parsed with strconv.ParseBool after trimming whitespace;
//     unparseable strings count as false
//   - float64, float32, int, int64: true when non-zero
//   - json.Number: true when non-zero, whether written as an integer ("1")
//     or as a float ("1.0")
//
// A missing key, a nil value, an empty map or any other type yields false.
func resolveAccountExtraBool(extra map[string]any, key string) bool {
	if len(extra) == 0 {
		return false
	}
	value, ok := extra[key]
	if !ok || value == nil {
		return false
	}
	switch v := value.(type) {
	case bool:
		return v
	case string:
		parsed, err := strconv.ParseBool(strings.TrimSpace(v))
		return err == nil && parsed
	case float64:
		return v != 0
	case float32:
		return v != 0
	case int:
		return v != 0
	case int64:
		return v != 0
	case json.Number:
		if i, err := v.Int64(); err == nil {
			return i != 0
		}
		// Int64 rejects float notation such as "1.0"; fall back to Float64 so
		// a json.Number behaves the same as the equivalent float64 value.
		if f, err := v.Float64(); err == nil {
			return f != 0
		}
	}
	return false
}
|
||||
|
||||
func resolveOpenAIQuotaAutoPauseThresholds(ctx context.Context, account *Account) (float64, float64) {
|
||||
threshold5h, _ := resolveAccountExtraNumber(account.Extra, "auto_pause_5h_threshold")
|
||||
threshold7d, _ := resolveAccountExtraNumber(account.Extra, "auto_pause_7d_threshold")
|
||||
threshold5h = clamp01(threshold5h)
|
||||
threshold7d = clamp01(threshold7d)
|
||||
if threshold5h > 0 && threshold7d > 0 {
|
||||
return threshold5h, threshold7d
|
||||
}
|
||||
settings := openAIQuotaAutoPauseSettingsFromContext(ctx)
|
||||
if threshold5h <= 0 {
|
||||
threshold5h = clamp01(settings.DefaultThreshold5h)
|
||||
}
|
||||
if threshold7d <= 0 {
|
||||
threshold7d = clamp01(settings.DefaultThreshold7d)
|
||||
}
|
||||
return threshold5h, threshold7d
|
||||
}
|
||||
|
||||
// resolveAccountExtraNumber returns the first usable numeric value found in
// extra under any of keys, tried in the order given.
//
// Accepted shapes are float64, float32, int, int64, json.Number and numeric
// strings (surrounding whitespace is ignored). A key that is missing, nil,
// of another type, or unparseable is skipped. The second result is false
// when no key produced a number.
func resolveAccountExtraNumber(extra map[string]any, keys ...string) (float64, bool) {
	if len(extra) == 0 {
		return 0, false
	}
	for _, name := range keys {
		switch raw := extra[name].(type) {
		case nil:
			// Missing key or explicit null: try the next key.
		case float64:
			return raw, true
		case float32:
			return float64(raw), true
		case int:
			return float64(raw), true
		case int64:
			return float64(raw), true
		case json.Number:
			if f, err := raw.Float64(); err == nil {
				return f, true
			}
		case string:
			if f, err := strconv.ParseFloat(strings.TrimSpace(raw), 64); err == nil {
				return f, true
			}
		}
	}
	return 0, false
}
|
||||
|
||||
// resolveOpenAIQuotaUtilization returns the current utilization ratio (0..1) for the
|
||||
// given Codex usage window. ok=false means there is no usable signal to pause on:
|
||||
// either no snapshot exists, or the window has already rolled over so the cached
|
||||
// percentage is stale. The stale guard matters because a paused account stops
|
||||
// receiving requests, so its snapshot is never refreshed from upstream headers —
|
||||
// without this check an old used_percent would keep the account paused forever even
|
||||
// after the real window reset.
|
||||
func resolveOpenAIQuotaUtilization(extra map[string]any, window string, now time.Time) (float64, bool) {
|
||||
usedPercent := readOpenAIQuotaUsedPercent(extra, window)
|
||||
if usedPercent <= 0 {
|
||||
return 0, false
|
||||
}
|
||||
if openAIQuotaWindowReset(extra, window, now) {
|
||||
return 0, false
|
||||
}
|
||||
return usedPercent / 100, true
|
||||
}
|
||||
|
||||
// openAIQuotaWindowReset reports whether the Codex usage window's reset time has
|
||||
// already passed relative to now. It prefers the absolute codex_<window>_reset_at
|
||||
// timestamp and falls back to codex_<window>_reset_after_seconds anchored at
|
||||
// codex_usage_updated_at, mirroring AccountUsageService's window-progress logic.
|
||||
func openAIQuotaWindowReset(extra map[string]any, window string, now time.Time) bool {
|
||||
if len(extra) == 0 {
|
||||
return false
|
||||
}
|
||||
if resetAtRaw, ok := extra["codex_"+window+"_reset_at"]; ok {
|
||||
if resetAt, err := parseTime(fmt.Sprint(resetAtRaw)); err == nil {
|
||||
return !now.Before(resetAt)
|
||||
}
|
||||
}
|
||||
resetAfter := parseExtraInt(extra["codex_"+window+"_reset_after_seconds"])
|
||||
if resetAfter <= 0 {
|
||||
return false
|
||||
}
|
||||
base := now
|
||||
if updatedRaw, ok := extra["codex_usage_updated_at"]; ok {
|
||||
if updatedAt, err := parseTime(fmt.Sprint(updatedRaw)); err == nil {
|
||||
base = updatedAt
|
||||
}
|
||||
}
|
||||
resetAt := base.Add(time.Duration(resetAfter) * time.Second)
|
||||
return !now.Before(resetAt)
|
||||
}
|
||||
|
||||
func readOpenAIQuotaUsedPercent(extra map[string]any, window string) float64 {
|
||||
if len(extra) == 0 {
|
||||
return 0
|
||||
}
|
||||
if value, ok := resolveAccountExtraNumber(extra, "codex_"+window+"_used_percent"); ok {
|
||||
return value
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
type openAIQuotaAutoPauseCtxKey struct{}
|
||||
|
||||
func withOpenAIQuotaAutoPauseSettings(ctx context.Context, settings OpsOpenAIAccountQuotaAutoPauseSettings) context.Context {
|
||||
if ctx == nil {
|
||||
ctx = context.Background()
|
||||
}
|
||||
return context.WithValue(ctx, openAIQuotaAutoPauseCtxKey{}, settings)
|
||||
}
|
||||
|
||||
func openAIQuotaAutoPauseSettingsFromContext(ctx context.Context) OpsOpenAIAccountQuotaAutoPauseSettings {
|
||||
if ctx == nil {
|
||||
return OpsOpenAIAccountQuotaAutoPauseSettings{}
|
||||
}
|
||||
settings, _ := ctx.Value(openAIQuotaAutoPauseCtxKey{}).(OpsOpenAIAccountQuotaAutoPauseSettings)
|
||||
return settings
|
||||
}
|
||||
|
||||
func (s *OpenAIGatewayService) withOpenAIQuotaAutoPauseContext(ctx context.Context) context.Context {
|
||||
if s == nil || s.settingService == nil {
|
||||
return ctx
|
||||
}
|
||||
return withOpenAIQuotaAutoPauseSettings(ctx, s.settingService.GetOpenAIQuotaAutoPauseSettings(ctx))
|
||||
}
|
||||
|
||||
// prioritizeOpenAICompactAccounts re-orders a slice so that accounts with known
|
||||
// compact support are tried first, followed by unknown, then explicitly unsupported.
|
||||
// The relative order within each tier is preserved.
|
||||
@ -1587,7 +1793,7 @@ func (s *OpenAIGatewayService) isBetterAccount(candidate, current *Account) bool
|
||||
|
||||
// SelectAccountWithLoadAwareness selects an account with load-awareness and wait plan.
|
||||
func (s *OpenAIGatewayService) SelectAccountWithLoadAwareness(ctx context.Context, groupID *int64, sessionHash string, requestedModel string, excludedIDs map[int64]struct{}) (*AccountSelectionResult, error) {
|
||||
return s.selectAccountWithLoadAwareness(ctx, groupID, sessionHash, requestedModel, excludedIDs, false, "")
|
||||
return s.selectAccountWithLoadAwareness(s.withOpenAIQuotaAutoPauseContext(ctx), groupID, sessionHash, requestedModel, excludedIDs, false, "")
|
||||
}
|
||||
|
||||
func (s *OpenAIGatewayService) selectAccountWithLoadAwareness(ctx context.Context, groupID *int64, sessionHash string, requestedModel string, excludedIDs map[int64]struct{}, requireCompact bool, requiredCapability OpenAIEndpointCapability) (*AccountSelectionResult, error) {
|
||||
|
||||
@ -48,6 +48,46 @@ func TestOpenAIGatewayService_SelectAccountByPreviousResponseID_Hit(t *testing.T
|
||||
}
|
||||
}
|
||||
|
||||
func TestOpenAIGatewayService_SelectAccountByPreviousResponseID_QuotaAutoPausedMiss(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
groupID := int64(23)
|
||||
account := Account{
|
||||
ID: 77,
|
||||
Platform: PlatformOpenAI,
|
||||
Type: AccountTypeAPIKey,
|
||||
Status: StatusActive,
|
||||
Schedulable: true,
|
||||
Concurrency: 2,
|
||||
Extra: map[string]any{
|
||||
"openai_apikey_responses_websockets_v2_enabled": true,
|
||||
"codex_5h_used_percent": 96.0,
|
||||
"auto_pause_5h_threshold": 0.95,
|
||||
},
|
||||
}
|
||||
cache := &stubGatewayCache{}
|
||||
store := NewOpenAIWSStateStore(cache)
|
||||
cfg := newOpenAIWSV2TestConfig()
|
||||
svc := &OpenAIGatewayService{
|
||||
accountRepo: stubOpenAIAccountRepo{accounts: []Account{account}},
|
||||
cache: cache,
|
||||
cfg: cfg,
|
||||
concurrencyService: NewConcurrencyService(stubConcurrencyCache{}),
|
||||
openaiWSStateStore: store,
|
||||
}
|
||||
|
||||
require.NoError(t, store.BindResponseAccount(ctx, groupID, "resp_prev_quota", account.ID, time.Hour))
|
||||
|
||||
selection, err := svc.SelectAccountByPreviousResponseID(ctx, &groupID, "resp_prev_quota", "gpt-5.1", nil, false)
|
||||
require.NoError(t, err)
|
||||
require.Nil(t, selection, "超过 5h 配额阈值的账号不应继续命中 previous_response_id 粘连")
|
||||
|
||||
// Auto-pause is transient, so the binding is preserved: the chain can resume on the
|
||||
// same account once the quota window resets.
|
||||
boundAccountID, getErr := store.GetResponseAccount(ctx, groupID, "resp_prev_quota")
|
||||
require.NoError(t, getErr)
|
||||
require.Equal(t, account.ID, boundAccountID)
|
||||
}
|
||||
|
||||
func TestOpenAIGatewayService_SelectAccountByPreviousResponseID_RateLimitedMiss(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
groupID := int64(23)
|
||||
|
||||
@ -4060,6 +4060,13 @@ func (s *OpenAIGatewayService) selectAccountByPreviousResponseIDForCapability(
|
||||
if !account.SupportsOpenAIEndpointCapability(requiredCapability) {
|
||||
return nil, nil
|
||||
}
|
||||
// Quota auto-pause must also gate the previous_response_id sticky path; otherwise an
|
||||
// account over its 5h/7d threshold keeps serving the same response chain even though
|
||||
// normal scheduling skips it. Pause is transient, so fall through to normal scheduling
|
||||
// without deleting the binding (the window may reset before the next turn).
|
||||
if paused, _ := shouldAutoPauseOpenAIAccountByQuota(ctx, account); paused {
|
||||
return nil, nil
|
||||
}
|
||||
if s.schedulerSnapshot != nil && s.accountRepo != nil {
|
||||
latest, latestErr := s.accountRepo.GetByID(ctx, account.ID)
|
||||
if latestErr != nil || latest == nil {
|
||||
@ -4076,6 +4083,9 @@ func (s *OpenAIGatewayService) selectAccountByPreviousResponseIDForCapability(
|
||||
if !latest.SupportsOpenAIEndpointCapability(requiredCapability) {
|
||||
return nil, nil
|
||||
}
|
||||
if paused, _ := shouldAutoPauseOpenAIAccountByQuota(ctx, latest); paused {
|
||||
return nil, nil
|
||||
}
|
||||
if s.isOpenAIAccountRuntimeBlocked(latest) {
|
||||
_ = store.DeleteResponseAccount(ctx, derefGroupID(groupID), responseID)
|
||||
return nil, nil
|
||||
|
||||
@ -41,6 +41,11 @@ type OpsService struct {
|
||||
// cleanupReloader 由 wire 在 OpsCleanupService 构造完成后通过 SetCleanupReloader 注入。
|
||||
// 解耦避免 OpsService -> OpsCleanupService 的硬依赖(cleanup 也读 settings,会循环)。
|
||||
cleanupReloader CleanupReloader
|
||||
|
||||
// quotaAutoPauseSink 由 wire 注入(通常是 SettingService.SetOpenAIQuotaAutoPauseSettings)。
|
||||
// UpdateOpsAdvancedSettings 写入新配置后调用,把最新的 quota auto-pause 全局默认阈值
|
||||
// 立即同步到调度热路径读取的内存缓存,避免下次请求才能感知新值。
|
||||
quotaAutoPauseSink func(OpsOpenAIAccountQuotaAutoPauseSettings)
|
||||
}
|
||||
|
||||
// CleanupReloader 由 OpsCleanupService 实现。
|
||||
@ -57,6 +62,16 @@ func (s *OpsService) SetCleanupReloader(r CleanupReloader) {
|
||||
s.cleanupReloader = r
|
||||
}
|
||||
|
||||
// SetOpenAIQuotaAutoPauseSettingsSink 由 wire 注入,把最新的 quota auto-pause 全局默认
|
||||
// 阈值 push 到调度热路径读取的内存缓存。同 SetCleanupReloader 的解耦目的:避免 OpsService
|
||||
// 持有 *SettingService 引入循环依赖。
|
||||
func (s *OpsService) SetOpenAIQuotaAutoPauseSettingsSink(sink func(OpsOpenAIAccountQuotaAutoPauseSettings)) {
|
||||
if s == nil {
|
||||
return
|
||||
}
|
||||
s.quotaAutoPauseSink = sink
|
||||
}
|
||||
|
||||
func NewOpsService(
|
||||
opsRepo OpsRepository,
|
||||
settingRepo SettingRepository,
|
||||
|
||||
@ -369,6 +369,7 @@ func defaultOpsAdvancedSettings() *OpsAdvancedSettings {
|
||||
Aggregation: OpsAggregationSettings{
|
||||
AggregationEnabled: false,
|
||||
},
|
||||
OpenAIAccountQuotaAutoPause: OpsOpenAIAccountQuotaAutoPauseSettings{},
|
||||
IgnoreCountTokensErrors: true, // count_tokens 404 是预期行为,默认忽略
|
||||
IgnoreContextCanceled: true, // Default to true - client disconnects are not errors
|
||||
IgnoreNoAvailableAccounts: false, // Default to false - this is a real routing issue
|
||||
@ -384,6 +385,8 @@ func normalizeOpsAdvancedSettings(cfg *OpsAdvancedSettings) {
|
||||
if cfg == nil {
|
||||
return
|
||||
}
|
||||
cfg.OpenAIAccountQuotaAutoPause.DefaultThreshold5h = clampOpsQuotaAutoPauseThreshold(cfg.OpenAIAccountQuotaAutoPause.DefaultThreshold5h)
|
||||
cfg.OpenAIAccountQuotaAutoPause.DefaultThreshold7d = clampOpsQuotaAutoPauseThreshold(cfg.OpenAIAccountQuotaAutoPause.DefaultThreshold7d)
|
||||
cfg.DataRetention.CleanupSchedule = strings.TrimSpace(cfg.DataRetention.CleanupSchedule)
|
||||
if cfg.DataRetention.CleanupSchedule == "" {
|
||||
cfg.DataRetention.CleanupSchedule = opsCleanupDefaultSchedule
|
||||
@ -405,6 +408,16 @@ func normalizeOpsAdvancedSettings(cfg *OpsAdvancedSettings) {
|
||||
}
|
||||
}
|
||||
|
||||
// clampOpsQuotaAutoPauseThreshold normalizes a quota auto-pause threshold into the
// closed range [0, 1].
//
// Non-positive values and NaN collapse to 0, values above 1 (including +Inf) are
// capped at 1, and anything already inside (0, 1] is returned unchanged.
func clampOpsQuotaAutoPauseThreshold(value float64) float64 {
	switch {
	case !(value > 0):
		// Deliberately a negated comparison rather than `value <= 0`: every ordered
		// comparison with NaN is false, so this form also catches NaN instead of
		// letting it fall through and be returned as a "valid" threshold.
		return 0
	case value > 1:
		return 1
	default:
		return value
	}
}
|
||||
|
||||
func validateOpsAdvancedSettings(cfg *OpsAdvancedSettings) error {
|
||||
if cfg == nil {
|
||||
return errors.New("invalid config")
|
||||
@ -477,6 +490,12 @@ func (s *OpsService) UpdateOpsAdvancedSettings(ctx context.Context, cfg *OpsAdva
|
||||
if err := s.settingRepo.Set(ctx, SettingKeyOpsAdvancedSettings, string(raw)); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// Push the new quota auto-pause settings straight into the in-memory cache that
|
||||
// the OpenAI scheduling hot path reads, so the next request observes the new value
|
||||
// without waiting for the background refresher's TTL.
|
||||
if s.quotaAutoPauseSink != nil {
|
||||
s.quotaAutoPauseSink(cfg.OpenAIAccountQuotaAutoPause)
|
||||
}
|
||||
|
||||
// notify cleanup service to reload schedule/enabled.
|
||||
if s.cleanupReloader != nil {
|
||||
|
||||
@ -4,6 +4,9 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/Wei-Shaw/sub2api/internal/config"
|
||||
)
|
||||
|
||||
func TestGetOpsAdvancedSettings_DefaultHidesOpenAITokenStats(t *testing.T) {
|
||||
@ -95,3 +98,64 @@ func TestGetOpsAdvancedSettings_BackfillsNewDisplayFlagsFromDefaults(t *testing.
|
||||
t.Fatalf("DisplayAlertEvents = false, want true default backfill")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetOpenAIQuotaAutoPauseSettings_ReadsDefaultsFromOpsAdvancedSettings(t *testing.T) {
|
||||
repo := newRuntimeSettingRepoStub()
|
||||
repo.values[SettingKeyOpsAdvancedSettings] = `{"openai_account_quota_auto_pause":{"default_threshold_5h":0.95,"default_threshold_7d":0.9}}`
|
||||
svc := NewSettingService(repo, &config.Config{})
|
||||
|
||||
// Warm the in-memory cache synchronously so the assertion below is deterministic.
|
||||
// GetOpenAIQuotaAutoPauseSettings is non-blocking on the hot path (returns the
|
||||
// cached value, refreshes asynchronously); for tests and startup, Warm is the
|
||||
// synchronous entry point that guarantees a populated cache.
|
||||
settings := svc.WarmOpenAIQuotaAutoPauseSettings(context.Background())
|
||||
if settings.DefaultThreshold5h != 0.95 {
|
||||
t.Fatalf("DefaultThreshold5h = %v, want 0.95", settings.DefaultThreshold5h)
|
||||
}
|
||||
if settings.DefaultThreshold7d != 0.9 {
|
||||
t.Fatalf("DefaultThreshold7d = %v, want 0.9", settings.DefaultThreshold7d)
|
||||
}
|
||||
|
||||
// Subsequent Get must hit the warm cache and return the same value without any DB
|
||||
// access — that's the hot-path invariant.
|
||||
cached := svc.GetOpenAIQuotaAutoPauseSettings(context.Background())
|
||||
if cached.DefaultThreshold5h != 0.95 || cached.DefaultThreshold7d != 0.9 {
|
||||
t.Fatalf("cached read = %+v, want {0.95, 0.9}", cached)
|
||||
}
|
||||
}
|
||||
|
||||
// Hot-path invariant: a Get with cold cache must return immediately (zero defaults)
|
||||
// rather than blocking on the DB. The async refresher will populate the cache for
|
||||
// subsequent calls.
|
||||
func TestGetOpenAIQuotaAutoPauseSettings_ColdCacheNonBlocking(t *testing.T) {
|
||||
repo := newRuntimeSettingRepoStub()
|
||||
repo.values[SettingKeyOpsAdvancedSettings] = `{"openai_account_quota_auto_pause":{"default_threshold_5h":0.7}}`
|
||||
svc := NewSettingService(repo, &config.Config{})
|
||||
|
||||
start := time.Now()
|
||||
settings := svc.GetOpenAIQuotaAutoPauseSettings(context.Background())
|
||||
elapsed := time.Since(start)
|
||||
if elapsed > 50*time.Millisecond {
|
||||
t.Fatalf("cold-cache Get must be non-blocking, took %v", elapsed)
|
||||
}
|
||||
// Cold cache means we get zero defaults (the async refresh hasn't completed yet).
|
||||
if settings.DefaultThreshold5h != 0 || settings.DefaultThreshold7d != 0 {
|
||||
t.Fatalf("cold-cache Get = %+v, want zeroes", settings)
|
||||
}
|
||||
}
|
||||
|
||||
// Explicit cache write (e.g. from UpdateOpsAdvancedSettings) must be visible on the
|
||||
// very next read without any DB roundtrip.
|
||||
func TestSetOpenAIQuotaAutoPauseSettings_VisibleImmediately(t *testing.T) {
|
||||
svc := NewSettingService(newRuntimeSettingRepoStub(), &config.Config{})
|
||||
|
||||
svc.SetOpenAIQuotaAutoPauseSettings(OpsOpenAIAccountQuotaAutoPauseSettings{
|
||||
DefaultThreshold5h: 0.88,
|
||||
DefaultThreshold7d: 0.77,
|
||||
})
|
||||
|
||||
got := svc.GetOpenAIQuotaAutoPauseSettings(context.Background())
|
||||
if got.DefaultThreshold5h != 0.88 || got.DefaultThreshold7d != 0.77 {
|
||||
t.Fatalf("after Set, Get = %+v, want {0.88, 0.77}", got)
|
||||
}
|
||||
}
|
||||
|
||||
@ -92,17 +92,23 @@ type OpsAlertRuntimeSettings struct {
|
||||
|
||||
// OpsAdvancedSettings stores advanced ops configuration (data retention, aggregation).
|
||||
type OpsAdvancedSettings struct {
|
||||
DataRetention OpsDataRetentionSettings `json:"data_retention"`
|
||||
Aggregation OpsAggregationSettings `json:"aggregation"`
|
||||
IgnoreCountTokensErrors bool `json:"ignore_count_tokens_errors"`
|
||||
IgnoreContextCanceled bool `json:"ignore_context_canceled"`
|
||||
IgnoreNoAvailableAccounts bool `json:"ignore_no_available_accounts"`
|
||||
IgnoreInvalidApiKeyErrors bool `json:"ignore_invalid_api_key_errors"`
|
||||
IgnoreInsufficientBalanceErrors bool `json:"ignore_insufficient_balance_errors"`
|
||||
DisplayOpenAITokenStats bool `json:"display_openai_token_stats"`
|
||||
DisplayAlertEvents bool `json:"display_alert_events"`
|
||||
AutoRefreshEnabled bool `json:"auto_refresh_enabled"`
|
||||
AutoRefreshIntervalSec int `json:"auto_refresh_interval_seconds"`
|
||||
DataRetention OpsDataRetentionSettings `json:"data_retention"`
|
||||
Aggregation OpsAggregationSettings `json:"aggregation"`
|
||||
OpenAIAccountQuotaAutoPause OpsOpenAIAccountQuotaAutoPauseSettings `json:"openai_account_quota_auto_pause"`
|
||||
IgnoreCountTokensErrors bool `json:"ignore_count_tokens_errors"`
|
||||
IgnoreContextCanceled bool `json:"ignore_context_canceled"`
|
||||
IgnoreNoAvailableAccounts bool `json:"ignore_no_available_accounts"`
|
||||
IgnoreInvalidApiKeyErrors bool `json:"ignore_invalid_api_key_errors"`
|
||||
IgnoreInsufficientBalanceErrors bool `json:"ignore_insufficient_balance_errors"`
|
||||
DisplayOpenAITokenStats bool `json:"display_openai_token_stats"`
|
||||
DisplayAlertEvents bool `json:"display_alert_events"`
|
||||
AutoRefreshEnabled bool `json:"auto_refresh_enabled"`
|
||||
AutoRefreshIntervalSec int `json:"auto_refresh_interval_seconds"`
|
||||
}
|
||||
|
||||
// OpsOpenAIAccountQuotaAutoPauseSettings holds the global default usage thresholds
// for OpenAI account quota auto-pause. Each threshold is a fraction that
// normalizeOpsAdvancedSettings clamps into [0, 1]; 0 means no global default is
// enabled for that window.
type OpsOpenAIAccountQuotaAutoPauseSettings struct {
	// DefaultThreshold5h is the global default threshold for the 5h usage window.
	DefaultThreshold5h float64 `json:"default_threshold_5h"`
	// DefaultThreshold7d is the global default threshold for the 7d usage window.
	DefaultThreshold7d float64 `json:"default_threshold_7d"`
}
|
||||
|
||||
type OpsDataRetentionSettings struct {
|
||||
|
||||
@ -137,6 +137,11 @@ type cachedOpenAICodexUserAgent struct {
|
||||
expiresAt int64 // unix nano
|
||||
}
|
||||
|
||||
type cachedOpenAIQuotaAutoPauseSettings struct {
|
||||
settings OpsOpenAIAccountQuotaAutoPauseSettings
|
||||
expiresAt int64
|
||||
}
|
||||
|
||||
const openAICodexUserAgentCacheTTL = 60 * time.Second
|
||||
const openAICodexUserAgentErrorTTL = 5 * time.Second
|
||||
const openAICodexUserAgentDBTimeout = 5 * time.Second
|
||||
@ -152,6 +157,12 @@ const openAIAllowCodexPluginCacheTTL = 60 * time.Second
|
||||
const openAIAllowCodexPluginErrorTTL = 5 * time.Second
|
||||
const openAIAllowCodexPluginDBTimeout = 5 * time.Second
|
||||
|
||||
// Tunables for the in-memory OpenAI quota auto-pause settings cache.
const (
	// openAIQuotaAutoPauseSettingsCacheTTL is how long a successfully loaded or
	// explicitly written entry is treated as fresh.
	openAIQuotaAutoPauseSettingsCacheTTL = 60 * time.Second
	// openAIQuotaAutoPauseSettingsErrorTTL is the shorter freshness window applied
	// after a failed load, so the next refresh happens sooner.
	openAIQuotaAutoPauseSettingsErrorTTL = 5 * time.Second
	// openAIQuotaAutoPauseSettingsDBTimeout bounds a single settings read.
	openAIQuotaAutoPauseSettingsDBTimeout = 5 * time.Second

	// openAIQuotaAutoPauseSettingsRefreshKey is the singleflight key under which
	// concurrent cache refreshes are deduplicated.
	openAIQuotaAutoPauseSettingsRefreshKey = "openai_quota_auto_pause_settings"
)
|
||||
|
||||
// DefaultSubscriptionGroupReader validates group references used by default subscriptions.
|
||||
type DefaultSubscriptionGroupReader interface {
|
||||
GetByID(ctx context.Context, id int64) (*Group, error)
|
||||
@ -176,6 +187,15 @@ type SettingService struct {
|
||||
openAICodexUASF singleflight.Group
|
||||
openAIAllowCodexPluginCache atomic.Value // *cachedOpenAIAllowCodexPlugin
|
||||
openAIAllowCodexPluginSF singleflight.Group
|
||||
|
||||
// openAIQuotaAutoPauseSettingsCache holds the most recently observed quota auto-pause
|
||||
// settings. GetOpenAIQuotaAutoPauseSettings reads this atomic.Value on the request hot
|
||||
// path without ever blocking on the DB; when the cached entry expires, a background
|
||||
// goroutine refreshes it via openAIQuotaAutoPauseSettingsSF (stale-while-revalidate).
|
||||
// This per-service field also gives tests natural isolation — each SettingService
|
||||
// instance owns its own cache, no shared package-level state.
|
||||
openAIQuotaAutoPauseSettingsCache atomic.Value // *cachedOpenAIQuotaAutoPauseSettings
|
||||
openAIQuotaAutoPauseSettingsSF singleflight.Group
|
||||
}
|
||||
|
||||
// DefaultPlatformQuotaSetting 单 platform 三档限额(nil = 沿用上层;0 = 显式禁用;>0 = 上限)
|
||||
@ -2027,6 +2047,17 @@ func (s *SettingService) refreshCachedSettings(settings *SystemSettings) {
|
||||
enabled: settings.OpenAIAdvancedSchedulerEnabled,
|
||||
expiresAt: time.Now().Add(openAIAdvancedSchedulerSettingCacheTTL).UnixNano(),
|
||||
})
|
||||
// Invalidate the quota auto-pause cache and let the next read trigger a fresh load.
|
||||
// We can't know from here whether ops_advanced_settings was also touched, so be
|
||||
// defensive: store an expired entry — GetOpenAIQuotaAutoPauseSettings will serve
|
||||
// stale and kick off an async refresh, never blocking the request that follows.
|
||||
s.openAIQuotaAutoPauseSettingsSF.Forget(openAIQuotaAutoPauseSettingsRefreshKey)
|
||||
if cached, _ := s.openAIQuotaAutoPauseSettingsCache.Load().(*cachedOpenAIQuotaAutoPauseSettings); cached != nil {
|
||||
s.openAIQuotaAutoPauseSettingsCache.Store(&cachedOpenAIQuotaAutoPauseSettings{
|
||||
settings: cached.settings,
|
||||
expiresAt: 0,
|
||||
})
|
||||
}
|
||||
if s.cfg != nil {
|
||||
s.cfg.SetTrustForwardedIPForAPIKeyACL(settings.APIKeyACLTrustForwardedIP)
|
||||
}
|
||||
@ -4448,6 +4479,106 @@ func (s *SettingService) GetClaudeCodeVersionBounds(ctx context.Context) (min, m
|
||||
return b.min, b.max
|
||||
}
|
||||
|
||||
// GetOpenAIQuotaAutoPauseSettings returns the current global default quota auto-pause
|
||||
// settings. It is invoked on the OpenAI scheduling hot path (once per request) and is
|
||||
// therefore designed to never block on the DB:
|
||||
//
|
||||
// - Fresh cached value → returned immediately.
|
||||
// - Stale or empty cache → the last known value is returned, and a background
|
||||
// goroutine refreshes the cache via singleflight (stale-while-revalidate).
|
||||
// - First call with no cache yet → zero defaults are returned and the same async
|
||||
// refresh is kicked off; the next call gets the freshly populated value.
|
||||
//
|
||||
// Callers that need the freshly persisted value synchronously (tests, post-update
|
||||
// confirmation, optional startup warm-up) should call WarmOpenAIQuotaAutoPauseSettings.
|
||||
func (s *SettingService) GetOpenAIQuotaAutoPauseSettings(ctx context.Context) OpsOpenAIAccountQuotaAutoPauseSettings {
|
||||
if s == nil {
|
||||
return OpsOpenAIAccountQuotaAutoPauseSettings{}
|
||||
}
|
||||
cached, _ := s.openAIQuotaAutoPauseSettingsCache.Load().(*cachedOpenAIQuotaAutoPauseSettings)
|
||||
now := time.Now().UnixNano()
|
||||
if cached != nil && now < cached.expiresAt {
|
||||
return cached.settings
|
||||
}
|
||||
// Stale or unset: trigger background refresh without blocking this request.
|
||||
// singleflight.DoChan dedupes concurrent refreshes; we deliberately ignore the
|
||||
// returned channel — the result is observable via the atomic cache.
|
||||
s.openAIQuotaAutoPauseSettingsSF.DoChan(openAIQuotaAutoPauseSettingsRefreshKey, func() (any, error) {
|
||||
s.refreshOpenAIQuotaAutoPauseSettings(context.Background())
|
||||
return nil, nil
|
||||
})
|
||||
if cached != nil {
|
||||
return cached.settings // serve stale value while revalidating
|
||||
}
|
||||
return OpsOpenAIAccountQuotaAutoPauseSettings{}
|
||||
}
|
||||
|
||||
// WarmOpenAIQuotaAutoPauseSettings synchronously loads the quota auto-pause settings
|
||||
// into the in-memory cache. Useful for application startup (so the first request hits
|
||||
// a warm cache) and for tests that need deterministic reads immediately after
|
||||
// constructing the service.
|
||||
func (s *SettingService) WarmOpenAIQuotaAutoPauseSettings(ctx context.Context) OpsOpenAIAccountQuotaAutoPauseSettings {
|
||||
if s == nil {
|
||||
return OpsOpenAIAccountQuotaAutoPauseSettings{}
|
||||
}
|
||||
s.refreshOpenAIQuotaAutoPauseSettings(ctx)
|
||||
cached, _ := s.openAIQuotaAutoPauseSettingsCache.Load().(*cachedOpenAIQuotaAutoPauseSettings)
|
||||
if cached == nil {
|
||||
return OpsOpenAIAccountQuotaAutoPauseSettings{}
|
||||
}
|
||||
return cached.settings
|
||||
}
|
||||
|
||||
// refreshOpenAIQuotaAutoPauseSettings reads the latest settings from the DB and stores
|
||||
// them into the in-memory cache. On error it stores the prior value (or zero defaults
|
||||
// if nothing is cached yet) with the shorter error TTL so the next refresh comes
|
||||
// sooner. Always uses its own timeout-bounded context to keep refresh latency
|
||||
// predictable regardless of the caller.
|
||||
func (s *SettingService) refreshOpenAIQuotaAutoPauseSettings(ctx context.Context) {
|
||||
if s == nil || s.settingRepo == nil {
|
||||
return
|
||||
}
|
||||
dbCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), openAIQuotaAutoPauseSettingsDBTimeout)
|
||||
defer cancel()
|
||||
|
||||
settings := OpsOpenAIAccountQuotaAutoPauseSettings{}
|
||||
ttl := openAIQuotaAutoPauseSettingsCacheTTL
|
||||
raw, err := s.settingRepo.GetValue(dbCtx, SettingKeyOpsAdvancedSettings)
|
||||
if err == nil {
|
||||
cfg := defaultOpsAdvancedSettings()
|
||||
if strings.TrimSpace(raw) != "" {
|
||||
if jsonErr := json.Unmarshal([]byte(raw), cfg); jsonErr == nil {
|
||||
normalizeOpsAdvancedSettings(cfg)
|
||||
}
|
||||
}
|
||||
settings = cfg.OpenAIAccountQuotaAutoPause
|
||||
} else if !errors.Is(err, ErrSettingNotFound) {
|
||||
// Real error: keep serving prior value but refresh sooner.
|
||||
if prior, _ := s.openAIQuotaAutoPauseSettingsCache.Load().(*cachedOpenAIQuotaAutoPauseSettings); prior != nil {
|
||||
settings = prior.settings
|
||||
}
|
||||
ttl = openAIQuotaAutoPauseSettingsErrorTTL
|
||||
}
|
||||
|
||||
s.openAIQuotaAutoPauseSettingsCache.Store(&cachedOpenAIQuotaAutoPauseSettings{
|
||||
settings: settings,
|
||||
expiresAt: time.Now().Add(ttl).UnixNano(),
|
||||
})
|
||||
}
|
||||
|
||||
// SetOpenAIQuotaAutoPauseSettings writes the given settings directly into the in-memory
|
||||
// cache. Called from settings-write code paths so that the next read reflects the new
|
||||
// value immediately, without waiting for the background refresh.
|
||||
func (s *SettingService) SetOpenAIQuotaAutoPauseSettings(settings OpsOpenAIAccountQuotaAutoPauseSettings) {
|
||||
if s == nil {
|
||||
return
|
||||
}
|
||||
s.openAIQuotaAutoPauseSettingsCache.Store(&cachedOpenAIQuotaAutoPauseSettings{
|
||||
settings: settings,
|
||||
expiresAt: time.Now().Add(openAIQuotaAutoPauseSettingsCacheTTL).UnixNano(),
|
||||
})
|
||||
}
|
||||
|
||||
// GetRectifierSettings 获取请求整流器配置
|
||||
func (s *SettingService) GetRectifierSettings(ctx context.Context) (*RectifierSettings, error) {
|
||||
value, err := s.settingRepo.GetValue(ctx, SettingKeyRectifierSettings)
|
||||
|
||||
@ -396,6 +396,46 @@ func ProvideBackupService(
|
||||
return svc
|
||||
}
|
||||
|
||||
// ProvideOpsService constructs OpsService and wires the SettingService-backed quota
|
||||
// auto-pause cache sink. Mirrors the SetCleanupReloader pattern: OpsService doesn't
|
||||
// hold a *SettingService reference, but wire injects a tiny callback so writes to
|
||||
// ops_advanced_settings immediately propagate into the scheduler hot-path cache.
|
||||
func ProvideOpsService(
|
||||
opsRepo OpsRepository,
|
||||
settingRepo SettingRepository,
|
||||
cfg *config.Config,
|
||||
accountRepo AccountRepository,
|
||||
userRepo UserRepository,
|
||||
concurrencyService *ConcurrencyService,
|
||||
gatewayService *GatewayService,
|
||||
openAIGatewayService *OpenAIGatewayService,
|
||||
geminiCompatService *GeminiMessagesCompatService,
|
||||
antigravityGatewayService *AntigravityGatewayService,
|
||||
systemLogSink *OpsSystemLogSink,
|
||||
settingService *SettingService,
|
||||
) *OpsService {
|
||||
svc := NewOpsService(
|
||||
opsRepo,
|
||||
settingRepo,
|
||||
cfg,
|
||||
accountRepo,
|
||||
userRepo,
|
||||
concurrencyService,
|
||||
gatewayService,
|
||||
openAIGatewayService,
|
||||
geminiCompatService,
|
||||
antigravityGatewayService,
|
||||
systemLogSink,
|
||||
)
|
||||
if settingService != nil {
|
||||
svc.SetOpenAIQuotaAutoPauseSettingsSink(settingService.SetOpenAIQuotaAutoPauseSettings)
|
||||
// Optional warm-up so the first scheduled request after process start observes
|
||||
// a populated cache rather than zero defaults. Best-effort, sync-bounded.
|
||||
settingService.WarmOpenAIQuotaAutoPauseSettings(context.Background())
|
||||
}
|
||||
return svc
|
||||
}
|
||||
|
||||
// ProvideSettingService wires SettingService with group reader and proxy repo.
|
||||
func ProvideSettingService(settingRepo SettingRepository, groupRepo GroupRepository, proxyRepo ProxyRepository, cfg *config.Config) *SettingService {
|
||||
svc := NewSettingService(settingRepo, cfg)
|
||||
@ -481,7 +521,7 @@ var ProviderSet = wire.NewSet(
|
||||
NewDataManagementService,
|
||||
ProvideBackupService,
|
||||
ProvideOpsSystemLogSink,
|
||||
NewOpsService,
|
||||
ProvideOpsService,
|
||||
ProvideOpsMetricsCollector,
|
||||
ProvideOpsAggregationService,
|
||||
ProvideOpsAlertEvaluatorService,
|
||||
|
||||
@ -778,9 +778,15 @@ export interface OpsAlertRuntimeSettings {
|
||||
thresholds: OpsMetricThresholds // 指标阈值配置
|
||||
}
|
||||
|
||||
/** Global default thresholds for OpenAI account quota auto-pause. */
export interface OpsOpenAIAccountQuotaAutoPauseSettings {
  /** Fraction in the range 0–1; 0 means the global default 5h threshold is not enabled. */
  default_threshold_5h: number
  /** Fraction in the range 0–1; 0 means the global default 7d threshold is not enabled. */
  default_threshold_7d: number
}
|
||||
|
||||
export interface OpsAdvancedSettings {
|
||||
data_retention: OpsDataRetentionSettings
|
||||
aggregation: OpsAggregationSettings
|
||||
openai_account_quota_auto_pause: OpsOpenAIAccountQuotaAutoPauseSettings
|
||||
ignore_count_tokens_errors: boolean
|
||||
ignore_context_canceled: boolean
|
||||
ignore_no_available_accounts: boolean
|
||||
|
||||
@ -1787,6 +1787,84 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div
|
||||
v-if="account?.platform === 'openai'"
|
||||
class="border-t border-gray-200 pt-4 dark:border-dark-600 space-y-4"
|
||||
>
|
||||
<div class="space-y-2">
|
||||
<div class="flex items-center justify-between">
|
||||
<label class="input-label mb-0">{{ t('admin.accounts.autoPause5hDisabled') }}</label>
|
||||
<button
|
||||
type="button"
|
||||
@click="autoPause5hDisabled = !autoPause5hDisabled"
|
||||
:class="[
|
||||
'relative inline-flex h-6 w-11 flex-shrink-0 cursor-pointer rounded-full border-2 border-transparent transition-colors duration-200 ease-in-out focus:outline-none focus:ring-2 focus:ring-primary-500 focus:ring-offset-2',
|
||||
autoPause5hDisabled ? 'bg-primary-600' : 'bg-gray-200 dark:bg-dark-600'
|
||||
]"
|
||||
data-testid="auto-pause-5h-disabled"
|
||||
>
|
||||
<span
|
||||
:class="[
|
||||
'pointer-events-none inline-block h-5 w-5 transform rounded-full bg-white shadow ring-0 transition duration-200 ease-in-out',
|
||||
autoPause5hDisabled ? 'translate-x-5' : 'translate-x-0'
|
||||
]"
|
||||
/>
|
||||
</button>
|
||||
</div>
|
||||
<p class="input-hint">{{ t('admin.accounts.autoPauseDisabledHint') }}</p>
|
||||
</div>
|
||||
<div>
|
||||
<label class="input-label">{{ t('admin.accounts.autoPause5hThreshold') }}</label>
|
||||
<input
|
||||
v-model.number="autoPause5hThreshold"
|
||||
type="number"
|
||||
min="0"
|
||||
max="100"
|
||||
step="0.1"
|
||||
class="input"
|
||||
:disabled="autoPause5hDisabled"
|
||||
data-testid="auto-pause-5h-threshold"
|
||||
/>
|
||||
<p class="input-hint">{{ t('admin.accounts.autoPauseThresholdHint') }}</p>
|
||||
</div>
|
||||
<div class="space-y-2">
|
||||
<div class="flex items-center justify-between">
|
||||
<label class="input-label mb-0">{{ t('admin.accounts.autoPause7dDisabled') }}</label>
|
||||
<button
|
||||
type="button"
|
||||
@click="autoPause7dDisabled = !autoPause7dDisabled"
|
||||
:class="[
|
||||
'relative inline-flex h-6 w-11 flex-shrink-0 cursor-pointer rounded-full border-2 border-transparent transition-colors duration-200 ease-in-out focus:outline-none focus:ring-2 focus:ring-primary-500 focus:ring-offset-2',
|
||||
autoPause7dDisabled ? 'bg-primary-600' : 'bg-gray-200 dark:bg-dark-600'
|
||||
]"
|
||||
data-testid="auto-pause-7d-disabled"
|
||||
>
|
||||
<span
|
||||
:class="[
|
||||
'pointer-events-none inline-block h-5 w-5 transform rounded-full bg-white shadow ring-0 transition duration-200 ease-in-out',
|
||||
autoPause7dDisabled ? 'translate-x-5' : 'translate-x-0'
|
||||
]"
|
||||
/>
|
||||
</button>
|
||||
</div>
|
||||
<p class="input-hint">{{ t('admin.accounts.autoPauseDisabledHint') }}</p>
|
||||
</div>
|
||||
<div>
|
||||
<label class="input-label">{{ t('admin.accounts.autoPause7dThreshold') }}</label>
|
||||
<input
|
||||
v-model.number="autoPause7dThreshold"
|
||||
type="number"
|
||||
min="0"
|
||||
max="100"
|
||||
step="0.1"
|
||||
class="input"
|
||||
:disabled="autoPause7dDisabled"
|
||||
data-testid="auto-pause-7d-threshold"
|
||||
/>
|
||||
<p class="input-hint">{{ t('admin.accounts.autoPauseThresholdHint') }}</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- 配额控制 (Anthropic OAuth/SetupToken: 亲和 + 窗口费用 + 会话 + RPM 等) -->
|
||||
<div
|
||||
v-if="account?.platform === 'anthropic' && (account?.type === 'oauth' || account?.type === 'setup-token')"
|
||||
@ -2447,6 +2525,10 @@ const selectedErrorCodes = ref<number[]>([])
|
||||
const customErrorCodeInput = ref<number | null>(null)
|
||||
const interceptWarmupRequests = ref(false)
|
||||
const autoPauseOnExpired = ref(false)
|
||||
const autoPause5hThreshold = ref<number | null>(null)
|
||||
const autoPause7dThreshold = ref<number | null>(null)
|
||||
const autoPause5hDisabled = ref(false)
|
||||
const autoPause7dDisabled = ref(false)
|
||||
const mixedScheduling = ref(false) // For antigravity accounts: enable mixed scheduling
|
||||
const allowOverages = ref(false) // For antigravity accounts: enable AI Credits overages
|
||||
const antigravityModelRestrictionMode = ref<'whitelist' | 'mapping'>('whitelist')
|
||||
@ -2862,9 +2944,13 @@ const syncFormFromAccount = (newAccount: Account | null) => {
|
||||
// Load mixed scheduling setting (only for antigravity accounts)
|
||||
mixedScheduling.value = false
|
||||
allowOverages.value = false
|
||||
const extra = newAccount.extra as Record<string, unknown> | undefined
|
||||
mixedScheduling.value = extra?.mixed_scheduling === true
|
||||
allowOverages.value = extra?.allow_overages === true
|
||||
const extra = newAccount.extra as Record<string, unknown> | undefined
|
||||
mixedScheduling.value = extra?.mixed_scheduling === true
|
||||
allowOverages.value = extra?.allow_overages === true
|
||||
autoPause5hThreshold.value = typeof extra?.auto_pause_5h_threshold === 'number' ? extra.auto_pause_5h_threshold * 100 : null
|
||||
autoPause7dThreshold.value = typeof extra?.auto_pause_7d_threshold === 'number' ? extra.auto_pause_7d_threshold * 100 : null
|
||||
autoPause5hDisabled.value = extra?.auto_pause_5h_disabled === true
|
||||
autoPause7dDisabled.value = extra?.auto_pause_7d_disabled === true
|
||||
|
||||
// Load OpenAI passthrough toggle (OpenAI OAuth/API Key)
|
||||
openaiPassthroughEnabled.value = false
|
||||
@ -3987,9 +4073,9 @@ const handleSubmit = async () => {
|
||||
}
|
||||
|
||||
// For OpenAI OAuth/API Key accounts, handle passthrough mode in extra
|
||||
if (props.account.platform === 'openai' && (props.account.type === 'oauth' || props.account.type === 'apikey')) {
|
||||
const currentExtra = (props.account.extra as Record<string, unknown>) || {}
|
||||
const newExtra: Record<string, unknown> = { ...currentExtra }
|
||||
if (props.account.platform === 'openai' && (props.account.type === 'oauth' || props.account.type === 'apikey')) {
|
||||
const currentExtra = (props.account.extra as Record<string, unknown>) || {}
|
||||
const newExtra: Record<string, unknown> = { ...currentExtra }
|
||||
const hadCodexCLIOnlyEnabled = currentExtra.codex_cli_only === true
|
||||
if (props.account.type === 'oauth') {
|
||||
newExtra.openai_oauth_responses_websockets_v2_mode = openaiOAuthResponsesWebSocketV2Mode.value
|
||||
@ -4011,15 +4097,35 @@ const handleSubmit = async () => {
|
||||
} else {
|
||||
newExtra.openai_compact_mode = openAICompactMode.value
|
||||
}
|
||||
if (props.account.type === 'apikey') {
|
||||
if (props.account.type === 'apikey') {
|
||||
if (!openAITextGenerationCapabilityEnabled.value || openAIResponsesMode.value === 'auto') {
|
||||
delete newExtra.openai_responses_mode
|
||||
} else {
|
||||
newExtra.openai_responses_mode = openAIResponsesMode.value
|
||||
}
|
||||
}
|
||||
}
|
||||
if (autoPause5hThreshold.value != null && autoPause5hThreshold.value > 0) {
|
||||
newExtra.auto_pause_5h_threshold = autoPause5hThreshold.value / 100
|
||||
} else {
|
||||
delete newExtra.auto_pause_5h_threshold
|
||||
}
|
||||
if (autoPause7dThreshold.value != null && autoPause7dThreshold.value > 0) {
|
||||
newExtra.auto_pause_7d_threshold = autoPause7dThreshold.value / 100
|
||||
} else {
|
||||
delete newExtra.auto_pause_7d_threshold
|
||||
}
|
||||
if (autoPause5hDisabled.value) {
|
||||
newExtra.auto_pause_5h_disabled = true
|
||||
} else {
|
||||
delete newExtra.auto_pause_5h_disabled
|
||||
}
|
||||
if (autoPause7dDisabled.value) {
|
||||
newExtra.auto_pause_7d_disabled = true
|
||||
} else {
|
||||
delete newExtra.auto_pause_7d_disabled
|
||||
}
|
||||
|
||||
delete newExtra.codex_image_generation_bridge_enabled
|
||||
delete newExtra.codex_image_generation_bridge_enabled
|
||||
if (codexImageGenerationBridgeMode.value === 'inherit') {
|
||||
delete newExtra.codex_image_generation_bridge
|
||||
} else {
|
||||
|
||||
@ -330,6 +330,49 @@ describe('EditAccountModal', () => {
|
||||
])
|
||||
})
|
||||
|
||||
it('submits OpenAI quota auto-pause thresholds in extra', async () => {
  // The account starts with 0.9 / 0.8 stored as fractions; the form edits them as
  // percentages and must convert back to fractions on submit.
  const account = buildAccount()
  account.extra = { auto_pause_5h_threshold: 0.9, auto_pause_7d_threshold: 0.8 }

  updateAccountMock.mockReset()
  updateAccountMock.mockResolvedValue(account)
  checkMixedChannelRiskMock.mockReset()
  checkMixedChannelRiskMock.mockResolvedValue({ has_risk: false })

  const modal = mountModal(account)
  const threshold5hInput = modal.get('[data-testid="auto-pause-5h-threshold"]')
  const threshold7dInput = modal.get('[data-testid="auto-pause-7d-threshold"]')

  await threshold5hInput.setValue('95')
  await threshold7dInput.setValue('96')
  await modal.get('form#edit-account-form').trigger('submit.prevent')

  expect(updateAccountMock).toHaveBeenCalledTimes(1)
  const submittedExtra = updateAccountMock.mock.calls[0]?.[1]?.extra
  expect(submittedExtra?.auto_pause_5h_threshold).toBe(0.95)
  expect(submittedExtra?.auto_pause_7d_threshold).toBe(0.96)
})
|
||||
|
||||
it('submits OpenAI quota auto-pause disable flag in extra', async () => {
  // The per-account switch has to be persisted as auto_pause_5h_disabled. It lets an
  // admin exempt a single account from auto-pause while a global default threshold
  // is configured; a blank threshold alone would silently fall back to that global
  // default.
  const account = buildAccount()

  updateAccountMock.mockReset()
  updateAccountMock.mockResolvedValue(account)
  checkMixedChannelRiskMock.mockReset()
  checkMixedChannelRiskMock.mockResolvedValue({ has_risk: false })

  const modal = mountModal(account)
  const disable5hToggle = modal.get('[data-testid="auto-pause-5h-disabled"]')

  await disable5hToggle.trigger('click')
  await modal.get('form#edit-account-form').trigger('submit.prevent')

  expect(updateAccountMock).toHaveBeenCalledTimes(1)
  const submittedExtra = updateAccountMock.mock.calls[0]?.[1]?.extra
  // Only the toggled window is flagged; the untouched 7d flag must stay absent.
  expect(submittedExtra?.auto_pause_5h_disabled).toBe(true)
  expect(submittedExtra?.auto_pause_7d_disabled).toBeUndefined()
})
|
||||
|
||||
it('keeps at least one OpenAI APIKey endpoint capability selected', async () => {
|
||||
const account = buildAccount()
|
||||
updateAccountMock.mockReset()
|
||||
|
||||
@ -3475,6 +3475,12 @@ export default {
|
||||
'When enabled, warmup requests like title generation will return mock responses without consuming upstream tokens',
|
||||
autoPauseOnExpired: 'Auto Pause On Expired',
|
||||
autoPauseOnExpiredDesc: 'When enabled, the account will auto pause scheduling after it expires',
|
||||
autoPause5hThreshold: '5h Usage Threshold (%)',
|
||||
autoPause7dThreshold: '7d Usage Threshold (%)',
|
||||
autoPauseThresholdHint: 'Leave empty or set 0 to use the global default threshold (configured in Ops settings); set a value to override the global default. Reaching the threshold only skips the account during scheduling and does not modify schedulable.',
|
||||
autoPause5hDisabled: 'Disable 5h auto-pause',
|
||||
autoPause7dDisabled: 'Disable 7d auto-pause',
|
||||
autoPauseDisabledHint: 'When enabled, this account is never auto-paused (even if a global default threshold is configured).',
|
||||
// Quota control (Anthropic OAuth/SetupToken only)
|
||||
quotaControl: {
|
||||
title: 'Quota Control',
|
||||
@ -5190,6 +5196,11 @@ export default {
|
||||
aggregation: 'Pre-aggregation Tasks',
|
||||
enableAggregation: 'Enable Pre-aggregation',
|
||||
aggregationHint: 'Pre-aggregation improves query performance for long time windows',
|
||||
openaiQuotaAutoPause: 'OpenAI Account Quota Auto-pause',
|
||||
openaiQuotaAutoPauseHint: 'When an OpenAI account reaches its 5h / 7d usage threshold, the scheduler skips it automatically and resumes once the window rolls over. Per-account thresholds take precedence over this global default.',
|
||||
openaiQuotaAutoPauseDefault5h: 'Default 5h usage threshold (%)',
|
||||
openaiQuotaAutoPauseDefault7d: 'Default 7d usage threshold (%)',
|
||||
openaiQuotaAutoPauseThresholdHint: 'Value 0-100; leave blank or 0 to disable the global default threshold.',
|
||||
errorFiltering: 'Error Filtering',
|
||||
ignoreCountTokensErrors: 'Ignore count_tokens errors',
|
||||
ignoreCountTokensErrorsHint: 'When enabled, errors from count_tokens requests will not be written to the error log.',
|
||||
@ -5220,7 +5231,8 @@ export default {
|
||||
slaMinPercentRange: 'SLA minimum percentage must be between 0 and 100',
|
||||
ttftP99MaxRange: 'TTFT P99 maximum must be a number ≥ 0',
|
||||
requestErrorRateMaxRange: 'Request error rate maximum must be between 0 and 100',
|
||||
upstreamErrorRateMaxRange: 'Upstream error rate maximum must be between 0 and 100'
|
||||
upstreamErrorRateMaxRange: 'Upstream error rate maximum must be between 0 and 100',
|
||||
openaiQuotaAutoPauseRange: 'OpenAI quota auto-pause threshold must be between 0 and 100'
|
||||
}
|
||||
},
|
||||
concurrency: {
|
||||
|
||||
@ -3613,6 +3613,12 @@ export default {
|
||||
interceptWarmupRequestsDesc: '启用后,标题生成等预热请求将返回 mock 响应,不消耗上游 token',
|
||||
autoPauseOnExpired: '过期自动暂停调度',
|
||||
autoPauseOnExpiredDesc: '启用后,账号过期将自动暂停调度',
|
||||
autoPause5hThreshold: '5h 用量阈值(%)',
|
||||
autoPause7dThreshold: '7d 用量阈值(%)',
|
||||
autoPauseThresholdHint: '留空或填 0 表示使用全局默认阈值(在运维设置中配置);填具体值则覆盖全局默认。达到阈值后仅在调度时跳过账号,不修改 schedulable。',
|
||||
autoPause5hDisabled: '禁用 5h 自动暂停',
|
||||
autoPause7dDisabled: '禁用 7d 自动暂停',
|
||||
autoPauseDisabledHint: '开启后该账号永不进入自动暂停(即使全局默认阈值已配置)。',
|
||||
// Quota control (Anthropic OAuth/SetupToken only)
|
||||
quotaControl: {
|
||||
title: '配额控制',
|
||||
@ -5349,6 +5355,11 @@ export default {
|
||||
aggregation: '预聚合任务',
|
||||
enableAggregation: '启用预聚合任务',
|
||||
aggregationHint: '预聚合可提升长时间窗口查询性能',
|
||||
openaiQuotaAutoPause: 'OpenAI 账号配额自动暂停',
|
||||
openaiQuotaAutoPauseHint: '当 OpenAI 账号 5h / 7d 用量达到阈值时,调度会自动跳过该账号;窗口滚动后自动恢复。账号级阈值优先于此全局默认值。',
|
||||
openaiQuotaAutoPauseDefault5h: '默认 5h 用量阈值 (%)',
|
||||
openaiQuotaAutoPauseDefault7d: '默认 7d 用量阈值 (%)',
|
||||
openaiQuotaAutoPauseThresholdHint: '取值 0-100,留空或 0 表示不启用全局默认阈值。',
|
||||
errorFiltering: '错误过滤',
|
||||
ignoreCountTokensErrors: '忽略 count_tokens 错误',
|
||||
ignoreCountTokensErrorsHint: '启用后,count_tokens 请求的错误将不会写入错误日志。',
|
||||
@ -5380,7 +5391,8 @@ export default {
|
||||
slaMinPercentRange: 'SLA最低百分比必须在0-100之间',
|
||||
ttftP99MaxRange: 'TTFT P99最大值必须大于等于0',
|
||||
requestErrorRateMaxRange: '请求错误率最大值必须在0-100之间',
|
||||
upstreamErrorRateMaxRange: '上游错误率最大值必须在0-100之间'
|
||||
upstreamErrorRateMaxRange: '上游错误率最大值必须在0-100之间',
|
||||
openaiQuotaAutoPauseRange: 'OpenAI 配额自动暂停阈值必须在 0-100 之间'
|
||||
}
|
||||
},
|
||||
concurrency: {
|
||||
|
||||
@ -50,6 +50,10 @@ async function loadAllSettings() {
|
||||
runtimeSettings.value = runtime
|
||||
emailConfig.value = email
|
||||
advancedSettings.value = advanced
|
||||
// 兼容旧 payload:后端未返回该字段时补默认值,保证表单可绑定
|
||||
if (advancedSettings.value && !advancedSettings.value.openai_account_quota_auto_pause) {
|
||||
advancedSettings.value.openai_account_quota_auto_pause = { default_threshold_5h: 0, default_threshold_7d: 0 }
|
||||
}
|
||||
// 如果后端返回了阈值,使用后端的值;否则保持默认值
|
||||
if (thresholds && Object.keys(thresholds).length > 0) {
|
||||
metricThresholds.value = {
|
||||
@ -119,6 +123,28 @@ function removeRecipient(target: 'alert' | 'report', email: string) {
|
||||
if (idx >= 0) list.splice(idx, 1)
|
||||
}
|
||||
|
||||
// OpenAI account quota auto-pause: the backend stores the threshold as a 0..1
// fraction, while the UI shows and edits it as a percentage (0..100).
//
// Writable computed for the global default 5h threshold.
// - get: returns the percentage rounded to one decimal place, or null when the
//   stored value is missing, zero, or negative.
// - set: stores percent / 100 for positive input and 0 otherwise; does nothing
//   when the auto-pause settings object is absent.
const quotaAutoPause5hPercent = computed<number | null>({
  get() {
    const fraction = advancedSettings.value?.openai_account_quota_auto_pause?.default_threshold_5h
    if (!fraction || fraction <= 0) {
      return null
    }
    return Math.round(fraction * 1000) / 10
  },
  set(percent) {
    const autoPause = advancedSettings.value?.openai_account_quota_auto_pause
    if (!autoPause) {
      return
    }
    const hasPositiveValue = percent != null && percent > 0
    autoPause.default_threshold_5h = hasPositiveValue ? percent / 100 : 0
  }
})
|
||||
// Writable computed for the global default 7d threshold. The stored value is a
// 0..1 fraction; the form edits it as a percentage (0..100).
// - get: returns the percentage rounded to one decimal place, or null when the
//   stored value is missing, zero, or negative.
// - set: stores percent / 100 for positive input and 0 otherwise; does nothing
//   when the auto-pause settings object is absent.
const quotaAutoPause7dPercent = computed<number | null>({
  get() {
    const fraction = advancedSettings.value?.openai_account_quota_auto_pause?.default_threshold_7d
    if (!fraction || fraction <= 0) {
      return null
    }
    return Math.round(fraction * 1000) / 10
  },
  set(percent) {
    const autoPause = advancedSettings.value?.openai_account_quota_auto_pause
    if (!autoPause) {
      return
    }
    const hasPositiveValue = percent != null && percent > 0
    autoPause.default_threshold_7d = hasPositiveValue ? percent / 100 : 0
  }
})
|
||||
|
||||
// 验证
|
||||
const validation = computed(() => {
|
||||
const errors: string[] = []
|
||||
@ -145,6 +171,11 @@ const validation = computed(() => {
|
||||
if (hourly_metrics_retention_days < 0 || hourly_metrics_retention_days > 365) {
|
||||
errors.push(t('admin.ops.settings.validation.retentionDaysRange'))
|
||||
}
|
||||
|
||||
const { default_threshold_5h, default_threshold_7d } = advancedSettings.value.openai_account_quota_auto_pause
|
||||
if (default_threshold_5h < 0 || default_threshold_5h > 1 || default_threshold_7d < 0 || default_threshold_7d > 1) {
|
||||
errors.push(t('admin.ops.settings.validation.openaiQuotaAutoPauseRange'))
|
||||
}
|
||||
}
|
||||
|
||||
// 验证指标阈值
|
||||
@ -473,6 +504,40 @@ async function saveAllSettings() {
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- OpenAI 账号配额自动暂停(全局默认阈值) -->
|
||||
<div class="space-y-3">
|
||||
<h5 class="text-xs font-semibold text-gray-700 dark:text-gray-300">{{ t('admin.ops.settings.openaiQuotaAutoPause') }}</h5>
|
||||
<p class="text-xs text-gray-500">{{ t('admin.ops.settings.openaiQuotaAutoPauseHint') }}</p>
|
||||
|
||||
<div class="grid grid-cols-1 gap-4 md:grid-cols-2">
|
||||
<div>
|
||||
<label class="input-label">{{ t('admin.ops.settings.openaiQuotaAutoPauseDefault5h') }}</label>
|
||||
<input
|
||||
v-model.number="quotaAutoPause5hPercent"
|
||||
type="number"
|
||||
min="0"
|
||||
max="100"
|
||||
step="0.1"
|
||||
class="input"
|
||||
data-testid="ops-quota-auto-pause-5h"
|
||||
/>
|
||||
</div>
|
||||
<div>
|
||||
<label class="input-label">{{ t('admin.ops.settings.openaiQuotaAutoPauseDefault7d') }}</label>
|
||||
<input
|
||||
v-model.number="quotaAutoPause7dPercent"
|
||||
type="number"
|
||||
min="0"
|
||||
max="100"
|
||||
step="0.1"
|
||||
class="input"
|
||||
data-testid="ops-quota-auto-pause-7d"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
<p class="text-xs text-gray-500">{{ t('admin.ops.settings.openaiQuotaAutoPauseThresholdHint') }}</p>
|
||||
</div>
|
||||
|
||||
<!-- Error Filtering -->
|
||||
<div class="space-y-3">
|
||||
<h5 class="text-xs font-semibold text-gray-700 dark:text-gray-300">{{ t('admin.ops.settings.errorFiltering') }}</h5>
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user