Merge pull request #2873 from wucm667/feat/account-quota-threshold-auto-pause
feat(account): 支持按 5h/7d 用量阈值自动暂停账号调度
This commit is contained in:
commit
f68d351158
@ -195,7 +195,7 @@ func initializeApplication(buildInfo handler.BuildInfo) (*Application, error) {
|
|||||||
gatewayService := service.NewGatewayService(accountRepository, groupRepository, usageLogRepository, usageBillingRepository, userRepository, userSubscriptionRepository, userGroupRateRepository, gatewayCache, configConfig, schedulerSnapshotService, concurrencyService, billingService, rateLimitService, billingCacheService, identityService, httpUpstream, deferredService, claudeTokenProvider, sessionLimitCache, rpmCache, digestSessionStore, settingService, tlsFingerprintProfileService, channelService, modelPricingResolver, balanceNotifyService, serviceUserPlatformQuotaRepository)
|
gatewayService := service.NewGatewayService(accountRepository, groupRepository, usageLogRepository, usageBillingRepository, userRepository, userSubscriptionRepository, userGroupRateRepository, gatewayCache, configConfig, schedulerSnapshotService, concurrencyService, billingService, rateLimitService, billingCacheService, identityService, httpUpstream, deferredService, claudeTokenProvider, sessionLimitCache, rpmCache, digestSessionStore, settingService, tlsFingerprintProfileService, channelService, modelPricingResolver, balanceNotifyService, serviceUserPlatformQuotaRepository)
|
||||||
geminiMessagesCompatService := service.NewGeminiMessagesCompatService(accountRepository, groupRepository, gatewayCache, schedulerSnapshotService, geminiTokenProvider, rateLimitService, httpUpstream, antigravityGatewayService, configConfig)
|
geminiMessagesCompatService := service.NewGeminiMessagesCompatService(accountRepository, groupRepository, gatewayCache, schedulerSnapshotService, geminiTokenProvider, rateLimitService, httpUpstream, antigravityGatewayService, configConfig)
|
||||||
opsSystemLogSink := service.ProvideOpsSystemLogSink(opsRepository)
|
opsSystemLogSink := service.ProvideOpsSystemLogSink(opsRepository)
|
||||||
opsService := service.NewOpsService(opsRepository, settingRepository, configConfig, accountRepository, userRepository, concurrencyService, gatewayService, openAIGatewayService, geminiMessagesCompatService, antigravityGatewayService, opsSystemLogSink)
|
opsService := service.ProvideOpsService(opsRepository, settingRepository, configConfig, accountRepository, userRepository, concurrencyService, gatewayService, openAIGatewayService, geminiMessagesCompatService, antigravityGatewayService, opsSystemLogSink, settingService)
|
||||||
encryptionKey, err := payment.ProvideEncryptionKey(configConfig)
|
encryptionKey, err := payment.ProvideEncryptionKey(configConfig)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
|||||||
@ -548,6 +548,17 @@ func filterSchedulerExtra(extra map[string]any) map[string]any {
|
|||||||
"openai_ws_force_http",
|
"openai_ws_force_http",
|
||||||
"openai_responses_mode",
|
"openai_responses_mode",
|
||||||
"openai_responses_supported",
|
"openai_responses_supported",
|
||||||
|
"codex_5h_used_percent",
|
||||||
|
"codex_7d_used_percent",
|
||||||
|
"codex_5h_reset_at",
|
||||||
|
"codex_7d_reset_at",
|
||||||
|
"codex_5h_reset_after_seconds",
|
||||||
|
"codex_7d_reset_after_seconds",
|
||||||
|
"codex_usage_updated_at",
|
||||||
|
"auto_pause_5h_threshold",
|
||||||
|
"auto_pause_7d_threshold",
|
||||||
|
"auto_pause_5h_disabled",
|
||||||
|
"auto_pause_7d_disabled",
|
||||||
}
|
}
|
||||||
filtered := make(map[string]any)
|
filtered := make(map[string]any)
|
||||||
for _, key := range keys {
|
for _, key := range keys {
|
||||||
|
|||||||
@ -75,3 +75,36 @@ func TestBuildSchedulerMetadataAccount_KeepsSlimGroupMembership(t *testing.T) {
|
|||||||
require.Equal(t, int64(11), got.AccountGroups[1].GroupID)
|
require.Equal(t, int64(11), got.AccountGroups[1].GroupID)
|
||||||
require.Nil(t, got.Groups)
|
require.Nil(t, got.Groups)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestBuildSchedulerMetadataAccount_KeepsQuotaAutoPauseFields(t *testing.T) {
|
||||||
|
account := service.Account{
|
||||||
|
ID: 88,
|
||||||
|
Extra: map[string]any{
|
||||||
|
"codex_5h_used_percent": 12.34,
|
||||||
|
"codex_7d_used_percent": 56.78,
|
||||||
|
"codex_5h_reset_at": "2026-05-29T10:00:00Z",
|
||||||
|
"codex_7d_reset_at": "2026-06-01T10:00:00Z",
|
||||||
|
"codex_5h_reset_after_seconds": 300,
|
||||||
|
"codex_7d_reset_after_seconds": 600,
|
||||||
|
"codex_usage_updated_at": "2026-05-29T09:00:00Z",
|
||||||
|
"auto_pause_5h_threshold": 0.95,
|
||||||
|
"auto_pause_7d_threshold": 0.96,
|
||||||
|
"auto_pause_5h_disabled": true,
|
||||||
|
"auto_pause_7d_disabled": false,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
got := buildSchedulerMetadataAccount(account)
|
||||||
|
|
||||||
|
require.Equal(t, 12.34, got.Extra["codex_5h_used_percent"])
|
||||||
|
require.Equal(t, 56.78, got.Extra["codex_7d_used_percent"])
|
||||||
|
require.Equal(t, "2026-05-29T10:00:00Z", got.Extra["codex_5h_reset_at"])
|
||||||
|
require.Equal(t, "2026-06-01T10:00:00Z", got.Extra["codex_7d_reset_at"])
|
||||||
|
require.Equal(t, 300, got.Extra["codex_5h_reset_after_seconds"])
|
||||||
|
require.Equal(t, 600, got.Extra["codex_7d_reset_after_seconds"])
|
||||||
|
require.Equal(t, "2026-05-29T09:00:00Z", got.Extra["codex_usage_updated_at"])
|
||||||
|
require.Equal(t, 0.95, got.Extra["auto_pause_5h_threshold"])
|
||||||
|
require.Equal(t, 0.96, got.Extra["auto_pause_7d_threshold"])
|
||||||
|
require.Equal(t, true, got.Extra["auto_pause_5h_disabled"])
|
||||||
|
require.Equal(t, false, got.Extra["auto_pause_7d_disabled"])
|
||||||
|
}
|
||||||
|
|||||||
@ -370,7 +370,6 @@ func (s *defaultOpenAIAccountScheduler) selectBySessionHash(
|
|||||||
_ = s.service.deleteStickySessionAccountID(ctx, req.GroupID, sessionHash)
|
_ = s.service.deleteStickySessionAccountID(ctx, req.GroupID, sessionHash)
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
result, acquireErr := s.service.tryAcquireAccountSlot(ctx, accountID, account.Concurrency)
|
result, acquireErr := s.service.tryAcquireAccountSlot(ctx, accountID, account.Concurrency)
|
||||||
if acquireErr == nil && result != nil && result.Acquired {
|
if acquireErr == nil && result != nil && result.Acquired {
|
||||||
_ = s.service.refreshStickySessionTTL(ctx, req.GroupID, sessionHash, s.service.openAIWSSessionStickyTTL())
|
_ = s.service.refreshStickySessionTTL(ctx, req.GroupID, sessionHash, s.service.openAIWSSessionStickyTTL())
|
||||||
@ -975,6 +974,13 @@ func (s *defaultOpenAIAccountScheduler) isAccountRequestCompatible(ctx context.C
|
|||||||
if s != nil && s.service != nil && s.service.isOpenAIAccountRuntimeBlocked(account) {
|
if s != nil && s.service != nil && s.service.isOpenAIAccountRuntimeBlocked(account) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
// Quota auto-pause must be evaluated during the initial filter too. Without it the
|
||||||
|
// TopK candidate pool can be filled with paused accounts and the later fresh/DB
|
||||||
|
// rechecks won't reach healthy accounts that fell outside TopK — manifesting as
|
||||||
|
// "no available accounts" even though healthy ones exist.
|
||||||
|
if paused, _ := shouldAutoPauseOpenAIAccountByQuota(ctx, account); paused {
|
||||||
|
return false
|
||||||
|
}
|
||||||
if req.RequestedModel != "" && !account.IsModelSupported(req.RequestedModel) {
|
if req.RequestedModel != "" && !account.IsModelSupported(req.RequestedModel) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
@ -1154,6 +1160,7 @@ func (s *OpenAIGatewayService) selectAccountWithScheduler(
|
|||||||
requiredImageCapability OpenAIImagesCapability,
|
requiredImageCapability OpenAIImagesCapability,
|
||||||
requireCompact bool,
|
requireCompact bool,
|
||||||
) (*AccountSelectionResult, OpenAIAccountScheduleDecision, error) {
|
) (*AccountSelectionResult, OpenAIAccountScheduleDecision, error) {
|
||||||
|
ctx = s.withOpenAIQuotaAutoPauseContext(ctx)
|
||||||
decision := OpenAIAccountScheduleDecision{}
|
decision := OpenAIAccountScheduleDecision{}
|
||||||
scheduler := s.getOpenAIAccountScheduler(ctx)
|
scheduler := s.getOpenAIAccountScheduler(ctx)
|
||||||
if scheduler == nil {
|
if scheduler == nil {
|
||||||
|
|||||||
@ -691,6 +691,224 @@ func TestOpenAIGatewayService_SelectAccountWithScheduler_SessionStickyRateLimite
|
|||||||
require.Equal(t, openAIAccountScheduleLayerLoadBalance, decision.Layer)
|
require.Equal(t, openAIAccountScheduleLayerLoadBalance, decision.Layer)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestOpenAIGatewayService_SelectAccountForModelWithExclusions_AutoPauseBy5hThreshold(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
primary := Account{
|
||||||
|
ID: 35001,
|
||||||
|
Platform: PlatformOpenAI,
|
||||||
|
Type: AccountTypeAPIKey,
|
||||||
|
Status: StatusActive,
|
||||||
|
Schedulable: true,
|
||||||
|
Concurrency: 1,
|
||||||
|
Priority: 0,
|
||||||
|
Extra: map[string]any{
|
||||||
|
"codex_5h_used_percent": 95.0,
|
||||||
|
"auto_pause_5h_threshold": 0.95,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
secondary := Account{ID: 35002, Platform: PlatformOpenAI, Type: AccountTypeAPIKey, Status: StatusActive, Schedulable: true, Concurrency: 1, Priority: 5}
|
||||||
|
svc := &OpenAIGatewayService{accountRepo: schedulerTestOpenAIAccountRepo{accounts: []Account{primary, secondary}}, cfg: &config.Config{}}
|
||||||
|
|
||||||
|
account, err := svc.SelectAccountForModelWithExclusions(ctx, nil, "", "gpt-5.1", nil)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotNil(t, account)
|
||||||
|
require.Equal(t, int64(35002), account.ID)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestOpenAIGatewayService_SelectAccountForModelWithExclusions_AllowsBelow5hThreshold(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
primary := Account{
|
||||||
|
ID: 35101,
|
||||||
|
Platform: PlatformOpenAI,
|
||||||
|
Type: AccountTypeAPIKey,
|
||||||
|
Status: StatusActive,
|
||||||
|
Schedulable: true,
|
||||||
|
Concurrency: 1,
|
||||||
|
Priority: 0,
|
||||||
|
Extra: map[string]any{
|
||||||
|
"codex_5h_used_percent": 80.0,
|
||||||
|
"auto_pause_5h_threshold": 0.95,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
secondary := Account{ID: 35102, Platform: PlatformOpenAI, Type: AccountTypeAPIKey, Status: StatusActive, Schedulable: true, Concurrency: 1, Priority: 5}
|
||||||
|
svc := &OpenAIGatewayService{accountRepo: schedulerTestOpenAIAccountRepo{accounts: []Account{primary, secondary}}, cfg: &config.Config{}}
|
||||||
|
|
||||||
|
account, err := svc.SelectAccountForModelWithExclusions(ctx, nil, "", "gpt-5.1", nil)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotNil(t, account)
|
||||||
|
require.Equal(t, int64(35101), account.ID)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestOpenAIGatewayService_SelectAccountForModelWithExclusions_AutoPauseBy7dThreshold(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
primary := Account{
|
||||||
|
ID: 35201,
|
||||||
|
Platform: PlatformOpenAI,
|
||||||
|
Type: AccountTypeAPIKey,
|
||||||
|
Status: StatusActive,
|
||||||
|
Schedulable: true,
|
||||||
|
Concurrency: 1,
|
||||||
|
Priority: 0,
|
||||||
|
Extra: map[string]any{
|
||||||
|
"codex_7d_used_percent": 95.0,
|
||||||
|
"auto_pause_7d_threshold": 0.95,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
secondary := Account{ID: 35202, Platform: PlatformOpenAI, Type: AccountTypeAPIKey, Status: StatusActive, Schedulable: true, Concurrency: 1, Priority: 5}
|
||||||
|
svc := &OpenAIGatewayService{accountRepo: schedulerTestOpenAIAccountRepo{accounts: []Account{primary, secondary}}, cfg: &config.Config{}}
|
||||||
|
|
||||||
|
account, err := svc.SelectAccountForModelWithExclusions(ctx, nil, "", "gpt-5.1", nil)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotNil(t, account)
|
||||||
|
require.Equal(t, int64(35202), account.ID)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestOpenAIGatewayService_SelectAccountForModelWithExclusions_UnconfiguredThresholdKeepsLegacyBehavior(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
primary := Account{ID: 35301, Platform: PlatformOpenAI, Type: AccountTypeAPIKey, Status: StatusActive, Schedulable: true, Concurrency: 1, Priority: 0, Extra: map[string]any{"codex_5h_used_percent": 99.0, "codex_7d_used_percent": 99.0}}
|
||||||
|
secondary := Account{ID: 35302, Platform: PlatformOpenAI, Type: AccountTypeAPIKey, Status: StatusActive, Schedulable: true, Concurrency: 1, Priority: 5}
|
||||||
|
svc := &OpenAIGatewayService{accountRepo: schedulerTestOpenAIAccountRepo{accounts: []Account{primary, secondary}}, cfg: &config.Config{}}
|
||||||
|
|
||||||
|
account, err := svc.SelectAccountForModelWithExclusions(ctx, nil, "", "gpt-5.1", nil)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotNil(t, account)
|
||||||
|
require.Equal(t, int64(35301), account.ID)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestOpenAIGatewayService_SelectAccountForModelWithExclusions_UsesGlobalDefaultThreshold(t *testing.T) {
|
||||||
|
ctx := withOpenAIQuotaAutoPauseSettings(context.Background(), OpsOpenAIAccountQuotaAutoPauseSettings{DefaultThreshold5h: 0.95})
|
||||||
|
primary := Account{
|
||||||
|
ID: 35401,
|
||||||
|
Platform: PlatformOpenAI,
|
||||||
|
Type: AccountTypeAPIKey,
|
||||||
|
Status: StatusActive,
|
||||||
|
Schedulable: true,
|
||||||
|
Concurrency: 1,
|
||||||
|
Priority: 0,
|
||||||
|
Extra: map[string]any{
|
||||||
|
"codex_5h_used_percent": 95.0,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
secondary := Account{ID: 35402, Platform: PlatformOpenAI, Type: AccountTypeAPIKey, Status: StatusActive, Schedulable: true, Concurrency: 1, Priority: 5}
|
||||||
|
svc := &OpenAIGatewayService{accountRepo: schedulerTestOpenAIAccountRepo{accounts: []Account{primary, secondary}}, cfg: &config.Config{}}
|
||||||
|
|
||||||
|
account, err := svc.SelectAccountForModelWithExclusions(ctx, nil, "", "gpt-5.1", nil)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotNil(t, account)
|
||||||
|
require.Equal(t, int64(35402), account.ID)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Regression: a per-account explicit-disable flag exempts the account from auto-pause
|
||||||
|
// even when a global default threshold is set. Without this, "leave threshold blank"
|
||||||
|
// silently falls back to global default and admins have no way to whitelist a single
|
||||||
|
// account.
|
||||||
|
func TestOpenAIGatewayService_SelectAccountForModelWithExclusions_PerAccountDisableOverridesGlobalDefault(t *testing.T) {
|
||||||
|
ctx := withOpenAIQuotaAutoPauseSettings(context.Background(), OpsOpenAIAccountQuotaAutoPauseSettings{DefaultThreshold5h: 0.95})
|
||||||
|
// Account has high usage AND no per-account threshold (would normally fall back to
|
||||||
|
// the global default and get paused), but the explicit disable flag is set.
|
||||||
|
primary := Account{
|
||||||
|
ID: 35701,
|
||||||
|
Platform: PlatformOpenAI,
|
||||||
|
Type: AccountTypeAPIKey,
|
||||||
|
Status: StatusActive,
|
||||||
|
Schedulable: true,
|
||||||
|
Concurrency: 1,
|
||||||
|
Priority: 0,
|
||||||
|
Extra: map[string]any{
|
||||||
|
"codex_5h_used_percent": 99.0,
|
||||||
|
"auto_pause_5h_disabled": true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
secondary := Account{ID: 35702, Platform: PlatformOpenAI, Type: AccountTypeAPIKey, Status: StatusActive, Schedulable: true, Concurrency: 1, Priority: 5}
|
||||||
|
svc := &OpenAIGatewayService{accountRepo: schedulerTestOpenAIAccountRepo{accounts: []Account{primary, secondary}}, cfg: &config.Config{}}
|
||||||
|
|
||||||
|
account, err := svc.SelectAccountForModelWithExclusions(ctx, nil, "", "gpt-5.1", nil)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotNil(t, account)
|
||||||
|
require.Equal(t, int64(35701), account.ID)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Disable is per-window: disabling only 5h must still allow 7d auto-pause to fire.
|
||||||
|
func TestOpenAIGatewayService_SelectAccountForModelWithExclusions_PerWindowDisableScoped(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
primary := Account{
|
||||||
|
ID: 35801,
|
||||||
|
Platform: PlatformOpenAI,
|
||||||
|
Type: AccountTypeAPIKey,
|
||||||
|
Status: StatusActive,
|
||||||
|
Schedulable: true,
|
||||||
|
Concurrency: 1,
|
||||||
|
Priority: 0,
|
||||||
|
Extra: map[string]any{
|
||||||
|
"codex_5h_used_percent": 99.0,
|
||||||
|
"codex_7d_used_percent": 99.0,
|
||||||
|
"auto_pause_5h_disabled": true,
|
||||||
|
"auto_pause_7d_threshold": 0.95,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
secondary := Account{ID: 35802, Platform: PlatformOpenAI, Type: AccountTypeAPIKey, Status: StatusActive, Schedulable: true, Concurrency: 1, Priority: 5}
|
||||||
|
svc := &OpenAIGatewayService{accountRepo: schedulerTestOpenAIAccountRepo{accounts: []Account{primary, secondary}}, cfg: &config.Config{}}
|
||||||
|
|
||||||
|
account, err := svc.SelectAccountForModelWithExclusions(ctx, nil, "", "gpt-5.1", nil)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotNil(t, account)
|
||||||
|
require.Equal(t, int64(35802), account.ID, "7d auto-pause must still fire even though 5h is disabled")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestOpenAIGatewayService_SelectAccountForModelWithExclusions_StaleUsageWindowResetSkipsPause(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
// Usage is over threshold but the window's reset time has already passed, so the
|
||||||
|
// cached percentage is stale (the real window rolled over) and the account must NOT
|
||||||
|
// stay paused — otherwise it could be skipped forever with no traffic to refresh it.
|
||||||
|
primary := Account{
|
||||||
|
ID: 35501,
|
||||||
|
Platform: PlatformOpenAI,
|
||||||
|
Type: AccountTypeAPIKey,
|
||||||
|
Status: StatusActive,
|
||||||
|
Schedulable: true,
|
||||||
|
Concurrency: 1,
|
||||||
|
Priority: 0,
|
||||||
|
Extra: map[string]any{
|
||||||
|
"codex_5h_used_percent": 99.0,
|
||||||
|
"auto_pause_5h_threshold": 0.95,
|
||||||
|
"codex_5h_reset_at": time.Now().Add(-time.Minute).Format(time.RFC3339),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
secondary := Account{ID: 35502, Platform: PlatformOpenAI, Type: AccountTypeAPIKey, Status: StatusActive, Schedulable: true, Concurrency: 1, Priority: 5}
|
||||||
|
svc := &OpenAIGatewayService{accountRepo: schedulerTestOpenAIAccountRepo{accounts: []Account{primary, secondary}}, cfg: &config.Config{}}
|
||||||
|
|
||||||
|
account, err := svc.SelectAccountForModelWithExclusions(ctx, nil, "", "gpt-5.1", nil)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotNil(t, account)
|
||||||
|
require.Equal(t, int64(35501), account.ID)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestOpenAIGatewayService_SelectAccountForModelWithExclusions_FreshUsageWindowStillPauses(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
// Same as above but the window has not reset yet, so the account stays paused.
|
||||||
|
primary := Account{
|
||||||
|
ID: 35601,
|
||||||
|
Platform: PlatformOpenAI,
|
||||||
|
Type: AccountTypeAPIKey,
|
||||||
|
Status: StatusActive,
|
||||||
|
Schedulable: true,
|
||||||
|
Concurrency: 1,
|
||||||
|
Priority: 0,
|
||||||
|
Extra: map[string]any{
|
||||||
|
"codex_5h_used_percent": 99.0,
|
||||||
|
"auto_pause_5h_threshold": 0.95,
|
||||||
|
"codex_5h_reset_at": time.Now().Add(time.Hour).Format(time.RFC3339),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
secondary := Account{ID: 35602, Platform: PlatformOpenAI, Type: AccountTypeAPIKey, Status: StatusActive, Schedulable: true, Concurrency: 1, Priority: 5}
|
||||||
|
svc := &OpenAIGatewayService{accountRepo: schedulerTestOpenAIAccountRepo{accounts: []Account{primary, secondary}}, cfg: &config.Config{}}
|
||||||
|
|
||||||
|
account, err := svc.SelectAccountForModelWithExclusions(ctx, nil, "", "gpt-5.1", nil)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotNil(t, account)
|
||||||
|
require.Equal(t, int64(35602), account.ID)
|
||||||
|
}
|
||||||
|
|
||||||
func TestOpenAIGatewayService_SelectAccountForModelWithExclusions_SkipsFreshlyRateLimitedSnapshotCandidate(t *testing.T) {
|
func TestOpenAIGatewayService_SelectAccountForModelWithExclusions_SkipsFreshlyRateLimitedSnapshotCandidate(t *testing.T) {
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
groupID := int64(10102)
|
groupID := int64(10102)
|
||||||
@ -1238,6 +1456,85 @@ func TestOpenAIGatewayService_SelectAccountWithScheduler_LoadBalanceTopKFallback
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Regression: TopK initial filter must drop quota-auto-paused accounts. Otherwise
|
||||||
|
// the candidate pool is filled with paused accounts, healthy accounts fall outside
|
||||||
|
// TopK, and the scheduler returns "no available accounts" even though healthy ones
|
||||||
|
// exist.
|
||||||
|
func TestOpenAIGatewayService_SelectAccountWithScheduler_LoadBalanceTopKExcludesQuotaPaused(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
groupID := int64(110)
|
||||||
|
accounts := []Account{
|
||||||
|
{
|
||||||
|
ID: 37001,
|
||||||
|
Platform: PlatformOpenAI,
|
||||||
|
Type: AccountTypeAPIKey,
|
||||||
|
Status: StatusActive,
|
||||||
|
Schedulable: true,
|
||||||
|
Concurrency: 1,
|
||||||
|
Priority: 0,
|
||||||
|
Extra: map[string]any{
|
||||||
|
"codex_5h_used_percent": 96.0,
|
||||||
|
"auto_pause_5h_threshold": 0.95,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ID: 37002,
|
||||||
|
Platform: PlatformOpenAI,
|
||||||
|
Type: AccountTypeAPIKey,
|
||||||
|
Status: StatusActive,
|
||||||
|
Schedulable: true,
|
||||||
|
Concurrency: 1,
|
||||||
|
Priority: 5,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg := &config.Config{}
|
||||||
|
cfg.Gateway.OpenAIWS.LBTopK = 1 // TopK=1 makes the bug fatal: paused account would crowd out the healthy one entirely
|
||||||
|
cfg.Gateway.OpenAIWS.SchedulerScoreWeights.Priority = 0.4
|
||||||
|
cfg.Gateway.OpenAIWS.SchedulerScoreWeights.Load = 1.0
|
||||||
|
cfg.Gateway.OpenAIWS.SchedulerScoreWeights.Queue = 1.0
|
||||||
|
|
||||||
|
concurrencyCache := schedulerTestConcurrencyCache{
|
||||||
|
loadMap: map[int64]*AccountLoadInfo{
|
||||||
|
37001: {AccountID: 37001, LoadRate: 5, WaitingCount: 0},
|
||||||
|
37002: {AccountID: 37002, LoadRate: 5, WaitingCount: 0},
|
||||||
|
},
|
||||||
|
acquireResults: map[int64]bool{
|
||||||
|
37002: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
svc := &OpenAIGatewayService{
|
||||||
|
accountRepo: schedulerTestOpenAIAccountRepo{accounts: accounts},
|
||||||
|
cache: &schedulerTestGatewayCache{},
|
||||||
|
cfg: cfg,
|
||||||
|
rateLimitService: newOpenAIAdvancedSchedulerRateLimitService("true"),
|
||||||
|
concurrencyService: NewConcurrencyService(concurrencyCache),
|
||||||
|
}
|
||||||
|
|
||||||
|
selection, decision, err := svc.SelectAccountWithScheduler(
|
||||||
|
ctx,
|
||||||
|
&groupID,
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
"gpt-5.1",
|
||||||
|
nil,
|
||||||
|
OpenAIUpstreamTransportAny,
|
||||||
|
false,
|
||||||
|
)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotNil(t, selection)
|
||||||
|
require.NotNil(t, selection.Account)
|
||||||
|
require.Equal(t, int64(37002), selection.Account.ID)
|
||||||
|
require.Equal(t, openAIAccountScheduleLayerLoadBalance, decision.Layer)
|
||||||
|
// Only the healthy account should ever enter the candidate pool; the paused one
|
||||||
|
// must be filtered out at the initial-filter stage.
|
||||||
|
require.Equal(t, 1, decision.CandidateCount)
|
||||||
|
if selection.ReleaseFunc != nil {
|
||||||
|
selection.ReleaseFunc()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestOpenAIGatewayService_OpenAIAccountSchedulerMetrics(t *testing.T) {
|
func TestOpenAIGatewayService_OpenAIAccountSchedulerMetrics(t *testing.T) {
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
groupID := int64(12)
|
groupID := int64(12)
|
||||||
|
|||||||
@ -1290,7 +1290,7 @@ func (s *OpenAIGatewayService) SelectAccountForModel(ctx context.Context, groupI
|
|||||||
// SelectAccountForModelWithExclusions selects an account supporting the requested model while excluding specified accounts.
|
// SelectAccountForModelWithExclusions selects an account supporting the requested model while excluding specified accounts.
|
||||||
// SelectAccountForModelWithExclusions 选择支持指定模型的账号,同时排除指定的账号。
|
// SelectAccountForModelWithExclusions 选择支持指定模型的账号,同时排除指定的账号。
|
||||||
func (s *OpenAIGatewayService) SelectAccountForModelWithExclusions(ctx context.Context, groupID *int64, sessionHash string, requestedModel string, excludedIDs map[int64]struct{}) (*Account, error) {
|
func (s *OpenAIGatewayService) SelectAccountForModelWithExclusions(ctx context.Context, groupID *int64, sessionHash string, requestedModel string, excludedIDs map[int64]struct{}) (*Account, error) {
|
||||||
return s.selectAccountForModelWithExclusions(ctx, groupID, sessionHash, requestedModel, excludedIDs, false, 0, "")
|
return s.selectAccountForModelWithExclusions(s.withOpenAIQuotaAutoPauseContext(ctx), groupID, sessionHash, requestedModel, excludedIDs, false, 0, "")
|
||||||
}
|
}
|
||||||
|
|
||||||
// noAvailableOpenAISelectionError builds the standard "no account available" error
|
// noAvailableOpenAISelectionError builds the standard "no account available" error
|
||||||
@ -1327,6 +1327,17 @@ func isOpenAIAccountEligibleForRequest(ctx context.Context, account *Account, re
|
|||||||
if account == nil || !account.IsOpenAI() || !account.IsSchedulableForModelWithContext(ctx, requestedModel) {
|
if account == nil || !account.IsOpenAI() || !account.IsSchedulableForModelWithContext(ctx, requestedModel) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
if paused, reason := shouldAutoPauseOpenAIAccountByQuota(ctx, account); paused {
|
||||||
|
// Debug level: this fires per-candidate on the scheduling hot path, so Info
|
||||||
|
// would amplify into log spam once several accounts cross the threshold.
|
||||||
|
slog.Debug("account_auto_paused_by_quota",
|
||||||
|
"account_id", account.ID,
|
||||||
|
"window", reason.window,
|
||||||
|
"threshold", reason.threshold,
|
||||||
|
"utilization", reason.utilization,
|
||||||
|
)
|
||||||
|
return false
|
||||||
|
}
|
||||||
if requestedModel != "" && !account.IsModelSupported(requestedModel) {
|
if requestedModel != "" && !account.IsModelSupported(requestedModel) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
@ -1339,6 +1350,201 @@ func isOpenAIAccountEligibleForRequest(ctx context.Context, account *Account, re
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type openAIQuotaAutoPauseDecision struct {
|
||||||
|
window string
|
||||||
|
threshold float64
|
||||||
|
utilization float64
|
||||||
|
}
|
||||||
|
|
||||||
|
func shouldAutoPauseOpenAIAccountByQuota(ctx context.Context, account *Account) (bool, openAIQuotaAutoPauseDecision) {
|
||||||
|
if account == nil || !account.IsOpenAI() {
|
||||||
|
return false, openAIQuotaAutoPauseDecision{}
|
||||||
|
}
|
||||||
|
// Per-account explicit-disable flags must take precedence over the global default.
|
||||||
|
// Without these, leaving the account threshold blank means "use global default",
|
||||||
|
// so an admin has no way to exempt a single account from auto-pause once a global
|
||||||
|
// default exists. The disable flag is per-window so an account can opt out of
|
||||||
|
// only 5h or only 7d auto-pause.
|
||||||
|
disabled5h := resolveAccountExtraBool(account.Extra, "auto_pause_5h_disabled")
|
||||||
|
disabled7d := resolveAccountExtraBool(account.Extra, "auto_pause_7d_disabled")
|
||||||
|
threshold5h, threshold7d := resolveOpenAIQuotaAutoPauseThresholds(ctx, account)
|
||||||
|
now := time.Now()
|
||||||
|
if !disabled5h && threshold5h > 0 {
|
||||||
|
if utilization, ok := resolveOpenAIQuotaUtilization(account.Extra, "5h", now); ok && utilization >= threshold5h {
|
||||||
|
return true, openAIQuotaAutoPauseDecision{window: "5h", threshold: threshold5h, utilization: utilization}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !disabled7d && threshold7d > 0 {
|
||||||
|
if utilization, ok := resolveOpenAIQuotaUtilization(account.Extra, "7d", now); ok && utilization >= threshold7d {
|
||||||
|
return true, openAIQuotaAutoPauseDecision{window: "7d", threshold: threshold7d, utilization: utilization}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false, openAIQuotaAutoPauseDecision{}
|
||||||
|
}
|
||||||
|
|
||||||
|
// resolveAccountExtraBool reads a bool-like value from account extra, tolerating
|
||||||
|
// the few shapes JSON unmarshalling may produce (real bool, "true"/"false"
|
||||||
|
// strings, 0/1 numbers).
|
||||||
|
func resolveAccountExtraBool(extra map[string]any, key string) bool {
|
||||||
|
if len(extra) == 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
value, ok := extra[key]
|
||||||
|
if !ok || value == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
switch v := value.(type) {
|
||||||
|
case bool:
|
||||||
|
return v
|
||||||
|
case string:
|
||||||
|
parsed, err := strconv.ParseBool(strings.TrimSpace(v))
|
||||||
|
return err == nil && parsed
|
||||||
|
case float64:
|
||||||
|
return v != 0
|
||||||
|
case float32:
|
||||||
|
return v != 0
|
||||||
|
case int:
|
||||||
|
return v != 0
|
||||||
|
case int64:
|
||||||
|
return v != 0
|
||||||
|
case json.Number:
|
||||||
|
if i, err := v.Int64(); err == nil {
|
||||||
|
return i != 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func resolveOpenAIQuotaAutoPauseThresholds(ctx context.Context, account *Account) (float64, float64) {
|
||||||
|
threshold5h, _ := resolveAccountExtraNumber(account.Extra, "auto_pause_5h_threshold")
|
||||||
|
threshold7d, _ := resolveAccountExtraNumber(account.Extra, "auto_pause_7d_threshold")
|
||||||
|
threshold5h = clamp01(threshold5h)
|
||||||
|
threshold7d = clamp01(threshold7d)
|
||||||
|
if threshold5h > 0 && threshold7d > 0 {
|
||||||
|
return threshold5h, threshold7d
|
||||||
|
}
|
||||||
|
settings := openAIQuotaAutoPauseSettingsFromContext(ctx)
|
||||||
|
if threshold5h <= 0 {
|
||||||
|
threshold5h = clamp01(settings.DefaultThreshold5h)
|
||||||
|
}
|
||||||
|
if threshold7d <= 0 {
|
||||||
|
threshold7d = clamp01(settings.DefaultThreshold7d)
|
||||||
|
}
|
||||||
|
return threshold5h, threshold7d
|
||||||
|
}
|
||||||
|
|
||||||
|
func resolveAccountExtraNumber(extra map[string]any, keys ...string) (float64, bool) {
|
||||||
|
if len(extra) == 0 {
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
for _, key := range keys {
|
||||||
|
value, ok := extra[key]
|
||||||
|
if !ok || value == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
switch v := value.(type) {
|
||||||
|
case float64:
|
||||||
|
return v, true
|
||||||
|
case float32:
|
||||||
|
return float64(v), true
|
||||||
|
case int:
|
||||||
|
return float64(v), true
|
||||||
|
case int64:
|
||||||
|
return float64(v), true
|
||||||
|
case json.Number:
|
||||||
|
parsed, err := v.Float64()
|
||||||
|
if err == nil {
|
||||||
|
return parsed, true
|
||||||
|
}
|
||||||
|
case string:
|
||||||
|
parsed, err := strconv.ParseFloat(strings.TrimSpace(v), 64)
|
||||||
|
if err == nil {
|
||||||
|
return parsed, true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
|
||||||
|
// resolveOpenAIQuotaUtilization returns the current utilization ratio (0..1) for the
|
||||||
|
// given Codex usage window. ok=false means there is no usable signal to pause on:
|
||||||
|
// either no snapshot exists, or the window has already rolled over so the cached
|
||||||
|
// percentage is stale. The stale guard matters because a paused account stops
|
||||||
|
// receiving requests, so its snapshot is never refreshed from upstream headers —
|
||||||
|
// without this check an old used_percent would keep the account paused forever even
|
||||||
|
// after the real window reset.
|
||||||
|
func resolveOpenAIQuotaUtilization(extra map[string]any, window string, now time.Time) (float64, bool) {
|
||||||
|
usedPercent := readOpenAIQuotaUsedPercent(extra, window)
|
||||||
|
if usedPercent <= 0 {
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
if openAIQuotaWindowReset(extra, window, now) {
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
return usedPercent / 100, true
|
||||||
|
}
|
||||||
|
|
||||||
|
// openAIQuotaWindowReset reports whether the Codex usage window's reset time has
|
||||||
|
// already passed relative to now. It prefers the absolute codex_<window>_reset_at
|
||||||
|
// timestamp and falls back to codex_<window>_reset_after_seconds anchored at
|
||||||
|
// codex_usage_updated_at, mirroring AccountUsageService's window-progress logic.
|
||||||
|
func openAIQuotaWindowReset(extra map[string]any, window string, now time.Time) bool {
|
||||||
|
if len(extra) == 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if resetAtRaw, ok := extra["codex_"+window+"_reset_at"]; ok {
|
||||||
|
if resetAt, err := parseTime(fmt.Sprint(resetAtRaw)); err == nil {
|
||||||
|
return !now.Before(resetAt)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
resetAfter := parseExtraInt(extra["codex_"+window+"_reset_after_seconds"])
|
||||||
|
if resetAfter <= 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
base := now
|
||||||
|
if updatedRaw, ok := extra["codex_usage_updated_at"]; ok {
|
||||||
|
if updatedAt, err := parseTime(fmt.Sprint(updatedRaw)); err == nil {
|
||||||
|
base = updatedAt
|
||||||
|
}
|
||||||
|
}
|
||||||
|
resetAt := base.Add(time.Duration(resetAfter) * time.Second)
|
||||||
|
return !now.Before(resetAt)
|
||||||
|
}
|
||||||
|
|
||||||
|
func readOpenAIQuotaUsedPercent(extra map[string]any, window string) float64 {
|
||||||
|
if len(extra) == 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
if value, ok := resolveAccountExtraNumber(extra, "codex_"+window+"_used_percent"); ok {
|
||||||
|
return value
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
type openAIQuotaAutoPauseCtxKey struct{}
|
||||||
|
|
||||||
|
func withOpenAIQuotaAutoPauseSettings(ctx context.Context, settings OpsOpenAIAccountQuotaAutoPauseSettings) context.Context {
|
||||||
|
if ctx == nil {
|
||||||
|
ctx = context.Background()
|
||||||
|
}
|
||||||
|
return context.WithValue(ctx, openAIQuotaAutoPauseCtxKey{}, settings)
|
||||||
|
}
|
||||||
|
|
||||||
|
func openAIQuotaAutoPauseSettingsFromContext(ctx context.Context) OpsOpenAIAccountQuotaAutoPauseSettings {
|
||||||
|
if ctx == nil {
|
||||||
|
return OpsOpenAIAccountQuotaAutoPauseSettings{}
|
||||||
|
}
|
||||||
|
settings, _ := ctx.Value(openAIQuotaAutoPauseCtxKey{}).(OpsOpenAIAccountQuotaAutoPauseSettings)
|
||||||
|
return settings
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *OpenAIGatewayService) withOpenAIQuotaAutoPauseContext(ctx context.Context) context.Context {
|
||||||
|
if s == nil || s.settingService == nil {
|
||||||
|
return ctx
|
||||||
|
}
|
||||||
|
return withOpenAIQuotaAutoPauseSettings(ctx, s.settingService.GetOpenAIQuotaAutoPauseSettings(ctx))
|
||||||
|
}
|
||||||
|
|
||||||
// prioritizeOpenAICompactAccounts re-orders a slice so that accounts with known
|
// prioritizeOpenAICompactAccounts re-orders a slice so that accounts with known
|
||||||
// compact support are tried first, followed by unknown, then explicitly unsupported.
|
// compact support are tried first, followed by unknown, then explicitly unsupported.
|
||||||
// The relative order within each tier is preserved.
|
// The relative order within each tier is preserved.
|
||||||
@ -1587,7 +1793,7 @@ func (s *OpenAIGatewayService) isBetterAccount(candidate, current *Account) bool
|
|||||||
|
|
||||||
// SelectAccountWithLoadAwareness selects an account with load-awareness and wait plan.
|
// SelectAccountWithLoadAwareness selects an account with load-awareness and wait plan.
|
||||||
func (s *OpenAIGatewayService) SelectAccountWithLoadAwareness(ctx context.Context, groupID *int64, sessionHash string, requestedModel string, excludedIDs map[int64]struct{}) (*AccountSelectionResult, error) {
|
func (s *OpenAIGatewayService) SelectAccountWithLoadAwareness(ctx context.Context, groupID *int64, sessionHash string, requestedModel string, excludedIDs map[int64]struct{}) (*AccountSelectionResult, error) {
|
||||||
return s.selectAccountWithLoadAwareness(ctx, groupID, sessionHash, requestedModel, excludedIDs, false, "")
|
return s.selectAccountWithLoadAwareness(s.withOpenAIQuotaAutoPauseContext(ctx), groupID, sessionHash, requestedModel, excludedIDs, false, "")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *OpenAIGatewayService) selectAccountWithLoadAwareness(ctx context.Context, groupID *int64, sessionHash string, requestedModel string, excludedIDs map[int64]struct{}, requireCompact bool, requiredCapability OpenAIEndpointCapability) (*AccountSelectionResult, error) {
|
func (s *OpenAIGatewayService) selectAccountWithLoadAwareness(ctx context.Context, groupID *int64, sessionHash string, requestedModel string, excludedIDs map[int64]struct{}, requireCompact bool, requiredCapability OpenAIEndpointCapability) (*AccountSelectionResult, error) {
|
||||||
|
|||||||
@ -48,6 +48,46 @@ func TestOpenAIGatewayService_SelectAccountByPreviousResponseID_Hit(t *testing.T
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestOpenAIGatewayService_SelectAccountByPreviousResponseID_QuotaAutoPausedMiss(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
groupID := int64(23)
|
||||||
|
account := Account{
|
||||||
|
ID: 77,
|
||||||
|
Platform: PlatformOpenAI,
|
||||||
|
Type: AccountTypeAPIKey,
|
||||||
|
Status: StatusActive,
|
||||||
|
Schedulable: true,
|
||||||
|
Concurrency: 2,
|
||||||
|
Extra: map[string]any{
|
||||||
|
"openai_apikey_responses_websockets_v2_enabled": true,
|
||||||
|
"codex_5h_used_percent": 96.0,
|
||||||
|
"auto_pause_5h_threshold": 0.95,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
cache := &stubGatewayCache{}
|
||||||
|
store := NewOpenAIWSStateStore(cache)
|
||||||
|
cfg := newOpenAIWSV2TestConfig()
|
||||||
|
svc := &OpenAIGatewayService{
|
||||||
|
accountRepo: stubOpenAIAccountRepo{accounts: []Account{account}},
|
||||||
|
cache: cache,
|
||||||
|
cfg: cfg,
|
||||||
|
concurrencyService: NewConcurrencyService(stubConcurrencyCache{}),
|
||||||
|
openaiWSStateStore: store,
|
||||||
|
}
|
||||||
|
|
||||||
|
require.NoError(t, store.BindResponseAccount(ctx, groupID, "resp_prev_quota", account.ID, time.Hour))
|
||||||
|
|
||||||
|
selection, err := svc.SelectAccountByPreviousResponseID(ctx, &groupID, "resp_prev_quota", "gpt-5.1", nil, false)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Nil(t, selection, "超过 5h 配额阈值的账号不应继续命中 previous_response_id 粘连")
|
||||||
|
|
||||||
|
// Auto-pause is transient, so the binding is preserved: the chain can resume on the
|
||||||
|
// same account once the quota window resets.
|
||||||
|
boundAccountID, getErr := store.GetResponseAccount(ctx, groupID, "resp_prev_quota")
|
||||||
|
require.NoError(t, getErr)
|
||||||
|
require.Equal(t, account.ID, boundAccountID)
|
||||||
|
}
|
||||||
|
|
||||||
func TestOpenAIGatewayService_SelectAccountByPreviousResponseID_RateLimitedMiss(t *testing.T) {
|
func TestOpenAIGatewayService_SelectAccountByPreviousResponseID_RateLimitedMiss(t *testing.T) {
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
groupID := int64(23)
|
groupID := int64(23)
|
||||||
|
|||||||
@ -4060,6 +4060,13 @@ func (s *OpenAIGatewayService) selectAccountByPreviousResponseIDForCapability(
|
|||||||
if !account.SupportsOpenAIEndpointCapability(requiredCapability) {
|
if !account.SupportsOpenAIEndpointCapability(requiredCapability) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
// Quota auto-pause must also gate the previous_response_id sticky path; otherwise an
|
||||||
|
// account over its 5h/7d threshold keeps serving the same response chain even though
|
||||||
|
// normal scheduling skips it. Pause is transient, so fall through to normal scheduling
|
||||||
|
// without deleting the binding (the window may reset before the next turn).
|
||||||
|
if paused, _ := shouldAutoPauseOpenAIAccountByQuota(ctx, account); paused {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
if s.schedulerSnapshot != nil && s.accountRepo != nil {
|
if s.schedulerSnapshot != nil && s.accountRepo != nil {
|
||||||
latest, latestErr := s.accountRepo.GetByID(ctx, account.ID)
|
latest, latestErr := s.accountRepo.GetByID(ctx, account.ID)
|
||||||
if latestErr != nil || latest == nil {
|
if latestErr != nil || latest == nil {
|
||||||
@ -4076,6 +4083,9 @@ func (s *OpenAIGatewayService) selectAccountByPreviousResponseIDForCapability(
|
|||||||
if !latest.SupportsOpenAIEndpointCapability(requiredCapability) {
|
if !latest.SupportsOpenAIEndpointCapability(requiredCapability) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
if paused, _ := shouldAutoPauseOpenAIAccountByQuota(ctx, latest); paused {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
if s.isOpenAIAccountRuntimeBlocked(latest) {
|
if s.isOpenAIAccountRuntimeBlocked(latest) {
|
||||||
_ = store.DeleteResponseAccount(ctx, derefGroupID(groupID), responseID)
|
_ = store.DeleteResponseAccount(ctx, derefGroupID(groupID), responseID)
|
||||||
return nil, nil
|
return nil, nil
|
||||||
|
|||||||
@ -41,6 +41,11 @@ type OpsService struct {
|
|||||||
// cleanupReloader 由 wire 在 OpsCleanupService 构造完成后通过 SetCleanupReloader 注入。
|
// cleanupReloader 由 wire 在 OpsCleanupService 构造完成后通过 SetCleanupReloader 注入。
|
||||||
// 解耦避免 OpsService -> OpsCleanupService 的硬依赖(cleanup 也读 settings,会循环)。
|
// 解耦避免 OpsService -> OpsCleanupService 的硬依赖(cleanup 也读 settings,会循环)。
|
||||||
cleanupReloader CleanupReloader
|
cleanupReloader CleanupReloader
|
||||||
|
|
||||||
|
// quotaAutoPauseSink 由 wire 注入(通常是 SettingService.SetOpenAIQuotaAutoPauseSettings)。
|
||||||
|
// UpdateOpsAdvancedSettings 写入新配置后调用,把最新的 quota auto-pause 全局默认阈值
|
||||||
|
// 立即同步到调度热路径读取的内存缓存,避免下次请求才能感知新值。
|
||||||
|
quotaAutoPauseSink func(OpsOpenAIAccountQuotaAutoPauseSettings)
|
||||||
}
|
}
|
||||||
|
|
||||||
// CleanupReloader 由 OpsCleanupService 实现。
|
// CleanupReloader 由 OpsCleanupService 实现。
|
||||||
@ -57,6 +62,16 @@ func (s *OpsService) SetCleanupReloader(r CleanupReloader) {
|
|||||||
s.cleanupReloader = r
|
s.cleanupReloader = r
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetOpenAIQuotaAutoPauseSettingsSink 由 wire 注入,把最新的 quota auto-pause 全局默认
|
||||||
|
// 阈值 push 到调度热路径读取的内存缓存。同 SetCleanupReloader 的解耦目的:避免 OpsService
|
||||||
|
// 持有 *SettingService 引入循环依赖。
|
||||||
|
func (s *OpsService) SetOpenAIQuotaAutoPauseSettingsSink(sink func(OpsOpenAIAccountQuotaAutoPauseSettings)) {
|
||||||
|
if s == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
s.quotaAutoPauseSink = sink
|
||||||
|
}
|
||||||
|
|
||||||
func NewOpsService(
|
func NewOpsService(
|
||||||
opsRepo OpsRepository,
|
opsRepo OpsRepository,
|
||||||
settingRepo SettingRepository,
|
settingRepo SettingRepository,
|
||||||
|
|||||||
@ -369,6 +369,7 @@ func defaultOpsAdvancedSettings() *OpsAdvancedSettings {
|
|||||||
Aggregation: OpsAggregationSettings{
|
Aggregation: OpsAggregationSettings{
|
||||||
AggregationEnabled: false,
|
AggregationEnabled: false,
|
||||||
},
|
},
|
||||||
|
OpenAIAccountQuotaAutoPause: OpsOpenAIAccountQuotaAutoPauseSettings{},
|
||||||
IgnoreCountTokensErrors: true, // count_tokens 404 是预期行为,默认忽略
|
IgnoreCountTokensErrors: true, // count_tokens 404 是预期行为,默认忽略
|
||||||
IgnoreContextCanceled: true, // Default to true - client disconnects are not errors
|
IgnoreContextCanceled: true, // Default to true - client disconnects are not errors
|
||||||
IgnoreNoAvailableAccounts: false, // Default to false - this is a real routing issue
|
IgnoreNoAvailableAccounts: false, // Default to false - this is a real routing issue
|
||||||
@ -384,6 +385,8 @@ func normalizeOpsAdvancedSettings(cfg *OpsAdvancedSettings) {
|
|||||||
if cfg == nil {
|
if cfg == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
cfg.OpenAIAccountQuotaAutoPause.DefaultThreshold5h = clampOpsQuotaAutoPauseThreshold(cfg.OpenAIAccountQuotaAutoPause.DefaultThreshold5h)
|
||||||
|
cfg.OpenAIAccountQuotaAutoPause.DefaultThreshold7d = clampOpsQuotaAutoPauseThreshold(cfg.OpenAIAccountQuotaAutoPause.DefaultThreshold7d)
|
||||||
cfg.DataRetention.CleanupSchedule = strings.TrimSpace(cfg.DataRetention.CleanupSchedule)
|
cfg.DataRetention.CleanupSchedule = strings.TrimSpace(cfg.DataRetention.CleanupSchedule)
|
||||||
if cfg.DataRetention.CleanupSchedule == "" {
|
if cfg.DataRetention.CleanupSchedule == "" {
|
||||||
cfg.DataRetention.CleanupSchedule = opsCleanupDefaultSchedule
|
cfg.DataRetention.CleanupSchedule = opsCleanupDefaultSchedule
|
||||||
@ -405,6 +408,16 @@ func normalizeOpsAdvancedSettings(cfg *OpsAdvancedSettings) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func clampOpsQuotaAutoPauseThreshold(value float64) float64 {
|
||||||
|
if value <= 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
if value > 1 {
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
return value
|
||||||
|
}
|
||||||
|
|
||||||
func validateOpsAdvancedSettings(cfg *OpsAdvancedSettings) error {
|
func validateOpsAdvancedSettings(cfg *OpsAdvancedSettings) error {
|
||||||
if cfg == nil {
|
if cfg == nil {
|
||||||
return errors.New("invalid config")
|
return errors.New("invalid config")
|
||||||
@ -477,6 +490,12 @@ func (s *OpsService) UpdateOpsAdvancedSettings(ctx context.Context, cfg *OpsAdva
|
|||||||
if err := s.settingRepo.Set(ctx, SettingKeyOpsAdvancedSettings, string(raw)); err != nil {
|
if err := s.settingRepo.Set(ctx, SettingKeyOpsAdvancedSettings, string(raw)); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
// Push the new quota auto-pause settings straight into the in-memory cache that
|
||||||
|
// the OpenAI scheduling hot path reads, so the next request observes the new value
|
||||||
|
// without waiting for the background refresher's TTL.
|
||||||
|
if s.quotaAutoPauseSink != nil {
|
||||||
|
s.quotaAutoPauseSink(cfg.OpenAIAccountQuotaAutoPause)
|
||||||
|
}
|
||||||
|
|
||||||
// notify cleanup service to reload schedule/enabled.
|
// notify cleanup service to reload schedule/enabled.
|
||||||
if s.cleanupReloader != nil {
|
if s.cleanupReloader != nil {
|
||||||
|
|||||||
@ -4,6 +4,9 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/Wei-Shaw/sub2api/internal/config"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestGetOpsAdvancedSettings_DefaultHidesOpenAITokenStats(t *testing.T) {
|
func TestGetOpsAdvancedSettings_DefaultHidesOpenAITokenStats(t *testing.T) {
|
||||||
@ -95,3 +98,64 @@ func TestGetOpsAdvancedSettings_BackfillsNewDisplayFlagsFromDefaults(t *testing.
|
|||||||
t.Fatalf("DisplayAlertEvents = false, want true default backfill")
|
t.Fatalf("DisplayAlertEvents = false, want true default backfill")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestGetOpenAIQuotaAutoPauseSettings_ReadsDefaultsFromOpsAdvancedSettings(t *testing.T) {
|
||||||
|
repo := newRuntimeSettingRepoStub()
|
||||||
|
repo.values[SettingKeyOpsAdvancedSettings] = `{"openai_account_quota_auto_pause":{"default_threshold_5h":0.95,"default_threshold_7d":0.9}}`
|
||||||
|
svc := NewSettingService(repo, &config.Config{})
|
||||||
|
|
||||||
|
// Warm the in-memory cache synchronously so the assertion below is deterministic.
|
||||||
|
// GetOpenAIQuotaAutoPauseSettings is non-blocking on the hot path (returns the
|
||||||
|
// cached value, refreshes asynchronously); for tests and startup, Warm is the
|
||||||
|
// synchronous entry point that guarantees a populated cache.
|
||||||
|
settings := svc.WarmOpenAIQuotaAutoPauseSettings(context.Background())
|
||||||
|
if settings.DefaultThreshold5h != 0.95 {
|
||||||
|
t.Fatalf("DefaultThreshold5h = %v, want 0.95", settings.DefaultThreshold5h)
|
||||||
|
}
|
||||||
|
if settings.DefaultThreshold7d != 0.9 {
|
||||||
|
t.Fatalf("DefaultThreshold7d = %v, want 0.9", settings.DefaultThreshold7d)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Subsequent Get must hit the warm cache and return the same value without any DB
|
||||||
|
// access — that's the hot-path invariant.
|
||||||
|
cached := svc.GetOpenAIQuotaAutoPauseSettings(context.Background())
|
||||||
|
if cached.DefaultThreshold5h != 0.95 || cached.DefaultThreshold7d != 0.9 {
|
||||||
|
t.Fatalf("cached read = %+v, want {0.95, 0.9}", cached)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Hot-path invariant: a Get with cold cache must return immediately (zero defaults)
|
||||||
|
// rather than blocking on the DB. The async refresher will populate the cache for
|
||||||
|
// subsequent calls.
|
||||||
|
func TestGetOpenAIQuotaAutoPauseSettings_ColdCacheNonBlocking(t *testing.T) {
|
||||||
|
repo := newRuntimeSettingRepoStub()
|
||||||
|
repo.values[SettingKeyOpsAdvancedSettings] = `{"openai_account_quota_auto_pause":{"default_threshold_5h":0.7}}`
|
||||||
|
svc := NewSettingService(repo, &config.Config{})
|
||||||
|
|
||||||
|
start := time.Now()
|
||||||
|
settings := svc.GetOpenAIQuotaAutoPauseSettings(context.Background())
|
||||||
|
elapsed := time.Since(start)
|
||||||
|
if elapsed > 50*time.Millisecond {
|
||||||
|
t.Fatalf("cold-cache Get must be non-blocking, took %v", elapsed)
|
||||||
|
}
|
||||||
|
// Cold cache means we get zero defaults (the async refresh hasn't completed yet).
|
||||||
|
if settings.DefaultThreshold5h != 0 || settings.DefaultThreshold7d != 0 {
|
||||||
|
t.Fatalf("cold-cache Get = %+v, want zeroes", settings)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Explicit cache write (e.g. from UpdateOpsAdvancedSettings) must be visible on the
|
||||||
|
// very next read without any DB roundtrip.
|
||||||
|
func TestSetOpenAIQuotaAutoPauseSettings_VisibleImmediately(t *testing.T) {
|
||||||
|
svc := NewSettingService(newRuntimeSettingRepoStub(), &config.Config{})
|
||||||
|
|
||||||
|
svc.SetOpenAIQuotaAutoPauseSettings(OpsOpenAIAccountQuotaAutoPauseSettings{
|
||||||
|
DefaultThreshold5h: 0.88,
|
||||||
|
DefaultThreshold7d: 0.77,
|
||||||
|
})
|
||||||
|
|
||||||
|
got := svc.GetOpenAIQuotaAutoPauseSettings(context.Background())
|
||||||
|
if got.DefaultThreshold5h != 0.88 || got.DefaultThreshold7d != 0.77 {
|
||||||
|
t.Fatalf("after Set, Get = %+v, want {0.88, 0.77}", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@ -92,17 +92,23 @@ type OpsAlertRuntimeSettings struct {
|
|||||||
|
|
||||||
// OpsAdvancedSettings stores advanced ops configuration (data retention, aggregation).
|
// OpsAdvancedSettings stores advanced ops configuration (data retention, aggregation).
|
||||||
type OpsAdvancedSettings struct {
|
type OpsAdvancedSettings struct {
|
||||||
DataRetention OpsDataRetentionSettings `json:"data_retention"`
|
DataRetention OpsDataRetentionSettings `json:"data_retention"`
|
||||||
Aggregation OpsAggregationSettings `json:"aggregation"`
|
Aggregation OpsAggregationSettings `json:"aggregation"`
|
||||||
IgnoreCountTokensErrors bool `json:"ignore_count_tokens_errors"`
|
OpenAIAccountQuotaAutoPause OpsOpenAIAccountQuotaAutoPauseSettings `json:"openai_account_quota_auto_pause"`
|
||||||
IgnoreContextCanceled bool `json:"ignore_context_canceled"`
|
IgnoreCountTokensErrors bool `json:"ignore_count_tokens_errors"`
|
||||||
IgnoreNoAvailableAccounts bool `json:"ignore_no_available_accounts"`
|
IgnoreContextCanceled bool `json:"ignore_context_canceled"`
|
||||||
IgnoreInvalidApiKeyErrors bool `json:"ignore_invalid_api_key_errors"`
|
IgnoreNoAvailableAccounts bool `json:"ignore_no_available_accounts"`
|
||||||
IgnoreInsufficientBalanceErrors bool `json:"ignore_insufficient_balance_errors"`
|
IgnoreInvalidApiKeyErrors bool `json:"ignore_invalid_api_key_errors"`
|
||||||
DisplayOpenAITokenStats bool `json:"display_openai_token_stats"`
|
IgnoreInsufficientBalanceErrors bool `json:"ignore_insufficient_balance_errors"`
|
||||||
DisplayAlertEvents bool `json:"display_alert_events"`
|
DisplayOpenAITokenStats bool `json:"display_openai_token_stats"`
|
||||||
AutoRefreshEnabled bool `json:"auto_refresh_enabled"`
|
DisplayAlertEvents bool `json:"display_alert_events"`
|
||||||
AutoRefreshIntervalSec int `json:"auto_refresh_interval_seconds"`
|
AutoRefreshEnabled bool `json:"auto_refresh_enabled"`
|
||||||
|
AutoRefreshIntervalSec int `json:"auto_refresh_interval_seconds"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type OpsOpenAIAccountQuotaAutoPauseSettings struct {
|
||||||
|
DefaultThreshold5h float64 `json:"default_threshold_5h"`
|
||||||
|
DefaultThreshold7d float64 `json:"default_threshold_7d"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type OpsDataRetentionSettings struct {
|
type OpsDataRetentionSettings struct {
|
||||||
|
|||||||
@ -137,6 +137,11 @@ type cachedOpenAICodexUserAgent struct {
|
|||||||
expiresAt int64 // unix nano
|
expiresAt int64 // unix nano
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type cachedOpenAIQuotaAutoPauseSettings struct {
|
||||||
|
settings OpsOpenAIAccountQuotaAutoPauseSettings
|
||||||
|
expiresAt int64
|
||||||
|
}
|
||||||
|
|
||||||
const openAICodexUserAgentCacheTTL = 60 * time.Second
|
const openAICodexUserAgentCacheTTL = 60 * time.Second
|
||||||
const openAICodexUserAgentErrorTTL = 5 * time.Second
|
const openAICodexUserAgentErrorTTL = 5 * time.Second
|
||||||
const openAICodexUserAgentDBTimeout = 5 * time.Second
|
const openAICodexUserAgentDBTimeout = 5 * time.Second
|
||||||
@ -152,6 +157,12 @@ const openAIAllowCodexPluginCacheTTL = 60 * time.Second
|
|||||||
const openAIAllowCodexPluginErrorTTL = 5 * time.Second
|
const openAIAllowCodexPluginErrorTTL = 5 * time.Second
|
||||||
const openAIAllowCodexPluginDBTimeout = 5 * time.Second
|
const openAIAllowCodexPluginDBTimeout = 5 * time.Second
|
||||||
|
|
||||||
|
const openAIQuotaAutoPauseSettingsCacheTTL = 60 * time.Second
|
||||||
|
const openAIQuotaAutoPauseSettingsErrorTTL = 5 * time.Second
|
||||||
|
const openAIQuotaAutoPauseSettingsDBTimeout = 5 * time.Second
|
||||||
|
|
||||||
|
const openAIQuotaAutoPauseSettingsRefreshKey = "openai_quota_auto_pause_settings"
|
||||||
|
|
||||||
// DefaultSubscriptionGroupReader validates group references used by default subscriptions.
|
// DefaultSubscriptionGroupReader validates group references used by default subscriptions.
|
||||||
type DefaultSubscriptionGroupReader interface {
|
type DefaultSubscriptionGroupReader interface {
|
||||||
GetByID(ctx context.Context, id int64) (*Group, error)
|
GetByID(ctx context.Context, id int64) (*Group, error)
|
||||||
@ -176,6 +187,15 @@ type SettingService struct {
|
|||||||
openAICodexUASF singleflight.Group
|
openAICodexUASF singleflight.Group
|
||||||
openAIAllowCodexPluginCache atomic.Value // *cachedOpenAIAllowCodexPlugin
|
openAIAllowCodexPluginCache atomic.Value // *cachedOpenAIAllowCodexPlugin
|
||||||
openAIAllowCodexPluginSF singleflight.Group
|
openAIAllowCodexPluginSF singleflight.Group
|
||||||
|
|
||||||
|
// openAIQuotaAutoPauseSettingsCache holds the most recently observed quota auto-pause
|
||||||
|
// settings. GetOpenAIQuotaAutoPauseSettings reads this atomic.Value on the request hot
|
||||||
|
// path without ever blocking on the DB; when the cached entry expires, a background
|
||||||
|
// goroutine refreshes it via openAIQuotaAutoPauseSettingsSF (stale-while-revalidate).
|
||||||
|
// This per-service field also gives tests natural isolation — each SettingService
|
||||||
|
// instance owns its own cache, no shared package-level state.
|
||||||
|
openAIQuotaAutoPauseSettingsCache atomic.Value // *cachedOpenAIQuotaAutoPauseSettings
|
||||||
|
openAIQuotaAutoPauseSettingsSF singleflight.Group
|
||||||
}
|
}
|
||||||
|
|
||||||
// DefaultPlatformQuotaSetting 单 platform 三档限额(nil = 沿用上层;0 = 显式禁用;>0 = 上限)
|
// DefaultPlatformQuotaSetting 单 platform 三档限额(nil = 沿用上层;0 = 显式禁用;>0 = 上限)
|
||||||
@ -2027,6 +2047,17 @@ func (s *SettingService) refreshCachedSettings(settings *SystemSettings) {
|
|||||||
enabled: settings.OpenAIAdvancedSchedulerEnabled,
|
enabled: settings.OpenAIAdvancedSchedulerEnabled,
|
||||||
expiresAt: time.Now().Add(openAIAdvancedSchedulerSettingCacheTTL).UnixNano(),
|
expiresAt: time.Now().Add(openAIAdvancedSchedulerSettingCacheTTL).UnixNano(),
|
||||||
})
|
})
|
||||||
|
// Invalidate the quota auto-pause cache and let the next read trigger a fresh load.
|
||||||
|
// We can't know from here whether ops_advanced_settings was also touched, so be
|
||||||
|
// defensive: store an expired entry — GetOpenAIQuotaAutoPauseSettings will serve
|
||||||
|
// stale and kick off an async refresh, never blocking the request that follows.
|
||||||
|
s.openAIQuotaAutoPauseSettingsSF.Forget(openAIQuotaAutoPauseSettingsRefreshKey)
|
||||||
|
if cached, _ := s.openAIQuotaAutoPauseSettingsCache.Load().(*cachedOpenAIQuotaAutoPauseSettings); cached != nil {
|
||||||
|
s.openAIQuotaAutoPauseSettingsCache.Store(&cachedOpenAIQuotaAutoPauseSettings{
|
||||||
|
settings: cached.settings,
|
||||||
|
expiresAt: 0,
|
||||||
|
})
|
||||||
|
}
|
||||||
if s.cfg != nil {
|
if s.cfg != nil {
|
||||||
s.cfg.SetTrustForwardedIPForAPIKeyACL(settings.APIKeyACLTrustForwardedIP)
|
s.cfg.SetTrustForwardedIPForAPIKeyACL(settings.APIKeyACLTrustForwardedIP)
|
||||||
}
|
}
|
||||||
@ -4448,6 +4479,106 @@ func (s *SettingService) GetClaudeCodeVersionBounds(ctx context.Context) (min, m
|
|||||||
return b.min, b.max
|
return b.min, b.max
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetOpenAIQuotaAutoPauseSettings returns the current global default quota auto-pause
|
||||||
|
// settings. It is invoked on the OpenAI scheduling hot path (once per request) and is
|
||||||
|
// therefore designed to never block on the DB:
|
||||||
|
//
|
||||||
|
// - Fresh cached value → returned immediately.
|
||||||
|
// - Stale or empty cache → the last known value is returned, and a background
|
||||||
|
// goroutine refreshes the cache via singleflight (stale-while-revalidate).
|
||||||
|
// - First call with no cache yet → zero defaults are returned and the same async
|
||||||
|
// refresh is kicked off; the next call gets the freshly populated value.
|
||||||
|
//
|
||||||
|
// Callers that need the freshly persisted value synchronously (tests, post-update
|
||||||
|
// confirmation, optional startup warm-up) should call WarmOpenAIQuotaAutoPauseSettings.
|
||||||
|
func (s *SettingService) GetOpenAIQuotaAutoPauseSettings(ctx context.Context) OpsOpenAIAccountQuotaAutoPauseSettings {
|
||||||
|
if s == nil {
|
||||||
|
return OpsOpenAIAccountQuotaAutoPauseSettings{}
|
||||||
|
}
|
||||||
|
cached, _ := s.openAIQuotaAutoPauseSettingsCache.Load().(*cachedOpenAIQuotaAutoPauseSettings)
|
||||||
|
now := time.Now().UnixNano()
|
||||||
|
if cached != nil && now < cached.expiresAt {
|
||||||
|
return cached.settings
|
||||||
|
}
|
||||||
|
// Stale or unset: trigger background refresh without blocking this request.
|
||||||
|
// singleflight.DoChan dedupes concurrent refreshes; we deliberately ignore the
|
||||||
|
// returned channel — the result is observable via the atomic cache.
|
||||||
|
s.openAIQuotaAutoPauseSettingsSF.DoChan(openAIQuotaAutoPauseSettingsRefreshKey, func() (any, error) {
|
||||||
|
s.refreshOpenAIQuotaAutoPauseSettings(context.Background())
|
||||||
|
return nil, nil
|
||||||
|
})
|
||||||
|
if cached != nil {
|
||||||
|
return cached.settings // serve stale value while revalidating
|
||||||
|
}
|
||||||
|
return OpsOpenAIAccountQuotaAutoPauseSettings{}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WarmOpenAIQuotaAutoPauseSettings synchronously loads the quota auto-pause settings
|
||||||
|
// into the in-memory cache. Useful for application startup (so the first request hits
|
||||||
|
// a warm cache) and for tests that need deterministic reads immediately after
|
||||||
|
// constructing the service.
|
||||||
|
func (s *SettingService) WarmOpenAIQuotaAutoPauseSettings(ctx context.Context) OpsOpenAIAccountQuotaAutoPauseSettings {
|
||||||
|
if s == nil {
|
||||||
|
return OpsOpenAIAccountQuotaAutoPauseSettings{}
|
||||||
|
}
|
||||||
|
s.refreshOpenAIQuotaAutoPauseSettings(ctx)
|
||||||
|
cached, _ := s.openAIQuotaAutoPauseSettingsCache.Load().(*cachedOpenAIQuotaAutoPauseSettings)
|
||||||
|
if cached == nil {
|
||||||
|
return OpsOpenAIAccountQuotaAutoPauseSettings{}
|
||||||
|
}
|
||||||
|
return cached.settings
|
||||||
|
}
|
||||||
|
|
||||||
|
// refreshOpenAIQuotaAutoPauseSettings reads the latest settings from the DB and stores
|
||||||
|
// them into the in-memory cache. On error it stores the prior value (or zero defaults
|
||||||
|
// if nothing is cached yet) with the shorter error TTL so the next refresh comes
|
||||||
|
// sooner. Always uses its own timeout-bounded context to keep refresh latency
|
||||||
|
// predictable regardless of the caller.
|
||||||
|
func (s *SettingService) refreshOpenAIQuotaAutoPauseSettings(ctx context.Context) {
|
||||||
|
if s == nil || s.settingRepo == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
dbCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), openAIQuotaAutoPauseSettingsDBTimeout)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
settings := OpsOpenAIAccountQuotaAutoPauseSettings{}
|
||||||
|
ttl := openAIQuotaAutoPauseSettingsCacheTTL
|
||||||
|
raw, err := s.settingRepo.GetValue(dbCtx, SettingKeyOpsAdvancedSettings)
|
||||||
|
if err == nil {
|
||||||
|
cfg := defaultOpsAdvancedSettings()
|
||||||
|
if strings.TrimSpace(raw) != "" {
|
||||||
|
if jsonErr := json.Unmarshal([]byte(raw), cfg); jsonErr == nil {
|
||||||
|
normalizeOpsAdvancedSettings(cfg)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
settings = cfg.OpenAIAccountQuotaAutoPause
|
||||||
|
} else if !errors.Is(err, ErrSettingNotFound) {
|
||||||
|
// Real error: keep serving prior value but refresh sooner.
|
||||||
|
if prior, _ := s.openAIQuotaAutoPauseSettingsCache.Load().(*cachedOpenAIQuotaAutoPauseSettings); prior != nil {
|
||||||
|
settings = prior.settings
|
||||||
|
}
|
||||||
|
ttl = openAIQuotaAutoPauseSettingsErrorTTL
|
||||||
|
}
|
||||||
|
|
||||||
|
s.openAIQuotaAutoPauseSettingsCache.Store(&cachedOpenAIQuotaAutoPauseSettings{
|
||||||
|
settings: settings,
|
||||||
|
expiresAt: time.Now().Add(ttl).UnixNano(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetOpenAIQuotaAutoPauseSettings writes the given settings directly into the in-memory
|
||||||
|
// cache. Called from settings-write code paths so that the next read reflects the new
|
||||||
|
// value immediately, without waiting for the background refresh.
|
||||||
|
func (s *SettingService) SetOpenAIQuotaAutoPauseSettings(settings OpsOpenAIAccountQuotaAutoPauseSettings) {
|
||||||
|
if s == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
s.openAIQuotaAutoPauseSettingsCache.Store(&cachedOpenAIQuotaAutoPauseSettings{
|
||||||
|
settings: settings,
|
||||||
|
expiresAt: time.Now().Add(openAIQuotaAutoPauseSettingsCacheTTL).UnixNano(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
// GetRectifierSettings 获取请求整流器配置
|
// GetRectifierSettings 获取请求整流器配置
|
||||||
func (s *SettingService) GetRectifierSettings(ctx context.Context) (*RectifierSettings, error) {
|
func (s *SettingService) GetRectifierSettings(ctx context.Context) (*RectifierSettings, error) {
|
||||||
value, err := s.settingRepo.GetValue(ctx, SettingKeyRectifierSettings)
|
value, err := s.settingRepo.GetValue(ctx, SettingKeyRectifierSettings)
|
||||||
|
|||||||
@ -396,6 +396,46 @@ func ProvideBackupService(
|
|||||||
return svc
|
return svc
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ProvideOpsService constructs OpsService and wires the SettingService-backed quota
|
||||||
|
// auto-pause cache sink. Mirrors the SetCleanupReloader pattern: OpsService doesn't
|
||||||
|
// hold a *SettingService reference, but wire injects a tiny callback so writes to
|
||||||
|
// ops_advanced_settings immediately propagate into the scheduler hot-path cache.
|
||||||
|
func ProvideOpsService(
|
||||||
|
opsRepo OpsRepository,
|
||||||
|
settingRepo SettingRepository,
|
||||||
|
cfg *config.Config,
|
||||||
|
accountRepo AccountRepository,
|
||||||
|
userRepo UserRepository,
|
||||||
|
concurrencyService *ConcurrencyService,
|
||||||
|
gatewayService *GatewayService,
|
||||||
|
openAIGatewayService *OpenAIGatewayService,
|
||||||
|
geminiCompatService *GeminiMessagesCompatService,
|
||||||
|
antigravityGatewayService *AntigravityGatewayService,
|
||||||
|
systemLogSink *OpsSystemLogSink,
|
||||||
|
settingService *SettingService,
|
||||||
|
) *OpsService {
|
||||||
|
svc := NewOpsService(
|
||||||
|
opsRepo,
|
||||||
|
settingRepo,
|
||||||
|
cfg,
|
||||||
|
accountRepo,
|
||||||
|
userRepo,
|
||||||
|
concurrencyService,
|
||||||
|
gatewayService,
|
||||||
|
openAIGatewayService,
|
||||||
|
geminiCompatService,
|
||||||
|
antigravityGatewayService,
|
||||||
|
systemLogSink,
|
||||||
|
)
|
||||||
|
if settingService != nil {
|
||||||
|
svc.SetOpenAIQuotaAutoPauseSettingsSink(settingService.SetOpenAIQuotaAutoPauseSettings)
|
||||||
|
// Optional warm-up so the first scheduled request after process start observes
|
||||||
|
// a populated cache rather than zero defaults. Best-effort, sync-bounded.
|
||||||
|
settingService.WarmOpenAIQuotaAutoPauseSettings(context.Background())
|
||||||
|
}
|
||||||
|
return svc
|
||||||
|
}
|
||||||
|
|
||||||
// ProvideSettingService wires SettingService with group reader and proxy repo.
|
// ProvideSettingService wires SettingService with group reader and proxy repo.
|
||||||
func ProvideSettingService(settingRepo SettingRepository, groupRepo GroupRepository, proxyRepo ProxyRepository, cfg *config.Config) *SettingService {
|
func ProvideSettingService(settingRepo SettingRepository, groupRepo GroupRepository, proxyRepo ProxyRepository, cfg *config.Config) *SettingService {
|
||||||
svc := NewSettingService(settingRepo, cfg)
|
svc := NewSettingService(settingRepo, cfg)
|
||||||
@ -481,7 +521,7 @@ var ProviderSet = wire.NewSet(
|
|||||||
NewDataManagementService,
|
NewDataManagementService,
|
||||||
ProvideBackupService,
|
ProvideBackupService,
|
||||||
ProvideOpsSystemLogSink,
|
ProvideOpsSystemLogSink,
|
||||||
NewOpsService,
|
ProvideOpsService,
|
||||||
ProvideOpsMetricsCollector,
|
ProvideOpsMetricsCollector,
|
||||||
ProvideOpsAggregationService,
|
ProvideOpsAggregationService,
|
||||||
ProvideOpsAlertEvaluatorService,
|
ProvideOpsAlertEvaluatorService,
|
||||||
|
|||||||
@ -778,9 +778,15 @@ export interface OpsAlertRuntimeSettings {
|
|||||||
thresholds: OpsMetricThresholds // 指标阈值配置
|
thresholds: OpsMetricThresholds // 指标阈值配置
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export interface OpsOpenAIAccountQuotaAutoPauseSettings {
|
||||||
|
default_threshold_5h: number // 0~1,0 表示不启用全局默认 5h 阈值
|
||||||
|
default_threshold_7d: number // 0~1,0 表示不启用全局默认 7d 阈值
|
||||||
|
}
|
||||||
|
|
||||||
export interface OpsAdvancedSettings {
|
export interface OpsAdvancedSettings {
|
||||||
data_retention: OpsDataRetentionSettings
|
data_retention: OpsDataRetentionSettings
|
||||||
aggregation: OpsAggregationSettings
|
aggregation: OpsAggregationSettings
|
||||||
|
openai_account_quota_auto_pause: OpsOpenAIAccountQuotaAutoPauseSettings
|
||||||
ignore_count_tokens_errors: boolean
|
ignore_count_tokens_errors: boolean
|
||||||
ignore_context_canceled: boolean
|
ignore_context_canceled: boolean
|
||||||
ignore_no_available_accounts: boolean
|
ignore_no_available_accounts: boolean
|
||||||
|
|||||||
@ -1787,6 +1787,84 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div
|
||||||
|
v-if="account?.platform === 'openai'"
|
||||||
|
class="border-t border-gray-200 pt-4 dark:border-dark-600 space-y-4"
|
||||||
|
>
|
||||||
|
<div class="space-y-2">
|
||||||
|
<div class="flex items-center justify-between">
|
||||||
|
<label class="input-label mb-0">{{ t('admin.accounts.autoPause5hDisabled') }}</label>
|
||||||
|
<button
|
||||||
|
type="button"
|
||||||
|
@click="autoPause5hDisabled = !autoPause5hDisabled"
|
||||||
|
:class="[
|
||||||
|
'relative inline-flex h-6 w-11 flex-shrink-0 cursor-pointer rounded-full border-2 border-transparent transition-colors duration-200 ease-in-out focus:outline-none focus:ring-2 focus:ring-primary-500 focus:ring-offset-2',
|
||||||
|
autoPause5hDisabled ? 'bg-primary-600' : 'bg-gray-200 dark:bg-dark-600'
|
||||||
|
]"
|
||||||
|
data-testid="auto-pause-5h-disabled"
|
||||||
|
>
|
||||||
|
<span
|
||||||
|
:class="[
|
||||||
|
'pointer-events-none inline-block h-5 w-5 transform rounded-full bg-white shadow ring-0 transition duration-200 ease-in-out',
|
||||||
|
autoPause5hDisabled ? 'translate-x-5' : 'translate-x-0'
|
||||||
|
]"
|
||||||
|
/>
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
<p class="input-hint">{{ t('admin.accounts.autoPauseDisabledHint') }}</p>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label class="input-label">{{ t('admin.accounts.autoPause5hThreshold') }}</label>
|
||||||
|
<input
|
||||||
|
v-model.number="autoPause5hThreshold"
|
||||||
|
type="number"
|
||||||
|
min="0"
|
||||||
|
max="100"
|
||||||
|
step="0.1"
|
||||||
|
class="input"
|
||||||
|
:disabled="autoPause5hDisabled"
|
||||||
|
data-testid="auto-pause-5h-threshold"
|
||||||
|
/>
|
||||||
|
<p class="input-hint">{{ t('admin.accounts.autoPauseThresholdHint') }}</p>
|
||||||
|
</div>
|
||||||
|
<div class="space-y-2">
|
||||||
|
<div class="flex items-center justify-between">
|
||||||
|
<label class="input-label mb-0">{{ t('admin.accounts.autoPause7dDisabled') }}</label>
|
||||||
|
<button
|
||||||
|
type="button"
|
||||||
|
@click="autoPause7dDisabled = !autoPause7dDisabled"
|
||||||
|
:class="[
|
||||||
|
'relative inline-flex h-6 w-11 flex-shrink-0 cursor-pointer rounded-full border-2 border-transparent transition-colors duration-200 ease-in-out focus:outline-none focus:ring-2 focus:ring-primary-500 focus:ring-offset-2',
|
||||||
|
autoPause7dDisabled ? 'bg-primary-600' : 'bg-gray-200 dark:bg-dark-600'
|
||||||
|
]"
|
||||||
|
data-testid="auto-pause-7d-disabled"
|
||||||
|
>
|
||||||
|
<span
|
||||||
|
:class="[
|
||||||
|
'pointer-events-none inline-block h-5 w-5 transform rounded-full bg-white shadow ring-0 transition duration-200 ease-in-out',
|
||||||
|
autoPause7dDisabled ? 'translate-x-5' : 'translate-x-0'
|
||||||
|
]"
|
||||||
|
/>
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
<p class="input-hint">{{ t('admin.accounts.autoPauseDisabledHint') }}</p>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label class="input-label">{{ t('admin.accounts.autoPause7dThreshold') }}</label>
|
||||||
|
<input
|
||||||
|
v-model.number="autoPause7dThreshold"
|
||||||
|
type="number"
|
||||||
|
min="0"
|
||||||
|
max="100"
|
||||||
|
step="0.1"
|
||||||
|
class="input"
|
||||||
|
:disabled="autoPause7dDisabled"
|
||||||
|
data-testid="auto-pause-7d-threshold"
|
||||||
|
/>
|
||||||
|
<p class="input-hint">{{ t('admin.accounts.autoPauseThresholdHint') }}</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<!-- 配额控制 (Anthropic OAuth/SetupToken: 亲和 + 窗口费用 + 会话 + RPM 等) -->
|
<!-- 配额控制 (Anthropic OAuth/SetupToken: 亲和 + 窗口费用 + 会话 + RPM 等) -->
|
||||||
<div
|
<div
|
||||||
v-if="account?.platform === 'anthropic' && (account?.type === 'oauth' || account?.type === 'setup-token')"
|
v-if="account?.platform === 'anthropic' && (account?.type === 'oauth' || account?.type === 'setup-token')"
|
||||||
@ -2447,6 +2525,10 @@ const selectedErrorCodes = ref<number[]>([])
|
|||||||
const customErrorCodeInput = ref<number | null>(null)
|
const customErrorCodeInput = ref<number | null>(null)
|
||||||
const interceptWarmupRequests = ref(false)
|
const interceptWarmupRequests = ref(false)
|
||||||
const autoPauseOnExpired = ref(false)
|
const autoPauseOnExpired = ref(false)
|
||||||
|
const autoPause5hThreshold = ref<number | null>(null)
|
||||||
|
const autoPause7dThreshold = ref<number | null>(null)
|
||||||
|
const autoPause5hDisabled = ref(false)
|
||||||
|
const autoPause7dDisabled = ref(false)
|
||||||
const mixedScheduling = ref(false) // For antigravity accounts: enable mixed scheduling
|
const mixedScheduling = ref(false) // For antigravity accounts: enable mixed scheduling
|
||||||
const allowOverages = ref(false) // For antigravity accounts: enable AI Credits overages
|
const allowOverages = ref(false) // For antigravity accounts: enable AI Credits overages
|
||||||
const antigravityModelRestrictionMode = ref<'whitelist' | 'mapping'>('whitelist')
|
const antigravityModelRestrictionMode = ref<'whitelist' | 'mapping'>('whitelist')
|
||||||
@ -2862,9 +2944,13 @@ const syncFormFromAccount = (newAccount: Account | null) => {
|
|||||||
// Load mixed scheduling setting (only for antigravity accounts)
|
// Load mixed scheduling setting (only for antigravity accounts)
|
||||||
mixedScheduling.value = false
|
mixedScheduling.value = false
|
||||||
allowOverages.value = false
|
allowOverages.value = false
|
||||||
const extra = newAccount.extra as Record<string, unknown> | undefined
|
const extra = newAccount.extra as Record<string, unknown> | undefined
|
||||||
mixedScheduling.value = extra?.mixed_scheduling === true
|
mixedScheduling.value = extra?.mixed_scheduling === true
|
||||||
allowOverages.value = extra?.allow_overages === true
|
allowOverages.value = extra?.allow_overages === true
|
||||||
|
autoPause5hThreshold.value = typeof extra?.auto_pause_5h_threshold === 'number' ? extra.auto_pause_5h_threshold * 100 : null
|
||||||
|
autoPause7dThreshold.value = typeof extra?.auto_pause_7d_threshold === 'number' ? extra.auto_pause_7d_threshold * 100 : null
|
||||||
|
autoPause5hDisabled.value = extra?.auto_pause_5h_disabled === true
|
||||||
|
autoPause7dDisabled.value = extra?.auto_pause_7d_disabled === true
|
||||||
|
|
||||||
// Load OpenAI passthrough toggle (OpenAI OAuth/API Key)
|
// Load OpenAI passthrough toggle (OpenAI OAuth/API Key)
|
||||||
openaiPassthroughEnabled.value = false
|
openaiPassthroughEnabled.value = false
|
||||||
@ -3987,9 +4073,9 @@ const handleSubmit = async () => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// For OpenAI OAuth/API Key accounts, handle passthrough mode in extra
|
// For OpenAI OAuth/API Key accounts, handle passthrough mode in extra
|
||||||
if (props.account.platform === 'openai' && (props.account.type === 'oauth' || props.account.type === 'apikey')) {
|
if (props.account.platform === 'openai' && (props.account.type === 'oauth' || props.account.type === 'apikey')) {
|
||||||
const currentExtra = (props.account.extra as Record<string, unknown>) || {}
|
const currentExtra = (props.account.extra as Record<string, unknown>) || {}
|
||||||
const newExtra: Record<string, unknown> = { ...currentExtra }
|
const newExtra: Record<string, unknown> = { ...currentExtra }
|
||||||
const hadCodexCLIOnlyEnabled = currentExtra.codex_cli_only === true
|
const hadCodexCLIOnlyEnabled = currentExtra.codex_cli_only === true
|
||||||
if (props.account.type === 'oauth') {
|
if (props.account.type === 'oauth') {
|
||||||
newExtra.openai_oauth_responses_websockets_v2_mode = openaiOAuthResponsesWebSocketV2Mode.value
|
newExtra.openai_oauth_responses_websockets_v2_mode = openaiOAuthResponsesWebSocketV2Mode.value
|
||||||
@ -4011,15 +4097,35 @@ const handleSubmit = async () => {
|
|||||||
} else {
|
} else {
|
||||||
newExtra.openai_compact_mode = openAICompactMode.value
|
newExtra.openai_compact_mode = openAICompactMode.value
|
||||||
}
|
}
|
||||||
if (props.account.type === 'apikey') {
|
if (props.account.type === 'apikey') {
|
||||||
if (!openAITextGenerationCapabilityEnabled.value || openAIResponsesMode.value === 'auto') {
|
if (!openAITextGenerationCapabilityEnabled.value || openAIResponsesMode.value === 'auto') {
|
||||||
delete newExtra.openai_responses_mode
|
delete newExtra.openai_responses_mode
|
||||||
} else {
|
} else {
|
||||||
newExtra.openai_responses_mode = openAIResponsesMode.value
|
newExtra.openai_responses_mode = openAIResponsesMode.value
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (autoPause5hThreshold.value != null && autoPause5hThreshold.value > 0) {
|
||||||
|
newExtra.auto_pause_5h_threshold = autoPause5hThreshold.value / 100
|
||||||
|
} else {
|
||||||
|
delete newExtra.auto_pause_5h_threshold
|
||||||
|
}
|
||||||
|
if (autoPause7dThreshold.value != null && autoPause7dThreshold.value > 0) {
|
||||||
|
newExtra.auto_pause_7d_threshold = autoPause7dThreshold.value / 100
|
||||||
|
} else {
|
||||||
|
delete newExtra.auto_pause_7d_threshold
|
||||||
|
}
|
||||||
|
if (autoPause5hDisabled.value) {
|
||||||
|
newExtra.auto_pause_5h_disabled = true
|
||||||
|
} else {
|
||||||
|
delete newExtra.auto_pause_5h_disabled
|
||||||
|
}
|
||||||
|
if (autoPause7dDisabled.value) {
|
||||||
|
newExtra.auto_pause_7d_disabled = true
|
||||||
|
} else {
|
||||||
|
delete newExtra.auto_pause_7d_disabled
|
||||||
|
}
|
||||||
|
|
||||||
delete newExtra.codex_image_generation_bridge_enabled
|
delete newExtra.codex_image_generation_bridge_enabled
|
||||||
if (codexImageGenerationBridgeMode.value === 'inherit') {
|
if (codexImageGenerationBridgeMode.value === 'inherit') {
|
||||||
delete newExtra.codex_image_generation_bridge
|
delete newExtra.codex_image_generation_bridge
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@ -330,6 +330,49 @@ describe('EditAccountModal', () => {
|
|||||||
])
|
])
|
||||||
})
|
})
|
||||||
|
|
||||||
|
it('submits OpenAI quota auto-pause thresholds in extra', async () => {
|
||||||
|
const account = buildAccount()
|
||||||
|
account.extra = {
|
||||||
|
auto_pause_5h_threshold: 0.9,
|
||||||
|
auto_pause_7d_threshold: 0.8
|
||||||
|
}
|
||||||
|
updateAccountMock.mockReset()
|
||||||
|
checkMixedChannelRiskMock.mockReset()
|
||||||
|
checkMixedChannelRiskMock.mockResolvedValue({ has_risk: false })
|
||||||
|
updateAccountMock.mockResolvedValue(account)
|
||||||
|
|
||||||
|
const wrapper = mountModal(account)
|
||||||
|
|
||||||
|
await wrapper.get('[data-testid="auto-pause-5h-threshold"]').setValue('95')
|
||||||
|
await wrapper.get('[data-testid="auto-pause-7d-threshold"]').setValue('96')
|
||||||
|
await wrapper.get('form#edit-account-form').trigger('submit.prevent')
|
||||||
|
|
||||||
|
expect(updateAccountMock).toHaveBeenCalledTimes(1)
|
||||||
|
expect(updateAccountMock.mock.calls[0]?.[1]?.extra?.auto_pause_5h_threshold).toBe(0.95)
|
||||||
|
expect(updateAccountMock.mock.calls[0]?.[1]?.extra?.auto_pause_7d_threshold).toBe(0.96)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('submits OpenAI quota auto-pause disable flag in extra', async () => {
|
||||||
|
// Toggling the per-account disable flag must persist as auto_pause_5h_disabled
|
||||||
|
// so an admin can exempt one account from auto-pause even when a global default
|
||||||
|
// threshold is configured (otherwise leaving the threshold blank would silently
|
||||||
|
// fall back to the global default).
|
||||||
|
const account = buildAccount()
|
||||||
|
updateAccountMock.mockReset()
|
||||||
|
checkMixedChannelRiskMock.mockReset()
|
||||||
|
checkMixedChannelRiskMock.mockResolvedValue({ has_risk: false })
|
||||||
|
updateAccountMock.mockResolvedValue(account)
|
||||||
|
|
||||||
|
const wrapper = mountModal(account)
|
||||||
|
|
||||||
|
await wrapper.get('[data-testid="auto-pause-5h-disabled"]').trigger('click')
|
||||||
|
await wrapper.get('form#edit-account-form').trigger('submit.prevent')
|
||||||
|
|
||||||
|
expect(updateAccountMock).toHaveBeenCalledTimes(1)
|
||||||
|
expect(updateAccountMock.mock.calls[0]?.[1]?.extra?.auto_pause_5h_disabled).toBe(true)
|
||||||
|
expect(updateAccountMock.mock.calls[0]?.[1]?.extra?.auto_pause_7d_disabled).toBeUndefined()
|
||||||
|
})
|
||||||
|
|
||||||
it('keeps at least one OpenAI APIKey endpoint capability selected', async () => {
|
it('keeps at least one OpenAI APIKey endpoint capability selected', async () => {
|
||||||
const account = buildAccount()
|
const account = buildAccount()
|
||||||
updateAccountMock.mockReset()
|
updateAccountMock.mockReset()
|
||||||
|
|||||||
@ -3475,6 +3475,12 @@ export default {
|
|||||||
'When enabled, warmup requests like title generation will return mock responses without consuming upstream tokens',
|
'When enabled, warmup requests like title generation will return mock responses without consuming upstream tokens',
|
||||||
autoPauseOnExpired: 'Auto Pause On Expired',
|
autoPauseOnExpired: 'Auto Pause On Expired',
|
||||||
autoPauseOnExpiredDesc: 'When enabled, the account will auto pause scheduling after it expires',
|
autoPauseOnExpiredDesc: 'When enabled, the account will auto pause scheduling after it expires',
|
||||||
|
autoPause5hThreshold: '5h Usage Threshold (%)',
|
||||||
|
autoPause7dThreshold: '7d Usage Threshold (%)',
|
||||||
|
autoPauseThresholdHint: 'Leave empty or set 0 to use the global default threshold (configured in Ops settings); set a value to override the global default. Reaching the threshold only skips the account during scheduling and does not modify schedulable.',
|
||||||
|
autoPause5hDisabled: 'Disable 5h auto-pause',
|
||||||
|
autoPause7dDisabled: 'Disable 7d auto-pause',
|
||||||
|
autoPauseDisabledHint: 'When enabled, this account is never auto-paused (even if a global default threshold is configured).',
|
||||||
// Quota control (Anthropic OAuth/SetupToken only)
|
// Quota control (Anthropic OAuth/SetupToken only)
|
||||||
quotaControl: {
|
quotaControl: {
|
||||||
title: 'Quota Control',
|
title: 'Quota Control',
|
||||||
@ -5190,6 +5196,11 @@ export default {
|
|||||||
aggregation: 'Pre-aggregation Tasks',
|
aggregation: 'Pre-aggregation Tasks',
|
||||||
enableAggregation: 'Enable Pre-aggregation',
|
enableAggregation: 'Enable Pre-aggregation',
|
||||||
aggregationHint: 'Pre-aggregation improves query performance for long time windows',
|
aggregationHint: 'Pre-aggregation improves query performance for long time windows',
|
||||||
|
openaiQuotaAutoPause: 'OpenAI Account Quota Auto-pause',
|
||||||
|
openaiQuotaAutoPauseHint: 'When an OpenAI account reaches its 5h / 7d usage threshold, the scheduler skips it automatically and resumes once the window rolls over. Per-account thresholds take precedence over this global default.',
|
||||||
|
openaiQuotaAutoPauseDefault5h: 'Default 5h usage threshold (%)',
|
||||||
|
openaiQuotaAutoPauseDefault7d: 'Default 7d usage threshold (%)',
|
||||||
|
openaiQuotaAutoPauseThresholdHint: 'Value 0-100; leave blank or 0 to disable the global default threshold.',
|
||||||
errorFiltering: 'Error Filtering',
|
errorFiltering: 'Error Filtering',
|
||||||
ignoreCountTokensErrors: 'Ignore count_tokens errors',
|
ignoreCountTokensErrors: 'Ignore count_tokens errors',
|
||||||
ignoreCountTokensErrorsHint: 'When enabled, errors from count_tokens requests will not be written to the error log.',
|
ignoreCountTokensErrorsHint: 'When enabled, errors from count_tokens requests will not be written to the error log.',
|
||||||
@ -5220,7 +5231,8 @@ export default {
|
|||||||
slaMinPercentRange: 'SLA minimum percentage must be between 0 and 100',
|
slaMinPercentRange: 'SLA minimum percentage must be between 0 and 100',
|
||||||
ttftP99MaxRange: 'TTFT P99 maximum must be a number ≥ 0',
|
ttftP99MaxRange: 'TTFT P99 maximum must be a number ≥ 0',
|
||||||
requestErrorRateMaxRange: 'Request error rate maximum must be between 0 and 100',
|
requestErrorRateMaxRange: 'Request error rate maximum must be between 0 and 100',
|
||||||
upstreamErrorRateMaxRange: 'Upstream error rate maximum must be between 0 and 100'
|
upstreamErrorRateMaxRange: 'Upstream error rate maximum must be between 0 and 100',
|
||||||
|
openaiQuotaAutoPauseRange: 'OpenAI quota auto-pause threshold must be between 0 and 100'
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
concurrency: {
|
concurrency: {
|
||||||
|
|||||||
@ -3613,6 +3613,12 @@ export default {
|
|||||||
interceptWarmupRequestsDesc: '启用后,标题生成等预热请求将返回 mock 响应,不消耗上游 token',
|
interceptWarmupRequestsDesc: '启用后,标题生成等预热请求将返回 mock 响应,不消耗上游 token',
|
||||||
autoPauseOnExpired: '过期自动暂停调度',
|
autoPauseOnExpired: '过期自动暂停调度',
|
||||||
autoPauseOnExpiredDesc: '启用后,账号过期将自动暂停调度',
|
autoPauseOnExpiredDesc: '启用后,账号过期将自动暂停调度',
|
||||||
|
autoPause5hThreshold: '5h 用量阈值(%)',
|
||||||
|
autoPause7dThreshold: '7d 用量阈值(%)',
|
||||||
|
autoPauseThresholdHint: '留空或填 0 表示使用全局默认阈值(在运维设置中配置);填具体值则覆盖全局默认。达到阈值后仅在调度时跳过账号,不修改 schedulable。',
|
||||||
|
autoPause5hDisabled: '禁用 5h 自动暂停',
|
||||||
|
autoPause7dDisabled: '禁用 7d 自动暂停',
|
||||||
|
autoPauseDisabledHint: '开启后该账号永不进入自动暂停(即使全局默认阈值已配置)。',
|
||||||
// Quota control (Anthropic OAuth/SetupToken only)
|
// Quota control (Anthropic OAuth/SetupToken only)
|
||||||
quotaControl: {
|
quotaControl: {
|
||||||
title: '配额控制',
|
title: '配额控制',
|
||||||
@ -5349,6 +5355,11 @@ export default {
|
|||||||
aggregation: '预聚合任务',
|
aggregation: '预聚合任务',
|
||||||
enableAggregation: '启用预聚合任务',
|
enableAggregation: '启用预聚合任务',
|
||||||
aggregationHint: '预聚合可提升长时间窗口查询性能',
|
aggregationHint: '预聚合可提升长时间窗口查询性能',
|
||||||
|
openaiQuotaAutoPause: 'OpenAI 账号配额自动暂停',
|
||||||
|
openaiQuotaAutoPauseHint: '当 OpenAI 账号 5h / 7d 用量达到阈值时,调度会自动跳过该账号;窗口滚动后自动恢复。账号级阈值优先于此全局默认值。',
|
||||||
|
openaiQuotaAutoPauseDefault5h: '默认 5h 用量阈值 (%)',
|
||||||
|
openaiQuotaAutoPauseDefault7d: '默认 7d 用量阈值 (%)',
|
||||||
|
openaiQuotaAutoPauseThresholdHint: '取值 0-100,留空或 0 表示不启用全局默认阈值。',
|
||||||
errorFiltering: '错误过滤',
|
errorFiltering: '错误过滤',
|
||||||
ignoreCountTokensErrors: '忽略 count_tokens 错误',
|
ignoreCountTokensErrors: '忽略 count_tokens 错误',
|
||||||
ignoreCountTokensErrorsHint: '启用后,count_tokens 请求的错误将不会写入错误日志。',
|
ignoreCountTokensErrorsHint: '启用后,count_tokens 请求的错误将不会写入错误日志。',
|
||||||
@ -5380,7 +5391,8 @@ export default {
|
|||||||
slaMinPercentRange: 'SLA最低百分比必须在0-100之间',
|
slaMinPercentRange: 'SLA最低百分比必须在0-100之间',
|
||||||
ttftP99MaxRange: 'TTFT P99最大值必须大于等于0',
|
ttftP99MaxRange: 'TTFT P99最大值必须大于等于0',
|
||||||
requestErrorRateMaxRange: '请求错误率最大值必须在0-100之间',
|
requestErrorRateMaxRange: '请求错误率最大值必须在0-100之间',
|
||||||
upstreamErrorRateMaxRange: '上游错误率最大值必须在0-100之间'
|
upstreamErrorRateMaxRange: '上游错误率最大值必须在0-100之间',
|
||||||
|
openaiQuotaAutoPauseRange: 'OpenAI 配额自动暂停阈值必须在 0-100 之间'
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
concurrency: {
|
concurrency: {
|
||||||
|
|||||||
@ -50,6 +50,10 @@ async function loadAllSettings() {
|
|||||||
runtimeSettings.value = runtime
|
runtimeSettings.value = runtime
|
||||||
emailConfig.value = email
|
emailConfig.value = email
|
||||||
advancedSettings.value = advanced
|
advancedSettings.value = advanced
|
||||||
|
// 兼容旧 payload:后端未返回该字段时补默认值,保证表单可绑定
|
||||||
|
if (advancedSettings.value && !advancedSettings.value.openai_account_quota_auto_pause) {
|
||||||
|
advancedSettings.value.openai_account_quota_auto_pause = { default_threshold_5h: 0, default_threshold_7d: 0 }
|
||||||
|
}
|
||||||
// 如果后端返回了阈值,使用后端的值;否则保持默认值
|
// 如果后端返回了阈值,使用后端的值;否则保持默认值
|
||||||
if (thresholds && Object.keys(thresholds).length > 0) {
|
if (thresholds && Object.keys(thresholds).length > 0) {
|
||||||
metricThresholds.value = {
|
metricThresholds.value = {
|
||||||
@ -119,6 +123,28 @@ function removeRecipient(target: 'alert' | 'report', email: string) {
|
|||||||
if (idx >= 0) list.splice(idx, 1)
|
if (idx >= 0) list.splice(idx, 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// OpenAI 账号配额自动暂停:后端按 0~1 分数存储,UI 按百分比(0~100)展示
|
||||||
|
const quotaAutoPause5hPercent = computed<number | null>({
|
||||||
|
get() {
|
||||||
|
const v = advancedSettings.value?.openai_account_quota_auto_pause?.default_threshold_5h
|
||||||
|
return v && v > 0 ? Math.round(v * 1000) / 10 : null
|
||||||
|
},
|
||||||
|
set(val) {
|
||||||
|
if (!advancedSettings.value?.openai_account_quota_auto_pause) return
|
||||||
|
advancedSettings.value.openai_account_quota_auto_pause.default_threshold_5h = val != null && val > 0 ? val / 100 : 0
|
||||||
|
}
|
||||||
|
})
|
||||||
|
const quotaAutoPause7dPercent = computed<number | null>({
|
||||||
|
get() {
|
||||||
|
const v = advancedSettings.value?.openai_account_quota_auto_pause?.default_threshold_7d
|
||||||
|
return v && v > 0 ? Math.round(v * 1000) / 10 : null
|
||||||
|
},
|
||||||
|
set(val) {
|
||||||
|
if (!advancedSettings.value?.openai_account_quota_auto_pause) return
|
||||||
|
advancedSettings.value.openai_account_quota_auto_pause.default_threshold_7d = val != null && val > 0 ? val / 100 : 0
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
// 验证
|
// 验证
|
||||||
const validation = computed(() => {
|
const validation = computed(() => {
|
||||||
const errors: string[] = []
|
const errors: string[] = []
|
||||||
@ -145,6 +171,11 @@ const validation = computed(() => {
|
|||||||
if (hourly_metrics_retention_days < 0 || hourly_metrics_retention_days > 365) {
|
if (hourly_metrics_retention_days < 0 || hourly_metrics_retention_days > 365) {
|
||||||
errors.push(t('admin.ops.settings.validation.retentionDaysRange'))
|
errors.push(t('admin.ops.settings.validation.retentionDaysRange'))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const { default_threshold_5h, default_threshold_7d } = advancedSettings.value.openai_account_quota_auto_pause
|
||||||
|
if (default_threshold_5h < 0 || default_threshold_5h > 1 || default_threshold_7d < 0 || default_threshold_7d > 1) {
|
||||||
|
errors.push(t('admin.ops.settings.validation.openaiQuotaAutoPauseRange'))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 验证指标阈值
|
// 验证指标阈值
|
||||||
@ -473,6 +504,40 @@ async function saveAllSettings() {
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- OpenAI 账号配额自动暂停(全局默认阈值) -->
|
||||||
|
<div class="space-y-3">
|
||||||
|
<h5 class="text-xs font-semibold text-gray-700 dark:text-gray-300">{{ t('admin.ops.settings.openaiQuotaAutoPause') }}</h5>
|
||||||
|
<p class="text-xs text-gray-500">{{ t('admin.ops.settings.openaiQuotaAutoPauseHint') }}</p>
|
||||||
|
|
||||||
|
<div class="grid grid-cols-1 gap-4 md:grid-cols-2">
|
||||||
|
<div>
|
||||||
|
<label class="input-label">{{ t('admin.ops.settings.openaiQuotaAutoPauseDefault5h') }}</label>
|
||||||
|
<input
|
||||||
|
v-model.number="quotaAutoPause5hPercent"
|
||||||
|
type="number"
|
||||||
|
min="0"
|
||||||
|
max="100"
|
||||||
|
step="0.1"
|
||||||
|
class="input"
|
||||||
|
data-testid="ops-quota-auto-pause-5h"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label class="input-label">{{ t('admin.ops.settings.openaiQuotaAutoPauseDefault7d') }}</label>
|
||||||
|
<input
|
||||||
|
v-model.number="quotaAutoPause7dPercent"
|
||||||
|
type="number"
|
||||||
|
min="0"
|
||||||
|
max="100"
|
||||||
|
step="0.1"
|
||||||
|
class="input"
|
||||||
|
data-testid="ops-quota-auto-pause-7d"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<p class="text-xs text-gray-500">{{ t('admin.ops.settings.openaiQuotaAutoPauseThresholdHint') }}</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
<!-- Error Filtering -->
|
<!-- Error Filtering -->
|
||||||
<div class="space-y-3">
|
<div class="space-y-3">
|
||||||
<h5 class="text-xs font-semibold text-gray-700 dark:text-gray-300">{{ t('admin.ops.settings.errorFiltering') }}</h5>
|
<h5 class="text-xs font-semibold text-gray-700 dark:text-gray-300">{{ t('admin.ops.settings.errorFiltering') }}</h5>
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user