From 496469ac4e22a90f417ce1f1b48ff8868f938183 Mon Sep 17 00:00:00 2001
From: shaw <shaw-wei@foxmail.com>
Date: Sat, 25 Apr 2026 22:50:35 +0800
Subject: [PATCH 01/46] fix(gateway): skip body mimicry for real Claude Code
 clients to restore prompt caching

PR #1914 unconditionally applied the full mimicry pipeline to all OAuth
accounts, including real Claude Code CLI clients. This replaced the
client's long system prompt (~10K+ tokens with stable cache_control
breakpoints) with a short ~45 token [billing, CC prompt] pair, which
falls below Anthropic's 1024-token minimum cacheable prefix threshold.
The result: every request created a new cache but never hit an existing
one.

Fix: restore the Claude Code client detection gate so that real CC
clients bypass body-level mimicry (system rewrite, message cache
management, tool name obfuscation). Non-CC third-party clients
(opencode, etc.) continue to receive full mimicry.

Also harden the detection logic:
- Make UA regex case-insensitive (align with claude_code_validator.go)
- Validate metadata.user_id format via ParseMetadataUserID() instead of
  just checking non-empty, preventing third-party tools from spoofing
  a claude-cli/* UA with an arbitrary user_id string to bypass mimicry
---
 .../internal/service/gateway_prompt_test.go   | 41 +++++++++++++++----
 backend/internal/service/gateway_service.go   | 34 +++++++++------
 2 files changed, 54 insertions(+), 21 deletions(-)

diff --git a/backend/internal/service/gateway_prompt_test.go b/backend/internal/service/gateway_prompt_test.go
index 443486ab..f3a22c1d 100644
--- a/backend/internal/service/gateway_prompt_test.go
+++ b/backend/internal/service/gateway_prompt_test.go
@@ -9,6 +9,11 @@ import (
 )
 
 func TestIsClaudeCodeClient(t *testing.T) {
+	// 合法的 legacy 格式 metadata.user_id（64位 hex + account uuid + session uuid）
+	legacyUserID := "user_a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2_account_550e8400-e29b-41d4-a716-446655440000_session_123e4567-e89b-12d3-a456-426614174000"
+	// 合法的 JSON 格式 metadata.user_id（2.1.78+ 版本）
+	jsonUserID := `{"device_id":"a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2","account_uuid":"550e8400-e29b-41d4-a716-446655440000","session_id":"123e4567-e89b-12d3-a456-426614174000"}`
+
 	tests := []struct {
 		name           string
 		userAgent      string
@@ -16,15 +21,21 @@ func TestIsClaudeCodeClient(t *testing.T) {
 		want           bool
 	}{
 		{
-			name:           "Claude Code client",
+			name:           "Claude Code client with legacy user_id",
 			userAgent:      "claude-cli/1.0.62 (darwin; arm64)",
-			metadataUserID: "session_123e4567-e89b-12d3-a456-426614174000",
+			metadataUserID: legacyUserID,
 			want:           true,
 		},
 		{
-			name:           "Claude Code without version suffix",
-			userAgent:      "claude-cli/2.0.0",
-			metadataUserID: "session_abc",
+			name:           "Claude Code client with JSON user_id",
+			userAgent:      "claude-cli/2.1.92 (external, cli)",
+			metadataUserID: jsonUserID,
+			want:           true,
+		},
+		{
+			name:           "Claude Code case insensitive UA",
+			userAgent:      "Claude-CLI/2.0.0",
+			metadataUserID: legacyUserID,
 			want:           true,
 		},
 		{
@@ -34,21 +45,33 @@ func TestIsClaudeCodeClient(t *testing.T) {
 			want:           false,
 		},
 		{
-			name:           "Different user agent",
+			name:           "Claude CLI UA with invalid user_id format",
+			userAgent:      "claude-cli/2.0.0",
+			metadataUserID: "fake-user-id-12345",
+			want:           false,
+		},
+		{
+			name:           "Different user agent with valid user_id",
 			userAgent:      "curl/7.68.0",
-			metadataUserID: "user123",
+			metadataUserID: legacyUserID,
 			want:           false,
 		},
 		{
 			name:           "Empty user agent",
 			userAgent:      "",
-			metadataUserID: "user123",
+			metadataUserID: legacyUserID,
 			want:           false,
 		},
 		{
 			name:           "Similar but not Claude CLI",
 			userAgent:      "claude-api/1.0.0",
-			metadataUserID: "user123",
+			metadataUserID: legacyUserID,
+			want:           false,
+		},
+		{
+			name:           "Opencode spoofing UA with arbitrary user_id",
+			userAgent:      "claude-cli/2.1.92",
+			metadataUserID: "session_abc",
 			want:           false,
 		},
 	}
diff --git a/backend/internal/service/gateway_service.go b/backend/internal/service/gateway_service.go
index ffd66fc7..6be19ba6 100644
--- a/backend/internal/service/gateway_service.go
+++ b/backend/internal/service/gateway_service.go
@@ -329,7 +329,7 @@ func isClaudeCodeCredentialScopeError(msg string) bool {
 // Some upstream APIs return non-standard "data:" without space (should be "data: ").
 var (
 	sseDataRe            = regexp.MustCompile(`^data:\s*`)
-	claudeCliUserAgentRe = regexp.MustCompile(`^claude-cli/\d+\.\d+\.\d+`)
+	claudeCliUserAgentRe = regexp.MustCompile(`(?i)^claude-cli/\d+\.\d+\.\d+`)
 
 	// claudeCodePromptPrefixes 用于检测 Claude Code 系统提示词的前缀列表
 	// 支持多种变体：标准版、Agent SDK 版、Explore Agent 版、Compact 版等
@@ -3709,13 +3709,19 @@ func sleepWithContext(ctx context.Context, d time.Duration) error {
 	}
 }
 
-// isClaudeCodeClient 判断请求是否来自 Claude Code 客户端
-// 简化判断：User-Agent 匹配 + metadata.user_id 存在
+// isClaudeCodeClient 判断请求是否来自真正的 Claude Code 客户端。
+// 判定条件：
+//  1. User-Agent 匹配 claude-cli/X.Y.Z（大小写不敏感）
+//  2. metadata.user_id 符合 Claude Code 格式（legacy 或 JSON 格式）
+//
+// 只检查 metadata.user_id 非空不够严格：第三方工具（opencode 等）可能伪造 UA
+// 并附带任意 metadata.user_id 字符串，从而绕过 mimicry。必须通过 ParseMetadataUserID
+// 验证格式才能确认是真正的 Claude Code 客户端。
 func isClaudeCodeClient(userAgent string, metadataUserID string) bool {
-	if metadataUserID == "" {
+	if !claudeCliUserAgentRe.MatchString(userAgent) {
 		return false
 	}
-	return claudeCliUserAgentRe.MatchString(userAgent)
+	return ParseMetadataUserID(metadataUserID) != nil
 }
 
 // normalizeSystemParam 将 json.RawMessage 类型的 system 参数转为标准 Go 类型（string / []any / nil），
@@ -4144,12 +4150,15 @@ func (s *GatewayService) Forward(ctx context.Context, c *gin.Context, account *A
 		})
 	}
 
-	// OAuth 账号无条件走完整 mimicry，与 Parrot 对齐。
-	// 不再检查 isClaudeCodeRequest —— 即使客户端自称 Claude Code（opencode 等
-	// 第三方工具会伪装 UA / X-App / system prompt），它的伪装往往不完整（缺 billing
-	// block / 工具名混淆 / cache 策略等），被 Anthropic 判为 third-party。
-	// 无条件覆盖不会对真正的 Claude Code 造成问题，因为我们的伪装更完整。
-	shouldMimicClaudeCode := account.IsOAuth()
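+	// Claude Code client detection: either IsClaudeCodeClient(ctx) reports true, or the UA matches
+	// claude-cli/X.Y.Z and metadata.user_id passes ParseMetadataUserID (non-empty alone is not enough).
+	// A real Claude Code client already sends its own full system prompt, cache_control breakpoints and
+	// headers, so the proxy needs no body-level mimicry; forcing a rewrite breaks the client's caching
+	// (the long system prompt becomes a ~45-token one, below Anthropic's 1024-token cacheable minimum).
+	//
+	// Non-Claude-Code third-party clients (opencode, etc.) still go through the full mimicry pipeline.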
+	isClaudeCode := IsClaudeCodeClient(ctx) || isClaudeCodeClient(c.GetHeader("User-Agent"), parsed.MetadataUserID)
+	shouldMimicClaudeCode := account.IsOAuth() && !isClaudeCode
 
 	if shouldMimicClaudeCode {
 		// 与 Parrot 对齐：OAuth 账号无条件重写 system（即使客户端已发了 Claude Code
@@ -8387,7 +8396,8 @@ func (s *GatewayService) ForwardCountTokens(ctx context.Context, c *gin.Context,
 	// Pre-filter: strip empty text blocks to prevent upstream 400.
 	body = StripEmptyTextBlocks(body)
 
-	shouldMimicClaudeCode := account.IsOAuth()
+	isClaudeCodeCT := IsClaudeCodeClient(ctx) || isClaudeCodeClient(c.GetHeader("User-Agent"), parsed.MetadataUserID)
+	shouldMimicClaudeCode := account.IsOAuth() && !isClaudeCodeCT
 
 	if shouldMimicClaudeCode {
 		normalizeOpts := claudeOAuthNormalizeOptions{stripSystemCacheControl: true}

From b17704d6effc717e5644ad09f61abe9aa2296775 Mon Sep 17 00:00:00 2001
From: deqiying <deqiying@gmail.com>
Date: Sun, 26 Apr 2026 01:14:59 +0800
Subject: [PATCH 02/46] =?UTF-8?q?fix(anthropic):=20=E4=BF=AE=E6=AD=A3?=
 =?UTF-8?q?=E7=BC=93=E5=AD=98=20token=20=E7=9A=84=20Anthropic=20=E7=94=A8?=
 =?UTF-8?q?=E9=87=8F=E8=AF=AD=E4=B9=89?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../pkg/apicompat/anthropic_responses_test.go | 79 +++++++++++++++++++
 .../pkg/apicompat/responses_to_anthropic.go   | 39 ++++++---
 2 files changed, 106 insertions(+), 12 deletions(-)

diff --git a/backend/internal/pkg/apicompat/anthropic_responses_test.go b/backend/internal/pkg/apicompat/anthropic_responses_test.go
index 095305c2..c35b51b6 100644
--- a/backend/internal/pkg/apicompat/anthropic_responses_test.go
+++ b/backend/internal/pkg/apicompat/anthropic_responses_test.go
@@ -181,6 +181,55 @@ func TestResponsesToAnthropic_TextOnly(t *testing.T) {
 	assert.Equal(t, 5, anth.Usage.OutputTokens)
 }
 
+func TestResponsesToAnthropic_CachedTokensUseAnthropicInputSemantics(t *testing.T) {
+	resp := &ResponsesResponse{
+		ID:     "resp_cached",
+		Model:  "gpt-5.2",
+		Status: "completed",
+		Output: []ResponsesOutput{
+			{
+				Type: "message",
+				Content: []ResponsesContentPart{
+					{Type: "output_text", Text: "Cached response"},
+				},
+			},
+		},
+		Usage: &ResponsesUsage{
+			InputTokens:  54006,
+			OutputTokens: 123,
+			TotalTokens:  54129,
+			InputTokensDetails: &ResponsesInputTokensDetails{
+				CachedTokens: 50688,
+			},
+		},
+	}
+
+	anth := ResponsesToAnthropic(resp, "claude-sonnet-4-5-20250929")
+	assert.Equal(t, 3318, anth.Usage.InputTokens)
+	assert.Equal(t, 50688, anth.Usage.CacheReadInputTokens)
+	assert.Equal(t, 123, anth.Usage.OutputTokens)
+}
+
+func TestResponsesToAnthropic_CachedTokensClampInputTokens(t *testing.T) {
+	resp := &ResponsesResponse{
+		ID:     "resp_cached_clamp",
+		Model:  "gpt-5.2",
+		Status: "completed",
+		Usage: &ResponsesUsage{
+			InputTokens:  100,
+			OutputTokens: 5,
+			InputTokensDetails: &ResponsesInputTokensDetails{
+				CachedTokens: 150,
+			},
+		},
+	}
+
+	anth := ResponsesToAnthropic(resp, "claude-sonnet-4-5-20250929")
+	assert.Equal(t, 0, anth.Usage.InputTokens)
+	assert.Equal(t, 150, anth.Usage.CacheReadInputTokens)
+	assert.Equal(t, 5, anth.Usage.OutputTokens)
+}
+
 func TestResponsesToAnthropic_ToolUse(t *testing.T) {
 	resp := &ResponsesResponse{
 		ID:     "resp_456",
@@ -343,6 +392,36 @@ func TestStreamingTextOnly(t *testing.T) {
 	assert.Equal(t, "message_stop", events[1].Type)
 }
 
+func TestStreamingCachedTokensUseAnthropicInputSemantics(t *testing.T) {
+	state := NewResponsesEventToAnthropicState()
+	ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+		Type:     "response.created",
+		Response: &ResponsesResponse{ID: "resp_cached_stream", Model: "gpt-5.2"},
+	}, state)
+
+	events := ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+		Type: "response.completed",
+		Response: &ResponsesResponse{
+			Status: "completed",
+			Usage: &ResponsesUsage{
+				InputTokens:  54006,
+				OutputTokens: 123,
+				TotalTokens:  54129,
+				InputTokensDetails: &ResponsesInputTokensDetails{
+					CachedTokens: 50688,
+				},
+			},
+		},
+	}, state)
+
+	require.Len(t, events, 2)
+	assert.Equal(t, "message_delta", events[0].Type)
+	assert.Equal(t, 3318, events[0].Usage.InputTokens)
+	assert.Equal(t, 50688, events[0].Usage.CacheReadInputTokens)
+	assert.Equal(t, 123, events[0].Usage.OutputTokens)
+	assert.Equal(t, "message_stop", events[1].Type)
+}
+
 func TestStreamingToolCall(t *testing.T) {
 	state := NewResponsesEventToAnthropicState()
 
diff --git a/backend/internal/pkg/apicompat/responses_to_anthropic.go b/backend/internal/pkg/apicompat/responses_to_anthropic.go
index 5409a0f4..40bed302 100644
--- a/backend/internal/pkg/apicompat/responses_to_anthropic.go
+++ b/backend/internal/pkg/apicompat/responses_to_anthropic.go
@@ -84,18 +84,34 @@ func ResponsesToAnthropic(resp *ResponsesResponse, model string) *AnthropicRespo
 	out.StopReason = responsesStatusToAnthropicStopReason(resp.Status, resp.IncompleteDetails, blocks)
 
 	if resp.Usage != nil {
-		out.Usage = AnthropicUsage{
-			InputTokens:  resp.Usage.InputTokens,
-			OutputTokens: resp.Usage.OutputTokens,
-		}
-		if resp.Usage.InputTokensDetails != nil {
-			out.Usage.CacheReadInputTokens = resp.Usage.InputTokensDetails.CachedTokens
-		}
+		out.Usage = anthropicUsageFromResponsesUsage(resp.Usage)
 	}
 
 	return out
 }
 
+func anthropicUsageFromResponsesUsage(usage *ResponsesUsage) AnthropicUsage {
+	if usage == nil {
+		return AnthropicUsage{}
+	}
+
+	cachedTokens := 0
+	if usage.InputTokensDetails != nil {
+		cachedTokens = usage.InputTokensDetails.CachedTokens
+	}
+
+	inputTokens := usage.InputTokens - cachedTokens
+	if inputTokens < 0 {
+		inputTokens = 0
+	}
+
+	return AnthropicUsage{
+		InputTokens:          inputTokens,
+		OutputTokens:         usage.OutputTokens,
+		CacheReadInputTokens: cachedTokens,
+	}
+}
+
 func responsesStatusToAnthropicStopReason(status string, details *ResponsesIncompleteDetails, blocks []AnthropicContentBlock) string {
 	switch status {
 	case "incomplete":
@@ -466,11 +482,10 @@ func resToAnthHandleCompleted(evt *ResponsesStreamEvent, state *ResponsesEventTo
 	stopReason := "end_turn"
 	if evt.Response != nil {
 		if evt.Response.Usage != nil {
-			state.InputTokens = evt.Response.Usage.InputTokens
-			state.OutputTokens = evt.Response.Usage.OutputTokens
-			if evt.Response.Usage.InputTokensDetails != nil {
-				state.CacheReadInputTokens = evt.Response.Usage.InputTokensDetails.CachedTokens
-			}
+			usage := anthropicUsageFromResponsesUsage(evt.Response.Usage)
+			state.InputTokens = usage.InputTokens
+			state.OutputTokens = usage.OutputTokens
+			state.CacheReadInputTokens = usage.CacheReadInputTokens
 		}
 		switch evt.Response.Status {
 		case "incomplete":

From 489a4d934e4f91560bdc73e91ac91dd133b5b3b1 Mon Sep 17 00:00:00 2001
From: Oliver <sholiver@gmail.com>
Date: Sat, 25 Apr 2026 19:46:32 -0400
Subject: [PATCH 03/46] Show today stats for Vertex usage window

---
 .../components/account/AccountUsageCell.vue   | 35 +++++++++++++
 .../__tests__/AccountUsageCell.spec.ts        | 52 +++++++++++++++++++
 2 files changed, 87 insertions(+)

diff --git a/frontend/src/components/account/AccountUsageCell.vue b/frontend/src/components/account/AccountUsageCell.vue
index 1c023fb3..2c04e673 100644
--- a/frontend/src/components/account/AccountUsageCell.vue
+++ b/frontend/src/components/account/AccountUsageCell.vue
@@ -332,6 +332,37 @@
 
       <!-- Usage data or unlimited flow -->
       <div class="space-y-1">
+        <div
+          v-if="showGeminiTodayStats && todayStats"
+          class="mb-0.5 flex items-center"
+        >
+          <div class="flex items-center gap-1.5 text-[9px] text-gray-500 dark:text-gray-400">
+            <span class="rounded bg-gray-100 px-1.5 py-0.5 dark:bg-gray-800">
+              {{ formatKeyRequests }} req
+            </span>
+            <span class="rounded bg-gray-100 px-1.5 py-0.5 dark:bg-gray-800">
+              {{ formatKeyTokens }}
+            </span>
+            <span class="rounded bg-gray-100 px-1.5 py-0.5 dark:bg-gray-800" :title="t('usage.accountBilled')">
+              A ${{ formatKeyCost }}
+            </span>
+            <span
+              v-if="todayStats.user_cost != null"
+              class="rounded bg-gray-100 px-1.5 py-0.5 dark:bg-gray-800"
+              :title="t('usage.userBilled')"
+            >
+              U ${{ formatKeyUserCost }}
+            </span>
+          </div>
+        </div>
+        <div
+          v-else-if="showGeminiTodayStats && todayStatsLoading"
+          class="mb-0.5 flex items-center gap-1"
+        >
+          <div class="h-3 w-10 animate-pulse rounded bg-gray-200 dark:bg-gray-700"></div>
+          <div class="h-3 w-8 animate-pulse rounded bg-gray-200 dark:bg-gray-700"></div>
+          <div class="h-3 w-12 animate-pulse rounded bg-gray-200 dark:bg-gray-700"></div>
+        </div>
         <div v-if="loading" class="space-y-1">
           <div class="flex items-center gap-1">
             <div class="h-3 w-[32px] animate-pulse rounded bg-gray-200 dark:bg-gray-700"></div>
@@ -512,6 +543,10 @@ const shouldFetchUsage = computed(() => {
   return false
 })
 
+const showGeminiTodayStats = computed(() => {
+  return props.account.platform === 'gemini' && props.account.type === 'service_account'
+})
+
 const geminiUsageAvailable = computed(() => {
   return (
     !!usageInfo.value?.gemini_shared_daily ||
diff --git a/frontend/src/components/account/__tests__/AccountUsageCell.spec.ts b/frontend/src/components/account/__tests__/AccountUsageCell.spec.ts
index 9158da64..fa4104f6 100644
--- a/frontend/src/components/account/__tests__/AccountUsageCell.spec.ts
+++ b/frontend/src/components/account/__tests__/AccountUsageCell.spec.ts
@@ -57,6 +57,19 @@ function makeAccount(overrides: Partial<Account>): Account {
 describe('AccountUsageCell', () => {
   beforeEach(() => {
     getUsage.mockReset()
+    Object.defineProperty(window, 'matchMedia', {
+      writable: true,
+      value: vi.fn().mockImplementation(() => ({
+        matches: true,
+        media: '(min-width: 768px)',
+        onchange: null,
+        addListener: vi.fn(),
+        removeListener: vi.fn(),
+        addEventListener: vi.fn(),
+        removeEventListener: vi.fn(),
+        dispatchEvent: vi.fn(),
+      }))
+    })
   })
 
   it('Antigravity 图片用量会聚合新旧 image 模型', async () => {
@@ -603,4 +616,43 @@ describe('AccountUsageCell', () => {
 
 		expect(wrapper.text().trim()).toBe('-')
   })
+
+  it('Vertex 账号会在 Gemini 用量窗口里展示 today stats 徽章', async () => {
+		const wrapper = mount(AccountUsageCell, {
+		  props: {
+		    account: makeAccount({
+		      id: 4001,
+		      platform: 'gemini',
+		      type: 'service_account',
+          credentials: {
+            tier_id: 'vertex',
+            project_id: 'vertex-proj',
+            client_email: 'svc@vertex-proj.iam.gserviceaccount.com',
+            location: 'global'
+          },
+		      extra: {}
+		    }),
+		    todayStats: {
+		      requests: 0,
+		      tokens: 0,
+		      cost: 0,
+		      standard_cost: 0,
+		      user_cost: 0
+		    }
+		  },
+		  global: {
+		    stubs: {
+		      UsageProgressBar: true,
+		      AccountQuotaInfo: true
+		    }
+		  }
+		})
+
+		await flushPromises()
+
+		expect(wrapper.text()).toContain('0 req')
+		expect(wrapper.text()).toContain('0')
+		expect(wrapper.text()).toContain('A $0.00')
+		expect(wrapper.text()).toContain('U $0.00')
+  })
 })

From 6d11f9ed77837968ba35188ebdc980ef60740e50 Mon Sep 17 00:00:00 2001
From: Oliver <sholiver@gmail.com>
Date: Sat, 25 Apr 2026 20:39:58 -0400
Subject: [PATCH 04/46] Add Vertex service account support

---
 backend/cmd/server/wire_gen.go                |   4 +-
 backend/internal/domain/constants.go          |  11 +-
 .../internal/handler/admin/account_handler.go |   4 +-
 .../internal/service/account_test_service.go  | 101 +++++-
 .../internal/service/claude_token_provider.go |  48 ++-
 .../service/claude_token_provider_test.go     |   8 +-
 backend/internal/service/domain_constants.go  |  11 +-
 ...y_anthropic_vertex_service_account_test.go |  68 ++++
 backend/internal/service/gateway_service.go   |  88 ++++-
 .../service/gemini_messages_compat_service.go |  59 +++-
 .../internal/service/gemini_token_provider.go |  53 ++-
 .../service/vertex_service_account.go         | 303 +++++++++++++++++
 .../service/vertex_service_account_test.go    |  77 +++++
 .../components/account/CreateAccountModal.vue | 310 +++++++++++++++++-
 .../components/account/EditAccountModal.vue   | 129 ++++++++
 .../components/common/PlatformTypeBadge.vue   |   3 +
 frontend/src/types/index.ts                   |   2 +-
 17 files changed, 1243 insertions(+), 36 deletions(-)
 create mode 100644 backend/internal/service/gateway_anthropic_vertex_service_account_test.go
 create mode 100644 backend/internal/service/vertex_service_account.go
 create mode 100644 backend/internal/service/vertex_service_account_test.go

diff --git a/backend/cmd/server/wire_gen.go b/backend/cmd/server/wire_gen.go
index f767bbea..dea46561 100644
--- a/backend/cmd/server/wire_gen.go
+++ b/backend/cmd/server/wire_gen.go
@@ -145,13 +145,14 @@ func initializeApplication(buildInfo handler.BuildInfo) (*Application, error) {
 	accountUsageService := service.NewAccountUsageService(accountRepository, usageLogRepository, claudeUsageFetcher, geminiQuotaService, antigravityQuotaFetcher, usageCache, identityCache, tlsFingerprintProfileService)
 	oAuthRefreshAPI := service.ProvideOAuthRefreshAPI(accountRepository, geminiTokenCache)
 	geminiTokenProvider := service.ProvideGeminiTokenProvider(accountRepository, geminiTokenCache, geminiOAuthService, oAuthRefreshAPI)
+	claudeTokenProvider := service.ProvideClaudeTokenProvider(accountRepository, geminiTokenCache, oAuthService, oAuthRefreshAPI)
 	gatewayCache := repository.NewGatewayCache(redisClient)
 	schedulerOutboxRepository := repository.NewSchedulerOutboxRepository(db)
 	schedulerSnapshotService := service.ProvideSchedulerSnapshotService(schedulerCache, schedulerOutboxRepository, accountRepository, groupRepository, configConfig)
 	antigravityTokenProvider := service.ProvideAntigravityTokenProvider(accountRepository, geminiTokenCache, antigravityOAuthService, oAuthRefreshAPI, tempUnschedCache)
 	internal500CounterCache := repository.NewInternal500CounterCache(redisClient)
 	antigravityGatewayService := service.NewAntigravityGatewayService(accountRepository, gatewayCache, schedulerSnapshotService, antigravityTokenProvider, rateLimitService, httpUpstream, settingService, internal500CounterCache)
-	accountTestService := service.NewAccountTestService(accountRepository, geminiTokenProvider, antigravityGatewayService, httpUpstream, configConfig, tlsFingerprintProfileService)
+	accountTestService := service.NewAccountTestService(accountRepository, geminiTokenProvider, claudeTokenProvider, antigravityGatewayService, httpUpstream, configConfig, tlsFingerprintProfileService)
 	crsSyncService := service.NewCRSSyncService(accountRepository, proxyRepository, oAuthService, openAIOAuthService, geminiOAuthService, configConfig)
 	accountHandler := admin.NewAccountHandler(adminService, oAuthService, openAIOAuthService, geminiOAuthService, antigravityOAuthService, rateLimitService, accountUsageService, accountTestService, concurrencyService, crsSyncService, sessionLimitCache, rpmCache, compositeTokenCacheInvalidator)
 	adminAnnouncementHandler := admin.NewAnnouncementHandler(announcementService)
@@ -178,7 +179,6 @@ func initializeApplication(buildInfo handler.BuildInfo) (*Application, error) {
 	billingService := service.NewBillingService(configConfig, pricingService)
 	identityService := service.NewIdentityService(identityCache)
 	deferredService := service.ProvideDeferredService(accountRepository, timingWheelService)
-	claudeTokenProvider := service.ProvideClaudeTokenProvider(accountRepository, geminiTokenCache, oAuthService, oAuthRefreshAPI)
 	digestSessionStore := service.NewDigestSessionStore()
 	channelRepository := repository.NewChannelRepository(db)
 	channelService := service.NewChannelService(channelRepository, groupRepository, apiKeyAuthCacheInvalidator, pricingService)
diff --git a/backend/internal/domain/constants.go b/backend/internal/domain/constants.go
index a57f7067..27c543dd 100644
--- a/backend/internal/domain/constants.go
+++ b/backend/internal/domain/constants.go
@@ -26,11 +26,12 @@ const (
 
 // Account type constants
 const (
-	AccountTypeOAuth      = "oauth"       // OAuth类型账号（full scope: profile + inference）
-	AccountTypeSetupToken = "setup-token" // Setup Token类型账号（inference only scope）
-	AccountTypeAPIKey     = "apikey"      // API Key类型账号
-	AccountTypeUpstream   = "upstream"    // 上游透传类型账号（通过 Base URL + API Key 连接上游）
-	AccountTypeBedrock    = "bedrock"     // AWS Bedrock 类型账号（通过 SigV4 签名或 API Key 连接 Bedrock，由 credentials.auth_mode 区分）
+	AccountTypeOAuth          = "oauth"           // OAuth类型账号（full scope: profile + inference）
+	AccountTypeSetupToken     = "setup-token"     // Setup Token类型账号（inference only scope）
+	AccountTypeAPIKey         = "apikey"          // API Key类型账号
+	AccountTypeUpstream       = "upstream"        // 上游透传类型账号（通过 Base URL + API Key 连接上游）
+	AccountTypeBedrock        = "bedrock"         // AWS Bedrock 类型账号（通过 SigV4 签名或 API Key 连接 Bedrock，由 credentials.auth_mode 区分）
+	AccountTypeServiceAccount = "service_account" // Google Service Account 类型账号（用于 Vertex AI）
 )
 
 // Redeem type constants
diff --git a/backend/internal/handler/admin/account_handler.go b/backend/internal/handler/admin/account_handler.go
index 7454451a..e69e056f 100644
--- a/backend/internal/handler/admin/account_handler.go
+++ b/backend/internal/handler/admin/account_handler.go
@@ -98,7 +98,7 @@ type CreateAccountRequest struct {
 	Name                    string         `json:"name" binding:"required"`
 	Notes                   *string        `json:"notes"`
 	Platform                string         `json:"platform" binding:"required"`
-	Type                    string         `json:"type" binding:"required,oneof=oauth setup-token apikey upstream bedrock"`
+	Type                    string         `json:"type" binding:"required,oneof=oauth setup-token apikey upstream bedrock service_account"`
 	Credentials             map[string]any `json:"credentials" binding:"required"`
 	Extra                   map[string]any `json:"extra"`
 	ProxyID                 *int64         `json:"proxy_id"`
@@ -117,7 +117,7 @@ type CreateAccountRequest struct {
 type UpdateAccountRequest struct {
 	Name                    string         `json:"name"`
 	Notes                   *string        `json:"notes"`
-	Type                    string         `json:"type" binding:"omitempty,oneof=oauth setup-token apikey upstream bedrock"`
+	Type                    string         `json:"type" binding:"omitempty,oneof=oauth setup-token apikey upstream bedrock service_account"`
 	Credentials             map[string]any `json:"credentials"`
 	Extra                   map[string]any `json:"extra"`
 	ProxyID                 *int64         `json:"proxy_id"`
diff --git a/backend/internal/service/account_test_service.go b/backend/internal/service/account_test_service.go
index c0bbc6dc..aa657e0e 100644
--- a/backend/internal/service/account_test_service.go
+++ b/backend/internal/service/account_test_service.go
@@ -64,6 +64,7 @@ func isOpenAIImageModel(model string) bool {
 type AccountTestService struct {
 	accountRepo               AccountRepository
 	geminiTokenProvider       *GeminiTokenProvider
+	claudeTokenProvider       *ClaudeTokenProvider
 	antigravityGatewayService *AntigravityGatewayService
 	httpUpstream              HTTPUpstream
 	cfg                       *config.Config
@@ -74,6 +75,7 @@ type AccountTestService struct {
 func NewAccountTestService(
 	accountRepo AccountRepository,
 	geminiTokenProvider *GeminiTokenProvider,
+	claudeTokenProvider *ClaudeTokenProvider,
 	antigravityGatewayService *AntigravityGatewayService,
 	httpUpstream HTTPUpstream,
 	cfg *config.Config,
@@ -82,6 +84,7 @@ func NewAccountTestService(
 	return &AccountTestService{
 		accountRepo:               accountRepo,
 		geminiTokenProvider:       geminiTokenProvider,
+		claudeTokenProvider:       claudeTokenProvider,
 		antigravityGatewayService: antigravityGatewayService,
 		httpUpstream:              httpUpstream,
 		cfg:                       cfg,
@@ -210,6 +213,9 @@ func (s *AccountTestService) testClaudeAccountConnection(c *gin.Context, account
 	if account.IsBedrock() {
 		return s.testBedrockAccountConnection(c, ctx, account, testModelID)
 	}
+	if account.Type == AccountTypeServiceAccount {
+		return s.testClaudeVertexServiceAccountConnection(c, ctx, account, testModelID)
+	}
 
 	// Determine authentication method and API URL
 	var authToken string
@@ -313,6 +319,74 @@ func (s *AccountTestService) testClaudeAccountConnection(c *gin.Context, account
 	return s.processClaudeStream(c, resp.Body)
 }
 
+func (s *AccountTestService) testClaudeVertexServiceAccountConnection(c *gin.Context, ctx context.Context, account *Account, testModelID string) error {
+	if mappedModel, matched := account.ResolveMappedModel(testModelID); matched {
+		testModelID = mappedModel
+	} else {
+		testModelID = normalizeVertexAnthropicModelID(claude.NormalizeModelID(testModelID))
+	}
+
+	c.Writer.Header().Set("Content-Type", "text/event-stream")
+	c.Writer.Header().Set("Cache-Control", "no-cache")
+	c.Writer.Header().Set("Connection", "keep-alive")
+	c.Writer.Header().Set("X-Accel-Buffering", "no")
+	c.Writer.Flush()
+
+	payload, err := createTestPayload(testModelID)
+	if err != nil {
+		return s.sendErrorAndEnd(c, "Failed to create test payload")
+	}
+	payloadBytes, _ := json.Marshal(payload)
+	vertexBody, err := buildVertexAnthropicRequestBody(payloadBytes)
+	if err != nil {
+		return s.sendErrorAndEnd(c, fmt.Sprintf("Failed to create Vertex request body: %s", err.Error()))
+	}
+
+	if s.claudeTokenProvider == nil {
+		return s.sendErrorAndEnd(c, "Claude token provider not configured")
+	}
+	accessToken, err := s.claudeTokenProvider.GetAccessToken(ctx, account)
+	if err != nil {
+		return s.sendErrorAndEnd(c, fmt.Sprintf("Failed to get service account access token: %s", err.Error()))
+	}
+
+	fullURL, err := buildVertexAnthropicURL(account.VertexProjectID(), account.VertexLocation(testModelID), testModelID, true)
+	if err != nil {
+		return s.sendErrorAndEnd(c, fmt.Sprintf("Failed to build Vertex URL: %s", err.Error()))
+	}
+
+	s.sendEvent(c, TestEvent{Type: "test_start", Model: testModelID})
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, fullURL, bytes.NewReader(vertexBody))
+	if err != nil {
+		return s.sendErrorAndEnd(c, "Failed to create request")
+	}
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Authorization", "Bearer "+accessToken)
+
+	proxyURL := ""
+	if account.ProxyID != nil && account.Proxy != nil {
+		proxyURL = account.Proxy.URL()
+	}
+
+	resp, err := s.httpUpstream.DoWithTLS(req, proxyURL, account.ID, account.Concurrency, s.tlsFPProfileService.ResolveTLSProfile(account))
+	if err != nil {
+		return s.sendErrorAndEnd(c, fmt.Sprintf("Request failed: %s", err.Error()))
+	}
+	defer func() { _ = resp.Body.Close() }()
+
+	if resp.StatusCode != http.StatusOK {
+		body, _ := io.ReadAll(resp.Body)
+		errMsg := fmt.Sprintf("API returned %d: %s", resp.StatusCode, string(body))
+		if resp.StatusCode == http.StatusForbidden {
+			_ = s.accountRepo.SetError(ctx, account.ID, errMsg)
+		}
+		return s.sendErrorAndEnd(c, errMsg)
+	}
+
+	return s.processClaudeStream(c, resp.Body)
+}
+
 // testBedrockAccountConnection tests a Bedrock (SigV4 or API Key) account using non-streaming invoke
 func (s *AccountTestService) testBedrockAccountConnection(c *gin.Context, ctx context.Context, account *Account, testModelID string) error {
 	region := bedrockRuntimeRegion(account)
@@ -711,8 +785,8 @@ func (s *AccountTestService) testGeminiAccountConnection(c *gin.Context, account
 		testModelID = geminicli.DefaultTestModel
 	}
 
-	// For API Key accounts with model mapping, map the model
-	if account.Type == AccountTypeAPIKey {
+	// For static upstream credentials with model mapping, map the model
+	if account.Type == AccountTypeAPIKey || account.Type == AccountTypeServiceAccount {
 		mapping := account.GetModelMapping()
 		if len(mapping) > 0 {
 			if mappedModel, exists := mapping[testModelID]; exists {
@@ -740,6 +814,8 @@ func (s *AccountTestService) testGeminiAccountConnection(c *gin.Context, account
 		req, err = s.buildGeminiAPIKeyRequest(ctx, account, testModelID, payload)
 	case AccountTypeOAuth:
 		req, err = s.buildGeminiOAuthRequest(ctx, account, testModelID, payload)
+	case AccountTypeServiceAccount:
+		req, err = s.buildGeminiServiceAccountRequest(ctx, account, testModelID, payload)
 	default:
 		return s.sendErrorAndEnd(c, fmt.Sprintf("Unsupported account type: %s", account.Type))
 	}
@@ -893,6 +969,32 @@ func (s *AccountTestService) buildGeminiOAuthRequest(ctx context.Context, accoun
 	return s.buildCodeAssistRequest(ctx, accessToken, projectID, modelID, payload)
 }
 
+// buildGeminiServiceAccountRequest builds a streaming Vertex AI Gemini request
+// (streamGenerateContent with "?alt=sse") for a service account, authenticated
+// with a bearer token obtained from the Gemini token provider. It returns an
+// error when the provider is not configured, the token cannot be obtained, or
+// the Vertex URL cannot be built.
+func (s *AccountTestService) buildGeminiServiceAccountRequest(ctx context.Context, account *Account, modelID string, payload []byte) (*http.Request, error) {
+	if s.geminiTokenProvider == nil {
+		return nil, fmt.Errorf("gemini token provider not configured")
+	}
+	accessToken, err := s.geminiTokenProvider.GetAccessToken(ctx, account)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get service account access token: %w", err)
+	}
+	fullURL, err := buildVertexGeminiURL(account.VertexProjectID(), account.VertexLocation(modelID), modelID, "streamGenerateContent", true)
+	if err != nil {
+		return nil, err
+	}
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, fullURL, bytes.NewReader(payload))
+	if err != nil {
+		return nil, err
+	}
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Authorization", "Bearer "+accessToken)
+	return req, nil
+}
+
 // buildCodeAssistRequest builds request for Google Code Assist API (used by Gemini CLI and Antigravity)
 func (s *AccountTestService) buildCodeAssistRequest(ctx context.Context, accessToken, projectID, modelID string, payload []byte) (*http.Request, error) {
 	var inner map[string]any
diff --git a/backend/internal/service/claude_token_provider.go b/backend/internal/service/claude_token_provider.go
index 82fa31c4..9292979f 100644
--- a/backend/internal/service/claude_token_provider.go
+++ b/backend/internal/service/claude_token_provider.go
@@ -17,7 +17,7 @@ const (
 // ClaudeTokenCache token cache interface.
 type ClaudeTokenCache = GeminiTokenCache
 
-// ClaudeTokenProvider manages access_token for Claude OAuth accounts.
+// ClaudeTokenProvider manages access_token for Claude OAuth and Vertex service account accounts.
 type ClaudeTokenProvider struct {
 	accountRepo   AccountRepository
 	tokenCache    ClaudeTokenCache
@@ -56,8 +56,11 @@ func (p *ClaudeTokenProvider) GetAccessToken(ctx context.Context, account *Accou
 	if account == nil {
 		return "", errors.New("account is nil")
 	}
-	if account.Platform != PlatformAnthropic || account.Type != AccountTypeOAuth {
-		return "", errors.New("not an anthropic oauth account")
+	if account.Platform != PlatformAnthropic || (account.Type != AccountTypeOAuth && account.Type != AccountTypeServiceAccount) {
+		return "", errors.New("not an anthropic oauth or service account")
+	}
+	if account.Type == AccountTypeServiceAccount {
+		return p.getServiceAccountAccessToken(ctx, account)
 	}
 
 	cacheKey := ClaudeTokenCacheKey(account)
@@ -157,3 +160,55 @@ func (p *ClaudeTokenProvider) GetAccessToken(ctx context.Context, account *Accou
 
 	return accessToken, nil
 }
+
+// getServiceAccountAccessToken returns an access token for a Vertex service
+// account, consulting the shared token cache before exchanging the key.
+//
+// The lookup order is: cached token, then (when the refresh lock is held by
+// another caller) one more cache read after a short wait, and finally a fresh
+// exchange via exchangeVertexServiceAccountToken. Lock and cache-write failures
+// are non-fatal: the token is still exchanged and returned.
+func (p *ClaudeTokenProvider) getServiceAccountAccessToken(ctx context.Context, account *Account) (string, error) {
+	key, err := parseVertexServiceAccountKey(account)
+	if err != nil {
+		return "", err
+	}
+	cacheKey := vertexServiceAccountCacheKey(account, key)
+
+	if p.tokenCache != nil {
+		if token, err := p.tokenCache.GetAccessToken(ctx, cacheKey); err == nil && strings.TrimSpace(token) != "" {
+			return token, nil
+		}
+
+		locked, lockErr := p.tokenCache.AcquireRefreshLock(ctx, cacheKey, 30*time.Second)
+		switch {
+		case lockErr != nil:
+			// Degrade gracefully: without the lock we still exchange a token below.
+			slog.Warn("vertex_service_account_token_lock_failed", "account_id", account.ID, "error", lockErr)
+		case locked:
+			defer func() { _ = p.tokenCache.ReleaseRefreshLock(ctx, cacheKey) }()
+		default:
+			// Another caller holds the refresh lock. Wait briefly for it to
+			// publish a token, but give up at once if the request is cancelled
+			// instead of blocking in an uninterruptible sleep.
+			select {
+			case <-ctx.Done():
+				return "", ctx.Err()
+			case <-time.After(claudeLockWaitTime):
+			}
+			if token, err := p.tokenCache.GetAccessToken(ctx, cacheKey); err == nil && strings.TrimSpace(token) != "" {
+				return token, nil
+			}
+		}
+	}
+
+	accessToken, ttl, err := exchangeVertexServiceAccountToken(ctx, key)
+	if err != nil {
+		return "", err
+	}
+	if p.tokenCache != nil {
+		// Best effort: a failed cache write only costs an extra exchange later.
+		_ = p.tokenCache.SetAccessToken(ctx, cacheKey, accessToken, ttl)
+	}
+	return accessToken, nil
+}
diff --git a/backend/internal/service/claude_token_provider_test.go b/backend/internal/service/claude_token_provider_test.go
index 3e21f6f4..d4a4a14a 100644
--- a/backend/internal/service/claude_token_provider_test.go
+++ b/backend/internal/service/claude_token_provider_test.go
@@ -137,7 +137,7 @@ func (p *testClaudeTokenProvider) GetAccessToken(ctx context.Context, account *A
 		return "", errors.New("account is nil")
 	}
 	if account.Platform != PlatformAnthropic || account.Type != AccountTypeOAuth {
-		return "", errors.New("not an anthropic oauth account")
+		return "", errors.New("not an anthropic oauth or service account")
 	}
 
 	cacheKey := ClaudeTokenCacheKey(account)
@@ -371,7 +371,7 @@ func TestClaudeTokenProvider_WrongPlatform(t *testing.T) {
 
 	token, err := provider.GetAccessToken(context.Background(), account)
 	require.Error(t, err)
-	require.Contains(t, err.Error(), "not an anthropic oauth account")
+	require.Contains(t, err.Error(), "not an anthropic oauth or service account")
 	require.Empty(t, token)
 }
 
@@ -385,7 +385,7 @@ func TestClaudeTokenProvider_WrongAccountType(t *testing.T) {
 
 	token, err := provider.GetAccessToken(context.Background(), account)
 	require.Error(t, err)
-	require.Contains(t, err.Error(), "not an anthropic oauth account")
+	require.Contains(t, err.Error(), "not an anthropic oauth or service account")
 	require.Empty(t, token)
 }
 
@@ -399,7 +399,7 @@ func TestClaudeTokenProvider_SetupTokenType(t *testing.T) {
 
 	token, err := provider.GetAccessToken(context.Background(), account)
 	require.Error(t, err)
-	require.Contains(t, err.Error(), "not an anthropic oauth account")
+	require.Contains(t, err.Error(), "not an anthropic oauth or service account")
 	require.Empty(t, token)
 }
 
diff --git a/backend/internal/service/domain_constants.go b/backend/internal/service/domain_constants.go
index 04037987..e3d3a872 100644
--- a/backend/internal/service/domain_constants.go
+++ b/backend/internal/service/domain_constants.go
@@ -36,11 +36,12 @@ const (
 
 // Account type constants
 const (
-	AccountTypeOAuth      = domain.AccountTypeOAuth      // OAuth类型账号（full scope: profile + inference）
-	AccountTypeSetupToken = domain.AccountTypeSetupToken // Setup Token类型账号（inference only scope）
-	AccountTypeAPIKey     = domain.AccountTypeAPIKey     // API Key类型账号
-	AccountTypeUpstream   = domain.AccountTypeUpstream   // 上游透传类型账号（通过 Base URL + API Key 连接上游）
-	AccountTypeBedrock    = domain.AccountTypeBedrock    // AWS Bedrock 类型账号（通过 SigV4 签名或 API Key 连接 Bedrock，由 credentials.auth_mode 区分）
+	AccountTypeOAuth          = domain.AccountTypeOAuth          // OAuth account (full scope: profile + inference)
+	AccountTypeSetupToken     = domain.AccountTypeSetupToken     // Setup Token account (inference-only scope)
+	AccountTypeAPIKey         = domain.AccountTypeAPIKey         // API Key account
+	AccountTypeUpstream       = domain.AccountTypeUpstream       // Upstream passthrough account (connects to the upstream via Base URL + API Key)
+	AccountTypeBedrock        = domain.AccountTypeBedrock        // AWS Bedrock account (connects to Bedrock via SigV4 signing or API Key, distinguished by credentials.auth_mode)
+	AccountTypeServiceAccount = domain.AccountTypeServiceAccount // Google Service Account account (used for Vertex AI)
 )
 
 // Redeem type constants
diff --git a/backend/internal/service/gateway_anthropic_vertex_service_account_test.go b/backend/internal/service/gateway_anthropic_vertex_service_account_test.go
new file mode 100644
index 00000000..aa779805
--- /dev/null
+++ b/backend/internal/service/gateway_anthropic_vertex_service_account_test.go
@@ -0,0 +1,75 @@
+package service
+
+import (
+	"context"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	"github.com/gin-gonic/gin"
+	"github.com/stretchr/testify/require"
+	"github.com/tidwall/gjson"
+)
+
+// TestGatewayService_BuildAnthropicVertexServiceAccountRequest checks that
+// buildUpstreamRequest, for an Anthropic service account, targets the Vertex
+// rawPredict URL, replaces inbound credentials with the service account bearer
+// token, drops the anthropic-version header and keeps anthropic-beta.
+func TestGatewayService_BuildAnthropicVertexServiceAccountRequest(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+
+	rec := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(rec)
+	c.Request = httptest.NewRequest(http.MethodPost, "/v1/messages", nil)
+	c.Request.Header.Set("Authorization", "Bearer inbound-token")
+	c.Request.Header.Set("X-Api-Key", "inbound-api-key")
+	c.Request.Header.Set("Anthropic-Version", "2023-06-01")
+	c.Request.Header.Set("Anthropic-Beta", "interleaved-thinking-2025-05-14")
+
+	account := &Account{
+		ID:       301,
+		Platform: PlatformAnthropic,
+		Type:     AccountTypeServiceAccount,
+		Credentials: map[string]any{
+			"project_id": "vertex-proj",
+			"location":   "us-east5",
+		},
+	}
+	body := []byte(`{"model":"claude-sonnet-4-5","stream":false,"max_tokens":32,"messages":[{"role":"user","content":"hello"}]}`)
+
+	svc := &GatewayService{}
+	req, err := svc.buildUpstreamRequest(
+		context.Background(),
+		c,
+		account,
+		body,
+		"vertex-token",
+		"service_account",
+		"claude-sonnet-4-5@20250929",
+		false,
+		false,
+	)
+	require.NoError(t, err)
+	require.Equal(t, "https://us-east5-aiplatform.googleapis.com/v1/projects/vertex-proj/locations/us-east5/publishers/anthropic/models/claude-sonnet-4-5@20250929:rawPredict", req.URL.String())
+	require.Equal(t, "Bearer vertex-token", getHeaderRaw(req.Header, "authorization"))
+	require.Empty(t, getHeaderRaw(req.Header, "x-api-key"))
+	require.Empty(t, getHeaderRaw(req.Header, "anthropic-version"))
+	require.Equal(t, "interleaved-thinking-2025-05-14", getHeaderRaw(req.Header, "anthropic-beta"))
+
+	// The body must lose "model" and gain the Vertex anthropic_version.
+	got := readRequestBodyForTest(t, req)
+	require.Equal(t, "", gjson.GetBytes(got, "model").String())
+	require.Equal(t, vertexAnthropicVersion, gjson.GetBytes(got, "anthropic_version").String())
+	require.Equal(t, "hello", gjson.GetBytes(got, "messages.0.content").String())
+}
+
+// readRequestBodyForTest reads and returns the whole request body, failing the
+// test when the body is nil or cannot be read.
+func readRequestBodyForTest(t *testing.T, req *http.Request) []byte {
+	t.Helper()
+	require.NotNil(t, req.Body)
+	body, err := io.ReadAll(req.Body)
+	require.NoError(t, err)
+	return body
+}
diff --git a/backend/internal/service/gateway_service.go b/backend/internal/service/gateway_service.go
index 6be19ba6..75725753 100644
--- a/backend/internal/service/gateway_service.go
+++ b/backend/internal/service/gateway_service.go
@@ -3597,7 +3597,11 @@ func (s *GatewayService) isModelSupportedByAccount(account *Account, requestedMo
 	}
 	// OAuth/SetupToken 账号使用 Anthropic 标准映射（短ID → 长ID）
 	if account.Platform == PlatformAnthropic && account.Type != AccountTypeAPIKey {
-		requestedModel = claude.NormalizeModelID(requestedModel)
+		if account.Type == AccountTypeServiceAccount {
+			requestedModel = normalizeVertexAnthropicModelID(claude.NormalizeModelID(requestedModel))
+		} else {
+			requestedModel = claude.NormalizeModelID(requestedModel)
+		}
 	}
 	// 其他平台使用账户的模型支持检查
 	return account.IsModelSupported(requestedModel)
@@ -3617,6 +3621,18 @@ func (s *GatewayService) GetAccessToken(ctx context.Context, account *Account) (
 		return apiKey, "apikey", nil
 	case AccountTypeBedrock:
 		return "", "bedrock", nil // Bedrock 使用 SigV4 签名或 API Key，由 forwardBedrock 处理
+	case AccountTypeServiceAccount:
+		if account.Platform != PlatformAnthropic {
+			return "", "", fmt.Errorf("unsupported service account platform: %s", account.Platform)
+		}
+		if s.claudeTokenProvider == nil {
+			return "", "", errors.New("claude token provider not configured")
+		}
+		accessToken, err := s.claudeTokenProvider.GetAccessToken(ctx, account)
+		if err != nil {
+			return "", "", err
+		}
+		return accessToken, "service_account", nil
 	default:
 		return "", "", fmt.Errorf("unsupported account type: %s", account.Type)
 	}
@@ -4219,6 +4235,18 @@ func (s *GatewayService) Forward(ctx context.Context, c *gin.Context, account *A
 			mappingSource = "account"
 		}
 	}
+	if mappingSource == "" && account.Platform == PlatformAnthropic && account.Type == AccountTypeServiceAccount {
+		if candidate, matched := account.ResolveMappedModel(reqModel); matched {
+			mappedModel = candidate
+			mappingSource = "account"
+		} else {
+			normalized := normalizeVertexAnthropicModelID(claude.NormalizeModelID(reqModel))
+			if normalized != reqModel {
+				mappedModel = normalized
+				mappingSource = "vertex"
+			}
+		}
+	}
 	if mappingSource == "" && account.Platform == PlatformAnthropic && account.Type != AccountTypeAPIKey {
 		normalized := claude.NormalizeModelID(reqModel)
 		if normalized != reqModel {
@@ -5688,6 +5716,10 @@ func (s *GatewayService) handleBedrockNonStreamingResponse(
 }
 
 func (s *GatewayService) buildUpstreamRequest(ctx context.Context, c *gin.Context, account *Account, body []byte, token, tokenType, modelID string, reqStream bool, mimicClaudeCode bool) (*http.Request, error) {
+	if account.Platform == PlatformAnthropic && account.Type == AccountTypeServiceAccount {
+		return s.buildUpstreamRequestAnthropicVertex(ctx, c, account, body, token, modelID, reqStream)
+	}
+
 	// 确定目标URL
 	targetURL := claudeAPIURL
 	if account.Type == AccountTypeAPIKey {
@@ -5874,6 +5906,70 @@ func (s *GatewayService) buildUpstreamRequest(ctx context.Context, c *gin.Contex
 	return req, nil
 }
 
+// buildUpstreamRequestAnthropicVertex builds the upstream request for an
+// Anthropic model served through Vertex AI with a service account token.
+//
+// The body is rewritten by buildVertexAnthropicRequestBody and the URL is built
+// by buildVertexAnthropicURL from the account's Vertex project and location.
+// Allow-listed client headers are forwarded except anthropic-version, and the
+// request is authenticated with token as a bearer credential.
+func (s *GatewayService) buildUpstreamRequestAnthropicVertex(
+	ctx context.Context,
+	c *gin.Context,
+	account *Account,
+	body []byte,
+	token string,
+	modelID string,
+	reqStream bool,
+) (*http.Request, error) {
+	vertexBody, err := buildVertexAnthropicRequestBody(body)
+	if err != nil {
+		return nil, err
+	}
+	setOpsUpstreamRequestBody(c, vertexBody)
+	fullURL, err := buildVertexAnthropicURL(account.VertexProjectID(), account.VertexLocation(modelID), modelID, reqStream)
+	if err != nil {
+		return nil, err
+	}
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, fullURL, bytes.NewReader(vertexBody))
+	if err != nil {
+		return nil, err
+	}
+
+	if c != nil && c.Request != nil {
+		// Forward only allow-listed client headers; anthropic-version is skipped
+		// because the version is carried in the Vertex request body instead.
+		for key, values := range c.Request.Header {
+			lowerKey := strings.ToLower(strings.TrimSpace(key))
+			if !allowedHeaders[lowerKey] || lowerKey == "anthropic-version" {
+				continue
+			}
+			wireKey := resolveWireCasing(key)
+			for _, v := range values {
+				addHeaderRaw(req.Header, wireKey, v)
+			}
+		}
+	}
+
+	// Remove any inbound credentials so only the service account token is sent.
+	req.Header.Del("authorization")
+	req.Header.Del("x-api-key")
+	req.Header.Del("x-goog-api-key")
+	req.Header.Del("cookie")
+	req.Header.Del("anthropic-version")
+	setHeaderRaw(req.Header, "authorization", "Bearer "+token)
+	setHeaderRaw(req.Header, "content-type", "application/json")
+
+	s.debugLogGatewaySnapshot("UPSTREAM_FORWARD_VERTEX_ANTHROPIC", req.Header, vertexBody, map[string]string{
+		"url":        req.URL.String(),
+		"token_type": "service_account",
+		"model":      modelID,
+		"stream":     strconv.FormatBool(reqStream),
+	})
+
+	return req, nil
+}
+
 // getBetaHeader 处理anthropic-beta header
 // 对于OAuth账号，需要确保包含oauth-2025-04-20
 func (s *GatewayService) getBetaHeader(modelID string, clientBetaHeader string) string {
diff --git a/backend/internal/service/gemini_messages_compat_service.go b/backend/internal/service/gemini_messages_compat_service.go
index 7a24071b..20293ac8 100644
--- a/backend/internal/service/gemini_messages_compat_service.go
+++ b/backend/internal/service/gemini_messages_compat_service.go
@@ -579,7 +579,7 @@ func (s *GeminiMessagesCompatService) Forward(ctx context.Context, c *gin.Contex
 
 	originalModel := req.Model
 	mappedModel := req.Model
-	if account.Type == AccountTypeAPIKey {
+	if account.Type == AccountTypeAPIKey || account.Type == AccountTypeServiceAccount {
 		mappedModel = account.GetMappedModel(req.Model)
 	}
 
@@ -712,6 +712,36 @@ func (s *GeminiMessagesCompatService) Forward(ctx context.Context, c *gin.Contex
 		}
 		requestIDHeader = "x-request-id"
 
+	case AccountTypeServiceAccount:
+		buildReq = func(ctx context.Context) (*http.Request, string, error) {
+			if s.tokenProvider == nil {
+				return nil, "", errors.New("gemini token provider not configured")
+			}
+			accessToken, err := s.tokenProvider.GetAccessToken(ctx, account)
+			if err != nil {
+				return nil, "", err
+			}
+
+			action := "generateContent"
+			if req.Stream {
+				action = "streamGenerateContent"
+			}
+			fullURL, err := buildVertexGeminiURL(account.VertexProjectID(), account.VertexLocation(mappedModel), mappedModel, action, req.Stream)
+			if err != nil {
+				return nil, "", err
+			}
+
+			restGeminiReq := normalizeGeminiRequestForAIStudio(geminiReq)
+			upstreamReq, err := http.NewRequestWithContext(ctx, http.MethodPost, fullURL, bytes.NewReader(restGeminiReq))
+			if err != nil {
+				return nil, "", err
+			}
+			upstreamReq.Header.Set("Content-Type", "application/json")
+			upstreamReq.Header.Set("Authorization", "Bearer "+accessToken)
+			return upstreamReq, "x-request-id", nil
+		}
+		requestIDHeader = "x-request-id"
+
 	default:
 		return nil, fmt.Errorf("unsupported account type: %s", account.Type)
 	}
@@ -1094,7 +1124,7 @@ func (s *GeminiMessagesCompatService) ForwardNative(ctx context.Context, c *gin.
 	body = ensureGeminiFunctionCallThoughtSignatures(body)
 
 	mappedModel := originalModel
-	if account.Type == AccountTypeAPIKey {
+	if account.Type == AccountTypeAPIKey || account.Type == AccountTypeServiceAccount {
 		mappedModel = account.GetMappedModel(originalModel)
 	}
 
@@ -1213,6 +1243,31 @@ func (s *GeminiMessagesCompatService) ForwardNative(ctx context.Context, c *gin.
 		}
 		requestIDHeader = "x-request-id"
 
+	case AccountTypeServiceAccount:
+		buildReq = func(ctx context.Context) (*http.Request, string, error) {
+			if s.tokenProvider == nil {
+				return nil, "", errors.New("gemini token provider not configured")
+			}
+			accessToken, err := s.tokenProvider.GetAccessToken(ctx, account)
+			if err != nil {
+				return nil, "", err
+			}
+
+			fullURL, err := buildVertexGeminiURL(account.VertexProjectID(), account.VertexLocation(mappedModel), mappedModel, upstreamAction, useUpstreamStream)
+			if err != nil {
+				return nil, "", err
+			}
+
+			upstreamReq, err := http.NewRequestWithContext(ctx, http.MethodPost, fullURL, bytes.NewReader(body))
+			if err != nil {
+				return nil, "", err
+			}
+			upstreamReq.Header.Set("Content-Type", "application/json")
+			upstreamReq.Header.Set("Authorization", "Bearer "+accessToken)
+			return upstreamReq, "x-request-id", nil
+		}
+		requestIDHeader = "x-request-id"
+
 	default:
 		return nil, s.writeGoogleError(c, http.StatusBadGateway, "Unsupported account type: "+account.Type)
 	}
diff --git a/backend/internal/service/gemini_token_provider.go b/backend/internal/service/gemini_token_provider.go
index 7add3460..c22f2131 100644
--- a/backend/internal/service/gemini_token_provider.go
+++ b/backend/internal/service/gemini_token_provider.go
@@ -15,7 +15,7 @@ const (
 	geminiTokenCacheSkew   = 5 * time.Minute
 )
 
-// GeminiTokenProvider manages access_token for Gemini OAuth accounts.
+// GeminiTokenProvider manages access_token for Gemini OAuth and Vertex service account accounts.
 type GeminiTokenProvider struct {
 	accountRepo        AccountRepository
 	tokenCache         GeminiTokenCache
@@ -53,8 +53,11 @@ func (p *GeminiTokenProvider) GetAccessToken(ctx context.Context, account *Accou
 	if account == nil {
 		return "", errors.New("account is nil")
 	}
-	if account.Platform != PlatformGemini || account.Type != AccountTypeOAuth {
-		return "", errors.New("not a gemini oauth account")
+	if account.Platform != PlatformGemini || (account.Type != AccountTypeOAuth && account.Type != AccountTypeServiceAccount) {
+		return "", errors.New("not a gemini oauth or service account")
+	}
+	if account.Type == AccountTypeServiceAccount {
+		return p.getServiceAccountAccessToken(ctx, account)
 	}
 
 	cacheKey := GeminiTokenCacheKey(account)
@@ -168,7 +171,64 @@ func (p *GeminiTokenProvider) GetAccessToken(ctx context.Context, account *Accou
 	return accessToken, nil
 }
 
+// getServiceAccountAccessToken returns an access token for a Vertex service
+// account, consulting the shared token cache before exchanging the key.
+//
+// The lookup order is: cached token, then (when the refresh lock is held by
+// another caller) one more cache read after a short wait, and finally a fresh
+// exchange via exchangeVertexServiceAccountToken. Lock and cache-write failures
+// are non-fatal: the token is still exchanged and returned.
+func (p *GeminiTokenProvider) getServiceAccountAccessToken(ctx context.Context, account *Account) (string, error) {
+	key, err := parseVertexServiceAccountKey(account)
+	if err != nil {
+		return "", err
+	}
+	cacheKey := vertexServiceAccountCacheKey(account, key)
+
+	if p.tokenCache != nil {
+		if token, err := p.tokenCache.GetAccessToken(ctx, cacheKey); err == nil && strings.TrimSpace(token) != "" {
+			return token, nil
+		}
+
+		locked, lockErr := p.tokenCache.AcquireRefreshLock(ctx, cacheKey, 30*time.Second)
+		switch {
+		case lockErr != nil:
+			// Degrade gracefully: without the lock we still exchange a token below.
+			slog.Warn("vertex_service_account_token_lock_failed", "account_id", account.ID, "error", lockErr)
+		case locked:
+			defer func() { _ = p.tokenCache.ReleaseRefreshLock(ctx, cacheKey) }()
+		default:
+			// Another caller holds the refresh lock. Wait briefly for it to
+			// publish a token, but give up at once if the request is cancelled
+			// instead of blocking in an uninterruptible sleep.
+			select {
+			case <-ctx.Done():
+				return "", ctx.Err()
+			case <-time.After(200 * time.Millisecond):
+			}
+			if token, err := p.tokenCache.GetAccessToken(ctx, cacheKey); err == nil && strings.TrimSpace(token) != "" {
+				return token, nil
+			}
+		}
+	}
+
+	accessToken, ttl, err := exchangeVertexServiceAccountToken(ctx, key)
+	if err != nil {
+		return "", err
+	}
+	if p.tokenCache != nil {
+		// Best effort: a failed cache write only costs an extra exchange later.
+		_ = p.tokenCache.SetAccessToken(ctx, cacheKey, accessToken, ttl)
+	}
+	return accessToken, nil
+}
+
 func GeminiTokenCacheKey(account *Account) string {
+	if account != nil && account.Type == AccountTypeServiceAccount {
+		if key, err := parseVertexServiceAccountKey(account); err == nil {
+			return vertexServiceAccountCacheKey(account, key)
+		}
+	}
 	projectID := strings.TrimSpace(account.GetCredential("project_id"))
 	if projectID != "" {
 		return "gemini:" + projectID
diff --git a/backend/internal/service/vertex_service_account.go b/backend/internal/service/vertex_service_account.go
new file mode 100644
index 00000000..d4130b93
--- /dev/null
+++ b/backend/internal/service/vertex_service_account.go
@@ -0,0 +1,367 @@
+package service
+
+import (
+	"bytes"
+	"context"
+	"crypto/sha256"
+	"encoding/hex"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"regexp"
+	"strings"
+	"time"
+
+	"github.com/golang-jwt/jwt/v5"
+)
+
+const (
+	vertexDefaultLocation         = "us-central1"
+	vertexDefaultTokenURL         = "https://oauth2.googleapis.com/token"
+	vertexCloudPlatformScope      = "https://www.googleapis.com/auth/cloud-platform"
+	vertexServiceAccountCacheSkew = 5 * time.Minute
+	vertexAnthropicVersion        = "vertex-2023-10-16"
+)
+
+var (
+	vertexLocationPattern                = regexp.MustCompile(`^[a-z0-9-]+$`)
+	vertexAnthropicDatedModelIDPattern   = regexp.MustCompile(`^(.+)-([0-9]{8})$`)
+	vertexAnthropicAlreadyDatedIDPattern = regexp.MustCompile(`^.+@[0-9]{8}$`)
+)
+
+// vertexServiceAccountKey holds the fields of a Google service account key
+// that are needed to sign a token request.
+type vertexServiceAccountKey struct {
+	Type         string `json:"type"`
+	ProjectID    string `json:"project_id"`
+	PrivateKeyID string `json:"private_key_id"`
+	PrivateKey   string `json:"private_key"`
+	ClientEmail  string `json:"client_email"`
+	TokenURI     string `json:"token_uri"`
+}
+
+// vertexTokenResponse is the JSON body of a token endpoint reply; it carries
+// either the issued token or the error/error_description pair.
+type vertexTokenResponse struct {
+	AccessToken string `json:"access_token"`
+	TokenType   string `json:"token_type"`
+	ExpiresIn   int64  `json:"expires_in"`
+	Error       string `json:"error"`
+	ErrorDesc   string `json:"error_description"`
+}
+
+// IsVertexServiceAccount reports whether a is non-nil and of the service
+// account type.
+func (a *Account) IsVertexServiceAccount() bool {
+	return a != nil && a.Type == AccountTypeServiceAccount
+}
+
+// VertexProjectID returns the project ID for Vertex requests: the "project_id"
+// credential when set, otherwise the project_id embedded in the service account
+// key. It returns "" for a nil account or when neither source is usable.
+func (a *Account) VertexProjectID() string {
+	if a == nil {
+		return ""
+	}
+	if v := strings.TrimSpace(a.GetCredential("project_id")); v != "" {
+		return v
+	}
+	key, err := parseVertexServiceAccountKey(a)
+	if err == nil {
+		return strings.TrimSpace(key.ProjectID)
+	}
+	return ""
+}
+
+// VertexLocation returns the Vertex location to use for model. Precedence is:
+// the per-model entry in the "vertex_model_locations" credential, then the
+// "location" credential, then "vertex_location", and finally
+// vertexDefaultLocation. The result is not validated here; the URL builders
+// reject locations that do not match vertexLocationPattern.
+func (a *Account) VertexLocation(model string) string {
+	if a == nil {
+		return vertexDefaultLocation
+	}
+	if model != "" && a.Credentials != nil {
+		if raw, ok := a.Credentials["vertex_model_locations"].(map[string]any); ok {
+			if loc, ok := raw[model].(string); ok && strings.TrimSpace(loc) != "" {
+				return strings.TrimSpace(loc)
+			}
+		}
+	}
+	if v := strings.TrimSpace(a.GetCredential("location")); v != "" {
+		return v
+	}
+	if v := strings.TrimSpace(a.GetCredential("vertex_location")); v != "" {
+		return v
+	}
+	return vertexDefaultLocation
+}
+
+// parseVertexServiceAccountKey extracts the service account key from the
+// account credentials. It accepts the key under "service_account_json" or
+// "service_account", first as a JSON string and then as a nested object, and
+// returns an error when no key is present or the key is invalid.
+func parseVertexServiceAccountKey(account *Account) (*vertexServiceAccountKey, error) {
+	if account == nil || account.Credentials == nil {
+		return nil, errors.New("service account credentials not configured")
+	}
+
+	if raw := strings.TrimSpace(account.GetCredential("service_account_json")); raw != "" {
+		return parseVertexServiceAccountJSON([]byte(raw))
+	}
+	if raw := strings.TrimSpace(account.GetCredential("service_account")); raw != "" {
+		return parseVertexServiceAccountJSON([]byte(raw))
+	}
+	// The key may also be stored as an already-decoded object; re-encode it so
+	// both shapes go through the same validation.
+	for _, field := range []string{"service_account_json", "service_account"} {
+		nested, ok := account.Credentials[field].(map[string]any)
+		if !ok {
+			continue
+		}
+		b, err := json.Marshal(nested)
+		if err != nil {
+			return nil, fmt.Errorf("invalid service account json: %w", err)
+		}
+		return parseVertexServiceAccountJSON(b)
+	}
+	return nil, errors.New("service_account_json not found in credentials")
+}
+
+// parseVertexServiceAccountJSON decodes and validates a service account key.
+// client_email, private_key and project_id are required; an empty token_uri
+// falls back to vertexDefaultTokenURL.
+func parseVertexServiceAccountJSON(raw []byte) (*vertexServiceAccountKey, error) {
+	var key vertexServiceAccountKey
+	if err := json.Unmarshal(raw, &key); err != nil {
+		return nil, fmt.Errorf("invalid service account json: %w", err)
+	}
+	if strings.TrimSpace(key.ClientEmail) == "" {
+		return nil, errors.New("service account json missing client_email")
+	}
+	if strings.TrimSpace(key.PrivateKey) == "" {
+		return nil, errors.New("service account json missing private_key")
+	}
+	if strings.TrimSpace(key.ProjectID) == "" {
+		return nil, errors.New("service account json missing project_id")
+	}
+	if strings.TrimSpace(key.TokenURI) == "" {
+		key.TokenURI = vertexDefaultTokenURL
+	}
+	return &key, nil
+}
+
+// vertexServiceAccountCacheKey returns the token cache key for a service
+// account. The key is derived from a truncated SHA-256 of client_email and
+// private_key_id, so the private key itself never appears in it; when key is nil
+// the account ID is used instead.
+func vertexServiceAccountCacheKey(account *Account, key *vertexServiceAccountKey) string {
+	fingerprint := ""
+	if key != nil {
+		sum := sha256.Sum256([]byte(key.ClientEmail + "\x00" + key.PrivateKeyID))
+		fingerprint = hex.EncodeToString(sum[:8])
+	}
+	if fingerprint == "" && account != nil {
+		fingerprint = fmt.Sprintf("account:%d", account.ID)
+	}
+	return "vertex:service_account:" + fingerprint
+}
+
+// exchangeVertexServiceAccountToken signs a one-hour RS256 JWT assertion with
+// the service account key and exchanges it at key.TokenURI using the JWT bearer
+// grant. It returns the access token and the duration it may be cached for:
+// expires_in (one hour when absent or non-positive), reduced by
+// vertexServiceAccountCacheSkew when the lifetime exceeds the skew.
+func exchangeVertexServiceAccountToken(ctx context.Context, key *vertexServiceAccountKey) (string, time.Duration, error) {
+	now := time.Now()
+	claims := jwt.MapClaims{
+		"iss":   key.ClientEmail,
+		"scope": vertexCloudPlatformScope,
+		"aud":   key.TokenURI,
+		"iat":   now.Unix(),
+		"exp":   now.Add(time.Hour).Unix(),
+	}
+	token := jwt.NewWithClaims(jwt.SigningMethodRS256, claims)
+	if strings.TrimSpace(key.PrivateKeyID) != "" {
+		token.Header["kid"] = key.PrivateKeyID
+	}
+	privateKey, err := jwt.ParseRSAPrivateKeyFromPEM([]byte(key.PrivateKey))
+	if err != nil {
+		return "", 0, fmt.Errorf("parse service account private key: %w", err)
+	}
+	assertion, err := token.SignedString(privateKey)
+	if err != nil {
+		return "", 0, fmt.Errorf("sign service account assertion: %w", err)
+	}
+
+	values := url.Values{}
+	values.Set("grant_type", "urn:ietf:params:oauth:grant-type:jwt-bearer")
+	values.Set("assertion", assertion)
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, key.TokenURI, strings.NewReader(values.Encode()))
+	if err != nil {
+		return "", 0, err
+	}
+	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+
+	client := &http.Client{Timeout: 15 * time.Second}
+	resp, err := client.Do(req)
+	if err != nil {
+		return "", 0, fmt.Errorf("service account token request failed: %w", err)
+	}
+	defer func() { _ = resp.Body.Close() }()
+
+	// Cap the body at 1 MiB. Decoding is best effort because error replies are
+	// not guaranteed to be JSON; the raw body is used as the message then.
+	body, readErr := io.ReadAll(io.LimitReader(resp.Body, 1<<20))
+	var parsed vertexTokenResponse
+	_ = json.Unmarshal(body, &parsed)
+	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+		msg := strings.TrimSpace(parsed.ErrorDesc)
+		if msg == "" {
+			msg = strings.TrimSpace(parsed.Error)
+		}
+		if msg == "" {
+			msg = string(bytes.TrimSpace(body))
+		}
+		return "", 0, fmt.Errorf("service account token request returned %d: %s", resp.StatusCode, msg)
+	}
+	if readErr != nil {
+		// Report the read failure itself rather than a misleading
+		// "missing access_token" caused by a truncated body.
+		return "", 0, fmt.Errorf("read service account token response: %w", readErr)
+	}
+	if strings.TrimSpace(parsed.AccessToken) == "" {
+		return "", 0, errors.New("service account token response missing access_token")
+	}
+	ttl := time.Duration(parsed.ExpiresIn) * time.Second
+	if ttl <= 0 {
+		ttl = time.Hour
+	}
+	if ttl > vertexServiceAccountCacheSkew {
+		ttl -= vertexServiceAccountCacheSkew
+	}
+	return parsed.AccessToken, ttl, nil
+}
+
+// buildVertexGeminiURL returns the Vertex AI endpoint for a Google publisher
+// model. action must be generateContent, streamGenerateContent or countTokens;
+// stream appends "?alt=sse". An empty location falls back to
+// vertexDefaultLocation, and the "global" location uses the host without a
+// region prefix.
+func buildVertexGeminiURL(projectID, location, model, action string, stream bool) (string, error) {
+	projectID = strings.TrimSpace(projectID)
+	location = strings.TrimSpace(location)
+	model = strings.TrimSpace(model)
+	action = strings.TrimSpace(action)
+	if projectID == "" {
+		return "", errors.New("vertex project_id is required")
+	}
+	if location == "" {
+		location = vertexDefaultLocation
+	}
+	if !vertexLocationPattern.MatchString(location) {
+		return "", fmt.Errorf("invalid vertex location: %s", location)
+	}
+	if model == "" {
+		return "", errors.New("vertex model is required")
+	}
+	switch action {
+	case "generateContent", "streamGenerateContent", "countTokens":
+	default:
+		return "", fmt.Errorf("unsupported vertex gemini action: %s", action)
+	}
+	host := fmt.Sprintf("%s-aiplatform.googleapis.com", location)
+	if location == "global" {
+		host = "aiplatform.googleapis.com"
+	}
+	u := fmt.Sprintf(
+		"https://%s/v1/projects/%s/locations/%s/publishers/google/models/%s:%s",
+		host,
+		url.PathEscape(projectID),
+		url.PathEscape(location),
+		url.PathEscape(model),
+		action,
+	)
+	if stream {
+		u += "?alt=sse"
+	}
+	return u, nil
+}
+
+// buildVertexAnthropicURL returns the Vertex AI endpoint for an Anthropic
+// publisher model, using streamRawPredict when stream is true and rawPredict
+// otherwise. Location handling matches buildVertexGeminiURL, and the "@" in a
+// dated model ID is kept literal in the path.
+func buildVertexAnthropicURL(projectID, location, model string, stream bool) (string, error) {
+	projectID = strings.TrimSpace(projectID)
+	location = strings.TrimSpace(location)
+	model = strings.TrimSpace(model)
+	if projectID == "" {
+		return "", errors.New("vertex project_id is required")
+	}
+	if location == "" {
+		location = vertexDefaultLocation
+	}
+	if !vertexLocationPattern.MatchString(location) {
+		return "", fmt.Errorf("invalid vertex location: %s", location)
+	}
+	if model == "" {
+		return "", errors.New("vertex model is required")
+	}
+	action := "rawPredict"
+	if stream {
+		action = "streamRawPredict"
+	}
+	host := fmt.Sprintf("%s-aiplatform.googleapis.com", location)
+	if location == "global" {
+		host = "aiplatform.googleapis.com"
+	}
+	escapedModel := strings.ReplaceAll(url.PathEscape(model), "%40", "@")
+	return fmt.Sprintf(
+		"https://%s/v1/projects/%s/locations/%s/publishers/anthropic/models/%s:%s",
+		host,
+		url.PathEscape(projectID),
+		url.PathEscape(location),
+		escapedModel,
+		action,
+	), nil
+}
+
+// normalizeVertexAnthropicModelID rewrites an Anthropic model ID ending in
+// "-YYYYMMDD" to the "name@YYYYMMDD" form. IDs that already end in "@" plus
+// eight digits, and IDs without a date suffix, are returned trimmed but
+// otherwise unchanged.
+func normalizeVertexAnthropicModelID(model string) string {
+	model = strings.TrimSpace(model)
+	if model == "" || vertexAnthropicAlreadyDatedIDPattern.MatchString(model) {
+		return model
+	}
+	if m := vertexAnthropicDatedModelIDPattern.FindStringSubmatch(model); len(m) == 3 {
+		return m[1] + "@" + m[2]
+	}
+	return model
+}
+
+// buildVertexAnthropicRequestBody converts an Anthropic Messages request body
+// for Vertex AI: the "model" field is removed (the URL carries the model) and
+// "anthropic_version" is set to vertexAnthropicVersion. It returns an error
+// when body is not a JSON object.
+func buildVertexAnthropicRequestBody(body []byte) ([]byte, error) {
+	var payload map[string]any
+	if err := json.Unmarshal(body, &payload); err != nil {
+		return nil, fmt.Errorf("parse anthropic vertex request body: %w", err)
+	}
+	if payload == nil {
+		// A JSON null decodes without error but leaves the map nil, and the
+		// assignment below would panic on a nil map.
+		return nil, errors.New("parse anthropic vertex request body: expected a JSON object")
+	}
+	delete(payload, "model")
+	payload["anthropic_version"] = vertexAnthropicVersion
+	return json.Marshal(payload)
+}
diff --git a/backend/internal/service/vertex_service_account_test.go b/backend/internal/service/vertex_service_account_test.go
new file mode 100644
index 00000000..519f5b2f
--- /dev/null
+++ b/backend/internal/service/vertex_service_account_test.go
@@ -0,0 +1,77 @@
+package service
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+	"github.com/tidwall/gjson"
+)
+
+func TestBuildVertexGeminiURL(t *testing.T) {
+	got, err := buildVertexGeminiURL("my-project", "us-central1", "gemini-3-pro", "streamGenerateContent", true)
+	require.NoError(t, err)
+	require.Equal(t, "https://us-central1-aiplatform.googleapis.com/v1/projects/my-project/locations/us-central1/publishers/google/models/gemini-3-pro:streamGenerateContent?alt=sse", got)
+}
+
+func TestBuildVertexGeminiURLUsesGlobalEndpointHost(t *testing.T) {
+	got, err := buildVertexGeminiURL("my-project", "global", "gemini-3-flash-preview", "streamGenerateContent", true)
+	require.NoError(t, err)
+	require.Equal(t, "https://aiplatform.googleapis.com/v1/projects/my-project/locations/global/publishers/google/models/gemini-3-flash-preview:streamGenerateContent?alt=sse", got)
+}
+
+func TestBuildVertexAnthropicURL(t *testing.T) {
+	got, err := buildVertexAnthropicURL("my-project", "us-east5", "claude-sonnet-4-5@20250929", false)
+	require.NoError(t, err)
+	require.Equal(t, "https://us-east5-aiplatform.googleapis.com/v1/projects/my-project/locations/us-east5/publishers/anthropic/models/claude-sonnet-4-5@20250929:rawPredict", got)
+}
+
+func TestBuildVertexAnthropicURLUsesGlobalEndpointHost(t *testing.T) {
+	got, err := buildVertexAnthropicURL("my-project", "global", "claude-haiku-4-5@20251001", true)
+	require.NoError(t, err)
+	require.Equal(t, "https://aiplatform.googleapis.com/v1/projects/my-project/locations/global/publishers/anthropic/models/claude-haiku-4-5@20251001:streamRawPredict", got)
+}
+
+func TestNormalizeVertexAnthropicModelID(t *testing.T) {
+	require.Equal(t, "claude-sonnet-4-5@20250929", normalizeVertexAnthropicModelID("claude-sonnet-4-5-20250929"))
+	require.Equal(t, "claude-sonnet-4-5@20250929", normalizeVertexAnthropicModelID("claude-sonnet-4-5@20250929"))
+	require.Equal(t, "claude-sonnet-4-6", normalizeVertexAnthropicModelID("claude-sonnet-4-6"))
+}
+
+func TestBuildVertexAnthropicRequestBody(t *testing.T) {
+	got, err := buildVertexAnthropicRequestBody([]byte(`{"model":"claude-sonnet-4-5","anthropic_version":"2023-06-01","max_tokens":64,"messages":[{"role":"user","content":"hi"}]}`))
+	require.NoError(t, err)
+	require.False(t, gjson.GetBytes(got, "model").Exists()) // the key must be removed, not merely left empty
+	require.Equal(t, vertexAnthropicVersion, gjson.GetBytes(got, "anthropic_version").String())
+	require.Equal(t, int64(64), gjson.GetBytes(got, "max_tokens").Int())
+	require.Equal(t, "hi", gjson.GetBytes(got, "messages.0.content").String())
+}
+
+func TestBuildVertexGeminiURLRejectsInvalidLocation(t *testing.T) {
+	_, err := buildVertexGeminiURL("my-project", "us-central1/path", "gemini-3-pro", "generateContent", false)
+	require.Error(t, err)
+	require.Contains(t, err.Error(), "invalid vertex location")
+}
+
+func TestParseVertexServiceAccountKey(t *testing.T) {
+	raw := `{
+		"type": "service_account",
+		"project_id": "vertex-proj",
+		"private_key_id": "kid",
+		"private_key": "-----BEGIN PRIVATE KEY-----\nabc\n-----END PRIVATE KEY-----\n",
+		"client_email": "svc@vertex-proj.iam.gserviceaccount.com"
+	}`
+	account := &Account{
+		Type:     AccountTypeServiceAccount,
+		Platform: PlatformGemini,
+		Credentials: map[string]any{
+			"service_account_json": raw,
+		},
+	}
+	key, err := parseVertexServiceAccountKey(account)
+	require.NoError(t, err)
+	require.Equal(t, "vertex-proj", key.ProjectID)
+	require.Equal(t, "svc@vertex-proj.iam.gserviceaccount.com", key.ClientEmail)
+	require.Equal(t, vertexDefaultTokenURL, key.TokenURI)
+	require.True(t, strings.Contains(key.PrivateKey, "BEGIN PRIVATE KEY"))
+}
diff --git a/frontend/src/components/account/CreateAccountModal.vue b/frontend/src/components/account/CreateAccountModal.vue
index 96673f8f..e7a790ec 100644
--- a/frontend/src/components/account/CreateAccountModal.vue
+++ b/frontend/src/components/account/CreateAccountModal.vue
@@ -153,7 +153,7 @@
       <!-- Account Type Selection (Anthropic) -->
       <div v-if="form.platform === 'anthropic'">
         <label class="input-label">{{ t('admin.accounts.accountType') }}</label>
-        <div class="mt-2 grid grid-cols-3 gap-3" data-tour="account-form-type">
+        <div class="mt-2 grid grid-cols-2 gap-3 sm:grid-cols-4" data-tour="account-form-type">
           <button
             type="button"
             @click="accountCategory = 'oauth-based'"
@@ -244,6 +244,39 @@
             </div>
           </button>
 
+          <button
+            type="button"
+            @click="accountCategory = 'service_account'"
+            :class="[
+              'flex items-center gap-3 rounded-lg border-2 p-3 text-left transition-all',
+              accountCategory === 'service_account'
+                ? 'border-sky-500 bg-sky-50 dark:bg-sky-900/20'
+                : 'border-gray-200 hover:border-sky-300 dark:border-dark-600 dark:hover:border-sky-700'
+            ]"
+          >
+            <div
+              :class="[
+                'flex h-8 w-8 shrink-0 items-center justify-center rounded-lg',
+                accountCategory === 'service_account'
+                  ? 'bg-sky-500 text-white'
+                  : 'bg-gray-100 text-gray-500 dark:bg-dark-600 dark:text-gray-400'
+              ]"
+            >
+              <Icon name="cloud" size="sm" />
+            </div>
+            <div>
+              <span class="block text-sm font-medium text-gray-900 dark:text-white">Vertex</span>
+              <span class="text-xs text-gray-500 dark:text-gray-400">Service Account</span>
+            </div>
+          </button>
+
+        </div>
+
+        <div
+          v-if="accountCategory === 'service_account'"
+          class="mt-3 rounded-lg border border-sky-200 bg-sky-50 px-3 py-2 text-xs text-sky-800 dark:border-sky-800/40 dark:bg-sky-900/20 dark:text-sky-200"
+        >
+          <p>使用 Google Cloud Service Account JSON 通过 Vertex AI 调用 Anthropic Claude。建议配置模型映射，将客户端 Claude 模型名映射到 Vertex 模型 ID。</p>
         </div>
       </div>
 
@@ -302,6 +335,7 @@
               <span class="text-xs text-gray-500 dark:text-gray-400">{{ t('admin.accounts.types.responsesApi') }}</span>
             </div>
           </button>
+
         </div>
       </div>
 
@@ -320,7 +354,7 @@
             {{ t('admin.accounts.gemini.helpButton') }}
           </button>
         </div>
-        <div class="mt-2 grid grid-cols-2 gap-3" data-tour="account-form-type">
+        <div class="mt-2 grid grid-cols-3 gap-3" data-tour="account-form-type">
           <button
             type="button"
             @click="accountCategory = 'oauth-based'"
@@ -392,6 +426,36 @@
               </span>
             </div>
           </button>
+
+          <button
+            type="button"
+            @click="accountCategory = 'service_account'"
+            :class="[
+              'flex items-center gap-3 rounded-lg border-2 p-3 text-left transition-all',
+              accountCategory === 'service_account'
+                ? 'border-sky-500 bg-sky-50 dark:bg-sky-900/20'
+                : 'border-gray-200 hover:border-sky-300 dark:border-dark-600 dark:hover:border-sky-700'
+            ]"
+          >
+            <div
+              :class="[
+                'flex h-8 w-8 shrink-0 items-center justify-center rounded-lg',
+                accountCategory === 'service_account'
+                  ? 'bg-sky-500 text-white'
+                  : 'bg-gray-100 text-gray-500 dark:bg-dark-600 dark:text-gray-400'
+              ]"
+            >
+              <Icon name="cloud" size="sm" />
+            </div>
+            <div>
+              <span class="block text-sm font-medium text-gray-900 dark:text-white">
+                Vertex
+              </span>
+              <span class="text-xs text-gray-500 dark:text-gray-400">
+                Service Account
+              </span>
+            </div>
+          </button>
         </div>
 
         <div
@@ -411,6 +475,13 @@
           </div>
         </div>
 
+        <div
+          v-if="accountCategory === 'service_account'"
+          class="mt-3 rounded-lg border border-sky-200 bg-sky-50 px-3 py-2 text-xs text-sky-800 dark:border-sky-800/40 dark:bg-sky-900/20 dark:text-sky-200"
+        >
+          <p>使用 Google Cloud Service Account JSON 访问 Vertex AI Gemini。建议将 Vertex 账号放入独立分组，避免和 AI Studio/Gemini OAuth 同模型混调。</p>
+        </div>
+
         <!-- OAuth Type Selection (only show when oauth-based is selected) -->
         <div v-if="accountCategory === 'oauth-based'" class="mt-4">
           <label class="input-label">{{ t('admin.accounts.oauth.gemini.oauthTypeLabel') }}</label>
@@ -610,7 +681,7 @@
         </div>
 
         <!-- Tier selection (used as fallback when auto-detection is unavailable/fails) -->
-        <div class="mt-4">
+        <div v-if="accountCategory !== 'service_account'" class="mt-4">
           <label class="input-label">{{ t('admin.accounts.gemini.tier.label') }}</label>
           <div class="mt-2">
             <select
@@ -729,6 +800,96 @@
         </div>
       </div>
 
+      <!-- Vertex Service Account -->
+      <div v-if="(form.platform === 'gemini' || form.platform === 'anthropic') && accountCategory === 'service_account'" class="space-y-4">
+        <div>
+          <label class="input-label">Service Account JSON</label>
+          <input
+            ref="vertexServiceAccountFileInput"
+            type="file"
+            accept="application/json,.json"
+            class="hidden"
+            @change="handleVertexServiceAccountFile"
+          />
+          <div
+            :class="[
+              'rounded-lg border-2 border-dashed px-4 py-5 transition-colors',
+              vertexServiceAccountDragActive
+                ? 'border-sky-500 bg-sky-50 dark:border-sky-500 dark:bg-sky-900/20'
+                : 'border-gray-300 bg-gray-50 hover:border-sky-400 hover:bg-sky-50/60 dark:border-dark-500 dark:bg-dark-700/40 dark:hover:border-sky-600 dark:hover:bg-sky-900/10'
+            ]"
+            @dragenter.prevent="vertexServiceAccountDragActive = true"
+            @dragover.prevent="vertexServiceAccountDragActive = true"
+            @dragleave.prevent="vertexServiceAccountDragActive = false"
+            @drop.prevent="handleVertexServiceAccountDrop"
+          >
+            <div class="flex flex-col gap-3 sm:flex-row sm:items-center sm:justify-between">
+              <div class="min-w-0">
+                <div class="flex items-center gap-2 text-sm font-medium text-gray-900 dark:text-white">
+                  <Icon name="upload" size="sm" />
+                  <span>{{ vertexClientEmail ? '已读取 Service Account JSON' : '拖入 Service Account JSON' }}</span>
+                </div>
+                <p class="mt-1 text-xs text-gray-500 dark:text-gray-400">
+                  {{ vertexClientEmail ? '密钥内容不会在表单中显示。' : '把 .json 文件拖到这里，或点击按钮选择文件。' }}
+                </p>
+              </div>
+              <button
+                type="button"
+                class="btn btn-secondary shrink-0"
+                @click="vertexServiceAccountFileInput?.click()"
+              >
+                <Icon name="upload" size="sm" />
+                选择 JSON
+              </button>
+            </div>
+            <div
+              v-if="vertexClientEmail"
+              class="mt-3 rounded-md border border-sky-200 bg-white px-3 py-2 text-xs text-sky-900 dark:border-sky-800/50 dark:bg-dark-800 dark:text-sky-200"
+            >
+              <div class="truncate">Project ID: <span class="font-mono">{{ vertexProjectId }}</span></div>
+              <div class="truncate">Client Email: <span class="font-mono">{{ vertexClientEmail }}</span></div>
+            </div>
+          </div>
+          <p class="input-hint">上传或拖入 JSON 后会自动读取 project_id，密钥内容仅用于创建账号提交。</p>
+        </div>
+
+        <div class="grid grid-cols-1 gap-4 sm:grid-cols-2">
+          <div>
+            <label class="input-label">Project ID</label>
+            <input
+              v-model="vertexProjectId"
+              type="text"
+              class="input font-mono"
+              readonly
+              placeholder="从 JSON 自动读取"
+            />
+          </div>
+          <div>
+            <label class="input-label">Location</label>
+            <select
+              v-model="vertexLocation"
+              required
+              class="input font-mono"
+            >
+              <optgroup
+                v-for="group in vertexLocationOptions"
+                :key="group.label"
+                :label="group.label"
+              >
+                <option
+                  v-for="option in group.options"
+                  :key="option.value"
+                  :value="option.value"
+                >
+                  {{ option.label }}
+                </option>
+              </optgroup>
+            </select>
+            <p class="input-hint">不同 Vertex 模型可用 location 可能不同，这里选择账号默认 endpoint location。</p>
+          </div>
+        </div>
+      </div>
+
       <!-- Antigravity model restriction (applies to OAuth + Upstream) -->
       <!-- Antigravity 只支持模型映射模式，不支持白名单模式 -->
       <div v-if="form.platform === 'antigravity'" class="border-t border-gray-200 pt-4 dark:border-dark-600">
@@ -3085,7 +3246,7 @@ interface TempUnschedRuleForm {
 // State
 const step = ref(1)
 const submitting = ref(false)
-const accountCategory = ref<'oauth-based' | 'apikey' | 'bedrock'>('oauth-based') // UI selection for account category
+const accountCategory = ref<'oauth-based' | 'apikey' | 'bedrock' | 'service_account'>('oauth-based') // UI selection for account category
 const addMethod = ref<AddMethod>('oauth') // For oauth-based: 'oauth' or 'setup-token'
 const apiKeyBaseUrl = ref('https://api.anthropic.com')
 const apiKeyValue = ref('')
@@ -3151,6 +3312,58 @@ const bedrockSessionToken = ref('')
 const bedrockRegion = ref('us-east-1')
 const bedrockForceGlobal = ref(false)
 const bedrockApiKeyValue = ref('')
+const vertexServiceAccountFileInput = ref<HTMLInputElement | null>(null)
+const vertexServiceAccountJson = ref('')
+const vertexProjectId = ref('')
+const vertexClientEmail = ref('')
+const vertexLocation = ref('global')
+const vertexServiceAccountDragActive = ref(false)
+const vertexLocationOptions = [
+  {
+    label: 'Common',
+    options: [
+      { value: 'us-central1', label: 'us-central1 (Iowa)' },
+      { value: 'global', label: 'global' },
+      { value: 'us', label: 'us' },
+      { value: 'eu', label: 'eu' }
+    ]
+  },
+  {
+    label: 'United States',
+    options: [
+      { value: 'us-east1', label: 'us-east1 (South Carolina)' },
+      { value: 'us-east4', label: 'us-east4 (Northern Virginia)' },
+      { value: 'us-east5', label: 'us-east5 (Columbus)' },
+      { value: 'us-south1', label: 'us-south1 (Dallas)' },
+      { value: 'us-west1', label: 'us-west1 (Oregon)' },
+      { value: 'us-west4', label: 'us-west4 (Las Vegas)' }
+    ]
+  },
+  {
+    label: 'Europe',
+    options: [
+      { value: 'europe-west1', label: 'europe-west1 (Belgium)' },
+      { value: 'europe-west2', label: 'europe-west2 (London)' },
+      { value: 'europe-west3', label: 'europe-west3 (Frankfurt)' },
+      { value: 'europe-west4', label: 'europe-west4 (Netherlands)' },
+      { value: 'europe-west6', label: 'europe-west6 (Zurich)' },
+      { value: 'europe-west8', label: 'europe-west8 (Milan)' },
+      { value: 'europe-west9', label: 'europe-west9 (Paris)' }
+    ]
+  },
+  {
+    label: 'Asia Pacific',
+    options: [
+      { value: 'asia-east1', label: 'asia-east1 (Taiwan)' },
+      { value: 'asia-east2', label: 'asia-east2 (Hong Kong)' },
+      { value: 'asia-northeast1', label: 'asia-northeast1 (Tokyo)' },
+      { value: 'asia-northeast3', label: 'asia-northeast3 (Seoul)' },
+      { value: 'asia-south1', label: 'asia-south1 (Mumbai)' },
+      { value: 'asia-southeast1', label: 'asia-southeast1 (Singapore)' },
+      { value: 'australia-southeast1', label: 'australia-southeast1 (Sydney)' }
+    ]
+  }
+] as const
 const tempUnschedEnabled = ref(false)
 const tempUnschedRules = ref<TempUnschedRuleForm[]>([])
 const getModelMappingKey = createStableObjectKeyResolver<ModelMapping>('create-model-mapping')
@@ -3397,7 +3610,7 @@ watch(
 
 // Sync form.type based on accountCategory, addMethod, and platform-specific type
 watch(
-  [accountCategory, addMethod, antigravityAccountType],
+  [accountCategory, addMethod, antigravityAccountType, () => form.platform],
   ([category, method, agType]) => {
     // Antigravity upstream 类型（实际创建为 apikey）
     if (form.platform === 'antigravity' && agType === 'upstream') {
@@ -3409,7 +3622,9 @@ watch(
       form.type = 'bedrock' as AccountType
       return
     }
-    if (category === 'oauth-based') {
+    if ((form.platform === 'gemini' || form.platform === 'anthropic') && category === 'service_account') {
+      form.type = 'service_account' as AccountType
+    } else if (category === 'oauth-based') {
       form.type = method as AccountType // 'oauth' or 'setup-token'
     } else {
       form.type = 'apikey'
@@ -3447,6 +3662,12 @@ watch(
       antigravityModelMappings.value = []
       antigravityModelRestrictionMode.value = 'mapping'
     }
+    if (newPlatform !== 'gemini' && newPlatform !== 'anthropic' && accountCategory.value === 'service_account') {
+      accountCategory.value = 'oauth-based'
+    }
+    if (newPlatform !== 'anthropic' && accountCategory.value === 'bedrock') {
+      accountCategory.value = 'oauth-based'
+    }
     // Reset Bedrock fields when switching platforms
     bedrockAccessKeyId.value = ''
     bedrockSecretAccessKey.value = ''
@@ -3455,6 +3676,10 @@ watch(
     bedrockForceGlobal.value = false
     bedrockAuthMode.value = 'sigv4'
     bedrockApiKeyValue.value = ''
+    vertexServiceAccountJson.value = ''
+    vertexProjectId.value = ''
+    vertexClientEmail.value = ''
+    vertexLocation.value = 'global'
     // Reset Anthropic/Antigravity-specific settings when switching to other platforms
     if (newPlatform !== 'anthropic' && newPlatform !== 'antigravity') {
       interceptWarmupRequests.value = false
@@ -3886,6 +4111,10 @@ const resetForm = () => {
   antigravityAccountType.value = 'oauth'
   upstreamBaseUrl.value = ''
   upstreamApiKey.value = ''
+  vertexServiceAccountJson.value = ''
+  vertexProjectId.value = ''
+  vertexClientEmail.value = ''
+  vertexLocation.value = 'global'
   tempUnschedEnabled.value = false
   tempUnschedRules.value = []
   geminiOAuthType.value = 'code_assist'
@@ -4009,6 +4238,52 @@ const normalizePoolModeRetryCount = (value: number) => {
   return normalized
 }
 
+// Validates a service-account key and stores it; empty or invalid input reports an error and leaves the current state untouched.
+const applyVertexServiceAccountJson = (value: string) => {
+  const raw = value.trim()
+  if (!raw) {
+    appStore.showError('请上传 Service Account JSON')
+    return false
+  }
+  try {
+    const parsed = JSON.parse(raw) as Record<string, unknown>
+    const projectId = typeof parsed.project_id === 'string' ? parsed.project_id.trim() : ''
+    const clientEmail = typeof parsed.client_email === 'string' ? parsed.client_email.trim() : ''
+    const privateKey = typeof parsed.private_key === 'string' ? parsed.private_key.trim() : ''
+    if (!projectId || !clientEmail || !privateKey) {
+      appStore.showError('Service Account JSON 缺少 project_id、client_email 或 private_key')
+      return false
+    }
+    vertexProjectId.value = projectId
+    vertexClientEmail.value = clientEmail
+    vertexServiceAccountJson.value = JSON.stringify(parsed)
+    return true
+  } catch {
+    appStore.showError('Service Account JSON 格式无效')
+    return false
+  }
+}
+
+const parseVertexServiceAccountJson = () => applyVertexServiceAccountJson(vertexServiceAccountJson.value)
+
+const handleVertexServiceAccountFile = async (event: Event) => {
+  const input = event.target as HTMLInputElement
+  const file = input.files?.[0]
+  if (!file) return
+  try {
+    applyVertexServiceAccountJson(await file.text())
+  } finally {
+    input.value = ''
+  }
+}
+
+const handleVertexServiceAccountDrop = async (event: DragEvent) => {
+  vertexServiceAccountDragActive.value = false
+  const file = event.dataTransfer?.files?.[0]
+  if (!file) return
+  applyVertexServiceAccountJson(await file.text())
+}
+
 const handleSubmit = async () => {
   // For OAuth-based type, handle OAuth flow (goes to step 2)
   if (isOAuthFlow.value) {
@@ -4122,6 +4397,29 @@ const handleSubmit = async () => {
     return
   }
 
+  if ((form.platform === 'gemini' || form.platform === 'anthropic') && accountCategory.value === 'service_account') {
+    if (!form.name.trim()) {
+      appStore.showError(t('admin.accounts.pleaseEnterAccountName'))
+      return
+    }
+    if (!parseVertexServiceAccountJson()) {
+      return
+    }
+    if (!vertexLocation.value.trim()) {
+      appStore.showError('请填写 Vertex location')
+      return
+    }
+    const credentials: Record<string, unknown> = {
+      service_account_json: vertexServiceAccountJson.value.trim(),
+      project_id: vertexProjectId.value.trim(),
+      client_email: vertexClientEmail.value.trim(),
+      location: vertexLocation.value.trim(),
+      tier_id: 'vertex'
+    }
+    await createAccountAndFinish(form.platform, 'service_account' as AccountType, credentials)
+    return
+  }
+
   // For apikey type, create directly
   if (!apiKeyValue.value.trim()) {
     appStore.showError(t('admin.accounts.pleaseEnterApiKey'))
diff --git a/frontend/src/components/account/EditAccountModal.vue b/frontend/src/components/account/EditAccountModal.vue
index 42211ba7..69e2186b 100644
--- a/frontend/src/components/account/EditAccountModal.vue
+++ b/frontend/src/components/account/EditAccountModal.vue
@@ -567,6 +567,46 @@
         </div>
       </div>
 
+      <!-- Vertex Service Account -->
+      <div v-if="(account.platform === 'gemini' || account.platform === 'anthropic') && account.type === 'service_account'" class="space-y-4">
+        <div class="grid grid-cols-1 gap-4 sm:grid-cols-2">
+          <div>
+            <label class="input-label">Project ID</label>
+            <input
+              v-model="editVertexProjectId"
+              type="text"
+              class="input font-mono"
+              readonly
+              placeholder="从 JSON 自动读取"
+            />
+            <p class="input-hint">Service Account JSON 不在编辑页显示；需要更换 JSON 时请删除账号后重新创建。</p>
+          </div>
+          <div>
+            <label class="input-label">Location</label>
+            <select
+              v-model="editVertexLocation"
+              required
+              class="input font-mono"
+            >
+              <optgroup
+                v-for="group in vertexLocationOptions"
+                :key="group.label"
+                :label="group.label"
+              >
+                <option
+                  v-for="option in group.options"
+                  :key="option.value"
+                  :value="option.value"
+                >
+                  {{ option.label }}
+                </option>
+              </optgroup>
+            </select>
+            <p class="input-hint">不同 Vertex 模型可用 location 可能不同，这里选择账号默认 endpoint location。</p>
+          </div>
+        </div>
+      </div>
+
       <!-- Bedrock fields (for bedrock type, both SigV4 and API Key modes) -->
       <div v-if="account.type === 'bedrock'" class="space-y-4">
         <!-- SigV4 fields -->
@@ -1987,6 +2027,55 @@ const editBedrockSessionToken = ref('')
 const editBedrockRegion = ref('')
 const editBedrockForceGlobal = ref(false)
 const editBedrockApiKeyValue = ref('')
+const editVertexProjectId = ref('')
+const editVertexClientEmail = ref('')
+const editVertexLocation = ref('us-central1')
+const vertexLocationOptions = [
+  {
+    label: 'Common',
+    options: [
+      { value: 'us-central1', label: 'us-central1 (Iowa)' },
+      { value: 'global', label: 'global' },
+      { value: 'us', label: 'us' },
+      { value: 'eu', label: 'eu' }
+    ]
+  },
+  {
+    label: 'United States',
+    options: [
+      { value: 'us-east1', label: 'us-east1 (South Carolina)' },
+      { value: 'us-east4', label: 'us-east4 (Northern Virginia)' },
+      { value: 'us-east5', label: 'us-east5 (Columbus)' },
+      { value: 'us-south1', label: 'us-south1 (Dallas)' },
+      { value: 'us-west1', label: 'us-west1 (Oregon)' },
+      { value: 'us-west4', label: 'us-west4 (Las Vegas)' }
+    ]
+  },
+  {
+    label: 'Europe',
+    options: [
+      { value: 'europe-west1', label: 'europe-west1 (Belgium)' },
+      { value: 'europe-west2', label: 'europe-west2 (London)' },
+      { value: 'europe-west3', label: 'europe-west3 (Frankfurt)' },
+      { value: 'europe-west4', label: 'europe-west4 (Netherlands)' },
+      { value: 'europe-west6', label: 'europe-west6 (Zurich)' },
+      { value: 'europe-west8', label: 'europe-west8 (Milan)' },
+      { value: 'europe-west9', label: 'europe-west9 (Paris)' }
+    ]
+  },
+  {
+    label: 'Asia Pacific',
+    options: [
+      { value: 'asia-east1', label: 'asia-east1 (Taiwan)' },
+      { value: 'asia-east2', label: 'asia-east2 (Hong Kong)' },
+      { value: 'asia-northeast1', label: 'asia-northeast1 (Tokyo)' },
+      { value: 'asia-northeast3', label: 'asia-northeast3 (Seoul)' },
+      { value: 'asia-south1', label: 'asia-south1 (Mumbai)' },
+      { value: 'asia-southeast1', label: 'asia-southeast1 (Singapore)' },
+      { value: 'australia-southeast1', label: 'australia-southeast1 (Sydney)' }
+    ]
+  }
+] as const
 const isBedrockAPIKeyMode = computed(() =>
   props.account?.type === 'bedrock' &&
   (props.account?.credentials as Record<string, unknown>)?.auth_mode === 'apikey'
@@ -2246,6 +2335,9 @@ const syncFormFromAccount = (newAccount: Account | null) => {
   const credentials = newAccount.credentials as Record<string, unknown> | undefined
   interceptWarmupRequests.value = credentials?.intercept_warmup_requests === true
   autoPauseOnExpired.value = newAccount.auto_pause_on_expired === true
+  editVertexProjectId.value = ''
+  editVertexClientEmail.value = ''
+  editVertexLocation.value = 'us-central1'
 
   // Load mixed scheduling setting (only for antigravity accounts)
   mixedScheduling.value = false
@@ -2467,6 +2559,11 @@ const syncFormFromAccount = (newAccount: Account | null) => {
   } else if (newAccount.type === 'upstream' && newAccount.credentials) {
     const credentials = newAccount.credentials as Record<string, unknown>
     editBaseUrl.value = (credentials.base_url as string) || ''
+  } else if ((newAccount.platform === 'gemini' || newAccount.platform === 'anthropic') && newAccount.type === 'service_account' && newAccount.credentials) {
+    const credentials = newAccount.credentials as Record<string, unknown>
+    editVertexProjectId.value = (credentials.project_id as string) || ''
+    editVertexClientEmail.value = (credentials.client_email as string) || ''
+    editVertexLocation.value = (credentials.location as string) || (credentials.vertex_location as string) || 'us-central1'
   } else {
     const platformDefaultUrl =
       newAccount.platform === 'openai'
@@ -3057,6 +3154,38 @@ const handleSubmit = async () => {
         return
       }
 
+      updatePayload.credentials = newCredentials
+    } else if ((props.account.platform === 'gemini' || props.account.platform === 'anthropic') && props.account.type === 'service_account') {
+      const currentCredentials = (props.account.credentials as Record<string, unknown>) || {}
+      const newCredentials: Record<string, unknown> = { ...currentCredentials }
+
+      if (!editVertexProjectId.value.trim()) {
+        appStore.showError('Service Account JSON 缺少 project_id')
+        return
+      }
+      if (!editVertexClientEmail.value.trim()) {
+        appStore.showError('Service Account JSON 缺少 client_email')
+        return
+      }
+      if (!editVertexLocation.value.trim()) {
+        appStore.showError('请填写 Vertex location')
+        return
+      }
+
+      if (!currentCredentials.service_account_json && !currentCredentials.service_account) {
+        appStore.showError('请上传 Service Account JSON')
+        return
+      }
+      newCredentials.project_id = editVertexProjectId.value.trim()
+      newCredentials.client_email = editVertexClientEmail.value.trim()
+      newCredentials.location = editVertexLocation.value.trim()
+      newCredentials.tier_id = 'vertex'
+
+      applyInterceptWarmup(newCredentials, interceptWarmupRequests.value, 'edit')
+      if (!applyTempUnschedConfig(newCredentials)) {
+        return
+      }
+
       updatePayload.credentials = newCredentials
     } else if (props.account.type === 'bedrock') {
       const currentCredentials = (props.account.credentials as Record<string, unknown>) || {}
diff --git a/frontend/src/components/common/PlatformTypeBadge.vue b/frontend/src/components/common/PlatformTypeBadge.vue
index 1ebc8892..1c7b08c0 100644
--- a/frontend/src/components/common/PlatformTypeBadge.vue
+++ b/frontend/src/components/common/PlatformTypeBadge.vue
@@ -25,6 +25,7 @@
         <!-- Setup Token icon -->
         <Icon v-else-if="type === 'setup-token'" name="shield" size="xs" />
         <!-- API Key icon -->
+        <Icon v-else-if="type === 'service_account'" name="cloud" size="xs" />
         <Icon v-else name="key" size="xs" />
         <span>{{ typeLabel }}</span>
       </span>
@@ -88,6 +89,8 @@ const typeLabel = computed(() => {
       return 'Key'
     case 'bedrock':
       return 'AWS'
+    case 'service_account':
+      return 'Vertex'
     default:
       return props.type
   }
diff --git a/frontend/src/types/index.ts b/frontend/src/types/index.ts
index 2a15ad00..80789011 100644
--- a/frontend/src/types/index.ts
+++ b/frontend/src/types/index.ts
@@ -641,7 +641,7 @@ export interface UpdateGroupRequest {
 // ==================== Account & Proxy Types ====================
 
 export type AccountPlatform = 'anthropic' | 'openai' | 'gemini' | 'antigravity'
-export type AccountType = 'oauth' | 'setup-token' | 'apikey' | 'upstream' | 'bedrock'
+export type AccountType = 'oauth' | 'setup-token' | 'apikey' | 'upstream' | 'bedrock' | 'service_account'
 export type OAuthAddMethod = 'oauth' | 'setup-token'
 export type ProxyProtocol = 'http' | 'https' | 'socks5' | 'socks5h'
 

From 9b6dcc57bda7daf87dc4b0552cd0267a2a163782 Mon Sep 17 00:00:00 2001
From: shaw <shaw-wei@foxmail.com>
Date: Sun, 26 Apr 2026 12:31:52 +0800
Subject: [PATCH 05/46] =?UTF-8?q?feat(affiliate):=20=E5=AE=8C=E5=96=84?=
 =?UTF-8?q?=E9=82=80=E8=AF=B7=E8=BF=94=E5=88=A9=E7=B3=BB=E7=BB=9F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

  - 修复返利不到账的根因：tryClaimAffiliateRebateAudit 中 PostgreSQL 参数类型推断冲突
  - 补全 OAuth 注册路径（LinuxDo/OIDC/WeChat/Pending Flow）的邀请码绑定
  - 前端 OAuth 注册页面传递 aff_code 参数
  - 新增返利冻结期机制：可配置冻结时间，到期后自动解冻（懒解冻）
  - 新增返利有效期：绑定后 N 天内有效，过期不再产生返利
  - 新增单人返利上限：超出上限部分精确截断
  - 增强返利流程 slog 结构化日志，便于排查问题
  - 已邀请用户列表增加返利明细列
---
 .gitignore                                    |   1 +
 .../internal/handler/admin/setting_handler.go |  48 +++++++
 .../internal/handler/auth_linuxdo_oauth.go    |   3 +-
 .../handler/auth_oauth_pending_flow.go        |   2 +
 backend/internal/handler/auth_oidc_oauth.go   |   3 +-
 backend/internal/handler/auth_wechat_oauth.go |   3 +-
 backend/internal/handler/dto/settings.go      |  13 +-
 backend/internal/repository/affiliate_repo.go | 116 +++++++++++++--
 .../affiliate_repo_integration_test.go        |   2 +-
 backend/internal/server/api_contract_test.go  |   6 +
 backend/internal/service/affiliate_service.go |  54 ++++++-
 .../internal/service/auth_oauth_email_flow.go |   2 +
 backend/internal/service/auth_service.go      |  21 ++-
 .../service/auth_service_register_test.go     |   4 +-
 backend/internal/service/domain_constants.go  |  16 ++-
 .../internal/service/payment_fulfillment.go   |  35 +++--
 backend/internal/service/setting_service.go   |  84 +++++++++++
 backend/internal/service/settings_view.go     |  15 +-
 .../133_affiliate_rebate_freeze.sql           |  17 +++
 .../api/__tests__/auth-oauth-adoption.spec.ts |  40 ++++++
 frontend/src/api/admin/settings.ts            |   6 +
 frontend/src/api/auth.ts                      |  35 +++--
 .../components/auth/LinuxDoOAuthSection.vue   |   5 +-
 .../src/components/auth/OidcOAuthSection.vue  |   3 +
 .../components/auth/WechatOAuthSection.vue    |   3 +
 frontend/src/i18n/locales/en.ts               |  12 +-
 frontend/src/i18n/locales/zh.ts               |  12 +-
 frontend/src/types/index.ts                   |   2 +
 .../utils/__tests__/oauthAffiliate.spec.ts    |  48 +++++++
 frontend/src/utils/oauthAffiliate.ts          | 133 ++++++++++++++++++
 frontend/src/views/admin/SettingsView.vue     |  56 ++++++++
 frontend/src/views/auth/EmailVerifyView.vue   |   9 +-
 .../src/views/auth/LinuxDoCallbackView.vue    |  22 ++-
 frontend/src/views/auth/LoginView.vue         |   3 +
 frontend/src/views/auth/OidcCallbackView.vue  |  22 ++-
 frontend/src/views/auth/RegisterView.vue      |  40 +++++-
 .../src/views/auth/WechatCallbackView.vue     |  22 ++-
 .../auth/__tests__/EmailVerifyView.spec.ts    |   3 +
 .../__tests__/LinuxDoCallbackView.spec.ts     |   1 +
 .../auth/__tests__/OidcCallbackView.spec.ts   |   1 +
 .../auth/__tests__/WechatCallbackView.spec.ts |   1 +
 frontend/src/views/user/AffiliateView.vue     |  32 +++--
 42 files changed, 852 insertions(+), 104 deletions(-)
 create mode 100644 backend/migrations/133_affiliate_rebate_freeze.sql
 create mode 100644 frontend/src/utils/__tests__/oauthAffiliate.spec.ts
 create mode 100644 frontend/src/utils/oauthAffiliate.ts

diff --git a/.gitignore b/.gitignore
index bf7ee064..a61f406d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 docs/claude-relay-service/
+.codex
 
 # ===================
 # Go 后端
diff --git a/backend/internal/handler/admin/setting_handler.go b/backend/internal/handler/admin/setting_handler.go
index 40bf1c69..320dbd6b 100644
--- a/backend/internal/handler/admin/setting_handler.go
+++ b/backend/internal/handler/admin/setting_handler.go
@@ -186,6 +186,9 @@ func (h *SettingHandler) GetSettings(c *gin.Context) {
 		DefaultConcurrency:                     settings.DefaultConcurrency,
 		DefaultBalance:                         settings.DefaultBalance,
 		AffiliateRebateRate:                    settings.AffiliateRebateRate,
+		AffiliateRebateFreezeHours:             settings.AffiliateRebateFreezeHours,
+		AffiliateRebateDurationDays:            settings.AffiliateRebateDurationDays,
+		AffiliateRebatePerInviteeCap:           settings.AffiliateRebatePerInviteeCap,
 		DefaultUserRPMLimit:                    settings.DefaultUserRPMLimit,
 		DefaultSubscriptions:                   defaultSubscriptions,
 		EnableModelFallback:                    settings.EnableModelFallback,
@@ -342,6 +345,9 @@ type UpdateSettingsRequest struct {
 	DefaultConcurrency                       int                               `json:"default_concurrency"`
 	DefaultBalance                           float64                           `json:"default_balance"`
 	AffiliateRebateRate                      *float64                          `json:"affiliate_rebate_rate"`
+	AffiliateRebateFreezeHours               *int                              `json:"affiliate_rebate_freeze_hours"`
+	AffiliateRebateDurationDays              *int                              `json:"affiliate_rebate_duration_days"`
+	AffiliateRebatePerInviteeCap             *float64                          `json:"affiliate_rebate_per_invitee_cap"`
 	DefaultUserRPMLimit                      int                               `json:"default_user_rpm_limit"`
 	DefaultSubscriptions                     []dto.DefaultSubscriptionSetting  `json:"default_subscriptions"`
 	AuthSourceDefaultEmailBalance            *float64                          `json:"auth_source_default_email_balance"`
@@ -485,6 +491,33 @@ func (h *SettingHandler) UpdateSettings(c *gin.Context) {
 	if affiliateRebateRate > service.AffiliateRebateRateMax {
 		affiliateRebateRate = service.AffiliateRebateRateMax
 	}
+	affiliateRebateFreezeHours := previousSettings.AffiliateRebateFreezeHours
+	if req.AffiliateRebateFreezeHours != nil {
+		affiliateRebateFreezeHours = *req.AffiliateRebateFreezeHours
+	}
+	if affiliateRebateFreezeHours < 0 {
+		affiliateRebateFreezeHours = service.AffiliateRebateFreezeHoursDefault
+	}
+	if affiliateRebateFreezeHours > service.AffiliateRebateFreezeHoursMax {
+		affiliateRebateFreezeHours = service.AffiliateRebateFreezeHoursMax
+	}
+	affiliateRebateDurationDays := previousSettings.AffiliateRebateDurationDays
+	if req.AffiliateRebateDurationDays != nil {
+		affiliateRebateDurationDays = *req.AffiliateRebateDurationDays
+	}
+	if affiliateRebateDurationDays < 0 {
+		affiliateRebateDurationDays = service.AffiliateRebateDurationDaysDefault
+	}
+	if affiliateRebateDurationDays > service.AffiliateRebateDurationDaysMax {
+		affiliateRebateDurationDays = service.AffiliateRebateDurationDaysMax
+	}
+	affiliateRebatePerInviteeCap := previousSettings.AffiliateRebatePerInviteeCap
+	if req.AffiliateRebatePerInviteeCap != nil {
+		affiliateRebatePerInviteeCap = *req.AffiliateRebatePerInviteeCap
+	}
+	if affiliateRebatePerInviteeCap < 0 {
+		affiliateRebatePerInviteeCap = service.AffiliateRebatePerInviteeCapDefault
+	}
 	// 通用表格配置：兼容旧客户端未传字段时保留当前值。
 	if req.TableDefaultPageSize <= 0 {
 		req.TableDefaultPageSize = previousSettings.TableDefaultPageSize
@@ -1137,6 +1170,9 @@ func (h *SettingHandler) UpdateSettings(c *gin.Context) {
 		DefaultConcurrency:               req.DefaultConcurrency,
 		DefaultBalance:                   req.DefaultBalance,
 		AffiliateRebateRate:              affiliateRebateRate,
+		AffiliateRebateFreezeHours:       affiliateRebateFreezeHours,
+		AffiliateRebateDurationDays:      affiliateRebateDurationDays,
+		AffiliateRebatePerInviteeCap:     affiliateRebatePerInviteeCap,
 		DefaultUserRPMLimit:              req.DefaultUserRPMLimit,
 		DefaultSubscriptions:             defaultSubscriptions,
 		EnableModelFallback:              req.EnableModelFallback,
@@ -1458,6 +1494,9 @@ func (h *SettingHandler) UpdateSettings(c *gin.Context) {
 		DefaultConcurrency:                     updatedSettings.DefaultConcurrency,
 		DefaultBalance:                         updatedSettings.DefaultBalance,
 		AffiliateRebateRate:                    updatedSettings.AffiliateRebateRate,
+		AffiliateRebateFreezeHours:             updatedSettings.AffiliateRebateFreezeHours,
+		AffiliateRebateDurationDays:            updatedSettings.AffiliateRebateDurationDays,
+		AffiliateRebatePerInviteeCap:           updatedSettings.AffiliateRebatePerInviteeCap,
 		DefaultUserRPMLimit:                    updatedSettings.DefaultUserRPMLimit,
 		DefaultSubscriptions:                   updatedDefaultSubscriptions,
 		EnableModelFallback:                    updatedSettings.EnableModelFallback,
@@ -1768,6 +1807,15 @@ func diffSettings(before *service.SystemSettings, after *service.SystemSettings,
 	if before.AffiliateRebateRate != after.AffiliateRebateRate {
 		changed = append(changed, "affiliate_rebate_rate")
 	}
+	if before.AffiliateRebateFreezeHours != after.AffiliateRebateFreezeHours {
+		changed = append(changed, "affiliate_rebate_freeze_hours")
+	}
+	if before.AffiliateRebateDurationDays != after.AffiliateRebateDurationDays {
+		changed = append(changed, "affiliate_rebate_duration_days")
+	}
+	if before.AffiliateRebatePerInviteeCap != after.AffiliateRebatePerInviteeCap {
+		changed = append(changed, "affiliate_rebate_per_invitee_cap")
+	}
 	if !equalDefaultSubscriptions(before.DefaultSubscriptions, after.DefaultSubscriptions) {
 		changed = append(changed, "default_subscriptions")
 	}
diff --git a/backend/internal/handler/auth_linuxdo_oauth.go b/backend/internal/handler/auth_linuxdo_oauth.go
index 2ef05963..7df4abfd 100644
--- a/backend/internal/handler/auth_linuxdo_oauth.go
+++ b/backend/internal/handler/auth_linuxdo_oauth.go
@@ -435,6 +435,7 @@ func (h *AuthHandler) createLinuxDoOAuthChoicePendingSession(
 
 type completeLinuxDoOAuthRequest struct {
 	InvitationCode   string `json:"invitation_code" binding:"required"`
+	AffCode          string `json:"aff_code,omitempty"` // optional affiliate code; handed to LoginOrRegisterOAuthWithTokenPair as affiliateCode
 	AdoptDisplayName *bool  `json:"adopt_display_name,omitempty"`
 	AdoptAvatar      *bool  `json:"adopt_avatar,omitempty"`
 }
@@ -518,7 +519,7 @@ func (h *AuthHandler) CompleteLinuxDoOAuthRegistration(c *gin.Context) {
 		response.ErrorFrom(c, err)
 		return
 	}
-	tokenPair, user, err := h.authService.LoginOrRegisterOAuthWithTokenPair(c.Request.Context(), email, username, req.InvitationCode)
+	tokenPair, user, err := h.authService.LoginOrRegisterOAuthWithTokenPair(c.Request.Context(), email, username, req.InvitationCode, req.AffCode)
 	if err != nil {
 		response.ErrorFrom(c, err)
 		return
diff --git a/backend/internal/handler/auth_oauth_pending_flow.go b/backend/internal/handler/auth_oauth_pending_flow.go
index 604ad903..490afd0f 100644
--- a/backend/internal/handler/auth_oauth_pending_flow.go
+++ b/backend/internal/handler/auth_oauth_pending_flow.go
@@ -67,6 +67,7 @@ type createPendingOAuthAccountRequest struct {
 	VerifyCode       string `json:"verify_code,omitempty"`
 	Password         string `json:"password" binding:"required,min=6"`
 	InvitationCode   string `json:"invitation_code,omitempty"`
+	AffCode          string `json:"aff_code,omitempty"`
 	AdoptDisplayName *bool  `json:"adopt_display_name,omitempty"`
 	AdoptAvatar      *bool  `json:"adopt_avatar,omitempty"`
 }
@@ -1751,6 +1752,7 @@ func (h *AuthHandler) createPendingOAuthAccount(c *gin.Context, provider string)
 		user,
 		strings.TrimSpace(req.InvitationCode),
 		strings.TrimSpace(session.ProviderType),
+		strings.TrimSpace(req.AffCode),
 	); err != nil {
 		_ = tx.Rollback()
 		if rollbackCreatedUser(err) {
diff --git a/backend/internal/handler/auth_oidc_oauth.go b/backend/internal/handler/auth_oidc_oauth.go
index 0ac8871b..4264002d 100644
--- a/backend/internal/handler/auth_oidc_oauth.go
+++ b/backend/internal/handler/auth_oidc_oauth.go
@@ -582,6 +582,7 @@ func (h *AuthHandler) createOIDCOAuthChoicePendingSession(
 
 type completeOIDCOAuthRequest struct {
 	InvitationCode   string `json:"invitation_code" binding:"required"`
+	AffCode          string `json:"aff_code,omitempty"` // optional affiliate code; handed to LoginOrRegisterOAuthWithTokenPair as affiliateCode
 	AdoptDisplayName *bool  `json:"adopt_display_name,omitempty"`
 	AdoptAvatar      *bool  `json:"adopt_avatar,omitempty"`
 }
@@ -665,7 +666,7 @@ func (h *AuthHandler) CompleteOIDCOAuthRegistration(c *gin.Context) {
 		response.ErrorFrom(c, err)
 		return
 	}
-	tokenPair, user, err := h.authService.LoginOrRegisterOAuthWithTokenPair(c.Request.Context(), email, username, req.InvitationCode)
+	tokenPair, user, err := h.authService.LoginOrRegisterOAuthWithTokenPair(c.Request.Context(), email, username, req.InvitationCode, req.AffCode)
 	if err != nil {
 		response.ErrorFrom(c, err)
 		return
diff --git a/backend/internal/handler/auth_wechat_oauth.go b/backend/internal/handler/auth_wechat_oauth.go
index efee4cc0..34e70ed0 100644
--- a/backend/internal/handler/auth_wechat_oauth.go
+++ b/backend/internal/handler/auth_wechat_oauth.go
@@ -481,6 +481,7 @@ func (h *AuthHandler) wechatPaymentResumeService() *service.PaymentResumeService
 
 type completeWeChatOAuthRequest struct {
 	InvitationCode   string `json:"invitation_code" binding:"required"`
+	AffCode          string `json:"aff_code,omitempty"` // optional affiliate code; handed to LoginOrRegisterOAuthWithTokenPair as affiliateCode
 	AdoptDisplayName *bool  `json:"adopt_display_name,omitempty"`
 	AdoptAvatar      *bool  `json:"adopt_avatar,omitempty"`
 }
@@ -547,7 +548,7 @@ func (h *AuthHandler) CompleteWeChatOAuthRegistration(c *gin.Context) {
 		return
 	}
 
-	tokenPair, user, err := h.authService.LoginOrRegisterOAuthWithTokenPair(c.Request.Context(), email, username, req.InvitationCode)
+	tokenPair, user, err := h.authService.LoginOrRegisterOAuthWithTokenPair(c.Request.Context(), email, username, req.InvitationCode, req.AffCode)
 	if err != nil {
 		response.ErrorFrom(c, err)
 		return
diff --git a/backend/internal/handler/dto/settings.go b/backend/internal/handler/dto/settings.go
index 051fab18..92ae4dc6 100644
--- a/backend/internal/handler/dto/settings.go
+++ b/backend/internal/handler/dto/settings.go
@@ -106,11 +106,14 @@ type SystemSettings struct {
 	CustomMenuItems             []CustomMenuItem `json:"custom_menu_items"`
 	CustomEndpoints             []CustomEndpoint `json:"custom_endpoints"`
 
-	DefaultConcurrency   int                          `json:"default_concurrency"`
-	DefaultBalance       float64                      `json:"default_balance"`
-	AffiliateRebateRate  float64                      `json:"affiliate_rebate_rate"`
-	DefaultUserRPMLimit  int                          `json:"default_user_rpm_limit"`
-	DefaultSubscriptions []DefaultSubscriptionSetting `json:"default_subscriptions"`
+	DefaultConcurrency           int                          `json:"default_concurrency"`
+	DefaultBalance               float64                      `json:"default_balance"`
+	AffiliateRebateRate          float64                      `json:"affiliate_rebate_rate"`
+	AffiliateRebateFreezeHours   int                          `json:"affiliate_rebate_freeze_hours"`
+	AffiliateRebateDurationDays  int                          `json:"affiliate_rebate_duration_days"`
+	AffiliateRebatePerInviteeCap float64                      `json:"affiliate_rebate_per_invitee_cap"`
+	DefaultUserRPMLimit          int                          `json:"default_user_rpm_limit"`
+	DefaultSubscriptions         []DefaultSubscriptionSetting `json:"default_subscriptions"`
 
 	// Model fallback configuration
 	EnableModelFallback      bool   `json:"enable_model_fallback"`
diff --git a/backend/internal/repository/affiliate_repo.go b/backend/internal/repository/affiliate_repo.go
index e3dd56b8..ef89e5b6 100644
--- a/backend/internal/repository/affiliate_repo.go
+++ b/backend/internal/repository/affiliate_repo.go
@@ -86,17 +86,21 @@ func (r *affiliateRepository) BindInviter(ctx context.Context, userID, inviterID
 	return bound, nil
 }
 
-func (r *affiliateRepository) AccrueQuota(ctx context.Context, inviterID, inviteeUserID int64, amount float64) (bool, error) {
+func (r *affiliateRepository) AccrueQuota(ctx context.Context, inviterID, inviteeUserID int64, amount float64, freezeHours int) (bool, error) {
 	if amount <= 0 {
 		return false, nil
 	}
 
 	var applied bool
 	err := r.withTx(ctx, func(txCtx context.Context, txClient *dbent.Client) error {
-		res, err := txClient.ExecContext(txCtx,
-			"UPDATE user_affiliates SET aff_quota = aff_quota + $1, aff_history_quota = aff_history_quota + $1, updated_at = NOW() WHERE user_id = $2",
-			amount, inviterID,
-		)
+		// freezeHours > 0: add to frozen quota; == 0: add to available quota directly
+		var updateSQL string
+		if freezeHours > 0 {
+			updateSQL = "UPDATE user_affiliates SET aff_frozen_quota = aff_frozen_quota + $1, aff_history_quota = aff_history_quota + $1, updated_at = NOW() WHERE user_id = $2"
+		} else {
+			updateSQL = "UPDATE user_affiliates SET aff_quota = aff_quota + $1, aff_history_quota = aff_history_quota + $1, updated_at = NOW() WHERE user_id = $2"
+		}
+		res, err := txClient.ExecContext(txCtx, updateSQL, amount, inviterID)
 		if err != nil {
 			return err
 		}
@@ -106,10 +110,19 @@ func (r *affiliateRepository) AccrueQuota(ctx context.Context, inviterID, invite
 			return nil
 		}
 
-		if _, err = txClient.ExecContext(txCtx, `
+		if freezeHours > 0 {
+			if _, err = txClient.ExecContext(txCtx, `
+INSERT INTO user_affiliate_ledger (user_id, action, amount, source_user_id, frozen_until, created_at, updated_at)
+VALUES ($1, 'accrue', $2, $3, NOW() + make_interval(hours => $4), NOW(), NOW())`,
+				inviterID, amount, inviteeUserID, freezeHours); err != nil {
+				return fmt.Errorf("insert affiliate accrue ledger: %w", err)
+			}
+		} else {
+			if _, err = txClient.ExecContext(txCtx, `
 INSERT INTO user_affiliate_ledger (user_id, action, amount, source_user_id, created_at, updated_at)
 VALUES ($1, 'accrue', $2, $3, NOW(), NOW())`, inviterID, amount, inviteeUserID); err != nil {
-			return fmt.Errorf("insert affiliate accrue ledger: %w", err)
+				return fmt.Errorf("insert affiliate accrue ledger: %w", err)
+			}
 		}
 
 		applied = true
@@ -121,6 +134,76 @@ VALUES ($1, 'accrue', $2, $3, NOW(), NOW())`, inviterID, amount, inviteeUserID);
 	return applied, nil
 }
 
+func (r *affiliateRepository) GetAccruedRebateFromInvitee(ctx context.Context, inviterID, inviteeUserID int64) (float64, error) {
+	client := clientFromContext(ctx, r.client) // presumably prefers a tx-bound client carried by ctx — confirm against clientFromContext
+	rows, err := client.QueryContext(ctx,
+		`SELECT COALESCE(SUM(amount), 0)::double precision FROM user_affiliate_ledger WHERE user_id = $1 AND source_user_id = $2 AND action = 'accrue'`, // total of every 'accrue' ledger row credited to inviterID that originated from inviteeUserID
+		inviterID, inviteeUserID)
+	if err != nil {
+		return 0, fmt.Errorf("query accrued rebate from invitee: %w", err)
+	}
+	defer func() { _ = rows.Close() }() // safety net for the early return below; the normal path closes explicitly
+	var total float64 // stays 0 when no row is read; COALESCE already makes the query yield 0 when nothing matches
+	if rows.Next() {
+		if err := rows.Scan(&total); err != nil {
+			return 0, err
+		}
+	}
+	return total, rows.Close() // NOTE(review): rows.Err() is never consulted — confirm an iteration error cannot be reported as a zero total
+}
+
+func (r *affiliateRepository) ThawFrozenQuota(ctx context.Context, userID int64) (float64, error) {
+	var thawed float64 // amount returned by thawFrozenQuotaTx; written from inside the tx callback
+	err := r.withTx(ctx, func(txCtx context.Context, txClient *dbent.Client) error {
+		var err error // declared separately so the assignment below targets the outer thawed instead of shadowing it
+		thawed, err = thawFrozenQuotaTx(txCtx, txClient, userID)
+		return err
+	})
+	return thawed, err // thawed is whatever the callback last stored; callers should ignore it when err != nil
+}
+
+// thawFrozenQuotaTx moves matured frozen quota to available quota within an existing tx and returns the amount moved (0 if none).
+func thawFrozenQuotaTx(txCtx context.Context, txClient *dbent.Client, userID int64) (float64, error) {
+	rows, err := txClient.QueryContext(txCtx, `
+WITH matured AS (
+    UPDATE user_affiliate_ledger
+    SET frozen_until = NULL, updated_at = NOW()
+    WHERE user_id = $1
+      AND frozen_until IS NOT NULL
+      AND frozen_until <= NOW()
+    RETURNING amount
+)
+SELECT COALESCE(SUM(amount), 0)::double precision FROM matured`, userID) // cast mirrors the other float64-scanned aggregates (e.g. GetAccruedRebateFromInvitee)
+	if err != nil {
+		return 0, fmt.Errorf("thaw frozen quota: %w", err)
+	}
+	defer func() { _ = rows.Close() }() // safety net for early returns; the normal path closes explicitly below
+
+	var thawed float64 // sum of the ledger rows whose freeze was just cleared
+	if rows.Next() {
+		if err := rows.Scan(&thawed); err != nil {
+			return 0, err
+		}
+	}
+	if err := rows.Close(); err != nil { // closed before the UPDATE below, presumably so the tx connection is free — confirm
+		return 0, err
+	}
+	if thawed <= 0 { // nothing matured: leave user_affiliates untouched
+		return 0, nil
+	}
+
+	_, err = txClient.ExecContext(txCtx, `
+UPDATE user_affiliates
+SET aff_quota = aff_quota + $1,
+    aff_frozen_quota = GREATEST(aff_frozen_quota - $1, 0),
+    updated_at = NOW()
+WHERE user_id = $2`, thawed, userID) // GREATEST keeps aff_frozen_quota from going negative
+	if err != nil {
+		return 0, fmt.Errorf("move thawed quota: %w", err)
+	}
+	return thawed, nil
+}
+
 func (r *affiliateRepository) TransferQuotaToBalance(ctx context.Context, userID int64) (float64, float64, error) {
 	var transferred float64
 	var newBalance float64
@@ -130,6 +213,11 @@ func (r *affiliateRepository) TransferQuotaToBalance(ctx context.Context, userID
 			return err
 		}
 
+		// Thaw any matured frozen quota before transfer.
+		if _, err := thawFrozenQuotaTx(txCtx, txClient, userID); err != nil {
+			return fmt.Errorf("thaw before transfer: %w", err)
+		}
+
 		rows, err := txClient.QueryContext(txCtx, `
 WITH claimed AS (
 	SELECT aff_quota::double precision AS amount
@@ -211,10 +299,16 @@ func (r *affiliateRepository) ListInvitees(ctx context.Context, inviterID int64,
 SELECT ua.user_id,
        COALESCE(u.email, ''),
        COALESCE(u.username, ''),
-       ua.created_at
+       ua.created_at,
+       COALESCE(SUM(ual.amount), 0)::double precision AS total_rebate
 FROM user_affiliates ua
 LEFT JOIN users u ON u.id = ua.user_id
+LEFT JOIN user_affiliate_ledger ual
+       ON ual.user_id = $1
+      AND ual.source_user_id = ua.user_id
+      AND ual.action = 'accrue'
 WHERE ua.inviter_id = $1
+GROUP BY ua.user_id, u.email, u.username, ua.created_at
 ORDER BY ua.created_at DESC
 LIMIT $2`, inviterID, limit)
 	if err != nil {
@@ -226,7 +320,7 @@ LIMIT $2`, inviterID, limit)
 	for rows.Next() {
 		var item service.AffiliateInvitee
 		var createdAt time.Time
-		if err := rows.Scan(&item.UserID, &item.Email, &item.Username, &createdAt); err != nil {
+		if err := rows.Scan(&item.UserID, &item.Email, &item.Username, &createdAt, &item.TotalRebate); err != nil {
 			return nil, err
 		}
 		item.CreatedAt = &createdAt
@@ -299,6 +393,7 @@ SELECT user_id,
        inviter_id,
        aff_count,
        aff_quota::double precision,
+       aff_frozen_quota::double precision,
        aff_history_quota::double precision,
        created_at,
        updated_at
@@ -326,6 +421,7 @@ WHERE user_id = $1`, userID)
 		&inviterID,
 		&out.AffCount,
 		&out.AffQuota,
+		&out.AffFrozenQuota,
 		&out.AffHistoryQuota,
 		&out.CreatedAt,
 		&out.UpdatedAt,
@@ -351,6 +447,7 @@ SELECT user_id,
        inviter_id,
        aff_count,
        aff_quota::double precision,
+       aff_frozen_quota::double precision,
        aff_history_quota::double precision,
        created_at,
        updated_at
@@ -380,6 +477,7 @@ LIMIT 1`, strings.ToUpper(strings.TrimSpace(code)))
 		&inviterID,
 		&out.AffCount,
 		&out.AffQuota,
+		&out.AffFrozenQuota,
 		&out.AffHistoryQuota,
 		&out.CreatedAt,
 		&out.UpdatedAt,
diff --git a/backend/internal/repository/affiliate_repo_integration_test.go b/backend/internal/repository/affiliate_repo_integration_test.go
index 369f57cf..697a193b 100644
--- a/backend/internal/repository/affiliate_repo_integration_test.go
+++ b/backend/internal/repository/affiliate_repo_integration_test.go
@@ -125,7 +125,7 @@ func TestAffiliateRepository_AccrueQuota_ReusesOuterTransaction(t *testing.T) {
 	require.NoError(t, err)
 	require.True(t, bound, "invitee must bind to inviter")
 
-	applied, err := repo.AccrueQuota(txCtx, inviter.ID, invitee.ID, 3.5)
+	applied, err := repo.AccrueQuota(txCtx, inviter.ID, invitee.ID, 3.5, 0)
 	require.NoError(t, err)
 	require.True(t, applied, "AccrueQuota must report applied=true")
 
diff --git a/backend/internal/server/api_contract_test.go b/backend/internal/server/api_contract_test.go
index 39286cbf..ca6fd0cc 100644
--- a/backend/internal/server/api_contract_test.go
+++ b/backend/internal/server/api_contract_test.go
@@ -716,6 +716,9 @@ func TestAPIContracts(t *testing.T) {
 					"default_concurrency": 5,
 					"default_balance": 1.25,
 					"affiliate_rebate_rate": 20,
+					"affiliate_rebate_freeze_hours": 0,
+					"affiliate_rebate_duration_days": 0,
+					"affiliate_rebate_per_invitee_cap": 0,
 					"default_user_rpm_limit": 0,
 					"default_subscriptions": [],
 					"enable_model_fallback": false,
@@ -898,6 +901,9 @@ func TestAPIContracts(t *testing.T) {
 					"default_concurrency": 0,
 					"default_balance": 0,
 					"affiliate_rebate_rate": 20,
+					"affiliate_rebate_freeze_hours": 0,
+					"affiliate_rebate_duration_days": 0,
+					"affiliate_rebate_per_invitee_cap": 0,
 					"default_user_rpm_limit": 0,
 					"default_subscriptions": [],
 					"enable_model_fallback": false,
diff --git a/backend/internal/service/affiliate_service.go b/backend/internal/service/affiliate_service.go
index aca32076..5a4e91e7 100644
--- a/backend/internal/service/affiliate_service.go
+++ b/backend/internal/service/affiliate_service.go
@@ -65,16 +65,18 @@ type AffiliateSummary struct {
 	InviterID            *int64    `json:"inviter_id,omitempty"`
 	AffCount             int       `json:"aff_count"`
 	AffQuota             float64   `json:"aff_quota"`
+	AffFrozenQuota       float64   `json:"aff_frozen_quota"`
 	AffHistoryQuota      float64   `json:"aff_history_quota"`
 	CreatedAt            time.Time `json:"created_at"`
 	UpdatedAt            time.Time `json:"updated_at"`
 }
 
 type AffiliateInvitee struct {
-	UserID    int64      `json:"user_id"`
-	Email     string     `json:"email"`
-	Username  string     `json:"username"`
-	CreatedAt *time.Time `json:"created_at,omitempty"`
+	UserID      int64      `json:"user_id"`
+	Email       string     `json:"email"`
+	Username    string     `json:"username"`
+	CreatedAt   *time.Time `json:"created_at,omitempty"`
+	TotalRebate float64    `json:"total_rebate"` // sum of 'accrue' ledger amounts the inviter received from this invitee (filled by ListInvitees)
 }
 
 type AffiliateDetail struct {
@@ -83,6 +85,7 @@ type AffiliateDetail struct {
 	InviterID       *int64  `json:"inviter_id,omitempty"`
 	AffCount        int     `json:"aff_count"`
 	AffQuota        float64 `json:"aff_quota"`
+	AffFrozenQuota  float64 `json:"aff_frozen_quota"`
 	AffHistoryQuota float64 `json:"aff_history_quota"`
 	// EffectiveRebateRatePercent 是当前用户作为邀请人时实际生效的返利比例：
 	// 优先用户自己的专属比例（aff_rebate_rate_percent），否则回退到全局比例。
@@ -95,7 +98,9 @@ type AffiliateRepository interface {
 	EnsureUserAffiliate(ctx context.Context, userID int64) (*AffiliateSummary, error)
 	GetAffiliateByCode(ctx context.Context, code string) (*AffiliateSummary, error)
 	BindInviter(ctx context.Context, userID, inviterID int64) (bool, error)
-	AccrueQuota(ctx context.Context, inviterID, inviteeUserID int64, amount float64) (bool, error)
+	AccrueQuota(ctx context.Context, inviterID, inviteeUserID int64, amount float64, freezeHours int) (bool, error)
+	GetAccruedRebateFromInvitee(ctx context.Context, inviterID, inviteeUserID int64) (float64, error)
+	ThawFrozenQuota(ctx context.Context, userID int64) (float64, error)
 	TransferQuotaToBalance(ctx context.Context, userID int64) (float64, float64, error)
 	ListInvitees(ctx context.Context, inviterID int64, limit int) ([]AffiliateInvitee, error)
 
@@ -160,6 +165,12 @@ func (s *AffiliateService) EnsureUserAffiliate(ctx context.Context, userID int64
 }
 
 func (s *AffiliateService) GetAffiliateDetail(ctx context.Context, userID int64) (*AffiliateDetail, error) {
+	// Lazy thaw: move any matured frozen quota to available before reading.
+	if s != nil && s.repo != nil {
+		// best-effort: thaw failure is non-fatal
+		_, _ = s.repo.ThawFrozenQuota(ctx, userID)
+	}
+
 	summary, err := s.EnsureUserAffiliate(ctx, userID)
 	if err != nil {
 		return nil, err
@@ -174,6 +185,7 @@ func (s *AffiliateService) GetAffiliateDetail(ctx context.Context, userID int64)
 		InviterID:                  summary.InviterID,
 		AffCount:                   summary.AffCount,
 		AffQuota:                   summary.AffQuota,
+		AffFrozenQuota:             summary.AffFrozenQuota,
 		AffHistoryQuota:            summary.AffHistoryQuota,
 		EffectiveRebateRatePercent: s.resolveRebateRatePercent(ctx, summary),
 		Invitees:                   invitees,
@@ -250,13 +262,43 @@ func (s *AffiliateService) AccrueInviteRebate(ctx context.Context, inviteeUserID
 	if err != nil {
 		return 0, err
 	}
+	// 有效期检查：超过返利有效期后不再产生返利
+	if s.settingService != nil {
+		if durationDays := s.settingService.GetAffiliateRebateDurationDays(ctx); durationDays > 0 {
+			if time.Now().After(inviteeSummary.CreatedAt.AddDate(0, 0, durationDays)) {
+				return 0, nil
+			}
+		}
+	}
+
 	rebateRatePercent := s.resolveRebateRatePercent(ctx, inviterSummary)
 	rebate := roundTo(baseRechargeAmount*(rebateRatePercent/100), 8)
 	if rebate <= 0 {
 		return 0, nil
 	}
 
-	applied, err := s.repo.AccrueQuota(ctx, *inviteeSummary.InviterID, inviteeUserID, rebate)
+	// 单人上限检查：精确截断到剩余额度
+	if s.settingService != nil {
+		if perInviteeCap := s.settingService.GetAffiliateRebatePerInviteeCap(ctx); perInviteeCap > 0 {
+			existing, err := s.repo.GetAccruedRebateFromInvitee(ctx, *inviteeSummary.InviterID, inviteeUserID)
+			if err != nil {
+				return 0, err
+			}
+			if existing >= perInviteeCap {
+				return 0, nil
+			}
+			if remaining := perInviteeCap - existing; rebate > remaining {
+				rebate = roundTo(remaining, 8)
+			}
+		}
+	}
+
+	var freezeHours int
+	if s.settingService != nil {
+		freezeHours = s.settingService.GetAffiliateRebateFreezeHours(ctx)
+	}
+
+	applied, err := s.repo.AccrueQuota(ctx, *inviteeSummary.InviterID, inviteeUserID, rebate, freezeHours)
 	if err != nil {
 		return 0, err
 	}
diff --git a/backend/internal/service/auth_oauth_email_flow.go b/backend/internal/service/auth_oauth_email_flow.go
index a18cf39c..9815f31b 100644
--- a/backend/internal/service/auth_oauth_email_flow.go
+++ b/backend/internal/service/auth_oauth_email_flow.go
@@ -175,6 +175,7 @@ func (s *AuthService) FinalizeOAuthEmailAccount(
 	user *User,
 	invitationCode string,
 	signupSource string,
+	affiliateCode string,
 ) error {
 	if s == nil || user == nil || user.ID <= 0 {
 		return ErrServiceUnavailable
@@ -194,6 +195,7 @@ func (s *AuthService) FinalizeOAuthEmailAccount(
 	s.updateOAuthSignupSource(ctx, user.ID, signupSource)
 	grantPlan := s.resolveSignupGrantPlan(ctx, signupSource)
 	s.assignSubscriptions(ctx, user.ID, grantPlan.Subscriptions, "auto assigned by signup defaults")
+	s.bindOAuthAffiliate(ctx, user.ID, affiliateCode)
 	return nil
 }
 
diff --git a/backend/internal/service/auth_service.go b/backend/internal/service/auth_service.go
index 08b0f4b7..b1adf071 100644
--- a/backend/internal/service/auth_service.go
+++ b/backend/internal/service/auth_service.go
@@ -563,7 +563,8 @@ func (s *AuthService) LoginOrRegisterOAuth(ctx context.Context, email, username
 // LoginOrRegisterOAuthWithTokenPair 用于第三方 OAuth/SSO 登录，返回完整的 TokenPair。
 // 与 LoginOrRegisterOAuth 功能相同，但返回 TokenPair 而非单个 token。
 // invitationCode 仅在邀请码注册模式下新用户注册时使用；已有账号登录时忽略。
-func (s *AuthService) LoginOrRegisterOAuthWithTokenPair(ctx context.Context, email, username, invitationCode string) (*TokenPair, *User, error) {
+// affiliateCode 用于邀请返利绑定，仅在新用户注册时使用。
+func (s *AuthService) LoginOrRegisterOAuthWithTokenPair(ctx context.Context, email, username, invitationCode, affiliateCode string) (*TokenPair, *User, error) {
 	// 检查 refreshTokenCache 是否可用
 	if s.refreshTokenCache == nil {
 		return nil, nil, errors.New("refresh token cache not configured")
@@ -666,6 +667,7 @@ func (s *AuthService) LoginOrRegisterOAuthWithTokenPair(ctx context.Context, ema
 					user = newUser
 					s.postAuthUserBootstrap(ctx, user, signupSource, false)
 					s.assignSubscriptions(ctx, user.ID, grantPlan.Subscriptions, "auto assigned by signup defaults")
+					s.bindOAuthAffiliate(ctx, user.ID, affiliateCode)
 				}
 			} else {
 				if err := s.userRepo.Create(ctx, newUser); err != nil {
@@ -683,6 +685,7 @@ func (s *AuthService) LoginOrRegisterOAuthWithTokenPair(ctx context.Context, ema
 					user = newUser
 					s.postAuthUserBootstrap(ctx, user, signupSource, false)
 					s.assignSubscriptions(ctx, user.ID, grantPlan.Subscriptions, "auto assigned by signup defaults")
+					s.bindOAuthAffiliate(ctx, user.ID, affiliateCode)
 					if invitationRedeemCode != nil {
 						if err := s.redeemRepo.Use(ctx, invitationRedeemCode.ID, user.ID); err != nil {
 							return nil, nil, ErrInvitationCodeInvalid
@@ -777,6 +780,22 @@ func authSourceSignupSettings(defaults *AuthSourceDefaultSettings, signupSource
 	}
 }
 
+// bindOAuthAffiliate ensures the user has an affiliate profile and, when affiliateCode is non-blank,
+// binds the inviter identified by that code. Errors are only logged; nothing is returned to the caller.
+func (s *AuthService) bindOAuthAffiliate(ctx context.Context, userID int64, affiliateCode string) {
+	if s.affiliateService == nil || userID <= 0 {
+		return // no affiliate service configured or no valid user ID: nothing to do
+	}
+	if _, err := s.affiliateService.EnsureUserAffiliate(ctx, userID); err != nil {
+		logger.LegacyPrintf("service.auth", "[Auth] Failed to initialize affiliate profile for user %d: %v", userID, err) // binding below is still attempted
+	}
+	if code := strings.TrimSpace(affiliateCode); code != "" { // empty or whitespace-only code skips binding
+		if err := s.affiliateService.BindInviterByCode(ctx, userID, code); err != nil {
+			logger.LegacyPrintf("service.auth", "[Auth] Failed to bind affiliate inviter for user %d: %v", userID, err)
+		}
+	}
+}
+
 func (s *AuthService) postAuthUserBootstrap(ctx context.Context, user *User, signupSource string, touchLogin bool) {
 	if user == nil || user.ID <= 0 {
 		return
diff --git a/backend/internal/service/auth_service_register_test.go b/backend/internal/service/auth_service_register_test.go
index c1ad6240..acc44a38 100644
--- a/backend/internal/service/auth_service_register_test.go
+++ b/backend/internal/service/auth_service_register_test.go
@@ -622,7 +622,7 @@ func TestAuthService_LoginOrRegisterOAuthWithTokenPair_UsesLinuxDoAuthSourceDefa
 	service.defaultSubAssigner = assigner
 	service.refreshTokenCache = &refreshTokenCacheStub{}
 
-	tokenPair, user, err := service.LoginOrRegisterOAuthWithTokenPair(context.Background(), "linuxdo-123@linuxdo-connect.invalid", "linuxdo_user", "")
+	tokenPair, user, err := service.LoginOrRegisterOAuthWithTokenPair(context.Background(), "linuxdo-123@linuxdo-connect.invalid", "linuxdo_user", "", "")
 	require.NoError(t, err)
 	require.NotNil(t, tokenPair)
 	require.NotNil(t, user)
@@ -658,7 +658,7 @@ func TestAuthService_LoginOrRegisterOAuthWithTokenPair_ExistingUserDoesNotGrantA
 	service.defaultSubAssigner = assigner
 	service.refreshTokenCache = &refreshTokenCacheStub{}
 
-	tokenPair, user, err := service.LoginOrRegisterOAuthWithTokenPair(context.Background(), existing.Email, "linuxdo_user", "")
+	tokenPair, user, err := service.LoginOrRegisterOAuthWithTokenPair(context.Background(), existing.Email, "linuxdo_user", "", "")
 	require.NoError(t, err)
 	require.NotNil(t, tokenPair)
 	require.Equal(t, existing.ID, user.ID)
diff --git a/backend/internal/service/domain_constants.go b/backend/internal/service/domain_constants.go
index 04037987..0ef4a486 100644
--- a/backend/internal/service/domain_constants.go
+++ b/backend/internal/service/domain_constants.go
@@ -20,10 +20,15 @@ const (
 
 // Affiliate rebate settings
 const (
-	AffiliateRebateRateDefault = 20.0
-	AffiliateRebateRateMin     = 0.0
-	AffiliateRebateRateMax     = 100.0
-	AffiliateEnabledDefault    = false // 邀请返利总开关默认关闭
+	AffiliateRebateRateDefault          = 20.0
+	AffiliateRebateRateMin              = 0.0
+	AffiliateRebateRateMax              = 100.0
+	AffiliateEnabledDefault             = false // affiliate rebate master switch is off by default
+	AffiliateRebateFreezeHoursDefault   = 0     // 0 = no freeze (backward compatible)
+	AffiliateRebateFreezeHoursMax       = 720   // at most 30 days
+	AffiliateRebateDurationDaysDefault  = 0     // 0 = rebates never expire
+	AffiliateRebateDurationDaysMax      = 3650  // ~10 years
+	AffiliateRebatePerInviteeCapDefault = 0.0   // 0 = no cap
 )
 
 // Platform constants
@@ -97,6 +102,9 @@ const (
 	SettingKeyInvitationCodeEnabled            = "invitation_code_enabled"             // 是否启用邀请码注册
 	SettingKeyAffiliateEnabled                 = "affiliate_enabled"                   // 邀请返利功能总开关
 	SettingKeyAffiliateRebateRate              = "affiliate_rebate_rate"               // 邀请返利比例（百分比，0-100）
+	SettingKeyAffiliateRebateFreezeHours       = "affiliate_rebate_freeze_hours"       // 返利冻结期（小时，0=不冻结）
+	SettingKeyAffiliateRebateDurationDays      = "affiliate_rebate_duration_days"      // 返利有效期（天，0=永久）
+	SettingKeyAffiliateRebatePerInviteeCap     = "affiliate_rebate_per_invitee_cap"    // 单人返利上限（0=无上限）
 
 	// 邮件服务设置
 	SettingKeySMTPHost     = "smtp_host"      // SMTP服务器地址
diff --git a/backend/internal/service/payment_fulfillment.go b/backend/internal/service/payment_fulfillment.go
index c6167447..5df69aea 100644
--- a/backend/internal/service/payment_fulfillment.go
+++ b/backend/internal/service/payment_fulfillment.go
@@ -269,7 +269,9 @@ func (s *PaymentService) doBalance(ctx context.Context, o *dbent.PaymentOrder) e
 
 	switch action {
 	case redeemActionSkipCompleted:
-		s.applyAffiliateRebateForOrder(ctx, o)
+		if err := s.applyAffiliateRebateForOrder(ctx, o); err != nil {
+			return err
+		}
 		// Code already created and redeemed — just mark completed
 		return s.markCompleted(ctx, o, "RECHARGE_SUCCESS")
 	case redeemActionCreate:
@@ -283,7 +285,9 @@ func (s *PaymentService) doBalance(ctx context.Context, o *dbent.PaymentOrder) e
 	if _, err := s.redeemService.Redeem(ctx, o.UserID, o.RechargeCode); err != nil {
 		return fmt.Errorf("redeem balance: %w", err)
 	}
-	s.applyAffiliateRebateForOrder(ctx, o)
+	if err := s.applyAffiliateRebateForOrder(ctx, o); err != nil {
+		return err
+	}
 	return s.markCompleted(ctx, o, "RECHARGE_SUCCESS")
 }
 
@@ -361,12 +365,12 @@ func (s *PaymentService) hasAuditLog(ctx context.Context, orderID int64, action
 	return c > 0
 }
 
-func (s *PaymentService) applyAffiliateRebateForOrder(ctx context.Context, o *dbent.PaymentOrder) {
+func (s *PaymentService) applyAffiliateRebateForOrder(ctx context.Context, o *dbent.PaymentOrder) error {
 	if o == nil || o.OrderType != payment.OrderTypeBalance || o.Amount <= 0 {
-		return
+		return nil
 	}
 	if s.affiliateService == nil {
-		return
+		return nil
 	}
 
 	tx, err := s.entClient.Tx(ctx)
@@ -374,7 +378,7 @@ func (s *PaymentService) applyAffiliateRebateForOrder(ctx context.Context, o *db
 		s.writeAuditLog(ctx, o.ID, "AFFILIATE_REBATE_FAILED", "system", map[string]any{
 			"error": fmt.Sprintf("begin affiliate rebate tx: %v", err),
 		})
-		return
+		return fmt.Errorf("begin affiliate rebate tx: %w", err)
 	}
 	defer func() { _ = tx.Rollback() }()
 
@@ -384,10 +388,10 @@ func (s *PaymentService) applyAffiliateRebateForOrder(ctx context.Context, o *db
 		s.writeAuditLog(ctx, o.ID, "AFFILIATE_REBATE_FAILED", "system", map[string]any{
 			"error": err.Error(),
 		})
-		return
+		return fmt.Errorf("claim affiliate rebate audit: %w", err)
 	}
 	if !claimed {
-		return
+		return nil
 	}
 
 	rebateAmount, err := s.affiliateService.AccrueInviteRebate(txCtx, o.UserID, o.Amount)
@@ -395,7 +399,7 @@ func (s *PaymentService) applyAffiliateRebateForOrder(ctx context.Context, o *db
 		s.writeAuditLog(ctx, o.ID, "AFFILIATE_REBATE_FAILED", "system", map[string]any{
 			"error": err.Error(),
 		})
-		return
+		return fmt.Errorf("accrue affiliate rebate: %w", err)
 	}
 
 	if rebateAmount <= 0 {
@@ -406,14 +410,15 @@ func (s *PaymentService) applyAffiliateRebateForOrder(ctx context.Context, o *db
 			s.writeAuditLog(ctx, o.ID, "AFFILIATE_REBATE_FAILED", "system", map[string]any{
 				"error": err.Error(),
 			})
-			return
+			return fmt.Errorf("update affiliate rebate skipped audit: %w", err)
 		}
 		if err := tx.Commit(); err != nil {
 			s.writeAuditLog(ctx, o.ID, "AFFILIATE_REBATE_FAILED", "system", map[string]any{
 				"error": fmt.Sprintf("commit affiliate rebate tx: %v", err),
 			})
+			return fmt.Errorf("commit affiliate rebate tx: %w", err)
 		}
-		return
+		return nil
 	}
 
 	if err := s.updateClaimedAffiliateRebateAudit(txCtx, tx.Client(), o.ID, "AFFILIATE_REBATE_APPLIED", map[string]any{
@@ -423,14 +428,16 @@ func (s *PaymentService) applyAffiliateRebateForOrder(ctx context.Context, o *db
 		s.writeAuditLog(ctx, o.ID, "AFFILIATE_REBATE_FAILED", "system", map[string]any{
 			"error": err.Error(),
 		})
-		return
+		return fmt.Errorf("update affiliate rebate applied audit: %w", err)
 	}
 
 	if err := tx.Commit(); err != nil {
 		s.writeAuditLog(ctx, o.ID, "AFFILIATE_REBATE_FAILED", "system", map[string]any{
 			"error": fmt.Sprintf("commit affiliate rebate tx: %v", err),
 		})
+		return fmt.Errorf("commit affiliate rebate tx: %w", err)
 	}
+	return nil
 }
 
 func (s *PaymentService) tryClaimAffiliateRebateAudit(ctx context.Context, client *dbent.Client, orderID int64, baseAmount float64) (bool, error) {
@@ -444,11 +451,11 @@ func (s *PaymentService) tryClaimAffiliateRebateAudit(ctx context.Context, clien
 	})
 	rows, err := client.QueryContext(ctx, `
 INSERT INTO payment_audit_logs (order_id, action, detail, operator, created_at)
-SELECT $1, 'AFFILIATE_REBATE_APPLIED', $2, 'system', NOW()
+SELECT $1::text, 'AFFILIATE_REBATE_APPLIED', $2::text, 'system', NOW()
 WHERE NOT EXISTS (
 	SELECT 1
 	FROM payment_audit_logs
-	WHERE order_id = $1
+	WHERE order_id = $1::text
 	  AND action IN ('AFFILIATE_REBATE_APPLIED', 'AFFILIATE_REBATE_SKIPPED')
 )
 ON CONFLICT (order_id, action) DO NOTHING
diff --git a/backend/internal/service/setting_service.go b/backend/internal/service/setting_service.go
index f871ee85..33316031 100644
--- a/backend/internal/service/setting_service.go
+++ b/backend/internal/service/setting_service.go
@@ -1175,6 +1175,24 @@ func (s *SettingService) buildSystemSettingsUpdates(ctx context.Context, setting
 	updates[SettingKeyDefaultBalance] = strconv.FormatFloat(settings.DefaultBalance, 'f', 8, 64)
 	settings.AffiliateRebateRate = clampAffiliateRebateRate(settings.AffiliateRebateRate)
 	updates[SettingKeyAffiliateRebateRate] = strconv.FormatFloat(settings.AffiliateRebateRate, 'f', 8, 64)
+	if settings.AffiliateRebateFreezeHours < 0 {
+		settings.AffiliateRebateFreezeHours = AffiliateRebateFreezeHoursDefault
+	}
+	if settings.AffiliateRebateFreezeHours > AffiliateRebateFreezeHoursMax {
+		settings.AffiliateRebateFreezeHours = AffiliateRebateFreezeHoursMax
+	}
+	updates[SettingKeyAffiliateRebateFreezeHours] = strconv.Itoa(settings.AffiliateRebateFreezeHours)
+	if settings.AffiliateRebateDurationDays < 0 {
+		settings.AffiliateRebateDurationDays = AffiliateRebateDurationDaysDefault
+	}
+	if settings.AffiliateRebateDurationDays > AffiliateRebateDurationDaysMax {
+		settings.AffiliateRebateDurationDays = AffiliateRebateDurationDaysMax
+	}
+	updates[SettingKeyAffiliateRebateDurationDays] = strconv.Itoa(settings.AffiliateRebateDurationDays)
+	if settings.AffiliateRebatePerInviteeCap < 0 {
+		settings.AffiliateRebatePerInviteeCap = AffiliateRebatePerInviteeCapDefault
+	}
+	updates[SettingKeyAffiliateRebatePerInviteeCap] = strconv.FormatFloat(settings.AffiliateRebatePerInviteeCap, 'f', 8, 64)
 	updates[SettingKeyDefaultUserRPMLimit] = strconv.Itoa(settings.DefaultUserRPMLimit)
 	defaultSubsJSON, err := json.Marshal(settings.DefaultSubscriptions)
 	if err != nil {
@@ -1512,6 +1530,54 @@ func (s *SettingService) GetAffiliateRebateRatePercent(ctx context.Context) floa
 	return clampAffiliateRebateRate(rate)
 }
 
+// GetAffiliateRebateFreezeHours returns the rebate freeze period in hours (0 = no freeze, backward compatible).
+// Unreadable, non-integer, or negative values yield the default; values above AffiliateRebateFreezeHoursMax are capped.
+func (s *SettingService) GetAffiliateRebateFreezeHours(ctx context.Context) int {
+	raw, err := s.settingRepo.GetValue(ctx, SettingKeyAffiliateRebateFreezeHours)
+	if err != nil {
+		return AffiliateRebateFreezeHoursDefault
+	}
+	hours, err := strconv.Atoi(strings.TrimSpace(raw))
+	if err != nil || hours < 0 {
+		return AffiliateRebateFreezeHoursDefault
+	}
+	if hours > AffiliateRebateFreezeHoursMax {
+		return AffiliateRebateFreezeHoursMax
+	}
+	return hours
+}
+
+// GetAffiliateRebateDurationDays returns the rebate validity window in days (0 = valid permanently).
+// Unreadable, non-integer, or negative values yield the default; values above AffiliateRebateDurationDaysMax are capped.
+func (s *SettingService) GetAffiliateRebateDurationDays(ctx context.Context) int {
+	raw, err := s.settingRepo.GetValue(ctx, SettingKeyAffiliateRebateDurationDays)
+	if err != nil {
+		return AffiliateRebateDurationDaysDefault
+	}
+	days, err := strconv.Atoi(strings.TrimSpace(raw))
+	if err != nil || days < 0 {
+		return AffiliateRebateDurationDaysDefault
+	}
+	if days > AffiliateRebateDurationDaysMax {
+		return AffiliateRebateDurationDaysMax
+	}
+	return days
+}
+
+// GetAffiliateRebatePerInviteeCap returns the per-invitee rebate cap (0 = no cap).
+// Unreadable, unparsable, negative, NaN, or infinite values yield AffiliateRebatePerInviteeCapDefault.
+func (s *SettingService) GetAffiliateRebatePerInviteeCap(ctx context.Context) float64 {
+	raw, err := s.settingRepo.GetValue(ctx, SettingKeyAffiliateRebatePerInviteeCap)
+	if err != nil {
+		return AffiliateRebatePerInviteeCapDefault
+	}
+	limit, err := strconv.ParseFloat(strings.TrimSpace(raw), 64)
+	if err != nil || limit < 0 || math.IsNaN(limit) || math.IsInf(limit, 0) {
+		return AffiliateRebatePerInviteeCapDefault
+	}
+	return limit
+}
+
 // IsPasswordResetEnabled 检查是否启用密码重置功能
 // 要求：必须同时开启邮件验证
 func (s *SettingService) IsPasswordResetEnabled(ctx context.Context) bool {
@@ -1755,6 +1821,9 @@ func (s *SettingService) InitializeDefaultSettings(ctx context.Context) error {
 		SettingKeyDefaultConcurrency:                       strconv.Itoa(s.cfg.Default.UserConcurrency),
 		SettingKeyDefaultBalance:                           strconv.FormatFloat(s.cfg.Default.UserBalance, 'f', 8, 64),
 		SettingKeyAffiliateRebateRate:                      strconv.FormatFloat(AffiliateRebateRateDefault, 'f', 8, 64),
+		SettingKeyAffiliateRebateFreezeHours:               strconv.Itoa(AffiliateRebateFreezeHoursDefault),
+		SettingKeyAffiliateRebateDurationDays:              strconv.Itoa(AffiliateRebateDurationDaysDefault),
+		SettingKeyAffiliateRebatePerInviteeCap:             strconv.FormatFloat(AffiliateRebatePerInviteeCapDefault, 'f', 2, 64),
 		SettingKeyDefaultUserRPMLimit:                      "0",
 		SettingKeyDefaultSubscriptions:                     "[]",
 		SettingKeyAuthSourceDefaultEmailBalance:            "0",
@@ -1890,6 +1959,21 @@ func (s *SettingService) parseSettings(settings map[string]string) *SystemSettin
 	} else {
 		result.AffiliateRebateRate = AffiliateRebateRateDefault
 	}
+	if freezeHours, err := strconv.Atoi(settings[SettingKeyAffiliateRebateFreezeHours]); err == nil && freezeHours >= 0 {
+		if freezeHours > AffiliateRebateFreezeHoursMax {
+			freezeHours = AffiliateRebateFreezeHoursMax
+		}
+		result.AffiliateRebateFreezeHours = freezeHours
+	}
+	if durationDays, err := strconv.Atoi(settings[SettingKeyAffiliateRebateDurationDays]); err == nil && durationDays >= 0 {
+		if durationDays > AffiliateRebateDurationDaysMax {
+			durationDays = AffiliateRebateDurationDaysMax
+		}
+		result.AffiliateRebateDurationDays = durationDays
+	}
+	if perInviteeCap, err := strconv.ParseFloat(settings[SettingKeyAffiliateRebatePerInviteeCap], 64); err == nil && perInviteeCap >= 0 {
+		result.AffiliateRebatePerInviteeCap = perInviteeCap
+	}
 	result.DefaultSubscriptions = parseDefaultSubscriptions(settings[SettingKeyDefaultSubscriptions])
 
 	// 敏感信息直接返回，方便测试连接时使用
diff --git a/backend/internal/service/settings_view.go b/backend/internal/service/settings_view.go
index 70d8efc3..5ec7d313 100644
--- a/backend/internal/service/settings_view.go
+++ b/backend/internal/service/settings_view.go
@@ -104,12 +104,15 @@ type SystemSettings struct {
 	CustomMenuItems             string // JSON array of custom menu items
 	CustomEndpoints             string // JSON array of custom endpoints
 
-	DefaultConcurrency   int
-	DefaultBalance       float64
-	AffiliateEnabled     bool
-	AffiliateRebateRate  float64
-	DefaultUserRPMLimit  int
-	DefaultSubscriptions []DefaultSubscriptionSetting
+	DefaultConcurrency           int
+	DefaultBalance               float64
+	AffiliateEnabled             bool
+	AffiliateRebateRate          float64
+	AffiliateRebateFreezeHours   int
+	AffiliateRebateDurationDays  int
+	AffiliateRebatePerInviteeCap float64
+	DefaultUserRPMLimit          int
+	DefaultSubscriptions         []DefaultSubscriptionSetting
 
 	// Model fallback configuration
 	EnableModelFallback      bool   `json:"enable_model_fallback"`
diff --git a/backend/migrations/133_affiliate_rebate_freeze.sql b/backend/migrations/133_affiliate_rebate_freeze.sql
new file mode 100644
index 00000000..b87d59b7
--- /dev/null
+++ b/backend/migrations/133_affiliate_rebate_freeze.sql
@@ -0,0 +1,17 @@
+-- 1) Add frozen quota column to user_affiliates for rebate freeze period.
+ALTER TABLE user_affiliates
+    ADD COLUMN IF NOT EXISTS aff_frozen_quota DECIMAL(20,8) NOT NULL DEFAULT 0;
+
+COMMENT ON COLUMN user_affiliates.aff_frozen_quota IS 'Rebate quota currently frozen (pending thaw after freeze period)';
+
+-- 2) Add frozen_until column to user_affiliate_ledger for per-entry freeze tracking.
+-- NULL = no freeze (or already thawed); non-NULL = frozen until this timestamp.
+ALTER TABLE user_affiliate_ledger
+    ADD COLUMN IF NOT EXISTS frozen_until TIMESTAMPTZ NULL;
+
+COMMENT ON COLUMN user_affiliate_ledger.frozen_until IS 'Rebate frozen until this time; NULL means already thawed or never frozen';
+
+-- 3) Partial index for thaw queries: covers only rows with a non-NULL frozen_until (not yet thawed).
+CREATE INDEX IF NOT EXISTS idx_ual_frozen_thaw
+    ON user_affiliate_ledger (user_id, frozen_until)
+    WHERE frozen_until IS NOT NULL;
diff --git a/frontend/src/api/__tests__/auth-oauth-adoption.spec.ts b/frontend/src/api/__tests__/auth-oauth-adoption.spec.ts
index a484d7ed..07a68c03 100644
--- a/frontend/src/api/__tests__/auth-oauth-adoption.spec.ts
+++ b/frontend/src/api/__tests__/auth-oauth-adoption.spec.ts
@@ -74,6 +74,26 @@ describe('oauth adoption auth api', () => {
     })
   })
 
+  it('posts affiliate code when completing linuxdo oauth registration', async () => {
+    const { completeLinuxDoOAuthRegistration } = await import('@/api/auth')
+
+    await completeLinuxDoOAuthRegistration(
+      'invite-code',
+      {
+        adoptDisplayName: true,
+        adoptAvatar: false
+      },
+      ' AFF123 '
+    )
+
+    expect(post).toHaveBeenCalledWith('/auth/oauth/linuxdo/complete-registration', {
+      invitation_code: 'invite-code',
+      aff_code: 'AFF123',
+      adopt_display_name: true,
+      adopt_avatar: false
+    })
+  })
+
   it('posts oidc invitation completion with adoption decisions', async () => {
     const { completeOIDCOAuthRegistration } = await import('@/api/auth')
 
@@ -134,6 +154,26 @@ describe('oauth adoption auth api', () => {
     })
   })
 
+  it('posts affiliate code when creating pending wechat oauth account', async () => {
+    const { createPendingWeChatOAuthAccount } = await import('@/api/auth')
+
+    await createPendingWeChatOAuthAccount(
+      'invite-code',
+      {
+        adoptDisplayName: false,
+        adoptAvatar: true
+      },
+      'WXAFF'
+    )
+
+    expect(post).toHaveBeenCalledWith('/auth/oauth/wechat/complete-registration', {
+      invitation_code: 'invite-code',
+      aff_code: 'WXAFF',
+      adopt_display_name: false,
+      adopt_avatar: true
+    })
+  })
+
   it('classifies oauth completion results as login or bind', async () => {
     const { getOAuthCompletionKind } = await import('@/api/auth')
 
diff --git a/frontend/src/api/admin/settings.ts b/frontend/src/api/admin/settings.ts
index 0d98c9e9..defbab43 100644
--- a/frontend/src/api/admin/settings.ts
+++ b/frontend/src/api/admin/settings.ts
@@ -309,6 +309,9 @@ export interface SystemSettings {
   // Default settings
   default_balance: number;
   affiliate_rebate_rate: number;
+  affiliate_rebate_freeze_hours: number;
+  affiliate_rebate_duration_days: number;
+  affiliate_rebate_per_invitee_cap: number;
   default_concurrency: number;
   default_user_rpm_limit: number;
   default_subscriptions: DefaultSubscriptionSetting[];
@@ -494,6 +497,9 @@ export interface UpdateSettingsRequest {
   totp_enabled?: boolean; // TOTP 双因素认证
   default_balance?: number;
   affiliate_rebate_rate?: number;
+  affiliate_rebate_freeze_hours?: number;
+  affiliate_rebate_duration_days?: number;
+  affiliate_rebate_per_invitee_cap?: number;
   default_concurrency?: number;
   default_user_rpm_limit?: number;
   default_subscriptions?: DefaultSubscriptionSetting[];
diff --git a/frontend/src/api/auth.ts b/frontend/src/api/auth.ts
index f49f3a1f..bb990fc4 100644
--- a/frontend/src/api/auth.ts
+++ b/frontend/src/api/auth.ts
@@ -564,9 +564,10 @@ export async function resetPassword(request: ResetPasswordRequest): Promise<Rese
  */
 export async function completeLinuxDoOAuthRegistration(
   invitationCode: string,
-  decision?: OAuthAdoptionDecision
+  decision?: OAuthAdoptionDecision,
+  affiliateCode?: string
 ): Promise<OAuthTokenResponse> {
-  return createPendingLinuxDoOAuthAccount(invitationCode, decision)
+  return createPendingLinuxDoOAuthAccount(invitationCode, decision, affiliateCode)
 }
 
 /**
@@ -576,27 +577,32 @@ export async function completeLinuxDoOAuthRegistration(
  */
 export async function completeOIDCOAuthRegistration(
   invitationCode: string,
-  decision?: OAuthAdoptionDecision
+  decision?: OAuthAdoptionDecision,
+  affiliateCode?: string
 ): Promise<OAuthTokenResponse> {
-  return createPendingOIDCOAuthAccount(invitationCode, decision)
+  return createPendingOIDCOAuthAccount(invitationCode, decision, affiliateCode)
 }
 
 export async function completeWeChatOAuthRegistration(
   invitationCode: string,
-  decision?: OAuthAdoptionDecision
+  decision?: OAuthAdoptionDecision,
+  affiliateCode?: string
 ): Promise<OAuthTokenResponse> {
-  return createPendingWeChatOAuthAccount(invitationCode, decision)
+  return createPendingWeChatOAuthAccount(invitationCode, decision, affiliateCode)
 }
 
 async function createPendingOAuthAccount(
   provider: 'linuxdo' | 'oidc' | 'wechat',
   invitationCode: string,
-  decision?: OAuthAdoptionDecision
+  decision?: OAuthAdoptionDecision,
+  affiliateCode?: string
 ): Promise<PendingOAuthCreateAccountResponse> {
+  const normalizedAffiliateCode = affiliateCode?.trim()
   const { data } = await apiClient.post<PendingOAuthCreateAccountResponse>(
     `/auth/oauth/${provider}/complete-registration`,
     {
       invitation_code: invitationCode,
+      ...(normalizedAffiliateCode ? { aff_code: normalizedAffiliateCode } : {}),
       ...serializeOAuthAdoptionDecision(decision)
     }
   )
@@ -605,23 +611,26 @@ async function createPendingOAuthAccount(
 
 export async function createPendingLinuxDoOAuthAccount(
   invitationCode: string,
-  decision?: OAuthAdoptionDecision
+  decision?: OAuthAdoptionDecision,
+  affiliateCode?: string
 ): Promise<PendingOAuthCreateAccountResponse> {
-  return createPendingOAuthAccount('linuxdo', invitationCode, decision)
+  return createPendingOAuthAccount('linuxdo', invitationCode, decision, affiliateCode)
 }
 
 export async function createPendingOIDCOAuthAccount(
   invitationCode: string,
-  decision?: OAuthAdoptionDecision
+  decision?: OAuthAdoptionDecision,
+  affiliateCode?: string
 ): Promise<PendingOAuthCreateAccountResponse> {
-  return createPendingOAuthAccount('oidc', invitationCode, decision)
+  return createPendingOAuthAccount('oidc', invitationCode, decision, affiliateCode)
 }
 
 export async function createPendingWeChatOAuthAccount(
   invitationCode: string,
-  decision?: OAuthAdoptionDecision
+  decision?: OAuthAdoptionDecision,
+  affiliateCode?: string
 ): Promise<PendingOAuthCreateAccountResponse> {
-  return createPendingOAuthAccount('wechat', invitationCode, decision)
+  return createPendingOAuthAccount('wechat', invitationCode, decision, affiliateCode)
 }
 
 export async function completePendingOAuthBindLogin(
diff --git a/frontend/src/components/auth/LinuxDoOAuthSection.vue b/frontend/src/components/auth/LinuxDoOAuthSection.vue
index c740d06f..6b245123 100644
--- a/frontend/src/components/auth/LinuxDoOAuthSection.vue
+++ b/frontend/src/components/auth/LinuxDoOAuthSection.vue
@@ -42,9 +42,11 @@
 <script setup lang="ts">
 import { useRoute } from 'vue-router'
 import { useI18n } from 'vue-i18n'
+import { resolveAffiliateReferralCode, storeOAuthAffiliateCode } from '@/utils/oauthAffiliate'
 
-withDefaults(defineProps<{
+const props = withDefaults(defineProps<{
   disabled?: boolean
+  affCode?: string
   showDivider?: boolean
 }>(), {
   showDivider: true
@@ -55,6 +57,7 @@ const { t } = useI18n()
 
 function startLogin(): void {
   const redirectTo = (route.query.redirect as string) || '/dashboard'
+  storeOAuthAffiliateCode(resolveAffiliateReferralCode(props.affCode, route.query.aff, route.query.aff_code))
   const apiBase = (import.meta.env.VITE_API_BASE_URL as string | undefined) || '/api/v1'
   const normalized = apiBase.replace(/\/$/, '')
   const startURL = `${normalized}/auth/oauth/linuxdo/start?redirect=${encodeURIComponent(redirectTo)}`
diff --git a/frontend/src/components/auth/OidcOAuthSection.vue b/frontend/src/components/auth/OidcOAuthSection.vue
index f7cc7fa3..4297f7a1 100644
--- a/frontend/src/components/auth/OidcOAuthSection.vue
+++ b/frontend/src/components/auth/OidcOAuthSection.vue
@@ -23,9 +23,11 @@
 import { computed } from 'vue'
 import { useRoute } from 'vue-router'
 import { useI18n } from 'vue-i18n'
+import { resolveAffiliateReferralCode, storeOAuthAffiliateCode } from '@/utils/oauthAffiliate'
 
 const props = withDefaults(defineProps<{
   disabled?: boolean
+  affCode?: string
   providerName?: string
   showDivider?: boolean
 }>(), {
@@ -45,6 +47,7 @@ const providerInitial = computed(() => normalizedProviderName.value.charAt(0).to
 
 function startLogin(): void {
   const redirectTo = (route.query.redirect as string) || '/dashboard'
+  storeOAuthAffiliateCode(resolveAffiliateReferralCode(props.affCode, route.query.aff, route.query.aff_code))
   const apiBase = (import.meta.env.VITE_API_BASE_URL as string | undefined) || '/api/v1'
   const normalized = apiBase.replace(/\/$/, '')
   const startURL = `${normalized}/auth/oauth/oidc/start?redirect=${encodeURIComponent(redirectTo)}`
diff --git a/frontend/src/components/auth/WechatOAuthSection.vue b/frontend/src/components/auth/WechatOAuthSection.vue
index ce90738c..c1b5be2e 100644
--- a/frontend/src/components/auth/WechatOAuthSection.vue
+++ b/frontend/src/components/auth/WechatOAuthSection.vue
@@ -33,9 +33,11 @@ import { useRoute } from 'vue-router'
 import { useI18n } from 'vue-i18n'
 import { resolveWeChatOAuthStart } from '@/api/auth'
 import { useAppStore } from '@/stores'
+import { resolveAffiliateReferralCode, storeOAuthAffiliateCode } from '@/utils/oauthAffiliate'
 
 const props = withDefaults(defineProps<{
   disabled?: boolean
+  affCode?: string
   showDivider?: boolean
 }>(), {
   showDivider: true,
@@ -84,6 +86,7 @@ function startLogin(): void {
     return
   }
   const redirectTo = (route.query.redirect as string) || '/dashboard'
+  storeOAuthAffiliateCode(resolveAffiliateReferralCode(props.affCode, route.query.aff, route.query.aff_code))
   const apiBase = (import.meta.env.VITE_API_BASE_URL as string | undefined) || '/api/v1'
   const normalized = apiBase.replace(/\/$/, '')
   const mode = resolvedStart.value.mode
diff --git a/frontend/src/i18n/locales/en.ts b/frontend/src/i18n/locales/en.ts
index 42d68b70..6f445986 100644
--- a/frontend/src/i18n/locales/en.ts
+++ b/frontend/src/i18n/locales/en.ts
@@ -989,6 +989,8 @@ export default {
       rebateRateHint: 'What you earn each time an invitee recharges',
       invitedUsers: 'Invited Users',
       availableQuota: 'Available Rebate Quota',
+      frozenQuota: 'Frozen',
+      frozenQuotaHint: 'Recently earned rebates pending release',
       totalQuota: 'Historical Rebate Quota'
     },
     transfer: {
@@ -1005,6 +1007,7 @@ export default {
       columns: {
         email: 'Email',
         username: 'Username',
+        rebate: 'Rebate',
         joinedAt: 'Joined At'
       }
     },
@@ -1012,7 +1015,8 @@ export default {
       title: 'How It Works',
       line1: 'Share your affiliate code or invite link with new users.',
       line2: 'When invitees recharge, you receive {rate} of the recharge as rebate quota.',
-      line3: 'Transfer rebate quota to balance at any time.'
+      line3: 'Transfer rebate quota to balance at any time.',
+      line4: 'Newly earned rebates may have a waiting period before they can be transferred.'
     }
   },
 
@@ -4788,6 +4792,12 @@ export default {
           enabledHint: 'When off, the affiliate menu is hidden, the aff parameter is ignored at signup, and new recharges generate no rebate. Existing rebate balances can still be transferred.',
           rebateRate: 'Global Rebate Rate',
           rebateRateHint: 'Default percentage given back to the inviter on recharges (0-100, e.g. 10 = 10%).',
+          freezeHours: 'Rebate Freeze Period (hours)',
+          freezeHoursDesc: 'New rebates will be frozen for this period before becoming available for withdrawal. 0 = no freeze.',
+          durationDays: 'Rebate Duration (days)',
+          durationDaysDesc: 'Rebate relationship expires after this many days since invitee registration. 0 = permanent.',
+          perInviteeCap: 'Per-Invitee Rebate Cap',
+          perInviteeCapDesc: 'Maximum total rebate from a single invitee. 0 = no limit.',
           customUsers: {
             title: 'Per-User Overrides',
             description: 'Set a custom invite code or exclusive rebate rate for specific users. Lists only users that have an override applied.',
diff --git a/frontend/src/i18n/locales/zh.ts b/frontend/src/i18n/locales/zh.ts
index 7601d01c..e399530b 100644
--- a/frontend/src/i18n/locales/zh.ts
+++ b/frontend/src/i18n/locales/zh.ts
@@ -993,6 +993,8 @@ export default {
       rebateRateHint: '被邀请用户每次充值后你可获得的返利比例',
       invitedUsers: '邀请人数',
       availableQuota: '可转返利额度',
+      frozenQuota: '冻结中',
+      frozenQuotaHint: '新产生的返利正在冻结期中',
       totalQuota: '历史返利额度'
     },
     transfer: {
@@ -1009,6 +1011,7 @@ export default {
       columns: {
         email: '邮箱',
         username: '用户名',
+        rebate: '返利明细',
         joinedAt: '注册时间'
       }
     },
@@ -1016,7 +1019,8 @@ export default {
       title: '使用说明',
       line1: '将邀请码或邀请链接分享给新用户。',
       line2: '被邀请用户充值后，你可获得 {rate} 的返利额度。',
-      line3: '返利额度可随时转入账户余额。'
+      line3: '返利额度可随时转入账户余额。',
+      line4: '新产生的返利需要经过冻结期后才能提现。'
     }
   },
 
@@ -4951,6 +4955,12 @@ export default {
           enabledHint: '关闭后用户菜单中的邀请页面入口隐藏、注册时忽略邀请码、新充值不再产生返利。已有返利额度仍可转入余额。',
           rebateRate: '全局返利比例',
           rebateRateHint: '充值后返给邀请人的默认比例（0-100%，例如填写 10 表示返利 10%）。',
+          freezeHours: '返利冻结期（小时）',
+          freezeHoursDesc: '新产生的返利将在冻结期内无法提现。0 = 不冻结。',
+          durationDays: '返利有效期（天）',
+          durationDaysDesc: '被邀请用户注册后多少天内的充值产生返利。0 = 永久有效。',
+          perInviteeCap: '单人返利上限',
+          perInviteeCapDesc: '每个被邀请用户最多产生的返利总额。0 = 无上限。',
           customUsers: {
             title: '专属用户配置',
             description: '为指定用户设置专属邀请码或专属返利比例。仅展示已设置过专属配置的用户。',
diff --git a/frontend/src/types/index.ts b/frontend/src/types/index.ts
index 2a15ad00..86078f8f 100644
--- a/frontend/src/types/index.ts
+++ b/frontend/src/types/index.ts
@@ -130,6 +130,7 @@ export interface AffiliateInvitee {
   email: string
   username: string
   created_at?: string
+  total_rebate: number
 }
 
 export interface UserAffiliateDetail {
@@ -138,6 +139,7 @@ export interface UserAffiliateDetail {
   inviter_id?: number | null
   aff_count: number
   aff_quota: number
+  aff_frozen_quota: number
   aff_history_quota: number
   /** 当前用户作为邀请人时实际生效的返利比例（专属覆盖全局）。0-100。 */
   effective_rebate_rate_percent: number
diff --git a/frontend/src/utils/__tests__/oauthAffiliate.spec.ts b/frontend/src/utils/__tests__/oauthAffiliate.spec.ts
new file mode 100644
index 00000000..b8527229
--- /dev/null
+++ b/frontend/src/utils/__tests__/oauthAffiliate.spec.ts
@@ -0,0 +1,48 @@
+import { beforeEach, describe, expect, it, vi } from 'vitest'
+import {
+  clearAffiliateReferralCode,
+  clearOAuthAffiliateCode,
+  loadAffiliateReferralCode,
+  loadOAuthAffiliateCode,
+  resolveAffiliateReferralCode,
+  storeAffiliateReferralCode,
+  storeOAuthAffiliateCode
+} from '@/utils/oauthAffiliate'
+
+describe('oauthAffiliate', () => {
+  beforeEach(() => {
+    localStorage.clear()
+    sessionStorage.clear()
+    vi.useRealTimers()
+  })
+
+  it('persists affiliate referral code across pages', () => {
+    expect(resolveAffiliateReferralCode(' 5579J7CFG9PF ')).toBe('5579J7CFG9PF')
+    expect(loadAffiliateReferralCode()).toBe('5579J7CFG9PF')
+    expect(resolveAffiliateReferralCode()).toBe('5579J7CFG9PF')
+  })
+
+  it('expires stale affiliate referral code', () => {
+    const now = Date.UTC(2026, 0, 1)
+    storeAffiliateReferralCode('AFF123', now)
+
+    expect(loadAffiliateReferralCode(now + 30 * 24 * 60 * 60 * 1000 - 1)).toBe('AFF123')
+    expect(loadAffiliateReferralCode(now + 30 * 24 * 60 * 60 * 1000 + 1)).toBe('')
+    expect(localStorage.getItem('affiliate_referral_code')).toBeNull()
+  })
+
+  it('keeps oauth transient code separate from persistent referral code', () => {
+    storeAffiliateReferralCode('PERSISTED')
+    storeOAuthAffiliateCode('OAUTH')
+
+    expect(loadAffiliateReferralCode()).toBe('PERSISTED')
+    expect(loadOAuthAffiliateCode()).toBe('OAUTH')
+
+    clearOAuthAffiliateCode()
+    expect(loadOAuthAffiliateCode()).toBe('')
+    expect(loadAffiliateReferralCode()).toBe('PERSISTED')
+
+    clearAffiliateReferralCode()
+    expect(loadAffiliateReferralCode()).toBe('')
+  })
+})
diff --git a/frontend/src/utils/oauthAffiliate.ts b/frontend/src/utils/oauthAffiliate.ts
new file mode 100644
index 00000000..343a998e
--- /dev/null
+++ b/frontend/src/utils/oauthAffiliate.ts
@@ -0,0 +1,133 @@
+const OAUTH_AFFILIATE_CODE_KEY = 'oauth_aff_code'
+const AFFILIATE_REFERRAL_CODE_KEY = 'affiliate_referral_code'
+const AFFILIATE_REFERRAL_TTL_MS = 30 * 24 * 60 * 60 * 1000
+
+interface StoredAffiliateReferralCode {
+  code: string
+  expiresAt: number
+}
+
+export function normalizeOAuthAffiliateCode(value?: unknown): string {
+  const raw = Array.isArray(value) ? value[0] : value
+  return typeof raw === 'string' ? raw.trim() : ''
+}
+
+export function pickOAuthAffiliateCode(...values: unknown[]): string {
+  for (const value of values) {
+    const code = normalizeOAuthAffiliateCode(value)
+    if (code) {
+      return code
+    }
+  }
+  return ''
+}
+
+export function storeAffiliateReferralCode(value?: unknown, now = Date.now()): void {
+  if (typeof window === 'undefined') {
+    return
+  }
+  const code = normalizeOAuthAffiliateCode(value)
+  if (!code) {
+    return
+  }
+  try {
+    const payload: StoredAffiliateReferralCode = {
+      code,
+      expiresAt: now + AFFILIATE_REFERRAL_TTL_MS
+    }
+    window.localStorage.setItem(AFFILIATE_REFERRAL_CODE_KEY, JSON.stringify(payload))
+  } catch {
+    // Best effort: ignore browser storage errors.
+  }
+}
+
+export function loadAffiliateReferralCode(now = Date.now()): string {
+  if (typeof window === 'undefined') {
+    return ''
+  }
+  try {
+    const raw = window.localStorage.getItem(AFFILIATE_REFERRAL_CODE_KEY)
+    if (!raw) {
+      return ''
+    }
+    const parsed = JSON.parse(raw) as Partial<StoredAffiliateReferralCode>
+    const code = normalizeOAuthAffiliateCode(parsed.code)
+    const expiresAt = Number(parsed.expiresAt) || 0
+    if (!code || expiresAt <= now) {
+      clearAffiliateReferralCode()
+      return ''
+    }
+    return code
+  } catch {
+    clearAffiliateReferralCode()
+    return ''
+  }
+}
+
+export function clearAffiliateReferralCode(): void {
+  if (typeof window === 'undefined') {
+    return
+  }
+  try {
+    window.localStorage.removeItem(AFFILIATE_REFERRAL_CODE_KEY)
+  } catch {
+    // Best effort: ignore browser storage errors.
+  }
+}
+
+export function resolveAffiliateReferralCode(...values: unknown[]): string {
+  const code = pickOAuthAffiliateCode(...values)
+  if (code) {
+    storeAffiliateReferralCode(code)
+    return code
+  }
+  return loadAffiliateReferralCode()
+}
+
+export function storeOAuthAffiliateCode(value?: unknown): void {
+  if (typeof window === 'undefined') {
+    return
+  }
+  const code = normalizeOAuthAffiliateCode(value)
+  try {
+    if (code) {
+      window.sessionStorage.setItem(OAUTH_AFFILIATE_CODE_KEY, code)
+    } else {
+      window.sessionStorage.removeItem(OAUTH_AFFILIATE_CODE_KEY)
+    }
+  } catch {
+    // Best effort: ignore browser storage errors.
+  }
+}
+
+export function loadOAuthAffiliateCode(): string {
+  if (typeof window === 'undefined') {
+    return ''
+  }
+  try {
+    return normalizeOAuthAffiliateCode(window.sessionStorage.getItem(OAUTH_AFFILIATE_CODE_KEY))
+  } catch {
+    return ''
+  }
+}
+
+export function clearOAuthAffiliateCode(): void {
+  if (typeof window === 'undefined') {
+    return
+  }
+  try {
+    window.sessionStorage.removeItem(OAUTH_AFFILIATE_CODE_KEY)
+  } catch {
+    // Best effort: ignore browser storage errors.
+  }
+}
+
+export function clearAllAffiliateReferralCodes(): void {
+  clearOAuthAffiliateCode()
+  clearAffiliateReferralCode()
+}
+
+export function oauthAffiliatePayload(value?: unknown): { aff_code?: string } {
+  const code = normalizeOAuthAffiliateCode(value)
+  return code ? { aff_code: code } : {}
+}
diff --git a/frontend/src/views/admin/SettingsView.vue b/frontend/src/views/admin/SettingsView.vue
index 87113e59..90d10b9a 100644
--- a/frontend/src/views/admin/SettingsView.vue
+++ b/frontend/src/views/admin/SettingsView.vue
@@ -3898,6 +3898,56 @@
                 </p>
               </div>
 
+              <div>
+                <label class="input-label">
+                  {{ t('admin.settings.features.affiliate.freezeHours') }}
+                </label>
+                <input
+                  v-model.number="form.affiliate_rebate_freeze_hours"
+                  type="number"
+                  step="1"
+                  min="0"
+                  max="720"
+                  class="input"
+                />
+                <p class="mt-1 text-xs text-gray-400">
+                  {{ t('admin.settings.features.affiliate.freezeHoursDesc') }}
+                </p>
+              </div>
+
+              <div>
+                <label class="input-label">
+                  {{ t('admin.settings.features.affiliate.durationDays') }}
+                </label>
+                <input
+                  v-model.number="form.affiliate_rebate_duration_days"
+                  type="number"
+                  step="1"
+                  min="0"
+                  max="3650"
+                  class="input"
+                />
+                <p class="mt-1 text-xs text-gray-400">
+                  {{ t('admin.settings.features.affiliate.durationDaysDesc') }}
+                </p>
+              </div>
+
+              <div>
+                <label class="input-label">
+                  {{ t('admin.settings.features.affiliate.perInviteeCap') }}
+                </label>
+                <input
+                  v-model.number="form.affiliate_rebate_per_invitee_cap"
+                  type="number"
+                  step="0.01"
+                  min="0"
+                  class="input"
+                />
+                <p class="mt-1 text-xs text-gray-400">
+                  {{ t('admin.settings.features.affiliate.perInviteeCapDesc') }}
+                </p>
+              </div>
+
               <!-- 专属用户管理 -->
               <div class="border-t border-gray-100 pt-6 dark:border-dark-700">
                 <div class="mb-3 flex items-center justify-between">
@@ -5333,6 +5383,9 @@ const form = reactive<SettingsForm>({
   totp_encryption_key_configured: false,
   default_balance: 0,
   affiliate_rebate_rate: 20,
+  affiliate_rebate_freeze_hours: 0,
+  affiliate_rebate_duration_days: 0,
+  affiliate_rebate_per_invitee_cap: 0,
   default_concurrency: 1,
   default_subscriptions: [],
   force_email_on_third_party_signup: false,
@@ -6261,6 +6314,9 @@ async function saveSettings() {
         100,
         Math.max(0, Number(form.affiliate_rebate_rate) || 0),
       ),
+      affiliate_rebate_freeze_hours: Math.max(0, Math.min(720, Number(form.affiliate_rebate_freeze_hours) || 0)),
+      affiliate_rebate_duration_days: Math.max(0, Math.min(3650, Math.floor(Number(form.affiliate_rebate_duration_days) || 0))),
+      affiliate_rebate_per_invitee_cap: Math.max(0, Number(form.affiliate_rebate_per_invitee_cap) || 0),
       default_concurrency: form.default_concurrency,
       default_subscriptions: normalizedDefaultSubscriptions,
       force_email_on_third_party_signup: form.force_email_on_third_party_signup,
diff --git a/frontend/src/views/auth/EmailVerifyView.vue b/frontend/src/views/auth/EmailVerifyView.vue
index c41f8e35..46c51b83 100644
--- a/frontend/src/views/auth/EmailVerifyView.vue
+++ b/frontend/src/views/auth/EmailVerifyView.vue
@@ -167,6 +167,11 @@ import {
   isRegistrationEmailSuffixAllowed,
   normalizeRegistrationEmailSuffixWhitelist
 } from '@/utils/registrationEmailPolicy'
+import {
+  clearAllAffiliateReferralCodes,
+  loadAffiliateReferralCode,
+  oauthAffiliatePayload
+} from '@/utils/oauthAffiliate'
 
 const { t, locale } = useI18n()
 
@@ -261,7 +266,7 @@ onMounted(async () => {
       initialTurnstileToken.value = registerData.turnstile_token || ''
       promoCode.value = registerData.promo_code || ''
       invitationCode.value = registerData.invitation_code || ''
-      affCode.value = registerData.aff_code || ''
+      affCode.value = registerData.aff_code || loadAffiliateReferralCode()
       pendingAuthToken.value = registerData.pending_auth_token || activePendingSession?.token || ''
       pendingAuthTokenField.value = registerData.pending_auth_token_field || activePendingSession?.token_field || 'pending_auth_token'
       pendingProvider.value = registerData.pending_provider || activePendingSession?.provider || ''
@@ -501,6 +506,7 @@ async function handleVerify(): Promise<void> {
           password: password.value,
           verify_code: verifyCode.value.trim(),
           invitation_code: invitationCode.value || undefined,
+          ...oauthAffiliatePayload(affCode.value || loadAffiliateReferralCode()),
           adopt_display_name: pendingAdoptionDecision.value?.adoptDisplayName,
           adopt_avatar: pendingAdoptionDecision.value?.adoptAvatar
         }
@@ -533,6 +539,7 @@ async function handleVerify(): Promise<void> {
 
     // Clear session data
     sessionStorage.removeItem('register_data')
+    clearAllAffiliateReferralCodes()
 
     // Show success toast
     appStore.showSuccess(t('auth.accountCreatedSuccess', { siteName: siteName.value }))
diff --git a/frontend/src/views/auth/LinuxDoCallbackView.vue b/frontend/src/views/auth/LinuxDoCallbackView.vue
index f73d77de..fef5b90d 100644
--- a/frontend/src/views/auth/LinuxDoCallbackView.vue
+++ b/frontend/src/views/auth/LinuxDoCallbackView.vue
@@ -255,6 +255,11 @@ import {
   type OAuthTokenResponse,
   type PendingOAuthExchangeResponse
 } from '@/api/auth'
+import {
+  clearAllAffiliateReferralCodes,
+  loadOAuthAffiliateCode,
+  oauthAffiliatePayload
+} from '@/utils/oauthAffiliate'
 
 const route = useRoute()
 const router = useRouter()
@@ -568,6 +573,7 @@ async function finalizeCompletion(completion: PendingOAuthExchangeResponse, redi
   if (getOAuthCompletionKind(completion) === 'bind') {
     const bindRedirect = sanitizeRedirectPath(completion.redirect || '/profile')
     clearPendingAuthSession()
+    clearAllAffiliateReferralCodes()
     appStore.showSuccess(bindSuccessMessage)
     await router.replace(bindRedirect)
     return
@@ -579,6 +585,7 @@ async function finalizeCompletion(completion: PendingOAuthExchangeResponse, redi
 
   persistOAuthTokenContext(completion)
   await authStore.setToken(completion.access_token)
+  clearAllAffiliateReferralCodes()
   appStore.showSuccess(t('auth.loginSuccess'))
   await router.replace(redirect)
 }
@@ -627,18 +634,20 @@ async function handleSubmitInvitation() {
 
   isSubmitting.value = true
   try {
+    const affCode = loadOAuthAffiliateCode()
+    const decision = currentAdoptionDecision()
     const completion: LinuxDoPendingActionResponse = legacyPendingOAuthToken.value
       ? (
           await apiClient.post<LinuxDoPendingActionResponse>('/auth/oauth/linuxdo/complete-registration', {
             pending_oauth_token: legacyPendingOAuthToken.value,
             invitation_code: invitationCode.value.trim(),
-            ...serializeAdoptionDecision(currentAdoptionDecision())
+            ...oauthAffiliatePayload(affCode),
+            ...serializeAdoptionDecision(decision)
           })
         ).data
-      : await completeLinuxDoOAuthRegistration(
-          invitationCode.value.trim(),
-          currentAdoptionDecision()
-        )
+      : affCode
+        ? await completeLinuxDoOAuthRegistration(invitationCode.value.trim(), decision, affCode)
+        : await completeLinuxDoOAuthRegistration(invitationCode.value.trim(), decision)
     await finalizePendingAccountResponse(completion)
   } catch (e: unknown) {
     const err = e as { message?: string; response?: { data?: { message?: string } } }
@@ -673,6 +682,7 @@ async function handleCreateAccount(payload: PendingOAuthCreateAccountPayload) {
       password: payload.password,
       verify_code: payload.verifyCode || undefined,
       invitation_code: payload.invitationCode || undefined,
+      ...oauthAffiliatePayload(loadOAuthAffiliateCode()),
       ...serializeAdoptionDecision(currentAdoptionDecision())
     })
     await finalizePendingAccountResponse(data)
@@ -720,6 +730,7 @@ async function handleSubmitTotpChallenge() {
       totp_code: code
     })
     await authStore.setToken(completion.access_token)
+    clearAllAffiliateReferralCodes()
     appStore.showSuccess(t('auth.loginSuccess'))
     await router.replace(redirectTo.value)
   } catch (e: unknown) {
@@ -743,6 +754,7 @@ onMounted(async () => {
     if (legacyLogin) {
       persistOAuthTokenContext(legacyLogin)
       await authStore.setToken(legacyLogin.access_token)
+      clearAllAffiliateReferralCodes()
       appStore.showSuccess(t('auth.loginSuccess'))
       await router.replace(redirect)
       return
diff --git a/frontend/src/views/auth/LoginView.vue b/frontend/src/views/auth/LoginView.vue
index 44c89b23..78ba4b9d 100644
--- a/frontend/src/views/auth/LoginView.vue
+++ b/frontend/src/views/auth/LoginView.vue
@@ -186,6 +186,7 @@ import TurnstileWidget from '@/components/TurnstileWidget.vue'
 import { useAuthStore, useAppStore } from '@/stores'
 import { getPublicSettings, isTotp2FARequired, isWeChatWebOAuthEnabled } from '@/api/auth'
 import type { TotpLoginResponse } from '@/types'
+import { clearAllAffiliateReferralCodes } from '@/utils/oauthAffiliate'
 
 const { t } = useI18n()
 
@@ -355,6 +356,7 @@ async function handleLogin(): Promise<void> {
     }
 
     // Show success toast
+    clearAllAffiliateReferralCodes()
     appStore.showSuccess(t('auth.loginSuccess'))
 
     // Redirect to dashboard or intended route
@@ -397,6 +399,7 @@ async function handle2FAVerify(code: string): Promise<void> {
 
     // Close modal and show success
     show2FAModal.value = false
+    clearAllAffiliateReferralCodes()
     appStore.showSuccess(t('auth.loginSuccess'))
 
     // Redirect to dashboard or intended route
diff --git a/frontend/src/views/auth/OidcCallbackView.vue b/frontend/src/views/auth/OidcCallbackView.vue
index 873022e1..51b17dbf 100644
--- a/frontend/src/views/auth/OidcCallbackView.vue
+++ b/frontend/src/views/auth/OidcCallbackView.vue
@@ -264,6 +264,11 @@ import {
   type OAuthTokenResponse,
   type PendingOAuthExchangeResponse
 } from '@/api/auth'
+import {
+  clearAllAffiliateReferralCodes,
+  loadOAuthAffiliateCode,
+  oauthAffiliatePayload
+} from '@/utils/oauthAffiliate'
 
 const route = useRoute()
 const router = useRouter()
@@ -590,6 +595,7 @@ async function finalizeCompletion(completion: PendingOAuthExchangeResponse, redi
   if (getOAuthCompletionKind(completion) === 'bind') {
     const bindRedirect = sanitizeRedirectPath(completion.redirect || '/profile')
     clearPendingAuthSession()
+    clearAllAffiliateReferralCodes()
     appStore.showSuccess(bindSuccessMessage)
     await router.replace(bindRedirect)
     return
@@ -601,6 +607,7 @@ async function finalizeCompletion(completion: PendingOAuthExchangeResponse, redi
 
   persistOAuthTokenContext(completion)
   await authStore.setToken(completion.access_token)
+  clearAllAffiliateReferralCodes()
   appStore.showSuccess(t('auth.loginSuccess'))
   await router.replace(redirect)
 }
@@ -649,18 +656,20 @@ async function handleSubmitInvitation() {
 
   isSubmitting.value = true
   try {
+    const affCode = loadOAuthAffiliateCode()
+    const decision = currentAdoptionDecision()
     const completion: PendingOidcCompletion = legacyPendingOAuthToken.value
       ? (
           await apiClient.post<PendingOidcCompletion>('/auth/oauth/oidc/complete-registration', {
             pending_oauth_token: legacyPendingOAuthToken.value,
             invitation_code: invitationCode.value.trim(),
-            ...serializeAdoptionDecision(currentAdoptionDecision())
+            ...oauthAffiliatePayload(affCode),
+            ...serializeAdoptionDecision(decision)
           })
         ).data
-      : await completeOIDCOAuthRegistration(
-          invitationCode.value.trim(),
-          currentAdoptionDecision()
-        )
+      : affCode
+        ? await completeOIDCOAuthRegistration(invitationCode.value.trim(), decision, affCode)
+        : await completeOIDCOAuthRegistration(invitationCode.value.trim(), decision)
     await finalizePendingAccountResponse(completion)
   } catch (e: unknown) {
     const err = e as { message?: string; response?: { data?: { message?: string } } }
@@ -695,6 +704,7 @@ async function handleCreateAccount(payload: PendingOAuthCreateAccountPayload) {
       password: payload.password,
       verify_code: payload.verifyCode || undefined,
       invitation_code: payload.invitationCode || undefined,
+      ...oauthAffiliatePayload(loadOAuthAffiliateCode()),
       ...serializeAdoptionDecision(currentAdoptionDecision())
     })
     await finalizePendingAccountResponse(data)
@@ -742,6 +752,7 @@ async function handleSubmitTotpChallenge() {
       totp_code: code
     })
     await authStore.setToken(completion.access_token)
+    clearAllAffiliateReferralCodes()
     appStore.showSuccess(t('auth.loginSuccess'))
     await router.replace(redirectTo.value)
   } catch (e: unknown) {
@@ -767,6 +778,7 @@ onMounted(async () => {
     if (legacyLogin) {
       persistOAuthTokenContext(legacyLogin)
       await authStore.setToken(legacyLogin.access_token)
+      clearAllAffiliateReferralCodes()
       appStore.showSuccess(t('auth.loginSuccess'))
       await router.replace(redirect)
       return
diff --git a/frontend/src/views/auth/RegisterView.vue b/frontend/src/views/auth/RegisterView.vue
index 6a5e9c26..fe3e73ad 100644
--- a/frontend/src/views/auth/RegisterView.vue
+++ b/frontend/src/views/auth/RegisterView.vue
@@ -15,17 +15,20 @@
         <LinuxDoOAuthSection
           v-if="linuxdoOAuthEnabled"
           :disabled="isLoading"
+          :aff-code="formData.aff_code"
           :show-divider="false"
         />
         <WechatOAuthSection
           v-if="wechatOAuthEnabled"
           :disabled="isLoading"
+          :aff-code="formData.aff_code"
           :show-divider="false"
         />
         <OidcOAuthSection
           v-if="oidcOAuthEnabled"
           :disabled="isLoading"
           :provider-name="oidcOAuthProviderName"
+          :aff-code="formData.aff_code"
           :show-divider="false"
         />
         <div class="flex items-center gap-3">
@@ -293,6 +296,11 @@ import {
   isRegistrationEmailSuffixAllowed,
   normalizeRegistrationEmailSuffixWhitelist
 } from '@/utils/registrationEmailPolicy'
+import {
+  clearAffiliateReferralCode,
+  loadAffiliateReferralCode,
+  resolveAffiliateReferralCode
+} from '@/utils/oauthAffiliate'
 
 const { t, locale } = useI18n()
 
@@ -378,9 +386,19 @@ watch(validationToastMessage, (value, previousValue) => {
   }
 })
 
+function syncAffiliateReferralCode(): string {
+  const code = resolveAffiliateReferralCode(route.query.aff, route.query.aff_code)
+  if (code) {
+    formData.aff_code = code // only overwrite the form field when a code was resolved
+  }
+  return code
+}
+
 // ==================== Lifecycle ====================
 
 onMounted(async () => {
+  syncAffiliateReferralCode()
+
   try {
     const settings = await getPublicSettings()
     registrationEnabled.value = settings.registration_enabled
@@ -407,10 +425,7 @@ onMounted(async () => {
         await validatePromoCodeDebounced(promoParam)
       }
     }
-    const affParam = (route.query.aff as string) || (route.query.aff_code as string)
-    if (affParam) {
-      formData.aff_code = affParam.trim()
-    }
+    syncAffiliateReferralCode()
   } catch (error) {
     console.error('Failed to load public settings:', error)
   } finally {
@@ -418,6 +433,13 @@ onMounted(async () => {
   }
 })
 
+watch(
+  () => [route.query.aff, route.query.aff_code], // watched source: both affiliate query parameters
+  () => {
+    syncAffiliateReferralCode()
+  }
+)
+
 onUnmounted(() => {
   if (promoValidateTimeout) {
     clearTimeout(promoValidateTimeout)
@@ -702,6 +724,11 @@ async function handleRegister(): Promise<void> {
   isLoading.value = true
 
   try {
+    const affCode = formData.aff_code.trim() || loadAffiliateReferralCode()
+    if (affCode) {
+      formData.aff_code = affCode
+    }
+
     // If email verification is enabled, redirect to verification page
     if (emailVerifyEnabled.value) {
       // Store registration data in sessionStorage
@@ -713,7 +740,7 @@ async function handleRegister(): Promise<void> {
           turnstile_token: turnstileToken.value,
           promo_code: formData.promo_code || undefined,
           invitation_code: formData.invitation_code || undefined,
-          ...(formData.aff_code ? { aff_code: formData.aff_code } : {})
+          ...(affCode ? { aff_code: affCode } : {})
         })
       )
 
@@ -729,8 +756,9 @@ async function handleRegister(): Promise<void> {
       turnstile_token: turnstileEnabled.value ? turnstileToken.value : undefined,
       promo_code: formData.promo_code || undefined,
       invitation_code: formData.invitation_code || undefined,
-      ...(formData.aff_code ? { aff_code: formData.aff_code } : {})
+      ...(affCode ? { aff_code: affCode } : {})
     })
+    clearAffiliateReferralCode()
 
     // Show success toast
     appStore.showSuccess(t('auth.accountCreatedSuccess', { siteName: siteName.value }))
diff --git a/frontend/src/views/auth/WechatCallbackView.vue b/frontend/src/views/auth/WechatCallbackView.vue
index 9ecc5e47..afa5c5ab 100644
--- a/frontend/src/views/auth/WechatCallbackView.vue
+++ b/frontend/src/views/auth/WechatCallbackView.vue
@@ -340,6 +340,11 @@ import {
   type OAuthTokenResponse,
   type PendingOAuthExchangeResponse
 } from '@/api/auth'
+import {
+  clearAllAffiliateReferralCodes,
+  loadOAuthAffiliateCode,
+  oauthAffiliatePayload
+} from '@/utils/oauthAffiliate'
 
 const route = useRoute()
 const router = useRouter()
@@ -802,6 +807,7 @@ async function finalizeCompletion(completion: PendingOAuthExchangeResponse, redi
   if (getOAuthCompletionKind(completion) === 'bind') {
     const bindRedirect = sanitizeRedirectPath(completion.redirect || '/profile')
     clearPendingAuthSession()
+    clearAllAffiliateReferralCodes()
     appStore.showSuccess(bindSuccessMessage)
     await router.replace(bindRedirect)
     return
@@ -813,6 +819,7 @@ async function finalizeCompletion(completion: PendingOAuthExchangeResponse, redi
 
   persistOAuthTokenContext(completion)
   await authStore.setToken(completion.access_token)
+  clearAllAffiliateReferralCodes()
   appStore.showSuccess(t('auth.loginSuccess'))
   await router.replace(redirect)
 }
@@ -861,18 +868,20 @@ async function handleSubmitInvitation() {
 
   isSubmitting.value = true
   try {
+    const affCode = loadOAuthAffiliateCode()
+    const decision = currentAdoptionDecision()
     const completion: PendingWeChatCompletion = legacyPendingOAuthToken.value
       ? (
           await apiClient.post<PendingWeChatCompletion>('/auth/oauth/wechat/complete-registration', {
             pending_oauth_token: legacyPendingOAuthToken.value,
             invitation_code: invitationCode.value.trim(),
-            ...serializeAdoptionDecision(currentAdoptionDecision())
+            ...oauthAffiliatePayload(affCode),
+            ...serializeAdoptionDecision(decision)
           })
         ).data
-      : await completeWeChatOAuthRegistration(
-          invitationCode.value.trim(),
-          currentAdoptionDecision()
-        )
+      : affCode
+        ? await completeWeChatOAuthRegistration(invitationCode.value.trim(), decision, affCode)
+        : await completeWeChatOAuthRegistration(invitationCode.value.trim(), decision)
     await finalizePendingAccountResponse(completion)
   } catch (e: unknown) {
     const err = e as { message?: string; response?: { data?: { message?: string } } }
@@ -907,6 +916,7 @@ async function handleCreateAccount(payload: PendingOAuthCreateAccountPayload) {
       password: payload.password,
       verify_code: payload.verifyCode || undefined,
       invitation_code: payload.invitationCode || undefined,
+      ...oauthAffiliatePayload(loadOAuthAffiliateCode()),
       ...serializeAdoptionDecision(currentAdoptionDecision())
     })
     await finalizePendingAccountResponse(data)
@@ -955,6 +965,7 @@ async function handleSubmitTotpChallenge() {
     })
     persistOAuthTokenContext(completion)
     await authStore.setToken(completion.access_token)
+    clearAllAffiliateReferralCodes()
     appStore.showSuccess(t('auth.loginSuccess'))
     await router.replace(redirectTo.value)
   } catch (e: unknown) {
@@ -1015,6 +1026,7 @@ onMounted(async () => {
     if (legacyLogin) {
       persistOAuthTokenContext(legacyLogin)
       await authStore.setToken(legacyLogin.access_token)
+      clearAllAffiliateReferralCodes()
       appStore.showSuccess(t('auth.loginSuccess'))
       await router.replace(redirect)
       return
diff --git a/frontend/src/views/auth/__tests__/EmailVerifyView.spec.ts b/frontend/src/views/auth/__tests__/EmailVerifyView.spec.ts
index 9f67a994..39bb7344 100644
--- a/frontend/src/views/auth/__tests__/EmailVerifyView.spec.ts
+++ b/frontend/src/views/auth/__tests__/EmailVerifyView.spec.ts
@@ -112,6 +112,7 @@ describe('EmailVerifyView', () => {
     apiClientPostMock.mockReset()
     authStoreState.pendingAuthSession = null
     sessionStorage.clear()
+    localStorage.clear()
 
     getPublicSettingsMock.mockResolvedValue({
       turnstile_enabled: false,
@@ -136,6 +137,7 @@ describe('EmailVerifyView', () => {
       JSON.stringify({
         email: 'fresh@example.com',
         password: 'secret-123',
+        aff_code: 'AFF123',
       })
     )
 
@@ -334,6 +336,7 @@ describe('EmailVerifyView', () => {
       email: 'fresh@example.com',
       password: 'secret-123',
       verify_code: '123456',
+      aff_code: 'AFF123',
     })
     expect(persistOAuthTokenContextMock).toHaveBeenCalledWith({
       access_token: 'oauth-access-token',
diff --git a/frontend/src/views/auth/__tests__/LinuxDoCallbackView.spec.ts b/frontend/src/views/auth/__tests__/LinuxDoCallbackView.spec.ts
index 333f8dc5..29e3a332 100644
--- a/frontend/src/views/auth/__tests__/LinuxDoCallbackView.spec.ts
+++ b/frontend/src/views/auth/__tests__/LinuxDoCallbackView.spec.ts
@@ -93,6 +93,7 @@ describe('LinuxDoCallbackView', () => {
     })
     window.location.hash = ''
     localStorage.clear()
+    sessionStorage.clear()
   })
 
   it('accepts the legacy fragment token success callback without pending-session exchange', async () => {
diff --git a/frontend/src/views/auth/__tests__/OidcCallbackView.spec.ts b/frontend/src/views/auth/__tests__/OidcCallbackView.spec.ts
index ec89512b..77af4d3d 100644
--- a/frontend/src/views/auth/__tests__/OidcCallbackView.spec.ts
+++ b/frontend/src/views/auth/__tests__/OidcCallbackView.spec.ts
@@ -97,6 +97,7 @@ describe('OidcCallbackView', () => {
     })
     window.location.hash = ''
     localStorage.clear()
+    sessionStorage.clear()
   })
 
   it('accepts the legacy fragment token success callback without pending-session exchange', async () => {
diff --git a/frontend/src/views/auth/__tests__/WechatCallbackView.spec.ts b/frontend/src/views/auth/__tests__/WechatCallbackView.spec.ts
index 7150dd7e..77897514 100644
--- a/frontend/src/views/auth/__tests__/WechatCallbackView.spec.ts
+++ b/frontend/src/views/auth/__tests__/WechatCallbackView.spec.ts
@@ -172,6 +172,7 @@ describe('WechatCallbackView', () => {
     appStoreState.cachedPublicSettings = null
     appStoreState.publicSettingsLoaded = false
     localStorage.clear()
+    sessionStorage.clear()
     locationState.current = {
       href: 'http://localhost/auth/wechat/callback',
       hash: '',
diff --git a/frontend/src/views/user/AffiliateView.vue b/frontend/src/views/user/AffiliateView.vue
index 195a1458..66417d23 100644
--- a/frontend/src/views/user/AffiliateView.vue
+++ b/frontend/src/views/user/AffiliateView.vue
@@ -9,21 +9,17 @@
 
       <template v-else-if="detail">
         <div class="grid gap-4 sm:grid-cols-2 lg:grid-cols-4">
-          <!-- 返利比例：用主色突出，让用户一眼看到「能拿多少」 -->
-          <div class="card relative overflow-hidden p-5">
-            <div class="absolute -right-6 -top-6 h-24 w-24 rounded-full bg-primary-500/10"></div>
-            <div class="relative">
-              <p class="flex items-center gap-1.5 text-sm text-gray-500 dark:text-dark-400">
-                <Icon name="dollar" size="sm" class="text-primary-500" />
-                {{ t('affiliate.stats.rebateRate') }}
-              </p>
-              <p class="mt-2 text-2xl font-semibold text-primary-600 dark:text-primary-400">
-                {{ formattedRebateRate }}<span class="ml-0.5 text-base font-medium">%</span>
-              </p>
-              <p class="mt-1 text-xs text-gray-400 dark:text-dark-500">
-                {{ t('affiliate.stats.rebateRateHint') }}
-              </p>
-            </div>
+          <div class="card p-5">
+            <p class="flex items-center gap-1.5 text-sm text-gray-500 dark:text-dark-400">
+              <Icon name="dollar" size="sm" class="text-primary-500" />
+              {{ t('affiliate.stats.rebateRate') }}
+            </p>
+            <p class="mt-2 text-2xl font-semibold text-primary-600 dark:text-primary-400">
+              {{ formattedRebateRate }}<span class="ml-0.5 text-base font-medium">%</span>
+            </p>
+            <p class="mt-1 text-xs text-gray-400 dark:text-dark-500">
+              {{ t('affiliate.stats.rebateRateHint') }}
+            </p>
           </div>
           <div class="card p-5">
             <p class="text-sm text-gray-500 dark:text-dark-400">{{ t('affiliate.stats.invitedUsers') }}</p>
@@ -42,6 +38,9 @@
             <p class="mt-2 text-2xl font-semibold text-gray-900 dark:text-white">
               {{ formatCurrency(detail.aff_history_quota) }}
             </p>
+            <p v-if="detail.aff_frozen_quota > 0" class="mt-1 text-xs text-amber-600 dark:text-amber-400">
+              {{ t('affiliate.stats.frozenQuota') }}: {{ formatCurrency(detail.aff_frozen_quota) }}
+            </p>
           </div>
         </div>
 
@@ -79,6 +78,7 @@
               <li>1. {{ t('affiliate.tips.line1') }}</li>
               <li>2. {{ t('affiliate.tips.line2', { rate: `${formattedRebateRate}%` }) }}</li>
               <li>3. {{ t('affiliate.tips.line3') }}</li>
+              <li v-if="detail.aff_frozen_quota > 0">4. {{ t('affiliate.tips.line4') }}</li>
             </ul>
           </div>
         </div>
@@ -115,6 +115,7 @@
                 <tr class="border-b border-gray-200 text-gray-500 dark:border-dark-700 dark:text-dark-400">
                   <th class="px-3 py-2 font-medium">{{ t('affiliate.invitees.columns.email') }}</th>
                   <th class="px-3 py-2 font-medium">{{ t('affiliate.invitees.columns.username') }}</th>
+                  <th class="px-3 py-2 font-medium text-right">{{ t('affiliate.invitees.columns.rebate') }}</th>
                   <th class="px-3 py-2 font-medium">{{ t('affiliate.invitees.columns.joinedAt') }}</th>
                 </tr>
               </thead>
@@ -126,6 +127,7 @@
                 >
                   <td class="px-3 py-3 text-gray-900 dark:text-white">{{ item.email || '-' }}</td>
                   <td class="px-3 py-3 text-gray-700 dark:text-gray-300">{{ item.username || '-' }}</td>
+                  <td class="px-3 py-3 text-right font-medium text-emerald-600 dark:text-emerald-400">{{ formatCurrency(item.total_rebate) }}</td>
                   <td class="px-3 py-3 text-gray-700 dark:text-gray-300">{{ formatDateTime(item.created_at) || '-' }}</td>
                 </tr>
               </tbody>

From 1a0cabbfd67f2d6b559fc86af7fa2dea5216c215 Mon Sep 17 00:00:00 2001
From: Nobody-Zhang <minecraftzhanggongbo@outlook.com>
Date: Sun, 26 Apr 2026 04:57:34 +0000
Subject: [PATCH 06/46] Fix Zpay refund endpoint handling

---
 backend/internal/payment/provider/easypay.go  | 196 ++++++++++++++++--
 .../payment/provider/easypay_refund_test.go   | 196 ++++++++++++++++++
 2 files changed, 371 insertions(+), 21 deletions(-)
 create mode 100644 backend/internal/payment/provider/easypay_refund_test.go

diff --git a/backend/internal/payment/provider/easypay.go b/backend/internal/payment/provider/easypay.go
index 37bd38b2..e7d8aab9 100644
--- a/backend/internal/payment/provider/easypay.go
+++ b/backend/internal/payment/provider/easypay.go
@@ -25,6 +25,7 @@ const (
 	easypayStatusPaid      = 1
 	easypayHTTPTimeout     = 10 * time.Second
 	maxEasypayResponseSize = 1 << 20 // 1MB
+	maxEasypayErrorSummary = 512
 	tradeStatusSuccess     = "TRADE_SUCCESS"
 	signTypeMD5            = "MD5"
 	paymentModePopup       = "popup"
@@ -42,17 +43,55 @@ type EasyPay struct {
 // config keys: pid, pkey, apiBase, notifyUrl, returnUrl, cid, cidAlipay, cidWxpay
 func NewEasyPay(instanceID string, config map[string]string) (*EasyPay, error) {
 	for _, k := range []string{"pid", "pkey", "apiBase", "notifyUrl", "returnUrl"} {
-		if config[k] == "" {
+		if strings.TrimSpace(config[k]) == "" { // whitespace-only values count as missing
 			return nil, fmt.Errorf("easypay config missing required key: %s", k)
 		}
 	}
+	cfg := make(map[string]string, len(config)) // copy, so the caller's map is not modified below
+	for k, v := range config {
+		cfg[k] = v
+	}
+	cfg["apiBase"] = normalizeEasyPayAPIBase(cfg["apiBase"]) // keep the normalized base URL in the copy
 	return &EasyPay{
 		instanceID: instanceID,
-		config:     config,
+		config:     cfg,
 		httpClient: &http.Client{Timeout: easypayHTTPTimeout},
 	}, nil
 }
 
+func normalizeEasyPayAPIBase(apiBase string) string {
+	base := strings.TrimSpace(apiBase)
+	if base == "" {
+		return ""
+	}
+	if parsed, err := url.Parse(base); err == nil && parsed.Scheme != "" && parsed.Host != "" { // absolute URL: rebuild it without query, fragment or endpoint script
+		parsed.RawQuery = ""
+		parsed.Fragment = ""
+		parsed.RawPath = "" // clear the encoded-path hint so String() derives the path from Path
+		parsed.Path = trimEasyPayEndpointPath(parsed.Path)
+		return strings.TrimRight(parsed.String(), "/")
+	}
+	return strings.TrimRight(trimEasyPayEndpointPath(base), "/") // no scheme/host: apply the same trimming to the raw string
+}
+
+func trimEasyPayEndpointPath(path string) string {
+	path = strings.TrimRight(strings.TrimSpace(path), "/")
+	lower := strings.ToLower(path) // lowercase copy so the suffix match ignores case
+	for _, endpoint := range []string{"/submit.php", "/mapi.php", "/api.php"} {
+		if strings.HasSuffix(lower, endpoint) {
+			return strings.TrimRight(path[:len(path)-len(endpoint)], "/") // NOTE(review): index comes from a match on lower; assumes ToLower kept byte lengths equal
+		}
+	}
+	return path
+}
+
+func (e *EasyPay) apiBase() string {
+	if e == nil { // a nil receiver yields an empty base instead of a nil dereference
+		return ""
+	}
+	return normalizeEasyPayAPIBase(e.config["apiBase"]) // re-normalizes the configured value on every call
+}
+
 func (e *EasyPay) Name() string        { return "EasyPay" }
 func (e *EasyPay) ProviderKey() string { return payment.TypeEasyPay }
 func (e *EasyPay) SupportedTypes() []payment.PaymentType {
@@ -104,8 +143,7 @@ func (e *EasyPay) createRedirectPayment(req payment.CreatePaymentRequest) (*paym
 	for k, v := range params {
 		q.Set(k, v)
 	}
-	base := strings.TrimRight(e.config["apiBase"], "/")
-	payURL := base + "/submit.php?" + q.Encode()
+	payURL := e.apiBase() + "/submit.php?" + q.Encode()
 	return &payment.CreatePaymentResponse{PayURL: payURL}, nil
 }
 
@@ -127,7 +165,7 @@ func (e *EasyPay) createAPIPayment(ctx context.Context, req payment.CreatePaymen
 	params["sign"] = easyPaySign(params, e.config["pkey"])
 	params["sign_type"] = signTypeMD5
 
-	body, err := e.post(ctx, strings.TrimRight(e.config["apiBase"], "/")+"/mapi.php", params)
+	body, err := e.post(ctx, e.apiBase()+"/mapi.php", params)
 	if err != nil {
 		return nil, fmt.Errorf("easypay create: %w", err)
 	}
@@ -171,7 +209,7 @@ func (e *EasyPay) QueryOrder(ctx context.Context, tradeNo string) (*payment.Quer
 		"act": "order", "pid": e.config["pid"],
 		"key": e.config["pkey"], "out_trade_no": tradeNo,
 	}
-	body, err := e.post(ctx, e.config["apiBase"]+"/api.php", params)
+	body, err := e.post(ctx, e.apiBase()+"/api.php", params)
 	if err != nil {
 		return nil, fmt.Errorf("easypay query: %w", err)
 	}
@@ -234,25 +272,128 @@ func (e *EasyPay) VerifyNotification(_ context.Context, rawBody string, _ map[st
 }
 
 func (e *EasyPay) Refund(ctx context.Context, req payment.RefundRequest) (*payment.RefundResponse, error) {
-	params := map[string]string{
-		"pid": e.config["pid"], "key": e.config["pkey"],
-		"trade_no": req.TradeNo, "out_trade_no": req.OrderID, "money": req.Amount,
+	attempts := e.refundAttempts(req)
+	if len(attempts) == 0 {
+		return nil, fmt.Errorf("easypay refund missing order identifier")
 	}
-	body, err := e.post(ctx, e.config["apiBase"]+"/api.php?act=refund", params)
-	if err != nil {
-		return nil, fmt.Errorf("easypay refund: %w", err)
+	var firstErr error
+	for i, attempt := range attempts {
+		body, status, err := e.postRaw(ctx, e.apiBase()+"/api.php?act=refund", attempt.params)
+		if err != nil {
+			return nil, fmt.Errorf("easypay refund request: %w", err)
+		}
+		if err := parseEasyPayRefundResponse(status, body); err != nil {
+			if firstErr == nil {
+				firstErr = err
+			}
+			if i+1 < len(attempts) && isEasyPayRefundOrderNotFound(err) {
+				continue
+			}
+			return nil, err
+		}
+		return &payment.RefundResponse{RefundID: attempt.refundID, Status: payment.ProviderStatusSuccess}, nil
 	}
+	return nil, firstErr
+}
+
+type easyPayRefundAttempt struct {
+	params   map[string]string
+	refundID string
+}
+
+func (e *EasyPay) refundAttempts(req payment.RefundRequest) []easyPayRefundAttempt {
+	base := map[string]string{
+		"pid": e.config["pid"], "key": e.config["pkey"], "money": req.Amount,
+	}
+	var attempts []easyPayRefundAttempt
+	if orderID := strings.TrimSpace(req.OrderID); orderID != "" {
+		params := cloneStringMap(base)
+		params["out_trade_no"] = orderID
+		attempts = append(attempts, easyPayRefundAttempt{params: params, refundID: orderID})
+	}
+	if tradeNo := strings.TrimSpace(req.TradeNo); tradeNo != "" {
+		params := cloneStringMap(base)
+		params["trade_no"] = tradeNo
+		attempts = append(attempts, easyPayRefundAttempt{params: params, refundID: tradeNo})
+	}
+	return attempts
+}
+
+func cloneStringMap(in map[string]string) map[string]string {
+	out := make(map[string]string, len(in))
+	for k, v := range in {
+		out[k] = v
+	}
+	return out
+}
+
+func isEasyPayRefundOrderNotFound(err error) bool {
+	if err == nil {
+		return false
+	}
+	msg := err.Error()
+	lower := strings.ToLower(msg)
+	return strings.Contains(msg, "订单编号不存在") ||
+		strings.Contains(msg, "订单不存在") ||
+		strings.Contains(lower, "order not found") ||
+		strings.Contains(lower, "not exist")
+}
+
+func parseEasyPayRefundResponse(status int, body []byte) error {
+	summary := summarizeEasyPayResponse(body)
+	if status < http.StatusOK || status >= http.StatusMultipleChoices {
+		return fmt.Errorf("easypay refund HTTP %d: %s", status, summary)
+	}
+
+	trimmed := strings.TrimSpace(string(body))
+	if trimmed == "" {
+		return fmt.Errorf("easypay refund empty response (HTTP %d): %s", status, summary)
+	}
+
+	lower := strings.ToLower(trimmed)
+	if strings.HasPrefix(lower, "<!doctype html") || strings.HasPrefix(lower, "<html") ||
+		(strings.HasPrefix(lower, "<") && strings.Contains(lower, "html")) {
+		return fmt.Errorf("easypay refund non-JSON response (HTTP %d): %s", status, summary)
+	}
+
 	var resp struct {
-		Code int    `json:"code"`
+		Code any    `json:"code"`
 		Msg  string `json:"msg"`
 	}
 	if err := json.Unmarshal(body, &resp); err != nil {
-		return nil, fmt.Errorf("easypay parse refund: %w", err)
+		return fmt.Errorf("easypay refund non-JSON response (HTTP %d): %s", status, summary)
 	}
-	if resp.Code != easypayCodeSuccess {
-		return nil, fmt.Errorf("easypay refund failed: %s", resp.Msg)
+	if !easyPayResponseCodeIsSuccess(resp.Code) {
+		msg := strings.TrimSpace(resp.Msg)
+		if msg == "" {
+			msg = summary
+		}
+		return fmt.Errorf("easypay refund failed (HTTP %d): %s", status, msg)
 	}
-	return &payment.RefundResponse{RefundID: req.TradeNo, Status: payment.ProviderStatusSuccess}, nil
+	return nil
+}
+
+func easyPayResponseCodeIsSuccess(code any) bool {
+	switch v := code.(type) {
+	case float64:
+		return int(v) == easypayCodeSuccess
+	case string:
+		n, err := strconv.Atoi(strings.TrimSpace(v))
+		return err == nil && n == easypayCodeSuccess
+	default:
+		return false
+	}
+}
+
+func summarizeEasyPayResponse(body []byte) string {
+	summary := strings.Join(strings.Fields(string(body)), " ")
+	if summary == "" {
+		return "<empty>"
+	}
+	if len(summary) > maxEasypayErrorSummary {
+		return summary[:maxEasypayErrorSummary] + "..."
+	}
+	return summary
 }
 
 func (e *EasyPay) resolveCID(paymentType string) string {
@@ -269,21 +410,34 @@ func (e *EasyPay) resolveCID(paymentType string) string {
 }
 
 func (e *EasyPay) post(ctx context.Context, endpoint string, params map[string]string) ([]byte, error) {
+	body, _, err := e.postRaw(ctx, endpoint, params)
+	return body, err
+}
+
+func (e *EasyPay) postRaw(ctx context.Context, endpoint string, params map[string]string) ([]byte, int, error) {
 	form := url.Values{}
 	for k, v := range params {
 		form.Set(k, v)
 	}
 	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, strings.NewReader(form.Encode()))
 	if err != nil {
-		return nil, err
+		return nil, 0, err
 	}
 	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
-	resp, err := e.httpClient.Do(req)
+	client := e.httpClient
+	if client == nil {
+		client = &http.Client{Timeout: easypayHTTPTimeout}
+	}
+	resp, err := client.Do(req)
 	if err != nil {
-		return nil, err
+		return nil, 0, err
 	}
 	defer func() { _ = resp.Body.Close() }()
-	return io.ReadAll(io.LimitReader(resp.Body, maxEasypayResponseSize))
+	body, err := io.ReadAll(io.LimitReader(resp.Body, maxEasypayResponseSize))
+	if err != nil {
+		return nil, resp.StatusCode, err
+	}
+	return body, resp.StatusCode, nil
 }
 
 func easyPaySign(params map[string]string, pkey string) string {
diff --git a/backend/internal/payment/provider/easypay_refund_test.go b/backend/internal/payment/provider/easypay_refund_test.go
new file mode 100644
index 00000000..9e0e4942
--- /dev/null
+++ b/backend/internal/payment/provider/easypay_refund_test.go
@@ -0,0 +1,196 @@
+package provider
+
+import (
+	"context"
+	"net/http"
+	"net/http/httptest"
+	"net/url"
+	"strings"
+	"testing"
+
+	"github.com/Wei-Shaw/sub2api/internal/payment"
+)
+
+func TestNormalizeEasyPayAPIBase(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		input string
+		want  string
+	}{
+		{input: "https://zpayz.cn", want: "https://zpayz.cn"},
+		{input: "https://zpayz.cn/", want: "https://zpayz.cn"},
+		{input: "https://zpayz.cn/mapi.php", want: "https://zpayz.cn"},
+		{input: "https://zpayz.cn/submit.php", want: "https://zpayz.cn"},
+		{input: "https://zpayz.cn/api.php", want: "https://zpayz.cn"},
+		{input: "https://zpayz.cn/api.php?act=refund", want: "https://zpayz.cn"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.input, func(t *testing.T) {
+			t.Parallel()
+			if got := normalizeEasyPayAPIBase(tt.input); got != tt.want {
+				t.Fatalf("normalizeEasyPayAPIBase(%q) = %q, want %q", tt.input, got, tt.want)
+			}
+		})
+	}
+}
+
+func TestEasyPayRefundNormalizesAPIBaseAndSendsOutTradeNoOnly(t *testing.T) {
+	t.Parallel()
+
+	var gotPath string
+	var gotQuery url.Values
+	var gotForm url.Values
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		gotPath = r.URL.Path
+		gotQuery = r.URL.Query()
+		if err := r.ParseForm(); err != nil {
+			t.Errorf("ParseForm: %v", err)
+		}
+		gotForm = r.PostForm
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(`{"code":1,"msg":"ok"}`))
+	}))
+	defer server.Close()
+
+	provider := newTestEasyPay(t, server.URL+"/mapi.php")
+	resp, err := provider.Refund(context.Background(), payment.RefundRequest{
+		TradeNo: "trade-123",
+		OrderID: "out-456",
+		Amount:  "1.50",
+	})
+	if err != nil {
+		t.Fatalf("Refund returned error: %v", err)
+	}
+	if resp == nil || resp.Status != payment.ProviderStatusSuccess {
+		t.Fatalf("Refund response = %+v, want success", resp)
+	}
+	if gotPath != "/api.php" {
+		t.Fatalf("refund path = %q, want /api.php", gotPath)
+	}
+	if gotQuery.Get("act") != "refund" {
+		t.Fatalf("refund act query = %q, want refund", gotQuery.Get("act"))
+	}
+	for key, want := range map[string]string{
+		"pid":          "pid-1",
+		"key":          "pkey-1",
+		"out_trade_no": "out-456",
+		"money":        "1.50",
+	} {
+		if got := gotForm.Get(key); got != want {
+			t.Fatalf("form[%s] = %q, want %q (form=%v)", key, got, want, gotForm)
+		}
+	}
+	if got := gotForm.Get("trade_no"); got != "" {
+		t.Fatalf("form[trade_no] = %q, want empty (form=%v)", got, gotForm)
+	}
+}
+
+func TestEasyPayRefundRetriesWithTradeNoWhenOutTradeNoNotFound(t *testing.T) {
+	t.Parallel()
+
+	var gotForms []url.Values
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path != "/api.php" {
+			t.Errorf("refund path = %q, want /api.php", r.URL.Path)
+		}
+		if r.URL.Query().Get("act") != "refund" {
+			t.Errorf("refund act query = %q, want refund", r.URL.Query().Get("act"))
+		}
+		if err := r.ParseForm(); err != nil {
+			t.Errorf("ParseForm: %v", err)
+		}
+		gotForms = append(gotForms, r.PostForm)
+		w.Header().Set("Content-Type", "application/json")
+		if len(gotForms) == 1 {
+			_, _ = w.Write([]byte(`{"code":0,"msg":"订单编号不存在！"}`))
+			return
+		}
+		_, _ = w.Write([]byte(`{"code":1,"msg":"ok"}`))
+	}))
+	defer server.Close()
+
+	provider := newTestEasyPay(t, server.URL+"/mapi.php")
+	resp, err := provider.Refund(context.Background(), payment.RefundRequest{
+		TradeNo: "trade-123",
+		OrderID: "out-456",
+		Amount:  "1.50",
+	})
+	if err != nil {
+		t.Fatalf("Refund returned error: %v", err)
+	}
+	if resp == nil || resp.Status != payment.ProviderStatusSuccess || resp.RefundID != "trade-123" {
+		t.Fatalf("Refund response = %+v, want success with trade refund id", resp)
+	}
+	if len(gotForms) != 2 {
+		t.Fatalf("refund attempts = %d, want 2", len(gotForms))
+	}
+	if got := gotForms[0].Get("out_trade_no"); got != "out-456" {
+		t.Fatalf("first form[out_trade_no] = %q, want out-456 (form=%v)", got, gotForms[0])
+	}
+	if got := gotForms[0].Get("trade_no"); got != "" {
+		t.Fatalf("first form[trade_no] = %q, want empty (form=%v)", got, gotForms[0])
+	}
+	if got := gotForms[1].Get("trade_no"); got != "trade-123" {
+		t.Fatalf("second form[trade_no] = %q, want trade-123 (form=%v)", got, gotForms[1])
+	}
+	if got := gotForms[1].Get("out_trade_no"); got != "" {
+		t.Fatalf("second form[out_trade_no] = %q, want empty (form=%v)", got, gotForms[1])
+	}
+}
+
+func TestEasyPayRefundResponseErrors(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name       string
+		statusCode int
+		body       string
+		want       string
+	}{
+		{name: "html response", statusCode: http.StatusOK, body: "<html>bad config</html>", want: "non-JSON response (HTTP 200): <html>bad config</html>"},
+		{name: "non json response", statusCode: http.StatusOK, body: "not json", want: "non-JSON response (HTTP 200): not json"},
+		{name: "non 2xx response", statusCode: http.StatusBadGateway, body: "bad gateway", want: "HTTP 502: bad gateway"},
+		{name: "empty response", statusCode: http.StatusOK, body: "", want: "empty response (HTTP 200): <empty>"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+			server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+				w.WriteHeader(tt.statusCode)
+				_, _ = w.Write([]byte(tt.body))
+			}))
+			defer server.Close()
+
+			provider := newTestEasyPay(t, server.URL)
+			_, err := provider.Refund(context.Background(), payment.RefundRequest{
+				OrderID: "out-456",
+				Amount:  "1.50",
+			})
+			if err == nil {
+				t.Fatal("Refund returned nil error")
+			}
+			if !strings.Contains(err.Error(), tt.want) {
+				t.Fatalf("Refund error = %q, want substring %q", err.Error(), tt.want)
+			}
+		})
+	}
+}
+
+func newTestEasyPay(t *testing.T, apiBase string) *EasyPay {
+	t.Helper()
+
+	provider, err := NewEasyPay("test-instance", map[string]string{
+		"pid":       "pid-1",
+		"pkey":      "pkey-1",
+		"apiBase":   apiBase,
+		"notifyUrl": "https://example.com/notify",
+		"returnUrl": "https://example.com/return",
+	})
+	if err != nil {
+		t.Fatalf("NewEasyPay: %v", err)
+	}
+	return provider
+}

From c056db740d56ce008292a7b414c804cc6f308208 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Sun, 26 Apr 2026 05:24:11 +0000
Subject: [PATCH 07/46] chore: sync VERSION to 0.1.119 [skip ci]

---
 backend/cmd/server/VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/cmd/server/VERSION b/backend/cmd/server/VERSION
index 1fcba8fa..841597f0 100644
--- a/backend/cmd/server/VERSION
+++ b/backend/cmd/server/VERSION
@@ -1 +1 @@
-0.1.118
+0.1.119

From 798fd673e95deeacac079458686b19999006cd2b Mon Sep 17 00:00:00 2001
From: Hai Chang <haichang@microsoft.com>
Date: Sun, 26 Apr 2026 16:48:30 +1000
Subject: [PATCH 08/46] feat(httputil): decode compressed request bodies
 (zstd/gzip/deflate)

Codex CLI 0.125+ defaults to sending request bodies with
Content-Encoding: zstd. Without server-side decompression the gateway
returns 'Failed to parse request body' on /v1/responses (and any other
JSON endpoint) because gjson sees raw zstd bytes.

ReadRequestBodyWithPrealloc now inspects Content-Encoding and
transparently decodes zstd, gzip/x-gzip, and deflate bodies before
returning them, then strips the encoding headers and updates
ContentLength so downstream code can reuse the bytes safely.
Unsupported encodings produce a clear error.

Adds unit tests covering identity, zstd, gzip, deflate, unsupported
encoding, corrupt zstd payloads, nil bodies, and explicit identity.
---
 backend/internal/pkg/httputil/body.go      |  57 +++++++-
 backend/internal/pkg/httputil/body_test.go | 143 +++++++++++++++++++++
 2 files changed, 198 insertions(+), 2 deletions(-)
 create mode 100644 backend/internal/pkg/httputil/body_test.go

diff --git a/backend/internal/pkg/httputil/body.go b/backend/internal/pkg/httputil/body.go
index 69e99dc5..31bba8c5 100644
--- a/backend/internal/pkg/httputil/body.go
+++ b/backend/internal/pkg/httputil/body.go
@@ -2,8 +2,15 @@ package httputil
 
 import (
 	"bytes"
+	"compress/gzip"
+	"compress/zlib"
+	"errors"
+	"fmt"
 	"io"
 	"net/http"
+	"strings"
+
+	"github.com/klauspost/compress/zstd"
 )
 
 const (
@@ -11,7 +18,9 @@ const (
 	requestBodyReadMaxInitCap = 1 << 20
 )
 
-// ReadRequestBodyWithPrealloc reads request body with preallocated buffer based on content length.
+// ReadRequestBodyWithPrealloc reads the request body into a buffer preallocated
+// based on content length. Bodies with Content-Encoding zstd, gzip/x-gzip, or
+// deflate are decoded (request headers are updated); other encodings are errors.
 func ReadRequestBodyWithPrealloc(req *http.Request) ([]byte, error) {
 	if req == nil || req.Body == nil {
 		return nil, nil
@@ -33,5 +42,49 @@ func ReadRequestBodyWithPrealloc(req *http.Request) ([]byte, error) {
 	if _, err := io.Copy(buf, req.Body); err != nil {
 		return nil, err
 	}
-	return buf.Bytes(), nil
+	raw := buf.Bytes()
+
+	enc := strings.ToLower(strings.TrimSpace(req.Header.Get("Content-Encoding")))
+	if enc == "" || enc == "identity" {
+		return raw, nil
+	}
+
+	decoded, err := decompressRequestBody(enc, raw)
+	if err != nil {
+		return nil, fmt.Errorf("decode Content-Encoding %q: %w", enc, err)
+	}
+
+	req.Header.Del("Content-Encoding")
+	req.Header.Del("Content-Length")
+	req.ContentLength = int64(len(decoded))
+
+	return decoded, nil
+}
+
+func decompressRequestBody(encoding string, raw []byte) ([]byte, error) {
+	switch encoding {
+	case "zstd":
+		dec, err := zstd.NewReader(bytes.NewReader(raw))
+		if err != nil {
+			return nil, err
+		}
+		defer dec.Close()
+		return io.ReadAll(dec)
+	case "gzip", "x-gzip":
+		gr, err := gzip.NewReader(bytes.NewReader(raw))
+		if err != nil {
+			return nil, err
+		}
+		defer gr.Close()
+		return io.ReadAll(gr)
+	case "deflate":
+		zr, err := zlib.NewReader(bytes.NewReader(raw))
+		if err != nil {
+			return nil, err
+		}
+		defer zr.Close()
+		return io.ReadAll(zr)
+	default:
+		return nil, errors.New("unsupported Content-Encoding")
+	}
 }
diff --git a/backend/internal/pkg/httputil/body_test.go b/backend/internal/pkg/httputil/body_test.go
new file mode 100644
index 00000000..ed8355d5
--- /dev/null
+++ b/backend/internal/pkg/httputil/body_test.go
@@ -0,0 +1,143 @@
+package httputil
+
+import (
+	"bytes"
+	"compress/gzip"
+	"compress/zlib"
+	"net/http"
+	"strings"
+	"testing"
+
+	"github.com/klauspost/compress/zstd"
+)
+
+const samplePayload = `{"model":"gpt-5.5","input":"hi","stream":false}`
+
+func newRequestWithBody(t *testing.T, body []byte, encoding string) *http.Request {
+	t.Helper()
+	req, err := http.NewRequest(http.MethodPost, "/v1/responses", bytes.NewReader(body))
+	if err != nil {
+		t.Fatalf("NewRequest: %v", err)
+	}
+	if encoding != "" {
+		req.Header.Set("Content-Encoding", encoding)
+	}
+	req.ContentLength = int64(len(body))
+	return req
+}
+
+func TestReadRequestBodyWithPrealloc_PassesThroughIdentity(t *testing.T) {
+	req := newRequestWithBody(t, []byte(samplePayload), "")
+	got, err := ReadRequestBodyWithPrealloc(req)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if string(got) != samplePayload {
+		t.Fatalf("body mismatch: got %q", got)
+	}
+}
+
+func TestReadRequestBodyWithPrealloc_DecodesZstd(t *testing.T) {
+	enc, _ := zstd.NewWriter(nil)
+	compressed := enc.EncodeAll([]byte(samplePayload), nil)
+	_ = enc.Close()
+
+	req := newRequestWithBody(t, compressed, "zstd")
+	got, err := ReadRequestBodyWithPrealloc(req)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if string(got) != samplePayload {
+		t.Fatalf("body mismatch: got %q", got)
+	}
+	if req.Header.Get("Content-Encoding") != "" {
+		t.Fatalf("Content-Encoding should be cleared after decoding")
+	}
+	if req.ContentLength != int64(len(samplePayload)) {
+		t.Fatalf("ContentLength not updated: %d", req.ContentLength)
+	}
+}
+
+func TestReadRequestBodyWithPrealloc_DecodesGzip(t *testing.T) {
+	var buf bytes.Buffer
+	gw := gzip.NewWriter(&buf)
+	if _, err := gw.Write([]byte(samplePayload)); err != nil {
+		t.Fatalf("gzip write: %v", err)
+	}
+	if err := gw.Close(); err != nil {
+		t.Fatalf("gzip close: %v", err)
+	}
+
+	req := newRequestWithBody(t, buf.Bytes(), "gzip")
+	got, err := ReadRequestBodyWithPrealloc(req)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if string(got) != samplePayload {
+		t.Fatalf("body mismatch: got %q", got)
+	}
+}
+
+func TestReadRequestBodyWithPrealloc_DecodesDeflate(t *testing.T) {
+	var buf bytes.Buffer
+	zw := zlib.NewWriter(&buf)
+	if _, err := zw.Write([]byte(samplePayload)); err != nil {
+		t.Fatalf("zlib write: %v", err)
+	}
+	if err := zw.Close(); err != nil {
+		t.Fatalf("zlib close: %v", err)
+	}
+
+	req := newRequestWithBody(t, buf.Bytes(), "deflate")
+	got, err := ReadRequestBodyWithPrealloc(req)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if string(got) != samplePayload {
+		t.Fatalf("body mismatch: got %q", got)
+	}
+}
+
+func TestReadRequestBodyWithPrealloc_RejectsUnsupportedEncoding(t *testing.T) {
+	req := newRequestWithBody(t, []byte(samplePayload), "br")
+	_, err := ReadRequestBodyWithPrealloc(req)
+	if err == nil {
+		t.Fatal("expected error for unsupported encoding, got nil")
+	}
+	if !strings.Contains(err.Error(), "br") {
+		t.Fatalf("error should mention encoding, got %v", err)
+	}
+}
+
+func TestReadRequestBodyWithPrealloc_RejectsCorruptZstd(t *testing.T) {
+	req := newRequestWithBody(t, []byte("not actually zstd"), "zstd")
+	_, err := ReadRequestBodyWithPrealloc(req)
+	if err == nil {
+		t.Fatal("expected error for corrupt zstd body, got nil")
+	}
+}
+
+func TestReadRequestBodyWithPrealloc_NilBody(t *testing.T) {
+	req, err := http.NewRequest(http.MethodPost, "/v1/responses", nil)
+	if err != nil {
+		t.Fatalf("NewRequest: %v", err)
+	}
+	got, err := ReadRequestBodyWithPrealloc(req)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if got != nil {
+		t.Fatalf("expected nil body, got %q", got)
+	}
+}
+
+func TestReadRequestBodyWithPrealloc_RespectsIdentityEncoding(t *testing.T) {
+	req := newRequestWithBody(t, []byte(samplePayload), "identity")
+	got, err := ReadRequestBodyWithPrealloc(req)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if string(got) != samplePayload {
+		t.Fatalf("body mismatch: got %q", got)
+	}
+}

From 3022090365efdeb77e653ed509f2a1af2edd4846 Mon Sep 17 00:00:00 2001
From: Cloud370 <y@7z.ee>
Date: Sun, 26 Apr 2026 20:21:38 +0800
Subject: [PATCH 09/46] fix(anthropic): drop empty Read.pages in
 responses-to-anthropic tool input

---
 .../pkg/apicompat/anthropic_responses_test.go | 77 +++++++++++++++++++
 .../pkg/apicompat/responses_to_anthropic.go   | 64 ++++++++++++++-
 2 files changed, 139 insertions(+), 2 deletions(-)

diff --git a/backend/internal/pkg/apicompat/anthropic_responses_test.go b/backend/internal/pkg/apicompat/anthropic_responses_test.go
index c35b51b6..facfe572 100644
--- a/backend/internal/pkg/apicompat/anthropic_responses_test.go
+++ b/backend/internal/pkg/apicompat/anthropic_responses_test.go
@@ -258,6 +258,48 @@ func TestResponsesToAnthropic_ToolUse(t *testing.T) {
 	assert.Equal(t, "tool_use", anth.Content[1].Type)
 	assert.Equal(t, "call_1", anth.Content[1].ID)
 	assert.Equal(t, "get_weather", anth.Content[1].Name)
+	assert.JSONEq(t, `{"city":"NYC"}`, string(anth.Content[1].Input))
+}
+
+func TestResponsesToAnthropic_ReadToolDropsEmptyPages(t *testing.T) {
+	resp := &ResponsesResponse{
+		ID:     "resp_read",
+		Model:  "gpt-5.5",
+		Status: "completed",
+		Output: []ResponsesOutput{
+			{
+				Type:      "function_call",
+				CallID:    "call_read",
+				Name:      "Read",
+				Arguments: `{"file_path":"/tmp/demo.py","limit":2000,"offset":0,"pages":""}`,
+			},
+		},
+	}
+
+	anth := ResponsesToAnthropic(resp, "claude-opus-4-6")
+	require.Len(t, anth.Content, 1)
+	assert.Equal(t, "tool_use", anth.Content[0].Type)
+	assert.JSONEq(t, `{"file_path":"/tmp/demo.py","limit":2000,"offset":0}`, string(anth.Content[0].Input))
+}
+
+func TestResponsesToAnthropic_PreservesEmptyStringsForOtherTools(t *testing.T) {
+	resp := &ResponsesResponse{
+		ID:     "resp_other",
+		Model:  "gpt-5.5",
+		Status: "completed",
+		Output: []ResponsesOutput{
+			{
+				Type:      "function_call",
+				CallID:    "call_other",
+				Name:      "Search",
+				Arguments: `{"query":""}`,
+			},
+		},
+	}
+
+	anth := ResponsesToAnthropic(resp, "claude-opus-4-6")
+	require.Len(t, anth.Content, 1)
+	assert.JSONEq(t, `{"query":""}`, string(anth.Content[0].Input))
 }
 
 func TestResponsesToAnthropic_Reasoning(t *testing.T) {
@@ -472,6 +514,41 @@ func TestStreamingToolCall(t *testing.T) {
 	assert.Equal(t, "tool_use", events[0].Delta.StopReason)
 }
 
+func TestStreamingReadToolDropsEmptyPages(t *testing.T) {
+	state := NewResponsesEventToAnthropicState()
+
+	ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+		Type:     "response.created",
+		Response: &ResponsesResponse{ID: "resp_read_stream", Model: "gpt-5.5"},
+	}, state)
+
+	events := ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+		Type:        "response.output_item.added",
+		OutputIndex: 0,
+		Item:        &ResponsesOutput{Type: "function_call", CallID: "call_read", Name: "Read"},
+	}, state)
+	require.Len(t, events, 1)
+	assert.Equal(t, "content_block_start", events[0].Type)
+
+	events = ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+		Type:        "response.function_call_arguments.delta",
+		OutputIndex: 0,
+		Delta:       `{"file_path":"/tmp/demo.py","limit":2000,"offset":0,"pages":""}`,
+	}, state)
+	assert.Len(t, events, 0)
+
+	events = ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+		Type:        "response.function_call_arguments.done",
+		OutputIndex: 0,
+		Arguments:   `{"file_path":"/tmp/demo.py","limit":2000,"offset":0,"pages":""}`,
+	}, state)
+	require.Len(t, events, 2)
+	assert.Equal(t, "content_block_delta", events[0].Type)
+	assert.Equal(t, "input_json_delta", events[0].Delta.Type)
+	assert.JSONEq(t, `{"file_path":"/tmp/demo.py","limit":2000,"offset":0}`, events[0].Delta.PartialJSON)
+	assert.Equal(t, "content_block_stop", events[1].Type)
+}
+
 func TestStreamingReasoning(t *testing.T) {
 	state := NewResponsesEventToAnthropicState()
 
diff --git a/backend/internal/pkg/apicompat/responses_to_anthropic.go b/backend/internal/pkg/apicompat/responses_to_anthropic.go
index 40bed302..489ed238 100644
--- a/backend/internal/pkg/apicompat/responses_to_anthropic.go
+++ b/backend/internal/pkg/apicompat/responses_to_anthropic.go
@@ -52,7 +52,7 @@ func ResponsesToAnthropic(resp *ResponsesResponse, model string) *AnthropicRespo
 				Type:  "tool_use",
 				ID:    fromResponsesCallID(item.CallID),
 				Name:  item.Name,
-				Input: json.RawMessage(item.Arguments),
+				Input: sanitizeAnthropicToolUseInput(item.Name, item.Arguments),
 			})
 		case "web_search_call":
 			toolUseID := "srvtoolu_" + item.ID
@@ -129,6 +129,28 @@ func responsesStatusToAnthropicStopReason(status string, details *ResponsesIncom
 	}
 }
 
+func sanitizeAnthropicToolUseInput(name string, raw string) json.RawMessage {
+	if name != "Read" || raw == "" {
+		return json.RawMessage(raw)
+	}
+
+	var input map[string]json.RawMessage
+	if err := json.Unmarshal([]byte(raw), &input); err != nil {
+		return json.RawMessage(raw)
+	}
+
+	if pages, ok := input["pages"]; !ok || string(pages) != `""` {
+		return json.RawMessage(raw)
+	}
+
+	delete(input, "pages")
+	sanitized, err := json.Marshal(input)
+	if err != nil {
+		return json.RawMessage(raw)
+	}
+	return sanitized
+}
+
 // ---------------------------------------------------------------------------
 // Streaming: ResponsesStreamEvent → []AnthropicStreamEvent (stateful converter)
 // ---------------------------------------------------------------------------
@@ -142,6 +164,8 @@ type ResponsesEventToAnthropicState struct {
 	ContentBlockIndex int
 	ContentBlockOpen  bool
 	CurrentBlockType  string // "text" | "thinking" | "tool_use"
+	CurrentToolName   string
+	CurrentToolArgs   string
 
 	// OutputIndexToBlockIdx maps Responses output_index → Anthropic content block index.
 	OutputIndexToBlockIdx map[int]int
@@ -181,7 +205,7 @@ func ResponsesEventToAnthropicEvents(
 	case "response.function_call_arguments.delta":
 		return resToAnthHandleFuncArgsDelta(evt, state)
 	case "response.function_call_arguments.done":
-		return resToAnthHandleBlockDone(state)
+		return resToAnthHandleFuncArgsDone(evt, state)
 	case "response.output_item.done":
 		return resToAnthHandleOutputItemDone(evt, state)
 	case "response.reasoning_summary_text.delta":
@@ -278,6 +302,8 @@ func resToAnthHandleOutputItemAdded(evt *ResponsesStreamEvent, state *ResponsesE
 		state.OutputIndexToBlockIdx[evt.OutputIndex] = idx
 		state.ContentBlockOpen = true
 		state.CurrentBlockType = "tool_use"
+		state.CurrentToolName = evt.Item.Name
+		state.CurrentToolArgs = ""
 
 		events = append(events, AnthropicStreamEvent{
 			Type:  "content_block_start",
@@ -358,6 +384,11 @@ func resToAnthHandleFuncArgsDelta(evt *ResponsesStreamEvent, state *ResponsesEve
 		return nil
 	}
 
+	if state.CurrentBlockType == "tool_use" && state.CurrentToolName == "Read" {
+		state.CurrentToolArgs += evt.Delta
+		return nil
+	}
+
 	blockIdx, ok := state.OutputIndexToBlockIdx[evt.OutputIndex]
 	if !ok {
 		return nil
@@ -373,6 +404,33 @@ func resToAnthHandleFuncArgsDelta(evt *ResponsesStreamEvent, state *ResponsesEve
 	}}
 }
 
+func resToAnthHandleFuncArgsDone(evt *ResponsesStreamEvent, state *ResponsesEventToAnthropicState) []AnthropicStreamEvent {
+	if state.CurrentBlockType != "tool_use" || state.CurrentToolName != "Read" {
+		return resToAnthHandleBlockDone(state)
+	}
+
+	raw := evt.Arguments
+	if raw == "" {
+		raw = state.CurrentToolArgs
+	}
+	sanitized := sanitizeAnthropicToolUseInput(state.CurrentToolName, raw)
+	if len(sanitized) == 0 {
+		return closeCurrentBlock(state)
+	}
+
+	idx := state.ContentBlockIndex
+	events := []AnthropicStreamEvent{{
+		Type:  "content_block_delta",
+		Index: &idx,
+		Delta: &AnthropicDelta{
+			Type:        "input_json_delta",
+			PartialJSON: string(sanitized),
+		},
+	}}
+	events = append(events, closeCurrentBlock(state)...)
+	return events
+}
+
 func resToAnthHandleReasoningDelta(evt *ResponsesStreamEvent, state *ResponsesEventToAnthropicState) []AnthropicStreamEvent {
 	if evt.Delta == "" {
 		return nil
@@ -524,6 +582,8 @@ func closeCurrentBlock(state *ResponsesEventToAnthropicState) []AnthropicStreamE
 	idx := state.ContentBlockIndex
 	state.ContentBlockOpen = false
 	state.ContentBlockIndex++
+	state.CurrentToolName = ""
+	state.CurrentToolArgs = ""
 	return []AnthropicStreamEvent{{
 		Type:  "content_block_stop",
 		Index: &idx,

From 615557ec20977724fd7f0752012381f7f83d1123 Mon Sep 17 00:00:00 2001
From: gaoren002 <gaoren002@users.noreply.github.com>
Date: Sun, 26 Apr 2026 17:05:19 +0000
Subject: [PATCH 10/46] fix(openai): avoid implicit image sticky sessions

---
 backend/internal/handler/openai_images.go     |  7 +---
 .../service/openai_gateway_service.go         | 37 +++++++++++++++----
 .../service/openai_gateway_service_test.go    | 35 ++++++++++++++++++
 3 files changed, 66 insertions(+), 13 deletions(-)

diff --git a/backend/internal/handler/openai_images.go b/backend/internal/handler/openai_images.go
index 403b41ef..4d0078a7 100644
--- a/backend/internal/handler/openai_images.go
+++ b/backend/internal/handler/openai_images.go
@@ -117,12 +117,7 @@ func (h *OpenAIGatewayHandler) Images(c *gin.Context) {
 		return
 	}
 
-	sessionHash := ""
-	if parsed.Multipart {
-		sessionHash = h.gatewayService.GenerateSessionHashWithFallback(c, nil, parsed.StickySessionSeed())
-	} else {
-		sessionHash = h.gatewayService.GenerateSessionHash(c, body)
-	}
+	sessionHash := h.gatewayService.GenerateExplicitSessionHash(c, body)
 
 	maxAccountSwitches := h.maxAccountSwitches
 	switchCount := 0
diff --git a/backend/internal/service/openai_gateway_service.go b/backend/internal/service/openai_gateway_service.go
index 379ebe0b..13e3ddab 100644
--- a/backend/internal/service/openai_gateway_service.go
+++ b/backend/internal/service/openai_gateway_service.go
@@ -1125,6 +1125,35 @@ func (s *OpenAIGatewayService) ExtractSessionID(c *gin.Context, body []byte) str
 	return sessionID
 }
 
+func explicitOpenAISessionID(c *gin.Context, body []byte) string {
+	if c == nil {
+		return ""
+	}
+
+	sessionID := strings.TrimSpace(c.GetHeader("session_id"))
+	if sessionID == "" {
+		sessionID = strings.TrimSpace(c.GetHeader("conversation_id"))
+	}
+	if sessionID == "" && len(body) > 0 {
+		sessionID = strings.TrimSpace(gjson.GetBytes(body, "prompt_cache_key").String())
+	}
+	return sessionID
+}
+
+// GenerateExplicitSessionHash generates a sticky-session hash only from explicit
+// client session signals. It intentionally skips content-derived fallback and is
+// used by stateless endpoints such as /v1/images.
+func (s *OpenAIGatewayService) GenerateExplicitSessionHash(c *gin.Context, body []byte) string {
+	sessionID := explicitOpenAISessionID(c, body)
+	if sessionID == "" {
+		return ""
+	}
+
+	currentHash, legacyHash := deriveOpenAISessionHashes(sessionID)
+	attachOpenAILegacySessionHashToGin(c, legacyHash)
+	return currentHash
+}
+
 // GenerateSessionHash generates a sticky-session hash for OpenAI requests.
 //
 // Priority:
@@ -1137,13 +1166,7 @@ func (s *OpenAIGatewayService) GenerateSessionHash(c *gin.Context, body []byte)
 		return ""
 	}
 
-	sessionID := strings.TrimSpace(c.GetHeader("session_id"))
-	if sessionID == "" {
-		sessionID = strings.TrimSpace(c.GetHeader("conversation_id"))
-	}
-	if sessionID == "" && len(body) > 0 {
-		sessionID = strings.TrimSpace(gjson.GetBytes(body, "prompt_cache_key").String())
-	}
+	sessionID := explicitOpenAISessionID(c, body)
 	if sessionID == "" && len(body) > 0 {
 		sessionID = deriveOpenAIContentSessionSeed(body)
 	}
diff --git a/backend/internal/service/openai_gateway_service_test.go b/backend/internal/service/openai_gateway_service_test.go
index bc900689..03b49865 100644
--- a/backend/internal/service/openai_gateway_service_test.go
+++ b/backend/internal/service/openai_gateway_service_test.go
@@ -227,6 +227,41 @@ func TestOpenAIGatewayService_GenerateSessionHash_AttachesLegacyHashToContext(t
 	require.NotEmpty(t, openAILegacySessionHashFromContext(c.Request.Context()))
 }
 
+func TestOpenAIGatewayService_GenerateExplicitSessionHash_SkipsContentFallback(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+	svc := &OpenAIGatewayService{}
+	body := []byte(`{"model":"gpt-image-2","prompt":"draw a cat"}`)
+
+	t.Run("stateless image body stays unstuck", func(t *testing.T) {
+		rec := httptest.NewRecorder()
+		c, _ := gin.CreateTestContext(rec)
+		c.Request = httptest.NewRequest(http.MethodPost, "/v1/images/generations", nil)
+
+		require.Empty(t, svc.GenerateExplicitSessionHash(c, body))
+		require.Empty(t, openAILegacySessionHashFromContext(c.Request.Context()))
+	})
+
+	t.Run("prompt_cache_key is explicit", func(t *testing.T) {
+		rec := httptest.NewRecorder()
+		c, _ := gin.CreateTestContext(rec)
+		c.Request = httptest.NewRequest(http.MethodPost, "/v1/images/generations", nil)
+
+		got := svc.GenerateExplicitSessionHash(c, []byte(`{"model":"gpt-image-2","prompt_cache_key":"image-session"}`))
+		require.Equal(t, fmt.Sprintf("%016x", xxhash.Sum64String("image-session")), got)
+		require.NotEmpty(t, openAILegacySessionHashFromContext(c.Request.Context()))
+	})
+
+	t.Run("header overrides body", func(t *testing.T) {
+		rec := httptest.NewRecorder()
+		c, _ := gin.CreateTestContext(rec)
+		c.Request = httptest.NewRequest(http.MethodPost, "/v1/images/generations", nil)
+		c.Request.Header.Set("session_id", "header-session")
+
+		got := svc.GenerateExplicitSessionHash(c, []byte(`{"prompt_cache_key":"body-session"}`))
+		require.Equal(t, fmt.Sprintf("%016x", xxhash.Sum64String("header-session")), got)
+	})
+}
+
 func TestOpenAIGatewayService_GenerateSessionHashWithFallback(t *testing.T) {
 	gin.SetMode(gin.TestMode)
 	rec := httptest.NewRecorder()

From 9fe02bba7e31477ccb3b7eb039904b398169fd19 Mon Sep 17 00:00:00 2001
From: gaoren002 <gaoren002@users.noreply.github.com>
Date: Sun, 26 Apr 2026 17:04:35 +0000
Subject: [PATCH 11/46] fix(openai): strip unsupported passthrough fields

---
 .../service/openai_codex_transform.go         | 36 ++++++++++---------
 .../service/openai_codex_transform_test.go    | 21 +++++++++++
 .../service/openai_gateway_service.go         | 15 +++++++-
 .../openai_passthrough_normalization_test.go  | 33 +++++++++++++++++
 4 files changed, 87 insertions(+), 18 deletions(-)
 create mode 100644 backend/internal/service/openai_passthrough_normalization_test.go

diff --git a/backend/internal/service/openai_codex_transform.go b/backend/internal/service/openai_codex_transform.go
index e765d7e9..0e31b242 100644
--- a/backend/internal/service/openai_codex_transform.go
+++ b/backend/internal/service/openai_codex_transform.go
@@ -53,6 +53,23 @@ const (
 	codexSparkImageUnsupportedText   = codexSparkImageUnsupportedMarker + "\nThe current model is gpt-5.3-codex-spark, which does not support image generation, image editing, image input, the `image_generation` tool, or Codex `image_gen`/`$imagegen` workflows. If the user asks for image generation or image editing, clearly explain this model limitation and ask them to switch to a non-Spark Codex model such as gpt-5.3-codex or gpt-5.4. Do not claim that the local environment merely lacks image_gen tooling, and do not suggest CLI fallback as the primary fix while the model remains Spark.\n</sub2api-codex-spark-image-unsupported>"
 )
 
+var openAIChatGPTInternalUnsupportedFields = []string{
+	"user",
+	"metadata",
+	"prompt_cache_retention",
+	"safety_identifier",
+	"stream_options",
+}
+
+var openAICodexOAuthUnsupportedFields = append([]string{
+	"max_output_tokens",
+	"max_completion_tokens",
+	"temperature",
+	"top_p",
+	"frequency_penalty",
+	"presence_penalty",
+}, openAIChatGPTInternalUnsupportedFields...)
+
 func applyCodexOAuthTransform(reqBody map[string]any, isCodexCLI bool, isCompact bool) codexTransformResult {
 	result := codexTransformResult{}
 	// 工具续链需求会影响存储策略与 input 过滤逻辑。
@@ -93,23 +110,8 @@ func applyCodexOAuthTransform(reqBody map[string]any, isCodexCLI bool, isCompact
 		}
 	}
 
-	// Strip parameters unsupported by codex models via the Responses API.
-	for _, key := range []string{
-		"max_output_tokens",
-		"max_completion_tokens",
-		"temperature",
-		"top_p",
-		"frequency_penalty",
-		"presence_penalty",
-		// prompt_cache_retention is a newer Responses API parameter (cache TTL).
-		// The ChatGPT internal Codex endpoint rejects it with
-		// "Unsupported parameter: prompt_cache_retention". Defense-in-depth
-		// for any OAuth path that reaches this transform — the Cursor
-		// Responses-shape short-circuit in ForwardAsChatCompletions strips
-		// it earlier too, but we keep this line so other OAuth callers are
-		// equally protected.
-		"prompt_cache_retention",
-	} {
+	// Strip parameters unsupported by ChatGPT internal Codex endpoint.
+	for _, key := range openAICodexOAuthUnsupportedFields {
 		if _, ok := reqBody[key]; ok {
 			delete(reqBody, key)
 			result.Modified = true
diff --git a/backend/internal/service/openai_codex_transform_test.go b/backend/internal/service/openai_codex_transform_test.go
index 75f5c55c..4bdcd2e9 100644
--- a/backend/internal/service/openai_codex_transform_test.go
+++ b/backend/internal/service/openai_codex_transform_test.go
@@ -1048,6 +1048,27 @@ func TestApplyCodexOAuthTransform_StripsPromptCacheRetention(t *testing.T) {
 		"prompt_cache_retention must be stripped before forwarding to Codex upstream")
 }
 
+func TestApplyCodexOAuthTransform_StripsChatGPTInternalUnsupportedFields(t *testing.T) {
+	reqBody := map[string]any{
+		"model":                  "gpt-5.4",
+		"user":                   "user_123",
+		"metadata":               map[string]any{"trace_id": "abc"},
+		"prompt_cache_retention": "24h",
+		"safety_identifier":      "sid",
+		"stream_options":         map[string]any{"include_usage": true},
+		"input": []any{
+			map[string]any{"role": "user", "content": "hi"},
+		},
+	}
+
+	result := applyCodexOAuthTransform(reqBody, true, false)
+
+	require.True(t, result.Modified)
+	for _, field := range openAIChatGPTInternalUnsupportedFields {
+		require.NotContains(t, reqBody, field)
+	}
+}
+
 func TestApplyCodexOAuthTransform_ExtractsSystemMessages(t *testing.T) {
 	reqBody := map[string]any{
 		"model": "gpt-5.1",
diff --git a/backend/internal/service/openai_gateway_service.go b/backend/internal/service/openai_gateway_service.go
index 379ebe0b..e23476e7 100644
--- a/backend/internal/service/openai_gateway_service.go
+++ b/backend/internal/service/openai_gateway_service.go
@@ -5454,7 +5454,8 @@ func extractOpenAIRequestMetaFromBody(body []byte) (model string, stream bool, p
 }
 
 // normalizeOpenAIPassthroughOAuthBody 将透传 OAuth 请求体收敛为旧链路关键行为：
-// 1) store=false 2) 非 compact 保持 stream=true；compact 强制 stream=false
+// 1) 删除 ChatGPT internal API 不支持的顶层 Responses 参数
+// 2) store=false 3) 非 compact 保持 stream=true；compact 强制 stream=false
 func normalizeOpenAIPassthroughOAuthBody(body []byte, compact bool) ([]byte, bool, error) {
 	if len(body) == 0 {
 		return body, false, nil
@@ -5463,6 +5464,18 @@ func normalizeOpenAIPassthroughOAuthBody(body []byte, compact bool) ([]byte, boo
 	normalized := body
 	changed := false
 
+	for _, field := range openAIChatGPTInternalUnsupportedFields {
+		if value := gjson.GetBytes(normalized, field); !value.Exists() {
+			continue
+		}
+		next, err := sjson.DeleteBytes(normalized, field)
+		if err != nil {
+			return body, false, fmt.Errorf("normalize passthrough body delete %s: %w", field, err)
+		}
+		normalized = next
+		changed = true
+	}
+
 	if compact {
 		if store := gjson.GetBytes(normalized, "store"); store.Exists() {
 			next, err := sjson.DeleteBytes(normalized, "store")
diff --git a/backend/internal/service/openai_passthrough_normalization_test.go b/backend/internal/service/openai_passthrough_normalization_test.go
new file mode 100644
index 00000000..492ff610
--- /dev/null
+++ b/backend/internal/service/openai_passthrough_normalization_test.go
@@ -0,0 +1,33 @@
+package service
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+	"github.com/tidwall/gjson"
+)
+
+func TestNormalizeOpenAIPassthroughOAuthBody_RemovesUnsupportedUser(t *testing.T) {
+	body := []byte(`{"model":"gpt-5.4","input":"hello","user":"user_123","metadata":{"user_id":"user_123"},"prompt_cache_retention":"24h","safety_identifier":"sid","stream_options":{"include_usage":true}}`)
+
+	normalized, changed, err := normalizeOpenAIPassthroughOAuthBody(body, false)
+	require.NoError(t, err)
+	require.True(t, changed)
+	for _, field := range openAIChatGPTInternalUnsupportedFields {
+		require.False(t, gjson.GetBytes(normalized, field).Exists(), "%s should be stripped", field)
+	}
+	require.True(t, gjson.GetBytes(normalized, "stream").Bool())
+	require.False(t, gjson.GetBytes(normalized, "store").Bool())
+}
+
+func TestNormalizeOpenAIPassthroughOAuthBody_CompactRemovesUnsupportedUser(t *testing.T) {
+	body := []byte(`{"model":"gpt-5.4","input":"hello","user":"user_123","metadata":{"user_id":"user_123"},"stream":true,"store":true}`)
+
+	normalized, changed, err := normalizeOpenAIPassthroughOAuthBody(body, true)
+	require.NoError(t, err)
+	require.True(t, changed)
+	require.False(t, gjson.GetBytes(normalized, "user").Exists())
+	require.False(t, gjson.GetBytes(normalized, "metadata").Exists())
+	require.False(t, gjson.GetBytes(normalized, "stream").Exists())
+	require.False(t, gjson.GetBytes(normalized, "store").Exists())
+}

From 53f919f8f07ad386126dfa9fb5c6b5c69dca6c3e Mon Sep 17 00:00:00 2001
From: hansnow <hansnow2012@gmail.com>
Date: Mon, 27 Apr 2026 16:47:44 +0800
Subject: [PATCH 12/46] fix(api-key): reset rate limit usage cache

---
 backend/cmd/server/wire_gen.go                |  2 +-
 .../handler/admin/admin_service_stub_test.go  | 16 ++++++++
 .../internal/handler/admin/apikey_handler.go  | 19 +++++++--
 .../handler/admin/apikey_handler_test.go      | 40 +++++++++++++++++++
 backend/internal/service/admin_service.go     | 25 ++++++++++++
 .../internal/service/billing_cache_service.go | 12 ++++++
 backend/internal/service/wire.go              | 18 ++++++++-
 7 files changed, 127 insertions(+), 5 deletions(-)

diff --git a/backend/cmd/server/wire_gen.go b/backend/cmd/server/wire_gen.go
index f767bbea..dab35577 100644
--- a/backend/cmd/server/wire_gen.go
+++ b/backend/cmd/server/wire_gen.go
@@ -65,7 +65,7 @@ func initializeApplication(buildInfo handler.BuildInfo) (*Application, error) {
 	userGroupRateRepository := repository.NewUserGroupRateRepository(db)
 	billingCacheService := service.ProvideBillingCacheService(billingCache, userRepository, userSubscriptionRepository, apiKeyRepository, userRPMCache, userGroupRateRepository, configConfig)
 	apiKeyCache := repository.NewAPIKeyCache(redisClient)
-	apiKeyService := service.NewAPIKeyService(apiKeyRepository, userRepository, groupRepository, userSubscriptionRepository, userGroupRateRepository, apiKeyCache, configConfig)
+	apiKeyService := service.ProvideAPIKeyService(apiKeyRepository, userRepository, groupRepository, userSubscriptionRepository, userGroupRateRepository, apiKeyCache, configConfig, billingCacheService)
 	apiKeyAuthCacheInvalidator := service.ProvideAPIKeyAuthCacheInvalidator(apiKeyService)
 	promoService := service.NewPromoService(promoCodeRepository, userRepository, billingCacheService, client, apiKeyAuthCacheInvalidator)
 	subscriptionService := service.NewSubscriptionService(groupRepository, userSubscriptionRepository, billingCacheService, client, configConfig)
diff --git a/backend/internal/handler/admin/admin_service_stub_test.go b/backend/internal/handler/admin/admin_service_stub_test.go
index 2fe29fa3..b187b47f 100644
--- a/backend/internal/handler/admin/admin_service_stub_test.go
+++ b/backend/internal/handler/admin/admin_service_stub_test.go
@@ -565,6 +565,22 @@ func (s *stubAdminService) AdminUpdateAPIKeyGroupID(ctx context.Context, keyID i
 	return nil, service.ErrAPIKeyNotFound
 }
 
+func (s *stubAdminService) AdminResetAPIKeyRateLimitUsage(ctx context.Context, keyID int64) (*service.APIKey, error) {
+	for i := range s.apiKeys {
+		if s.apiKeys[i].ID == keyID {
+			s.apiKeys[i].Usage5h = 0
+			s.apiKeys[i].Usage1d = 0
+			s.apiKeys[i].Usage7d = 0
+			s.apiKeys[i].Window5hStart = nil
+			s.apiKeys[i].Window1dStart = nil
+			s.apiKeys[i].Window7dStart = nil
+			k := s.apiKeys[i]
+			return &k, nil
+		}
+	}
+	return nil, service.ErrAPIKeyNotFound
+}
+
 func (s *stubAdminService) ResetAccountQuota(ctx context.Context, id int64) error {
 	return nil
 }
diff --git a/backend/internal/handler/admin/apikey_handler.go b/backend/internal/handler/admin/apikey_handler.go
index 8dd245a4..5e405bdd 100644
--- a/backend/internal/handler/admin/apikey_handler.go
+++ b/backend/internal/handler/admin/apikey_handler.go
@@ -22,12 +22,13 @@ func NewAdminAPIKeyHandler(adminService service.AdminService) *AdminAPIKeyHandle
 	}
 }
 
-// AdminUpdateAPIKeyGroupRequest represents the request to update an API key's group
+// AdminUpdateAPIKeyGroupRequest represents the request to update an API key.
 type AdminUpdateAPIKeyGroupRequest struct {
-	GroupID *int64 `json:"group_id"` // nil=不修改, 0=解绑, >0=绑定到目标分组
+	GroupID             *int64 `json:"group_id"`               // nil=不修改, 0=解绑, >0=绑定到目标分组
+	ResetRateLimitUsage *bool  `json:"reset_rate_limit_usage"` // true=重置 5h/1d/7d 限速用量
 }
 
-// UpdateGroup handles updating an API key's group binding
+// UpdateGroup handles updating an API key's admin-managed fields.
 // PUT /api/v1/admin/api-keys/:id
 func (h *AdminAPIKeyHandler) UpdateGroup(c *gin.Context) {
 	keyID, err := strconv.ParseInt(c.Param("id"), 10, 64)
@@ -42,11 +43,23 @@ func (h *AdminAPIKeyHandler) UpdateGroup(c *gin.Context) {
 		return
 	}
 
+	var resetKey *service.APIKey
+	if req.ResetRateLimitUsage != nil && *req.ResetRateLimitUsage {
+		resetKey, err = h.adminService.AdminResetAPIKeyRateLimitUsage(c.Request.Context(), keyID)
+		if err != nil {
+			response.ErrorFrom(c, err)
+			return
+		}
+	}
+
 	result, err := h.adminService.AdminUpdateAPIKeyGroupID(c.Request.Context(), keyID, req.GroupID)
 	if err != nil {
 		response.ErrorFrom(c, err)
 		return
 	}
+	if resetKey != nil && req.GroupID == nil {
+		result.APIKey = resetKey
+	}
 
 	resp := struct {
 		APIKey                 *dto.APIKey `json:"api_key"`
diff --git a/backend/internal/handler/admin/apikey_handler_test.go b/backend/internal/handler/admin/apikey_handler_test.go
index bf128b18..6ac6d52f 100644
--- a/backend/internal/handler/admin/apikey_handler_test.go
+++ b/backend/internal/handler/admin/apikey_handler_test.go
@@ -8,6 +8,7 @@ import (
 	"net/http"
 	"net/http/httptest"
 	"testing"
+	"time"
 
 	infraerrors "github.com/Wei-Shaw/sub2api/internal/pkg/errors"
 	"github.com/Wei-Shaw/sub2api/internal/service"
@@ -117,6 +118,45 @@ func TestAdminAPIKeyHandler_UpdateGroup_Unbind(t *testing.T) {
 	require.Nil(t, resp.Data.APIKey.GroupID)
 }
 
+func TestAdminAPIKeyHandler_ResetRateLimitUsage(t *testing.T) {
+	svc := newStubAdminService()
+	now := time.Now()
+	svc.apiKeys[0].Usage5h = 1.2
+	svc.apiKeys[0].Usage1d = 3.4
+	svc.apiKeys[0].Usage7d = 5.6
+	svc.apiKeys[0].Window5hStart = &now
+	svc.apiKeys[0].Window1dStart = &now
+	svc.apiKeys[0].Window7dStart = &now
+	router := setupAPIKeyHandler(svc)
+
+	rec := httptest.NewRecorder()
+	req := httptest.NewRequest(http.MethodPut, "/api/v1/admin/api-keys/10", bytes.NewBufferString(`{"reset_rate_limit_usage":true}`))
+	req.Header.Set("Content-Type", "application/json")
+	router.ServeHTTP(rec, req)
+
+	require.Equal(t, http.StatusOK, rec.Code)
+
+	var resp struct {
+		Data struct {
+			APIKey struct {
+				Usage5h       float64    `json:"usage_5h"`
+				Usage1d       float64    `json:"usage_1d"`
+				Usage7d       float64    `json:"usage_7d"`
+				Window5hStart *time.Time `json:"window_5h_start"`
+				Window1dStart *time.Time `json:"window_1d_start"`
+				Window7dStart *time.Time `json:"window_7d_start"`
+			} `json:"api_key"`
+		} `json:"data"`
+	}
+	require.NoError(t, json.Unmarshal(rec.Body.Bytes(), &resp))
+	require.Zero(t, resp.Data.APIKey.Usage5h)
+	require.Zero(t, resp.Data.APIKey.Usage1d)
+	require.Zero(t, resp.Data.APIKey.Usage7d)
+	require.Nil(t, resp.Data.APIKey.Window5hStart)
+	require.Nil(t, resp.Data.APIKey.Window1dStart)
+	require.Nil(t, resp.Data.APIKey.Window7dStart)
+}
+
 func TestAdminAPIKeyHandler_UpdateGroup_ServiceError(t *testing.T) {
 	svc := &failingUpdateGroupService{
 		stubAdminService: newStubAdminService(),
diff --git a/backend/internal/service/admin_service.go b/backend/internal/service/admin_service.go
index 434f1f38..cb0c5339 100644
--- a/backend/internal/service/admin_service.go
+++ b/backend/internal/service/admin_service.go
@@ -58,6 +58,7 @@ type AdminService interface {
 
 	// API Key management (admin)
 	AdminUpdateAPIKeyGroupID(ctx context.Context, keyID int64, groupID *int64) (*AdminUpdateAPIKeyGroupIDResult, error)
+	AdminResetAPIKeyRateLimitUsage(ctx context.Context, keyID int64) (*APIKey, error)
 
 	// ReplaceUserGroup 替换用户的专属分组：授予新分组权限、迁移 Key、移除旧分组权限
 	ReplaceUserGroup(ctx context.Context, userID, oldGroupID, newGroupID int64) (*ReplaceUserGroupResult, error)
@@ -1961,6 +1962,30 @@ func (s *adminServiceImpl) AdminUpdateAPIKeyGroupID(ctx context.Context, keyID i
 	return result, nil
 }
 
+// AdminResetAPIKeyRateLimitUsage resets all API key rate-limit usage windows.
+func (s *adminServiceImpl) AdminResetAPIKeyRateLimitUsage(ctx context.Context, keyID int64) (*APIKey, error) {
+	apiKey, err := s.apiKeyRepo.GetByID(ctx, keyID)
+	if err != nil {
+		return nil, err
+	}
+	apiKey.Usage5h = 0
+	apiKey.Usage1d = 0
+	apiKey.Usage7d = 0
+	apiKey.Window5hStart = nil
+	apiKey.Window1dStart = nil
+	apiKey.Window7dStart = nil
+	if err := s.apiKeyRepo.Update(ctx, apiKey); err != nil {
+		return nil, fmt.Errorf("reset api key rate limit usage: %w", err)
+	}
+	if s.authCacheInvalidator != nil {
+		s.authCacheInvalidator.InvalidateAuthCacheByKey(ctx, apiKey.Key)
+	}
+	if s.billingCacheService != nil {
+		_ = s.billingCacheService.InvalidateAPIKeyRateLimit(ctx, apiKey.ID)
+	}
+	return apiKey, nil
+}
+
 // ReplaceUserGroup 替换用户的专属分组
 func (s *adminServiceImpl) ReplaceUserGroup(ctx context.Context, userID, oldGroupID, newGroupID int64) (*ReplaceUserGroupResult, error) {
 	if oldGroupID == newGroupID {
diff --git a/backend/internal/service/billing_cache_service.go b/backend/internal/service/billing_cache_service.go
index 4e695eb9..050db55b 100644
--- a/backend/internal/service/billing_cache_service.go
+++ b/backend/internal/service/billing_cache_service.go
@@ -508,6 +508,18 @@ func (s *BillingCacheService) InvalidateSubscription(ctx context.Context, userID
 	return nil
 }
 
+// InvalidateAPIKeyRateLimit invalidates the Redis rate-limit usage cache for an API key.
+func (s *BillingCacheService) InvalidateAPIKeyRateLimit(ctx context.Context, keyID int64) error {
+	if s.cache == nil {
+		return nil
+	}
+	if err := s.cache.InvalidateAPIKeyRateLimit(ctx, keyID); err != nil {
+		logger.LegacyPrintf("service.billing_cache", "Warning: invalidate api key rate limit cache failed for key %d: %v", keyID, err)
+		return err
+	}
+	return nil
+}
+
 // ============================================
 // API Key 限速缓存方法
 // ============================================
diff --git a/backend/internal/service/wire.go b/backend/internal/service/wire.go
index b1d9aaba..8b50e478 100644
--- a/backend/internal/service/wire.go
+++ b/backend/internal/service/wire.go
@@ -404,12 +404,28 @@ func ProvideBillingCacheService(
 	return NewBillingCacheService(cache, userRepo, subRepo, apiKeyRepo, rpmCache, rateRepo, cfg)
 }
 
+// ProvideAPIKeyService wires APIKeyService and connects rate-limit cache invalidation.
+func ProvideAPIKeyService(
+	apiKeyRepo APIKeyRepository,
+	userRepo UserRepository,
+	groupRepo GroupRepository,
+	userSubRepo UserSubscriptionRepository,
+	userGroupRateRepo UserGroupRateRepository,
+	cache APIKeyCache,
+	cfg *config.Config,
+	billingCacheService *BillingCacheService,
+) *APIKeyService {
+	svc := NewAPIKeyService(apiKeyRepo, userRepo, groupRepo, userSubRepo, userGroupRateRepo, cache, cfg)
+	svc.SetRateLimitCacheInvalidator(billingCacheService)
+	return svc
+}
+
 // ProviderSet is the Wire provider set for all services
 var ProviderSet = wire.NewSet(
 	// Core services
 	NewAuthService,
 	NewUserService,
-	NewAPIKeyService,
+	ProvideAPIKeyService,
 	ProvideAPIKeyAuthCacheInvalidator,
 	NewGroupService,
 	NewAccountService,

From 65c27d2c6948898322dcc26af0d9b968616ed5a0 Mon Sep 17 00:00:00 2001
From: KnowSky404 <git@knowsky404.com>
Date: Mon, 27 Apr 2026 17:21:11 +0800
Subject: [PATCH 13/46] docs: add account bulk edit scope design

---
 ...ount-bulk-edit-scope-and-compact-design.md | 233 ++++++++++++++++++
 1 file changed, 233 insertions(+)
 create mode 100644 docs/superpowers/specs/2026-04-27-account-bulk-edit-scope-and-compact-design.md

diff --git a/docs/superpowers/specs/2026-04-27-account-bulk-edit-scope-and-compact-design.md b/docs/superpowers/specs/2026-04-27-account-bulk-edit-scope-and-compact-design.md
new file mode 100644
index 00000000..3a1dc5ac
--- /dev/null
+++ b/docs/superpowers/specs/2026-04-27-account-bulk-edit-scope-and-compact-design.md
@@ -0,0 +1,233 @@
+# Account Bulk Edit Scope And Compact Design
+
+## Summary
+
+This change expands admin account bulk edit in two directions:
+
+1. Add a second bulk-edit target scope based on the current filter result set, so operators do not need to manually select every account.
+2. Align OpenAI bulk-edit fields with single-account create/edit for the compact-related settings that are already supported elsewhere.
+
+The design keeps the existing selected-row workflow intact and adds a unified bulk-edit entry with two explicit actions:
+
+- `Bulk edit selected accounts`
+- `Bulk edit current filtered results`
+
+`Current filtered results` reuses the existing account-list filters. That means:
+
+- with no filters, it targets the whole account inventory
+- with a group filter, it targets all accounts in that group
+- with combined filters, it targets all matching accounts
+
+## Goals
+
+- Preserve the current selected-account bulk edit flow.
+- Let operators bulk edit the full current filtered result set without manual row selection.
+- Show the user the exact target scope before applying changes.
+- Reuse the current list filter semantics instead of inventing a separate "all accounts" or "by group" API.
+- Add the missing OpenAI bulk-edit fields:
+  - OAuth `codex_cli_only`
+  - API key `openai_apikey_responses_websockets_v2_mode`
+
+## Non-Goals
+
+- No new standalone "edit all accounts" route that ignores filters.
+- No new dedicated "edit group" route separate from list filters.
+- No change to the backend merge semantics for other bulk-edit fields.
+- No attempt in this change to refactor all account form components into a shared schema system.
+
+## Current State
+
+### Bulk edit entry
+
+The account list currently exposes bulk edit only through selected-row actions. `AccountsView.vue` passes `selIds`, `selPlatforms`, and `selTypes` into `BulkEditAccountModal.vue`.
+
+### Filter state
+
+The account page already keeps a central `params` object for current filters and reloads the table from that state. Group filtering already exists in `AccountTableFilters.vue`.
+
+### Bulk edit payload
+
+`BulkEditAccountModal.vue` builds a bulk update request around explicit account IDs.
+
+### OpenAI field gap
+
+Single-account create/edit already supports:
+
+- `openai_passthrough`
+- OAuth WS mode
+- API key WS mode
+- OAuth `codex_cli_only`
+
+Bulk edit currently supports:
+
+- `openai_passthrough`
+- OAuth WS mode only
+
+That leaves a real capability gap for operators managing large OpenAI account sets.
+
+## User Experience
+
+### Entry point
+
+Use one compact `Bulk edit` dropdown button in the table-level bulk actions area above the grid.
+
+The dropdown contains:
+
+- `Bulk edit selected accounts`
+- `Bulk edit current filtered results`
+
+Behavior:
+
+- If there is no row selection, the `selected accounts` action is disabled.
+- `Current filtered results` is always available.
+- The existing separate immediate `Edit` action in the selected-row bar is replaced by this unified dropdown, so there are never two edit buttons that target different scopes.
+
+### Modal scope messaging
+
+The bulk edit modal gets a required scope descriptor prop.
+
+For `selected accounts`:
+
+- show the existing count-based info banner
+- keep using explicit selected account metadata for platform/type compatibility checks
+
+For `current filtered results`:
+
+- show a banner stating that edits apply to the current filtered result set
+- show the matched account count from a preview query
+- show a short summary of active filters when practical, especially group/search/platform/type/status filters
+
+### Safety
+
+For filtered-result mode:
+
+- disable submit if the preview count is `0`
+- refresh the target count when the modal opens
+- keep the final success toast count aligned with the backend result
+
+The modal should not silently fall back from filtered mode to selected mode.
+
+## Backend/API Design
+
+### Request model
+
+Extend bulk update to support two target modes:
+
+- explicit IDs
+- filter-based query
+
+The request shape should keep backward compatibility for the selected-ID path while allowing a filter target. The backend handler can accept a payload that contains either:
+
+- `account_ids`
+- or `filters`
+
+At least one of the two must be present.
+
+The `filters` payload should reuse the existing account-list query semantics already used by `/admin/accounts` and `/admin/accounts/data`, including:
+
+- `search`
+- `platform`
+- `type`
+- `status`
+- `privacy_mode`
+- `group`
+- existing sort fields may be ignored for mutation targeting if not needed
+
+### Preview count
+
+The frontend needs an accurate target count before submit in filtered-result mode. The simplest compatible approach is:
+
+- call the existing account list endpoint with the current filters and the smallest page size that still returns the total count
+
+If the current API makes that awkward, add a narrow preview/count helper for bulk edit target resolution. Prefer reusing the existing listing contract first.
+
+### Target resolution
+
+For filtered-result mode, the backend must resolve matching account IDs server-side from the submitted filters rather than trusting only currently loaded page data. This is required so filtered-result mode can act on the full result set across pagination.
+
+### Compatibility metadata
+
+The frontend still needs platform/type compatibility to determine which fields to show. For filtered-result mode, derive this from the preview result set returned from the same query used to show count. If the preview spans mixed incompatible account types, show the same warnings/conditional UI that selected mode already uses.
+
+## Frontend Design
+
+### Accounts view
+
+`AccountsView.vue` will:
+
+- replace the direct selected-only bulk edit trigger with a dropdown action model
+- keep a reactive description of the pending bulk edit scope
+- pass either selected IDs or current filter params into the modal
+
+The "current filtered results" action uses a snapshot of the `params` object taken when the modal opens, not a live subscription that keeps changing while the modal is open.
+
+### Bulk edit modal
+
+`BulkEditAccountModal.vue` will accept a richer target contract, for example:
+
+- target mode
+- selected IDs or filter snapshot
+- preview count
+- preview platform/type coverage if needed
+
+The modal remains one form; only the scope banner and submission target differ.
+
+### OpenAI field alignment
+
+Add the missing OpenAI controls to bulk edit:
+
+- OAuth `codex_cli_only`
+- API key WS mode selector
+
+Rules:
+
+- OAuth accounts show OAuth WS mode and `codex_cli_only`
+- API key accounts show API key WS mode
+- mixed OpenAI OAuth/API key selections continue to show only fields that are safe for the entire target set
+
+The payload builder must write:
+
+- `extra.codex_cli_only`
+- `extra.openai_apikey_responses_websockets_v2_mode`
+- `extra.openai_apikey_responses_websockets_v2_enabled`
+
+with the same enable/disable semantics already used by single-account forms.
+
+## Testing Strategy
+
+### Frontend tests
+
+Add or extend tests for:
+
+- bulk edit dropdown actions in the accounts view
+- selected-account mode still calling bulk update by IDs
+- filtered-result mode calling bulk update with filter target
+- filtered-result mode showing preview count and blocking submit on zero matches
+- OAuth bulk edit supporting `codex_cli_only`
+- API key bulk edit supporting API key WS mode
+- no regression for existing passthrough and OAuth WS mode tests
+
+### Backend tests
+
+Add or extend tests for:
+
+- bulk update request validation for IDs vs filters
+- filtered-result mode resolving all matching accounts across every page of results, not only the currently loaded page
+- mixed-channel risk checks still running for filter-target updates if applicable
+- backward compatibility for the existing selected-ID request path
+
+## Risks
+
+- Filter semantics can drift if bulk edit reimplements list-filter parsing differently from the listing endpoints.
+- Filtered-result mode can surprise users if the active scope is not shown clearly enough.
+- Large filtered updates may affect many rows; success/error messaging must stay explicit.
+
+## Recommendation
+
+Implement this as a targeted extension of the existing bulk edit flow:
+
+- unify the entry point in the table action area
+- add filter-target bulk update support
+- align the missing OpenAI compact-related fields
+
+This keeps the mental model simple and solves the large-account-management pain without introducing a second parallel batch-edit system.

From 54de4e008cb3e33d54242473ba19618b1d9fb575 Mon Sep 17 00:00:00 2001
From: KnowSky404 <git@knowsky404.com>
Date: Mon, 27 Apr 2026 17:26:57 +0800
Subject: [PATCH 14/46] docs: add account bulk edit implementation plan

---
 ...-27-account-bulk-edit-scope-and-compact.md | 359 ++++++++++++++++++
 1 file changed, 359 insertions(+)
 create mode 100644 docs/superpowers/plans/2026-04-27-account-bulk-edit-scope-and-compact.md

diff --git a/docs/superpowers/plans/2026-04-27-account-bulk-edit-scope-and-compact.md b/docs/superpowers/plans/2026-04-27-account-bulk-edit-scope-and-compact.md
new file mode 100644
index 00000000..42b76664
--- /dev/null
+++ b/docs/superpowers/plans/2026-04-27-account-bulk-edit-scope-and-compact.md
@@ -0,0 +1,359 @@
+# Account Bulk Edit Scope And Compact Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Add filter-result bulk edit to admin accounts, unify the table-level bulk-edit entry, and align OpenAI bulk-edit controls with the existing compact-related single-account settings.
+
+**Architecture:** Extend the existing `/admin/accounts/bulk-update` flow to accept either explicit account IDs or a server-resolved filter target. Reuse the current account-list filter contract for scope resolution, then update the accounts view and bulk-edit modal so the UI can launch either selected-account edits or current-filter-result edits from one compact dropdown. Keep the existing bulk-edit form, but expand its target contract and OpenAI-specific field coverage.
+
+**Tech Stack:** Vue 3, TypeScript, Vitest, Gin, Go service/repository layer, existing admin accounts API.
+
+---
+
+### Task 1: Add backend test coverage for filter-target bulk update
+
+**Files:**
+- Modify: `backend/internal/handler/admin/account_handler_mixed_channel_test.go`
+- Modify: `backend/internal/service/admin_service_bulk_update_test.go`
+- Test: `backend/internal/handler/admin/account_handler_mixed_channel_test.go`
+- Test: `backend/internal/service/admin_service_bulk_update_test.go`
+
+- [ ] **Step 1: Write the failing handler test for filter-target request acceptance**
+
+```go
+func TestBulkUpdateAcceptsFilterTargetRequest(t *testing.T) {
+	// add a request body that omits account_ids and submits filters instead
+	// assert the route does not reject the request as malformed once service stubs are wired
+}
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `GOCACHE=/tmp/go-build GOMODCACHE=/tmp/go-mod go test -C backend ./internal/handler/admin -run TestBulkUpdateAcceptsFilterTargetRequest -count=1`
+Expected: FAIL because `BulkUpdateAccountsRequest` does not yet support `filters`.
+
+- [ ] **Step 3: Write the failing service test for resolving IDs from filters**
+
+```go
+func TestAdminServiceBulkUpdateAccounts_ResolvesIDsFromFilters(t *testing.T) {
+	// construct BulkUpdateAccountsInput with Filters and no AccountIDs
+	// stub repository list/search path to return matching IDs
+	// assert BulkUpdate is called with all matching account IDs
+}
+```
+
+- [ ] **Step 4: Run test to verify it fails**
+
+Run: `GOCACHE=/tmp/go-build GOMODCACHE=/tmp/go-mod go test -C backend ./internal/service -run TestAdminServiceBulkUpdateAccounts_ResolvesIDsFromFilters -count=1`
+Expected: FAIL because `BulkUpdateAccountsInput` and service logic only use explicit `AccountIDs`.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add backend/internal/handler/admin/account_handler_mixed_channel_test.go backend/internal/service/admin_service_bulk_update_test.go
+git commit -m "test: cover filter-target account bulk update"
+```
+
+### Task 2: Implement backend filter-target bulk update
+
+**Files:**
+- Modify: `backend/internal/handler/admin/account_handler.go`
+- Modify: `backend/internal/service/admin_service.go`
+- Modify: `backend/internal/repository/account_repo.go`
+- Modify: `backend/internal/service/account_service.go`
+- Test: `backend/internal/handler/admin/account_handler_mixed_channel_test.go`
+- Test: `backend/internal/service/admin_service_bulk_update_test.go`
+
+- [ ] **Step 1: Implement request structs and validation for filter targets**
+
+```go
+type BulkUpdateAccountFilters struct {
+	Platform    string `json:"platform"`
+	Type        string `json:"type"`
+	Status      string `json:"status"`
+	Group       string `json:"group"`
+	Search      string `json:"search"`
+	PrivacyMode string `json:"privacy_mode"`
+}
+
+type BulkUpdateAccountsRequest struct {
+	AccountIDs []int64                  `json:"account_ids"`
+	Filters    *BulkUpdateAccountFilters `json:"filters"`
+	// existing fields remain unchanged
+}
+```
+
+- [ ] **Step 2: Resolve filter targets in the service layer with one canonical path**
+
+```go
+type BulkUpdateAccountsInput struct {
+	AccountIDs []int64
+	Filters    *BulkUpdateAccountFilters
+	// existing fields remain unchanged
+}
+
+if len(input.AccountIDs) == 0 && input.Filters != nil {
+	ids, err := s.resolveBulkUpdateTargetIDs(ctx, input.Filters)
+	if err != nil {
+		return nil, err
+	}
+	input.AccountIDs = ids
+}
+```
+
+- [ ] **Step 3: Reuse existing account-search/repository logic to resolve all matching IDs**
+
+```go
+func (s *AdminService) resolveBulkUpdateTargetIDs(ctx context.Context, filters *BulkUpdateAccountFilters) ([]int64, error) {
+	// call the existing repository list/search path with the submitted filters
+	// page through all matching rows or use a dedicated ID-only query helper
+	// return unique IDs in stable order
+}
+```
+
+- [ ] **Step 4: Run targeted backend tests**
+
+Run: `GOCACHE=/tmp/go-build GOMODCACHE=/tmp/go-mod go test -C backend ./internal/handler/admin ./internal/service -run 'TestBulkUpdateAcceptsFilterTargetRequest|TestAdminServiceBulkUpdateAccounts_ResolvesIDsFromFilters' -count=1`
+Expected: PASS
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add backend/internal/handler/admin/account_handler.go backend/internal/service/admin_service.go backend/internal/repository/account_repo.go backend/internal/service/account_service.go backend/internal/handler/admin/account_handler_mixed_channel_test.go backend/internal/service/admin_service_bulk_update_test.go
+git commit -m "feat: support filter-target account bulk update"
+```
+
+### Task 3: Add frontend API and modal tests for target scope
+
+**Files:**
+- Modify: `frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts`
+- Create: `frontend/src/views/admin/__tests__/AccountsView.bulkEdit.spec.ts`
+- Modify: `frontend/src/api/admin/accounts.ts`
+- Test: `frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts`
+- Test: `frontend/src/views/admin/__tests__/AccountsView.bulkEdit.spec.ts`
+
+- [ ] **Step 1: Write the failing modal test for filter-target payload submission**
+
+```ts
+it('submits bulk edit using current filters when target mode is filtered-results', async () => {
+  // mount BulkEditAccountModal with targetMode='filtered'
+  // submit a minimal change
+  // expect adminAPI.accounts.bulkUpdate to receive { filters: ... } rather than account_ids
+})
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `pnpm -C frontend test:run src/components/account/__tests__/BulkEditAccountModal.spec.ts -t "filtered-results"`
+Expected: FAIL because the modal only accepts `accountIds`.
+
+- [ ] **Step 3: Write the failing accounts-view test for dropdown launch actions**
+
+```ts
+it('opens bulk edit for current filtered results from the table action dropdown', async () => {
+  // mount AccountsView with filters set
+  // click Bulk edit > current filtered results
+  // assert modal props contain filter target metadata
+})
+```
+
+- [ ] **Step 4: Run test to verify it fails**
+
+Run: `pnpm -C frontend test:run src/views/admin/__tests__/AccountsView.bulkEdit.spec.ts`
+Expected: FAIL because the dropdown action and target scope state do not exist yet.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts frontend/src/views/admin/__tests__/AccountsView.bulkEdit.spec.ts frontend/src/api/admin/accounts.ts
+git commit -m "test: cover account bulk edit target scopes"
+```
+
+### Task 4: Implement unified frontend bulk-edit target scope flow
+
+**Files:**
+- Modify: `frontend/src/views/admin/AccountsView.vue`
+- Modify: `frontend/src/components/admin/account/AccountBulkActionsBar.vue`
+- Modify: `frontend/src/components/account/BulkEditAccountModal.vue`
+- Modify: `frontend/src/api/admin/accounts.ts`
+- Modify: `frontend/src/i18n/locales/zh.ts`
+- Modify: `frontend/src/i18n/locales/en.ts`
+- Test: `frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts`
+- Test: `frontend/src/views/admin/__tests__/AccountsView.bulkEdit.spec.ts`
+
+- [ ] **Step 1: Add a typed frontend target contract for bulk edit**
+
+```ts
+export type AccountBulkEditTarget =
+  | { mode: 'selected'; accountIds: number[]; selectedPlatforms: AccountPlatform[]; selectedTypes: AccountType[] }
+  | { mode: 'filtered'; filters: AccountListFilters; previewCount: number; selectedPlatforms: AccountPlatform[]; selectedTypes: AccountType[] }
+```
+
+- [ ] **Step 2: Replace the single selected-row edit button with one dropdown**
+
+```vue
+<BulkEditDropdown
+  :has-selection="selectedIds.length > 0"
+  @edit-selected="openBulkEditSelected"
+  @edit-filtered="openBulkEditFiltered"
+/>
+```
+
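+- [ ] **Step 3: Snapshot current filters and preview count when launching filtered mode** (note: the 1-row preview below gives the total count, but its `data` holds at most one account, so `selectedPlatforms`/`selectedTypes` derived from it describe only that row, not the whole filtered set)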
+
+```ts
+const openBulkEditFiltered = async () => {
+  const filters = toBulkEditFilterSnapshot(params)
+  const preview = await adminAPI.accounts.list(1, 1, filters)
+  bulkEditTarget.value = {
+    mode: 'filtered',
+    filters,
+    previewCount: preview.pagination.total,
+    selectedPlatforms: collectPlatforms(preview.data),
+    selectedTypes: collectTypes(preview.data)
+  }
+  showBulkEdit.value = true
+}
+```
+
+- [ ] **Step 4: Update modal submission to call `bulkUpdate` with either `account_ids` or `filters`**
+
+```ts
+if (props.target.mode === 'selected') {
+  await adminAPI.accounts.bulkUpdate({ account_ids: props.target.accountIds, ...updates })
+} else {
+  await adminAPI.accounts.bulkUpdate({ filters: props.target.filters, ...updates })
+}
+```
+
+- [ ] **Step 5: Run targeted frontend tests**
+
+Run: `pnpm -C frontend test:run src/components/account/__tests__/BulkEditAccountModal.spec.ts src/views/admin/__tests__/AccountsView.bulkEdit.spec.ts`
+Expected: PASS
+
+- [ ] **Step 6: Commit**
+
+```bash
+git add frontend/src/views/admin/AccountsView.vue frontend/src/components/admin/account/AccountBulkActionsBar.vue frontend/src/components/account/BulkEditAccountModal.vue frontend/src/api/admin/accounts.ts frontend/src/i18n/locales/zh.ts frontend/src/i18n/locales/en.ts frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts frontend/src/views/admin/__tests__/AccountsView.bulkEdit.spec.ts
+git commit -m "feat: add filtered-result account bulk edit"
+```
+
+### Task 5: Add failing tests for missing OpenAI bulk-edit fields
+
+**Files:**
+- Modify: `frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts`
+- Test: `frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts`
+
+- [ ] **Step 1: Write the failing OAuth test for `codex_cli_only`**
+
+```ts
+it('OpenAI OAuth bulk edit can submit codex_cli_only', async () => {
+  // enable the toggle and submit
+  // expect extra.codex_cli_only to be sent
+})
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `pnpm -C frontend test:run src/components/account/__tests__/BulkEditAccountModal.spec.ts -t "codex_cli_only"`
+Expected: FAIL because the modal has no such control or payload mapping.
+
+- [ ] **Step 3: Write the failing API key test for API key WS mode**
+
+```ts
+it('OpenAI API key bulk edit submits API key WS mode fields', async () => {
+  // enable the API key WS mode selector and submit
+  // expect openai_apikey_responses_websockets_v2_mode and enabled flag
+})
+```
+
+- [ ] **Step 4: Run test to verify it fails**
+
+Run: `pnpm -C frontend test:run src/components/account/__tests__/BulkEditAccountModal.spec.ts -t "API key WS mode"`
+Expected: FAIL because the modal only submits OAuth WS mode.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts
+git commit -m "test: cover missing OpenAI bulk edit fields"
+```
+
+### Task 6: Implement missing OpenAI bulk-edit controls and payload wiring
+
+**Files:**
+- Modify: `frontend/src/components/account/BulkEditAccountModal.vue`
+- Modify: `frontend/src/i18n/locales/zh.ts`
+- Modify: `frontend/src/i18n/locales/en.ts`
+- Test: `frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts`
+
+- [ ] **Step 1: Add UI controls for OAuth `codex_cli_only` and API key WS mode**
+
+```vue
+<div v-if="allOpenAIOAuth">
+  <!-- existing OAuth WS mode -->
+  <!-- add codex_cli_only toggle -->
+</div>
+
+<div v-if="allOpenAIAPIKey">
+  <!-- add API key WS mode selector -->
+</div>
+```
+
+- [ ] **Step 2: Mirror single-account payload semantics in the bulk-edit submit builder**
+
+```ts
+if (enableCodexCLIOnly.value) {
+  const extra = ensureExtra()
+  extra.codex_cli_only = codexCLIOnlyEnabled.value
+}
+
+if (enableOpenAIAPIKeyWSMode.value) {
+  const extra = ensureExtra()
+  extra.openai_apikey_responses_websockets_v2_mode = openaiAPIKeyResponsesWebSocketV2Mode.value
+  extra.openai_apikey_responses_websockets_v2_enabled = isOpenAIWSModeEnabled(openaiAPIKeyResponsesWebSocketV2Mode.value)
+}
+```
+
+- [ ] **Step 3: Run focused modal tests**
+
+Run: `pnpm -C frontend test:run src/components/account/__tests__/BulkEditAccountModal.spec.ts`
+Expected: PASS
+
+- [ ] **Step 4: Commit**
+
+```bash
+git add frontend/src/components/account/BulkEditAccountModal.vue frontend/src/i18n/locales/zh.ts frontend/src/i18n/locales/en.ts frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts
+git commit -m "feat: align OpenAI bulk edit compact settings"
+```
+
+### Task 7: Final regression verification
+
+**Files:**
+- Modify: none expected
+- Test: `frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts`
+- Test: `frontend/src/views/admin/__tests__/AccountsView.bulkEdit.spec.ts`
+- Test: `backend/internal/handler/admin/account_handler_mixed_channel_test.go`
+- Test: `backend/internal/service/admin_service_bulk_update_test.go`
+
+- [ ] **Step 1: Run frontend typecheck**
+
+Run: `pnpm -C frontend typecheck`
+Expected: PASS
+
+- [ ] **Step 2: Run focused frontend test suite**
+
+Run: `pnpm -C frontend test:run src/components/account/__tests__/BulkEditAccountModal.spec.ts src/views/admin/__tests__/AccountsView.bulkEdit.spec.ts`
+Expected: PASS
+
+- [ ] **Step 3: Run focused backend test suite**
+
+Run: `GOCACHE=/tmp/go-build GOMODCACHE=/tmp/go-mod go test -C backend ./internal/handler/admin ./internal/service -run 'BulkUpdate|bulk update' -count=1`
+Expected: PASS
+
+- [ ] **Step 4: Commit final integration fixes if needed**
+
+```bash
+git add frontend/src/components/account/BulkEditAccountModal.vue frontend/src/views/admin/AccountsView.vue frontend/src/components/admin/account/AccountBulkActionsBar.vue frontend/src/api/admin/accounts.ts frontend/src/i18n/locales/zh.ts frontend/src/i18n/locales/en.ts backend/internal/handler/admin/account_handler.go backend/internal/service/admin_service.go backend/internal/repository/account_repo.go backend/internal/service/account_service.go frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts frontend/src/views/admin/__tests__/AccountsView.bulkEdit.spec.ts backend/internal/handler/admin/account_handler_mixed_channel_test.go backend/internal/service/admin_service_bulk_update_test.go
+git commit -m "feat: finish account bulk edit scope and compact support"
+```

From f422ac6dccf27a2310d510be1fc4c8b8a7a1e78a Mon Sep 17 00:00:00 2001
From: KnowSky404 <git@knowsky404.com>
Date: Mon, 27 Apr 2026 17:32:34 +0800
Subject: [PATCH 15/46] test: cover filter-target account bulk update

---
 .../account_handler_mixed_channel_test.go     | 26 +++++++
 .../service/admin_service_bulk_update_test.go | 76 +++++++++++++++++++
 2 files changed, 102 insertions(+)

diff --git a/backend/internal/handler/admin/account_handler_mixed_channel_test.go b/backend/internal/handler/admin/account_handler_mixed_channel_test.go
index 24ec5bcf..929dc240 100644
--- a/backend/internal/handler/admin/account_handler_mixed_channel_test.go
+++ b/backend/internal/handler/admin/account_handler_mixed_channel_test.go
@@ -196,3 +196,29 @@ func TestAccountHandlerBulkUpdateMixedChannelConfirmSkips(t *testing.T) {
 	require.Equal(t, float64(2), data["success"])
 	require.Equal(t, float64(0), data["failed"])
 }
+
+func TestBulkUpdateAcceptsFilterTargetRequest(t *testing.T) {
+	adminSvc := newStubAdminService()
+	router := setupAccountMixedChannelRouter(adminSvc)
+
+	body, _ := json.Marshal(map[string]any{
+		"filters": map[string]any{
+			"platform":     "openai",
+			"type":         "oauth",
+			"status":       "active",
+			"group":        "12",
+			"privacy_mode": "blocked",
+			"search":       "bulk-target",
+		},
+		"schedulable": true,
+	})
+	rec := httptest.NewRecorder()
+	req := httptest.NewRequest(http.MethodPost, "/api/v1/admin/accounts/bulk-update", bytes.NewReader(body))
+	req.Header.Set("Content-Type", "application/json")
+	router.ServeHTTP(rec, req)
+
+	require.Equal(t, http.StatusOK, rec.Code)
+	var resp map[string]any
+	require.NoError(t, json.Unmarshal(rec.Body.Bytes(), &resp))
+	require.Equal(t, float64(0), resp["code"])
+}
diff --git a/backend/internal/service/admin_service_bulk_update_test.go b/backend/internal/service/admin_service_bulk_update_test.go
index 4845d87c..df415295 100644
--- a/backend/internal/service/admin_service_bulk_update_test.go
+++ b/backend/internal/service/admin_service_bulk_update_test.go
@@ -5,8 +5,10 @@ package service
 import (
 	"context"
 	"errors"
+	"reflect"
 	"testing"
 
+	"github.com/Wei-Shaw/sub2api/internal/pkg/pagination"
 	"github.com/stretchr/testify/require"
 )
 
@@ -25,6 +27,19 @@ type accountRepoStubForBulkUpdate struct {
 	getByIDCalled    []int64
 	listByGroupData  map[int64][]Account
 	listByGroupErr   map[int64]error
+	listData         []Account
+	listResult       *pagination.PaginationResult
+	listErr          error
+	listCalled       bool
+	lastListParams   pagination.PaginationParams
+	lastListFilters  struct {
+		platform    string
+		accountType string
+		status      string
+		search      string
+		groupID     int64
+		privacyMode string
+	}
 }
 
 func (s *accountRepoStubForBulkUpdate) BulkUpdate(_ context.Context, ids []int64, _ AccountBulkUpdate) (int64, error) {
@@ -73,6 +88,24 @@ func (s *accountRepoStubForBulkUpdate) ListByGroup(_ context.Context, groupID in
 	return nil, nil
 }
 
+func (s *accountRepoStubForBulkUpdate) ListWithFilters(_ context.Context, params pagination.PaginationParams, platform, accountType, status, search string, groupID int64, privacyMode string) ([]Account, *pagination.PaginationResult, error) {
+	s.listCalled = true
+	s.lastListParams = params
+	s.lastListFilters.platform = platform
+	s.lastListFilters.accountType = accountType
+	s.lastListFilters.status = status
+	s.lastListFilters.search = search
+	s.lastListFilters.groupID = groupID
+	s.lastListFilters.privacyMode = privacyMode
+	if s.listErr != nil {
+		return nil, nil, s.listErr
+	}
+	if s.listResult != nil {
+		return s.listData, s.listResult, nil
+	}
+	return s.listData, &pagination.PaginationResult{Total: int64(len(s.listData))}, nil
+}
+
 // TestAdminService_BulkUpdateAccounts_AllSuccessIDs 验证批量更新成功时返回 success_ids/failed_ids。
 func TestAdminService_BulkUpdateAccounts_AllSuccessIDs(t *testing.T) {
 	repo := &accountRepoStubForBulkUpdate{}
@@ -170,3 +203,46 @@ func TestAdminService_BulkUpdateAccounts_MixedChannelPreCheckBlocksOnExistingCon
 	// No BindGroups should have been called since the check runs before any write.
 	require.Empty(t, repo.bindGroupsCalls)
 }
+
+func TestAdminServiceBulkUpdateAccounts_ResolvesIDsFromFilters(t *testing.T) {
+	repo := &accountRepoStubForBulkUpdate{
+		listData: []Account{
+			{ID: 7},
+			{ID: 11},
+		},
+		listResult: &pagination.PaginationResult{Total: 2},
+	}
+	svc := &adminServiceImpl{accountRepo: repo}
+
+	schedulable := true
+	input := &BulkUpdateAccountsInput{
+		Schedulable: &schedulable,
+	}
+
+	filtersField := reflect.ValueOf(input).Elem().FieldByName("Filters")
+	require.True(t, filtersField.IsValid(), "BulkUpdateAccountsInput should expose Filters for filter-target bulk update")
+	require.Equal(t, reflect.Ptr, filtersField.Kind(), "BulkUpdateAccountsInput.Filters should be a pointer field")
+
+	filtersValue := reflect.New(filtersField.Type().Elem())
+	filtersValue.Elem().FieldByName("Platform").SetString(PlatformOpenAI)
+	filtersValue.Elem().FieldByName("Type").SetString(AccountTypeOAuth)
+	filtersValue.Elem().FieldByName("Status").SetString(StatusActive)
+	filtersValue.Elem().FieldByName("Group").SetString("12")
+	filtersValue.Elem().FieldByName("PrivacyMode").SetString(PrivacyModeCFBlocked)
+	filtersValue.Elem().FieldByName("Search").SetString("bulk-target")
+	filtersField.Set(filtersValue)
+
+	result, err := svc.BulkUpdateAccounts(context.Background(), input)
+	require.NoError(t, err)
+	require.True(t, repo.listCalled, "expected filter-target bulk update to resolve matching IDs via account list filters")
+	require.Equal(t, PlatformOpenAI, repo.lastListFilters.platform)
+	require.Equal(t, AccountTypeOAuth, repo.lastListFilters.accountType)
+	require.Equal(t, StatusActive, repo.lastListFilters.status)
+	require.Equal(t, "bulk-target", repo.lastListFilters.search)
+	require.Equal(t, int64(12), repo.lastListFilters.groupID)
+	require.Equal(t, PrivacyModeCFBlocked, repo.lastListFilters.privacyMode)
+	require.Equal(t, []int64{7, 11}, repo.bulkUpdateIDs)
+	require.Equal(t, 2, result.Success)
+	require.Equal(t, 0, result.Failed)
+	require.Equal(t, []int64{7, 11}, result.SuccessIDs)
+}

From 25c7b0d9f40e2609683702ebe4ac2a04d392a225 Mon Sep 17 00:00:00 2001
From: KnowSky404 <git@knowsky404.com>
Date: Mon, 27 Apr 2026 17:59:49 +0800
Subject: [PATCH 16/46] feat: support filter-target account bulk update

---
 .../internal/handler/admin/account_handler.go | 31 ++++++++-
 backend/internal/service/admin_service.go     | 68 +++++++++++++++++++
 2 files changed, 98 insertions(+), 1 deletion(-)

diff --git a/backend/internal/handler/admin/account_handler.go b/backend/internal/handler/admin/account_handler.go
index 7454451a..3c97c753 100644
--- a/backend/internal/handler/admin/account_handler.go
+++ b/backend/internal/handler/admin/account_handler.go
@@ -134,7 +134,8 @@ type UpdateAccountRequest struct {
 
 // BulkUpdateAccountsRequest represents the payload for bulk editing accounts
 type BulkUpdateAccountsRequest struct {
-	AccountIDs              []int64        `json:"account_ids" binding:"required,min=1"`
+	AccountIDs              []int64        `json:"account_ids"`
+	Filters                 *BulkUpdateAccountFilters `json:"filters"`
 	Name                    string         `json:"name"`
 	ProxyID                 *int64         `json:"proxy_id"`
 	Concurrency             *int           `json:"concurrency"`
@@ -149,6 +150,15 @@ type BulkUpdateAccountsRequest struct {
 	ConfirmMixedChannelRisk *bool          `json:"confirm_mixed_channel_risk"` // 用户确认混合渠道风险
 }
 
+type BulkUpdateAccountFilters struct {
+	Platform    string `json:"platform"`
+	Type        string `json:"type"`
+	Status      string `json:"status"`
+	Group       string `json:"group"`
+	Search      string `json:"search"`
+	PrivacyMode string `json:"privacy_mode"`
+}
+
 // CheckMixedChannelRequest represents check mixed channel risk request
 type CheckMixedChannelRequest struct {
 	Platform  string  `json:"platform" binding:"required"`
@@ -1369,6 +1379,10 @@ func (h *AccountHandler) BulkUpdate(c *gin.Context) {
 		response.BadRequest(c, "rate_multiplier must be >= 0")
 		return
 	}
+	if len(req.AccountIDs) == 0 && req.Filters == nil {
+		response.BadRequest(c, "account_ids or filters is required")
+		return
+	}
 	// base_rpm 输入校验：负值归零，超过 10000 截断
 	sanitizeExtraBaseRPM(req.Extra)
 
@@ -1394,6 +1408,7 @@ func (h *AccountHandler) BulkUpdate(c *gin.Context) {
 
 	result, err := h.adminService.BulkUpdateAccounts(c.Request.Context(), &service.BulkUpdateAccountsInput{
 		AccountIDs:            req.AccountIDs,
+		Filters:               toServiceBulkUpdateAccountFilters(req.Filters),
 		Name:                  req.Name,
 		ProxyID:               req.ProxyID,
 		Concurrency:           req.Concurrency,
@@ -1429,6 +1444,20 @@ func (h *AccountHandler) BulkUpdate(c *gin.Context) {
 	response.Success(c, result)
 }
 
+func toServiceBulkUpdateAccountFilters(filters *BulkUpdateAccountFilters) *service.BulkUpdateAccountFilters {
+	if filters == nil {
+		return nil
+	}
+	return &service.BulkUpdateAccountFilters{
+		Platform:    filters.Platform,
+		Type:        filters.Type,
+		Status:      filters.Status,
+		Group:       filters.Group,
+		Search:      filters.Search,
+		PrivacyMode: filters.PrivacyMode,
+	}
+}
+
 // ========== OAuth Handlers ==========
 
 // GenerateAuthURLRequest represents the request for generating auth URL
diff --git a/backend/internal/service/admin_service.go b/backend/internal/service/admin_service.go
index 434f1f38..86777dc9 100644
--- a/backend/internal/service/admin_service.go
+++ b/backend/internal/service/admin_service.go
@@ -9,6 +9,7 @@ import (
 	"log/slog"
 	"net/http"
 	"sort"
+	"strconv"
 	"strings"
 	"time"
 
@@ -291,6 +292,7 @@ type UpdateAccountInput struct {
 // BulkUpdateAccountsInput describes the payload for bulk updating accounts.
 type BulkUpdateAccountsInput struct {
 	AccountIDs     []int64
+	Filters        *BulkUpdateAccountFilters
 	Name           string
 	ProxyID        *int64
 	Concurrency    *int
@@ -307,6 +309,15 @@ type BulkUpdateAccountsInput struct {
 	SkipMixedChannelCheck bool
 }
 
+type BulkUpdateAccountFilters struct {
+	Platform    string
+	Type        string
+	Status      string
+	Group       string
+	Search      string
+	PrivacyMode string
+}
+
 // BulkUpdateAccountResult captures the result for a single account update.
 type BulkUpdateAccountResult struct {
 	AccountID int64  `json:"account_id"`
@@ -2286,6 +2297,14 @@ func (s *adminServiceImpl) UpdateAccount(ctx context.Context, id int64, input *U
 // BulkUpdateAccounts updates multiple accounts in one request.
 // It merges credentials/extra keys instead of overwriting the whole object.
 func (s *adminServiceImpl) BulkUpdateAccounts(ctx context.Context, input *BulkUpdateAccountsInput) (*BulkUpdateAccountsResult, error) {
+	if len(input.AccountIDs) == 0 && input.Filters != nil {
+		accountIDs, err := s.resolveBulkUpdateTargetIDs(ctx, input.Filters)
+		if err != nil {
+			return nil, err
+		}
+		input.AccountIDs = accountIDs
+	}
+
 	result := &BulkUpdateAccountsResult{
 		SuccessIDs: make([]int64, 0, len(input.AccountIDs)),
 		FailedIDs:  make([]int64, 0, len(input.AccountIDs)),
@@ -2401,6 +2420,55 @@ func (s *adminServiceImpl) BulkUpdateAccounts(ctx context.Context, input *BulkUp
 	return result, nil
 }
 
+func (s *adminServiceImpl) resolveBulkUpdateTargetIDs(ctx context.Context, filters *BulkUpdateAccountFilters) ([]int64, error) {
+	if filters == nil {
+		return nil, nil
+	}
+
+	groupID := int64(0)
+	switch strings.TrimSpace(filters.Group) {
+	case "":
+	case "ungrouped":
+		groupID = AccountListGroupUngrouped
+	default:
+		parsedGroupID, err := strconv.ParseInt(strings.TrimSpace(filters.Group), 10, 64)
+		if err != nil {
+			return nil, fmt.Errorf("invalid group filter: %w", err)
+		}
+		groupID = parsedGroupID
+	}
+
+	const pageSize = 500
+	page := 1
+	accountIDs := make([]int64, 0, pageSize)
+
+	for {
+		accounts, total, err := s.ListAccounts(
+			ctx,
+			page,
+			pageSize,
+			filters.Platform,
+			filters.Type,
+			filters.Status,
+			filters.Search,
+			groupID,
+			filters.PrivacyMode,
+			"",
+			"",
+		)
+		if err != nil {
+			return nil, err
+		}
+		for _, account := range accounts {
+			accountIDs = append(accountIDs, account.ID)
+		}
+		if int64(len(accountIDs)) >= total || len(accounts) == 0 {
+			return accountIDs, nil
+		}
+		page++
+	}
+}
+
 func (s *adminServiceImpl) DeleteAccount(ctx context.Context, id int64) error {
 	if err := s.accountRepo.Delete(ctx, id); err != nil {
 		return err

From 764afbe37a9115279fd68f67ef85e02fe35244ff Mon Sep 17 00:00:00 2001
From: KnowSky404 <git@knowsky404.com>
Date: Mon, 27 Apr 2026 18:08:22 +0800
Subject: [PATCH 17/46] test: cover account bulk edit target scopes

---
 .../__tests__/BulkEditAccountModal.spec.ts    |  37 +++++
 .../__tests__/AccountsView.bulkEdit.spec.ts   | 152 ++++++++++++++++++
 2 files changed, 189 insertions(+)
 create mode 100644 frontend/src/views/admin/__tests__/AccountsView.bulkEdit.spec.ts

diff --git a/frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts b/frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts
index 7390e723..2e360978 100644
--- a/frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts
+++ b/frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts
@@ -217,4 +217,41 @@ describe('BulkEditAccountModal', () => {
     })
     expect(wrapper.text()).toContain('admin.accounts.openai.modelRestrictionDisabledByPassthrough')
   })
+
+  it('filtered-results 模式下应提交 filters 而不是 account_ids', async () => {
+    const wrapper = mountModal({
+      accountIds: [],
+      target: {
+        mode: 'filtered',
+        filters: {
+          platform: 'openai',
+          type: 'oauth',
+          status: 'active',
+          group: '12',
+          search: 'bulk-target',
+          privacy_mode: 'training_set_cf_blocked'
+        },
+        previewCount: 5,
+        selectedPlatforms: ['openai'],
+        selectedTypes: ['oauth']
+      }
+    })
+
+    await wrapper.get('#bulk-edit-status-enabled').setValue(true)
+    await wrapper.get('#bulk-edit-account-form').trigger('submit.prevent')
+    await flushPromises()
+
+    expect(adminAPI.accounts.bulkUpdate).toHaveBeenCalledTimes(1)
+    expect(adminAPI.accounts.bulkUpdate).toHaveBeenCalledWith({
+      filters: {
+        platform: 'openai',
+        type: 'oauth',
+        status: 'active',
+        group: '12',
+        search: 'bulk-target',
+        privacy_mode: 'training_set_cf_blocked'
+      },
+      status: 'active'
+    })
+  })
 })
diff --git a/frontend/src/views/admin/__tests__/AccountsView.bulkEdit.spec.ts b/frontend/src/views/admin/__tests__/AccountsView.bulkEdit.spec.ts
new file mode 100644
index 00000000..112baf22
--- /dev/null
+++ b/frontend/src/views/admin/__tests__/AccountsView.bulkEdit.spec.ts
@@ -0,0 +1,152 @@
+import { beforeEach, describe, expect, it, vi } from 'vitest'
+import { flushPromises, mount } from '@vue/test-utils'
+
+import AccountsView from '../AccountsView.vue'
+
+const {
+  listAccounts,
+  listWithEtag,
+  getBatchTodayStats,
+  getAllProxies,
+  getAllGroups
+} = vi.hoisted(() => ({
+  listAccounts: vi.fn(),
+  listWithEtag: vi.fn(),
+  getBatchTodayStats: vi.fn(),
+  getAllProxies: vi.fn(),
+  getAllGroups: vi.fn()
+}))
+
+vi.mock('@/api/admin', () => ({
+  adminAPI: {
+    accounts: {
+      list: listAccounts,
+      listWithEtag,
+      getBatchTodayStats,
+      delete: vi.fn(),
+      batchClearError: vi.fn(),
+      batchRefresh: vi.fn(),
+      toggleSchedulable: vi.fn()
+    },
+    proxies: {
+      getAll: getAllProxies
+    },
+    groups: {
+      getAll: getAllGroups
+    }
+  }
+}))
+
+vi.mock('@/stores/app', () => ({
+  useAppStore: () => ({
+    showError: vi.fn(),
+    showSuccess: vi.fn(),
+    showInfo: vi.fn()
+  })
+}))
+
+vi.mock('@/stores/auth', () => ({
+  useAuthStore: () => ({
+    token: 'test-token'
+  })
+}))
+
+vi.mock('vue-i18n', async () => {
+  const actual = await vi.importActual<typeof import('vue-i18n')>('vue-i18n')
+  return {
+    ...actual,
+    useI18n: () => ({
+      t: (key: string) => key
+    })
+  }
+})
+
+const DataTableStub = {
+  props: ['columns', 'data'],
+  template: '<div data-test="data-table"></div>'
+}
+
+const AccountBulkActionsBarStub = {
+  props: ['selectedIds'],
+  emits: ['edit-filtered'],
+  template: '<button data-test="edit-filtered" @click="$emit(\'edit-filtered\')">edit filtered</button>'
+}
+
+const BulkEditAccountModalStub = {
+  props: ['show', 'target'],
+  template: '<div data-test="bulk-edit-modal" :data-show="String(show)" :data-target-mode="target?.mode ?? \'\'"></div>'
+}
+
+describe('admin AccountsView bulk edit scope', () => {
+  beforeEach(() => {
+    localStorage.clear()
+
+    listAccounts.mockReset()
+    listWithEtag.mockReset()
+    getBatchTodayStats.mockReset()
+    getAllProxies.mockReset()
+    getAllGroups.mockReset()
+
+    listAccounts.mockResolvedValue({
+      items: [],
+      total: 0,
+      page: 1,
+      page_size: 20,
+      pages: 0
+    })
+    listWithEtag.mockResolvedValue({
+      notModified: true,
+      etag: null,
+      data: null
+    })
+    getBatchTodayStats.mockResolvedValue({ stats: {} })
+    getAllProxies.mockResolvedValue([])
+    getAllGroups.mockResolvedValue([])
+  })
+
+  it('opens bulk edit in filtered-results mode from the bulk actions dropdown', async () => {
+    const wrapper = mount(AccountsView, {
+      global: {
+        stubs: {
+          AppLayout: { template: '<div><slot /></div>' },
+          TablePageLayout: {
+            template: '<div><slot name="filters" /><slot name="table" /><slot name="pagination" /></div>'
+          },
+          DataTable: DataTableStub,
+          Pagination: true,
+          ConfirmDialog: true,
+          AccountTableActions: { template: '<div><slot name="beforeCreate" /><slot name="after" /></div>' },
+          AccountTableFilters: { template: '<div></div>' },
+          AccountBulkActionsBar: AccountBulkActionsBarStub,
+          AccountActionMenu: true,
+          ImportDataModal: true,
+          ReAuthAccountModal: true,
+          AccountTestModal: true,
+          AccountStatsModal: true,
+          ScheduledTestsPanel: true,
+          SyncFromCrsModal: true,
+          TempUnschedStatusModal: true,
+          ErrorPassthroughRulesModal: true,
+          TLSFingerprintProfilesModal: true,
+          CreateAccountModal: true,
+          EditAccountModal: true,
+          BulkEditAccountModal: BulkEditAccountModalStub,
+          PlatformTypeBadge: true,
+          AccountCapacityCell: true,
+          AccountStatusIndicator: true,
+          AccountTodayStatsCell: true,
+          AccountGroupsCell: true,
+          AccountUsageCell: true,
+          Icon: true
+        }
+      }
+    })
+
+    await flushPromises()
+    await wrapper.get('[data-test="edit-filtered"]').trigger('click')
+    await flushPromises()
+
+    expect(wrapper.get('[data-test="bulk-edit-modal"]').attributes('data-show')).toBe('true')
+    expect(wrapper.get('[data-test="bulk-edit-modal"]').attributes('data-target-mode')).toBe('filtered')
+  })
+})

From 2ab6b34fd1f980588582d961917bbcb4c64c4823 Mon Sep 17 00:00:00 2001
From: KnowSky404 <git@knowsky404.com>
Date: Mon, 27 Apr 2026 18:12:24 +0800
Subject: [PATCH 18/46] feat: add filtered-result account bulk edit

---
 frontend/src/api/admin/accounts.ts            | 15 +--
 .../account/BulkEditAccountModal.vue          | 69 ++++++++-----
 .../admin/account/AccountBulkActionsBar.vue   | 28 ++++--
 frontend/src/views/admin/AccountsView.vue     | 99 ++++++++++++++++++-
 4 files changed, 171 insertions(+), 40 deletions(-)

diff --git a/frontend/src/api/admin/accounts.ts b/frontend/src/api/admin/accounts.ts
index a146f1f7..8a127793 100644
--- a/frontend/src/api/admin/accounts.ts
+++ b/frontend/src/api/admin/accounts.ts
@@ -370,8 +370,8 @@ export async function batchUpdateCredentials(request: {
  * @returns Success confirmation
  */
 export async function bulkUpdate(
-  accountIds: number[],
-  updates: Record<string, unknown>
+  accountIdsOrPayload: number[] | Record<string, unknown>,
+  updates?: Record<string, unknown>
 ): Promise<{
   success: number
   failed: number
@@ -379,16 +379,19 @@ export async function bulkUpdate(
   failed_ids?: number[]
   results: Array<{ account_id: number; success: boolean; error?: string }>
   }> {
+  const payload = Array.isArray(accountIdsOrPayload)
+    ? {
+        account_ids: accountIdsOrPayload,
+        ...(updates ?? {})
+      }
+    : accountIdsOrPayload
   const { data } = await apiClient.post<{
     success: number
     failed: number
     success_ids?: number[]
     failed_ids?: number[]
     results: Array<{ account_id: number; success: boolean; error?: string }>
-  }>('/admin/accounts/bulk-update', {
-    account_ids: accountIds,
-    ...updates
-  })
+  }>('/admin/accounts/bulk-update', payload)
   return data
 }
 
diff --git a/frontend/src/components/account/BulkEditAccountModal.vue b/frontend/src/components/account/BulkEditAccountModal.vue
index 13c30cf9..b55456ff 100644
--- a/frontend/src/components/account/BulkEditAccountModal.vue
+++ b/frontend/src/components/account/BulkEditAccountModal.vue
@@ -17,7 +17,7 @@
               d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"
             />
           </svg>
-          {{ t('admin.accounts.bulkEdit.selectionInfo', { count: accountIds.length }) }}
+          {{ t('admin.accounts.bulkEdit.selectionInfo', { count: targetMode === 'filtered' ? targetPreviewCount : accountIds.length }) }}
         </p>
       </div>
 
@@ -27,7 +27,7 @@
           <svg class="mr-1.5 inline h-5 w-5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
             <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-3L13.732 4c-.77-1.333-2.694-1.333-3.464 0L3.34 16c-.77 1.333.192 3 1.732 3z" />
           </svg>
-          {{ t('admin.accounts.bulkEdit.mixedPlatformWarning', { platforms: selectedPlatforms.join(', ') }) }}
+          {{ t('admin.accounts.bulkEdit.mixedPlatformWarning', { platforms: targetSelectedPlatforms.join(', ') }) }}
         </p>
       </div>
 
@@ -227,7 +227,7 @@
 
               <ModelWhitelistSelector
                 v-model="allowedModels"
-                :platforms="selectedPlatforms"
+                :platforms="targetSelectedPlatforms"
               />
 
               <p class="text-xs text-gray-500 dark:text-gray-400">
@@ -933,6 +933,13 @@ interface Props {
   accountIds: number[]
   selectedPlatforms: AccountPlatform[]
   selectedTypes: AccountType[]
+  target?: {
+    mode: 'selected' | 'filtered'
+    filters?: Record<string, unknown>
+    previewCount?: number
+    selectedPlatforms?: AccountPlatform[]
+    selectedTypes?: AccountType[]
+  }
   proxies: ProxyConfig[]
   groups: AdminGroup[]
 }
@@ -947,40 +954,53 @@ const { t } = useI18n()
 const appStore = useAppStore()
 
 // Platform awareness
-const isMixedPlatform = computed(() => props.selectedPlatforms.length > 1)
+const targetMode = computed(() => props.target?.mode ?? 'selected')
+const targetPreviewCount = computed(() => props.target?.previewCount ?? props.accountIds.length)
+const targetSelectedPlatforms = computed(() => props.target?.selectedPlatforms ?? props.selectedPlatforms)
+const targetSelectedTypes = computed(() => props.target?.selectedTypes ?? props.selectedTypes)
+const isMixedPlatform = computed(() => targetSelectedPlatforms.value.length > 1)
 
 const allOpenAIPassthroughCapable = computed(() => {
   return (
-    props.selectedPlatforms.length === 1 &&
-    props.selectedPlatforms[0] === 'openai' &&
-    props.selectedTypes.length > 0 &&
-    props.selectedTypes.every(t => t === 'oauth' || t === 'apikey')
+    targetSelectedPlatforms.value.length === 1 &&
+    targetSelectedPlatforms.value[0] === 'openai' &&
+    targetSelectedTypes.value.length > 0 &&
+    targetSelectedTypes.value.every(t => t === 'oauth' || t === 'apikey')
   )
 })
 
 const allOpenAIOAuth = computed(() => {
   return (
-    props.selectedPlatforms.length === 1 &&
-    props.selectedPlatforms[0] === 'openai' &&
-    props.selectedTypes.length > 0 &&
-    props.selectedTypes.every(t => t === 'oauth')
+    targetSelectedPlatforms.value.length === 1 &&
+    targetSelectedPlatforms.value[0] === 'openai' &&
+    targetSelectedTypes.value.length > 0 &&
+    targetSelectedTypes.value.every(t => t === 'oauth')
+  )
+})
+
+const allOpenAIAPIKey = computed(() => {
+  return (
+    targetSelectedPlatforms.value.length === 1 &&
+    targetSelectedPlatforms.value[0] === 'openai' &&
+    targetSelectedTypes.value.length > 0 &&
+    targetSelectedTypes.value.every(t => t === 'apikey')
   )
 })
 
 // 是否全部为 Anthropic OAuth/SetupToken（RPM 配置仅在此条件下显示）
 const allAnthropicOAuthOrSetupToken = computed(() => {
   return (
-    props.selectedPlatforms.length === 1 &&
-    props.selectedPlatforms[0] === 'anthropic' &&
-    props.selectedTypes.every(t => t === 'oauth' || t === 'setup-token')
+    targetSelectedPlatforms.value.length === 1 &&
+    targetSelectedPlatforms.value[0] === 'anthropic' &&
+    targetSelectedTypes.value.every(t => t === 'oauth' || t === 'setup-token')
   )
 })
 
 const filteredPresets = computed(() => {
-  if (props.selectedPlatforms.length === 0) return []
+  if (targetSelectedPlatforms.value.length === 0) return []
 
   const dedupedPresets = new Map<string, ReturnType<typeof getPresetMappingsByPlatform>[number]>()
-  for (const platform of props.selectedPlatforms) {
+  for (const platform of targetSelectedPlatforms.value) {
     for (const preset of getPresetMappingsByPlatform(platform)) {
       const key = `${preset.from}=>${preset.to}`
       if (!dedupedPresets.has(key)) {
@@ -1291,8 +1311,8 @@ const mixedChannelConfirmed = ref(false)
 const canPreCheck = () =>
   enableGroups.value &&
   groupIds.value.length > 0 &&
-  props.selectedPlatforms.length === 1 &&
-  (props.selectedPlatforms[0] === 'antigravity' || props.selectedPlatforms[0] === 'anthropic')
+  targetSelectedPlatforms.value.length === 1 &&
+  (targetSelectedPlatforms.value[0] === 'antigravity' || targetSelectedPlatforms.value[0] === 'anthropic')
 
 const handleClose = () => {
   showMixedChannelWarning.value = false
@@ -1309,7 +1329,7 @@ const preCheckMixedChannelRisk = async (built: Record<string, unknown>): Promise
 
   try {
     const result = await adminAPI.accounts.checkMixedChannelRisk({
-      platform: props.selectedPlatforms[0],
+      platform: targetSelectedPlatforms.value[0],
       group_ids: groupIds.value
     })
     if (!result.has_risk) return true
@@ -1325,7 +1345,7 @@ const preCheckMixedChannelRisk = async (built: Record<string, unknown>): Promise
 }
 
 const handleSubmit = async () => {
-  if (props.accountIds.length === 0) {
+  if (targetMode.value === 'selected' && props.accountIds.length === 0) {
     appStore.showError(t('admin.accounts.bulkEdit.noSelection'))
     return
   }
@@ -1373,7 +1393,12 @@ const submitBulkUpdate = async (baseUpdates: Record<string, unknown>) => {
   submitting.value = true
 
   try {
-    const res = await adminAPI.accounts.bulkUpdate(props.accountIds, updates)
+    const res = targetMode.value === 'filtered' && props.target?.filters
+      ? await adminAPI.accounts.bulkUpdate({
+        filters: props.target.filters,
+        ...updates
+      })
+      : await adminAPI.accounts.bulkUpdate(props.accountIds, updates)
     const success = res.success || 0
     const failed = res.failed || 0
 
diff --git a/frontend/src/components/admin/account/AccountBulkActionsBar.vue b/frontend/src/components/admin/account/AccountBulkActionsBar.vue
index 3b987bd0..a632bdd4 100644
--- a/frontend/src/components/admin/account/AccountBulkActionsBar.vue
+++ b/frontend/src/components/admin/account/AccountBulkActionsBar.vue
@@ -1,9 +1,13 @@
 <template>
-  <div v-if="selectedIds.length > 0" class="mb-4 flex items-center justify-between p-3 bg-primary-50 rounded-lg dark:bg-primary-900/20">
+  <div class="mb-4 flex items-center justify-between rounded-lg bg-primary-50 p-3 dark:bg-primary-900/20">
     <div class="flex flex-wrap items-center gap-2">
-      <span class="text-sm font-medium text-primary-900 dark:text-primary-100">
+      <span v-if="selectedIds.length > 0" class="text-sm font-medium text-primary-900 dark:text-primary-100">
         {{ t('admin.accounts.bulkActions.selected', { count: selectedIds.length }) }}
       </span>
+      <span v-else class="text-sm font-medium text-primary-900 dark:text-primary-100">
+        {{ t('admin.accounts.bulkEdit.title') }}
+      </span>
+      <template v-if="selectedIds.length > 0">
       <button
         @click="$emit('select-page')"
         class="text-xs font-medium text-primary-700 hover:text-primary-800 dark:text-primary-300 dark:hover:text-primary-200"
@@ -17,19 +21,25 @@
       >
         {{ t('admin.accounts.bulkActions.clear') }}
       </button>
+      </template>
     </div>
     <div class="flex gap-2">
-      <button @click="$emit('delete')" class="btn btn-danger btn-sm">{{ t('admin.accounts.bulkActions.delete') }}</button>
-      <button @click="$emit('reset-status')" class="btn btn-secondary btn-sm">{{ t('admin.accounts.bulkActions.resetStatus') }}</button>
-      <button @click="$emit('refresh-token')" class="btn btn-secondary btn-sm">{{ t('admin.accounts.bulkActions.refreshToken') }}</button>
-      <button @click="$emit('toggle-schedulable', true)" class="btn btn-success btn-sm">{{ t('admin.accounts.bulkActions.enableScheduling') }}</button>
-      <button @click="$emit('toggle-schedulable', false)" class="btn btn-warning btn-sm">{{ t('admin.accounts.bulkActions.disableScheduling') }}</button>
-      <button @click="$emit('edit')" class="btn btn-primary btn-sm">{{ t('admin.accounts.bulkActions.edit') }}</button>
+      <template v-if="selectedIds.length > 0">
+        <button @click="$emit('delete')" class="btn btn-danger btn-sm">{{ t('admin.accounts.bulkActions.delete') }}</button>
+        <button @click="$emit('reset-status')" class="btn btn-secondary btn-sm">{{ t('admin.accounts.bulkActions.resetStatus') }}</button>
+        <button @click="$emit('refresh-token')" class="btn btn-secondary btn-sm">{{ t('admin.accounts.bulkActions.refreshToken') }}</button>
+        <button @click="$emit('toggle-schedulable', true)" class="btn btn-success btn-sm">{{ t('admin.accounts.bulkActions.enableScheduling') }}</button>
+        <button @click="$emit('toggle-schedulable', false)" class="btn btn-warning btn-sm">{{ t('admin.accounts.bulkActions.disableScheduling') }}</button>
+        <button @click="$emit('edit-selected')" class="btn btn-primary btn-sm">{{ t('admin.accounts.bulkActions.edit') }}</button>
+      </template>
+      <button @click="$emit('edit-filtered')" class="btn btn-primary btn-sm">
+        {{ t('admin.accounts.bulkEdit.submit') }}
+      </button>
     </div>
   </div>
 </template>
 
 <script setup lang="ts">
 import { useI18n } from 'vue-i18n'
-defineProps(['selectedIds']); defineEmits(['delete', 'edit', 'clear', 'select-page', 'toggle-schedulable', 'reset-status', 'refresh-token']); const { t } = useI18n()
+defineProps(['selectedIds']); defineEmits(['delete', 'edit-selected', 'edit-filtered', 'clear', 'select-page', 'toggle-schedulable', 'reset-status', 'refresh-token']); const { t } = useI18n()
 </script>
diff --git a/frontend/src/views/admin/AccountsView.vue b/frontend/src/views/admin/AccountsView.vue
index bc4c6215..2f061118 100644
--- a/frontend/src/views/admin/AccountsView.vue
+++ b/frontend/src/views/admin/AccountsView.vue
@@ -141,7 +141,17 @@
         </div>
       </template>
       <template #table>
-        <AccountBulkActionsBar :selected-ids="selIds" @delete="handleBulkDelete" @reset-status="handleBulkResetStatus" @refresh-token="handleBulkRefreshToken" @edit="showBulkEdit = true" @clear="clearSelection" @select-page="selectPage" @toggle-schedulable="handleBulkToggleSchedulable" />
+        <AccountBulkActionsBar
+          :selected-ids="selIds"
+          @delete="handleBulkDelete"
+          @reset-status="handleBulkResetStatus"
+          @refresh-token="handleBulkRefreshToken"
+          @edit-selected="openBulkEditSelected"
+          @edit-filtered="openBulkEditFiltered"
+          @clear="clearSelection"
+          @select-page="selectPage"
+          @toggle-schedulable="handleBulkToggleSchedulable"
+        />
         <div ref="accountTableRef" class="flex min-h-0 flex-1 flex-col overflow-hidden">
         <DataTable
           ref="dataTableRef"
@@ -303,7 +313,17 @@
     <AccountActionMenu :show="menu.show" :account="menu.acc" :position="menu.pos" @close="menu.show = false" @test="handleTest" @stats="handleViewStats" @schedule="handleSchedule" @reauth="handleReAuth" @refresh-token="handleRefresh" @recover-state="handleRecoverState" @reset-quota="handleResetQuota" @set-privacy="handleSetPrivacy" />
     <SyncFromCrsModal :show="showSync" @close="showSync = false" @synced="reload" />
     <ImportDataModal :show="showImportData" @close="showImportData = false" @imported="handleDataImported" />
-    <BulkEditAccountModal :show="showBulkEdit" :account-ids="selIds" :selected-platforms="selPlatforms" :selected-types="selTypes" :proxies="proxies" :groups="groups" @close="showBulkEdit = false" @updated="handleBulkUpdated" />
+    <BulkEditAccountModal
+      :show="showBulkEdit"
+      :account-ids="selIds"
+      :selected-platforms="selPlatforms"
+      :selected-types="selTypes"
+      :target="bulkEditTarget"
+      :proxies="proxies"
+      :groups="groups"
+      @close="showBulkEdit = false"
+      @updated="handleBulkUpdated"
+    />
     <TempUnschedStatusModal :show="showTempUnsched" :account="tempUnschedAcc" @close="showTempUnsched = false" @reset="handleTempUnschedReset" />
     <ConfirmDialog :show="showDeleteDialog" :title="t('admin.accounts.deleteAccount')" :message="t('admin.accounts.deleteConfirm', { name: deletingAcc?.name })" :confirm-text="t('common.delete')" :cancel-text="t('common.cancel')" :danger="true" @confirm="confirmDelete" @cancel="showDeleteDialog = false" />
     <ConfirmDialog :show="showExportDataDialog" :title="t('admin.accounts.dataExport')" :message="t('admin.accounts.dataExportConfirmMessage')" :confirm-text="t('admin.accounts.dataExportConfirm')" :cancel-text="t('common.cancel')" @confirm="handleExportData" @cancel="showExportDataDialog = false">
@@ -364,6 +384,29 @@ const proxies = ref<AccountProxy[]>([])
 const groups = ref<AdminGroup[]>([])
 const accountTableRef = ref<HTMLElement | null>(null)
 const dataTableRef = ref<InstanceType<typeof DataTable> | null>(null)
+type AccountBulkEditTarget =
+  | {
+      mode: 'selected'
+      accountIds: number[]
+      selectedPlatforms: AccountPlatform[]
+      selectedTypes: AccountType[]
+    }
+  | {
+      mode: 'filtered'
+      filters: {
+        platform?: string
+        type?: string
+        status?: string
+        group?: string
+        search?: string
+        privacy_mode?: string
+        sort_by?: string
+        sort_order?: AccountSortOrder
+      }
+      previewCount: number
+      selectedPlatforms: AccountPlatform[]
+      selectedTypes: AccountType[]
+    }
 const selPlatforms = computed<AccountPlatform[]>(() => {
   const platforms = new Set(
     accounts.value
@@ -387,6 +430,7 @@ const showImportData = ref(false)
 const showExportDataDialog = ref(false)
 const includeProxyOnExport = ref(true)
 const showBulkEdit = ref(false)
+const bulkEditTarget = ref<AccountBulkEditTarget | null>(null)
 const showTempUnsched = ref(false)
 const showDeleteDialog = ref(false)
 const showReAuth = ref(false)
@@ -1216,7 +1260,56 @@ const handleBulkToggleSchedulable = async (schedulable: boolean) => {
     appStore.showError(t('common.error'))
   }
 }
-const handleBulkUpdated = () => { showBulkEdit.value = false; clearSelection(); reload() }
+const buildBulkEditFilterSnapshot = () => {
+  const rawParams = toRaw(params) as Record<string, unknown>
+  return {
+    platform: typeof rawParams.platform === 'string' ? rawParams.platform : '',
+    type: typeof rawParams.type === 'string' ? rawParams.type : '',
+    status: typeof rawParams.status === 'string' ? rawParams.status : '',
+    group: typeof rawParams.group === 'string' ? rawParams.group : '',
+    search: typeof rawParams.search === 'string' ? rawParams.search : '',
+    privacy_mode: typeof rawParams.privacy_mode === 'string' ? rawParams.privacy_mode : '',
+    sort_by: typeof rawParams.sort_by === 'string' ? rawParams.sort_by : '',
+    sort_order: rawParams.sort_order === 'desc' ? 'desc' : 'asc'
+  }
+}
+
+const collectSelectionMetadata = (rows: Account[]) => {
+  const selectedPlatforms = Array.from(new Set(rows.map(account => account.platform)))
+  const selectedTypes = Array.from(new Set(rows.map(account => account.type)))
+  return { selectedPlatforms, selectedTypes }
+}
+
+const openBulkEditSelected = () => {
+  bulkEditTarget.value = {
+    mode: 'selected',
+    accountIds: [...selIds.value],
+    selectedPlatforms: [...selPlatforms.value],
+    selectedTypes: [...selTypes.value]
+  }
+  showBulkEdit.value = true
+}
+
+const openBulkEditFiltered = async () => {
+  const filters = buildBulkEditFilterSnapshot()
+  const preview = await adminAPI.accounts.list(1, 100, filters)
+  const { selectedPlatforms, selectedTypes } = collectSelectionMetadata(preview.items)
+  bulkEditTarget.value = {
+    mode: 'filtered',
+    filters,
+    previewCount: preview.total,
+    selectedPlatforms,
+    selectedTypes
+  }
+  showBulkEdit.value = true
+}
+
+const handleBulkUpdated = () => {
+  showBulkEdit.value = false
+  bulkEditTarget.value = null
+  clearSelection()
+  reload()
+}
 const handleDataImported = () => { showImportData.value = false; reload() }
 const ACCOUNT_UNGROUPED_GROUP_QUERY_VALUE = 'ungrouped'
 const ACCOUNT_PRIVACY_MODE_UNSET_QUERY_VALUE = '__unset__'

From c5a1a82223f35492281779f9078334e3a78b31f6 Mon Sep 17 00:00:00 2001
From: KnowSky404 <git@knowsky404.com>
Date: Mon, 27 Apr 2026 18:13:14 +0800
Subject: [PATCH 19/46] test: cover missing OpenAI bulk edit fields

---
 .../__tests__/BulkEditAccountModal.spec.ts    | 39 +++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts b/frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts
index 2e360978..50d170da 100644
--- a/frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts
+++ b/frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts
@@ -178,6 +178,45 @@ describe('BulkEditAccountModal', () => {
     expect(wrapper.find('#bulk-edit-openai-ws-mode-enabled').exists()).toBe(false)
   })
 
+  it('OpenAI OAuth 批量编辑应提交 codex_cli_only 字段', async () => {
+    const wrapper = mountModal({
+      selectedPlatforms: ['openai'],
+      selectedTypes: ['oauth']
+    })
+
+    await wrapper.get('#bulk-edit-openai-codex-cli-only-enabled').setValue(true)
+    await wrapper.get('#bulk-edit-openai-codex-cli-only-toggle').trigger('click')
+    await wrapper.get('#bulk-edit-account-form').trigger('submit.prevent')
+    await flushPromises()
+
+    expect(adminAPI.accounts.bulkUpdate).toHaveBeenCalledTimes(1)
+    expect(adminAPI.accounts.bulkUpdate).toHaveBeenCalledWith([1, 2], {
+      extra: {
+        codex_cli_only: true
+      }
+    })
+  })
+
+  it('OpenAI API Key 批量编辑应提交 API Key 专属 WS mode 字段', async () => {
+    const wrapper = mountModal({
+      selectedPlatforms: ['openai'],
+      selectedTypes: ['apikey']
+    })
+
+    await wrapper.get('#bulk-edit-openai-apikey-ws-mode-enabled').setValue(true)
+    await wrapper.get('[data-testid="bulk-edit-openai-apikey-ws-mode-select"]').setValue('ctx_pool')
+    await wrapper.get('#bulk-edit-account-form').trigger('submit.prevent')
+    await flushPromises()
+
+    expect(adminAPI.accounts.bulkUpdate).toHaveBeenCalledTimes(1)
+    expect(adminAPI.accounts.bulkUpdate).toHaveBeenCalledWith([1, 2], {
+      extra: {
+        openai_apikey_responses_websockets_v2_mode: 'ctx_pool',
+        openai_apikey_responses_websockets_v2_enabled: true
+      }
+    })
+  })
+
   it('OpenAI 账号批量编辑可关闭自动透传', async () => {
     const wrapper = mountModal({
       selectedPlatforms: ['openai'],

From a161f9d045d463515f2363efed96611bbab9b041 Mon Sep 17 00:00:00 2001
From: KnowSky404 <git@knowsky404.com>
Date: Mon, 27 Apr 2026 18:15:23 +0800
Subject: [PATCH 20/46] feat: align OpenAI bulk edit compact settings

---
 .../account/BulkEditAccountModal.vue          | 107 ++++++++++++++++++
 1 file changed, 107 insertions(+)

diff --git a/frontend/src/components/account/BulkEditAccountModal.vue b/frontend/src/components/account/BulkEditAccountModal.vue
index b55456ff..05016a6d 100644
--- a/frontend/src/components/account/BulkEditAccountModal.vue
+++ b/frontend/src/components/account/BulkEditAccountModal.vue
@@ -698,6 +698,87 @@
         </div>
       </div>
 
+      <!-- OpenAI OAuth Codex CLI only -->
+      <div v-if="allOpenAIOAuth" class="border-t border-gray-200 pt-4 dark:border-dark-600">
+        <div class="mb-3 flex items-center justify-between">
+          <label
+            id="bulk-edit-openai-codex-cli-only-label"
+            class="input-label mb-0"
+            for="bulk-edit-openai-codex-cli-only-enabled"
+          >
+            {{ t('admin.accounts.openai.codexCLIOnly') }}
+          </label>
+          <input
+            v-model="enableCodexCLIOnly"
+            id="bulk-edit-openai-codex-cli-only-enabled"
+            type="checkbox"
+            aria-controls="bulk-edit-openai-codex-cli-only"
+            class="rounded border-gray-300 text-primary-600 focus:ring-primary-500"
+          />
+        </div>
+        <div
+          id="bulk-edit-openai-codex-cli-only"
+          :class="!enableCodexCLIOnly && 'pointer-events-none opacity-50'"
+        >
+          <p class="mb-3 text-xs text-gray-500 dark:text-gray-400">
+            {{ t('admin.accounts.openai.codexCLIOnlyDesc') }}
+          </p>
+          <button
+            id="bulk-edit-openai-codex-cli-only-toggle"
+            type="button"
+            :class="[
+              'relative inline-flex h-6 w-11 flex-shrink-0 cursor-pointer rounded-full border-2 border-transparent transition-colors duration-200 ease-in-out focus:outline-none focus:ring-2 focus:ring-primary-500 focus:ring-offset-2',
+              codexCLIOnlyEnabled ? 'bg-primary-600' : 'bg-gray-200 dark:bg-dark-600'
+            ]"
+            @click="codexCLIOnlyEnabled = !codexCLIOnlyEnabled"
+          >
+            <span
+              :class="[
+                'pointer-events-none inline-block h-5 w-5 transform rounded-full bg-white shadow ring-0 transition duration-200 ease-in-out',
+                codexCLIOnlyEnabled ? 'translate-x-5' : 'translate-x-0'
+              ]"
+            />
+          </button>
+        </div>
+      </div>
+
+      <!-- OpenAI API Key WS mode -->
+      <div v-if="allOpenAIAPIKey" class="border-t border-gray-200 pt-4 dark:border-dark-600">
+        <div class="mb-3 flex items-center justify-between">
+          <label
+            id="bulk-edit-openai-apikey-ws-mode-label"
+            class="input-label mb-0"
+            for="bulk-edit-openai-apikey-ws-mode-enabled"
+          >
+            {{ t('admin.accounts.openai.wsMode') }}
+          </label>
+          <input
+            v-model="enableOpenAIAPIKeyWSMode"
+            id="bulk-edit-openai-apikey-ws-mode-enabled"
+            type="checkbox"
+            aria-controls="bulk-edit-openai-apikey-ws-mode"
+            class="rounded border-gray-300 text-primary-600 focus:ring-primary-500"
+          />
+        </div>
+        <div
+          id="bulk-edit-openai-apikey-ws-mode"
+          :class="!enableOpenAIAPIKeyWSMode && 'pointer-events-none opacity-50'"
+        >
+          <p class="mb-3 text-xs text-gray-500 dark:text-gray-400">
+            {{ t('admin.accounts.openai.wsModeDesc') }}
+          </p>
+          <p class="mb-3 text-xs text-gray-500 dark:text-gray-400">
+            {{ t(openAIAPIKeyWSModeConcurrencyHintKey) }}
+          </p>
+          <Select
+            v-model="openaiAPIKeyResponsesWebSocketV2Mode"
+            data-testid="bulk-edit-openai-apikey-ws-mode-select"
+            :options="openAIWSModeOptions"
+            aria-labelledby="bulk-edit-openai-apikey-ws-mode-label"
+          />
+        </div>
+      </div>
+
       <!-- RPM Limit (仅全部为 Anthropic OAuth/SetupToken 时显示) -->
       <div v-if="allAnthropicOAuthOrSetupToken" class="border-t border-gray-200 pt-4 dark:border-dark-600">
         <div class="mb-3 flex items-center justify-between">
@@ -1032,6 +1113,8 @@ const enableStatus = ref(false)
 const enableGroups = ref(false)
 const enableOpenAIPassthrough = ref(false)
 const enableOpenAIWSMode = ref(false)
+const enableOpenAIAPIKeyWSMode = ref(false)
+const enableCodexCLIOnly = ref(false)
 const enableRpmLimit = ref(false)
 
 // State - field values
@@ -1055,6 +1138,8 @@ const status = ref<'active' | 'inactive'>('active')
 const groupIds = ref<number[]>([])
 const openaiPassthroughEnabled = ref(false)
 const openaiOAuthResponsesWebSocketV2Mode = ref<OpenAIWSMode>(OPENAI_WS_MODE_OFF)
+const openaiAPIKeyResponsesWebSocketV2Mode = ref<OpenAIWSMode>(OPENAI_WS_MODE_OFF)
+const codexCLIOnlyEnabled = ref(false)
 const rpmLimitEnabled = ref(false)
 const bulkBaseRpm = ref<number | null>(null)
 const bulkRpmStrategy = ref<'tiered' | 'sticky_exempt'>('tiered')
@@ -1096,6 +1181,9 @@ const openAIWSModeOptions = computed(() => [
 const openAIWSModeConcurrencyHintKey = computed(() =>
   resolveOpenAIWSModeConcurrencyHintKey(openaiOAuthResponsesWebSocketV2Mode.value)
 )
+const openAIAPIKeyWSModeConcurrencyHintKey = computed(() =>
+  resolveOpenAIWSModeConcurrencyHintKey(openaiAPIKeyResponsesWebSocketV2Mode.value)
+)
 
 // Model mapping helpers
 const addModelMapping = () => {
@@ -1274,6 +1362,19 @@ const buildUpdatePayload = (): Record<string, unknown> | null => {
     )
   }
 
+  if (enableOpenAIAPIKeyWSMode.value) {
+    const extra = ensureExtra()
+    extra.openai_apikey_responses_websockets_v2_mode = openaiAPIKeyResponsesWebSocketV2Mode.value
+    extra.openai_apikey_responses_websockets_v2_enabled = isOpenAIWSModeEnabled(
+      openaiAPIKeyResponsesWebSocketV2Mode.value
+    )
+  }
+
+  if (enableCodexCLIOnly.value) {
+    const extra = ensureExtra()
+    extra.codex_cli_only = codexCLIOnlyEnabled.value
+  }
+
   // RPM limit settings (写入 extra 字段)
   if (enableRpmLimit.value) {
     const extra = ensureExtra()
@@ -1364,6 +1465,8 @@ const handleSubmit = async () => {
     enableStatus.value ||
     enableGroups.value ||
     enableOpenAIWSMode.value ||
+    enableOpenAIAPIKeyWSMode.value ||
+    enableCodexCLIOnly.value ||
     enableRpmLimit.value ||
     userMsgQueueMode.value !== null
 
@@ -1462,6 +1565,8 @@ watch(
       enableGroups.value = false
       enableOpenAIPassthrough.value = false
       enableOpenAIWSMode.value = false
+      enableOpenAIAPIKeyWSMode.value = false
+      enableCodexCLIOnly.value = false
       enableRpmLimit.value = false
 
       // Reset all values
@@ -1481,6 +1586,8 @@ watch(
       status.value = 'active'
       groupIds.value = []
       openaiOAuthResponsesWebSocketV2Mode.value = OPENAI_WS_MODE_OFF
+      openaiAPIKeyResponsesWebSocketV2Mode.value = OPENAI_WS_MODE_OFF
+      codexCLIOnlyEnabled.value = false
       rpmLimitEnabled.value = false
       bulkBaseRpm.value = null
       bulkRpmStrategy.value = 'tiered'

From 53b24bc2d8cc6fd4a4c3ac04f00f43090e5a1c25 Mon Sep 17 00:00:00 2001
From: KnowSky404 <git@knowsky404.com>
Date: Mon, 27 Apr 2026 18:20:36 +0800
Subject: [PATCH 21/46] fix: tighten account bulk edit target typing

---
 frontend/src/views/admin/AccountsView.vue | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/frontend/src/views/admin/AccountsView.vue b/frontend/src/views/admin/AccountsView.vue
index 2f061118..126e4a61 100644
--- a/frontend/src/views/admin/AccountsView.vue
+++ b/frontend/src/views/admin/AccountsView.vue
@@ -318,7 +318,7 @@
       :account-ids="selIds"
       :selected-platforms="selPlatforms"
       :selected-types="selTypes"
-      :target="bulkEditTarget"
+      :target="bulkEditTarget ?? undefined"
       :proxies="proxies"
       :groups="groups"
       @close="showBulkEdit = false"
@@ -1262,6 +1262,7 @@ const handleBulkToggleSchedulable = async (schedulable: boolean) => {
 }
 const buildBulkEditFilterSnapshot = () => {
   const rawParams = toRaw(params) as Record<string, unknown>
+  const sortOrder: AccountSortOrder = rawParams.sort_order === 'desc' ? 'desc' : 'asc'
   return {
     platform: typeof rawParams.platform === 'string' ? rawParams.platform : '',
     type: typeof rawParams.type === 'string' ? rawParams.type : '',
@@ -1270,7 +1271,7 @@ const buildBulkEditFilterSnapshot = () => {
     search: typeof rawParams.search === 'string' ? rawParams.search : '',
     privacy_mode: typeof rawParams.privacy_mode === 'string' ? rawParams.privacy_mode : '',
     sort_by: typeof rawParams.sort_by === 'string' ? rawParams.sort_by : '',
-    sort_order: rawParams.sort_order === 'desc' ? 'desc' : 'asc'
+    sort_order: sortOrder
   }
 }
 

From 1eca03432a8b0a64f581122a55a73feedad9a194 Mon Sep 17 00:00:00 2001
From: KnowSky404 <git@knowsky404.com>
Date: Mon, 27 Apr 2026 18:36:05 +0800
Subject: [PATCH 22/46] fix: format bulk update account request

---
 .../internal/handler/admin/account_handler.go | 26 +++++++++----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/backend/internal/handler/admin/account_handler.go b/backend/internal/handler/admin/account_handler.go
index 3c97c753..02ff3a1e 100644
--- a/backend/internal/handler/admin/account_handler.go
+++ b/backend/internal/handler/admin/account_handler.go
@@ -134,20 +134,20 @@ type UpdateAccountRequest struct {
 
 // BulkUpdateAccountsRequest represents the payload for bulk editing accounts
 type BulkUpdateAccountsRequest struct {
-	AccountIDs              []int64        `json:"account_ids"`
+	AccountIDs              []int64                   `json:"account_ids"`
 	Filters                 *BulkUpdateAccountFilters `json:"filters"`
-	Name                    string         `json:"name"`
-	ProxyID                 *int64         `json:"proxy_id"`
-	Concurrency             *int           `json:"concurrency"`
-	Priority                *int           `json:"priority"`
-	RateMultiplier          *float64       `json:"rate_multiplier"`
-	LoadFactor              *int           `json:"load_factor"`
-	Status                  string         `json:"status" binding:"omitempty,oneof=active inactive error"`
-	Schedulable             *bool          `json:"schedulable"`
-	GroupIDs                *[]int64       `json:"group_ids"`
-	Credentials             map[string]any `json:"credentials"`
-	Extra                   map[string]any `json:"extra"`
-	ConfirmMixedChannelRisk *bool          `json:"confirm_mixed_channel_risk"` // 用户确认混合渠道风险
+	Name                    string                    `json:"name"`
+	ProxyID                 *int64                    `json:"proxy_id"`
+	Concurrency             *int                      `json:"concurrency"`
+	Priority                *int                      `json:"priority"`
+	RateMultiplier          *float64                  `json:"rate_multiplier"`
+	LoadFactor              *int                      `json:"load_factor"`
+	Status                  string                    `json:"status" binding:"omitempty,oneof=active inactive error"`
+	Schedulable             *bool                     `json:"schedulable"`
+	GroupIDs                *[]int64                  `json:"group_ids"`
+	Credentials             map[string]any            `json:"credentials"`
+	Extra                   map[string]any            `json:"extra"`
+	ConfirmMixedChannelRisk *bool                     `json:"confirm_mixed_channel_risk"` // 用户确认混合渠道风险
 }
 
 type BulkUpdateAccountFilters struct {

From ca5d029e7cc6b3bc6d7dd7f3543da754634990c9 Mon Sep 17 00:00:00 2001
From: VitalyR <vr@vitalyr.com>
Date: Tue, 28 Apr 2026 04:53:29 +0800
Subject: [PATCH 23/46] fix(openai): honor versioned image base URLs

---
 .../internal/service/account_test_service.go  |   2 +-
 .../account_test_service_openai_image_test.go |  40 +++++
 .../internal/service/openai_images_test.go    | 138 ++++++++++++++++++
 3 files changed, 179 insertions(+), 1 deletion(-)

diff --git a/backend/internal/service/account_test_service.go b/backend/internal/service/account_test_service.go
index c0bbc6dc..cb418550 100644
--- a/backend/internal/service/account_test_service.go
+++ b/backend/internal/service/account_test_service.go
@@ -1227,7 +1227,7 @@ func (s *AccountTestService) testOpenAIImageAPIKey(c *gin.Context, ctx context.C
 	if err != nil {
 		return s.sendErrorAndEnd(c, fmt.Sprintf("Invalid base URL: %s", err.Error()))
 	}
-	apiURL := strings.TrimSuffix(normalizedBaseURL, "/") + "/v1/images/generations"
+	apiURL := buildOpenAIImagesURL(normalizedBaseURL, openAIImagesGenerationsEndpoint)
 
 	// Set SSE headers
 	c.Writer.Header().Set("Content-Type", "text/event-stream")
diff --git a/backend/internal/service/account_test_service_openai_image_test.go b/backend/internal/service/account_test_service_openai_image_test.go
index 80a2fc31..257159c4 100644
--- a/backend/internal/service/account_test_service_openai_image_test.go
+++ b/backend/internal/service/account_test_service_openai_image_test.go
@@ -8,6 +8,7 @@ import (
 	"strings"
 	"testing"
 
+	"github.com/Wei-Shaw/sub2api/internal/config"
 	"github.com/gin-gonic/gin"
 	"github.com/stretchr/testify/require"
 )
@@ -48,3 +49,42 @@ func TestAccountTestService_OpenAIImageOAuthHandlesOutputItemDoneFallback(t *tes
 	require.Contains(t, rec.Body.String(), "data:image/png;base64,aGVsbG8=")
 	require.Contains(t, rec.Body.String(), "\"success\":true")
 }
+
+func TestAccountTestService_OpenAIImageAPIKeyUsesConfiguredV1BaseURL(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+	rec := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(rec)
+	c.Request = httptest.NewRequest(http.MethodPost, "/api/v1/admin/accounts/1/test", nil)
+
+	upstream := &httpUpstreamRecorder{
+		resp: &http.Response{
+			StatusCode: http.StatusOK,
+			Header: http.Header{
+				"Content-Type": []string{"application/json"},
+			},
+			Body: io.NopCloser(strings.NewReader(`{"data":[{"b64_json":"aGVsbG8=","revised_prompt":"draw a cat"}]}`)),
+		},
+	}
+	svc := &AccountTestService{
+		httpUpstream: upstream,
+		cfg:          &config.Config{},
+	}
+	account := &Account{
+		ID:       54,
+		Name:     "openai-apikey",
+		Platform: PlatformOpenAI,
+		Type:     AccountTypeAPIKey,
+		Credentials: map[string]any{
+			"api_key":  "test-api-key",
+			"base_url": "https://image-upstream.example/v1",
+		},
+	}
+
+	err := svc.testOpenAIImageAPIKey(c, context.Background(), account, "gpt-image-2", "draw a cat")
+	require.NoError(t, err)
+	require.NotNil(t, upstream.lastReq)
+	require.Equal(t, "https://image-upstream.example/v1/images/generations", upstream.lastReq.URL.String())
+	require.Equal(t, "Bearer test-api-key", upstream.lastReq.Header.Get("Authorization"))
+	require.Contains(t, rec.Body.String(), "data:image/png;base64,aGVsbG8=")
+	require.Contains(t, rec.Body.String(), "\"success\":true")
+}
diff --git a/backend/internal/service/openai_images_test.go b/backend/internal/service/openai_images_test.go
index 200547d4..47113d4d 100644
--- a/backend/internal/service/openai_images_test.go
+++ b/backend/internal/service/openai_images_test.go
@@ -11,6 +11,7 @@ import (
 	"strings"
 	"testing"
 
+	"github.com/Wei-Shaw/sub2api/internal/config"
 	"github.com/gin-gonic/gin"
 	"github.com/stretchr/testify/require"
 	"github.com/tidwall/gjson"
@@ -258,6 +259,25 @@ func TestAccountSupportsOpenAIImageCapability_OAuthSupportsNative(t *testing.T)
 	require.True(t, account.SupportsOpenAIImageCapability(OpenAIImagesCapabilityNative))
 }
 
+func TestBuildOpenAIImagesURL_HandlesVersionedBaseURL(t *testing.T) {
+	require.Equal(t,
+		"https://image-upstream.example/v1/images/generations",
+		buildOpenAIImagesURL("https://image-upstream.example/v1", openAIImagesGenerationsEndpoint),
+	)
+	require.Equal(t,
+		"https://image-upstream.example/v1/images/edits",
+		buildOpenAIImagesURL("https://image-upstream.example/v1/", openAIImagesEditsEndpoint),
+	)
+	require.Equal(t,
+		"https://image-upstream.example/v1/images/generations",
+		buildOpenAIImagesURL("https://image-upstream.example", openAIImagesGenerationsEndpoint),
+	)
+	require.Equal(t,
+		"https://image-upstream.example/v1/images/generations",
+		buildOpenAIImagesURL("https://image-upstream.example/v1/images/generations", openAIImagesGenerationsEndpoint),
+	)
+}
+
 type openAIImageTestSSEEvent struct {
 	Name string
 	Data string
@@ -371,6 +391,124 @@ func TestOpenAIGatewayServiceForwardImages_OAuthUsesResponsesAPI(t *testing.T) {
 	require.Equal(t, "draw a cat", gjson.Get(rec.Body.String(), "data.0.revised_prompt").String())
 }
 
+func TestOpenAIGatewayServiceForwardImages_APIKeyGenerationUsesConfiguredV1BaseURL(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+	body := []byte(`{"model":"gpt-image-2","prompt":"draw a cat","response_format":"b64_json"}`)
+
+	req := httptest.NewRequest(http.MethodPost, "/v1/images/generations", bytes.NewReader(body))
+	req.Header.Set("Content-Type", "application/json")
+	rec := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(rec)
+	c.Request = req
+
+	svc := &OpenAIGatewayService{
+		cfg: &config.Config{},
+		httpUpstream: &httpUpstreamRecorder{
+			resp: &http.Response{
+				StatusCode: http.StatusOK,
+				Header: http.Header{
+					"Content-Type": []string{"application/json"},
+					"X-Request-Id": []string{"req_img_apikey"},
+				},
+				Body: io.NopCloser(strings.NewReader(`{"created":1710000007,"data":[{"b64_json":"aGVsbG8=","revised_prompt":"draw a cat"}]}`)),
+			},
+		},
+	}
+	parsed, err := svc.ParseOpenAIImagesRequest(c, body)
+	require.NoError(t, err)
+
+	account := &Account{
+		ID:       6,
+		Name:     "openai-apikey",
+		Platform: PlatformOpenAI,
+		Type:     AccountTypeAPIKey,
+		Credentials: map[string]any{
+			"api_key":  "test-api-key",
+			"base_url": "https://image-upstream.example/v1",
+		},
+	}
+
+	result, err := svc.ForwardImages(context.Background(), c, account, body, parsed, "")
+	require.NoError(t, err)
+	require.NotNil(t, result)
+	require.Equal(t, 1, result.ImageCount)
+	require.Equal(t, "gpt-image-2", result.Model)
+	require.Equal(t, "gpt-image-2", result.UpstreamModel)
+
+	upstream, ok := svc.httpUpstream.(*httpUpstreamRecorder)
+	require.True(t, ok)
+	require.NotNil(t, upstream.lastReq)
+	require.Equal(t, "https://image-upstream.example/v1/images/generations", upstream.lastReq.URL.String())
+	require.Equal(t, "Bearer test-api-key", upstream.lastReq.Header.Get("Authorization"))
+	require.Equal(t, "application/json", upstream.lastReq.Header.Get("Content-Type"))
+	require.Equal(t, "gpt-image-2", gjson.GetBytes(upstream.lastBody, "model").String())
+	require.Equal(t, http.StatusOK, rec.Code)
+	require.Equal(t, "aGVsbG8=", gjson.Get(rec.Body.String(), "data.0.b64_json").String())
+}
+
+func TestOpenAIGatewayServiceForwardImages_APIKeyEditUsesConfiguredV1BaseURL(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+
+	var body bytes.Buffer
+	writer := multipart.NewWriter(&body)
+	require.NoError(t, writer.WriteField("model", "gpt-image-2"))
+	require.NoError(t, writer.WriteField("prompt", "replace background"))
+	imagePart, err := writer.CreateFormFile("image", "source.png")
+	require.NoError(t, err)
+	_, err = imagePart.Write([]byte("png-image-content"))
+	require.NoError(t, err)
+	require.NoError(t, writer.Close())
+
+	req := httptest.NewRequest(http.MethodPost, "/v1/images/edits", bytes.NewReader(body.Bytes()))
+	req.Header.Set("Content-Type", writer.FormDataContentType())
+	rec := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(rec)
+	c.Request = req
+
+	svc := &OpenAIGatewayService{
+		cfg: &config.Config{},
+		httpUpstream: &httpUpstreamRecorder{
+			resp: &http.Response{
+				StatusCode: http.StatusOK,
+				Header: http.Header{
+					"Content-Type": []string{"application/json"},
+					"X-Request-Id": []string{"req_img_edit_apikey"},
+				},
+				Body: io.NopCloser(strings.NewReader(`{"created":1710000008,"data":[{"b64_json":"ZWRpdGVk","revised_prompt":"replace background"}]}`)),
+			},
+		},
+	}
+	parsed, err := svc.ParseOpenAIImagesRequest(c, body.Bytes())
+	require.NoError(t, err)
+
+	account := &Account{
+		ID:       7,
+		Name:     "openai-apikey",
+		Platform: PlatformOpenAI,
+		Type:     AccountTypeAPIKey,
+		Credentials: map[string]any{
+			"api_key":  "test-api-key",
+			"base_url": "https://image-upstream.example/v1/",
+		},
+	}
+
+	result, err := svc.ForwardImages(context.Background(), c, account, body.Bytes(), parsed, "")
+	require.NoError(t, err)
+	require.NotNil(t, result)
+	require.Equal(t, 1, result.ImageCount)
+
+	upstream, ok := svc.httpUpstream.(*httpUpstreamRecorder)
+	require.True(t, ok)
+	require.NotNil(t, upstream.lastReq)
+	require.Equal(t, "https://image-upstream.example/v1/images/edits", upstream.lastReq.URL.String())
+	require.Equal(t, "Bearer test-api-key", upstream.lastReq.Header.Get("Authorization"))
+	require.Contains(t, upstream.lastReq.Header.Get("Content-Type"), "multipart/form-data")
+	require.Contains(t, string(upstream.lastBody), `name="model"`)
+	require.Contains(t, string(upstream.lastBody), "gpt-image-2")
+	require.Equal(t, http.StatusOK, rec.Code)
+	require.Equal(t, "ZWRpdGVk", gjson.Get(rec.Body.String(), "data.0.b64_json").String())
+}
+
 func TestOpenAIGatewayServiceForwardImages_OAuthStreamingTransformsEvents(t *testing.T) {
 	gin.SetMode(gin.TestMode)
 	body := []byte(`{"model":"gpt-image-2","prompt":"draw a cat","stream":true,"response_format":"url"}`)

From 3d4ca5e8d1929c11bf480a046fcb17c029ac9285 Mon Sep 17 00:00:00 2001
From: Zven <zvenhuan@gmail.com>
Date: Tue, 28 Apr 2026 10:55:29 +0800
Subject: [PATCH 24/46] fix(openai): preserve current Codex compact payload
 fields

---
 .../internal/service/openai_gateway_service.go | 13 ++++++++++++-
 .../service/openai_gateway_service_test.go     | 18 ++++++++++++++++++
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/backend/internal/service/openai_gateway_service.go b/backend/internal/service/openai_gateway_service.go
index 13e3ddab..00b1c4a7 100644
--- a/backend/internal/service/openai_gateway_service.go
+++ b/backend/internal/service/openai_gateway_service.go
@@ -4864,7 +4864,18 @@ func normalizeOpenAICompactRequestBody(body []byte) ([]byte, bool, error) {
 	}
 
 	normalized := []byte(`{}`)
-	for _, field := range []string{"model", "input", "instructions", "previous_response_id"} {
+	// Keep the current Codex /compact schema while still dropping request-scoped
+	// fields such as prompt_cache_key, store, and stream.
+	for _, field := range []string{
+		"model",
+		"input",
+		"instructions",
+		"tools",
+		"parallel_tool_calls",
+		"reasoning",
+		"text",
+		"previous_response_id",
+	} {
 		value := gjson.GetBytes(body, field)
 		if !value.Exists() {
 			continue
diff --git a/backend/internal/service/openai_gateway_service_test.go b/backend/internal/service/openai_gateway_service_test.go
index 03b49865..b55f0d2c 100644
--- a/backend/internal/service/openai_gateway_service_test.go
+++ b/backend/internal/service/openai_gateway_service_test.go
@@ -1767,6 +1767,24 @@ func TestOpenAIResponsesRequestPathSuffix(t *testing.T) {
 	}
 }
 
+func TestNormalizeOpenAICompactRequestBodyPreservesCurrentCodexPayloadFields(t *testing.T) {
+	body := []byte(`{"model":"gpt-5.5","input":[{"type":"message","role":"user","content":"compact me"}],"instructions":"compact-test","tools":[{"type":"function","name":"shell"}],"parallel_tool_calls":true,"reasoning":{"effort":"high"},"text":{"verbosity":"low"},"previous_response_id":"resp_123","store":true,"stream":true,"prompt_cache_key":"cache_123"}`)
+
+	normalized, changed, err := normalizeOpenAICompactRequestBody(body)
+
+	require.NoError(t, err)
+	require.True(t, changed)
+	require.Equal(t, "gpt-5.5", gjson.GetBytes(normalized, "model").String())
+	require.True(t, gjson.GetBytes(normalized, "tools").Exists())
+	require.True(t, gjson.GetBytes(normalized, "parallel_tool_calls").Bool())
+	require.Equal(t, "high", gjson.GetBytes(normalized, "reasoning.effort").String())
+	require.Equal(t, "low", gjson.GetBytes(normalized, "text.verbosity").String())
+	require.Equal(t, "resp_123", gjson.GetBytes(normalized, "previous_response_id").String())
+	require.False(t, gjson.GetBytes(normalized, "store").Exists())
+	require.False(t, gjson.GetBytes(normalized, "stream").Exists())
+	require.False(t, gjson.GetBytes(normalized, "prompt_cache_key").Exists())
+}
+
 func TestOpenAIBuildUpstreamRequestOpenAIPassthroughPreservesCompactPath(t *testing.T) {
 	gin.SetMode(gin.TestMode)
 	rec := httptest.NewRecorder()

From 30f55a1f729ea3d4c20e7b8a175812e1ab9f10ae Mon Sep 17 00:00:00 2001
From: DaydreamCoding <DaydreamCoding@users.noreply.github.com>
Date: Tue, 28 Apr 2026 00:34:23 +0800
Subject: [PATCH 25/46] =?UTF-8?q?feat(openai):=20OpenAI=20Fast/Flex=20Poli?=
 =?UTF-8?q?cy=20=E5=AE=8C=E6=95=B4=E5=AE=9E=E7=8E=B0=EF=BC=88HTTP=20+=20We?=
 =?UTF-8?q?bSocket=20+=20Admin=EF=BC=89?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

对称参照 Claude BetaPolicy 的 fast-mode 过滤实现，新增针对 OpenAI 上游
service_tier 字段（priority / flex，含客户端 "fast" → "priority" 归一化）的
pass / filter / block 三态策略，覆盖全部 OpenAI 入口 + admin 配置入口。

后端核心
- 新增 SettingKeyOpenAIFastPolicySettings、OpenAIFastPolicyRule、
  OpenAIFastPolicySettings 配置模型，含规则的 service_tier × action × scope
  × 模型白名单 × fallback action 维度。
- SettingService.Get/SetOpenAIFastPolicySettings；缺失时返回内置默认策略
  （所有模型的 priority 走 filter，whitelist 为空，fallback=pass）。设计
  依据：service_tier=fast 是用户级开关，与 model 字段正交，默认锁定特定
  model slug 会留下"用 gpt-4 + fast 透传 priority 上游"的绕过路径。JSON
  解析失败不再静默 fallback，slog.Warn 记录脏数据，便于运维定位。
- service_tier 归一化（trim + ToLower + fast→priority + 白名单 priority/flex）
  与策略评估（evaluateOpenAIFastPolicy）作为唯一真实来源，HTTP / WS 共用。
  抽出纯函数 evaluateOpenAIFastPolicyWithSettings，配合 ctx-bound settings
  快照（withOpenAIFastPolicyContext / openAIFastPolicySettingsFromContext），
  WS 长会话入口预取一次后所有帧复用，避免每帧打到 settingService。

HTTP 入口（4 个）
- Chat Completions、Anthropic 兼容（Messages，含 BetaFastMode→priority 二次
  命中）、原生 Responses、Passthrough Responses 全部接入
  applyOpenAIFastPolicyToBody，filter 走 sjson 顶层删除 service_tier，block
  返回 403 forbidden_error JSON。
- 4 入口统一使用 upstream 视角的 model（GetMappedModel +
  normalizeOpenAIModelForUpstream + Codex OAuth normalize 后的 slug），
  避免 chat/messages/native /responses/passthrough 因为 model 维度不同
  造成 whitelist 命中差异。
- 在 pass 路径也把客户端 "fast" 别名归一化为 "priority" 写回 body，
  否则 native /responses 与 passthrough 入口会把 "fast" 原样透传给上游
  导致 400/拒绝（chat-completions 入口的 normalizeResponsesBodyServiceTier
  此前已具备同等行为）。

WebSocket 入口
- 新增 applyOpenAIFastPolicyToWSResponseCreate：严格匹配
  type="response.create"，仅处理顶层 service_tier；filter 用 sjson 删字段，
  block 返回 typed *OpenAIFastBlockedError。
- ingress 路径在 parseClientPayload 内调用，block 命中先 Write Realtime
  风格 error event 再返回 OpenAIWSClientCloseError(StatusPolicyViolation
  =1008)，依赖底层 WebSocket Conn.Write 的同步 flush 保证 error 先于
  close。
- passthrough 路径在 RunEntry 前对 firstClientMessage 应用策略，并通过
  openAIWSPolicyEnforcingFrameConn 包装 ReadFrame 对每个 client→upstream
  帧执行策略；后续帧无 model 字段时回退到 capturedSessionModel。
  filter 闭包内同时侦测 session.update / session.created 帧的 session.model
  字段刷新 capturedSessionModel，封堵"首帧 model=gpt-4o（pass）→
  session.update 改为 gpt-5.5 → 不带 model 的 response.create fallback
  到 gpt-4o"的 mid-session 绕过路径。
- passthrough billing：requestServiceTier 在策略 filter 之后再从
  firstClientMessage 提取，filter 命中时 OpenAIForwardResult.ServiceTier
  上报 nil（default tier），与 HTTP 入口（reqBody 来自 post-filter map）
  / WS ingress（payload 来自 post-filter bytes）的语义一致。
- 错误事件 schema：{event_id: "evt_<32hex>", type: "error",
  error: {type: "forbidden_error", code: "policy_violation", message}}，
  与 OpenAI codex 客户端 error event 解析兼容。

Admin / Frontend
- dto.SystemSettings / UpdateSettingsRequest 新增
  openai_fast_policy_settings 字段（omitempty），bulk GET/PUT 接入。
- Settings 页 Gateway 页签新增 Fast/Flex Policy 表单卡片：
  service_tier × action × scope × 模型白名单 × fallback action 全字段配置。
- 前端守门：openaiFastPolicyLoaded 标志仅在 GET 真带回字段时才允许回写，
  避免 rollout/错误把默认规则覆盖成空；saveSettings 回写循环 skip 该字段，
  由专用刷新逻辑处理；仅 action=block 时发送 error_message，匹配后端
  omitempty 行为。

测试
- HTTP 路径：openai_fast_policy_test.go 覆盖默认配置（whitelist=[]，所有
  模型 priority filter）/ block 自定义错误 / scope 区分 / filter 删字段 /
  block 不改 body / block 短路上游 / Anthropic BetaFastMode 触发 OpenAI
  fast policy 等场景。
- WebSocket 路径：openai_fast_policy_ws_test.go 覆盖
    helper 单元（filter / fast→priority 归一化 / flex 透传 / block typed
    error / 无 service_tier 字节不变 / 非 response.create 帧不动 / 空 type
    帧不动 / event_id+code 字段断言 / 非字符串 service_tier 容错）+
    pass 路径 fast 别名归一化回归 +
    ingress 端到端（filter 后上游不含 service_tier / block 后客户端先收
    error event 再收 close 1008 且上游 0 写）+
    passthrough capturedSessionModel fallback 用例（whitelist 策略下首帧
    建立、缺 model 命中 fallback、缺少 fallback 时的 leak 文档化）+
    passthrough session.update / session.created 旋转 capturedSessionModel
    的 mid-session 绕过回归 +
    passthrough billing post-filter ServiceTier 与 idempotent filter 回归。

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 backend/cmd/server/wire_gen.go                |    2 +-
 .../handler/admin/admin_helpers_test.go       |   64 ++
 .../internal/handler/admin/setting_handler.go |   58 +
 ...tting_handler_auth_source_defaults_test.go |    7 +-
 backend/internal/handler/dto/settings.go      |   19 +
 backend/internal/server/api_contract_test.go  |   20 +
 backend/internal/service/domain_constants.go  |    6 +
 .../service/openai_fast_policy_test.go        |  286 +++++
 .../service/openai_fast_policy_ws_test.go     | 1018 +++++++++++++++++
 .../openai_gateway_chat_completions.go        |   11 +
 .../openai_gateway_chat_completions_test.go   |   31 +
 .../service/openai_gateway_messages.go        |   13 +
 .../openai_gateway_record_usage_test.go       |   21 +-
 .../service/openai_gateway_service.go         |  372 +++++-
 .../internal/service/openai_ws_forwarder.go   |   47 +
 .../openai_ws_protocol_forward_test.go        |    1 +
 .../openai_ws_v2_passthrough_adapter.go       |  226 +++-
 backend/internal/service/setting_service.go   |   78 ++
 backend/internal/service/settings_view.go     |   54 +
 frontend/src/api/admin/settings.ts            |   29 +
 frontend/src/i18n/locales/en.ts               |   32 +
 frontend/src/i18n/locales/zh.ts               |   32 +
 frontend/src/views/admin/SettingsView.vue     |  403 +++++++
 23 files changed, 2820 insertions(+), 10 deletions(-)
 create mode 100644 backend/internal/service/openai_fast_policy_test.go
 create mode 100644 backend/internal/service/openai_fast_policy_ws_test.go

diff --git a/backend/cmd/server/wire_gen.go b/backend/cmd/server/wire_gen.go
index f767bbea..14b7db28 100644
--- a/backend/cmd/server/wire_gen.go
+++ b/backend/cmd/server/wire_gen.go
@@ -186,7 +186,7 @@ func initializeApplication(buildInfo handler.BuildInfo) (*Application, error) {
 	balanceNotifyService := service.ProvideBalanceNotifyService(emailService, settingRepository, accountRepository)
 	gatewayService := service.NewGatewayService(accountRepository, groupRepository, usageLogRepository, usageBillingRepository, userRepository, userSubscriptionRepository, userGroupRateRepository, gatewayCache, configConfig, schedulerSnapshotService, concurrencyService, billingService, rateLimitService, billingCacheService, identityService, httpUpstream, deferredService, claudeTokenProvider, sessionLimitCache, rpmCache, digestSessionStore, settingService, tlsFingerprintProfileService, channelService, modelPricingResolver, balanceNotifyService)
 	openAITokenProvider := service.ProvideOpenAITokenProvider(accountRepository, geminiTokenCache, openAIOAuthService, oAuthRefreshAPI)
-	openAIGatewayService := service.NewOpenAIGatewayService(accountRepository, usageLogRepository, usageBillingRepository, userRepository, userSubscriptionRepository, userGroupRateRepository, gatewayCache, configConfig, schedulerSnapshotService, concurrencyService, billingService, rateLimitService, billingCacheService, httpUpstream, deferredService, openAITokenProvider, modelPricingResolver, channelService, balanceNotifyService)
+	openAIGatewayService := service.NewOpenAIGatewayService(accountRepository, usageLogRepository, usageBillingRepository, userRepository, userSubscriptionRepository, userGroupRateRepository, gatewayCache, configConfig, schedulerSnapshotService, concurrencyService, billingService, rateLimitService, billingCacheService, httpUpstream, deferredService, openAITokenProvider, modelPricingResolver, channelService, balanceNotifyService, settingService)
 	geminiMessagesCompatService := service.NewGeminiMessagesCompatService(accountRepository, groupRepository, gatewayCache, schedulerSnapshotService, geminiTokenProvider, rateLimitService, httpUpstream, antigravityGatewayService, configConfig)
 	opsSystemLogSink := service.ProvideOpsSystemLogSink(opsRepository)
 	opsService := service.NewOpsService(opsRepository, settingRepository, configConfig, accountRepository, userRepository, concurrencyService, gatewayService, openAIGatewayService, geminiMessagesCompatService, antigravityGatewayService, opsSystemLogSink)
diff --git a/backend/internal/handler/admin/admin_helpers_test.go b/backend/internal/handler/admin/admin_helpers_test.go
index 3833d32e..6df49154 100644
--- a/backend/internal/handler/admin/admin_helpers_test.go
+++ b/backend/internal/handler/admin/admin_helpers_test.go
@@ -8,6 +8,7 @@ import (
 	"testing"
 	"time"
 
+	"github.com/Wei-Shaw/sub2api/internal/handler/dto"
 	"github.com/Wei-Shaw/sub2api/internal/service"
 	"github.com/gin-gonic/gin"
 	"github.com/stretchr/testify/require"
@@ -222,3 +223,66 @@ func TestOpsWSHelpers(t *testing.T) {
 	require.True(t, isAddrInTrustedProxies(addr, prefixes))
 	require.False(t, isAddrInTrustedProxies(netip.MustParseAddr("192.168.0.1"), prefixes))
 }
+
+// TestOpenAIFastPolicySettingsFromDTO_NormalizesServiceTier 验证 admin
+// 写入路径会把 ServiceTier 的空字符串/空白/大小写归一化为
+// service.OpenAIFastTierAny ("all")，避免落盘时 "" 与 "all" 双语义。
+func TestOpenAIFastPolicySettingsFromDTO_NormalizesServiceTier(t *testing.T) {
+	t.Run("nil input returns nil", func(t *testing.T) {
+		require.Nil(t, openaiFastPolicySettingsFromDTO(nil))
+	})
+
+	t.Run("empty service_tier becomes 'all'", func(t *testing.T) {
+		in := &dto.OpenAIFastPolicySettings{
+			Rules: []dto.OpenAIFastPolicyRule{{
+				ServiceTier: "",
+				Action:      "filter",
+				Scope:       "all",
+			}},
+		}
+		out := openaiFastPolicySettingsFromDTO(in)
+		require.NotNil(t, out)
+		require.Len(t, out.Rules, 1)
+		require.Equal(t, service.OpenAIFastTierAny, out.Rules[0].ServiceTier)
+		require.Equal(t, "all", out.Rules[0].ServiceTier)
+	})
+
+	t.Run("whitespace-only service_tier becomes 'all'", func(t *testing.T) {
+		in := &dto.OpenAIFastPolicySettings{
+			Rules: []dto.OpenAIFastPolicyRule{{
+				ServiceTier: "   ",
+				Action:      "pass",
+				Scope:       "all",
+			}},
+		}
+		out := openaiFastPolicySettingsFromDTO(in)
+		require.Equal(t, service.OpenAIFastTierAny, out.Rules[0].ServiceTier)
+	})
+
+	t.Run("uppercase service_tier is lowercased", func(t *testing.T) {
+		in := &dto.OpenAIFastPolicySettings{
+			Rules: []dto.OpenAIFastPolicyRule{{
+				ServiceTier: "PRIORITY",
+				Action:      "filter",
+				Scope:       "all",
+			}},
+		}
+		out := openaiFastPolicySettingsFromDTO(in)
+		require.Equal(t, service.OpenAIFastTierPriority, out.Rules[0].ServiceTier)
+	})
+
+	t.Run("non-empty values pass through (lowercased)", func(t *testing.T) {
+		in := &dto.OpenAIFastPolicySettings{
+			Rules: []dto.OpenAIFastPolicyRule{
+				{ServiceTier: "priority", Action: "filter", Scope: "all"},
+				{ServiceTier: "flex", Action: "block", Scope: "oauth"},
+				{ServiceTier: "all", Action: "pass", Scope: "apikey"},
+			},
+		}
+		out := openaiFastPolicySettingsFromDTO(in)
+		require.Len(t, out.Rules, 3)
+		require.Equal(t, service.OpenAIFastTierPriority, out.Rules[0].ServiceTier)
+		require.Equal(t, service.OpenAIFastTierFlex, out.Rules[1].ServiceTier)
+		require.Equal(t, service.OpenAIFastTierAny, out.Rules[2].ServiceTier)
+	})
+}
diff --git a/backend/internal/handler/admin/setting_handler.go b/backend/internal/handler/admin/setting_handler.go
index 320dbd6b..d6580191 100644
--- a/backend/internal/handler/admin/setting_handler.go
+++ b/backend/internal/handler/admin/setting_handler.go
@@ -248,9 +248,51 @@ func (h *SettingHandler) GetSettings(c *gin.Context) {
 
 		AffiliateEnabled: settings.AffiliateEnabled,
 	}
+
+	// OpenAI fast policy (stored under a dedicated setting key)
+	if fastPolicy, err := h.settingService.GetOpenAIFastPolicySettings(c.Request.Context()); err != nil {
+		slog.Error("openai_fast_policy_settings_get_failed", "error", err)
+	} else if fastPolicy != nil {
+		payload.OpenAIFastPolicySettings = openaiFastPolicySettingsToDTO(fastPolicy)
+	}
+
 	response.Success(c, systemSettingsResponseData(payload, authSourceDefaults))
 }
 
+// openaiFastPolicySettingsToDTO converts service -> dto for OpenAI fast policy.
+func openaiFastPolicySettingsToDTO(s *service.OpenAIFastPolicySettings) *dto.OpenAIFastPolicySettings {
+	if s == nil {
+		return nil
+	}
+	rules := make([]dto.OpenAIFastPolicyRule, len(s.Rules))
+	for i, r := range s.Rules {
+		rules[i] = dto.OpenAIFastPolicyRule(r)
+	}
+	return &dto.OpenAIFastPolicySettings{Rules: rules}
+}
+
+// openaiFastPolicySettingsFromDTO converts dto -> service for OpenAI fast policy.
+//
+// 规范化 ServiceTier：在 DTO 进入 service 层之前统一做 trim + 小写，并把空字符串
+// 归一为 service.OpenAIFastTierAny ("all")，避免管理员保存时空串与 "all" 同时
+// 表达"匹配任意 tier"造成数据库取值的二义性。其它非空值仅做 trim + 小写后透传，
+// 由 service.SetOpenAIFastPolicySettings 负责合法值校验。
+func openaiFastPolicySettingsFromDTO(s *dto.OpenAIFastPolicySettings) *service.OpenAIFastPolicySettings {
+	if s == nil {
+		return nil
+	}
+	rules := make([]service.OpenAIFastPolicyRule, len(s.Rules))
+	for i, r := range s.Rules {
+		rules[i] = service.OpenAIFastPolicyRule(r)
+		tier := strings.ToLower(strings.TrimSpace(rules[i].ServiceTier))
+		if tier == "" {
+			tier = service.OpenAIFastTierAny
+		}
+		rules[i].ServiceTier = tier
+	}
+	return &service.OpenAIFastPolicySettings{Rules: rules}
+}
+
 // UpdateSettingsRequest 更新设置请求
 type UpdateSettingsRequest struct {
 	// 注册设置
@@ -452,6 +494,9 @@ type UpdateSettingsRequest struct {
 
 	// Affiliate (邀请返利) feature switch
 	AffiliateEnabled *bool `json:"affiliate_enabled"`
+
+	// OpenAI fast/flex policy (optional, only updated when provided)
+	OpenAIFastPolicySettings *dto.OpenAIFastPolicySettings `json:"openai_fast_policy_settings,omitempty"`
 }
 
 // UpdateSettings 更新系统设置
@@ -1350,6 +1395,14 @@ func (h *SettingHandler) UpdateSettings(c *gin.Context) {
 		return
 	}
 
+	// Update OpenAI fast policy (stored under dedicated key, only when provided).
+	if req.OpenAIFastPolicySettings != nil {
+		if err := h.settingService.SetOpenAIFastPolicySettings(c.Request.Context(), openaiFastPolicySettingsFromDTO(req.OpenAIFastPolicySettings)); err != nil {
+			response.BadRequest(c, err.Error())
+			return
+		}
+	}
+
 	// Update payment configuration (integrated into system settings).
 	// Skip if no payment fields were provided (prevents accidental wipe).
 	if h.paymentConfigService != nil && hasPaymentFields(req) {
@@ -1555,6 +1608,11 @@ func (h *SettingHandler) UpdateSettings(c *gin.Context) {
 
 		AffiliateEnabled: updatedSettings.AffiliateEnabled,
 	}
+	if fastPolicy, err := h.settingService.GetOpenAIFastPolicySettings(c.Request.Context()); err != nil {
+		slog.Error("openai_fast_policy_settings_get_failed", "error", err)
+	} else if fastPolicy != nil {
+		payload.OpenAIFastPolicySettings = openaiFastPolicySettingsToDTO(fastPolicy)
+	}
 	response.Success(c, systemSettingsResponseData(payload, updatedAuthSourceDefaults))
 }
 
diff --git a/backend/internal/handler/admin/setting_handler_auth_source_defaults_test.go b/backend/internal/handler/admin/setting_handler_auth_source_defaults_test.go
index 9a33a93a..085fd2ca 100644
--- a/backend/internal/handler/admin/setting_handler_auth_source_defaults_test.go
+++ b/backend/internal/handler/admin/setting_handler_auth_source_defaults_test.go
@@ -26,7 +26,12 @@ func (s *settingHandlerRepoStub) Get(ctx context.Context, key string) (*service.
 }
 
 func (s *settingHandlerRepoStub) GetValue(ctx context.Context, key string) (string, error) {
-	panic("unexpected GetValue call")
+	if s.values != nil {
+		if value, ok := s.values[key]; ok {
+			return value, nil
+		}
+	}
+	return "", nil
 }
 
 func (s *settingHandlerRepoStub) Set(ctx context.Context, key, value string) error {
diff --git a/backend/internal/handler/dto/settings.go b/backend/internal/handler/dto/settings.go
index 92ae4dc6..b865d703 100644
--- a/backend/internal/handler/dto/settings.go
+++ b/backend/internal/handler/dto/settings.go
@@ -198,6 +198,9 @@ type SystemSettings struct {
 
 	// Affiliate (邀请返利) feature switch
 	AffiliateEnabled bool `json:"affiliate_enabled"`
+
+	// OpenAI fast/flex policy
+	OpenAIFastPolicySettings *OpenAIFastPolicySettings `json:"openai_fast_policy_settings,omitempty"`
 }
 
 type DefaultSubscriptionSetting struct {
@@ -294,6 +297,22 @@ type BetaPolicySettings struct {
 	Rules []BetaPolicyRule `json:"rules"`
 }
 
+// OpenAIFastPolicyRule is the DTO for a single OpenAI fast/flex policy rule.
+type OpenAIFastPolicyRule struct {
+	ServiceTier          string   `json:"service_tier"`
+	Action               string   `json:"action"`
+	Scope                string   `json:"scope"`
+	ErrorMessage         string   `json:"error_message,omitempty"`
+	ModelWhitelist       []string `json:"model_whitelist,omitempty"`
+	FallbackAction       string   `json:"fallback_action,omitempty"`
+	FallbackErrorMessage string   `json:"fallback_error_message,omitempty"`
+}
+
+// OpenAIFastPolicySettings is the DTO for the OpenAI fast policy configuration (a list of rules).
+type OpenAIFastPolicySettings struct {
+	Rules []OpenAIFastPolicyRule `json:"rules"`
+}
+
 // ParseCustomMenuItems parses a JSON string into a slice of CustomMenuItem.
 // Returns empty slice on empty/invalid input.
 func ParseCustomMenuItems(raw string) []CustomMenuItem {
diff --git a/backend/internal/server/api_contract_test.go b/backend/internal/server/api_contract_test.go
index ca6fd0cc..f24a1677 100644
--- a/backend/internal/server/api_contract_test.go
+++ b/backend/internal/server/api_contract_test.go
@@ -748,6 +748,16 @@ func TestAPIContracts(t *testing.T) {
 					"payment_visible_method_alipay_enabled": true,
 					"payment_visible_method_wxpay_enabled": false,
 					"openai_advanced_scheduler_enabled": true,
+					"openai_fast_policy_settings": {
+						"rules": [
+							{
+								"service_tier": "priority",
+								"action": "filter",
+								"scope": "all",
+								"fallback_action": "pass"
+							}
+						]
+					},
 					"custom_menu_items": [],
 					"custom_endpoints": [],
 					"payment_enabled": false,
@@ -930,6 +940,16 @@ func TestAPIContracts(t *testing.T) {
 					"payment_visible_method_alipay_enabled": false,
 					"payment_visible_method_wxpay_enabled": false,
 					"openai_advanced_scheduler_enabled": false,
+					"openai_fast_policy_settings": {
+						"rules": [
+							{
+								"service_tier": "priority",
+								"action": "filter",
+								"scope": "all",
+								"fallback_action": "pass"
+							}
+						]
+					},
 					"payment_enabled": false,
 					"payment_min_amount": 0,
 					"payment_max_amount": 0,
diff --git a/backend/internal/service/domain_constants.go b/backend/internal/service/domain_constants.go
index 0ef4a486..e1b175c3 100644
--- a/backend/internal/service/domain_constants.go
+++ b/backend/internal/service/domain_constants.go
@@ -306,6 +306,12 @@ const (
 	// SettingKeyBetaPolicySettings stores JSON config for beta policy rules.
 	SettingKeyBetaPolicySettings = "beta_policy_settings"
 
+	// SettingKeyOpenAIFastPolicySettings stores JSON config for OpenAI
+	// service_tier (fast/flex) policy rules. Mirrors BetaPolicySettings but
+	// targets OpenAI's body-level service_tier field instead of Claude's
+	// anthropic-beta header.
+	SettingKeyOpenAIFastPolicySettings = "openai_fast_policy_settings"
+
 	// =========================
 	// Claude Code Version Check
 	// =========================
diff --git a/backend/internal/service/openai_fast_policy_test.go b/backend/internal/service/openai_fast_policy_test.go
new file mode 100644
index 00000000..b52da614
--- /dev/null
+++ b/backend/internal/service/openai_fast_policy_test.go
@@ -0,0 +1,286 @@
+package service
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"testing"
+
+	"github.com/Wei-Shaw/sub2api/internal/config"
+	"github.com/stretchr/testify/require"
+)
+
+type openAIFastPolicyRepoStub struct { // in-memory settings repository stub used by the fast-policy tests
+	values map[string]string // setting key -> raw stored value
+}
+
+func (s *openAIFastPolicyRepoStub) Get(ctx context.Context, key string) (*Setting, error) {
+	panic("unexpected Get call") // the tests using this stub are not expected to reach Get
+}
+
+func (s *openAIFastPolicyRepoStub) GetValue(ctx context.Context, key string) (string, error) {
+	if v, ok := s.values[key]; ok {
+		return v, nil
+	}
+	return "", ErrSettingNotFound // a missing key is reported with the sentinel error, not as ""
+}
+
+func (s *openAIFastPolicyRepoStub) Set(ctx context.Context, key, value string) error {
+	if s.values == nil { // allocate lazily so a zero-value stub can still be written to
+		s.values = map[string]string{}
+	}
+	s.values[key] = value
+	return nil
+}
+
+func (s *openAIFastPolicyRepoStub) GetMultiple(ctx context.Context, keys []string) (map[string]string, error) {
+	panic("unexpected GetMultiple call") // not expected to be reached by these tests
+}
+
+func (s *openAIFastPolicyRepoStub) SetMultiple(ctx context.Context, settings map[string]string) error {
+	panic("unexpected SetMultiple call") // not expected to be reached by these tests
+}
+
+func (s *openAIFastPolicyRepoStub) GetAll(ctx context.Context) (map[string]string, error) {
+	panic("unexpected GetAll call") // not expected to be reached by these tests
+}
+
+func (s *openAIFastPolicyRepoStub) Delete(ctx context.Context, key string) error {
+	panic("unexpected Delete call") // not expected to be reached by these tests
+}
+
+// newOpenAIGatewayServiceWithSettings builds a gateway service whose setting store holds settings (when non-nil).
+func newOpenAIGatewayServiceWithSettings(t *testing.T, settings *OpenAIFastPolicySettings) *OpenAIGatewayService {
+	t.Helper()
+	stored := map[string]string{}
+	if settings != nil {
+		encoded, err := json.Marshal(settings)
+		require.NoError(t, err)
+		stored[SettingKeyOpenAIFastPolicySettings] = string(encoded)
+	}
+	stub := &openAIFastPolicyRepoStub{values: stored}
+	return &OpenAIGatewayService{settingService: NewSettingService(stub, &config.Config{})}
+}
+
+func TestEvaluateOpenAIFastPolicy_DefaultFiltersAllModelsPriority(t *testing.T) {
+	svc := newOpenAIGatewayServiceWithSettings(t, DefaultOpenAIFastPolicySettings())
+	account := &Account{Platform: PlatformOpenAI, Type: AccountTypeAPIKey}
+
+	// The default policy applies to every model (empty whitelist), because codex's
+	// service_tier=fast is a user-level switch that is orthogonal to the model.
+	// gpt-5.5 + priority → filter
+	action, _ := svc.evaluateOpenAIFastPolicy(context.Background(), account, "gpt-5.5", OpenAIFastTierPriority)
+	require.Equal(t, BetaPolicyActionFilter, action)
+
+	// gpt-5.5-turbo → filter
+	action, _ = svc.evaluateOpenAIFastPolicy(context.Background(), account, "gpt-5.5-turbo", OpenAIFastTierPriority)
+	require.Equal(t, BetaPolicyActionFilter, action)
+
+	// gpt-4 + priority → filter (the default policy covers every model)
+	action, _ = svc.evaluateOpenAIFastPolicy(context.Background(), account, "gpt-4", OpenAIFastTierPriority)
+	require.Equal(t, BetaPolicyActionFilter, action)
+
+	// gpt-5.5 + flex → pass (tier doesn't match)
+	action, _ = svc.evaluateOpenAIFastPolicy(context.Background(), account, "gpt-5.5", OpenAIFastTierFlex)
+	require.Equal(t, BetaPolicyActionPass, action)
+
+	// empty tier → pass
+	action, _ = svc.evaluateOpenAIFastPolicy(context.Background(), account, "gpt-5.5", "")
+	require.Equal(t, BetaPolicyActionPass, action)
+}
+
+func TestEvaluateOpenAIFastPolicy_BlockRuleCarriesMessage(t *testing.T) {
+	// A matching block rule must surface its configured error message.
+	blockRule := OpenAIFastPolicyRule{
+		ServiceTier:    OpenAIFastTierPriority,
+		Action:         BetaPolicyActionBlock,
+		Scope:          BetaPolicyScopeAll,
+		ErrorMessage:   "fast mode is not allowed",
+		ModelWhitelist: []string{"gpt-5.5"},
+		FallbackAction: BetaPolicyActionPass,
+	}
+	policy := &OpenAIFastPolicySettings{Rules: []OpenAIFastPolicyRule{blockRule}}
+	gateway := newOpenAIGatewayServiceWithSettings(t, policy)
+	apiKeyAccount := &Account{Platform: PlatformOpenAI, Type: AccountTypeAPIKey}
+
+	gotAction, gotMessage := gateway.evaluateOpenAIFastPolicy(context.Background(), apiKeyAccount, "gpt-5.5", OpenAIFastTierPriority)
+	require.Equal(t, BetaPolicyActionBlock, gotAction)
+	require.Equal(t, "fast mode is not allowed", gotMessage)
+}
+
+func TestEvaluateOpenAIFastPolicy_ScopeFiltersOAuth(t *testing.T) {
+	// Single rule: filter any tier, but only for OAuth-scoped accounts.
+	oauthOnly := OpenAIFastPolicyRule{
+		ServiceTier: OpenAIFastTierAny,
+		Action:      BetaPolicyActionFilter,
+		Scope:       BetaPolicyScopeOAuth,
+	}
+	gateway := newOpenAIGatewayServiceWithSettings(t, &OpenAIFastPolicySettings{Rules: []OpenAIFastPolicyRule{oauthOnly}})
+	ctx := context.Background()
+
+	// OAuth account → rule matches
+	viaOAuth := &Account{Platform: PlatformOpenAI, Type: AccountTypeOAuth}
+	gotOAuth, _ := gateway.evaluateOpenAIFastPolicy(ctx, viaOAuth, "gpt-4", OpenAIFastTierPriority)
+	require.Equal(t, BetaPolicyActionFilter, gotOAuth)
+
+	// API Key account → rule skipped → pass
+	viaAPIKey := &Account{Platform: PlatformOpenAI, Type: AccountTypeAPIKey}
+	gotAPIKey, _ := gateway.evaluateOpenAIFastPolicy(ctx, viaAPIKey, "gpt-4", OpenAIFastTierPriority)
+	require.Equal(t, BetaPolicyActionPass, gotAPIKey)
+}
+
+func TestApplyOpenAIFastPolicyToBody_FilterRemovesField(t *testing.T) {
+	svc := newOpenAIGatewayServiceWithSettings(t, DefaultOpenAIFastPolicySettings())
+	account := &Account{Platform: PlatformOpenAI, Type: AccountTypeAPIKey}
+
+	// gpt-5.5 fast → service_tier stripped
+	body := []byte(`{"model":"gpt-5.5","service_tier":"priority","messages":[]}`)
+	updated, err := svc.applyOpenAIFastPolicyToBody(context.Background(), account, "gpt-5.5", body)
+	require.NoError(t, err)
+	require.NotContains(t, string(updated), `"service_tier"`)
+
+	// Client sending "fast" (alias for priority) also filtered
+	body = []byte(`{"model":"gpt-5.5","service_tier":"fast"}`)
+	updated, err = svc.applyOpenAIFastPolicyToBody(context.Background(), account, "gpt-5.5", body)
+	require.NoError(t, err)
+	require.NotContains(t, string(updated), `"service_tier"`)
+
+	// gpt-4 priority → the default policy filters every model, so service_tier is removed
+	body = []byte(`{"model":"gpt-4","service_tier":"priority"}`)
+	updated, err = svc.applyOpenAIFastPolicyToBody(context.Background(), account, "gpt-4", body)
+	require.NoError(t, err)
+	require.NotContains(t, string(updated), `"service_tier"`)
+
+	// No service_tier → no-op
+	body = []byte(`{"model":"gpt-5.5"}`)
+	updated, err = svc.applyOpenAIFastPolicyToBody(context.Background(), account, "gpt-5.5", body)
+	require.NoError(t, err)
+	require.Equal(t, string(body), string(updated))
+}
+
+// TestApplyOpenAIFastPolicyToBody_OfficialTiersBypassDefaultRule verifies that, after the
+// whitelist extension, official OpenAI tiers the client sends explicitly (auto/default/scale)
+// are not silently stripped: the default policy only targets priority, so they fall through to pass.
+func TestApplyOpenAIFastPolicyToBody_OfficialTiersBypassDefaultRule(t *testing.T) {
+	svc := newOpenAIGatewayServiceWithSettings(t, DefaultOpenAIFastPolicySettings())
+	account := &Account{Platform: PlatformOpenAI, Type: AccountTypeAPIKey}
+
+	for _, tier := range []string{"auto", "default", "scale"} {
+		body := []byte(`{"model":"gpt-5.5","service_tier":"` + tier + `"}`)
+		updated, err := svc.applyOpenAIFastPolicyToBody(context.Background(), account, "gpt-5.5", body)
+		require.NoError(t, err, "tier %q should pass without error", tier)
+		require.Contains(t, string(updated), `"service_tier":"`+tier+`"`,
+			"tier %q should be preserved in body under default rule", tier)
+	}
+
+	// The evaluate layer must also return pass (default rule ServiceTier=priority does not match auto/default/scale)
+	for _, tier := range []string{"auto", "default", "scale"} {
+		action, _ := svc.evaluateOpenAIFastPolicy(context.Background(), account, "gpt-5.5", tier)
+		require.Equal(t, BetaPolicyActionPass, action, "tier %q should evaluate to pass", tier)
+	}
+}
+
+// TestApplyOpenAIFastPolicyToBody_AllRuleStripsOfficialTiers verifies that once an admin
+// explicitly configures a ServiceTier=all + Action=filter rule, official tiers such as
+// auto/default/scale are stripped too. This is intended: the first match short-circuits, and "all" covers any recognized tier.
+func TestApplyOpenAIFastPolicyToBody_AllRuleStripsOfficialTiers(t *testing.T) {
+	settings := &OpenAIFastPolicySettings{
+		Rules: []OpenAIFastPolicyRule{{
+			ServiceTier: OpenAIFastTierAny,
+			Action:      BetaPolicyActionFilter,
+			Scope:       BetaPolicyScopeAll,
+		}},
+	}
+	svc := newOpenAIGatewayServiceWithSettings(t, settings)
+	account := &Account{Platform: PlatformOpenAI, Type: AccountTypeAPIKey}
+
+	for _, tier := range []string{"auto", "default", "scale", "priority", "flex"} {
+		body := []byte(`{"model":"gpt-5.5","service_tier":"` + tier + `"}`)
+		updated, err := svc.applyOpenAIFastPolicyToBody(context.Background(), account, "gpt-5.5", body)
+		require.NoError(t, err)
+		require.NotContains(t, string(updated), `"service_tier"`,
+			"tier %q should be stripped under ServiceTier=all + filter rule", tier)
+	}
+}
+
+// TestApplyOpenAIFastPolicyToBody_UnknownTierStripped covers truly unknown tiers. They are
+// stripped before this helper runs (normalize returns nil → normalizeResponsesBodyServiceTier
+// deletes the field); applyOpenAIFastPolicyToBody is a no-op when normTier is empty, since the field
+// cannot survive the earlier normalization. Here apply is called directly to check it tolerates such values.
+func TestApplyOpenAIFastPolicyToBody_UnknownTierStripped(t *testing.T) {
+	svc := newOpenAIGatewayServiceWithSettings(t, DefaultOpenAIFastPolicySettings())
+	account := &Account{Platform: PlatformOpenAI, Type: AccountTypeAPIKey}
+
+	// The normalize stage drops unknown values (nil result).
+	require.Nil(t, normalizeOpenAIServiceTier("xxx"))
+
+	// applyOpenAIFastPolicyToBody neither errors nor changes the body for an unrecognized tier
+	// (stripping is not its job — the upstream normalizeResponsesBodyServiceTier already did that).
+	body := []byte(`{"model":"gpt-5.5","service_tier":"xxx"}`)
+	updated, err := svc.applyOpenAIFastPolicyToBody(context.Background(), account, "gpt-5.5", body)
+	require.NoError(t, err)
+	require.Equal(t, string(body), string(updated))
+}
+
+func TestApplyOpenAIFastPolicyToBody_BlockReturnsTypedError(t *testing.T) {
+	blockRule := OpenAIFastPolicyRule{
+		ServiceTier:    OpenAIFastTierPriority,
+		Action:         BetaPolicyActionBlock,
+		Scope:          BetaPolicyScopeAll,
+		ErrorMessage:   "fast mode is blocked for gpt-5.5",
+		ModelWhitelist: []string{"gpt-5.5"},
+		FallbackAction: BetaPolicyActionPass,
+	}
+	policy := &OpenAIFastPolicySettings{Rules: []OpenAIFastPolicyRule{blockRule}}
+	gateway := newOpenAIGatewayServiceWithSettings(t, policy)
+	apiKeyAccount := &Account{Platform: PlatformOpenAI, Type: AccountTypeAPIKey}
+
+	request := []byte(`{"model":"gpt-5.5","service_tier":"priority"}`)
+	result, applyErr := gateway.applyOpenAIFastPolicyToBody(context.Background(), apiKeyAccount, "gpt-5.5", request)
+	require.Error(t, applyErr)
+	// The rejection must be the typed error, and the body must come back unmodified.
+	var typed *OpenAIFastBlockedError
+	require.True(t, errors.As(applyErr, &typed))
+	require.Contains(t, typed.Message, "fast mode is blocked")
+	require.Equal(t, string(request), string(result))
+}
+
+func TestSetOpenAIFastPolicySettings_Validation(t *testing.T) {
+	repo := &openAIFastPolicyRepoStub{values: map[string]string{}}
+	svc := NewSettingService(repo, &config.Config{})
+
+	// An unknown action ("bogus") must be rejected.
+	err := svc.SetOpenAIFastPolicySettings(context.Background(), &OpenAIFastPolicySettings{
+		Rules: []OpenAIFastPolicyRule{{
+			ServiceTier: OpenAIFastTierPriority,
+			Action:      "bogus",
+			Scope:       BetaPolicyScopeAll,
+		}},
+	})
+	require.Error(t, err)
+
+	// An unknown service_tier ("turbo") must be rejected.
+	err = svc.SetOpenAIFastPolicySettings(context.Background(), &OpenAIFastPolicySettings{
+		Rules: []OpenAIFastPolicyRule{{
+			ServiceTier: "turbo",
+			Action:      BetaPolicyActionPass,
+			Scope:       BetaPolicyScopeAll,
+		}},
+	})
+	require.Error(t, err)
+
+	// A valid rule is accepted and can be read back through GetOpenAIFastPolicySettings.
+	err = svc.SetOpenAIFastPolicySettings(context.Background(), &OpenAIFastPolicySettings{
+		Rules: []OpenAIFastPolicyRule{{
+			ServiceTier: OpenAIFastTierPriority,
+			Action:      BetaPolicyActionFilter,
+			Scope:       BetaPolicyScopeAll,
+		}},
+	})
+	require.NoError(t, err)
+
+	got, err := svc.GetOpenAIFastPolicySettings(context.Background())
+	require.NoError(t, err)
+	require.Len(t, got.Rules, 1)
+	require.Equal(t, OpenAIFastTierPriority, got.Rules[0].ServiceTier)
+}
diff --git a/backend/internal/service/openai_fast_policy_ws_test.go b/backend/internal/service/openai_fast_policy_ws_test.go
new file mode 100644
index 00000000..3316a242
--- /dev/null
+++ b/backend/internal/service/openai_fast_policy_ws_test.go
@@ -0,0 +1,1018 @@
+package service
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"sync/atomic"
+	"testing"
+	"time"
+
+	"github.com/Wei-Shaw/sub2api/internal/config"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/apicompat"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/claude"
+	coderws "github.com/coder/websocket"
+	"github.com/gin-gonic/gin"
+	"github.com/stretchr/testify/require"
+	"github.com/tidwall/gjson"
+)
+
+// --- Helper-level (unit) tests for applyOpenAIFastPolicyToWSResponseCreate ---
+
+func TestWSResponseCreate_FilterStripsServiceTier(t *testing.T) {
+	gateway := newOpenAIGatewayServiceWithSettings(t, DefaultOpenAIFastPolicySettings())
+	apiKeyAccount := &Account{Platform: PlatformOpenAI, Type: AccountTypeAPIKey}
+
+	clientFrame := []byte(`{"type":"response.create","model":"gpt-5.5","service_tier":"priority","input":[{"type":"input_text","text":"hi"}]}`)
+	rewritten, blockedErr, err := gateway.applyOpenAIFastPolicyToWSResponseCreate(context.Background(), apiKeyAccount, "gpt-5.5", clientFrame)
+	require.NoError(t, err)
+	require.Nil(t, blockedErr)
+	require.NotContains(t, string(rewritten), `"service_tier"`, "filter action should strip service_tier")
+	// Everything other than service_tier must survive the rewrite.
+	require.Equal(t, "response.create", gjson.GetBytes(rewritten, "type").String())
+	require.Equal(t, "gpt-5.5", gjson.GetBytes(rewritten, "model").String())
+	require.Equal(t, "hi", gjson.GetBytes(rewritten, "input.0.text").String())
+}
+
+func TestWSResponseCreate_FastNormalizedToPriorityThenFiltered(t *testing.T) {
+	gateway := newOpenAIGatewayServiceWithSettings(t, DefaultOpenAIFastPolicySettings())
+	apiKeyAccount := &Account{Platform: PlatformOpenAI, Type: AccountTypeAPIKey}
+
+	frames := []string{
+		// Verbatim "fast" → normalized to "priority" → matches default rule → filter.
+		`{"type":"response.create","model":"gpt-5.5","service_tier":"fast"}`,
+		// Mixed-case + whitespace variant should also normalize and filter.
+		`{"type":"response.create","model":"gpt-5.5","service_tier":"  Fast  "}`,
+	}
+
+	for _, raw := range frames {
+		rewritten, blockedErr, err := gateway.applyOpenAIFastPolicyToWSResponseCreate(context.Background(), apiKeyAccount, "gpt-5.5", []byte(raw))
+		require.NoError(t, err)
+		require.Nil(t, blockedErr)
+		require.NotContains(t, string(rewritten), `"service_tier"`)
+	}
+}
+
+func TestWSResponseCreate_FlexPassThrough(t *testing.T) {
+	gateway := newOpenAIGatewayServiceWithSettings(t, DefaultOpenAIFastPolicySettings())
+	apiKeyAccount := &Account{Platform: PlatformOpenAI, Type: AccountTypeAPIKey}
+
+	// The default policy only targets priority, so a flex frame must come back unchanged.
+	flexFrame := []byte(`{"type":"response.create","model":"gpt-5.5","service_tier":"flex"}`)
+	forwarded, blockedErr, err := gateway.applyOpenAIFastPolicyToWSResponseCreate(context.Background(), apiKeyAccount, "gpt-5.5", flexFrame)
+	require.NoError(t, err)
+	require.Nil(t, blockedErr)
+	require.Equal(t, "flex", gjson.GetBytes(forwarded, "service_tier").String(), "flex frames must reach upstream untouched under default policy")
+}
+
+func TestWSResponseCreate_BlockReturnsTypedError(t *testing.T) {
+	// Block rule for the priority tier, whitelisted to gpt-5.5.
+	blockRule := OpenAIFastPolicyRule{
+		ServiceTier:    OpenAIFastTierPriority,
+		Action:         BetaPolicyActionBlock,
+		Scope:          BetaPolicyScopeAll,
+		ErrorMessage:   "ws fast blocked",
+		ModelWhitelist: []string{"gpt-5.5"},
+		FallbackAction: BetaPolicyActionPass,
+	}
+	policy := &OpenAIFastPolicySettings{Rules: []OpenAIFastPolicyRule{blockRule}}
+	gateway := newOpenAIGatewayServiceWithSettings(t, policy)
+	apiKeyAccount := &Account{Platform: PlatformOpenAI, Type: AccountTypeAPIKey}
+
+	clientFrame := []byte(`{"type":"response.create","model":"gpt-5.5","service_tier":"priority"}`)
+	returned, blockedErr, err := gateway.applyOpenAIFastPolicyToWSResponseCreate(context.Background(), apiKeyAccount, "gpt-5.5", clientFrame)
+	require.NoError(t, err)
+	require.NotNil(t, blockedErr)
+	require.Equal(t, "ws fast blocked", blockedErr.Message)
+	// A blocked frame is handed back byte-for-byte so the caller can inspect or log it.
+	require.Equal(t, string(clientFrame), string(returned))
+}
+
+func TestWSResponseCreate_NoServiceTierUntouched(t *testing.T) {
+	gateway := newOpenAIGatewayServiceWithSettings(t, DefaultOpenAIFastPolicySettings())
+	apiKeyAccount := &Account{Platform: PlatformOpenAI, Type: AccountTypeAPIKey}
+
+	plainFrame := []byte(`{"type":"response.create","model":"gpt-5.5","input":[]}`)
+	forwarded, blockedErr, err := gateway.applyOpenAIFastPolicyToWSResponseCreate(context.Background(), apiKeyAccount, "gpt-5.5", plainFrame)
+	require.NoError(t, err)
+	require.Nil(t, blockedErr)
+	require.Equal(t, string(plainFrame), string(forwarded), "no service_tier present must result in zero mutation")
+}
+
+func TestWSResponseCreate_NonResponseCreateFrameUntouched(t *testing.T) {
+	// A filter rule with a "*" whitelist and a filter fallback.
+	filterRule := OpenAIFastPolicyRule{
+		ServiceTier:    OpenAIFastTierPriority,
+		Action:         BetaPolicyActionFilter,
+		Scope:          BetaPolicyScopeAll,
+		ModelWhitelist: []string{"*"},
+		FallbackAction: BetaPolicyActionFilter,
+	}
+	policy := &OpenAIFastPolicySettings{Rules: []OpenAIFastPolicyRule{filterRule}}
+	gateway := newOpenAIGatewayServiceWithSettings(t, policy)
+	apiKeyAccount := &Account{Platform: PlatformOpenAI, Type: AccountTypeAPIKey}
+
+	// response.cancel happens to carry a service_tier-shaped field — must not be touched.
+	cancelFrame := []byte(`{"type":"response.cancel","service_tier":"priority"}`)
+	forwarded, blockedErr, err := gateway.applyOpenAIFastPolicyToWSResponseCreate(context.Background(), apiKeyAccount, "gpt-5.5", cancelFrame)
+	require.NoError(t, err)
+	require.Nil(t, blockedErr)
+	require.Equal(t, string(cancelFrame), string(forwarded))
+}
+
+// TestWSResponseCreate_EmptyTypeFrameUntouched is the A1 regression. The helper
+// used to treat an empty type as response.create, which risked stripping fields
+// from malformed / unknown client events. Since the A1 fix, only a strict
+// "response.create" match triggers the policy.
+func TestWSResponseCreate_EmptyTypeFrameUntouched(t *testing.T) {
+	// A filter rule with a "*" whitelist and a filter fallback.
+	filterRule := OpenAIFastPolicyRule{
+		ServiceTier:    OpenAIFastTierPriority,
+		Action:         BetaPolicyActionFilter,
+		Scope:          BetaPolicyScopeAll,
+		ModelWhitelist: []string{"*"},
+		FallbackAction: BetaPolicyActionFilter,
+	}
+	policy := &OpenAIFastPolicySettings{Rules: []OpenAIFastPolicyRule{filterRule}}
+	gateway := newOpenAIGatewayServiceWithSettings(t, policy)
+	apiKeyAccount := &Account{Platform: PlatformOpenAI, Type: AccountTypeAPIKey}
+
+	// Frame with no "type" field: must pass through completely unchanged
+	// even with a service_tier-shaped field present.
+	untyped := []byte(`{"service_tier":"priority","model":"gpt-5.5"}`)
+	forwarded, blockedErr, err := gateway.applyOpenAIFastPolicyToWSResponseCreate(context.Background(), apiKeyAccount, "gpt-5.5", untyped)
+	require.NoError(t, err)
+	require.Nil(t, blockedErr)
+	require.Equal(t, string(untyped), string(forwarded), "empty type must NOT be policy-checked — Realtime spec requires type, malformed frames are passed through")
+
+	// Explicit empty string also passes through.
+	untyped = []byte(`{"type":"","service_tier":"priority","model":"gpt-5.5"}`)
+	forwarded, blockedErr, err = gateway.applyOpenAIFastPolicyToWSResponseCreate(context.Background(), apiKeyAccount, "gpt-5.5", untyped)
+	require.NoError(t, err)
+	require.Nil(t, blockedErr)
+	require.Equal(t, string(untyped), string(forwarded))
+}
+
+// TestBuildOpenAIFastPolicyBlockedWSEvent_HasEventIDAndCode is the B1
+// regression: the rendered Realtime error event has to include a non-empty
+// event_id (so clients can correlate the rejection) and the stable error.code
+// "policy_violation". Per the original note, the HTTP-side counterpart is the 403
+// permission_error JSON body written by writeOpenAIFastPolicyBlockedResponse.
+func TestBuildOpenAIFastPolicyBlockedWSEvent_HasEventIDAndCode(t *testing.T) {
+	event := buildOpenAIFastPolicyBlockedWSEvent(&OpenAIFastBlockedError{Message: "blocked because reasons"})
+	require.NotNil(t, event)
+
+	require.Equal(t, "error", gjson.GetBytes(event, "type").String())
+	require.Equal(t, "invalid_request_error", gjson.GetBytes(event, "error.type").String())
+	require.Equal(t, "policy_violation", gjson.GetBytes(event, "error.code").String())
+	require.Equal(t, "blocked because reasons", gjson.GetBytes(event, "error.message").String())
+
+	firstID := gjson.GetBytes(event, "event_id").String()
+	require.NotEmpty(t, firstID, "event_id must be present so clients can correlate the rejection in their logs")
+	require.True(t, strings.HasPrefix(firstID, "evt_"), "event_id should follow the evt_<rand> Realtime convention; got %q", firstID)
+
+	// Two events built back to back must not share an ID.
+	second := buildOpenAIFastPolicyBlockedWSEvent(&OpenAIFastBlockedError{Message: "second"})
+	secondID := gjson.GetBytes(second, "event_id").String()
+	require.NotEqual(t, firstID, secondID, "event_id must be random per-event")
+}
+
+// TestBuildOpenAIFastPolicyBlockedWSEvent_NilSafe checks that a nil error yields a nil event (defensive guard).
+func TestBuildOpenAIFastPolicyBlockedWSEvent_NilSafe(t *testing.T) {
+	event := buildOpenAIFastPolicyBlockedWSEvent(nil)
+	require.Nil(t, event)
+}
+
+// --- D5: passthrough wrapper FrameConn — capturedSessionModel fallback ---
+
+// fakePassthroughFrameConn replays a fixed sequence of client frames into the
+// policy-enforcing wrapper, then returns errOpenAIWSConnClosed. Captures all Write
+// attempts for write-side assertions (none expected in the D5 test, since the
+// wrapper only filters reads).
+type fakePassthroughFrameConn struct {
+	reads     [][]byte // frames handed out by ReadFrame, in order
+	idx       int      // index of the next frame in reads
+	writes    [][]byte // copies of every payload passed to WriteFrame
+	closeOnce bool     // set by Close; a plain flag despite the name, not a sync.Once
+}
+
+func (f *fakePassthroughFrameConn) ReadFrame(ctx context.Context) (coderws.MessageType, []byte, error) {
+	if f.idx >= len(f.reads) {
+		return coderws.MessageText, nil, errOpenAIWSConnClosed // all scripted frames consumed
+	}
+	payload := f.reads[f.idx]
+	f.idx++
+	return coderws.MessageText, payload, nil
+}
+
+func (f *fakePassthroughFrameConn) WriteFrame(ctx context.Context, msgType coderws.MessageType, payload []byte) error {
+	cp := append([]byte(nil), payload...) // copy so later changes to payload do not alter the record
+	f.writes = append(f.writes, cp)
+	return nil
+}
+
+func (f *fakePassthroughFrameConn) Close() error {
+	f.closeOnce = true
+	return nil
+}
+
+// gpt55WhitelistFastPolicy returns a policy that carries an explicit model whitelist, used to
+// verify the capturedSessionModel fallback semantics (with the default policy's empty
+// whitelist the fallback path cannot be observed).
+func gpt55WhitelistFastPolicy() *OpenAIFastPolicySettings {
+	return &OpenAIFastPolicySettings{
+		Rules: []OpenAIFastPolicyRule{{
+			ServiceTier:    OpenAIFastTierPriority,
+			Action:         BetaPolicyActionFilter,
+			Scope:          BetaPolicyScopeAll,
+			ModelWhitelist: []string{"gpt-5.5", "gpt-5.5*"},
+			FallbackAction: BetaPolicyActionPass,
+		}},
+	}
+}
+
+// TestPolicyEnforcingFrameConn_FollowupFrameWithoutModelUsesCapturedModel is
+// the D5 regression: in passthrough mode a follow-up response.create frame
+// without a "model" field must still hit the policy via the session-level
+// model captured from the first frame. Without the fallback an empty model
+// would miss a model whitelist and silently leak service_tier=priority
+// through to the upstream.
+func TestPolicyEnforcingFrameConn_FollowupFrameWithoutModelUsesCapturedModel(t *testing.T) {
+	// A whitelist-carrying policy is used on purpose so that the capturedSessionModel
+	// fallback is observable (the default policy has an empty whitelist, so the result is
+	// the same with or without the fallback and cannot cover this regression).
+	svc := newOpenAIGatewayServiceWithSettings(t, gpt55WhitelistFastPolicy())
+	account := &Account{Platform: PlatformOpenAI, Type: AccountTypeAPIKey}
+
+	// Simulate the passthrough adapter capturing model from the first frame.
+	firstFrame := []byte(`{"type":"response.create","model":"gpt-5.5","service_tier":"priority"}`)
+	capturedSessionModel := openAIWSPassthroughPolicyModelForFrame(account, firstFrame)
+	require.Equal(t, "gpt-5.5", capturedSessionModel)
+
+	// Follow-up frame deliberately omits "model" — Realtime allows this.
+	followupFrame := []byte(`{"type":"response.create","service_tier":"priority"}`)
+
+	inner := &fakePassthroughFrameConn{
+		reads: [][]byte{followupFrame},
+	}
+	wrapper := &openAIWSPolicyEnforcingFrameConn{
+		inner: inner,
+		filter: func(msgType coderws.MessageType, payload []byte) ([]byte, *OpenAIFastBlockedError, error) {
+			if msgType != coderws.MessageText {
+				return payload, nil, nil
+			}
+			model := openAIWSPassthroughPolicyModelForFrame(account, payload)
+			if model == "" {
+				model = capturedSessionModel
+			}
+			return svc.applyOpenAIFastPolicyToWSResponseCreate(context.Background(), account, model, payload)
+		},
+	}
+
+	// Read the follow-up frame through the wrapper. The policy MUST still
+	// trigger filter (gpt-5.5 + priority → filter), so the service_tier
+	// field is gone by the time the relay sees it.
+	_, payload, err := wrapper.ReadFrame(context.Background())
+	require.NoError(t, err)
+	require.NotContains(t, string(payload), `"service_tier"`,
+		"D5 regression: empty model on follow-up frame must fall back to capturedSessionModel; whitelist policy filters service_tier=priority for gpt-5.5")
+	require.Equal(t, "response.create", gjson.GetBytes(payload, "type").String())
+}
+
+// TestPolicyEnforcingFrameConn_WithoutCapturedFallbackPolicyMisses pins the
+// inverse: when the wrapper has NO capturedSessionModel fallback (model is
+// empty per-frame and no fallback is wired up), the policy fails to match
+// the model whitelist and the frame leaks through unchanged. This documents
+// exactly the leak the D5 fix prevents.
+func TestPolicyEnforcingFrameConn_WithoutCapturedFallbackPolicyMisses(t *testing.T) {
+	// Again use the whitelist-carrying policy so that the leak is observable.
+	svc := newOpenAIGatewayServiceWithSettings(t, gpt55WhitelistFastPolicy())
+	account := &Account{Platform: PlatformOpenAI, Type: AccountTypeAPIKey}
+
+	followupFrame := []byte(`{"type":"response.create","service_tier":"priority"}`)
+	inner := &fakePassthroughFrameConn{reads: [][]byte{followupFrame}}
+	wrapper := &openAIWSPolicyEnforcingFrameConn{
+		inner: inner,
+		filter: func(msgType coderws.MessageType, payload []byte) ([]byte, *OpenAIFastBlockedError, error) {
+			// NO fallback — emulate the pre-fix behavior.
+			model := openAIWSPassthroughPolicyModelForFrame(account, payload)
+			return svc.applyOpenAIFastPolicyToWSResponseCreate(context.Background(), account, model, payload)
+		},
+	}
+
+	_, payload, err := wrapper.ReadFrame(context.Background())
+	require.NoError(t, err)
+	// Pre-fix: empty model misses ["gpt-5.5","gpt-5.5*"] whitelist → fallback=pass → service_tier kept.
+	require.Contains(t, string(payload), `"service_tier"`,
+		"sanity: without capturedSessionModel fallback the leak (D5) reproduces — confirms the fix is load-bearing")
+}
+
+// --- Ingress end-to-end test (filter path) ---
+
+// TestWSResponseCreate_IngressFiltersServiceTierBeforeUpstream wires up the
+// real ProxyResponsesWebSocketFromClient ingress session pipeline against a
+// captureConn upstream and asserts that a client frame with service_tier=fast
+// is normalized + filtered out before being written upstream. This is the
+// integration flavour of TestWSResponseCreate_FilterStripsServiceTier.
+func TestWSResponseCreate_IngressFiltersServiceTierBeforeUpstream(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+
+	cfg := &config.Config{}
+	cfg.Security.URLAllowlist.Enabled = false
+	cfg.Security.URLAllowlist.AllowInsecureHTTP = true
+	cfg.Gateway.OpenAIWS.Enabled = true
+	cfg.Gateway.OpenAIWS.OAuthEnabled = true
+	cfg.Gateway.OpenAIWS.APIKeyEnabled = true
+	cfg.Gateway.OpenAIWS.ResponsesWebsocketsV2 = true
+	cfg.Gateway.OpenAIWS.MaxConnsPerAccount = 1
+	cfg.Gateway.OpenAIWS.MinIdlePerAccount = 0
+	cfg.Gateway.OpenAIWS.MaxIdlePerAccount = 1
+	cfg.Gateway.OpenAIWS.QueueLimitPerConn = 8
+	cfg.Gateway.OpenAIWS.DialTimeoutSeconds = 3
+	cfg.Gateway.OpenAIWS.ReadTimeoutSeconds = 3
+	cfg.Gateway.OpenAIWS.WriteTimeoutSeconds = 3
+
+	captureConn := &openAIWSCaptureConn{
+		events: [][]byte{
+			[]byte(`{"type":"response.completed","response":{"id":"resp_ws_filter_1","model":"gpt-5.5","usage":{"input_tokens":1,"output_tokens":1}}}`),
+		},
+	}
+	captureDialer := &openAIWSCaptureDialer{conn: captureConn}
+	pool := newOpenAIWSConnPool(cfg)
+	pool.setClientDialerForTest(captureDialer)
+
+	repo := &openAIFastPolicyRepoStub{values: map[string]string{}}
+	defaultJSON, err := json.Marshal(DefaultOpenAIFastPolicySettings())
+	require.NoError(t, err)
+	repo.values[SettingKeyOpenAIFastPolicySettings] = string(defaultJSON)
+
+	svc := &OpenAIGatewayService{
+		cfg:              cfg,
+		httpUpstream:     &httpUpstreamRecorder{},
+		cache:            &stubGatewayCache{},
+		openaiWSResolver: NewOpenAIWSProtocolResolver(cfg),
+		toolCorrector:    NewCodexToolCorrector(),
+		openaiWSPool:     pool,
+		settingService:   NewSettingService(repo, cfg),
+	}
+
+	account := &Account{
+		ID:          901,
+		Name:        "openai-ws-filter",
+		Platform:    PlatformOpenAI,
+		Type:        AccountTypeAPIKey,
+		Status:      StatusActive,
+		Schedulable: true,
+		Concurrency: 1,
+		Credentials: map[string]any{"api_key": "sk-test"},
+		Extra: map[string]any{
+			"responses_websockets_v2_enabled": true,
+		},
+	}
+
+	serverErrCh := make(chan error, 1) // buffered: the handler sends exactly one result on every path
+	wsServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		conn, err := coderws.Accept(w, r, &coderws.AcceptOptions{
+			CompressionMode: coderws.CompressionContextTakeover,
+		})
+		if err != nil {
+			serverErrCh <- err
+			return
+		}
+		defer func() { _ = conn.CloseNow() }() // best-effort teardown; the close error is deliberately ignored
+
+		rec := httptest.NewRecorder()
+		ginCtx, _ := gin.CreateTestContext(rec)
+		req := r.Clone(r.Context())
+		req.Header = req.Header.Clone()
+		req.Header.Set("User-Agent", "unit-test-agent/1.0")
+		ginCtx.Request = req
+
+		readCtx, cancel := context.WithTimeout(r.Context(), 3*time.Second)
+		_, firstMessage, readErr := conn.Read(readCtx)
+		cancel()
+		if readErr != nil {
+			serverErrCh <- readErr
+			return
+		}
+		serverErrCh <- svc.ProxyResponsesWebSocketFromClient(r.Context(), ginCtx, conn, account, "sk-test", firstMessage, nil)
+	}))
+	defer wsServer.Close()
+
+	dialCtx, cancelDial := context.WithTimeout(context.Background(), 3*time.Second)
+	clientConn, _, err := coderws.Dial(dialCtx, "ws"+strings.TrimPrefix(wsServer.URL, "http"), nil)
+	cancelDial()
+	require.NoError(t, err)
+	defer func() { _ = clientConn.CloseNow() }()
+
+	writeCtx, cancelWrite := context.WithTimeout(context.Background(), 3*time.Second)
+	require.NoError(t, clientConn.Write(writeCtx, coderws.MessageText, []byte(`{"type":"response.create","model":"gpt-5.5","stream":false,"service_tier":"fast"}`)))
+	cancelWrite()
+
+	readCtx, cancelRead := context.WithTimeout(context.Background(), 3*time.Second)
+	_, event, readErr := clientConn.Read(readCtx)
+	cancelRead()
+	require.NoError(t, readErr)
+	require.Equal(t, "response.completed", gjson.GetBytes(event, "type").String())
+
+	require.NoError(t, clientConn.Close(coderws.StatusNormalClosure, "done"))
+
+	select {
+	case serverErr := <-serverErrCh:
+		require.NoError(t, serverErr)
+	case <-time.After(5 * time.Second):
+		t.Fatal("等待 ingress websocket 结束超时")
+	}
+
+	require.Len(t, captureConn.writes, 1, "上游应只收到一条 response.create")
+	upstream := captureConn.writes[0]
+	_, hasServiceTier := upstream["service_tier"]
+	require.False(t, hasServiceTier, "上游收到的 response.create 不应包含 service_tier 字段（已被 fast policy filter 删除）")
+	require.Equal(t, "response.create", upstream["type"])
+	require.Equal(t, "gpt-5.5", upstream["model"])
+}
+
+// TestWSResponseCreate_IngressBlockSendsErrorEventAndSkipsUpstream is the
+// integration flavour of TestWSResponseCreate_BlockReturnsTypedError. It
+// asserts that with a custom block rule, the client receives a Realtime-style
+// error event AND the upstream FrameConn never receives the offending frame.
+func TestWSResponseCreate_IngressBlockSendsErrorEventAndSkipsUpstream(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+
+	cfg := &config.Config{}
+	cfg.Security.URLAllowlist.Enabled = false
+	cfg.Security.URLAllowlist.AllowInsecureHTTP = true
+	cfg.Gateway.OpenAIWS.Enabled = true
+	cfg.Gateway.OpenAIWS.OAuthEnabled = true
+	cfg.Gateway.OpenAIWS.APIKeyEnabled = true
+	cfg.Gateway.OpenAIWS.ResponsesWebsocketsV2 = true
+	cfg.Gateway.OpenAIWS.MaxConnsPerAccount = 1
+	cfg.Gateway.OpenAIWS.MinIdlePerAccount = 0
+	cfg.Gateway.OpenAIWS.MaxIdlePerAccount = 1
+	cfg.Gateway.OpenAIWS.QueueLimitPerConn = 8
+	cfg.Gateway.OpenAIWS.DialTimeoutSeconds = 3
+	cfg.Gateway.OpenAIWS.ReadTimeoutSeconds = 3
+	cfg.Gateway.OpenAIWS.WriteTimeoutSeconds = 3
+
+	captureConn := &openAIWSCaptureConn{
+		// No events queued; the upstream should never get written to anyway.
+	}
+	captureDialer := &openAIWSCaptureDialer{conn: captureConn}
+	pool := newOpenAIWSConnPool(cfg)
+	pool.setClientDialerForTest(captureDialer)
+
+	blockSettings := &OpenAIFastPolicySettings{
+		Rules: []OpenAIFastPolicyRule{{
+			ServiceTier:    OpenAIFastTierPriority,
+			Action:         BetaPolicyActionBlock,
+			Scope:          BetaPolicyScopeAll,
+			ErrorMessage:   "ws priority blocked for testing",
+			ModelWhitelist: []string{"gpt-5.5"},
+			FallbackAction: BetaPolicyActionPass,
+		}},
+	}
+	repo := &openAIFastPolicyRepoStub{values: map[string]string{}}
+	raw, err := json.Marshal(blockSettings)
+	require.NoError(t, err)
+	repo.values[SettingKeyOpenAIFastPolicySettings] = string(raw)
+
+	svc := &OpenAIGatewayService{
+		cfg:              cfg,
+		httpUpstream:     &httpUpstreamRecorder{},
+		cache:            &stubGatewayCache{},
+		openaiWSResolver: NewOpenAIWSProtocolResolver(cfg),
+		toolCorrector:    NewCodexToolCorrector(),
+		openaiWSPool:     pool,
+		settingService:   NewSettingService(repo, cfg),
+	}
+
+	account := &Account{
+		ID:          902,
+		Name:        "openai-ws-block",
+		Platform:    PlatformOpenAI,
+		Type:        AccountTypeAPIKey,
+		Status:      StatusActive,
+		Schedulable: true,
+		Concurrency: 1,
+		Credentials: map[string]any{"api_key": "sk-test"},
+		Extra: map[string]any{
+			"responses_websockets_v2_enabled": true,
+		},
+	}
+
+	serverErrCh := make(chan error, 1) // buffered: the handler sends exactly one result on every path
+	wsServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		conn, err := coderws.Accept(w, r, &coderws.AcceptOptions{
+			CompressionMode: coderws.CompressionContextTakeover,
+		})
+		if err != nil {
+			serverErrCh <- err
+			return
+		}
+		defer func() { _ = conn.CloseNow() }()
+
+		rec := httptest.NewRecorder()
+		ginCtx, _ := gin.CreateTestContext(rec)
+		req := r.Clone(r.Context())
+		req.Header = req.Header.Clone()
+		req.Header.Set("User-Agent", "unit-test-agent/1.0")
+		ginCtx.Request = req
+
+		readCtx, cancel := context.WithTimeout(r.Context(), 3*time.Second)
+		_, firstMessage, readErr := conn.Read(readCtx)
+		cancel()
+		if readErr != nil {
+			serverErrCh <- readErr
+			return
+		}
+		proxyErr := svc.ProxyResponsesWebSocketFromClient(r.Context(), ginCtx, conn, account, "sk-test", firstMessage, nil)
+		// Mirror the production handler (openai_gateway_handler.go:1325-1328):
+		// when the proxy returns an OpenAIWSClientCloseError, surface its
+		// status code to the client via a graceful close handshake. Without
+		// this the deferred CloseNow() above would tear down the TCP
+		// connection without sending a close frame, and the C3 timing
+		// assertion (next read returns CloseStatus=1008) would see EOF
+		// instead.
+		var closeErr *OpenAIWSClientCloseError
+		if errors.As(proxyErr, &closeErr) {
+			_ = conn.Close(closeErr.StatusCode(), closeErr.Reason())
+		}
+		serverErrCh <- proxyErr
+	}))
+	defer wsServer.Close()
+
+	dialCtx, cancelDial := context.WithTimeout(context.Background(), 3*time.Second)
+	clientConn, _, err := coderws.Dial(dialCtx, "ws"+strings.TrimPrefix(wsServer.URL, "http"), nil)
+	cancelDial()
+	require.NoError(t, err)
+	defer func() { _ = clientConn.CloseNow() }()
+
+	writeCtx, cancelWrite := context.WithTimeout(context.Background(), 3*time.Second)
+	require.NoError(t, clientConn.Write(writeCtx, coderws.MessageText, []byte(`{"type":"response.create","model":"gpt-5.5","stream":false,"service_tier":"priority"}`)))
+	cancelWrite()
+
+	// C3 timing assertion: the FIRST frame the client reads must be the
+	// error event — not a close frame. coder/websocket@v1.8.14 Conn.Write is
+	// synchronous (writeFrame Flushes the bufio writer at write.go:307-311
+	// before returning) and the close handshake re-acquires the same
+	// writeFrameMu, so this ordering is enforced by the library itself; this
+	// assertion guards against future refactors that might break it.
+	readCtx, cancelRead := context.WithTimeout(context.Background(), 3*time.Second)
+	_, event, readErr := clientConn.Read(readCtx)
+	cancelRead()
+	require.NoError(t, readErr, "first read must succeed and return the error event before any close frame")
+	require.Equal(t, "error", gjson.GetBytes(event, "type").String())
+	require.Equal(t, "invalid_request_error", gjson.GetBytes(event, "error.type").String())
+	// B1 regression: event_id + error.code must be populated.
+	require.Equal(t, "policy_violation", gjson.GetBytes(event, "error.code").String())
+	require.NotEmpty(t, gjson.GetBytes(event, "event_id").String(), "event_id must be present so clients can correlate")
+	require.Contains(t, gjson.GetBytes(event, "error.message").String(), "ws priority blocked for testing")
+
+	// Next read must surface the close frame (as a CloseError). This
+	// asserts the [error event, close] ordering — i.e. the close did NOT
+	// race ahead of the data frame.
+	readCtx2, cancelRead2 := context.WithTimeout(context.Background(), 3*time.Second)
+	_, _, secondReadErr := clientConn.Read(readCtx2)
+	cancelRead2()
+	require.Error(t, secondReadErr, "after the error event the connection must surface a close")
+	require.Equal(t, coderws.StatusPolicyViolation, coderws.CloseStatus(secondReadErr),
+		"close status must be PolicyViolation; got %v", secondReadErr)
+
+	select {
+	case serverErr := <-serverErrCh:
+		// Server returns an OpenAIWSClientCloseError — handler closes the WS;
+		// here we just assert it surfaced as the typed close error.
+		require.Error(t, serverErr)
+		var closeErr *OpenAIWSClientCloseError
+		require.True(t, errors.As(serverErr, &closeErr), "block 应返回 OpenAIWSClientCloseError，得到 %T: %v", serverErr, serverErr)
+		require.Equal(t, coderws.StatusPolicyViolation, closeErr.StatusCode())
+	case <-time.After(5 * time.Second):
+		t.Fatal("等待 ingress 关闭超时")
+	}
+
+	// Critical: the offending frame must NEVER reach the upstream.
+	// captureDialer.DialCount may legitimately be 0 or 1 depending on whether
+	// the lease was acquired before policy fired; either way, no writes.
+	require.Empty(t, captureConn.writes, "block 命中后上游不应收到 response.create")
+}
+
+// --- HTTP-side gap-filling tests (already covered by existing tests but
+// requested to be split out explicitly) ---
+
+// TestApplyOpenAIFastPolicyToBody_BlockShortCircuitsUpstream confirms that
+// applyOpenAIFastPolicyToBody surfaces a *OpenAIFastBlockedError when the rule
+// action is "block", and that the body is left untouched. The caller (chat
+// completions / messages handlers) inspects this typed error and skips the
+// upstream HTTP call entirely — see openai_gateway_chat_completions.go:175 and
+// openai_gateway_messages.go:149.
+func TestApplyOpenAIFastPolicyToBody_BlockShortCircuitsUpstream(t *testing.T) {
+	settings := &OpenAIFastPolicySettings{
+		Rules: []OpenAIFastPolicyRule{{
+			ServiceTier:    OpenAIFastTierPriority,
+			Action:         BetaPolicyActionBlock,
+			Scope:          BetaPolicyScopeAll,
+			ErrorMessage:   "priority blocked",
+			ModelWhitelist: []string{"gpt-5.5"},
+			FallbackAction: BetaPolicyActionPass,
+		}},
+	}
+	svc := newOpenAIGatewayServiceWithSettings(t, settings)
+	account := &Account{Platform: PlatformOpenAI, Type: AccountTypeAPIKey}
+
+	body := []byte(`{"model":"gpt-5.5","service_tier":"priority","input":[]}`) // expected to hit the block rule above
+	updated, err := svc.applyOpenAIFastPolicyToBody(context.Background(), account, "gpt-5.5", body)
+	require.Error(t, err)
+	var blocked *OpenAIFastBlockedError
+	require.True(t, errors.As(err, &blocked), "block must surface as typed error so caller can skip upstream HTTP request")
+	require.Equal(t, "priority blocked", blocked.Message)
+	require.Equal(t, string(body), string(updated), "block must not mutate body")
+}
+
+// TestForwardAsAnthropicMessages_BetaFastModeTriggersOpenAIFastPolicy verifies
+// the Anthropic-compat entrypoint chain: anthropic-beta: fast-mode → BetaFastMode
+// detection → ServiceTier="priority" injection (openai_gateway_messages.go:60)
+// → applyOpenAIFastPolicyToBody filter on default policy → upstream body has
+// no service_tier. We exercise the same internal pipeline (Anthropic→Responses
+// + BetaFastMode + policy) without spinning up a real upstream HTTP server.
+func TestForwardAsAnthropicMessages_BetaFastModeTriggersOpenAIFastPolicy(t *testing.T) {
+	svc := newOpenAIGatewayServiceWithSettings(t, DefaultOpenAIFastPolicySettings())
+	account := &Account{Platform: PlatformOpenAI, Type: AccountTypeAPIKey}
+
+	// Step 1: parse Anthropic request (mirrors openai_gateway_messages.go:38-50).
+	anthropicBody := []byte(`{"model":"gpt-5.5","max_tokens":64,"messages":[{"role":"user","content":"hi"}]}`)
+	var anthropicReq apicompat.AnthropicRequest
+	require.NoError(t, json.Unmarshal(anthropicBody, &anthropicReq))
+	responsesReq, err := apicompat.AnthropicToResponses(&anthropicReq)
+	require.NoError(t, err)
+
+	// Step 2: BetaFastMode header → service_tier="priority" (mirrors line 58-61).
+	headers := http.Header{}
+	headers.Set("anthropic-beta", claude.BetaFastMode)
+	require.True(t, containsBetaToken(headers.Get("anthropic-beta"), claude.BetaFastMode))
+	responsesReq.ServiceTier = "priority" // injected by hand; the production header→tier translation is not invoked here
+	responsesReq.Model = "gpt-5.5"
+
+	// Step 3: marshal & apply fast policy (mirrors line 78 + 149).
+	responsesBody, err := json.Marshal(responsesReq)
+	require.NoError(t, err)
+	require.Equal(t, "priority", gjson.GetBytes(responsesBody, "service_tier").String(), "前置：beta 翻译应当注入 priority")
+
+	upstreamBody, policyErr := svc.applyOpenAIFastPolicyToBody(context.Background(), account, "gpt-5.5", responsesBody)
+	require.NoError(t, policyErr)
+
+	// Step 4: assert that policy filtered the field before the upstream HTTP request.
+	require.NotContains(t, string(upstreamBody), `"service_tier"`, "default policy 命中 gpt-5.5 priority 应当 filter 掉 service_tier")
+}
+
+// --- Fix1: passthrough capturedSessionModel must follow session.update ---
+
+// TestPolicyEnforcingFrameConn_SessionUpdateRotatesCapturedModel covers the
+// fix1 bypass: client opens with a whitelist-miss model (gpt-4o → pass under
+// gpt-5.5 whitelist), rotates to gpt-5.5 via session.update, then sends
+// response.create without "model". Without the session.update sniffing the
+// follow-up frame would fall back to the stale gpt-4o capture and pass — the
+// fix updates capturedSessionModel from session.* events so the fallback now
+// resolves to gpt-5.5 and the policy filters service_tier.
+func TestPolicyEnforcingFrameConn_SessionUpdateRotatesCapturedModel(t *testing.T) {
+	svc := newOpenAIGatewayServiceWithSettings(t, gpt55WhitelistFastPolicy())
+	account := &Account{Platform: PlatformOpenAI, Type: AccountTypeAPIKey}
+
+	// Frame 1: response.create with whitelist-miss model — under default
+	// rule fallback=pass, service_tier stays.
+	first := []byte(`{"type":"response.create","model":"gpt-4o","service_tier":"priority"}`)
+	// Frame 2: session.update rotates the session model to gpt-5.5.
+	rotate := []byte(`{"type":"session.update","session":{"model":"gpt-5.5"}}`)
+	// Frame 3: response.create WITHOUT model — must inherit gpt-5.5.
+	followup := []byte(`{"type":"response.create","service_tier":"priority"}`)
+
+	inner := &fakePassthroughFrameConn{reads: [][]byte{first, rotate, followup}}
+
+	// Replicate the production wiring in openai_ws_v2_passthrough_adapter.go
+	// so capturedSessionModel state is shared across frames.
+	capturedSessionModel := openAIWSPassthroughPolicyModelForFrame(account, first)
+	require.Equal(t, "gpt-4o", capturedSessionModel)
+	wrapper := &openAIWSPolicyEnforcingFrameConn{
+		inner: inner,
+		filter: func(msgType coderws.MessageType, payload []byte) ([]byte, *OpenAIFastBlockedError, error) {
+			if msgType != coderws.MessageText {
+				return payload, nil, nil
+			}
+			if updated := openAIWSPassthroughPolicyModelFromSessionFrame(account, payload); updated != "" {
+				capturedSessionModel = updated // captured by the closure, so it persists across ReadFrame calls
+			}
+			model := openAIWSPassthroughPolicyModelForFrame(account, payload)
+			if model == "" {
+				model = capturedSessionModel
+			}
+			return svc.applyOpenAIFastPolicyToWSResponseCreate(context.Background(), account, model, payload)
+		},
+	}
+
+	// Frame 1: gpt-4o miss whitelist → pass (service_tier preserved).
+	_, payload1, err := wrapper.ReadFrame(context.Background())
+	require.NoError(t, err)
+	require.Contains(t, string(payload1), `"service_tier"`, "frame1: gpt-4o miss whitelist → pass keeps service_tier")
+
+	// Frame 2: session.update — not response.create, untouched, but its
+	// side effect updates capturedSessionModel to gpt-5.5.
+	_, payload2, err := wrapper.ReadFrame(context.Background())
+	require.NoError(t, err)
+	require.Equal(t, string(rotate), string(payload2), "session.update frame is forwarded verbatim")
+	require.Equal(t, "gpt-5.5", capturedSessionModel, "fix1: session.update must rotate capturedSessionModel")
+
+	// Frame 3: empty model + new captured gpt-5.5 → matches whitelist → filter.
+	_, payload3, err := wrapper.ReadFrame(context.Background())
+	require.NoError(t, err)
+	require.NotContains(t, string(payload3), `"service_tier"`,
+		"fix1: post-rotate response.create without model must use refreshed capturedSessionModel and trigger filter")
+}
+
+// TestPolicyModelFromSessionFrame_OnlySessionUpdate covers the negative
+// branches of openAIWSPassthroughPolicyModelFromSessionFrame: only
+// client→upstream session.update frames rotate the captured model;
+// server→client events (session.created) and unrelated frames must not.
+func TestPolicyModelFromSessionFrame_OnlySessionUpdate(t *testing.T) {
+	account := &Account{Platform: PlatformOpenAI, Type: AccountTypeAPIKey}
+
+	// session.created is a server→client event in the OpenAI Realtime
+	// protocol — clients never send it, so this filter (which only runs on
+	// the client→upstream direction) must ignore it even if it appears.
+	created := []byte(`{"type":"session.created","session":{"model":"gpt-5.5"}}`)
+	require.Empty(t, openAIWSPassthroughPolicyModelFromSessionFrame(account, created))
+
+	// Non-session.* frames must NOT rotate, even when they carry a session.model field.
+	notSession := []byte(`{"type":"response.create","session":{"model":"gpt-9"}}`)
+	require.Empty(t, openAIWSPassthroughPolicyModelFromSessionFrame(account, notSession))
+
+	// Missing session.model returns empty — caller keeps the old captured value.
+	noModel := []byte(`{"type":"session.update","session":{"voice":"alloy"}}`)
+	require.Empty(t, openAIWSPassthroughPolicyModelFromSessionFrame(account, noModel))
+}
+
+// --- Fix2: native /responses normalize "fast" → "priority" on pass ---
+
+// TestApplyOpenAIFastPolicyToBody_PassNormalizesFastAlias is the fix2
+// regression. Before the fix, when action=pass, applyOpenAIFastPolicyToBody
+// returned the body unchanged so a raw "fast" alias would leak to the
+// upstream OpenAI API (which does not accept "fast"). The fix normalizes
+// "fast" → "priority" on pass too.
+func TestApplyOpenAIFastPolicyToBody_PassNormalizesFastAlias(t *testing.T) {
+	// Use a policy that deliberately misses gpt-4 so the action is pass.
+	settings := &OpenAIFastPolicySettings{
+		Rules: []OpenAIFastPolicyRule{{
+			ServiceTier:    OpenAIFastTierPriority,
+			Action:         BetaPolicyActionFilter,
+			Scope:          BetaPolicyScopeAll,
+			ModelWhitelist: []string{"gpt-5.5"},
+			FallbackAction: BetaPolicyActionPass,
+		}},
+	}
+	svc := newOpenAIGatewayServiceWithSettings(t, settings)
+	account := &Account{Platform: PlatformOpenAI, Type: AccountTypeAPIKey}
+
+	// gpt-4 + "fast" → fallback pass. Body must be rewritten to "priority".
+	body := []byte(`{"model":"gpt-4","service_tier":"fast"}`)
+	updated, err := svc.applyOpenAIFastPolicyToBody(context.Background(), account, "gpt-4", body)
+	require.NoError(t, err)
+	require.Equal(t, "priority", gjson.GetBytes(updated, "service_tier").String(),
+		"fix2: pass action must still normalize 'fast' → 'priority' so upstream OpenAI accepts the slug")
+
+	// Already-canonical "priority" on pass: zero mutation (byte-equal).
+	body = []byte(`{"model":"gpt-4","service_tier":"priority"}`)
+	updated, err = svc.applyOpenAIFastPolicyToBody(context.Background(), account, "gpt-4", body)
+	require.NoError(t, err)
+	require.Equal(t, string(body), string(updated))
+
+	// Mixed-case, whitespace-padded alias → still normalized to "priority".
+	body = []byte(`{"model":"gpt-4","service_tier":"  Fast  "}`)
+	updated, err = svc.applyOpenAIFastPolicyToBody(context.Background(), account, "gpt-4", body)
+	require.NoError(t, err)
+	require.Equal(t, "priority", gjson.GetBytes(updated, "service_tier").String())
+
+	// Unrecognized tier → still no-op (not normalized, since normTier == "").
+	body = []byte(`{"model":"gpt-4","service_tier":"turbo"}`)
+	updated, err = svc.applyOpenAIFastPolicyToBody(context.Background(), account, "gpt-4", body)
+	require.NoError(t, err)
+	require.Equal(t, string(body), string(updated))
+}
+
+// --- Fix3: passthrough billing must reflect post-filter service_tier ---
+
+// TestPassthroughBilling_PostFilterServiceTier is the fix3 regression. The
+// passthrough adapter (openai_ws_v2_passthrough_adapter.go) now extracts
+// requestServiceTier from firstClientMessage AFTER applyOpenAIFastPolicy
+// has rewritten it, so a filter hit causes billing to report nil (default
+// tier) instead of the user-requested "priority". This test pins the
+// contract the policy and tier-extraction helpers must uphold for that path.
+func TestPassthroughBilling_PostFilterServiceTier(t *testing.T) {
+	svc := newOpenAIGatewayServiceWithSettings(t, DefaultOpenAIFastPolicySettings())
+	account := &Account{Platform: PlatformOpenAI, Type: AccountTypeAPIKey}
+
+	raw := []byte(`{"type":"response.create","model":"gpt-5.5","service_tier":"priority"}`)
+
+	// Pre-filter sanity: extracting from the raw frame would (incorrectly,
+	// pre-fix) report "priority" — this is the very thing the adapter
+	// must NOT do anymore.
+	pre := extractOpenAIServiceTierFromBody(raw)
+	require.NotNil(t, pre)
+	require.Equal(t, "priority", *pre,
+		"sanity: raw first frame carries priority that pre-fix billing would have reported")
+
+	// Apply policy filter (default rule: gpt-5.5 + priority → filter).
+	filtered, blocked, err := svc.applyOpenAIFastPolicyToWSResponseCreate(context.Background(), account, "gpt-5.5", raw)
+	require.NoError(t, err)
+	require.Nil(t, blocked)
+	require.NotContains(t, string(filtered), `"service_tier"`)
+
+	// Post-filter: extracting from the rewritten frame returns nil. This
+	// is the value the adapter now passes to OpenAIForwardResult.ServiceTier,
+	// so billing records "default" instead of "priority".
+	post := extractOpenAIServiceTierFromBody(filtered)
+	require.Nil(t, post, "fix3: post-filter extraction must return nil so passthrough billing reports default tier instead of the requested priority")
+
+	// And the byte-level invariant the adapter relies on: filtering an
+	// already-filtered frame is a no-op (idempotent), so re-running the
+	// policy doesn't accidentally re-introduce the field.
+	again, blocked2, err := svc.applyOpenAIFastPolicyToWSResponseCreate(context.Background(), account, "gpt-5.5", filtered)
+	require.NoError(t, err)
+	require.Nil(t, blocked2)
+	require.Equal(t, string(filtered), string(again),
+		"policy is idempotent: filtering an already-filtered frame leaves bytes unchanged")
+}
+
+// TestApplyOpenAIFastPolicyToBody_NonStringServiceTier covers the test gap
+// flagged in the review: when a client sends service_tier as a non-string
+// (number, null, object, etc.) the policy must NOT panic and must NOT
+// pretend the field was filtered. Behavior: skip policy entirely (treat as
+// "no usable tier"), forward body unchanged. This mirrors the HTTP entry's
+// type-assertion `reqBody["service_tier"].(string); ok` guard.
+func TestApplyOpenAIFastPolicyToBody_NonStringServiceTier(t *testing.T) {
+	svc := newOpenAIGatewayServiceWithSettings(t, DefaultOpenAIFastPolicySettings())
+	account := &Account{Platform: PlatformOpenAI, Type: AccountTypeAPIKey}
+
+	// Every case is a non-string JSON value. E.g. for the number, gjson .String()
+	// coerces to "1", not a recognized tier alias; normalize returns "" → no-op.
+	cases := [][]byte{
+		[]byte(`{"model":"gpt-5.5","service_tier":1}`),
+		[]byte(`{"model":"gpt-5.5","service_tier":null}`),
+		[]byte(`{"model":"gpt-5.5","service_tier":{"nested":"priority"}}`),
+		[]byte(`{"model":"gpt-5.5","service_tier":["priority"]}`),
+		[]byte(`{"model":"gpt-5.5","service_tier":true}`),
+	}
+	for _, body := range cases {
+		updated, err := svc.applyOpenAIFastPolicyToBody(context.Background(), account, "gpt-5.5", body)
+		require.NoError(t, err, "non-string service_tier must not error: %s", string(body))
+		require.Equal(t, string(body), string(updated),
+			"non-string service_tier must pass through unchanged: %s", string(body))
+	}
+
+	// Same guard for the WS response.create entry.
+	for _, body := range cases {
+		frame := body
+		updated, blocked, err := svc.applyOpenAIFastPolicyToWSResponseCreate(context.Background(), account, "gpt-5.5", frame)
+		require.NoError(t, err, "non-string service_tier ws frame must not error: %s", string(frame))
+		require.Nil(t, blocked, "non-string service_tier must not trigger block: %s", string(frame))
+		require.Equal(t, string(frame), string(updated),
+			"non-string service_tier ws frame must pass through unchanged: %s", string(frame))
+	}
+}
+
+// TestPassthroughBilling_MultiTurnServiceTierFollowsFilteredFrames covers the
+// multi-turn passthrough billing regression: OpenAI Realtime / Responses WS
+// allows the client to ship a different service_tier on each response.create
+// frame (per-response field, see codex-rs/core/src/client.rs
+// build_responses_request which re-fills the field on every request). Before
+// the fix the adapter only captured service_tier from firstClientMessage so
+// turn 2/3 billing was wrong. After the fix the filter closure refreshes an
+// atomic.Pointer[string] on every successful response.create frame.
+//
+// This test pins the four legs of the semantic contract:
+//   - turn 1: service_tier=priority hits the default whitelist filter, so
+//     after filter the upstream sees no tier → billing is nil.
+//   - turn 2: service_tier=flex passes (default rule targets priority only),
+//     billing should now reflect "flex".
+//   - turn 3: response.create without any service_tier — the upstream will
+//     treat it as default; we choose to mirror that and overwrite billing
+//     to nil rather than carry over "flex" from turn 2.
+//   - non-response.create frame (response.cancel here) carrying a stray
+//     service_tier-shaped field must NOT clobber the billing pointer.
+func TestPassthroughBilling_MultiTurnServiceTierFollowsFilteredFrames(t *testing.T) {
+	svc := newOpenAIGatewayServiceWithSettings(t, DefaultOpenAIFastPolicySettings())
+	account := &Account{Platform: PlatformOpenAI, Type: AccountTypeAPIKey}
+
+	// Mirror the production filter closure (openai_ws_v2_passthrough_adapter.go
+	// proxyResponsesWebSocketV2Passthrough) so this test fails if the
+	// production code drops the per-frame Store.
+	var requestServiceTierPtr atomic.Pointer[string] // a nil load means no tier reached upstream (billed as default)
+	capturedSessionModel := ""
+	filter := func(msgType coderws.MessageType, payload []byte) ([]byte, *OpenAIFastBlockedError, error) {
+		if msgType != coderws.MessageText {
+			return payload, nil, nil
+		}
+		if updated := openAIWSPassthroughPolicyModelFromSessionFrame(account, payload); updated != "" {
+			capturedSessionModel = updated
+		}
+		model := openAIWSPassthroughPolicyModelForFrame(account, payload)
+		if model == "" {
+			model = capturedSessionModel
+		}
+		out, blocked, policyErr := svc.applyOpenAIFastPolicyToWSResponseCreate(context.Background(), account, model, payload)
+		if policyErr == nil && blocked == nil &&
+			strings.TrimSpace(gjson.GetBytes(payload, "type").String()) == "response.create" {
+			requestServiceTierPtr.Store(extractOpenAIServiceTierFromBody(out))
+		}
+		return out, blocked, policyErr
+	}
+
+	// First-frame initialization mirrors the adapter: extract from the
+	// post-filter payload so a filter-on-first-frame zeroes billing too.
+	firstFrame := []byte(`{"type":"response.create","model":"gpt-5.5","service_tier":"priority"}`)
+	firstOut, firstBlocked, firstErr := svc.applyOpenAIFastPolicyToWSResponseCreate(context.Background(), account, "gpt-5.5", firstFrame)
+	require.NoError(t, firstErr)
+	require.Nil(t, firstBlocked)
+	requestServiceTierPtr.Store(extractOpenAIServiceTierFromBody(firstOut))
+	capturedSessionModel = openAIWSPassthroughPolicyModelForFrame(account, firstFrame)
+	require.Nil(t, requestServiceTierPtr.Load(),
+		"turn 1: filter strips service_tier=priority, billing must reflect upstream-actual nil tier")
+
+	// Turn 2: client switches to flex, should pass and update billing.
+	turn2 := []byte(`{"type":"response.create","model":"gpt-5.5","service_tier":"flex"}`)
+	out2, blocked2, err2 := filter(coderws.MessageText, turn2)
+	require.NoError(t, err2)
+	require.Nil(t, blocked2)
+	require.Equal(t, "flex", gjson.GetBytes(out2, "service_tier").String(), "turn 2: flex must pass to upstream untouched")
+	tier2 := requestServiceTierPtr.Load()
+	require.NotNil(t, tier2, "turn 2: billing must update to reflect flex")
+	require.Equal(t, "flex", *tier2)
+
+	// A non-response.create frame with a stray service_tier-shaped field
+	// must NOT overwrite the billing pointer (those frames don't carry
+	// per-response service_tier in the Realtime spec).
+	cancelFrame := []byte(`{"type":"response.cancel","service_tier":"priority"}`)
+	_, blockedCancel, errCancel := filter(coderws.MessageText, cancelFrame)
+	require.NoError(t, errCancel)
+	require.Nil(t, blockedCancel)
+	tierAfterCancel := requestServiceTierPtr.Load()
+	require.NotNil(t, tierAfterCancel, "response.cancel must not clobber billing tier to nil")
+	require.Equal(t, "flex", *tierAfterCancel,
+		"non-response.create frames must not update billing tier even if they carry a service_tier-shaped field")
+
+	// Turn 3: response.create without any service_tier. We deliberately
+	// overwrite billing back to nil so it tracks what the upstream actually
+	// sees on this turn (default tier).
+	turn3 := []byte(`{"type":"response.create","model":"gpt-5.5"}`)
+	out3, blocked3, err3 := filter(coderws.MessageText, turn3)
+	require.NoError(t, err3)
+	require.Nil(t, blocked3)
+	require.Equal(t, string(turn3), string(out3), "turn 3 has no service_tier — filter must not mutate")
+	require.Nil(t, requestServiceTierPtr.Load(),
+		"turn 3: response.create without service_tier overwrites billing to nil to match upstream default")
+}
+
+// TestPassthroughBilling_BlockedFrameDoesNotMutateServiceTier locks in the
+// "block keeps previous" semantic: when policy returns block on a
+// response.create frame, that frame is never sent upstream, so billing tier
+// must keep the previous turn's value rather than getting silently zeroed.
+func TestPassthroughBilling_BlockedFrameDoesNotMutateServiceTier(t *testing.T) {
+	blockSettings := &OpenAIFastPolicySettings{
+		Rules: []OpenAIFastPolicyRule{{
+			ServiceTier:    OpenAIFastTierPriority,
+			Action:         BetaPolicyActionBlock,
+			Scope:          BetaPolicyScopeAll,
+			ErrorMessage:   "blocked",
+			ModelWhitelist: []string{"gpt-5.5"},
+			FallbackAction: BetaPolicyActionPass,
+		}},
+	}
+	svc := newOpenAIGatewayServiceWithSettings(t, blockSettings)
+	account := &Account{Platform: PlatformOpenAI, Type: AccountTypeAPIKey}
+
+	var requestServiceTierPtr atomic.Pointer[string]
+	flexValue := "flex"
+	requestServiceTierPtr.Store(&flexValue) // simulate prior turn billed as flex
+
+	filter := func(msgType coderws.MessageType, payload []byte) ([]byte, *OpenAIFastBlockedError, error) {
+		if msgType != coderws.MessageText {
+			return payload, nil, nil
+		}
+		out, blocked, policyErr := svc.applyOpenAIFastPolicyToWSResponseCreate(context.Background(), account, "gpt-5.5", payload)
+		if policyErr == nil && blocked == nil &&
+			strings.TrimSpace(gjson.GetBytes(payload, "type").String()) == "response.create" {
+			requestServiceTierPtr.Store(extractOpenAIServiceTierFromBody(out)) // skipped when the frame is blocked
+		}
+		return out, blocked, policyErr
+	}
+
+	frame := []byte(`{"type":"response.create","model":"gpt-5.5","service_tier":"priority"}`)
+	_, blocked, err := filter(coderws.MessageText, frame)
+	require.NoError(t, err)
+	require.NotNil(t, blocked, "policy must block this frame")
+
+	tier := requestServiceTierPtr.Load()
+	require.NotNil(t, tier, "blocked frame must not clobber prior billing tier to nil")
+	require.Equal(t, "flex", *tier,
+		"blocked frame is never sent upstream; billing must retain the previous turn's tier")
+}
diff --git a/backend/internal/service/openai_gateway_chat_completions.go b/backend/internal/service/openai_gateway_chat_completions.go
index 663066a3..5822ae4c 100644
--- a/backend/internal/service/openai_gateway_chat_completions.go
+++ b/backend/internal/service/openai_gateway_chat_completions.go
@@ -171,6 +171,17 @@ func (s *OpenAIGatewayService) ForwardAsChatCompletions(
 		}
 	}
 
+	// 4b. Apply OpenAI fast policy (may filter service_tier or block the request).
+	updatedBody, policyErr := s.applyOpenAIFastPolicyToBody(ctx, account, upstreamModel, responsesBody)
+	if policyErr != nil {
+		var blocked *OpenAIFastBlockedError
+		if errors.As(policyErr, &blocked) {
+			writeChatCompletionsError(c, http.StatusForbidden, "permission_error", blocked.Message)
+		}
+		return nil, policyErr
+	}
+	responsesBody = updatedBody
+
 	// 5. Get access token
 	token, _, err := s.GetAccessToken(ctx, account)
 	if err != nil {
diff --git a/backend/internal/service/openai_gateway_chat_completions_test.go b/backend/internal/service/openai_gateway_chat_completions_test.go
index a00fb71c..6846e03a 100644
--- a/backend/internal/service/openai_gateway_chat_completions_test.go
+++ b/backend/internal/service/openai_gateway_chat_completions_test.go
@@ -19,8 +19,22 @@ func TestNormalizeResponsesRequestServiceTier(t *testing.T) {
 	normalizeResponsesRequestServiceTier(req)
 	require.Equal(t, "flex", req.ServiceTier)
 
+	// OpenAI 官方合法 tier 应被透传保留。
+	req.ServiceTier = "auto"
+	normalizeResponsesRequestServiceTier(req)
+	require.Equal(t, "auto", req.ServiceTier)
+
 	req.ServiceTier = "default"
 	normalizeResponsesRequestServiceTier(req)
+	require.Equal(t, "default", req.ServiceTier)
+
+	req.ServiceTier = "scale"
+	normalizeResponsesRequestServiceTier(req)
+	require.Equal(t, "scale", req.ServiceTier)
+
+	// 真未知值仍被剥离。
+	req.ServiceTier = "turbo"
+	normalizeResponsesRequestServiceTier(req)
 	require.Empty(t, req.ServiceTier)
 }
 
@@ -37,8 +51,25 @@ func TestNormalizeResponsesBodyServiceTier(t *testing.T) {
 	require.Equal(t, "flex", tier)
 	require.Equal(t, "flex", gjson.GetBytes(body, "service_tier").String())
 
+	// OpenAI 官方 tier 直接保留在 body 中（透传上游）。
+	body, tier, err = normalizeResponsesBodyServiceTier([]byte(`{"model":"gpt-5.1","service_tier":"auto"}`))
+	require.NoError(t, err)
+	require.Equal(t, "auto", tier)
+	require.Equal(t, "auto", gjson.GetBytes(body, "service_tier").String())
+
 	body, tier, err = normalizeResponsesBodyServiceTier([]byte(`{"model":"gpt-5.1","service_tier":"default"}`))
 	require.NoError(t, err)
+	require.Equal(t, "default", tier)
+	require.Equal(t, "default", gjson.GetBytes(body, "service_tier").String())
+
+	body, tier, err = normalizeResponsesBodyServiceTier([]byte(`{"model":"gpt-5.1","service_tier":"scale"}`))
+	require.NoError(t, err)
+	require.Equal(t, "scale", tier)
+	require.Equal(t, "scale", gjson.GetBytes(body, "service_tier").String())
+
+	// 真未知值才会被删除。
+	body, tier, err = normalizeResponsesBodyServiceTier([]byte(`{"model":"gpt-5.1","service_tier":"turbo"}`))
+	require.NoError(t, err)
 	require.Empty(t, tier)
 	require.False(t, gjson.GetBytes(body, "service_tier").Exists())
 }
diff --git a/backend/internal/service/openai_gateway_messages.go b/backend/internal/service/openai_gateway_messages.go
index 2a0a72eb..4e0ebb2e 100644
--- a/backend/internal/service/openai_gateway_messages.go
+++ b/backend/internal/service/openai_gateway_messages.go
@@ -143,6 +143,19 @@ func (s *OpenAIGatewayService) ForwardAsAnthropic(
 		}
 	}
 
+	// 4c. Apply OpenAI fast policy (may filter service_tier or block the request).
+	// Mirrors the Claude anthropic-beta "fast-mode-2026-02-01" filter, but keyed
+	// on service_tier (priority/flex). A block is a 403 "permission_error".
+	updatedBody, policyErr := s.applyOpenAIFastPolicyToBody(ctx, account, upstreamModel, responsesBody)
+	if policyErr != nil {
+		var blocked *OpenAIFastBlockedError
+		if errors.As(policyErr, &blocked) {
+			writeAnthropicError(c, http.StatusForbidden, "permission_error", blocked.Message)
+		}
+		return nil, policyErr
+	}
+	responsesBody = updatedBody
+
 	// 5. Get access token
 	token, _, err := s.GetAccessToken(ctx, account)
 	if err != nil {
diff --git a/backend/internal/service/openai_gateway_record_usage_test.go b/backend/internal/service/openai_gateway_record_usage_test.go
index 9665c4c8..47ff4e3b 100644
--- a/backend/internal/service/openai_gateway_record_usage_test.go
+++ b/backend/internal/service/openai_gateway_record_usage_test.go
@@ -148,6 +148,7 @@ func newOpenAIRecordUsageServiceForTest(usageRepo UsageLogRepository, userRepo U
 		nil,
 		nil,
 		nil,
+		nil,
 	)
 	svc.userGroupRateResolver = newUserGroupRateResolver(
 		rateRepo,
@@ -826,18 +827,29 @@ func TestNormalizeOpenAIServiceTier(t *testing.T) {
 		require.Equal(t, "priority", *got)
 	})
 
-	t.Run("default ignored", func(t *testing.T) {
-		require.Nil(t, normalizeOpenAIServiceTier("default"))
+	t.Run("openai official tiers preserved", func(t *testing.T) {
+		// OpenAI 官方文档定义的合法 tier 值都应被透传保留，避免因白名单过窄
+		// 静默剥离客户端显式发送的合法字段。Codex 客户端只发 priority/flex，
+		// 所以扩大白名单对 Codex 流量零影响（见 codex-rs/core/src/client.rs）。
+		for _, tier := range []string{"priority", "flex", "auto", "default", "scale"} {
+			got := normalizeOpenAIServiceTier(tier)
+			require.NotNil(t, got, "tier %q should not be normalized to nil", tier)
+			require.Equal(t, tier, *got)
+		}
 	})
 
 	t.Run("invalid ignored", func(t *testing.T) {
 		require.Nil(t, normalizeOpenAIServiceTier("turbo"))
+		require.Nil(t, normalizeOpenAIServiceTier("xxx"))
 	})
 }
 
 func TestExtractOpenAIServiceTier(t *testing.T) {
 	require.Equal(t, "priority", *extractOpenAIServiceTier(map[string]any{"service_tier": "fast"}))
 	require.Equal(t, "flex", *extractOpenAIServiceTier(map[string]any{"service_tier": "flex"}))
+	require.Equal(t, "auto", *extractOpenAIServiceTier(map[string]any{"service_tier": "auto"}))
+	require.Equal(t, "default", *extractOpenAIServiceTier(map[string]any{"service_tier": "default"}))
+	require.Equal(t, "scale", *extractOpenAIServiceTier(map[string]any{"service_tier": "scale"}))
 	require.Nil(t, extractOpenAIServiceTier(map[string]any{"service_tier": 1}))
 	require.Nil(t, extractOpenAIServiceTier(nil))
 }
@@ -845,7 +857,10 @@ func TestExtractOpenAIServiceTier(t *testing.T) {
 func TestExtractOpenAIServiceTierFromBody(t *testing.T) {
 	require.Equal(t, "priority", *extractOpenAIServiceTierFromBody([]byte(`{"service_tier":"fast"}`)))
 	require.Equal(t, "flex", *extractOpenAIServiceTierFromBody([]byte(`{"service_tier":"flex"}`)))
-	require.Nil(t, extractOpenAIServiceTierFromBody([]byte(`{"service_tier":"default"}`)))
+	require.Equal(t, "auto", *extractOpenAIServiceTierFromBody([]byte(`{"service_tier":"auto"}`)))
+	require.Equal(t, "default", *extractOpenAIServiceTierFromBody([]byte(`{"service_tier":"default"}`)))
+	require.Equal(t, "scale", *extractOpenAIServiceTierFromBody([]byte(`{"service_tier":"scale"}`)))
+	require.Nil(t, extractOpenAIServiceTierFromBody([]byte(`{"service_tier":"turbo"}`)))
 	require.Nil(t, extractOpenAIServiceTierFromBody(nil))
 }
 
diff --git a/backend/internal/service/openai_gateway_service.go b/backend/internal/service/openai_gateway_service.go
index 13e3ddab..a7407476 100644
--- a/backend/internal/service/openai_gateway_service.go
+++ b/backend/internal/service/openai_gateway_service.go
@@ -334,6 +334,7 @@ type OpenAIGatewayService struct {
 	resolver              *ModelPricingResolver
 	channelService        *ChannelService
 	balanceNotifyService  *BalanceNotifyService
+	settingService        *SettingService
 
 	openaiWSPoolOnce              sync.Once
 	openaiWSStateStoreOnce        sync.Once
@@ -372,6 +373,7 @@ func NewOpenAIGatewayService(
 	resolver *ModelPricingResolver,
 	channelService *ChannelService,
 	balanceNotifyService *BalanceNotifyService,
+	settingService *SettingService,
 ) *OpenAIGatewayService {
 	svc := &OpenAIGatewayService{
 		accountRepo:         accountRepo,
@@ -402,6 +404,7 @@ func NewOpenAIGatewayService(
 		resolver:              resolver,
 		channelService:        channelService,
 		balanceNotifyService:  balanceNotifyService,
+		settingService:        settingService,
 		responseHeaderFilter:  compileResponseHeaderFilter(cfg),
 		codexSnapshotThrottle: newAccountWriteThrottle(openAICodexSnapshotPersistMinInterval),
 	}
@@ -2310,6 +2313,48 @@ func (s *OpenAIGatewayService) Forward(ctx context.Context, c *gin.Context, acco
 		disablePatch()
 	}
 
+	// Apply OpenAI fast policy (参照 Claude BetaPolicy 的 fast-mode 过滤)：
+	// 针对 body 的 service_tier 字段（"priority" 即 fast，"flex"），按策略
+	// 执行 filter（删除字段）或 block（拒绝请求）。对 gpt-5.5 等模型屏蔽
+	// fast 时在此生效。
+	//
+	// 注意：
+	//   1. 此处统一使用 upstreamModel（已经过 GetMappedModel +
+	//      normalizeOpenAIModelForUpstream + Codex OAuth normalize），与
+	//      chat-completions / messages 入口保持一致，避免不同入口因为模型
+	//      维度不同而出现 whitelist 命中差异。
+	//   2. action=pass 时也要把 raw "fast" 归一化为 "priority" 写回 body，
+	//      否则 native /responses 入口透传 "fast" 给上游会被拒。chat-
+	//      completions 入口由 normalizeResponsesBodyServiceTier 完成同一
+	//      行为，这里手工实现等效逻辑。
+	if rawTier, ok := reqBody["service_tier"].(string); ok {
+		if normTier := normalizedOpenAIServiceTierValue(rawTier); normTier != "" {
+			action, errMsg := s.evaluateOpenAIFastPolicy(ctx, account, upstreamModel, normTier)
+			switch action {
+			case BetaPolicyActionBlock:
+				msg := errMsg
+				if msg == "" {
+					msg = fmt.Sprintf("openai service_tier=%s is not allowed for model %s", normTier, upstreamModel)
+				}
+				blocked := &OpenAIFastBlockedError{Message: msg}
+				writeOpenAIFastPolicyBlockedResponse(c, blocked)
+				return nil, blocked
+			case BetaPolicyActionFilter:
+				delete(reqBody, "service_tier")
+				bodyModified = true
+				disablePatch()
+			default:
+				// pass：若客户端传的是别名 "fast"，归一化为 "priority"
+				// 后写回 body，确保上游收到的是其能识别的规范值。
+				if normTier != rawTier {
+					reqBody["service_tier"] = normTier
+					bodyModified = true
+					markPatchSet("service_tier", normTier)
+				}
+			}
+		}
+	}
+
 	// Re-serialize body only if modified
 	if bodyModified {
 		serializedByPatch := false
@@ -2758,6 +2803,26 @@ func (s *OpenAIGatewayService) forwardOpenAIPassthrough(
 		body = sanitizedBody
 	}
 
+	// Apply OpenAI fast policy to the passthrough body (filter/block by service_tier).
+	// 统一使用 upstream 视角的 model：透传路径下 body 已经过 compact 映射 +
+	// OAuth normalize，body 中的 model 字段即上游真正会看到的 slug。
+	// 这样可以与 chat-completions / messages / native /responses 入口的
+	// upstreamModel 保持一致，避免 whitelist 命中差异。当 body 中没有
+	// model 字段时退回 reqModel。
+	policyModel := strings.TrimSpace(gjson.GetBytes(body, "model").String())
+	if policyModel == "" {
+		policyModel = reqModel
+	}
+	updatedBody, policyErr := s.applyOpenAIFastPolicyToBody(ctx, account, policyModel, body)
+	if policyErr != nil {
+		var blocked *OpenAIFastBlockedError
+		if errors.As(policyErr, &blocked) {
+			writeOpenAIFastPolicyBlockedResponse(c, blocked)
+		}
+		return nil, policyErr
+	}
+	body = updatedBody
+
 	logger.LegacyPrintf("service.openai_gateway",
 		"[OpenAI 自动透传] 命中自动透传分支: account=%d name=%s type=%s model=%s stream=%v",
 		account.ID,
@@ -5590,14 +5655,319 @@ func normalizeOpenAIServiceTier(raw string) *string {
 	if value == "fast" {
 		value = "priority"
 	}
+	// 放过 OpenAI 官方文档定义的所有合法 tier 值：priority/flex/auto/default/scale。
+	// 对 Codex 客户端零影响（Codex 只发 priority 或 flex，见 codex-rs/core/src/client.rs），
+	// 但能让直连 OpenAI SDK 的用户透传 auto/default/scale 以便抓包/调试。
+	// 真未知值仍返回 nil，由 normalizeResponsesBodyServiceTier 从 body 中删除。
 	switch value {
-	case "priority", "flex":
+	case "priority", "flex", "auto", "default", "scale":
 		return &value
 	default:
 		return nil
 	}
 }
 
+// OpenAIFastBlockedError indicates a request was rejected by the OpenAI fast
+// policy (action=block). Mirrors BetaBlockedError on the Claude side.
+type OpenAIFastBlockedError struct {
+	Message string
+}
+
+func (e *OpenAIFastBlockedError) Error() string { return e.Message }
+
+// evaluateOpenAIFastPolicy returns the action and error message that should be
+// applied for a request with the given account/model/service_tier. When the
+// policy service is unavailable or no rule matches, it returns
+// (BetaPolicyActionPass, "") so callers can short-circuit safely.
+//
+// Matching rules:
+//   - Scope filters by account type (all / oauth / apikey / bedrock)
+//   - ServiceTier must be empty (= any), "all", or equal the normalized tier
+//   - ModelWhitelist narrows the rule to specific models; FallbackAction
+//     handles the non-matching case (default: pass)
+//
+// Difference from the Claude BetaPolicy (first-match short-circuit is kept):
+//   - BetaPolicy works on the token set of the anthropic-beta header; rules
+//     may target different tokens, so filters accumulate; block is first-match.
+//   - The OpenAI fast policy works on the single service_tier field: filter
+//     deletes it, so there is nothing to accumulate. A request carries one
+//     service_tier, so rules are mutually exclusive per tier; when model
+//     whitelists overlap within one (scope, tier), admins express intent by
+//     rule order. Hence first-match, not "block > filter > pass" precedence.
+func (s *OpenAIGatewayService) evaluateOpenAIFastPolicy(ctx context.Context, account *Account, model, serviceTier string) (action, errMsg string) {
+	if s == nil || s.settingService == nil {
+		return BetaPolicyActionPass, ""
+	}
+	tier := strings.ToLower(strings.TrimSpace(serviceTier))
+	if tier == "" {
+		return BetaPolicyActionPass, ""
+	}
+	settings := openAIFastPolicySettingsFromContext(ctx)
+	if settings == nil {
+		fetched, err := s.settingService.GetOpenAIFastPolicySettings(ctx)
+		if err != nil || fetched == nil {
+			return BetaPolicyActionPass, ""
+		}
+		settings = fetched
+	}
+	return evaluateOpenAIFastPolicyWithSettings(settings, account, model, tier)
+}
+
+// evaluateOpenAIFastPolicyWithSettings is the pure-function core extracted so
+// long-lived sessions (e.g. WS) can prefetch settings once and avoid hitting
+// the settingService on every frame. See WSSession entry and
+// openAIFastPolicySettingsFromContext for the caching glue.
+func evaluateOpenAIFastPolicyWithSettings(settings *OpenAIFastPolicySettings, account *Account, model, tier string) (action, errMsg string) {
+	if settings == nil {
+		return BetaPolicyActionPass, ""
+	}
+	isOAuth := account != nil && account.IsOAuth()
+	isBedrock := account != nil && account.IsBedrock()
+	for _, rule := range settings.Rules {
+		if !betaPolicyScopeMatches(rule.Scope, isOAuth, isBedrock) {
+			continue
+		}
+		ruleTier := strings.ToLower(strings.TrimSpace(rule.ServiceTier))
+		if ruleTier != "" && ruleTier != OpenAIFastTierAny && ruleTier != tier {
+			continue
+		}
+		eff := BetaPolicyRule{
+			Action:               rule.Action,
+			ErrorMessage:         rule.ErrorMessage,
+			ModelWhitelist:       rule.ModelWhitelist,
+			FallbackAction:       rule.FallbackAction,
+			FallbackErrorMessage: rule.FallbackErrorMessage,
+		}
+		return resolveRuleAction(eff, model)
+	}
+	return BetaPolicyActionPass, ""
+}
+
+// openAIFastPolicyCtxKeyType keys the prefetched OpenAIFastPolicySettings in a
+// context so a long-lived WebSocket session reuses one snapshot for all frames.
+//
+// Trade-off: a policy change does not affect the current WS session, only new
+// ones. This is intentional: for long sessions a consistent policy matters
+// more than immediate effect, and the Claude BetaPolicy gin.Context cache makes
+// the same choice. Admins can force a refresh by disconnecting the session.
+type openAIFastPolicyCtxKeyType struct{}
+
+var openAIFastPolicyCtxKey = openAIFastPolicyCtxKeyType{}
+
+// withOpenAIFastPolicyContext binds a settings snapshot to ctx for reuse by
+// evaluateOpenAIFastPolicy; a nil ctx or nil settings returns ctx unchanged.
+func withOpenAIFastPolicyContext(ctx context.Context, settings *OpenAIFastPolicySettings) context.Context {
+	if ctx == nil || settings == nil {
+		return ctx
+	}
+	return context.WithValue(ctx, openAIFastPolicyCtxKey, settings)
+}
+
+func openAIFastPolicySettingsFromContext(ctx context.Context) *OpenAIFastPolicySettings {
+	if ctx == nil {
+		return nil
+	}
+	if v, ok := ctx.Value(openAIFastPolicyCtxKey).(*OpenAIFastPolicySettings); ok {
+		return v
+	}
+	return nil
+}
+
+// applyOpenAIFastPolicyToBody applies the OpenAI fast policy to a raw request
+// body. When action=filter it removes the service_tier field; when
+// action=block it returns (body, *OpenAIFastBlockedError). On pass it
+// normalizes the service_tier value (e.g. client alias "fast" → "priority"),
+// rewriting the body so the upstream receives a slug it recognizes.
+//
+// Rationale for normalize-on-pass: the chat-completions / messages entrypoints
+// already normalize service_tier via normalizeResponsesBodyServiceTier before
+// calling this function; passthrough / native /responses entrypoints have no
+// such step, so without normalizing here a raw "fast" would reach the OpenAI
+// upstream as-is and be rejected (400). Doing it here keeps entrypoints aligned.
+func (s *OpenAIGatewayService) applyOpenAIFastPolicyToBody(ctx context.Context, account *Account, model string, body []byte) ([]byte, error) {
+	if len(body) == 0 {
+		return body, nil
+	}
+	rawTier := gjson.GetBytes(body, "service_tier").String()
+	if rawTier == "" {
+		return body, nil
+	}
+	normTier := normalizedOpenAIServiceTierValue(rawTier)
+	if normTier == "" {
+		return body, nil
+	}
+	action, errMsg := s.evaluateOpenAIFastPolicy(ctx, account, model, normTier)
+	switch action {
+	case BetaPolicyActionBlock:
+		msg := errMsg
+		if msg == "" {
+			msg = fmt.Sprintf("openai service_tier=%s is not allowed for model %s", normTier, model)
+		}
+		return body, &OpenAIFastBlockedError{Message: msg}
+	case BetaPolicyActionFilter:
+		trimmed, err := sjson.DeleteBytes(body, "service_tier")
+		if err != nil {
+			return body, fmt.Errorf("strip service_tier from body: %w", err)
+		}
+		return trimmed, nil
+	default:
+		// pass: rewrite an alias (e.g. "fast") to its canonical value ("priority").
+		if normTier == rawTier {
+			return body, nil
+		}
+		updated, err := sjson.SetBytes(body, "service_tier", normTier)
+		if err != nil {
+			return body, fmt.Errorf("normalize service_tier on pass: %w", err)
+		}
+		return updated, nil
+	}
+}
+
+// writeOpenAIFastPolicyBlockedResponse writes a 403 JSON response for a
+// request blocked by the OpenAI fast policy.
+func writeOpenAIFastPolicyBlockedResponse(c *gin.Context, err *OpenAIFastBlockedError) {
+	if c == nil || err == nil {
+		return
+	}
+	c.JSON(http.StatusForbidden, gin.H{
+		"error": gin.H{
+			"type":    "permission_error",
+			"message": err.Message,
+		},
+	})
+}
+
+// applyOpenAIFastPolicyToWSResponseCreate evaluates the OpenAI fast policy
+// against a single client→upstream WebSocket frame whose top-level
+// "type"=="response.create". It mirrors the HTTP-side
+// applyOpenAIFastPolicyToBody contract but operates on a Realtime/Responses
+// WS payload:
+//
+//   - pass: returns frame unchanged; a "fast" alias is NOT rewritten here
+//   - filter: returns a copy with top-level service_tier removed
+//   - block: returns (frame, *OpenAIFastBlockedError)
+//
+// Only frames whose "type" field strictly equals "response.create" are
+// inspected/mutated. Any other frame type — including the empty string —
+// passes through untouched. The OpenAI Realtime client-event spec requires
+// "type" to be set, so an empty type is treated as a malformed frame we do
+// not police; the upstream is the source of truth for rejecting it.
+//
+// service_tier lives at the top level of response.create — same as the
+// Responses HTTP body shape (compare extractOpenAIServiceTierFromBody and
+// the fixtures in openai_ws_forwarder_ingress_session_test.go; file line
+// numbers are deliberately omitted because they go stale). We therefore only
+// need to inspect / strip the top-level field; there is no nested form in
+// the schema today.
+//
+// The caller is responsible for choosing the upstream model passed in —
+// this helper does not re-derive it.
+func (s *OpenAIGatewayService) applyOpenAIFastPolicyToWSResponseCreate(
+	ctx context.Context,
+	account *Account,
+	model string,
+	frame []byte,
+) ([]byte, *OpenAIFastBlockedError, error) {
+	if len(frame) == 0 {
+		return frame, nil, nil
+	}
+	if !gjson.ValidBytes(frame) {
+		return frame, nil, nil
+	}
+	frameType := strings.TrimSpace(gjson.GetBytes(frame, "type").String())
+	// Strict match: only response.create is policy-checked. Empty / other
+	// types pass through untouched so we never accidentally strip fields
+	// from response.cancel, conversation.item.create, or any future
+	// client-event the spec adds. The Realtime spec requires "type" on
+	// every client event, so an empty type is malformed input — let the
+	// upstream reject it rather than guessing at our layer.
+	if frameType != "response.create" {
+		return frame, nil, nil
+	}
+	rawTier := gjson.GetBytes(frame, "service_tier").String()
+	if rawTier == "" {
+		return frame, nil, nil
+	}
+	normTier := normalizedOpenAIServiceTierValue(rawTier)
+	if normTier == "" {
+		return frame, nil, nil
+	}
+	action, errMsg := s.evaluateOpenAIFastPolicy(ctx, account, model, normTier)
+	switch action {
+	case BetaPolicyActionBlock:
+		msg := errMsg
+		if msg == "" {
+			msg = fmt.Sprintf("openai service_tier=%s is not allowed for model %s", normTier, model)
+		}
+		return frame, &OpenAIFastBlockedError{Message: msg}, nil
+	case BetaPolicyActionFilter:
+		trimmed, err := sjson.DeleteBytes(frame, "service_tier")
+		if err != nil {
+			return frame, nil, fmt.Errorf("strip service_tier from ws frame: %w", err)
+		}
+		return trimmed, nil, nil
+	default:
+		return frame, nil, nil
+	}
+}
+
+// newOpenAIFastPolicyWSEventID returns a Realtime-style event_id for a
+// server-emitted error event. Matches the loose "evt_<rand>" convention used
+// by upstream Realtime servers; the exact value is not load-bearing and is
+// only required for client-side log correlation. We reuse the existing
+// google/uuid dependency rather than pulling a new one.
+func newOpenAIFastPolicyWSEventID() string {
+	id, err := uuid.NewRandom()
+	if err != nil {
+		// Extremely unlikely; fall back to a fixed prefix so the field is
+		// still non-empty and the schema stays self-consistent.
+		return "evt_openai_fast_policy"
+	}
+	// Strip dashes so it visually matches "evt_<hex>" rather than UUID v4
+	// canonical form, mirroring what real Realtime traces look like.
+	return "evt_" + strings.ReplaceAll(id.String(), "-", "")
+}
+
+// buildOpenAIFastPolicyBlockedWSEvent renders an OpenAI Realtime/Responses
+// style "error" event payload for a request blocked by the OpenAI fast
+// policy. The shape mirrors Realtime error events as observed in upstream
+// traces and per the spec's server "error" event:
+//
+//	{
+//	  "event_id": "evt_<random>",
+//	  "type": "error",
+//	  "error": {
+//	    "type": "invalid_request_error",
+//	    "code": "policy_violation",
+//	    "message": "..."
+//	  }
+//	}
+//
+// event_id lets clients correlate the rejection in their logs; "code" gives
+// programmatic clients a stable identifier (HTTP-side equivalent is the
+// 403 permission_error JSON body).
+func buildOpenAIFastPolicyBlockedWSEvent(err *OpenAIFastBlockedError) []byte {
+	if err == nil {
+		return nil
+	}
+	eventID := newOpenAIFastPolicyWSEventID()
+	payload, mErr := json.Marshal(map[string]any{
+		"event_id": eventID,
+		"type":     "error",
+		"error": map[string]any{
+			"type":    "invalid_request_error",
+			"code":    "policy_violation",
+			"message": err.Message,
+		},
+	})
+	if mErr != nil {
+		// Fallback to a minimal hand-rolled payload; Marshal of the literal
+		// shape above should never fail in practice.
+		return []byte(`{"event_id":"` + eventID + `","type":"error","error":{"type":"invalid_request_error","code":"policy_violation","message":"openai fast policy blocked this request"}}`)
+	}
+	return payload
+}
+
 func sanitizeEmptyBase64InputImagesInOpenAIBody(body []byte) ([]byte, bool, error) {
 	if len(body) == 0 || !bytes.Contains(body, []byte(`"image_url"`)) || !bytes.Contains(body, []byte(`base64,`)) {
 		return body, false, nil
diff --git a/backend/internal/service/openai_ws_forwarder.go b/backend/internal/service/openai_ws_forwarder.go
index 8c0222e2..dedbce1e 100644
--- a/backend/internal/service/openai_ws_forwarder.go
+++ b/backend/internal/service/openai_ws_forwarder.go
@@ -2366,6 +2366,15 @@ func (s *OpenAIGatewayService) ProxyResponsesWebSocketFromClient(
 		return errors.New("token is empty")
 	}
 
+	// 预取一次 OpenAI Fast Policy settings，绑定到 ctx，让该 WS session
+	// 内所有帧的 evaluateOpenAIFastPolicy 调用复用同一份快照，避免每帧
+	// 进入 DB / settingRepo。Trade-off 见 withOpenAIFastPolicyContext 注释。
+	if s.settingService != nil {
+		if settings, err := s.settingService.GetOpenAIFastPolicySettings(ctx); err == nil && settings != nil {
+			ctx = withOpenAIFastPolicyContext(ctx, settings)
+		}
+	}
+
 	wsDecision := s.getOpenAIWSProtocolResolver().Resolve(account)
 	modeRouterV2Enabled := s != nil && s.cfg != nil && s.cfg.Gateway.OpenAIWS.ModeRouterV2Enabled
 	ingressMode := OpenAIWSIngressModeCtxPool
@@ -2524,6 +2533,44 @@ func (s *OpenAIGatewayService) ProxyResponsesWebSocketFromClient(
 			normalized = next
 		}
 
+		// Apply OpenAI Fast Policy on the response.create frame using the same
+		// evaluator/normalize/scope rules as the HTTP entrypoints. This is the
+		// single integration point for all WS ingress turns (first + follow-up
+		// frames flow through here).
+		//
+		// Model fallback: parseClientPayload above rejects any frame whose
+		// "model" field is missing (line ~2493-2500), so by the time we
+		// reach this point upstreamModel is always derived from a non-empty
+		// per-frame model. The capturedSessionModel fallback used in the
+		// passthrough adapter is therefore not needed in this path.
+		policyApplied, blocked, policyErr := s.applyOpenAIFastPolicyToWSResponseCreate(ctx, account, upstreamModel, normalized)
+		if policyErr != nil {
+			return openAIWSClientPayload{}, NewOpenAIWSClientCloseError(coderws.StatusPolicyViolation, "invalid websocket request payload", policyErr)
+		}
+		if blocked != nil {
+			// Send a Realtime-style error event to the client first, then
+			// signal the handler to close the connection with PolicyViolation.
+			// We intentionally do NOT forward this frame upstream.
+			//
+			// coder/websocket@v1.8.14 Conn.Write is synchronous and flushes
+			// the underlying bufio writer before returning (write.go:42 →
+			// 307-311), and the subsequent close handshake re-acquires the
+			// same writeFrameMu, so the error event is guaranteed to reach
+			// the kernel send buffer before any close frame is queued.
+			eventBytes := buildOpenAIFastPolicyBlockedWSEvent(blocked)
+			if eventBytes != nil {
+				writeCtx, cancel := context.WithTimeout(ctx, s.openAIWSWriteTimeout())
+				_ = clientConn.Write(writeCtx, coderws.MessageText, eventBytes)
+				cancel()
+			}
+			return openAIWSClientPayload{}, NewOpenAIWSClientCloseError(
+				coderws.StatusPolicyViolation,
+				blocked.Message,
+				blocked,
+			)
+		}
+		normalized = policyApplied
+
 		return openAIWSClientPayload{
 			payloadRaw:         normalized,
 			rawForHash:         trimmed,
diff --git a/backend/internal/service/openai_ws_protocol_forward_test.go b/backend/internal/service/openai_ws_protocol_forward_test.go
index 66e5db93..f3936de1 100644
--- a/backend/internal/service/openai_ws_protocol_forward_test.go
+++ b/backend/internal/service/openai_ws_protocol_forward_test.go
@@ -618,6 +618,7 @@ func TestNewOpenAIGatewayService_InitializesOpenAIWSResolver(t *testing.T) {
 		nil,
 		nil,
 		nil,
+		nil,
 	)
 
 	decision := svc.getOpenAIWSProtocolResolver().Resolve(nil)
diff --git a/backend/internal/service/openai_ws_v2_passthrough_adapter.go b/backend/internal/service/openai_ws_v2_passthrough_adapter.go
index cda2e351..3dbb199a 100644
--- a/backend/internal/service/openai_ws_v2_passthrough_adapter.go
+++ b/backend/internal/service/openai_ws_v2_passthrough_adapter.go
@@ -21,6 +21,109 @@ type openAIWSClientFrameConn struct {
 	conn *coderws.Conn
 }
 
+// openAIWSPolicyEnforcingFrameConn wraps a client-side FrameConn and runs
+// every client→upstream frame through the OpenAI Fast Policy. It is the
+// passthrough-relay equivalent of the parseClientPayload integration in the
+// ingress session path. filter returns:
+//   - newPayload, nil, nil: forward the (possibly mutated) payload
+//   - _, *OpenAIFastBlockedError, nil: block — the wrapper sends an error
+//     event via onBlock and surfaces a transport-level error so the relay
+//     stops reading from the client.
+//   - _, _, err: a transport error other than block.
+type openAIWSPolicyEnforcingFrameConn struct {
+	inner   openaiwsv2.FrameConn
+	filter  func(msgType coderws.MessageType, payload []byte) ([]byte, *OpenAIFastBlockedError, error)
+	onBlock func(blocked *OpenAIFastBlockedError)
+}
+
+var _ openaiwsv2.FrameConn = (*openAIWSPolicyEnforcingFrameConn)(nil)
+
+func (c *openAIWSPolicyEnforcingFrameConn) ReadFrame(ctx context.Context) (coderws.MessageType, []byte, error) {
+	if c == nil || c.inner == nil {
+		return coderws.MessageText, nil, errOpenAIWSConnClosed
+	}
+	msgType, payload, err := c.inner.ReadFrame(ctx)
+	if err != nil {
+		return msgType, payload, err
+	}
+	if c.filter == nil {
+		return msgType, payload, nil
+	}
+	updated, blocked, filterErr := c.filter(msgType, payload)
+	if filterErr != nil {
+		return msgType, payload, filterErr
+	}
+	if blocked != nil {
+		if c.onBlock != nil {
+			c.onBlock(blocked)
+		}
+		return msgType, nil, NewOpenAIWSClientCloseError(coderws.StatusPolicyViolation, blocked.Message, blocked)
+	}
+	return msgType, updated, nil
+}
+
+func (c *openAIWSPolicyEnforcingFrameConn) WriteFrame(ctx context.Context, msgType coderws.MessageType, payload []byte) error {
+	if c == nil || c.inner == nil {
+		return errOpenAIWSConnClosed
+	}
+	return c.inner.WriteFrame(ctx, msgType, payload)
+}
+
+func (c *openAIWSPolicyEnforcingFrameConn) Close() error {
+	if c == nil || c.inner == nil {
+		return nil
+	}
+	return c.inner.Close()
+}
+
+// openAIWSPassthroughPolicyModelForFrame returns the upstream-perspective
+// model name that should be passed to evaluateOpenAIFastPolicy for a single
+// passthrough WS frame. Mirrors the HTTP-side normalization
+// (account.GetMappedModel + normalizeOpenAIModelForUpstream) so the WS path
+// matches model whitelists identically.
+func openAIWSPassthroughPolicyModelForFrame(account *Account, payload []byte) string {
+	if account == nil || len(payload) == 0 {
+		return ""
+	}
+	original := strings.TrimSpace(gjson.GetBytes(payload, "model").String())
+	if original == "" {
+		return ""
+	}
+	return normalizeOpenAIModelForUpstream(account, account.GetMappedModel(original))
+}
+
+// openAIWSPassthroughPolicyModelFromSessionFrame returns the upstream model
+// derived from a session.update frame's session.model field. Returns "" when
+// the frame is not a session.update event or carries no session.model. Used
+// by the per-frame policy filter (client→upstream direction) to keep
+// capturedSessionModel in sync with the session-level model the client may
+// rotate mid-session.
+//
+// Realtime / Responses WS lets the client change the session model after
+// the WS handshake via:
+//
+//	{"type":"session.update","session":{"model":"gpt-5.5", ...}}
+//
+// If we only capture the model from the very first frame, a client can ship
+// gpt-4o on the first response.create (whitelisted as pass), then
+// session.update to gpt-5.5, then send response.create without "model" so
+// the per-frame resolver returns "" and the stale capturedSessionModel falls
+// back to gpt-4o — defeating the gpt-5.5 fast-policy filter.
+func openAIWSPassthroughPolicyModelFromSessionFrame(account *Account, payload []byte) string {
+	if account == nil || len(payload) == 0 {
+		return ""
+	}
+	frameType := strings.TrimSpace(gjson.GetBytes(payload, "type").String())
+	if frameType != "session.update" {
+		return ""
+	}
+	original := strings.TrimSpace(gjson.GetBytes(payload, "session.model").String())
+	if original == "" {
+		return ""
+	}
+	return normalizeOpenAIModelForUpstream(account, account.GetMappedModel(original))
+}
+
 const openaiWSV2PassthroughModeFields = "ws_mode=passthrough ws_router=v2"
 
 var _ openaiwsv2.FrameConn = (*openAIWSClientFrameConn)(nil)
@@ -77,7 +180,6 @@ func (s *OpenAIGatewayService) proxyResponsesWebSocketV2Passthrough(
 		return errors.New("token is empty")
 	}
 	requestModel := strings.TrimSpace(gjson.GetBytes(firstClientMessage, "model").String())
-	requestServiceTier := extractOpenAIServiceTierFromBody(firstClientMessage)
 	requestPreviousResponseID := strings.TrimSpace(gjson.GetBytes(firstClientMessage, "previous_response_id").String())
 	logOpenAIWSV2Passthrough(
 		"relay_start account_id=%d model=%s previous_response_id=%s first_message_type=%s first_message_bytes=%d",
@@ -88,6 +190,59 @@ func (s *OpenAIGatewayService) proxyResponsesWebSocketV2Passthrough(
 		len(firstClientMessage),
 	)
 
+	// Apply OpenAI Fast Policy on the first response.create frame. Subsequent
+	// frames are filtered via a wrapping FrameConn below so every client→
+	// upstream frame goes through the same policy evaluator/normalize/scope as
+	// HTTP entrypoints.
+	//
+	// We capture the session-level model from the first frame here so the
+	// per-frame filter (below) can fall back to it when a follow-up frame
+	// omits "model" — Realtime clients are allowed to send response.create
+	// without re-stating the model, in which case the upstream uses the model
+	// negotiated at session.update time. Without this fallback, an empty
+	// model would miss any configured model_whitelist and be silently
+	// passed through, defeating the policy on every frame after the
+	// first.
+	capturedSessionModel := openAIWSPassthroughPolicyModelForFrame(account, firstClientMessage)
+	updatedFirst, blocked, policyErr := s.applyOpenAIFastPolicyToWSResponseCreate(ctx, account, capturedSessionModel, firstClientMessage)
+	if policyErr != nil {
+		return fmt.Errorf("apply openai fast policy on first ws frame: %w", policyErr)
+	}
+	if blocked != nil {
+		// coder/websocket@v1.8.14 Conn.Write is synchronous: it acquires
+		// writeFrameMu, writes the entire frame, and Flushes the underlying
+		// bufio writer before returning (write.go:42 → write.go:307-311).
+		// The subsequent close handshake re-acquires the same writeFrameMu
+		// to send the close frame, so the error event is guaranteed to
+		// reach the kernel send buffer before any close frame is queued.
+		// No explicit flush hop is required here.
+		eventBytes := buildOpenAIFastPolicyBlockedWSEvent(blocked)
+		if eventBytes != nil {
+			writeCtx, cancelWrite := context.WithTimeout(ctx, s.openAIWSWriteTimeout())
+			_ = clientConn.Write(writeCtx, coderws.MessageText, eventBytes)
+			cancelWrite()
+		}
+		return NewOpenAIWSClientCloseError(coderws.StatusPolicyViolation, blocked.Message, blocked)
+	}
+	firstClientMessage = updatedFirst
+
+	// 在 policy filter 之后再提取 service_tier 用于 billing 上报：filter
+	// 命中时 service_tier 已经从 firstClientMessage 中删除，billing 应当
+	// 反映上游实际处理的 tier（nil = default），而不是用户最初请求的
+	// "priority"。HTTP 入口（line ~2728 extractOpenAIServiceTier(reqBody)）
+	// 与 WS ingress（openai_ws_forwarder.go:2991 取自 payload）的语义一致。
+	//
+	// 多轮 passthrough：OpenAI Realtime / Responses WS 协议允许客户端在
+	// 同一连接的不同 response.create 帧上发送不同 service_tier（参考
+	// codex-rs/core/src/client.rs build_responses_request 每次重新填值）。
+	// 因此使用 atomic.Pointer[string] 在 filter（runClientToUpstream
+	// goroutine）和 OnTurnComplete / final result（runUpstreamToClient
+	// goroutine）之间同步当前 turn 的 service_tier。
+	// extractOpenAIServiceTierFromBody 返回 *string，本身是指针类型，
+	// 可直接 Store/Load 而无需额外封装。
+	var requestServiceTierPtr atomic.Pointer[string]
+	requestServiceTierPtr.Store(extractOpenAIServiceTierFromBody(firstClientMessage))
+
 	wsURL, err := s.buildOpenAIResponsesWSURL(account)
 	if err != nil {
 		return fmt.Errorf("build ws url: %w", err)
@@ -152,9 +307,72 @@ func (s *OpenAIGatewayService) proxyResponsesWebSocketV2Passthrough(
 	}
 
 	completedTurns := atomic.Int32{}
+	policyClientConn := &openAIWSPolicyEnforcingFrameConn{
+		inner: &openAIWSClientFrameConn{conn: clientConn},
+		// 注意线程安全：filter 仅在 runClientToUpstream 这一条
+		// goroutine 中被调用（passthrough_relay.go: ReadFrame loop），
+		// capturedSessionModel 的读写都发生在该 goroutine 内，因此无需
+		// 加锁/原子化。
+		filter: func(msgType coderws.MessageType, payload []byte) ([]byte, *OpenAIFastBlockedError, error) {
+			if msgType != coderws.MessageText {
+				return payload, nil, nil
+			}
+			// 在评估策略前先刷新 capturedSessionModel：客户端可能通过
+			// session.update 修改 session-level model（Realtime /
+			// Responses WS 协议允许），如果不刷新就会出现
+			// "首帧 model=gpt-4o（pass）→ session.update 改成 gpt-5.5
+			// → 不带 model 的 response.create fallback 到 gpt-4o" 的
+			// 绕过路径。这里只看 session.update 事件中的 session.model
+			// 字段，response.create 自己的 model 仍然由其本帧字段决定。
+			if updated := openAIWSPassthroughPolicyModelFromSessionFrame(account, payload); updated != "" {
+				capturedSessionModel = updated
+			}
+			// Per-frame model first; if the client omits "model" on a
+			// follow-up frame (legal in Realtime), fall back to the
+			// session-level model captured from the first frame so the
+			// model whitelist still resolves. An empty model would miss
+			// any whitelist and silently fall back to pass.
+			model := openAIWSPassthroughPolicyModelForFrame(account, payload)
+			if model == "" {
+				model = capturedSessionModel
+			}
+			out, blocked, policyErr := s.applyOpenAIFastPolicyToWSResponseCreate(ctx, account, model, payload)
+			// 多轮 passthrough billing：仅在成功（non-block / non-err）
+			// 的 response.create 帧上更新 requestServiceTierPtr，使用
+			// filter 处理后的 payload，与首帧 policy-after-extract 语义
+			// 保持一致（参见上方 extractOpenAIServiceTierFromBody 注释）。
+			//   - 非 response.create 帧（response.cancel /
+			//     conversation.item.create / session.update 等）不携带
+			//     per-response service_tier，不应覆盖前一轮值。
+			//   - blocked != nil：该帧不会发送上游，billing tier 应保持
+			//     上一轮值。
+			//   - policyErr != nil：异常路径，保持上一轮值。
+			//   - 不带 service_tier 的 response.create 会让
+			//     extractOpenAIServiceTierFromBody 返回 nil；这里有意
+			//     覆盖（Store(nil)），因为 OpenAI 上游对该帧实际不传
+			//     service_tier 时按 default 处理，billing 应如实反映。
+			if policyErr == nil && blocked == nil &&
+				strings.TrimSpace(gjson.GetBytes(payload, "type").String()) == "response.create" {
+				requestServiceTierPtr.Store(extractOpenAIServiceTierFromBody(out))
+			}
+			return out, blocked, policyErr
+		},
+		onBlock: func(blocked *OpenAIFastBlockedError) {
+			// See note above on Conn.Write being synchronous w.r.t. flush;
+			// no explicit flush is required to ensure the error event lands
+			// before the close frame.
+			eventBytes := buildOpenAIFastPolicyBlockedWSEvent(blocked)
+			if eventBytes == nil {
+				return
+			}
+			writeCtx, cancel := context.WithTimeout(ctx, s.openAIWSWriteTimeout())
+			_ = clientConn.Write(writeCtx, coderws.MessageText, eventBytes)
+			cancel()
+		},
+	}
 	relayResult, relayExit := openaiwsv2.RunEntry(openaiwsv2.EntryInput{
 		Ctx:                ctx,
-		ClientConn:         &openAIWSClientFrameConn{conn: clientConn},
+		ClientConn:         policyClientConn,
 		UpstreamConn:       upstreamFrameConn,
 		FirstClientMessage: firstClientMessage,
 		Options: openaiwsv2.RelayOptions{
@@ -179,7 +397,7 @@ func (s *OpenAIGatewayService) proxyResponsesWebSocketV2Passthrough(
 						CacheReadInputTokens:     turn.Usage.CacheReadInputTokens,
 					},
 					Model:           turn.RequestModel,
-					ServiceTier:     requestServiceTier,
+					ServiceTier:     requestServiceTierPtr.Load(),
 					Stream:          true,
 					OpenAIWSMode:    true,
 					ResponseHeaders: cloneHeader(handshakeHeaders),
@@ -227,7 +445,7 @@ func (s *OpenAIGatewayService) proxyResponsesWebSocketV2Passthrough(
 			CacheReadInputTokens:     relayResult.Usage.CacheReadInputTokens,
 		},
 		Model:           relayResult.RequestModel,
-		ServiceTier:     requestServiceTier,
+		ServiceTier:     requestServiceTierPtr.Load(),
 		Stream:          true,
 		OpenAIWSMode:    true,
 		ResponseHeaders: cloneHeader(handshakeHeaders),
diff --git a/backend/internal/service/setting_service.go b/backend/internal/service/setting_service.go
index 33316031..966b4b84 100644
--- a/backend/internal/service/setting_service.go
+++ b/backend/internal/service/setting_service.go
@@ -3259,6 +3259,84 @@ func (s *SettingService) SetBetaPolicySettings(ctx context.Context, settings *Be
 	return s.settingRepo.Set(ctx, SettingKeyBetaPolicySettings, string(data))
 }
 
+// GetOpenAIFastPolicySettings 获取 OpenAI fast 策略配置
+func (s *SettingService) GetOpenAIFastPolicySettings(ctx context.Context) (*OpenAIFastPolicySettings, error) {
+	value, err := s.settingRepo.GetValue(ctx, SettingKeyOpenAIFastPolicySettings)
+	if err != nil {
+		if errors.Is(err, ErrSettingNotFound) {
+			return DefaultOpenAIFastPolicySettings(), nil
+		}
+		return nil, fmt.Errorf("get openai fast policy settings: %w", err)
+	}
+	if value == "" {
+		return DefaultOpenAIFastPolicySettings(), nil
+	}
+
+	var settings OpenAIFastPolicySettings
+	if err := json.Unmarshal([]byte(value), &settings); err != nil {
+		// JSON 损坏时静默 fallback 到默认配置会让策略意外失效（管理员配
+		// 置的 block/filter 规则被忽略）。记录 Warn 让运维能在出现异常
+		// 行为时定位到 settings 表里的脏数据。
+		slog.Warn("failed to unmarshal openai fast policy settings, falling back to defaults",
+			"error", err,
+			"key", SettingKeyOpenAIFastPolicySettings)
+		return DefaultOpenAIFastPolicySettings(), nil
+	}
+
+	return &settings, nil
+}
+
+// SetOpenAIFastPolicySettings validates the rules (normalizing service_tier and model patterns) and persists them.
+func (s *SettingService) SetOpenAIFastPolicySettings(ctx context.Context, settings *OpenAIFastPolicySettings) error {
+	if settings == nil {
+		return fmt.Errorf("settings cannot be nil")
+	}
+
+	allowedActions := map[string]bool{
+		BetaPolicyActionPass: true, BetaPolicyActionFilter: true, BetaPolicyActionBlock: true,
+	}
+	allowedScopes := map[string]bool{
+		BetaPolicyScopeAll: true, BetaPolicyScopeOAuth: true, BetaPolicyScopeAPIKey: true, BetaPolicyScopeBedrock: true,
+	}
+	allowedTiers := map[string]bool{
+		OpenAIFastTierAny: true, OpenAIFastTierPriority: true, OpenAIFastTierFlex: true,
+	}
+
+	for i := range settings.Rules {
+		rule := &settings.Rules[i]
+		tier := strings.ToLower(strings.TrimSpace(rule.ServiceTier))
+		if tier == "" {
+			tier = OpenAIFastTierAny
+		}
+		if !allowedTiers[tier] {
+			return fmt.Errorf("rule[%d]: invalid service_tier %q", i, rule.ServiceTier)
+		}
+		rule.ServiceTier = tier
+		switch {
+		case !allowedActions[rule.Action]:
+			return fmt.Errorf("rule[%d]: invalid action %q", i, rule.Action)
+		case !allowedScopes[rule.Scope]:
+			return fmt.Errorf("rule[%d]: invalid scope %q", i, rule.Scope)
+		}
+		for j, pattern := range rule.ModelWhitelist {
+			pattern = strings.TrimSpace(pattern)
+			if pattern == "" {
+				return fmt.Errorf("rule[%d]: model_whitelist[%d] cannot be empty", i, j)
+			}
+			rule.ModelWhitelist[j] = pattern
+		}
+		if fallback := rule.FallbackAction; fallback != "" && !allowedActions[fallback] {
+			return fmt.Errorf("rule[%d]: invalid fallback_action %q", i, rule.FallbackAction)
+		}
+	}
+
+	encoded, err := json.Marshal(settings)
+	if err != nil {
+		return fmt.Errorf("marshal openai fast policy settings: %w", err)
+	}
+	return s.settingRepo.Set(ctx, SettingKeyOpenAIFastPolicySettings, string(encoded))
+}
+
 // SetStreamTimeoutSettings 设置流超时处理配置
 func (s *SettingService) SetStreamTimeoutSettings(ctx context.Context, settings *StreamTimeoutSettings) error {
 	if settings == nil {
diff --git a/backend/internal/service/settings_view.go b/backend/internal/service/settings_view.go
index 5ec7d313..c0962ff0 100644
--- a/backend/internal/service/settings_view.go
+++ b/backend/internal/service/settings_view.go
@@ -405,3 +405,57 @@ func DefaultBetaPolicySettings() *BetaPolicySettings {
 		},
 	}
 }
+
+// OpenAI Fast Policy constants.
+// OpenAI's "fast mode" is identified by the service_tier field in the request body:
+//   - "priority" (clients may send "fast", which is normalized to "priority"): fast mode
+//   - "flex": low-priority mode
+//   - omitted: normal default
+//
+// This policy reuses the BetaPolicyAction*/BetaPolicyScope* constant semantics; only the
+// match key changes from the anthropic-beta header to the body's service_tier field.
+const (
+	OpenAIFastTierAny      = "all"      // matches any recognized service_tier
+	OpenAIFastTierPriority = "priority" // matches fast (priority) only
+	OpenAIFastTierFlex     = "flex"     // matches flex only
+)
+
+// OpenAIFastPolicyRule is a single OpenAI fast/flex policy rule.
+type OpenAIFastPolicyRule struct {
+	ServiceTier          string   `json:"service_tier"`                     // "all" | "priority" | "flex" (blank is stored as "all")
+	Action               string   `json:"action"`                           // "pass" | "filter" | "block"
+	Scope                string   `json:"scope"`                            // "all" | "oauth" | "apikey" | "bedrock"
+	ErrorMessage         string   `json:"error_message,omitempty"`          // custom error message (effective when action=block)
+	ModelWhitelist       []string `json:"model_whitelist,omitempty"`        // model match patterns (empty = applies to all models)
+	FallbackAction       string   `json:"fallback_action,omitempty"`        // action for models that do not match the whitelist
+	FallbackErrorMessage string   `json:"fallback_error_message,omitempty"` // custom error message for non-whitelisted models (effective when fallback_action=block)
+}
+
+// OpenAIFastPolicySettings holds the OpenAI fast policy configuration.
+type OpenAIFastPolicySettings struct {
+	Rules []OpenAIFastPolicyRule `json:"rules"`
+}
+
+// DefaultOpenAIFastPolicySettings returns the default OpenAI fast policy settings.
+// By default, priority (fast) requests for every model are filtered: the
+// service_tier field is stripped so the upstream serves them at normal priority.
+//
+// Why ModelWhitelist is empty (= applies to all models):
+// the codex client's service_tier=fast is a user-level switch, orthogonal to the
+// model field. Even with gpt-4 + fast, priority quota is still consumed. If the
+// default rule only covered gpt-5.5*, "gpt-4 + fast passed through to the priority
+// upstream" would bypass the policy. To match codex's real semantics the default
+// applies to all models; admins can set model_whitelist explicitly in the admin UI.
+func DefaultOpenAIFastPolicySettings() *OpenAIFastPolicySettings {
+	// Single default rule: filter the priority tier for every scope and model.
+	filterPriority := OpenAIFastPolicyRule{
+		ServiceTier:    OpenAIFastTierPriority,
+		Action:         BetaPolicyActionFilter,
+		Scope:          BetaPolicyScopeAll,
+		ModelWhitelist: []string{},
+		FallbackAction: BetaPolicyActionPass,
+	}
+	return &OpenAIFastPolicySettings{
+		Rules: []OpenAIFastPolicyRule{filterPriority},
+	}
+}
diff --git a/frontend/src/api/admin/settings.ts b/frontend/src/api/admin/settings.ts
index defbab43..e8ab6af5 100644
--- a/frontend/src/api/admin/settings.ts
+++ b/frontend/src/api/admin/settings.ts
@@ -484,6 +484,9 @@ export interface SystemSettings {
 
   // Affiliate (邀请返利) feature switch
   affiliate_enabled: boolean;
+
+  // OpenAI fast/flex policy
+  openai_fast_policy_settings?: OpenAIFastPolicySettings;
 }
 
 export interface UpdateSettingsRequest {
@@ -648,6 +651,9 @@ export interface UpdateSettingsRequest {
 
   // Affiliate (邀请返利) feature switch
   affiliate_enabled?: boolean;
+
+  // OpenAI fast/flex policy
+  openai_fast_policy_settings?: OpenAIFastPolicySettings;
 }
 
 /**
@@ -875,6 +881,29 @@ export async function updateRectifierSettings(
   return data;
 }
 
+// ==================== OpenAI Fast Policy Settings ====================
+
+/**
+ * One OpenAI fast/flex policy rule, keyed on the request body's service_tier.
+ * Matches backend dto.OpenAIFastPolicyRule.
+ */
+export interface OpenAIFastPolicyRule {
+  service_tier: "all" | "priority" | "flex";
+  action: "pass" | "filter" | "block";
+  scope: "all" | "oauth" | "apikey" | "bedrock";
+  error_message?: string;
+  model_whitelist?: string[];
+  fallback_action?: "pass" | "filter" | "block";
+  fallback_error_message?: string;
+}
+
+/**
+ * OpenAI fast/flex policy settings: the list of configured rules.
+ */
+export interface OpenAIFastPolicySettings {
+  rules: OpenAIFastPolicyRule[];
+}
+
 // ==================== Beta Policy Settings ====================
 
 /**
diff --git a/frontend/src/i18n/locales/en.ts b/frontend/src/i18n/locales/en.ts
index 6f445986..c66ca55b 100644
--- a/frontend/src/i18n/locales/en.ts
+++ b/frontend/src/i18n/locales/en.ts
@@ -5535,6 +5535,38 @@ export default {
         presetOpusOnlyDesc: 'Pass for Opus, filter others',
         commonPatterns: 'Common patterns'
       },
+      openaiFastPolicy: {
+        title: 'OpenAI Fast/Flex Policy',
+        description: 'Intercept, filter, or pass OpenAI fast(priority) / flex requests based on the request body service_tier field. Applies to the OpenAI gateway only.',
+        empty: 'No rules configured. Click the button below to add one.',
+        ruleHeader: 'Rule #{index}',
+        removeRule: 'Remove rule',
+        addRule: 'Add rule',
+        saveHint: 'Saved together with system settings (click the global Save button at the bottom of the page).',
+        serviceTier: 'service_tier match',
+        tierAll: 'All tiers',
+        tierPriority: 'priority (fast)',
+        tierFlex: 'flex',
+        action: 'Action',
+        actionPass: 'Pass (keep service_tier)',
+        actionFilter: 'Filter (remove service_tier)',
+        actionBlock: 'Block (reject request)',
+        scope: 'Scope',
+        scopeAll: 'All accounts',
+        scopeOAuth: 'OAuth only',
+        scopeAPIKey: 'API Key only',
+        scopeBedrock: 'Bedrock only',
+        errorMessage: 'Error message',
+        errorMessagePlaceholder: 'Custom error message when blocked',
+        errorMessageHint: 'Leave empty for the default message.',
+        modelWhitelist: 'Model whitelist',
+        modelWhitelistHint: 'Leave empty to apply to all models. Supports exact match and wildcard prefix (e.g., gpt-5.5*).',
+        modelPatternPlaceholder: 'e.g., gpt-5.5 or gpt-5.5*',
+        addModelPattern: 'Add model pattern',
+        fallbackAction: 'Fallback action',
+        fallbackActionHint: 'Action for models not matching the whitelist.',
+        fallbackErrorMessagePlaceholder: 'Custom error message when non-whitelisted models are blocked'
+      },
       wechatConnect: {
         title: 'WeChat Connect',
         description: 'Third-party login configuration for WeChat Open Platform or Official Account / Mini Program.',
diff --git a/frontend/src/i18n/locales/zh.ts b/frontend/src/i18n/locales/zh.ts
index e399530b..77d1c93c 100644
--- a/frontend/src/i18n/locales/zh.ts
+++ b/frontend/src/i18n/locales/zh.ts
@@ -5695,6 +5695,38 @@ export default {
         presetOpusOnlyDesc: 'Opus 透传，其他模型过滤',
         commonPatterns: '常用模式'
       },
+      openaiFastPolicy: {
+        title: 'OpenAI Fast/Flex 策略',
+        description: '基于请求体 service_tier 字段拦截/过滤/透传 OpenAI fast(priority) 与 flex 请求；仅作用于 OpenAI 网关。',
+        empty: '尚未配置任何规则。点击下方按钮新增。',
+        ruleHeader: '规则 #{index}',
+        removeRule: '删除规则',
+        addRule: '新增规则',
+        saveHint: '保存时随系统设置一起提交（点击页面底部「保存」按钮）。',
+        serviceTier: 'service_tier 匹配',
+        tierAll: '全部 tier',
+        tierPriority: 'priority（fast）',
+        tierFlex: 'flex',
+        action: '处理方式',
+        actionPass: '透传（保留 service_tier）',
+        actionFilter: '过滤（移除 service_tier）',
+        actionBlock: '拦截（拒绝请求）',
+        scope: '生效范围',
+        scopeAll: '全部账号',
+        scopeOAuth: '仅 OAuth 账号',
+        scopeAPIKey: '仅 API Key 账号',
+        scopeBedrock: '仅 Bedrock 账号',
+        errorMessage: '错误消息',
+        errorMessagePlaceholder: '拦截时返回的自定义错误消息',
+        errorMessageHint: '留空则使用默认错误消息。',
+        modelWhitelist: '模型白名单',
+        modelWhitelistHint: '留空表示对所有模型生效；支持精确匹配与通配符（如 gpt-5.5*）。',
+        modelPatternPlaceholder: '例如: gpt-5.5 或 gpt-5.5*',
+        addModelPattern: '添加模型规则',
+        fallbackAction: '未匹配模型处理方式',
+        fallbackActionHint: '当请求模型不在白名单中时的处理方式。',
+        fallbackErrorMessagePlaceholder: '未匹配模型被拦截时返回的自定义错误消息'
+      },
       wechatConnect: {
         title: '微信登录',
         description: '用于微信开放平台或公众号/小程序的第三方登录配置。',
diff --git a/frontend/src/views/admin/SettingsView.vue b/frontend/src/views/admin/SettingsView.vue
index 90d10b9a..ad0587b8 100644
--- a/frontend/src/views/admin/SettingsView.vue
+++ b/frontend/src/views/admin/SettingsView.vue
@@ -949,6 +949,285 @@
               </template>
             </div>
           </div>
+          <!-- OpenAI Fast/Flex Policy Settings -->
+          <div class="card">
+            <div
+              class="border-b border-gray-100 px-6 py-4 dark:border-dark-700"
+            >
+              <h2 class="text-lg font-semibold text-gray-900 dark:text-white">
+                {{ t("admin.settings.openaiFastPolicy.title") }}
+              </h2>
+              <p class="mt-1 text-sm text-gray-500 dark:text-gray-400">
+                {{ t("admin.settings.openaiFastPolicy.description") }}
+              </p>
+            </div>
+            <div class="space-y-5 p-6">
+              <!-- Empty state -->
+              <div
+                v-if="openaiFastPolicyForm.rules.length === 0"
+                class="rounded-lg border border-dashed border-gray-200 p-6 text-center text-sm text-gray-500 dark:border-dark-600 dark:text-gray-400"
+              >
+                {{ t("admin.settings.openaiFastPolicy.empty") }}
+              </div>
+
+              <!-- Rule Cards -->
+              <div
+                v-for="(rule, ruleIndex) in openaiFastPolicyForm.rules"
+                :key="ruleIndex"
+                class="rounded-lg border border-gray-200 p-4 dark:border-dark-600"
+              >
+                <div class="mb-3 flex items-center justify-between">
+                  <span
+                    class="text-sm font-medium text-gray-900 dark:text-white"
+                  >
+                    {{
+                      t("admin.settings.openaiFastPolicy.ruleHeader", {
+                        index: ruleIndex + 1,
+                      })
+                    }}
+                  </span>
+                  <button
+                    type="button"
+                    @click="removeOpenAIFastPolicyRule(ruleIndex)"
+                    class="rounded p-1 text-red-400 transition-colors hover:bg-red-50 hover:text-red-600 dark:hover:bg-red-900/20"
+                    :title="t('admin.settings.openaiFastPolicy.removeRule')"
+                  >
+                    <svg
+                      class="h-4 w-4"
+                      fill="none"
+                      viewBox="0 0 24 24"
+                      stroke="currentColor"
+                      stroke-width="2"
+                    >
+                      <path
+                        stroke-linecap="round"
+                        stroke-linejoin="round"
+                        d="M6 18L18 6M6 6l12 12"
+                      />
+                    </svg>
+                  </button>
+                </div>
+
+                <div class="grid grid-cols-1 gap-4 md:grid-cols-3">
+                  <!-- Service Tier -->
+                  <div>
+                    <label
+                      class="mb-1 block text-xs font-medium text-gray-600 dark:text-gray-400"
+                    >
+                      {{ t("admin.settings.openaiFastPolicy.serviceTier") }}
+                    </label>
+                    <Select
+                      :modelValue="rule.service_tier"
+                      @update:modelValue="
+                        rule.service_tier = $event as
+                          | 'all'
+                          | 'priority'
+                          | 'flex'
+                      "
+                      :options="openaiFastPolicyTierOptions"
+                    />
+                  </div>
+
+                  <!-- Action -->
+                  <div>
+                    <label
+                      class="mb-1 block text-xs font-medium text-gray-600 dark:text-gray-400"
+                    >
+                      {{ t("admin.settings.openaiFastPolicy.action") }}
+                    </label>
+                    <Select
+                      :modelValue="rule.action"
+                      @update:modelValue="
+                        rule.action = $event as 'pass' | 'filter' | 'block'
+                      "
+                      :options="openaiFastPolicyActionOptions"
+                    />
+                  </div>
+
+                  <!-- Scope -->
+                  <div>
+                    <label
+                      class="mb-1 block text-xs font-medium text-gray-600 dark:text-gray-400"
+                    >
+                      {{ t("admin.settings.openaiFastPolicy.scope") }}
+                    </label>
+                    <Select
+                      :modelValue="rule.scope"
+                      @update:modelValue="
+                        rule.scope = $event as
+                          | 'all'
+                          | 'oauth'
+                          | 'apikey'
+                          | 'bedrock'
+                      "
+                      :options="openaiFastPolicyScopeOptions"
+                    />
+                  </div>
+                </div>
+
+                <!-- Error Message (only when action=block) -->
+                <div v-if="rule.action === 'block'" class="mt-3">
+                  <label
+                    class="mb-1 block text-xs font-medium text-gray-600 dark:text-gray-400"
+                  >
+                    {{ t("admin.settings.openaiFastPolicy.errorMessage") }}
+                  </label>
+                  <input
+                    v-model="rule.error_message"
+                    type="text"
+                    class="input"
+                    :placeholder="
+                      t(
+                        'admin.settings.openaiFastPolicy.errorMessagePlaceholder',
+                      )
+                    "
+                  />
+                  <p class="mt-1 text-xs text-gray-400 dark:text-gray-500">
+                    {{ t("admin.settings.openaiFastPolicy.errorMessageHint") }}
+                  </p>
+                </div>
+
+                <!-- Model Whitelist -->
+                <div class="mt-3">
+                  <label
+                    class="mb-1 block text-xs font-medium text-gray-600 dark:text-gray-400"
+                  >
+                    {{ t("admin.settings.openaiFastPolicy.modelWhitelist") }}
+                  </label>
+                  <p class="mb-2 text-xs text-gray-400 dark:text-gray-500">
+                    {{
+                      t("admin.settings.openaiFastPolicy.modelWhitelistHint")
+                    }}
+                  </p>
+                  <div
+                    v-for="(_, patternIdx) in rule.model_whitelist || []"
+                    :key="patternIdx"
+                    class="mb-1.5 flex items-center gap-2"
+                  >
+                    <input
+                      v-model="rule.model_whitelist![patternIdx]"
+                      type="text"
+                      class="input input-sm flex-1"
+                      :placeholder="
+                        t(
+                          'admin.settings.openaiFastPolicy.modelPatternPlaceholder',
+                        )
+                      "
+                    />
+                    <button
+                      type="button"
+                      @click="
+                        removeOpenAIFastPolicyModelPattern(rule, patternIdx)
+                      "
+                      class="shrink-0 rounded p-1 text-red-400 transition-colors hover:bg-red-50 hover:text-red-600 dark:hover:bg-red-900/20"
+                    >
+                      <svg
+                        class="h-4 w-4"
+                        fill="none"
+                        viewBox="0 0 24 24"
+                        stroke="currentColor"
+                        stroke-width="2"
+                      >
+                        <path
+                          stroke-linecap="round"
+                          stroke-linejoin="round"
+                          d="M6 18L18 6M6 6l12 12"
+                        />
+                      </svg>
+                    </button>
+                  </div>
+                  <button
+                    type="button"
+                    @click="addOpenAIFastPolicyModelPattern(rule)"
+                    class="mb-2 inline-flex items-center gap-1 text-xs text-primary-600 transition-colors hover:text-primary-700 dark:text-primary-400 dark:hover:text-primary-300"
+                  >
+                    <svg
+                      class="h-3.5 w-3.5"
+                      fill="none"
+                      viewBox="0 0 24 24"
+                      stroke="currentColor"
+                      stroke-width="2"
+                    >
+                      <path
+                        stroke-linecap="round"
+                        stroke-linejoin="round"
+                        d="M12 4v16m8-8H4"
+                      />
+                    </svg>
+                    {{ t("admin.settings.openaiFastPolicy.addModelPattern") }}
+                  </button>
+                </div>
+
+                <!-- Fallback Action (only when model_whitelist is non-empty) -->
+                <div
+                  v-if="
+                    rule.model_whitelist && rule.model_whitelist.length > 0
+                  "
+                  class="mt-3"
+                >
+                  <label
+                    class="mb-1 block text-xs font-medium text-gray-600 dark:text-gray-400"
+                  >
+                    {{ t("admin.settings.openaiFastPolicy.fallbackAction") }}
+                  </label>
+                  <Select
+                    :modelValue="rule.fallback_action || 'pass'"
+                    @update:modelValue="
+                      rule.fallback_action = $event as
+                        | 'pass'
+                        | 'filter'
+                        | 'block'
+                    "
+                    :options="openaiFastPolicyActionOptions"
+                  />
+                  <p class="mt-1 text-xs text-gray-400 dark:text-gray-500">
+                    {{
+                      t("admin.settings.openaiFastPolicy.fallbackActionHint")
+                    }}
+                  </p>
+                  <div v-if="rule.fallback_action === 'block'" class="mt-2">
+                    <input
+                      v-model="rule.fallback_error_message"
+                      type="text"
+                      class="input"
+                      :placeholder="
+                        t(
+                          'admin.settings.openaiFastPolicy.fallbackErrorMessagePlaceholder',
+                        )
+                      "
+                    />
+                  </div>
+                </div>
+              </div>
+
+              <!-- Add Rule Button -->
+              <div>
+                <button
+                  type="button"
+                  @click="addOpenAIFastPolicyRule"
+                  class="btn btn-secondary btn-sm inline-flex items-center gap-1"
+                >
+                  <svg
+                    class="h-4 w-4"
+                    fill="none"
+                    viewBox="0 0 24 24"
+                    stroke="currentColor"
+                    stroke-width="2"
+                  >
+                    <path
+                      stroke-linecap="round"
+                      stroke-linejoin="round"
+                      d="M12 4v16m8-8H4"
+                    />
+                  </svg>
+                  {{ t("admin.settings.openaiFastPolicy.addRule") }}
+                </button>
+                <p class="mt-2 text-xs text-gray-400 dark:text-gray-500">
+                  {{ t("admin.settings.openaiFastPolicy.saveHint") }}
+                </p>
+              </div>
+            </div>
+          </div>
         </div>
         <!-- /Tab: Gateway -->
 
@@ -5199,6 +5478,7 @@ import type {
   SystemSettings,
   UpdateSettingsRequest,
   DefaultSubscriptionSetting,
+  OpenAIFastPolicyRule,
   WeChatConnectMode,
   WebSearchEmulationConfig,
   WebSearchProviderConfig,
@@ -5337,6 +5617,14 @@ const betaPolicyForm = reactive({
   }>,
 });
 
+// OpenAI Fast/Flex Policy state
+const openaiFastPolicyForm = reactive({
+  rules: [] as OpenAIFastPolicyRule[],
+});
+// Tracks whether openai_fast_policy_settings was successfully loaded from the backend,
+// so that a failed GET or a missing field does not make save overwrite the default rules with an empty array.
+const openaiFastPolicyLoaded = ref(false);
+
 const tablePageSizeMin = 5;
 const tablePageSizeMax = 1000;
 const tablePageSizeDefault = 20;
@@ -6116,6 +6404,23 @@ async function loadSettings() {
     );
     form.oidc_connect_client_secret = "";
 
+    // Load OpenAI fast/flex policy rules from bulk settings.
+    // 仅当 payload 真的包含该字段时填充并标记为已加载；否则保持表单空值，
+    // 让 saveSettings 在未加载时跳过该字段，防止覆盖后端默认规则。
+    if (
+      settings.openai_fast_policy_settings &&
+      Array.isArray(settings.openai_fast_policy_settings.rules)
+    ) {
+      openaiFastPolicyForm.rules =
+        settings.openai_fast_policy_settings.rules.map((rule) => ({
+          ...rule,
+          model_whitelist: rule.model_whitelist
+            ? [...rule.model_whitelist]
+            : [],
+        }));
+      openaiFastPolicyLoaded.value = true;
+    }
+
     // Load web search emulation config separately
     await loadWebSearchConfig();
   } catch (error: unknown) {
@@ -6460,10 +6765,39 @@ async function saveSettings() {
       affiliate_enabled: form.affiliate_enabled,
     };
 
+    // 仅当 openai_fast_policy_settings 已成功从后端加载时才回写，
+    // 否则省略整个字段，让后端保留既有规则（含默认值）。
+    if (openaiFastPolicyLoaded.value) {
+      payload.openai_fast_policy_settings = {
+        rules: openaiFastPolicyForm.rules.map((rule) => {
+          const whitelist = (rule.model_whitelist || [])
+            .map((p) => p.trim())
+            .filter((p) => p !== "");
+          const hasWhitelist = whitelist.length > 0;
+          return {
+            service_tier: rule.service_tier,
+            action: rule.action,
+            scope: rule.scope,
+            error_message:
+              rule.action === "block" ? rule.error_message : undefined,
+            model_whitelist: hasWhitelist ? whitelist : undefined,
+            fallback_action: hasWhitelist
+              ? rule.fallback_action || "pass"
+              : undefined,
+            fallback_error_message:
+              hasWhitelist && rule.fallback_action === "block"
+                ? rule.fallback_error_message
+                : undefined,
+          };
+        }),
+      };
+    }
+
     appendAuthSourceDefaultsToUpdateRequest(payload, authSourceDefaults);
 
     const updated = await adminAPI.settings.updateSettings(payload);
     for (const [key, value] of Object.entries(updated)) {
+      if (key === "openai_fast_policy_settings") continue;
       if (value !== null && value !== undefined) {
         (form as Record<string, unknown>)[key] = value;
       }
@@ -6507,6 +6841,20 @@ async function saveSettings() {
       form.wechat_connect_mode,
     );
     form.oidc_connect_client_secret = "";
+    // Refresh OpenAI fast/flex policy from server response
+    if (
+      updated.openai_fast_policy_settings &&
+      Array.isArray(updated.openai_fast_policy_settings.rules)
+    ) {
+      openaiFastPolicyForm.rules =
+        updated.openai_fast_policy_settings.rules.map((rule) => ({
+          ...rule,
+          model_whitelist: rule.model_whitelist
+            ? [...rule.model_whitelist]
+            : [],
+        }));
+      openaiFastPolicyLoaded.value = true;
+    }
     // Save web search emulation config separately (errors handled internally)
     const wsOk = await saveWebSearchConfig();
     // Refresh cached settings so sidebar/header update immediately
@@ -6846,6 +7194,61 @@ async function loadBetaPolicySettings() {
   }
 }
 
+// ==================== OpenAI Fast/Flex Policy ====================
+
+const openaiFastPolicyTierOptions = computed(() => [
+  { value: "all", label: t("admin.settings.openaiFastPolicy.tierAll") },
+  {
+    value: "priority",
+    label: t("admin.settings.openaiFastPolicy.tierPriority"),
+  },
+  { value: "flex", label: t("admin.settings.openaiFastPolicy.tierFlex") },
+]);
+
+const openaiFastPolicyActionOptions = computed(() => [
+  { value: "pass", label: t("admin.settings.openaiFastPolicy.actionPass") },
+  { value: "filter", label: t("admin.settings.openaiFastPolicy.actionFilter") },
+  { value: "block", label: t("admin.settings.openaiFastPolicy.actionBlock") },
+]);
+
+const openaiFastPolicyScopeOptions = computed(() => [
+  { value: "all", label: t("admin.settings.openaiFastPolicy.scopeAll") },
+  { value: "oauth", label: t("admin.settings.openaiFastPolicy.scopeOAuth") },
+  { value: "apikey", label: t("admin.settings.openaiFastPolicy.scopeAPIKey") },
+  {
+    value: "bedrock",
+    label: t("admin.settings.openaiFastPolicy.scopeBedrock"),
+  },
+]);
+
+function addOpenAIFastPolicyRule() {
+  openaiFastPolicyForm.rules.push({
+    service_tier: "priority",
+    action: "filter",
+    scope: "all",
+    error_message: "",
+    model_whitelist: [],
+    fallback_action: "pass",
+    fallback_error_message: "",
+  });
+}
+
+function removeOpenAIFastPolicyRule(index: number) {
+  openaiFastPolicyForm.rules.splice(index, 1);
+}
+
+function addOpenAIFastPolicyModelPattern(rule: OpenAIFastPolicyRule) {
+  if (!rule.model_whitelist) rule.model_whitelist = [];
+  rule.model_whitelist.push("");
+}
+
+function removeOpenAIFastPolicyModelPattern(
+  rule: OpenAIFastPolicyRule,
+  idx: number,
+) {
+  rule.model_whitelist?.splice(idx, 1);
+}
+
 async function saveBetaPolicySettings() {
   betaPolicySaving.value = true;
   try {

From 04b2866f65f31c044a991e9f2c1b299927a2ac1b Mon Sep 17 00:00:00 2001
From: ivanvolt <ivanvolt815@gmail.com>
Date: Tue, 28 Apr 2026 16:26:09 +0800
Subject: [PATCH 26/46] fix: use Responses-compatible function tool_choice
 format

---
 .../pkg/apicompat/anthropic_responses_test.go | 37 ++++++++++-
 .../pkg/apicompat/anthropic_to_responses.go   |  6 +-
 .../chatcompletions_responses_test.go         |  2 +
 .../apicompat/chatcompletions_to_responses.go |  6 +-
 .../responses_to_anthropic_request.go         | 15 ++++-
 .../service/openai_codex_transform.go         | 62 +++++++++++++++++--
 .../service/openai_codex_transform_test.go    | 38 ++++++++++++
 7 files changed, 150 insertions(+), 16 deletions(-)

diff --git a/backend/internal/pkg/apicompat/anthropic_responses_test.go b/backend/internal/pkg/apicompat/anthropic_responses_test.go
index facfe572..e8b25c2b 100644
--- a/backend/internal/pkg/apicompat/anthropic_responses_test.go
+++ b/backend/internal/pkg/apicompat/anthropic_responses_test.go
@@ -991,9 +991,40 @@ func TestAnthropicToResponses_ToolChoiceSpecific(t *testing.T) {
 	var tc map[string]any
 	require.NoError(t, json.Unmarshal(resp.ToolChoice, &tc))
 	assert.Equal(t, "function", tc["type"])
-	fn, ok := tc["function"].(map[string]any)
-	require.True(t, ok)
-	assert.Equal(t, "get_weather", fn["name"])
+	assert.Equal(t, "get_weather", tc["name"])
+	assert.NotContains(t, tc, "function")
+}
+
+func TestResponsesToAnthropicRequest_ToolChoiceFunctionName(t *testing.T) {
+	req := &ResponsesRequest{
+		Model:      "gpt-5.2",
+		Input:      json.RawMessage(`[{"role":"user","content":"Hello"}]`),
+		ToolChoice: json.RawMessage(`{"type":"function","name":"get_weather"}`),
+	}
+
+	resp, err := ResponsesToAnthropicRequest(req)
+	require.NoError(t, err)
+
+	var tc map[string]string
+	require.NoError(t, json.Unmarshal(resp.ToolChoice, &tc))
+	assert.Equal(t, "tool", tc["type"])
+	assert.Equal(t, "get_weather", tc["name"])
+}
+
+func TestResponsesToAnthropicRequest_ToolChoiceLegacyFunctionName(t *testing.T) {
+	req := &ResponsesRequest{
+		Model:      "gpt-5.2",
+		Input:      json.RawMessage(`[{"role":"user","content":"Hello"}]`),
+		ToolChoice: json.RawMessage(`{"type":"function","function":{"name":"get_weather"}}`),
+	}
+
+	resp, err := ResponsesToAnthropicRequest(req)
+	require.NoError(t, err)
+
+	var tc map[string]string
+	require.NoError(t, json.Unmarshal(resp.ToolChoice, &tc))
+	assert.Equal(t, "tool", tc["type"])
+	assert.Equal(t, "get_weather", tc["name"])
 }
 
 // ---------------------------------------------------------------------------
diff --git a/backend/internal/pkg/apicompat/anthropic_to_responses.go b/backend/internal/pkg/apicompat/anthropic_to_responses.go
index 485262e8..268f9f22 100644
--- a/backend/internal/pkg/apicompat/anthropic_to_responses.go
+++ b/backend/internal/pkg/apicompat/anthropic_to_responses.go
@@ -75,7 +75,7 @@ func AnthropicToResponses(req *AnthropicRequest) (*ResponsesRequest, error) {
 //	{"type":"auto"}            → "auto"
 //	{"type":"any"}             → "required"
 //	{"type":"none"}            → "none"
-//	{"type":"tool","name":"X"} → {"type":"function","function":{"name":"X"}}
+//	{"type":"tool","name":"X"} → {"type":"function","name":"X"}
 func convertAnthropicToolChoiceToResponses(raw json.RawMessage) (json.RawMessage, error) {
 	var tc struct {
 		Type string `json:"type"`
@@ -94,8 +94,8 @@ func convertAnthropicToolChoiceToResponses(raw json.RawMessage) (json.RawMessage
 		return json.Marshal("none")
 	case "tool":
 		return json.Marshal(map[string]any{
-			"type":     "function",
-			"function": map[string]string{"name": tc.Name},
+			"type": "function",
+			"name": tc.Name,
 		})
 	default:
 		// Pass through unknown types as-is
diff --git a/backend/internal/pkg/apicompat/chatcompletions_responses_test.go b/backend/internal/pkg/apicompat/chatcompletions_responses_test.go
index c140449a..35d42999 100644
--- a/backend/internal/pkg/apicompat/chatcompletions_responses_test.go
+++ b/backend/internal/pkg/apicompat/chatcompletions_responses_test.go
@@ -281,6 +281,8 @@ func TestChatCompletionsToResponses_LegacyFunctions(t *testing.T) {
 	var tc map[string]any
 	require.NoError(t, json.Unmarshal(resp.ToolChoice, &tc))
 	assert.Equal(t, "function", tc["type"])
+	assert.Equal(t, "get_weather", tc["name"])
+	assert.NotContains(t, tc, "function")
 }
 
 func TestChatCompletionsToResponses_ServiceTier(t *testing.T) {
diff --git a/backend/internal/pkg/apicompat/chatcompletions_to_responses.go b/backend/internal/pkg/apicompat/chatcompletions_to_responses.go
index c2725406..64ef5781 100644
--- a/backend/internal/pkg/apicompat/chatcompletions_to_responses.go
+++ b/backend/internal/pkg/apicompat/chatcompletions_to_responses.go
@@ -420,7 +420,7 @@ func convertChatToolsToResponses(tools []ChatTool, functions []ChatFunction) []R
 //
 //	"auto" → "auto"
 //	"none" → "none"
-//	{"name":"X"} → {"type":"function","function":{"name":"X"}}
+//	{"name":"X"} → {"type":"function","name":"X"}
 func convertChatFunctionCallToToolChoice(raw json.RawMessage) (json.RawMessage, error) {
 	// Try string first ("auto", "none", etc.) — pass through as-is.
 	var s string
@@ -436,7 +436,7 @@ func convertChatFunctionCallToToolChoice(raw json.RawMessage) (json.RawMessage,
 		return nil, err
 	}
 	return json.Marshal(map[string]any{
-		"type":     "function",
-		"function": map[string]string{"name": obj.Name},
+		"type": "function",
+		"name": obj.Name,
 	})
 }
diff --git a/backend/internal/pkg/apicompat/responses_to_anthropic_request.go b/backend/internal/pkg/apicompat/responses_to_anthropic_request.go
index 49426b88..8fa652f2 100644
--- a/backend/internal/pkg/apicompat/responses_to_anthropic_request.go
+++ b/backend/internal/pkg/apicompat/responses_to_anthropic_request.go
@@ -428,7 +428,8 @@ func normalizeAnthropicInputSchema(schema json.RawMessage) json.RawMessage {
 //	"auto"                                     → {"type":"auto"}
 //	"required"                                 → {"type":"any"}
 //	"none"                                     → {"type":"none"}
-//	{"type":"function","function":{"name":"X"}} → {"type":"tool","name":"X"}
+//	{"type":"function","name":"X"}                 → {"type":"tool","name":"X"}
+//	{"type":"function","function":{"name":"X"}}     → {"type":"tool","name":"X"} // legacy
 func convertResponsesToAnthropicToolChoice(raw json.RawMessage) (json.RawMessage, error) {
 	// Try as string first
 	var s string
@@ -448,14 +449,22 @@ func convertResponsesToAnthropicToolChoice(raw json.RawMessage) (json.RawMessage
 	// Try as object with type=function
 	var tc struct {
 		Type     string `json:"type"`
+		Name     string `json:"name"`
 		Function struct {
 			Name string `json:"name"`
 		} `json:"function"`
 	}
-	if err := json.Unmarshal(raw, &tc); err == nil && tc.Type == "function" && tc.Function.Name != "" {
+	if err := json.Unmarshal(raw, &tc); err == nil && tc.Type == "function" {
+		name := strings.TrimSpace(tc.Name)
+		if name == "" {
+			name = strings.TrimSpace(tc.Function.Name)
+		}
+		if name == "" {
+			return raw, nil
+		}
 		return json.Marshal(map[string]string{
 			"type": "tool",
-			"name": tc.Function.Name,
+			"name": name,
 		})
 	}
 
diff --git a/backend/internal/service/openai_codex_transform.go b/backend/internal/service/openai_codex_transform.go
index e765d7e9..0fda16b0 100644
--- a/backend/internal/service/openai_codex_transform.go
+++ b/backend/internal/service/openai_codex_transform.go
@@ -141,9 +141,7 @@ func applyCodexOAuthTransform(reqBody map[string]any, isCodexCLI bool, isCompact
 			if name, ok := fcObj["name"].(string); ok && strings.TrimSpace(name) != "" {
 				reqBody["tool_choice"] = map[string]any{
 					"type": "function",
-					"function": map[string]any{
-						"name": name,
-					},
+					"name": name,
 				}
 			}
 		}
@@ -219,9 +217,38 @@ func normalizeCodexToolChoice(reqBody map[string]any) bool {
 		return false
 	}
 	choiceType := strings.TrimSpace(firstNonEmptyString(choiceMap["type"]))
-	if choiceType == "" || codexToolsContainType(reqBody["tools"], choiceType) {
+	if choiceType == "" {
 		return false
 	}
+	modified := false
+	if choiceType == "function" {
+		name := strings.TrimSpace(firstNonEmptyString(choiceMap["name"]))
+		if name == "" {
+			if function, ok := choiceMap["function"].(map[string]any); ok {
+				name = strings.TrimSpace(firstNonEmptyString(function["name"]))
+			}
+		}
+		if name == "" {
+			reqBody["tool_choice"] = "auto"
+			return true
+		}
+		if strings.TrimSpace(firstNonEmptyString(choiceMap["name"])) != name {
+			choiceMap["name"] = name
+			modified = true
+		}
+		if _, ok := choiceMap["function"]; ok {
+			delete(choiceMap, "function")
+			modified = true
+		}
+		if !codexToolsContainFunctionName(reqBody["tools"], name) {
+			reqBody["tool_choice"] = "auto"
+			return true
+		}
+		return modified
+	}
+	if codexToolsContainType(reqBody["tools"], choiceType) {
+		return modified
+	}
 	reqBody["tool_choice"] = "auto"
 	return true
 }
@@ -243,6 +270,33 @@ func codexToolsContainType(rawTools any, toolType string) bool {
 	return false
 }
 
+func codexToolsContainFunctionName(rawTools any, name string) bool {
+	tools, ok := rawTools.([]any)
+	if !ok || strings.TrimSpace(name) == "" {
+		return false
+	}
+	normalizedName := strings.TrimSpace(name)
+	for _, rawTool := range tools {
+		tool, ok := rawTool.(map[string]any)
+		if !ok {
+			continue
+		}
+		if strings.TrimSpace(firstNonEmptyString(tool["type"])) != "function" {
+			continue
+		}
+		toolName := strings.TrimSpace(firstNonEmptyString(tool["name"]))
+		if toolName == "" {
+			if function, ok := tool["function"].(map[string]any); ok {
+				toolName = strings.TrimSpace(firstNonEmptyString(function["name"]))
+			}
+		}
+		if toolName == normalizedName {
+			return true
+		}
+	}
+	return false
+}
+
 func normalizeCodexToolRoleMessages(input []any) ([]any, bool) {
 	if len(input) == 0 {
 		return input, false
diff --git a/backend/internal/service/openai_codex_transform_test.go b/backend/internal/service/openai_codex_transform_test.go
index 75f5c55c..8d9f8574 100644
--- a/backend/internal/service/openai_codex_transform_test.go
+++ b/backend/internal/service/openai_codex_transform_test.go
@@ -249,6 +249,44 @@ func TestApplyCodexOAuthTransform_PreservesKnownToolChoice(t *testing.T) {
 	require.Equal(t, "custom", choice["type"])
 }
 
+func TestApplyCodexOAuthTransform_NormalizesLegacyFunctionToolChoice(t *testing.T) {
+	reqBody := map[string]any{
+		"model": "gpt-5.4",
+		"tools": []any{
+			map[string]any{"type": "function", "name": "shell"},
+		},
+		"tool_choice": map[string]any{
+			"type":     "function",
+			"function": map[string]any{"name": "shell"},
+		},
+	}
+
+	applyCodexOAuthTransform(reqBody, true, false)
+
+	choice, ok := reqBody["tool_choice"].(map[string]any)
+	require.True(t, ok)
+	require.Equal(t, "function", choice["type"])
+	require.Equal(t, "shell", choice["name"])
+	require.NotContains(t, choice, "function")
+}
+
+func TestApplyCodexOAuthTransform_DowngradesMissingFunctionToolChoice(t *testing.T) {
+	reqBody := map[string]any{
+		"model": "gpt-5.4",
+		"tools": []any{
+			map[string]any{"type": "function", "name": "shell"},
+		},
+		"tool_choice": map[string]any{
+			"type":     "function",
+			"function": map[string]any{"name": "missing"},
+		},
+	}
+
+	applyCodexOAuthTransform(reqBody, true, false)
+
+	require.Equal(t, "auto", reqBody["tool_choice"])
+}
+
 func TestApplyCodexOAuthTransform_AddsFallbackNameForFunctionCallInput(t *testing.T) {
 	reqBody := map[string]any{
 		"model": "gpt-5.4",

From 6327573534d903efc1d63d0e3af92d683d3d293e Mon Sep 17 00:00:00 2001
From: alfadb <alfadb@163.com>
Date: Tue, 28 Apr 2026 19:12:48 +0800
Subject: [PATCH 27/46] fix(gateway): wrap Anthropic stream EOF as failover
 error before client output
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Anthropic streaming path (handleStreamingResponse in gateway_service.go)
returned a plain error on upstream SSE read failure, so the handler-level
UpstreamFailoverError check never fired and the client received a bare
`stream_read_error` event, breaking long-running tasks even when no bytes
had been written to the client yet.

The most common trigger is HTTP/2 GOAWAY from api.anthropic.com edge
backends doing graceful rotation: Go's http.Transport surfaces this as
`unexpected EOF` and never auto-retries.

Mirror what the OpenAI and antigravity gateways already do: when the read
error happens before any byte has reached the client (`!c.Writer.Written()`),
return `*UpstreamFailoverError{StatusCode: 502, RetryableOnSameAccount: true}`
so the handler can retry on the same or another account. After client
output has begun, SSE has no resume protocol — keep the existing passthrough
behavior.

Tests cover both branches via streamReadCloser-based fixtures.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 backend/internal/service/gateway_service.go   | 14 ++++
 .../service/gateway_streaming_test.go         | 70 +++++++++++++++++++
 2 files changed, 84 insertions(+)

diff --git a/backend/internal/service/gateway_service.go b/backend/internal/service/gateway_service.go
index 6be19ba6..911bc6fc 100644
--- a/backend/internal/service/gateway_service.go
+++ b/backend/internal/service/gateway_service.go
@@ -7041,6 +7041,20 @@ func (s *GatewayService) handleStreamingResponse(ctx context.Context, resp *http
 					sendErrorEvent("response_too_large")
 					return &streamingResult{usage: usage, firstTokenMs: firstTokenMs}, ev.err
 				}
+				// 上游中途读错误（unexpected EOF / connection reset 等，常见于 HTTP/2 GOAWAY）：
+				// 若尚未向客户端写过任何字节，包成 UpstreamFailoverError 让 handler 层走 failover/重试。
+				// 已经开始写流时 SSE 协议无 resume，只能透传错误事件给客户端。
+				if !c.Writer.Written() {
+					logger.LegacyPrintf("service.gateway", "Upstream stream read error before any client output (account=%d), failing over: %v", account.ID, ev.err)
+					body, _ := json.Marshal(map[string]string{
+						"error": fmt.Sprintf("upstream stream disconnected: %s", ev.err),
+					})
+					return nil, &UpstreamFailoverError{
+						StatusCode:             http.StatusBadGateway,
+						ResponseBody:           body,
+						RetryableOnSameAccount: true,
+					}
+				}
 				sendErrorEvent("stream_read_error")
 				return &streamingResult{usage: usage, firstTokenMs: firstTokenMs}, fmt.Errorf("stream read error: %w", ev.err)
 			}
diff --git a/backend/internal/service/gateway_streaming_test.go b/backend/internal/service/gateway_streaming_test.go
index b1584827..389831fa 100644
--- a/backend/internal/service/gateway_streaming_test.go
+++ b/backend/internal/service/gateway_streaming_test.go
@@ -4,9 +4,11 @@ package service
 
 import (
 	"context"
+	"errors"
 	"io"
 	"net/http"
 	"net/http/httptest"
+	"strings"
 	"testing"
 	"time"
 
@@ -218,3 +220,71 @@ func TestHandleStreamingResponse_SpecialCharactersInJSON(t *testing.T) {
 	body := rec.Body.String()
 	require.Contains(t, body, "content_block_delta", "响应应包含转发的 SSE 事件")
 }
+
+// 上游中途读错误（如 HTTP/2 GOAWAY 触发的 unexpected EOF）发生在向客户端写入任何字节前：
+// 网关应返回 *UpstreamFailoverError 触发账号 failover/重试，而不是把错误事件直接发给客户端。
+func TestHandleStreamingResponse_StreamReadErrorBeforeOutput_TriggersFailover(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+	svc := newMinimalGatewayService()
+
+	rec := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(rec)
+	c.Request = httptest.NewRequest(http.MethodPost, "/v1/messages", nil)
+
+	resp := &http.Response{
+		StatusCode: http.StatusOK,
+		Header:     http.Header{"Content-Type": []string{"text/event-stream"}},
+		Body:       &streamReadCloser{err: io.ErrUnexpectedEOF},
+	}
+
+	result, err := svc.handleStreamingResponse(context.Background(), resp, c, &Account{ID: 1}, time.Now(), "model", "model", false)
+
+	require.Error(t, err)
+	require.Nil(t, result, "失败移交场景下不应返回 streamingResult")
+
+	var failoverErr *UpstreamFailoverError
+	require.True(t, errors.As(err, &failoverErr), "未输出过字节时 stream read error 必须包成 UpstreamFailoverError，期望: %v", err)
+	require.Equal(t, http.StatusBadGateway, failoverErr.StatusCode)
+	require.True(t, failoverErr.RetryableOnSameAccount, "GOAWAY 类错误应允许同账号重试")
+	require.Contains(t, string(failoverErr.ResponseBody), "upstream stream disconnected")
+
+	// 客户端应收不到任何 stream_read_error 事件，由 handler 层根据 failover 结果再决定
+	require.NotContains(t, rec.Body.String(), "stream_read_error")
+}
+
+// 上游已经发送过事件（c.Writer 已写过字节）后再发生读错误：
+// SSE 协议无 resume，网关只能透传 stream_read_error 错误事件给客户端，不能 failover。
+func TestHandleStreamingResponse_StreamReadErrorAfterOutput_PassesThrough(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+	svc := newMinimalGatewayService()
+
+	rec := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(rec)
+	c.Request = httptest.NewRequest(http.MethodPost, "/v1/messages", nil)
+
+	// 第一次 Read 返回完整 SSE 事件让网关向 client 写入字节，第二次 Read 返回 EOF
+	resp := &http.Response{
+		StatusCode: http.StatusOK,
+		Header:     http.Header{"Content-Type": []string{"text/event-stream"}},
+		Body: &streamReadCloser{
+			payload: []byte("data: {\"type\":\"message_start\",\"message\":{\"usage\":{\"input_tokens\":5}}}\n\n"),
+			err:     io.ErrUnexpectedEOF,
+		},
+	}
+
+	result, err := svc.handleStreamingResponse(context.Background(), resp, c, &Account{ID: 1}, time.Now(), "model", "model", false)
+
+	require.Error(t, err)
+	require.Contains(t, err.Error(), "stream read error", "已开始流后应透传普通 stream read error")
+	require.NotNil(t, result, "透传场景下应返回已收集的 streamingResult")
+
+	// 不应被错误地包成 failover error
+	var failoverErr *UpstreamFailoverError
+	require.False(t, errors.As(err, &failoverErr), "已经向客户端写过字节时不能再 failover")
+
+	// 客户端必须收到 stream_read_error 事件
+	body := rec.Body.String()
+	require.True(t,
+		strings.Contains(body, "stream_read_error"),
+		"已开始流后必须发送 stream_read_error 事件给客户端，实际响应: %q", body)
+}

From 4c474616b994665a104c5eeb1ccd8c5e96a31ddf Mon Sep 17 00:00:00 2001
From: alfadb <alfadb@163.com>
Date: Tue, 28 Apr 2026 20:24:17 +0800
Subject: [PATCH 28/46] fix(gateway): emit Anthropic-standard SSE error events
 and failover body
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two follow-ups to PR #2066's failover-wrap fix:

1. Failover ResponseBody (`UpstreamFailoverError.ResponseBody`) was encoded
   as `{"error": "<msg>"}` (string field). `ExtractUpstreamErrorMessage`
   probes for `error.message`, `detail`, or top-level `message` only — so
   `handleFailoverExhausted` and downstream passthrough rules saw an empty
   message, losing the EOF root cause in ops logs. Re-encode as the
   Anthropic standard shape `{"type":"error","error":{"type":"upstream_disconnected","message":"..."}}`.
   (Addresses the inline review comment from copilot-pull-request-reviewer
   on Wei-Shaw/sub2api#2066.)

2. The streaming `event: error` SSE frame for `response_too_large`,
   `stream_read_error`, and `stream_timeout` was non-standard
   (`{"error":"<reason>"}`). Anthropic SDKs (and Claude Code) expect
   `{"type":"error","error":{"type":"...","message":"..."}}` and parse
   `error.type`/`error.message` accordingly. Refactor `sendErrorEvent` to
   take both reason and message, and emit the standard frame so client
   SDKs surface a real diagnostic message instead of a generic stream error.

This does not by itself prevent task interruption on long-stream EOF
(SSE has no resume; client-side retry remains the only complete fix), but
it gives both server-side ops logs and client-side error UIs a meaningful
upstream message so users know the next step is to retry.

Tests are updated to assert the new body shape on both branches, and a new
assertion checks that `ExtractUpstreamErrorMessage` returns a non-empty
message for the failover body.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 backend/internal/service/gateway_service.go   | 38 +++++++++++++++----
 .../service/gateway_streaming_test.go         | 21 +++++++---
 2 files changed, 45 insertions(+), 14 deletions(-)

diff --git a/backend/internal/service/gateway_service.go b/backend/internal/service/gateway_service.go
index 911bc6fc..4c4a9b82 100644
--- a/backend/internal/service/gateway_service.go
+++ b/backend/internal/service/gateway_service.go
@@ -6871,14 +6871,31 @@ func (s *GatewayService) handleStreamingResponse(ctx context.Context, resp *http
 	}
 	lastDataAt := time.Now()
 
-	// 仅发送一次错误事件，避免多次写入导致协议混乱（写失败时尽力通知客户端）
+	// 仅发送一次错误事件，避免多次写入导致协议混乱（写失败时尽力通知客户端）。
+	// 事件格式遵循 Anthropic SSE 标准：{"type":"error","error":{"type":<reason>,"message":<message>}}
+	// 这样 Anthropic SDK / Claude Code 等客户端能按标准 error 类型解析，UI 能显示具体错误文案，
+	// 服务端 ExtractUpstreamErrorMessage 也能从透传的 body 中提取 message。
 	errorEventSent := false
-	sendErrorEvent := func(reason string) {
+	sendErrorEvent := func(reason, message string) {
 		if errorEventSent {
 			return
 		}
 		errorEventSent = true
-		_, _ = fmt.Fprintf(w, "event: error\ndata: {\"error\":\"%s\"}\n\n", reason)
+		if message == "" {
+			message = reason
+		}
+		body, err := json.Marshal(map[string]any{
+			"type": "error",
+			"error": map[string]string{
+				"type":    reason,
+				"message": message,
+			},
+		})
+		if err != nil {
+			// json.Marshal 不可能在已知 string-only 输入上失败，保守 fallback
+			body = []byte(fmt.Sprintf(`{"type":"error","error":{"type":%q,"message":%q}}`, reason, message))
+		}
+		_, _ = fmt.Fprintf(w, "event: error\ndata: %s\n\n", body)
 		flusher.Flush()
 	}
 
@@ -7038,16 +7055,21 @@ func (s *GatewayService) handleStreamingResponse(ctx context.Context, resp *http
 				// 客户端未断开，正常的错误处理
 				if errors.Is(ev.err, bufio.ErrTooLong) {
 					logger.LegacyPrintf("service.gateway", "SSE line too long: account=%d max_size=%d error=%v", account.ID, maxLineSize, ev.err)
-					sendErrorEvent("response_too_large")
+					sendErrorEvent("response_too_large", fmt.Sprintf("upstream SSE line exceeded %d bytes", maxLineSize))
 					return &streamingResult{usage: usage, firstTokenMs: firstTokenMs}, ev.err
 				}
 				// 上游中途读错误（unexpected EOF / connection reset 等，常见于 HTTP/2 GOAWAY）：
 				// 若尚未向客户端写过任何字节，包成 UpstreamFailoverError 让 handler 层走 failover/重试。
 				// 已经开始写流时 SSE 协议无 resume，只能透传错误事件给客户端。
+				disconnectMsg := fmt.Sprintf("upstream stream disconnected: %s", ev.err)
 				if !c.Writer.Written() {
 					logger.LegacyPrintf("service.gateway", "Upstream stream read error before any client output (account=%d), failing over: %v", account.ID, ev.err)
-					body, _ := json.Marshal(map[string]string{
-						"error": fmt.Sprintf("upstream stream disconnected: %s", ev.err),
+					body, _ := json.Marshal(map[string]any{
+						"type": "error",
+						"error": map[string]string{
+							"type":    "upstream_disconnected",
+							"message": disconnectMsg,
+						},
 					})
 					return nil, &UpstreamFailoverError{
 						StatusCode:             http.StatusBadGateway,
@@ -7055,7 +7077,7 @@ func (s *GatewayService) handleStreamingResponse(ctx context.Context, resp *http
 						RetryableOnSameAccount: true,
 					}
 				}
-				sendErrorEvent("stream_read_error")
+				sendErrorEvent("stream_read_error", disconnectMsg)
 				return &streamingResult{usage: usage, firstTokenMs: firstTokenMs}, fmt.Errorf("stream read error: %w", ev.err)
 			}
 			line := ev.line
@@ -7114,7 +7136,7 @@ func (s *GatewayService) handleStreamingResponse(ctx context.Context, resp *http
 			if s.rateLimitService != nil {
 				s.rateLimitService.HandleStreamTimeout(ctx, account, originalModel)
 			}
-			sendErrorEvent("stream_timeout")
+			sendErrorEvent("stream_timeout", fmt.Sprintf("upstream stream idle for %s", streamInterval))
 			return &streamingResult{usage: usage, firstTokenMs: firstTokenMs}, fmt.Errorf("stream data interval timeout")
 
 		case <-keepaliveCh:
diff --git a/backend/internal/service/gateway_streaming_test.go b/backend/internal/service/gateway_streaming_test.go
index 389831fa..f3a52553 100644
--- a/backend/internal/service/gateway_streaming_test.go
+++ b/backend/internal/service/gateway_streaming_test.go
@@ -8,7 +8,6 @@ import (
 	"io"
 	"net/http"
 	"net/http/httptest"
-	"strings"
 	"testing"
 	"time"
 
@@ -246,7 +245,15 @@ func TestHandleStreamingResponse_StreamReadErrorBeforeOutput_TriggersFailover(t
 	require.True(t, errors.As(err, &failoverErr), "未输出过字节时 stream read error 必须包成 UpstreamFailoverError，期望: %v", err)
 	require.Equal(t, http.StatusBadGateway, failoverErr.StatusCode)
 	require.True(t, failoverErr.RetryableOnSameAccount, "GOAWAY 类错误应允许同账号重试")
-	require.Contains(t, string(failoverErr.ResponseBody), "upstream stream disconnected")
+
+	// ResponseBody 必须是 Anthropic 标准 error 格式：
+	// 1) ExtractUpstreamErrorMessage 能正确从 error.message 提取消息（被 handleFailoverExhausted / ops 日志依赖）
+	// 2) error.type 标记为 upstream_disconnected
+	extractedMsg := ExtractUpstreamErrorMessage(failoverErr.ResponseBody)
+	require.NotEmpty(t, extractedMsg, "ExtractUpstreamErrorMessage 必须从 ResponseBody 取到非空 message，否则 ops 日志会丢失诊断信息")
+	require.Contains(t, extractedMsg, "upstream stream disconnected")
+	require.Contains(t, string(failoverErr.ResponseBody), `"type":"error"`)
+	require.Contains(t, string(failoverErr.ResponseBody), `"upstream_disconnected"`)
 
 	// 客户端应收不到任何 stream_read_error 事件，由 handler 层根据 failover 结果再决定
 	require.NotContains(t, rec.Body.String(), "stream_read_error")
@@ -282,9 +289,11 @@ func TestHandleStreamingResponse_StreamReadErrorAfterOutput_PassesThrough(t *tes
 	var failoverErr *UpstreamFailoverError
 	require.False(t, errors.As(err, &failoverErr), "已经向客户端写过字节时不能再 failover")
 
-	// 客户端必须收到 stream_read_error 事件
+	// 客户端必须收到 Anthropic 标准格式的 SSE error 事件，error.type=stream_read_error，
+	// error.message 含具体根因（让 SDK 能解析、UI 能显示具体错误）
 	body := rec.Body.String()
-	require.True(t,
-		strings.Contains(body, "stream_read_error"),
-		"已开始流后必须发送 stream_read_error 事件给客户端，实际响应: %q", body)
+	require.Contains(t, body, "event: error\n", "必须按 Anthropic SSE 标准发送 error 事件帧")
+	require.Contains(t, body, `"type":"error"`, "data 必须含 type:error 顶层字段（Anthropic 标准）")
+	require.Contains(t, body, `"stream_read_error"`, "error.type 必须为 stream_read_error")
+	require.Contains(t, body, "upstream stream disconnected", "error.message 必须包含具体根因，Claude Code 等客户端才能显示有效错误文案")
 }

From 7452fad8205b2a5ece283ba4ae00741303e1ae00 Mon Sep 17 00:00:00 2001
From: Oganneson <git@oganneson.com>
Date: Tue, 28 Apr 2026 20:36:50 +0800
Subject: [PATCH 29/46] fix(openai): drop reasoning items from /v1/responses
 input on OAuth path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #1957

The OAuth path forwards client requests to
chatgpt.com/backend-api/codex/responses. Before forwarding,
applyCodexOAuthTransform forces store=false (chatgpt.com's codex backend
rejects store=true). Reasoning items emitted under store=false are NEVER
persisted upstream, so any rs_* reference that a client carries forward in a
subsequent input[] array triggers a guaranteed upstream 404:

    Item with id 'rs_...' not found. Items are not persisted when `store` is
    set to false. Try again with `store` set to true, or remove this item
    from your input.

sub2api wraps this as 502 "Upstream request failed" and the conversation
breaks on every multi-turn /v1/responses request that uses reasoning + tools
(reproducible with gpt-5.5; gpt-5.4 happens to dodge it because the upstream
does not emit reasoning items for that model).

Affected clients include any that follow the OpenAI Responses API spec and
replay prior assistant items verbatim — in practice this hit OpenClaw and
similar agent harnesses on every turn ≥2 with tool use.

The fix: in filterCodexInput, drop input items with type == "reasoning"
entirely. The model never reads reasoning summary text from input (only
encrypted_content can carry reasoning context across turns, and chatgpt.com
under store=false does not emit it), so this is a no-op for the model itself
and a clean removal of unreachable upstream lookups.

Scope is intentionally narrow:
  * Only OAuth account requests (account.Type == AccountTypeOAuth) reach
    applyCodexOAuthTransform / filterCodexInput.
  * API-key accounts going to api.openai.com/v1/responses are unaffected
    (store=true works there, rs_* persists, multi-turn already works).
  * Anthropic / Gemini platform groups go through different transforms and
    are unaffected.
  * /v1/chat/completions is unaffected (no reasoning items).
  * item_reference items (different type) are unaffected — only type ==
    "reasoning" is dropped.

Verification:
  * Existing tests pass: go test ./internal/service/ -run Codex|Tool|OAuth
  * New regression test asserts reasoning items are dropped under both
    preserveReferences=true and preserveReferences=false.
  * End-to-end repro on gpt-5.5 multi-turn + tools: pre-patch 502, post-patch
    200. Repro on gpt-5.4 unchanged. Three-turn deep loop on gpt-5.5 passes.
---
 .../service/openai_codex_transform.go         |  8 +++
 .../service/openai_codex_transform_test.go    | 53 +++++++++++++++++++
 2 files changed, 61 insertions(+)

diff --git a/backend/internal/service/openai_codex_transform.go b/backend/internal/service/openai_codex_transform.go
index e765d7e9..59fb7a33 100644
--- a/backend/internal/service/openai_codex_transform.go
+++ b/backend/internal/service/openai_codex_transform.go
@@ -853,6 +853,14 @@ func filterCodexInput(input []any, preserveReferences bool) []any {
 		}
 		typ, _ := m["type"].(string)
 
+		// chatgpt.com codex backend (OAuth path) does not persist reasoning
+		// items because applyCodexOAuthTransform forces store=false. Any rs_*
+		// reference replayed in input is guaranteed to 404 upstream
+		// ("Item with id 'rs_...' not found"). Drop reasoning items entirely.
+		if typ == "reasoning" {
+			continue
+		}
+
 		// 仅修正真正的 tool/function call 标识，避免误改普通 message/reasoning id；
 		// 若 item_reference 指向 legacy call_* 标识，则仅修正该引用本身。
 		fixCallIDPrefix := func(id string) string {
diff --git a/backend/internal/service/openai_codex_transform_test.go b/backend/internal/service/openai_codex_transform_test.go
index 75f5c55c..b392cf96 100644
--- a/backend/internal/service/openai_codex_transform_test.go
+++ b/backend/internal/service/openai_codex_transform_test.go
@@ -1,6 +1,8 @@
 package service
 
 import (
+	"fmt"
+	"strings"
 	"testing"
 
 	"github.com/stretchr/testify/require"
@@ -1094,3 +1096,54 @@ func TestIsInstructionsEmpty(t *testing.T) {
 		})
 	}
 }
+
+func TestFilterCodexInput_DropsReasoningItemsRegardlessOfPreserveReferences(t *testing.T) {
+	// Reasoning items in input[] reference rs_* IDs that were emitted by
+	// chatgpt.com under store=false (forced by applyCodexOAuthTransform).
+	// They are never persisted upstream, so forwarding them produces a
+	// guaranteed 404 ("Item with id 'rs_...' not found"). Drop them
+	// regardless of preserveReferences. See: Wei-Shaw/sub2api issue #1957.
+
+	build := func() []any {
+		return []any{
+			map[string]any{"type": "message", "id": "msg_0", "role": "user", "content": "hi"},
+			map[string]any{
+				"type":    "reasoning",
+				"id":      "rs_0672f12450da0b9c0169f07220a6c08198b68c2455ced99344",
+				"summary": []any{},
+			},
+			map[string]any{"type": "function_call", "id": "fc_1", "call_id": "call_1", "name": "tool"},
+			map[string]any{"type": "function_call_output", "call_id": "call_1", "output": "{}"},
+		}
+	}
+
+	for _, preserve := range []bool{true, false} {
+		preserve := preserve
+		t.Run(fmt.Sprintf("preserveReferences=%v", preserve), func(t *testing.T) {
+			filtered := filterCodexInput(build(), preserve)
+
+			for _, raw := range filtered {
+				item, ok := raw.(map[string]any)
+				require.True(t, ok)
+				require.NotEqual(t, "reasoning", item["type"],
+					"reasoning items must be dropped from input on the OAuth path")
+				if id, ok := item["id"].(string); ok {
+					require.False(t, strings.HasPrefix(id, "rs_"),
+						"no item carrying an rs_* id should survive the filter")
+				}
+			}
+
+			// Sanity check: the non-reasoning items should still be present.
+			gotTypes := make(map[string]int)
+			for _, raw := range filtered {
+				item, ok := raw.(map[string]any)
+				require.True(t, ok)
+				gotTypes[item["type"].(string)]++
+			}
+			require.Equal(t, 1, gotTypes["message"])
+			require.Equal(t, 1, gotTypes["function_call"])
+			require.Equal(t, 1, gotTypes["function_call_output"])
+			require.Equal(t, 0, gotTypes["reasoning"])
+		})
+	}
+}

From da4b078df22c295b3dd665aea1714bcf14184bb9 Mon Sep 17 00:00:00 2001
From: shaw <shaw-wei@foxmail.com>
Date: Wed, 29 Apr 2026 14:41:35 +0800
Subject: [PATCH 30/46] chore: update sponsors

---
 README.md                         |   7 +++++++
 README_CN.md                      |   7 +++++++
 README_JA.md                      |   7 +++++++
 assets/partners/logos/pateway.png | Bin 0 -> 8228 bytes
 4 files changed, 21 insertions(+)
 create mode 100644 assets/partners/logos/pateway.png

diff --git a/README.md b/README.md
index 3e609d65..718730c6 100644
--- a/README.md
+++ b/README.md
@@ -101,6 +101,13 @@ Sub2API is an AI API gateway platform designed to distribute and manage API quot
 <td>Thanks to Bestproxy for sponsoring this project! <a href="https://bestproxy.com/?keyword=a2e8iuol">Bestproxy</a> provides high-purity residential IPs with dedicated one-IP-per-account support. By combining real home networks with fingerprint isolation, it enables link environment isolation and reduces the probability of association-based risk control.</td>
 </tr>
 
+<tr>
+<td width="180"><a href="https://pateway.ai/?ch=1tsfr51"><img src="assets/partners/logos/pateway.png" alt="pateway" width="150"></a></td>
+<td>Thanks to PatewayAI for sponsoring this project! PatewayAI is a premium model API relay service provider built for heavy AI developers, focused on direct official connections. Offering the full Claude series and Codex series models, 100% sourced directly from official providers — no dilution, no substitution, open to verification. Billing is fully transparent with token-level invoices that can be audited line by line.
+Enterprise-grade high concurrency is also supported, with a dedicated management platform for enterprise clients. Enterprise customers can sign formal contracts and receive invoices. Visit the official website for more details and contact information.
+Register now via <a href="https://pateway.ai/?ch=1tsfr51">this link</a> to receive $3 in trial credits. Top-ups are discounted by up to 40%, and referring friends rewards both parties — referral bonuses up to $150.</td>
+</tr>
+
 </table>
 
 ## Ecosystem
diff --git a/README_CN.md b/README_CN.md
index add32a17..24600e0e 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -100,6 +100,13 @@ Sub2API 是一个 AI API 网关平台，用于分发和管理 AI 产品订阅的
 <td>感谢 Bestproxy 赞助了本项目！<a href="https://bestproxy.com/?keyword=a2e8iuol">Bestproxy</a> 是一家提供高纯度住宅IP，支持一号一IP独享，结合真实家庭网络与指纹隔离，可实现链路环境隔离，降低关联风控概率。</td>
 </tr>
 
+<tr>
+<td width="180"><a href="https://pateway.ai/?ch=1tsfr51"><img src="assets/partners/logos/pateway.png" alt="pateway" width="150"></a></td>
+<td>感谢 PatewayAI 赞助了本项目！PatewayAI 是一家面向重度 AI 开发者、专注官方直连的高品质模型 API 中转服务商。提供 Claude 全系列与 Codex 系列模型，100% 官方源直供，不掺假不注水，欢迎检验。计费透明，Token 级账单可逐笔核验。
+同时支持企业级高并发，并为企业客户提供了专业的管理平台，企业客户可签订正式合同并开具发票，更多详情进入官网获取联系方式。
+现在通过 <a href="https://pateway.ai/?ch=1tsfr51">此链接</a> 注册即送 $3 试用额度，用户充值低至 6 折，邀请好友双向赠送，邀请奖励可达 $150。</td>
+</tr>
+
 </table>
 
 ## 生态项目
diff --git a/README_JA.md b/README_JA.md
index ccd595b9..1e89610c 100644
--- a/README_JA.md
+++ b/README_JA.md
@@ -100,6 +100,13 @@ Sub2API は、AI 製品のサブスクリプションから API クォータを
 <td>Bestproxy のご支援に感謝します！<a href="https://bestproxy.com/?keyword=a2e8iuol">Bestproxy</a> は高純度の住宅IPを提供し、1アカウント1IP専有をサポートしています。実際の家庭ネットワークとフィンガープリント分離を組み合わせることで、リンク環境の分離を実現し、関連付けによるリスク管理の確率を低減します。</td>
 </tr>
 
+<tr>
+<td width="180"><a href="https://pateway.ai/?ch=1tsfr51"><img src="assets/partners/logos/pateway.png" alt="pateway" width="150"></a></td>
+<td>PatewayAI のご支援に感謝します！PatewayAI は、ヘビーAI開発者向けに公式直結を重視した高品質モデルAPIリレーサービスプロバイダーです。Claude 全シリーズおよび Codex シリーズモデルを提供し、100%公式ソースから直接供給 — 偽りなし、水増しなし、検証歓迎。課金は完全透明で、トークン単位の請求書を1件ずつ監査可能です。
+エンタープライズ級の高同時接続にも対応し、法人顧客向けに専用管理プラットフォームを提供しています。法人顧客は正式な契約を締結し、請求書の発行が可能です。詳細は公式サイトでお問い合わせください。
+<a href="https://pateway.ai/?ch=1tsfr51">こちらのリンク</a>から登録すると、$3 のトライアルクレジットがもらえます。チャージは最大40%オフ、友達紹介で双方にボーナス付与 — 紹介報酬は最大 $150。</td>
+</tr>
+
 </table>
 
 ## エコシステム
diff --git a/assets/partners/logos/pateway.png b/assets/partners/logos/pateway.png
new file mode 100644
index 0000000000000000000000000000000000000000..7ca3489a9248cd12668f05c73d1fe929ef1795c8
GIT binary patch
literal 8228
zcmeHsiB}S9{5O^Nre@kTHE)`hsg;?gC6#ODl4zK_iCbnasJM#?qGgMjTj7?u<}QZh
z0_Mu?S{j*}3!vhXp^*~d2CnFDy61iWhxa|_$AKBn%$#{<p69!KmSh|2>jy=Tih@9(
zgAfZ-I}m7ZHSqtd@SnhTKs_rG*olN%xI}<J2M+)K?E$?kkOhGhX%JImhnND+q#lxC
zNm^W5?l>0teOcJ8-&)mQXutgZ>c51K0-I;@dnkWB>(@Pb3@LP)ow_It%D5Lg`d9E*
ziB12!jPV%9=bO8<@xaN>>#9#<f@N?>jE!aX@26ROzxy^A*n{3=2uTAwj}yj!fI#=0
zEo}Axk0&kZ033^X^uMS6|Co*J1Yj|@B_F>MO}sieF;RV7IE%K)BhSUwJuS7FCMP&U
zu+<l#j<!<bt_MJ%y21Sw4)d*lg`O2NiHeS{Dm}e<?gX)ZENFyI@TB@S&R0hPF<_vU
zy!N-3Y)+VbF5+Xu>(tVZ8_`O_iN}S%Ek95giM9yDcQfmcrkUdt_`Z#{0|NtU11mHm
zIS{B_Lb>`FLva|My1^k=z;PZP9u6-nJ5(aOWu<$T6cKRyBG$sjMjm|!1S$cuCg0L!
zx?hBY^RwF~`!}Uy<$DUrN)m}zM=C3U0am<?4+$9`dT;i4<xe-LKfZS?h<3WBx8e20
z?xrbHXXdadMD4#TbpDOqSEuyvdG$v1747;9-IpQOXC)*g`ZFZ{JAFjDUoLcMX>suj
z<W94jh7~@lj0DHKLJQcV9Ax3D=wV<Gp)lf1`<^4m+*PWwd8o;13^Yl;NhRHz)${Z?
z@ZwK;pX@qXLZt$<x~r8Tx|7|W34)3y+QRhiPV@ToQSTA*4v!!XQ}Ii+HA>iV*xeXN
zaJvK~RZP0Jy$&2tMYeI8+=e^f-9B#pIJdXrX=!yPf5cYm(dP2pf!?ycpnI7+JyH8K
zsKdkdA(R?BG8RhFF7K8lVC^g7Rd}655fErS$$48x)*GdHjX-qT6>t;qXj{L=Jki7w
zhB<X{AYsQIBiN)yT>vt<=Z7)g>$1G=SRI0Z&*-SiJ}-vm!~$g}H%W;2xT@;ZDFzi*
z%R^z79mzD<hp-9n+F3W1bztGOx4VGHNYm<uOWZyr6wo$_dw08JS$~&9e&ki{+nBdz
zkag+OrMmnty7;J5$<ek_gy)rTcCiGg6L=234e=sWkL)L&WWsJa0l{uBeeQ;T{&wW3
zcv3)zM_ce?y9K(4r~5tQL)GWSCcz)=UREyf`Lw7DFrQRgmz=21LLg3o_?VkBtALNk
z=3M}Vs-!c=2Z+j$k=y6}n~g&(#@E)@tDR~ct2iy-)e_N-i%X})x>6vPYa3Ht>_*)6
zR}1*Q6*m<~Y31;I+j^A7yzPt^$W6ohMj6XhXL`n9;s`-n1b2wy7=aCUxr~2#=!n_#
zmjkQ--9nG3!m*jqY9Q>W)7X-|xwUC0>}{oLD0he2wL%jicK>9{rkQtWDC_~b!@ac|
zYDN0|_lu~7nx$$1vxL%-`U9jqu6Dhe*!j^bbp4T#<C7)zKKGTg{)(enC-;Fo4fOS6
zHe7xj5y3sXDH8CqGJpuHp$0_RsyvAsPojU4EdwT(?F+hhE5D-%YiF}~0Ju=X2eYj2
z{NUZy$*Cz^PtSE8@b1AOArdrm-e88Jk=vyi_a~)K=&m3QODQvadKn#tZfj7o_Ld>r
z6tUC>BK!{9;$~NElwed$3Uid5tZk^gp&@PdN+#t4g**tW&Ci#U7BVjL-0hlKoaBap
za?4Vjm!qSj0}P9scUpdUKtA5l&dbvHiEJB^jUZvVWlwP$eO8WXoKxP?@6{gk+iz@B
z=TsZs%t&721vf`@hPb(->ll`Y9;z#^4bFwT)H$7Hlj*SLsj189_+~}3=PLp<{N34|
zqizC8S9KXD!s3d)5yOOKNtBfQ2^5Rc({|0x&EM*~5&d_lFtP@(O)-_bIr&N;v@5Th
zd&Maj^2>2}-eFZ;A<bL|CX^fV!PCT63RpYX5wQ@HB35ZiSmRiX%f5d4`wdBlF1>Q1
z^@HPY;`<aIziMr3vxl+iGk-XaHfat-I7*o@D{f`rl_DY{yaj)Qnf>5=C<iC$C>IhO
z>>&G?kUXztX50R&j5|%PRffFZx*m8=?e^5&`n~hIKcIBO+#A020uLZO$h$jRywL(Z
z^R1zaj%Kz%q4mtjUF{)8>Xd8H0?FmPZ;0ZWEzS+>`v65FeSMpz-Mv@1-5u}0z;a!3
zT%x&`99g`3vpf^Uaso;7Ol#bSUGTr>r8sMI;c&qiLc(*5gPaR*QSUncb5)O~axA!%
zsL{g=3#7;A_yEVHh$h-2CxsKoS!1XU8BA`UuRYoo7RcrVHM_D05L*g`J}w)pt9LQ&
zO|(*`PVqqDWbUUS&SLFx;n$3oUHz<y#`3jS5x|o4cg6100B@q6&^q8es-Y`A?mj=S
zL`}uYPFZ4AqsN&*4`lBz2@%23S_}Go3rRlu3a&&m?O(wckMhFPvwa$u`mxM&EsDso
z<lE@>?<*}lKazp3{!=A3q9<%(fmBiNrh#X)CQMrX>Oky*jvI5pvJb|uVwHhr&@zlD
zhr`f6Z(x<D+U*8b3W$TQs~pB9Hks#>`XltR1B1i=ChC{uRo$S@juT_t)wTtW{9M9{
z9e5^|JnCG9GSyKp7pDX+0`DIvhseL7s@B`kX!55r2Z5d|YirYElLec+z(5ARi;qs&
z;&Yn4gky_bJr4stVroJnOhF{TWM#t*S~YBKs0X#zkrg|N>AtJIbjQwan0tR3(!IG6
zX+ne50{N;1S?KC|@Ra$s86zun|0#2QcG^O+sFh+pz59zx-X-n07JsVz+q*x9mzl1U
zld5HCrtQ&kTC^&eT+W|=g&g0x?KeN%do6K1k#e{67nkb_6*`jgsI{X5(XQ@qE48q*
zGc~_l3N%y?WeVLiTpwd;bGDX>Lx#9|KJI#z>9KoU*t%}76Lg~9HrlsU*=yp|d_BVt
zt+Ad}-~?2n7Pq2pIf`5cDZU+ou5dRz88nbzdRp8g6U!dyp>$nN6(XwS_n9C3^FcQO
zg3B96bXwmSZd!RI++$&N|DS#x<!wXF4-2T8(Lw)6?RPP;vGqHzbq&m;(0L2nKc0!5
zdZvuRh;}W`8lZ_Vms9r;yk+-KY>anm)J<-^$5iQQ(`?JQ?ZI@T=Ov`m0};|@&w>7y
zvYvPoYieL<h|~1!j2yCh1H+Fnncf64wrco{k3lGqZd0AXav42o0e76{%5e>@QH;>k
z*{IhS;Xw^3m%>@gAwxy9&DpASXIrV#+151rHmUf;-(6#WP&UFBTGYnKBc>hl{eA)N
zfdE993BA?NTG38thPO@Txa4F5Uj~Qdrja}6e{q4>bp4P}M^;aOHhNvMjO()9HDeR0
zA7r%lMSW+XAdR`6;g8Zdb{0AtE|XB0J^JmkVeC2QwY?Ov?FM}hnd^3)7P@-qi${~m
zyN2_`5Z8WOf7A+pdxC=`tW7twmXU3EB8<^#?~|&`iX0x>c6T)Up%lbD!AehXuJ~mA
zN8`Gl1nb~&l(={5x-mSknL+dj{3Th+6>`LyS|}j9X5PrBna_n@mJ)=DcD-@KJAHfT
zxOx1Sca1%yC4Vy5Sv;w$KYg-h_8oubErVm)s}IH(t3#hOGw@WF<-*3VVoh7AY~w?>
zxcj_s{B3577ldkzH45r{#$NmSO=6*#q;h^u-%_r8Be=+3xw<fE<<CRuN>58^X!~il
z*=Tg4S`B{GDdavU{+a&6UOWE1pSs4`{36=9havgpMY_()$#t_{6HP~Kv4^bq&8taQ
z<^D3Wt<_0VVTBYvY!_v0ZK_ZWi7mA8%_Tp%|9k(FVkS#Ct)v^Ohgs7f99IiVEQ3=q
zMh{c9N|RcZmwo^vNP$C8os3^)-XiVgYZN{a?P_v1=<pqijkL1?1{+b=NB=TZ#MSNj
z5;ZhJV+#a=K(sq`lDuV{IF2|E5mkdoMkOQ=3^AMKKt56L)Z0L??R92|lLHZ|raaTu
z&b2k}bX(x(J+Vb8sv?^g0qEBvC&Dnhn0_pf!h@qbn;bWLk!VsvZe>m5C+U@q36xQB
z{;gAa2#@HQeULo0+{`i{^?0VHj6gHHL_FnR$sa)D_i+61@_O0xqhfyo5XK3!xeCFo
zFV6f-dtLiMbg|05@sY%HDPBlQ3WD?w>@^+)PCKm1nEg6pGJ*WwfN;0Z7ZU93bP_pZ
zb&)D(YN=p|9o!_G@Y?<`@z>KEn9R6K5UMC8!+X9%N}y!?DBE5_I7f>!5Toy8E0w&H
zmf|5<+`H#k8Ikm>4{_3~0s0L9opGyV2#Ohms_o~Uu7C$Pj!cqh<+J<mTN%Y~`*Rwx
z%JsjV9}`b1y(9*N$eLwqyPs7+u_{E#vRoxa+~zPfM7jD+_<|QInewp$00(11stV?|
z)Nk)&v2+Y#6#z<JyyJq^`QgD`+V_SkumoF8t9fIG#jiqyk0T{~k0Iz8B#h5F%t6E!
zQ)eKe3X<STyeaXVR*=2(^?HJZjn~}Bz8bDpI=uvzxP0rOS2fV?8^>;Z2z!lebH{%h
zB1v2;P&9f=z?V;}E^VwYF)0Zlj2y6Jx0-De1k}%aHm2&K;mM6QYiKQ+)oJ2<5v7Ze
z@3su;BC=P->Fz~GCbyE<e}3IP&}H+7aY=A+u!nO*jl#>V+M#yfV|{1AipViBP}iV`
zcA9p}X8X8kGH?v5hGKQ+UDt1tp81eY=BtR#qe?dJ&0D)|D5$4janV*=$aJTR!Qz05
zCYS=vRv(=}_orzRp_MQBtByS-tyPwf!ySSc0ksR#qzzje;?@L)ChJ|e+O|D&ROo(`
zUIk(8ELG?l!%NjmR`=$3*ozgrz1%CQT5~b_b&lxsy$t~M;W@?6-NN*)kpf2QnLp$p
ze=B?6xN<b@)8%wI`nxP_mgjuK=PU$b3@88^xxMu4rdE$Uj$86Eh|4Q`pQ!#=*SgQ&
z^{)^O7y_p%T9EAHxghXi`Yhr>c^WCG?m0Ms&HoEKUIn$h00}Uo<C87(YJwjqI9yNF
zBC58x3sMLD%gxK4!|+Z3jza2AEzZr&ebw}2<h}ioffys47o*1L%Mgm=(}K{{CR#_j
zt6ntJYN}kwIP^``$M5UpW1ljWDP#0?FIk13dnUOMLzRID+p!XWW9S@B(<t3feyQ#=
zOiM~iY9!zho{nvGZ<Si;Qf9VWQ%wxc>s|iL%q!X<wa9_kob9P^^Yb;C9ys-0T|qYT
z=Mj|L9~qhkaSO(M4L^ejty+~!*c`-C#eQP#Tta?>i~l?REd?~gtI=~>X1aO4UXOOc
z$F*b{sZ()YOpi+;sa#<N4X`5CZczk1b3>KbM}KcR`OVv-Y}yX0LU;g${dU=DWuraQ
zbG=)Z=%zsp0tX5jvvsHJ%ZT6URj2+#-e2z$^0mxAgxw&_KU&{dVU3aN3oRmlzF?%*
z$Dsv`)G<AN3l>($O6~EM?x~L{Xp#szWmU)~Hcu8u^+b|ButXpuraTFefSdo(1U^<O
z)%+xJG8m2N<E^f~$H}LwK<*S4czo;H?r4a0gzLR43;s8z8d=q6=|mM$WFY0of1R<F
za$gliVkKc!SUpDR+7K3s(C&_LADSO+5skL6L4Q>`Bb;~@U~vn7L@zWp(EzTMxqjJ3
zrIPiVJ@O<&Q#Dec&tn+hxJjaX#<ss9%_K0>t>ai#fGcBVA{y7d<f%)+HYfdhN3v3E
zuGbGa+)1vVM+aT$_r^L%xbbcQ=|p1_fl`N$n`)t7a8wG;JpHOOpkqEa_XrV&f4vr!
zsgNotT_!PB1ouap1IUgl44lA4rKD%4jkB}E=j1!ygvmZ8tEhp`ZpvGkdU|-g=UFpw
z3FEUpPQ;stBUWz;M;w#xHaJL`C5Rn(n?-%AOuXsd($;pET5Z;BVWS7m?<?Tn0w}0?
zneEvlg?(vnEbSB(6>H8$C-!yLC=~rJe^A)V)DxNMhpn-u=VT6KSJuSjV`wNuN_i1?
zC;TZw<pmU|irFQd&_f7ZsJxyhqMzB;);8U5K#8H)mp8sUlJX%?;1B0~2PmIQ2!aaE
zA27<gI+o6k6rgq`AK!B%e%yMkl|EcBFw#nUQd-@1Ch1yKfXJ_&;o;1feCE>VhSra>
z%8mZ+)Du}F4})}>4h=?ec^^VtuQ&AjZAFR)H#6u7WkkcRH5N|Tg8gdzbKDdMd8+wL
z&4~be>c4ZbRat5PUD??dVBitkILz1OiL4RBD5#y$OPB4*KfP`AW{20SRWEGSqH66j
zDW*Oy-7l_7SkP8I@J`k}xE7^($CWT`GvfHfm90$Up#J2D3-cAdMdLv$6Ko75WP-^o
zTQf-?x=4&?HW}Th>I)EdZ!MoO%snqP(*fXd3n?=X6<=z2aFlv(@Az~DYA!*95M7sd
zcegG6MT^q8816}iw=;3tcYD4?aHnw^=dwk*P%)5}+ti^5@ujr+qJQ!|=NX+-(O2rU
z#Y_lAk68YAO|@5@zJMaMRI}D(R|Q49);5Kg^?pN#WS)a-R?VUr%1Dh-Rd2^cRPc5U
zG|}(EMrCV~1?{%c+zM;JsW<}SE#h38?Wno46BN%73DzLM8Irdw1vvl3nXV-3Ys)5P
zL%xxKrm?Sc=S~*Cl;q<523G!0k8>IK{%l;9_3<n3lN5yKTb!gG@g8i?%)IN3aIv!=
zuB`f~`mR>>QK4O5VslMGzy*g_Tuey*p-X@u!!xZs{WLwN5M`x(1qX}tX>r5Nh1US3
zB1|3-Oz;3p2V}2ObaGR;MXo^y_L520%$HvFmO;4TA^!NktUWW}UR5<-;J;=^toB>}
zS>kE8ZV0i|_9Dzbl$>uFrYhIy9^S_S=n-oRrNOYi*5HC%9v!L<>GI7RJ^l2x>cW_M
zNPylIpM-0-CZAa!LNuIXP&@$@j?Eii2tg4109A;@dNkWP++D9L4GSEUWP+!kL+pNI
zIXCz92Gvz9mM=XzyC7$7L#IMhC+?<=N@G#}gax}m56gl0j?*_3umXo19*s*9!AEfh
za$a-e(-GK^2)Cfj2Q#5DNHka^jM%5TmY<Zgzn1ee5cN|9GH?Y<iI*N?(|pOBIsZV$
zy@&IdjI}n>SLuXaas0@)e9GPM$Zz&iW`Gic<R80*?>b14ZE9r_9(Q?FvC{1tX7)#Q
zCN-J+%J<;YF41ANfX)KUs7nsjP@F{A80h%-wamjeUKhlv{OlS;+HE=N*}yY+;>$i?
zYn)5d704jJaY4*V@6rwx`KZ=XF>_40>sU~dO^IuN<7}WPTp~3Pn4#Xe1_0%k(iNdW
zU^!M#4QH`qKb7Qh$EU-?wd3drEZ0^_XMy*8Ro!C2EN70OQ0_g=ye0AMuB@>N(1rm}
z|5#eq`x8JP(tDTyeK4vsf^g{!e<m!)qY4o;j1<N;bvJR&!hz<jkRgZ31=mr*{EAt_
zld&D1c>}CdM)ukxef+m;9U%h;nXmQ}8MjqxayF*Wn>i^Ib?{kwi90nYh`}_F7pDT0
z7SLpKb91Lrw~4d`Ak@`__p%#|5zqV0(@QPPY*%}*Ij}$?Aj&eCOvPPa!{S<?p$@KT
zcem4y)}a(6&Xu`9WrkqUt<ysn5pf7BbW>0+%wWz<F#NO$0e~t41A`hsGy_DUc)g5N
zt=~gT-3Z33TIt|>&Wv4;>i+<oD%$2^R|o{hI$Yo|332o0O*~ZUvAjq?Z`|8=8|j$Y
z?%v*%%W$}zQS*Tz(>5iDu!_}91x5}V!{;!Xrw)%OLyn%RCNVy*xWy@0_+_;<jRgS*
zQa+Sc&^|&kG5IHy5P8;BG!``KoTufv$-pjIhp&Y%23Yv4XW@OMTzoPRHnut!rvo<s
z8CY@K7UUbguQ1~gVvUo5J97qfdk9dhq8f?k$xu4x_bafExpq&8=tF-7`l<_vSe3(Y
zO~y;>szT}g+_Qt7WHNcw>_AYifx-8!HEu_U{brZ9VeuL=uBf;eJ=Wi;DiY&K-#9Up
z{07r8MUXN(G2Yxs(~p<1uyHj{L`|uz-EeQ+h@0AA;|QK)<oLcZ=h8w}3()h9<d=iz
z;dP9Cj5RQq%he;Z3=Iu+^ZKUC{cKTb^s#<mCdGGlRSeRpHff!ZbU#P;eS(w5h%ps|
z&wv0Aux)6SJ@J-0DsZb_PHr<SAz|8O`Ry%r5A@Ae{eA#|<Z2-uvE$L2H|1XHud|C3
zp(Gd9bR~RzEaS<-sB`k2={4m1vn0#H3MgBi>%D8fjGCs7TF(c@=NLxy#GbhiWWW_g
z=kF^U@3i%7rJ_2&{G*e@SnT*b@~Yb2dj7@b^xf0yA+D{Jl#MU5dKSHxd0~jGIRfTN
z-i@Q0{<do#BeUCCZu_@h)+yaht=E_eVSW(|BIhc>gh5@O18k3*SS-XP=a{}xMGv|5
zkw(z9h5$Dg1&<f^wC(>-)#=2Jw2TM4*D23gh0~C3cM@Slv|%w_cdBi&4oJ!CirS>D
z^7%A|tbMIa;G}LD;49QpKLw0g%K_vZNLzsPrsd{g<f0aa!8K%@L&uiqFrw1{9JTPo
z7buTO_c?052+uOCU~IMLQ6CA}HaUnQW6KA47@L>dil|_02~tjRXL|R(jAN7(rU`}@
zr^Uzh%9ee*9NeLtRlDSNSyMQXPW3cQ*xBxRf3NhkN1#O|d;u|^jRUh^p~|SW1OMsH
zX+4?&cu)2_C*H*jXmvHIgDk=D-zR>cYmxU>9@kO8YS{!JAiL!!b5Y$p^;5&=O)i4V
zt#a6U-5Q@|N6^6xU-S{*gPPN~>Le}3>YRZ{KpE3rJWXG~KCpVa6f-d<C_Ek~p$dsA
zfgfzEvp|brmEj0dq|OWo6j~JNb)OnA=c9SvhQ6y-p;{*wmSz~Y{?#4+6i|Mb4yr>W
zc|gxIuN`tjwJ3moxXSk|5Xd_XJ6nd9!Fw<fQ)y`YAWiux_%)qvK3(xk?EB^PSdQr-
zA+Nt->?zglzaFvxf5TbMFb$x_kc-_}nUwcxwgwK!CxE?iSsum?*H@t9u3kL_3QK#Z
zzLUOjzAQnv5C>qP1;58$>$(hv?22yh3Xod?jbn?tiXic7Cspn4Y9RX6@I*kaKz_*5
z_{F^kBoz8kPgt2&0aRV)0KfqXg5*Gpdhq#bdjL^;cE-Wy8bQ;~O88d;@gxR<tz$L-
z8Fl~!Zhk8i^iLQ<wHml)1|eN1<5P*Uk$MDVV28|QYwW`YYfxwfkjxP4Fe)&HT{aA+
zE>jp1SbwYf-R-NCzd&!V0orgMD}vq!(q+X}($FcbD*64ChSa<jz~50w(yHVe#5Xgt
z%LV~6>-du*z%!ci0ZKkKfNFWCmBIN$xoa1IFHF$<ZF9wpKy7NyK-;@Qr|v}`>waNw
zws^hlujDg;8};bnMdLK)Tfh^GZ4)Ww-j*Q)TuM#ML|W*6YfFnQEU<zYFW|yo;(+4&
z+jzq&i#^+a?|}D(6eFlTZ+W>CoS$ZiEiZn&r|7ruT5<{&sFUC4vAZ@<Yy`M;fNl?9
z*H0@2B6ata{_|!1PG10=BoN_Q$zBlXpR>xpP2@#U5j9}fel790q5Hq5{-2+XPrG~W
X>0Tf#^eJP21p`6MtWE2#-A?=;CF9gr

literal 0
HcmV?d00001


From 4b6954f9f05876b07c03f62e8492e061f9cb7bfb Mon Sep 17 00:00:00 2001
From: erio <asakifeng@gmail.com>
Date: Wed, 29 Apr 2026 15:01:02 +0800
Subject: [PATCH 31/46] feat(ops): allow retention days = 0 to wipe table on
 each scheduled cleanup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Background / 背景

The ops cleanup task currently rejects retention days < 1 in both validate
and normalize, so operators who want minimal-history setups (e.g. high
churn deployments that prefer near-realtime cleanup) cannot express that
intent through the UI. The only options are 1+ days, which keeps at least
24h of history regardless of cron frequency.

ops 清理任务目前在 validate 和 normalize 两处都拒绝小于 1 的保留天数，
让希望尽量不留历史的运维场景（高吞吐部署 + 想用近实时清理）无法通过 UI
表达。最低只能配 1，等于不管 cron 多频繁，至少都会保留 24 小时的历史。

Purpose / 目的

Let admins set retention days to 0, meaning "every scheduled cleanup
run wipes the corresponding table(s) entirely". Combined with a more
frequent cron (e.g. `0 * * * *`) this yields effectively rolling cleanup.

允许管理员把保留天数设为 0，语义为"每次定时清理时把对应表全部清空"。
搭配更频繁的 cron（比如每小时整点）即可获得近似滚动清理的效果。

Changes / 改动内容

Backend

- service/ops_settings.go: validate accepts [0, 365]; normalize only
  refills default 30 when value is < 0 (negative is treated as legacy
  bad data, 0 is honoured)
- service/ops_cleanup_service.go: introduce `opsCleanupPlan(now, days)`
  returning `(cutoff, truncate, ok)`. days==0 returns truncate=true and
  short-circuits to a new `truncateOpsTable` helper that uses
  `TRUNCATE TABLE` (no row-by-row deletes, no dead tuples left for
  VACUUM; a `SELECT COUNT(*)` runs first so the wiped row count can still
  be reported). days>0 keeps the existing batched DELETE path unchanged.
  Empty tables skip TRUNCATE to avoid the ACCESS EXCLUSIVE lock entirely
- Extract `isMissingRelationError` helper to dedupe the "table not
  yet created" tolerance shared by both delete and truncate paths
- Add unit tests for `opsCleanupPlan` (three branches) and
  `isMissingRelationError`

后端

- service/ops_settings.go: validate 接受 [0, 365]；normalize 仅在 < 0
  时回填默认 30（负数视为脏数据，0 被尊重）
- service/ops_cleanup_service.go: 抽 `opsCleanupPlan(now, days)` 返回
  `(cutoff, truncate, ok)`。days==0 → truncate=true，走新增
  `truncateOpsTable`（TRUNCATE TABLE，不逐行删除、不留下待 VACUUM 的死元组；
  执行前先 SELECT COUNT(*) 取得被清空的行数）；
  days>0 仍走原批量 DELETE 路径，行为完全不变。空表跳过 TRUNCATE，
  避免无意义的 ACCESS EXCLUSIVE 锁
- 抽 `isMissingRelationError` helper 复用 delete / truncate 两处的
  "表不存在"宽容判断
- 补 `opsCleanupPlan` 三分支 + `isMissingRelationError` 单元测试

Frontend

- OpsSettingsDialog.vue: validation accepts [0, 365]; input min=0
- i18n (zh/en): hint mentions "0 = wipe all on every cleanup",
  validation message updated to 0-365 range

前端

- OpsSettingsDialog.vue: 校验放宽到 [0, 365]，input min 改 0
- i18n（zh/en）：hint 补"0 = 每次清理时清空所有"，错误提示改 0-365

Trade-offs / 取舍

- TRUNCATE requires ACCESS EXCLUSIVE lock briefly, but ops tables only
  have the cleanup task as a writer, so the lock is invisible to other
  workloads
- Empty-table guard avoids the lock when there is nothing to clean
- Negative values are still treated as legacy bad data and replaced
  with default 30 to preserve compatibility
---
 .../internal/service/ops_cleanup_service.go   | 97 ++++++++++++++++---
 .../service/ops_cleanup_service_test.go       | 64 ++++++++++++
 backend/internal/service/ops_settings.go      | 21 ++--
 frontend/src/i18n/locales/en.ts               |  4 +-
 frontend/src/i18n/locales/zh.ts               |  4 +-
 .../ops/components/OpsSettingsDialog.vue      | 12 +--
 6 files changed, 167 insertions(+), 35 deletions(-)
 create mode 100644 backend/internal/service/ops_cleanup_service_test.go

diff --git a/backend/internal/service/ops_cleanup_service.go b/backend/internal/service/ops_cleanup_service.go
index 08a10a02..44ec1ad1 100644
--- a/backend/internal/service/ops_cleanup_service.go
+++ b/backend/internal/service/ops_cleanup_service.go
@@ -184,6 +184,25 @@ func (c opsCleanupDeletedCounts) String() string {
 	)
 }
 
+// opsCleanupPlan translates a retention-days setting into a concrete cleanup action.
+//   - days < 0  → ok=false: the caller skips this cleanup group (tolerates legacy/invalid values)
+//   - days == 0 → truncate=true with a zero cutoff: the caller wipes the table via TRUNCATE TABLE
+//   - days > 0  → cutoff = now.AddDate(0, 0, -days): the caller batch-deletes rows older than cutoff
+//
+// Rationale for TRUNCATE on days==0 instead of a "now+24h cutoff + DELETE":
+//   - TRUNCATE does not scan or delete row by row, so its cost does not grow with table size
+//   - it leaves no dead tuples behind, so there is no follow-up VACUUM pressure
+//   - NOTE(review): assumes the brief ACCESS EXCLUSIVE lock is harmless because nothing else writes these ops tables concurrently — confirm
+func opsCleanupPlan(now time.Time, days int) (cutoff time.Time, truncate, ok bool) {
+	if days < 0 {
+		return time.Time{}, false, false
+	}
+	if days == 0 {
+		return time.Time{}, true, true
+	}
+	return now.AddDate(0, 0, -days), false, true
+}
+
 func (s *OpsCleanupService) runCleanupOnce(ctx context.Context) (opsCleanupDeletedCounts, error) {
 	out := opsCleanupDeletedCounts{}
 	if s == nil || s.db == nil || s.cfg == nil {
@@ -194,34 +213,42 @@ func (s *OpsCleanupService) runCleanupOnce(ctx context.Context) (opsCleanupDelet
 
 	now := time.Now().UTC()
 
-	// Error-like tables: error logs / retry attempts / alert events.
-	if days := s.cfg.Ops.Cleanup.ErrorLogRetentionDays; days > 0 {
-		cutoff := now.AddDate(0, 0, -days)
-		n, err := deleteOldRowsByID(ctx, s.db, "ops_error_logs", "created_at", cutoff, batchSize, false)
+	// runOne 把"truncate? cutoff? batched delete?"封装到一处，
+	// 让三组清理（错误日志类 / 分钟指标 / 小时+日预聚合）调用方只关心表名和列名。
+	runOne := func(truncate bool, cutoff time.Time, table, timeCol string, castDate bool) (int64, error) {
+		if truncate {
+			return truncateOpsTable(ctx, s.db, table)
+		}
+		return deleteOldRowsByID(ctx, s.db, table, timeCol, cutoff, batchSize, castDate)
+	}
+
+	// Error-like tables: error logs / retry attempts / alert events / system logs / cleanup audits.
+	if cutoff, truncate, ok := opsCleanupPlan(now, s.cfg.Ops.Cleanup.ErrorLogRetentionDays); ok {
+		n, err := runOne(truncate, cutoff, "ops_error_logs", "created_at", false)
 		if err != nil {
 			return out, err
 		}
 		out.errorLogs = n
 
-		n, err = deleteOldRowsByID(ctx, s.db, "ops_retry_attempts", "created_at", cutoff, batchSize, false)
+		n, err = runOne(truncate, cutoff, "ops_retry_attempts", "created_at", false)
 		if err != nil {
 			return out, err
 		}
 		out.retryAttempts = n
 
-		n, err = deleteOldRowsByID(ctx, s.db, "ops_alert_events", "created_at", cutoff, batchSize, false)
+		n, err = runOne(truncate, cutoff, "ops_alert_events", "created_at", false)
 		if err != nil {
 			return out, err
 		}
 		out.alertEvents = n
 
-		n, err = deleteOldRowsByID(ctx, s.db, "ops_system_logs", "created_at", cutoff, batchSize, false)
+		n, err = runOne(truncate, cutoff, "ops_system_logs", "created_at", false)
 		if err != nil {
 			return out, err
 		}
 		out.systemLogs = n
 
-		n, err = deleteOldRowsByID(ctx, s.db, "ops_system_log_cleanup_audits", "created_at", cutoff, batchSize, false)
+		n, err = runOne(truncate, cutoff, "ops_system_log_cleanup_audits", "created_at", false)
 		if err != nil {
 			return out, err
 		}
@@ -229,9 +256,8 @@ func (s *OpsCleanupService) runCleanupOnce(ctx context.Context) (opsCleanupDelet
 	}
 
 	// Minute-level metrics snapshots.
-	if days := s.cfg.Ops.Cleanup.MinuteMetricsRetentionDays; days > 0 {
-		cutoff := now.AddDate(0, 0, -days)
-		n, err := deleteOldRowsByID(ctx, s.db, "ops_system_metrics", "created_at", cutoff, batchSize, false)
+	if cutoff, truncate, ok := opsCleanupPlan(now, s.cfg.Ops.Cleanup.MinuteMetricsRetentionDays); ok {
+		n, err := runOne(truncate, cutoff, "ops_system_metrics", "created_at", false)
 		if err != nil {
 			return out, err
 		}
@@ -239,15 +265,14 @@ func (s *OpsCleanupService) runCleanupOnce(ctx context.Context) (opsCleanupDelet
 	}
 
 	// Pre-aggregation tables (hourly/daily).
-	if days := s.cfg.Ops.Cleanup.HourlyMetricsRetentionDays; days > 0 {
-		cutoff := now.AddDate(0, 0, -days)
-		n, err := deleteOldRowsByID(ctx, s.db, "ops_metrics_hourly", "bucket_start", cutoff, batchSize, false)
+	if cutoff, truncate, ok := opsCleanupPlan(now, s.cfg.Ops.Cleanup.HourlyMetricsRetentionDays); ok {
+		n, err := runOne(truncate, cutoff, "ops_metrics_hourly", "bucket_start", false)
 		if err != nil {
 			return out, err
 		}
 		out.hourlyPreagg = n
 
-		n, err = deleteOldRowsByID(ctx, s.db, "ops_metrics_daily", "bucket_date", cutoff, batchSize, true)
+		n, err = runOne(truncate, cutoff, "ops_metrics_daily", "bucket_date", true)
 		if err != nil {
 			return out, err
 		}
@@ -303,7 +328,7 @@ WHERE id IN (SELECT id FROM batch)
 		res, err := db.ExecContext(ctx, q, cutoff, batchSize)
 		if err != nil {
 			// If ops tables aren't present yet (partial deployments), treat as no-op.
-			if strings.Contains(strings.ToLower(err.Error()), "does not exist") && strings.Contains(strings.ToLower(err.Error()), "relation") {
+			if isMissingRelationError(err) {
 				return total, nil
 			}
 			return total, err
@@ -320,6 +345,46 @@ WHERE id IN (SELECT id FROM batch)
 	return total, nil
 }
 
+// truncateOpsTable empties table with TRUNCATE TABLE and returns the row count obtained from a preceding SELECT COUNT(*); table is interpolated into the SQL text, so it must be a trusted identifier.
+//
+// Differences from deleteOldRowsByID:
+//   - no WHERE clause is possible; intended only for the days==0 "wipe everything" case
+//   - the TRUNCATE itself does not delete row by row, but the COUNT(*) issued before it still has to read the table
+//   - NOTE(review): COUNT and TRUNCATE are separate statements, so rows inserted in between are removed but not counted
+//
+// A nil db, a missing table (partial deployment) and an empty table all return (0, nil); any other error is wrapped with the table name.
+func truncateOpsTable(ctx context.Context, db *sql.DB, table string) (int64, error) {
+	if db == nil {
+		return 0, nil
+	}
+	var count int64
+	if err := db.QueryRowContext(ctx, fmt.Sprintf("SELECT COUNT(*) FROM %s", table)).Scan(&count); err != nil {
+		if isMissingRelationError(err) {
+			return 0, nil
+		}
+		return 0, fmt.Errorf("count %s: %w", table, err)
+	}
+	if count == 0 { // nothing to wipe: skip TRUNCATE (and its table lock) entirely
+		return 0, nil
+	}
+	if _, err := db.ExecContext(ctx, fmt.Sprintf("TRUNCATE TABLE %s", table)); err != nil {
+		if isMissingRelationError(err) {
+			return 0, nil
+		}
+		return 0, fmt.Errorf("truncate %s: %w", table, err)
+	}
+	return count, nil
+}
+
+// isMissingRelationError reports whether err's message contains both "relation" and "does not exist" (case-insensitive), the shape of PostgreSQL's missing-table error, so cleanup can silently skip tables absent in partial deployments; a nil err yields false.
+func isMissingRelationError(err error) bool {
+	if err == nil {
+		return false
+	}
+	s := strings.ToLower(err.Error())
+	return strings.Contains(s, "does not exist") && strings.Contains(s, "relation")
+}
+
 func (s *OpsCleanupService) tryAcquireLeaderLock(ctx context.Context) (func(), bool) {
 	if s == nil {
 		return nil, false
diff --git a/backend/internal/service/ops_cleanup_service_test.go b/backend/internal/service/ops_cleanup_service_test.go
new file mode 100644
index 00000000..86657d27
--- /dev/null
+++ b/backend/internal/service/ops_cleanup_service_test.go
@@ -0,0 +1,64 @@
+package service
+
+import (
+	"testing"
+	"time"
+)
+
+func TestOpsCleanupPlan(t *testing.T) {
+	now := time.Date(2026, 4, 29, 12, 0, 0, 0, time.UTC)
+
+	cases := []struct {
+		name         string
+		days         int
+		wantOK       bool
+		wantTruncate bool
+		wantCutoff   time.Time
+	}{
+		{name: "negative skips", days: -1, wantOK: false},
+		{name: "zero truncates", days: 0, wantOK: true, wantTruncate: true},
+		{name: "positive yields past cutoff", days: 7, wantOK: true, wantCutoff: now.AddDate(0, 0, -7)},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			cutoff, truncate, ok := opsCleanupPlan(now, tc.days)
+			if ok != tc.wantOK {
+				t.Fatalf("ok = %v, want %v", ok, tc.wantOK)
+			}
+			if !ok {
+				return
+			}
+			if truncate != tc.wantTruncate {
+				t.Fatalf("truncate = %v, want %v", truncate, tc.wantTruncate)
+			}
+			if !tc.wantTruncate && !cutoff.Equal(tc.wantCutoff) {
+				t.Fatalf("cutoff = %v, want %v", cutoff, tc.wantCutoff)
+			}
+		})
+	}
+}
+
+func TestIsMissingRelationError(t *testing.T) {
+	cases := []struct {
+		name string
+		err  error
+		want bool
+	}{
+		{name: "nil is not missing", err: nil, want: false},
+		{name: "match relation does not exist", err: fakeErr(`pq: relation "ops_error_logs" does not exist`), want: true},
+		{name: "match case-insensitive", err: fakeErr(`ERROR: Relation "x" Does Not Exist`), want: true},
+		{name: "non-matching error", err: fakeErr("connection refused"), want: false},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			if got := isMissingRelationError(tc.err); got != tc.want {
+				t.Fatalf("got %v, want %v", got, tc.want)
+			}
+		})
+	}
+}
+
+type fakeErr string
+
+func (e fakeErr) Error() string { return string(e) }
diff --git a/backend/internal/service/ops_settings.go b/backend/internal/service/ops_settings.go
index 5871166c..ecc3a94b 100644
--- a/backend/internal/service/ops_settings.go
+++ b/backend/internal/service/ops_settings.go
@@ -387,13 +387,15 @@ func normalizeOpsAdvancedSettings(cfg *OpsAdvancedSettings) {
 	if cfg.DataRetention.CleanupSchedule == "" {
 		cfg.DataRetention.CleanupSchedule = "0 2 * * *"
 	}
-	if cfg.DataRetention.ErrorLogRetentionDays <= 0 {
+	// 保留天数：0 表示每次定时清理全部（清空所有），> 0 表示按天数保留；
+	// 仅在拿到非法的负数时回填默认值，避免覆盖用户主动设的 0。
+	if cfg.DataRetention.ErrorLogRetentionDays < 0 {
 		cfg.DataRetention.ErrorLogRetentionDays = 30
 	}
-	if cfg.DataRetention.MinuteMetricsRetentionDays <= 0 {
+	if cfg.DataRetention.MinuteMetricsRetentionDays < 0 {
 		cfg.DataRetention.MinuteMetricsRetentionDays = 30
 	}
-	if cfg.DataRetention.HourlyMetricsRetentionDays <= 0 {
+	if cfg.DataRetention.HourlyMetricsRetentionDays < 0 {
 		cfg.DataRetention.HourlyMetricsRetentionDays = 30
 	}
 	// Normalize auto refresh interval (default 30 seconds)
@@ -406,14 +408,15 @@ func validateOpsAdvancedSettings(cfg *OpsAdvancedSettings) error {
 	if cfg == nil {
 		return errors.New("invalid config")
 	}
-	if cfg.DataRetention.ErrorLogRetentionDays < 1 || cfg.DataRetention.ErrorLogRetentionDays > 365 {
-		return errors.New("error_log_retention_days must be between 1 and 365")
+	// 保留天数：0 表示每次清理全部，1-365 表示按天数保留。
+	if cfg.DataRetention.ErrorLogRetentionDays < 0 || cfg.DataRetention.ErrorLogRetentionDays > 365 {
+		return errors.New("error_log_retention_days must be between 0 and 365")
 	}
-	if cfg.DataRetention.MinuteMetricsRetentionDays < 1 || cfg.DataRetention.MinuteMetricsRetentionDays > 365 {
-		return errors.New("minute_metrics_retention_days must be between 1 and 365")
+	if cfg.DataRetention.MinuteMetricsRetentionDays < 0 || cfg.DataRetention.MinuteMetricsRetentionDays > 365 {
+		return errors.New("minute_metrics_retention_days must be between 0 and 365")
 	}
-	if cfg.DataRetention.HourlyMetricsRetentionDays < 1 || cfg.DataRetention.HourlyMetricsRetentionDays > 365 {
-		return errors.New("hourly_metrics_retention_days must be between 1 and 365")
+	if cfg.DataRetention.HourlyMetricsRetentionDays < 0 || cfg.DataRetention.HourlyMetricsRetentionDays > 365 {
+		return errors.New("hourly_metrics_retention_days must be between 0 and 365")
 	}
 	if cfg.AutoRefreshIntervalSec < 15 || cfg.AutoRefreshIntervalSec > 300 {
 		return errors.New("auto_refresh_interval_seconds must be between 15 and 300")
diff --git a/frontend/src/i18n/locales/en.ts b/frontend/src/i18n/locales/en.ts
index c66ca55b..270cd660 100644
--- a/frontend/src/i18n/locales/en.ts
+++ b/frontend/src/i18n/locales/en.ts
@@ -4648,7 +4648,7 @@ export default {
         errorLogRetentionDays: 'Error Log Retention Days',
         minuteMetricsRetentionDays: 'Minute Metrics Retention Days',
         hourlyMetricsRetentionDays: 'Hourly Metrics Retention Days',
-        retentionDaysHint: 'Recommended 7-90 days, longer periods will consume more storage',
+        retentionDaysHint: 'Recommended 7-90 days; longer periods consume more storage. Set to 0 to wipe all history on every scheduled cleanup',
         aggregation: 'Pre-aggregation Tasks',
         enableAggregation: 'Enable Pre-aggregation',
         aggregationHint: 'Pre-aggregation improves query performance for long time windows',
@@ -4678,7 +4678,7 @@ export default {
         autoRefreshCountdown: 'Auto refresh: {seconds}s',
         validation: {
           title: 'Please fix the following issues',
-          retentionDaysRange: 'Retention days must be between 1-365 days',
+          retentionDaysRange: 'Retention days must be between 0 and 365 (0 = wipe all on every cleanup)',
           slaMinPercentRange: 'SLA minimum percentage must be between 0 and 100',
           ttftP99MaxRange: 'TTFT P99 maximum must be a number ≥ 0',
           requestErrorRateMaxRange: 'Request error rate maximum must be between 0 and 100',
diff --git a/frontend/src/i18n/locales/zh.ts b/frontend/src/i18n/locales/zh.ts
index 77d1c93c..fdfc9e41 100644
--- a/frontend/src/i18n/locales/zh.ts
+++ b/frontend/src/i18n/locales/zh.ts
@@ -4810,7 +4810,7 @@ export default {
         errorLogRetentionDays: '错误日志保留天数',
         minuteMetricsRetentionDays: '分钟指标保留天数',
         hourlyMetricsRetentionDays: '小时指标保留天数',
-        retentionDaysHint: '建议保留7-90天，过长会占用存储空间',
+        retentionDaysHint: '建议保留 7-90 天，过长会占用存储空间；填 0 表示每次定时清理时清空所有历史',
         aggregation: '预聚合任务',
         enableAggregation: '启用预聚合任务',
         aggregationHint: '预聚合可提升长时间窗口查询性能',
@@ -4841,7 +4841,7 @@ export default {
         autoRefreshCountdown: '自动刷新：{seconds}s',
         validation: {
           title: '请先修正以下问题',
-          retentionDaysRange: '保留天数必须在1-365天之间',
+          retentionDaysRange: '保留天数必须在 0-365 天之间（0 = 每次清理时清空所有）',
           slaMinPercentRange: 'SLA最低百分比必须在0-100之间',
           ttftP99MaxRange: 'TTFT P99最大值必须大于等于0',
           requestErrorRateMaxRange: '请求错误率最大值必须在0-100之间',
diff --git a/frontend/src/views/admin/ops/components/OpsSettingsDialog.vue b/frontend/src/views/admin/ops/components/OpsSettingsDialog.vue
index 542f111d..5dba5b1d 100644
--- a/frontend/src/views/admin/ops/components/OpsSettingsDialog.vue
+++ b/frontend/src/views/admin/ops/components/OpsSettingsDialog.vue
@@ -136,13 +136,13 @@ const validation = computed(() => {
   // 验证高级设置
   if (advancedSettings.value) {
     const { error_log_retention_days, minute_metrics_retention_days, hourly_metrics_retention_days } = advancedSettings.value.data_retention
-    if (error_log_retention_days < 1 || error_log_retention_days > 365) {
+    if (error_log_retention_days < 0 || error_log_retention_days > 365) {
       errors.push(t('admin.ops.settings.validation.retentionDaysRange'))
     }
-    if (minute_metrics_retention_days < 1 || minute_metrics_retention_days > 365) {
+    if (minute_metrics_retention_days < 0 || minute_metrics_retention_days > 365) {
       errors.push(t('admin.ops.settings.validation.retentionDaysRange'))
     }
-    if (hourly_metrics_retention_days < 1 || hourly_metrics_retention_days > 365) {
+    if (hourly_metrics_retention_days < 0 || hourly_metrics_retention_days > 365) {
       errors.push(t('admin.ops.settings.validation.retentionDaysRange'))
     }
   }
@@ -431,7 +431,7 @@ async function saveAllSettings() {
                 <input
                   v-model.number="advancedSettings.data_retention.error_log_retention_days"
                   type="number"
-                  min="1"
+                  min="0"
                   max="365"
                   class="input"
                 />
@@ -441,7 +441,7 @@ async function saveAllSettings() {
                 <input
                   v-model.number="advancedSettings.data_retention.minute_metrics_retention_days"
                   type="number"
-                  min="1"
+                  min="0"
                   max="365"
                   class="input"
                 />
@@ -451,7 +451,7 @@ async function saveAllSettings() {
                 <input
                   v-model.number="advancedSettings.data_retention.hourly_metrics_retention_days"
                   type="number"
-                  min="1"
+                  min="0"
                   max="365"
                   class="input"
                 />

From d78478e8668f0547f9639c812f2bb2641f80166f Mon Sep 17 00:00:00 2001
From: alfadb <alfadb@163.com>
Date: Wed, 29 Apr 2026 15:44:54 +0800
Subject: [PATCH 32/46] fix(gateway): sanitize stream errors to avoid leaking
 infrastructure topology

(*net.OpError).Error() concatenates Source/Addr fields, so the previous
disconnectMsg surfaced internal source IP/port and upstream server address
to clients via SSE error frames and UpstreamFailoverError.ResponseBody
(reported by @Wei-Shaw on PR #2066).

- Add sanitizeStreamError that maps known errors (io.ErrUnexpectedEOF,
  context.Canceled, syscall.ECONNRESET/EPIPE/ETIMEDOUT/...) to fixed
  descriptions and falls back to a generic placeholder, with an explicit
  *net.OpError branch that drops Source/Addr fields entirely.
- Use sanitized message in client-facing disconnectMsg; full ev.err is
  still preserved in the existing operator log line for diagnosis.
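- Tests cover net.OpError redaction, the failover ResponseBody path, and
  the main sanitized error mappings (ECONNABORTED, ECONNREFUSED and the
  *net.OpError timeout / network-error branches are not exercised yet).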
---
 backend/internal/service/gateway_service.go   | 50 +++++++++-
 .../service/gateway_streaming_test.go         | 96 +++++++++++++++++++
 2 files changed, 145 insertions(+), 1 deletion(-)

diff --git a/backend/internal/service/gateway_service.go b/backend/internal/service/gateway_service.go
index 4c4a9b82..aea0ba94 100644
--- a/backend/internal/service/gateway_service.go
+++ b/backend/internal/service/gateway_service.go
@@ -11,6 +11,7 @@ import (
 	"io"
 	"log/slog"
 	mathrand "math/rand"
+	"net"
 	"net/http"
 	"net/url"
 	"os"
@@ -20,6 +21,7 @@ import (
 	"strconv"
 	"strings"
 	"sync/atomic"
+	"syscall"
 	"time"
 
 	"github.com/Wei-Shaw/sub2api/internal/config"
@@ -6434,6 +6436,49 @@ func (s *GatewayService) shouldFailoverOn400(respBody []byte) bool {
 	return false
 }
 
+// sanitizeStreamError returns a client-visible description of err with no network addresses.
+// The default (*net.OpError).Error() concatenates the Source/Addr fields, which leaks the
+// internal IP/port and the upstream server address (e.g. "read tcp 10.0.0.1:54321->52.1.2.3:443:
+// read: connection reset by peer"). Only a recognizable category is kept; the raw err is still logged at the call site.
+func sanitizeStreamError(err error) string {
+	if err == nil {
+		return ""
+	}
+	switch { // sentinel errors first: each maps to a fixed, address-free description
+	case errors.Is(err, io.ErrUnexpectedEOF):
+		return "unexpected EOF"
+	case errors.Is(err, io.EOF):
+		return "EOF"
+	case errors.Is(err, context.Canceled):
+		return "canceled"
+	case errors.Is(err, context.DeadlineExceeded):
+		return "deadline exceeded"
+	case errors.Is(err, syscall.ECONNRESET):
+		return "connection reset by peer"
+	case errors.Is(err, syscall.ECONNABORTED):
+		return "connection aborted"
+	case errors.Is(err, syscall.ETIMEDOUT):
+		return "connection timed out"
+	case errors.Is(err, syscall.EPIPE):
+		return "broken pipe"
+	case errors.Is(err, syscall.ECONNREFUSED):
+		return "connection refused"
+	}
+	var netErr *net.OpError
+	if errors.As(err, &netErr) { // unmatched *net.OpError: report only Op, never Source/Addr
+		if netErr.Timeout() {
+			if netErr.Op != "" {
+				return netErr.Op + " timeout"
+			}
+			return "i/o timeout"
+		}
+		if netErr.Op != "" {
+			return netErr.Op + " network error"
+		}
+	}
+	return "upstream connection error" // generic fallback for anything unrecognized
+}
+
 // ExtractUpstreamErrorMessage 从上游响应体中提取错误消息
 // 支持 Claude 风格的错误格式：{"type":"error","error":{"type":"...","message":"..."}}
 func ExtractUpstreamErrorMessage(body []byte) string {
@@ -7061,7 +7106,10 @@ func (s *GatewayService) handleStreamingResponse(ctx context.Context, resp *http
 				// 上游中途读错误（unexpected EOF / connection reset 等，常见于 HTTP/2 GOAWAY）：
 				// 若尚未向客户端写过任何字节，包成 UpstreamFailoverError 让 handler 层走 failover/重试。
 				// 已经开始写流时 SSE 协议无 resume，只能透传错误事件给客户端。
-				disconnectMsg := fmt.Sprintf("upstream stream disconnected: %s", ev.err)
+				// 注意:面向客户端的 disconnectMsg 必须用 sanitizeStreamError 剥离地址,
+				// 默认 *net.OpError 的 Error() 会泄露内部 IP/端口和上游地址。完整 ev.err
+				// 仅在下方 LegacyPrintf 内部日志中保留供运维诊断。
+				disconnectMsg := "upstream stream disconnected: " + sanitizeStreamError(ev.err)
 				if !c.Writer.Written() {
 					logger.LegacyPrintf("service.gateway", "Upstream stream read error before any client output (account=%d), failing over: %v", account.ID, ev.err)
 					body, _ := json.Marshal(map[string]any{
diff --git a/backend/internal/service/gateway_streaming_test.go b/backend/internal/service/gateway_streaming_test.go
index f3a52553..ef09a882 100644
--- a/backend/internal/service/gateway_streaming_test.go
+++ b/backend/internal/service/gateway_streaming_test.go
@@ -6,8 +6,10 @@ import (
 	"context"
 	"errors"
 	"io"
+	"net"
 	"net/http"
 	"net/http/httptest"
+	"syscall"
 	"testing"
 	"time"
 
@@ -297,3 +299,97 @@ func TestHandleStreamingResponse_StreamReadErrorAfterOutput_PassesThrough(t *tes
 	require.Contains(t, body, `"stream_read_error"`, "error.type 必须为 stream_read_error")
 	require.Contains(t, body, "upstream stream disconnected", "error.message 必须包含具体根因，Claude Code 等客户端才能显示有效错误文案")
 }
+
+// The default (*net.OpError).Error() concatenates the Source/Addr fields, leaking the internal
+// IP/port and the upstream server address. sanitizeStreamError must strip that information so
+// infrastructure topology is not returned to clients via the failover ResponseBody or SSE error frames.
+func TestSanitizeStreamError_StripsNetworkAddresses(t *testing.T) {
+	src, err := net.ResolveTCPAddr("tcp", "10.0.0.1:54321")
+	require.NoError(t, err)
+	dst, err := net.ResolveTCPAddr("tcp", "52.1.2.3:443")
+	require.NoError(t, err)
+
+	raw := &net.OpError{
+		Op:     "read",
+		Net:    "tcp",
+		Source: src,
+		Addr:   dst,
+		Err:    syscall.ECONNRESET,
+	}
+
+	// Precondition: the raw Error() really does contain the leaking fields (so the test cannot silently pass if Go's behavior changes)
+	require.Contains(t, raw.Error(), "10.0.0.1")
+	require.Contains(t, raw.Error(), "52.1.2.3")
+
+	got := sanitizeStreamError(raw)
+	require.NotContains(t, got, "10.0.0.1", "不得泄露内部源 IP")
+	require.NotContains(t, got, "54321", "不得泄露源端口")
+	require.NotContains(t, got, "52.1.2.3", "不得泄露上游目标 IP")
+	require.NotContains(t, got, "443", "不得泄露上游端口")
+	require.Equal(t, "connection reset by peer", got)
+}
+
+func TestSanitizeStreamError_KnownErrors(t *testing.T) {
+	cases := []struct { // each case pairs an input error with the exact sanitized text expected
+		name string
+		err  error
+		want string
+	}{
+		{"unexpected EOF", io.ErrUnexpectedEOF, "unexpected EOF"},
+		{"EOF", io.EOF, "EOF"},
+		{"context canceled", context.Canceled, "canceled"},
+		{"deadline exceeded", context.DeadlineExceeded, "deadline exceeded"},
+		{"ECONNRESET 直接", syscall.ECONNRESET, "connection reset by peer"},
+		{"EPIPE", syscall.EPIPE, "broken pipe"},
+		{"ETIMEDOUT", syscall.ETIMEDOUT, "connection timed out"},
+		{"未识别错误兜底", errors.New("weird internal error"), "upstream connection error"},
+		{"nil 返回空串", nil, ""},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) { // one subtest per table entry
+			require.Equal(t, tc.want, sanitizeStreamError(tc.err))
+		})
+	}
+}
+
+// The failover ResponseBody must carry the sanitized message so that internal address
+// information is neither leaked to the client nor carried into the ops logs.
+func TestHandleStreamingResponse_FailoverBodyDoesNotLeakAddresses(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+	svc := newMinimalGatewayService()
+
+	rec := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(rec)
+	c.Request = httptest.NewRequest(http.MethodPost, "/v1/messages", nil)
+
+	src, _ := net.ResolveTCPAddr("tcp", "10.0.0.1:54321")
+	dst, _ := net.ResolveTCPAddr("tcp", "52.1.2.3:443")
+	netErr := &net.OpError{
+		Op:     "read",
+		Net:    "tcp",
+		Source: src,
+		Addr:   dst,
+		Err:    syscall.ECONNRESET,
+	}
+
+	resp := &http.Response{
+		StatusCode: http.StatusOK,
+		Header:     http.Header{"Content-Type": []string{"text/event-stream"}},
+		Body:       &streamReadCloser{err: netErr},
+	}
+
+	_, err := svc.handleStreamingResponse(context.Background(), resp, c, &Account{ID: 1}, time.Now(), "model", "model", false)
+	require.Error(t, err)
+
+	var failoverErr *UpstreamFailoverError
+	require.True(t, errors.As(err, &failoverErr))
+
+	body := string(failoverErr.ResponseBody)
+	require.NotContains(t, body, "10.0.0.1", "failover ResponseBody 不得泄露内部源 IP")
+	require.NotContains(t, body, "54321")
+	require.NotContains(t, body, "52.1.2.3", "failover ResponseBody 不得泄露上游 IP")
+	require.NotContains(t, body, "443")
+	// The diagnosable root cause must still be present
+	require.Contains(t, body, "connection reset by peer")
+	require.Contains(t, body, "upstream stream disconnected")
+}

From 93d91e20b9da4dd1986085ee62fb716213df0c5b Mon Sep 17 00:00:00 2001
From: shaw <shaw-wei@foxmail.com>
Date: Wed, 29 Apr 2026 16:53:09 +0800
Subject: [PATCH 33/46] fix(vertex): audit fixes for Vertex Service Account
 feature (#1977)

- Security: force token_uri to Google default, preventing SSRF via crafted service account JSON
- Dedup: extract shared getVertexServiceAccountAccessToken() to eliminate ~35 lines of duplication between ClaudeTokenProvider and GeminiTokenProvider
- Fix: apply model mapping + Vertex model ID normalization in forward_as_responses and forward_as_chat_completions paths
- Fix: exclude service_account from AI Studio endpoint selection (Vertex cannot serve generativelanguage.googleapis.com)
- Feature: add model restriction/mapping UI for service_account in EditAccountModal
- Dedup: extract VERTEX_LOCATION_OPTIONS to shared constants
- i18n: replace all hardcoded Chinese strings in Vertex UI with translation keys
---
 .../internal/service/claude_token_provider.go |  37 +--
 .../gateway_forward_as_chat_completions.go    |   9 +-
 .../service/gateway_forward_as_responses.go   |   9 +-
 .../service/gemini_messages_compat_service.go |   4 +
 .../internal/service/gemini_token_provider.go |  37 +--
 .../service/vertex_service_account.go         |  48 +++-
 .../components/account/CreateAccountModal.vue |  71 +----
 .../components/account/EditAccountModal.vue   | 266 ++++++++++++++----
 frontend/src/constants/account.ts             |  48 ++++
 frontend/src/i18n/locales/en.ts               |  20 ++
 frontend/src/i18n/locales/zh.ts               |  20 ++
 11 files changed, 378 insertions(+), 191 deletions(-)

diff --git a/backend/internal/service/claude_token_provider.go b/backend/internal/service/claude_token_provider.go
index 9292979f..d70379c1 100644
--- a/backend/internal/service/claude_token_provider.go
+++ b/backend/internal/service/claude_token_provider.go
@@ -162,40 +162,5 @@ func (p *ClaudeTokenProvider) GetAccessToken(ctx context.Context, account *Accou
 }
 
 func (p *ClaudeTokenProvider) getServiceAccountAccessToken(ctx context.Context, account *Account) (string, error) {
-	key, err := parseVertexServiceAccountKey(account)
-	if err != nil {
-		return "", err
-	}
-	cacheKey := vertexServiceAccountCacheKey(account, key)
-
-	if p.tokenCache != nil {
-		if token, err := p.tokenCache.GetAccessToken(ctx, cacheKey); err == nil && strings.TrimSpace(token) != "" {
-			return token, nil
-		}
-	}
-
-	locked := false
-	if p.tokenCache != nil {
-		var lockErr error
-		locked, lockErr = p.tokenCache.AcquireRefreshLock(ctx, cacheKey, 30*time.Second)
-		if lockErr == nil && locked {
-			defer func() { _ = p.tokenCache.ReleaseRefreshLock(ctx, cacheKey) }()
-		} else if lockErr != nil {
-			slog.Warn("vertex_service_account_token_lock_failed", "account_id", account.ID, "error", lockErr)
-		} else {
-			time.Sleep(claudeLockWaitTime)
-			if token, err := p.tokenCache.GetAccessToken(ctx, cacheKey); err == nil && strings.TrimSpace(token) != "" {
-				return token, nil
-			}
-		}
-	}
-
-	accessToken, ttl, err := exchangeVertexServiceAccountToken(ctx, key)
-	if err != nil {
-		return "", err
-	}
-	if p.tokenCache != nil {
-		_ = p.tokenCache.SetAccessToken(ctx, cacheKey, accessToken, ttl)
-	}
-	return accessToken, nil
+	return getVertexServiceAccountAccessToken(ctx, p.tokenCache, account)
 }
diff --git a/backend/internal/service/gateway_forward_as_chat_completions.go b/backend/internal/service/gateway_forward_as_chat_completions.go
index c531667e..7ac77f77 100644
--- a/backend/internal/service/gateway_forward_as_chat_completions.go
+++ b/backend/internal/service/gateway_forward_as_chat_completions.go
@@ -61,10 +61,15 @@ func (s *GatewayService) ForwardAsChatCompletions(
 
 	// 4. Model mapping
 	mappedModel := originalModel
-	if account.Type == AccountTypeAPIKey {
+	if account.Type == AccountTypeAPIKey || account.Type == AccountTypeServiceAccount {
 		mappedModel = account.GetMappedModel(originalModel)
 	}
-	if mappedModel == originalModel && account.Platform == PlatformAnthropic && account.Type != AccountTypeAPIKey {
+	if mappedModel == originalModel && account.Platform == PlatformAnthropic && account.Type == AccountTypeServiceAccount {
+		normalized := normalizeVertexAnthropicModelID(claude.NormalizeModelID(originalModel))
+		if normalized != originalModel {
+			mappedModel = normalized
+		}
+	} else if mappedModel == originalModel && account.Platform == PlatformAnthropic && account.Type != AccountTypeAPIKey {
 		normalized := claude.NormalizeModelID(originalModel)
 		if normalized != originalModel {
 			mappedModel = normalized
diff --git a/backend/internal/service/gateway_forward_as_responses.go b/backend/internal/service/gateway_forward_as_responses.go
index 647193d6..8f8a1e94 100644
--- a/backend/internal/service/gateway_forward_as_responses.go
+++ b/backend/internal/service/gateway_forward_as_responses.go
@@ -58,10 +58,15 @@ func (s *GatewayService) ForwardAsResponses(
 	// 4. Model mapping
 	mappedModel := originalModel
 	reasoningEffort := ExtractResponsesReasoningEffortFromBody(body)
-	if account.Type == AccountTypeAPIKey {
+	if account.Type == AccountTypeAPIKey || account.Type == AccountTypeServiceAccount {
 		mappedModel = account.GetMappedModel(originalModel)
 	}
-	if mappedModel == originalModel && account.Platform == PlatformAnthropic && account.Type != AccountTypeAPIKey {
+	if mappedModel == originalModel && account.Platform == PlatformAnthropic && account.Type == AccountTypeServiceAccount {
+		normalized := normalizeVertexAnthropicModelID(claude.NormalizeModelID(originalModel))
+		if normalized != originalModel {
+			mappedModel = normalized
+		}
+	} else if mappedModel == originalModel && account.Platform == PlatformAnthropic && account.Type != AccountTypeAPIKey {
 		normalized := claude.NormalizeModelID(originalModel)
 		if normalized != originalModel {
 			mappedModel = normalized
diff --git a/backend/internal/service/gemini_messages_compat_service.go b/backend/internal/service/gemini_messages_compat_service.go
index 20293ac8..ea0c0d7d 100644
--- a/backend/internal/service/gemini_messages_compat_service.go
+++ b/backend/internal/service/gemini_messages_compat_service.go
@@ -515,6 +515,10 @@ func (s *GeminiMessagesCompatService) SelectAccountForAIStudioEndpoints(ctx cont
 			}
 			// Code Assist OAuth tokens often lack AI Studio scopes for models listing.
 			return 3
+		case AccountTypeServiceAccount:
+			// Vertex service accounts use aiplatform.googleapis.com, not the AI Studio
+			// endpoint (generativelanguage.googleapis.com), so they cannot serve these requests.
+			return 999
 		default:
 			return 10
 		}
diff --git a/backend/internal/service/gemini_token_provider.go b/backend/internal/service/gemini_token_provider.go
index c22f2131..172b9411 100644
--- a/backend/internal/service/gemini_token_provider.go
+++ b/backend/internal/service/gemini_token_provider.go
@@ -172,42 +172,7 @@ func (p *GeminiTokenProvider) GetAccessToken(ctx context.Context, account *Accou
 }
 
 func (p *GeminiTokenProvider) getServiceAccountAccessToken(ctx context.Context, account *Account) (string, error) {
-	key, err := parseVertexServiceAccountKey(account)
-	if err != nil {
-		return "", err
-	}
-	cacheKey := vertexServiceAccountCacheKey(account, key)
-
-	if p.tokenCache != nil {
-		if token, err := p.tokenCache.GetAccessToken(ctx, cacheKey); err == nil && strings.TrimSpace(token) != "" {
-			return token, nil
-		}
-	}
-
-	locked := false
-	if p.tokenCache != nil {
-		var lockErr error
-		locked, lockErr = p.tokenCache.AcquireRefreshLock(ctx, cacheKey, 30*time.Second)
-		if lockErr == nil && locked {
-			defer func() { _ = p.tokenCache.ReleaseRefreshLock(ctx, cacheKey) }()
-		} else if lockErr != nil {
-			slog.Warn("vertex_service_account_token_lock_failed", "account_id", account.ID, "error", lockErr)
-		} else {
-			time.Sleep(200 * time.Millisecond)
-			if token, err := p.tokenCache.GetAccessToken(ctx, cacheKey); err == nil && strings.TrimSpace(token) != "" {
-				return token, nil
-			}
-		}
-	}
-
-	accessToken, ttl, err := exchangeVertexServiceAccountToken(ctx, key)
-	if err != nil {
-		return "", err
-	}
-	if p.tokenCache != nil {
-		_ = p.tokenCache.SetAccessToken(ctx, cacheKey, accessToken, ttl)
-	}
-	return accessToken, nil
+	return getVertexServiceAccountAccessToken(ctx, p.tokenCache, account)
 }
 
 func GeminiTokenCacheKey(account *Account) string {
diff --git a/backend/internal/service/vertex_service_account.go b/backend/internal/service/vertex_service_account.go
index d4130b93..4430cf81 100644
--- a/backend/internal/service/vertex_service_account.go
+++ b/backend/internal/service/vertex_service_account.go
@@ -9,6 +9,7 @@ import (
 	"errors"
 	"fmt"
 	"io"
+	"log/slog"
 	"net/http"
 	"net/url"
 	"regexp"
@@ -23,6 +24,7 @@ const (
 	vertexDefaultTokenURL         = "https://oauth2.googleapis.com/token"
 	vertexCloudPlatformScope      = "https://www.googleapis.com/auth/cloud-platform"
 	vertexServiceAccountCacheSkew = 5 * time.Minute
+	vertexLockWaitTime            = 200 * time.Millisecond
 	vertexAnthropicVersion        = "vertex-2023-10-16"
 )
 
@@ -123,9 +125,8 @@ func parseVertexServiceAccountJSON(raw []byte) (*vertexServiceAccountKey, error)
 	if strings.TrimSpace(key.ProjectID) == "" {
 		return nil, errors.New("service account json missing project_id")
 	}
-	if strings.TrimSpace(key.TokenURI) == "" {
-		key.TokenURI = vertexDefaultTokenURL
-	}
+	// Always use the well-known Google token endpoint to prevent SSRF via crafted token_uri.
+	key.TokenURI = vertexDefaultTokenURL
 	return &key, nil
 }
 
@@ -141,6 +142,47 @@ func vertexServiceAccountCacheKey(account *Account, key *vertexServiceAccountKey
 	return "vertex:service_account:" + fingerprint
 }
 
+// getVertexServiceAccountAccessToken returns an access token for a Vertex service account,
+// consulting cache (when non-nil) and its refresh lock before exchanging for a new token.
+func getVertexServiceAccountAccessToken(ctx context.Context, cache GeminiTokenCache, account *Account) (string, error) {
+	key, err := parseVertexServiceAccountKey(account)
+	if err != nil {
+		return "", err
+	}
+	cacheKey := vertexServiceAccountCacheKey(account, key)
+
+	if cache != nil {
+		if token, err := cache.GetAccessToken(ctx, cacheKey); err == nil && strings.TrimSpace(token) != "" {
+			return token, nil
+		}
+	}
+
+	locked := false
+	if cache != nil {
+		var lockErr error
+		locked, lockErr = cache.AcquireRefreshLock(ctx, cacheKey, 30*time.Second)
+		if lockErr == nil && locked {
+			defer func() { _ = cache.ReleaseRefreshLock(ctx, cacheKey) }()
+		} else if lockErr != nil {
+			slog.Warn("vertex_service_account_token_lock_failed", "account_id", account.ID, "error", lockErr)
+		} else { // lock not acquired and no error: wait briefly, then re-check the cache
+			time.Sleep(vertexLockWaitTime) // NOTE(review): this wait does not observe ctx cancellation
+			if token, err := cache.GetAccessToken(ctx, cacheKey); err == nil && strings.TrimSpace(token) != "" {
+				return token, nil
+			}
+		}
+	}
+
+	accessToken, ttl, err := exchangeVertexServiceAccountToken(ctx, key) // reached on any cache miss, whether or not the lock was acquired
+	if err != nil {
+		return "", err
+	}
+	if cache != nil {
+		_ = cache.SetAccessToken(ctx, cacheKey, accessToken, ttl) // best-effort: a cache write error does not fail the call
+	}
+	return accessToken, nil
+}
+
 func exchangeVertexServiceAccountToken(ctx context.Context, key *vertexServiceAccountKey) (string, time.Duration, error) {
 	now := time.Now()
 	claims := jwt.MapClaims{
diff --git a/frontend/src/components/account/CreateAccountModal.vue b/frontend/src/components/account/CreateAccountModal.vue
index e7a790ec..d38c31c5 100644
--- a/frontend/src/components/account/CreateAccountModal.vue
+++ b/frontend/src/components/account/CreateAccountModal.vue
@@ -276,7 +276,7 @@
           v-if="accountCategory === 'service_account'"
           class="mt-3 rounded-lg border border-sky-200 bg-sky-50 px-3 py-2 text-xs text-sky-800 dark:border-sky-800/40 dark:bg-sky-900/20 dark:text-sky-200"
         >
-          <p>使用 Google Cloud Service Account JSON 通过 Vertex AI 调用 Anthropic Claude。建议配置模型映射，将客户端 Claude 模型名映射到 Vertex 模型 ID。</p>
+          <p>{{ t('admin.accounts.vertexAnthropicHint') }}</p>
         </div>
       </div>
 
@@ -479,7 +479,7 @@
           v-if="accountCategory === 'service_account'"
           class="mt-3 rounded-lg border border-sky-200 bg-sky-50 px-3 py-2 text-xs text-sky-800 dark:border-sky-800/40 dark:bg-sky-900/20 dark:text-sky-200"
         >
-          <p>使用 Google Cloud Service Account JSON 访问 Vertex AI Gemini。建议将 Vertex 账号放入独立分组，避免和 AI Studio/Gemini OAuth 同模型混调。</p>
+          <p>{{ t('admin.accounts.vertexGeminiHint') }}</p>
         </div>
 
         <!-- OAuth Type Selection (only show when oauth-based is selected) -->
@@ -827,10 +827,10 @@
               <div class="min-w-0">
                 <div class="flex items-center gap-2 text-sm font-medium text-gray-900 dark:text-white">
                   <Icon name="upload" size="sm" />
-                  <span>{{ vertexClientEmail ? '已读取 Service Account JSON' : '拖入 Service Account JSON' }}</span>
+                  <span>{{ vertexClientEmail ? t('admin.accounts.vertexSaJsonLoaded') : t('admin.accounts.vertexSaJsonDrop') }}</span>
                 </div>
                 <p class="mt-1 text-xs text-gray-500 dark:text-gray-400">
-                  {{ vertexClientEmail ? '密钥内容不会在表单中显示。' : '把 .json 文件拖到这里，或点击按钮选择文件。' }}
+                  {{ vertexClientEmail ? t('admin.accounts.vertexSaJsonKeyHidden') : t('admin.accounts.vertexSaJsonDropHint') }}
                 </p>
               </div>
               <button
@@ -839,7 +839,7 @@
                 @click="vertexServiceAccountFileInput?.click()"
               >
                 <Icon name="upload" size="sm" />
-                选择 JSON
+                {{ t('admin.accounts.vertexSaJsonSelectBtn') }}
               </button>
             </div>
             <div
@@ -850,7 +850,7 @@
               <div class="truncate">Client Email: <span class="font-mono">{{ vertexClientEmail }}</span></div>
             </div>
           </div>
-          <p class="input-hint">上传或拖入 JSON 后会自动读取 project_id，密钥内容仅用于创建账号提交。</p>
+          <p class="input-hint">{{ t('admin.accounts.vertexSaJsonUploadHint') }}</p>
         </div>
 
         <div class="grid grid-cols-1 gap-4 sm:grid-cols-2">
@@ -861,7 +861,7 @@
               type="text"
               class="input font-mono"
               readonly
-              placeholder="从 JSON 自动读取"
+              :placeholder="t('admin.accounts.vertexProjectIdPlaceholder')"
             />
           </div>
           <div>
@@ -872,7 +872,7 @@
               class="input font-mono"
             >
               <optgroup
-                v-for="group in vertexLocationOptions"
+                v-for="group in VERTEX_LOCATION_OPTIONS"
                 :key="group.label"
                 :label="group.label"
               >
@@ -885,7 +885,7 @@
                 </option>
               </optgroup>
             </select>
-            <p class="input-hint">不同 Vertex 模型可用 location 可能不同，这里选择账号默认 endpoint location。</p>
+            <p class="input-hint">{{ t('admin.accounts.vertexLocationHint') }}</p>
           </div>
         </div>
       </div>
@@ -3132,6 +3132,7 @@ import QuotaLimitCard from '@/components/account/QuotaLimitCard.vue'
 import { applyInterceptWarmup } from '@/components/account/credentialsBuilder'
 import { formatDateTimeLocalInput, parseDateTimeLocalInput } from '@/utils/format'
 import { createStableObjectKeyResolver } from '@/utils/stableObjectKey'
+import { VERTEX_LOCATION_OPTIONS } from '@/constants/account'
 import {
   OPENAI_WS_MODE_CTX_POOL,
   OPENAI_WS_MODE_OFF,
@@ -3318,52 +3319,6 @@ const vertexProjectId = ref('')
 const vertexClientEmail = ref('')
 const vertexLocation = ref('global')
 const vertexServiceAccountDragActive = ref(false)
-const vertexLocationOptions = [
-  {
-    label: 'Common',
-    options: [
-      { value: 'us-central1', label: 'us-central1 (Iowa)' },
-      { value: 'global', label: 'global' },
-      { value: 'us', label: 'us' },
-      { value: 'eu', label: 'eu' }
-    ]
-  },
-  {
-    label: 'United States',
-    options: [
-      { value: 'us-east1', label: 'us-east1 (South Carolina)' },
-      { value: 'us-east4', label: 'us-east4 (Northern Virginia)' },
-      { value: 'us-east5', label: 'us-east5 (Columbus)' },
-      { value: 'us-south1', label: 'us-south1 (Dallas)' },
-      { value: 'us-west1', label: 'us-west1 (Oregon)' },
-      { value: 'us-west4', label: 'us-west4 (Las Vegas)' }
-    ]
-  },
-  {
-    label: 'Europe',
-    options: [
-      { value: 'europe-west1', label: 'europe-west1 (Belgium)' },
-      { value: 'europe-west2', label: 'europe-west2 (London)' },
-      { value: 'europe-west3', label: 'europe-west3 (Frankfurt)' },
-      { value: 'europe-west4', label: 'europe-west4 (Netherlands)' },
-      { value: 'europe-west6', label: 'europe-west6 (Zurich)' },
-      { value: 'europe-west8', label: 'europe-west8 (Milan)' },
-      { value: 'europe-west9', label: 'europe-west9 (Paris)' }
-    ]
-  },
-  {
-    label: 'Asia Pacific',
-    options: [
-      { value: 'asia-east1', label: 'asia-east1 (Taiwan)' },
-      { value: 'asia-east2', label: 'asia-east2 (Hong Kong)' },
-      { value: 'asia-northeast1', label: 'asia-northeast1 (Tokyo)' },
-      { value: 'asia-northeast3', label: 'asia-northeast3 (Seoul)' },
-      { value: 'asia-south1', label: 'asia-south1 (Mumbai)' },
-      { value: 'asia-southeast1', label: 'asia-southeast1 (Singapore)' },
-      { value: 'australia-southeast1', label: 'australia-southeast1 (Sydney)' }
-    ]
-  }
-] as const
 const tempUnschedEnabled = ref(false)
 const tempUnschedRules = ref<TempUnschedRuleForm[]>([])
 const getModelMappingKey = createStableObjectKeyResolver<ModelMapping>('create-model-mapping')
@@ -4251,7 +4206,7 @@ const applyVertexServiceAccountJson = (value: string) => {
     const clientEmail = typeof parsed.client_email === 'string' ? parsed.client_email.trim() : ''
     const privateKey = typeof parsed.private_key === 'string' ? parsed.private_key.trim() : ''
     if (!projectId || !clientEmail || !privateKey) {
-      appStore.showError('Service Account JSON 缺少 project_id、client_email 或 private_key')
+      appStore.showError(t('admin.accounts.vertexSaJsonMissingFields'))
       return false
     }
     vertexProjectId.value = projectId
@@ -4259,7 +4214,7 @@ const applyVertexServiceAccountJson = (value: string) => {
     vertexServiceAccountJson.value = JSON.stringify(parsed)
     return true
   } catch {
-    appStore.showError('Service Account JSON 格式无效')
+    appStore.showError(t('admin.accounts.vertexSaJsonInvalid'))
     return false
   }
 }
@@ -4406,7 +4361,7 @@ const handleSubmit = async () => {
       return
     }
     if (!vertexLocation.value.trim()) {
-      appStore.showError('请填写 Vertex location')
+      appStore.showError(t('admin.accounts.vertexLocationRequired'))
       return
     }
     const credentials: Record<string, unknown> = {
diff --git a/frontend/src/components/account/EditAccountModal.vue b/frontend/src/components/account/EditAccountModal.vue
index 69e2186b..56874474 100644
--- a/frontend/src/components/account/EditAccountModal.vue
+++ b/frontend/src/components/account/EditAccountModal.vue
@@ -577,9 +577,9 @@
               type="text"
               class="input font-mono"
               readonly
-              placeholder="从 JSON 自动读取"
+              :placeholder="t('admin.accounts.vertexProjectIdPlaceholder')"
             />
-            <p class="input-hint">Service Account JSON 不在编辑页显示；需要更换 JSON 时请删除账号后重新创建。</p>
+            <p class="input-hint">{{ t('admin.accounts.vertexSaJsonEditHint') }}</p>
           </div>
           <div>
             <label class="input-label">Location</label>
@@ -589,7 +589,7 @@
               class="input font-mono"
             >
               <optgroup
-                v-for="group in vertexLocationOptions"
+                v-for="group in VERTEX_LOCATION_OPTIONS"
                 :key="group.label"
                 :label="group.label"
               >
@@ -602,7 +602,182 @@
                 </option>
               </optgroup>
             </select>
-            <p class="input-hint">不同 Vertex 模型可用 location 可能不同，这里选择账号默认 endpoint location。</p>
+            <p class="input-hint">{{ t('admin.accounts.vertexLocationHint') }}</p>
+          </div>
+        </div>
+
+        <!-- Model Restriction Section for Service Account -->
+        <div class="border-t border-gray-200 pt-4 dark:border-dark-600">
+          <label class="input-label">{{ t('admin.accounts.modelRestriction') }}</label>
+
+          <!-- Mode Toggle -->
+          <div class="mb-4 flex gap-2">
+            <button
+              type="button"
+              @click="modelRestrictionMode = 'whitelist'"
+              :class="[
+                'flex-1 rounded-lg px-4 py-2 text-sm font-medium transition-all',
+                modelRestrictionMode === 'whitelist'
+                  ? 'bg-primary-100 text-primary-700 dark:bg-primary-900/30 dark:text-primary-400'
+                  : 'bg-gray-100 text-gray-600 hover:bg-gray-200 dark:bg-dark-600 dark:text-gray-400 dark:hover:bg-dark-500'
+              ]"
+            >
+              <svg
+                class="mr-1.5 inline h-4 w-4"
+                fill="none"
+                viewBox="0 0 24 24"
+                stroke="currentColor"
+              >
+                <path
+                  stroke-linecap="round"
+                  stroke-linejoin="round"
+                  stroke-width="2"
+                  d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z"
+                />
+              </svg>
+              {{ t('admin.accounts.modelWhitelist') }}
+            </button>
+            <button
+              type="button"
+              @click="modelRestrictionMode = 'mapping'"
+              :class="[
+                'flex-1 rounded-lg px-4 py-2 text-sm font-medium transition-all',
+                modelRestrictionMode === 'mapping'
+                  ? 'bg-purple-100 text-purple-700 dark:bg-purple-900/30 dark:text-purple-400'
+                  : 'bg-gray-100 text-gray-600 hover:bg-gray-200 dark:bg-dark-600 dark:text-gray-400 dark:hover:bg-dark-500'
+              ]"
+            >
+              <svg
+                class="mr-1.5 inline h-4 w-4"
+                fill="none"
+                viewBox="0 0 24 24"
+                stroke="currentColor"
+              >
+                <path
+                  stroke-linecap="round"
+                  stroke-linejoin="round"
+                  stroke-width="2"
+                  d="M8 7h12m0 0l-4-4m4 4l-4 4m0 6H4m0 0l4 4m-4-4l4-4"
+                />
+              </svg>
+              {{ t('admin.accounts.modelMapping') }}
+            </button>
+          </div>
+
+          <!-- Whitelist Mode -->
+          <div v-if="modelRestrictionMode === 'whitelist'">
+            <ModelWhitelistSelector v-model="allowedModels" :platform="account?.platform || 'anthropic'" />
+            <p class="text-xs text-gray-500 dark:text-gray-400">
+              {{ t('admin.accounts.selectedModels', { count: allowedModels.length }) }}
+              <span v-if="allowedModels.length === 0">{{
+                t('admin.accounts.supportsAllModels')
+              }}</span>
+            </p>
+          </div>
+
+          <!-- Mapping Mode -->
+          <div v-else>
+            <div class="mb-3 rounded-lg bg-purple-50 p-3 dark:bg-purple-900/20">
+              <p class="text-xs text-purple-700 dark:text-purple-400">
+                <svg
+                  class="mr-1 inline h-4 w-4"
+                  fill="none"
+                  viewBox="0 0 24 24"
+                  stroke="currentColor"
+                >
+                  <path
+                    stroke-linecap="round"
+                    stroke-linejoin="round"
+                    stroke-width="2"
+                    d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"
+                  />
+                </svg>
+                {{ t('admin.accounts.mapRequestModels') }}
+              </p>
+            </div>
+
+            <!-- Model Mapping List -->
+            <div v-if="modelMappings.length > 0" class="mb-3 space-y-2">
+              <div
+                v-for="(mapping, index) in modelMappings"
+                :key="getModelMappingKey(mapping)"
+                class="flex items-center gap-2"
+              >
+                <input
+                  v-model="mapping.from"
+                  type="text"
+                  class="input flex-1"
+                  :placeholder="t('admin.accounts.requestModel')"
+                />
+                <svg
+                  class="h-4 w-4 flex-shrink-0 text-gray-400"
+                  fill="none"
+                  viewBox="0 0 24 24"
+                  stroke="currentColor"
+                >
+                  <path
+                    stroke-linecap="round"
+                    stroke-linejoin="round"
+                    stroke-width="2"
+                    d="M14 5l7 7m0 0l-7 7m7-7H3"
+                  />
+                </svg>
+                <input
+                  v-model="mapping.to"
+                  type="text"
+                  class="input flex-1"
+                  :placeholder="t('admin.accounts.actualModel')"
+                />
+                <button
+                  type="button"
+                  @click="removeModelMapping(index)"
+                  class="rounded-lg p-2 text-red-500 transition-colors hover:bg-red-50 hover:text-red-600 dark:hover:bg-red-900/20"
+                >
+                  <svg class="h-4 w-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
+                    <path
+                      stroke-linecap="round"
+                      stroke-linejoin="round"
+                      stroke-width="2"
+                      d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16"
+                    />
+                  </svg>
+                </button>
+              </div>
+            </div>
+
+            <button
+              type="button"
+              @click="addModelMapping"
+              class="mb-3 w-full rounded-lg border-2 border-dashed border-gray-300 px-4 py-2 text-gray-600 transition-colors hover:border-gray-400 hover:text-gray-700 dark:border-dark-500 dark:text-gray-400 dark:hover:border-dark-400 dark:hover:text-gray-300"
+            >
+              <svg
+                class="mr-1 inline h-4 w-4"
+                fill="none"
+                viewBox="0 0 24 24"
+                stroke="currentColor"
+              >
+                <path
+                  stroke-linecap="round"
+                  stroke-linejoin="round"
+                  stroke-width="2"
+                  d="M12 4v16m8-8H4"
+                />
+              </svg>
+              {{ t('admin.accounts.addMapping') }}
+            </button>
+
+            <!-- Quick Add Buttons -->
+            <div class="flex flex-wrap gap-2">
+              <button
+                v-for="preset in presetMappings"
+                :key="preset.label"
+                type="button"
+                @click="addPresetMapping(preset.from, preset.to)"
+                :class="['rounded-lg px-3 py-1 text-xs transition-colors', preset.color]"
+              >
+                + {{ preset.label }}
+              </button>
+            </div>
           </div>
         </div>
       </div>
@@ -1959,6 +2134,7 @@ import QuotaLimitCard from '@/components/account/QuotaLimitCard.vue'
 import { applyInterceptWarmup } from '@/components/account/credentialsBuilder'
 import { formatDateTime, formatDateTimeLocalInput, parseDateTimeLocalInput } from '@/utils/format'
 import { createStableObjectKeyResolver } from '@/utils/stableObjectKey'
+import { VERTEX_LOCATION_OPTIONS } from '@/constants/account'
 import {
   OPENAI_WS_MODE_CTX_POOL,
   OPENAI_WS_MODE_OFF,
@@ -2030,52 +2206,6 @@ const editBedrockApiKeyValue = ref('')
 const editVertexProjectId = ref('')
 const editVertexClientEmail = ref('')
 const editVertexLocation = ref('us-central1')
-const vertexLocationOptions = [
-  {
-    label: 'Common',
-    options: [
-      { value: 'us-central1', label: 'us-central1 (Iowa)' },
-      { value: 'global', label: 'global' },
-      { value: 'us', label: 'us' },
-      { value: 'eu', label: 'eu' }
-    ]
-  },
-  {
-    label: 'United States',
-    options: [
-      { value: 'us-east1', label: 'us-east1 (South Carolina)' },
-      { value: 'us-east4', label: 'us-east4 (Northern Virginia)' },
-      { value: 'us-east5', label: 'us-east5 (Columbus)' },
-      { value: 'us-south1', label: 'us-south1 (Dallas)' },
-      { value: 'us-west1', label: 'us-west1 (Oregon)' },
-      { value: 'us-west4', label: 'us-west4 (Las Vegas)' }
-    ]
-  },
-  {
-    label: 'Europe',
-    options: [
-      { value: 'europe-west1', label: 'europe-west1 (Belgium)' },
-      { value: 'europe-west2', label: 'europe-west2 (London)' },
-      { value: 'europe-west3', label: 'europe-west3 (Frankfurt)' },
-      { value: 'europe-west4', label: 'europe-west4 (Netherlands)' },
-      { value: 'europe-west6', label: 'europe-west6 (Zurich)' },
-      { value: 'europe-west8', label: 'europe-west8 (Milan)' },
-      { value: 'europe-west9', label: 'europe-west9 (Paris)' }
-    ]
-  },
-  {
-    label: 'Asia Pacific',
-    options: [
-      { value: 'asia-east1', label: 'asia-east1 (Taiwan)' },
-      { value: 'asia-east2', label: 'asia-east2 (Hong Kong)' },
-      { value: 'asia-northeast1', label: 'asia-northeast1 (Tokyo)' },
-      { value: 'asia-northeast3', label: 'asia-northeast3 (Seoul)' },
-      { value: 'asia-south1', label: 'asia-south1 (Mumbai)' },
-      { value: 'asia-southeast1', label: 'asia-southeast1 (Singapore)' },
-      { value: 'australia-southeast1', label: 'australia-southeast1 (Sydney)' }
-    ]
-  }
-] as const
 const isBedrockAPIKeyMode = computed(() =>
   props.account?.type === 'bedrock' &&
   (props.account?.credentials as Record<string, unknown>)?.auth_mode === 'apikey'
@@ -2564,6 +2694,26 @@ const syncFormFromAccount = (newAccount: Account | null) => {
     editVertexProjectId.value = (credentials.project_id as string) || ''
     editVertexClientEmail.value = (credentials.client_email as string) || ''
     editVertexLocation.value = (credentials.location as string) || (credentials.vertex_location as string) || 'us-central1'
+
+    // Load model mappings for service_account
+    const existingMappings = credentials.model_mapping as Record<string, string> | undefined
+    if (existingMappings && typeof existingMappings === 'object') {
+      const entries = Object.entries(existingMappings)
+      const isWhitelistMode = entries.length > 0 && entries.every(([from, to]) => from === to)
+      if (isWhitelistMode) {
+        modelRestrictionMode.value = 'whitelist'
+        allowedModels.value = entries.map(([from]) => from)
+        modelMappings.value = []
+      } else {
+        modelRestrictionMode.value = 'mapping'
+        modelMappings.value = entries.map(([from, to]) => ({ from, to }))
+        allowedModels.value = []
+      }
+    } else {
+      modelRestrictionMode.value = 'whitelist'
+      modelMappings.value = []
+      allowedModels.value = []
+    }
   } else {
     const platformDefaultUrl =
       newAccount.platform === 'openai'
@@ -3160,20 +3310,20 @@ const handleSubmit = async () => {
       const newCredentials: Record<string, unknown> = { ...currentCredentials }
 
       if (!editVertexProjectId.value.trim()) {
-        appStore.showError('Service Account JSON 缺少 project_id')
+        appStore.showError(t('admin.accounts.vertexSaJsonMissingProjectId'))
         return
       }
       if (!editVertexClientEmail.value.trim()) {
-        appStore.showError('Service Account JSON 缺少 client_email')
+        appStore.showError(t('admin.accounts.vertexSaJsonMissingClientEmail'))
         return
       }
       if (!editVertexLocation.value.trim()) {
-        appStore.showError('请填写 Vertex location')
+        appStore.showError(t('admin.accounts.vertexLocationRequired'))
         return
       }
 
       if (!currentCredentials.service_account_json && !currentCredentials.service_account) {
-        appStore.showError('请上传 Service Account JSON')
+        appStore.showError(t('admin.accounts.vertexSaJsonRequired'))
         return
       }
       newCredentials.project_id = editVertexProjectId.value.trim()
@@ -3181,6 +3331,14 @@ const handleSubmit = async () => {
       newCredentials.location = editVertexLocation.value.trim()
       newCredentials.tier_id = 'vertex'
 
+      // Set model_mapping when configured; otherwise remove any previously stored mapping
+      const modelMapping = buildModelMappingObject(modelRestrictionMode.value, allowedModels.value, modelMappings.value)
+      if (modelMapping) {
+        newCredentials.model_mapping = modelMapping
+      } else {
+        delete newCredentials.model_mapping
+      }
+
       applyInterceptWarmup(newCredentials, interceptWarmupRequests.value, 'edit')
       if (!applyTempUnschedConfig(newCredentials)) {
         return
diff --git a/frontend/src/constants/account.ts b/frontend/src/constants/account.ts
index dcfc7fae..776de4fa 100644
--- a/frontend/src/constants/account.ts
+++ b/frontend/src/constants/account.ts
@@ -13,3 +13,51 @@ export type QuotaThresholdType = typeof QUOTA_THRESHOLD_TYPE_FIXED | typeof QUOT
 export const QUOTA_RESET_MODE_ROLLING = 'rolling' as const
 export const QUOTA_RESET_MODE_FIXED = 'fixed' as const
 export type QuotaResetMode = typeof QUOTA_RESET_MODE_ROLLING | typeof QUOTA_RESET_MODE_FIXED
+
+/** Vertex AI location options for Service Account accounts */
+export const VERTEX_LOCATION_OPTIONS = [
+  {
+    label: 'Common',
+    options: [
+      { value: 'us-central1', label: 'us-central1 (Iowa)' },
+      { value: 'global', label: 'global' },
+      { value: 'us', label: 'us' },
+      { value: 'eu', label: 'eu' }
+    ]
+  },
+  {
+    label: 'United States',
+    options: [
+      { value: 'us-east1', label: 'us-east1 (South Carolina)' },
+      { value: 'us-east4', label: 'us-east4 (Northern Virginia)' },
+      { value: 'us-east5', label: 'us-east5 (Columbus)' },
+      { value: 'us-south1', label: 'us-south1 (Dallas)' },
+      { value: 'us-west1', label: 'us-west1 (Oregon)' },
+      { value: 'us-west4', label: 'us-west4 (Las Vegas)' }
+    ]
+  },
+  {
+    label: 'Europe',
+    options: [
+      { value: 'europe-west1', label: 'europe-west1 (Belgium)' },
+      { value: 'europe-west2', label: 'europe-west2 (London)' },
+      { value: 'europe-west3', label: 'europe-west3 (Frankfurt)' },
+      { value: 'europe-west4', label: 'europe-west4 (Netherlands)' },
+      { value: 'europe-west6', label: 'europe-west6 (Zurich)' },
+      { value: 'europe-west8', label: 'europe-west8 (Milan)' },
+      { value: 'europe-west9', label: 'europe-west9 (Paris)' }
+    ]
+  },
+  {
+    label: 'Asia Pacific',
+    options: [
+      { value: 'asia-east1', label: 'asia-east1 (Taiwan)' },
+      { value: 'asia-east2', label: 'asia-east2 (Hong Kong)' },
+      { value: 'asia-northeast1', label: 'asia-northeast1 (Tokyo)' },
+      { value: 'asia-northeast3', label: 'asia-northeast3 (Seoul)' },
+      { value: 'asia-south1', label: 'asia-south1 (Mumbai)' },
+      { value: 'asia-southeast1', label: 'asia-southeast1 (Singapore)' },
+      { value: 'australia-southeast1', label: 'australia-southeast1 (Sydney)' }
+    ]
+  }
+] as const
diff --git a/frontend/src/i18n/locales/en.ts b/frontend/src/i18n/locales/en.ts
index 270cd660..0425955f 100644
--- a/frontend/src/i18n/locales/en.ts
+++ b/frontend/src/i18n/locales/en.ts
@@ -2815,6 +2815,26 @@ export default {
       claudeConsole: 'Claude Console',
       bedrockLabel: 'AWS Bedrock',
       bedrockDesc: 'SigV4 / API Key',
+      vertexLabel: 'Vertex',
+      vertexDesc: 'Service Account',
+      vertexAnthropicHint: 'Use a Google Cloud Service Account JSON to call Anthropic Claude via Vertex AI. It is recommended to configure model mapping to map client Claude model names to Vertex model IDs.',
+      vertexGeminiHint: 'Use a Google Cloud Service Account JSON to access Vertex AI Gemini. It is recommended to place Vertex accounts in a separate group to avoid mixing with AI Studio/Gemini OAuth on the same models.',
+      vertexSaJsonLabel: 'Service Account JSON',
+      vertexSaJsonLoaded: 'Service Account JSON loaded',
+      vertexSaJsonDrop: 'Drop Service Account JSON here',
+      vertexSaJsonKeyHidden: 'Key content is not displayed in the form.',
+      vertexSaJsonDropHint: 'Drag a .json file here, or click the button to select one.',
+      vertexSaJsonSelectBtn: 'Select JSON',
+      vertexSaJsonUploadHint: 'After uploading or dropping a JSON file, the project_id will be auto-extracted. Key content is only used for account creation.',
+      vertexSaJsonEditHint: 'Service Account JSON is not shown on the edit page; to change the JSON, delete the account and recreate it.',
+      vertexProjectIdPlaceholder: 'Auto-extracted from JSON',
+      vertexLocationHint: 'Available locations vary by Vertex model. Select the default endpoint location for this account.',
+      vertexLocationRequired: 'Please enter a Vertex location',
+      vertexSaJsonMissingFields: 'Service Account JSON is missing project_id, client_email, or private_key',
+      vertexSaJsonMissingProjectId: 'Service Account JSON is missing project_id',
+      vertexSaJsonMissingClientEmail: 'Service Account JSON is missing client_email',
+      vertexSaJsonInvalid: 'Service Account JSON format is invalid',
+      vertexSaJsonRequired: 'Please upload a Service Account JSON',
       oauthSetupToken: 'OAuth / Setup Token',
       addMethod: 'Add Method',
       setupTokenLongLived: 'Setup Token (Long-lived)',
diff --git a/frontend/src/i18n/locales/zh.ts b/frontend/src/i18n/locales/zh.ts
index fdfc9e41..a8656a7b 100644
--- a/frontend/src/i18n/locales/zh.ts
+++ b/frontend/src/i18n/locales/zh.ts
@@ -2963,6 +2963,26 @@ export default {
       claudeConsole: 'Claude Console',
       bedrockLabel: 'AWS Bedrock',
       bedrockDesc: 'SigV4 / API Key',
+      vertexLabel: 'Vertex',
+      vertexDesc: 'Service Account',
+      vertexAnthropicHint: '使用 Google Cloud Service Account JSON 通过 Vertex AI 调用 Anthropic Claude。建议配置模型映射，将客户端 Claude 模型名映射到 Vertex 模型 ID。',
+      vertexGeminiHint: '使用 Google Cloud Service Account JSON 访问 Vertex AI Gemini。建议将 Vertex 账号放入独立分组，避免和 AI Studio/Gemini OAuth 同模型混调。',
+      vertexSaJsonLabel: 'Service Account JSON',
+      vertexSaJsonLoaded: '已读取 Service Account JSON',
+      vertexSaJsonDrop: '拖入 Service Account JSON',
+      vertexSaJsonKeyHidden: '密钥内容不会在表单中显示。',
+      vertexSaJsonDropHint: '把 .json 文件拖到这里，或点击按钮选择文件。',
+      vertexSaJsonSelectBtn: '选择 JSON',
+      vertexSaJsonUploadHint: '上传或拖入 JSON 后会自动读取 project_id，密钥内容仅用于创建账号提交。',
+      vertexSaJsonEditHint: 'Service Account JSON 不在编辑页显示；需要更换 JSON 时请删除账号后重新创建。',
+      vertexProjectIdPlaceholder: '从 JSON 自动读取',
+      vertexLocationHint: '不同 Vertex 模型可用 location 可能不同，这里选择账号默认 endpoint location。',
+      vertexLocationRequired: '请填写 Vertex location',
+      vertexSaJsonMissingFields: 'Service Account JSON 缺少 project_id、client_email 或 private_key',
+      vertexSaJsonMissingProjectId: 'Service Account JSON 缺少 project_id',
+      vertexSaJsonMissingClientEmail: 'Service Account JSON 缺少 client_email',
+      vertexSaJsonInvalid: 'Service Account JSON 格式无效',
+      vertexSaJsonRequired: '请上传 Service Account JSON',
       oauthSetupToken: 'OAuth / Setup Token',
       addMethod: '添加方式',
       setupTokenLongLived: 'Setup Token（长期有效）',

From 28dc34b6a38b670920dc9c02819e0fd95ee33037 Mon Sep 17 00:00:00 2001
From: KnowSky404 <git@knowsky404.com>
Date: Wed, 29 Apr 2026 17:38:08 +0800
Subject: [PATCH 34/46] fix(openai): avoid inferred WS continuation on explicit
 tool replay

---
 .../internal/service/openai_ws_forwarder.go   |  26 +-
 ...penai_ws_forwarder_ingress_session_test.go | 268 ++++++++++++++++++
 .../openai_ws_forwarder_ingress_test.go       | 108 ++++---
 3 files changed, 356 insertions(+), 46 deletions(-)

diff --git a/backend/internal/service/openai_ws_forwarder.go b/backend/internal/service/openai_ws_forwarder.go
index dedbce1e..023217b2 100644
--- a/backend/internal/service/openai_ws_forwarder.go
+++ b/backend/internal/service/openai_ws_forwarder.go
@@ -1366,16 +1366,25 @@ func setPreviousResponseIDToRawPayload(payload []byte, previousResponseID string
 func shouldInferIngressFunctionCallOutputPreviousResponseID(
 	storeDisabled bool,
 	turn int,
-	hasFunctionCallOutput bool,
+	signals ToolContinuationSignals,
 	currentPreviousResponseID string,
 	expectedPreviousResponseID string,
 ) bool {
-	if !storeDisabled || turn <= 1 || !hasFunctionCallOutput {
+	if !storeDisabled || turn <= 1 || !signals.HasFunctionCallOutput {
 		return false
 	}
 	if strings.TrimSpace(currentPreviousResponseID) != "" {
 		return false
 	}
+	if signals.HasFunctionCallOutputMissingCallID {
+		return false
+	}
+	// If the client already sent tool-call context or item_reference anchors,
+	// treat this as a full replay / self-contained continuation payload rather
+	// than downgrading it into an inferred delta continuation.
+	if signals.HasToolCallContext || signals.HasItemReferenceForAllCallIDs {
+		return false
+	}
 	return strings.TrimSpace(expectedPreviousResponseID) != ""
 }
 
@@ -3179,13 +3188,22 @@ func (s *OpenAIGatewayService) ProxyResponsesWebSocketFromClient(
 		skipBeforeTurn = false
 		currentPreviousResponseID := openAIWSPayloadStringFromRaw(currentPayload, "previous_response_id")
 		expectedPrev := strings.TrimSpace(lastTurnResponseID)
-		hasFunctionCallOutput := gjson.GetBytes(currentPayload, `input.#(type=="function_call_output")`).Exists()
+		toolSignals := ToolContinuationSignals{
+			HasFunctionCallOutput: gjson.GetBytes(currentPayload, `input.#(type=="function_call_output")`).Exists(),
+		}
+		if toolSignals.HasFunctionCallOutput {
+			var currentReqBody map[string]any
+			if err := json.Unmarshal(currentPayload, &currentReqBody); err == nil {
+				toolSignals = AnalyzeToolContinuationSignals(currentReqBody)
+			}
+		}
+		hasFunctionCallOutput := toolSignals.HasFunctionCallOutput
 		// store=false + function_call_output 场景必须有续链锚点。
 		// 若客户端未传 previous_response_id，优先回填上一轮响应 ID，避免上游报 call_id 无法关联。
 		if shouldInferIngressFunctionCallOutputPreviousResponseID(
 			storeDisabled,
 			turn,
-			hasFunctionCallOutput,
+			toolSignals,
 			currentPreviousResponseID,
 			expectedPrev,
 		) {
diff --git a/backend/internal/service/openai_ws_forwarder_ingress_session_test.go b/backend/internal/service/openai_ws_forwarder_ingress_session_test.go
index 6bf9a9ff..701f069a 100644
--- a/backend/internal/service/openai_ws_forwarder_ingress_session_test.go
+++ b/backend/internal/service/openai_ws_forwarder_ingress_session_test.go
@@ -1354,6 +1354,274 @@ func TestOpenAIGatewayService_ProxyResponsesWebSocketFromClient_StoreDisabledFun
 	require.False(t, gjson.Get(requestToJSONString(captureConn.writes[1]), "previous_response_id").Exists(), "上一轮缺失 response.id 时不应自动补齐 previous_response_id")
 }
 
+func TestOpenAIGatewayService_ProxyResponsesWebSocketFromClient_StoreDisabledFunctionCallOutputSkipsAutoAttachWhenToolCallContextPresent(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+
+	cfg := &config.Config{}
+	cfg.Security.URLAllowlist.Enabled = false
+	cfg.Security.URLAllowlist.AllowInsecureHTTP = true
+	cfg.Gateway.OpenAIWS.Enabled = true
+	cfg.Gateway.OpenAIWS.OAuthEnabled = true
+	cfg.Gateway.OpenAIWS.APIKeyEnabled = true
+	cfg.Gateway.OpenAIWS.ResponsesWebsocketsV2 = true
+	cfg.Gateway.OpenAIWS.MaxConnsPerAccount = 1
+	cfg.Gateway.OpenAIWS.MinIdlePerAccount = 0
+	cfg.Gateway.OpenAIWS.MaxIdlePerAccount = 1
+	cfg.Gateway.OpenAIWS.QueueLimitPerConn = 8
+	cfg.Gateway.OpenAIWS.DialTimeoutSeconds = 3
+	cfg.Gateway.OpenAIWS.ReadTimeoutSeconds = 3
+	cfg.Gateway.OpenAIWS.WriteTimeoutSeconds = 3
+
+	captureConn := &openAIWSCaptureConn{
+		events: [][]byte{
+			[]byte(`{"type":"response.completed","response":{"id":"resp_auto_prev_ctx_1","model":"gpt-5.1","usage":{"input_tokens":1,"output_tokens":1}}}`),
+			[]byte(`{"type":"response.completed","response":{"id":"resp_auto_prev_ctx_2","model":"gpt-5.1","usage":{"input_tokens":1,"output_tokens":1}}}`),
+		},
+	}
+	captureDialer := &openAIWSQueueDialer{
+		conns: []openAIWSClientConn{captureConn},
+	}
+	pool := newOpenAIWSConnPool(cfg)
+	pool.setClientDialerForTest(captureDialer)
+
+	svc := &OpenAIGatewayService{
+		cfg:              cfg,
+		httpUpstream:     &httpUpstreamRecorder{},
+		cache:            &stubGatewayCache{},
+		openaiWSResolver: NewOpenAIWSProtocolResolver(cfg),
+		toolCorrector:    NewCodexToolCorrector(),
+		openaiWSPool:     pool,
+	}
+
+	account := &Account{
+		ID:          114,
+		Name:        "openai-ingress-tool-context",
+		Platform:    PlatformOpenAI,
+		Type:        AccountTypeAPIKey,
+		Status:      StatusActive,
+		Schedulable: true,
+		Concurrency: 1,
+		Credentials: map[string]any{
+			"api_key": "sk-test",
+		},
+		Extra: map[string]any{
+			"responses_websockets_v2_enabled": true,
+		},
+	}
+
+	serverErrCh := make(chan error, 1)
+	wsServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		conn, err := coderws.Accept(w, r, &coderws.AcceptOptions{
+			CompressionMode: coderws.CompressionContextTakeover,
+		})
+		if err != nil {
+			serverErrCh <- err
+			return
+		}
+		defer func() {
+			_ = conn.CloseNow()
+		}()
+
+		rec := httptest.NewRecorder()
+		ginCtx, _ := gin.CreateTestContext(rec)
+		req := r.Clone(r.Context())
+		req.Header = req.Header.Clone()
+		req.Header.Set("User-Agent", "unit-test-agent/1.0")
+		ginCtx.Request = req
+
+		readCtx, cancel := context.WithTimeout(r.Context(), 3*time.Second)
+		msgType, firstMessage, readErr := conn.Read(readCtx)
+		cancel()
+		if readErr != nil {
+			serverErrCh <- readErr
+			return
+		}
+		if msgType != coderws.MessageText && msgType != coderws.MessageBinary {
+			serverErrCh <- errors.New("unsupported websocket client message type")
+			return
+		}
+
+		serverErrCh <- svc.ProxyResponsesWebSocketFromClient(r.Context(), ginCtx, conn, account, "sk-test", firstMessage, nil)
+	}))
+	defer wsServer.Close()
+
+	dialCtx, cancelDial := context.WithTimeout(context.Background(), 3*time.Second)
+	clientConn, _, err := coderws.Dial(dialCtx, "ws"+strings.TrimPrefix(wsServer.URL, "http"), nil)
+	cancelDial()
+	require.NoError(t, err)
+	defer func() {
+		_ = clientConn.CloseNow()
+	}()
+
+	writeMessage := func(payload string) {
+		writeCtx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
+		defer cancel()
+		require.NoError(t, clientConn.Write(writeCtx, coderws.MessageText, []byte(payload)))
+	}
+	readMessage := func() []byte {
+		readCtx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
+		defer cancel()
+		msgType, message, readErr := clientConn.Read(readCtx)
+		require.NoError(t, readErr)
+		require.Equal(t, coderws.MessageText, msgType)
+		return message
+	}
+
+	writeMessage(`{"type":"response.create","model":"gpt-5.1","stream":false,"store":false,"input":[{"type":"input_text","text":"hello"}]}`)
+	firstTurn := readMessage()
+	require.Equal(t, "resp_auto_prev_ctx_1", gjson.GetBytes(firstTurn, "response.id").String())
+
+	writeMessage(`{"type":"response.create","model":"gpt-5.1","stream":false,"store":false,"input":[{"type":"function_call","call_id":"call_ctx_1","name":"shell","arguments":"{}"},{"type":"function_call_output","call_id":"call_ctx_1","output":"ok"},{"type":"message","role":"user","content":[{"type":"input_text","text":"retry"}]}]}`)
+	secondTurn := readMessage()
+	require.Equal(t, "resp_auto_prev_ctx_2", gjson.GetBytes(secondTurn, "response.id").String())
+
+	require.NoError(t, clientConn.Close(coderws.StatusNormalClosure, "done"))
+	select {
+	case serverErr := <-serverErrCh:
+		require.NoError(t, serverErr)
+	case <-time.After(5 * time.Second):
+		t.Fatal("等待 ingress websocket 结束超时")
+	}
+
+	require.Equal(t, 1, captureDialer.DialCount())
+	require.Len(t, captureConn.writes, 2)
+	require.False(t, gjson.Get(requestToJSONString(captureConn.writes[1]), "previous_response_id").Exists(), "请求已包含 function_call 上下文时不应自动补齐 previous_response_id")
+}
+
+func TestOpenAIGatewayService_ProxyResponsesWebSocketFromClient_StoreDisabledFunctionCallOutputSkipsAutoAttachWhenItemReferencesPresent(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+
+	cfg := &config.Config{}
+	cfg.Security.URLAllowlist.Enabled = false
+	cfg.Security.URLAllowlist.AllowInsecureHTTP = true
+	cfg.Gateway.OpenAIWS.Enabled = true
+	cfg.Gateway.OpenAIWS.OAuthEnabled = true
+	cfg.Gateway.OpenAIWS.APIKeyEnabled = true
+	cfg.Gateway.OpenAIWS.ResponsesWebsocketsV2 = true
+	cfg.Gateway.OpenAIWS.MaxConnsPerAccount = 1
+	cfg.Gateway.OpenAIWS.MinIdlePerAccount = 0
+	cfg.Gateway.OpenAIWS.MaxIdlePerAccount = 1
+	cfg.Gateway.OpenAIWS.QueueLimitPerConn = 8
+	cfg.Gateway.OpenAIWS.DialTimeoutSeconds = 3
+	cfg.Gateway.OpenAIWS.ReadTimeoutSeconds = 3
+	cfg.Gateway.OpenAIWS.WriteTimeoutSeconds = 3
+
+	captureConn := &openAIWSCaptureConn{
+		events: [][]byte{
+			[]byte(`{"type":"response.completed","response":{"id":"resp_auto_prev_ref_1","model":"gpt-5.1","usage":{"input_tokens":1,"output_tokens":1}}}`),
+			[]byte(`{"type":"response.completed","response":{"id":"resp_auto_prev_ref_2","model":"gpt-5.1","usage":{"input_tokens":1,"output_tokens":1}}}`),
+		},
+	}
+	captureDialer := &openAIWSQueueDialer{
+		conns: []openAIWSClientConn{captureConn},
+	}
+	pool := newOpenAIWSConnPool(cfg)
+	pool.setClientDialerForTest(captureDialer)
+
+	svc := &OpenAIGatewayService{
+		cfg:              cfg,
+		httpUpstream:     &httpUpstreamRecorder{},
+		cache:            &stubGatewayCache{},
+		openaiWSResolver: NewOpenAIWSProtocolResolver(cfg),
+		toolCorrector:    NewCodexToolCorrector(),
+		openaiWSPool:     pool,
+	}
+
+	account := &Account{
+		ID:          115,
+		Name:        "openai-ingress-item-reference",
+		Platform:    PlatformOpenAI,
+		Type:        AccountTypeAPIKey,
+		Status:      StatusActive,
+		Schedulable: true,
+		Concurrency: 1,
+		Credentials: map[string]any{
+			"api_key": "sk-test",
+		},
+		Extra: map[string]any{
+			"responses_websockets_v2_enabled": true,
+		},
+	}
+
+	serverErrCh := make(chan error, 1)
+	wsServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		conn, err := coderws.Accept(w, r, &coderws.AcceptOptions{
+			CompressionMode: coderws.CompressionContextTakeover,
+		})
+		if err != nil {
+			serverErrCh <- err
+			return
+		}
+		defer func() {
+			_ = conn.CloseNow()
+		}()
+
+		rec := httptest.NewRecorder()
+		ginCtx, _ := gin.CreateTestContext(rec)
+		req := r.Clone(r.Context())
+		req.Header = req.Header.Clone()
+		req.Header.Set("User-Agent", "unit-test-agent/1.0")
+		ginCtx.Request = req
+
+		readCtx, cancel := context.WithTimeout(r.Context(), 3*time.Second)
+		msgType, firstMessage, readErr := conn.Read(readCtx)
+		cancel()
+		if readErr != nil {
+			serverErrCh <- readErr
+			return
+		}
+		if msgType != coderws.MessageText && msgType != coderws.MessageBinary {
+			serverErrCh <- errors.New("unsupported websocket client message type")
+			return
+		}
+
+		serverErrCh <- svc.ProxyResponsesWebSocketFromClient(r.Context(), ginCtx, conn, account, "sk-test", firstMessage, nil)
+	}))
+	defer wsServer.Close()
+
+	dialCtx, cancelDial := context.WithTimeout(context.Background(), 3*time.Second)
+	clientConn, _, err := coderws.Dial(dialCtx, "ws"+strings.TrimPrefix(wsServer.URL, "http"), nil)
+	cancelDial()
+	require.NoError(t, err)
+	defer func() {
+		_ = clientConn.CloseNow()
+	}()
+
+	writeMessage := func(payload string) {
+		writeCtx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
+		defer cancel()
+		require.NoError(t, clientConn.Write(writeCtx, coderws.MessageText, []byte(payload)))
+	}
+	readMessage := func() []byte {
+		readCtx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
+		defer cancel()
+		msgType, message, readErr := clientConn.Read(readCtx)
+		require.NoError(t, readErr)
+		require.Equal(t, coderws.MessageText, msgType)
+		return message
+	}
+
+	writeMessage(`{"type":"response.create","model":"gpt-5.1","stream":false,"store":false,"input":[{"type":"input_text","text":"hello"}]}`)
+	firstTurn := readMessage()
+	require.Equal(t, "resp_auto_prev_ref_1", gjson.GetBytes(firstTurn, "response.id").String())
+
+	writeMessage(`{"type":"response.create","model":"gpt-5.1","stream":false,"store":false,"input":[{"type":"item_reference","id":"call_ref_1"},{"type":"function_call_output","call_id":"call_ref_1","output":"ok"},{"type":"message","role":"user","content":[{"type":"input_text","text":"retry"}]}]}`)
+	secondTurn := readMessage()
+	require.Equal(t, "resp_auto_prev_ref_2", gjson.GetBytes(secondTurn, "response.id").String())
+
+	require.NoError(t, clientConn.Close(coderws.StatusNormalClosure, "done"))
+	select {
+	case serverErr := <-serverErrCh:
+		require.NoError(t, serverErr)
+	case <-time.After(5 * time.Second):
+		t.Fatal("等待 ingress websocket 结束超时")
+	}
+
+	require.Equal(t, 1, captureDialer.DialCount())
+	require.Len(t, captureConn.writes, 2)
+	require.False(t, gjson.Get(requestToJSONString(captureConn.writes[1]), "previous_response_id").Exists(), "请求已包含 item_reference 锚点时不应自动补齐 previous_response_id")
+}
+
 func TestOpenAIGatewayService_ProxyResponsesWebSocketFromClient_PreflightPingFailReconnectsBeforeTurn(t *testing.T) {
 	gin.SetMode(gin.TestMode)
 	prevPreflightPingIdle := openAIWSIngressPreflightPingIdle
diff --git a/backend/internal/service/openai_ws_forwarder_ingress_test.go b/backend/internal/service/openai_ws_forwarder_ingress_test.go
index ff35cb01..5bc5db4e 100644
--- a/backend/internal/service/openai_ws_forwarder_ingress_test.go
+++ b/backend/internal/service/openai_ws_forwarder_ingress_test.go
@@ -232,67 +232,91 @@ func TestShouldInferIngressFunctionCallOutputPreviousResponseID(t *testing.T) {
 		name                    string
 		storeDisabled           bool
 		turn                    int
-		hasFunctionCallOutput   bool
+		signals                 ToolContinuationSignals
 		currentPreviousResponse string
 		expectedPrevious        string
 		want                    bool
 	}{
 		{
-			name:                  "infer_when_all_conditions_match",
-			storeDisabled:         true,
-			turn:                  2,
-			hasFunctionCallOutput: true,
-			expectedPrevious:      "resp_1",
-			want:                  true,
+			name:            "infer_when_all_conditions_match",
+			storeDisabled:   true,
+			turn:            2,
+			signals:         ToolContinuationSignals{HasFunctionCallOutput: true},
+			expectedPrevious: "resp_1",
+			want:            true,
 		},
 		{
-			name:                  "skip_when_store_enabled",
-			storeDisabled:         false,
-			turn:                  2,
-			hasFunctionCallOutput: true,
-			expectedPrevious:      "resp_1",
-			want:                  false,
+			name:            "skip_when_store_enabled",
+			storeDisabled:   false,
+			turn:            2,
+			signals:         ToolContinuationSignals{HasFunctionCallOutput: true},
+			expectedPrevious: "resp_1",
+			want:            false,
 		},
 		{
-			name:                  "skip_on_first_turn",
-			storeDisabled:         true,
-			turn:                  1,
-			hasFunctionCallOutput: true,
-			expectedPrevious:      "resp_1",
-			want:                  false,
+			name:            "skip_on_first_turn",
+			storeDisabled:   true,
+			turn:            1,
+			signals:         ToolContinuationSignals{HasFunctionCallOutput: true},
+			expectedPrevious: "resp_1",
+			want:            false,
 		},
 		{
-			name:                  "skip_without_function_call_output",
-			storeDisabled:         true,
-			turn:                  2,
-			hasFunctionCallOutput: false,
-			expectedPrevious:      "resp_1",
-			want:                  false,
+			name:            "skip_without_function_call_output",
+			storeDisabled:   true,
+			turn:            2,
+			signals:         ToolContinuationSignals{},
+			expectedPrevious: "resp_1",
+			want:            false,
 		},
 		{
-			name:                    "skip_when_request_already_has_previous_response_id",
-			storeDisabled:           true,
-			turn:                    2,
-			hasFunctionCallOutput:   true,
+			name:          "skip_when_request_already_has_previous_response_id",
+			storeDisabled: true,
+			turn:          2,
+			signals:       ToolContinuationSignals{HasFunctionCallOutput: true},
 			currentPreviousResponse: "resp_client",
 			expectedPrevious:        "resp_1",
 			want:                    false,
 		},
 		{
-			name:                  "skip_when_last_turn_response_id_missing",
-			storeDisabled:         true,
-			turn:                  2,
-			hasFunctionCallOutput: true,
-			expectedPrevious:      "",
-			want:                  false,
+			name:            "skip_when_last_turn_response_id_missing",
+			storeDisabled:   true,
+			turn:            2,
+			signals:         ToolContinuationSignals{HasFunctionCallOutput: true},
+			expectedPrevious: "",
+			want:            false,
 		},
 		{
-			name:                  "trim_whitespace_before_judgement",
-			storeDisabled:         true,
-			turn:                  2,
-			hasFunctionCallOutput: true,
-			expectedPrevious:      "   resp_2   ",
-			want:                  true,
+			name:            "trim_whitespace_before_judgement",
+			storeDisabled:   true,
+			turn:            2,
+			signals:         ToolContinuationSignals{HasFunctionCallOutput: true},
+			expectedPrevious: "   resp_2   ",
+			want:            true,
+		},
+		{
+			name:            "skip_when_tool_call_context_already_present",
+			storeDisabled:   true,
+			turn:            2,
+			signals:         ToolContinuationSignals{HasFunctionCallOutput: true, HasToolCallContext: true},
+			expectedPrevious: "resp_2",
+			want:            false,
+		},
+		{
+			name:            "skip_when_item_reference_already_covers_all_call_ids",
+			storeDisabled:   true,
+			turn:            2,
+			signals:         ToolContinuationSignals{HasFunctionCallOutput: true, HasItemReferenceForAllCallIDs: true},
+			expectedPrevious: "resp_2",
+			want:            false,
+		},
+		{
+			name:            "skip_when_function_call_output_missing_call_id",
+			storeDisabled:   true,
+			turn:            2,
+			signals:         ToolContinuationSignals{HasFunctionCallOutput: true, HasFunctionCallOutputMissingCallID: true},
+			expectedPrevious: "resp_2",
+			want:            false,
 		},
 	}
 
@@ -303,7 +327,7 @@ func TestShouldInferIngressFunctionCallOutputPreviousResponseID(t *testing.T) {
 			got := shouldInferIngressFunctionCallOutputPreviousResponseID(
 				tt.storeDisabled,
 				tt.turn,
-				tt.hasFunctionCallOutput,
+				tt.signals,
 				tt.currentPreviousResponse,
 				tt.expectedPrevious,
 			)

From f7c13af11fa380d47635174645da9b6dd995cda3 Mon Sep 17 00:00:00 2001
From: KnowSky404 <git@knowsky404.com>
Date: Wed, 29 Apr 2026 18:02:19 +0800
Subject: [PATCH 35/46] fix: format ingress continuation test

---
 .../openai_ws_forwarder_ingress_test.go       | 98 +++++++++----------
 1 file changed, 49 insertions(+), 49 deletions(-)

diff --git a/backend/internal/service/openai_ws_forwarder_ingress_test.go b/backend/internal/service/openai_ws_forwarder_ingress_test.go
index 5bc5db4e..08597f0c 100644
--- a/backend/internal/service/openai_ws_forwarder_ingress_test.go
+++ b/backend/internal/service/openai_ws_forwarder_ingress_test.go
@@ -238,85 +238,85 @@ func TestShouldInferIngressFunctionCallOutputPreviousResponseID(t *testing.T) {
 		want                    bool
 	}{
 		{
-			name:            "infer_when_all_conditions_match",
-			storeDisabled:   true,
-			turn:            2,
-			signals:         ToolContinuationSignals{HasFunctionCallOutput: true},
+			name:             "infer_when_all_conditions_match",
+			storeDisabled:    true,
+			turn:             2,
+			signals:          ToolContinuationSignals{HasFunctionCallOutput: true},
 			expectedPrevious: "resp_1",
-			want:            true,
+			want:             true,
 		},
 		{
-			name:            "skip_when_store_enabled",
-			storeDisabled:   false,
-			turn:            2,
-			signals:         ToolContinuationSignals{HasFunctionCallOutput: true},
+			name:             "skip_when_store_enabled",
+			storeDisabled:    false,
+			turn:             2,
+			signals:          ToolContinuationSignals{HasFunctionCallOutput: true},
 			expectedPrevious: "resp_1",
-			want:            false,
+			want:             false,
 		},
 		{
-			name:            "skip_on_first_turn",
-			storeDisabled:   true,
-			turn:            1,
-			signals:         ToolContinuationSignals{HasFunctionCallOutput: true},
+			name:             "skip_on_first_turn",
+			storeDisabled:    true,
+			turn:             1,
+			signals:          ToolContinuationSignals{HasFunctionCallOutput: true},
 			expectedPrevious: "resp_1",
-			want:            false,
+			want:             false,
 		},
 		{
-			name:            "skip_without_function_call_output",
-			storeDisabled:   true,
-			turn:            2,
-			signals:         ToolContinuationSignals{},
+			name:             "skip_without_function_call_output",
+			storeDisabled:    true,
+			turn:             2,
+			signals:          ToolContinuationSignals{},
 			expectedPrevious: "resp_1",
-			want:            false,
+			want:             false,
 		},
 		{
-			name:          "skip_when_request_already_has_previous_response_id",
-			storeDisabled: true,
-			turn:          2,
-			signals:       ToolContinuationSignals{HasFunctionCallOutput: true},
+			name:                    "skip_when_request_already_has_previous_response_id",
+			storeDisabled:           true,
+			turn:                    2,
+			signals:                 ToolContinuationSignals{HasFunctionCallOutput: true},
 			currentPreviousResponse: "resp_client",
 			expectedPrevious:        "resp_1",
 			want:                    false,
 		},
 		{
-			name:            "skip_when_last_turn_response_id_missing",
-			storeDisabled:   true,
-			turn:            2,
-			signals:         ToolContinuationSignals{HasFunctionCallOutput: true},
+			name:             "skip_when_last_turn_response_id_missing",
+			storeDisabled:    true,
+			turn:             2,
+			signals:          ToolContinuationSignals{HasFunctionCallOutput: true},
 			expectedPrevious: "",
-			want:            false,
+			want:             false,
 		},
 		{
-			name:            "trim_whitespace_before_judgement",
-			storeDisabled:   true,
-			turn:            2,
-			signals:         ToolContinuationSignals{HasFunctionCallOutput: true},
+			name:             "trim_whitespace_before_judgement",
+			storeDisabled:    true,
+			turn:             2,
+			signals:          ToolContinuationSignals{HasFunctionCallOutput: true},
 			expectedPrevious: "   resp_2   ",
-			want:            true,
+			want:             true,
 		},
 		{
-			name:            "skip_when_tool_call_context_already_present",
-			storeDisabled:   true,
-			turn:            2,
-			signals:         ToolContinuationSignals{HasFunctionCallOutput: true, HasToolCallContext: true},
+			name:             "skip_when_tool_call_context_already_present",
+			storeDisabled:    true,
+			turn:             2,
+			signals:          ToolContinuationSignals{HasFunctionCallOutput: true, HasToolCallContext: true},
 			expectedPrevious: "resp_2",
-			want:            false,
+			want:             false,
 		},
 		{
-			name:            "skip_when_item_reference_already_covers_all_call_ids",
-			storeDisabled:   true,
-			turn:            2,
-			signals:         ToolContinuationSignals{HasFunctionCallOutput: true, HasItemReferenceForAllCallIDs: true},
+			name:             "skip_when_item_reference_already_covers_all_call_ids",
+			storeDisabled:    true,
+			turn:             2,
+			signals:          ToolContinuationSignals{HasFunctionCallOutput: true, HasItemReferenceForAllCallIDs: true},
 			expectedPrevious: "resp_2",
-			want:            false,
+			want:             false,
 		},
 		{
-			name:            "skip_when_function_call_output_missing_call_id",
-			storeDisabled:   true,
-			turn:            2,
-			signals:         ToolContinuationSignals{HasFunctionCallOutput: true, HasFunctionCallOutputMissingCallID: true},
+			name:             "skip_when_function_call_output_missing_call_id",
+			storeDisabled:    true,
+			turn:             2,
+			signals:          ToolContinuationSignals{HasFunctionCallOutput: true, HasFunctionCallOutputMissingCallID: true},
 			expectedPrevious: "resp_2",
-			want:            false,
+			want:             false,
 		},
 	}
 

From 7ce5b8321573e6628c4449382ab5507ac1ff5aae Mon Sep 17 00:00:00 2001
From: shaw <shaw-wei@foxmail.com>
Date: Wed, 29 Apr 2026 21:00:30 +0800
Subject: [PATCH 36/46] chore: remove superpowers docs

---
 ...-27-account-bulk-edit-scope-and-compact.md | 359 ------------------
 ...ount-bulk-edit-scope-and-compact-design.md | 233 ------------
 2 files changed, 592 deletions(-)
 delete mode 100644 docs/superpowers/plans/2026-04-27-account-bulk-edit-scope-and-compact.md
 delete mode 100644 docs/superpowers/specs/2026-04-27-account-bulk-edit-scope-and-compact-design.md

diff --git a/docs/superpowers/plans/2026-04-27-account-bulk-edit-scope-and-compact.md b/docs/superpowers/plans/2026-04-27-account-bulk-edit-scope-and-compact.md
deleted file mode 100644
index 42b76664..00000000
--- a/docs/superpowers/plans/2026-04-27-account-bulk-edit-scope-and-compact.md
+++ /dev/null
@@ -1,359 +0,0 @@
-# Account Bulk Edit Scope And Compact Implementation Plan
-
-> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
-
-**Goal:** Add filter-result bulk edit to admin accounts, unify the table-level bulk-edit entry, and align OpenAI bulk-edit controls with the existing compact-related single-account settings.
-
-**Architecture:** Extend the existing `/admin/accounts/bulk-update` flow to accept either explicit account IDs or a server-resolved filter target. Reuse the current account-list filter contract for scope resolution, then update the accounts view and bulk-edit modal so the UI can launch either selected-account edits or current-filter-result edits from one compact dropdown. Keep the existing bulk-edit form, but expand its target contract and OpenAI-specific field coverage.
-
-**Tech Stack:** Vue 3, TypeScript, Vitest, Gin, Go service/repository layer, existing admin accounts API.
-
----
-
-### Task 1: Add backend test coverage for filter-target bulk update
-
-**Files:**
-- Modify: `backend/internal/handler/admin/account_handler_mixed_channel_test.go`
-- Modify: `backend/internal/service/admin_service_bulk_update_test.go`
-- Test: `backend/internal/handler/admin/account_handler_mixed_channel_test.go`
-- Test: `backend/internal/service/admin_service_bulk_update_test.go`
-
-- [ ] **Step 1: Write the failing handler test for filter-target request acceptance**
-
-```go
-func TestBulkUpdateAcceptsFilterTargetRequest(t *testing.T) {
-	// add a request body that omits account_ids and submits filters instead
-	// assert the route does not reject the request as malformed once service stubs are wired
-}
-```
-
-- [ ] **Step 2: Run test to verify it fails**
-
-Run: `GOCACHE=/tmp/go-build GOMODCACHE=/tmp/go-mod go test ./backend/internal/handler/admin -run TestBulkUpdateAcceptsFilterTargetRequest -count=1`
-Expected: FAIL because `BulkUpdateAccountsRequest` does not yet support `filters`.
-
-- [ ] **Step 3: Write the failing service test for resolving IDs from filters**
-
-```go
-func TestAdminServiceBulkUpdateAccounts_ResolvesIDsFromFilters(t *testing.T) {
-	// construct BulkUpdateAccountsInput with Filters and no AccountIDs
-	// stub repository list/search path to return matching IDs
-	// assert BulkUpdate is called with all matching account IDs
-}
-```
-
-- [ ] **Step 4: Run test to verify it fails**
-
-Run: `GOCACHE=/tmp/go-build GOMODCACHE=/tmp/go-mod go test ./backend/internal/service -run TestAdminServiceBulkUpdateAccounts_ResolvesIDsFromFilters -count=1`
-Expected: FAIL because `BulkUpdateAccountsInput` and service logic only use explicit `AccountIDs`.
-
-- [ ] **Step 5: Commit**
-
-```bash
-git add backend/internal/handler/admin/account_handler_mixed_channel_test.go backend/internal/service/admin_service_bulk_update_test.go
-git commit -m "test: cover filter-target account bulk update"
-```
-
-### Task 2: Implement backend filter-target bulk update
-
-**Files:**
-- Modify: `backend/internal/handler/admin/account_handler.go`
-- Modify: `backend/internal/service/admin_service.go`
-- Modify: `backend/internal/repository/account_repo.go`
-- Modify: `backend/internal/service/account_service.go`
-- Test: `backend/internal/handler/admin/account_handler_mixed_channel_test.go`
-- Test: `backend/internal/service/admin_service_bulk_update_test.go`
-
-- [ ] **Step 1: Implement request structs and validation for filter targets**
-
-```go
-type BulkUpdateAccountFilters struct {
-	Platform    string `json:"platform"`
-	Type        string `json:"type"`
-	Status      string `json:"status"`
-	Group       string `json:"group"`
-	Search      string `json:"search"`
-	PrivacyMode string `json:"privacy_mode"`
-}
-
-type BulkUpdateAccountsRequest struct {
-	AccountIDs []int64                  `json:"account_ids"`
-	Filters    *BulkUpdateAccountFilters `json:"filters"`
-	// existing fields remain unchanged
-}
-```
-
-- [ ] **Step 2: Resolve filter targets in the service layer with one canonical path**
-
-```go
-type BulkUpdateAccountsInput struct {
-	AccountIDs []int64
-	Filters    *BulkUpdateAccountFilters
-	// existing fields remain unchanged
-}
-
-if len(input.AccountIDs) == 0 && input.Filters != nil {
-	ids, err := s.resolveBulkUpdateTargetIDs(ctx, input.Filters)
-	if err != nil {
-		return nil, err
-	}
-	input.AccountIDs = ids
-}
-```
-
-- [ ] **Step 3: Reuse existing account-search/repository logic to resolve all matching IDs**
-
-```go
-func (s *AdminService) resolveBulkUpdateTargetIDs(ctx context.Context, filters *BulkUpdateAccountFilters) ([]int64, error) {
-	// call the existing repository list/search path with the submitted filters
-	// page through all matching rows or use a dedicated ID-only query helper
-	// return unique IDs in stable order
-}
-```
-
-- [ ] **Step 4: Run targeted backend tests**
-
-Run: `GOCACHE=/tmp/go-build GOMODCACHE=/tmp/go-mod go test ./backend/internal/handler/admin ./backend/internal/service -run 'TestBulkUpdateAcceptsFilterTargetRequest|TestAdminServiceBulkUpdateAccounts_ResolvesIDsFromFilters' -count=1`
-Expected: PASS
-
-- [ ] **Step 5: Commit**
-
-```bash
-git add backend/internal/handler/admin/account_handler.go backend/internal/service/admin_service.go backend/internal/repository/account_repo.go backend/internal/service/account_service.go backend/internal/handler/admin/account_handler_mixed_channel_test.go backend/internal/service/admin_service_bulk_update_test.go
-git commit -m "feat: support filter-target account bulk update"
-```
-
-### Task 3: Add frontend API and modal tests for target scope
-
-**Files:**
-- Modify: `frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts`
-- Create: `frontend/src/views/admin/__tests__/AccountsView.bulkEdit.spec.ts`
-- Modify: `frontend/src/api/admin/accounts.ts`
-- Test: `frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts`
-- Test: `frontend/src/views/admin/__tests__/AccountsView.bulkEdit.spec.ts`
-
-- [ ] **Step 1: Write the failing modal test for filter-target payload submission**
-
-```ts
-it('submits bulk edit using current filters when target mode is filtered-results', async () => {
-  // mount BulkEditAccountModal with targetMode='filtered'
-  // submit a minimal change
-  // expect adminAPI.accounts.bulkUpdate to receive { filters: ... } rather than account_ids
-})
-```
-
-- [ ] **Step 2: Run test to verify it fails**
-
-Run: `pnpm -C frontend test:run src/components/account/__tests__/BulkEditAccountModal.spec.ts -t "filtered-results"`
-Expected: FAIL because the modal only accepts `accountIds`.
-
-- [ ] **Step 3: Write the failing accounts-view test for dropdown launch actions**
-
-```ts
-it('opens bulk edit for current filtered results from the table action dropdown', async () => {
-  // mount AccountsView with filters set
-  // click Bulk edit > current filtered results
-  // assert modal props contain filter target metadata
-})
-```
-
-- [ ] **Step 4: Run test to verify it fails**
-
-Run: `pnpm -C frontend test:run src/views/admin/__tests__/AccountsView.bulkEdit.spec.ts`
-Expected: FAIL because the dropdown action and target scope state do not exist yet.
-
-- [ ] **Step 5: Commit**
-
-```bash
-git add frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts frontend/src/views/admin/__tests__/AccountsView.bulkEdit.spec.ts frontend/src/api/admin/accounts.ts
-git commit -m "test: cover account bulk edit target scopes"
-```
-
-### Task 4: Implement unified frontend bulk-edit target scope flow
-
-**Files:**
-- Modify: `frontend/src/views/admin/AccountsView.vue`
-- Modify: `frontend/src/components/admin/account/AccountBulkActionsBar.vue`
-- Modify: `frontend/src/components/account/BulkEditAccountModal.vue`
-- Modify: `frontend/src/api/admin/accounts.ts`
-- Modify: `frontend/src/i18n/locales/zh.ts`
-- Modify: `frontend/src/i18n/locales/en.ts`
-- Test: `frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts`
-- Test: `frontend/src/views/admin/__tests__/AccountsView.bulkEdit.spec.ts`
-
-- [ ] **Step 1: Add a typed frontend target contract for bulk edit**
-
-```ts
-export type AccountBulkEditTarget =
-  | { mode: 'selected'; accountIds: number[]; selectedPlatforms: AccountPlatform[]; selectedTypes: AccountType[] }
-  | { mode: 'filtered'; filters: AccountListFilters; previewCount: number; selectedPlatforms: AccountPlatform[]; selectedTypes: AccountType[] }
-```
-
-- [ ] **Step 2: Replace the single selected-row edit button with one dropdown**
-
-```vue
-<BulkEditDropdown
-  :has-selection="selectedIds.length > 0"
-  @edit-selected="openBulkEditSelected"
-  @edit-filtered="openBulkEditFiltered"
-/>
-```
-
-- [ ] **Step 3: Snapshot current filters and preview count when launching filtered mode**
-
-```ts
-const openBulkEditFiltered = async () => {
-  const filters = toBulkEditFilterSnapshot(params)
-  const preview = await adminAPI.accounts.list(1, 1, filters)
-  bulkEditTarget.value = {
-    mode: 'filtered',
-    filters,
-    previewCount: preview.pagination.total,
-    selectedPlatforms: collectPlatforms(preview.data),
-    selectedTypes: collectTypes(preview.data)
-  }
-  showBulkEdit.value = true
-}
-```
-
-- [ ] **Step 4: Update modal submission to call `bulkUpdate` with either `account_ids` or `filters`**
-
-```ts
-if (props.target.mode === 'selected') {
-  await adminAPI.accounts.bulkUpdate({ account_ids: props.target.accountIds, ...updates })
-} else {
-  await adminAPI.accounts.bulkUpdate({ filters: props.target.filters, ...updates })
-}
-```
-
-- [ ] **Step 5: Run targeted frontend tests**
-
-Run: `pnpm -C frontend test:run src/components/account/__tests__/BulkEditAccountModal.spec.ts src/views/admin/__tests__/AccountsView.bulkEdit.spec.ts`
-Expected: PASS
-
-- [ ] **Step 6: Commit**
-
-```bash
-git add frontend/src/views/admin/AccountsView.vue frontend/src/components/admin/account/AccountBulkActionsBar.vue frontend/src/components/account/BulkEditAccountModal.vue frontend/src/api/admin/accounts.ts frontend/src/i18n/locales/zh.ts frontend/src/i18n/locales/en.ts frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts frontend/src/views/admin/__tests__/AccountsView.bulkEdit.spec.ts
-git commit -m "feat: add filtered-result account bulk edit"
-```
-
-### Task 5: Add failing tests for missing OpenAI bulk-edit fields
-
-**Files:**
-- Modify: `frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts`
-- Test: `frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts`
-
-- [ ] **Step 1: Write the failing OAuth test for `codex_cli_only`**
-
-```ts
-it('OpenAI OAuth bulk edit can submit codex_cli_only', async () => {
-  // enable the toggle and submit
-  // expect extra.codex_cli_only to be sent
-})
-```
-
-- [ ] **Step 2: Run test to verify it fails**
-
-Run: `pnpm -C frontend test:run src/components/account/__tests__/BulkEditAccountModal.spec.ts -t "codex_cli_only"`
-Expected: FAIL because the modal has no such control or payload mapping.
-
-- [ ] **Step 3: Write the failing API key test for API key WS mode**
-
-```ts
-it('OpenAI API key bulk edit submits API key WS mode fields', async () => {
-  // enable the API key WS mode selector and submit
-  // expect openai_apikey_responses_websockets_v2_mode and enabled flag
-})
-```
-
-- [ ] **Step 4: Run test to verify it fails**
-
-Run: `pnpm -C frontend test:run src/components/account/__tests__/BulkEditAccountModal.spec.ts -t "API key WS mode"`
-Expected: FAIL because the modal only submits OAuth WS mode.
-
-- [ ] **Step 5: Commit**
-
-```bash
-git add frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts
-git commit -m "test: cover missing OpenAI bulk edit fields"
-```
-
-### Task 6: Implement missing OpenAI bulk-edit controls and payload wiring
-
-**Files:**
-- Modify: `frontend/src/components/account/BulkEditAccountModal.vue`
-- Modify: `frontend/src/i18n/locales/zh.ts`
-- Modify: `frontend/src/i18n/locales/en.ts`
-- Test: `frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts`
-
-- [ ] **Step 1: Add UI controls for OAuth `codex_cli_only` and API key WS mode**
-
-```vue
-<div v-if="allOpenAIOAuth">
-  <!-- existing OAuth WS mode -->
-  <!-- add codex_cli_only toggle -->
-</div>
-
-<div v-if="allOpenAIAPIKey">
-  <!-- add API key WS mode selector -->
-</div>
-```
-
-- [ ] **Step 2: Mirror single-account payload semantics in the bulk-edit submit builder**
-
-```ts
-if (enableCodexCLIOnly.value) {
-  const extra = ensureExtra()
-  extra.codex_cli_only = codexCLIOnlyEnabled.value
-}
-
-if (enableOpenAIAPIKeyWSMode.value) {
-  const extra = ensureExtra()
-  extra.openai_apikey_responses_websockets_v2_mode = openaiAPIKeyResponsesWebSocketV2Mode.value
-  extra.openai_apikey_responses_websockets_v2_enabled = isOpenAIWSModeEnabled(openaiAPIKeyResponsesWebSocketV2Mode.value)
-}
-```
-
-- [ ] **Step 3: Run focused modal tests**
-
-Run: `pnpm -C frontend test:run src/components/account/__tests__/BulkEditAccountModal.spec.ts`
-Expected: PASS
-
-- [ ] **Step 4: Commit**
-
-```bash
-git add frontend/src/components/account/BulkEditAccountModal.vue frontend/src/i18n/locales/zh.ts frontend/src/i18n/locales/en.ts frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts
-git commit -m "feat: align OpenAI bulk edit compact settings"
-```
-
-### Task 7: Final regression verification
-
-**Files:**
-- Modify: none expected
-- Test: `frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts`
-- Test: `frontend/src/views/admin/__tests__/AccountsView.bulkEdit.spec.ts`
-- Test: `backend/internal/handler/admin/account_handler_mixed_channel_test.go`
-- Test: `backend/internal/service/admin_service_bulk_update_test.go`
-
-- [ ] **Step 1: Run frontend typecheck**
-
-Run: `pnpm -C frontend typecheck`
-Expected: PASS
-
-- [ ] **Step 2: Run focused frontend test suite**
-
-Run: `pnpm -C frontend test:run src/components/account/__tests__/BulkEditAccountModal.spec.ts src/views/admin/__tests__/AccountsView.bulkEdit.spec.ts`
-Expected: PASS
-
-- [ ] **Step 3: Run focused backend test suite**
-
-Run: `GOCACHE=/tmp/go-build GOMODCACHE=/tmp/go-mod go test ./backend/internal/handler/admin ./backend/internal/service -run 'BulkUpdate|bulk update' -count=1`
-Expected: PASS
-
-- [ ] **Step 4: Commit final integration fixes if needed**
-
-```bash
-git add frontend/src/components/account/BulkEditAccountModal.vue frontend/src/views/admin/AccountsView.vue frontend/src/components/admin/account/AccountBulkActionsBar.vue frontend/src/api/admin/accounts.ts frontend/src/i18n/locales/zh.ts frontend/src/i18n/locales/en.ts backend/internal/handler/admin/account_handler.go backend/internal/service/admin_service.go backend/internal/repository/account_repo.go backend/internal/service/account_service.go frontend/src/components/account/__tests__/BulkEditAccountModal.spec.ts frontend/src/views/admin/__tests__/AccountsView.bulkEdit.spec.ts backend/internal/handler/admin/account_handler_mixed_channel_test.go backend/internal/service/admin_service_bulk_update_test.go
-git commit -m "feat: finish account bulk edit scope and compact support"
-```
diff --git a/docs/superpowers/specs/2026-04-27-account-bulk-edit-scope-and-compact-design.md b/docs/superpowers/specs/2026-04-27-account-bulk-edit-scope-and-compact-design.md
deleted file mode 100644
index 3a1dc5ac..00000000
--- a/docs/superpowers/specs/2026-04-27-account-bulk-edit-scope-and-compact-design.md
+++ /dev/null
@@ -1,233 +0,0 @@
-# Account Bulk Edit Scope And Compact Design
-
-## Summary
-
-This change expands admin account bulk edit in two directions:
-
-1. Add a second bulk-edit target scope based on the current filter result set, so operators do not need to manually select every account.
-2. Align OpenAI bulk-edit fields with single-account create/edit for the compact-related settings that are already supported elsewhere.
-
-The design keeps the existing selected-row workflow intact and adds a unified bulk-edit entry with two explicit actions:
-
-- `Bulk edit selected accounts`
-- `Bulk edit current filtered results`
-
-`Current filtered results` reuses the existing account-list filters. That means:
-
-- with no filters, it targets the whole account inventory
-- with a group filter, it targets all accounts in that group
-- with combined filters, it targets all matching accounts
-
-## Goals
-
-- Preserve the current selected-account bulk edit flow.
-- Let operators bulk edit the full current filtered result set without manual row selection.
-- Show the user the exact target scope before applying changes.
-- Reuse the current list filter semantics instead of inventing a separate "all accounts" or "by group" API.
-- Add the missing OpenAI bulk-edit fields:
-  - OAuth `codex_cli_only`
-  - API key `openai_apikey_responses_websockets_v2_mode`
-
-## Non-Goals
-
-- No new standalone "edit all accounts" route that ignores filters.
-- No new dedicated "edit group" route separate from list filters.
-- No change to the backend merge semantics for other bulk-edit fields.
-- No attempt in this change to refactor all account form components into a shared schema system.
-
-## Current State
-
-### Bulk edit entry
-
-The account list currently exposes bulk edit only through selected-row actions. `AccountsView.vue` passes `selIds`, `selPlatforms`, and `selTypes` into `BulkEditAccountModal.vue`.
-
-### Filter state
-
-The account page already keeps a central `params` object for current filters and reloads the table from that state. Group filtering already exists in `AccountTableFilters.vue`.
-
-### Bulk edit payload
-
-`BulkEditAccountModal.vue` builds a bulk update request around explicit account IDs.
-
-### OpenAI field gap
-
-Single-account create/edit already supports:
-
-- `openai_passthrough`
-- OAuth WS mode
-- API key WS mode
-- OAuth `codex_cli_only`
-
-Bulk edit currently supports:
-
-- `openai_passthrough`
-- OAuth WS mode only
-
-That leaves a real capability gap for operators managing large OpenAI account sets.
-
-## User Experience
-
-### Entry point
-
-Use one compact `Bulk edit` dropdown button in the table-level bulk actions area above the grid.
-
-The dropdown contains:
-
-- `Bulk edit selected accounts`
-- `Bulk edit current filtered results`
-
-Behavior:
-
-- If there is no row selection, the `selected accounts` action is disabled.
-- `Current filtered results` is always available.
-- The existing separate immediate `Edit` action in the selected-row bar is replaced by this unified dropdown to avoid duplicate buttons that mean different scopes.
-
-### Modal scope messaging
-
-The bulk edit modal gets a required scope descriptor prop.
-
-For `selected accounts`:
-
-- show the existing count-based info banner
-- keep using explicit selected account metadata for platform/type compatibility checks
-
-For `current filtered results`:
-
-- show a banner stating that edits apply to the current filtered result set
-- show the matched account count from a preview query
-- show a short summary of active filters when practical, especially group/search/platform/type/status filters
-
-### Safety
-
-For filtered-result mode:
-
-- disable submit if the preview count is `0`
-- refresh the target count when the modal opens
-- keep the final success toast count aligned with the backend result
-
-The modal should not silently fall back from filtered mode to selected mode.
-
-## Backend/API Design
-
-### Request model
-
-Extend bulk update to support two target modes:
-
-- explicit IDs
-- filter-based query
-
-The request shape should keep backward compatibility for the selected-ID path while allowing a filter target. The backend handler can accept a payload that contains either:
-
-- `account_ids`
-- or `filters`
-
-but not neither.
-
-The `filters` payload should reuse the existing account-list query semantics already used by `/admin/accounts` and `/admin/accounts/data`, including:
-
-- `search`
-- `platform`
-- `type`
-- `status`
-- `privacy_mode`
-- `group`
-- existing sort fields may be ignored for mutation targeting if not needed
-
-### Preview count
-
-The frontend needs an accurate target count before submit in filtered-result mode. The simplest compatible approach is:
-
-- call the existing account list endpoint with the current filters and a minimal page size strategy sufficient to obtain total count
-
-If the current API makes that awkward, add a narrow preview/count helper for bulk edit target resolution. Prefer reusing the existing listing contract first.
-
-### Target resolution
-
-For filtered-result mode, the backend must resolve matching account IDs server-side from the submitted filters rather than trusting only currently loaded page data. This is required so filtered-result mode can act on the full result set across pagination.
-
-### Compatibility metadata
-
-The frontend still needs platform/type compatibility to determine which fields to show. For filtered-result mode, derive this from the preview result set returned from the same query used to show count. If the preview spans mixed incompatible account types, show the same warnings/conditional UI that selected mode already uses.
-
-## Frontend Design
-
-### Accounts view
-
-`AccountsView.vue` will:
-
-- replace the direct selected-only bulk edit trigger with a dropdown action model
-- keep a reactive description of the pending bulk edit scope
-- pass either selected IDs or current filter params into the modal
-
-The "current filtered results" action uses the live `params` object snapshot at open time, not a mutable live subscription while the modal is already open.
-
-### Bulk edit modal
-
-`BulkEditAccountModal.vue` will accept a richer target contract, for example:
-
-- target mode
-- selected IDs or filter snapshot
-- preview count
-- preview platform/type coverage if needed
-
-The modal remains one form; only the scope banner and submission target differ.
-
-### OpenAI field alignment
-
-Add the missing OpenAI controls to bulk edit:
-
-- OAuth `codex_cli_only`
-- API key WS mode selector
-
-Rules:
-
-- OAuth accounts show OAuth WS mode and `codex_cli_only`
-- API key accounts show API key WS mode
-- mixed OpenAI OAuth/API key selections continue to show only fields that are safe for the entire target set
-
-The payload builder must write:
-
-- `extra.codex_cli_only`
-- `extra.openai_apikey_responses_websockets_v2_mode`
-- `extra.openai_apikey_responses_websockets_v2_enabled`
-
-with the same enable/disable semantics already used by single-account forms.
-
-## Testing Strategy
-
-### Frontend tests
-
-Add or extend tests for:
-
-- bulk edit dropdown actions in the accounts view
-- selected-account mode still calling bulk update by IDs
-- filtered-result mode calling bulk update with filter target
-- filtered-result mode showing preview count and blocking submit on zero matches
-- OAuth bulk edit supporting `codex_cli_only`
-- API key bulk edit supporting API key WS mode
-- no regression for existing passthrough and OAuth WS mode tests
-
-### Backend tests
-
-Add or extend tests for:
-
-- bulk update request validation for IDs vs filters
-- filtered-result mode resolving all matching accounts across pagination semantics
-- mixed-channel risk checks still running for filter-target updates if applicable
-- backward compatibility for the existing selected-ID request path
-
-## Risks
-
-- Filter semantics can drift if bulk edit reimplements list-filter parsing differently from the listing endpoints.
-- Filtered-result mode can surprise users if the active scope is not shown clearly enough.
-- Large filtered updates may affect many rows; success/error messaging must stay explicit.
-
-## Recommendation
-
-Implement this as a targeted extension of the existing bulk edit flow:
-
-- unify the entry point in the table action area
-- add filter-target bulk update support
-- align the missing OpenAI compact-related fields
-
-This keeps the mental model simple and solves the large-account-management pain without introducing a second parallel batch-edit system.

From 5e54d492be59fb4254427704c68cb39d2fbe1616 Mon Sep 17 00:00:00 2001
From: shaw <shaw-wei@foxmail.com>
Date: Wed, 29 Apr 2026 21:35:18 +0800
Subject: [PATCH 37/46] fix(lint): check type assertion error in codex
 transform test

The errcheck linter flagged an unchecked type assertion on
item["type"].(string). Use the two-value form with require.True
to satisfy the linter and fail clearly on unexpected types.
---
 backend/internal/service/openai_codex_transform_test.go | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/backend/internal/service/openai_codex_transform_test.go b/backend/internal/service/openai_codex_transform_test.go
index c6f147d8..7ab6bfc0 100644
--- a/backend/internal/service/openai_codex_transform_test.go
+++ b/backend/internal/service/openai_codex_transform_test.go
@@ -1176,7 +1176,9 @@ func TestFilterCodexInput_DropsReasoningItemsRegardlessOfPreserveReferences(t *t
 			for _, raw := range filtered {
 				item, ok := raw.(map[string]any)
 				require.True(t, ok)
-				gotTypes[item["type"].(string)]++
+				typ, ok := item["type"].(string)
+				require.True(t, ok)
+				gotTypes[typ]++
 			}
 			require.Equal(t, 1, gotTypes["message"])
 			require.Equal(t, 1, gotTypes["function_call"])

From 40feb86ba4657d5cb4ef3077d2a4a38abbd8b395 Mon Sep 17 00:00:00 2001
From: shaw <shaw-wei@foxmail.com>
Date: Wed, 29 Apr 2026 22:11:45 +0800
Subject: [PATCH 38/46] fix(httputil): add decompression bomb guard and fix
 errcheck lint

---
 backend/internal/pkg/httputil/body.go | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/backend/internal/pkg/httputil/body.go b/backend/internal/pkg/httputil/body.go
index 31bba8c5..cee12948 100644
--- a/backend/internal/pkg/httputil/body.go
+++ b/backend/internal/pkg/httputil/body.go
@@ -16,6 +16,9 @@ import (
 const (
 	requestBodyReadInitCap    = 512
 	requestBodyReadMaxInitCap = 1 << 20
+	// maxDecompressedBodySize caps the decompressed request body at 64 MiB to
+	// guard against decompression bombs; output past the cap is silently dropped.
+	maxDecompressedBodySize = 64 << 20
 )
 
 // ReadRequestBodyWithPrealloc reads request body with preallocated buffer based
@@ -69,21 +72,21 @@ func decompressRequestBody(encoding string, raw []byte) ([]byte, error) {
 			return nil, err
 		}
 		defer dec.Close()
-		return io.ReadAll(dec)
+		return io.ReadAll(io.LimitReader(dec, maxDecompressedBodySize))
 	case "gzip", "x-gzip":
 		gr, err := gzip.NewReader(bytes.NewReader(raw))
 		if err != nil {
 			return nil, err
 		}
-		defer gr.Close()
-		return io.ReadAll(gr)
+		defer func() { _ = gr.Close() }()
+		return io.ReadAll(io.LimitReader(gr, maxDecompressedBodySize))
 	case "deflate":
 		zr, err := zlib.NewReader(bytes.NewReader(raw))
 		if err != nil {
 			return nil, err
 		}
-		defer zr.Close()
-		return io.ReadAll(zr)
+		defer func() { _ = zr.Close() }()
+		return io.ReadAll(io.LimitReader(zr, maxDecompressedBodySize))
 	default:
 		return nil, errors.New("unsupported Content-Encoding")
 	}

From 8bf2a7b88a14188acf34b6e0693d81dfc42559b7 Mon Sep 17 00:00:00 2001
From: shaw <shaw-wei@foxmail.com>
Date: Wed, 29 Apr 2026 22:48:39 +0800
Subject: [PATCH 39/46] fix(scheduler): resolve SetSnapshot race conditions and
 remove usage throttle

Backend: Fix three race conditions in SetSnapshot that caused account
scheduling anomalies and broken sticky sessions:
- Use Lua CAS script for atomic version activation, preventing version
  rollback when concurrent goroutines write snapshots simultaneously
- Add UnlockBucket to release rebuild lock immediately after completion
  instead of waiting 30s TTL expiry
- Replace immediate DEL of old snapshots with 60s EXPIRE grace period,
  preventing readers from hitting empty ZRANGE during version switches

Frontend: Remove serial queue throttle (1-2s delay per request) from
usage loading since backend now uses passive sampling. All usage
requests execute immediately in parallel.
---
 ...eway_handler_warmup_intercept_unit_test.go |  3 +
 .../account_repo_integration_test.go          |  4 +
 .../internal/repository/scheduler_cache.go    | 83 +++++++++++++++---
 backend/internal/service/scheduler_cache.go   |  2 +
 .../scheduler_snapshot_hydration_test.go      |  4 +
 .../service/scheduler_snapshot_service.go     |  3 +
 frontend/src/utils/usageLoadQueue.ts          | 87 ++-----------------
 7 files changed, 91 insertions(+), 95 deletions(-)

diff --git a/backend/internal/handler/gateway_handler_warmup_intercept_unit_test.go b/backend/internal/handler/gateway_handler_warmup_intercept_unit_test.go
index 71030140..57554cf9 100644
--- a/backend/internal/handler/gateway_handler_warmup_intercept_unit_test.go
+++ b/backend/internal/handler/gateway_handler_warmup_intercept_unit_test.go
@@ -50,6 +50,9 @@ func (f *fakeSchedulerCache) UpdateLastUsed(_ context.Context, _ map[int64]time.
 func (f *fakeSchedulerCache) TryLockBucket(_ context.Context, _ service.SchedulerBucket, _ time.Duration) (bool, error) {
 	return true, nil
 }
+func (f *fakeSchedulerCache) UnlockBucket(_ context.Context, _ service.SchedulerBucket) error {
+	return nil
+}
 func (f *fakeSchedulerCache) ListBuckets(_ context.Context) ([]service.SchedulerBucket, error) {
 	return nil, nil
 }
diff --git a/backend/internal/repository/account_repo_integration_test.go b/backend/internal/repository/account_repo_integration_test.go
index b249bb61..d1cea9eb 100644
--- a/backend/internal/repository/account_repo_integration_test.go
+++ b/backend/internal/repository/account_repo_integration_test.go
@@ -64,6 +64,10 @@ func (s *schedulerCacheRecorder) TryLockBucket(ctx context.Context, bucket servi
 	return true, nil
 }
 
+func (s *schedulerCacheRecorder) UnlockBucket(ctx context.Context, bucket service.SchedulerBucket) error {
+	return nil
+}
+
 func (s *schedulerCacheRecorder) ListBuckets(ctx context.Context) ([]service.SchedulerBucket, error) {
 	return nil, nil
 }
diff --git a/backend/internal/repository/scheduler_cache.go b/backend/internal/repository/scheduler_cache.go
index add0e501..8e1f9f56 100644
--- a/backend/internal/repository/scheduler_cache.go
+++ b/backend/internal/repository/scheduler_cache.go
@@ -24,6 +24,49 @@ const (
 
 	defaultSchedulerSnapshotMGetChunkSize  = 128
 	defaultSchedulerSnapshotWriteChunkSize = 256
+
+	// snapshotGraceTTLSeconds 旧快照过期的宽限期（秒）。
+	// 替代立即 DEL，让正在读取旧版本的 reader 有足够时间完成 ZRANGE。
+	snapshotGraceTTLSeconds = 60
+)
+
+var (
+	// activateSnapshotScript 原子 CAS 切换快照版本。
+	// 仅当新版本号 >= 当前激活版本时才切换，防止并发写入导致版本回滚。
+	// 旧快照使用 EXPIRE 设置宽限期而非立即 DEL，避免与 reader 竞态。
+	//
+	// KEYS[1] = activeKey     (sched:active:{bucket})
+	// KEYS[2] = readyKey      (sched:ready:{bucket})
+	// KEYS[3] = bucketSetKey  (sched:buckets)
+	// KEYS[4] = snapshotKey   (新写入的快照 key)
+	// ARGV[1] = 新版本号字符串
+	// ARGV[2] = bucket 字符串 (用于 SADD)
+	// ARGV[3] = 快照 key 前缀 (用于构造旧快照 key)
+	// ARGV[4] = 宽限期 TTL 秒数
+	//
+	// 返回 1 = 已激活, 0 = 版本过旧未激活
+	activateSnapshotScript = redis.NewScript(`
+local currentActive = redis.call('GET', KEYS[1])
+local newVersion = tonumber(ARGV[1])
+
+if currentActive ~= false then
+	local curVersion = tonumber(currentActive)
+	if curVersion and newVersion < curVersion then
+		redis.call('DEL', KEYS[4])
+		return 0
+	end
+end
+
+redis.call('SET', KEYS[1], ARGV[1])
+redis.call('SET', KEYS[2], '1')
+redis.call('SADD', KEYS[3], ARGV[2])
+
+if currentActive ~= false and currentActive ~= ARGV[1] then
+	redis.call('EXPIRE', ARGV[3] .. currentActive, tonumber(ARGV[4]))
+end
+
+return 1
+`)
 )
 
 type schedulerCache struct {
@@ -108,9 +151,9 @@ func (c *schedulerCache) GetSnapshot(ctx context.Context, bucket service.Schedul
 }
 
 func (c *schedulerCache) SetSnapshot(ctx context.Context, bucket service.SchedulerBucket, accounts []service.Account) error {
-	activeKey := schedulerBucketKey(schedulerActivePrefix, bucket)
-	oldActive, _ := c.rdb.Get(ctx, activeKey).Result()
-
+	// Phase 1: 分配新版本号并写入快照数据。
+	// INCR 保证每个调用方获得唯一递增版本号。
+	// 写入的 snapshotKey 是新的版本化 key，reader 尚不知晓，因此无竞态。
 	versionKey := schedulerBucketKey(schedulerVersionPrefix, bucket)
 	version, err := c.rdb.Incr(ctx, versionKey).Result()
 	if err != nil {
@@ -124,7 +167,6 @@ func (c *schedulerCache) SetSnapshot(ctx context.Context, bucket service.Schedul
 		return err
 	}
 
-	pipe := c.rdb.Pipeline()
 	if len(accounts) > 0 {
 		// 使用序号作为 score，保持数据库返回的排序语义。
 		members := make([]redis.Z, 0, len(accounts))
@@ -134,6 +176,7 @@ func (c *schedulerCache) SetSnapshot(ctx context.Context, bucket service.Schedul
 				Member: strconv.FormatInt(account.ID, 10),
 			})
 		}
+		pipe := c.rdb.Pipeline()
 		for start := 0; start < len(members); start += c.writeChunkSize {
 			end := start + c.writeChunkSize
 			if end > len(members) {
@@ -141,18 +184,25 @@ func (c *schedulerCache) SetSnapshot(ctx context.Context, bucket service.Schedul
 			}
 			pipe.ZAdd(ctx, snapshotKey, members[start:end]...)
 		}
-	} else {
-		pipe.Del(ctx, snapshotKey)
-	}
-	pipe.Set(ctx, activeKey, versionStr, 0)
-	pipe.Set(ctx, schedulerBucketKey(schedulerReadyPrefix, bucket), "1", 0)
-	pipe.SAdd(ctx, schedulerBucketSetKey, bucket.String())
-	if _, err := pipe.Exec(ctx); err != nil {
-		return err
+		if _, err := pipe.Exec(ctx); err != nil {
+			return err
+		}
 	}
 
-	if oldActive != "" && oldActive != versionStr {
-		_ = c.rdb.Del(ctx, schedulerSnapshotKey(bucket, oldActive)).Err()
+	// Phase 2: 原子 CAS 激活版本。
+	// Lua 脚本保证：仅当新版本 >= 当前激活版本时才切换 active 指针，
+	// 防止并发写入导致版本回滚。
+	// 旧快照使用 EXPIRE 宽限期而非立即 DEL，避免 reader 竞态。
+	activeKey := schedulerBucketKey(schedulerActivePrefix, bucket)
+	readyKey := schedulerBucketKey(schedulerReadyPrefix, bucket)
+	snapshotKeyPrefix := fmt.Sprintf("%s%d:%s:%s:v", schedulerSnapshotPrefix, bucket.GroupID, bucket.Platform, bucket.Mode)
+
+	keys := []string{activeKey, readyKey, schedulerBucketSetKey, snapshotKey}
+	args := []any{versionStr, bucket.String(), snapshotKeyPrefix, snapshotGraceTTLSeconds}
+
+	_, err = activateSnapshotScript.Run(ctx, c.rdb, keys, args...).Result()
+	if err != nil {
+		return err
 	}
 
 	return nil
@@ -232,6 +282,11 @@ func (c *schedulerCache) TryLockBucket(ctx context.Context, bucket service.Sched
 	return c.rdb.SetNX(ctx, key, time.Now().UnixNano(), ttl).Result()
 }
 
+func (c *schedulerCache) UnlockBucket(ctx context.Context, bucket service.SchedulerBucket) error {
+	key := schedulerBucketKey(schedulerLockPrefix, bucket)
+	return c.rdb.Del(ctx, key).Err()
+}
+
 func (c *schedulerCache) ListBuckets(ctx context.Context) ([]service.SchedulerBucket, error) {
 	raw, err := c.rdb.SMembers(ctx, schedulerBucketSetKey).Result()
 	if err != nil {
diff --git a/backend/internal/service/scheduler_cache.go b/backend/internal/service/scheduler_cache.go
index f36135e0..f9794c82 100644
--- a/backend/internal/service/scheduler_cache.go
+++ b/backend/internal/service/scheduler_cache.go
@@ -59,6 +59,8 @@ type SchedulerCache interface {
 	UpdateLastUsed(ctx context.Context, updates map[int64]time.Time) error
 	// TryLockBucket 尝试获取分桶重建锁。
 	TryLockBucket(ctx context.Context, bucket SchedulerBucket, ttl time.Duration) (bool, error)
+	// UnlockBucket releases the bucket rebuild lock; the Redis implementation deletes the key without an owner check.
+	UnlockBucket(ctx context.Context, bucket SchedulerBucket) error
 	// ListBuckets 返回已注册的分桶集合。
 	ListBuckets(ctx context.Context) ([]SchedulerBucket, error)
 	// GetOutboxWatermark 读取 outbox 水位。
diff --git a/backend/internal/service/scheduler_snapshot_hydration_test.go b/backend/internal/service/scheduler_snapshot_hydration_test.go
index 5c0b289b..0b32c2ad 100644
--- a/backend/internal/service/scheduler_snapshot_hydration_test.go
+++ b/backend/internal/service/scheduler_snapshot_hydration_test.go
@@ -44,6 +44,10 @@ func (c *snapshotHydrationCache) TryLockBucket(ctx context.Context, bucket Sched
 	return true, nil
 }
 
+func (c *snapshotHydrationCache) UnlockBucket(ctx context.Context, bucket SchedulerBucket) error {
+	return nil
+}
+
 func (c *snapshotHydrationCache) ListBuckets(ctx context.Context) ([]SchedulerBucket, error) {
 	return nil, nil
 }
diff --git a/backend/internal/service/scheduler_snapshot_service.go b/backend/internal/service/scheduler_snapshot_service.go
index 62b6993d..a68cdf0c 100644
--- a/backend/internal/service/scheduler_snapshot_service.go
+++ b/backend/internal/service/scheduler_snapshot_service.go
@@ -544,6 +544,9 @@ func (s *SchedulerSnapshotService) rebuildBucket(ctx context.Context, bucket Sch
 	if !ok {
 		return nil
 	}
+	defer func() {
+		_ = s.cache.UnlockBucket(ctx, bucket)
+	}()
 
 	rebuildCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
 	defer cancel()
diff --git a/frontend/src/utils/usageLoadQueue.ts b/frontend/src/utils/usageLoadQueue.ts
index 7bea5679..042b1240 100644
--- a/frontend/src/utils/usageLoadQueue.ts
+++ b/frontend/src/utils/usageLoadQueue.ts
@@ -1,93 +1,18 @@
 /**
- * Usage request scheduler — throttles Anthropic API calls by proxy exit.
+ * Usage request scheduler.
  *
- * Anthropic OAuth/setup-token accounts sharing the same proxy exit are placed
- * into a serial queue with a random 1–2s delay between requests, preventing
- * upstream 429 rate-limit errors.
- *
- * Proxy identity = host:port:username — two proxy records pointing to the
- * same exit share a single queue. Accounts without a proxy go into a
- * "direct" queue.
- *
- * All other platforms bypass the queue and execute immediately.
+ * All platforms execute immediately without queuing — the backend uses
+ * passive sampling so upstream 429 rate-limit errors are no longer a concern.
  */
 
 import type { Account } from '@/types'
 
-const GROUP_DELAY_MIN_MS = 1000
-const GROUP_DELAY_MAX_MS = 2000
-
-type Task<T> = {
-  fn: () => Promise<T>
-  resolve: (value: T) => void
-  reject: (reason: unknown) => void
-}
-
-const queues = new Map<string, Task<unknown>[]>()
-const running = new Set<string>()
-
-/** Whether this account needs throttled queuing. */
-function needsThrottle(account: Account): boolean {
-  return (
-    account.platform === 'anthropic' &&
-    (account.type === 'oauth' || account.type === 'setup-token')
-  )
-}
-
-/** Build a queue key from proxy connection details. */
-function buildGroupKey(account: Account): string {
-  const proxy = account.proxy
-  const proxyIdentity = proxy
-    ? `${proxy.host}:${proxy.port}:${proxy.username || ''}`
-    : 'direct'
-  return `anthropic:${proxyIdentity}`
-}
-
-async function drain(groupKey: string) {
-  if (running.has(groupKey)) return
-  running.add(groupKey)
-
-  const queue = queues.get(groupKey)
-  while (queue && queue.length > 0) {
-    const task = queue.shift()!
-    try {
-      const result = await task.fn()
-      task.resolve(result)
-    } catch (err) {
-      task.reject(err)
-    }
-    if (queue.length > 0) {
-      const jitter = GROUP_DELAY_MIN_MS + Math.random() * (GROUP_DELAY_MAX_MS - GROUP_DELAY_MIN_MS)
-      await new Promise((r) => setTimeout(r, jitter))
-    }
-  }
-
-  running.delete(groupKey)
-  queues.delete(groupKey)
-}
-
 /**
- * Schedule a usage fetch. Anthropic accounts are queued by proxy exit;
- * all other platforms execute immediately.
+ * Schedule a usage fetch. All requests execute immediately.
  */
 export function enqueueUsageRequest<T>(
-  account: Account,
+  _account: Account,
   fn: () => Promise<T>
 ): Promise<T> {
-  // Non-Anthropic → fire immediately, no queuing
-  if (!needsThrottle(account)) {
-    return fn()
-  }
-
-  const key = buildGroupKey(account)
-
-  return new Promise<T>((resolve, reject) => {
-    let queue = queues.get(key)
-    if (!queue) {
-      queue = []
-      queues.set(key, queue)
-    }
-    queue.push({ fn, resolve, reject } as Task<unknown>)
-    drain(key)
-  })
+  return fn()
 }

From 8ad099baa6057f0dfed32ded1f04fc5ea5a38717 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Wed, 29 Apr 2026 15:08:59 +0000
Subject: [PATCH 40/46] chore: sync VERSION to 0.1.120 [skip ci]

---
 backend/cmd/server/VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/cmd/server/VERSION b/backend/cmd/server/VERSION
index 841597f0..27f3bc3e 100644
--- a/backend/cmd/server/VERSION
+++ b/backend/cmd/server/VERSION
@@ -1 +1 @@
-0.1.119
+0.1.120

From f084d30d6530cc76d76c9ff6a5cd20bb6628988e Mon Sep 17 00:00:00 2001
From: DaydreamCoding <DaydreamCoding@users.noreply.github.com>
Date: Thu, 16 Apr 2026 21:23:19 +0800
Subject: [PATCH 41/46] =?UTF-8?q?fix:=20=E6=81=A2=E5=A4=8D=E8=A1=A8?=
 =?UTF-8?q?=E6=A0=BC=E5=88=86=E9=A1=B5=E5=A4=A7=E5=B0=8F=20localStorage=20?=
 =?UTF-8?q?=E6=8C=81=E4=B9=85=E5=8C=96?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- usePersistedPageSize: 恢复 localStorage 读写，以系统配置为 fallback
- useTableLoader: handlePageSizeChange 时写入 localStorage
- Pagination.vue: handlePageSizeChange 时写入 localStorage

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 frontend/src/components/common/Pagination.vue |  2 ++
 .../src/composables/usePersistedPageSize.ts   | 28 ++++++++++++++++---
 frontend/src/composables/useTableLoader.ts    |  3 +-
 3 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/frontend/src/components/common/Pagination.vue b/frontend/src/components/common/Pagination.vue
index 2bfc6872..9b4ac200 100644
--- a/frontend/src/components/common/Pagination.vue
+++ b/frontend/src/components/common/Pagination.vue
@@ -123,6 +123,7 @@ import { useI18n } from 'vue-i18n'
 import Icon from '@/components/icons/Icon.vue'
 import Select from './Select.vue'
 import { getConfiguredTablePageSizeOptions, normalizeTablePageSize } from '@/utils/tablePreferences'
+import { setPersistedPageSize } from '@/composables/usePersistedPageSize'
 
 const { t } = useI18n()
 
@@ -224,6 +225,7 @@ const goToPage = (newPage: number) => {
 const handlePageSizeChange = (value: string | number | boolean | null) => {
   if (value === null || typeof value === 'boolean') return
   const newPageSize = normalizeTablePageSize(typeof value === 'string' ? parseInt(value, 10) : value)
+  setPersistedPageSize(newPageSize)
   emit('update:pageSize', newPageSize)
 }
 
diff --git a/frontend/src/composables/usePersistedPageSize.ts b/frontend/src/composables/usePersistedPageSize.ts
index 366619ea..972373d1 100644
--- a/frontend/src/composables/usePersistedPageSize.ts
+++ b/frontend/src/composables/usePersistedPageSize.ts
@@ -1,9 +1,29 @@
 import { getConfiguredTableDefaultPageSize, normalizeTablePageSize } from '@/utils/tablePreferences'
 
-/**
- * 读取当前系统配置的表格默认每页条数。
- * 不再使用本地持久化缓存，所有页面统一以通用表格设置为准。
- */
+const STORAGE_KEY = 'table-page-size'
+
 export function getPersistedPageSize(fallback = getConfiguredTableDefaultPageSize()): number {
+  if (typeof window !== 'undefined') {
+    try {
+      const stored = window.localStorage.getItem(STORAGE_KEY)
+      if (stored !== null) {
+        const parsed = Number(stored)
+        if (Number.isFinite(parsed)) {
+          return normalizeTablePageSize(parsed)
+        }
+      }
+    } catch (error) {
+      console.warn('Failed to read persisted page size:', error)
+    }
+  }
   return normalizeTablePageSize(getConfiguredTableDefaultPageSize() || fallback)
 }
+
+export function setPersistedPageSize(size: number): void {
+  if (typeof window === 'undefined') return
+  try {
+    window.localStorage.setItem(STORAGE_KEY, String(size))
+  } catch (error) {
+    console.warn('Failed to persist page size:', error)
+  }
+}
diff --git a/frontend/src/composables/useTableLoader.ts b/frontend/src/composables/useTableLoader.ts
index c288f42e..67c1dcdb 100644
--- a/frontend/src/composables/useTableLoader.ts
+++ b/frontend/src/composables/useTableLoader.ts
@@ -1,7 +1,7 @@
 import { ref, reactive, onUnmounted, toRaw } from 'vue'
 import { useDebounceFn } from '@vueuse/core'
 import type { BasePaginationResponse, FetchOptions } from '@/types'
-import { getPersistedPageSize } from './usePersistedPageSize'
+import { getPersistedPageSize, setPersistedPageSize } from './usePersistedPageSize'
 
 interface PaginationState {
   page: number
@@ -88,6 +88,7 @@ export function useTableLoader<T, P extends Record<string, any>>(options: TableL
   const handlePageSizeChange = (size: number) => {
     pagination.page_size = size
     pagination.page = 1
+    setPersistedPageSize(size)
     load()
   }
 

From 733627cf9d3fe337f24f8f658979646a52f3a5ba Mon Sep 17 00:00:00 2001
From: shaw <shaw-wei@foxmail.com>
Date: Thu, 30 Apr 2026 11:38:11 +0800
Subject: [PATCH 42/46] fix: improve sticky session scheduling

---
 backend/internal/handler/gateway_handler.go   |  44 ++++++
 .../internal/repository/scheduler_cache.go    |  58 +++++++
 .../scheduler_cache_integration_test.go       |  16 ++
 .../repository/scheduler_cache_unit_test.go   |  40 +++++
 backend/internal/service/gateway_service.go   | 142 ++++++++++++++++--
 5 files changed, 290 insertions(+), 10 deletions(-)

diff --git a/backend/internal/handler/gateway_handler.go b/backend/internal/handler/gateway_handler.go
index ef532559..7b082b07 100644
--- a/backend/internal/handler/gateway_handler.go
+++ b/backend/internal/handler/gateway_handler.go
@@ -262,6 +262,12 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 	}
 	sessionHash := h.gatewayService.GenerateSessionHash(parsedReq)
 
+	// [DEBUG-STICKY] 打印会话 hash 生成结果
+	reqLog.Info("sticky.session_hash_generated",
+		zap.String("session_hash", sessionHash),
+		zap.String("metadata_user_id_raw", parsedReq.MetadataUserID),
+	)
+
 	// 获取平台：优先使用强制平台（/antigravity 路由，中间件已设置 request.Context），否则使用分组平台
 	platform := ""
 	if forcePlatform, ok := middleware2.GetForcePlatformFromContext(c); ok {
@@ -278,6 +284,11 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 	var sessionBoundAccountID int64
 	if sessionKey != "" {
 		sessionBoundAccountID, _ = h.gatewayService.GetCachedSessionAccountID(c.Request.Context(), apiKey.GroupID, sessionKey)
+		// [DEBUG-STICKY] 打印粘性会话查询结果
+		reqLog.Info("sticky.cache_lookup",
+			zap.String("session_key", sessionKey),
+			zap.Int64("bound_account_id", sessionBoundAccountID),
+		)
 		if sessionBoundAccountID > 0 {
 			prefetchedGroupID := int64(0)
 			if apiKey.GroupID != nil {
@@ -286,6 +297,8 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 			ctx := service.WithPrefetchedStickySession(c.Request.Context(), sessionBoundAccountID, prefetchedGroupID, h.metadataBridgeEnabled())
 			c.Request = c.Request.WithContext(ctx)
 		}
+	} else {
+		reqLog.Info("sticky.no_session_key", zap.String("session_hash", sessionHash))
 	}
 	// 判断是否真的绑定了粘性会话：有 sessionKey 且已经绑定到某个账号
 	hasBoundSession := sessionKey != "" && sessionBoundAccountID > 0
@@ -536,6 +549,12 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 
 		for {
 			// 选择支持该模型的账号
+			reqLog.Info("sticky.selecting_account",
+				zap.String("session_key", sessionKey),
+				zap.Int64("sticky_bound_account_id", sessionBoundAccountID),
+				zap.Bool("has_bound_session", hasBoundSession),
+				zap.Int("failed_account_count", len(fs.FailedAccountIDs)),
+			)
 			selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), currentAPIKey.GroupID, sessionKey, reqModel, fs.FailedAccountIDs, parsedReq.MetadataUserID, subject.UserID)
 			if err != nil {
 				if len(fs.FailedAccountIDs) == 0 {
@@ -569,6 +588,16 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 			account := selection.Account
 			setOpsSelectedAccount(c, account.ID, account.Platform)
 
+			// [DEBUG-STICKY] 打印账号选择结果
+			reqLog.Info("sticky.account_selected",
+				zap.Int64("selected_account_id", account.ID),
+				zap.String("account_name", account.Name),
+				zap.Bool("slot_acquired", selection.Acquired),
+				zap.Bool("has_wait_plan", selection.WaitPlan != nil),
+				zap.Int64("sticky_bound_account_id", sessionBoundAccountID),
+				zap.Bool("sticky_honored", sessionBoundAccountID > 0 && sessionBoundAccountID == account.ID),
+			)
+
 			// 检查请求拦截（预热请求、SUGGESTION MODE等）
 			if account.IsInterceptWarmupEnabled() {
 				interceptType := detectInterceptType(body, reqModel, parsedReq.MaxTokens, reqStream, isClaudeCodeClient)
@@ -635,6 +664,10 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 				}
 				// Slot acquired: no longer waiting in queue.
 				releaseWait()
+				reqLog.Info("sticky.bind_after_wait",
+					zap.String("session_key", sessionKey),
+					zap.Int64("account_id", account.ID),
+				)
 				if err := h.gatewayService.BindStickySession(c.Request.Context(), currentAPIKey.GroupID, sessionKey, account.ID); err != nil {
 					reqLog.Warn("gateway.bind_sticky_session_failed", zap.Int64("account_id", account.ID), zap.Error(err))
 				}
@@ -829,6 +862,17 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 				}
 			}
 
+			// 绑定粘性会话（成功转发后绑定/刷新）
+			// - 无现有绑定（首次请求）：创建绑定
+			// - 选中账号与粘性账号一致：刷新 TTL
+			// - 粘性账号因负载/RPM 被跳过、选中了其他账号：不覆盖原绑定，
+			//   下次请求粘性账号恢复后仍可命中
+			if sessionKey != "" && (sessionBoundAccountID == 0 || sessionBoundAccountID == account.ID) {
+				if err := h.gatewayService.BindStickySession(c.Request.Context(), currentAPIKey.GroupID, sessionKey, account.ID); err != nil {
+					reqLog.Warn("gateway.bind_sticky_session_failed", zap.Int64("account_id", account.ID), zap.Error(err))
+				}
+			}
+
 			// 捕获请求信息（用于异步记录，避免在 goroutine 中访问 gin.Context）
 			userAgent := c.GetHeader("User-Agent")
 			clientIP := ip.GetClientIP(c)
diff --git a/backend/internal/repository/scheduler_cache.go b/backend/internal/repository/scheduler_cache.go
index 8e1f9f56..590ddaa3 100644
--- a/backend/internal/repository/scheduler_cache.go
+++ b/backend/internal/repository/scheduler_cache.go
@@ -449,11 +449,69 @@ func buildSchedulerMetadataAccount(account service.Account) service.Account {
 		SessionWindowStart:      account.SessionWindowStart,
 		SessionWindowEnd:        account.SessionWindowEnd,
 		SessionWindowStatus:     account.SessionWindowStatus,
+		AccountGroups:           filterSchedulerAccountGroups(account.AccountGroups),
+		GroupIDs:                filterSchedulerGroupIDs(account.GroupIDs, account.AccountGroups),
 		Credentials:             filterSchedulerCredentials(account.Credentials),
 		Extra:                   filterSchedulerExtra(account.Extra),
 	}
 }
 
+func filterSchedulerAccountGroups(accountGroups []service.AccountGroup) []service.AccountGroup {
+	if len(accountGroups) == 0 {
+		return nil
+	}
+
+	filtered := make([]service.AccountGroup, 0, len(accountGroups))
+	for _, ag := range accountGroups {
+		if ag.GroupID <= 0 {
+			continue
+		}
+		filtered = append(filtered, service.AccountGroup{
+			AccountID: ag.AccountID,
+			GroupID:   ag.GroupID,
+			Priority:  ag.Priority,
+			CreatedAt: ag.CreatedAt,
+		})
+	}
+	if len(filtered) == 0 {
+		return nil
+	}
+	return filtered
+}
+
+func filterSchedulerGroupIDs(groupIDs []int64, accountGroups []service.AccountGroup) []int64 {
+	if len(groupIDs) == 0 && len(accountGroups) == 0 {
+		return nil
+	}
+
+	seen := make(map[int64]struct{}, len(groupIDs)+len(accountGroups))
+	filtered := make([]int64, 0, len(groupIDs)+len(accountGroups))
+	for _, id := range groupIDs {
+		if id <= 0 {
+			continue
+		}
+		if _, ok := seen[id]; ok {
+			continue
+		}
+		seen[id] = struct{}{}
+		filtered = append(filtered, id)
+	}
+	for _, ag := range accountGroups {
+		if ag.GroupID <= 0 {
+			continue
+		}
+		if _, ok := seen[ag.GroupID]; ok {
+			continue
+		}
+		seen[ag.GroupID] = struct{}{}
+		filtered = append(filtered, ag.GroupID)
+	}
+	if len(filtered) == 0 {
+		return nil
+	}
+	return filtered
+}
+
 func filterSchedulerCredentials(credentials map[string]any) map[string]any {
 	if len(credentials) == 0 {
 		return nil
diff --git a/backend/internal/repository/scheduler_cache_integration_test.go b/backend/internal/repository/scheduler_cache_integration_test.go
index 134a6a07..948c2c73 100644
--- a/backend/internal/repository/scheduler_cache_integration_test.go
+++ b/backend/internal/repository/scheduler_cache_integration_test.go
@@ -56,6 +56,15 @@ func TestSchedulerCacheSnapshotUsesSlimMetadataButKeepsFullAccount(t *testing.T)
 		SessionWindowStart:     &now,
 		SessionWindowEnd:       &windowEnd,
 		SessionWindowStatus:    "active",
+		GroupIDs:               []int64{bucket.GroupID},
+		AccountGroups: []service.AccountGroup{
+			{
+				AccountID: 101,
+				GroupID:   bucket.GroupID,
+				Priority:  5,
+				Group:     &service.Group{ID: bucket.GroupID, Name: "gemini-group"},
+			},
+		},
 	}
 
 	require.NoError(t, cache.SetSnapshot(ctx, bucket, []service.Account{account}))
@@ -79,10 +88,17 @@ func TestSchedulerCacheSnapshotUsesSlimMetadataButKeepsFullAccount(t *testing.T)
 	require.Equal(t, 4, got.GetMaxSessions())
 	require.Equal(t, 11, got.GetSessionIdleTimeoutMinutes())
 	require.Nil(t, got.Extra["unused_large_field"])
+	require.Equal(t, []int64{bucket.GroupID}, got.GroupIDs)
+	require.Len(t, got.AccountGroups, 1)
+	require.Equal(t, account.ID, got.AccountGroups[0].AccountID)
+	require.Equal(t, bucket.GroupID, got.AccountGroups[0].GroupID)
+	require.Nil(t, got.AccountGroups[0].Group)
 
 	full, err := cache.GetAccount(ctx, account.ID)
 	require.NoError(t, err)
 	require.NotNil(t, full)
 	require.Equal(t, "secret-access-token", full.GetCredential("access_token"))
 	require.Equal(t, strings.Repeat("x", 4096), full.GetCredential("huge_blob"))
+	require.Len(t, full.AccountGroups, 1)
+	require.NotNil(t, full.AccountGroups[0].Group)
 }
diff --git a/backend/internal/repository/scheduler_cache_unit_test.go b/backend/internal/repository/scheduler_cache_unit_test.go
index bcfd0e7a..33f3b581 100644
--- a/backend/internal/repository/scheduler_cache_unit_test.go
+++ b/backend/internal/repository/scheduler_cache_unit_test.go
@@ -31,3 +31,43 @@ func TestBuildSchedulerMetadataAccount_KeepsOpenAIWSFlags(t *testing.T) {
 	require.Equal(t, true, got.Extra["mixed_scheduling"])
 	require.Nil(t, got.Extra["unused_large_field"])
 }
+
+func TestBuildSchedulerMetadataAccount_KeepsSlimGroupMembership(t *testing.T) {
+	account := service.Account{
+		ID:       42,
+		Platform: service.PlatformAnthropic,
+		GroupIDs: []int64{7, 9, 7, 0},
+		AccountGroups: []service.AccountGroup{
+			{
+				AccountID: 42,
+				GroupID:   7,
+				Priority:  2,
+				Account:   &service.Account{ID: 42, Name: "drop-from-metadata"},
+				Group:     &service.Group{ID: 7, Name: "drop-from-metadata"},
+			},
+			{
+				AccountID: 42,
+				GroupID:   11,
+				Priority:  3,
+				Group:     &service.Group{ID: 11, Name: "drop-from-metadata"},
+			},
+			{
+				AccountID: 42,
+				GroupID:   0,
+				Priority:  4,
+			},
+		},
+	}
+
+	got := buildSchedulerMetadataAccount(account)
+
+	require.Equal(t, []int64{7, 9, 11}, got.GroupIDs)
+	require.Len(t, got.AccountGroups, 2)
+	require.Equal(t, int64(42), got.AccountGroups[0].AccountID)
+	require.Equal(t, int64(7), got.AccountGroups[0].GroupID)
+	require.Equal(t, 2, got.AccountGroups[0].Priority)
+	require.Nil(t, got.AccountGroups[0].Account)
+	require.Nil(t, got.AccountGroups[0].Group)
+	require.Equal(t, int64(11), got.AccountGroups[1].GroupID)
+	require.Nil(t, got.Groups)
+}
diff --git a/backend/internal/service/gateway_service.go b/backend/internal/service/gateway_service.go
index f3cae916..d1f12009 100644
--- a/backend/internal/service/gateway_service.go
+++ b/backend/internal/service/gateway_service.go
@@ -654,15 +654,31 @@ func (s *GatewayService) GenerateSessionHash(parsed *ParsedRequest) string {
 
 	// 1. 最高优先级：从 metadata.user_id 提取 session_xxx
 	if parsed.MetadataUserID != "" {
-		if uid := ParseMetadataUserID(parsed.MetadataUserID); uid != nil && uid.SessionID != "" {
+		uid := ParseMetadataUserID(parsed.MetadataUserID)
+		if uid != nil && uid.SessionID != "" {
+			slog.Info("sticky.hash_source",
+				"source", "metadata_user_id",
+				"session_id", uid.SessionID,
+				"device_id", uid.DeviceID,
+				"is_new_format", uid.IsNewFormat,
+			)
 			return uid.SessionID
 		}
+		slog.Info("sticky.hash_metadata_parse_failed",
+			"metadata_user_id", parsed.MetadataUserID,
+			"parsed_nil", uid == nil,
+		)
 	}
 
 	// 2. 提取带 cache_control: {type: "ephemeral"} 的内容
 	cacheableContent := s.extractCacheableContent(parsed)
 	if cacheableContent != "" {
-		return s.hashContent(cacheableContent)
+		hash := s.hashContent(cacheableContent)
+		slog.Info("sticky.hash_source",
+			"source", "cacheable_content",
+			"hash", hash,
+		)
+		return hash
 	}
 
 	// 3. 最后 fallback: 使用 session上下文 + system + 所有消息的完整摘要串
@@ -702,7 +718,13 @@ func (s *GatewayService) GenerateSessionHash(parsed *ParsedRequest) string {
 		}
 	}
 	if combined.Len() > 0 {
-		return s.hashContent(combined.String())
+		hash := s.hashContent(combined.String())
+		slog.Info("sticky.hash_source",
+			"source", "message_content_fallback",
+			"hash", hash,
+			"content_len", combined.Len(),
+		)
+		return hash
 	}
 
 	return ""
@@ -1406,14 +1428,29 @@ func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, gro
 	}
 
 	var stickyAccountID int64
+	var stickySource string
 	if prefetch := prefetchedStickyAccountIDFromContext(ctx, groupID); prefetch > 0 {
 		stickyAccountID = prefetch
+		stickySource = "prefetch"
 	} else if sessionHash != "" && s.cache != nil {
 		if accountID, err := s.cache.GetSessionAccountID(ctx, derefGroupID(groupID), sessionHash); err == nil {
 			stickyAccountID = accountID
+			stickySource = "cache"
 		}
 	}
 
+	// [DEBUG-STICKY] 调度器入口日志
+	slog.Info("sticky.scheduler_entry",
+		"group_id", derefGroupID(groupID),
+		"session_hash", shortSessionHash(sessionHash),
+		"sticky_account_id", stickyAccountID,
+		"sticky_source", stickySource,
+		"model", requestedModel,
+		"load_batch", cfg.LoadBatchEnabled,
+		"has_concurrency_svc", s.concurrencyService != nil,
+		"excluded_count", len(excludedIDs),
+	)
+
 	if s.debugModelRoutingEnabled() && requestedModel != "" {
 		groupPlatform := ""
 		if group != nil {
@@ -1589,6 +1626,13 @@ func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, gro
 		if len(routingCandidates) > 0 {
 			// 1.5. 在路由账号范围内检查粘性会话
 			if sessionHash != "" && stickyAccountID > 0 {
+				slog.Debug("sticky.layer1_5_checking",
+					"sticky_account_id", stickyAccountID,
+					"in_routing_list", containsInt64(routingAccountIDs, stickyAccountID),
+					"is_excluded", isExcluded(stickyAccountID),
+					"in_account_map", func() bool { _, ok := accountByID[stickyAccountID]; return ok }(),
+					"session", shortSessionHash(sessionHash),
+				)
 				if containsInt64(routingAccountIDs, stickyAccountID) && !isExcluded(stickyAccountID) {
 					// 粘性账号在路由列表中，优先使用
 					if stickyAccount, ok := accountByID[stickyAccountID]; ok {
@@ -1612,6 +1656,11 @@ func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, gro
 									stickyCacheMissReason = "session_limit"
 									// 继续到负载感知选择
 								} else {
+									slog.Debug("sticky.layer1_5_hit",
+										"account_id", stickyAccountID,
+										"session", shortSessionHash(sessionHash),
+										"result", "slot_acquired",
+									)
 									if s.debugModelRoutingEnabled() {
 										logger.LegacyPrintf("service.gateway", "[ModelRoutingDebug] routed sticky hit: group_id=%v model=%s session=%s account=%d", derefGroupID(groupID), requestedModel, shortSessionHash(sessionHash), stickyAccountID)
 									}
@@ -1762,27 +1811,65 @@ func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, gro
 				// 检查账户是否需要清理粘性会话绑定
 				clearSticky := shouldClearStickySession(account, requestedModel)
 				if clearSticky {
+					slog.Debug("sticky.layer1_5_no_routing_clear",
+						"account_id", accountID,
+						"reason", "should_clear_sticky_session",
+						"session", shortSessionHash(sessionHash),
+					)
 					_ = s.cache.DeleteSessionAccountID(ctx, derefGroupID(groupID), sessionHash)
 				}
-				if !clearSticky && s.isAccountInGroup(account, groupID) &&
-					s.isAccountAllowedForPlatform(account, platform, useMixed) &&
-					(requestedModel == "" || s.isModelSupportedByAccountWithContext(ctx, account, requestedModel)) &&
-					s.isAccountSchedulableForModelSelection(ctx, account, requestedModel) &&
-					s.isAccountSchedulableForQuota(account) &&
-					s.isAccountSchedulableForWindowCost(ctx, account, true) &&
 
-					s.isAccountSchedulableForRPM(ctx, account, true) { // 粘性会话窗口费用+RPM 检查
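+				// NOTE: isAccountInGroup is deliberately not checked here: accountByID is built from the
+				// group-filtered accounts list, so the account is already known to be in the group. Accounts read back
+				// from the scheduler snapshot cache used to have empty AccountGroups, making isAccountInGroup always false.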
+				platformOK := s.isAccountAllowedForPlatform(account, platform, useMixed)
+				modelSupported := requestedModel == "" || s.isModelSupportedByAccountWithContext(ctx, account, requestedModel)
+				modelSchedulable := s.isAccountSchedulableForModelSelection(ctx, account, requestedModel)
+				quotaOK := s.isAccountSchedulableForQuota(account)
+				windowCostOK := s.isAccountSchedulableForWindowCost(ctx, account, true)
+				rpmOK := s.isAccountSchedulableForRPM(ctx, account, true)
+				schedulable := s.isAccountSchedulableForSelection(account)
+
+				slog.Debug("sticky.layer1_5_no_routing_checks",
+					"account_id", accountID,
+					"session", shortSessionHash(sessionHash),
+					"clear_sticky", clearSticky,
+					"schedulable", schedulable,
+					"platform_ok", platformOK,
+					"model_supported", modelSupported,
+					"model_schedulable", modelSchedulable,
+					"quota_ok", quotaOK,
+					"window_cost_ok", windowCostOK,
+					"rpm_ok", rpmOK,
+				)
+
+				if !clearSticky && platformOK && modelSupported && modelSchedulable && quotaOK && windowCostOK && rpmOK && schedulable {
 					result, err := s.tryAcquireAccountSlot(ctx, accountID, account.Concurrency)
 					if err == nil && result.Acquired {
 						// 会话数量限制检查
 						if !s.checkAndRegisterSession(ctx, account, sessionHash) {
 							result.ReleaseFunc() // 释放槽位，继续到 Layer 2
+							slog.Debug("sticky.layer1_5_no_routing_miss",
+								"account_id", accountID,
+								"reason", "session_limit",
+								"session", shortSessionHash(sessionHash),
+							)
 						} else {
+							slog.Debug("sticky.layer1_5_no_routing_hit",
+								"account_id", accountID,
+								"session", shortSessionHash(sessionHash),
+								"result", "slot_acquired",
+							)
 							if s.cache != nil {
 								_ = s.cache.RefreshSessionTTL(ctx, derefGroupID(groupID), sessionHash, stickySessionTTL)
 							}
 							return s.newSelectionResult(ctx, account, true, result.ReleaseFunc, nil)
 						}
+					} else {
+						slog.Debug("sticky.layer1_5_no_routing_slot_busy",
+							"account_id", accountID,
+							"session", shortSessionHash(sessionHash),
+						)
 					}
 
 					waitingCount, _ := s.concurrencyService.GetAccountWaitingCount(ctx, accountID)
@@ -1791,6 +1878,11 @@ func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, gro
 						if !s.checkAndRegisterSession(ctx, account, sessionHash) {
 							// 会话限制已满，继续到 Layer 2
 						} else {
+							slog.Debug("sticky.layer1_5_no_routing_hit",
+								"account_id", accountID,
+								"session", shortSessionHash(sessionHash),
+								"result", "wait_plan",
+							)
 							return s.newSelectionResult(ctx, account, false, nil, &AccountWaitPlan{
 								AccountID:      accountID,
 								MaxConcurrency: account.Concurrency,
@@ -1799,12 +1891,42 @@ func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, gro
 							})
 						}
 					}
+				} else if !clearSticky {
+					slog.Debug("sticky.layer1_5_no_routing_miss",
+						"account_id", accountID,
+						"reason", "gate_check_failed",
+						"session", shortSessionHash(sessionHash),
+					)
 				}
+			} else {
+				slog.Debug("sticky.layer1_5_no_routing_miss",
+					"account_id", accountID,
+					"reason", "account_not_in_map",
+					"session", shortSessionHash(sessionHash),
+				)
 			}
 		}
+	} else if len(routingAccountIDs) == 0 && sessionHash != "" {
+		slog.Debug("sticky.layer1_5_no_routing_skip",
+			"sticky_account_id", stickyAccountID,
+			"is_excluded", func() bool { return stickyAccountID > 0 && isExcluded(stickyAccountID) }(),
+			"session", shortSessionHash(sessionHash),
+			"reason", func() string {
+				if stickyAccountID == 0 {
+					return "no_sticky_binding"
+				}
+				return "sticky_account_excluded"
+			}(),
+		)
 	}
 
 	// ============ Layer 2: 负载感知选择 ============
+	slog.Debug("sticky.layer2_fallback",
+		"session", shortSessionHash(sessionHash),
+		"sticky_account_id", stickyAccountID,
+		"reason", "sticky_not_used_falling_back_to_load_balance",
+		"total_accounts", len(accounts),
+	)
 	candidates := make([]*Account, 0, len(accounts))
 	for i := range accounts {
 		acc := &accounts[i]

From 094e1171efb4c1886d11d7bbf71088a043b7a4aa Mon Sep 17 00:00:00 2001
From: shaw <shaw-wei@foxmail.com>
Date: Thu, 30 Apr 2026 12:02:08 +0800
Subject: [PATCH 43/46] fix(openai): infer previous response for item
 references

---
 .gitignore                                             |  2 +-
 backend/internal/service/openai_ws_forwarder.go        | 10 ++++++----
 .../openai_ws_forwarder_ingress_session_test.go        |  4 ++--
 .../service/openai_ws_forwarder_ingress_test.go        |  4 ++--
 4 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/.gitignore b/.gitignore
index a61f406d..cf251f07 100644
--- a/.gitignore
+++ b/.gitignore
@@ -122,7 +122,7 @@ scripts
 .code-review-state
 #openspec/
 code-reviews/
-#AGENTS.md
+AGENTS.md
 backend/cmd/server/server
 deploy/docker-compose.override.yml
 .gocache/
diff --git a/backend/internal/service/openai_ws_forwarder.go b/backend/internal/service/openai_ws_forwarder.go
index 023217b2..d1386b1b 100644
--- a/backend/internal/service/openai_ws_forwarder.go
+++ b/backend/internal/service/openai_ws_forwarder.go
@@ -1379,10 +1379,12 @@ func shouldInferIngressFunctionCallOutputPreviousResponseID(
 	if signals.HasFunctionCallOutputMissingCallID {
 		return false
 	}
-	// If the client already sent tool-call context or item_reference anchors,
-	// treat this as a full replay / self-contained continuation payload rather
-	// than downgrading it into an inferred delta continuation.
-	if signals.HasToolCallContext || signals.HasItemReferenceForAllCallIDs {
+	// If the client already sent the actual tool-call context, treat this as
+	// a full replay / self-contained continuation payload rather than
+	// downgrading it into an inferred delta continuation. item_reference alone
+	// is not enough on the store=false WS path: it still needs a valid prior
+	// response anchor so upstream can resolve the referenced function_call.
+	if signals.HasToolCallContext {
 		return false
 	}
 	return strings.TrimSpace(expectedPreviousResponseID) != ""
diff --git a/backend/internal/service/openai_ws_forwarder_ingress_session_test.go b/backend/internal/service/openai_ws_forwarder_ingress_session_test.go
index 701f069a..30fd4142 100644
--- a/backend/internal/service/openai_ws_forwarder_ingress_session_test.go
+++ b/backend/internal/service/openai_ws_forwarder_ingress_session_test.go
@@ -1488,7 +1488,7 @@ func TestOpenAIGatewayService_ProxyResponsesWebSocketFromClient_StoreDisabledFun
 	require.False(t, gjson.Get(requestToJSONString(captureConn.writes[1]), "previous_response_id").Exists(), "请求已包含 function_call 上下文时不应自动补齐 previous_response_id")
 }
 
-func TestOpenAIGatewayService_ProxyResponsesWebSocketFromClient_StoreDisabledFunctionCallOutputSkipsAutoAttachWhenItemReferencesPresent(t *testing.T) {
+func TestOpenAIGatewayService_ProxyResponsesWebSocketFromClient_StoreDisabledFunctionCallOutputAutoAttachWhenOnlyItemReferencesPresent(t *testing.T) {
 	gin.SetMode(gin.TestMode)
 
 	cfg := &config.Config{}
@@ -1619,7 +1619,7 @@ func TestOpenAIGatewayService_ProxyResponsesWebSocketFromClient_StoreDisabledFun
 
 	require.Equal(t, 1, captureDialer.DialCount())
 	require.Len(t, captureConn.writes, 2)
-	require.False(t, gjson.Get(requestToJSONString(captureConn.writes[1]), "previous_response_id").Exists(), "请求已包含 item_reference 锚点时不应自动补齐 previous_response_id")
+	require.Equal(t, "resp_auto_prev_ref_1", gjson.Get(requestToJSONString(captureConn.writes[1]), "previous_response_id").String(), "仅有 item_reference 不足以自包含 function_call_output，应回填上一轮响应 ID")
 }
 
 func TestOpenAIGatewayService_ProxyResponsesWebSocketFromClient_PreflightPingFailReconnectsBeforeTurn(t *testing.T) {
diff --git a/backend/internal/service/openai_ws_forwarder_ingress_test.go b/backend/internal/service/openai_ws_forwarder_ingress_test.go
index 08597f0c..c735f50a 100644
--- a/backend/internal/service/openai_ws_forwarder_ingress_test.go
+++ b/backend/internal/service/openai_ws_forwarder_ingress_test.go
@@ -303,12 +303,12 @@ func TestShouldInferIngressFunctionCallOutputPreviousResponseID(t *testing.T) {
 			want:             false,
 		},
 		{
-			name:             "skip_when_item_reference_already_covers_all_call_ids",
+			name:             "infer_when_only_item_reference_covers_call_ids",
 			storeDisabled:    true,
 			turn:             2,
 			signals:          ToolContinuationSignals{HasFunctionCallOutput: true, HasItemReferenceForAllCallIDs: true},
 			expectedPrevious: "resp_2",
-			want:             false,
+			want:             true,
 		},
 		{
 			name:             "skip_when_function_call_output_missing_call_id",

From 73b872998e2e44dc8c11e6aec4d55a34fa5badeb Mon Sep 17 00:00:00 2001
From: shaw <shaw-wei@foxmail.com>
Date: Thu, 30 Apr 2026 13:38:22 +0800
Subject: [PATCH 44/46] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=20Anthropic=20?=
 =?UTF-8?q?=E7=BC=93=E5=AD=98=20TTL=20=E6=B3=A8=E5=85=A5=E5=BC=80=E5=85=B3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../internal/handler/admin/setting_handler.go |  18 ++-
 backend/internal/handler/dto/settings.go      |   7 +-
 backend/internal/service/domain_constants.go  |   2 +
 .../service/gateway_body_order_test.go        | 135 ++++++++++++++++++
 backend/internal/service/gateway_service.go   | 122 ++++++++++++++--
 backend/internal/service/setting_service.go   | 102 ++++++++-----
 backend/internal/service/settings_view.go     |   7 +-
 frontend/src/api/admin/settings.ts            |   2 +
 frontend/src/i18n/locales/en.ts               |   2 +
 frontend/src/i18n/locales/zh.ts               |   2 +
 frontend/src/views/admin/SettingsView.vue     |  28 ++++
 .../admin/__tests__/SettingsView.spec.ts      |  21 +++
 12 files changed, 394 insertions(+), 54 deletions(-)

diff --git a/backend/internal/handler/admin/setting_handler.go b/backend/internal/handler/admin/setting_handler.go
index d6580191..59f4fe85 100644
--- a/backend/internal/handler/admin/setting_handler.go
+++ b/backend/internal/handler/admin/setting_handler.go
@@ -209,6 +209,7 @@ func (h *SettingHandler) GetSettings(c *gin.Context) {
 		EnableFingerprintUnification:           settings.EnableFingerprintUnification,
 		EnableMetadataPassthrough:              settings.EnableMetadataPassthrough,
 		EnableCCHSigning:                       settings.EnableCCHSigning,
+		EnableAnthropicCacheTTL1hInjection:     settings.EnableAnthropicCacheTTL1hInjection,
 		WebSearchEmulationEnabled:              settings.WebSearchEmulationEnabled,
 		PaymentVisibleMethodAlipaySource:       settings.PaymentVisibleMethodAlipaySource,
 		PaymentVisibleMethodWxpaySource:        settings.PaymentVisibleMethodWxpaySource,
@@ -441,9 +442,10 @@ type UpdateSettingsRequest struct {
 	BackendModeEnabled bool `json:"backend_mode_enabled"`
 
 	// Gateway forwarding behavior
-	EnableFingerprintUnification *bool `json:"enable_fingerprint_unification"`
-	EnableMetadataPassthrough    *bool `json:"enable_metadata_passthrough"`
-	EnableCCHSigning             *bool `json:"enable_cch_signing"`
+	EnableFingerprintUnification       *bool `json:"enable_fingerprint_unification"`
+	EnableMetadataPassthrough          *bool `json:"enable_metadata_passthrough"`
+	EnableCCHSigning                   *bool `json:"enable_cch_signing"`
+	EnableAnthropicCacheTTL1hInjection *bool `json:"enable_anthropic_cache_ttl_1h_injection"`
 
 	// Payment visible method routing
 	PaymentVisibleMethodAlipaySource  *string `json:"payment_visible_method_alipay_source"`
@@ -1273,6 +1275,12 @@ func (h *SettingHandler) UpdateSettings(c *gin.Context) {
 			}
 			return previousSettings.EnableCCHSigning
 		}(),
+		EnableAnthropicCacheTTL1hInjection: func() bool {
+			if req.EnableAnthropicCacheTTL1hInjection != nil {
+				return *req.EnableAnthropicCacheTTL1hInjection
+			}
+			return previousSettings.EnableAnthropicCacheTTL1hInjection
+		}(),
 		PaymentVisibleMethodAlipaySource: func() string {
 			if req.PaymentVisibleMethodAlipaySource != nil {
 				return strings.TrimSpace(*req.PaymentVisibleMethodAlipaySource)
@@ -1570,6 +1578,7 @@ func (h *SettingHandler) UpdateSettings(c *gin.Context) {
 		EnableFingerprintUnification:           updatedSettings.EnableFingerprintUnification,
 		EnableMetadataPassthrough:              updatedSettings.EnableMetadataPassthrough,
 		EnableCCHSigning:                       updatedSettings.EnableCCHSigning,
+		EnableAnthropicCacheTTL1hInjection:     updatedSettings.EnableAnthropicCacheTTL1hInjection,
 		PaymentVisibleMethodAlipaySource:       updatedSettings.PaymentVisibleMethodAlipaySource,
 		PaymentVisibleMethodWxpaySource:        updatedSettings.PaymentVisibleMethodWxpaySource,
 		PaymentVisibleMethodAlipayEnabled:      updatedSettings.PaymentVisibleMethodAlipayEnabled,
@@ -1949,6 +1958,9 @@ func diffSettings(before *service.SystemSettings, after *service.SystemSettings,
 	if before.EnableCCHSigning != after.EnableCCHSigning {
 		changed = append(changed, "enable_cch_signing")
 	}
+	if before.EnableAnthropicCacheTTL1hInjection != after.EnableAnthropicCacheTTL1hInjection {
+		changed = append(changed, "enable_anthropic_cache_ttl_1h_injection")
+	}
 	if before.PaymentVisibleMethodAlipaySource != after.PaymentVisibleMethodAlipaySource {
 		changed = append(changed, "payment_visible_method_alipay_source")
 	}
diff --git a/backend/internal/handler/dto/settings.go b/backend/internal/handler/dto/settings.go
index b865d703..492be170 100644
--- a/backend/internal/handler/dto/settings.go
+++ b/backend/internal/handler/dto/settings.go
@@ -142,9 +142,10 @@ type SystemSettings struct {
 	BackendModeEnabled bool `json:"backend_mode_enabled"`
 
 	// Gateway forwarding behavior
-	EnableFingerprintUnification bool `json:"enable_fingerprint_unification"`
-	EnableMetadataPassthrough    bool `json:"enable_metadata_passthrough"`
-	EnableCCHSigning             bool `json:"enable_cch_signing"`
+	EnableFingerprintUnification       bool `json:"enable_fingerprint_unification"`
+	EnableMetadataPassthrough          bool `json:"enable_metadata_passthrough"`
+	EnableCCHSigning                   bool `json:"enable_cch_signing"`
+	EnableAnthropicCacheTTL1hInjection bool `json:"enable_anthropic_cache_ttl_1h_injection"`
 
 	// Web Search Emulation
 	WebSearchEmulationEnabled bool `json:"web_search_emulation_enabled"`
diff --git a/backend/internal/service/domain_constants.go b/backend/internal/service/domain_constants.go
index bddcf6ab..bb32540b 100644
--- a/backend/internal/service/domain_constants.go
+++ b/backend/internal/service/domain_constants.go
@@ -336,6 +336,8 @@ const (
 	SettingKeyEnableMetadataPassthrough = "enable_metadata_passthrough"
 	// SettingKeyEnableCCHSigning 是否对 billing header 中的 cch 进行 xxHash64 签名（默认 false）
 	SettingKeyEnableCCHSigning = "enable_cch_signing"
+	// SettingKeyEnableAnthropicCacheTTL1hInjection 是否对 Anthropic OAuth/SetupToken 请求体注入 1h cache_control ttl（默认 false）
+	SettingKeyEnableAnthropicCacheTTL1hInjection = "enable_anthropic_cache_ttl_1h_injection"
 
 	// Balance Low Notification
 	SettingKeyBalanceLowNotifyEnabled     = "balance_low_notify_enabled"      // 全局开关
diff --git a/backend/internal/service/gateway_body_order_test.go b/backend/internal/service/gateway_body_order_test.go
index e6c9de7d..e0c3cafd 100644
--- a/backend/internal/service/gateway_body_order_test.go
+++ b/backend/internal/service/gateway_body_order_test.go
@@ -1,13 +1,91 @@
 package service
 
 import (
+	"context"
+	"errors"
 	"strings"
 	"testing"
 
+	"github.com/Wei-Shaw/sub2api/internal/config"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/claude"
 	"github.com/stretchr/testify/require"
+	"github.com/tidwall/gjson"
 )
 
+type gatewayTTLSettingRepo struct {
+	data map[string]string
+}
+
+func (r *gatewayTTLSettingRepo) Get(context.Context, string) (*Setting, error) {
+	return nil, ErrSettingNotFound
+}
+
+func (r *gatewayTTLSettingRepo) GetValue(_ context.Context, key string) (string, error) {
+	if r == nil {
+		return "", ErrSettingNotFound
+	}
+	v, ok := r.data[key]
+	if !ok {
+		return "", ErrSettingNotFound
+	}
+	return v, nil
+}
+
+func (r *gatewayTTLSettingRepo) Set(_ context.Context, key, value string) error {
+	if r == nil {
+		return errors.New("setting repo is nil")
+	}
+	if r.data == nil {
+		r.data = map[string]string{}
+	}
+	r.data[key] = value
+	return nil
+}
+
+func (r *gatewayTTLSettingRepo) GetMultiple(_ context.Context, keys []string) (map[string]string, error) {
+	result := make(map[string]string)
+	if r == nil {
+		return result, nil
+	}
+	for _, key := range keys {
+		if v, ok := r.data[key]; ok {
+			result[key] = v
+		}
+	}
+	return result, nil
+}
+
+func (r *gatewayTTLSettingRepo) SetMultiple(_ context.Context, settings map[string]string) error {
+	if r == nil {
+		return errors.New("setting repo is nil")
+	}
+	if r.data == nil {
+		r.data = map[string]string{}
+	}
+	for key, value := range settings {
+		r.data[key] = value
+	}
+	return nil
+}
+
+func (r *gatewayTTLSettingRepo) GetAll(context.Context) (map[string]string, error) {
+	result := make(map[string]string)
+	if r == nil {
+		return result, nil
+	}
+	for key, value := range r.data {
+		result[key] = value
+	}
+	return result, nil
+}
+
+func (r *gatewayTTLSettingRepo) Delete(_ context.Context, key string) error {
+	if r != nil {
+		delete(r.data, key)
+	}
+	return nil
+}
+
 func assertJSONTokenOrder(t *testing.T, body string, tokens ...string) {
 	t.Helper()
 
@@ -71,3 +149,60 @@ func TestEnforceCacheControlLimit_PreservesTopLevelFieldOrder(t *testing.T) {
 	assertJSONTokenOrder(t, resultStr, `"alpha"`, `"system"`, `"messages"`, `"omega"`)
 	require.Equal(t, 4, strings.Count(resultStr, `"cache_control"`))
 }
+
+func TestInjectAnthropicCacheControlTTL1h_OnlyUpdatesExistingEphemeralCacheControl(t *testing.T) {
+	body := []byte(`{"alpha":1,"cache_control":{"type":"ephemeral"},"system":[{"type":"text","text":"sys","cache_control":{"type":"ephemeral","ttl":"5m"}},{"type":"text","text":"plain"}],"messages":[{"role":"user","content":[{"type":"text","text":"hi","cache_control":{"type":"ephemeral"}},{"type":"text","text":"non","cache_control":{"type":"persistent","ttl":"5m"}}]}],"tools":[{"name":"a","input_schema":{},"cache_control":{"type":"ephemeral"}}],"omega":2}`)
+
+	result := injectAnthropicCacheControlTTL1h(body)
+	resultStr := string(result)
+
+	assertJSONTokenOrder(t, resultStr, `"alpha"`, `"cache_control"`, `"system"`, `"messages"`, `"tools"`, `"omega"`)
+	require.Equal(t, "1h", gjson.GetBytes(result, "cache_control.ttl").String())
+	require.Equal(t, "1h", gjson.GetBytes(result, "system.0.cache_control.ttl").String())
+	require.False(t, gjson.GetBytes(result, "system.1.cache_control").Exists())
+	require.Equal(t, "1h", gjson.GetBytes(result, "messages.0.content.0.cache_control.ttl").String())
+	require.Equal(t, "5m", gjson.GetBytes(result, "messages.0.content.1.cache_control.ttl").String())
+	require.Equal(t, "1h", gjson.GetBytes(result, "tools.0.cache_control.ttl").String())
+}
+
+func TestGatewayCacheTTLGlobalSetting_TargetResolution(t *testing.T) {
+	repo := &gatewayTTLSettingRepo{data: map[string]string{
+		SettingKeyEnableAnthropicCacheTTL1hInjection: "true",
+	}}
+	gatewayForwardingCache.Store(&cachedGatewayForwardingSettings{})
+	svc := &GatewayService{
+		settingService: NewSettingService(repo, &config.Config{}),
+	}
+	account := &Account{Platform: PlatformAnthropic, Type: AccountTypeOAuth}
+
+	target, ok := svc.resolveCacheTTLUsageOverrideTarget(context.Background(), account)
+	require.True(t, ok)
+	require.Equal(t, cacheTTLTarget5m, target)
+
+	account.Extra = map[string]any{
+		"cache_ttl_override_enabled": true,
+		"cache_ttl_override_target":  "1h",
+	}
+	target, ok = svc.resolveCacheTTLUsageOverrideTarget(context.Background(), account)
+	require.True(t, ok)
+	require.Equal(t, cacheTTLTarget1h, target)
+}
+
+func TestGatewayCacheTTLGlobalSetting_RequestInjectionScope(t *testing.T) {
+	repo := &gatewayTTLSettingRepo{data: map[string]string{
+		SettingKeyEnableAnthropicCacheTTL1hInjection: "true",
+	}}
+	gatewayForwardingCache.Store(&cachedGatewayForwardingSettings{})
+	svc := &GatewayService{
+		settingService: NewSettingService(repo, &config.Config{}),
+	}
+
+	require.True(t, svc.shouldInjectAnthropicCacheTTL1h(context.Background(), &Account{Platform: PlatformAnthropic, Type: AccountTypeOAuth}))
+	require.True(t, svc.shouldInjectAnthropicCacheTTL1h(context.Background(), &Account{Platform: PlatformAnthropic, Type: AccountTypeSetupToken}))
+	require.False(t, svc.shouldInjectAnthropicCacheTTL1h(context.Background(), &Account{Platform: PlatformAnthropic, Type: AccountTypeAPIKey}))
+	require.False(t, svc.shouldInjectAnthropicCacheTTL1h(context.Background(), &Account{Platform: PlatformOpenAI, Type: AccountTypeOAuth}))
+
+	repo.data[SettingKeyEnableAnthropicCacheTTL1hInjection] = "false"
+	gatewayForwardingCache.Store(&cachedGatewayForwardingSettings{})
+	require.False(t, svc.shouldInjectAnthropicCacheTTL1h(context.Background(), &Account{Platform: PlatformAnthropic, Type: AccountTypeOAuth}))
+}
diff --git a/backend/internal/service/gateway_service.go b/backend/internal/service/gateway_service.go
index d1f12009..074013c3 100644
--- a/backend/internal/service/gateway_service.go
+++ b/backend/internal/service/gateway_service.go
@@ -62,6 +62,11 @@ const (
 	claudeMimicDebugInfoKey = "claude_mimic_debug_info"
 )
 
+const (
+	cacheTTLTarget5m = "5m"
+	cacheTTLTarget1h = "1h"
+)
+
 // ForceCacheBillingContextKey 强制缓存计费上下文键
 // 用于粘性会话切换时，将 input_tokens 转为 cache_read_input_tokens 计费
 type forceCacheBillingKeyType struct{}
@@ -4226,6 +4231,87 @@ func enforceCacheControlLimit(body []byte) []byte {
 	return body
 }
 
+// injectAnthropicCacheControlTTL1h 将已有 ephemeral cache_control 块的 ttl 强制写为 1h。
+// 仅修改已经存在的 cache_control，不新增缓存断点。
+func injectAnthropicCacheControlTTL1h(body []byte) []byte {
+	return forceEphemeralCacheControlTTL(body, cacheTTLTarget1h)
+}
+
+func forceEphemeralCacheControlTTL(body []byte, ttl string) []byte {
+	if len(body) == 0 || ttl == "" {
+		return body
+	}
+	out := body
+	var paths []string
+	addPath := func(path string, value gjson.Result) {
+		cc := value.Get("cache_control")
+		if !cc.Exists() || cc.Get("type").String() != "ephemeral" {
+			return
+		}
+		if cc.Get("ttl").String() == ttl {
+			return
+		}
+		paths = append(paths, path+".cache_control.ttl")
+	}
+
+	if topCC := gjson.GetBytes(body, "cache_control"); topCC.Exists() && topCC.Get("type").String() == "ephemeral" && topCC.Get("ttl").String() != ttl {
+		paths = append(paths, "cache_control.ttl")
+	}
+
+	system := gjson.GetBytes(body, "system")
+	if system.IsArray() {
+		idx := -1
+		system.ForEach(func(_, block gjson.Result) bool {
+			idx++
+			addPath(fmt.Sprintf("system.%d", idx), block)
+			return true
+		})
+	}
+
+	messages := gjson.GetBytes(body, "messages")
+	if messages.IsArray() {
+		msgIdx := -1
+		messages.ForEach(func(_, msg gjson.Result) bool {
+			msgIdx++
+			content := msg.Get("content")
+			if !content.IsArray() {
+				return true
+			}
+			contentIdx := -1
+			content.ForEach(func(_, block gjson.Result) bool {
+				contentIdx++
+				addPath(fmt.Sprintf("messages.%d.content.%d", msgIdx, contentIdx), block)
+				return true
+			})
+			return true
+		})
+	}
+
+	tools := gjson.GetBytes(body, "tools")
+	if tools.IsArray() {
+		idx := -1
+		tools.ForEach(func(_, tool gjson.Result) bool {
+			idx++
+			addPath(fmt.Sprintf("tools.%d", idx), tool)
+			return true
+		})
+	}
+
+	for _, path := range paths {
+		if next, err := sjson.SetBytes(out, path, ttl); err == nil {
+			out = next
+		}
+	}
+	return out
+}
+
+func (s *GatewayService) shouldInjectAnthropicCacheTTL1h(ctx context.Context, account *Account) bool {
+	if account == nil || !account.IsAnthropicOAuthOrSetupToken() || s == nil || s.settingService == nil {
+		return false
+	}
+	return s.settingService.IsAnthropicCacheTTL1hInjectionEnabled(ctx)
+}
+
 // Forward 转发请求到Claude API
 func (s *GatewayService) Forward(ctx context.Context, c *gin.Context, account *Account, parsed *ParsedRequest) (*ForwardResult, error) {
 	startTime := time.Now()
@@ -4385,6 +4471,10 @@ func (s *GatewayService) Forward(ctx context.Context, c *gin.Context, account *A
 		logger.LegacyPrintf("service.gateway", "Model mapping applied: %s -> %s (account: %s, source=%s)", originalModel, mappedModel, account.Name, mappingSource)
 	}
 
+	if s.shouldInjectAnthropicCacheTTL1h(ctx, account) {
+		body = injectAnthropicCacheControlTTL1h(body)
+	}
+
 	// 获取凭证
 	token, tokenType, err := s.GetAccessToken(ctx, account)
 	if err != nil {
@@ -7225,9 +7315,9 @@ func (s *GatewayService) handleStreamingResponse(ctx context.Context, resp *http
 			}
 		}
 
-		// Cache TTL Override: 重写 SSE 事件中的 cache_creation 分类
-		if account.IsCacheTTLOverrideEnabled() {
-			overrideTarget := account.GetCacheTTLOverrideTarget()
+		// Cache TTL Override: 重写 SSE 事件中的 cache_creation 分类。
+		// 账号级设置优先；全局 1h 请求注入开启时，默认把 usage 计费归回 5m。
+		if overrideTarget, ok := s.resolveCacheTTLUsageOverrideTarget(ctx, account); ok {
 			if eventType == "message_start" {
 				if msg, ok := event["message"].(map[string]any); ok {
 					if u, ok := msg["usage"].(map[string]any); ok {
@@ -7634,6 +7724,19 @@ func rewriteCacheCreationJSON(usageObj map[string]any, target string) bool {
 	return true
 }
 
+func (s *GatewayService) resolveCacheTTLUsageOverrideTarget(ctx context.Context, account *Account) (string, bool) {
+	if account == nil {
+		return "", false
+	}
+	if account.IsCacheTTLOverrideEnabled() { // account-level override is checked first, so it takes priority
+		return account.GetCacheTTLOverrideTarget(), true
+	}
+	if account.IsAnthropicOAuthOrSetupToken() && s != nil && s.settingService != nil && s.settingService.IsAnthropicCacheTTL1hInjectionEnabled(ctx) {
+		return cacheTTLTarget5m, true // 1h injection setting is on: usage is reclassified to the 5m target
+	}
+	return "", false // no override applies; the bool tells callers to leave usage untouched
+}
+
 func (s *GatewayService) handleNonStreamingResponse(ctx context.Context, resp *http.Response, c *gin.Context, account *Account, originalModel, mappedModel string) (*ClaudeUsage, error) {
 	// 更新5h窗口状态
 	s.rateLimitService.UpdateSessionWindow(ctx, account, resp.Header)
@@ -7670,9 +7773,9 @@ func (s *GatewayService) handleNonStreamingResponse(ctx context.Context, resp *h
 		}
 	}
 
-	// Cache TTL Override: 重写 non-streaming 响应中的 cache_creation 分类
-	if account.IsCacheTTLOverrideEnabled() {
-		overrideTarget := account.GetCacheTTLOverrideTarget()
+	// Cache TTL Override: 重写 non-streaming 响应中的 cache_creation 分类。
+	// 账号级设置优先；全局 1h 请求注入开启时，默认把 usage 计费归回 5m。
+	if overrideTarget, ok := s.resolveCacheTTLUsageOverrideTarget(ctx, account); ok {
 		if applyCacheTTLOverride(&response.Usage, overrideTarget) {
 			// 同步更新 body JSON 中的嵌套 cache_creation 对象
 			if newBody, err := sjson.SetBytes(body, "usage.cache_creation.ephemeral_5m_input_tokens", response.Usage.CacheCreation5mTokens); err == nil {
@@ -8240,10 +8343,11 @@ func (s *GatewayService) recordUsageCore(ctx context.Context, input *recordUsage
 		result.Usage.InputTokens = 0
 	}
 
-	// Cache TTL Override: 确保计费时 token 分类与账号设置一致
+	// Cache TTL Override: 确保计费时 token 分类与账号设置一致。
+	// 账号级设置优先；全局 1h 请求注入开启时，默认把 usage 计费归回 5m。
 	cacheTTLOverridden := false
-	if account.IsCacheTTLOverrideEnabled() {
-		applyCacheTTLOverride(&result.Usage, account.GetCacheTTLOverrideTarget())
+	if overrideTarget, ok := s.resolveCacheTTLUsageOverrideTarget(ctx, account); ok {
+		applyCacheTTLOverride(&result.Usage, overrideTarget)
 		cacheTTLOverridden = (result.Usage.CacheCreation5mTokens + result.Usage.CacheCreation1hTokens) > 0
 	}
 
diff --git a/backend/internal/service/setting_service.go b/backend/internal/service/setting_service.go
index 966b4b84..2bae686a 100644
--- a/backend/internal/service/setting_service.go
+++ b/backend/internal/service/setting_service.go
@@ -82,10 +82,11 @@ const backendModeDBTimeout = 5 * time.Second
 
 // cachedGatewayForwardingSettings 缓存网关转发行为设置（进程内缓存，60s TTL）
 type cachedGatewayForwardingSettings struct {
-	fingerprintUnification bool
-	metadataPassthrough    bool
-	cchSigning             bool
-	expiresAt              int64 // unix nano
+	fingerprintUnification       bool
+	metadataPassthrough          bool
+	cchSigning                   bool
+	anthropicCacheTTL1hInjection bool
+	expiresAt                    int64 // unix nano
 }
 
 var gatewayForwardingCache atomic.Value // *cachedGatewayForwardingSettings
@@ -1245,6 +1246,7 @@ func (s *SettingService) buildSystemSettingsUpdates(ctx context.Context, setting
 	updates[SettingKeyEnableFingerprintUnification] = strconv.FormatBool(settings.EnableFingerprintUnification)
 	updates[SettingKeyEnableMetadataPassthrough] = strconv.FormatBool(settings.EnableMetadataPassthrough)
 	updates[SettingKeyEnableCCHSigning] = strconv.FormatBool(settings.EnableCCHSigning)
+	updates[SettingKeyEnableAnthropicCacheTTL1hInjection] = strconv.FormatBool(settings.EnableAnthropicCacheTTL1hInjection)
 	updates[SettingPaymentVisibleMethodAlipaySource] = settings.PaymentVisibleMethodAlipaySource
 	updates[SettingPaymentVisibleMethodWxpaySource] = settings.PaymentVisibleMethodWxpaySource
 	updates[SettingPaymentVisibleMethodAlipayEnabled] = strconv.FormatBool(settings.PaymentVisibleMethodAlipayEnabled)
@@ -1305,10 +1307,11 @@ func (s *SettingService) refreshCachedSettings(settings *SystemSettings) {
 	})
 	gatewayForwardingSF.Forget("gateway_forwarding")
 	gatewayForwardingCache.Store(&cachedGatewayForwardingSettings{
-		fingerprintUnification: settings.EnableFingerprintUnification,
-		metadataPassthrough:    settings.EnableMetadataPassthrough,
-		cchSigning:             settings.EnableCCHSigning,
-		expiresAt:              time.Now().Add(gatewayForwardingCacheTTL).UnixNano(),
+		fingerprintUnification:       settings.EnableFingerprintUnification,
+		metadataPassthrough:          settings.EnableMetadataPassthrough,
+		cchSigning:                   settings.EnableCCHSigning,
+		anthropicCacheTTL1hInjection: settings.EnableAnthropicCacheTTL1hInjection,
+		expiresAt:                    time.Now().Add(gatewayForwardingCacheTTL).UnixNano(),
 	})
 	openAIAdvancedSchedulerSettingSF.Forget(openAIAdvancedSchedulerSettingKey)
 	openAIAdvancedSchedulerSettingCache.Store(&cachedOpenAIAdvancedSchedulerSetting{
@@ -1415,22 +1418,30 @@ func (s *SettingService) IsBackendModeEnabled(ctx context.Context) bool {
 	return false
 }
 
-// GetGatewayForwardingSettings returns cached gateway forwarding settings.
-// Uses in-process atomic.Value cache with 60s TTL, zero-lock hot path.
-// Returns (fingerprintUnification, metadataPassthrough, cchSigning).
-func (s *SettingService) GetGatewayForwardingSettings(ctx context.Context) (fingerprintUnification, metadataPassthrough, cchSigning bool) {
+type gatewayForwardingSettingsResult struct {
+	fp, mp, cch, cacheTTL1h bool // fingerprintUnification, metadataPassthrough, cchSigning, anthropicCacheTTL1hInjection
+}
+
+func (s *SettingService) getGatewayForwardingSettingsCached(ctx context.Context) gatewayForwardingSettingsResult {
 	if cached, ok := gatewayForwardingCache.Load().(*cachedGatewayForwardingSettings); ok && cached != nil {
 		if time.Now().UnixNano() < cached.expiresAt {
-			return cached.fingerprintUnification, cached.metadataPassthrough, cached.cchSigning
+			return gatewayForwardingSettingsResult{
+				fp:         cached.fingerprintUnification,
+				mp:         cached.metadataPassthrough,
+				cch:        cached.cchSigning,
+				cacheTTL1h: cached.anthropicCacheTTL1hInjection,
+			}
 		}
 	}
-	type gwfResult struct {
-		fp, mp, cch bool
-	}
 	val, _, _ := gatewayForwardingSF.Do("gateway_forwarding", func() (any, error) {
 		if cached, ok := gatewayForwardingCache.Load().(*cachedGatewayForwardingSettings); ok && cached != nil {
 			if time.Now().UnixNano() < cached.expiresAt {
-				return gwfResult{cached.fingerprintUnification, cached.metadataPassthrough, cached.cchSigning}, nil
+				return gatewayForwardingSettingsResult{
+					fp:         cached.fingerprintUnification,
+					mp:         cached.metadataPassthrough,
+					cch:        cached.cchSigning,
+					cacheTTL1h: cached.anthropicCacheTTL1hInjection,
+				}, nil
 			}
 		}
 		dbCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), gatewayForwardingDBTimeout)
@@ -1439,16 +1450,18 @@ func (s *SettingService) GetGatewayForwardingSettings(ctx context.Context) (fing
 			SettingKeyEnableFingerprintUnification,
 			SettingKeyEnableMetadataPassthrough,
 			SettingKeyEnableCCHSigning,
+			SettingKeyEnableAnthropicCacheTTL1hInjection,
 		})
 		if err != nil {
 			slog.Warn("failed to get gateway forwarding settings", "error", err)
 			gatewayForwardingCache.Store(&cachedGatewayForwardingSettings{
-				fingerprintUnification: true,
-				metadataPassthrough:    false,
-				cchSigning:             false,
-				expiresAt:              time.Now().Add(gatewayForwardingErrorTTL).UnixNano(),
+				fingerprintUnification:       true,
+				metadataPassthrough:          false,
+				cchSigning:                   false,
+				anthropicCacheTTL1hInjection: false,
+				expiresAt:                    time.Now().Add(gatewayForwardingErrorTTL).UnixNano(),
 			})
-			return gwfResult{true, false, false}, nil
+			return gatewayForwardingSettingsResult{fp: true}, nil
 		}
 		fp := true
 		if v, ok := values[SettingKeyEnableFingerprintUnification]; ok && v != "" {
@@ -1456,18 +1469,33 @@ func (s *SettingService) GetGatewayForwardingSettings(ctx context.Context) (fing
 		}
 		mp := values[SettingKeyEnableMetadataPassthrough] == "true"
 		cch := values[SettingKeyEnableCCHSigning] == "true"
+		cacheTTL1h := values[SettingKeyEnableAnthropicCacheTTL1hInjection] == "true"
 		gatewayForwardingCache.Store(&cachedGatewayForwardingSettings{
-			fingerprintUnification: fp,
-			metadataPassthrough:    mp,
-			cchSigning:             cch,
-			expiresAt:              time.Now().Add(gatewayForwardingCacheTTL).UnixNano(),
+			fingerprintUnification:       fp,
+			metadataPassthrough:          mp,
+			cchSigning:                   cch,
+			anthropicCacheTTL1hInjection: cacheTTL1h,
+			expiresAt:                    time.Now().Add(gatewayForwardingCacheTTL).UnixNano(),
 		})
-		return gwfResult{fp, mp, cch}, nil
+		return gatewayForwardingSettingsResult{fp: fp, mp: mp, cch: cch, cacheTTL1h: cacheTTL1h}, nil
 	})
-	if r, ok := val.(gwfResult); ok {
-		return r.fp, r.mp, r.cch
+	if r, ok := val.(gatewayForwardingSettingsResult); ok {
+		return r
 	}
-	return true, false, false // fail-open defaults
+	return gatewayForwardingSettingsResult{fp: true}
+}
+
+// GetGatewayForwardingSettings returns the cached gateway forwarding settings as
+// (fingerprintUnification, metadataPassthrough, cchSigning). The lookup is delegated to
+// getGatewayForwardingSettingsCached (in-process atomic.Value cache, 60s TTL).
+func (s *SettingService) GetGatewayForwardingSettings(ctx context.Context) (fingerprintUnification, metadataPassthrough, cchSigning bool) {
+	result := s.getGatewayForwardingSettingsCached(ctx)
+	return result.fp, result.mp, result.cch // cacheTTL1h is read separately via IsAnthropicCacheTTL1hInjectionEnabled
+}
+
+// IsAnthropicCacheTTL1hInjectionEnabled reports whether a 1h cache_control ttl is injected into Anthropic OAuth/SetupToken request bodies.
+func (s *SettingService) IsAnthropicCacheTTL1hInjectionEnabled(ctx context.Context) bool {
+	return s.getGatewayForwardingSettingsCached(ctx).cacheTTL1h
 }
 
 // IsEmailVerifyEnabled 检查是否开启邮件验证
@@ -1880,12 +1908,13 @@ func (s *SettingService) InitializeDefaultSettings(ctx context.Context) error {
 		SettingKeyMaxClaudeCodeVersion: "",
 
 		// 分组隔离（默认不允许未分组 Key 调度）
-		SettingKeyAllowUngroupedKeyScheduling:    "false",
-		SettingPaymentVisibleMethodAlipaySource:  "",
-		SettingPaymentVisibleMethodWxpaySource:   "",
-		SettingPaymentVisibleMethodAlipayEnabled: "false",
-		SettingPaymentVisibleMethodWxpayEnabled:  "false",
-		openAIAdvancedSchedulerSettingKey:        "false",
+		SettingKeyAllowUngroupedKeyScheduling:        "false",
+		SettingKeyEnableAnthropicCacheTTL1hInjection: "false",
+		SettingPaymentVisibleMethodAlipaySource:      "",
+		SettingPaymentVisibleMethodWxpaySource:       "",
+		SettingPaymentVisibleMethodAlipayEnabled:     "false",
+		SettingPaymentVisibleMethodWxpayEnabled:      "false",
+		openAIAdvancedSchedulerSettingKey:            "false",
 	}
 
 	return s.settingRepo.SetMultiple(ctx, defaults)
@@ -2228,6 +2257,7 @@ func (s *SettingService) parseSettings(settings map[string]string) *SystemSettin
 	}
 	result.EnableMetadataPassthrough = settings[SettingKeyEnableMetadataPassthrough] == "true"
 	result.EnableCCHSigning = settings[SettingKeyEnableCCHSigning] == "true"
+	result.EnableAnthropicCacheTTL1hInjection = settings[SettingKeyEnableAnthropicCacheTTL1hInjection] == "true"
 
 	// Web search emulation: quick enabled check from the JSON config
 	if raw := settings[SettingKeyWebSearchEmulationConfig]; raw != "" {
diff --git a/backend/internal/service/settings_view.go b/backend/internal/service/settings_view.go
index c0962ff0..41c01cca 100644
--- a/backend/internal/service/settings_view.go
+++ b/backend/internal/service/settings_view.go
@@ -149,9 +149,10 @@ type SystemSettings struct {
 	BackendModeEnabled bool
 
 	// Gateway forwarding behavior
-	EnableFingerprintUnification bool // 是否统一 OAuth 账号的指纹头（默认 true）
-	EnableMetadataPassthrough    bool // 是否透传客户端原始 metadata（默认 false）
-	EnableCCHSigning             bool // 是否对 billing header cch 进行签名（默认 false）
+	EnableFingerprintUnification       bool // 是否统一 OAuth 账号的指纹头（默认 true）
+	EnableMetadataPassthrough          bool // 是否透传客户端原始 metadata（默认 false）
+	EnableCCHSigning                   bool // 是否对 billing header cch 进行签名（默认 false）
+	EnableAnthropicCacheTTL1hInjection bool // 是否对 Anthropic OAuth/SetupToken 请求体注入 1h cache_control ttl（默认 false）
 
 	// Web Search Emulation
 	WebSearchEmulationEnabled bool // 是否启用 web search 模拟
diff --git a/frontend/src/api/admin/settings.ts b/frontend/src/api/admin/settings.ts
index e8ab6af5..35eef9de 100644
--- a/frontend/src/api/admin/settings.ts
+++ b/frontend/src/api/admin/settings.ts
@@ -439,6 +439,7 @@ export interface SystemSettings {
   enable_fingerprint_unification: boolean;
   enable_metadata_passthrough: boolean;
   enable_cch_signing: boolean;
+  enable_anthropic_cache_ttl_1h_injection: boolean;
   web_search_emulation_enabled?: boolean;
 
   // Payment configuration
@@ -609,6 +610,7 @@ export interface UpdateSettingsRequest {
   enable_fingerprint_unification?: boolean;
   enable_metadata_passthrough?: boolean;
   enable_cch_signing?: boolean;
+  enable_anthropic_cache_ttl_1h_injection?: boolean;
   // Payment configuration
   payment_enabled?: boolean;
   payment_min_amount?: number;
diff --git a/frontend/src/i18n/locales/en.ts b/frontend/src/i18n/locales/en.ts
index 0425955f..2da121fb 100644
--- a/frontend/src/i18n/locales/en.ts
+++ b/frontend/src/i18n/locales/en.ts
@@ -5019,6 +5019,8 @@ export default {
         metadataPassthroughHint: 'Pass through client\'s original metadata.user_id without rewriting. May improve upstream cache hit rates.',
         cchSigning: 'CCH Signing',
         cchSigningHint: 'Sign the billing header in forwarded requests with CCH hash. When disabled, the placeholder is preserved.',
+        anthropicCacheTTL1hInjection: 'Anthropic Cache TTL Injection',
+        anthropicCacheTTL1hInjectionHint: 'When enabled, existing ephemeral cache_control blocks in Anthropic OAuth/Setup Token request bodies are forced to 1h; response usage is billed back as 5m by default, with account-level TTL billing override taking priority.',
       },
       webSearchEmulation: {
         title: 'Web Search Emulation',
diff --git a/frontend/src/i18n/locales/zh.ts b/frontend/src/i18n/locales/zh.ts
index a8656a7b..7d266522 100644
--- a/frontend/src/i18n/locales/zh.ts
+++ b/frontend/src/i18n/locales/zh.ts
@@ -5178,6 +5178,8 @@ export default {
         metadataPassthroughHint: '透传客户端原始 metadata.user_id，不进行重写。可能提高上游缓存命中率。',
         cchSigning: 'CCH 签名',
         cchSigningHint: '对转发请求的 billing header 进行 CCH 哈希签名。关闭时保留原始占位符。',
+        anthropicCacheTTL1hInjection: 'Anthropic 缓存 TTL 注入',
+        anthropicCacheTTL1hInjectionHint: '开启后，对 Anthropic OAuth/Setup Token 请求体中已有的 ephemeral 缓存块强制写入 1h；响应 usage 默认按 5m 回写计费，账号级 TTL 计费设置优先。',
       },
       webSearchEmulation: {
         title: 'Web Search 模拟',
diff --git a/frontend/src/views/admin/SettingsView.vue b/frontend/src/views/admin/SettingsView.vue
index ad0587b8..13cb0b2c 100644
--- a/frontend/src/views/admin/SettingsView.vue
+++ b/frontend/src/views/admin/SettingsView.vue
@@ -3057,6 +3057,31 @@
                 </div>
                 <Toggle v-model="form.enable_cch_signing" />
               </div>
+
+              <!-- Anthropic Cache TTL 1h Injection -->
+              <div class="flex items-center justify-between">
+                <div>
+                  <label
+                    class="text-sm font-medium text-gray-700 dark:text-gray-300"
+                  >
+                    {{
+                      t(
+                        "admin.settings.gatewayForwarding.anthropicCacheTTL1hInjection",
+                      )
+                    }}
+                  </label>
+                  <p class="mt-0.5 text-xs text-gray-500 dark:text-gray-400">
+                    {{
+                      t(
+                        "admin.settings.gatewayForwarding.anthropicCacheTTL1hInjectionHint",
+                      )
+                    }}
+                  </p>
+                </div>
+                <Toggle
+                  v-model="form.enable_anthropic_cache_ttl_1h_injection"
+                />
+              </div>
             </div>
           </div>
           <!-- Web Search Emulation -->
@@ -5810,6 +5835,7 @@ const form = reactive<SettingsForm>({
   enable_fingerprint_unification: true,
   enable_metadata_passthrough: false,
   enable_cch_signing: false,
+  enable_anthropic_cache_ttl_1h_injection: false,
   // Balance & quota notification
   balance_low_notify_enabled: false,
   balance_low_notify_threshold: 0,
@@ -6718,6 +6744,8 @@ async function saveSettings() {
       enable_fingerprint_unification: form.enable_fingerprint_unification,
       enable_metadata_passthrough: form.enable_metadata_passthrough,
       enable_cch_signing: form.enable_cch_signing,
+      enable_anthropic_cache_ttl_1h_injection:
+        form.enable_anthropic_cache_ttl_1h_injection,
       // Payment configuration
       payment_enabled: form.payment_enabled,
       payment_min_amount: Number(form.payment_min_amount) || 0,
diff --git a/frontend/src/views/admin/__tests__/SettingsView.spec.ts b/frontend/src/views/admin/__tests__/SettingsView.spec.ts
index 239c474e..4ab475ad 100644
--- a/frontend/src/views/admin/__tests__/SettingsView.spec.ts
+++ b/frontend/src/views/admin/__tests__/SettingsView.spec.ts
@@ -362,6 +362,7 @@ const baseSettingsResponse = {
   enable_fingerprint_unification: true,
   enable_metadata_passthrough: false,
   enable_cch_signing: false,
+  enable_anthropic_cache_ttl_1h_injection: false,
   payment_enabled: true,
   payment_min_amount: 1,
   payment_max_amount: 10000,
@@ -567,6 +568,26 @@ describe("admin SettingsView payment visible method controls", () => {
     expect(payload).not.toHaveProperty("payment_visible_method_wxpay_enabled");
   });
 
+  it("submits Anthropic cache TTL injection gateway setting", async () => {
+    getSettings.mockResolvedValueOnce({
+      ...baseSettingsResponse,
+      enable_anthropic_cache_ttl_1h_injection: true, // base fixture has this false; flip it for this case
+    });
+
+    const wrapper = mountView();
+
+    await flushPromises();
+    await wrapper.find("form").trigger("submit.prevent"); // submit without touching the toggle
+    await flushPromises();
+
+    expect(updateSettings).toHaveBeenCalledTimes(1);
+    expect(updateSettings).toHaveBeenCalledWith(
+      expect.objectContaining({
+        enable_anthropic_cache_ttl_1h_injection: true, // the loaded value must round-trip into the update payload
+      }),
+    );
+  });
+
   it("updates provider enablement immediately and reloads providers", async () => {
     const provider = {
       id: 7,

From 9d801595c95eb5f5616bca0ec409a42d73325987 Mon Sep 17 00:00:00 2001
From: shaw <shaw-wei@foxmail.com>
Date: Thu, 30 Apr 2026 13:48:27 +0800
Subject: [PATCH 45/46] =?UTF-8?q?test:=20=E6=9B=B4=E6=96=B0=E7=AE=A1?=
 =?UTF-8?q?=E7=90=86=E5=91=98=E8=AE=BE=E7=BD=AE=E5=A5=91=E7=BA=A6=E5=AD=97?=
 =?UTF-8?q?=E6=AE=B5?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 backend/internal/server/api_contract_test.go | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/backend/internal/server/api_contract_test.go b/backend/internal/server/api_contract_test.go
index f24a1677..607b93dc 100644
--- a/backend/internal/server/api_contract_test.go
+++ b/backend/internal/server/api_contract_test.go
@@ -740,6 +740,7 @@ func TestAPIContracts(t *testing.T) {
 					"allow_ungrouped_key_scheduling": false,
 					"backend_mode_enabled": false,
 					"enable_cch_signing": false,
+					"enable_anthropic_cache_ttl_1h_injection": false,
 					"enable_fingerprint_unification": true,
 					"enable_metadata_passthrough": false,
 					"web_search_emulation_enabled": false,
@@ -934,6 +935,7 @@ func TestAPIContracts(t *testing.T) {
 					"enable_fingerprint_unification": true,
 					"enable_metadata_passthrough": false,
 					"enable_cch_signing": false,
+					"enable_anthropic_cache_ttl_1h_injection": false,
 					"web_search_emulation_enabled": false,
 					"payment_visible_method_alipay_source": "",
 					"payment_visible_method_wxpay_source": "",

From 48912014a16e2dd1cfca8b7cad785d0e8e7bfeec Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 30 Apr 2026 06:06:12 +0000
Subject: [PATCH 46/46] chore: sync VERSION to 0.1.121 [skip ci]

---
 backend/cmd/server/VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/cmd/server/VERSION b/backend/cmd/server/VERSION
index 27f3bc3e..025c3166 100644
--- a/backend/cmd/server/VERSION
+++ b/backend/cmd/server/VERSION
@@ -1 +1 @@
-0.1.120
+0.1.121