chore(wip): 保存订制改动以便合并上游

- windsurf: client/pool/local_ls/tool_emulation/tool_names/models 调整
- handler: admin account_data / failover_loop / gateway_handler
- repository: scheduler_cache 及测试
- service: windsurf_chat_service / windsurf_gateway_service
- deploy: compose 合并为单文件（含 windsurf-ls profile），Dockerfile.ls
- cmd: 新增 dump_ls_models / dump_preamble / test_windsurf_tools 辅助工具
2026-04-24 11:14:36 +08:00 · 2026-04-24 11:14:36 +08:00 · 002066e700
commit 002066e700
parent 2a4103298e
19 changed files with 873 additions and 195 deletions
--- a/backend/cmd/dump_ls_models/main.go
+++ b/backend/cmd/dump_ls_models/main.go
@ -0,0 +1,42 @@
+// dump_ls_models prints the full GetCascadeModelConfigs response from the LS
+// for the given account JWT. Used to reconcile sub2api's static catalog with
+// the authoritative runtime list.
+package main
+
+import (
+	"context"
+	"encoding/json"
+	"flag"
+	"fmt"
+	"net/url"
+	"os"
+	"time"
+
+	"github.com/Wei-Shaw/sub2api/internal/pkg/windsurf"
+)
+
+// main fetches the model list for one account through windsurf.Client.ListModels
+// and prints it as indented JSON on stdout, preceded by a "# N models" line.
+//
+// The session token comes from -jwt (default: $WINDSURF_JWT) and the API base
+// URL from -api. The process exits with status 2 for unusable command-line
+// input (missing token, unparsable URL) and with status 1 when the client
+// cannot be built, the request fails, or the result cannot be encoded.
+func main() {
+	jwt := flag.String("jwt", os.Getenv("WINDSURF_JWT"), "session token")
+	apiURL := flag.String("api", "https://server.self-serve.windsurf.com", "api_server_url")
+	flag.Parse()
+	if *jwt == "" {
+		fmt.Fprintln(os.Stderr, "need -jwt or WINDSURF_JWT")
+		os.Exit(2)
+	}
+	// url.Parse returns a nil *url.URL on failure, so the error must be
+	// checked before u.String() is called.
+	u, err := url.Parse(*apiURL)
+	if err != nil {
+		fmt.Fprintln(os.Stderr, "invalid -api URL:", err)
+		os.Exit(2)
+	}
+	c, err := windsurf.NewClient(u.String(), "")
+	if err != nil {
+		fmt.Fprintln(os.Stderr, "client:", err)
+		os.Exit(1)
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
+	models, err := c.ListModels(ctx, *jwt)
+	// Release the timer right away: os.Exit below would skip a deferred cancel.
+	cancel()
+	if err != nil {
+		fmt.Fprintln(os.Stderr, "ListModels:", err)
+		os.Exit(1)
+	}
+	fmt.Printf("# %d models returned by GetCascadeModelConfigs\n", len(models))
+	out, err := json.MarshalIndent(models, "", "  ")
+	if err != nil {
+		fmt.Fprintln(os.Stderr, "encode models:", err)
+		os.Exit(1)
+	}
+	fmt.Println(string(out))
+}
--- a/backend/cmd/dump_preamble/main.go
+++ b/backend/cmd/dump_preamble/main.go
@ -0,0 +1,21 @@
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+
+	"github.com/Wei-Shaw/sub2api/internal/pkg/windsurf"
+)
+
+// main prints the preamble that windsurf.BuildToolPreambleForProto produces for
+// two sample tools (read_file and find_file). The first command-line argument,
+// when present, is passed through as the tool choice; otherwise "auto" is used.
+func main() {
+	choice := "auto"
+	if args := os.Args[1:]; len(args) > 0 {
+		choice = args[0]
+	}
+
+	// sample builds a function tool whose parameter schema is a bare object.
+	sample := func(name, description string) windsurf.OpenAITool {
+		return windsurf.OpenAITool{
+			Type: "function",
+			Function: windsurf.OpenAIFunction{
+				Name:        name,
+				Description: description,
+				Parameters:  json.RawMessage(`{"type":"object"}`),
+			},
+		}
+	}
+
+	tools := []windsurf.OpenAITool{
+		sample("read_file", "Read a file"),
+		sample("find_file", "Find files"),
+	}
+	fmt.Println(windsurf.BuildToolPreambleForProto(tools, choice))
+}
--- a/backend/cmd/test_windsurf_tools/main.go
+++ b/backend/cmd/test_windsurf_tools/main.go
@ -0,0 +1,247 @@
+// test_windsurf_tools validates Cascade tool-calling end-to-end.
+//
+// Same flow as test_windsurf_minimal but injects an OpenAI-format tools[]
+// preamble into SendUserCascadeMessage and parses <tool_call> blocks back
+// out of the trajectory text.
+//
+// Usage:
+//
+//	WINDSURF_JWT='devin-session-token$...' \
+//	WINDSURF_CSRF_TOKEN='ad2d9f01-...' \
+//	WINDSURF_USER_ID='devin-user$...' \
+//	WINDSURF_TEAM_ID='devin-team$account-...' \
+//	WINDSURF_LS_PORT=42099 \
+//	  go run ./cmd/test_windsurf_tools -verbose
+package main
+
+import (
+	"context"
+	"encoding/json"
+	"flag"
+	"fmt"
+	"os"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/Wei-Shaw/sub2api/internal/pkg/windsurf"
+)
+
+// cliFlags holds the settings of one test run as filled in by parseFlags.
+// Each field corresponds to the command-line flag named in its comment.
+type cliFlags struct {
+	jwt        string        // -jwt: session token
+	model      string        // -model: model UID; empty selects the built-in default
+	prompt     string        // -prompt: user prompt sent in the first turn
+	verbose    bool          // -verbose: also print the preamble head and thinking text
+	timeout    time.Duration // -timeout: deadline used for the LS calls
+	userID     string        // -user-id: user id, required to be non-empty
+	teamID     string        // -team-id: team id, required to be non-empty
+	csrfToken  string        // -csrf: LS CSRF token
+	lsPort     int           // -ls-port: LS port
+	toolChoice string        // -tool-choice: auto | required | none | <tool_name>
+	roundtrip  bool          // -roundtrip: run a second turn with a fake tool_result
+}
+
+// parseFlags registers the command-line flags, parses the process arguments and
+// returns the resulting settings. Credentials, model and LS port take their
+// defaults from the matching WINDSURF_* environment variables.
+func parseFlags() cliFlags {
+	opts := new(cliFlags)
+	env := os.Getenv
+
+	// Credentials and LS connection.
+	flag.StringVar(&opts.jwt, "jwt", env("WINDSURF_JWT"), "session token")
+	flag.StringVar(&opts.csrfToken, "csrf", env("WINDSURF_CSRF_TOKEN"), "LS CSRF token")
+	flag.StringVar(&opts.userID, "user-id", env("WINDSURF_USER_ID"), "user id")
+	flag.StringVar(&opts.teamID, "team-id", env("WINDSURF_TEAM_ID"), "team id")
+	flag.IntVar(&opts.lsPort, "ls-port", envInt("WINDSURF_LS_PORT", 42099), "LS port")
+
+	// Request content.
+	flag.StringVar(&opts.model, "model", env("WINDSURF_MODEL"), "model UID (optional, auto-picks cheapest)")
+	flag.StringVar(&opts.prompt, "prompt", "Find every Go file in backend/internal/pkg/windsurf whose name contains 'tool', then read the first 40 lines of tool_emulation.go. Use the tools.", "user prompt")
+	flag.StringVar(&opts.toolChoice, "tool-choice", "auto", "auto | required | none | <tool_name>")
+
+	// Run behaviour.
+	flag.BoolVar(&opts.verbose, "verbose", false, "verbose")
+	flag.BoolVar(&opts.roundtrip, "roundtrip", false, "after first turn, inject fake tool_result and test Turn 2")
+	flag.DurationVar(&opts.timeout, "timeout", 90*time.Second, "per-step timeout")
+
+	flag.Parse()
+	return *opts
+}
+
+// envInt reads environment variable k as a base-10 integer.
+//
+// It returns dflt when the variable is unset or blank, when the value is not a
+// complete integer (for example "12abc"), or when it parses to 0. Surrounding
+// whitespace is ignored.
+func envInt(k string, dflt int) int {
+	v := strings.TrimSpace(os.Getenv(k))
+	if v == "" {
+		return dflt
+	}
+	// strconv.Atoi rejects trailing garbage, which fmt.Sscanf("%d") would
+	// silently accept as a shorter number.
+	n, err := strconv.Atoi(v)
+	if err != nil || n == 0 {
+		return dflt
+	}
+	return n
+}
+
+// die prints its arguments to stderr using fmt.Fprintln formatting and exits
+// with status 1.
+func die(args ...any) {
+	fmt.Fprintln(os.Stderr, args...)
+	os.Exit(1)
+}
+
+// main runs one tool-calling session against a local LS:
+//
+//  1. build a preamble for four sample tools and the requested tool choice;
+//  2. WarmupCascade, StartCascade, then StreamCascadeChat with the prompt;
+//  3. print the raw text and the tool calls parsed out of it, exiting with
+//     status 1 when neither parsed nor native tool calls were produced;
+//  4. with -roundtrip, send a fake <tool_result> for the first parsed call on
+//     the same cascade and print the reply plus trajectory snapshots.
+//
+// Missing credentials exit with status 2; any failed LS call exits with 1.
+func main() {
+	f := parseFlags()
+	if f.jwt == "" || f.csrfToken == "" || f.userID == "" || f.teamID == "" {
+		fmt.Fprintln(os.Stderr, "missing WINDSURF_JWT / CSRF / USER_ID / TEAM_ID")
+		os.Exit(2)
+	}
+
+	// Build tools[] — realistic coding tools: read_file, find_file, grep, list_dir
+	tools := []windsurf.OpenAITool{
+		{Type: "function", Function: windsurf.OpenAIFunction{
+			Name:        "read_file",
+			Description: "Read the contents of a file. Use when you need to see what's inside a specific file.",
+			Parameters: json.RawMessage(`{"type":"object","properties":{
+				"path":{"type":"string","description":"Absolute or repo-relative file path"},
+				"start_line":{"type":"integer","description":"Optional 1-indexed start line","minimum":1},
+				"end_line":{"type":"integer","description":"Optional 1-indexed inclusive end line","minimum":1}
+			},"required":["path"]}`),
+		}},
+		{Type: "function", Function: windsurf.OpenAIFunction{
+			Name:        "find_file",
+			Description: "Find files by glob pattern. Use when looking for files whose path matches a pattern.",
+			Parameters: json.RawMessage(`{"type":"object","properties":{
+				"pattern":{"type":"string","description":"Glob pattern, e.g. **/*.go or src/**/test_*.py"},
+				"max_results":{"type":"integer","default":50}
+			},"required":["pattern"]}`),
+		}},
+		{Type: "function", Function: windsurf.OpenAIFunction{
+			Name:        "grep",
+			Description: "Search file contents by regex. Use when looking for code that matches a text pattern.",
+			Parameters: json.RawMessage(`{"type":"object","properties":{
+				"regex":{"type":"string","description":"POSIX/PCRE regex"},
+				"path_glob":{"type":"string","description":"Optional path glob filter, e.g. **/*.ts"},
+				"case_insensitive":{"type":"boolean","default":false}
+			},"required":["regex"]}`),
+		}},
+		{Type: "function", Function: windsurf.OpenAIFunction{
+			Name:        "list_dir",
+			Description: "List files and sub-directories at a path. Use for shallow directory exploration.",
+			Parameters: json.RawMessage(`{"type":"object","properties":{
+				"path":{"type":"string","description":"Directory path"}
+			},"required":["path"]}`),
+		}},
+	}
+
+	// Resolve tool_choice: the three keywords are passed as plain strings,
+	// anything else is treated as a tool name and wrapped in a function object.
+	var toolChoice any = f.toolChoice
+	switch f.toolChoice {
+	case "auto", "required", "none":
+	default:
+		toolChoice = map[string]any{"type": "function", "function": map[string]any{"name": f.toolChoice}}
+	}
+	preamble := windsurf.BuildToolPreambleForProto(tools, toolChoice)
+	if preamble == "" {
+		die("empty preamble")
+	}
+	if f.verbose {
+		fmt.Printf("── Preamble (%d bytes) head 200 chars ──\n%s…\n\n",
+			len(preamble), truncate(preamble, 200))
+	}
+
+	// LS client — note: user_id/team_id are not used by LS client directly,
+	// only by the remote account status APIs. Warmup sends a JWT only.
+	lsClient := windsurf.NewLocalLSClient(f.lsPort, f.csrfToken)
+
+	// Pick model: use given or default to Claude 4.5 Haiku (cheapest Claude)
+	pickedModel := f.model
+	if pickedModel == "" {
+		pickedModel = "MODEL_PRIVATE_11" // claude-4.5-haiku
+	}
+
+	// NOTE(review): -timeout is documented as "per-step", but this single
+	// deadline covers warmup, start and the whole first chat turn — confirm
+	// which behaviour is intended before changing it.
+	ctx, cancel := context.WithTimeout(context.Background(), f.timeout)
+	defer cancel()
+
+	// Warmup
+	if err := lsClient.WarmupCascade(ctx, f.jwt); err != nil {
+		die("WarmupCascade:", err)
+	}
+	fmt.Println("✅ WarmupCascade")
+
+	// StartCascade
+	cascadeID, err := lsClient.StartCascade(ctx, f.jwt)
+	if err != nil {
+		die("StartCascade:", err)
+	}
+	fmt.Printf("✅ StartCascade cascade_id=%s\n", cascadeID)
+
+	// Call StreamCascadeChat (full flow incl. trajectory polling)
+	res, err := lsClient.StreamCascadeChat(ctx, f.jwt, pickedModel, f.prompt, preamble, cascadeID, 0)
+	if err != nil {
+		die("StreamCascadeChat:", err)
+	}
+	fmt.Printf("✅ StreamCascadeChat text_len=%d thinking_len=%d native_tool_calls=%d\n",
+		len(res.Text), len(res.Thinking), len(res.ToolCalls))
+
+	fmt.Println("\n── Raw Text ──")
+	fmt.Println(res.Text)
+	if res.Thinking != "" && f.verbose {
+		fmt.Println("\n── Thinking ──")
+		fmt.Println(res.Thinking)
+	}
+
+	// Parse tool calls from text
+	parsed := windsurf.ParseToolCallsFromText(res.Text)
+	fmt.Printf("\n── Parsed tool_calls: %d ──\n", len(parsed.ToolCalls))
+	for i, call := range parsed.ToolCalls {
+		fmt.Printf("[%d] id=%s name=%s args=%s\n", i, call.ID, call.Name, call.ArgumentsJSON)
+	}
+	fmt.Printf("\n── Text after stripping tool_call: ──\n%s\n", parsed.Text)
+
+	if len(parsed.ToolCalls) == 0 && len(res.ToolCalls) == 0 {
+		die("\n❌ NO TOOL CALLS produced")
+	}
+	fmt.Println("\n✅ tool-calling E2E works")
+
+	// ───── Turn 2: inject fake tool_result and see if model continues ─────
+	if !f.roundtrip || len(parsed.ToolCalls) == 0 {
+		return
+	}
+
+	// dumpSteps fetches the trajectory from step 0 with its own 10s deadline,
+	// prints a header and one line per step, and returns the step count.
+	// prev < 0 omits the "(was N after Turn …)" part of the header. A fetch
+	// error is reported on stderr but does not abort the run.
+	dumpSteps := func(turn, prev int) int {
+		snapCtx, cancelSnap := context.WithTimeout(context.Background(), 10*time.Second)
+		defer cancelSnap()
+		steps, snapErr := lsClient.GetTrajectorySteps(snapCtx, cascadeID, 0)
+		if snapErr != nil {
+			fmt.Fprintln(os.Stderr, "GetTrajectorySteps:", snapErr)
+		}
+		if prev < 0 {
+			fmt.Printf("\n── After Turn %d: trajectory has %d steps ──\n", turn, len(steps))
+		} else {
+			fmt.Printf("\n── After Turn %d: trajectory has %d steps (was %d after Turn %d) ──\n",
+				turn, len(steps), prev, turn-1)
+		}
+		for i, s := range steps {
+			fmt.Printf("  step[%d] type=%d text=%q\n", i, s.Type, truncate(s.ResponseText, 80))
+		}
+		return len(steps)
+	}
+
+	firstCall := parsed.ToolCalls[0]
+	// Snapshot step count after Turn 1
+	stepsAfterT1 := dumpSteps(1, -1)
+
+	fakeResult := `["cmd/server/main.go","cmd/test_windsurf_tools/main.go","internal/pkg/windsurf/tool_emulation.go"]`
+	turn2 := fmt.Sprintf(
+		`<tool_result tool_call_id="%s">%s</tool_result>`+"\n\nBased on the tool result above, tell me which files look test-related.",
+		firstCall.ID, fakeResult)
+	ctx2, cancel2 := context.WithTimeout(context.Background(), f.timeout)
+	defer cancel2()
+	res2, err := lsClient.StreamCascadeChat(ctx2, f.jwt, pickedModel, turn2, preamble, cascadeID, 0)
+	if err != nil {
+		die("\n❌ Turn2 StreamCascadeChat:", err)
+	}
+	fmt.Printf("\n── Turn 2 response (text_len=%d thinking_len=%d) ──\n%s\n",
+		len(res2.Text), len(res2.Thinking), res2.Text)
+	parsed2 := windsurf.ParseToolCallsFromText(res2.Text)
+	fmt.Printf("\n── Turn 2 parsed tool_calls: %d ──\n", len(parsed2.ToolCalls))
+	for i, call := range parsed2.ToolCalls {
+		fmt.Printf("[%d] id=%s name=%s args=%s\n", i, call.ID, call.Name, call.ArgumentsJSON)
+	}
+	if len(parsed2.Text) > 20 && !containsIgnore(res2.Text, "i don't have access") {
+		fmt.Println("\n✅ round-trip works: model consumed tool_result and produced text")
+	} else {
+		fmt.Println("\n⚠️ round-trip suspicious: short or refusal text")
+	}
+	// Snapshot after Turn 2
+	dumpSteps(2, stepsAfterT1)
+}
+
+// containsIgnore reports whether needle occurs in haystack when both are
+// compared after strings.ToLower.
+func containsIgnore(haystack, needle string) bool {
+	lowerHay := strings.ToLower(haystack)
+	lowerNeedle := strings.ToLower(needle)
+	return strings.Index(lowerHay, lowerNeedle) >= 0
+}
+
+// truncate returns s unchanged when it is at most n bytes long; otherwise it
+// returns a prefix of at most n bytes followed by "...".
+//
+// The cut never lands inside a multi-byte UTF-8 sequence: it is moved back to
+// the start of the rune that straddles byte n. A negative n is treated as 0.
+func truncate(s string, n int) string {
+	if n < 0 {
+		n = 0
+	}
+	if len(s) <= n {
+		return s
+	}
+	// Bytes of the form 10xxxxxx are UTF-8 continuation bytes; cutting in front
+	// of one would leave a broken rune at the end of the prefix.
+	for n > 0 && s[n]&0xC0 == 0x80 {
+		n--
+	}
+	return s[:n] + "..."
+}
+
+// Blank-identifier reference to the strings package. containsIgnore in this
+// file already calls into strings, so the import stays in use without it.
+var _ = strings.HasPrefix
--- a/backend/internal/handler/admin/account_data.go
+++ b/backend/internal/handler/admin/account_data.go
@ -10,6 +10,7 @@ import (

 	"log/slog"

+	"github.com/Wei-Shaw/sub2api/internal/domain"
 	infraerrors "github.com/Wei-Shaw/sub2api/internal/pkg/errors"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/openai"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/response"
@ -560,7 +561,7 @@ func validateDataAccount(item DataAccount) error {
 		return errors.New("account credentials is required")
 	}
 	switch item.Type {
-	case service.AccountTypeOAuth, service.AccountTypeSetupToken, service.AccountTypeAPIKey, service.AccountTypeUpstream:
+	case service.AccountTypeOAuth, service.AccountTypeSetupToken, service.AccountTypeAPIKey, service.AccountTypeUpstream, domain.AccountTypeBedrock, domain.AccountTypeWindsurfSession:
 	default:
 		return fmt.Errorf("account type is invalid: %s", item.Type)
 	}
--- a/backend/internal/handler/failover_loop.go
+++ b/backend/internal/handler/failover_loop.go
@ -37,6 +37,11 @@ const (
 	// Service 层在 SingleAccountRetry 模式下已做充分原地重试（最多 3 次、总等待 30s），
 	// Handler 层只需短暂间隔后重新进入 Service 层即可。
 	singleAccountBackoffDelay = 2 * time.Second
+	// stickyGraceRetries 粘性会话绑定账号的宽限重试次数：
+	// 命中 sticky 的账号在首次失败时原地重试，避免会话瞬移到其他账号导致上下文断裂。
+	stickyGraceRetries = 1
+	// stickyGraceDelay 粘性宽限重试间隔
+	stickyGraceDelay = 1500 * time.Millisecond
 )

 // FailoverState 跨循环迭代共享的 failover 状态
@ -48,6 +53,10 @@ type FailoverState struct {
 	LastFailoverErr       *service.UpstreamFailoverError
 	ForceCacheBilling     bool
 	hasBoundSession       bool
+	// stickyBoundAccountID 命中粘性会话预绑定的账号 ID（>0 时生效）
+	stickyBoundAccountID int64
+	// stickyGraceUsed 已消耗的粘性宽限次数
+	stickyGraceUsed int
 }

 // NewFailoverState 创建 failover 状态
@ -60,6 +69,12 @@ func NewFailoverState(maxSwitches int, hasBoundSession bool) *FailoverState {
 	}
 }

+// WithStickyBoundAccount records the account ID that the sticky session was
+// pre-bound to, for the grace-retry check in HandleFailoverError (which only
+// considers IDs greater than 0). It returns the receiver so the call can be
+// chained onto NewFailoverState.
+func (s *FailoverState) WithStickyBoundAccount(accountID int64) *FailoverState {
+	s.stickyBoundAccountID = accountID
+	return s
+}
+
 // HandleFailoverError 处理 UpstreamFailoverError，返回下一步动作。
 // 包含：缓存计费判断、同账号重试、临时封禁、切换计数、Antigravity 延时。
 func (s *FailoverState) HandleFailoverError(
@ -91,6 +106,25 @@ func (s *FailoverState) HandleFailoverError(
 		return FailoverContinue
 	}

+	// 粘性会话宽限：如果当前账号是粘性预绑定账号，首次失败给一次原地等待重试机会，
+	// 避免因为瞬时 rate_limit 就把会话从 A 瞬移到 B 导致上下文/cascade 断裂。
+	// 仅对非 RetryableOnSameAccount 的硬失败生效（RetryableOnSameAccount 上面已处理）。
+	if s.stickyBoundAccountID > 0 &&
+		accountID == s.stickyBoundAccountID &&
+		s.stickyGraceUsed < stickyGraceRetries {
+		s.stickyGraceUsed++
+		logger.FromContext(ctx).Warn("gateway.failover_sticky_grace_retry",
+			zap.Int64("account_id", accountID),
+			zap.Int("upstream_status", failoverErr.StatusCode),
+			zap.Int("sticky_grace_used", s.stickyGraceUsed),
+			zap.Int("sticky_grace_max", stickyGraceRetries),
+		)
+		if !sleepWithContext(ctx, stickyGraceDelay) {
+			return FailoverCanceled
+		}
+		return FailoverContinue
+	}
+
 	// 同账号重试用尽，执行临时封禁
 	if failoverErr.RetryableOnSameAccount {
 		gatewayService.TempUnscheduleRetryableError(ctx, accountID, failoverErr)
--- a/backend/internal/handler/gateway_handler.go
+++ b/backend/internal/handler/gateway_handler.go
@ -294,7 +294,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 	hasBoundSession := sessionKey != "" && sessionBoundAccountID > 0

 	if platform == service.PlatformGemini {
-		fs := NewFailoverState(h.maxAccountSwitchesGemini, hasBoundSession)
+		fs := NewFailoverState(h.maxAccountSwitchesGemini, hasBoundSession).WithStickyBoundAccount(sessionBoundAccountID)

 		// 单账号分组提前设置 SingleAccountRetry 标记，让 Service 层首次 503 就不设模型限流标记。
 		// 避免单账号分组收到 503 (MODEL_CAPACITY_EXHAUSTED) 时设 29s 限流，导致后续请求连续快速失败。
@ -524,7 +524,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 	}

 	for {
-		fs := NewFailoverState(h.maxAccountSwitches, hasBoundSession)
+		fs := NewFailoverState(h.maxAccountSwitches, hasBoundSession).WithStickyBoundAccount(sessionBoundAccountID)
 		retryWithFallback := false

 		for {
@ -773,6 +773,12 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 					action := fs.HandleFailoverError(c.Request.Context(), h.gatewayService, account.ID, account.Platform, failoverErr)
 					switch action {
 					case FailoverContinue:
+						// windsurf Forward 失败路径不会向客户端写任何字节（整合式调用），
+						// 连续 failover 期间客户端可能几十秒收不到任何帧导致 SDK 超时断开。
+						// 在切号前发 1 帧 SSE ping 保活（仅限流式请求）。
+						if reqStream && account.Platform == service.PlatformWindsurf {
+							h.sendFailoverKeepalivePing(c, &streamStarted)
+						}
 						continue
 					case FailoverExhausted:
 						h.handleFailoverExhausted(c, fs.LastFailoverErr, account.Platform, streamStarted)
@ -1301,6 +1307,28 @@ func (h *GatewayHandler) mapUpstreamError(statusCode int) (int, string, string)
 	}
 }

+// sendFailoverKeepalivePing writes one Claude SSE ping frame to the client
+// between failover attempts, so the client does not go a long time without
+// receiving any bytes while accounts are being switched.
+//
+// When streamStarted is non-nil and still false, the SSE response headers are
+// set first and *streamStarted becomes true; later calls only emit the ping.
+// Nothing is done if the writer cannot flush. A write error is recorded on the
+// gin context and the flush is skipped.
+func (h *GatewayHandler) sendFailoverKeepalivePing(c *gin.Context, streamStarted *bool) {
+	flusher, canFlush := c.Writer.(http.Flusher)
+	if !canFlush {
+		return
+	}
+
+	if needHeaders := streamStarted != nil && !*streamStarted; needHeaders {
+		sseHeaders := [...][2]string{
+			{"Content-Type", "text/event-stream"},
+			{"Cache-Control", "no-cache"},
+			{"Connection", "keep-alive"},
+			{"X-Accel-Buffering", "no"},
+		}
+		for _, kv := range sseHeaders {
+			c.Header(kv[0], kv[1])
+		}
+		*streamStarted = true
+	}
+
+	_, writeErr := fmt.Fprint(c.Writer, string(SSEPingFormatClaude))
+	if writeErr != nil {
+		_ = c.Error(writeErr)
+		return
+	}
+	flusher.Flush()
+}
+
 // handleStreamingAwareError handles errors that may occur after streaming has started
 func (h *GatewayHandler) handleStreamingAwareError(c *gin.Context, status int, errType, message string, streamStarted bool) {
 	if streamStarted {
--- a/backend/internal/pkg/windsurf/client.go
+++ b/backend/internal/pkg/windsurf/client.go
@ -122,9 +122,11 @@ func (c *Client) GetUserStatus(ctx context.Context, token string) (*UserStatus,
 			Email      string `json:"email"`
 			PlanStatus struct {
 				PlanInfo struct {
-					PlanName             json.Number `json:"planName"`
-					MonthlyPromptCredits json.Number `json:"monthlyPromptCredits"`
-					MonthlyFlexCredits   json.Number `json:"monthlyFlexCreditPurchaseAmount"`
+					// 上游可能返回字符串（如 "Trial"）或数字，统一用 json.RawMessage 兜底
+					// 再按需解析为字符串展示；避免 json.Number 遇字符串时解码失败导致整个 userStatus 拉取失败。
+					PlanName             json.RawMessage `json:"planName"`
+					MonthlyPromptCredits json.Number     `json:"monthlyPromptCredits"`
+					MonthlyFlexCredits   json.Number     `json:"monthlyFlexCreditPurchaseAmount"`
 				} `json:"planInfo"`
 				DailyQuotaRemainingPercent  *float64    `json:"dailyQuotaRemainingPercent"`
 				WeeklyQuotaRemainingPercent *float64    `json:"weeklyQuotaRemainingPercent"`
@ -158,7 +160,7 @@ func (c *Client) GetUserStatus(ctx context.Context, token string) (*UserStatus,
 		TeamID:               us.TeamID,
 		Name:                 us.Name,
 		Email:                us.Email,
-		PlanName:             ps.PlanInfo.PlanName.String(),
+		PlanName:             planNameString(ps.PlanInfo.PlanName),
 		DailyPercent:         ps.DailyQuotaRemainingPercent,
 		WeeklyPercent:        ps.WeeklyQuotaRemainingPercent,
 		MonthlyPromptCredits: numPtr(ps.PlanInfo.MonthlyPromptCredits),
@ -168,6 +170,18 @@ func (c *Client) GetUserStatus(ctx context.Context, token string) (*UserStatus,
 	}, nil
 }

+// planNameString converts the upstream planName value, which may arrive as a
+// JSON string or as a number, into a plain Go string. Empty input yields "".
+// A value that does not decode as a JSON string is returned as its raw text
+// with any surrounding double quotes trimmed.
+func planNameString(raw json.RawMessage) string {
+	var name string
+	switch {
+	case len(raw) == 0:
+		return ""
+	case json.Unmarshal(raw, &name) == nil:
+		return name
+	default:
+		return strings.Trim(string(raw), `"`)
+	}
+}
+
 // ModelInfo is one entry of GetCascadeModelConfigs response.
 type ModelInfo struct {
 	ModelUID         string  `json:"modelUid"`
--- a/backend/internal/pkg/windsurf/conversation_pool.go
+++ b/backend/internal/pkg/windsurf/conversation_pool.go
@ -143,21 +143,23 @@ func (cp *ConversationPool) pruneLoop() {

 // FingerprintBefore computes the fingerprint for resuming a conversation.
 // Hash only user/tool turns (excluding the last one) for lookup.
-func FingerprintBefore(messages []ChatMessage, modelKey string) string {
+// apiKey 参与 hash：cascade_id 绑定具体上游账号/LS，不同账号即使消息一致也不能复用，
+// 否则 failover 切号后命中旧 cascade 会触发 "panel state not found"。
+func FingerprintBefore(messages []ChatMessage, modelKey, apiKey string) string {
 	turns := stableTurns(messages)
 	if len(turns) < 2 {
 		return ""
 	}
-	return hashFingerprint(modelKey, turns[:len(turns)-1])
+	return hashFingerprint(modelKey, apiKey, turns[:len(turns)-1])
 }

 // FingerprintAfter computes the fingerprint after a successful turn.
-func FingerprintAfter(messages []ChatMessage, modelKey string) string {
+func FingerprintAfter(messages []ChatMessage, modelKey, apiKey string) string {
 	turns := stableTurns(messages)
 	if len(turns) == 0 {
 		return ""
 	}
-	return hashFingerprint(modelKey, turns)
+	return hashFingerprint(modelKey, apiKey, turns)
 }

 func stableTurns(messages []ChatMessage) []ChatMessage {
@ -170,7 +172,7 @@ func stableTurns(messages []ChatMessage) []ChatMessage {
 	return turns
 }

-func hashFingerprint(modelKey string, turns []ChatMessage) string {
+func hashFingerprint(modelKey, apiKey string, turns []ChatMessage) string {
 	type canonical struct {
 		Role    string `json:"role"`
 		Content string `json:"content"`
@ -180,6 +182,6 @@ func hashFingerprint(modelKey string, turns []ChatMessage) string {
 		cans[i] = canonical{Role: t.Role, Content: t.Content}
 	}
 	data, _ := json.Marshal(cans)
-	h := sha256.Sum256([]byte(fmt.Sprintf("%s\x00\x00%s", modelKey, data)))
+	h := sha256.Sum256([]byte(fmt.Sprintf("%s\x00\x00%s\x00\x00%s", modelKey, apiKey, data)))
 	return fmt.Sprintf("%x", h)
 }
--- a/backend/internal/pkg/windsurf/local_ls.go
+++ b/backend/internal/pkg/windsurf/local_ls.go
@ -388,6 +388,18 @@ func (l *LocalLSClient) StreamCascadeChat(ctx context.Context, token, modelUID,
 		}
 	}

+	// When reusing a cascade, capture the pre-existing step count so subsequent
+	// polls only fetch new steps. Without this, Turn 2 would re-read Turn 1's
+	// completed steps and append them again to the accumulated text, causing
+	// the response to duplicate Turn 1's prefix (including prior <tool_call>).
+	startStepIndex := 0
+	if reuseCascadeID != "" {
+		baselineSteps, berr := l.GetTrajectorySteps(ctx, cascadeID, 0)
+		if berr == nil {
+			startStepIndex = len(baselineSteps)
+		}
+	}
+
 	cascadeID, err = l.SendUserCascadeMessage(ctx, token, cascadeID, userText, modelUID, toolPreamble, modelEnumHint)
 	if err != nil {
 		return nil, fmt.Errorf("SendUserCascadeMessage: %w", err)
@ -432,7 +444,7 @@ func (l *LocalLSClient) StreamCascadeChat(ctx context.Context, token, modelUID,

 		time.Sleep(pollInterval)

-		steps, err := l.GetTrajectorySteps(ctx, cascadeID, 0)
+		steps, err := l.GetTrajectorySteps(ctx, cascadeID, startStepIndex)
 		if err != nil {
 			continue
 		}
@ -538,7 +550,7 @@ func (l *LocalLSClient) StreamCascadeChat(ctx context.Context, token, modelUID,
 			}
 			if canBreak {
 				// Final sweep: fetch one more time to get modifiedText top-up
-				finalSteps, err := l.GetTrajectorySteps(ctx, cascadeID, 0)
+				finalSteps, err := l.GetTrajectorySteps(ctx, cascadeID, startStepIndex)
 				if err == nil {
 					for idx, s := range finalSteps {
 						if s.Usage != nil {
--- a/backend/internal/pkg/windsurf/models.go
+++ b/backend/internal/pkg/windsurf/models.go
@ -95,6 +95,84 @@ var catalog = map[string]ModelMeta{
 	// Windsurf SWE
 	"swe-1.5":      {Name: "swe-1.5", Provider: "windsurf", EnumValue: 369, ModelUID: "MODEL_SWE_1_5_SLOW", Credit: 0.5},
 	"swe-1.5-fast": {Name: "swe-1.5-fast", Provider: "windsurf", EnumValue: 359, ModelUID: "MODEL_SWE_1_5", Credit: 0.5},
+	"swe-1.6":      {Name: "swe-1.6", Provider: "windsurf", ModelUID: "swe-1-6", Credit: 0.5},
+	"swe-1.6-fast": {Name: "swe-1.6-fast", Provider: "windsurf", ModelUID: "swe-1-6-fast", Credit: 0.5},
+
+	// Claude Opus 4.7 variants (Windsurf string UIDs; Credit from GetCascadeModelConfigs 2026-04-24)
+	"claude-opus-4.7-low":   {Name: "claude-opus-4.7-low", Provider: "anthropic", ModelUID: "claude-opus-4-7-low", Credit: 7},
+	"claude-opus-4.7-high":  {Name: "claude-opus-4.7-high", Provider: "anthropic", ModelUID: "claude-opus-4-7-high", Credit: 14},
+	"claude-opus-4.7-xhigh": {Name: "claude-opus-4.7-xhigh", Provider: "anthropic", ModelUID: "claude-opus-4-7-xhigh", Credit: 16},
+	"claude-opus-4.7-max":   {Name: "claude-opus-4.7-max", Provider: "anthropic", ModelUID: "claude-opus-4-7-max", Credit: 30},
+
+	// GPT-5.4 Mini variants
+	"gpt-5.4-mini-low":    {Name: "gpt-5.4-mini-low", Provider: "openai", ModelUID: "gpt-5-4-mini-low", Credit: 1.5},
+	"gpt-5.4-mini-medium": {Name: "gpt-5.4-mini-medium", Provider: "openai", ModelUID: "gpt-5-4-mini-medium", Credit: 1.5},
+	"gpt-5.4-mini-high":   {Name: "gpt-5.4-mini-high", Provider: "openai", ModelUID: "gpt-5-4-mini-high", Credit: 4.5},
+	"gpt-5.4-mini-xhigh":  {Name: "gpt-5.4-mini-xhigh", Provider: "openai", ModelUID: "gpt-5-4-mini-xhigh", Credit: 12},
+
+	// Gemini 3.1 Pro variants
+	"gemini-3.1-pro-low":  {Name: "gemini-3.1-pro-low", Provider: "google", ModelUID: "gemini-3-1-pro-low", Credit: 1},
+	"gemini-3.1-pro-high": {Name: "gemini-3.1-pro-high", Provider: "google", ModelUID: "gemini-3-1-pro-high", Credit: 2},
+
+	// Gemini 3.0 Flash variants
+	"gemini-3.0-flash-minimal": {Name: "gemini-3.0-flash-minimal", Provider: "google", ModelUID: "MODEL_GOOGLE_GEMINI_3_0_FLASH_MINIMAL", Credit: 0.75},
+	"gemini-3.0-flash-low":     {Name: "gemini-3.0-flash-low", Provider: "google", ModelUID: "MODEL_GOOGLE_GEMINI_3_0_FLASH_LOW", Credit: 1},
+	"gemini-3.0-flash-high":    {Name: "gemini-3.0-flash-high", Provider: "google", ModelUID: "MODEL_GOOGLE_GEMINI_3_0_FLASH_HIGH", Credit: 1.75},
+
+	// Kimi variants
+	"kimi-k2.5": {Name: "kimi-k2.5", Provider: "moonshot", ModelUID: "kimi-k2-5", Credit: 1},
+	"kimi-k2.6": {Name: "kimi-k2.6", Provider: "moonshot", ModelUID: "kimi-k2-6", Credit: 0},
+
+	// GLM
+	"glm-5":   {Name: "glm-5", Provider: "zhipu", ModelUID: "glm-5", Credit: 1.5},
+	"glm-5.1": {Name: "glm-5.1", Provider: "zhipu", ModelUID: "glm-5-1", Credit: 1.5},
+
+	// Minimax
+	"minimax-m2.5": {Name: "minimax-m2.5", Provider: "minimax", ModelUID: "minimax-m2-5", Credit: 1},
+
+	// GPT-5.1 (MODEL_PRIVATE_12..15, _20..23)
+	"gpt-5.1":             {Name: "gpt-5.1", Provider: "openai", ModelUID: "MODEL_PRIVATE_12", Credit: 0.5},
+	"gpt-5.1-low":         {Name: "gpt-5.1-low", Provider: "openai", ModelUID: "MODEL_PRIVATE_13", Credit: 0.5},
+	"gpt-5.1-medium":      {Name: "gpt-5.1-medium", Provider: "openai", ModelUID: "MODEL_PRIVATE_14", Credit: 1},
+	"gpt-5.1-high":        {Name: "gpt-5.1-high", Provider: "openai", ModelUID: "MODEL_PRIVATE_15", Credit: 2},
+	"gpt-5.1-fast":        {Name: "gpt-5.1-fast", Provider: "openai", ModelUID: "MODEL_PRIVATE_20", Credit: 1},
+	"gpt-5.1-low-fast":    {Name: "gpt-5.1-low-fast", Provider: "openai", ModelUID: "MODEL_PRIVATE_21", Credit: 1},
+	"gpt-5.1-medium-fast": {Name: "gpt-5.1-medium-fast", Provider: "openai", ModelUID: "MODEL_PRIVATE_22", Credit: 2},
+	"gpt-5.1-high-fast":   {Name: "gpt-5.1-high-fast", Provider: "openai", ModelUID: "MODEL_PRIVATE_23", Credit: 4},
+
+	// GPT-5.1 Codex variants
+	"gpt-5.1-codex-low":        {Name: "gpt-5.1-codex-low", Provider: "openai", ModelUID: "MODEL_GPT_5_1_CODEX_LOW", Credit: 0.5},
+	"gpt-5.1-codex-medium":     {Name: "gpt-5.1-codex-medium", Provider: "openai", ModelUID: "MODEL_PRIVATE_9", Credit: 1},
+	"gpt-5.1-codex-mini":       {Name: "gpt-5.1-codex-mini", Provider: "openai", ModelUID: "MODEL_PRIVATE_19", Credit: 0.5},
+	"gpt-5.1-codex-mini-low":   {Name: "gpt-5.1-codex-mini-low", Provider: "openai", ModelUID: "MODEL_GPT_5_1_CODEX_MINI_LOW", Credit: 0.25},
+	"gpt-5.1-codex-max-low":    {Name: "gpt-5.1-codex-max-low", Provider: "openai", ModelUID: "MODEL_GPT_5_1_CODEX_MAX_LOW", Credit: 1},
+	"gpt-5.1-codex-max-medium": {Name: "gpt-5.1-codex-max-medium", Provider: "openai", ModelUID: "MODEL_GPT_5_1_CODEX_MAX_MEDIUM", Credit: 1.25},
+	"gpt-5.1-codex-max-high":   {Name: "gpt-5.1-codex-max-high", Provider: "openai", ModelUID: "MODEL_GPT_5_1_CODEX_MAX_HIGH", Credit: 1.5},
+
+	// GPT-5.2 additional (existing: gpt-5.2, gpt-5.2-low, gpt-5.2-high, gpt-5.2-xhigh)
+	"gpt-5.2-none":        {Name: "gpt-5.2-none", Provider: "openai", ModelUID: "MODEL_GPT_5_2_NONE", Credit: 1},
+	"gpt-5.2-none-fast":   {Name: "gpt-5.2-none-fast", Provider: "openai", ModelUID: "MODEL_GPT_5_2_NONE_PRIORITY", Credit: 2},
+	"gpt-5.2-low-fast":    {Name: "gpt-5.2-low-fast", Provider: "openai", ModelUID: "MODEL_GPT_5_2_LOW_PRIORITY", Credit: 2},
+	"gpt-5.2-medium-fast": {Name: "gpt-5.2-medium-fast", Provider: "openai", ModelUID: "MODEL_GPT_5_2_MEDIUM_PRIORITY", Credit: 4},
+	"gpt-5.2-high-fast":   {Name: "gpt-5.2-high-fast", Provider: "openai", ModelUID: "MODEL_GPT_5_2_HIGH_PRIORITY", Credit: 6},
+	"gpt-5.2-xhigh-fast":  {Name: "gpt-5.2-xhigh-fast", Provider: "openai", ModelUID: "MODEL_GPT_5_2_XHIGH_PRIORITY", Credit: 16},
+
+	// GPT-5.2 Codex variants
+	"gpt-5.2-codex-low":         {Name: "gpt-5.2-codex-low", Provider: "openai", ModelUID: "MODEL_GPT_5_2_CODEX_LOW", Credit: 1},
+	"gpt-5.2-codex-medium":      {Name: "gpt-5.2-codex-medium", Provider: "openai", ModelUID: "MODEL_GPT_5_2_CODEX_MEDIUM", Credit: 1},
+	"gpt-5.2-codex-high":        {Name: "gpt-5.2-codex-high", Provider: "openai", ModelUID: "MODEL_GPT_5_2_CODEX_HIGH", Credit: 2},
+	"gpt-5.2-codex-xhigh":       {Name: "gpt-5.2-codex-xhigh", Provider: "openai", ModelUID: "MODEL_GPT_5_2_CODEX_XHIGH", Credit: 3},
+	"gpt-5.2-codex-low-fast":    {Name: "gpt-5.2-codex-low-fast", Provider: "openai", ModelUID: "MODEL_GPT_5_2_CODEX_LOW_PRIORITY", Credit: 2},
+	"gpt-5.2-codex-medium-fast": {Name: "gpt-5.2-codex-medium-fast", Provider: "openai", ModelUID: "MODEL_GPT_5_2_CODEX_MEDIUM_PRIORITY", Credit: 2},
+	"gpt-5.2-codex-high-fast":   {Name: "gpt-5.2-codex-high-fast", Provider: "openai", ModelUID: "MODEL_GPT_5_2_CODEX_HIGH_PRIORITY", Credit: 4},
+	"gpt-5.2-codex-xhigh-fast":  {Name: "gpt-5.2-codex-xhigh-fast", Provider: "openai", ModelUID: "MODEL_GPT_5_2_CODEX_XHIGH_PRIORITY", Credit: 6},
+
+	// GPT-OSS
+	"gpt-oss-120b": {Name: "gpt-oss-120b", Provider: "openai", ModelUID: "MODEL_GPT_OSS_120B", Credit: 0.25},
+
+	// Grok — runtime UID for mini reasoning (was missing, existing grok-3-mini used legacy enum only)
+	"grok-3-mini-reasoning": {Name: "grok-3-mini-reasoning", Provider: "xai", ModelUID: "MODEL_XAI_GROK_3_MINI_REASONING", Credit: 0.125},
+	"grok-code-fast-1":      {Name: "grok-code-fast-1", Provider: "xai", ModelUID: "MODEL_PRIVATE_4", Credit: 0.5},
 }

 var (
@ -128,6 +206,8 @@ func buildLookup() {
 		"claude-opus-4-1-20250805":   "claude-4.1-opus",
 		"claude-sonnet-4-5":          "claude-4.5-sonnet",
 		"claude-sonnet-4-5-20250929": "claude-4.5-sonnet",
+		"claude-haiku-4-5":           "claude-4.5-haiku",
+		"claude-haiku-4-5-20251001":  "claude-4.5-haiku",
 		"claude-opus-4-5":            "claude-4.5-opus",
 		"claude-opus-4-5-20251101":   "claude-4.5-opus",
 		"claude-opus-4-7":            "claude-opus-4-7-medium",
@ -156,6 +236,10 @@ func buildLookup() {
 		"opus-4.6-thinking":    "claude-opus-4.6-thinking",
 		"opus-4-7":             "claude-opus-4-7-medium",
 		"opus-4.7":             "claude-opus-4-7-medium",
+		"opus-4.7-low":         "claude-opus-4.7-low",
+		"opus-4.7-high":        "claude-opus-4.7-high",
+		"opus-4.7-xhigh":       "claude-opus-4.7-xhigh",
+		"opus-4.7-max":         "claude-opus-4.7-max",
 		"sonnet-4.6":           "claude-sonnet-4.6",
 		"sonnet-4.6-thinking":  "claude-sonnet-4.6-thinking",
 		"sonnet-4.6-1m":        "claude-sonnet-4.6-1m",
--- a/backend/internal/pkg/windsurf/tool_emulation.go
+++ b/backend/internal/pkg/windsurf/tool_emulation.go
@ -36,24 +36,79 @@ const toolProtocolFooter = `

 Now respond to the user request above. Use <tool_call> if appropriate, otherwise answer directly.`

-const toolProtocolSystemHeader = `You have access to the following functions. To invoke a function, emit a block in this EXACT format:
+// toolProtocolSystemHeader — copied VERBATIM from Windsurf language_server_macos_arm
+// binary (offset ~37379200). This is the intro paragraph of the canonical tool
+// calling system prompt Cascade's native LS uses. Do not paraphrase. Layout of
+// the full prompt as assembled by BuildToolPreambleForProto:
+//   "You are a tool calling agent..." [intro — this constant]
+//   <tools>
+//   one JSON tool schema per line
+//   </tools>
+//   "For each function call..." [rules — toolProtocolCallFormatRules]
+// This constant contains no %s placeholder; the caller emits the <tools> block
+// and the tool schemas itself.
+const toolProtocolSystemHeader = `You are a tool calling agent. You are provided with function signatures within <tools> </tools> XML tags. You may call one or more functions to assist with the user query. If available tools are not relevant in assisting with user query, just respond in natural conversational language. Don't make assumptions about what values to plug into functions. After calling & executing the functions, you will be provided with function results within <tool_response> </tool_response> XML tags.`

-<tool_call>{"name":"<function_name>","arguments":{...}}</tool_call>
+const toolProtocolCallFormatRules = `For each function call return a JSON object, with the following json schema:
+{"name": "function_name", "arguments": <args-dict>}
+Each function call should be enclosed within <tool_call> </tool_call> XML tags, only one tool per tag. For example:
+<tool_call>
+{"name": <function-name>, "arguments": <args-dict>}
+</tool_call>`

-Rules:
-1. Each <tool_call>...</tool_call> block must fit on ONE line (no line breaks inside the JSON).
-2. "arguments" must be a JSON object matching the function's parameter schema.
-3. You MAY emit MULTIPLE <tool_call> blocks if the request requires calling several functions in parallel. Emit ALL needed calls consecutively, then STOP generating.
-4. After emitting the last <tool_call> block, STOP. Do not write any explanation after it. The caller executes the functions and returns results wrapped in <tool_result tool_call_id="...">...</tool_result> tags in the next user turn.
-5. NEVER say "I don't have access to tools" or "I cannot perform that action" — the functions listed below ARE your available tools.`
+// toolProtocolExamples — verbatim Example 1-5 from Windsurf LS binary.
+// Example 2 has an intentional typo in the source (":" instead of "," after
+// "write_to_file") — kept as-is so the model sees the exact training example.
+const toolProtocolExamples = `# Examples

+Here are some examples of how to structure your responses with tool calls:
+
+Example 1: Using a single tool
+
+Let's run the test suite for our project. This will help us ensure that all our components are functioning correctly.
+<tool_call>
+{"name": "run_command", "arguments": {"CommandLine":"npm test","Cwd":"/home/project/","Blocking":true,"WaitMsBeforeAsync":0,"SafeToAutoRun":true,"explanation":"Running the test suite again after fixing the import issue."}}
+</tool_call>
+
+Example 2: Using multiple tools
+
+Let's create two new configuration files for the web application: one for the frontend and one for the backend.
+<tool_call>
+{"name": "write_to_file", "arguments": {"TargetFile": "/Users/johnsmith/webapp/frontend/frontend-config.json", "CodeContent": "{\n\"apiEndpoint\": \"https://api.example.com\",\n  \"theme\": {\n    \"primaryColor\": \"#007bff\",\n    \"secondaryColor\": \"#6c757d\",\n    \"fontFamily\": \"Arial, sans-serif\"\n  },\n  \"features\": {\n    \"darkMode\": true,\n    \"notifications\": true,\n    \"analytics\": false\n  },\n  \"version\": \"1.0.0\"\n}","explanation":"Creating a frontend json config."}}
+</tool_call>
+<tool_call>
+{"name": "write_to_file": "arguments": {"TargetFile":"/Users/johnsmith/webapp/backend/backend-config.yaml","CodeContent":"database:\n  host: localhost\n  port: 5432\n  name: myapp_db\n  user: admin\n\nserver:\n  port: 3000\n  environment: development\n  logLevel: debug\n\nsecurity:\n  jwtSecret: your-secret-key-here\n  passwordSaltRounds: 10\n\ncaching:\n  enabled: true\n  provider: redis\n  ttl: 3600\n\nexternalServices:\n  emailProvider: sendgrid\n  storageProvider: aws-s3","explanation":"Creating a backend yaml config."}}
+</tool_call>
+
+Example 3: Searching the codebase
+
+I'll help investigate and fix this issue with dimension detection in ` + "`SI._collect_factor_and_dimension()`" + `. Let me first examine the relevant code.
+<tool_call>
+{"name": "codebase_search", "arguments": {"Query":"class SI _collect_factor_and_dimension","TargetDirectories":["/working/repo/sympy/sympy/sympy/physics/units"],"explanation":"Looking for the SI class implementation in the physics/units directory to find the _collect_factor_and_dimension method."}}
+</tool_call>
+
+Example 4: Editing a file
+
+I'll add logging statements in the function to figure out the source of the error.
+<tool_call>
+{"name": "edit_file", "arguments": {"CodeMarkdownLanguage":"go","TargetFile":"/home/project/foo/bar.go","Instruction":"Add logging statements in the function to figure out the source of the error.","Blocking":true,"CodeEdit":"{{...}}\nfunc main() {\n    fmt.Println(\"Hello World\")\n{{...}}","explanation":"Adding logging statements in the function to figure out the source of the error."}}
+</tool_call>
+
+Example 5: Finishing a chain of responses
+
+Great! I've fixed the import issue and the test suite is passing again. Let me know what feature you'd like to build next!`
+
+// toolProtocolParallelDirective — verbatim <maximize_parallel_tool_calls>
+// block content from Windsurf LS binary. Tells the model to issue parallel
+// tool calls for independent operations rather than serial ones.
+const toolProtocolParallelDirective = `CRITICAL INSTRUCTION: For maximum efficiency, whenever you perform multiple operations, invoke all relevant tools simultaneously rather than sequentially. Prioritize calling tools in parallel whenever possible. For example, when reading 3 files, run 3 tool calls in parallel to read all 3 files into context at the same time. When running multiple read-only commands like read_file, grep or codebase_search, always run all of the commands in parallel. Err on the side of maximizing parallel tool calls rather than running too many tools sequentially.`
+
+// toolChoiceSuffix holds the extra line appended for explicit tool_choice modes.
+// "auto" (default) adds nothing and relies on the Windsurf-native wording above,
+// which lets the model decide naturally. "required", "none" and forced-by-name
+// are OpenAI contract extensions — we emit an extra line only when the caller asks.
 var toolChoiceSuffix = map[string]string{
-	"auto": `
-6. When a function is relevant to the user's request, you SHOULD call it rather than answering from memory. Prefer using a tool over guessing.`,
-	"required": `
-6. You MUST call at least one function for every request. Do NOT answer directly in plain text — always use a <tool_call>.`,
-	"none": `
-6. Do NOT call any functions. Answer the user's question directly in plain text.`,
+	"auto":     ``,
+	"required": `You must call at least one function before answering.`,
+	"none":     `Do not call any function. Answer in plain text only.`,
 }

 // OpenAITool represents an OpenAI-format tool definition.
@ -131,6 +186,24 @@ func BuildToolPreamble(tools []OpenAITool) string {

 // BuildToolPreambleForProto builds a system-prompt-level preamble for
 // injection via CascadeConversationalPlannerConfig.tool_calling_section.
+// Layout follows the VERBATIM Windsurf LS binary block:
+//
+//	[intro ending in <tool_response>]
+//	<tools>
+//	{json-schema-1}
+//	{json-schema-2}
+//	</tools>
+//	For each function call return a JSON object, ...
+//	<tool_call>
+//	{"name": <function-name>, "arguments": <args-dict>}
+//	</tool_call>
+//
+//	# Examples
+//	Example 1..5 verbatim
+//
+//	<maximize_parallel_tool_calls>
+//	CRITICAL INSTRUCTION: ...
+//	</maximize_parallel_tool_calls>
 func BuildToolPreambleForProto(tools []OpenAITool, toolChoice interface{}) string {
 	tools = canonicalizeOpenAITools(tools)
 	if len(tools) == 0 {
@ -139,37 +212,68 @@ func BuildToolPreambleForProto(tools []OpenAITool, toolChoice interface{}) strin
 	mode, forceName := resolveToolChoice(toolChoice)

 	var lines []string
+
+	// 1. Intro paragraph (stops with "<tool_response> </tool_response> XML tags.")
 	lines = append(lines, toolProtocolSystemHeader)

-	suffix, ok := toolChoiceSuffix[mode]
-	if !ok {
-		suffix = toolChoiceSuffix["auto"]
-	}
-	lines = append(lines, suffix)
-	if forceName != "" {
-		lines = append(lines, fmt.Sprintf(`7. You MUST call the function "%s". No other function and no direct answer.`, forceName))
-	}
-	lines = append(lines, "")
-	lines = append(lines, "Available functions:")
+	// 2. <tools> block — one JSON object per line, matching Windsurf-native shape.
+	lines = append(lines, "<tools>")
 	for _, t := range tools {
 		if t.Type != "function" {
 			continue
 		}
-		lines = append(lines, "")
-		lines = append(lines, "### "+t.Function.Name)
-		if t.Function.Description != "" {
-			lines = append(lines, t.Function.Description)
+		fn := map[string]interface{}{
+			"type":     "function",
+			"function": toolFunctionAsMap(t.Function),
 		}
-		if len(t.Function.Parameters) > 0 {
-			lines = append(lines, "Parameters:")
-			lines = append(lines, "```json")
-			lines = append(lines, formatToolSchema(t.Function.Parameters))
-			lines = append(lines, "```")
+		if b, err := json.Marshal(fn); err == nil {
+			lines = append(lines, string(b))
 		}
 	}
+	lines = append(lines, "</tools>")
+
+	// 3. JSON schema rules + <tool_call> wrapping example
+	lines = append(lines, toolProtocolCallFormatRules)
+
+	// 4. # Examples block (1-5, verbatim from Windsurf LS binary)
+	lines = append(lines, "")
+	lines = append(lines, toolProtocolExamples)
+
+	// 5. <maximize_parallel_tool_calls> directive (Windsurf-native, separate section).
+	// Wrap in the original XML section tag so the model sees the same structural cue.
+	lines = append(lines, "")
+	lines = append(lines, "<maximize_parallel_tool_calls>")
+	lines = append(lines, toolProtocolParallelDirective)
+	lines = append(lines, "</maximize_parallel_tool_calls>")
+
+	// 6. Optional behavior overrides (OpenAI tool_choice extension; NOT in
+	//    Windsurf native — emitted only when the caller explicitly asks).
+	if suffix, ok := toolChoiceSuffix[mode]; ok && suffix != "" {
+		lines = append(lines, "")
+		lines = append(lines, suffix)
+	}
+	if forceName != "" {
+		lines = append(lines, "")
+		lines = append(lines, fmt.Sprintf(`You must call the function "%s". No other function.`, forceName))
+	}
+
 	return strings.Join(lines, "\n")
 }

+// toolFunctionAsMap converts f into a generic map suitable for json.Marshal.
+//
+// The result always carries "name". "description" is added only when it is
+// non-empty, and "parameters" only when f.Parameters is non-empty and decodes
+// as JSON; a schema that fails to decode is silently left out.
+func toolFunctionAsMap(f OpenAIFunction) map[string]interface{} {
+	out := map[string]interface{}{"name": f.Name}
+	if desc := f.Description; desc != "" {
+		out["description"] = desc
+	}
+	if len(f.Parameters) == 0 {
+		return out
+	}
+	var schema interface{}
+	if err := json.Unmarshal(f.Parameters, &schema); err == nil {
+		out["parameters"] = schema
+	}
+	return out
+}
+
 func resolveToolChoice(tc interface{}) (string, string) {
 	if tc == nil {
 		return "auto", ""
@ -217,14 +321,10 @@ func NormalizeMessagesForCascade(messages []AnthropicMessage, tools []OpenAITool

 	for _, m := range messages {
 		if m.Role == "tool" {
-			id := m.ToolCallID
-			if id == "" {
-				id = "unknown"
-			}
 			content := extractToolResultPayload(m.Content)
 			out = append(out, ChatMessage{
 				Role:    "user",
-				Content: fmt.Sprintf("<tool_result tool_call_id=\"%s\">\n%s\n</tool_result>", id, content),
+				Content: fmt.Sprintf("<tool_response>\n%s\n</tool_response>", content),
 			})
 			continue
 		}
@ -346,6 +446,14 @@ type ToolCallStreamParser struct {
 	inToolCode   bool
 	inBareCall   bool
 	totalSeen    int
+	// sawToolCall flips true the moment a well-formed <tool_call> is parsed.
+	// Windsurf native protocol says the model must STOP after emitting a
+	// tool_call and wait for <tool_response> from the caller. Models (notably
+	// Claude Sonnet 4.6) often violate this by hallucinating a fake
+	// <tool_response> then continuing with a "conclusion" based on the fake
+	// data. We drop any top-level text that arrives after the first tool_call
+	// so the hallucinated conclusion never leaks to the client.
+	sawToolCall bool
 }

 // NewToolCallStreamParser creates a new parser instance.
@ -362,8 +470,8 @@ type FeedResult struct {
 const (
 	tcOpen   = "<tool_call>"
 	tcClose  = "</tool_call>"
-	trPrefix = "<tool_result"
-	trClose  = "</tool_result>"
+	trPrefix = "<tool_response"
+	trClose  = "</tool_response>"
 	tcCode   = `{"tool_code"`
 	tcBare   = `{"name"`
 )
@ -476,6 +584,7 @@ func (p *ToolCallStreamParser) consumeJSONBlock(parseFn func(string) *ToolCall)
 	tc := parseFn(jsonStr)
 	if tc != nil {
 		p.totalSeen++
+		p.sawToolCall = true
 		return tc, "", true
 	}
 	return nil, jsonStr, true
@ -523,6 +632,7 @@ func (p *ToolCallStreamParser) Feed(delta string) FeedResult {
 						ArgumentsJSON: string(argsJSON),
 					})
 					p.totalSeen++
+					p.sawToolCall = true
 				} else {
 					safeParts = append(safeParts, tcOpen+body+tcClose)
 				}
@ -604,7 +714,7 @@ func (p *ToolCallStreamParser) Feed(delta string) FeedResult {
 				}
 			}
 			emitUpto := len(p.buffer) - holdLen
-			if emitUpto > 0 {
+			if emitUpto > 0 && !p.sawToolCall {
 				safeParts = append(safeParts, p.buffer[:emitUpto])
 			}
 			p.buffer = p.buffer[emitUpto:]
@ -619,7 +729,7 @@ func (p *ToolCallStreamParser) Feed(delta string) FeedResult {
 			}
 		}

-		if best.idx > 0 {
+		if best.idx > 0 && !p.sawToolCall {
 			safeParts = append(safeParts, p.buffer[:best.idx])
 		}

@ -669,6 +779,7 @@ func (p *ToolCallStreamParser) Flush() FeedResult {
 		tc := p.parseToolCodeJSON(remaining)
 		if tc != nil {
 			p.totalSeen++
+			p.sawToolCall = true
 			return FeedResult{ToolCalls: []ToolCall{*tc}}
 		}
 		return FeedResult{Text: remaining}
@ -678,6 +789,7 @@ func (p *ToolCallStreamParser) Flush() FeedResult {
 		tc := p.parseBareToolCallJSON(remaining)
 		if tc != nil {
 			p.totalSeen++
+			p.sawToolCall = true
 			return FeedResult{ToolCalls: []ToolCall{*tc}}
 		}
 		return FeedResult{Text: remaining}
@ -713,6 +825,7 @@ func (p *ToolCallStreamParser) Flush() FeedResult {
 			ArgumentsJSON: string(argsJSON),
 		})
 		p.totalSeen++
+		p.sawToolCall = true
 		return ""
 	})

--- a/backend/internal/pkg/windsurf/tool_names.go
+++ b/backend/internal/pkg/windsurf/tool_names.go
@ -29,11 +29,24 @@ var canonicalToolAliases = map[string]string{
 }

 // NormalizeToolName canonicalizes known tool aliases while preserving unknown tool names.
+//
+// Callers use different conventions:
+//   - OpenCode / Codex: lowercase or snake_case ("read_file", "search_files")
+//     → canonicalize via aliases map.
+//   - Claude Code: PascalCase ("Read", "Bash", "Glob", "Edit", "TodoWrite", …)
+//     → must pass through untouched; lowercasing them breaks round-trip because
+//     Claude Code only recognizes its exact PascalCase names.
+//
+// Heuristic: if the input starts with an uppercase letter, treat it as the
+// caller's authoritative name and do NOT canonicalize.
 func NormalizeToolName(name string) string {
 	trimmed := strings.TrimSpace(name)
 	if trimmed == "" {
 		return ""
 	}
+	if c := trimmed[0]; c >= 'A' && c <= 'Z' {
+		return trimmed
+	}
 	if canonical, ok := canonicalToolAliases[strings.ToLower(trimmed)]; ok {
 		return canonical
 	}
--- a/backend/internal/repository/scheduler_cache.go
+++ b/backend/internal/repository/scheduler_cache.go
@ -433,6 +433,12 @@ func filterSchedulerExtra(extra map[string]any) map[string]any {
 		"responses_websockets_v2_enabled",
 		"openai_ws_enabled",
 		"openai_ws_force_http",
+		// model_rate_limits 必须进入调度快照：SetModelRateLimit 写入的模型级冷却
+		// 时间戳（accounts.extra.model_rate_limits.<modelKey>.rate_limit_reset_at）
+		// 是 isAccountSchedulableForModelSelection/IsSchedulableForModelWithContext
+		// 过滤候选账号的唯一依据。缺失会导致已限流账号被反复选中，触发 failover 切号环。
+		// 与 service.modelRateLimitsKey 常量保持字面量一致。
+		"model_rate_limits",
 	}
 	filtered := make(map[string]any)
 	for _, key := range keys {
--- a/backend/internal/repository/scheduler_cache_unit_test.go
+++ b/backend/internal/repository/scheduler_cache_unit_test.go
@ -31,3 +31,28 @@ func TestBuildSchedulerMetadataAccount_KeepsOpenAIWSFlags(t *testing.T) {
 	require.Equal(t, true, got.Extra["mixed_scheduling"])
 	require.Nil(t, got.Extra["unused_large_field"])
 }
+
+// Regression test: model_rate_limits must be passed through into the scheduler
+// snapshot. Without it the account-selection stage cannot see model-level rate
+// limits, so a rate-limited account keeps getting picked and failover switches
+// accounts over and over (the loop observed in the windsurf logs).
+func TestBuildSchedulerMetadataAccount_KeepsModelRateLimits(t *testing.T) {
+	const opusKey = "claude-opus-4-7-medium"
+	wantLimits := map[string]any{
+		opusKey: map[string]any{
+			"rate_limited_at":     "2026-04-24T02:28:51Z",
+			"rate_limit_reset_at": "2026-04-24T02:58:51Z",
+		},
+	}
+	extra := map[string]any{
+		"unused_large_field": "drop-me",
+		"model_rate_limits":  wantLimits,
+	}
+
+	snapshot := buildSchedulerMetadataAccount(service.Account{
+		ID:       7,
+		Platform: service.PlatformWindsurf,
+		Type:     service.AccountTypeSetupToken,
+		Extra:    extra,
+	})
+
+	// The whitelisted key survives; unrelated extra fields are dropped.
+	require.Equal(t, wantLimits, snapshot.Extra["model_rate_limits"], "model_rate_limits must be carried into scheduler snapshot for rate-limit-aware selection")
+	require.Nil(t, snapshot.Extra["unused_large_field"])
+}
--- a/backend/internal/service/windsurf_chat_service.go
+++ b/backend/internal/service/windsurf_chat_service.go
@ -119,8 +119,17 @@ func (s *WindsurfChatService) chatCascade(ctx context.Context, client *windsurf.
 		modelEnumHint = meta.EnumValue
 	}

-	fpBefore := windsurf.FingerprintBefore(messages, modelKey)
-	entry := s.pool.Checkout(fpBefore)
+	fpBefore := windsurf.FingerprintBefore(messages, modelKey, apiKey)
+	// failover 切号后禁止复用 cascade：cascade_id 属于上一个账号的 LS，
+	// 在当前账号上一定会触发 "panel state not found" 浪费一次请求。
+	skipReuse := false
+	if switches, ok := AccountSwitchCountFromContext(ctx); ok && switches > 0 {
+		skipReuse = true
+	}
+	var entry *windsurf.ConversationEntry
+	if !skipReuse {
+		entry = s.pool.Checkout(fpBefore)
+	}
 	isResume := entry != nil && entry.CascadeID != ""

 	var reuseCascadeID string
@ -142,7 +151,7 @@ func (s *WindsurfChatService) chatCascade(ctx context.Context, client *windsurf.
 	}

 	if result.CascadeID != "" && result.Text != "" {
-		fpAfter := windsurf.FingerprintAfter(messages, modelKey)
+		fpAfter := windsurf.FingerprintAfter(messages, modelKey, apiKey)
 		s.pool.Checkin(fpAfter, &windsurf.ConversationEntry{
 			CascadeID: result.CascadeID,
 			APIKey:    apiKey,
--- a/backend/internal/service/windsurf_gateway_service.go
+++ b/backend/internal/service/windsurf_gateway_service.go
@ -355,6 +355,9 @@ func (s *WindsurfGatewayService) streamAnthropicResponse(c *gin.Context, id stri
 	c.Header("Content-Type", "text/event-stream")
 	c.Header("Cache-Control", "no-cache")
 	c.Header("Connection", "keep-alive")
+	// 与 antigravity/gateway 保持一致，显式禁用 nginx/反代缓冲，防止 SSE 在代理侧被攒齐再转发
+	// 导致 Claude Code 等客户端长时间收不到任何帧而超时断开。
+	c.Header("X-Accel-Buffering", "no")

 	writeSSE := func(event string, data any) {
 		b, _ := json.Marshal(data)
--- a/deploy/Dockerfile.ls
+++ b/deploy/Dockerfile.ls
@ -1,15 +1,20 @@
 # Windsurf Language Server Docker Image
 #
-# Usage (host network — required for CSRF loopback check):
-#   docker build -t windsurf-ls -f deploy/Dockerfile.ls .
-#   docker run -d --name windsurf-ls \
-#     --network host \
-#     -v windsurf_ls_data:/data \
-#     windsurf-ls
+# 说明:
+#   - LS 本体只监听 127.0.0.1:<LS_INTERNAL_PORT>，并且仅对 loopback peer 通过 CSRF 校验。
+#   - 为了让 LS 融入 compose 内部网络（而不是必须使用 host network），
+#     容器内启动一个 socat 把外部 0.0.0.0:<LS_PORT> 的流量转发到 127.0.0.1:<LS_INTERNAL_PORT>。
+#     LS 收到的 peer 地址仍然是 127.0.0.1，CSRF 校验通过，同时 compose 里其它服务
+#     可以直接用 `windsurf-ls:42099` 访问。
 #
-# The LS binary is auto-downloaded from Exafunction/codeium releases at build time.
-# To use a local binary instead, pass --build-arg LS_URL=file:///path or place it
-# at deploy/language_server_linux_x64 and rebuild.
+# 构建:
+#   docker build -t windsurf-ls -f deploy/Dockerfile.ls .
+#
+# 运行（一般不要单独 docker run，通过 compose 的 windsurf profile 启动）:
+#   docker compose --profile windsurf up -d
+#
+# LS 二进制在构建时从 Exafunction/codeium 的 latest release 下载。
+# 本地已有二进制时可通过 --build-arg LS_URL=file:///path 覆盖。

 FROM alpine:3.21 AS downloader

@ -41,8 +46,13 @@ RUN set -e; \

 FROM debian:bookworm-slim

+# ca-certificates: LS 访问上游 API (HTTPS)
+# netcat-openbsd : healthcheck 用的 `nc -z` 探测端口
+# socat          : loopback 端口转发，让 compose 内部网络可直达 LS
+# tini           : PID 1 init，正确回收 LS 子进程，转发信号
+# bash           : entrypoint 依赖 `wait -n`（dash/busybox 不支持）
 RUN apt-get update && apt-get install -y --no-install-recommends \
-      ca-certificates netcat-openbsd && \
+      ca-certificates netcat-openbsd socat tini bash && \
    rm -rf /var/lib/apt/lists/*

 WORKDIR /opt/windsurf
@ -51,26 +61,61 @@ COPY --from=downloader /tmp/language_server /opt/windsurf/language_server_linux_

 RUN mkdir -p /data/db

+# LS_PORT          : 容器对外暴露的监听端口（socat 绑定 0.0.0.0:LS_PORT）
+# LS_INTERNAL_PORT : LS 本体绑定的端口（LS 实际在 0.0.0.0:LS_INTERNAL_PORT 监听，
+#                    但 socat 发起的连接源地址为 127.0.0.1，CSRF 校验依旧通过）
+#                    与 LS_PORT 必须不同，否则 socat 会和 LS 抢同一端口。
 ENV LS_PORT=42099 \
+    LS_INTERNAL_PORT=42098 \
    LS_CSRF_TOKEN=ad2d9f01-4e7b-8c3a-b5f6-1d8e9a0c7b2f \
    LS_API_SERVER_URL=https://server.self-serve.windsurf.com \
    HTTPS_PROXY="" \
    HTTP_PROXY=""

-EXPOSE ${LS_PORT}
+EXPOSE 42099

-HEALTHCHECK --interval=10s --timeout=3s --start-period=15s --retries=3 \
-    CMD nc -z localhost ${LS_PORT} || exit 1
+# 健康检查: socat 端口可达即视为健康（实际会触发一次 TCP 握手到 LS）
+HEALTHCHECK --interval=10s --timeout=3s --start-period=15s --retries=5 \
+    CMD nc -z 127.0.0.1 "${LS_PORT}" || exit 1

-ENTRYPOINT ["/bin/sh", "-c", \
-    "exec /opt/windsurf/language_server_linux_x64 \
-     --api_server_url=${LS_API_SERVER_URL} \
-     --server_port=${LS_PORT} \
-     --csrf_token=${LS_CSRF_TOKEN} \
-     --register_user_url=https://api.codeium.com/register_user/ \
-     --codeium_dir=/data \
-     --database_dir=/data/db \
-     --enable_local_search=false \
-     --enable_index_service=false \
-     --enable_lsp=false \
-     --detect_proxy=false"]
+# tini runs as PID 1 so LS child processes are reaped and signals are forwarded.
+# bash is used instead of /bin/sh because Debian's /bin/sh is dash, which does
+# not support `wait -n`.
+# Startup script logic:
+#   1. Start LS in the background on port ${LS_INTERNAL_PORT}.
+#   2. Poll (up to 60 times, 1s apart) until 127.0.0.1:${LS_INTERNAL_PORT} accepts
+#      connections. Abort with exit 1 if LS dies first OR never starts listening;
+#      otherwise socat would come up alone and the healthcheck (which only probes
+#      the socat port) would report a container with no usable LS as healthy.
+#   3. Start socat in the background: 0.0.0.0:${LS_PORT} -> 127.0.0.1:${LS_INTERNAL_PORT}.
+#   4. `wait -n` blocks until either child exits; the container then exits too and
+#      the compose restart policy takes over. The status is captured with `||`
+#      because under `set -e` a bare non-zero `wait -n` would abort the script
+#      before the teardown below runs.
+ENTRYPOINT ["/usr/bin/tini", "-g", "--", "/bin/bash", "-c", "\
+set -e; \
+/opt/windsurf/language_server_linux_x64 \
+  --api_server_url=\"${LS_API_SERVER_URL}\" \
+  --server_port=\"${LS_INTERNAL_PORT}\" \
+  --csrf_token=\"${LS_CSRF_TOKEN}\" \
+  --register_user_url=https://api.codeium.com/register_user/ \
+  --codeium_dir=/data \
+  --database_dir=/data/db \
+  --enable_local_search=false \
+  --enable_index_service=false \
+  --enable_lsp=false \
+  --detect_proxy=false & \
+LS_PID=$!; \
+echo \"[entrypoint] LS started pid=$LS_PID, waiting on 127.0.0.1:${LS_INTERNAL_PORT}\"; \
+LS_READY=0; \
+for i in $(seq 1 60); do \
+  if nc -z 127.0.0.1 \"${LS_INTERNAL_PORT}\"; then \
+    LS_READY=1; \
+    echo \"[entrypoint] LS is listening, starting socat forwarder\"; \
+    break; \
+  fi; \
+  if ! kill -0 $LS_PID 2>/dev/null; then \
+    echo \"[entrypoint] LS exited before listening\"; exit 1; \
+  fi; \
+  sleep 1; \
+done; \
+if [ \"$LS_READY\" != 1 ]; then \
+  echo \"[entrypoint] LS did not start listening within 60s, giving up\"; \
+  kill $LS_PID 2>/dev/null || true; \
+  exit 1; \
+fi; \
+socat -d TCP-LISTEN:${LS_PORT},fork,reuseaddr,bind=0.0.0.0 TCP:127.0.0.1:${LS_INTERNAL_PORT} & \
+SOCAT_PID=$!; \
+echo \"[entrypoint] socat started pid=$SOCAT_PID, forwarding 0.0.0.0:${LS_PORT} -> 127.0.0.1:${LS_INTERNAL_PORT}\"; \
+EXIT=0; \
+wait -n $LS_PID $SOCAT_PID || EXIT=$?; \
+echo \"[entrypoint] one of LS/socat exited with $EXIT, tearing down\"; \
+kill $LS_PID $SOCAT_PID 2>/dev/null || true; \
+exit $EXIT\
+"]
--- a/deploy/docker-compose.windsurf.yml
+++ b/deploy/docker-compose.windsurf.yml
@ -1,65 +0,0 @@
-# =============================================================================
-# Windsurf Language Server — 独立 Compose 文件
-# =============================================================================
-# 启动方式：
-#   docker compose -f docker-compose.yml -f docker-compose.windsurf.yml up -d
-#
-# 构建 LS 镜像：
-#   1. 将 language_server_linux_x64 放到 deploy/ 目录
-#   2. docker compose -f docker-compose.yml -f docker-compose.windsurf.yml build windsurf-ls
-#
-# Multi-proxy：复制 windsurf-ls 服务并修改 LS_PORT 和 HTTPS_PROXY：
-#   windsurf-ls-proxy1:
-#     extends: { service: windsurf-ls }
-#     environment:
-#       - LS_PORT=42101
-#       - HTTPS_PROXY=http://user:pass@proxy1:8080
-#       - HTTP_PROXY=http://user:pass@proxy1:8080
-#     ports: ["42101:42101"]
-# =============================================================================
-
-services:
-  # 覆盖主服务：注入 LS 连接参数 + 添加依赖
-  sub2api:
-    environment:
-      - WINDSURF_ENABLED=true
-      - WINDSURF_FIREBASE_API_KEY=${WINDSURF_FIREBASE_API_KEY:-AIzaSyDsOl-1XpT5err0Tcnx8FFod1H8gVGIycY}
-      - WINDSURF_DOCKER_HOST=host.docker.internal
-      - WINDSURF_DOCKER_PORT=${WINDSURF_LS_PORT:-42099}
-      - WINDSURF_DOCKER_CSRF_TOKEN=${LS_CSRF_TOKEN:-ad2d9f01-4e7b-8c3a-b5f6-1d8e9a0c7b2f}
-      - WINDSURF_LS_MODE=${WINDSURF_LS_MODE:-docker}
-    depends_on:
-      windsurf-ls:
-        condition: service_healthy
-
-  # ===========================================================================
-  # Windsurf Language Server (local gRPC for Cascade chat)
-  # Must use host network — LS validates CSRF tokens only from loopback.
-  # ===========================================================================
-  windsurf-ls:
-    build:
-      context: ..
-      dockerfile: deploy/Dockerfile.ls
-    image: windsurf-ls:latest
-    container_name: sub2api-windsurf-ls
-    restart: unless-stopped
-    network_mode: host
-    volumes:
-      - windsurf_ls_data:/data
-    environment:
-      - LS_PORT=42099
-      - LS_CSRF_TOKEN=${LS_CSRF_TOKEN:-ad2d9f01-4e7b-8c3a-b5f6-1d8e9a0c7b2f}
-      - LS_API_SERVER_URL=${LS_API_SERVER_URL:-https://server.self-serve.windsurf.com}
-      - HTTPS_PROXY=${LS_HTTPS_PROXY:-}
-      - HTTP_PROXY=${LS_HTTP_PROXY:-}
-      - TZ=${TZ:-Asia/Shanghai}
-    healthcheck:
-      test: ["CMD", "nc", "-z", "localhost", "42099"]
-      interval: 10s
-      timeout: 3s
-      retries: 5
-      start_period: 15s
-
-volumes:
-  windsurf_ls_data:
-    driver: local
--- a/deploy/docker-compose.yml
+++ b/deploy/docker-compose.yml
@ -1,20 +1,20 @@
 # =============================================================================
-# Sub2API Docker Compose Configuration
+# Sub2API - Docker Compose Configuration (All-in-One)
 # =============================================================================
-# Quick Start:
-#   1. Copy .env.example to .env and configure
-#   2. docker compose up -d
-#   3. Check logs: docker compose logs -f
+# 包含服务:
+#   - sub2api       主应用
+#   - postgres      数据库
+#   - redis         缓存
+#   - windsurf-ls   可选: Windsurf Language Server (通过 profile 启用)
 #
-# Windsurf LS (可选):
-#   需要 Windsurf Cascade 聊天功能时，额外启动 LS 容器：
-#   docker compose -f docker-compose.yml -f docker-compose.windsurf.yml up -d
+# 启动方式:
+#   默认三件套:         docker compose up -d
+#   含 Windsurf LS:     docker compose --profile windsurf up -d
 #
-# 注意事项:
-#   - JWT_SECRET / TOTP_ENCRYPTION_KEY 必须固定，多实例共享同一个值
-#   - PostgreSQL / Redis 单实例，不参与水平扩展
-#   - postgres 端口默认不对外暴露，如需调试取消注释 127.0.0.1:5433:5432
-#   - redis 端口默认不对外暴露，如需调试取消注释 127.0.0.1:6380:6379
+# 注意:
+#   - 首次启动前先复制 .env.example 为 .env 并填写必填变量
+#   - JWT_SECRET / TOTP_ENCRYPTION_KEY 多实例必须固定
+#   - windsurf-ls 镜像仅支持 linux/amd64, arm64 宿主机会通过 QEMU 模拟
 # =============================================================================

 services:
@ -23,6 +23,7 @@ services:
  # ===========================================================================
  sub2api:
    image: docker.io/zfc931912343/sub2api:latest
+    container_name: sub2api
    restart: unless-stopped
    ulimits:
      nofile:
@ -103,8 +104,8 @@ services:
      - WINDSURF_ENABLED=${WINDSURF_ENABLED:-false}
      - WINDSURF_FIREBASE_API_KEY=${WINDSURF_FIREBASE_API_KEY:-}

-      # --- Windsurf Language Server (可选，需配合 docker-compose.windsurf.yml) ---
-      - WINDSURF_DOCKER_HOST=${WINDSURF_DOCKER_HOST:-}
+      # --- Windsurf Language Server (可选，启用 windsurf profile 时生效) ---
+      - WINDSURF_DOCKER_HOST=${WINDSURF_DOCKER_HOST:-windsurf-ls}
      - WINDSURF_DOCKER_PORT=${WINDSURF_DOCKER_PORT:-42099}
      - WINDSURF_DOCKER_CSRF_TOKEN=${WINDSURF_DOCKER_CSRF_TOKEN:-}

@ -113,6 +114,9 @@ services:
        condition: service_healthy
      redis:
        condition: service_healthy
+      windsurf-ls:
+        condition: service_healthy
+        required: false
    networks:
      - sub2api-network
    healthcheck:
@ -137,11 +141,15 @@ services:
      - postgres_data:/var/lib/postgresql/data
    environment:
      # postgres:18-alpine 默认 PGDATA 在镜像内部匿名卷，必须显式指定才能持久化到命名卷
+      # 与旧版 compose 保持一致: PGDATA=/var/lib/postgresql/data，迁移无感
      - PGDATA=/var/lib/postgresql/data
      - POSTGRES_USER=${POSTGRES_USER:-sub2api}
      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:?POSTGRES_PASSWORD is required}
      - POSTGRES_DB=${POSTGRES_DB:-sub2api}
      - TZ=${TZ:-Asia/Shanghai}
+    # 默认不对外暴露端口。如需从宿主机调试，取消注释：
+    # ports:
+    #   - "127.0.0.1:5433:5432"
    networks:
      - sub2api-network
    healthcheck:
@ -149,10 +157,7 @@ services:
      interval: 10s
      timeout: 5s
      retries: 5
-      start_period: 10s
-    # 默认不对外暴露，如需本地调试取消注释
-    # ports:
-    #   - "127.0.0.1:5433:5432"
+      start_period: 15s

  # ===========================================================================
  # Redis Cache
@ -168,41 +173,80 @@ services:
    volumes:
      - redis_data:/data
    command: >
-      sh -c '
-        redis-server
-        --save 60 1
-        --appendonly yes
-        --appendfsync everysec
-        ${REDIS_PASSWORD:+--requirepass "$REDIS_PASSWORD"}'
+      sh -c 'if [ -n "$$REDIS_PASSWORD" ]; then
+        exec redis-server --requirepass "$$REDIS_PASSWORD" --appendonly yes --maxmemory 512mb --maxmemory-policy allkeys-lru;
+      else
+        exec redis-server --appendonly yes --maxmemory 512mb --maxmemory-policy allkeys-lru;
+      fi'
    environment:
-      - TZ=${TZ:-Asia/Shanghai}
-      - REDISCLI_AUTH=${REDIS_PASSWORD:-}
+      - REDIS_PASSWORD=${REDIS_PASSWORD:-}
+    # 默认不对外暴露端口。如需调试取消注释:
+    # ports:
+    #   - "127.0.0.1:6380:6379"
    networks:
      - sub2api-network
    healthcheck:
-      test: [ "CMD", "redis-cli", "ping" ]
+      test: [ "CMD-SHELL", "if [ -n \"$$REDIS_PASSWORD\" ]; then redis-cli -a \"$$REDIS_PASSWORD\" ping; else redis-cli ping; fi" ]
      interval: 10s
      timeout: 5s
      retries: 5
-      start_period: 5s
-    # 默认不对外暴露，如需本地调试取消注释
+      start_period: 10s
+
+  # ===========================================================================
+  # Windsurf Language Server (可选)
+  # ---------------------------------------------------------------------------
+  # 启用方式: docker compose --profile windsurf up -d
+  #
+  # 架构说明:
+  #   - LS 本体只能绑 127.0.0.1:LS_INTERNAL_PORT（CSRF 仅允许 loopback peer）。
+  #   - 容器内 socat 把 0.0.0.0:42099 (LS_PORT) → 127.0.0.1:42098 (LS_INTERNAL_PORT)，
+  #     使 compose 同网络内的服务可直接通过 `windsurf-ls:42099` 访问。
+  #
+  # 平台限制: 官方 LS 二进制仅提供 linux/amd64 与 linux/arm64；其他平台需通过 QEMU 模拟运行。
+  # 资源建议: CPU 1-2 cores, Memory 512MB - 1GB
+  # ===========================================================================
+  windsurf-ls:
+    image: docker.io/zfc931912343/sub2api-windsurf-ls:latest
+    container_name: sub2api-windsurf-ls
+    restart: unless-stopped
+    profiles: [ "windsurf" ]
+    volumes:
+      - windsurf_ls_data:/data
+    environment:
+      # Dockerfile.ls 的 ENTRYPOINT 消费这些键，不要改名
+      # LS_PORT         : 容器对外暴露端口（socat 监听）
+      # LS_INTERNAL_PORT: LS 本体监听端口（socat 转发目的端口）；两者必须不同
+      - LS_PORT=42099
+      - LS_INTERNAL_PORT=42098
+      - LS_CSRF_TOKEN=${WINDSURF_DOCKER_CSRF_TOKEN:?WINDSURF_DOCKER_CSRF_TOKEN is required when windsurf profile is enabled}
+      - LS_API_SERVER_URL=${LS_API_SERVER_URL:-https://server.self-serve.windsurf.com}
+      - HTTPS_PROXY=${LS_HTTPS_PROXY:-}
+      - HTTP_PROXY=${LS_HTTP_PROXY:-}
+      - TZ=${TZ:-Asia/Shanghai}
+    # 默认不对外暴露端口，仅通过内部网络通信
+    # 如需从宿主机调试，取消注释:
    # ports:
-    #   - "127.0.0.1:6380:6379"
+    #   - "127.0.0.1:42099:42099"
+    networks:
+      - sub2api-network
+    healthcheck:
+      test: [ "CMD", "nc", "-z", "127.0.0.1", "42099" ]
+      interval: 10s
+      timeout: 3s
+      retries: 5
+      start_period: 20s

-# =============================================================================
-# Volumes
-# =============================================================================
-volumes:
-  sub2api_data:
-    driver: local
-  postgres_data:
-    driver: local
-  redis_data:
-    driver: local
-
-# =============================================================================
-# Networks
-# =============================================================================
 networks:
  sub2api-network:
+    name: sub2api-network
    driver: bridge
+
+volumes:
+  sub2api_data:
+    name: sub2api_data
+  postgres_data:
+    name: sub2api_postgres_data
+  redis_data:
+    name: sub2api_redis_data
+  windsurf_ls_data:
+    name: sub2api_windsurf_ls_data