package service

import (
	"context"
	"fmt"
	"log/slog"
	"strings"
	"time"

	"github.com/Wei-Shaw/sub2api/internal/config"
	"github.com/Wei-Shaw/sub2api/internal/pkg/windsurf"
)

// WindsurfChatService sends chat requests to Windsurf through a leased LS
// client, in either "cascade" or "legacy" mode, and keeps a pool of cascade
// conversations so that follow-up turns can reuse an existing cascade.
type WindsurfChatService struct {
	cfg           config.WindsurfConfig
	lsService     *WindsurfLSService
	tokenProvider *WindsurfTokenProvider
	pool          *windsurf.ConversationPool
}

// NewWindsurfChatService builds a WindsurfChatService from its dependencies
// and gives it a fresh, empty conversation pool.
func NewWindsurfChatService(
	cfg config.WindsurfConfig,
	lsService *WindsurfLSService,
	tokenProvider *WindsurfTokenProvider,
) *WindsurfChatService {
	return &WindsurfChatService{
		cfg:           cfg,
		lsService:     lsService,
		tokenProvider: tokenProvider,
		pool:          windsurf.NewConversationPool(),
	}
}

// WindsurfChatRequest is the input to WindsurfChatService.Chat.
type WindsurfChatRequest struct {
	AccountID    int64
	Model        string
	Messages     []windsurf.ChatMessage
	Stream       bool
	Tools        []windsurf.OpenAITool
	ToolChoice   any
	ToolPreamble string // computed by handler, passed through to Cascade
	// Images holds the sidecar images for the current user turn (Cascade proto
	// SendUserCascadeMessageRequest.images, field 6).
	// The contents must already have passed ValidateCascadeImages (or an
	// equivalent check).
	Images []windsurf.CascadeImage
}

// WindsurfChatResponse is the result of a completed chat call.
type WindsurfChatResponse struct {
	Text        string
	Thinking    string
	Model       string
	Mode        string               // "cascade" or "legacy", whichever path produced the response
	Usage       *windsurf.StepUsage  // server-reported; nil if unavailable
	FirstTextAt time.Time            // when first text appeared (zero if no text)
	ToolCalls   []windsurf.NativeToolCall
}

// Chat executes one chat request for req.AccountID.
//
// It fetches the account token, resolves the model and chat mode, leases an LS
// client (by container binding when the token carries one, otherwise by proxy
// URL) and dispatches to the cascade or legacy path. When a cascade call fails
// and mode fallback is enabled in the config for a model with a positive enum
// value, the request is retried once on the legacy path.
//
// Errors are wrapped with the step that failed ("get token", "acquire LS",
// "chat (<mode>)").
func (s *WindsurfChatService) Chat(ctx context.Context, req *WindsurfChatRequest) (*WindsurfChatResponse, error) {
	token, err := s.tokenProvider.GetToken(ctx, req.AccountID)
	if err != nil {
		return nil, fmt.Errorf("get token: %w", err)
	}

	modelKey := windsurf.ResolveModel(req.Model)
	meta := windsurf.GetModelInfo(modelKey)
	mode := s.resolveMode(meta)

	// Tool emulation requires cascade mode for proto section injection
	if mode == "legacy" && req.ToolPreamble != "" {
		mode = "cascade"
	}

	var lease *windsurf.LSLease
	if token.LSBinding.ContainerID != "" || token.LSBinding.ContainerName != "" {
		lease, err = s.lsService.AcquireByBinding(token.LSBinding)
	} else {
		lease, err = s.lsService.Acquire(ctx, token.ProxyURL)
	}
	if err != nil {
		return nil, fmt.Errorf("acquire LS: %w", err)
	}
	defer lease.Release()

	// Anything that is not explicitly "legacy" (including unknown modes) runs
	// on the cascade path.
	var resp *WindsurfChatResponse
	switch mode {
	case "legacy":
		resp, err = s.chatLegacy(ctx, lease.Client, token.APIKey, meta, req.Messages, modelKey)
	default:
		resp, err = s.chatCascade(ctx, lease.Client, token.APIKey, meta, req.Messages, req.ToolPreamble, modelKey, lease.Endpoint, req.Images)
	}

	if err != nil {
		// NOTE(review): the legacy fallback does not forward req.ToolPreamble
		// or req.Images, so a request that needed them is retried without
		// them — confirm this is intended.
		if mode == "cascade" && s.cfg.Chat.AllowModeFallback && meta != nil && meta.EnumValue > 0 {
			slog.Warn("windsurf_cascade_fallback_to_legacy", "model", modelKey, "error", err)
			resp, err = s.chatLegacy(ctx, lease.Client, token.APIKey, meta, req.Messages, modelKey)
			if err == nil {
				resp.Mode = "legacy"
			}
		}
		if err != nil {
			return nil, fmt.Errorf("chat (%s): %w", mode, err)
		}
	}
	return resp, nil
}

// resolveMode returns the chat mode for a model. An explicit "cascade" or
// "legacy" default in the config wins; any other configured value defers to
// windsurf.GetChatMode with the configured legacy enum cutoff.
func (s *WindsurfChatService) resolveMode(meta *windsurf.ModelMeta) string {
	switch configMode := s.cfg.Chat.DefaultMode; configMode {
	case "cascade", "legacy":
		return configMode
	}
	return windsurf.GetChatMode(meta, int(s.cfg.Chat.LegacyEnumCutoff))
}

// modelIdentityTemplates maps a model provider to a system-prompt template.
// Each template takes the display name twice (two %s verbs).
var modelIdentityTemplates = map[string]string{
	"anthropic": "You are %s, a large language model created by Anthropic. You are helpful, harmless, and honest. When asked about your identity or which model you are, you MUST respond that you are %s, made by Anthropic.",
	"openai":    "You are %s, a large language model created by OpenAI. When asked about your identity, you MUST respond that you are %s, made by OpenAI.",
	"google":    "You are %s, a large language model created by Google. When asked about your identity, you MUST respond that you are %s, made by Google.",
	"deepseek":  "You are %s, a large language model created by DeepSeek. When asked about your identity, you MUST respond that you are %s, made by DeepSeek.",
	"xai":       "You are %s, a large language model created by xAI. When asked about your identity, you MUST respond that you are %s, made by xAI.",
}

// injectModelIdentity prepends a provider-specific identity system message to
// messages and returns the new slice.
//
// messages is returned unchanged when meta is nil or has no provider, when the
// conversation already contains a system message, or when the provider has no
// template. The display name is meta.Name, falling back to modelKey when the
// name is empty. The input slice is never modified.
func injectModelIdentity(messages []windsurf.ChatMessage, meta *windsurf.ModelMeta, modelKey string) []windsurf.ChatMessage {
	if meta == nil || meta.Provider == "" {
		return messages
	}
	for _, m := range messages {
		if m.Role == "system" {
			return messages
		}
	}
	tmpl, ok := modelIdentityTemplates[meta.Provider]
	if !ok {
		return messages
	}

	displayName := modelKey
	if meta.Name != "" {
		displayName = meta.Name
	}

	out := make([]windsurf.ChatMessage, 0, len(messages)+1)
	out = append(out, windsurf.ChatMessage{
		Role:    "system",
		Content: fmt.Sprintf(tmpl, displayName, displayName),
	})
	return append(out, messages...)
}

// chatCascade runs one request on the cascade path.
//
// It injects a model identity prompt when the client sent no system message,
// gates image input on the model's reported capabilities, tries to reuse a
// pooled cascade for the conversation, and retries once on a fresh cascade
// with the full transcript if the reused cascade fails. On success with a
// non-empty cascade ID and non-empty text, the cascade is checked back into
// the pool under the post-response fingerprint.
//
// lsEndpoint is accepted but not used by this function.
func (s *WindsurfChatService) chatCascade(ctx context.Context, client *windsurf.LocalLSClient, apiKey string, meta *windsurf.ModelMeta, messages []windsurf.ChatMessage, toolPreamble string, modelKey string, lsEndpoint string, images []windsurf.CascadeImage) (*WindsurfChatResponse, error) {
	modelUID := ""
	modelEnumHint := 0
	if meta != nil {
		modelUID = meta.ModelUID
		modelEnumHint = meta.EnumValue
	}

	// ── Model identity prompt injection ──
	// When the client doesn't provide its own system prompt, prepend one so
	// the model identifies itself as the requested model rather than leaking
	// the underlying Windsurf/Cascade backend identity.
	// Skip when the client already has a system message (Claude Code / Cline)
	// to avoid triggering Cascade anti-injection on reasoning models.
	messages = injectModelIdentity(messages, meta, modelKey)

	// Image capability gate: only checked when the request carries images.
	// Policy: fail open on an RPC error; reject when the model is known and
	// explicitly reports that it does not support images.
	// NOTE(review): the original comment said this returns a CascadeModelError
	// to trigger failover, but the code returns a plain error — confirm whether
	// callers rely on the typed error.
	if len(images) > 0 {
		found, supported, err := client.ModelSupportsImages(ctx, apiKey, modelUID)
		switch {
		case err != nil:
			slog.Warn("windsurf_cascade_caps_fetch_failed", "model", modelUID, "error", err)
			// fail-open
		case found && !supported:
			return nil, fmt.Errorf("model %q does not support image inputs in Windsurf Cascade", modelUID)
		}
	}

	fpBefore := windsurf.FingerprintBefore(messages, modelKey, apiKey)

	// After a failover account switch, cascade reuse is forbidden: the
	// cascade_id belongs to the previous account's LS and would hit
	// "panel state not found" on the current account, wasting a request.
	// The switchover case also needs the larger history budget — the new
	// account has no server-side context, so the full chat transcript has to
	// be packed into the text.
	switchover := false
	if switches, ok := AccountSwitchCountFromContext(ctx); ok && switches > 0 {
		switchover = true
	}

	var entry *windsurf.ConversationEntry
	if !switchover {
		entry = s.pool.Checkout(fpBefore)
	}

	isResume := entry != nil && entry.CascadeID != ""
	var reuseCascadeID string
	if isResume {
		reuseCascadeID = entry.CascadeID
		// Log only a short prefix of the ID; guard the slice so an ID shorter
		// than 8 bytes cannot panic.
		logID := reuseCascadeID
		if len(logID) > 8 {
			logID = logID[:8]
		}
		slog.Info("windsurf_cascade_reuse_hit", "cascade_id", logID, "model", modelKey)
	}

	userText := buildCascadeText(messages, modelUID, isResume, switchover)
	result, err := client.StreamCascadeChat(ctx, apiKey, modelUID, userText, toolPreamble, reuseCascadeID, modelEnumHint, images)
	if err != nil && isResume {
		slog.Warn("windsurf_cascade_reuse_failed", "error", err, "model", modelKey)
		// Recovery after a failed reuse (e.g. panel-state-not-found): the new
		// cascade has no server-side history, so the full transcript must be
		// sent. Any error on the reuse attempt triggers this single retry.
		userText = buildCascadeText(messages, modelUID, false, true)
		result, err = client.StreamCascadeChat(ctx, apiKey, modelUID, userText, toolPreamble, "", modelEnumHint, images)
	}
	if err != nil {
		return nil, err
	}

	if result.CascadeID != "" && result.Text != "" {
		fpAfter := windsurf.FingerprintAfter(messages, modelKey, apiKey)
		s.pool.Checkin(fpAfter, &windsurf.ConversationEntry{
			CascadeID: result.CascadeID,
			APIKey:    apiKey,
		})
	}

	return &WindsurfChatResponse{
		Text:        result.Text,
		Thinking:    result.Thinking,
		Model:       modelKey,
		Mode:        "cascade",
		Usage:       result.Usage,
		FirstTextAt: result.FirstTextAt,
		ToolCalls:   result.ToolCalls,
	}, nil
}

// chatLegacy runs one request on the legacy path and returns only the text;
// thinking, usage, first-text time and tool calls are left at their zero
// values. A nil meta is sent as enum 0 with an empty model name.
func (s *WindsurfChatService) chatLegacy(ctx context.Context, client *windsurf.LocalLSClient, apiKey string, meta *windsurf.ModelMeta, messages []windsurf.ChatMessage, modelKey string) (*WindsurfChatResponse, error) {
	modelEnum := 0
	modelName := ""
	if meta != nil {
		modelEnum = meta.EnumValue
		modelName = meta.Name
	}

	text, err := client.StreamLegacyChat(ctx, apiKey, messages, modelEnum, modelName)
	if err != nil {
		return nil, err
	}

	return &WindsurfChatResponse{
		Text:  text,
		Model: modelKey,
		Mode:  "legacy",
	}, nil
}

const (
	// cascadeMaxHistoryBytes is the default history budget.
	cascadeMaxHistoryBytes = 200_000
	// cascade1MHistoryBytes is the budget for models whose UID contains "1m".
	cascade1MHistoryBytes = 900_000
	// cascadeSwitchoverHistoryBytes is the budget for the account-switch /
	// panel-state-not-found recovery case, where as much of the history as
	// possible is packed in so the new account gets the fullest context.
	// 3.5MB leaves 500KB for the other proto fields (metadata/config/images),
	// staying under the default 4MB gRPC limit.
	cascadeSwitchoverHistoryBytes = 3_500_000

	cascadeMultiTurnPreamble = "The following is a multi-turn conversation. You MUST remember and use all information from prior turns."
)

// cascadeHistoryBudget returns the byte budget for history text: the
// switchover budget when switchover is true, otherwise the 1M budget when
// modelUID contains "1m" (case-insensitive), otherwise the default.
func cascadeHistoryBudget(modelUID string, switchover bool) int {
	switch {
	case switchover:
		return cascadeSwitchoverHistoryBytes
	case strings.Contains(strings.ToLower(modelUID), "1m"):
		return cascade1MHistoryBytes
	default:
		return cascadeMaxHistoryBytes
	}
}

// buildCascadeText constructs the full text payload for SendUserCascadeMessage.
//
// Only system, user and assistant messages are considered; other roles are
// skipped. With no user/assistant message the result is "".
//
// If isResume is true, only the content of the last user/assistant message is
// sent (the cascade already has context) and switchover is ignored.
// Otherwise the joined system prompt comes first, followed by either the
// single message or, for multi-turn conversations, a preamble, the prior
// turns wrapped in <human>/<assistant> tags, and the latest message. Prior
// turns are collected newest-first against a byte budget; the most recent
// prior turn is always kept, and older turns are dropped once the budget
// would be exceeded.
//
// switchover=true raises the history budget to cascadeSwitchoverHistoryBytes
// (~3.5MB). It is used for account switches and panel-state-not-found
// recovery, where the new account or cascade has no server-side history and
// the full transcript has to travel in the text.
func buildCascadeText(messages []windsurf.ChatMessage, modelUID string, isResume, switchover bool) string {
	var systemParts []string
	var convo []windsurf.ChatMessage
	for _, m := range messages {
		switch m.Role {
		case "system":
			systemParts = append(systemParts, m.Content)
		case "user", "assistant":
			convo = append(convo, m)
		}
	}

	if len(convo) == 0 {
		return ""
	}
	latest := convo[len(convo)-1]

	// Resume: cascade already has context, only send the last message.
	if isResume {
		return latest.Content
	}

	// NOTE(review): the system prompt and the latest turn below are delimited
	// only by newlines, while history turns are tagged. If wrapper tags were
	// intended here too, restore them — confirm against the upstream format.
	sysText := strings.TrimSpace(strings.Join(systemParts, "\n"))
	if sysText != "" {
		sysText = "\n" + sysText + "\n"
	}

	// Single turn: system + last message
	if len(convo) == 1 {
		if sysText == "" {
			return latest.Content
		}
		return sysText + "\n\n" + latest.Content
	}

	// Multi-turn: build history with budget trimming
	maxBytes := cascadeHistoryBudget(modelUID, switchover)
	historyBytes := len(sysText)

	// Walk backward from the second-to-last turn, collecting turns that fit.
	// history[i] corresponds to convo[i]; filling a pre-sized slice from the
	// back avoids re-copying the slice on every prepend.
	history := make([]string, len(convo)-1)
	first := len(history) // index of the oldest kept turn; len(history) means none yet
	droppedTurns := 0
	for i := len(convo) - 2; i >= 0; i-- {
		m := convo[i]
		tag := "human"
		if m.Role == "assistant" {
			tag = "assistant"
		}
		// Open and close the turn with the same tag; the format must carry
		// one verb per argument.
		line := fmt.Sprintf("<%s>\n%s\n</%s>", tag, m.Content, tag)
		if historyBytes+len(line) > maxBytes && first < len(history) {
			droppedTurns = i + 1
			slog.Info("windsurf_cascade_history_trimmed",
				"turn", i,
				"total_turns", len(convo),
				"kept_kb", historyBytes/1024,
				"dropped_turns", droppedTurns,
				"switchover", switchover,
			)
			break
		}
		history[i] = line
		first = i
		historyBytes += len(line)
	}
	history = history[first:]

	if switchover && droppedTurns == 0 {
		slog.Info("windsurf_cascade_switchover_history",
			"total_turns", len(convo),
			"kept_kb", historyBytes/1024,
			"dropped_turns", 0,
		)
	}

	text := cascadeMultiTurnPreamble + "\n\n" + strings.Join(history, "\n\n") + "\n\n" + "\n" + latest.Content + "\n"
	if sysText != "" {
		text = sysText + "\n\n" + text
	}
	return text
}