package service import ( "context" "crypto/sha256" "encoding/hex" "fmt" "log/slog" "os" "strings" "sync" "time" "github.com/Wei-Shaw/sub2api/internal/config" "github.com/Wei-Shaw/sub2api/internal/pkg/windsurf" ) // cascadeReuseTTL 是 Windsurf Cascade 会话 ID 在 Redis 中的存活时长。 // 与 LS 端 cascade 老化窗口对齐:超过 30 分钟即使本地 cache 命中, // LS 也大概率返回 panel-not-found,由 chatCascade 的回退路径兜底。 const cascadeReuseTTL = 30 * time.Minute type WindsurfChatService struct { cfg config.WindsurfConfig lsService *WindsurfLSService tokenProvider *WindsurfTokenProvider cache GatewayCache } func NewWindsurfChatService( cfg config.WindsurfConfig, lsService *WindsurfLSService, tokenProvider *WindsurfTokenProvider, cache GatewayCache, ) *WindsurfChatService { return &WindsurfChatService{ cfg: cfg, lsService: lsService, tokenProvider: tokenProvider, cache: cache, } } type WindsurfChatRequest struct { AccountID int64 // GroupID 来自 apiKey.GroupID,用于 Cascade 复用 cache 隔离。0 表示不参与缓存。 GroupID int64 // SessionHash 与 sticky session 共用,标识同一对话流。空串表示不复用 cascade。 SessionHash string Model string Messages []windsurf.ChatMessage Stream bool Tools []windsurf.OpenAITool ToolChoice interface{} ToolPreamble string // computed by handler, passed through to Cascade // Images 当前 user turn 的 sidecar 图像(Cascade proto 的 SendUserCascadeMessageRequest.images field 6)。 // 内容必须已通过 ValidateCascadeImages(或等价校验)。 Images []windsurf.CascadeImage } type WindsurfChatResponse struct { Text string Thinking string Model string Mode string Usage *windsurf.StepUsage // server-reported; nil if unavailable FirstTextAt time.Time // when first text appeared (zero if no text) ToolCalls []windsurf.NativeToolCall } func (s *WindsurfChatService) Chat(ctx context.Context, req *WindsurfChatRequest) (*WindsurfChatResponse, error) { token, err := s.tokenProvider.GetToken(ctx, req.AccountID) if err != nil { return nil, fmt.Errorf("get token: %w", err) } modelKey := windsurf.ResolveModel(req.Model) meta := windsurf.GetModelInfo(modelKey) mode := s.resolveMode(meta) // Tool emulation requires cascade mode for proto section injection if mode == "legacy" && req.ToolPreamble != "" { mode = "cascade" } var lease *windsurf.LSLease if token.LSBinding.ContainerID != "" || token.LSBinding.ContainerName != "" { lease, err = s.lsService.AcquireByBinding(token.LSBinding) } else { lease, err = s.lsService.Acquire(ctx, token.ProxyURL) } if err != nil { return nil, fmt.Errorf("acquire LS: %w", err) } defer lease.Release() var resp *WindsurfChatResponse switch mode { case "cascade": resp, err = s.chatCascade(ctx, lease.Client, token.APIKey, meta, req.Messages, req.ToolPreamble, modelKey, lease.Endpoint, req.Images, req.AccountID, req.GroupID, req.SessionHash) case "legacy": resp, err = s.chatLegacy(ctx, lease.Client, token.APIKey, meta, req.Messages, modelKey, req.AccountID) default: resp, err = s.chatCascade(ctx, lease.Client, token.APIKey, meta, req.Messages, req.ToolPreamble, modelKey, lease.Endpoint, req.Images, req.AccountID, req.GroupID, req.SessionHash) } if err != nil { if mode == "cascade" && s.cfg.Chat.AllowModeFallback && meta != nil && meta.EnumValue > 0 { slog.Warn("windsurf_cascade_fallback_to_legacy", "model", modelKey, "error", err) resp, err = s.chatLegacy(ctx, lease.Client, token.APIKey, meta, req.Messages, modelKey, req.AccountID) if err == nil { resp.Mode = "legacy" } } if err != nil { return nil, fmt.Errorf("chat (%s): %w", mode, err) } } return resp, nil } func (s *WindsurfChatService) resolveMode(meta *windsurf.ModelMeta) string { configMode := s.cfg.Chat.DefaultMode if configMode == "cascade" || configMode == "legacy" { return configMode } return windsurf.GetChatMode(meta, int(s.cfg.Chat.LegacyEnumCutoff)) } var modelIdentityTemplates = map[string]string{ "anthropic": "You are %s, a large language model created by Anthropic. You are helpful, harmless, and honest. When asked about your identity or which model you are, you MUST respond that you are %s, made by Anthropic.", "openai": "You are %s, a large language model created by OpenAI. When asked about your identity, you MUST respond that you are %s, made by OpenAI.", "google": "You are %s, a large language model created by Google. When asked about your identity, you MUST respond that you are %s, made by Google.", "deepseek": "You are %s, a large language model created by DeepSeek. When asked about your identity, you MUST respond that you are %s, made by DeepSeek.", "xai": "You are %s, a large language model created by xAI. When asked about your identity, you MUST respond that you are %s, made by xAI.", } func injectModelIdentity(messages []windsurf.ChatMessage, meta *windsurf.ModelMeta, modelKey string) []windsurf.ChatMessage { if meta == nil || meta.Provider == "" { return messages } for _, m := range messages { if m.Role == "system" { return messages } } tmpl, ok := modelIdentityTemplates[meta.Provider] if !ok { return messages } displayName := modelKey if meta.Name != "" { displayName = meta.Name } identity := windsurf.ChatMessage{ Role: "system", Content: fmt.Sprintf(tmpl, displayName, displayName), } return append([]windsurf.ChatMessage{identity}, messages...) } func (s *WindsurfChatService) chatCascade( ctx context.Context, client *windsurf.LocalLSClient, apiKey string, meta *windsurf.ModelMeta, messages []windsurf.ChatMessage, toolPreamble string, modelKey string, lsEndpoint string, images []windsurf.CascadeImage, accountID int64, groupID int64, sessionHash string, ) (*WindsurfChatResponse, error) { modelUID := "" modelEnumHint := 0 if meta != nil { modelUID = meta.ModelUID modelEnumHint = meta.EnumValue } messages = injectModelIdentity(messages, meta, modelKey) if len(images) > 0 { found, ok, err := client.ModelSupportsImagesForAccount(ctx, accountID, apiKey, modelUID) if err != nil { slog.Warn("windsurf_cascade_caps_fetch_failed", "model", modelUID, "error", err) } else if found && !ok { return nil, fmt.Errorf("model %q does not support image inputs in Windsurf Cascade", modelUID) } } // reuse 路径:sessionHash 非空且 cache 命中即复用 cascade, // userText 缩为"system + 最后一条 user"——cascade trajectory 已承载历史。 // tool_response 轮必须携带 full history;否则模型只看到工具输出,容易把 // 大段 diff/stdout 误当成用户新请求。 canReuse := sessionHash != "" && s.cache != nil && groupID != 0 && !lastUserIsToolContinuation(messages) cacheKey := "" reuseID := "" if canReuse { cacheKey = buildCascadeCacheKey(groupID, accountID, modelUID, lsEndpoint, sessionHash, sysPromptHash(messages)) if id, err := s.cache.GetCascadeID(ctx, cacheKey); err == nil && id != "" { reuseID = id } else if err != nil { slog.Warn("windsurf_cascade_cache_get_failed", "error", err) } } var userText string if reuseID != "" { userText = buildCascadeTextForReuse(messages) } else { userText = buildCascadeText(messages, modelUID) } result, err := client.StreamCascadeChatForAccount(ctx, accountID, apiKey, modelUID, userText, toolPreamble, reuseID, modelEnumHint, images) // reuse 触发 panel-not-found:清缓存 + 用 full-history 重试一次。 if err != nil && reuseID != "" && isPanelNotFound(err) { slog.Info("windsurf_cascade_reuse_invalidated", "cascade_id", reuseID, "reason", "panel_not_found") if cacheKey != "" { _ = s.cache.DeleteCascadeID(ctx, cacheKey) } userText = buildCascadeText(messages, modelUID) result, err = client.StreamCascadeChatForAccount(ctx, accountID, apiKey, modelUID, userText, toolPreamble, "", modelEnumHint, images) } if err != nil { // 任何错误都失效缓存(保守策略,避免下次复用到坏 cascade)。 if canReuse && cacheKey != "" { _ = s.cache.DeleteCascadeID(ctx, cacheKey) } return nil, err } // 成功:写回 cache。 if canReuse && cacheKey != "" && result.CascadeID != "" { if setErr := s.cache.SetCascadeID(ctx, cacheKey, result.CascadeID, cascadeReuseTTL); setErr != nil { slog.Warn("windsurf_cascade_cache_set_failed", "error", setErr) } } toolCalls := result.ToolCalls if len(toolCalls) == 0 && shouldRunNLUFallback(meta, result.Text) { if nluCalls := windsurf.ExtractToolCallsNLU(result.Text, availableToolNames(toolPreamble)); len(nluCalls) > 0 { slog.Info("windsurf_cascade_nlu_fallback", "model", modelKey, "calls", len(nluCalls), "text_chars", len(result.Text)) toolCalls = make([]windsurf.NativeToolCall, 0, len(nluCalls)) for _, c := range nluCalls { toolCalls = append(toolCalls, windsurf.NativeToolCall{ ID: c.ID, Name: c.Name, ArgumentsJSON: c.ArgumentsJSON, }) } } } return &WindsurfChatResponse{ Text: result.Text, Thinking: result.Thinking, Model: modelKey, Mode: "cascade", Usage: result.Usage, FirstTextAt: result.FirstTextAt, ToolCalls: toolCalls, }, nil } // shouldRunNLUFallback decides whether to spend CPU on the NLU extractor. // Two cases run the fallback: (a) model is explicitly NLU-flavored — // always extract, even when no obvious signal, since these models routinely // emit half-broken intent; (b) flavor is auto and the text shows NLU // signals. tool_use flavored models (Claude family) are NEVER run through // NLU because their tags are reliable and an erroneous fallback would // invent calls the user did not request. Setting WINDSURF_NLU_FALLBACK_DISABLED=1 // short-circuits all of the above. func shouldRunNLUFallback(meta *windsurf.ModelMeta, text string) bool { if text == "" { return false } if isNLUFallbackDisabled() { return false } flavor := windsurf.ResolveEmulationFlavor(meta) switch flavor { case windsurf.EmulationFlavorNLU: return true case windsurf.EmulationFlavorAuto: return windsurf.HasNLUSignal(text) default: return false } } // availableToolNames extracts tool names from the cascade tool preamble for // validation in the NLU extractor. Format heuristic: lines like // "- TOOL_NAME(...)" or "name: TOOL_NAME". Returns nil when nothing parses // — extractor still works in best-effort mode. func availableToolNames(preamble string) []string { if preamble == "" { return nil } var names []string seen := make(map[string]struct{}) for _, raw := range strings.Split(preamble, "\n") { line := strings.TrimSpace(raw) if line == "" { continue } if name := extractToolNameFromPreambleLine(line); name != "" { if _, dup := seen[name]; !dup { seen[name] = struct{}{} names = append(names, name) } } } return names } func extractToolNameFromPreambleLine(line string) string { trim := strings.TrimLeft(line, "-* \t") if trim == "" { return "" } // "name: foo" form if strings.HasPrefix(strings.ToLower(trim), "name:") { return strings.TrimSpace(trim[len("name:"):]) } // "foo(args)" form — take identifier before "(". if idx := strings.IndexByte(trim, '('); idx > 0 { candidate := strings.TrimSpace(trim[:idx]) if isIdentifier(candidate) { return candidate } } return "" } func isIdentifier(s string) bool { if s == "" { return false } for i, r := range s { switch { case r == '_': case r >= 'a' && r <= 'z': case r >= 'A' && r <= 'Z': case r >= '0' && r <= '9' && i > 0: default: return false } } return true } // nluFallbackDisabledFn resolves the NLU-fallback kill switch. Tests may // replace it via withNLUFallbackDisabledFn to assert behaviour. Production // uses readNLUFallbackDisabledEnvOnce, which reads the env var exactly once. var nluFallbackDisabledFn = readNLUFallbackDisabledEnvOnce func isNLUFallbackDisabled() bool { return nluFallbackDisabledFn() } var ( nluFallbackDisabledCachedOnce sync.Once nluFallbackDisabledCached bool ) func readNLUFallbackDisabledEnvOnce() bool { nluFallbackDisabledCachedOnce.Do(func() { v := strings.ToLower(strings.TrimSpace(os.Getenv("WINDSURF_NLU_FALLBACK_DISABLED"))) nluFallbackDisabledCached = v == "1" || v == "true" || v == "yes" || v == "on" }) return nluFallbackDisabledCached } // buildCascadeCacheKey 构造 Cascade 复用 cache 的 key。 // 任一组件变化(账号、模型、LS 实例、会话、system prompt)都会自动 cache miss。 func buildCascadeCacheKey(groupID, accountID int64, modelUID, lsEndpoint, sessionHash, sysHash string) string { h := sha256.New() fmt.Fprintf(h, "%d|%d|%s|%s|%s|%s", groupID, accountID, modelUID, lsEndpoint, sessionHash, sysHash) return hex.EncodeToString(h.Sum(nil))[:24] } // sysPromptHash 计算 system 消息内容的指纹。system 变化必须强制开新 cascade, // 否则旧 cascade 内已建立的"角色/约束"语境会污染新对话。 func sysPromptHash(messages []windsurf.ChatMessage) string { h := sha256.New() for _, m := range messages { if m.Role == "system" { h.Write([]byte(m.Content)) h.Write([]byte{0}) } } return hex.EncodeToString(h.Sum(nil))[:16] } // isPanelNotFound 判定 LS 端"cascade panel state not found"错误。 // 与 windsurf.LocalLSClient 内部 panel-state-not-found 检测保持一致。 func isPanelNotFound(err error) bool { if err == nil { return false } s := strings.ToLower(err.Error()) if strings.Contains(s, "panel state not found") { return true } return strings.Contains(s, "not_found") && strings.Contains(s, "panel") } // buildCascadeTextForReuse 构造 reuse 模式下的 user turn 文本: // 仅含 system instructions + 最新一条 user 消息。Cascade 内部已通过 trajectory 保留前序历史。 // 注意:cacheKey 已包含 sysPromptHash,system 变化会强制 cache miss → 走 buildCascadeText 全量路径。 func buildCascadeTextForReuse(messages []windsurf.ChatMessage) string { var sysParts []string var lastUser string for _, m := range messages { if m.Role == "system" { sysParts = append(sysParts, m.Content) } } for i := len(messages) - 1; i >= 0; i-- { if messages[i].Role == "user" { lastUser = messages[i].Content break } } sys := strings.TrimSpace(strings.Join(sysParts, "\n")) if sys != "" { return "\n" + sys + "\n\n\n" + lastUser } return lastUser } func lastUserIsToolContinuation(messages []windsurf.ChatMessage) bool { for i := len(messages) - 1; i >= 0; i-- { if messages[i].Role != "user" { continue } return looksLikeToolContinuationText(messages[i].Content) } return false } func lastUserIsToolResponse(messages []windsurf.ChatMessage) bool { return lastUserIsToolContinuation(messages) } func looksLikeToolContinuationText(content string) bool { trimmed := strings.TrimSpace(content) if trimmed == "" { return false } lower := strings.ToLower(trimmed) switch { case strings.Contains(lower, ", multi-turn history uses // / tags with a budget cap to trim the oldest turns. func buildCascadeText(messages []windsurf.ChatMessage, modelUID string) string { var systemParts []string var convo []windsurf.ChatMessage for _, m := range messages { if m.Role == "system" { systemParts = append(systemParts, m.Content) } else if m.Role == "user" || m.Role == "assistant" { convo = append(convo, m) } } if len(convo) == 0 { return "" } sysText := strings.TrimSpace(strings.Join(systemParts, "\n")) if sysText != "" { sysText = "\n" + sysText + "\n" } // Single turn: system + last message if len(convo) <= 1 { text := convo[len(convo)-1].Content if sysText != "" { text = sysText + "\n\n" + text } return text } // Multi-turn: build history with budget trimming maxBytes := cascadeHistoryBudget(modelUID) historyBytes := len(sysText) var lines []string droppedTurns := 0 for i := len(convo) - 2; i >= 0; i-- { m := convo[i] tag := "human" if m.Role == "assistant" { tag = "assistant" } line := fmt.Sprintf("<%s>\n%s\n", tag, m.Content, tag) if historyBytes+len(line) > maxBytes && len(lines) > 0 { droppedTurns = i + 1 slog.Info("windsurf_cascade_history_trimmed", "turn", i, "total_turns", len(convo), "kept_kb", historyBytes/1024, "dropped_turns", droppedTurns, ) break } lines = append([]string{line}, lines...) historyBytes += len(line) } latest := convo[len(convo)-1] text := cascadeMultiTurnPreamble + "\n\n" + strings.Join(lines, "\n\n") + "\n\n" + "\n" + latest.Content + "\n" if sysText != "" { text = sysText + "\n\n" + text } return text }