package service

import (
	"context"
	"fmt"
	"log/slog"
	"strings"
	"time"

	"github.com/Wei-Shaw/sub2api/internal/config"
	"github.com/Wei-Shaw/sub2api/internal/pkg/windsurf"
)

// WindsurfChatService sends chat requests through a Windsurf language server
// (LS) client, using either the "cascade" or the "legacy" chat mode.
type WindsurfChatService struct {
	cfg           config.WindsurfConfig
	lsService     *WindsurfLSService
	tokenProvider *WindsurfTokenProvider
	// pool holds cascade conversations keyed by fingerprint so that a
	// follow-up request can resume an existing cascade.
	pool *windsurf.ConversationPool
}

// NewWindsurfChatService builds a WindsurfChatService with a fresh, empty
// conversation pool.
func NewWindsurfChatService(
	cfg config.WindsurfConfig,
	lsService *WindsurfLSService,
	tokenProvider *WindsurfTokenProvider,
) *WindsurfChatService {
	return &WindsurfChatService{
		cfg:           cfg,
		lsService:     lsService,
		tokenProvider: tokenProvider,
		pool:          windsurf.NewConversationPool(),
	}
}

// WindsurfChatRequest is the input to WindsurfChatService.Chat.
type WindsurfChatRequest struct {
	AccountID    int64
	Model        string
	Messages     []windsurf.ChatMessage
	Stream       bool
	Tools        []windsurf.OpenAITool
	ToolChoice   any
	ToolPreamble string // computed by handler, passed through to Cascade
}

// WindsurfChatResponse is the result of WindsurfChatService.Chat.
type WindsurfChatResponse struct {
	Text        string
	Thinking    string
	Model       string
	Mode        string              // "cascade" or "legacy": the mode that produced the response
	Usage       *windsurf.StepUsage // server-reported; nil if unavailable
	FirstTextAt time.Time           // when first text appeared (zero if no text)
	ToolCalls   []windsurf.NativeToolCall
}

// Chat resolves the account token, the model and the chat mode, acquires a
// language-server lease, and runs the request in the selected mode.
//
// A request that resolves to "legacy" is promoted to "cascade" when it carries
// a tool preamble. If a "cascade" request fails and mode fallback is enabled in
// the config (and the model has a positive enum value), the request is retried
// once in legacy mode. All errors are wrapped with the step that failed.
func (s *WindsurfChatService) Chat(ctx context.Context, req *WindsurfChatRequest) (*WindsurfChatResponse, error) {
	token, err := s.tokenProvider.GetToken(ctx, req.AccountID)
	if err != nil {
		return nil, fmt.Errorf("get token: %w", err)
	}

	modelKey := windsurf.ResolveModel(req.Model)
	meta := windsurf.GetModelInfo(modelKey)
	mode := s.resolveMode(meta)

	// Tool emulation requires cascade mode for proto section injection.
	if mode == "legacy" && req.ToolPreamble != "" {
		mode = "cascade"
	}

	// Prefer the LS the token is bound to; otherwise acquire one by proxy URL.
	var lease *windsurf.LSLease
	if token.LSBinding.ContainerID != "" || token.LSBinding.ContainerName != "" {
		lease, err = s.lsService.AcquireByBinding(token.LSBinding)
	} else {
		lease, err = s.lsService.Acquire(ctx, token.ProxyURL)
	}
	if err != nil {
		return nil, fmt.Errorf("acquire LS: %w", err)
	}
	defer lease.Release()

	var resp *WindsurfChatResponse
	switch mode {
	case "legacy":
		resp, err = s.chatLegacy(ctx, lease.Client, token.APIKey, meta, req.Messages, modelKey)
	default:
		// "cascade" and any unrecognized mode both run the cascade path.
		resp, err = s.chatCascade(ctx, lease.Client, token.APIKey, meta, req.Messages, req.ToolPreamble, modelKey, lease.Endpoint)
	}

	if err != nil {
		if mode == "cascade" && s.cfg.Chat.AllowModeFallback && meta != nil && meta.EnumValue > 0 {
			slog.Warn("windsurf_cascade_fallback_to_legacy", "model", modelKey, "error", err)
			resp, err = s.chatLegacy(ctx, lease.Client, token.APIKey, meta, req.Messages, modelKey)
			if err == nil {
				resp.Mode = "legacy"
			}
		}
		if err != nil {
			return nil, fmt.Errorf("chat (%s): %w", mode, err)
		}
	}
	return resp, nil
}

// resolveMode returns the configured default mode when it is exactly
// "cascade" or "legacy"; otherwise it defers to windsurf.GetChatMode with the
// configured legacy enum cutoff.
func (s *WindsurfChatService) resolveMode(meta *windsurf.ModelMeta) string {
	configMode := s.cfg.Chat.DefaultMode
	if configMode == "cascade" || configMode == "legacy" {
		return configMode
	}
	return windsurf.GetChatMode(meta, int(s.cfg.Chat.LegacyEnumCutoff))
}

// chatCascade runs the request in cascade mode.
//
// It first tries to resume a pooled cascade whose fingerprint matches the
// conversation so far, sending only the last message. If the resumed call
// fails, it retries once as a brand-new cascade with the full conversation
// text. After a successful call that returned both a cascade ID and text, the
// cascade is checked back into the pool under the post-turn fingerprint.
func (s *WindsurfChatService) chatCascade(ctx context.Context, client *windsurf.LocalLSClient, apiKey string, meta *windsurf.ModelMeta, messages []windsurf.ChatMessage, toolPreamble string, modelKey string, lsEndpoint string) (*WindsurfChatResponse, error) {
	modelUID := ""
	modelEnumHint := 0
	if meta != nil {
		modelUID = meta.ModelUID
		modelEnumHint = meta.EnumValue
	}

	fpBefore := windsurf.FingerprintBefore(messages, modelKey, apiKey)

	// After a failover account switch, cascade reuse is forbidden: the
	// cascade_id belongs to the previous account's LS, and using it on the
	// current account is certain to trigger "panel state not found", wasting
	// a request.
	skipReuse := false
	if switches, ok := AccountSwitchCountFromContext(ctx); ok && switches > 0 {
		skipReuse = true
	}

	var entry *windsurf.ConversationEntry
	if !skipReuse {
		entry = s.pool.Checkout(fpBefore)
	}

	isResume := entry != nil && entry.CascadeID != ""
	var reuseCascadeID string
	if isResume {
		reuseCascadeID = entry.CascadeID
		// Log only a short prefix of the ID; clamp so that an ID shorter than
		// eight bytes cannot cause a slice-bounds panic.
		idPrefix := reuseCascadeID
		if len(idPrefix) > 8 {
			idPrefix = idPrefix[:8]
		}
		slog.Info("windsurf_cascade_reuse_hit", "cascade_id", idPrefix, "model", modelKey)
	}

	userText := buildCascadeText(messages, modelUID, isResume)
	result, err := client.StreamCascadeChat(ctx, apiKey, modelUID, userText, toolPreamble, reuseCascadeID, modelEnumHint)
	if err != nil && isResume {
		// The resumed cascade was rejected; start over with full history.
		slog.Warn("windsurf_cascade_reuse_failed", "error", err, "model", modelKey)
		userText = buildCascadeText(messages, modelUID, false)
		result, err = client.StreamCascadeChat(ctx, apiKey, modelUID, userText, toolPreamble, "", modelEnumHint)
	}
	if err != nil {
		return nil, err
	}

	if result.CascadeID != "" && result.Text != "" {
		fpAfter := windsurf.FingerprintAfter(messages, modelKey, apiKey)
		s.pool.Checkin(fpAfter, &windsurf.ConversationEntry{
			CascadeID: result.CascadeID,
			APIKey:    apiKey,
		})
	}

	return &WindsurfChatResponse{
		Text:        result.Text,
		Thinking:    result.Thinking,
		Model:       modelKey,
		Mode:        "cascade",
		Usage:       result.Usage,
		FirstTextAt: result.FirstTextAt,
		ToolCalls:   result.ToolCalls,
	}, nil
}

// chatLegacy runs the request in legacy mode. Only Text, Model and Mode are
// populated in the response.
func (s *WindsurfChatService) chatLegacy(ctx context.Context, client *windsurf.LocalLSClient, apiKey string, meta *windsurf.ModelMeta, messages []windsurf.ChatMessage, modelKey string) (*WindsurfChatResponse, error) {
	modelEnum := 0
	modelName := ""
	if meta != nil {
		modelEnum = meta.EnumValue
		modelName = meta.Name
	}

	text, err := client.StreamLegacyChat(ctx, apiKey, messages, modelEnum, modelName)
	if err != nil {
		return nil, err
	}

	return &WindsurfChatResponse{
		Text:  text,
		Model: modelKey,
		Mode:  "legacy",
	}, nil
}

const (
	// cascadeMaxHistoryBytes is the default byte budget for system text plus
	// replayed history in a cascade payload.
	cascadeMaxHistoryBytes = 200_000
	// cascade1MHistoryBytes is the budget used when the model UID contains "1m".
	cascade1MHistoryBytes = 900_000
	// cascadeMultiTurnPreamble is prepended to multi-turn payloads.
	cascadeMultiTurnPreamble = "The following is a multi-turn conversation. You MUST remember and use all information from prior turns."
)

// cascadeHistoryBudget returns the history byte budget for modelUID: the
// larger budget when the UID contains "1m" (case-insensitive), otherwise the
// default.
func cascadeHistoryBudget(modelUID string) int {
	if strings.Contains(strings.ToLower(modelUID), "1m") {
		return cascade1MHistoryBytes
	}
	return cascadeMaxHistoryBytes
}

// buildCascadeText constructs the full text payload for SendUserCascadeMessage.
//
// Only "system", "user" and "assistant" messages are considered; it returns ""
// when there is no user/assistant message at all.
//
//   - If isResume is true, only the last conversation message is sent (the
//     cascade already has context).
//   - With a single conversation message, the payload is the joined system
//     text (if any) followed by that message.
//   - Otherwise the payload is: system text (if any), the multi-turn preamble,
//     the earlier turns each wrapped in <human>…</human> or
//     <assistant>…</assistant>, and finally the latest message. Earlier turns
//     are collected newest-first until the byte budget for modelUID would be
//     exceeded; the most recent earlier turn is always kept.
func buildCascadeText(messages []windsurf.ChatMessage, modelUID string, isResume bool) string {
	var systemParts []string
	var convo []windsurf.ChatMessage
	for _, m := range messages {
		if m.Role == "system" {
			systemParts = append(systemParts, m.Content)
		} else if m.Role == "user" || m.Role == "assistant" {
			convo = append(convo, m)
		}
	}
	if len(convo) == 0 {
		return ""
	}

	// Resume: cascade already has context, only send last user message.
	if isResume {
		return convo[len(convo)-1].Content
	}

	// NOTE(review): an earlier version of this function's comment described
	// the system prompt as being "wrapped in" a tag, but only newlines are
	// added here (and around the latest message below). Confirm whether
	// wrapper tags were lost; the literals are kept as found.
	sysText := strings.TrimSpace(strings.Join(systemParts, "\n"))
	if sysText != "" {
		sysText = "\n" + sysText + "\n"
	}

	// Single turn: system + last message.
	if len(convo) <= 1 {
		text := convo[len(convo)-1].Content
		if sysText != "" {
			text = sysText + "\n\n" + text
		}
		return text
	}

	// Multi-turn: build history with budget trimming.
	maxBytes := cascadeHistoryBudget(modelUID)
	historyBytes := len(sysText)

	// Walk backward from the second-to-last message, collecting turns that
	// fit. Turns are appended newest-first and reversed afterwards, which
	// avoids re-copying the slice on every prepend.
	lines := make([]string, 0, len(convo)-1)
	for i := len(convo) - 2; i >= 0; i-- {
		m := convo[i]
		tag := "human"
		if m.Role == "assistant" {
			tag = "assistant"
		}
		// The third argument closes the tag. Without a matching verb, fmt
		// appended "%!(EXTRA string=...)" to every turn sent to the model.
		line := fmt.Sprintf("<%s>\n%s\n</%s>", tag, m.Content, tag)
		if historyBytes+len(line) > maxBytes && len(lines) > 0 {
			slog.Info("windsurf_cascade_history_trimmed",
				"turn", i,
				"total_turns", len(convo),
				"kept_kb", historyBytes/1024,
			)
			break
		}
		lines = append(lines, line)
		historyBytes += len(line)
	}
	// Restore chronological order.
	for lo, hi := 0, len(lines)-1; lo < hi; lo, hi = lo+1, hi-1 {
		lines[lo], lines[hi] = lines[hi], lines[lo]
	}

	latest := convo[len(convo)-1]
	text := cascadeMultiTurnPreamble + "\n\n" +
		strings.Join(lines, "\n\n") + "\n\n" +
		"\n" + latest.Content + "\n"
	if sysText != "" {
		text = sysText + "\n\n" + text
	}
	return text
}