sub2api/backend/internal/service/windsurf_chat_service.go
win 002066e700 chore(wip): 保存订制改动以便合并上游
- windsurf: client/pool/local_ls/tool_emulation/tool_names/models 调整
- handler: admin account_data / failover_loop / gateway_handler
- repository: scheduler_cache 及测试
- service: windsurf_chat_service / windsurf_gateway_service
- deploy: compose 合并为单文件(含 windsurf-ls profile),Dockerfile.ls
- cmd: 新增 dump_ls_models / dump_preamble / test_windsurf_tools 辅助工具
2026-04-24 11:14:36 +08:00

278 lines
8.1 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package service
import (
"context"
"fmt"
"log/slog"
"strings"
"time"
"github.com/Wei-Shaw/sub2api/internal/config"
"github.com/Wei-Shaw/sub2api/internal/pkg/windsurf"
)
// WindsurfChatService runs Windsurf chat requests: it obtains a token for the
// requesting account, acquires an LS lease, and dispatches the conversation in
// either "cascade" or "legacy" mode.
type WindsurfChatService struct {
// cfg supplies the Chat settings read by this service (DefaultMode,
// LegacyEnumCutoff, AllowModeFallback).
cfg config.WindsurfConfig
// lsService hands out LS leases, either by container binding or by proxy URL.
lsService *WindsurfLSService
// tokenProvider resolves the per-account token used for each request.
tokenProvider *WindsurfTokenProvider
// pool holds cascade conversation entries keyed by conversation fingerprint,
// so a follow-up request can resume an existing cascade.
pool *windsurf.ConversationPool
}
// NewWindsurfChatService wires a chat service from its configuration, LS lease
// provider and token provider. Each service gets its own, freshly created
// conversation pool.
func NewWindsurfChatService(
	cfg config.WindsurfConfig,
	lsService *WindsurfLSService,
	tokenProvider *WindsurfTokenProvider,
) *WindsurfChatService {
	svc := new(WindsurfChatService)
	svc.cfg = cfg
	svc.lsService = lsService
	svc.tokenProvider = tokenProvider
	svc.pool = windsurf.NewConversationPool()
	return svc
}
// WindsurfChatRequest is the input to WindsurfChatService.Chat.
type WindsurfChatRequest struct {
// AccountID selects the account whose token is used for the request.
AccountID int64
// Model is the requested model name; Chat resolves it with windsurf.ResolveModel.
Model string
// Messages is the conversation to send.
Messages []windsurf.ChatMessage
// Stream, Tools and ToolChoice are not read by the service code visible in
// this file; presumably they are consumed by the handler — TODO confirm.
Stream bool
Tools []windsurf.OpenAITool
ToolChoice interface{}
// A non-empty ToolPreamble upgrades a "legacy" mode decision to "cascade".
ToolPreamble string // computed by handler, passed through to Cascade
}
// WindsurfChatResponse is the result of a chat call in either mode.
// Legacy-mode responses populate only Text, Model and Mode.
type WindsurfChatResponse struct {
Text string // response text
Thinking string // thinking output taken from the cascade result
Model string // resolved model key the request ran against
Mode string // "cascade" or "legacy": the mode that produced this response
Usage *windsurf.StepUsage // server-reported; nil if unavailable
FirstTextAt time.Time // when first text appeared (zero if no text)
ToolCalls []windsurf.NativeToolCall // tool calls taken from the cascade result
}
// Chat executes one chat request end to end.
//
// Steps:
//  1. Fetch the token for req.AccountID.
//  2. Resolve the model and pick a mode; a non-empty ToolPreamble upgrades
//     "legacy" to "cascade" because tool emulation needs cascade mode.
//  3. Acquire an LS lease (by container binding when the token carries one,
//     otherwise by proxy URL) and release it when the call returns.
//  4. Run the chat in the chosen mode. Any mode other than "legacy" is
//     handled as cascade.
//  5. If cascade fails and fallback is enabled for a model with a positive
//     enum value, retry once in legacy mode.
//
// Errors are wrapped with the step that failed. The chat error is labelled
// with the originally selected mode, even when it comes from the legacy
// fallback.
//
// NOTE(review): the legacy fallback does not receive ToolPreamble, so a
// request that was forced into cascade for tool emulation falls back without
// its tool preamble — confirm this is intended.
func (s *WindsurfChatService) Chat(ctx context.Context, req *WindsurfChatRequest) (*WindsurfChatResponse, error) {
	token, err := s.tokenProvider.GetToken(ctx, req.AccountID)
	if err != nil {
		return nil, fmt.Errorf("get token: %w", err)
	}

	modelKey := windsurf.ResolveModel(req.Model)
	meta := windsurf.GetModelInfo(modelKey)

	mode := s.resolveMode(meta)
	// Tool emulation requires cascade mode for proto section injection.
	if req.ToolPreamble != "" && mode == "legacy" {
		mode = "cascade"
	}

	hasBinding := token.LSBinding.ContainerID != "" || token.LSBinding.ContainerName != ""
	var lease *windsurf.LSLease
	if hasBinding {
		lease, err = s.lsService.AcquireByBinding(token.LSBinding)
	} else {
		lease, err = s.lsService.Acquire(ctx, token.ProxyURL)
	}
	if err != nil {
		return nil, fmt.Errorf("acquire LS: %w", err)
	}
	defer lease.Release()

	var resp *WindsurfChatResponse
	if mode == "legacy" {
		resp, err = s.chatLegacy(ctx, lease.Client, token.APIKey, meta, req.Messages, modelKey)
	} else {
		resp, err = s.chatCascade(ctx, lease.Client, token.APIKey, meta, req.Messages, req.ToolPreamble, modelKey, lease.Endpoint)
	}
	if err == nil {
		return resp, nil
	}

	// Only an explicit "cascade" run may fall back, and only for models that
	// carry a positive enum value.
	canFallback := mode == "cascade" && s.cfg.Chat.AllowModeFallback && meta != nil && meta.EnumValue > 0
	if !canFallback {
		return nil, fmt.Errorf("chat (%s): %w", mode, err)
	}

	slog.Warn("windsurf_cascade_fallback_to_legacy", "model", modelKey, "error", err)
	resp, err = s.chatLegacy(ctx, lease.Client, token.APIKey, meta, req.Messages, modelKey)
	if err != nil {
		return nil, fmt.Errorf("chat (%s): %w", mode, err)
	}
	resp.Mode = "legacy"
	return resp, nil
}
// resolveMode picks the chat mode for a model. A configured DefaultMode of
// "cascade" or "legacy" wins outright; any other value defers to
// windsurf.GetChatMode with the configured legacy enum cutoff.
func (s *WindsurfChatService) resolveMode(meta *windsurf.ModelMeta) string {
	switch forced := s.cfg.Chat.DefaultMode; forced {
	case "cascade", "legacy":
		return forced
	default:
		return windsurf.GetChatMode(meta, int(s.cfg.Chat.LegacyEnumCutoff))
	}
}
// chatCascade sends the conversation through the cascade call of the leased LS
// client and converts the result into a WindsurfChatResponse.
//
// It first looks in the conversation pool for a cascade whose fingerprint
// matches the conversation so far. On a hit only the newest message is sent to
// that cascade; if the resumed call fails, the request is retried once on a
// fresh cascade with the full conversation text. After a successful call that
// produced both a cascade ID and text, the cascade is checked back into the
// pool under the post-response fingerprint.
//
// meta may be nil, in which case an empty model UID and a zero enum hint are
// sent. lsEndpoint is accepted for signature compatibility and is not used.
//
// It returns the error of the last cascade call when that call fails.
func (s *WindsurfChatService) chatCascade(ctx context.Context, client *windsurf.LocalLSClient, apiKey string, meta *windsurf.ModelMeta, messages []windsurf.ChatMessage, toolPreamble string, modelKey string, lsEndpoint string) (*WindsurfChatResponse, error) {
	modelUID := ""
	modelEnumHint := 0
	if meta != nil {
		modelUID = meta.ModelUID
		modelEnumHint = meta.EnumValue
	}

	fpBefore := windsurf.FingerprintBefore(messages, modelKey, apiKey)

	// After a failover account switch, do not reuse a cascade: the cascade_id
	// belongs to the previous account's LS and would always trigger
	// "panel state not found" on the current account, wasting a request.
	skipReuse := false
	if switches, ok := AccountSwitchCountFromContext(ctx); ok && switches > 0 {
		skipReuse = true
	}

	var entry *windsurf.ConversationEntry
	if !skipReuse {
		entry = s.pool.Checkout(fpBefore)
	}

	isResume := entry != nil && entry.CascadeID != ""
	var reuseCascadeID string
	if isResume {
		reuseCascadeID = entry.CascadeID
		// Log only a short prefix of the ID. Bound the slice explicitly: an ID
		// shorter than 8 bytes would otherwise cause an out-of-range panic.
		shortID := reuseCascadeID
		if len(shortID) > 8 {
			shortID = shortID[:8]
		}
		slog.Info("windsurf_cascade_reuse_hit", "cascade_id", shortID, "model", modelKey)
	}

	userText := buildCascadeText(messages, modelUID, isResume)
	result, err := client.StreamCascadeChat(ctx, apiKey, modelUID, userText, toolPreamble, reuseCascadeID, modelEnumHint)
	if err != nil && isResume {
		// The pooled cascade could not be resumed; start over on a fresh
		// cascade, which needs the full conversation text again.
		slog.Warn("windsurf_cascade_reuse_failed", "error", err, "model", modelKey)
		userText = buildCascadeText(messages, modelUID, false)
		result, err = client.StreamCascadeChat(ctx, apiKey, modelUID, userText, toolPreamble, "", modelEnumHint)
	}
	if err != nil {
		return nil, err
	}

	// Only cascades that produced text are kept for reuse.
	if result.CascadeID != "" && result.Text != "" {
		fpAfter := windsurf.FingerprintAfter(messages, modelKey, apiKey)
		s.pool.Checkin(fpAfter, &windsurf.ConversationEntry{
			CascadeID: result.CascadeID,
			APIKey:    apiKey,
		})
	}

	return &WindsurfChatResponse{
		Text:        result.Text,
		Thinking:    result.Thinking,
		Model:       modelKey,
		Mode:        "cascade",
		Usage:       result.Usage,
		FirstTextAt: result.FirstTextAt,
		ToolCalls:   result.ToolCalls,
	}, nil
}
// chatLegacy sends the messages through the legacy chat call of the leased LS
// client. When meta is nil, a zero enum value and an empty model name are
// sent. The response carries only the text, the model key and Mode "legacy".
func (s *WindsurfChatService) chatLegacy(ctx context.Context, client *windsurf.LocalLSClient, apiKey string, meta *windsurf.ModelMeta, messages []windsurf.ChatMessage, modelKey string) (*WindsurfChatResponse, error) {
	var (
		enumValue int
		name      string
	)
	if meta != nil {
		enumValue, name = meta.EnumValue, meta.Name
	}

	reply, err := client.StreamLegacyChat(ctx, apiKey, messages, enumValue, name)
	if err != nil {
		return nil, err
	}

	resp := &WindsurfChatResponse{
		Mode:  "legacy",
		Model: modelKey,
		Text:  reply,
	}
	return resp, nil
}
const (
	// cascadeMaxHistoryBytes is the default byte budget for cascade history.
	cascadeMaxHistoryBytes = 200_000
	// cascade1MHistoryBytes is the byte budget for model UIDs containing "1m".
	cascade1MHistoryBytes = 900_000
	// cascadeMultiTurnPreamble is placed ahead of the replayed turns when a
	// multi-turn conversation is sent as a single text payload.
	cascadeMultiTurnPreamble = "The following is a multi-turn conversation. You MUST remember and use all information from prior turns."
)

// cascadeHistoryBudget returns the history byte budget for a model UID:
// cascade1MHistoryBytes when the UID contains "1m" (case-insensitive),
// otherwise cascadeMaxHistoryBytes.
func cascadeHistoryBudget(modelUID string) int {
	budget := cascadeMaxHistoryBytes
	if lowered := strings.ToLower(modelUID); strings.Contains(lowered, "1m") {
		budget = cascade1MHistoryBytes
	}
	return budget
}
// buildCascadeText constructs the full text payload for a cascade message.
//
// Only "system", "user" and "assistant" messages are considered; other roles
// are dropped. With no user/assistant message the result is "".
//
// If isResume is true, only the content of the last user/assistant message is
// returned (the cascade already has the earlier context). Otherwise the
// payload is, in order:
//   - the joined system prompts wrapped in <system_instructions>, if any;
//   - for a single-turn conversation, the message content as-is;
//   - for a multi-turn conversation, the multi-turn preamble, the earlier
//     turns wrapped in <human>/<assistant> tags, and the last message wrapped
//     in <human>.
//
// Earlier turns are kept newest-first until the byte budget for modelUID
// (see cascadeHistoryBudget) would be exceeded; the most recent earlier turn
// is always kept. The budget counts the system block and the kept turns, not
// the last message or the preamble.
func buildCascadeText(messages []windsurf.ChatMessage, modelUID string, isResume bool) string {
	var systemParts []string
	var convo []windsurf.ChatMessage
	for _, m := range messages {
		switch m.Role {
		case "system":
			systemParts = append(systemParts, m.Content)
		case "user", "assistant":
			convo = append(convo, m)
		}
	}
	if len(convo) == 0 {
		return ""
	}
	latest := convo[len(convo)-1]

	// Resume: cascade already has context, only send the last message.
	if isResume {
		return latest.Content
	}

	sysText := strings.TrimSpace(strings.Join(systemParts, "\n"))
	if sysText != "" {
		sysText = "<system_instructions>\n" + sysText + "\n</system_instructions>"
	}

	// Single turn: system + last message.
	if len(convo) == 1 {
		if sysText == "" {
			return latest.Content
		}
		return sysText + "\n\n" + latest.Content
	}

	// Multi-turn: build history with budget trimming.
	maxBytes := cascadeHistoryBudget(modelUID)
	historyBytes := len(sysText)

	// Walk backward from the second-to-last message, collecting turns that
	// fit. Turns are appended newest-first and reversed once afterwards, which
	// avoids re-copying the whole slice on every prepend.
	history := make([]string, 0, len(convo)-1)
	for i := len(convo) - 2; i >= 0; i-- {
		m := convo[i]
		tag := "human"
		if m.Role == "assistant" {
			tag = "assistant"
		}
		line := fmt.Sprintf("<%s>\n%s\n</%s>", tag, m.Content, tag)
		if len(history) > 0 && historyBytes+len(line) > maxBytes {
			slog.Info("windsurf_cascade_history_trimmed",
				"turn", i,
				"total_turns", len(convo),
				"kept_kb", historyBytes/1024,
			)
			break
		}
		history = append(history, line)
		historyBytes += len(line)
	}
	// Restore chronological order.
	for lo, hi := 0, len(history)-1; lo < hi; lo, hi = lo+1, hi-1 {
		history[lo], history[hi] = history[hi], history[lo]
	}

	text := cascadeMultiTurnPreamble + "\n\n" +
		strings.Join(history, "\n\n") + "\n\n" +
		"<human>\n" + latest.Content + "\n</human>"
	if sysText != "" {
		text = sysText + "\n\n" + text
	}
	return text
}