- windsurf: client/pool/local_ls/tool_emulation/tool_names/models 调整 - handler: admin account_data / failover_loop / gateway_handler - repository: scheduler_cache 及测试 - service: windsurf_chat_service / windsurf_gateway_service - deploy: compose 合并为单文件(含 windsurf-ls profile),Dockerfile.ls - cmd: 新增 dump_ls_models / dump_preamble / test_windsurf_tools 辅助工具
278 lines
8.1 KiB
Go
278 lines
8.1 KiB
Go
package service
|
||
|
||
import (
|
||
"context"
|
||
"fmt"
|
||
"log/slog"
|
||
"strings"
|
||
"time"
|
||
|
||
"github.com/Wei-Shaw/sub2api/internal/config"
|
||
"github.com/Wei-Shaw/sub2api/internal/pkg/windsurf"
|
||
)
|
||
|
||
type WindsurfChatService struct {
|
||
cfg config.WindsurfConfig
|
||
lsService *WindsurfLSService
|
||
tokenProvider *WindsurfTokenProvider
|
||
pool *windsurf.ConversationPool
|
||
}
|
||
|
||
func NewWindsurfChatService(
|
||
cfg config.WindsurfConfig,
|
||
lsService *WindsurfLSService,
|
||
tokenProvider *WindsurfTokenProvider,
|
||
) *WindsurfChatService {
|
||
return &WindsurfChatService{
|
||
cfg: cfg,
|
||
lsService: lsService,
|
||
tokenProvider: tokenProvider,
|
||
pool: windsurf.NewConversationPool(),
|
||
}
|
||
}
|
||
|
||
type WindsurfChatRequest struct {
|
||
AccountID int64
|
||
Model string
|
||
Messages []windsurf.ChatMessage
|
||
Stream bool
|
||
Tools []windsurf.OpenAITool
|
||
ToolChoice interface{}
|
||
ToolPreamble string // computed by handler, passed through to Cascade
|
||
}
|
||
|
||
type WindsurfChatResponse struct {
|
||
Text string
|
||
Thinking string
|
||
Model string
|
||
Mode string
|
||
Usage *windsurf.StepUsage // server-reported; nil if unavailable
|
||
FirstTextAt time.Time // when first text appeared (zero if no text)
|
||
ToolCalls []windsurf.NativeToolCall
|
||
}
|
||
|
||
func (s *WindsurfChatService) Chat(ctx context.Context, req *WindsurfChatRequest) (*WindsurfChatResponse, error) {
|
||
token, err := s.tokenProvider.GetToken(ctx, req.AccountID)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("get token: %w", err)
|
||
}
|
||
|
||
modelKey := windsurf.ResolveModel(req.Model)
|
||
meta := windsurf.GetModelInfo(modelKey)
|
||
|
||
mode := s.resolveMode(meta)
|
||
// Tool emulation requires cascade mode for proto section injection
|
||
if mode == "legacy" && req.ToolPreamble != "" {
|
||
mode = "cascade"
|
||
}
|
||
|
||
var lease *windsurf.LSLease
|
||
if token.LSBinding.ContainerID != "" || token.LSBinding.ContainerName != "" {
|
||
lease, err = s.lsService.AcquireByBinding(token.LSBinding)
|
||
} else {
|
||
lease, err = s.lsService.Acquire(ctx, token.ProxyURL)
|
||
}
|
||
if err != nil {
|
||
return nil, fmt.Errorf("acquire LS: %w", err)
|
||
}
|
||
defer lease.Release()
|
||
|
||
var resp *WindsurfChatResponse
|
||
switch mode {
|
||
case "cascade":
|
||
resp, err = s.chatCascade(ctx, lease.Client, token.APIKey, meta, req.Messages, req.ToolPreamble, modelKey, lease.Endpoint)
|
||
case "legacy":
|
||
resp, err = s.chatLegacy(ctx, lease.Client, token.APIKey, meta, req.Messages, modelKey)
|
||
default:
|
||
resp, err = s.chatCascade(ctx, lease.Client, token.APIKey, meta, req.Messages, req.ToolPreamble, modelKey, lease.Endpoint)
|
||
}
|
||
|
||
if err != nil {
|
||
if mode == "cascade" && s.cfg.Chat.AllowModeFallback && meta != nil && meta.EnumValue > 0 {
|
||
slog.Warn("windsurf_cascade_fallback_to_legacy", "model", modelKey, "error", err)
|
||
resp, err = s.chatLegacy(ctx, lease.Client, token.APIKey, meta, req.Messages, modelKey)
|
||
if err == nil {
|
||
resp.Mode = "legacy"
|
||
}
|
||
}
|
||
if err != nil {
|
||
return nil, fmt.Errorf("chat (%s): %w", mode, err)
|
||
}
|
||
}
|
||
|
||
return resp, nil
|
||
}
|
||
|
||
func (s *WindsurfChatService) resolveMode(meta *windsurf.ModelMeta) string {
|
||
configMode := s.cfg.Chat.DefaultMode
|
||
if configMode == "cascade" || configMode == "legacy" {
|
||
return configMode
|
||
}
|
||
return windsurf.GetChatMode(meta, int(s.cfg.Chat.LegacyEnumCutoff))
|
||
}
|
||
|
||
func (s *WindsurfChatService) chatCascade(ctx context.Context, client *windsurf.LocalLSClient, apiKey string, meta *windsurf.ModelMeta, messages []windsurf.ChatMessage, toolPreamble string, modelKey string, lsEndpoint string) (*WindsurfChatResponse, error) {
|
||
modelUID := ""
|
||
modelEnumHint := 0
|
||
if meta != nil {
|
||
modelUID = meta.ModelUID
|
||
modelEnumHint = meta.EnumValue
|
||
}
|
||
|
||
fpBefore := windsurf.FingerprintBefore(messages, modelKey, apiKey)
|
||
// failover 切号后禁止复用 cascade:cascade_id 属于上一个账号的 LS,
|
||
// 在当前账号上一定会触发 "panel state not found" 浪费一次请求。
|
||
skipReuse := false
|
||
if switches, ok := AccountSwitchCountFromContext(ctx); ok && switches > 0 {
|
||
skipReuse = true
|
||
}
|
||
var entry *windsurf.ConversationEntry
|
||
if !skipReuse {
|
||
entry = s.pool.Checkout(fpBefore)
|
||
}
|
||
isResume := entry != nil && entry.CascadeID != ""
|
||
|
||
var reuseCascadeID string
|
||
if isResume {
|
||
reuseCascadeID = entry.CascadeID
|
||
slog.Info("windsurf_cascade_reuse_hit", "cascade_id", reuseCascadeID[:8], "model", modelKey)
|
||
}
|
||
|
||
userText := buildCascadeText(messages, modelUID, isResume)
|
||
|
||
result, err := client.StreamCascadeChat(ctx, apiKey, modelUID, userText, toolPreamble, reuseCascadeID, modelEnumHint)
|
||
if err != nil && isResume {
|
||
slog.Warn("windsurf_cascade_reuse_failed", "error", err, "model", modelKey)
|
||
userText = buildCascadeText(messages, modelUID, false)
|
||
result, err = client.StreamCascadeChat(ctx, apiKey, modelUID, userText, toolPreamble, "", modelEnumHint)
|
||
}
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
if result.CascadeID != "" && result.Text != "" {
|
||
fpAfter := windsurf.FingerprintAfter(messages, modelKey, apiKey)
|
||
s.pool.Checkin(fpAfter, &windsurf.ConversationEntry{
|
||
CascadeID: result.CascadeID,
|
||
APIKey: apiKey,
|
||
})
|
||
}
|
||
|
||
return &WindsurfChatResponse{
|
||
Text: result.Text,
|
||
Thinking: result.Thinking,
|
||
Model: modelKey,
|
||
Mode: "cascade",
|
||
Usage: result.Usage,
|
||
FirstTextAt: result.FirstTextAt,
|
||
ToolCalls: result.ToolCalls,
|
||
}, nil
|
||
}
|
||
|
||
func (s *WindsurfChatService) chatLegacy(ctx context.Context, client *windsurf.LocalLSClient, apiKey string, meta *windsurf.ModelMeta, messages []windsurf.ChatMessage, modelKey string) (*WindsurfChatResponse, error) {
|
||
modelEnum := 0
|
||
modelName := ""
|
||
if meta != nil {
|
||
modelEnum = meta.EnumValue
|
||
modelName = meta.Name
|
||
}
|
||
|
||
text, err := client.StreamLegacyChat(ctx, apiKey, messages, modelEnum, modelName)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
return &WindsurfChatResponse{
|
||
Text: text,
|
||
Model: modelKey,
|
||
Mode: "legacy",
|
||
}, nil
|
||
}
|
||
|
||
const (
	// cascadeMaxHistoryBytes is the default byte budget for the conversation
	// history that buildCascadeText embeds in a cascade message.
	cascadeMaxHistoryBytes = 200_000

	// cascade1MHistoryBytes is the byte budget used when the model UID
	// contains "1m" (see cascadeHistoryBudget).
	cascade1MHistoryBytes = 900_000

	// cascadeMultiTurnPreamble is placed in front of the tagged history when
	// a multi-turn conversation is flattened into one message.
	cascadeMultiTurnPreamble = "The following is a multi-turn conversation. You MUST remember and use all information from prior turns."
)
|
||
|
||
func cascadeHistoryBudget(modelUID string) int {
|
||
if strings.Contains(strings.ToLower(modelUID), "1m") {
|
||
return cascade1MHistoryBytes
|
||
}
|
||
return cascadeMaxHistoryBytes
|
||
}
|
||
|
||
// buildCascadeText constructs the full text payload for SendUserCascadeMessage.
|
||
// If isResume is true, only the last user message is sent (cascade already has context).
|
||
// Otherwise: system prompt wrapped in <system_instructions>, multi-turn history
|
||
// with <human>/<assistant> tags, and a budget cap to trim old turns.
|
||
func buildCascadeText(messages []windsurf.ChatMessage, modelUID string, isResume bool) string {
|
||
var systemParts []string
|
||
var convo []windsurf.ChatMessage
|
||
|
||
for _, m := range messages {
|
||
if m.Role == "system" {
|
||
systemParts = append(systemParts, m.Content)
|
||
} else if m.Role == "user" || m.Role == "assistant" {
|
||
convo = append(convo, m)
|
||
}
|
||
}
|
||
|
||
if len(convo) == 0 {
|
||
return ""
|
||
}
|
||
|
||
// Resume: cascade already has context, only send last user message
|
||
if isResume {
|
||
return convo[len(convo)-1].Content
|
||
}
|
||
|
||
sysText := strings.TrimSpace(strings.Join(systemParts, "\n"))
|
||
if sysText != "" {
|
||
sysText = "<system_instructions>\n" + sysText + "\n</system_instructions>"
|
||
}
|
||
|
||
// Single turn: system + last message
|
||
if len(convo) <= 1 {
|
||
text := convo[len(convo)-1].Content
|
||
if sysText != "" {
|
||
text = sysText + "\n\n" + text
|
||
}
|
||
return text
|
||
}
|
||
|
||
// Multi-turn: build history with budget trimming
|
||
maxBytes := cascadeHistoryBudget(modelUID)
|
||
historyBytes := len(sysText)
|
||
|
||
// Walk backward from second-to-last, collecting turns that fit
|
||
var lines []string
|
||
for i := len(convo) - 2; i >= 0; i-- {
|
||
m := convo[i]
|
||
tag := "human"
|
||
if m.Role == "assistant" {
|
||
tag = "assistant"
|
||
}
|
||
line := fmt.Sprintf("<%s>\n%s\n</%s>", tag, m.Content, tag)
|
||
if historyBytes+len(line) > maxBytes && len(lines) > 0 {
|
||
slog.Info("windsurf_cascade_history_trimmed",
|
||
"turn", i,
|
||
"total_turns", len(convo),
|
||
"kept_kb", historyBytes/1024,
|
||
)
|
||
break
|
||
}
|
||
lines = append([]string{line}, lines...)
|
||
historyBytes += len(line)
|
||
}
|
||
|
||
latest := convo[len(convo)-1]
|
||
text := cascadeMultiTurnPreamble + "\n\n" +
|
||
strings.Join(lines, "\n\n") + "\n\n" +
|
||
"<human>\n" + latest.Content + "\n</human>"
|
||
|
||
if sysText != "" {
|
||
text = sysText + "\n\n" + text
|
||
}
|
||
return text
|
||
}
|