// Source path: sub2api/backend/internal/service/windsurf_chat_service.go
//
// Repository viewer metadata: 360 lines, 12 KiB, Go.
//
// Note from the repository viewer: this file contains Unicode characters that
// might be confused with other characters. If that is intentional, the warning
// can safely be ignored.

package service
import (
"context"
"fmt"
"log/slog"
"strings"
"time"
"github.com/Wei-Shaw/sub2api/internal/config"
"github.com/Wei-Shaw/sub2api/internal/pkg/windsurf"
)
// WindsurfChatService sends chat requests to Windsurf through a leased
// language-server (LS) client, using either the "cascade" or the "legacy"
// chat mode.
type WindsurfChatService struct {
// cfg is the Windsurf configuration; its Chat section drives mode selection
// and the cascade-to-legacy fallback.
cfg config.WindsurfConfig
// lsService hands out LS leases, either by LS binding or by proxy URL.
lsService *WindsurfLSService
// tokenProvider resolves the token for the account named in each request.
tokenProvider *WindsurfTokenProvider
// pool stores conversation entries keyed by fingerprint so that a later
// request can resume an existing cascade.
pool *windsurf.ConversationPool
}
// NewWindsurfChatService wires a WindsurfChatService from its dependencies and
// gives it a freshly created conversation pool.
func NewWindsurfChatService(
	cfg config.WindsurfConfig,
	lsService *WindsurfLSService,
	tokenProvider *WindsurfTokenProvider,
) *WindsurfChatService {
	svc := new(WindsurfChatService)
	svc.cfg = cfg
	svc.lsService = lsService
	svc.tokenProvider = tokenProvider
	svc.pool = windsurf.NewConversationPool()
	return svc
}
// WindsurfChatRequest describes one chat call made through
// WindsurfChatService.Chat.
type WindsurfChatRequest struct {
	// AccountID selects the account whose token is used for the call.
	AccountID int64

	// Model is the requested model name; Chat resolves it with
	// windsurf.ResolveModel.
	Model string

	// Messages is the conversation to send.
	Messages []windsurf.ChatMessage

	// Stream, Tools and ToolChoice are carried on the request.
	// NOTE(review): none of the three is read by the code visible in this
	// file — confirm where they are consumed.
	Stream     bool
	Tools      []windsurf.OpenAITool
	ToolChoice any

	// ToolPreamble is computed by the handler and passed through to Cascade.
	// A non-empty value forces cascade mode in Chat.
	ToolPreamble string

	// Images holds the sidecar images for the current user turn (Cascade
	// proto SendUserCascadeMessageRequest.images, field 6). The contents must
	// already have passed ValidateCascadeImages or an equivalent check.
	Images []windsurf.CascadeImage
}
// WindsurfChatResponse is the result of one chat call.
type WindsurfChatResponse struct {
// Text is the response text returned by the chat stream.
Text string
// Thinking is copied from the cascade stream result; the legacy path leaves
// it empty.
Thinking string
// Model is the resolved model key the request ran against.
Model string
// Mode is the chat mode that produced the response: "cascade" or "legacy".
Mode string
Usage *windsurf.StepUsage // server-reported; nil if unavailable
FirstTextAt time.Time // when first text appeared (zero if no text)
// ToolCalls is copied from the cascade stream result; the legacy path leaves
// it nil.
ToolCalls []windsurf.NativeToolCall
}
// Chat performs one chat exchange for req and returns the completed response.
//
// The flow is: fetch the account token, resolve the model and chat mode,
// lease a language-server client (by LS binding when the token carries one,
// otherwise by proxy URL), then run the request in cascade or legacy mode.
// When a cascade attempt fails and the configuration allows it, the request is
// retried once in legacy mode for models that have a positive enum value.
//
// Errors are wrapped with the step that failed ("get token", "acquire LS",
// "chat (<mode>)").
func (s *WindsurfChatService) Chat(ctx context.Context, req *WindsurfChatRequest) (*WindsurfChatResponse, error) {
	token, err := s.tokenProvider.GetToken(ctx, req.AccountID)
	if err != nil {
		return nil, fmt.Errorf("get token: %w", err)
	}

	modelKey := windsurf.ResolveModel(req.Model)
	meta := windsurf.GetModelInfo(modelKey)

	// Tool emulation requires cascade mode for proto section injection.
	mode := s.resolveMode(meta)
	if req.ToolPreamble != "" && mode == "legacy" {
		mode = "cascade"
	}

	var lease *windsurf.LSLease
	if token.LSBinding.ContainerID == "" && token.LSBinding.ContainerName == "" {
		lease, err = s.lsService.Acquire(ctx, token.ProxyURL)
	} else {
		lease, err = s.lsService.AcquireByBinding(token.LSBinding)
	}
	if err != nil {
		return nil, fmt.Errorf("acquire LS: %w", err)
	}
	defer lease.Release()

	var resp *WindsurfChatResponse
	if mode == "legacy" {
		resp, err = s.chatLegacy(ctx, lease.Client, token.APIKey, meta, req.Messages, modelKey)
	} else {
		// "cascade" and any unrecognised mode both take the cascade path.
		resp, err = s.chatCascade(ctx, lease.Client, token.APIKey, meta, req.Messages, req.ToolPreamble, modelKey, lease.Endpoint, req.Images)
	}
	if err == nil {
		return resp, nil
	}

	// Only an explicit cascade attempt may fall back, and only for models
	// that carry a positive enum value.
	fallbackAllowed := mode == "cascade" && s.cfg.Chat.AllowModeFallback && meta != nil && meta.EnumValue > 0
	if !fallbackAllowed {
		return nil, fmt.Errorf("chat (%s): %w", mode, err)
	}

	slog.Warn("windsurf_cascade_fallback_to_legacy", "model", modelKey, "error", err)
	resp, err = s.chatLegacy(ctx, lease.Client, token.APIKey, meta, req.Messages, modelKey)
	if err != nil {
		return nil, fmt.Errorf("chat (%s): %w", mode, err)
	}
	resp.Mode = "legacy"
	return resp, nil
}
// resolveMode picks the chat mode for a model. A configured default of
// "cascade" or "legacy" wins outright; any other configured value defers to
// windsurf.GetChatMode with the configured legacy enum cutoff.
func (s *WindsurfChatService) resolveMode(meta *windsurf.ModelMeta) string {
	switch forced := s.cfg.Chat.DefaultMode; forced {
	case "cascade", "legacy":
		return forced
	default:
		return windsurf.GetChatMode(meta, int(s.cfg.Chat.LegacyEnumCutoff))
	}
}
// modelIdentityTemplates maps a model provider name to the system-prompt
// template used by injectModelIdentity. Every template contains two %s verbs,
// and both are filled with the model's display name.
var modelIdentityTemplates = map[string]string{
"anthropic": "You are %s, a large language model created by Anthropic. You are helpful, harmless, and honest. When asked about your identity or which model you are, you MUST respond that you are %s, made by Anthropic.",
"openai": "You are %s, a large language model created by OpenAI. When asked about your identity, you MUST respond that you are %s, made by OpenAI.",
"google": "You are %s, a large language model created by Google. When asked about your identity, you MUST respond that you are %s, made by Google.",
"deepseek": "You are %s, a large language model created by DeepSeek. When asked about your identity, you MUST respond that you are %s, made by DeepSeek.",
"xai": "You are %s, a large language model created by xAI. When asked about your identity, you MUST respond that you are %s, made by xAI.",
}
// injectModelIdentity returns messages with an identity system prompt
// prepended, or messages unchanged when no prompt applies.
//
// The input is returned as-is when meta is nil or has no provider, when the
// conversation already contains a system message, or when the provider has no
// entry in modelIdentityTemplates. Otherwise a new slice is returned whose
// first element is the identity prompt; the display name is meta.Name, falling
// back to modelKey when the name is empty.
func injectModelIdentity(messages []windsurf.ChatMessage, meta *windsurf.ModelMeta, modelKey string) []windsurf.ChatMessage {
	if meta == nil || meta.Provider == "" {
		return messages
	}
	for i := range messages {
		if messages[i].Role == "system" {
			// The client supplied its own system prompt; leave it alone.
			return messages
		}
	}
	template, known := modelIdentityTemplates[meta.Provider]
	if !known {
		return messages
	}

	name := meta.Name
	if name == "" {
		name = modelKey
	}

	withIdentity := make([]windsurf.ChatMessage, 0, len(messages)+1)
	withIdentity = append(withIdentity, windsurf.ChatMessage{
		Role:    "system",
		Content: fmt.Sprintf(template, name, name),
	})
	return append(withIdentity, messages...)
}
// chatCascade runs one chat turn in cascade mode and returns the collected
// response.
//
// Steps:
//  1. Prepend a model-identity system prompt when the client sent none.
//  2. If the request carries images, check that the model accepts them.
//  3. Try to resume a pooled cascade whose fingerprint matches the conversation
//     so far (skipped after an account switch).
//  4. Send the text; if a resumed cascade fails, retry once on a fresh cascade
//     with the full history.
//  5. On success, check the cascade back into the pool under the
//     post-response fingerprint.
//
// meta may be nil, in which case an empty model UID and a zero enum hint are
// sent. lsEndpoint is accepted for the caller's convenience but is not read by
// this function.
func (s *WindsurfChatService) chatCascade(ctx context.Context, client *windsurf.LocalLSClient, apiKey string, meta *windsurf.ModelMeta, messages []windsurf.ChatMessage, toolPreamble string, modelKey string, lsEndpoint string, images []windsurf.CascadeImage) (*WindsurfChatResponse, error) {
	modelUID := ""
	modelEnumHint := 0
	if meta != nil {
		modelUID = meta.ModelUID
		modelEnumHint = meta.EnumValue
	}

	// ── Model identity prompt injection ──
	// When the client doesn't provide its own system prompt, prepend one so
	// the model identifies itself as the requested model rather than leaking
	// the underlying Windsurf/Cascade backend identity.
	// Skip when the client already has a system message (Claude Code / Cline)
	// to avoid triggering Cascade anti-injection on reasoning models.
	messages = injectModelIdentity(messages, meta, modelKey)

	// Image capability gate: only checked when the request contains images.
	// Policy: fail open on an RPC error; reject when the model is known and
	// explicitly reports that it does not support images.
	// NOTE(review): the previous comment said this returns a CascadeModelError
	// to trigger failover, but the code returns a plain error — confirm which
	// is intended.
	if len(images) > 0 {
		found, ok, err := client.ModelSupportsImages(ctx, apiKey, modelUID)
		if err != nil {
			slog.Warn("windsurf_cascade_caps_fetch_failed", "model", modelUID, "error", err)
			// fail-open
		} else if found && !ok {
			return nil, fmt.Errorf("model %q does not support image inputs in Windsurf Cascade", modelUID)
		}
	}

	fpBefore := windsurf.FingerprintBefore(messages, modelKey, apiKey)

	// After a failover to another account, reusing a cascade is forbidden:
	// the cascade_id belongs to the previous account's LS, so using it on the
	// current account is bound to hit "panel state not found" and waste a
	// request. The switchover case also needs a larger history budget — the
	// new account has no server-side context at all, so the full chat history
	// has to be packed into the text.
	skipReuse := false
	switchover := false
	if switches, ok := AccountSwitchCountFromContext(ctx); ok && switches > 0 {
		skipReuse = true
		switchover = true
	}

	var entry *windsurf.ConversationEntry
	if !skipReuse {
		entry = s.pool.Checkout(fpBefore)
	}
	isResume := entry != nil && entry.CascadeID != ""

	var reuseCascadeID string
	if isResume {
		reuseCascadeID = entry.CascadeID
		// Log only a short prefix of the ID. Slicing unconditionally with
		// [:8] would panic on an ID shorter than eight bytes, so bound it.
		idPrefix := reuseCascadeID
		if len(idPrefix) > 8 {
			idPrefix = idPrefix[:8]
		}
		slog.Info("windsurf_cascade_reuse_hit", "cascade_id", idPrefix, "model", modelKey)
	}

	userText := buildCascadeText(messages, modelUID, isResume, switchover)
	result, err := client.StreamCascadeChat(ctx, apiKey, modelUID, userText, toolPreamble, reuseCascadeID, modelEnumHint, images)
	if err != nil && isResume {
		slog.Warn("windsurf_cascade_reuse_failed", "error", err, "model", modelKey)
		// panel-state-not-found recovery: a new cascade has no server-side
		// history, so the full chat history must be sent.
		userText = buildCascadeText(messages, modelUID, false, true)
		result, err = client.StreamCascadeChat(ctx, apiKey, modelUID, userText, toolPreamble, "", modelEnumHint, images)
	}
	if err != nil {
		return nil, err
	}

	// Only remember the cascade when it produced an ID and some text.
	if result.CascadeID != "" && result.Text != "" {
		fpAfter := windsurf.FingerprintAfter(messages, modelKey, apiKey)
		s.pool.Checkin(fpAfter, &windsurf.ConversationEntry{
			CascadeID: result.CascadeID,
			APIKey:    apiKey,
		})
	}

	return &WindsurfChatResponse{
		Text:        result.Text,
		Thinking:    result.Thinking,
		Model:       modelKey,
		Mode:        "cascade",
		Usage:       result.Usage,
		FirstTextAt: result.FirstTextAt,
		ToolCalls:   result.ToolCalls,
	}, nil
}
// chatLegacy runs one chat turn through the legacy chat stream and wraps the
// returned text in a response marked with mode "legacy". When meta is nil the
// stream is called with a zero enum value and an empty model name. Errors from
// the stream are returned unwrapped.
func (s *WindsurfChatService) chatLegacy(ctx context.Context, client *windsurf.LocalLSClient, apiKey string, meta *windsurf.ModelMeta, messages []windsurf.ChatMessage, modelKey string) (*WindsurfChatResponse, error) {
	var (
		enumValue   int
		legacyModel string
	)
	if meta != nil {
		enumValue, legacyModel = meta.EnumValue, meta.Name
	}

	reply, err := client.StreamLegacyChat(ctx, apiKey, messages, enumValue, legacyModel)
	if err != nil {
		return nil, err
	}

	out := &WindsurfChatResponse{
		Mode:  "legacy",
		Model: modelKey,
		Text:  reply,
	}
	return out, nil
}
// History budgets (in bytes) and the preamble used when building cascade text.
const (
// cascadeMaxHistoryBytes is the default history budget returned by
// cascadeHistoryBudget.
cascadeMaxHistoryBytes = 200_000
// cascade1MHistoryBytes is the budget used when the model UID contains "1m"
// (case-insensitive).
cascade1MHistoryBytes = 900_000
// cascadeSwitchoverHistoryBytes is the "fit as much of the full history as
// possible" budget for the account-switch / panel-state-not-found recovery
// scenarios. The goal is to give the new account as complete a conversation
// context as possible. 3.5MB leaves 500KB for the other proto fields
// (metadata/config/images) and stays clear of gRPC's 4MB default limit.
cascadeSwitchoverHistoryBytes = 3_500_000
// cascadeMultiTurnPreamble is placed ahead of the tagged history in
// multi-turn payloads.
cascadeMultiTurnPreamble = "The following is a multi-turn conversation. You MUST remember and use all information from prior turns."
)
// cascadeHistoryBudget returns the history byte budget for a request.
// A switchover always gets cascadeSwitchoverHistoryBytes; otherwise a model
// UID containing "1m" (case-insensitive) gets cascade1MHistoryBytes and every
// other model gets cascadeMaxHistoryBytes.
func cascadeHistoryBudget(modelUID string, switchover bool) int {
	switch {
	case switchover:
		return cascadeSwitchoverHistoryBytes
	case strings.Contains(strings.ToLower(modelUID), "1m"):
		return cascade1MHistoryBytes
	default:
		return cascadeMaxHistoryBytes
	}
}
// buildCascadeText assembles the text payload for SendUserCascadeMessage.
//
// Only "system", "user" and "assistant" messages are considered; other roles
// are ignored. With no user/assistant message the result is empty.
//
//   - isResume=true: only the content of the last conversation message is
//     returned, because the cascade already holds the earlier context.
//   - Single turn: the message content, preceded by the system prompt wrapped
//     in <system_instructions> when there is one.
//   - Multi-turn: system prompt, a fixed preamble, earlier turns wrapped in
//     <human>/<assistant> tags (oldest first), then the latest message in
//     <human> tags. Earlier turns are kept from newest to oldest until the
//     byte budget is exceeded; the newest earlier turn is always kept.
//
// switchover=true raises the history budget to cascadeSwitchoverHistoryBytes
// (~3.5MB). It is meant for account switchover / panel-state-not-found
// recovery, where the new account or new cascade has no server-side history
// and the full chat history has to be packed into the text. The flag is
// ignored when isResume is true, since a resume only sends the last message.
func buildCascadeText(messages []windsurf.ChatMessage, modelUID string, isResume, switchover bool) string {
	var (
		systemParts []string
		convo       []windsurf.ChatMessage
	)
	for _, msg := range messages {
		switch msg.Role {
		case "system":
			systemParts = append(systemParts, msg.Content)
		case "user", "assistant":
			convo = append(convo, msg)
		}
	}
	if len(convo) == 0 {
		return ""
	}

	latest := convo[len(convo)-1]
	if isResume {
		// The cascade already has the context; send just the last message.
		return latest.Content
	}

	sysText := strings.TrimSpace(strings.Join(systemParts, "\n"))
	if sysText != "" {
		sysText = "<system_instructions>\n" + sysText + "\n</system_instructions>"
	}
	// withSystem prefixes body with the wrapped system prompt, if any.
	withSystem := func(body string) string {
		if sysText == "" {
			return body
		}
		return sysText + "\n\n" + body
	}

	if len(convo) == 1 {
		return withSystem(latest.Content)
	}

	// Multi-turn: collect earlier turns newest-first until the budget is hit.
	budget := cascadeHistoryBudget(modelUID, switchover)
	usedBytes := len(sysText)
	droppedTurns := 0
	newestFirst := make([]string, 0, len(convo)-1)
	for idx := len(convo) - 2; idx >= 0; idx-- {
		tag := "human"
		if convo[idx].Role == "assistant" {
			tag = "assistant"
		}
		block := "<" + tag + ">\n" + convo[idx].Content + "\n</" + tag + ">"

		// The first (newest) earlier turn is kept even if it alone is over budget.
		if len(newestFirst) > 0 && usedBytes+len(block) > budget {
			droppedTurns = idx + 1
			slog.Info("windsurf_cascade_history_trimmed",
				"turn", idx,
				"total_turns", len(convo),
				"kept_kb", usedBytes/1024,
				"dropped_turns", droppedTurns,
				"switchover", switchover,
			)
			break
		}
		newestFirst = append(newestFirst, block)
		usedBytes += len(block)
	}
	if switchover && droppedTurns == 0 {
		slog.Info("windsurf_cascade_switchover_history",
			"total_turns", len(convo),
			"kept_kb", usedBytes/1024,
			"dropped_turns", 0,
		)
	}

	// Emit the kept turns oldest-first, separated by blank lines.
	var body strings.Builder
	body.WriteString(cascadeMultiTurnPreamble)
	body.WriteString("\n\n")
	for idx := len(newestFirst) - 1; idx >= 0; idx-- {
		body.WriteString(newestFirst[idx])
		if idx > 0 {
			body.WriteString("\n\n")
		}
	}
	body.WriteString("\n\n<human>\n")
	body.WriteString(latest.Content)
	body.WriteString("\n</human>")

	return withSystem(body.String())
}