sub2api/backend/internal/service/windsurf_chat_service.go
win 8b446ffef8 fix(windsurf): fix tool call for legacy-enum models + gateway logger
Three fixes:

1. Logger: windsurf_gateway_service used zap.L() (nop) instead of
   logger.L() — all gateway-level logs were silently dropped.

2. Tool mode routing: when tools are present in the request,
   force cascade mode even for legacy-enum models. Legacy mode
   ignores toolPreamble entirely, so tool calls were never injected.

3. Model enum hint: pass meta.EnumValue through to
   SendUserCascadeMessage/buildCascadeConfig as a fallback when
   modelUID-based enum resolution returns 0. Prevents 'neither
   PlanModel nor RequestedModel specified' gRPC errors.

Tested: claude-sonnet-4-6 with tool definitions returns proper
tool_use content blocks in both streaming and non-streaming modes.
Tool result round-trip verified.
2026-04-23 23:04:02 +08:00

269 lines
7.7 KiB
Go

package service
import (
"context"
"fmt"
"log/slog"
"strings"
"time"
"github.com/Wei-Shaw/sub2api/internal/config"
"github.com/Wei-Shaw/sub2api/internal/pkg/windsurf"
)
// WindsurfChatService runs chat requests against a leased Windsurf language
// server, using either the "cascade" or the "legacy" chat path.
type WindsurfChatService struct {
// cfg supplies Chat.DefaultMode, Chat.LegacyEnumCutoff and Chat.AllowModeFallback.
cfg config.WindsurfConfig
// lsService hands out language-server leases (Acquire / AcquireByBinding).
lsService *WindsurfLSService
// tokenProvider resolves an account ID to the token used for the request.
tokenProvider *WindsurfTokenProvider
// pool keeps cascade IDs keyed by conversation fingerprint so that a later
// turn of the same conversation can reuse the upstream cascade.
pool *windsurf.ConversationPool
}
// NewWindsurfChatService wires the given configuration, language-server
// service and token provider into a chat service backed by a freshly created
// conversation pool.
func NewWindsurfChatService(
	cfg config.WindsurfConfig,
	lsService *WindsurfLSService,
	tokenProvider *WindsurfTokenProvider,
) *WindsurfChatService {
	svc := new(WindsurfChatService)
	svc.cfg = cfg
	svc.lsService = lsService
	svc.tokenProvider = tokenProvider
	svc.pool = windsurf.NewConversationPool()
	return svc
}
// WindsurfChatRequest describes one chat call handled by
// WindsurfChatService.Chat.
type WindsurfChatRequest struct {
// AccountID selects the account whose token is fetched for the call.
AccountID int64
// Model is the caller-supplied model name; Chat resolves it with
// windsurf.ResolveModel before looking up model metadata.
Model string
// Messages is the conversation forwarded to the selected chat path.
Messages []windsurf.ChatMessage
// Stream is not read by the service code in this file; presumably it is
// consumed by the calling handler — TODO confirm.
Stream bool
// Tools and ToolChoice are not read by the service code in this file;
// presumably the handler derives ToolPreamble from them — TODO confirm.
Tools []windsurf.OpenAITool
ToolChoice interface{}
// A non-empty ToolPreamble makes Chat use cascade mode even when the
// resolved mode is "legacy".
ToolPreamble string // computed by handler, passed through to Cascade
}
// WindsurfChatResponse is the result of WindsurfChatService.Chat.
// The legacy path fills only Text, Model and Mode; the remaining fields are
// populated by the cascade path.
type WindsurfChatResponse struct {
// Text is the reply text returned by the upstream chat call.
Text string
// Thinking is copied from the cascade result; left empty by the legacy path.
Thinking string
// Model is the resolved model key (output of windsurf.ResolveModel).
Model string
// Mode is "cascade" or "legacy", naming the path that produced the reply.
Mode string
Usage *windsurf.StepUsage // server-reported; nil if unavailable
FirstTextAt time.Time // when first text appeared (zero if no text)
// ToolCalls is copied from the cascade result; left nil by the legacy path.
ToolCalls []windsurf.NativeToolCall
}
// Chat executes one chat request for the account named in req and returns the
// upstream reply.
//
// Steps:
//  1. Fetch the account token.
//  2. Resolve the model and pick the chat mode. A non-empty ToolPreamble
//     forces cascade mode, because the legacy path is never given the
//     preamble.
//  3. Lease a language server — by container binding when the token carries
//     one, otherwise by proxy URL — and release it when Chat returns.
//  4. Run the chat. When cascade fails, optionally retry through the legacy
//     path.
//
// The legacy retry only happens when all of the following hold: the mode is
// "cascade", Chat.AllowModeFallback is enabled, the model has a positive enum
// value, the request carries no tool preamble (a legacy reply would silently
// drop the tools), and ctx has not already been cancelled or timed out.
//
// Errors are wrapped with the step that failed ("get token", "acquire LS",
// "chat (<mode>)"). When the legacy retry fails as well, both the cascade
// error and the legacy error are wrapped so neither is lost.
func (s *WindsurfChatService) Chat(ctx context.Context, req *WindsurfChatRequest) (*WindsurfChatResponse, error) {
	token, err := s.tokenProvider.GetToken(ctx, req.AccountID)
	if err != nil {
		return nil, fmt.Errorf("get token: %w", err)
	}

	modelKey := windsurf.ResolveModel(req.Model)
	meta := windsurf.GetModelInfo(modelKey)
	mode := s.resolveMode(meta)

	// Tool emulation requires cascade mode for proto section injection.
	needsTools := req.ToolPreamble != ""
	if mode == "legacy" && needsTools {
		mode = "cascade"
	}

	var lease *windsurf.LSLease
	if token.LSBinding.ContainerID != "" || token.LSBinding.ContainerName != "" {
		lease, err = s.lsService.AcquireByBinding(token.LSBinding)
	} else {
		lease, err = s.lsService.Acquire(ctx, token.ProxyURL)
	}
	if err != nil {
		return nil, fmt.Errorf("acquire LS: %w", err)
	}
	defer lease.Release()

	var resp *WindsurfChatResponse
	switch mode {
	case "legacy":
		resp, err = s.chatLegacy(ctx, lease.Client, token.APIKey, meta, req.Messages, modelKey)
	default:
		// "cascade" and any unrecognised mode both take the cascade path.
		resp, err = s.chatCascade(ctx, lease.Client, token.APIKey, meta, req.Messages, req.ToolPreamble, modelKey, lease.Endpoint)
	}
	if err == nil {
		return resp, nil
	}

	canFallback := mode == "cascade" &&
		s.cfg.Chat.AllowModeFallback &&
		meta != nil && meta.EnumValue > 0 &&
		!needsTools && // legacy cannot carry the tool preamble
		ctx.Err() == nil // no point retrying for a caller that is gone
	if !canFallback {
		return nil, fmt.Errorf("chat (%s): %w", mode, err)
	}

	slog.Warn("windsurf_cascade_fallback_to_legacy", "model", modelKey, "error", err)
	resp, fallbackErr := s.chatLegacy(ctx, lease.Client, token.APIKey, meta, req.Messages, modelKey)
	if fallbackErr != nil {
		return nil, fmt.Errorf("chat (%s): %w; legacy fallback: %w", mode, err, fallbackErr)
	}
	resp.Mode = "legacy"
	return resp, nil
}
// resolveMode picks the chat mode for a model. An explicit "cascade" or
// "legacy" Chat.DefaultMode in the configuration wins; any other value defers
// to windsurf.GetChatMode with the configured legacy enum cutoff.
func (s *WindsurfChatService) resolveMode(meta *windsurf.ModelMeta) string {
	switch forced := s.cfg.Chat.DefaultMode; forced {
	case "cascade", "legacy":
		return forced
	default:
		return windsurf.GetChatMode(meta, int(s.cfg.Chat.LegacyEnumCutoff))
	}
}
// chatCascade sends the conversation through the cascade chat path.
//
// It first looks in the conversation pool for a cascade created by an earlier
// turn of the same conversation (matched by windsurf.FingerprintBefore). On a
// hit the existing cascade ID is reused and only the latest message is sent;
// if that attempt fails, the request is retried once as a fresh cascade with
// the full conversation text. After a successful call that returned both a
// cascade ID and text, the cascade is checked back into the pool under
// windsurf.FingerprintAfter so the next turn can find it.
//
// meta may be nil, in which case an empty model UID and a zero enum hint are
// sent. lsEndpoint is accepted for signature compatibility and is not used.
func (s *WindsurfChatService) chatCascade(ctx context.Context, client *windsurf.LocalLSClient, apiKey string, meta *windsurf.ModelMeta, messages []windsurf.ChatMessage, toolPreamble string, modelKey string, lsEndpoint string) (*WindsurfChatResponse, error) {
	modelUID := ""
	modelEnumHint := 0
	if meta != nil {
		modelUID = meta.ModelUID
		modelEnumHint = meta.EnumValue
	}

	fpBefore := windsurf.FingerprintBefore(messages, modelKey)
	entry := s.pool.Checkout(fpBefore)
	isResume := entry != nil && entry.CascadeID != ""

	var reuseCascadeID string
	if isResume {
		reuseCascadeID = entry.CascadeID
		// Log only a short prefix of the ID. The length is clamped because
		// slicing [:8] on a shorter ID would panic.
		logID := reuseCascadeID
		if len(logID) > 8 {
			logID = logID[:8]
		}
		slog.Info("windsurf_cascade_reuse_hit", "cascade_id", logID, "model", modelKey)
	}

	userText := buildCascadeText(messages, modelUID, isResume)
	result, err := client.StreamCascadeChat(ctx, apiKey, modelUID, userText, toolPreamble, reuseCascadeID, modelEnumHint)
	if err != nil && isResume {
		// The pooled cascade may be gone upstream: start over with a new
		// cascade and the full conversation text.
		slog.Warn("windsurf_cascade_reuse_failed", "error", err, "model", modelKey)
		userText = buildCascadeText(messages, modelUID, false)
		result, err = client.StreamCascadeChat(ctx, apiKey, modelUID, userText, toolPreamble, "", modelEnumHint)
	}
	if err != nil {
		return nil, err
	}

	if result.CascadeID != "" && result.Text != "" {
		fpAfter := windsurf.FingerprintAfter(messages, modelKey)
		s.pool.Checkin(fpAfter, &windsurf.ConversationEntry{
			CascadeID: result.CascadeID,
			APIKey:    apiKey,
		})
	}

	return &WindsurfChatResponse{
		Text:        result.Text,
		Thinking:    result.Thinking,
		Model:       modelKey,
		Mode:        "cascade",
		Usage:       result.Usage,
		FirstTextAt: result.FirstTextAt,
		ToolCalls:   result.ToolCalls,
	}, nil
}
// chatLegacy sends the conversation through the legacy chat path, identifying
// the model by its enum value and name. A nil meta results in a zero enum and
// an empty name being sent. The response carries only the reply text, the
// model key and Mode "legacy".
func (s *WindsurfChatService) chatLegacy(ctx context.Context, client *windsurf.LocalLSClient, apiKey string, meta *windsurf.ModelMeta, messages []windsurf.ChatMessage, modelKey string) (*WindsurfChatResponse, error) {
	var (
		enumValue int
		name      string
	)
	if meta != nil {
		enumValue, name = meta.EnumValue, meta.Name
	}

	reply, err := client.StreamLegacyChat(ctx, apiKey, messages, enumValue, name)
	if err != nil {
		return nil, err
	}

	resp := &WindsurfChatResponse{
		Mode:  "legacy",
		Model: modelKey,
		Text:  reply,
	}
	return resp, nil
}
const (
	// cascadeMaxHistoryBytes is the default byte budget for the system block
	// plus prior-turn history in a cascade payload.
	cascadeMaxHistoryBytes = 200_000
	// cascade1MHistoryBytes is the larger budget used when the model UID
	// contains "1m".
	cascade1MHistoryBytes = 900_000
	// cascadeMultiTurnPreamble is placed before the history of a multi-turn
	// payload.
	cascadeMultiTurnPreamble = "The following is a multi-turn conversation. You MUST remember and use all information from prior turns."
)

// cascadeHistoryBudget returns the history byte budget for a model UID:
// cascade1MHistoryBytes when the UID contains "1m" (case-insensitive),
// cascadeMaxHistoryBytes otherwise.
func cascadeHistoryBudget(modelUID string) int {
	budget := cascadeMaxHistoryBytes
	if lowered := strings.ToLower(modelUID); strings.Contains(lowered, "1m") {
		budget = cascade1MHistoryBytes
	}
	return budget
}
// buildCascadeText constructs the full text payload for SendUserCascadeMessage.
//
// Only "system", "user" and "assistant" messages are considered; messages with
// any other role are dropped. When there is no user/assistant message the
// result is "".
//
// The payload depends on the conversation shape:
//   - isResume: the content of the last user/assistant message only (the
//     cascade already has context), with no system prompt and no history.
//   - exactly one user/assistant message: the system prompt wrapped in
//     <system_instructions> (when non-empty) followed by the message content.
//   - several: the system block, the multi-turn preamble, prior turns wrapped
//     in <human>/<assistant> tags, and the final message wrapped in <human>.
//
// Prior turns are subject to the byte budget from cascadeHistoryBudget. The
// budget counts the system block plus the tagged history lines, not the final
// message. Turns are admitted newest-first, so the oldest ones are trimmed; the
// most recent prior turn is always kept, even when it alone exceeds the budget.
func buildCascadeText(messages []windsurf.ChatMessage, modelUID string, isResume bool) string {
	var systemParts []string
	var convo []windsurf.ChatMessage
	for _, m := range messages {
		switch m.Role {
		case "system":
			systemParts = append(systemParts, m.Content)
		case "user", "assistant":
			convo = append(convo, m)
		}
	}
	if len(convo) == 0 {
		return ""
	}
	latest := convo[len(convo)-1]

	// Resume: cascade already has context, only send the latest message.
	if isResume {
		return latest.Content
	}

	sysText := strings.TrimSpace(strings.Join(systemParts, "\n"))
	if sysText != "" {
		sysText = "<system_instructions>\n" + sysText + "\n</system_instructions>"
	}

	// Single turn: system + last message.
	if len(convo) == 1 {
		if sysText == "" {
			return latest.Content
		}
		return sysText + "\n\n" + latest.Content
	}

	// Multi-turn: build history with budget trimming.
	maxBytes := cascadeHistoryBudget(modelUID)
	historyBytes := len(sysText)

	// Walk backward from the second-to-last message, collecting turns that
	// fit. Lines are appended newest-first and reversed once afterwards, which
	// avoids re-copying the whole slice on every prepend.
	lines := make([]string, 0, len(convo)-1)
	for i := len(convo) - 2; i >= 0; i-- {
		m := convo[i]
		tag := "human"
		if m.Role == "assistant" {
			tag = "assistant"
		}
		line := "<" + tag + ">\n" + m.Content + "\n</" + tag + ">"
		if historyBytes+len(line) > maxBytes && len(lines) > 0 {
			slog.Info("windsurf_cascade_history_trimmed",
				"turn", i,
				"total_turns", len(convo),
				"kept_kb", historyBytes/1024,
			)
			break
		}
		lines = append(lines, line)
		historyBytes += len(line)
	}
	// Restore chronological order.
	for lo, hi := 0, len(lines)-1; lo < hi; lo, hi = lo+1, hi-1 {
		lines[lo], lines[hi] = lines[hi], lines[lo]
	}

	text := cascadeMultiTurnPreamble + "\n\n" +
		strings.Join(lines, "\n\n") + "\n\n" +
		"<human>\n" + latest.Content + "\n</human>"
	if sysText != "" {
		text = sysText + "\n\n" + text
	}
	return text
}