sub2api/backend/internal/service/windsurf_chat_service.go

package service

import (
	"context"
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"log/slog"
	"os"
	"strings"
	"sync"
	"time"

	"github.com/Wei-Shaw/sub2api/internal/config"
	"github.com/Wei-Shaw/sub2api/internal/pkg/windsurf"
)

// cascadeReuseTTL 是 Windsurf Cascade 会话 ID 在 Redis 中的存活时长。
// 与 LS 端 cascade 老化窗口对齐：超过 30 分钟即使本地 cache 命中，
// LS 也大概率返回 panel-not-found，由 chatCascade 的回退路径兜底。
const cascadeReuseTTL = 30 * time.Minute

type WindsurfChatService struct {
	cfg           config.WindsurfConfig
	lsService     *WindsurfLSService
	tokenProvider *WindsurfTokenProvider
	cache         GatewayCache
}

func NewWindsurfChatService(
	cfg config.WindsurfConfig,
	lsService *WindsurfLSService,
	tokenProvider *WindsurfTokenProvider,
	cache GatewayCache,
) *WindsurfChatService {
	return &WindsurfChatService{
		cfg:           cfg,
		lsService:     lsService,
		tokenProvider: tokenProvider,
		cache:         cache,
	}
}

type WindsurfChatRequest struct {
	AccountID int64
	// GroupID 来自 apiKey.GroupID，用于 Cascade 复用 cache 隔离。0 表示不参与缓存。
	GroupID int64
	// SessionHash 与 sticky session 共用，标识同一对话流。空串表示不复用 cascade。
	SessionHash  string
	Model        string
	Messages     []windsurf.ChatMessage
	Stream       bool
	Tools        []windsurf.OpenAITool
	ToolChoice   interface{}
	ToolPreamble string // computed by handler, passed through to Cascade
	// Images 当前 user turn 的 sidecar 图像（Cascade proto 的 SendUserCascadeMessageRequest.images field 6）。
	// 内容必须已通过 ValidateCascadeImages（或等价校验）。
	Images []windsurf.CascadeImage
}

type WindsurfChatResponse struct {
	Text        string
	Thinking    string
	Model       string
	Mode        string
	Usage       *windsurf.StepUsage // server-reported; nil if unavailable
	FirstTextAt time.Time           // when first text appeared (zero if no text)
	ToolCalls   []windsurf.NativeToolCall
}

func (s *WindsurfChatService) Chat(ctx context.Context, req *WindsurfChatRequest) (*WindsurfChatResponse, error) {
	token, err := s.tokenProvider.GetToken(ctx, req.AccountID)
	if err != nil {
		return nil, fmt.Errorf("get token: %w", err)
	}

	modelKey := windsurf.ResolveModel(req.Model)
	meta := windsurf.GetModelInfo(modelKey)

	mode := s.resolveMode(meta)
	// Tool emulation requires cascade mode for proto section injection
	if mode == "legacy" && req.ToolPreamble != "" {
		mode = "cascade"
	}

	var lease *windsurf.LSLease
	if token.LSBinding.ContainerID != "" || token.LSBinding.ContainerName != "" {
		lease, err = s.lsService.AcquireByBinding(token.LSBinding)
	} else {
		lease, err = s.lsService.Acquire(ctx, token.ProxyURL)
	}
	if err != nil {
		return nil, fmt.Errorf("acquire LS: %w", err)
	}
	defer lease.Release()

	var resp *WindsurfChatResponse
	switch mode {
	case "cascade":
		resp, err = s.chatCascade(ctx, lease.Client, token.APIKey, meta, req.Messages, req.ToolPreamble, modelKey, lease.Endpoint, req.Images, req.AccountID, req.GroupID, req.SessionHash)
	case "legacy":
		resp, err = s.chatLegacy(ctx, lease.Client, token.APIKey, meta, req.Messages, modelKey, req.AccountID)
	default:
		resp, err = s.chatCascade(ctx, lease.Client, token.APIKey, meta, req.Messages, req.ToolPreamble, modelKey, lease.Endpoint, req.Images, req.AccountID, req.GroupID, req.SessionHash)
	}

	if err != nil {
		if mode == "cascade" && s.cfg.Chat.AllowModeFallback && meta != nil && meta.EnumValue > 0 {
			slog.Warn("windsurf_cascade_fallback_to_legacy", "model", modelKey, "error", err)
			resp, err = s.chatLegacy(ctx, lease.Client, token.APIKey, meta, req.Messages, modelKey, req.AccountID)
			if err == nil {
				resp.Mode = "legacy"
			}
		}
		if err != nil {
			return nil, fmt.Errorf("chat (%s): %w", mode, err)
		}
	}

	return resp, nil
}

func (s *WindsurfChatService) resolveMode(meta *windsurf.ModelMeta) string {
	configMode := s.cfg.Chat.DefaultMode
	if configMode == "cascade" || configMode == "legacy" {
		return configMode
	}
	return windsurf.GetChatMode(meta, int(s.cfg.Chat.LegacyEnumCutoff))
}

var modelIdentityTemplates = map[string]string{
	"anthropic": "You are %s, a large language model created by Anthropic. You are helpful, harmless, and honest. When asked about your identity or which model you are, you MUST respond that you are %s, made by Anthropic.",
	"openai":    "You are %s, a large language model created by OpenAI. When asked about your identity, you MUST respond that you are %s, made by OpenAI.",
	"google":    "You are %s, a large language model created by Google. When asked about your identity, you MUST respond that you are %s, made by Google.",
	"deepseek":  "You are %s, a large language model created by DeepSeek. When asked about your identity, you MUST respond that you are %s, made by DeepSeek.",
	"xai":       "You are %s, a large language model created by xAI. When asked about your identity, you MUST respond that you are %s, made by xAI.",
}

func injectModelIdentity(messages []windsurf.ChatMessage, meta *windsurf.ModelMeta, modelKey string) []windsurf.ChatMessage {
	if meta == nil || meta.Provider == "" {
		return messages
	}
	for _, m := range messages {
		if m.Role == "system" {
			return messages
		}
	}
	tmpl, ok := modelIdentityTemplates[meta.Provider]
	if !ok {
		return messages
	}
	displayName := modelKey
	if meta.Name != "" {
		displayName = meta.Name
	}
	identity := windsurf.ChatMessage{
		Role:    "system",
		Content: fmt.Sprintf(tmpl, displayName, displayName),
	}
	return append([]windsurf.ChatMessage{identity}, messages...)
}

func (s *WindsurfChatService) chatCascade(
	ctx context.Context,
	client *windsurf.LocalLSClient,
	apiKey string,
	meta *windsurf.ModelMeta,
	messages []windsurf.ChatMessage,
	toolPreamble string,
	modelKey string,
	lsEndpoint string,
	images []windsurf.CascadeImage,
	accountID int64,
	groupID int64,
	sessionHash string,
) (*WindsurfChatResponse, error) {
	modelUID := ""
	modelEnumHint := 0
	if meta != nil {
		modelUID = meta.ModelUID
		modelEnumHint = meta.EnumValue
	}

	messages = injectModelIdentity(messages, meta, modelKey)

	if len(images) > 0 {
		found, ok, err := client.ModelSupportsImagesForAccount(ctx, accountID, apiKey, modelUID)
		if err != nil {
			slog.Warn("windsurf_cascade_caps_fetch_failed", "model", modelUID, "error", err)
		} else if found && !ok {
			return nil, fmt.Errorf("model %q does not support image inputs in Windsurf Cascade", modelUID)
		}
	}

	// reuse 路径：sessionHash 非空且 cache 命中即复用 cascade，
	// userText 缩为"system + 最后一条 user"——cascade trajectory 已承载历史。
	// tool_response 轮必须携带 full history；否则模型只看到工具输出，容易把
	// 大段 diff/stdout 误当成用户新请求。
	canReuse := sessionHash != "" && s.cache != nil && groupID != 0 && !lastUserIsToolContinuation(messages)
	cacheKey := ""
	reuseID := ""
	if canReuse {
		cacheKey = buildCascadeCacheKey(groupID, accountID, modelUID, lsEndpoint, sessionHash, sysPromptHash(messages))
		if id, err := s.cache.GetCascadeID(ctx, cacheKey); err == nil && id != "" {
			reuseID = id
		} else if err != nil {
			slog.Warn("windsurf_cascade_cache_get_failed", "error", err)
		}
	}

	var userText string
	if reuseID != "" {
		userText = buildCascadeTextForReuse(messages)
	} else {
		userText = buildCascadeText(messages, modelUID)
	}

	result, err := client.StreamCascadeChatForAccount(ctx, accountID, apiKey, modelUID, userText, toolPreamble, reuseID, modelEnumHint, images)

	// reuse 触发 panel-not-found：清缓存 + 用 full-history 重试一次。
	if err != nil && reuseID != "" && isPanelNotFound(err) {
		slog.Info("windsurf_cascade_reuse_invalidated", "cascade_id", reuseID, "reason", "panel_not_found")
		if cacheKey != "" {
			_ = s.cache.DeleteCascadeID(ctx, cacheKey)
		}
		userText = buildCascadeText(messages, modelUID)
		result, err = client.StreamCascadeChatForAccount(ctx, accountID, apiKey, modelUID, userText, toolPreamble, "", modelEnumHint, images)
	}

	if err != nil {
		// 任何错误都失效缓存（保守策略，避免下次复用到坏 cascade）。
		if canReuse && cacheKey != "" {
			_ = s.cache.DeleteCascadeID(ctx, cacheKey)
		}
		return nil, err
	}

	// 成功：写回 cache。
	if canReuse && cacheKey != "" && result.CascadeID != "" {
		if setErr := s.cache.SetCascadeID(ctx, cacheKey, result.CascadeID, cascadeReuseTTL); setErr != nil {
			slog.Warn("windsurf_cascade_cache_set_failed", "error", setErr)
		}
	}

	toolCalls := result.ToolCalls
	if len(toolCalls) == 0 && shouldRunNLUFallback(meta, result.Text) {
		if nluCalls := windsurf.ExtractToolCallsNLU(result.Text, availableToolNames(toolPreamble)); len(nluCalls) > 0 {
			slog.Info("windsurf_cascade_nlu_fallback",
				"model", modelKey, "calls", len(nluCalls), "text_chars", len(result.Text))
			toolCalls = make([]windsurf.NativeToolCall, 0, len(nluCalls))
			for _, c := range nluCalls {
				toolCalls = append(toolCalls, windsurf.NativeToolCall{
					ID:            c.ID,
					Name:          c.Name,
					ArgumentsJSON: c.ArgumentsJSON,
				})
			}
		}
	}

	return &WindsurfChatResponse{
		Text:        result.Text,
		Thinking:    result.Thinking,
		Model:       modelKey,
		Mode:        "cascade",
		Usage:       result.Usage,
		FirstTextAt: result.FirstTextAt,
		ToolCalls:   toolCalls,
	}, nil
}

// shouldRunNLUFallback decides whether to spend CPU on the NLU extractor.
// Two cases run the fallback: (a) model is explicitly NLU-flavored —
// always extract, even when no obvious signal, since these models routinely
// emit half-broken intent; (b) flavor is auto and the text shows NLU
// signals. tool_use flavored models (Claude family) are NEVER run through
// NLU because their tags are reliable and an erroneous fallback would
// invent calls the user did not request. Setting WINDSURF_NLU_FALLBACK_DISABLED=1
// short-circuits all of the above.
func shouldRunNLUFallback(meta *windsurf.ModelMeta, text string) bool {
	if text == "" {
		return false
	}
	if isNLUFallbackDisabled() {
		return false
	}
	flavor := windsurf.ResolveEmulationFlavor(meta)
	switch flavor {
	case windsurf.EmulationFlavorNLU:
		return true
	case windsurf.EmulationFlavorAuto:
		return windsurf.HasNLUSignal(text)
	default:
		return false
	}
}

// availableToolNames extracts tool names from the cascade tool preamble for
// validation in the NLU extractor. Format heuristic: lines like
// "- TOOL_NAME(...)" or "name: TOOL_NAME". Returns nil when nothing parses
// — extractor still works in best-effort mode.
func availableToolNames(preamble string) []string {
	if preamble == "" {
		return nil
	}
	var names []string
	seen := make(map[string]struct{})
	for _, raw := range strings.Split(preamble, "\n") {
		line := strings.TrimSpace(raw)
		if line == "" {
			continue
		}
		if name := extractToolNameFromPreambleLine(line); name != "" {
			if _, dup := seen[name]; !dup {
				seen[name] = struct{}{}
				names = append(names, name)
			}
		}
	}
	return names
}

func extractToolNameFromPreambleLine(line string) string {
	trim := strings.TrimLeft(line, "-* \t")
	if trim == "" {
		return ""
	}
	// "name: foo" form
	if strings.HasPrefix(strings.ToLower(trim), "name:") {
		return strings.TrimSpace(trim[len("name:"):])
	}
	// "foo(args)" form — take identifier before "(".
	if idx := strings.IndexByte(trim, '('); idx > 0 {
		candidate := strings.TrimSpace(trim[:idx])
		if isIdentifier(candidate) {
			return candidate
		}
	}
	return ""
}

func isIdentifier(s string) bool {
	if s == "" {
		return false
	}
	for i, r := range s {
		switch {
		case r == '_':
		case r >= 'a' && r <= 'z':
		case r >= 'A' && r <= 'Z':
		case r >= '0' && r <= '9' && i > 0:
		default:
			return false
		}
	}
	return true
}

// nluFallbackDisabledFn resolves the NLU-fallback kill switch. Tests may
// replace it via withNLUFallbackDisabledFn to assert behaviour. Production
// uses readNLUFallbackDisabledEnvOnce, which reads the env var exactly once.
var nluFallbackDisabledFn = readNLUFallbackDisabledEnvOnce

func isNLUFallbackDisabled() bool {
	return nluFallbackDisabledFn()
}

var (
	nluFallbackDisabledCachedOnce sync.Once
	nluFallbackDisabledCached     bool
)

func readNLUFallbackDisabledEnvOnce() bool {
	nluFallbackDisabledCachedOnce.Do(func() {
		v := strings.ToLower(strings.TrimSpace(os.Getenv("WINDSURF_NLU_FALLBACK_DISABLED")))
		nluFallbackDisabledCached = v == "1" || v == "true" || v == "yes" || v == "on"
	})
	return nluFallbackDisabledCached
}

// buildCascadeCacheKey 构造 Cascade 复用 cache 的 key。
// 任一组件变化（账号、模型、LS 实例、会话、system prompt）都会自动 cache miss。
func buildCascadeCacheKey(groupID, accountID int64, modelUID, lsEndpoint, sessionHash, sysHash string) string {
	h := sha256.New()
	fmt.Fprintf(h, "%d|%d|%s|%s|%s|%s", groupID, accountID, modelUID, lsEndpoint, sessionHash, sysHash)
	return hex.EncodeToString(h.Sum(nil))[:24]
}

// sysPromptHash 计算 system 消息内容的指纹。system 变化必须强制开新 cascade，
// 否则旧 cascade 内已建立的"角色/约束"语境会污染新对话。
func sysPromptHash(messages []windsurf.ChatMessage) string {
	h := sha256.New()
	for _, m := range messages {
		if m.Role == "system" {
			h.Write([]byte(m.Content))
			h.Write([]byte{0})
		}
	}
	return hex.EncodeToString(h.Sum(nil))[:16]
}

// isPanelNotFound 判定 LS 端"cascade panel state not found"错误。
// 与 windsurf.LocalLSClient 内部 panel-state-not-found 检测保持一致。
func isPanelNotFound(err error) bool {
	if err == nil {
		return false
	}
	s := strings.ToLower(err.Error())
	if strings.Contains(s, "panel state not found") {
		return true
	}
	return strings.Contains(s, "not_found") && strings.Contains(s, "panel")
}

// buildCascadeTextForReuse 构造 reuse 模式下的 user turn 文本：
// 仅含 system instructions + 最新一条 user 消息。Cascade 内部已通过 trajectory 保留前序历史。
// 注意：cacheKey 已包含 sysPromptHash，system 变化会强制 cache miss → 走 buildCascadeText 全量路径。
func buildCascadeTextForReuse(messages []windsurf.ChatMessage) string {
	var sysParts []string
	var lastUser string
	for _, m := range messages {
		if m.Role == "system" {
			sysParts = append(sysParts, m.Content)
		}
	}
	for i := len(messages) - 1; i >= 0; i-- {
		if messages[i].Role == "user" {
			lastUser = messages[i].Content
			break
		}
	}
	sys := strings.TrimSpace(strings.Join(sysParts, "\n"))
	if sys != "" {
		return "<system_instructions>\n" + sys + "\n</system_instructions>\n\n" + lastUser
	}
	return lastUser
}

func lastUserIsToolContinuation(messages []windsurf.ChatMessage) bool {
	for i := len(messages) - 1; i >= 0; i-- {
		if messages[i].Role != "user" {
			continue
		}
		return looksLikeToolContinuationText(messages[i].Content)
	}
	return false
}

func lastUserIsToolResponse(messages []windsurf.ChatMessage) bool {
	return lastUserIsToolContinuation(messages)
}

func looksLikeToolContinuationText(content string) bool {
	trimmed := strings.TrimSpace(content)
	if trimmed == "" {
		return false
	}
	lower := strings.ToLower(trimmed)
	switch {
	case strings.Contains(lower, "<tool_response"):
		return true
	case strings.Contains(lower, "<tool_result"):
		return true
	case strings.Contains(lower, "fresh tool output"):
		return true
	case strings.Contains(lower, "result of a tool call"):
		return true
	case strings.Contains(lower, "tool output shows"):
		return true
	case strings.Contains(lower, "searched for ") && strings.Contains(lower, "listed "):
		return true
	case strings.Contains(lower, "\nbash(") || strings.HasPrefix(lower, "bash("):
		return true
	case strings.Contains(lower, "\n⎿") || strings.Contains(lower, "\n  ⎿"):
		return true
	default:
		return false
	}
}

func (s *WindsurfChatService) chatLegacy(ctx context.Context, client *windsurf.LocalLSClient, apiKey string, meta *windsurf.ModelMeta, messages []windsurf.ChatMessage, modelKey string, accountID int64) (*WindsurfChatResponse, error) {
	modelEnum := 0
	modelName := ""
	if meta != nil {
		modelEnum = meta.EnumValue
		modelName = meta.Name
	}

	text, err := client.StreamLegacyChatForAccount(ctx, accountID, apiKey, messages, modelEnum, modelName)
	if err != nil {
		return nil, err
	}
	return &WindsurfChatResponse{
		Text:  text,
		Model: modelKey,
		Mode:  "legacy",
	}, nil
}

const (
	cascadeMaxHistoryBytes   = 200_000
	cascade1MHistoryBytes    = 900_000
	cascadeMultiTurnPreamble = "The following is a multi-turn conversation. You MUST remember and use all information from prior turns."
)

func cascadeHistoryBudget(modelUID string) int {
	if strings.Contains(strings.ToLower(modelUID), "1m") {
		return cascade1MHistoryBytes
	}
	return cascadeMaxHistoryBytes
}

// buildCascadeText constructs the full text payload for SendUserCascadeMessage.
// System prompt is wrapped in <system_instructions>, multi-turn history uses
// <human>/<assistant> tags with a budget cap to trim the oldest turns.
func buildCascadeText(messages []windsurf.ChatMessage, modelUID string) string {
	var systemParts []string
	var convo []windsurf.ChatMessage

	for _, m := range messages {
		if m.Role == "system" {
			systemParts = append(systemParts, m.Content)
		} else if m.Role == "user" || m.Role == "assistant" {
			convo = append(convo, m)
		}
	}

	if len(convo) == 0 {
		return ""
	}

	sysText := strings.TrimSpace(strings.Join(systemParts, "\n"))
	if sysText != "" {
		sysText = "<system_instructions>\n" + sysText + "\n</system_instructions>"
	}

	// Single turn: system + last message
	if len(convo) <= 1 {
		text := convo[len(convo)-1].Content
		if sysText != "" {
			text = sysText + "\n\n" + text
		}
		return text
	}

	// Multi-turn: build history with budget trimming
	maxBytes := cascadeHistoryBudget(modelUID)
	historyBytes := len(sysText)

	var lines []string
	droppedTurns := 0
	for i := len(convo) - 2; i >= 0; i-- {
		m := convo[i]
		tag := "human"
		if m.Role == "assistant" {
			tag = "assistant"
		}
		line := fmt.Sprintf("<%s>\n%s\n</%s>", tag, m.Content, tag)
		if historyBytes+len(line) > maxBytes && len(lines) > 0 {
			droppedTurns = i + 1
			slog.Info("windsurf_cascade_history_trimmed",
				"turn", i,
				"total_turns", len(convo),
				"kept_kb", historyBytes/1024,
				"dropped_turns", droppedTurns,
			)
			break
		}
		lines = append([]string{line}, lines...)
		historyBytes += len(line)
	}

	latest := convo[len(convo)-1]
	text := cascadeMultiTurnPreamble + "\n\n" +
		strings.Join(lines, "\n\n") + "\n\n" +
		"<human>\n" + latest.Content + "\n</human>"

	if sysText != "" {
		text = sysText + "\n\n" + text
	}
	return text
}