package windsurf import ( "encoding/json" "fmt" "regexp" "sort" "strings" ) // ExtractToolCallsNLU is a best-effort fallback parser used when a model // (typically GLM-4.7 / Kimi family) emits tool-call intent in free-form // text instead of well-formed tags. // // Strategy: // 1. Look for "function:NAME" / "tool_call:NAME" / "call NAME" markers. // 2. Look for the nearest JSON object after the marker as arguments. // 3. Validate the function name is in the available tool list. // // availableTools is the list of tool names the request advertised. If empty, // the extractor still tries name discovery but is best-effort. Returns nil // when no plausible tool call is found — callers should treat that as // "no tools" not "error". func ExtractToolCallsNLU(text string, availableTools []string) []ToolCall { if text == "" { return nil } available := make(map[string]struct{}, len(availableTools)) for _, name := range availableTools { if n := strings.TrimSpace(name); n != "" { available[n] = struct{}{} } } calls := nluFindMarkedCalls(text, available) if len(calls) > 0 { return calls } if len(available) > 0 { // Last-resort: some models just say "I'll use edit_file with {...}" // — try to spot any known tool name followed by a JSON object. calls = nluFindBareNameCalls(text, available) } return calls } // HasNLUSignal reports whether `text` looks like it intended to call a tool // but malformed the tags. Used to decide whether to spend CPU on the NLU // extractor when EmulationFlavor=auto. Conservative — false negatives are // fine, false positives waste a few microseconds. func HasNLUSignal(text string) bool { if text == "" { return false } lower := strings.ToLower(text) for _, kw := range nluSignalKeywords { if strings.Contains(lower, kw) { return true } } return false } var nluSignalKeywords = []string{ "tool_call", "function_call", "function:", "tool:", "arguments:", "i'll call", "i will call", "calling tool", "调用工具", "使用工具", } // nluMarkerRE matches "function: name", "tool_call: name", "call name" // followed (possibly with delimiters) by a JSON object. The name capture // stops at whitespace, comma, paren, or brace. var nluMarkerRE = regexp.MustCompile(`(?i)(?:function|tool_call|tool|call)[\s:=]+([a-zA-Z_][a-zA-Z0-9_]*)`) func nluFindMarkedCalls(text string, available map[string]struct{}) []ToolCall { matches := nluMarkerRE.FindAllStringSubmatchIndex(text, -1) if len(matches) == 0 { return nil } var calls []ToolCall seen := make(map[string]struct{}) for _, m := range matches { name := text[m[2]:m[3]] if _, ok := available[name]; len(available) > 0 && !ok { continue } if _, dup := seen[name]; dup { continue } args := nluFindNearestJSONAfter(text, m[1]) if args == "" { continue } seen[name] = struct{}{} calls = append(calls, ToolCall{ ID: nluCallID(name, len(calls)), Name: name, ArgumentsJSON: args, }) } return calls } func nluFindBareNameCalls(text string, available map[string]struct{}) []ToolCall { // Iterate available names in deterministic (alphabetical) order so the // returned slice is stable across runs and Go map randomization. Without // this, two identical inputs can yield differently ordered tool-call // slices, which makes upstream replay/retry behaviour inconsistent. names := make([]string, 0, len(available)) for name := range available { names = append(names, name) } sort.Strings(names) var calls []ToolCall seen := make(map[string]struct{}) for _, name := range names { idx := strings.Index(text, name) if idx < 0 { continue } args := nluFindNearestJSONAfter(text, idx+len(name)) if args == "" { continue } if _, dup := seen[name]; dup { continue } seen[name] = struct{}{} calls = append(calls, ToolCall{ ID: nluCallID(name, len(calls)), Name: name, ArgumentsJSON: args, }) } return calls } // nluCallID generates a stable, namespaced ID for an NLU-extracted tool // call. The numeric suffix prevents collisions when the same tool name // appears in multiple turns within a session. func nluCallID(name string, idx int) string { return fmt.Sprintf("nlu_%s_%d", name, idx) } // nluFindNearestJSONAfter scans forward from `start` and returns the first // JSON object literal it encounters. Empty string when none found within a // reasonable lookahead (4KB). func nluFindNearestJSONAfter(text string, start int) string { const lookahead = 4096 end := start + lookahead if end > len(text) { end = len(text) } region := text[start:end] open := strings.Index(region, "{") if open < 0 { return "" } depth := 0 inString := false escape := false for i := open; i < len(region); i++ { ch := region[i] if escape { escape = false continue } if ch == '\\' { escape = true continue } if ch == '"' { inString = !inString continue } if inString { continue } switch ch { case '{': depth++ case '}': depth-- if depth == 0 { candidate := region[open : i+1] if json.Valid([]byte(candidate)) { return candidate } return "" } } } return "" }