WIP commit保存以下定制工作以便后续合并 upstream v0.1.124-125: - Windsurf: tier access service, NLU extractor, cold threshold, Google login - Antigravity: client/oauth 调整 - Ops: log stream handler/broadcaster/middleware, OpsLogStreamView - Frontend: WindsurfLoginModal Google, GoogleIcon, AccountsView, sidebar/router/i18n
203 lines
5.1 KiB
Go
203 lines
5.1 KiB
Go
package windsurf
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"regexp"
|
|
"sort"
|
|
"strings"
|
|
)
|
|
|
|
// ExtractToolCallsNLU is a best-effort fallback parser used when a model
|
|
// (typically GLM-4.7 / Kimi family) emits tool-call intent in free-form
|
|
// text instead of well-formed <tool_use> tags.
|
|
//
|
|
// Strategy:
|
|
// 1. Look for "function:NAME" / "tool_call:NAME" / "call NAME" markers.
|
|
// 2. Look for the nearest JSON object after the marker as arguments.
|
|
// 3. Validate the function name is in the available tool list.
|
|
//
|
|
// availableTools is the list of tool names the request advertised. If empty,
|
|
// the extractor still tries name discovery but is best-effort. Returns nil
|
|
// when no plausible tool call is found — callers should treat that as
|
|
// "no tools" not "error".
|
|
func ExtractToolCallsNLU(text string, availableTools []string) []ToolCall {
|
|
if text == "" {
|
|
return nil
|
|
}
|
|
available := make(map[string]struct{}, len(availableTools))
|
|
for _, name := range availableTools {
|
|
if n := strings.TrimSpace(name); n != "" {
|
|
available[n] = struct{}{}
|
|
}
|
|
}
|
|
|
|
calls := nluFindMarkedCalls(text, available)
|
|
if len(calls) > 0 {
|
|
return calls
|
|
}
|
|
if len(available) > 0 {
|
|
// Last-resort: some models just say "I'll use edit_file with {...}"
|
|
// — try to spot any known tool name followed by a JSON object.
|
|
calls = nluFindBareNameCalls(text, available)
|
|
}
|
|
return calls
|
|
}
|
|
|
|
// nluSignalKeywords are the lowercase substrings whose presence marks a text
// as a likely malformed tool call. They are compared against the lowercased
// input, so every entry must itself be lowercase.
var nluSignalKeywords = []string{
	"tool_call",
	"function_call",
	"function:",
	"tool:",
	"arguments:",
	"i'll call",
	"i will call",
	"calling tool",
	"调用工具",
	"使用工具",
}

// HasNLUSignal reports whether text looks like an attempted tool call whose
// tags were malformed. It is the cheap pre-check used to decide whether the
// NLU extractor is worth running when EmulationFlavor=auto.
//
// The check is a case-insensitive substring search for any entry of
// nluSignalKeywords. It is deliberately conservative: a false negative is
// acceptable, and a false positive only costs a little wasted work.
func HasNLUSignal(text string) bool {
	if len(text) == 0 {
		return false
	}
	folded := strings.ToLower(text)
	for i := range nluSignalKeywords {
		if strings.Contains(folded, nluSignalKeywords[i]) {
			return true
		}
	}
	return false
}
|
|
|
|
// nluMarkerRE finds tool-call markers such as "function: name",
// "tool_call=name" or "call name" and captures the name in group 1.
//
// The pattern is assembled from four pieces:
//
//   - (?i)                              case-insensitive matching
//   - (?:function|tool_call|tool|call)  the marker keyword
//   - [\s:=]+                           one or more whitespace, ':' or '='
//   - ([a-zA-Z_][a-zA-Z0-9_]*)          the name (group 1)
//
// The name capture ends at the first character that is not a letter, digit
// or underscore. No word boundary is required before the keyword, so the
// keyword may be the tail of a longer word (e.g. the "call" in
// "function_call"). The regex does not look at the arguments; callers locate
// the JSON object that follows the match themselves.
var nluMarkerRE = regexp.MustCompile(`(?i)` +
	`(?:function|tool_call|tool|call)` +
	`[\s:=]+` +
	`([a-zA-Z_][a-zA-Z0-9_]*)`)
|
|
|
|
func nluFindMarkedCalls(text string, available map[string]struct{}) []ToolCall {
|
|
matches := nluMarkerRE.FindAllStringSubmatchIndex(text, -1)
|
|
if len(matches) == 0 {
|
|
return nil
|
|
}
|
|
var calls []ToolCall
|
|
seen := make(map[string]struct{})
|
|
for _, m := range matches {
|
|
name := text[m[2]:m[3]]
|
|
if _, ok := available[name]; len(available) > 0 && !ok {
|
|
continue
|
|
}
|
|
if _, dup := seen[name]; dup {
|
|
continue
|
|
}
|
|
args := nluFindNearestJSONAfter(text, m[1])
|
|
if args == "" {
|
|
continue
|
|
}
|
|
seen[name] = struct{}{}
|
|
calls = append(calls, ToolCall{
|
|
ID: nluCallID(name, len(calls)),
|
|
Name: name,
|
|
ArgumentsJSON: args,
|
|
})
|
|
}
|
|
return calls
|
|
}
|
|
|
|
func nluFindBareNameCalls(text string, available map[string]struct{}) []ToolCall {
|
|
// Iterate available names in deterministic (alphabetical) order so the
|
|
// returned slice is stable across runs and Go map randomization. Without
|
|
// this, two identical inputs can yield differently ordered tool-call
|
|
// slices, which makes upstream replay/retry behaviour inconsistent.
|
|
names := make([]string, 0, len(available))
|
|
for name := range available {
|
|
names = append(names, name)
|
|
}
|
|
sort.Strings(names)
|
|
|
|
var calls []ToolCall
|
|
seen := make(map[string]struct{})
|
|
for _, name := range names {
|
|
idx := strings.Index(text, name)
|
|
if idx < 0 {
|
|
continue
|
|
}
|
|
args := nluFindNearestJSONAfter(text, idx+len(name))
|
|
if args == "" {
|
|
continue
|
|
}
|
|
if _, dup := seen[name]; dup {
|
|
continue
|
|
}
|
|
seen[name] = struct{}{}
|
|
calls = append(calls, ToolCall{
|
|
ID: nluCallID(name, len(calls)),
|
|
Name: name,
|
|
ArgumentsJSON: args,
|
|
})
|
|
}
|
|
return calls
|
|
}
|
|
|
|
// nluCallID builds the namespaced ID "nlu_<name>_<idx>" for an NLU-extracted
// tool call. The result is deterministic: the same name and idx always give
// the same ID.
//
// NOTE(review): the callers in this file pass the call's position within the
// slice being built, so the suffix distinguishes calls inside one extraction
// result. Nothing here makes IDs unique across separate extractions (e.g.
// across turns) — confirm whether callers rely on that.
func nluCallID(name string, idx int) string {
	const layout = "nlu_%s_%d"
	return fmt.Sprintf(layout, name, idx)
}
|
|
|
|
// nluFindNearestJSONAfter returns the first valid JSON object literal that
// lies entirely inside the lookahead window text[start:start+4096], or ""
// when there is none.
//
// Brace-balanced spans that are not valid JSON (for example a "{path}"
// placeholder) are skipped and the search resumes after them, so a
// placeholder in front of the real arguments does not hide them. The search
// never descends into a rejected span, and it stops at the first span whose
// closing brace is missing from the window: returning an inner object of
// malformed or truncated arguments would hand the caller partial arguments.
//
// An out-of-range start yields "" instead of panicking.
func nluFindNearestJSONAfter(text string, start int) string {
	const lookahead = 4096
	if start < 0 || start >= len(text) {
		return ""
	}
	end := start + lookahead
	if end > len(text) {
		end = len(text)
	}
	region := text[start:end]

	// Each iteration resumes after the previously rejected span, so the
	// whole search is a single left-to-right pass over the window.
	for from := 0; ; {
		rel := strings.IndexByte(region[from:], '{')
		if rel < 0 {
			return ""
		}
		open := from + rel
		closing := nluMatchingBrace(region, open)
		if closing < 0 {
			return ""
		}
		if candidate := region[open : closing+1]; json.Valid([]byte(candidate)) {
			return candidate
		}
		from = closing + 1
	}
}

// nluMatchingBrace returns the index in s of the '}' that balances the '{'
// at s[open], or -1 when the braces do not balance before s ends.
//
// Braces inside double-quoted strings are ignored, and a backslash makes the
// following byte inert so an escaped quote does not end a string. This is
// only a structural scan; the caller validates the span as JSON.
func nluMatchingBrace(s string, open int) int {
	depth := 0
	inString, escaped := false, false
	for i := open; i < len(s); i++ {
		ch := s[i]
		switch {
		case escaped:
			escaped = false
		case ch == '\\':
			escaped = true
		case ch == '"':
			inString = !inString
		case inString:
			// Structural characters inside a string literal do not count.
		case ch == '{':
			depth++
		case ch == '}':
			depth--
			if depth == 0 {
				return i
			}
		}
	}
	return -1
}
|