This preserves DeepSeek reasoning_content across chat compatibility paths.

DeepSeek thinking-mode tool-call conversations may require the assistant
  reasoning_content from previous turns to be sent back in later requests. Without
  preserving it, those conversations can fail or lose reasoning context.

  Changes:
  - Preserve assistant reasoning_content when converting Chat Completions messages
    to Responses input by wrapping it as a thinking block.
  - Add regression coverage for non-streaming DeepSeek responses.
  - Add regression coverage for streaming DeepSeek deltas.
  - Add regression coverage that request-side messages[].reasoning_content is
    passed through with tool calls.

  Tests:
  go test -tags=unit ./internal/pkg/apicompat ./internal/service -run
  'TestChatCompletionsToResponses_AssistantReasoningContentPreserved|
  TestChatCompletionsToResponses_AssistantThinkingTagPreserved|
  TestForwardAsRawChatCompletions_PreservesDeepSeekReasoningContent|
  TestForwardAsRawChatCompletions_ForcesStreamUsageUpstreamAndPassesUsageDownstream
This commit is contained in:
L494264Tt 2026-05-18 11:22:23 +08:00
parent f5bd25bea0
commit 1d47fd6300
3 changed files with 149 additions and 5 deletions

View File

@ -379,6 +379,34 @@ func TestChatCompletionsToResponses_AssistantThinkingTagPreserved(t *testing.T)
assert.Contains(t, parts[0].Text, "final answer")
}
func TestChatCompletionsToResponses_AssistantReasoningContentPreserved(t *testing.T) {
req := &ChatCompletionsRequest{
Model: "gpt-4o",
Messages: []ChatMessage{
{Role: "user", Content: json.RawMessage(`"Hi"`)},
{
Role: "assistant",
ReasoningContent: "internal plan",
Content: json.RawMessage(`"final answer"`),
},
},
}
resp, err := ChatCompletionsToResponses(req)
require.NoError(t, err)
var items []ResponsesInputItem
require.NoError(t, json.Unmarshal(resp.Input, &items))
require.Len(t, items, 2)
var parts []ResponsesContentPart
require.NoError(t, json.Unmarshal(items[1].Content, &parts))
require.Len(t, parts, 1)
assert.Equal(t, "output_text", parts[0].Type)
assert.Contains(t, parts[0].Text, "<thinking>internal plan</thinking>")
assert.Contains(t, parts[0].Text, "final answer")
}
// ---------------------------------------------------------------------------
// ResponsesToChatCompletions tests
// ---------------------------------------------------------------------------

View File

@ -150,6 +150,13 @@ func chatUserToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
// empty/nil and there are tool_calls, only function_call items are emitted.
func chatAssistantToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
var items []ResponsesInputItem
var content strings.Builder
if m.ReasoningContent != "" {
content.WriteString("<thinking>")
content.WriteString(m.ReasoningContent)
content.WriteString("</thinking>")
}
// Emit assistant message with output_text if content is non-empty.
if len(m.Content) > 0 {
@ -158,15 +165,22 @@ func chatAssistantToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
return nil, err
}
if s != "" {
parts := []ResponsesContentPart{{Type: "output_text", Text: s}}
partsJSON, err := json.Marshal(parts)
if err != nil {
return nil, err
if content.Len() > 0 {
content.WriteByte('\n')
}
items = append(items, ResponsesInputItem{Role: "assistant", Content: partsJSON})
content.WriteString(s)
}
}
if content.Len() > 0 {
parts := []ResponsesContentPart{{Type: "output_text", Text: content.String()}}
partsJSON, err := json.Marshal(parts)
if err != nil {
return nil, err
}
items = append(items, ResponsesInputItem{Role: "assistant", Content: partsJSON})
}
// Emit one function_call item per tool_call.
for _, tc := range m.ToolCalls {
args := tc.Function.Arguments

View File

@ -118,6 +118,108 @@ func TestForwardAsRawChatCompletions_ForcesStreamUsageUpstreamAndPassesUsageDown
require.Contains(t, rec.Body.String(), "data: [DONE]")
}
func TestForwardAsRawChatCompletions_PreservesDeepSeekReasoningContentNonStreaming(t *testing.T) {
gin.SetMode(gin.TestMode)
body := []byte(`{"model":"deepseek-reasoner","messages":[{"role":"user","content":"hello"}],"stream":false}`)
rec := httptest.NewRecorder()
c, _ := gin.CreateTestContext(rec)
c.Request = httptest.NewRequest(http.MethodPost, "/v1/chat/completions", bytes.NewReader(body))
c.Request.Header.Set("Content-Type", "application/json")
upstreamJSON := `{"id":"chatcmpl_reasoning","object":"chat.completion","model":"deepseek-reasoner","choices":[{"index":0,"message":{"role":"assistant","reasoning_content":"think first","content":"final answer"},"finish_reason":"stop"}],"usage":{"prompt_tokens":3,"completion_tokens":5,"total_tokens":8}}`
upstream := &httpUpstreamRecorder{resp: &http.Response{
StatusCode: http.StatusOK,
Header: http.Header{"Content-Type": []string{"application/json"}, "x-request-id": []string{"rid_deepseek_reasoning_json"}},
Body: io.NopCloser(strings.NewReader(upstreamJSON)),
}}
svc := &OpenAIGatewayService{
cfg: rawChatCompletionsTestConfig(),
httpUpstream: upstream,
}
account := rawChatCompletionsTestAccount()
result, err := svc.forwardAsRawChatCompletions(context.Background(), c, account, body, "")
require.NoError(t, err)
require.NotNil(t, result)
require.Equal(t, 3, result.Usage.InputTokens)
require.Equal(t, 5, result.Usage.OutputTokens)
require.Equal(t, "think first", gjson.Get(rec.Body.String(), "choices.0.message.reasoning_content").String())
require.Equal(t, "final answer", gjson.Get(rec.Body.String(), "choices.0.message.content").String())
}
func TestForwardAsRawChatCompletions_PreservesDeepSeekReasoningContentStreaming(t *testing.T) {
gin.SetMode(gin.TestMode)
body := []byte(`{"model":"deepseek-reasoner","messages":[{"role":"user","content":"hello"}],"stream":true}`)
rec := httptest.NewRecorder()
c, _ := gin.CreateTestContext(rec)
c.Request = httptest.NewRequest(http.MethodPost, "/v1/chat/completions", bytes.NewReader(body))
c.Request.Header.Set("Content-Type", "application/json")
upstreamBody := strings.Join([]string{
`data: {"id":"chatcmpl_reasoning","object":"chat.completion.chunk","model":"deepseek-reasoner","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}`,
"",
`data: {"id":"chatcmpl_reasoning","object":"chat.completion.chunk","model":"deepseek-reasoner","choices":[{"index":0,"delta":{"reasoning_content":"think first"},"finish_reason":null}]}`,
"",
`data: {"id":"chatcmpl_reasoning","object":"chat.completion.chunk","model":"deepseek-reasoner","choices":[{"index":0,"delta":{"content":"final answer"},"finish_reason":null}]}`,
"",
`data: {"id":"chatcmpl_reasoning","object":"chat.completion.chunk","model":"deepseek-reasoner","choices":[],"usage":{"prompt_tokens":3,"completion_tokens":5,"total_tokens":8}}`,
"",
"data: [DONE]",
"",
}, "\n")
upstream := &httpUpstreamRecorder{resp: &http.Response{
StatusCode: http.StatusOK,
Header: http.Header{"Content-Type": []string{"text/event-stream"}, "x-request-id": []string{"rid_deepseek_reasoning_stream"}},
Body: io.NopCloser(strings.NewReader(upstreamBody)),
}}
svc := &OpenAIGatewayService{
cfg: rawChatCompletionsTestConfig(),
httpUpstream: upstream,
}
account := rawChatCompletionsTestAccount()
result, err := svc.forwardAsRawChatCompletions(context.Background(), c, account, body, "")
require.NoError(t, err)
require.NotNil(t, result)
require.Equal(t, 3, result.Usage.InputTokens)
require.Equal(t, 5, result.Usage.OutputTokens)
require.Contains(t, rec.Body.String(), `"reasoning_content":"think first"`)
require.Contains(t, rec.Body.String(), `"content":"final answer"`)
require.Contains(t, rec.Body.String(), "data: [DONE]")
}
func TestForwardAsRawChatCompletions_PreservesDeepSeekReasoningContentInRequest(t *testing.T) {
gin.SetMode(gin.TestMode)
body := []byte(`{"model":"deepseek-v4-pro","messages":[{"role":"user","content":"weather"},{"role":"assistant","reasoning_content":"need tool","content":"","tool_calls":[{"id":"call_1","type":"function","function":{"name":"get_weather","arguments":"{}"}}]},{"role":"tool","tool_call_id":"call_1","content":"cloudy"}],"stream":false}`)
rec := httptest.NewRecorder()
c, _ := gin.CreateTestContext(rec)
c.Request = httptest.NewRequest(http.MethodPost, "/v1/chat/completions", bytes.NewReader(body))
c.Request.Header.Set("Content-Type", "application/json")
upstream := &httpUpstreamRecorder{resp: &http.Response{
StatusCode: http.StatusOK,
Header: http.Header{"Content-Type": []string{"application/json"}, "x-request-id": []string{"rid_deepseek_reasoning_request"}},
Body: io.NopCloser(strings.NewReader(`{"id":"chatcmpl_request","object":"chat.completion","model":"deepseek-v4-pro","choices":[{"index":0,"message":{"role":"assistant","content":"done"},"finish_reason":"stop"}],"usage":{"prompt_tokens":4,"completion_tokens":2,"total_tokens":6}}`)),
}}
svc := &OpenAIGatewayService{
cfg: rawChatCompletionsTestConfig(),
httpUpstream: upstream,
}
account := rawChatCompletionsTestAccount()
result, err := svc.forwardAsRawChatCompletions(context.Background(), c, account, body, "")
require.NoError(t, err)
require.NotNil(t, result)
require.Equal(t, "need tool", gjson.GetBytes(upstream.lastBody, "messages.1.reasoning_content").String())
require.Equal(t, "get_weather", gjson.GetBytes(upstream.lastBody, "messages.1.tool_calls.0.function.name").String())
}
func TestForwardAsRawChatCompletions_ClientDisconnectDrainsUsage(t *testing.T) {
gin.SetMode(gin.TestMode)