From 20f5340784484d8be82c2139bb40ff34f1a7b715 Mon Sep 17 00:00:00 2001 From: JIA-ss <627723154@qq.com> Date: Thu, 28 May 2026 00:38:25 +0800 Subject: [PATCH] =?UTF-8?q?fix(apicompat):=20Responses=E2=86=92Chat=20?= =?UTF-8?q?=E8=BD=AC=E6=8D=A2=E8=A1=A5=E9=BD=90=20completion=5Ftokens=5Fde?= =?UTF-8?q?tails=20=E9=80=8F=E4=BC=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OpenAI Responses API 在 gpt-5.x 等 reasoning 模型上会返回 output_tokens_details.reasoning_tokens, 但 ResponsesToChatCompletions 只映射了 input_tokens_details.cached_tokens, 导致客户端拿到的 chat.completion.usage 中 completion_tokens 出现无法解释的波动 (短 prompt 也可能 30+ token), 且缺失 reasoning_tokens 细分字段, 难以与 OpenAI 原生 Chat Completions 响应对账。 按 OpenAI 官方 CompletionUsage schema (openai/openai-go SDK completion.go) 补齐所有 token-details 字段, 全部 omitempty: prompt_tokens_details: - cached_tokens (原已支持) - audio_tokens (新增) completion_tokens_details: - reasoning_tokens (新增) - audio_tokens (新增) - accepted_prediction_tokens (新增) - rejected_prediction_tokens (新增) 实现细节: - 抽出 promptDetailsFromResponses / completionDetailsFromResponses 两个 helper, 全零字段返回 nil - 非流路径 ResponsesToChatCompletions 复用已存在的 chatUsageFromResponsesUsage helper, 消除两条路径间的重复 - 非 reasoning / 非 audio 上游 (Anthropic, Gemini, gpt-4o) 不填这些 字段, helper 返回 nil → CompletionTokensDetails 不输出, 对现有响应 字节级兼容 新增单测: - TestResponsesToChatCompletions_ReasoningTokens - TestResponsesToChatCompletions_AllTokenDetailsPassThrough - TestResponsesToChatCompletions_NoReasoningTokensWhenZero - TestResponsesEventToChatChunks_CompletedWithReasoningTokens --- .../chatcompletions_responses_test.go | 135 ++++++++++++++++++ .../apicompat/responses_to_chatcompletions.go | 58 +++++--- backend/internal/pkg/apicompat/types.go | 30 +++- 3 files changed, 198 insertions(+), 25 deletions(-) diff --git a/backend/internal/pkg/apicompat/chatcompletions_responses_test.go b/backend/internal/pkg/apicompat/chatcompletions_responses_test.go index 016c2415..b03b012f 100644 --- a/backend/internal/pkg/apicompat/chatcompletions_responses_test.go +++ b/backend/internal/pkg/apicompat/chatcompletions_responses_test.go @@ -663,6 +663,115 @@ func TestResponsesToChatCompletions_CachedTokens(t *testing.T) { assert.Equal(t, 80, chat.Usage.PromptTokensDetails.CachedTokens) } +func TestResponsesToChatCompletions_ReasoningTokens(t *testing.T) { + resp := &ResponsesResponse{ + ID: "resp_reasoning", + Status: "completed", + Output: []ResponsesOutput{ + { + Type: "message", + Content: []ResponsesContentPart{{Type: "output_text", Text: "ping"}}, + }, + }, + Usage: &ResponsesUsage{ + InputTokens: 24, + OutputTokens: 33, + TotalTokens: 57, + OutputTokensDetails: &ResponsesOutputTokensDetails{ + ReasoningTokens: 32, + }, + }, + } + + chat := ResponsesToChatCompletions(resp, "gpt-5.5") + require.NotNil(t, chat.Usage) + assert.Equal(t, 33, chat.Usage.CompletionTokens) + require.NotNil(t, chat.Usage.CompletionTokensDetails) + assert.Equal(t, 32, chat.Usage.CompletionTokensDetails.ReasoningTokens) +} + +func TestResponsesToChatCompletions_AllTokenDetailsPassThrough(t *testing.T) { + // Covers the full OpenAI CompletionUsage detail field set so future audio + // and prediction-outputs responses propagate without further changes. + resp := &ResponsesResponse{ + ID: "resp_full_details", + Status: "completed", + Output: []ResponsesOutput{ + { + Type: "message", + Content: []ResponsesContentPart{{Type: "output_text", Text: "x"}}, + }, + }, + Usage: &ResponsesUsage{ + InputTokens: 100, + OutputTokens: 50, + TotalTokens: 150, + InputTokensDetails: &ResponsesInputTokensDetails{ + CachedTokens: 60, + AudioTokens: 4, + }, + OutputTokensDetails: &ResponsesOutputTokensDetails{ + ReasoningTokens: 30, + AudioTokens: 2, + AcceptedPredictionTokens: 10, + RejectedPredictionTokens: 3, + }, + }, + } + + chat := ResponsesToChatCompletions(resp, "gpt-5.5") + require.NotNil(t, chat.Usage) + require.NotNil(t, chat.Usage.PromptTokensDetails) + assert.Equal(t, 60, chat.Usage.PromptTokensDetails.CachedTokens) + assert.Equal(t, 4, chat.Usage.PromptTokensDetails.AudioTokens) + + require.NotNil(t, chat.Usage.CompletionTokensDetails) + assert.Equal(t, 30, chat.Usage.CompletionTokensDetails.ReasoningTokens) + assert.Equal(t, 2, chat.Usage.CompletionTokensDetails.AudioTokens) + assert.Equal(t, 10, chat.Usage.CompletionTokensDetails.AcceptedPredictionTokens) + assert.Equal(t, 3, chat.Usage.CompletionTokensDetails.RejectedPredictionTokens) + + raw, err := json.Marshal(chat.Usage) + require.NoError(t, err) + assert.Contains(t, string(raw), `"prompt_tokens_details"`) + assert.Contains(t, string(raw), `"completion_tokens_details"`) + assert.Contains(t, string(raw), `"reasoning_tokens":30`) + assert.Contains(t, string(raw), `"accepted_prediction_tokens":10`) +} + +func TestResponsesToChatCompletions_NoReasoningTokensWhenZero(t *testing.T) { + // Non-reasoning models do not return reasoning_tokens. The mapping must + // omit completion_tokens_details entirely rather than emitting a zero-valued + // field, so non-reasoning responses stay clean. + resp := &ResponsesResponse{ + ID: "resp_no_reasoning", + Status: "completed", + Output: []ResponsesOutput{ + { + Type: "message", + Content: []ResponsesContentPart{{Type: "output_text", Text: "hi"}}, + }, + }, + Usage: &ResponsesUsage{ + InputTokens: 10, + OutputTokens: 5, + TotalTokens: 15, + OutputTokensDetails: &ResponsesOutputTokensDetails{ + ReasoningTokens: 0, + }, + }, + } + + chat := ResponsesToChatCompletions(resp, "gpt-4o") + require.NotNil(t, chat.Usage) + assert.Nil(t, chat.Usage.CompletionTokensDetails) + + raw, err := json.Marshal(chat.Usage) + require.NoError(t, err) + assert.NotContains(t, string(raw), "completion_tokens_details") + assert.NotContains(t, string(raw), "reasoning_tokens") +} + func TestResponsesToChatCompletions_WebSearch(t *testing.T) { resp := &ResponsesResponse{ ID: "resp_ws", @@ -825,6 +934,32 @@ func TestResponsesEventToChatChunks_Completed(t *testing.T) { assert.Equal(t, 30, chunks[1].Usage.PromptTokensDetails.CachedTokens) } +func TestResponsesEventToChatChunks_CompletedWithReasoningTokens(t *testing.T) { + state := NewResponsesEventToChatState() + state.Model = "gpt-5.5" + state.IncludeUsage = true + + chunks := ResponsesEventToChatChunks(&ResponsesStreamEvent{ + Type: "response.completed", + Response: &ResponsesResponse{ + Status: "completed", + Usage: &ResponsesUsage{ + InputTokens: 24, + OutputTokens: 33, + TotalTokens: 57, + OutputTokensDetails: &ResponsesOutputTokensDetails{ + ReasoningTokens: 32, + }, + }, + }, + }, state) + require.Len(t, chunks, 2) + + require.NotNil(t, chunks[1].Usage) + require.NotNil(t, chunks[1].Usage.CompletionTokensDetails) + assert.Equal(t, 32, chunks[1].Usage.CompletionTokensDetails.ReasoningTokens) +} + func TestResponsesEventToChatChunks_ResponseDone(t *testing.T) { state := NewResponsesEventToChatState() state.Model = "gpt-4o" diff --git a/backend/internal/pkg/apicompat/responses_to_chatcompletions.go b/backend/internal/pkg/apicompat/responses_to_chatcompletions.go index 7e8354ee..8809b4fc 100644 --- a/backend/internal/pkg/apicompat/responses_to_chatcompletions.go +++ b/backend/internal/pkg/apicompat/responses_to_chatcompletions.go @@ -81,19 +81,7 @@ func ResponsesToChatCompletions(resp *ResponsesResponse, model string) *ChatComp FinishReason: finishReason, }} - if resp.Usage != nil { - usage := &ChatUsage{ - PromptTokens: resp.Usage.InputTokens, - CompletionTokens: resp.Usage.OutputTokens, - TotalTokens: resp.Usage.InputTokens + resp.Usage.OutputTokens, - } - if resp.Usage.InputTokensDetails != nil && resp.Usage.InputTokensDetails.CachedTokens > 0 { - usage.PromptTokensDetails = &ChatTokenDetails{ - CachedTokens: resp.Usage.InputTokensDetails.CachedTokens, - } - } - out.Usage = usage - } + out.Usage = chatUsageFromResponsesUsage(resp.Usage) return out } @@ -341,14 +329,48 @@ func chatUsageFromResponsesUsage(u *ResponsesUsage) *ChatUsage { CompletionTokens: u.OutputTokens, TotalTokens: u.InputTokens + u.OutputTokens, } - if u.InputTokensDetails != nil && u.InputTokensDetails.CachedTokens > 0 { - usage.PromptTokensDetails = &ChatTokenDetails{ - CachedTokens: u.InputTokensDetails.CachedTokens, - } - } + usage.PromptTokensDetails = promptDetailsFromResponses(u.InputTokensDetails) + usage.CompletionTokensDetails = completionDetailsFromResponses(u.OutputTokensDetails) return usage } +// promptDetailsFromResponses maps Responses-API input_tokens_details into a +// Chat-Completions prompt_tokens_details. Returns nil when nothing would be +// emitted, so upstreams that do not break down prompt usage stay clean. +func promptDetailsFromResponses(src *ResponsesInputTokensDetails) *ChatTokenDetails { + if src == nil { + return nil + } + if src.CachedTokens == 0 && src.AudioTokens == 0 { + return nil + } + return &ChatTokenDetails{ + CachedTokens: src.CachedTokens, + AudioTokens: src.AudioTokens, + } +} + +// completionDetailsFromResponses maps Responses-API output_tokens_details +// into a Chat-Completions completion_tokens_details. Mirrors the OpenAI +// official CompletionUsage schema: reasoning_tokens, audio_tokens, and +// the predicted-outputs accepted/rejected counts. Returns nil when nothing +// would be emitted so non-reasoning, non-audio responses stay clean. +func completionDetailsFromResponses(src *ResponsesOutputTokensDetails) *ChatTokenDetails { + if src == nil { + return nil + } + if src.ReasoningTokens == 0 && src.AudioTokens == 0 && + src.AcceptedPredictionTokens == 0 && src.RejectedPredictionTokens == 0 { + return nil + } + return &ChatTokenDetails{ + ReasoningTokens: src.ReasoningTokens, + AudioTokens: src.AudioTokens, + AcceptedPredictionTokens: src.AcceptedPredictionTokens, + RejectedPredictionTokens: src.RejectedPredictionTokens, + } +} + func makeChatDeltaChunk(state *ResponsesEventToChatState, delta ChatDelta) ChatCompletionsChunk { return ChatCompletionsChunk{ ID: state.ID, diff --git a/backend/internal/pkg/apicompat/types.go b/backend/internal/pkg/apicompat/types.go index 8b576647..b4451f23 100644 --- a/backend/internal/pkg/apicompat/types.go +++ b/backend/internal/pkg/apicompat/types.go @@ -362,11 +362,15 @@ func (u *ResponsesUsage) UnmarshalJSON(data []byte) error { // ResponsesInputTokensDetails breaks down input token usage. type ResponsesInputTokensDetails struct { CachedTokens int `json:"cached_tokens,omitempty"` + AudioTokens int `json:"audio_tokens,omitempty"` } // ResponsesOutputTokensDetails breaks down output token usage. type ResponsesOutputTokensDetails struct { - ReasoningTokens int `json:"reasoning_tokens,omitempty"` + ReasoningTokens int `json:"reasoning_tokens,omitempty"` + AudioTokens int `json:"audio_tokens,omitempty"` + AcceptedPredictionTokens int `json:"accepted_prediction_tokens,omitempty"` + RejectedPredictionTokens int `json:"rejected_prediction_tokens,omitempty"` } // --------------------------------------------------------------------------- @@ -517,15 +521,27 @@ type ChatChoice struct { // ChatUsage holds token counts in Chat Completions format. type ChatUsage struct { - PromptTokens int `json:"prompt_tokens"` - CompletionTokens int `json:"completion_tokens"` - TotalTokens int `json:"total_tokens"` - PromptTokensDetails *ChatTokenDetails `json:"prompt_tokens_details,omitempty"` + PromptTokens int `json:"prompt_tokens"` + CompletionTokens int `json:"completion_tokens"` + TotalTokens int `json:"total_tokens"` + PromptTokensDetails *ChatTokenDetails `json:"prompt_tokens_details,omitempty"` + CompletionTokensDetails *ChatTokenDetails `json:"completion_tokens_details,omitempty"` } -// ChatTokenDetails provides a breakdown of token usage. +// ChatTokenDetails provides a breakdown of token usage. The same type is +// reused for both prompt_tokens_details and completion_tokens_details; +// unset fields are omitted so each side only emits the fields that apply. +// +// Field set mirrors OpenAI's official CompletionUsage schema: +// - prompt_tokens_details: cached_tokens, audio_tokens +// - completion_tokens_details: reasoning_tokens, audio_tokens, +// accepted_prediction_tokens, rejected_prediction_tokens type ChatTokenDetails struct { - CachedTokens int `json:"cached_tokens,omitempty"` + CachedTokens int `json:"cached_tokens,omitempty"` + AudioTokens int `json:"audio_tokens,omitempty"` + ReasoningTokens int `json:"reasoning_tokens,omitempty"` + AcceptedPredictionTokens int `json:"accepted_prediction_tokens,omitempty"` + RejectedPredictionTokens int `json:"rejected_prediction_tokens,omitempty"` } // ChatCompletionsChunk is a single streaming chunk from POST /v1/chat/completions.