fix(apicompat): Responses→Chat 转换补齐 completion_tokens_details 透传

OpenAI Responses API 在 gpt-5.x 等 reasoning 模型上会返回
output_tokens_details.reasoning_tokens, 但 ResponsesToChatCompletions
只映射了 input_tokens_details.cached_tokens, 导致客户端拿到的
chat.completion.usage 中 completion_tokens 出现无法解释的波动
(短 prompt 也可能 30+ token), 且缺失 reasoning_tokens 细分字段,
难以与 OpenAI 原生 Chat Completions 响应对账。

按 OpenAI 官方 CompletionUsage schema (openai/openai-go SDK
completion.go) 补齐所有 token-details 字段, 全部 omitempty:

  prompt_tokens_details:
    - cached_tokens   (原已支持)
    - audio_tokens    (新增)
  completion_tokens_details:
    - reasoning_tokens             (新增)
    - audio_tokens                 (新增)
    - accepted_prediction_tokens   (新增)
    - rejected_prediction_tokens   (新增)

实现细节:
- 抽出 promptDetailsFromResponses / completionDetailsFromResponses
  两个 helper, 当入参为 nil 或所有字段均为 0 时返回 nil
- 非流式路径 ResponsesToChatCompletions 复用已有的
  chatUsageFromResponsesUsage helper, 消除流式与非流式两条路径间的重复逻辑
- 非 reasoning / 非 audio 上游 (Anthropic, Gemini, gpt-4o) 不填这些
  字段, helper 返回 nil → CompletionTokensDetails 不输出, 对现有响应
  字节级兼容

新增单测:
- TestResponsesToChatCompletions_ReasoningTokens
- TestResponsesToChatCompletions_AllTokenDetailsPassThrough
- TestResponsesToChatCompletions_NoReasoningTokensWhenZero
- TestResponsesEventToChatChunks_CompletedWithReasoningTokens
This commit is contained in:
JIA-ss 2026-05-28 00:38:25 +08:00
parent 89d96f4b25
commit 20f5340784
3 changed files with 198 additions and 25 deletions

View File

@ -663,6 +663,115 @@ func TestResponsesToChatCompletions_CachedTokens(t *testing.T) {
assert.Equal(t, 80, chat.Usage.PromptTokensDetails.CachedTokens)
}
func TestResponsesToChatCompletions_ReasoningTokens(t *testing.T) {
resp := &ResponsesResponse{
ID: "resp_reasoning",
Status: "completed",
Output: []ResponsesOutput{
{
Type: "message",
Content: []ResponsesContentPart{{Type: "output_text", Text: "ping"}},
},
},
Usage: &ResponsesUsage{
InputTokens: 24,
OutputTokens: 33,
TotalTokens: 57,
OutputTokensDetails: &ResponsesOutputTokensDetails{
ReasoningTokens: 32,
},
},
}
chat := ResponsesToChatCompletions(resp, "gpt-5.5")
require.NotNil(t, chat.Usage)
assert.Equal(t, 33, chat.Usage.CompletionTokens)
require.NotNil(t, chat.Usage.CompletionTokensDetails)
assert.Equal(t, 32, chat.Usage.CompletionTokensDetails.ReasoningTokens)
}
// TestResponsesToChatCompletions_AllTokenDetailsPassThrough covers the full
// OpenAI CompletionUsage detail field set so future audio and
// prediction-outputs responses propagate without further changes.
//
// It checks both the in-memory struct and the marshalled JSON, because the
// JSON key names are what downstream Chat Completions clients actually see.
func TestResponsesToChatCompletions_AllTokenDetailsPassThrough(t *testing.T) {
	resp := &ResponsesResponse{
		ID:     "resp_full_details",
		Status: "completed",
		Output: []ResponsesOutput{
			{
				Type:    "message",
				Content: []ResponsesContentPart{{Type: "output_text", Text: "x"}},
			},
		},
		Usage: &ResponsesUsage{
			InputTokens:  100,
			OutputTokens: 50,
			TotalTokens:  150,
			InputTokensDetails: &ResponsesInputTokensDetails{
				CachedTokens: 60,
				AudioTokens:  4,
			},
			OutputTokensDetails: &ResponsesOutputTokensDetails{
				ReasoningTokens:          30,
				AudioTokens:              2,
				AcceptedPredictionTokens: 10,
				RejectedPredictionTokens: 3,
			},
		},
	}

	chat := ResponsesToChatCompletions(resp, "gpt-5.5")
	require.NotNil(t, chat.Usage)

	// Struct-level mapping: prompt side.
	require.NotNil(t, chat.Usage.PromptTokensDetails)
	assert.Equal(t, 60, chat.Usage.PromptTokensDetails.CachedTokens)
	assert.Equal(t, 4, chat.Usage.PromptTokensDetails.AudioTokens)

	// Struct-level mapping: completion side.
	require.NotNil(t, chat.Usage.CompletionTokensDetails)
	assert.Equal(t, 30, chat.Usage.CompletionTokensDetails.ReasoningTokens)
	assert.Equal(t, 2, chat.Usage.CompletionTokensDetails.AudioTokens)
	assert.Equal(t, 10, chat.Usage.CompletionTokensDetails.AcceptedPredictionTokens)
	assert.Equal(t, 3, chat.Usage.CompletionTokensDetails.RejectedPredictionTokens)

	// Wire-level mapping: every populated detail must appear in the JSON under
	// its expected key, so a wrong or missing struct tag is caught here.
	raw, err := json.Marshal(chat.Usage)
	require.NoError(t, err)
	body := string(raw)
	assert.Contains(t, body, `"prompt_tokens_details"`)
	assert.Contains(t, body, `"completion_tokens_details"`)
	assert.Contains(t, body, `"cached_tokens":60`)
	// audio_tokens appears on both sides with distinct values (4 prompt, 2 completion).
	assert.Contains(t, body, `"audio_tokens":4`)
	assert.Contains(t, body, `"audio_tokens":2`)
	assert.Contains(t, body, `"reasoning_tokens":30`)
	assert.Contains(t, body, `"accepted_prediction_tokens":10`)
	assert.Contains(t, body, `"rejected_prediction_tokens":3`)
}
func TestResponsesToChatCompletions_NoReasoningTokensWhenZero(t *testing.T) {
// Non-reasoning models do not return reasoning_tokens. The mapping must
// omit completion_tokens_details entirely rather than emitting a zero-valued
// field, so non-reasoning responses stay clean.
resp := &ResponsesResponse{
ID: "resp_no_reasoning",
Status: "completed",
Output: []ResponsesOutput{
{
Type: "message",
Content: []ResponsesContentPart{{Type: "output_text", Text: "hi"}},
},
},
Usage: &ResponsesUsage{
InputTokens: 10,
OutputTokens: 5,
TotalTokens: 15,
OutputTokensDetails: &ResponsesOutputTokensDetails{
ReasoningTokens: 0,
},
},
}
chat := ResponsesToChatCompletions(resp, "gpt-4o")
require.NotNil(t, chat.Usage)
assert.Nil(t, chat.Usage.CompletionTokensDetails)
raw, err := json.Marshal(chat.Usage)
require.NoError(t, err)
assert.NotContains(t, string(raw), "completion_tokens_details")
assert.NotContains(t, string(raw), "reasoning_tokens")
}
func TestResponsesToChatCompletions_WebSearch(t *testing.T) {
resp := &ResponsesResponse{
ID: "resp_ws",
@ -825,6 +934,32 @@ func TestResponsesEventToChatChunks_Completed(t *testing.T) {
assert.Equal(t, 30, chunks[1].Usage.PromptTokensDetails.CachedTokens)
}
// TestResponsesEventToChatChunks_CompletedWithReasoningTokens drives the
// streaming converter with a response.completed event while IncludeUsage is
// enabled. It expects two chunks, the second of which carries usage with the
// reasoning token count under CompletionTokensDetails.
func TestResponsesEventToChatChunks_CompletedWithReasoningTokens(t *testing.T) {
	st := NewResponsesEventToChatState()
	st.Model = "gpt-5.5"
	st.IncludeUsage = true

	completed := &ResponsesStreamEvent{
		Type: "response.completed",
		Response: &ResponsesResponse{
			Status: "completed",
			Usage: &ResponsesUsage{
				InputTokens:  24,
				OutputTokens: 33,
				TotalTokens:  57,
				OutputTokensDetails: &ResponsesOutputTokensDetails{
					ReasoningTokens: 32,
				},
			},
		},
	}

	emitted := ResponsesEventToChatChunks(completed, st)
	require.Len(t, emitted, 2)

	// The usage payload is expected on the second chunk.
	require.NotNil(t, emitted[1].Usage)
	require.NotNil(t, emitted[1].Usage.CompletionTokensDetails)
	assert.Equal(t, 32, emitted[1].Usage.CompletionTokensDetails.ReasoningTokens)
}
func TestResponsesEventToChatChunks_ResponseDone(t *testing.T) {
state := NewResponsesEventToChatState()
state.Model = "gpt-4o"

View File

@ -81,19 +81,7 @@ func ResponsesToChatCompletions(resp *ResponsesResponse, model string) *ChatComp
FinishReason: finishReason,
}}
if resp.Usage != nil {
usage := &ChatUsage{
PromptTokens: resp.Usage.InputTokens,
CompletionTokens: resp.Usage.OutputTokens,
TotalTokens: resp.Usage.InputTokens + resp.Usage.OutputTokens,
}
if resp.Usage.InputTokensDetails != nil && resp.Usage.InputTokensDetails.CachedTokens > 0 {
usage.PromptTokensDetails = &ChatTokenDetails{
CachedTokens: resp.Usage.InputTokensDetails.CachedTokens,
}
}
out.Usage = usage
}
out.Usage = chatUsageFromResponsesUsage(resp.Usage)
return out
}
@ -341,14 +329,48 @@ func chatUsageFromResponsesUsage(u *ResponsesUsage) *ChatUsage {
CompletionTokens: u.OutputTokens,
TotalTokens: u.InputTokens + u.OutputTokens,
}
if u.InputTokensDetails != nil && u.InputTokensDetails.CachedTokens > 0 {
usage.PromptTokensDetails = &ChatTokenDetails{
CachedTokens: u.InputTokensDetails.CachedTokens,
}
}
usage.PromptTokensDetails = promptDetailsFromResponses(u.InputTokensDetails)
usage.CompletionTokensDetails = completionDetailsFromResponses(u.OutputTokensDetails)
return usage
}
// promptDetailsFromResponses converts a Responses-API input_tokens_details
// value into the Chat-Completions prompt_tokens_details shape.
//
// It returns nil when src is nil or when both the cached and audio counts are
// zero, so callers can assign the result directly to an omitempty pointer
// field. Otherwise it returns a new ChatTokenDetails holding those two counts.
func promptDetailsFromResponses(src *ResponsesInputTokensDetails) *ChatTokenDetails {
	hasBreakdown := src != nil && (src.CachedTokens != 0 || src.AudioTokens != 0)
	if !hasBreakdown {
		return nil
	}

	details := new(ChatTokenDetails)
	details.CachedTokens = src.CachedTokens
	details.AudioTokens = src.AudioTokens
	return details
}
// completionDetailsFromResponses converts a Responses-API
// output_tokens_details value into the Chat-Completions
// completion_tokens_details shape, carrying over the reasoning, audio, and
// accepted/rejected prediction token counts.
//
// It returns nil when src is nil or when all four counts are zero, so callers
// can assign the result directly to an omitempty pointer field.
func completionDetailsFromResponses(src *ResponsesOutputTokensDetails) *ChatTokenDetails {
	if src == nil {
		return nil
	}

	anySet := src.ReasoningTokens != 0 ||
		src.AudioTokens != 0 ||
		src.AcceptedPredictionTokens != 0 ||
		src.RejectedPredictionTokens != 0
	if !anySet {
		return nil
	}

	details := new(ChatTokenDetails)
	details.ReasoningTokens = src.ReasoningTokens
	details.AudioTokens = src.AudioTokens
	details.AcceptedPredictionTokens = src.AcceptedPredictionTokens
	details.RejectedPredictionTokens = src.RejectedPredictionTokens
	return details
}
func makeChatDeltaChunk(state *ResponsesEventToChatState, delta ChatDelta) ChatCompletionsChunk {
return ChatCompletionsChunk{
ID: state.ID,

View File

@ -362,11 +362,15 @@ func (u *ResponsesUsage) UnmarshalJSON(data []byte) error {
// ResponsesInputTokensDetails breaks down input token usage.
// Both fields are tagged omitempty, so a zero count is left out when the
// struct is marshalled to JSON.
type ResponsesInputTokensDetails struct {
// CachedTokens is the count carried under the "cached_tokens" JSON key.
CachedTokens int `json:"cached_tokens,omitempty"`
// AudioTokens is the count carried under the "audio_tokens" JSON key.
AudioTokens int `json:"audio_tokens,omitempty"`
}
// ResponsesOutputTokensDetails breaks down output token usage.
type ResponsesOutputTokensDetails struct {
ReasoningTokens int `json:"reasoning_tokens,omitempty"`
ReasoningTokens int `json:"reasoning_tokens,omitempty"`
AudioTokens int `json:"audio_tokens,omitempty"`
AcceptedPredictionTokens int `json:"accepted_prediction_tokens,omitempty"`
RejectedPredictionTokens int `json:"rejected_prediction_tokens,omitempty"`
}
// ---------------------------------------------------------------------------
@ -517,15 +521,27 @@ type ChatChoice struct {
// ChatUsage holds token counts in Chat Completions format.
type ChatUsage struct {
PromptTokens int `json:"prompt_tokens"`
CompletionTokens int `json:"completion_tokens"`
TotalTokens int `json:"total_tokens"`
PromptTokensDetails *ChatTokenDetails `json:"prompt_tokens_details,omitempty"`
PromptTokens int `json:"prompt_tokens"`
CompletionTokens int `json:"completion_tokens"`
TotalTokens int `json:"total_tokens"`
PromptTokensDetails *ChatTokenDetails `json:"prompt_tokens_details,omitempty"`
CompletionTokensDetails *ChatTokenDetails `json:"completion_tokens_details,omitempty"`
}
// ChatTokenDetails provides a breakdown of token usage.
// ChatTokenDetails provides a breakdown of token usage. The same type is
// reused for both prompt_tokens_details and completion_tokens_details;
// unset fields are omitted so each side only emits the fields that apply.
//
// Field set mirrors OpenAI's official CompletionUsage schema:
// - prompt_tokens_details: cached_tokens, audio_tokens
// - completion_tokens_details: reasoning_tokens, audio_tokens,
// accepted_prediction_tokens, rejected_prediction_tokens
type ChatTokenDetails struct {
CachedTokens int `json:"cached_tokens,omitempty"`
CachedTokens int `json:"cached_tokens,omitempty"`
AudioTokens int `json:"audio_tokens,omitempty"`
ReasoningTokens int `json:"reasoning_tokens,omitempty"`
AcceptedPredictionTokens int `json:"accepted_prediction_tokens,omitempty"`
RejectedPredictionTokens int `json:"rejected_prediction_tokens,omitempty"`
}
// ChatCompletionsChunk is a single streaming chunk from POST /v1/chat/completions.