fix(apicompat): Responses→Chat 转换补齐 completion_tokens_details 透传
OpenAI Responses API 在 gpt-5.x 等 reasoning 模型上会返回
output_tokens_details.reasoning_tokens, 但 ResponsesToChatCompletions
只映射了 input_tokens_details.cached_tokens。由于 output_tokens 本身已包含
reasoning token, 客户端拿到的 chat.completion.usage 中 completion_tokens
会出现看似无法解释的波动 (短 prompt 也可能 30+ token), 且缺失
reasoning_tokens 细分字段, 难以与 OpenAI 原生 Chat Completions 响应对账。
按 OpenAI 官方 CompletionUsage schema (openai/openai-go SDK
completion.go) 补齐所有 token-details 字段, 全部 omitempty:
prompt_tokens_details:
- cached_tokens (原已支持)
- audio_tokens (新增)
completion_tokens_details:
- reasoning_tokens (新增)
- audio_tokens (新增)
- accepted_prediction_tokens (新增)
- rejected_prediction_tokens (新增)
实现细节:
- 抽出 promptDetailsFromResponses / completionDetailsFromResponses
两个 helper, 全零字段返回 nil
- 非流路径 ResponsesToChatCompletions 复用已存在的
chatUsageFromResponsesUsage helper, 消除两条路径间的重复
- 非 reasoning / 非 audio 上游 (Anthropic, Gemini, gpt-4o) 不填这些
字段, helper 返回 nil → CompletionTokensDetails 不输出, 对现有响应
字节级兼容
新增单测:
- TestResponsesToChatCompletions_ReasoningTokens
- TestResponsesToChatCompletions_AllTokenDetailsPassThrough
- TestResponsesToChatCompletions_NoReasoningTokensWhenZero
- TestResponsesEventToChatChunks_CompletedWithReasoningTokens
This commit is contained in:
parent
89d96f4b25
commit
20f5340784
@ -663,6 +663,115 @@ func TestResponsesToChatCompletions_CachedTokens(t *testing.T) {
|
||||
assert.Equal(t, 80, chat.Usage.PromptTokensDetails.CachedTokens)
|
||||
}
|
||||
|
||||
func TestResponsesToChatCompletions_ReasoningTokens(t *testing.T) {
|
||||
resp := &ResponsesResponse{
|
||||
ID: "resp_reasoning",
|
||||
Status: "completed",
|
||||
Output: []ResponsesOutput{
|
||||
{
|
||||
Type: "message",
|
||||
Content: []ResponsesContentPart{{Type: "output_text", Text: "ping"}},
|
||||
},
|
||||
},
|
||||
Usage: &ResponsesUsage{
|
||||
InputTokens: 24,
|
||||
OutputTokens: 33,
|
||||
TotalTokens: 57,
|
||||
OutputTokensDetails: &ResponsesOutputTokensDetails{
|
||||
ReasoningTokens: 32,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
chat := ResponsesToChatCompletions(resp, "gpt-5.5")
|
||||
require.NotNil(t, chat.Usage)
|
||||
assert.Equal(t, 33, chat.Usage.CompletionTokens)
|
||||
require.NotNil(t, chat.Usage.CompletionTokensDetails)
|
||||
assert.Equal(t, 32, chat.Usage.CompletionTokensDetails.ReasoningTokens)
|
||||
}
|
||||
|
||||
func TestResponsesToChatCompletions_AllTokenDetailsPassThrough(t *testing.T) {
|
||||
// Covers the full OpenAI CompletionUsage detail field set so future audio
|
||||
// and prediction-outputs responses propagate without further changes.
|
||||
resp := &ResponsesResponse{
|
||||
ID: "resp_full_details",
|
||||
Status: "completed",
|
||||
Output: []ResponsesOutput{
|
||||
{
|
||||
Type: "message",
|
||||
Content: []ResponsesContentPart{{Type: "output_text", Text: "x"}},
|
||||
},
|
||||
},
|
||||
Usage: &ResponsesUsage{
|
||||
InputTokens: 100,
|
||||
OutputTokens: 50,
|
||||
TotalTokens: 150,
|
||||
InputTokensDetails: &ResponsesInputTokensDetails{
|
||||
CachedTokens: 60,
|
||||
AudioTokens: 4,
|
||||
},
|
||||
OutputTokensDetails: &ResponsesOutputTokensDetails{
|
||||
ReasoningTokens: 30,
|
||||
AudioTokens: 2,
|
||||
AcceptedPredictionTokens: 10,
|
||||
RejectedPredictionTokens: 3,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
chat := ResponsesToChatCompletions(resp, "gpt-5.5")
|
||||
require.NotNil(t, chat.Usage)
|
||||
require.NotNil(t, chat.Usage.PromptTokensDetails)
|
||||
assert.Equal(t, 60, chat.Usage.PromptTokensDetails.CachedTokens)
|
||||
assert.Equal(t, 4, chat.Usage.PromptTokensDetails.AudioTokens)
|
||||
|
||||
require.NotNil(t, chat.Usage.CompletionTokensDetails)
|
||||
assert.Equal(t, 30, chat.Usage.CompletionTokensDetails.ReasoningTokens)
|
||||
assert.Equal(t, 2, chat.Usage.CompletionTokensDetails.AudioTokens)
|
||||
assert.Equal(t, 10, chat.Usage.CompletionTokensDetails.AcceptedPredictionTokens)
|
||||
assert.Equal(t, 3, chat.Usage.CompletionTokensDetails.RejectedPredictionTokens)
|
||||
|
||||
raw, err := json.Marshal(chat.Usage)
|
||||
require.NoError(t, err)
|
||||
assert.Contains(t, string(raw), `"prompt_tokens_details"`)
|
||||
assert.Contains(t, string(raw), `"completion_tokens_details"`)
|
||||
assert.Contains(t, string(raw), `"reasoning_tokens":30`)
|
||||
assert.Contains(t, string(raw), `"accepted_prediction_tokens":10`)
|
||||
}
|
||||
|
||||
func TestResponsesToChatCompletions_NoReasoningTokensWhenZero(t *testing.T) {
|
||||
// Non-reasoning models do not return reasoning_tokens. The mapping must
|
||||
// omit completion_tokens_details entirely rather than emitting a zero-valued
|
||||
// field, so non-reasoning responses stay clean.
|
||||
resp := &ResponsesResponse{
|
||||
ID: "resp_no_reasoning",
|
||||
Status: "completed",
|
||||
Output: []ResponsesOutput{
|
||||
{
|
||||
Type: "message",
|
||||
Content: []ResponsesContentPart{{Type: "output_text", Text: "hi"}},
|
||||
},
|
||||
},
|
||||
Usage: &ResponsesUsage{
|
||||
InputTokens: 10,
|
||||
OutputTokens: 5,
|
||||
TotalTokens: 15,
|
||||
OutputTokensDetails: &ResponsesOutputTokensDetails{
|
||||
ReasoningTokens: 0,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
chat := ResponsesToChatCompletions(resp, "gpt-4o")
|
||||
require.NotNil(t, chat.Usage)
|
||||
assert.Nil(t, chat.Usage.CompletionTokensDetails)
|
||||
|
||||
raw, err := json.Marshal(chat.Usage)
|
||||
require.NoError(t, err)
|
||||
assert.NotContains(t, string(raw), "completion_tokens_details")
|
||||
assert.NotContains(t, string(raw), "reasoning_tokens")
|
||||
}
|
||||
|
||||
func TestResponsesToChatCompletions_WebSearch(t *testing.T) {
|
||||
resp := &ResponsesResponse{
|
||||
ID: "resp_ws",
|
||||
@ -825,6 +934,32 @@ func TestResponsesEventToChatChunks_Completed(t *testing.T) {
|
||||
assert.Equal(t, 30, chunks[1].Usage.PromptTokensDetails.CachedTokens)
|
||||
}
|
||||
|
||||
func TestResponsesEventToChatChunks_CompletedWithReasoningTokens(t *testing.T) {
|
||||
state := NewResponsesEventToChatState()
|
||||
state.Model = "gpt-5.5"
|
||||
state.IncludeUsage = true
|
||||
|
||||
chunks := ResponsesEventToChatChunks(&ResponsesStreamEvent{
|
||||
Type: "response.completed",
|
||||
Response: &ResponsesResponse{
|
||||
Status: "completed",
|
||||
Usage: &ResponsesUsage{
|
||||
InputTokens: 24,
|
||||
OutputTokens: 33,
|
||||
TotalTokens: 57,
|
||||
OutputTokensDetails: &ResponsesOutputTokensDetails{
|
||||
ReasoningTokens: 32,
|
||||
},
|
||||
},
|
||||
},
|
||||
}, state)
|
||||
require.Len(t, chunks, 2)
|
||||
|
||||
require.NotNil(t, chunks[1].Usage)
|
||||
require.NotNil(t, chunks[1].Usage.CompletionTokensDetails)
|
||||
assert.Equal(t, 32, chunks[1].Usage.CompletionTokensDetails.ReasoningTokens)
|
||||
}
|
||||
|
||||
func TestResponsesEventToChatChunks_ResponseDone(t *testing.T) {
|
||||
state := NewResponsesEventToChatState()
|
||||
state.Model = "gpt-4o"
|
||||
|
||||
@ -81,19 +81,7 @@ func ResponsesToChatCompletions(resp *ResponsesResponse, model string) *ChatComp
|
||||
FinishReason: finishReason,
|
||||
}}
|
||||
|
||||
if resp.Usage != nil {
|
||||
usage := &ChatUsage{
|
||||
PromptTokens: resp.Usage.InputTokens,
|
||||
CompletionTokens: resp.Usage.OutputTokens,
|
||||
TotalTokens: resp.Usage.InputTokens + resp.Usage.OutputTokens,
|
||||
}
|
||||
if resp.Usage.InputTokensDetails != nil && resp.Usage.InputTokensDetails.CachedTokens > 0 {
|
||||
usage.PromptTokensDetails = &ChatTokenDetails{
|
||||
CachedTokens: resp.Usage.InputTokensDetails.CachedTokens,
|
||||
}
|
||||
}
|
||||
out.Usage = usage
|
||||
}
|
||||
out.Usage = chatUsageFromResponsesUsage(resp.Usage)
|
||||
|
||||
return out
|
||||
}
|
||||
@ -341,14 +329,48 @@ func chatUsageFromResponsesUsage(u *ResponsesUsage) *ChatUsage {
|
||||
CompletionTokens: u.OutputTokens,
|
||||
TotalTokens: u.InputTokens + u.OutputTokens,
|
||||
}
|
||||
if u.InputTokensDetails != nil && u.InputTokensDetails.CachedTokens > 0 {
|
||||
usage.PromptTokensDetails = &ChatTokenDetails{
|
||||
CachedTokens: u.InputTokensDetails.CachedTokens,
|
||||
}
|
||||
}
|
||||
usage.PromptTokensDetails = promptDetailsFromResponses(u.InputTokensDetails)
|
||||
usage.CompletionTokensDetails = completionDetailsFromResponses(u.OutputTokensDetails)
|
||||
return usage
|
||||
}
|
||||
|
||||
// promptDetailsFromResponses maps Responses-API input_tokens_details into a
|
||||
// Chat-Completions prompt_tokens_details. Returns nil when nothing would be
|
||||
// emitted, so upstreams that do not break down prompt usage stay clean.
|
||||
func promptDetailsFromResponses(src *ResponsesInputTokensDetails) *ChatTokenDetails {
|
||||
if src == nil {
|
||||
return nil
|
||||
}
|
||||
if src.CachedTokens == 0 && src.AudioTokens == 0 {
|
||||
return nil
|
||||
}
|
||||
return &ChatTokenDetails{
|
||||
CachedTokens: src.CachedTokens,
|
||||
AudioTokens: src.AudioTokens,
|
||||
}
|
||||
}
|
||||
|
||||
// completionDetailsFromResponses maps Responses-API output_tokens_details
|
||||
// into a Chat-Completions completion_tokens_details. Mirrors the OpenAI
|
||||
// official CompletionUsage schema: reasoning_tokens, audio_tokens, and
|
||||
// the predicted-outputs accepted/rejected counts. Returns nil when nothing
|
||||
// would be emitted so non-reasoning, non-audio responses stay clean.
|
||||
func completionDetailsFromResponses(src *ResponsesOutputTokensDetails) *ChatTokenDetails {
|
||||
if src == nil {
|
||||
return nil
|
||||
}
|
||||
if src.ReasoningTokens == 0 && src.AudioTokens == 0 &&
|
||||
src.AcceptedPredictionTokens == 0 && src.RejectedPredictionTokens == 0 {
|
||||
return nil
|
||||
}
|
||||
return &ChatTokenDetails{
|
||||
ReasoningTokens: src.ReasoningTokens,
|
||||
AudioTokens: src.AudioTokens,
|
||||
AcceptedPredictionTokens: src.AcceptedPredictionTokens,
|
||||
RejectedPredictionTokens: src.RejectedPredictionTokens,
|
||||
}
|
||||
}
|
||||
|
||||
func makeChatDeltaChunk(state *ResponsesEventToChatState, delta ChatDelta) ChatCompletionsChunk {
|
||||
return ChatCompletionsChunk{
|
||||
ID: state.ID,
|
||||
|
||||
@ -362,11 +362,15 @@ func (u *ResponsesUsage) UnmarshalJSON(data []byte) error {
|
||||
// ResponsesInputTokensDetails breaks down input token usage.
//
// Both counters are tagged omitempty, so a zero value is left out of the
// marshalled JSON.
type ResponsesInputTokensDetails struct {
	CachedTokens int `json:"cached_tokens,omitempty"`
	AudioTokens  int `json:"audio_tokens,omitempty"`
}
|
||||
|
||||
// ResponsesOutputTokensDetails breaks down output token usage.
//
// All counters are tagged omitempty, so a zero value is left out of the
// marshalled JSON.
type ResponsesOutputTokensDetails struct {
	ReasoningTokens          int `json:"reasoning_tokens,omitempty"`
	AudioTokens              int `json:"audio_tokens,omitempty"`
	AcceptedPredictionTokens int `json:"accepted_prediction_tokens,omitempty"`
	RejectedPredictionTokens int `json:"rejected_prediction_tokens,omitempty"`
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@ -517,15 +521,27 @@ type ChatChoice struct {
|
||||
|
||||
// ChatUsage holds token counts in Chat Completions format.
|
||||
type ChatUsage struct {
|
||||
PromptTokens int `json:"prompt_tokens"`
|
||||
CompletionTokens int `json:"completion_tokens"`
|
||||
TotalTokens int `json:"total_tokens"`
|
||||
PromptTokensDetails *ChatTokenDetails `json:"prompt_tokens_details,omitempty"`
|
||||
PromptTokens int `json:"prompt_tokens"`
|
||||
CompletionTokens int `json:"completion_tokens"`
|
||||
TotalTokens int `json:"total_tokens"`
|
||||
PromptTokensDetails *ChatTokenDetails `json:"prompt_tokens_details,omitempty"`
|
||||
CompletionTokensDetails *ChatTokenDetails `json:"completion_tokens_details,omitempty"`
|
||||
}
|
||||
|
||||
// ChatTokenDetails provides a breakdown of token usage. The same type is
// reused for both prompt_tokens_details and completion_tokens_details;
// unset fields are omitted so each side only emits the fields that apply.
//
// Field set mirrors OpenAI's official CompletionUsage schema:
//   - prompt_tokens_details: cached_tokens, audio_tokens
//   - completion_tokens_details: reasoning_tokens, audio_tokens,
//     accepted_prediction_tokens, rejected_prediction_tokens
type ChatTokenDetails struct {
	CachedTokens             int `json:"cached_tokens,omitempty"`
	AudioTokens              int `json:"audio_tokens,omitempty"`
	ReasoningTokens          int `json:"reasoning_tokens,omitempty"`
	AcceptedPredictionTokens int `json:"accepted_prediction_tokens,omitempty"`
	RejectedPredictionTokens int `json:"rejected_prediction_tokens,omitempty"`
}
|
||||
|
||||
// ChatCompletionsChunk is a single streaming chunk from POST /v1/chat/completions.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user