Merge pull request #2830 from stabey/fix/anthropic-to-responses-cache-tokens

fix(apicompat): Anthropic 转 Responses 时按 OpenAI 语义汇总 input_tokens
This commit is contained in:
Wesley Liddick 2026-05-29 11:00:19 +08:00 committed by GitHub
commit 1d46be02ae
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 168 additions and 8 deletions

View File

@ -1597,3 +1597,139 @@ func TestAnthropicToResponses_TemperatureStrippedForAllGpt5Variants(t *testing.T
})
}
}
// ---------------------------------------------------------------------------
// AnthropicToResponsesResponse: Anthropic input_tokens excludes cached tokens
// while OpenAI Responses input_tokens is the total including cached tokens.
// ---------------------------------------------------------------------------
func TestAnthropicToResponsesResponse_CacheTokensUseOpenAIInputSemantics(t *testing.T) {
resp := &AnthropicResponse{
ID: "msg_cache",
Model: "claude-sonnet-4-5-20250929",
Content: []AnthropicContentBlock{
{Type: "text", Text: "ok"},
},
StopReason: "end_turn",
Usage: AnthropicUsage{
InputTokens: 3318,
OutputTokens: 123,
CacheReadInputTokens: 50688,
CacheCreationInputTokens: 200,
},
}
out := AnthropicToResponsesResponse(resp)
require.NotNil(t, out.Usage)
// 3318 (uncached) + 50688 (read) + 200 (creation) = 54206
assert.Equal(t, 54206, out.Usage.InputTokens)
assert.Equal(t, 123, out.Usage.OutputTokens)
assert.Equal(t, 54329, out.Usage.TotalTokens)
require.NotNil(t, out.Usage.InputTokensDetails)
assert.Equal(t, 50688, out.Usage.InputTokensDetails.CachedTokens)
}
func TestAnthropicToResponsesResponse_NoCacheTokens(t *testing.T) {
resp := &AnthropicResponse{
ID: "msg_nocache",
Model: "claude-sonnet-4-5-20250929",
Content: []AnthropicContentBlock{
{Type: "text", Text: "ok"},
},
StopReason: "end_turn",
Usage: AnthropicUsage{
InputTokens: 100,
OutputTokens: 50,
},
}
out := AnthropicToResponsesResponse(resp)
require.NotNil(t, out.Usage)
assert.Equal(t, 100, out.Usage.InputTokens)
assert.Equal(t, 50, out.Usage.OutputTokens)
assert.Equal(t, 150, out.Usage.TotalTokens)
assert.Nil(t, out.Usage.InputTokensDetails)
}
// TestAnthropicEventToResponses_CacheTokensRoundTripFromMessageStart feeds a
// message_start (carrying input and cache counters), a message_delta (carrying
// the output count) and a message_stop through the streaming converter, then
// checks that the response.completed event reports the summed input count.
func TestAnthropicEventToResponses_CacheTokensRoundTripFromMessageStart(t *testing.T) {
	state := NewAnthropicEventToResponsesState()

	stream := []*AnthropicStreamEvent{
		// message_start carries cache fields on the initial Usage object.
		{
			Type: "message_start",
			Message: &AnthropicResponse{
				ID:    "msg_stream_cache",
				Model: "claude-sonnet-4-5-20250929",
				Usage: AnthropicUsage{
					InputTokens:              12,
					CacheReadInputTokens:     9,
					CacheCreationInputTokens: 3,
				},
			},
		},
		{
			Type:  "message_delta",
			Usage: &AnthropicUsage{OutputTokens: 7},
		},
		{Type: "message_stop"},
	}

	// Only the events produced by the final message_stop are inspected.
	var events []ResponsesStreamEvent
	for _, evt := range stream {
		events = AnthropicEventToResponsesEvents(evt, state)
	}

	// The terminal response.completed event must include OpenAI-semantic usage.
	// Scanning from the end selects the last matching event.
	var completed *ResponsesStreamEvent
	for i := len(events) - 1; i >= 0; i-- {
		if events[i].Type == "response.completed" {
			completed = &events[i]
			break
		}
	}
	require.NotNil(t, completed, "response.completed event must be emitted")
	require.NotNil(t, completed.Response)
	require.NotNil(t, completed.Response.Usage)
	usage := completed.Response.Usage

	// 12 (uncached) + 9 (read) + 3 (creation) = 24
	assert.Equal(t, 24, usage.InputTokens)
	assert.Equal(t, 7, usage.OutputTokens)
	assert.Equal(t, 31, usage.TotalTokens)

	require.NotNil(t, usage.InputTokensDetails)
	assert.Equal(t, 9, usage.InputTokensDetails.CachedTokens)
}
// TestAnthropicEventToResponses_CacheTokensFromMessageDelta covers the
// streaming path where message_start carries only the uncached input count and
// the cache counters arrive on the later message_delta. The response.completed
// event must still report the summed input count and the matching total.
func TestAnthropicEventToResponses_CacheTokensFromMessageDelta(t *testing.T) {
	state := NewAnthropicEventToResponsesState()

	AnthropicEventToResponsesEvents(&AnthropicStreamEvent{
		Type: "message_start",
		Message: &AnthropicResponse{
			ID:    "msg_delta_cache",
			Model: "claude-sonnet-4-5-20250929",
			Usage: AnthropicUsage{InputTokens: 20},
		},
	}, state)

	// Some upstreams only emit cache fields on the final message_delta.
	AnthropicEventToResponsesEvents(&AnthropicStreamEvent{
		Type: "message_delta",
		Usage: &AnthropicUsage{
			OutputTokens:             8,
			CacheReadInputTokens:     11,
			CacheCreationInputTokens: 4,
		},
	}, state)

	events := AnthropicEventToResponsesEvents(&AnthropicStreamEvent{Type: "message_stop"}, state)

	var completed *ResponsesStreamEvent
	for i := range events {
		if events[i].Type == "response.completed" {
			completed = &events[i]
		}
	}
	require.NotNil(t, completed, "response.completed event must be emitted")
	// Guard the intermediate pointer so a missing Response fails the test
	// cleanly instead of panicking on the Usage dereference below.
	require.NotNil(t, completed.Response)
	require.NotNil(t, completed.Response.Usage)
	usage := completed.Response.Usage

	// 20 (uncached) + 11 (read) + 4 (creation) = 35
	assert.Equal(t, 35, usage.InputTokens)
	assert.Equal(t, 8, usage.OutputTokens)
	// 35 (total input) + 8 (output) = 43
	assert.Equal(t, 43, usage.TotalTokens)

	require.NotNil(t, usage.InputTokensDetails)
	assert.Equal(t, 11, usage.InputTokensDetails.CachedTokens)
}

View File

@ -95,10 +95,16 @@ func AnthropicToResponsesResponse(resp *AnthropicResponse) *ResponsesResponse {
}
// Usage
// Anthropic's input_tokens excludes cache_read/cache_creation, while OpenAI
// Responses' input_tokens is the total including cached tokens. Add them back
// when converting so downstream consumers see OpenAI semantics.
totalInputTokens := resp.Usage.InputTokens +
resp.Usage.CacheReadInputTokens +
resp.Usage.CacheCreationInputTokens
out.Usage = &ResponsesUsage{
InputTokens: resp.Usage.InputTokens,
InputTokens: totalInputTokens,
OutputTokens: resp.Usage.OutputTokens,
TotalTokens: resp.Usage.InputTokens + resp.Usage.OutputTokens,
TotalTokens: totalInputTokens + resp.Usage.OutputTokens,
}
if resp.Usage.CacheReadInputTokens > 0 {
out.Usage.InputTokensDetails = &ResponsesInputTokensDetails{
@ -150,10 +156,13 @@ type AnthropicEventToResponsesState struct {
CurrentCallID string
CurrentName string
// Usage from message_delta
InputTokens int
OutputTokens int
CacheReadInputTokens int
// Usage from message_start / message_delta. InputTokens here follows
// Anthropic semantics (excludes cached tokens); they are added back when
// emitting the OpenAI Responses usage.
InputTokens int
OutputTokens int
CacheReadInputTokens int
CacheCreationInputTokens int
}
// NewAnthropicEventToResponsesState returns an initialised stream state.
@ -225,6 +234,12 @@ func anthToResHandleMessageStart(evt *AnthropicStreamEvent, state *AnthropicEven
if evt.Message.Usage.InputTokens > 0 {
state.InputTokens = evt.Message.Usage.InputTokens
}
if evt.Message.Usage.CacheReadInputTokens > 0 {
state.CacheReadInputTokens = evt.Message.Usage.CacheReadInputTokens
}
if evt.Message.Usage.CacheCreationInputTokens > 0 {
state.CacheCreationInputTokens = evt.Message.Usage.CacheCreationInputTokens
}
}
if state.CreatedSent {
@ -392,9 +407,15 @@ func anthToResHandleMessageDelta(evt *AnthropicStreamEvent, state *AnthropicEven
// Update usage
if evt.Usage != nil {
state.OutputTokens = evt.Usage.OutputTokens
if evt.Usage.InputTokens > 0 {
state.InputTokens = evt.Usage.InputTokens
}
if evt.Usage.CacheReadInputTokens > 0 {
state.CacheReadInputTokens = evt.Usage.CacheReadInputTokens
}
if evt.Usage.CacheCreationInputTokens > 0 {
state.CacheCreationInputTokens = evt.Usage.CacheCreationInputTokens
}
}
return nil
@ -472,10 +493,13 @@ func makeResponsesCompletedEvent(
seq := state.SequenceNumber
state.SequenceNumber++
// Anthropic's input_tokens excludes cache_read/cache_creation; add them
// back to match OpenAI Responses semantics where input_tokens is the total.
totalInputTokens := state.InputTokens + state.CacheReadInputTokens + state.CacheCreationInputTokens
usage := &ResponsesUsage{
InputTokens: state.InputTokens,
InputTokens: totalInputTokens,
OutputTokens: state.OutputTokens,
TotalTokens: state.InputTokens + state.OutputTokens,
TotalTokens: totalInputTokens + state.OutputTokens,
}
if state.CacheReadInputTokens > 0 {
usage.InputTokensDetails = &ResponsesInputTokensDetails{