Merge pull request #2830 from stabey/fix/anthropic-to-responses-cache-tokens
fix(apicompat): Anthropic 转 Responses 时按 OpenAI 语义汇总 input_tokens
This commit is contained in:
commit
1d46be02ae
@ -1597,3 +1597,139 @@ func TestAnthropicToResponses_TemperatureStrippedForAllGpt5Variants(t *testing.T
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// AnthropicToResponsesResponse: Anthropic input_tokens excludes cached tokens
|
||||
// while OpenAI Responses input_tokens is the total including cached tokens.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func TestAnthropicToResponsesResponse_CacheTokensUseOpenAIInputSemantics(t *testing.T) {
|
||||
resp := &AnthropicResponse{
|
||||
ID: "msg_cache",
|
||||
Model: "claude-sonnet-4-5-20250929",
|
||||
Content: []AnthropicContentBlock{
|
||||
{Type: "text", Text: "ok"},
|
||||
},
|
||||
StopReason: "end_turn",
|
||||
Usage: AnthropicUsage{
|
||||
InputTokens: 3318,
|
||||
OutputTokens: 123,
|
||||
CacheReadInputTokens: 50688,
|
||||
CacheCreationInputTokens: 200,
|
||||
},
|
||||
}
|
||||
|
||||
out := AnthropicToResponsesResponse(resp)
|
||||
require.NotNil(t, out.Usage)
|
||||
// 3318 (uncached) + 50688 (read) + 200 (creation) = 54206
|
||||
assert.Equal(t, 54206, out.Usage.InputTokens)
|
||||
assert.Equal(t, 123, out.Usage.OutputTokens)
|
||||
assert.Equal(t, 54329, out.Usage.TotalTokens)
|
||||
require.NotNil(t, out.Usage.InputTokensDetails)
|
||||
assert.Equal(t, 50688, out.Usage.InputTokensDetails.CachedTokens)
|
||||
}
|
||||
|
||||
func TestAnthropicToResponsesResponse_NoCacheTokens(t *testing.T) {
|
||||
resp := &AnthropicResponse{
|
||||
ID: "msg_nocache",
|
||||
Model: "claude-sonnet-4-5-20250929",
|
||||
Content: []AnthropicContentBlock{
|
||||
{Type: "text", Text: "ok"},
|
||||
},
|
||||
StopReason: "end_turn",
|
||||
Usage: AnthropicUsage{
|
||||
InputTokens: 100,
|
||||
OutputTokens: 50,
|
||||
},
|
||||
}
|
||||
|
||||
out := AnthropicToResponsesResponse(resp)
|
||||
require.NotNil(t, out.Usage)
|
||||
assert.Equal(t, 100, out.Usage.InputTokens)
|
||||
assert.Equal(t, 50, out.Usage.OutputTokens)
|
||||
assert.Equal(t, 150, out.Usage.TotalTokens)
|
||||
assert.Nil(t, out.Usage.InputTokensDetails)
|
||||
}
|
||||
|
||||
func TestAnthropicEventToResponses_CacheTokensRoundTripFromMessageStart(t *testing.T) {
|
||||
state := NewAnthropicEventToResponsesState()
|
||||
|
||||
// message_start carries cache fields on the initial Usage object.
|
||||
AnthropicEventToResponsesEvents(&AnthropicStreamEvent{
|
||||
Type: "message_start",
|
||||
Message: &AnthropicResponse{
|
||||
ID: "msg_stream_cache",
|
||||
Model: "claude-sonnet-4-5-20250929",
|
||||
Usage: AnthropicUsage{
|
||||
InputTokens: 12,
|
||||
CacheReadInputTokens: 9,
|
||||
CacheCreationInputTokens: 3,
|
||||
},
|
||||
},
|
||||
}, state)
|
||||
|
||||
AnthropicEventToResponsesEvents(&AnthropicStreamEvent{
|
||||
Type: "message_delta",
|
||||
Usage: &AnthropicUsage{
|
||||
OutputTokens: 7,
|
||||
},
|
||||
}, state)
|
||||
|
||||
events := AnthropicEventToResponsesEvents(&AnthropicStreamEvent{Type: "message_stop"}, state)
|
||||
|
||||
// The terminal response.completed event must include OpenAI-semantic usage.
|
||||
var completed *ResponsesStreamEvent
|
||||
for i := range events {
|
||||
if events[i].Type == "response.completed" {
|
||||
completed = &events[i]
|
||||
}
|
||||
}
|
||||
require.NotNil(t, completed, "response.completed event must be emitted")
|
||||
require.NotNil(t, completed.Response)
|
||||
require.NotNil(t, completed.Response.Usage)
|
||||
// 12 (uncached) + 9 (read) + 3 (creation) = 24
|
||||
assert.Equal(t, 24, completed.Response.Usage.InputTokens)
|
||||
assert.Equal(t, 7, completed.Response.Usage.OutputTokens)
|
||||
assert.Equal(t, 31, completed.Response.Usage.TotalTokens)
|
||||
require.NotNil(t, completed.Response.Usage.InputTokensDetails)
|
||||
assert.Equal(t, 9, completed.Response.Usage.InputTokensDetails.CachedTokens)
|
||||
}
|
||||
|
||||
func TestAnthropicEventToResponses_CacheTokensFromMessageDelta(t *testing.T) {
|
||||
state := NewAnthropicEventToResponsesState()
|
||||
|
||||
AnthropicEventToResponsesEvents(&AnthropicStreamEvent{
|
||||
Type: "message_start",
|
||||
Message: &AnthropicResponse{
|
||||
ID: "msg_delta_cache",
|
||||
Model: "claude-sonnet-4-5-20250929",
|
||||
Usage: AnthropicUsage{InputTokens: 20},
|
||||
},
|
||||
}, state)
|
||||
|
||||
// Some upstreams only emit cache fields on the final message_delta.
|
||||
AnthropicEventToResponsesEvents(&AnthropicStreamEvent{
|
||||
Type: "message_delta",
|
||||
Usage: &AnthropicUsage{
|
||||
OutputTokens: 8,
|
||||
CacheReadInputTokens: 11,
|
||||
CacheCreationInputTokens: 4,
|
||||
},
|
||||
}, state)
|
||||
|
||||
events := AnthropicEventToResponsesEvents(&AnthropicStreamEvent{Type: "message_stop"}, state)
|
||||
|
||||
var completed *ResponsesStreamEvent
|
||||
for i := range events {
|
||||
if events[i].Type == "response.completed" {
|
||||
completed = &events[i]
|
||||
}
|
||||
}
|
||||
require.NotNil(t, completed)
|
||||
require.NotNil(t, completed.Response.Usage)
|
||||
// 20 (uncached) + 11 (read) + 4 (creation) = 35
|
||||
assert.Equal(t, 35, completed.Response.Usage.InputTokens)
|
||||
assert.Equal(t, 8, completed.Response.Usage.OutputTokens)
|
||||
require.NotNil(t, completed.Response.Usage.InputTokensDetails)
|
||||
assert.Equal(t, 11, completed.Response.Usage.InputTokensDetails.CachedTokens)
|
||||
}
|
||||
|
||||
@ -95,10 +95,16 @@ func AnthropicToResponsesResponse(resp *AnthropicResponse) *ResponsesResponse {
|
||||
}
|
||||
|
||||
// Usage
|
||||
// Anthropic's input_tokens excludes cache_read/cache_creation, while OpenAI
|
||||
// Responses' input_tokens is the total including cached tokens. Add them back
|
||||
// when converting so downstream consumers see OpenAI semantics.
|
||||
totalInputTokens := resp.Usage.InputTokens +
|
||||
resp.Usage.CacheReadInputTokens +
|
||||
resp.Usage.CacheCreationInputTokens
|
||||
out.Usage = &ResponsesUsage{
|
||||
InputTokens: resp.Usage.InputTokens,
|
||||
InputTokens: totalInputTokens,
|
||||
OutputTokens: resp.Usage.OutputTokens,
|
||||
TotalTokens: resp.Usage.InputTokens + resp.Usage.OutputTokens,
|
||||
TotalTokens: totalInputTokens + resp.Usage.OutputTokens,
|
||||
}
|
||||
if resp.Usage.CacheReadInputTokens > 0 {
|
||||
out.Usage.InputTokensDetails = &ResponsesInputTokensDetails{
|
||||
@ -150,10 +156,13 @@ type AnthropicEventToResponsesState struct {
|
||||
CurrentCallID string
|
||||
CurrentName string
|
||||
|
||||
// Usage from message_delta
|
||||
InputTokens int
|
||||
OutputTokens int
|
||||
CacheReadInputTokens int
|
||||
// Usage from message_start / message_delta. InputTokens here follows
|
||||
// Anthropic semantics (excludes cached tokens); they are added back when
|
||||
// emitting the OpenAI Responses usage.
|
||||
InputTokens int
|
||||
OutputTokens int
|
||||
CacheReadInputTokens int
|
||||
CacheCreationInputTokens int
|
||||
}
|
||||
|
||||
// NewAnthropicEventToResponsesState returns an initialised stream state.
|
||||
@ -225,6 +234,12 @@ func anthToResHandleMessageStart(evt *AnthropicStreamEvent, state *AnthropicEven
|
||||
if evt.Message.Usage.InputTokens > 0 {
|
||||
state.InputTokens = evt.Message.Usage.InputTokens
|
||||
}
|
||||
if evt.Message.Usage.CacheReadInputTokens > 0 {
|
||||
state.CacheReadInputTokens = evt.Message.Usage.CacheReadInputTokens
|
||||
}
|
||||
if evt.Message.Usage.CacheCreationInputTokens > 0 {
|
||||
state.CacheCreationInputTokens = evt.Message.Usage.CacheCreationInputTokens
|
||||
}
|
||||
}
|
||||
|
||||
if state.CreatedSent {
|
||||
@ -392,9 +407,15 @@ func anthToResHandleMessageDelta(evt *AnthropicStreamEvent, state *AnthropicEven
|
||||
// Update usage
|
||||
if evt.Usage != nil {
|
||||
state.OutputTokens = evt.Usage.OutputTokens
|
||||
if evt.Usage.InputTokens > 0 {
|
||||
state.InputTokens = evt.Usage.InputTokens
|
||||
}
|
||||
if evt.Usage.CacheReadInputTokens > 0 {
|
||||
state.CacheReadInputTokens = evt.Usage.CacheReadInputTokens
|
||||
}
|
||||
if evt.Usage.CacheCreationInputTokens > 0 {
|
||||
state.CacheCreationInputTokens = evt.Usage.CacheCreationInputTokens
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
@ -472,10 +493,13 @@ func makeResponsesCompletedEvent(
|
||||
seq := state.SequenceNumber
|
||||
state.SequenceNumber++
|
||||
|
||||
// Anthropic's input_tokens excludes cache_read/cache_creation; add them
|
||||
// back to match OpenAI Responses semantics where input_tokens is the total.
|
||||
totalInputTokens := state.InputTokens + state.CacheReadInputTokens + state.CacheCreationInputTokens
|
||||
usage := &ResponsesUsage{
|
||||
InputTokens: state.InputTokens,
|
||||
InputTokens: totalInputTokens,
|
||||
OutputTokens: state.OutputTokens,
|
||||
TotalTokens: state.InputTokens + state.OutputTokens,
|
||||
TotalTokens: totalInputTokens + state.OutputTokens,
|
||||
}
|
||||
if state.CacheReadInputTokens > 0 {
|
||||
usage.InputTokensDetails = &ResponsesInputTokensDetails{
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user