Merge pull request #2816 from Pluviobyte/fix/long-context-cache-read-multiplier

fix(billing): apply long-context multiplier to cache_read price (#2293)
2026-05-27 15:59:11 +08:00 · 2026-05-27 15:59:11 +08:00 · b0142146af
commit b0142146af
parent 2387cf9934 b9509e823a
2 changed files with 52 additions and 0 deletions
--- a/backend/internal/service/billing_service.go
+++ b/backend/internal/service/billing_service.go
@@ -535,6 +535,9 @@ func (s *BillingService) computeTokenBreakdown(
 	if applyLongCtx && s.shouldApplySessionLongContextPricing(tokens, pricing) {
 		inputPrice *= pricing.LongContextInputMultiplier
 		outputPrice *= pricing.LongContextOutputMultiplier
 		// 缓存读取本质上是输入侧的复用，应与 input 一同应用长上下文倍率；
 		// 否则 cache hit 越多，少计的费用越多（见 #2293）。
 		cacheReadPrice *= pricing.LongContextInputMultiplier
 	}
 	bd := &CostBreakdown{}
--- a/backend/internal/service/billing_service_test.go
+++ b/backend/internal/service/billing_service_test.go
@@ -197,6 +197,55 @@ func TestCalculateCost_OpenAIGPT54LongContextAppliesWholeSessionMultipliers(t *t
 	require.InDelta(t, expectedInput+expectedOutput, cost.ActualCost, 1e-10)
 }
 // Regression test for #2293: once long-context billing is triggered,
 // cache_read_tokens must also be scaled by LongContextInputMultiplier.
 // Before the fix: CacheReadCost = tokens * 0.25e-6 (multiplier missing, under-billed).
 // After the fix:  CacheReadCost = tokens * 0.25e-6 * LongContextInputMultiplier (= 2.0).
 func TestCalculateCost_OpenAIGPT54LongContextAppliesMultiplierToCacheRead(t *testing.T) {
 	const (
 		inputRate      = 2.5e-6
 		outputRate     = 15e-6
 		cacheReadRate  = 0.25e-6
 		longCtxInMult  = 2.0
 		longCtxOutMult = 1.5
 		tolerance      = 1e-10
 	)
 	billing := newTestBillingService()
 	// InputTokens + CacheReadTokens = 1000 + 300000 = 301000, above the 272000 threshold.
 	usage := UsageTokens{
 		InputTokens:     1000,
 		CacheReadTokens: 300000,
 		OutputTokens:    1000,
 	}
 	got, err := billing.CalculateCost("gpt-5.4-2026-03-05", usage, 1.0)
 	require.NoError(t, err)
 	// Expected per-component costs with the long-context multipliers applied.
 	wantInput := float64(usage.InputTokens) * inputRate * longCtxInMult
 	wantOutput := float64(usage.OutputTokens) * outputRate * longCtxOutMult
 	wantCacheRead := float64(usage.CacheReadTokens) * cacheReadRate * longCtxInMult
 	wantTotal := wantInput + wantOutput + wantCacheRead
 	require.InDelta(t, wantInput, got.InputCost, tolerance)
 	require.InDelta(t, wantOutput, got.OutputCost, tolerance)
 	require.InDelta(t, wantCacheRead, got.CacheReadCost, tolerance,
 		"cache_read_cost should be scaled by LongContextInputMultiplier when long-context pricing applies (issue #2293)")
 	// The call passes a rate multiplier of 1.0 and the test expects ActualCost to equal TotalCost.
 	require.InDelta(t, wantTotal, got.TotalCost, tolerance)
 	require.InDelta(t, wantTotal, got.ActualCost, tolerance)
 }
 // Negative test: when long-context pricing is not triggered, the cache-read
 // price must not be scaled by the long-context multiplier.
 func TestCalculateCost_OpenAIGPT54NoLongContextKeepsCacheReadAtBasePrice(t *testing.T) {
 	const (
 		cacheReadRate = 0.25e-6
 		tolerance     = 1e-10
 	)
 	billing := newTestBillingService()
 	// InputTokens + CacheReadTokens = 1000 + 100000 = 101000, below the 272000
 	// threshold, so long-context pricing does not apply.
 	usage := UsageTokens{
 		InputTokens:     1000,
 		CacheReadTokens: 100000,
 		OutputTokens:    1000,
 	}
 	got, err := billing.CalculateCost("gpt-5.4-2026-03-05", usage, 1.0)
 	require.NoError(t, err)
 	wantCacheRead := float64(usage.CacheReadTokens) * cacheReadRate
 	require.InDelta(t, wantCacheRead, got.CacheReadCost, tolerance,
 		"cache_read_cost should remain at base price when below long-context threshold")
 }
 func TestGetFallbackPricing_FamilyMatching(t *testing.T) {
 	svc := newTestBillingService()