From b9509e823a779a85fab5b6854fce6790cab0216b Mon Sep 17 00:00:00 2001 From: SlientRainyDay Date: Wed, 27 May 2026 07:09:28 +0000 Subject: [PATCH] fix(billing): apply long-context multiplier to cache_read price MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When session long-context pricing is triggered in computeTokenBreakdown (e.g. GPT-5.4 / GPT-5.5 above the 272k token threshold), the long-context multipliers were applied only to InputPricePerToken (LongContextInputMultiplier) and OutputPricePerToken (LongContextOutputMultiplier). The cache_read price was left at its base value, so CacheReadCost was silently undercharged whenever a long-context session also had cache hits — which is essentially every long Codex / Claude Code session. Concretely, for gpt-5.4 with 300k cache_read tokens, the bug billed the cache portion at the base rate instead of the base rate times LongContextInputMultiplier (2.0), i.e. the cache-read cost was under-billed by a factor of the multiplier (0.075 instead of 0.150 in the regression test). Cache reads are conceptually input-side replays, so they should scale with LongContextInputMultiplier, matching the treatment of InputPricePerToken. 
Adds two regression tests: - positive: long-context triggered -> cache_read scaled by 2.0x - negative: below threshold -> cache_read stays at base price Fixes #2293 Co-authored-by: Cursor --- backend/internal/service/billing_service.go | 3 ++ .../internal/service/billing_service_test.go | 49 +++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/backend/internal/service/billing_service.go b/backend/internal/service/billing_service.go index 373502cf..de68b755 100644 --- a/backend/internal/service/billing_service.go +++ b/backend/internal/service/billing_service.go @@ -535,6 +535,9 @@ func (s *BillingService) computeTokenBreakdown( if applyLongCtx && s.shouldApplySessionLongContextPricing(tokens, pricing) { inputPrice *= pricing.LongContextInputMultiplier outputPrice *= pricing.LongContextOutputMultiplier + // 缓存读取本质上是输入侧的复用,应与 input 一同应用长上下文倍率; + // 否则 cache hit 越多,少计的费用越多(见 #2293)。 + cacheReadPrice *= pricing.LongContextInputMultiplier } bd := &CostBreakdown{} diff --git a/backend/internal/service/billing_service_test.go b/backend/internal/service/billing_service_test.go index df3e3a0a..73677526 100644 --- a/backend/internal/service/billing_service_test.go +++ b/backend/internal/service/billing_service_test.go @@ -197,6 +197,55 @@ func TestCalculateCost_OpenAIGPT54LongContextAppliesWholeSessionMultipliers(t *t require.InDelta(t, expectedInput+expectedOutput, cost.ActualCost, 1e-10) } +// 回归测试 #2293:长上下文计费触发时,cache_read_tokens 也应应用 LongContextInputMultiplier。 +// 修复前:CacheReadCost = tokens * 0.25e-6 (漏乘倍率,少计费用)。 +// 修复后:CacheReadCost = tokens * 0.25e-6 * LongContextInputMultiplier(=2.0)。 +func TestCalculateCost_OpenAIGPT54LongContextAppliesMultiplierToCacheRead(t *testing.T) { + svc := newTestBillingService() + + // InputTokens + CacheReadTokens = 1000 + 300000 = 301000 > 272000 阈值 + tokens := UsageTokens{ + InputTokens: 1000, + CacheReadTokens: 300000, + OutputTokens: 1000, + } + + cost, err := svc.CalculateCost("gpt-5.4-2026-03-05", tokens, 1.0) + 
require.NoError(t, err) + + expectedInput := float64(tokens.InputTokens) * 2.5e-6 * 2.0 + expectedOutput := float64(tokens.OutputTokens) * 15e-6 * 1.5 + expectedCacheRead := float64(tokens.CacheReadTokens) * 0.25e-6 * 2.0 + + require.InDelta(t, expectedInput, cost.InputCost, 1e-10) + require.InDelta(t, expectedOutput, cost.OutputCost, 1e-10) + require.InDelta(t, expectedCacheRead, cost.CacheReadCost, 1e-10, + "cache_read_cost should be scaled by LongContextInputMultiplier when long-context pricing applies (issue #2293)") + + expectedTotal := expectedInput + expectedOutput + expectedCacheRead + require.InDelta(t, expectedTotal, cost.TotalCost, 1e-10) + require.InDelta(t, expectedTotal, cost.ActualCost, 1e-10) +} + +// 阴性测试:未触发长上下文时,cache_read_price 不应被错误地乘以倍率。 +func TestCalculateCost_OpenAIGPT54NoLongContextKeepsCacheReadAtBasePrice(t *testing.T) { + svc := newTestBillingService() + + // InputTokens + CacheReadTokens = 1000 + 100000 = 101000 < 272000 阈值,不触发长上下文 + tokens := UsageTokens{ + InputTokens: 1000, + CacheReadTokens: 100000, + OutputTokens: 1000, + } + + cost, err := svc.CalculateCost("gpt-5.4-2026-03-05", tokens, 1.0) + require.NoError(t, err) + + expectedCacheRead := float64(tokens.CacheReadTokens) * 0.25e-6 + require.InDelta(t, expectedCacheRead, cost.CacheReadCost, 1e-10, + "cache_read_cost should remain at base price when below long-context threshold") +} + func TestGetFallbackPricing_FamilyMatching(t *testing.T) { svc := newTestBillingService()