From b9509e823a779a85fab5b6854fce6790cab0216b Mon Sep 17 00:00:00 2001
From: SlientRainyDay <duangthef1rst@gmail.com>
Date: Wed, 27 May 2026 07:09:28 +0000
Subject: [PATCH] fix(billing): apply long-context multiplier to cache_read
 price
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When session long-context pricing is triggered in computeTokenBreakdown
(e.g. GPT-5.4 / GPT-5.5 above the 272k token threshold), the multiplier
was only being applied to InputPricePerToken and OutputPricePerToken.
The cache_read price was left at its base value, so CacheReadCost was
silently undercharged whenever a long-context session also had cache
hits — which is essentially every long Codex / Claude Code session.

Concretely, for gpt-5.4 with 300k cache_read tokens, the cache portion
was billed at the base cache_read price instead of the base price times
LongContextInputMultiplier (e.g. 0.075 instead of 0.150 in the
regression test, i.e. under-billed by a factor of 2.0).

Cache reads are conceptually input-side replays, so they should scale
with LongContextInputMultiplier, matching the treatment of
InputPricePerToken.

Adds two regression tests:
- positive: long-context triggered -> cache_read scaled by 2.0x
- negative: below threshold -> cache_read stays at base price

Fixes #2293

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 backend/internal/service/billing_service.go   |  3 ++
 .../internal/service/billing_service_test.go  | 49 +++++++++++++++++++
 2 files changed, 52 insertions(+)

diff --git a/backend/internal/service/billing_service.go b/backend/internal/service/billing_service.go
index 373502cf..de68b755 100644
--- a/backend/internal/service/billing_service.go
+++ b/backend/internal/service/billing_service.go
@@ -535,6 +535,9 @@ func (s *BillingService) computeTokenBreakdown(
 	if applyLongCtx && s.shouldApplySessionLongContextPricing(tokens, pricing) {
 		inputPrice *= pricing.LongContextInputMultiplier
 		outputPrice *= pricing.LongContextOutputMultiplier
+		// 缓存读取本质上是输入侧的复用，应与 input 一同应用长上下文倍率；
+		// 否则 cache hit 越多，少计的费用越多（见 #2293）。
+		cacheReadPrice *= pricing.LongContextInputMultiplier
 	}
 
 	bd := &CostBreakdown{}
diff --git a/backend/internal/service/billing_service_test.go b/backend/internal/service/billing_service_test.go
index df3e3a0a..73677526 100644
--- a/backend/internal/service/billing_service_test.go
+++ b/backend/internal/service/billing_service_test.go
@@ -197,6 +197,55 @@ func TestCalculateCost_OpenAIGPT54LongContextAppliesWholeSessionMultipliers(t *t
 	require.InDelta(t, expectedInput+expectedOutput, cost.ActualCost, 1e-10)
 }
 
+// Regression test for #2293: when long-context pricing is triggered, cache_read tokens must also be scaled by LongContextInputMultiplier.
+// Before the fix: CacheReadCost = tokens * 0.25e-6 (multiplier omitted, so the request was under-billed).
+// After the fix: CacheReadCost = tokens * 0.25e-6 * LongContextInputMultiplier (= 2.0).
+func TestCalculateCost_OpenAIGPT54LongContextAppliesMultiplierToCacheRead(t *testing.T) {
+	svc := newTestBillingService()
+
+	// InputTokens + CacheReadTokens = 1000 + 300000 = 301000, above the 272000 threshold.
+	tokens := UsageTokens{
+		InputTokens:     1000,
+		CacheReadTokens: 300000,
+		OutputTokens:    1000,
+	}
+
+	cost, err := svc.CalculateCost("gpt-5.4-2026-03-05", tokens, 1.0)
+	require.NoError(t, err)
+
+	expectedInput := float64(tokens.InputTokens) * 2.5e-6 * 2.0
+	expectedOutput := float64(tokens.OutputTokens) * 15e-6 * 1.5
+	expectedCacheRead := float64(tokens.CacheReadTokens) * 0.25e-6 * 2.0
+
+	require.InDelta(t, expectedInput, cost.InputCost, 1e-10)
+	require.InDelta(t, expectedOutput, cost.OutputCost, 1e-10)
+	require.InDelta(t, expectedCacheRead, cost.CacheReadCost, 1e-10,
+		"cache_read_cost should be scaled by LongContextInputMultiplier when long-context pricing applies (issue #2293)")
+
+	expectedTotal := expectedInput + expectedOutput + expectedCacheRead
+	require.InDelta(t, expectedTotal, cost.TotalCost, 1e-10)
+	require.InDelta(t, expectedTotal, cost.ActualCost, 1e-10)
+}
+
+// Negative test: when long-context pricing is not triggered, the cache_read price must not be multiplied by the long-context multiplier.
+func TestCalculateCost_OpenAIGPT54NoLongContextKeepsCacheReadAtBasePrice(t *testing.T) {
+	svc := newTestBillingService()
+
+	// InputTokens + CacheReadTokens = 1000 + 100000 = 101000, below the 272000 threshold, so long-context pricing is not triggered.
+	tokens := UsageTokens{
+		InputTokens:     1000,
+		CacheReadTokens: 100000,
+		OutputTokens:    1000,
+	}
+
+	cost, err := svc.CalculateCost("gpt-5.4-2026-03-05", tokens, 1.0)
+	require.NoError(t, err)
+
+	expectedCacheRead := float64(tokens.CacheReadTokens) * 0.25e-6
+	require.InDelta(t, expectedCacheRead, cost.CacheReadCost, 1e-10,
+		"cache_read_cost should remain at base price when below long-context threshold")
+}
+
 func TestGetFallbackPricing_FamilyMatching(t *testing.T) {
 	svc := newTestBillingService()