fix(frontend): correct Cache Hit Rate denominator to include all prompt tokens

Cache Hit Rate was calculated as cache_read / (cache_read + cache_creation),
which always yields 100% for OpenAI models since cache_creation is never
reported by the OpenAI API. The denominator should include all prompt tokens
(input_tokens + cache_read_tokens + cache_creation_tokens) so the rate
reflects the actual percentage of input tokens served from cache.

Fixes #2291
This commit is contained in:
Xch13 2026-05-22 11:45:33 +08:00
parent 16793d3af0
commit 6ba20acd66
2 changed files with 122 additions and 2 deletions

View File

@ -109,8 +109,8 @@ const chartData = computed(() => {
{
label: 'Cache Hit Rate',
data: props.trendData.map((d) => {
const total = d.cache_read_tokens + d.cache_creation_tokens
return total > 0 ? (d.cache_read_tokens / total) * 100 : 0
const totalPromptTokens = d.input_tokens + d.cache_read_tokens + d.cache_creation_tokens
return totalPromptTokens > 0 ? (d.cache_read_tokens / totalPromptTokens) * 100 : 0
}),
borderColor: chartColors.value.cacheHitRate,
backgroundColor: `${chartColors.value.cacheHitRate}20`,

View File

@ -0,0 +1,120 @@
import { describe, expect, it, vi } from 'vitest'
import { mount } from '@vue/test-utils'
import TokenUsageTrend from '../TokenUsageTrend.vue'
const messages: Record<string, string> = {
'admin.dashboard.tokenUsageTrend': 'Token Usage Trend',
'admin.dashboard.noDataAvailable': 'No data available',
}
vi.mock('vue-i18n', async () => {
const actual = await vi.importActual<typeof import('vue-i18n')>('vue-i18n')
return {
...actual,
useI18n: () => ({
t: (key: string) => messages[key] ?? key,
}),
}
})
vi.mock('vue-chartjs', () => ({
Line: {
props: ['data', 'options'],
template: '<div class="chart-data">{{ JSON.stringify(data) }}</div>',
},
}))
describe('TokenUsageTrend', () => {
it('calculates cache hit rate against all prompt tokens', () => {
const wrapper = mount(TokenUsageTrend, {
props: {
trendData: [
{
date: '2026-05-08',
requests: 1,
input_tokens: 500,
output_tokens: 100,
cache_creation_tokens: 0,
cache_read_tokens: 1500,
cost: 0.01,
actual_cost: 0.005,
},
],
},
global: {
stubs: {
LoadingSpinner: true,
},
},
})
const chartData = JSON.parse(wrapper.find('.chart-data').text())
const hitRateDataset = chartData.datasets.find(
(ds: any) => ds.label === 'Cache Hit Rate'
)
// Hit rate = 1500 / (500 + 1500 + 0) * 100 = 75%
expect(hitRateDataset.data[0]).toBe(75)
})
it('returns 0 hit rate when all prompt tokens are zero', () => {
const wrapper = mount(TokenUsageTrend, {
props: {
trendData: [
{
date: '2026-05-08',
requests: 0,
input_tokens: 0,
output_tokens: 0,
cache_creation_tokens: 0,
cache_read_tokens: 0,
cost: 0,
actual_cost: 0,
},
],
},
global: {
stubs: {
LoadingSpinner: true,
},
},
})
const chartData = JSON.parse(wrapper.find('.chart-data').text())
const hitRateDataset = chartData.datasets.find(
(ds: any) => ds.label === 'Cache Hit Rate'
)
expect(hitRateDataset.data[0]).toBe(0)
})
it('includes cache_creation_tokens in denominator for Anthropic models', () => {
const wrapper = mount(TokenUsageTrend, {
props: {
trendData: [
{
date: '2026-05-08',
requests: 1,
input_tokens: 200,
output_tokens: 50,
cache_creation_tokens: 300,
cache_read_tokens: 500,
cost: 0.02,
actual_cost: 0.01,
},
],
},
global: {
stubs: {
LoadingSpinner: true,
},
},
})
const chartData = JSON.parse(wrapper.find('.chart-data').text())
const hitRateDataset = chartData.datasets.find(
(ds: any) => ds.label === 'Cache Hit Rate'
)
// Hit rate = 500 / (200 + 500 + 300) * 100 = 50%
expect(hitRateDataset.data[0]).toBe(50)
})
})