sub2api/backend/internal/service/rpm_token_bucket_service_test.go
win 95814974de feat(rpm): add token bucket smoothing for RPM rate limiting
- New RPMTokenBucketService: per-account continuous-refill token buckets
  (rate = rpm/60 tokens/sec, capacity = rpm). No new dependencies.
- GatewayService.AcquireRPMToken() delegates to the bucket service.
- Gateway handler inserts RPM token wait BEFORE wrapReleaseOnDone in both
  Gemini and Anthropic dispatch paths; timeout returns 429 and releases slot.
- Config: gateway.rpm_smoothing.enabled (default false) + max_wait_ms (default 5000).
- 7 unit tests covering: immediate acquire, zero RPM, timeout, wait+refill,
  context cancel, account isolation, bucket reset on RPM change.
2026-04-29 01:22:54 +08:00

109 lines
3.6 KiB
Go

package service
import (
"context"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestRPMTokenBucket_ImmediateAcquireWhenFull checks that a freshly created
// bucket for rpm=60 serves 60 back-to-back acquisitions when each call is
// given a zero wait budget.
func TestRPMTokenBucket_ImmediateAcquireWhenFull(t *testing.T) {
	const rpm = 60

	bucket := NewRPMTokenBucketService()
	background := context.Background()

	// The bucket is expected to start at capacity, so none of these calls
	// should need any waiting time (maxWait = 0).
	for attempt := 1; attempt <= rpm; attempt++ {
		require.NoError(t,
			bucket.AcquireWithWait(background, 1, rpm, 0),
			"call %d should succeed immediately", attempt,
		)
	}
}
// TestRPMTokenBucket_ZeroRPMAlwaysOK checks that an acquisition with rpm=0 and
// no wait budget returns without an error.
func TestRPMTokenBucket_ZeroRPMAlwaysOK(t *testing.T) {
	bucket := NewRPMTokenBucketService()

	assert.NoError(t, bucket.AcquireWithWait(context.Background(), 42, 0, 0))
}
// TestRPMTokenBucket_TimeoutWhenExhausted checks that once the only token of an
// rpm=1 bucket has been taken, a second acquisition with a short wait budget
// fails with ErrRPMWaitTimeout and returns quickly.
func TestRPMTokenBucket_TimeoutWhenExhausted(t *testing.T) {
	const (
		accountID = 99
		rpm       = 1
	)

	bucket := NewRPMTokenBucketService()
	background := context.Background()

	// rpm=1 means one token per minute, so this call empties the bucket.
	require.NoError(t,
		bucket.AcquireWithWait(background, accountID, rpm, 5*time.Second),
		"first call should succeed",
	)

	// The next token is roughly 60s away, far beyond the 50ms budget, so the
	// call is expected to give up rather than block.
	began := time.Now()
	err := bucket.AcquireWithWait(background, accountID, rpm, 50*time.Millisecond)
	took := time.Since(began)

	assert.ErrorIs(t, err, ErrRPMWaitTimeout)
	assert.Less(t, took, 200*time.Millisecond, "should timeout quickly, not block")
}
// TestRPMTokenBucket_WaitsAndSucceeds drains an rpm=120 bucket and checks that
// the following acquisition blocks for a measurable but bounded time before
// succeeding.
func TestRPMTokenBucket_WaitsAndSucceeds(t *testing.T) {
	const (
		accountID = 7
		rpm       = 120
	)

	bucket := NewRPMTokenBucketService()
	background := context.Background()

	// Empty the bucket. At rpm=120 the refill rate is two tokens per second.
	for remaining := rpm; remaining > 0; remaining-- {
		require.NoError(t, bucket.AcquireWithWait(background, accountID, rpm, 0))
	}

	// The next token should become available after about 500ms; the 2s
	// budget leaves generous headroom.
	began := time.Now()
	err := bucket.AcquireWithWait(background, accountID, rpm, 2*time.Second)
	took := time.Since(began)

	require.NoError(t, err, "should succeed after waiting for refill")
	assert.Greater(t, took, 100*time.Millisecond, "should have actually waited")
	assert.Less(t, took, 1500*time.Millisecond, "should not wait excessively long")
}
// TestRPMTokenBucket_ContextCancellation drains an rpm=120 bucket and checks
// that a pending acquisition returns context.Canceled shortly after its
// context is cancelled, instead of waiting for the refill or for maxWait.
func TestRPMTokenBucket_ContextCancellation(t *testing.T) {
	const (
		accountID = 55
		rpm       = 120
	)

	svc := NewRPMTokenBucketService()

	// rpm=120 refills at two tokens per second, so after draining, the next
	// token is about 500ms away.
	for i := 0; i < rpm; i++ {
		require.NoError(t, svc.AcquireWithWait(context.Background(), accountID, rpm, 0))
	}

	ctx, cancel := context.WithCancel(context.Background())
	// Release the context on every exit path, including early returns.
	defer cancel()

	// Cancel after 30ms: well before the ~500ms refill and the 2s maxWait, so
	// cancellation should be what ends the call. Using a timer rather than a
	// bare goroutine lets the test stop it on exit, so nothing outlives the
	// test if the acquisition returns early.
	timer := time.AfterFunc(30*time.Millisecond, cancel)
	defer timer.Stop()

	start := time.Now()
	err := svc.AcquireWithWait(ctx, accountID, rpm, 2*time.Second)
	elapsed := time.Since(start)

	assert.ErrorIs(t, err, context.Canceled)
	assert.Less(t, elapsed, 200*time.Millisecond, "should respect context cancellation promptly")
}
// TestRPMTokenBucket_DifferentAccountsAreIsolated checks that consuming the
// only token of one account's rpm=1 bucket does not stop a different account
// from acquiring immediately.
func TestRPMTokenBucket_DifferentAccountsAreIsolated(t *testing.T) {
	const rpm = 1

	bucket := NewRPMTokenBucketService()
	background := context.Background()

	// Use up account 1's single token.
	require.NoError(t, bucket.AcquireWithWait(background, 1, rpm, 0))

	// Account 2 is expected to draw from its own bucket, so a zero wait
	// budget must still be enough.
	assert.NoError(t,
		bucket.AcquireWithWait(background, 2, rpm, 0),
		"different account should have an independent bucket",
	)
}
// TestRPMTokenBucket_RPMChangeReplacesBucket checks that after an account's
// rpm=1 bucket has been emptied, an acquisition for the same account with
// rpm=60 succeeds without any wait budget.
func TestRPMTokenBucket_RPMChangeReplacesBucket(t *testing.T) {
	const accountID = 10

	bucket := NewRPMTokenBucketService()
	background := context.Background()

	// Create the bucket at rpm=1 and take its only token, leaving it empty.
	require.NoError(t, bucket.AcquireWithWait(background, accountID, 1, 0))

	// Requesting with rpm=60 is expected to replace the drained bucket with a
	// full one, so this call should not have to wait.
	assert.NoError(t,
		bucket.AcquireWithWait(background, accountID, 60, 0),
		"new RPM should cause bucket recreation",
	)
}