- New RPMTokenBucketService: per-account continuous-refill token buckets (rate = rpm/60 tokens/sec, capacity = rpm). No new dependencies.
- GatewayService.AcquireRPMToken() delegates to the bucket service.
- Gateway handler inserts the RPM token wait BEFORE wrapReleaseOnDone in both the Gemini and Anthropic dispatch paths; on timeout it returns 429 and releases the slot.
- Config: gateway.rpm_smoothing.enabled (default false) and max_wait_ms (default 5000).
- 7 unit tests covering: immediate acquire, zero RPM, timeout, wait + refill, context cancellation, account isolation, and bucket reset on RPM change.
109 lines
3.6 KiB
Go
109 lines
3.6 KiB
Go
package service
|
|
|
|
import (
|
|
"context"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
func TestRPMTokenBucket_ImmediateAcquireWhenFull(t *testing.T) {
|
|
svc := NewRPMTokenBucketService()
|
|
ctx := context.Background()
|
|
// Bucket starts full (rpm=60 tokens). First 60 calls should succeed immediately.
|
|
for i := 0; i < 60; i++ {
|
|
err := svc.AcquireWithWait(ctx, 1, 60, 0)
|
|
require.NoError(t, err, "call %d should succeed immediately", i+1)
|
|
}
|
|
}
|
|
|
|
func TestRPMTokenBucket_ZeroRPMAlwaysOK(t *testing.T) {
|
|
svc := NewRPMTokenBucketService()
|
|
err := svc.AcquireWithWait(context.Background(), 42, 0, 0)
|
|
assert.NoError(t, err)
|
|
}
|
|
|
|
func TestRPMTokenBucket_TimeoutWhenExhausted(t *testing.T) {
|
|
svc := NewRPMTokenBucketService()
|
|
ctx := context.Background()
|
|
|
|
// rpm=1 → 1 token/minute. One call drains the bucket.
|
|
err := svc.AcquireWithWait(ctx, 99, 1, 5*time.Second)
|
|
require.NoError(t, err, "first call should succeed")
|
|
|
|
// Second call: bucket empty, wait time ≈ 60s which exceeds maxWait=50ms.
|
|
start := time.Now()
|
|
err = svc.AcquireWithWait(ctx, 99, 1, 50*time.Millisecond)
|
|
elapsed := time.Since(start)
|
|
assert.ErrorIs(t, err, ErrRPMWaitTimeout)
|
|
assert.Less(t, elapsed, 200*time.Millisecond, "should timeout quickly, not block")
|
|
}
|
|
|
|
func TestRPMTokenBucket_WaitsAndSucceeds(t *testing.T) {
|
|
svc := NewRPMTokenBucketService()
|
|
ctx := context.Background()
|
|
|
|
// rpm=120 → refill rate = 2 tokens/second. Drain the bucket fully.
|
|
for i := 0; i < 120; i++ {
|
|
require.NoError(t, svc.AcquireWithWait(ctx, 7, 120, 0))
|
|
}
|
|
|
|
// Next call needs to wait ~500ms for the next token. Give it 2s.
|
|
start := time.Now()
|
|
err := svc.AcquireWithWait(ctx, 7, 120, 2*time.Second)
|
|
elapsed := time.Since(start)
|
|
require.NoError(t, err, "should succeed after waiting for refill")
|
|
assert.Greater(t, elapsed, 100*time.Millisecond, "should have actually waited")
|
|
assert.Less(t, elapsed, 1500*time.Millisecond, "should not wait excessively long")
|
|
}
|
|
|
|
func TestRPMTokenBucket_ContextCancellation(t *testing.T) {
|
|
svc := NewRPMTokenBucketService()
|
|
|
|
// rpm=120 → refill = 2 tokens/second → next token in ~500ms after draining.
|
|
// maxWait = 2s (longer than 500ms refill wait) so the code blocks in time.After(~500ms).
|
|
// Context is cancelled after 30ms, which is shorter than the 500ms wait, so ctx.Done fires first.
|
|
for i := 0; i < 120; i++ {
|
|
require.NoError(t, svc.AcquireWithWait(context.Background(), 55, 120, 0))
|
|
}
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
go func() {
|
|
time.Sleep(30 * time.Millisecond)
|
|
cancel()
|
|
}()
|
|
|
|
start := time.Now()
|
|
err := svc.AcquireWithWait(ctx, 55, 120, 2*time.Second)
|
|
elapsed := time.Since(start)
|
|
assert.ErrorIs(t, err, context.Canceled)
|
|
assert.Less(t, elapsed, 200*time.Millisecond, "should respect context cancellation promptly")
|
|
}
|
|
|
|
func TestRPMTokenBucket_DifferentAccountsAreIsolated(t *testing.T) {
|
|
svc := NewRPMTokenBucketService()
|
|
ctx := context.Background()
|
|
|
|
// Drain account 1 (rpm=1).
|
|
require.NoError(t, svc.AcquireWithWait(ctx, 1, 1, 0))
|
|
|
|
// Account 2 has its own bucket and should succeed immediately.
|
|
err := svc.AcquireWithWait(ctx, 2, 1, 0)
|
|
assert.NoError(t, err, "different account should have an independent bucket")
|
|
}
|
|
|
|
func TestRPMTokenBucket_RPMChangeReplacesBucket(t *testing.T) {
|
|
svc := NewRPMTokenBucketService()
|
|
ctx := context.Background()
|
|
|
|
// Create bucket with rpm=1 and drain it.
|
|
require.NoError(t, svc.AcquireWithWait(ctx, 10, 1, 0))
|
|
// Bucket now empty with rpm=1.
|
|
|
|
// Changing RPM to 60 should reset the bucket to full (60 tokens).
|
|
err := svc.AcquireWithWait(ctx, 10, 60, 0)
|
|
assert.NoError(t, err, "new RPM should cause bucket recreation")
|
|
}
|