Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
73 commits
Select commit Hold shift + click to select a range
2cdb896
feat(hosted keys): Implement serper hosted key
Feb 13, 2026
3e6527a
Handle required fields correctly for hosted keys
Feb 13, 2026
e5c8aec
Add rate limiting (3 tries, exponential backoff)
Feb 13, 2026
8a78f80
Add custom pricing, switch to exa as first hosted key
Feb 13, 2026
d174a6a
Add telemetry
Feb 13, 2026
c12e92c
Consolidate byok type definitions
Feb 13, 2026
2a36143
Add warning comment if default calculation is used
Feb 13, 2026
36e6464
Record usage to user stats table
Feb 13, 2026
f237d6f
Fix unit tests, use cost property
Feb 13, 2026
0a002fd
Include more metadata in cost output
Feb 13, 2026
36d49ef
Fix disabled tests
Feb 13, 2026
fbd1cdf
Fix spacing
Feb 14, 2026
dc4c611
Fix lint
Feb 14, 2026
68da290
Move knowledge cost restructuring away from generic block handler
Feb 16, 2026
ce02a30
Migrate knowledge unit tests
Feb 16, 2026
e6d98c6
Lint
Feb 16, 2026
ecdbe29
Fix broken tests
Mar 5, 2026
2325535
Merge branch 'staging' into feat/sim-provided-key
Mar 5, 2026
693a3d3
Add user based hosted key throttling
Mar 5, 2026
242d6e0
Refactor hosted key handling. Add optimistic handling of throttling f…
Mar 5, 2026
7b8e24e
Remove research as hosted key. Recommend BYOK if throttling occurs
Mar 5, 2026
cd160d3
Make adding api keys adjustable via env vars
Mar 6, 2026
2082bc4
Remove vestigial fields from research
Mar 6, 2026
a90777a
Make billing actor id required for throttling
Mar 6, 2026
d7ea0af
Switch to round robin for api key distribution
Mar 6, 2026
1c5425e
Add helper method for adding hosted key cost
Mar 6, 2026
3832e5c
Strip leading double underscores to avoid breaking change
Mar 6, 2026
34cffdc
Lint fix
Mar 6, 2026
612ea7c
Remove falsy check in favor for explicit null check
Mar 6, 2026
a0fc749
Add more detailed metrics for different throttling types
Mar 6, 2026
5d04ae5
Fix _costDollars field
Mar 6, 2026
8eaf401
Handle hosted agent tool calls
Mar 7, 2026
ee2e123
Fail loudly if cost field isn't found
Mar 7, 2026
09a1b5c
Remove any type
Mar 7, 2026
0836131
Fix type error
Mar 7, 2026
427627a
Fix lint
Mar 7, 2026
d29d613
Fix usage log double logging data
Mar 7, 2026
3e94ce3
Fix test
Mar 7, 2026
1ccaae6
Add browseruse hosted key
Mar 6, 2026
74f0191
Add firecrawl and serper hosted keys
Mar 6, 2026
158d523
feat(hosted key): Add exa hosted key (#3221)
TheodoreSpeaks Mar 7, 2026
8137357
Fail fast on cost data not being found
Mar 7, 2026
b96074c
Add hosted key for google services
Mar 7, 2026
0b6c8a9
Add hosting configuration and pricing logic for ElevenLabs TTS tools
Mar 7, 2026
6c9bd07
Add linkup hosted key
Mar 7, 2026
945f7ea
Add jina hosted key
Mar 7, 2026
ce602ce
Add hugging face hosted key
Mar 7, 2026
ed1a142
Add perplexity hosting
Mar 7, 2026
e07cfe2
Add broader metrics for throttling
Mar 7, 2026
8d18eee
Add skill for adding hosted key
Mar 7, 2026
1ac08e5
Merge branch 'staging' into feat/hosted-key-agent
Mar 7, 2026
d7a124a
Lint, remove vestigial hosted keys not implemented
Mar 7, 2026
2280b47
Revert agent changes
Mar 7, 2026
af9d64a
fail fast
Mar 7, 2026
c1b729f
Fix build issue
Mar 7, 2026
4ee4e98
Fix build issues
Mar 7, 2026
829b8d4
Fix type error
Mar 7, 2026
8829ac3
Remove byok types that aren't implemented
Mar 7, 2026
2cccfdd
Address feedback
Mar 7, 2026
05eccf1
Use default model when model id isn't provided
Mar 7, 2026
4b073a6
Fix cost default issues
Mar 7, 2026
540aa18
Remove firecrawl error suppression
Mar 7, 2026
9f676bc
Restore original behavior for hugging face
Mar 7, 2026
a463ebc
Add mistral hosted key
Mar 9, 2026
824b602
Merge branch 'feat/mothership-copilot' into feat/hosted-key-agent
Mar 10, 2026
2743063
Merge feat/mothership-copilot into feat/hosted-key-agent (prefer ours)
Mar 10, 2026
d5120b0
Remove hugging face hosted key
Mar 10, 2026
594a800
Fix pricing mismatch in Mistral and Perplexity
Mar 10, 2026
2293153
Add hosted keys for parallel and brand fetch
Mar 10, 2026
32c791b
Add brandfetch hosted key
Mar 10, 2026
1ea8f83
Update types
Mar 10, 2026
87f6070
Change byok name to parallel_ai
Mar 10, 2026
bfa96d8
Add telemetry on unknown models
Mar 10, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add user based hosted key throttling
  • Loading branch information
Theodore Li committed Mar 5, 2026
commit 693a3d3ff8033943daa45db0e1e3addaa3cbbb7c
16 changes: 16 additions & 0 deletions apps/sim/lib/core/hosted-key-throttler/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Public entry point of the hosted-key-throttler module.
// Runtime API re-exported from './throttler': the throttler class plus its
// singleton accessor and reset helper.
export {
getHostedKeyThrottler,
HostedKeyThrottler,
resetHostedKeyThrottler,
} from './throttler'
// Constants, the token-bucket conversion helper and the throttle-related types
// re-exported from './types'.
export {
DEFAULT_BURST_MULTIPLIER,
THROTTLE_WINDOW_MS,
toTokenBucketConfig,
type AcquireKeyResult,
type CustomThrottle,
type PerRequestThrottle,
type ThrottleConfig,
type ThrottleDimension,
type ThrottleMode,
} from './types'
132 changes: 132 additions & 0 deletions apps/sim/lib/core/hosted-key-throttler/throttler.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
import { loggerMock } from '@sim/testing'
import { afterEach, beforeEach, describe, expect, it, type Mock, vi } from 'vitest'
import { HostedKeyThrottler } from './throttler'
import type { PerRequestThrottle } from './types'
import type { ConsumeResult, RateLimitStorageAdapter } from '@/lib/core/rate-limiter/storage'

vi.mock('@sim/logger', () => loggerMock)

/**
 * Test double for the storage adapter handed to HostedKeyThrottler.
 * Every member is a vitest mock so individual tests can stub return values
 * and assert on calls.
 */
interface MockAdapter {
  /** Stubbed per test to report whether a request is allowed. */
  consumeTokens: Mock
  /** Present to complete the adapter shape. */
  resetBucket: Mock
  /** Present to complete the adapter shape. */
  getTokenStatus: Mock
}

/**
 * Builds a fresh storage-adapter double whose methods are all unconfigured
 * vitest mocks.
 */
const createMockAdapter = (): MockAdapter => {
  const adapter: MockAdapter = {
    consumeTokens: vi.fn(),
    getTokenStatus: vi.fn(),
    resetBucket: vi.fn(),
  }
  return adapter
}

describe('HostedKeyThrottler', () => {
  const testProvider = 'exa'
  const envKeys = ['EXA_API_KEY_1', 'EXA_API_KEY_2', 'EXA_API_KEY_3']
  const perRequestThrottle: PerRequestThrottle = {
    mode: 'per_request',
    userRequestsPerMinute: 10,
  }

  let mockAdapter: MockAdapter
  let throttler: HostedKeyThrottler
  let originalEnv: NodeJS.ProcessEnv

  /** Storage response for a caller that still has budget left in its bucket. */
  const bucketWithBudget = (): ConsumeResult => ({
    allowed: true,
    tokensRemaining: 9,
    resetAt: new Date(Date.now() + 60000),
  })

  /** Calls acquireKey with the suite-wide provider, env var list and throttle. */
  const acquire = (userId?: string) =>
    throttler.acquireKey(testProvider, envKeys, perRequestThrottle, userId)

  beforeEach(() => {
    vi.clearAllMocks()

    // Snapshot the environment so afterEach can undo the keys set below.
    originalEnv = { ...process.env }
    envKeys.forEach((name, position) => {
      process.env[name] = `test-key-${position + 1}`
    })

    // A new throttler per test keeps the in-memory key-selection state isolated.
    mockAdapter = createMockAdapter()
    throttler = new HostedKeyThrottler(mockAdapter as RateLimitStorageAdapter)
  })

  afterEach(() => {
    process.env = originalEnv
  })

  describe('acquireKey', () => {
    it('should return error when no keys are configured', async () => {
      for (const name of envKeys) {
        delete process.env[name]
      }

      const outcome = await acquire()

      expect(outcome.success).toBe(false)
      expect(outcome.error).toContain('No hosted keys configured')
    })

    it('should throttle user when they exceed their rate limit', async () => {
      const exhaustedBucket: ConsumeResult = {
        allowed: false,
        tokensRemaining: 0,
        resetAt: new Date(Date.now() + 30000),
      }
      mockAdapter.consumeTokens.mockResolvedValue(exhaustedBucket)

      const outcome = await acquire('user-123')

      expect(outcome.success).toBe(false)
      expect(outcome.userThrottled).toBe(true)
      expect(outcome.retryAfterMs).toBeDefined()
      expect(outcome.error).toContain('Rate limit exceeded')
    })

    it('should allow user within their rate limit', async () => {
      mockAdapter.consumeTokens.mockResolvedValue(bucketWithBudget())

      const outcome = await acquire('user-123')

      expect(outcome.success).toBe(true)
      expect(outcome.userThrottled).toBeUndefined()
      expect(outcome.key).toBe('test-key-1')
    })

    it('should distribute requests across keys round-robin style', async () => {
      mockAdapter.consumeTokens.mockResolvedValue(bucketWithBudget())

      const pickedIndexes: Array<number | undefined> = []
      for (const userId of ['user-1', 'user-2', 'user-3', 'user-4']) {
        const outcome = await acquire(userId)
        pickedIndexes.push(outcome.keyIndex)
      }

      // The fourth request wraps back to the first key.
      expect(pickedIndexes).toEqual([0, 1, 2, 0])
    })

    it('should work without userId (no per-user throttling)', async () => {
      const outcome = await acquire()

      expect(outcome.success).toBe(true)
      expect(outcome.key).toBe('test-key-1')
      expect(mockAdapter.consumeTokens).not.toHaveBeenCalled()
    })

    it('should handle partial key availability', async () => {
      delete process.env.EXA_API_KEY_2

      const first = await acquire()

      expect(first.success).toBe(true)
      expect(first.key).toBe('test-key-1')
      expect(first.envVarName).toBe('EXA_API_KEY_1')

      // Index 1 has no value, so the next pick lands on index 2.
      const second = await acquire()
      expect(second.keyIndex).toBe(2)
      expect(second.envVarName).toBe('EXA_API_KEY_3')
    })
  })
})
202 changes: 202 additions & 0 deletions apps/sim/lib/core/hosted-key-throttler/throttler.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
import { createLogger } from '@sim/logger'
import {
createStorageAdapter,
type RateLimitStorageAdapter,
type TokenBucketConfig,
} from '@/lib/core/rate-limiter/storage'
import {
DEFAULT_BURST_MULTIPLIER,
THROTTLE_WINDOW_MS,
toTokenBucketConfig,
type AcquireKeyResult,
type PerRequestThrottle,
type ThrottleConfig,
} from './types'

// Module-level logger; entries are created under the 'HostedKeyThrottler' name.
const logger = createLogger('HostedKeyThrottler')

/**
 * Dimension name for per-user rate limiting.
 * Used as the trailing segment of the storage key built in buildUserStorageKey.
 */
const USER_REQUESTS_DIMENSION = 'user_requests'

/**
 * A hosted key that currently has a value in the environment.
 */
interface AvailableKey {
  /** Name of the environment variable the key was read from. */
  envVarName: string
  /** Position of that environment variable in the caller-supplied list. */
  keyIndex: number
  /** The key value itself. */
  key: string
}

/**
 * Hands out platform-owned ("hosted") API keys for a provider.
 *
 * HostedKeyThrottler provides:
 * 1. Per-user rate limiting (enforced - blocks users who exceed their limit)
 * 2. Round-robin key selection (rotates requests across the keys that are
 *    currently configured)
 *
 * Rotation state is kept in memory on the instance. Per-user limits are
 * tracked through the storage adapter.
 */
export class HostedKeyThrottler {
  private readonly storage: RateLimitStorageAdapter
  /** In-memory rotation cursor per provider: provider -> number of keys handed out so far */
  private readonly rotationCursors = new Map<string, number>()

  /**
   * @param storage - Adapter used for the per-user token buckets. When omitted,
   *   one is obtained from `createStorageAdapter()`.
   */
  constructor(storage?: RateLimitStorageAdapter) {
    this.storage = storage ?? createStorageAdapter()
  }

  /**
   * Build storage key for per-user rate limiting
   */
  private buildUserStorageKey(provider: string, userId: string): string {
    return `hosted:${provider}:user:${userId}:${USER_REQUESTS_DIMENSION}`
  }

  /**
   * Get available keys from environment variables.
   *
   * Entries whose environment variable is unset or empty are skipped. Each
   * returned entry keeps its position in `envKeys` as `keyIndex`.
   */
  private getAvailableKeys(envKeys: string[]): AvailableKey[] {
    const keys: AvailableKey[] = []
    envKeys.forEach((envVarName, keyIndex) => {
      const key = process.env[envVarName]
      if (key) {
        keys.push({ key, keyIndex, envVarName })
      }
    })
    return keys
  }

  /**
   * Get user rate limit config from throttle config.
   *
   * @returns The token bucket config, or null when the throttle is not in
   *   `per_request` mode or has no per-user limit set.
   */
  private getUserRateLimitConfig(throttle: ThrottleConfig): TokenBucketConfig | null {
    if (throttle.mode !== 'per_request' || !throttle.userRequestsPerMinute) {
      return null
    }
    return toTokenBucketConfig(
      throttle.userRequestsPerMinute,
      throttle.burstMultiplier ?? DEFAULT_BURST_MULTIPLIER,
      THROTTLE_WINDOW_MS
    )
  }

  /**
   * Check and consume user rate limit. Returns null if allowed, or retry info if throttled.
   *
   * Also returns null when the throttle config defines no per-user limit, and
   * when the storage call throws (the error is logged and the request is allowed).
   */
  private async checkUserRateLimit(
    provider: string,
    userId: string,
    throttle: ThrottleConfig
  ): Promise<{ throttled: true; retryAfterMs: number } | null> {
    const config = this.getUserRateLimitConfig(throttle)
    if (!config) return null

    const storageKey = this.buildUserStorageKey(provider, userId)

    try {
      const result = await this.storage.consumeTokens(storageKey, 1, config)
      if (result.allowed) return null

      // resetAt may already be in the past; never report a negative wait.
      const retryAfterMs = Math.max(0, result.resetAt.getTime() - Date.now())
      logger.info(`User ${userId} throttled for ${provider}`, {
        provider,
        userId,
        retryAfterMs,
        tokensRemaining: result.tokensRemaining,
      })
      return { throttled: true, retryAfterMs }
    } catch (error) {
      logger.error(`Error checking user rate limit for ${provider}`, { error, userId })
      return null // Allow on error
    }
  }

  /**
   * Acquire a hosted key for `provider`.
   *
   * 1. Key availability: fails without touching the user's rate limit when none
   *    of `envKeys` has a value, so a misconfigured provider does not consume
   *    user quota.
   * 2. Per-user throttling (enforced): users exceeding their limit get blocked.
   *    Skipped when `userId` is not given or the throttle has no per-user limit.
   * 3. Round-robin key selection: each successful call takes the next available
   *    key in `envKeys` order. A running per-key total is deliberately not used,
   *    because a key that becomes available later would start at zero and
   *    receive every request until it caught up.
   *
   * @param provider - Provider name; used for storage keys, rotation state and logs.
   * @param envKeys - Names of the environment variables that may hold hosted keys.
   * @param throttle - Throttle configuration for this provider.
   * @param userId - Caller to rate limit; omit to skip per-user throttling.
   * @returns On success the key, its index in `envKeys` and its env var name;
   *   otherwise `success: false` with an error message, plus `userThrottled`
   *   and `retryAfterMs` when the user was rate limited.
   */
  async acquireKey(
    provider: string,
    envKeys: string[],
    throttle: ThrottleConfig,
    userId?: string
  ): Promise<AcquireKeyResult> {
    const availableKeys = this.getAvailableKeys(envKeys)

    if (availableKeys.length === 0) {
      logger.warn(`No hosted keys configured for provider ${provider}`)
      return {
        success: false,
        error: `No hosted keys configured for ${provider}`,
      }
    }

    if (userId) {
      // checkUserRateLimit returns null when the throttle defines no per-user limit.
      const userThrottleResult = await this.checkUserRateLimit(provider, userId, throttle)
      if (userThrottleResult) {
        // Clamp to 1 so the message never reads "wait 0 seconds".
        const waitSeconds = Math.max(1, Math.ceil(userThrottleResult.retryAfterMs / 1000))
        return {
          success: false,
          userThrottled: true,
          retryAfterMs: userThrottleResult.retryAfterMs,
          error: `Rate limit exceeded. Please wait ${waitSeconds} seconds.`,
        }
      }
    }

    // Rotate over whatever keys are available right now.
    const cursor = this.rotationCursors.get(provider) ?? 0
    const selected = availableKeys[cursor % availableKeys.length]
    this.rotationCursors.set(provider, cursor + 1)

    logger.debug(`Selected hosted key for ${provider}`, {
      provider,
      keyIndex: selected.keyIndex,
      envVarName: selected.envVarName,
      providerRequestCount: cursor + 1,
    })

    return {
      success: true,
      key: selected.key,
      keyIndex: selected.keyIndex,
      envVarName: selected.envVarName,
    }
  }
}

/** Module-wide instance, created on first request and cleared by resetHostedKeyThrottler. */
let cachedThrottler: HostedKeyThrottler | null = null

/**
 * Return the shared HostedKeyThrottler, constructing it with its default
 * storage on the first call.
 */
export function getHostedKeyThrottler(): HostedKeyThrottler {
  const existing = cachedThrottler
  if (existing !== null) {
    return existing
  }

  const created = new HostedKeyThrottler()
  cachedThrottler = created
  return created
}

/**
 * Drop the cached throttler so the next getHostedKeyThrottler() call builds a
 * new instance (for testing).
 */
export function resetHostedKeyThrottler(): void {
  cachedThrottler = null
}
Loading