Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 60 additions & 3 deletions apps/sim/lib/api-key/byok.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@ import { and, asc, eq } from 'drizzle-orm'
import { getRotatingApiKey } from '@/lib/core/config/api-keys'
import { env } from '@/lib/core/config/env'
import { isHosted } from '@/lib/core/config/env-flags'
import { isFeatureEnabled } from '@/lib/core/config/feature-flags'
import { getHostedKeyRateLimiter } from '@/lib/core/rate-limiter'
import { decryptSecret } from '@/lib/core/security/encryption'
import { getHostedModels } from '@/providers/models'
import { getHostedModels, getProviderHosting } from '@/providers/models'
import { PROVIDER_PLACEHOLDER_KEY } from '@/providers/utils'
import { useProvidersStore } from '@/stores/providers/store'
import type { BYOKProviderId } from '@/tools/types'
Expand Down Expand Up @@ -87,12 +89,67 @@ export async function getBYOKKey(
}
}

/**
 * Result of resolving which API key to use for a provider call; this is the
 * declared return type of `getApiKeyWithBYOK`.
 */
export interface ApiKeyResolution {
/** The resolved API key. */
apiKey: string
/**
 * `false` for a user-provided key and for a hosted platform-pool key (both
 * branches of `getApiKeyWithBYOK` set it explicitly). Presumably `true` for a
 * workspace key returned by `getBYOKKey` — NOTE(review): confirm against that helper.
 */
isBYOK: boolean
/** Env var name of the platform key used (only when a hosted-key-pool key was acquired). */
hostedKeyEnvVar?: string
}

export async function getApiKeyWithBYOK(
provider: string,
model: string,
workspaceId: string | undefined | null,
userProvidedKey?: string
): Promise<{ apiKey: string; isBYOK: boolean }> {
userProvidedKey?: string,
userId?: string | null
): Promise<ApiKeyResolution> {
// Unified hosted-key path (flag-gated). For any provider with a hosting config:
// workspace BYOK key wins, then a user-provided key (never billed via the pool),
// otherwise acquire a platform key through the shared hosted-key framework with no
// rate limiting. Mirrors tool hosted-key precedence. Falls through to the legacy
// per-provider logic when the flag is off or no platform keys are configured,
// keeping flag-off behavior identical.
if (isHosted && workspaceId) {
const hosting = getProviderHosting(provider)
if (hosting && (await isFeatureEnabled('hosted-key-llm', { userId }))) {
const byokResult = await getBYOKKey(workspaceId, hosting.byokProviderId)
if (byokResult) {
logger.info('Using BYOK key (hosted-key-llm)', { provider, model, workspaceId })
return byokResult
}

// A user-supplied key takes precedence over the platform pool — use it as-is
// and never bill it through hosted-key metrics/cost.
if (userProvidedKey) {
return { apiKey: userProvidedKey, isBYOK: false }
}

const acquired = await getHostedKeyRateLimiter().acquireKey(
hosting.byokProviderId,
hosting.envKeyPrefix,
{ mode: 'none' },
workspaceId
)
if (acquired.success && acquired.key) {
logger.info('Using hosted platform key (hosted-key-llm)', {
provider,
model,
workspaceId,
key: acquired.envVarName,
})
return {
apiKey: acquired.key,
isBYOK: false,
hostedKeyEnvVar: acquired.envVarName,
}
Comment thread
cursor[bot] marked this conversation as resolved.
}
logger.debug('No hosted platform keys configured, falling back to legacy path', {
provider,
model,
})
}
}

const isOllamaModel =
provider === 'ollama' || useProvidersStore.getState().providers.ollama.models.includes(model)
if (isOllamaModel) {
Expand Down
95 changes: 95 additions & 0 deletions apps/sim/lib/api-key/hosted-cost.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/**
* @vitest-environment node
*/
import { beforeEach, describe, expect, it, vi } from 'vitest'

// Spies that stand in for the two `hostedKeyMetrics` methods. They are created
// inside `vi.hoisted` because vitest moves `vi.mock` calls above the imports;
// hoisting the spies as well makes them available when the mock factory runs.
const { mockRecordUsed, mockRecordCostCharged } = vi.hoisted(() => ({
mockRecordUsed: vi.fn(),
mockRecordCostCharged: vi.fn(),
}))

// Replace the metrics module so the code under test records into the spies
// above. Only `recordUsed` and `recordCostCharged` are provided by this mock.
vi.mock('@/lib/monitoring/metrics', () => ({
hostedKeyMetrics: {
recordUsed: mockRecordUsed,
recordCostCharged: mockRecordCostCharged,
},
}))

import {
calculateHostedCost,
classifyHostedKeyFailure,
emitHostedKeyUsage,
} from '@/lib/api-key/hosted-cost'

// Covers both tool pricing variants: the flat `per_request` fee and the
// `custom` fee computed by the tool's own `getCost` callback.
describe('calculateHostedCost (tool pricing)', () => {
  it('per_request returns the flat fee', () => {
    const result = calculateHostedCost({ type: 'per_request', cost: 0.005 }, {}, {})

    expect(result).toEqual({ cost: 0.005 })
  })

  it('custom returns a numeric getCost result', () => {
    // A bare number from getCost is wrapped into `{ cost }`.
    const result = calculateHostedCost({ type: 'custom', getCost: () => 0.42 }, {}, {})

    expect(result).toEqual({ cost: 0.42 })
  })

  it('custom passes through a structured getCost result with metadata', () => {
    const structured = { cost: 1.5, metadata: { units: 3 } }
    const result = calculateHostedCost({ type: 'custom', getCost: () => structured }, {}, {})

    expect(result).toEqual({ cost: 1.5, metadata: { units: 3 } })
  })

  it('forwards params and response to custom getCost', () => {
    const costSpy = vi.fn(() => 1)
    const callParams = { a: 1 }
    const callResponse = { b: 2 }

    calculateHostedCost({ type: 'custom', getCost: costSpy }, callParams, callResponse)

    expect(costSpy).toHaveBeenCalledWith(callParams, callResponse)
  })
})

// Each case pairs an error with the failure reason the classifier must report.
describe('classifyHostedKeyFailure', () => {
  it('classifies structured SDK errors by status', () => {
    const byStatus: Array<[{ status: number; message?: string }, string]> = [
      [{ status: 429 }, 'rate_limited'],
      [{ status: 503 }, 'rate_limited'],
      [{ status: 401 }, 'auth'],
      // A 403 whose message mentions quota is a limit problem, not an auth one.
      [{ status: 403, message: 'quota exceeded' }, 'rate_limited'],
      [{ status: 500 }, 'other'],
    ]

    for (const [sdkError, reason] of byStatus) {
      expect(classifyHostedKeyFailure(sdkError)).toBe(reason)
    }
  })

  it('classifies message-embedded status (provider errors with no .status)', () => {
    // Regression: the previous `\bunauthor\b` regex never matched "Unauthorized".
    const byMessage: Array<[string, string]> = [
      ['Unauthorized', 'auth'],
      ['OpenAI API error (401): bad key', 'auth'],
      ['Forbidden', 'auth'],
      ['Invalid API key provided', 'auth'],
      ['API error (429): rate limit', 'rate_limited'],
      ['Internal Server Error (500)', 'other'],
    ]

    for (const [text, reason] of byMessage) {
      expect(classifyHostedKeyFailure(new Error(text))).toBe(reason)
    }
  })
})

// Verifies the labels handed to each metrics call: usage gets provider/tool/key,
// cost gets the amount plus provider/tool only.
describe('emitHostedKeyUsage', () => {
  beforeEach(() => {
    // Start every test with clean call histories on the metric spies.
    vi.clearAllMocks()
  })

  it('records both usage and cost with the provider/tool/key labels', () => {
    const provider = 'openai'
    const tool = 'gpt-4o'
    const key = 'OPENAI_API_KEY_2'

    emitHostedKeyUsage({ provider, tool, key, costTotal: 0.03 })

    expect(mockRecordUsed).toHaveBeenCalledWith({ provider, tool, key })
    expect(mockRecordCostCharged).toHaveBeenCalledWith(0.03, { provider, tool })
  })
})
93 changes: 93 additions & 0 deletions apps/sim/lib/api-key/hosted-cost.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import { hostedKeyMetrics } from '@/lib/monitoring/metrics'
import type { ToolHostingPricing } from '@/tools/types'

/**
 * Outcome of a hosted-key cost calculation; this is the return type of
 * `calculateHostedCost`.
 */
export interface HostedCostResult {
/** Total billable cost in dollars. */
cost: number
/** Optional metadata about the cost (e.g. provider breakdown from `custom` pricing). */
metadata?: Record<string, unknown>
}

/**
 * Computes the billable cost of a hosted-key **tool** call from the pricing the
 * tool declares: `per_request` charges a flat fee, while `custom` derives the
 * fee from the call through `pricing.getCost`. LLM providers do NOT go through
 * this function — their cost is token-based and computed directly via
 * {@link import('@/providers/utils').calculateCost}.
 *
 * @param pricing - Pricing config declared by the tool.
 * @param params - Call params, forwarded to `getCost` for `custom` pricing.
 * @param response - Call response, forwarded to `getCost` for `custom` pricing.
 * @returns The cost. A bare number from `getCost` is wrapped as `{ cost }`; a
 *   structured result is returned unchanged.
 * @throws Error when `pricing.type` is not a handled pricing type.
 */
export function calculateHostedCost(
  pricing: ToolHostingPricing,
  params: Record<string, unknown>,
  response: Record<string, unknown>
): HostedCostResult {
  if (pricing.type === 'per_request') {
    return { cost: pricing.cost }
  }

  if (pricing.type === 'custom') {
    const computed = pricing.getCost(params, response)
    if (typeof computed === 'number') {
      return { cost: computed }
    }
    return computed
  }

  // Compile-time exhaustiveness guard: a new pricing variant that is not
  // handled above makes this assignment a type error. At runtime it only
  // triggers for a value that bypassed the type system.
  const unhandled: never = pricing
  throw new Error(`Unknown pricing type: ${(unhandled as ToolHostingPricing).type}`)
}

/**
 * Classify a thrown error into a hosted-key failure reason for metrics. Handles
 * both structured SDK errors (numeric `.status`) and provider errors that embed
 * the status in the message string (e.g. `API error (401): ...`). Some providers
 * signal quota/rate-limit via 401/403 + a descriptive message, so those count as
 * `rate_limited`, not `auth`.
 *
 * The input is whatever a `catch` clause received, so it is read defensively and
 * this function never throws:
 * - a `status` that is not a number (missing, `null`, a string, ...) is treated
 *   as absent, and the message heuristics are used instead;
 * - a `message` that is not a string is treated as empty;
 * - a thrown string is used as the message itself.
 *
 * @param error - The caught value; any type is accepted.
 * @returns `rate_limited` for 429/503 or quota/rate-limit wording, `auth` for
 *   401/403 or auth wording, otherwise `other`.
 */
export function classifyHostedKeyFailure(error: unknown): 'rate_limited' | 'auth' | 'other' {
  const fields =
    typeof error === 'object' && error !== null
      ? (error as { status?: unknown; message?: unknown })
      : undefined

  const rawStatus = fields?.status
  const status = typeof rawStatus === 'number' ? rawStatus : undefined

  const rawMessage = typeof error === 'string' ? error : fields?.message
  const message = typeof rawMessage === 'string' ? rawMessage.toLowerCase() : ''

  const mentionsLimit = message.includes('quota') || message.includes('rate limit')

  if (status === 429 || status === 503) return 'rate_limited'
  if (status === 401 || status === 403) return mentionsLimit ? 'rate_limited' : 'auth'

  // A numeric status that is none of the above is authoritative: do not let the
  // message text override it.
  if (status !== undefined) return 'other'

  // No structured status (e.g. provider errors that embed it in the message).
  if (mentionsLimit || /\b(429|503)\b/.test(message)) return 'rate_limited'
  if (
    /\b(401|403)\b/.test(message) ||
    message.includes('unauthor') ||
    message.includes('forbidden') ||
    message.includes('invalid api key')
  ) {
    return 'auth'
  }
  return 'other'
}

/**
 * Reports telemetry for a completed hosted-key call by recording one usage
 * event and one cost event on `hostedKeyMetrics`. CloudWatch only — never a
 * billing write. `recordCostCharged` self-guards on `costTotal > 0`.
 *
 * @param labels - `provider`, `tool` and `key` identify the call; `tool` carries
 *   the tool id for tools, or the model id for LLM calls. `costTotal` is the
 *   amount passed to `recordCostCharged`. The `key` label is attached to the
 *   usage event only, not to the cost event.
 */
export function emitHostedKeyUsage(labels: {
  provider: string
  tool: string
  key: string
  costTotal: number
}): void {
  const { provider, tool, key, costTotal } = labels

  hostedKeyMetrics.recordUsed({ provider, tool, key })
  hostedKeyMetrics.recordCostCharged(costTotal, { provider, tool })
}
1 change: 1 addition & 0 deletions apps/sim/lib/core/config/env.ts
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,7 @@ export const env = createEnv({
DISABLE_INVITATIONS: z.boolean().optional(), // Disable workspace invitations globally (for self-hosted deployments)
DISABLE_PUBLIC_API: z.boolean().optional(), // Disable public API access globally (for self-hosted deployments)
MOTHERSHIP_BETA_FEATURES: z.boolean().optional(), // Enable beta Mothership planning/changelog artifact surfaces
HOSTED_KEY_LLM: z.boolean().optional(), // Route hosted LLM calls through the hosted-key framework (acquire + centralized cost + metrics), no rate limiting

// Development Tools
REACT_GRAB_ENABLED: z.boolean().optional(), // Enable React Grab for UI element debugging in Cursor/AI agents (dev only)
Expand Down
8 changes: 8 additions & 0 deletions apps/sim/lib/core/config/feature-flags.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,14 @@ const FEATURE_FLAGS = {
'user context — use enabled:true for global rollout rather than per-user targeting.',
fallback: 'MOTHERSHIP_BETA_FEATURES',
},
'hosted-key-llm': {
description:
'Route hosted LLM provider calls through the hosted-key framework (acquire + centralized ' +
'cost + metrics), with no rate limiting. Off = legacy getRotatingApiKey path. Evaluated ' +
'server-side with userId only (no orgId in the provider request), so roll out globally or ' +
'per-userId.',
fallback: 'HOSTED_KEY_LLM',
},
'table-snapshot-cache': {
description:
'Mount Sim tables into code sandboxes by reference via a version-keyed CSV snapshot in ' +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,13 @@ describe('HostedKeyRateLimiter', () => {
}
mockAdapter.consumeTokens.mockResolvedValue(allowedResult)

process.env.EXA_API_KEY_COUNT = undefined
process.env.EXA_API_KEY_1 = undefined
process.env.EXA_API_KEY_2 = undefined
process.env.EXA_API_KEY_3 = undefined
// Empty string is falsy, so no key resolves. (Assigning `undefined` would
// leave the string "undefined" under vitest's env handling, which the
// `_1.._N` probe — used when `_COUNT` is absent — would treat as present.)
process.env.EXA_API_KEY_COUNT = ''
process.env.EXA_API_KEY_1 = ''
process.env.EXA_API_KEY_2 = ''
process.env.EXA_API_KEY_3 = ''

const result = await rateLimiter.acquireKey(
testProvider,
Expand All @@ -101,6 +104,38 @@ describe('HostedKeyRateLimiter', () => {
expect(result.error).toContain('No hosted keys configured')
})

it('mode: none returns a key without touching the queue or token bucket', async () => {
  const { success, key, envVarName } = await rateLimiter.acquireKey(
    testProvider,
    envKeyPrefix,
    { mode: 'none' },
    'workspace-1'
  )

  // A key is handed out...
  expect(success).toBe(true)
  expect(key).toBe('test-key-1')
  expect(envVarName).toBe('EXA_API_KEY_1')

  // ...and neither rate-limiting mechanism was consulted.
  expect(mockQueue.enqueue).not.toHaveBeenCalled()
  expect(mockAdapter.consumeTokens).not.toHaveBeenCalled()
})

it('mode: none still reports an error when no keys are configured', async () => {
  // Blank out the count and every numbered key so that no hosted key resolves.
  const hostedKeyVars = ['EXA_API_KEY_COUNT', 'EXA_API_KEY_1', 'EXA_API_KEY_2', 'EXA_API_KEY_3']
  for (const varName of hostedKeyVars) {
    process.env[varName] = ''
  }

  const outcome = await rateLimiter.acquireKey(
    testProvider,
    envKeyPrefix,
    { mode: 'none' },
    'workspace-1'
  )

  expect(outcome.success).toBe(false)
  expect(mockQueue.enqueue).not.toHaveBeenCalled()
})

it('should rate limit billing actor when wait exceeds the queue cap', async () => {
// resetAt past the 5-minute cap forces the wait loop to bail immediately.
const rateLimitedResult: ConsumeResult = {
Expand Down
Loading
Loading