From 51f0be9cc8281e98e6baab1e2dcb69d7f5f00650 Mon Sep 17 00:00:00 2001 From: waleed Date: Wed, 10 Jun 2026 08:53:45 -0700 Subject: [PATCH 1/2] improvement(chat-voice): modernize ElevenLabs TTS to Flash v2.5 - Switch default TTS model from eleven_turbo_v2_5 to eleven_flash_v2_5 (ElevenLabs recommends Flash over Turbo in all cases; ~75ms latency) - Drop deprecated optimize_streaming_latency knob plus legacy use_pvc_as_ivc / enable_ssml_parsing flags - Move output_format to the query string and raise it from mp3_22050_32 to mp3_44100_128 for higher audio quality - Switch apply_text_normalization from off to auto for correct number/date pronunciation --- apps/sim/app/api/proxy/tts/stream/route.ts | 9 +++------ apps/sim/app/chat/hooks/use-audio-streaming.ts | 2 +- apps/sim/lib/api/contracts/media/tts-stream.ts | 2 +- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/apps/sim/app/api/proxy/tts/stream/route.ts b/apps/sim/app/api/proxy/tts/stream/route.ts index d8ea97d39c..39d561522a 100644 --- a/apps/sim/app/api/proxy/tts/stream/route.ts +++ b/apps/sim/app/api/proxy/tts/stream/route.ts @@ -92,7 +92,8 @@ export const POST = withRouteHandler(async (request: NextRequest) => { return new Response('ElevenLabs service not configured', { status: 503 }) } - const endpoint = `https://api.elevenlabs.io/v1/text-to-speech/${voiceId}/stream` + const query = new URLSearchParams({ output_format: 'mp3_44100_128' }) + const endpoint = `https://api.elevenlabs.io/v1/text-to-speech/${voiceId}/stream?${query.toString()}` const response = await fetch(endpoint, { method: 'POST', @@ -104,17 +105,13 @@ export const POST = withRouteHandler(async (request: NextRequest) => { body: JSON.stringify({ text, model_id: modelId, - optimize_streaming_latency: 4, - output_format: 'mp3_22050_32', // Fastest format voice_settings: { stability: 0.5, similarity_boost: 0.8, style: 0.0, use_speaker_boost: false, }, - enable_ssml_parsing: false, - apply_text_normalization: 'off', - use_pvc_as_ivc: false, + apply_text_normalization: 'auto', }), }) diff --git a/apps/sim/app/chat/hooks/use-audio-streaming.ts b/apps/sim/app/chat/hooks/use-audio-streaming.ts index 6ba5b5d9aa..51db697440 100644 --- a/apps/sim/app/chat/hooks/use-audio-streaming.ts +++ b/apps/sim/app/chat/hooks/use-audio-streaming.ts @@ -79,7 +79,7 @@ export function useAudioStreaming(sharedAudioContextRef?: RefObject Date: Wed, 10 Jun 2026 09:00:18 -0700 Subject: [PATCH 2/2] improvement(chat-voice): default to Jessica voice (Flash v2.5-optimized) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the legacy Sarah default (EXAVITQu4vr4xnSDxMaL), which has no high-quality eleven_flash_v2_5 base, with Jessica (cgSgspJ2msm6clMCkdW9) — a current premade conversational voice verified against the live account and optimized for Flash v2.5. --- apps/sim/app/chat/[identifier]/chat.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/sim/app/chat/[identifier]/chat.tsx b/apps/sim/app/chat/[identifier]/chat.tsx index f5678291c8..891727d578 100644 --- a/apps/sim/app/chat/[identifier]/chat.tsx +++ b/apps/sim/app/chat/[identifier]/chat.tsx @@ -44,7 +44,7 @@ interface ChatRequestPayload { } const DEFAULT_VOICE_SETTINGS = { - voiceId: 'EXAVITQu4vr4xnSDxMaL', // Default ElevenLabs voice (Bella) + voiceId: 'cgSgspJ2msm6clMCkdW9', // Default ElevenLabs voice (Jessica) — Flash v2.5-optimized } /**