fix(home): voice input text persistence bugs (#3737)

* fix(home): voice input text persistence bugs * fix(home): gate setIsListening on startRecognition success * fix(home): handle startRecognition failure in restartRecognition * fix(home): reset speech prefix on submit while mic is active
simstudioai · icecrasher321 · Mar 25, 2026 · Mar 24, 2026 · Mar 24, 2026 · Mar 24, 2026
commit 83eb3ed2111c5a6a5fd3f5ffcf40ece3555f9516
diff --git a/apps/sim/app/workspace/[workspaceId]/home/components/user-input/user-input.tsx b/apps/sim/app/workspace/[workspaceId]/home/components/user-input/user-input.tsx
@@ -106,6 +106,7 @@ const SEND_BUTTON_ACTIVE =
 const SEND_BUTTON_DISABLED = 'bg-[var(--c-808080)] dark:bg-[var(--c-808080)]'
 
 const MAX_CHAT_TEXTAREA_HEIGHT = 200
+const SPEECH_RECOGNITION_LANG = 'en-US'
 
 const DROP_OVERLAY_ICONS = [
   PdfIcon,
@@ -267,13 +268,18 @@ export function UserInput({
   const [isListening, setIsListening] = useState(false)
   const recognitionRef = useRef<SpeechRecognitionInstance | null>(null)
   const prefixRef = useRef('')
+  const valueRef = useRef(value)
 
   useEffect(() => {
     return () => {
       recognitionRef.current?.abort()
     }
   }, [])
 
+  useEffect(() => {
+    valueRef.current = value
+  }, [value])
+
   const textareaRef = mentionMenu.textareaRef
   const wasSendingRef = useRef(false)
   const atInsertPosRef = useRef<number | null>(null)
@@ -390,6 +396,84 @@ export function UserInput({
     [textareaRef]
   )
 
+  const startRecognition = useCallback((): boolean => {
+    const w = window as WindowWithSpeech
+    const SpeechRecognitionAPI = w.SpeechRecognition || w.webkitSpeechRecognition
+    if (!SpeechRecognitionAPI) return false
+
+    const recognition = new SpeechRecognitionAPI()
+    recognition.continuous = true
+    recognition.interimResults = true
+    recognition.lang = SPEECH_RECOGNITION_LANG
+
+    recognition.onresult = (event: SpeechRecognitionEvent) => {
+      let transcript = ''
+      for (let i = 0; i < event.results.length; i++) {
+        transcript += event.results[i][0].transcript
+      }
+      const prefix = prefixRef.current
+      const newVal = prefix ? `${prefix} ${transcript}` : transcript
+      setValue(newVal)
+      valueRef.current = newVal
+    }
+
+    recognition.onend = () => {
+      if (recognitionRef.current === recognition) {
+        prefixRef.current = valueRef.current
+        try {
+          recognition.start()
+        } catch {
+          recognitionRef.current = null
+          setIsListening(false)
+        }
+      }
+    }
+
+    recognition.onerror = (e: SpeechRecognitionErrorEvent) => {
+      if (recognitionRef.current !== recognition) return
+      if (e.error === 'aborted' || e.error === 'not-allowed') {
+        recognitionRef.current = null
+        setIsListening(false)
+      }
+    }
+
+    recognitionRef.current = recognition
+    try {
+      recognition.start()
+      return true
+    } catch {
+      recognitionRef.current = null
+      return false
+    }
+  }, [])
+
+  const restartRecognition = useCallback(
+    (newPrefix: string) => {
+      if (!recognitionRef.current) return
+      prefixRef.current = newPrefix
+      recognitionRef.current.abort()
+      recognitionRef.current = null
+      if (!startRecognition()) {
+        setIsListening(false)
+      }
+    },
+    [startRecognition]
+  )
+
+  const toggleListening = useCallback(() => {
+    if (isListening) {
+      recognitionRef.current?.stop()
+      recognitionRef.current = null
+      setIsListening(false)
+      return
+    }
+
+    prefixRef.current = value
+    if (startRecognition()) {
+      setIsListening(true)
+    }
+  }, [isListening, value, startRecognition])
+
   const handleSubmit = useCallback(() => {
     const fileAttachmentsForApi: FileAttachmentForApi[] = files.attachedFiles
       .filter((f) => !f.uploading && f.key)
@@ -407,13 +491,14 @@ export function UserInput({
       contextManagement.selectedContexts.length > 0 ? contextManagement.selectedContexts : undefined
     )
     setValue('')
+    restartRecognition('')
     files.clearAttachedFiles()
     contextManagement.clearContexts()
 
     if (textareaRef.current) {
       textareaRef.current.style.height = 'auto'
     }
-  }, [onSubmit, files, value, contextManagement, textareaRef])
+  }, [onSubmit, files, value, contextManagement, textareaRef, restartRecognition])
 
   const handleKeyDown = useCallback(
     (e: React.KeyboardEvent<HTMLTextAreaElement>) => {
@@ -488,27 +573,33 @@ export function UserInput({
     [handleSubmit, mentionTokensWithContext, value, textareaRef]
   )
 
-  const handleInputChange = useCallback((e: React.ChangeEvent<HTMLTextAreaElement>) => {
-    const newValue = e.target.value
-    const caret = e.target.selectionStart ?? newValue.length
-
-    if (
-      caret > 0 &&
-      newValue.charAt(caret - 1) === '@' &&
-      (caret === 1 || /\s/.test(newValue.charAt(caret - 2)))
-    ) {
-      const before = newValue.slice(0, caret - 1)
-      const after = newValue.slice(caret)
-      setValue(`${before}${after}`)
-      atInsertPosRef.current = caret - 1
-      setPlusMenuOpen(true)
-      setPlusMenuSearch('')
-      setPlusMenuActiveIndex(0)
-      return
-    }
+  const handleInputChange = useCallback(
+    (e: React.ChangeEvent<HTMLTextAreaElement>) => {
+      const newValue = e.target.value
+      const caret = e.target.selectionStart ?? newValue.length
+
+      if (
+        caret > 0 &&
+        newValue.charAt(caret - 1) === '@' &&
+        (caret === 1 || /\s/.test(newValue.charAt(caret - 2)))
+      ) {
+        const before = newValue.slice(0, caret - 1)
+        const after = newValue.slice(caret)
+        const adjusted = `${before}${after}`
+        setValue(adjusted)
+        atInsertPosRef.current = caret - 1
+        setPlusMenuOpen(true)
+        setPlusMenuSearch('')
+        setPlusMenuActiveIndex(0)
+        restartRecognition(adjusted)
+        return
+      }
 
-    setValue(newValue)
-  }, [])
+      setValue(newValue)
+      restartRecognition(newValue)
+    },
+    [restartRecognition]
+  )
 
   const handleSelectAdjust = useCallback(() => {
     const textarea = textareaRef.current
@@ -536,56 +627,6 @@ export function UserInput({
     [isInitialView]
   )
 
-  const toggleListening = useCallback(() => {
-    if (isListening) {
-      recognitionRef.current?.stop()
-      recognitionRef.current = null
-      setIsListening(false)
-      return
-    }
-
-    const w = window as WindowWithSpeech
-    const SpeechRecognitionAPI = w.SpeechRecognition || w.webkitSpeechRecognition
-    if (!SpeechRecognitionAPI) return
-
-    prefixRef.current = value
-
-    const recognition = new SpeechRecognitionAPI()
-    recognition.continuous = true
-    recognition.interimResults = true
-    recognition.lang = 'en-US'
-
-    recognition.onresult = (event: SpeechRecognitionEvent) => {
-      let transcript = ''
-      for (let i = 0; i < event.results.length; i++) {
-        transcript += event.results[i][0].transcript
-      }
-      const prefix = prefixRef.current
-      setValue(prefix ? `${prefix} ${transcript}` : transcript)
-    }
-
-    recognition.onend = () => {
-      if (recognitionRef.current === recognition) {
-        try {
-          recognition.start()
-        } catch {
-          recognitionRef.current = null
-          setIsListening(false)
-        }
-      }
-    }
-    recognition.onerror = (e: SpeechRecognitionErrorEvent) => {
-      if (e.error === 'aborted' || e.error === 'not-allowed') {
-        recognitionRef.current = null
-        setIsListening(false)
-      }
-    }
-
-    recognitionRef.current = recognition
-    recognition.start()
-    setIsListening(true)
-  }, [isListening, value])
-
   const renderOverlayContent = useCallback(() => {
     const contexts = contextManagement.selectedContexts