fix(voice): dismiss overlay after TTS ends, show listening state for followups

Two fixes: 1) The overlay was hiding mid-TTS because the dismiss timer used max(6s, text.length * 80ms) — ElevenLabs speaks slower than that, so the audio got cut off. Now, for a spoken response or error, scheduleDismiss is only called from playTTS's onEnded callback (the overlay lingers 4s after a response and 3s after an error once the audio finishes or fails); when there is no text to speak, the same linger timer starts immediately. 2) After a response, the Python script silently re-entered record() for follow-ups but the overlay disappeared, so the user had to re-wake every turn. Added a new 'listening' event — Python emits it just before each follow-up record(), and the tablet shows the orb pulsing at medium intensity with the 'жду' status and the last response text preserved below. Safety: the wake, listening and command states now arm a 60s auto-close in case Python dies and never emits idle.
2026-04-23 13:55:25 +00:00
parent 0c677df558
commit e2b2a5d82f
1 changed files with 54 additions and 17 deletions
--- a/components/VoiceOverlay.tsx
+++ b/components/VoiceOverlay.tsx
@@ -2,7 +2,7 @@
 import { useEffect, useRef, useState } from 'react'
 import { motion, AnimatePresence } from 'framer-motion'

-type VoiceState = 'idle' | 'wake' | 'command' | 'response' | 'error'
+type VoiceState = 'idle' | 'wake' | 'listening' | 'command' | 'response' | 'error'
 type Agent = 'cosmo' | 'lusya'

 interface VoiceEvent {
@@ -20,6 +20,7 @@ const AGENT_COLORS: Record<Agent, { core: string; halo: string }> = {

 const STATUS_LABEL: Record<Exclude<VoiceState, 'idle'>, string> = {
  wake: 'слушаю',
+  listening: 'жду',
  command: '',
  response: '',
  error: '',
@@ -59,29 +60,44 @@ export default function VoiceOverlay() {
    }
  }

-  const playTTS = async (textToSpeak: string, agentId: Agent) => {
+  const playTTS = async (textToSpeak: string, agentId: Agent, onEnded?: () => void) => {
    stopAudio()
-    if (!textToSpeak) return
+    if (!textToSpeak) {
+      onEnded?.()
+      return
+    }
    try {
      const r = await fetch('/api/voice/tts', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ text: textToSpeak, agent: agentId }),
      })
-      if (!r.ok) return
+      if (!r.ok) {
+        onEnded?.()
+        return
+      }
      const blob = await r.blob()
      const url = URL.createObjectURL(blob)
      audioUrlRef.current = url
      const audio = new Audio(url)
-      audio.onended = () => {
+      const finish = () => {
        if (audioUrlRef.current === url) {
          URL.revokeObjectURL(url)
          audioUrlRef.current = null
        }
+        onEnded?.()
      }
+      audio.onended = finish
+      audio.onerror = finish
      audioRef.current = audio
-      await audio.play().catch(() => {})
-    } catch {}
+      try {
+        await audio.play()
+      } catch {
+        finish()
+      }
+    } catch {
+      onEnded?.()
+    }
  }

  useEffect(() => {
@@ -98,27 +114,47 @@ export default function VoiceOverlay() {
          const currentAgent: Agent = evt.agent ?? agent
          if (evt.agent) setAgent(evt.agent)

+          // Safety: если Python упадёт и не пришлёт idle, через 60с сами закроемся.
+          const armSafety = () => scheduleDismiss(60_000)
+
          if (evt.event === 'wake') {
+            // Свежая активация: barge-in аудио, чистим текст.
            stopAudio()
            setState('wake')
            setText('')
-            scheduleDismiss(20000)
+            armSafety()
+          } else if (evt.event === 'listening') {
+            // Follow-up: сохраняем последний текст, орб мягко пульсирует.
+            setState('listening')
+            armSafety()
          } else if (evt.event === 'command') {
            setState('command')
            setText(evt.text || '')
-            scheduleDismiss(30000)
+            armSafety()
          } else if (evt.event === 'response') {
            setState('response')
            setText(evt.text || '')
-            if (evt.text) playTTS(evt.text, currentAgent)
-            scheduleDismiss(Math.max(6000, (evt.text?.length || 0) * 80))
+            // Dismiss ТОЛЬКО когда аудио доиграло (не по таймеру!).
+            // После окончания даём Python шанс прислать listening/wake/command — тогда
+            // scheduleDismiss перезатрётся. Иначе через 4с автозакрытие.
+            clearDismiss()
+            if (evt.text) {
+              playTTS(evt.text, currentAgent, () => scheduleDismiss(4000))
+            } else {
+              scheduleDismiss(4000)
+            }
          } else if (evt.event === 'error') {
            setState('error')
            setText(evt.text || 'Ошибка')
-            if (evt.text) playTTS(evt.text, currentAgent)
-            scheduleDismiss(5000)
+            clearDismiss()
+            if (evt.text) {
+              playTTS(evt.text, currentAgent, () => scheduleDismiss(3000))
+            } else {
+              scheduleDismiss(3000)
+            }
          } else if (evt.event === 'idle') {
            clearDismiss()
+            stopAudio()
            setState('idle')
          }
        } catch {}
@@ -213,6 +249,7 @@ export default function VoiceOverlay() {

 function SiriOrb({ core, halo, state }: { core: string; halo: string; state: VoiceState }) {
  const isIntense = state === 'wake'
+  const isListening = state === 'listening'
  const isResponding = state === 'response'

  return (
@@ -220,11 +257,11 @@ function SiriOrb({ core, halo, state }: { core: string; halo: string; state: Voi
      {/* Outer halo — медленное дыхание */}
      <motion.div
        animate={{
-          scale: isIntense ? [1, 1.2, 1] : [1, 1.08, 1],
-          opacity: isIntense ? [0.55, 0.2, 0.55] : [0.35, 0.15, 0.35],
+          scale: isIntense ? [1, 1.2, 1] : isListening ? [1, 1.14, 1] : [1, 1.08, 1],
+          opacity: isIntense ? [0.55, 0.2, 0.55] : isListening ? [0.45, 0.18, 0.45] : [0.35, 0.15, 0.35],
        }}
        transition={{
-          duration: isIntense ? 1.6 : 3.2,
+          duration: isIntense ? 1.6 : isListening ? 2.2 : 3.2,
          repeat: Infinity,
          ease: 'easeInOut',
        }}
@@ -237,7 +274,7 @@ function SiriOrb({ core, halo, state }: { core: string; halo: string; state: Voi
      {/* Inner ring — быстрее, с подкрученным blur */}
      <motion.div
        animate={{
-          scale: isIntense ? [1, 1.1, 1] : isResponding ? [1, 1.04, 1] : 1,
+          scale: isIntense ? [1, 1.1, 1] : isListening ? [1, 1.06, 1] : isResponding ? [1, 1.04, 1] : 1,
          rotate: isIntense ? [0, 10, -8, 0] : 0,
        }}
        transition={{