fix(voice): dismiss overlay after TTS ends, show listening state for followups
All checks were successful
Deploy / deploy (push) Successful in 3m11s

Two fixes:

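1) The overlay was hiding mid-TTS because the dismiss timer used
   max(6s, text.length * 80ms) — ElevenLabs speaks slower than that,
   so the audio got cut off. Now, for response and error events,
   scheduleDismiss is only called from playTTS's onEnded callback
   (which also fires if the TTS request or playback fails), or
   directly when there is no text to speak. The overlay then lingers
   4s after a response and 3s after an error.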

2) After a response, the Python script silently re-entered record()
   for follow-ups, but the overlay disappeared, so the user had to
   re-wake every turn. Added a new 'listening' event — Python
   emits it just before each follow-up record(); the tablet shows the
   orb pulsing at medium intensity with the 'жду' ("waiting") status
   and the last response text preserved below.

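Safety: the wake, listening and command states now arm a 60s
auto-close in case Python dies and never emits idle. The response and
error states do not arm it; they rely on the post-TTS dismiss timer
described in (1).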
This commit is contained in:
Cosmo
2026-04-23 13:55:25 +00:00
parent 0c677df558
commit e2b2a5d82f

View File

@@ -2,7 +2,7 @@
import { useEffect, useRef, useState } from 'react' import { useEffect, useRef, useState } from 'react'
import { motion, AnimatePresence } from 'framer-motion' import { motion, AnimatePresence } from 'framer-motion'
type VoiceState = 'idle' | 'wake' | 'command' | 'response' | 'error' type VoiceState = 'idle' | 'wake' | 'listening' | 'command' | 'response' | 'error'
type Agent = 'cosmo' | 'lusya' type Agent = 'cosmo' | 'lusya'
interface VoiceEvent { interface VoiceEvent {
@@ -20,6 +20,7 @@ const AGENT_COLORS: Record<Agent, { core: string; halo: string }> = {
const STATUS_LABEL: Record<Exclude<VoiceState, 'idle'>, string> = { const STATUS_LABEL: Record<Exclude<VoiceState, 'idle'>, string> = {
wake: 'слушаю', wake: 'слушаю',
listening: 'жду',
command: '', command: '',
response: '', response: '',
error: '', error: '',
@@ -59,29 +60,44 @@ export default function VoiceOverlay() {
} }
} }
const playTTS = async (textToSpeak: string, agentId: Agent) => { const playTTS = async (textToSpeak: string, agentId: Agent, onEnded?: () => void) => {
stopAudio() stopAudio()
if (!textToSpeak) return if (!textToSpeak) {
onEnded?.()
return
}
try { try {
const r = await fetch('/api/voice/tts', { const r = await fetch('/api/voice/tts', {
method: 'POST', method: 'POST',
headers: { 'Content-Type': 'application/json' }, headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ text: textToSpeak, agent: agentId }), body: JSON.stringify({ text: textToSpeak, agent: agentId }),
}) })
if (!r.ok) return if (!r.ok) {
onEnded?.()
return
}
const blob = await r.blob() const blob = await r.blob()
const url = URL.createObjectURL(blob) const url = URL.createObjectURL(blob)
audioUrlRef.current = url audioUrlRef.current = url
const audio = new Audio(url) const audio = new Audio(url)
audio.onended = () => { const finish = () => {
if (audioUrlRef.current === url) { if (audioUrlRef.current === url) {
URL.revokeObjectURL(url) URL.revokeObjectURL(url)
audioUrlRef.current = null audioUrlRef.current = null
} }
onEnded?.()
} }
audio.onended = finish
audio.onerror = finish
audioRef.current = audio audioRef.current = audio
await audio.play().catch(() => {}) try {
} catch {} await audio.play()
} catch {
finish()
}
} catch {
onEnded?.()
}
} }
useEffect(() => { useEffect(() => {
@@ -98,27 +114,47 @@ export default function VoiceOverlay() {
const currentAgent: Agent = evt.agent ?? agent const currentAgent: Agent = evt.agent ?? agent
if (evt.agent) setAgent(evt.agent) if (evt.agent) setAgent(evt.agent)
// Safety: если Python упадёт и не пришлёт idle, через 60с сами закроемся.
const armSafety = () => scheduleDismiss(60_000)
if (evt.event === 'wake') { if (evt.event === 'wake') {
// Свежая активация: barge-in аудио, чистим текст.
stopAudio() stopAudio()
setState('wake') setState('wake')
setText('') setText('')
scheduleDismiss(20000) armSafety()
} else if (evt.event === 'listening') {
// Follow-up: сохраняем последний текст, орб мягко пульсирует.
setState('listening')
armSafety()
} else if (evt.event === 'command') { } else if (evt.event === 'command') {
setState('command') setState('command')
setText(evt.text || '') setText(evt.text || '')
scheduleDismiss(30000) armSafety()
} else if (evt.event === 'response') { } else if (evt.event === 'response') {
setState('response') setState('response')
setText(evt.text || '') setText(evt.text || '')
if (evt.text) playTTS(evt.text, currentAgent) // Dismiss ТОЛЬКО когда аудио доиграло (не по таймеру!).
scheduleDismiss(Math.max(6000, (evt.text?.length || 0) * 80)) // После окончания даём Python шанс прислать listening/wake/command — тогда
// scheduleDismiss перезатрётся. Иначе через 4с автозакрытие.
clearDismiss()
if (evt.text) {
playTTS(evt.text, currentAgent, () => scheduleDismiss(4000))
} else {
scheduleDismiss(4000)
}
} else if (evt.event === 'error') { } else if (evt.event === 'error') {
setState('error') setState('error')
setText(evt.text || 'Ошибка') setText(evt.text || 'Ошибка')
if (evt.text) playTTS(evt.text, currentAgent) clearDismiss()
scheduleDismiss(5000) if (evt.text) {
playTTS(evt.text, currentAgent, () => scheduleDismiss(3000))
} else {
scheduleDismiss(3000)
}
} else if (evt.event === 'idle') { } else if (evt.event === 'idle') {
clearDismiss() clearDismiss()
stopAudio()
setState('idle') setState('idle')
} }
} catch {} } catch {}
@@ -213,6 +249,7 @@ export default function VoiceOverlay() {
function SiriOrb({ core, halo, state }: { core: string; halo: string; state: VoiceState }) { function SiriOrb({ core, halo, state }: { core: string; halo: string; state: VoiceState }) {
const isIntense = state === 'wake' const isIntense = state === 'wake'
const isListening = state === 'listening'
const isResponding = state === 'response' const isResponding = state === 'response'
return ( return (
@@ -220,11 +257,11 @@ function SiriOrb({ core, halo, state }: { core: string; halo: string; state: Voi
{/* Outer halo — медленное дыхание */} {/* Outer halo — медленное дыхание */}
<motion.div <motion.div
animate={{ animate={{
scale: isIntense ? [1, 1.2, 1] : [1, 1.08, 1], scale: isIntense ? [1, 1.2, 1] : isListening ? [1, 1.14, 1] : [1, 1.08, 1],
opacity: isIntense ? [0.55, 0.2, 0.55] : [0.35, 0.15, 0.35], opacity: isIntense ? [0.55, 0.2, 0.55] : isListening ? [0.45, 0.18, 0.45] : [0.35, 0.15, 0.35],
}} }}
transition={{ transition={{
duration: isIntense ? 1.6 : 3.2, duration: isIntense ? 1.6 : isListening ? 2.2 : 3.2,
repeat: Infinity, repeat: Infinity,
ease: 'easeInOut', ease: 'easeInOut',
}} }}
@@ -237,7 +274,7 @@ function SiriOrb({ core, halo, state }: { core: string; halo: string; state: Voi
{/* Inner ring — быстрее, с подкрученным blur */} {/* Inner ring — быстрее, с подкрученным blur */}
<motion.div <motion.div
animate={{ animate={{
scale: isIntense ? [1, 1.1, 1] : isResponding ? [1, 1.04, 1] : 1, scale: isIntense ? [1, 1.1, 1] : isListening ? [1, 1.06, 1] : isResponding ? [1, 1.04, 1] : 1,
rotate: isIntense ? [0, 10, -8, 0] : 0, rotate: isIntense ? [0, 10, -8, 0] : 0,
}} }}
transition={{ transition={{