Files
smart-home-tablet/components/VoiceOverlay.tsx
Cosmo a780fc7bd5
All checks were successful
Deploy / deploy (push) Successful in 2m58s
feat(voice): play TTS through tablet speakers via ElevenLabs proxy
Stage 2 of voice integration — centralizes TTS on the tablet so the
Python satellite no longer needs ElevenLabs credentials or mpv.

- app/api/voice/tts — POST {text, agent}, proxies to ElevenLabs
  streaming endpoint with flash_v2_5 default, returns audio/mpeg.
  Per-agent voice id via COSMO_TTS_VOICE / LUSYA_TTS_VOICE env.
- VoiceOverlay — on response/error events fetches TTS and plays via
  HTMLAudioElement; on wake event stops playback (barge-in). Dismiss
  timer extended by text length so long responses do not cut off.
- Autoplay caveat: browser may block first playback until user taps
  anywhere on the page (FKB: enable Force Autoplay to bypass).
2026-04-23 12:52:26 +00:00

259 lines
8.2 KiB
TypeScript

'use client'
import { useEffect, useRef, useState } from 'react'
import { motion, AnimatePresence } from 'framer-motion'
/**
 * Phases of a voice interaction. The overlay is hidden while the state is
 * `'idle'` and visible for every other value.
 */
type VoiceState =
  | 'idle'
  | 'wake'
  | 'command'
  | 'response'
  | 'error'

/** Identifiers of the assistants the overlay can represent. */
type Agent =
  | 'cosmo'
  | 'lusya'

/** Shape of one JSON message received from the `/api/voice/stream` event source. */
interface VoiceEvent {
  /** Phase the interaction has just entered. */
  event: VoiceState
  /** Assistant the event belongs to; may be omitted by the sender. */
  agent?: Agent
  /** Text attached to the event (shown for command, response and error events). */
  text?: string
  /** Time of the event as a string; not read by this component. */
  timestamp: string
}
/** Visual identity of a single assistant. */
interface AgentStyle {
  /** Main accent colour (hex). */
  primary: string
  /** Second colour used alongside `primary` in gradients (hex). */
  secondary: string
  /** Display name shown in the overlay header. */
  name: string
  /** Emoji shown next to the name. */
  emoji: string
}

/** Per-assistant look and label, keyed by agent id. */
const AGENT_STYLE: Record<Agent, AgentStyle> = {
  cosmo: {
    primary: '#818cf8',
    secondary: '#a855f7',
    name: 'Cosmo',
    emoji: '🦞',
  },
  lusya: {
    primary: '#ec4899',
    secondary: '#f43f5e',
    name: 'Люся',
    emoji: '👩',
  },
}
/** Overlay lifetime after a wake event if nothing else arrives. */
const WAKE_DISMISS_MS = 20000
/** Overlay lifetime after a recognised command while waiting for the reply. */
const COMMAND_DISMISS_MS = 30000
/** Minimum time a response stays on screen. */
const RESPONSE_MIN_DISMISS_MS = 6000
/** Extra on-screen time per character so long responses are not cut off. */
const RESPONSE_MS_PER_CHAR = 80
/** Overlay lifetime for error messages. */
const ERROR_DISMISS_MS = 5000
/** Delay before reconnecting a dropped event stream. */
const SSE_RETRY_MS = 3000

/** Status suffix shown after the agent name for each state. */
const STATUS_LABEL: Record<VoiceState, string> = {
  idle: '',
  wake: '· слушает',
  command: '· распознал',
  response: '· отвечает',
  error: '· ошибка',
}

/**
 * Full-screen voice assistant overlay.
 *
 * Subscribes to `/api/voice/stream` (server-sent events), mirrors the current
 * interaction phase on screen, and speaks response/error texts by fetching
 * audio from `/api/voice/tts` and playing it through an `HTMLAudioElement`.
 * A `wake` event interrupts any speech that is playing or still loading
 * (barge-in). The overlay hides itself after a per-state timeout.
 */
export default function VoiceOverlay() {
  const [state, setState] = useState<VoiceState>('idle')
  const [agent, setAgent] = useState<Agent>('cosmo')
  const [text, setText] = useState('')

  // Mirror of `agent` for the SSE handler. The handler is created once on
  // mount, so reading the `agent` state variable there would always yield the
  // initial value; the ref always holds the latest one.
  const agentRef = useRef<Agent>('cosmo')

  const dismissTimer = useRef<ReturnType<typeof setTimeout> | null>(null)
  const audioRef = useRef<HTMLAudioElement | null>(null)
  const audioUrlRef = useRef<string | null>(null)
  // Controller of the TTS request currently in flight, if any.
  const ttsAbortRef = useRef<AbortController | null>(null)

  /** Cancels the pending auto-hide timer, if one is scheduled. */
  const clearDismiss = () => {
    if (dismissTimer.current) {
      clearTimeout(dismissTimer.current)
      dismissTimer.current = null
    }
  }

  /** (Re)starts the auto-hide timer: the overlay returns to idle after `ms`. */
  const scheduleDismiss = (ms: number) => {
    clearDismiss()
    dismissTimer.current = setTimeout(() => setState('idle'), ms)
  }

  /**
   * Stops speech in every stage: aborts a TTS request that is still loading,
   * pauses a clip that is playing, and releases its blob URL.
   */
  const stopAudio = () => {
    if (ttsAbortRef.current) {
      ttsAbortRef.current.abort()
      ttsAbortRef.current = null
    }
    if (audioRef.current) {
      try {
        audioRef.current.pause()
        audioRef.current.src = ''
      } catch {
        // Best effort: the element is dropped right below either way.
      }
      audioRef.current = null
    }
    if (audioUrlRef.current) {
      URL.revokeObjectURL(audioUrlRef.current)
      audioUrlRef.current = null
    }
  }

  /**
   * Fetches synthesized speech for `textToSpeak` in the voice of `agentId`
   * and plays it. Any earlier clip or pending request is cancelled first, and
   * this request is itself cancelled by the next `stopAudio()` call, so a
   * superseded clip never starts playing. Failures are logged, not thrown.
   */
  const playTTS = async (textToSpeak: string, agentId: Agent): Promise<void> => {
    stopAudio()
    if (!textToSpeak) return

    const controller = new AbortController()
    ttsAbortRef.current = controller
    try {
      const r = await fetch('/api/voice/tts', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ text: textToSpeak, agent: agentId }),
        signal: controller.signal,
      })
      if (!r.ok) {
        console.warn('TTS endpoint error:', r.status)
        return
      }
      const blob = await r.blob()
      // Superseded while the body was downloading: do not start playback.
      if (controller.signal.aborted) return

      const url = URL.createObjectURL(blob)
      audioUrlRef.current = url
      const audio = new Audio(url)
      audio.onended = () => {
        if (audioUrlRef.current === url) {
          URL.revokeObjectURL(url)
          audioUrlRef.current = null
        }
      }
      audioRef.current = audio
      try {
        await audio.play()
      } catch (err) {
        // If this clip is still the current one, playback genuinely failed
        // (typically the autoplay policy). `onended` will never fire, so
        // release the element and blob URL now. If it was superseded,
        // stopAudio() has already cleaned up and the rejection is expected.
        if (audioRef.current === audio) {
          console.warn('Audio autoplay blocked:', err)
          stopAudio()
        }
      }
    } catch (err) {
      // An aborted request is the normal result of barge-in, a newer
      // response, or unmount — not an error worth logging.
      if (controller.signal.aborted) return
      console.warn('TTS fetch failed:', err)
    } finally {
      if (ttsAbortRef.current === controller) ttsAbortRef.current = null
    }
  }

  useEffect(() => {
    let es: EventSource | null = null
    let retry: ReturnType<typeof setTimeout> | null = null
    let closedByUs = false

    // The stream payload is unvalidated JSON; only accept agent ids that have
    // a style entry, otherwise rendering would dereference `undefined`.
    const isKnownAgent = (value: unknown): value is Agent =>
      typeof value === 'string' && Object.prototype.hasOwnProperty.call(AGENT_STYLE, value)

    const handleEvent = (evt: VoiceEvent) => {
      if (isKnownAgent(evt.agent)) {
        agentRef.current = evt.agent
        setAgent(evt.agent)
      }
      // Events without an agent belong to the most recently announced one.
      const currentAgent = agentRef.current
      const spoken = typeof evt.text === 'string' ? evt.text : ''

      switch (evt.event) {
        case 'wake':
          // Barge-in: cut any ongoing or pending TTS when the user speaks again.
          stopAudio()
          setState('wake')
          setText('')
          scheduleDismiss(WAKE_DISMISS_MS)
          break
        case 'command':
          setState('command')
          setText(spoken)
          scheduleDismiss(COMMAND_DISMISS_MS)
          break
        case 'response':
          setState('response')
          setText(spoken)
          if (spoken) void playTTS(spoken, currentAgent)
          scheduleDismiss(Math.max(RESPONSE_MIN_DISMISS_MS, spoken.length * RESPONSE_MS_PER_CHAR))
          break
        case 'error':
          setState('error')
          setText(spoken || 'Ошибка')
          if (spoken) void playTTS(spoken, currentAgent)
          scheduleDismiss(ERROR_DISMISS_MS)
          break
        case 'idle':
          clearDismiss()
          setState('idle')
          break
        default:
          // Unknown event types are ignored.
          break
      }
    }

    const connect = () => {
      es = new EventSource('/api/voice/stream')
      es.onmessage = (e) => {
        try {
          const evt: VoiceEvent = JSON.parse(e.data)
          handleEvent(evt)
        } catch (err) {
          // One bad message must not break the stream, but leave a trace.
          console.warn('Ignoring malformed voice event:', err)
        }
      }
      es.onerror = () => {
        if (closedByUs) return
        // Close explicitly and reconnect on our own schedule.
        es?.close()
        retry = setTimeout(connect, SSE_RETRY_MS)
      }
    }

    connect()
    return () => {
      closedByUs = true
      clearDismiss()
      stopAudio()
      if (retry) clearTimeout(retry)
      es?.close()
    }
    // Runs once on mount. The helpers captured here only touch refs and state
    // setters, and the current agent is read from agentRef, so the mount-time
    // closures never go stale.
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [])

  const isActive = state !== 'idle'
  const style = AGENT_STYLE[agent]

  return (
    <AnimatePresence>
      {isActive && (
        <motion.div
          initial={{ opacity: 0 }}
          animate={{ opacity: 1 }}
          exit={{ opacity: 0 }}
          transition={{ duration: 0.35 }}
          style={{
            position: 'fixed', inset: 0, zIndex: 300,
            background: 'rgba(5, 5, 15, 0.78)',
            backdropFilter: 'blur(24px)',
            WebkitBackdropFilter: 'blur(24px)' as any,
            display: 'flex', flexDirection: 'column',
            alignItems: 'center', justifyContent: 'center',
            gap: 36, padding: 40,
            pointerEvents: 'none',
          }}
        >
          <SiriBlob color={style.primary} color2={style.secondary} state={state} />
          <div style={{ textAlign: 'center', maxWidth: 760 }}>
            <div style={{
              fontSize: 12, color: 'rgba(255,255,255,0.45)', fontWeight: 700,
              letterSpacing: '0.22em', textTransform: 'uppercase', marginBottom: 14,
              display: 'flex', alignItems: 'center', justifyContent: 'center', gap: 10,
            }}>
              <span style={{ fontSize: 18 }}>{style.emoji}</span>
              {style.name}
              {state !== 'wake' && (
                <span style={{
                  display: 'inline-block', width: 6, height: 6, borderRadius: '50%',
                  background: style.primary,
                  marginLeft: 4,
                }} />
              )}
              <span style={{ letterSpacing: '0.1em' }}>
                {STATUS_LABEL[state]}
              </span>
            </div>
            <div style={{
              fontSize: state === 'wake' ? 36 : 26,
              fontWeight: 700,
              color: state === 'error' ? '#fca5a5' : 'rgba(255,255,255,0.96)',
              letterSpacing: '-0.5px', lineHeight: 1.35,
              minHeight: 48,
            }}>
              {state === 'wake' ? 'Слушаю…' : (text || '…')}
            </div>
          </div>
        </motion.div>
      )}
    </AnimatePresence>
  )
}
/**
 * Animated glowing orb built from three stacked, blurred radial gradients:
 * a pulsing outer halo, a coloured core and a bright centre dot.
 *
 * While `state` is `'wake'` the halo pulses faster and wider, and the core
 * and dot pulse too; in every other state only the halo animates, slowly.
 *
 * @param color  Main colour of the orb.
 * @param color2 Second colour blended into the core gradient.
 * @param state  Current voice state; only `'wake'` changes the animation.
 */
function SiriBlob({ color, color2, state }: { color: string; color2: string; state: VoiceState }) {
  const listening = state === 'wake'

  // Settings shared by all three looping animations.
  const loop = { repeat: Infinity, ease: 'easeInOut' } as const

  const haloAnimation = listening
    ? { scale: [1, 1.25, 1], opacity: [0.5, 0.15, 0.5] }
    : { scale: [1, 1.08, 1], opacity: [0.35, 0.1, 0.35] }
  const haloDuration = listening ? 1.4 : 3
  const coreScale = listening ? [1, 1.08, 1] : 1
  const dotScale = listening ? [1, 0.88, 1] : 1

  return (
    <div style={{ position: 'relative', width: 220, height: 220 }}>
      {/* Halo: soft ring that breathes around the orb */}
      <motion.div
        animate={haloAnimation}
        transition={{ duration: haloDuration, ...loop }}
        style={{
          position: 'absolute',
          inset: 0,
          borderRadius: '50%',
          background: `radial-gradient(circle, ${color}55 0%, transparent 70%)`,
          filter: 'blur(24px)',
        }}
      />
      {/* Core: the coloured body of the orb */}
      <motion.div
        animate={{ scale: coreScale }}
        transition={{ duration: 1.2, ...loop }}
        style={{
          position: 'absolute',
          inset: 50,
          borderRadius: '50%',
          background: `radial-gradient(circle, ${color} 0%, ${color2} 55%, transparent 80%)`,
          filter: 'blur(14px)',
          boxShadow: `0 0 80px ${color}66, 0 0 40px ${color}44`,
        }}
      />
      {/* Centre: small white highlight */}
      <motion.div
        animate={{ scale: dotScale }}
        transition={{ duration: 0.8, ...loop }}
        style={{
          position: 'absolute',
          inset: 88,
          borderRadius: '50%',
          background: `radial-gradient(circle, white 0%, ${color} 60%, transparent 100%)`,
          filter: 'blur(6px)',
          opacity: 0.9,
        }}
      />
    </div>
  )
}