Files
smart-home-tablet/components/VoiceOverlay.tsx
Cosmo 93bf34f216
All checks were successful
Deploy / deploy (push) Successful in 6m53s
feat(voice): push-to-talk button — браузерный mic+VAD pipeline
Шаг 2 миграции: убираем зависимость от Python-агента для базового
голосового сценария. Тап на круглую кнопку-микрофон в правом нижнем
углу → MicVAD (Silero v5) ловит речь → автостоп по тишине → /api/voice/stt
→ /api/voice/chat → ответ через SSE и TTS как раньше.

- components/VoiceController.tsx — push-to-talk UI + MicVAD orchestration
- VoiceOverlay теперь слушает window CustomEvent('voice-local'), чтобы
  орб моргал ещё до round-trip на сервер (wake/listening мгновенно).
- public/vad/ — silero v5/legacy onnx + ort wasm + audio worklet,
  раздаются через baseAssetPath: '/vad/' (не зависит от внешнего CDN,
  важно если планшет без интернета или с RU-блоком).

Что осталось от home-voice-assistant: только wake-word. После Шага 3
(onnxruntime-web + перенос openwakeword .onnx) Python-агент уйдёт целиком.
2026-04-27 08:48:22 +00:00

312 lines
9.6 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
'use client'
import { useEffect, useRef, useState } from 'react'
import { motion, AnimatePresence } from 'framer-motion'
/** Phases of the voice pipeline that the overlay can render. */
type VoiceState =
  | 'idle'
  | 'wake'
  | 'listening'
  | 'command'
  | 'response'
  | 'error'

/** Identifiers of the assistants that have an accent palette below. */
type Agent = 'cosmo' | 'lusya'

/** Payload shape shared by SSE messages and local `voice-local` CustomEvents. */
interface VoiceEvent {
  /** Phase the overlay should switch to. */
  event: VoiceState
  /** Agent the event belongs to, when the sender specifies one. */
  agent?: Agent
  /** Text accompanying the event (used for command / response / error). */
  text?: string
  timestamp: string
}

/** Every state in which the overlay is visible, i.e. anything but `idle`. */
type ActiveVoiceState = Exclude<VoiceState, 'idle'>

// Per-agent accent pair (inner core / outer halo). Minimalist, no names shown.
const AGENT_COLORS: Record<Agent, { core: string; halo: string }> = {
  cosmo: {
    core: '#a5b4fc',
    halo: '#7c3aed',
  },
  lusya: {
    core: '#fbcfe8',
    halo: '#ec4899',
  },
}

// Short caption per visible state; an empty string means no caption is shown.
const STATUS_LABEL: Record<ActiveVoiceState, string> = {
  wake: 'слушаю',
  listening: 'жду',
  command: '',
  response: '',
  error: '',
}
/**
 * Full-screen voice overlay.
 *
 * Subscribes to the `/api/voice/stream` SSE endpoint and to window
 * `voice-local` CustomEvents, mirrors the incoming {@link VoiceEvent}s into
 * local state, speaks `response` / `error` texts through `/api/voice/tts`
 * and fades itself out after a short delay.
 *
 * Takes no props; renders nothing while the state is `idle`.
 */
export default function VoiceOverlay() {
  const [state, setState] = useState<VoiceState>('idle')
  const [agent, setAgent] = useState<Agent>('cosmo')
  const [text, setText] = useState('')
  const dismissTimer = useRef<ReturnType<typeof setTimeout> | null>(null)
  const audioRef = useRef<HTMLAudioElement | null>(null)
  const audioUrlRef = useRef<string | null>(null)
  // Mirror of `agent` for the mount-only effect below: its closure captures
  // the initial `agent` state forever, so the fallback must come from a ref.
  const agentRef = useRef<Agent>('cosmo')
  // Bumped every time playback is stopped or restarted. An in-flight playTTS
  // compares its own snapshot against this to detect that it was superseded.
  const ttsGeneration = useRef(0)

  /** Cancels the pending auto-dismiss timer, if any. */
  const clearDismiss = () => {
    if (dismissTimer.current) {
      clearTimeout(dismissTimer.current)
      dismissTimer.current = null
    }
  }

  /** (Re)arms the auto-dismiss timer: the overlay goes `idle` after `ms`. */
  const scheduleDismiss = (ms: number) => {
    clearDismiss()
    dismissTimer.current = setTimeout(() => setState('idle'), ms)
  }

  /** Stops current playback, invalidates any in-flight TTS request and frees the blob URL. */
  const stopAudio = () => {
    ttsGeneration.current += 1
    const audio = audioRef.current
    if (audio) {
      // Detach handlers first: clearing `src` can fire an `error` event, and
      // the stale callback would replace the current dismiss timer.
      audio.onended = null
      audio.onerror = null
      try {
        audio.pause()
        audio.src = ''
      } catch {
        // Best-effort teardown; the element is dropped right below anyway.
      }
      audioRef.current = null
    }
    if (audioUrlRef.current) {
      URL.revokeObjectURL(audioUrlRef.current)
      audioUrlRef.current = null
    }
  }

  /**
   * Fetches synthesized speech for `textToSpeak` and plays it.
   *
   * @param textToSpeak Text to synthesize; an empty string skips playback.
   * @param agentId Agent sent to the TTS endpoint in the request body.
   * @param onEnded Called once when playback finishes or cannot start. It is
   *   NOT called when this request was superseded (a newer playTTS, or
   *   stopAudio from wake / idle / unmount) — the newer flow owns dismissal.
   */
  const playTTS = async (textToSpeak: string, agentId: Agent, onEnded?: () => void) => {
    stopAudio()
    const generation = ttsGeneration.current
    const isStale = () => ttsGeneration.current !== generation

    if (!textToSpeak) {
      onEnded?.()
      return
    }
    try {
      const r = await fetch('/api/voice/tts', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ text: textToSpeak, agent: agentId }),
      })
      if (isStale()) return
      if (!r.ok) {
        onEnded?.()
        return
      }
      const blob = await r.blob()
      if (isStale()) return

      const url = URL.createObjectURL(blob)
      audioUrlRef.current = url
      const audio = new Audio(url)
      let finished = false
      const finish = () => {
        // `error` and a rejected play() may both report the same failure.
        if (finished) return
        finished = true
        if (audioUrlRef.current === url) {
          URL.revokeObjectURL(url)
          audioUrlRef.current = null
        }
        if (!isStale()) onEnded?.()
      }
      audio.onended = finish
      audio.onerror = finish
      audioRef.current = audio
      try {
        await audio.play()
      } catch {
        // Autoplay blocked, or playback aborted by a pause(); treat as ended.
        finish()
      }
    } catch {
      // Network / decoding failure: fall through to the dismiss callback.
      if (!isStale()) onEnded?.()
    }
  }

  useEffect(() => {
    let es: EventSource | null = null
    let retry: ReturnType<typeof setTimeout> | null = null
    let closedByUs = false

    // Single handler for both SSE messages and local CustomEvents from VoiceController.
    const handleEvent = (evt: VoiceEvent) => {
      // Payloads come from JSON / CustomEvent detail, so normalise them: an
      // unknown agent would break the colour lookup during render, and a
      // non-string text would break the text rendering.
      const knownAgent =
        typeof evt.agent === 'string' &&
        Object.prototype.hasOwnProperty.call(AGENT_COLORS, evt.agent)
          ? evt.agent
          : undefined
      const eventText = typeof evt.text === 'string' ? evt.text : ''

      const currentAgent: Agent = knownAgent ?? agentRef.current
      if (knownAgent) {
        agentRef.current = knownAgent
        setAgent(knownAgent)
      }

      // Safety: if the event stream goes silent, close ourselves after 60s.
      const armSafety = () => scheduleDismiss(60_000)

      switch (evt.event) {
        case 'wake':
          stopAudio()
          setState('wake')
          setText('')
          armSafety()
          break
        case 'listening':
          setState('listening')
          armSafety()
          break
        case 'command':
          setState('command')
          setText(eventText)
          armSafety()
          break
        case 'response':
          setState('response')
          setText(eventText)
          clearDismiss()
          if (eventText) {
            void playTTS(eventText, currentAgent, () => scheduleDismiss(4000))
          } else {
            scheduleDismiss(4000)
          }
          break
        case 'error':
          setState('error')
          setText(eventText || 'Ошибка')
          clearDismiss()
          if (eventText) {
            void playTTS(eventText, currentAgent, () => scheduleDismiss(3000))
          } else {
            scheduleDismiss(3000)
          }
          break
        case 'idle':
          clearDismiss()
          stopAudio()
          setState('idle')
          break
        default:
          // Unknown event names are ignored.
          break
      }
    }

    const connect = () => {
      es = new EventSource('/api/voice/stream')
      es.onmessage = (e) => {
        try {
          handleEvent(JSON.parse(e.data) as VoiceEvent)
        } catch {
          // Malformed message: skip it and keep the stream alive.
        }
      }
      es.onerror = () => {
        if (closedByUs) return
        es?.close()
        retry = setTimeout(connect, 3000)
      }
    }
    connect()

    // Local events (push-to-talk, before the round-trip to the server).
    const onLocal = (e: Event) => {
      const detail = (e as CustomEvent<VoiceEvent>).detail
      if (detail) handleEvent(detail)
    }
    window.addEventListener('voice-local', onLocal as EventListener)

    return () => {
      closedByUs = true
      clearDismiss()
      stopAudio()
      if (retry) clearTimeout(retry)
      es?.close()
      window.removeEventListener('voice-local', onLocal as EventListener)
    }
    // Mount-only subscription; the helpers above touch only refs and state setters.
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [])

  const isActive = state !== 'idle'
  const colors = AGENT_COLORS[agent]
  const status = state !== 'idle' ? STATUS_LABEL[state] : ''

  return (
    <AnimatePresence>
      {isActive && (
        <motion.div
          initial={{ opacity: 0 }}
          animate={{ opacity: 1 }}
          exit={{ opacity: 0 }}
          transition={{ duration: 0.35 }}
          data-swipe-ignore
          style={{
            position: 'fixed', inset: 0, zIndex: 300,
            background: 'rgba(5, 5, 15, 0.82)',
            backdropFilter: 'blur(28px)',
            WebkitBackdropFilter: 'blur(28px)',
            display: 'flex', flexDirection: 'column',
            alignItems: 'center', justifyContent: 'center',
            gap: 30, padding: 40,
            pointerEvents: 'none',
          }}
        >
          <SiriOrb core={colors.core} halo={colors.halo} state={state} />
          {/* Subtle status caption; states with an empty label rely on the text below */}
          {status && (
            <motion.div
              key={state}
              initial={{ opacity: 0, y: -4 }}
              animate={{ opacity: 0.55, y: 0 }}
              transition={{ duration: 0.3 }}
              style={{
                fontSize: 13, color: 'rgba(255,255,255,0.6)',
                fontWeight: 600, letterSpacing: '0.15em',
                textTransform: 'uppercase',
              }}
            >
              {status}
            </motion.div>
          )}
          {/* Text: the recognized command or the response */}
          {text && (
            <motion.div
              key={text.slice(0, 40)}
              initial={{ opacity: 0, y: 8 }}
              animate={{ opacity: 1, y: 0 }}
              transition={{ duration: 0.35 }}
              style={{
                maxWidth: 760, textAlign: 'center',
                fontSize: state === 'command' ? 20 : 24,
                fontWeight: 500,
                color: state === 'error' ? '#fca5a5' :
                  state === 'command' ? 'rgba(255,255,255,0.55)' :
                  'rgba(255,255,255,0.95)',
                letterSpacing: '-0.3px', lineHeight: 1.4,
              }}
            >
              {text}
            </motion.div>
          )}
        </motion.div>
      )}
    </AnimatePresence>
  )
}
/**
 * Animated orb made of three stacked, blurred radial gradients.
 *
 * @param core Colour used for the inner gradient stops.
 * @param halo Colour used for the outer glow and ring edge.
 * @param state Current voice state; `wake`, `listening` and `response` each
 *   select their own animation amplitudes, every other state uses the calmest set.
 */
function SiriOrb({ core, halo, state }: { core: string; halo: string; state: VoiceState }) {
  const waking = state === 'wake'
  const hearing = state === 'listening'
  const speaking = state === 'response'

  // Outer halo: slow breathing, stronger and faster the more "awake" we are.
  let haloScale = [1, 1.08, 1]
  let haloOpacity = [0.35, 0.15, 0.35]
  let haloDuration = 3.2
  if (waking) {
    haloScale = [1, 1.2, 1]
    haloOpacity = [0.55, 0.2, 0.55]
    haloDuration = 1.6
  } else if (hearing) {
    haloScale = [1, 1.14, 1]
    haloOpacity = [0.45, 0.18, 0.45]
    haloDuration = 2.2
  }

  // Inner ring: pulses in wake / listening / response, wobbles only on wake.
  let ringScale: number | number[] = 1
  if (waking) {
    ringScale = [1, 1.1, 1]
  } else if (hearing) {
    ringScale = [1, 1.06, 1]
  } else if (speaking) {
    ringScale = [1, 1.04, 1]
  }
  const ringRotate: number | number[] = waking ? [0, 10, -8, 0] : 0

  // Bright core: animated only on wake, otherwise held at a fixed value.
  const coreScale: number | number[] = waking ? [1, 0.85, 1] : 1
  const coreOpacity: number | number[] = waking ? [0.9, 0.7, 0.9] : 0.85

  return (
    <div style={{ position: 'relative', width: 240, height: 240 }}>
      {/* Outer halo: slow breathing */}
      <motion.div
        animate={{ scale: haloScale, opacity: haloOpacity }}
        transition={{ duration: haloDuration, repeat: Infinity, ease: 'easeInOut' }}
        style={{
          position: 'absolute',
          inset: 0,
          borderRadius: '50%',
          background: `radial-gradient(circle, ${halo}55 0%, transparent 72%)`,
          filter: 'blur(32px)',
        }}
      />
      {/* Inner ring: faster, with a tighter blur */}
      <motion.div
        animate={{ scale: ringScale, rotate: ringRotate }}
        transition={{ duration: 1.3, repeat: Infinity, ease: 'easeInOut' }}
        style={{
          position: 'absolute',
          inset: 40,
          borderRadius: '50%',
          background: `radial-gradient(circle at 40% 30%, ${core} 0%, ${halo} 60%, transparent 85%)`,
          filter: 'blur(16px)',
          boxShadow: `0 0 80px ${halo}66, 0 0 40px ${core}55`,
        }}
      />
      {/* Bright core: thin highlight */}
      <motion.div
        animate={{ scale: coreScale, opacity: coreOpacity }}
        transition={{ duration: 0.9, repeat: Infinity, ease: 'easeInOut' }}
        style={{
          position: 'absolute',
          inset: 90,
          borderRadius: '50%',
          background: `radial-gradient(circle at 45% 35%, rgba(255,255,255,0.9) 0%, ${core} 50%, transparent 100%)`,
          filter: 'blur(8px)',
        }}
      />
    </div>
  )
}