Files
smart-home-tablet/components/VoiceOverlay.tsx
Cosmo 05b300d472
All checks were successful
Deploy / deploy (push) Successful in 1m47s
chore(voice): security, cleanup, resilience
Безопасность:
- Rate-limit на /api/voice/chat (20/мин per cookie/IP, env VOICE_RATE_LIMIT).
  Защищает от случайных циклов и утечки PIN.
- Усечение user prompt'а до 4000 символов в /api/voice/chat.
- Tool-loop защита от циклов: если LLM дважды просит тот же tool с теми же
  args — прерываем (раньше мог уйти в бесконечный цикл при tool error'ах).

Чистка кода:
- lib/debug.ts — vlog/vwarn/verror гейтят браузерные логи за
  NEXT_PUBLIC_VOICE_DEBUG=1 (или localStorage 'voice-debug=1').
  Серверные console.log оставлены — полезны в Docker logs.
- lib/audio-wav.ts — вынесена дублированная floatToWav из VoiceController.
- Удалены orphan компоненты FocusCard.tsx и CountdownCard.tsx
  (не подключены, отвергнуты по UX-фидбеку).

Resilience:
- WakeWordDetector: drop-on-busy в onChunk — на медленных устройствах
  (Android, бюджетный CPU) backlog inference больше не копится.
- voice-history fallback на /tmp/voice-history если /data не примонтирован
  (локальная разработка / нестандартная конфигурация).
2026-04-27 12:44:18 +00:00

379 lines
13 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
'use client'
import { useEffect, useRef, useState } from 'react'
import { motion, AnimatePresence } from 'framer-motion'
import { X } from 'lucide-react'
import { vwarn } from '@/lib/debug'
/** Phase of a voice interaction. The overlay is rendered for every value except 'idle'. */
type VoiceState = 'idle' | 'wake' | 'listening' | 'command' | 'response' | 'error'
/** Assistant persona identifier; used as the key into AGENT_COLORS and sent to the TTS endpoint. */
type Agent = 'cosmo' | 'lusya'
/**
 * Event payload consumed by the overlay. It arrives either as a JSON-encoded
 * SSE message or as the `detail` of a 'voice-local' CustomEvent.
 */
interface VoiceEvent {
// New state the overlay should switch to.
event: VoiceState
// Persona the event belongs to; when omitted the previously known agent is used.
agent?: Agent
// Text to display (and, for 'response'/'error', to speak).
text?: string
// Not read anywhere in this component.
timestamp: string
}
// Per-agent accent pair (inner core / outer halo). Kept minimalistic: colours only, no names.
// `core` tints the inner gradients of the orb, `halo` tints the outer glow and shadows.
const AGENT_COLORS: Record<Agent, { core: string; halo: string }> = {
cosmo: { core: '#a5b4fc', halo: '#7c3aed' },
lusya: { core: '#fbcfe8', halo: '#ec4899' },
}
// Short status caption shown under the orb for each non-idle state.
// An empty string means "render no caption" (the caption element is skipped when falsy).
const STATUS_LABEL: Record<Exclude<VoiceState, 'idle'>, string> = {
wake: 'слушаю',
listening: 'жду',
command: '',
response: '',
error: '',
}
/**
 * Full-screen overlay that visualises the voice assistant.
 *
 * It subscribes to the server SSE stream (`/api/voice/stream`) and to local
 * `voice-local` CustomEvents, mirrors the received state/agent/text into React
 * state, speaks `response` / `error` texts through `/api/voice/tts`, and hides
 * itself again after a timeout. The close button dispatches `voice-cancel`.
 */
export default function VoiceOverlay() {
  const [state, setState] = useState<VoiceState>('idle')
  const [agent, setAgent] = useState<Agent>('cosmo')
  const [text, setText] = useState('')
  const dismissTimer = useRef<ReturnType<typeof setTimeout> | null>(null)
  const audioRef = useRef<HTMLAudioElement | null>(null)
  const audioUrlRef = useRef<string | null>(null)
  const audioSourceRef = useRef<AudioBufferSourceNode | null>(null)
  // Mirror of `agent` for the mount-only effect below: the effect closure would
  // otherwise keep seeing the initial 'cosmo' forever.
  const agentRef = useRef<Agent>('cosmo')
  // Bumped by every stopAudio(). playTTS captures the value and bails out after
  // each await if it changed, so a cancelled request never starts playing late.
  const playbackGen = useRef(0)

  /** Cancels a pending auto-dismiss, if any. */
  const clearDismiss = () => {
    if (dismissTimer.current) {
      clearTimeout(dismissTimer.current)
      dismissTimer.current = null
    }
  }

  /** (Re)arms the auto-dismiss timer: the overlay goes back to 'idle' after `ms`. */
  const scheduleDismiss = (ms: number) => {
    clearDismiss()
    dismissTimer.current = setTimeout(() => setState('idle'), ms)
  }

  /**
   * Stops whatever is playing and invalidates any in-flight playTTS call.
   * Handlers are detached first: stop() still fires `ended` and clearing `src`
   * may fire `error`, and those must not run the old clip's completion callback
   * (it would re-arm a short dismiss timer over the current state).
   */
  const stopAudio = () => {
    playbackGen.current += 1
    const source = audioSourceRef.current
    if (source) {
      source.onended = null
      try { source.stop() } catch {}
      try { source.disconnect() } catch {}
      audioSourceRef.current = null
    }
    const audio = audioRef.current
    if (audio) {
      audio.onended = null
      audio.onerror = null
      try {
        audio.pause()
        audio.src = ''
      } catch {}
      audioRef.current = null
    }
    if (audioUrlRef.current) {
      URL.revokeObjectURL(audioUrlRef.current)
      audioUrlRef.current = null
    }
  }

  /**
   * Fetches synthesized speech for `textToSpeak` and plays it.
   *
   * @param textToSpeak text to synthesize; empty text completes immediately
   * @param agentId     persona whose voice the TTS endpoint should use
   * @param onEnded     called once when playback finishes or fails; NOT called
   *                    when the playback was superseded by stopAudio()
   */
  const playTTS = async (textToSpeak: string, agentId: Agent, onEnded?: () => void) => {
    stopAudio()
    const gen = playbackGen.current
    const superseded = () => playbackGen.current !== gen
    if (!textToSpeak) {
      onEnded?.()
      return
    }
    try {
      const r = await fetch('/api/voice/tts', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ text: textToSpeak, agent: agentId }),
      })
      if (superseded()) return
      if (!r.ok) {
        onEnded?.()
        return
      }
      const blob = await r.blob()
      if (superseded()) return
      // First try the shared AudioContext (unlocked in VoiceController's start()
      // on a user tap). On iOS Safari this is the only reliable path; it works
      // elsewhere as well.
      const ctx: AudioContext | undefined = (window as any).__voicePlaybackCtx
      if (ctx) {
        try {
          if (ctx.state === 'suspended') await ctx.resume()
          const arrayBuf = await blob.arrayBuffer()
          const audioBuf = await new Promise<AudioBuffer>((resolve, reject) => {
            // decodeAudioData was historically callback-based in Safari; support both forms.
            try {
              const p = ctx.decodeAudioData(arrayBuf, resolve, reject) as any
              if (p && typeof p.then === 'function') p.then(resolve, reject)
            } catch (e) { reject(e) }
          })
          if (superseded()) return
          const source = ctx.createBufferSource()
          source.buffer = audioBuf
          source.connect(ctx.destination)
          const finish = () => {
            if (audioSourceRef.current === source) audioSourceRef.current = null
            onEnded?.()
          }
          source.onended = finish
          audioSourceRef.current = source
          source.start()
          return
        } catch (e: any) {
          if (superseded()) return
          vwarn('[voice] AudioContext playback failed, fallback to <audio>:', e?.message || e)
        }
      }
      // Fallback to HTMLAudioElement (usually works on desktop too).
      const url = URL.createObjectURL(blob)
      audioUrlRef.current = url
      const audio = new Audio(url)
      const finish = () => {
        if (audioUrlRef.current === url) {
          URL.revokeObjectURL(url)
          audioUrlRef.current = null
        }
        if (audioRef.current === audio) audioRef.current = null
        onEnded?.()
      }
      audio.onended = finish
      audio.onerror = finish
      audioRef.current = audio
      try {
        await audio.play()
      } catch (e: any) {
        // pause() from stopAudio rejects a pending play(); cleanup already happened there.
        if (superseded()) return
        vwarn('[voice] audio.play() rejected:', e?.name || e?.message || e)
        finish()
      }
    } catch {
      if (!superseded()) onEnded?.()
    }
  }

  useEffect(() => {
    let es: EventSource | null = null
    let retry: ReturnType<typeof setTimeout> | null = null
    let closedByUs = false

    // Single handler for both SSE messages and local CustomEvents from VoiceController.
    const handleEvent = (evt: VoiceEvent) => {
      // Read the agent from the ref, not from the `agent` state captured at mount.
      const currentAgent: Agent = evt.agent ?? agentRef.current
      if (evt.agent) {
        agentRef.current = evt.agent
        setAgent(evt.agent)
      }
      // Safety: if the event stream goes silent, close ourselves after 60 s.
      const armSafety = () => scheduleDismiss(60_000)
      if (evt.event === 'wake') {
        stopAudio()
        setState('wake')
        setText('')
        armSafety()
      } else if (evt.event === 'listening') {
        setState('listening')
        armSafety()
      } else if (evt.event === 'command') {
        setState('command')
        setText(evt.text || '')
        armSafety()
      } else if (evt.event === 'response') {
        setState('response')
        setText(evt.text || '')
        clearDismiss()
        if (evt.text) {
          playTTS(evt.text, currentAgent, () => scheduleDismiss(4000))
        } else {
          scheduleDismiss(4000)
        }
      } else if (evt.event === 'error') {
        setState('error')
        setText(evt.text || 'Ошибка')
        clearDismiss()
        if (evt.text) {
          playTTS(evt.text, currentAgent, () => scheduleDismiss(3000))
        } else {
          scheduleDismiss(3000)
        }
      } else if (evt.event === 'idle') {
        clearDismiss()
        stopAudio()
        setState('idle')
      }
    }

    // Opens the SSE stream; on error closes it and retries after 3 s unless we are unmounting.
    const connect = () => {
      es = new EventSource('/api/voice/stream')
      es.onmessage = (e) => {
        // Malformed payloads are ignored on purpose.
        try { handleEvent(JSON.parse(e.data) as VoiceEvent) } catch {}
      }
      es.onerror = () => {
        if (closedByUs) return
        es?.close()
        retry = setTimeout(connect, 3000)
      }
    }
    connect()

    // Local events (push-to-talk, before the round-trip to the server).
    const onLocal = (e: Event) => {
      const detail = (e as CustomEvent<VoiceEvent>).detail
      if (detail) handleEvent(detail)
    }
    window.addEventListener('voice-local', onLocal as EventListener)

    return () => {
      closedByUs = true
      clearDismiss()
      stopAudio()
      if (retry) clearTimeout(retry)
      es?.close()
      window.removeEventListener('voice-local', onLocal as EventListener)
    }
    // Mount-only by design: the helpers used above touch only refs and state
    // setters, so the first-render closures stay valid.
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [])

  const isActive = state !== 'idle'
  const colors = AGENT_COLORS[agent]
  const status = state !== 'idle' ? STATUS_LABEL[state] : ''

  return (
    <AnimatePresence>
      {isActive && (
        <motion.div
          initial={{ opacity: 0 }}
          animate={{ opacity: 1 }}
          exit={{ opacity: 0 }}
          transition={{ duration: 0.35 }}
          data-swipe-ignore
          style={{
            position: 'fixed', inset: 0, zIndex: 300,
            background: 'rgba(5, 5, 15, 0.82)',
            backdropFilter: 'blur(28px)',
            WebkitBackdropFilter: 'blur(28px)' as any,
            display: 'flex', flexDirection: 'column',
            alignItems: 'center', justifyContent: 'center',
            gap: 30, padding: 40,
            pointerEvents: 'none',
          }}
        >
          {/* Close the listening session. Ask VoiceController to abort the active
              VAD/recording via voice-cancel, and close the UI ourselves. */}
          <button
            onClick={() => {
              window.dispatchEvent(new CustomEvent('voice-cancel'))
              clearDismiss()
              stopAudio()
              setState('idle')
            }}
            aria-label="Закрыть"
            style={{
              position: 'absolute', top: 24, right: 24,
              width: 56, height: 56, borderRadius: '50%',
              border: 'none', cursor: 'pointer',
              background: 'rgba(255,255,255,0.08)',
              color: 'rgba(255,255,255,0.85)',
              display: 'flex', alignItems: 'center', justifyContent: 'center',
              backdropFilter: 'blur(16px)',
              WebkitBackdropFilter: 'blur(16px)' as any,
              pointerEvents: 'auto',
            }}
          >
            <X size={26} />
          </button>
          <SiriOrb core={colors.core} halo={colors.halo} state={state} />
          {/* Subtle status caption (only for states with a label; otherwise the text speaks for itself) */}
          {status && (
            <motion.div
              key={state}
              initial={{ opacity: 0, y: -4 }}
              animate={{ opacity: 0.55, y: 0 }}
              transition={{ duration: 0.3 }}
              style={{
                fontSize: 13, color: 'rgba(255,255,255,0.6)',
                fontWeight: 600, letterSpacing: '0.15em',
                textTransform: 'uppercase',
              }}
            >
              {status}
            </motion.div>
          )}
          {/* Text: recognized command / assistant reply */}
          {text && (
            <motion.div
              key={text.slice(0, 40)}
              initial={{ opacity: 0, y: 8 }}
              animate={{ opacity: 1, y: 0 }}
              transition={{ duration: 0.35 }}
              style={{
                maxWidth: 760, textAlign: 'center',
                fontSize: state === 'command' ? 20 : 24,
                fontWeight: 500,
                color: state === 'error' ? '#fca5a5' :
                  state === 'command' ? 'rgba(255,255,255,0.55)' :
                  'rgba(255,255,255,0.95)',
                letterSpacing: '-0.3px', lineHeight: 1.4,
              }}
            >
              {text}
            </motion.div>
          )}
        </motion.div>
      )}
    </AnimatePresence>
  )
}
/**
 * Animated three-layer orb (outer halo, inner ring, bright core).
 *
 * @param core  colour used for the inner gradients
 * @param halo  colour used for the outer glow and shadows
 * @param state current voice state; 'wake', 'listening' and 'response' each
 *              select their own pulse profile, every other state uses the calm default
 */
function SiriOrb({ core, halo, state }: { core: string; halo: string; state: VoiceState }) {
  const waking = state === 'wake'
  const hearing = state === 'listening'
  const answering = state === 'response'

  // Outer halo: slow breathing, stronger and faster the more "alert" the state is.
  let haloScale = [1, 1.08, 1]
  let haloOpacity = [0.35, 0.15, 0.35]
  let haloDuration = 3.2
  if (waking) {
    haloScale = [1, 1.2, 1]
    haloOpacity = [0.55, 0.2, 0.55]
    haloDuration = 1.6
  } else if (hearing) {
    haloScale = [1, 1.14, 1]
    haloOpacity = [0.45, 0.18, 0.45]
    haloDuration = 2.2
  }

  // Inner ring: pulses in the three active states, wobbles only on wake.
  let ringScale: number | number[] = 1
  if (waking) {
    ringScale = [1, 1.1, 1]
  } else if (hearing) {
    ringScale = [1, 1.06, 1]
  } else if (answering) {
    ringScale = [1, 1.04, 1]
  }
  const ringRotate: number | number[] = waking ? [0, 10, -8, 0] : 0

  // Bright core: animated only on wake, static otherwise.
  const coreScale: number | number[] = waking ? [1, 0.85, 1] : 1
  const coreOpacity: number | number[] = waking ? [0.9, 0.7, 0.9] : 0.85

  return (
    <div style={{ position: 'relative', width: 240, height: 240 }}>
      {/* Outer halo */}
      <motion.div
        animate={{ scale: haloScale, opacity: haloOpacity }}
        transition={{ duration: haloDuration, repeat: Infinity, ease: 'easeInOut' }}
        style={{
          position: 'absolute',
          inset: 0,
          borderRadius: '50%',
          background: `radial-gradient(circle, ${halo}55 0%, transparent 72%)`,
          filter: 'blur(32px)',
        }}
      />
      {/* Inner ring */}
      <motion.div
        animate={{ scale: ringScale, rotate: ringRotate }}
        transition={{ duration: 1.3, repeat: Infinity, ease: 'easeInOut' }}
        style={{
          position: 'absolute',
          inset: 40,
          borderRadius: '50%',
          background: `radial-gradient(circle at 40% 30%, ${core} 0%, ${halo} 60%, transparent 85%)`,
          filter: 'blur(16px)',
          boxShadow: `0 0 80px ${halo}66, 0 0 40px ${core}55`,
        }}
      />
      {/* Bright core highlight */}
      <motion.div
        animate={{ scale: coreScale, opacity: coreOpacity }}
        transition={{ duration: 0.9, repeat: Infinity, ease: 'easeInOut' }}
        style={{
          position: 'absolute',
          inset: 90,
          borderRadius: '50%',
          background: `radial-gradient(circle at 45% 35%, rgba(255,255,255,0.9) 0%, ${core} 50%, transparent 100%)`,
          filter: 'blur(8px)',
        }}
      />
    </div>
  )
}