// smart-home-tablet/components/VoiceController.tsx

'use client'

/**
 * Voice controller.
 *
 * UX:
 * - Idle: microphone button (crossed out). Tap = "activate the assistant" (a user
 *   gesture is required so that the AudioContext can start).
 * - Active: the wake models are loaded (once) → the wake-word listener runs
 *   continuously in the background. The button glows purple and prompts the user
 *   to say «Космо».
 *   - Wake word triggered → MicVAD starts → onSpeechEnd → STT → chat → TTS.
 *   - In parallel, a tap on the button while it is listening = manual trigger (as
 *     before), in case the wake word does not fire or the wake model is still
 *     weakly trained.
 * - Tap while recording/processing, or a long press in any state → turns wake and
 *   mic off completely.
 */
import { useEffect, useRef, useState } from 'react'
import { Mic, MicOff } from 'lucide-react'
import { WakeWordDetector } from '@/lib/wake-word'
import { floatToWav } from '@/lib/audio-wav'
import { vlog, vwarn, verror } from '@/lib/debug'

/** Assistant identities that a voice event can be attributed to. */
type Agent = 'cosmo' | 'lusya'
/**
 * Lifecycle of the controller button:
 * - `idle`      — assistant is off (initial state);
 * - `loading`   — activation in progress (audio unlock, mic permission, wake-word start);
 * - `listening` — wake-word detector is running in the background;
 * - `recording` — wake was triggered, waiting for the VAD to capture a phrase;
 * - `busy`      — captured phrase is being sent through STT → chat;
 * - `error`     — activation failed (mic permission or wake-word init).
 */
type ControllerState = 'idle' | 'loading' | 'listening' | 'recording' | 'busy' | 'error'

/** Agent that this controller reports in every emitted event and chat request. */
const AGENT: Agent = 'cosmo'
/** Value passed as `threshold` to the WakeWordDetector. */
const WAKE_THRESHOLD = 0.5

/**
 * Broadcasts a voice-pipeline notification on `window` as a `voice-local`
 * CustomEvent.
 *
 * @param event - Name of the pipeline event (e.g. 'wake', 'idle', 'error').
 * @param agent - Agent the event is attributed to.
 * @param text  - Optional human-readable payload; `undefined` when omitted.
 *
 * The event's `detail` carries `{ event, agent, text, timestamp }`, where
 * `timestamp` is the ISO-8601 time at which the event was emitted.
 */
function emitLocal(event: string, agent: Agent, text?: string) {
  const detail = { event, agent, text, timestamp: new Date().toISOString() }
  const notification = new CustomEvent('voice-local', { detail })
  window.dispatchEvent(notification)
}

/** Sample rate (Hz) the captured audio is treated as when sizing and WAV-encoding it. */
const VAD_SAMPLE_RATE = 16000
/** Captures shorter than 0.4 s are ignored as noise. */
const MIN_UTTERANCE_SAMPLES = VAD_SAMPLE_RATE * 0.4
/** Press duration (ms) after which a press counts as a long press. */
const LONG_PRESS_MS = 700

/** Minimal surface of the MicVAD instance that this component relies on. */
interface VadHandle {
  start?: () => unknown
  pause?: () => unknown
  destroy?: () => unknown
}

/**
 * Runs a best-effort control action (pause/resume/stop/destroy). Failures must
 * not break the UI flow, but they are logged instead of being silently dropped.
 * Handles both synchronous throws and rejected promises, because the control
 * methods may be sync or async.
 */
function bestEffort(label: string, action: () => unknown): void {
  try {
    Promise.resolve(action()).catch((e: unknown) => vwarn(`[voice] ${label} failed:`, e))
  } catch (e) {
    vwarn(`[voice] ${label} failed:`, e)
  }
}

/** Extracts `name` / `message` from an arbitrary thrown value without assuming its type. */
function errorInfo(e: unknown): { name: string | undefined; message: string | undefined } {
  if (typeof e === 'string') return { name: undefined, message: e }
  if (typeof e === 'object' && e !== null) {
    const { name, message } = e as { name?: unknown; message?: unknown }
    return {
      name: typeof name === 'string' ? name : undefined,
      message: typeof message === 'string' ? message : undefined,
    }
  }
  return { name: undefined, message: undefined }
}

/**
 * Floating microphone button that owns the whole voice pipeline:
 * wake-word detector → MicVAD capture → `/api/voice/stt` → `/api/voice/chat`.
 *
 * Interaction:
 * - tap in `idle`/`error`  → activate (audio unlock, mic permission, wake-word start);
 * - tap in `listening`     → manual trigger, same path as a detected wake word;
 * - tap in any other state → turn everything off;
 * - long press             → always turn everything off.
 *
 * Progress is reported to the rest of the app through `voice-local` window
 * events; a `voice-cancel` window event aborts the current capture/pipeline.
 *
 * Asynchronous steps (VAD loading, wake-word start, network requests) can finish
 * after the user has cancelled or switched the assistant off. Two counters guard
 * against such stale continuations:
 * - `sessionRef` changes on stop()/unmount — resources created for an older
 *   session are released instead of being stored;
 * - `captureRef` changes on every wake/cancel/stop/unmount — an older wake
 *   handler must not start the VAD any more.
 */
export default function VoiceController() {
  const [state, setState] = useState<ControllerState>('idle')
  const wakeRef = useRef<WakeWordDetector | null>(null)
  const vadRef = useRef<VadHandle | null>(null)
  const busyRef = useRef(false)
  const sessionRef = useRef(0)
  const captureRef = useRef(0)
  // In-flight VAD initialisation, shared by concurrent callers.
  const vadInitRef = useRef<Promise<void> | null>(null)
  // Abort handle of the STT → chat pipeline that currently owns the busy slot.
  const pipelineRef = useRef<AbortController | null>(null)
  // Pending long-press timer; non-null exactly while a press on the button is armed.
  const pressTimer = useRef<ReturnType<typeof setTimeout> | null>(null)

  useEffect(() => {
    vlog('[VoiceController] mounted, state=idle, ждём тап на микрофон')

    // The overlay's X button sends voice-cancel → pause the VAD (do NOT destroy
    // it, otherwise the next wake waits 1-2 s for re-initialisation).
    const onCancel = () => {
      vlog('[voice] cancel — пауза VAD')
      captureRef.current++
      pipelineRef.current?.abort()
      pipelineRef.current = null
      busyRef.current = false
      bestEffort('VAD pause', () => vadRef.current?.pause?.())
      bestEffort('wake resume', () => wakeRef.current?.resume?.())
      const next: ControllerState = wakeRef.current ? 'listening' : 'idle'
      // An activation in progress keeps its own state; start() will settle it.
      setState((s) => (s === 'loading' ? s : next))
      emitLocal('idle', AGENT)
    }
    window.addEventListener('voice-cancel', onCancel)

    return () => {
      window.removeEventListener('voice-cancel', onCancel)
      // Invalidate every pending async continuation before releasing resources.
      sessionRef.current++
      captureRef.current++
      pipelineRef.current?.abort()
      pipelineRef.current = null
      vadInitRef.current = null
      if (pressTimer.current) clearTimeout(pressTimer.current)
      pressTimer.current = null
      bestEffort('VAD destroy', () => vadRef.current?.destroy?.())
      bestEffort('wake stop', () => wakeRef.current?.stop?.())
      vadRef.current = null
      wakeRef.current = null
    }
  }, [])

  /**
   * Handles a phrase captured by the VAD: encodes it as WAV, sends it to STT and
   * forwards the recognised text to the chat endpoint.
   *
   * Too-short captures are ignored and the VAD keeps running. Only refs and
   * state setters are used here because MicVAD keeps the callback it was
   * created with for its whole lifetime.
   */
  const handleSpeechEnd = async (audio: Float32Array) => {
    if (busyRef.current) return
    if (audio.length < MIN_UTTERANCE_SAMPLES) return
    busyRef.current = true
    const pipeline = new AbortController()
    pipelineRef.current = pipeline
    setState('busy')
    emitLocal('listening', AGENT)

    try {
      const wav = floatToWav(audio, VAD_SAMPLE_RATE)
      const sttResp = await fetch('/api/voice/stt', {
        method: 'POST',
        headers: { 'Content-Type': 'audio/wav' },
        body: wav,
        signal: pipeline.signal,
      })
      if (!sttResp.ok) throw new Error(`stt ${sttResp.status}`)
      const payload = (await sttResp.json()) as { text?: unknown } | null
      const rawText = payload?.text
      const userText = typeof rawText === 'string' ? rawText.trim() : ''
      if (userText.length < 2) {
        emitLocal('idle', AGENT)
        return
      }
      const chatResp = await fetch('/api/voice/chat', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ text: userText, agent: AGENT }),
        signal: pipeline.signal,
      })
      if (!chatResp.ok) throw new Error(`chat ${chatResp.status}`)
    } catch (e) {
      if (pipeline.signal.aborted) {
        // Cancelled by the user — not an error.
        vlog('[voice] pipeline aborted')
        return
      }
      verror('[voice] pipeline error:', e)
      emitLocal('error', AGENT, 'Не получилось')
    } finally {
      // Only the pipeline that still owns the slot may touch shared state. After
      // cancel/stop the slot was already released, and a newer capture may be
      // running that this stale pipeline must not pause.
      if (pipelineRef.current === pipeline) {
        pipelineRef.current = null
        busyRef.current = false
        // Pause the VAD — it is reused on the next wake (no re-init).
        bestEffort('VAD pause', () => vadRef.current?.pause?.())
        // Resume wake — listen in the background again.
        bestEffort('wake resume', () => wakeRef.current?.resume?.())
        setState((s) => (s === 'busy' ? 'listening' : s))
      }
    }
  }

  /**
   * One-time VAD initialisation. The instance is kept and reused for every wake;
   * without that each capture would wait ~1-2 s. Concurrent callers share one
   * in-flight initialisation. Never rejects: failures are logged and reported
   * via a `voice-local` error event, leaving `vadRef` empty.
   */
  const initVAD = (): Promise<void> => {
    if (vadRef.current) return Promise.resolve()
    if (vadInitRef.current) return vadInitRef.current
    const session = sessionRef.current

    const load = async (): Promise<void> => {
      try {
        const { MicVAD } = await import('@ricky0123/vad-web')
        // Suppress the library's "VAD |" debug logs. The original console.debug
        // is stashed on console so repeated inits never wrap the wrapper.
        const con = console as Console & { _vadOrig?: typeof console.debug }
        const origDebug = con._vadOrig || console.debug
        con._vadOrig = origDebug
        console.debug = (...args: unknown[]) => {
          const first = args[0]
          if (typeof first === 'string' && (first.startsWith('VAD |') || first.startsWith('using default audio'))) return
          origDebug.apply(console, args)
        }
        const vad: VadHandle = await MicVAD.new({
          model: 'v5',
          baseAssetPath: '/vad/',
          onnxWASMBasePath: '/vad/',
          logLevel: 'error',
          // eslint-disable-next-line @typescript-eslint/no-explicit-any -- onnxruntime module object supplied by the library
          ortConfig: (ort: any) => {
            ort.env.wasm.numThreads = 1
            ort.env.wasm.simd = true
          },
          positiveSpeechThreshold: 0.6,
          negativeSpeechThreshold: 0.45,
          minSpeechMs: 160,
          redemptionMs: 750,
          onSpeechStart: () => emitLocal('wake', AGENT),
          onSpeechEnd: handleSpeechEnd,
        })
        if (session !== sessionRef.current) {
          // The assistant was switched off (or unmounted) while loading:
          // release the microphone instead of keeping a hidden recorder.
          bestEffort('stale VAD destroy', () => vad.destroy?.())
          return
        }
        vadRef.current = vad
        // start() is not called here — we wait for the wake word to trigger it.
        vlog('[voice] VAD preloaded (paused)')
      } catch (e) {
        const { name, message } = errorInfo(e)
        verror('[voice] VAD init failed:', name, message, e)
        if (session === sessionRef.current) {
          emitLocal('error', AGENT, `VAD: ${message?.slice(0, 60) || 'init'}`)
        }
      }
    }

    const pending = load().finally(() => {
      if (vadInitRef.current === pending) vadInitRef.current = null
    })
    vadInitRef.current = pending
    return pending
  }

  /**
   * Reacts to a wake-word hit (or a manual trigger): pauses wake detection,
   * makes sure the VAD exists and starts capturing. If the VAD cannot be
   * initialised or started, wake detection is resumed so the assistant does not
   * get stuck in `recording`.
   */
  const onWakeDetected = async (score: number) => {
    vlog(`[wake] cosmo score=${score.toFixed(3)}`)
    if (busyRef.current) return
    // Assistant is off or being switched off — nothing to trigger.
    if (!wakeRef.current) return
    const capture = ++captureRef.current

    const backToListening = () => {
      bestEffort('wake resume', () => wakeRef.current?.resume?.())
      setState((s) => (s === 'recording' ? 'listening' : s))
    }

    // Pause wake so that VAD init and the spoken command do not re-trigger it on echo.
    bestEffort('wake pause', () => wakeRef.current?.pause?.())
    setState('recording')
    emitLocal('wake', AGENT)
    // After the first wake the VAD already exists and starts instantly.
    if (!vadRef.current) await initVAD()
    // Cancelled / stopped / unmounted while the VAD was loading.
    if (capture !== captureRef.current) return

    const vad = vadRef.current
    if (!vad) {
      // initVAD already reported the failure; keep the wake word usable.
      backToListening()
      return
    }
    try {
      await vad.start?.()
    } catch (e) {
      verror('[voice] VAD start failed:', e)
      if (capture === captureRef.current) {
        emitLocal('error', AGENT, `VAD: ${errorInfo(e).message?.slice(0, 60) || 'start'}`)
        backToListening()
      }
    }
  }

  /**
   * Activates the assistant: unlocks audio playback, asks for microphone
   * permission and starts the wake-word detector. Every await is followed by a
   * staleness check so that a stop()/unmount during loading wins.
   */
  const start = async () => {
    if (state !== 'idle' && state !== 'error') return
    const session = sessionRef.current
    const isStale = () => session !== sessionRef.current
    setState('loading')

    // 0. "Audio unlock" — iOS Safari / Android Chrome refuse to play sound
    // without a user gesture. The wake word fires on its own, so TTS would later
    // be rejected silently. Create the shared AudioContext right now (the tap is
    // the gesture) and keep it on window — VoiceOverlay plays through it.
    try {
      const w = window as typeof window & {
        __voicePlaybackCtx?: AudioContext
        webkitAudioContext?: typeof AudioContext
      }
      if (!w.__voicePlaybackCtx) {
        const Ctx = w.AudioContext || w.webkitAudioContext
        if (Ctx) w.__voicePlaybackCtx = new Ctx()
      }
      const ctx = w.__voicePlaybackCtx
      if (ctx && ctx.state === 'suspended') await ctx.resume()
      vlog('[voice] playback AudioContext state=', ctx?.state)
    } catch (e) {
      vwarn('[voice] AudioContext init failed:', errorInfo(e).message)
    }
    if (isStale()) return

    // 1. Ask for microphone permission separately.
    try {
      const probe = await navigator.mediaDevices.getUserMedia({ audio: true })
      probe.getTracks().forEach((t) => t.stop())
    } catch (e) {
      const { name, message } = errorInfo(e)
      verror('[voice] mic permission failed:', name, message)
      if (isStale()) return
      setState('error')
      emitLocal('error', AGENT, name === 'NotAllowedError' ? 'Нет доступа к микрофону' : 'Микрофон не открылся')
      return
    }
    if (isStale()) return

    // 2. Start the wake-word detector.
    let created: WakeWordDetector | null = null
    try {
      // Periodically log the max score, plus proof that inference is alive.
      let maxScore = 0
      let scoreCount = 0
      const wake = new WakeWordDetector({
        modelPath: '/wake/cosmo.onnx',
        threshold: WAKE_THRESHOLD,
        onWake: (s) => {
          void onWakeDetected(s)
        },
        onScore: (s) => {
          if (s > maxScore) maxScore = s
          scoreCount++
          if (scoreCount % 25 === 0) {
            vlog(`[wake] alive · max score за окно=${maxScore.toFixed(3)} · scoreCount=${scoreCount}`)
            maxScore = 0
          }
          if (s > 0.15) vlog(`[wake] score=${s.toFixed(3)}`)
        },
        onError: (e) => vwarn('[wake] error', e),
      })
      created = wake
      await wake.start()
      if (isStale()) {
        // Switched off while the detector was starting — do not leave it running.
        bestEffort('stale wake stop', () => wake.stop?.())
        return
      }
      wakeRef.current = wake
      setState('listening')
      // The VAD is NOT preloaded — its second getUserMedia disturbs wake-word audio.
      // It loads on the first wake (~1-2 s) and is reused afterwards (see handleSpeechEnd).
    } catch (e) {
      verror('[wake] init failed:', e)
      // Release whatever a half-started detector may still hold.
      bestEffort('wake stop (failed init)', () => created?.stop?.())
      if (isStale()) return
      setState('error')
      emitLocal('error', AGENT, `Wake: ${errorInfo(e).message?.slice(0, 60) || 'init'}`)
    }
  }

  /**
   * Turns the assistant off completely: invalidates pending async work, aborts
   * the in-flight pipeline, destroys the VAD and stops the wake-word detector.
   */
  const stop = async () => {
    sessionRef.current++
    captureRef.current++
    pipelineRef.current?.abort()
    pipelineRef.current = null
    busyRef.current = false
    vadInitRef.current = null

    const vad = vadRef.current
    vadRef.current = null
    bestEffort('VAD pause', () => vad?.pause?.())
    bestEffort('VAD destroy', () => vad?.destroy?.())

    // Detach the detector first so that nothing can resume it while it stops.
    const wake = wakeRef.current
    wakeRef.current = null
    try {
      await wake?.stop?.()
    } catch (e) {
      vwarn('[voice] wake stop failed:', e)
    }
    setState('idle')
    emitLocal('idle', AGENT)
  }

  // Short tap: activate in idle/error, manual trigger while listening, otherwise off.
  const onTap = async () => {
    vlog(`[VoiceController] tap! state=${state}`)
    if (state === 'idle' || state === 'error') {
      await start()
    } else if (state === 'listening') {
      // Manual trigger — emulate a wake event.
      void onWakeDetected(1.0)
    } else {
      await stop()
    }
  }

  // A long press always switches off (escape hatch from any bad state).
  const onLongPress = async () => {
    await stop()
  }

  // Primitive long-press detection: a press is "armed" while its timer is pending.
  const clearPressTimer = () => {
    if (pressTimer.current) clearTimeout(pressTimer.current)
    pressTimer.current = null
  }
  const onPointerDown = () => {
    // A second pointer must not orphan the first timer.
    clearPressTimer()
    pressTimer.current = setTimeout(() => {
      pressTimer.current = null
      void onLongPress()
    }, LONG_PRESS_MS)
  }
  const onPointerUp = () => {
    // Tap only when this press started on the button and has not already fired
    // as a long press (the timer is cleared in both of those cases).
    const pressArmed = pressTimer.current !== null
    clearPressTimer()
    if (pressArmed) void onTap()
  }

  const isActive = state === 'listening' || state === 'recording' || state === 'busy'
  const isLoading = state === 'loading'

  return (
    <button
      type="button"
      onPointerDown={onPointerDown}
      onPointerUp={onPointerUp}
      onPointerCancel={clearPressTimer}
      onClick={(e) => {
        // Keyboard activation (Enter/Space) produces a click with detail === 0
        // and no pointer events; pointer clicks are already handled above.
        if (e.detail === 0) void onTap()
      }}
      data-swipe-ignore
      aria-label={isActive ? 'Выключить ассистента' : 'Активировать ассистента'}
      title={isActive ? 'Скажи «Космо» · долгий тап = выкл' : 'Тап = активировать'}
      style={{
        position: 'fixed',
        right: 24,
        bottom: 24,
        zIndex: 250,
        width: 64,
        height: 64,
        borderRadius: '50%',
        border: 'none',
        cursor: 'pointer',
        background: isActive
          ? 'linear-gradient(135deg, #7c3aed 0%, #a5b4fc 100%)'
          : 'rgba(255,255,255,0.08)',
        backdropFilter: 'blur(20px)',
        WebkitBackdropFilter: 'blur(20px)' as any,
        boxShadow: isActive
          ? '0 0 32px rgba(124, 58, 237, 0.6), 0 8px 24px rgba(0,0,0,0.4)'
          : '0 4px 12px rgba(0,0,0,0.3)',
        color: isActive ? '#fff' : 'rgba(255,255,255,0.65)',
        display: 'flex',
        alignItems: 'center',
        justifyContent: 'center',
        transition: 'background 0.25s, box-shadow 0.25s, transform 0.15s',
        transform: isLoading ? 'scale(0.92)' : 'scale(1)',
      }}
    >
      {isActive ? <Mic size={28} /> : <MicOff size={28} />}
    </button>
  )
}