// smart-home-tablet/components/VoiceController.tsx

'use client'

/**
 * Voice controller.
 *
 * UX:
 * - Idle: microphone button (crossed out). Tap = "activate the assistant" (a
 *   user gesture is required so that the AudioContext can start).
 * - Active: the wake models are loaded (once) → the wake-word listener starts
 *   and keeps running in the background. The button glows purple and prompts
 *   for the wake word «Космо».
 *   - Wake word triggered → MicVAD starts → onSpeechEnd → STT → chat → TTS.
 *   - In parallel, a tap on the button while it is listening for the wake word
 *     is a manual trigger (as before), for when the wake word does not fire or
 *     the wake training is still weak.
 * - A tap in any other active state (loading, recording, busy), or a long
 *   press in any state → turns off wake and mic completely.
 */
import { useEffect, useRef, useState } from 'react'
import { Mic, MicOff } from 'lucide-react'
import { WakeWordDetector } from '@/lib/wake-word'

// Agent identifiers accepted by emitLocal and sent to the chat endpoint.
type Agent = 'cosmo' | 'lusya'
// Lifecycle states of the controller button; see VoiceController for the transitions.
type ControllerState = 'idle' | 'loading' | 'listening' | 'recording' | 'busy' | 'error'

// Agent used for every emitted event and chat request in this file.
const AGENT: Agent = 'cosmo'
// Passed to WakeWordDetector as its `threshold` option.
const WAKE_THRESHOLD = 0.5

/**
 * Broadcasts a `voice-local` CustomEvent on `window`.
 *
 * @param event - Event name placed in `detail.event`.
 * @param agent - Agent the event is attributed to.
 * @param text - Optional text placed in `detail.text`.
 */
function emitLocal(event: string, agent: Agent, text?: string) {
  // The timestamp is taken at dispatch time, in ISO-8601 form.
  const detail = { event, agent, text, timestamp: new Date().toISOString() }
  const voiceEvent = new CustomEvent('voice-local', { detail })
  window.dispatchEvent(voiceEvent)
}

/**
 * Encodes float samples as a mono 16-bit PCM WAV file.
 *
 * @param audio - Samples; values outside [-1, 1] are clamped.
 * @param sampleRate - Sample rate written into the header (default 16000).
 * @returns A Blob of type `audio/wav` holding a 44-byte header followed by the samples.
 */
function floatToWav(audio: Float32Array, sampleRate = 16000): Blob {
  const bytesPerSample = 2
  const headerBytes = 44
  const dataBytes = audio.length * bytesPerSample
  const buffer = new ArrayBuffer(headerBytes + dataBytes)
  const wav = new DataView(buffer)

  // RIFF container: total size excludes the 8 bytes of "RIFF" + size field.
  writeStr(wav, 0, 'RIFF')
  wav.setUint32(4, 36 + dataBytes, true)
  writeStr(wav, 8, 'WAVE')

  // "fmt " chunk: 16-byte body, PCM (1), one channel.
  writeStr(wav, 12, 'fmt ')
  wav.setUint32(16, 16, true)
  wav.setUint16(20, 1, true)
  wav.setUint16(22, 1, true)
  wav.setUint32(24, sampleRate, true)
  wav.setUint32(28, sampleRate * bytesPerSample, true) // byte rate
  wav.setUint16(32, bytesPerSample, true) // block align
  wav.setUint16(34, 16, true) // bits per sample

  // "data" chunk with little-endian signed 16-bit samples.
  writeStr(wav, 36, 'data')
  wav.setUint32(40, dataBytes, true)
  audio.forEach((sample, index) => {
    const clamped = Math.min(1, Math.max(-1, sample))
    // Negative and positive halves scale differently so both -1 and 1 fit in int16.
    const scaled = clamped < 0 ? clamped * 0x8000 : clamped * 0x7fff
    wav.setInt16(headerBytes + index * bytesPerSample, scaled, true)
  })

  return new Blob([buffer], { type: 'audio/wav' })
}

/**
 * Writes each UTF-16 code unit of `s` into `view` as one byte, starting at `offset`.
 *
 * @param view - Destination view.
 * @param offset - Byte offset of the first character.
 * @param s - Text to write; intended for ASCII tags such as chunk ids.
 */
function writeStr(view: DataView, offset: number, s: string) {
  // split('') yields code units, matching index-based charCodeAt iteration.
  s.split('').forEach((unit, index) => {
    view.setUint8(offset + index, unit.charCodeAt(0))
  })
}

/**
 * Floating microphone button that drives the voice pipeline
 * (wake word → MicVAD capture → STT → chat).
 *
 * State machine:
 * - `idle` / `error`: nothing is running. A tap probes microphone permission
 *   and starts the wake-word detector.
 * - `loading`: the permission probe / wake-word start is in progress.
 * - `listening`: the wake-word detector is running. A tap acts as a manual
 *   trigger, exactly as if the wake word had fired.
 * - `recording`: the wake detector is paused and MicVAD is capturing a phrase.
 * - `busy`: the captured phrase is going through STT and chat.
 *
 * A tap in `loading`, `recording` or `busy`, or a 700 ms long press in any
 * state, shuts everything down and returns to `idle`.
 */
export default function VoiceController() {
  const [state, setState] = useState<ControllerState>('idle')
  const wakeRef = useRef<WakeWordDetector | null>(null)
  const vadRef = useRef<any>(null)
  const busyRef = useRef(false)
  // Bumped on every teardown. Async work captures the value when it starts and
  // treats itself as stale (must not touch refs or state) if it has changed.
  const sessionRef = useRef(0)
  // Long-press bookkeeping for the button.
  const pressTimer = useRef<ReturnType<typeof setTimeout> | null>(null)
  const longPressed = useRef(false)

  const clearPressTimer = () => {
    if (pressTimer.current) clearTimeout(pressTimer.current)
    pressTimer.current = null
  }

  // Releases the VAD and the wake-word detector and invalidates in-flight async
  // work. Best-effort: errors from the underlying libraries are ignored.
  const teardown = async () => {
    sessionRef.current += 1
    busyRef.current = false
    const vad = vadRef.current
    const wake = wakeRef.current
    vadRef.current = null
    wakeRef.current = null
    try { vad?.pause?.() } catch {}
    try { vad?.destroy?.() } catch {}
    try { await wake?.stop?.() } catch {}
  }

  useEffect(() => {
    return () => {
      clearPressTimer()
      void teardown()
    }
    // teardown/clearPressTimer only touch refs, so the mount-time closures stay valid.
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [])

  // Handles a phrase captured by the VAD and sends it through the pipeline.
  const handleSpeechEnd = async (audio: Float32Array) => {
    if (busyRef.current) return
    // Ignore fragments shorter than 0.4 s at 16 kHz; the VAD keeps listening.
    if (audio.length < 16000 * 0.4) return
    const session = sessionRef.current
    busyRef.current = true
    // One phrase per wake: pause the VAD so that later speech cannot enter the
    // pipeline without the wake word. onWakeDetected restarts it.
    try { vadRef.current?.pause?.() } catch {}
    setState('busy')
    emitLocal('listening', AGENT)

    try {
      const wav = floatToWav(audio, 16000)
      const sttResp = await fetch('/api/voice/stt', {
        method: 'POST',
        headers: { 'Content-Type': 'audio/wav' },
        body: wav,
      })
      if (!sttResp.ok) throw new Error(`stt ${sttResp.status}`)
      const { text } = await sttResp.json()
      const userText = (text || '').trim()
      if (!userText || userText.length < 2) {
        emitLocal('idle', AGENT)
        return
      }
      const chatResp = await fetch('/api/voice/chat', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ text: userText, agent: AGENT }),
      })
      if (!chatResp.ok) throw new Error(`chat ${chatResp.status}`)
    } catch (e) {
      console.error('[voice] pipeline error:', e)
      emitLocal('error', AGENT, 'Не получилось')
    } finally {
      // If stop()/unmount happened meanwhile, teardown already reset everything
      // and a newer session may be running; leave it alone.
      if (sessionRef.current === session) {
        busyRef.current = false
        // Back to wake-word mode once the phrase has been processed.
        try { wakeRef.current?.resume?.() } catch {}
        setState((s) => (s === 'busy' ? 'listening' : s))
      }
    }
  }

  // Creates the VAD on demand: after a wake detection or a manual tap.
  const startVAD = async () => {
    const session = sessionRef.current
    try {
      const { MicVAD } = await import('@ricky0123/vad-web')
      const vad = await MicVAD.new({
        model: 'v5',
        baseAssetPath: '/vad/',
        onnxWASMBasePath: '/vad/',
        ortConfig: (ort: any) => {
          ort.env.wasm.numThreads = 1
          ort.env.wasm.simd = true
        },
        positiveSpeechThreshold: 0.6,
        negativeSpeechThreshold: 0.45,
        minSpeechMs: 160,
        redemptionMs: 750,
        onSpeechStart: () => {
          emitLocal('wake', AGENT)
        },
        onSpeechEnd: handleSpeechEnd,
      })
      // Discard the fresh instance if the controller was stopped while it was
      // loading, or if a concurrent trigger already installed a VAD.
      if (sessionRef.current !== session || vadRef.current) {
        try { vad.destroy() } catch {}
        return
      }
      vadRef.current = vad
      vad.start()
    } catch (e: any) {
      console.error('[voice] VAD init failed:', e?.name, e?.message, e)
      if (sessionRef.current !== session) return
      setState('error')
      emitLocal('error', AGENT, `VAD: ${e?.message?.slice(0, 60) || 'init'}`)
    }
  }

  // Reacts to a wake-word hit (or a manual tap) by switching to phrase capture.
  const onWakeDetected = async (score: number) => {
    console.log(`[wake] cosmo score=${score.toFixed(3)}`)
    if (busyRef.current) return
    // Pause wake so VAD initialisation and the spoken command do not re-trigger it on echo.
    try { wakeRef.current?.pause?.() } catch {}
    setState('recording')
    emitLocal('wake', AGENT)
    // Create the VAD on first use; afterwards just restart the paused instance.
    if (!vadRef.current) {
      await startVAD()
    } else {
      try { vadRef.current.start?.() } catch {}
    }
  }

  // Activates the assistant: microphone permission probe, then wake-word detector.
  const start = async () => {
    if (state !== 'idle' && state !== 'error') return
    setState('loading')

    // Release leftovers from a previous run (a VAD init failure leaves the wake
    // detector paused but alive) and invalidate any concurrent start().
    await teardown()
    const session = sessionRef.current

    // 1. Ask for microphone permission separately.
    try {
      const probe = await navigator.mediaDevices.getUserMedia({ audio: true })
      probe.getTracks().forEach((t) => t.stop())
    } catch (e: any) {
      console.error('[voice] mic permission failed:', e?.name, e?.message)
      if (sessionRef.current !== session) return
      setState('error')
      emitLocal('error', AGENT, e?.name === 'NotAllowedError' ? 'Нет доступа к микрофону' : 'Микрофон не открылся')
      return
    }
    // Stopped while the permission prompt was open.
    if (sessionRef.current !== session) return

    // 2. Start the wake-word detector.
    try {
      const wake = new WakeWordDetector({
        modelPath: '/wake/cosmo.onnx',
        threshold: WAKE_THRESHOLD,
        onWake: (s) => { void onWakeDetected(s) },
        // onScore: (s) => { if (s > 0.1) console.log('[wake] score', s.toFixed(3)) },
        onError: (e) => console.warn('[wake] error', e),
      })
      await wake.start()
      if (sessionRef.current !== session) {
        // Stopped while the detector was starting: do not leave it running.
        try { await wake.stop?.() } catch {}
        return
      }
      wakeRef.current = wake
      setState('listening')
    } catch (e: any) {
      console.error('[wake] init failed:', e)
      if (sessionRef.current !== session) return
      setState('error')
      emitLocal('error', AGENT, `Wake: ${e?.message?.slice(0, 60) || 'init'}`)
    }
  }

  // Shuts down wake and mic completely and returns to idle.
  const stop = async () => {
    await teardown()
    setState('idle')
    emitLocal('idle', AGENT)
  }

  // Short tap: activate when idle/error, manual trigger while listening,
  // otherwise turn everything off.
  const onTap = async () => {
    if (state === 'idle' || state === 'error') {
      await start()
    } else if (state === 'listening') {
      // Manual trigger: emulate a wake event.
      void onWakeDetected(1.0)
    } else {
      await stop()
    }
  }

  const onLongPress = async () => {
    // A long press always turns everything off (escape hatch from a bad state).
    await stop()
  }

  // Primitive long-press detection.
  const onPointerDown = () => {
    // A second pointer must not orphan the first pointer's timer.
    clearPressTimer()
    longPressed.current = false
    pressTimer.current = setTimeout(() => {
      longPressed.current = true
      void onLongPress()
    }, 700)
  }
  const onPointerUp = () => {
    // Only treat the release as a tap if the press started on this button.
    const pressStartedHere = pressTimer.current !== null
    clearPressTimer()
    if (pressStartedHere && !longPressed.current) void onTap()
  }

  const isActive = state === 'listening' || state === 'recording' || state === 'busy'
  const isLoading = state === 'loading'

  return (
    <button
      onPointerDown={onPointerDown}
      onPointerUp={onPointerUp}
      onPointerCancel={clearPressTimer}
      data-swipe-ignore
      aria-label={isActive ? 'Выключить ассистента' : 'Активировать ассистента'}
      title={isActive ? 'Скажи «Космо» · долгий тап = выкл' : 'Тап = активировать'}
      style={{
        position: 'fixed',
        right: 24,
        bottom: 24,
        zIndex: 250,
        width: 64,
        height: 64,
        borderRadius: '50%',
        border: 'none',
        cursor: 'pointer',
        background: isActive
          ? 'linear-gradient(135deg, #7c3aed 0%, #a5b4fc 100%)'
          : 'rgba(255,255,255,0.08)',
        backdropFilter: 'blur(20px)',
        WebkitBackdropFilter: 'blur(20px)' as any,
        boxShadow: isActive
          ? '0 0 32px rgba(124, 58, 237, 0.6), 0 8px 24px rgba(0,0,0,0.4)'
          : '0 4px 12px rgba(0,0,0,0.3)',
        color: isActive ? '#fff' : 'rgba(255,255,255,0.65)',
        display: 'flex',
        alignItems: 'center',
        justifyContent: 'center',
        transition: 'background 0.25s, box-shadow 0.25s, transform 0.15s',
        transform: isLoading ? 'scale(0.92)' : 'scale(1)',
      }}
    >
      {isActive ? <Mic size={28} /> : <MicOff size={28} />}
    </button>
  )
}