# home-voice-assistant/satellite/modes.py

import os

from .config import GATEWAY_URL, AGENTS, FOLLOWUP_TIMEOUT, MAX_DURATION, log
from .audio import record
from .tts import speak, stop_speaking
from .llm import ask_agent_stream, is_reset_command, VOICE_SESSION_KEY, LLM_BACKEND
from . import notifier

# Wake-word score above which a prediction counts as a detection.
# Overridable through the WAKE_THRESHOLD environment variable (default 0.5).
WAKE_THRESHOLD = float(os.environ.get("WAKE_THRESHOLD", "0.5"))


def _handle_reset(text: str, agent_id: str) -> bool:
    """Handle a "reset the conversation" command, if ``text`` is one.

    What a reset does depends on the configured LLM backend:
    - claude:   local history for the agent is cleared via ``reset_history``
    - otherwise (OpenClaw): a ``/new`` message is posted to the agent's gateway

    After the reset attempt a confirmation is sent through the notifier and,
    when ``notifier.speak_locally()`` says so, also spoken on this machine.

    Args:
        text: Recognized user utterance.
        agent_id: Identifier of the agent whose session is being reset.

    Returns:
        True when ``text`` was a reset command (and was handled here),
        False when it was not and the caller should process it normally.
    """
    if not is_reset_command(text):
        return False

    if LLM_BACKEND != "claude":
        # Unknown agent ids fall back to the "cosmo" configuration.
        agent_cfg = AGENTS.get(agent_id, AGENTS["cosmo"])
        print("🔄 Отправляю /new в OpenClaw")
        try:
            send = agent_cfg["session"].post
            endpoint = f"{agent_cfg['gateway_url']}/v1/chat/completions"
            request_headers = {
                "x-ocplatform-model": agent_cfg["voice_model"],
                "x-openclaw-session-key": agent_cfg.get("session_key", VOICE_SESSION_KEY),
            }
            new_session_payload = {
                "stream": False,
                "messages": [{"role": "user", "content": "/new"}],
            }
            send(
                endpoint,
                headers=request_headers,
                json=new_session_payload,
                timeout=30,
            )
        except Exception:
            # Best effort: a failed request is logged and the confirmation
            # below is still announced.
            log.exception("Не удалось отправить /new")
    else:
        from .llm_claude import reset_history
        print("🔄 Сбрасываю локальную историю (Claude)")
        reset_history(agent_id)

    announcement = "Начинаю новую сессию."
    print(f"🔄 {announcement}")
    # Always emit a response event (the tablet reads it out); speak on this
    # machine only when TTS is handled locally.
    notifier.response(announcement, agent_id)
    if notifier.speak_locally():
        speak(announcement, agent_id)
    return True


def _conversation_loop(agent_id: str, agent_name: str = "Cosmo"):
    """Run one dialogue with an agent until the user stops talking.

    The first recording waits for speech with ``MAX_DURATION`` as the initial
    silence timeout; every follow-up recording uses ``FOLLOWUP_TIMEOUT``.
    Before each follow-up a ``listening`` event is sent so the tablet can show
    that the satellite is still waiting.

    The loop ends (after sending an ``idle`` event) as soon as ``record``
    returns nothing.

    Args:
        agent_id: Identifier of the agent the utterances are routed to.
        agent_name: Display name used in console output.
    """
    awaiting_followup = False
    while True:
        if awaiting_followup:
            # Follow-up turn: tell the tablet we are listening again; the
            # previous answer stays on screen.
            notifier.listening(agent_id)
            silence_timeout = FOLLOWUP_TIMEOUT
        else:
            silence_timeout = MAX_DURATION
        awaiting_followup = True

        text = record(initial_silence_timeout=silence_timeout)
        if not text:
            print("😴 Тишина, жду активации...\n")
            notifier.idle()
            return

        print(f"📝 Ты → {agent_name}: {text}")
        notifier.command(text, agent_id)

        # A reset command is fully handled by _handle_reset; anything else
        # goes to the agent.
        if not _handle_reset(text, agent_id):
            response = ask_agent_stream(text, agent_id=agent_id)
            print(f"🤖 {agent_name}: {response}\n")
            notifier.response(response, agent_id)


def run_with_enter():
    """Run the satellite in push-to-talk mode: every Enter press starts a dialogue.

    Prints a short banner (backend / gateway), then loops forever:
    wait for Enter, stop any ongoing speech, send a ``wake`` event and run a
    conversation with the "cosmo" agent.

    The loop exits on Ctrl+C or when stdin reaches end-of-file. Any other
    exception raised during an iteration is logged and the loop keeps going.
    """
    print("\n🦞 Cosmo Satellite запущен (режим: Enter для активации)")
    if LLM_BACKEND == "claude":
        print("   LLM     : Claude (direct)")
    else:
        print(f"   Gateway : {GATEWAY_URL}")
    print("\nНажми Enter → говори → получи ответ. Ctrl+C для выхода.\n")

    while True:
        try:
            input("⏎  Нажми Enter и говори...")
            stop_speaking()  # barge-in: interrupt TTS that may still be playing
            notifier.wake("cosmo")
            _conversation_loop("cosmo", "Cosmo")

        except (KeyboardInterrupt, EOFError):
            # EOFError: stdin is closed or exhausted (piped input, detached
            # terminal). input() would fail again immediately on every
            # iteration, so treat it as a request to exit instead of letting
            # the generic handler below spin forever.
            print("\n👋 Выход")
            break
        except Exception as e:
            log.exception("Непредвиденная ошибка в цикле Enter")
            print(f"⚠️  Ошибка: {e} — продолжаю работу...\n")


def run_with_porcupine():
    """Run the satellite in wake-word mode, listening continuously on the microphone.

    Despite the name, detection is done with OpenWakeWord; the function name is
    kept as-is for existing callers.

    Audio frames are read from the default input device and scored by the
    wake-word model. When the score exceeds ``WAKE_THRESHOLD`` the microphone
    stream is paused, a conversation with the "cosmo" agent is run, and
    listening resumes afterwards.

    The loop exits on Ctrl+C. Any other exception inside an iteration is logged
    and listening continues. The audio stream and PyAudio instance are released
    on exit.

    Raises:
        RuntimeError: if the ``WAKE_WORD_COSMO`` environment variable is not set.
    """
    import numpy as np
    import pyaudio
    from openwakeword.model import Model

    # Fail early with a clear message instead of handing [None] to the model.
    cosmo_wakeword = os.getenv("WAKE_WORD_COSMO")
    if not cosmo_wakeword:
        raise RuntimeError(
            "Переменная окружения WAKE_WORD_COSMO не задана: "
            "укажите wake-word модель для Cosmo"
        )

    cosmo_model = Model(
        wakeword_models=[cosmo_wakeword],
        inference_framework="onnx",
    )
    # TODO: enable Lusya — uncomment once the lusya model is trained
    # lusya_model = Model(
    #     wakeword_models=[os.getenv("WAKE_WORD_LUSYA")],
    #     inference_framework="onnx",
    # )

    # OpenWakeWord expects 16 kHz mono 16-bit PCM in frames of 1280 samples (80 ms).
    sample_rate = 16000
    frame_samples = 1280

    audio = pyaudio.PyAudio()
    stream = audio.open(rate=sample_rate, channels=1, format=pyaudio.paInt16,
                        input=True, frames_per_buffer=frame_samples)

    print("✅ Слушаю через OpenWakeWord...")

    try:
        while True:
            try:
                raw = stream.read(frame_samples, exception_on_overflow=False)
                pcm = np.frombuffer(raw, dtype=np.int16)

                # NOTE(review): the "cosmo" key presumably matches the name of
                # the model given in WAKE_WORD_COSMO — confirm.
                cosmo_score = cosmo_model.predict(pcm)["cosmo"]
                if cosmo_score > 0.1:
                    # Debug trace of any score above 0.1.
                    print(f"PREDICTION cosmo: {cosmo_score:.3f}")

                if cosmo_score > WAKE_THRESHOLD:
                    print("✅ Услышал 'Космо'!")
                    stop_speaking()  # in case TTS is still playing
                    notifier.wake("cosmo")
                    stream.stop_stream()
                    try:
                        _conversation_loop("cosmo", "Cosmo")
                    finally:
                        # Always resume listening, even when the dialogue
                        # fails: otherwise the stream stays stopped and the
                        # loop keeps calling stream.read() on it.
                        cosmo_model.reset()
                        stream.start_stream()
                    continue

                # TODO: Lusya — uncomment once the model is ready
                # lusya_score = lusya_model.predict(pcm)["lusya"]
                # if lusya_score > WAKE_THRESHOLD:
                #     stop_speaking()
                #     stream.stop_stream()
                #     try:
                #         _conversation_loop("lusya", "Люся")
                #     finally:
                #         lusya_model.reset()
                #         stream.start_stream()
                #     continue

            except KeyboardInterrupt:
                # Let Ctrl+C reach the outer handler instead of being treated
                # as a recoverable error.
                raise
            except Exception as e:
                log.exception("Непредвиденная ошибка в wake-word цикле")
                print(f"⚠️  Ошибка: {e} — продолжаю слушать...\n")

    except KeyboardInterrupt:
        print("\n👋 Выход")
    finally:
        stream.close()
        audio.terminate()