- audio: switch VAD to webrtcvad with RMS gate + fallback to RMS - audio: honor FOLLOWUP_TIMEOUT — short silence wait after bot response - llm: retry with exponential backoff on network errors and 5xx - llm: VOICE_MAX_TOKENS env (default 300) instead of hardcoded 150 - tts: optional VAD-based barge-in (BARGE_IN_ENABLED, off by default) - tts: remove dead start_barge_in_listener / was_barge_in helpers - config: drop AGENT/LUSYA_AGENT — routing happens via session_key - modes: remove unused imports, pass FOLLOWUP_TIMEOUT to follow-up record() - docs: full rewrite of README and CLAUDE.md to match current architecture
141 lines
5.1 KiB
Python
141 lines
5.1 KiB
Python
import os
|
||
|
||
from .config import GATEWAY_URL, AGENTS, FOLLOWUP_TIMEOUT, MAX_DURATION, log
|
||
from .audio import record
|
||
from .tts import speak, stop_speaking
|
||
from .llm import ask_agent_stream, is_reset_command, VOICE_SESSION_KEY
|
||
|
||
# Wake-word score above which a prediction activates the assistant.
# Read from the WAKE_THRESHOLD environment variable; defaults to 0.5.
WAKE_THRESHOLD = float(os.environ.get("WAKE_THRESHOLD", "0.5"))
|
||
|
||
|
||
def _handle_reset(text: str, agent_id: str) -> bool:
    """Handle the "reset session" voice command.

    If ``is_reset_command(text)`` is true, the slash command ``/new`` is
    posted to the agent's OpenClaw gateway (the gateway's reply is not read
    out) and a short confirmation phrase is printed and spoken.

    Args:
        text: Recognised user utterance.
        agent_id: Key into ``AGENTS``; unknown ids fall back to the "cosmo"
            configuration.

    Returns:
        True when ``text`` was a reset command (it is then fully handled
        here), False otherwise.
    """
    if not is_reset_command(text):
        return False

    agent_cfg = AGENTS.get(agent_id, AGENTS["cosmo"])
    print("🔄 Отправляю /new в OpenClaw")
    try:
        # The request is assembled inside the try on purpose: a missing
        # config key is logged like any other failure instead of raising.
        endpoint = f"{agent_cfg['gateway_url']}/v1/chat/completions"
        request_headers = {
            "x-ocplatform-model": agent_cfg["voice_model"],
            "x-openclaw-session-key": agent_cfg.get("session_key", VOICE_SESSION_KEY),
        }
        payload = {
            "stream": False,
            "messages": [{"role": "user", "content": "/new"}],
        }
        agent_cfg["session"].post(
            endpoint,
            headers=request_headers,
            json=payload,
            timeout=30,
        )
    except Exception:
        # Best effort: the failure is logged and the confirmation below is
        # still spoken.
        log.exception("Не удалось отправить /new")

    confirmation = "Начинаю новую сессию."
    print(f"🔄 {confirmation}")
    speak(confirmation, agent_id)
    return True
|
||
|
||
|
||
def _conversation_loop(agent_id: str, agent_name: str = "Cosmo"):
    """Run one dialogue with an agent until the user stays silent.

    The first recording waits up to ``MAX_DURATION`` for speech to begin;
    every later recording uses the shorter ``FOLLOWUP_TIMEOUT``. Each
    recognised utterance is either handled as a reset command or passed to
    ``ask_agent_stream``, whose returned text is printed.

    Args:
        agent_id: Identifier of the agent to talk to.
        agent_name: Display name used in console output.

    Returns:
        None, as soon as ``record`` yields an empty result.
    """
    silence_timeout = MAX_DURATION
    while True:
        utterance = record(initial_silence_timeout=silence_timeout)
        # Every recording after the first one uses the short follow-up wait.
        silence_timeout = FOLLOWUP_TIMEOUT

        if not utterance:
            print("😴 Тишина, жду активации...\n")
            return

        print(f"📝 Ты → {agent_name}: {utterance}")

        if not _handle_reset(utterance, agent_id):
            reply = ask_agent_stream(utterance, agent_id=agent_id)
            print(f"🤖 {agent_name}: {reply}\n")
|
||
|
||
|
||
def _wait_for_enter() -> bool:
    """Block until the user presses Enter; return False if stdin is closed."""
    try:
        input("⏎ Нажми Enter и говори...")
    except EOFError:
        return False
    return True


def run_with_enter():
    """Run the assistant in push-to-talk mode: Enter starts a conversation.

    Each Enter press interrupts any speech that is still playing and starts
    a dialogue with the "cosmo" agent. The loop ends on Ctrl+C or when stdin
    reaches end-of-file. Any other exception is logged and reported, and the
    loop keeps running.
    """
    print("\n🦞 Cosmo Satellite запущен (режим: Enter для активации)")
    print(f" Gateway : {GATEWAY_URL}")
    print("\nНажми Enter → говори → получи ответ. Ctrl+C для выхода.\n")

    while True:
        try:
            if not _wait_for_enter():
                # stdin is closed (Ctrl+D, detached service, exhausted pipe).
                # Without this exit the generic handler below would catch
                # EOFError on every iteration and spin forever.
                print("\n👋 Выход")
                break
            stop_speaking()  # barge-in
            _conversation_loop("cosmo", "Cosmo")

        except KeyboardInterrupt:
            print("\n👋 Выход")
            break
        except Exception as e:
            log.exception("Непредвиденная ошибка в цикле Enter")
            print(f"⚠️ Ошибка: {e} — продолжаю работу...\n")
|
||
|
||
|
||
def _converse_with_capture_paused(stream, model, agent_id: str, agent_name: str) -> None:
    """Run one conversation with wake-word capture paused, then resume it.

    The stream is stopped before the conversation. Afterwards the model
    state is reset and the stream restarted, including when the conversation
    raises an ``Exception``, so the caller's read loop never continues on a
    stopped stream. ``KeyboardInterrupt`` propagates without restarting,
    because the caller is shutting down.
    """
    stream.stop_stream()
    try:
        _conversation_loop(agent_id, agent_name)
    except Exception:
        model.reset()
        stream.start_stream()
        raise
    model.reset()
    stream.start_stream()


def run_with_porcupine():
    """Run the assistant in wake-word mode.

    Despite its name, this function uses the ``openwakeword`` ``Model``
    class. It reads 1280-sample frames from the microphone, scores them with
    the model listed in the ``WAKE_WORD_COSMO`` environment variable, and
    starts a conversation with the "cosmo" agent when the score exceeds
    ``WAKE_THRESHOLD``.

    Errors inside the loop are logged and listening continues. Ctrl+C ends
    the loop; the audio stream and the PyAudio instance are released on exit.
    """
    import numpy as np
    import pyaudio
    from openwakeword.model import Model

    cosmo_model = Model(
        wakeword_models=[os.getenv("WAKE_WORD_COSMO")],
        inference_framework="onnx",
    )
    # TODO: wire up Lusya — uncomment once the lusya model has been trained
    # lusya_model = Model(
    #     wakeword_models=[os.getenv("WAKE_WORD_LUSYA")],
    #     inference_framework="onnx",
    # )

    audio = pyaudio.PyAudio()
    # OpenWakeWord expects 16 kHz mono 16-bit PCM in frames of 1280 samples (80 ms)
    stream = audio.open(rate=16000, channels=1, format=pyaudio.paInt16,
                        input=True, frames_per_buffer=1280)

    print("✅ Слушаю через OpenWakeWord...")

    try:
        while True:
            try:
                pcm = stream.read(1280, exception_on_overflow=False)
                pcm = np.frombuffer(pcm, dtype=np.int16)

                # NOTE(review): the "cosmo" key presumably has to match the
                # name the loaded model reports — confirm against the model file.
                cosmo_score = cosmo_model.predict(pcm)["cosmo"]
                if cosmo_score > 0.1:
                    # Debug trace of near-miss and actual detections.
                    print(f"PREDICTION cosmo: {cosmo_score:.3f}")

                if cosmo_score > WAKE_THRESHOLD:
                    stop_speaking()  # in case TTS is still playing
                    _converse_with_capture_paused(stream, cosmo_model, "cosmo", "Cosmo")
                    continue

                # TODO: Lusya — uncomment once the model is ready
                # lusya_score = lusya_model.predict(pcm)["lusya"]
                # if lusya_score > WAKE_THRESHOLD:
                #     stop_speaking()
                #     _converse_with_capture_paused(stream, lusya_model, "lusya", "Люся")
                #     continue

            except KeyboardInterrupt:
                raise
            except Exception as e:
                log.exception("Непредвиденная ошибка в wake-word цикле")
                print(f"⚠️ Ошибка: {e} — продолжаю слушать...\n")

    except KeyboardInterrupt:
        print("\n👋 Выход")
    finally:
        stream.close()
        audio.terminate()
|