refactor: VAD upgrade, retry, dead code cleanup, AGENT removal
- audio: switch VAD to webrtcvad with RMS gate + fallback to RMS
- audio: honor FOLLOWUP_TIMEOUT — short silence wait after bot response
- llm: retry with exponential backoff on network errors and 5xx
- llm: VOICE_MAX_TOKENS env (default 300) instead of hardcoded 150
- tts: optional VAD-based barge-in (BARGE_IN_ENABLED, off by default)
- tts: remove dead start_barge_in_listener / was_barge_in helpers
- config: drop AGENT/LUSYA_AGENT — routing happens via session_key
- modes: remove unused imports, pass FOLLOWUP_TIMEOUT to follow-up record()
- docs: full rewrite of README and CLAUDE.md to match current architecture
This commit is contained in:
@@ -1,9 +1,8 @@
|
||||
import os
|
||||
import sys
|
||||
|
||||
from .config import GATEWAY_URL, AGENT, AGENTS, log
|
||||
from .config import GATEWAY_URL, AGENTS, FOLLOWUP_TIMEOUT, MAX_DURATION, log
|
||||
from .audio import record
|
||||
from .tts import speak, stop_speaking, is_speaking, start_barge_in_listener, was_barge_in
|
||||
from .tts import speak, stop_speaking
|
||||
from .llm import ask_agent_stream, is_reset_command, VOICE_SESSION_KEY
|
||||
|
||||
WAKE_THRESHOLD = float(os.getenv("WAKE_THRESHOLD", "0.5"))
|
||||
@@ -24,7 +23,6 @@ def _handle_reset(text: str, agent_id: str) -> bool:
|
||||
"x-openclaw-session-key": cfg.get("session_key", VOICE_SESSION_KEY),
|
||||
},
|
||||
json={
|
||||
"model": cfg["agent"],
|
||||
"stream": False,
|
||||
"messages": [{"role": "user", "content": "/new"}],
|
||||
},
|
||||
@@ -40,11 +38,15 @@ def _handle_reset(text: str, agent_id: str) -> bool:
|
||||
|
||||
|
||||
def _conversation_loop(agent_id: str, agent_name: str = "Cosmo"):
|
||||
"""Основной цикл диалога — слушает и отвечает пока пользователь говорит."""
|
||||
"""Основной цикл диалога.
|
||||
Первая запись — с большим таймаутом (MAX_DURATION), дальше — короткий FOLLOWUP_TIMEOUT."""
|
||||
first = True
|
||||
while True:
|
||||
text = record()
|
||||
timeout = MAX_DURATION if first else FOLLOWUP_TIMEOUT
|
||||
first = False
|
||||
text = record(initial_silence_timeout=timeout)
|
||||
if not text:
|
||||
print(f"😴 Тишина, жду активации...\n")
|
||||
print("😴 Тишина, жду активации...\n")
|
||||
return
|
||||
|
||||
print(f"📝 Ты → {agent_name}: {text}")
|
||||
@@ -59,7 +61,6 @@ def _conversation_loop(agent_id: str, agent_name: str = "Cosmo"):
|
||||
def run_with_enter():
|
||||
print("\n🦞 Cosmo Satellite запущен (режим: Enter для активации)")
|
||||
print(f" Gateway : {GATEWAY_URL}")
|
||||
print(f" Агент : {AGENT}")
|
||||
print("\nНажми Enter → говори → получи ответ. Ctrl+C для выхода.\n")
|
||||
|
||||
while True:
|
||||
@@ -97,7 +98,6 @@ def run_with_porcupine():
|
||||
input=True, frames_per_buffer=1280)
|
||||
|
||||
print("✅ Слушаю через OpenWakeWord...")
|
||||
# print("\nСкажи 'Космо' или 'Люся'...\n") # TODO: после подключения Люси
|
||||
|
||||
try:
|
||||
while True:
|
||||
@@ -110,12 +110,7 @@ def run_with_porcupine():
|
||||
print(f"PREDICTION cosmo: {cosmo_score:.3f}")
|
||||
|
||||
if cosmo_score > WAKE_THRESHOLD:
|
||||
if is_speaking():
|
||||
# Barge-in: прерываем TTS
|
||||
print("✋ Barge-in: прерываю ответ")
|
||||
stop_speaking()
|
||||
cosmo_model.reset()
|
||||
continue
|
||||
stop_speaking() # на случай если TTS ещё играет
|
||||
stream.stop_stream()
|
||||
_conversation_loop("cosmo", "Cosmo")
|
||||
cosmo_model.reset()
|
||||
@@ -124,10 +119,8 @@ def run_with_porcupine():
|
||||
|
||||
# TODO: Люся — раскомментировать когда модель готова
|
||||
# lusya_score = lusya_model.predict(pcm)["lusya"]
|
||||
# if lusya_score > 0.1:
|
||||
# print(f"PREDICTION lusya: {lusya_score:.3f}")
|
||||
# if lusya_score > 0.5:
|
||||
# print("✅ Услышала 'Люся'!")
|
||||
# if lusya_score > WAKE_THRESHOLD:
|
||||
# stop_speaking()
|
||||
# stream.stop_stream()
|
||||
# _conversation_loop("lusya", "Люся")
|
||||
# lusya_model.reset()
|
||||
|
||||
Reference in New Issue
Block a user