refactor: VAD upgrade, retry, dead code cleanup, AGENT removal
- audio: switch VAD to webrtcvad with RMS gate + fallback to RMS - audio: honor FOLLOWUP_TIMEOUT — short silence wait after bot response - llm: retry with exponential backoff on network errors and 5xx - llm: VOICE_MAX_TOKENS env (default 300) instead of hardcoded 150 - tts: optional VAD-based barge-in (BARGE_IN_ENABLED, off by default) - tts: remove dead start_barge_in_listener / was_barge_in helpers - config: drop AGENT/LUSYA_AGENT — routing happens via session_key - modes: remove unused imports, pass FOLLOWUP_TIMEOUT to follow-up record() - docs: full rewrite of README and CLAUDE.md to match current architecture
This commit is contained in:
117
satellite/llm.py
117
satellite/llm.py
@@ -1,13 +1,13 @@
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import requests
|
||||
|
||||
from .config import AGENTS, log
|
||||
from .config import AGENTS, VOICE_MAX_TOKENS, LLM_RETRIES, log
|
||||
from .text import clean_for_speech, find_sentence_end
|
||||
from .tts import speak, play_error_sound
|
||||
|
||||
# Ключ голосовой сессии — Cosmo работает как полноценный агент
|
||||
VOICE_SESSION_KEY = os.getenv("VOICE_SESSION_KEY", "agent:main:voice:home")
|
||||
|
||||
# "stream" — режем по предложениям (быстро, но рваная интонация)
|
||||
@@ -26,67 +26,86 @@ FILLER_PATTERNS = re.compile(
|
||||
r'(?:(?:сейчас посмотрю|дай мне секунду|дай секунду|проверяю|загружаю|узнаю'
|
||||
r'|смотрю|одну секунду|я сейчас посмотрю|я проверю|попробую другой источник'
|
||||
r'|нужны конкретные числа|дай мне загрузить)[^.!?]*[.!?]?\s*)+',
|
||||
re.IGNORECASE
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
def strip_fillers(text: str) -> str:
|
||||
return FILLER_PATTERNS.sub('', text).strip()
|
||||
|
||||
|
||||
|
||||
|
||||
def is_reset_command(text: str) -> bool:
|
||||
return bool(RESET_PATTERNS.search(text))
|
||||
|
||||
|
||||
def ask_agent_stream(text: str, conv=None, agent_id: str = "cosmo") -> str:
|
||||
def _post_with_retry(session, url, headers, payload):
|
||||
"""POST с экспоненциальным backoff. Retry на сетевые ошибки и 5xx; 4xx — сразу вверх."""
|
||||
last_exc = None
|
||||
for attempt in range(LLM_RETRIES):
|
||||
try:
|
||||
resp = session.post(url, headers=headers, json=payload, stream=True, timeout=60)
|
||||
if resp.status_code >= 500:
|
||||
raise requests.HTTPError(f"{resp.status_code} {resp.text[:200]}", response=resp)
|
||||
resp.raise_for_status()
|
||||
return resp
|
||||
except (requests.ConnectionError, requests.Timeout, requests.HTTPError) as e:
|
||||
last_exc = e
|
||||
# 4xx (кроме 408/429) не ретраим
|
||||
resp = getattr(e, "response", None)
|
||||
if isinstance(e, requests.HTTPError) and resp is not None:
|
||||
if resp.status_code < 500 and resp.status_code not in (408, 429):
|
||||
raise
|
||||
if attempt == LLM_RETRIES - 1:
|
||||
raise
|
||||
delay = 0.5 * (2 ** attempt)
|
||||
log.warning(f"Gateway retry {attempt + 1}/{LLM_RETRIES} через {delay:.1f}s: {e}")
|
||||
time.sleep(delay)
|
||||
raise last_exc # unreachable
|
||||
|
||||
|
||||
def ask_agent_stream(text: str, agent_id: str = "cosmo") -> str:
|
||||
"""Отправляет запрос к OpenClaw gateway и озвучивает ответ."""
|
||||
def _maybe_speak(t: str):
|
||||
if t.strip():
|
||||
speak(t, agent_id)
|
||||
|
||||
cfg = AGENTS.get(agent_id, AGENTS["cosmo"])
|
||||
gateway_url = cfg["gateway_url"]
|
||||
session = cfg["session"]
|
||||
agent = cfg["agent"]
|
||||
|
||||
session_key = cfg.get("session_key", VOICE_SESSION_KEY)
|
||||
|
||||
payload = {
|
||||
"stream": True,
|
||||
"messages": [{"role": "user", "content": text}],
|
||||
"max_tokens": VOICE_MAX_TOKENS,
|
||||
}
|
||||
headers = {
|
||||
"x-ocplatform-model": cfg["voice_model"],
|
||||
"x-openclaw-session-key": session_key,
|
||||
}
|
||||
|
||||
try:
|
||||
resp = session.post(
|
||||
f"{gateway_url}/v1/chat/completions",
|
||||
headers={
|
||||
"x-ocplatform-model": cfg["voice_model"],
|
||||
"x-openclaw-session-key": session_key,
|
||||
},
|
||||
json={
|
||||
"model": agent,
|
||||
"stream": True,
|
||||
"messages": [{"role": "user", "content": text}],
|
||||
"max_tokens": 150,
|
||||
},
|
||||
stream=True,
|
||||
timeout=60,
|
||||
resp = _post_with_retry(
|
||||
cfg["session"], f"{cfg['gateway_url']}/v1/chat/completions", headers, payload,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
except requests.ConnectionError:
|
||||
log.exception("Gateway недоступен")
|
||||
log.exception("Gateway недоступен после retry")
|
||||
msg = "Не могу связаться с сервером, попробуй ещё раз."
|
||||
print(f"⚠️ {msg}")
|
||||
play_error_sound()
|
||||
_maybe_speak(msg)
|
||||
return msg
|
||||
except requests.Timeout:
|
||||
log.exception("Gateway таймаут")
|
||||
log.exception("Gateway таймаут после retry")
|
||||
msg = "Сервер не ответил вовремя, попробуй ещё раз."
|
||||
print(f"⚠️ {msg}")
|
||||
play_error_sound()
|
||||
_maybe_speak(msg)
|
||||
return msg
|
||||
except requests.HTTPError:
|
||||
log.exception(f"Gateway HTTP ошибка {resp.status_code}")
|
||||
except requests.HTTPError as e:
|
||||
status = e.response.status_code if e.response is not None else "?"
|
||||
body = e.response.text if e.response is not None else ""
|
||||
log.exception(f"Gateway HTTP {status}")
|
||||
msg = "Ошибка сервера, попробуй ещё раз."
|
||||
print(f"⚠️ Gateway {resp.status_code}: {resp.text}")
|
||||
print(f"⚠️ Gateway {status}: {body[:200]}")
|
||||
play_error_sound()
|
||||
_maybe_speak(msg)
|
||||
return msg
|
||||
@@ -98,25 +117,25 @@ def ask_agent_stream(text: str, conv=None, agent_id: str = "cosmo") -> str:
|
||||
for line in resp.iter_lines():
|
||||
if not line or line == b"data: [DONE]":
|
||||
continue
|
||||
if line.startswith(b"data: "):
|
||||
try:
|
||||
chunk = json.loads(line[6:])
|
||||
delta = chunk["choices"][0]["delta"].get("content", "")
|
||||
if not delta:
|
||||
continue
|
||||
|
||||
full_text += delta
|
||||
buffer += delta
|
||||
|
||||
if TTS_MODE == "stream":
|
||||
last_punct = find_sentence_end(buffer, min_len=120)
|
||||
if last_punct > -1:
|
||||
sentence = clean_for_speech(buffer[:last_punct + 1])
|
||||
_maybe_speak(sentence)
|
||||
buffer = buffer[last_punct + 1:].lstrip()
|
||||
|
||||
except (json.JSONDecodeError, KeyError, IndexError):
|
||||
if not line.startswith(b"data: "):
|
||||
continue
|
||||
try:
|
||||
chunk = json.loads(line[6:])
|
||||
delta = chunk["choices"][0]["delta"].get("content", "")
|
||||
if not delta:
|
||||
continue
|
||||
|
||||
full_text += delta
|
||||
buffer += delta
|
||||
|
||||
if TTS_MODE == "stream":
|
||||
last_punct = find_sentence_end(buffer, min_len=120)
|
||||
if last_punct > -1:
|
||||
sentence = clean_for_speech(strip_fillers(buffer[:last_punct + 1]))
|
||||
_maybe_speak(sentence)
|
||||
buffer = buffer[last_punct + 1:].lstrip()
|
||||
except (json.JSONDecodeError, KeyError, IndexError):
|
||||
continue
|
||||
except Exception as e:
|
||||
log.exception("Ошибка при чтении стрима")
|
||||
print(f"⚠️ Стрим прервался: {e}")
|
||||
@@ -132,6 +151,6 @@ def ask_agent_stream(text: str, conv=None, agent_id: str = "cosmo") -> str:
|
||||
_maybe_speak(result)
|
||||
else:
|
||||
if buffer.strip():
|
||||
_maybe_speak(clean_for_speech(buffer))
|
||||
_maybe_speak(clean_for_speech(strip_fillers(buffer)))
|
||||
|
||||
return result
|
||||
|
||||
Reference in New Issue
Block a user