refactor: VAD upgrade, retry, dead code cleanup, AGENT removal

- audio: switch VAD to webrtcvad with RMS gate + fallback to RMS - audio: honor FOLLOWUP_TIMEOUT — short silence wait after bot response - llm: retry with exponential backoff on network errors and 5xx - llm: VOICE_MAX_TOKENS env (default 300) instead of hardcoded 150 - tts: optional VAD-based barge-in (BARGE_IN_ENABLED, off by default) - tts: remove dead start_barge_in_listener / was_barge_in helpers - config: drop AGENT/LUSYA_AGENT — routing happens via session_key - modes: remove unused imports, pass FOLLOWUP_TIMEOUT to follow-up record() - docs: full rewrite of README and CLAUDE.md to match current architecture
2026-04-16 17:10:59 +03:00
parent a885cbe74b
commit a9001aef92
9 changed files with 541 additions and 358 deletions
--- a/satellite/llm.py
+++ b/satellite/llm.py
@@ -1,13 +1,13 @@
 import json
 import os
 import re
+import time
 import requests

-from .config import AGENTS, log
+from .config import AGENTS, VOICE_MAX_TOKENS, LLM_RETRIES, log
 from .text import clean_for_speech, find_sentence_end
 from .tts import speak, play_error_sound

-# Ключ голосовой сессии — Cosmo работает как полноценный агент
 VOICE_SESSION_KEY = os.getenv("VOICE_SESSION_KEY", "agent:main:voice:home")

 # "stream" — режем по предложениям (быстро, но рваная интонация)
@@ -26,67 +26,86 @@ FILLER_PATTERNS = re.compile(
    r'(?:(?:сейчас посмотрю|дай мне секунду|дай секунду|проверяю|загружаю|узнаю'
    r'|смотрю|одну секунду|я сейчас посмотрю|я проверю|попробую другой источник'
    r'|нужны конкретные числа|дай мне загрузить)[^.!?]*[.!?]?\s*)+',
-    re.IGNORECASE
+    re.IGNORECASE,
 )

+
 def strip_fillers(text: str) -> str:
    return FILLER_PATTERNS.sub('', text).strip()


-
-
 def is_reset_command(text: str) -> bool:
    return bool(RESET_PATTERNS.search(text))


-def ask_agent_stream(text: str, conv=None, agent_id: str = "cosmo") -> str:
+def _post_with_retry(session, url, headers, payload):
+    """POST с экспоненциальным backoff. Retry на сетевые ошибки и 5xx; 4xx — сразу вверх."""
+    last_exc = None
+    for attempt in range(LLM_RETRIES):
+        try:
+            resp = session.post(url, headers=headers, json=payload, stream=True, timeout=60)
+            if resp.status_code >= 500:
+                raise requests.HTTPError(f"{resp.status_code} {resp.text[:200]}", response=resp)
+            resp.raise_for_status()
+            return resp
+        except (requests.ConnectionError, requests.Timeout, requests.HTTPError) as e:
+            last_exc = e
+            # 4xx (кроме 408/429) не ретраим
+            resp = getattr(e, "response", None)
+            if isinstance(e, requests.HTTPError) and resp is not None:
+                if resp.status_code < 500 and resp.status_code not in (408, 429):
+                    raise
+            if attempt == LLM_RETRIES - 1:
+                raise
+            delay = 0.5 * (2 ** attempt)
+            log.warning(f"Gateway retry {attempt + 1}/{LLM_RETRIES} через {delay:.1f}s: {e}")
+            time.sleep(delay)
+    raise last_exc  # unreachable
+
+
+def ask_agent_stream(text: str, agent_id: str = "cosmo") -> str:
    """Отправляет запрос к OpenClaw gateway и озвучивает ответ."""
    def _maybe_speak(t: str):
        if t.strip():
            speak(t, agent_id)

    cfg = AGENTS.get(agent_id, AGENTS["cosmo"])
-    gateway_url = cfg["gateway_url"]
-    session = cfg["session"]
-    agent = cfg["agent"]
-
    session_key = cfg.get("session_key", VOICE_SESSION_KEY)

+    payload = {
+        "stream": True,
+        "messages": [{"role": "user", "content": text}],
+        "max_tokens": VOICE_MAX_TOKENS,
+    }
+    headers = {
+        "x-ocplatform-model": cfg["voice_model"],
+        "x-openclaw-session-key": session_key,
+    }
+
    try:
-        resp = session.post(
-            f"{gateway_url}/v1/chat/completions",
-            headers={
-                "x-ocplatform-model": cfg["voice_model"],
-                "x-openclaw-session-key": session_key,
-            },
-            json={
-                "model": agent,
-                "stream": True,
-                "messages": [{"role": "user", "content": text}],
-                "max_tokens": 150,
-            },
-            stream=True,
-            timeout=60,
+        resp = _post_with_retry(
+            cfg["session"], f"{cfg['gateway_url']}/v1/chat/completions", headers, payload,
        )
-        resp.raise_for_status()
    except requests.ConnectionError:
-        log.exception("Gateway недоступен")
+        log.exception("Gateway недоступен после retry")
        msg = "Не могу связаться с сервером, попробуй ещё раз."
        print(f"⚠️  {msg}")
        play_error_sound()
        _maybe_speak(msg)
        return msg
    except requests.Timeout:
-        log.exception("Gateway таймаут")
+        log.exception("Gateway таймаут после retry")
        msg = "Сервер не ответил вовремя, попробуй ещё раз."
        print(f"⚠️  {msg}")
        play_error_sound()
        _maybe_speak(msg)
        return msg
-    except requests.HTTPError:
-        log.exception(f"Gateway HTTP ошибка {resp.status_code}")
+    except requests.HTTPError as e:
+        status = e.response.status_code if e.response is not None else "?"
+        body = e.response.text if e.response is not None else ""
+        log.exception(f"Gateway HTTP {status}")
        msg = "Ошибка сервера, попробуй ещё раз."
-        print(f"⚠️  Gateway {resp.status_code}: {resp.text}")
+        print(f"⚠️  Gateway {status}: {body[:200]}")
        play_error_sound()
        _maybe_speak(msg)
        return msg
@@ -98,25 +117,25 @@ def ask_agent_stream(text: str, conv=None, agent_id: str = "cosmo") -> str:
        for line in resp.iter_lines():
            if not line or line == b"data: [DONE]":
                continue
-            if line.startswith(b"data: "):
-                try:
-                    chunk = json.loads(line[6:])
-                    delta = chunk["choices"][0]["delta"].get("content", "")
-                    if not delta:
-                        continue
-
-                    full_text += delta
-                    buffer += delta
-
-                    if TTS_MODE == "stream":
-                        last_punct = find_sentence_end(buffer, min_len=120)
-                        if last_punct > -1:
-                            sentence = clean_for_speech(buffer[:last_punct + 1])
-                            _maybe_speak(sentence)
-                            buffer = buffer[last_punct + 1:].lstrip()
-
-                except (json.JSONDecodeError, KeyError, IndexError):
+            if not line.startswith(b"data: "):
+                continue
+            try:
+                chunk = json.loads(line[6:])
+                delta = chunk["choices"][0]["delta"].get("content", "")
+                if not delta:
                    continue
+
+                full_text += delta
+                buffer += delta
+
+                if TTS_MODE == "stream":
+                    last_punct = find_sentence_end(buffer, min_len=120)
+                    if last_punct > -1:
+                        sentence = clean_for_speech(strip_fillers(buffer[:last_punct + 1]))
+                        _maybe_speak(sentence)
+                        buffer = buffer[last_punct + 1:].lstrip()
+            except (json.JSONDecodeError, KeyError, IndexError):
+                continue
    except Exception as e:
        log.exception("Ошибка при чтении стрима")
        print(f"⚠️  Стрим прервался: {e}")
@@ -132,6 +151,6 @@ def ask_agent_stream(text: str, conv=None, agent_id: str = "cosmo") -> str:
        _maybe_speak(result)
    else:
        if buffer.strip():
-            _maybe_speak(clean_for_speech(buffer))
+            _maybe_speak(clean_for_speech(strip_fillers(buffer)))

    return result