Files
home-voice-assistant/satellite/llm.py
Cosmo e4e7529063 feat(notifier): push state events to Smart Home Tablet overlay
Adds a thin HTTP bridge so the tablet at https://tablet.digital-home.site
shows a Siri-style overlay reflecting the current assistant state
(wake / command / response / idle / error). Non-fatal: if the tablet
is offline or TABLET_URL/VOICE_API_KEY are unset, events are silently
skipped and the assistant keeps working.

- satellite/notifier.py — POST /api/voice/event with bearer token,
  reused requests.Session for keep-alive, 1.5s timeout
- satellite/modes.py — emits wake on activation, command after STT,
  response after LLM, idle on timeout
- satellite/llm.py — emits error on gateway connection/timeout/HTTP
- .env.example documents TABLET_URL and VOICE_API_KEY

Tablet side (separate repo smart-home-tablet, commit 51c3d60) exposes
POST /api/voice/event + GET /api/voice/stream (SSE) and renders a
full-screen overlay in components/VoiceOverlay.tsx.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 12:43:01 +00:00

161 lines
6.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import json
import os
import re
import time
import requests
from .config import AGENTS, VOICE_MAX_TOKENS, LLM_RETRIES, log
from .text import clean_for_speech, find_sentence_end
from .tts import speak, play_error_sound
from . import notifier
# Fallback gateway session key, used when an agent config has no "session_key".
VOICE_SESSION_KEY = os.getenv("VOICE_SESSION_KEY", "agent:main:voice:home")
# "stream" — cut the reply into sentences (fast, but choppy intonation)
# "full" — collect the whole reply, then run TTS (natural, but a pause before speech starts)
TTS_MODE = os.getenv("TTS_MODE", "full")
# Russian phrases asking to start a new session/dialog/chat or to
# reset/clear the current session, history or context (case-insensitive).
RESET_PATTERNS = re.compile(
r"(начни|начать|создай|открой|давай).{0,10}(новую|новый|чистую|чистый).{0,10}(сессию|сессия|диалог|разговор|чат)"
r"|"
r"(сбрось|очисти|обнови).{0,10}(сессию|диалог|разговор|чат|историю|контекст)",
re.IGNORECASE,
)
# Filler phrases the agent generates BEFORE calling a tool.
# Each match covers a run of one or more such phrases, every phrase extended
# up to (and including) the next sentence terminator and trailing whitespace.
FILLER_PATTERNS = re.compile(
r'(?:(?:сейчас посмотрю|дай мне секунду|дай секунду|проверяю|загружаю|узнаю'
r'|смотрю|одну секунду|я сейчас посмотрю|я проверю|попробую другой источник'
r'|нужны конкретные числа|дай мне загрузить)[^.!?]*[.!?]?\s*)+',
re.IGNORECASE,
)
def strip_fillers(text: str) -> str:
    """Return ``text`` with all FILLER_PATTERNS matches removed and outer whitespace trimmed."""
    without_fillers = FILLER_PATTERNS.sub('', text)
    return without_fillers.strip()
def is_reset_command(text: str) -> bool:
    """Tell whether ``text`` contains a phrase matched by RESET_PATTERNS."""
    found = RESET_PATTERNS.search(text)
    return found is not None
def _post_with_retry(session, url, headers, payload):
    """POST ``payload`` as JSON with exponential backoff and return the streamed response.

    Network errors (``requests.ConnectionError`` / ``requests.Timeout``), 5xx
    responses and 408/429 are retried; any other 4xx is re-raised immediately.

    Args:
        session: object whose ``post`` method performs the request
            (called with ``stream=True`` and a 60 second timeout).
        url: target URL.
        headers: request headers.
        payload: JSON-serialisable request body.

    Returns:
        The successful response, still open for streaming; the caller is
        expected to consume or close it.

    Raises:
        requests.ConnectionError, requests.Timeout, requests.HTTPError:
            the error of the last attempt, or a non-retryable 4xx.
    """
    # Always make at least one attempt: with LLM_RETRIES <= 0 the loop would be
    # skipped entirely and the trailing `raise` would try to raise None.
    attempts = max(1, LLM_RETRIES)
    last_exc = None
    for attempt in range(attempts):
        try:
            resp = session.post(url, headers=headers, json=payload, stream=True, timeout=60)
            if resp.status_code >= 500:
                raise requests.HTTPError(f"{resp.status_code} {resp.text[:200]}", response=resp)
            resp.raise_for_status()
            return resp
        except (requests.ConnectionError, requests.Timeout, requests.HTTPError) as e:
            last_exc = e
            # 4xx (except 408/429) are not retried
            failed = getattr(e, "response", None)
            if isinstance(e, requests.HTTPError) and failed is not None:
                if failed.status_code < 500 and failed.status_code not in (408, 429):
                    raise
            if attempt == attempts - 1:
                raise
            # The request was streamed, so an unread body keeps the connection
            # checked out of the pool; release it before trying again.  The
            # response is left open on the re-raise paths above because the
            # caller may still want to read its body.
            if failed is not None:
                failed.close()
            delay = 0.5 * (2 ** attempt)
            log.warning(f"Gateway retry {attempt + 1}/{attempts} через {delay:.1f}s: {e}")
            time.sleep(delay)
    raise last_exc  # unreachable
def _report_gateway_failure(msg: str, agent_id: str, console_text: str) -> str:
    """Signal a gateway failure on every output channel and return ``msg``.

    Prints ``console_text`` to stdout, plays the error sound, pushes an error
    event through ``notifier`` and speaks ``msg`` for ``agent_id``.
    """
    print(f"⚠️ {console_text}")
    play_error_sound()
    notifier.error(msg, agent_id)
    if msg.strip():
        speak(msg, agent_id)
    return msg


def ask_agent_stream(text: str, agent_id: str = "cosmo") -> str:
    """Send ``text`` to the OpenClaw gateway and speak the reply.

    The reply is read as a stream of ``data: {...}`` lines. With
    ``TTS_MODE == "stream"`` it is spoken sentence by sentence while it
    arrives; with ``TTS_MODE == "full"`` it is spoken once, after the whole
    reply has been received.

    Args:
        text: the user's request.
        agent_id: key into ``AGENTS``; unknown ids fall back to ``"cosmo"``.

    Returns:
        The reply after filler removal and speech clean-up, or the spoken
        error message when the gateway could not be reached or returned
        nothing.
    """
    def _maybe_speak(t: str):
        if t.strip():
            speak(t, agent_id)

    cfg = AGENTS.get(agent_id, AGENTS["cosmo"])
    session_key = cfg.get("session_key", VOICE_SESSION_KEY)
    payload = {
        "stream": True,
        "messages": [{"role": "user", "content": text}],
        "max_tokens": VOICE_MAX_TOKENS,
    }
    headers = {
        "x-ocplatform-model": cfg["voice_model"],
        "x-openclaw-session-key": session_key,
    }
    try:
        resp = _post_with_retry(
            cfg["session"], f"{cfg['gateway_url']}/v1/chat/completions", headers, payload,
        )
    except requests.ConnectionError:
        log.exception("Gateway недоступен после retry")
        msg = "Не могу связаться с сервером, попробуй ещё раз."
        return _report_gateway_failure(msg, agent_id, msg)
    except requests.Timeout:
        log.exception("Gateway таймаут после retry")
        msg = "Сервер не ответил вовремя, попробуй ещё раз."
        return _report_gateway_failure(msg, agent_id, msg)
    except requests.HTTPError as e:
        status = e.response.status_code if e.response is not None else "?"
        body = e.response.text if e.response is not None else ""
        log.exception(f"Gateway HTTP {status}")
        msg = "Ошибка сервера, попробуй ещё раз."
        return _report_gateway_failure(msg, agent_id, f"Gateway {status}: {body[:200]}")

    full_text = ""
    buffer = ""
    try:
        for line in resp.iter_lines():
            if not line or line == b"data: [DONE]":
                continue
            if not line.startswith(b"data: "):
                continue
            # Only the parsing is guarded here: a malformed chunk is skipped,
            # while errors raised by TTS below reach the outer handler and get
            # logged instead of being mistaken for a bad chunk.
            try:
                chunk = json.loads(line[6:])
                delta = chunk["choices"][0]["delta"].get("content", "")
            except (ValueError, KeyError, IndexError, AttributeError, TypeError):
                # ValueError covers JSONDecodeError and invalid UTF-8;
                # AttributeError/TypeError cover a null or non-dict delta/choice.
                continue
            if not delta or not isinstance(delta, str):
                continue
            full_text += delta
            buffer += delta
            if TTS_MODE == "stream":
                last_punct = find_sentence_end(buffer, min_len=120)
                if last_punct > -1:
                    sentence = clean_for_speech(strip_fillers(buffer[:last_punct + 1]))
                    _maybe_speak(sentence)
                    buffer = buffer[last_punct + 1:].lstrip()
    except Exception as e:
        # Best effort: keep whatever part of the reply has arrived so far.
        log.exception("Ошибка при чтении стрима")
        print(f"⚠️ Стрим прервался: {e}")
    finally:
        # The response was requested with stream=True; close it so the
        # connection is released even when reading stopped half-way.
        resp.close()

    if not full_text:
        msg = "Не получил ответ, попробуй ещё раз."
        _maybe_speak(msg)
        return msg
    result = clean_for_speech(strip_fillers(full_text))
    if TTS_MODE == "full":
        _maybe_speak(result)
    else:
        # Speak the tail that never reached a sentence boundary.
        if buffer.strip():
            _maybe_speak(clean_for_speech(strip_fillers(buffer)))
    return result