Adds a parallel LLM backend that bypasses OpenClaw and talks to Anthropic Messages API directly. Selected via LLM_BACKEND=claude in .env; default remains openclaw so nothing breaks for existing setup. Why: OpenClaw gateway adds 500-1000ms overhead on every turn (auth, memory fetch, routing). Direct Haiku 4.5 + prompt caching = faster first token and -90% cost on cached chunks. - satellite/llm_claude.py — Anthropic SDK streaming client, prompt caching on system prompt and all-but-last-2 history messages, per agent+date JSON history in HISTORY_DIR, reset_history() for the 'сбрось' command, per-agent system prompts (Cosmo / Люся), fallback to error event if SDK/key missing. - satellite/llm.py — dispatches to ask_claude_stream when backend=claude, exports LLM_BACKEND so modes.py can route reset too. - satellite/modes.py — _handle_reset calls reset_history when backend is claude, keeps /new POST for openclaw. - requirements.txt — anthropic >= 0.50.0 - .env.example — LLM_BACKEND, ANTHROPIC_API_KEY, ANTHROPIC_MODEL, HISTORY_DIR, MAX_HISTORY, HTTPS_PROXY block for non-RU egress. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
170 lines
6.8 KiB
Python
170 lines
6.8 KiB
Python
import json
|
||
import os
|
||
import re
|
||
import time
|
||
import requests
|
||
|
||
from .config import AGENTS, VOICE_MAX_TOKENS, LLM_RETRIES, log
|
||
from .text import clean_for_speech, find_sentence_end
|
||
from .tts import speak, play_error_sound
|
||
from . import notifier
|
||
|
||
VOICE_SESSION_KEY = os.getenv("VOICE_SESSION_KEY", "agent:main:voice:home")
|
||
|
||
# Feature flag — выбор LLM backend. openclaw (дефолт) или claude (прямой Anthropic).
|
||
LLM_BACKEND = os.getenv("LLM_BACKEND", "openclaw").lower()
|
||
|
||
# "stream" — режем по предложениям (быстро, но рваная интонация)
|
||
# "full" — собираем весь ответ, потом TTS (естественно, но пауза перед началом)
|
||
TTS_MODE = os.getenv("TTS_MODE", "full")
|
||
|
||
RESET_PATTERNS = re.compile(
|
||
r"(начни|начать|создай|открой|давай).{0,10}(новую|новый|чистую|чистый).{0,10}(сессию|сессия|диалог|разговор|чат)"
|
||
r"|"
|
||
r"(сбрось|очисти|обнови).{0,10}(сессию|диалог|разговор|чат|историю|контекст)",
|
||
re.IGNORECASE,
|
||
)
|
||
|
||
# Фразы-заглушки которые агент генерирует ДО вызова инструмента
|
||
FILLER_PATTERNS = re.compile(
|
||
r'(?:(?:сейчас посмотрю|дай мне секунду|дай секунду|проверяю|загружаю|узнаю'
|
||
r'|смотрю|одну секунду|я сейчас посмотрю|я проверю|попробую другой источник'
|
||
r'|нужны конкретные числа|дай мне загрузить)[^.!?]*[.!?]?\s*)+',
|
||
re.IGNORECASE,
|
||
)
|
||
|
||
|
||
def strip_fillers(text: str) -> str:
|
||
return FILLER_PATTERNS.sub('', text).strip()
|
||
|
||
|
||
def is_reset_command(text: str) -> bool:
|
||
return bool(RESET_PATTERNS.search(text))
|
||
|
||
|
||
def _post_with_retry(session, url, headers, payload):
|
||
"""POST с экспоненциальным backoff. Retry на сетевые ошибки и 5xx; 4xx — сразу вверх."""
|
||
last_exc = None
|
||
for attempt in range(LLM_RETRIES):
|
||
try:
|
||
resp = session.post(url, headers=headers, json=payload, stream=True, timeout=60)
|
||
if resp.status_code >= 500:
|
||
raise requests.HTTPError(f"{resp.status_code} {resp.text[:200]}", response=resp)
|
||
resp.raise_for_status()
|
||
return resp
|
||
except (requests.ConnectionError, requests.Timeout, requests.HTTPError) as e:
|
||
last_exc = e
|
||
# 4xx (кроме 408/429) не ретраим
|
||
resp = getattr(e, "response", None)
|
||
if isinstance(e, requests.HTTPError) and resp is not None:
|
||
if resp.status_code < 500 and resp.status_code not in (408, 429):
|
||
raise
|
||
if attempt == LLM_RETRIES - 1:
|
||
raise
|
||
delay = 0.5 * (2 ** attempt)
|
||
log.warning(f"Gateway retry {attempt + 1}/{LLM_RETRIES} через {delay:.1f}s: {e}")
|
||
time.sleep(delay)
|
||
raise last_exc # unreachable
|
||
|
||
|
||
def ask_agent_stream(text: str, agent_id: str = "cosmo") -> str:
|
||
"""Отправляет запрос к выбранному LLM backend и озвучивает ответ."""
|
||
if LLM_BACKEND == "claude":
|
||
from .llm_claude import ask_claude_stream
|
||
return ask_claude_stream(text, agent_id)
|
||
|
||
# Иначе — путь через OpenClaw (старый behaviour)
|
||
def _maybe_speak(t: str):
|
||
# Если TTS на планшете — пропускаем локальный звук, планшет зачитает по response event.
|
||
if t.strip() and notifier.speak_locally():
|
||
speak(t, agent_id)
|
||
|
||
cfg = AGENTS.get(agent_id, AGENTS["cosmo"])
|
||
session_key = cfg.get("session_key", VOICE_SESSION_KEY)
|
||
|
||
payload = {
|
||
"stream": True,
|
||
"messages": [{"role": "user", "content": text}],
|
||
"max_tokens": VOICE_MAX_TOKENS,
|
||
}
|
||
headers = {
|
||
"x-ocplatform-model": cfg["voice_model"],
|
||
"x-openclaw-session-key": session_key,
|
||
}
|
||
|
||
try:
|
||
resp = _post_with_retry(
|
||
cfg["session"], f"{cfg['gateway_url']}/v1/chat/completions", headers, payload,
|
||
)
|
||
except requests.ConnectionError:
|
||
log.exception("Gateway недоступен после retry")
|
||
msg = "Не могу связаться с сервером, попробуй ещё раз."
|
||
print(f"⚠️ {msg}")
|
||
play_error_sound()
|
||
notifier.error(msg, agent_id)
|
||
_maybe_speak(msg)
|
||
return msg
|
||
except requests.Timeout:
|
||
log.exception("Gateway таймаут после retry")
|
||
msg = "Сервер не ответил вовремя, попробуй ещё раз."
|
||
print(f"⚠️ {msg}")
|
||
play_error_sound()
|
||
notifier.error(msg, agent_id)
|
||
_maybe_speak(msg)
|
||
return msg
|
||
except requests.HTTPError as e:
|
||
status = e.response.status_code if e.response is not None else "?"
|
||
body = e.response.text if e.response is not None else ""
|
||
log.exception(f"Gateway HTTP {status}")
|
||
msg = "Ошибка сервера, попробуй ещё раз."
|
||
print(f"⚠️ Gateway {status}: {body[:200]}")
|
||
play_error_sound()
|
||
notifier.error(msg, agent_id)
|
||
_maybe_speak(msg)
|
||
return msg
|
||
|
||
full_text = ""
|
||
buffer = ""
|
||
|
||
try:
|
||
for line in resp.iter_lines():
|
||
if not line or line == b"data: [DONE]":
|
||
continue
|
||
if not line.startswith(b"data: "):
|
||
continue
|
||
try:
|
||
chunk = json.loads(line[6:])
|
||
delta = chunk["choices"][0]["delta"].get("content", "")
|
||
if not delta:
|
||
continue
|
||
|
||
full_text += delta
|
||
buffer += delta
|
||
|
||
if TTS_MODE == "stream":
|
||
last_punct = find_sentence_end(buffer, min_len=120)
|
||
if last_punct > -1:
|
||
sentence = clean_for_speech(strip_fillers(buffer[:last_punct + 1]))
|
||
_maybe_speak(sentence)
|
||
buffer = buffer[last_punct + 1:].lstrip()
|
||
except (json.JSONDecodeError, KeyError, IndexError):
|
||
continue
|
||
except Exception as e:
|
||
log.exception("Ошибка при чтении стрима")
|
||
print(f"⚠️ Стрим прервался: {e}")
|
||
|
||
if not full_text:
|
||
msg = "Не получил ответ, попробуй ещё раз."
|
||
_maybe_speak(msg)
|
||
return msg
|
||
|
||
result = clean_for_speech(strip_fillers(full_text))
|
||
|
||
if TTS_MODE == "full":
|
||
_maybe_speak(result)
|
||
else:
|
||
if buffer.strip():
|
||
_maybe_speak(clean_for_speech(strip_fillers(buffer)))
|
||
|
||
return result
|