feat(llm): direct Claude Haiku 4.5 backend with prompt caching
Adds a parallel LLM backend that bypasses OpenClaw and talks to the Anthropic Messages API directly. It is selected via LLM_BACKEND=claude in .env; the default remains openclaw, so nothing breaks for existing setups.

Why: the OpenClaw gateway adds 500-1000 ms of overhead on every turn (auth, memory fetch, routing). Direct Haiku 4.5 plus prompt caching gives a faster first token and 90% lower cost on cached chunks.

- satellite/llm_claude.py — Anthropic SDK streaming client; prompt caching on the system prompt and on all but the last 2 history messages; per agent+date JSON history in HISTORY_DIR; reset_history() for the 'сбрось' command; per-agent system prompts (Cosmo / Люся); falls back to an error event if the SDK or API key is missing.
- satellite/llm.py — dispatches to ask_claude_stream when backend=claude, and exports LLM_BACKEND so that modes.py can route reset too.
- satellite/modes.py — _handle_reset calls reset_history when the backend is claude, and keeps the /new POST for openclaw.
- requirements.txt — anthropic >= 0.50.0
- .env.example — LLM_BACKEND, ANTHROPIC_API_KEY, ANTHROPIC_MODEL, HISTORY_DIR, MAX_HISTORY, and an HTTPS_PROXY block for non-RU egress.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -3,34 +3,42 @@ import os
|
||||
from .config import GATEWAY_URL, AGENTS, FOLLOWUP_TIMEOUT, MAX_DURATION, log
|
||||
from .audio import record
|
||||
from .tts import speak, stop_speaking
|
||||
from .llm import ask_agent_stream, is_reset_command, VOICE_SESSION_KEY
|
||||
from .llm import ask_agent_stream, is_reset_command, VOICE_SESSION_KEY, LLM_BACKEND
|
||||
from . import notifier
|
||||
|
||||
WAKE_THRESHOLD = float(os.getenv("WAKE_THRESHOLD", "0.5"))
|
||||
|
||||
|
||||
def _handle_reset(text: str, agent_id: str) -> bool:
|
||||
"""Команда сброса — отправляет slash-команду /new в OpenClaw (без озвучки ответа)."""
|
||||
"""Команда сброса. В зависимости от backend:
|
||||
- claude: удаляет локальный файл истории
|
||||
- openclaw: шлёт /new в gateway
|
||||
"""
|
||||
if not is_reset_command(text):
|
||||
return False
|
||||
|
||||
cfg = AGENTS.get(agent_id, AGENTS["cosmo"])
|
||||
print("🔄 Отправляю /new в OpenClaw")
|
||||
try:
|
||||
cfg["session"].post(
|
||||
f"{cfg['gateway_url']}/v1/chat/completions",
|
||||
headers={
|
||||
"x-ocplatform-model": cfg["voice_model"],
|
||||
"x-openclaw-session-key": cfg.get("session_key", VOICE_SESSION_KEY),
|
||||
},
|
||||
json={
|
||||
"stream": False,
|
||||
"messages": [{"role": "user", "content": "/new"}],
|
||||
},
|
||||
timeout=30,
|
||||
)
|
||||
except Exception:
|
||||
log.exception("Не удалось отправить /new")
|
||||
if LLM_BACKEND == "claude":
|
||||
from .llm_claude import reset_history
|
||||
print("🔄 Сбрасываю локальную историю (Claude)")
|
||||
reset_history(agent_id)
|
||||
else:
|
||||
cfg = AGENTS.get(agent_id, AGENTS["cosmo"])
|
||||
print("🔄 Отправляю /new в OpenClaw")
|
||||
try:
|
||||
cfg["session"].post(
|
||||
f"{cfg['gateway_url']}/v1/chat/completions",
|
||||
headers={
|
||||
"x-ocplatform-model": cfg["voice_model"],
|
||||
"x-openclaw-session-key": cfg.get("session_key", VOICE_SESSION_KEY),
|
||||
},
|
||||
json={
|
||||
"stream": False,
|
||||
"messages": [{"role": "user", "content": "/new"}],
|
||||
},
|
||||
timeout=30,
|
||||
)
|
||||
except Exception:
|
||||
log.exception("Не удалось отправить /new")
|
||||
|
||||
msg = "Начинаю новую сессию."
|
||||
print(f"🔄 {msg}")
|
||||
|
||||
Reference in New Issue
Block a user