feat(llm): direct Claude Haiku 4.5 backend with prompt caching

Adds a parallel LLM backend that bypasses OpenClaw and talks to the Anthropic Messages API directly. It is selected via LLM_BACKEND=claude in .env; the default remains openclaw, so nothing breaks for existing setups.

Why: the OpenClaw gateway adds 500-1000 ms of overhead on every turn (auth, memory fetch, routing). Direct Haiku 4.5 plus prompt caching gives a faster first token and a 90% cost reduction on cached chunks.

- satellite/llm_claude.py — Anthropic SDK streaming client; prompt caching on the system prompt and on all but the last 2 history messages; per-agent, per-date JSON history in HISTORY_DIR; reset_history() for the 'сбрось' command; per-agent system prompts (Cosmo / Люся); falls back to an error event if the SDK or API key is missing.
- satellite/llm.py — dispatches to ask_claude_stream when backend=claude; exports LLM_BACKEND so that modes.py can route the reset command too.
- satellite/modes.py — _handle_reset calls reset_history when the backend is claude and keeps the /new POST for openclaw.
- requirements.txt — anthropic >= 0.50.0
- .env.example — LLM_BACKEND, ANTHROPIC_API_KEY, ANTHROPIC_MODEL, HISTORY_DIR, MAX_HISTORY, and an HTTPS_PROXY block for non-RU egress.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 13:12:39 +00:00
parent 584e21923c
commit 05de9c284b
5 changed files with 300 additions and 20 deletions
--- a/satellite/modes.py
+++ b/satellite/modes.py
@@ -3,34 +3,42 @@ import os
 from .config import GATEWAY_URL, AGENTS, FOLLOWUP_TIMEOUT, MAX_DURATION, log
 from .audio import record
 from .tts import speak, stop_speaking
-from .llm import ask_agent_stream, is_reset_command, VOICE_SESSION_KEY
+from .llm import ask_agent_stream, is_reset_command, VOICE_SESSION_KEY, LLM_BACKEND
 from . import notifier

 WAKE_THRESHOLD = float(os.getenv("WAKE_THRESHOLD", "0.5"))


 def _handle_reset(text: str, agent_id: str) -> bool:
-    """Команда сброса — отправляет slash-команду /new в OpenClaw (без озвучки ответа)."""
+    """Команда сброса. В зависимости от backend:
+    - claude:    удаляет локальный файл истории
+    - openclaw:  шлёт /new в gateway
+    """
    if not is_reset_command(text):
        return False

-    cfg = AGENTS.get(agent_id, AGENTS["cosmo"])
-    print("🔄 Отправляю /new в OpenClaw")
-    try:
-        cfg["session"].post(
-            f"{cfg['gateway_url']}/v1/chat/completions",
-            headers={
-                "x-ocplatform-model": cfg["voice_model"],
-                "x-openclaw-session-key": cfg.get("session_key", VOICE_SESSION_KEY),
-            },
-            json={
-                "stream": False,
-                "messages": [{"role": "user", "content": "/new"}],
-            },
-            timeout=30,
-        )
-    except Exception:
-        log.exception("Не удалось отправить /new")
+    if LLM_BACKEND == "claude":
+        from .llm_claude import reset_history
+        print("🔄 Сбрасываю локальную историю (Claude)")
+        reset_history(agent_id)
+    else:
+        cfg = AGENTS.get(agent_id, AGENTS["cosmo"])
+        print("🔄 Отправляю /new в OpenClaw")
+        try:
+            cfg["session"].post(
+                f"{cfg['gateway_url']}/v1/chat/completions",
+                headers={
+                    "x-ocplatform-model": cfg["voice_model"],
+                    "x-openclaw-session-key": cfg.get("session_key", VOICE_SESSION_KEY),
+                },
+                json={
+                    "stream": False,
+                    "messages": [{"role": "user", "content": "/new"}],
+                },
+                timeout=30,
+            )
+        except Exception:
+            log.exception("Не удалось отправить /new")

    msg = "Начинаю новую сессию."
    print(f"🔄 {msg}")