home-voice-assistant/satellite/llm.py

import json
import os
import re
import time
import requests

from .config import AGENTS, VOICE_MAX_TOKENS, LLM_RETRIES, log
from .text import clean_for_speech, find_sentence_end
from .tts import speak, play_error_sound
from . import notifier

VOICE_SESSION_KEY = os.getenv("VOICE_SESSION_KEY", "agent:main:voice:home")

# Feature flag — выбор LLM backend. openclaw (дефолт) или claude (прямой Anthropic).
LLM_BACKEND = os.getenv("LLM_BACKEND", "openclaw").lower()

# "stream" — режем по предложениям (быстро, но рваная интонация)
# "full"   — собираем весь ответ, потом TTS (естественно, но пауза перед началом)
TTS_MODE = os.getenv("TTS_MODE", "full")

RESET_PATTERNS = re.compile(
    r"(начни|начать|создай|открой|давай).{0,10}(новую|новый|чистую|чистый).{0,10}(сессию|сессия|диалог|разговор|чат)"
    r"|"
    r"(сбрось|очисти|обнови).{0,10}(сессию|диалог|разговор|чат|историю|контекст)",
    re.IGNORECASE,
)

# Фразы-заглушки которые агент генерирует ДО вызова инструмента
FILLER_PATTERNS = re.compile(
    r'(?:(?:сейчас посмотрю|дай мне секунду|дай секунду|проверяю|загружаю|узнаю'
    r'|смотрю|одну секунду|я сейчас посмотрю|я проверю|попробую другой источник'
    r'|нужны конкретные числа|дай мне загрузить)[^.!?]*[.!?]?\s*)+',
    re.IGNORECASE,
)


def strip_fillers(text: str) -> str:
    return FILLER_PATTERNS.sub('', text).strip()


def is_reset_command(text: str) -> bool:
    return bool(RESET_PATTERNS.search(text))


def _post_with_retry(session, url, headers, payload):
    """POST с экспоненциальным backoff. Retry на сетевые ошибки и 5xx; 4xx — сразу вверх."""
    last_exc = None
    for attempt in range(LLM_RETRIES):
        try:
            resp = session.post(url, headers=headers, json=payload, stream=True, timeout=60)
            if resp.status_code >= 500:
                raise requests.HTTPError(f"{resp.status_code} {resp.text[:200]}", response=resp)
            resp.raise_for_status()
            return resp
        except (requests.ConnectionError, requests.Timeout, requests.HTTPError) as e:
            last_exc = e
            # 4xx (кроме 408/429) не ретраим
            resp = getattr(e, "response", None)
            if isinstance(e, requests.HTTPError) and resp is not None:
                if resp.status_code < 500 and resp.status_code not in (408, 429):
                    raise
            if attempt == LLM_RETRIES - 1:
                raise
            delay = 0.5 * (2 ** attempt)
            log.warning(f"Gateway retry {attempt + 1}/{LLM_RETRIES} через {delay:.1f}s: {e}")
            time.sleep(delay)
    raise last_exc  # unreachable


def ask_agent_stream(text: str, agent_id: str = "cosmo") -> str:
    """Отправляет запрос к выбранному LLM backend и озвучивает ответ."""
    if LLM_BACKEND == "claude":
        from .llm_claude import ask_claude_stream
        return ask_claude_stream(text, agent_id)

    # Иначе — путь через OpenClaw (старый behaviour)
    def _maybe_speak(t: str):
        # Если TTS на планшете — пропускаем локальный звук, планшет зачитает по response event.
        if t.strip() and notifier.speak_locally():
            speak(t, agent_id)

    cfg = AGENTS.get(agent_id, AGENTS["cosmo"])
    session_key = cfg.get("session_key", VOICE_SESSION_KEY)

    payload = {
        "stream": True,
        "messages": [{"role": "user", "content": text}],
        "max_tokens": VOICE_MAX_TOKENS,
    }
    headers = {
        "x-ocplatform-model": cfg["voice_model"],
        "x-openclaw-session-key": session_key,
    }

    try:
        resp = _post_with_retry(
            cfg["session"], f"{cfg['gateway_url']}/v1/chat/completions", headers, payload,
        )
    except requests.ConnectionError:
        log.exception("Gateway недоступен после retry")
        msg = "Не могу связаться с сервером, попробуй ещё раз."
        print(f"⚠️  {msg}")
        play_error_sound()
        notifier.error(msg, agent_id)
        _maybe_speak(msg)
        return msg
    except requests.Timeout:
        log.exception("Gateway таймаут после retry")
        msg = "Сервер не ответил вовремя, попробуй ещё раз."
        print(f"⚠️  {msg}")
        play_error_sound()
        notifier.error(msg, agent_id)
        _maybe_speak(msg)
        return msg
    except requests.HTTPError as e:
        status = e.response.status_code if e.response is not None else "?"
        body = e.response.text if e.response is not None else ""
        log.exception(f"Gateway HTTP {status}")
        msg = "Ошибка сервера, попробуй ещё раз."
        print(f"⚠️  Gateway {status}: {body[:200]}")
        play_error_sound()
        notifier.error(msg, agent_id)
        _maybe_speak(msg)
        return msg

    full_text = ""
    buffer = ""

    try:
        for line in resp.iter_lines():
            if not line or line == b"data: [DONE]":
                continue
            if not line.startswith(b"data: "):
                continue
            try:
                chunk = json.loads(line[6:])
                delta = chunk["choices"][0]["delta"].get("content", "")
                if not delta:
                    continue

                full_text += delta
                buffer += delta

                if TTS_MODE == "stream":
                    last_punct = find_sentence_end(buffer, min_len=120)
                    if last_punct > -1:
                        sentence = clean_for_speech(strip_fillers(buffer[:last_punct + 1]))
                        _maybe_speak(sentence)
                        buffer = buffer[last_punct + 1:].lstrip()
            except (json.JSONDecodeError, KeyError, IndexError):
                continue
    except Exception as e:
        log.exception("Ошибка при чтении стрима")
        print(f"⚠️  Стрим прервался: {e}")

    if not full_text:
        msg = "Не получил ответ, попробуй ещё раз."
        _maybe_speak(msg)
        return msg

    result = clean_for_speech(strip_fillers(full_text))

    if TTS_MODE == "full":
        _maybe_speak(result)
    else:
        if buffer.strip():
            _maybe_speak(clean_for_speech(strip_fillers(buffer)))

    return result