Initial commit: Cosmo Voice Satellite

Two-agent voice assistant (Cosmo + Люся) via OpenClaw Gateway. Streaming STT (Groq) + LLM + TTS (ElevenLabs) pipeline with keep-alive sessions, barge-in, and daily conversation sessions. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-12 13:34:08 +03:00
commit 7ca8268b78
16 changed files with 1143 additions and 0 deletions
--- a/satellite/init.py
+++ b/satellite/init.py
--- a/satellite/main.py
+++ b/satellite/main.py
@@ -0,0 +1,13 @@
+"""
+Cosmo Satellite — голосовой клиент для OpenClaw Gateway
+Запуск: python -m satellite [--wake]
+"""
+import sys
+
+from .modes import run_with_enter, run_with_porcupine
+
+if __name__ == "__main__":
+    if "--wake" in sys.argv:
+        run_with_porcupine()
+    else:
+        run_with_enter()
--- a/satellite/audio.py
+++ b/satellite/audio.py
@@ -0,0 +1,104 @@
+import pyaudio
+import numpy as np
+
+from .config import SILENCE_THRESHOLD, SILENCE_DURATION, MAX_DURATION, log
+from .stt import transcribe
+
+
+def record() -> str:
+    """Запись до тишины (VAD) + STT"""
+    try:
+        audio = pyaudio.PyAudio()
+        stream = audio.open(
+            format=pyaudio.paInt16,
+            channels=1,
+            rate=16000,
+            input=True,
+            frames_per_buffer=1024,
+        )
+    except Exception as e:
+        log.exception("Не удалось открыть микрофон")
+        print(f"⚠️  Ошибка микрофона: {e}")
+        return ""
+
+    print("🎙️  Говори...")
+    frames = []
+    silent_chunks = 0
+    speaking_started = False
+    max_chunks = int(16000 / 1024 * MAX_DURATION)
+    silence_chunks_needed = int(16000 / 1024 * SILENCE_DURATION)
+
+    try:
+        for _ in range(max_chunks):
+            data = stream.read(1024, exception_on_overflow=False)
+            frames.append(data)
+
+            amplitude = np.abs(np.frombuffer(data, dtype=np.int16)).mean()
+
+            if amplitude > SILENCE_THRESHOLD:
+                speaking_started = True
+                silent_chunks = 0
+            elif speaking_started:
+                silent_chunks += 1
+                if silent_chunks >= silence_chunks_needed:
+                    print("🔇 Конец речи")
+                    break
+    except Exception as e:
+        log.exception("Ошибка при записи аудио")
+        print(f"⚠️  Ошибка записи: {e}")
+    finally:
+        stream.stop_stream()
+        audio.terminate()
+
+    if not speaking_started:
+        return ""
+
+    return transcribe(frames)
+
+
+def record_with_timeout(timeout: float = 8.0) -> str:
+    """Слушает timeout секунд, возвращает пусто если речи не было"""
+    try:
+        audio = pyaudio.PyAudio()
+        stream = audio.open(
+            format=pyaudio.paInt16,
+            channels=1,
+            rate=16000,
+            input=True,
+            frames_per_buffer=1024,
+        )
+    except Exception as e:
+        log.exception("Не удалось открыть микрофон (followup)")
+        print(f"⚠️  Ошибка микрофона: {e}")
+        return ""
+
+    frames = []
+    silent_chunks = 0
+    speaking_started = False
+    max_chunks = int(16000 / 1024 * timeout)
+    silence_chunks_needed = int(16000 / 1024 * SILENCE_DURATION)
+
+    try:
+        for _ in range(max_chunks):
+            data = stream.read(1024, exception_on_overflow=False)
+            frames.append(data)
+            amplitude = np.abs(np.frombuffer(data, dtype=np.int16)).mean()
+
+            if amplitude > SILENCE_THRESHOLD:
+                speaking_started = True
+                silent_chunks = 0
+            elif speaking_started:
+                silent_chunks += 1
+                if silent_chunks >= silence_chunks_needed:
+                    break
+    except Exception as e:
+        log.exception("Ошибка при записи аудио (followup)")
+        print(f"⚠️  Ошибка записи: {e}")
+    finally:
+        stream.stop_stream()
+        audio.terminate()
+
+    if not speaking_started:
+        return ""
+
+    return transcribe(frames)
--- a/satellite/config.py
+++ b/satellite/config.py
@@ -0,0 +1,85 @@
+import os
+import sys
+import logging
+import requests as _requests
+from dotenv import load_dotenv
+from groq import Groq
+
+load_dotenv()
+
+# Логгер — ошибки в файл + короткое сообщение в консоль
+LOG_FILE = os.getenv("LOG_FILE", "errors.log")
+
+logging.basicConfig(
+    level=logging.WARNING,
+    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
+    handlers=[
+        logging.FileHandler(LOG_FILE, encoding="utf-8"),
+    ],
+)
+log = logging.getLogger("cosmo")
+
+# OpenClaw Gateway — Cosmo (по умолчанию)
+GATEWAY_URL   = os.getenv("GATEWAY_URL", "http://192.168.31.103:18789")
+GATEWAY_TOKEN = os.getenv("GATEWAY_TOKEN")
+AGENT         = os.getenv("AGENT", "openclaw/main")
+VOICE_MODEL   = os.getenv("VOICE_MODEL", "openai/gpt-4o-mini")
+
+# OpenClaw Gateway — Люся
+LUSYA_GATEWAY_URL   = os.getenv("LUSYA_GATEWAY_URL", "http://192.168.31.103:18790")
+LUSYA_GATEWAY_TOKEN = os.getenv("LUSYA_GATEWAY_TOKEN", GATEWAY_TOKEN)
+LUSYA_AGENT         = os.getenv("LUSYA_AGENT", "openclaw/wife")
+LUSYA_VOICE_MODEL   = os.getenv("LUSYA_VOICE_MODEL", VOICE_MODEL)
+
+# Keep-alive HTTP сессии — переиспользуют TCP/TLS соединения
+def _make_session(token: str) -> _requests.Session:
+    s = _requests.Session()
+    s.headers.update({
+        "Authorization": f"Bearer {token}",
+        "Content-Type": "application/json",
+    })
+    return s
+
+
+# Конфиги агентов по имени
+AGENTS = {
+    "cosmo": {
+        "name": "Cosmo",
+        "gateway_url": GATEWAY_URL,
+        "token": GATEWAY_TOKEN,
+        "agent": AGENT,
+        "voice_model": VOICE_MODEL,
+        "tts_voice": os.getenv("COSMO_TTS_VOICE", ""),
+        "session": _make_session(GATEWAY_TOKEN),
+    },
+    "lusya": {
+        "name": "Люся",
+        "gateway_url": LUSYA_GATEWAY_URL,
+        "token": LUSYA_GATEWAY_TOKEN,
+        "agent": LUSYA_AGENT,
+        "voice_model": LUSYA_VOICE_MODEL,
+        "tts_voice": os.getenv("LUSYA_TTS_VOICE", ""),
+        "session": _make_session(LUSYA_GATEWAY_TOKEN),
+    },
+}
+
+# STT
+STT_PROVIDER  = os.getenv("STT_PROVIDER", "groq")
+WHISPER_MODEL = os.getenv("WHISPER_MODEL", "small")
+WHISPER_LANG  = os.getenv("WHISPER_LANGUAGE", "ru")
+
+# Audio (на Pi: PulseAudio BT sink)
+AUDIO_SINK = os.getenv("AUDIO_SINK", "")
+
+# VAD
+SILENCE_THRESHOLD = int(os.getenv("SILENCE_THRESHOLD", "500"))
+SILENCE_DURATION  = float(os.getenv("SILENCE_DURATION", "1.5"))
+MAX_DURATION      = int(os.getenv("MAX_DURATION", "15"))
+FOLLOWUP_TIMEOUT  = float(os.getenv("FOLLOWUP_TIMEOUT", "8"))
+
+# Groq client
+groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))
+
+if not GATEWAY_TOKEN:
+    print("❌ GATEWAY_TOKEN не задан в .env")
+    sys.exit(1)
--- a/satellite/llm.py
+++ b/satellite/llm.py
@@ -0,0 +1,141 @@
+import json
+import re
+import requests
+from datetime import date
+
+from .config import GATEWAY_URL, VOICE_MODEL, AGENT, AGENTS, log
+from .text import clean_for_speech, find_sentence_end
+from .tts import speak
+
+SYSTEM_PROMPT = "Отвечай кратко, 1-2 предложения, без markdown, без эмодзи."
+MAX_HISTORY = int(__import__("os").getenv("MAX_HISTORY", "20"))
+
+RESET_PATTERNS = re.compile(
+    r"(начни|начать|создай|открой|давай).{0,10}(новую|новый|чистую|чистый).{0,10}(сессию|сессия|диалог|разговор|чат)"
+    r"|"
+    r"(сбрось|очисти|обнови).{0,10}(сессию|диалог|разговор|чат|историю|контекст)",
+    re.IGNORECASE,
+)
+
+
+class Conversation:
+    """Хранит историю сообщений — одна сессия на день"""
+
+    def __init__(self, agent_id: str = "cosmo"):
+        self.agent_id = agent_id
+        self.created_date = date.today()
+        self.messages = [{"role": "system", "content": SYSTEM_PROMPT}]
+
+    def is_expired(self) -> bool:
+        return date.today() != self.created_date
+
+    def reset(self):
+        self.created_date = date.today()
+        self.messages = [{"role": "system", "content": SYSTEM_PROMPT}]
+
+    def add_user(self, text: str):
+        self.messages.append({"role": "user", "content": text})
+        self._trim()
+
+    def add_assistant(self, text: str):
+        self.messages.append({"role": "assistant", "content": text})
+        self._trim()
+
+    def _trim(self):
+        if len(self.messages) > MAX_HISTORY + 1:
+            self.messages = [self.messages[0]] + self.messages[-(MAX_HISTORY):]
+
+
+def is_reset_command(text: str) -> bool:
+    return bool(RESET_PATTERNS.search(text))
+
+
+def ask_agent_stream(text: str, conv: "Conversation | None" = None, agent_id: str = "cosmo") -> str:
+    if conv is None:
+        conv = Conversation(agent_id)
+
+    conv.add_user(text)
+
+    cfg = AGENTS.get(agent_id, AGENTS["cosmo"])
+    gateway_url = cfg["gateway_url"]
+    session = cfg["session"]
+    agent = cfg["agent"]
+
+    try:
+        resp = session.post(
+            f"{gateway_url}/v1/chat/completions",
+            headers={"x-openclaw-model": cfg["voice_model"]},
+            json={
+                "model": agent,
+                "stream": True,
+                "messages": conv.messages,
+                "max_tokens": 150,
+            },
+            stream=True,
+            timeout=60,
+        )
+        resp.raise_for_status()
+    except requests.ConnectionError:
+        log.exception("Gateway недоступен")
+        msg = "Не могу связаться с сервером, попробуй ещё раз."
+        print(f"⚠️  {msg}")
+        speak(msg, agent_id)
+        return msg
+    except requests.Timeout:
+        log.exception("Gateway таймаут")
+        msg = "Сервер не ответил вовремя, попробуй ещё раз."
+        print(f"⚠️  {msg}")
+        speak(msg, agent_id)
+        return msg
+    except requests.HTTPError:
+        log.exception(f"Gateway HTTP ошибка {resp.status_code}")
+        msg = "Ошибка сервера, попробуй ещё раз."
+        print(f"⚠️  Gateway {resp.status_code}: {resp.text}")
+        speak(msg, agent_id)
+        return msg
+
+    full_text = ""
+    buffer = ""
+
+    try:
+        for line in resp.iter_lines():
+            if not line or line == b"data: [DONE]":
+                continue
+            if line.startswith(b"data: "):
+                try:
+                    chunk = json.loads(line[6:])
+                    delta = chunk["choices"][0]["delta"].get("content", "")
+                    if not delta:
+                        continue
+
+                    full_text += delta
+                    buffer += delta
+
+                    last_punct = find_sentence_end(buffer, min_len=60)
+                    if last_punct > -1:
+                        sentence = clean_for_speech(buffer[:last_punct + 1])
+                        if sentence.strip():
+                            print(f"🔊 Говорю: {sentence}")
+                            speak(sentence, agent_id)
+                        buffer = buffer[last_punct + 1:].lstrip()
+
+                except (json.JSONDecodeError, KeyError, IndexError):
+                    continue
+    except Exception as e:
+        log.exception("Ошибка при чтении стрима")
+        print(f"⚠️  Стрим прервался: {e}")
+
+    # Остаток
+    if buffer.strip():
+        sentence = clean_for_speech(buffer)
+        if sentence:
+            speak(sentence, agent_id)
+
+    if not full_text:
+        msg = "Не получил ответ, попробуй ещё раз."
+        speak(msg, agent_id)
+        return msg
+
+    result = clean_for_speech(full_text)
+    conv.add_assistant(full_text)
+    return result
--- a/satellite/modes.py
+++ b/satellite/modes.py
@@ -0,0 +1,170 @@
+import os
+import sys
+
+from .config import GATEWAY_URL, AGENT, FOLLOWUP_TIMEOUT, log
+from .audio import record, record_with_timeout
+from .tts import play_activation_sound, speak, stop_speaking
+from .llm import ask_agent_stream, Conversation, is_reset_command
+
+# Персистентные сессии — одна на день для каждого агента
+_sessions: dict[str, Conversation] = {}
+
+
+def _get_session(agent_id: str) -> Conversation:
+    """Возвращает текущую сессию, создаёт новую если день сменился"""
+    conv = _sessions.get(agent_id)
+    if conv is None or conv.is_expired():
+        conv = Conversation(agent_id=agent_id)
+        _sessions[agent_id] = conv
+        print(f"🆕 Новая сессия для {agent_id}")
+    return conv
+
+
+def _handle_reset(text: str, agent_id: str) -> bool:
+    """Проверяет команду сброса. Возвращает True если сброс произошёл."""
+    if is_reset_command(text):
+        _sessions[agent_id] = Conversation(agent_id=agent_id)
+        msg = "Начинаю новую сессию."
+        print(f"🔄 {msg}")
+        speak(msg, agent_id)
+        return True
+    return False
+
+
+def run_with_enter():
+    print("\n🦞 Cosmo Satellite запущен (режим: Enter для активации)")
+    print(f"   Gateway : {GATEWAY_URL}")
+    print(f"   Агент   : {AGENT}")
+    print("\nНажми Enter → говори → получи ответ. Ctrl+C для выхода.\n")
+
+    while True:
+        try:
+            input("⏎  Нажми Enter и говори...")
+            stop_speaking()  # barge-in: прервать если ещё говорит
+            play_activation_sound()
+
+            conv = _get_session("cosmo")
+
+            while True:
+                text = record()
+                if not text:
+                    print("⚠️  Ничего не распознано")
+                    break
+
+                print(f"📝 Ты: {text}")
+
+                if _handle_reset(text, "cosmo"):
+                    conv = _get_session("cosmo")
+                    break
+
+                response = ask_agent_stream(text, conv=conv)
+                print(f"🤖 Cosmo: {response}\n")
+
+                print(f"👂 Слушаю продолжение ({int(FOLLOWUP_TIMEOUT)} сек)...")
+                followup = record_with_timeout(timeout=FOLLOWUP_TIMEOUT)
+
+                if not followup:
+                    print("😴 Нет продолжения, жду активации...\n")
+                    break
+
+                text = followup
+
+        except KeyboardInterrupt:
+            print("\n👋 Выход")
+            break
+        except Exception as e:
+            log.exception("Непредвиденная ошибка в цикле Enter")
+            print(f"⚠️  Ошибка: {e} — продолжаю работу...\n")
+
+
+def run_with_porcupine():
+    """Режим продакшн — два wake word через Porcupine (для Pi)"""
+    import pvporcupine
+    import struct
+
+    from .config import AGENTS
+
+    porcupine_key    = os.getenv("PORCUPINE_KEY")
+    wake_word_cosmo  = os.getenv("WAKE_WORD_COSMO")
+    wake_word_lusya  = os.getenv("WAKE_WORD_LUSYA")
+
+    if not porcupine_key:
+        print("❌ PORCUPINE_KEY не задан в .env")
+        sys.exit(1)
+
+    keyword_paths = []
+    wake_word_map = []
+
+    if wake_word_cosmo:
+        keyword_paths.append(wake_word_cosmo)
+        wake_word_map.append("cosmo")
+    if wake_word_lusya:
+        keyword_paths.append(wake_word_lusya)
+        wake_word_map.append("lusya")
+
+    if not keyword_paths:
+        print("❌ WAKE_WORD_COSMO или WAKE_WORD_LUSYA не заданы в .env")
+        sys.exit(1)
+
+    import pyaudio
+
+    porcupine = pvporcupine.create(
+        access_key=porcupine_key,
+        keyword_paths=keyword_paths,
+    )
+
+    audio = pyaudio.PyAudio()
+    stream = audio.open(
+        rate=porcupine.sample_rate,
+        channels=1,
+        format=pyaudio.paInt16,
+        input=True,
+        frames_per_buffer=porcupine.frame_length,
+    )
+
+    print("\n🦞 Cosmo Satellite запущен (режим: wake word)")
+    for agent_id in wake_word_map:
+        cfg = AGENTS[agent_id]
+        print(f"   {cfg['name']:6s} : {cfg['gateway_url']} → {cfg['agent']}")
+    print(f"\nСкажи 'Космо' или 'Люся'...\n")
+
+    try:
+        while True:
+            try:
+                pcm = stream.read(porcupine.frame_length)
+                pcm = struct.unpack_from("h" * porcupine.frame_length, pcm)
+
+                keyword_index = porcupine.process(pcm)
+                if keyword_index >= 0:
+                    agent_id = wake_word_map[keyword_index]
+                    agent_name = AGENTS[agent_id]["name"]
+                    stop_speaking()  # barge-in: прервать если ещё говорит
+                    print(f"✅ Услышал '{agent_name}'!")
+                    play_activation_sound()
+
+                    conv = _get_session(agent_id)
+
+                    text = record()
+                    if not text:
+                        continue
+
+                    print(f"📝 Ты → {agent_name}: {text}")
+
+                    if _handle_reset(text, agent_id):
+                        continue
+
+                    response = ask_agent_stream(text, conv=conv, agent_id=agent_id)
+                    print(f"🤖 {agent_name}: {response}\n")
+
+            except KeyboardInterrupt:
+                raise
+            except Exception as e:
+                log.exception("Непредвиденная ошибка в цикле Porcupine")
+                print(f"⚠️  Ошибка: {e} — продолжаю слушать...\n")
+
+    except KeyboardInterrupt:
+        print("\n👋 Выход")
+    finally:
+        stream.stop_stream()
+        audio.terminate()
+        porcupine.delete()
--- a/satellite/stt.py
+++ b/satellite/stt.py
@@ -0,0 +1,55 @@
+import io
+import wave
+
+from .config import groq_client, STT_PROVIDER, WHISPER_MODEL, WHISPER_LANG, log
+
+
+def transcribe_groq_bytes(wav_bytes: bytes) -> str:
+    """Отправляет WAV байты в Groq без записи на диск"""
+    buf = io.BytesIO(wav_bytes)
+    buf.name = "audio.wav"
+    result = groq_client.audio.transcriptions.create(
+        file=buf,
+        model="whisper-large-v3-turbo",
+        language="ru",
+    )
+    return result.text
+
+
+def frames_to_wav(frames: list[bytes]) -> bytes:
+    """Конвертирует сырые PCM фреймы в WAV в памяти"""
+    buf = io.BytesIO()
+    wf = wave.open(buf, "wb")
+    wf.setnchannels(1)
+    wf.setsampwidth(2)
+    wf.setframerate(16000)
+    wf.writeframes(b"".join(frames))
+    wf.close()
+    return buf.getvalue()
+
+
+def transcribe(frames: list[bytes]) -> str:
+    """Транскрибирует аудио фреймы — всё в памяти, без диска"""
+    try:
+        wav_bytes = frames_to_wav(frames)
+
+        if STT_PROVIDER == "groq":
+            return transcribe_groq_bytes(wav_bytes)
+
+        # Whisper fallback — нужен файл на диске
+        import tempfile
+        import os
+        from faster_whisper import WhisperModel
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
+            f.write(wav_bytes)
+            tmp_path = f.name
+        try:
+            model = WhisperModel(WHISPER_MODEL, device="cpu", compute_type="int8")
+            segments, _ = model.transcribe(tmp_path, language=WHISPER_LANG)
+            return " ".join(s.text for s in segments).strip()
+        finally:
+            os.unlink(tmp_path)
+    except Exception as e:
+        log.exception("STT ошибка")
+        print(f"⚠️  Ошибка распознавания речи: {e}")
+        return ""
--- a/satellite/text.py
+++ b/satellite/text.py
@@ -0,0 +1,67 @@
+import re
+
+
+def clean_for_speech(text: str) -> str:
+    text = re.sub(r'\*+', '', text)          # убрать **жирный**
+    text = re.sub(r'#+\s', '', text)         # убрать ## заголовки
+    text = re.sub(r'- ', '', text)           # убрать тире списков
+    text = re.sub(r'\[.*?\]\(.*?\)', '', text)  # убрать ссылки
+    text = re.sub(r'\n+', '. ', text)        # переносы → точки
+    text = re.sub(r'\s+', ' ', text)         # лишние пробелы
+    text = re.sub(r'(\d+)\.(\s)', r'\1\2', text)
+    return text.strip()
+
+
+def find_sentence_end(text: str, min_len: int = 60) -> int:
+    """Ищет конец предложения, игнорируя ложные точки"""
+    if len(text) < min_len:
+        return -1
+
+    for match in re.finditer(r'[.!?]', text):
+        pos = match.start()
+        if pos < min_len:
+            continue
+
+        before_1 = text[max(0, pos-1):pos]    # 1 символ до
+        before_3 = text[max(0, pos-3):pos]    # 3 символа до
+        after_2  = text[pos+1:pos+3]          # 2 символа после
+        after_stripped = after_2.lstrip()
+
+        # 1. Цифра.Цифра → "0.76", "3.14"
+        if before_1.isdigit() and after_2[:1].isdigit():
+            continue
+
+        # 2. Цифра. Цифра → "1. 2 ГБ"
+        if before_1.isdigit() and after_stripped[:1].isdigit():
+            continue
+
+        # 3. Аббревиатуры → "ГБ.", "МБ.", "км.", "шт.", "руб.", "млн.", "млрд."
+        abbrevs = ["гб", "мб", "кб", "тб", "км", "см", "мм", "шт",
+                   "руб", "млн", "млрд", "тыс", "кг", "гр", "мл",
+                   "gb", "mb", "kb", "tb", "km", "ms", "kb"]
+        if any(before_3.lower().endswith(a) for a in abbrevs):
+            continue
+
+        # 4. Одиночная заглавная буква → "А.", "В.", "США." (инициалы/аббр.)
+        if len(before_3.strip()) == 1 and before_3.strip().isupper():
+            continue
+
+        # 5. После точки строчная буква → "load avg. нормально"
+        if after_stripped and after_stripped[0].islower():
+            continue
+
+        # 6. Многоточие → "..."
+        if text[pos:pos+3] == "...":
+            continue
+
+        # 7. Точка внутри URL или IP → "192.168.1.1", "example.com"
+        if before_1.isdigit() or (after_2[:1].isdigit() and "." in before_3):
+            continue
+
+        # 8. Процент с точкой → "95.5%"
+        if "%" in after_2[:2]:
+            continue
+
+        return pos
+
+    return -1
--- a/satellite/tts.py
+++ b/satellite/tts.py
@@ -0,0 +1,110 @@
+import os
+import sys
+import subprocess
+import threading
+
+from .config import AUDIO_SINK, AGENTS, log
+
+ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY", "")
+ELEVENLABS_MODEL = os.getenv("ELEVENLABS_MODEL", "eleven_flash_v2_5")
+
+_elevenlabs_client = None
+_current_process: subprocess.Popen | None = None
+_process_lock = threading.Lock()
+
+
+def _get_elevenlabs():
+    global _elevenlabs_client
+    if _elevenlabs_client is None:
+        from elevenlabs.client import ElevenLabs
+        _elevenlabs_client = ElevenLabs(api_key=ELEVENLABS_API_KEY)
+    return _elevenlabs_client
+
+
+def stop_speaking():
+    """Прерывает текущее воспроизведение (barge-in)"""
+    global _current_process
+    with _process_lock:
+        if _current_process and _current_process.poll() is None:
+            _current_process.terminate()
+            try:
+                _current_process.wait(timeout=1)
+            except subprocess.TimeoutExpired:
+                _current_process.kill()
+            _current_process = None
+
+
+def is_speaking() -> bool:
+    with _process_lock:
+        return _current_process is not None and _current_process.poll() is None
+
+
+def _mpv_cmd() -> list[str]:
+    """Команда mpv для воспроизведения из stdin"""
+    cmd = ["mpv", "--no-video", "--really-quiet", "--no-terminal"]
+    if AUDIO_SINK:
+        cmd.append(f"--audio-device=pulse/{AUDIO_SINK}")
+    cmd.append("-")
+    return cmd
+
+
+def speak(text: str, agent_id: str = "cosmo"):
+    try:
+        _speak_elevenlabs(text, agent_id)
+    except Exception as e:
+        log.exception("TTS ошибка")
+        print(f"⚠️  Ошибка воспроизведения: {e}")
+
+
+def _speak_elevenlabs(text: str, agent_id: str):
+    global _current_process
+    client = _get_elevenlabs()
+    voice_id = AGENTS.get(agent_id, AGENTS["cosmo"]).get("tts_voice", "")
+
+    if not voice_id:
+        log.error(f"tts_voice не задан для {agent_id}")
+        print(f"⚠️  tts_voice не задан для {agent_id}")
+        return
+
+    audio_stream = client.text_to_speech.convert(
+        text=text,
+        voice_id=voice_id,
+        model_id=ELEVENLABS_MODEL,
+        output_format="mp3_44100_128",
+    )
+
+    with _process_lock:
+        _current_process = subprocess.Popen(
+            _mpv_cmd(), stdin=subprocess.PIPE,
+            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
+        )
+        proc = _current_process
+
+    try:
+        for chunk in audio_stream:
+            if proc.poll() is not None:
+                break
+            try:
+                proc.stdin.write(chunk)
+            except BrokenPipeError:
+                break
+        proc.stdin.close()
+        proc.wait()
+    except Exception:
+        proc.kill()
+    finally:
+        with _process_lock:
+            if _current_process is proc:
+                _current_process = None
+
+
+def play_activation_sound():
+    """Звук активации после wake word"""
+    try:
+        if sys.platform == "darwin":
+            subprocess.run(["afplay", "/System/Library/Sounds/Glass.aiff"])
+        else:
+            subprocess.run(["paplay", "/usr/share/sounds/freedesktop/stereo/bell.oga"])
+    except Exception as e:
+        log.exception("Ошибка звука активации")
+        print(f"⚠️  Ошибка звука: {e}")