Initial commit: Cosmo Voice Satellite

Two-agent voice assistant (Cosmo + Люся) via OpenClaw Gateway. Streaming STT (Groq) + LLM + TTS (ElevenLabs) pipeline with keep-alive sessions, barge-in, and daily conversation sessions. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-12 13:34:08 +03:00
commit 7ca8268b78
16 changed files with 1143 additions and 0 deletions
--- a/satellite/tts.py
+++ b/satellite/tts.py
@@ -0,0 +1,110 @@
+import os
+import sys
+import subprocess
+import threading
+
+from .config import AUDIO_SINK, AGENTS, log
+
+# ElevenLabs credentials and model, read from the environment at import time.
+# The API key defaults to an empty string when the variable is not set.
+ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY", "")
+ELEVENLABS_MODEL = os.getenv("ELEVENLABS_MODEL", "eleven_flash_v2_5")
+
+# Shared ElevenLabs client; stays None until _get_elevenlabs() builds it.
+_elevenlabs_client = None
+# Player process started by _speak_elevenlabs(), or None when nothing is tracked.
+_current_process: subprocess.Popen | None = None
+# Guards reads and writes of _current_process.
+_process_lock = threading.Lock()
+
+
+def _get_elevenlabs():
+    global _elevenlabs_client
+    if _elevenlabs_client is None:
+        from elevenlabs.client import ElevenLabs
+        _elevenlabs_client = ElevenLabs(api_key=ELEVENLABS_API_KEY)
+    return _elevenlabs_client
+
+
+def stop_speaking():
+    """Прерывает текущее воспроизведение (barge-in)"""
+    global _current_process
+    with _process_lock:
+        if _current_process and _current_process.poll() is None:
+            _current_process.terminate()
+            try:
+                _current_process.wait(timeout=1)
+            except subprocess.TimeoutExpired:
+                _current_process.kill()
+            _current_process = None
+
+
+def is_speaking() -> bool:
+    with _process_lock:
+        return _current_process is not None and _current_process.poll() is None
+
+
+def _mpv_cmd() -> list[str]:
+    """Команда mpv для воспроизведения из stdin"""
+    cmd = ["mpv", "--no-video", "--really-quiet", "--no-terminal"]
+    if AUDIO_SINK:
+        cmd.append(f"--audio-device=pulse/{AUDIO_SINK}")
+    cmd.append("-")
+    return cmd
+
+
+def speak(text: str, agent_id: str = "cosmo"):
+    try:
+        _speak_elevenlabs(text, agent_id)
+    except Exception as e:
+        log.exception("TTS ошибка")
+        print(f"⚠️  Ошибка воспроизведения: {e}")
+
+
+def _speak_elevenlabs(text: str, agent_id: str):
+    global _current_process
+    client = _get_elevenlabs()
+    voice_id = AGENTS.get(agent_id, AGENTS["cosmo"]).get("tts_voice", "")
+
+    if not voice_id:
+        log.error(f"tts_voice не задан для {agent_id}")
+        print(f"⚠️  tts_voice не задан для {agent_id}")
+        return
+
+    audio_stream = client.text_to_speech.convert(
+        text=text,
+        voice_id=voice_id,
+        model_id=ELEVENLABS_MODEL,
+        output_format="mp3_44100_128",
+    )
+
+    with _process_lock:
+        _current_process = subprocess.Popen(
+            _mpv_cmd(), stdin=subprocess.PIPE,
+            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
+        )
+        proc = _current_process
+
+    try:
+        for chunk in audio_stream:
+            if proc.poll() is not None:
+                break
+            try:
+                proc.stdin.write(chunk)
+            except BrokenPipeError:
+                break
+        proc.stdin.close()
+        proc.wait()
+    except Exception:
+        proc.kill()
+    finally:
+        with _process_lock:
+            if _current_process is proc:
+                _current_process = None
+
+
+def play_activation_sound():
+    """Звук активации после wake word"""
+    try:
+        if sys.platform == "darwin":
+            subprocess.run(["afplay", "/System/Library/Sounds/Glass.aiff"])
+        else:
+            subprocess.run(["paplay", "/usr/share/sounds/freedesktop/stereo/bell.oga"])
+    except Exception as e:
+        log.exception("Ошибка звука активации")
+        print(f"⚠️  Ошибка звука: {e}")