Initial commit: Cosmo Voice Satellite

Two-agent voice assistant (Cosmo + Люся) via OpenClaw Gateway. Streaming STT (Groq) + LLM + TTS (ElevenLabs) pipeline with keep-alive sessions, barge-in, and daily conversation sessions. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-12 13:34:08 +03:00
commit 7ca8268b78
16 changed files with 1143 additions and 0 deletions
--- a/satellite/audio.py
+++ b/satellite/audio.py
@@ -0,0 +1,104 @@
+import pyaudio
+import numpy as np
+
+from .config import SILENCE_THRESHOLD, SILENCE_DURATION, MAX_DURATION, log
+from .stt import transcribe
+
+
+def record() -> str:
+    """Record microphone audio until trailing silence, then transcribe it.
+
+    Opens a 16 kHz mono 16-bit PyAudio input stream and reads it in
+    1024-frame chunks. A chunk whose mean absolute amplitude exceeds
+    SILENCE_THRESHOLD counts as speech. Capture ends once speech has been
+    seen and SILENCE_DURATION seconds' worth of consecutive quiet chunks
+    follow, or after MAX_DURATION seconds in total.
+
+    Returns:
+        The result of ``transcribe`` on the captured chunks, or ``""`` when
+        the microphone cannot be opened or no chunk was loud enough.
+    """
+    rate = 16000
+    chunk = 1024
+
+    audio = None
+    try:
+        audio = pyaudio.PyAudio()
+        stream = audio.open(
+            format=pyaudio.paInt16,
+            channels=1,
+            rate=rate,
+            input=True,
+            frames_per_buffer=chunk,
+        )
+    except Exception as e:
+        log.exception("Не удалось открыть микрофон")
+        print(f"⚠️  Ошибка микрофона: {e}")
+        # PyAudio() may have succeeded before open() failed; release it so
+        # repeated failures do not pile up initialised PortAudio instances.
+        if audio is not None:
+            audio.terminate()
+        return ""
+
+    print("🎙️  Говори...")
+    frames = []
+    silent_chunks = 0
+    speaking_started = False
+    chunks_per_second = rate / chunk
+    max_chunks = int(chunks_per_second * MAX_DURATION)
+    silence_chunks_needed = int(chunks_per_second * SILENCE_DURATION)
+
+    try:
+        for _ in range(max_chunks):
+            data = stream.read(chunk, exception_on_overflow=False)
+            frames.append(data)
+
+            # Widen before abs(): in int16, abs(-32768) wraps back to -32768
+            # and would drag the mean down on clipped (very loud) input.
+            samples = np.frombuffer(data, dtype=np.int16).astype(np.int32)
+            amplitude = np.abs(samples).mean()
+
+            if amplitude > SILENCE_THRESHOLD:
+                speaking_started = True
+                silent_chunks = 0
+            elif speaking_started:
+                # Only count silence after speech began, so leading quiet
+                # does not end the recording early.
+                silent_chunks += 1
+                if silent_chunks >= silence_chunks_needed:
+                    print("🔇 Конец речи")
+                    break
+    except Exception as e:
+        # Best effort: chunks captured before the failure are still
+        # transcribed below if speech had already started.
+        log.exception("Ошибка при записи аудио")
+        print(f"⚠️  Ошибка записи: {e}")
+    finally:
+        # Nested finally guarantees terminate() runs even if stopping or
+        # closing the stream raises.
+        try:
+            stream.stop_stream()
+            stream.close()
+        finally:
+            audio.terminate()
+
+    if not speaking_started:
+        return ""
+
+    return transcribe(frames)
+
+
+def record_with_timeout(timeout: float = 8.0) -> str:
+    """Listen for at most ``timeout`` seconds and transcribe any speech heard.
+
+    Opens a 16 kHz mono 16-bit PyAudio input stream and reads it in
+    1024-frame chunks. A chunk whose mean absolute amplitude exceeds
+    SILENCE_THRESHOLD counts as speech. Capture ends once speech has been
+    seen and SILENCE_DURATION seconds' worth of consecutive quiet chunks
+    follow. ``timeout`` caps the whole capture, speech included, not just
+    the wait for speech to begin.
+
+    Args:
+        timeout: Maximum capture length in seconds.
+
+    Returns:
+        The result of ``transcribe`` on the captured chunks, or ``""`` when
+        the microphone cannot be opened or no chunk was loud enough.
+    """
+    rate = 16000
+    chunk = 1024
+
+    audio = None
+    try:
+        audio = pyaudio.PyAudio()
+        stream = audio.open(
+            format=pyaudio.paInt16,
+            channels=1,
+            rate=rate,
+            input=True,
+            frames_per_buffer=chunk,
+        )
+    except Exception as e:
+        log.exception("Не удалось открыть микрофон (followup)")
+        print(f"⚠️  Ошибка микрофона: {e}")
+        # PyAudio() may have succeeded before open() failed; release it so
+        # repeated failures do not pile up initialised PortAudio instances.
+        if audio is not None:
+            audio.terminate()
+        return ""
+
+    frames = []
+    silent_chunks = 0
+    speaking_started = False
+    chunks_per_second = rate / chunk
+    max_chunks = int(chunks_per_second * timeout)
+    silence_chunks_needed = int(chunks_per_second * SILENCE_DURATION)
+
+    try:
+        for _ in range(max_chunks):
+            data = stream.read(chunk, exception_on_overflow=False)
+            frames.append(data)
+
+            # Widen before abs(): in int16, abs(-32768) wraps back to -32768
+            # and would drag the mean down on clipped (very loud) input.
+            samples = np.frombuffer(data, dtype=np.int16).astype(np.int32)
+            amplitude = np.abs(samples).mean()
+
+            if amplitude > SILENCE_THRESHOLD:
+                speaking_started = True
+                silent_chunks = 0
+            elif speaking_started:
+                # Only count silence after speech began, so leading quiet
+                # does not end the capture early.
+                silent_chunks += 1
+                if silent_chunks >= silence_chunks_needed:
+                    break
+    except Exception as e:
+        # Best effort: chunks captured before the failure are still
+        # transcribed below if speech had already started.
+        log.exception("Ошибка при записи аудио (followup)")
+        print(f"⚠️  Ошибка записи: {e}")
+    finally:
+        # Nested finally guarantees terminate() runs even if stopping or
+        # closing the stream raises.
+        try:
+            stream.stop_stream()
+            stream.close()
+        finally:
+            audio.terminate()
+
+    if not speaking_started:
+        return ""
+
+    return transcribe(frames)