Initial commit: Cosmo Voice Satellite
Two-agent voice assistant (Cosmo + Люся) via OpenClaw Gateway. Streaming STT (Groq) + LLM + TTS (ElevenLabs) pipeline with keep-alive sessions, barge-in, and daily conversation sessions.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
110
satellite/tts.py
Normal file
110
satellite/tts.py
Normal file
@@ -0,0 +1,110 @@
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
import threading
|
||||
|
||||
from .config import AUDIO_SINK, AGENTS, log
|
||||
|
||||
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY", "")
|
||||
ELEVENLABS_MODEL = os.getenv("ELEVENLABS_MODEL", "eleven_flash_v2_5")
|
||||
|
||||
_elevenlabs_client = None
|
||||
_current_process: subprocess.Popen | None = None
|
||||
_process_lock = threading.Lock()
|
||||
|
||||
|
||||
def _get_elevenlabs():
    """Return the module-wide ElevenLabs client, creating it on first call."""
    global _elevenlabs_client
    if _elevenlabs_client is not None:
        return _elevenlabs_client

    # Imported inside the function, so the SDK is loaded only when a client
    # is actually requested.
    from elevenlabs.client import ElevenLabs

    _elevenlabs_client = ElevenLabs(api_key=ELEVENLABS_API_KEY)
    return _elevenlabs_client
|
||||
|
||||
|
||||
def stop_speaking():
    """Interrupt the current playback (barge-in).

    If a player process is registered and still running, it is asked to
    terminate. When it has not exited within one second it is killed and
    then reaped, so no zombie process is left behind. The registered
    process reference is cleared in every case.
    """
    global _current_process
    with _process_lock:
        proc = _current_process
        if proc is not None and proc.poll() is None:
            proc.terminate()
            try:
                proc.wait(timeout=1)
            except subprocess.TimeoutExpired:
                proc.kill()
                # Collect the exit status of the killed child; without this
                # wait() the process entry lingers as a zombie.
                proc.wait()
        _current_process = None
|
||||
|
||||
|
||||
def is_speaking() -> bool:
    """Return True while a registered player process is still running."""
    with _process_lock:
        proc = _current_process
        if proc is None:
            return False
        # poll() returns None for as long as the child has not exited.
        return proc.poll() is None
|
||||
|
||||
|
||||
def _mpv_cmd() -> list[str]:
    """Build the mpv command line for playing audio read from stdin."""
    # The audio-device option is only added when AUDIO_SINK is truthy.
    device_args = [f"--audio-device=pulse/{AUDIO_SINK}"] if AUDIO_SINK else []
    return [
        "mpv",
        "--no-video",
        "--really-quiet",
        "--no-terminal",
        *device_args,
        "-",  # read the media from standard input
    ]
|
||||
|
||||
|
||||
def speak(text: str, agent_id: str = "cosmo"):
    """Speak ``text`` using the voice configured for ``agent_id``.

    Any ``Exception`` raised by the TTS backend is logged with its
    traceback and reported on stdout instead of being propagated.
    """
    try:
        _speak_elevenlabs(text, agent_id)
    except Exception as err:
        log.exception("TTS ошибка")
        print(f"⚠️ Ошибка воспроизведения: {err}")
|
||||
|
||||
|
||||
def _speak_elevenlabs(text: str, agent_id: str):
    """Synthesize ``text`` with ElevenLabs and pipe the audio into mpv.

    The voice id is read from the ``tts_voice`` entry of the agent's
    configuration in ``AGENTS``; an unknown ``agent_id`` falls back to the
    ``"cosmo"`` entry. When no voice id is configured the function reports
    that and returns without playing anything.

    The player process is registered in ``_current_process`` under
    ``_process_lock`` and unregistered again when streaming ends, whether
    playback finished, was interrupted, or failed.
    """
    global _current_process
    client = _get_elevenlabs()
    voice_id = AGENTS.get(agent_id, AGENTS["cosmo"]).get("tts_voice", "")

    if not voice_id:
        log.error(f"tts_voice не задан для {agent_id}")
        print(f"⚠️ tts_voice не задан для {agent_id}")
        return

    audio_stream = client.text_to_speech.convert(
        text=text,
        voice_id=voice_id,
        model_id=ELEVENLABS_MODEL,
        output_format="mp3_44100_128",
    )

    with _process_lock:
        _current_process = subprocess.Popen(
            _mpv_cmd(),
            stdin=subprocess.PIPE,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        proc = _current_process

    try:
        for chunk in audio_stream:
            # The player has exited (for example it was stopped from
            # another thread), so there is nothing left to feed.
            if proc.poll() is not None:
                break
            try:
                proc.stdin.write(chunk)
            except BrokenPipeError:
                break
        try:
            proc.stdin.close()
        except BrokenPipeError:
            # Buffered data could not be flushed because the player is
            # already gone; this is an interruption, not a failure.
            pass
        proc.wait()
    except Exception:
        # Record the failure instead of dropping it silently, then make sure
        # the player is stopped and its exit status is collected.
        log.exception("TTS: ошибка потоковой передачи аудио")
        proc.kill()
        proc.wait()
    finally:
        with _process_lock:
            # Only clear the slot if it still refers to this player.
            if _current_process is proc:
                _current_process = None
|
||||
|
||||
|
||||
def play_activation_sound():
    """Play the activation sound that follows the wake word.

    Uses ``afplay`` on macOS and ``paplay`` on every other platform. Any
    ``Exception`` raised while running the player is logged and reported
    on stdout.
    """
    if sys.platform == "darwin":
        command = ["afplay", "/System/Library/Sounds/Glass.aiff"]
    else:
        command = ["paplay", "/usr/share/sounds/freedesktop/stereo/bell.oga"]

    try:
        subprocess.run(command)
    except Exception as err:
        log.exception("Ошибка звука активации")
        print(f"⚠️ Ошибка звука: {err}")
|
||||
Reference in New Issue
Block a user