Files
home-voice-assistant/satellite/tts.py
2026-04-14 15:28:12 +00:00

178 lines
5.8 KiB
Python

import os
import sys
import subprocess
import threading
from elevenlabs import VoiceSettings
from .config import AUDIO_SINK, AGENTS, SILENCE_THRESHOLD, log
# ElevenLabs API key and model id; both can be overridden via environment variables.
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY", "")
ELEVENLABS_MODEL = os.getenv("ELEVENLABS_MODEL", "eleven_flash_v2_5")
# Lazily created ElevenLabs client (built on first call to _get_elevenlabs).
_elevenlabs_client = None
# Player subprocess for the utterance currently being spoken, or None.
_current_process: subprocess.Popen | None = None
# Guards reads and writes of _current_process.
_process_lock = threading.Lock()
def _get_elevenlabs():
    """Return the shared ElevenLabs client, creating it on first use."""
    global _elevenlabs_client
    if _elevenlabs_client is not None:
        return _elevenlabs_client

    # Imported here so the SDK client module is only loaded when TTS is used.
    from elevenlabs.client import ElevenLabs

    _elevenlabs_client = ElevenLabs(api_key=ELEVENLABS_API_KEY)
    return _elevenlabs_client
def stop_speaking():
    """Interrupt the current playback (barge-in).

    Sends SIGTERM to the running player process, waits up to one second for
    it to exit, and falls back to SIGKILL if it does not. The module-level
    process handle is cleared in every case.
    """
    global _current_process
    with _process_lock:
        proc = _current_process
        if proc and proc.poll() is None:
            proc.terminate()
            try:
                proc.wait(timeout=1)
            except subprocess.TimeoutExpired:
                proc.kill()
                # Reap the killed child so it does not linger as a zombie.
                proc.wait()
        _current_process = None
def is_speaking() -> bool:
    """Return True while a player process exists and has not exited yet."""
    with _process_lock:
        proc = _current_process
        if proc is None:
            return False
        return proc.poll() is None
# Set by the barge-in listener when it hears voice during TTS; read via was_barge_in().
_barge_in_flag = threading.Event()
def start_barge_in_listener():
    """Start a background VAD thread that detects barge-in during TTS.

    The barge-in flag is cleared first. While ``is_speaking()`` is true the
    thread reads 16 kHz mono int16 audio in 1024-sample chunks and compares
    each chunk's mean absolute amplitude with ``SILENCE_THRESHOLD * 1.5``.
    The first chunk above that level sets the flag and stops playback.

    Failures in the listener are logged rather than raised, and the audio
    stream and PyAudio instance are always released.

    Returns:
        The started daemon ``threading.Thread``.
    """
    _barge_in_flag.clear()

    def _listen():
        chunk_size = 1024
        warmup = 8  # ~0.5 s warm-up so the echo of the TTS start is not picked up
        audio = None
        stream = None
        try:
            import pyaudio
            import numpy as np

            audio = pyaudio.PyAudio()
            stream = audio.open(format=pyaudio.paInt16, channels=1, rate=16000,
                                input=True, frames_per_buffer=chunk_size)
            chunks_read = 0
            while is_speaking():
                data = stream.read(chunk_size, exception_on_overflow=False)
                chunks_read += 1
                if chunks_read < warmup:
                    continue
                # Widen before abs(): abs(-32768) overflows in int16.
                samples = np.frombuffer(data, dtype=np.int16).astype(np.int32)
                amplitude = np.abs(samples).mean()
                # Threshold is slightly higher than the one used for recording.
                if amplitude > SILENCE_THRESHOLD * 1.5:
                    _barge_in_flag.set()
                    stop_speaking()
                    break
        except Exception:
            # Barge-in is best-effort: report the failure, never crash the thread.
            log.exception("barge-in listener failed")
        finally:
            # Release audio resources on every exit path, not only on success.
            if stream is not None:
                try:
                    stream.stop_stream()
                    stream.close()
                except Exception:
                    log.exception("barge-in listener: failed to close audio stream")
            if audio is not None:
                audio.terminate()

    listener = threading.Thread(target=_listen, daemon=True)
    listener.start()
    return listener
def was_barge_in() -> bool:
    """Return True if the barge-in flag is currently set."""
    interrupted = _barge_in_flag.is_set()
    return interrupted
def _mpv_cmd() -> list[str]:
    """Build the mpv command line for playing audio piped to stdin.

    The binary comes from the ``MPV_PATH`` environment variable (default
    ``mpv``). When ``AUDIO_SINK`` is set, output is routed to that PulseAudio
    device.
    """
    player = os.getenv("MPV_PATH", "mpv")
    device_args = [f"--audio-device=pulse/{AUDIO_SINK}"] if AUDIO_SINK else []
    # The trailing "-" tells mpv to read the media from stdin.
    return [player, "--no-video", "--really-quiet", "--no-terminal", *device_args, "-"]
def speak(text: str, agent_id: str = "cosmo"):
    """Speak ``text`` with the voice configured for ``agent_id``.

    Any exception from the TTS backend is logged with its traceback, reported
    on stdout, and followed by the error sound; nothing is re-raised.
    """
    try:
        _speak_elevenlabs(text, agent_id)
    except Exception as e:
        log.exception("TTS ошибка")
        notice = f"⚠️ Ошибка воспроизведения: {e}"
        print(notice)
        play_error_sound()
def _speak_elevenlabs(text: str, agent_id: str):
    """Synthesize ``text`` with ElevenLabs and stream the MP3 into mpv.

    The voice id is read from ``AGENTS[agent_id]["tts_voice"]``, falling back
    to the ``"cosmo"`` agent for unknown ids. If no voice is configured the
    problem is logged and printed and the function returns without speaking.

    Playback ends early, without an error, when the player exits or its pipe
    breaks (e.g. after ``stop_speaking()``).

    Raises:
        Exception: any error raised while streaming audio (including API
            errors, which the SDK may only surface once the response iterator
            is consumed). The player is killed and reaped before re-raising so
            the caller can log the failure and play the error sound.
    """
    global _current_process
    client = _get_elevenlabs()
    voice_id = AGENTS.get(agent_id, AGENTS["cosmo"]).get("tts_voice", "")
    if not voice_id:
        log.error(f"tts_voice не задан для {agent_id}")
        print(f"⚠️ tts_voice не задан для {agent_id}")
        return
    voice_settings = VoiceSettings(
        stability=0.4,  # lower = livelier intonation (for multilingual_v2)
        similarity_boost=0.8,
        style=0.1,  # higher = more emotional
        use_speaker_boost=True,
        speed=1.1
    )
    audio_stream = client.text_to_speech.convert(
        text=text,
        voice_id=voice_id,
        model_id=ELEVENLABS_MODEL,
        output_format="mp3_22050_32",
        voice_settings=voice_settings,
        optimize_streaming_latency=3
    )
    with _process_lock:
        _current_process = subprocess.Popen(
            _mpv_cmd(), stdin=subprocess.PIPE,
            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
        )
        proc = _current_process
    try:
        for chunk in audio_stream:
            if proc.poll() is not None:
                # Player already exited (e.g. barge-in); stop feeding it.
                break
            try:
                proc.stdin.write(chunk)
            except BrokenPipeError:
                break
        try:
            # close() flushes buffered data and can hit a broken pipe if the
            # player was stopped in the meantime; that is not an error.
            proc.stdin.close()
        except OSError:
            pass
        proc.wait()
    except Exception:
        # Do not leave a half-fed player running; reap it, then let the
        # caller see the failure instead of silently swallowing it.
        proc.kill()
        proc.wait()
        raise
    finally:
        with _process_lock:
            # Only clear the handle if nobody replaced or cleared it already.
            if _current_process is proc:
                _current_process = None
def _play_sound_file(filename: str, wait: bool = False):
    """Play a file from the ``sounds/`` directory through mpv.

    The directory is resolved as ``../sounds`` relative to this module. With
    ``wait=True`` the call blocks until playback finishes; otherwise mpv is
    started and the function returns immediately.
    """
    module_dir = os.path.dirname(__file__)
    sound_path = os.path.normpath(os.path.join(module_dir, "..", "sounds", filename))
    command = [
        os.getenv("MPV_PATH", "mpv"),
        "--no-video",
        "--really-quiet",
        "--no-terminal",
        sound_path,
    ]
    if not wait:
        subprocess.Popen(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        return
    subprocess.run(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
def play_activation_sound():
    """Play the activation sound without blocking; failures are only logged."""
    sound_name = "Success_Cosmo.mp3"
    try:
        _play_sound_file(sound_name, wait=False)
    except Exception as e:
        log.warning(f"Ошибка звука активации: {e}")
def play_error_sound():
    """Play the error ("that didn't work") sound; failures are only logged."""
    sound_name = "Error_Cosmo.mp3"
    try:
        # wait=False matches the helper's default: start mpv and return.
        _play_sound_file(sound_name, wait=False)
    except Exception as e:
        log.warning(f"Ошибка звука ошибки: {e}")