feat: VAD-based barge-in during TTS playback

This commit is contained in:
Cosmo
2026-04-14 15:28:12 +00:00
parent cd921e1540
commit cdf8748e48

View File

@@ -4,7 +4,7 @@ import subprocess
import threading
from elevenlabs import VoiceSettings
from .config import AUDIO_SINK, AGENTS, log
from .config import AUDIO_SINK, AGENTS, SILENCE_THRESHOLD, log
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY", "")
ELEVENLABS_MODEL = os.getenv("ELEVENLABS_MODEL", "eleven_flash_v2_5")
@@ -40,6 +40,43 @@ def is_speaking() -> bool:
return _current_process is not None and _current_process.poll() is None
# Barge-in signal: cleared by start_barge_in_listener(), set by its listener
# thread when voice is detected during TTS playback, read by was_barge_in().
_barge_in_flag = threading.Event()
def start_barge_in_listener():
    """Start a background VAD thread that flags barge-in during TTS playback.

    Clears the barge-in flag, then spawns a daemon thread that reads the
    microphone for as long as ``is_speaking()`` is true. When the mean
    absolute amplitude of a chunk exceeds ``SILENCE_THRESHOLD * 1.5`` the
    thread sets the barge-in flag, calls ``stop_speaking()`` and exits.

    Failures inside the listener (missing audio backend, device errors) are
    logged and never propagate to the caller: barge-in is best-effort.

    Returns:
        The started ``threading.Thread`` running the listener.
    """
    _barge_in_flag.clear()

    def _listen():
        # Stdlib logging is used so this block does not depend on the shape
        # of the project's own ``log`` helper.
        import logging

        logger = logging.getLogger(__name__)
        audio = None
        stream = None
        try:
            import numpy as np
            import pyaudio

            audio = pyaudio.PyAudio()
            stream = audio.open(format=pyaudio.paInt16, channels=1, rate=16000,
                                input=True, frames_per_buffer=1024)
            warmup = 8  # ~0.5 s warm-up so the echo of the TTS start is not picked up
            chunks_read = 0
            while is_speaking():
                data = stream.read(1024, exception_on_overflow=False)
                chunks_read += 1
                if chunks_read < warmup:
                    continue
                # Widen to int32 before abs(): abs(-32768) wraps around in int16.
                samples = np.frombuffer(data, dtype=np.int16).astype(np.int32)
                amplitude = np.abs(samples).mean()
                # Threshold is slightly higher than the one used for recording.
                if amplitude > SILENCE_THRESHOLD * 1.5:
                    _barge_in_flag.set()
                    stop_speaking()
                    break
        except Exception:
            # Best-effort feature: report the failure instead of hiding it.
            logger.exception("barge-in listener failed")
        finally:
            # Always release the audio device, even when the loop raised.
            if stream is not None:
                try:
                    stream.stop_stream()
                    stream.close()
                except Exception:
                    logger.debug("failed to close barge-in input stream",
                                 exc_info=True)
            if audio is not None:
                audio.terminate()

    t = threading.Thread(target=_listen, daemon=True)
    t.start()
    return t
def was_barge_in() -> bool:
    """Report whether the barge-in flag is currently set."""
    flag_state = _barge_in_flag.is_set()
    return flag_state
def _mpv_cmd() -> list[str]:
"""Команда mpv для воспроизведения из stdin"""
mpv_bin = os.getenv("MPV_PATH", "mpv")