feat: VAD-based barge-in during TTS playback
This commit is contained in:
@@ -4,7 +4,7 @@ import subprocess
|
|||||||
import threading
|
import threading
|
||||||
from elevenlabs import VoiceSettings
|
from elevenlabs import VoiceSettings
|
||||||
|
|
||||||
from .config import AUDIO_SINK, AGENTS, log
|
from .config import AUDIO_SINK, AGENTS, SILENCE_THRESHOLD, log
|
||||||
|
|
||||||
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY", "")
|
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY", "")
|
||||||
ELEVENLABS_MODEL = os.getenv("ELEVENLABS_MODEL", "eleven_flash_v2_5")
|
ELEVENLABS_MODEL = os.getenv("ELEVENLABS_MODEL", "eleven_flash_v2_5")
|
||||||
@@ -40,6 +40,43 @@ def is_speaking() -> bool:
|
|||||||
return _current_process is not None and _current_process.poll() is None
|
return _current_process is not None and _current_process.poll() is None
|
||||||
|
|
||||||
|
|
||||||
|
# Set by the barge-in listener thread when microphone input exceeds the
# voice threshold while TTS is playing; cleared each time a listener starts.
_barge_in_flag = threading.Event()
|
||||||
|
|
||||||
|
def start_barge_in_listener() -> threading.Thread:
    """Start a background VAD thread that detects barge-in during TTS playback.

    The barge-in flag is cleared first. The spawned daemon thread then reads
    16 kHz mono 16-bit microphone frames for as long as ``is_speaking()`` is
    true. When the mean absolute amplitude of a frame exceeds
    ``SILENCE_THRESHOLD * 1.5`` the flag is set, ``stop_speaking()`` is
    called and the thread exits.

    Failures inside the listener are logged and never propagate: barge-in is
    a best-effort feature and must not break playback.

    Returns:
        The already started listener thread.
    """
    _barge_in_flag.clear()

    def _listen():
        import logging

        import numpy as np
        import pyaudio

        logger = logging.getLogger(__name__)
        frame_size = 1024
        # ~0.5 s warm-up so the echo of the TTS onset is not taken for speech.
        warmup = 8
        audio = None
        stream = None
        try:
            audio = pyaudio.PyAudio()
            stream = audio.open(format=pyaudio.paInt16, channels=1, rate=16000,
                                input=True, frames_per_buffer=frame_size)
            frames_read = 0
            while is_speaking():
                data = stream.read(frame_size, exception_on_overflow=False)
                frames_read += 1
                if frames_read < warmup:
                    continue
                amplitude = np.abs(np.frombuffer(data, dtype=np.int16)).mean()
                # Threshold is slightly higher than the one used for recording.
                if amplitude > SILENCE_THRESHOLD * 1.5:
                    _barge_in_flag.set()
                    stop_speaking()
                    break
        except Exception:
            # Best-effort feature: report the failure instead of hiding it,
            # but never let it escape the thread.
            logger.exception("Barge-in listener failed")
        finally:
            # Release the audio device on every path, including errors raised
            # while opening or reading the stream.
            try:
                if stream is not None:
                    stream.stop_stream()
                    stream.close()
            except Exception:
                logger.exception("Failed to close barge-in input stream")
            finally:
                if audio is not None:
                    audio.terminate()

    t = threading.Thread(target=_listen, daemon=True)
    t.start()
    return t
|
||||||
|
|
||||||
|
def was_barge_in() -> bool:
    """Report whether the barge-in flag is currently set."""
    flag_raised = _barge_in_flag.is_set()
    return flag_raised
|
||||||
|
|
||||||
def _mpv_cmd() -> list[str]:
|
def _mpv_cmd() -> list[str]:
|
||||||
"""Команда mpv для воспроизведения из stdin"""
|
"""Команда mpv для воспроизведения из stdin"""
|
||||||
mpv_bin = os.getenv("MPV_PATH", "mpv")
|
mpv_bin = os.getenv("MPV_PATH", "mpv")
|
||||||
|
|||||||
Reference in New Issue
Block a user