- audio: switch VAD to webrtcvad with RMS gate + fallback to RMS
- audio: honor FOLLOWUP_TIMEOUT — short silence wait after bot response
- llm: retry with exponential backoff on network errors and 5xx
- llm: VOICE_MAX_TOKENS env (default 300) instead of hardcoded 150
- tts: optional VAD-based barge-in (BARGE_IN_ENABLED, off by default)
- tts: remove dead start_barge_in_listener / was_barge_in helpers
- config: drop AGENT/LUSYA_AGENT — routing happens via session_key
- modes: remove unused imports, pass FOLLOWUP_TIMEOUT to follow-up record()
- docs: full rewrite of README and CLAUDE.md to match current architecture
112 lines
4.0 KiB
Python
112 lines
4.0 KiB
Python
import os
|
||
import pyaudio
|
||
import numpy as np
|
||
|
||
from .config import (
|
||
SILENCE_THRESHOLD, SILENCE_DURATION, MAX_DURATION,
|
||
FOLLOWUP_TIMEOUT, VAD_AGGRESSIVENESS, log,
|
||
)
|
||
from .stt import transcribe
|
||
|
||
# Seconds skipped at the start of a recording; this damps echo from TTS.
ECHO_WARMUP = float(os.getenv("ECHO_WARMUP", "0.5"))

# webrtcvad requires 10/20/30 ms frames at 8/16/32/48 kHz.
SAMPLE_RATE = 16000
FRAME_MS = 30
FRAME_SAMPLES = SAMPLE_RATE * FRAME_MS // 1000  # 480 samples per frame
FRAME_BYTES = 2 * FRAME_SAMPLES  # int16 -> two bytes per sample

# webrtcvad is optional: if importing or constructing it fails, the module
# keeps working and speech detection relies on the amplitude gate alone.
try:
    import webrtcvad

    _vad = webrtcvad.Vad(VAD_AGGRESSIVENESS)
except Exception as exc:
    log.warning(f"webrtcvad недоступен, fallback на RMS: {exc}")
    _vad = None
    _VAD_OK = False
else:
    _VAD_OK = True
|
||
|
||
|
||
def _is_speech(frame: bytes) -> bool:
    """Decide whether a single PCM frame contains speech.

    The frame is read as signed 16-bit samples. It first has to clear an
    amplitude gate: the mean absolute sample value must be at least
    SILENCE_THRESHOLD, so that whispers and echo are not picked up. Only
    then is webrtcvad consulted, if it was initialised successfully. When
    webrtcvad is unavailable or raises on the frame, clearing the gate alone
    counts as speech.

    Args:
        frame: Raw int16 PCM bytes for one frame.

    Returns:
        True if the frame is considered speech, False otherwise. An empty
        frame is never speech.
    """
    samples = np.frombuffer(frame, dtype=np.int16)
    if samples.size == 0:
        # The mean of an empty array is NaN; NaN is not "< threshold", so it
        # would slip through the gate and end up classified as speech.
        return False

    # Widen before abs(): |-32768| does not fit in int16 and wraps back to
    # -32768, which would drag the mean down (or negative) on clipped audio.
    amplitude = float(np.abs(samples.astype(np.int32)).mean())
    if amplitude < SILENCE_THRESHOLD:
        return False

    if _VAD_OK:
        try:
            return _vad.is_speech(frame, SAMPLE_RATE)
        except Exception:
            # Deliberate best effort: if webrtcvad raises on this frame,
            # fall back to the amplitude gate instead of aborting.
            pass
    return True  # the amplitude gate already passed, so treat it as speech
|
||
|
||
|
||
def record(initial_silence_timeout: float | None = None) -> str:
    """Record from the microphone until the speaker goes quiet, then run STT.

    Audio is read in frames of FRAME_MS milliseconds. The first ECHO_WARMUP
    seconds are read and discarded. After that, every frame is kept and
    classified with ``_is_speech``. Recording stops when one of these holds:

    * speech has started and SILENCE_DURATION seconds of consecutive
      non-speech frames follow it;
    * speech never started and ``initial_silence_timeout`` seconds of
      non-speech frames have accumulated;
    * MAX_DURATION seconds' worth of frames (warm-up included) were read;
    * reading from the stream raised; the error is logged and printed, and
      whatever was captured so far is still used.

    Args:
        initial_silence_timeout: Seconds to wait for the user to start
            speaking before giving up. Defaults to FOLLOWUP_TIMEOUT when
            None.

    Returns:
        The text returned by ``transcribe`` for the captured frames, or ""
        when the microphone could not be opened, no speech was detected, or
        the transcription is empty or shorter than two characters after
        stripping whitespace.
    """
    if initial_silence_timeout is None:
        initial_silence_timeout = FOLLOWUP_TIMEOUT

    audio = None
    try:
        audio = pyaudio.PyAudio()
        stream = audio.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=SAMPLE_RATE,
            input=True,
            frames_per_buffer=FRAME_SAMPLES,
        )
    except Exception as e:
        log.exception("Не удалось открыть микрофон")
        print(f"⚠️ Ошибка микрофона: {e}")
        if audio is not None:
            # PyAudio() succeeded but open() failed: release PortAudio here,
            # otherwise every failed attempt leaks an instance.
            try:
                audio.terminate()
            except Exception:
                log.exception("Не удалось освободить PortAudio")
        return ""

    print("🎙️ Говори...")
    frames: list[bytes] = []
    speaking_started = False
    trailing_silence = 0  # non-speech frames since the last speech frame
    initial_silence = 0  # non-speech frames before any speech was heard

    # All limits are expressed in frames so the loop only counts integers.
    max_frames = int(MAX_DURATION * 1000 / FRAME_MS)
    warmup_frames = int(ECHO_WARMUP * 1000 / FRAME_MS)
    silence_frames_needed = int(SILENCE_DURATION * 1000 / FRAME_MS)
    initial_silence_limit = int(initial_silence_timeout * 1000 / FRAME_MS)

    try:
        for i in range(max_frames):
            data = stream.read(FRAME_SAMPLES, exception_on_overflow=False)
            if i < warmup_frames:
                # Still read the frame so the device buffer drains, but do
                # not keep or classify it.
                continue
            frames.append(data)

            if _is_speech(data):
                speaking_started = True
                trailing_silence = 0
            elif speaking_started:
                trailing_silence += 1
                if trailing_silence >= silence_frames_needed:
                    print("🔇 Конец речи")
                    break
            else:
                initial_silence += 1
                if initial_silence >= initial_silence_limit:
                    print("😴 Пользователь молчит, выхожу")
                    break
    except Exception as e:
        log.exception("Ошибка при записи аудио")
        print(f"⚠️ Ошибка записи: {e}")
    finally:
        # Always terminate PyAudio, even if stopping/closing the stream fails.
        try:
            stream.stop_stream()
            stream.close()
        finally:
            audio.terminate()

    if not speaking_started:
        return ""

    text = transcribe(frames)
    # Drop echo garbage: empty results, whitespace, a lone punctuation mark.
    if not text or len(text.strip()) < 2:
        return ""
    return text
|