Edit code for a successful run

This commit is contained in:
2026-04-12 21:58:40 +03:00
parent 128cc70ab9
commit 0a89bf5105
8 changed files with 111 additions and 101 deletions

View File

@@ -1,12 +1,15 @@
import os
import pyaudio
import numpy as np
from .config import SILENCE_THRESHOLD, SILENCE_DURATION, MAX_DURATION, log
from .stt import transcribe
# Warm-up period in seconds, read from the ECHO_WARMUP environment variable (default "0.5").
ECHO_WARMUP = float(os.getenv("ECHO_WARMUP", "0.5")) # seconds skipped at the start of recording — suppresses echo from TTS
def record() -> str:
"""Запись до тишины (VAD) + STT"""
"""Запись до тишины (VAD) + STT. Игнорирует ECHO_WARMUP в начале."""
try:
audio = pyaudio.PyAudio()
stream = audio.open(
@@ -27,13 +30,13 @@ def record() -> str:
speaking_started = False
max_chunks = int(16000 / 1024 * MAX_DURATION)
silence_chunks_needed = int(16000 / 1024 * SILENCE_DURATION)
warmup_chunks = int(16000 / 1024 * 0.3) # 0.3 сек — эхо звука активации
warmup_chunks = int(16000 / 1024 * ECHO_WARMUP)
try:
for i in range(max_chunks):
data = stream.read(1024, exception_on_overflow=False)
if i < warmup_chunks:
continue # пропускаем эхо от звука активации
continue # гасим эхо от TTS / звука активации
frames.append(data)
amplitude = np.abs(np.frombuffer(data, dtype=np.int16)).mean()
@@ -56,52 +59,8 @@ def record() -> str:
if not speaking_started:
return ""
return transcribe(frames)
def record_with_timeout(timeout: float = 8.0) -> str:
"""Слушает timeout секунд, возвращает пусто если речи не было"""
try:
audio = pyaudio.PyAudio()
stream = audio.open(
format=pyaudio.paInt16,
channels=1,
rate=16000,
input=True,
frames_per_buffer=1024,
)
except Exception as e:
log.exception("Не удалось открыть микрофон (followup)")
print(f"⚠️ Ошибка микрофона: {e}")
text = transcribe(frames)
# отсекаем мусор от эха (одиночные знаки препинания, пробелы)
if not text or not text.strip() or len(text.strip()) < 2:
return ""
frames = []
silent_chunks = 0
speaking_started = False
max_chunks = int(16000 / 1024 * timeout)
silence_chunks_needed = int(16000 / 1024 * SILENCE_DURATION)
try:
for _ in range(max_chunks):
data = stream.read(1024, exception_on_overflow=False)
frames.append(data)
amplitude = np.abs(np.frombuffer(data, dtype=np.int16)).mean()
if amplitude > SILENCE_THRESHOLD:
speaking_started = True
silent_chunks = 0
elif speaking_started:
silent_chunks += 1
if silent_chunks >= silence_chunks_needed:
break
except Exception as e:
log.exception("Ошибка при записи аудио (followup)")
print(f"⚠️ Ошибка записи: {e}")
finally:
stream.stop_stream()
audio.terminate()
if not speaking_started:
return ""
return transcribe(frames)
return text