Files
home-voice-assistant/satellite/audio.py

67 lines
2.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import os
import pyaudio
import numpy as np
from .config import SILENCE_THRESHOLD, SILENCE_DURATION, MAX_DURATION, log
from .stt import transcribe
ECHO_WARMUP = float(os.getenv("ECHO_WARMUP", "0.5")) # сек пропуска в начале — гасит эхо от TTS
def record() -> str:
"""Запись до тишины (VAD) + STT. Игнорирует ECHO_WARMUP в начале."""
try:
audio = pyaudio.PyAudio()
stream = audio.open(
format=pyaudio.paInt16,
channels=1,
rate=16000,
input=True,
frames_per_buffer=1024,
)
except Exception as e:
log.exception("Не удалось открыть микрофон")
print(f"⚠️ Ошибка микрофона: {e}")
return ""
print("🎙️ Говори...")
frames = []
silent_chunks = 0
speaking_started = False
max_chunks = int(16000 / 1024 * MAX_DURATION)
silence_chunks_needed = int(16000 / 1024 * SILENCE_DURATION)
warmup_chunks = int(16000 / 1024 * ECHO_WARMUP)
try:
for i in range(max_chunks):
data = stream.read(1024, exception_on_overflow=False)
if i < warmup_chunks:
continue # гасим эхо от TTS / звука активации
frames.append(data)
amplitude = np.abs(np.frombuffer(data, dtype=np.int16)).mean()
if amplitude > SILENCE_THRESHOLD:
speaking_started = True
silent_chunks = 0
elif speaking_started:
silent_chunks += 1
if silent_chunks >= silence_chunks_needed:
print("🔇 Конец речи")
break
except Exception as e:
log.exception("Ошибка при записи аудио")
print(f"⚠️ Ошибка записи: {e}")
finally:
stream.stop_stream()
audio.terminate()
if not speaking_started:
return ""
text = transcribe(frames)
# отсекаем мусор от эха (одиночные знаки препинания, пробелы)
if not text or not text.strip() or len(text.strip()) < 2:
return ""
return text