Files
home-voice-assistant/satellite/audio.py
2026-04-12 15:59:25 +03:00

108 lines
3.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import pyaudio
import numpy as np
from .config import SILENCE_THRESHOLD, SILENCE_DURATION, MAX_DURATION, log
from .stt import transcribe
def record() -> str:
"""Запись до тишины (VAD) + STT"""
try:
audio = pyaudio.PyAudio()
stream = audio.open(
format=pyaudio.paInt16,
channels=1,
rate=16000,
input=True,
frames_per_buffer=1024,
)
except Exception as e:
log.exception("Не удалось открыть микрофон")
print(f"⚠️ Ошибка микрофона: {e}")
return ""
print("🎙️ Говори...")
frames = []
silent_chunks = 0
speaking_started = False
max_chunks = int(16000 / 1024 * MAX_DURATION)
silence_chunks_needed = int(16000 / 1024 * SILENCE_DURATION)
warmup_chunks = int(16000 / 1024 * 0.3) # 0.3 сек — эхо звука активации
try:
for i in range(max_chunks):
data = stream.read(1024, exception_on_overflow=False)
if i < warmup_chunks:
continue # пропускаем эхо от звука активации
frames.append(data)
amplitude = np.abs(np.frombuffer(data, dtype=np.int16)).mean()
if amplitude > SILENCE_THRESHOLD:
speaking_started = True
silent_chunks = 0
elif speaking_started:
silent_chunks += 1
if silent_chunks >= silence_chunks_needed:
print("🔇 Конец речи")
break
except Exception as e:
log.exception("Ошибка при записи аудио")
print(f"⚠️ Ошибка записи: {e}")
finally:
stream.stop_stream()
audio.terminate()
if not speaking_started:
return ""
return transcribe(frames)
def record_with_timeout(timeout: float = 8.0) -> str:
"""Слушает timeout секунд, возвращает пусто если речи не было"""
try:
audio = pyaudio.PyAudio()
stream = audio.open(
format=pyaudio.paInt16,
channels=1,
rate=16000,
input=True,
frames_per_buffer=1024,
)
except Exception as e:
log.exception("Не удалось открыть микрофон (followup)")
print(f"⚠️ Ошибка микрофона: {e}")
return ""
frames = []
silent_chunks = 0
speaking_started = False
max_chunks = int(16000 / 1024 * timeout)
silence_chunks_needed = int(16000 / 1024 * SILENCE_DURATION)
try:
for _ in range(max_chunks):
data = stream.read(1024, exception_on_overflow=False)
frames.append(data)
amplitude = np.abs(np.frombuffer(data, dtype=np.int16)).mean()
if amplitude > SILENCE_THRESHOLD:
speaking_started = True
silent_chunks = 0
elif speaking_started:
silent_chunks += 1
if silent_chunks >= silence_chunks_needed:
break
except Exception as e:
log.exception("Ошибка при записи аудио (followup)")
print(f"⚠️ Ошибка записи: {e}")
finally:
stream.stop_stream()
audio.terminate()
if not speaking_started:
return ""
return transcribe(frames)