108 lines
3.4 KiB
Python
108 lines
3.4 KiB
Python
import pyaudio
|
||
import numpy as np
|
||
|
||
from .config import SILENCE_THRESHOLD, SILENCE_DURATION, MAX_DURATION, log
|
||
from .stt import transcribe
|
||
|
||
|
||
def record() -> str:
    """Record from the microphone until silence is detected, then transcribe.

    Audio is captured as 16 kHz mono 16-bit PCM in 1024-frame chunks. The
    first ~0.3 s are read and discarded so the echo of the activation sound
    is not mistaken for speech. Recording stops once speech has started and
    has then been followed by ``SILENCE_DURATION`` seconds of chunks whose
    mean absolute amplitude is at or below ``SILENCE_THRESHOLD``, or after
    ``MAX_DURATION`` seconds, whichever comes first.

    Returns:
        The result of ``transcribe(frames)`` for the captured chunks, or an
        empty string if the microphone could not be opened or no chunk ever
        exceeded the silence threshold.
    """
    sample_rate = 16000
    chunk_size = 1024
    chunks_per_second = sample_rate / chunk_size

    audio = None
    try:
        audio = pyaudio.PyAudio()
        stream = audio.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=sample_rate,
            input=True,
            frames_per_buffer=chunk_size,
        )
    except Exception as e:
        log.exception("Не удалось открыть микрофон")
        print(f"⚠️ Ошибка микрофона: {e}")
        # PyAudio() may have succeeded even though open() failed; release it
        # so the PortAudio session is not leaked.
        if audio is not None:
            audio.terminate()
        return ""

    print("🎙️ Говори...")
    frames = []
    silent_chunks = 0
    speaking_started = False
    max_chunks = int(chunks_per_second * MAX_DURATION)
    silence_chunks_needed = int(chunks_per_second * SILENCE_DURATION)
    warmup_chunks = int(chunks_per_second * 0.3)  # 0.3 s: activation-sound echo

    try:
        for i in range(max_chunks):
            data = stream.read(chunk_size, exception_on_overflow=False)
            if i < warmup_chunks:
                continue  # skip the echo of the activation sound
            frames.append(data)

            # Widen to int32 before abs(): abs(-32768) overflows in int16 and
            # would stay negative, dragging the mean down.
            samples = np.frombuffer(data, dtype=np.int16).astype(np.int32)
            amplitude = np.abs(samples).mean()

            if amplitude > SILENCE_THRESHOLD:
                speaking_started = True
                silent_chunks = 0
            elif speaking_started:
                silent_chunks += 1
                if silent_chunks >= silence_chunks_needed:
                    print("🔇 Конец речи")
                    break
    except Exception as e:
        # Best effort: keep whatever was captured before the failure.
        log.exception("Ошибка при записи аудио")
        print(f"⚠️ Ошибка записи: {e}")
    finally:
        # Always terminate PortAudio, even if stopping/closing the stream fails.
        try:
            stream.stop_stream()
            stream.close()
        finally:
            audio.terminate()

    if not speaking_started:
        return ""

    return transcribe(frames)
|
||
|
||
|
||
def record_with_timeout(timeout: float = 8.0) -> str:
    """Listen for up to ``timeout`` seconds and transcribe any speech heard.

    Audio is captured as 16 kHz mono 16-bit PCM in 1024-frame chunks. Every
    chunk read is kept, including any silence before speech begins.
    Recording stops early once speech has started and has then been followed
    by ``SILENCE_DURATION`` seconds of chunks whose mean absolute amplitude
    is at or below ``SILENCE_THRESHOLD``.

    Args:
        timeout: Maximum listening time in seconds.

    Returns:
        The result of ``transcribe(frames)`` for the captured chunks, or an
        empty string if the microphone could not be opened or no chunk ever
        exceeded the silence threshold.
    """
    sample_rate = 16000
    chunk_size = 1024
    chunks_per_second = sample_rate / chunk_size

    audio = None
    try:
        audio = pyaudio.PyAudio()
        stream = audio.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=sample_rate,
            input=True,
            frames_per_buffer=chunk_size,
        )
    except Exception as e:
        log.exception("Не удалось открыть микрофон (followup)")
        print(f"⚠️ Ошибка микрофона: {e}")
        # PyAudio() may have succeeded even though open() failed; release it
        # so the PortAudio session is not leaked.
        if audio is not None:
            audio.terminate()
        return ""

    frames = []
    silent_chunks = 0
    speaking_started = False
    max_chunks = int(chunks_per_second * timeout)
    silence_chunks_needed = int(chunks_per_second * SILENCE_DURATION)

    try:
        for _ in range(max_chunks):
            data = stream.read(chunk_size, exception_on_overflow=False)
            frames.append(data)

            # Widen to int32 before abs(): abs(-32768) overflows in int16 and
            # would stay negative, dragging the mean down.
            samples = np.frombuffer(data, dtype=np.int16).astype(np.int32)
            amplitude = np.abs(samples).mean()

            if amplitude > SILENCE_THRESHOLD:
                speaking_started = True
                silent_chunks = 0
            elif speaking_started:
                silent_chunks += 1
                if silent_chunks >= silence_chunks_needed:
                    break
    except Exception as e:
        # Best effort: keep whatever was captured before the failure.
        log.exception("Ошибка при записи аудио (followup)")
        print(f"⚠️ Ошибка записи: {e}")
    finally:
        # Always terminate PortAudio, even if stopping/closing the stream fails.
        try:
            stream.stop_stream()
            stream.close()
        finally:
            audio.terminate()

    if not speaking_started:
        return ""

    return transcribe(frames)
|