# home-voice-assistant/satellite/stt.py

import io
import wave

from .config import groq_client, log


def frames_to_wav(
    frames: list[bytes],
    *,
    channels: int = 1,
    sample_width: int = 2,
    sample_rate: int = 16000,
) -> bytes:
    """Pack raw PCM frames into an in-memory WAV container (no disk I/O).

    Args:
        frames: Chunks of raw PCM audio; they are concatenated in order.
        channels: Number of audio channels written to the header (default mono).
        sample_width: Bytes per sample (default 2, i.e. 16-bit PCM).
        sample_rate: Sampling frequency in Hz (default 16000).

    Returns:
        The complete WAV file contents as bytes. An empty ``frames`` list
        yields a valid header-only WAV.
    """
    buf = io.BytesIO()
    # The context manager guarantees the writer is closed (which is what
    # patches the final sizes into the RIFF header) even if a write fails.
    with wave.open(buf, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(sample_rate)
        wf.writeframes(b"".join(frames))
    return buf.getvalue()


def transcribe(frames: list[bytes]) -> str:
    """Transcribe PCM frames to text with Groq's whisper-large-v3-turbo model.

    The audio is wrapped into a WAV container and uploaded entirely from
    memory; nothing is written to disk. The request passes ``language="ru"``.

    Args:
        frames: Chunks of raw PCM audio, passed on to ``frames_to_wav``.

    Returns:
        The ``text`` attribute of the transcription result, or an empty
        string when there is no audio to send or when any error occurs
        (errors are logged and echoed to stdout, never raised).
    """
    # Nothing captured (no frames, or only empty ones): skip the pointless
    # network round trip for a header-only WAV.
    if not any(frames):
        return ""
    try:
        upload = io.BytesIO(frames_to_wav(frames))
        # A file-like object needs a name so the upload carries a filename
        # with a .wav extension.
        upload.name = "audio.wav"
        result = groq_client.audio.transcriptions.create(
            file=upload,
            model="whisper-large-v3-turbo",
            language="ru",
        )
        return result.text
    except Exception as e:
        # Deliberate best-effort boundary: a failed recognition must not
        # crash the caller, so report the problem and return no text.
        log.exception("STT ошибка")
        print(f"⚠️  Ошибка распознавания речи: {e}")
        return ""