Switch wake word from Porcupine to openwakeword + training pipeline

- Add training/ pipeline (step_1..step_5) and own-samples flow
- record_wav.py with single-shot and long-record modes, RMS-based silence filter
- remove_silent.py to drop silent samples and renumber
- modes.py: openwakeword inference with reset() and quiet predictions; Lusya block commented out for later
- stt.py: drop local faster-whisper fallback, Groq-only
- config.py: remove unused STT_PROVIDER/WHISPER_*
- llm.py: replace __import__("os") hack with proper import
- tts.py: remove debug traceback in play_error_sound
- requirements.txt: add openwakeword/sounddevice/scipy, drop faster-whisper
- deploy/setup.sh: validate ELEVENLABS_API_KEY and WAKE_WORD_COSMO presence
- README.md, CLAUDE.md, project_roadmap memory updated to reflect new architecture
2026-04-13 15:40:44 +03:00
parent 0a89bf5105
commit 780f6f0084
13 changed files with 378 additions and 140 deletions
--- a/satellite/stt.py
+++ b/satellite/stt.py
@@ -1,23 +1,11 @@
 import io
 import wave

-from .config import groq_client, STT_PROVIDER, WHISPER_MODEL, WHISPER_LANG, log
-
-
-def transcribe_groq_bytes(wav_bytes: bytes) -> str:
-    """Отправляет WAV байты в Groq без записи на диск"""
-    buf = io.BytesIO(wav_bytes)
-    buf.name = "audio.wav"
-    result = groq_client.audio.transcriptions.create(
-        file=buf,
-        model="whisper-large-v3-turbo",
-        language="ru",
-    )
-    return result.text
+from .config import groq_client, log


 def frames_to_wav(frames: list[bytes]) -> bytes:
-    """Конвертирует сырые PCM фреймы в WAV в памяти"""
+    """Сырые PCM-фреймы → WAV в памяти (без диска)."""
    buf = io.BytesIO()
    wf = wave.open(buf, "wb")
    wf.setnchannels(1)
@@ -29,26 +17,17 @@ def frames_to_wav(frames: list[bytes]) -> bytes:


 def transcribe(frames: list[bytes]) -> str:
-    """Транскрибирует аудио фреймы — всё в памяти, без диска"""
+    """STT через Groq whisper-large-v3-turbo. Всё в памяти."""
    try:
        wav_bytes = frames_to_wav(frames)
-
-        if STT_PROVIDER == "groq":
-            return transcribe_groq_bytes(wav_bytes)
-
-        # Whisper fallback — нужен файл на диске
-        import tempfile
-        import os
-        from faster_whisper import WhisperModel
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
-            f.write(wav_bytes)
-            tmp_path = f.name
-        try:
-            model = WhisperModel(WHISPER_MODEL, device="cpu", compute_type="int8")
-            segments, _ = model.transcribe(tmp_path, language=WHISPER_LANG)
-            return " ".join(s.text for s in segments).strip()
-        finally:
-            os.unlink(tmp_path)
+        buf = io.BytesIO(wav_bytes)
+        buf.name = "audio.wav"
+        result = groq_client.audio.transcriptions.create(
+            file=buf,
+            model="whisper-large-v3-turbo",
+            language="ru",
+        )
+        return result.text
    except Exception as e:
        log.exception("STT ошибка")
        print(f"⚠️  Ошибка распознавания речи: {e}")