- Add training/ pipeline (step_1..step_5) and own-samples flow
- record_wav.py with single-shot and long-record modes, RMS-based silence filter
- remove_silent.py to drop silent samples and renumber
- modes.py: openwakeword inference with reset() and quiet predictions; Lusya block commented out for later
- stt.py: drop local faster-whisper fallback, Groq-only
- config.py: remove unused STT_PROVIDER/WHISPER_*
- llm.py: replace __import__("os") hack with proper import
- tts.py: remove debug traceback in play_error_sound
- requirements.txt: add openwakeword/sounddevice/scipy, drop faster-whisper
- deploy/setup.sh: validate ELEVENLABS_API_KEY and WAKE_WORD_COSMO presence
- README.md, CLAUDE.md, project_roadmap memory updated to reflect new architecture
35 lines
995 B
Python
35 lines
995 B
Python
import io
|
|
import wave
|
|
|
|
from .config import groq_client, log
|
|
|
|
|
|
def frames_to_wav(
    frames: list[bytes],
    *,
    sample_rate: int = 16000,
    channels: int = 1,
    sample_width: int = 2,
) -> bytes:
    """Pack raw PCM frames into a WAV file held entirely in memory (no disk).

    Args:
        frames: Chunks of raw PCM audio; they are concatenated in order.
        sample_rate: Sampling rate in Hz written to the WAV header.
        channels: Number of audio channels written to the WAV header.
        sample_width: Bytes per sample written to the WAV header.

    Returns:
        The complete WAV file contents (header plus audio data) as bytes.
    """
    buf = io.BytesIO()
    # The context manager closes the writer even if writing raises, so the
    # header is always finalised and the writer is never left dangling.
    with wave.open(buf, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(sample_rate)
        wf.writeframes(b"".join(frames))
    # Read the buffer only after the writer has been closed.
    return buf.getvalue()
|
|
|
|
|
|
def transcribe(frames: list[bytes]) -> str:
    """Transcribe speech with Groq's whisper-large-v3-turbo, all in memory.

    Args:
        frames: Chunks of raw PCM audio, packed into a WAV container by
            ``frames_to_wav`` before upload.

    Returns:
        The recognised text, or an empty string when there is no audio to
        send or when recognition fails for any reason.
    """
    # Nothing was recorded (no chunks, or only empty ones): there is nothing
    # to transcribe, so skip the network request entirely.
    if not any(frames):
        return ""

    try:
        wav_bytes = frames_to_wav(frames)
        buf = io.BytesIO(wav_bytes)
        # The in-memory stream is given a file name before being handed to the
        # client — presumably used to infer the audio format; confirm against
        # the Groq SDK.
        buf.name = "audio.wav"
        result = groq_client.audio.transcriptions.create(
            file=buf,
            model="whisper-large-v3-turbo",
            language="ru",
        )
        return result.text
    except Exception as e:
        # Best-effort boundary: log the full traceback, tell the user, and
        # return an empty transcript instead of propagating the failure.
        log.exception("STT ошибка")
        print(f"⚠️ Ошибка распознавания речи: {e}")
        return ""
|