Initial commit: Cosmo Voice Satellite
Two-agent voice assistant (Cosmo + Люся) via OpenClaw Gateway. Streaming STT (Groq) + LLM + TTS (ElevenLabs) pipeline with keep-alive sessions, barge-in, and daily conversation sessions. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
55
satellite/stt.py
Normal file
55
satellite/stt.py
Normal file
@@ -0,0 +1,55 @@
|
||||
import io
|
||||
import wave
|
||||
|
||||
from .config import groq_client, STT_PROVIDER, WHISPER_MODEL, WHISPER_LANG, log
|
||||
|
||||
|
||||
def transcribe_groq_bytes(
    wav_bytes: bytes,
    *,
    model: str = "whisper-large-v3-turbo",
    language: str = "ru",
) -> str:
    """Send in-memory WAV bytes to Groq for transcription, without touching disk.

    Args:
        wav_bytes: A complete WAV file held in memory.
        model: Groq transcription model identifier.
        language: Language code passed to the transcription request.

    Returns:
        The ``text`` attribute of the transcription result.
    """
    buf = io.BytesIO(wav_bytes)
    # The buffer is given a file name because BytesIO has none of its own;
    # presumably the client uses it to infer the upload's format — TODO confirm.
    buf.name = "audio.wav"
    result = groq_client.audio.transcriptions.create(
        file=buf,
        model=model,
        language=language,
    )
    return result.text
|
||||
|
||||
|
||||
def frames_to_wav(
    frames: list[bytes],
    *,
    sample_rate: int = 16000,
    channels: int = 1,
    sample_width: int = 2,
) -> bytes:
    """Wrap raw PCM frames in a WAV container, entirely in memory.

    Args:
        frames: Chunks of raw PCM audio; they are concatenated in order.
        sample_rate: Sampling frequency in Hz written to the WAV header.
        channels: Number of audio channels written to the WAV header.
        sample_width: Bytes per sample written to the WAV header.

    Returns:
        The bytes of a complete WAV file (header plus audio data).
    """
    buf = io.BytesIO()
    # The context manager finalizes the header even if writing fails part-way.
    # Closing the wave writer does not close ``buf``, because the buffer was
    # passed in as a file object rather than opened by ``wave`` itself.
    with wave.open(buf, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(sample_rate)
        wf.writeframes(b"".join(frames))
    return buf.getvalue()
|
||||
|
||||
|
||||
# Local Whisper models already loaded, keyed by model name, so the weights are
# read once per process instead of once per utterance.
_WHISPER_MODELS = {}


def _load_whisper_model():
    """Return the local faster-whisper model, loading it on first use only."""
    # Imported lazily so a Groq-only setup does not need faster_whisper.
    from faster_whisper import WhisperModel

    model = _WHISPER_MODELS.get(WHISPER_MODEL)
    if model is None:
        model = WhisperModel(WHISPER_MODEL, device="cpu", compute_type="int8")
        _WHISPER_MODELS[WHISPER_MODEL] = model
    return model


def transcribe(frames: list[bytes]) -> str:
    """Transcribe raw PCM audio frames to text.

    Uses Groq when ``STT_PROVIDER`` is ``"groq"`` (fully in memory); otherwise
    falls back to a local faster-whisper model, which is fed through a
    temporary WAV file.

    Args:
        frames: Chunks of raw PCM audio, in playback order.

    Returns:
        The recognized text, or an empty string when there is no audio or
        when recognition fails (the failure is logged and printed).
    """
    # Nothing was recorded: skip the WAV build and the STT round-trip.
    if not frames:
        return ""

    try:
        wav_bytes = frames_to_wav(frames)

        if STT_PROVIDER == "groq":
            return transcribe_groq_bytes(wav_bytes)

        # Whisper fallback: the audio is handed over as a file on disk.
        import os
        import tempfile

        model = _load_whisper_model()
        # delete=False so the file survives the ``with`` block and can be
        # reopened by path; it is removed explicitly in ``finally`` below.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            f.write(wav_bytes)
            tmp_path = f.name
        try:
            segments, _ = model.transcribe(tmp_path, language=WHISPER_LANG)
            return " ".join(s.text for s in segments).strip()
        finally:
            os.unlink(tmp_path)
    except Exception as e:
        # Boundary handler: recognition problems must not crash the caller.
        log.exception("STT ошибка")
        print(f"⚠️ Ошибка распознавания речи: {e}")
        return ""
|
||||
Reference in New Issue
Block a user