Switch wake word from Porcupine to openwakeword + training pipeline

- Add training/ pipeline (step_1..step_5) and own-samples flow
- record_wav.py with single-shot and long-record modes, RMS-based silence filter
- remove_silent.py to drop silent samples and renumber
- modes.py: openwakeword inference with reset() and quiet predictions; Lusya block left commented out for later
- stt.py: drop local faster-whisper fallback, Groq-only
- config.py: remove unused STT_PROVIDER/WHISPER_*
- llm.py: replace __import__("os") hack with proper import
- tts.py: remove debug traceback in play_error_sound
- requirements.txt: add openwakeword/sounddevice/scipy, drop faster-whisper
- deploy/setup.sh: validate ELEVENLABS_API_KEY and WAKE_WORD_COSMO presence
- README.md, CLAUDE.md, project_roadmap memory updated to reflect new architecture
This commit is contained in:
2026-04-13 15:40:44 +03:00
parent 0a89bf5105
commit 780f6f0084
13 changed files with 378 additions and 140 deletions

View File

@@ -75,83 +75,67 @@ def run_with_enter():
def run_with_porcupine():
"""Режим продакшн — два wake word через Porcupine (для Pi)"""
import pvporcupine
import struct
from .config import AGENTS
porcupine_key = os.getenv("PORCUPINE_KEY")
wake_word_cosmo = os.getenv("WAKE_WORD_COSMO")
wake_word_lusya = os.getenv("WAKE_WORD_LUSYA")
if not porcupine_key:
print("❌ PORCUPINE_KEY не задан в .env")
sys.exit(1)
keyword_paths = []
wake_word_map = []
if wake_word_cosmo:
keyword_paths.append(wake_word_cosmo)
wake_word_map.append("cosmo")
if wake_word_lusya:
keyword_paths.append(wake_word_lusya)
wake_word_map.append("lusya")
if not keyword_paths:
print("❌ WAKE_WORD_COSMO или WAKE_WORD_LUSYA не заданы в .env")
sys.exit(1)
import numpy as np
import pyaudio
from openwakeword.model import Model
porcupine = pvporcupine.create(
access_key=porcupine_key,
keyword_paths=keyword_paths,
cosmo_model = Model(
wakeword_models=[os.getenv("WAKE_WORD_COSMO")],
inference_framework="onnx",
)
# TODO: подключить Люсю — раскомментировать когда модель lusya обучена
# lusya_model = Model(
# wakeword_models=[os.getenv("WAKE_WORD_LUSYA")],
# inference_framework="onnx",
# )
audio = pyaudio.PyAudio()
stream = audio.open(
rate=porcupine.sample_rate,
channels=1,
format=pyaudio.paInt16,
input=True,
frames_per_buffer=porcupine.frame_length,
)
# OpenWakeWord ожидает 16 kHz mono PCM 16-bit, фреймы по 1280 семплов (80 мс)
stream = audio.open(rate=16000, channels=1, format=pyaudio.paInt16,
input=True, frames_per_buffer=1280)
print("\n🦞 Cosmo Satellite запущен (режим: wake word)")
for agent_id in wake_word_map:
cfg = AGENTS[agent_id]
print(f" {cfg['name']:6s} : {cfg['gateway_url']}{cfg['agent']}")
print(f"\nСкажи 'Космо' или 'Люся'...\n")
print("✅ Слушаю через OpenWakeWord...")
print("\nСкажи 'Космо'...\n")
# print("\nСкажи 'Космо' или 'Люся'...\n") # TODO: после подключения Люси
try:
while True:
try:
pcm = stream.read(porcupine.frame_length)
pcm = struct.unpack_from("h" * porcupine.frame_length, pcm)
pcm = stream.read(1280, exception_on_overflow=False)
pcm = np.frombuffer(pcm, dtype=np.int16)
keyword_index = porcupine.process(pcm)
if keyword_index >= 0:
agent_id = wake_word_map[keyword_index]
agent_name = AGENTS[agent_id]["name"]
stop_speaking() # barge-in
print(f"✅ Услышал '{agent_name}'!")
cosmo_score = cosmo_model.predict(pcm)["cosmo"]
if cosmo_score > 0.1:
print(f"PREDICTION cosmo: {cosmo_score:.3f}")
# отпускаем микрофон на время диалога
if cosmo_score > 0.5:
print("✅ Услышал 'Космо'!")
stream.stop_stream()
_conversation_loop(agent_id, agent_name)
_conversation_loop("cosmo", "Cosmo")
cosmo_model.reset()
stream.start_stream()
continue
# TODO: Люся — раскомментировать когда модель готова
# lusya_score = lusya_model.predict(pcm)["lusya"]
# if lusya_score > 0.1:
# print(f"PREDICTION lusya: {lusya_score:.3f}")
# if lusya_score > 0.5:
# print("✅ Услышала 'Люся'!")
# stream.stop_stream()
# _conversation_loop("lusya", "Люся")
# lusya_model.reset()
# stream.start_stream()
# continue
except KeyboardInterrupt:
raise
except Exception as e:
log.exception("Непредвиденная ошибка в цикле Porcupine")
log.exception("Непредвиденная ошибка в wake-word цикле")
print(f"⚠️ Ошибка: {e} — продолжаю слушать...\n")
except KeyboardInterrupt:
print("\n👋 Выход")
finally:
stream.stop_stream()
stream.close()
audio.terminate()
porcupine.delete()