Files
home-voice-assistant/record_wav.py
Daniil Klimov 780f6f0084 Switch wake word from Porcupine to openwakeword + training pipeline
- Add training/ pipeline (step_1..step_5) and own-samples flow
- record_wav.py with single-shot and long-record modes, RMS-based silence filter
- remove_silent.py to drop silent samples and renumber
- modes.py: openwakeword inference with reset() and quiet predictions; commented Lusya block for later
- stt.py: drop local faster-whisper fallback, Groq-only
- config.py: remove unused STT_PROVIDER/WHISPER_*
- llm.py: replace __import__("os") hack with proper import
- tts.py: remove debug traceback in play_error_sound
- requirements.txt: add openwakeword/sounddevice/scipy, drop faster-whisper
- deploy/setup.sh: validate ELEVENLABS_API_KEY and WAKE_WORD_COSMO presence
- README.md, CLAUDE.md, project_roadmap memory updated to reflect new architecture
2026-04-13 15:40:44 +03:00

98 lines
3.9 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import sounddevice as sd
import scipy.io.wavfile as wav
import numpy as np
import os
import sys
# Recordings whose RMS amplitude is below this are treated as near-silence
# and are not saved.
MIN_RMS = 300
SAMPLE_RATE = 16000
# Length, in seconds, of every saved WAV file.
CHUNK_DURATION = 2
# Device name or index taken from the INPUT_DEVICE environment variable;
# when it is unset the value stays None and the system default device is used.
INPUT_DEVICE = os.getenv("INPUT_DEVICE")
if INPUT_DEVICE and INPUT_DEVICE.isdigit():
    # A purely numeric value is converted so it is passed as an index, not a name.
    INPUT_DEVICE = int(INPUT_DEVICE)

# Two positional arguments are mandatory: the model name and the sample class.
if len(sys.argv) < 3:
    print("Использование: python record_wav.py <имя_модели> <positive/negative> [long [секунд]]")
    print("Примеры:")
    print(" python record_wav.py cosmo positive # по 2с с Enter")
    print(" python record_wav.py cosmo negative long # 5 минут подряд → нарезать")
    print(" python record_wav.py cosmo negative long 600 # 10 минут подряд")
    sys.exit(1)

MODEL_NAME = sys.argv[1]
MODE = sys.argv[2]
LONG_MODE = len(sys.argv) > 3 and sys.argv[3] == "long"

# Long-mode duration in seconds; 300 unless an explicit value follows "long".
LONG_DURATION = 300
if LONG_MODE and len(sys.argv) > 4:
    try:
        LONG_DURATION = int(sys.argv[4])
    except ValueError:
        # Fall through to the shared error path below instead of a traceback.
        LONG_DURATION = 0
    if LONG_DURATION <= 0:
        print(f"Длительность должна быть целым положительным числом секунд, получено: {sys.argv[4]!r}")
        sys.exit(1)

# Samples are stored under training/own_samples/<model>/<mode>, relative to
# the current working directory; the directory is created on start-up.
BASE_DIR = os.path.join("training", "own_samples", MODEL_NAME, MODE)
os.makedirs(BASE_DIR, exist_ok=True)
def next_index(directory=None) -> int:
    """Return the next free numeric sample index in a samples directory.

    The result is one more than the highest numbered ``NNN.wav`` file already
    present, so a gap in the numbering (for example after files were deleted)
    can never make a new recording overwrite an existing one. WAV files whose
    stem is not a plain number are ignored.

    Args:
        directory: Directory to scan. Defaults to the module-level BASE_DIR.

    Returns:
        1 for a directory without numbered WAV files, otherwise highest + 1.
    """
    if directory is None:
        directory = BASE_DIR
    highest = 0
    for name in os.listdir(directory):
        if not name.endswith('.wav'):
            continue
        stem = name[:-len('.wav')]
        # isdecimal() guarantees that int() accepts the stem.
        if stem.isdecimal():
            highest = max(highest, int(stem))
    return highest + 1
def record_sample():
    """Record one CHUNK_DURATION-second sample after the user presses Enter.

    The target file name is taken from next_index(). The take is written to
    BASE_DIR only when its RMS amplitude reaches MIN_RMS; otherwise a warning
    is printed and nothing is saved. Returns None in both cases.
    """
    sample_no = next_index()
    filename = f"{sample_no:03d}.wav"
    target = os.path.join(BASE_DIR, filename)

    print(f"\n[!] Файл {filename} готов к записи.")
    input(f"Нажмите Enter, чтобы начать запись ({CHUNK_DURATION} секунды)...")
    print("Запись...")

    frame_count = int(CHUNK_DURATION * SAMPLE_RATE)
    take = sd.rec(
        device=INPUT_DEVICE,
        frames=frame_count,
        samplerate=SAMPLE_RATE,
        channels=1,
        dtype='int16',
    )
    # Wait until the whole take has been captured before measuring it.
    sd.wait()

    # Promote to float64 first so squaring the int16 samples cannot overflow.
    mean_square = np.mean(take.astype(np.float64) ** 2)
    level = float(np.sqrt(mean_square))

    if level < MIN_RMS:
        print(f"⚠️ Тишина (RMS={level:.0f} < {MIN_RMS}) — не сохраняю, повтори")
    else:
        wav.write(target, SAMPLE_RATE, take)
        print(f"Сохранено: {target} (RMS={level:.0f})")
def record_long(total_seconds: int):
    """Record one continuous take, then slice it into CHUNK_DURATION-second WAVs.

    After the user presses Enter, total_seconds of audio are captured into a
    single buffer. Ctrl+C while waiting stops the capture early and processing
    continues with the buffer as it is. The buffer is cut into consecutive
    full-length chunks (a trailing partial chunk is dropped); chunks whose RMS
    is below MIN_RMS are skipped, the rest are written to BASE_DIR with
    consecutive numbers starting at next_index().
    """
    print(f"\n[!] Запись {total_seconds}с одним куском, потом нарежу по {CHUNK_DURATION}с.")
    input("Нажмите Enter, чтобы начать (Ctrl+C прервёт сохранение)...")
    print(f"🎙️ Запись... ({total_seconds}с)")

    total_frames = int(total_seconds * SAMPLE_RATE)
    buffer = sd.rec(
        device=INPUT_DEVICE,
        frames=total_frames,
        samplerate=SAMPLE_RATE,
        channels=1,
        dtype='int16',
    )
    try:
        sd.wait()
    except KeyboardInterrupt:
        sd.stop()
        print("\n⏹️ Прервано — сохраняю записанное")

    samples = buffer.flatten()
    piece_len = CHUNK_DURATION * SAMPLE_RATE
    piece_count = len(samples) // piece_len
    first_idx = next_index()

    written = 0
    quiet = 0
    for piece_no in range(piece_count):
        begin = piece_no * piece_len
        piece = samples[begin:begin + piece_len]
        # Promote to float64 first so squaring the int16 samples cannot overflow.
        level = float(np.sqrt(np.mean(piece.astype(np.float64) ** 2)))
        if level < MIN_RMS:
            quiet += 1
        else:
            # Number by saved count, so skipped chunks leave no gaps.
            target = os.path.join(BASE_DIR, f"{first_idx + written:03d}.wav")
            wav.write(target, SAMPLE_RATE, piece)
            written += 1

    print(f"\n✅ Нарезано {piece_count} кусков → сохранено {written}, пропущено тихих {quiet}")
# Script entry: announce the selected mode, then run it.
print(f"--- Режим записи: {MODEL_NAME} / {MODE}{' / LONG' if LONG_MODE else ''} ---")
try:
    if LONG_MODE:
        # One continuous take that is sliced into chunks afterwards.
        record_long(LONG_DURATION)
    else:
        # Single-shot mode: keep recording samples until the user hits Ctrl+C.
        print("Для выхода нажмите Ctrl+C")
        while True:
            record_sample()
except KeyboardInterrupt:
    # Covers both modes, so Ctrl+C at the Enter prompt (or while files are
    # being written) in long mode exits cleanly instead of with a traceback.
    print("\nЗапись завершена.")