Switch wake word from Porcupine to openwakeword + training pipeline
- Add training/ pipeline (step_1..step_5) and own-samples flow
- record_wav.py with single-shot and long-record modes, RMS-based silence filter
- remove_silent.py to drop silent samples and renumber
- modes.py: openwakeword inference with reset() and quiet predictions; commented Lusya block for later
- stt.py: drop local faster-whisper fallback, Groq-only
- config.py: remove unused STT_PROVIDER/WHISPER_*
- llm.py: replace __import__("os") hack with proper import
- tts.py: remove debug traceback in play_error_sound
- requirements.txt: add openwakeword/sounddevice/scipy, drop faster-whisper
- deploy/setup.sh: validate ELEVENLABS_API_KEY and WAKE_WORD_COSMO presence
- README.md, CLAUDE.md, project_roadmap memory updated to reflect new architecture
This commit is contained in:
105
record_wav.py
105
record_wav.py
@@ -1,48 +1,97 @@
|
||||
import sounddevice as sd
|
||||
import scipy.io.wavfile as wav
|
||||
import numpy as np
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Recording parameters and command-line handling.
#
# Usage: python record_wav.py <model_name> <positive/negative> [long [seconds]]
# Samples are written to training/own_samples/<model_name>/<mode>/.

MIN_RMS = 300  # clips whose RMS is below this are treated as near-silence and not saved
SAMPLE_RATE = 16000
CHUNK_DURATION = 2  # seconds per saved wav

# Device name or index taken from the environment; None means the system default.
INPUT_DEVICE = os.getenv("INPUT_DEVICE")
if INPUT_DEVICE and INPUT_DEVICE.isdigit():
    # A purely numeric value is passed on as an integer device index.
    INPUT_DEVICE = int(INPUT_DEVICE)

if len(sys.argv) < 3:
    print("Использование: python record_wav.py <имя_модели> <positive/negative> [long [секунд]]")
    print("Примеры:")
    print(" python record_wav.py cosmo positive # по 2с с Enter")
    print(" python record_wav.py cosmo negative long # 5 минут подряд → нарезать")
    print(" python record_wav.py cosmo negative long 600 # 10 минут подряд")
    sys.exit(1)

MODEL_NAME = sys.argv[1]
MODE = sys.argv[2]
LONG_MODE = len(sys.argv) > 3 and sys.argv[3] == "long"

# Long-mode duration in seconds; defaults to 5 minutes when not given.
LONG_DURATION = 300
if LONG_MODE and len(sys.argv) > 4:
    try:
        LONG_DURATION = int(sys.argv[4])
    except ValueError:
        LONG_DURATION = 0
    if LONG_DURATION <= 0:
        # Fail with a readable message instead of a traceback or a
        # zero/negative frame count reaching the audio backend.
        print(f"Ошибка: длительность должна быть положительным целым числом секунд, получено {sys.argv[4]!r}")
        sys.exit(1)

# Output directory for this model/mode; created on demand.
BASE_DIR = os.path.join("training", "own_samples", MODEL_NAME, MODE)
os.makedirs(BASE_DIR, exist_ok=True)
|
||||
|
||||
|
||||
def next_index(directory=None) -> int:
    """Return the number to use for the next ``NNN.wav`` sample.

    Args:
        directory: Folder to scan. Defaults to the module-level ``BASE_DIR``
            when omitted, which is how existing callers use this function.

    Returns:
        One more than the larger of (a) the count of ``.wav`` files and
        (b) the highest purely numeric ``.wav`` stem in the folder. Using the
        highest existing number keeps a new sample from overwriting an
        existing file when the numbering has gaps (e.g. after a file was
        deleted by hand).
    """
    if directory is None:
        directory = BASE_DIR
    wav_names = [name for name in os.listdir(directory) if name.endswith('.wav')]
    highest = max(
        (int(name[:-4]) for name in wav_names if name[:-4].isdecimal()),
        default=0,
    )
    return max(len(wav_names), highest) + 1
|
||||
|
||||
def get_next_filename(directory):
    """Return the next free zero-padded sample name, e.g. ``"004.wav"``.

    Args:
        directory: Folder holding the numbered ``.wav`` samples.

    Returns:
        A file name (not a full path) of the form ``NNN.wav``. The number is
        one more than the larger of the ``.wav`` file count and the highest
        purely numeric ``.wav`` stem, so a gap in the numbering never causes
        an existing file to be reused.
    """
    wav_names = [name for name in os.listdir(directory) if name.endswith('.wav')]
    highest = max(
        (int(name[:-4]) for name in wav_names if name[:-4].isdecimal()),
        default=0,
    )
    return f"{max(len(wav_names), highest) + 1:03d}.wav"
|
||||
|
||||
def record_sample():
    """Record one CHUNK_DURATION-second clip after Enter and save it if audible.

    The target name is the next ``NNN.wav`` in ``BASE_DIR``. The clip is
    captured as mono int16 at ``SAMPLE_RATE`` from ``INPUT_DEVICE``. Its RMS
    is computed before anything is written: clips below ``MIN_RMS`` are
    reported and discarded, so the same file number is offered again on the
    next call.

    Returns:
        None. The result is the file on disk plus console output.
    """
    idx = next_index()
    filename = f"{idx:03d}.wav"
    filepath = os.path.join(BASE_DIR, filename)

    print(f"\n[!] Файл {filename} готов к записи.")
    input(f"Нажмите Enter, чтобы начать запись ({CHUNK_DURATION} секунды)...")

    print("Запись...")
    recording = sd.rec(
        device=INPUT_DEVICE,
        frames=int(CHUNK_DURATION * SAMPLE_RATE),
        samplerate=SAMPLE_RATE,
        channels=1,
        dtype='int16',
    )
    sd.wait()  # block until the clip has been captured

    # Widen to float64 before squaring so int16 samples cannot overflow.
    rms = float(np.sqrt(np.mean(recording.astype(np.float64) ** 2)))
    if rms < MIN_RMS:
        print(f"⚠️ Тишина (RMS={rms:.0f} < {MIN_RMS}) — не сохраняю, повтори")
        return

    # Write only after the silence check, so quiet takes never reach disk.
    wav.write(filepath, SAMPLE_RATE, recording)
    print(f"Сохранено: {filepath} (RMS={rms:.0f})")
|
||||
|
||||
# 2. Main recording loop
# NOTE(review): this looks like the pre-change entry point that the diff
# removes; the long/short dispatch at the end of the file prints the same
# banner and runs the same loop. Confirm against the committed file before
# relying on this copy.
|
||||
print(f"--- Режим записи: {MODEL_NAME} / {MODE} ---")
|
||||
print("Для выхода нажмите Ctrl+C")
|
||||
|
||||
# Keep recording samples until the user interrupts with Ctrl+C.
try:
|
||||
while True:
|
||||
record_sample()
|
||||
except KeyboardInterrupt:
|
||||
print("\nЗапись завершена.")
|
||||
def record_long(total_seconds: int):
    """Record continuously, then slice into CHUNK_DURATION-second wav files.

    Args:
        total_seconds: Length of the continuous recording, in seconds.

    After Enter is pressed, audio is captured as mono int16 at
    ``SAMPLE_RATE`` from ``INPUT_DEVICE``. Ctrl+C during capture stops the
    stream and keeps what was recorded so far. The audio is cut into
    consecutive full chunks (a trailing partial chunk is dropped); chunks
    whose RMS is below ``MIN_RMS`` are skipped, the rest are written to
    ``BASE_DIR`` with consecutive numbers starting at ``next_index()``.

    Returns:
        None. A summary of saved and skipped chunks is printed.
    """
    print(f"\n[!] Запись {total_seconds}с одним куском, потом нарежу по {CHUNK_DURATION}с.")
    input("Нажмите Enter, чтобы начать (Ctrl+C прервёт сохранение)...")

    print(f"🎙️ Запись... ({total_seconds}с)")
    # Hand sounddevice a zero-filled buffer. When `out` is omitted the
    # library allocates the array itself and does not document it as
    # cleared, so after an early Ctrl+C the never-recorded tail could hold
    # arbitrary values and slip past the RMS filter. With explicit zeros the
    # unrecorded tail has RMS 0 and is skipped as silence.
    buffer = np.zeros((int(total_seconds * SAMPLE_RATE), 1), dtype='int16')
    recording = sd.rec(out=buffer, samplerate=SAMPLE_RATE, device=INPUT_DEVICE)
    try:
        sd.wait()
    except KeyboardInterrupt:
        sd.stop()
        print("\n⏹️ Прервано — сохраняю записанное")

    audio = recording.flatten()
    chunk_samples = CHUNK_DURATION * SAMPLE_RATE
    n_chunks = len(audio) // chunk_samples
    saved = skipped = 0
    start_idx = next_index()

    for i in range(n_chunks):
        chunk = audio[i * chunk_samples:(i + 1) * chunk_samples]
        # Widen to float64 before squaring so int16 samples cannot overflow.
        rms = float(np.sqrt(np.mean(chunk.astype(np.float64) ** 2)))
        if rms < MIN_RMS:
            skipped += 1
            continue
        # Number by saved count, so skipped chunks leave no gaps.
        filename = f"{start_idx + saved:03d}.wav"
        wav.write(os.path.join(BASE_DIR, filename), SAMPLE_RATE, chunk)
        saved += 1

    print(f"\n✅ Нарезано {n_chunks} кусков → сохранено {saved}, пропущено тихих {skipped}")
|
||||
|
||||
|
||||
# Entry point: print the banner, then run either one long recording or an
# endless series of single-shot recordings.
print(f"--- Режим записи: {MODEL_NAME} / {MODE}{' / LONG' if LONG_MODE else ''} ---")

try:
    if LONG_MODE:
        record_long(LONG_DURATION)
    else:
        print("Для выхода нажмите Ctrl+C")
        while True:
            record_sample()
except KeyboardInterrupt:
    # One handler for both modes: Ctrl+C at a prompt (including the long-mode
    # "press Enter" prompt) ends the script cleanly instead of with a traceback.
    print("\nЗапись завершена.")
|
||||
|
||||
Reference in New Issue
Block a user