- Add training/ pipeline (step_1..step_5) and own-samples flow
- record_wav.py with single-shot and long-record modes, RMS-based silence filter
- remove_silent.py to drop silent samples and renumber
- modes.py: openwakeword inference with reset() and quiet predictions; commented Lusya block for later
- stt.py: drop local faster-whisper fallback, Groq-only
- config.py: remove unused STT_PROVIDER/WHISPER_*
- llm.py: replace __import__("os") hack with proper import
- tts.py: remove debug traceback in play_error_sound
- requirements.txt: add openwakeword/sounddevice/scipy, drop faster-whisper
- deploy/setup.sh: validate ELEVENLABS_API_KEY and WAKE_WORD_COSMO presence
- README.md, CLAUDE.md, project_roadmap memory updated to reflect new architecture
98 lines
3.9 KiB
Python
98 lines
3.9 KiB
Python
import sounddevice as sd
|
||
import scipy.io.wavfile as wav
|
||
import numpy as np
|
||
import os
|
||
import sys
|
||
|
||
# Recordings whose RMS falls below this value are treated as near-silence
# and are not saved.
MIN_RMS = 300
# Sampling rate used for every recording, in Hz.
SAMPLE_RATE = 16000
# Length of a single saved wav file, in seconds.
CHUNK_DURATION = 2

# INPUT_DEVICE comes from the environment and may be a device name or a
# numeric index; when unset it stays None (system default device).
# A purely numeric value is converted to int so it is used as an index.
_raw_input_device = os.environ.get("INPUT_DEVICE")
INPUT_DEVICE = (
    int(_raw_input_device)
    if _raw_input_device and _raw_input_device.isdigit()
    else _raw_input_device
)
|
||
|
||
# The model name and the sample class are both mandatory positional
# arguments; without them print the usage text and exit with status 1.
if len(sys.argv) <= 2:
    _usage_lines = (
        "Использование: python record_wav.py <имя_модели> <positive/negative> [long [секунд]]",
        "Примеры:",
        " python record_wav.py cosmo positive # по 2с с Enter",
        " python record_wav.py cosmo negative long # 5 минут подряд → нарезать",
        " python record_wav.py cosmo negative long 600 # 10 минут подряд",
    )
    for _usage_line in _usage_lines:
        print(_usage_line)
    sys.exit(1)
|
||
|
||
# Positional arguments: <model name> <sample class> [long [seconds]].
MODEL_NAME = sys.argv[1]
MODE = sys.argv[2]
LONG_MODE = len(sys.argv) > 3 and sys.argv[3] == "long"

# Long recordings default to 300 seconds unless a duration is given.
LONG_DURATION = 300
if LONG_MODE and len(sys.argv) > 4:
    # Reject non-numeric or non-positive durations with a readable message
    # instead of a ValueError traceback or an invalid frame count later on.
    try:
        LONG_DURATION = int(sys.argv[4])
    except ValueError:
        LONG_DURATION = 0
    if LONG_DURATION <= 0:
        print(
            "Ошибка: длительность должна быть положительным целым числом секунд, "
            f"получено: {sys.argv[4]!r}"
        )
        sys.exit(1)

# Samples are stored under training/own_samples/<model>/<class>/; the
# directory is created only after the arguments have been validated.
BASE_DIR = os.path.join("training", "own_samples", MODEL_NAME, MODE)
os.makedirs(BASE_DIR, exist_ok=True)
|
||
|
||
|
||
def next_index(directory=None) -> int:
    """Return the next free sample number in a directory of numbered wav files.

    The result is one more than the highest numeric ``NNN.wav`` stem found,
    so gaps in the numbering (for example after deleting a bad sample by
    hand) never cause an existing file to be overwritten. A plain file
    count would return an index that is already taken in that situation.

    Args:
        directory: Directory to scan. Defaults to the module-level BASE_DIR.

    Returns:
        1 when the directory holds no numbered wav files, otherwise the
        highest existing number plus one.
    """
    target_dir = BASE_DIR if directory is None else directory
    highest = 0
    for entry in os.listdir(target_dir):
        stem, ext = os.path.splitext(entry)
        # isdecimal() guarantees int() succeeds; other wav names are ignored.
        if ext == ".wav" and stem.isdecimal():
            highest = max(highest, int(stem))
    return highest + 1
|
||
|
||
|
||
def record_sample():
    """Record a single CHUNK_DURATION-second sample after the user presses Enter.

    The target file name is taken from next_index(). The take is written to
    BASE_DIR only when its RMS reaches MIN_RMS; quieter takes are reported
    and discarded so the user can retry.
    """
    filename = f"{next_index():03d}.wav"
    filepath = os.path.join(BASE_DIR, filename)

    print(f"\n[!] Файл {filename} готов к записи.")
    input(f"Нажмите Enter, чтобы начать запись ({CHUNK_DURATION} секунды)...")

    print("Запись...")
    frame_count = int(CHUNK_DURATION * SAMPLE_RATE)
    recording = sd.rec(
        frames=frame_count,
        samplerate=SAMPLE_RATE,
        channels=1,
        dtype='int16',
        device=INPUT_DEVICE,
    )
    # Block until the whole take has been captured.
    sd.wait()

    # Compute RMS in float64 so squaring int16 samples cannot overflow.
    samples = recording.astype(np.float64)
    rms = float(np.sqrt(np.mean(np.square(samples))))

    if rms >= MIN_RMS:
        wav.write(filepath, SAMPLE_RATE, recording)
        print(f"Сохранено: {filepath} (RMS={rms:.0f})")
        return

    print(f"⚠️ Тишина (RMS={rms:.0f} < {MIN_RMS}) — не сохраняю, повтори")
|
||
|
||
|
||
def record_long(total_seconds: int):
    """Record one continuous take and slice it into CHUNK_DURATION-second wav files.

    Waits for Enter, records up to ``total_seconds`` seconds from
    INPUT_DEVICE, then cuts the audio into consecutive chunks. Every chunk
    whose RMS reaches MIN_RMS is written to BASE_DIR, numbered from
    next_index(); quieter chunks are skipped. Pressing Ctrl+C while
    recording stops early and keeps only the audio captured so far.

    Args:
        total_seconds: Maximum length of the take, in seconds.
    """
    print(f"\n[!] Запись {total_seconds}с одним куском, потом нарежу по {CHUNK_DURATION}с.")
    input("Нажмите Enter, чтобы начать (Ctrl+C прервёт сохранение)...")

    print(f"🎙️ Запись... ({total_seconds}с)")
    # Hand sd.rec() a zero-filled buffer instead of letting it allocate one.
    # NOTE(review): sounddevice appears to allocate its own buffer without
    # initialising it — confirm against the library docs. With a zeroed
    # buffer, the part never written after an early stop is true silence
    # rather than arbitrary data that could pass the RMS filter.
    buffer = np.zeros((int(total_seconds * SAMPLE_RATE), 1), dtype='int16')
    recording = sd.rec(out=buffer, samplerate=SAMPLE_RATE, device=INPUT_DEVICE)
    interrupted = False
    try:
        sd.wait()
    except KeyboardInterrupt:
        sd.stop()
        interrupted = True
        print("\n⏹️ Прервано — сохраняю записанное")

    audio = recording.flatten()
    if interrupted:
        # Drop the never-written zero tail so the chunk statistics describe
        # only audio that was actually captured.
        written = np.flatnonzero(audio)
        audio = audio[:written[-1] + 1] if written.size else audio[:0]

    # int() keeps slice bounds valid even if CHUNK_DURATION becomes fractional.
    chunk_samples = int(CHUNK_DURATION * SAMPLE_RATE)
    n_chunks = len(audio) // chunk_samples
    saved = skipped = 0
    start_idx = next_index()

    for i in range(n_chunks):
        chunk = audio[i * chunk_samples:(i + 1) * chunk_samples]
        # Compute RMS in float64 so squaring int16 samples cannot overflow.
        rms = float(np.sqrt(np.mean(chunk.astype(np.float64) ** 2)))
        if rms < MIN_RMS:
            skipped += 1
            continue
        # Number files by how many were saved so the sequence has no gaps.
        filename = f"{start_idx + saved:03d}.wav"
        wav.write(os.path.join(BASE_DIR, filename), SAMPLE_RATE, chunk)
        saved += 1

    print(f"\n✅ Нарезано {n_chunks} кусков → сохранено {saved}, пропущено тихих {skipped}")
|
||
|
||
|
||
# Script entry: announce the selected mode, then run either one long take
# or an endless loop of single samples that ends on Ctrl+C.
_mode_suffix = " / LONG" if LONG_MODE else ""
print(f"--- Режим записи: {MODEL_NAME} / {MODE}{_mode_suffix} ---")

if not LONG_MODE:
    print("Для выхода нажмите Ctrl+C")
    try:
        while True:
            record_sample()
    except KeyboardInterrupt:
        print("\nЗапись завершена.")
else:
    record_long(LONG_DURATION)
|