Mac M1 optimizations, fix train pipeline, add Hey Cosmo wake word model
- Fix install_mac.sh: use venv + Python 3.12 (3.14 incompatible with ML libs)
- Fix run_mac.sh: activate venv, add CPU thread optimization env vars
- Fix agent.py: remove f-string from SYSTEM_PROMPT template (NameError on import)
- Add missing deps: sounddevice, pydub, imageio-ffmpeg, omegaconf
- Optimize for M1: torch.inference_mode, set_num_threads, OMP/MKL tuning
- Switch to qwen2.5:3b for faster LLM responses on Mac
- Switch Whisper to medium model with auto compute (small+int8 had poor Russian)
- Add initial_prompt for better Russian transcription
- Add open_app tool for native macOS app launching
- Fix TTS: sanitize Latin text to Cyrillic for Silero compatibility
- Fix wake word echo: add cooldown after TTS, reset model state, raise threshold
- Make "Слушаю" TTS synchronous to avoid mic interference
- Fix train Dockerfile: remove tensorflow/onnx2tf (only ONNX needed), fix deps
- Fix train.sh: use wget for dataset download, add --shm-size=2g
- Add trained hey_cosmo.onnx wake word model
- Add TODO section to CLAUDE.md (ChatterBox TTS, Ollama Modelfile ideas)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -51,24 +51,17 @@ NEGATIVE_FEATURES="$DATA_DIR/openwakeword_features_ACAV100M_2000_hrs_16bit.npy"
|
||||
VALIDATION_FEATURES="$DATA_DIR/validation_set_features.npy"
|
||||
|
||||
if [ ! -f "$NEGATIVE_FEATURES" ]; then
|
||||
echo "[2/4] Скачиваю негативный датасет (~20 GB, один раз)..."
|
||||
echo "[2/4] Скачиваю негативный датасет (~17 GB + ~500 MB, один раз)..."
|
||||
echo " Это займёт время в зависимости от скорости интернета."
|
||||
docker run --rm \
|
||||
-v "$DATA_DIR:/data" \
|
||||
cosmo-wakeword-trainer \
|
||||
python -c "
|
||||
from datasets import load_dataset
|
||||
import numpy as np, os
|
||||
print('Скачиваю ACAV100M features...')
|
||||
ds = load_dataset('davidscripka/openwakeword_features', 'ACAV100M_2000_hrs_16bit', split='train')
|
||||
arr = np.array(ds['features'])
|
||||
np.save('/data/openwakeword_features_ACAV100M_2000_hrs_16bit.npy', arr)
|
||||
print('Скачиваю validation features...')
|
||||
ds_val = load_dataset('davidscripka/openwakeword_features', 'validation_set', split='train')
|
||||
arr_val = np.array(ds_val['features'])
|
||||
np.save('/data/validation_set_features.npy', arr_val)
|
||||
print('Датасет скачан.')
|
||||
"
|
||||
echo ""
|
||||
echo " Скачиваю ACAV100M features (~17 GB)..."
|
||||
wget -q --show-progress \
|
||||
-O "$NEGATIVE_FEATURES" \
|
||||
"https://huggingface.co/datasets/davidscripka/openwakeword_features/resolve/main/openwakeword_features_ACAV100M_2000_hrs_16bit.npy"
|
||||
echo " Скачиваю validation features (~500 MB)..."
|
||||
wget -q --show-progress \
|
||||
-O "$VALIDATION_FEATURES" \
|
||||
"https://huggingface.co/datasets/davidscripka/openwakeword_features/resolve/main/validation_set_features.npy"
|
||||
echo " Датасет готов."
|
||||
else
|
||||
echo "[2/4] Негативный датасет уже скачан. Пропускаю."
|
||||
@@ -86,6 +79,7 @@ if [ -d "$POSITIVE_DIR" ] && [ -n "$(ls "$POSITIVE_DIR"/*.wav 2>/dev/null)" ]; t
|
||||
fi
|
||||
|
||||
docker run --rm \
|
||||
--shm-size=2g \
|
||||
-v "$SCRIPT_DIR/cosmo_config.yaml:/app/cosmo_config.yaml" \
|
||||
-v "$DATA_DIR:/data" \
|
||||
-v "$MODELS_DIR:/output" \
|
||||
|
||||
Reference in New Issue
Block a user