Mac M1 optimizations, fix train pipeline, add Hey Cosmo wake word model

- Fix install_mac.sh: use venv + Python 3.12 (3.14 incompatible with ML libs)
- Fix run_mac.sh: activate venv, add CPU thread optimization env vars
- Fix agent.py: remove f-string from SYSTEM_PROMPT template (NameError on import)
- Add missing deps: sounddevice, pydub, imageio-ffmpeg, omegaconf
- Optimize for M1: torch.inference_mode, set_num_threads, OMP/MKL tuning
- Switch to qwen2.5:3b for faster LLM responses on Mac
- Switch Whisper to medium model with auto compute (small+int8 had poor Russian)
- Add initial_prompt for better Russian transcription
- Add open_app tool for native macOS app launching
- Fix TTS: sanitize Latin text to Cyrillic for Silero compatibility
- Fix wake word echo: add cooldown after TTS, reset model state, raise threshold
- Make "Слушаю" TTS synchronous to avoid mic interference
- Fix train Dockerfile: remove tensorflow/onnx2tf (only ONNX needed), fix deps
- Fix train.sh: use wget for dataset download, add --shm-size=2g
- Add trained hey_cosmo.onnx wake word model
- Add TODO section to CLAUDE.md (ChatterBox TTS, Ollama Modelfile ideas)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-11 11:19:53 +03:00
parent 6010816f1d
commit 110d9cde29
15 changed files with 183 additions and 94 deletions
--- a/train_wakeword/Dockerfile
+++ b/train_wakeword/Dockerfile
@@ -1,88 +1,86 @@
-# Dockerfile для обучения wake word модели openWakeWord
-# Python 3.11 + torch 2.5 (последний совместимый с py3.11) + рабочие зависимости 2026
+# Dockerfile для обучения wake word модели openWakeWord
+# Python 3.11 + torch (CPU) — без tensorflow (нам нужен только ONNX, не TFLite)
 FROM python:3.11-slim

 WORKDIR /app

-# Системные зависимости (включая build-essential для webrtcvad)
-RUN apt-get update && apt-get install -y \
+# Системные зависимости
+RUN apt-get update && apt-get install -y --no-install-recommends \
    git wget curl ffmpeg libsndfile1 \
-    build-essential python3-dev \
+    build-essential python3-dev cmake \
    && rm -rf /var/lib/apt/lists/*

-# Клонируем openWakeWord и piper-sample-generator
-RUN git clone https://github.com/dscripka/openWakeWord /openWakeWord
-RUN git clone https://github.com/rhasspy/piper-sample-generator /piper-sample-generator
-
-# Torch 2.5.0 — последний для Python 3.11, CPU версия (обучение не требует GPU)
+# --- Слой 1: PyTorch (самый тяжёлый, кэшируется) ---
 RUN pip install --no-cache-dir \
    torch==2.5.0 \
    torchaudio==2.5.0 \
    --index-url https://download.pytorch.org/whl/cpu

-# Зависимости обучения с совместимыми версиями
+# --- Слой 2: ML-зависимости (без tensorflow!) ---
 RUN pip install --no-cache-dir \
    mutagen==1.47.0 \
    torchinfo==1.8.0 \
    torchmetrics==1.2.0 \
    speechbrain==1.0.3 \
-    audiomentations==0.43.1 \
-    torch-audiomentations==0.12.0 \
    pronouncing==0.2.0 \
-    "datasets==2.20.0" \
-    "pyarrow==14.0.2" \
-    "fsspec==2023.12.2" \
    acoustics==0.2.6 \
+    pyyaml "scipy<1.15" scikit-learn tqdm
+
+# --- Слой 3: Аудио-аугментация ---
+RUN pip install --no-cache-dir \
+    audiomentations==0.43.1 \
+    torch-audiomentations==0.12.0
+
+# --- Слой 4: Датасеты и ONNX ---
+RUN pip install --no-cache-dir \
+    "datasets>=2.20.0" \
+    "pyarrow>=15.0.0" \
    webrtcvad \
    onnx \
-    onnxruntime \
-    onnx2tf \
-    pyyaml scipy scikit-learn tqdm
-
-# TFLite конвертация через onnx2tf (замена мёртвого onnx_tf)
-# Патчим train.py чтобы использовал onnx2tf вместо onnx_tf
-RUN pip install --no-cache-dir \
-    tensorflow-cpu==2.21.0 \
-    tensorflow_probability==0.24.0
+    onnxruntime

+# --- Слой 5: openWakeWord ---
+RUN git clone https://github.com/dscripka/openWakeWord /openWakeWord
 RUN pip install --no-cache-dir -e /openWakeWord

-# Патч: заменяем onnx_tf на onnx2tf в train.py
-RUN python - <<'EOF'
-import re, pathlib
+# Ресурсные модели для feature extraction (melspectrogram + embedding)
+RUN mkdir -p /openWakeWord/openwakeword/resources/models && \
+    wget -q -O /openWakeWord/openwakeword/resources/models/melspectrogram.onnx \
+        "https://github.com/dscripka/openWakeWord/releases/download/v0.5.1/melspectrogram.onnx" && \
+    wget -q -O /openWakeWord/openwakeword/resources/models/embedding_model.onnx \
+        "https://github.com/dscripka/openWakeWord/releases/download/v0.5.1/embedding_model.onnx"
+
+# Patch train.py: drop the onnx_tf/tensorflow dependency (only the ONNX export is needed)
+RUN python - <<'PATCH'
+import pathlib
+
 train_py = pathlib.Path("/openWakeWord/openwakeword/train.py")
 text = train_py.read_text()
-# Заменяем импорт onnx_tf
-text = text.replace(
-    "import onnx_tf",
-    "import onnx2tf as onnx_tf_compat"
-)
-text = text.replace(
-    "from onnx_tf.backend import prepare",
-    "# onnx_tf replaced by onnx2tf"
-)
-# Заменяем вызов convert_onnx_to_tflite если он есть
-text = re.sub(
-    r"onnx_tf\.backend\.prepare\(.*?\)",
-    "None  # onnx2tf handles tflite conversion differently",
-    text, flags=re.DOTALL
-)
-train_py.write_text(text)
-print("train.py patched OK")
-EOF

-# Устанавливаем piper-sample-generator
+# Stub out convert_onnx_to_tflite; abort the build if a marker is missing (find() == -1 would corrupt train.py)
+start = text.find("def convert_onnx_to_tflite(")
+end = text.find("\nif __name__", start)
+if start < 0 or end < 0:
+    raise SystemExit("train.py: convert_onnx_to_tflite or __main__ guard not found; cannot patch")
+stub = '''def convert_onnx_to_tflite(onnx_model_path, output_path):
+    """Skipped — ONNX-only mode, TFLite not needed."""
+    return None
+'''
+train_py.write_text(text[:start] + stub + text[end:])
+print("train.py patched: convert_onnx_to_tflite replaced with stub")
+PATCH
+
+# --- Слой 6: piper-sample-generator v2.0.0 (совместим с openWakeWord train.py) ---
+RUN git clone --branch v2.0.0 https://github.com/rhasspy/piper-sample-generator /piper-sample-generator
+RUN pip install --no-cache-dir piper-phonemize || true
 RUN pip install --no-cache-dir -e /piper-sample-generator 2>/dev/null || \
    pip install --no-cache-dir piper-tts

-# Скачиваем TTS модель LibriTTS-R medium (~66 MB) для генерации примеров
+# TTS модель (.pt checkpoint) для генерации примеров
 RUN mkdir -p /piper-sample-generator/models && \
    wget -q --show-progress \
-        -O /piper-sample-generator/models/en_US-libritts_r-medium.onnx \
-        "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/libritts_r/medium/en_US-libritts_r-medium.onnx" && \
-    wget -q \
-        -O /piper-sample-generator/models/en_US-libritts_r-medium.onnx.json \
-        "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/libritts_r/medium/en_US-libritts_r-medium.onnx.json"
+        -O /piper-sample-generator/models/en_US-libritts_r-medium.pt \
+        "https://github.com/rhasspy/piper-sample-generator/releases/download/v2.0.0/en_US-libritts_r-medium.pt"

 RUN mkdir -p /data /output /samples

--- a/train_wakeword/cosmo_config.yaml
+++ b/train_wakeword/cosmo_config.yaml
@@ -40,6 +40,11 @@ batch_n_per_class:
  "adversarial_negative": 50
  "positive": 50

+# Пути для аугментации (пустые — аугментация без RIR и фонового шума)
+rir_paths: []
+background_paths: []
+background_paths_duplication_rate: []
+
 # Архитектура модели
 model_type: "dnn"
 layer_size: 32
--- a/train_wakeword/train.sh
+++ b/train_wakeword/train.sh
@@ -51,24 +51,17 @@ NEGATIVE_FEATURES="$DATA_DIR/openwakeword_features_ACAV100M_2000_hrs_16bit.npy"
 VALIDATION_FEATURES="$DATA_DIR/validation_set_features.npy"

 if [ ! -f "$NEGATIVE_FEATURES" ]; then
-    echo "[2/4] Скачиваю негативный датасет (~20 GB, один раз)..."
+    echo "[2/4] Скачиваю негативный датасет (~17 GB + ~500 MB, один раз)..."
    echo "      Это займёт время в зависимости от скорости интернета."
-    docker run --rm \
-        -v "$DATA_DIR:/data" \
-        cosmo-wakeword-trainer \
-        python -c "
-from datasets import load_dataset
-import numpy as np, os
-print('Скачиваю ACAV100M features...')
-ds = load_dataset('davidscripka/openwakeword_features', 'ACAV100M_2000_hrs_16bit', split='train')
-arr = np.array(ds['features'])
-np.save('/data/openwakeword_features_ACAV100M_2000_hrs_16bit.npy', arr)
-print('Скачиваю validation features...')
-ds_val = load_dataset('davidscripka/openwakeword_features', 'validation_set', split='train')
-arr_val = np.array(ds_val['features'])
-np.save('/data/validation_set_features.npy', arr_val)
-print('Датасет скачан.')
-"
+    echo ""
+    echo "      Скачиваю ACAV100M features (~17 GB)..."
+    wget -q --show-progress \
+        -O "$NEGATIVE_FEATURES" \
+        "https://huggingface.co/datasets/davidscripka/openwakeword_features/resolve/main/openwakeword_features_ACAV100M_2000_hrs_16bit.npy"
+    echo "      Скачиваю validation features (~500 MB)..."
+    wget -q --show-progress \
+        -O "$VALIDATION_FEATURES" \
+        "https://huggingface.co/datasets/davidscripka/openwakeword_features/resolve/main/validation_set_features.npy"
    echo "      Датасет готов."
 else
    echo "[2/4] Негативный датасет уже скачан. Пропускаю."
@@ -86,6 +79,7 @@ if [ -d "$POSITIVE_DIR" ] && [ -n "$(ls "$POSITIVE_DIR"/*.wav 2>/dev/null)" ]; t
 fi

 docker run --rm \
+    --shm-size=2g \
    -v "$SCRIPT_DIR/cosmo_config.yaml:/app/cosmo_config.yaml" \
    -v "$DATA_DIR:/data" \
    -v "$MODELS_DIR:/output" \