Mac M1 optimizations, fix train pipeline, add Hey Cosmo wake word model
- Fix install_mac.sh: use venv + Python 3.12 (3.14 incompatible with ML libs)
- Fix run_mac.sh: activate venv, add CPU thread optimization env vars
- Fix agent.py: remove f-string from SYSTEM_PROMPT template (NameError on import)
- Add missing deps: sounddevice, pydub, imageio-ffmpeg, omegaconf
- Optimize for M1: torch.inference_mode, set_num_threads, OMP/MKL tuning
- Switch to qwen2.5:3b for faster LLM responses on Mac
- Switch Whisper to medium model with auto compute (small+int8 had poor Russian)
- Add initial_prompt for better Russian transcription
- Add open_app tool for native macOS app launching
- Fix TTS: sanitize Latin text to Cyrillic for Silero compatibility
- Fix wake word echo: add cooldown after TTS, reset model state, raise threshold
- Make "Слушаю" TTS synchronous to avoid mic interference
- Fix train Dockerfile: remove tensorflow/onnx2tf (only ONNX needed), fix deps
- Fix train.sh: use wget for dataset download, add --shm-size=2g
- Add trained hey_cosmo.onnx wake word model
- Add TODO section to CLAUDE.md (ChatterBox TTS, Ollama Modelfile ideas)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,88 +1,86 @@
|
||||
# Dockerfile для обучения wake word модели openWakeWord
|
||||
# Python 3.11 + torch 2.5 (последний совместимый с py3.11) + рабочие зависимости 2026
|
||||
# Dockerfile для обучения wake word модели openWakeWord
|
||||
# Python 3.11 + torch (CPU) — без tensorflow (нам нужен только ONNX, не TFLite)
|
||||
FROM python:3.11-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Системные зависимости (включая build-essential для webrtcvad)
|
||||
RUN apt-get update && apt-get install -y \
|
||||
# Системные зависимости
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
git wget curl ffmpeg libsndfile1 \
|
||||
build-essential python3-dev \
|
||||
build-essential python3-dev cmake \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Клонируем openWakeWord и piper-sample-generator
|
||||
RUN git clone https://github.com/dscripka/openWakeWord /openWakeWord
|
||||
RUN git clone https://github.com/rhasspy/piper-sample-generator /piper-sample-generator
|
||||
|
||||
# Torch 2.5.0 — последний для Python 3.11, CPU версия (обучение не требует GPU)
|
||||
# --- Слой 1: PyTorch (самый тяжёлый, кэшируется) ---
|
||||
RUN pip install --no-cache-dir \
|
||||
torch==2.5.0 \
|
||||
torchaudio==2.5.0 \
|
||||
--index-url https://download.pytorch.org/whl/cpu
|
||||
|
||||
# Зависимости обучения с совместимыми версиями
|
||||
# --- Слой 2: ML-зависимости (без tensorflow!) ---
|
||||
RUN pip install --no-cache-dir \
|
||||
mutagen==1.47.0 \
|
||||
torchinfo==1.8.0 \
|
||||
torchmetrics==1.2.0 \
|
||||
speechbrain==1.0.3 \
|
||||
audiomentations==0.43.1 \
|
||||
torch-audiomentations==0.12.0 \
|
||||
pronouncing==0.2.0 \
|
||||
"datasets==2.20.0" \
|
||||
"pyarrow==14.0.2" \
|
||||
"fsspec==2023.12.2" \
|
||||
acoustics==0.2.6 \
|
||||
pyyaml "scipy<1.15" scikit-learn tqdm
|
||||
|
||||
# --- Слой 3: Аудио-аугментация ---
|
||||
RUN pip install --no-cache-dir \
|
||||
audiomentations==0.43.1 \
|
||||
torch-audiomentations==0.12.0
|
||||
|
||||
# --- Слой 4: Датасеты и ONNX ---
|
||||
RUN pip install --no-cache-dir \
|
||||
"datasets>=2.20.0" \
|
||||
"pyarrow>=15.0.0" \
|
||||
webrtcvad \
|
||||
onnx \
|
||||
onnxruntime \
|
||||
onnx2tf \
|
||||
pyyaml scipy scikit-learn tqdm
|
||||
|
||||
# TFLite конвертация через onnx2tf (замена мёртвого onnx_tf)
|
||||
# Патчим train.py чтобы использовал onnx2tf вместо onnx_tf
|
||||
RUN pip install --no-cache-dir \
|
||||
tensorflow-cpu==2.21.0 \
|
||||
tensorflow_probability==0.24.0
|
||||
onnxruntime
|
||||
|
||||
# --- Слой 5: openWakeWord ---
|
||||
RUN git clone https://github.com/dscripka/openWakeWord /openWakeWord
|
||||
RUN pip install --no-cache-dir -e /openWakeWord
|
||||
|
||||
# Патч: заменяем onnx_tf на onnx2tf в train.py
|
||||
RUN python - <<'EOF'
|
||||
import re, pathlib
|
||||
# Ресурсные модели для feature extraction (melspectrogram + embedding)
|
||||
RUN mkdir -p /openWakeWord/openwakeword/resources/models && \
|
||||
wget -q -O /openWakeWord/openwakeword/resources/models/melspectrogram.onnx \
|
||||
"https://github.com/dscripka/openWakeWord/releases/download/v0.5.1/melspectrogram.onnx" && \
|
||||
wget -q -O /openWakeWord/openwakeword/resources/models/embedding_model.onnx \
|
||||
"https://github.com/dscripka/openWakeWord/releases/download/v0.5.1/embedding_model.onnx"
|
||||
|
||||
# Патч train.py: убираем зависимость от onnx_tf/tensorflow (нам нужен только ONNX)
|
||||
RUN python - <<'PATCH'
|
||||
import pathlib
|
||||
|
||||
train_py = pathlib.Path("/openWakeWord/openwakeword/train.py")
|
||||
text = train_py.read_text()
|
||||
# Заменяем импорт onnx_tf
|
||||
text = text.replace(
|
||||
"import onnx_tf",
|
||||
"import onnx2tf as onnx_tf_compat"
|
||||
)
|
||||
text = text.replace(
|
||||
"from onnx_tf.backend import prepare",
|
||||
"# onnx_tf replaced by onnx2tf"
|
||||
)
|
||||
# Заменяем вызов convert_onnx_to_tflite если он есть
|
||||
text = re.sub(
|
||||
r"onnx_tf\.backend\.prepare\(.*?\)",
|
||||
"None # onnx2tf handles tflite conversion differently",
|
||||
text, flags=re.DOTALL
|
||||
)
|
||||
train_py.write_text(text)
|
||||
print("train.py patched OK")
|
||||
EOF
|
||||
|
||||
# Устанавливаем piper-sample-generator
|
||||
# Заменяем всю функцию convert_onnx_to_tflite на заглушку
|
||||
old_func = text[text.find("def convert_onnx_to_tflite("):]
|
||||
old_func = old_func[:old_func.find("\nif __name__")]
|
||||
new_func = '''def convert_onnx_to_tflite(onnx_model_path, output_path):
|
||||
"""Skipped — ONNX-only mode, TFLite not needed."""
|
||||
return None
|
||||
'''
|
||||
text = text.replace(old_func, new_func)
|
||||
|
||||
train_py.write_text(text)
|
||||
print("train.py patched: convert_onnx_to_tflite replaced with stub")
|
||||
PATCH
|
||||
|
||||
# --- Слой 6: piper-sample-generator v2.0.0 (совместим с openWakeWord train.py) ---
|
||||
RUN git clone --branch v2.0.0 https://github.com/rhasspy/piper-sample-generator /piper-sample-generator
|
||||
RUN pip install --no-cache-dir piper-phonemize || true
|
||||
RUN pip install --no-cache-dir -e /piper-sample-generator 2>/dev/null || \
|
||||
pip install --no-cache-dir piper-tts
|
||||
|
||||
# Скачиваем TTS модель LibriTTS-R medium (~66 MB) для генерации примеров
|
||||
# TTS модель (.pt checkpoint) для генерации примеров
|
||||
RUN mkdir -p /piper-sample-generator/models && \
|
||||
wget -q --show-progress \
|
||||
-O /piper-sample-generator/models/en_US-libritts_r-medium.onnx \
|
||||
"https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/libritts_r/medium/en_US-libritts_r-medium.onnx" && \
|
||||
wget -q \
|
||||
-O /piper-sample-generator/models/en_US-libritts_r-medium.onnx.json \
|
||||
"https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/libritts_r/medium/en_US-libritts_r-medium.onnx.json"
|
||||
-O /piper-sample-generator/models/en_US-libritts_r-medium.pt \
|
||||
"https://github.com/rhasspy/piper-sample-generator/releases/download/v2.0.0/en_US-libritts_r-medium.pt"
|
||||
|
||||
RUN mkdir -p /data /output /samples
|
||||
|
||||
|
||||
@@ -40,6 +40,11 @@ batch_n_per_class:
|
||||
"adversarial_negative": 50
|
||||
"positive": 50
|
||||
|
||||
# Пути для аугментации (пустые — аугментация без RIR и фонового шума)
|
||||
rir_paths: []
|
||||
background_paths: []
|
||||
background_paths_duplication_rate: []
|
||||
|
||||
# Архитектура модели
|
||||
model_type: "dnn"
|
||||
layer_size: 32
|
||||
|
||||
@@ -51,24 +51,17 @@ NEGATIVE_FEATURES="$DATA_DIR/openwakeword_features_ACAV100M_2000_hrs_16bit.npy"
|
||||
VALIDATION_FEATURES="$DATA_DIR/validation_set_features.npy"
|
||||
|
||||
if [ ! -f "$NEGATIVE_FEATURES" ]; then
|
||||
echo "[2/4] Скачиваю негативный датасет (~20 GB, один раз)..."
|
||||
echo "[2/4] Скачиваю негативный датасет (~17 GB + ~500 MB, один раз)..."
|
||||
echo " Это займёт время в зависимости от скорости интернета."
|
||||
docker run --rm \
|
||||
-v "$DATA_DIR:/data" \
|
||||
cosmo-wakeword-trainer \
|
||||
python -c "
|
||||
from datasets import load_dataset
|
||||
import numpy as np, os
|
||||
print('Скачиваю ACAV100M features...')
|
||||
ds = load_dataset('davidscripka/openwakeword_features', 'ACAV100M_2000_hrs_16bit', split='train')
|
||||
arr = np.array(ds['features'])
|
||||
np.save('/data/openwakeword_features_ACAV100M_2000_hrs_16bit.npy', arr)
|
||||
print('Скачиваю validation features...')
|
||||
ds_val = load_dataset('davidscripka/openwakeword_features', 'validation_set', split='train')
|
||||
arr_val = np.array(ds_val['features'])
|
||||
np.save('/data/validation_set_features.npy', arr_val)
|
||||
print('Датасет скачан.')
|
||||
"
|
||||
echo ""
|
||||
echo " Скачиваю ACAV100M features (~17 GB)..."
|
||||
wget -q --show-progress \
|
||||
-O "$NEGATIVE_FEATURES" \
|
||||
"https://huggingface.co/datasets/davidscripka/openwakeword_features/resolve/main/openwakeword_features_ACAV100M_2000_hrs_16bit.npy"
|
||||
echo " Скачиваю validation features (~500 MB)..."
|
||||
wget -q --show-progress \
|
||||
-O "$VALIDATION_FEATURES" \
|
||||
"https://huggingface.co/datasets/davidscripka/openwakeword_features/resolve/main/validation_set_features.npy"
|
||||
echo " Датасет готов."
|
||||
else
|
||||
echo "[2/4] Негативный датасет уже скачан. Пропускаю."
|
||||
@@ -86,6 +79,7 @@ if [ -d "$POSITIVE_DIR" ] && [ -n "$(ls "$POSITIVE_DIR"/*.wav 2>/dev/null)" ]; t
|
||||
fi
|
||||
|
||||
docker run --rm \
|
||||
--shm-size=2g \
|
||||
-v "$SCRIPT_DIR/cosmo_config.yaml:/app/cosmo_config.yaml" \
|
||||
-v "$DATA_DIR:/data" \
|
||||
-v "$MODELS_DIR:/output" \
|
||||
|
||||
Reference in New Issue
Block a user