Mac M1 optimizations, fix train pipeline, add Hey Cosmo wake word model

- Fix install_mac.sh: use venv + Python 3.12 (3.14 incompatible with ML libs)
- Fix run_mac.sh: activate venv, add CPU thread optimization env vars
- Fix agent.py: remove f-string from SYSTEM_PROMPT template (NameError on import)
- Add missing deps: sounddevice, pydub, imageio-ffmpeg, omegaconf
- Optimize for M1: torch.inference_mode, set_num_threads, OMP/MKL tuning
- Switch to qwen2.5:3b for faster LLM responses on Mac
- Switch Whisper to medium model with auto compute (small+int8 had poor Russian)
- Add initial_prompt for better Russian transcription
- Add open_app tool for native macOS app launching
- Fix TTS: sanitize Latin text to Cyrillic for Silero compatibility
- Fix wake word echo: add cooldown after TTS, reset model state, raise threshold
- Make "Слушаю" TTS synchronous to avoid mic interference
- Fix train Dockerfile: remove tensorflow/onnx2tf (only ONNX needed), fix deps
- Fix train.sh: use wget for dataset download, add --shm-size=2g
- Add trained hey_cosmo.onnx wake word model
- Add TODO section to CLAUDE.md (ChatterBox TTS, Ollama Modelfile ideas)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-11 11:19:53 +03:00
parent 6010816f1d
commit 110d9cde29
15 changed files with 183 additions and 94 deletions

View File

@@ -1,88 +1,86 @@
# Dockerfile для обучения wake word модели openWakeWord
# Python 3.11 + torch 2.5 (последний совместимый с py3.11) + рабочие зависимости 2026
# Dockerfile для обучения wake word модели openWakeWord
# Python 3.11 + torch (CPU) — без tensorflow (нам нужен только ONNX, не TFLite)
FROM python:3.11-slim
WORKDIR /app
# Системные зависимости (включая build-essential для webrtcvad)
RUN apt-get update && apt-get install -y \
# Системные зависимости
RUN apt-get update && apt-get install -y --no-install-recommends \
git wget curl ffmpeg libsndfile1 \
build-essential python3-dev \
build-essential python3-dev cmake \
&& rm -rf /var/lib/apt/lists/*
# Клонируем openWakeWord и piper-sample-generator
RUN git clone https://github.com/dscripka/openWakeWord /openWakeWord
RUN git clone https://github.com/rhasspy/piper-sample-generator /piper-sample-generator
# Torch 2.5.0 — последний для Python 3.11, CPU версия (обучение не требует GPU)
# --- Слой 1: PyTorch (самый тяжёлый, кэшируется) ---
RUN pip install --no-cache-dir \
torch==2.5.0 \
torchaudio==2.5.0 \
--index-url https://download.pytorch.org/whl/cpu
# Зависимости обучения с совместимыми версиями
# --- Слой 2: ML-зависимости (без tensorflow!) ---
RUN pip install --no-cache-dir \
mutagen==1.47.0 \
torchinfo==1.8.0 \
torchmetrics==1.2.0 \
speechbrain==1.0.3 \
audiomentations==0.43.1 \
torch-audiomentations==0.12.0 \
pronouncing==0.2.0 \
"datasets==2.20.0" \
"pyarrow==14.0.2" \
"fsspec==2023.12.2" \
acoustics==0.2.6 \
pyyaml "scipy<1.15" scikit-learn tqdm
# --- Слой 3: Аудио-аугментация ---
RUN pip install --no-cache-dir \
audiomentations==0.43.1 \
torch-audiomentations==0.12.0
# --- Слой 4: Датасеты и ONNX ---
RUN pip install --no-cache-dir \
"datasets>=2.20.0" \
"pyarrow>=15.0.0" \
webrtcvad \
onnx \
onnxruntime \
onnx2tf \
pyyaml scipy scikit-learn tqdm
# TFLite конвертация через onnx2tf (замена мёртвого onnx_tf)
# Патчим train.py чтобы использовал onnx2tf вместо onnx_tf
RUN pip install --no-cache-dir \
tensorflow-cpu==2.21.0 \
tensorflow_probability==0.24.0
onnxruntime
# --- Слой 5: openWakeWord ---
RUN git clone https://github.com/dscripka/openWakeWord /openWakeWord
RUN pip install --no-cache-dir -e /openWakeWord
# Патч: заменяем onnx_tf на onnx2tf в train.py
RUN python - <<'EOF'
import re, pathlib
# Ресурсные модели для feature extraction (melspectrogram + embedding)
RUN mkdir -p /openWakeWord/openwakeword/resources/models && \
wget -q -O /openWakeWord/openwakeword/resources/models/melspectrogram.onnx \
"https://github.com/dscripka/openWakeWord/releases/download/v0.5.1/melspectrogram.onnx" && \
wget -q -O /openWakeWord/openwakeword/resources/models/embedding_model.onnx \
"https://github.com/dscripka/openWakeWord/releases/download/v0.5.1/embedding_model.onnx"
# Патч train.py: убираем зависимость от onnx_tf/tensorflow (нам нужен только ONNX)
RUN python - <<'PATCH'
import pathlib
train_py = pathlib.Path("/openWakeWord/openwakeword/train.py")
text = train_py.read_text()
# Заменяем импорт onnx_tf
text = text.replace(
"import onnx_tf",
"import onnx2tf as onnx_tf_compat"
)
text = text.replace(
"from onnx_tf.backend import prepare",
"# onnx_tf replaced by onnx2tf"
)
# Заменяем вызов convert_onnx_to_tflite если он есть
text = re.sub(
r"onnx_tf\.backend\.prepare\(.*?\)",
"None # onnx2tf handles tflite conversion differently",
text, flags=re.DOTALL
)
train_py.write_text(text)
print("train.py patched OK")
EOF
# Устанавливаем piper-sample-generator
# Заменяем всю функцию convert_onnx_to_tflite на заглушку
old_func = text[text.find("def convert_onnx_to_tflite("):]
old_func = old_func[:old_func.find("\nif __name__")]
new_func = '''def convert_onnx_to_tflite(onnx_model_path, output_path):
"""Skipped — ONNX-only mode, TFLite not needed."""
return None
'''
text = text.replace(old_func, new_func)
train_py.write_text(text)
print("train.py patched: convert_onnx_to_tflite replaced with stub")
PATCH
# --- Слой 6: piper-sample-generator v2.0.0 (совместим с openWakeWord train.py) ---
RUN git clone --branch v2.0.0 https://github.com/rhasspy/piper-sample-generator /piper-sample-generator
RUN pip install --no-cache-dir piper-phonemize || true
RUN pip install --no-cache-dir -e /piper-sample-generator 2>/dev/null || \
pip install --no-cache-dir piper-tts
# Скачиваем TTS модель LibriTTS-R medium (~66 MB) для генерации примеров
# TTS модель (.pt checkpoint) для генерации примеров
RUN mkdir -p /piper-sample-generator/models && \
wget -q --show-progress \
-O /piper-sample-generator/models/en_US-libritts_r-medium.onnx \
"https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/libritts_r/medium/en_US-libritts_r-medium.onnx" && \
wget -q \
-O /piper-sample-generator/models/en_US-libritts_r-medium.onnx.json \
"https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/libritts_r/medium/en_US-libritts_r-medium.onnx.json"
-O /piper-sample-generator/models/en_US-libritts_r-medium.pt \
"https://github.com/rhasspy/piper-sample-generator/releases/download/v2.0.0/en_US-libritts_r-medium.pt"
RUN mkdir -p /data /output /samples