Files
Daniil Klimov 110d9cde29 Mac M1 optimizations, fix train pipeline, add Hey Cosmo wake word model
- Fix install_mac.sh: use venv + Python 3.12 (3.14 incompatible with ML libs)
- Fix run_mac.sh: activate venv, add CPU thread optimization env vars
- Fix agent.py: remove f-string from SYSTEM_PROMPT template (NameError on import)
- Add missing deps: sounddevice, pydub, imageio-ffmpeg, omegaconf
- Optimize for M1: torch.inference_mode, set_num_threads, OMP/MKL tuning
- Switch to qwen2.5:3b for faster LLM responses on Mac
- Switch Whisper to medium model with auto compute (small+int8 had poor Russian)
- Add initial_prompt for better Russian transcription
- Add open_app tool for native macOS app launching
- Fix TTS: sanitize Latin text to Cyrillic for Silero compatibility
- Fix wake word echo: add cooldown after TTS, reset model state, raise threshold
- Make "Слушаю" TTS synchronous to avoid mic interference
- Fix train Dockerfile: remove tensorflow/onnx2tf (only ONNX needed), fix deps
- Fix train.sh: use wget for dataset download, add --shm-size=2g
- Add trained hey_cosmo.onnx wake word model
- Add TODO section to CLAUDE.md (ChatterBox TTS, Ollama Modelfile ideas)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-11 11:19:53 +03:00

91 lines
3.4 KiB
Docker
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# syntax=docker/dockerfile:1
# Dockerfile for training an openWakeWord wake word model.
# Python 3.11 + torch (CPU), without tensorflow: only the ONNX model is needed, not TFLite.
#
# The syntax directive above pins the BuildKit Dockerfile frontend; this file
# uses a heredoc RUN step further down, which needs frontend >= 1.4.
FROM python:3.11-slim
WORKDIR /app
# System packages: VCS/download tools, audio libraries and a native build toolchain.
# The apt package lists are removed in the same layer so they do not end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        curl \
        ffmpeg \
        git \
        libsndfile1 \
        python3-dev \
        wget \
    && rm -rf /var/lib/apt/lists/*
# --- Layer 1: PyTorch from the CPU wheel index (the heaviest layer; kept early so it stays cached) ---
RUN pip install --no-cache-dir \
        --index-url https://download.pytorch.org/whl/cpu \
        torch==2.5.0 \
        torchaudio==2.5.0
# --- Layer 2: ML dependencies (deliberately no tensorflow) ---
RUN pip install --no-cache-dir \
        mutagen==1.47.0 \
        torchinfo==1.8.0 \
        torchmetrics==1.2.0 \
        speechbrain==1.0.3 \
        pronouncing==0.2.0 \
        acoustics==0.2.6 \
        pyyaml \
        "scipy<1.15" \
        scikit-learn \
        tqdm
# --- Layer 3: audio augmentation libraries ---
RUN pip install --no-cache-dir \
        audiomentations==0.43.1 \
        torch-audiomentations==0.12.0
# --- Layer 4: dataset tooling and ONNX ---
# NOTE(review): webrtcvad, onnx and onnxruntime are unpinned here, so rebuilds
# may pick up different versions over time.
RUN pip install --no-cache-dir \
        "datasets>=2.20.0" \
        "pyarrow>=15.0.0" \
        webrtcvad \
        onnx \
        onnxruntime
# --- Layer 5: openWakeWord (editable install from a source checkout) ---
# Shallow clone: only the current tree is needed, so the full git history is
# not stored in the image layer.
# NOTE(review): the checkout is not pinned to a tag or commit, and a later
# build step patches openwakeword/train.py by text search — consider pinning
# a known-good revision for reproducible builds.
RUN git clone --depth 1 https://github.com/dscripka/openWakeWord /openWakeWord
RUN pip install --no-cache-dir -e /openWakeWord
# Resource models used for feature extraction (melspectrogram + embedding),
# downloaded from the openWakeWord v0.5.1 release assets.
RUN models_dir=/openWakeWord/openwakeword/resources/models && \
    release_url=https://github.com/dscripka/openWakeWord/releases/download/v0.5.1 && \
    mkdir -p "$models_dir" && \
    for model in melspectrogram.onnx embedding_model.onnx; do \
        wget -q -O "$models_dir/$model" "$release_url/$model" || exit 1; \
    done
# Patch train.py: replace convert_onnx_to_tflite with a no-op stub so training
# does not depend on onnx_tf/tensorflow (only the ONNX model is needed).
# The script fails the build if the expected markers are missing, instead of
# silently corrupting train.py when the upstream layout changes.
RUN python - <<'PATCH'
import pathlib
import sys

train_py = pathlib.Path("/openWakeWord/openwakeword/train.py")
text = train_py.read_text(encoding="utf-8")

# The function to replace spans from its "def" line up to the "__main__" guard.
start = text.find("def convert_onnx_to_tflite(")
if start == -1:
    sys.exit("train.py patch failed: 'def convert_onnx_to_tflite(' not found")

end = text.find("\nif __name__", start)
if end == -1:
    sys.exit("train.py patch failed: '__main__' guard not found after convert_onnx_to_tflite")

# The stub body must be indented, otherwise the patched train.py is not valid Python.
stub = '''def convert_onnx_to_tflite(onnx_model_path, output_path):
    """Skipped — ONNX-only mode, TFLite not needed."""
    return None
'''

# Splice by index rather than str.replace so exactly one region is rewritten.
train_py.write_text(text[:start] + stub + text[end:], encoding="utf-8")
print("train.py patched: convert_onnx_to_tflite replaced with stub")
PATCH
# --- Layer 6: piper-sample-generator v2.0.0 (compatible with openWakeWord train.py) ---
# Shallow clone of the tagged release; git history is not needed in the image.
RUN git clone --depth 1 --branch v2.0.0 https://github.com/rhasspy/piper-sample-generator /piper-sample-generator
# piper-phonemize is installed best-effort: a failure does not abort the build,
# but it is reported in the build log instead of being silently ignored.
RUN pip install --no-cache-dir piper-phonemize || \
    echo "WARNING: piper-phonemize could not be installed; continuing without it" >&2
# Prefer an editable install of the checkout; if that fails, fall back to the
# piper-tts package. pip's error output is kept visible so the reason for the
# fallback can be diagnosed from the build log.
RUN pip install --no-cache-dir -e /piper-sample-generator || \
    { echo "WARNING: editable install of /piper-sample-generator failed; falling back to piper-tts" >&2; \
      pip install --no-cache-dir piper-tts; }
# TTS model (.pt checkpoint) used to generate training samples,
# downloaded from the piper-sample-generator v2.0.0 release assets.
RUN models_dir=/piper-sample-generator/models && \
    checkpoint=en_US-libritts_r-medium.pt && \
    mkdir -p "$models_dir" && \
    wget -q --show-progress \
        -O "$models_dir/$checkpoint" \
        "https://github.com/rhasspy/piper-sample-generator/releases/download/v2.0.0/$checkpoint"
# Working directories created at the filesystem root.
# NOTE(review): presumably used as mount points for datasets, trained models
# and generated samples — confirm against entrypoint.sh and the run script.
RUN mkdir -p /data /output /samples
# Set the executable bit while copying, instead of a separate `RUN chmod`
# that would add another layer. --chmod requires BuildKit.
COPY --chmod=755 entrypoint.sh /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]