# syntax=docker/dockerfile:1
# The syntax directive above pins the BuildKit frontend; the RUN heredoc and
# COPY --chmod used below require it.
#
# Dockerfile for training an openWakeWord wake word model.
# Python 3.11 + torch (CPU) — no tensorflow (only ONNX output is needed, not TFLite).
FROM python:3.11-slim

WORKDIR /app

# System dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        curl \
        ffmpeg \
        git \
        libsndfile1 \
        python3-dev \
        wget \
    && rm -rf /var/lib/apt/lists/*

# --- Layer 1: PyTorch (the heaviest layer, kept separate so it stays cached) ---
RUN pip install --no-cache-dir \
        torch==2.5.0 \
        torchaudio==2.5.0 \
        --index-url https://download.pytorch.org/whl/cpu

# --- Layer 2: ML dependencies (no tensorflow!) ---
RUN pip install --no-cache-dir \
        mutagen==1.47.0 \
        torchinfo==1.8.0 \
        torchmetrics==1.2.0 \
        speechbrain==1.0.3 \
        pronouncing==0.2.0 \
        acoustics==0.2.6 \
        pyyaml "scipy<1.15" scikit-learn tqdm

# --- Layer 3: Audio augmentation ---
RUN pip install --no-cache-dir \
        audiomentations==0.43.1 \
        torch-audiomentations==0.12.0

# --- Layer 4: Datasets and ONNX ---
RUN pip install --no-cache-dir \
        "datasets>=2.20.0" \
        "pyarrow>=15.0.0" \
        webrtcvad \
        onnx \
        onnxruntime

# --- Layer 5: openWakeWord ---
# OPENWAKEWORD_REF optionally pins the checkout to a tag, branch or commit
# (docker build --build-arg OPENWAKEWORD_REF=<ref>). When left empty, the
# repository's default branch is used as cloned. Pinning is recommended because
# the train.py patch below depends on the layout of that file.
ARG OPENWAKEWORD_REF=""
RUN git clone https://github.com/dscripka/openWakeWord /openWakeWord \
    && if [ -n "${OPENWAKEWORD_REF}" ]; then \
        git -C /openWakeWord checkout "${OPENWAKEWORD_REF}"; \
    fi
RUN pip install --no-cache-dir -e /openWakeWord

# Resource models for feature extraction (melspectrogram + embedding)
RUN mkdir -p /openWakeWord/openwakeword/resources/models && \
    wget -q -O /openWakeWord/openwakeword/resources/models/melspectrogram.onnx \
        "https://github.com/dscripka/openWakeWord/releases/download/v0.5.1/melspectrogram.onnx" && \
    wget -q -O /openWakeWord/openwakeword/resources/models/embedding_model.onnx \
        "https://github.com/dscripka/openWakeWord/releases/download/v0.5.1/embedding_model.onnx"

# Patch train.py: remove the dependency on onnx_tf/tensorflow (only ONNX is needed).
# The script replaces everything from "def convert_onnx_to_tflite(" up to the
# following "\nif __name__" with a stub. It aborts the build if either marker
# is missing, instead of silently writing a corrupted train.py.
RUN python - <<'PATCH'
import pathlib
import sys

train_py = pathlib.Path("/openWakeWord/openwakeword/train.py")
text = train_py.read_text(encoding="utf-8")

# Locate the span of the function to replace; str.find returns -1 when absent,
# and slicing with -1 would silently select the wrong text.
start = text.find("def convert_onnx_to_tflite(")
if start == -1:
    sys.exit("patch failed: 'def convert_onnx_to_tflite(' not found in train.py")
end = text.find("\nif __name__", start)
if end == -1:
    sys.exit("patch failed: 'if __name__' not found after convert_onnx_to_tflite in train.py")

# Replace the whole convert_onnx_to_tflite function with a stub
new_func = '''def convert_onnx_to_tflite(onnx_model_path, output_path):
    """Skipped — ONNX-only mode, TFLite not needed."""
    return None
'''
text = text[:start] + new_func + text[end:]
train_py.write_text(text, encoding="utf-8")
print("train.py patched: convert_onnx_to_tflite replaced with stub")
PATCH

# --- Layer 6: piper-sample-generator v2.0.0 (compatible with openWakeWord train.py) ---
RUN git clone --branch v2.0.0 https://github.com/rhasspy/piper-sample-generator /piper-sample-generator
# piper-phonemize is best-effort: a failed install does not abort the build,
# but a warning is printed so the failure is visible in the build log.
RUN pip install --no-cache-dir piper-phonemize \
    || echo "WARNING: piper-phonemize could not be installed; continuing without it" >&2
# Fall back to piper-tts if the editable install fails. stderr is left visible
# so the reason for the fallback can be seen in the build log.
RUN pip install --no-cache-dir -e /piper-sample-generator \
    || pip install --no-cache-dir piper-tts

# TTS model (.pt checkpoint) used to generate samples
RUN mkdir -p /piper-sample-generator/models && \
    wget -q --show-progress \
        -O /piper-sample-generator/models/en_US-libritts_r-medium.pt \
        "https://github.com/rhasspy/piper-sample-generator/releases/download/v2.0.0/en_US-libritts_r-medium.pt"

RUN mkdir -p /data /output /samples

# NOTE(review): the image runs as root. Switching to a non-root USER needs
# confirmation of which paths entrypoint.sh writes to — TODO confirm.
COPY --chmod=755 entrypoint.sh /entrypoint.sh

ENTRYPOINT ["/entrypoint.sh"]