Initial commit: Cosmo voice assistant

Полностью локальный голосовой ассистент на Python.

Стек:
- Wake word: openWakeWord (onnxruntime)
- STT: RealtimeSTT + faster-whisper + Silero VAD (CUDA)
- LLM-агент: smolagents ToolCallingAgent + Ollama qwen2.5:7b
- TTS: Silero V4 (torch.hub) + sounddevice
- Shell: Git Bash (Windows) / bash (macOS)

Поддерживает Windows и macOS. Агент с памятью и tool calling — находит программы самостоятельно, запоминает пути, выполняет произвольные shell-команды.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-10 15:58:12 +03:00
commit 6010816f1d
23 changed files with 1969 additions and 0 deletions
--- a/cosmo/tts.py
+++ b/cosmo/tts.py
@@ -0,0 +1,79 @@
+"""
+TTS модуль на базе Silero V4 (torch.hub) + sounddevice.
+Silero — лучший русскоязычный офлайн TTS.
+Модель скачивается автоматически при первом запуске (~50 MB).
+"""
+
+import threading
+import numpy as np
+import sounddevice as sd
+from loguru import logger
+
+try:
+    import torch
+    TORCH_AVAILABLE = True
+except ImportError:
+    TORCH_AVAILABLE = False
+
+
+class TTS:
+    def __init__(self, config: dict):
+        tts_cfg = config.get("tts", {})
+        self.enabled = tts_cfg.get("enabled", True)
+        self.speaker = tts_cfg.get("silero_speaker", "xenia")
+        self.sample_rate = tts_cfg.get("sample_rate", 48000)
+        self._lock = threading.Lock()
+        self._model = None
+
+        if not self.enabled:
+            return
+        if not TORCH_AVAILABLE:
+            logger.warning("torch не установлен — TTS отключён")
+            self.enabled = False
+            return
+
+        self._load_model()
+
+    def _load_model(self):
+        try:
+            logger.info(f"Загружаю Silero TTS (голос: {self.speaker}, {self.sample_rate} Hz)...")
+            # torch.hub кэширует модель в ~/.cache/torch/hub
+            model, _ = torch.hub.load(
+                repo_or_dir="snakers4/silero-models",
+                model="silero_tts",
+                language="ru",
+                speaker="v4_ru",
+                trust_repo=True,
+            )
+            self._model = model
+            logger.info("Silero TTS готов")
+        except Exception as e:
+            logger.error(f"Ошибка загрузки Silero TTS: {e}")
+            logger.warning("TTS отключён")
+            self.enabled = False
+
+    def say(self, text: str):
+        """Произнести текст синхронно."""
+        if not self.enabled or self._model is None:
+            logger.info(f"[TTS]: {text}")
+            return
+
+        logger.debug(f"TTS: '{text}'")
+        with self._lock:
+            try:
+                with torch.no_grad():
+                    audio = self._model.apply_tts(
+                        text=text,
+                        speaker=self.speaker,
+                        sample_rate=self.sample_rate,
+                    )
+                audio_np = audio.numpy() if hasattr(audio, "numpy") else np.array(audio)
+                sd.play(audio_np, samplerate=self.sample_rate)
+                sd.wait()
+            except Exception as e:
+                logger.error(f"Ошибка TTS: {e}")
+
+    def say_async(self, text: str):
+        """Произнести текст асинхронно."""
+        t = threading.Thread(target=self.say, args=(text,), daemon=True)
+        t.start()