Edit code for success run

2026-04-12 21:58:40 +03:00
parent 128cc70ab9
commit 0a89bf5105
8 changed files with 111 additions and 101 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -27,3 +27,9 @@ Thumbs.db
 # Claude Code
 .claude/
 # Data
 data/
 # Training
 training/
--- a/record_wav.py
+++ b/record_wav.py
@@ -0,0 +1,48 @@
 import sounddevice as sd
 import scipy.io.wavfile as wav
 import os
 import sys
 # 1. Validate command-line arguments: a model name and a sample class
 #    (positive/negative) are both required.
 if len(sys.argv) < 3:
     # The usage text names this script as it is actually called (record_wav.py).
     print("Использование: python record_wav.py <имя_модели> <positive/negative>")
     print("Пример: python record_wav.py cosmo positive")
     sys.exit(1)
 MODEL_NAME = sys.argv[1]
 MODE = sys.argv[2]
 # Samples are stored under data/wakewords/<model>/<mode>.
 BASE_DIR = os.path.join("data", "wakewords", MODEL_NAME, MODE)
 # Create the folder if it is missing; exist_ok turns the former
 # check-then-create pair into one call with no race window.
 os.makedirs(BASE_DIR, exist_ok=True)
 def get_next_filename(directory):
     """Return the next unused zero-padded WAV name (e.g. ``"004.wav"``).

     The index is one more than the larger of (a) the number of ``.wav``
     files in *directory* and (b) the highest numeric ``.wav`` stem found
     there. For a gap-free ``001..NNN`` sequence this equals the plain
     file count plus one; when takes have been deleted and the numbering
     has gaps, it can no longer collide with an existing recording.

     Args:
         directory: Path of an existing directory to scan.

     Returns:
         A file name (not a full path) of the form ``NNN.wav``.

     Raises:
         OSError: If *directory* cannot be listed.
     """
     wav_names = [name for name in os.listdir(directory) if name.endswith(".wav")]
     # Stems such as "003"; anything non-numeric ("take_a.wav") only counts
     # towards the total, never towards the highest index.
     used_indices = [
         int(name[:-4]) for name in wav_names if name[:-4].isdecimal()
     ]
     next_index = max([len(wav_names), *used_indices]) + 1
     return f"{next_index:03d}.wav"
 def record_sample():
     """Record one 2-second mono clip at 16 kHz and save it into BASE_DIR.

     Picks the next file name via get_next_filename(), waits for the user
     to press Enter, captures the audio with sounddevice, writes it with
     scipy's WAV writer and prints the resulting path.
     """
     rate_hz = 16000
     seconds = 2
     frame_count = int(seconds * rate_hz)

     filename = get_next_filename(BASE_DIR)
     filepath = os.path.join(BASE_DIR, filename)

     print(f"\n[!] Файл {filename} готов к записи.")
     input("Нажмите Enter, чтобы начать запись (2 секунды)...")
     print("Запись...")

     samples = sd.rec(frame_count, samplerate=rate_hz, channels=1)
     # sd.rec() is followed by sd.wait() so the buffer is complete before
     # it is written out.
     sd.wait()
     wav.write(filepath, rate_hz, samples)

     print(f"Сохранено в: {filepath}")
 # 2. Main recording loop: keep taking samples until the user stops the script.
 print(f"--- Режим записи: {MODEL_NAME} / {MODE} ---")
 print("Для выхода нажмите Ctrl+C")
 try:
     while True:
         record_sample()
 except (KeyboardInterrupt, EOFError):
     # Ctrl+C, or end-of-input on stdin (Ctrl+D / closed pipe) reported by
     # input(), both finish the session cleanly instead of with a traceback.
     print("\nЗапись завершена.")
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,4 +6,4 @@ numpy
 groq
 elevenlabs
 # Раскомментировать когда будет Pi + Porcupine:
-# pvporcupine
+# pvporcupine
--- a/satellite/audio.py
+++ b/satellite/audio.py
@@ -1,12 +1,15 @@
 import os
 import pyaudio
 import numpy as np
 from .config import SILENCE_THRESHOLD, SILENCE_DURATION, MAX_DURATION, log
 from .stt import transcribe
 ECHO_WARMUP = float(os.getenv("ECHO_WARMUP", "0.5"))  # сек пропуска в начале — гасит эхо от TTS
 def record() -> str:
-    """Запись до тишины (VAD) + STT"""
+    """Запись до тишины (VAD) + STT. Игнорирует ECHO_WARMUP в начале."""
    try:
        audio = pyaudio.PyAudio()
        stream = audio.open(
@@ -27,13 +30,13 @@ def record() -> str:
    speaking_started = False
    max_chunks = int(16000 / 1024 * MAX_DURATION)
    silence_chunks_needed = int(16000 / 1024 * SILENCE_DURATION)
-    warmup_chunks = int(16000 / 1024 * 0.3)  # 0.3 сек — эхо звука активации
+    warmup_chunks = int(16000 / 1024 * ECHO_WARMUP)
    try:
        for i in range(max_chunks):
            data = stream.read(1024, exception_on_overflow=False)
            if i < warmup_chunks:
-                continue  # пропускаем эхо от звука активации
+                continue  # гасим эхо от TTS / звука активации
            frames.append(data)
            amplitude = np.abs(np.frombuffer(data, dtype=np.int16)).mean()
@@ -56,52 +59,8 @@ def record() -> str:
    if not speaking_started:
        return ""
-    return transcribe(frames)
+    text = transcribe(frames)
-
+    # отсекаем мусор от эха (одиночные знаки препинания, пробелы)
-
+    if not text or not text.strip() or len(text.strip()) < 2:
 def record_with_timeout(timeout: float = 8.0) -> str:
    """Слушает timeout секунд, возвращает пусто если речи не было"""
    try:
        audio = pyaudio.PyAudio()
        stream = audio.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=16000,
            input=True,
            frames_per_buffer=1024,
        )
    except Exception as e:
        log.exception("Не удалось открыть микрофон (followup)")
        print(f"⚠️  Ошибка микрофона: {e}")
        return ""
-
+    return text
    frames = []
    silent_chunks = 0
    speaking_started = False
    max_chunks = int(16000 / 1024 * timeout)
    silence_chunks_needed = int(16000 / 1024 * SILENCE_DURATION)
    try:
        for _ in range(max_chunks):
            data = stream.read(1024, exception_on_overflow=False)
            frames.append(data)
            amplitude = np.abs(np.frombuffer(data, dtype=np.int16)).mean()
            if amplitude > SILENCE_THRESHOLD:
                speaking_started = True
                silent_chunks = 0
            elif speaking_started:
                silent_chunks += 1
                if silent_chunks >= silence_chunks_needed:
                    break
    except Exception as e:
        log.exception("Ошибка при записи аудио (followup)")
        print(f"⚠️  Ошибка записи: {e}")
    finally:
        stream.stop_stream()
        audio.terminate()
    if not speaking_started:
        return ""
    return transcribe(frames)
--- a/satellite/llm.py
+++ b/satellite/llm.py
@@ -79,21 +79,21 @@ def ask_agent_stream(text: str, conv: "Conversation | None" = None, agent_id: st
        log.exception("Gateway недоступен")
        msg = "Не могу связаться с сервером, попробуй ещё раз."
        print(f"⚠️  {msg}")
-        #play_error_sound()
+        play_error_sound()
        speak(msg, agent_id)
        return msg
    except requests.Timeout:
        log.exception("Gateway таймаут")
        msg = "Сервер не ответил вовремя, попробуй ещё раз."
        print(f"⚠️  {msg}")
-        #play_error_sound()
+        play_error_sound()
        speak(msg, agent_id)
        return msg
    except requests.HTTPError:
        log.exception(f"Gateway HTTP ошибка {resp.status_code}")
        msg = "Ошибка сервера, попробуй ещё раз."
        print(f"⚠️  Gateway {resp.status_code}: {resp.text}")
-        #play_error_sound()
+        play_error_sound()
        speak(msg, agent_id)
        return msg
--- a/satellite/modes.py
+++ b/satellite/modes.py
@@ -1,9 +1,9 @@
 import os
 import sys
-from .config import GATEWAY_URL, AGENT, FOLLOWUP_TIMEOUT, log
+from .config import GATEWAY_URL, AGENT, log
-from .audio import record, record_with_timeout
+from .audio import record
-from .tts import play_activation_sound, speak, stop_speaking
+from .tts import speak, stop_speaking
 from .llm import ask_agent_stream, Conversation, is_reset_command
 # Персистентные сессии — одна на день для каждого агента
@@ -31,6 +31,29 @@ def _handle_reset(text: str, agent_id: str) -> bool:
    return False
 def _conversation_loop(agent_id: str, agent_name: str = "Cosmo"):
     """Run the dialogue loop: listen, answer, and listen again.

     Each turn calls record(); a non-empty transcript is either handled as
     a reset command or sent to the agent. The loop returns as soon as
     record() yields an empty result (per the original note, that is when
     no speech was heard within MAX_DURATION).

     Args:
         agent_id: Identifier used for the session and the agent request.
         agent_name: Display name used in the console output.
     """
     conv = _get_session(agent_id)
     while True:
         text = record()
         if text:
             print(f"📝 Ты → {agent_name}: {text}")
             if _handle_reset(text, agent_id):
                 # The session was reset, so fetch it again before the next turn.
                 conv = _get_session(agent_id)
             else:
                 response = ask_agent_stream(text, conv=conv, agent_id=agent_id)
                 print(f"🤖 {agent_name}: {response}\n")
             # Next turn starts with a fresh record(); record() itself
             # suppresses echo via ECHO_WARMUP.
             continue
         print(f"😴 Тишина, жду активации...\n")
         return
 def run_with_enter():
    print("\n🦞 Cosmo Satellite запущен (режим: Enter для активации)")
    print(f"   Gateway : {GATEWAY_URL}")
@@ -40,34 +63,8 @@ def run_with_enter():
    while True:
        try:
            input("⏎  Нажми Enter и говори...")
-            stop_speaking()  # barge-in: прервать если ещё говорит
+            stop_speaking()  # barge-in
-            play_activation_sound()
+            _conversation_loop("cosmo", "Cosmo")
            conv = _get_session("cosmo")
            while True:
                text = record()
                if not text:
                    print("⚠️  Ничего не распознано")
                    break
                print(f"📝 Ты: {text}")
                if _handle_reset(text, "cosmo"):
                    conv = _get_session("cosmo")
                    break
                response = ask_agent_stream(text, conv=conv)
                print(f"🤖 Cosmo: {response}\n")
                print(f"👂 Слушаю продолжение ({int(FOLLOWUP_TIMEOUT)} сек)...")
                followup = record_with_timeout(timeout=FOLLOWUP_TIMEOUT)
                if not followup:
                    print("😴 Нет продолжения, жду активации...\n")
                    break
                text = followup
        except KeyboardInterrupt:
            print("\n👋 Выход")
@@ -138,23 +135,13 @@ def run_with_porcupine():
                if keyword_index >= 0:
                    agent_id = wake_word_map[keyword_index]
                    agent_name = AGENTS[agent_id]["name"]
-                    stop_speaking()  # barge-in: прервать если ещё говорит
+                    stop_speaking()  # barge-in
                    print(f"✅ Услышал '{agent_name}'!")
                    play_activation_sound()
-                    conv = _get_session(agent_id)
+                    # отпускаем микрофон на время диалога
-
+                    stream.stop_stream()
-                    text = record()
+                    _conversation_loop(agent_id, agent_name)
-                    if not text:
+                    stream.start_stream()
                        continue
                    print(f"📝 Ты → {agent_name}: {text}")
                    if _handle_reset(text, agent_id):
                        continue
                    response = ask_agent_stream(text, conv=conv, agent_id=agent_id)
                    print(f"🤖 {agent_name}: {response}\n")
            except KeyboardInterrupt:
                raise
--- a/satellite/tts.py
+++ b/satellite/tts.py
@@ -2,6 +2,7 @@ import os
 import sys
 import subprocess
 import threading
 from elevenlabs import VoiceSettings
 from .config import AUDIO_SINK, AGENTS, log
@@ -68,11 +69,20 @@ def _speak_elevenlabs(text: str, agent_id: str):
        print(f"⚠️  tts_voice не задан для {agent_id}")
        return
    voice_settings = VoiceSettings(
        stability=0.5,
        similarity_boost=0.75,
        style=0.0,
        use_speaker_boost=True,
        speed=1.1  # Значение от 0.7 до 1.2. 1.1 — это ускорение на 10%
    )
    audio_stream = client.text_to_speech.convert(
        text=text,
        voice_id=voice_id,
        model_id=ELEVENLABS_MODEL,
        output_format="mp3_44100_128",
        voice_settings=voice_settings
    )
    with _process_lock:
--- a/sounds/Success_Cosmo.mp3
+++ b/sounds/Success_Cosmo.mp3