Initial commit: Cosmo Voice Satellite
Two-agent voice assistant (Cosmo + Люся) via OpenClaw Gateway. Streaming STT (Groq) + LLM + TTS (ElevenLabs) pipeline with keep-alive sessions, barge-in, and daily conversation sessions. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
67
satellite/text.py
Normal file
67
satellite/text.py
Normal file
@@ -0,0 +1,67 @@
|
||||
import re
|
||||
|
||||
|
||||
def clean_for_speech(text: str) -> str:
    """Strip Markdown markup from *text* so it can be read aloud.

    The following transformations are applied, in order:

    * ``[label](url)`` links are replaced by their label;
    * heading markers (``#``, ``##`` ...) at the start of a line are removed;
    * list bullets (``-`` or ``*``) at the start of a line are removed;
    * remaining ``*`` emphasis markers are removed;
    * line breaks become sentence breaks (a period is inserted only when the
      line does not already end with punctuation);
    * runs of whitespace collapse to a single space;
    * a period that directly follows digits and precedes whitespace is
      dropped (``"1. Install"`` -> ``"1 Install"``).

    Returns the cleaned text with leading and trailing whitespace removed.
    """
    # Keep the human-readable label of a link; only the URL is unspeakable.
    text = re.sub(r'\[([^\]\n]*)\]\([^)\n]*\)', r'\1', text)

    # Markers are anchored to the start of a line so that inline text such
    # as "C# code" or "10 - 3" is left untouched.
    text = re.sub(r'^[ \t]*#+(?:[ \t]+|$)', '', text, flags=re.MULTILINE)
    text = re.sub(r'^[ \t]*[-*][ \t]+', '', text, flags=re.MULTILINE)

    # Bold / italic markers.
    text = re.sub(r'\*+', '', text)

    # A leading line break must not turn into a leading ". ".
    text = text.lstrip()

    def _line_break_to_pause(match: "re.Match[str]") -> str:
        # Reuse punctuation that already ends the line instead of doubling it.
        return (match.group(1) or '.') + ' '

    text = re.sub(r'([.!?:;,…])?[ \t\r]*\n\s*', _line_break_to_pause, text)

    # Collapse whitespace runs.
    text = re.sub(r'\s+', ' ', text)

    # Drop the period after a number ("1. item" -> "1 item").  This runs on
    # the whole text, including periods produced from line breaks above.
    text = re.sub(r'(\d+)\.(\s)', r'\1\2', text)

    return text.strip()
|
||||
|
||||
|
||||
# Unit / magnitude abbreviations whose trailing period does not end a sentence.
_NON_TERMINAL_ABBREVIATIONS = frozenset({
    "гб", "мб", "кб", "тб", "км", "см", "мм", "шт",
    "руб", "млн", "млрд", "тыс", "кг", "гр", "мл",
    "gb", "mb", "kb", "tb", "km", "ms",
})

# Look back one character further than the longest abbreviation so that a
# longer word merely *ending* in an abbreviation ("items", "программ") is
# recognised as a different word.
_WORD_LOOKBACK = max(map(len, _NON_TERMINAL_ABBREVIATIONS)) + 1


def _trailing_word(text: str, end: int, limit: int) -> str:
    """Return the run of letters ending right before *end*, at most *limit* long."""
    start = end
    floor = max(0, end - limit)
    while start > floor and text[start - 1].isalpha():
        start -= 1
    return text[start:end]


def find_sentence_end(text: str, min_len: int = 60) -> int:
    """Find the first real sentence end in *text*, ignoring false periods.

    Args:
        text: Text to scan.
        min_len: Minimum index of an acceptable sentence end; texts shorter
            than this are never split.

    Returns:
        The index of the terminating ``.``, ``!`` or ``?`` character, or
        ``-1`` when no acceptable sentence end is found.
    """
    if len(text) < min_len:
        return -1

    for match in re.finditer(r'[.!?]', text):
        pos = match.start()
        if pos < min_len:
            continue

        prev_char = text[max(0, pos - 1):pos]   # 1 character before
        before_3 = text[max(0, pos - 3):pos]    # 3 characters before
        after_2 = text[pos + 1:pos + 3]         # 2 characters after
        after_stripped = after_2.lstrip()

        # Punctuation right after a digit: decimals ("0.76"), numbered
        # items ("1. 2 GB"), IP addresses ("192.168.1.1").
        if prev_char.isdigit():
            continue

        word = _trailing_word(text, pos, _WORD_LOOKBACK)

        # Unit abbreviations ("ГБ.", "руб.", "млрд.").  The whole word must
        # match, not just its suffix.
        if word.lower() in _NON_TERMINAL_ABBREVIATIONS:
            continue

        # A single capital letter is treated as an initial ("А. Пушкин").
        if len(word) == 1 and word.isupper():
            continue

        # A lowercase letter follows -> the sentence continues
        # ("load avg. нормально", "example.com").
        if after_stripped and after_stripped[0].islower():
            continue

        # Any dot that is part of a run of dots ("...") is not a sentence end.
        if text[pos] == "." and (text[pos + 1:pos + 2] == "." or prev_char == "."):
            continue

        # Dot inside a dotted token that continues with a digit.
        if after_2[:1].isdigit() and "." in before_3:
            continue

        # A percent sign follows shortly after.
        if "%" in after_2:
            continue

        return pos

    return -1
|
||||
Reference in New Issue
Block a user