Add Russian translation
This commit is contained in:
@@ -1,5 +1,79 @@
|
||||
import re
|
||||
|
||||
from num2words import num2words
|
||||
import pymorphy3
|
||||
|
||||
_morph = pymorphy3.MorphAnalyzer()
|
||||
|
||||
# Grammatical case required by the preposition that precedes a time
# expression. Values are the case grammemes handed to pymorphy's ``inflect``.
_PREP_CASE = {
    preposition: case
    for case, prepositions in (
        ("gent", ("с", "со", "до", "от", "после", "около")),
        ("datv", ("к", "ко")),
        ("accs", ("в", "во", "на", "через", "за")),
        ("ablt", ("перед", "между")),
        ("loct", ("о", "об", "при")),
    )
    for preposition in prepositions
}
|
||||
|
||||
|
||||
def _inflect_num(n: int, case: str, gender: str = "masc") -> str:
    """Spell out ``n`` in Russian words, inflected for ``case`` and ``gender``.

    Example: ``_inflect_num(11, "gent")`` -> "одиннадцати".

    Args:
        n: Number to spell out.
        case: pymorphy case grammeme ("nomn", "gent", "datv", "accs", "ablt",
            "loct").
        gender: pymorphy gender grammeme ("masc", "femn" or "neut") of the
            noun being counted. It only affects words that have gendered
            forms ("один"/"одна", "два"/"две"); any other value is ignored.

    Returns:
        The number words joined by single spaces. A word that pymorphy cannot
        inflect is kept exactly as num2words produced it.
    """
    words = num2words(n, lang="ru", to="cardinal")
    # The num2words output is used as-is for masculine nominative.
    if case == "nomn" and gender == "masc":
        return words

    gender_tags = {gender} if gender in ("masc", "femn", "neut") else set()

    # Grammeme sets to try for every word, most specific first. Most numerals
    # have no gender, so the gendered requests simply fail for them and the
    # plain-case request is used instead.
    attempts = []
    if case == "accs":
        # Hours and minutes are inanimate. Without "inan" the request is
        # ambiguous for words whose accusative depends on animacy, and
        # pymorphy may return the animate form ("одного" instead of "один").
        attempts.append({case, "inan"} | gender_tags)
    if gender_tags:
        attempts.append({case} | gender_tags)
    if case == "accs":
        attempts.append({case, "inan"})
    attempts.append({case})

    out = []
    for word in words.split():
        parsed = _morph.parse(word)[0]
        inflected = None
        for grammemes in attempts:
            inflected = parsed.inflect(grammemes)
            if inflected:
                break
        out.append(inflected.word if inflected else word)
    return " ".join(out)
|
||||
|
||||
|
||||
# Declension of "час". After a numeral the plural nominative/accusative is
# never used (the counting forms "часа"/"часов" are), so only the oblique
# plural forms are listed.
_HOUR_SINGULAR = {
    "nomn": "час",
    "gent": "часа",
    "datv": "часу",
    "accs": "час",
    "ablt": "часом",
    "loct": "часе",
}
_HOUR_PLURAL_OBLIQUE = {
    "gent": "часов",
    "datv": "часам",
    "ablt": "часами",
    "loct": "часах",
}


def _hours_word(n: int, case: str) -> str:
    """Return the form of "час" that agrees with the number ``n`` in ``case``.

    Nominative and accusative use the counting forms: 1 час, 2-4 часа,
    5+ часов (11-14 also take "часов"). In the other cases the noun takes the
    same case as the numeral: singular after numbers ending in 1 (except 11),
    plural after everything else ("двух часов", "к двум часам").

    Args:
        n: The number of hours the word has to agree with.
        case: Case grammeme: "nomn", "gent", "datv", "accs", "ablt" or "loct".
            Any other value is treated like the nominative.

    Returns:
        The inflected noun, without the numeral.
    """
    last2 = n % 100
    last1 = n % 10

    if last1 == 1 and last2 != 11:
        return _HOUR_SINGULAR.get(case, "час")
    if case in _HOUR_PLURAL_OBLIQUE:
        return _HOUR_PLURAL_OBLIQUE[case]
    # Nominative / accusative counting forms.
    if 2 <= last1 <= 4 and not 12 <= last2 <= 14:
        return "часа"
    return "часов"
|
||||
|
||||
|
||||
# Declension of "минута". After a numeral the plural nominative/accusative is
# never used (the counting forms "минуты"/"минут" are), so only the oblique
# plural forms are listed.
_MINUTE_SINGULAR = {
    "nomn": "минута",
    "gent": "минуты",
    "datv": "минуте",
    "accs": "минуту",
    "ablt": "минутой",
    "loct": "минуте",
}
_MINUTE_PLURAL_OBLIQUE = {
    "gent": "минут",
    "datv": "минутам",
    "ablt": "минутами",
    "loct": "минутах",
}


def _minutes_word(n: int, case: str) -> str:
    """Return the form of "минута" that agrees with the number ``n`` in ``case``.

    Nominative and accusative use the counting forms: 1 минута (accusative
    "минуту"), 2-4 минуты, 5+ минут (11-14 also take "минут"). In the other
    cases the noun takes the same case as the numeral: singular after numbers
    ending in 1 (except 11), plural after everything else ("двух минут",
    "к двум минутам").

    Args:
        n: The number of minutes the word has to agree with.
        case: Case grammeme: "nomn", "gent", "datv", "accs", "ablt" or "loct".
            Any other value is treated like the nominative.

    Returns:
        The inflected noun, without the numeral.
    """
    last2 = n % 100
    last1 = n % 10

    if last1 == 1 and last2 != 11:
        return _MINUTE_SINGULAR.get(case, "минута")
    if case in _MINUTE_PLURAL_OBLIQUE:
        return _MINUTE_PLURAL_OBLIQUE[case]
    # Nominative / accusative counting forms.
    if 2 <= last1 <= 4 and not 12 <= last2 <= 14:
        return "минуты"
    return "минут"
|
||||
|
||||
|
||||
def _format_time(h: int, mm: int, case: str) -> str:
    """Spell out a clock time as Russian words in the given case.

    The hour numeral is requested as masculine and followed by the matching
    form of "час". When ``mm`` is non-zero, the minute numeral (requested as
    feminine) and the matching form of "минута" are appended.

    Args:
        h: Hours.
        mm: Minutes; zero leaves the minute part out entirely.
        case: Case grammeme forwarded to the numeral and noun helpers.

    Returns:
        The words joined by single spaces.
    """
    pieces = [_inflect_num(h, case, gender="masc"), _hours_word(h, case)]
    if mm:
        pieces.extend(
            (_inflect_num(mm, case, gender="femn"), _minutes_word(mm, case))
        )
    return " ".join(pieces)
|
||||
|
||||
|
||||
# Единицы измерения со слэшем — раскрываем до чтения слэша
|
||||
UNIT_SLASH = [
|
||||
@@ -29,6 +103,20 @@ def clean_for_speech(text: str) -> str:
|
||||
text = re.sub(r'(^|\s)-(\d)', r'\1минус \2', text)
|
||||
text = re.sub(r'±(\d)', r'плюс-минус \1', text)
|
||||
|
||||
# время "HH:MM" → слова в падеже по предшествующему предлогу
|
||||
def _time_repl(m):
    """Regex callback: turn an "HH:MM" match into spoken Russian words.

    Expects a match whose group 1 is an optional preceding preposition and
    whose groups 2 and 3 are the hour and minute digits.

    The preposition selects the grammatical case through ``_PREP_CASE``
    (nominative when there is none). It is written back exactly as it
    appeared in the text, so a capitalised sentence-initial "В" keeps its
    capital; only the lookup is case-insensitive.

    Returns:
        The replacement text, or the matched text unchanged when the digits
        are not a valid time (hours 0-23, minutes 0-59).
    """
    prep = m.group(1) or ""
    h, mm = int(m.group(2)), int(m.group(3))
    if not (0 <= h <= 23 and 0 <= mm <= 59):
        return m.group(0)
    case = _PREP_CASE.get(prep.lower(), "nomn")
    words = _format_time(h, mm, case)
    return f"{prep} {words}" if prep else words
|
||||
text = re.sub(
|
||||
r'(?:\b(с|со|до|от|после|около|к|ко|в|во|на|через|за|перед|между|о|об|при)\s+)?(\d{1,2}):(\d{2})\b',
|
||||
_time_repl, text, flags=re.IGNORECASE,
|
||||
)
|
||||
|
||||
# дроби и отношения "12/15" → "12 из 15", "5/10" → "5 из 10"
|
||||
text = re.sub(r'(\d+)\s*/\s*(\d+)', r'\1 из \2', text)
|
||||
# одиночный слэш — как союз "или"
|
||||
|
||||
Reference in New Issue
Block a user