Files
smart-home-tablet/app/api/voice/stt/route.ts
Cosmo eeac2eefb3
All checks were successful
Deploy / deploy (push) Successful in 5m44s
feat(voice): server-side LLM/STT — porting Python satellite into tablet
Шаг 1 миграции голосового стека из home-voice-assistant в сам tablet:

- /api/voice/chat — Claude Haiku 4.5 с tool-loop (max 4 раунда), prompt
  caching на system + старой истории, история в /data/voice-history/.
  Эмитит command/response/error в voice-bus → орб моргает как раньше.
- /api/voice/stt — Groq whisper-large-v3-turbo, multipart или raw audio.
- lib/voice-text.ts — порт clean_for_speech (без pymorphy3, время в
  именительном падеже) и strip_fillers + RESET_PATTERNS.
- lib/voice-executors.ts — tool executors через loopback fetch на
  существующие /api/voice/tools/* и /api/voice/timer.
- Поддержка ANTHROPIC_PROXY/GROQ_PROXY (fallback на HTTPS_PROXY).

После деплоя нужны GROQ_API_KEY и ANTHROPIC_API_KEY в tablet.env.
Шаги 2 (push-to-talk в браузере) и 3 (wake-word) — отдельно.
2026-04-27 08:24:19 +00:00

72 lines
2.3 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Next.js route segment config: 'force-dynamic' asks the framework to render this
// route dynamically on each request instead of caching or statically optimizing it.
export const dynamic = 'force-dynamic'
// Select the Node.js runtime; the handler in this file relies on Buffer and on an
// HTTPS proxy agent.
export const runtime = 'nodejs'
import { NextResponse } from 'next/server'
import Groq from 'groq-sdk'
import { HttpsProxyAgent } from 'https-proxy-agent'
import { toFile } from 'groq-sdk/uploads'
// Model name passed to the Groq transcription call. Can be overridden with the
// GROQ_STT_MODEL env variable; an unset or empty value falls back to the default.
const STT_MODEL = process.env.GROQ_STT_MODEL || 'whisper-large-v3-turbo'
/** Groq SDK instance created on first use and then reused for later calls. */
let cachedClient: Groq | null = null

/**
 * Returns the shared Groq client, building it the first time it is requested.
 *
 * The API key comes from GROQ_API_KEY. When GROQ_PROXY (or, failing that,
 * HTTPS_PROXY) is set, requests are routed through an HttpsProxyAgent for
 * that URL; otherwise no custom agent is supplied.
 *
 * @returns the cached Groq client
 * @throws Error when GROQ_API_KEY is missing or empty
 */
function client(): Groq {
  if (cachedClient !== null) {
    return cachedClient
  }

  const { GROQ_API_KEY: apiKey, GROQ_PROXY, HTTPS_PROXY } = process.env
  if (!apiKey) {
    throw new Error('GROQ_API_KEY not set')
  }

  // GROQ_PROXY wins over the generic HTTPS_PROXY; empty values count as "unset".
  const proxyUrl = GROQ_PROXY || HTTPS_PROXY
  const created = new Groq({
    apiKey,
    httpAgent: proxyUrl ? new HttpsProxyAgent(proxyUrl) : undefined,
  })

  cachedClient = created
  return created
}
/** Audio payload extracted from the request, ready to be wrapped by `toFile`. */
type AudioUpload = { name: string; data: Buffer; mime: string }

/** Fallback file name used when the upload does not carry one. */
const DEFAULT_AUDIO_NAME = 'audio.webm'
/** Fallback MIME type used when the upload does not declare one. */
const DEFAULT_AUDIO_MIME = 'audio/webm'

/** Turns an arbitrary thrown value into a printable message (its `message` if it has one). */
function describeError(e: unknown): string {
  if (typeof e === 'object' && e !== null && 'message' in e) {
    const message = (e as { message?: unknown }).message
    if (message) return String(message)
  }
  return String(e)
}

/**
 * Reads the audio payload from the request body.
 *
 * @returns `{ audio }` on success, or `{ error }` with the client-facing message
 *   when the request is well-formed but carries no usable audio.
 * @throws whatever `req.formData()` / `req.arrayBuffer()` throw on an unreadable body.
 */
async function readAudio(req: Request): Promise<{ audio: AudioUpload } | { error: string }> {
  const ct = req.headers.get('content-type') || ''

  // Media type names are case-insensitive, so compare in lower case.
  if (ct.toLowerCase().startsWith('multipart/form-data')) {
    const fd = await req.formData()
    const file = fd.get('file')
    if (!(file instanceof Blob)) {
      return { error: 'file field required' }
    }
    // Reject empty uploads here, mirroring the raw-body branch, instead of
    // sending a zero-byte file to the transcription service.
    if (file.size === 0) {
      return { error: 'empty body' }
    }
    const name = (file as Blob & { name?: unknown }).name
    return {
      audio: {
        name: typeof name === 'string' && name ? name : DEFAULT_AUDIO_NAME,
        data: Buffer.from(await file.arrayBuffer()),
        mime: file.type || DEFAULT_AUDIO_MIME,
      },
    }
  }

  // Any other content type: treat the whole body as raw audio bytes.
  const ab = await req.arrayBuffer()
  if (!ab.byteLength) {
    return { error: 'empty body' }
  }
  return {
    audio: {
      name: DEFAULT_AUDIO_NAME,
      data: Buffer.from(ab),
      mime: ct || DEFAULT_AUDIO_MIME,
    },
  }
}

/**
 * Speech-to-text endpoint.
 *
 * Accepts either multipart/form-data with a "file" field, or raw audio in the
 * request body (Content-Type: audio/*, e.g. audio/webm).
 *
 * Responses:
 * - 200 `{ text: string }` — the transcription (empty string if none was returned).
 * - 400 `{ error }` — missing "file" field, empty audio, or unreadable body.
 * - 502 `{ error: 'stt_failed', detail }` — the transcription step threw; this
 *   includes the case where `client()` throws while creating the Groq client.
 */
export async function POST(req: Request) {
  let audio: AudioUpload
  try {
    const parsed = await readAudio(req)
    if ('error' in parsed) {
      return NextResponse.json({ error: parsed.error }, { status: 400 })
    }
    audio = parsed.audio
  } catch (e: unknown) {
    // Log the cause so malformed uploads can be diagnosed; the client only
    // gets a generic error code.
    console.error('[voice/stt] failed to read body:', describeError(e))
    return NextResponse.json({ error: 'failed_to_read_body' }, { status: 400 })
  }

  try {
    const file = await toFile(audio.data, audio.name, { type: audio.mime })
    const result = await client().audio.transcriptions.create({
      file,
      model: STT_MODEL,
      // The transcription language is fixed to Russian.
      language: 'ru',
    })
    const rawText: unknown = (result as { text?: unknown }).text
    const text = typeof rawText === 'string' ? rawText : ''
    return NextResponse.json({ text })
  } catch (e: unknown) {
    const detail = describeError(e)
    console.error('[voice/stt] groq error:', detail)
    return NextResponse.json({ error: 'stt_failed', detail }, { status: 502 })
  }
}