feat(voice): server-side LLM/STT — porting Python satellite into tablet
All checks were successful
Deploy / deploy (push) Successful in 5m44s
All checks were successful
Deploy / deploy (push) Successful in 5m44s
Шаг 1 миграции голосового стека из home-voice-assistant в сам tablet:

- /api/voice/chat — Claude Haiku 4.5 с tool-loop (максимум 4 раунда), prompt caching на system и на старой истории, история хранится в /data/voice-history/. Эмитит command/response/error в voice-bus → орб моргает как раньше.
- /api/voice/stt — Groq whisper-large-v3-turbo, multipart или raw audio.
- lib/voice-text.ts — порт clean_for_speech (без pymorphy3, время в именительном падеже), а также strip_fillers и RESET_PATTERNS.
- lib/voice-executors.ts — tool executors через loopback fetch на существующие /api/voice/tools/* и /api/voice/timer.
- Поддержка ANTHROPIC_PROXY/GROQ_PROXY (fallback на HTTPS_PROXY).

После деплоя нужны GROQ_API_KEY и ANTHROPIC_API_KEY в tablet.env.

Шаги 2 (push-to-talk в браузере) и 3 (wake-word) — отдельно.
This commit is contained in:
155
app/api/voice/chat/route.ts
Normal file
155
app/api/voice/chat/route.ts
Normal file
@@ -0,0 +1,155 @@
|
||||
export const dynamic = 'force-dynamic'
|
||||
export const runtime = 'nodejs'
|
||||
|
||||
import { NextResponse } from 'next/server'
|
||||
import Anthropic from '@anthropic-ai/sdk'
|
||||
import { ProxyAgent } from 'undici'
|
||||
|
||||
import { voiceBus } from '@/lib/voice-bus'
|
||||
import { systemPrompt } from '@/lib/voice-prompts'
|
||||
import { TOOL_SCHEMAS } from '@/lib/voice-tool-schemas'
|
||||
import { executeTool } from '@/lib/voice-executors'
|
||||
import { cleanForSpeech, stripFillers, isResetCommand } from '@/lib/voice-text'
|
||||
import {
|
||||
loadHistory, saveHistory, resetHistory,
|
||||
buildMessagesWithCache, stripCacheControl, HistoryMessage,
|
||||
} from '@/lib/voice-history'
|
||||
|
||||
/**
 * Parses a strictly positive integer from an optional string.
 *
 * @param raw - Raw value, typically an environment variable; may be undefined or empty.
 * @param fallback - Value returned when `raw` is missing, malformed, zero or negative.
 * @returns The parsed integer, or `fallback` when `raw` does not yield a positive integer.
 */
function positiveIntOr(raw: string | undefined, fallback: number): number {
  const parsed = Number.parseInt(raw ?? '', 10)
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback
}

/** Model id sent with every request; overridable through ANTHROPIC_MODEL. */
const MODEL = process.env.ANTHROPIC_MODEL || 'claude-haiku-4-5'

/**
 * Per-response token budget; overridable through VOICE_MAX_TOKENS.
 * A malformed or non-positive override falls back to 300 instead of turning
 * into NaN/0, which would make every request fail.
 */
const MAX_TOKENS = positiveIntOr(process.env.VOICE_MAX_TOKENS, 300)

/** Upper bound on model calls per user turn (each tool round costs one call). */
const MAX_TOOL_ROUNDS = 4
|
||||
|
||||
/** Cached SDK client; stays null until the first successful `client()` call. */
let _client: Anthropic | null = null

/**
 * Returns the module-wide Anthropic client, building it on first use.
 *
 * The API key is read from ANTHROPIC_API_KEY. When ANTHROPIC_PROXY (or, failing
 * that, HTTPS_PROXY) is set, requests are routed through an undici ProxyAgent
 * passed to the SDK as the fetch dispatcher.
 *
 * @returns The shared client instance.
 * @throws Error when ANTHROPIC_API_KEY is not set.
 */
function client(): Anthropic {
  if (_client !== null) return _client

  const key = process.env.ANTHROPIC_API_KEY
  if (!key) throw new Error('ANTHROPIC_API_KEY not set')

  const proxyUrl = process.env.ANTHROPIC_PROXY || process.env.HTTPS_PROXY || ''

  // `dispatcher` is an undici extension that is not part of the standard fetch
  // init type, hence the loose typing here.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  let proxyOptions: any = undefined
  if (proxyUrl) {
    proxyOptions = { dispatcher: new ProxyAgent(proxyUrl) }
  }

  const instance = new Anthropic({ apiKey: key, fetchOptions: proxyOptions })
  _client = instance
  return instance
}
|
||||
|
||||
/**
 * Publishes one event on the voice bus under the 'voice' channel.
 *
 * @param event - Event name; this file emits 'command', 'response' and 'error'.
 * @param agent - Agent the event belongs to.
 * @param text - Optional text carried with the event.
 */
function emitVoice(event: string, agent: 'cosmo' | 'lusya', text?: string) {
  // Stamp the event with the current time in ISO-8601 form.
  const timestamp = new Date().toISOString()
  const payload = { event, agent, text, timestamp }
  voiceBus.emit('voice', payload)
}
|
||||
|
||||
/** Identifier of a voice agent handled by this route. */
type AgentId = 'cosmo' | 'lusya'
|
||||
|
||||
/** Token counters read from a response's `usage` object for logging. */
type UsageCounters = {
  input_tokens?: number
  output_tokens?: number
  cache_read_input_tokens?: number | null
  cache_creation_input_tokens?: number | null
}

/** Turns an arbitrary thrown value into a printable message. */
function describeError(err: unknown): string {
  if (typeof err === 'object' && err !== null && 'message' in err) {
    const message = (err as { message?: unknown }).message
    if (message) return String(message)
  }
  return String(err)
}

/**
 * Executes the requested tools one after another and returns one tool_result
 * per tool_use, in the same order.
 *
 * A tool that throws does not abort the turn: its failure is reported back to
 * the model as an `is_error` tool_result so the model can explain or retry.
 */
async function runTools(
  toolUses: Anthropic.ToolUseBlock[],
  agent: AgentId,
): Promise<Anthropic.ToolResultBlockParam[]> {
  const results: Anthropic.ToolResultBlockParam[] = []
  for (const tu of toolUses) {
    console.log(`[voice/chat] tool ${tu.name}(${JSON.stringify(tu.input).slice(0, 200)})`)
    try {
      const result = await executeTool(tu.name, tu.input, agent)
      results.push({
        type: 'tool_result',
        tool_use_id: tu.id,
        content: JSON.stringify(result),
      })
    } catch (err: unknown) {
      const detail = describeError(err)
      console.error(`[voice/chat] tool ${tu.name} failed:`, detail)
      results.push({
        type: 'tool_result',
        tool_use_id: tu.id,
        content: JSON.stringify({ error: detail }),
        is_error: true,
      })
    }
  }
  return results
}

/**
 * POST /api/voice/chat — runs one voice turn against the model.
 *
 * Request body (JSON): `{ text: string, agent?: 'cosmo' | 'lusya' }`; any agent
 * value other than 'lusya' is treated as 'cosmo'.
 *
 * Flow: echo the command on the voice bus, handle the reset command, load the
 * stored history, run up to MAX_TOOL_ROUNDS model calls (executing tools in
 * between), persist the new turns, then emit and return the cleaned reply.
 *
 * Responses:
 * - 400 `{ error: 'text required' }` when the body is not JSON or `text` is empty;
 * - 200 `{ text, reset: true }` after a reset command;
 * - 502 `{ error: 'llm_failed', detail, text }` when the model call fails;
 * - 200 `{ text }` otherwise (including the "no answer" fallback).
 */
export async function POST(req: Request) {
  const body = await req.json().catch(() => null)
  if (!body || typeof body.text !== 'string' || !body.text.trim()) {
    return NextResponse.json({ error: 'text required' }, { status: 400 })
  }
  const userText: string = body.text.trim()
  const agent: AgentId = body.agent === 'lusya' ? 'lusya' : 'cosmo'

  // Echo the command to the orb.
  emitVoice('command', agent, userText)

  // Reset command: wipe the history and answer with a canned phrase.
  if (isResetCommand(userText)) {
    await resetHistory(agent)
    const msg = 'Начинаю новую сессию.'
    emitVoice('response', agent, msg)
    return NextResponse.json({ text: msg, reset: true })
  }

  // Load the history and append the new user turn.
  const history = await loadHistory(agent)
  history.push({ role: 'user', content: userText })

  const systemBlocks: Anthropic.TextBlockParam[] = [
    {
      type: 'text',
      text: systemPrompt(agent),
      cache_control: { type: 'ephemeral' },
    },
  ]

  // The helper's return type is project-defined; the cast mirrors the original code.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const apiMessages: Anthropic.MessageParam[] = buildMessagesWithCache(history) as any

  // Everything pushed onto apiMessages from here on is new and must be persisted.
  // Taking the index from apiMessages itself (rather than from history.length)
  // stays correct even if the builder does not map history entries 1:1.
  const firstNewTurnIdx = apiMessages.length

  // Text produced in each round, kept separate so rounds are not glued together.
  const spokenParts: string[] = []

  try {
    const c = client()
    for (let round = 0; round < MAX_TOOL_ROUNDS; round++) {
      const t0 = Date.now()
      const resp = await c.messages.create({
        model: MODEL,
        max_tokens: MAX_TOKENS,
        system: systemBlocks,
        messages: apiMessages,
        tools: TOOL_SCHEMAS,
      })

      const usage = resp.usage as UsageCounters | undefined
      console.log(
        `[voice/chat] ${agent} round ${round + 1} ${Date.now() - t0}ms · ` +
          `stop=${resp.stop_reason} · in=${usage?.input_tokens} out=${usage?.output_tokens} ` +
          `cache_r=${usage?.cache_read_input_tokens ?? 0} cache_w=${usage?.cache_creation_input_tokens ?? 0}`
      )

      // Split the content into spoken text and tool requests.
      let roundText = ''
      const toolUses: Anthropic.ToolUseBlock[] = []
      for (const block of resp.content) {
        if (block.type === 'text') roundText += block.text
        else if (block.type === 'tool_use') toolUses.push(block)
      }
      if (roundText.trim()) spokenParts.push(roundText)

      if (resp.stop_reason === 'tool_use' && toolUses.length) {
        // Keep the assistant turn verbatim: the tool_use ids must match the results.
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        apiMessages.push({ role: 'assistant', content: resp.content as any })
        apiMessages.push({ role: 'user', content: await runTools(toolUses, agent) })
        continue
      }

      // end_turn / max_tokens / stop_sequence — the final answer is ready.
      // A reply cut off by max_tokens can still carry a tool_use block that will
      // never get a tool_result; storing it would leave an unmatched tool_use in
      // the history sent with every later request, so such blocks are dropped.
      const finalContent = resp.content.filter((block) => block.type !== 'tool_use')
      if (finalContent.length) {
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        apiMessages.push({ role: 'assistant', content: finalContent as any })
      }
      break
    }
  } catch (e: unknown) {
    const detail = describeError(e)
    console.error('[voice/chat] anthropic error:', detail)
    const msg = 'Что-то сломалось.'
    emitVoice('error', agent, msg)
    return NextResponse.json({ error: 'llm_failed', detail, text: msg }, { status: 502 })
  }

  const finalText = spokenParts.join(' ')

  if (!finalText.trim()) {
    const msg = 'Не получил ответ.'
    emitVoice('error', agent, msg)
    return NextResponse.json({ text: msg }, { status: 200 })
  }

  // Persist every turn produced after the initial user message
  // (including tool_use / tool_result turns).
  for (const turn of apiMessages.slice(firstNewTurnIdx)) {
    history.push({
      role: turn.role,
      content: stripCacheControl(turn.content),
    } as HistoryMessage)
  }
  await saveHistory(agent, history)

  const cleaned = cleanForSpeech(stripFillers(finalText))
  emitVoice('response', agent, cleaned)
  return NextResponse.json({ text: cleaned })
}
|
||||
Reference in New Issue
Block a user