Files
smart-home-tablet/app/api/voice/chat/route.ts
Cosmo eeac2eefb3
All checks were successful
Deploy / deploy (push) Successful in 5m44s
feat(voice): server-side LLM/STT — porting Python satellite into tablet
Шаг 1 миграции голосового стека из home-voice-assistant в сам tablet:

- /api/voice/chat — Claude Haiku 4.5 с tool-loop (max 4 раунда), prompt
  caching на system + старой истории, история в /data/voice-history/.
  Эмитит command/response/error в voice-bus → орб моргает как раньше.
- /api/voice/stt — Groq whisper-large-v3-turbo, multipart или raw audio.
- lib/voice-text.ts — порт clean_for_speech (без pymorphy3, время в
  именительном падеже) и strip_fillers + RESET_PATTERNS.
- lib/voice-executors.ts — tool executors через loopback fetch на
  существующие /api/voice/tools/* и /api/voice/timer.
- Поддержка ANTHROPIC_PROXY/GROQ_PROXY (fallback на HTTPS_PROXY).

После деплоя нужны GROQ_API_KEY и ANTHROPIC_API_KEY в tablet.env.
Шаги 2 (push-to-talk в браузере) и 3 (wake-word) — отдельно.
2026-04-27 08:24:19 +00:00

156 lines
5.4 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Next.js route segment config: opt this handler out of static rendering/caching
// so it is evaluated on every request.
export const dynamic = 'force-dynamic'
// Run on the Node.js runtime rather than Edge. NOTE(review): presumably required
// because the handler imports `undici` (ProxyAgent) — confirm before changing.
export const runtime = 'nodejs'
import { NextResponse } from 'next/server'
import Anthropic from '@anthropic-ai/sdk'
import { ProxyAgent } from 'undici'
import { voiceBus } from '@/lib/voice-bus'
import { systemPrompt } from '@/lib/voice-prompts'
import { TOOL_SCHEMAS } from '@/lib/voice-tool-schemas'
import { executeTool } from '@/lib/voice-executors'
import { cleanForSpeech, stripFillers, isResetCommand } from '@/lib/voice-text'
import {
loadHistory, saveHistory, resetHistory,
buildMessagesWithCache, stripCacheControl, HistoryMessage,
} from '@/lib/voice-history'
/**
 * Parses a strictly positive integer from an optional string.
 *
 * @param raw - Raw value, typically an environment variable; may be undefined or empty.
 * @param fallback - Value returned when `raw` is missing, non-numeric, zero or negative.
 * @returns The parsed integer, or `fallback` when `raw` is not a usable positive integer.
 */
function parsePositiveInt(raw: string | undefined, fallback: number): number {
  const parsed = Number.parseInt(raw ?? '', 10)
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback
}

/** Anthropic model id; overridable through ANTHROPIC_MODEL (empty counts as unset). */
const MODEL = process.env.ANTHROPIC_MODEL || 'claude-haiku-4-5'

/**
 * Output token cap sent as `max_tokens` on every model call; overridable through
 * VOICE_MAX_TOKENS. A malformed or non-positive override falls back to 300 instead
 * of propagating NaN/0 into the request.
 */
const MAX_TOKENS = parsePositiveInt(process.env.VOICE_MAX_TOKENS, 300)

/** Upper bound on model calls (tool-loop iterations) per incoming request. */
const MAX_TOOL_ROUNDS = 4
// Module-level cache for the SDK client; filled by the first successful client() call.
let _client: Anthropic | null = null

/**
 * Returns the shared Anthropic client, constructing it on first use.
 *
 * The API key is read from ANTHROPIC_API_KEY. When ANTHROPIC_PROXY (or, failing
 * that, HTTPS_PROXY) is non-empty, requests are routed through an undici
 * ProxyAgent passed as the fetch dispatcher.
 *
 * @throws Error when ANTHROPIC_API_KEY is not set.
 */
function client(): Anthropic {
  if (_client !== null) return _client

  const apiKey = process.env.ANTHROPIC_API_KEY
  if (!apiKey) {
    throw new Error('ANTHROPIC_API_KEY not set')
  }

  // The dedicated proxy variable wins over the generic one; '' means "no proxy".
  const proxyUrl = process.env.ANTHROPIC_PROXY || process.env.HTTPS_PROXY || ''
  let fetchOptions: any = undefined
  if (proxyUrl) {
    fetchOptions = { dispatcher: new ProxyAgent(proxyUrl) }
  }

  const created = new Anthropic({ apiKey, fetchOptions })
  _client = created
  return created
}
/**
 * Publishes a 'voice' event on the shared voice bus.
 *
 * @param event - Event kind; this file emits 'command', 'response' and 'error'.
 * @param agent - Agent the event belongs to.
 * @param text - Optional text attached to the event.
 */
function emitVoice(event: string, agent: 'cosmo' | 'lusya', text?: string) {
  // Stamp the payload with the emission time in ISO-8601 form.
  const timestamp = new Date().toISOString()
  const payload = { event, agent, text, timestamp }
  voiceBus.emit('voice', payload)
}
/** Identifier of the voice agent a request is addressed to. */
type AgentId = 'cosmo' | 'lusya'
/** Turns an arbitrary thrown value into a message string (its `message` if it has one). */
function describeError(err: unknown): string {
  const message =
    typeof err === 'object' && err !== null
      ? (err as { message?: unknown }).message
      : undefined
  return String(message || err)
}

/** Runs the requested tools one after another and wraps each outcome as a tool_result block. */
async function runTools(
  toolUses: Anthropic.ToolUseBlock[],
  agent: AgentId,
): Promise<Anthropic.ToolResultBlockParam[]> {
  const results: Anthropic.ToolResultBlockParam[] = []
  for (const tu of toolUses) {
    console.log(`[voice/chat] tool ${tu.name}(${JSON.stringify(tu.input).slice(0, 200)})`)
    const result = await executeTool(tu.name, tu.input, agent)
    results.push({
      type: 'tool_result',
      tool_use_id: tu.id,
      content: JSON.stringify(result),
    })
  }
  return results
}

/**
 * Handles one voice chat turn.
 *
 * Expects a JSON body `{ text: string, agent?: 'cosmo' | 'lusya' }`; any agent
 * value other than 'lusya' is treated as 'cosmo'. The handler emits the command
 * on the voice bus, runs the model with up to MAX_TOOL_ROUNDS tool-loop rounds,
 * persists the new turns to the agent's history and returns the spoken reply.
 *
 * Responses:
 * - 400 `{ error: 'text required' }` when the body is not JSON or `text` is missing/blank.
 * - 200 `{ text, reset: true }` when the text is a reset command (history is cleared).
 * - 502 `{ error: 'llm_failed', detail, text }` when the model call or a tool throws.
 * - 200 `{ text }` with either the cleaned reply or a canned "no answer" phrase.
 */
export async function POST(req: Request) {
  const body = await req.json().catch(() => null)
  if (!body || typeof body.text !== 'string' || !body.text.trim()) {
    return NextResponse.json({ error: 'text required' }, { status: 400 })
  }
  const userText: string = body.text.trim()
  const agent: AgentId = body.agent === 'lusya' ? 'lusya' : 'cosmo'

  // Echo the command to the orb.
  emitVoice('command', agent, userText)

  // Reset command: wipe the history and answer with a canned phrase.
  if (isResetCommand(userText)) {
    await resetHistory(agent)
    const msg = 'Начинаю новую сессию.'
    emitVoice('response', agent, msg)
    return NextResponse.json({ text: msg, reset: true })
  }

  // Load the history and append the new user turn.
  const history = await loadHistory(agent)
  history.push({ role: 'user', content: userText })

  const systemBlocks: Anthropic.TextBlockParam[] = [
    {
      type: 'text',
      text: systemPrompt(agent),
      cache_control: { type: 'ephemeral' },
    },
  ]
  const apiMessages: Anthropic.MessageParam[] = buildMessagesWithCache(history) as any

  // Everything appended to apiMessages past this index is produced by this request.
  // Taken from apiMessages itself so it stays correct even if buildMessagesWithCache
  // does not return exactly one message per history entry.
  const firstNewIdx = apiMessages.length

  let finalText = ''
  try {
    const c = client()
    for (let round = 0; round < MAX_TOOL_ROUNDS; round++) {
      const t0 = Date.now()
      const resp = await c.messages.create({
        model: MODEL,
        max_tokens: MAX_TOKENS,
        system: systemBlocks,
        messages: apiMessages,
        tools: TOOL_SCHEMAS,
      })
      const usage = resp.usage as any
      console.log(
        `[voice/chat] ${agent} round ${round + 1} ${Date.now() - t0}ms · ` +
          `stop=${resp.stop_reason} · in=${usage?.input_tokens} out=${usage?.output_tokens} ` +
          `cache_r=${usage?.cache_read_input_tokens || 0} cache_w=${usage?.cache_creation_input_tokens || 0}`
      )

      // Split the content into spoken text and tool requests. Text from every
      // round is accumulated into the final reply.
      const toolUses: Anthropic.ToolUseBlock[] = []
      for (const block of resp.content) {
        if (block.type === 'text') finalText += block.text
        else if (block.type === 'tool_use') toolUses.push(block)
      }

      if (resp.stop_reason === 'tool_use' && toolUses.length > 0) {
        // Keep the assistant turn verbatim: the tool_result blocks refer to its tool_use ids.
        apiMessages.push({ role: 'assistant', content: resp.content as any })
        const toolResults = await runTools(toolUses, agent)
        apiMessages.push({ role: 'user', content: toolResults })
        continue
      }

      // end_turn / max_tokens / stop_sequence — the final answer is ready.
      // A response cut off by max_tokens can still carry a tool_use block that will
      // never get a tool_result; persisting it would make every later request with
      // this history invalid, so only the remaining blocks are kept.
      const replayable = resp.content.filter((block) => block.type !== 'tool_use')
      if (replayable.length > 0) {
        apiMessages.push({ role: 'assistant', content: replayable as any })
      }
      break
    }
  } catch (e: unknown) {
    const detail = describeError(e)
    console.error('[voice/chat] anthropic error:', detail)
    const msg = 'Что-то сломалось.'
    emitVoice('error', agent, msg)
    return NextResponse.json({ error: 'llm_failed', detail, text: msg }, { status: 502 })
  }

  // Nothing to say: report it without touching the stored history.
  if (!finalText.trim()) {
    const msg = 'Не получил ответ.'
    emitVoice('error', agent, msg)
    return NextResponse.json({ text: msg }, { status: 200 })
  }

  // Persist every turn produced after the initial user turn (tool_use / tool_result included).
  for (const turn of apiMessages.slice(firstNewIdx)) {
    history.push({
      role: turn.role as 'user' | 'assistant',
      content: stripCacheControl(turn.content),
    } as HistoryMessage)
  }
  await saveHistory(agent, history)

  const cleaned = cleanForSpeech(stripFillers(finalText))
  emitVoice('response', agent, cleaned)
  return NextResponse.json({ text: cleaned })
}