Files
smart-home-tablet/app/api/voice/chat/route.ts
Cosmo 130a58637a
Some checks failed
Deploy / deploy (push) Failing after 1m0s
feat: switch voice from Groq to Claude Haiku via ai-proxy
2026-05-01 11:34:29 +00:00

265 lines
8.7 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Next.js route segment config: always evaluate this handler per request
// instead of serving a statically cached response.
export const dynamic = 'force-dynamic'
// Next.js route segment config: run this handler on the Node.js runtime.
export const runtime = 'nodejs'
import { NextResponse } from 'next/server'
import { voiceBus } from '@/lib/voice-bus'
import { systemPrompt } from '@/lib/voice-prompts'
import { TOOL_SCHEMAS, executeTool } from '@/lib/tools/_registry'
import { cleanForSpeech, stripFillers, isResetCommand } from '@/lib/voice-text'
import {
loadHistory, saveHistory, resetHistory,
HistoryMessage,
} from '@/lib/voice-history'
/**
 * Parses an integer setting taken from the environment.
 *
 * @param raw - Raw environment value; may be undefined or empty.
 * @param fallback - Value used when `raw` is missing or does not start with a number.
 * @returns The parsed integer, or `fallback` when parsing yields NaN.
 */
function envInt(raw: string | undefined, fallback: number): number {
  const parsed = parseInt(raw || '', 10)
  // A NaN limit would make every `count >= limit` comparison false and silently
  // disable the check that depends on it, so fall back instead.
  return Number.isNaN(parsed) ? fallback : parsed
}

/** Model identifier sent with every proxy request; override with CLAUDE_MODEL. */
const MODEL = process.env.CLAUDE_MODEL || 'claude-haiku-4-5'
/** `max_tokens` value for each proxy request; override with VOICE_MAX_TOKENS. */
const MAX_TOKENS = envInt(process.env.VOICE_MAX_TOKENS, 300)
/** Upper bound on model round trips per voice turn (tool calls consume rounds). */
const MAX_TOOL_ROUNDS = 4
/** Requests allowed per rate-limit key per 60 s window; override with VOICE_RATE_LIMIT. */
const RATE_LIMIT_PER_MINUTE = envInt(process.env.VOICE_RATE_LIMIT, 20)
/** Base URL of the AI proxy; `/v1/messages` is appended when calling it. */
const AI_PROXY_URL = process.env.AI_PROXY_URL || 'http://192.168.31.103:3301'
// NOTE(review): the fallback below looks like a real credential committed to source.
// Prefer requiring AI_PROXY_KEY from the environment and rotating this value; it is
// kept here unchanged so existing deployments keep working.
/** Value sent in the `X-Proxy-Key` header of every proxy request. */
const AI_PROXY_KEY = process.env.AI_PROXY_KEY || 'review-bot-proxy-d3ff719d7c87e529909c09fadcbf2748'
// Rate limit
/** Per-key request counters; each bucket expires at `resetAt` (epoch milliseconds). */
const rateBuckets = new Map<string, { count: number; resetAt: number }>()

/**
 * Fixed-window rate limiter keyed by caller identity.
 *
 * @param key - Identifier of the bucket to charge.
 * @returns `true` when the request is allowed, `false` when the key has already
 *   used RATE_LIMIT_PER_MINUTE requests in its current 60 s window.
 */
function rateLimit(key: string): boolean {
  const now = Date.now()
  const bucket = rateBuckets.get(key)

  // No bucket yet, or its window has elapsed: open a fresh window with this request counted.
  if (bucket === undefined || bucket.resetAt <= now) {
    rateBuckets.set(key, { count: 1, resetAt: now + 60_000 })
    return true
  }

  const exhausted = bucket.count >= RATE_LIMIT_PER_MINUTE
  if (!exhausted) {
    bucket.count += 1
  }
  return !exhausted
}
/** Epoch milliseconds of the most recent bucket sweep (0 = never swept). */
let lastSweep = 0

/**
 * Removes expired rate-limit buckets, at most once every five minutes.
 * Calls made sooner than that after the previous sweep do nothing.
 */
function sweep() {
  const now = Date.now()
  const sinceLastSweep = now - lastSweep
  if (sinceLastSweep >= 5 * 60_000) {
    lastSweep = now
    rateBuckets.forEach((bucket, key) => {
      if (bucket.resetAt <= now) {
        rateBuckets.delete(key)
      }
    })
  }
}
/**
 * Converts OpenAI-style tool schemas (`{ function: { name, description, parameters } }`)
 * into Anthropic tool definitions (`{ name, description, input_schema }`).
 *
 * @param tools - Tool schemas; each entry must carry a `function` object.
 * @returns One Anthropic tool per input entry, in the same order. A missing
 *   description becomes `''`; missing parameters become an empty object schema.
 */
function toAnthropicTools(tools: any[]): any[] {
  const convert = (tool: any) => {
    const fn = tool.function
    const name = fn.name
    const description = fn.description || ''
    const input_schema = fn.parameters || { type: 'object', properties: {} }
    return { name, description, input_schema }
  }
  return tools.map(tool => convert(tool))
}
/**
 * Converts stored history (OpenAI chat format) into Anthropic `messages`.
 *
 * - `system` entries are dropped (the system prompt is sent separately).
 * - `assistant` entries with tool calls become a block list: optional text
 *   followed by one `tool_use` block per call.
 * - `tool` entries become `tool_result` blocks inside a user message; results
 *   that directly follow another block-list user message are merged into it.
 *
 * @param history - Stored conversation turns.
 * @returns Messages in Anthropic format, in the original order.
 */
function historyToAnthropicMessages(history: HistoryMessage[]): any[] {
  const messages: any[] = []

  // Unparseable tool arguments degrade to an empty input object.
  const parseInput = (call: any): any => {
    try {
      return JSON.parse(call.function.arguments)
    } catch {
      return {}
    }
  }

  for (const entry of history) {
    switch (entry.role) {
      case 'user': {
        messages.push({ role: 'user', content: entry.content || '' })
        break
      }

      case 'assistant': {
        const calls = entry.tool_calls
        const hasToolCalls = Boolean(calls && calls.length > 0)
        if (!hasToolCalls) {
          messages.push({ role: 'assistant', content: entry.content || '' })
          break
        }
        const blocks: any[] = []
        if (entry.content) {
          blocks.push({ type: 'text', text: entry.content })
        }
        for (const call of calls) {
          const input = parseInput(call)
          blocks.push({ type: 'tool_use', id: call.id, name: call.function.name, input })
        }
        messages.push({ role: 'assistant', content: blocks })
        break
      }

      case 'tool': {
        const resultBlock = {
          type: 'tool_result',
          tool_use_id: entry.tool_call_id,
          content: entry.content || '',
        }
        const previous = messages[messages.length - 1]
        const canMerge = previous && previous.role === 'user' && Array.isArray(previous.content)
        if (canMerge) {
          previous.content.push(resultBlock)
        } else {
          messages.push({ role: 'user', content: [resultBlock] })
        }
        break
      }

      default:
        // 'system' (and any unrecognised role) is not forwarded.
        break
    }
  }

  return messages
}
/**
 * Sends one Messages request to the AI proxy and returns the parsed JSON reply.
 *
 * @param model - Model identifier placed in the request body.
 * @param system - System prompt placed in the `system` field.
 * @param messages - Conversation in Anthropic message format.
 * @param tools - Optional tool definitions; omitted from the body when empty.
 * @returns The proxy's JSON response body.
 * @throws Error `claude_proxy_<status>: <body>` when the proxy answers with a non-2xx status.
 * @throws Error `claude_proxy_timeout: ...` when the proxy does not finish responding in time.
 */
async function claudeRequest(model: string, system: string, messages: any[], tools?: any[]): Promise<any> {
  // Upper bound for one round trip (headers + body). Without it a stalled proxy
  // would keep the voice request hanging indefinitely.
  const timeoutMs = 30_000

  const body: any = {
    model,
    max_tokens: MAX_TOKENS,
    system,
    messages,
  }
  if (tools && tools.length > 0) {
    body.tools = tools
  }

  const controller = new AbortController()
  const timer = setTimeout(() => controller.abort(), timeoutMs)
  try {
    const res = await fetch(`${AI_PROXY_URL}/v1/messages`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'X-Proxy-Key': AI_PROXY_KEY,
        'anthropic-version': '2023-06-01',
      },
      body: JSON.stringify(body),
      signal: controller.signal,
    })
    if (!res.ok) {
      const err = await res.text()
      throw new Error(`claude_proxy_${res.status}: ${err}`)
    }
    // Await here so the deadline also covers reading the response body.
    return await res.json()
  } catch (e) {
    // Replace the generic abort error with one that names the actual cause.
    if (controller.signal.aborted) {
      throw new Error(`claude_proxy_timeout: no response within ${timeoutMs}ms`)
    }
    throw e
  } finally {
    clearTimeout(timer)
  }
}
/** Voice agents this route can answer as. */
type AgentId = 'cosmo' | 'lusya'

/**
 * Publishes a `'voice'` event on the voice bus.
 *
 * @param event - Event name carried in the payload (this file uses 'command', 'response', 'error').
 * @param agent - Agent the event belongs to.
 * @param text - Optional text associated with the event.
 */
function emitVoice(event: string, agent: AgentId, text?: string) {
  const timestamp = new Date().toISOString()
  const payload = { event, agent, text, timestamp }
  voiceBus.emit('voice', payload)
}
/**
 * Handles one voice chat turn.
 *
 * Flow: rate-limit the caller, validate `{ text, agent? }`, handle the reset
 * command, then run up to MAX_TOOL_ROUNDS model round trips (executing any
 * requested tools between rounds), persist the new turns and return the
 * speech-ready reply.
 *
 * Responses:
 * - 429 `{ error: 'rate_limited' }` when the caller's bucket is exhausted.
 * - 400 `{ error: 'text required' }` when the body has no non-blank `text` string.
 * - 200 `{ text, reset: true }` after a reset command.
 * - 502 `{ error: 'llm_failed', detail, text }` when a model or tool call throws.
 * - 200 `{ text }` otherwise (including the "no answer" fallback, which is not saved to history).
 */
export async function POST(req: Request) {
  // Rate-limit key preference: auth cookie token, then the internal marker,
  // then the first forwarded address, then a shared anonymous bucket.
  const cookieHeader = req.headers.get('cookie') || ''
  const authToken = cookieHeader.match(/auth_token=([a-f0-9]{32,})/i)?.[1]
  const internalHeader = req.headers.get('x-voice-internal') || ''
  const forwardedFor = req.headers.get('x-forwarded-for') || ''

  let bucketKey: string
  if (authToken) {
    bucketKey = authToken
  } else if (internalHeader) {
    bucketKey = 'internal'
  } else {
    bucketKey = forwardedFor.split(',')[0].trim() || 'anon'
  }

  sweep()
  const allowed = rateLimit(bucketKey)
  if (!allowed) {
    return NextResponse.json({ error: 'rate_limited' }, { status: 429 })
  }

  // Malformed JSON is treated the same as a missing body.
  const payload = await req.json().catch(() => null)
  const hasText = payload && typeof payload.text === 'string' && payload.text.trim()
  if (!hasText) {
    return NextResponse.json({ error: 'text required' }, { status: 400 })
  }

  const userText: string = payload.text.trim().slice(0, 4000)
  const agent: AgentId = payload.agent === 'lusya' ? 'lusya' : 'cosmo'
  emitVoice('command', agent, userText)

  if (isResetCommand(userText)) {
    await resetHistory(agent)
    const resetReply = 'Начинаю новую сессию.'
    emitVoice('response', agent, resetReply)
    return NextResponse.json({ text: resetReply, reset: true })
  }

  const history = await loadHistory(agent)
  const sysPrompt = systemPrompt(agent)
  const anthropicTools = toAnthropicTools(TOOL_SCHEMAS as any[])

  // Conversation sent to the model: stored history plus the new user turn.
  const messages: any[] = historyToAnthropicMessages(history)
  messages.push({ role: 'user', content: userText })

  // Turns produced by this request, appended to history on success.
  const newTurns: HistoryMessage[] = [{ role: 'user', content: userText }]

  // Concatenates the text of every `text` block in a model reply.
  const textOf = (blocks: any[]): string =>
    blocks
      .filter((b: any) => b.type === 'text')
      .map((b: any) => b.text)
      .join('')

  let finalText = ''
  try {
    let round = 0
    while (round < MAX_TOOL_ROUNDS) {
      round += 1
      const startedAt = Date.now()
      const reply = await claudeRequest(MODEL, sysPrompt, messages, anthropicTools)
      const elapsedMs = Date.now() - startedAt
      console.log(
        `[voice/chat] ${agent} round ${round} ${elapsedMs}ms · ` +
          `stop=${reply.stop_reason} · in=${reply.usage?.input_tokens} out=${reply.usage?.output_tokens}`
      )

      const content: any[] = reply.content || []
      const stopReason: string = reply.stop_reason || 'end_turn'

      // Anything other than a tool request is the final answer for this turn.
      if (stopReason !== 'tool_use') {
        finalText = textOf(content)
        newTurns.push({ role: 'assistant', content: finalText })
        break
      }

      const toolCalls = content.filter((b: any) => b.type === 'tool_use')
      const interimText = textOf(content)

      // Echo the assistant's tool request back into the conversation and history.
      messages.push({ role: 'assistant', content })
      newTurns.push({
        role: 'assistant',
        content: interimText || null,
        tool_calls: toolCalls.map((b: any) => ({
          id: b.id,
          function: { name: b.name, arguments: JSON.stringify(b.input) },
        })),
      })

      // Run the requested tools one after another and collect their results.
      const toolResults: any[] = []
      for (const call of toolCalls) {
        console.log(`[voice/chat] tool ${call.name}(${JSON.stringify(call.input).slice(0, 200)})`)
        const outcome = await executeTool(call.name, call.input || {}, agent)
        const serialized = JSON.stringify(outcome)
        toolResults.push({
          type: 'tool_result',
          tool_use_id: call.id,
          content: serialized,
        })
        newTurns.push({
          role: 'tool',
          content: serialized,
          tool_call_id: call.id,
        })
      }

      // Tool results go back to the model as a single user message.
      messages.push({ role: 'user', content: toolResults })
    }
  } catch (e: any) {
    const errStr = String(e?.message || e)
    console.error('[voice/chat] claude error:', errStr)
    const failureReply = 'Что-то сломалось.'
    emitVoice('error', agent, failureReply)
    return NextResponse.json({ error: 'llm_failed', detail: errStr, text: failureReply }, { status: 502 })
  }

  // No usable text (empty reply, or tool rounds exhausted): report it, do not save history.
  if (!finalText.trim()) {
    const emptyReply = 'Не получил ответ.'
    emitVoice('error', agent, emptyReply)
    return NextResponse.json({ text: emptyReply }, { status: 200 })
  }

  await saveHistory(agent, [...history, ...newTurns])

  // Drop lines that look like leaked tool-call syntax rather than speech.
  const toolCallLine = /^(get_|set_|control_|create_|update_|delete_|cancel_)[a-z_]+\s/
  const keptLines: string[] = []
  for (const rawLine of finalText.split('\n')) {
    const line = rawLine.trim()
    const isArtifact =
      toolCallLine.test(line) || line.startsWith('<function') || line.startsWith('function=')
    if (!isArtifact) {
      keptLines.push(rawLine)
    }
  }
  const filteredText = keptLines.join('\n').trim()

  // If filtering removed everything, fall back to the unfiltered reply.
  const cleaned = cleanForSpeech(stripFillers(filteredText || finalText))
  emitVoice('response', agent, cleaned)
  return NextResponse.json({ text: cleaned })
}