feat: switch voice from Groq to Claude Haiku via ai-proxy
Some checks failed
Deploy / deploy (push) Failing after 1m0s
Some checks failed
Deploy / deploy (push) Failing after 1m0s
This commit is contained in:
@@ -2,8 +2,6 @@ export const dynamic = 'force-dynamic'
|
|||||||
export const runtime = 'nodejs'
|
export const runtime = 'nodejs'
|
||||||
|
|
||||||
import { NextResponse } from 'next/server'
|
import { NextResponse } from 'next/server'
|
||||||
import Groq from 'groq-sdk'
|
|
||||||
import { HttpsProxyAgent } from 'https-proxy-agent'
|
|
||||||
|
|
||||||
import { voiceBus } from '@/lib/voice-bus'
|
import { voiceBus } from '@/lib/voice-bus'
|
||||||
import { systemPrompt } from '@/lib/voice-prompts'
|
import { systemPrompt } from '@/lib/voice-prompts'
|
||||||
@@ -14,12 +12,15 @@ import {
|
|||||||
HistoryMessage,
|
HistoryMessage,
|
||||||
} from '@/lib/voice-history'
|
} from '@/lib/voice-history'
|
||||||
|
|
||||||
/**
 * Reads an integer from the environment variable `name`.
 *
 * Returns `fallback` when the variable is unset, empty, or does not start with
 * a parsable integer, so a typo in the environment can never turn a numeric
 * setting into `NaN` (which would be serialized as `null` in request bodies).
 */
function envInt(name: string, fallback: number): number {
  const parsed = Number.parseInt(process.env[name] || '', 10)
  return Number.isNaN(parsed) ? fallback : parsed
}

// Model id sent to the proxy; overridable per deployment.
const MODEL = process.env.CLAUDE_MODEL || 'claude-haiku-4-5'
// Upper bound on generated tokens per model call.
const MAX_TOKENS = envInt('VOICE_MAX_TOKENS', 300)
// Maximum number of model round-trips (tool call -> tool result) per request.
const MAX_TOOL_ROUNDS = 4
const RATE_LIMIT_PER_MINUTE = envInt('VOICE_RATE_LIMIT', 20)

// Base URL of the ai-proxy. The default is a private LAN address; set AI_PROXY_URL
// explicitly in any environment where that host is not reachable.
const AI_PROXY_URL = process.env.AI_PROXY_URL || 'http://192.168.31.103:3301'
// SECURITY: the proxy key must come from the environment only. A previous revision
// shipped a real-looking key as the fallback value; any key that was ever committed
// should be treated as leaked and rotated. With no key configured the proxy call
// fails instead of silently using a shared secret.
const AI_PROXY_KEY = process.env.AI_PROXY_KEY || ''

// In-memory rate-limit buckets (count + window reset time) keyed by caller.
// NOTE(review): state is per process, so limits are not shared across instances.
const rateBuckets = new Map<string, { count: number; resetAt: number }>()
|
||||||
function rateLimit(key: string): boolean {
|
function rateLimit(key: string): boolean {
|
||||||
const now = Date.now()
|
const now = Date.now()
|
||||||
@@ -40,28 +41,92 @@ function sweep() {
|
|||||||
for (const [k, v] of rateBuckets) if (v.resetAt <= now) rateBuckets.delete(k)
|
for (const [k, v] of rateBuckets) if (v.resetAt <= now) rateBuckets.delete(k)
|
||||||
}
|
}
|
||||||
|
|
||||||
let _client: Groq | null = null
|
/**
 * Converts OpenAI-style function tool schemas
 * (`{ type: 'function', function: { name, description, parameters } }`)
 * into the Anthropic Messages API tool format
 * (`{ name, description, input_schema }`).
 *
 * Entries that are not objects or that lack a non-empty string `function.name`
 * are skipped instead of throwing, so one malformed schema cannot take down
 * every request. A missing or non-object `parameters` value is replaced with an
 * empty object schema.
 *
 * @param tools OpenAI-style tool definitions.
 * @returns Anthropic tool definitions, in the same order as the valid inputs.
 */
function toAnthropicTools(tools: any[]): any[] {
  if (!Array.isArray(tools)) return []

  const converted: any[] = []
  for (const tool of tools) {
    const fn = tool?.function
    if (!fn || typeof fn.name !== 'string' || fn.name.length === 0) continue

    const params = fn.parameters
    const isObjectSchema =
      params !== null && typeof params === 'object' && !Array.isArray(params)

    converted.push({
      name: fn.name,
      description: typeof fn.description === 'string' ? fn.description : '',
      input_schema: isObjectSchema ? params : { type: 'object', properties: {} },
    })
  }
  return converted
}
|
||||||
|
|
||||||
function emitVoice(event: string, agent: 'cosmo' | 'lusya', text?: string) {
|
/**
 * Decodes the `arguments` payload of a stored OpenAI-style tool call into the
 * plain object that an Anthropic `tool_use.input` must be.
 *
 * Accepts either a JSON string or an already-decoded object. Anything that does
 * not decode to a plain object (invalid JSON, `null`, arrays, scalars) becomes `{}`.
 */
function parseToolCallInput(raw: unknown): Record<string, unknown> {
  const isPlainObject = (v: unknown): v is Record<string, unknown> =>
    v !== null && typeof v === 'object' && !Array.isArray(v)

  if (isPlainObject(raw)) return raw
  if (typeof raw !== 'string' || raw.trim() === '') return {}
  try {
    const parsed: unknown = JSON.parse(raw)
    if (isPlainObject(parsed)) return parsed
  } catch {
    // Best effort: keep the conversation usable, but leave a trace of the bad record.
    console.warn('[voice/chat] unparsable tool arguments in history, using {}')
  }
  return {}
}

/**
 * Converts stored history (OpenAI chat format, possibly containing tool calls)
 * into the `messages` array of the Anthropic Messages API.
 *
 * - `system` entries are skipped (the system prompt is sent in the `system` field).
 * - `user` / `assistant` entries with empty or whitespace-only text and no tool
 *   calls are skipped, because the API rejects messages with empty content.
 * - An assistant entry with `tool_calls` becomes a content array of an optional
 *   `text` block followed by `tool_use` blocks.
 * - `tool` entries become `tool_result` blocks grouped into one `user` message.
 *   A result is kept only if its `tool_call_id` matches a `tool_use` of the
 *   assistant message immediately before the group; orphaned results (e.g. the
 *   head of a trimmed history) are dropped, since the API rejects them.
 *
 * NOTE(review): `tool_use` blocks that never received a result are not repaired here.
 *
 * @param history Stored turns in OpenAI format.
 * @returns Messages in Anthropic format.
 */
function historyToAnthropicMessages(history: HistoryMessage[]): any[] {
  const result: any[] = []
  // Ids of tool_use blocks from the latest assistant message that still await a result.
  let pendingToolUseIds = new Set<string>()

  for (const msg of history) {
    if (msg.role === 'system') continue

    if (msg.role === 'user') {
      if (typeof msg.content !== 'string' || msg.content.trim() === '') continue
      result.push({ role: 'user', content: msg.content })
      pendingToolUseIds = new Set<string>()
      continue
    }

    if (msg.role === 'assistant') {
      const text =
        typeof msg.content === 'string' && msg.content.trim() !== '' ? msg.content : ''
      const blocks: any[] = []
      if (text) blocks.push({ type: 'text', text })

      const toolUseIds = new Set<string>()
      for (const tc of msg.tool_calls ?? []) {
        const name = tc?.function?.name
        // A tool_use block without an id or a name can never be answered; skip it.
        if (typeof tc?.id !== 'string' || !tc.id || typeof name !== 'string' || !name) continue
        blocks.push({
          type: 'tool_use',
          id: tc.id,
          name,
          input: parseToolCallInput(tc.function.arguments),
        })
        toolUseIds.add(tc.id)
      }

      if (blocks.length === 0) continue
      // Plain replies stay a string; replies with tool calls use the block array form.
      result.push({ role: 'assistant', content: toolUseIds.size > 0 ? blocks : text })
      pendingToolUseIds = toolUseIds
      continue
    }

    if (msg.role === 'tool') {
      const id = msg.tool_call_id
      if (typeof id !== 'string' || !pendingToolUseIds.has(id)) continue
      pendingToolUseIds.delete(id)

      const toolResultBlock = {
        type: 'tool_result',
        tool_use_id: id,
        content: msg.content || '',
      }
      // Results for one assistant turn share a single user message.
      const last = result[result.length - 1]
      if (last && last.role === 'user' && Array.isArray(last.content)) {
        last.content.push(toolResultBlock)
      } else {
        result.push({ role: 'user', content: [toolResultBlock] })
      }
    }
  }

  return result
}
|
||||||
|
|
||||||
|
/**
 * Sends one Messages API request to Claude through the ai-proxy and returns the
 * decoded JSON response.
 *
 * @param model     Model id to request.
 * @param system    System prompt (sent in the top-level `system` field).
 * @param messages  Conversation in Anthropic message format.
 * @param tools     Optional Anthropic tool definitions; omitted from the body when empty.
 * @param timeoutMs Abort the request (including reading the body) after this many
 *                  milliseconds, so a stalled proxy cannot hang the caller.
 * @returns The parsed response body.
 * @throws Error `AI_PROXY_KEY not set` when no proxy key is configured;
 *         Error `claude_proxy_<status>: <body excerpt>` on a non-2xx response;
 *         a `TimeoutError` when the timeout elapses.
 */
async function claudeRequest(
  model: string,
  system: string,
  messages: any[],
  tools?: any[],
  timeoutMs: number = 30000,
): Promise<any> {
  if (!AI_PROXY_KEY) throw new Error('AI_PROXY_KEY not set')

  const body: any = {
    model,
    max_tokens: MAX_TOKENS,
    system,
    messages,
  }
  if (tools && tools.length > 0) {
    body.tools = tools
  }

  const res = await fetch(`${AI_PROXY_URL}/v1/messages`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'X-Proxy-Key': AI_PROXY_KEY,
      'anthropic-version': '2023-06-01',
    },
    body: JSON.stringify(body),
    signal: AbortSignal.timeout(timeoutMs),
  })

  if (!res.ok) {
    let err = ''
    try {
      err = await res.text()
    } catch {
      // The status code alone is still a useful error; ignore an unreadable body.
    }
    // The caller forwards this message to the client, so cap how much upstream
    // text can end up in it.
    throw new Error(`claude_proxy_${res.status}: ${err.slice(0, 500)}`)
  }
  return res.json()
}
|
||||||
|
|
||||||
type AgentId = 'cosmo' | 'lusya'
|
type AgentId = 'cosmo' | 'lusya'
|
||||||
|
|
||||||
|
/**
 * Publishes a `voice` event on the shared voice bus.
 *
 * @param event Event name carried in the payload.
 * @param agent Agent the event belongs to.
 * @param text  Optional text attached to the event.
 */
function emitVoice(event: string, agent: AgentId, text?: string) {
  // The timestamp is taken at emit time, in ISO-8601 form.
  const payload = {
    event,
    agent,
    text,
    timestamp: new Date().toISOString(),
  }
  voiceBus.emit('voice', payload)
}
|
||||||
|
|
||||||
export async function POST(req: Request) {
|
export async function POST(req: Request) {
|
||||||
const cookie = req.headers.get('cookie') || ''
|
const cookie = req.headers.get('cookie') || ''
|
||||||
const tokenMatch = cookie.match(/auth_token=([a-f0-9]{32,})/i)
|
const tokenMatch = cookie.match(/auth_token=([a-f0-9]{32,})/i)
|
||||||
@@ -89,104 +154,84 @@ export async function POST(req: Request) {
|
|||||||
return NextResponse.json({ text: msg, reset: true })
|
return NextResponse.json({ text: msg, reset: true })
|
||||||
}
|
}
|
||||||
|
|
||||||
// Загружаем историю и строим messages для Groq (OpenAI-compatible format)
|
|
||||||
const history = await loadHistory(agent)
|
const history = await loadHistory(agent)
|
||||||
|
const sysPrompt = systemPrompt(agent)
|
||||||
|
const anthropicTools = toAnthropicTools(TOOL_SCHEMAS as any[])
|
||||||
|
|
||||||
// Системный prompt + история + новый user message
|
// Build messages array
|
||||||
const apiMessages: any[] = [
|
const messages: any[] = [
|
||||||
{ role: 'system', content: systemPrompt(agent) },
|
...historyToAnthropicMessages(history),
|
||||||
...history,
|
|
||||||
{ role: 'user', content: userText },
|
{ role: 'user', content: userText },
|
||||||
]
|
]
|
||||||
|
|
||||||
let finalText = ''
|
let finalText = ''
|
||||||
const historyStartLen = apiMessages.length // позиция после которой добавляем новые turns
|
// Track new turns for history saving
|
||||||
|
const newTurns: HistoryMessage[] = [{ role: 'user', content: userText }]
|
||||||
// Защита от tool-cycling
|
|
||||||
let lastToolSig = ''
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const c = client()
|
|
||||||
for (let round = 0; round < MAX_TOOL_ROUNDS; round++) {
|
for (let round = 0; round < MAX_TOOL_ROUNDS; round++) {
|
||||||
const t0 = Date.now()
|
const t0 = Date.now()
|
||||||
const resp = await c.chat.completions.create({
|
const resp = await claudeRequest(MODEL, sysPrompt, messages, anthropicTools)
|
||||||
model: MODEL,
|
|
||||||
max_tokens: MAX_TOKENS,
|
|
||||||
messages: apiMessages,
|
|
||||||
tools: TOOL_SCHEMAS as any,
|
|
||||||
tool_choice: 'auto',
|
|
||||||
})
|
|
||||||
|
|
||||||
const choice = resp.choices[0]
|
|
||||||
const msg = choice.message
|
|
||||||
const usage = resp.usage as any
|
|
||||||
|
|
||||||
console.log(
|
console.log(
|
||||||
`[voice/chat] ${agent} round ${round + 1} ${Date.now() - t0}ms · ` +
|
`[voice/chat] ${agent} round ${round + 1} ${Date.now() - t0}ms · ` +
|
||||||
`stop=${choice.finish_reason} · in=${usage?.prompt_tokens} out=${usage?.completion_tokens}`
|
`stop=${resp.stop_reason} · in=${resp.usage?.input_tokens} out=${resp.usage?.output_tokens}`
|
||||||
)
|
)
|
||||||
|
|
||||||
// Добавляем assistant message в messages
|
const content: any[] = resp.content || []
|
||||||
apiMessages.push(msg)
|
const stopReason: string = resp.stop_reason || 'end_turn'
|
||||||
|
|
||||||
if (choice.finish_reason === 'tool_calls' && msg.tool_calls?.length) {
|
if (stopReason === 'tool_use') {
|
||||||
const toolCalls = msg.tool_calls
|
const toolUseBlocks = content.filter((b: any) => b.type === 'tool_use')
|
||||||
|
const textBlocks = content.filter((b: any) => b.type === 'text')
|
||||||
|
const partialText = textBlocks.map((b: any) => b.text).join('')
|
||||||
|
|
||||||
// Loop-guard: сигнатура текущего раунда
|
// Add assistant message to messages
|
||||||
const sig = toolCalls
|
messages.push({ role: 'assistant', content })
|
||||||
.map((tc: any) => `${tc.function.name}:${tc.function.arguments}`)
|
newTurns.push({
|
||||||
.sort()
|
role: 'assistant',
|
||||||
.join('|')
|
content: partialText || null,
|
||||||
if (sig === lastToolSig) {
|
tool_calls: toolUseBlocks.map((b: any) => ({
|
||||||
console.warn('[voice/chat] tool cycle detected, breaking loop')
|
id: b.id,
|
||||||
finalText += '\nНе получилось выполнить запрос.'
|
function: { name: b.name, arguments: JSON.stringify(b.input) },
|
||||||
break
|
})),
|
||||||
}
|
})
|
||||||
lastToolSig = sig
|
|
||||||
|
|
||||||
// Выполняем все tool calls и добавляем результаты
|
// Execute tools and collect results
|
||||||
for (const tc of toolCalls) {
|
const toolResults: any[] = []
|
||||||
console.log(`[voice/chat] tool ${tc.function.name}(${tc.function.arguments.slice(0, 200)})`)
|
for (const tb of toolUseBlocks) {
|
||||||
let args: any = {}
|
console.log(`[voice/chat] tool ${tb.name}(${JSON.stringify(tb.input).slice(0, 200)})`)
|
||||||
try { args = JSON.parse(tc.function.arguments) } catch (_) {}
|
const result = await executeTool(tb.name, tb.input || {}, agent)
|
||||||
const result = await executeTool(tc.function.name, args, agent)
|
toolResults.push({
|
||||||
apiMessages.push({
|
type: 'tool_result',
|
||||||
role: 'tool',
|
tool_use_id: tb.id,
|
||||||
tool_call_id: tc.id,
|
|
||||||
content: JSON.stringify(result),
|
content: JSON.stringify(result),
|
||||||
})
|
})
|
||||||
|
newTurns.push({
|
||||||
|
role: 'tool',
|
||||||
|
content: JSON.stringify(result),
|
||||||
|
tool_call_id: tb.id,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Add tool results as user message
|
||||||
|
messages.push({ role: 'user', content: toolResults })
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// Финальный ответ
|
// end_turn — final response
|
||||||
finalText = msg.content || ''
|
const textBlocks = content.filter((b: any) => b.type === 'text')
|
||||||
|
finalText = textBlocks.map((b: any) => b.text).join('')
|
||||||
|
|
||||||
|
newTurns.push({ role: 'assistant', content: finalText })
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
} catch (e: any) {
|
} catch (e: any) {
|
||||||
const errStr = String(e?.message || e)
|
const errStr = String(e?.message || e)
|
||||||
console.error('[voice/chat] groq error:', errStr)
|
console.error('[voice/chat] claude error:', errStr)
|
||||||
|
const msg = 'Что-то сломалось.'
|
||||||
// tool_use_failed: модель неправильно сформировала tool call — повторить без tools
|
emitVoice('error', agent, msg)
|
||||||
if (errStr.includes('tool_use_failed') || errStr.includes('Failed to call a function')) {
|
return NextResponse.json({ error: 'llm_failed', detail: errStr, text: msg }, { status: 502 })
|
||||||
try {
|
|
||||||
const c2 = client()
|
|
||||||
const fallback = await c2.chat.completions.create({
|
|
||||||
model: MODEL,
|
|
||||||
max_tokens: MAX_TOKENS,
|
|
||||||
messages: apiMessages.slice(0, historyStartLen + 1),
|
|
||||||
})
|
|
||||||
finalText = fallback.choices[0]?.message?.content || ''
|
|
||||||
console.log('[voice/chat] tool_use_failed fallback ok')
|
|
||||||
} catch (e2) {
|
|
||||||
console.error('[voice/chat] fallback failed:', e2)
|
|
||||||
finalText = 'Не удалось выполнить запрос.'
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
const msg = 'Что-то сломалось.'
|
|
||||||
emitVoice('error', agent, msg)
|
|
||||||
return NextResponse.json({ error: 'llm_failed', detail: errStr, text: msg }, { status: 502 })
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!finalText.trim()) {
|
if (!finalText.trim()) {
|
||||||
@@ -195,22 +240,20 @@ export async function POST(req: Request) {
|
|||||||
return NextResponse.json({ text: msg }, { status: 200 })
|
return NextResponse.json({ text: msg }, { status: 200 })
|
||||||
}
|
}
|
||||||
|
|
||||||
// Сохраняем новые turns в историю (без системного prompt'а)
|
// Save history
|
||||||
const newTurns = apiMessages.slice(historyStartLen)
|
const updatedHistory: HistoryMessage[] = [...history, ...newTurns]
|
||||||
const updatedHistory: HistoryMessage[] = [
|
|
||||||
...history,
|
|
||||||
...newTurns.map((m: any) => ({ role: m.role, content: m.content ?? null, tool_calls: m.tool_calls, tool_call_id: m.tool_call_id } as HistoryMessage)),
|
|
||||||
]
|
|
||||||
await saveHistory(agent, updatedHistory)
|
await saveHistory(agent, updatedHistory)
|
||||||
|
|
||||||
// Убрать строки вида "get_weather ..." или "<function=...>" которые иногда генерирует LLM
|
// Filter any tool call artifacts from text
|
||||||
const filteredText = finalText
|
const filteredText = finalText
|
||||||
.split('\n')
|
.split('\n')
|
||||||
.filter(line => {
|
.filter(line => {
|
||||||
const l = line.trim()
|
const l = line.trim()
|
||||||
return !(/^(get_|set_|control_|create_|update_|delete_|cancel_)[a-z_]+\s/.test(l) ||
|
return !(
|
||||||
l.startsWith('<function') ||
|
/^(get_|set_|control_|create_|update_|delete_|cancel_)[a-z_]+\s/.test(l) ||
|
||||||
l.startsWith('function='))
|
l.startsWith('<function') ||
|
||||||
|
l.startsWith('function=')
|
||||||
|
)
|
||||||
})
|
})
|
||||||
.join('\n')
|
.join('\n')
|
||||||
.trim()
|
.trim()
|
||||||
|
|||||||
@@ -121,7 +121,6 @@ export default function VoiceController() {
|
|||||||
model: 'v5',
|
model: 'v5',
|
||||||
baseAssetPath: '/vad/',
|
baseAssetPath: '/vad/',
|
||||||
onnxWASMBasePath: '/vad/',
|
onnxWASMBasePath: '/vad/',
|
||||||
logLevel: 'error',
|
|
||||||
ortConfig: (ort: any) => {
|
ortConfig: (ort: any) => {
|
||||||
ort.env.wasm.numThreads = 1
|
ort.env.wasm.numThreads = 1
|
||||||
ort.env.wasm.simd = true
|
ort.env.wasm.simd = true
|
||||||
|
|||||||
Reference in New Issue
Block a user