191 lines
6.6 KiB
TypeScript
191 lines
6.6 KiB
TypeScript
// Next.js route segment config for this handler.
// Run on the Node.js runtime (the handler relies on Node-only modules such as
// an HTTP proxy agent).
export const runtime = 'nodejs'
// Always evaluate the route per request instead of serving a cached result.
export const dynamic = 'force-dynamic'
|
||
|
||
import { NextResponse } from 'next/server'
|
||
import Groq from 'groq-sdk'
|
||
import { HttpsProxyAgent } from 'https-proxy-agent'
|
||
|
||
import { voiceBus } from '@/lib/voice-bus'
|
||
import { systemPrompt } from '@/lib/voice-prompts'
|
||
import { TOOL_SCHEMAS, executeTool } from '@/lib/tools/_registry'
|
||
import { cleanForSpeech, stripFillers, isResetCommand } from '@/lib/voice-text'
|
||
import {
|
||
loadHistory, saveHistory, resetHistory,
|
||
HistoryMessage,
|
||
} from '@/lib/voice-history'
|
||
|
||
/**
 * Reads a positive integer setting from the environment.
 *
 * @param name     Environment variable to read.
 * @param fallback Value used when the variable is unset, empty, non-numeric,
 *                 or not greater than zero.
 * @returns The parsed integer, or `fallback`.
 *
 * A bare `parseInt` would yield `NaN` for garbage input; `count >= NaN` is
 * always false, so a mistyped limit would silently switch rate limiting off.
 */
function readPositiveIntEnv(name: string, fallback: number): number {
  const parsed = Number.parseInt(process.env[name] ?? '', 10)
  return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback
}

/** Groq model id; override with GROQ_MODEL. */
const MODEL = process.env.GROQ_MODEL || 'llama-3.3-70b-versatile'
/** `max_tokens` sent with every completion request; override with VOICE_MAX_TOKENS. */
const MAX_TOKENS = readPositiveIntEnv('VOICE_MAX_TOKENS', 300)
/** Upper bound on completion rounds (tool call -> result -> next completion) per request. */
const MAX_TOOL_ROUNDS = 4
/** Requests allowed per rate-limit key per window; override with VOICE_RATE_LIMIT. */
const RATE_LIMIT_PER_MINUTE = readPositiveIntEnv('VOICE_RATE_LIMIT', 20)
|
||
|
||
// In-memory rate limiting keyed by IP / cookie (there is a single host: one Docker container).
const rateBuckets = new Map<string, { count: number; resetAt: number }>()

/**
 * Fixed-window rate limiter.
 *
 * @param key Caller identity the request is counted against.
 * @returns `true` when the request is allowed, `false` once the key has used
 *          up RATE_LIMIT_PER_MINUTE requests in its current 60-second window.
 */
function rateLimit(key: string): boolean {
  const now = Date.now()
  const bucket = rateBuckets.get(key)

  // No bucket yet, or its window has elapsed: open a fresh window that
  // already counts this request.
  if (bucket === undefined || bucket.resetAt <= now) {
    rateBuckets.set(key, { count: 1, resetAt: now + 60_000 })
    return true
  }

  const exhausted = bucket.count >= RATE_LIMIT_PER_MINUTE
  if (!exhausted) bucket.count += 1
  return !exhausted
}
|
||
// Timestamp (ms since epoch) of the most recent sweep; 0 means "never swept".
let lastSweep = 0

/**
 * Drops rate-limit buckets whose window has already ended so the map does not
 * grow without bound. Does nothing if the previous sweep ran less than five
 * minutes ago.
 */
function sweep() {
  const now = Date.now()
  const sinceLastSweep = now - lastSweep
  if (sinceLastSweep >= 5 * 60_000) {
    lastSweep = now
    rateBuckets.forEach((bucket, key) => {
      if (bucket.resetAt <= now) rateBuckets.delete(key)
    })
  }
}
|
||
|
||
// Lazily created Groq client, shared by every request handled by this module.
let _client: Groq | null = null

/**
 * Returns the shared Groq client, constructing it on first use.
 *
 * Reads GROQ_API_KEY (required) and GROQ_PROXY (optional). When a proxy URL is
 * configured, an HttpsProxyAgent for it is passed to the client as `httpAgent`.
 *
 * @throws Error when GROQ_API_KEY is not set.
 */
function client(): Groq {
  if (_client === null) {
    const key = process.env.GROQ_API_KEY
    if (!key) throw new Error('GROQ_API_KEY not set')

    const proxy = process.env.GROQ_PROXY
    const options = {
      apiKey: key,
      httpAgent: proxy ? new HttpsProxyAgent(proxy) : undefined,
    }
    // NOTE(review): the `as any` cast is carried over unchanged; presumably the
    // installed SDK's option typings do not accept this shape. Confirm.
    _client = new Groq(options as any)
  }
  return _client
}
|
||
|
||
/**
 * Publishes a 'voice' event on the voice bus.
 *
 * @param event Event kind; this file emits 'command', 'response' and 'error'.
 * @param agent Agent the event belongs to.
 * @param text  Optional text attached to the event.
 *
 * The payload also carries an ISO-8601 `timestamp` taken at call time.
 */
function emitVoice(event: string, agent: 'cosmo' | 'lusya', text?: string) {
  const timestamp = new Date().toISOString()
  const payload = { event, agent, text, timestamp }
  voiceBus.emit('voice', payload)
}
|
||
|
||
/** Identifier of a voice agent: one of the two literal ids 'cosmo' or 'lusya'. */
type AgentId = 'cosmo' | 'lusya'
|
||
|
||
/**
 * POST handler for one voice-chat turn.
 *
 * Flow: rate-limit the caller, validate the JSON body (`text` is required,
 * `agent` is optional and defaults to 'cosmo'), handle the reset command, then
 * run up to MAX_TOOL_ROUNDS completion rounds against Groq, executing any
 * requested tools between rounds. The turns produced by this request
 * (user message, assistant messages, tool results) are appended to the agent's
 * stored history and the speech-cleaned answer is returned as `{ text }`.
 *
 * Responses:
 * - 429 `{ error: 'rate_limited' }` when the caller exceeded the rate limit;
 * - 400 `{ error: 'text required' }` for a missing/invalid body;
 * - 502 `{ error: 'llm_failed', ... }` when the completion/tool loop throws;
 * - 200 otherwise (including the "no answer" fallback, which is not persisted).
 */
export async function POST(req: Request) {
  // Rate-limit key, first non-empty of: auth_token cookie value, the shared
  // 'internal' bucket (x-voice-internal header present), the first
  // x-forwarded-for entry, and finally 'anon'.
  const cookie = req.headers.get('cookie') || ''
  const tokenMatch = cookie.match(/auth_token=([a-f0-9]{32,})/i)
  const internal = req.headers.get('x-voice-internal') || ''
  const fwd = req.headers.get('x-forwarded-for') || ''
  const ratekey = tokenMatch?.[1] || (internal ? 'internal' : '') || fwd.split(',')[0].trim() || 'anon'
  sweep()
  if (!rateLimit(ratekey)) {
    return NextResponse.json({ error: 'rate_limited' }, { status: 429 })
  }

  // Malformed JSON is treated the same as a missing body.
  const body = await req.json().catch(() => null)
  if (!body || typeof body.text !== 'string' || !body.text.trim()) {
    return NextResponse.json({ error: 'text required' }, { status: 400 })
  }
  const userText: string = body.text.trim().slice(0, 4000)
  const agent: AgentId = body.agent === 'lusya' ? 'lusya' : 'cosmo'

  emitVoice('command', agent, userText)

  if (isResetCommand(userText)) {
    await resetHistory(agent)
    const msg = 'Начинаю новую сессию.'
    emitVoice('response', agent, msg)
    return NextResponse.json({ text: msg, reset: true })
  }

  // Load the stored history and build the messages for Groq (OpenAI-compatible format).
  const history = await loadHistory(agent)

  // System prompt + stored history + the new user message.
  const apiMessages: any[] = [
    { role: 'system', content: systemPrompt(agent) },
    ...history,
    { role: 'user', content: userText },
  ]

  let finalText = ''
  // Index of the new user message. Everything from here on is what this
  // request adds to the history; starting one later would drop the user's own
  // turn and persist only assistant/tool messages.
  const historyStartLen = apiMessages.length - 1

  // Guard against the model requesting the same tool calls over and over.
  let lastToolSig = ''

  try {
    const c = client()
    for (let round = 0; round < MAX_TOOL_ROUNDS; round++) {
      const t0 = Date.now()
      const resp = await c.chat.completions.create({
        model: MODEL,
        max_tokens: MAX_TOKENS,
        messages: apiMessages,
        tools: TOOL_SCHEMAS as any,
        tool_choice: 'auto',
      })

      const choice = resp.choices[0]
      const msg = choice.message
      const usage = resp.usage as any

      console.log(
        `[voice/chat] ${agent} round ${round + 1} ${Date.now() - t0}ms · ` +
          `stop=${choice.finish_reason} · in=${usage?.prompt_tokens} out=${usage?.completion_tokens}`
      )

      if (choice.finish_reason === 'tool_calls' && msg.tool_calls?.length) {
        const toolCalls = msg.tool_calls

        // Loop guard: order-independent signature of this round's tool calls.
        const sig = toolCalls
          .map((tc: any) => `${tc.function.name}:${tc.function.arguments}`)
          .sort()
          .join('|')
        if (sig === lastToolSig) {
          console.warn('[voice/chat] tool cycle detected, breaking loop')
          finalText += '\nНе получилось выполнить запрос.'
          // Record the fallback as a plain assistant turn. The repeated
          // tool-call message is deliberately NOT stored: it would have no
          // matching tool results, leaving a dangling tool request in the
          // persisted history that is replayed on every later request.
          apiMessages.push({ role: 'assistant', content: finalText.trim() })
          break
        }
        lastToolSig = sig

        // The assistant's tool request must precede its tool results.
        apiMessages.push(msg)

        // Execute every tool call and append its result.
        for (const tc of toolCalls) {
          console.log(`[voice/chat] tool ${tc.function.name}(${tc.function.arguments.slice(0, 200)})`)
          let args: any = {}
          try {
            args = JSON.parse(tc.function.arguments)
          } catch {
            // Best effort: still run the tool with empty arguments, but leave a trace.
            console.warn(`[voice/chat] tool ${tc.function.name}: arguments are not valid JSON, using {}`)
          }
          const result = await executeTool(tc.function.name, args, agent)
          apiMessages.push({
            role: 'tool',
            tool_call_id: tc.id,
            content: JSON.stringify(result),
          })
        }
        continue
      }

      // Final answer.
      apiMessages.push(msg)
      finalText = msg.content || ''
      break
    }
  } catch (e: any) {
    console.error('[voice/chat] groq error:', e?.message || e)
    const msg = 'Что-то сломалось.'
    emitVoice('error', agent, msg)
    return NextResponse.json({ error: 'llm_failed', detail: String(e?.message || e), text: msg }, { status: 502 })
  }

  // Empty answer (including running out of tool rounds): report it and do not
  // persist anything from this request.
  if (!finalText.trim()) {
    const msg = 'Не получил ответ.'
    emitVoice('error', agent, msg)
    return NextResponse.json({ text: msg }, { status: 200 })
  }

  // Persist the new turns (the system prompt is never stored).
  const newTurns = apiMessages.slice(historyStartLen)
  const updatedHistory: HistoryMessage[] = [
    ...history,
    ...newTurns.map((m: any) => ({ role: m.role, content: m.content ?? null, tool_calls: m.tool_calls, tool_call_id: m.tool_call_id } as HistoryMessage)),
  ]
  await saveHistory(agent, updatedHistory)

  const cleaned = cleanForSpeech(stripFillers(finalText))
  emitVoice('response', agent, cleaned)
  return NextResponse.json({ text: cleaned })
}
|