// Next.js route segment config: never cache this route, run it on the Node.js runtime.
export const dynamic = 'force-dynamic'
export const runtime = 'nodejs'

import { NextResponse } from 'next/server'
import Anthropic from '@anthropic-ai/sdk'
import { ProxyAgent } from 'undici'
import { voiceBus } from '@/lib/voice-bus'
import { systemPrompt } from '@/lib/voice-prompts'
import { TOOL_SCHEMAS } from '@/lib/voice-tool-schemas'
import { executeTool } from '@/lib/voice-executors'
import { cleanForSpeech, stripFillers, isResetCommand } from '@/lib/voice-text'
import {
  loadHistory,
  saveHistory,
  resetHistory,
  buildMessagesWithCache,
  stripCacheControl,
  HistoryMessage,
} from '@/lib/voice-history'

/**
 * Reads an integer from the environment.
 *
 * Falls back to `fallback` when the variable is unset, empty, or not parseable
 * as an integer. Without this guard a typo in the env would yield NaN, which
 * silently disables the rate limiter (`count >= NaN` is always false) and
 * sends an invalid `max_tokens` to the API.
 */
function envInt(name: string, fallback: number): number {
  const parsed = Number.parseInt(process.env[name] || '', 10)
  return Number.isNaN(parsed) ? fallback : parsed
}

const MODEL = process.env.ANTHROPIC_MODEL || 'claude-haiku-4-5'
const MAX_TOKENS = envInt('VOICE_MAX_TOKENS', 300)
const MAX_TOOL_ROUNDS = 4
const RATE_LIMIT_PER_MINUTE = envInt('VOICE_RATE_LIMIT', 20)

type AgentId = 'cosmo' | 'lusya'

/** Fixed-window counter for one rate-limit key. */
interface RateBucket {
  count: number
  resetAt: number
}

// In-memory rate limit per IP / cookie (single host — one Docker container).
// Guards against an accidental infinite loop or a leaked PIN: even if the
// auth_token leaks, calls to /api/voice/chat stay limited.
const rateBuckets = new Map<string, RateBucket>()

/**
 * Registers one request for `key` in a 60-second fixed window.
 *
 * @returns `true` when the request is allowed, `false` when the key has
 *          already used up RATE_LIMIT_PER_MINUTE in the current window.
 */
function rateLimit(key: string): boolean {
  const now = Date.now()
  const bucket = rateBuckets.get(key)
  if (!bucket || bucket.resetAt <= now) {
    // No window yet, or the previous one expired: start a fresh window.
    rateBuckets.set(key, { count: 1, resetAt: now + 60_000 })
    return true
  }
  if (bucket.count >= RATE_LIMIT_PER_MINUTE) return false
  bucket.count++
  return true
}

// Hygiene: purge expired buckets periodically (at most once every 5 minutes).
let lastSweep = 0
function sweep(): void {
  const now = Date.now()
  if (now - lastSweep < 5 * 60_000) return
  lastSweep = now
  for (const [key, bucket] of rateBuckets) {
    if (bucket.resetAt <= now) rateBuckets.delete(key)
  }
}

let _client: Anthropic | null = null

/**
 * Returns the lazily created, module-wide Anthropic client.
 *
 * When ANTHROPIC_PROXY or HTTPS_PROXY is set, requests are routed through an
 * undici ProxyAgent.
 *
 * @throws Error when ANTHROPIC_API_KEY is not set.
 */
function client(): Anthropic {
  if (_client) return _client
  const apiKey = process.env.ANTHROPIC_API_KEY
  if (!apiKey) throw new Error('ANTHROPIC_API_KEY not set')
  const proxy = process.env.ANTHROPIC_PROXY || process.env.HTTPS_PROXY || ''
  // `dispatcher` is an undici-specific fetch option, hence the cast.
  const fetchOptions = proxy ? ({ dispatcher: new ProxyAgent(proxy) } as any) : undefined
  _client = new Anthropic({ apiKey, fetchOptions })
  return _client
}

/** Publishes a timestamped 'voice' event on the voice bus. */
function emitVoice(event: string, agent: AgentId, text?: string): void {
  voiceBus.emit('voice', {
    event,
    agent,
    text,
    timestamp: new Date().toISOString(),
  })
}

/**
 * Picks the rate-limit key for a request, in priority order:
 * auth_token cookie > x-voice-internal header (loopback from tools) >
 * first x-forwarded-for entry > 'anon'.
 */
function rateKeyFor(req: Request): string {
  const cookie = req.headers.get('cookie') || ''
  const tokenMatch = cookie.match(/auth_token=([a-f0-9]{32,})/i)
  const internal = req.headers.get('x-voice-internal') || ''
  const fwd = req.headers.get('x-forwarded-for') || ''
  return tokenMatch?.[1] || (internal ? 'internal' : '') || fwd.split(',')[0]?.trim() || 'anon'
}

/**
 * Removes unanswered `tool_use` blocks from a trailing assistant turn.
 *
 * The tool loop can stop right after an assistant turn that requested tools
 * (cycle guard hit, or the response was cut off by max_tokens). The Messages
 * API rejects a conversation where a `tool_use` block is not followed by its
 * `tool_result`, so persisting such a turn would make every later request for
 * this agent fail until the history is reset. Text blocks of the turn are
 * kept; the turn is dropped entirely when nothing else remains.
 */
function dropDanglingToolUse(messages: Anthropic.MessageParam[]): void {
  const last = messages[messages.length - 1]
  if (!last || last.role !== 'assistant' || typeof last.content === 'string') return
  const kept = last.content.filter((block) => block.type !== 'tool_use')
  if (kept.length === last.content.length) return
  if (kept.length) last.content = kept
  else messages.pop()
}

/**
 * POST /api/voice/chat — runs one voice turn for an agent.
 *
 * Body: `{ text: string, agent?: 'cosmo' | 'lusya' }` (any other agent value
 * falls back to 'cosmo').
 *
 * Responses:
 * - 429 `{ error: 'rate_limited' }` when the caller exceeded the rate limit
 * - 400 `{ error: 'text required' }` when the body has no non-empty `text`
 * - 200 `{ text, reset: true }` when the text is a reset command
 * - 502 `{ error: 'llm_failed', detail, text }` when the model/tool loop throws
 * - 200 `{ text }` otherwise (speech-cleaned answer, or a fallback phrase when
 *   the model produced no text)
 */
export async function POST(req: Request): Promise<Response> {
  // Rate limit by auth_token (or x-voice-internal — for loopback from tools).
  const ratekey = rateKeyFor(req)
  sweep()
  if (!rateLimit(ratekey)) {
    return NextResponse.json({ error: 'rate_limited' }, { status: 429 })
  }

  const body = await req.json().catch(() => null)
  if (!body || typeof body.text !== 'string' || !body.text.trim()) {
    return NextResponse.json({ error: 'text required' }, { status: 400 })
  }
  const userText: string = body.text.trim().slice(0, 4000) // guard against gigantic prompts
  const agent: AgentId = body.agent === 'lusya' ? 'lusya' : 'cosmo'

  // Echo the command to the orb.
  emitVoice('command', agent, userText)

  // Reset command — wipe the history and answer with a canned phrase.
  if (isResetCommand(userText)) {
    await resetHistory(agent)
    const msg = 'Начинаю новую сессию.'
    emitVoice('response', agent, msg)
    return NextResponse.json({ text: msg, reset: true })
  }

  // Load the history and append the new user turn.
  const history = await loadHistory(agent)
  history.push({ role: 'user', content: userText })

  const systemBlocks: Anthropic.TextBlockParam[] = [
    {
      type: 'text',
      text: systemPrompt(agent),
      cache_control: { type: 'ephemeral' },
    },
  ]
  const apiMessages: Anthropic.MessageParam[] = buildMessagesWithCache(history) as any

  let finalText = ''
  // Index of the new user turn. NOTE(review): the slice further down assumes
  // buildMessagesWithCache returns one message per history entry — confirm.
  const initialUserIdx = history.length - 1

  // Guard against tool cycling: remember the last round's (name, args) set —
  // if the LLM asks for the same thing twice in a row, break out of the loop.
  let lastToolSig = ''

  try {
    const c = client()
    for (let round = 0; round < MAX_TOOL_ROUNDS; round++) {
      const t0 = Date.now()
      const resp = await c.messages.create({
        model: MODEL,
        max_tokens: MAX_TOKENS,
        system: systemBlocks,
        messages: apiMessages,
        tools: TOOL_SCHEMAS,
      })

      const usage = resp.usage as any
      console.log(
        `[voice/chat] ${agent} round ${round + 1} ${Date.now() - t0}ms · ` +
          `stop=${resp.stop_reason} · in=${usage?.input_tokens} out=${usage?.output_tokens} ` +
          `cache_r=${usage?.cache_read_input_tokens || 0} cache_w=${usage?.cache_creation_input_tokens || 0}`
      )

      // Split the content into text + tool_use.
      const toolUses: Anthropic.ToolUseBlock[] = []
      for (const block of resp.content) {
        if (block.type === 'text') finalText += block.text
        else if (block.type === 'tool_use') toolUses.push(block)
      }

      // Keep the assistant turn in the API messages as is (matters for tool_use_id).
      apiMessages.push({ role: 'assistant', content: resp.content as any })

      if (resp.stop_reason === 'tool_use' && toolUses.length) {
        // Signature of the current round — for the loop guard.
        const sig = toolUses
          .map((t) => `${t.name}:${JSON.stringify(t.input)}`)
          .sort()
          .join('|')
        if (sig === lastToolSig) {
          console.warn('[voice/chat] tool cycle detected, breaking loop')
          finalText += '\nНе получилось выполнить запрос.'
          break
        }
        lastToolSig = sig

        const toolResults: Anthropic.ToolResultBlockParam[] = []
        for (const tu of toolUses) {
          console.log(`[voice/chat] tool ${tu.name}(${JSON.stringify(tu.input).slice(0, 200)})`)
          const result = await executeTool(tu.name, tu.input, agent)
          toolResults.push({
            type: 'tool_result',
            tool_use_id: tu.id,
            content: JSON.stringify(result),
          })
        }
        apiMessages.push({ role: 'user', content: toolResults })
        continue
      }

      // end_turn / max_tokens / stop_sequence — the final answer is ready.
      break
    }
  } catch (e: unknown) {
    const reason: unknown = (e instanceof Error && e.message) || e
    console.error('[voice/chat] anthropic error:', reason)
    const msg = 'Что-то сломалось.'
    emitVoice('error', agent, msg)
    return NextResponse.json({ error: 'llm_failed', detail: String(reason), text: msg }, { status: 502 })
  }

  if (!finalText.trim()) {
    const msg = 'Не получил ответ.'
    emitVoice('error', agent, msg)
    return NextResponse.json({ text: msg }, { status: 200 })
  }

  // Never persist a trailing assistant turn whose tool_use has no tool_result.
  dropDanglingToolUse(apiMessages)

  // Persist every turn after the initial user one (including tool_use / tool_result).
  const newTurns = apiMessages.slice(initialUserIdx + 1)
  for (const turn of newTurns) {
    history.push({
      role: turn.role as 'user' | 'assistant',
      content: stripCacheControl(turn.content),
    } as HistoryMessage)
  }
  await saveHistory(agent, history)

  const cleaned = cleanForSpeech(stripFillers(finalText))
  emitVoice('response', agent, cleaned)
  return NextResponse.json({ text: cleaned })
}