feat: switch from Anthropic to Groq API (llama-3.3-70b-versatile)

- route.ts: replace @anthropic-ai/sdk with groq-sdk, rewrite chat loop
- voice-tool-schemas.ts: convert from Anthropic format to OpenAI/Groq function tools
- voice-history.ts: extend HistoryMessage type to include tool role, simplify cache stubs

No prompt caching (Groq does not support it); tool calling is preserved.
2026-04-30 20:43:30 +00:00
parent 96fa78bd5c
commit 04b7d1f104
3 changed files with 263 additions and 257 deletions
--- a/app/api/voice/chat/route.ts
+++ b/app/api/voice/chat/route.ts
@@ -2,8 +2,7 @@ export const dynamic = 'force-dynamic'
 export const runtime = 'nodejs'

 import { NextResponse } from 'next/server'
-import Anthropic from '@anthropic-ai/sdk'
-import { ProxyAgent } from 'undici'
+import Groq from 'groq-sdk'

 import { voiceBus } from '@/lib/voice-bus'
 import { systemPrompt } from '@/lib/voice-prompts'
@@ -12,17 +11,15 @@ import { executeTool } from '@/lib/voice-executors'
 import { cleanForSpeech, stripFillers, isResetCommand } from '@/lib/voice-text'
 import {
  loadHistory, saveHistory, resetHistory,
-  buildMessagesWithCache, stripCacheControl, HistoryMessage,
+  HistoryMessage,
 } from '@/lib/voice-history'

-const MODEL = process.env.ANTHROPIC_MODEL || 'claude-haiku-4-5'
+const MODEL = process.env.GROQ_MODEL || 'llama-3.3-70b-versatile'
 const MAX_TOKENS = parseInt(process.env.VOICE_MAX_TOKENS || '300', 10)
 const MAX_TOOL_ROUNDS = 4
 const RATE_LIMIT_PER_MINUTE = parseInt(process.env.VOICE_RATE_LIMIT || '20', 10)

 // In-memory rate-limit per IP / cookie (host один — Docker контейнер).
-// Защита от случайного бесконечного цикла или утечки PIN: даже если
-// auth_token утечёт, вызов /api/voice/chat будет ограничен.
 const rateBuckets = new Map<string, { count: number; resetAt: number }>()
 function rateLimit(key: string): boolean {
  const now = Date.now()
@@ -35,7 +32,6 @@ function rateLimit(key: string): boolean {
  b.count++
  return true
 }
-// Гигиена: чистим старые бакеты периодически (раз в 5 минут максимум).
 let lastSweep = 0
 function sweep() {
  const now = Date.now()
@@ -44,16 +40,12 @@ function sweep() {
  for (const [k, v] of rateBuckets) if (v.resetAt <= now) rateBuckets.delete(k)
 }

-let _client: Anthropic | null = null
-function client(): Anthropic {
+let _client: Groq | null = null
+function client(): Groq {
  if (_client) return _client
-  const apiKey = process.env.ANTHROPIC_API_KEY
-  if (!apiKey) throw new Error('ANTHROPIC_API_KEY not set')
-  const proxy = process.env.ANTHROPIC_PROXY || process.env.HTTPS_PROXY || ''
-  const fetchOptions = proxy
-    ? ({ dispatcher: new ProxyAgent(proxy) } as any)
-    : undefined
-  _client = new Anthropic({ apiKey, fetchOptions })
+  const apiKey = process.env.GROQ_API_KEY
+  if (!apiKey) throw new Error('GROQ_API_KEY not set')
+  _client = new Groq({ apiKey })
  return _client
 }

@@ -69,8 +61,6 @@ function emitVoice(event: string, agent: 'cosmo' | 'lusya', text?: string) {
 type AgentId = 'cosmo' | 'lusya'

 export async function POST(req: Request) {
-  // Rate-limit по auth_token (или x-voice-internal — для loopback'а от tools).
-  // Идентифицируем клиента: cookie auth_token > x-voice-internal > IP > 'anon'.
  const cookie = req.headers.get('cookie') || ''
  const tokenMatch = cookie.match(/auth_token=([a-f0-9]{32,})/i)
  const internal = req.headers.get('x-voice-internal') || ''
@@ -85,13 +75,11 @@ export async function POST(req: Request) {
  if (!body || typeof body.text !== 'string' || !body.text.trim()) {
    return NextResponse.json({ error: 'text required' }, { status: 400 })
  }
-  const userText: string = body.text.trim().slice(0, 4000)  // защита от gigantic prompts
+  const userText: string = body.text.trim().slice(0, 4000)
  const agent: AgentId = body.agent === 'lusya' ? 'lusya' : 'cosmo'

-  // Echo command в орб
  emitVoice('command', agent, userText)

-  // Reset-команда — стираем историю и отвечаем шаблонно
  if (isResetCommand(userText)) {
    await resetHistory(agent)
    const msg = 'Начинаю новую сессию.'
@@ -99,59 +87,52 @@ export async function POST(req: Request) {
    return NextResponse.json({ text: msg, reset: true })
  }

-  // Загружаем историю и добавляем новый user-turn
+  // Загружаем историю и строим messages для Groq (OpenAI-compatible format)
  const history = await loadHistory(agent)
-  history.push({ role: 'user', content: userText })

-  const systemBlocks: Anthropic.TextBlockParam[] = [
-    {
-      type: 'text',
-      text: systemPrompt(agent),
-      cache_control: { type: 'ephemeral' },
-    },
+  // Системный prompt + история + новый user message
+  const apiMessages: any[] = [
+    { role: 'system', content: systemPrompt(agent) },
+    ...history,
+    { role: 'user', content: userText },
  ]

-  const apiMessages: Anthropic.MessageParam[] = buildMessagesWithCache(history) as any
-
  let finalText = ''
-  const initialUserIdx = history.length - 1
-  // Защита от tool-cycling: запоминаем последний (name, args) — если LLM
-  // дважды подряд просит одно и то же, прерываем цикл.
+  const historyStartLen = apiMessages.length // позиция после которой добавляем новые turns
+
+  // Защита от tool-cycling
  let lastToolSig = ''

  try {
    const c = client()
    for (let round = 0; round < MAX_TOOL_ROUNDS; round++) {
      const t0 = Date.now()
-      const resp = await c.messages.create({
+      const resp = await c.chat.completions.create({
        model: MODEL,
        max_tokens: MAX_TOKENS,
-        system: systemBlocks,
        messages: apiMessages,
-        tools: TOOL_SCHEMAS,
+        tools: TOOL_SCHEMAS as any,
+        tool_choice: 'auto',
      })

+      const choice = resp.choices[0]
+      const msg = choice.message
      const usage = resp.usage as any
+
      console.log(
        `[voice/chat] ${agent} round ${round + 1} ${Date.now() - t0}ms · ` +
-        `stop=${resp.stop_reason} · in=${usage?.input_tokens} out=${usage?.output_tokens} ` +
-        `cache_r=${usage?.cache_read_input_tokens || 0} cache_w=${usage?.cache_creation_input_tokens || 0}`
+        `stop=${choice.finish_reason} · in=${usage?.prompt_tokens} out=${usage?.completion_tokens}`
      )

-      // Разбираем content на text + tool_use
-      const toolUses: Anthropic.ToolUseBlock[] = []
-      for (const block of resp.content) {
-        if (block.type === 'text') finalText += block.text
-        else if (block.type === 'tool_use') toolUses.push(block)
-      }
+      // Добавляем assistant message в messages
+      apiMessages.push(msg)

-      // Сохраняем assistant turn в API messages как есть (важно для tool_use_id)
-      apiMessages.push({ role: 'assistant', content: resp.content as any })
+      if (choice.finish_reason === 'tool_calls' && msg.tool_calls?.length) {
+        const toolCalls = msg.tool_calls

-      if (resp.stop_reason === 'tool_use' && toolUses.length) {
-        // Сигнатура текущего раунда — для loop-guard.
-        const sig = toolUses
-          .map((t) => `${t.name}:${JSON.stringify(t.input)}`)
+        // Loop-guard: сигнатура текущего раунда
+        const sig = toolCalls
+          .map((tc: any) => `${tc.function.name}:${tc.function.arguments}`)
          .sort()
          .join('|')
        if (sig === lastToolSig) {
@@ -161,25 +142,27 @@ export async function POST(req: Request) {
        }
        lastToolSig = sig

-        const toolResults: Anthropic.ToolResultBlockParam[] = []
-        for (const tu of toolUses) {
-          console.log(`[voice/chat] tool ${tu.name}(${JSON.stringify(tu.input).slice(0, 200)})`)
-          const result = await executeTool(tu.name, tu.input, agent)
-          toolResults.push({
-            type: 'tool_result',
-            tool_use_id: tu.id,
+        // Выполняем все tool calls и добавляем результаты
+        for (const tc of toolCalls) {
+          console.log(`[voice/chat] tool ${tc.function.name}(${tc.function.arguments.slice(0, 200)})`)
+          let args: any = {}
+          try { args = JSON.parse(tc.function.arguments) } catch (_) {}
+          const result = await executeTool(tc.function.name, args, agent)
+          apiMessages.push({
+            role: 'tool',
+            tool_call_id: tc.id,
            content: JSON.stringify(result),
          })
        }
-        apiMessages.push({ role: 'user', content: toolResults })
        continue
      }

-      // end_turn / max_tokens / stop_sequence — финальный ответ готов
+      // Финальный ответ
+      finalText = msg.content || ''
      break
    }
  } catch (e: any) {
-    console.error('[voice/chat] anthropic error:', e?.message || e)
+    console.error('[voice/chat] groq error:', e?.message || e)
    const msg = 'Что-то сломалось.'
    emitVoice('error', agent, msg)
    return NextResponse.json({ error: 'llm_failed', detail: String(e?.message || e), text: msg }, { status: 502 })
@@ -191,15 +174,13 @@ export async function POST(req: Request) {
    return NextResponse.json({ text: msg }, { status: 200 })
  }

-  // Сохраняем все turn'ы после initial user (включая tool_use / tool_result)
-  const newTurns = apiMessages.slice(initialUserIdx + 1)
-  for (const turn of newTurns) {
-    history.push({
-      role: turn.role as 'user' | 'assistant',
-      content: stripCacheControl(turn.content),
-    } as HistoryMessage)
-  }
-  await saveHistory(agent, history)
+  // Сохраняем новые turns в историю (без системного prompt'а)
+  const newTurns = apiMessages.slice(historyStartLen)
+  const updatedHistory: HistoryMessage[] = [
+    ...history,
+    ...newTurns.map((m: any) => ({ role: m.role, content: m.content ?? null, tool_calls: m.tool_calls, tool_call_id: m.tool_call_id } as HistoryMessage)),
+  ]
+  await saveHistory(agent, updatedHistory)

  const cleaned = cleanForSpeech(stripFillers(finalText))
  emitVoice('response', agent, cleaned)