import { generateText } from "ai";
import type { LanguageModelUsage, ModelMessage } from "ai";
import { createAnthropic } from "@ai-sdk/anthropic";
import { createGoogleGenerativeAI } from "@ai-sdk/google";
import type { ProviderConfig, ProviderProtocol } from "@infiplot/types";
import { fetchWithRetry } from "./fetchWithRetry";
import { normalizeBaseUrl } from "./normalizeUrl";

/** A single plain-text chat turn, in the shape the OpenAI-compatible wire format expects. */
export type ChatMessage = {
  role: "system" | "user" | "assistant";
  content: string;
};

/** Sampling temperature used by every chat path when the caller does not pass one. */
const DEFAULT_TEMPERATURE = 0.9;

// Different providers expose prompt-cache stats under different keys. We probe
// for the three forms we've seen in the wild and fall back to total tokens
// when no cache field exists.
//
//   DeepSeek (v3+)        usage.prompt_cache_hit_tokens / prompt_cache_miss_tokens
//   OpenAI / o-series     usage.prompt_tokens_details.cached_tokens
//   Anthropic / others    usage.cache_read_input_tokens / cache_creation_input_tokens
//   No-cache (MiMo,
//   local Ollama, …)      only prompt_tokens / completion_tokens — print those
//                         so we still get a rough cost baseline.
type Usage = {
  prompt_tokens?: number;
  completion_tokens?: number;
  prompt_cache_hit_tokens?: number;
  prompt_cache_miss_tokens?: number;
  prompt_tokens_details?: { cached_tokens?: number };
  cache_read_input_tokens?: number;
  cache_creation_input_tokens?: number;
};

/**
 * Subset of an OpenAI-compatible `/chat/completions` response that this module
 * reads. Every field is optional because the payload comes straight from
 * `JSON.parse` and has not been validated.
 */
type ChatCompletionResponse = {
  choices?: { message?: { content?: unknown } }[];
  usage?: Usage;
};

/**
 * Formats a cache hit rate for the log line.
 *
 * @returns A percentage with one decimal and a trailing `%` (e.g. `"12.5%"`),
 *          or `"n/a"` when `denom` is not positive.
 */
function formatRate(hit: number, denom: number): string {
  return denom > 0 ? `${((hit / denom) * 100).toFixed(1)}%` : "n/a";
}

/**
 * Builds a one-line `[cache] …` summary from a raw OpenAI-compatible `usage`
 * object, probing the DeepSeek, OpenAI and Anthropic field names in that order.
 *
 * @param tag   Label inserted after the `[cache]` prefix.
 * @param usage The `usage` object from the response, if any.
 * @returns The formatted log line.
 */
function summarizeUsage(tag: string, usage: Usage | undefined): string {
  if (!usage) return `[cache] ${tag} no-usage`;

  const prompt = usage.prompt_tokens ?? 0;
  const completion = usage.completion_tokens ?? 0;

  // DeepSeek-style
  if (typeof usage.prompt_cache_hit_tokens === "number") {
    const hit = usage.prompt_cache_hit_tokens;
    // When the miss count is absent, derive it from the prompt total.
    const miss = usage.prompt_cache_miss_tokens ?? Math.max(0, prompt - hit);
    const rate = formatRate(hit, hit + miss);
    return `[cache] ${tag} hit=${hit} miss=${miss} rate=${rate} completion=${completion}`;
  }

  // OpenAI-style
  const oaiCached = usage.prompt_tokens_details?.cached_tokens;
  if (typeof oaiCached === "number") {
    const miss = Math.max(0, prompt - oaiCached);
    const rate = formatRate(oaiCached, prompt);
    return `[cache] ${tag} hit=${oaiCached} miss=${miss} rate=${rate} completion=${completion}`;
  }

  // Anthropic-style
  if (typeof usage.cache_read_input_tokens === "number") {
    const hit = usage.cache_read_input_tokens;
    const create = usage.cache_creation_input_tokens ?? 0;
    // Here the prompt count is reported as the miss portion, so the
    // denominator is the sum of all three counters.
    const rate = formatRate(hit, hit + create + prompt);
    return `[cache] ${tag} hit=${hit} create=${create} miss=${prompt} rate=${rate} completion=${completion}`;
  }

  // No cache field at all
  return `[cache] ${tag} prompt=${prompt} completion=${completion} (provider didn't report cache stats)`;
}

// AI SDK 6 unifies cache stats across providers into usage.inputTokenDetails,
// so a single shape covers Anthropic + Gemini (no per-provider probing).
/**
 * Builds a one-line `[cache] …` summary from the AI SDK's usage object.
 *
 * @param tag   Label inserted after the `[cache]` prefix.
 * @param usage The `usage` value returned by `generateText`, if any.
 * @returns The formatted log line.
 */
function summarizeSdkUsage(
  tag: string,
  usage: LanguageModelUsage | undefined,
): string {
  if (!usage) return `[cache] ${tag} no-usage`;

  const input = usage.inputTokens ?? 0;
  const output = usage.outputTokens ?? 0;
  const read = usage.inputTokenDetails?.cacheReadTokens;
  const write = usage.inputTokenDetails?.cacheWriteTokens;

  if (typeof read === "number" || typeof write === "number") {
    const hit = read ?? 0;
    const create = write ?? 0;
    const rate = formatRate(hit, input);
    return `[cache] ${tag} hit=${hit} create=${create} input=${input} rate=${rate} completion=${output}`;
  }

  return `[cache] ${tag} input=${input} completion=${output} (provider didn't report cache stats)`;
}

// text/vision default to the OpenAI-compatible wire protocol when unset.
function resolveTextProtocol(config: ProviderConfig): ProviderProtocol {
  return config.provider ?? "openai_compatible";
}

/**
 * Sends a chat request to the configured provider and returns the assistant's
 * text reply.
 *
 * Dispatches on the resolved protocol: `"anthropic"` and `"google"` go through
 * the AI SDK; everything else uses the OpenAI-compatible HTTP endpoint.
 *
 * @param config   Provider settings (base URL, API key, model, protocol).
 * @param messages Conversation to send, in order.
 * @param opts     Optional temperature (defaults to 0.9), response format and
 *                 log tag. `responseFormat` only affects the OpenAI-compatible
 *                 path.
 * @returns The reply text.
 * @throws Error when the provider responds with an error status, an
 *         unparseable body, or no text content.
 */
export async function chat(
  config: ProviderConfig,
  messages: ChatMessage[],
  opts?: {
    temperature?: number;
    responseFormat?: "json_object" | "text";
    tag?: string;
  },
): Promise<string> {
  const protocol = resolveTextProtocol(config);
  if (protocol === "anthropic" || protocol === "google") {
    return chatViaAiSdk(config, messages, opts, protocol);
  }
  return chatOpenAiCompatible(config, messages, opts);
}

// Native Anthropic / Gemini via the Vercel AI SDK. response_format is not sent
// (Anthropic has no JSON mode); the engine relies on parseJsonLoose downstream,
// matching how it already tolerates loose JSON from every provider.
/**
 * Runs the chat through `generateText` using the Anthropic or Google provider.
 *
 * System messages are lifted out of the conversation and passed via the
 * `system` option. If several are present they are joined with a blank line so
 * none is silently dropped; with zero or one system message the value is
 * `undefined` or that message's content respectively.
 *
 * @throws Error when the SDK returns an empty or non-string `text`.
 */
async function chatViaAiSdk(
  config: ProviderConfig,
  messages: ChatMessage[],
  opts: { temperature?: number; tag?: string } | undefined,
  protocol: "anthropic" | "google",
): Promise<string> {
  const baseURL = normalizeBaseUrl(config.baseUrl, protocol);
  const model =
    protocol === "anthropic"
      ? createAnthropic({ apiKey: config.apiKey, baseURL })(config.model)
      : createGoogleGenerativeAI({ apiKey: config.apiKey, baseURL })(
          config.model,
        );

  // Split the transcript in one pass: system prompts on one side, the
  // user/assistant turns (in original order) on the other.
  const systemParts: string[] = [];
  const convo: ModelMessage[] = [];
  for (const m of messages) {
    if (m.role === "system") {
      systemParts.push(m.content);
    } else {
      convo.push({ role: m.role, content: m.content });
    }
  }
  const system = systemParts.length > 0 ? systemParts.join("\n\n") : undefined;

  const { text, usage } = await generateText({
    model,
    system,
    messages: convo,
    temperature: opts?.temperature ?? DEFAULT_TEMPERATURE,
  });

  console.log(summarizeSdkUsage(opts?.tag ?? "chat", usage));

  if (typeof text !== "string" || text.length === 0) {
    throw new Error(`Chat API (AI SDK ${protocol}) returned no content.`);
  }
  return text;
}

/**
 * Posts the conversation to `<baseUrl>/chat/completions` and returns the
 * content of the first choice.
 *
 * @throws Error on a non-OK HTTP status, a body that is not valid JSON, or a
 *         response whose first choice has no string `content`.
 */
async function chatOpenAiCompatible(
  config: ProviderConfig,
  messages: ChatMessage[],
  opts?: {
    temperature?: number;
    responseFormat?: "json_object" | "text";
    tag?: string;
  },
): Promise<string> {
  const url = `${normalizeBaseUrl(config.baseUrl, "openai_compatible")}/chat/completions`;

  const body: Record<string, unknown> = {
    model: config.model,
    messages,
    temperature: opts?.temperature ?? DEFAULT_TEMPERATURE,
  };
  // Only request JSON mode when explicitly asked; "text" sends no
  // response_format at all.
  if (opts?.responseFormat === "json_object") {
    body.response_format = { type: "json_object" };
  }

  const res = await fetchWithRetry(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${config.apiKey}`,
    },
    body: JSON.stringify(body),
  });

  // Read the body as text first so it can be quoted in error messages even
  // when it is not valid JSON.
  const text = await res.text();
  if (!res.ok) {
    throw new Error(`Chat API error ${res.status}: ${text}`);
  }

  let json: ChatCompletionResponse | null;
  try {
    json = JSON.parse(text) as ChatCompletionResponse | null;
  } catch {
    throw new Error(`Chat API returned invalid JSON: ${text.slice(0, 500)}`);
  }

  // Guard against a null body, an empty choices array, or missing
  // message/content fields.
  const content = json?.choices?.[0]?.message?.content;
  if (typeof content !== "string") {
    throw new Error(
      `Chat API returned no content. Response: ${text.slice(0, 500)}`,
    );
  }

  console.log(summarizeUsage(opts?.tag ?? "chat", json?.usage));
  return content;
}