// Providers expose prompt-cache statistics under different keys. We probe for
// the three shapes seen so far and fall back to plain prompt/completion counts
// when none of them is present.
//
//   DeepSeek (v3+)       usage.prompt_cache_hit_tokens / prompt_cache_miss_tokens
//   OpenAI / o-series    usage.prompt_tokens_details.cached_tokens
//   Anthropic / others   usage.cache_read_input_tokens / cache_creation_input_tokens
//   No cache fields      (MiMo, local Ollama, …) only prompt_tokens /
//                        completion_tokens — printed as-is so there is still a
//                        rough cost baseline.
type Usage = {
  prompt_tokens?: number;
  completion_tokens?: number;
  prompt_cache_hit_tokens?: number;
  prompt_cache_miss_tokens?: number;
  prompt_tokens_details?: { cached_tokens?: number };
  cache_read_input_tokens?: number;
  cache_creation_input_tokens?: number;
};

/**
 * Renders a response's `usage` payload as a single `[cache] …` log line.
 *
 * The first cache shape that carries a numeric hit counter wins, checked in
 * the order DeepSeek → OpenAI → Anthropic. Missing `prompt_tokens` /
 * `completion_tokens` are treated as 0.
 *
 * @param tag   Label placed right after the `[cache]` prefix.
 * @param usage Usage object from the response; `undefined` yields `no-usage`.
 * @returns The formatted line. The hit rate is printed as a percentage with
 *          one decimal (`75.0%`), or as a bare `n/a` when the denominator is 0.
 */
function summarizeUsage(tag: string, usage: Usage | undefined): string {
  if (!usage) return `[cache] ${tag} no-usage`;

  // The unit is attached here rather than in the templates below, so an
  // undefined rate renders as "n/a" instead of the malformed "n/a%".
  const formatRate = (hit: number, total: number): string =>
    total > 0 ? `${((hit / total) * 100).toFixed(1)}%` : "n/a";

  const prompt = usage.prompt_tokens ?? 0;
  const completion = usage.completion_tokens ?? 0;

  // DeepSeek-style: explicit hit counter; miss is derived from prompt_tokens
  // when the provider omits it.
  if (typeof usage.prompt_cache_hit_tokens === "number") {
    const hit = usage.prompt_cache_hit_tokens;
    const miss = usage.prompt_cache_miss_tokens ?? Math.max(0, prompt - hit);
    const rate = formatRate(hit, hit + miss);
    return `[cache] ${tag} hit=${hit} miss=${miss} rate=${rate} completion=${completion}`;
  }

  // OpenAI-style: cached tokens are measured against prompt_tokens.
  const oaiCached = usage.prompt_tokens_details?.cached_tokens;
  if (typeof oaiCached === "number") {
    const miss = Math.max(0, prompt - oaiCached);
    const rate = formatRate(oaiCached, prompt);
    return `[cache] ${tag} hit=${oaiCached} miss=${miss} rate=${rate} completion=${completion}`;
  }

  // Anthropic-style: cache reads and cache writes are added to prompt_tokens
  // to form the denominator.
  if (typeof usage.cache_read_input_tokens === "number") {
    const hit = usage.cache_read_input_tokens;
    const create = usage.cache_creation_input_tokens ?? 0;
    const rate = formatRate(hit, hit + create + prompt);
    return `[cache] ${tag} hit=${hit} create=${create} miss=${prompt} rate=${rate} completion=${completion}`;
  }

  // No cache field at all.
  return `[cache] ${tag} prompt=${prompt} completion=${completion} (provider didn't report cache stats)`;
}
"chat", json.usage)); + return content; } diff --git a/lib/engine/agents/architect.ts b/lib/engine/agents/architect.ts index d3349e0..a53d469 100644 --- a/lib/engine/agents/architect.ts +++ b/lib/engine/agents/architect.ts @@ -53,7 +53,7 @@ export async function runArchitect( { role: "system", content: ARCHITECT_SYSTEM }, { role: "user", content: buildArchitectUserMessage(session) }, ], - { temperature: 0.85, responseFormat: "json_object" }, + { temperature: 0.85, responseFormat: "json_object", tag: "architect" }, ); const parsed = parseJsonLoose(raw); diff --git a/lib/engine/agents/characterDesigner.ts b/lib/engine/agents/characterDesigner.ts index 152a975..e407c10 100644 --- a/lib/engine/agents/characterDesigner.ts +++ b/lib/engine/agents/characterDesigner.ts @@ -56,7 +56,7 @@ async function runDesignLLM( content: buildCharacterDesignerUserMessage(charName, session), }, ], - { temperature: 0.7, responseFormat: "json_object" }, + { temperature: 0.7, responseFormat: "json_object", tag: "character-designer" }, ); return parseJsonLoose(raw); } diff --git a/lib/engine/agents/cinematographer.ts b/lib/engine/agents/cinematographer.ts index 9274a0b..e2c3d22 100644 --- a/lib/engine/agents/cinematographer.ts +++ b/lib/engine/agents/cinematographer.ts @@ -67,7 +67,7 @@ export async function runCinematographer( ), }, ], - { temperature: 0.6, responseFormat: "json_object" }, + { temperature: 0.6, responseFormat: "json_object", tag: "cinematographer" }, ); const parsed = parseJsonLoose(raw); diff --git a/lib/engine/agents/writer.ts b/lib/engine/agents/writer.ts index 97a5e4f..ce04981 100644 --- a/lib/engine/agents/writer.ts +++ b/lib/engine/agents/writer.ts @@ -369,7 +369,7 @@ export async function runWriter( { role: "system", content: WRITER_SYSTEM }, { role: "user", content: buildWriterUserMessage(session) }, ], - { temperature: 0.9, responseFormat: "json_object" }, + { temperature: 0.9, responseFormat: "json_object", tag: "writer" }, ); const parsed = parseJsonLoose(raw); 
diff --git a/lib/engine/director.ts b/lib/engine/director.ts index 5bfa156..786e77b 100644 --- a/lib/engine/director.ts +++ b/lib/engine/director.ts @@ -405,7 +405,7 @@ export async function directInsertBeat( content: buildInsertBeatUserMessage(session, freeformAction), }, ], - { temperature: 0.9, responseFormat: "json_object" }, + { temperature: 0.9, responseFormat: "json_object", tag: "insert-beat" }, ); const parsed = parseJsonLoose(raw);