diff --git a/lib/ai-client/chat.ts b/lib/ai-client/chat.ts
index 08c4dff..4480dbb 100644
--- a/lib/ai-client/chat.ts
+++ b/lib/ai-client/chat.ts
@@ -6,10 +6,65 @@ export type ChatMessage = {
   content: string;
 };
 
+// Different providers expose prompt-cache stats under different keys. We probe
+// for the three forms we've seen in the wild and fall back to plain prompt and
+// completion token counts when no cache field exists.
+//
+//   DeepSeek (v3+)      usage.prompt_cache_hit_tokens / prompt_cache_miss_tokens
+//   OpenAI / o-series   usage.prompt_tokens_details.cached_tokens
+//   Anthropic / others  usage.cache_read_input_tokens / cache_creation_input_tokens
+//   No cache (MiMo,     only usage.prompt_tokens / completion_tokens — we
+//     local Ollama, …)  print those so we still get a rough cost
+//                       baseline.
+type Usage = {
+  prompt_tokens?: number; // prompt token count; summarizeUsage treats a missing value as 0
+  completion_tokens?: number; // completion token count; summarizeUsage treats a missing value as 0
+  prompt_cache_hit_tokens?: number; // DeepSeek-style; a numeric value selects the hit/miss report
+  prompt_cache_miss_tokens?: number; // DeepSeek-style; when absent, miss = max(0, prompt - hit)
+  prompt_tokens_details?: { cached_tokens?: number }; // OpenAI-style cached-token count
+  cache_read_input_tokens?: number; // Anthropic-style; reported as "hit"
+  cache_creation_input_tokens?: number; // Anthropic-style; reported as "create", 0 when absent
+};
+
+// Builds the one-line "[cache] <tag> …" summary that chat() logs. Shapes are probed in order
+// (DeepSeek, OpenAI, Anthropic); a rate with a zero denominator prints "n/a" with no "%" sign.
+function summarizeUsage(tag: string, usage: Usage | undefined): string {
+  if (!usage) return `[cache] ${tag} no-usage`;
+  const prompt = usage.prompt_tokens ?? 0;
+  const completion = usage.completion_tokens ?? 0;
+  const pct = (hit: number, total: number): string =>
+    total > 0 ? `${((hit / total) * 100).toFixed(1)}%` : "n/a";
+  // DeepSeek-style: explicit hit counter; miss falls back to the rest of the prompt.
+  if (typeof usage.prompt_cache_hit_tokens === "number") {
+    const hit = usage.prompt_cache_hit_tokens;
+    const miss = usage.prompt_cache_miss_tokens ?? Math.max(0, prompt - hit);
+    return `[cache] ${tag} hit=${hit} miss=${miss} rate=${pct(hit, hit + miss)} completion=${completion}`;
+  }
+  // OpenAI-style: cached_tokens is measured against prompt_tokens.
+  const oaiCached = usage.prompt_tokens_details?.cached_tokens;
+  if (typeof oaiCached === "number") {
+    const miss = Math.max(0, prompt - oaiCached);
+    return `[cache] ${tag} hit=${oaiCached} miss=${miss} rate=${pct(oaiCached, prompt)} completion=${completion}`;
+  }
+  // Anthropic-style: the denominator is cache reads + cache creation + prompt tokens.
+  // NOTE(review): assumes prompt_tokens excludes cache reads/writes — confirm per provider.
+  if (typeof usage.cache_read_input_tokens === "number") {
+    const hit = usage.cache_read_input_tokens;
+    const create = usage.cache_creation_input_tokens ?? 0;
+    return `[cache] ${tag} hit=${hit} create=${create} miss=${prompt} rate=${pct(hit, hit + create + prompt)} completion=${completion}`;
+  }
+  // No cache field at all
+  return `[cache] ${tag} prompt=${prompt} completion=${completion} (provider didn't report cache stats)`;
+}
+
 export async function chat(
   config: ProviderConfig,
   messages: ChatMessage[],
-  opts?: { temperature?: number; responseFormat?: "json_object" | "text" },
+  opts?: {
+    temperature?: number;
+    responseFormat?: "json_object" | "text";
+    tag?: string;
+  },
 ): Promise<string> {
   const url = `${config.baseUrl.replace(/\/$/, "")}/chat/completions`;
   const body: Record<string, unknown> = {
@@ -35,7 +90,10 @@ export async function chat(
     throw new Error(`Chat API error ${res.status}: ${text}`);
   }
 
-  let json: { choices: { message: { content: string } }[] };
+  let json: {
+    choices: { message: { content: string } }[];
+    usage?: Usage;
+  };
   try {
     json = JSON.parse(text);
   } catch {
@@ -50,5 +108,7 @@ export async function chat(
     );
   }
 
+  console.log(summarizeUsage(opts?.tag ?? "chat", json.usage));
+
   return content;
 }
diff --git a/lib/engine/agents/architect.ts b/lib/engine/agents/architect.ts
index d3349e0..a53d469 100644
--- a/lib/engine/agents/architect.ts
+++ b/lib/engine/agents/architect.ts
@@ -53,7 +53,7 @@ export async function runArchitect(
         { role: "system", content: ARCHITECT_SYSTEM },
         { role: "user", content: buildArchitectUserMessage(session) },
       ],
-      { temperature: 0.85, responseFormat: "json_object" },
+      { temperature: 0.85, responseFormat: "json_object", tag: "architect" },
     );
 
     const parsed = parseJsonLoose<RawStoryState>(raw);
diff --git a/lib/engine/agents/characterDesigner.ts b/lib/engine/agents/characterDesigner.ts
index 152a975..e407c10 100644
--- a/lib/engine/agents/characterDesigner.ts
+++ b/lib/engine/agents/characterDesigner.ts
@@ -56,7 +56,7 @@ async function runDesignLLM(
         content: buildCharacterDesignerUserMessage(charName, session),
       },
     ],
-    { temperature: 0.7, responseFormat: "json_object" },
+    { temperature: 0.7, responseFormat: "json_object", tag: "character-designer" },
   );
   return parseJsonLoose<CharacterDesignOutput>(raw);
 }
diff --git a/lib/engine/agents/cinematographer.ts b/lib/engine/agents/cinematographer.ts
index 9274a0b..e2c3d22 100644
--- a/lib/engine/agents/cinematographer.ts
+++ b/lib/engine/agents/cinematographer.ts
@@ -67,7 +67,7 @@ export async function runCinematographer(
         ),
       },
     ],
-    { temperature: 0.6, responseFormat: "json_object" },
+    { temperature: 0.6, responseFormat: "json_object", tag: "cinematographer" },
   );
 
   const parsed = parseJsonLoose<RawCinematographerOutput>(raw);
diff --git a/lib/engine/agents/writer.ts b/lib/engine/agents/writer.ts
index 97a5e4f..ce04981 100644
--- a/lib/engine/agents/writer.ts
+++ b/lib/engine/agents/writer.ts
@@ -369,7 +369,7 @@ export async function runWriter(
       { role: "system", content: WRITER_SYSTEM },
       { role: "user", content: buildWriterUserMessage(session) },
     ],
-    { temperature: 0.9, responseFormat: "json_object" },
+    { temperature: 0.9, responseFormat: "json_object", tag: "writer" },
   );
 
   const parsed = parseJsonLoose<RawScene>(raw);
diff --git a/lib/engine/director.ts b/lib/engine/director.ts
index 5bfa156..786e77b 100644
--- a/lib/engine/director.ts
+++ b/lib/engine/director.ts
@@ -405,7 +405,7 @@ export async function directInsertBeat(
         content: buildInsertBeatUserMessage(session, freeformAction),
       },
     ],
-    { temperature: 0.9, responseFormat: "json_object" },
+    { temperature: 0.9, responseFormat: "json_object", tag: "insert-beat" },
   );
 
   const parsed = parseJsonLoose<InsertBeatPartial>(raw);