diff --git a/app/api/parse-style-image/route.ts b/app/api/parse-style-image/route.ts
index 6fb2d4d..1fca6e1 100644
--- a/app/api/parse-style-image/route.ts
+++ b/app/api/parse-style-image/route.ts
@@ -55,7 +55,6 @@ export async function POST(req: Request) {
       config.vision,
       body.imageDataUrl,
       STYLE_EXTRACTION_PROMPT,
-      { responseFormat: "json_object" },
     );
 
     let parsed: { stylePrompt?: string };
diff --git a/lib/ai-client/chat.ts b/lib/ai-client/chat.ts
index f28a280..f869c8f 100644
--- a/lib/ai-client/chat.ts
+++ b/lib/ai-client/chat.ts
@@ -1,69 +1,15 @@
 import { generateText } from "ai";
 import type { LanguageModelUsage, ModelMessage } from "ai";
-import { createAnthropic } from "@ai-sdk/anthropic";
-import { createGoogleGenerativeAI } from "@ai-sdk/google";
-import type { ProviderConfig, ProviderProtocol } from "@infiplot/types";
-import { fetchWithRetry } from "./fetchWithRetry";
-import { normalizeBaseUrl } from "./normalizeUrl";
+import type { ProviderConfig } from "@infiplot/types";
+import { createLanguageModel, resolveProtocol } from "./model";
 
 export type ChatMessage = {
   role: "system" | "user" | "assistant";
   content: string;
 };
 
-// Different providers expose prompt-cache stats under different keys. We probe
-// for the three forms we've seen in the wild and fall back to total tokens
-// when no cache field exists.
-//
-//   DeepSeek (v3+)    usage.prompt_cache_hit_tokens / prompt_cache_miss_tokens
-//   OpenAI / o-series usage.prompt_tokens_details.cached_tokens
-//   Anthropic / others  usage.cache_read_input_tokens / cache_creation_input_tokens
-//   No-cache (MiMo,
-//     local Ollama, …) only prompt_tokens / completion_tokens — print those
-//                       so we still get a rough cost baseline.
-type Usage = {
-  prompt_tokens?: number;
-  completion_tokens?: number;
-  prompt_cache_hit_tokens?: number;
-  prompt_cache_miss_tokens?: number;
-  prompt_tokens_details?: { cached_tokens?: number };
-  cache_read_input_tokens?: number;
-  cache_creation_input_tokens?: number;
-};
-
-function summarizeUsage(tag: string, usage: Usage | undefined): string {
-  if (!usage) return `[cache] ${tag} no-usage`;
-  const prompt = usage.prompt_tokens ?? 0;
-  const completion = usage.completion_tokens ?? 0;
-  // DeepSeek-style
-  if (typeof usage.prompt_cache_hit_tokens === "number") {
-    const hit = usage.prompt_cache_hit_tokens;
-    const miss = usage.prompt_cache_miss_tokens ?? Math.max(0, prompt - hit);
-    const denom = hit + miss;
-    const rate = denom > 0 ? ((hit / denom) * 100).toFixed(1) : "n/a";
-    return `[cache] ${tag} hit=${hit} miss=${miss} rate=${rate}% completion=${completion}`;
-  }
-  // OpenAI-style
-  const oaiCached = usage.prompt_tokens_details?.cached_tokens;
-  if (typeof oaiCached === "number") {
-    const miss = Math.max(0, prompt - oaiCached);
-    const rate = prompt > 0 ? ((oaiCached / prompt) * 100).toFixed(1) : "n/a";
-    return `[cache] ${tag} hit=${oaiCached} miss=${miss} rate=${rate}% completion=${completion}`;
-  }
-  // Anthropic-style
-  if (typeof usage.cache_read_input_tokens === "number") {
-    const hit = usage.cache_read_input_tokens;
-    const create = usage.cache_creation_input_tokens ?? 0;
-    const denom = hit + create + prompt;
-    const rate = denom > 0 ? ((hit / denom) * 100).toFixed(1) : "n/a";
-    return `[cache] ${tag} hit=${hit} create=${create} miss=${prompt} rate=${rate}% completion=${completion}`;
-  }
-  // No cache field at all
-  return `[cache] ${tag} prompt=${prompt} completion=${completion} (provider didn't report cache stats)`;
-}
-
 // AI SDK 6 unifies cache stats across providers into usage.inputTokenDetails,
-// so a single shape covers Anthropic + Gemini (no per-provider probing).
+// so a single shape covers Anthropic, Gemini, and OpenAI-compatible providers.
 function summarizeSdkUsage(
   tag: string,
   usage: LanguageModelUsage | undefined,
@@ -82,43 +28,16 @@ function summarizeSdkUsage(
   return `[cache] ${tag} input=${input} completion=${output} (provider didn't report cache stats)`;
 }
 
-// text/vision default to the OpenAI-compatible wire protocol when unset.
-function resolveTextProtocol(config: ProviderConfig): ProviderProtocol {
-  return config.provider ?? "openai_compatible";
-}
-
 export async function chat(
   config: ProviderConfig,
   messages: ChatMessage[],
   opts?: {
     temperature?: number;
-    responseFormat?: "json_object" | "text";
     tag?: string;
   },
 ): Promise<string> {
-  const protocol = resolveTextProtocol(config);
-  if (protocol === "anthropic" || protocol === "google") {
-    return chatViaAiSdk(config, messages, opts, protocol);
-  }
-  return chatOpenAiCompatible(config, messages, opts);
-}
-
-// Native Anthropic / Gemini via the Vercel AI SDK. response_format is not sent
-// (Anthropic has no JSON mode); the engine relies on parseJsonLoose downstream,
-// matching how it already tolerates loose JSON from every provider.
-async function chatViaAiSdk(
-  config: ProviderConfig,
-  messages: ChatMessage[],
-  opts: { temperature?: number; tag?: string } | undefined,
-  protocol: "anthropic" | "google",
-): Promise<string> {
-  const baseURL = normalizeBaseUrl(config.baseUrl, protocol);
-  const model =
-    protocol === "anthropic"
-      ? createAnthropic({ apiKey: config.apiKey, baseURL })(config.model)
-      : createGoogleGenerativeAI({ apiKey: config.apiKey, baseURL })(
-          config.model,
-        );
+  const protocol = resolveProtocol(config);
+  const model = createLanguageModel(config, protocol);
 
   const system = messages.find((m) => m.role === "system")?.content;
   const convo: ModelMessage[] = messages
@@ -142,59 +61,3 @@ async function chatViaAiSdk(
   }
   return text;
 }
-
-async function chatOpenAiCompatible(
-  config: ProviderConfig,
-  messages: ChatMessage[],
-  opts?: {
-    temperature?: number;
-    responseFormat?: "json_object" | "text";
-    tag?: string;
-  },
-): Promise<string> {
-  const url = `${normalizeBaseUrl(config.baseUrl, "openai_compatible")}/chat/completions`;
-  const body: Record<string, unknown> = {
-    model: config.model,
-    messages,
-    temperature: opts?.temperature ?? 0.9,
-  };
-  if (opts?.responseFormat === "json_object") {
-    body.response_format = { type: "json_object" };
-  }
-
-  const res = await fetchWithRetry(url, {
-    method: "POST",
-    headers: {
-      "Content-Type": "application/json",
-      Authorization: `Bearer ${config.apiKey}`,
-    },
-    body: JSON.stringify(body),
-  });
-
-  const text = await res.text();
-  if (!res.ok) {
-    throw new Error(`Chat API error ${res.status}: ${text}`);
-  }
-
-  let json: {
-    choices: { message: { content: string } }[];
-    usage?: Usage;
-  };
-  try {
-    json = JSON.parse(text);
-  } catch {
-    throw new Error(`Chat API returned invalid JSON: ${text.slice(0, 500)}`);
-  }
-
-  // Guard against empty choices array or missing message/content fields
-  const content = json.choices?.[0]?.message?.content;
-  if (typeof content !== "string") {
-    throw new Error(
-      `Chat API returned no content. Response: ${text.slice(0, 500)}`
-    );
-  }
-
-  console.log(summarizeUsage(opts?.tag ?? "chat", json.usage));
-
-  return content;
-}
diff --git a/lib/ai-client/model.ts b/lib/ai-client/model.ts
new file mode 100644
index 0000000..155e424
--- /dev/null
+++ b/lib/ai-client/model.ts
@@ -0,0 +1,44 @@
+import { createAnthropic } from "@ai-sdk/anthropic";
+import { createGoogleGenerativeAI } from "@ai-sdk/google";
+import { createOpenAI } from "@ai-sdk/openai";
+import type { ProviderConfig, ProviderProtocol } from "@infiplot/types";
+import { normalizeBaseUrl } from "./normalizeUrl";
+
+/**
+ * Picks the wire protocol for a provider config.
+ *
+ * @returns `config.provider` when it is set, otherwise `"openai_compatible"`.
+ */
+export function resolveProtocol(config: ProviderConfig): ProviderProtocol {
+  const { provider } = config;
+  return provider ?? "openai_compatible";
+}
+
+/**
+ * Builds the AI SDK model handle for `config.model` on the given protocol.
+ *
+ * The base URL is first passed through `normalizeBaseUrl` for that protocol,
+ * then handed to the provider factory together with `config.apiKey`.
+ *
+ * @param config - Supplies `apiKey`, `baseUrl` and `model`.
+ * @param protocol - Selects the provider factory; `"anthropic"` and `"google"`
+ *   get their native factories, every other value uses the OpenAI factory.
+ * @returns The model object produced by the selected provider factory.
+ */
+export function createLanguageModel(
+  config: ProviderConfig,
+  protocol: ProviderProtocol,
+) {
+  const baseURL = normalizeBaseUrl(config.baseUrl, protocol);
+  const { apiKey, model: modelId } = config;
+
+  if (protocol === "anthropic") {
+    return createAnthropic({ apiKey, baseURL })(modelId);
+  }
+  if (protocol === "google") {
+    return createGoogleGenerativeAI({ apiKey, baseURL })(modelId);
+  }
+  // "openai_compatible", "openai" and any unrecognised protocol share the
+  // OpenAI provider, built through its `.chat()` model factory.
+  return createOpenAI({ apiKey, baseURL }).chat(modelId);
+}
diff --git a/lib/ai-client/vision.ts b/lib/ai-client/vision.ts
index b43429a..12df0fa 100644
--- a/lib/ai-client/vision.ts
+++ b/lib/ai-client/vision.ts
@@ -1,10 +1,7 @@
 import { generateText } from "ai";
 import type { ModelMessage } from "ai";
-import { createAnthropic } from "@ai-sdk/anthropic";
-import { createGoogleGenerativeAI } from "@ai-sdk/google";
-import type { ProviderConfig, ProviderProtocol } from "@infiplot/types";
-import { fetchWithRetry } from "./fetchWithRetry";
-import { normalizeBaseUrl } from "./normalizeUrl";
+import type { ProviderConfig } from "@infiplot/types";
+import { createLanguageModel, resolveProtocol } from "./model";
 
 const VISION_TIMEOUT_MS = 60_000;
 
@@ -13,55 +10,20 @@ export async function interpretClick(
   imageBase64: string,
   prompt: string,
 ): Promise<string> {
-  // Wrap the raw base64 in a PNG data URL — the Canvas annotator on the
-  // client encodes as PNG. analyzeImageDataUrl handles the actual request.
   return analyzeImageDataUrl(
     config,
     `data:image/png;base64,${imageBase64}`,
     prompt,
-    { responseFormat: "json_object" },
   );
 }
 
-// text/vision default to the OpenAI-compatible wire protocol when unset.
-function resolveVisionProtocol(config: ProviderConfig): ProviderProtocol {
-  return config.provider ?? "openai_compatible";
-}
-
-/**
- * General single-image vision call. Accepts a complete data URL (preserves
- * the source mime type, e.g. webp/jpeg) and lets the caller opt out of
- * `response_format: json_object` for free-form text responses.
- */
 export async function analyzeImageDataUrl(
   config: ProviderConfig,
   imageDataUrl: string,
   prompt: string,
-  opts: { responseFormat?: "json_object" | "text" } = {},
 ): Promise<string> {
-  const protocol = resolveVisionProtocol(config);
-  if (protocol === "anthropic" || protocol === "google") {
-    return analyzeViaAiSdk(config, imageDataUrl, prompt, protocol);
-  }
-  return analyzeOpenAiCompatible(config, imageDataUrl, prompt, opts);
-}
-
-// Native Anthropic / Gemini multimodal via the AI SDK. The image part takes
-// the full data URL directly; the SDK decodes it. response_format is not sent
-// (no JSON mode on Anthropic) — the engine's parseJsonLoose handles output.
-async function analyzeViaAiSdk(
-  config: ProviderConfig,
-  imageDataUrl: string,
-  prompt: string,
-  protocol: "anthropic" | "google",
-): Promise<string> {
-  const baseURL = normalizeBaseUrl(config.baseUrl, protocol);
-  const model =
-    protocol === "anthropic"
-      ? createAnthropic({ apiKey: config.apiKey, baseURL })(config.model)
-      : createGoogleGenerativeAI({ apiKey: config.apiKey, baseURL })(
-          config.model,
-        );
+  const protocol = resolveProtocol(config);
+  const model = createLanguageModel(config, protocol);
 
   const messages: ModelMessage[] = [
     {
@@ -80,6 +42,7 @@ async function analyzeViaAiSdk(
       model,
       messages,
       temperature: 0.2,
+      maxRetries: 0,
       abortSignal: timeoutCtrl.signal,
     });
     if (typeof text !== "string" || text.length === 0) {
@@ -90,70 +53,3 @@ async function analyzeViaAiSdk(
     clearTimeout(timeoutId);
   }
 }
-
-async function analyzeOpenAiCompatible(
-  config: ProviderConfig,
-  imageDataUrl: string,
-  prompt: string,
-  opts: { responseFormat?: "json_object" | "text" } = {},
-): Promise<string> {
-  const url = `${normalizeBaseUrl(config.baseUrl, "openai_compatible")}/chat/completions`;
-
-  const body: Record<string, unknown> = {
-    model: config.model,
-    messages: [
-      {
-        role: "user",
-        content: [
-          { type: "text", text: prompt },
-          { type: "image_url", image_url: { url: imageDataUrl } },
-        ],
-      },
-    ],
-    temperature: 0.2,
-  };
-  if (opts.responseFormat === "json_object") {
-    body.response_format = { type: "json_object" };
-  }
-
-  const timeoutCtrl = new AbortController();
-  const timeoutId = setTimeout(() => timeoutCtrl.abort(), VISION_TIMEOUT_MS);
-
-  let res: Response;
-  try {
-    res = await fetchWithRetry(url, {
-      method: "POST",
-      headers: {
-        "Content-Type": "application/json",
-        Authorization: `Bearer ${config.apiKey}`,
-      },
-      body: JSON.stringify(body),
-      signal: timeoutCtrl.signal,
-      retries: 0,
-    });
-  } finally {
-    clearTimeout(timeoutId);
-  }
-
-  const text = await res.text();
-  if (!res.ok) {
-    throw new Error(`Vision API error ${res.status}: ${text}`);
-  }
-
-  let json: { choices: { message: { content: string } }[] };
-  try {
-    json = JSON.parse(text);
-  } catch {
-    throw new Error(`Vision API returned invalid JSON: ${text.slice(0, 500)}`);
-  }
-
-  // Guard against empty choices array or missing message/content fields
-  const content = json.choices?.[0]?.message?.content;
-  if (typeof content !== "string") {
-    throw new Error(
-      `Vision API returned no content. Response: ${text.slice(0, 500)}`
-    );
-  }
-
-  return content;
-}
diff --git a/lib/engine/agents/architect.ts b/lib/engine/agents/architect.ts
index a53d469..6c9cf75 100644
--- a/lib/engine/agents/architect.ts
+++ b/lib/engine/agents/architect.ts
@@ -53,7 +53,7 @@ export async function runArchitect(
         { role: "system", content: ARCHITECT_SYSTEM },
         { role: "user", content: buildArchitectUserMessage(session) },
       ],
-      { temperature: 0.85, responseFormat: "json_object", tag: "architect" },
+      { temperature: 0.85, tag: "architect" },
     );
 
     const parsed = parseJsonLoose<RawStoryState>(raw);
diff --git a/lib/engine/agents/characterDesigner.ts b/lib/engine/agents/characterDesigner.ts
index e407c10..60835c0 100644
--- a/lib/engine/agents/characterDesigner.ts
+++ b/lib/engine/agents/characterDesigner.ts
@@ -56,7 +56,7 @@ async function runDesignLLM(
         content: buildCharacterDesignerUserMessage(charName, session),
       },
     ],
-    { temperature: 0.7, responseFormat: "json_object", tag: "character-designer" },
+    { temperature: 0.7, tag: "character-designer" },
   );
   return parseJsonLoose<CharacterDesignOutput>(raw);
 }
diff --git a/lib/engine/agents/cinematographer.ts b/lib/engine/agents/cinematographer.ts
index e2c3d22..7b994ce 100644
--- a/lib/engine/agents/cinematographer.ts
+++ b/lib/engine/agents/cinematographer.ts
@@ -67,7 +67,7 @@ export async function runCinematographer(
         ),
       },
     ],
-    { temperature: 0.6, responseFormat: "json_object", tag: "cinematographer" },
+    { temperature: 0.6, tag: "cinematographer" },
   );
 
   const parsed = parseJsonLoose<RawCinematographerOutput>(raw);
diff --git a/lib/engine/agents/writer.ts b/lib/engine/agents/writer.ts
index b560d56..935d2e8 100644
--- a/lib/engine/agents/writer.ts
+++ b/lib/engine/agents/writer.ts
@@ -423,7 +423,7 @@ export async function runWriterPlan(
       { role: "system", content: WRITER_PLAN_SYSTEM },
       { role: "user", content: buildWriterPlanUserMessage(session) },
     ],
-    { temperature: 0.9, responseFormat: "json_object", tag: "writer-plan" },
+    { temperature: 0.9, tag: "writer-plan" },
   );
 
   const parsed = parseJsonLoose<RawPlan>(raw);
@@ -473,7 +473,7 @@ export async function runWriterBeats(
       { role: "system", content: WRITER_BEATS_SYSTEM },
       { role: "user", content: buildWriterBeatsUserMessage(session, plan) },
     ],
-    { temperature: 0.9, responseFormat: "json_object", tag: "writer-beats" },
+    { temperature: 0.9, tag: "writer-beats" },
   );
 
   const parsed = parseJsonLoose<RawBeats>(raw);
diff --git a/lib/engine/director.ts b/lib/engine/director.ts
index 28114a7..24c92a9 100644
--- a/lib/engine/director.ts
+++ b/lib/engine/director.ts
@@ -446,7 +446,7 @@ export async function directInsertBeat(
         content: buildInsertBeatUserMessage(session, freeformAction),
       },
     ],
-    { temperature: 0.9, responseFormat: "json_object", tag: "insert-beat" },
+    { temperature: 0.9, tag: "insert-beat" },
   );
 
   const parsed = parseJsonLoose<InsertBeatPartial>(raw);