From 83fd5717e7876f06134220de5b348e7001d9a62b Mon Sep 17 00:00:00 2001
From: yuanzonghao <yuanzonghao123@gmail.com>
Date: Thu, 4 Jun 2026 15:51:53 +0800
Subject: [PATCH 1/2] =?UTF-8?q?feat(ai-client):=20multi-provider=20compat?=
 =?UTF-8?q?=20=E2=80=94=20native=20Anthropic/Google=20+=20URL=20tolerance?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- TEXT/VISION: add native Anthropic & Google Gemini paths via Vercel AI SDK,
  selectable through TEXT_PROVIDER / VISION_PROVIDER (default openai_compatible)
- IMAGE: expand to openai (gpt-image) / google (Nano Banana) via AI SDK
  alongside the existing Runware task-array and OpenAI-compatible REST paths
- normalizeBaseUrl: tolerate URLs with/without /v1 (or /chat/completions);
  append the per-protocol version segment only for bare hosts
- config: readProvider() reads *_PROVIDER; types: ProviderProtocol + provider?
- deps: ai, @ai-sdk/anthropic, @ai-sdk/google, @ai-sdk/openai; docs in .env.example + README

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .env.example                  |  33 +++++-
 README.md                     |  14 ++-
 lib/ai-client/chat.ts         |  90 +++++++++++++++-
 lib/ai-client/image.ts        | 189 +++++++++++++++++++++++++---------
 lib/ai-client/normalizeUrl.ts |  66 ++++++++++++
 lib/ai-client/vision.ts       |  76 +++++++++++++-
 lib/config.ts                 |  32 +++++-
 lib/types/index.ts            |  31 ++++++
 package.json                  |   4 +
 pnpm-lock.yaml                | 146 ++++++++++++++++++++++++--
 10 files changed, 614 insertions(+), 67 deletions(-)
 create mode 100644 lib/ai-client/normalizeUrl.ts

diff --git a/.env.example b/.env.example
index ae1980e..6d04fa6 100644
--- a/.env.example
+++ b/.env.example
@@ -3,14 +3,18 @@
 # Recommended setup: Xiaomi MiMo Token Plan for TEXT / VISION / TTS
 # (one API key covers all three) + Runware for IMAGE (FLUX.2 [klein]).
 #
-# TEXT / VISION use any OpenAI-compatible endpoint (any OpenAI-
-# compatible host works: OpenRouter, OpenAI, Anthropic via proxy,
-# Gemini, DeepSeek, Ollama, ...).
+# TEXT / VISION default to any OpenAI-compatible endpoint, and can switch to
+# native Anthropic or Google Gemini via TEXT_PROVIDER / VISION_PROVIDER.
 # TTS uses Xiaomi MiMo's own voice design / clone protocol
 # (not OpenAI-compatible; appends -voicedesign / -voiceclone).
 #
-# IMAGE uses Runware's own task-array protocol (not OpenAI-compatible);
-# the adapter posts an `imageInference` task to IMAGE_BASE_URL.
+# IMAGE supports Runware (its own task-array protocol), OpenAI (gpt-image),
+# and Google Gemini (Nano Banana) via IMAGE_PROVIDER.
+#
+# *_PROVIDER (optional) selects the wire protocol; leave unset for the
+# OpenAI-compatible default (image is auto-detected from the URL). Base URLs
+# tolerate a missing or extra /v1 (or a trailing /chat/completions) — the
+# engine normalizes them.
 # =============================================================
 
 # ---- 1. Text LLM · scene director ----------------------------------
@@ -26,6 +30,10 @@
 TEXT_BASE_URL=https://api.deepseek.com/v1
 TEXT_API_KEY=sk-xxx
 TEXT_MODEL=deepseek-v4-flash
+# TEXT_PROVIDER: openai_compatible (default) | anthropic | google
+#   anthropic → TEXT_BASE_URL=https://api.anthropic.com  TEXT_MODEL=claude-sonnet-4-6
+#   google    → TEXT_BASE_URL=https://generativelanguage.googleapis.com  TEXT_MODEL=gemini-3.5-flash
+# TEXT_PROVIDER=openai_compatible
 
 # ---- 2. Image generator (renders the scene background) -------------
 # Recommended: Runware + FLUX.2 [klein] 9B KV — distilled 4-step model,
@@ -36,12 +44,27 @@ TEXT_MODEL=deepseek-v4-flash
 IMAGE_BASE_URL=https://api.runware.ai/v1
 IMAGE_API_KEY=runware-xxx
 IMAGE_MODEL=runware:400@6
+# IMAGE_PROVIDER: runware (auto-detected for runware.ai) | openai_compatible
+#                 | openai | google
+#   openai → gpt-image, supports referenceImages (character/scene continuity).
+#            IMAGE_BASE_URL=https://api.openai.com  IMAGE_MODEL=gpt-image-1
+#   google → Gemini "Nano Banana" (Imagen is EOL 2026-06-24, do not use it).
+#            IMAGE_BASE_URL=https://generativelanguage.googleapis.com
+#            IMAGE_MODEL=gemini-2.5-flash-image
+# NOTE: openai/google return raw bytes → inlined as a data: URI for the session
+# (heavier per-call transport than Runware's UUID re-reference loop). Runware
+# stays fastest + cheapest for the scene-by-scene flow.
+# IMAGE_PROVIDER=runware
 
 # ---- 3. Vision model · multimodal click interpretation -------------
 # Recommended: MiMo V2.5 — multimodal, accepts image_url content parts.
 VISION_BASE_URL=https://token-plan-sgp.xiaomimimo.com/v1
 VISION_API_KEY=tp-xxx
 VISION_MODEL=mimo-v2.5
+# VISION_PROVIDER: openai_compatible (default) | anthropic | google
+#   anthropic → VISION_BASE_URL=https://api.anthropic.com  VISION_MODEL=claude-sonnet-4-6
+#   google    → VISION_BASE_URL=https://generativelanguage.googleapis.com  VISION_MODEL=gemini-3.5-flash
+# VISION_PROVIDER=openai_compatible
 
 # ---- 4. TTS · Xiaomi MiMo (optional — leave blank to disable) ------
 # Per-character voice design → clone, with per-line delivery direction.
diff --git a/README.md b/README.md
index 80c38ce..09c2220 100644
--- a/README.md
+++ b/README.md
@@ -125,7 +125,7 @@ InfiPlot 同时支持部署到 Vercel 与 Cloudflare Workers。Cloudflare 部署
 
 ## 配置教程
 
-InfiPlot 会与四类模型供应商通信。**文本（Text）和视觉（Vision）都使用 OpenAI 兼容的接口**，可以自由搭配。**图像（Image）**目前接入 **Runware**（其自有的 task-array 协议，并非 OpenAI 兼容）。**语音（TTS）**使用**小米 MiMo** 自有的音色设计/克隆协议——支持角色级音色设计、克隆与逐行演绎指导。
+InfiPlot 会与四类模型供应商通信。**文本（Text）和视觉（Vision）** 默认使用 OpenAI 兼容接口，也可原生切换到 **Anthropic** 或 **Google Gemini**。**图像（Image）** 支持 **Runware**（其自有 task-array 协议）、**OpenAI**（`gpt-image`）与 **Google Gemini**（Nano Banana）。**语音（TTS）** 使用 **小米 MiMo** 自有的音色设计/克隆协议——支持角色级音色设计、克隆与逐行演绎指导。
 
 **1. 选择你的供应商**
 
@@ -136,6 +136,18 @@ InfiPlot 会与四类模型供应商通信。**文本（Text）和视觉（Visio
 | Vision · 点击解读  | `VISION_BASE_URL` `VISION_API_KEY` `VISION_MODEL`  | ✅ | Google 的 `gemini-3.5-flash` |
 | TTS · 角色配音 | `TTS_BASE_URL` `TTS_API_KEY` `TTS_SPEECH_MODEL` | 可选 —— 留空则静音运行 | 小米 MiMo 的 `mimo-v2.5-tts` |
 
+> **可选 · 指定接口协议**：每类模型都可加一个 `*_PROVIDER` 变量（`TEXT_PROVIDER` / `VISION_PROVIDER` / `IMAGE_PROVIDER`）显式选择接口协议。**不设则保持向后兼容**——文本/视觉默认走 OpenAI 兼容接口，图像按 `*_BASE_URL` 自动判断（`runware.ai` → Runware，否则 OpenAI 兼容）。
+>
+> | 取值 | 适用 | 说明 |
+> |---|---|---|
+> | `openai_compatible`（默认） | Text · Vision · Image | OpenAI Chat Completions / `/images/generations` |
+> | `anthropic` | Text · Vision | 原生 Anthropic Messages 接口 |
+> | `google` | Text · Vision · Image | 原生 Gemini；图像用 Nano Banana 系（如 `gemini-2.5-flash-image`，**勿用已停服的 Imagen**） |
+> | `openai` | Image | OpenAI `gpt-image`，支持参考图编辑 |
+> | `runware` | Image | Runware task-array 协议 |
+>
+> 此外，`*_BASE_URL` 带不带 `/v1`（甚至末尾多写了 `/chat/completions`）都能正常工作——引擎会自动规范化。
+
 **2. 填写环境变量**
 
 九个变量为必填；TTS 可选（留空则静音运行）。此外还有一个用于低成本测试的开关：
diff --git a/lib/ai-client/chat.ts b/lib/ai-client/chat.ts
index 4480dbb..f28a280 100644
--- a/lib/ai-client/chat.ts
+++ b/lib/ai-client/chat.ts
@@ -1,5 +1,10 @@
-import type { ProviderConfig } from "@infiplot/types";
+import { generateText } from "ai";
+import type { LanguageModelUsage, ModelMessage } from "ai";
+import { createAnthropic } from "@ai-sdk/anthropic";
+import { createGoogleGenerativeAI } from "@ai-sdk/google";
+import type { ProviderConfig, ProviderProtocol } from "@infiplot/types";
 import { fetchWithRetry } from "./fetchWithRetry";
+import { normalizeBaseUrl } from "./normalizeUrl";
 
 export type ChatMessage = {
   role: "system" | "user" | "assistant";
@@ -57,6 +62,31 @@ function summarizeUsage(tag: string, usage: Usage | undefined): string {
   return `[cache] ${tag} prompt=${prompt} completion=${completion} (provider didn't report cache stats)`;
 }
 
+// Builds the "[cache] …" log line from the AI SDK usage object. Cache counters
+// are read from usage.inputTokenDetails; missing token counts are logged as 0.
+function summarizeSdkUsage(
+  tag: string,
+  usage: LanguageModelUsage | undefined,
+): string {
+  if (!usage) return `[cache] ${tag} no-usage`;
+  const details = usage.inputTokenDetails;
+  const cacheRead = details?.cacheReadTokens;
+  const cacheWrite = details?.cacheWriteTokens;
+  const promptTokens = usage.inputTokens ?? 0;
+  const completionTokens = usage.outputTokens ?? 0;
+  if (typeof cacheRead !== "number" && typeof cacheWrite !== "number") {
+    return `[cache] ${tag} input=${promptTokens} completion=${completionTokens} (provider didn't report cache stats)`;
+  }
+  const hit = cacheRead ?? 0;
+  const percent = promptTokens > 0 ? ((hit / promptTokens) * 100).toFixed(1) : "n/a";
+  return `[cache] ${tag} hit=${hit} create=${cacheWrite ?? 0} input=${promptTokens} rate=${percent}% completion=${completionTokens}`;
+}
+
+// Text role: an explicit config.provider wins; unset means "openai_compatible".
+function resolveTextProtocol(config: ProviderConfig): ProviderProtocol {
+  return config.provider ?? "openai_compatible";
+}
+
 export async function chat(
   config: ProviderConfig,
   messages: ChatMessage[],
@@ -66,7 +96,63 @@ export async function chat(
     tag?: string;
   },
 ): Promise<string> {
-  const url = `${config.baseUrl.replace(/\/$/, "")}/chat/completions`;
+  const protocol = resolveTextProtocol(config);
+  if (protocol === "anthropic" || protocol === "google") {
+    return chatViaAiSdk(config, messages, opts, protocol);
+  }
+  return chatOpenAiCompatible(config, messages, opts);
+}
+
+// Native Anthropic / Gemini via the Vercel AI SDK. response_format is not sent
+// (Anthropic has no JSON mode); the engine relies on parseJsonLoose downstream.
+// All system messages are merged, in order, into the SDK's one `system` prompt.
+async function chatViaAiSdk(
+  config: ProviderConfig,
+  messages: ChatMessage[],
+  opts: { temperature?: number; tag?: string } | undefined,
+  protocol: "anthropic" | "google",
+): Promise<string> {
+  const baseURL = normalizeBaseUrl(config.baseUrl, protocol);
+  const model =
+    protocol === "anthropic"
+      ? createAnthropic({ apiKey: config.apiKey, baseURL })(config.model)
+      : createGoogleGenerativeAI({ apiKey: config.apiKey, baseURL })(
+          config.model,
+        );
+
+  const systemParts = messages.filter((m) => m.role === "system");
+  const system = systemParts.map((m) => m.content).join("\n\n") || undefined;
+  const convo: ModelMessage[] = messages
+    .filter((m) => m.role !== "system")
+    .map((m) => ({
+      role: m.role as "user" | "assistant",
+      content: m.content,
+    }));
+
+  const { text, usage } = await generateText({
+    model,
+    system,
+    messages: convo,
+    temperature: opts?.temperature ?? 0.9,
+  });
+  console.log(summarizeSdkUsage(opts?.tag ?? "chat", usage));
+
+  if (typeof text !== "string" || text.length === 0) {
+    throw new Error(`Chat API (AI SDK ${protocol}) returned no content.`);
+  }
+  return text;
+}
+
+async function chatOpenAiCompatible(
+  config: ProviderConfig,
+  messages: ChatMessage[],
+  opts?: {
+    temperature?: number;
+    responseFormat?: "json_object" | "text";
+    tag?: string;
+  },
+): Promise<string> {
+  const url = `${normalizeBaseUrl(config.baseUrl, "openai_compatible")}/chat/completions`;
   const body: Record<string, unknown> = {
     model: config.model,
     messages,
diff --git a/lib/ai-client/image.ts b/lib/ai-client/image.ts
index 218de21..bf11a0c 100644
--- a/lib/ai-client/image.ts
+++ b/lib/ai-client/image.ts
@@ -1,5 +1,9 @@
-import type { ProviderConfig } from "@infiplot/types";
+import { generateImage as generateImageSdk } from "ai";
+import { createOpenAI } from "@ai-sdk/openai";
+import { createGoogleGenerativeAI } from "@ai-sdk/google";
+import type { ProviderConfig, ProviderProtocol } from "@infiplot/types";
 import { fetchWithRetry } from "./fetchWithRetry";
+import { normalizeBaseUrl } from "./normalizeUrl";
 
 // Runware uses its own task-array protocol (not OpenAI-compatible).
 // POST <baseUrl> with [{ taskType: "imageInference", ... }]; errors come
@@ -38,30 +42,52 @@ export type GenerateImageOptions = {
    * Reference image (UUID, public URL, or base64) for img2img. When set,
    * FLUX preserves the seed image's composition and applies `strength` to
    * deviate. NOTE: FLUX.2 [klein] 9B KV does NOT support seedImage — use
-   * `referenceImages` for visual continuity instead.
+   * `referenceImages` for visual continuity instead. Runware-only.
    */
   seedImage?: string;
   /**
    * Reference images (UUIDs, URLs, or base64) to condition generation on —
    * typically character portraits + the prior scene image. Runware caps at 4;
-   * we silently truncate beyond that.
+   * we silently truncate beyond that. On the OpenAI/Gemini AI SDK paths these
+   * map to `prompt.images` (the SDK accepts public URLs or data URLs).
    */
   referenceImages?: string[];
-  /** 0–1, FLUX needs ≥ 0.8 to actually have an effect. */
+  /** 0–1, FLUX needs ≥ 0.8 to actually have an effect. Runware-only. */
   strength?: number;
 };
 
 export type GenerateImageResult = {
-  /** Public CDN URL of the generated image (Runware-hosted). */
+  /**
+   * Image the client can render directly. A Runware CDN URL on the Runware
+   * path; a `data:<mime>;base64,...` URI on the AI SDK paths (OpenAI/Gemini
+   * return raw bytes, not a hosted URL).
+   */
   imageUrl: string;
-  /** Stable UUID for cheap re-reference in later `referenceImages`. */
+  /**
+   * Stable handle for cheap re-reference in later `referenceImages`. A real
+   * Runware UUID on the Runware path; a synthetic UUID on other paths (those
+   * re-reference via the URL/data-URL form instead).
+   */
   imageUuid: string;
 };
 
+// With IMAGE_PROVIDER unset, fall back to the historical URL-based inference:
+// a base URL containing "runware.ai" selects Runware, except for the model
+// "image-2-vip", which is routed to the OpenAI-compatible path on any URL.
+function inferImageProtocol(config: ProviderConfig): ProviderProtocol {
+  const onRunwareHost = config.baseUrl.includes("runware.ai");
+  if (onRunwareHost && config.model !== "image-2-vip") return "runware";
+  return "openai_compatible";
+}
+
+function resolveImageProtocol(config: ProviderConfig): ProviderProtocol {
+  return config.provider ?? inferImageProtocol(config); // explicit provider wins over URL inference
+}
+
 // ──────────────────────────────────────────────────────────────────────
 //  generateImage — text-to-image (default) or referenceImages-conditioned.
-//  Returns both the public URL (for client display + future references)
-//  and the UUID (cheapest reference form for subsequent calls).
+//  Returns both a renderable image URL and a re-reference handle (see
+//  GenerateImageResult). Dispatches on the resolved wire protocol.
 // ──────────────────────────────────────────────────────────────────────
 
 export async function generateImage(
@@ -69,51 +95,120 @@ export async function generateImage(
   prompt: string,
   options?: GenerateImageOptions,
 ): Promise<GenerateImageResult> {
-  const url = config.baseUrl.replace(/\/$/, "");
+  const protocol = resolveImageProtocol(config);
+  switch (protocol) {
+    case "openai":
+    case "google":
+      return generateImageViaAiSdk(config, prompt, options, protocol);
+    case "runware":
+      return generateImageRunware(config, prompt, options);
+    case "anthropic":
+      throw new Error(
+        'IMAGE_PROVIDER "anthropic" does not generate images. Use "openai", "google", "runware", or "openai_compatible".',
+      );
+    case "openai_compatible":
+    default:
+      return generateImageOpenAiCompatible(config, prompt);
+  }
+}
 
-  // 1. OpenAI-compatible route (GPTGod, DALL-E, etc.)
-  const isOpenAi = !url.includes("runware.ai") || config.model === "image-2-vip";
-  if (isOpenAi) {
-    const endpoint = url.endsWith("/images/generations") ? url : `${url}/images/generations`;
-    console.log(`[ai-client] Calling OpenAI-compatible image generations at: ${endpoint} with model: ${config.model}`);
-    
-    const res = await fetchWithRetry(endpoint, {
-      method: "POST",
-      headers: {
-        "Content-Type": "application/json",
-        Authorization: `Bearer ${config.apiKey}`,
-      },
-      body: JSON.stringify({
-        model: config.model,
-        prompt: prompt,
-        n: 1,
-        size: "1792x1024", // Use horizontal size (16:9)
-      }),
-    });
+// Native OpenAI (gpt-image) / Gemini (Nano Banana) via the Vercel AI SDK.
+// Unlike the fetch path, this supports reference-image editing via
+// `prompt.images`. The SDK returns raw bytes (no hosted URL), so we hand the
+// client a data URI and synthesize a UUID; continuity references reuse the
+// data URI rather than a provider UUID.
+async function generateImageViaAiSdk(
+  config: ProviderConfig,
+  prompt: string,
+  options: GenerateImageOptions | undefined,
+  protocol: "openai" | "google",
+): Promise<GenerateImageResult> {
+  const baseURL = normalizeBaseUrl(config.baseUrl, protocol);
+  const imageModel =
+    protocol === "openai"
+      ? createOpenAI({ apiKey: config.apiKey, baseURL }).image(config.model)
+      : createGoogleGenerativeAI({ apiKey: config.apiKey, baseURL }).image(
+          config.model,
+        );
 
-    const text = await res.text();
-    let json: any;
-    try {
-      json = JSON.parse(text);
-    } catch {
-      throw new Error(`OpenAI Image API error ${res.status}: ${text.slice(0, 500)}`);
-    }
+  const refs = (options?.referenceImages ?? []).slice(0, MAX_REFERENCE_IMAGES);
+  const promptArg =
+    refs.length > 0 ? { text: prompt, images: refs } : prompt;
 
-    if (json.error) {
-      throw new Error(`OpenAI Image API error: ${json.error.message || JSON.stringify(json.error)}`);
-    }
+  // OpenAI's image models take an explicit `size`; gpt-image's widest landscape
+  // option is 1536x1024. Gemini takes an `aspectRatio` instead.
+  const { image } = await generateImageSdk({
+    model: imageModel,
+    prompt: promptArg,
+    ...(protocol === "openai"
+      ? { size: "1536x1024" as `${number}x${number}` }
+      : { aspectRatio: "16:9" as `${number}:${number}` }),
+  });
 
-    const data = json.data?.[0];
-    const imageUrl = data?.url;
-    if (!imageUrl) {
-      throw new Error(`No image URL in OpenAI response: ${text.slice(0, 300)}`);
-    }
-    // Generate a mock UUID since OpenAI compatible endpoint doesn't have UUIDs
-    const imageUuid = crypto.randomUUID();
-    return { imageUrl, imageUuid };
+  return {
+    imageUrl: `data:${image.mediaType};base64,${image.base64}`,
+    imageUuid: crypto.randomUUID(),
+  };
+}
+
+// OpenAI-compatible REST route (GPTGod, DALL-E proxies, etc.). Basic
+// text-to-image only — no reference images on this path; for editing/anchoring
+// set IMAGE_PROVIDER=openai (or google) to take the AI SDK path above.
+async function generateImageOpenAiCompatible(
+  config: ProviderConfig,
+  prompt: string,
+): Promise<GenerateImageResult> {
+  const base = normalizeBaseUrl(config.baseUrl, "openai_compatible");
+  const endpoint = `${base}/images/generations`;
+  console.log(
+    `[ai-client] Calling OpenAI-compatible image generations at: ${endpoint} with model: ${config.model}`,
+  );
+
+  const res = await fetchWithRetry(endpoint, {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+      Authorization: `Bearer ${config.apiKey}`,
+    },
+    body: JSON.stringify({
+      model: config.model,
+      prompt: prompt,
+      n: 1,
+      size: "1792x1024", // Use horizontal size (16:9)
+    }),
+  });
+
+  const text = await res.text();
+  let json: any;
+  try {
+    json = JSON.parse(text);
+  } catch {
+    throw new Error(`OpenAI Image API error ${res.status}: ${text.slice(0, 500)}`);
   }
 
-  // 2. Runware task-array route
+  if (json.error) {
+    throw new Error(`OpenAI Image API error: ${json.error.message || JSON.stringify(json.error)}`);
+  }
+
+  const data = json.data?.[0];
+  const imageUrl = data?.url;
+  if (!imageUrl) {
+    throw new Error(`No image URL in OpenAI response: ${text.slice(0, 300)}`);
+  }
+  // Generate a mock UUID since OpenAI compatible endpoint doesn't have UUIDs
+  const imageUuid = crypto.randomUUID();
+  return { imageUrl, imageUuid };
+}
+
+// Runware task-array route — self-implemented to preserve the UUID/URL closed
+// loop (the official @runware/ai-sdk-provider drops both).
+async function generateImageRunware(
+  config: ProviderConfig,
+  prompt: string,
+  options?: GenerateImageOptions,
+): Promise<GenerateImageResult> {
+  const url = normalizeBaseUrl(config.baseUrl, "runware");
+
   const task: Record<string, unknown> = {
     taskType: "imageInference",
     taskUUID: crypto.randomUUID(),
diff --git a/lib/ai-client/normalizeUrl.ts b/lib/ai-client/normalizeUrl.ts
new file mode 100644
index 0000000..10de5f3
--- /dev/null
+++ b/lib/ai-client/normalizeUrl.ts
@@ -0,0 +1,66 @@
+import type { ProviderProtocol } from "@infiplot/types";
+
+// ──────────────────────────────────────────────────────────────────────
+//  Base-URL normalization — tolerate whatever shape the user pastes.
+//
+//  The README never specified whether the base URL needs a `/v1` suffix,
+//  so users provide all of these for the same endpoint:
+//      https://api.deepseek.com
+//      https://api.deepseek.com/v1
+//      https://api.deepseek.com/v1/chat/completions
+//  We normalize to a canonical base the adapter can safely append its own
+//  endpoint path to. This also fixes the pre-existing double-suffix bug
+//  where a pasted `.../chat/completions` became `.../chat/completions/chat/completions`.
+//
+//  Strategy (bare-host-only version append):
+//    1. strip trailing slashes
+//    2. strip a trailing known endpoint suffix (chat/completions, messages, …)
+//    3. only when the URL the user gave is a BARE host (scheme://host[:port]
+//       with no path) do we append the protocol's default version segment.
+//       Any path the user wrote (/v1, /beta, /zen/go, /chat/completions, …) is
+//       treated as an explicit location and left intact — so we never turn
+//       `/beta` into `/beta/v1`, and a version-less `/chat/completions`
+//       endpoint is preserved.
+// ──────────────────────────────────────────────────────────────────────
+
+// Trailing endpoint paths (case-insensitive, optional final "/") to strip.
+const ENDPOINT_SUFFIX =
+  /\/(chat\/completions|completions|responses|messages|images\/(generations|edits))\/?$/i;
+
+// Version segment appended for a bare host, per protocol; null appends none.
+const DEFAULT_VERSION_SEGMENT: Record<ProviderProtocol, string | null> = {
+  openai_compatible: "v1",
+  openai: "v1",
+  anthropic: "v1",
+  google: "v1beta",
+  // Runware posts to the bare base URL with no version-pathed sub-resource,
+  // so never inject a segment for it.
+  runware: null,
+};
+
+// True when `raw` is just [scheme://]host[:port] with no meaningful path (a
+// lone "/" counts as bare). If URL parsing throws, e.g. for a scheme-less
+// `host/v1`, any scheme is dropped and a path after the host means "not bare".
+function isBareHost(raw: string): boolean {
+  try {
+    const { pathname } = new URL(raw);
+    return pathname === "" || pathname === "/";
+  } catch {
+    return !/^[^/]+\/.+/.test(raw.replace(/^[a-z][a-z0-9+.-]*:\/\//i, ""));
+  }
+}
+
+// Canonical base URL for `protocol`; adapters append their own endpoint path.
+export function normalizeBaseUrl(
+  raw: string,
+  protocol: ProviderProtocol,
+): string {
+  const input = raw.trim();
+  // Drop trailing slashes, then a known endpoint suffix, then slashes again.
+  const base = input
+    .replace(/\/+$/, "")
+    .replace(ENDPOINT_SUFFIX, "")
+    .replace(/\/+$/, "");
+  const version = DEFAULT_VERSION_SEGMENT[protocol];
+  return version && isBareHost(input) ? `${base}/${version}` : base;
+}
diff --git a/lib/ai-client/vision.ts b/lib/ai-client/vision.ts
index ade15d6..b43429a 100644
--- a/lib/ai-client/vision.ts
+++ b/lib/ai-client/vision.ts
@@ -1,5 +1,12 @@
-import type { ProviderConfig } from "@infiplot/types";
+import { generateText } from "ai";
+import type { ModelMessage } from "ai";
+import { createAnthropic } from "@ai-sdk/anthropic";
+import { createGoogleGenerativeAI } from "@ai-sdk/google";
+import type { ProviderConfig, ProviderProtocol } from "@infiplot/types";
 import { fetchWithRetry } from "./fetchWithRetry";
+import { normalizeBaseUrl } from "./normalizeUrl";
+
+const VISION_TIMEOUT_MS = 60_000;
 
 export async function interpretClick(
   config: ProviderConfig,
@@ -16,6 +23,11 @@ export async function interpretClick(
   );
 }
 
+// Vision role: an explicit config.provider wins; unset means "openai_compatible".
+function resolveVisionProtocol(config: ProviderConfig): ProviderProtocol {
+  return config.provider ?? "openai_compatible";
+}
+
 /**
  * General single-image vision call. Accepts a complete data URL (preserves
  * the source mime type, e.g. webp/jpeg) and lets the caller opt out of
@@ -27,7 +39,65 @@ export async function analyzeImageDataUrl(
   prompt: string,
   opts: { responseFormat?: "json_object" | "text" } = {},
 ): Promise<string> {
-  const url = `${config.baseUrl.replace(/\/$/, "")}/chat/completions`;
+  const protocol = resolveVisionProtocol(config);
+  if (protocol === "anthropic" || protocol === "google") {
+    return analyzeViaAiSdk(config, imageDataUrl, prompt, protocol);
+  }
+  return analyzeOpenAiCompatible(config, imageDataUrl, prompt, opts);
+}
+
+// Native Anthropic / Gemini multimodal via the AI SDK: one user message with
+// the prompt and the full image data URL. No response_format is sent (no JSON
+// mode on Anthropic; parseJsonLoose handles output). Aborts after the timeout.
+async function analyzeViaAiSdk(
+  config: ProviderConfig,
+  imageDataUrl: string,
+  prompt: string,
+  protocol: "anthropic" | "google",
+): Promise<string> {
+  const baseURL = normalizeBaseUrl(config.baseUrl, protocol);
+  const model =
+    protocol === "anthropic"
+      ? createAnthropic({ apiKey: config.apiKey, baseURL })(config.model)
+      : createGoogleGenerativeAI({ apiKey: config.apiKey, baseURL })(
+          config.model,
+        );
+
+  const messages: ModelMessage[] = [
+    {
+      role: "user",
+      content: [
+        { type: "text", text: prompt },
+        { type: "image", image: imageDataUrl },
+      ],
+    },
+  ];
+
+  const timeoutCtrl = new AbortController();
+  const timeoutId = setTimeout(() => timeoutCtrl.abort(), VISION_TIMEOUT_MS);
+  try {
+    const { text } = await generateText({
+      model,
+      messages,
+      temperature: 0.2,
+      abortSignal: timeoutCtrl.signal,
+    });
+    if (typeof text !== "string" || text.length === 0) {
+      throw new Error(`Vision API (AI SDK ${protocol}) returned no content.`);
+    }
+    return text;
+  } finally {
+    clearTimeout(timeoutId);
+  }
+}
+
+async function analyzeOpenAiCompatible(
+  config: ProviderConfig,
+  imageDataUrl: string,
+  prompt: string,
+  opts: { responseFormat?: "json_object" | "text" } = {},
+): Promise<string> {
+  const url = `${normalizeBaseUrl(config.baseUrl, "openai_compatible")}/chat/completions`;
 
   const body: Record<string, unknown> = {
     model: config.model,
@@ -47,7 +117,7 @@ export async function analyzeImageDataUrl(
   }
 
   const timeoutCtrl = new AbortController();
-  const timeoutId = setTimeout(() => timeoutCtrl.abort(), 60_000);
+  const timeoutId = setTimeout(() => timeoutCtrl.abort(), VISION_TIMEOUT_MS);
 
   let res: Response;
   try {
diff --git a/lib/config.ts b/lib/config.ts
index 576199b..10def17 100644
--- a/lib/config.ts
+++ b/lib/config.ts
@@ -1,4 +1,16 @@
-import type { EngineConfig, TtsConfig } from "@infiplot/types";
+import type {
+  EngineConfig,
+  ProviderProtocol,
+  TtsConfig,
+} from "@infiplot/types";
+
+const VALID_PROTOCOLS = [
+  "openai_compatible",
+  "anthropic",
+  "google",
+  "openai",
+  "runware",
+] as const; // accepted *_PROVIDER values; mirrors the ProviderProtocol union
 
 function readVar(name: string): string {
   const v = process.env[name];
@@ -11,6 +23,21 @@ function readOptionalVar(name: string): string | undefined {
   return v && v.length > 0 ? v : undefined;
 }
 
+// Reads an optional *_PROVIDER variable, trimmed and lower-cased. Unset or
+// blank → undefined, leaving the default to each ai-client adapter
+// (text/vision → openai_compatible; image → inferred from the base URL).
+// Any value outside VALID_PROTOCOLS throws here, when the config is loaded.
+function readProvider(name: string): ProviderProtocol | undefined {
+  const value = readOptionalVar(name)?.trim().toLowerCase();
+  if (!value) return undefined;
+  const match = VALID_PROTOCOLS.find((p) => p === value);
+  if (match === undefined) {
+    const allowed = VALID_PROTOCOLS.join(", ");
+    throw new Error(`Invalid ${name}: "${value}". Must be one of: ${allowed}`);
+  }
+  return match;
+}
+
 function loadTtsConfig(): TtsConfig | undefined {
   const baseUrl = readOptionalVar("TTS_BASE_URL");
   const apiKey = readOptionalVar("TTS_API_KEY");
@@ -28,16 +55,19 @@ export function loadEngineConfig(headers?: Headers): EngineConfig {
       baseUrl: readVar("TEXT_BASE_URL"),
       apiKey: readVar("TEXT_API_KEY"),
       model: readVar("TEXT_MODEL"),
+      provider: readProvider("TEXT_PROVIDER"),
     },
     image: {
       baseUrl: readVar("IMAGE_BASE_URL"),
       apiKey: readVar("IMAGE_API_KEY"),
       model: readVar("IMAGE_MODEL"),
+      provider: readProvider("IMAGE_PROVIDER"),
     },
     vision: {
       baseUrl: readVar("VISION_BASE_URL"),
       apiKey: readVar("VISION_API_KEY"),
       model: readVar("VISION_MODEL"),
+      provider: readProvider("VISION_PROVIDER"),
     },
     tts: loadTtsConfig(),
     mockImage: readOptionalVar("MOCK_IMAGE") === "true",
diff --git a/lib/types/index.ts b/lib/types/index.ts
index c5e6a35..43b3859 100644
--- a/lib/types/index.ts
+++ b/lib/types/index.ts
@@ -268,10 +268,41 @@ export type VisionClassify = "insert-beat" | "change-scene";
 //  Provider config
 // ──────────────────────────────────────────────────────────────────────
 
+/**
+ * Wire protocol used to talk to a model provider. Valid values depend on the
+ * model role: text/vision treat anything but anthropic/google as
+ * openai_compatible, while the image adapter throws on "anthropic":
+ *
+ *   openai_compatible  text / vision / image  — OpenAI Chat Completions +
+ *                      `/images/generations` (self-implemented fetch; the
+ *                      default for text/vision when unset)
+ *   anthropic          text / vision          — native Anthropic Messages (AI SDK)
+ *   google             text / vision / image  — native Gemini (AI SDK); image
+ *                      uses the Nano Banana family
+ *   openai             image only             — OpenAI gpt-image via AI SDK,
+ *                      unlocks reference-image editing (for text/vision use
+ *                      openai_compatible, which already speaks OpenAI's format)
+ *   runware            image only             — Runware task-array protocol
+ *                      (self-implemented; the default for runware.ai URLs)
+ */
+export type ProviderProtocol =
+  | "openai_compatible"
+  | "anthropic"
+  | "google"
+  | "openai"
+  | "runware";
+
 export type ProviderConfig = {
   baseUrl: string;
   apiKey: string;
   model: string;
+  /**
+   * Wire protocol. When unset, callers apply a role-specific default:
+   * text/vision → "openai_compatible"; image → inferred from baseUrl
+   * (runware.ai → "runware", otherwise "openai_compatible") so existing
+   * deployments keep working without setting *_PROVIDER.
+   */
+  provider?: ProviderProtocol;
 };
 
 export type TtsConfig = {
diff --git a/package.json b/package.json
index c5d3a40..f02121b 100644
--- a/package.json
+++ b/package.json
@@ -20,6 +20,10 @@
     "deploy:cf": "opennextjs-cloudflare deploy"
   },
   "dependencies": {
+    "@ai-sdk/anthropic": "^3.0.81",
+    "@ai-sdk/google": "^3.0.80",
+    "@ai-sdk/openai": "^3.0.67",
+    "ai": "^6.0.196",
     "jsonrepair": "^3.14.0",
     "next": "^16.0.0",
     "react": "^19.0.0",
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index ce3df06..e70280e 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -8,12 +8,24 @@ importers:
 
   .:
     dependencies:
+      '@ai-sdk/anthropic':
+        specifier: ^3.0.81
+        version: 3.0.81(zod@4.4.3)
+      '@ai-sdk/google':
+        specifier: ^3.0.80
+        version: 3.0.80(zod@4.4.3)
+      '@ai-sdk/openai':
+        specifier: ^3.0.67
+        version: 3.0.67(zod@4.4.3)
+      ai:
+        specifier: ^6.0.196
+        version: 6.0.196(zod@4.4.3)
       jsonrepair:
         specifier: ^3.14.0
         version: 3.14.0
       next:
         specifier: ^16.0.0
-        version: 16.2.7(react-dom@19.2.7(react@19.2.7))(react@19.2.7)
+        version: 16.2.7(@opentelemetry/api@1.9.1)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)
       react:
         specifier: ^19.0.0
         version: 19.2.7
@@ -23,7 +35,7 @@ importers:
     devDependencies:
       '@opennextjs/cloudflare':
         specifier: ^1.19.11
-        version: 1.19.11(next@16.2.7(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(wrangler@4.97.0)
+        version: 1.19.11(next@16.2.7(@opentelemetry/api@1.9.1)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(wrangler@4.97.0)
       '@types/node':
         specifier: ^22.9.0
         version: 22.19.19
@@ -54,6 +66,40 @@ importers:
 
 packages:
 
+  '@ai-sdk/anthropic@3.0.81':
+    resolution: {integrity: sha512-B1JDd9Ugq9R5AgIaW3674lhGCMMYJcPUxnrZh8fzbGojgg4QvHFRv6eZahGQAUsmGHbcf74G9bdSBDLWQGY2GA==}
+    engines: {node: '>=18'}
+    peerDependencies:
+      zod: ^3.25.76 || ^4.1.8
+
+  '@ai-sdk/gateway@3.0.124':
+    resolution: {integrity: sha512-h8CrmbSG+8X0C+M/E1M4oiDHYevqwbzAPN+uLRHS0eJaatF2MZ+juNtOHXNOjk7Bsk9mD2RjYMjJO9dFkb9I7Q==}
+    engines: {node: '>=18'}
+    peerDependencies:
+      zod: ^3.25.76 || ^4.1.8
+
+  '@ai-sdk/google@3.0.80':
+    resolution: {integrity: sha512-5ORbm/yFUPO0MEvZsxBMN0cdKw2+lwU/wVn5KN3KF8Dmk1LughuDuUohMh/7iU/XFTiyB0OvmTW/tdV/J7O9zg==}
+    engines: {node: '>=18'}
+    peerDependencies:
+      zod: ^3.25.76 || ^4.1.8
+
+  '@ai-sdk/openai@3.0.67':
+    resolution: {integrity: sha512-oAiGC9eWG7IgtdsdS74bOCnAAHarAfTJhWN9x5INwnWPekL802AvF+0I5DvLzIF1MIRmNw4N8mPSL/GUVbX9Mw==}
+    engines: {node: '>=18'}
+    peerDependencies:
+      zod: ^3.25.76 || ^4.1.8
+
+  '@ai-sdk/provider-utils@4.0.27':
+    resolution: {integrity: sha512-ubkAJ+xODouwtmN1tYlvTPphH1hPOBfZaEQe8U7skGvFAnIRs9PPpsq57bC2+Ky/MB4yzhd6YOsxTAx9sGpazw==}
+    engines: {node: '>=18'}
+    peerDependencies:
+      zod: ^3.25.76 || ^4.1.8
+
+  '@ai-sdk/provider@3.0.10':
+    resolution: {integrity: sha512-Q3BZ27qfpYqnCYGvE3vt+Qi6LGOF9R5Nmzn+9JoM1lCRsD9mYaIhfJLkSunN48nfGXJ6n+XNV0J/XVpqGQl7Dw==}
+    engines: {node: '>=18'}
+
   '@alloc/quick-lru@5.2.0':
     resolution: {integrity: sha512-UrcABB+4bUrFABwbluTIBErXwvbsU/V7TZWfmbgJfbkwiBuziS9gxdODUyuiecfdGQ85jglMW6juS3+z5TsKLw==}
     engines: {node: '>=10'}
@@ -1036,6 +1082,10 @@ packages:
       next: '>=15.5.18 <16 || >=16.2.6'
       wrangler: ^4.86.0
 
+  '@opentelemetry/api@1.9.1':
+    resolution: {integrity: sha512-gLyJlPHPZYdAk1JENA9LeHejZe1Ti77/pTeFm/nMXmQH/HFZlcS/O2XJB+L8fkbrNSqhdtlvjBVjxwUYanNH5Q==}
+    engines: {node: '>=8.0.0'}
+
   '@poppinss/colors@4.1.6':
     resolution: {integrity: sha512-H9xkIdFswbS8n1d6vmRd8+c10t2Qe+rZITbbDHHkQixH5+2x1FDGmi/0K+WgWiqQFKPSlIYB7jlH6Kpfn6Fleg==}
 
@@ -1204,6 +1254,9 @@ packages:
   '@speed-highlight/core@1.2.15':
     resolution: {integrity: sha512-BMq1K3DsElxDWawkX6eLg9+CKJrTVGCBAWVuHXVUV2u0s2711qiChLSId6ikYPfxhdYocLNt3wWwSvDiTvFabw==}
 
+  '@standard-schema/spec@1.1.0':
+    resolution: {integrity: sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w==}
+
   '@swc/helpers@0.5.15':
     resolution: {integrity: sha512-JQ5TuMi45Owi4/BIMAJBoSQoOJu12oOk/gADqlcUL9JEdHB8vyjUSsxqeNXnmXHjYKMi2WcYtezGEEhqUI/E2g==}
 
@@ -1227,6 +1280,10 @@ packages:
   '@types/react@19.2.16':
     resolution: {integrity: sha512-esJiCAnl0kfpNdE69f3So4WJUXy95dLZydX0KwK46riIHDzHM7O9Vtf9xCHW0PXIqvgqNrswl522kA/5yx+F4w==}
 
+  '@vercel/oidc@3.2.0':
+    resolution: {integrity: sha512-UycprH3T6n3jH0k44NHMa7pnFHGu/N05MjojYr+Mc6I7obkoLIJujSWwin1pCvdy/eOxrI/l3uDLQsmcrOb4ug==}
+    engines: {node: '>= 20'}
+
   abort-controller@3.0.0:
     resolution: {integrity: sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==}
     engines: {node: '>=6.5'}
@@ -1244,6 +1301,12 @@ packages:
     resolution: {integrity: sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ==}
     engines: {node: '>= 8.0.0'}
 
+  ai@6.0.196:
+    resolution: {integrity: sha512-2T45UeqKL4a11KQ14I5i1YYHOvCFrMF478E1k6PVjlQSGUvXSv4xrxIaQbUL4qgv91DADSbddwv3oR49pPAK3g==}
+    engines: {node: '>=18'}
+    peerDependencies:
+      zod: ^3.25.76 || ^4.1.8
+
   ansi-colors@4.1.3:
     resolution: {integrity: sha512-/6w/C21Pm1A7aZitlI5Ni/2J6FFQN8i1Cvz3kHABAAbw93v/NlvKdVOqz7CCWz/3iv/JplRSEEZ83XION15ovw==}
     engines: {node: '>=6'}
@@ -1549,6 +1612,10 @@ packages:
     resolution: {integrity: sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==}
     engines: {node: '>=6'}
 
+  eventsource-parser@3.1.0:
+    resolution: {integrity: sha512-kJezFj9YFAMLeORyi7aCLxLbD5/qWMQnoMVlVPyHIll7lgRJCc3JVln9Vgl9nwQi0YkMnhdGTMNn7CkRRAptMg==}
+    engines: {node: '>=18.0.0'}
+
   execa@5.1.1:
     resolution: {integrity: sha512-8uSpZZocAZRBAPIEINJj3Lo9HyGitllczc27Eh5YYojjMFMn8yHMDMaUHE2Jqfq05D/wucwI4JGURyXt1vchyg==}
     engines: {node: '>=10'}
@@ -1754,6 +1821,9 @@ packages:
     resolution: {integrity: sha512-/imKNG4EbWNrVjoNC/1H5/9GFy+tqjGBHCaSsN+P2RnPqjsLmv6UD3Ej+Kj8nBWaRAwyk7kK5ZUc+OEatnTR3A==}
     hasBin: true
 
+  json-schema@0.4.0:
+    resolution: {integrity: sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA==}
+
   jsonrepair@3.14.0:
     resolution: {integrity: sha512-tWPGKMZf/8UPim+fcW2EfcQ/d/7aKUrP6IECz9G3Tu6Q5dX0orSleqJ9z6sSw7qrQkjF8/Edo4DvsWBZ8H+HNg==}
     hasBin: true
@@ -2384,8 +2454,47 @@ packages:
   youch@4.1.0-beta.10:
     resolution: {integrity: sha512-rLfVLB4FgQneDr0dv1oddCVZmKjcJ6yX6mS4pU82Mq/Dt9a3cLZQ62pDBL4AUO+uVrCvtWz3ZFUL2HFAFJ/BXQ==}
 
+  zod@4.4.3:
+    resolution: {integrity: sha512-ytENFjIJFl2UwYglde2jchW2Hwm4GJFLDiSXWdTrJQBIN9Fcyp7n4DhxJEiWNAJMV1/BqWfW/kkg71UDcHJyTQ==}
+
 snapshots:
 
+  '@ai-sdk/anthropic@3.0.81(zod@4.4.3)':
+    dependencies:
+      '@ai-sdk/provider': 3.0.10
+      '@ai-sdk/provider-utils': 4.0.27(zod@4.4.3)
+      zod: 4.4.3
+
+  '@ai-sdk/gateway@3.0.124(zod@4.4.3)':
+    dependencies:
+      '@ai-sdk/provider': 3.0.10
+      '@ai-sdk/provider-utils': 4.0.27(zod@4.4.3)
+      '@vercel/oidc': 3.2.0
+      zod: 4.4.3
+
+  '@ai-sdk/google@3.0.80(zod@4.4.3)':
+    dependencies:
+      '@ai-sdk/provider': 3.0.10
+      '@ai-sdk/provider-utils': 4.0.27(zod@4.4.3)
+      zod: 4.4.3
+
+  '@ai-sdk/openai@3.0.67(zod@4.4.3)':
+    dependencies:
+      '@ai-sdk/provider': 3.0.10
+      '@ai-sdk/provider-utils': 4.0.27(zod@4.4.3)
+      zod: 4.4.3
+
+  '@ai-sdk/provider-utils@4.0.27(zod@4.4.3)':
+    dependencies:
+      '@ai-sdk/provider': 3.0.10
+      '@standard-schema/spec': 1.1.0
+      eventsource-parser: 3.1.0
+      zod: 4.4.3
+
+  '@ai-sdk/provider@3.0.10':
+    dependencies:
+      json-schema: 0.4.0
+
   '@alloc/quick-lru@5.2.0': {}
 
   '@ast-grep/napi-darwin-arm64@0.40.5':
@@ -3446,7 +3555,7 @@ snapshots:
       '@nodelib/fs.scandir': 2.1.5
       fastq: 1.20.1
 
-  '@opennextjs/aws@4.0.2(next@16.2.7(react-dom@19.2.7(react@19.2.7))(react@19.2.7))':
+  '@opennextjs/aws@4.0.2(next@16.2.7(@opentelemetry/api@1.9.1)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))':
     dependencies:
       '@ast-grep/napi': 0.40.5
       '@aws-sdk/client-cloudfront': 3.984.0
@@ -3462,24 +3571,24 @@ snapshots:
       cookie: 1.1.1
       esbuild: 0.25.4
       express: 5.2.1
-      next: 16.2.7(react-dom@19.2.7(react@19.2.7))(react@19.2.7)
+      next: 16.2.7(@opentelemetry/api@1.9.1)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)
       path-to-regexp: 6.3.0
       urlpattern-polyfill: 10.1.0
       yaml: 2.9.0
     transitivePeerDependencies:
       - supports-color
 
-  '@opennextjs/cloudflare@1.19.11(next@16.2.7(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(wrangler@4.97.0)':
+  '@opennextjs/cloudflare@1.19.11(next@16.2.7(@opentelemetry/api@1.9.1)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(wrangler@4.97.0)':
     dependencies:
       '@ast-grep/napi': 0.40.5
       '@dotenvx/dotenvx': 1.31.0
-      '@opennextjs/aws': 4.0.2(next@16.2.7(react-dom@19.2.7(react@19.2.7))(react@19.2.7))
+      '@opennextjs/aws': 4.0.2(next@16.2.7(@opentelemetry/api@1.9.1)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))
       ci-info: 4.4.0
       cloudflare: 4.5.0
       comment-json: 4.6.2
       enquirer: 2.4.1
       glob: 12.0.0
-      next: 16.2.7(react-dom@19.2.7(react@19.2.7))(react@19.2.7)
+      next: 16.2.7(@opentelemetry/api@1.9.1)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)
       ts-tqdm: 0.8.6
       wrangler: 4.97.0
       yargs: 18.0.0
@@ -3487,6 +3596,8 @@ snapshots:
       - encoding
       - supports-color
 
+  '@opentelemetry/api@1.9.1': {}
+
   '@poppinss/colors@4.1.6':
     dependencies:
       kleur: 4.1.5
@@ -3697,6 +3808,8 @@ snapshots:
 
   '@speed-highlight/core@1.2.15': {}
 
+  '@standard-schema/spec@1.1.0': {}
+
   '@swc/helpers@0.5.15':
     dependencies:
       tslib: 2.8.1
@@ -3724,6 +3837,8 @@ snapshots:
     dependencies:
       csstype: 3.2.3
 
+  '@vercel/oidc@3.2.0': {}
+
   abort-controller@3.0.0:
     dependencies:
       event-target-shim: 5.0.1
@@ -3739,6 +3854,14 @@ snapshots:
     dependencies:
       humanize-ms: 1.2.1
 
+  ai@6.0.196(zod@4.4.3):
+    dependencies:
+      '@ai-sdk/gateway': 3.0.124(zod@4.4.3)
+      '@ai-sdk/provider': 3.0.10
+      '@ai-sdk/provider-utils': 4.0.27(zod@4.4.3)
+      '@opentelemetry/api': 1.9.1
+      zod: 4.4.3
+
   ansi-colors@4.1.3: {}
 
   ansi-regex@5.0.1: {}
@@ -4052,6 +4175,8 @@ snapshots:
 
   event-target-shim@5.0.1: {}
 
+  eventsource-parser@3.1.0: {}
+
   execa@5.1.1:
     dependencies:
       cross-spawn: 7.0.6
@@ -4293,6 +4418,8 @@ snapshots:
 
   jiti@1.21.7: {}
 
+  json-schema@0.4.0: {}
+
   jsonrepair@3.14.0: {}
 
   kleur@4.1.5: {}
@@ -4376,7 +4503,7 @@ snapshots:
 
   negotiator@1.0.0: {}
 
-  next@16.2.7(react-dom@19.2.7(react@19.2.7))(react@19.2.7):
+  next@16.2.7(@opentelemetry/api@1.9.1)(react-dom@19.2.7(react@19.2.7))(react@19.2.7):
     dependencies:
       '@next/env': 16.2.7
       '@swc/helpers': 0.5.15
@@ -4395,6 +4522,7 @@ snapshots:
       '@next/swc-linux-x64-musl': 16.2.7
       '@next/swc-win32-arm64-msvc': 16.2.7
       '@next/swc-win32-x64-msvc': 16.2.7
+      '@opentelemetry/api': 1.9.1
       sharp: 0.34.5
     transitivePeerDependencies:
       - '@babel/core'
@@ -4928,3 +5056,5 @@ snapshots:
       '@speed-highlight/core': 1.2.15
       cookie: 1.1.1
       youch-core: 0.3.3
+
+  zod@4.4.3: {}

From 865bf322e99487fdb5d189b314a173c5f80176a6 Mon Sep 17 00:00:00 2001
From: yuanzonghao <yuanzonghao123@gmail.com>
Date: Thu, 4 Jun 2026 16:47:56 +0800
Subject: [PATCH 2/2] fix(ai-client): parse Runware host by hostname; doc nits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- inferImageProtocol: match runware.ai by parsed hostname (exact match or
  subdomain) instead of a bare substring, so notrunware.ai /
  runware.ai.evil.com no longer misroute to the Runware protocol
- README: document the image-2-vip → OpenAI-compatible exception; correct the
  Imagen wording (deprecated, EOL 2026-06-24 — not yet discontinued)

Addresses Copilot review on #30.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 README.md              |  4 ++--
 lib/ai-client/image.ts | 14 +++++++++++++-
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 09c2220..edafdea 100644
--- a/README.md
+++ b/README.md
@@ -136,13 +136,13 @@ InfiPlot 会与四类模型供应商通信。**文本（Text）和视觉（Visio
 | Vision · 点击解读  | `VISION_BASE_URL` `VISION_API_KEY` `VISION_MODEL`  | ✅ | Google 的 `gemini-3.5-flash` |
 | TTS · 角色配音 | `TTS_BASE_URL` `TTS_API_KEY` `TTS_SPEECH_MODEL` | 可选 —— 留空则静音运行 | 小米 MiMo 的 `mimo-v2.5-tts` |
 
-> **可选 · 指定接口协议**：每类模型都可加一个 `*_PROVIDER` 变量（`TEXT_PROVIDER` / `VISION_PROVIDER` / `IMAGE_PROVIDER`）显式选择接口协议。**不设则保持向后兼容**——文本/视觉默认走 OpenAI 兼容接口，图像按 `*_BASE_URL` 自动判断（`runware.ai` → Runware，否则 OpenAI 兼容）。
+> **可选 · 指定接口协议**：每类模型都可加一个 `*_PROVIDER` 变量（`TEXT_PROVIDER` / `VISION_PROVIDER` / `IMAGE_PROVIDER`）显式选择接口协议。**不设则保持向后兼容**——文本/视觉默认走 OpenAI 兼容接口，图像按 `*_BASE_URL` 自动判断（`runware.ai` → Runware，否则 OpenAI 兼容；个别在 `runware.ai` 上以 OpenAI 协议提供的模型——如 `image-2-vip`——会按 OpenAI 兼容处理，需要时用 `IMAGE_PROVIDER` 显式覆盖即可）。
 >
 > | 取值 | 适用 | 说明 |
 > |---|---|---|
 > | `openai_compatible`（默认） | Text · Vision · Image | OpenAI Chat Completions / `/images/generations` |
 > | `anthropic` | Text · Vision | 原生 Anthropic Messages 接口 |
-> | `google` | Text · Vision · Image | 原生 Gemini；图像用 Nano Banana 系（如 `gemini-2.5-flash-image`，**勿用已停服的 Imagen**） |
+> | `google` | Text · Vision · Image | 原生 Gemini；图像用 Nano Banana 系（如 `gemini-2.5-flash-image`，**勿用 Imagen（已废弃，2026-06-24 停服）**） |
 > | `openai` | Image | OpenAI `gpt-image`，支持参考图编辑 |
 > | `runware` | Image | Runware task-array 协议 |
 >
diff --git a/lib/ai-client/image.ts b/lib/ai-client/image.ts
index bf11a0c..f7c03f8 100644
--- a/lib/ai-client/image.ts
+++ b/lib/ai-client/image.ts
@@ -71,12 +71,24 @@ export type GenerateImageResult = {
   imageUuid: string;
 };
 
+// Match the Runware host by parsed hostname (exact match or subdomain), not a
+// bare substring — otherwise `notrunware.ai` or `api.runware.ai.evil.com` would
+// misroute to the Runware protocol. Falls back to false on an unparseable URL.
+function isRunwareHost(baseUrl: string): boolean {
+  try {
+    const host = new URL(baseUrl).hostname.toLowerCase();
+    return host === "runware.ai" || host.endsWith(".runware.ai");
+  } catch {
+    return false;
+  }
+}
+
 // Image roles support more protocols than text/vision. When IMAGE_PROVIDER is
 // unset we keep the historical URL-based inference so existing deployments
 // (Runware, or an OpenAI-compatible gateway) behave exactly as before.
 function inferImageProtocol(config: ProviderConfig): ProviderProtocol {
   const isOpenAiCompat =
-    !config.baseUrl.includes("runware.ai") || config.model === "image-2-vip";
+    !isRunwareHost(config.baseUrl) || config.model === "image-2-vip";
   return isOpenAiCompat ? "openai_compatible" : "runware";
 }