refactor(ai-client): unify OpenAI-compatible path to AI SDK generateText

Eliminate the dual code path (raw fetch vs. AI SDK) for text and vision requests. All providers now go through createLanguageModel() + generateText(), removing chatOpenAiCompatible/analyzeOpenAiCompatible, the manual Usage type, summarizeUsage, and the responseFormat plumbing from 8 call sites.

Key fix: @ai-sdk/openai v3 defaults to the Responses API (/responses), but DeepSeek only supports Chat Completions, so the OpenAI-compatible model is created with .chat() explicitly.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-06-07 00:31:36 +08:00
parent 04b869eed0
commit 57bc6556ab
8 changed files with 40 additions and 239 deletions
@@ -2,8 +2,8 @@ import { generateText } from "ai";
 import type { ModelMessage } from "ai";
 import { createAnthropic } from "@ai-sdk/anthropic";
 import { createGoogleGenerativeAI } from "@ai-sdk/google";
+import { createOpenAI } from "@ai-sdk/openai";
 import type { ProviderConfig, ProviderProtocol } from "@infiplot/types";
-import { fetchWithRetry } from "./fetchWithRetry";
 import { normalizeBaseUrl } from "./normalizeUrl";

 const VISION_TIMEOUT_MS = 60_000;
@@ -13,55 +13,39 @@ export async function interpretClick(
  imageBase64: string,
  prompt: string,
 ): Promise<string> {
-  // Wrap the raw base64 in a PNG data URL — the Canvas annotator on the
-  // client encodes as PNG. analyzeImageDataUrl handles the actual request.
  return analyzeImageDataUrl(
    config,
    `data:image/png;base64,${imageBase64}`,
    prompt,
-    { responseFormat: "json_object" },
  );
 }

-// text/vision default to the OpenAI-compatible wire protocol when unset.
 function resolveVisionProtocol(config: ProviderConfig): ProviderProtocol {
  return config.provider ?? "openai_compatible";
 }

-/**
- * General single-image vision call. Accepts a complete data URL (preserves
- * the source mime type, e.g. webp/jpeg) and lets the caller opt out of
- * `response_format: json_object` for free-form text responses.
- */
 export async function analyzeImageDataUrl(
  config: ProviderConfig,
  imageDataUrl: string,
  prompt: string,
-  opts: { responseFormat?: "json_object" | "text" } = {},
 ): Promise<string> {
  const protocol = resolveVisionProtocol(config);
-  if (protocol === "anthropic" || protocol === "google") {
-    return analyzeViaAiSdk(config, imageDataUrl, prompt, protocol);
-  }
-  return analyzeOpenAiCompatible(config, imageDataUrl, prompt, opts);
-}
-
-// Native Anthropic / Gemini multimodal via the AI SDK. The image part takes
-// the full data URL directly; the SDK decodes it. response_format is not sent
-// (no JSON mode on Anthropic) — the engine's parseJsonLoose handles output.
-async function analyzeViaAiSdk(
-  config: ProviderConfig,
-  imageDataUrl: string,
-  prompt: string,
-  protocol: "anthropic" | "google",
-): Promise<string> {
  const baseURL = normalizeBaseUrl(config.baseUrl, protocol);
-  const model =
-    protocol === "anthropic"
-      ? createAnthropic({ apiKey: config.apiKey, baseURL })(config.model)
-      : createGoogleGenerativeAI({ apiKey: config.apiKey, baseURL })(
-          config.model,
-        );
+
+  let model;
+  switch (protocol) {
+    case "anthropic":
+      model = createAnthropic({ apiKey: config.apiKey, baseURL })(config.model);
+      break;
+    case "google":
+      model = createGoogleGenerativeAI({ apiKey: config.apiKey, baseURL })(config.model);
+      break;
+    case "openai_compatible":
+    case "openai":
+    default:
+      model = createOpenAI({ apiKey: config.apiKey, baseURL }).chat(config.model);
+      break;
+  }

  const messages: ModelMessage[] = [
    {
@@ -80,6 +64,7 @@ async function analyzeViaAiSdk(
      model,
      messages,
      temperature: 0.2,
+      maxRetries: 0,
      abortSignal: timeoutCtrl.signal,
    });
    if (typeof text !== "string" || text.length === 0) {
@@ -90,70 +75,3 @@ async function analyzeViaAiSdk(
    clearTimeout(timeoutId);
  }
 }
-
-async function analyzeOpenAiCompatible(
-  config: ProviderConfig,
-  imageDataUrl: string,
-  prompt: string,
-  opts: { responseFormat?: "json_object" | "text" } = {},
-): Promise<string> {
-  const url = `${normalizeBaseUrl(config.baseUrl, "openai_compatible")}/chat/completions`;
-
-  const body: Record<string, unknown> = {
-    model: config.model,
-    messages: [
-      {
-        role: "user",
-        content: [
-          { type: "text", text: prompt },
-          { type: "image_url", image_url: { url: imageDataUrl } },
-        ],
-      },
-    ],
-    temperature: 0.2,
-  };
-  if (opts.responseFormat === "json_object") {
-    body.response_format = { type: "json_object" };
-  }
-
-  const timeoutCtrl = new AbortController();
-  const timeoutId = setTimeout(() => timeoutCtrl.abort(), VISION_TIMEOUT_MS);
-
-  let res: Response;
-  try {
-    res = await fetchWithRetry(url, {
-      method: "POST",
-      headers: {
-        "Content-Type": "application/json",
-        Authorization: `Bearer ${config.apiKey}`,
-      },
-      body: JSON.stringify(body),
-      signal: timeoutCtrl.signal,
-      retries: 0,
-    });
-  } finally {
-    clearTimeout(timeoutId);
-  }
-
-  const text = await res.text();
-  if (!res.ok) {
-    throw new Error(`Vision API error ${res.status}: ${text}`);
-  }
-
-  let json: { choices: { message: { content: string } }[] };
-  try {
-    json = JSON.parse(text);
-  } catch {
-    throw new Error(`Vision API returned invalid JSON: ${text.slice(0, 500)}`);
-  }
-
-  // Guard against empty choices array or missing message/content fields
-  const content = json.choices?.[0]?.message?.content;
-  if (typeof content !== "string") {
-    throw new Error(
-      `Vision API returned no content. Response: ${text.slice(0, 500)}`
-    );
-  }
-
-  return content;
-}