refactor(ai-client): unify OpenAI-compatible path to AI SDK generateText

Eliminate the dual code path (raw fetch vs AI SDK) for text and vision.
All providers now go through createLanguageModel() + generateText(),
removing chatOpenAiCompatible/analyzeOpenAiCompatible, the manual Usage
type, summarizeUsage, and responseFormat plumbing from 8 call sites.

Key fix: @ai-sdk/openai v3 defaults to the Responses API (/responses);
DeepSeek only supports Chat Completions, so we use .chat() explicitly.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
yuanzonghao
2026-06-07 00:31:36 +08:00
parent 04b869eed0
commit 57bc6556ab
8 changed files with 40 additions and 239 deletions
+17 -99
View File
@@ -2,8 +2,8 @@ import { generateText } from "ai";
import type { ModelMessage } from "ai";
import { createAnthropic } from "@ai-sdk/anthropic";
import { createGoogleGenerativeAI } from "@ai-sdk/google";
import { createOpenAI } from "@ai-sdk/openai";
import type { ProviderConfig, ProviderProtocol } from "@infiplot/types";
import { fetchWithRetry } from "./fetchWithRetry";
import { normalizeBaseUrl } from "./normalizeUrl";
const VISION_TIMEOUT_MS = 60_000;
@@ -13,55 +13,39 @@ export async function interpretClick(
imageBase64: string,
prompt: string,
): Promise<string> {
// Wrap the raw base64 in a PNG data URL — the Canvas annotator on the
// client encodes as PNG. analyzeImageDataUrl handles the actual request.
return analyzeImageDataUrl(
config,
`data:image/png;base64,${imageBase64}`,
prompt,
{ responseFormat: "json_object" },
);
}
// Picks the wire protocol for a text/vision call: the configured provider when
// one is set, otherwise the OpenAI-compatible protocol.
function resolveVisionProtocol(config: ProviderConfig): ProviderProtocol {
  const { provider } = config;
  // `== null` matches both null and undefined, same as the `??` operator.
  if (provider == null) {
    return "openai_compatible";
  }
  return provider;
}
/**
 * Runs a vision request over one image and resolves with the model's text.
 *
 * The image is passed as a complete data URL, so whatever mime type it
 * carries (png, webp, jpeg, ...) is forwarded unchanged.
 *
 * @param config - Provider settings; the protocol is derived from it via
 *   `resolveVisionProtocol`.
 * @param imageDataUrl - The image as a full data URL.
 * @param prompt - The text instruction sent alongside the image.
 * @param opts - `responseFormat` is only forwarded on the OpenAI-compatible
 *   path; the Anthropic and Google paths do not receive it.
 * @returns The text produced by whichever backend handled the request.
 */
export async function analyzeImageDataUrl(
  config: ProviderConfig,
  imageDataUrl: string,
  prompt: string,
  opts: { responseFormat?: "json_object" | "text" } = {},
): Promise<string> {
  const protocol = resolveVisionProtocol(config);
  switch (protocol) {
    case "anthropic":
    case "google":
      // Native providers go through the AI SDK helper.
      return analyzeViaAiSdk(config, imageDataUrl, prompt, protocol);
    default:
      // Every other protocol uses the OpenAI-compatible request.
      return analyzeOpenAiCompatible(config, imageDataUrl, prompt, opts);
  }
}
// Native Anthropic / Gemini multimodal via the AI SDK. The image part takes
// the full data URL directly; the SDK decodes it. response_format is not sent
// (no JSON mode on Anthropic) — the engine's parseJsonLoose handles output.
async function analyzeViaAiSdk(
config: ProviderConfig,
imageDataUrl: string,
prompt: string,
protocol: "anthropic" | "google",
): Promise<string> {
const baseURL = normalizeBaseUrl(config.baseUrl, protocol);
const model =
protocol === "anthropic"
? createAnthropic({ apiKey: config.apiKey, baseURL })(config.model)
: createGoogleGenerativeAI({ apiKey: config.apiKey, baseURL })(
config.model,
);
let model;
switch (protocol) {
case "anthropic":
model = createAnthropic({ apiKey: config.apiKey, baseURL })(config.model);
break;
case "google":
model = createGoogleGenerativeAI({ apiKey: config.apiKey, baseURL })(config.model);
break;
case "openai_compatible":
case "openai":
default:
model = createOpenAI({ apiKey: config.apiKey, baseURL }).chat(config.model);
break;
}
const messages: ModelMessage[] = [
{
@@ -80,6 +64,7 @@ async function analyzeViaAiSdk(
model,
messages,
temperature: 0.2,
maxRetries: 0,
abortSignal: timeoutCtrl.signal,
});
if (typeof text !== "string" || text.length === 0) {
@@ -90,70 +75,3 @@ async function analyzeViaAiSdk(
clearTimeout(timeoutId);
}
}
/**
 * Sends a single-image vision request to an OpenAI-compatible
 * `/chat/completions` endpoint and returns the first choice's message content.
 *
 * @param config - Provider settings; `baseUrl`, `model` and `apiKey` are read.
 * @param imageDataUrl - The image as a data URL, sent as an `image_url` part.
 * @param prompt - The text part of the user message.
 * @param opts - When `responseFormat` is `"json_object"`, a matching
 *   `response_format` field is added to the request body; otherwise it is
 *   omitted.
 * @returns The `content` string of `choices[0].message`.
 * @throws Error when the response status is not ok, when the body is not valid
 *   JSON, or when no string content is present in the parsed response.
 *   Failures raised by `fetchWithRetry` or while reading the body propagate
 *   unchanged.
 */
async function analyzeOpenAiCompatible(
  config: ProviderConfig,
  imageDataUrl: string,
  prompt: string,
  opts: { responseFormat?: "json_object" | "text" } = {},
): Promise<string> {
  const url = `${normalizeBaseUrl(config.baseUrl, "openai_compatible")}/chat/completions`;
  const body: Record<string, unknown> = {
    model: config.model,
    messages: [
      {
        role: "user",
        content: [
          { type: "text", text: prompt },
          { type: "image_url", image_url: { url: imageDataUrl } },
        ],
      },
    ],
    temperature: 0.2,
  };
  if (opts.responseFormat === "json_object") {
    body.response_format = { type: "json_object" };
  }

  const timeoutCtrl = new AbortController();
  const timeoutId = setTimeout(() => timeoutCtrl.abort(), VISION_TIMEOUT_MS);
  let res: Response;
  let text: string;
  try {
    res = await fetchWithRetry(url, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${config.apiKey}`,
      },
      body: JSON.stringify(body),
      signal: timeoutCtrl.signal,
      retries: 0,
    });
    // Read the body while the abort timer is still armed, so a response that
    // stalls after the headers arrive is also bounded by VISION_TIMEOUT_MS.
    // NOTE(review): assumes fetchWithRetry forwards `signal` to fetch — confirm.
    text = await res.text();
  } finally {
    clearTimeout(timeoutId);
  }

  if (!res.ok) {
    throw new Error(`Vision API error ${res.status}: ${text}`);
  }

  // The payload is untrusted: every level is optional, and a bare JSON `null`
  // is valid JSON too.
  let json: { choices?: { message?: { content?: unknown } }[] } | null;
  try {
    json = JSON.parse(text);
  } catch {
    throw new Error(`Vision API returned invalid JSON: ${text.slice(0, 500)}`);
  }

  // Guard against a null payload, an empty choices array, or missing
  // message/content fields.
  const content = json?.choices?.[0]?.message?.content;
  if (typeof content !== "string") {
    throw new Error(
      `Vision API returned no content. Response: ${text.slice(0, 500)}`
    );
  }
  return content;
}