feat(ai-client): multi-provider compat — native Anthropic/Google + URL tolerance

- TEXT/VISION: add native Anthropic & Google Gemini paths via Vercel AI SDK,
  selectable through TEXT_PROVIDER / VISION_PROVIDER (default openai_compatible)
- IMAGE: expand to openai (gpt-image) / google (Nano Banana) via AI SDK
  alongside the existing Runware task-array and OpenAI-compatible REST paths
- normalizeBaseUrl: tolerate URLs with/without /v1 (or /chat/completions);
  append the per-protocol version segment only for bare hosts
- config: readProvider() reads *_PROVIDER; types: ProviderProtocol + provider?
- deps: @ai-sdk/anthropic, @ai-sdk/google; docs in .env.example + README

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
yuanzonghao
2026-06-04 15:51:53 +08:00
parent a4dc57a1b6
commit 83fd5717e7
10 changed files with 614 additions and 67 deletions
+73 -3
View File
@@ -1,5 +1,12 @@
import type { ProviderConfig } from "@infiplot/types";
import { generateText } from "ai";
import type { ModelMessage } from "ai";
import { createAnthropic } from "@ai-sdk/anthropic";
import { createGoogleGenerativeAI } from "@ai-sdk/google";
import type { ProviderConfig, ProviderProtocol } from "@infiplot/types";
import { fetchWithRetry } from "./fetchWithRetry";
import { normalizeBaseUrl } from "./normalizeUrl";
const VISION_TIMEOUT_MS = 60_000;
export async function interpretClick(
config: ProviderConfig,
@@ -16,6 +23,11 @@ export async function interpretClick(
);
}
/**
 * Picks the wire protocol for a vision call.
 *
 * Returns `config.provider` when it is set; when it is `null` or `undefined`
 * the call falls back to the OpenAI-compatible protocol.
 */
function resolveVisionProtocol(config: ProviderConfig): ProviderProtocol {
  const { provider } = config;
  if (provider != null) {
    return provider;
  }
  return "openai_compatible";
}
/**
* General single-image vision call. Accepts a complete data URL (preserves
* the source mime type, e.g. webp/jpeg) and lets the caller opt out of
@@ -27,7 +39,65 @@ export async function analyzeImageDataUrl(
prompt: string,
opts: { responseFormat?: "json_object" | "text" } = {},
): Promise<string> {
const url = `${config.baseUrl.replace(/\/$/, "")}/chat/completions`;
const protocol = resolveVisionProtocol(config);
if (protocol === "anthropic" || protocol === "google") {
return analyzeViaAiSdk(config, imageDataUrl, prompt, protocol);
}
return analyzeOpenAiCompatible(config, imageDataUrl, prompt, opts);
}
/**
 * Single-image vision request over the native Anthropic / Gemini protocols,
 * issued through the AI SDK's `generateText`.
 *
 * The complete data URL is handed to the SDK as the image part (the SDK is
 * expected to decode it). No response_format is sent on this path — there is
 * no JSON mode on Anthropic — so callers rely on the engine's parseJsonLoose
 * to cope with the raw text output.
 *
 * @param config - Supplies `apiKey`, `baseUrl` and `model` for the provider.
 * @param imageDataUrl - Full data URL of the image to analyze.
 * @param prompt - Text instruction sent alongside the image.
 * @param protocol - Which native provider factory to use.
 * @returns The non-empty text produced by the model.
 * @throws Error when the SDK yields no text; an abort error surfaces when the
 *   request exceeds `VISION_TIMEOUT_MS`.
 */
async function analyzeViaAiSdk(
  config: ProviderConfig,
  imageDataUrl: string,
  prompt: string,
  protocol: "anthropic" | "google",
): Promise<string> {
  // Normalize once; both provider factories take the same settings shape here.
  const baseURL = normalizeBaseUrl(config.baseUrl, protocol);
  const providerSettings = { apiKey: config.apiKey, baseURL };

  let model;
  if (protocol === "anthropic") {
    model = createAnthropic(providerSettings)(config.model);
  } else {
    model = createGoogleGenerativeAI(providerSettings)(config.model);
  }

  // One user turn carrying the prompt text followed by the image.
  const userTurn: ModelMessage = {
    role: "user",
    content: [
      { type: "text", text: prompt },
      { type: "image", image: imageDataUrl },
    ],
  };

  // Abort the request ourselves once the vision timeout elapses.
  const abortCtrl = new AbortController();
  const timer = setTimeout(() => abortCtrl.abort(), VISION_TIMEOUT_MS);

  try {
    const result = await generateText({
      model,
      messages: [userTurn],
      temperature: 0.2,
      abortSignal: abortCtrl.signal,
    });
    const reply = result.text;
    if (typeof reply === "string" && reply.length > 0) {
      return reply;
    }
    throw new Error(`Vision API (AI SDK ${protocol}) returned no content.`);
  } finally {
    // Always release the timer, whether the call succeeded, failed or aborted.
    clearTimeout(timer);
  }
}
async function analyzeOpenAiCompatible(
config: ProviderConfig,
imageDataUrl: string,
prompt: string,
opts: { responseFormat?: "json_object" | "text" } = {},
): Promise<string> {
const url = `${normalizeBaseUrl(config.baseUrl, "openai_compatible")}/chat/completions`;
const body: Record<string, unknown> = {
model: config.model,
@@ -47,7 +117,7 @@ export async function analyzeImageDataUrl(
}
const timeoutCtrl = new AbortController();
const timeoutId = setTimeout(() => timeoutCtrl.abort(), 60_000);
const timeoutId = setTimeout(() => timeoutCtrl.abort(), VISION_TIMEOUT_MS);
let res: Response;
try {