feat(ai-client): multi-provider compat — native Anthropic/Google + URL tolerance

- TEXT/VISION: add native Anthropic & Google Gemini paths via Vercel AI SDK,
  selectable through TEXT_PROVIDER / VISION_PROVIDER (default openai_compatible)
- IMAGE: expand to openai (gpt-image) / google (Nano Banana) via AI SDK
  alongside the existing Runware task-array and OpenAI-compatible REST paths
- normalizeBaseUrl: tolerate URLs with/without /v1 (or /chat/completions);
  append the per-protocol version segment only for bare hosts
- config: readProvider() reads *_PROVIDER; types: ProviderProtocol + provider?
- deps: @ai-sdk/anthropic, @ai-sdk/google; docs in .env.example + README

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
yuanzonghao
2026-06-04 15:51:53 +08:00
parent a4dc57a1b6
commit 83fd5717e7
10 changed files with 614 additions and 67 deletions
+142 -47
View File
@@ -1,5 +1,9 @@
import type { ProviderConfig } from "@infiplot/types";
import { generateImage as generateImageSdk } from "ai";
import { createOpenAI } from "@ai-sdk/openai";
import { createGoogleGenerativeAI } from "@ai-sdk/google";
import type { ProviderConfig, ProviderProtocol } from "@infiplot/types";
import { fetchWithRetry } from "./fetchWithRetry";
import { normalizeBaseUrl } from "./normalizeUrl";
// Runware uses its own task-array protocol (not OpenAI-compatible).
// POST <baseUrl> with [{ taskType: "imageInference", ... }]; errors come
@@ -38,30 +42,52 @@ export type GenerateImageOptions = {
* Reference image (UUID, public URL, or base64) for img2img. When set,
* FLUX preserves the seed image's composition and applies `strength` to
* deviate. NOTE: FLUX.2 [klein] 9B KV does NOT support seedImage — use
* `referenceImages` for visual continuity instead.
* `referenceImages` for visual continuity instead. Runware-only.
*/
seedImage?: string;
/**
* Reference images (UUIDs, URLs, or base64) to condition generation on —
* typically character portraits + the prior scene image. Runware caps at 4;
* we silently truncate beyond that.
* we silently truncate beyond that. On the OpenAI/Gemini AI SDK paths these
* map to `prompt.images` (the SDK accepts public URLs or data URLs).
*/
referenceImages?: string[];
/** Range 0–1; FLUX needs ≥ 0.8 to actually have an effect. Runware-only. */
strength?: number;
};
/** Result of one image generation call, regardless of which protocol produced it. */
export type GenerateImageResult = {
/**
* Image the client can render directly. A Runware CDN URL on the Runware
* path; the `url` field of the response on the OpenAI-compatible REST path;
* a `data:<mime>;base64,...` URI on the AI SDK paths (OpenAI/Gemini return
* raw bytes, not a hosted URL).
*/
imageUrl: string;
/**
* Stable handle for cheap re-reference in later `referenceImages`. A real
* Runware UUID on the Runware path; a synthetic `crypto.randomUUID()` value
* on other paths (those re-reference via the URL/data-URL form instead).
*/
imageUuid: string;
};
/**
 * Legacy URL-based protocol inference, used only when no explicit provider is
 * configured, so that existing deployments keep their historical behavior.
 *
 * - Any base URL that does not contain "runware.ai" is treated as an
 *   OpenAI-compatible gateway.
 * - A Runware base URL is treated as Runware, except for the special model
 *   "image-2-vip", which is served through the OpenAI-compatible route.
 */
function inferImageProtocol(config: ProviderConfig): ProviderProtocol {
  const { baseUrl, model } = config;

  const hostedOnRunware = baseUrl.includes("runware.ai");
  if (!hostedOnRunware) {
    return "openai_compatible";
  }

  if (model === "image-2-vip") {
    return "openai_compatible";
  }
  return "runware";
}
/**
 * Picks the wire protocol for an image role: an explicitly configured
 * `provider` always wins; when it is absent (null or undefined) the protocol
 * is inferred from the base URL and model instead.
 */
function resolveImageProtocol(config: ProviderConfig): ProviderProtocol {
  const explicit = config.provider;
  if (explicit == null) {
    return inferImageProtocol(config);
  }
  return explicit;
}
// ──────────────────────────────────────────────────────────────────────
// generateImage — text-to-image (default) or referenceImages-conditioned.
// Returns both a renderable image URL and a re-reference handle (see
// GenerateImageResult). Dispatches on the resolved wire protocol.
// ──────────────────────────────────────────────────────────────────────
// NOTE(review): the body reads `config`, but no `config` parameter is declared
// in the signature as visible here — confirm against the full file.
export async function generateImage(
@@ -69,51 +95,120 @@ export async function generateImage(
prompt: string,
options?: GenerateImageOptions,
): Promise<GenerateImageResult> {
// NOTE(review): `url` is not read by the dispatch below; it looks like a
// leftover from the earlier inline implementation — confirm before removing.
const url = config.baseUrl.replace(/\/$/, "");
// An explicit `config.provider` wins; otherwise the protocol is inferred from
// the base URL and model.
const protocol = resolveImageProtocol(config);
switch (protocol) {
// Native OpenAI and Google share the AI SDK implementation; `protocol` tells
// it which provider to instantiate.
case "openai":
case "google":
return generateImageViaAiSdk(config, prompt, options, protocol);
case "runware":
return generateImageRunware(config, prompt, options);
// "anthropic" is a valid protocol value but is rejected for image generation
// with an explicit error.
case "anthropic":
throw new Error(
'IMAGE_PROVIDER "anthropic" does not generate images. Use "openai", "google", "runware", or "openai_compatible".',
);
// Any unrecognized value falls through to the OpenAI-compatible REST path.
// `options` is not forwarded on this path.
case "openai_compatible":
default:
return generateImageOpenAiCompatible(config, prompt);
}
}
// 1. OpenAI-compatible route (GPTGod, DALL-E, etc.)
const isOpenAi = !url.includes("runware.ai") || config.model === "image-2-vip";
if (isOpenAi) {
const endpoint = url.endsWith("/images/generations") ? url : `${url}/images/generations`;
console.log(`[ai-client] Calling OpenAI-compatible image generations at: ${endpoint} with model: ${config.model}`);
const res = await fetchWithRetry(endpoint, {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${config.apiKey}`,
},
body: JSON.stringify({
model: config.model,
prompt: prompt,
n: 1,
size: "1792x1024", // Use horizontal size (16:9)
}),
});
/**
 * Native OpenAI (gpt-image) / Gemini (Nano Banana) generation via the Vercel
 * AI SDK.
 *
 * Unlike the REST fetch path, this supports reference-image conditioning by
 * passing the references as `prompt.images`. The SDK result is read as base64
 * bytes plus a media type (no hosted URL), so the caller receives a data URI
 * and a synthetic UUID; continuity references should reuse the data URI
 * rather than a provider UUID.
 *
 * @param config   Provider settings; `baseUrl` is normalized for `protocol`.
 * @param prompt   Text prompt.
 * @param options  Only `referenceImages` is used here, truncated to
 *                 `MAX_REFERENCE_IMAGES` entries.
 * @param protocol Which native provider to instantiate.
 * @returns A `data:<mediaType>;base64,...` image URL and a random UUID.
 */
async function generateImageViaAiSdk(
  config: ProviderConfig,
  prompt: string,
  options: GenerateImageOptions | undefined,
  protocol: "openai" | "google",
): Promise<GenerateImageResult> {
  const baseURL = normalizeBaseUrl(config.baseUrl, protocol);
  const imageModel =
    protocol === "openai"
      ? createOpenAI({ apiKey: config.apiKey, baseURL }).image(config.model)
      : createGoogleGenerativeAI({ apiKey: config.apiKey, baseURL }).image(
          config.model,
        );

  // Only switch to the structured prompt form when there is something to
  // condition on; plain text-to-image keeps the simple string prompt.
  const refs = (options?.referenceImages ?? []).slice(0, MAX_REFERENCE_IMAGES);
  const promptArg =
    refs.length > 0 ? { text: prompt, images: refs } : prompt;

  // OpenAI's image models take an explicit `size`; gpt-image's widest landscape
  // option is 1536x1024. Gemini takes an `aspectRatio` instead.
  const { image } = await generateImageSdk({
    model: imageModel,
    prompt: promptArg,
    ...(protocol === "openai"
      ? { size: "1536x1024" as `${number}x${number}` }
      : { aspectRatio: "16:9" as `${number}:${number}` }),
  });

  // No response-body parsing here: the SDK hands back the decoded image, so
  // the result is built directly from it.
  return {
    imageUrl: `data:${image.mediaType};base64,${image.base64}`,
    imageUuid: crypto.randomUUID(),
  };
}
/**
 * OpenAI-compatible REST route (GPTGod, DALL-E proxies, etc.).
 *
 * Basic text-to-image only: sends `POST <base>/images/generations` with a
 * bearer token and a fixed landscape size. Reference images are not sent on
 * this path; for editing/anchoring set IMAGE_PROVIDER=openai (or google) to
 * take the AI SDK path instead.
 *
 * @param config Provider settings (`baseUrl`, `apiKey`, `model`).
 * @param prompt Text prompt.
 * @returns `imageUrl` is `data[0].url` from the response when present,
 *   otherwise a data URI built from `data[0].b64_json`; `imageUuid` is a
 *   synthetic random UUID because this protocol has no image handles.
 * @throws Error when the body is not a JSON object, carries an `error`
 *   member, or contains neither a URL nor base64 image data.
 */
async function generateImageOpenAiCompatible(
  config: ProviderConfig,
  prompt: string,
): Promise<GenerateImageResult> {
  const isRecord = (value: unknown): value is Record<string, unknown> =>
    typeof value === "object" && value !== null;

  const base = normalizeBaseUrl(config.baseUrl, "openai_compatible");
  // Keep accepting a base URL that already names the full endpoint, as the
  // previous inline implementation did, instead of appending the path twice.
  const endpoint = base.endsWith("/images/generations")
    ? base
    : `${base}/images/generations`;
  console.log(
    `[ai-client] Calling OpenAI-compatible image generations at: ${endpoint} with model: ${config.model}`,
  );

  const res = await fetchWithRetry(endpoint, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${config.apiKey}`,
    },
    body: JSON.stringify({
      model: config.model,
      prompt: prompt,
      n: 1,
      size: "1792x1024", // Use horizontal size (16:9)
    }),
  });

  // Read as text first so a non-JSON body (HTML error page, gateway message)
  // can be quoted in the error instead of surfacing as a bare parse failure.
  const text = await res.text();
  let json: unknown;
  try {
    json = JSON.parse(text);
  } catch {
    throw new Error(`OpenAI Image API error ${res.status}: ${text.slice(0, 500)}`);
  }
  // Valid JSON that is not an object (`null`, a number, a string) cannot be a
  // usable response; report it like any other malformed body.
  if (!isRecord(json)) {
    throw new Error(`OpenAI Image API error ${res.status}: ${text.slice(0, 500)}`);
  }

  const apiError = json.error;
  if (apiError) {
    const message =
      isRecord(apiError) &&
      typeof apiError.message === "string" &&
      apiError.message !== ""
        ? apiError.message
        : JSON.stringify(apiError);
    throw new Error(`OpenAI Image API error: ${message}`);
  }

  const first: unknown = Array.isArray(json.data) ? json.data[0] : undefined;
  const entry = isRecord(first) ? first : undefined;

  if (typeof entry?.url === "string" && entry.url !== "") {
    // Synthetic UUID: OpenAI-compatible endpoints have no image handles.
    return { imageUrl: entry.url, imageUuid: crypto.randomUUID() };
  }

  // Some models/gateways return only base64 image data. Wrap it in a data URI
  // so the client can render it; PNG is assumed unless the response names a
  // different `output_format`.
  if (typeof entry?.b64_json === "string" && entry.b64_json !== "") {
    const format =
      typeof json.output_format === "string" && json.output_format !== ""
        ? json.output_format
        : "png";
    return {
      imageUrl: `data:image/${format};base64,${entry.b64_json}`,
      imageUuid: crypto.randomUUID(),
    };
  }

  throw new Error(`No image URL in OpenAI response: ${text.slice(0, 300)}`);
}
// Runware task-array route — self-implemented to preserve the UUID/URL closed
// loop (the official @runware/ai-sdk-provider drops both).
async function generateImageRunware(
config: ProviderConfig,
prompt: string,
options?: GenerateImageOptions,
): Promise<GenerateImageResult> {
const url = normalizeBaseUrl(config.baseUrl, "runware");
const task: Record<string, unknown> = {
taskType: "imageInference",
taskUUID: crypto.randomUUID(),