Files
infiplot-web/lib/ai-client/image.ts
T
yuanzonghao e68e7e1690 feat(engine): add opt-in image timeout and scene-paint hedging
IMAGE_TIMEOUT_MS sets a per-attempt hard deadline (AbortSignal.timeout);
IMAGE_HEDGE_MS races a second identical scene-paint request when the
first is still pending past the threshold. Both default to OFF when
unset, preserving historical behavior for self-hosted deploys.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-13 11:21:47 +08:00

381 lines
13 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import OpenAI, { toFile, type Uploadable } from "openai";
import type { Orientation, ProviderConfig, ProviderProtocol } from "@infiplot/types";
import { fetchWithRetry } from "./fetchWithRetry";
import { normalizeBaseUrl } from "./normalizeUrl";
// Runware uses its own task-array protocol (not OpenAI-compatible).
// POST <baseUrl> with [{ taskType: "imageInference", ... }]; errors come
// back as a 200 with `errors[]`, so we have to inspect the body either way.
//
// referenceImages accepts UUIDs, public URLs, or base64. UUID is cheapest
// in transport cost; URL is next; base64 last resort. The FLUX.2 [klein] 9B
// KV variant (runware:400@6) accelerates multi-reference inference ~2.5× via
// its KV cache for reference latents (cached only within one inference run,
// not persisted across calls — hence the need to keep stable UUIDs/URLs for
// later reuse).
//
// We request outputType=URL so Runware persists the image and returns a CDN
// link the client can render directly. The same response also carries the
// image UUID, so we never need a separate uploadImage round-trip to anchor
// future referenceImages.
// Fallback img2img `strength` sent on the Runware path when a seedImage is
// supplied without an explicit strength.
const DEFAULT_IMG2IMG_STRENGTH = 0.85;
// Upper bound on reference images forwarded to a provider; longer lists are
// truncated with slice(0, MAX_REFERENCE_IMAGES).
const MAX_REFERENCE_IMAGES = 4;
// One entry of the Runware response `data[]` array. Only the fields this
// module reads are declared; both are optional because the body is cast from
// JSON.parse output without validation.
type RunwareImageResult = {
// Link to the generated image (the request asks for outputType "URL").
imageURL?: string;
// Runware's identifier for the generated image.
imageUUID?: string;
};
// One entry of the Runware response `errors[]` array. generateImageRunware
// folds these fields into the message of the Error it throws.
type RunwareError = {
code?: string;
message?: string;
// Name of the offending request parameter, when Runware reports one.
parameter?: string;
};
// Top-level shape of a parsed Runware response body. `errors` is inspected
// before `data`, since Runware can report failures inside an HTTP 200 body.
type RunwareResponse = {
data?: RunwareImageResult[];
errors?: RunwareError[];
};
/**
* Optional per-call settings for `generateImage`; every field may be omitted.
* Which fields take effect depends on the provider path, as noted per field.
*/
export type GenerateImageOptions = {
/**
* Reference image (UUID, public URL, or base64) for img2img. When set,
* FLUX preserves the seed image's composition and applies `strength` to
* deviate. NOTE: FLUX.2 [klein] 9B KV does NOT support seedImage — use
* `referenceImages` for visual continuity instead. Runware-only.
*/
seedImage?: string;
/**
* Reference images (UUIDs, URLs, or base64) to condition generation on —
* typically character portraits + the prior scene image. Runware caps at 4;
* we silently truncate beyond that. On the native OpenAI path these are
* fetched/decoded and sent to `images.edit`.
*/
referenceImages?: string[];
/**
* Img2img strength in the range 0–1; FLUX needs ≥ 0.8 to actually have an
* effect. Only sent together with `seedImage`, falling back to 0.85 when
* omitted. Runware-only.
*/
strength?: number;
/**
* Output aspect, locked per session. "portrait" → 9:16 vertical for mobile;
* default/"landscape" → 16:9 widescreen. Mapped to each provider's nearest
* supported size: Runware 1024×1792, OpenAI-compatible REST 1024x1792,
* native gpt-image 1024x1536.
*/
orientation?: Orientation;
/**
* Per-attempt hard deadline (ms). A timed-out attempt is retryable.
* Unset → no client-side timeout (historical behavior).
*/
timeoutMs?: number;
/** Retry-attempt override for this call (default 2). 0 = single attempt. */
retries?: number;
/** External cancellation, e.g. aborting the losing leg of a hedged race. */
signal?: AbortSignal;
};
/**
* Result of `generateImage`: something the client can render right away plus
* a handle that later calls can pass back as a reference image.
*/
export type GenerateImageResult = {
/**
* Image the client can render directly. A Runware CDN URL on the Runware
* path; a `data:<mime>;base64,...` URI on the native OpenAI path when GPT
* image models return raw bytes instead of a hosted URL.
*/
imageUrl: string;
/**
* Stable handle for cheap re-reference in later `referenceImages`. A real
* Runware UUID on the Runware path; a synthetic UUID on other paths (those
* re-reference via the URL/data-URL form instead).
*/
imageUuid: string;
};
/**
 * Reports whether `baseUrl` points at Runware.
 *
 * The check runs on the parsed hostname — an exact match or a subdomain of
 * `runware.ai` — not on a bare substring, so look-alikes such as
 * `notrunware.ai` or `api.runware.ai.evil.com` are rejected and cannot be
 * misrouted to the Runware protocol. A single trailing dot (the fully
 * qualified DNS spelling, e.g. `api.runware.ai.`) names the same host and is
 * ignored before comparing.
 *
 * @param baseUrl - Provider base URL as configured.
 * @returns `true` for a Runware host; `false` otherwise, including when
 *   `baseUrl` cannot be parsed as a URL.
 */
function isRunwareHost(baseUrl: string): boolean {
  let hostname: string;
  try {
    hostname = new URL(baseUrl).hostname;
  } catch {
    return false;
  }
  // URL parsing keeps a trailing root dot; drop it so the FQDN form matches.
  const host = hostname.toLowerCase().replace(/\.$/, "");
  return host === "runware.ai" || host.endsWith(".runware.ai");
}
/**
 * Legacy protocol inference, used when no provider is configured explicitly.
 *
 * Image roles support more protocols than text/vision. With IMAGE_PROVIDER
 * unset, the historical URL-based guess is kept so existing deployments
 * (Runware, or an OpenAI-compatible gateway) behave exactly as before: a
 * Runware host speaks the Runware protocol unless the model is
 * "image-2-vip"; everything else is treated as OpenAI-compatible.
 */
function inferImageProtocol(config: ProviderConfig): ProviderProtocol {
  if (config.model === "image-2-vip") {
    return "openai_compatible";
  }
  return isRunwareHost(config.baseUrl) ? "runware" : "openai_compatible";
}
/**
 * Returns the wire protocol for an image call: the explicitly configured
 * `config.provider` when it is neither null nor undefined, otherwise the
 * value inferred from the URL and model.
 */
function resolveImageProtocol(config: ProviderConfig): ProviderProtocol {
  const explicit = config.provider;
  if (explicit !== undefined && explicit !== null) {
    return explicit;
  }
  return inferImageProtocol(config);
}
/**
 * generateImage — text-to-image (default) or referenceImages-conditioned.
 *
 * Resolves the wire protocol for `config` and forwards the call unchanged to
 * the matching implementation. Any protocol other than "openai" or "runware"
 * (including "openai_compatible") takes the OpenAI-compatible REST route.
 *
 * @param config - Provider settings (URL, key, model, optional protocol).
 * @param prompt - Text prompt for the image.
 * @param options - Optional per-call settings; see GenerateImageOptions.
 * @returns Both a renderable image URL and a re-reference handle (see
 *   GenerateImageResult).
 */
export async function generateImage(
  config: ProviderConfig,
  prompt: string,
  options?: GenerateImageOptions,
): Promise<GenerateImageResult> {
  const protocol = resolveImageProtocol(config);
  if (protocol === "openai") {
    return generateImageOpenAi(config, prompt, options);
  }
  if (protocol === "runware") {
    return generateImageRunware(config, prompt, options);
  }
  // "openai_compatible" and anything unrecognized share the REST fallback.
  return generateImageOpenAiCompatible(config, prompt, options);
}
/**
 * Native OpenAI (gpt-image) route, driven through the official OpenAI SDK.
 *
 * Unlike the compatible fetch path, this one supports reference-image editing:
 * when reference images are supplied (capped at MAX_REFERENCE_IMAGES) they are
 * converted to uploads and sent to `images.edit`; otherwise `images.generate`
 * is used. GPT image models return raw bytes, so the client receives a data
 * URI with a synthesized UUID, and continuity references reuse that data URI
 * rather than a provider UUID.
 *
 * @param config - Provider settings; `apiKey`, `baseUrl` and `model` are read.
 * @param prompt - Text prompt for the image.
 * @param options - `referenceImages`, `orientation`, `signal`, `timeoutMs`
 *   and `retries` are honored on this path.
 * @returns The converted SDK response (see imageResponseToResult).
 */
async function generateImageOpenAi(
  config: ProviderConfig,
  prompt: string,
  options?: GenerateImageOptions,
): Promise<GenerateImageResult> {
  const openai = new OpenAI({
    apiKey: config.apiKey,
    baseURL: normalizeBaseUrl(config.baseUrl, "openai"),
    maxRetries: 2,
    dangerouslyAllowBrowser: true,
  });

  const references = (options?.referenceImages ?? []).slice(
    0,
    MAX_REFERENCE_IMAGES,
  );
  // Session-locked aspect: 2:3 portrait or 3:2 landscape in gpt-image sizes.
  const size = options?.orientation === "portrait" ? "1024x1536" : "1536x1024";
  // Per-request overrides; the client-level retry count is replaced only when
  // the caller passed an explicit `retries`.
  const perCall = {
    signal: options?.signal,
    timeout: options?.timeoutMs,
    ...(options?.retries !== undefined ? { maxRetries: options.retries } : {}),
  };

  if (references.length === 0) {
    const generated = await openai.images.generate(
      {
        model: config.model,
        prompt,
        n: 1,
        size,
      },
      perCall,
    );
    return imageResponseToResult(generated);
  }

  const image = await Promise.all(references.map(referenceImageToUploadable));
  const edited = await openai.images.edit(
    {
      model: config.model,
      prompt,
      image,
      n: 1,
      size,
    },
    perCall,
  );
  return imageResponseToResult(edited);
}
/**
 * Turns one reference-image string into an SDK upload for `images.edit`.
 *
 * Accepts `data:` URLs and http(s) URLs; both are read through `fetch`. The
 * upload's MIME type comes from the response `content-type` header, defaulting
 * to "image/png". Data URLs are named `reference.<ext>`; remote URLs keep
 * their own filename when it has an extension.
 *
 * @param ref - A `data:` URL or an http(s) URL.
 * @returns An upload wrapping the fetched response.
 * @throws Error when `ref` is neither form (e.g. a provider UUID) or when the
 *   fetch response is not OK.
 */
async function referenceImageToUploadable(ref: string): Promise<Uploadable> {
  const isDataUrl = ref.startsWith("data:");
  if (!isDataUrl && !/^https?:\/\//i.test(ref)) {
    throw new Error(
      `Native OpenAI image editing requires reference image URLs or data URLs; got "${ref.slice(0, 32)}...".`,
    );
  }

  const fetched = await fetch(ref);
  if (!fetched.ok) {
    throw new Error(
      isDataUrl
        ? "Failed to read data URL reference image."
        : `Failed to fetch reference image ${ref}: HTTP ${fetched.status}`,
    );
  }

  const mediaType = fetched.headers.get("content-type") ?? "image/png";
  const filename = isDataUrl
    ? `reference.${extensionFromMediaType(mediaType)}`
    : filenameFromUrl(ref, mediaType);
  return toFile(fetched, filename, { type: mediaType });
}
/**
 * Converts an SDK images response into a GenerateImageResult.
 *
 * Only the first `data` entry is considered. Inline base64 bytes win and are
 * wrapped in a `data:image/<format>;base64,` URI, with the format taken from
 * `output_format` (default "png"); otherwise a hosted `url` is passed through.
 * Either way the UUID is freshly generated.
 *
 * @throws Error when the first entry carries neither base64 data nor a URL.
 */
function imageResponseToResult(
  response: OpenAI.Images.ImagesResponse,
): GenerateImageResult {
  const first = response.data?.[0];

  if (first?.b64_json) {
    const format = response.output_format ?? "png";
    const imageUrl = `data:image/${format};base64,${first.b64_json}`;
    return { imageUrl, imageUuid: crypto.randomUUID() };
  }

  if (first?.url) {
    return { imageUrl: first.url, imageUuid: crypto.randomUUID() };
  }

  throw new Error("No image data in OpenAI response.");
}
/**
 * Picks an upload filename for a remote reference image.
 *
 * Uses the last non-empty path segment of `url` when it ends in an
 * alphanumeric extension; otherwise (or when `url` cannot be parsed) falls
 * back to `reference.<ext>` derived from `mediaType`.
 */
function filenameFromUrl(url: string, mediaType: string): string {
  const fallback = (): string =>
    `reference.${extensionFromMediaType(mediaType)}`;

  let pathname: string;
  try {
    pathname = new URL(url).pathname;
  } catch {
    // Unparseable URL: name the file after the media type instead.
    return fallback();
  }

  const segments = pathname.split("/").filter(Boolean);
  const last = segments[segments.length - 1];
  if (last !== undefined && /\.[a-z0-9]+$/i.test(last)) {
    return last;
  }
  return fallback();
}
/**
 * Maps a media type string to a file extension by case-sensitive substring:
 * "jpeg"/"jpg" → "jpg", "webp" → "webp", anything else → "png".
 */
function extensionFromMediaType(mediaType: string): string {
  if (/jpeg|jpg/.test(mediaType)) {
    return "jpg";
  }
  return mediaType.includes("webp") ? "webp" : "png";
}
/**
 * OpenAI-compatible REST route (GPTGod, DALL-E proxies, etc.).
 *
 * Basic text-to-image only: `referenceImages`, `seedImage` and `strength` are
 * not sent on this path. For editing/anchoring set IMAGE_PROVIDER=openai to
 * take the native OpenAI path instead.
 *
 * @param config - Provider settings; `baseUrl`, `apiKey` and `model` are read.
 * @param prompt - Text prompt, sent verbatim.
 * @param options - Only `orientation`, `retries`, `timeoutMs` and `signal`
 *   are used here.
 * @returns The hosted image URL from `data[0].url` plus a freshly generated
 *   UUID (this protocol has no provider-side image id).
 * @throws Error when the body is not a JSON object, carries an `error` field,
 *   or has no non-empty string at `data[0].url`. Whatever `fetchWithRetry`
 *   rejects with propagates unchanged.
 */
async function generateImageOpenAiCompatible(
  config: ProviderConfig,
  prompt: string,
  options?: GenerateImageOptions,
): Promise<GenerateImageResult> {
  const base = normalizeBaseUrl(config.baseUrl, "openai_compatible");
  const endpoint = `${base}/images/generations`;
  console.log(
    `[ai-client] Calling OpenAI-compatible image generations at: ${endpoint} with model: ${config.model}`,
  );
  const res = await fetchWithRetry(endpoint, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${config.apiKey}`,
    },
    body: JSON.stringify({
      model: config.model,
      prompt: prompt,
      n: 1,
      // Session-locked aspect (16:9 default, 9:16 portrait for mobile).
      size: options?.orientation === "portrait" ? "1024x1792" : "1792x1024",
    }),
    retries: options?.retries,
    timeoutMs: options?.timeoutMs,
    signal: options?.signal,
  });

  const text = await res.text();
  let parsed: unknown;
  try {
    parsed = JSON.parse(text);
  } catch {
    throw new Error(`OpenAI Image API error ${res.status}: ${text.slice(0, 500)}`);
  }
  // `null` and bare scalars are valid JSON; reject them here with the same
  // diagnostic as unparseable text instead of crashing on a property access.
  if (typeof parsed !== "object" || parsed === null) {
    throw new Error(`OpenAI Image API error ${res.status}: ${text.slice(0, 500)}`);
  }

  const { error, data } = parsed as { error?: unknown; data?: unknown };
  if (error) {
    // `error` is usually `{ message }`, but some gateways send a bare string.
    const detail =
      (error as { message?: unknown }).message || JSON.stringify(error);
    throw new Error(`OpenAI Image API error: ${detail}`);
  }

  const first: unknown = Array.isArray(data) ? data[0] : undefined;
  const imageUrl =
    typeof first === "object" && first !== null
      ? (first as { url?: unknown }).url
      : undefined;
  // Require an actual non-empty string so the declared return type holds.
  if (typeof imageUrl !== "string" || imageUrl === "") {
    throw new Error(`No image URL in OpenAI response: ${text.slice(0, 300)}`);
  }

  // Generate a mock UUID since OpenAI compatible endpoint doesn't have UUIDs
  const imageUuid = crypto.randomUUID();
  return { imageUrl, imageUuid };
}
/**
 * Runware task-array route — self-implemented to preserve the UUID/URL closed
 * loop (the official @runware/ai-sdk-provider drops both).
 *
 * Posts a single `imageInference` task and returns the image URL and UUID from
 * the first `data` entry. Runware can report failures inside the body, so
 * `errors[]` is checked before `data`.
 *
 * @param config - Provider settings; `baseUrl`, `apiKey` and `model` are read.
 * @param prompt - Sent as the task's `positivePrompt`.
 * @param options - `orientation`, `seedImage` (+ `strength`),
 *   `referenceImages` (capped at MAX_REFERENCE_IMAGES), `retries`,
 *   `timeoutMs` and `signal` are honored.
 * @returns The image URL and Runware image UUID.
 * @throws Error when the body is not a JSON object, when it lists an error,
 *   or when the first result lacks a URL or UUID. Whatever `fetchWithRetry`
 *   rejects with propagates unchanged.
 */
async function generateImageRunware(
  config: ProviderConfig,
  prompt: string,
  options?: GenerateImageOptions,
): Promise<GenerateImageResult> {
  const url = normalizeBaseUrl(config.baseUrl, "runware");
  // Session-locked output aspect. Image models emit a FIXED pixel size; CSS
  // object-fit on the client adapts this frame to the exact device/window. Both
  // dimensions stay a multiple of 64 as FLUX requires.
  const portrait = options?.orientation === "portrait";
  const task: Record<string, unknown> = {
    taskType: "imageInference",
    taskUUID: crypto.randomUUID(),
    model: config.model,
    positivePrompt: prompt,
    width: portrait ? 1024 : 1792,
    height: portrait ? 1792 : 1024,
    steps: 4,
    CFGScale: 3.5,
    numberResults: 1,
    outputType: "URL",
    outputFormat: "PNG",
    includeCost: false,
  };
  // `strength` only accompanies a seed image; it is never sent on its own.
  if (options?.seedImage) {
    task.seedImage = options.seedImage;
    task.strength = options.strength ?? DEFAULT_IMG2IMG_STRENGTH;
  }
  if (options?.referenceImages?.length) {
    task.referenceImages = options.referenceImages.slice(0, MAX_REFERENCE_IMAGES);
  }

  const res = await fetchWithRetry(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${config.apiKey}`,
    },
    body: JSON.stringify([task]),
    retries: options?.retries,
    timeoutMs: options?.timeoutMs,
    signal: options?.signal,
  });

  const text = await res.text();
  let parsed: unknown;
  try {
    parsed = JSON.parse(text);
  } catch {
    throw new Error(`Image API error ${res.status}: ${text.slice(0, 500)}`);
  }
  // `null` and bare scalars are valid JSON; reject them here with the same
  // diagnostic as unparseable text instead of crashing on a property access.
  if (typeof parsed !== "object" || parsed === null) {
    throw new Error(`Image API error ${res.status}: ${text.slice(0, 500)}`);
  }
  const json = parsed as RunwareResponse;

  // Report the first listed error, if any.
  const firstError = json.errors?.[0];
  if (firstError) {
    throw new Error(
      `Runware error [${firstError.code ?? "unknown"}]: ${firstError.message ?? "no message"}` +
        (firstError.parameter ? ` (parameter: ${firstError.parameter})` : ""),
    );
  }

  const result = json.data?.[0];
  const imageUrl = result?.imageURL;
  const imageUuid = result?.imageUUID;
  if (!imageUrl || !imageUuid) {
    throw new Error(`No image URL/UUID in Runware response: ${text.slice(0, 300)}`);
  }
  return { imageUrl, imageUuid };
}