feat(engine): add opt-in image timeout and scene-paint hedging

IMAGE_TIMEOUT_MS sets a per-attempt hard deadline (AbortSignal.timeout);
IMAGE_HEDGE_MS races a second identical scene-paint request when the
first is still pending past the threshold. Both default to OFF when
unset, preserving historical behavior for self-hosted deploys.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
yuanzonghao
2026-06-13 11:21:47 +08:00
parent c4ffc16498
commit e68e7e1690
8 changed files with 223 additions and 28 deletions
+16
View File
@@ -56,6 +56,22 @@ IMAGE_MODEL=runware:400@6
# stays fastest + cheapest for the scene-by-scene flow.
# IMAGE_PROVIDER=runware
# Optional image-latency guards. BOTH default to OFF when unset — leaving
# them blank keeps the exact historical behavior, so self-hosted deploys are
# unaffected unless they opt in.
# IMAGE_TIMEOUT_MS — per-attempt hard deadline (ms) for image requests; a
# timed-out attempt is retried like a 5xx (except hedged scene-paint legs,
# which are single-attempt). Recommended 30000 for Runware
# (healthy-day p99 is ~26-37s; Runware's own gateway 504s at ~55s).
# IMAGE_HEDGE_MS — scene-paint hedging: if the referenced scene paint has
# not finished after this many ms, race a second identical request and
# keep whichever finishes first (the loser is aborted, but the provider
# may still bill it). Rescues straggler tasks; never fires when the first
# attempt has already failed before the threshold (e.g. 429/503
# saturation). Each hedged request is a single attempt with no 5xx
# retries. Recommended 15000 for Runware (healthy-day p95). Do NOT set
# thresholds this low for providers that are normally slow (e.g.
# gpt-image takes 20-60s per image).
# IMAGE_TIMEOUT_MS=30000
# IMAGE_HEDGE_MS=15000
# ---- 3. Vision model · multimodal click interpretation -------------
# Recommended: MiMo V2.5 — multimodal, accepts image_url content parts.
VISION_BASE_URL=https://token-plan-sgp.xiaomimimo.com/v1
+1
View File
@@ -138,6 +138,7 @@ Use `.env.example` as the source of truth. Never commit `.env.local`, API keys,
- Text and Vision use `TEXT_*` and `VISION_*`; default protocol is `openai_compatible`, with native `anthropic` and `google` available via `TEXT_PROVIDER` / `VISION_PROVIDER`.
- Image uses `IMAGE_*`; supported protocols are `runware`, `openai_compatible`, native `openai`, and native `google`. When `IMAGE_PROVIDER` is unset, Runware is inferred from `*.runware.ai` URLs and otherwise falls back to OpenAI-compatible image generations.
- `IMAGE_TIMEOUT_MS` (per-attempt hard deadline) and `IMAGE_HEDGE_MS` (Painter scene-paint hedging: race a second request when the first is still pending after the threshold) are both OFF when unset — the default path must stay byte-identical to historical behavior. Hedging applies only to the Tier-A scene paint, never to portraits, and never fires after a fast failure (saturation guard). Client-side engine configs (`resolveEngineConfig`) intentionally do not set these fields.
- TTS supports Xiaomi MiMo (voicedesign + voiceclone) or StepFun (preset voices auto-selected by keyword scoring), inferred from `TTS_BASE_URL` (host containing `stepfun.com` → StepFun, otherwise → MiMo). `CharacterVoice` is a discriminated union on `provider`; synth dispatches on the voice's own tag so a session may carry both shapes through a provider switch. Blank config means silent mode.
- `MOCK_IMAGE=true` skips image generation and returns a placeholder for cheap local iteration.
- `NEXT_PUBLIC_IMAGE_PROXY_URL` and `NEXT_PUBLIC_IMAGE_PROXY_ALLOWED_HOSTS` opt into browser-side image proxying for allowed hosts.
+31 -5
View File
@@ -1,16 +1,35 @@
type RetryInit = RequestInit & { retries?: number; retryDelayMs?: number };
/** `fetch` init extended with the retry/timeout knobs read by `fetchWithRetry`. */
type RetryInit = RequestInit & {
/** Extra attempts after the first one; `fetchWithRetry` defaults this to 2. */
retries?: number;
/**
 * Base back-off in ms; `fetchWithRetry` defaults it to 1500 and sleeps
 * `retryDelayMs * (attempt + 1)` before retrying a thrown failure.
 */
retryDelayMs?: number;
/**
* Per-attempt hard deadline. A timed-out attempt counts as a retryable
* failure (it consumes retry budget like a 5xx). Unset → no client-side
* timeout, preserving the historical behavior.
*/
timeoutMs?: number;
};
export async function fetchWithRetry(
url: string,
init: RetryInit,
): Promise<Response> {
const { retries = 2, retryDelayMs = 1500, ...fetchInit } = init;
const { retries = 2, retryDelayMs = 1500, timeoutMs, ...fetchInit } = init;
if (!fetchInit.redirect) fetchInit.redirect = "manual";
// Caller-supplied signal (e.g. a hedge loser being cancelled) must abort
// immediately and permanently — it is NOT retryable, unlike our own
// per-attempt timeout below.
const externalSignal = fetchInit.signal ?? undefined;
let lastError: unknown;
for (let attempt = 0; attempt <= retries; attempt++) {
if (externalSignal?.aborted) throw abortError(externalSignal);
const attemptSignal = timeoutMs
? externalSignal
? AbortSignal.any([externalSignal, AbortSignal.timeout(timeoutMs)])
: AbortSignal.timeout(timeoutMs)
: externalSignal;
try {
const res = await fetch(url, fetchInit);
const res = await fetch(url, { ...fetchInit, signal: attemptSignal });
if (res.ok) return res;
// Don't retry 4xx (client errors won't fix themselves)
if (res.status >= 400 && res.status < 500) return res;
@@ -22,9 +41,10 @@ export async function fetchWithRetry(
return res;
} catch (err) {
lastError = err;
const isAbort =
err instanceof DOMException && err.name === "AbortError";
if (externalSignal?.aborted) throw err;
const isAbort = err instanceof DOMException && err.name === "AbortError";
if (isAbort) throw err;
// TimeoutError (from AbortSignal.timeout) falls through as retryable.
if (attempt < retries) {
await sleep(retryDelayMs * (attempt + 1));
continue;
@@ -35,6 +55,12 @@ export async function fetchWithRetry(
throw lastError;
}
/**
 * Picks the value to throw for an already-aborted signal: the signal's own
 * `reason` when it is an `Error`, otherwise a generic `AbortError`
 * `DOMException`.
 */
function abortError(signal: AbortSignal): unknown {
  const { reason } = signal;
  if (reason instanceof Error) {
    return reason;
  }
  return new DOMException("This operation was aborted", "AbortError");
}
/** Resolves (with no value) once `ms` milliseconds have elapsed. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
+39 -13
View File
@@ -59,6 +59,15 @@ export type GenerateImageOptions = {
* native gpt-image 1024x1536.
*/
orientation?: Orientation;
/**
* Per-attempt hard deadline (ms). A timed-out attempt is retryable.
* Unset → no client-side timeout (historical behavior).
*/
timeoutMs?: number;
/** Retry-attempt override for this call (default 2). 0 = single attempt. */
retries?: number;
/** External cancellation, e.g. aborting the losing leg of a hedged race. */
signal?: AbortSignal;
};
export type GenerateImageResult = {
@@ -143,22 +152,33 @@ async function generateImageOpenAi(
const refs = (options?.referenceImages ?? []).slice(0, MAX_REFERENCE_IMAGES);
const portrait = options?.orientation === "portrait";
const size = portrait ? "1024x1536" : "1536x1024";
const requestOptions = {
signal: options?.signal,
timeout: options?.timeoutMs,
...(options?.retries !== undefined ? { maxRetries: options.retries } : {}),
};
const response =
refs.length > 0
? await client.images.edit({
model: config.model,
prompt,
image: await Promise.all(refs.map(referenceImageToUploadable)),
n: 1,
size,
})
: await client.images.generate({
model: config.model,
prompt,
n: 1,
size,
});
? await client.images.edit(
{
model: config.model,
prompt,
image: await Promise.all(refs.map(referenceImageToUploadable)),
n: 1,
size,
},
requestOptions,
)
: await client.images.generate(
{
model: config.model,
prompt,
n: 1,
size,
},
requestOptions,
);
return imageResponseToResult(response);
}
@@ -257,6 +277,9 @@ async function generateImageOpenAiCompatible(
// Session-locked aspect (16:9 default, 9:16 portrait for mobile).
size: options?.orientation === "portrait" ? "1024x1792" : "1792x1024",
}),
retries: options?.retries,
timeoutMs: options?.timeoutMs,
signal: options?.signal,
});
const text = await res.text();
@@ -326,6 +349,9 @@ async function generateImageRunware(
Authorization: `Bearer ${config.apiKey}`,
},
body: JSON.stringify([task]),
retries: options?.retries,
timeoutMs: options?.timeoutMs,
signal: options?.signal,
});
const text = await res.text();
+11
View File
@@ -21,6 +21,15 @@ function readOptionalVar(name: string): string | undefined {
return v && v.length > 0 ? v : undefined;
}
// Invalid/non-positive values are treated as unset (feature stays off) rather
// than failing boot — these knobs are tuning aids, not required config.
//
// The value is floored BEFORE the positivity check, so a fractional input
// below 1 (e.g. "0.5") is reported as unset instead of leaking a 0 through a
// "positive int" reader. Values above the largest delay Node timers accept
// (2^31 - 1 ms) are also treated as unset: setTimeout would silently coerce
// such a delay to 1ms, turning a "practically never" threshold into
// "immediately".
//
// Returns the floored integer in [1, 2147483647], or undefined when the
// variable is missing, empty, non-numeric, or out of that range.
function readOptionalPositiveInt(name: string): number | undefined {
  const maxTimerDelayMs = 2147483647;
  const raw = readOptionalVar(name);
  if (!raw) return undefined;
  const parsed = Number(raw);
  if (!Number.isFinite(parsed)) return undefined;
  const whole = Math.floor(parsed);
  return whole >= 1 && whole <= maxTimerDelayMs ? whole : undefined;
}
// Optional *_PROVIDER selector. Unset → undefined, and each ai-client adapter
// applies its own default (text/vision → openai_compatible; image → inferred
// from the base URL). Validated eagerly so a typo fails fast at boot rather
@@ -69,5 +78,7 @@ export function loadEngineConfig(): EngineConfig {
},
tts: loadTtsConfig(),
mockImage: readOptionalVar("MOCK_IMAGE") === "true",
imageTimeoutMs: readOptionalPositiveInt("IMAGE_TIMEOUT_MS"),
imageHedgeMs: readOptionalPositiveInt("IMAGE_HEDGE_MS"),
};
}
+6 -1
View File
@@ -87,7 +87,12 @@ export async function renderCharacterPortrait(
visualDescription,
styleGuide,
);
const { imageUrl, imageUuid } = await generateImage(config.image, prompt);
// Portraits get the hard timeout but are never hedged — a scene already
// runs several portrait paints in parallel, and hedging those would push
// burst concurrency past Runware's recommended 2-4 in-flight requests.
const { imageUrl, imageUuid } = await generateImage(config.image, prompt, {
timeoutMs: config.imageTimeoutMs,
});
return { basePortraitUrl: imageUrl, basePortraitUuid: imageUuid };
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
+106 -9
View File
@@ -123,6 +123,10 @@ export function collectReferenceImages(
return refs.slice(0, MAX_REFERENCE_IMAGES);
}
/** Extracts a loggable message: `Error#message` for errors, `String(value)` otherwise. */
function errMsg(err: unknown): string {
  if (err instanceof Error) {
    return err.message;
  }
  return String(err);
}
async function tryGenerate(
config: ProviderConfig,
prompt: string,
@@ -132,12 +136,93 @@ async function tryGenerate(
try {
return await generateImage(config, prompt, options);
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
console.warn(`[painter] ${label} failed: ${msg}`);
console.warn(`[painter] ${label} failed: ${errMsg(err)}`);
return null;
}
}
// Hedged Tier-A: fire leg 1; if it hasn't settled after hedgeMs, race an
// identical leg 2 and take whichever finishes first. This rescues straggler
// paints (a single task stuck on a slow worker) without waiting out the
// provider's own gateway limit (Runware kills tasks at ~55s with a 504).
//
// Deliberately NOT retry-on-error: a leg that fails fast (429/503 queue
// saturation, 4xx) falls through to Tier B immediately — hedging into a
// saturated queue only adds load. Each leg runs with retries=0 so the hedge
// itself is the only retry layer (no retry×retry multiplication).
//
// Cancellation: every leg gets its own AbortController so the loser can be
// cancelled. A caller-supplied `options.signal` is combined with that
// controller (rather than overwritten by it), so external cancellation still
// reaches both legs.
//
// Returns the winning result, or null when the un-hedged leg fails or both
// hedge legs fail (failures are logged, never thrown).
async function tryGenerateHedged(
  config: ProviderConfig,
  prompt: string,
  options: GenerateImageOptions,
  label: string,
  hedgeMs: number,
): Promise<GenerateImageResult | null> {
  type Settled =
    | { leg: 1 | 2; ok: GenerateImageResult }
    | { leg: 1 | 2; err: unknown };

  const t0 = Date.now();
  const callerSignal = options.signal;
  const controllers: (AbortController | undefined)[] = [undefined, undefined];
  // Remembers which legs have already settled, so a leg that failed on its
  // own is neither re-aborted nor mislabelled as "aborted" in the logs.
  const settled = [false, false];

  const fire = (leg: 1 | 2): Promise<Settled> => {
    const ac = new AbortController();
    controllers[leg - 1] = ac;
    const signal = callerSignal
      ? AbortSignal.any([callerSignal, ac.signal])
      : ac.signal;
    // Both handlers map to a value, so a leg promise never rejects and can be
    // left un-awaited without risking an unhandled rejection.
    return generateImage(config, prompt, {
      ...options,
      retries: 0,
      signal,
    }).then(
      (ok): Settled => {
        settled[leg - 1] = true;
        return { leg, ok };
      },
      (err: unknown): Settled => {
        settled[leg - 1] = true;
        return { leg, err };
      },
    );
  };

  const leg1 = fire(1);

  let timer: ReturnType<typeof setTimeout> | undefined;
  const hedgeTimer = new Promise<"hedge">((resolve) => {
    timer = setTimeout(() => resolve("hedge"), hedgeMs);
  });

  const first = await Promise.race([leg1, hedgeTimer]);
  if (first !== "hedge") {
    // Leg 1 settled before the threshold: no hedge, success or fast failure.
    clearTimeout(timer);
    if ("ok" in first) return first.ok;
    console.warn(`[painter] ${label} failed: ${errMsg(first.err)}`);
    return null;
  }

  console.warn(
    `[painter] hedge fired: ${label} still pending after ${hedgeMs}ms`,
  );
  const leg2 = fire(2);

  let result = await Promise.race([leg1, leg2]);
  if ("err" in result) {
    // First settler failed — give the survivor its full chance.
    console.warn(
      `[painter] hedge leg${result.leg} failed: ${errMsg(result.err)}`,
    );
    result = await (result.leg === 1 ? leg2 : leg1);
  }

  if ("ok" in result) {
    const loserIdx = result.leg === 1 ? 1 : 0;
    // Only cancel a loser that is still in flight; one that already settled
    // has nothing left to abort and its outcome was logged above.
    if (!settled[loserIdx]) {
      controllers[loserIdx]?.abort();
      const loser = result.leg === 1 ? leg2 : leg1;
      void loser.then((s) => {
        if ("err" in s) {
          console.debug(`[painter] hedge loser leg${s.leg} aborted`);
        }
      });
    }
    console.log(
      `[painter] hedge won by leg${result.leg} in ${Date.now() - t0}ms`,
    );
    return result.ok;
  }

  console.warn(
    `[painter] ${label} failed (both hedge legs): ${errMsg(result.err)}`,
  );
  return null;
}
export type PainterResult =
| { kind: "real"; imageUrl: string; imageUuid: string }
| { kind: "mock"; imageUrl: string };
@@ -167,14 +252,25 @@ export async function runPainter(
// Tier A — with referenceImages (priorSceneImage + character portraits).
// FLUX.2 [klein] 9B KV's KV cache accelerates this multi-reference path
// ~2.5× compared to the non-KV variant.
// ~2.5× compared to the non-KV variant. When IMAGE_HEDGE_MS is configured,
// the scene paint is hedged (see tryGenerateHedged); portraits are not.
if (refs.length > 0) {
const r = await tryGenerate(
config.image,
prompt,
{ referenceImages: refs, orientation: input.orientation },
`referenceImages (${refs.length})`,
);
const tierAOptions: GenerateImageOptions = {
referenceImages: refs,
orientation: input.orientation,
timeoutMs: config.imageTimeoutMs,
};
const label = `referenceImages (${refs.length})`;
const r =
config.imageHedgeMs && config.imageHedgeMs > 0
? await tryGenerateHedged(
config.image,
prompt,
tierAOptions,
label,
config.imageHedgeMs,
)
: await tryGenerate(config.image, prompt, tierAOptions, label);
if (r) return { kind: "real", imageUrl: r.imageUrl, imageUuid: r.imageUuid };
}
@@ -183,6 +279,7 @@ export async function runPainter(
// Errors here propagate to the caller.
const r = await generateImage(config.image, prompt, {
orientation: input.orientation,
timeoutMs: config.imageTimeoutMs,
});
return { kind: "real", imageUrl: r.imageUrl, imageUuid: r.imageUuid };
}
+13
View File
@@ -367,6 +367,19 @@ export type EngineConfig = {
tts?: TtsConfig;
/** When true the renderer returns a placeholder PNG instead of calling the image API. */
mockImage?: boolean;
/**
* Per-attempt hard timeout (ms) for image-generation requests. Unset → no
* client-side timeout (only the provider's own gateway limits apply, e.g.
* Runware kills tasks at ~55s with a 504).
*/
imageTimeoutMs?: number;
/**
* Painter scene-paint hedge threshold (ms). When the Tier-A (referenced)
* paint hasn't completed after this long, a second identical request races
* the first and the earlier result wins. Unset/0 → hedging disabled.
* Derived from healthy-day Runware p95 (~14s); recommended 15000.
*/
imageHedgeMs?: number;
};
// ──────────────────────────────────────────────────────────────────────