Merge pull request #66 from zonghaoyuan/feat/painter-hedged-retry
feat(engine): add opt-in image timeout and scene-paint hedging
This commit is contained in:
@@ -56,6 +56,22 @@ IMAGE_MODEL=runware:400@6
|
||||
# stays fastest + cheapest for the scene-by-scene flow.
|
||||
# IMAGE_PROVIDER=runware
|
||||
|
||||
# Optional image-latency guards. BOTH default to OFF when unset — leaving
|
||||
# them blank keeps the exact historical behavior, so self-hosted deploys are
|
||||
# unaffected unless they opt in.
|
||||
# IMAGE_TIMEOUT_MS — per-attempt hard deadline for image requests; a timed
|
||||
# out attempt is retried like a 5xx. Recommended 30000 for Runware
|
||||
# (healthy-day p99 is ~26-37s; Runware's own gateway 504s at ~55s).
|
||||
# IMAGE_HEDGE_MS — scene-paint hedging: if the referenced scene paint has
|
||||
# not finished after this many ms, race a second identical request and
|
||||
# keep whichever finishes first (the loser is aborted, but the provider
|
||||
# may still bill it). Rescues straggler tasks; never fires when the first
|
||||
# attempt already failed (e.g. 429/503 saturation). Recommended 15000 for
|
||||
# Runware (healthy-day p95). Do NOT set thresholds this low for providers
|
||||
# that are normally slow (e.g. gpt-image takes 20-60s per image).
|
||||
# IMAGE_TIMEOUT_MS=30000
|
||||
# IMAGE_HEDGE_MS=15000
|
||||
|
||||
# ---- 3. Vision model · multimodal click interpretation -------------
|
||||
# Recommended: MiMo V2.5 — multimodal, accepts image_url content parts.
|
||||
VISION_BASE_URL=https://token-plan-sgp.xiaomimimo.com/v1
|
||||
|
||||
@@ -138,6 +138,7 @@ Use `.env.example` as the source of truth. Never commit `.env.local`, API keys,
|
||||
|
||||
- Text and Vision use `TEXT_*` and `VISION_*`; default protocol is `openai_compatible`, with native `anthropic` and `google` available via `TEXT_PROVIDER` / `VISION_PROVIDER`.
|
||||
- Image uses `IMAGE_*`; supported protocols are `runware`, `openai_compatible`, native `openai`, and native `google`. When `IMAGE_PROVIDER` is unset, Runware is inferred from `*.runware.ai` URLs and otherwise falls back to OpenAI-compatible image generations.
|
||||
- `IMAGE_TIMEOUT_MS` (per-attempt hard deadline) and `IMAGE_HEDGE_MS` (Painter scene-paint hedging: race a second request when the first is still pending after the threshold) are both OFF when unset — the default path must stay byte-identical to historical behavior. Hedging applies only to the Tier-A scene paint, never to portraits, and never fires after a fast failure (saturation guard). Client-side engine configs (`resolveEngineConfig`) intentionally do not set these fields.
|
||||
- TTS supports Xiaomi MiMo (voicedesign + voiceclone) or StepFun (preset voices auto-selected by keyword scoring), inferred from `TTS_BASE_URL` (host containing `stepfun.com` → StepFun, otherwise → MiMo). `CharacterVoice` is a discriminated union on `provider`; synth dispatches on the voice's own tag so a session may carry both shapes through a provider switch. Blank config means silent mode.
|
||||
- `MOCK_IMAGE=true` skips image generation and returns a placeholder for cheap local iteration.
|
||||
- `NEXT_PUBLIC_IMAGE_PROXY_URL` and `NEXT_PUBLIC_IMAGE_PROXY_ALLOWED_HOSTS` opt into browser-side image proxying for allowed hosts.
|
||||
|
||||
@@ -1,16 +1,35 @@
|
||||
type RetryInit = RequestInit & { retries?: number; retryDelayMs?: number };
|
||||
type RetryInit = RequestInit & {
|
||||
retries?: number;
|
||||
retryDelayMs?: number;
|
||||
/**
|
||||
* Per-attempt hard deadline. A timed-out attempt counts as a retryable
|
||||
* failure (it consumes retry budget like a 5xx). Unset → no client-side
|
||||
* timeout, preserving the historical behavior.
|
||||
*/
|
||||
timeoutMs?: number;
|
||||
};
|
||||
|
||||
export async function fetchWithRetry(
|
||||
url: string,
|
||||
init: RetryInit,
|
||||
): Promise<Response> {
|
||||
const { retries = 2, retryDelayMs = 1500, ...fetchInit } = init;
|
||||
const { retries = 2, retryDelayMs = 1500, timeoutMs, ...fetchInit } = init;
|
||||
if (!fetchInit.redirect) fetchInit.redirect = "manual";
|
||||
// Caller-supplied signal (e.g. a hedge loser being cancelled) must abort
|
||||
// immediately and permanently — it is NOT retryable, unlike our own
|
||||
// per-attempt timeout below.
|
||||
const externalSignal = fetchInit.signal ?? undefined;
|
||||
|
||||
let lastError: unknown;
|
||||
for (let attempt = 0; attempt <= retries; attempt++) {
|
||||
if (externalSignal?.aborted) throw abortError(externalSignal);
|
||||
const attemptSignal = timeoutMs
|
||||
? externalSignal
|
||||
? AbortSignal.any([externalSignal, AbortSignal.timeout(timeoutMs)])
|
||||
: AbortSignal.timeout(timeoutMs)
|
||||
: externalSignal;
|
||||
try {
|
||||
const res = await fetch(url, fetchInit);
|
||||
const res = await fetch(url, { ...fetchInit, signal: attemptSignal });
|
||||
if (res.ok) return res;
|
||||
// Don't retry 4xx (client errors won't fix themselves)
|
||||
if (res.status >= 400 && res.status < 500) return res;
|
||||
@@ -22,9 +41,10 @@ export async function fetchWithRetry(
|
||||
return res;
|
||||
} catch (err) {
|
||||
lastError = err;
|
||||
const isAbort =
|
||||
err instanceof DOMException && err.name === "AbortError";
|
||||
if (externalSignal?.aborted) throw err;
|
||||
const isAbort = err instanceof DOMException && err.name === "AbortError";
|
||||
if (isAbort) throw err;
|
||||
// TimeoutError (from AbortSignal.timeout) falls through as retryable.
|
||||
if (attempt < retries) {
|
||||
await sleep(retryDelayMs * (attempt + 1));
|
||||
continue;
|
||||
@@ -35,6 +55,12 @@ export async function fetchWithRetry(
|
||||
throw lastError;
|
||||
}
|
||||
|
||||
/**
 * Chooses the value to throw on behalf of a signal that has been aborted.
 *
 * @param signal - The signal whose abort reason should be surfaced.
 * @returns `signal.reason` when it is an `Error` instance; otherwise a fresh
 *   `DOMException` named `"AbortError"`.
 */
function abortError(signal: AbortSignal): unknown {
  const { reason } = signal;
  if (reason instanceof Error) {
    return reason;
  }
  // Non-Error reasons (strings, undefined, …) are replaced by a standard
  // AbortError so callers always receive an error-shaped value.
  return new DOMException("This operation was aborted", "AbortError");
}
|
||||
|
||||
/**
 * Returns a promise that resolves (with no value) once `ms` milliseconds have
 * elapsed, scheduled through `setTimeout`.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
+39
-13
@@ -59,6 +59,15 @@ export type GenerateImageOptions = {
|
||||
* native gpt-image 1024x1536.
|
||||
*/
|
||||
orientation?: Orientation;
|
||||
/**
|
||||
* Per-attempt hard deadline (ms). A timed-out attempt is retryable.
|
||||
* Unset → no client-side timeout (historical behavior).
|
||||
*/
|
||||
timeoutMs?: number;
|
||||
/** Retry-attempt override for this call (default 2). 0 = single attempt. */
|
||||
retries?: number;
|
||||
/** External cancellation, e.g. aborting the losing leg of a hedged race. */
|
||||
signal?: AbortSignal;
|
||||
};
|
||||
|
||||
export type GenerateImageResult = {
|
||||
@@ -143,22 +152,33 @@ async function generateImageOpenAi(
|
||||
const refs = (options?.referenceImages ?? []).slice(0, MAX_REFERENCE_IMAGES);
|
||||
const portrait = options?.orientation === "portrait";
|
||||
const size = portrait ? "1024x1536" : "1536x1024";
|
||||
const requestOptions = {
|
||||
signal: options?.signal,
|
||||
timeout: options?.timeoutMs,
|
||||
...(options?.retries !== undefined ? { maxRetries: options.retries } : {}),
|
||||
};
|
||||
|
||||
const response =
|
||||
refs.length > 0
|
||||
? await client.images.edit({
|
||||
model: config.model,
|
||||
prompt,
|
||||
image: await Promise.all(refs.map(referenceImageToUploadable)),
|
||||
n: 1,
|
||||
size,
|
||||
})
|
||||
: await client.images.generate({
|
||||
model: config.model,
|
||||
prompt,
|
||||
n: 1,
|
||||
size,
|
||||
});
|
||||
? await client.images.edit(
|
||||
{
|
||||
model: config.model,
|
||||
prompt,
|
||||
image: await Promise.all(refs.map(referenceImageToUploadable)),
|
||||
n: 1,
|
||||
size,
|
||||
},
|
||||
requestOptions,
|
||||
)
|
||||
: await client.images.generate(
|
||||
{
|
||||
model: config.model,
|
||||
prompt,
|
||||
n: 1,
|
||||
size,
|
||||
},
|
||||
requestOptions,
|
||||
);
|
||||
|
||||
return imageResponseToResult(response);
|
||||
}
|
||||
@@ -257,6 +277,9 @@ async function generateImageOpenAiCompatible(
|
||||
// Session-locked aspect (16:9 default, 9:16 portrait for mobile).
|
||||
size: options?.orientation === "portrait" ? "1024x1792" : "1792x1024",
|
||||
}),
|
||||
retries: options?.retries,
|
||||
timeoutMs: options?.timeoutMs,
|
||||
signal: options?.signal,
|
||||
});
|
||||
|
||||
const text = await res.text();
|
||||
@@ -326,6 +349,9 @@ async function generateImageRunware(
|
||||
Authorization: `Bearer ${config.apiKey}`,
|
||||
},
|
||||
body: JSON.stringify([task]),
|
||||
retries: options?.retries,
|
||||
timeoutMs: options?.timeoutMs,
|
||||
signal: options?.signal,
|
||||
});
|
||||
|
||||
const text = await res.text();
|
||||
|
||||
@@ -21,6 +21,15 @@ function readOptionalVar(name: string): string | undefined {
|
||||
return v && v.length > 0 ? v : undefined;
|
||||
}
|
||||
|
||||
/**
 * Reads an optional tuning knob as a positive whole number.
 *
 * Invalid values are treated as unset (the feature stays off) rather than
 * failing boot — these knobs are tuning aids, not required config.
 *
 * @param name - Name of the variable, looked up through `readOptionalVar`.
 * @returns The value truncated down to an integer when it is finite and at
 *   least 1; `undefined` when the variable has no value, is not numeric, or
 *   is below 1 after truncation.
 */
function readOptionalPositiveInt(name: string): number | undefined {
  const raw = readOptionalVar(name);
  if (!raw) return undefined;
  // Truncate BEFORE the range check: validating first would let inputs in
  // (0, 1) such as "0.5" pass `> 0` and then floor to 0, leaking a
  // non-positive value out of a "positive int" reader.
  const n = Math.floor(Number(raw));
  return Number.isFinite(n) && n > 0 ? n : undefined;
}
|
||||
|
||||
// Optional *_PROVIDER selector. Unset → undefined, and each ai-client adapter
|
||||
// applies its own default (text/vision → openai_compatible; image → inferred
|
||||
// from the base URL). Validated eagerly so a typo fails fast at boot rather
|
||||
@@ -69,5 +78,7 @@ export function loadEngineConfig(): EngineConfig {
|
||||
},
|
||||
tts: loadTtsConfig(),
|
||||
mockImage: readOptionalVar("MOCK_IMAGE") === "true",
|
||||
imageTimeoutMs: readOptionalPositiveInt("IMAGE_TIMEOUT_MS"),
|
||||
imageHedgeMs: readOptionalPositiveInt("IMAGE_HEDGE_MS"),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -87,7 +87,12 @@ export async function renderCharacterPortrait(
|
||||
visualDescription,
|
||||
styleGuide,
|
||||
);
|
||||
const { imageUrl, imageUuid } = await generateImage(config.image, prompt);
|
||||
// Portraits get the hard timeout but are never hedged — a scene already
|
||||
// runs several portrait paints in parallel, and hedging those would push
|
||||
// burst concurrency past Runware's recommended 2-4 in-flight requests.
|
||||
const { imageUrl, imageUuid } = await generateImage(config.image, prompt, {
|
||||
timeoutMs: config.imageTimeoutMs,
|
||||
});
|
||||
return { basePortraitUrl: imageUrl, basePortraitUuid: imageUuid };
|
||||
} catch (err) {
|
||||
const msg = err instanceof Error ? err.message : String(err);
|
||||
|
||||
@@ -123,6 +123,10 @@ export function collectReferenceImages(
|
||||
return refs.slice(0, MAX_REFERENCE_IMAGES);
|
||||
}
|
||||
|
||||
/**
 * Produces a printable message for an arbitrary thrown value: the `message`
 * of an `Error`, or the `String(...)` conversion of anything else.
 */
function errMsg(err: unknown): string {
  if (err instanceof Error) {
    return err.message;
  }
  return String(err);
}
|
||||
|
||||
async function tryGenerate(
|
||||
config: ProviderConfig,
|
||||
prompt: string,
|
||||
@@ -132,12 +136,93 @@ async function tryGenerate(
|
||||
try {
|
||||
return await generateImage(config, prompt, options);
|
||||
} catch (err) {
|
||||
const msg = err instanceof Error ? err.message : String(err);
|
||||
console.warn(`[painter] ${label} failed: ${msg}`);
|
||||
console.warn(`[painter] ${label} failed: ${errMsg(err)}`);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Hedged Tier-A paint: fire leg 1; if it hasn't settled after `hedgeMs`, race
 * an identical leg 2 and take whichever finishes first. This rescues straggler
 * paints (a single task stuck on a slow worker) without waiting out the
 * provider's own gateway limit (Runware kills tasks at ~55s with a 504).
 *
 * Deliberately NOT retry-on-error: a leg that fails fast (429/503 queue
 * saturation, 4xx) before the hedge timer fires falls through to Tier B
 * immediately — hedging into a saturated queue only adds load. Each leg runs
 * with `retries: 0` so the hedge itself is the only retry layer (no
 * retry×retry multiplication).
 *
 * @param config - Image provider configuration forwarded to `generateImage`.
 * @param prompt - Prompt forwarded unchanged to both legs.
 * @param options - Generation options shared by both legs. `retries` is
 *   forced to 0 and any `signal` is replaced by a per-leg abort signal.
 * @param label - Human-readable tag used in log lines.
 * @param hedgeMs - How long leg 1 may stay pending before leg 2 is fired.
 * @returns The first successful result, or `null` when the only leg (no hedge
 *   fired) or both legs (hedge fired) failed. Never rejects on leg failure.
 */
async function tryGenerateHedged(
  config: ProviderConfig,
  prompt: string,
  options: GenerateImageOptions,
  label: string,
  hedgeMs: number,
): Promise<GenerateImageResult | null> {
  type Settled =
    | { leg: 1 | 2; ok: GenerateImageResult }
    | { leg: 1 | 2; err: unknown };

  const t0 = Date.now();
  const controllers: (AbortController | undefined)[] = [undefined, undefined];

  // Starts one leg with its own AbortController. Rejections are folded into a
  // `Settled` value, so a leg promise never rejects and can be raced or
  // awaited later without an unhandled-rejection risk.
  const fire = (leg: 1 | 2): Promise<Settled> => {
    const ac = new AbortController();
    controllers[leg - 1] = ac;
    return generateImage(config, prompt, {
      ...options,
      retries: 0,
      signal: ac.signal,
    }).then(
      (ok) => ({ leg, ok }) as Settled,
      (err) => ({ leg, err }) as Settled,
    );
  };

  const leg1 = fire(1);
  let timer: ReturnType<typeof setTimeout> | undefined;
  const hedgeTimer = new Promise<"hedge">((resolve) => {
    timer = setTimeout(() => resolve("hedge"), hedgeMs);
  });

  const first = await Promise.race([leg1, hedgeTimer]);
  if (first !== "hedge") {
    // Leg 1 settled before the threshold: no hedge, success or failure.
    clearTimeout(timer);
    if ("ok" in first) return first.ok;
    console.warn(`[painter] ${label} failed: ${errMsg(first.err)}`);
    return null;
  }

  console.warn(
    `[painter] hedge fired: ${label} still pending after ${hedgeMs}ms`,
  );
  const leg2 = fire(2);

  // True when one leg failed by itself before the other produced a result.
  // In that case there is nothing left in flight to cancel.
  let otherLegAlreadyFailed = false;
  let result = await Promise.race([leg1, leg2]);
  if ("err" in result) {
    // First settler failed — give the survivor its full chance.
    console.warn(
      `[painter] hedge leg${result.leg} failed: ${errMsg(result.err)}`,
    );
    otherLegAlreadyFailed = true;
    result = await (result.leg === 1 ? leg2 : leg1);
  }

  if ("ok" in result) {
    if (!otherLegAlreadyFailed) {
      // The loser is still running: cancel it. Skipped when the other leg
      // already failed on its own, so that failure is not re-reported as an
      // abort.
      const loserIdx = result.leg === 1 ? 1 : 0;
      controllers[loserIdx]?.abort();
      const loser = result.leg === 1 ? leg2 : leg1;
      loser.then(
        (s) => "err" in s && console.debug(`[painter] hedge loser leg${s.leg} aborted`),
        () => {},
      );
    }
    console.log(
      `[painter] hedge won by leg${result.leg} in ${Date.now() - t0}ms`,
    );
    return result.ok;
  }
  console.warn(
    `[painter] ${label} failed (both hedge legs): ${errMsg(result.err)}`,
  );
  return null;
}
|
||||
|
||||
export type PainterResult =
|
||||
| { kind: "real"; imageUrl: string; imageUuid: string }
|
||||
| { kind: "mock"; imageUrl: string };
|
||||
@@ -167,14 +252,25 @@ export async function runPainter(
|
||||
|
||||
// Tier A — with referenceImages (priorSceneImage + character portraits).
|
||||
// FLUX.2 [klein] 9B KV's KV cache accelerates this multi-reference path
|
||||
// ~2.5× compared to the non-KV variant.
|
||||
// ~2.5× compared to the non-KV variant. When IMAGE_HEDGE_MS is configured,
|
||||
// the scene paint is hedged (see tryGenerateHedged); portraits are not.
|
||||
if (refs.length > 0) {
|
||||
const r = await tryGenerate(
|
||||
config.image,
|
||||
prompt,
|
||||
{ referenceImages: refs, orientation: input.orientation },
|
||||
`referenceImages (${refs.length})`,
|
||||
);
|
||||
const tierAOptions: GenerateImageOptions = {
|
||||
referenceImages: refs,
|
||||
orientation: input.orientation,
|
||||
timeoutMs: config.imageTimeoutMs,
|
||||
};
|
||||
const label = `referenceImages (${refs.length})`;
|
||||
const r =
|
||||
config.imageHedgeMs && config.imageHedgeMs > 0
|
||||
? await tryGenerateHedged(
|
||||
config.image,
|
||||
prompt,
|
||||
tierAOptions,
|
||||
label,
|
||||
config.imageHedgeMs,
|
||||
)
|
||||
: await tryGenerate(config.image, prompt, tierAOptions, label);
|
||||
if (r) return { kind: "real", imageUrl: r.imageUrl, imageUuid: r.imageUuid };
|
||||
}
|
||||
|
||||
@@ -183,6 +279,7 @@ export async function runPainter(
|
||||
// Errors here propagate to the caller.
|
||||
const r = await generateImage(config.image, prompt, {
|
||||
orientation: input.orientation,
|
||||
timeoutMs: config.imageTimeoutMs,
|
||||
});
|
||||
return { kind: "real", imageUrl: r.imageUrl, imageUuid: r.imageUuid };
|
||||
}
|
||||
|
||||
@@ -367,6 +367,19 @@ export type EngineConfig = {
|
||||
tts?: TtsConfig;
|
||||
/** When true the renderer returns a placeholder PNG instead of calling the image API. */
|
||||
mockImage?: boolean;
|
||||
/**
|
||||
* Per-attempt hard timeout (ms) for image-generation requests. Unset → no
|
||||
* client-side timeout (only the provider's own gateway limits apply, e.g.
|
||||
* Runware kills tasks at ~55s with a 504).
|
||||
*/
|
||||
imageTimeoutMs?: number;
|
||||
/**
|
||||
* Painter scene-paint hedge threshold (ms). When the Tier-A (referenced)
|
||||
* paint hasn't completed after this long, a second identical request races
|
||||
* the first and the earlier result wins. Unset/0 → hedging disabled.
|
||||
* Derived from healthy-day Runware p95 (~14s); recommended 15000.
|
||||
*/
|
||||
imageHedgeMs?: number;
|
||||
};
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
Reference in New Issue
Block a user