From e68e7e1690d25a44311436d03af40dfa0bfba37a Mon Sep 17 00:00:00 2001 From: yuanzonghao Date: Sat, 13 Jun 2026 11:21:47 +0800 Subject: [PATCH] feat(engine): add opt-in image timeout and scene-paint hedging IMAGE_TIMEOUT_MS sets a per-attempt hard deadline (AbortSignal.timeout); IMAGE_HEDGE_MS races a second identical scene-paint request when the first is still pending past the threshold. Both default to OFF when unset, preserving historical behavior for self-hosted deploys. Co-Authored-By: Claude Opus 4.7 --- .env.example | 16 ++++ AGENTS.md | 1 + lib/ai-client/fetchWithRetry.ts | 36 ++++++-- lib/ai-client/image.ts | 52 ++++++++--- lib/config.ts | 11 +++ lib/engine/agents/characterDesigner.ts | 7 +- lib/engine/agents/painter.ts | 115 +++++++++++++++++++++++-- lib/types/index.ts | 13 +++ 8 files changed, 223 insertions(+), 28 deletions(-) diff --git a/.env.example b/.env.example index bf96f47..2141759 100644 --- a/.env.example +++ b/.env.example @@ -56,6 +56,22 @@ IMAGE_MODEL=runware:400@6 # stays fastest + cheapest for the scene-by-scene flow. # IMAGE_PROVIDER=runware +# Optional image-latency guards. BOTH default to OFF when unset — leaving +# them blank keeps the exact historical behavior, so self-hosted deploys are +# unaffected unless they opt in. +# IMAGE_TIMEOUT_MS — per-attempt hard deadline for image requests; a timed +# out attempt is retried like a 5xx. Recommended 30000 for Runware +# (healthy-day p99 is ~26-37s; Runware's own gateway 504s at ~55s). +# IMAGE_HEDGE_MS — scene-paint hedging: if the referenced scene paint has +# not finished after this many ms, race a second identical request and +# keep whichever finishes first (the loser is aborted, but the provider +# may still bill it). Rescues straggler tasks; never fires when the first +# attempt already failed (e.g. 429/503 saturation). Recommended 15000 for +# Runware (healthy-day p95). Do NOT set thresholds this low for providers +# that are normally slow (e.g. gpt-image takes 20-60s per image). 
+# IMAGE_TIMEOUT_MS=30000 +# IMAGE_HEDGE_MS=15000 + # ---- 3. Vision model · multimodal click interpretation ------------- # Recommended: MiMo V2.5 — multimodal, accepts image_url content parts. VISION_BASE_URL=https://token-plan-sgp.xiaomimimo.com/v1 diff --git a/AGENTS.md b/AGENTS.md index 9785d7c..de4bec0 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -138,6 +138,7 @@ Use `.env.example` as the source of truth. Never commit `.env.local`, API keys, - Text and Vision use `TEXT_*` and `VISION_*`; default protocol is `openai_compatible`, with native `anthropic` and `google` available via `TEXT_PROVIDER` / `VISION_PROVIDER`. - Image uses `IMAGE_*`; supported protocols are `runware`, `openai_compatible`, native `openai`, and native `google`. When `IMAGE_PROVIDER` is unset, Runware is inferred from `*.runware.ai` URLs and otherwise falls back to OpenAI-compatible image generations. +- `IMAGE_TIMEOUT_MS` (per-attempt hard deadline) and `IMAGE_HEDGE_MS` (Painter scene-paint hedging: race a second request when the first is still pending after the threshold) are both OFF when unset — the default path must stay byte-identical to historical behavior. Hedging applies only to the Tier-A scene paint, never to portraits, and never fires after a fast failure (saturation guard). Client-side engine configs (`resolveEngineConfig`) intentionally do not set these fields. - TTS supports Xiaomi MiMo (voicedesign + voiceclone) or StepFun (preset voices auto-selected by keyword scoring), inferred from `TTS_BASE_URL` (host containing `stepfun.com` → StepFun, otherwise → MiMo). `CharacterVoice` is a discriminated union on `provider`; synth dispatches on the voice's own tag so a session may carry both shapes through a provider switch. Blank config means silent mode. - `MOCK_IMAGE=true` skips image generation and returns a placeholder for cheap local iteration. - `NEXT_PUBLIC_IMAGE_PROXY_URL` and `NEXT_PUBLIC_IMAGE_PROXY_ALLOWED_HOSTS` opt into browser-side image proxying for allowed hosts. 
diff --git a/lib/ai-client/fetchWithRetry.ts b/lib/ai-client/fetchWithRetry.ts index 8cc647d..64dc7ba 100644 --- a/lib/ai-client/fetchWithRetry.ts +++ b/lib/ai-client/fetchWithRetry.ts @@ -1,16 +1,35 @@ -type RetryInit = RequestInit & { retries?: number; retryDelayMs?: number }; +type RetryInit = RequestInit & { + retries?: number; + retryDelayMs?: number; + /** + * Per-attempt hard deadline. A timed-out attempt counts as a retryable + * failure (it consumes retry budget like a 5xx). Unset → no client-side + * timeout, preserving the historical behavior. + */ + timeoutMs?: number; +}; export async function fetchWithRetry( url: string, init: RetryInit, ): Promise<Response> { - const { retries = 2, retryDelayMs = 1500, ...fetchInit } = init; + const { retries = 2, retryDelayMs = 1500, timeoutMs, ...fetchInit } = init; if (!fetchInit.redirect) fetchInit.redirect = "manual"; + // Caller-supplied signal (e.g. a hedge loser being cancelled) must abort + // immediately and permanently — it is NOT retryable, unlike our own + // per-attempt timeout below. + const externalSignal = fetchInit.signal ?? undefined; let lastError: unknown; for (let attempt = 0; attempt <= retries; attempt++) { + if (externalSignal?.aborted) throw abortError(externalSignal); + const attemptSignal = timeoutMs + ? externalSignal + ? 
AbortSignal.any([externalSignal, AbortSignal.timeout(timeoutMs)]) + : AbortSignal.timeout(timeoutMs) + : externalSignal; try { - const res = await fetch(url, fetchInit); + const res = await fetch(url, { ...fetchInit, signal: attemptSignal }); if (res.ok) return res; // Don't retry 4xx (client errors won't fix themselves) if (res.status >= 400 && res.status < 500) return res; @@ -22,9 +41,10 @@ export async function fetchWithRetry( return res; } catch (err) { lastError = err; - const isAbort = - err instanceof DOMException && err.name === "AbortError"; + if (externalSignal?.aborted) throw err; + const isAbort = err instanceof DOMException && err.name === "AbortError"; if (isAbort) throw err; + // TimeoutError (from AbortSignal.timeout) falls through as retryable. if (attempt < retries) { await sleep(retryDelayMs * (attempt + 1)); continue; @@ -35,6 +55,12 @@ export async function fetchWithRetry( throw lastError; } +function abortError(signal: AbortSignal): unknown { + return signal.reason instanceof Error + ? signal.reason + : new DOMException("This operation was aborted", "AbortError"); +} + function sleep(ms: number): Promise<void> { return new Promise((resolve) => setTimeout(resolve, ms)); } diff --git a/lib/ai-client/image.ts b/lib/ai-client/image.ts index 5cae0f8..c2cb1c5 100644 --- a/lib/ai-client/image.ts +++ b/lib/ai-client/image.ts @@ -59,6 +59,15 @@ export type GenerateImageOptions = { * native gpt-image 1024x1536. */ orientation?: Orientation; + /** + * Per-attempt hard deadline (ms). A timed-out attempt is retryable. + * Unset → no client-side timeout (historical behavior). + */ + timeoutMs?: number; + /** Retry-attempt override for this call (default 2). 0 = single attempt. */ + retries?: number; + /** External cancellation, e.g. aborting the losing leg of a hedged race. */ + signal?: AbortSignal; }; export type GenerateImageResult = { @@ -143,22 +152,33 @@ async function generateImageOpenAi( const refs = (options?.referenceImages ?? 
[]).slice(0, MAX_REFERENCE_IMAGES); const portrait = options?.orientation === "portrait"; const size = portrait ? "1024x1536" : "1536x1024"; + const requestOptions = { + signal: options?.signal, + timeout: options?.timeoutMs, + ...(options?.retries !== undefined ? { maxRetries: options.retries } : {}), + }; const response = refs.length > 0 - ? await client.images.edit({ - model: config.model, - prompt, - image: await Promise.all(refs.map(referenceImageToUploadable)), - n: 1, - size, - }) - : await client.images.generate({ - model: config.model, - prompt, - n: 1, - size, - }); + ? await client.images.edit( + { + model: config.model, + prompt, + image: await Promise.all(refs.map(referenceImageToUploadable)), + n: 1, + size, + }, + requestOptions, + ) + : await client.images.generate( + { + model: config.model, + prompt, + n: 1, + size, + }, + requestOptions, + ); return imageResponseToResult(response); } @@ -257,6 +277,9 @@ async function generateImageOpenAiCompatible( // Session-locked aspect (16:9 default, 9:16 portrait for mobile). size: options?.orientation === "portrait" ? "1024x1792" : "1792x1024", }), + retries: options?.retries, + timeoutMs: options?.timeoutMs, + signal: options?.signal, }); const text = await res.text(); @@ -326,6 +349,9 @@ async function generateImageRunware( Authorization: `Bearer ${config.apiKey}`, }, body: JSON.stringify([task]), + retries: options?.retries, + timeoutMs: options?.timeoutMs, + signal: options?.signal, }); const text = await res.text(); diff --git a/lib/config.ts b/lib/config.ts index c733df9..efd20af 100644 --- a/lib/config.ts +++ b/lib/config.ts @@ -21,6 +21,15 @@ function readOptionalVar(name: string): string | undefined { return v && v.length > 0 ? v : undefined; } +// Invalid/non-positive values are treated as unset (feature stays off) rather +// than failing boot — these knobs are tuning aids, not required config. 
+function readOptionalPositiveInt(name: string): number | undefined { + const v = readOptionalVar(name); + if (!v) return undefined; + const n = Number(v); + return Number.isFinite(n) && n > 0 ? Math.floor(n) : undefined; +} + // Optional *_PROVIDER selector. Unset → undefined, and each ai-client adapter // applies its own default (text/vision → openai_compatible; image → inferred // from the base URL). Validated eagerly so a typo fails fast at boot rather @@ -69,5 +78,7 @@ export function loadEngineConfig(): EngineConfig { }, tts: loadTtsConfig(), mockImage: readOptionalVar("MOCK_IMAGE") === "true", + imageTimeoutMs: readOptionalPositiveInt("IMAGE_TIMEOUT_MS"), + imageHedgeMs: readOptionalPositiveInt("IMAGE_HEDGE_MS"), }; } diff --git a/lib/engine/agents/characterDesigner.ts b/lib/engine/agents/characterDesigner.ts index 6b2a7b7..05d417a 100644 --- a/lib/engine/agents/characterDesigner.ts +++ b/lib/engine/agents/characterDesigner.ts @@ -87,7 +87,12 @@ export async function renderCharacterPortrait( visualDescription, styleGuide, ); - const { imageUrl, imageUuid } = await generateImage(config.image, prompt); + // Portraits get the hard timeout but are never hedged — a scene already + // runs several portrait paints in parallel, and hedging those would push + // burst concurrency past Runware's recommended 2-4 in-flight requests. + const { imageUrl, imageUuid } = await generateImage(config.image, prompt, { + timeoutMs: config.imageTimeoutMs, + }); return { basePortraitUrl: imageUrl, basePortraitUuid: imageUuid }; } catch (err) { const msg = err instanceof Error ? err.message : String(err); diff --git a/lib/engine/agents/painter.ts b/lib/engine/agents/painter.ts index b586fb4..f4fd2e4 100644 --- a/lib/engine/agents/painter.ts +++ b/lib/engine/agents/painter.ts @@ -123,6 +123,10 @@ export function collectReferenceImages( return refs.slice(0, MAX_REFERENCE_IMAGES); } +function errMsg(err: unknown): string { + return err instanceof Error ? 
err.message : String(err); +} + async function tryGenerate( config: ProviderConfig, prompt: string, @@ -132,12 +136,93 @@ async function tryGenerate( try { return await generateImage(config, prompt, options); } catch (err) { - const msg = err instanceof Error ? err.message : String(err); - console.warn(`[painter] ${label} failed: ${msg}`); + console.warn(`[painter] ${label} failed: ${errMsg(err)}`); return null; } } +// Hedged Tier-A: fire leg 1; if it hasn't settled after hedgeMs, race an +// identical leg 2 and take whichever finishes first. This rescues straggler +// paints (a single task stuck on a slow worker) without waiting out the +// provider's own gateway limit (Runware kills tasks at ~55s with a 504). +// +// Deliberately NOT retry-on-error: a leg that fails fast (429/503 queue +// saturation, 4xx) falls through to Tier B immediately — hedging into a +// saturated queue only adds load. Each leg runs with retries=0 so the hedge +// itself is the only retry layer (no retry×retry multiplication). 
+async function tryGenerateHedged( + config: ProviderConfig, + prompt: string, + options: GenerateImageOptions, + label: string, + hedgeMs: number, +): Promise<GenerateImageResult | null> { + type Settled = + | { leg: 1 | 2; ok: GenerateImageResult } + | { leg: 1 | 2; err: unknown }; + + const t0 = Date.now(); + const controllers: (AbortController | undefined)[] = [undefined, undefined]; + const fire = (leg: 1 | 2): Promise<Settled> => { + const ac = new AbortController(); + controllers[leg - 1] = ac; + return generateImage(config, prompt, { + ...options, + retries: 0, + signal: ac.signal, + }).then( + (ok) => ({ leg, ok }) as Settled, + (err) => ({ leg, err }) as Settled, + ); + }; + + const leg1 = fire(1); + let timer: ReturnType<typeof setTimeout> | undefined; + const hedgeTimer = new Promise<"hedge">((resolve) => { + timer = setTimeout(() => resolve("hedge"), hedgeMs); + }); + + const first = await Promise.race([leg1, hedgeTimer]); + if (first !== "hedge") { + clearTimeout(timer); + if ("ok" in first) return first.ok; + console.warn(`[painter] ${label} failed: ${errMsg(first.err)}`); + return null; + } + + console.warn( + `[painter] hedge fired: ${label} still pending after ${hedgeMs}ms`, + ); + const leg2 = fire(2); + + let result = await Promise.race([leg1, leg2]); + if ("err" in result) { + // First settler failed — give the survivor its full chance. + console.warn( + `[painter] hedge leg${result.leg} failed: ${errMsg(result.err)}`, + ); + result = await (result.leg === 1 ? leg2 : leg1); + } + + if ("ok" in result) { + const loserIdx = result.leg === 1 ? 1 : 0; + controllers[loserIdx]?.abort(); + const loser = result.leg === 1 ? 
leg2 : leg1; + loser.then( + (s) => "err" in s && console.debug(`[painter] hedge loser leg${s.leg} aborted`), + () => {}, + ); + console.log( + `[painter] hedge won by leg${result.leg} in ${Date.now() - t0}ms`, + ); + return result.ok; + } + console.warn( + `[painter] ${label} failed (both hedge legs): ${errMsg(result.err)}`, + ); + return null; +} + export type PainterResult = | { kind: "real"; imageUrl: string; imageUuid: string } | { kind: "mock"; imageUrl: string }; @@ -167,14 +252,25 @@ export async function runPainter( // Tier A — with referenceImages (priorSceneImage + character portraits). // FLUX.2 [klein] 9B KV's KV cache accelerates this multi-reference path - // ~2.5× compared to the non-KV variant. + // ~2.5× compared to the non-KV variant. When IMAGE_HEDGE_MS is configured, + // the scene paint is hedged (see tryGenerateHedged); portraits are not. if (refs.length > 0) { - const r = await tryGenerate( - config.image, - prompt, - { referenceImages: refs, orientation: input.orientation }, - `referenceImages (${refs.length})`, - ); + const tierAOptions: GenerateImageOptions = { + referenceImages: refs, + orientation: input.orientation, + timeoutMs: config.imageTimeoutMs, + }; + const label = `referenceImages (${refs.length})`; + const r = + config.imageHedgeMs && config.imageHedgeMs > 0 + ? await tryGenerateHedged( + config.image, + prompt, + tierAOptions, + label, + config.imageHedgeMs, + ) + : await tryGenerate(config.image, prompt, tierAOptions, label); if (r) return { kind: "real", imageUrl: r.imageUrl, imageUuid: r.imageUuid }; } @@ -183,6 +279,7 @@ export async function runPainter( // Errors here propagate to the caller. 
const r = await generateImage(config.image, prompt, { orientation: input.orientation, + timeoutMs: config.imageTimeoutMs, }); return { kind: "real", imageUrl: r.imageUrl, imageUuid: r.imageUuid }; } diff --git a/lib/types/index.ts b/lib/types/index.ts index ba0a7fa..9fb048f 100644 --- a/lib/types/index.ts +++ b/lib/types/index.ts @@ -367,6 +367,19 @@ export type EngineConfig = { tts?: TtsConfig; /** When true the renderer returns a placeholder PNG instead of calling the image API. */ mockImage?: boolean; + /** + * Per-attempt hard timeout (ms) for image-generation requests. Unset → no + * client-side timeout (only the provider's own gateway limits apply, e.g. + * Runware kills tasks at ~55s with a 504). + */ + imageTimeoutMs?: number; + /** + * Painter scene-paint hedge threshold (ms). When the Tier-A (referenced) + * paint hasn't completed after this long, a second identical request races + * the first and the earlier result wins. Unset/0 → hedging disabled. + * Derived from healthy-day Runware p95 (~14s); recommended 15000. + */ + imageHedgeMs?: number; }; // ──────────────────────────────────────────────────────────────────────