feat: Runware FLUX.2 image + lazy per-beat TTS (#5)
Reduce median scene-load latency from ~30-80s to ~17-25s by switching image generation to Runware FLUX.2 [klein] 9B KV and moving per-beat TTS synthesis off the scene response into a new lazy /api/beat-audio endpoint with hard timeout + abort support.
- feat(image): migrate to Runware FLUX.2 [klein] 9B KV — task-array API, $0.001/image, sub-second inference.
- feat(tts): split /api/scene into directScene + image + voicedesign-provisioning, and synthesize each beat lazily via /api/beat-audio. Each call has a 15s hard timeout, with an AbortSignal threaded through to MiMo so timed-out calls don't keep burning sockets/quota. The client fans out per-beat fetches whenever the scene id changes, using abort plus an identity check in `finally` to prevent cross-scene beat-id collisions.
- refactor(tts): slim BeatAudioRequest to { beat, voice } — ~800KB per-beat upload dropped to ~160KB by sending only the speaker's voice instead of the full session.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
This commit is contained in:
@@ -1,28 +1,43 @@
|
||||
import type { ProviderConfig } from "@yume/types";
|
||||
import { fetchWithRetry } from "./fetchWithRetry";
|
||||
|
||||
// One structured part of a chat-completions message. It appears as an element
// of `message.images` and of an array-valued `message.content`; generateImage
// reads `image_url?.url` from each part to locate the generated image.
type ImageUrlPart = { type: string; image_url?: { url?: string } };
|
||||
type ChatResponse = {
|
||||
choices: {
|
||||
message: {
|
||||
content: string | ImageUrlPart[];
|
||||
images?: ImageUrlPart[];
|
||||
};
|
||||
}[];
|
||||
// Runware uses its own task-array protocol (not OpenAI-compatible).
|
||||
// POST <baseUrl> with [{ taskType: "imageInference", ... }]; errors come
|
||||
// back as a 200 with `errors[]`, so we have to inspect the body either way.
|
||||
// One entry of the `data` array in a Runware response.
// `imageBase64Data` is the only field generateImage reads: it is returned as
// the function's result, and its absence is reported as "No image in Runware
// response". Presumably populated because the request sets
// `outputType: "base64Data"` (confirm against the Runware API docs).
type RunwareImageResult = {
|
||||
imageBase64Data?: string;
|
||||
};
|
||||
// One entry of the `errors` array in a Runware response.
// Every field is optional: generateImage substitutes "unknown" for a missing
// `code` and "no message" for a missing `message`, and appends `parameter`
// to the thrown error text only when it is present.
type RunwareError = {
|
||||
code?: string;
|
||||
message?: string;
|
||||
parameter?: string;
|
||||
};
|
||||
// Shape that generateImage asserts for the JSON-parsed response body.
// Both keys are optional because either may be missing: generateImage checks
// `errors` first and throws on the first entry, and only then reads
// `data[0]` for the image payload.
type RunwareResponse = {
|
||||
data?: RunwareImageResult[];
|
||||
errors?: RunwareError[];
|
||||
};
|
||||
|
||||
export async function generateImage(
|
||||
config: ProviderConfig,
|
||||
prompt: string,
|
||||
): Promise<string> {
|
||||
const url = `${config.baseUrl.replace(/\/$/, "")}/chat/completions`;
|
||||
const url = config.baseUrl.replace(/\/$/, "");
|
||||
|
||||
const body = {
|
||||
model: config.model,
|
||||
modalities: ["image", "text"],
|
||||
size: "1792x1024",
|
||||
messages: [{ role: "user", content: prompt }],
|
||||
};
|
||||
const body = [
|
||||
{
|
||||
taskType: "imageInference",
|
||||
taskUUID: crypto.randomUUID(),
|
||||
model: config.model,
|
||||
positivePrompt: prompt,
|
||||
width: 1792,
|
||||
height: 1024,
|
||||
steps: 4,
|
||||
CFGScale: 3.5,
|
||||
numberResults: 1,
|
||||
outputType: "base64Data",
|
||||
outputFormat: "PNG",
|
||||
},
|
||||
];
|
||||
|
||||
const res = await fetchWithRetry(url, {
|
||||
method: "POST",
|
||||
@@ -33,47 +48,27 @@ export async function generateImage(
|
||||
body: JSON.stringify(body),
|
||||
});
|
||||
|
||||
if (!res.ok) {
|
||||
const text = await res.text();
|
||||
const text = await res.text();
|
||||
let json: RunwareResponse;
|
||||
try {
|
||||
json = JSON.parse(text) as RunwareResponse;
|
||||
} catch {
|
||||
throw new Error(`Image API error ${res.status}: ${text.slice(0, 500)}`);
|
||||
}
|
||||
|
||||
const json = (await res.json()) as ChatResponse;
|
||||
const msg = json.choices[0]?.message;
|
||||
if (!msg) throw new Error("Image API returned no message");
|
||||
|
||||
// 1) OpenRouter-style: msg.images = [{ image_url: { url } }]
|
||||
// 2) OpenAI multimodal: msg.content = [{ type: "image_url", image_url: { url } }]
|
||||
const structured: ImageUrlPart[] = [];
|
||||
if (msg.images) structured.push(...msg.images);
|
||||
if (Array.isArray(msg.content)) structured.push(...msg.content);
|
||||
for (const part of structured) {
|
||||
const u = part.image_url?.url;
|
||||
if (u) return await urlToBase64(u);
|
||||
if (json.errors?.length) {
|
||||
const e = json.errors[0]!;
|
||||
throw new Error(
|
||||
`Runware error [${e.code ?? "unknown"}]: ${e.message ?? "no message"}` +
|
||||
(e.parameter ? ` (parameter: ${e.parameter})` : ""),
|
||||
);
|
||||
}
|
||||
|
||||
// 3) provider-style: content is a string with a markdown image `![alt](url)`
|
||||
// or a bare URL fragment
|
||||
if (typeof msg.content === "string") {
|
||||
const md = msg.content.match(/!\[[^\]]*\]\((https?:\/\/[^\s)]+)\)/);
|
||||
if (md?.[1]) return await urlToBase64(md[1]);
|
||||
const bare = msg.content.match(/https?:\/\/\S+?\.(?:png|jpg|jpeg|webp)/i);
|
||||
if (bare?.[0]) return await urlToBase64(bare[0]);
|
||||
const b64 = json.data?.[0]?.imageBase64Data;
|
||||
if (!b64) {
|
||||
throw new Error(
|
||||
`No image in Runware response: ${text.slice(0, 300)}`,
|
||||
);
|
||||
}
|
||||
|
||||
throw new Error(
|
||||
`No image found in response: ${JSON.stringify(msg).slice(0, 300)}`,
|
||||
);
|
||||
}
|
||||
|
||||
async function urlToBase64(url: string): Promise<string> {
|
||||
if (url.startsWith("data:")) {
|
||||
const idx = url.indexOf("base64,");
|
||||
if (idx === -1) throw new Error("data URL is not base64-encoded");
|
||||
return url.slice(idx + "base64,".length);
|
||||
}
|
||||
const res = await fetch(url);
|
||||
if (!res.ok) throw new Error(`Failed to fetch image url: ${res.status}`);
|
||||
const buf = await res.arrayBuffer();
|
||||
return Buffer.from(buf).toString("base64");
|
||||
return b64;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user