feat: prefetch, vision split, provider adapter, UI polish

Engine
- Split /api/vision out from /api/interact so client can drive
  prefetch + cache lookup independently of click interpretation
- Image client switched to chat-completions+modalities API (OpenRouter/
  provider style), supporting markdown image URL responses
- annotateClick now resizes to 768w before composite to keep vision
  payloads small and avoid CDN timeouts
- Prompts updated to mention "JSON" in user messages (required by
  Gemini's strict JSON mode)
- Shared fetchWithRetry helper: 2 retries for chat/image, 0 for vision
  (with 60s hard timeout)

Client
- Parallel prefetch of all three choice branches on each new frame
- Effect deliberately excludes phase from deps so user-click doesn't
  abort in-flight prefetches
- Cache hit/miss/free-form fallback handled in handleClick
- PlayCanvas reads the img naturalWidth/naturalHeight and adapts the container
  to whatever aspect ratio the AI returns (no more cropped third choice)
- max-width raised to 560px, max-height calc(100dvh - 200px)

Misc
- README env-path corrected to apps/web/.env.local
- users.md: BGM/TTS idea note
- .env.example moved into apps/web alongside next config

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
yuanzonghao
2026-05-12 19:38:03 +08:00
parent ad4b09c744
commit 9cedfa66e4
20 changed files with 405 additions and 151 deletions
+2 -1
View File
@@ -1,4 +1,5 @@
import type { ProviderConfig } from "@dada/types";
import { fetchWithRetry } from "./fetchWithRetry";
export type ChatMessage = {
role: "system" | "user" | "assistant";
@@ -20,7 +21,7 @@ export async function chat(
body.response_format = { type: "json_object" };
}
const res = await fetch(url, {
const res = await fetchWithRetry(url, {
method: "POST",
headers: {
"Content-Type": "application/json",
+39
View File
@@ -0,0 +1,39 @@
/**
 * Options accepted by {@link fetchWithRetry}: the standard `fetch` init plus
 * two retry knobs that are stripped before the request is issued.
 */
type RetryInit = RequestInit & { retries?: number; retryDelayMs?: number };

/**
 * `fetch` wrapper that retries transient failures with a linearly growing delay.
 *
 * - 2xx and 4xx responses are returned immediately (a client error will not
 *   fix itself on retry).
 * - Any other non-ok response (typically 5xx) is retried while budget remains;
 *   once the budget is exhausted the last response is returned to the caller.
 * - Rejections from `fetch` (network errors) are retried while budget remains,
 *   except when the caller's signal has been aborted, in which case the
 *   rejection is rethrown right away.
 *
 * @param url  Request URL.
 * @param init Regular `fetch` options plus `retries` (extra attempts after the
 *             first, default 2) and `retryDelayMs` (base delay, default 1500;
 *             the wait before retry k is `retryDelayMs * k`).
 * @returns The first ok/4xx response, or the last response once retries run out.
 * @throws Whatever `fetch` rejected with on the final attempt or on abort.
 */
export async function fetchWithRetry(
  url: string,
  init: RetryInit,
): Promise<Response> {
  const { retries = 2, retryDelayMs = 1500, ...fetchInit } = init;
  // Normalise the budget: a negative, NaN or fractional value must still give
  // at least one attempt and must never fall out of the loop without a result.
  const maxRetries = Number.isNaN(retries)
    ? 0
    : Math.max(0, Math.floor(retries));

  let lastError: unknown;
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    const hasBudget = attempt < maxRetries;
    try {
      const res = await fetch(url, fetchInit);
      if (res.ok) return res;
      // Don't retry 4xx (client errors won't fix themselves)
      if (res.status >= 400 && res.status < 500) return res;
      // Out of budget: hand the failing response to the caller untouched.
      if (!hasBudget) return res;
      // We are about to drop this response; release its body so the
      // underlying connection is not kept waiting for a reader.
      await discardBody(res);
    } catch (err) {
      lastError = err;
      if (!hasBudget || isAbortError(err, fetchInit.signal)) throw err;
    }
    await sleep(retryDelayMs * (attempt + 1));
  }
  // Unreachable: the final iteration always returns or throws. Kept so the
  // function has no implicit fall-through.
  throw lastError;
}

/**
 * True when a rejection stems from cancellation rather than a transient
 * failure: either the caller's signal is aborted (covers custom abort reasons
 * and timeout signals) or the error is named "AbortError".
 */
function isAbortError(err: unknown, signal?: AbortSignal | null): boolean {
  if (signal?.aborted) return true;
  return (
    typeof err === "object" &&
    err !== null &&
    (err as { name?: unknown }).name === "AbortError"
  );
}

/** Best-effort cancellation of a response body that will not be read. */
async function discardBody(res: Response): Promise<void> {
  try {
    await res.body?.cancel();
  } catch {
    // Ignore: the body may already be locked or consumed.
  }
}

/** Resolves after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}
+54 -20
View File
@@ -1,20 +1,29 @@
import type { ProviderConfig } from "@dada/types";
import { fetchWithRetry } from "./fetchWithRetry";
// One structured message part that may reference an image; generateImage
// only reads `image_url.url` from it.
type ImageUrlPart = { type: string; image_url?: { url?: string } };
// The subset of a chat-completions response body that generateImage inspects.
type ChatResponse = {
choices: {
message: {
// Either a plain string (which may embed a markdown image or a bare URL)
// or an array of structured parts.
content: string | ImageUrlPart[];
// OpenRouter-style list of image parts, when the provider supplies one.
images?: ImageUrlPart[];
};
}[];
};
export async function generateImage(
config: ProviderConfig,
prompt: string,
opts?: { size?: string; quality?: "low" | "medium" | "high" | "auto" },
): Promise<string> {
const url = `${config.baseUrl.replace(/\/$/, "")}/images/generations`;
const body: Record<string, unknown> = {
const url = `${config.baseUrl.replace(/\/$/, "")}/chat/completions`;
const body = {
model: config.model,
prompt,
size: opts?.size ?? "1024x1536",
quality: opts?.quality ?? "medium",
n: 1,
modalities: ["image", "text"],
messages: [{ role: "user", content: prompt }],
};
const res = await fetch(url, {
const res = await fetchWithRetry(url, {
method: "POST",
headers: {
"Content-Type": "application/json",
@@ -25,20 +34,45 @@ export async function generateImage(
if (!res.ok) {
const text = await res.text();
throw new Error(`Image API error ${res.status}: ${text}`);
throw new Error(`Image API error ${res.status}: ${text.slice(0, 500)}`);
}
const json = (await res.json()) as {
data: { b64_json?: string; url?: string }[];
};
const item = json.data[0];
if (!item) throw new Error("Image API returned no data");
const json = (await res.json()) as ChatResponse;
const msg = json.choices[0]?.message;
if (!msg) throw new Error("Image API returned no message");
if (item.b64_json) return item.b64_json;
if (item.url) {
const imgRes = await fetch(item.url);
const buf = await imgRes.arrayBuffer();
return Buffer.from(buf).toString("base64");
// 1) OpenRouter-style: msg.images = [{ image_url: { url } }]
// 2) OpenAI multimodal: msg.content = [{ type: "image_url", image_url: { url } }]
const structured: ImageUrlPart[] = [];
if (msg.images) structured.push(...msg.images);
if (Array.isArray(msg.content)) structured.push(...msg.content);
for (const part of structured) {
const u = part.image_url?.url;
if (u) return await urlToBase64(u);
}
throw new Error("Image API returned neither b64_json nor url");
// 3) provider-style: content is a string with markdown image ![alt](url)
// or a bare URL fragment
if (typeof msg.content === "string") {
const md = msg.content.match(/!\[[^\]]*\]\((https?:\/\/[^\s)]+)\)/);
if (md?.[1]) return await urlToBase64(md[1]);
const bare = msg.content.match(/https?:\/\/\S+?\.(?:png|jpg|jpeg|webp)/i);
if (bare?.[0]) return await urlToBase64(bare[0]);
}
throw new Error(
`No image found in response: ${JSON.stringify(msg).slice(0, 300)}`,
);
}
/**
 * Turns an image reference into a raw base64 payload.
 *
 * A `data:` URL is unpacked locally by returning everything after its
 * `base64,` marker; any other URL is downloaded and its bytes are encoded.
 *
 * @param url A `data:` URL or a fetchable image URL.
 * @returns The image bytes as a base64 string (no `data:` prefix).
 * @throws If a `data:` URL has no `base64,` marker, or the download is not ok.
 */
async function urlToBase64(url: string): Promise<string> {
  const marker = "base64,";

  if (!url.startsWith("data:")) {
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`Failed to fetch image url: ${response.status}`);
    }
    const bytes = await response.arrayBuffer();
    return Buffer.from(bytes).toString("base64");
  }

  const markerAt = url.indexOf(marker);
  if (markerAt === -1) {
    throw new Error("data URL is not base64-encoded");
  }
  return url.slice(markerAt + marker.length);
}
+19 -8
View File
@@ -1,4 +1,5 @@
import type { ProviderConfig } from "@dada/types";
import { fetchWithRetry } from "./fetchWithRetry";
export async function interpretClick(
config: ProviderConfig,
@@ -25,14 +26,24 @@ export async function interpretClick(
response_format: { type: "json_object" },
};
const res = await fetch(url, {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${config.apiKey}`,
},
body: JSON.stringify(body),
});
const timeoutCtrl = new AbortController();
const timeoutId = setTimeout(() => timeoutCtrl.abort(), 60_000);
let res: Response;
try {
res = await fetchWithRetry(url, {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${config.apiKey}`,
},
body: JSON.stringify(body),
signal: timeoutCtrl.signal,
retries: 0,
});
} finally {
clearTimeout(timeoutId);
}
if (!res.ok) {
const text = await res.text();