feat: prefetch, vision split, provider adapter, UI polish
Engine
- Split /api/vision out from /api/interact so the client can drive prefetch + cache lookup independently of click interpretation
- Image client switched to the chat-completions + modalities API (OpenRouter-style), supporting markdown image URL responses
- annotateClick now resizes to 768px wide before compositing to keep vision payloads small and avoid CDN timeouts
- Prompts updated to mention "JSON" in user messages (required by Gemini's strict JSON mode)
- Shared fetchWithRetry helper: 2 retries for chat/image, 0 for vision (with a 60s hard timeout)

Client
- Parallel prefetch of all three choice branches on each new frame
- Effect deliberately excludes phase from deps so a user click doesn't abort in-flight prefetches
- Cache hit/miss/free-form fallback handled in handleClick
- PlayCanvas reads the img naturalWidth/naturalHeight and adapts the container to whatever aspect ratio the AI returns (no more cropped third choice)
- max-width raised to 560px, max-height calc(100dvh - 200px)

Misc
- README env-path corrected to apps/web/.env.local
- users.md: BGM/TTS idea note
- .env.example moved into apps/web alongside the next config

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,21 +0,0 @@
|
|||||||
# =============================================================
|
|
||||||
# Dada — AI Visual Novel
|
|
||||||
# Three independently configurable AI providers
|
|
||||||
# Any OpenAI-compatible endpoint works (OpenAI, Anthropic, Gemini,
|
|
||||||
# OpenRouter, DeepSeek, Ollama, ...).
|
|
||||||
# =============================================================
|
|
||||||
|
|
||||||
# ---- 1. Text LLM (story director) -----------------------------
|
|
||||||
TEXT_BASE_URL=https://api.anthropic.com/v1
|
|
||||||
TEXT_API_KEY=sk-ant-xxx
|
|
||||||
TEXT_MODEL=claude-opus-4-7
|
|
||||||
|
|
||||||
# ---- 2. Image generator (renders the whole UI screen) ---------
|
|
||||||
IMAGE_BASE_URL=https://api.openai.com/v1
|
|
||||||
IMAGE_API_KEY=sk-xxx
|
|
||||||
IMAGE_MODEL=gpt-image-2
|
|
||||||
|
|
||||||
# ---- 3. Vision model (interprets where the user clicked) ------
|
|
||||||
VISION_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai
|
|
||||||
VISION_API_KEY=xxx
|
|
||||||
VISION_MODEL=gemini-3-flash
|
|
||||||
@@ -49,7 +49,7 @@ Three providers, all independently configurable. Any OpenAI-compatible chat / im
|
|||||||
| Image · UI renderer | `IMAGE_BASE_URL` `IMAGE_API_KEY` `IMAGE_MODEL` | `gpt-image-2` via OpenAI |
|
| Image · UI renderer | `IMAGE_BASE_URL` `IMAGE_API_KEY` `IMAGE_MODEL` | `gpt-image-2` via OpenAI |
|
||||||
| Vision · click reader | `VISION_BASE_URL` `VISION_API_KEY` `VISION_MODEL` | `gemini-3-flash` via Google |
|
| Vision · click reader | `VISION_BASE_URL` `VISION_API_KEY` `VISION_MODEL` | `gemini-3-flash` via Google |
|
||||||
|
|
||||||
See `.env.example` for the exact shape.
|
See `apps/web/.env.example` for the exact shape.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -59,7 +59,7 @@ Requires Node 20+ and pnpm 9+.
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
pnpm install
|
pnpm install
|
||||||
cp .env.example .env.local
|
cp apps/web/.env.example apps/web/.env.local
|
||||||
# fill in the nine env vars
|
# fill in the nine env vars
|
||||||
pnpm dev
|
pnpm dev
|
||||||
# open http://localhost:3000
|
# open http://localhost:3000
|
||||||
|
|||||||
@@ -0,0 +1,24 @@
|
|||||||
|
# =============================================================
|
||||||
|
# Dada — AI Visual Novel
|
||||||
|
# Three independently configurable AI providers
|
||||||
|
# Any OpenAI-compatible endpoint works (OpenRouter, OpenAI,
|
||||||
|
# Anthropic via OpenAI-compat proxy, Gemini, DeepSeek, Ollama).
|
||||||
|
#
|
||||||
|
# Image generation uses the chat-completions + modalities API
|
||||||
|
# (OpenRouter-style), NOT the legacy /images/generations endpoint.
|
||||||
|
# =============================================================
|
||||||
|
|
||||||
|
# ---- 1. Text LLM (story director) -----------------------------
|
||||||
|
TEXT_BASE_URL=https://openrouter.ai/api/v1
|
||||||
|
TEXT_API_KEY=sk-or-v1-xxx
|
||||||
|
TEXT_MODEL=~anthropic/claude-sonnet-latest
|
||||||
|
|
||||||
|
# ---- 2. Image generator (renders the whole UI screen) ---------
|
||||||
|
IMAGE_BASE_URL=https://openrouter.ai/api/v1
|
||||||
|
IMAGE_API_KEY=sk-or-v1-xxx
|
||||||
|
IMAGE_MODEL=openai/gpt-5.4-image-2
|
||||||
|
|
||||||
|
# ---- 3. Vision model (interprets where the user clicked) ------
|
||||||
|
VISION_BASE_URL=https://openrouter.ai/api/v1
|
||||||
|
VISION_API_KEY=sk-or-v1-xxx
|
||||||
|
VISION_MODEL=~google/gemini-flash-latest
|
||||||
@@ -14,9 +14,9 @@ export async function POST(req: Request) {
|
|||||||
return NextResponse.json({ error: "Invalid JSON" }, { status: 400 });
|
return NextResponse.json({ error: "Invalid JSON" }, { status: 400 });
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!body.session || !body.prevImageBase64 || !body.click) {
|
if (!body.session || !body.intent) {
|
||||||
return NextResponse.json(
|
return NextResponse.json(
|
||||||
{ error: "session, prevImageBase64, click are required" },
|
{ error: "session and intent are required" },
|
||||||
{ status: 400 },
|
{ status: 400 },
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,32 @@
|
|||||||
|
import { visionTurn } from "@dada/engine";
|
||||||
|
import type { VisionRequest } from "@dada/types";
|
||||||
|
import { NextResponse } from "next/server";
|
||||||
|
import { loadEngineConfig } from "@/lib/config";
|
||||||
|
|
||||||
|
export const runtime = "nodejs";
|
||||||
|
export const maxDuration = 60;
|
||||||
|
|
||||||
|
/**
 * POST /api/vision — interprets a click on the previous frame.
 *
 * Expects a JSON body carrying `session`, `prevImageBase64` and `click`
 * (with numeric `x` / `y`). Responds with whatever `visionTurn` resolves to.
 *
 * Status codes:
 * - 400 when the body is not JSON, is not an object (e.g. the literal `null`),
 *   is missing a required field, or has non-numeric click coordinates.
 * - 500 when config loading or `visionTurn` throws; the error message is
 *   returned in the `error` field.
 */
export async function POST(req: Request) {
  // `req.json()` happily resolves to `null` for the body `null`, so the parsed
  // value is typed as nullable and guarded before any property access.
  let body: VisionRequest | null;
  try {
    body = (await req.json()) as VisionRequest | null;
  } catch {
    return NextResponse.json({ error: "Invalid JSON" }, { status: 400 });
  }

  if (!body?.session || !body.prevImageBase64 || !body.click) {
    return NextResponse.json(
      { error: "session, prevImageBase64, click are required" },
      { status: 400 },
    );
  }

  // The coordinates are multiplied into pixel positions downstream; reject
  // anything that is not a finite number up front instead of failing later.
  if (!Number.isFinite(body.click.x) || !Number.isFinite(body.click.y)) {
    return NextResponse.json(
      { error: "click.x and click.y must be finite numbers" },
      { status: 400 },
    );
  }

  try {
    const config = loadEngineConfig();
    const result = await visionTurn(config, body);
    return NextResponse.json(result);
  } catch (err) {
    const message = err instanceof Error ? err.message : "Unknown error";
    return NextResponse.json({ error: message }, { status: 500 });
  }
}
|
||||||
@@ -13,7 +13,7 @@ export default function RootLayout({
|
|||||||
children: React.ReactNode;
|
children: React.ReactNode;
|
||||||
}) {
|
}) {
|
||||||
return (
|
return (
|
||||||
<html lang="zh-CN">
|
<html lang="zh-CN" suppressHydrationWarning>
|
||||||
<head>
|
<head>
|
||||||
<link rel="preconnect" href="https://fonts.googleapis.com" />
|
<link rel="preconnect" href="https://fonts.googleapis.com" />
|
||||||
<link
|
<link
|
||||||
|
|||||||
+105
-14
@@ -11,6 +11,7 @@ import type {
|
|||||||
Session,
|
Session,
|
||||||
StartResponse,
|
StartResponse,
|
||||||
StoryFrame,
|
StoryFrame,
|
||||||
|
VisionResponse,
|
||||||
} from "@dada/types";
|
} from "@dada/types";
|
||||||
|
|
||||||
function PlayInner() {
|
function PlayInner() {
|
||||||
@@ -28,7 +29,10 @@ function PlayInner() {
|
|||||||
} | null>(null);
|
} | null>(null);
|
||||||
const [turnNum, setTurnNum] = useState(0);
|
const [turnNum, setTurnNum] = useState(0);
|
||||||
const [error, setError] = useState<string | null>(null);
|
const [error, setError] = useState<string | null>(null);
|
||||||
|
|
||||||
const startedRef = useRef(false);
|
const startedRef = useRef(false);
|
||||||
|
const prefetchAbortRef = useRef<AbortController | null>(null);
|
||||||
|
const prefetchRef = useRef<Record<string, Promise<InteractResponse>>>({});
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (startedRef.current) return;
|
if (startedRef.current) return;
|
||||||
@@ -88,14 +92,60 @@ function PlayInner() {
|
|||||||
.catch((e) => setError(String(e)));
|
.catch((e) => setError(String(e)));
|
||||||
}, [params, router]);
|
}, [params, router]);
|
||||||
|
|
||||||
|
// Prefetch next-frame candidates whenever current frame becomes ready.
|
||||||
|
// All three fire in parallel for fastest cache fill. NOT depending on
|
||||||
|
// `phase` — we don't want to abort in-flight prefetches just because
|
||||||
|
// the user clicked. They should continue so handleClick can await them.
|
||||||
|
useEffect(() => {
|
||||||
|
if (!session || !frame) return;
|
||||||
|
|
||||||
|
prefetchAbortRef.current?.abort();
|
||||||
|
const ctrl = new AbortController();
|
||||||
|
prefetchAbortRef.current = ctrl;
|
||||||
|
|
||||||
|
const choices = frame.uiElements.filter((e) => e.kind === "choice");
|
||||||
|
const promises: Record<string, Promise<InteractResponse>> = {};
|
||||||
|
|
||||||
|
for (const choice of choices) {
|
||||||
|
const syntheticIntent: ClickIntent = {
|
||||||
|
targetId: choice.id,
|
||||||
|
targetLabel: choice.label,
|
||||||
|
reasoning: "prefetch",
|
||||||
|
};
|
||||||
|
const p = fetch("/api/interact", {
|
||||||
|
method: "POST",
|
||||||
|
headers: { "Content-Type": "application/json" },
|
||||||
|
body: JSON.stringify({ session, intent: syntheticIntent }),
|
||||||
|
signal: ctrl.signal,
|
||||||
|
}).then(async (r) => {
|
||||||
|
if (!r.ok) {
|
||||||
|
const j = (await r.json().catch(() => ({}))) as { error?: string };
|
||||||
|
throw new Error(j.error ?? r.statusText);
|
||||||
|
}
|
||||||
|
return r.json() as Promise<InteractResponse>;
|
||||||
|
});
|
||||||
|
p.catch(() => {});
|
||||||
|
promises[choice.id] = p;
|
||||||
|
}
|
||||||
|
|
||||||
|
prefetchRef.current = promises;
|
||||||
|
|
||||||
|
return () => {
|
||||||
|
ctrl.abort();
|
||||||
|
};
|
||||||
|
}, [frame?.id, session?.id]);
|
||||||
|
|
||||||
async function handleClick(click: { x: number; y: number }) {
|
async function handleClick(click: { x: number; y: number }) {
|
||||||
if (phase !== "ready" || !session || !imageBase64) return;
|
if (phase !== "ready" || !session || !imageBase64) return;
|
||||||
setPhase("interacting");
|
setPhase("interacting");
|
||||||
setPendingClick(click);
|
setPendingClick(click);
|
||||||
setIntent(null);
|
setIntent(null);
|
||||||
|
|
||||||
|
const cacheSnapshot = prefetchRef.current;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const res = await fetch("/api/interact", {
|
// Step 1: Vision (~4s) — figure out what the user actually clicked
|
||||||
|
const visionRes = await fetch("/api/vision", {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
headers: { "Content-Type": "application/json" },
|
headers: { "Content-Type": "application/json" },
|
||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
@@ -104,20 +154,61 @@ function PlayInner() {
|
|||||||
click,
|
click,
|
||||||
}),
|
}),
|
||||||
});
|
});
|
||||||
if (!res.ok) {
|
if (!visionRes.ok) {
|
||||||
const j = (await res.json().catch(() => ({}))) as { error?: string };
|
const j = (await visionRes.json().catch(() => ({}))) as {
|
||||||
throw new Error(j.error ?? res.statusText);
|
error?: string;
|
||||||
|
};
|
||||||
|
throw new Error(j.error ?? visionRes.statusText);
|
||||||
}
|
}
|
||||||
const data = (await res.json()) as InteractResponse;
|
const { intent: clickIntent } =
|
||||||
|
(await visionRes.json()) as VisionResponse;
|
||||||
|
|
||||||
const updatedHistory = [
|
// Step 2: Cache lookup
|
||||||
...data.session.history,
|
const cached = clickIntent.targetId
|
||||||
{ frame: data.frame },
|
? cacheSnapshot[clickIntent.targetId]
|
||||||
];
|
: undefined;
|
||||||
setSession({ ...data.session, history: updatedHistory });
|
|
||||||
setFrame(data.frame);
|
let result: InteractResponse;
|
||||||
setImageBase64(data.imageBase64);
|
if (cached) {
|
||||||
setIntent(data.intent);
|
// Cache hit — await the prefetched promise (mostly already resolved)
|
||||||
|
result = await cached;
|
||||||
|
// Overwrite the synthetic prefetch intent on history with the real one
|
||||||
|
const lastIdx = result.session.history.length - 1;
|
||||||
|
result = {
|
||||||
|
...result,
|
||||||
|
intent: clickIntent,
|
||||||
|
session: {
|
||||||
|
...result.session,
|
||||||
|
history: result.session.history.map((entry, idx) =>
|
||||||
|
idx === lastIdx
|
||||||
|
? { ...entry, click, intent: clickIntent }
|
||||||
|
: entry,
|
||||||
|
),
|
||||||
|
},
|
||||||
|
};
|
||||||
|
} else {
|
||||||
|
// Cache miss (free-form click) — abort wasted prefetches, run live
|
||||||
|
prefetchAbortRef.current?.abort();
|
||||||
|
const liveRes = await fetch("/api/interact", {
|
||||||
|
method: "POST",
|
||||||
|
headers: { "Content-Type": "application/json" },
|
||||||
|
body: JSON.stringify({ session, intent: clickIntent, click }),
|
||||||
|
});
|
||||||
|
if (!liveRes.ok) {
|
||||||
|
const j = (await liveRes.json().catch(() => ({}))) as {
|
||||||
|
error?: string;
|
||||||
|
};
|
||||||
|
throw new Error(j.error ?? liveRes.statusText);
|
||||||
|
}
|
||||||
|
result = (await liveRes.json()) as InteractResponse;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Apply the result: append new frame to history
|
||||||
|
const updatedHistory = [...result.session.history, { frame: result.frame }];
|
||||||
|
setSession({ ...result.session, history: updatedHistory });
|
||||||
|
setFrame(result.frame);
|
||||||
|
setImageBase64(result.imageBase64);
|
||||||
|
setIntent(clickIntent);
|
||||||
setPendingClick(null);
|
setPendingClick(null);
|
||||||
setTurnNum((t) => t + 1);
|
setTurnNum((t) => t + 1);
|
||||||
setPhase("ready");
|
setPhase("ready");
|
||||||
@@ -189,7 +280,7 @@ function PlayInner() {
|
|||||||
AI · is · painting · the · next · moment
|
AI · is · painting · the · next · moment
|
||||||
</p>
|
</p>
|
||||||
<p className="font-serif italic text-clay-400 text-xs">
|
<p className="font-serif italic text-clay-400 text-xs">
|
||||||
this usually takes 12–20 seconds
|
cached choices resolve in seconds · free-form takes longer
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|||||||
@@ -1,9 +1,12 @@
|
|||||||
"use client";
|
"use client";
|
||||||
|
|
||||||
import { useRef } from "react";
|
import { useRef, useState } from "react";
|
||||||
|
|
||||||
export type Phase = "loading-first" | "ready" | "interacting";
|
export type Phase = "loading-first" | "ready" | "interacting";
|
||||||
|
|
||||||
|
const SHADOW =
|
||||||
|
"0 1px 0 rgba(45,24,16,0.05), 0 36px 64px -28px rgba(45,24,16,0.25), 0 8px 18px -6px rgba(45,24,16,0.10)";
|
||||||
|
|
||||||
export function PlayCanvas({
|
export function PlayCanvas({
|
||||||
imageBase64,
|
imageBase64,
|
||||||
phase,
|
phase,
|
||||||
@@ -15,11 +18,12 @@ export function PlayCanvas({
|
|||||||
pendingClick: { x: number; y: number } | null;
|
pendingClick: { x: number; y: number } | null;
|
||||||
onClick: (click: { x: number; y: number }) => void;
|
onClick: (click: { x: number; y: number }) => void;
|
||||||
}) {
|
}) {
|
||||||
const ref = useRef<HTMLDivElement>(null);
|
const imgRef = useRef<HTMLImageElement>(null);
|
||||||
|
const [dims, setDims] = useState<{ w: number; h: number } | null>(null);
|
||||||
|
|
||||||
function handleClick(e: React.MouseEvent<HTMLDivElement>) {
|
function handleClick(e: React.MouseEvent<HTMLImageElement>) {
|
||||||
if (phase !== "ready" || !ref.current || !imageBase64) return;
|
if (phase !== "ready" || !imgRef.current) return;
|
||||||
const rect = ref.current.getBoundingClientRect();
|
const rect = imgRef.current.getBoundingClientRect();
|
||||||
const x = (e.clientX - rect.left) / rect.width;
|
const x = (e.clientX - rect.left) / rect.width;
|
||||||
const y = (e.clientY - rect.top) / rect.height;
|
const y = (e.clientY - rect.top) / rect.height;
|
||||||
onClick({
|
onClick({
|
||||||
@@ -32,35 +36,29 @@ export function PlayCanvas({
|
|||||||
const dimmed = phase === "interacting";
|
const dimmed = phase === "interacting";
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="w-full max-w-[440px] mx-auto">
|
<div className="w-full flex flex-col items-center">
|
||||||
<div
|
|
||||||
ref={ref}
|
|
||||||
onClick={handleClick}
|
|
||||||
className={`relative aspect-[2/3] w-full overflow-hidden bg-cream-200 select-none ${interactive ? "cursor-pointer" : "cursor-wait"}`}
|
|
||||||
style={{
|
|
||||||
boxShadow:
|
|
||||||
"0 1px 0 rgba(45,24,16,0.05), 0 36px 64px -28px rgba(45,24,16,0.25), 0 8px 18px -6px rgba(45,24,16,0.10)",
|
|
||||||
}}
|
|
||||||
>
|
|
||||||
{imageBase64 ? (
|
{imageBase64 ? (
|
||||||
|
<div className="relative inline-block" style={{ boxShadow: SHADOW }}>
|
||||||
<img
|
<img
|
||||||
key={imageBase64.slice(-48)}
|
key={imageBase64.slice(-48)}
|
||||||
|
ref={imgRef}
|
||||||
src={`data:image/png;base64,${imageBase64}`}
|
src={`data:image/png;base64,${imageBase64}`}
|
||||||
alt="Generated frame"
|
alt="Generated frame"
|
||||||
className={`absolute inset-0 w-full h-full object-cover animate-fade-in transition-opacity duration-700 ease-out ${dimmed ? "opacity-30" : "opacity-100"}`}
|
onClick={handleClick}
|
||||||
|
onLoad={(e) => {
|
||||||
|
const img = e.currentTarget;
|
||||||
|
setDims({ w: img.naturalWidth, h: img.naturalHeight });
|
||||||
|
}}
|
||||||
draggable={false}
|
draggable={false}
|
||||||
|
className={`block w-auto h-auto select-none animate-fade-in transition-opacity duration-700 ease-out ${interactive ? "cursor-pointer" : "cursor-wait"} ${dimmed ? "opacity-30" : "opacity-100"}`}
|
||||||
|
style={{
|
||||||
|
maxWidth: "min(560px, 92vw)",
|
||||||
|
maxHeight: "calc(100dvh - 200px)",
|
||||||
|
}}
|
||||||
/>
|
/>
|
||||||
) : (
|
|
||||||
<div className="absolute inset-0 flex flex-col items-center justify-center gap-4">
|
|
||||||
<div className="w-1.5 h-1.5 bg-clay-500 rounded-full animate-slow-pulse" />
|
|
||||||
<p className="text-[9px] smallcaps text-clay-500 animate-slow-pulse">
|
|
||||||
Painting · the · first · frame
|
|
||||||
</p>
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
|
|
||||||
<div className="absolute inset-x-0 top-0 h-12 bg-gradient-to-b from-clay-900/15 to-transparent pointer-events-none" />
|
<div className="absolute inset-x-0 top-0 h-10 bg-gradient-to-b from-clay-900/12 to-transparent pointer-events-none" />
|
||||||
<div className="absolute inset-x-0 bottom-0 h-12 bg-gradient-to-t from-clay-900/15 to-transparent pointer-events-none" />
|
<div className="absolute inset-x-0 bottom-0 h-10 bg-gradient-to-t from-clay-900/12 to-transparent pointer-events-none" />
|
||||||
|
|
||||||
{pendingClick && (
|
{pendingClick && (
|
||||||
<>
|
<>
|
||||||
@@ -92,10 +90,27 @@ export function PlayCanvas({
|
|||||||
</>
|
</>
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
|
) : (
|
||||||
|
<div
|
||||||
|
className="relative aspect-[2/3] bg-cream-200 flex flex-col items-center justify-center gap-4"
|
||||||
|
style={{
|
||||||
|
width: "min(560px, calc((100dvh - 200px) * 2 / 3), 92vw)",
|
||||||
|
boxShadow: SHADOW,
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<div className="w-1.5 h-1.5 bg-clay-500 rounded-full animate-slow-pulse" />
|
||||||
|
<p className="text-[9px] smallcaps text-clay-500 animate-slow-pulse">
|
||||||
|
Painting · the · first · frame
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
<div className="flex items-center justify-between mt-3 px-1">
|
<div
|
||||||
|
className="flex items-center justify-between mt-3 px-1 w-full"
|
||||||
|
style={{ maxWidth: "min(560px, 92vw)" }}
|
||||||
|
>
|
||||||
<span className="text-[9px] smallcaps text-clay-400 num">
|
<span className="text-[9px] smallcaps text-clay-400 num">
|
||||||
1024 × 1536 · png
|
{dims ? `${dims.w} × ${dims.h} · png` : "—"}
|
||||||
</span>
|
</span>
|
||||||
<span className="text-[9px] smallcaps text-clay-400">
|
<span className="text-[9px] smallcaps text-clay-400">
|
||||||
{phase === "ready" ? "Tap · anywhere" : "···"}
|
{phase === "ready" ? "Tap · anywhere" : "···"}
|
||||||
|
|||||||
Vendored
+2
@@ -1,4 +1,6 @@
|
|||||||
/// <reference types="next" />
|
/// <reference types="next" />
|
||||||
/// <reference types="next/image-types/global" />
|
/// <reference types="next/image-types/global" />
|
||||||
|
import "./.next/dev/types/routes.d.ts";
|
||||||
|
|
||||||
// NOTE: This file should not be edited
|
// NOTE: This file should not be edited
|
||||||
|
// see https://nextjs.org/docs/app/api-reference/config/typescript for more information.
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import type { ProviderConfig } from "@dada/types";
|
import type { ProviderConfig } from "@dada/types";
|
||||||
|
import { fetchWithRetry } from "./fetchWithRetry";
|
||||||
|
|
||||||
export type ChatMessage = {
|
export type ChatMessage = {
|
||||||
role: "system" | "user" | "assistant";
|
role: "system" | "user" | "assistant";
|
||||||
@@ -20,7 +21,7 @@ export async function chat(
|
|||||||
body.response_format = { type: "json_object" };
|
body.response_format = { type: "json_object" };
|
||||||
}
|
}
|
||||||
|
|
||||||
const res = await fetch(url, {
|
const res = await fetchWithRetry(url, {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
headers: {
|
headers: {
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
|
|||||||
@@ -0,0 +1,39 @@
|
|||||||
|
/** `fetch` options plus retry tuning knobs (both stripped before calling `fetch`). */
type RetryInit = RequestInit & { retries?: number; retryDelayMs?: number };

/**
 * `fetch` wrapper that retries transient failures with linear backoff.
 *
 * Retried: 5xx responses and thrown (network-level) errors.
 * Not retried: any response below 500 (2xx, 3xx, 4xx — the server has given a
 * definitive answer) and anything that happens after `init.signal` has been
 * aborted, whatever the abort reason (plain abort, timeout, custom reason).
 *
 * @param url  Request URL.
 * @param init Regular `fetch` init, plus:
 *   - `retries` — extra attempts after the first one (default 2). Negative,
 *     fractional and NaN values are normalised to a non-negative integer so
 *     at least one attempt is always made.
 *   - `retryDelayMs` — base delay; attempt `n` waits `retryDelayMs * n`
 *     (default 1500).
 * @returns The first non-5xx response, or the last 5xx response once the
 *   retry budget is exhausted.
 * @throws The last network error once the budget is exhausted, or the abort
 *   error / abort reason as soon as the signal is aborted.
 */
export async function fetchWithRetry(
  url: string,
  init: RetryInit,
): Promise<Response> {
  const { retries = 2, retryDelayMs = 1500, ...fetchInit } = init;
  const maxRetries = Number.isNaN(retries)
    ? 0
    : Math.max(0, Math.floor(retries));
  const signal = fetchInit.signal ?? undefined;

  for (let attempt = 0; ; attempt++) {
    const isLastAttempt = attempt >= maxRetries;

    try {
      const res = await fetch(url, fetchInit);
      if (res.status < 500 || isLastAttempt) return res;
      // About to discard this response: cancel its body so the underlying
      // connection is released now instead of whenever it gets collected.
      await res.body?.cancel().catch(() => undefined);
    } catch (err) {
      const isAbort =
        signal?.aborted === true ||
        (err instanceof DOMException && err.name === "AbortError");
      if (isAbort || isLastAttempt) throw err;
    }

    // Outside the try block on purpose: an abort during backoff must
    // propagate to the caller rather than be treated as a retryable failure.
    await sleep(retryDelayMs * (attempt + 1), signal);
  }
}

/**
 * Resolves after `ms` milliseconds. If `signal` is (or becomes) aborted, the
 * timer is cleared and the promise rejects with the signal's abort reason.
 */
function sleep(ms: number, signal?: AbortSignal): Promise<void> {
  return new Promise((resolve, reject) => {
    if (signal?.aborted) {
      reject(signal.reason);
      return;
    }
    const onAbort = (): void => {
      clearTimeout(timer);
      reject(signal?.reason);
    };
    const timer = setTimeout(() => {
      signal?.removeEventListener("abort", onAbort);
      resolve();
    }, ms);
    signal?.addEventListener("abort", onAbort, { once: true });
  });
}
|
||||||
@@ -1,20 +1,29 @@
|
|||||||
import type { ProviderConfig } from "@dada/types";
|
import type { ProviderConfig } from "@dada/types";
|
||||||
|
import { fetchWithRetry } from "./fetchWithRetry";
|
||||||
|
|
||||||
|
type ImageUrlPart = { type: string; image_url?: { url?: string } };
|
||||||
|
type ChatResponse = {
|
||||||
|
choices: {
|
||||||
|
message: {
|
||||||
|
content: string | ImageUrlPart[];
|
||||||
|
images?: ImageUrlPart[];
|
||||||
|
};
|
||||||
|
}[];
|
||||||
|
};
|
||||||
|
|
||||||
export async function generateImage(
|
export async function generateImage(
|
||||||
config: ProviderConfig,
|
config: ProviderConfig,
|
||||||
prompt: string,
|
prompt: string,
|
||||||
opts?: { size?: string; quality?: "low" | "medium" | "high" | "auto" },
|
|
||||||
): Promise<string> {
|
): Promise<string> {
|
||||||
const url = `${config.baseUrl.replace(/\/$/, "")}/images/generations`;
|
const url = `${config.baseUrl.replace(/\/$/, "")}/chat/completions`;
|
||||||
const body: Record<string, unknown> = {
|
|
||||||
|
const body = {
|
||||||
model: config.model,
|
model: config.model,
|
||||||
prompt,
|
modalities: ["image", "text"],
|
||||||
size: opts?.size ?? "1024x1536",
|
messages: [{ role: "user", content: prompt }],
|
||||||
quality: opts?.quality ?? "medium",
|
|
||||||
n: 1,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const res = await fetch(url, {
|
const res = await fetchWithRetry(url, {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
headers: {
|
headers: {
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
@@ -25,20 +34,45 @@ export async function generateImage(
|
|||||||
|
|
||||||
if (!res.ok) {
|
if (!res.ok) {
|
||||||
const text = await res.text();
|
const text = await res.text();
|
||||||
throw new Error(`Image API error ${res.status}: ${text}`);
|
throw new Error(`Image API error ${res.status}: ${text.slice(0, 500)}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
const json = (await res.json()) as {
|
const json = (await res.json()) as ChatResponse;
|
||||||
data: { b64_json?: string; url?: string }[];
|
const msg = json.choices[0]?.message;
|
||||||
};
|
if (!msg) throw new Error("Image API returned no message");
|
||||||
const item = json.data[0];
|
|
||||||
if (!item) throw new Error("Image API returned no data");
|
|
||||||
|
|
||||||
if (item.b64_json) return item.b64_json;
|
// 1) OpenRouter-style: msg.images = [{ image_url: { url } }]
|
||||||
if (item.url) {
|
// 2) OpenAI multimodal: msg.content = [{ type: "image_url", image_url: { url } }]
|
||||||
const imgRes = await fetch(item.url);
|
const structured: ImageUrlPart[] = [];
|
||||||
const buf = await imgRes.arrayBuffer();
|
if (msg.images) structured.push(...msg.images);
|
||||||
|
if (Array.isArray(msg.content)) structured.push(...msg.content);
|
||||||
|
for (const part of structured) {
|
||||||
|
const u = part.image_url?.url;
|
||||||
|
if (u) return await urlToBase64(u);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3) provider-style: content is a string with a markdown image, e.g. `![alt](https://…)`,
|
||||||
|
// or a bare URL fragment
|
||||||
|
if (typeof msg.content === "string") {
|
||||||
|
const md = msg.content.match(/!\[[^\]]*\]\((https?:\/\/[^\s)]+)\)/);
|
||||||
|
if (md?.[1]) return await urlToBase64(md[1]);
|
||||||
|
const bare = msg.content.match(/https?:\/\/\S+?\.(?:png|jpg|jpeg|webp)/i);
|
||||||
|
if (bare?.[0]) return await urlToBase64(bare[0]);
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new Error(
|
||||||
|
`No image found in response: ${JSON.stringify(msg).slice(0, 300)}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
async function urlToBase64(url: string): Promise<string> {
|
||||||
|
if (url.startsWith("data:")) {
|
||||||
|
const idx = url.indexOf("base64,");
|
||||||
|
if (idx === -1) throw new Error("data URL is not base64-encoded");
|
||||||
|
return url.slice(idx + "base64,".length);
|
||||||
|
}
|
||||||
|
const res = await fetch(url);
|
||||||
|
if (!res.ok) throw new Error(`Failed to fetch image url: ${res.status}`);
|
||||||
|
const buf = await res.arrayBuffer();
|
||||||
return Buffer.from(buf).toString("base64");
|
return Buffer.from(buf).toString("base64");
|
||||||
}
|
}
|
||||||
throw new Error("Image API returned neither b64_json nor url");
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import type { ProviderConfig } from "@dada/types";
|
import type { ProviderConfig } from "@dada/types";
|
||||||
|
import { fetchWithRetry } from "./fetchWithRetry";
|
||||||
|
|
||||||
export async function interpretClick(
|
export async function interpretClick(
|
||||||
config: ProviderConfig,
|
config: ProviderConfig,
|
||||||
@@ -25,14 +26,24 @@ export async function interpretClick(
|
|||||||
response_format: { type: "json_object" },
|
response_format: { type: "json_object" },
|
||||||
};
|
};
|
||||||
|
|
||||||
const res = await fetch(url, {
|
const timeoutCtrl = new AbortController();
|
||||||
|
const timeoutId = setTimeout(() => timeoutCtrl.abort(), 60_000);
|
||||||
|
|
||||||
|
let res: Response;
|
||||||
|
try {
|
||||||
|
res = await fetchWithRetry(url, {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
headers: {
|
headers: {
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
Authorization: `Bearer ${config.apiKey}`,
|
Authorization: `Bearer ${config.apiKey}`,
|
||||||
},
|
},
|
||||||
body: JSON.stringify(body),
|
body: JSON.stringify(body),
|
||||||
|
signal: timeoutCtrl.signal,
|
||||||
|
retries: 0,
|
||||||
});
|
});
|
||||||
|
} finally {
|
||||||
|
clearTimeout(timeoutId);
|
||||||
|
}
|
||||||
|
|
||||||
if (!res.ok) {
|
if (!res.ok) {
|
||||||
const text = await res.text();
|
const text = await res.text();
|
||||||
|
|||||||
@@ -5,25 +5,31 @@ export async function annotateClick(
|
|||||||
click: { x: number; y: number },
|
click: { x: number; y: number },
|
||||||
): Promise<string> {
|
): Promise<string> {
|
||||||
const buf = Buffer.from(imageBase64, "base64");
|
const buf = Buffer.from(imageBase64, "base64");
|
||||||
const meta = await sharp(buf).metadata();
|
|
||||||
const w = meta.width ?? 1024;
|
const resized = await sharp(buf)
|
||||||
const h = meta.height ?? 1536;
|
.resize({ width: 768, withoutEnlargement: true, fit: "inside" })
|
||||||
|
.png()
|
||||||
|
.toBuffer();
|
||||||
|
|
||||||
|
const meta = await sharp(resized).metadata();
|
||||||
|
const w = meta.width ?? 768;
|
||||||
|
const h = meta.height ?? 1152;
|
||||||
|
|
||||||
const cx = Math.round(click.x * w);
|
const cx = Math.round(click.x * w);
|
||||||
const cy = Math.round(click.y * h);
|
const cy = Math.round(click.y * h);
|
||||||
const r = Math.round(Math.min(w, h) * 0.025);
|
const r = Math.max(8, Math.round(Math.min(w, h) * 0.025));
|
||||||
const stroke = Math.max(3, Math.round(r * 0.25));
|
const stroke = Math.max(2, Math.round(r * 0.25));
|
||||||
|
|
||||||
const svg = `<svg xmlns="http://www.w3.org/2000/svg" width="${w}" height="${h}">
|
const svg = `<svg xmlns="http://www.w3.org/2000/svg" width="${w}" height="${h}" viewBox="0 0 ${w} ${h}">
|
||||||
<circle cx="${cx}" cy="${cy}" r="${r}" fill="rgba(255,40,40,0.55)"
|
<circle cx="${cx}" cy="${cy}" r="${r}" fill="rgba(255,40,40,0.55)"
|
||||||
stroke="rgba(255,255,255,0.95)" stroke-width="${stroke}" />
|
stroke="rgba(255,255,255,0.95)" stroke-width="${stroke}" />
|
||||||
<circle cx="${cx}" cy="${cy}" r="${Math.round(r * 0.25)}"
|
<circle cx="${cx}" cy="${cy}" r="${Math.round(r * 0.25)}"
|
||||||
fill="rgba(255,255,255,1)" />
|
fill="rgba(255,255,255,1)" />
|
||||||
</svg>`;
|
</svg>`;
|
||||||
|
|
||||||
const out = await sharp(buf)
|
const out = await sharp(resized)
|
||||||
.composite([{ input: Buffer.from(svg), top: 0, left: 0 }])
|
.composite([{ input: Buffer.from(svg), top: 0, left: 0 }])
|
||||||
.png()
|
.png({ compressionLevel: 9 })
|
||||||
.toBuffer();
|
.toBuffer();
|
||||||
|
|
||||||
return out.toString("base64");
|
return out.toString("base64");
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
export { startSession, takeTurn } from "./orchestrator";
|
export { startSession, takeTurn, visionTurn } from "./orchestrator";
|
||||||
export { annotateClick } from "./annotate";
|
export { annotateClick } from "./annotate";
|
||||||
export * from "./prompts";
|
export * from "./prompts";
|
||||||
|
|||||||
@@ -1,10 +1,13 @@
|
|||||||
import type {
|
import type {
|
||||||
|
ClickIntent,
|
||||||
EngineConfig,
|
EngineConfig,
|
||||||
InteractRequest,
|
InteractRequest,
|
||||||
InteractResponse,
|
InteractResponse,
|
||||||
Session,
|
Session,
|
||||||
StartRequest,
|
StartRequest,
|
||||||
StartResponse,
|
StartResponse,
|
||||||
|
VisionRequest,
|
||||||
|
VisionResponse,
|
||||||
} from "@dada/types";
|
} from "@dada/types";
|
||||||
import { annotateClick } from "./annotate";
|
import { annotateClick } from "./annotate";
|
||||||
import { direct } from "./director";
|
import { direct } from "./director";
|
||||||
@@ -37,21 +40,27 @@ export async function startSession(
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Vision-only half of a turn.
 *
 * Annotates the previous frame image with the click position, then hands the
 * annotated image and the `uiElements` of the most recent history frame
 * (an empty list when there is none) to `interpret` using the vision provider
 * config.
 *
 * @param config Engine configuration; only `config.vision` is read here.
 * @param req    Request carrying `prevImageBase64`, `click` and `session`.
 * @returns The interpreted intent, wrapped as `{ intent }`.
 */
export async function visionTurn(
  config: EngineConfig,
  req: VisionRequest,
): Promise<VisionResponse> {
  const { prevImageBase64, click, session } = req;

  const markedImage = await annotateClick(prevImageBase64, click);
  const clickableElements = session.history.at(-1)?.frame?.uiElements ?? [];

  return {
    intent: await interpret(config.vision, markedImage, clickableElements),
  };
}
|
||||||
|
|
||||||
export async function takeTurn(
|
export async function takeTurn(
|
||||||
config: EngineConfig,
|
config: EngineConfig,
|
||||||
req: InteractRequest,
|
req: InteractRequest,
|
||||||
): Promise<InteractResponse> {
|
): Promise<InteractResponse> {
|
||||||
const annotated = await annotateClick(req.prevImageBase64, req.click);
|
|
||||||
|
|
||||||
const lastFrame = req.session.history.at(-1)?.frame;
|
|
||||||
const uiElements = lastFrame?.uiElements ?? [];
|
|
||||||
|
|
||||||
const intent = await interpret(config.vision, annotated, uiElements);
|
|
||||||
|
|
||||||
const updatedSession: Session = {
|
const updatedSession: Session = {
|
||||||
...req.session,
|
...req.session,
|
||||||
history: req.session.history.map((entry, idx, arr) =>
|
history: req.session.history.map((entry, idx, arr) =>
|
||||||
idx === arr.length - 1 ? { ...entry, click: req.click, intent } : entry,
|
idx === arr.length - 1
|
||||||
|
? { ...entry, click: req.click, intent: req.intent }
|
||||||
|
: entry,
|
||||||
),
|
),
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -66,6 +75,6 @@ export async function takeTurn(
|
|||||||
session: updatedSession,
|
session: updatedSession,
|
||||||
frame: nextFrame,
|
frame: nextFrame,
|
||||||
imageBase64: nextImage,
|
imageBase64: nextImage,
|
||||||
intent,
|
intent: req.intent,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ export function buildDirectorUserMessage(session: Session): string {
|
|||||||
parts.push(`画风:${session.styleGuide}`);
|
parts.push(`画风:${session.styleGuide}`);
|
||||||
|
|
||||||
if (session.history.length === 0) {
|
if (session.history.length === 0) {
|
||||||
parts.push("\n这是故事的开场。请生成开场画面。");
|
parts.push("\n这是故事的开场。请生成开场画面,严格以 JSON 格式返回。");
|
||||||
return parts.join("\n");
|
return parts.join("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -47,7 +47,7 @@ export function buildDirectorUserMessage(session: Session): string {
|
|||||||
parts.push(beat.join("\n"));
|
parts.push(beat.join("\n"));
|
||||||
});
|
});
|
||||||
|
|
||||||
parts.push("\n请生成下一帧。");
|
parts.push("\n请生成下一帧,严格以 JSON 格式返回。");
|
||||||
return parts.join("\n");
|
return parts.join("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -111,5 +111,5 @@ export function buildVisionUserPrompt(uiElements: UIElement[]): string {
|
|||||||
return `当前画面包含以下已知 UI 元素:
|
return `当前画面包含以下已知 UI 元素:
|
||||||
${list}
|
${list}
|
||||||
|
|
||||||
红点位置即为用户点击位置。请判断用户的意图。`;
|
红点位置即为用户点击位置。请判断用户的意图,并以 JSON 格式返回结果。`;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,5 +8,5 @@ export async function render(
|
|||||||
styleGuide: string,
|
styleGuide: string,
|
||||||
): Promise<string> {
|
): Promise<string> {
|
||||||
const prompt = buildImagePrompt(frame, styleGuide);
|
const prompt = buildImagePrompt(frame, styleGuide);
|
||||||
return generateImage(config, prompt, { size: "1024x1536", quality: "medium" });
|
return generateImage(config, prompt);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -60,12 +60,22 @@ export type StartResponse = {
|
|||||||
imageBase64: string;
|
imageBase64: string;
|
||||||
};
|
};
|
||||||
|
|
||||||
export type InteractRequest = {
|
export type VisionRequest = {
|
||||||
session: Session;
|
session: Session;
|
||||||
prevImageBase64: string;
|
prevImageBase64: string;
|
||||||
click: { x: number; y: number };
|
click: { x: number; y: number };
|
||||||
};
|
};
|
||||||
|
|
||||||
|
export type VisionResponse = {
|
||||||
|
intent: ClickIntent;
|
||||||
|
};
|
||||||
|
|
||||||
|
export type InteractRequest = {
|
||||||
|
session: Session;
|
||||||
|
intent: ClickIntent;
|
||||||
|
click?: { x: number; y: number };
|
||||||
|
};
|
||||||
|
|
||||||
export type InteractResponse = {
|
export type InteractResponse = {
|
||||||
session: Session;
|
session: Session;
|
||||||
frame: StoryFrame;
|
frame: StoryFrame;
|
||||||
|
|||||||
@@ -5,6 +5,7 @@
|
|||||||
"installCommand": "pnpm install",
|
"installCommand": "pnpm install",
|
||||||
"functions": {
|
"functions": {
|
||||||
"apps/web/app/api/interact/route.ts": { "maxDuration": 60 },
|
"apps/web/app/api/interact/route.ts": { "maxDuration": 60 },
|
||||||
|
"apps/web/app/api/vision/route.ts": { "maxDuration": 60 },
|
||||||
"apps/web/app/api/start/route.ts": { "maxDuration": 60 }
|
"apps/web/app/api/start/route.ts": { "maxDuration": 60 }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user