Initial commit: AI-driven visual novel scaffold

- Monorepo (pnpm workspace): apps/web + packages/{types,ai-client,engine}
- Next.js 16 web app with three-stage AI orchestration
- Three independently configurable providers: text LLM, image generator, vision model
- Warm minimalist editorial UI design
- One-click Vercel deploy ready

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
yuanzonghao
2026-05-09 13:29:58 +08:00
commit cbd95bbea2
45 changed files with 1855 additions and 0 deletions
+19
View File
@@ -0,0 +1,19 @@
{
"name": "@dada/engine",
"version": "0.1.0",
"private": true,
"type": "module",
"main": "./src/index.ts",
"types": "./src/index.ts",
"exports": {
".": "./src/index.ts"
},
"scripts": {
"typecheck": "tsc --noEmit"
},
"dependencies": {
"@dada/ai-client": "workspace:*",
"@dada/types": "workspace:*",
"sharp": "^0.33.5"
}
}
+30
View File
@@ -0,0 +1,30 @@
import sharp from "sharp";
/**
 * Draws a red click marker onto an image so a vision model can see where the
 * user tapped.
 *
 * @param imageBase64 - Base64-encoded source image.
 * @param click - Click position as fractions of the image size: `x` is
 *   multiplied by the image width and `y` by the image height. Values outside
 *   `[0, 1]` are clamped to the nearest edge so the marker is always drawn on
 *   the canvas.
 * @returns The annotated image, re-encoded as PNG and returned as base64.
 * @throws RangeError if either coordinate is not a finite number.
 */
export async function annotateClick(
  imageBase64: string,
  click: { x: number; y: number },
): Promise<string> {
  // NaN/Infinity would be interpolated verbatim into the SVG attributes below
  // and produce an invalid overlay; fail early with a clear message instead.
  if (!Number.isFinite(click.x) || !Number.isFinite(click.y)) {
    throw new RangeError(
      `Click coordinates must be finite numbers, got (${click.x}, ${click.y})`,
    );
  }

  const source = Buffer.from(imageBase64, "base64");
  const meta = await sharp(source).metadata();
  // Fallback dimensions used only when the metadata reports no size.
  const width = meta.width ?? 1024;
  const height = meta.height ?? 1536;

  const clampToUnit = (value: number): number => Math.min(1, Math.max(0, value));
  const cx = Math.round(clampToUnit(click.x) * width);
  const cy = Math.round(clampToUnit(click.y) * height);

  // Marker scales with the shorter image side; keep every radius at least 1px
  // so the marker never degenerates to nothing on very small images.
  const radius = Math.max(1, Math.round(Math.min(width, height) * 0.025));
  const stroke = Math.max(3, Math.round(radius * 0.25));
  const innerRadius = Math.max(1, Math.round(radius * 0.25));

  const svg = [
    `<svg xmlns="http://www.w3.org/2000/svg" width="${width}" height="${height}">`,
    `<circle cx="${cx}" cy="${cy}" r="${radius}" fill="rgba(255,40,40,0.55)"`,
    `stroke="rgba(255,255,255,0.95)" stroke-width="${stroke}" />`,
    `<circle cx="${cx}" cy="${cy}" r="${innerRadius}"`,
    `fill="rgba(255,255,255,1)" />`,
    `</svg>`,
  ].join("\n");

  const annotated = await sharp(source)
    .composite([{ input: Buffer.from(svg), top: 0, left: 0 }])
    .png()
    .toBuffer();
  return annotated.toString("base64");
}
+37
View File
@@ -0,0 +1,37 @@
import { chat } from "@dada/ai-client";
import type { ProviderConfig, Session, StoryFrame, UIElement } from "@dada/types";
import { parseJsonLoose } from "./jsonParser";
import { DIRECTOR_SYSTEM, buildDirectorUserMessage } from "./prompts";
/**
 * Shape of the JSON object the director model is asked to produce.
 * The text fields are optional; `scenePrompt` and `uiElements` are declared
 * as required, although the parsed model output is not validated against
 * this shape by the type system.
 */
interface DirectorOutput {
  /** Scene description that is forwarded to the image prompt. */
  scenePrompt: string;
  /** UI elements offered on this frame. */
  uiElements: UIElement[];
  /** Narration text for the frame, if any. */
  narration?: string;
  /** Name of the speaking character, if any. */
  speaker?: string;
  /** Spoken line for the frame, if any. */
  line?: string;
}
/**
 * Asks the text model for the next story frame and normalizes its JSON reply.
 *
 * The reply comes from a language model and is therefore untrusted: text
 * fields that are not strings are dropped, a non-array `uiElements` becomes an
 * empty list, and a missing or blank `scenePrompt` is rejected instead of being
 * passed on to image generation.
 *
 * @param config - Provider configuration passed through to `chat`.
 * @param session - Session whose world setting, style and history build the prompt.
 * @returns The frame described by the model, with a time-based `id`.
 * @throws Error if the reply is not a JSON object or has no usable `scenePrompt`
 *   (in addition to whatever `chat` / `parseJsonLoose` throw).
 */
export async function direct(
  config: ProviderConfig,
  session: Session,
): Promise<StoryFrame> {
  const raw = await chat(
    config,
    [
      { role: "system", content: DIRECTOR_SYSTEM },
      { role: "user", content: buildDirectorUserMessage(session) },
    ],
    { temperature: 0.9, responseFormat: "json_object" },
  );
  const parsed = parseJsonLoose<DirectorOutput>(raw);

  if (parsed === null || typeof parsed !== "object") {
    throw new Error(`Director output is not a JSON object: ${raw.slice(0, 200)}`);
  }

  // Trimmed string, or undefined for blank / non-string values.
  const optionalText = (value: unknown): string | undefined =>
    typeof value === "string" ? value.trim() || undefined : undefined;

  const scenePrompt: unknown = parsed.scenePrompt;
  if (typeof scenePrompt !== "string" || scenePrompt.trim() === "") {
    throw new Error(`Director output is missing scenePrompt: ${raw.slice(0, 200)}`);
  }

  return {
    id: `frame_${Date.now()}`,
    narration: optionalText(parsed.narration),
    speaker: optionalText(parsed.speaker),
    line: optionalText(parsed.line),
    scenePrompt,
    // Downstream code iterates this list, so never let a non-array through.
    uiElements: Array.isArray(parsed.uiElements) ? parsed.uiElements : [],
  };
}
+3
View File
@@ -0,0 +1,3 @@
export { startSession, takeTurn } from "./orchestrator";
export { annotateClick } from "./annotate";
export * from "./prompts";
+27
View File
@@ -0,0 +1,27 @@
/**
 * Parses JSON out of model output that may be wrapped in extra text.
 *
 * Candidates are tried in order and the first one that parses wins:
 * 1. the whole (trimmed) string,
 * 2. the contents of the first Markdown code fence (``` or ```json),
 * 3. the span from the first `{` to the last `}`.
 *
 * @param raw - Raw text returned by the model.
 * @returns The parsed value, cast to `T` without runtime validation.
 * @throws Error with a "Failed to parse JSON from model output" message
 *   (including the first 200 characters of `raw`) when no candidate parses.
 */
export function parseJsonLoose<T>(raw: string): T {
  const trimmed = raw.trim();
  const candidates: string[] = [trimmed];

  const fenced = trimmed.match(/```(?:json)?\s*([\s\S]*?)\s*```/);
  if (fenced?.[1]) {
    candidates.push(fenced[1]);
  }

  const first = trimmed.indexOf("{");
  const last = trimmed.lastIndexOf("}");
  if (first !== -1 && last > first) {
    candidates.push(trimmed.slice(first, last + 1));
  }

  for (const candidate of candidates) {
    try {
      return JSON.parse(candidate) as T;
    } catch {
      // Not valid JSON; fall through to the next candidate.
    }
  }

  // Every failure path ends here, so callers always get the same descriptive
  // error rather than a bare SyntaxError from the last attempt.
  throw new Error(`Failed to parse JSON from model output: ${raw.slice(0, 200)}`);
}
+71
View File
@@ -0,0 +1,71 @@
import type {
EngineConfig,
InteractRequest,
InteractResponse,
Session,
StartRequest,
StartResponse,
} from "@dada/types";
import { annotateClick } from "./annotate";
import { direct } from "./director";
import { render } from "./renderer";
import { interpret } from "./vision";
/**
 * Builds a session id of the form `s_<epoch millis>_<random base-36 suffix>`.
 * The suffix is at most six characters taken from `Math.random()`.
 */
function newSessionId(): string {
  const timestamp = Date.now();
  const suffix = Math.random().toString(36).slice(2, 8);
  return ["s", timestamp, suffix].join("_");
}
/**
 * Starts a new story: creates a session with empty history, asks the director
 * for the opening frame and renders its image.
 *
 * @param config - Engine configuration; `config.text` drives the director and
 *   `config.image` drives rendering.
 * @param req - Start request; `worldSetting` and `styleGuide` are trimmed
 *   before use.
 * @returns The new session id together with the opening frame and its image.
 */
export async function startSession(
  config: EngineConfig,
  req: StartRequest,
): Promise<StartResponse> {
  const worldSetting = req.worldSetting.trim();
  const styleGuide = req.styleGuide.trim();

  const session: Session = {
    id: newSessionId(),
    createdAt: Date.now(),
    worldSetting,
    styleGuide,
    history: [],
  };

  const openingFrame = await direct(config.text, session);
  const openingImage = await render(config.image, openingFrame, session.styleGuide);

  return {
    sessionId: session.id,
    frame: openingFrame,
    imageBase64: openingImage,
  };
}
/**
 * Advances the story by one turn in response to a click on the previous image.
 *
 * Steps: mark the click on the previous image, let the vision model interpret
 * it against the last frame's UI elements, record click and intent on the last
 * history entry, then direct and render the next frame.
 *
 * @param config - Engine configuration (`vision`, `text` and `image` providers).
 * @param req - Interaction request carrying the session, the previous image
 *   and the click position.
 * @returns The updated session, the next frame with its image, and the
 *   interpreted intent. The next frame is not appended to the history here.
 */
export async function takeTurn(
  config: EngineConfig,
  req: InteractRequest,
): Promise<InteractResponse> {
  const annotated = await annotateClick(req.prevImageBase64, req.click);

  const lastEntry = req.session.history.at(-1);
  const knownElements = lastEntry?.frame?.uiElements ?? [];
  const intent = await interpret(config.vision, annotated, knownElements);

  // Rebuild the history with click and intent attached to the final entry;
  // an empty history stays empty.
  const earlierEntries = req.session.history.slice(0, -1);
  const history =
    lastEntry === undefined
      ? earlierEntries
      : [...earlierEntries, { ...lastEntry, click: req.click, intent }];
  const updatedSession: Session = { ...req.session, history };

  const nextFrame = await direct(config.text, updatedSession);
  const nextImage = await render(config.image, nextFrame, updatedSession.styleGuide);

  return {
    session: updatedSession,
    frame: nextFrame,
    imageBase64: nextImage,
    intent,
  };
}
+115
View File
@@ -0,0 +1,115 @@
import type { Session, StoryFrame, UIElement } from "@dada/types";
/**
 * System prompt for the director (story-writing) model.
 *
 * Written in Chinese. It instructs the model to reply with strict JSON
 * containing `narration`, `speaker`, `line`, `scenePrompt` and `uiElements`,
 * and lists the rules for those fields (narration/line in Chinese,
 * `scenePrompt` in English, three `choice` elements by default, no text
 * outside the JSON). The text is sent to the model verbatim, so any edit here
 * changes model behavior.
 */
export const DIRECTOR_SYSTEM = `你是一个交互视觉小说的编剧导演。每次根据世界观、画风和历史,输出当前画面要呈现的内容。
必须输出严格 JSON,结构如下:
{
"narration": "本帧旁白(可空字符串)",
"speaker": "本帧说话角色名(可空)",
"line": "本帧角色台词(可空)",
"scenePrompt": "英文场景描述,给图像模型用,描述画面里看到什么",
"uiElements": [
{ "id": "choice_1", "kind": "choice", "label": "选项一文字(≤15 字)" },
{ "id": "choice_2", "kind": "choice", "label": "选项二文字(≤15 字)" },
{ "id": "choice_3", "kind": "choice", "label": "选项三文字(≤15 字)" }
]
}
规则:
- narration / line 中文,scenePrompt 英文
- 默认 3 个 choice 元素,可以根据情境额外加 menu/item/custom(罕见)
- 选项必须能切实推进剧情,且互不重复
- scenePrompt 描述当前的画面,不要包括 UI 元素,UI 元素会另外渲染
- 单帧旁白与台词加起来控制在 80 字以内
- 不要输出 JSON 以外的任何文本`;
/**
 * Builds the user message sent to the director model.
 *
 * The message always starts with the world setting and style guide. With an
 * empty history it asks for the opening frame; otherwise it lists every past
 * frame (narration, "speaker:line", and the user's action when an intent was
 * recorded) and asks for the next frame.
 *
 * @param session - Session providing `worldSetting`, `styleGuide` and `history`.
 * @returns The newline-joined prompt text (Chinese).
 */
export function buildDirectorUserMessage(session: Session): string {
  const parts: string[] = [
    `世界观:${session.worldSetting}`,
    `画风:${session.styleGuide}`,
  ];

  if (session.history.length === 0) {
    parts.push("\n这是故事的开场。请生成开场画面。");
    return parts.join("\n");
  }

  parts.push("\n历史:");
  session.history.forEach((entry, idx) => {
    const f = entry.frame;
    const beat: string[] = [`【第 ${idx + 1} 帧】`];
    if (f.narration) beat.push(`旁白:${f.narration}`);
    // Separate speaker and line so the model can tell the name from the text.
    if (f.line) beat.push(`${f.speaker || "?"}:${f.line}`);
    if (entry.intent) {
      // `||` rather than `??`: an empty label must not mask the freeform action.
      const action =
        entry.intent.targetLabel || entry.intent.freeformAction || "未知";
      beat.push(`用户行为:${action}`);
    }
    parts.push(beat.join("\n"));
  });
  parts.push("\n请生成下一帧。");
  return parts.join("\n");
}
/**
 * Builds the English prompt for the image model, describing the whole visual
 * novel screen: art style, scene, dialogue panel, choice buttons and any
 * additional UI elements.
 *
 * `choice` elements are numbered in their original order; every other element
 * kind is listed under "ADDITIONAL UI ELEMENTS" (the section is omitted when
 * there are none).
 *
 * @param frame - Frame providing the scene prompt, speaker/line/narration and UI elements.
 * @param styleGuide - Art style description inserted verbatim.
 * @returns The complete prompt text.
 */
export function buildImagePrompt(
  frame: StoryFrame,
  styleGuide: string,
): string {
  // Split the UI elements into numbered choices and "everything else" in one pass.
  const choiceRows: string[] = [];
  const extraRows: string[] = [];
  for (const element of frame.uiElements) {
    if (element.kind === "choice") {
      choiceRows.push(`${choiceRows.length + 1}. ${element.label}`);
    } else {
      extraRows.push(`- ${element.kind}: ${element.label}`);
    }
  }
  const choiceList = choiceRows.join("\n");
  const extraUI = extraRows.join("\n");

  let speakerRow = "Narration only — no speaker tag.";
  if (frame.speaker) {
    speakerRow = `Speaker name displayed prominently: "${frame.speaker}"`;
  }
  const dialogueRow = frame.line ? `Dialogue text: "${frame.line}"` : "";
  const narrationRow = frame.narration
    ? `Narration text (italic if speaker also present): "${frame.narration}"`
    : "";
  const extraSection = extraUI ? `\nADDITIONAL UI ELEMENTS:\n${extraUI}` : "";

  const rows: string[] = [
    "Generate a vertical 9:16 visual novel UI screen.",
    `ART STYLE: ${styleGuide}`,
    "(Match this style consistently — for the scene art AND the UI elements.",
    "For example: anime → traditional galgame dialogue box; cyberpunk → neon HUD;",
    "stick figure → hand-drawn paper UI; cinematic realism → minimalist film overlay.)",
    "SCENE (occupies the upper portion of the image):",
    `${frame.scenePrompt}`,
    "DIALOGUE PANEL (semi-transparent, lower-middle area):",
    speakerRow,
    dialogueRow,
    narrationRow,
    "CHOICE PANEL (bottom area, three clearly tappable buttons stacked or arranged):",
    choiceList,
    extraSection,
    "CRITICAL LAYOUT REQUIREMENTS:",
    "- All text must be perfectly legible (high contrast, readable size)",
    "- Choice buttons must be clearly distinguishable as interactive elements",
    "- Choice text must NOT be cropped, NOT overlap with character faces",
    "- The image is the entire interface — no external chrome will be added",
    "- Choices appear in the order listed above",
  ];
  return rows.join("\n");
}
/**
 * Instruction text for the vision model that interprets a click.
 *
 * Written in Chinese. It tells the model that the user's click is shown as a
 * red dot on the screenshot and asks for strict JSON with `targetId`,
 * `targetLabel`, `reasoning` and `freeformAction`. The text is sent to the
 * model verbatim, so any edit here changes model behavior.
 */
export const VISION_SYSTEM_PROMPT = `你是视觉理解助手。用户在视觉小说界面上点击了红色圆点位置,你要根据红点位置和图中可见的 UI 元素,判断用户的意图。
必须输出严格 JSON
{
"targetId": "对应的 UI 元素 id(choice_1 / choice_2 / choice_3 / menu / ...),如果点击的是非 UI 区域则为 null",
"targetLabel": "对应 UI 元素的文字描述(如 '告诉她真相'),未知则为 null",
"reasoning": "一句话说明判断理由",
"freeformAction": "如果用户点的是场景中的物件/角色等非选项区域,描述他可能的意图(如 '想拿起桌上的钥匙'),否则空字符串"
}
不要输出 JSON 以外的任何文本。`;
/**
 * Builds the per-request part of the vision prompt: one line for each known
 * UI element (id, kind, label), followed by a reminder that the red dot marks
 * the user's click.
 *
 * @param uiElements - UI elements known to be on the current screen.
 * @returns The prompt text (Chinese); the element list is empty when no
 *   elements are given.
 */
export function buildVisionUserPrompt(uiElements: UIElement[]): string {
  const rows: string[] = [];
  for (const element of uiElements) {
    rows.push(`- id="${element.id}" kind="${element.kind}" label="${element.label}"`);
  }
  const header = "当前画面包含以下已知 UI 元素:";
  const footer = "红点位置即为用户点击位置。请判断用户的意图。";
  return [header, rows.join("\n"), footer].join("\n");
}
+12
View File
@@ -0,0 +1,12 @@
import { generateImage } from "@dada/ai-client";
import type { ProviderConfig, StoryFrame } from "@dada/types";
import { buildImagePrompt } from "./prompts";
/**
 * Renders a story frame to an image by building the image prompt and handing
 * it to the image provider.
 *
 * NOTE(review): the prompt text asks for a "vertical 9:16" screen while the
 * requested size here is 1024x1536 (2:3) — confirm which one is intended.
 *
 * @param config - Provider configuration passed through to `generateImage`.
 * @param frame - Frame to render.
 * @param styleGuide - Art style description used in the prompt.
 * @returns Whatever `generateImage` resolves to (callers treat it as a
 *   base64 image).
 */
export async function render(
  config: ProviderConfig,
  frame: StoryFrame,
  styleGuide: string,
): Promise<string> {
  const options = { size: "1024x1536", quality: "medium" } as const;
  return generateImage(config, buildImagePrompt(frame, styleGuide), options);
}
+26
View File
@@ -0,0 +1,26 @@
import { interpretClick } from "@dada/ai-client";
import type { ClickIntent, ProviderConfig, UIElement } from "@dada/types";
import { parseJsonLoose } from "./jsonParser";
import { VISION_SYSTEM_PROMPT, buildVisionUserPrompt } from "./prompts";
/**
 * Asks the vision model what the user meant by a click and normalizes the
 * JSON reply into a `ClickIntent`.
 *
 * The instruction text and the list of known UI elements are combined into a
 * single prompt, separated by a blank line. Missing fields are defaulted:
 * `targetId` / `targetLabel` to `null`, `reasoning` to `""`, and an empty
 * `freeformAction` to `undefined`.
 *
 * @param config - Provider configuration passed through to `interpretClick`.
 * @param annotatedImageBase64 - Screenshot with the click marker drawn on it.
 * @param uiElements - UI elements known to be on the screen.
 * @returns The interpreted click intent.
 */
export async function interpret(
  config: ProviderConfig,
  annotatedImageBase64: string,
  uiElements: UIElement[],
): Promise<ClickIntent> {
  type RawIntent = {
    targetId?: string | null;
    targetLabel?: string | null;
    reasoning?: string;
    freeformAction?: string;
  };

  const prompt = [VISION_SYSTEM_PROMPT, buildVisionUserPrompt(uiElements)].join("\n\n");
  const reply = await interpretClick(config, annotatedImageBase64, prompt);
  const { targetId, targetLabel, reasoning, freeformAction } =
    parseJsonLoose<RawIntent>(reply);

  return {
    targetId: targetId ?? null,
    targetLabel: targetLabel ?? null,
    reasoning: reasoning ?? "",
    freeformAction: freeformAction || undefined,
  };
}
+7
View File
@@ -0,0 +1,7 @@
{
"extends": "../../tsconfig.base.json",
"compilerOptions": {
"noEmit": true
},
"include": ["src/**/*"]
}