Initial commit: AI-driven visual novel scaffold

- Monorepo (pnpm workspace): apps/web + packages/{types,ai-client,engine}
- Next.js 16 web app with three-stage AI orchestration
- Three independently configurable providers: text LLM, image generator, vision model
- Warm minimalist editorial UI design
- One-click Vercel deploy ready

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-09 13:29:58 +08:00
commit cbd95bbea2
45 changed files with 1855 additions and 0 deletions
@@ -0,0 +1,19 @@
+{
+  "name": "@dada/engine",
+  "version": "0.1.0",
+  "private": true,
+  "type": "module",
+  "main": "./src/index.ts",
+  "types": "./src/index.ts",
+  "exports": {
+    ".": "./src/index.ts"
+  },
+  "scripts": {
+    "typecheck": "tsc --noEmit"
+  },
+  "dependencies": {
+    "@dada/ai-client": "workspace:*",
+    "@dada/types": "workspace:*",
+    "sharp": "^0.33.5"
+  }
+}
@@ -0,0 +1,30 @@
+import sharp from "sharp";
+
+/**
+ * Draws a click marker onto an image and returns the result as a base64 PNG.
+ *
+ * The marker is a translucent red disc with a white outline and a solid white
+ * centre dot. It is built as a full-size SVG overlay and composited onto the
+ * source image with sharp.
+ *
+ * @param imageBase64 - Base64-encoded source image.
+ * @param click - Click position as fractions of the image width and height.
+ *   Values outside [0, 1] are clamped to the nearest edge.
+ * @returns The annotated image, PNG-encoded and base64-encoded.
+ * @throws RangeError if either click coordinate is not a finite number.
+ */
+export async function annotateClick(
+  imageBase64: string,
+  click: { x: number; y: number },
+): Promise<string> {
+  // NaN / Infinity would otherwise be interpolated straight into the SVG markup.
+  if (!Number.isFinite(click.x) || !Number.isFinite(click.y)) {
+    throw new RangeError(
+      `Click coordinates must be finite numbers, got (${click.x}, ${click.y})`,
+    );
+  }
+  const clamp01 = (value: number): number => Math.min(1, Math.max(0, value));
+
+  const buf = Buffer.from(imageBase64, "base64");
+  const meta = await sharp(buf).metadata();
+  // Fallback dimensions used when sharp cannot report the image size.
+  const w = meta.width ?? 1024;
+  const h = meta.height ?? 1536;
+
+  const cx = Math.round(clamp01(click.x) * w);
+  const cy = Math.round(clamp01(click.y) * h);
+  // Marker scales with the shorter side; never let it collapse to a zero
+  // radius on very small images.
+  const r = Math.max(1, Math.round(Math.min(w, h) * 0.025));
+  const stroke = Math.max(3, Math.round(r * 0.25));
+  const dot = Math.max(1, Math.round(r * 0.25));
+
+  const svg = `<svg xmlns="http://www.w3.org/2000/svg" width="${w}" height="${h}">
+    <circle cx="${cx}" cy="${cy}" r="${r}" fill="rgba(255,40,40,0.55)"
+            stroke="rgba(255,255,255,0.95)" stroke-width="${stroke}" />
+    <circle cx="${cx}" cy="${cy}" r="${dot}"
+            fill="rgba(255,255,255,1)" />
+  </svg>`;
+
+  const out = await sharp(buf)
+    .composite([{ input: Buffer.from(svg), top: 0, left: 0 }])
+    .png()
+    .toBuffer();
+
+  return out.toString("base64");
+}
@@ -0,0 +1,37 @@
+import { chat } from "@dada/ai-client";
+import type { ProviderConfig, Session, StoryFrame, UIElement } from "@dada/types";
+import { parseJsonLoose } from "./jsonParser";
+import { DIRECTOR_SYSTEM, buildDirectorUserMessage } from "./prompts";
+
+/** JSON shape the director model is asked to produce. Untrusted until checked. */
+type DirectorOutput = {
+  narration?: string;
+  speaker?: string;
+  line?: string;
+  scenePrompt: string;
+  uiElements: UIElement[];
+};
+
+/** Returns the trimmed text, or undefined when the value is not a string or is blank. */
+function optionalText(value: unknown): string | undefined {
+  if (typeof value !== "string") return undefined;
+  const trimmed = value.trim();
+  return trimmed === "" ? undefined : trimmed;
+}
+
+/**
+ * Asks the text model for the next story frame of a session.
+ *
+ * Sends the director system prompt plus a user message built from the session,
+ * parses the reply as JSON and normalises it into a StoryFrame.
+ *
+ * @param config - Provider configuration passed through to `chat`.
+ * @param session - Session whose world setting, style and history drive the prompt.
+ * @returns A frame with a timestamp-based id. Blank or non-string text fields
+ *   become undefined, and a missing `uiElements` becomes an empty array.
+ * @throws Error if the reply cannot be parsed, is not a JSON object, or lacks a
+ *   non-empty string `scenePrompt`.
+ */
+export async function direct(
+  config: ProviderConfig,
+  session: Session,
+): Promise<StoryFrame> {
+  const raw = await chat(
+    config,
+    [
+      { role: "system", content: DIRECTOR_SYSTEM },
+      { role: "user", content: buildDirectorUserMessage(session) },
+    ],
+    { temperature: 0.9, responseFormat: "json_object" },
+  );
+
+  // Model output is untrusted: valid JSON may still be null, an array or a scalar.
+  const parsed = parseJsonLoose<unknown>(raw);
+  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
+    throw new Error(`Director output is not a JSON object: ${raw.slice(0, 200)}`);
+  }
+  const output = parsed as Partial<Record<keyof DirectorOutput, unknown>>;
+
+  // Without a scene description the image prompt would contain "undefined".
+  const scenePrompt = output.scenePrompt;
+  if (typeof scenePrompt !== "string" || scenePrompt.trim() === "") {
+    throw new Error(
+      `Director output is missing a non-empty scenePrompt: ${raw.slice(0, 200)}`,
+    );
+  }
+
+  // Entries are only checked for being objects; their fields are not validated.
+  const uiElements = Array.isArray(output.uiElements)
+    ? (output.uiElements.filter(
+        (entry) => entry !== null && typeof entry === "object",
+      ) as UIElement[])
+    : [];
+
+  return {
+    id: `frame_${Date.now()}`,
+    narration: optionalText(output.narration),
+    speaker: optionalText(output.speaker),
+    line: optionalText(output.line),
+    scenePrompt,
+    uiElements,
+  };
+}
@@ -0,0 +1,3 @@
+// Re-exports: the two session functions from the orchestrator, the click
+// annotator, and everything the prompts module exports.
+export { startSession, takeTurn } from "./orchestrator";
+export { annotateClick } from "./annotate";
+export * from "./prompts";
@@ -0,0 +1,27 @@
+/**
+ * Parses JSON out of raw model output, tolerating common wrappers.
+ *
+ * Candidates are tried in order, and the first one that parses wins:
+ *   1. the whole trimmed string;
+ *   2. the contents of the first Markdown code fence (optionally tagged `json`);
+ *   3. the span from the first "{" to the last "}".
+ *
+ * The parsed value is cast to `T` without any runtime validation.
+ *
+ * @param raw - Text returned by the model.
+ * @returns The first successfully parsed candidate.
+ * @throws Error with the first 200 characters of `raw` when no candidate parses,
+ *   including when the brace-delimited span is itself invalid JSON.
+ */
+export function parseJsonLoose<T>(raw: string): T {
+  const trimmed = raw.trim();
+  const candidates: string[] = [trimmed];
+
+  const fenced = trimmed.match(/```(?:json)?\s*([\s\S]*?)\s*```/);
+  if (fenced?.[1]) {
+    candidates.push(fenced[1]);
+  }
+
+  const first = trimmed.indexOf("{");
+  const last = trimmed.lastIndexOf("}");
+  if (first !== -1 && last > first) {
+    candidates.push(trimmed.slice(first, last + 1));
+  }
+
+  for (const candidate of candidates) {
+    try {
+      return JSON.parse(candidate) as T;
+    } catch {
+      // Not valid JSON; try the next, looser candidate.
+    }
+  }
+
+  throw new Error(`Failed to parse JSON from model output: ${raw.slice(0, 200)}`);
+}
@@ -0,0 +1,71 @@
+import type {
+  EngineConfig,
+  InteractRequest,
+  InteractResponse,
+  Session,
+  StartRequest,
+  StartResponse,
+} from "@dada/types";
+import { annotateClick } from "./annotate";
+import { direct } from "./director";
+import { render } from "./renderer";
+import { interpret } from "./vision";
+
+/**
+ * Builds a session id of the form `s_<epoch-ms>_<suffix>`, where the suffix is
+ * up to six base-36 characters taken from `Math.random()`.
+ */
+function newSessionId(): string {
+  const timestamp = Date.now();
+  // Characters 2..7 of the base-36 rendering skip the leading "0.".
+  const suffix = Math.random().toString(36).substring(2, 8);
+  return ["s", timestamp, suffix].join("_");
+}
+
+/**
+ * Starts a new story: creates a session from the request, asks the director
+ * for the opening frame, then renders that frame to an image.
+ *
+ * @param config - Engine configuration; `config.text` and `config.image` are used.
+ * @param req - World setting and style guide. Both are trimmed before use.
+ * @returns The new session id, the opening frame and its rendered image. The
+ *   Session object itself is not part of the response.
+ */
+export async function startSession(
+  config: EngineConfig,
+  req: StartRequest,
+): Promise<StartResponse> {
+  const sessionId = newSessionId();
+  const session: Session = {
+    id: sessionId,
+    createdAt: Date.now(),
+    worldSetting: req.worldSetting.trim(),
+    styleGuide: req.styleGuide.trim(),
+    history: [],
+  };
+
+  // The image prompt is derived from the frame, so the two calls must run in sequence.
+  const openingFrame = await direct(config.text, session);
+  const openingImage = await render(
+    config.image,
+    openingFrame,
+    session.styleGuide,
+  );
+
+  const response: StartResponse = {
+    sessionId,
+    frame: openingFrame,
+    imageBase64: openingImage,
+  };
+  return response;
+}
+
+/**
+ * Advances the story by one turn in response to a click on the previous image.
+ *
+ * Steps: mark the click on the previous image, have the vision model interpret
+ * it against the last frame's UI elements, record the click and intent on the
+ * last history entry, then generate and render the next frame.
+ *
+ * @param config - Engine configuration; the vision, text and image providers are used.
+ * @param req - Current session, previous image (base64) and click position.
+ * @returns The updated session, the next frame, its image and the interpreted
+ *   intent. The returned history has the same length as the request's; the
+ *   new frame is returned separately rather than appended.
+ */
+export async function takeTurn(
+  config: EngineConfig,
+  req: InteractRequest,
+): Promise<InteractResponse> {
+  const { session, click } = req;
+
+  const markedImage = await annotateClick(req.prevImageBase64, click);
+
+  // An empty history yields no known UI elements.
+  const knownElements = session.history.at(-1)?.frame?.uiElements ?? [];
+  const intent = await interpret(config.vision, markedImage, knownElements);
+
+  // Copy the history, attaching click + intent to the final entry only.
+  const lastIndex = session.history.length - 1;
+  const history = session.history.map((entry, idx) =>
+    idx === lastIndex ? { ...entry, click, intent } : entry,
+  );
+  const updatedSession: Session = { ...session, history };
+
+  const frame = await direct(config.text, updatedSession);
+  const imageBase64 = await render(
+    config.image,
+    frame,
+    updatedSession.styleGuide,
+  );
+
+  return { session: updatedSession, frame, imageBase64, intent };
+}
@@ -0,0 +1,115 @@
+import type { Session, StoryFrame, UIElement } from "@dada/types";
+
+/**
+ * System prompt for the director (story-writing) model, written in Chinese.
+ *
+ * It tells the model to reply with strict JSON holding `narration`, `speaker`,
+ * `line`, an English `scenePrompt` for the image model, and a `uiElements`
+ * array (three `choice` entries by default, with menu/item/custom as rare
+ * extras). It also caps narration plus dialogue at 80 characters per frame.
+ */
+export const DIRECTOR_SYSTEM = `你是一个交互视觉小说的编剧导演。每次根据世界观、画风和历史，输出当前画面要呈现的内容。
+
+必须输出严格 JSON，结构如下：
+{
+  "narration": "本帧旁白（可空字符串）",
+  "speaker": "本帧说话角色名（可空）",
+  "line": "本帧角色台词（可空）",
+  "scenePrompt": "英文场景描述，给图像模型用，描述画面里看到什么",
+  "uiElements": [
+    { "id": "choice_1", "kind": "choice", "label": "选项一文字（≤15 字）" },
+    { "id": "choice_2", "kind": "choice", "label": "选项二文字（≤15 字）" },
+    { "id": "choice_3", "kind": "choice", "label": "选项三文字（≤15 字）" }
+  ]
+}
+
+规则：
+- narration / line 中文，scenePrompt 英文
+- 默认 3 个 choice 元素，可以根据情境额外加 menu/item/custom（罕见）
+- 选项必须能切实推进剧情，且互不重复
+- scenePrompt 描述当前的画面，不要包括 UI 元素，UI 元素会另外渲染
+- 单帧旁白与台词加起来控制在 80 字以内
+- 不要输出 JSON 以外的任何文本`;
+
+/**
+ * Builds the user message sent to the director model for a session.
+ *
+ * The message always begins with the world setting and the art style. With an
+ * empty history it then asks for the opening frame. Otherwise it lists one
+ * beat per history entry (narration, dialogue and the user's action, where
+ * present) and asks for the next frame.
+ *
+ * @param session - Session supplying `worldSetting`, `styleGuide` and `history`.
+ * @returns The newline-joined prompt text.
+ */
+export function buildDirectorUserMessage(session: Session): string {
+  const preamble = [
+    `世界观：${session.worldSetting}`,
+    `画风：${session.styleGuide}`,
+  ];
+
+  if (session.history.length === 0) {
+    return [...preamble, "\n这是故事的开场。请生成开场画面。"].join("\n");
+  }
+
+  const beats = session.history.map((entry, idx) => {
+    const { frame, intent } = entry;
+    const lines = [`【第 ${idx + 1} 帧】`];
+    if (frame.narration) {
+      lines.push(`旁白：${frame.narration}`);
+    }
+    if (frame.line) {
+      // A line without a speaker is attributed to "?".
+      lines.push(`${frame.speaker ?? "?"}：${frame.line}`);
+    }
+    if (intent) {
+      // Prefer the clicked element's label, then the free-form action.
+      const action = intent.targetLabel ?? intent.freeformAction ?? "未知";
+      lines.push(`用户行为：${action}`);
+    }
+    return lines.join("\n");
+  });
+
+  return [...preamble, "\n历史：", ...beats, "\n请生成下一帧。"].join("\n");
+}
+
+/**
+ * Builds the English prompt sent to the image model for one frame.
+ *
+ * The prompt describes a vertical 9:16 visual-novel screen: the art style, the
+ * scene, a dialogue panel (speaker / line / narration where present), a
+ * numbered list of `choice` elements and, if any exist, the remaining UI
+ * elements under an "ADDITIONAL UI ELEMENTS" heading.
+ *
+ * @param frame - Frame providing `scenePrompt`, the optional text fields and `uiElements`.
+ * @param styleGuide - Art style description inserted verbatim.
+ * @returns The complete prompt text.
+ */
+export function buildImagePrompt(
+  frame: StoryFrame,
+  styleGuide: string,
+): string {
+  // One pass: choices are numbered in encounter order, everything else is
+  // listed as "- kind: label".
+  const choiceLines: string[] = [];
+  const extraLines: string[] = [];
+  for (const element of frame.uiElements) {
+    if (element.kind === "choice") {
+      choiceLines.push(`${choiceLines.length + 1}. ${element.label}`);
+    } else {
+      extraLines.push(`- ${element.kind}: ${element.label}`);
+    }
+  }
+  const choiceList = choiceLines.join("\n");
+  const extraUI = extraLines.join("\n");
+
+  // Absent fields leave an empty line in the dialogue panel section.
+  const speakerTag = frame.speaker
+    ? `Speaker name displayed prominently: "${frame.speaker}"`
+    : "Narration only — no speaker tag.";
+  const dialogueText = frame.line ? `Dialogue text: "${frame.line}"` : "";
+  const narrationText = frame.narration
+    ? `Narration text (italic if speaker also present): "${frame.narration}"`
+    : "";
+  const extraSection = extraUI ? `\nADDITIONAL UI ELEMENTS:\n${extraUI}` : "";
+
+  return `Generate a vertical 9:16 visual novel UI screen.
+
+ART STYLE: ${styleGuide}
+(Match this style consistently — for the scene art AND the UI elements.
+For example: anime → traditional galgame dialogue box; cyberpunk → neon HUD;
+stick figure → hand-drawn paper UI; cinematic realism → minimalist film overlay.)
+
+SCENE (occupies the upper portion of the image):
+${frame.scenePrompt}
+
+DIALOGUE PANEL (semi-transparent, lower-middle area):
+${speakerTag}
+${dialogueText}
+${narrationText}
+
+CHOICE PANEL (bottom area, three clearly tappable buttons stacked or arranged):
+${choiceList}
+${extraSection}
+
+CRITICAL LAYOUT REQUIREMENTS:
+- All text must be perfectly legible (high contrast, readable size)
+- Choice buttons must be clearly distinguishable as interactive elements
+- Choice text must NOT be cropped, NOT overlap with character faces
+- The image is the entire interface — no external chrome will be added
+- Choices appear in the order listed above`;
+}
+
+/**
+ * Instructions for the vision model, written in Chinese.
+ *
+ * The model is told that the user clicked where the red dot is, and must reply
+ * with strict JSON holding `targetId` (null for a non-UI area), `targetLabel`
+ * (null if unknown), a one-sentence `reasoning`, and `freeformAction` (a guess
+ * at the intent when a scene object rather than an option was clicked,
+ * otherwise an empty string).
+ */
+export const VISION_SYSTEM_PROMPT = `你是视觉理解助手。用户在视觉小说界面上点击了红色圆点位置，你要根据红点位置和图中可见的 UI 元素，判断用户的意图。
+
+必须输出严格 JSON：
+{
+  "targetId": "对应的 UI 元素 id（choice_1 / choice_2 / choice_3 / menu / ...），如果点击的是非 UI 区域则为 null",
+  "targetLabel": "对应 UI 元素的文字描述（如 '告诉她真相'），未知则为 null",
+  "reasoning": "一句话说明判断理由",
+  "freeformAction": "如果用户点的是场景中的物件/角色等非选项区域，描述他可能的意图（如 '想拿起桌上的钥匙'），否则空字符串"
+}
+
+不要输出 JSON 以外的任何文本。`;
+
+/**
+ * Builds the per-request part of the vision prompt: a list of the UI elements
+ * known to be on screen (id, kind and label), followed by a request to judge
+ * the user's intent from the red dot.
+ *
+ * @param uiElements - Elements to list. An empty array leaves the list blank.
+ * @returns The prompt text, in Chinese.
+ */
+export function buildVisionUserPrompt(uiElements: UIElement[]): string {
+  const described: string[] = [];
+  for (const element of uiElements) {
+    described.push(
+      `- id="${element.id}" kind="${element.kind}" label="${element.label}"`,
+    );
+  }
+
+  // The empty entry produces the blank line between the list and the closing request.
+  return [
+    "当前画面包含以下已知 UI 元素：",
+    described.join("\n"),
+    "",
+    "红点位置即为用户点击位置。请判断用户的意图。",
+  ].join("\n");
+}
@@ -0,0 +1,12 @@
+import { generateImage } from "@dada/ai-client";
+import type { ProviderConfig, StoryFrame } from "@dada/types";
+import { buildImagePrompt } from "./prompts";
+
+/**
+ * Renders a story frame to an image: builds the image prompt from the frame
+ * and style guide, then calls the image provider at 1024x1536, medium quality.
+ *
+ * @param config - Provider configuration passed through to `generateImage`.
+ * @param frame - Frame to render.
+ * @param styleGuide - Art style description included in the prompt.
+ * @returns Whatever `generateImage` resolves to (presumably base64 image data,
+ *   since callers store it as `imageBase64` — confirm against ai-client).
+ */
+export async function render(
+  config: ProviderConfig,
+  frame: StoryFrame,
+  styleGuide: string,
+): Promise<string> {
+  const options = { size: "1024x1536", quality: "medium" } as const;
+  const imagePrompt = buildImagePrompt(frame, styleGuide);
+  const image = await generateImage(config, imagePrompt, options);
+  return image;
+}
@@ -0,0 +1,26 @@
+import { interpretClick } from "@dada/ai-client";
+import type { ClickIntent, ProviderConfig, UIElement } from "@dada/types";
+import { parseJsonLoose } from "./jsonParser";
+import { VISION_SYSTEM_PROMPT, buildVisionUserPrompt } from "./prompts";
+
+/**
+ * Asks the vision model what the user meant by a click on an annotated image.
+ *
+ * The vision instructions and the list of known UI elements are joined into a
+ * single prompt and sent with the image. The reply is parsed as JSON and
+ * normalised into a ClickIntent.
+ *
+ * @param config - Provider configuration passed through to `interpretClick`.
+ * @param annotatedImageBase64 - Base64 image with the click already marked on it.
+ * @param uiElements - UI elements known to be on screen.
+ * @returns The intent. Non-string `targetId` / `targetLabel` become null, a
+ *   non-string `reasoning` becomes "", and an empty or non-string
+ *   `freeformAction` becomes undefined.
+ * @throws Error if the reply cannot be parsed or is not a JSON object.
+ */
+export async function interpret(
+  config: ProviderConfig,
+  annotatedImageBase64: string,
+  uiElements: UIElement[],
+): Promise<ClickIntent> {
+  const userPrompt = `${VISION_SYSTEM_PROMPT}\n\n${buildVisionUserPrompt(uiElements)}`;
+  const raw = await interpretClick(config, annotatedImageBase64, userPrompt);
+
+  // Model output is untrusted: valid JSON may still be null, an array or a scalar.
+  const parsed = parseJsonLoose<unknown>(raw);
+  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
+    throw new Error(`Vision output is not a JSON object: ${raw.slice(0, 200)}`);
+  }
+  const fields = parsed as {
+    targetId?: unknown;
+    targetLabel?: unknown;
+    reasoning?: unknown;
+    freeformAction?: unknown;
+  };
+
+  const stringOrNull = (value: unknown): string | null =>
+    typeof value === "string" ? value : null;
+  const { reasoning, freeformAction } = fields;
+
+  return {
+    targetId: stringOrNull(fields.targetId),
+    targetLabel: stringOrNull(fields.targetLabel),
+    reasoning: typeof reasoning === "string" ? reasoning : "",
+    // The prompt asks for "" when there is no free-form action; map that to undefined.
+    freeformAction:
+      typeof freeformAction === "string" && freeformAction !== ""
+        ? freeformAction
+        : undefined,
+  };
+}
@@ -0,0 +1,7 @@
+{
+  "extends": "../../tsconfig.base.json",
+  "compilerOptions": {
+    "noEmit": true
+  },
+  "include": ["src/**/*"]
+}