diff --git a/apps/web/app/play/page.tsx b/apps/web/app/play/page.tsx index 4db467f..715004b 100644 --- a/apps/web/app/play/page.tsx +++ b/apps/web/app/play/page.tsx @@ -131,6 +131,7 @@ function PlayInner() { worldSetting: finalPayload.worldSetting, styleGuide: finalPayload.styleGuide, history: [{ frame: data.frame }], + characters: [], }); setFrame(data.frame); setImageBase64(data.imageBase64); @@ -183,6 +184,82 @@ function PlayInner() { }; }, [frame?.id, session?.id]); + // ── Shared result applier ──────────────────────────────────────────── + async function applyInteractResult( + resultPromise: Promise, + clickIntent: ClickIntent, + click?: { x: number; y: number }, + ) { + const result = await resultPromise; + // Overwrite synthetic prefetch intent with the real click intent + const lastIdx = result.session.history.length - 1; + const patched: InteractResponse = { + ...result, + intent: clickIntent, + session: { + ...result.session, + history: result.session.history.map((entry, idx) => + idx === lastIdx ? { ...entry, click, intent: clickIntent } : entry, + ), + }, + }; + const updatedHistory = [ + ...patched.session.history, + { frame: patched.frame }, + ]; + setSession({ ...patched.session, history: updatedHistory }); + setFrame(patched.frame); + setImageBase64(patched.imageBase64); + setIntent(clickIntent); + setPendingClick(null); + setTurnNum((t) => t + 1); + setPhase("ready"); + } + + // ── HTML button click — bypasses Vision entirely ────────────────────── + async function handleChoiceSelect(choiceId: string, label: string) { + if (phase !== "ready" || !session) return; + setPhase("interacting"); + setIntent(null); + + const clickIntent: ClickIntent = { + targetId: choiceId, + targetLabel: label, + reasoning: "direct-button-click", + }; + + const cacheSnapshot = prefetchRef.current; + const cached = cacheSnapshot[choiceId]; + + try { + if (cached) { + // Cache hit — zero extra wait + await applyInteractResult(cached, clickIntent); + } else { + // Cache miss — call interact directly (no Vision roundtrip) + prefetchAbortRef.current?.abort(); + const res = await fetch("/api/interact", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ session, intent: clickIntent }), + }); + if (!res.ok) { + const j = (await res.json().catch(() => ({}))) as { error?: string }; + throw new Error(j.error ?? res.statusText); + } + await applyInteractResult( + res.json() as Promise, + clickIntent, + ); + } + } catch (e) { + setError(String(e)); + setPendingClick(null); + setPhase("ready"); + } + } + + // ── Background / free-form click — still uses Vision ───────────────── async function handleClick(click: { x: number; y: number }) { if (phase !== "ready" || !session || !imageBase64) return; setPhase("interacting"); @@ -192,15 +269,10 @@ function PlayInner() { const cacheSnapshot = prefetchRef.current; try { - // Step 1: Vision (~4s) — figure out what the user actually clicked const visionRes = await fetch("/api/vision", { method: "POST", headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ - session, - prevImageBase64: imageBase64, - click, - }), + body: JSON.stringify({ session, prevImageBase64: imageBase64, click }), }); if (!visionRes.ok) { const j = (await visionRes.json().catch(() => ({}))) as { @@ -211,31 +283,13 @@ function PlayInner() { const { intent: clickIntent } = (await visionRes.json()) as VisionResponse; - // Step 2: Cache lookup const cached = clickIntent.targetId ? cacheSnapshot[clickIntent.targetId] : undefined; - let result: InteractResponse; if (cached) { - // Cache hit — await the prefetched promise (mostly already resolved) - result = await cached; - // Overwrite the synthetic prefetch intent on history with the real one - const lastIdx = result.session.history.length - 1; - result = { - ...result, - intent: clickIntent, - session: { - ...result.session, - history: result.session.history.map((entry, idx) => - idx === lastIdx - ? { ...entry, click, intent: clickIntent } - : entry, - ), - }, - }; + await applyInteractResult(cached, clickIntent, click); } else { - // Cache miss (free-form click) — abort wasted prefetches, run live prefetchAbortRef.current?.abort(); const liveRes = await fetch("/api/interact", { method: "POST", @@ -248,18 +302,12 @@ function PlayInner() { }; throw new Error(j.error ?? liveRes.statusText); } - result = (await liveRes.json()) as InteractResponse; + await applyInteractResult( + liveRes.json() as Promise, + clickIntent, + click, + ); } - - // Apply the result: append new frame to history - const updatedHistory = [...result.session.history, { frame: result.frame }]; - setSession({ ...result.session, history: updatedHistory }); - setFrame(result.frame); - setImageBase64(result.imageBase64); - setIntent(clickIntent); - setPendingClick(null); - setTurnNum((t) => t + 1); - setPhase("ready"); } catch (e) { setError(String(e)); setPendingClick(null); @@ -295,8 +343,10 @@ function PlayInner() { @@ -326,37 +376,22 @@ function PlayInner() { -
+
{phase === "loading-first" && (

正 · 在 · 唤 · 起 · 第 · 一 · 帧

)} - {phase === "interacting" && ( -
-

- AI · 正 · 在 · 描 · 画 · 下 · 一 · 刻 -

-

- 预取选项秒级响应 · 自由点击稍候 -

-
- )} {phase === "ready" && intent?.targetLabel && ( -

- - 上 · 一 · 步 · - - {intent.targetLabel} -

- )} - {phase === "ready" && !intent && turnNum > 0 && ( -

- 点 · 击 · 任 · 意 · 处 · 回 · 应 +

+ 上 · 一 · 步 · + {intent.targetLabel}

)}
diff --git a/apps/web/components/PlayCanvas.tsx b/apps/web/components/PlayCanvas.tsx index 0d04d3c..19405e6 100644 --- a/apps/web/components/PlayCanvas.tsx +++ b/apps/web/components/PlayCanvas.tsx @@ -1,28 +1,124 @@ "use client"; -import { useRef, useState } from "react"; +import { useEffect, useRef, useState } from "react"; +import type { StoryFrame } from "@yume/types"; export type Phase = "loading-first" | "ready" | "interacting"; const SHADOW = "0 1px 0 rgba(45,24,16,0.05), 0 36px 64px -28px rgba(45,24,16,0.25), 0 8px 18px -6px rgba(45,24,16,0.10)"; +// ── Typewriter hook ──────────────────────────────────────────────────── +function useTypewriter(text: string, speed = 28): string { + const [displayed, setDisplayed] = useState(""); + const textRef = useRef(text); + + useEffect(() => { + // Reset immediately when the text changes + setDisplayed(""); + textRef.current = text; + if (!text) return; + + let i = 0; + const id = setInterval(() => { + i += 1; + setDisplayed(text.slice(0, i)); + if (i >= text.length) clearInterval(id); + }, speed); + return () => clearInterval(id); + }, [text, speed]); + + return displayed; +} + +// ── Choice button ────────────────────────────────────────────────────── +function ChoiceButton({ + index, + label, + disabled, + onClick, +}: { + index: number; + label: string; + disabled: boolean; + onClick: () => void; +}) { + return ( + + ); +} + +// ── Main component ───────────────────────────────────────────────────── export function PlayCanvas({ imageBase64, phase, + frame, pendingClick, onClick, + onSelectChoice, fullViewport = false, }: { imageBase64: string | null; phase: Phase; + frame: StoryFrame | null; pendingClick: { x: number; y: number } | null; onClick: (click: { x: number; y: number }) => void; + onSelectChoice?: (choiceId: string, label: string) => void; fullViewport?: boolean; }) { const imgRef = useRef(null); const [dims, setDims] = useState<{ w: number; h: number } | null>(null); + const choices = frame?.uiElements.filter((e) => e.kind === "choice") ?? []; + const dialogueText = frame + ? [frame.speaker ? `${frame.speaker}:${frame.line ?? ""}` : frame.line, frame.narration] + .filter(Boolean) + .join("\n") + : ""; + const narrationOnly = !frame?.speaker && !frame?.line && !!frame?.narration; + const displayBody = frame?.speaker + ? frame.line ?? "" + : frame?.narration ?? ""; + + const typedBody = useTypewriter(displayBody, 30); + function handleClick(e: React.MouseEvent) { if (phase !== "ready" || !imgRef.current) return; const rect = imgRef.current.getBoundingClientRect(); @@ -37,16 +133,13 @@ export function PlayCanvas({ const interactive = phase === "ready" && !!imageBase64; const dimmed = phase === "interacting"; - // 16:9 sizing — letterbox into available viewport const sizeStyle = fullViewport ? { maxWidth: "100vw", maxHeight: "100dvh" } - : { maxWidth: "96vw", maxHeight: "calc(100dvh - 280px)" }; + : { maxWidth: "96vw", maxHeight: "calc(100dvh - 200px)" }; - // Placeholder needs an explicit width for aspect-video to compute height. - // Pick the largest 16:9 box that fits in the available viewport. const placeholderWidth = fullViewport ? "min(100vw, calc(100dvh * 16 / 9))" - : "min(96vw, calc((100dvh - 280px) * 16 / 9))"; + : "min(96vw, calc((100dvh - 200px) * 16 / 9))"; return (
+ {/* ── Background image ── */} + {/* ── Top/bottom gradient vignette ── */} {!fullViewport && ( <>
-
)} + {/* ══════════════════════════════════════════════════════════ + PREFAB UI OVERLAY — rendered on top of image + ══════════════════════════════════════════════════════════ */} + {frame && ( +
+ {/* ── Choices row ── */} + {choices.length > 0 && ( +
+ {choices.map((choice, i) => ( + onSelectChoice?.(choice.id, choice.label)} + /> + ))} +
+ )} + + {/* ── Dialogue / narration box ── */} + {(frame.narration || frame.line) && ( +
+ {/* Inner golden corner decoration */} + + ✦ + + + ✦ + + + {/* Speaker name tag */} + {frame.speaker && ( +

+ {frame.speaker} +

+ )} + + {/* Main text */} +

+ {typedBody} + {/* Narration only — also show secondary line */} + {frame.speaker && frame.narration && ( + + {frame.narration} + + )} +

+ + {/* Scroll hint ▼ */} + + ▼ + +
+ )} +
+ )} + + {/* Loading/interacting dim overlay */} + {phase === "interacting" && ( +
+

+ AI · 正 · 在 · 描 · 画 · 下 · 一 · 刻 +

+
+ )} + + {/* Click ripple indicator */} {pendingClick && ( <>
- {phase === "ready" ? "任 · 意 · 点 · 击" : "···"} + {phase === "ready" ? (choices.length > 0 ? "选 · 择 · 一 · 项" : "任 · 意 · 点 · 击") : "···"}
)} diff --git a/packages/engine/src/prompts.ts b/packages/engine/src/prompts.ts index 4c2be55..d2594d0 100644 --- a/packages/engine/src/prompts.ts +++ b/packages/engine/src/prompts.ts @@ -1,4 +1,5 @@ -import type { Session, StoryFrame, UIElement } from "@yume/types"; +import type { Character, Session, StoryFrame, UIElement } from "@yume/types"; + export const DIRECTOR_SYSTEM = `你是一个交互视觉小说的编剧导演。每次根据世界观、画风和历史,输出当前画面要呈现的内容。 @@ -19,7 +20,7 @@ export const DIRECTOR_SYSTEM = `你是一个交互视觉小说的编剧导演。 - narration / line 中文,scenePrompt 英文 - 默认 3 个 choice 元素,可以根据情境额外加 menu/item/custom(罕见) - 选项必须能切实推进剧情,且互不重复 -- scenePrompt 描述当前的画面,不要包括 UI 元素,UI 元素会另外渲染 +- scenePrompt 描述当前的画面,不要包括 UI 元素 - 单帧旁白与台词加起来控制在 80 字以内 - 不要输出 JSON 以外的任何文本`; @@ -55,44 +56,25 @@ export function buildImagePrompt( frame: StoryFrame, styleGuide: string, ): string { - const choiceList = frame.uiElements - .filter((e) => e.kind === "choice") - .map((e, i) => `${i + 1}. ${e.label}`) - .join("\n"); - - const extraUI = frame.uiElements - .filter((e) => e.kind !== "choice") - .map((e) => `- ${e.kind}: ${e.label}`) - .join("\n"); - - return `Generate a landscape 16:9 cinematic visual novel UI screen, widescreen format (1792x1024 or equivalent). + return `Generate a cinematic landscape background illustration, 16:9 widescreen (1792x1024). ART STYLE: ${styleGuide} -(Match this style consistently — for the scene art AND the UI elements. -For example: anime → traditional galgame dialogue box; cyberpunk → neon HUD; -stick figure → hand-drawn paper UI; cinematic realism → minimalist film overlay.) -SCENE (fills the entire 16:9 canvas as a cinematic widescreen background): +SCENE (fill the ENTIRE canvas — no UI elements, no text overlays): ${frame.scenePrompt} -DIALOGUE PANEL (cinematic bottom band, semi-transparent, spans full width, occupies the lower ~25% of the frame): -${frame.speaker ? `Speaker name displayed prominently above the dialogue text: "${frame.speaker}"` : "Narration only — no speaker tag."} -${frame.line ? `Dialogue text: "${frame.line}"` : ""} -${frame.narration ? `Narration text (italic if speaker also present): "${frame.narration}"` : ""} - -CHOICE PANEL (three clearly tappable buttons, arranged HORIZONTALLY in a row across the lower-third of the frame, ABOVE or overlaid on the dialogue band; equally sized; centered in the safe zone of the 16:9 canvas): -${choiceList} -${extraUI ? `\nADDITIONAL UI ELEMENTS:\n${extraUI}` : ""} - -CRITICAL LAYOUT REQUIREMENTS: -- 16:9 LANDSCAPE orientation — wider than tall. Do NOT produce a portrait/square image. -- All text and buttons must be inside the central safe zone (avoid the outer 8% on every side), so the viewport can letterbox without cropping any UI. -- All text must be perfectly legible (high contrast, readable size). -- Choice buttons must be clearly distinguishable as interactive elements, arranged horizontally left-to-right in the order listed above. -- Choice text must NOT be cropped, NOT overlap with character faces or the dialogue panel. -- The image is the entire interface — no external chrome will be added.`; +STRICT RULES — NEVER violate these: +- DO NOT draw any dialogue boxes, speech bubbles, text panels, or any rectangular overlay. +- DO NOT draw any buttons, choice options, menu items, or interactive UI elements. +- DO NOT render any Chinese or English text anywhere in the image. +- DO NOT add any HUD, interface chrome, or game UI elements. +- The image is a PURE BACKGROUND SCENE ONLY. All UI will be added as HTML on top. +- 16:9 LANDSCAPE orientation — wider than tall. No portrait or square output. +- Leave the bottom 35% of the frame relatively uncluttered (darker or softer) so overlaid UI panels remain readable. +- Characters or key scene elements should be positioned in the upper 65% of the frame.`; } + export const VISION_SYSTEM_PROMPT = `你是视觉理解助手。用户在视觉小说界面上点击了红色圆点位置,你要根据红点位置和图中可见的 UI 元素,判断用户的意图。 必须输出严格 JSON: