diff --git a/apps/web/app/play/page.tsx b/apps/web/app/play/page.tsx
index 4db467f..715004b 100644
--- a/apps/web/app/play/page.tsx
+++ b/apps/web/app/play/page.tsx
@@ -131,6 +131,7 @@ function PlayInner() {
           worldSetting: finalPayload.worldSetting,
           styleGuide: finalPayload.styleGuide,
           history: [{ frame: data.frame }],
+          characters: [],
         });
         setFrame(data.frame);
         setImageBase64(data.imageBase64);
@@ -183,6 +184,82 @@ function PlayInner() {
     };
   }, [frame?.id, session?.id]);
 
+  // ── Shared result applier ────────────────────────────────────────────
+  // Awaits an interact response, stamps the real click (if any) and intent
+  // onto its last history entry, appends the new frame, and returns the
+  // player to the "ready" phase. Rejections propagate to the caller.
+  async function applyInteractResult(
+    resultPromise: Promise<InteractResponse>,
+    clickIntent: ClickIntent,
+    click?: { x: number; y: number },
+  ) {
+    const response = await resultPromise;
+    const { session: nextSession, frame: nextFrame } = response;
+
+    // The final entry may still carry a synthetic prefetch intent;
+    // replace it with what the player actually did.
+    const tailIdx = nextSession.history.length - 1;
+    const stampedHistory = nextSession.history.map((entry, idx) => {
+      if (idx !== tailIdx) return entry;
+      return { ...entry, click, intent: clickIntent };
+    });
+
+    setSession({
+      ...nextSession,
+      history: [...stampedHistory, { frame: nextFrame }],
+    });
+    setFrame(nextFrame);
+    setImageBase64(response.imageBase64);
+    setIntent(clickIntent);
+    setPendingClick(null);
+    setTurnNum((t) => t + 1);
+    setPhase("ready");
+  }
+
+  // ── HTML button click — bypasses Vision entirely ──────────────────────
+  // Builds the intent straight from the clicked choice, then applies either
+  // the prefetched response for that choice or a live /api/interact call.
+  async function handleChoiceSelect(choiceId: string, label: string) {
+    if (!session || phase !== "ready") return;
+    setPhase("interacting");
+    setIntent(null);
+
+    const clickIntent: ClickIntent = {
+      targetId: choiceId,
+      targetLabel: label,
+      reasoning: "direct-button-click",
+    };
+    const prefetched = prefetchRef.current[choiceId];
+
+    try {
+      if (prefetched) {
+        // Cache hit — reuse the prefetched promise, no new request.
+        await applyInteractResult(prefetched, clickIntent);
+        return;
+      }
+
+      // Cache miss — abort outstanding prefetches and call interact directly.
+      prefetchAbortRef.current?.abort();
+      const res = await fetch("/api/interact", {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({ session, intent: clickIntent }),
+      });
+      if (!res.ok) {
+        const errBody = (await res.json().catch(() => ({}))) as { error?: string };
+        throw new Error(errBody.error ?? res.statusText);
+      }
+
+      const live = res.json() as Promise<InteractResponse>;
+      await applyInteractResult(live, clickIntent);
+    } catch (e) {
+      setError(String(e));
+      setPendingClick(null);
+      setPhase("ready");
+    }
+  }
+
+  // ── Background / free-form click — still uses Vision ─────────────────
   async function handleClick(click: { x: number; y: number }) {
     if (phase !== "ready" || !session || !imageBase64) return;
     setPhase("interacting");
@@ -192,15 +269,10 @@ function PlayInner() {
     const cacheSnapshot = prefetchRef.current;
 
     try {
-      // Step 1: Vision (~4s) — figure out what the user actually clicked
       const visionRes = await fetch("/api/vision", {
         method: "POST",
         headers: { "Content-Type": "application/json" },
-        body: JSON.stringify({
-          session,
-          prevImageBase64: imageBase64,
-          click,
-        }),
+        body: JSON.stringify({ session, prevImageBase64: imageBase64, click }),
       });
       if (!visionRes.ok) {
         const j = (await visionRes.json().catch(() => ({}))) as {
@@ -211,31 +283,13 @@ function PlayInner() {
       const { intent: clickIntent } =
         (await visionRes.json()) as VisionResponse;
 
-      // Step 2: Cache lookup
       const cached = clickIntent.targetId
         ? cacheSnapshot[clickIntent.targetId]
         : undefined;
 
-      let result: InteractResponse;
       if (cached) {
-        // Cache hit — await the prefetched promise (mostly already resolved)
-        result = await cached;
-        // Overwrite the synthetic prefetch intent on history with the real one
-        const lastIdx = result.session.history.length - 1;
-        result = {
-          ...result,
-          intent: clickIntent,
-          session: {
-            ...result.session,
-            history: result.session.history.map((entry, idx) =>
-              idx === lastIdx
-                ? { ...entry, click, intent: clickIntent }
-                : entry,
-            ),
-          },
-        };
+        await applyInteractResult(cached, clickIntent, click);
       } else {
-        // Cache miss (free-form click) — abort wasted prefetches, run live
         prefetchAbortRef.current?.abort();
         const liveRes = await fetch("/api/interact", {
           method: "POST",
@@ -248,18 +302,12 @@ function PlayInner() {
           };
           throw new Error(j.error ?? liveRes.statusText);
         }
-        result = (await liveRes.json()) as InteractResponse;
+        await applyInteractResult(
+          liveRes.json() as Promise<InteractResponse>,
+          clickIntent,
+          click,
+        );
       }
-
-      // Apply the result: append new frame to history
-      const updatedHistory = [...result.session.history, { frame: result.frame }];
-      setSession({ ...result.session, history: updatedHistory });
-      setFrame(result.frame);
-      setImageBase64(result.imageBase64);
-      setIntent(clickIntent);
-      setPendingClick(null);
-      setTurnNum((t) => t + 1);
-      setPhase("ready");
     } catch (e) {
       setError(String(e));
       setPendingClick(null);
@@ -295,8 +343,10 @@ function PlayInner() {
         <PlayCanvas
           imageBase64={imageBase64}
           phase={phase}
+          frame={frame}
           pendingClick={pendingClick}
           onClick={handleClick}
+          onSelectChoice={handleChoiceSelect}
           fullViewport
         />
       </div>
@@ -326,37 +376,22 @@ function PlayInner() {
         <PlayCanvas
           imageBase64={imageBase64}
           phase={phase}
+          frame={frame}
           pendingClick={pendingClick}
           onClick={handleClick}
+          onSelectChoice={handleChoiceSelect}
         />
 
-        <div className="mt-7 md:mt-9 max-w-md w-full text-center min-h-[64px] flex items-center justify-center">
+        <div className="mt-4 max-w-md w-full text-center min-h-[28px] flex items-center justify-center">
           {phase === "loading-first" && (
             <p className="text-[10px] smallcaps text-clay-500 animate-slow-pulse">
               正 · 在 · 唤 · 起 · 第 · 一 · 帧
             </p>
           )}
-          {phase === "interacting" && (
-            <div className="flex flex-col items-center gap-2 animate-fade-in">
-              <p className="text-[10px] smallcaps text-clay-500 animate-slow-pulse">
-                AI · 正 · 在 · 描 · 画 · 下 · 一 · 刻
-              </p>
-              <p className="font-serif italic text-clay-400 text-xs">
-                预取选项秒级响应 · 自由点击稍候
-              </p>
-            </div>
-          )}
           {phase === "ready" && intent?.targetLabel && (
-            <p className="font-serif italic text-clay-500 text-base leading-relaxed animate-fade-in max-w-[320px]">
-              <span className="text-[9px] smallcaps not-italic text-clay-400 mr-2 align-middle">
-                上 · 一 · 步 ·
-              </span>
-              <span className="align-middle">{intent.targetLabel}</span>
-            </p>
-          )}
-          {phase === "ready" && !intent && turnNum > 0 && (
-            <p className="text-[10px] smallcaps text-clay-400 animate-fade-in">
-              点 · 击 · 任 · 意 · 处 · 回 · 应
+            <p className="text-[9px] smallcaps text-clay-400 animate-fade-in">
+              <span className="mr-2">上 · 一 · 步 ·</span>
+              <span className="text-clay-600">{intent.targetLabel}</span>
             </p>
           )}
         </div>
diff --git a/apps/web/components/PlayCanvas.tsx b/apps/web/components/PlayCanvas.tsx
index 0d04d3c..19405e6 100644
--- a/apps/web/components/PlayCanvas.tsx
+++ b/apps/web/components/PlayCanvas.tsx
@@ -1,28 +1,124 @@
 "use client";
 
-import { useRef, useState } from "react";
+import { useEffect, useRef, useState } from "react";
+import type { StoryFrame } from "@yume/types";
 
 export type Phase = "loading-first" | "ready" | "interacting";
 
 const SHADOW =
   "0 1px 0 rgba(45,24,16,0.05), 0 36px 64px -28px rgba(45,24,16,0.25), 0 8px 18px -6px rgba(45,24,16,0.10)";
 
+// ── Typewriter hook ────────────────────────────────────────────────────
+// Reveals `text` one character per `speed` ms, restarting when either changes.
+function useTypewriter(text: string, speed = 28): string {
+  const [displayed, setDisplayed] = useState("");
+
+  useEffect(() => {
+    // Reset immediately when the text changes
+    setDisplayed("");
+    if (!text) return;
+    // Step by code point so a surrogate pair (emoji, rare CJK) is never split.
+    const chars = Array.from(text);
+    let i = 0;
+    const id = setInterval(() => {
+      i += 1;
+      setDisplayed(chars.slice(0, i).join(""));
+      if (i >= chars.length) clearInterval(id);
+    }, speed);
+    return () => clearInterval(id);
+  }, [text, speed]);
+
+  return displayed;
+}
+
+// ── Choice button ──────────────────────────────────────────────────────
+// One numbered choice: renders "{index + 1}." followed by `label`.
+function ChoiceButton(props: {
+  index: number;
+  label: string;
+  disabled: boolean;
+  onClick: () => void;
+}) {
+  const { index, label, disabled, onClick } = props;
+  // Dark translucent plate with a gold rim.
+  const plate = {
+    background: "rgba(20, 14, 8, 0.68)",
+    border: "1.5px solid rgba(180, 140, 80, 0.65)",
+    borderRadius: "6px",
+    backdropFilter: "blur(8px)",
+    WebkitBackdropFilter: "blur(8px)",
+    boxShadow: "0 2px 12px rgba(0,0,0,0.4), inset 0 1px 0 rgba(200,165,90,0.12)",
+  };
+  // Brighter rim and tint that fade in while the button is hovered.
+  const shimmer = {
+    background: "rgba(180,140,60,0.10)",
+    border: "1.5px solid rgba(200,165,90,0.85)",
+  };
+  return (
+    <button
+      type="button"
+      disabled={disabled}
+      onClick={onClick}
+      className="group relative flex-1 min-w-0 px-4 py-3 text-left transition-all duration-200
+        disabled:opacity-50 disabled:cursor-wait"
+      style={plate}
+    >
+      <span
+        className="absolute inset-0 rounded-[5px] opacity-0 group-hover:opacity-100 transition-opacity duration-200 pointer-events-none"
+        style={shimmer}
+      />
+      <span className="relative flex items-baseline gap-2">
+        <span
+          className="shrink-0 font-serif text-[11px] num"
+          style={{ color: "rgba(195,155,75,0.9)" }}
+        >
+          {index + 1}.
+        </span>
+        <span
+          className="font-serif text-[13px] md:text-[14px] leading-snug"
+          style={{ color: "rgba(245,235,210,0.95)" }}
+        >
+          {label}
+        </span>
+      </span>
+    </button>
+  );
+}
+
+// ── Main component ─────────────────────────────────────────────────────
 export function PlayCanvas({
   imageBase64,
   phase,
+  frame,
   pendingClick,
   onClick,
+  onSelectChoice,
   fullViewport = false,
 }: {
   imageBase64: string | null;
   phase: Phase;
+  frame: StoryFrame | null;
   pendingClick: { x: number; y: number } | null;
   onClick: (click: { x: number; y: number }) => void;
+  onSelectChoice?: (choiceId: string, label: string) => void;
   fullViewport?: boolean;
 }) {
   const imgRef = useRef<HTMLImageElement>(null);
   const [dims, setDims] = useState<{ w: number; h: number } | null>(null);
 
+  const choices = frame?.uiElements.filter((e) => e.kind === "choice") ?? [];
+  const dialogueText = frame
+    ? [frame.speaker ? `${frame.speaker}：${frame.line ?? ""}` : frame.line, frame.narration]
+        .filter(Boolean)
+        .join("\n")
+    : "";
+  const narrationOnly = !frame?.speaker && !frame?.line && !!frame?.narration;
+  const displayBody = frame?.speaker
+    ? frame.line ?? ""
+    : frame?.narration ?? "";
+
+  const typedBody = useTypewriter(displayBody, 30);
+
   function handleClick(e: React.MouseEvent<HTMLImageElement>) {
     if (phase !== "ready" || !imgRef.current) return;
     const rect = imgRef.current.getBoundingClientRect();
@@ -37,16 +133,13 @@ export function PlayCanvas({
   const interactive = phase === "ready" && !!imageBase64;
   const dimmed = phase === "interacting";
 
-  // 16:9 sizing — letterbox into available viewport
   const sizeStyle = fullViewport
     ? { maxWidth: "100vw", maxHeight: "100dvh" }
-    : { maxWidth: "96vw", maxHeight: "calc(100dvh - 280px)" };
+    : { maxWidth: "96vw", maxHeight: "calc(100dvh - 200px)" };
 
-  // Placeholder needs an explicit width for aspect-video to compute height.
-  // Pick the largest 16:9 box that fits in the available viewport.
   const placeholderWidth = fullViewport
     ? "min(100vw, calc(100dvh * 16 / 9))"
-    : "min(96vw, calc((100dvh - 280px) * 16 / 9))";
+    : "min(96vw, calc((100dvh - 200px) * 16 / 9))";
 
   return (
     <div
@@ -57,6 +150,7 @@ export function PlayCanvas({
           className="relative inline-block"
           style={{ boxShadow: fullViewport ? "none" : SHADOW }}
         >
+          {/* ── Background image ── */}
           <img
             key={imageBase64.slice(-48)}
             ref={imgRef}
@@ -68,17 +162,121 @@ export function PlayCanvas({
               setDims({ w: img.naturalWidth, h: img.naturalHeight });
             }}
             draggable={false}
-            className={`block w-auto h-auto select-none animate-fade-in transition-opacity duration-700 ease-out ${interactive ? "cursor-pointer" : "cursor-wait"} ${dimmed ? "opacity-30" : "opacity-100"}`}
+            className={`block w-auto h-auto select-none animate-fade-in transition-opacity duration-700 ease-out ${
+              interactive ? "cursor-pointer" : "cursor-wait"
+            } ${dimmed ? "opacity-40" : "opacity-100"}`}
             style={sizeStyle}
           />
 
+          {/* ── Top gradient vignette ── */}
           {!fullViewport && (
             <>
               <div className="absolute inset-x-0 top-0 h-10 bg-gradient-to-b from-clay-900/12 to-transparent pointer-events-none" />
-              <div className="absolute inset-x-0 bottom-0 h-10 bg-gradient-to-t from-clay-900/12 to-transparent pointer-events-none" />
             </>
           )}
 
+          {/* ══════════════════════════════════════════════════════════
+              PREFAB UI OVERLAY — rendered on top of image
+          ══════════════════════════════════════════════════════════ */}
+          {frame && (
+            <div className="absolute inset-0 flex flex-col justify-end pointer-events-none select-none">
+              {/* ── Choices row ── */}
+              {choices.length > 0 && (
+                <div
+                  className="pointer-events-auto px-[3%] pb-[1.5%] flex gap-[1.5%] items-stretch"
+                >
+                  {choices.map((choice, i) => (
+                    <ChoiceButton
+                      key={choice.id}
+                      index={i}
+                      label={choice.label}
+                      disabled={phase !== "ready"}
+                      onClick={() => onSelectChoice?.(choice.id, choice.label)}
+                    />
+                  ))}
+                </div>
+              )}
+
+              {/* ── Dialogue / narration box ── */}
+              {(frame.narration || frame.line) && (
+                <div
+                  className="pointer-events-none mx-[2%] mb-[2%] px-[3%] py-[2.2%] relative"
+                  style={{
+                    background: "rgba(14, 10, 6, 0.72)",
+                    border: "1.5px solid rgba(175, 138, 72, 0.60)",
+                    borderRadius: "6px",
+                    backdropFilter: "blur(10px)",
+                    WebkitBackdropFilter: "blur(10px)",
+                    boxShadow:
+                      "0 4px 24px rgba(0,0,0,0.55), inset 0 1px 0 rgba(200,165,90,0.10)",
+                  }}
+                >
+                  {/* Inner golden corner decoration */}
+                  <span
+                    className="absolute top-[6px] left-[8px] text-[10px] opacity-40 pointer-events-none"
+                    style={{ color: "rgba(195,155,75,1)" }}
+                    aria-hidden
+                  >
+                    ✦
+                  </span>
+                  <span
+                    className="absolute top-[6px] right-[8px] text-[10px] opacity-40 pointer-events-none"
+                    style={{ color: "rgba(195,155,75,1)" }}
+                    aria-hidden
+                  >
+                    ✦
+                  </span>
+
+                  {/* Speaker name tag */}
+                  {frame.speaker && (
+                    <p
+                      className="font-serif text-[11px] md:text-[12px] smallcaps mb-[0.6em]"
+                      style={{ color: "rgba(205,165,90,0.92)" }}
+                    >
+                      {frame.speaker}
+                    </p>
+                  )}
+
+                  {/* Main text */}
+                  <p
+                    className="font-serif leading-[1.85] text-[13px] md:text-[15px]"
+                    style={{ color: "rgba(245,235,210,0.95)" }}
+                  >
+                    {typedBody}
+                    {/* Speaker present — show narration as a secondary line */}
+                    {frame.speaker && frame.narration && (
+                      <span
+                        className="block mt-[0.5em] italic text-[12px] md:text-[13px]"
+                        style={{ color: "rgba(200,185,155,0.78)" }}
+                      >
+                        {frame.narration}
+                      </span>
+                    )}
+                  </p>
+
+                  {/* Scroll hint ▼ */}
+                  <span
+                    className="absolute bottom-[6px] right-[10px] text-[10px] animate-slow-pulse"
+                    style={{ color: "rgba(195,155,75,0.7)" }}
+                    aria-hidden
+                  >
+                    ▼
+                  </span>
+                </div>
+              )}
+            </div>
+          )}
+
+          {/* Loading/interacting dim overlay */}
+          {phase === "interacting" && (
+            <div className="absolute inset-0 flex items-center justify-center pointer-events-none">
+              <p className="text-[10px] smallcaps text-cream-50/70 animate-slow-pulse">
+                AI · 正 · 在 · 描 · 画 · 下 · 一 · 刻
+              </p>
+            </div>
+          )}
+
+          {/* Click ripple indicator */}
           {pendingClick && (
             <>
               <div
@@ -133,7 +331,7 @@ export function PlayCanvas({
             {dims ? `${dims.w} × ${dims.h} · png` : "—"}
           </span>
           <span className="text-[9px] smallcaps text-clay-400">
-            {phase === "ready" ? "任 · 意 · 点 · 击" : "···"}
+            {phase === "ready" ? (choices.length > 0 ? "选 · 择 · 一 · 项" : "任 · 意 · 点 · 击") : "···"}
           </span>
         </div>
       )}
diff --git a/packages/engine/src/prompts.ts b/packages/engine/src/prompts.ts
index 4c2be55..d2594d0 100644
--- a/packages/engine/src/prompts.ts
+++ b/packages/engine/src/prompts.ts
@@ -1,4 +1,5 @@
-import type { Session, StoryFrame, UIElement } from "@yume/types";
+import type { Character, Session, StoryFrame, UIElement } from "@yume/types";
+
 
 export const DIRECTOR_SYSTEM = `你是一个交互视觉小说的编剧导演。每次根据世界观、画风和历史，输出当前画面要呈现的内容。
 
@@ -19,7 +20,7 @@ export const DIRECTOR_SYSTEM = `你是一个交互视觉小说的编剧导演。
 - narration / line 中文，scenePrompt 英文
 - 默认 3 个 choice 元素，可以根据情境额外加 menu/item/custom（罕见）
 - 选项必须能切实推进剧情，且互不重复
-- scenePrompt 描述当前的画面，不要包括 UI 元素，UI 元素会另外渲染
+- scenePrompt 描述当前的画面，不要包括 UI 元素
 - 单帧旁白与台词加起来控制在 80 字以内
 - 不要输出 JSON 以外的任何文本`;
 
@@ -55,44 +56,25 @@ export function buildImagePrompt(
   frame: StoryFrame,
   styleGuide: string,
 ): string {
-  const choiceList = frame.uiElements
-    .filter((e) => e.kind === "choice")
-    .map((e, i) => `${i + 1}. ${e.label}`)
-    .join("\n");
-
-  const extraUI = frame.uiElements
-    .filter((e) => e.kind !== "choice")
-    .map((e) => `- ${e.kind}: ${e.label}`)
-    .join("\n");
-
-  return `Generate a landscape 16:9 cinematic visual novel UI screen, widescreen format (1792x1024 or equivalent).
+  return `Generate a cinematic landscape background illustration, 16:9 widescreen (1792x1024).
 
 ART STYLE: ${styleGuide}
-(Match this style consistently — for the scene art AND the UI elements.
-For example: anime → traditional galgame dialogue box; cyberpunk → neon HUD;
-stick figure → hand-drawn paper UI; cinematic realism → minimalist film overlay.)
 
-SCENE (fills the entire 16:9 canvas as a cinematic widescreen background):
+SCENE (fill the ENTIRE canvas — no UI elements, no text overlays):
 ${frame.scenePrompt}
 
-DIALOGUE PANEL (cinematic bottom band, semi-transparent, spans full width, occupies the lower ~25% of the frame):
-${frame.speaker ? `Speaker name displayed prominently above the dialogue text: "${frame.speaker}"` : "Narration only — no speaker tag."}
-${frame.line ? `Dialogue text: "${frame.line}"` : ""}
-${frame.narration ? `Narration text (italic if speaker also present): "${frame.narration}"` : ""}
-
-CHOICE PANEL (three clearly tappable buttons, arranged HORIZONTALLY in a row across the lower-third of the frame, ABOVE or overlaid on the dialogue band; equally sized; centered in the safe zone of the 16:9 canvas):
-${choiceList}
-${extraUI ? `\nADDITIONAL UI ELEMENTS:\n${extraUI}` : ""}
-
-CRITICAL LAYOUT REQUIREMENTS:
-- 16:9 LANDSCAPE orientation — wider than tall. Do NOT produce a portrait/square image.
-- All text and buttons must be inside the central safe zone (avoid the outer 8% on every side), so the viewport can letterbox without cropping any UI.
-- All text must be perfectly legible (high contrast, readable size).
-- Choice buttons must be clearly distinguishable as interactive elements, arranged horizontally left-to-right in the order listed above.
-- Choice text must NOT be cropped, NOT overlap with character faces or the dialogue panel.
-- The image is the entire interface — no external chrome will be added.`;
+STRICT RULES — NEVER violate these:
+- DO NOT draw any dialogue boxes, speech bubbles, text panels, or any rectangular overlay.
+- DO NOT draw any buttons, choice options, menu items, or interactive UI elements.
+- DO NOT render any Chinese or English text anywhere in the image.
+- DO NOT add any HUD, interface chrome, or game UI elements.
+- The image is a PURE BACKGROUND SCENE ONLY. All UI will be added as HTML on top.
+- 16:9 LANDSCAPE orientation — wider than tall. No portrait or square output.
+- Leave the bottom 35% of the frame relatively uncluttered (darker or softer) so overlaid UI panels remain readable.
+- Characters or key scene elements should be positioned in the upper 65% of the frame.`;
 }
 
+
 export const VISION_SYSTEM_PROMPT = `你是视觉理解助手。用户在视觉小说界面上点击了红色圆点位置，你要根据红点位置和图中可见的 UI 元素，判断用户的意图。
 
 必须输出严格 JSON：