Merge pull request #31 from zonghaoyuan/feat/mobile-portrait-images

feat(web,engine): portrait-orientation scene images for mobile full-bleed
2026-06-04 18:13:51 +08:00
parent 77f5296e18 ea207e103b
commit 4be980d8ee
10 changed files with 287 additions and 61 deletions
@@ -1,4 +1,4 @@
-import type { Metadata } from "next";
+import type { Metadata, Viewport } from "next";
 import { Cormorant_Garamond, Inter } from "next/font/google";
 import { Analytics } from "@/components/Analytics";
 import "./globals.css";
@@ -25,6 +25,15 @@ export const metadata: Metadata = {
  description: "InfiPlot 是一款用 AI 实时生成图片、语音与剧情分支的交互式剧情游戏 Demo。",
 };

+// viewportFit:cover lets the immersive /play portrait layout extend under the
+// iOS notch / home-indicator and exposes env(safe-area-inset-*) to the
+// floating controls. device-width + initialScale keep mobile rendering 1:1.
+export const viewport: Viewport = {
+  width: "device-width",
+  initialScale: 1,
+  viewportFit: "cover",
+};
+
 export default function RootLayout({
  children,
 }: {
@@ -6,6 +6,7 @@ import {
  Suspense,
  useCallback,
  useEffect,
+  useLayoutEffect,
  useMemo,
  useRef,
  useState,
@@ -24,6 +25,7 @@ import type {
  Character,
  CharacterVoice,
  InsertBeatResponse,
+  Orientation,
  Scene,
  SceneExit,
  SceneResponse,
@@ -58,6 +60,24 @@ function getByoHeaders(): Record<string, string> {
 // it, low enough to catch a scene that's clearly being rate-limited.
 const SILENCE_NUDGE_THRESHOLD = 3;

+// Session image aspect, sampled by callers at session start. "portrait" (9:16)
+// only when the viewport is upright AND the pointer is coarse (touch); a mouse
+// device, a landscape viewport, or SSR (no window) gets "landscape" (16:9).
+// NOTE(review): an upright tablet also matches both queries — confirm intended.
+function detectOrientation(): Orientation {
+  if (typeof window === "undefined") return "landscape";
+  const portrait = window.matchMedia("(orientation: portrait)").matches;
+  const coarse = window.matchMedia("(pointer: coarse)").matches;
+  return portrait && coarse ? "portrait" : "landscape";
+}
+
+// useLayoutEffect runs before the browser paints (so it can correct first-frame
+// state without a visible flash), but it warns when called during SSR.
+// PlayInner only ever renders on the client (/play prerenders the Suspense
+// fallback); we still fall back to useEffect on the server to avoid the warning.
+const useIsomorphicLayoutEffect =
+  typeof window !== "undefined" ? useLayoutEffect : useEffect;
+
 // Cap how long we wait for the browser to download + decode a scene image
 // before giving up and rendering anyway. Runware's CDN is usually <2s for a
 // 1792×1024 PNG, but over slow links / VPN / strict corp networks the same
@@ -457,6 +477,9 @@ function PlayInner() {
  } | null>(null);
  const [error, setError] = useState<string | null>(null);
  const [presentation, setPresentation] = useState(false);
+  // Session-locked image orientation (see detectOrientation). "portrait" makes
+  // the whole play surface render full-bleed vertical on phones.
+  const [orientation, setOrientation] = useState<Orientation>("landscape");
  const [lastExitLabel, setLastExitLabel] = useState<string | null>(null);
  // Consecutive server-side TTS misses (null audio / failed /api/beat-audio).
  // Climbs when the shared server key is rate-limited by MiMo — the exact pain
@@ -782,6 +805,16 @@ function PlayInner() {
    };
  }, [togglePresentation, presentation]);

+  // Set the visible orientation BEFORE the first paint, so portrait phones
+  // never flash the landscape loading chrome. The state inits to "landscape"
+  // for SSR-safety; this corrects it pre-paint (a no-op on landscape devices).
+  // Prebaked cards (decision C) stay landscape regardless of device. The
+  // bootstrap effect below re-derives the value for the /api/start payload.
+  // NOTE(review): re-runs when `params` changes, so not strictly session-locked.
+  useIsomorphicLayoutEffect(() => {
+    setOrientation(params.get("card") ? "landscape" : detectOrientation());
+  }, [params]);
+
  // ── Bootstrap: start session ─────────────────────────────────────────
  useEffect(() => {
    if (startedRef.current) return;
@@ -801,6 +834,7 @@ function PlayInner() {
      worldSetting: string;
      styleGuide: string;
      styleReferenceImage?: string;
+      orientation?: Orientation;
    } | null = null;
    if (!cardName) {
      if (presetId) {
@@ -829,6 +863,16 @@ function PlayInner() {
      }
    }

+    // Lock orientation for the whole session. Prebaked cards (decision C) are
+    // landscape-baked, so they stay landscape regardless of device; only the
+    // live /api/start path requests a portrait paint when the phone is upright.
+    // The visible state is already set pre-paint by the layout effect above;
+    // here we only need the value for the /api/start payload.
+    const sessionOrientation: Orientation = cardName
+      ? "landscape"
+      : detectOrientation();
+    if (livePayload) livePayload.orientation = sessionOrientation;
+
    if (!cardName && !livePayload) {
      router.replace("/");
      return;
@@ -903,6 +947,7 @@ function PlayInner() {
          characters: data.characters,
          storyState: data.storyState,
          styleReferenceImage: data.styleReferenceImage,
+          orientation: data.scene.orientation ?? sessionOrientation,
        };
        visitedBeatsRef.current = [data.scene.entryBeatId];
        setSession(initial);
@@ -1290,7 +1335,13 @@ function PlayInner() {
    );
  }

-  if (presentation) {
+  // Mobile portrait renders full-bleed by default — it sidesteps the iOS
+  // Safari Fullscreen API (unsupported on iPhone) with a CSS full-viewport
+  // layout instead. Desktop "presentation" mode shares the same immersive
+  // canvas, toggled via the F key.
+  const immersive = presentation || orientation === "portrait";
+
+  if (immersive) {
    return (
      <div className="fixed inset-0 bg-black flex items-center justify-center z-50">
        <PlayCanvas
@@ -1304,8 +1355,33 @@ function PlayInner() {
          onBackgroundClick={onBackgroundClick}
          onAdvance={onAdvance}
          onSelectChoice={onSelectChoice}
+          orientation={orientation}
          fullViewport
        />
+        {orientation === "portrait" && (
+          <div
+            className="absolute inset-x-0 top-0 z-10 flex items-center justify-between px-4 pointer-events-none"
+            style={{ paddingTop: "max(0.5rem, env(safe-area-inset-top))" }}
+          >
+            <Link
+              href="/"
+              className="pointer-events-auto flex h-9 w-9 items-center justify-center rounded-full bg-black/40 text-white/80 backdrop-blur-sm transition-colors hover:text-white"
+              aria-label="返回"
+            >
+              <i className="fa-solid fa-arrow-left text-[13px]" />
+            </Link>
+            <button
+              type="button"
+              onClick={toggleMuted}
+              className="pointer-events-auto flex h-9 w-9 items-center justify-center rounded-full bg-black/40 text-white/80 backdrop-blur-sm transition-colors hover:text-white"
+              aria-label={muted ? "取消静音" : "静音"}
+            >
+              <i
+                className={`fa-solid ${muted ? "fa-volume-xmark" : "fa-volume-high"} text-[13px]`}
+              />
+            </button>
+          </div>
+        )}
      </div>
    );
  }
@@ -1354,6 +1430,7 @@ function PlayInner() {
          onBackgroundClick={onBackgroundClick}
          onAdvance={onAdvance}
          onSelectChoice={onSelectChoice}
+          orientation={orientation}
          aboveCanvas={
            <button
              type="button"
@@ -1,7 +1,7 @@
 "use client";

 import { useCallback, useEffect, useRef, useState, type ReactNode } from "react";
-import type { Beat, BeatChoice } from "@infiplot/types";
+import type { Beat, BeatChoice, Orientation } from "@infiplot/types";

 export type Phase =
  | "loading-first"        // first scene not yet rendered
@@ -109,11 +109,13 @@ function ChoiceButton({
  index,
  label,
  disabled,
+  vertical,
  onClick,
 }: {
  index: number;
  label: string;
  disabled: boolean;
+  vertical: boolean;
  onClick: () => void;
 }) {
  return (
@@ -121,8 +123,8 @@ function ChoiceButton({
      type="button"
      disabled={disabled}
      onClick={onClick}
-      className="group relative flex-1 min-w-0 px-4 py-3 text-left transition-all duration-200
-        disabled:opacity-50 disabled:cursor-wait"
+      className={`group relative ${vertical ? "w-full" : "flex-1 min-w-0"} px-4 py-3 text-left transition-all duration-200
+        disabled:opacity-50 disabled:cursor-wait`}
      style={{
        background: "rgba(20, 14, 8, 0.68)",
        border: "1.5px solid rgba(180, 140, 80, 0.65)",
@@ -141,13 +143,13 @@ function ChoiceButton({
      />
      <span className="relative flex items-baseline gap-2">
        <span
-          className="shrink-0 font-serif text-[11px] num"
+          className={`shrink-0 font-serif num ${vertical ? "text-[13px]" : "text-[11px]"}`}
          style={{ color: "rgba(195,155,75,0.9)" }}
        >
          {index + 1}.
        </span>
        <span
-          className="font-serif text-[13px] md:text-[14px] leading-snug"
+          className={`font-serif leading-snug ${vertical ? "text-[15px]" : "text-[13px] md:text-[14px]"}`}
          style={{ color: "rgba(245,235,210,0.95)" }}
        >
          {label}
@@ -170,6 +172,7 @@ export function PlayCanvas({
  onAdvance,
  onSelectChoice,
  fullViewport = false,
+  orientation = "landscape",
  aboveCanvas,
  aboveCanvasLeft,
 }: {
@@ -184,6 +187,8 @@ export function PlayCanvas({
  onAdvance: () => void;
  onSelectChoice: (choice: BeatChoice) => void;
  fullViewport?: boolean;
+  // 会话锁定的图片朝向。"portrait" 时整图铺满视口（object-fit:cover）、选项竖排、字号放大。
+  orientation?: Orientation;
  // 渲染在图片正上方、右对齐的 slot（画面外、紧贴右上角）。
  aboveCanvas?: ReactNode;
  // 渲染在图片正上方、左对齐的 slot（画面外、紧贴左上角），与 aboveCanvas 水平镜像。
@@ -255,9 +260,27 @@ export function PlayCanvas({

  function handleImageClick(e: React.MouseEvent<HTMLImageElement>) {
    if (phase !== "ready" || !imgRef.current || !beat) return;
-    const rect = imgRef.current.getBoundingClientRect();
-    const x = (e.clientX - rect.left) / rect.width;
-    const y = (e.clientY - rect.top) / rect.height;
+    const el = imgRef.current;
+    const rect = el.getBoundingClientRect();
+    // Portrait renders with object-fit:cover, which scales the 9:16 image to
+    // FILL the box and crops the overflow — so the rendered box ≠ the full
+    // image. Map the click from box-space back into full-image-space via the
+    // cover geometry so the marker lands where the user tapped. Landscape's box
+    // matches the image aspect (no crop), so it keeps simple normalization.
+    let x: number;
+    let y: number;
+    if (orientation === "portrait") {
+      const nw = el.naturalWidth || 1024;
+      const nh = el.naturalHeight || 1792;
+      const scale = Math.max(rect.width / nw, rect.height / nh);
+      const dispW = nw * scale;
+      const dispH = nh * scale;
+      x = (e.clientX - rect.left + (dispW - rect.width) / 2) / dispW;
+      y = (e.clientY - rect.top + (dispH - rect.height) / 2) / dispH;
+    } else {
+      x = (e.clientX - rect.left) / rect.width;
+      y = (e.clientY - rect.top) / rect.height;
+    }
    // If the typewriter is still printing, a click completes it instantly
    // (standard VN affordance) — the page never sees this click.
    if (!typingDone) {
@@ -291,13 +314,26 @@ export function PlayCanvas({
  const interactive = phase === "ready" && !!imageUrl;
  const dimmed = phase === "transitioning";

-  const sizeStyle = fullViewport
-    ? { maxWidth: "100vw", maxHeight: "100dvh" }
-    : { maxWidth: "96vw", maxHeight: "calc(100dvh - 200px)" };
+  const portrait = orientation === "portrait";
+  const intrinsicW = portrait ? 1024 : 1792;
+  const intrinsicH = portrait ? 1792 : 1024;

-  const placeholderWidth = fullViewport
-    ? "min(100vw, calc(100dvh * 16 / 9))"
-    : "min(96vw, calc((100dvh - 200px) * 16 / 9))";
+  // Portrait (mobile) always fills the whole viewport with object-fit:cover so
+  // the 9:16 image matches the exact device/window — no letterbox. Landscape
+  // keeps the prior contain-style sizing so the full 16:9 frame stays visible.
+  const sizeStyle: React.CSSProperties = portrait
+    ? { width: "100vw", height: "100dvh", objectFit: "cover" }
+    : fullViewport
+      ? { maxWidth: "100vw", maxHeight: "100dvh" }
+      : { maxWidth: "96vw", maxHeight: "calc(100dvh - 200px)" };
+
+  const placeholderStyle: React.CSSProperties = portrait
+    ? { width: "100vw", height: "100dvh" }
+    : {
+        width: fullViewport
+          ? "min(100vw, calc(100dvh * 16 / 9))"
+          : "min(96vw, calc((100dvh - 200px) * 16 / 9))",
+      };


  return (
@@ -323,22 +359,23 @@ export function PlayCanvas({
          style={{ boxShadow: fullViewport ? "none" : SHADOW }}
        >
          {/* Background image — Runware CDN URL or data URI (mock mode).
-              The width/height attributes are NOT rendered dimensions (w-auto
-              h-auto + the maxWidth/maxHeight in sizeStyle still drive the
-              final layout); they give the browser an intrinsic aspect ratio
-              so that, while the bytes are still arriving from the CDN, the
-              <img> reserves a 1792:1024 box instead of collapsing to a
-              one-pixel sliver — fixes the "等很久 → 一根线 → 突然出图" jank. */}
+              The width/height attributes give the browser the intrinsic aspect
+              ratio (1792:1024 landscape / 1024:1792 portrait) so that, while the
+              bytes are still arriving from the CDN, the <img> reserves the right
+              box instead of collapsing to a one-pixel sliver — fixes the
+              "等很久 → 一根线 → 突然出图" jank. Landscape uses w-auto/h-auto +
+              maxWidth/maxHeight (contain); portrait switches sizeStyle to
+              100vw×100dvh with object-fit:cover (full-bleed, no letterbox). */}
          <img
            key={imageUrl.slice(-48)}
            ref={imgRef}
            src={imageUrl}
-            width={1792}
-            height={1024}
+            width={intrinsicW}
+            height={intrinsicH}
            alt="Generated scene"
            onClick={handleImageClick}
            draggable={false}
-            className={`block w-auto h-auto select-none animate-fade-in transition-opacity duration-700 ease-out ${
+            className={`block ${portrait ? "" : "w-auto h-auto"} select-none animate-fade-in transition-opacity duration-700 ease-out ${
              interactive ? "cursor-pointer" : "cursor-wait"
            } ${dimmed ? "opacity-40" : "opacity-100"}`}
            style={sizeStyle}
@@ -361,15 +398,29 @@ export function PlayCanvas({
          )}

          {beat && (
-            <div className="absolute inset-0 flex flex-col justify-end pointer-events-none select-none">
+            <div
+              className="absolute inset-0 flex flex-col justify-end pointer-events-none select-none"
+              style={
+                portrait
+                  ? { paddingBottom: "env(safe-area-inset-bottom)" }
+                  : undefined
+              }
+            >
              {choices.length > 0 && (
-                <div className="pointer-events-auto px-[3%] pb-[1.5%] flex gap-[1.5%] items-stretch">
+                <div
+                  className={`pointer-events-auto px-[3%] pb-[1.5%] flex items-stretch ${
+                    portrait
+                      ? "flex-col gap-2 max-h-[45dvh] overflow-y-auto"
+                      : "gap-[1.5%]"
+                  }`}
+                >
                  {choices.map((choice, i) => (
                    <ChoiceButton
                      key={choice.id}
                      index={i}
                      label={choice.label}
                      disabled={phase !== "ready"}
+                      vertical={portrait}
                      onClick={() => onSelectChoice(choice)}
                    />
                  ))}
@@ -407,7 +458,9 @@ export function PlayCanvas({

                  {beat.speaker && (
                    <p
-                      className="font-serif text-[11px] md:text-[12px] smallcaps mb-[0.6em]"
+                      className={`font-serif smallcaps mb-[0.6em] ${
+                        portrait ? "text-[13px]" : "text-[11px] md:text-[12px]"
+                      }`}
                      style={{ color: "rgba(205,165,90,0.92)" }}
                    >
                      {beat.speaker}
@@ -415,15 +468,17 @@ export function PlayCanvas({
                  )}

                  <p
-                    className="font-serif leading-[1.85] text-[13px] md:text-[15px]"
+                    className={`font-serif leading-[1.85] ${
+                      portrait ? "text-[16px]" : "text-[13px] md:text-[15px]"
+                    }`}
                    style={{ color: "rgba(245,235,210,0.95)" }}
                  >
                    {typedBody}
                    {beat.speaker && beat.narration && (
                      <span
-                        className={`block mt-[0.5em] italic text-[12px] md:text-[13px] transition-opacity duration-300 ${
-                          typingDone ? "opacity-100" : "opacity-0"
-                        }`}
+                        className={`block mt-[0.5em] italic transition-opacity duration-300 ${
+                          portrait ? "text-[14px]" : "text-[12px] md:text-[13px]"
+                        } ${typingDone ? "opacity-100" : "opacity-0"}`}
                        style={{ color: "rgba(200,185,155,0.78)" }}
                        aria-hidden={!typingDone}
                      >
@@ -488,11 +543,10 @@ export function PlayCanvas({
        </div>
      ) : (
        <div
-          className="relative aspect-video bg-cream-200 flex flex-col items-center justify-center gap-4"
-          style={{
-            width: placeholderWidth,
-            boxShadow: fullViewport ? "none" : SHADOW,
-          }}
+          className={`relative bg-cream-200 flex flex-col items-center justify-center gap-4 ${
+            portrait ? "" : "aspect-video"
+          }`}
+          style={{ ...placeholderStyle, boxShadow: fullViewport ? "none" : SHADOW }}
        >
          <div className="w-1.5 h-1.5 bg-clay-500 rounded-full animate-slow-pulse" />
          <p className="text-[9px] smallcaps text-clay-500 animate-slow-pulse">
@@ -1,7 +1,7 @@
 import { generateImage as generateImageSdk } from "ai";
 import { createOpenAI } from "@ai-sdk/openai";
 import { createGoogleGenerativeAI } from "@ai-sdk/google";
-import type { ProviderConfig, ProviderProtocol } from "@infiplot/types";
+import type { Orientation, ProviderConfig, ProviderProtocol } from "@infiplot/types";
 import { fetchWithRetry } from "./fetchWithRetry";
 import { normalizeBaseUrl } from "./normalizeUrl";

@@ -54,6 +54,13 @@ export type GenerateImageOptions = {
  referenceImages?: string[];
  /** 0–1, FLUX needs ≥ 0.8 to actually have an effect. Runware-only. */
  strength?: number;
+  /**
+   * Output aspect, locked per session. "portrait" → 9:16 vertical for mobile;
+   * default/"landscape" → 16:9 widescreen. Mapped to each provider's nearest
+   * supported size: Runware 1024×1792, OpenAI-compatible REST 1024x1792,
+   * native gpt-image 1024x1536, Gemini aspectRatio 9:16.
+   */
+  orientation?: Orientation;
 };

 export type GenerateImageResult = {
@@ -120,7 +127,7 @@ export async function generateImage(
      );
    case "openai_compatible":
    default:
-      return generateImageOpenAiCompatible(config, prompt);
+      return generateImageOpenAiCompatible(config, prompt, options);
  }
 }

@@ -147,14 +154,15 @@ async function generateImageViaAiSdk(
  const promptArg =
    refs.length > 0 ? { text: prompt, images: refs } : prompt;

-  // OpenAI's image models take an explicit `size`; gpt-image's widest landscape
-  // option is 1536x1024. Gemini takes an `aspectRatio` instead.
+  // Session-locked aspect. gpt-image takes an explicit `size` (portrait /
+  // landscape options are 1024x1536 / 1536x1024); Gemini takes an `aspectRatio`.
+  const portrait = options?.orientation === "portrait";
  const { image } = await generateImageSdk({
    model: imageModel,
    prompt: promptArg,
    ...(protocol === "openai"
-      ? { size: "1536x1024" as `${number}x${number}` }
-      : { aspectRatio: "16:9" as `${number}:${number}` }),
+      ? { size: (portrait ? "1024x1536" : "1536x1024") as `${number}x${number}` }
+      : { aspectRatio: (portrait ? "9:16" : "16:9") as `${number}:${number}` }),
  });

  return {
@@ -169,6 +177,7 @@ async function generateImageViaAiSdk(
 async function generateImageOpenAiCompatible(
  config: ProviderConfig,
  prompt: string,
+  options?: GenerateImageOptions,
 ): Promise<GenerateImageResult> {
  const base = normalizeBaseUrl(config.baseUrl, "openai_compatible");
  const endpoint = `${base}/images/generations`;
@@ -186,7 +195,8 @@ async function generateImageOpenAiCompatible(
      model: config.model,
      prompt: prompt,
      n: 1,
-      size: "1792x1024", // Use horizontal size (16:9)
+      // Session-locked aspect (16:9 default, 9:16 portrait for mobile).
+      size: options?.orientation === "portrait" ? "1024x1792" : "1792x1024",
    }),
  });

@@ -221,13 +231,18 @@ async function generateImageRunware(
 ): Promise<GenerateImageResult> {
  const url = normalizeBaseUrl(config.baseUrl, "runware");

+  // Session-locked output aspect. Image models emit a FIXED pixel size; CSS
+  // object-fit on the client adapts this frame to the exact device/window. Both
+  // dimensions stay a multiple of 64 as FLUX requires.
+  const portrait = options?.orientation === "portrait";
+
  const task: Record<string, unknown> = {
    taskType: "imageInference",
    taskUUID: crypto.randomUUID(),
    model: config.model,
    positivePrompt: prompt,
-    width: 1792,
-    height: 1024,
+    width: portrait ? 1024 : 1792,
+    height: portrait ? 1792 : 1024,
    steps: 4,
    CFGScale: 3.5,
    numberResults: 1,
@@ -4,6 +4,7 @@ import type {
  Beat,
  Character,
  EngineConfig,
+  Orientation,
  ProviderConfig,
 } from "@infiplot/types";
 import { mockImageDataUri } from "../mockImage";
@@ -54,6 +55,11 @@ export type PainterInput = {
   * session paints — even before any priorScene exists.
   */
  styleReferenceImage?: string;
+  /**
+   * Session-locked output aspect. Drives both the Painter prompt's framing
+   * rules and the generated image's pixel dimensions. Default "landscape".
+   */
+  orientation?: Orientation;
 };

 // Pick the references we send to Runware as `referenceImages`. Priority:
@@ -142,13 +148,14 @@ export async function runPainter(
  entryBeat: Beat | undefined,
 ): Promise<PainterResult> {
  if (config.mockImage) {
-    return { kind: "mock", imageUrl: await mockImageDataUri() };
+    return { kind: "mock", imageUrl: await mockImageDataUri(input.orientation) };
  }

  const prompt = buildPainterPrompt(
    input.integratedPrompt,
    input.styleGuide,
    input.onStageCharacters,
+    input.orientation,
  );

  const refs = collectReferenceImages(
@@ -165,7 +172,7 @@ export async function runPainter(
    const r = await tryGenerate(
      config.image,
      prompt,
-      { referenceImages: refs },
+      { referenceImages: refs, orientation: input.orientation },
      `referenceImages (${refs.length})`,
    );
    if (r) return { kind: "real", imageUrl: r.imageUrl, imageUuid: r.imageUuid };
@@ -174,6 +181,8 @@ export async function runPainter(
  // Tier B — pure text-to-image. Last resort, used when Tier A failed OR
  // there are no references to send (first scene with no characters yet).
  // Errors here propagate to the caller.
-  const r = await generateImage(config.image, prompt);
+  const r = await generateImage(config.image, prompt, {
+    orientation: input.orientation,
+  });
  return { kind: "real", imageUrl: r.imageUrl, imageUuid: r.imageUuid };
 }
@@ -1,4 +1,5 @@
 import { chat } from "@infiplot/ai-client";
+import { coerceOrientation } from "@infiplot/types";
 import type {
  Beat,
  Character,
@@ -332,6 +333,10 @@ export async function directScene(
  // filtered to those now in the registry, so the archetype block covers them.
  const onStageCharacters = characters.filter((c) => plan.cast.includes(c.name));

+  // Session-locked orientation (set at session start). Threads into both the
+  // Painter prompt's framing rules and the generated image's pixel dimensions.
+  const orientation = coerceOrientation(session.orientation);
+
  const tPainter = Date.now();
  const painted = await runPainter(
    config,
@@ -341,6 +346,7 @@ export async function directScene(
      onStageCharacters,
      priorSceneImage: priorSceneReference,
      styleReferenceImage: session.styleReferenceImage,
+      orientation,
    },
    entryBeatForPaint,
  );
@@ -403,6 +409,7 @@ export async function directScene(
    sceneKey: plan.sceneKey,
    imageUuid: painted.kind === "real" ? painted.imageUuid : undefined,
    imageUrl: painted.imageUrl,
+    orientation,
  };

  // Merge the Writer's volatile memory rewrite onto the carried bible so the
@@ -1,3 +1,5 @@
+import type { Orientation } from "@infiplot/types";
+
 // Static SVG placeholder used when MOCK_IMAGE=true, so we can exercise the
 // TTS path without paying for image generation. Returned as a data URI so the
 // rest of the pipeline can treat it as an `imageUrl` interchangeably with
@@ -9,17 +11,23 @@
 // data URI so the engine has zero Node-native dependencies and runs on
 // Cloudflare Workers. SVG also stays crisp at any display size.

-const W = 1792;
-const H = 1024;
-const SVG = `<svg xmlns="http://www.w3.org/2000/svg" width="${W}" height="${H}">
-  <rect width="${W}" height="${H}" fill="#161109"/>
-  <rect x="2" y="2" width="${W - 4}" height="${H - 4}" fill="none" stroke="#5a4628" stroke-width="3" stroke-dasharray="14 10"/>
+function buildDataUri(w: number, h: number): string {
+  const svg = `<svg xmlns="http://www.w3.org/2000/svg" width="${w}" height="${h}">
+  <rect width="${w}" height="${h}" fill="#161109"/>
+  <rect x="2" y="2" width="${w - 4}" height="${h - 4}" fill="none" stroke="#5a4628" stroke-width="3" stroke-dasharray="14 10"/>
  <text x="50%" y="45%" fill="#b88f4a" font-family="Georgia, serif" font-size="72" letter-spacing="6" text-anchor="middle">MOCK IMAGE</text>
  <text x="50%" y="53%" fill="#6e5430" font-family="Georgia, serif" font-size="30" letter-spacing="3" text-anchor="middle">TTS TEST — image generation skipped</text>
 </svg>`;
-
-const DATA_URI = `data:image/svg+xml;charset=utf-8,${encodeURIComponent(SVG)}`;
-
-export async function mockImageDataUri(): Promise<string> {
-  return DATA_URI;
+  return `data:image/svg+xml;charset=utf-8,${encodeURIComponent(svg)}`;
+}
+
+// Mirror the real Painter's dimensions per orientation so mock mode exercises
+// the same portrait/landscape layout the client renders for real images.
+const LANDSCAPE = buildDataUri(1792, 1024);
+const PORTRAIT = buildDataUri(1024, 1792);
+
+export async function mockImageDataUri(
+  orientation: Orientation = "landscape",
+): Promise<string> {
+  return orientation === "portrait" ? PORTRAIT : LANDSCAPE;
 }
@@ -12,6 +12,7 @@ import type {
  VisionRequest,
  VisionResponse,
 } from "@infiplot/types";
+import { coerceOrientation } from "@infiplot/types";
 import { runArchitect } from "./agents/architect";
 import { directInsertBeat, directScene } from "./director";
 import { synthesizeBeat } from "./voice";
@@ -48,6 +49,7 @@ export async function startSession(
    history: [],
    characters: [],
    styleReferenceImage: req.styleReferenceImage?.trim() || undefined,
+    orientation: coerceOrientation(req.orientation),
  };

  // Stage 0 — Architect: expand the terse world/style prompt into a story
@@ -1,6 +1,7 @@
 import type {
  BeatActiveCharacter,
  Character,
+  Orientation,
  Scene,
  Session,
  StoryState,
@@ -803,6 +804,7 @@ export function buildPainterPrompt(
  integratedPrompt: string,
  styleGuide: string,
  characters: { name: string; visualDescription?: string }[],
+  orientation: Orientation = "landscape",
 ): string {
  const archetypeBlock = characters
    .filter((c) => c.visualDescription)
@@ -813,7 +815,15 @@ export function buildPainterPrompt(
    ? `\n\nCHARACTER ARCHETYPES (anchor identity, outfit, and style across scenes — keep each character visually identical to their archetype):\n${archetypeBlock}`
    : "";

-  return `Generate a cinematic landscape background illustration, 16:9 widescreen (1792x1024).
+  const portrait = orientation === "portrait";
+  const header = portrait
+    ? "Generate a cinematic vertical (portrait) background illustration, 9:16 tall format (1024x1792)."
+    : "Generate a cinematic landscape background illustration, 16:9 widescreen (1792x1024).";
+  const orientationRule = portrait
+    ? "- 9:16 PORTRAIT orientation — taller than wide. No landscape or square output."
+    : "- 16:9 LANDSCAPE orientation — wider than tall. No portrait or square output.";
+
+  return `${header}

 ART STYLE: ${styleGuide}

@@ -826,7 +836,7 @@ STRICT RULES — NEVER violate these:
 - DO NOT render any Chinese or English text anywhere in the image.
 - DO NOT add any HUD, interface chrome, or game UI elements.
 - The image is a PURE BACKGROUND SCENE ONLY. All UI will be added as HTML on top.
- 16:9 LANDSCAPE orientation — wider than tall. No portrait or square output.
+${orientationRule}
 - Leave the bottom 35% of the frame relatively uncluttered (darker or softer) so overlaid UI panels remain readable.
 - Characters or key scene elements should be positioned in the upper 65% of the frame.
 - Maintain character identity exactly as specified in CHARACTER ARCHETYPES — same face, same hairstyle, same outfit across every scene.
@@ -40,6 +40,23 @@ export type BeatChoiceEffect =
  | { kind: "advance-beat"; targetBeatId: string }
  | { kind: "change-scene"; nextSceneSeed: string };

+// ──────────────────────────────────────────────────────────────────────
+//  Orientation — session-wide image aspect, locked at session start.
+//  "landscape" → 16:9 (1792×1024), the default for desktop / mobile-landscape.
+//  "portrait"  → 9:16 (1024×1792), painted for mobile users holding the phone
+//  upright so the scene fills the screen instead of letterboxing a widescreen
+//  image. CSS object-fit then adapts the 9:16 frame to the exact device size.
+// ──────────────────────────────────────────────────────────────────────
+
+export type Orientation = "portrait" | "landscape";
+
+/** Normalizes an untrusted orientation value (from a request body, or a
+ *  persisted session that predates the field) to a valid Orientation: only the
+ *  exact string "portrait" is kept; anything else → "landscape" (back-compat). */
+export function coerceOrientation(value: unknown): Orientation {
+  return value === "portrait" ? "portrait" : "landscape";
+}
+
 // ──────────────────────────────────────────────────────────────────────
 //  Scene — one background image + a graph of beats.
 //  The Director emits an entire Scene per call; the player navigates
@@ -75,6 +92,12 @@ export type Scene = {
   * Runware URL — the client renders both forms transparently.
   */
  imageUrl?: string;
+  /**
+   * Orientation this scene's image was painted in. Mirrors the session's
+   * locked orientation; recorded per-scene so the client can pick the right
+   * intrinsic dimensions / object-fit even across legacy or mixed history.
+   */
+  orientation?: Orientation;
 };

 export type SceneExit =
@@ -251,6 +274,12 @@ export type Session = {
   * payload small for /api/scene round-trips.
   */
  styleReferenceImage?: string;
+  /**
+   * Session-wide image orientation, locked at session start from the client's
+   * device + orientation and carried on every /api/scene call so all scenes
+   * share one aspect ratio. Absent → "landscape" (back-compat).
+   */
+  orientation?: Orientation;
 };

 // ──────────────────────────────────────────────────────────────────────
@@ -337,6 +366,12 @@ export type StartRequest = {
   * drops `config.tts` so the engine skips all server-side TTS work.
   */
  clientTts?: boolean;
+  /**
+   * Device orientation chosen at session start. "portrait" makes the engine
+   * paint 9:16 vertical scene images (mobile, held upright); "landscape"
+   * (default) keeps 16:9 widescreen. Locked for the whole session.
+   */
+  orientation?: Orientation;
 };

 // /api/parse-style-image — vision LLM extracts a textual painting-style