feat(web,engine): portrait-orientation scene images for mobile full-bleed
Thread orientation (portrait|landscape) from client through API, engine, and image gen. Portrait devices render 1024x1792 (9:16) full-bleed scenes; desktop/landscape keeps 1792x1024 (16:9). Adds cover-aware click→image coordinate mapping, session-locked orientation, a shared coerceOrientation helper, and a choices overflow cap in portrait. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
+24
-9
@@ -1,7 +1,7 @@
|
||||
import { generateImage as generateImageSdk } from "ai";
|
||||
import { createOpenAI } from "@ai-sdk/openai";
|
||||
import { createGoogleGenerativeAI } from "@ai-sdk/google";
|
||||
import type { ProviderConfig, ProviderProtocol } from "@infiplot/types";
|
||||
import type { Orientation, ProviderConfig, ProviderProtocol } from "@infiplot/types";
|
||||
import { fetchWithRetry } from "./fetchWithRetry";
|
||||
import { normalizeBaseUrl } from "./normalizeUrl";
|
||||
|
||||
@@ -54,6 +54,13 @@ export type GenerateImageOptions = {
|
||||
referenceImages?: string[];
|
||||
/** 0–1, FLUX needs ≥ 0.8 to actually have an effect. Runware-only. */
|
||||
strength?: number;
|
||||
/**
|
||||
* Output aspect, locked per session. "portrait" → 9:16 vertical for mobile;
|
||||
* default/"landscape" → 16:9 widescreen. Mapped to each provider's nearest
|
||||
* supported size: Runware 1024×1792, OpenAI-compatible REST 1024x1792,
|
||||
* native gpt-image 1024x1536, Gemini aspectRatio 9:16.
|
||||
*/
|
||||
orientation?: Orientation;
|
||||
};
|
||||
|
||||
export type GenerateImageResult = {
|
||||
@@ -120,7 +127,7 @@ export async function generateImage(
|
||||
);
|
||||
case "openai_compatible":
|
||||
default:
|
||||
return generateImageOpenAiCompatible(config, prompt);
|
||||
return generateImageOpenAiCompatible(config, prompt, options);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -147,14 +154,15 @@ async function generateImageViaAiSdk(
|
||||
const promptArg =
|
||||
refs.length > 0 ? { text: prompt, images: refs } : prompt;
|
||||
|
||||
// OpenAI's image models take an explicit `size`; gpt-image's widest landscape
|
||||
// option is 1536x1024. Gemini takes an `aspectRatio` instead.
|
||||
// Session-locked aspect. gpt-image takes an explicit `size` (portrait /
|
||||
// landscape options are 1024x1536 / 1536x1024); Gemini takes an `aspectRatio`.
|
||||
const portrait = options?.orientation === "portrait";
|
||||
const { image } = await generateImageSdk({
|
||||
model: imageModel,
|
||||
prompt: promptArg,
|
||||
...(protocol === "openai"
|
||||
? { size: "1536x1024" as `${number}x${number}` }
|
||||
: { aspectRatio: "16:9" as `${number}:${number}` }),
|
||||
? { size: (portrait ? "1024x1536" : "1536x1024") as `${number}x${number}` }
|
||||
: { aspectRatio: (portrait ? "9:16" : "16:9") as `${number}:${number}` }),
|
||||
});
|
||||
|
||||
return {
|
||||
@@ -169,6 +177,7 @@ async function generateImageViaAiSdk(
|
||||
async function generateImageOpenAiCompatible(
|
||||
config: ProviderConfig,
|
||||
prompt: string,
|
||||
options?: GenerateImageOptions,
|
||||
): Promise<GenerateImageResult> {
|
||||
const base = normalizeBaseUrl(config.baseUrl, "openai_compatible");
|
||||
const endpoint = `${base}/images/generations`;
|
||||
@@ -186,7 +195,8 @@ async function generateImageOpenAiCompatible(
|
||||
model: config.model,
|
||||
prompt: prompt,
|
||||
n: 1,
|
||||
size: "1792x1024", // Use horizontal size (16:9)
|
||||
// Session-locked aspect (16:9 default, 9:16 portrait for mobile).
|
||||
size: options?.orientation === "portrait" ? "1024x1792" : "1792x1024",
|
||||
}),
|
||||
});
|
||||
|
||||
@@ -221,13 +231,18 @@ async function generateImageRunware(
|
||||
): Promise<GenerateImageResult> {
|
||||
const url = normalizeBaseUrl(config.baseUrl, "runware");
|
||||
|
||||
// Session-locked output aspect. Image models emit a FIXED pixel size; CSS
|
||||
// object-fit on the client adapts this frame to the exact device/window. Both
|
||||
// dimensions stay a multiple of 64 as FLUX requires.
|
||||
const portrait = options?.orientation === "portrait";
|
||||
|
||||
const task: Record<string, unknown> = {
|
||||
taskType: "imageInference",
|
||||
taskUUID: crypto.randomUUID(),
|
||||
model: config.model,
|
||||
positivePrompt: prompt,
|
||||
width: 1792,
|
||||
height: 1024,
|
||||
width: portrait ? 1024 : 1792,
|
||||
height: portrait ? 1792 : 1024,
|
||||
steps: 4,
|
||||
CFGScale: 3.5,
|
||||
numberResults: 1,
|
||||
|
||||
@@ -4,6 +4,7 @@ import type {
|
||||
Beat,
|
||||
Character,
|
||||
EngineConfig,
|
||||
Orientation,
|
||||
ProviderConfig,
|
||||
} from "@infiplot/types";
|
||||
import { mockImageDataUri } from "../mockImage";
|
||||
@@ -54,6 +55,11 @@ export type PainterInput = {
|
||||
* session paints — even before any priorScene exists.
|
||||
*/
|
||||
styleReferenceImage?: string;
|
||||
/**
|
||||
* Session-locked output aspect. Drives both the Painter prompt's framing
|
||||
* rules and the generated image's pixel dimensions. Default "landscape".
|
||||
*/
|
||||
orientation?: Orientation;
|
||||
};
|
||||
|
||||
// Pick the references we send to Runware as `referenceImages`. Priority:
|
||||
@@ -142,13 +148,14 @@ export async function runPainter(
|
||||
entryBeat: Beat | undefined,
|
||||
): Promise<PainterResult> {
|
||||
if (config.mockImage) {
|
||||
return { kind: "mock", imageUrl: await mockImageDataUri() };
|
||||
return { kind: "mock", imageUrl: await mockImageDataUri(input.orientation) };
|
||||
}
|
||||
|
||||
const prompt = buildPainterPrompt(
|
||||
input.integratedPrompt,
|
||||
input.styleGuide,
|
||||
input.onStageCharacters,
|
||||
input.orientation,
|
||||
);
|
||||
|
||||
const refs = collectReferenceImages(
|
||||
@@ -165,7 +172,7 @@ export async function runPainter(
|
||||
const r = await tryGenerate(
|
||||
config.image,
|
||||
prompt,
|
||||
{ referenceImages: refs },
|
||||
{ referenceImages: refs, orientation: input.orientation },
|
||||
`referenceImages (${refs.length})`,
|
||||
);
|
||||
if (r) return { kind: "real", imageUrl: r.imageUrl, imageUuid: r.imageUuid };
|
||||
@@ -174,6 +181,8 @@ export async function runPainter(
|
||||
// Tier B — pure text-to-image. Last resort, used when Tier A failed OR
|
||||
// there are no references to send (first scene with no characters yet).
|
||||
// Errors here propagate to the caller.
|
||||
const r = await generateImage(config.image, prompt);
|
||||
const r = await generateImage(config.image, prompt, {
|
||||
orientation: input.orientation,
|
||||
});
|
||||
return { kind: "real", imageUrl: r.imageUrl, imageUuid: r.imageUuid };
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { chat } from "@infiplot/ai-client";
|
||||
import { coerceOrientation } from "@infiplot/types";
|
||||
import type {
|
||||
Beat,
|
||||
Character,
|
||||
@@ -332,6 +333,10 @@ export async function directScene(
|
||||
// filtered to those now in the registry, so the archetype block covers them.
|
||||
const onStageCharacters = characters.filter((c) => plan.cast.includes(c.name));
|
||||
|
||||
// Session-locked orientation (set at session start). Threads into both the
|
||||
// Painter prompt's framing rules and the generated image's pixel dimensions.
|
||||
const orientation = coerceOrientation(session.orientation);
|
||||
|
||||
const tPainter = Date.now();
|
||||
const painted = await runPainter(
|
||||
config,
|
||||
@@ -341,6 +346,7 @@ export async function directScene(
|
||||
onStageCharacters,
|
||||
priorSceneImage: priorSceneReference,
|
||||
styleReferenceImage: session.styleReferenceImage,
|
||||
orientation,
|
||||
},
|
||||
entryBeatForPaint,
|
||||
);
|
||||
@@ -403,6 +409,7 @@ export async function directScene(
|
||||
sceneKey: plan.sceneKey,
|
||||
imageUuid: painted.kind === "real" ? painted.imageUuid : undefined,
|
||||
imageUrl: painted.imageUrl,
|
||||
orientation,
|
||||
};
|
||||
|
||||
// Merge the Writer's volatile memory rewrite onto the carried bible so the
|
||||
|
||||
+18
-10
@@ -1,3 +1,5 @@
|
||||
import type { Orientation } from "@infiplot/types";
|
||||
|
||||
// Static SVG placeholder used when MOCK_IMAGE=true, so we can exercise the
|
||||
// TTS path without paying for image generation. Returned as a data URI so the
|
||||
// rest of the pipeline can treat it as an `imageUrl` interchangeably with
|
||||
@@ -9,17 +11,23 @@
|
||||
// data URI so the engine has zero Node-native dependencies and runs on
|
||||
// Cloudflare Workers. SVG also stays crisp at any display size.
|
||||
|
||||
const W = 1792;
|
||||
const H = 1024;
|
||||
const SVG = `<svg xmlns="http://www.w3.org/2000/svg" width="${W}" height="${H}">
|
||||
<rect width="${W}" height="${H}" fill="#161109"/>
|
||||
<rect x="2" y="2" width="${W - 4}" height="${H - 4}" fill="none" stroke="#5a4628" stroke-width="3" stroke-dasharray="14 10"/>
|
||||
function buildDataUri(w: number, h: number): string {
|
||||
const svg = `<svg xmlns="http://www.w3.org/2000/svg" width="${w}" height="${h}">
|
||||
<rect width="${w}" height="${h}" fill="#161109"/>
|
||||
<rect x="2" y="2" width="${w - 4}" height="${h - 4}" fill="none" stroke="#5a4628" stroke-width="3" stroke-dasharray="14 10"/>
|
||||
<text x="50%" y="45%" fill="#b88f4a" font-family="Georgia, serif" font-size="72" letter-spacing="6" text-anchor="middle">MOCK IMAGE</text>
|
||||
<text x="50%" y="53%" fill="#6e5430" font-family="Georgia, serif" font-size="30" letter-spacing="3" text-anchor="middle">TTS TEST — image generation skipped</text>
|
||||
</svg>`;
|
||||
|
||||
const DATA_URI = `data:image/svg+xml;charset=utf-8,${encodeURIComponent(SVG)}`;
|
||||
|
||||
export async function mockImageDataUri(): Promise<string> {
|
||||
return DATA_URI;
|
||||
return `data:image/svg+xml;charset=utf-8,${encodeURIComponent(svg)}`;
|
||||
}
|
||||
|
||||
// Mirror the real Painter's dimensions per orientation so mock mode exercises
|
||||
// the same portrait/landscape layout the client renders for real images.
|
||||
const LANDSCAPE = buildDataUri(1792, 1024);
|
||||
const PORTRAIT = buildDataUri(1024, 1792);
|
||||
|
||||
/**
 * Resolve the mock placeholder image for the requested orientation.
 *
 * Both placeholders are built once at module load (`PORTRAIT` and
 * `LANDSCAPE`), so this only selects between the two prebuilt data URIs.
 *
 * @param orientation - Which placeholder to return; defaults to "landscape".
 * @returns The portrait data URI when `orientation` is "portrait", otherwise
 *   the landscape one.
 */
export async function mockImageDataUri(
  orientation: Orientation = "landscape",
): Promise<string> {
  if (orientation === "portrait") {
    return PORTRAIT;
  }
  return LANDSCAPE;
}
|
||||
|
||||
@@ -12,6 +12,7 @@ import type {
|
||||
VisionRequest,
|
||||
VisionResponse,
|
||||
} from "@infiplot/types";
|
||||
import { coerceOrientation } from "@infiplot/types";
|
||||
import { runArchitect } from "./agents/architect";
|
||||
import { directInsertBeat, directScene } from "./director";
|
||||
import { synthesizeBeat } from "./voice";
|
||||
@@ -48,6 +49,7 @@ export async function startSession(
|
||||
history: [],
|
||||
characters: [],
|
||||
styleReferenceImage: req.styleReferenceImage?.trim() || undefined,
|
||||
orientation: coerceOrientation(req.orientation),
|
||||
};
|
||||
|
||||
// Stage 0 — Architect: expand the terse world/style prompt into a story
|
||||
|
||||
+12
-2
@@ -1,6 +1,7 @@
|
||||
import type {
|
||||
BeatActiveCharacter,
|
||||
Character,
|
||||
Orientation,
|
||||
Scene,
|
||||
Session,
|
||||
StoryState,
|
||||
@@ -803,6 +804,7 @@ export function buildPainterPrompt(
|
||||
integratedPrompt: string,
|
||||
styleGuide: string,
|
||||
characters: { name: string; visualDescription?: string }[],
|
||||
orientation: Orientation = "landscape",
|
||||
): string {
|
||||
const archetypeBlock = characters
|
||||
.filter((c) => c.visualDescription)
|
||||
@@ -813,7 +815,15 @@ export function buildPainterPrompt(
|
||||
? `\n\nCHARACTER ARCHETYPES (anchor identity, outfit, and style across scenes — keep each character visually identical to their archetype):\n${archetypeBlock}`
|
||||
: "";
|
||||
|
||||
return `Generate a cinematic landscape background illustration, 16:9 widescreen (1792x1024).
|
||||
const portrait = orientation === "portrait";
|
||||
const header = portrait
|
||||
? "Generate a cinematic vertical (portrait) background illustration, 9:16 tall format (1024x1792)."
|
||||
: "Generate a cinematic landscape background illustration, 16:9 widescreen (1792x1024).";
|
||||
const orientationRule = portrait
|
||||
? "- 9:16 PORTRAIT orientation — taller than wide. No landscape or square output."
|
||||
: "- 16:9 LANDSCAPE orientation — wider than tall. No portrait or square output.";
|
||||
|
||||
return `${header}
|
||||
|
||||
ART STYLE: ${styleGuide}
|
||||
|
||||
@@ -826,7 +836,7 @@ STRICT RULES — NEVER violate these:
|
||||
- DO NOT render any Chinese or English text anywhere in the image.
|
||||
- DO NOT add any HUD, interface chrome, or game UI elements.
|
||||
- The image is a PURE BACKGROUND SCENE ONLY. All UI will be added as HTML on top.
|
||||
- 16:9 LANDSCAPE orientation — wider than tall. No portrait or square output.
|
||||
${orientationRule}
|
||||
- Leave the bottom 35% of the frame relatively uncluttered (darker or softer) so overlaid UI panels remain readable.
|
||||
- Characters or key scene elements should be positioned in the upper 65% of the frame.
|
||||
- Maintain character identity exactly as specified in CHARACTER ARCHETYPES — same face, same hairstyle, same outfit across every scene.
|
||||
|
||||
@@ -40,6 +40,23 @@ export type BeatChoiceEffect =
|
||||
| { kind: "advance-beat"; targetBeatId: string }
|
||||
| { kind: "change-scene"; nextSceneSeed: string };
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
// Orientation — session-wide image aspect, locked at session start.
|
||||
// "landscape" → 16:9 (1792×1024), the default for desktop / mobile-landscape.
|
||||
// "portrait" → 9:16 (1024×1792), painted for mobile users holding the phone
|
||||
// upright so the scene fills the screen instead of letterboxing a widescreen
|
||||
// image. CSS object-fit then adapts the 9:16 frame to the exact device size.
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
export type Orientation = "portrait" | "landscape";

/**
 * Coerce an arbitrary, untrusted value into a valid {@link Orientation}.
 *
 * Callers pass values that are not guaranteed to be well-formed — e.g. the
 * `orientation` field of a request body, or of a persisted session that was
 * created before the field existed.
 *
 * @param value - Any value; it is only compared, never otherwise inspected.
 * @returns `"portrait"` only when `value` is strictly equal to the string
 *   `"portrait"`; every other input (including `undefined`, `null`,
 *   differently-cased strings, and non-strings) yields `"landscape"`, which
 *   is the back-compat default.
 */
export function coerceOrientation(value: unknown): Orientation {
  // Strict equality on purpose: no trimming or case folding is applied.
  if (value === "portrait") {
    return "portrait";
  }
  return "landscape";
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
// Scene — one background image + a graph of beats.
|
||||
// The Director emits an entire Scene per call; the player navigates
|
||||
@@ -75,6 +92,12 @@ export type Scene = {
|
||||
* Runware URL — the client renders both forms transparently.
|
||||
*/
|
||||
imageUrl?: string;
|
||||
/**
|
||||
* Orientation this scene's image was painted in. Mirrors the session's
|
||||
* locked orientation; recorded per-scene so the client can pick the right
|
||||
* intrinsic dimensions / object-fit even across legacy or mixed history.
|
||||
*/
|
||||
orientation?: Orientation;
|
||||
};
|
||||
|
||||
export type SceneExit =
|
||||
@@ -251,6 +274,12 @@ export type Session = {
|
||||
* payload small for /api/scene round-trips.
|
||||
*/
|
||||
styleReferenceImage?: string;
|
||||
/**
|
||||
* Session-wide image orientation, locked at session start from the client's
|
||||
* device + orientation and carried on every /api/scene call so all scenes
|
||||
* share one aspect ratio. Absent → "landscape" (back-compat).
|
||||
*/
|
||||
orientation?: Orientation;
|
||||
};
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
@@ -337,6 +366,12 @@ export type StartRequest = {
|
||||
* drops `config.tts` so the engine skips all server-side TTS work.
|
||||
*/
|
||||
clientTts?: boolean;
|
||||
/**
|
||||
* Device orientation chosen at session start. "portrait" makes the engine
|
||||
* paint 9:16 vertical scene images (mobile, held upright); "landscape"
|
||||
* (default) keeps 16:9 widescreen. Locked for the whole session.
|
||||
*/
|
||||
orientation?: Orientation;
|
||||
};
|
||||
|
||||
// /api/parse-style-image — vision LLM extracts a textual painting-style
|
||||
|
||||
Reference in New Issue
Block a user