"use client";
import Link from "next/link";
import { useRouter, useSearchParams } from "next/navigation";
import {
Suspense,
useCallback,
useEffect,
useLayoutEffect,
useMemo,
useRef,
useState,
} from "react";
import {
PlayCanvas,
type Phase,
} from "@/components/PlayCanvas";
import type { DialogueHistoryItem } from "@/components/DialogueHistoryModal";
import type { GalleryDoc, GalleryScene } from "@/app/gallery/page";
import { SettingsModal, readStoredPlayerName, readStoredVisionClick } from "@/components/SettingsModal";
import { annotateClick } from "@/lib/annotateClient";
import { loadClientTtsConfig } from "@/lib/clientTtsConfig";
import { PRESETS } from "@/lib/presets";
import { provisionVoice, synthesize } from "@infiplot/tts-client";
import type {
Beat,
BeatChoice,
Character,
CharacterVoice,
FreeformClassifyResponse,
InsertBeatResponse,
Orientation,
Scene,
SceneExit,
SceneResponse,
Session,
StartResponse,
TtsConfig,
VisionResponse,
} from "@infiplot/types";
import { track } from "@/lib/analytics";
// localStorage key for the sticky mute preference. The `muted` state
// initializer in this file reads it and treats the stored value "1" as muted.
const MUTED_STORAGE_KEY = "infiplot:muted";
// ── FOT reduction helpers ──────────────────────────────────────────────
/**
 * Builds the lightweight copy of `session` that is sent to the server.
 *
 * Every character keeps its other fields but has `voice` cleared, so the bulky
 * `voice.referenceAudioBase64` payload stays off the wire. The engine only
 * needs character names + visualDescriptions for scene generation; voice data
 * is only used by /api/beat-audio, which receives the voice directly rather
 * than via the session. The client keeps its voices locally and re-merges them
 * from the response with mergeCharactersPreserveVoice.
 *
 * @param session The full client-side session; it is not mutated.
 * @returns A shallow copy of `session` whose characters carry no voice.
 */
function stripVoicesForTransport(session: Session): Session {
  const voicelessCharacters = session.characters.map((character) => {
    return { ...character, voice: undefined };
  });
  return { ...session, characters: voicelessCharacters };
}
/**
 * Combines the server's character list with the voices the client already
 * holds.
 *
 * The server strips voice from characters it already knows about, so only NEW
 * characters arrive with one. For each remote character whose name matches a
 * local character, the remote voice is kept when present and the local voice
 * is re-attached otherwise. Remote characters with no local match are returned
 * unchanged (same object).
 *
 * @param local Characters currently held by the client (source of voices).
 * @param remote Characters returned by the server; defines the result's order.
 * @returns One character per `remote` entry, in the same order.
 */
function mergeCharactersPreserveVoice(
  local: Character[],
  remote: Character[],
): Character[] {
  const knownByName = new Map<Character["name"], Character>();
  for (const character of local) {
    knownByName.set(character.name, character);
  }
  return remote.map((incoming) => {
    const known = knownByName.get(incoming.name);
    return known
      ? { ...incoming, voice: incoming.voice ?? known.voice }
      : incoming;
  });
}
/**
 * Number of consecutive silent (no-audio) beats before we surface the BYO-key
 * nudge to a non-BYO, unmuted player. Set high enough that one transient miss
 * won't trip it, low enough to catch a scene that's clearly being
 * rate-limited.
 */
const SILENCE_NUDGE_THRESHOLD = 3;
/**
 * Decides which scene-image orientation this device gets.
 *
 * Mobile-portrait users get a 9:16 scene image; everyone else (desktop,
 * tablet, mobile-landscape) keeps the 16:9 landscape image. Only a touch
 * device (coarse pointer) held upright counts as "portrait" — a mouse device
 * is always landscape. Intended to be detected once and locked for the whole
 * session.
 *
 * @returns "portrait" when both media queries match, otherwise "landscape"
 *          (including when there is no `window`, e.g. during SSR).
 */
function detectOrientation(): Orientation {
  if (typeof window === "undefined") return "landscape";
  const matches = (query: string): boolean => window.matchMedia(query).matches;
  const heldUpright = matches("(orientation: portrait)");
  const touchPointer = matches("(pointer: coarse)");
  if (heldUpright && touchPointer) return "portrait";
  return "landscape";
}
/**
 * Layout effect on the client, plain effect on the server.
 *
 * useLayoutEffect runs before the browser paints, so it can correct
 * first-frame state without a visible flash, but it warns when called during
 * SSR. PlayInner only ever renders on the client (/play prerenders the
 * Suspense fallback), yet we still fall back to useEffect when there is no
 * `window` to keep that warning out.
 */
const useIsomorphicLayoutEffect =
  typeof window === "undefined" ? useEffect : useLayoutEffect;
// Cap (in ms) on how long we wait for the browser to download + decode a scene
// image before giving up and rendering anyway. Runware's CDN is usually <2s
// for a 1792×1024 PNG, but over slow links / VPN / strict corp networks the
// same download can stretch to 10-20s. The previous 8s ceiling fired in that
// window, and because the rendered <img> has no aspect-ratio box reserved, the
// layout collapsed to a one-pixel-tall sliver until the bytes actually
// finished arriving — the "long wait → a thin line → image suddenly appears"
// sequence of the original report.
// 20s + the aspect-video fallback together remove that failure mode.
const IMAGE_PRELOAD_TIMEOUT_MS = 20000;
// ──────────────────────────────────────────────────────────────────────
// Two ways an <img> gets its pixels, picked per-URL by shouldProxy():
//
// 1. DIRECT (default — no proxy configured): preload the URL with an
// Image() + decode() so the HTTP cache is warm and the bitmap decoded
// before React commits, then hand the ORIGINAL URL to <img>. This is the
// long-standing behavior; deployers who set no env var get exactly this
// and are completely unaffected by the proxy machinery below.
//
// 2. PROXY (opt-in — NEXT_PUBLIC_IMAGE_PROXY_URL set, host allow-listed):
// fetch the bytes through the Cloudflare Worker (which adds CORS and
// serves over stable HTTP/2), await the FULL body via .blob(), materialize
// a blob: URL over that local copy, and hand THAT to <img>. The <img>
// never sees a network-backed src, so there's no "bytes still in flight"
// middle state and no progressive paint.
// Why it matters: Chrome's direct fetch of im.runware.ai sometimes hits
// ERR_QUIC_PROTOCOL_ERROR mid-stream, leaving partial PNG bytes that
// paint row-by-row. The Worker re-fetches server-to-server (no QUIC
// fragility) and serves over HTTP/2 — atomic and reliable. Trade-off:
// callers MUST revoke the blob URL when swapping it out (revokeBlobUrlFor)
// or the bytes leak in the JS heap.
//
// Data URIs (MOCK_IMAGE mode) are already local; passed through unchanged
// on both paths. blobUrlCache is keyed by the ORIGINAL URL either way.
// ──────────────────────────────────────────────────────────────────────
/**
 * Direct-path preload: load and decode `url` in memory before the caller
 * commits it to React state, so that when the <img> mounts the HTTP cache is
 * warm and first paint is instant.
 *
 * Errors and timeouts resolve quietly — better a broken <img> than a hung play
 * loop. (im.runware.ai sends no CORS header, so we can't fetch() its bytes
 * here; warming + decoding is the most the direct path can do.)
 *
 * @param url Image URL to warm.
 * @returns A promise that always resolves (never rejects) once the image has
 *          loaded and decoded, failed, or IMAGE_PRELOAD_TIMEOUT_MS elapsed.
 */
function preloadImage(url: string): Promise<void> {
  return new Promise<void>((resolve) => {
    const img = new Image();
    // Single exit: clear the timeout and resolve. resolve() is idempotent, so
    // whichever path fires first (load+decode, error, timeout) wins.
    const done = (): void => {
      clearTimeout(timer);
      resolve();
    };
    // Armed across BOTH network load and decode, so a hung decode still
    // resolves quietly. `done` only runs asynchronously, i.e. after this
    // declaration has been initialized.
    const timer = setTimeout(done, IMAGE_PRELOAD_TIMEOUT_MS);
    img.onload = () => {
      // .decode() forces the bitmap to be fully decoded before we proceed —
      // without it, a slow decode could still cause a flash on first paint.
      img.decode().then(done, done);
    };
    img.onerror = done;
    img.src = url;
  });
}
/**
 * Base URL of the opt-in Cloudflare Workers image proxy (deploy your own — see
 * the link in README), with one trailing slash removed. Inlined by Next.js at
 * build time. Empty / unset → no proxy → every URL takes the direct path,
 * exactly as if this feature didn't exist.
 */
const IMAGE_PROXY_BASE = ((): string => {
  const configured = process.env.NEXT_PUBLIC_IMAGE_PROXY_URL ?? "";
  return configured.endsWith("/") ? configured.slice(0, -1) : configured;
})();
/**
 * Lower-cased hostnames eligible for the proxy. Default: Runware's CDN only.
 * Deployers who point IMAGE_BASE_URL at another provider can opt that
 * provider's image host in via NEXT_PUBLIC_IMAGE_PROXY_ALLOWED_HOSTS
 * (comma-separated; blank entries are dropped). Inlined at build time.
 * Anything not on this list stays on the direct path.
 */
const IMAGE_PROXY_ALLOWED_HOSTS = (
  process.env.NEXT_PUBLIC_IMAGE_PROXY_ALLOWED_HOSTS ?? "im.runware.ai"
)
  .split(",")
  .flatMap((rawHost) => {
    const host = rawHost.trim().toLowerCase();
    return host ? [host] : [];
  });
/**
 * Decides whether `originalUrl` should be fetched through the image proxy.
 *
 * True only when a proxy is configured AND the URL is a remote http(s) image
 * on an allow-listed host. data: URIs (MOCK_IMAGE) are already local;
 * malformed URLs and any other origin fall through to the direct path.
 *
 * @param originalUrl The image URL as handed out by the server.
 * @returns Whether the proxy path applies to this URL.
 */
function shouldProxy(originalUrl: string): boolean {
  if (!IMAGE_PROXY_BASE || originalUrl.startsWith("data:")) return false;
  let parsed: URL;
  try {
    parsed = new URL(originalUrl);
  } catch {
    // Not parseable as an absolute URL → direct path.
    return false;
  }
  const isHttp = parsed.protocol === "https:" || parsed.protocol === "http:";
  return (
    isHttp && IMAGE_PROXY_ALLOWED_HOSTS.includes(parsed.hostname.toLowerCase())
  );
}
/**
 * Builds the proxy request URL for an image: the configured proxy base
 * followed by `/?url=` and the URI-component-encoded original URL.
 *
 * @param originalUrl The image URL to route through the proxy.
 * @returns The proxy URL to fetch instead.
 */
function proxiedImageUrl(originalUrl: string): string {
  const encodedTarget = encodeURIComponent(originalUrl);
  return IMAGE_PROXY_BASE + "/?url=" + encodedTarget;
}
/**
 * Resolves an image URL to the `src` that should be handed to <img>.
 *
 * - data: URIs are returned unchanged.
 * - Direct path (default): warm the cache + decode, then return the original
 *   URL. No fetch() — im.runware.ai has no CORS, so fetch().blob() would throw.
 * - Proxy path (opt-in): fetch the full body through the Worker and return a
 *   blob: URL over it. The caller owns that blob: URL and must revoke it.
 *
 * Never rejects: on a non-OK response, an empty body, a network error or a
 * timeout it falls back to the original URL so <img> still tries (possible
 * progressive paint — same as the direct path, never worse).
 *
 * @param url The original image URL.
 * @returns `url` itself, or a blob: URL holding the proxied bytes.
 */
async function fetchImageAsBlobUrl(url: string): Promise<string> {
  if (url.startsWith("data:")) return url;
  if (!shouldProxy(url)) {
    await preloadImage(url);
    return url;
  }
  const ctrl = new AbortController();
  // The timer spans both the request and the body download; it is cleared in
  // `finally` whichever way we leave.
  const timer = setTimeout(() => ctrl.abort(), IMAGE_PRELOAD_TIMEOUT_MS);
  try {
    const response = await fetch(proxiedImageUrl(url), { signal: ctrl.signal });
    if (!response.ok) return url;
    const blob = await response.blob();
    // A zero-byte body would produce a blob: URL that can never paint; the
    // original URL at least gives <img> a chance.
    if (blob.size === 0) return url;
    return URL.createObjectURL(blob);
  } catch {
    return url;
  } finally {
    clearTimeout(timer);
  }
}
/**
 * Module-level cache so speculative prefetches and the eventual commit share
 * the same in-flight fetch — no double-download per scene.
 *
 * Key: the ORIGINAL CDN URL. Value: the promise of the `src` it resolves to
 * (a blob: URL on the proxy path, otherwise the original URL). Persists for
 * the page's lifetime; entries are explicitly revoked when the scene swaps.
 */
const blobUrlCache = new Map<string, Promise<string>>();
/**
 * Returns the cached resolution promise for `originalUrl`, starting the fetch
 * (and caching its promise) on first request. Because the promise itself is
 * cached, concurrent callers share one in-flight fetch.
 *
 * @param originalUrl The original image URL; also the cache key.
 * @returns The shared promise of the `src` to render for this URL.
 */
function getOrCreateBlobUrl(originalUrl: string): Promise<string> {
  const cached = blobUrlCache.get(originalUrl);
  if (cached) return cached;
  const pending = fetchImageAsBlobUrl(originalUrl);
  blobUrlCache.set(originalUrl, pending);
  return pending;
}
/**
 * Drops `originalUrl` from the cache and, once its promise settles, releases
 * the blob: URL it resolved to. Resolutions that are not blob: URLs, and
 * rejections, are ignored. Does nothing when the URL is not cached.
 *
 * @param originalUrl The original image URL whose cached entry is discarded.
 */
function revokeBlobUrlFor(originalUrl: string): void {
  const pending = blobUrlCache.get(originalUrl);
  if (!pending) return;
  blobUrlCache.delete(originalUrl);
  void (async () => {
    try {
      const resolved = await pending;
      if (resolved.startsWith("blob:")) URL.revokeObjectURL(resolved);
    } catch {
      // Best-effort cleanup: nothing to revoke if resolution failed.
    }
  })();
}
// ──────────────────────────────────────────────────────────────────────
// Prefetch pool — speculative SceneResponses keyed by choice path.
//
// Key format: "C1" → reached by choosing C1 from current scene.
// "C1/C2" → after C1, then C2 (recursive must-pass prefetch).
//
// When the player picks a change-scene choice, we keep that key's
// descendants (re-rooted) and abort the rest.
// ──────────────────────────────────────────────────────────────────────
// Depth budget for speculative prefetching: prefetchScenePath returns
// immediately when called with depth >= this value, and a resolved prefetch
// only recurses into its child while depth + 1 is still below it.
const PREFETCH_MAX_DEPTH = 3;
/**
 * One speculative scene request held in the prefetch pool.
 *
 * - `promise`: resolves with the server's SceneResponse, or rejects when the
 *   request fails or is aborted.
 * - `abort`: controller whose signal is attached to the request, so the pool
 *   can cancel it.
 */
type PrefetchEntry = {
  promise: Promise<SceneResponse>;
  abort: AbortController;
};
// One hop of a speculative path. buildSpeculativeSession turns each step into
// a history entry: `fromScene` becomes the entry's scene, `fromVisitedBeats`
// its visitedBeatIds, and `exit` its "choice" exit. pathKey joins the steps'
// exit.choiceId values to form the pool key.
type ScenePathStep = {
fromScene: Scene;
fromVisitedBeats: string[];
exit: { choiceId: string; label: string; nextSceneSeed: string };
};
/**
 * Flattens a session's history into the rows shown by the dialogue-history
 * modal: one row per visited beat, in play order.
 *
 * For each visited beat id that exists in its scene:
 * - `body` is the spoken line when the beat has a speaker, otherwise the
 *   narration; `narration` is only set separately for spoken beats.
 * - `selectedChoice` is the label of the choice the player took from this
 *   beat: an advance-beat choice whose target is the next visited beat, or —
 *   on the scene's last visited beat — the choice recorded as the scene exit.
 * - `freeformAction` is the scene's freeform exit action, attached to the last
 *   visited beat.
 * Beats with none of these four pieces of content are omitted.
 *
 * @param session The current session, or null before one exists.
 * @returns History rows; an empty array when `session` is null.
 */
function buildDialogueHistory(
  session: Session | null,
): DialogueHistoryItem[] {
  const items: DialogueHistoryItem[] = [];
  if (!session) return items;

  session.history.forEach((entry, sceneIndex) => {
    const beatLookup = new Map(entry.scene.beats.map((b) => [b.id, b]));
    const visited = entry.visitedBeatIds;
    const lastVisitedIndex = visited.length - 1;

    visited.forEach((beatId, beatIndex) => {
      const beat = beatLookup.get(beatId);
      if (!beat) return;

      const isLastVisited = beatIndex === lastVisitedIndex;
      const followingBeatId = visited[beatIndex + 1];

      let takenChoice: BeatChoice | undefined;
      if (beat.next.type === "choice") {
        takenChoice = beat.next.choices.find((candidate) =>
          candidate.effect.kind === "advance-beat"
            ? candidate.effect.targetBeatId === followingBeatId
            : isLastVisited &&
              entry.exit?.kind === "choice" &&
              candidate.id === entry.exit.choiceId,
        );
      }

      let freeformAction;
      if (isLastVisited && entry.exit?.kind === "freeform") {
        freeformAction = entry.exit.action;
      }

      const spoken = Boolean(beat.speaker);
      const body = spoken ? beat.line : beat.narration;
      const narration = spoken ? beat.narration : undefined;
      if (!body && !narration && !takenChoice && !freeformAction) return;

      items.push({
        id: `${sceneIndex}:${beatId}:${beatIndex}`,
        sceneIndex: sceneIndex + 1,
        speaker: beat.speaker,
        body,
        narration,
        selectedChoice: takenChoice?.label,
        freeformAction,
      });
    });
  });

  return items;
}
/**
 * Derives the prefetch-pool key for a path: the steps' exit choice ids joined
 * with "/" (e.g. "C1/C2"). An empty path yields "".
 *
 * @param steps The path, ordered from the current scene outward.
 * @returns The pool key for that path.
 */
function pathKey(steps: ScenePathStep[]): string {
  const choiceIds: string[] = [];
  for (const { exit } of steps) {
    choiceIds.push(exit.choiceId);
  }
  return choiceIds.join("/");
}
/**
 * Builds the session a speculative /api/scene request pretends to be in.
 *
 * Base's current (last) history entry is dropped and one entry per step is
 * appended: the step's `fromScene` with its visited beats and a "choice" exit
 * built from the step's exit. The result therefore has
 * `history.length = base.history.length - 1 + steps.length`. All other session
 * fields are copied from `base`, which is not mutated.
 *
 * @param base The session to branch from.
 * @param steps The speculative path, ordered from the current scene outward.
 * @returns A new session object with the rewritten history.
 */
function buildSpeculativeSession(
  base: Session,
  steps: ScenePathStep[],
): Session {
  const settledHistory = base.history.slice(0, -1);
  const speculativeHistory = steps.map(
    ({ fromScene, fromVisitedBeats, exit }) => ({
      scene: fromScene,
      visitedBeatIds: fromVisitedBeats,
      exit: {
        kind: "choice" as const,
        choiceId: exit.choiceId,
        label: exit.label,
        nextSceneSeed: exit.nextSceneSeed,
      },
    }),
  );
  return { ...base, history: [...settledHistory, ...speculativeHistory] };
}
/**
 * Collects every distinct change-scene choice offered anywhere in `scene`.
 *
 * Beats are scanned in order; a choice id that appears on several beats is
 * reported once, using its first occurrence.
 *
 * @param scene The scene whose beats are scanned.
 * @returns The change-scene choices in first-seen order (possibly empty).
 */
function findAllChangeSceneChoices(scene: Scene): BeatChoice[] {
  const firstById = new Map<BeatChoice["id"], BeatChoice>();
  for (const beat of scene.beats) {
    if (beat.next.type !== "choice") continue;
    for (const choice of beat.next.choices) {
      if (choice.effect.kind !== "change-scene") continue;
      if (firstById.has(choice.id)) continue;
      firstById.set(choice.id, choice);
    }
  }
  return [...firstById.values()];
}
/**
 * Returns the scene's change-scene choice when it has exactly one distinct
 * such choice, i.e. the scene is a must-pass node with a single way out.
 *
 * @param scene The scene to inspect.
 * @returns The sole change-scene choice, or null when there are zero or
 *          several.
 */
function findSoleChangeSceneChoice(scene: Scene): BeatChoice | null {
  const exits = findAllChangeSceneChoices(scene);
  if (exits.length !== 1) return null;
  return exits[0]!;
}
/**
 * Speculatively requests the scene reached by following `steps` from
 * `baseSession`, and registers the in-flight request in `pool` under
 * pathKey(steps). Does nothing when `depth` has reached PREFETCH_MAX_DEPTH or
 * when that key is already pooled.
 *
 * On a successful response the async body, in order:
 *  1. records the scene (plus its imageUrl) in `resolvedSink` under
 *     `${parentSceneId}:${choiceId}` of the last step;
 *  2. starts the image fetch via getOrCreateBlobUrl without awaiting it;
 *  3. re-attaches locally-held voices, overwriting `data.characters`;
 *  4. if the new scene has exactly one change-scene choice and the depth
 *     budget allows, recurses one level deeper with that choice appended.
 *
 * A non-OK response makes the pooled promise reject with the response's
 * `error` field (or the HTTP status text). A no-op catch is attached so a
 * rejection nobody awaits is not reported as unhandled; anyone awaiting
 * `entry.promise` still observes it.
 *
 * @param pool Prefetch pool keyed by choice path; mutated.
 * @param resolvedSink Receives every successfully resolved scene; mutated.
 * @param baseSession Session the speculative history is built from.
 * @param steps Path to prefetch; callers are expected to pass at least one
 *              step (the last one is read unconditionally after the fetch).
 * @param depth Current recursion depth, compared against PREFETCH_MAX_DEPTH.
 * @param clientTts Forwarded verbatim in the /api/scene request body.
 */
function prefetchScenePath(
pool: Map,
// Resolved-prefetch sink for the gallery export. Every successful resolve
// is recorded here keyed by `${parentSceneId}:${choiceId}` so the gallery
// can let the player click any choice whose alternate the AI already paid
// to generate — even ones that were later abandoned mid-play because the
// player took a different branch. Survives `consumeChoice`'s abort sweep:
// a prefetch that's already resolved when its parent choice is abandoned
// still leaves the result here.
resolvedSink: Map,
baseSession: Session,
steps: ScenePathStep[],
depth: number,
clientTts: boolean,
): void {
if (depth >= PREFETCH_MAX_DEPTH) return;
const key = pathKey(steps);
// Already requested (or in flight) for this path — share that entry.
if (pool.has(key)) return;
const specSession = buildSpeculativeSession(baseSession, steps);
const abort = new AbortController();
const promise = (async () => {
const res = await fetch("/api/scene", {
method: "POST",
headers: {
"Content-Type": "application/json",
},
// Voices are stripped before transport to keep the request body small.
body: JSON.stringify({ session: stripVoicesForTransport(specSession), clientTts }),
signal: abort.signal,
});
if (!res.ok) {
// Prefer the server's JSON `error` message; fall back to the status text
// when the body is not JSON or carries no error field.
const j = (await res.json().catch(() => ({}))) as { error?: string };
throw new Error(j.error ?? res.statusText);
}
const data = (await res.json()) as SceneResponse;
// Record this resolved alternate for the gallery export. Key is
// (parent scene id at the choice point) : (choice id). Includes the
// CDN imageUrl on the Scene so the gallery has everything it needs to
// render without any further info from the engine.
const lastStep = steps[steps.length - 1]!;
resolvedSink.set(`${lastStep.fromScene.id}:${lastStep.exit.choiceId}`, {
...data.scene,
imageUrl: data.imageUrl,
});
// Kick off the blob fetch for this URL so when the player eventually
// picks this choice, transitioning is a no-op cache lookup instead of a
// fresh CDN download. Don't await — let it run in the background; the
// transition path awaits the same cached promise via getOrCreateBlobUrl.
void getOrCreateBlobUrl(data.imageUrl);
// Re-attach locally-held voices the server stripped from known characters.
data.characters = mergeCharactersPreserveVoice(
baseSession.characters,
data.characters,
);
// Recursive: if the resulting scene has exactly one change-scene exit,
// it is a must-pass node — prefetch its child too.
if (depth + 1 < PREFETCH_MAX_DEPTH) {
const sole = findSoleChangeSceneChoice(data.scene);
if (sole && sole.effect.kind === "change-scene") {
const nextStep: ScenePathStep = {
fromScene: data.scene,
fromVisitedBeats: [data.scene.entryBeatId],
exit: {
choiceId: sole.id,
label: sole.label,
nextSceneSeed: sole.effect.nextSceneSeed,
},
};
// Carry forward the registry that the parent prefetch result already
// settled (it may include characters introduced by the intermediate
// scene). Without this, the L2+ prefetch starts from the original
// base.characters and a later transition through this survivor would
// silently drop voices the player has already heard.
const carriedBase: Session = {
...baseSession,
characters: data.characters,
storyState: data.storyState,
};
// NOTE(review): the child is keyed by the full path from the scene that
// was current when this prefetch started. If consumeChoice has re-rooted
// the pool before this point is reached, the child appears to be stored
// under the pre-re-root key — confirm callers account for that.
prefetchScenePath(
pool,
resolvedSink,
carriedBase,
[...steps, nextStep],
depth + 1,
clientTts,
);
}
}
return data;
})();
// Mark the rejection as handled so an abandoned prefetch does not surface as
// an unhandled rejection; the pooled `promise` itself still rejects.
promise.catch(() => {});
pool.set(key, { promise, abort });
}
/**
 * Commits the player's change-scene choice against the prefetch pool.
 *
 * The entry stored under `choiceId` (if any) is returned and removed from the
 * pool without being aborted. Entries below it ("<choiceId>/...") survive and
 * are re-rooted by dropping that prefix, so they are keyed relative to the new
 * current scene. Every other entry is aborted and dropped.
 *
 * @param pool Prefetch pool keyed by choice path; mutated in place.
 * @param choiceId Id of the choice the player just took.
 * @returns The prefetch for that choice, or undefined when none was pooled.
 */
function consumeChoice(
  pool: Map<string, PrefetchEntry>,
  choiceId: string,
): PrefetchEntry | undefined {
  const chosen = pool.get(choiceId);
  // Match on "<id>/" rather than "<id>" so that e.g. "C10" is not mistaken
  // for a descendant of "C1".
  const descendantPrefix = `${choiceId}/`;
  const survivors = new Map<string, PrefetchEntry>();
  for (const [key, entry] of pool) {
    if (key === choiceId) continue;
    if (key.startsWith(descendantPrefix)) {
      survivors.set(key.slice(descendantPrefix.length), entry);
    } else {
      entry.abort.abort();
    }
  }
  pool.clear();
  for (const [key, entry] of survivors) pool.set(key, entry);
  return chosen;
}
/**
 * Aborts every pooled prefetch and empties the pool.
 *
 * @param pool Prefetch pool keyed by choice path; mutated in place.
 */
function clearPool(pool: Map<string, PrefetchEntry>): void {
  for (const entry of pool.values()) {
    entry.abort.abort();
  }
  pool.clear();
}
// ──────────────────────────────────────────────────────────────────────
// BYO voice resolution (client-direct Xiaomi TTS).
//
// In BYO mode the server skips all TTS (clientTts:true), so the browser must
// obtain each speaker's reference audio itself. `cache` is keyed by character
// NAME and persists for the whole session, so a voice locked in on a
// character's first speaking beat stays identical across every later scene —
// even though /api/scene returns its characters without `.voice`. Storing the
// in-flight Promise (not the resolved value) dedupes the burst of concurrent
// beats by the same speaker into ONE voicedesign call, which matters because
// Xiaomi rate-limits voicedesign hard.
// ──────────────────────────────────────────────────────────────────────
/**
 * Resolves the voice to synthesize `speaker` with, provisioning one on first
 * use and caching the in-flight promise by character name.
 *
 * Resolution order:
 *  1. a promise already cached under the speaker's name;
 *  2. the speaker's own `voice` (cached, then returned);
 *  3. a voice provisioned from `voiceDescription` (cached while in flight);
 *  4. null when the speaker has neither.
 *
 * @param cache Per-session cache keyed by character name; mutated.
 * @param cfg TTS configuration passed through to provisionVoice.
 * @param speaker The character about to speak.
 * @returns The speaker's voice, or null when none can be obtained.
 * @throws Whatever provisionVoice rejects with. The failed entry is evicted
 *         first so a later beat can retry.
 */
async function resolveByoVoice(
  cache: Map<string, Promise<CharacterVoice | null>>,
  cfg: TtsConfig,
  speaker: Character,
): Promise<CharacterVoice | null> {
  const cached = cache.get(speaker.name);
  if (cached) return cached;
  // Prebaked cards ship baked reference audio — reuse it directly (cross-key
  // synth with the user's key works), keeping the prebaked voice identical.
  if (speaker.voice) {
    const ready = Promise.resolve(speaker.voice);
    cache.set(speaker.name, ready);
    return ready;
  }
  if (!speaker.voiceDescription) return null;
  // Cache the promise BEFORE awaiting so concurrent beats by the same speaker
  // share this one provisioning call.
  const pending = provisionVoice(cfg, speaker.voiceDescription);
  cache.set(speaker.name, pending);
  try {
    return await pending;
  } catch (e) {
    // Failed provision — let a later beat retry. Only evict our own entry so a
    // newer attempt stored under the same name is never discarded.
    if (cache.get(speaker.name) === pending) cache.delete(speaker.name);
    throw e;
  }
}
// ──────────────────────────────────────────────────────────────────────
// Component
// ──────────────────────────────────────────────────────────────────────
function PlayInner() {
const router = useRouter();
const params = useSearchParams();
const [phase, setPhase] = useState("loading-first");
const [session, setSession] = useState(null);
const [currentScene, setCurrentScene] = useState(null);
const [currentBeatId, setCurrentBeatId] = useState(null);
const [imageUrl, setImageUrl] = useState(null);
const [beatAudioMap, setBeatAudioMap] = useState>({});
// Lazy-initialize 优先级:本局选择(homepage 的「语音配音」存到 sessionStorage:infiplot:custom)
// > 上次会话的粘性偏好(localStorage:infiplot:muted) > 默认非静音。
// 这样首页选了「关闭」开始游戏,进来就是静音;选「开启」就不是静音;进入 play 页后用户自己
// 切换 静音/有声 时再用 localStorage 持久化,下一局开新游戏 sessionStorage 选择会再覆盖。
const [muted, setMuted] = useState(() => {
if (typeof window === "undefined") return false;
try {
const stored = window.sessionStorage.getItem("infiplot:custom");
if (stored) {
const parsed = JSON.parse(stored) as { audioEnabled?: boolean };
if (typeof parsed.audioEnabled === "boolean") {
return !parsed.audioEnabled;
}
}
return window.localStorage.getItem(MUTED_STORAGE_KEY) === "1";
} catch {
return false;
}
});
const [pendingClick, setPendingClick] = useState<{
x: number;
y: number;
} | null>(null);
const [error, setError] = useState(null);
const [presentation, setPresentation] = useState(false);
// Session-locked image orientation (see detectOrientation). "portrait" makes
// the whole play surface render full-bleed vertical on phones.
const [orientation, setOrientation] = useState("landscape");
const [lastExitLabel, setLastExitLabel] = useState(null);
// Consecutive server-side TTS misses (null audio / failed /api/beat-audio).
// Climbs when the shared server key is rate-limited by MiMo — the exact pain
// BYO fixes — so the play page can nudge non-BYO users to add their own key.
// Reset to 0 on any successful synth. Only the server path touches it.
const [silenceStrikes, setSilenceStrikes] = useState(0);
// Once the player dismisses the silence nudge, keep it gone for this session.
const [nudgeDismissed, setNudgeDismissed] = useState(false);
const [settingsOpen, setSettingsOpen] = useState(false);
const [visionClickEnabled, setVisionClickEnabled] = useState(true);
const startedRef = useRef(false);
const poolRef = useRef