5a7daa8452
Signed-off-by: baizhi958216 <1475289190@qq.com>
1616 lines
62 KiB
TypeScript
1616 lines
62 KiB
TypeScript
"use client";
|
||
|
||
import Link from "next/link";
|
||
import { useRouter, useSearchParams } from "next/navigation";
|
||
import {
|
||
Suspense,
|
||
useCallback,
|
||
useEffect,
|
||
useLayoutEffect,
|
||
useMemo,
|
||
useRef,
|
||
useState,
|
||
} from "react";
|
||
import {
|
||
PlayCanvas,
|
||
type Phase,
|
||
} from "@/components/PlayCanvas";
|
||
import type { DialogueHistoryItem } from "@/components/DialogueHistoryModal";
|
||
import { TtsKeyModal } from "@/components/TtsKeyModal";
|
||
import { annotateClick } from "@/lib/annotateClient";
|
||
import { loadClientTtsConfig } from "@/lib/clientTtsConfig";
|
||
import { PRESETS } from "@/lib/presets";
|
||
import { provisionVoice, synthesize } from "@infiplot/tts-client";
|
||
import type {
|
||
Beat,
|
||
BeatChoice,
|
||
Character,
|
||
CharacterVoice,
|
||
InsertBeatResponse,
|
||
Orientation,
|
||
Scene,
|
||
SceneExit,
|
||
SceneResponse,
|
||
Session,
|
||
StartResponse,
|
||
TtsConfig,
|
||
VisionResponse,
|
||
} from "@infiplot/types";
|
||
import { track } from "@/lib/analytics";
|
||
|
||
// localStorage key for the sticky mute preference; the stored value "1" means muted.
const MUTED_STORAGE_KEY = "infiplot:muted";
|
||
|
||
// ── FOT reduction helpers ──────────────────────────────────────────────

/**
 * Returns a shallow copy of `session` in which every character's `voice` is
 * set to `undefined`, so the bulky `voice.referenceAudioBase64` payload is not
 * serialized into request bodies sent to the server.
 *
 * The engine only needs character names + visualDescriptions for scene
 * generation; voice data is only used by /api/beat-audio, which receives the
 * voice directly rather than via the session. The client keeps its voices
 * locally and re-attaches them to the response with
 * mergeCharactersPreserveVoice. The input session is not mutated.
 *
 * @param session Session about to be sent over the wire.
 * @returns A new session object whose characters carry `voice: undefined`.
 */
function stripVoicesForTransport(session: Session): Session {
  const characters = session.characters.map((character) => {
    return { ...character, voice: undefined };
  });
  return { ...session, characters };
}
|
||
|
||
/**
 * Combines the server-returned character list with voices the client already
 * holds.
 *
 * The server strips `voice` from already-known characters (P0), so only NEW
 * characters arrive carrying one. The result follows `remote` in order and
 * membership. For a name that also exists in `local`, a missing remote voice is
 * filled in from the local copy; a voice present on the remote entry wins.
 * Characters with no local counterpart are returned as the same object.
 *
 * @param local Characters the client currently holds (with voices).
 * @param remote Characters from the server response.
 * @returns The remote list with locally-held voices re-attached.
 */
function mergeCharactersPreserveVoice(
  local: Character[],
  remote: Character[],
): Character[] {
  const known = new Map<Character["name"], Character>();
  for (const character of local) {
    // Later duplicates overwrite earlier ones, same as building the Map from pairs.
    known.set(character.name, character);
  }

  return remote.map((incoming) => {
    const existing = known.get(incoming.name);
    return existing
      ? { ...incoming, voice: incoming.voice ?? existing.voice }
      : incoming;
  });
}
|
||
|
||
// Consecutive silent (no-audio) beats before we surface the BYO-key nudge to a
|
||
// non-BYO, unmuted player. Set high enough that one transient miss won't trip
|
||
// it, low enough to catch a scene that's clearly being rate-limited.
|
||
// Unit: consecutive silent beats. Presumably checked against the silenceStrikes counter — confirm at the usage site.
const SILENCE_NUDGE_THRESHOLD = 3;
|
||
|
||
/**
 * Decides which scene-image orientation to request.
 *
 * "portrait" (the 9:16 image) is returned only when BOTH media queries match:
 * `(orientation: portrait)` and `(pointer: coarse)` — i.e. a touch device held
 * upright. Every other combination, including any fine-pointer (mouse) device,
 * gets "landscape" (the 16:9 image), as does an environment without `window`.
 * The result is meant to be detected once and locked for the whole session.
 *
 * @returns "portrait" for an upright touch device, otherwise "landscape".
 */
function detectOrientation(): Orientation {
  if (typeof window === "undefined") {
    return "landscape";
  }
  // Both queries are always evaluated, in this order.
  const [isUpright, isTouch] = [
    "(orientation: portrait)",
    "(pointer: coarse)",
  ].map((query) => window.matchMedia(query).matches);

  if (isUpright && isTouch) return "portrait";
  return "landscape";
}
|
||
|
||
// useLayoutEffect runs before the browser paints, so it can correct first-frame
// state without a visible flash — but it warns when called during SSR.
// PlayInner only ever renders on the client (/play prerenders the Suspense
// fallback), yet the server case still resolves to useEffect to keep that
// warning out.
const useIsomorphicLayoutEffect =
  typeof window === "undefined" ? useEffect : useLayoutEffect;
|
||
|
||
// Cap how long we wait for the browser to download + decode a scene image
// before giving up and rendering anyway. Runware's CDN is usually <2s for a
// 1792×1024 PNG, but over slow links / VPN / strict corporate networks the
// same download can stretch to 10-20s. The previous 8s ceiling fired inside
// that window, and because the rendered <img> reserved no aspect-ratio space,
// the layout collapsed to a one-pixel-tall sliver until the bytes actually
// finished arriving — the "long wait → a thin line → image suddenly appears"
// sequence from the original report.
// 20s + the <img> aspect-video fallback together remove that failure mode.
|
||
// Milliseconds. Used as preloadImage's load+decode deadline and as the abort deadline of the proxied fetch.
const IMAGE_PRELOAD_TIMEOUT_MS = 20000;
|
||
|
||
// ──────────────────────────────────────────────────────────────────────
|
||
// Two ways an <img> gets its pixels, picked per-URL by shouldProxy():
|
||
//
|
||
// 1. DIRECT (default — no proxy configured): preload the URL with an
|
||
// Image() + decode() so the HTTP cache is warm and the bitmap decoded
|
||
// before React commits, then hand the ORIGINAL URL to <img>. This is the
|
||
// long-standing behavior; deployers who set no env var get exactly this
|
||
// and are completely unaffected by the proxy machinery below.
|
||
//
|
||
// 2. PROXY (opt-in — NEXT_PUBLIC_IMAGE_PROXY_URL set, host allow-listed):
//    fetch the bytes through the Cloudflare Worker (which adds CORS and
//    serves over stable HTTP/2), await the FULL body via .blob(), materialize
//    a blob: URL over that local copy, and hand THAT to <img>. The <img>
//    never sees a network-backed src, so there is no "bytes still in flight"
//    middle state and no progressive paint.
//    Why it matters: Chrome's direct fetch of im.runware.ai sometimes hits
//    ERR_QUIC_PROTOCOL_ERROR mid-stream, leaving partial PNG bytes that
//    paint row-by-row. The Worker re-fetches server-to-server (no QUIC
//    fragility) and serves over HTTP/2 — atomic and reliable. Trade-off:
//    callers MUST revoke the blob URL when swapping it out (revokeBlobUrlFor)
//    or the bytes leak in the JS heap.
|
||
//
|
||
// Data URIs (MOCK_IMAGE mode) are already local; passed through unchanged
|
||
// on both paths. blobUrlCache is keyed by the ORIGINAL URL either way.
|
||
// ──────────────────────────────────────────────────────────────────────
|
||
|
||
/**
 * Direct-path preload: loads and decodes `url` in memory before the caller
 * commits it to React state, so the HTTP cache is warm and first paint is
 * instant once the <img> mounts.
 *
 * The returned promise never rejects. It resolves after load + decode, on a
 * load error, or when IMAGE_PRELOAD_TIMEOUT_MS elapses, whichever comes first
 * — better a broken <img> than a hung play loop. (im.runware.ai sends no CORS
 * header, so the bytes cannot be fetch()ed here; warming + decoding is the
 * most the direct path can do.)
 *
 * @param url Image URL to warm.
 */
function preloadImage(url: string): Promise<void> {
  return new Promise<void>((resolve) => {
    const probe = new Image();

    // One deadline armed across BOTH the network load and the decode, so a
    // hung decode still resolves quietly.
    const deadline = setTimeout(settle, IMAGE_PRELOAD_TIMEOUT_MS);

    // Single exit for every path. resolve() is idempotent, so whichever of
    // load+decode / error / timeout fires first wins.
    function settle(): void {
      clearTimeout(deadline);
      resolve();
    }

    probe.onerror = settle;
    probe.onload = () => {
      // decode() forces the bitmap to be fully decoded before we proceed;
      // without it a slow decode could still flash on first paint.
      probe.decode().then(settle, settle);
    };
    probe.src = url;
  });
}
|
||
|
||
// Base URL of the opt-in Cloudflare Workers image proxy (deploy your own — see
// the link in README). Inlined by Next.js at build time. Empty / unset → no
// proxy → every URL takes the direct path, exactly as if this feature didn't
// exist. One trailing "/" is dropped so the base joins cleanly with "/?url=…".
const IMAGE_PROXY_BASE = ((): string => {
  const configured = process.env.NEXT_PUBLIC_IMAGE_PROXY_URL ?? "";
  return configured.endsWith("/") ? configured.slice(0, -1) : configured;
})();
|
||
|
||
// Hostnames eligible for the proxy. Default: Runware's CDN only. Deployers who
// point IMAGE_BASE_URL at another provider can opt that provider's image host
// in via NEXT_PUBLIC_IMAGE_PROXY_ALLOWED_HOSTS (comma-separated). Inlined at
// build time. Entries are trimmed and lower-cased, and blank entries are
// dropped. Anything not on this list stays on the direct path.
const IMAGE_PROXY_ALLOWED_HOSTS = ((): string[] => {
  const configured =
    process.env.NEXT_PUBLIC_IMAGE_PROXY_ALLOWED_HOSTS ?? "im.runware.ai";
  const hosts: string[] = [];
  for (const piece of configured.split(",")) {
    const host = piece.trim().toLowerCase();
    if (host) hosts.push(host);
  }
  return hosts;
})();
|
||
|
||
/**
 * Tells whether `originalUrl` should be fetched through the image proxy.
 *
 * True only when a proxy is configured (IMAGE_PROXY_BASE is non-empty) AND the
 * URL is a remote http(s) image whose host is on IMAGE_PROXY_ALLOWED_HOSTS.
 * data: URIs (MOCK_IMAGE) are already local; malformed URLs and any other
 * origin or scheme fall through to the direct path.
 *
 * @param originalUrl The image URL as returned by the server.
 */
function shouldProxy(originalUrl: string): boolean {
  if (!IMAGE_PROXY_BASE || originalUrl.startsWith("data:")) return false;

  try {
    const target = new URL(originalUrl);
    const isHttp = target.protocol === "http:" || target.protocol === "https:";
    return (
      isHttp && IMAGE_PROXY_ALLOWED_HOSTS.includes(target.hostname.toLowerCase())
    );
  } catch {
    // new URL() throws on malformed input — treat it as "not proxied".
    return false;
  }
}
|
||
|
||
/**
 * Builds the proxy request URL for `originalUrl`: the proxy base followed by
 * `/?url=` and the URI-component-encoded original URL.
 */
function proxiedImageUrl(originalUrl: string): string {
  const encodedTarget = encodeURIComponent(originalUrl);
  return IMAGE_PROXY_BASE + "/?url=" + encodedTarget;
}
|
||
|
||
/**
 * Resolves the URL that should be handed to <img> for `url`. Never rejects.
 *
 * - data: URIs are already local and are returned unchanged.
 * - Proxy path (opt-in, when shouldProxy(url) is true): fetches the bytes
 *   through the Worker, waits for the full body, and returns a blob: URL over
 *   that local copy. On a non-OK response, a network error, or the
 *   IMAGE_PRELOAD_TIMEOUT_MS deadline it falls back to the original URL so
 *   <img> still tries (possible progressive paint — same as the direct path,
 *   never worse).
 * - Direct path (default): warms the cache + decodes via preloadImage and
 *   returns the original URL. No fetch() here — im.runware.ai has no CORS, so
 *   fetch().blob() would throw.
 *
 * @param url Original image URL.
 */
async function fetchImageAsBlobUrl(url: string): Promise<string> {
  if (url.startsWith("data:")) return url;

  if (shouldProxy(url)) {
    const controller = new AbortController();
    const deadline = setTimeout(
      () => controller.abort(),
      IMAGE_PRELOAD_TIMEOUT_MS,
    );
    try {
      const response = await fetch(proxiedImageUrl(url), {
        signal: controller.signal,
      });
      if (response.ok) {
        return URL.createObjectURL(await response.blob());
      }
      return url;
    } catch {
      return url;
    } finally {
      clearTimeout(deadline);
    }
  }

  await preloadImage(url);
  return url;
}
|
||
|
||
// Module-level cache so speculative prefetches and the eventual commit share
|
||
// the same in-flight fetch — no double-download per scene. Keyed by the
|
||
// ORIGINAL CDN URL (the blob: URL it resolves to is the value). Persists for
|
||
// the page's lifetime; entries are explicitly revoked when the scene swaps.
|
||
// original URL → promise of the URL handed to <img> (a blob: URL on the proxy path; otherwise the original or data: URL).
const blobUrlCache = new Map<string, Promise<string>>();
|
||
|
||
/**
 * Returns the cached (possibly still in-flight) resolution for `originalUrl`,
 * starting a fetch via fetchImageAsBlobUrl and caching its promise on first
 * request. Repeated calls for the same URL share one promise, so a speculative
 * prefetch and the eventual commit never download twice.
 *
 * @param originalUrl Original image URL; this is the cache key.
 */
function getOrCreateBlobUrl(originalUrl: string): Promise<string> {
  const inFlight = blobUrlCache.get(originalUrl);
  if (inFlight) return inFlight;

  const started = fetchImageAsBlobUrl(originalUrl);
  blobUrlCache.set(originalUrl, started);
  return started;
}
|
||
|
||
/**
 * Drops the cache entry for `originalUrl` and, once its promise settles,
 * revokes the resolved URL if it is a blob: URL. A URL with no cache entry is a
 * no-op. Non-blob results (the original URL or a data: URI) are only removed
 * from the cache.
 *
 * @param originalUrl Original image URL; this is the cache key.
 */
function revokeBlobUrlFor(originalUrl: string): void {
  const pending = blobUrlCache.get(originalUrl);
  if (pending === undefined) return;

  blobUrlCache.delete(originalUrl);

  const revokeIfBlob = (resolvedUrl: string): void => {
    if (resolvedUrl.startsWith("blob:")) URL.revokeObjectURL(resolvedUrl);
  };
  pending.then(revokeIfBlob).catch(() => {});
}
|
||
|
||
// ──────────────────────────────────────────────────────────────────────
|
||
// Prefetch pool — speculative SceneResponses keyed by choice path.
|
||
//
|
||
// Key format: "C1" → reached by choosing C1 from current scene.
|
||
// "C1/C2" → after C1, then C2 (recursive must-pass prefetch).
|
||
//
|
||
// When the player picks a change-scene choice, we keep that key's
|
||
// descendants (re-rooted) and abort the rest.
|
||
// ──────────────────────────────────────────────────────────────────────
|
||
|
||
// Upper bound on prefetch recursion: prefetchScenePath returns immediately once depth >= this value.
const PREFETCH_MAX_DEPTH = 3;
|
||
|
||
/** One speculative /api/scene request held in the prefetch pool. */
type PrefetchEntry = {
  /** Controller whose signal is attached to the request; abort() cancels it. */
  abort: AbortController;
  /** Settles with the prefetched scene; rejects on an HTTP error or abort. */
  promise: Promise<SceneResponse>;
};
|
||
|
||
/** One hop along a speculative choice path. */
type ScenePathStep = {
  /** Scene the player leaves on this hop. */
  fromScene: Scene;
  /** Beat ids recorded as visited inside `fromScene`. */
  fromVisitedBeats: string[];
  /** The change-scene choice taken to leave `fromScene`. */
  exit: {
    choiceId: string;
    label: string;
    nextSceneSeed: string;
  };
};
|
||
|
||
/**
 * Flattens the session history into the rows shown by the dialogue-history
 * modal: one row per visited beat that has something to show.
 *
 * For the history entry whose scene id equals `currentSceneId`, the live
 * `currentVisitedBeatIds` list is used instead of the entry's stored
 * `visitedBeatIds`. Visited ids with no matching beat are skipped, as are beats
 * with no line, narration, selected choice or freeform action.
 *
 * The selected choice on a choice beat is the first choice that either
 * advances to the next visited beat, or — on the last visited beat only —
 * matches the id of the entry's "choice" exit. A "freeform" exit's action is
 * attached to the last visited beat.
 *
 * @param session Current session, or null before one exists (yields []).
 * @param currentSceneId Id of the scene currently on screen, if any.
 * @param currentVisitedBeatIds Beats visited so far in the current scene.
 * @returns Rows in history order; `sceneIndex` is 1-based.
 */
function buildDialogueHistory(
  session: Session | null,
  currentSceneId: string | undefined,
  currentVisitedBeatIds: string[],
): DialogueHistoryItem[] {
  const items: DialogueHistoryItem[] = [];
  if (!session) return items;

  session.history.forEach((entry, sceneIndex) => {
    const beatLookup = new Map(entry.scene.beats.map((b) => [b.id, b]));
    const isCurrentScene = entry.scene.id === currentSceneId;
    const visited = isCurrentScene ? currentVisitedBeatIds : entry.visitedBeatIds;
    const finalIndex = visited.length - 1;

    visited.forEach((beatId, beatIndex) => {
      const beat = beatLookup.get(beatId);
      if (!beat) return;

      const isFinalVisited = beatIndex === finalIndex;
      const followingBeatId = visited[beatIndex + 1];

      const pickedChoice =
        beat.next.type === "choice"
          ? beat.next.choices.find((candidate) =>
              candidate.effect.kind === "advance-beat"
                ? candidate.effect.targetBeatId === followingBeatId
                : isFinalVisited &&
                  entry.exit?.kind === "choice" &&
                  candidate.id === entry.exit.choiceId,
            )
          : undefined;

      const freeformAction =
        isFinalVisited && entry.exit?.kind === "freeform"
          ? entry.exit.action
          : undefined;

      // A speaking beat shows its line as the body and its narration alongside;
      // a silent beat shows the narration as the body.
      const body = beat.speaker ? beat.line : beat.narration;
      const narration = beat.speaker ? beat.narration : undefined;
      if (!(body || narration || pickedChoice || freeformAction)) return;

      items.push({
        id: `${sceneIndex}:${beatId}:${beatIndex}`,
        sceneIndex: sceneIndex + 1,
        speaker: beat.speaker,
        body,
        narration,
        selectedChoice: pickedChoice?.label,
        freeformAction,
      });
    });
  });

  return items;
}
|
||
|
||
/**
 * Builds the prefetch-pool key for a choice path: the steps' choice ids joined
 * with "/", e.g. "C1" or "C1/C2". An empty path yields "".
 */
function pathKey(steps: ScenePathStep[]): string {
  const choiceIds: string[] = [];
  for (const step of steps) {
    choiceIds.push(step.exit.choiceId);
  }
  return choiceIds.join("/");
}
|
||
|
||
/**
 * Builds the session the server would see if the player had already followed
 * `steps` from the current scene.
 *
 * The base history's current (last) entry is dropped, then one entry is
 * appended per step: that step's `fromScene`, its visited beats, and a
 * "choice" exit describing the choice taken. The resulting history length is
 * `base.history.length - 1 + steps.length`. `base` is not mutated.
 *
 * @param base Session to extend; all other fields are copied through.
 * @param steps Speculative hops, in order.
 */
function buildSpeculativeSession(
  base: Session,
  steps: ScenePathStep[],
): Session {
  const settled = base.history.slice(0, -1);
  const speculative = steps.map((step) => ({
    scene: step.fromScene,
    visitedBeatIds: step.fromVisitedBeats,
    exit: {
      kind: "choice" as const,
      choiceId: step.exit.choiceId,
      label: step.exit.label,
      nextSceneSeed: step.exit.nextSceneSeed,
    },
  }));
  return { ...base, history: [...settled, ...speculative] };
}
|
||
|
||
/**
 * Collects every choice in `scene` whose effect is "change-scene", scanning
 * beats and their choices in order. A choice id that appears on several beats
 * is reported once, using its first occurrence.
 */
function findAllChangeSceneChoices(scene: Scene): BeatChoice[] {
  // Map keeps insertion order, so first-seen order is preserved.
  const exitsById = new Map<string, BeatChoice>();
  for (const beat of scene.beats) {
    if (beat.next.type !== "choice") continue;
    for (const choice of beat.next.choices) {
      if (choice.effect.kind === "change-scene" && !exitsById.has(choice.id)) {
        exitsById.set(choice.id, choice);
      }
    }
  }
  return [...exitsById.values()];
}
|
||
|
||
/**
 * Returns the scene's change-scene choice when it has exactly one (by id), or
 * null when it has none or several.
 */
function findSoleChangeSceneChoice(scene: Scene): BeatChoice | null {
  const [only, ...others] = findAllChangeSceneChoices(scene);
  return only !== undefined && others.length === 0 ? only : null;
}
|
||
|
||
/**
 * Speculatively requests the scene reached by following `steps` from the
 * current scene and registers the in-flight request in `pool` under
 * pathKey(steps). Does nothing when `depth` has reached PREFETCH_MAX_DEPTH or
 * the key is already pooled.
 *
 * When the response arrives:
 *  - the scene image is warmed via getOrCreateBlobUrl (not awaited), unless
 *    the request was aborted in the meantime;
 *  - locally-held voices are re-attached to the returned characters;
 *  - if the new scene has exactly one change-scene exit (a must-pass node), its
 *    child is prefetched recursively — but ONLY while this request is still the
 *    pool's entry for its own key.
 *
 * That last guard matters because the pool is re-rooted when the player picks
 * a choice (see consumeChoice): by then this request has been consumed,
 * re-keyed, or aborted, and a child registered under the old path key would be
 * a stale entry. Since choice ids follow a "C1"/"C2" pattern, a later
 * consumeChoice in the new scene could mistake that stale entry for one of its
 * own descendants. Skipping the recursion in that case only costs one
 * speculative request.
 *
 * The request promise rejects on a non-OK response (with the server's `error`
 * or the status text) and on abort; a no-op catch is attached so an unconsumed
 * failure is not reported as an unhandled rejection.
 *
 * @param pool Prefetch pool, keyed by choice path relative to the current scene.
 * @param baseSession Session the speculative history is built on.
 * @param steps Choice path to the scene being prefetched.
 * @param depth Recursion depth of this call.
 * @param clientTts Forwarded to /api/scene; true when the browser does TTS itself.
 */
function prefetchScenePath(
  pool: Map<string, PrefetchEntry>,
  baseSession: Session,
  steps: ScenePathStep[],
  depth: number,
  clientTts: boolean,
): void {
  if (depth >= PREFETCH_MAX_DEPTH) return;
  const key = pathKey(steps);
  if (pool.has(key)) return;

  const specSession = buildSpeculativeSession(baseSession, steps);
  const abort = new AbortController();

  // True while this request is still registered under the key it was created
  // with. consumeChoice / clearPool remove or re-key entries, after which any
  // key derived from `steps` no longer describes a path from the current scene.
  const isStillRegistered = (): boolean => pool.get(key)?.abort === abort;

  const promise = (async () => {
    const res = await fetch("/api/scene", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        session: stripVoicesForTransport(specSession),
        clientTts,
      }),
      signal: abort.signal,
    });
    if (!res.ok) {
      const j = (await res.json().catch(() => ({}))) as { error?: string };
      throw new Error(j.error ?? res.statusText);
    }
    const data = (await res.json()) as SceneResponse;

    // Kick off the image fetch so that, if the player picks this choice, the
    // transition is a cache lookup instead of a fresh download. Not awaited —
    // the transition path awaits the same cached promise. Skipped for an
    // aborted branch: nobody will display (or revoke) that image.
    if (!abort.signal.aborted) {
      void getOrCreateBlobUrl(data.imageUrl);
    }

    // Re-attach locally-held voices the server stripped from known characters.
    data.characters = mergeCharactersPreserveVoice(
      baseSession.characters,
      data.characters,
    );

    // Recursive: a scene with exactly one change-scene exit is a must-pass
    // node — prefetch its child too, provided our own key is still valid.
    if (depth + 1 < PREFETCH_MAX_DEPTH && isStillRegistered()) {
      const sole = findSoleChangeSceneChoice(data.scene);
      if (sole && sole.effect.kind === "change-scene") {
        const nextStep: ScenePathStep = {
          fromScene: data.scene,
          fromVisitedBeats: [data.scene.entryBeatId],
          exit: {
            choiceId: sole.id,
            label: sole.label,
            nextSceneSeed: sole.effect.nextSceneSeed,
          },
        };
        // Carry forward the registry the parent result already settled (it
        // may include characters introduced by the intermediate scene).
        // Without this, the deeper prefetch would start from the original
        // base.characters and a later transition through it would silently
        // drop voices the player has already heard.
        const carriedBase: Session = {
          ...baseSession,
          characters: data.characters,
          storyState: data.storyState,
        };
        prefetchScenePath(
          pool,
          carriedBase,
          [...steps, nextStep],
          depth + 1,
          clientTts,
        );
      }
    }

    return data;
  })();

  // Speculative: failures surface only if/when the entry is consumed.
  promise.catch(() => {});
  pool.set(key, { promise, abort });
}
|
||
|
||
/**
 * Re-roots the prefetch pool after the player picks `choiceId`.
 *
 * The entry stored under exactly `choiceId` is returned (and removed from the
 * pool, but not aborted). Entries under "<choiceId>/…" survive with that
 * prefix stripped, so they are keyed relative to the new scene. Every other
 * entry is aborted and dropped. The pool is modified in place.
 *
 * NOTE(review): abort() only cancels the request. If a discarded prefetch had
 * already completed, the image it warmed through getOrCreateBlobUrl appears to
 * stay in blobUrlCache — confirm whether those blob: URLs need revoking.
 *
 * @param pool Prefetch pool keyed by choice path.
 * @param choiceId Id of the change-scene choice the player took.
 * @returns The prefetched entry for that choice, or undefined if none exists.
 */
function consumeChoice(
  pool: Map<string, PrefetchEntry>,
  choiceId: string,
): PrefetchEntry | undefined {
  const chosen = pool.get(choiceId);
  const descendantPrefix = `${choiceId}/`;
  const kept: Array<[string, PrefetchEntry]> = [];

  pool.forEach((entry, key) => {
    if (key === choiceId) return;
    if (key.startsWith(descendantPrefix)) {
      kept.push([key.slice(descendantPrefix.length), entry]);
    } else {
      entry.abort.abort();
    }
  });

  pool.clear();
  for (const [rerootedKey, entry] of kept) {
    pool.set(rerootedKey, entry);
  }
  return chosen;
}
|
||
|
||
/** Aborts every pooled prefetch request and empties the pool in place. */
function clearPool(pool: Map<string, PrefetchEntry>): void {
  pool.forEach((entry) => {
    entry.abort.abort();
  });
  pool.clear();
}
|
||
|
||
// ──────────────────────────────────────────────────────────────────────
// BYO voice resolution (client-direct Xiaomi TTS).
//
// In BYO mode the server skips all TTS (clientTts:true), so the browser must
// obtain each speaker's reference audio itself. `cache` is keyed by character
// NAME and persists for the whole session, so a voice locked in on a
// character's first speaking beat stays identical across every later scene —
// even though /api/scene returns its characters without `.voice`. Storing the
// in-flight Promise (not the resolved value) dedupes the burst of concurrent
// beats by the same speaker into ONE voicedesign call, which matters because
// Xiaomi rate-limits voicedesign hard.
// ──────────────────────────────────────────────────────────────────────

/**
 * Resolves the voice to synthesize `speaker` with in BYO mode.
 *
 * Resolution order:
 *  1. a promise already cached under the speaker's name (possibly in flight);
 *  2. the voice baked into the character (prebaked cards ship reference audio;
 *     cross-key synth with the user's key works), which is cached and reused so
 *     the prebaked voice stays identical;
 *  3. a voice provisioned from `speaker.voiceDescription`, cached while in flight.
 *
 * @param cache Per-session voice cache keyed by character name; mutated.
 * @param cfg The user's TTS configuration, passed to provisionVoice.
 * @param speaker Character whose voice is needed.
 * @returns The voice, or null when the speaker has neither a voice nor a
 *   voiceDescription.
 * @throws Whatever provisionVoice rejects with. The failed entry is evicted so
 *   a later beat can retry.
 */
async function resolveByoVoice(
  cache: Map<string, Promise<CharacterVoice>>,
  cfg: TtsConfig,
  speaker: Character,
): Promise<CharacterVoice | null> {
  const cached = cache.get(speaker.name);
  if (cached) return cached;

  if (speaker.voice) {
    const ready = Promise.resolve(speaker.voice);
    cache.set(speaker.name, ready);
    return ready;
  }

  if (!speaker.voiceDescription) return null;

  const provisioning = provisionVoice(cfg, speaker.voiceDescription);
  cache.set(speaker.name, provisioning);
  try {
    return await provisioning;
  } catch (e) {
    // Failed provision — let a later beat retry. Only evict the slot if it is
    // still ours: if the entry was replaced while this call was in flight, a
    // late failure must not throw away the newer voice (same ownership check
    // as the beat-audio abort-controller cleanup in this file).
    if (cache.get(speaker.name) === provisioning) {
      cache.delete(speaker.name);
    }
    throw e;
  }
}
|
||
|
||
// ──────────────────────────────────────────────────────────────────────
|
||
// Component
|
||
// ──────────────────────────────────────────────────────────────────────
|
||
|
||
function PlayInner() {
|
||
const router = useRouter();
|
||
const params = useSearchParams();
|
||
|
||
const [phase, setPhase] = useState<Phase>("loading-first");
|
||
const [session, setSession] = useState<Session | null>(null);
|
||
const [currentScene, setCurrentScene] = useState<Scene | null>(null);
|
||
const [currentBeatId, setCurrentBeatId] = useState<string | null>(null);
|
||
const [imageUrl, setImageUrl] = useState<string | null>(null);
|
||
const [beatAudioMap, setBeatAudioMap] = useState<Record<string, string>>({});
|
||
// Lazy-initialize 优先级:本局选择(homepage 的「语音配音」存到 sessionStorage:infiplot:custom)
|
||
// > 上次会话的粘性偏好(localStorage:infiplot:muted) > 默认非静音。
|
||
// 这样首页选了「关闭」开始游戏,进来就是静音;选「开启」就不是静音;进入 play 页后用户自己
|
||
// 切换 静音/有声 时再用 localStorage 持久化,下一局开新游戏 sessionStorage 选择会再覆盖。
|
||
const [muted, setMuted] = useState<boolean>(() => {
|
||
if (typeof window === "undefined") return false;
|
||
try {
|
||
const stored = window.sessionStorage.getItem("infiplot:custom");
|
||
if (stored) {
|
||
const parsed = JSON.parse(stored) as { audioEnabled?: boolean };
|
||
if (typeof parsed.audioEnabled === "boolean") {
|
||
return !parsed.audioEnabled;
|
||
}
|
||
}
|
||
return window.localStorage.getItem(MUTED_STORAGE_KEY) === "1";
|
||
} catch {
|
||
return false;
|
||
}
|
||
});
|
||
const [pendingClick, setPendingClick] = useState<{
|
||
x: number;
|
||
y: number;
|
||
} | null>(null);
|
||
const [error, setError] = useState<string | null>(null);
|
||
const [presentation, setPresentation] = useState(false);
|
||
// Session-locked image orientation (see detectOrientation). "portrait" makes
|
||
// the whole play surface render full-bleed vertical on phones.
|
||
const [orientation, setOrientation] = useState<Orientation>("landscape");
|
||
const [lastExitLabel, setLastExitLabel] = useState<string | null>(null);
|
||
// Consecutive server-side TTS misses (null audio / failed /api/beat-audio).
|
||
// Climbs when the shared server key is rate-limited by MiMo — the exact pain
|
||
// BYO fixes — so the play page can nudge non-BYO users to add their own key.
|
||
// Reset to 0 on any successful synth. Only the server path touches it.
|
||
const [silenceStrikes, setSilenceStrikes] = useState(0);
|
||
// Once the player dismisses the silence nudge, keep it gone for this session.
|
||
const [nudgeDismissed, setNudgeDismissed] = useState(false);
|
||
// The in-place BYO-key modal, opened from the silence nudge so the player can
|
||
// add a key without leaving the play page.
|
||
const [ttsModalOpen, setTtsModalOpen] = useState(false);
|
||
|
||
const startedRef = useRef(false);
|
||
const poolRef = useRef<Map<string, PrefetchEntry>>(new Map());
|
||
// Lazy per-beat audio fetches keyed by beat.id. Aborted when the scene
|
||
// changes so stale in-flight requests can't poison the new scene's map
|
||
// (beat ids like "b1" are scene-local and would collide across scenes).
|
||
const beatAudioAbortRef = useRef<Map<string, AbortController>>(new Map());
|
||
// Mirrors `muted` so the closure-stable fetchBeatAudio (deps []) can gate on
|
||
// it. Muting stops TTS *synthesis*, not just playback — TTS is the only sound
|
||
// source, so synthesizing audio the user can't hear just burns quota.
|
||
// 首页「语音配音 关闭」会把 muted 初值置为 true(见上方 useState 初始化),
|
||
// 不再单独维护 audioEnabledRef —— 单一来源避免两个 flag 漂移。
|
||
const mutedRef = useRef<boolean>(muted);
|
||
|
||
// Resolved bring-your-own Xiaomi TTS config (region preset + key), read once
|
||
// from localStorage. When non-null, the browser provisions + synths voices
|
||
// directly against Xiaomi — the key never touches our server — and every
|
||
// start/scene/insert-beat request carries clientTts:true so the engine skips
|
||
// server-side TTS. null = user hasn't opted in (server default / silent).
|
||
const [byoTtsConfig, setByoTtsConfig] = useState<TtsConfig | null>(() =>
|
||
loadClientTtsConfig(),
|
||
);
|
||
const byoTtsRef = useRef<TtsConfig | null>(byoTtsConfig);
|
||
// BYO voice cache (see resolveByoVoice). Keyed by character name; persists
|
||
// across scenes so each speaker is provisioned at most once per session.
|
||
const provisionedVoicesRef = useRef<Map<string, Promise<CharacterVoice>>>(
|
||
new Map(),
|
||
);
|
||
|
||
// Mirrors for use inside async handlers (closure-stable)
|
||
const sessionRef = useRef<Session | null>(null);
|
||
const currentSceneRef = useRef<Scene | null>(null);
|
||
const currentBeatRef = useRef<Beat | null>(null);
|
||
const visitedBeatsRef = useRef<string[]>([]);
|
||
// Original (CDN) URL of the currently-rendered scene image. Used as the key
|
||
// to revoke its blob: URL when the scene swaps. We track the ORIGINAL URL,
|
||
// not the blob URL, because blobUrlCache is keyed by original URL.
|
||
const lastImageOriginalUrlRef = useRef<string | null>(null);
|
||
|
||
const currentBeat = useMemo<Beat | null>(() => {
|
||
if (!currentScene || !currentBeatId) return null;
|
||
return currentScene.beats.find((b) => b.id === currentBeatId) ?? null;
|
||
}, [currentScene, currentBeatId]);
|
||
|
||
const dialogueHistory = useMemo<DialogueHistoryItem[]>(
|
||
() =>
|
||
buildDialogueHistory(
|
||
session,
|
||
currentScene?.id,
|
||
visitedBeatsRef.current,
|
||
),
|
||
[session, currentScene?.id, currentBeatId],
|
||
);
|
||
|
||
const audioSrc = (currentBeat ? beatAudioMap[currentBeat.id] : undefined) ?? null;
|
||
|
||
useEffect(() => {
|
||
sessionRef.current = session;
|
||
}, [session]);
|
||
useEffect(() => {
|
||
currentSceneRef.current = currentScene;
|
||
}, [currentScene]);
|
||
useEffect(() => {
|
||
currentBeatRef.current = currentBeat;
|
||
}, [currentBeat]);
|
||
useEffect(() => {
|
||
mutedRef.current = muted;
|
||
}, [muted]);
|
||
|
||
// Coarse liveness ping for active-time analytics. /play is a single SPA
|
||
// route, so page views alone read as ~0 duration; a 30s heartbeat (only
|
||
// while the tab is visible) gives Umami the timestamps to derive real
|
||
// engaged time. Content-free — no payload. The interval is never even
|
||
// scheduled unless the tracker is configured, so it's zero work when off.
|
||
useEffect(() => {
|
||
if (!process.env.NEXT_PUBLIC_UMAMI_SRC || !process.env.NEXT_PUBLIC_UMAMI_WEBSITE_ID) {
|
||
return;
|
||
}
|
||
const id = window.setInterval(() => {
|
||
if (document.visibilityState === "visible") track("play_heartbeat");
|
||
}, 30_000);
|
||
return () => window.clearInterval(id);
|
||
}, []);
|
||
|
||
// Whenever currentBeatId changes, append it to visited (skip consecutive dups)
|
||
useEffect(() => {
|
||
if (!currentBeatId) return;
|
||
if (visitedBeatsRef.current.at(-1) === currentBeatId) return;
|
||
visitedBeatsRef.current = [...visitedBeatsRef.current, currentBeatId];
|
||
setSession((s) => {
|
||
if (!s) return s;
|
||
return {
|
||
...s,
|
||
history: s.history.map((h, i, arr) =>
|
||
i === arr.length - 1
|
||
? { ...h, visitedBeatIds: [...visitedBeatsRef.current] }
|
||
: h,
|
||
),
|
||
};
|
||
});
|
||
}, [currentBeatId]);
|
||
|
||
// ── Lazy per-beat audio fetch ────────────────────────────────────────
|
||
// Returns silently on any failure — the UI never waits for audio, so a
|
||
// null result just means that beat plays without voice.
|
||
// Sends only the speaker's voice + the line to speak — NOT the whole
|
||
// session — so the per-beat payload stays small even with many characters
|
||
// (each voice.referenceAudioBase64 is ~160KB).
|
||
const fetchBeatAudio = useCallback(
|
||
async (
|
||
sess: Session,
|
||
beat: { id: string; speaker?: string; line?: string; lineDelivery?: string },
|
||
): Promise<void> => {
|
||
if (mutedRef.current) return; // 静音 → 不合成 TTS(避免无谓的调用与花费)。
|
||
// 「首页选关闭」也走这条路:bootstrap 时 muted 已被初始化为 true。
|
||
if (!beat.speaker || !beat.line) return;
|
||
const speaker = sess.characters.find((c) => c.name === beat.speaker);
|
||
if (!speaker) return;
|
||
|
||
const byo = byoTtsRef.current;
|
||
// Non-BYO relies on the server having provisioned speaker.voice. BYO
|
||
// skipped server TTS, so it needs a baked voice (prebaked card) or a
|
||
// voiceDescription to provision from in the browser.
|
||
if (!byo && !speaker.voice) return;
|
||
if (byo && !speaker.voice && !speaker.voiceDescription) return;
|
||
|
||
if (beatAudioAbortRef.current.has(beat.id)) return;
|
||
const abort = new AbortController();
|
||
beatAudioAbortRef.current.set(beat.id, abort);
|
||
try {
|
||
let audioUrl: string | null = null;
|
||
if (byo) {
|
||
// Client-direct: provision (once per speaker, cached) + synth against
|
||
// Xiaomi with the user's own key — no /api/beat-audio round-trip and
|
||
// the key never touches our server.
|
||
const voice = await resolveByoVoice(
|
||
provisionedVoicesRef.current,
|
||
byo,
|
||
speaker,
|
||
);
|
||
if (!voice || abort.signal.aborted) return;
|
||
const out = await synthesize(
|
||
byo,
|
||
voice,
|
||
beat.line,
|
||
beat.lineDelivery,
|
||
abort.signal,
|
||
);
|
||
audioUrl = `data:${out.mimeType};base64,${out.audioBase64}`;
|
||
} else {
|
||
const res = await fetch("/api/beat-audio", {
|
||
method: "POST",
|
||
headers: {
|
||
"Content-Type": "application/json",
|
||
},
|
||
body: JSON.stringify({
|
||
beat: { id: beat.id, line: beat.line, lineDelivery: beat.lineDelivery },
|
||
voice: speaker.voice,
|
||
}),
|
||
signal: abort.signal,
|
||
});
|
||
if (res.status === 204) {
|
||
setSilenceStrikes((n) => Math.min(n + 1, 99));
|
||
return;
|
||
}
|
||
if (!res.ok) {
|
||
setSilenceStrikes((n) => Math.min(n + 1, 99));
|
||
return;
|
||
}
|
||
const blob = await res.blob();
|
||
audioUrl = URL.createObjectURL(blob);
|
||
setSilenceStrikes(0);
|
||
}
|
||
// Skip the state write if we've been aborted between the await and
|
||
// here — beat ids are scene-local, so a late arrival from a prior
|
||
// scene would otherwise overwrite the current scene's audio under the
|
||
// same id.
|
||
if (audioUrl && !abort.signal.aborted) {
|
||
setBeatAudioMap((m) => ({ ...m, [beat.id]: audioUrl }));
|
||
} else if (audioUrl?.startsWith("blob:")) {
|
||
URL.revokeObjectURL(audioUrl);
|
||
}
|
||
} catch {
|
||
// aborted / network / Xiaomi rate-limit — silent fallback (no audio)
|
||
} finally {
|
||
// Only clear the slot if it's still ours. An aborted prior fetch
|
||
// running its finally late could otherwise delete the controller of a
|
||
// new fetch that took the same beat id, leaving the new one
|
||
// unabortable on the next scene change.
|
||
if (beatAudioAbortRef.current.get(beat.id) === abort) {
|
||
beatAudioAbortRef.current.delete(beat.id);
|
||
}
|
||
}
|
||
},
|
||
[],
|
||
);
|
||
|
||
function cancelBeatAudioFetches(): void {
|
||
for (const c of beatAudioAbortRef.current.values()) c.abort();
|
||
beatAudioAbortRef.current.clear();
|
||
}
|
||
|
||
// Fire one /api/beat-audio request per speaking beat in the current scene.
|
||
// Reads refs (not props) so it stays closure-stable and can be re-run on
|
||
// un-mute as well as on scene change.
|
||
const prefetchSceneAudio = useCallback(() => {
|
||
const scene = currentSceneRef.current;
|
||
const sess = sessionRef.current;
|
||
if (!scene || !sess) return;
|
||
for (const b of scene.beats) {
|
||
if (b.speaker && b.line) void fetchBeatAudio(sess, b);
|
||
}
|
||
}, [fetchBeatAudio]);
|
||
|
||
// (Re)synthesize whenever the scene id changes. Requests still in flight for
// the previous scene are aborted first: beat ids are scene-local ("b1" repeats
// across scenes), so a late arrival would otherwise land under the wrong beat.
useEffect(() => {
  cancelBeatAudioFetches();
  setBeatAudioMap((stale) => {
    // Release the object URLs held by the outgoing map before dropping it.
    Object.values(stale)
      .filter((url) => url.startsWith("blob:"))
      .forEach((url) => URL.revokeObjectURL(url));
    return {};
  });
  prefetchSceneAudio();
}, [currentScene?.id, prefetchSceneAudio]);
|
||
|
||
// ── Mute persistence (read is via the useState lazy initializer above) ─
// Flips the muted flag, reports the toggle to analytics, and writes the new
// value to localStorage under MUTED_STORAGE_KEY ("1" = muted, "0" = audible).
const toggleMuted = useCallback(() => {
  track("tts_toggle", { muted: !mutedRef.current });
  setMuted((wasMuted) => {
    const nowMuted = !wasMuted;
    const stored = nowMuted ? "1" : "0";
    try {
      window.localStorage.setItem(MUTED_STORAGE_KEY, stored);
    } catch {
      // Persisting is best-effort; the in-memory toggle still applies.
    }
    return nowMuted;
  });
}, []);
|
||
|
||
// Muting stops synthesis, not just playback: in-flight requests are aborted
// when the player mutes. On un-mute the current scene is synthesized again,
// because fetchBeatAudio skips synthesis while muted and a scene entered muted
// would otherwise have nothing to play. (Clearing the map also re-synthesizes
// beats that were already fetched on a mid-scene un-mute; that cost is bounded
// to one scene and the toggle is rare.)
//
// Only real mute *transitions* are acted on. Without the previous-value check
// this effect would fire on mount alongside the scene effect (both call
// prefetchSceneAudio), doubling the initial /api/beat-audio batch — the first
// set would be dispatched only to be aborted mid-flight, burning TTS quota.
const prevMutedRef = useRef(muted);
useEffect(() => {
  const wasMuted = prevMutedRef.current;
  prevMutedRef.current = muted;
  if (wasMuted === muted) return;

  cancelBeatAudioFetches();
  if (!muted) {
    setBeatAudioMap((stale) => {
      Object.values(stale)
        .filter((url) => url.startsWith("blob:"))
        .forEach((url) => URL.revokeObjectURL(url));
      return {};
    });
    prefetchSceneAudio();
  }
}, [muted, prefetchSceneAudio]);
|
||
|
||
// ── BYO key enabled/disabled from the play page (silence nudge → modal) ─
// Enabling: point the synth path at the user's key and immediately
// re-synthesize the current scene in-browser, so the voices the player just
// missed come back without a reload (their characters already carry
// server-provisioned `voice`, which resolveByoVoice reuses with the new key).
// Disabling: just stop using the key; later scenes fall back to the server.
const handleByoSaved = useCallback(
  (configured: boolean) => {
    const cfg = configured ? loadClientTtsConfig() : null;
    byoTtsRef.current = cfg;
    setByoTtsConfig(cfg);
    if (!cfg) return;

    // A usable key is now in place: reset the silence counter and redo the
    // current scene's audio from scratch.
    setSilenceStrikes(0);
    cancelBeatAudioFetches();
    setBeatAudioMap((stale) => {
      Object.values(stale)
        .filter((url) => url.startsWith("blob:"))
        .forEach((url) => URL.revokeObjectURL(url));
      return {};
    });
    prefetchSceneAudio();
  },
  [prefetchSceneAudio],
);
|
||
|
||
// ── Presentation mode toggle ─────────────────────────────────────────
// Flips presentation mode and tries to bring the browser's fullscreen state
// along with it. A failed fullscreen request/exit is ignored, so the
// chrome-less layout is still toggled either way.
const togglePresentation = useCallback(async () => {
  const entering = !presentation;
  track("fullscreen_toggle", { on: entering });
  try {
    if (entering) {
      if (!document.fullscreenElement) {
        await document.documentElement.requestFullscreen();
      }
    } else if (document.fullscreenElement) {
      await document.exitFullscreen();
    }
  } catch {
    // ignore — fall through and update the presentation flag anyway
  }
  setPresentation(entering);
}, [presentation]);
|
||
|
||
// Global keyboard / fullscreen wiring for presentation mode:
//   F / f   → toggle presentation (ignored with Meta/Ctrl/Alt held, and
//             ignored while the user is typing in an editable control)
//   Escape  → leave presentation
// A `fullscreenchange` that leaves fullscreen while presentation is on also
// turns presentation off.
useEffect(() => {
  // True when the event came from a control the user types into. Without this
  // check the shortcut would swallow the letter "f" (preventDefault) in any
  // text field rendered on this page.
  function isTypingTarget(target: EventTarget | null): boolean {
    if (!(target instanceof HTMLElement)) return false;
    if (target.isContentEditable) return true;
    return ["INPUT", "TEXTAREA", "SELECT"].includes(target.tagName);
  }

  function onKey(e: KeyboardEvent) {
    if (e.key === "f" || e.key === "F") {
      if (e.metaKey || e.ctrlKey || e.altKey) return;
      if (isTypingTarget(e.target)) return;
      e.preventDefault();
      void togglePresentation();
    } else if (e.key === "Escape" && presentation) {
      setPresentation(false);
    }
  }

  function onFullscreenChange() {
    if (!document.fullscreenElement && presentation) setPresentation(false);
  }

  window.addEventListener("keydown", onKey);
  document.addEventListener("fullscreenchange", onFullscreenChange);
  return () => {
    window.removeEventListener("keydown", onKey);
    document.removeEventListener("fullscreenchange", onFullscreenChange);
  };
}, [togglePresentation, presentation]);
|
||
|
||
// Lock the visible orientation BEFORE the first paint, so portrait phones
// never flash the landscape loading chrome. The state initializes to
// "landscape" for SSR-safety; this corrects it pre-paint (a no-op re-render on
// landscape devices). Prebaked cards (decision C) stay landscape-baked
// regardless of device. The bootstrap effect re-derives the same value for the
// /api/start payload.
useIsomorphicLayoutEffect(() => {
  const isPrebakedCard = Boolean(params.get("card"));
  setOrientation(isPrebakedCard ? "landscape" : detectOrientation());
}, [params]);
|
||
|
||
// ── Bootstrap: start session ─────────────────────────────────────────
useEffect(() => {
  // Guard so the bootstrap runs at most once.
  if (startedRef.current) return;
  startedRef.current = true;

  // Three entry paths:
  //   ?card=<m0..f31> → curated homepage card, loaded straight from the static
  //                     file /home/firstact/{name}.json (prebaked at build
  //                     time, so no engine call is made)
  //   ?preset=<id>    → built-in PRESETS (still generated live via /api/start)
  //   ?custom=1       → user-written prompt; worldSetting / styleGuide are
  //                     read from sessionStorage, then generated live via
  //                     /api/start
  const cardName = params.get("card");
  const presetId = params.get("preset");
  const isCustom = params.get("custom") === "1";

  type LivePayload = {
    worldSetting: string;
    styleGuide: string;
    styleReferenceImage?: string;
    orientation?: Orientation;
  };

  type PrebakedFirstAct = StartResponse & {
    worldSetting: string;
    styleGuide: string;
    // The live /api/start path tags this on after the response (prebaked card
    // JSONs never have one — they were rendered at build time without any
    // user-uploaded reference). Carried into Session so /api/scene's painter
    // anchors the same style image on every subsequent scene.
    styleReferenceImage?: string;
    cardName?: string;
    cardTitle?: string;
    cardGender?: string;
  };

  // Work out what to send to /api/start, or null when the card path is used
  // or no usable preset / custom prompt is available.
  const resolveLivePayload = (): LivePayload | null => {
    if (cardName) return null;
    if (presetId) {
      const preset = PRESETS.find((x) => x.id === presetId);
      return preset
        ? { worldSetting: preset.worldSetting, styleGuide: preset.styleGuide }
        : null;
    }
    if (!isCustom) return null;
    const stored = sessionStorage.getItem("infiplot:custom");
    if (!stored) return null;
    try {
      const parsed = JSON.parse(stored) as {
        worldSetting: string;
        styleGuide: string;
        audioEnabled?: boolean;
        styleReferenceImage?: string;
      };
      // audioEnabled was already projected onto `muted` by the useState
      // initializer, so it does not need to be stored again here.
      return {
        worldSetting: parsed.worldSetting,
        styleGuide: parsed.styleGuide,
        styleReferenceImage: parsed.styleReferenceImage || undefined,
      };
    } catch {
      return null;
    }
  };
  const livePayload = resolveLivePayload();

  // Lock orientation for the whole session. Prebaked cards (decision C) are
  // landscape-baked, so they stay landscape regardless of device; only the
  // live /api/start path requests a portrait paint when the phone is upright.
  // The visible state is already set pre-paint by the layout effect; here the
  // value is only needed for the /api/start payload.
  const sessionOrientation: Orientation = cardName
    ? "landscape"
    : detectOrientation();
  if (livePayload) livePayload.orientation = sessionOrientation;

  // Static prebaked first act for a curated card.
  const loadPrebakedCard = async (name: string): Promise<PrebakedFirstAct> => {
    const r = await fetch(`/home/firstact/${encodeURIComponent(name)}.json`);
    if (!r.ok) throw new Error(`找不到精选剧情：${name}`);
    return (await r.json()) as PrebakedFirstAct;
  };

  // Live generation through /api/start.
  const requestLiveStart = async (
    payload: LivePayload,
  ): Promise<PrebakedFirstAct> => {
    const r = await fetch("/api/start", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        ...payload,
        clientTts: !!byoTtsRef.current,
      }),
    });
    if (!r.ok) {
      const j = (await r.json().catch(() => ({}))) as { error?: string };
      throw new Error(j.error ?? r.statusText);
    }
    const data = (await r.json()) as StartResponse;
    // Live /api/start doesn't echo worldSetting / styleGuide back — splice in
    // what was sent. styleReferenceImage is likewise not in StartResponse; tag
    // it on so the session built below carries it for every /api/scene call.
    return {
      ...data,
      worldSetting: payload.worldSetting,
      styleGuide: payload.styleGuide,
      styleReferenceImage: payload.styleReferenceImage,
    };
  };

  let fetchStart: Promise<PrebakedFirstAct>;
  if (cardName) {
    fetchStart = loadPrebakedCard(cardName);
  } else if (livePayload) {
    fetchStart = requestLiveStart(livePayload);
  } else {
    // Nothing to play — send the visitor back to the homepage.
    router.replace("/");
    return;
  }

  fetchStart
    .then(async (data) => {
      // Resolve to a paintable src before committing to state. Proxy path: a
      // fully-local blob: URL the browser paints atomically (no row-by-row
      // progressive loading). Direct path (default): the preloaded original
      // URL.
      const blobUrl = await getOrCreateBlobUrl(data.imageUrl);
      lastImageOriginalUrlRef.current = data.imageUrl;

      const entryBeatId = data.scene.entryBeatId;
      const initial: Session = {
        id: data.sessionId,
        createdAt: Date.now(),
        worldSetting: data.worldSetting,
        styleGuide: data.styleGuide,
        history: [
          {
            scene: data.scene,
            visitedBeatIds: [entryBeatId],
          },
        ],
        characters: data.characters,
        storyState: data.storyState,
        styleReferenceImage: data.styleReferenceImage,
        orientation: data.scene.orientation ?? sessionOrientation,
      };
      visitedBeatsRef.current = [entryBeatId];
      setSession(initial);
      setCurrentScene(data.scene);
      setCurrentBeatId(entryBeatId);
      setImageUrl(blobUrl);
      // beatAudioMap is populated lazily by the per-beat fetch effect once
      // currentScene becomes non-null (see fetchBeatAudio).
      setPhase("ready");
      track("scene_reached", { scene_index: initial.history.length });
    })
    .catch((e) => setError(String(e)));
}, [params, router]);
|
||
|
||
// ── Prefetch on scene entry: L1 + recursive L2/L3 for must-pass ──────
// For every change-scene choice reachable in the current scene, start a
// speculative prefetch rooted at that exit.
useEffect(() => {
  if (!session || !currentScene) return;

  for (const choice of findAllChangeSceneChoices(currentScene)) {
    const { effect } = choice;
    if (effect.kind !== "change-scene") continue;
    const step: ScenePathStep = {
      fromScene: currentScene,
      // Snapshot of visited beats at prefetch start. Slight drift is OK.
      fromVisitedBeats: [...visitedBeatsRef.current],
      exit: {
        choiceId: choice.id,
        label: choice.label,
        nextSceneSeed: effect.nextSceneSeed,
      },
    };
    prefetchScenePath(poolRef.current, session, [step], 0, !!byoTtsRef.current);
  }
}, [currentScene?.id, session?.id]);
|
||
|
||
// On unmount, abort all in-flight speculative prefetches so background
// scene/image generation stops being paid for. The empty dependency list
// makes the cleanup fire only on unmount; it must NOT run on scene
// transitions, which rely on consumeChoice keeping the re-rooted survivor
// prefetches alive.
// Surviving blob: URLs are revoked as well so their bytes can be GC'd — the
// module-level blobUrlCache outlives the component, but its entries should not
// survive the page navigation that unmounts it.
useEffect(() => {
  // Capture the ref contents now; the cleanup uses these captured objects.
  const pool = poolRef.current;
  const beatAborts = beatAudioAbortRef.current;
  return () => {
    clearPool(pool);
    beatAborts.forEach((controller) => controller.abort());
    beatAborts.clear();
    for (const [originalUrl] of blobUrlCache) {
      revokeBlobUrlFor(originalUrl);
    }
  };
}, []);
|
||
|
||
// ── Handlers ──────────────────────────────────────────────────────────
|
||
|
||
// Step to the next beat when the current beat simply continues. Does nothing
// unless the phase is "ready" and the current beat's `next` is a "continue".
function onAdvance() {
  if (phase !== "ready") return;
  const upcoming = currentBeatRef.current?.next;
  if (!upcoming || upcoming.type !== "continue") return;
  setCurrentBeatId(upcoming.nextBeatId);
}
|
||
|
||
// Wait for the next scene (a prefetched pool entry or a fresh request), then
// commit it: close the current history entry with the exit taken, append the
// new scene, and swap the image.
//   source            – prefetch entry (its `.promise` is awaited) or a bare
//                       promise of the scene response
//   exit              – exit recorded on the scene being left
//   visitedForCurrent – beat ids visited in the scene being left
//   exitLabel         – text shown afterwards as the "last step"
// Failures are surfaced through setError, except an error named "AbortError",
// which only returns the phase to "ready".
async function performSceneTransition(
  source: PrefetchEntry | Promise<SceneResponse>,
  exit: SceneExit,
  visitedForCurrent: string[],
  exitLabel: string,
) {
  setPhase("transitioning");
  setPendingClick(null);
  try {
    const pending = "promise" in source ? source.promise : source;
    const incoming = await pending;

    const base = sessionRef.current;
    if (!base) throw new Error("Session lost mid-transition");

    // Pull full image bytes into a local blob: URL before committing. For
    // prefetched scenes the speculative getOrCreateBlobUrl in
    // prefetchScenePath already has this in flight (often resolved), so this
    // is a near-instant cache lookup. For cold transitions the CDN download /
    // preload time is spent under the "transitioning" overlay. Proxy path: the
    // <img> then gets a fully-local blob (no progressive paint); direct path
    // (default): the preloaded original URL.
    const paintableUrl = await getOrCreateBlobUrl(incoming.imageUrl);

    // Revoke the previous scene's blob (no longer rendered) to release JS
    // heap. The new scene's original URL takes its place as "current".
    const previousOriginal = lastImageOriginalUrlRef.current;
    if (previousOriginal && previousOriginal !== incoming.imageUrl) {
      revokeBlobUrlFor(previousOriginal);
    }
    lastImageOriginalUrlRef.current = incoming.imageUrl;

    const lastIndex = base.history.length - 1;
    const closedHistory = base.history.map((entry, i) =>
      i === lastIndex
        ? { ...entry, visitedBeatIds: visitedForCurrent, exit }
        : entry,
    );
    const entryBeatId = incoming.scene.entryBeatId;
    const newSession: Session = {
      ...base,
      history: [
        ...closedHistory,
        {
          scene: incoming.scene,
          visitedBeatIds: [entryBeatId],
        },
      ],
      characters: mergeCharactersPreserveVoice(
        base.characters,
        incoming.characters,
      ),
      storyState: incoming.storyState,
    };

    visitedBeatsRef.current = [entryBeatId];
    setSession(newSession);
    setCurrentScene(incoming.scene);
    setCurrentBeatId(entryBeatId);
    setImageUrl(paintableUrl);
    // beatAudioMap reset + per-beat fetches are kicked off by the scene effect.
    setLastExitLabel(exitLabel);
    setPhase("ready");
    track("scene_reached", { scene_index: newSession.history.length });
  } catch (e) {
    const wasAborted = (e as { name?: string }).name === "AbortError";
    if (!wasAborted) setError(String(e));
    setPhase("ready");
  }
}
|
||
|
||
// Handle a click on one of the current beat's choices.
//   advance-beat → jump to the target beat locally
//   otherwise    → leave the scene: reuse a prefetched result when the pool
//                  has one for this choice, else request /api/scene cold
function onSelectChoice(choice: BeatChoice) {
  if (phase !== "ready" || !session || !currentScene) return;

  // Analytics: report the position of the choice within the current beat,
  // when the beat actually lists it.
  const beatNext = currentBeatRef.current?.next;
  const choiceIndex =
    beatNext?.type === "choice"
      ? beatNext.choices.findIndex((c) => c.id === choice.id)
      : -1;
  if (choiceIndex >= 0) {
    track("choice_select", {
      scene_index: session.history.length,
      choice_index: choiceIndex,
      kind: choice.effect.kind,
    });
  }

  if (choice.effect.kind === "advance-beat") {
    // Pure local jump. No network. No pool changes.
    setCurrentBeatId(choice.effect.targetBeatId);
    return;
  }

  const { nextSceneSeed } = choice.effect;
  const visited = [...visitedBeatsRef.current];
  const exit: SceneExit = {
    kind: "choice",
    choiceId: choice.id,
    label: choice.label,
    nextSceneSeed,
  };

  const prefetched = consumeChoice(poolRef.current, choice.id);
  if (prefetched) {
    void performSceneTransition(prefetched, exit, visited, choice.label);
    return;
  }

  // Cold path — start a fresh fetch
  const step: ScenePathStep = {
    fromScene: currentScene,
    fromVisitedBeats: visited,
    exit: {
      choiceId: choice.id,
      label: choice.label,
      nextSceneSeed,
    },
  };
  const specSession = buildSpeculativeSession(session, [step]);
  clearPool(poolRef.current);

  const requestScene = async (): Promise<SceneResponse> => {
    const res = await fetch("/api/scene", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        session: stripVoicesForTransport(specSession),
        clientTts: !!byoTtsRef.current,
      }),
    });
    if (!res.ok) {
      const j = (await res.json().catch(() => ({}))) as { error?: string };
      throw new Error(j.error ?? res.statusText);
    }
    return (await res.json()) as SceneResponse;
  };

  void performSceneTransition(requestScene(), exit, visited, choice.label);
}
|
||
|
||
// Handle a click on the scene image (outside any choice).
//   1. Annotate the image with the click position and ask /api/vision to
//      classify the intent.
//   2. "insert-beat": ask /api/insert-beat for a partial beat, append it to
//      the current scene, jump to it (it continues back to the beat the player
//      was on), and fetch its audio when it has a spoken line.
//   3. otherwise: treat the action as a freeform scene exit and transition via
//      /api/scene.
// Any failure is surfaced through setError and the phase returns to "ready".
async function onBackgroundClick(click: { x: number; y: number }) {
  if (phase !== "ready" || !session || !currentScene || !imageUrl) return;
  setPhase("vision-thinking");
  setPendingClick(click);

  // POST `payload` as JSON and parse the JSON response; on a non-2xx status
  // throw with the server's `error` field, falling back to the status text.
  async function postJson<T>(url: string, payload: unknown): Promise<T> {
    const res = await fetch(url, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify(payload),
    });
    if (!res.ok) {
      const j = (await res.json().catch(() => ({}))) as { error?: string };
      throw new Error(j.error ?? res.statusText);
    }
    return (await res.json()) as T;
  }

  try {
    const annotatedImageBase64 = await annotateClick(imageUrl, click);
    const decision = await postJson<VisionResponse>("/api/vision", {
      session: stripVoicesForTransport(session),
      annotatedImageBase64,
    });
    track("vision_click", { result: decision.classify });

    if (decision.classify === "insert-beat") {
      setPhase("inserting-beat");
      const { partial, characters: insertChars } =
        await postJson<InsertBeatResponse>("/api/insert-beat", {
          session: stripVoicesForTransport(session),
          freeformAction: decision.intent.freeformAction,
          clientTts: !!byoTtsRef.current,
        });

      const fromBeatId =
        currentBeatRef.current?.id ?? currentScene.entryBeatId;
      const newBeatId = `b_ins_${Date.now()}_${Math.random()
        .toString(36)
        .slice(2, 6)}`;
      const newBeat: Beat = {
        id: newBeatId,
        narration: partial.narration,
        speaker: partial.speaker,
        line: partial.line,
        lineDelivery: partial.lineDelivery,
        // After the inserted beat, continue with the beat the player was on.
        next: { type: "continue", nextBeatId: fromBeatId },
      };

      const patched: Scene = {
        ...currentScene,
        beats: [...currentScene.beats, newBeat],
      };

      // Rebuild from the latest session (as the scene-exit branch below and
      // performSceneTransition do) rather than the render-time closure: two
      // network round-trips have elapsed since the click, and a session update
      // committed in the meantime must not be overwritten.
      const latest = sessionRef.current ?? session;
      const nextSession: Session = {
        ...latest,
        history: latest.history.map((h, i, arr) =>
          i === arr.length - 1 ? { ...h, scene: patched } : h,
        ),
        characters: mergeCharactersPreserveVoice(
          latest.characters,
          insertChars,
        ),
      };
      setSession(nextSession);
      setCurrentScene(patched);
      setCurrentBeatId(newBeatId);
      // Insert-beat doesn't change scene.id, so the scene effect won't
      // re-fire — manually kick off the audio fetch for the new beat.
      if (newBeat.speaker && newBeat.line) {
        void fetchBeatAudio(nextSession, {
          id: newBeatId,
          speaker: newBeat.speaker,
          line: newBeat.line,
          lineDelivery: newBeat.lineDelivery,
        });
      }
      setLastExitLabel(decision.intent.freeformAction);
      setPhase("ready");
      setPendingClick(null);
      return;
    }

    // Scene-changing freeform action.
    const exit: SceneExit = {
      kind: "freeform",
      action: decision.intent.freeformAction,
    };
    const visited = [...visitedBeatsRef.current];
    const base = sessionRef.current;
    if (!base) {
      setPhase("ready");
      setPendingClick(null);
      return;
    }
    const specSession: Session = {
      ...base,
      history: base.history.map((h, i, arr) =>
        i === arr.length - 1 ? { ...h, visitedBeatIds: visited, exit } : h,
      ),
    };
    clearPool(poolRef.current);

    const promise = postJson<SceneResponse>("/api/scene", {
      session: stripVoicesForTransport(specSession),
      clientTts: !!byoTtsRef.current,
    });

    await performSceneTransition(
      promise,
      exit,
      visited,
      decision.intent.freeformAction,
    );
  } catch (e) {
    setError(String(e));
    setPendingClick(null);
    setPhase("ready");
  }
}
|
||
|
||
// ── Render ────────────────────────────────────────────────────────────
|
||
|
||
// Error screen: shown instead of the canvas whenever `error` is set. Displays
// the message and a link back to the homepage.
if (error) {
  const backLinkClass =
    "mt-4 text-[10px] smallcaps text-clay-700 hover:text-ember-500 transition-colors inline-flex items-center gap-3";
  return (
    <div className="min-h-screen flex flex-col items-center justify-center px-8">
      <div className="max-w-md text-center animate-fade-in">
        <p className="text-[10px] smallcaps text-clay-500 mb-6">
          出 · 了 · 点 · 状 · 况
        </p>
        <p className="font-serif italic text-clay-900 text-lg leading-[1.7] mb-6">
          {error}
        </p>
        <Link href="/" className={backLinkClass}>
          <i className="fa-solid fa-arrow-left text-[9px]" />
          返 回
        </Link>
      </div>
    </div>
  );
}
|
||
|
||
// Mobile portrait renders full-bleed by default — it sidesteps the iOS Safari
// Fullscreen API (unsupported on iPhone) with a CSS full-viewport layout
// instead. Desktop "presentation" mode shares the same immersive canvas,
// toggled via the F key.
const immersive = presentation || orientation === "portrait";

if (immersive) {
  // Round translucent buttons floated over the canvas (portrait only).
  const overlayButtonClass =
    "pointer-events-auto flex h-9 w-9 items-center justify-center rounded-full bg-black/40 text-white/80 backdrop-blur-sm transition-colors hover:text-white";
  const isPortrait = orientation === "portrait";

  return (
    <div className="fixed inset-0 bg-black flex items-center justify-center z-50">
      <PlayCanvas
        imageUrl={imageUrl}
        audioSrc={audioSrc}
        muted={muted}
        phase={phase}
        beat={currentBeat}
        pendingClick={pendingClick}
        onBackgroundClick={onBackgroundClick}
        onAdvance={onAdvance}
        onSelectChoice={onSelectChoice}
        orientation={orientation}
        fullViewport
        dialogueHistory={dialogueHistory}
      />
      {isPortrait && (
        <div
          className="absolute inset-x-0 top-0 z-10 flex items-center justify-between px-4 pointer-events-none"
          style={{ paddingTop: "max(0.5rem, env(safe-area-inset-top))" }}
        >
          <Link href="/" className={overlayButtonClass} aria-label="返回">
            <i className="fa-solid fa-arrow-left text-[13px]" />
          </Link>
          <button
            type="button"
            onClick={toggleMuted}
            className={overlayButtonClass}
            aria-label={muted ? "取消静音" : "静音"}
          >
            <i
              className={`fa-solid ${muted ? "fa-volume-xmark" : "fa-volume-high"} text-[13px]`}
            />
          </button>
        </div>
      )}
    </div>
  );
}
|
||
|
||
// Counters shown in the header: scenes so far and beats visited in the
// current scene.
const sceneCount = session?.history.length ?? 0;
const beatCount = visitedBeatsRef.current.length;

// Surface the BYO-key nudge only to an unmuted, non-BYO player whose last few
// beats came back silent (shared key rate-limited) — the exact pain BYO fixes.
// Dismissible for the session.
const showSilenceNudge =
  phase === "ready" &&
  !muted &&
  !byoTtsConfig &&
  !nudgeDismissed &&
  silenceStrikes >= SILENCE_NUDGE_THRESHOLD;

// Small text-button styling shared by the controls above the canvas.
const canvasControlClass =
  "text-[10px] smallcaps text-clay-500 hover:text-ember-500 transition-colors flex items-center gap-2";
const muteLabel = muted ? "取消静音" : "静音";

// Right-hand control above the canvas: enter presentation / fullscreen.
const presentationButton = (
  <button
    type="button"
    onClick={() => void togglePresentation()}
    className={canvasControlClass}
    aria-label="进入全屏"
    title="全屏 (F)"
  >
    <i className="fa-solid fa-expand text-[10px]" />
    F · 键 · 全 · 屏
  </button>
);

// Silence nudge — a compact pill right beside the mute toggle. Clicking opens
// the BYO-key modal in place (no trip to the homepage). The × dismisses it for
// the session.
const silenceNudge = showSilenceNudge && (
  <span className="flex items-center gap-1 animate-fade-in">
    <button
      type="button"
      onClick={() => setTtsModalOpen(true)}
      className="inline-flex items-center gap-1.5 rounded-full border border-ember-500/40 bg-ember-500/10 px-2.5 py-1 text-[10px] text-ember-500 hover:bg-ember-500/20 transition-colors"
      title="经常没声音？填入你自己的小米 MiMo Key（免费），配音更稳定"
    >
      <i className="fa-solid fa-volume-xmark text-[9px]" />
      经常没声音？自带 Key
    </button>
    <button
      type="button"
      onClick={() => setNudgeDismissed(true)}
      aria-label="关闭提示"
      title="关闭"
      className="text-clay-400 hover:text-clay-700 transition-colors"
    >
      <i className="fa-solid fa-xmark text-[10px]" />
    </button>
  </span>
);

// Left-hand controls above the canvas: mute toggle plus the optional nudge.
const audioControls = (
  <>
    <button
      type="button"
      onClick={toggleMuted}
      className={canvasControlClass}
      aria-label={muteLabel}
      title={muteLabel}
    >
      <i
        className={`fa-solid ${muted ? "fa-volume-xmark" : "fa-volume-high"} text-[10px]`}
      />
      {muted ? "静 · 音" : "有 · 声"}
    </button>

    {silenceNudge}
  </>
);

return (
  <div className="min-h-screen flex flex-col">
    <header className="px-5 md:px-12 pt-6 md:pt-8 flex items-center justify-between">
      <Link
        href="/"
        className="text-clay-600 hover:text-clay-900 transition-colors flex items-center gap-3"
      >
        <i className="fa-solid fa-arrow-left text-[12px]" />
        <span className="font-serif text-[22px] md:text-[26px] leading-none tracking-tight">
          Infi<em className="italic font-light text-ember-500">Plot</em>
        </span>
      </Link>
      <div className="flex items-center gap-3 text-[10px] smallcaps text-clay-500 num">
        <span>第 · {String(sceneCount).padStart(3, "0")} · 幕</span>
        <span className="text-clay-300">·</span>
        <span>{String(beatCount).padStart(3, "0")} · 拍</span>
      </div>
    </header>

    <main className="flex-1 flex flex-col items-center justify-center px-4 md:px-8 py-6 md:py-10">
      <PlayCanvas
        imageUrl={imageUrl}
        audioSrc={audioSrc}
        muted={muted}
        phase={phase}
        beat={currentBeat}
        pendingClick={pendingClick}
        onBackgroundClick={onBackgroundClick}
        onAdvance={onAdvance}
        onSelectChoice={onSelectChoice}
        orientation={orientation}
        dialogueHistory={dialogueHistory}
        aboveCanvas={presentationButton}
        aboveCanvasLeft={audioControls}
      />

      {/* Status line under the canvas: first-load hint or the last step taken. */}
      <div className="mt-4 max-w-md w-full text-center min-h-[28px] flex items-center justify-center">
        {phase === "loading-first" && (
          <p className="text-[10px] smallcaps text-clay-500 animate-slow-pulse">
            正 · 在 · 唤 · 起 · 第 · 一 · 幕
          </p>
        )}
        {phase === "ready" && lastExitLabel && (
          <p className="text-[9px] smallcaps text-clay-400 animate-fade-in">
            <span className="mr-2">上 · 一 · 步 ·</span>
            <span className="text-clay-600">{lastExitLabel}</span>
          </p>
        )}
      </div>
    </main>

    {ttsModalOpen && (
      <TtsKeyModal
        onClose={() => setTtsModalOpen(false)}
        onSaved={handleByoSaved}
        footerNote="保存后会立即用这把 Key 在你的浏览器里合成当前这一幕的配音；本设备后续游玩也会自动使用此 Key。"
      />
    )}
  </div>
);
|
||
}
|
||
|
||
/**
 * Page entry point. Renders PlayInner inside a Suspense boundary and shows a
 * centered loading label while it is suspended.
 */
export default function PlayPage() {
  const loadingFallback = (
    <div className="min-h-screen flex items-center justify-center">
      <span className="text-[10px] smallcaps text-clay-500 animate-slow-pulse">
        载入中
      </span>
    </div>
  );

  return (
    <Suspense fallback={loadingFallback}>
      <PlayInner />
    </Suspense>
  );
}
|