refactor(play): use client-side engine API instead of direct fetch

Signed-off-by: baizhi958216 <1475289190@qq.com>
This commit is contained in:
baizhi958216
2026-06-11 11:14:55 +08:00
parent ab2f42bc42
commit b63b694940
+71 -262
View File
@@ -28,13 +28,20 @@ import {
storyShareFilename, storyShareFilename,
} from "@/lib/storyShare"; } from "@/lib/storyShare";
import { provisionVoice, synthesize } from "@infiplot/tts-client"; import { provisionVoice, synthesize } from "@infiplot/tts-client";
import {
startSession,
requestScene,
visionDecide,
classifyFreeform,
requestInsertBeat,
} from "@infiplot/engine";
import { readStoredModelConfig, resolveEngineConfig } from "@/lib/clientModelConfig";
import type { import type {
Beat, Beat,
BeatChoice, BeatChoice,
Character, Character,
CharacterVoice, CharacterVoice,
FreeformClassifyResponse, EngineConfig,
InsertBeatResponse,
Orientation, Orientation,
Scene, Scene,
SceneExit, SceneExit,
@@ -42,44 +49,21 @@ import type {
Session, Session,
StartResponse, StartResponse,
TtsConfig, TtsConfig,
VisionResponse,
} from "@infiplot/types"; } from "@infiplot/types";
import { track } from "@/lib/analytics"; import { track } from "@/lib/analytics";
const MUTED_STORAGE_KEY = "infiplot:muted"; const MUTED_STORAGE_KEY = "infiplot:muted";
// ── FOT reduction helpers ────────────────────────────────────────────── // ── Client-side engine config builder ──────────────────────────────────
// Strip bulky voice.referenceAudioBase64 from the session before sending it to // Reads model credentials from localStorage and assembles the EngineConfig
// the server. The engine only needs character names + visualDescriptions for // that the engine expects. Called at the point of use (inside async handlers)
// scene generation; voice data is only used by /api/beat-audio (which receives // so mid-session settings changes are picked up immediately.
// the voice directly, not via session). The client retains voices locally and function buildEngineConfig(): EngineConfig {
// re-merges them from the response via mergeCharactersPreserveVoice. const modelCfg = readStoredModelConfig();
function stripVoicesForTransport(session: Session): Session { const ttsCfg = loadClientTtsConfig();
return { return resolveEngineConfig(modelCfg, ttsCfg);
...session,
characters: session.characters.map((c) => ({ ...c, voice: undefined })),
};
} }
// Merge server-returned characters with locally-held voices. The server strips
// voice from already-known characters (P0), so only NEW characters carry voice.
// For existing characters, re-attach the voice the client already holds.
function mergeCharactersPreserveVoice(
local: Character[],
remote: Character[],
): Character[] {
const localByName = new Map(local.map((c) => [c.name, c]));
return remote.map((c) => {
const prev = localByName.get(c.name);
if (!prev) return c;
return { ...c, voice: c.voice ?? prev.voice };
});
}
// Consecutive silent (no-audio) beats before we surface the BYO-key nudge to a
// non-BYO, unmuted player. Set high enough that one transient miss won't trip
// it, low enough to catch a scene that's clearly being rate-limited.
const SILENCE_NUDGE_THRESHOLD = 3;
// Mobile-portrait users get a 9:16 scene image painted for them; everyone else // Mobile-portrait users get a 9:16 scene image painted for them; everyone else
// (desktop, tablet, mobile-landscape) keeps the 16:9 landscape image. Only a // (desktop, tablet, mobile-landscape) keeps the 16:9 landscape image. Only a
@@ -395,19 +379,9 @@ function prefetchScenePath(
const specSession = buildSpeculativeSession(baseSession, steps); const specSession = buildSpeculativeSession(baseSession, steps);
const abort = new AbortController(); const abort = new AbortController();
const promise = (async () => { const promise = (async () => {
const res = await fetch("/api/scene", { const config = buildEngineConfig();
method: "POST", const data = await requestScene(config, { session: specSession, clientTts });
headers: { if (abort.signal.aborted) throw new Error("aborted");
"Content-Type": "application/json",
},
body: JSON.stringify({ session: stripVoicesForTransport(specSession), clientTts }),
signal: abort.signal,
});
if (!res.ok) {
const j = (await res.json().catch(() => ({}))) as { error?: string };
throw new Error(j.error ?? res.statusText);
}
const data = (await res.json()) as SceneResponse;
// Record this resolved alternate for the gallery export. Key is // Record this resolved alternate for the gallery export. Key is
// (parent scene id at the choice point) : (choice id). Includes the // (parent scene id at the choice point) : (choice id). Includes the
@@ -425,12 +399,6 @@ function prefetchScenePath(
// transition path awaits the same cached promise via getOrCreateBlobUrl. // transition path awaits the same cached promise via getOrCreateBlobUrl.
void getOrCreateBlobUrl(data.imageUrl); void getOrCreateBlobUrl(data.imageUrl);
// Re-attach locally-held voices the server stripped from known characters.
data.characters = mergeCharactersPreserveVoice(
baseSession.characters,
data.characters,
);
// Recursive: if the resulting scene has exactly one change-scene exit, // Recursive: if the resulting scene has exactly one change-scene exit,
// it is a must-pass node — prefetch its child too. // it is a must-pass node — prefetch its child too.
if (depth + 1 < PREFETCH_MAX_DEPTH) { if (depth + 1 < PREFETCH_MAX_DEPTH) {
@@ -579,12 +547,6 @@ function PlayInner() {
const [orientation, setOrientation] = useState<Orientation>("landscape"); const [orientation, setOrientation] = useState<Orientation>("landscape");
const [lastExitLabel, setLastExitLabel] = useState<string | null>(null); const [lastExitLabel, setLastExitLabel] = useState<string | null>(null);
// Consecutive server-side TTS misses (null audio / failed /api/beat-audio). // Consecutive server-side TTS misses (null audio / failed /api/beat-audio).
// Climbs when the shared server key is rate-limited by MiMo — the exact pain
// BYO fixes — so the play page can nudge non-BYO users to add their own key.
// Reset to 0 on any successful synth. Only the server path touches it.
const [silenceStrikes, setSilenceStrikes] = useState(0);
// Once the player dismisses the silence nudge, keep it gone for this session.
const [nudgeDismissed, setNudgeDismissed] = useState(false);
const [settingsOpen, setSettingsOpen] = useState(false); const [settingsOpen, setSettingsOpen] = useState(false);
const [visionClickEnabled, setVisionClickEnabled] = useState(true); const [visionClickEnabled, setVisionClickEnabled] = useState(true);
@@ -728,8 +690,7 @@ function PlayInner() {
let audioUrl: string | null = null; let audioUrl: string | null = null;
if (byo) { if (byo) {
// Client-direct: provision (once per speaker, cached) + synth against // Client-direct: provision (once per speaker, cached) + synth against
// Xiaomi with the user's own key — no /api/beat-audio round-trip and // Xiaomi with the user's own key — the key never touches our server.
// the key never touches our server.
const voice = await resolveByoVoice( const voice = await resolveByoVoice(
provisionedVoicesRef.current, provisionedVoicesRef.current,
byo, byo,
@@ -745,28 +706,8 @@ function PlayInner() {
); );
audioUrl = `data:${out.mimeType};base64,${out.audioBase64}`; audioUrl = `data:${out.mimeType};base64,${out.audioBase64}`;
} else { } else {
const res = await fetch("/api/beat-audio", { // No TTS configured — silent.
method: "POST", return;
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({
beat: { id: beat.id, line: beat.line, lineDelivery: beat.lineDelivery },
voice: speaker.voice,
}),
signal: abort.signal,
});
if (res.status === 204) {
setSilenceStrikes((n) => Math.min(n + 1, 99));
return;
}
if (!res.ok) {
setSilenceStrikes((n) => Math.min(n + 1, 99));
return;
}
const blob = await res.blob();
audioUrl = URL.createObjectURL(blob);
setSilenceStrikes(0);
} }
// Skip the state write if we've been aborted between the await and // Skip the state write if we've been aborted between the await and
// here — beat ids are scene-local, so a late arrival from a prior // here — beat ids are scene-local, so a late arrival from a prior
@@ -774,8 +715,6 @@ function PlayInner() {
// same id. // same id.
if (audioUrl && !abort.signal.aborted) { if (audioUrl && !abort.signal.aborted) {
setBeatAudioMap((m) => ({ ...m, [beat.id]: audioUrl })); setBeatAudioMap((m) => ({ ...m, [beat.id]: audioUrl }));
} else if (audioUrl?.startsWith("blob:")) {
URL.revokeObjectURL(audioUrl);
} }
} catch { } catch {
// aborted / network / Xiaomi rate-limit — silent fallback (no audio) // aborted / network / Xiaomi rate-limit — silent fallback (no audio)
@@ -864,26 +803,12 @@ function PlayInner() {
}, [muted, prefetchSceneAudio]); }, [muted, prefetchSceneAudio]);
const handleSettingsSaved = useCallback( const handleSettingsSaved = useCallback(
(settings: { ttsConfigured: boolean; playerName: string; visionClickEnabled: boolean }) => { (settings: { playerName: string; visionClickEnabled: boolean }) => {
setVisionClickEnabled(settings.visionClickEnabled); setVisionClickEnabled(settings.visionClickEnabled);
const nextPlayerName = settings.playerName || undefined; const nextPlayerName = settings.playerName || undefined;
setSession((prev) => prev ? { ...prev, playerName: nextPlayerName } : prev); setSession((prev) => prev ? { ...prev, playerName: nextPlayerName } : prev);
const cfg = settings.ttsConfigured ? loadClientTtsConfig() : null;
byoTtsRef.current = cfg;
setByoTtsConfig(cfg);
if (cfg) {
setSilenceStrikes(0);
cancelBeatAudioFetches();
setBeatAudioMap((prev) => {
for (const url of Object.values(prev)) {
if (url.startsWith("blob:")) URL.revokeObjectURL(url);
}
return {};
});
prefetchSceneAudio();
}
}, },
[prefetchSceneAudio], [],
); );
function detachRecordedReplay(): void { function detachRecordedReplay(): void {
@@ -1260,31 +1185,22 @@ function PlayInner() {
throw new Error(`找不到精选剧情:${cardName}`); throw new Error(`找不到精选剧情:${cardName}`);
}, },
) )
: fetch("/api/start", { : (async () => {
method: "POST", const config = buildEngineConfig();
headers: { const data = await startSession(config, {
"Content-Type": "application/json", ...livePayload!,
},
body: JSON.stringify({
...livePayload,
clientTts: !!byoTtsRef.current, clientTts: !!byoTtsRef.current,
}), });
}).then(async (r) => { // startSession doesn't echo ws/sg back — splice in what we sent.
if (!r.ok) {
const j = (await r.json().catch(() => ({}))) as { error?: string };
throw new Error(j.error ?? r.statusText);
}
const data = (await r.json()) as StartResponse;
// Live /api/start doesn't echo ws/sg back — splice in what we sent.
// styleReferenceImage is similarly not in StartResponse; tag it on so // styleReferenceImage is similarly not in StartResponse; tag it on so
// the session we build below carries it for every /api/scene call. // the session we build below carries it for every scene call.
return { return {
...data, ...data,
worldSetting: livePayload!.worldSetting, worldSetting: livePayload!.worldSetting,
styleGuide: livePayload!.styleGuide, styleGuide: livePayload!.styleGuide,
styleReferenceImage: livePayload!.styleReferenceImage, styleReferenceImage: livePayload!.styleReferenceImage,
}; };
}); })();
fetchStart fetchStart
.then(async (data) => { .then(async (data) => {
@@ -1430,10 +1346,7 @@ function PlayInner() {
storyStateAfter: result.storyState, storyStateAfter: result.storyState,
}, },
], ],
characters: mergeCharactersPreserveVoice( characters: result.characters,
base.characters,
result.characters,
),
storyState: result.storyState, storyState: result.storyState,
}; };
visitedBeatsRef.current = [result.scene.entryBeatId]; visitedBeatsRef.current = [result.scene.entryBeatId];
@@ -1656,21 +1569,12 @@ function PlayInner() {
clearPool(poolRef.current); clearPool(poolRef.current);
const promise = (async () => { const promise = (async () => {
const res = await fetch("/api/scene", { const config = buildEngineConfig();
method: "POST", const data = await requestScene(config, {
headers: { session: specSession,
"Content-Type": "application/json", clientTts: !!byoTtsRef.current,
},
body: JSON.stringify({
session: stripVoicesForTransport(specSession),
clientTts: !!byoTtsRef.current,
}),
}); });
if (!res.ok) { return data;
const j = (await res.json().catch(() => ({}))) as { error?: string };
throw new Error(j.error ?? res.statusText);
}
return (await res.json()) as SceneResponse;
})(); })();
void performSceneTransition(promise, exit, visited, choice.label); void performSceneTransition(promise, exit, visited, choice.label);
@@ -1688,38 +1592,23 @@ function PlayInner() {
setPhase("vision-thinking"); setPhase("vision-thinking");
try { try {
const classifyRes = await fetch("/api/classify-freeform", { const config = buildEngineConfig();
method: "POST", const decision = await classifyFreeform(config, {
headers: { "Content-Type": "application/json" }, session,
body: JSON.stringify({ freeformText: text,
session: stripVoicesForTransport(session),
freeformText: text,
}),
}); });
if (!classifyRes.ok) {
const j = (await classifyRes.json().catch(() => ({}))) as { error?: string };
throw new Error(j.error ?? classifyRes.statusText);
}
const decision = (await classifyRes.json()) as FreeformClassifyResponse;
if (decision.classify === "insert-beat") { if (decision.classify === "insert-beat") {
// Interactive beat: NPC responds to the player's action, scene stays // Interactive beat: NPC responds to the player's action, scene stays
setPhase("inserting-beat"); setPhase("inserting-beat");
const insertRes = await fetch("/api/insert-beat", { const { partial, characters: insertChars } = await requestInsertBeat(
method: "POST", config,
headers: { "Content-Type": "application/json" }, {
body: JSON.stringify({ session,
session: stripVoicesForTransport(session),
freeformAction: decision.freeformAction, freeformAction: decision.freeformAction,
clientTts: !!byoTtsRef.current, clientTts: !!byoTtsRef.current,
}), },
}); );
if (!insertRes.ok) {
const j = (await insertRes.json().catch(() => ({}))) as { error?: string };
throw new Error(j.error ?? insertRes.statusText);
}
const { partial, characters: insertChars } =
(await insertRes.json()) as InsertBeatResponse;
const fromBeatId = const fromBeatId =
currentBeatRef.current?.id ?? currentScene.entryBeatId; currentBeatRef.current?.id ?? currentScene.entryBeatId;
@@ -1746,10 +1635,7 @@ function PlayInner() {
history: session.history.map((h, i, arr) => history: session.history.map((h, i, arr) =>
i === arr.length - 1 ? { ...h, scene: patched, visitedBeatIds: nextVisited } : h, i === arr.length - 1 ? { ...h, scene: patched, visitedBeatIds: nextVisited } : h,
), ),
characters: mergeCharactersPreserveVoice( characters: insertChars,
session.characters,
insertChars,
),
}; };
setSession(nextSession); setSession(nextSession);
setCurrentScene(patched); setCurrentScene(patched);
@@ -1785,19 +1671,12 @@ function PlayInner() {
}; };
const promise = (async () => { const promise = (async () => {
const res = await fetch("/api/scene", { const config = buildEngineConfig();
method: "POST", const data = await requestScene(config, {
headers: { "Content-Type": "application/json" }, session: specSession,
body: JSON.stringify({ clientTts: !!byoTtsRef.current,
session: stripVoicesForTransport(specSession),
clientTts: !!byoTtsRef.current,
}),
}); });
if (!res.ok) { return data;
const j = (await res.json().catch(() => ({}))) as { error?: string };
throw new Error(j.error ?? res.statusText);
}
return (await res.json()) as SceneResponse;
})(); })();
setPendingClick(null); setPendingClick(null);
@@ -1816,43 +1695,23 @@ function PlayInner() {
try { try {
const annotatedImageBase64 = await annotateClick(imageUrl, click); const annotatedImageBase64 = await annotateClick(imageUrl, click);
const visionRes = await fetch("/api/vision", { const config = buildEngineConfig();
method: "POST", const decision = await visionDecide(config, {
headers: { session,
"Content-Type": "application/json", annotatedImageBase64,
},
body: JSON.stringify({ session: stripVoicesForTransport(session), annotatedImageBase64 }),
}); });
if (!visionRes.ok) {
const j = (await visionRes.json().catch(() => ({}))) as {
error?: string;
};
throw new Error(j.error ?? visionRes.statusText);
}
const decision = (await visionRes.json()) as VisionResponse;
track("vision_click", { result: decision.classify }); track("vision_click", { result: decision.classify });
if (decision.classify === "insert-beat") { if (decision.classify === "insert-beat") {
setPhase("inserting-beat"); setPhase("inserting-beat");
const insertRes = await fetch("/api/insert-beat", { const { partial, characters: insertChars } = await requestInsertBeat(
method: "POST", config,
headers: { {
"Content-Type": "application/json", session,
},
body: JSON.stringify({
session: stripVoicesForTransport(session),
freeformAction: decision.intent.freeformAction, freeformAction: decision.intent.freeformAction,
clientTts: !!byoTtsRef.current, clientTts: !!byoTtsRef.current,
}), },
}); );
if (!insertRes.ok) {
const j = (await insertRes.json().catch(() => ({}))) as {
error?: string;
};
throw new Error(j.error ?? insertRes.statusText);
}
const { partial, characters: insertChars } =
(await insertRes.json()) as InsertBeatResponse;
const fromBeatId = const fromBeatId =
currentBeatRef.current?.id ?? currentScene.entryBeatId; currentBeatRef.current?.id ?? currentScene.entryBeatId;
@@ -1878,10 +1737,7 @@ function PlayInner() {
history: session.history.map((h, i, arr) => history: session.history.map((h, i, arr) =>
i === arr.length - 1 ? { ...h, scene: patched } : h, i === arr.length - 1 ? { ...h, scene: patched } : h,
), ),
characters: mergeCharactersPreserveVoice( characters: insertChars,
session.characters,
insertChars,
),
}; };
setSession(nextSession); setSession(nextSession);
setCurrentScene(patched); setCurrentScene(patched);
@@ -1920,23 +1776,12 @@ function PlayInner() {
clearPool(poolRef.current); clearPool(poolRef.current);
const promise = (async () => { const promise = (async () => {
const res = await fetch("/api/scene", { const config = buildEngineConfig();
method: "POST", const data = await requestScene(config, {
headers: { session: specSession,
"Content-Type": "application/json", clientTts: !!byoTtsRef.current,
},
body: JSON.stringify({
session: stripVoicesForTransport(specSession),
clientTts: !!byoTtsRef.current,
}),
}); });
if (!res.ok) { return data;
const j = (await res.json().catch(() => ({}))) as {
error?: string;
};
throw new Error(j.error ?? res.statusText);
}
return (await res.json()) as SceneResponse;
})(); })();
await performSceneTransition( await performSceneTransition(
@@ -2054,16 +1899,6 @@ function PlayInner() {
const sceneCount = session?.history.length ?? 0; const sceneCount = session?.history.length ?? 0;
const beatCount = visitedBeatsRef.current.length; const beatCount = visitedBeatsRef.current.length;
// Surface the BYO-key nudge only to an unmuted, non-BYO player whose last few
// beats came back silent (shared key rate-limited) — the exact pain BYO fixes.
// Dismissible for the session.
const showSilenceNudge =
phase === "ready" &&
!muted &&
!byoTtsConfig &&
!nudgeDismissed &&
silenceStrikes >= SILENCE_NUDGE_THRESHOLD;
return ( return (
<div className="min-h-screen flex flex-col"> <div className="min-h-screen flex flex-col">
<header className="px-5 md:px-12 pt-6 md:pt-8 flex items-center justify-between"> <header className="px-5 md:px-12 pt-6 md:pt-8 flex items-center justify-between">
@@ -2154,32 +1989,6 @@ function PlayInner() {
/> />
{muted ? "静 · 音" : "有 · 声"} {muted ? "静 · 音" : "有 · 声"}
</button> </button>
{/* Silence nudge — a compact pill right beside the mute toggle.
Clicking opens the BYO-key modal in place (no trip to the
homepage). The × dismisses it for the session. */}
{showSilenceNudge && (
<span className="flex items-center gap-1 animate-fade-in">
<button
type="button"
onClick={() => setSettingsOpen(true)}
className="inline-flex items-center gap-1.5 rounded-full border border-ember-500/40 bg-ember-500/10 px-2.5 py-1 text-[10px] text-ember-500 hover:bg-ember-500/20 transition-colors"
title="经常没声音?填入你自己的小米 MiMo Key(免费),配音更稳定"
>
<i className="fa-solid fa-volume-xmark text-[9px]" />
Key
</button>
<button
type="button"
onClick={() => setNudgeDismissed(true)}
aria-label="关闭提示"
title="关闭"
className="text-clay-400 hover:text-clay-700 transition-colors"
>
<i className="fa-solid fa-xmark text-[10px]" />
</button>
</span>
)}
</> </>
} }
/> />