feat(web): embed beat audio into gallery and infiplot exports

Walk every speaking beat at export time, reuse the current scene's
beatAudioMap, and synthesize the rest via BYO TTS or /api/beat-audio with a
concurrency of 4. Show a progress toast on the play page while collecting.

Gallery export keeps audio in a sidecar localStorage key so the first paint
is not blocked by JSON.parse-ing several MB of base64; the gallery lazy-loads
it after the first scene image, then plays per-beat audio with a mute toggle
persisted to localStorage. .infiplot share files embed audioByBeatId in the
doc itself (v2); on import the data URIs survive scene swaps and feed back
into the per-beat audio map so replayers hear the original voices for free.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
DESKTOP-I1T6TF3\Q
2026-06-11 09:29:16 +08:00
parent a61a91060d
commit 621f83c47b
6 changed files with 528 additions and 59 deletions
+2 -2
View File
@@ -4,8 +4,8 @@ export const runtime = "nodejs";
// Cap a bit above pack's MAX_DOC_BYTES — ciphertext adds the 16-byte GCM tag
// and the 17-byte header; some slack accommodates near-cap docs without
// rejecting them at unpack time.
const MAX_FILE_BYTES = 6_000_000;
// rejecting them at unpack time. Bumped to fit pre-baked beat audio.
const MAX_FILE_BYTES = 13_000_000;
// Decrypt a `.infiplot` share file back to its doc JSON string. Returns the
// plaintext as a JSON field (not raw bytes) so the client can chain it through
+108 -3
View File
@@ -57,8 +57,11 @@ export type GalleryScene = {
};
export type GalleryDoc = {
/** v1 = scenes only (initial export). v2 = + alternates + characters. */
v: 1 | 2;
/** v1 = scenes only (initial export). v2 = + alternates + characters.
* v3 = + beat audio (stored in a sidecar localStorage key so the main
* doc stays small and the first paint isn't blocked by JSON.parse-ing
* several MB of base64). */
v: 1 | 2 | 3;
id: string;
createdAt: number;
orientation: Orientation;
@@ -71,13 +74,18 @@ export type GalleryDoc = {
};
// Key namespace for exported gallery docs: `${STORAGE_PREFIX}${id}`.
const STORAGE_PREFIX = "infiplot:gallery:";
// Appended to a doc's key to address that doc's beat-audio sidecar entry.
const AUDIO_SUFFIX = ":audio";
// Persisted mute preference ("1" = muted). Deliberately kept OUTSIDE the
// `infiplot:gallery:` namespace: the play page's export trimming scans every
// key under that prefix, treats anything that is not an `:audio` sidecar as a
// gallery doc, and evicts entries without a `createdAt` first — a preference
// stored under the prefix would be wiped whenever a new gallery is exported.
const MUTED_STORAGE_KEY = "infiplot:gallery-muted";
function readDoc(id: string): GalleryDoc | null {
try {
const raw = window.localStorage.getItem(STORAGE_PREFIX + id);
if (!raw) return null;
const parsed = JSON.parse(raw) as GalleryDoc;
if ((parsed.v !== 1 && parsed.v !== 2) || !Array.isArray(parsed.scenes)) {
if (
(parsed.v !== 1 && parsed.v !== 2 && parsed.v !== 3) ||
!Array.isArray(parsed.scenes)
) {
return null;
}
return parsed;
@@ -86,6 +94,23 @@ function readDoc(id: string): GalleryDoc | null {
}
}
/**
 * Loads the beat-audio sidecar stored alongside gallery doc `id`.
 *
 * Only entries whose value is a string starting with `data:` are kept.
 * A missing key, unparsable JSON, or unavailable storage all yield `{}`.
 */
function readSidecarAudio(id: string): Record<string, string> {
  const audio: Record<string, string> = {};
  try {
    const sidecarKey = `${STORAGE_PREFIX}${id}${AUDIO_SUFFIX}`;
    const stored = window.localStorage.getItem(sidecarKey);
    if (!stored) return {};
    const decoded = JSON.parse(stored) as Record<string, string>;
    Object.entries(decoded).forEach(([beatKey, uri]) => {
      if (typeof uri !== "string") return;
      if (uri.startsWith("data:")) audio[beatKey] = uri;
    });
    return audio;
  } catch {
    // Storage disabled or payload corrupt — behave as if there is no audio.
    return {};
  }
}
function detectOrientation(): Orientation {
if (typeof window === "undefined") return "landscape";
const portrait = window.matchMedia("(orientation: portrait)").matches;
@@ -352,6 +377,8 @@ function Slide({
beatId,
orientation,
alternates,
audioByBeatId,
muted,
dialogueOpen,
setDialogueOpen,
onAdvanceBeat,
@@ -361,6 +388,8 @@ function Slide({
beatId: string;
orientation: Orientation;
alternates: Record<string, GalleryScene>;
audioByBeatId: Record<string, string>;
muted: boolean;
dialogueOpen: boolean;
setDialogueOpen: (b: boolean) => void;
onAdvanceBeat: (nextBeatId: string) => void;
@@ -372,6 +401,24 @@ function Slide({
const beat = findBeat(scene, beatId) ?? findBeat(scene, scene.entryBeatId);
const audioSrc =
beat && scene.id && !muted
? (audioByBeatId[`${scene.id}:${beat.id}`] ?? null)
: null;
const audioRef = useRef<HTMLAudioElement | null>(null);
useEffect(() => {
const el = audioRef.current;
if (!el) return;
if (!audioSrc) {
el.pause();
return;
}
el.currentTime = 0;
void el.play().catch(() => {
// Browsers can refuse autoplay until user interacts — silent fail is fine.
});
}, [audioSrc]);
const choices: BeatChoice[] =
beat?.next.type === "choice"
? (beat.next as { type: "choice"; choices: BeatChoice[] }).choices
@@ -533,6 +580,16 @@ function Slide({
onClose={() => setDialogueOpen(false)}
/>
)}
{audioSrc && (
<audio
ref={audioRef}
src={audioSrc}
autoPlay
preload="auto"
className="hidden"
/>
)}
</div>
);
}
@@ -561,6 +618,20 @@ function GalleryInner() {
const [downloadingPortraits, setDownloadingPortraits] = useState(false);
const [orientation, setOrientation] = useState<Orientation>("landscape");
const [presentation, setPresentation] = useState(false);
// Audio map keyed by `${sceneId}:${beatId}`. Loaded in two phases: the
// sidecar localStorage key (gallery export path) is read lazily after first
// paint so the multi-MB JSON.parse doesn't block the first scene image's
// progressive paint. Imports from `.infiplot` files set this synchronously
// since the data is already in memory.
const [audioByBeatId, setAudioByBeatId] = useState<Record<string, string>>({});
const [muted, setMuted] = useState<boolean>(() => {
if (typeof window === "undefined") return false;
try {
return window.localStorage.getItem(MUTED_STORAGE_KEY) === "1";
} catch {
return false;
}
});
// Top toolbar auto-hide while in fullscreen — it shows briefly on entry,
// retracts upward, and pops back down when the cursor approaches the top
// edge. Outside presentation mode the bar is always visible.
@@ -609,6 +680,17 @@ function GalleryInner() {
setOrientation(d.orientation ?? detectOrientation());
const first = d.scenes[0]!;
setStack([{ scene: first, beatId: first.entryBeatId, mainIdx: 0 }]);
// Lazy-load the audio sidecar AFTER first paint so its JSON.parse (~MBs
// of base64) doesn't stall the main thread and let the first image
// paint row-by-row. setTimeout(0) yields back to the renderer first.
if (d.v === 3) {
const t = window.setTimeout(() => {
const audio = readSidecarAudio(id);
if (Object.keys(audio).length > 0) setAudioByBeatId(audio);
}, 0);
return () => window.clearTimeout(t);
}
}, []);
// Prefer the doc's stored orientation; fall back to the device.
@@ -1035,6 +1117,8 @@ function GalleryInner() {
beatId={top.beatId}
orientation={orientation}
alternates={alternates}
audioByBeatId={audioByBeatId}
muted={muted}
dialogueOpen={dialogueOpen}
setDialogueOpen={setDialogueOpen}
onAdvanceBeat={onAdvanceBeat}
@@ -1080,6 +1164,27 @@ function GalleryInner() {
</div>
<div className="pointer-events-auto flex items-center gap-2">
{Object.keys(audioByBeatId).length > 0 && (
<button
type="button"
onClick={() => {
const next = !muted;
setMuted(next);
try {
window.localStorage.setItem(MUTED_STORAGE_KEY, next ? "1" : "0");
} catch {
// ignore
}
}}
className="flex h-9 w-9 items-center justify-center rounded-full bg-black/40 text-white/80 backdrop-blur-sm transition-colors hover:text-white"
aria-label={muted ? "取消静音" : "静音"}
title={muted ? "取消静音" : "静音"}
>
<i
className={`fa-solid ${muted ? "fa-volume-xmark" : "fa-volume-high"} text-[12px]`}
/>
</button>
)}
<button
type="button"
onClick={() => void togglePresentation()}
+194 -50
View File
@@ -20,6 +20,7 @@ import type { GalleryDoc, GalleryScene } from "@/app/gallery/page";
import { SettingsModal, readStoredPlayerName, readStoredVisionClick } from "@/components/SettingsModal";
import { annotateClick } from "@/lib/annotateClient";
import { loadClientTtsConfig } from "@/lib/clientTtsConfig";
import { collectBeatAudioForExport } from "@/lib/exportAudio";
import { PRESETS } from "@/lib/presets";
import {
STORY_SHARE_STORAGE_KEY,
@@ -587,6 +588,11 @@ function PlayInner() {
const [nudgeDismissed, setNudgeDismissed] = useState(false);
const [settingsOpen, setSettingsOpen] = useState(false);
const [visionClickEnabled, setVisionClickEnabled] = useState(true);
// Top-of-screen progress toast for the gallery / story export pipeline.
// null when idle; { done, total, label } while collecting beat audio.
const [exportProgress, setExportProgress] = useState<
{ done: number; total: number; label: string } | null
>(null);
const startedRef = useRef(false);
const poolRef = useRef<Map<string, PrefetchEntry>>(new Map());
@@ -631,6 +637,12 @@ function PlayInner() {
const replayIndexRef = useRef(-1);
const replayActiveRef = useRef(false);
const exportingStoryRef = useRef(false);
const exportingGalleryRef = useRef(false);
// Audio carried in from a `.infiplot` share file, keyed by `${sceneId}:${beatId}`.
// Survives scene swaps so a player who re-exports a replayed game keeps the
// baked voices that the original creator already paid to synth — they're
// free to embed back into the new gallery / share file.
const prebakedAudioRef = useRef<Record<string, string>>({});
// Original (CDN) URL of the currently-rendered scene image. Used as the key
// to revoke its blob: URL when the scene swaps. We track the ORIGINAL URL,
// not the blob URL, because blobUrlCache is keyed by original URL.
@@ -711,6 +723,18 @@ function PlayInner() {
if (mutedRef.current) return; // 静音 → 不合成 TTS(避免无谓的调用与花费)。
// 「首页选关闭」也走这条路:bootstrap 时 muted 已被初始化为 true。
if (!beat.speaker || !beat.line) return;
// Reuse pre-baked audio from a `.infiplot` import before any synth —
// free, instant, and identical to what the original player heard.
const curSceneId = currentSceneRef.current?.id;
if (curSceneId) {
const baked = prebakedAudioRef.current[`${curSceneId}:${beat.id}`];
if (baked) {
setBeatAudioMap((m) => (m[beat.id] === baked ? m : { ...m, [beat.id]: baked }));
return;
}
}
const speaker = sess.characters.find((c) => c.name === beat.speaker);
if (!speaker) return;
@@ -899,13 +923,26 @@ function PlayInner() {
// export so the cap is enforced strictly (≤ keepCount + 1 transiently → ≤ N
// once write completes). Corrupt entries (un-parseable / no createdAt) sort
// last and get evicted first.
//
// Audio lives in a sidecar key `infiplot:gallery:<id>:audio` so the main
// doc JSON.parse on gallery load doesn't block the main thread with several
// MB of base64. The sidecar key inherits its doc's age — paired by id, not
// its own createdAt (it never has one) — and is evicted alongside its doc.
const trimGalleryExports = useCallback((keepCount: number) => {
try {
const prefix = "infiplot:gallery:";
const entries: { key: string; createdAt: number }[] = [];
const audioSuffix = ":audio";
const docs: Map<string, { key: string; createdAt: number }> = new Map();
const sidecars: Map<string, string> = new Map();
for (let i = 0; i < window.localStorage.length; i++) {
const k = window.localStorage.key(i);
if (!k || !k.startsWith(prefix)) continue;
if (k.endsWith(audioSuffix)) {
const id = k.slice(prefix.length, -audioSuffix.length);
sidecars.set(id, k);
continue;
}
const id = k.slice(prefix.length);
let createdAt = 0;
try {
const raw = window.localStorage.getItem(k);
@@ -916,11 +953,22 @@ function PlayInner() {
} catch {
createdAt = 0;
}
entries.push({ key: k, createdAt });
docs.set(id, { key: k, createdAt });
}
entries.sort((a, b) => b.createdAt - a.createdAt);
for (const e of entries.slice(keepCount)) {
window.localStorage.removeItem(e.key);
const ordered = [...docs.entries()].sort(
(a, b) => b[1].createdAt - a[1].createdAt,
);
for (const [id, { key }] of ordered.slice(keepCount)) {
window.localStorage.removeItem(key);
const sc = sidecars.get(id);
if (sc) window.localStorage.removeItem(sc);
sidecars.delete(id);
}
// Orphan sidecars (their doc was already gone) get cleaned up too.
for (const sc of sidecars.values()) {
if (!docs.has(sc.slice(prefix.length, -audioSuffix.length))) {
window.localStorage.removeItem(sc);
}
}
} catch {
// best-effort — quota or disabled storage shouldn't block the export
@@ -932,9 +980,15 @@ function PlayInner() {
// reference (those are tens-to-hundreds of KB each). Writes it to
// localStorage under a one-shot id and opens /gallery#<id> in a new tab
// so the play session keeps running.
const handleExportGallery = useCallback(() => {
//
// Beat audio is collected (awaited) here before the doc is written — reusing
// pre-baked audio and the per-scene beatAudioMap when possible, BYO / server
// TTS for the rest — and stashed in a sidecar localStorage key so the
// gallery's first paint isn't bottlenecked on JSON.parse-ing several MB of
// base64.
const handleExportGallery = useCallback(async () => {
const s = sessionRef.current;
if (!s) return;
if (!s || exportingGalleryRef.current) return;
exportingGalleryRef.current = true;
const scenes: GalleryScene[] = s.history
.map((h) => ({
id: h.scene.id,
@@ -947,7 +1001,10 @@ function PlayInner() {
exit: h.exit,
}))
.filter((sc) => sc.imageUrl);
if (scenes.length === 0) return;
if (scenes.length === 0) {
exportingGalleryRef.current = false;
return;
}
// Alternates: ${parentSceneId}:${choiceId} → reachable scene. Two sources,
// merged with main-path winning ties (it always agrees with prefetch when
@@ -999,8 +1056,29 @@ function PlayInner() {
const id = `${Date.now().toString(36)}_${Math.random()
.toString(36)
.slice(2, 8)}`;
let audioByBeatId: Record<string, string> = {};
try {
setExportProgress({ done: 0, total: 0, label: "正在准备配音" });
audioByBeatId = await collectBeatAudioForExport({
session: s,
beatAudioMap,
currentSceneId: currentSceneRef.current?.id ?? null,
byoTts: byoTtsRef.current,
byoVoiceCache: provisionedVoicesRef.current,
prebakedAudio: prebakedAudioRef.current,
onProgress: (done, total) =>
setExportProgress({ done, total, label: "正在准备配音" }),
});
} catch {
// best-effort — even if the collector throws, the gallery without audio
// is still usable; we keep going rather than block the export.
} finally {
setExportProgress(null);
}
const doc: GalleryDoc = {
v: 2,
v: audioByBeatId && Object.keys(audioByBeatId).length > 0 ? 3 : 2,
id,
createdAt: Date.now(),
orientation: s.orientation ?? "landscape",
@@ -1017,50 +1095,86 @@ function PlayInner() {
window.localStorage.setItem(`infiplot:gallery:${id}`, docStr);
} catch {
// localStorage full or disabled — silently bail; the player keeps playing.
exportingGalleryRef.current = false;
return;
}
track("gallery_export", { scene_count: scenes.length });
const audioCount = Object.keys(audioByBeatId).length;
if (audioCount > 0) {
try {
window.localStorage.setItem(
`infiplot:gallery:${id}:audio`,
JSON.stringify(audioByBeatId),
);
} catch {
// Sidecar too big for quota — gallery still opens without sound.
}
}
track("gallery_export", { scene_count: scenes.length, audio_count: audioCount });
window.open(`/gallery#id=${id}`, "_blank", "noopener");
}, [trimGalleryExports]);
exportingGalleryRef.current = false;
}, [beatAudioMap, trimGalleryExports]);
const handleExportStory = useCallback(() => {
const handleExportStory = useCallback(async () => {
const s = sessionRef.current;
if (!s || s.history.length === 0 || exportingStoryRef.current) return;
exportingStoryRef.current = true;
const sceneIndex = Math.max(0, s.history.length - 1);
const doc = createStoryShareDoc(s, {
sceneIndex,
beatId: currentBeatRef.current?.id ?? s.history[sceneIndex]?.scene.entryBeatId,
});
void (async () => {
try {
const r = await fetch("/api/story-pack", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ docStr: JSON.stringify(doc) }),
});
if (!r.ok) {
const j = (await r.json().catch(() => ({}))) as { error?: string };
window.alert(j.error ?? "剧情分享打包失败");
return;
}
const blob = await r.blob();
const url = URL.createObjectURL(blob);
const a = document.createElement("a");
a.href = url;
a.download = storyShareFilename(doc);
a.rel = "noopener";
document.body.appendChild(a);
a.click();
a.remove();
setTimeout(() => URL.revokeObjectURL(url), 2000);
} catch {
window.alert("剧情分享打包失败");
} finally {
exportingStoryRef.current = false;
let audioByBeatId: Record<string, string> = {};
try {
setExportProgress({ done: 0, total: 0, label: "正在准备配音" });
audioByBeatId = await collectBeatAudioForExport({
session: s,
beatAudioMap,
currentSceneId: currentSceneRef.current?.id ?? null,
byoTts: byoTtsRef.current,
byoVoiceCache: provisionedVoicesRef.current,
prebakedAudio: prebakedAudioRef.current,
onProgress: (done, total) =>
setExportProgress({ done, total, label: "正在准备配音" }),
});
} catch {
// best-effort — share the doc silent if collecting audio failed
} finally {
setExportProgress(null);
}
const doc = createStoryShareDoc(
s,
{
sceneIndex,
beatId: currentBeatRef.current?.id ?? s.history[sceneIndex]?.scene.entryBeatId,
},
Object.keys(audioByBeatId).length > 0 ? audioByBeatId : undefined,
);
try {
const r = await fetch("/api/story-pack", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ docStr: JSON.stringify(doc) }),
});
if (!r.ok) {
const j = (await r.json().catch(() => ({}))) as { error?: string };
window.alert(j.error ?? "剧情分享打包失败");
return;
}
})();
}, []);
const blob = await r.blob();
const url = URL.createObjectURL(blob);
const a = document.createElement("a");
a.href = url;
a.download = storyShareFilename(doc);
a.rel = "noopener";
document.body.appendChild(a);
a.click();
a.remove();
setTimeout(() => URL.revokeObjectURL(url), 2000);
} catch {
window.alert("剧情分享打包失败");
} finally {
exportingStoryRef.current = false;
}
}, [beatAudioMap]);
// ── Presentation mode toggle ─────────────────────────────────────────
const togglePresentation = useCallback(async () => {
@@ -1168,6 +1282,21 @@ function PlayInner() {
replayIndexRef.current = 0;
replayActiveRef.current = imported.history.length > 1;
visitedBeatsRef.current = [first.scene.entryBeatId];
// Stash pre-baked audio (from doc.audioByBeatId) so it survives scene
// swaps and re-exports. Keyed by `${sceneId}:${beatId}`. Also seed the
// current beatAudioMap for the first scene so audio plays right away
// — the scene-change effect normally clears the map on transition,
// and bare beat ids "b1/b2/..." would otherwise miss prebaked entries.
if (doc.audioByBeatId) {
prebakedAudioRef.current = { ...doc.audioByBeatId };
const seed: Record<string, string> = {};
for (const beat of first.scene.beats) {
const k = `${first.scene.id}:${beat.id}`;
const v = doc.audioByBeatId[k];
if (v) seed[beat.id] = v;
}
if (Object.keys(seed).length > 0) setBeatAudioMap(seed);
}
setSession(initial);
setCurrentScene(first.scene);
setCurrentBeatId(first.scene.entryBeatId);
@@ -2066,6 +2195,19 @@ function PlayInner() {
return (
<div className="min-h-screen flex flex-col">
{exportProgress && (
<div
className="fixed top-4 left-1/2 -translate-x-1/2 z-50 rounded-full bg-black/75 px-4 py-2 text-[11px] smallcaps text-white/95 backdrop-blur-sm shadow-lg flex items-center gap-2"
>
<i className="fa-solid fa-circle-notch animate-spin text-[11px] text-amber-300" />
<span>{exportProgress.label}</span>
{exportProgress.total > 0 && (
<span className="num text-white/70">
{exportProgress.done}/{exportProgress.total}
</span>
)}
</div>
)}
<header className="px-5 md:px-12 pt-6 md:pt-8 flex items-center justify-between">
<Link
href="/"
@@ -2119,20 +2261,22 @@ function PlayInner() {
<>
<button
type="button"
onClick={handleExportGallery}
className="text-[10px] smallcaps text-clay-500 hover:text-ember-500 transition-colors flex items-center gap-2"
onClick={() => void handleExportGallery()}
disabled={!!exportProgress}
className="text-[10px] smallcaps text-clay-500 hover:text-ember-500 transition-colors flex items-center gap-2 disabled:opacity-50"
aria-label="导出可交互图集"
title="导出本局为可交互图集链接(只会保留最近两次的可交互图集链接)"
title="导出本局为可交互图集链接(含配音;只会保留最近两次的可交互图集链接)"
>
<i className="fa-solid fa-link text-[10px]" />
· · ·
</button>
<button
type="button"
onClick={handleExportStory}
className="text-[10px] smallcaps text-clay-500 hover:text-ember-500 transition-colors flex items-center gap-2"
onClick={() => void handleExportStory()}
disabled={!!exportProgress}
className="text-[10px] smallcaps text-clay-500 hover:text-ember-500 transition-colors flex items-center gap-2 disabled:opacity-50"
aria-label="分享当前剧情"
title="导出本局为可继续游玩的剧情 JSON"
title="导出本局为可继续游玩的剧情 .infiplot(含配音)"
>
<i className="fa-solid fa-share-nodes text-[10px]" />
· · ·
+1 -1
View File
@@ -53,7 +53,7 @@ type AnalyticsEventData = {
tts_toggle: { muted: boolean };
fullscreen_toggle: { on: boolean };
play_heartbeat: never;
gallery_export: { scene_count: number };
gallery_export: { scene_count: number; audio_count: number };
};
export type AnalyticsEvent = keyof AnalyticsEventData;
+199
View File
@@ -0,0 +1,199 @@
// ──────────────────────────────────────────────────────────────────────
// Audio collection for the gallery / .infiplot share exports.
//
// Walks every speaking beat across `session.history` and produces a
// Record keyed by `${sceneId}:${beatId}` whose values are inline
// data: URIs (base64). Data URIs are the only audio form that survives
// transport through localStorage, AES-GCM ciphertext, and a fresh
// browser tab — blob: URLs from /api/beat-audio are tied to the document
// that created them.
//
// Three sources, in priority order:
// 1. prebaked — audio that came in through a .infiplot share file.
// Already a data URI, so just copied through.
// 2. current beatAudioMap — the play page's per-beat audio for the
// scene the player is on right now. Blob URLs get
// converted to data URIs; data URIs pass through.
// 3. fresh synth — BYO client TTS (browser-direct Xiaomi/StepFun) when
// a key is configured, otherwise /api/beat-audio.
//
// Concurrency 4 to keep TTS providers happy when a long session has
// dozens of speaking beats. Errors are silently skipped — a missing beat
// just plays without voice; we never block the export on a TTS hiccup.
// ──────────────────────────────────────────────────────────────────────
import { provisionVoice, synthesize } from "@infiplot/tts-client";
import type {
Beat,
Character,
CharacterVoice,
Session,
TtsConfig,
} from "@infiplot/types";
// Upper bound on the number of synth workers collectBeatAudioForExport runs
// in parallel.
const CONCURRENCY = 4;
/** Inputs for collectBeatAudioForExport. */
export type CollectBeatAudioOptions = {
/** Session whose `history` scenes and `characters` are walked for speaking beats. */
session: Session;
/** Current-scene audio already loaded by the play page (keyed by bare beat id). */
beatAudioMap: Record<string, string>;
/** Scene id `beatAudioMap` belongs to (so we can promote its entries into the full key). */
currentSceneId: string | null;
/** BYO TTS config when the user supplied their own key; null for server-side TTS. */
byoTts: TtsConfig | null;
/** Cache of in-flight BYO voice provisions, keyed by character name. Reused across calls. */
byoVoiceCache: Map<string, Promise<CharacterVoice>>;
/** Audio carried in from a `.infiplot` share file (already keyed by `sceneId:beatId`). */
prebakedAudio?: Record<string, string>;
/** Progress callback (done/total). Fired after every beat (success or failure). */
onProgress?: (done: number, total: number) => void;
/** Optional abort signal: checked before each synth job is started and forwarded to the synth calls. */
signal?: AbortSignal;
};
/** One speaking beat queued for audio collection. */
type Job = {
/** Output key for this beat: `${scene.id}:${beat.id}`. */
key: string;
/** Scene (taken from `session.history`) that contains the beat. */
scene: Session["history"][number]["scene"];
/** The beat itself; only beats with both a speaker and a line are queued. */
beat: Beat;
};
/**
 * Builds the `${sceneId}:${beatId}` → data-URI audio map embedded in gallery
 * and `.infiplot` exports.
 *
 * Sources, in priority order:
 *   1. `opts.prebakedAudio` entries that are already `data:` URIs,
 *   2. `opts.beatAudioMap` for the scene identified by `opts.currentSceneId`
 *      (converted to data URIs),
 *   3. fresh synthesis for everything else, at most `CONCURRENCY` at a time.
 *
 * Failures are never thrown to the caller: a beat whose audio cannot be
 * obtained is simply absent from the result. `opts.onProgress` is called once
 * after the reuse phase and then after every synth attempt.
 *
 * @param opts Session, audio sources, TTS config and optional progress/abort hooks.
 * @returns Map of `${sceneId}:${beatId}` to `data:` URI; partial if aborted.
 */
export async function collectBeatAudioForExport(
  opts: CollectBeatAudioOptions,
): Promise<Record<string, string>> {
  const out: Record<string, string> = {};

  // Source 1: pre-baked audio is copied through, but only real data URIs —
  // anything else would not survive leaving this document.
  if (opts.prebakedAudio) {
    for (const [k, v] of Object.entries(opts.prebakedAudio)) {
      if (typeof v === "string" && v.startsWith("data:")) out[k] = v;
    }
  }

  // One job per speaking beat that is not already covered. Keys are deduped
  // so a scene/beat that shows up more than once in the history is only
  // synthesized (and billed, and counted in `total`) once.
  const jobs: Job[] = [];
  const queuedKeys = new Set<string>();
  for (const entry of opts.session.history) {
    const scene = entry.scene;
    for (const beat of scene.beats) {
      if (!beat.speaker || !beat.line) continue;
      const key = `${scene.id}:${beat.id}`;
      if (out[key] || queuedKeys.has(key)) continue;
      queuedKeys.add(key);
      jobs.push({ key, scene, beat });
    }
  }

  // Source 2: hoist current-scene blob/data URLs first so the play page's
  // already-synthesized audio is reused instead of re-billed. Blob URLs are
  // local to this document — convert to base64 so they survive export.
  if (opts.currentSceneId) {
    for (const job of jobs) {
      // Stop converting once the caller has aborted the export.
      if (opts.signal?.aborted) break;
      if (job.scene.id !== opts.currentSceneId) continue;
      const local = opts.beatAudioMap[job.beat.id];
      if (!local) continue;
      try {
        out[job.key] = await urlToDataUri(local);
      } catch {
        // ignore — falls through to synth below
      }
    }
  }

  const remaining = jobs.filter((j) => !out[j.key]);
  const total = jobs.length;
  let done = jobs.length - remaining.length;
  opts.onProgress?.(done, total);

  // Source 3: synthesize what is left. Workers share a cursor; each claims
  // the next job synchronously before awaiting, so no job is taken twice.
  const charByName = new Map(opts.session.characters.map((c) => [c.name, c]));
  let cursor = 0;
  async function worker(): Promise<void> {
    while (cursor < remaining.length) {
      if (opts.signal?.aborted) return;
      const job = remaining[cursor++];
      if (!job) return;
      try {
        const audio = await synthesizeBeatForExport(
          job.beat,
          charByName.get(job.beat.speaker ?? ""),
          opts.byoTts,
          opts.byoVoiceCache,
          opts.signal,
        );
        if (audio) out[job.key] = audio;
      } catch {
        // silent — beat will play without voice
      }
      done++;
      opts.onProgress?.(done, total);
    }
  }

  const workers = Array.from(
    { length: Math.min(CONCURRENCY, Math.max(1, remaining.length)) },
    () => worker(),
  );
  await Promise.all(workers);
  return out;
}
/**
 * Produces a `data:` URI with the spoken audio for one beat, or `null` when
 * the beat cannot be voiced (no speaker, no line, no usable voice, or the
 * server declined).
 *
 * With a BYO config the voice is taken from / stored in `voiceCache` (keyed by
 * character name) and synthesis happens through the client TTS helpers;
 * without one the request goes to `/api/beat-audio`.
 *
 * @param beat       Beat whose `line` is spoken.
 * @param speaker    Character speaking the beat, if it could be resolved.
 * @param byo        BYO TTS config, or `null` to use the server endpoint.
 * @param voiceCache Shared cache of voice promises, keyed by character name.
 * @param signal     Optional abort signal forwarded to the synth call.
 */
async function synthesizeBeatForExport(
  beat: Beat,
  speaker: Character | undefined,
  byo: TtsConfig | null,
  voiceCache: Map<string, Promise<CharacterVoice>>,
  signal?: AbortSignal,
): Promise<string | null> {
  if (!speaker || !beat.line) return null;

  // ── Server-side TTS ────────────────────────────────────────────────
  if (!byo) {
    if (!speaker.voice) return null;
    const payload = JSON.stringify({
      beat: { id: beat.id, line: beat.line, lineDelivery: beat.lineDelivery },
      voice: speaker.voice,
    });
    const response = await fetch("/api/beat-audio", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: payload,
      signal,
    });
    if (response.status === 204 || !response.ok) return null;
    const audioBlob = await response.blob();
    return await blobToDataUri(audioBlob);
  }

  // ── BYO client TTS ─────────────────────────────────────────────────
  let pendingVoice = voiceCache.get(speaker.name);
  if (!pendingVoice) {
    if (speaker.voice) {
      pendingVoice = Promise.resolve(speaker.voice);
    } else if (speaker.voiceDescription) {
      pendingVoice = provisionVoice(byo, speaker.voiceDescription, speaker.name);
    } else {
      // Nothing to build a voice from.
      return null;
    }
    voiceCache.set(speaker.name, pendingVoice);
  }

  let resolvedVoice: CharacterVoice;
  try {
    resolvedVoice = await pendingVoice;
  } catch {
    // Drop the failed provision so a later beat can try again.
    voiceCache.delete(speaker.name);
    return null;
  }

  const { mimeType, audioBase64 } = await synthesize(
    byo,
    resolvedVoice,
    beat.line,
    beat.lineDelivery,
    signal,
  );
  return `data:${mimeType};base64,${audioBase64}`;
}
/**
 * Converts an audio URL (typically a `blob:` URL) into a `data:` URI.
 * `data:` URIs are returned unchanged without any fetch.
 *
 * @param url URL to load; `data:` inputs pass straight through.
 * @returns The audio as a `data:` URI.
 * @throws If the fetch fails or answers with a non-OK status. Without that
 *   check the error body would be base64-encoded and exported as if it were
 *   audio.
 */
async function urlToDataUri(url: string): Promise<string> {
  if (url.startsWith("data:")) return url;
  const res = await fetch(url);
  if (!res.ok) {
    throw new Error(`Failed to load audio for export (HTTP ${res.status})`);
  }
  const blob = await res.blob();
  return await blobToDataUri(blob);
}
/**
 * Reads a Blob through FileReader and resolves with its `data:` URI.
 * Rejects if the reader reports an error or finishes without a string result.
 */
function blobToDataUri(blob: Blob): Promise<string> {
  return new Promise<string>((resolve, reject) => {
    const fileReader = new FileReader();

    const handleError = (): void => {
      reject(fileReader.error ?? new Error("FileReader failed"));
    };

    // `loadend` fires after success and after failure alike; a non-string
    // result at that point means there is nothing usable to return.
    const handleLoadEnd = (): void => {
      const encoded = fileReader.result;
      if (typeof encoded !== "string") {
        reject(new Error("FileReader produced non-string result"));
        return;
      }
      resolve(encoded);
    };

    fileReader.onerror = handleError;
    fileReader.onloadend = handleLoadEnd;
    fileReader.readAsDataURL(blob);
  });
}
+24 -3
View File
@@ -11,7 +11,7 @@ import type {
export const STORY_SHARE_STORAGE_KEY = "infiplot:story-import";
export type StoryShareDoc = {
v: 1;
v: 1 | 2;
kind: "infiplot-story";
exportedAt: number;
current: {
@@ -19,6 +19,11 @@ export type StoryShareDoc = {
beatId?: string;
};
session: Session;
/** Pre-synthesized per-beat audio (data:audio/...;base64,...). Keyed by
* `${sceneId}:${beatId}`. v2+ only — older files just have no audio and
* play silent on replay. Embedding keeps the share file self-contained
* so a friend can hear the recorded voices without their own TTS key. */
audioByBeatId?: Record<string, string>;
};
type JsonRecord = Record<string, unknown>;
@@ -133,13 +138,16 @@ function sanitizeSessionForShare(session: Session): Session {
export function createStoryShareDoc(
session: Session,
current: { sceneIndex: number; beatId?: string },
audioByBeatId?: Record<string, string>,
): StoryShareDoc {
const hasAudio = !!audioByBeatId && Object.keys(audioByBeatId).length > 0;
return {
v: 1,
v: hasAudio ? 2 : 1,
kind: "infiplot-story",
exportedAt: Date.now(),
current,
session: sanitizeSessionForShare(session),
...(hasAudio ? { audioByBeatId } : {}),
};
}
@@ -149,7 +157,7 @@ export function storyShareFilename(doc: StoryShareDoc): string {
export function parseStoryShareDoc(value: unknown): StoryShareDoc {
if (!isRecord(value)) throw new Error("这不是有效的剧情分享文件");
if (value.kind !== "infiplot-story" || value.v !== 1) {
if (value.kind !== "infiplot-story" || (value.v !== 1 && value.v !== 2)) {
throw new Error("剧情分享文件格式不支持");
}
if (typeof value.exportedAt !== "number" || !Number.isFinite(value.exportedAt)) {
@@ -211,9 +219,22 @@ export function parseStoryShareDoc(value: unknown): StoryShareDoc {
}
}
let audioByBeatId: Record<string, string> | undefined;
if (value.audioByBeatId !== undefined) {
if (!isRecord(value.audioByBeatId)) {
throw new Error("剧情分享文件配音数据不合法");
}
const cleaned: Record<string, string> = {};
for (const [k, v] of Object.entries(value.audioByBeatId)) {
if (typeof v === "string" && v.startsWith("data:")) cleaned[k] = v;
}
if (Object.keys(cleaned).length > 0) audioByBeatId = cleaned;
}
const doc = value as StoryShareDoc;
return {
...doc,
session: sanitizeSessionForShare(doc.session),
...(audioByBeatId ? { audioByBeatId } : {}),
};
}