feat(web): embed beat audio into gallery and infiplot exports
Walk every speaking beat at export time, reuse the current scene's beatAudioMap, and synthesize the rest via BYO TTS or /api/beat-audio with a concurrency of 4. Show a progress toast on the play page while collecting. Gallery export keeps audio in a sidecar localStorage key so the first paint is not blocked by JSON.parse-ing several MB of base64; the gallery lazy-loads it after the first scene image, then plays per-beat audio with a mute toggle persisted to localStorage. .infiplot share files embed audioByBeatId in the doc itself (v2); on import the data URIs survive scene swaps and feed back into the per-beat audio map so replayers hear the original voices for free. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
+1
-1
@@ -53,7 +53,7 @@ type AnalyticsEventData = {
|
||||
tts_toggle: { muted: boolean };
|
||||
fullscreen_toggle: { on: boolean };
|
||||
play_heartbeat: never;
|
||||
gallery_export: { scene_count: number };
|
||||
gallery_export: { scene_count: number; audio_count: number };
|
||||
};
|
||||
|
||||
export type AnalyticsEvent = keyof AnalyticsEventData;
|
||||
|
||||
@@ -0,0 +1,199 @@
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
// Audio collection for the gallery / .infiplot share exports.
|
||||
//
|
||||
// Walks every speaking beat across `session.history` and produces a
|
||||
// Record keyed by `${sceneId}:${beatId}` whose values are inline
|
||||
// data: URIs (base64). Data URIs are the only audio form that survives
|
||||
// transport through localStorage, AES-GCM ciphertext, and a fresh
|
||||
// browser tab — blob: URLs from /api/beat-audio are tied to the document
|
||||
// that created them.
|
||||
//
|
||||
// Three sources, in priority order:
|
||||
// 1. prebaked — audio that came in through a .infiplot share file.
|
||||
// Already a data URI, so just copied through.
|
||||
// 2. current beatAudioMap — the play page's per-beat audio for the
|
||||
// scene the player is on right now. Blob URLs get
|
||||
// converted to data URIs; data URIs pass through.
|
||||
// 3. fresh synth — BYO client TTS (browser-direct Xiaomi/StepFun) when
|
||||
// a key is configured, otherwise /api/beat-audio.
|
||||
//
|
||||
// Concurrency 4 to keep TTS providers happy when a long session has
|
||||
// dozens of speaking beats. Errors are silently skipped — a missing beat
|
||||
// just plays without voice; we never block the export on a TTS hiccup.
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
import { provisionVoice, synthesize } from "@infiplot/tts-client";
|
||||
import type {
|
||||
Beat,
|
||||
Character,
|
||||
CharacterVoice,
|
||||
Session,
|
||||
TtsConfig,
|
||||
} from "@infiplot/types";
|
||||
|
||||
// Upper bound on how many beats are synthesized in parallel during one export.
const CONCURRENCY = 4;
|
||||
|
||||
/** Inputs accepted by `collectBeatAudioForExport`. */
export type CollectBeatAudioOptions = {
  /** Session whose `history` scenes are walked for speaking beats. */
  session: Session;
  /**
   * Audio the play page has already loaded for the current scene.
   * Keys are bare beat ids (no scene prefix).
   */
  beatAudioMap: Record<string, string>;
  /**
   * Id of the scene that `beatAudioMap` belongs to; used to build the full
   * `sceneId:beatId` key for its entries. `null` disables the reuse step.
   */
  currentSceneId: string | null;
  /** User-supplied TTS config; `null` means synthesis goes through the server. */
  byoTts: TtsConfig | null;
  /**
   * Voice promises keyed by character name. The same map may be passed to
   * several calls so provisioning is shared between them.
   */
  byoVoiceCache: Map<string, Promise<CharacterVoice>>;
  /** Audio imported from a `.infiplot` share file, keyed by `sceneId:beatId`. */
  prebakedAudio?: Record<string, string>;
  /** Called with `(done, total)` after each beat, whether it succeeded or not. */
  onProgress?: (done: number, total: number) => void;
  /** Checked between beats and forwarded to the synthesis requests. */
  signal?: AbortSignal;
};
|
||||
|
||||
/** One speaking beat that still needs audio for the export. */
type Job = {
  /** Output key in the form `sceneId:beatId`. */
  key: string;
  /** Scene (taken from a `session.history` entry) that contains `beat`. */
  scene: Session["history"][number]["scene"];
  /** The beat to voice. */
  beat: Beat;
};
|
||||
|
||||
/**
 * Collects audio for every speaking beat in `opts.session.history`.
 *
 * Sources are consulted in this order:
 *  1. `opts.prebakedAudio` — entries that are already `data:` URIs are copied
 *     through unchanged;
 *  2. `opts.beatAudioMap` — entries for `opts.currentSceneId` are converted
 *     with `urlToDataUri`;
 *  3. fresh synthesis through `synthesizeBeatForExport`, with at most
 *     `CONCURRENCY` beats in flight.
 *
 * Per-beat failures are swallowed on purpose: the beat is simply left out of
 * the result, so a TTS hiccup never fails the whole export. When
 * `opts.signal` is aborted, no further beats are started and whatever has
 * been collected so far is returned.
 *
 * @param opts - Session, already-loaded audio, TTS configuration and callbacks.
 * @returns A record keyed by `${sceneId}:${beatId}` whose values are `data:` URIs.
 */
export async function collectBeatAudioForExport(
  opts: CollectBeatAudioOptions,
): Promise<Record<string, string>> {
  const out: Record<string, string> = {};

  // 1. Prebaked audio: only `data:` URIs are accepted.
  for (const [key, value] of Object.entries(opts.prebakedAudio ?? {})) {
    if (typeof value === "string" && value.startsWith("data:")) out[key] = value;
  }

  // Queue one job per distinct key. Without the `queued` set, a key that
  // occurs twice in the history would be synthesized (and billed) twice and
  // would inflate the progress total.
  const jobs: Job[] = [];
  const queued = new Set<string>();
  for (const { scene } of opts.session.history) {
    for (const beat of scene.beats) {
      if (!beat.speaker || !beat.line) continue;
      const key = `${scene.id}:${beat.id}`;
      if (out[key] || queued.has(key)) continue;
      queued.add(key);
      jobs.push({ key, scene, beat });
    }
  }

  // 2. Reuse audio the play page already holds for the current scene so it is
  // not synthesized again. Anything that is not yet a data URI is converted.
  if (opts.currentSceneId) {
    for (const job of jobs) {
      if (opts.signal?.aborted) break;
      if (job.scene.id !== opts.currentSceneId) continue;
      const local = opts.beatAudioMap[job.beat.id];
      if (!local) continue;
      try {
        out[job.key] = await urlToDataUri(local);
      } catch {
        // Conversion failed — the job stays in `remaining` and is synthesized.
      }
    }
  }

  const remaining = jobs.filter((job) => !out[job.key]);
  const total = jobs.length;
  let done = total - remaining.length;
  opts.onProgress?.(done, total);

  const charByName = new Map(opts.session.characters.map((c) => [c.name, c]));

  // 3. Fresh synthesis. Workers pull from a shared cursor; there is no await
  // between reading and advancing it, so each job is claimed by one worker.
  let cursor = 0;
  async function worker(): Promise<void> {
    for (;;) {
      if (opts.signal?.aborted) return;
      const job = remaining[cursor];
      if (!job) return;
      cursor++;

      const speakerName = job.beat.speaker;
      try {
        const audio = await synthesizeBeatForExport(
          job.beat,
          speakerName ? charByName.get(speakerName) : undefined,
          opts.byoTts,
          opts.byoVoiceCache,
          opts.signal,
        );
        if (audio) out[job.key] = audio;
      } catch {
        // Deliberately silent — the beat will play without voice.
      }
      done++;
      opts.onProgress?.(done, total);
    }
  }

  const workerCount = Math.min(CONCURRENCY, remaining.length);
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return out;
}
|
||||
|
||||
/**
 * Synthesizes audio for a single beat and returns it as a `data:` URI.
 *
 * With a BYO config, the speaker's voice is taken from `voiceCache`, from
 * `speaker.voice`, or provisioned from `speaker.voiceDescription` (in that
 * order) and the line is synthesized through `synthesize`. Without one, the
 * beat is posted to `/api/beat-audio` using `speaker.voice`.
 *
 * @param beat - Beat whose `line` is spoken.
 * @param speaker - Character speaking the beat, if one was found.
 * @param byo - BYO TTS config, or `null` to use `/api/beat-audio`.
 * @param voiceCache - Voice promises keyed by character name; may be shared
 *   with other callers.
 * @param signal - Forwarded to `synthesize` / `fetch`.
 * @returns A `data:` URI, or `null` when the beat cannot be voiced: no speaker
 *   or line, no usable voice, voice provisioning failed, the server answered
 *   204 or a non-OK status, or the audio payload was empty.
 *   Rejections from `synthesize` and `fetch` propagate to the caller.
 */
async function synthesizeBeatForExport(
  beat: Beat,
  speaker: Character | undefined,
  byo: TtsConfig | null,
  voiceCache: Map<string, Promise<CharacterVoice>>,
  signal?: AbortSignal,
): Promise<string | null> {
  if (!speaker || !beat.line) return null;

  if (byo) {
    let voiceP = voiceCache.get(speaker.name);
    if (!voiceP) {
      if (speaker.voice) {
        voiceP = Promise.resolve(speaker.voice);
      } else if (speaker.voiceDescription) {
        voiceP = provisionVoice(byo, speaker.voiceDescription, speaker.name);
      } else {
        return null;
      }
      voiceCache.set(speaker.name, voiceP);
    }

    let voice: CharacterVoice;
    try {
      voice = await voiceP;
    } catch {
      // Drop the failed promise so a later call can retry, but only if it is
      // still the cached one: the cache is shared, and another caller may
      // already have stored a fresh promise under this name.
      if (voiceCache.get(speaker.name) === voiceP) voiceCache.delete(speaker.name);
      return null;
    }

    const out = await synthesize(byo, voice, beat.line, beat.lineDelivery, signal);
    // An empty payload would still yield a truthy URI that cannot be played.
    if (!out.audioBase64) return null;
    return `data:${out.mimeType};base64,${out.audioBase64}`;
  }

  if (!speaker.voice) return null;

  const res = await fetch("/api/beat-audio", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      beat: { id: beat.id, line: beat.line, lineDelivery: beat.lineDelivery },
      voice: speaker.voice,
    }),
    signal,
  });
  if (res.status === 204 || !res.ok) return null;

  const blob = await res.blob();
  // Same reasoning as above: do not export an empty clip.
  if (blob.size === 0) return null;
  return await blobToDataUri(blob);
}
|
||||
|
||||
/**
 * Returns `url` as a `data:` URI.
 *
 * A URL that already starts with `data:` is returned unchanged; anything else
 * is fetched and its body is encoded with `blobToDataUri`.
 *
 * @param url - Audio URL to convert.
 * @returns The audio as a `data:` URI.
 * @throws If the fetch rejects, the response status is not OK, or the blob
 *   cannot be read.
 */
async function urlToDataUri(url: string): Promise<string> {
  if (url.startsWith("data:")) return url;

  const res = await fetch(url);
  // Without this check an error page's body would be encoded as "audio".
  if (!res.ok) {
    throw new Error(`Failed to fetch audio for export (HTTP ${res.status})`);
  }
  const blob = await res.blob();
  return await blobToDataUri(blob);
}
|
||||
|
||||
/**
 * Reads `blob` with a `FileReader` and resolves with the resulting `data:` URI.
 *
 * @param blob - Binary payload to encode.
 * @returns A promise that resolves with the reader's string result. It rejects
 *   with `reader.error` (or a generic error) when reading fails, or when the
 *   read finishes with a non-string result.
 */
function blobToDataUri(blob: Blob): Promise<string> {
  return new Promise<string>((resolve, reject) => {
    const reader = new FileReader();

    reader.addEventListener("loadend", () => {
      const result = reader.result;
      if (typeof result !== "string") {
        reject(new Error("FileReader produced non-string result"));
        return;
      }
      resolve(result);
    });

    reader.addEventListener("error", () => {
      reject(reader.error ?? new Error("FileReader failed"));
    });

    reader.readAsDataURL(blob);
  });
}
|
||||
+24
-3
@@ -11,7 +11,7 @@ import type {
|
||||
export const STORY_SHARE_STORAGE_KEY = "infiplot:story-import";
|
||||
|
||||
export type StoryShareDoc = {
|
||||
v: 1;
|
||||
v: 1 | 2;
|
||||
kind: "infiplot-story";
|
||||
exportedAt: number;
|
||||
current: {
|
||||
@@ -19,6 +19,11 @@ export type StoryShareDoc = {
|
||||
beatId?: string;
|
||||
};
|
||||
session: Session;
|
||||
/** Pre-synthesized per-beat audio (data:audio/...;base64,...). Keyed by
|
||||
* `${sceneId}:${beatId}`. v2+ only — older files just have no audio and
|
||||
* play silent on replay. Embedding keeps the share file self-contained
|
||||
* so a friend can hear the recorded voices without their own TTS key. */
|
||||
audioByBeatId?: Record<string, string>;
|
||||
};
|
||||
|
||||
type JsonRecord = Record<string, unknown>;
|
||||
@@ -133,13 +138,16 @@ function sanitizeSessionForShare(session: Session): Session {
|
||||
export function createStoryShareDoc(
|
||||
session: Session,
|
||||
current: { sceneIndex: number; beatId?: string },
|
||||
audioByBeatId?: Record<string, string>,
|
||||
): StoryShareDoc {
|
||||
const hasAudio = !!audioByBeatId && Object.keys(audioByBeatId).length > 0;
|
||||
return {
|
||||
v: 1,
|
||||
v: hasAudio ? 2 : 1,
|
||||
kind: "infiplot-story",
|
||||
exportedAt: Date.now(),
|
||||
current,
|
||||
session: sanitizeSessionForShare(session),
|
||||
...(hasAudio ? { audioByBeatId } : {}),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -149,7 +157,7 @@ export function storyShareFilename(doc: StoryShareDoc): string {
|
||||
|
||||
export function parseStoryShareDoc(value: unknown): StoryShareDoc {
|
||||
if (!isRecord(value)) throw new Error("这不是有效的剧情分享文件");
|
||||
if (value.kind !== "infiplot-story" || value.v !== 1) {
|
||||
if (value.kind !== "infiplot-story" || (value.v !== 1 && value.v !== 2)) {
|
||||
throw new Error("剧情分享文件格式不支持");
|
||||
}
|
||||
if (typeof value.exportedAt !== "number" || !Number.isFinite(value.exportedAt)) {
|
||||
@@ -211,9 +219,22 @@ export function parseStoryShareDoc(value: unknown): StoryShareDoc {
|
||||
}
|
||||
}
|
||||
|
||||
let audioByBeatId: Record<string, string> | undefined;
|
||||
if (value.audioByBeatId !== undefined) {
|
||||
if (!isRecord(value.audioByBeatId)) {
|
||||
throw new Error("剧情分享文件配音数据不合法");
|
||||
}
|
||||
const cleaned: Record<string, string> = {};
|
||||
for (const [k, v] of Object.entries(value.audioByBeatId)) {
|
||||
if (typeof v === "string" && v.startsWith("data:")) cleaned[k] = v;
|
||||
}
|
||||
if (Object.keys(cleaned).length > 0) audioByBeatId = cleaned;
|
||||
}
|
||||
|
||||
const doc = value as StoryShareDoc;
|
||||
return {
|
||||
...doc,
|
||||
session: sanitizeSessionForShare(doc.session),
|
||||
...(audioByBeatId ? { audioByBeatId } : {}),
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user