fix(web): reduce FOT by stripping redundant voice data from transport
Three transport-only optimizations that cut per-session Vercel Fast Origin Transfer (FOT) by roughly 50-60%:

P0 — The server strips the `voice` field (and with it voice.referenceAudioBase64) from already-known characters in /api/scene responses, and from every character in /api/insert-beat responses (defense-in-depth).

P1 — The client strips all voice data from the session before sending it to /api/scene, /api/vision, and /api/insert-beat. Voices are retained locally and re-merged from responses via mergeCharactersPreserveVoice(). The engine only needs character names and visualDescriptions for scene generation.

P3 — /api/beat-audio returns binary audio (a Response with a Content-Type header) instead of JSON-wrapped base64, saving the ~33% base64 encoding overhead. The client converts the body to blob URLs; PlayCanvas accepts a single audioSrc prop.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -26,7 +26,11 @@ export async function POST(req: Request) {
|
||||
try {
|
||||
const config = loadEngineConfig(req.headers);
|
||||
const result = await requestBeatAudio(config, body);
|
||||
return NextResponse.json(result);
|
||||
if (!result.audio) return new Response(null, { status: 204 });
|
||||
const binary = Buffer.from(result.audio.base64, "base64");
|
||||
return new Response(binary, {
|
||||
headers: { "Content-Type": result.audio.mime },
|
||||
});
|
||||
} catch (err) {
|
||||
// Engine already swallows synth errors and returns audio:null. Anything
|
||||
// that reaches here is config-level — surface so the client can log it.
|
||||
|
||||
@@ -26,7 +26,10 @@ export async function POST(req: Request) {
|
||||
// See StartRequest.clientTts — BYO clients synth in-browser, so drop server TTS.
|
||||
const config = body.clientTts === true ? { ...base, tts: undefined } : base;
|
||||
const result = await requestInsertBeat(config, body);
|
||||
return NextResponse.json(result);
|
||||
return NextResponse.json({
|
||||
...result,
|
||||
characters: result.characters.map((c) => ({ ...c, voice: undefined })),
|
||||
});
|
||||
} catch (err) {
|
||||
const message = err instanceof Error ? err.message : "Unknown error";
|
||||
return NextResponse.json({ error: message }, { status: 500 });
|
||||
|
||||
+17
-2
@@ -1,8 +1,17 @@
|
||||
import { requestScene } from "@infiplot/engine";
|
||||
import type { SceneRequest } from "@infiplot/types";
|
||||
import type { Character, SceneRequest } from "@infiplot/types";
|
||||
import { NextResponse } from "next/server";
|
||||
import { loadEngineConfig } from "@/lib/config";
|
||||
|
||||
/**
 * Blanks the `voice` field on every character whose name appears in
 * `knownNames`.
 *
 * Matching characters are returned as shallow copies with `voice` set to
 * `undefined`; non-matching characters are passed through as the same object
 * reference. The input array and its elements are not mutated.
 *
 * @param characters - Characters to filter voice data from.
 * @param knownNames - Names of characters whose voice should be dropped.
 * @returns A new array with the same length and order as `characters`.
 */
function stripKnownVoices(
  characters: Character[],
  knownNames: Set<string>,
): Character[] {
  // Shallow copy so the caller's object keeps its voice.
  const withoutVoice = (character: Character): Character => ({
    ...character,
    voice: undefined,
  });

  return characters.map((character) => {
    if (!knownNames.has(character.name)) {
      return character;
    }
    return withoutVoice(character);
  });
}
|
||||
|
||||
export const runtime = "nodejs";
|
||||
// Capped at 60 for Vercel Hobby (300 allowed on Pro). The scene pipeline is
|
||||
// Writer + CharDesigner×N + Cinematographer + Painter — happy path 9–12s; the
|
||||
@@ -27,7 +36,13 @@ export async function POST(req: Request) {
|
||||
// See StartRequest.clientTts — BYO clients synth in-browser, so drop server TTS.
|
||||
const config = body.clientTts === true ? { ...base, tts: undefined } : base;
|
||||
const result = await requestScene(config, body);
|
||||
return NextResponse.json(result);
|
||||
const knownNames = new Set(
|
||||
(body.session.characters ?? []).map((c) => c.name),
|
||||
);
|
||||
return NextResponse.json({
|
||||
...result,
|
||||
characters: stripKnownVoices(result.characters, knownNames),
|
||||
});
|
||||
} catch (err) {
|
||||
const message = err instanceof Error ? err.message : "Unknown error";
|
||||
return NextResponse.json({ error: message }, { status: 500 });
|
||||
|
||||
Reference in New Issue
Block a user