Files
infiplot-web/app/api/scene/route.ts
T
yuanzonghao e88e988de3 fix(web): reduce FOT by stripping redundant voice data from transport
Three transport-only optimizations that cut per-session Vercel FOT by ~50-60%:

P0 — Server strips voice.referenceAudioBase64 from already-known characters
in /api/scene and /api/insert-beat responses (defense-in-depth).

P1 — Client strips all voice data from session before sending to
/api/scene, /api/vision, and /api/insert-beat. Voices are retained locally
and re-merged from responses via mergeCharactersPreserveVoice(). The engine
only needs character names + visualDescriptions for scene generation.

P3 — /api/beat-audio returns binary audio (Response with Content-Type)
instead of JSON-wrapped base64, saving ~33% encoding overhead. Client
converts to blob URLs; PlayCanvas accepts a single audioSrc prop.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-06-05 00:24:34 +08:00

51 lines
1.7 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { requestScene } from "@infiplot/engine";
import type { Character, SceneRequest } from "@infiplot/types";
import { NextResponse } from "next/server";
import { loadEngineConfig } from "@/lib/config";
function stripKnownVoices(
characters: Character[],
knownNames: Set<string>,
): Character[] {
return characters.map((c) =>
knownNames.has(c.name) ? { ...c, voice: undefined } : c,
);
}
export const runtime = "nodejs";
// Capped at 60 for Vercel Hobby (300 allowed on Pro). The scene pipeline is
// Writer + CharDesigner×N + Cinematographer + Painter — happy path 912s; the
// tail (cold provider, multiple new characters) can push 3045s, so 60 is a
// reasonable headroom on Hobby.
export const maxDuration = 60;
export async function POST(req: Request) {
let body: SceneRequest;
try {
body = (await req.json()) as SceneRequest;
} catch {
return NextResponse.json({ error: "Invalid JSON" }, { status: 400 });
}
if (!body.session) {
return NextResponse.json({ error: "session is required" }, { status: 400 });
}
try {
const base = loadEngineConfig(req.headers);
// See StartRequest.clientTts — BYO clients synth in-browser, so drop server TTS.
const config = body.clientTts === true ? { ...base, tts: undefined } : base;
const result = await requestScene(config, body);
const knownNames = new Set(
(body.session.characters ?? []).map((c) => c.name),
);
return NextResponse.json({
...result,
characters: stripKnownVoices(result.characters, knownNames),
});
} catch (err) {
const message = err instanceof Error ? err.message : "Unknown error";
return NextResponse.json({ error: message }, { status: 500 });
}
}