Files
infiplot-web/app/api/start/route.ts
T
yuanzonghao b0b2e922d3 feat(web): optional bring-your-own Xiaomi MiMo TTS key (browser-side synthesis)
Public users share one server TTS key, so Xiaomi's per-key RPM/TPM limits
cause silent playback under concurrency. This adds an OPTIONAL path: a user
can store their own Xiaomi MiMo key in the browser and synthesize voice
client-side against Xiaomi's CORS-open endpoints. The key lives only in
localStorage and is never sent to or logged by our server; the shared server
key still serves everyone who does not opt in.

- components/TtsKeyModal.tsx: shared key modal (key-family + region picker),
  reused by both the home and play pages
- app/play/page.tsx: silence nudge moved beside the mute toggle; modal opens
  in place instead of redirecting to the home page
- app/page.tsx: home page consumes the shared modal + readStoredTtsConfig
- lib/clientTtsConfig.ts, lib/ttsPresets.ts: browser config + region presets
- app/api/{start,scene,insert-beat}: thread per-request voice; lib/types update
- docs/xiaomi-tts-key.md + README note

Verified with tsc --noEmit (exit 0).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-04 16:58:55 +08:00

56 lines
2.0 KiB
TypeScript

import { startSession } from "@infiplot/engine";
import type { StartRequest } from "@infiplot/types";
import { NextResponse } from "next/server";
import { loadEngineConfig } from "@/lib/config";
// Next.js route segment config: run this handler on the Node.js runtime.
export const runtime = "nodejs";
// Next.js route segment config: execution time limit for this handler
// (in seconds, per the Next.js docs).
export const maxDuration = 60;
// Matches /api/vision and /api/parse-style-image — the user's resized 512px
// webp is ~30-80 KB; this caps pathological direct-API payloads (which would
// then ride along in every subsequent /api/scene request body via session).
// NOTE: POST compares this against the data URL's string length, so the cap
// applies to the encoded `data:image/...` string, not the decoded image.
const MAX_STYLE_REF_BYTES = 3 * 1024 * 1024;
/**
 * POST handler: validates a `StartRequest` body and starts a new session.
 *
 * Responses:
 * - 400 `Invalid JSON` when the body cannot be parsed.
 * - 400 when `worldSetting` or `styleGuide` is missing, not a string, or blank
 *   (this also covers bodies that are not JSON objects, e.g. `null`).
 * - 400 when `styleReferenceImage` is present but is not a `data:image/...`
 *   string.
 * - 413 when `styleReferenceImage` is longer than `MAX_STYLE_REF_BYTES`.
 * - 500 with the thrown error's message when config loading or
 *   `startSession` throws.
 * - Otherwise the `startSession` result serialized as JSON.
 */
export async function POST(req: Request) {
  let parsed: unknown;
  try {
    parsed = await req.json();
  } catch {
    return NextResponse.json({ error: "Invalid JSON" }, { status: 400 });
  }

  // `req.json()` happily yields `null`, numbers, arrays, ... — inspect the
  // payload as untyped data first so a malformed body yields a 400 instead of
  // a TypeError thrown from a property access or `.trim()` call.
  const fields = (
    typeof parsed === "object" && parsed !== null ? parsed : {}
  ) as Record<string, unknown>;

  const isNonBlankString = (value: unknown): value is string =>
    typeof value === "string" && value.trim() !== "";

  if (
    !isNonBlankString(fields.worldSetting) ||
    !isNonBlankString(fields.styleGuide)
  ) {
    return NextResponse.json(
      { error: "worldSetting and styleGuide are required" },
      { status: 400 },
    );
  }

  // Absent / null means "no style reference". Anything else must be a data URL
  // string; rejecting other types keeps non-string payloads from slipping past
  // the size cap below.
  const styleRef = fields.styleReferenceImage;
  if (styleRef !== undefined && styleRef !== null) {
    if (typeof styleRef !== "string" || !styleRef.startsWith("data:image/")) {
      return NextResponse.json(
        { error: "styleReferenceImage must be a data:image/... base64 URL" },
        { status: 400 },
      );
    }
    if (styleRef.length > MAX_STYLE_REF_BYTES) {
      return NextResponse.json(
        { error: `styleReferenceImage exceeds ${MAX_STYLE_REF_BYTES} bytes` },
        { status: 413 },
      );
    }
  }

  // The required fields have been checked at runtime; remaining fields are
  // trusted to the engine exactly as before.
  const body = parsed as StartRequest;

  try {
    const base = loadEngineConfig(req.headers);
    // BYO key: the browser provisions + synths voices directly against Xiaomi
    // (key never reaches us), so strip server-side TTS so the engine skips all
    // provisioning + synth. See StartRequest.clientTts.
    const config = body.clientTts ? { ...base, tts: undefined } : base;
    const result = await startSession(config, body);
    return NextResponse.json(result);
  } catch (err) {
    const message = err instanceof Error ? err.message : "Unknown error";
    return NextResponse.json({ error: message }, { status: 500 });
  }
}