From 65b7daff0bb5138558d69befb5abaa73e27bed6a Mon Sep 17 00:00:00 2001 From: yuanzonghao Date: Mon, 15 Jun 2026 14:33:46 +0800 Subject: [PATCH] fix(beat-audio): harden voice-provider validation and resolveVoice fast path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address PR-agent review findings: - resolveVoice fast path: replace ambiguous boolean comparison (voiceProvider === "stepfun") === serverStepfun with explicit per-provider equality checks. Prevents an undefined or unknown provider from matching the non-stepfun (xiaomi) branch by accident. - /api/beat-audio route: reject requests whose voice.provider is present but not in the VALID_TTS_PROVIDERS whitelist (e.g. "azure"). Previously such a request would pass validation when fallback fields were also present, and resolveVoice might use the invalid voice directly instead of falling back to reprovision — producing a silent beat instead of a voiced one. --- app/api/beat-audio/route.ts | 9 ++++++++- lib/engine/orchestrator.ts | 5 ++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/app/api/beat-audio/route.ts b/app/api/beat-audio/route.ts index b3f3fdd..6c89270 100644 --- a/app/api/beat-audio/route.ts +++ b/app/api/beat-audio/route.ts @@ -23,11 +23,18 @@ export async function POST(req: Request) { // engine's resolveVoice re-provisions on a provider mismatch. We only // require the beat text + SOMETHING to synthesize from. const VALID_TTS_PROVIDERS = ["xiaomi", "stepfun"]; + const hasInvalidVoiceProvider = + !!body.voice?.provider && !VALID_TTS_PROVIDERS.includes(body.voice.provider); const hasVoice = !!body.voice?.provider && VALID_TTS_PROVIDERS.includes(body.voice.provider); const hasFallback = !!body.stepfunVoiceId || !!body.voiceDescription; - if (!body.beat?.id || !body.beat?.line || (!hasVoice && !hasFallback)) { + if ( + !body.beat?.id || + !body.beat?.line || + hasInvalidVoiceProvider || + (!hasVoice && !hasFallback) + ) { return NextResponse.json( { error: "beat.id and beat.line are required, plus either voice.provider (xiaomi|stepfun) or stepfunVoiceId/voiceDescription" }, { status: 400 }, diff --git a/lib/engine/orchestrator.ts b/lib/engine/orchestrator.ts index fd5dddb..916a344 100644 --- a/lib/engine/orchestrator.ts +++ b/lib/engine/orchestrator.ts @@ -258,10 +258,13 @@ async function resolveVoice( ): Promise { const serverStepfun = !!config.tts && isStepfun(config.tts); const voiceProvider = req.voice?.provider; + const voiceMatchesServer = + (voiceProvider === "stepfun" && serverStepfun) || + (voiceProvider === "xiaomi" && !serverStepfun); // Fast path: the client sent a matching voice. (Also covers the legacy // xiaomi card + xiaomi server case where the 220KB was unavoidable anyway.) - if (req.voice && (voiceProvider === "stepfun") === serverStepfun) { + if (req.voice && voiceMatchesServer) { return req.voice; }