diff --git a/app/api/beat-audio/route.ts b/app/api/beat-audio/route.ts index b3f3fdd..6c89270 100644 --- a/app/api/beat-audio/route.ts +++ b/app/api/beat-audio/route.ts @@ -23,11 +23,18 @@ export async function POST(req: Request) { // engine's resolveVoice re-provisions on a provider mismatch. We only // require the beat text + SOMETHING to synthesize from. const VALID_TTS_PROVIDERS = ["xiaomi", "stepfun"]; + const hasInvalidVoiceProvider = + !!body.voice?.provider && !VALID_TTS_PROVIDERS.includes(body.voice.provider); const hasVoice = !!body.voice?.provider && VALID_TTS_PROVIDERS.includes(body.voice.provider); const hasFallback = !!body.stepfunVoiceId || !!body.voiceDescription; - if (!body.beat?.id || !body.beat?.line || (!hasVoice && !hasFallback)) { + if ( + !body.beat?.id || + !body.beat?.line || + hasInvalidVoiceProvider || + (!hasVoice && !hasFallback) + ) { return NextResponse.json( { error: "beat.id and beat.line are required, plus either voice.provider (xiaomi|stepfun) or stepfunVoiceId/voiceDescription" }, { status: 400 }, diff --git a/lib/engine/orchestrator.ts b/lib/engine/orchestrator.ts index fd5dddb..916a344 100644 --- a/lib/engine/orchestrator.ts +++ b/lib/engine/orchestrator.ts @@ -258,10 +258,13 @@ async function resolveVoice( ): Promise { const serverStepfun = !!config.tts && isStepfun(config.tts); const voiceProvider = req.voice?.provider; + const voiceMatchesServer = + (voiceProvider === "stepfun" && serverStepfun) || + (voiceProvider === "xiaomi" && !serverStepfun); // Fast path: the client sent a matching voice. (Also covers the legacy // xiaomi card + xiaomi server case where the 220KB was unavoidable anyway.) - if (req.voice && (voiceProvider === "stepfun") === serverStepfun) { + if (req.voice && voiceMatchesServer) { return req.voice; }