fix(beat-audio): harden voice-provider validation and resolveVoice fast path

Address PR-agent review findings:

- resolveVoice fast path: replace ambiguous boolean comparison
  (voiceProvider === "stepfun") === serverStepfun with explicit
  per-provider equality checks. Prevents an undefined or unknown
  provider from matching the non-stepfun (xiaomi) branch by accident.

- /api/beat-audio route: reject requests whose voice.provider is present
  but not in the VALID_TTS_PROVIDERS whitelist (e.g. "azure"). Previously
  such a request passed validation whenever fallback fields were also
  present, and on a non-stepfun server resolveVoice's fast path could
  return the invalid voice directly instead of falling back to
  re-provisioning — producing a silent beat instead of a voiced one.
This commit is contained in:
yuanzonghao
2026-06-15 14:33:46 +08:00
parent 0166c5e0a9
commit 65b7daff0b
2 changed files with 12 additions and 2 deletions
+8 -1
View File
@@ -23,11 +23,18 @@ export async function POST(req: Request) {
// engine's resolveVoice re-provisions on a provider mismatch. We only
// require the beat text + SOMETHING to synthesize from.
const VALID_TTS_PROVIDERS = ["xiaomi", "stepfun"];
const hasInvalidVoiceProvider =
!!body.voice?.provider && !VALID_TTS_PROVIDERS.includes(body.voice.provider);
const hasVoice =
!!body.voice?.provider && VALID_TTS_PROVIDERS.includes(body.voice.provider);
const hasFallback =
!!body.stepfunVoiceId || !!body.voiceDescription;
if (!body.beat?.id || !body.beat?.line || (!hasVoice && !hasFallback)) {
if (
!body.beat?.id ||
!body.beat?.line ||
hasInvalidVoiceProvider ||
(!hasVoice && !hasFallback)
) {
return NextResponse.json(
{ error: "beat.id and beat.line are required, plus either voice.provider (xiaomi|stepfun) or stepfunVoiceId/voiceDescription" },
{ status: 400 },
+4 -1
View File
@@ -258,10 +258,13 @@ async function resolveVoice(
): Promise<CharacterVoice | undefined> {
const serverStepfun = !!config.tts && isStepfun(config.tts);
const voiceProvider = req.voice?.provider;
const voiceMatchesServer =
(voiceProvider === "stepfun" && serverStepfun) ||
(voiceProvider === "xiaomi" && !serverStepfun);
// Fast path: the client sent a matching voice. (Also covers the legacy
// xiaomi card + xiaomi server case where the 220KB was unavoidable anyway.)
if (req.voice && (voiceProvider === "stepfun") === serverStepfun) {
if (req.voice && voiceMatchesServer) {
return req.voice;
}