fix(tts): harden StepFun provider integration
- Validate voice.provider against known whitelist (xiaomi|stepfun) in beat-audio route to return a clear 400 instead of falling through - Move single-char pronouns (他/她) to weak-signal fallback in detectGender to avoid false positives on compounds like 其他 - Update .env.example with StepFun configuration examples Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
+14
-4
@@ -66,10 +66,20 @@ VISION_MODEL=mimo-v2.5
|
||||
# google → VISION_BASE_URL=https://generativelanguage.googleapis.com VISION_MODEL=gemini-3.5-flash
|
||||
# VISION_PROVIDER=openai_compatible
|
||||
|
||||
# ---- 4. TTS · Xiaomi MiMo (optional — leave blank to disable) ------
|
||||
# Per-character voice design → clone, with per-line delivery direction.
|
||||
# Voice identity = the reference audio kept in the session (no server expiry).
|
||||
# The adapter appends -voicedesign / -voiceclone to TTS_SPEECH_MODEL.
|
||||
# ---- 4. TTS (optional — leave blank to disable) --------------------
|
||||
# Provider is auto-detected from TTS_BASE_URL host:
|
||||
# *stepfun.com → StepFun (preset voices, keyword-scored selection)
|
||||
# otherwise → Xiaomi MiMo (voicedesign + voiceclone)
|
||||
#
|
||||
# Xiaomi MiMo — per-character voice design → clone, with per-line delivery.
|
||||
# TTS_BASE_URL=https://token-plan-sgp.xiaomimimo.com/v1
|
||||
# TTS_API_KEY=tp-xxx
|
||||
# TTS_SPEECH_MODEL=mimo-v2.5-tts
|
||||
#
|
||||
# StepFun — 32 preset voices, auto-selected by gender + age + tone scoring.
|
||||
# TTS_BASE_URL=https://api.stepfun.com/v1
|
||||
# TTS_API_KEY=sk-xxx
|
||||
# TTS_SPEECH_MODEL=step-tts-mini # or step-tts-2 / stepaudio-2.5-tts
|
||||
TTS_BASE_URL=https://token-plan-sgp.xiaomimimo.com/v1
|
||||
TTS_API_KEY=tp-xxx
|
||||
TTS_SPEECH_MODEL=mimo-v2.5-tts
|
||||
|
||||
@@ -16,9 +16,15 @@ export async function POST(req: Request) {
|
||||
// Accept either provider's voice shape — xiaomi carries referenceAudioBase64,
|
||||
// stepfun carries voiceId. We only check the discriminator + the line text;
|
||||
// shape-specific validation lives in each provider's synth function.
|
||||
if (!body.beat?.id || !body.beat?.line || !body.voice?.provider) {
|
||||
const VALID_TTS_PROVIDERS = ["xiaomi", "stepfun"];
|
||||
if (
|
||||
!body.beat?.id ||
|
||||
!body.beat?.line ||
|
||||
!body.voice?.provider ||
|
||||
!VALID_TTS_PROVIDERS.includes(body.voice.provider)
|
||||
) {
|
||||
return NextResponse.json(
|
||||
{ error: "beat.id, beat.line and voice.provider are required" },
|
||||
{ error: "beat.id, beat.line and voice.provider (xiaomi|stepfun) are required" },
|
||||
{ status: 400 },
|
||||
);
|
||||
}
|
||||
|
||||
@@ -68,14 +68,16 @@ function hashStr(s: string): number {
|
||||
}
|
||||
|
||||
function detectGender(desc: string): "male" | "female" {
|
||||
// Female signals (broader cast — galgame skews toward female NPCs).
|
||||
if (/女性|女声|少女|姐姐|妹妹|熟女|御姐|阿姨|奶奶|女孩|姑娘|大妈|女子|女生|女士|她|小姐/.test(desc)) {
|
||||
if (/女性|女声|少女|姐姐|妹妹|熟女|御姐|阿姨|奶奶|女孩|姑娘|大妈|女子|女生|女士|小姐/.test(desc)) {
|
||||
return "female";
|
||||
}
|
||||
if (/男性|男声|少年|青年|大叔|哥哥|弟弟|男人|男孩|大爷|爷爷|男子|男生|先生|他|公子|师傅/.test(desc)) {
|
||||
if (/男性|男声|少年|青年|大叔|哥哥|弟弟|男人|男孩|大爷|爷爷|男子|男生|先生|公子|师傅/.test(desc)) {
|
||||
return "male";
|
||||
}
|
||||
// No strong signal: default female (matches the catalog's center of mass).
|
||||
// Weak signals: single-char pronouns checked last to avoid false positives
|
||||
// on compound words like "其他" (other) or "她们" (they-fem).
|
||||
if (/她/.test(desc)) return "female";
|
||||
if (/他/.test(desc)) return "male";
|
||||
return "female";
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user