fix(tts): make stepfun preset pick case-stable and per-character

- Hash the lowercased description (matching the case-insensitive scoring)
  so the same archetype text picks the same preset regardless of case.
- Thread the character name through provisionVoice -> stepfunProvision as
  the hash salt (lowercased before hashing, like the description), so two
  characters that share archetype keywords can land on different presets
  among the top-N (at most 3) candidates instead of always collapsing on
  one voice.

Xiaomi path is unaffected (voicedesign mints a unique clip per call).
This commit is contained in:
DESKTOP-I1T6TF3\Q
2026-06-09 09:14:44 +08:00
parent 19bbee16fe
commit 04f22249c9
4 changed files with 16 additions and 6 deletions
+1 -1
View File
@@ -525,7 +525,7 @@ async function resolveByoVoice(
return ready; return ready;
} }
if (!speaker.voiceDescription) return null; if (!speaker.voiceDescription) return null;
const p = provisionVoice(cfg, speaker.voiceDescription); const p = provisionVoice(cfg, speaker.voiceDescription, speaker.name);
cache.set(speaker.name, p); cache.set(speaker.name, p);
try { try {
return await p; return await p;
+1 -1
View File
@@ -103,7 +103,7 @@ export async function provisionCharacterVoice(
): Promise<CharacterVoice | undefined> { ): Promise<CharacterVoice | undefined> {
if (!config.tts) return undefined; if (!config.tts) return undefined;
try { try {
return await provisionVoice(config.tts, voiceDescription); return await provisionVoice(config.tts, voiceDescription, charName);
} catch (err) { } catch (err) {
const msg = err instanceof Error ? err.message : String(err); const msg = err instanceof Error ? err.message : String(err);
console.error(`[characterDesigner] voice provision failed for ${charName}: ${msg}`); console.error(`[characterDesigner] voice provision failed for ${charName}: ${msg}`);
+6 -1
View File
@@ -13,9 +13,14 @@ function isStepfun(cfg: TtsConfig): boolean {
export async function provisionVoice( export async function provisionVoice(
cfg: TtsConfig, cfg: TtsConfig,
description: string, description: string,
// Optional per-character salt (typically the character name). Only
// StepFun's preset-picker uses it — Xiaomi voicedesign mints a unique
// clip per call regardless. Threading it through keeps the API uniform
// and prevents archetype collisions on the StepFun path.
salt?: string,
): Promise<CharacterVoice> { ): Promise<CharacterVoice> {
return isStepfun(cfg) return isStepfun(cfg)
? stepfunProvision(cfg, description) ? stepfunProvision(cfg, description, salt)
: xiaomiProvision(cfg, description); : xiaomiProvision(cfg, description);
} }
+8 -3
View File
@@ -115,19 +115,24 @@ export function pickStepfunVoiceId(description: string, salt = ""): string {
// Pick from the top 3 (or fewer) deterministically by hashing the // Pick from the top 3 (or fewer) deterministically by hashing the
// description + an optional salt (charName) so two characters that share // description + an optional salt (charName) so two characters that share
// archetype keywords don't collapse onto the identical preset. // archetype keywords don't collapse onto the identical preset. Hash the
// lowercased desc so case differences in the same description don't pick
// different presets (scoring above is already case-insensitive).
const top = scored.slice(0, Math.min(3, scored.length)); const top = scored.slice(0, Math.min(3, scored.length));
const idx = hashStr(description + "|" + salt) % top.length; const idx = hashStr(desc + "|" + salt.toLowerCase()) % top.length;
return top[idx]!.v.id; return top[idx]!.v.id;
} }
// Provision is synchronous / no network — StepFun has no voicedesign equivalent. // Provision is synchronous / no network — StepFun has no voicedesign equivalent.
// We mirror xiaomiProvision's async signature so the router stays uniform. // We mirror xiaomiProvision's async signature so the router stays uniform.
// The optional `salt` (character name) spreads two characters that share
// archetype keywords across the top-N candidate presets.
export async function stepfunProvision( export async function stepfunProvision(
cfg: TtsConfig, cfg: TtsConfig,
description: string, description: string,
salt?: string,
): Promise<CharacterVoice> { ): Promise<CharacterVoice> {
const voiceId = pickStepfunVoiceId(description); const voiceId = pickStepfunVoiceId(description, salt);
return { return {
provider: "stepfun", provider: "stepfun",
voiceId, voiceId,