fix(tts): make stepfun preset pick case-stable and per-character
- Hash the lowercased description (matching the case-insensitive scoring) so the same archetype text picks the same preset regardless of case.
- Thread the character name through provisionVoice -> stepfunProvision as the hash salt, so two characters that share archetype keywords spread across the top-N candidate presets instead of collapsing onto one voice.

The Xiaomi path is unaffected (voicedesign mints a unique clip per call).
This commit is contained in:
+1
-1
@@ -525,7 +525,7 @@ async function resolveByoVoice(
|
|||||||
return ready;
|
return ready;
|
||||||
}
|
}
|
||||||
if (!speaker.voiceDescription) return null;
|
if (!speaker.voiceDescription) return null;
|
||||||
const p = provisionVoice(cfg, speaker.voiceDescription);
|
const p = provisionVoice(cfg, speaker.voiceDescription, speaker.name);
|
||||||
cache.set(speaker.name, p);
|
cache.set(speaker.name, p);
|
||||||
try {
|
try {
|
||||||
return await p;
|
return await p;
|
||||||
|
|||||||
@@ -103,7 +103,7 @@ export async function provisionCharacterVoice(
|
|||||||
): Promise<CharacterVoice | undefined> {
|
): Promise<CharacterVoice | undefined> {
|
||||||
if (!config.tts) return undefined;
|
if (!config.tts) return undefined;
|
||||||
try {
|
try {
|
||||||
return await provisionVoice(config.tts, voiceDescription);
|
return await provisionVoice(config.tts, voiceDescription, charName);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
const msg = err instanceof Error ? err.message : String(err);
|
const msg = err instanceof Error ? err.message : String(err);
|
||||||
console.error(`[characterDesigner] voice provision failed for ${charName}: ${msg}`);
|
console.error(`[characterDesigner] voice provision failed for ${charName}: ${msg}`);
|
||||||
|
|||||||
@@ -13,9 +13,14 @@ function isStepfun(cfg: TtsConfig): boolean {
|
|||||||
export async function provisionVoice(
|
export async function provisionVoice(
|
||||||
cfg: TtsConfig,
|
cfg: TtsConfig,
|
||||||
description: string,
|
description: string,
|
||||||
|
// Optional per-character salt (typically the character name). Only
|
||||||
|
// StepFun's preset-picker uses it — Xiaomi voicedesign mints a unique
|
||||||
|
// clip per call regardless. Threading it through keeps the API uniform
|
||||||
|
// and prevents archetype collisions on the StepFun path.
|
||||||
|
salt?: string,
|
||||||
): Promise<CharacterVoice> {
|
): Promise<CharacterVoice> {
|
||||||
return isStepfun(cfg)
|
return isStepfun(cfg)
|
||||||
? stepfunProvision(cfg, description)
|
? stepfunProvision(cfg, description, salt)
|
||||||
: xiaomiProvision(cfg, description);
|
: xiaomiProvision(cfg, description);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -115,19 +115,24 @@ export function pickStepfunVoiceId(description: string, salt = ""): string {
|
|||||||
|
|
||||||
// Pick from the top 3 (or fewer) deterministically by hashing the
|
// Pick from the top 3 (or fewer) deterministically by hashing the
|
||||||
// description + an optional salt (charName) so two characters that share
|
// description + an optional salt (charName) so two characters that share
|
||||||
// archetype keywords don't collapse onto the identical preset.
|
// archetype keywords don't collapse onto the identical preset. Hash the
|
||||||
|
// lowercased desc so case differences in the same description don't pick
|
||||||
|
// different presets (scoring above is already case-insensitive).
|
||||||
const top = scored.slice(0, Math.min(3, scored.length));
|
const top = scored.slice(0, Math.min(3, scored.length));
|
||||||
const idx = hashStr(description + "|" + salt) % top.length;
|
const idx = hashStr(desc + "|" + salt.toLowerCase()) % top.length;
|
||||||
return top[idx]!.v.id;
|
return top[idx]!.v.id;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Provision is synchronous / no network — StepFun has no voicedesign equivalent.
|
// Provision is synchronous / no network — StepFun has no voicedesign equivalent.
|
||||||
// We mirror xiaomiProvision's async signature so the router stays uniform.
|
// We mirror xiaomiProvision's async signature so the router stays uniform.
|
||||||
|
// The optional `salt` (character name) spreads two characters that share
|
||||||
|
// archetype keywords across the top-N candidate presets.
|
||||||
export async function stepfunProvision(
|
export async function stepfunProvision(
|
||||||
cfg: TtsConfig,
|
cfg: TtsConfig,
|
||||||
description: string,
|
description: string,
|
||||||
|
salt?: string,
|
||||||
): Promise<CharacterVoice> {
|
): Promise<CharacterVoice> {
|
||||||
const voiceId = pickStepfunVoiceId(description);
|
const voiceId = pickStepfunVoiceId(description, salt);
|
||||||
return {
|
return {
|
||||||
provider: "stepfun",
|
provider: "stepfun",
|
||||||
voiceId,
|
voiceId,
|
||||||
|
|||||||
Reference in New Issue
Block a user