04f22249c9
- Hash the lowercased description (matching the case-insensitive scoring) so the same archetype text picks the same preset regardless of case. - Thread the character name through provisionVoice -> stepfunProvision as the hash salt, so two characters that share archetype keywords spread across the top-N candidate presets instead of collapsing on one voice. Xiaomi path is unaffected (voicedesign mints a unique clip per call).
44 lines
1.8 KiB
TypeScript
44 lines
1.8 KiB
TypeScript
import type { CharacterVoice, TtsConfig } from "@infiplot/types";
|
|
import { stepfunProvision, stepfunSynthesize } from "./stepfun";
|
|
import { xiaomiProvision, xiaomiSynthesize } from "./xiaomi";
|
|
|
|
// Provider auto-detection by base URL — mirrors the image client convention
// of inferring Runware from *.runware.ai and falling back otherwise. Keeps
// the BYO client flow unchanged: TTS_PROVIDER env var stays unused, and
// browser-side keys (Xiaomi only today) keep working through the xiaomi path.
//
// Returns true when `cfg.baseUrl` contains `stepfun.com` (case-insensitive)
// either at the very start of the string or immediately after a `.` or `/`,
// and followed by a word boundary. So `https://api.stepfun.com/v1` and
// `https://stepfun.com` match, while `notstepfun.com` and
// `stepfun.community` do not.
function isStepfun(cfg: TtsConfig): boolean {
  return /(^|[./])stepfun\.com\b/i.test(cfg.baseUrl);
}
|
|
|
|
/**
 * Provisions a voice for a character from a free-text description, routing
 * to the provider implied by `cfg.baseUrl`.
 *
 * @param cfg - TTS configuration; its `baseUrl` decides which provider is used.
 * @param description - Text describing the voice to provision.
 * @param salt - Optional per-character salt (typically the character name).
 *   Forwarded only on the StepFun path; the Xiaomi path does not receive it.
 * @returns The voice produced by the selected provider's provision call.
 */
export async function provisionVoice(
  cfg: TtsConfig,
  description: string,
  // Optional per-character salt (typically the character name). Only
  // StepFun's preset-picker uses it — Xiaomi voicedesign mints a unique
  // clip per call regardless. Threading it through keeps the API uniform
  // and prevents archetype collisions on the StepFun path.
  salt?: string,
): Promise<CharacterVoice> {
  return isStepfun(cfg)
    ? stepfunProvision(cfg, description, salt)
    : xiaomiProvision(cfg, description);
}
|
|
|
|
// Dispatch by the voice's own provider tag, not by the current config. A
// session can outlive a provider switch (e.g. .env.local flip mid-game), and
// each voice must be synthesized via the protocol that minted it. The cfg
// still needs to point at the matching provider's endpoint; mismatch surfaces
// as a transparent network error, which `synthesizeBeat` already swallows.
/**
 * Synthesizes speech for `text` using the provider recorded on `voice`.
 *
 * @param cfg - TTS configuration passed through unchanged to the provider call.
 * @param voice - The provisioned voice; `voice.provider` selects the backend.
 * @param text - The text to speak.
 * @param delivery - Optional delivery hint, forwarded as-is to the provider.
 * @param signal - Optional abort signal, forwarded as-is to the provider.
 * @returns The provider's result: base64-encoded audio and its MIME type.
 */
export async function synthesize(
  cfg: TtsConfig,
  voice: CharacterVoice,
  text: string,
  delivery?: string,
  signal?: AbortSignal,
): Promise<{ audioBase64: string; mimeType: string }> {
  if (voice.provider === "stepfun") {
    return stepfunSynthesize(cfg, voice, text, delivery, signal);
  }
  // Any provider tag other than "stepfun" falls through to the Xiaomi path.
  return xiaomiSynthesize(cfg, voice, text, delivery, signal);
}
|