Files
infiplot-web/lib/tts-client/index.ts
T
DESKTOP-I1T6TF3\Q 04f22249c9 fix(tts): make stepfun preset pick case-stable and per-character
- Hash the lowercased description (matching the case-insensitive scoring)
  so the same archetype text picks the same preset regardless of case.
- Thread the character name through provisionVoice -> stepfunProvision as
  the hash salt, so two characters that share archetype keywords spread
  across the top-N candidate presets instead of collapsing on one voice.

Xiaomi path is unaffected (voicedesign mints a unique clip per call).
2026-06-09 09:14:44 +08:00

44 lines
1.8 KiB
TypeScript

import type { CharacterVoice, TtsConfig } from "@infiplot/types";
import { stepfunProvision, stepfunSynthesize } from "./stepfun";
import { xiaomiProvision, xiaomiSynthesize } from "./xiaomi";
/**
 * Provider auto-detection by base URL.
 *
 * Mirrors the image client convention of inferring Runware from
 * *.runware.ai and falling back otherwise. Keeps the BYO client flow
 * unchanged: the TTS_PROVIDER env var stays unused, and browser-side keys
 * (Xiaomi only today) keep working through the xiaomi path.
 *
 * @param cfg TTS configuration; only `baseUrl` is inspected here.
 * @returns `true` when `baseUrl` contains `stepfun.com` (case-insensitive)
 *   either at the very start of the string or right after a `.` or `/`,
 *   and followed by a word boundary; `false` otherwise.
 */
function isStepfun(cfg: TtsConfig): boolean {
  const { baseUrl } = cfg;
  // The `(^|[./])` prefix rejects look-alike hosts such as `notstepfun.com`;
  // the trailing `\b` rejects longer TLD-like suffixes such as `.community`.
  const stepfunHostPattern = /(^|[./])stepfun\.com\b/i;
  return stepfunHostPattern.test(baseUrl);
}
/**
 * Provisions a character voice through whichever provider the config's
 * base URL points at: StepFun when `isStepfun(cfg)` holds, Xiaomi otherwise.
 *
 * @param cfg TTS configuration used both for provider detection and for the
 *   provisioning call itself.
 * @param description Text description of the voice to provision.
 * @param salt Optional per-character salt; forwarded to the StepFun path only.
 * @returns The voice produced by the selected provider's provision function.
 */
export async function provisionVoice(
  cfg: TtsConfig,
  description: string,
  // Optional per-character salt (typically the character name). Only
  // StepFun's preset-picker uses it — Xiaomi voicedesign mints a unique
  // clip per call regardless. Threading it through keeps the API uniform
  // and prevents archetype collisions on the StepFun path.
  salt?: string,
): Promise<CharacterVoice> {
  if (isStepfun(cfg)) {
    return stepfunProvision(cfg, description, salt);
  }
  // Fallback provider: the salt is intentionally not forwarded here.
  return xiaomiProvision(cfg, description);
}
/**
 * Synthesizes speech for `text` using the given character voice.
 *
 * Dispatch is by the voice's own provider tag, not by the current config. A
 * session can outlive a provider switch (e.g. .env.local flip mid-game), and
 * each voice must be synthesized via the protocol that minted it. The cfg
 * still needs to point at the matching provider's endpoint; mismatch surfaces
 * as a transparent network error, which `synthesizeBeat` already swallows.
 *
 * @param cfg TTS configuration passed through to the provider call.
 * @param voice Voice to speak with; its `provider` tag selects the backend.
 * @param text Text to synthesize.
 * @param delivery Optional delivery hint, forwarded unchanged.
 * @param signal Optional abort signal, forwarded unchanged.
 * @returns The provider's result: base64-encoded audio plus its MIME type.
 */
export async function synthesize(
  cfg: TtsConfig,
  voice: CharacterVoice,
  text: string,
  delivery?: string,
  signal?: AbortSignal,
): Promise<{ audioBase64: string; mimeType: string }> {
  switch (voice.provider) {
    case "stepfun":
      return stepfunSynthesize(cfg, voice, text, delivery, signal);
    default:
      // Every non-"stepfun" tag goes through the Xiaomi protocol.
      return xiaomiSynthesize(cfg, voice, text, delivery, signal);
  }
}