feat(tts): Xiaomi MiMo per-beat voice + MOCK_IMAGE testing aid (#3)
Adds an optional Xiaomi MiMo TTS layer on top of the scene/beat engine and a MOCK_IMAGE flag for cheap local TTS iteration.

- Per-character voice provisioning via MiMo voice design → clone, reference audio persisted in session
- Per-line free-form delivery direction (Director writes "鼓起勇气又害羞,声音发颤" style instructions; sent to MiMo's director channel, never read aloud)
- Per-beat audio served with the scene response; frontend plays via hidden <audio> with typewriter synced to audio duration; mute toggle persisted via localStorage lazy initializer
- Graceful degradation: any TTS step failing → silent beat, game continues
- MOCK_IMAGE=true returns a sharp-generated placeholder PNG so local TTS iteration doesn't burn image tokens
- Recommended config in .env.example: MiMo Token Plan covers TEXT/VISION/TTS with one key (mimo-v2.5-pro for text, mimo-v2.5 (omni) for vision, mimo-v2.5-tts for TTS)

Squashed from #3:

- feat(tts): 小米 MiMo 逐 beat 配音 + 按 session 角色音色 + 自由文本配音指导
- feat(engine): MOCK_IMAGE 占位图便于本地测试
- fix(tts): address Copilot review on PR #3
- fix(tts): Copilot round-2 review feedback

Known limitation: Session.characters carries the full WAV reference audio (~200-300KB/character base64) and round-trips through every /api/scene, /api/vision, /api/insert-beat request. This is intrinsic to MiMo's design→clone model (voice identity IS the audio, no server-side voiceId). Fixing requires server-side storage, which is out of scope; documented for future hardening.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
This commit is contained in:
+33
-12
@@ -1,24 +1,45 @@
|
||||
# =============================================================
|
||||
# 云梦 — AI 视觉小说
|
||||
# Three independently configurable AI providers
|
||||
# Any OpenAI-compatible endpoint works (OpenRouter, OpenAI,
|
||||
# Anthropic via OpenAI-compat proxy, Gemini, DeepSeek, Ollama).
|
||||
# Recommended setup: Xiaomi MiMo Token Plan for TEXT / VISION / TTS
|
||||
# (one API key covers all three) + any image provider for IMAGE.
|
||||
#
|
||||
# Any OpenAI-compatible endpoint works for any slot — OpenRouter,
|
||||
# OpenAI, Anthropic via OpenAI-compat proxy, Gemini, DeepSeek, etc.
|
||||
# Image generation uses the chat-completions + modalities API
|
||||
# (OpenRouter-style), NOT the legacy /images/generations endpoint.
|
||||
# =============================================================
|
||||
|
||||
# ---- 1. Text LLM (story director) -----------------------------
|
||||
TEXT_BASE_URL=https://openrouter.ai/api/v1
|
||||
TEXT_API_KEY=sk-or-v1-xxx
|
||||
TEXT_MODEL=~anthropic/claude-sonnet-latest
|
||||
# ---- 1. Text LLM · scene director ----------------------------------
|
||||
# Recommended: MiMo V2.5 Pro (1M context, native JSON-mode, strong CN)
|
||||
# Token Plan host: https://token-plan-sgp.xiaomimimo.com/v1
|
||||
# Pay-as-you-go host: https://api.xiaomimimo.com/v1 (sk- keys)
|
||||
TEXT_BASE_URL=https://token-plan-sgp.xiaomimimo.com/v1
|
||||
TEXT_API_KEY=tp-xxx
|
||||
TEXT_MODEL=mimo-v2.5-pro
|
||||
|
||||
# ---- 2. Image generator (renders the whole UI screen) ---------
|
||||
# ---- 2. Image generator (renders the scene background) -------------
|
||||
# Any provider supporting chat-completions + modalities image output.
|
||||
IMAGE_BASE_URL=https://openrouter.ai/api/v1
|
||||
IMAGE_API_KEY=sk-or-v1-xxx
|
||||
IMAGE_MODEL=openai/gpt-5.4-image-2
|
||||
|
||||
# ---- 3. Vision model (interprets where the user clicked) ------
|
||||
VISION_BASE_URL=https://openrouter.ai/api/v1
|
||||
VISION_API_KEY=sk-or-v1-xxx
|
||||
VISION_MODEL=~google/gemini-flash-latest
|
||||
# ---- 3. Vision model · multimodal click interpretation -------------
|
||||
# Recommended: MiMo V2.5 omni — multimodal.
|
||||
# ⚠️ DO NOT use mimo-v2.5-pro for this slot — Pro is text-only and
|
||||
# rejects image_url content parts.
|
||||
VISION_BASE_URL=https://token-plan-sgp.xiaomimimo.com/v1
|
||||
VISION_API_KEY=tp-xxx
|
||||
VISION_MODEL=mimo-v2.5
|
||||
|
||||
# ---- 4. TTS · Xiaomi MiMo (optional — leave blank to disable) ------
|
||||
# Per-character voice design → clone, with per-line delivery direction.
|
||||
# Voice identity = the reference audio kept in the session (no server expiry).
|
||||
# The adapter appends -voicedesign / -voiceclone to TTS_SPEECH_MODEL.
|
||||
TTS_BASE_URL=https://token-plan-sgp.xiaomimimo.com/v1
|
||||
TTS_API_KEY=tp-xxx
|
||||
TTS_SPEECH_MODEL=mimo-v2.5-tts
|
||||
|
||||
# ---- 5. MOCK_IMAGE — skip image generation (cheap TTS testing) -----
|
||||
# true → return a placeholder image instead of calling the image model.
|
||||
# Text/story/voice still run normally. Great for iterating on TTS.
|
||||
MOCK_IMAGE=false
|
||||
|
||||
@@ -14,6 +14,7 @@ import { PlayCanvas, type Phase } from "@/components/PlayCanvas";
|
||||
import { PRESETS } from "@/lib/presets";
|
||||
import type {
|
||||
Beat,
|
||||
BeatAudio,
|
||||
BeatChoice,
|
||||
InsertBeatResponse,
|
||||
Scene,
|
||||
@@ -24,6 +25,8 @@ import type {
|
||||
VisionResponse,
|
||||
} from "@yume/types";
|
||||
|
||||
const MUTED_STORAGE_KEY = "yume:muted";
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
// Prefetch pool — speculative SceneResponses keyed by choice path.
|
||||
//
|
||||
@@ -133,7 +136,16 @@ function prefetchScenePath(
|
||||
nextSceneSeed: sole.effect.nextSceneSeed,
|
||||
},
|
||||
};
|
||||
prefetchScenePath(pool, baseSession, [...steps, nextStep], depth + 1);
|
||||
// Carry forward the registry that the parent prefetch result already
|
||||
// settled (it may include characters introduced by the intermediate
|
||||
// scene). Without this, the L2+ prefetch starts from the original
|
||||
// base.characters and a later transition through this survivor would
|
||||
// silently drop voices the player has already heard.
|
||||
const carriedBase: Session = {
|
||||
...baseSession,
|
||||
characters: data.characters,
|
||||
};
|
||||
prefetchScenePath(pool, carriedBase, [...steps, nextStep], depth + 1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -181,6 +193,18 @@ function PlayInner() {
|
||||
const [currentScene, setCurrentScene] = useState<Scene | null>(null);
|
||||
const [currentBeatId, setCurrentBeatId] = useState<string | null>(null);
|
||||
const [imageBase64, setImageBase64] = useState<string | null>(null);
|
||||
const [beatAudioMap, setBeatAudioMap] = useState<Record<string, BeatAudio>>({});
|
||||
// Seed `muted` from localStorage inside the useState initializer so the very
// first render already carries the persisted preference (reading it in an
// effect would let early-arriving audio play before the preference applied).
// Falls back to "not muted" when there is no `window` or when storage access
// throws.
const [muted, setMuted] = useState<boolean>(() => {
  const hasWindow = typeof window !== "undefined";
  if (!hasWindow) return false;

  let stored: string | null;
  try {
    stored = window.localStorage.getItem(MUTED_STORAGE_KEY);
  } catch {
    stored = null;
  }
  // Only the exact marker "1" means muted; anything else is audible.
  return stored === "1";
});
|
||||
const [pendingClick, setPendingClick] = useState<{
|
||||
x: number;
|
||||
y: number;
|
||||
@@ -203,6 +227,10 @@ function PlayInner() {
|
||||
return currentScene.beats.find((b) => b.id === currentBeatId) ?? null;
|
||||
}, [currentScene, currentBeatId]);
|
||||
|
||||
const currentBeatAudio = currentBeat ? beatAudioMap[currentBeat.id] : undefined;
|
||||
const audioBase64 = currentBeatAudio?.base64 ?? null;
|
||||
const audioMime = currentBeatAudio?.mime ?? null;
|
||||
|
||||
useEffect(() => {
|
||||
sessionRef.current = session;
|
||||
}, [session]);
|
||||
@@ -231,6 +259,19 @@ function PlayInner() {
|
||||
});
|
||||
}, [currentBeatId]);
|
||||
|
||||
// ── Mute persistence ─────────────────────────────────────────────────
// Flips the muted flag and mirrors the new value into localStorage
// ("1" = muted, "0" = audible). The persisted value is read back by the
// useState lazy initializer for `muted`. Storage failures are swallowed on
// purpose: the toggle must still work when localStorage is unavailable.
const toggleMuted = useCallback(() => {
  setMuted((wasMuted) => {
    const nowMuted = !wasMuted;
    const marker = nowMuted ? "1" : "0";
    try {
      window.localStorage.setItem(MUTED_STORAGE_KEY, marker);
    } catch {
      // ignore
    }
    return nowMuted;
  });
}, []);
|
||||
|
||||
// ── Presentation mode toggle ─────────────────────────────────────────
|
||||
const togglePresentation = useCallback(async () => {
|
||||
const entering = !presentation;
|
||||
@@ -327,12 +368,14 @@ function PlayInner() {
|
||||
visitedBeatIds: [data.scene.entryBeatId],
|
||||
},
|
||||
],
|
||||
characters: data.characters,
|
||||
};
|
||||
visitedBeatsRef.current = [data.scene.entryBeatId];
|
||||
setSession(initial);
|
||||
setCurrentScene(data.scene);
|
||||
setCurrentBeatId(data.scene.entryBeatId);
|
||||
setImageBase64(data.imageBase64);
|
||||
setBeatAudioMap(data.beatAudio ?? {});
|
||||
setPhase("ready");
|
||||
})
|
||||
.catch((e) => setError(String(e)));
|
||||
@@ -409,12 +452,14 @@ function PlayInner() {
|
||||
visitedBeatIds: [result.scene.entryBeatId],
|
||||
},
|
||||
],
|
||||
characters: result.characters,
|
||||
};
|
||||
visitedBeatsRef.current = [result.scene.entryBeatId];
|
||||
setSession(newSession);
|
||||
setCurrentScene(result.scene);
|
||||
setCurrentBeatId(result.scene.entryBeatId);
|
||||
setImageBase64(result.imageBase64);
|
||||
setBeatAudioMap(result.beatAudio ?? {});
|
||||
setLastExitLabel(exitLabel);
|
||||
setPhase("ready");
|
||||
} catch (e) {
|
||||
@@ -514,7 +559,8 @@ function PlayInner() {
|
||||
};
|
||||
throw new Error(j.error ?? insertRes.statusText);
|
||||
}
|
||||
const { partial } = (await insertRes.json()) as InsertBeatResponse;
|
||||
const { partial, characters: insertChars, audio } =
|
||||
(await insertRes.json()) as InsertBeatResponse;
|
||||
|
||||
const fromBeatId =
|
||||
currentBeatRef.current?.id ?? currentScene.entryBeatId;
|
||||
@@ -526,6 +572,7 @@ function PlayInner() {
|
||||
narration: partial.narration,
|
||||
speaker: partial.speaker,
|
||||
line: partial.line,
|
||||
lineDelivery: partial.lineDelivery,
|
||||
next: { type: "continue", nextBeatId: fromBeatId },
|
||||
};
|
||||
|
||||
@@ -541,11 +588,15 @@ function PlayInner() {
|
||||
history: s.history.map((h, i, arr) =>
|
||||
i === arr.length - 1 ? { ...h, scene: patched } : h,
|
||||
),
|
||||
characters: insertChars,
|
||||
}
|
||||
: s,
|
||||
);
|
||||
setCurrentScene(patched);
|
||||
setCurrentBeatId(newBeatId);
|
||||
if (audio) {
|
||||
setBeatAudioMap((m) => ({ ...m, [newBeatId]: audio }));
|
||||
}
|
||||
setLastExitLabel(decision.intent.freeformAction);
|
||||
setPhase("ready");
|
||||
setPendingClick(null);
|
||||
@@ -627,6 +678,9 @@ function PlayInner() {
|
||||
<div className="fixed inset-0 bg-black flex items-center justify-center z-50">
|
||||
<PlayCanvas
|
||||
imageBase64={imageBase64}
|
||||
audioBase64={audioBase64}
|
||||
audioMime={audioMime}
|
||||
muted={muted}
|
||||
phase={phase}
|
||||
beat={currentBeat}
|
||||
pendingClick={pendingClick}
|
||||
@@ -666,6 +720,9 @@ function PlayInner() {
|
||||
<main className="flex-1 flex flex-col items-center justify-center px-4 md:px-8 py-6 md:py-10">
|
||||
<PlayCanvas
|
||||
imageBase64={imageBase64}
|
||||
audioBase64={audioBase64}
|
||||
audioMime={audioMime}
|
||||
muted={muted}
|
||||
phase={phase}
|
||||
beat={currentBeat}
|
||||
pendingClick={pendingClick}
|
||||
@@ -700,7 +757,17 @@ function PlayInner() {
|
||||
F · 演 · 示
|
||||
</button>
|
||||
<div className="text-[9px] smallcaps text-clay-400 num">Ⅰ · Ⅰ</div>
|
||||
<span className="text-[9px] w-[60px]" aria-hidden />
|
||||
<button
|
||||
type="button"
|
||||
onClick={toggleMuted}
|
||||
className="text-[9px] smallcaps text-clay-400 hover:text-clay-700 transition-colors flex items-center gap-2 w-[80px] justify-end"
|
||||
aria-label={muted ? "取消静音" : "静音"}
|
||||
>
|
||||
<i
|
||||
className={`fa-solid ${muted ? "fa-volume-xmark" : "fa-volume-high"} text-[10px]`}
|
||||
/>
|
||||
{muted ? "静 · 音" : "有 · 声"}
|
||||
</button>
|
||||
</footer>
|
||||
</div>
|
||||
);
|
||||
|
||||
@@ -13,30 +13,66 @@ export type Phase =
|
||||
const SHADOW =
|
||||
"0 1px 0 rgba(45,24,16,0.05), 0 36px 64px -28px rgba(45,24,16,0.25), 0 8px 18px -6px rgba(45,24,16,0.10)";
|
||||
|
||||
const DEFAULT_CHAR_MS = 28;
|
||||
const MIN_CHAR_MS = 30;
|
||||
// Voice playback speed multiplier. >1 speeds up the (somewhat slow) MiMo voice
|
||||
// while preserving pitch. Typewriter pacing is divided by the same factor.
|
||||
const SPEECH_RATE = 1.2;
|
||||
// If audio metadata never arrives within this window, give up waiting and
|
||||
// let the typewriter run at default speed.
|
||||
const AUDIO_WAIT_TIMEOUT_MS = 2500;
|
||||
|
||||
// ── Typewriter hook ────────────────────────────────────────────────────
|
||||
// Returns the progressively-revealed text, a `done` flag, and a `skip()` that
|
||||
// instantly completes the current text. Reset is keyed by `resetKey` (the beat
|
||||
// id) rather than the text, so a new beat whose line happens to match the
|
||||
// previous one still replays from scratch. `done` is derived synchronously
|
||||
// (not from a post-paint effect) so a stale "done" frame never paints.
|
||||
// previous one still replays from scratch.
|
||||
//
|
||||
// When `targetDurationMs` is provided we space characters to span that audio
|
||||
// duration, keeping text and voice in lockstep. While `waitForAudio` is true
|
||||
// and we don't yet know a duration, the typewriter holds (so text doesn't
|
||||
// race ahead of an audio that's still loading).
|
||||
function useTypewriter(
|
||||
text: string,
|
||||
resetKey: string,
|
||||
speed = 28,
|
||||
opts: { targetDurationMs?: number; waitForAudio: boolean } = {
|
||||
waitForAudio: false,
|
||||
},
|
||||
): { shown: string; done: boolean; skip: () => void } {
|
||||
const { targetDurationMs, waitForAudio } = opts;
|
||||
const [displayed, setDisplayed] = useState("");
|
||||
const [prevKey, setPrevKey] = useState(resetKey);
|
||||
const timer = useRef<ReturnType<typeof setInterval> | null>(null);
|
||||
// Sticky once the player has skipped this beat: prevents a late-arriving
|
||||
// audio metadata event from re-triggering the effect and replaying the text.
|
||||
const skippedRef = useRef(false);
|
||||
|
||||
// Render-phase reset (React "adjust state on prop change" pattern): when the
|
||||
// beat changes, drop the old progress before this render commits.
|
||||
if (resetKey !== prevKey) {
|
||||
setPrevKey(resetKey);
|
||||
setDisplayed("");
|
||||
skippedRef.current = false;
|
||||
}
|
||||
|
||||
useEffect(() => {
|
||||
if (!text) return;
|
||||
// `=== undefined` (not `!targetDurationMs`): 0 means "audio failed or
|
||||
// timed out — run at default speed". The original truthy check stalled
|
||||
// the typewriter forever on those fallback paths.
|
||||
if (waitForAudio && targetDurationMs === undefined) return;
|
||||
// If the player skipped, settle on the full text and don't restart even
|
||||
// when audio metadata arrives late and re-triggers this effect.
|
||||
if (skippedRef.current) {
|
||||
setDisplayed(text);
|
||||
return;
|
||||
}
|
||||
|
||||
const speed =
|
||||
targetDurationMs && text.length > 0
|
||||
? Math.max(MIN_CHAR_MS, targetDurationMs / text.length)
|
||||
: DEFAULT_CHAR_MS;
|
||||
|
||||
let i = 0;
|
||||
timer.current = setInterval(() => {
|
||||
i += 1;
|
||||
@@ -50,13 +86,14 @@ function useTypewriter(
|
||||
if (timer.current) clearInterval(timer.current);
|
||||
timer.current = null;
|
||||
};
|
||||
}, [resetKey, text, speed]);
|
||||
}, [resetKey, text, targetDurationMs, waitForAudio]);
|
||||
|
||||
const skip = useCallback(() => {
|
||||
if (timer.current) {
|
||||
clearInterval(timer.current);
|
||||
timer.current = null;
|
||||
}
|
||||
skippedRef.current = true;
|
||||
setDisplayed(text);
|
||||
}, [text]);
|
||||
|
||||
@@ -123,6 +160,9 @@ function ChoiceButton({
|
||||
// ── Main component ─────────────────────────────────────────────────────
|
||||
export function PlayCanvas({
|
||||
imageBase64,
|
||||
audioBase64,
|
||||
audioMime,
|
||||
muted,
|
||||
phase,
|
||||
beat,
|
||||
pendingClick,
|
||||
@@ -132,6 +172,9 @@ export function PlayCanvas({
|
||||
fullViewport = false,
|
||||
}: {
|
||||
imageBase64: string | null;
|
||||
audioBase64: string | null;
|
||||
audioMime: string | null;
|
||||
muted: boolean;
|
||||
phase: Phase;
|
||||
beat: Beat | null;
|
||||
pendingClick: { x: number; y: number } | null;
|
||||
@@ -141,7 +184,11 @@ export function PlayCanvas({
|
||||
fullViewport?: boolean;
|
||||
}) {
|
||||
const imgRef = useRef<HTMLImageElement>(null);
|
||||
const audioRef = useRef<HTMLAudioElement>(null);
|
||||
const [dims, setDims] = useState<{ w: number; h: number } | null>(null);
|
||||
const [audioDurationMs, setAudioDurationMs] = useState<number | undefined>(
|
||||
undefined,
|
||||
);
|
||||
|
||||
const isChoiceBeat = beat?.next.type === "choice";
|
||||
const choices: BeatChoice[] = isChoiceBeat
|
||||
@@ -150,7 +197,56 @@ export function PlayCanvas({
|
||||
|
||||
const displayBody = beat?.speaker ? beat.line ?? "" : beat?.narration ?? "";
|
||||
const { shown: typedBody, done: typingDone, skip: skipTypewriter } =
|
||||
useTypewriter(displayBody, beat?.id ?? "", 30);
|
||||
useTypewriter(displayBody, beat?.id ?? "", {
|
||||
targetDurationMs: audioDurationMs,
|
||||
waitForAudio: Boolean(audioBase64),
|
||||
});
|
||||
|
||||
// ── Audio source change ──────────────────────────────────────────────
// Every time the audio payload changes, forget the previous duration. When a
// new payload is present, arm a fallback timer: if no duration has been
// recorded after AUDIO_WAIT_TIMEOUT_MS, store 0 so the typewriter stops
// waiting. A duration that was recorded in the meantime is left untouched.
useEffect(() => {
  setAudioDurationMs(undefined);

  if (audioBase64) {
    const fallback = setTimeout(() => {
      setAudioDurationMs((current) => (current === undefined ? 0 : current));
    }, AUDIO_WAIT_TIMEOUT_MS);

    // Cancel the pending fallback when the source changes or on unmount.
    return () => {
      clearTimeout(fallback);
    };
  }

  return undefined;
}, [audioBase64]);
|
||||
|
||||
// ── Mute toggle ───────────────────────────────────────────────────────
// Keeps the <audio> element in step with the `muted` prop and the current
// audio payload: applies the mute flag, re-applies the speech playback rate,
// and starts playback when the element is audible but not yet playing.
useEffect(() => {
  const el = audioRef.current;
  if (!el) return;
  el.muted = muted;
  el.playbackRate = SPEECH_RATE;
  // Only start a clip that has not finished yet. A finished clip is also
  // `paused`, so without the `ended` check a mute → unmute toggle after the
  // line was fully spoken would call play() and replay it from the start.
  if (!muted && audioBase64 && el.paused && !el.ended) {
    el.play().catch(() => {
      // autoplay blocked — silent until next interaction
    });
  }
}, [muted, audioBase64]);
|
||||
|
||||
/**
 * `loadedmetadata` handler for the beat's <audio> element.
 *
 * Applies the speech playback rate, publishes the effective (sped-up)
 * duration in milliseconds for the typewriter, and starts playback unless
 * muted. A non-finite or non-positive duration is published as 0.
 */
function handleAudioMetadata() {
  const audioEl = audioRef.current;
  if (!audioEl) return;

  audioEl.playbackRate = SPEECH_RATE;

  // Playing faster shortens the wall-clock time, so scale the duration by
  // the same factor to keep text and voice aligned.
  let effectiveMs = 0;
  if (Number.isFinite(audioEl.duration)) {
    const scaledMs = (audioEl.duration * 1000) / SPEECH_RATE;
    if (scaledMs > 0) effectiveMs = scaledMs;
  }
  setAudioDurationMs(effectiveMs);

  if (muted) return;
  audioEl.play().catch(() => {
    // autoplay blocked
  });
}
|
||||
|
||||
/**
 * `error` handler for the beat's <audio> element.
 *
 * Publishes a duration of 0, which the typewriter treats as "no usable
 * audio" and answers by running at its default per-character speed.
 */
function handleAudioError() {
  const noUsableDurationMs = 0;
  setAudioDurationMs(noUsableDurationMs);
}
|
||||
|
||||
function handleImageClick(e: React.MouseEvent<HTMLImageElement>) {
|
||||
if (phase !== "ready" || !imgRef.current || !beat) return;
|
||||
@@ -197,6 +293,19 @@ export function PlayCanvas({
|
||||
<div
|
||||
className={`flex flex-col items-center ${fullViewport ? "w-full h-full justify-center" : "w-full"}`}
|
||||
>
|
||||
{/* Hidden audio element — voice playback for the current beat */}
|
||||
{audioBase64 && (
|
||||
<audio
|
||||
key={audioBase64.slice(-48)}
|
||||
ref={audioRef}
|
||||
src={`data:${audioMime ?? "audio/wav"};base64,${audioBase64}`}
|
||||
preload="auto"
|
||||
onLoadedMetadata={handleAudioMetadata}
|
||||
onError={handleAudioError}
|
||||
className="hidden"
|
||||
/>
|
||||
)}
|
||||
|
||||
{imageBase64 ? (
|
||||
<div
|
||||
className="relative inline-block"
|
||||
|
||||
+19
-1
@@ -1,4 +1,4 @@
|
||||
import type { EngineConfig } from "@yume/types";
|
||||
import type { EngineConfig, TtsConfig } from "@yume/types";
|
||||
|
||||
function readVar(name: string): string {
|
||||
const v = process.env[name];
|
||||
@@ -6,6 +6,22 @@ function readVar(name: string): string {
|
||||
return v;
|
||||
}
|
||||
|
||||
/**
 * Reads an optional environment variable.
 *
 * Surrounding whitespace is stripped, so a value such as `"true "` or a key
 * pasted with a trailing newline is returned clean, and a whitespace-only
 * value counts as "not set".
 *
 * @param name - Name of the environment variable to look up.
 * @returns The trimmed value, or `undefined` when the variable is unset,
 *   empty, or blank.
 */
function readOptionalVar(name: string): string | undefined {
  const value = process.env[name]?.trim();
  return value ? value : undefined;
}
|
||||
|
||||
/**
 * Builds the TTS configuration from TTS_BASE_URL, TTS_API_KEY and
 * TTS_SPEECH_MODEL.
 *
 * @returns The assembled config, or `undefined` when any of the three
 *   variables is missing — in which case TTS is disabled and the game runs
 *   silently.
 */
function loadTtsConfig(): TtsConfig | undefined {
  const [baseUrl, apiKey, speechModel] = [
    "TTS_BASE_URL",
    "TTS_API_KEY",
    "TTS_SPEECH_MODEL",
  ].map((envName) => readOptionalVar(envName));

  // All three settings are required together; a partial setup disables TTS.
  return baseUrl && apiKey && speechModel
    ? { baseUrl, apiKey, speechModel }
    : undefined;
}
|
||||
|
||||
export function loadEngineConfig(): EngineConfig {
|
||||
return {
|
||||
text: {
|
||||
@@ -23,5 +39,7 @@ export function loadEngineConfig(): EngineConfig {
|
||||
apiKey: readVar("VISION_API_KEY"),
|
||||
model: readVar("VISION_MODEL"),
|
||||
},
|
||||
tts: loadTtsConfig(),
|
||||
mockImage: readOptionalVar("MOCK_IMAGE") === "true",
|
||||
};
|
||||
}
|
||||
|
||||
@@ -4,7 +4,12 @@ import type { NextConfig } from "next";
|
||||
const config: NextConfig = {
|
||||
reactStrictMode: true,
|
||||
typedRoutes: false,
|
||||
transpilePackages: ["@yume/engine", "@yume/ai-client", "@yume/types"],
|
||||
transpilePackages: [
|
||||
"@yume/engine",
|
||||
"@yume/ai-client",
|
||||
"@yume/types",
|
||||
"@yume/tts-client",
|
||||
],
|
||||
serverExternalPackages: ["sharp"],
|
||||
turbopack: {
|
||||
root: path.join(__dirname, "..", ".."),
|
||||
|
||||
Reference in New Issue
Block a user