Merge pull request #45 from zonghaoyuan/feat/player-name-and-freeform

feat(web): player name, freeform input & unified settings
This commit is contained in:
Zonghao Yuan
2026-06-07 12:38:15 +08:00
committed by GitHub
11 changed files with 897 additions and 77 deletions
+31
View File
@@ -0,0 +1,31 @@
import { classifyFreeform } from "@infiplot/engine";
import type { FreeformClassifyRequest } from "@infiplot/types";
import { NextResponse } from "next/server";
import { loadEngineConfig } from "@/lib/config";
export const runtime = "nodejs";
/**
 * POST handler: classifies a player's freeform text for the current session.
 *
 * Expects a JSON object with a `session` object and a non-blank string
 * `freeformText`. Responds with the classifier result on success, 400 when
 * the body is not valid JSON or does not have that shape, and 500 (with the
 * error message) when config loading or classification throws.
 */
export async function POST(req: Request) {
  // Parse as `unknown` first: the payload is untrusted and may be any JSON
  // value (null, array, number, …), not just an object.
  let payload: unknown;
  try {
    payload = await req.json();
  } catch {
    return NextResponse.json({ error: "Invalid JSON" }, { status: 400 });
  }

  // Non-object payloads collapse to an empty candidate so they fail the
  // required-field check below instead of throwing on property access.
  const candidate = (
    typeof payload === "object" && payload !== null ? payload : {}
  ) as Partial<FreeformClassifyRequest>;
  const { session, freeformText } = candidate;

  if (
    !session ||
    typeof session !== "object" ||
    typeof freeformText !== "string" ||
    !freeformText.trim()
  ) {
    return NextResponse.json(
      { error: "session and freeformText are required" },
      { status: 400 },
    );
  }

  const body = candidate as FreeformClassifyRequest;
  try {
    const config = loadEngineConfig();
    const result = await classifyFreeform(config, body);
    return NextResponse.json(result);
  } catch (err) {
    const message = err instanceof Error ? err.message : "Unknown error";
    return NextResponse.json({ error: message }, { status: 500 });
  }
}
+38 -51
View File
@@ -11,7 +11,7 @@ import {
type Gender,
} from "@/lib/options";
import { readStoredTtsConfig } from "@/lib/clientTtsConfig";
import { TtsKeyModal } from "@/components/TtsKeyModal";
import { SettingsModal, readStoredPlayerName } from "@/components/SettingsModal";
/* ============================================================================
InfiPlot · 首页(编辑式视觉风格 · 居中构图,呼应低保真原型)
@@ -47,7 +47,6 @@ const OPTS: Opt[] = [
{ label: "性向", items: [...GENDERS] },
{ label: "绘画风格", modal: true, items: [...ART_STYLES] },
{ label: "剧情风格", items: [...PLOT_STYLES], defaultIndex: 1 },
{ label: "语音配音", items: ["关闭", "开启"], defaultIndex: 1 },
{ label: "内容节奏", items: [...PACINGS], defaultIndex: 1 },
];
@@ -1239,12 +1238,13 @@ export default function HomePage() {
// 顶部使用提示:默认展示,用户可点 × 永久关闭(localStorage:infiplot:hintClosed)。
const [hintClosed, setHintClosed] = useState(false);
// 自带 TTS Key 弹窗:可选增强,Key 只存浏览器、绝不经过服务器。
const [ttsOpen, setTtsOpen] = useState(false);
// 统一设置弹窗(名字 + 配音 + TTS Key):可选增强,数据只存浏览器。
const [settingsOpen, setSettingsOpen] = useState(false);
const [ttsConfigured, setTtsConfigured] = useState(false);
const [playerName, setPlayerName] = useState("");
const [audioEnabled, setAudioEnabled] = useState(true);
const styleRow = OPTS.findIndex((o) => o.modal);
const voiceRow = OPTS.findIndex((o) => o.label === "语音配音");
const genderIndex = sel[0] ?? 0;
const gender = (OPTS[0]!.items[genderIndex] as Gender) ?? "男性向";
const phrases = EXAMPLE_PHRASES[gender];
@@ -1286,9 +1286,14 @@ export default function HomePage() {
}
}, []);
// 启动时回填「已启用」徽标——读 localStorage 判断用户是否已存过 Key。
// 启动时回填配置状态——读 localStorage 判断用户是否已存过 Key / 名字 / 配音偏好
useEffect(() => {
setTtsConfigured(readStoredTtsConfig() != null);
setPlayerName(readStoredPlayerName());
try {
const stored = localStorage.getItem("infiplot:muted");
if (stored === "1") setAudioEnabled(false);
} catch { /* ignore */ }
}, []);
// 输入框随内容自动增高:长文本整段可见(打字与点卡片填入都覆盖)。
@@ -1315,8 +1320,7 @@ export default function HomePage() {
prompt.trim() || (phrases[phraseIdx] ?? "").trim();
const artStyle = ART_STYLES[sel[1] ?? 0] ?? "自动";
const plotStyle = PLOT_STYLES[sel[2] ?? 1] ?? "多线转折";
const voice = OPTS[3]!.items[sel[3] ?? 1]!;
const pace = PACINGS[sel[4] ?? 1] ?? "紧凑爽快";
const pace = PACINGS[sel[3] ?? 1] ?? "紧凑爽快";
// worldSetting 顺序很重要:玩家输入若存在,必须放在最前面、单独成段、
// 用强指令包住,否则模型会把它当成夹在风格说明里的背景参考、扩写出
@@ -1352,8 +1356,6 @@ export default function HomePage() {
artStyle === "自定义风格" ? DEFAULT_STYLE : artStyle;
styleGuide = STYLE_MAP[effectiveStyle] ?? STYLE_MAP[DEFAULT_STYLE]!;
}
const audioEnabled = voice === "开启";
// 只有「自定义」风格选中、且确实上传了参考图时才透传——其他预设没必要
// 占用 reference slot(也避免 styleGuide 已经是文本预设、画师收到不相关
// 参考图反而产生干扰)。
@@ -1373,7 +1375,7 @@ export default function HomePage() {
sessionStorage.setItem(
"infiplot:custom",
JSON.stringify({ worldSetting, styleGuide, audioEnabled, styleReferenceImage }),
JSON.stringify({ worldSetting, styleGuide, audioEnabled, styleReferenceImage, playerName: playerName || undefined }),
);
router.push("/play?custom=1");
};
@@ -1391,11 +1393,9 @@ export default function HomePage() {
// 其余选项(剧情风格 / 内容节奏)在预烘焙时已锁成「多线转折 / 紧凑爽快」
// 的红果默认基调,对精选卡不再生效。
const onCardClick = (idx: number, _card: StoryContent) => {
const voice = OPTS[3]!.items[sel[3] ?? 1]!;
const audioEnabled = voice === "开启";
sessionStorage.setItem(
"infiplot:custom",
JSON.stringify({ worldSetting: "", styleGuide: "", audioEnabled }),
JSON.stringify({ worldSetting: "", styleGuide: "", audioEnabled, playerName }),
);
track("game_start", {
source: "curated",
@@ -1456,11 +1456,7 @@ export default function HomePage() {
value={prompt}
onChange={(e) => setPrompt(e.target.value)}
onKeyDown={(e) => {
if (
e.key === "Enter" &&
!e.shiftKey &&
!e.nativeEvent.isComposing
) {
if (e.key === "Enter" && !e.shiftKey && !e.nativeEvent.isComposing) {
e.preventDefault();
start();
}
@@ -1518,31 +1514,24 @@ export default function HomePage() {
/>
</div>
))}
</div>
{/* 自带 TTS Key 入口:公共语音模型有 RPM/TPM 限额,高并发易静音;
填自己的小米 MiMo Key(免费)→ 稳定配音、延迟更低,且 Key 只存本地。 */}
<div className="mt-5 flex justify-center">
{/* 设置入口:与 CategorySelect 视觉一致,点击打开 modal */}
<div className="text-left">
<button
type="button"
onClick={() => setTtsOpen(true)}
className={
"inline-flex items-center gap-2 rounded-full border px-4 py-1.5 font-sans text-xs md:text-[13px] transition-colors " +
(ttsConfigured
? "border-ember-500/40 bg-ember-500/5 text-ember-500 hover:bg-ember-500/10"
: "border-clay-900/15 text-clay-500 hover:border-clay-900/30 hover:text-clay-700")
}
onClick={() => setSettingsOpen(true)}
className="group flex items-center gap-2.5 pb-1.5 border-b border-clay-900/20 hover:border-clay-900/45 transition-colors"
>
<i
className={
ttsConfigured
? "fa-solid fa-circle-check text-[11px]"
: "fa-solid fa-microphone-lines text-[11px]"
}
/>
{ttsConfigured ? "自带配音 Key · 已启用" : "经常没声音?自带配音 Key(可选)"}
<span className="text-[10px] smallcaps text-clay-500"></span>
<span className={
"font-serif text-base md:text-lg " +
(ttsConfigured || playerName ? "text-ember-500" : "text-clay-900")
}>
{playerName || (ttsConfigured ? "已配置" : "未配置")}
</span>
<i className="fa-solid fa-gear text-[9px] text-clay-400" />
</button>
</div>
</div>
{/* 使用提示:可被用户永久关闭(localStorage:infiplot:hintClosed) */}
{!hintClosed && (
@@ -1550,6 +1539,8 @@ export default function HomePage() {
<p className="font-serif text-[13px] md:text-sm leading-relaxed text-clay-500">
{" "}
<em className="not-italic text-ember-500">InfiPlot</em>
<span className="text-ember-500"></span>
API Key
</p>
<button
type="button"
@@ -1707,18 +1698,14 @@ export default function HomePage() {
setCustomStyleRefImage={setCustomStyleRefImage}
/>
)}
{ttsOpen && (
<TtsKeyModal
onClose={() => setTtsOpen(false)}
onSaved={(configured) => {
setTtsConfigured(configured);
// 启用自带 Key 时顺手把「语音配音」拨到「开启」——否则用户配了 Key
// 却还是静音,体验自相矛盾。停用时不动其选择,尊重用户原本的偏好。
if (configured && voiceRow >= 0) {
const onIdx = OPTS[voiceRow]!.items.indexOf("开启");
if (onIdx >= 0)
setSel((s) => s.map((v, j) => (j === voiceRow ? onIdx : v)));
}
{settingsOpen && (
<SettingsModal
initialAudioEnabled={audioEnabled}
onClose={() => setSettingsOpen(false)}
onSaved={(settings) => {
setTtsConfigured(settings.ttsConfigured);
setPlayerName(settings.playerName);
setAudioEnabled(settings.audioEnabled);
}}
/>
)}
+140 -1
View File
@@ -18,6 +18,7 @@ import {
import type { DialogueHistoryItem } from "@/components/DialogueHistoryModal";
import type { GalleryDoc, GalleryScene } from "@/app/gallery/page";
import { TtsKeyModal } from "@/components/TtsKeyModal";
import { readStoredPlayerName } from "@/components/SettingsModal";
import { annotateClick } from "@/lib/annotateClient";
import { loadClientTtsConfig } from "@/lib/clientTtsConfig";
import { PRESETS } from "@/lib/presets";
@@ -27,6 +28,7 @@ import type {
BeatChoice,
Character,
CharacterVoice,
FreeformClassifyResponse,
InsertBeatResponse,
Orientation,
Scene,
@@ -1107,11 +1109,12 @@ function PlayInner() {
styleGuide: string;
styleReferenceImage?: string;
orientation?: Orientation;
playerName?: string;
} | null = null;
if (!cardName) {
if (presetId) {
const p = PRESETS.find((x) => x.id === presetId);
if (p) livePayload = { worldSetting: p.worldSetting, styleGuide: p.styleGuide };
if (p) livePayload = { worldSetting: p.worldSetting, styleGuide: p.styleGuide, playerName: readStoredPlayerName() || undefined };
} else if (isCustom) {
const stored = sessionStorage.getItem("infiplot:custom");
if (stored) {
@@ -1121,11 +1124,13 @@ function PlayInner() {
styleGuide: string;
audioEnabled?: boolean;
styleReferenceImage?: string;
playerName?: string;
};
livePayload = {
worldSetting: parsed.worldSetting,
styleGuide: parsed.styleGuide,
styleReferenceImage: parsed.styleReferenceImage || undefined,
playerName: parsed.playerName || undefined,
};
// audioEnabled 已在 useState 初始化时反向投射到 muted;这里无需再额外存。
} catch {
@@ -1224,6 +1229,7 @@ function PlayInner() {
storyState: data.storyState,
styleReferenceImage: data.styleReferenceImage,
orientation: data.scene.orientation ?? sessionOrientation,
playerName: livePayload?.playerName || readStoredPlayerName() || undefined,
};
visitedBeatsRef.current = [data.scene.entryBeatId];
setSession(initial);
@@ -1436,6 +1442,135 @@ function PlayInner() {
void performSceneTransition(promise, exit, visited, choice.label);
}
// Handles a line of text the player typed instead of picking a choice.
// Flow: classify the text via /api/classify-freeform, then either
//   (a) "insert-beat": request one extra beat, append it to the current
//       scene and jump to it, or
//   (b) anything else: treat it as a scene change and hand the /api/scene
//       request over to performSceneTransition.
// Any error thrown inside the awaited part is surfaced through setError and
// the phase is reset to "ready".
async function onFreeformInput(text: string) {
// Ignore input unless the player is idle on a loaded scene.
if (phase !== "ready" || !session || !currentScene) return;
track("freeform_input", {
scene_index: session.history.length,
text_length: text.length,
});
setPhase("vision-thinking");
try {
const classifyRes = await fetch("/api/classify-freeform", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({
session: stripVoicesForTransport(session),
freeformText: text,
}),
});
if (!classifyRes.ok) {
// Prefer the server-provided error message; fall back to the HTTP status text.
const j = (await classifyRes.json().catch(() => ({}))) as { error?: string };
throw new Error(j.error ?? classifyRes.statusText);
}
const decision = (await classifyRes.json()) as FreeformClassifyResponse;
if (decision.classify === "insert-beat") {
// Interactive beat: NPC responds to the player's action, scene stays
setPhase("inserting-beat");
const insertRes = await fetch("/api/insert-beat", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({
session: stripVoicesForTransport(session),
freeformAction: decision.freeformAction,
clientTts: !!byoTtsRef.current,
}),
});
if (!insertRes.ok) {
const j = (await insertRes.json().catch(() => ({}))) as { error?: string };
throw new Error(j.error ?? insertRes.statusText);
}
const { partial, characters: insertChars } =
(await insertRes.json()) as InsertBeatResponse;
// The inserted beat links back to the beat the player was on (or the
// scene's entry beat when no current beat is recorded).
const fromBeatId =
currentBeatRef.current?.id ?? currentScene.entryBeatId;
// Client-generated id: timestamp plus a short random base-36 suffix.
const newBeatId = `b_ins_${Date.now()}_${Math.random()
.toString(36)
.slice(2, 6)}`;
const newBeat: Beat = {
id: newBeatId,
narration: partial.narration,
speaker: partial.speaker,
line: partial.line,
lineDelivery: partial.lineDelivery,
next: { type: "continue", nextBeatId: fromBeatId },
};
const patched: Scene = {
...currentScene,
beats: [...currentScene.beats, newBeat],
};
// Only the last history entry gets the patched scene; characters from the
// response are merged in via mergeCharactersPreserveVoice.
const nextSession: Session = {
...session,
history: session.history.map((h, i, arr) =>
i === arr.length - 1 ? { ...h, scene: patched } : h,
),
characters: mergeCharactersPreserveVoice(
session.characters,
insertChars,
),
};
setSession(nextSession);
setCurrentScene(patched);
setCurrentBeatId(newBeatId);
// Audio is requested only for beats that have both a speaker and a line;
// the call is fire-and-forget.
if (newBeat.speaker && newBeat.line) {
void fetchBeatAudio(nextSession, {
id: newBeatId,
speaker: newBeat.speaker,
line: newBeat.line,
lineDelivery: newBeat.lineDelivery,
});
}
setLastExitLabel(decision.freeformAction);
setPhase("ready");
return;
}
// change-scene path
// Snapshot the visited beats so the exit record is not affected by later
// mutations of the ref.
const visited = [...visitedBeatsRef.current];
const exit: SceneExit = {
kind: "freeform",
action: decision.freeformAction,
};
clearPool(poolRef.current);
// Session copy sent to the server: the last history entry carries the
// visited beats and the freeform exit.
const specSession: Session = {
...session,
history: session.history.map((h, i, arr) =>
i === arr.length - 1
? { ...h, visitedBeatIds: visited, exit }
: h,
),
};
// The request is started here but awaited by performSceneTransition.
const promise = (async () => {
const res = await fetch("/api/scene", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({
session: stripVoicesForTransport(specSession),
clientTts: !!byoTtsRef.current,
}),
});
if (!res.ok) {
const j = (await res.json().catch(() => ({}))) as { error?: string };
throw new Error(j.error ?? res.statusText);
}
return (await res.json()) as SceneResponse;
})();
setPendingClick(null);
// Not awaited, so a rejection here is not caught by the try/catch below.
// NOTE(review): presumably performSceneTransition handles its own failures — confirm.
void performSceneTransition(promise, exit, visited, decision.freeformAction);
} catch (e) {
setError(String(e));
setPhase("ready");
}
}
async function onBackgroundClick(click: { x: number; y: number }) {
if (phase !== "ready" || !session || !currentScene || !imageUrl) return;
setPhase("vision-thinking");
@@ -1623,7 +1758,9 @@ function PlayInner() {
onBackgroundClick={onBackgroundClick}
onAdvance={onAdvance}
onSelectChoice={onSelectChoice}
onFreeformInput={onFreeformInput}
orientation={orientation}
playerName={session?.playerName}
fullViewport
dialogueHistory={dialogueHistory}
/>
@@ -1698,7 +1835,9 @@ function PlayInner() {
onBackgroundClick={onBackgroundClick}
onAdvance={onAdvance}
onSelectChoice={onSelectChoice}
onFreeformInput={onFreeformInput}
orientation={orientation}
playerName={session?.playerName}
dialogueHistory={dialogueHistory}
aboveCanvas={
<button
+5 -1
View File
@@ -16,11 +16,15 @@ export function DialogueHistoryModal({
items,
portrait,
onClose,
playerName,
}: {
items: DialogueHistoryItem[];
portrait: boolean;
onClose: () => void;
playerName?: string;
}) {
// Shows the player's chosen name in place of the generic "你" speaker label;
// every other speaker (and "你" when no name is set) is passed through as-is.
const displaySpeaker = (s: string | undefined) => {
  const isPlayerLabel = s === "你";
  return isPlayerLabel && playerName ? playerName : s;
};
const listRef = useRef<HTMLDivElement>(null);
useEffect(() => {
@@ -97,7 +101,7 @@ export function DialogueHistoryModal({
</span>
{item.speaker && (
<span className="font-serif text-[12px] text-[rgba(205,165,90,0.92)]">
{item.speaker}
{displaySpeaker(item.speaker)}
</span>
)}
</div>
+122 -1
View File
@@ -174,8 +174,10 @@ export function PlayCanvas({
onBackgroundClick,
onAdvance,
onSelectChoice,
onFreeformInput,
fullViewport = false,
orientation = "landscape",
playerName,
aboveCanvas,
aboveCanvasLeft,
belowCanvas,
@@ -190,9 +192,11 @@ export function PlayCanvas({
onBackgroundClick: (click: { x: number; y: number }) => void;
onAdvance: () => void;
onSelectChoice: (choice: BeatChoice) => void;
onFreeformInput?: (text: string) => void;
fullViewport?: boolean;
// 会话锁定的图片朝向。"portrait" 时整图铺满视口(object-fit:cover)、选项竖排、字号放大。
orientation?: Orientation;
playerName?: string;
// 渲染在图片正上方、右对齐的 slot(画面外、紧贴右上角)。
aboveCanvas?: ReactNode;
// 渲染在图片正上方、左对齐的 slot(画面外、紧贴左上角),与 aboveCanvas 水平镜像。
@@ -204,6 +208,11 @@ export function PlayCanvas({
const imgRef = useRef<HTMLImageElement>(null);
const audioRef = useRef<HTMLAudioElement>(null);
const [historyOpen, setHistoryOpen] = useState(false);
const [freeformOpen, setFreeformOpen] = useState(false);
const [freeformText, setFreeformText] = useState("");
const freeformInputRef = useRef<HTMLInputElement>(null);
// Maps the generic "你" speaker label to the player's chosen name when one is
// set; any other speaker value is returned unchanged.
const displaySpeaker = (s: string | undefined) => {
  if (playerName && s === "你") {
    return playerName;
  }
  return s;
};
const [audioDurationMs, setAudioDurationMs] = useState<number | undefined>(
undefined,
);
@@ -424,6 +433,7 @@ export function PlayCanvas({
items={dialogueHistory}
portrait={portrait}
onClose={() => setHistoryOpen(false)}
playerName={playerName}
/>
)}
@@ -435,6 +445,67 @@ export function PlayCanvas({
: "gap-[1.5%]"
}`}
>
{freeformOpen && onFreeformInput ? (
/* ── Expanded: full-width input replaces all choices ── */
<div
className="flex-1 flex items-center gap-2"
style={{
background: "rgba(20, 14, 8, 0.68)",
border: "1.5px solid rgba(180, 140, 80, 0.65)",
borderRadius: "6px",
backdropFilter: "blur(8px)",
WebkitBackdropFilter: "blur(8px)",
boxShadow: "0 2px 12px rgba(0,0,0,0.4), inset 0 1px 0 rgba(200,165,90,0.12)",
padding: "8px 12px",
}}
>
<input
ref={freeformInputRef}
value={freeformText}
onChange={(e) => setFreeformText(e.target.value.slice(0, 50))}
onKeyDown={(e) => {
if (e.key === "Enter" && !e.nativeEvent.isComposing && freeformText.trim() && phase === "ready") {
onFreeformInput(freeformText.trim());
setFreeformOpen(false);
setFreeformText("");
} else if (e.key === "Escape") {
setFreeformOpen(false);
setFreeformText("");
}
}}
placeholder="输入你想说的或想做的..."
maxLength={50}
autoFocus
className="flex-1 min-w-0 bg-transparent border-none outline-none font-serif text-[14px] placeholder:text-[rgba(200,185,155,0.50)]"
style={{ color: "rgba(245,235,210,0.95)" }}
/>
<button
type="button"
disabled={!freeformText.trim() || phase !== "ready"}
onClick={() => {
if (freeformText.trim()) {
onFreeformInput(freeformText.trim());
setFreeformOpen(false);
setFreeformText("");
}
}}
className="shrink-0 flex items-center justify-center w-8 h-8 rounded-sm transition-colors disabled:opacity-30"
style={{ color: "rgba(195,155,75,0.9)" }}
>
<i className="fa-solid fa-paper-plane text-[12px]" />
</button>
<button
type="button"
onClick={() => { setFreeformOpen(false); setFreeformText(""); }}
className="shrink-0 flex items-center justify-center w-8 h-8 rounded-sm transition-colors"
style={{ color: "rgba(200,185,155,0.55)" }}
>
<i className="fa-solid fa-xmark text-[13px]" />
</button>
</div>
) : (
/* ── Collapsed: normal choices + small freeform trigger ── */
<>
{choices.map((choice, i) => (
<ChoiceButton
key={choice.id}
@@ -445,6 +516,56 @@ export function PlayCanvas({
onClick={() => onSelectChoice(choice)}
/>
))}
{onFreeformInput && (
<button
type="button"
disabled={phase !== "ready"}
onClick={() => {
setFreeformOpen(true);
requestAnimationFrame(() => freeformInputRef.current?.focus());
}}
className="group shrink-0 flex items-center justify-center transition-all duration-200 disabled:opacity-50 disabled:cursor-wait"
style={{
background: "rgba(20, 14, 8, 0.45)",
border: "1.5px dashed rgba(180, 140, 80, 0.40)",
borderRadius: "6px",
backdropFilter: "blur(8px)",
WebkitBackdropFilter: "blur(8px)",
width: portrait ? "100%" : "42px",
padding: portrait ? "10px 16px" : "0",
}}
title="自由输入"
>
<span
className="opacity-0 group-hover:opacity-100 absolute inset-0 rounded-[5px] transition-opacity duration-200 pointer-events-none"
style={{
background: "rgba(180,140,60,0.08)",
border: "1.5px dashed rgba(200,165,90,0.70)",
}}
/>
{portrait ? (
<span className="relative flex items-center gap-2">
<i
className="fa-solid fa-pen-to-square text-[11px]"
style={{ color: "rgba(195,155,75,0.60)" }}
/>
<span
className="font-serif text-[13px]"
style={{ color: "rgba(200,185,155,0.70)" }}
>
</span>
</span>
) : (
<i
className="fa-solid fa-pen-to-square text-[12px] relative"
style={{ color: "rgba(195,155,75,0.55)" }}
/>
)}
</button>
)}
</>
)}
</div>
)}
@@ -484,7 +605,7 @@ export function PlayCanvas({
}`}
style={{ color: "rgba(205,165,90,0.92)" }}
>
{beat.speaker}
{displaySpeaker(beat.speaker)}
</p>
)}
+395
View File
@@ -0,0 +1,395 @@
"use client";
import { type ReactNode, useEffect, useState } from "react";
import {
clearStoredTtsConfig,
readStoredTtsConfig,
writeStoredTtsConfig,
} from "@/lib/clientTtsConfig";
import {
findTtsPreset,
PAYG_PRESET_ID,
TTS_KEY_DOC_URL,
TTS_REGION_PRESETS,
} from "@/lib/ttsPresets";
const PLAYER_NAME_STORAGE_KEY = "infiplot:playerName";

/**
 * Reads the saved player name from localStorage.
 *
 * @returns The stored name, or "" when nothing is stored or when accessing
 *   localStorage throws.
 */
export function readStoredPlayerName(): string {
  let stored: string | null = null;
  try {
    stored = localStorage.getItem(PLAYER_NAME_STORAGE_KEY);
  } catch {
    // Best-effort read: any storage failure is treated as "no name saved".
  }
  return stored ?? "";
}
/**
 * Saves the player name to localStorage, or removes the entry when the name
 * is empty. Storage errors are swallowed (best-effort write).
 *
 * @param name Name to persist; an empty string clears the stored value.
 */
export function writeStoredPlayerName(name: string): void {
  try {
    if (!name) {
      localStorage.removeItem(PLAYER_NAME_STORAGE_KEY);
      return;
    }
    localStorage.setItem(PLAYER_NAME_STORAGE_KEY, name);
  } catch {
    /* ignore */
  }
}
export function SettingsModal({
initialAudioEnabled = true,
onClose,
onSaved,
footerNote,
}: {
initialAudioEnabled?: boolean;
onClose: () => void;
onSaved: (settings: { ttsConfigured: boolean; playerName: string; audioEnabled: boolean }) => void;
footerNote?: ReactNode;
}) {
const [initialTts] = useState(() => readStoredTtsConfig());
const initialKind = findTtsPreset(initialTts?.presetId)?.kind ?? "payg";
const [keyType, setKeyType] = useState<"token-plan" | "payg">(initialKind);
const [regionId, setRegionId] = useState<string>(
initialKind === "token-plan"
? (initialTts?.presetId ?? TTS_REGION_PRESETS[0]!.id)
: TTS_REGION_PRESETS[0]!.id,
);
const [apiKey, setApiKey] = useState<string>(initialTts?.apiKey ?? "");
const [showKey, setShowKey] = useState(false);
const ttsAlreadyConfigured = initialTts != null;
const [playerName, setPlayerName] = useState(() => readStoredPlayerName());
const [voiceOn, setVoiceOn] = useState(initialAudioEnabled);
const [shown, setShown] = useState(false);
const expectedPrefix = keyType === "payg" ? "sk-" : "tp-";
const prefixMismatch =
apiKey.trim().length > 0 && !apiKey.trim().startsWith(expectedPrefix);
useEffect(() => {
const id = requestAnimationFrame(() => setShown(true));
return () => cancelAnimationFrame(id);
}, []);
// Dismisses the dialog: clears the `shown` flag immediately, then calls the
// parent's onClose 280 ms later.
const close = () => {
  setShown(false);
  setTimeout(() => {
    onClose();
  }, 280);
};
/**
 * Persists the dialog's values and reports them to the parent via onSaved.
 *
 * - The trimmed player name is written (an empty name clears the entry).
 * - A non-blank API key is stored with the preset implied by the key type.
 *   A blank field leaves any previously stored key in place.
 * - Whenever a key is configured, voice is forced on. The mute flag is
 *   persisted from that final value, so what is stored always matches what
 *   onSaved reports.
 */
const save = () => {
  const name = playerName.trim();
  writeStoredPlayerName(name);

  const key = apiKey.trim();
  if (key) {
    const presetId = keyType === "payg" ? PAYG_PRESET_ID : regionId;
    writeStoredTtsConfig({ presetId, apiKey: key });
  }
  // Configured if a key was just saved, or one was already stored when the
  // dialog opened.
  const ttsConfigured = key.length > 0 || ttsAlreadyConfigured;

  // A configured key implies voice on, overriding an "off" toggle.
  const finalVoiceOn = ttsConfigured || voiceOn;
  if (finalVoiceOn !== voiceOn) setVoiceOn(finalVoiceOn);

  // Persist the final value, not the raw toggle; otherwise storage could say
  // "muted" while the parent is told audio is enabled.
  try {
    localStorage.setItem("infiplot:muted", finalVoiceOn ? "0" : "1");
  } catch { /* ignore */ }

  onSaved({ ttsConfigured, playerName: name, audioEnabled: finalVoiceOn });
  close();
};
// Resets everything this dialog manages: removes the stored TTS config, the
// player name and the mute flag, reports the defaults to the parent, then
// closes the dialog.
const clearAll = () => {
  clearStoredTtsConfig();
  writeStoredPlayerName("");
  try {
    localStorage.removeItem("infiplot:muted");
  } catch {
    /* ignore */
  }
  const defaults = { ttsConfigured: false, playerName: "", audioEnabled: true };
  onSaved(defaults);
  close();
};
const hasAnySetting = ttsAlreadyConfigured || readStoredPlayerName().length > 0;
return (
<div
onMouseDown={close}
className={
"fixed inset-0 z-[60] flex items-center justify-center p-6 md:p-10 transition-all duration-300 " +
(shown
? "bg-clay-900/30 backdrop-blur-md"
: "bg-clay-900/0 backdrop-blur-0")
}
>
<div
onMouseDown={(e) => e.stopPropagation()}
className={
"flex w-[560px] max-w-[94vw] max-h-[88vh] flex-col overflow-hidden rounded-sm border border-clay-900/15 bg-cream-50 shadow-2xl shadow-clay-900/25 transition-all duration-300 " +
(shown ? "opacity-100 scale-100" : "opacity-0 scale-95")
}
>
{/* Header */}
<div className="flex items-center gap-5 px-6 md:px-8 py-5 border-b border-clay-900/10">
<div className="flex flex-col">
<span className="font-serif text-xl md:text-2xl text-clay-900">
</span>
<span className="text-[11px] text-clay-500 mt-1 tracking-wide">
·
</span>
</div>
<button
type="button"
onClick={close}
aria-label="关闭"
className="ml-auto text-xl leading-none text-clay-500 hover:text-clay-900 transition-colors"
>
<i className="fa-solid fa-xmark" />
</button>
</div>
<div className="flex flex-col gap-0 overflow-y-auto">
{/* ── Player Name Section ── */}
<div className="flex flex-col gap-3 px-6 md:px-8 py-5">
<div className="flex items-center gap-2.5">
<span className="flex h-7 w-7 items-center justify-center rounded-sm border border-clay-900/10 bg-cream-100 text-clay-400">
<i className="fa-solid fa-user-pen text-[11px]" />
</span>
<span className="font-serif text-base text-clay-900">
</span>
</div>
<input
value={playerName}
onChange={(e) => setPlayerName(e.target.value)}
type="text"
maxLength={20}
autoComplete="off"
spellCheck={false}
placeholder="不填则使用「你」"
className="h-11 w-full rounded-sm border border-clay-900/15 bg-cream-100 px-4 font-sans text-sm text-clay-900 outline-none transition-colors focus:border-ember-500 placeholder:text-clay-400"
/>
<span className="text-[11px] text-clay-400">
NPC
</span>
</div>
<div className="border-t border-clay-900/8 mx-6 md:mx-8" />
{/* ── Voice Section (toggle + key as child) ── */}
<div className="flex flex-col gap-3 px-6 md:px-8 pt-5 pb-5">
<div className="flex items-center gap-2.5">
<span className="flex h-7 w-7 items-center justify-center rounded-sm border border-clay-900/10 bg-cream-100 text-clay-400">
<i className="fa-solid fa-volume-high text-[11px]" />
</span>
<span className="font-serif text-base text-clay-900">
</span>
</div>
<div className="grid grid-cols-2 gap-2">
{(
[
{ on: true, label: "开启", icon: "fa-solid fa-volume-high" },
{ on: false, label: "关闭", icon: "fa-solid fa-volume-xmark" },
] as const
).map((t) => {
const active = voiceOn === t.on;
return (
<button
key={String(t.on)}
type="button"
onClick={() => setVoiceOn(t.on)}
className={
"flex items-center justify-center gap-2 rounded-sm border px-3 py-2.5 text-[13px] transition-all " +
(active
? "border-ember-500 bg-ember-500/5 text-clay-900"
: "border-clay-900/12 text-clay-600 hover:border-clay-900/35 hover:bg-cream-100")
}
>
<i className={t.icon + " text-[11px]"} />
{t.label}
</button>
);
})}
</div>
{/* ── TTS Key (sub-section, only when voice is on) ── */}
{voiceOn && (
<div className="mt-3 flex flex-col gap-4 rounded-sm border border-clay-900/8 bg-cream-100/40 p-4">
<div className="flex items-center gap-2">
<i className="fa-solid fa-key text-[10px] text-clay-400" />
<span className="text-[13px] text-clay-800">
Key
</span>
<span className="text-[10px] text-clay-400"></span>
</div>
<p className="text-[12px] leading-relaxed text-clay-500">
<span className="text-clay-800"> MiMo API Key</span>
Key MiMo
TTS
<span className="text-clay-800"></span>
使
</p>
<div className="flex flex-col gap-2">
<span className="text-[10px] smallcaps text-clay-500">
K e y ·
</span>
<div className="grid grid-cols-2 gap-2">
{(
[
{
kind: "payg",
label: "按量付费 Pay-as-you-go",
sub: "sk- 开头",
},
{
kind: "token-plan",
label: "套餐 Token Plan",
sub: "tp- 开头",
},
] as const
).map((t) => {
const active = keyType === t.kind;
return (
<button
key={t.kind}
type="button"
onClick={() => setKeyType(t.kind)}
className={
"flex flex-col gap-0.5 rounded-sm border px-3 py-2.5 text-left transition-all " +
(active
? "border-ember-500 bg-ember-500/5 text-clay-900"
: "border-clay-900/12 text-clay-600 hover:border-clay-900/35 hover:bg-cream-100")
}
>
<span className="text-[13px]">{t.label}</span>
<span className="text-[10px] text-clay-400">
{t.sub}
</span>
</button>
);
})}
</div>
</div>
{keyType === "token-plan" && (
<div className="flex flex-col gap-2">
<span className="text-[10px] smallcaps text-clay-500">
</span>
<div className="grid grid-cols-1 gap-2 sm:grid-cols-3">
{TTS_REGION_PRESETS.map((p) => {
const active = p.id === regionId;
return (
<button
key={p.id}
type="button"
onClick={() => setRegionId(p.id)}
className={
"rounded-sm border px-3 py-2.5 text-left text-[13px] transition-all " +
(active
? "border-ember-500 bg-ember-500/5 text-clay-900"
: "border-clay-900/12 text-clay-600 hover:border-clay-900/35 hover:bg-cream-100")
}
>
{p.label}
</button>
);
})}
</div>
<span className="text-[11px] text-clay-400">
</span>
</div>
)}
<div className="flex flex-col gap-2">
<span className="text-[10px] smallcaps text-clay-500">
A P I · K e y
</span>
<div className="relative">
<input
value={apiKey}
onChange={(e) => setApiKey(e.target.value)}
type={showKey ? "text" : "password"}
autoComplete="off"
spellCheck={false}
placeholder={
keyType === "payg"
? "粘贴 sk- 开头的按量 Key"
: "粘贴 tp- 开头的套餐 Key"
}
className="h-11 w-full rounded-sm border border-clay-900/15 bg-cream-50 pl-4 pr-11 font-sans text-sm text-clay-900 outline-none transition-colors focus:border-ember-500 placeholder:text-clay-400"
/>
<button
type="button"
onClick={() => setShowKey((v) => !v)}
aria-label={showKey ? "隐藏" : "显示"}
className="absolute right-3 top-1/2 -translate-y-1/2 text-clay-400 hover:text-clay-700 transition-colors"
>
<i
className={`fa-solid ${showKey ? "fa-eye-slash" : "fa-eye"} text-sm`}
/>
</button>
</div>
{prefixMismatch && (
<span className="flex items-start gap-1.5 text-[11px] leading-relaxed text-ember-500">
<i className="fa-solid fa-triangle-exclamation mt-0.5 text-[10px]" />
Key {expectedPrefix}
{keyType === "payg"
? "按量付费 Pay-as-you-go"
: "套餐 Token Plan"}
</span>
)}
<a
href={TTS_KEY_DOC_URL}
target="_blank"
rel="noopener noreferrer"
className="inline-flex items-center gap-1.5 text-[11px] text-ember-500 hover:text-ember-400 transition-colors"
>
<i className="fa-brands fa-github text-[11px]" />
Key
</a>
</div>
{footerNote && (
<p className="text-[11px] leading-relaxed text-clay-400">
{footerNote}
</p>
)}
</div>
)}
</div>
</div>
{/* Footer */}
<div className="flex items-center gap-3 border-t border-clay-900/10 px-6 md:px-8 py-4">
{hasAnySetting && (
<button
type="button"
onClick={clearAll}
className="inline-flex items-center gap-2 rounded-sm border border-clay-900/15 px-4 py-2 font-sans text-sm text-clay-600 transition-colors hover:border-clay-900/35 hover:text-clay-900"
>
<i className="fa-solid fa-rotate-left text-xs" />
</button>
)}
<button
type="button"
onClick={save}
className="ml-auto inline-flex items-center gap-2 rounded-sm bg-clay-900 px-5 py-2.5 font-sans text-sm text-cream-50 transition-colors hover:bg-ember-500"
>
<i className="fa-solid fa-check text-xs" />
</button>
</div>
</div>
</div>
);
}
+1
View File
@@ -49,6 +49,7 @@ type AnalyticsEventData = {
kind: "advance-beat" | "change-scene";
};
vision_click: { result: "insert-beat" | "change-scene" };
freeform_input: { scene_index: number; text_length: number };
tts_toggle: { muted: boolean };
fullscreen_toggle: { on: boolean };
play_heartbeat: never;
+1
View File
@@ -2,6 +2,7 @@ export {
startSession,
requestScene,
visionDecide,
classifyFreeform,
requestInsertBeat,
requestBeatAudio,
} from "./orchestrator";
+45
View File
@@ -2,6 +2,9 @@ import type {
BeatAudioRequest,
BeatAudioResponse,
EngineConfig,
FreeformClassify,
FreeformClassifyRequest,
FreeformClassifyResponse,
InsertBeatRequest,
InsertBeatResponse,
Session,
@@ -13,10 +16,16 @@ import type {
VisionResponse,
} from "@infiplot/types";
import { coerceOrientation } from "@infiplot/types";
import { chat } from "@infiplot/ai-client";
import { runArchitect } from "./agents/architect";
import { selectStyle } from "./agents/styleSelector";
import { directInsertBeat, directScene } from "./director";
import { STYLE_MAP } from "@/lib/options";
import { parseJsonLoose } from "./jsonParser";
import {
FREEFORM_CLASSIFY_SYSTEM,
buildFreeformClassifyUserMessage,
} from "./prompts";
import { synthesizeBeat } from "./voice";
import { interpret } from "./vision";
@@ -52,6 +61,7 @@ export async function startSession(
characters: [],
styleReferenceImage: req.styleReferenceImage?.trim() || undefined,
orientation: coerceOrientation(req.orientation),
playerName: req.playerName?.trim() || undefined,
};
// Stage 0 — Architect (+ optional auto style selection, in parallel).
@@ -138,6 +148,41 @@ export async function visionDecide(
return interpret(config.vision, req.annotatedImageBase64, current);
}
// ──────────────────────────────────────────────────────────────────────
// classifyFreeform — classifies a freeform text input at a choice node
// as either insert-beat or change-scene (any other label the model
// returns is treated as insert-beat). Single lightweight LLM call; no
// image, no scene generation.
// ──────────────────────────────────────────────────────────────────────
/**
 * Classifies a player's freeform text against the latest scene.
 *
 * Sends one chat request (system prompt + a user message built from the text
 * and the latest scene's `scenePrompt`, if any) and reads `classify` and
 * `freeformAction` from the loosely-parsed JSON reply.
 *
 * @param config Engine configuration; `config.text` is passed to `chat`.
 * @param req Request carrying the session and the raw freeform text.
 * @returns `classify` is "change-scene" only when the model says exactly
 *   that; anything else becomes "insert-beat". `freeformAction` is the
 *   model's trimmed string, falling back to the player's original text when
 *   it is missing, blank, or not a string.
 */
export async function classifyFreeform(
  config: EngineConfig,
  req: FreeformClassifyRequest,
): Promise<FreeformClassifyResponse> {
  type LooseFields = { classify?: unknown; freeformAction?: unknown };

  const current = req.session.history.at(-1)?.scene ?? null;
  const userMsg = buildFreeformClassifyUserMessage(
    req.freeformText,
    current?.scenePrompt,
  );

  const raw = await chat(
    config.text,
    [
      { role: "system", content: FREEFORM_CLASSIFY_SYSTEM },
      { role: "user", content: userMsg },
    ],
    { temperature: 0, tag: "freeform-classify" },
  );

  // Model output is untrusted: the parsed value may not be an object, and its
  // fields may not be strings. Narrow before use instead of assuming the shape.
  const parsed: unknown = parseJsonLoose<{
    classify?: string;
    freeformAction?: string;
  }>(raw);
  const fields: LooseFields =
    typeof parsed === "object" && parsed !== null
      ? (parsed as LooseFields)
      : {};

  const classify: FreeformClassify =
    fields.classify === "change-scene" ? "change-scene" : "insert-beat";
  const modelAction =
    typeof fields.freeformAction === "string"
      ? fields.freeformAction.trim()
      : "";

  return {
    classify,
    freeformAction: modelAction || req.freeformText,
  };
}
// ──────────────────────────────────────────────────────────────────────
// requestInsertBeat — single-agent transient beat (no image, no new
// characters). Stays single-LLM by design — the INSERT_BEAT prompt
+82 -9
View File
@@ -132,6 +132,11 @@ export function buildArchitectUserMessage(session: Session): string {
const parts: string[] = [];
parts.push(`世界观:${session.worldSetting}`);
parts.push(`画风:${session.styleGuide}`);
if (session.playerName) {
parts.push(
`\n玩家名字:${session.playerName}\n(NPC 在对话中应自然地称呼玩家为「${session.playerName}」。「你」仍指代玩家视角,但 NPC 的台词里请使用这个名字而非泛称。不要为玩家设计立绘或音色——玩家是 POV 视角,永不出现在画面中。)`,
);
}
parts.push(
"\n请据此产出这部交互剧的故事档案(story bible),严格以 JSON 格式返回。",
);
@@ -421,6 +426,11 @@ function buildWriterContextParts(session: Session): string[] {
// ── 1. session scalars ────────────────────────────────────────────────
parts.push(`世界观:${session.worldSetting}`);
parts.push(`画风:${session.styleGuide}`);
if (session.playerName) {
parts.push(
`玩家名字:${session.playerName}(NPC 对话时用此名字称呼玩家;speaker 字段仍固定为 "你" 不变)`,
);
}
parts.push("");
// ── 2. story bible — spine only (stable) ──────────────────────────────
@@ -874,26 +884,38 @@ STRICT RULES:
}
// ──────────────────────────────────────────────────────────────────────
// Insert-Beat — given a freeform vision action that is judged to stay
// *within* the current scene, generate one transient beat.
// Insert-Beat — given a freeform action (background click or typed
// input) that stays *within* the current scene, generate one beat
// with meaningful character interaction.
// Single-agent path; no character design / no rendering involved.
// ──────────────────────────────────────────────────────────────────────
export const INSERT_BEAT_SYSTEM = `你是视觉小说编剧。玩家在当前场景内做了一个**不会换场景的自由动作**(比如看一眼桌上的相框、想了想刚才那句话)。请基于此动作,写出一个**单独的、过渡性的 beat**:可以是旁白、角色台词、或两者结合
export const INSERT_BEAT_SYSTEM = `你是视觉小说编剧。玩家在当前场景内做了一个自由动作(可能是点击画面中的某个物件/角色,也可能是主动输入了一句话/动作)。请基于此动作,写出**一个有实质内容的 beat**
核心原则——**玩家的动作必须得到回应**:
- 如果当前场景有 NPC 在场,NPC **必须对玩家的动作做出反应**(说话、表情变化、动作回应)。用 narration 描述玩家的动作,用 speaker + line 写 NPC 的回应。
- 如果场景中没有 NPC(纯环境),可以用 narration 描述玩家的观察/发现,给玩家一个新细节或情绪波动。
- 不要写"你想做什么但没做"这种无意义的犹豫——玩家已经做了,世界要有反馈。
文本风格约束:
- narration / line 用中文,**纯净可显示文本**,不要写 (叹气) 这类配音标注
- narration 与 line 加起来 ≤80 字
- 不要打破当前场景的物理状态(玩家仍在原地、对面仍是同一个角色
- narration / line 用中文,**纯净可显示文本**,不要写 (叹气)(语速快) 这类配音标注
- narration 与 line 加起来 ≤100 字
- 不要打破当前场景的物理状态(玩家仍在原地)
- 不要生成选项或下一步指引 —— 玩家点击会自然回到原 beat
- 这个 beat 也要"有所得"——给玩家一个新细节、一丝潜台词或情绪波动show, don't tell,别写成无意义的空台词
- 内容要"有所得"——一个新细节、一丝潜台词、一次真实的交流show, don't tell
speaker 字段允许的取值**只有两种**(与主路径 Writer 一致 — Pattern B galgame 标准):
1. **已登记角色**里的 NPC 真名(**绝不允许引入新角色**)
2. **"你"** — 玩家本人在自言自语 / 说一句过渡性的话(对白框显示,但不调 TTS
2. **"你"** — 玩家本人开口说话(对白框显示,但不调 TTS
其它任何 POV 变体(玩家 / 我 / 主角 / protagonist / player / MC / I / me**一律错误**,请用 "你" 代替。
推荐模式(有 NPC 在场时):
narration = 描述玩家做了什么(动作/表情/心理)
speaker = NPC 真名
line = NPC 的回应台词
lineDelivery = 配音导演指令
- 如果有 line 且 speaker = NPC**必须**给出 lineDelivery(配音导演指令)
- 如果有 line 且 speaker = "你"lineDelivery 可以留空(玩家对白不调 TTS)
@@ -913,6 +935,11 @@ export function buildInsertBeatUserMessage(
): string {
const parts: string[] = [];
parts.push(`世界观:${session.worldSetting}`);
if (session.playerName) {
parts.push(
`玩家名字:${session.playerName}(NPC 对话时用此名字称呼玩家;speaker 字段仍固定为 "你" 不变)`,
);
}
if (session.characters.length > 0) {
parts.push("\n已登记角色(speaker 只能用这些名字):");
@@ -935,8 +962,17 @@ export function buildInsertBeatUserMessage(
}
}
if (current) {
const lastBeatId2 = current.visitedBeatIds.at(-1) ?? current.scene.entryBeatId;
const lastBeat2 = current.scene.beats.find((b) => b.id === lastBeatId2);
const activeNpcs = lastBeat2?.activeCharacters?.map((c) => c.name) ?? [];
if (activeNpcs.length > 0) {
parts.push(`当前画面中在场的 NPC${activeNpcs.join("、")}(优先让在场 NPC 回应玩家)`);
}
}
parts.push(`\n玩家此刻的自由动作:${freeformAction}`);
parts.push("\n请生成一个过渡性 beat,严格以 JSON 格式返回。");
parts.push("\n请生成一个有实质回应的 beat,严格以 JSON 格式返回。");
return parts.join("\n");
}
@@ -971,4 +1007,41 @@ export function buildVisionUserPrompt(scene: Scene | null): string {
红点位置即为玩家点击位置。请判断玩家意图与分类,以 JSON 格式返回。`;
}
// ──────────────────────────────────────────────────────────────────────
// Freeform Classify — classifies a player's freeform text input at a
// choice node into one of two paths: insert a beat in-scene, or
// trigger a scene change.
// ──────────────────────────────────────────────────────────────────────
/**
 * System prompt for the freeform classifier. Instructs the model to pick
 * between "insert-beat" and "change-scene" (preferring "insert-beat" when
 * unsure) and to reply with strict JSON containing `classify` and
 * `freeformAction`.
 */
export const FREEFORM_CLASSIFY_SYSTEM = `你是交互视觉小说的意图分类助手。玩家在一个选择节点输入了自由文本(而非点击已有选项)。你要判断这个输入最适合走哪条路径:
1. "insert-beat":玩家想在当前场景内与角色互动(问一句话、做一个动作、表达情绪、调查某个东西)→ NPC 会对玩家的动作做出回应,但不切换场景
2. "change-scene":玩家想去别的地方、做出重大决定、推动剧情到新阶段 → 切换到全新场景
判断准则:
- 大多数对话类输入(问问题、说一句话、对角色做出反应)→ "insert-beat"
- 明确要离开当前场景、去别的地方、跳过时间、做出改变人物关系的重大决定 → "change-scene"
- 拿不准时偏向 "insert-beat"(场内互动成本低,体验更流畅)
必须输出严格 JSON
{
"classify": "insert-beat" 或 "change-scene",
"freeformAction": "玩家想做什么的一句中文描述(用于后续编剧参考)"
}
不要输出 JSON 以外的任何文本。`;
/**
 * Builds the user message for the freeform classifier.
 *
 * @param freeformText The text the player typed; embedded verbatim inside
 *   a 「…」 quotation so the model can tell player text from instructions.
 * @param scenePrompt Description of the current scene; the scene line is
 *   omitted when this is undefined or empty.
 * @returns The message parts joined with newlines.
 */
export function buildFreeformClassifyUserMessage(
  freeformText: string,
  scenePrompt: string | undefined,
): string {
  const parts: string[] = [];
  if (scenePrompt) {
    parts.push(`当前场景:${scenePrompt}`);
  }
  // Close the quotation: an unterminated 「 blurs where the player's text
  // ends and the classification instruction begins.
  parts.push(`\n玩家输入:「${freeformText}」`);
  parts.push("\n请判断分类,以 JSON 格式返回。");
  return parts.join("\n");
}
/** The `name` and `visualDescription` fields of a `Character`, and nothing else. */
export type PainterCharacterInput = Pick<Character, "name" | "visualDescription">;
+23
View File
@@ -280,6 +280,12 @@ export type Session = {
* share one aspect ratio. Absent → "landscape" (back-compat).
*/
orientation?: Orientation;
/**
* Optional player-chosen display name. When set, NPC dialogue will address
* the player by this name instead of the generic "你". Stored client-side
* only (localStorage); never persisted server-side.
*/
playerName?: string;
};
// ──────────────────────────────────────────────────────────────────────
@@ -372,6 +378,8 @@ export type StartRequest = {
* (default) keeps 16:9 widescreen. Locked for the whole session.
*/
orientation?: Orientation;
/** Optional player display name — see Session.playerName. */
playerName?: string;
};
// /api/parse-style-image — vision LLM extracts a textual painting-style
@@ -458,6 +466,21 @@ export type VisionResponse = {
classify: VisionClassify;
};
// /api/classify-freeform — classifies a player's freeform text input
// into one of two paths: insert a beat in-scene, or trigger a scene
// change.
export type FreeformClassifyRequest = {
/** Session snapshot; the classifier reads the scene of the last history entry. */
session: Session;
/** The text the player typed at the choice node. */
freeformText: string;
};
/** The two outcomes the classifier can return. */
export type FreeformClassify = "insert-beat" | "change-scene";
export type FreeformClassifyResponse = {
/** Which path the input should take. */
classify: FreeformClassify;
/** Short description of what the player wants to do; falls back to the raw input text. */
freeformAction: string;
};
// /api/insert-beat — generates a single transient beat in response to
// a freeform vision action. Does NOT regenerate the image.
export type InsertBeatRequest = {