feat(web): add player name, freeform input, and unified settings modal

- Player name: stored in localStorage, injected into Architect/Writer/InsertBeat prompts so NPCs address the player by name, displayed in dialogue UI
- Freeform input: compact button at choice nodes expands to text input, LLM classifier routes to insert-beat (interactive NPC response) or change-scene
- SettingsModal: unified panel merging player name, voice toggle (with collapsible TTS key section), replacing the old TtsKeyModal
- Insert-beat upgrade: prompt now requires NPC reaction when characters are present, shared by both freeform and Vision paths
- IME guard: isComposing check on freeform input to prevent CJK mid-composition submission

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-06-07 12:03:13 +08:00
parent b0b5630a25
commit ae3dd17e6b
11 changed files with 897 additions and 77 deletions
@@ -2,6 +2,7 @@ export {
  startSession,
  requestScene,
  visionDecide,
+  classifyFreeform,
  requestInsertBeat,
  requestBeatAudio,
 } from "./orchestrator";
@@ -2,6 +2,9 @@ import type {
  BeatAudioRequest,
  BeatAudioResponse,
  EngineConfig,
+  FreeformClassify,
+  FreeformClassifyRequest,
+  FreeformClassifyResponse,
  InsertBeatRequest,
  InsertBeatResponse,
  Session,
@@ -13,10 +16,16 @@ import type {
  VisionResponse,
 } from "@infiplot/types";
 import { coerceOrientation } from "@infiplot/types";
+import { chat } from "@infiplot/ai-client";
 import { runArchitect } from "./agents/architect";
 import { selectStyle } from "./agents/styleSelector";
 import { directInsertBeat, directScene } from "./director";
 import { STYLE_MAP } from "@/lib/options";
+import { parseJsonLoose } from "./jsonParser";
+import {
+  FREEFORM_CLASSIFY_SYSTEM,
+  buildFreeformClassifyUserMessage,
+} from "./prompts";
 import { synthesizeBeat } from "./voice";
 import { interpret } from "./vision";

@@ -52,6 +61,7 @@ export async function startSession(
    characters: [],
    styleReferenceImage: req.styleReferenceImage?.trim() || undefined,
    orientation: coerceOrientation(req.orientation),
+    playerName: req.playerName?.trim() || undefined,
  };

  // Stage 0 — Architect (+ optional auto style selection, in parallel).
@@ -138,6 +148,41 @@ export async function visionDecide(
  return interpret(config.vision, req.annotatedImageBase64, current);
 }

+// ──────────────────────────────────────────────────────────────────────
+//  classifyFreeform — routes freeform text typed at a choice node to
+//  "insert-beat" or "change-scene" with one lightweight LLM call (no
+//  image, no scene generation). Unrecognized verdicts fall back to
+//  "insert-beat"; a missing/blank action falls back to the raw text.
+// ──────────────────────────────────────────────────────────────────────
+
+export async function classifyFreeform(
+  config: EngineConfig,
+  req: FreeformClassifyRequest,
+): Promise<FreeformClassifyResponse> {
+  const current = req.session.history.at(-1)?.scene ?? null;
+  const userMsg = buildFreeformClassifyUserMessage(
+    req.freeformText,
+    current?.scenePrompt,
+  );
+
+  const raw = await chat(config.text, [
+    { role: "system", content: FREEFORM_CLASSIFY_SYSTEM },
+    { role: "user", content: userMsg },
+  ], { temperature: 0, tag: "freeform-classify" });
+
+  // Model output is untrusted JSON: narrow each field before using it.
+  const parsed = parseJsonLoose<{
+    classify?: unknown;
+    freeformAction?: unknown;
+  }>(raw);
+  const classify: FreeformClassify =
+    parsed?.classify === "change-scene" ? "change-scene" : "insert-beat";
+  const action = parsed?.freeformAction;
+  const described = typeof action === "string" ? action.trim() : "";
+
+  return { classify, freeformAction: described || req.freeformText };
+}
+
 // ──────────────────────────────────────────────────────────────────────
 //  requestInsertBeat — single-agent transient beat (no image, no new
 //  characters). Stays single-LLM by design — the INSERT_BEAT prompt
@@ -132,6 +132,11 @@ export function buildArchitectUserMessage(session: Session): string {
  const parts: string[] = [];
  parts.push(`世界观：${session.worldSetting}`);
  parts.push(`画风：${session.styleGuide}`);
+  if (session.playerName) {
+    parts.push(
+      `\n玩家名字：${session.playerName}\n（NPC 在对话中应自然地称呼玩家为「${session.playerName}」。「你」仍指代玩家视角，但 NPC 的台词里请使用这个名字而非泛称。不要为玩家设计立绘或音色——玩家是 POV 视角，永不出现在画面中。）`,
+    );
+  }
  parts.push(
    "\n请据此产出这部交互剧的故事档案（story bible），严格以 JSON 格式返回。",
  );
@@ -421,6 +426,11 @@ function buildWriterContextParts(session: Session): string[] {
  // ── 1. session scalars ────────────────────────────────────────────────
  parts.push(`世界观：${session.worldSetting}`);
  parts.push(`画风：${session.styleGuide}`);
+  if (session.playerName) {
+    parts.push(
+      `玩家名字：${session.playerName}（NPC 对话时用此名字称呼玩家；speaker 字段仍固定为 "你" 不变）`,
+    );
+  }
  parts.push("");

  // ── 2. story bible — spine only (stable) ──────────────────────────────
@@ -874,26 +884,38 @@ STRICT RULES:
 }

 // ──────────────────────────────────────────────────────────────────────
-//  Insert-Beat — given a freeform vision action that is judged to stay
-//  *within* the current scene, generate one transient beat.
+//  Insert-Beat — given a freeform action (background click or typed
+//  input) that stays *within* the current scene, generate one beat
+//  with meaningful character interaction.
 //  Single-agent path; no character design / no rendering involved.
 // ──────────────────────────────────────────────────────────────────────

-export const INSERT_BEAT_SYSTEM = `你是视觉小说编剧。玩家在当前场景内做了一个**不会换场景的自由动作**（比如看一眼桌上的相框、想了想刚才那句话）。请基于此动作，写出一个**单独的、过渡性的 beat**：可以是旁白、角色台词、或两者结合。
+export const INSERT_BEAT_SYSTEM = `你是视觉小说编剧。玩家在当前场景内做了一个自由动作（可能是点击画面中的某个物件/角色，也可能是主动输入了一句话/动作）。请基于此动作，写出**一个有实质内容的 beat**。
+
+核心原则——**玩家的动作必须得到回应**：
+- 如果当前场景有 NPC 在场，NPC **必须对玩家的动作做出反应**（说话、表情变化、动作回应）。用 narration 描述玩家的动作，用 speaker + line 写 NPC 的回应。
+- 如果场景中没有 NPC（纯环境），可以用 narration 描述玩家的观察/发现，给玩家一个新细节或情绪波动。
+- 不要写"你想做什么但没做"这种无意义的犹豫——玩家已经做了，世界要有反馈。

 文本风格约束：
- narration / line 用中文，**纯净可显示文本**，不要写 (叹气) 这类配音标注
- narration 与 line 加起来 ≤80 字
- 不要打破当前场景的物理状态（玩家仍在原地、对面仍是同一个角色）
+- narration / line 用中文，**纯净可显示文本**，不要写 (叹气)(语速快) 这类配音标注
+- narration 与 line 加起来 ≤100 字
+- 不要打破当前场景的物理状态（玩家仍在原地）
 - 不要生成选项或下一步指引 —— 玩家点击会自然回到原 beat
- 这个 beat 也要"有所得"——给玩家一个新细节、一丝潜台词或情绪波动（show, don't tell），别写成无意义的空台词
+- 内容要"有所得"——一个新细节、一丝潜台词、一次真实的交流（show, don't tell）

 speaker 字段允许的取值**只有两种**（与主路径 Writer 一致 — Pattern B galgame 标准）：
 1. **已登记角色**里的 NPC 真名（**绝不允许引入新角色**）
-2. **"你"** — 玩家本人在自言自语 / 说一句过渡性的话（对白框显示，但不调 TTS）
+2. **"你"** — 玩家本人开口说话（对白框显示，但不调 TTS）

 其它任何 POV 变体（玩家 / 我 / 主角 / protagonist / player / MC / I / me）**一律错误**，请用 "你" 代替。

+推荐模式（有 NPC 在场时）：
+  narration = 描述玩家做了什么（动作/表情/心理）
+  speaker = NPC 真名
+  line = NPC 的回应台词
+  lineDelivery = 配音导演指令
+
 - 如果有 line 且 speaker = NPC，**必须**给出 lineDelivery（配音导演指令）
 - 如果有 line 且 speaker = "你"，lineDelivery 可以留空（玩家对白不调 TTS）

@@ -913,6 +935,11 @@ export function buildInsertBeatUserMessage(
 ): string {
  const parts: string[] = [];
  parts.push(`世界观：${session.worldSetting}`);
+  if (session.playerName) {
+    parts.push(
+      `玩家名字：${session.playerName}（NPC 对话时用此名字称呼玩家；speaker 字段仍固定为 "你" 不变）`,
+    );
+  }

  if (session.characters.length > 0) {
    parts.push("\n已登记角色（speaker 只能用这些名字）：");
@@ -935,8 +962,17 @@ export function buildInsertBeatUserMessage(
    }
  }

+  if (current) {
+    const lastBeatId2 = current.visitedBeatIds.at(-1) ?? current.scene.entryBeatId;
+    const lastBeat2 = current.scene.beats.find((b) => b.id === lastBeatId2);
+    const activeNpcs = lastBeat2?.activeCharacters?.map((c) => c.name) ?? [];
+    if (activeNpcs.length > 0) {
+      parts.push(`当前画面中在场的 NPC：${activeNpcs.join("、")}（优先让在场 NPC 回应玩家）`);
+    }
+  }
+
  parts.push(`\n玩家此刻的自由动作：${freeformAction}`);
-  parts.push("\n请生成一个过渡性 beat，严格以 JSON 格式返回。");
+  parts.push("\n请生成一个有实质回应的 beat，严格以 JSON 格式返回。");
  return parts.join("\n");
 }

@@ -971,4 +1007,41 @@ export function buildVisionUserPrompt(scene: Scene | null): string {
 红点位置即为玩家点击位置。请判断玩家意图与分类，以 JSON 格式返回。`;
 }

+// ──────────────────────────────────────────────────────────────────────
+//  Freeform Classify — labels a player's freeform text input at a
+//  choice node as either an in-scene beat ("insert-beat") or a scene
+//  change ("change-scene"); the prompt offers no other label.
+// ──────────────────────────────────────────────────────────────────────
+
+export const FREEFORM_CLASSIFY_SYSTEM = `你是交互视觉小说的意图分类助手。玩家在一个选择节点输入了自由文本（而非点击已有选项）。你要判断这个输入最适合走哪条路径：
+
+1. "insert-beat"：玩家想在当前场景内与角色互动（问一句话、做一个动作、表达情绪、调查某个东西）→ NPC 会对玩家的动作做出回应，但不切换场景
+2. "change-scene"：玩家想去别的地方、做出重大决定、推动剧情到新阶段 → 切换到全新场景
+
+判断准则：
+- 大多数对话类输入（问问题、说一句话、对角色做出反应）→ "insert-beat"
+- 明确要离开当前场景、去别的地方、跳过时间、做出改变人物关系的重大决定 → "change-scene"
+- 拿不准时偏向 "insert-beat"（场内互动成本低，体验更流畅）
+
+必须输出严格 JSON：
+{
+  "classify": "insert-beat" 或 "change-scene",
+  "freeformAction": "玩家想做什么的一句中文描述（用于后续编剧参考）"
+}
+
+不要输出 JSON 以外的任何文本。`;
+
+// User turn: optional scene line, quoted player input, JSON request.
+export function buildFreeformClassifyUserMessage(
+  freeformText: string,
+  scenePrompt: string | undefined,
+): string {
+  const sceneLines = scenePrompt ? [`当前场景：${scenePrompt}`] : [];
+  return [
+    ...sceneLines,
+    `\n玩家输入：「${freeformText}」`,
+    "\n请判断分类，以 JSON 格式返回。",
+  ].join("\n");
+}
+
 export type PainterCharacterInput = Pick<Character, "name" | "visualDescription">;