refactor: flatten monorepo to single web package (#12)

Flatten the pnpm monorepo (apps/web + packages/*) into a single web package at the repo root. - Move app/lib/components/scripts/public to root; drop apps/web and packages/* wrappers - Rewrite tsconfig paths (@infiplot/*) to ./lib/*; turbopack.root = __dirname - Update Vercel (no root-directory) and Cloudflare (pnpm build:cf at root) deploy paths - Regenerate pnpm-lock.yaml to drop stale workspace importers - Bump engines.node to >=22 to match wrangler Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-03 00:55:45 +08:00
parent 9543c3dba1
commit dc5ecd60f6
221 changed files with 241 additions and 379 deletions
@@ -0,0 +1,90 @@
+import { chat } from "@infiplot/ai-client";
+import type { ProviderConfig, Session, StoryState } from "@infiplot/types";
+import { parseJsonLoose } from "../jsonParser";
+import { ARCHITECT_SYSTEM, buildArchitectUserMessage } from "../prompts";
+
+// ──────────────────────────────────────────────────────────────────────
+//  Architect agent — ONE LLM call at session start.
+//
+//  Expands the user's (often terse) world + style prompt into a real story
+//  bible: a second-person protagonist with a want and a flaw, a single
+//  central dramatic question (logline), a genre frame that anchors the
+//  爽点 rhythm, an engineered cold-open for scene 1 (nextHook), and a small
+//  intentional cast. Seeds the StoryState that the Writer reads and updates
+//  every scene — so the story has a spine from beat one instead of being
+//  improvised cold.
+//
+//  Everything is best-effort coerced with fallbacks: a malformed LLM
+//  response can never abort session start — worst case the Writer just gets
+//  a thinner bible and improvises more.
+// ──────────────────────────────────────────────────────────────────────
+
+type RawStoryState = { // Unvalidated Architect JSON: every field stays `unknown` until runArchitect coerces it.
+  logline?: unknown; // runArchitect falls back to session.worldSetting when unusable
+  genreTags?: unknown; // normalized to a single string; empty when absent or unusable
+  protagonist?: unknown; // runArchitect supplies a default second-person line when unusable
+  castNotes?: unknown; // omitted from the StoryState when empty
+  synopsis?: unknown; // runArchitect supplies a placeholder when unusable
+  openThreads?: unknown; // coerced with strArray()
+  relationships?: unknown; // coerced with strArray()
+  nextHook?: unknown; // omitted from the StoryState when empty
+};
+
+/** Coerce an untrusted value to a trimmed string; any non-string becomes "". */
+function str(raw: unknown): string {
+  if (typeof raw !== "string") {
+    return "";
+  }
+  return raw.trim();
+}
+
+/**
+ * Coerce an untrusted value to a list of non-empty, trimmed strings.
+ * Non-string entries are dropped. Returns undefined when the input is not an
+ * array or when nothing usable remains.
+ */
+function strArray(raw: unknown): string[] | undefined {
+  if (!Array.isArray(raw)) return undefined;
+  const kept: string[] = [];
+  for (const item of raw) {
+    if (typeof item !== "string") continue;
+    const text = item.trim();
+    if (text.length > 0) kept.push(text);
+  }
+  return kept.length === 0 ? undefined : kept;
+}
+
+/**
+ * Run the Architect: one LLM call that expands the session's prompt into the
+ * initial StoryState.
+ *
+ * Never throws. Every field is coerced with a fallback, and any failure of
+ * chat() or parseJsonLoose() yields a minimal bible seeded from
+ * `session.worldSetting`.
+ *
+ * @param config  Provider configuration handed to chat().
+ * @param session Session whose world setting seeds the prompt and fallbacks.
+ * @returns A StoryState that is always usable by the Writer.
+ */
+export async function runArchitect(
+  config: ProviderConfig,
+  session: Session,
+): Promise<StoryState> {
+  // Shared by the "field missing" and "call failed" paths so the two can
+  // never drift apart.
+  const fallbackProtagonist =
+    "你是这个故事的主角（第二人称视角，永不出现在画面里）。";
+  const fallbackSynopsis = "故事即将开始。";
+
+  try {
+    const raw = await chat(
+      config,
+      [
+        { role: "system", content: ARCHITECT_SYSTEM },
+        { role: "user", content: buildArchitectUserMessage(session) },
+      ],
+      { temperature: 0.85, responseFormat: "json_object" },
+    );
+
+    const parsed = parseJsonLoose<RawStoryState>(raw);
+
+    // `genreTags` is stored as ONE string, but the plural name invites models
+    // to answer with an array of tags. Join those instead of silently
+    // discarding them. The separator is presentational only.
+    const genreTags =
+      str(parsed.genreTags) || (strArray(parsed.genreTags) ?? []).join(" / ");
+
+    return {
+      // Stable spine — fall back to the raw world/style prompt so the bible is
+      // never wholly empty even if the model returns garbage.
+      logline: str(parsed.logline) || session.worldSetting,
+      genreTags,
+      protagonist: str(parsed.protagonist) || fallbackProtagonist,
+      castNotes: str(parsed.castNotes) || undefined,
+      // Volatile seeds — the opening Writer will rewrite these via its patch.
+      synopsis: str(parsed.synopsis) || fallbackSynopsis,
+      openThreads: strArray(parsed.openThreads),
+      relationships: strArray(parsed.relationships),
+      nextHook: str(parsed.nextHook) || undefined,
+    };
+  } catch (err) {
+    // chat() or parseJsonLoose() can throw (network / unrepairable JSON), and
+    // a non-object parse result (e.g. `null`) surfaces here as a TypeError.
+    // The Architect is best-effort: never let it abort session start — return
+    // a minimal bible seeded from the raw prompt and let the Writer improvise.
+    const msg = err instanceof Error ? err.message : String(err);
+    console.error(`[architect] failed, using minimal bible: ${msg}`);
+    return {
+      logline: session.worldSetting,
+      genreTags: "",
+      protagonist: fallbackProtagonist,
+      synopsis: fallbackSynopsis,
+    };
+  }
+}
@@ -0,0 +1,155 @@
+import { chat, generateImage } from "@infiplot/ai-client";
+import { provisionVoice } from "@infiplot/tts-client";
+import type {
+  Character,
+  CharacterVoice,
+  EngineConfig,
+  Session,
+} from "@infiplot/types";
+import { parseJsonLoose } from "../jsonParser";
+import { mockImageDataUri } from "../mockImage";
+import {
+  CHARACTER_DESIGNER_SYSTEM,
+  buildCharacterDesignerUserMessage,
+  buildCharacterPortraitPrompt,
+} from "../prompts";
+
+// ──────────────────────────────────────────────────────────────────────
+//  CharacterDesigner agent — designs ONE new character.
+//
+//  Exposed as three GRANULAR stages so the director can schedule the slow
+//  parts around the Painter (a voice is never needed to paint a scene, and
+//  only entry-beat characters' portraits are referenced by the Painter):
+//
+//    1. designCharacterCard      — ONE LLM call → visual + voice TEXT cards
+//       (intentional bundling: the same agent thinks about who this character
+//        IS, keeping appearance and vocal personality coherent)
+//    2. renderCharacterPortrait  — base portrait image (Runware URL + UUID)
+//    3. provisionCharacterVoice  — Xiaomi MiMo voicedesign → reference audio
+//
+//  Each step degrades gracefully — if image gen fails the character just has
+//  no portrait; if voice gen fails it has no voice. The game keeps running.
+// ──────────────────────────────────────────────────────────────────────
+
+type CharacterDesignOutput = { // Shape requested from the design LLM; either field may be missing.
+  visualDescription?: string; // appearance text card — NOTE(review): not validated at runtime
+  voiceDescription?: string; // voice text card; designCharacterCard substitutes a name-based default when empty
+};
+
+// TEMP: latency diagnostics — logs how long a phase took. Mirrors the
+// orchestrator's tlog convention; delete once real-world numbers are in.
+function tlog(label: string, t0: number): void {
+  const elapsedMs = Date.now() - t0;
+  console.log(`${label}: ${elapsedMs}ms`);
+}
+
+/**
+ * Ask the text model for one character's visual + voice cards.
+ * Errors from chat() / parseJsonLoose() are not caught here.
+ */
+async function runDesignLLM(
+  config: EngineConfig,
+  session: Session,
+  charName: string,
+): Promise<CharacterDesignOutput> {
+  const userContent = buildCharacterDesignerUserMessage(charName, session);
+  const reply = await chat(
+    config.text,
+    [
+      { role: "system", content: CHARACTER_DESIGNER_SYSTEM },
+      { role: "user", content: userContent },
+    ],
+    { temperature: 0.7, responseFormat: "json_object" },
+  );
+  return parseJsonLoose<CharacterDesignOutput>(reply);
+}
+
+// Render the base portrait for one character: a "concept sheet" (single
+// character, neutral pose, plain background) meant to serve as a Runware
+// referenceImages anchor for later scenes.
+//
+// On success both handles come from the same generateImage() result: the URL
+// (client display + URL-form references) and the UUID (an alternative
+// reference handle for the Painter).
+//
+// In mock mode the data URI is returned as basePortraitUrl and no UUID is
+// set (the Painter is short-circuited in mock mode as well).
+//
+// Never throws: any failure is logged and an empty object is returned, i.e.
+// the character simply has no portrait.
+export async function renderCharacterPortrait(
+  config: EngineConfig,
+  charName: string,
+  visualDescription: string,
+  styleGuide: string,
+): Promise<{ basePortraitUrl?: string; basePortraitUuid?: string }> {
+  try {
+    if (!config.mockImage) {
+      const portraitPrompt = buildCharacterPortraitPrompt(
+        charName,
+        visualDescription,
+        styleGuide,
+      );
+      const generated = await generateImage(config.image, portraitPrompt);
+      return {
+        basePortraitUrl: generated.imageUrl,
+        basePortraitUuid: generated.imageUuid,
+      };
+    }
+    const placeholder = await mockImageDataUri();
+    return { basePortraitUrl: placeholder };
+  } catch (err) {
+    const reason = err instanceof Error ? err.message : String(err);
+    console.error(`[characterDesigner] portrait gen failed for ${charName}: ${reason}`);
+    return {}; // degrade gracefully: no portrait at all
+  }
+}
+
+/**
+ * Provision a TTS voice from a textual voice description.
+ * Returns undefined when TTS is not configured or provisioning fails; the
+ * failure is logged and never rethrown.
+ */
+export async function provisionCharacterVoice(
+  config: EngineConfig,
+  voiceDescription: string,
+  charName: string,
+): Promise<CharacterVoice | undefined> {
+  const ttsConfig = config.tts;
+  if (!ttsConfig) return undefined;
+  try {
+    const voice = await provisionVoice(ttsConfig, voiceDescription);
+    return voice;
+  } catch (err) {
+    const reason = err instanceof Error ? err.message : String(err);
+    console.error(`[characterDesigner] voice provision failed for ${charName}: ${reason}`);
+    return undefined;
+  }
+}
+
+// The cheap first stage: design the visual + voice TEXT cards in one LLM
+// call. The director then schedules renderCharacterPortrait /
+// provisionCharacterVoice around the Painter. Multiple new characters in the
+// same scene run this stage in parallel at the director level.
+export type CharacterCard = { // Result of designCharacterCard: text only, no image or audio yet.
+  name: string; // the character name the card was designed for
+  visualDescription?: string; // undefined when the design LLM returned no usable appearance text
+  voiceDescription: string; // always set: designCharacterCard falls back to a name-based prompt
+};
+
+/**
+ * Stage 1 of character design: one LLM call producing the visual + voice
+ * TEXT cards for `charName`.
+ *
+ * The LLM output is unvalidated JSON, so each field is checked at runtime:
+ * a missing, empty, or non-string `voiceDescription` falls back to a
+ * name-based prompt, and an unusable `visualDescription` becomes undefined.
+ * Errors from the LLM call itself (chat / JSON parsing) still propagate.
+ *
+ * @param config   Engine configuration; `config.text` drives the LLM call.
+ * @param session  Session supplying the world setting for the prompt.
+ * @param charName Name of the character to design.
+ */
+export async function designCharacterCard(
+  config: EngineConfig,
+  session: Session,
+  charName: string,
+): Promise<CharacterCard> {
+  const tDesign = Date.now();
+  const parsed: unknown = await runDesignLLM(config, session, charName);
+  tlog(`[charDesigner ${charName}] design LLM`, tDesign);
+
+  // Valid JSON is not necessarily an object, and a field typed `string` may
+  // arrive as a number/object/null. Calling `.trim()` on those would throw
+  // and skip the fallbacks below.
+  const design: Record<string, unknown> =
+    typeof parsed === "object" && parsed !== null
+      ? (parsed as Record<string, unknown>)
+      : {};
+  const text = (value: unknown): string =>
+    typeof value === "string" ? value.trim() : "";
+
+  return {
+    name: charName,
+    visualDescription: text(design.visualDescription) || undefined,
+    voiceDescription:
+      text(design.voiceDescription) ||
+      `请根据角色名「${charName}」推断其性别、年龄与气质，生成最贴合的音色。所属世界观：${session.worldSetting}`,
+  };
+}
+
+// Voice-only provisioning for a name the LLM used as a speaker without the
+// character ever having been designed (e.g. 编剧 referenced a name that was
+// not in `activeCharacters`). Used by the directInsertBeat path and as a
+// safety net in directScene. No portrait is generated — the resulting
+// Character carries only a name, a voice description, and (if provisioning
+// succeeded) a voice.
+export async function provisionVoiceForName(
+  config: EngineConfig,
+  session: Session,
+  charName: string,
+): Promise<Character> {
+  const inferredVoicePrompt = `请根据角色名「${charName}」推断其性别、年龄与气质，生成最贴合的音色。所属世界观：${session.worldSetting}`;
+  return {
+    name: charName,
+    voiceDescription: inferredVoicePrompt,
+    voice: await provisionCharacterVoice(config, inferredVoicePrompt, charName),
+  };
+}
@@ -0,0 +1,86 @@
+import { chat } from "@infiplot/ai-client";
+import type { BeatActiveCharacter, ProviderConfig } from "@infiplot/types";
+import { parseJsonLoose } from "../jsonParser";
+import {
+  CINEMATOGRAPHER_SYSTEM,
+  buildCinematographerUserMessage,
+} from "../prompts";
+
+// ──────────────────────────────────────────────────────────────────────
+//  Cinematographer agent — translates the Writer's narrative scene
+//  summary into an English compositional prompt for FLUX.
+//
+//  Reads: sceneSummary + entry beat's activeCharacters (poses)
+//         + prior sceneKey (for continuity hints)
+//  Writes: { shotType, integratedPrompt }
+//
+//  Does NOT describe character APPEARANCE — that's appended at the
+//  Painter stage from session.characters[].visualDescription. The
+//  Cinematographer only positions named characters in the frame and
+//  describes the environment + lighting + camera framing.
+//
+//  This separation lets the Cinematographer run IN PARALLEL with the
+//  CharacterDesigner — neither needs the other's output. They both
+//  feed independently into the Painter prompt.
+// ──────────────────────────────────────────────────────────────────────
+
+export type CinematographerOutput = { // What runCinematographer returns; both fields are always non-empty.
+  shotType: string; // defaults to "medium shot" when the model gives none
+  integratedPrompt: string; // English composition prompt; synthesized from sceneSummary when the model gives none
+};
+
+type RawCinematographerOutput = { // Unvalidated model JSON before fallbacks are applied.
+  shotType?: string;
+  integratedPrompt?: string;
+};
+
+export type CinematographerInput = { // Everything forwarded to buildCinematographerUserMessage.
+  sceneSummary: string; // the Writer's narrative scene summary; also seeds the fallback prompt
+  styleGuide: string;
+  entryBeatActive: BeatActiveCharacter[]; // entry beat's on-stage characters (names + poses)
+  /** Entry beat's speaker — drives the dynamic camera policy:
+   *    NPC name → NPC looks toward camera (close-up)
+   *    "你"     → medium shot, NPC listens
+   *    undefined → wide establishing shot */
+  entryBeatSpeaker?: string;
+  priorSceneKey?: string; // previous scene's key, for continuity hints
+  currentSceneKey?: string;
+};
+
+/**
+ * Run the Cinematographer: one LLM call that turns the Writer's scene
+ * summary into a shot type and an English composition prompt.
+ *
+ * Errors from chat() / parseJsonLoose() propagate. A response that parses
+ * but is unusable (not an object, missing or non-string fields) never
+ * throws — the documented fallbacks are used instead.
+ *
+ * @param config Provider configuration handed to chat().
+ * @param input  Scene summary, style guide, entry-beat cast and scene keys.
+ */
+export async function runCinematographer(
+  config: ProviderConfig,
+  input: CinematographerInput,
+): Promise<CinematographerOutput> {
+  const raw = await chat(
+    config,
+    [
+      { role: "system", content: CINEMATOGRAPHER_SYSTEM },
+      {
+        role: "user",
+        content: buildCinematographerUserMessage(
+          input.sceneSummary,
+          input.styleGuide,
+          input.entryBeatActive,
+          input.entryBeatSpeaker,
+          input.priorSceneKey,
+          input.currentSceneKey,
+        ),
+      },
+    ],
+    { temperature: 0.6, responseFormat: "json_object" },
+  );
+
+  // The parse result is unvalidated: it may be `null`/an array/a scalar, and
+  // a field typed `string` may really be a number or object. Reading it with
+  // `?.trim()` would throw in exactly the cases the fallback exists for.
+  const parsed: unknown = parseJsonLoose<RawCinematographerOutput>(raw);
+  const fields: Record<string, unknown> =
+    typeof parsed === "object" && parsed !== null
+      ? (parsed as Record<string, unknown>)
+      : {};
+  const text = (value: unknown): string =>
+    typeof value === "string" ? value.trim() : "";
+
+  // Fallback: if the LLM produced nothing usable, synthesize a minimal
+  // integratedPrompt from the Writer's sceneSummary so the Painter has
+  // SOMETHING to work with rather than blowing up the whole pipeline.
+  const integratedPrompt =
+    text(fields.integratedPrompt) ||
+    `A cinematic illustration depicting: ${input.sceneSummary}. Wide establishing shot, natural lighting, atmospheric mood.`;
+
+  return {
+    shotType: text(fields.shotType) || "medium shot",
+    integratedPrompt,
+  };
+}
@@ -0,0 +1,163 @@
+import { generateImage } from "@infiplot/ai-client";
+import type { GenerateImageOptions, GenerateImageResult } from "@infiplot/ai-client";
+import type {
+  Beat,
+  Character,
+  EngineConfig,
+  ProviderConfig,
+} from "@infiplot/types";
+import { mockImageDataUri } from "../mockImage";
+import { buildPainterPrompt } from "../prompts";
+
+// ──────────────────────────────────────────────────────────────────────
+//  Painter — final image generation with multi-reference anchoring.
+//
+//  FLUX.2 [klein] 9B KV does NOT support seedImage (img2img). Instead,
+//  visual continuity comes entirely from `referenceImages` (capped at 4),
+//  which the KV-optimized variant accelerates ~2.5× via key-value caching
+//  of reference latents.
+//
+//  References are slotted in priority order (max 4):
+//    1. Prior scene image — when sceneKey matched a previous scene, this
+//       anchors the same physical space (lighting/layout/style continuity)
+//    2. Entry beat's speaker portrait — the NPC the player is talking with
+//       (most visually prominent)
+//    3. Other on-stage NPCs' portraits — secondary characters in the frame
+//
+//  References are sent as URLs (preferred — Runware can always fetch its own
+//  CDN links) or UUIDs (backstop when a URL is missing); see
+//  collectReferenceImages. Base64 fallback was removed when generateImage
+//  switched to outputType=URL, which always returns both a UUID and a URL so
+//  we never lack a cheap reference handle.
+//
+//  Failure handling — two-tier degradation:
+//    A. referenceImages call           (preferred — full visual anchoring)
+//    B. pure text-to-image fallback    (last resort if Runware refs API errors)
+// ──────────────────────────────────────────────────────────────────────
+
+const MAX_REFERENCE_IMAGES = 4; // hard cap on referenceImages per request (prior scene image + portraits combined)
+
+export type PainterInput = { // Everything runPainter needs besides the engine config and entry beat.
+  integratedPrompt: string; // composition prompt, passed to buildPainterPrompt
+  styleGuide: string;
+  onStageCharacters: Character[]; // used both for the prompt and for portrait reference lookup
+  /**
+   * Prior scene's Runware UUID or URL. When set (= sceneKey hit a prior
+   * scene), it slots into referenceImages[0] for spatial continuity.
+   * Capacity-wise this displaces ONE character portrait — slot is shared
+   * with character refs, capped at 4 total per Runware spec.
+   */
+  priorSceneImage?: string;
+};
+
+// Pick the references we send to Runware as `referenceImages`. Priority:
+//   slot 0: priorSceneImage (if any — sceneKey continuity)
+//   slot 1: entry beat's speaker portrait (the NPC speaking to the player)
+//   slot 2+: other on-stage NPCs from entry beat's activeCharacters
+// Caps at MAX_REFERENCE_IMAGES total. Returns the array exactly as it'll be
+// sent — already truncated, and deduplicated both by character name and by
+// reference value, so the same image never occupies two slots.
+export function collectReferenceImages(
+  characters: Character[],
+  entryBeat: Beat | undefined,
+  priorSceneImage: string | undefined,
+): string[] {
+  const refs: string[] = [];
+  const usedRefs = new Set<string>();
+  const handledNames = new Set<string>();
+
+  // Append a reference unless it is missing, already present, or over the cap.
+  const addRef = (ref: string | undefined): void => {
+    if (!ref || usedRefs.has(ref) || refs.length >= MAX_REFERENCE_IMAGES) return;
+    usedRefs.add(ref);
+    refs.push(ref);
+  };
+
+  // Prefer URL over UUID: Runware's `imageInference` returns a UUID, but that
+  // UUID isn't always recognized by the `referenceImages` pipeline (the error
+  // surfaces as `failedToTransferImage`). The URL is Runware's own CDN link —
+  // they can always fetch it from their own infra. UUID is kept as a backstop
+  // for any edge case where URL is missing (e.g., legacy session state).
+  // `||` (not `??`) so an empty-string URL also falls through to the UUID.
+  const portraitOf = (name: string): string | undefined => {
+    const character = characters.find((c) => c.name === name);
+    return character?.basePortraitUrl || character?.basePortraitUuid;
+  };
+
+  // Slot 0 — prior scene image for spatial continuity. Goes first because
+  // backdrop drift is the most jarring discontinuity across same-sceneKey
+  // scenes; character drift is partially masked by character archetype text
+  // in the prompt anyway.
+  addRef(priorSceneImage);
+
+  // Slot 1+ — character portraits, speaker-first.
+  const speakerName = entryBeat?.speaker;
+  if (speakerName) {
+    addRef(portraitOf(speakerName));
+    handledNames.add(speakerName);
+  }
+
+  for (const active of entryBeat?.activeCharacters ?? []) {
+    if (refs.length >= MAX_REFERENCE_IMAGES) break;
+    if (handledNames.has(active.name)) continue;
+    handledNames.add(active.name);
+    addRef(portraitOf(active.name));
+  }
+
+  return refs;
+}
+
+/**
+ * generateImage() wrapper that converts a failure into `null`.
+ * The failure is logged as a warning tagged with `label`.
+ */
+async function tryGenerate(
+  config: ProviderConfig,
+  prompt: string,
+  options: GenerateImageOptions,
+  label: string,
+): Promise<GenerateImageResult | null> {
+  let result: GenerateImageResult | null = null;
+  try {
+    result = await generateImage(config, prompt, options);
+  } catch (err) {
+    const reason = err instanceof Error ? err.message : String(err);
+    console.warn(`[painter] ${label} failed: ${reason}`);
+  }
+  return result;
+}
+
+export type PainterResult = // Discriminated on `kind`.
+  | { kind: "real"; imageUrl: string; imageUuid: string } // produced by generateImage
+  | { kind: "mock"; imageUrl: string }; // config.mockImage path: placeholder from mockImageDataUri(), no UUID
+
+/**
+ * Generate the scene image.
+ *
+ * Mock mode returns a placeholder immediately. Otherwise:
+ *   Tier A — generate with referenceImages when any references exist;
+ *            a failure here is logged by tryGenerate and falls through.
+ *   Tier B — plain text-to-image. Errors from this call propagate.
+ */
+export async function runPainter(
+  config: EngineConfig,
+  input: PainterInput,
+  entryBeat: Beat | undefined,
+): Promise<PainterResult> {
+  if (config.mockImage) {
+    const placeholder = await mockImageDataUri();
+    return { kind: "mock", imageUrl: placeholder };
+  }
+
+  const { integratedPrompt, styleGuide, onStageCharacters, priorSceneImage } = input;
+  const prompt = buildPainterPrompt(integratedPrompt, styleGuide, onStageCharacters);
+  const refs = collectReferenceImages(onStageCharacters, entryBeat, priorSceneImage);
+
+  const asReal = (image: GenerateImageResult): PainterResult => ({
+    kind: "real",
+    imageUrl: image.imageUrl,
+    imageUuid: image.imageUuid,
+  });
+
+  // Tier A — anchored on the prior scene image and/or character portraits.
+  if (refs.length > 0) {
+    const anchored = await tryGenerate(
+      config.image,
+      prompt,
+      { referenceImages: refs },
+      `referenceImages (${refs.length})`,
+    );
+    if (anchored) return asReal(anchored);
+  }
+
+  // Tier B — last resort: Tier A failed, or there was nothing to reference
+  // (e.g. the first scene, before any character exists).
+  return asReal(await generateImage(config.image, prompt));
+}
@@ -0,0 +1,425 @@
+import { chat } from "@infiplot/ai-client";
+import type {
+  Beat,
+  BeatActiveCharacter,
+  BeatChoice,
+  BeatChoiceEffect,
+  BeatNext,
+  ProviderConfig,
+  Session,
+  StoryStatePatch,
+} from "@infiplot/types";
+import { parseJsonLoose } from "../jsonParser";
+import { WRITER_SYSTEM, buildWriterUserMessage } from "../prompts";
+
+// ──────────────────────────────────────────────────────────────────────
+//  Writer agent — owns the narrative half of scene generation.
+//
+//  Output: { sceneSummary, sceneKey, entryBeatId, beats[] }
+//  Each beat carries activeCharacters[] (names + poses) the
+//  Cinematographer reads when composing the establishing shot.
+//
+//  Character DESIGN (visual + voice) is NOT this agent's job —
+//  it only names characters; the CharacterDesigner picks up any
+//  unknown name from beats[].activeCharacters.
+// ──────────────────────────────────────────────────────────────────────
+
+export type WriterOutput = { // Validated, repaired result of runWriter.
+  sceneSummary: string; // placeholder text when the model omitted it
+  sceneKey?: string; // normalized slug; undefined when absent or empty after normalization
+  entryBeatId: string; // always the id of a beat in `beats` (first beat when the declared one is unknown)
+  beats: Beat[]; // never empty: runWriter throws when the model returns no beats
+  /** Rewritten volatile story memory — merged onto the carried StoryState by
+   *  the director. Absent when the model omitted it (rare; bible just stales). */
+  storyStatePatch?: StoryStatePatch;
+};
+
+// Raw shapes — what the LLM produces before validation / coercion. The
+// field types are what the prompt asks for, not a runtime guarantee.
+type RawActiveCharacter = { // coerced by coerceActiveCharacters
+  name?: string;
+  pose?: string;
+};
+type RawEffect = { // coerced by coerceEffect
+  kind?: string;
+  targetBeatId?: string;
+  nextSceneSeed?: string;
+};
+type RawChoice = { // coerced by coerceChoice
+  id?: string;
+  label?: string;
+  effect?: RawEffect;
+};
+type RawNext = { // coerced by coerceNext
+  type?: string;
+  nextBeatId?: string;
+  choices?: RawChoice[];
+};
+type RawBeat = { // coerced by coerceBeat
+  id?: string;
+  narration?: string;
+  speaker?: string;
+  line?: string;
+  lineDelivery?: string;
+  activeCharacters?: RawActiveCharacter[];
+  next?: RawNext;
+};
+type RawStoryStatePatch = { // coerced by coerceStoryStatePatch
+  synopsis?: unknown;
+  openThreads?: unknown;
+  relationships?: unknown;
+  nextHook?: unknown;
+};
+type RawScene = { // top-level Writer JSON, consumed by runWriter
+  sceneSummary?: string;
+  sceneKey?: string;
+  entryBeatId?: string;
+  beats?: RawBeat[];
+  storyStatePatch?: RawStoryStatePatch;
+};
+
+// ──────────────────────────────────────────────────────────────────────
+//  POV (player viewpoint) handling — Pattern B (galgame standard):
+//    - speaker = "你"      → ALLOWED (renders as dialog box, never TTS'd)
+//    - any other POV term  → normalized to "你" (LLM slip-up safety net)
+//    - activeCharacters    → POV is NEVER allowed (player has no body in-scene)
+//    - CharacterDesigner   → never invoked for "你" or POV variants
+// ──────────────────────────────────────────────────────────────────────
+
+const POV_DISPLAY_NAME = "你"; // canonical speaker name for the player
+const POV_VARIANTS = new Set([ // aliases collapsed to POV_DISPLAY_NAME; matching is exact and case-sensitive, hence the spelled-out casings
+  "玩家",
+  "我",
+  "主角",
+  "protagonist",
+  "Protagonist",
+  "player",
+  "Player",
+  "PLAYER",
+  "MC",
+  "mc",
+  "Mc",
+  "I",
+  "i",
+  "me",
+  "Me",
+  "ME",
+]);
+
+/** True for the canonical POV name "你" and for every alias in POV_VARIANTS. */
+function isPovName(name: string): boolean {
+  if (name === POV_DISPLAY_NAME) return true;
+  return POV_VARIANTS.has(name);
+}
+
+// Collapse any POV alias to "你"; NPC names (and "你" itself) are returned
+// untouched. The caller is expected to pass already-trimmed input.
+function normalizeSpeakerName(name: string): string {
+  if (POV_VARIANTS.has(name)) {
+    return POV_DISPLAY_NAME;
+  }
+  return name;
+}
+
+// LLM JSON is unvalidated: a field typed `string` may arrive as a number,
+// null, or an object, and array entries may be null. Scalars are read
+// through this helper instead of calling `.trim()` directly, which would
+// throw and abort the whole scene. Finite numbers are stringified so numeric
+// ids (`"id": 1`, `"nextBeatId": 2`) keep working as graph keys; any other
+// non-string value, and any blank string, counts as absent.
+function looseText(raw: unknown): string | undefined {
+  if (typeof raw === "string") return raw.trim() || undefined;
+  if (typeof raw === "number" && Number.isFinite(raw)) return String(raw);
+  return undefined;
+}
+
+/**
+ * Coerce a raw choice effect. Only a well-formed `advance-beat` (with a
+ * usable target id) is kept as such; everything else becomes `change-scene`.
+ */
+function coerceEffect(raw: RawEffect | null | undefined): BeatChoiceEffect {
+  const targetBeatId = looseText(raw?.targetBeatId);
+  if (raw?.kind === "advance-beat" && targetBeatId) {
+    return { kind: "advance-beat", targetBeatId };
+  }
+  return {
+    kind: "change-scene",
+    nextSceneSeed: looseText(raw?.nextSceneSeed) ?? "未指定",
+  };
+}
+
+/** Coerce one raw choice; id and label default from its 1-based position. */
+function coerceChoice(raw: RawChoice | null | undefined, idx: number): BeatChoice {
+  return {
+    id: looseText(raw?.id) ?? `c${idx + 1}`,
+    label: looseText(raw?.label) ?? `选项 ${idx + 1}`,
+    effect: coerceEffect(raw?.effect),
+  };
+}
+
+/**
+ * Coerce a beat's `next`. A `choice` with at least one entry stays a choice;
+ * anything else becomes a `continue` to the declared id or `fallbackBeatId`.
+ */
+function coerceNext(
+  raw: RawNext | null | undefined,
+  fallbackBeatId: string,
+): BeatNext {
+  if (raw?.type === "choice" && Array.isArray(raw.choices) && raw.choices.length > 0) {
+    return {
+      type: "choice",
+      choices: raw.choices.map((c, i) => coerceChoice(c, i)),
+    };
+  }
+  return {
+    type: "continue",
+    nextBeatId: looseText(raw?.nextBeatId) ?? fallbackBeatId,
+  };
+}
+
+/**
+ * Coerce the on-stage character list. Entries without a usable name are
+ * dropped; returns undefined when nothing remains.
+ */
+function coerceActiveCharacters(
+  raw: RawActiveCharacter[] | undefined,
+): BeatActiveCharacter[] | undefined {
+  if (!Array.isArray(raw)) return undefined;
+  const out: BeatActiveCharacter[] = [];
+  for (const c of raw as Array<RawActiveCharacter | null | undefined>) {
+    const name = looseText(c?.name);
+    if (!name) continue;
+    // POV is never IN the picture — strip the LLM's slip-up silently so
+    // CharacterDesigner doesn't end up generating a portrait for the player.
+    if (isPovName(name)) continue;
+    const pose = looseText(c?.pose);
+    out.push(pose ? { name, pose } : { name });
+  }
+  return out.length > 0 ? out : undefined;
+}
+
+/**
+ * Coerce one raw beat into a valid Beat.
+ *
+ * @param raw        Raw beat from the model (may be malformed or null).
+ * @param idx        0-based position; used for the default id `b{idx+1}`.
+ * @param totalBeats Number of beats in the scene; decides the `continue` fallback.
+ */
+function coerceBeat(
+  raw: RawBeat | null | undefined,
+  idx: number,
+  totalBeats: number,
+): Beat {
+  const id = looseText(raw?.id) ?? `b${idx + 1}`;
+  // Non-last beats default their `continue` target to the following beat.
+  // The last beat gets an empty fallback on purpose: repairBeats() turns a
+  // last/dangling continue into a real scene-change exit so the player can
+  // never get stuck self-looping on it.
+  const fallback = idx + 1 < totalBeats ? `b${idx + 2}` : "";
+
+  const rawSpeaker = looseText(raw?.speaker);
+  // Normalize any POV variant (玩家/我/主角/protagonist/...) to "你".
+  // NPC names pass through unchanged. This means the LLM can slip and
+  // write "玩家" or "I" and we still render the dialog box correctly with
+  // speaker="你" — and TTS is automatically skipped because no Character
+  // record exists for "你".
+  const speaker = rawSpeaker ? normalizeSpeakerName(rawSpeaker) : undefined;
+
+  const line = looseText(raw?.line);
+  return {
+    id,
+    narration: looseText(raw?.narration),
+    speaker,
+    line,
+    // lineDelivery is meaningful only for NPC speakers (TTS). For POV
+    // speaker ("你") TTS is skipped, so lineDelivery would never be used.
+    lineDelivery:
+      line && speaker !== POV_DISPLAY_NAME
+        ? looseText(raw?.lineDelivery)
+        : undefined,
+    activeCharacters: coerceActiveCharacters(raw?.activeCharacters),
+    next: coerceNext(raw?.next, fallback),
+  };
+}
+
+const FALLBACK_SEED = "故事继续推进";
+
+/** Build the synthetic "继续" choice that leaves the scene from `beatId`. */
+function fallbackExitChoice(beatId: string): BeatChoice {
+  const effect: BeatChoiceEffect = {
+    kind: "change-scene",
+    nextSceneSeed: FALLBACK_SEED,
+  };
+  return { id: `${beatId}__exit`, label: "继续", effect };
+}
+
+// Beat ids are graph keys (the front-end's `beats.find(b => b.id === ...)`,
+// the session's `visitedBeatIds`, and `continue`/`advance-beat` targets). If
+// the model reuses an id across beats, the second occurrence becomes silently
+// unreachable and external references collapse to the first beat. Rename
+// duplicates; rewrite the renamed beat's OWN self-references. External
+// references stay pointing at the first occurrence.
+//
+// Every id the model produced is reserved up front, so a renamed duplicate
+// can never take an id that a LATER beat legitimately owns: for
+// [b1, b1, b1_2] the duplicate becomes b1_3 and b1_2 keeps its id (and the
+// references that target it).
+function ensureUniqueBeatIds(beats: Beat[]): Beat[] {
+  const taken = new Set(beats.map((b) => b.id)); // original ids + renames handed out
+  const claimed = new Set<string>(); // original ids whose first occurrence was seen
+
+  return beats.map((b): Beat => {
+    if (!claimed.has(b.id)) {
+      claimed.add(b.id);
+      return b;
+    }
+    const oldId = b.id;
+    let n = 2;
+    while (taken.has(`${oldId}_${n}`)) n += 1;
+    const newId = `${oldId}_${n}`;
+    taken.add(newId);
+
+    // Only references from this beat to its own (old) id follow the rename.
+    let next = b.next;
+    if (next.type === "continue" && next.nextBeatId === oldId) {
+      next = { type: "continue", nextBeatId: newId };
+    } else if (next.type === "choice") {
+      next = {
+        type: "choice",
+        choices: next.choices.map((c) =>
+          c.effect.kind === "advance-beat" && c.effect.targetBeatId === oldId
+            ? {
+                ...c,
+                effect: { kind: "advance-beat" as const, targetBeatId: newId },
+              }
+            : c,
+        ),
+      };
+    }
+    return { ...b, id: newId, next };
+  });
+}
+
+// Repairs referential integrity AND guarantees the scene is escapable:
+// - a `continue` to a missing/self id is repointed to the next beat in order;
+//   a last/dangling continue with nowhere to go becomes a scene-change exit
+// - an `advance-beat` to a missing id is downgraded to a scene change
+// - if no change-scene exit exists anywhere, one is appended to the last beat
+// Returns a new array; the input beats are not mutated.
+function repairBeats(beats: Beat[]): Beat[] {
+  const ids = new Set(beats.map((b) => b.id));
+
+  const fixed: Beat[] = beats.map((b, idx): Beat => {
+    if (b.next.type === "continue") {
+      const target = b.next.nextBeatId;
+      if (ids.has(target) && target !== b.id) return b; // valid, non-self target: keep as-is
+      const nextByIndex = beats[idx + 1]?.id;
+      if (nextByIndex) {
+        return { ...b, next: { type: "continue", nextBeatId: nextByIndex } };
+      }
+      return { ...b, next: { type: "choice", choices: [fallbackExitChoice(b.id)] } }; // last beat: nowhere to continue to
+    }
+
+    const patched = b.next.choices.map((c) =>
+      c.effect.kind === "advance-beat" && !ids.has(c.effect.targetBeatId)
+        ? {
+            ...c,
+            effect: {
+              kind: "change-scene" as const,
+              nextSceneSeed: "未指定（导演引用不存在的 beat，已降级为换场）",
+            },
+          }
+        : c,
+    );
+    return { ...b, next: { type: "choice", choices: patched } };
+  });
+
+  const hasExit = fixed.some( // does any beat offer a change-scene choice?
+    (b) =>
+      b.next.type === "choice" &&
+      b.next.choices.some((c) => c.effect.kind === "change-scene"),
+  );
+  if (!hasExit && fixed.length > 0) {
+    const lastIdx = fixed.length - 1;
+    const last = fixed[lastIdx]!;
+    const existing = last.next.type === "choice" ? last.next.choices : []; // a `continue` on the last beat is replaced by the exit
+    fixed[lastIdx] = {
+      ...last,
+      next: { type: "choice", choices: [...existing, fallbackExitChoice(last.id)] },
+    };
+  }
+
+  return fixed;
+}
+
+// Choice ids are keys the front-end uses to cache + consume prefetched
+// scenes. Two beats both defaulting to c1/c2 would make a transition reuse
+// the WRONG prefetched scene — so force every choice id to be unique within
+// the scene.
+//
+// Pure: returns new beat/choice objects where an id had to change and never
+// mutates the input (the previous version rewrote `c.id` in place).
+function ensureUniqueChoiceIds(beats: Beat[]): Beat[] {
+  const seen = new Set<string>();
+
+  // Claim `id`, or the first free `${id}_${n}` (n >= 2) if it is taken.
+  const claim = (id: string): string => {
+    let unique = id;
+    if (seen.has(unique)) {
+      let n = 2;
+      while (seen.has(`${id}_${n}`)) n += 1;
+      unique = `${id}_${n}`;
+    }
+    seen.add(unique);
+    return unique;
+  };
+
+  return beats.map((b): Beat => {
+    if (b.next.type !== "choice") return b;
+    const choices = b.next.choices.map((c) => {
+      const id = claim(c.id);
+      return id === c.id ? c : { ...c, id };
+    });
+    return { ...b, next: { type: "choice", choices } };
+  });
+}
+
+// Turn the model's sceneKey into a lowercase, dash-separated ASCII slug.
+// Anything outside [a-z0-9-] (中文, spaces, punctuation) becomes a dash, runs
+// of dashes collapse, and a leading/trailing dash is stripped. An empty
+// result yields undefined, so the scene simply has no key to match against
+// a prior scene.
+function normalizeSceneKey(raw: string | undefined): string | undefined {
+  if (!raw) return undefined;
+  const lowered = raw.trim().toLowerCase();
+  const dashed = lowered.replace(/[^a-z0-9-]+/g, "-");
+  const collapsed = dashed.replace(/-+/g, "-");
+  const slug = collapsed.replace(/^-|-$/g, "");
+  return slug === "" ? undefined : slug;
+}
+
+/**
+ * Keep only the non-empty, trimmed string entries of an untrusted value.
+ * Returns undefined for non-arrays and for arrays with nothing usable.
+ */
+function coerceStringArray(raw: unknown): string[] | undefined {
+  if (!Array.isArray(raw)) return undefined;
+  const cleaned = raw.flatMap((entry): string[] => {
+    if (typeof entry !== "string") return [];
+    const text = entry.trim();
+    return text.length > 0 ? [text] : [];
+  });
+  return cleaned.length === 0 ? undefined : cleaned;
+}
+
+// Extract the volatile story-memory rewrite from the Writer's JSON. A field
+// is copied only when it is non-empty after coercion; if nothing survives
+// (or the patch is absent / not an object) the result is undefined, so the
+// director leaves the carried StoryState untouched.
+function coerceStoryStatePatch(
+  raw: RawStoryStatePatch | undefined,
+): StoryStatePatch | undefined {
+  if (!raw || typeof raw !== "object") return undefined;
+
+  const trimmed = (value: unknown): string =>
+    typeof value === "string" ? value.trim() : "";
+  const patch: StoryStatePatch = {};
+
+  const synopsis = trimmed(raw.synopsis);
+  if (synopsis !== "") patch.synopsis = synopsis;
+
+  const openThreads = coerceStringArray(raw.openThreads);
+  if (openThreads !== undefined) patch.openThreads = openThreads;
+
+  const relationships = coerceStringArray(raw.relationships);
+  if (relationships !== undefined) patch.relationships = relationships;
+
+  const nextHook = trimmed(raw.nextHook);
+  if (nextHook !== "") patch.nextHook = nextHook;
+
+  const isEmpty = Object.keys(patch).length === 0;
+  return isEmpty ? undefined : patch;
+}
+
+/**
+ * Run the Writer: one LLM call producing the scene's beats, then coerce and
+ * repair them into a valid, escapable beat graph.
+ *
+ * @param config  Provider configuration handed to chat().
+ * @param session Session used to build the Writer prompt.
+ * @returns The validated scene; `entryBeatId` always names a beat in `beats`.
+ * @throws Error("Writer returned no beats") when the response has no usable
+ *         `beats` array — including when it is not a JSON object at all.
+ *         Errors from chat() / parseJsonLoose() propagate unchanged.
+ */
+export async function runWriter(
+  config: ProviderConfig,
+  session: Session,
+): Promise<WriterOutput> {
+  const raw = await chat(
+    config,
+    [
+      { role: "system", content: WRITER_SYSTEM },
+      { role: "user", content: buildWriterUserMessage(session) },
+    ],
+    { temperature: 0.9, responseFormat: "json_object" },
+  );
+
+  // The parse result is unvalidated. Treat a non-object (`null`, a string, …)
+  // as an empty scene so it fails with the descriptive error below instead of
+  // a TypeError, and read scalars defensively: a numeric `entryBeatId` or a
+  // non-string `sceneKey` must not crash `.trim()`.
+  const parsedRaw: unknown = parseJsonLoose<RawScene>(raw);
+  const parsed: RawScene =
+    typeof parsedRaw === "object" && parsedRaw !== null
+      ? (parsedRaw as RawScene)
+      : {};
+  const text = (value: unknown): string | undefined => {
+    if (typeof value === "string") return value.trim() || undefined;
+    if (typeof value === "number" && Number.isFinite(value)) return String(value);
+    return undefined;
+  };
+
+  const rawBeats = Array.isArray(parsed.beats) ? parsed.beats : [];
+  if (rawBeats.length === 0) {
+    throw new Error("Writer returned no beats");
+  }
+
+  // Order matters: ids must be unique before references are repaired, and
+  // repair may append exit choices whose ids then need de-duplicating.
+  const beats = ensureUniqueChoiceIds(
+    repairBeats(
+      ensureUniqueBeatIds(
+        rawBeats.map((b, i) => coerceBeat(b, i, rawBeats.length)),
+      ),
+    ),
+  );
+
+  const firstBeat = beats[0];
+  if (!firstBeat) {
+    throw new Error("Writer returned no beats");
+  }
+
+  const declaredEntry = text(parsed.entryBeatId);
+  const entryBeatId =
+    declaredEntry && beats.some((b) => b.id === declaredEntry)
+      ? declaredEntry
+      : firstBeat.id;
+
+  return {
+    sceneSummary: text(parsed.sceneSummary) ?? "未指定场景概要",
+    sceneKey: normalizeSceneKey(text(parsed.sceneKey)),
+    entryBeatId,
+    beats,
+    storyStatePatch: coerceStoryStatePatch(parsed.storyStatePatch),
+  };
+}
+
+// List every distinct non-POV character name used by the scene's beats, in
+// first-appearance order, so the orchestrator can decide which ones still
+// need the CharacterDesigner. Names come from both `speaker` and
+// `activeCharacters` (a character can be on-screen without speaking).
+//
+// POV ("你" / 玩家 / 主角 / ...) is excluded entirely — the player is never
+// designed (no portrait, no voice, no archetype).
+export function collectActiveCharacterNames(beats: Beat[]): string[] {
+  const mentioned = beats.flatMap((beat): string[] => {
+    const speakerPart = beat.speaker ? [beat.speaker] : [];
+    const onStagePart = (beat.activeCharacters ?? []).map((c) => c.name);
+    return [...speakerPart, ...onStagePart];
+  });
+  const npcNames = mentioned.filter((name) => !isPovName(name));
+  return Array.from(new Set(npcNames));
+}
+
+// Re-export POV constants for downstream filters (director's orphanSpeakers).
+export { POV_DISPLAY_NAME, POV_VARIANTS, isPovName, normalizeSpeakerName };