refactor: flatten monorepo to single web package (#12)

Flatten the pnpm monorepo (apps/web + packages/*) into a single web package at the repo root.

- Move app/lib/components/scripts/public to root; drop apps/web and packages/* wrappers
- Rewrite tsconfig paths (@infiplot/*) to ./lib/*; turbopack.root = __dirname
- Update Vercel (no root-directory) and Cloudflare (pnpm build:cf at root) deploy paths
- Regenerate pnpm-lock.yaml to drop stale workspace importers
- Bump engines.node to >=22 to match wrangler

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Zonghao Yuan
2026-06-03 00:55:45 +08:00
committed by GitHub
parent 9543c3dba1
commit dc5ecd60f6
221 changed files with 241 additions and 379 deletions
+90
View File
@@ -0,0 +1,90 @@
import { chat } from "@infiplot/ai-client";
import type { ProviderConfig, Session, StoryState } from "@infiplot/types";
import { parseJsonLoose } from "../jsonParser";
import { ARCHITECT_SYSTEM, buildArchitectUserMessage } from "../prompts";
// ──────────────────────────────────────────────────────────────────────
// Architect agent — ONE LLM call at session start.
//
// Expands the user's (often terse) world + style prompt into a real story
// bible: a second-person protagonist with a want and a flaw, a single
// central dramatic question (logline), a genre frame that anchors the
// 爽点 rhythm, an engineered cold-open for scene 1 (nextHook), and a small
// intentional cast. Seeds the StoryState that the Writer reads and updates
// every scene — so the story has a spine from beat one instead of being
// improvised cold.
//
// Everything is best-effort coerced with fallbacks: a malformed LLM
// response can never abort session start — worst case the Writer just gets
// a thinner bible and improvises more.
// ──────────────────────────────────────────────────────────────────────
// Unvalidated shape of the Architect's JSON reply. Every field is `unknown`
// on purpose: runArchitect coerces each one through str() / strArray()
// before it is allowed into a StoryState.
type RawStoryState = {
logline?: unknown;
genreTags?: unknown;
protagonist?: unknown;
castNotes?: unknown;
synopsis?: unknown;
openThreads?: unknown;
relationships?: unknown;
nextHook?: unknown;
};
// Coerce an untrusted value to text: strings come back trimmed, anything
// else (numbers, objects, null, undefined) becomes the empty string.
function str(raw: unknown): string {
  if (typeof raw !== "string") {
    return "";
  }
  return raw.trim();
}
// Coerce an untrusted value to a list of non-empty trimmed strings.
// Non-arrays, and arrays that contain no usable string, yield undefined so
// the caller can leave the corresponding field unset.
function strArray(raw: unknown): string[] | undefined {
  if (!Array.isArray(raw)) return undefined;

  const kept: string[] = [];
  for (const item of raw) {
    if (typeof item !== "string") continue;
    const text = item.trim();
    if (text.length > 0) kept.push(text);
  }
  return kept.length === 0 ? undefined : kept;
}
// Runs the Architect: one chat() call whose JSON reply is coerced into the
// initial StoryState. Never rejects — any failure in chat(), in
// parseJsonLoose(), or while reading the parsed value is logged and answered
// with a minimal bible seeded from session.worldSetting.
export async function runArchitect(
  config: ProviderConfig,
  session: Session,
): Promise<StoryState> {
  // Optional bible fields are left undefined rather than stored as "".
  const optional = (text: string): string | undefined =>
    text === "" ? undefined : text;

  try {
    const reply = await chat(
      config,
      [
        { role: "system", content: ARCHITECT_SYSTEM },
        { role: "user", content: buildArchitectUserMessage(session) },
      ],
      { temperature: 0.85, responseFormat: "json_object" },
    );
    const bible = parseJsonLoose<RawStoryState>(reply);

    const logline = str(bible.logline);
    const genreTags = str(bible.genreTags);
    const protagonist = str(bible.protagonist);
    const castNotes = str(bible.castNotes);
    const synopsis = str(bible.synopsis);
    const nextHook = str(bible.nextHook);

    return {
      // Stable spine: when the model gives nothing usable, fall back to the
      // raw world prompt / a stock protagonist so the bible is never empty.
      logline: logline === "" ? session.worldSetting : logline,
      genreTags,
      protagonist:
        protagonist === ""
          ? "你是这个故事的主角(第二人称视角,永不出现在画面里)。"
          : protagonist,
      castNotes: optional(castNotes),
      // Volatile seeds: the opening Writer rewrites these via its patch.
      synopsis: synopsis === "" ? "故事即将开始。" : synopsis,
      openThreads: strArray(bible.openThreads),
      relationships: strArray(bible.relationships),
      nextHook: optional(nextHook),
    };
  } catch (err) {
    // Best-effort agent: session start must survive a network error or an
    // unrepairable reply, so log and hand the Writer a minimal bible.
    let reason: string;
    if (err instanceof Error) {
      reason = err.message;
    } else {
      reason = String(err);
    }
    console.error(`[architect] failed, using minimal bible: ${reason}`);
    return {
      logline: session.worldSetting,
      genreTags: "",
      protagonist:
        "你是这个故事的主角(第二人称视角,永不出现在画面里)。",
      synopsis: "故事即将开始。",
    };
  }
}
+155
View File
@@ -0,0 +1,155 @@
import { chat, generateImage } from "@infiplot/ai-client";
import { provisionVoice } from "@infiplot/tts-client";
import type {
Character,
CharacterVoice,
EngineConfig,
Session,
} from "@infiplot/types";
import { parseJsonLoose } from "../jsonParser";
import { mockImageDataUri } from "../mockImage";
import {
CHARACTER_DESIGNER_SYSTEM,
buildCharacterDesignerUserMessage,
buildCharacterPortraitPrompt,
} from "../prompts";
// ──────────────────────────────────────────────────────────────────────
// CharacterDesigner agent — designs ONE new character.
//
// Exposed as three GRANULAR stages so the director can schedule the slow
// parts around the Painter (a voice is never needed to paint a scene, and
// only entry-beat characters' portraits are referenced by the Painter):
//
// 1. designCharacterCard — ONE LLM call → visual + voice TEXT cards
// (intentional bundling: the same agent thinks about who this character
// IS, keeping appearance and vocal personality coherent)
// 2. renderCharacterPortrait — base portrait image (Runware URL + UUID)
// 3. provisionCharacterVoice — Xiaomi MiMo voicedesign → reference audio
//
// Each step degrades gracefully — if image gen fails the character just has
// no portrait; if voice gen fails it has no voice. The game keeps running.
// ──────────────────────────────────────────────────────────────────────
// Shape the design LLM is asked to return (see runDesignLLM). Both fields
// are optional because the model may omit either; designCharacterCard
// supplies the fallbacks.
type CharacterDesignOutput = {
visualDescription?: string;
voiceDescription?: string;
};
// TEMP: per-phase timing for latency diagnosis. Same convention as the
// orchestrator's tlog. Remove after we have data on real-world numbers.
//
// Logs "<label>: <elapsed>ms", where elapsed is the wall-clock time since
// the Date.now() timestamp `t0`.
function tlog(label: string, t0: number): void {
  const elapsedMs = Date.now() - t0;
  console.log(`${label}: ${elapsedMs}ms`);
}
// Single LLM round-trip for the character design stage: sends the designer
// system prompt plus a per-character user message to the text provider and
// returns the loosely-parsed JSON reply. Errors from chat() or
// parseJsonLoose() propagate to the caller.
async function runDesignLLM(
  config: EngineConfig,
  session: Session,
  charName: string,
): Promise<CharacterDesignOutput> {
  const textProvider = config.text;
  const userPrompt = buildCharacterDesignerUserMessage(charName, session);
  const reply = await chat(
    textProvider,
    [
      { role: "system", content: CHARACTER_DESIGNER_SYSTEM },
      { role: "user", content: userPrompt },
    ],
    { temperature: 0.7, responseFormat: "json_object" },
  );
  return parseJsonLoose<CharacterDesignOutput>(reply);
}
// Generate the per-character base portrait. The portrait is a "concept
// sheet" — single character, neutral pose, plain background — so it works
// well as a Runware referenceImages anchor for later scenes.
//
// Resolves to the URL (client display + URL-form references) and the UUID
// (reference handle for later Painter calls); both arrive in the single
// generateImage response.
//
// Mock mode returns the mock data URI as basePortraitUrl and no UUID (the
// Painter is short-circuited in mock mode, so the UUID is never needed).
//
// Never rejects: any failure is logged and answered with {} so the
// character simply has no portrait.
export async function renderCharacterPortrait(
  config: EngineConfig,
  charName: string,
  visualDescription: string,
  styleGuide: string,
): Promise<{ basePortraitUrl?: string; basePortraitUuid?: string }> {
  try {
    if (!config.mockImage) {
      const portraitPrompt = buildCharacterPortraitPrompt(
        charName,
        visualDescription,
        styleGuide,
      );
      const generated = await generateImage(config.image, portraitPrompt);
      return {
        basePortraitUrl: generated.imageUrl,
        basePortraitUuid: generated.imageUuid,
      };
    }
    const placeholder = await mockImageDataUri();
    return { basePortraitUrl: placeholder };
  } catch (err) {
    let reason: string;
    if (err instanceof Error) {
      reason = err.message;
    } else {
      reason = String(err);
    }
    console.error(`[characterDesigner] portrait gen failed for ${charName}: ${reason}`);
    // No portrait at all — degrade gracefully.
    return {};
  }
}
// Provision a TTS voice from a textual voice description.
// Resolves to undefined when no TTS provider is configured, or when
// provisionVoice() fails (the failure is logged with the character's name);
// it never rejects.
export async function provisionCharacterVoice(
  config: EngineConfig,
  voiceDescription: string,
  charName: string,
): Promise<CharacterVoice | undefined> {
  const tts = config.tts;
  if (!tts) {
    return undefined;
  }
  try {
    const voice = await provisionVoice(tts, voiceDescription);
    return voice;
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    console.error(`[characterDesigner] voice provision failed for ${charName}: ${reason}`);
    return undefined;
  }
}
// The cheap first stage: design the visual + voice TEXT cards in one LLM
// call. The director then schedules renderCharacterPortrait /
// provisionCharacterVoice around the Painter. Multiple new characters in the
// same scene run this stage in parallel at the director level.
// Text-only result of the design stage. `visualDescription` may be absent
// when the model gave none; `voiceDescription` is always present because
// designCharacterCard substitutes a generic prompt when the model omits it.
export type CharacterCard = {
name: string;
visualDescription?: string;
voiceDescription: string;
};
// Design stage: one LLM call that yields the character's visual + voice
// text cards, timed via tlog.
//
// The reply is untrusted JSON, so each field is used only when it is
// actually a string. A missing, empty or wrongly-typed visualDescription
// becomes undefined; a missing, empty or wrongly-typed voiceDescription is
// replaced by a generic prompt built from the character name and world
// setting. Errors thrown by the LLM call itself still propagate.
export async function designCharacterCard(
  config: EngineConfig,
  session: Session,
  charName: string,
): Promise<CharacterCard> {
  // Optional chaining only guards null/undefined; a number or nested object
  // would otherwise blow up on `.trim()`.
  const text = (value: unknown): string =>
    typeof value === "string" ? value.trim() : "";

  const tDesign = Date.now();
  const design: unknown = await runDesignLLM(config, session, charName);
  tlog(`[charDesigner ${charName}] design LLM`, tDesign);

  // A reply that parsed to null / a primitive is treated as an empty card.
  const fields =
    design !== null && typeof design === "object"
      ? (design as Record<string, unknown>)
      : {};

  return {
    name: charName,
    visualDescription: text(fields.visualDescription) || undefined,
    voiceDescription:
      text(fields.voiceDescription) ||
      `请根据角色名「${charName}」推断其性别、年龄与气质,生成最贴合的音色。所属世界观:${session.worldSetting}`,
  };
}
// Voice-only provisioning for a character the LLM mentioned before we ever
// designed them (e.g. the Writer used a speaker name that was not in
// `activeCharacters`). Used by the directInsertBeat path and as a safety net
// in directScene. No portrait is generated: the resulting Character carries
// just a name, a generic voice description derived from the name and world
// setting, and whatever voice provisionCharacterVoice could obtain (possibly
// undefined).
export async function provisionVoiceForName(
  config: EngineConfig,
  session: Session,
  charName: string,
): Promise<Character> {
  const genericDescription = `请根据角色名「${charName}」推断其性别、年龄与气质,生成最贴合的音色。所属世界观:${session.worldSetting}`;
  const provisioned = await provisionCharacterVoice(
    config,
    genericDescription,
    charName,
  );
  return {
    name: charName,
    voiceDescription: genericDescription,
    voice: provisioned,
  };
}
+86
View File
@@ -0,0 +1,86 @@
import { chat } from "@infiplot/ai-client";
import type { BeatActiveCharacter, ProviderConfig } from "@infiplot/types";
import { parseJsonLoose } from "../jsonParser";
import {
CINEMATOGRAPHER_SYSTEM,
buildCinematographerUserMessage,
} from "../prompts";
// ──────────────────────────────────────────────────────────────────────
// Cinematographer agent — translates the Writer's narrative scene
// summary into an English compositional prompt for FLUX.
//
// Reads: sceneSummary + entry beat's activeCharacters (poses)
// + prior sceneKey (for continuity hints)
// Writes: { shotType, integratedPrompt }
//
// Does NOT describe character APPEARANCE — that's appended at the
// Painter stage from session.characters[].visualDescription. The
// Cinematographer only positions named characters in the frame and
// describes the environment + lighting + camera framing.
//
// This separation lets the Cinematographer run IN PARALLEL with the
// CharacterDesigner — neither needs the other's output. They both
// feed independently into the Painter prompt.
// ──────────────────────────────────────────────────────────────────────
// Validated result of the Cinematographer. Both fields are always non-empty
// strings: runCinematographer substitutes defaults when the model omits them.
export type CinematographerOutput = {
shotType: string;
integratedPrompt: string;
};
// Shape of the model's JSON reply before defaults are applied; either field
// may be missing.
type RawCinematographerOutput = {
shotType?: string;
integratedPrompt?: string;
};
// Everything the Cinematographer reads; the fields are forwarded, in this
// order, to buildCinematographerUserMessage.
export type CinematographerInput = {
// The Writer's narrative scene summary (also used for the fallback prompt).
sceneSummary: string;
styleGuide: string;
// Characters (with poses) active in the scene's entry beat.
entryBeatActive: BeatActiveCharacter[];
/** Entry beat's speaker — drives the dynamic camera policy:
* NPC name → NPC looks toward camera (close-up)
* "你" → medium shot, NPC listens
* undefined → wide establishing shot */
entryBeatSpeaker?: string;
// Scene keys of the prior and current scene, used for continuity hints.
priorSceneKey?: string;
currentSceneKey?: string;
};
// Runs the Cinematographer: one chat() call that turns the Writer's scene
// summary (plus entry-beat characters, speaker and scene keys) into a shot
// type and an integrated image prompt.
//
// The reply is untrusted JSON. Fields are used only when they are non-empty
// strings; otherwise shotType defaults to "medium shot" and integratedPrompt
// is synthesized from input.sceneSummary, so the Painter always receives
// something. Errors thrown by chat() or parseJsonLoose() propagate.
export async function runCinematographer(
  config: ProviderConfig,
  input: CinematographerInput,
): Promise<CinematographerOutput> {
  const raw = await chat(
    config,
    [
      { role: "system", content: CINEMATOGRAPHER_SYSTEM },
      {
        role: "user",
        content: buildCinematographerUserMessage(
          input.sceneSummary,
          input.styleGuide,
          input.entryBeatActive,
          input.entryBeatSpeaker,
          input.priorSceneKey,
          input.currentSceneKey,
        ),
      },
    ],
    { temperature: 0.6, responseFormat: "json_object" },
  );
  const parsed: unknown = parseJsonLoose<RawCinematographerOutput>(raw);

  // A reply that parsed to null / a primitive counts as "nothing usable".
  const fields =
    parsed !== null && typeof parsed === "object"
      ? (parsed as Record<string, unknown>)
      : {};
  // `?.trim()` alone would throw on a number or nested object, defeating the
  // fallback below — only real strings are trimmed.
  const text = (value: unknown): string =>
    typeof value === "string" ? value.trim() : "";

  // Fallback: if the LLM produced nothing usable, synthesize a minimal
  // integratedPrompt from the Writer's sceneSummary so the Painter has
  // SOMETHING to work with rather than blowing up the whole pipeline.
  const integratedPrompt =
    text(fields.integratedPrompt) ||
    `A cinematic illustration depicting: ${input.sceneSummary}. Wide establishing shot, natural lighting, atmospheric mood.`;

  return {
    shotType: text(fields.shotType) || "medium shot",
    integratedPrompt,
  };
}
+163
View File
@@ -0,0 +1,163 @@
import { generateImage } from "@infiplot/ai-client";
import type { GenerateImageOptions, GenerateImageResult } from "@infiplot/ai-client";
import type {
Beat,
Character,
EngineConfig,
ProviderConfig,
} from "@infiplot/types";
import { mockImageDataUri } from "../mockImage";
import { buildPainterPrompt } from "../prompts";
// ──────────────────────────────────────────────────────────────────────
// Painter — final image generation with multi-reference anchoring.
//
// FLUX.2 [klein] 9B KV does NOT support seedImage (img2img). Instead,
// visual continuity comes entirely from `referenceImages` (capped at 4),
// which the KV-optimized variant accelerates ~2.5× via key-value caching
// of reference latents.
//
// References are slotted in priority order (max 4):
// 1. Prior scene image — when sceneKey matched a previous scene, this
// anchors the same physical space (lighting/layout/style continuity)
// 2. Entry beat's speaker portrait — the NPC the player is talking with
// (most visually prominent)
// 3. Other on-stage NPCs' portraits — secondary characters in the frame
//
// References are sent as URLs (preferred — see collectReferenceImages: the
// UUID is not always accepted by the referenceImages pipeline) or UUIDs
// (backstop when no URL is stored). Base64 fallback was removed when
// generateImage switched to outputType=URL, which always returns both a UUID
// and a URL so we never lack a cheap reference handle.
//
// Failure handling — two-tier degradation:
// A. referenceImages call (preferred — full visual anchoring)
// B. pure text-to-image fallback (last resort if Runware refs API errors)
// ──────────────────────────────────────────────────────────────────────
// Upper bound on entries sent as `referenceImages`; the prior-scene image
// and the character portraits share these slots.
const MAX_REFERENCE_IMAGES = 4;
// Inputs runPainter needs besides the engine config and the entry beat.
export type PainterInput = {
// Compositional prompt; passed to buildPainterPrompt together with
// styleGuide and onStageCharacters.
integratedPrompt: string;
styleGuide: string;
// Characters whose stored portraits may be used as reference images.
onStageCharacters: Character[];
/**
* Prior scene's Runware UUID or URL. When set (= sceneKey hit a prior
* scene), it slots into referenceImages[0] for spatial continuity.
* Capacity-wise this displaces ONE character portrait — slot is shared
* with character refs, capped at 4 total per Runware spec.
*/
priorSceneImage?: string;
};
// Pick the references we send to Runware as `referenceImages`. Priority:
//   slot 0:  priorSceneImage (if any — sceneKey continuity)
//   slot 1:  entry beat's speaker portrait (the NPC speaking to the player)
//   slot 2+: other on-stage NPCs from the entry beat's activeCharacters
// The result is capped at MAX_REFERENCE_IMAGES and contains at most one
// portrait per character name. Characters without a stored portrait are
// skipped.
export function collectReferenceImages(
  characters: Character[],
  entryBeat: Beat | undefined,
  priorSceneImage: string | undefined,
): string[] {
  const picked: string[] = [];
  const usedNames = new Set<string>();

  // The prior scene image goes first: backdrop drift is the most jarring
  // discontinuity across same-sceneKey scenes, while character drift is
  // partially masked by the character archetype text in the prompt.
  if (priorSceneImage) {
    picked.push(priorSceneImage);
  }

  // Portrait candidates in priority order: the speaker, then everyone listed
  // in the entry beat's activeCharacters.
  const candidates: string[] = [];
  const speakerName = entryBeat?.speaker;
  if (speakerName) {
    candidates.push(speakerName);
  }
  for (const active of entryBeat?.activeCharacters ?? []) {
    candidates.push(active.name);
  }

  for (const name of candidates) {
    if (picked.length >= MAX_REFERENCE_IMAGES) break;
    if (usedNames.has(name)) continue;

    // Prefer URL over UUID: Runware's `imageInference` returns a UUID, but
    // that UUID isn't always recognized by the `referenceImages` pipeline
    // (the error surfaces as `failedToTransferImage`). The URL is Runware's
    // own CDN link, which they can always fetch. The UUID is kept as a
    // backstop for cases where the URL is missing (e.g. legacy session state).
    const owner = characters.find((candidate) => candidate.name === name);
    const portrait = owner?.basePortraitUrl ?? owner?.basePortraitUuid;
    if (!portrait) continue;

    picked.push(portrait);
    usedNames.add(name);
  }

  return picked.slice(0, MAX_REFERENCE_IMAGES);
}
// generateImage() wrapper that never rejects: a failure is logged as a
// warning tagged with `label` and reported as null so the caller can fall
// through to its next tier.
async function tryGenerate(
  config: ProviderConfig,
  prompt: string,
  options: GenerateImageOptions,
  label: string,
): Promise<GenerateImageResult | null> {
  try {
    const generated = await generateImage(config, prompt, options);
    return generated;
  } catch (err) {
    let reason: string;
    if (err instanceof Error) {
      reason = err.message;
    } else {
      reason = String(err);
    }
    console.warn(`[painter] ${label} failed: ${reason}`);
    return null;
  }
}
// Outcome of runPainter, discriminated by `kind`: "real" results come from
// generateImage and carry both URL and UUID; "mock" results (mock mode)
// carry only the mock image's data URI.
export type PainterResult =
| { kind: "real"; imageUrl: string; imageUuid: string }
| { kind: "mock"; imageUrl: string };
// Produce the scene image.
//   - mock mode: returns the mock data URI, no generation call.
//   - Tier A: when at least one reference image is available, generate with
//     `referenceImages`; a failure here is logged and swallowed.
//   - Tier B: plain text-to-image, used when Tier A failed or there was
//     nothing to reference. Errors from this call propagate to the caller.
export async function runPainter(
  config: EngineConfig,
  input: PainterInput,
  entryBeat: Beat | undefined,
): Promise<PainterResult> {
  if (config.mockImage) {
    const placeholder = await mockImageDataUri();
    return { kind: "mock", imageUrl: placeholder };
  }

  const prompt = buildPainterPrompt(
    input.integratedPrompt,
    input.styleGuide,
    input.onStageCharacters,
  );
  const refs = collectReferenceImages(
    input.onStageCharacters,
    entryBeat,
    input.priorSceneImage,
  );

  // Tier A — with referenceImages (priorSceneImage + character portraits).
  // FLUX.2 [klein] 9B KV's KV cache accelerates this multi-reference path
  // ~2.5× compared to the non-KV variant.
  const anchored =
    refs.length === 0
      ? null
      : await tryGenerate(
          config.image,
          prompt,
          { referenceImages: refs },
          `referenceImages (${refs.length})`,
        );

  // Tier B — pure text-to-image, the last resort.
  const result = anchored ?? (await generateImage(config.image, prompt));
  return {
    kind: "real",
    imageUrl: result.imageUrl,
    imageUuid: result.imageUuid,
  };
}
+425
View File
@@ -0,0 +1,425 @@
import { chat } from "@infiplot/ai-client";
import type {
Beat,
BeatActiveCharacter,
BeatChoice,
BeatChoiceEffect,
BeatNext,
ProviderConfig,
Session,
StoryStatePatch,
} from "@infiplot/types";
import { parseJsonLoose } from "../jsonParser";
import { WRITER_SYSTEM, buildWriterUserMessage } from "../prompts";
// ──────────────────────────────────────────────────────────────────────
// Writer agent — owns the narrative half of scene generation.
//
// Output: { sceneSummary, sceneKey, entryBeatId, beats[] }
// Each beat carries activeCharacters[] (names + poses) the
// Cinematographer reads when composing the establishing shot.
//
// Character DESIGN (visual + voice) is NOT this agent's job —
// it only names characters; the CharacterDesigner picks up any
// unknown name from beats[].activeCharacters.
// ──────────────────────────────────────────────────────────────────────
// Validated result of runWriter. `beats` is never empty and `entryBeatId`
// always names one of them (runWriter falls back to the first beat).
export type WriterOutput = {
sceneSummary: string;
// Normalized lowercase-with-dashes slug; undefined when the model gave
// nothing usable.
sceneKey?: string;
entryBeatId: string;
beats: Beat[];
/** Rewritten volatile story memory — merged onto the carried StoryState by
* the director. Absent when the model omitted it (rare; bible just stales). */
storyStatePatch?: StoryStatePatch;
};
// Raw shapes — what the LLM produces before validation / coercion.
// Every field is optional because the model may omit anything; the
// coerce* functions below turn these into the strict Beat types.
// One entry of a beat's activeCharacters list.
type RawActiveCharacter = {
name?: string;
pose?: string;
};
// Effect of picking a choice; `kind` is expected to be "advance-beat" or
// "change-scene" (anything else is coerced to a scene change).
type RawEffect = {
kind?: string;
targetBeatId?: string;
nextSceneSeed?: string;
};
type RawChoice = {
id?: string;
label?: string;
effect?: RawEffect;
};
// What follows a beat; `type` is expected to be "choice" or "continue".
type RawNext = {
type?: string;
nextBeatId?: string;
choices?: RawChoice[];
};
type RawBeat = {
id?: string;
narration?: string;
speaker?: string;
line?: string;
lineDelivery?: string;
activeCharacters?: RawActiveCharacter[];
next?: RawNext;
};
// Volatile story-memory rewrite; fields are `unknown` and are type-checked
// individually in coerceStoryStatePatch.
type RawStoryStatePatch = {
synopsis?: unknown;
openThreads?: unknown;
relationships?: unknown;
nextHook?: unknown;
};
// Top-level object of the Writer's JSON reply.
type RawScene = {
sceneSummary?: string;
sceneKey?: string;
entryBeatId?: string;
beats?: RawBeat[];
storyStatePatch?: RawStoryStatePatch;
};
// ──────────────────────────────────────────────────────────────────────
// POV (player viewpoint) handling — Pattern B (galgame standard):
// - speaker = "你" → ALLOWED (renders as dialog box, never TTS'd)
// - any other POV term → normalized to "你" (LLM slip-up safety net)
// - activeCharacters → POV is NEVER allowed (player has no body in-scene)
// - CharacterDesigner → never invoked for "你" or POV variants
// ──────────────────────────────────────────────────────────────────────
// Canonical speaker name for the player's point of view.
const POV_DISPLAY_NAME = "你";
// Other spellings the model may use for the player. Matching is exact and
// case-sensitive, which is why the common casings are listed explicitly.
const POV_VARIANTS = new Set([
"玩家",
"我",
"主角",
"protagonist",
"Protagonist",
"player",
"Player",
"PLAYER",
"MC",
"mc",
"Mc",
"I",
"i",
"me",
"Me",
"ME",
]);
// True when `name` denotes the player: either the canonical "你" or one of
// the listed POV variants (exact, case-sensitive match).
function isPovName(name: string): boolean {
  if (name === POV_DISPLAY_NAME) {
    return true;
  }
  return POV_VARIANTS.has(name);
}
// Canonicalize a speaker name: every POV variant collapses to "你", while
// any other name (including "你" itself) is returned unchanged. The caller
// is expected to pass an already-trimmed name.
function normalizeSpeakerName(name: string): string {
  if (POV_VARIANTS.has(name)) {
    return POV_DISPLAY_NAME;
  }
  return name;
}
// Coerce a choice effect from untrusted LLM JSON.
//   - kind "advance-beat" with a usable targetBeatId → advance-beat
//   - anything else → change-scene, with nextSceneSeed defaulting to "未指定"
// Field values are type-checked at runtime: a numeric targetBeatId is
// stringified (models often emit ids as numbers), and any other non-string
// value is treated as absent instead of throwing on `.trim()`.
function coerceEffect(raw: RawEffect | undefined): BeatChoiceEffect {
  const text = (value: unknown): string =>
    typeof value === "string" ? value.trim() : "";
  const idText = (value: unknown): string =>
    typeof value === "number" && Number.isFinite(value)
      ? String(value)
      : text(value);

  // Anything that is not an object (undefined, null, a bare string, …) is
  // handled like an empty effect.
  const fields: { kind?: unknown; targetBeatId?: unknown; nextSceneSeed?: unknown } =
    raw !== null && typeof raw === "object" ? raw : {};

  const targetBeatId = idText(fields.targetBeatId);
  if (fields.kind === "advance-beat" && targetBeatId) {
    return { kind: "advance-beat", targetBeatId };
  }
  return {
    kind: "change-scene",
    nextSceneSeed: text(fields.nextSceneSeed) || "未指定",
  };
}
// Coerce one choice from untrusted LLM JSON. `idx` is the choice's position
// in its list and seeds the defaults: id "c<idx+1>" and label "选项 <idx+1>".
// Values are type-checked at runtime — a numeric id is stringified, other
// non-string id/label values (and a non-object `raw`) fall back to the
// defaults instead of throwing on `.trim()`.
function coerceChoice(raw: RawChoice, idx: number): BeatChoice {
  const text = (value: unknown): string =>
    typeof value === "string" ? value.trim() : "";
  const idText = (value: unknown): string =>
    typeof value === "number" && Number.isFinite(value)
      ? String(value)
      : text(value);

  const fields: { id?: unknown; label?: unknown; effect?: RawEffect } =
    raw !== null && typeof raw === "object" ? raw : {};

  return {
    id: idText(fields.id) || `c${idx + 1}`,
    label: text(fields.label) || `选项 ${idx + 1}`,
    effect: coerceEffect(fields.effect),
  };
}
// Coerce a beat's `next` from untrusted LLM JSON.
//   - type "choice" with at least one object entry in `choices` → choice
//   - anything else → continue, targeting nextBeatId or `fallbackBeatId`
// Entries of `choices` that are not objects (e.g. null) are dropped before
// coercion, and nextBeatId is type-checked (numbers are stringified, other
// non-strings ignored) so malformed output cannot throw here.
function coerceNext(raw: RawNext | undefined, fallbackBeatId: string): BeatNext {
  const fields: RawNext = raw !== null && typeof raw === "object" ? raw : {};

  if (fields.type === "choice" && Array.isArray(fields.choices)) {
    const usable = fields.choices.filter(
      (choice) => choice !== null && typeof choice === "object",
    );
    if (usable.length > 0) {
      return {
        type: "choice",
        choices: usable.map((choice, i) => coerceChoice(choice, i)),
      };
    }
  }

  const target: unknown = fields.nextBeatId;
  let nextBeatId = "";
  if (typeof target === "string") {
    nextBeatId = target.trim();
  } else if (typeof target === "number" && Number.isFinite(target)) {
    nextBeatId = String(target);
  }
  return {
    type: "continue",
    nextBeatId: nextBeatId || fallbackBeatId,
  };
}
// Coerce a beat's activeCharacters list from untrusted LLM JSON.
// Returns undefined for a non-array or when no usable entry remains.
// An entry is kept when it has a non-empty string name that is not a POV
// name; `pose` is included only when it is a non-empty string. Entries may
// be objects ({ name, pose }) or bare strings (taken as the name); anything
// else — null, numbers, objects with a non-string name — is skipped rather
// than throwing.
function coerceActiveCharacters(
  raw: RawActiveCharacter[] | undefined,
): BeatActiveCharacter[] | undefined {
  if (!Array.isArray(raw)) return undefined;

  const text = (value: unknown): string =>
    typeof value === "string" ? value.trim() : "";

  const out: BeatActiveCharacter[] = [];
  for (const entry of raw as unknown[]) {
    let name = "";
    let pose = "";
    if (typeof entry === "string") {
      name = entry.trim();
    } else if (entry !== null && typeof entry === "object") {
      const fields = entry as Record<string, unknown>;
      name = text(fields.name);
      pose = text(fields.pose);
    }
    if (!name) continue;
    // POV is never IN the picture — strip the LLM's slip-up silently so
    // CharacterDesigner doesn't end up generating a portrait for the player.
    if (isPovName(name)) continue;
    out.push(pose ? { name, pose } : { name });
  }
  return out.length > 0 ? out : undefined;
}
// Coerce one beat from untrusted LLM JSON. `idx` is the beat's position and
// `totalBeats` the scene's beat count; they seed the default id "b<idx+1>"
// and the default `continue` target.
// All text fields are type-checked at runtime: a numeric id is stringified,
// and any other non-string value (or a non-object `raw`) is treated as
// absent instead of throwing on `.trim()`.
function coerceBeat(raw: RawBeat, idx: number, totalBeats: number): Beat {
  const text = (value: unknown): string =>
    typeof value === "string" ? value.trim() : "";
  const idText = (value: unknown): string =>
    typeof value === "number" && Number.isFinite(value)
      ? String(value)
      : text(value);

  const fields: RawBeat = raw !== null && typeof raw === "object" ? raw : {};

  const id = idText(fields.id) || `b${idx + 1}`;
  // Non-last beats default their `continue` target to the following beat.
  // The last beat gets an empty fallback on purpose: repairBeats() turns a
  // last/dangling continue into a real scene-change exit so the player can
  // never get stuck self-looping on it.
  const fallback = idx + 1 < totalBeats ? `b${idx + 2}` : "";

  // Normalize any POV variant (玩家/我/主角/protagonist/...) to "你"; NPC
  // names pass through unchanged.
  const rawSpeaker = text(fields.speaker);
  const speaker = rawSpeaker ? normalizeSpeakerName(rawSpeaker) : undefined;
  const line = text(fields.line) || undefined;

  return {
    id,
    narration: text(fields.narration) || undefined,
    speaker,
    line,
    // lineDelivery is meaningful only for NPC speakers (TTS). For the POV
    // speaker ("你") TTS is skipped, so lineDelivery would never be used.
    lineDelivery:
      line && speaker !== POV_DISPLAY_NAME
        ? text(fields.lineDelivery) || undefined
        : undefined,
    activeCharacters: coerceActiveCharacters(fields.activeCharacters),
    next: coerceNext(fields.next, fallback),
  };
}
// Seed text handed to the next scene when an exit had to be synthesized.
const FALLBACK_SEED = "故事继续推进";

// Builds the synthetic "继续" choice that leaves the scene. Its id is derived
// from the owning beat ("<beatId>__exit").
function fallbackExitChoice(beatId: string): BeatChoice {
  const exitId = `${beatId}__exit`;
  const choice: BeatChoice = {
    id: exitId,
    label: "继续",
    effect: { kind: "change-scene", nextSceneSeed: FALLBACK_SEED },
  };
  return choice;
}
// Beat ids are graph keys (the front-end's `beats.find(b => b.id === ...)`,
// the session's `visitedBeatIds`, and `continue`/`advance-beat` targets). If
// the model reuses an id across beats, the second occurrence becomes silently
// unreachable and external references collapse to the first beat. Rename
// duplicates; rewrite the renamed beat's OWN self-references. External
// references stay pointing at the first occurrence.
//
// A generated id ("<id>_<n>") is chosen so that it collides with NO id in
// the scene — including ids of beats that come later. Otherwise
// [b1, b1, b1_2] would rename the duplicate to "b1_2" and then be forced to
// rename the genuine b1_2, silently redirecting every reference to it.
//
// Returns a new array; beats that keep their id are returned as-is.
function ensureUniqueBeatIds(beats: Beat[]): Beat[] {
  // Every id the model actually used, plus every id generated so far.
  const taken = new Set(beats.map((b) => b.id));
  // Ids already assigned to a beat earlier in the list.
  const seen = new Set<string>();

  return beats.map((b): Beat => {
    if (!seen.has(b.id)) {
      seen.add(b.id);
      return b;
    }

    const oldId = b.id;
    let n = 2;
    while (taken.has(`${oldId}_${n}`)) n += 1;
    const newId = `${oldId}_${n}`;
    taken.add(newId);
    seen.add(newId);

    // Only the renamed beat's references to ITSELF follow the rename.
    let next = b.next;
    if (next.type === "continue" && next.nextBeatId === oldId) {
      next = { type: "continue", nextBeatId: newId };
    } else if (next.type === "choice") {
      next = {
        type: "choice",
        choices: next.choices.map((c) =>
          c.effect.kind === "advance-beat" && c.effect.targetBeatId === oldId
            ? {
                ...c,
                effect: { kind: "advance-beat" as const, targetBeatId: newId },
              }
            : c,
        ),
      };
    }
    return { ...b, id: newId, next };
  });
}
// Repairs referential integrity AND guarantees the scene is escapable:
//   - a `continue` whose target is missing or is the beat itself is
//     repointed to the next beat in list order; when there is no next beat
//     it becomes a single scene-change exit choice
//   - an `advance-beat` choice whose target does not exist is downgraded to
//     a scene change
//   - if, after that, no change-scene choice exists anywhere, one is
//     appended to the last beat
// Returns a new array; the input beats are not modified.
function repairBeats(beats: Beat[]): Beat[] {
  const knownIds = new Set<string>();
  for (const beat of beats) {
    knownIds.add(beat.id);
  }

  const repaired: Beat[] = beats.map((beat, idx): Beat => {
    const next = beat.next;

    if (next.type !== "continue") {
      // Choice beat: downgrade advance-beat effects that point nowhere.
      const choices = next.choices.map((choice) => {
        const effect = choice.effect;
        if (effect.kind !== "advance-beat" || knownIds.has(effect.targetBeatId)) {
          return choice;
        }
        return {
          ...choice,
          effect: {
            kind: "change-scene" as const,
            nextSceneSeed: "未指定(导演引用不存在的 beat,已降级为换场)",
          },
        };
      });
      return { ...beat, next: { type: "choice", choices } };
    }

    // Continue beat: keep it only when it targets another existing beat.
    const pointsElsewhere = next.nextBeatId !== beat.id;
    if (pointsElsewhere && knownIds.has(next.nextBeatId)) {
      return beat;
    }
    const following = beats[idx + 1]?.id;
    if (!following) {
      return {
        ...beat,
        next: { type: "choice", choices: [fallbackExitChoice(beat.id)] },
      };
    }
    return { ...beat, next: { type: "continue", nextBeatId: following } };
  });

  // Is there at least one way out of the scene?
  let escapable = false;
  for (const beat of repaired) {
    if (beat.next.type !== "choice") continue;
    if (beat.next.choices.some((choice) => choice.effect.kind === "change-scene")) {
      escapable = true;
      break;
    }
  }

  if (repaired.length > 0 && !escapable) {
    const tailIdx = repaired.length - 1;
    const tail = repaired[tailIdx]!;
    const kept = tail.next.type === "choice" ? tail.next.choices : [];
    repaired[tailIdx] = {
      ...tail,
      next: { type: "choice", choices: [...kept, fallbackExitChoice(tail.id)] },
    };
  }
  return repaired;
}
// Choice ids are keys the front-end uses to cache + consume prefetched
// scenes. Two beats both defaulting to c1/c2 would make a transition reuse
// the WRONG prefetched scene — so force every choice id to be unique within
// the scene.
//
// The first occurrence of an id is kept; a later duplicate becomes
// "<id>_<n>" with the smallest n >= 2 not used so far.
//
// Pure: the input beats and their choice objects are never mutated. Beats
// with a `choice` next are returned as new objects; renamed choices are
// copies, untouched choices and `continue` beats are returned as-is.
function ensureUniqueChoiceIds(beats: Beat[]): Beat[] {
  const seen = new Set<string>();

  return beats.map((beat): Beat => {
    if (beat.next.type !== "choice") return beat;

    const choices = beat.next.choices.map((choice) => {
      let id = choice.id;
      if (seen.has(id)) {
        let n = 2;
        while (seen.has(`${choice.id}_${n}`)) n += 1;
        id = `${choice.id}_${n}`;
      }
      seen.add(id);
      return id === choice.id ? choice : { ...choice, id };
    });
    return { ...beat, next: { type: "choice", choices } };
  });
}
// Normalize sceneKey to a safe lowercase-with-dashes English slug. If the
// model returns something weird (中文 / spaces / mixed case), best-effort
// fix: every run of characters outside [a-z0-9-] becomes a single dash and
// leading/trailing dashes are stripped. If nothing is left, return undefined
// (the scene then simply has no key to match against a prior scene).
//
// The value comes straight from LLM JSON, so anything that is not a string
// (number, object, null, …) also yields undefined rather than throwing.
function normalizeSceneKey(raw: string | undefined): string | undefined {
  if (typeof raw !== "string") return undefined;
  const slug = raw
    .trim()
    .toLowerCase()
    .replace(/[^a-z0-9-]+/g, "-")
    .replace(/-+/g, "-")
    .replace(/^-|-$/g, "");
  return slug.length > 0 ? slug : undefined;
}
// Coerce an untrusted value to a list of non-empty trimmed strings.
// Returns undefined for non-arrays and for arrays with no usable string.
function coerceStringArray(raw: unknown): string[] | undefined {
  if (!Array.isArray(raw)) {
    return undefined;
  }
  const cleaned = raw.reduce<string[]>((acc, item) => {
    const text = typeof item === "string" ? item.trim() : "";
    if (text.length > 0) {
      acc.push(text);
    }
    return acc;
  }, []);
  if (cleaned.length === 0) {
    return undefined;
  }
  return cleaned;
}
// Extract the volatile story-memory rewrite from the Writer's JSON.
// A field is copied into the patch only when it holds usable content
// (non-empty trimmed string / non-empty string list). When nothing usable
// is present — or `raw` is not an object at all — the result is undefined,
// which tells the director to leave the carried StoryState untouched.
function coerceStoryStatePatch(
  raw: RawStoryStatePatch | undefined,
): StoryStatePatch | undefined {
  if (typeof raw !== "object" || raw === null) {
    return undefined;
  }

  const trimmed = (value: unknown): string =>
    typeof value === "string" ? value.trim() : "";

  const patch: StoryStatePatch = {};

  const synopsis = trimmed(raw.synopsis);
  if (synopsis !== "") {
    patch.synopsis = synopsis;
  }

  const openThreads = coerceStringArray(raw.openThreads);
  if (openThreads) {
    patch.openThreads = openThreads;
  }

  const relationships = coerceStringArray(raw.relationships);
  if (relationships) {
    patch.relationships = relationships;
  }

  const nextHook = trimmed(raw.nextHook);
  if (nextHook !== "") {
    patch.nextHook = nextHook;
  }

  return Object.keys(patch).length === 0 ? undefined : patch;
}
// Runs the Writer: one chat() call whose JSON reply is coerced into a
// WriterOutput.
//
// Pipeline: coerceBeat (per beat) → ensureUniqueBeatIds → repairBeats →
// ensureUniqueChoiceIds. entryBeatId is honored only when it names an
// existing beat; otherwise the first beat is the entry.
//
// The reply is untrusted: a reply that is not an object, beats that are not
// objects, and non-string sceneSummary / sceneKey values are treated as
// absent (a numeric entryBeatId is stringified) instead of surfacing as
// TypeErrors.
//
// Throws "Writer returned no beats" when no usable beat remains; errors
// from chat() / parseJsonLoose() propagate.
export async function runWriter(
  config: ProviderConfig,
  session: Session,
): Promise<WriterOutput> {
  const raw = await chat(
    config,
    [
      { role: "system", content: WRITER_SYSTEM },
      { role: "user", content: buildWriterUserMessage(session) },
    ],
    { temperature: 0.9, responseFormat: "json_object" },
  );
  const reply: unknown = parseJsonLoose<RawScene>(raw);
  const parsed: RawScene =
    reply !== null && typeof reply === "object" ? (reply as RawScene) : {};

  const text = (value: unknown): string =>
    typeof value === "string" ? value.trim() : "";
  const idText = (value: unknown): string =>
    typeof value === "number" && Number.isFinite(value)
      ? String(value)
      : text(value);

  // Drop entries such as null or bare strings — they carry no beat data.
  const rawBeats = Array.isArray(parsed.beats)
    ? parsed.beats.filter((b) => b !== null && typeof b === "object")
    : [];
  if (rawBeats.length === 0) {
    throw new Error("Writer returned no beats");
  }

  const beats = ensureUniqueChoiceIds(
    repairBeats(
      ensureUniqueBeatIds(
        rawBeats.map((b, i) => coerceBeat(b, i, rawBeats.length)),
      ),
    ),
  );

  const declaredEntry = idText(parsed.entryBeatId);
  const entryBeatId =
    declaredEntry && beats.some((b) => b.id === declaredEntry)
      ? declaredEntry
      : beats[0]!.id;

  const sceneKeySource: unknown = parsed.sceneKey;

  return {
    sceneSummary: text(parsed.sceneSummary) || "未指定场景概要",
    sceneKey: normalizeSceneKey(
      typeof sceneKeySource === "string" ? sceneKeySource : undefined,
    ),
    entryBeatId,
    beats,
    storyStatePatch: coerceStoryStatePatch(parsed.storyStatePatch),
  };
}
// List every distinct character name that appears in the given beats, so
// the orchestrator can decide which ones still need the CharacterDesigner.
// Names come from both the `speaker` field and `activeCharacters` (a
// character can be on-screen without speaking), in first-appearance order.
//
// POV names ("你" / 玩家 / 主角 / ...) are excluded entirely — the player is
// never designed (no portrait, no voice, no archetype).
export function collectActiveCharacterNames(beats: Beat[]): string[] {
  const names = new Set<string>();
  const note = (name: string): void => {
    if (!isPovName(name)) {
      names.add(name);
    }
  };

  for (const beat of beats) {
    if (beat.speaker) {
      note(beat.speaker);
    }
    for (const active of beat.activeCharacters ?? []) {
      note(active.name);
    }
  }
  return Array.from(names);
}
// Re-export POV constants for downstream filters (director's orphanSpeakers).
export { POV_DISPLAY_NAME, POV_VARIANTS, isPovName, normalizeSpeakerName };