refactor: flatten monorepo to single web package (#12)
Flatten the pnpm monorepo (apps/web + packages/*) into a single web package at the repo root. - Move app/lib/components/scripts/public to root; drop apps/web and packages/* wrappers - Rewrite tsconfig paths (@infiplot/*) to ./lib/*; turbopack.root = __dirname - Update Vercel (no root-directory) and Cloudflare (pnpm build:cf at root) deploy paths - Regenerate pnpm-lock.yaml to drop stale workspace importers - Bump engines.node to >=22 to match wrangler Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,90 @@
|
||||
import { chat } from "@infiplot/ai-client";
|
||||
import type { ProviderConfig, Session, StoryState } from "@infiplot/types";
|
||||
import { parseJsonLoose } from "../jsonParser";
|
||||
import { ARCHITECT_SYSTEM, buildArchitectUserMessage } from "../prompts";
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
// Architect agent — ONE LLM call at session start.
|
||||
//
|
||||
// Expands the user's (often terse) world + style prompt into a real story
|
||||
// bible: a second-person protagonist with a want and a flaw, a single
|
||||
// central dramatic question (logline), a genre frame that anchors the
|
||||
// 爽点 rhythm, an engineered cold-open for scene 1 (nextHook), and a small
|
||||
// intentional cast. Seeds the StoryState that the Writer reads and updates
|
||||
// every scene — so the story has a spine from beat one instead of being
|
||||
// improvised cold.
|
||||
//
|
||||
// Everything is best-effort coerced with fallbacks: a malformed LLM
|
||||
// response can never abort session start — worst case the Writer just gets
|
||||
// a thinner bible and improvises more.
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Untrusted shape of the Architect's parsed JSON reply. Every field is
 * optional and typed `unknown`, so each one has to be narrowed before use.
 */
type RawStoryState = Partial<
  Record<
    | "logline"
    | "genreTags"
    | "protagonist"
    | "castNotes"
    | "synopsis"
    | "openThreads"
    | "relationships"
    | "nextHook",
    unknown
  >
>;
|
||||
|
||||
/** Returns the trimmed text when `raw` is a string; any other value yields "". */
function str(raw: unknown): string {
  if (typeof raw !== "string") {
    return "";
  }
  return raw.trim();
}
|
||||
|
||||
/**
 * Keeps the non-blank string entries of `raw`, trimmed and in their original
 * order. Returns `undefined` when `raw` is not an array or nothing survives.
 */
function strArray(raw: unknown): string[] | undefined {
  if (!Array.isArray(raw)) {
    return undefined;
  }
  const cleaned: string[] = [];
  for (const item of raw) {
    if (typeof item !== "string") continue;
    const text = item.trim();
    if (text.length > 0) cleaned.push(text);
  }
  return cleaned.length === 0 ? undefined : cleaned;
}
|
||||
|
||||
/**
 * Runs the Architect: one chat call whose JSON reply seeds the StoryState.
 *
 * Never rejects — any error thrown by the chat call or while reading the
 * parsed reply is logged, and a minimal bible built from
 * `session.worldSetting` is returned instead.
 *
 * @param config  Provider configuration handed to `chat`.
 * @param session Session whose world setting feeds the prompt and fallbacks.
 * @returns The seeded StoryState.
 */
export async function runArchitect(
  config: ProviderConfig,
  session: Session,
): Promise<StoryState> {
  const protagonistFallback =
    "你是这个故事的主角(第二人称视角,永不出现在画面里)。";
  const synopsisFallback = "故事即将开始。";

  try {
    const reply = await chat(
      config,
      [
        { role: "system", content: ARCHITECT_SYSTEM },
        { role: "user", content: buildArchitectUserMessage(session) },
      ],
      { temperature: 0.85, responseFormat: "json_object" },
    );
    const bible = parseJsonLoose<RawStoryState>(reply);

    // Stable spine — the raw world prompt backs the logline so the bible is
    // never wholly empty even when the model returns garbage.
    const logline = str(bible.logline) || session.worldSetting;
    const genreTags = str(bible.genreTags);
    const protagonist = str(bible.protagonist) || protagonistFallback;
    const castNotes = str(bible.castNotes) || undefined;

    // Volatile seeds — the opening Writer rewrites these via its patch.
    const synopsis = str(bible.synopsis) || synopsisFallback;
    const openThreads = strArray(bible.openThreads);
    const relationships = strArray(bible.relationships);
    const nextHook = str(bible.nextHook) || undefined;

    return {
      logline,
      genreTags,
      protagonist,
      castNotes,
      synopsis,
      openThreads,
      relationships,
      nextHook,
    };
  } catch (err) {
    // Best-effort agent: a network failure or unrepairable JSON must not
    // abort session start, so fall back to a bible seeded from the raw prompt.
    const msg = err instanceof Error ? err.message : String(err);
    console.error(`[architect] failed, using minimal bible: ${msg}`);
    return {
      logline: session.worldSetting,
      genreTags: "",
      protagonist: protagonistFallback,
      synopsis: synopsisFallback,
    };
  }
}
|
||||
@@ -0,0 +1,155 @@
|
||||
import { chat, generateImage } from "@infiplot/ai-client";
|
||||
import { provisionVoice } from "@infiplot/tts-client";
|
||||
import type {
|
||||
Character,
|
||||
CharacterVoice,
|
||||
EngineConfig,
|
||||
Session,
|
||||
} from "@infiplot/types";
|
||||
import { parseJsonLoose } from "../jsonParser";
|
||||
import { mockImageDataUri } from "../mockImage";
|
||||
import {
|
||||
CHARACTER_DESIGNER_SYSTEM,
|
||||
buildCharacterDesignerUserMessage,
|
||||
buildCharacterPortraitPrompt,
|
||||
} from "../prompts";
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
// CharacterDesigner agent — designs ONE new character.
|
||||
//
|
||||
// Exposed as three GRANULAR stages so the director can schedule the slow
|
||||
// parts around the Painter (a voice is never needed to paint a scene, and
|
||||
// only entry-beat characters' portraits are referenced by the Painter):
|
||||
//
|
||||
// 1. designCharacterCard — ONE LLM call → visual + voice TEXT cards
|
||||
// (intentional bundling: the same agent thinks about who this character
|
||||
// IS, keeping appearance and vocal personality coherent)
|
||||
// 2. renderCharacterPortrait — base portrait image (Runware URL + UUID)
|
||||
// 3. provisionCharacterVoice — Xiaomi MiMo voicedesign → reference audio
|
||||
//
|
||||
// Each step degrades gracefully — if image gen fails the character just has
|
||||
// no portrait; if voice gen fails it has no voice. The game keeps running.
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Parsed reply of the character-design LLM call; either card may be absent. */
type CharacterDesignOutput = Partial<{
  /** Text card describing how the character looks. */
  visualDescription: string;
  /** Text card describing how the character sounds. */
  voiceDescription: string;
}>;
|
||||
|
||||
// TEMP: per-phase timing used for latency diagnosis (same convention as the
// orchestrator's tlog). Remove once real-world numbers have been collected.
/**
 * Logs how many milliseconds have elapsed since `t0`.
 *
 * @param label Prefix identifying the measured phase.
 * @param t0    Start timestamp, as returned by `Date.now()`.
 */
function tlog(label: string, t0: number): void {
  const elapsedMs = Date.now() - t0;
  console.log(`${label}: ${elapsedMs}ms`);
}
|
||||
|
||||
/**
 * Asks the text model for one character's visual and voice cards.
 *
 * Errors from `chat` or `parseJsonLoose` are not caught here and propagate to
 * the caller.
 *
 * @param config   Engine configuration; `config.text` selects the provider.
 * @param session  Session passed through to the prompt builder.
 * @param charName Name of the character being designed.
 * @returns The parsed (not yet validated) design reply.
 */
async function runDesignLLM(
  config: EngineConfig,
  session: Session,
  charName: string,
): Promise<CharacterDesignOutput> {
  const reply = await chat(
    config.text,
    [
      { role: "system", content: CHARACTER_DESIGNER_SYSTEM },
      {
        role: "user",
        content: buildCharacterDesignerUserMessage(charName, session),
      },
    ],
    { temperature: 0.7, responseFormat: "json_object" },
  );

  const design = parseJsonLoose<CharacterDesignOutput>(reply);
  return design;
}
|
||||
|
||||
/**
 * Generates the per-character base portrait: a "concept sheet" (single
 * character, neutral pose, plain background) intended to serve as a Runware
 * `referenceImages` anchor for later scenes.
 *
 * The image URL and UUID both come from the single `generateImage` result, so
 * no separate upload step is involved. In mock mode only `basePortraitUrl` is
 * set (to the mock data URI) and no UUID is returned.
 *
 * Never rejects: on any failure the error is logged and `{}` is returned, so
 * the character simply has no portrait.
 *
 * @param config            Engine configuration (`mockImage`, `image`).
 * @param charName          Character name, used in the prompt and in logs.
 * @param visualDescription Visual text card for the character.
 * @param styleGuide        Style guide forwarded to the prompt builder.
 */
export async function renderCharacterPortrait(
  config: EngineConfig,
  charName: string,
  visualDescription: string,
  styleGuide: string,
): Promise<{ basePortraitUrl?: string; basePortraitUuid?: string }> {
  try {
    if (config.mockImage) {
      const mockUri = await mockImageDataUri();
      return { basePortraitUrl: mockUri };
    }

    const portraitPrompt = buildCharacterPortraitPrompt(
      charName,
      visualDescription,
      styleGuide,
    );
    const generated = await generateImage(config.image, portraitPrompt);
    return {
      basePortraitUrl: generated.imageUrl,
      basePortraitUuid: generated.imageUuid,
    };
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    console.error(`[characterDesigner] portrait gen failed for ${charName}: ${msg}`);
    // No portrait at all — degrade gracefully.
    return {};
  }
}
|
||||
|
||||
/**
 * Provisions a voice for a character from its voice description.
 *
 * Returns `undefined` when no TTS provider is configured or when provisioning
 * throws; the failure is logged rather than propagated.
 *
 * @param config           Engine configuration; `config.tts` is optional.
 * @param voiceDescription Text card describing the desired voice.
 * @param charName         Character name, used only in the error log.
 */
export async function provisionCharacterVoice(
  config: EngineConfig,
  voiceDescription: string,
  charName: string,
): Promise<CharacterVoice | undefined> {
  const ttsConfig = config.tts;
  if (!ttsConfig) {
    return undefined;
  }

  try {
    const voice = await provisionVoice(ttsConfig, voiceDescription);
    return voice;
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    console.error(`[characterDesigner] voice provision failed for ${charName}: ${msg}`);
    return undefined;
  }
}
|
||||
|
||||
// The cheap first stage: design the visual + voice TEXT cards in one LLM
|
||||
// call. The director then schedules renderCharacterPortrait /
|
||||
// provisionCharacterVoice around the Painter. Multiple new characters in the
|
||||
// same scene run this stage in parallel at the director level.
|
||||
/** Text-only design of one character, before any image or voice is generated. */
export type CharacterCard = {
  /** Character name exactly as it was requested. */
  name: string;
  /** Visual text card; absent when the design reply did not provide one. */
  visualDescription?: string;
  /** Voice text card; always present (a generic instruction is the fallback). */
  voiceDescription: string;
};
|
||||
|
||||
/**
 * Designs the visual + voice TEXT cards for one character via a single LLM
 * call, and logs how long that call took.
 *
 * The reply is untrusted JSON: a field that is missing, blank, or not a string
 * (or a reply that is not an object at all) is treated as absent instead of
 * throwing, so the voice fallback below is always reachable.
 *
 * Errors thrown by the LLM call itself still propagate to the caller.
 *
 * @param config   Engine configuration forwarded to the design call.
 * @param session  Session; `worldSetting` is used in the fallback voice text.
 * @param charName Name of the character being designed.
 * @returns The character card; `voiceDescription` is always populated.
 */
export async function designCharacterCard(
  config: EngineConfig,
  session: Session,
  charName: string,
): Promise<CharacterCard> {
  const tDesign = Date.now();
  const design: unknown = await runDesignLLM(config, session, charName);
  tlog(`[charDesigner ${charName}] design LLM`, tDesign);

  // Reads one card from the reply, tolerating a null/non-object reply and
  // non-string values (the static type is only what the prompt asks for).
  const readCard = (key: "visualDescription" | "voiceDescription"): string => {
    if (typeof design !== "object" || design === null) return "";
    const value = (design as Record<string, unknown>)[key];
    return typeof value === "string" ? value.trim() : "";
  };

  return {
    name: charName,
    visualDescription: readCard("visualDescription") || undefined,
    voiceDescription:
      readCard("voiceDescription") ||
      `请根据角色名「${charName}」推断其性别、年龄与气质,生成最贴合的音色。所属世界观:${session.worldSetting}`,
  };
}
|
||||
|
||||
/**
 * Provisions a voice only, for a character name that was mentioned (e.g. as a
 * speaker) without ever having been designed. Used by the directInsertBeat
 * path and as a safety net in directScene. No portrait is generated — the
 * returned character carries a name, a generic voice description and the
 * provisioned voice (which may be `undefined`).
 *
 * @param config   Engine configuration forwarded to voice provisioning.
 * @param session  Session; `worldSetting` is embedded in the description.
 * @param charName Name of the character to voice.
 */
export async function provisionVoiceForName(
  config: EngineConfig,
  session: Session,
  charName: string,
): Promise<Character> {
  const voiceDescription = `请根据角色名「${charName}」推断其性别、年龄与气质,生成最贴合的音色。所属世界观:${session.worldSetting}`;

  const character: Character = {
    name: charName,
    voiceDescription,
    voice: await provisionCharacterVoice(config, voiceDescription, charName),
  };
  return character;
}
|
||||
@@ -0,0 +1,86 @@
|
||||
import { chat } from "@infiplot/ai-client";
|
||||
import type { BeatActiveCharacter, ProviderConfig } from "@infiplot/types";
|
||||
import { parseJsonLoose } from "../jsonParser";
|
||||
import {
|
||||
CINEMATOGRAPHER_SYSTEM,
|
||||
buildCinematographerUserMessage,
|
||||
} from "../prompts";
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
// Cinematographer agent — translates the Writer's narrative scene
|
||||
// summary into an English compositional prompt for FLUX.
|
||||
//
|
||||
// Reads: sceneSummary + entry beat's activeCharacters (poses)
|
||||
// + prior sceneKey (for continuity hints)
|
||||
// Writes: { shotType, integratedPrompt }
|
||||
//
|
||||
// Does NOT describe character APPEARANCE — that's appended at the
|
||||
// Painter stage from session.characters[].visualDescription. The
|
||||
// Cinematographer only positions named characters in the frame and
|
||||
// describes the environment + lighting + camera framing.
|
||||
//
|
||||
// This separation lets the Cinematographer run IN PARALLEL with the
|
||||
// CharacterDesigner — neither needs the other's output. They both
|
||||
// feed independently into the Painter prompt.
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Validated result of the Cinematographer; both fields are always set. */
export type CinematographerOutput = {
  /** Camera framing label. */
  shotType: string;
  /** Compositional prompt handed on to the Painter stage. */
  integratedPrompt: string;
};
|
||||
|
||||
/** Parsed Cinematographer reply before validation; either field may be missing. */
type RawCinematographerOutput = Partial<{
  shotType: string;
  integratedPrompt: string;
}>;
|
||||
|
||||
/** Everything the Cinematographer needs to compose one scene's shot. */
export type CinematographerInput = {
  /** The Writer's narrative summary of the scene. */
  sceneSummary: string;
  /** Style guide forwarded to the prompt builder. */
  styleGuide: string;
  /** Characters (names + poses) active on the entry beat. */
  entryBeatActive: BeatActiveCharacter[];
  /**
   * Entry beat's speaker — drives the dynamic camera policy:
   *   NPC name  → NPC looks toward camera (close-up)
   *   "你"      → medium shot, NPC listens
   *   undefined → wide establishing shot
   */
  entryBeatSpeaker?: string;
  /** Scene key of the prior scene, used for continuity hints. */
  priorSceneKey?: string;
  /** Scene key of the scene being composed. */
  currentSceneKey?: string;
};
|
||||
|
||||
/**
 * Runs the Cinematographer: one chat call that turns the scene summary and the
 * entry beat's staging into `{ shotType, integratedPrompt }`.
 *
 * The reply is untrusted JSON. A field that is missing, blank, or not a string
 * — or a reply that is not an object — falls back to a default instead of
 * throwing, so a malformed reply cannot take down the pipeline:
 *   - `integratedPrompt` falls back to a prompt synthesized from
 *     `input.sceneSummary`
 *   - `shotType` falls back to "medium shot"
 *
 * Errors thrown by `chat` or `parseJsonLoose` still propagate to the caller.
 *
 * @param config Provider configuration handed to `chat`.
 * @param input  Scene summary, style guide, entry-beat staging and scene keys.
 */
export async function runCinematographer(
  config: ProviderConfig,
  input: CinematographerInput,
): Promise<CinematographerOutput> {
  const raw = await chat(
    config,
    [
      { role: "system", content: CINEMATOGRAPHER_SYSTEM },
      {
        role: "user",
        content: buildCinematographerUserMessage(
          input.sceneSummary,
          input.styleGuide,
          input.entryBeatActive,
          input.entryBeatSpeaker,
          input.priorSceneKey,
          input.currentSceneKey,
        ),
      },
    ],
    { temperature: 0.6, responseFormat: "json_object" },
  );

  const parsed: unknown = parseJsonLoose<RawCinematographerOutput>(raw);

  // Reads one field from the reply, tolerating a null/non-object reply and
  // non-string values (the static type is only what the prompt asks for).
  const readField = (key: "shotType" | "integratedPrompt"): string => {
    if (typeof parsed !== "object" || parsed === null) return "";
    const value = (parsed as Record<string, unknown>)[key];
    return typeof value === "string" ? value.trim() : "";
  };

  // Fallback: if the LLM produced nothing usable, synthesize a minimal
  // integratedPrompt from the Writer's sceneSummary so the Painter still has
  // something to work with.
  const integratedPrompt =
    readField("integratedPrompt") ||
    `A cinematic illustration depicting: ${input.sceneSummary}. Wide establishing shot, natural lighting, atmospheric mood.`;

  return {
    shotType: readField("shotType") || "medium shot",
    integratedPrompt,
  };
}
|
||||
@@ -0,0 +1,163 @@
|
||||
import { generateImage } from "@infiplot/ai-client";
|
||||
import type { GenerateImageOptions, GenerateImageResult } from "@infiplot/ai-client";
|
||||
import type {
|
||||
Beat,
|
||||
Character,
|
||||
EngineConfig,
|
||||
ProviderConfig,
|
||||
} from "@infiplot/types";
|
||||
import { mockImageDataUri } from "../mockImage";
|
||||
import { buildPainterPrompt } from "../prompts";
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
// Painter — final image generation with multi-reference anchoring.
|
||||
//
|
||||
// FLUX.2 [klein] 9B KV does NOT support seedImage (img2img). Instead,
|
||||
// visual continuity comes entirely from `referenceImages` (capped at 4),
|
||||
// which the KV-optimized variant accelerates ~2.5× via key-value caching
|
||||
// of reference latents.
|
||||
//
|
||||
// References are slotted in priority order (max 4):
|
||||
// 1. Prior scene image — when sceneKey matched a previous scene, this
|
||||
// anchors the same physical space (lighting/layout/style continuity)
|
||||
// 2. Entry beat's speaker portrait — the NPC the player is talking with
|
||||
// (most visually prominent)
|
||||
// 3. Other on-stage NPCs' portraits — secondary characters in the frame
|
||||
//
|
||||
// References are sent as Runware URLs (preferred — the UUID is not always
// accepted by the referenceImages pipeline) or UUIDs (backstop when no URL is
// stored). Base64 fallback was removed when generateImage switched to
// outputType=URL, which always returns both a UUID and a URL so we never lack
// a cheap reference handle.
|
||||
//
|
||||
// Failure handling — two-tier degradation:
|
||||
// A. referenceImages call (preferred — full visual anchoring)
|
||||
// B. pure text-to-image fallback (last resort if Runware refs API errors)
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Upper bound on how many `referenceImages` are sent with one Painter call. */
const MAX_REFERENCE_IMAGES = 4;
|
||||
|
||||
/** Inputs of one Painter call. */
export type PainterInput = {
  /** Compositional prompt forwarded to the painter prompt builder. */
  integratedPrompt: string;
  /** Style guide forwarded to the painter prompt builder. */
  styleGuide: string;
  /** Characters on stage; their portraits are candidate reference images. */
  onStageCharacters: Character[];
  /**
   * Prior scene's Runware UUID or URL. When set (= sceneKey hit a prior
   * scene), it takes referenceImages[0] for spatial continuity. It shares the
   * 4-reference budget with the character portraits, so it displaces ONE
   * portrait.
   */
  priorSceneImage?: string;
};
|
||||
|
||||
/**
 * Picks the references sent to Runware as `referenceImages`, in priority order:
 *   slot 0:  priorSceneImage (if any — sceneKey continuity)
 *   slot 1:  the entry beat's speaker portrait
 *   slot 2+: other characters from the entry beat's activeCharacters
 *
 * The result is capped at MAX_REFERENCE_IMAGES and contains each reference
 * handle at most once (deduplicated by VALUE, not only by character name, so
 * two names resolving to the same portrait never spend two slots).
 *
 * A character's URL is preferred over its UUID: per the original note here,
 * the UUID returned by `imageInference` is not always recognized by the
 * `referenceImages` pipeline (`failedToTransferImage`), while the URL is
 * Runware's own CDN link. The UUID is a backstop for when the URL is missing
 * or empty (e.g. legacy session state).
 *
 * @param characters      Known characters with their portrait handles.
 * @param entryBeat       Entry beat providing the speaker and on-stage names.
 * @param priorSceneImage Prior scene's reference handle, if any.
 * @returns The references exactly as they will be sent.
 */
export function collectReferenceImages(
  characters: Character[],
  entryBeat: Beat | undefined,
  priorSceneImage: string | undefined,
): string[] {
  const refs: string[] = [];
  const sentRefs = new Set<string>();

  // Appends a handle unless it is empty, already queued, or the cap is hit.
  const pushRef = (ref: string | undefined): void => {
    if (!ref || sentRefs.has(ref) || refs.length >= MAX_REFERENCE_IMAGES) return;
    sentRefs.add(ref);
    refs.push(ref);
  };

  // `||` rather than `??` so that an empty-string URL falls through to the UUID.
  const portraitOf = (name: string): string | undefined => {
    const character = characters.find((c) => c.name === name);
    return character?.basePortraitUrl || character?.basePortraitUuid || undefined;
  };

  // Slot 0 — prior scene image first: backdrop drift is the most jarring
  // discontinuity across scenes that share a sceneKey.
  pushRef(priorSceneImage);

  // Slot 1 — the speaker, the most visually prominent character.
  const visitedNames = new Set<string>();
  const speakerName = entryBeat?.speaker;
  if (speakerName) {
    visitedNames.add(speakerName);
    pushRef(portraitOf(speakerName));
  }

  // Slot 2+ — everyone else on stage, in the order the beat lists them.
  for (const active of entryBeat?.activeCharacters ?? []) {
    if (refs.length >= MAX_REFERENCE_IMAGES) break;
    if (visitedNames.has(active.name)) continue;
    visitedNames.add(active.name);
    pushRef(portraitOf(active.name));
  }

  return refs;
}
|
||||
|
||||
/**
 * Calls `generateImage` and converts a failure into `null`.
 *
 * @param config  Image provider configuration.
 * @param prompt  Final painter prompt.
 * @param options Generation options (e.g. `referenceImages`).
 * @param label   Short tag naming this attempt in the warning log.
 * @returns The generation result, or `null` if the call threw (after a warning
 *          is logged).
 */
async function tryGenerate(
  config: ProviderConfig,
  prompt: string,
  options: GenerateImageOptions,
  label: string,
): Promise<GenerateImageResult | null> {
  let outcome: GenerateImageResult | null = null;
  try {
    outcome = await generateImage(config, prompt, options);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(`[painter] ${label} failed: ${reason}`);
  }
  return outcome;
}
|
||||
|
||||
/**
 * Outcome of a Painter run, discriminated by `kind`:
 *   - "real": a generated image with both its URL and UUID
 *   - "mock": mock mode, which yields a URL only
 */
export type PainterResult =
  | { kind: "real"; imageUrl: string; imageUuid: string }
  | { kind: "mock"; imageUrl: string };
|
||||
|
||||
/**
 * Produces the scene image, degrading in two tiers:
 *   A. generation with `referenceImages` (prior scene image + portraits),
 *      attempted only when at least one reference is available;
 *   B. pure text-to-image, used when tier A failed or there were no
 *      references. Errors from tier B propagate to the caller.
 *
 * In mock mode no generation happens and a mock data URI is returned.
 *
 * @param config    Engine configuration (`mockImage`, `image`).
 * @param input     Prompt pieces, on-stage characters and optional prior image.
 * @param entryBeat Entry beat used to choose which portraits to reference.
 */
export async function runPainter(
  config: EngineConfig,
  input: PainterInput,
  entryBeat: Beat | undefined,
): Promise<PainterResult> {
  if (config.mockImage) {
    const mockUri = await mockImageDataUri();
    return { kind: "mock", imageUrl: mockUri };
  }

  const prompt = buildPainterPrompt(
    input.integratedPrompt,
    input.styleGuide,
    input.onStageCharacters,
  );
  const refs = collectReferenceImages(
    input.onStageCharacters,
    entryBeat,
    input.priorSceneImage,
  );

  // Tier A — anchored on reference images; a failure here yields null.
  const anchored =
    refs.length > 0
      ? await tryGenerate(
          config.image,
          prompt,
          { referenceImages: refs },
          `referenceImages (${refs.length})`,
        )
      : null;

  // Tier B — plain text-to-image as the last resort.
  const generated = anchored ?? (await generateImage(config.image, prompt));
  return {
    kind: "real",
    imageUrl: generated.imageUrl,
    imageUuid: generated.imageUuid,
  };
}
|
||||
@@ -0,0 +1,425 @@
|
||||
import { chat } from "@infiplot/ai-client";
|
||||
import type {
|
||||
Beat,
|
||||
BeatActiveCharacter,
|
||||
BeatChoice,
|
||||
BeatChoiceEffect,
|
||||
BeatNext,
|
||||
ProviderConfig,
|
||||
Session,
|
||||
StoryStatePatch,
|
||||
} from "@infiplot/types";
|
||||
import { parseJsonLoose } from "../jsonParser";
|
||||
import { WRITER_SYSTEM, buildWriterUserMessage } from "../prompts";
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
// Writer agent — owns the narrative half of scene generation.
|
||||
//
|
||||
// Output: { sceneSummary, sceneKey, entryBeatId, beats[] }
|
||||
// Each beat carries activeCharacters[] (names + poses) the
|
||||
// Cinematographer reads when composing the establishing shot.
|
||||
//
|
||||
// Character DESIGN (visual + voice) is NOT this agent's job —
|
||||
// it only names characters; the CharacterDesigner picks up any
|
||||
// unknown name from beats[].activeCharacters.
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Validated result of one Writer call. */
export type WriterOutput = {
  /** Narrative summary of the scene. */
  sceneSummary: string;
  /** Optional scene key for the scene. */
  sceneKey?: string;
  /** Id of the beat the scene starts on. */
  entryBeatId: string;
  /** The scene's beats. */
  beats: Beat[];
  /**
   * Rewritten volatile story memory — merged onto the carried StoryState by
   * the director. Absent when the model omitted it (rare; bible just stales).
   */
  storyStatePatch?: StoryStatePatch;
};
|
||||
|
||||
// Raw shapes — what the LLM produces before validation / coercion. Every
// field is optional, since the model may leave any of them out.

/** One on-stage character as emitted by the model. */
type RawActiveCharacter = Partial<{
  name: string;
  pose: string;
}>;

/** Effect attached to a choice, before coercion. */
type RawEffect = Partial<{
  kind: string;
  targetBeatId: string;
  nextSceneSeed: string;
}>;

/** One player choice, before coercion. */
type RawChoice = Partial<{
  id: string;
  label: string;
  effect: RawEffect;
}>;

/** What follows a beat: a plain continue target or a list of choices. */
type RawNext = Partial<{
  type: string;
  nextBeatId: string;
  choices: RawChoice[];
}>;

/** One beat, before coercion. */
type RawBeat = Partial<{
  id: string;
  narration: string;
  speaker: string;
  line: string;
  lineDelivery: string;
  activeCharacters: RawActiveCharacter[];
  next: RawNext;
}>;

/** Story-memory rewrite; values stay `unknown` until they are narrowed. */
type RawStoryStatePatch = Partial<
  Record<"synopsis" | "openThreads" | "relationships" | "nextHook", unknown>
>;

/** The Writer's whole reply, before coercion. */
type RawScene = Partial<{
  sceneSummary: string;
  sceneKey: string;
  entryBeatId: string;
  beats: RawBeat[];
  storyStatePatch: RawStoryStatePatch;
}>;
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
// POV (player viewpoint) handling — Pattern B (galgame standard):
|
||||
// - speaker = "你" → ALLOWED (renders as dialog box, never TTS'd)
|
||||
// - any other POV term → normalized to "你" (LLM slip-up safety net)
|
||||
// - activeCharacters → POV is NEVER allowed (player has no body in-scene)
|
||||
// - CharacterDesigner → never invoked for "你" or POV variants
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Canonical speaker name for the player's point of view. */
const POV_DISPLAY_NAME = "你";

/**
 * Other names the model may use for the player. Lookups against this set are
 * exact-match, which is why each casing is listed explicitly.
 */
const POV_VARIANTS = new Set([
  // Chinese
  "玩家", "我", "主角",
  // English
  "protagonist", "Protagonist",
  "player", "Player", "PLAYER",
  "MC", "mc", "Mc",
  "I", "i",
  "me", "Me", "ME",
]);
|
||||
|
||||
/** True when `name` is the POV display name or one of its listed variants. */
function isPovName(name: string): boolean {
  if (name === POV_DISPLAY_NAME) {
    return true;
  }
  return POV_VARIANTS.has(name);
}
|
||||
|
||||
/**
 * Normalizes a speaker name: any listed POV variant collapses to the POV
 * display name, every other name passes through unchanged. The caller is
 * expected to pass already-trimmed input.
 */
function normalizeSpeakerName(name: string): string {
  if (POV_VARIANTS.has(name)) {
    return POV_DISPLAY_NAME;
  }
  return name;
}
|
||||
|
||||
/**
 * Coerces a raw choice effect into a valid BeatChoiceEffect.
 *
 * - `kind === "advance-beat"` with a usable `targetBeatId` stays an
 *   advance-beat.
 * - Anything else becomes a change-scene whose seed defaults to "未指定".
 *
 * The input is untrusted LLM JSON, so values are narrowed at runtime: strings
 * are trimmed, finite numbers (e.g. a numeric beat id) are stringified, and
 * every other value counts as missing instead of throwing.
 *
 * @param raw Raw effect from the model; may be undefined or null.
 */
function coerceEffect(raw: RawEffect | undefined): BeatChoiceEffect {
  const text = (value: unknown): string => {
    if (typeof value === "string") return value.trim();
    if (typeof value === "number" && Number.isFinite(value)) return String(value);
    return "";
  };

  const targetBeatId = text(raw?.targetBeatId);
  if (raw?.kind === "advance-beat" && targetBeatId) {
    return { kind: "advance-beat", targetBeatId };
  }
  return {
    kind: "change-scene",
    nextSceneSeed: text(raw?.nextSceneSeed) || "未指定",
  };
}
|
||||
|
||||
/**
 * Coerces one raw choice into a valid BeatChoice.
 *
 * Missing parts are defaulted from the choice's position: id `c<idx+1>` and
 * label `选项 <idx+1>`. The input is untrusted LLM JSON, so a null/non-object
 * entry is treated as an empty choice, strings are trimmed, finite numbers are
 * stringified, and other values count as missing instead of throwing.
 *
 * @param raw Raw choice from the model.
 * @param idx Zero-based position of the choice within its beat.
 */
function coerceChoice(raw: RawChoice, idx: number): BeatChoice {
  const text = (value: unknown): string => {
    if (typeof value === "string") return value.trim();
    if (typeof value === "number" && Number.isFinite(value)) return String(value);
    return "";
  };
  const source: RawChoice = raw !== null && typeof raw === "object" ? raw : {};

  return {
    id: text(source.id) || `c${idx + 1}`,
    label: text(source.label) || `选项 ${idx + 1}`,
    effect: coerceEffect(source.effect),
  };
}
|
||||
|
||||
/**
 * Coerces a beat's raw `next` into a valid BeatNext.
 *
 * - `type === "choice"` with a non-empty `choices` array becomes a choice node
 *   (each entry goes through `coerceChoice`).
 * - Everything else becomes a `continue` to `nextBeatId`, or to
 *   `fallbackBeatId` when no usable target was given.
 *
 * `nextBeatId` is untrusted LLM JSON: strings are trimmed, finite numbers are
 * stringified, and any other value counts as missing instead of throwing.
 *
 * @param raw            Raw `next` from the model; may be undefined or null.
 * @param fallbackBeatId Target used when `nextBeatId` is unusable.
 */
function coerceNext(raw: RawNext | undefined, fallbackBeatId: string): BeatNext {
  if (raw?.type === "choice" && Array.isArray(raw.choices) && raw.choices.length > 0) {
    return {
      type: "choice",
      choices: raw.choices.map((choice, position) => coerceChoice(choice, position)),
    };
  }

  const target: unknown = raw?.nextBeatId;
  let nextBeatId = "";
  if (typeof target === "string") {
    nextBeatId = target.trim();
  } else if (typeof target === "number" && Number.isFinite(target)) {
    nextBeatId = String(target);
  }

  return {
    type: "continue",
    nextBeatId: nextBeatId || fallbackBeatId,
  };
}
|
||||
|
||||
/**
 * Coerces a beat's raw `activeCharacters` into validated entries.
 *
 * Entries are dropped when they are not objects, have no usable string name,
 * or name the POV/player (the player is never IN the picture — stripping it
 * keeps the CharacterDesigner from generating a portrait for the player). A
 * blank or non-string `pose` is simply omitted. Because the input is untrusted
 * LLM JSON, malformed entries are skipped rather than allowed to throw.
 *
 * @param raw Raw list from the model.
 * @returns The surviving entries, or `undefined` when `raw` is not an array
 *          or nothing survives.
 */
function coerceActiveCharacters(
  raw: RawActiveCharacter[] | undefined,
): BeatActiveCharacter[] | undefined {
  if (!Array.isArray(raw)) return undefined;

  const out: BeatActiveCharacter[] = [];
  for (const entry of raw) {
    const item: unknown = entry;
    if (typeof item !== "object" || item === null) continue;

    const { name: rawName, pose: rawPose } = item as { name?: unknown; pose?: unknown };
    const name = typeof rawName === "string" ? rawName.trim() : "";
    if (!name || isPovName(name)) continue;

    const pose = typeof rawPose === "string" ? rawPose.trim() : "";
    out.push(pose ? { name, pose } : { name });
  }

  return out.length > 0 ? out : undefined;
}
|
||||
|
||||
/**
 * Coerces one raw beat into a valid Beat.
 *
 * - `id` defaults to `b<idx+1>`.
 * - A missing `continue` target defaults to the following beat (`b<idx+2>`);
 *   the LAST beat gets an empty fallback on purpose, so that repairBeats() can
 *   turn a last/dangling continue into a real scene-change exit and the player
 *   never gets stuck self-looping on it.
 * - Any POV variant of the speaker is normalized to the POV display name;
 *   other speaker names pass through unchanged.
 * - `lineDelivery` is kept only when there is a line and the speaker is not
 *   the POV — the original note here says it only matters for TTS, which is
 *   skipped for the POV speaker.
 *
 * The input is untrusted LLM JSON: a null/non-object beat is treated as empty,
 * strings are trimmed, finite numbers (e.g. a numeric id) are stringified, and
 * other values count as missing instead of throwing.
 *
 * @param raw        Raw beat from the model.
 * @param idx        Zero-based position of the beat in the scene.
 * @param totalBeats Number of beats in the scene.
 */
function coerceBeat(raw: RawBeat, idx: number, totalBeats: number): Beat {
  const text = (value: unknown): string | undefined => {
    if (typeof value === "string") return value.trim() || undefined;
    if (typeof value === "number" && Number.isFinite(value)) return String(value);
    return undefined;
  };
  const source: RawBeat = raw !== null && typeof raw === "object" ? raw : {};

  const id = text(source.id) ?? `b${idx + 1}`;
  const fallback = idx + 1 < totalBeats ? `b${idx + 2}` : "";

  const rawSpeaker = text(source.speaker);
  const speaker = rawSpeaker ? normalizeSpeakerName(rawSpeaker) : undefined;
  const line = text(source.line);

  return {
    id,
    narration: text(source.narration),
    speaker,
    line,
    lineDelivery:
      line && speaker !== POV_DISPLAY_NAME ? text(source.lineDelivery) : undefined,
    activeCharacters: coerceActiveCharacters(source.activeCharacters),
    next: coerceNext(source.next, fallback),
  };
}
|
||||
|
||||
/** Scene seed used by the synthesized fallback exit choice. */
const FALLBACK_SEED = "故事继续推进";
|
||||
|
||||
/**
 * Builds the synthetic "继续" choice that leaves the scene from `beatId`.
 * Its id is `<beatId>__exit` and its effect is a change-scene seeded with
 * FALLBACK_SEED.
 */
function fallbackExitChoice(beatId: string): BeatChoice {
  const exitChoice: BeatChoice = {
    id: `${beatId}__exit`,
    label: "继续",
    effect: { kind: "change-scene", nextSceneSeed: FALLBACK_SEED },
  };
  return exitChoice;
}
|
||||
|
||||
/**
 * Makes beat ids unique within a scene.
 *
 * Beat ids are graph keys (the front-end's `beats.find(b => b.id === ...)`,
 * the session's `visitedBeatIds`, and `continue` / `advance-beat` targets). If
 * the model reuses an id, the second occurrence becomes silently unreachable
 * and external references collapse onto the first beat.
 *
 * The first occurrence of an id is kept as is. Each later duplicate is renamed
 * to `<id>_<n>`, using the smallest n >= 2 that collides with NO id in the
 * scene — neither one already processed nor one owned by a later beat — so a
 * rename can never steal a legitimate beat's id. The renamed beat's OWN
 * self-references are rewritten to the new id; references from other beats
 * keep pointing at the first occurrence.
 *
 * @param beats Beats in scene order; not mutated.
 * @returns A new array in which untouched beats are returned as the same
 *          objects.
 */
function ensureUniqueBeatIds(beats: Beat[]): Beat[] {
  // Every id that is in use: all original ids up front, plus each new id.
  const taken = new Set<string>(beats.map((b) => b.id));
  // Ids whose first occurrence has been passed already.
  const seen = new Set<string>();

  return beats.map((b): Beat => {
    if (!seen.has(b.id)) {
      seen.add(b.id);
      return b;
    }

    const oldId = b.id;
    let n = 2;
    while (taken.has(`${oldId}_${n}`)) n += 1;
    const newId = `${oldId}_${n}`;
    taken.add(newId);
    seen.add(newId);

    let next = b.next;
    if (next.type === "continue" && next.nextBeatId === oldId) {
      next = { type: "continue", nextBeatId: newId };
    } else if (next.type === "choice") {
      next = {
        type: "choice",
        choices: next.choices.map((c) =>
          c.effect.kind === "advance-beat" && c.effect.targetBeatId === oldId
            ? {
                ...c,
                effect: { kind: "advance-beat" as const, targetBeatId: newId },
              }
            : c,
        ),
      };
    }
    return { ...b, id: newId, next };
  });
}
|
||||
|
||||
/**
 * Repairs referential integrity AND guarantees the scene is escapable:
 *   - a `continue` whose target is missing or is the beat itself is repointed
 *     at the next beat in order; when there is no next beat it becomes a
 *     single-choice scene-change exit
 *   - an `advance-beat` choice whose target does not exist is downgraded to a
 *     scene change
 *   - if afterwards no change-scene choice exists anywhere, a fallback exit is
 *     appended to the last beat (replacing its `continue`, if it had one)
 *
 * @param beats Beats in scene order; not mutated.
 * @returns A new array of repaired beats.
 */
function repairBeats(beats: Beat[]): Beat[] {
  const knownIds = new Set<string>();
  for (const beat of beats) {
    knownIds.add(beat.id);
  }

  const repaired: Beat[] = [];
  beats.forEach((beat, position) => {
    const flow = beat.next;

    if (flow.type !== "continue") {
      // Choice node: downgrade advance-beat effects that point nowhere.
      const choices = flow.choices.map((choice) => {
        const effect = choice.effect;
        if (effect.kind !== "advance-beat" || knownIds.has(effect.targetBeatId)) {
          return choice;
        }
        return {
          ...choice,
          effect: {
            kind: "change-scene" as const,
            nextSceneSeed: "未指定(导演引用不存在的 beat,已降级为换场)",
          },
        };
      });
      repaired.push({ ...beat, next: { type: "choice", choices } });
      return;
    }

    // Continue node: keep it only when it points at a different, existing beat.
    const pointsElsewhere = flow.nextBeatId !== beat.id;
    if (pointsElsewhere && knownIds.has(flow.nextBeatId)) {
      repaired.push(beat);
      return;
    }

    const followingId = beats[position + 1]?.id;
    if (followingId) {
      repaired.push({ ...beat, next: { type: "continue", nextBeatId: followingId } });
      return;
    }

    // Nowhere left to go: turn the dangling continue into a way out.
    repaired.push({
      ...beat,
      next: { type: "choice", choices: [fallbackExitChoice(beat.id)] },
    });
  });

  const escapable = repaired.some(
    (beat) =>
      beat.next.type === "choice" &&
      beat.next.choices.some((choice) => choice.effect.kind === "change-scene"),
  );
  if (escapable || repaired.length === 0) {
    return repaired;
  }

  // No exit anywhere: bolt one onto the final beat.
  const tailIndex = repaired.length - 1;
  const tail = repaired[tailIndex]!;
  const keptChoices = tail.next.type === "choice" ? tail.next.choices : [];
  repaired[tailIndex] = {
    ...tail,
    next: { type: "choice", choices: [...keptChoices, fallbackExitChoice(tail.id)] },
  };
  return repaired;
}
|
||||
|
||||
/**
 * Forces every choice id to be unique within the scene.
 *
 * Choice ids are keys the front-end uses to cache + consume prefetched scenes.
 * Two beats both defaulting to c1/c2 would make a transition reuse the WRONG
 * prefetched scene.
 *
 * The first occurrence of an id is kept. Each later duplicate is renamed to
 * `<id>_<n>`, using the smallest n >= 2 that collides with NO choice id in the
 * scene — neither one already processed nor one owned by a later choice — so a
 * rename never forces a legitimately unique id to change.
 *
 * @param beats Beats in scene order. Choice objects are renamed IN PLACE.
 * @returns The same `beats` array.
 */
function ensureUniqueChoiceIds(beats: Beat[]): Beat[] {
  // Every id that is in use: all original choice ids up front, plus new ones.
  const taken = new Set<string>();
  for (const b of beats) {
    if (b.next.type !== "choice") continue;
    for (const c of b.next.choices) taken.add(c.id);
  }

  // Ids whose first occurrence has been passed already.
  const seen = new Set<string>();
  for (const b of beats) {
    if (b.next.type !== "choice") continue;
    for (const c of b.next.choices) {
      if (seen.has(c.id)) {
        let n = 2;
        while (taken.has(`${c.id}_${n}`)) n += 1;
        c.id = `${c.id}_${n}`;
        taken.add(c.id);
      }
      seen.add(c.id);
    }
  }
  return beats;
}
|
||||
|
||||
// Normalize sceneKey to a safe lowercase-with-dashes English slug. If the
// model returns something weird (中文 / spaces / mixed case), best-effort
// fix; if it ends up empty, return undefined (the scene just won't be
// considered for img2img reuse).
//
// The value originates from model-produced JSON, so the declared type is not
// trusted at runtime: anything that is not a string (number, object, null,
// ...) yields undefined instead of throwing on `.trim()`.
function normalizeSceneKey(raw: string | undefined): string | undefined {
  if (typeof raw !== "string") return undefined;

  const slug = raw
    .trim()
    .toLowerCase()
    // Every run of characters outside [a-z0-9-] becomes one dash...
    .replace(/[^a-z0-9-]+/g, "-")
    // ...then adjacent dashes are collapsed...
    .replace(/-+/g, "-")
    // ...and a single leading/trailing dash is stripped.
    .replace(/^-|-$/g, "");

  return slug.length > 0 ? slug : undefined;
}
|
||||
|
||||
// Best-effort coercion of an untrusted value into a list of non-empty,
// trimmed strings. Non-arrays yield undefined; non-string and blank entries
// are dropped; an array with nothing left also yields undefined.
function coerceStringArray(raw: unknown): string[] | undefined {
  if (!Array.isArray(raw)) return undefined;

  const kept: string[] = [];
  for (const entry of raw) {
    if (typeof entry !== "string") continue;
    const text = entry.trim();
    if (text !== "") kept.push(text);
  }

  return kept.length === 0 ? undefined : kept;
}
|
||||
|
||||
// Extract the volatile story-memory rewrite from the Writer's JSON. A field
// is copied into the patch only when it carries real content (a non-blank
// string, or a list with at least one non-blank string). When nothing
// qualifies — or the raw value is absent / not an object — the result is
// undefined, so the director leaves the carried StoryState untouched.
function coerceStoryStatePatch(
  raw: RawStoryStatePatch | undefined,
): StoryStatePatch | undefined {
  if (!raw || typeof raw !== "object") return undefined;

  // Non-strings collapse to "" so they are treated the same as blank text.
  const trimmedText = (value: unknown): string =>
    typeof value === "string" ? value.trim() : "";

  const result: StoryStatePatch = {};

  const synopsisText = trimmedText(raw.synopsis);
  if (synopsisText) {
    result.synopsis = synopsisText;
  }

  const threadList = coerceStringArray(raw.openThreads);
  if (threadList) {
    result.openThreads = threadList;
  }

  const relationshipList = coerceStringArray(raw.relationships);
  if (relationshipList) {
    result.relationships = relationshipList;
  }

  const hookText = trimmedText(raw.nextHook);
  if (hookText) {
    result.nextHook = hookText;
  }

  if (Object.keys(result).length === 0) return undefined;
  return result;
}
|
||||
|
||||
// Writer agent — one LLM call that produces the next scene.
//
// Sends the Writer system prompt plus the session-derived user message,
// parses the JSON reply, then coerces and repairs the beats (unique beat
// ids -> repaired links/exits -> unique choice ids) before assembling the
// WriterOutput.
//
// Throws Error("Writer returned no beats") when the reply has no usable
// `beats` array. Every other field is best-effort: a missing or wrongly
// typed value falls back to a default instead of aborting the scene.
export async function runWriter(
  config: ProviderConfig,
  session: Session,
): Promise<WriterOutput> {
  const raw = await chat(
    config,
    [
      { role: "system", content: WRITER_SYSTEM },
      { role: "user", content: buildWriterUserMessage(session) },
    ],
    { temperature: 0.9, responseFormat: "json_object" },
  );

  const parsed = parseJsonLoose<RawScene>(raw);
  // The reply is untrusted: valid JSON can still be `null`, a string, a
  // number... Treat any non-object the same as "no beats" rather than
  // failing with an opaque TypeError on property access.
  if (parsed === null || typeof parsed !== "object") {
    throw new Error("Writer returned no beats");
  }

  const rawBeats = Array.isArray(parsed.beats) ? parsed.beats : [];
  if (rawBeats.length === 0) {
    throw new Error("Writer returned no beats");
  }

  const beats = ensureUniqueChoiceIds(
    repairBeats(
      ensureUniqueBeatIds(
        rawBeats.map((b, i) => coerceBeat(b, i, rawBeats.length)),
      ),
    ),
  );

  // Runtime-check the string fields before calling `.trim()`: the declared
  // types describe what the model was asked for, not what it returned.
  const declaredEntry =
    typeof parsed.entryBeatId === "string"
      ? parsed.entryBeatId.trim()
      : undefined;
  // Use the declared entry only if it names a real beat; otherwise start at
  // the first beat.
  const entryBeatId =
    declaredEntry && beats.some((b) => b.id === declaredEntry)
      ? declaredEntry
      : beats[0]!.id;

  const sceneSummary =
    typeof parsed.sceneSummary === "string" ? parsed.sceneSummary.trim() : "";

  return {
    sceneSummary: sceneSummary || "未指定场景概要",
    sceneKey: normalizeSceneKey(
      typeof parsed.sceneKey === "string" ? parsed.sceneKey : undefined,
    ),
    entryBeatId,
    beats,
    storyStatePatch: coerceStoryStatePatch(parsed.storyStatePatch),
  };
}
|
||||
|
||||
// List every distinct character name that appears in this scene's beats, so
// the orchestrator can decide which ones need the CharacterDesigner to
// fire. Names come from both the `speaker` field AND `activeCharacters`
// (a character can be on-screen without speaking), in first-seen order.
//
// POV names ("你" / 玩家 / 主角 / ...) are left out entirely — the player is
// never designed (no portrait, no voice, no archetype).
export function collectActiveCharacterNames(beats: Beat[]): string[] {
  const names = new Set<string>();
  const consider = (name: string): void => {
    if (!isPovName(name)) names.add(name);
  };

  beats.forEach(({ speaker, activeCharacters }) => {
    if (speaker) consider(speaker);
    if (!activeCharacters) return;
    for (const { name } of activeCharacters) {
      consider(name);
    }
  });

  return [...names];
}
|
||||
|
||||
// Re-export the POV constants and name helpers (isPovName, normalizeSpeakerName) for downstream filters (director's orphanSpeakers).
|
||||
export { POV_DISPLAY_NAME, POV_VARIANTS, isPovName, normalizeSpeakerName };
|
||||
Reference in New Issue
Block a user