Merge pull request #1 from zonghaoyuan/feature/story-harness-opt
feat(engine): Architect agent + cross-scene StoryState coherence
This commit is contained in:
@@ -20,3 +20,5 @@ npm-debug.log*
|
||||
pnpm-debug.log*
|
||||
|
||||
repomix-output.xml
|
||||
|
||||
users.md
|
||||
|
||||
@@ -187,6 +187,7 @@ function prefetchScenePath(
|
||||
const carriedBase: Session = {
|
||||
...baseSession,
|
||||
characters: data.characters,
|
||||
storyState: data.storyState,
|
||||
};
|
||||
prefetchScenePath(pool, carriedBase, [...steps, nextStep], depth + 1);
|
||||
}
|
||||
@@ -539,6 +540,7 @@ function PlayInner() {
|
||||
},
|
||||
],
|
||||
characters: data.characters,
|
||||
storyState: data.storyState,
|
||||
};
|
||||
visitedBeatsRef.current = [data.scene.entryBeatId];
|
||||
setSession(initial);
|
||||
@@ -635,6 +637,7 @@ function PlayInner() {
|
||||
},
|
||||
],
|
||||
characters: result.characters,
|
||||
storyState: result.storyState,
|
||||
};
|
||||
visitedBeatsRef.current = [result.scene.entryBeatId];
|
||||
setSession(newSession);
|
||||
|
||||
@@ -0,0 +1,90 @@
|
||||
import { chat } from "@infiplot/ai-client";
|
||||
import type { ProviderConfig, Session, StoryState } from "@infiplot/types";
|
||||
import { parseJsonLoose } from "../jsonParser";
|
||||
import { ARCHITECT_SYSTEM, buildArchitectUserMessage } from "../prompts";
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
// Architect agent — ONE LLM call at session start.
|
||||
//
|
||||
// Expands the user's (often terse) world + style prompt into a real story
|
||||
// bible: a second-person protagonist with a want and a flaw, a single
|
||||
// central dramatic question (logline), a genre frame that anchors the
|
||||
// 爽点 rhythm, an engineered cold-open for scene 1 (nextHook), and a small
|
||||
// intentional cast. Seeds the StoryState that the Writer reads and updates
|
||||
// every scene — so the story has a spine from beat one instead of being
|
||||
// improvised cold.
|
||||
//
|
||||
// Everything is best-effort coerced with fallbacks: a malformed LLM
|
||||
// response can never abort session start — worst case the Writer just gets
|
||||
// a thinner bible and improvises more.
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Untrusted shape of the Architect's JSON reply.
 *
 * Every field is `unknown` on purpose: nothing here is trusted until
 * `runArchitect` has coerced it with `str` / `strArray`.
 */
type RawStoryState = {
  /** Central dramatic question; coerced with `str`. */
  logline?: unknown;
  /** Genre frame; coerced to a single string. */
  genreTags?: unknown;
  /** Second-person protagonist description; coerced with `str`. */
  protagonist?: unknown;
  /** Notes on the planned cast; coerced with `str`. */
  castNotes?: unknown;
  /** Story-so-far seed; coerced with `str`. */
  synopsis?: unknown;
  /** Open plot threads; coerced with `strArray`. */
  openThreads?: unknown;
  /** Relationship notes; coerced with `strArray`. */
  relationships?: unknown;
  /** Direction for the next scene's hook; coerced with `str`. */
  nextHook?: unknown;
};
|
||||
|
||||
/**
 * Coerce an untrusted value to a trimmed string.
 *
 * @param raw - Any value, typically one field of parsed LLM JSON.
 * @returns The trimmed text when `raw` is a string; `""` for every other type.
 */
function str(raw: unknown): string {
  if (typeof raw !== "string") return "";
  return raw.trim();
}
|
||||
|
||||
/**
 * Coerce an untrusted value to a list of non-empty, trimmed strings.
 *
 * Non-string entries and entries that are blank after trimming are dropped.
 *
 * @param raw - Any value, typically one field of parsed LLM JSON.
 * @returns The cleaned list, or `undefined` when `raw` is not an array or
 *   nothing usable remains.
 */
function strArray(raw: unknown): string[] | undefined {
  if (!Array.isArray(raw)) return undefined;

  const kept: string[] = [];
  for (const item of raw) {
    if (typeof item !== "string") continue;
    const trimmed = item.trim();
    if (trimmed.length > 0) kept.push(trimmed);
  }

  return kept.length > 0 ? kept : undefined;
}
|
||||
|
||||
/** Protagonist line used whenever the model does not supply one. */
const FALLBACK_PROTAGONIST =
  "你是这个故事的主角(第二人称视角,永不出现在画面里)。";

/** Synopsis seed used before any scene has been written. */
const FALLBACK_SYNOPSIS = "故事即将开始。";

/** Build the thinnest valid bible, seeded only from the player's raw world prompt. */
function minimalStoryState(session: Session): StoryState {
  return {
    logline: session.worldSetting,
    genreTags: "",
    protagonist: FALLBACK_PROTAGONIST,
    synopsis: FALLBACK_SYNOPSIS,
  };
}

/**
 * Coerce the model's genre field to one display string. A string is trimmed;
 * a list of tags is joined with " / " instead of being discarded.
 */
function genreText(raw: unknown): string {
  if (Array.isArray(raw)) return (strArray(raw) ?? []).join(" / ");
  return str(raw);
}

/**
 * Run the Architect agent: one LLM call that expands the session's world and
 * style prompt into the initial StoryState.
 *
 * Best-effort by design: this function never rejects. Any failure in `chat`
 * or `parseJsonLoose`, or a reply that is not a JSON object, is logged and
 * replaced by a minimal bible so session start can proceed.
 *
 * @param config - Provider configuration for the text model.
 * @param session - The session being started; `worldSetting` seeds the fallbacks.
 * @returns A StoryState with `logline`, `genreTags`, `protagonist` and
 *   `synopsis` always set; the remaining fields are `undefined` when the model
 *   gave nothing usable.
 */
export async function runArchitect(
  config: ProviderConfig,
  session: Session,
): Promise<StoryState> {
  const fallback = minimalStoryState(session);

  try {
    const raw = await chat(
      config,
      [
        { role: "system", content: ARCHITECT_SYSTEM },
        { role: "user", content: buildArchitectUserMessage(session) },
      ],
      { temperature: 0.85, responseFormat: "json_object" },
    );

    const parsed = parseJsonLoose<RawStoryState>(raw);
    // Fail explicitly (and land in the catch below) rather than relying on an
    // incidental TypeError when the reply parsed to null or a primitive.
    if (parsed === null || typeof parsed !== "object") {
      throw new Error("reply was not a JSON object");
    }

    return {
      // Stable spine — fall back to the raw world prompt so the bible is never
      // wholly empty even if the model returns garbage.
      logline: str(parsed.logline) || fallback.logline,
      genreTags: genreText(parsed.genreTags),
      protagonist: str(parsed.protagonist) || fallback.protagonist,
      castNotes: str(parsed.castNotes) || undefined,
      // Volatile seeds — the opening Writer will rewrite these via its patch.
      synopsis: str(parsed.synopsis) || fallback.synopsis,
      openThreads: strArray(parsed.openThreads),
      relationships: strArray(parsed.relationships),
      nextHook: str(parsed.nextHook) || undefined,
    };
  } catch (err) {
    // chat() or parseJsonLoose() can throw (network / unrepairable JSON).
    // Never let that abort session start: return the minimal bible and let the
    // Writer improvise.
    const msg = err instanceof Error ? err.message : String(err);
    console.error(`[architect] failed, using minimal bible: ${msg}`);
    return fallback;
  }
}
|
||||
@@ -15,25 +15,20 @@ import {
|
||||
} from "../prompts";
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
// CharacterDesigner agent — designs ONE new character end-to-end.
|
||||
// CharacterDesigner agent — designs ONE new character.
|
||||
//
|
||||
// Pipeline (per character, all the slow parts are parallelized):
|
||||
// Exposed as three GRANULAR stages so the director can schedule the slow
|
||||
// parts around the Painter (a voice is never needed to paint a scene, and
|
||||
// only entry-beat characters' portraits are referenced by the Painter):
|
||||
//
|
||||
// 1. LLM call — designs BOTH visual + voice cards in one shot
|
||||
// (intentional: same agent thinks about who this character IS,
|
||||
// which keeps appearance and vocal personality coherent)
|
||||
// 1. designCharacterCard — ONE LLM call → visual + voice TEXT cards
|
||||
// (intentional bundling: the same agent thinks about who this character
|
||||
// IS, keeping appearance and vocal personality coherent)
|
||||
// 2. renderCharacterPortrait — base portrait image (Runware URL + UUID)
|
||||
// 3. provisionCharacterVoice — Xiaomi MiMo voicedesign → reference audio
|
||||
//
|
||||
// 2. In parallel:
|
||||
// a. Image gen — base portrait (Runware returns URL + UUID in one shot;
|
||||
// no separate upload round-trip is needed for cheap re-reference)
|
||||
// b. Voice provisioning — Xiaomi MiMo voicedesign from voiceDescription
|
||||
// → reference audio for later voiceclone synth
|
||||
//
|
||||
// 3. Returns merged Character ready to be added to session.characters
|
||||
//
|
||||
// Each step degrades gracefully — if image gen fails we return the
|
||||
// character without a portrait; if voice gen fails we return without
|
||||
// voice. The game keeps running even when sub-components fail.
|
||||
// Each step degrades gracefully — if image gen fails the character just has
|
||||
// no portrait; if voice gen fails it has no voice. The game keeps running.
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
type CharacterDesignOutput = {
|
||||
@@ -77,7 +72,7 @@ async function runDesignLLM(
|
||||
//
|
||||
// In mock mode we return the data URI as basePortraitUrl with no UUID
|
||||
// (Painter is short-circuited anyway, so the lack of a UUID is moot).
|
||||
async function renderPortrait(
|
||||
export async function renderCharacterPortrait(
|
||||
config: EngineConfig,
|
||||
charName: string,
|
||||
visualDescription: string,
|
||||
@@ -101,7 +96,7 @@ async function renderPortrait(
|
||||
}
|
||||
}
|
||||
|
||||
async function provisionVoiceSafe(
|
||||
export async function provisionCharacterVoice(
|
||||
config: EngineConfig,
|
||||
voiceDescription: string,
|
||||
charName: string,
|
||||
@@ -116,45 +111,31 @@ async function provisionVoiceSafe(
|
||||
}
|
||||
}
|
||||
|
||||
// Single-character design pipeline. Called by the orchestrator once per
|
||||
// NEW character name; multiple characters in the same scene run their
|
||||
// pipelines in parallel at the orchestrator level.
|
||||
export async function designCharacter(
|
||||
// The cheap first stage: design the visual + voice TEXT cards in one LLM
|
||||
// call. The director then schedules renderCharacterPortrait /
|
||||
// provisionCharacterVoice around the Painter. Multiple new characters in the
|
||||
// same scene run this stage in parallel at the director level.
|
||||
/**
 * Text-only result of the character-design LLM stage (`designCharacterCard`).
 * Carries no portrait and no provisioned voice.
 */
export type CharacterCard = {
  /** Character name as referenced by the scene. */
  name: string;
  /** Appearance description; when absent the director attempts no portrait. */
  visualDescription?: string;
  /** Voice description handed to voice provisioning. */
  voiceDescription: string;
};
|
||||
|
||||
export async function designCharacterCard(
|
||||
config: EngineConfig,
|
||||
session: Session,
|
||||
charName: string,
|
||||
): Promise<Character> {
|
||||
const tTotal = Date.now();
|
||||
|
||||
// Step 1 — LLM design (visual + voice). Must complete first.
|
||||
): Promise<CharacterCard> {
|
||||
const tDesign = Date.now();
|
||||
const design = await runDesignLLM(config, session, charName);
|
||||
tlog(`[charDesigner ${charName}] design LLM`, tDesign);
|
||||
|
||||
const visualDescription = design.visualDescription?.trim();
|
||||
const voiceDescription =
|
||||
design.voiceDescription?.trim() ||
|
||||
`请根据角色名「${charName}」推断其性别、年龄与气质,生成最贴合的音色。所属世界观:${session.worldSetting}`;
|
||||
|
||||
// Step 2 — parallel: portrait + voice provisioning.
|
||||
const tProvision = Date.now();
|
||||
const portraitPromise = visualDescription
|
||||
? renderPortrait(config, charName, visualDescription, session.styleGuide)
|
||||
: Promise.resolve({} as Awaited<ReturnType<typeof renderPortrait>>);
|
||||
const voicePromise = provisionVoiceSafe(config, voiceDescription, charName);
|
||||
|
||||
const [portrait, voice] = await Promise.all([portraitPromise, voicePromise]);
|
||||
tlog(`[charDesigner ${charName}] portrait+voice parallel`, tProvision);
|
||||
|
||||
tlog(`[charDesigner ${charName}] TOTAL`, tTotal);
|
||||
|
||||
return {
|
||||
name: charName,
|
||||
voiceDescription,
|
||||
visualDescription,
|
||||
basePortraitUrl: portrait.basePortraitUrl,
|
||||
basePortraitUuid: portrait.basePortraitUuid,
|
||||
voice,
|
||||
visualDescription: design.visualDescription?.trim() || undefined,
|
||||
voiceDescription:
|
||||
design.voiceDescription?.trim() ||
|
||||
`请根据角色名「${charName}」推断其性别、年龄与气质,生成最贴合的音色。所属世界观:${session.worldSetting}`,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -169,6 +150,6 @@ export async function provisionVoiceForName(
|
||||
charName: string,
|
||||
): Promise<Character> {
|
||||
const voiceDescription = `请根据角色名「${charName}」推断其性别、年龄与气质,生成最贴合的音色。所属世界观:${session.worldSetting}`;
|
||||
const voice = await provisionVoiceSafe(config, voiceDescription, charName);
|
||||
const voice = await provisionCharacterVoice(config, voiceDescription, charName);
|
||||
return { name: charName, voiceDescription, voice };
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ import type {
|
||||
BeatNext,
|
||||
ProviderConfig,
|
||||
Session,
|
||||
StoryStatePatch,
|
||||
} from "@infiplot/types";
|
||||
import { parseJsonLoose } from "../jsonParser";
|
||||
import { WRITER_SYSTEM, buildWriterUserMessage } from "../prompts";
|
||||
@@ -28,6 +29,9 @@ export type WriterOutput = {
|
||||
sceneKey?: string;
|
||||
entryBeatId: string;
|
||||
beats: Beat[];
|
||||
/** Rewritten volatile story memory — merged onto the carried StoryState by
|
||||
* the director. Absent when the model omitted it (rare; bible just stales). */
|
||||
storyStatePatch?: StoryStatePatch;
|
||||
};
|
||||
|
||||
// Raw shapes — what the LLM produces before validation / coercion.
|
||||
@@ -59,11 +63,18 @@ type RawBeat = {
|
||||
activeCharacters?: RawActiveCharacter[];
|
||||
next?: RawNext;
|
||||
};
|
||||
/**
 * Untrusted shape of the story-memory patch inside the Writer's JSON.
 * Fields stay `unknown` until `coerceStoryStatePatch` validates them.
 */
type RawStoryStatePatch = {
  synopsis?: unknown;
  openThreads?: unknown;
  relationships?: unknown;
  nextHook?: unknown;
};
|
||||
/** Raw top-level scene object as produced by the Writer LLM, before validation. */
type RawScene = {
  sceneSummary?: string;
  sceneKey?: string;
  entryBeatId?: string;
  beats?: RawBeat[];
  /** Raw story-memory rewrite; validated by `coerceStoryStatePatch`. */
  storyStatePatch?: RawStoryStatePatch;
};
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
@@ -321,6 +332,33 @@ function normalizeSceneKey(raw: string | undefined): string | undefined {
|
||||
return slug.length > 0 ? slug : undefined;
|
||||
}
|
||||
|
||||
/**
 * Coerce an untrusted value to a list of non-empty, trimmed strings.
 *
 * Non-string entries and entries that are blank after trimming are dropped.
 *
 * @param raw - Any value taken from the Writer's parsed JSON.
 * @returns The cleaned list, or `undefined` when `raw` is not an array or
 *   nothing usable remains.
 */
function coerceStringArray(raw: unknown): string[] | undefined {
  if (!Array.isArray(raw)) return undefined;

  const kept: string[] = [];
  for (const entry of raw) {
    if (typeof entry !== "string") continue;
    const trimmed = entry.trim();
    if (trimmed.length > 0) kept.push(trimmed);
  }

  return kept.length > 0 ? kept : undefined;
}
|
||||
|
||||
/**
 * Extract the volatile story-memory rewrite from the Writer's JSON.
 *
 * Only fields that hold usable content are copied: non-blank strings for
 * `synopsis` / `nextHook`, non-empty string lists for `openThreads` /
 * `relationships`.
 *
 * @param raw - The untrusted `storyStatePatch` value, possibly absent.
 * @returns The validated patch, or `undefined` when `raw` is absent, not an
 *   object, or contributes no field, so the caller can leave the carried
 *   StoryState untouched.
 */
function coerceStoryStatePatch(
  raw: RawStoryStatePatch | undefined,
): StoryStatePatch | undefined {
  if (!raw || typeof raw !== "object") return undefined;

  // Trimmed text, or undefined when the value is not a non-blank string.
  const text = (value: unknown): string | undefined => {
    if (typeof value !== "string") return undefined;
    const trimmed = value.trim();
    return trimmed.length > 0 ? trimmed : undefined;
  };

  const patch: StoryStatePatch = {};

  const synopsis = text(raw.synopsis);
  if (synopsis !== undefined) patch.synopsis = synopsis;

  const openThreads = coerceStringArray(raw.openThreads);
  if (openThreads) patch.openThreads = openThreads;

  const relationships = coerceStringArray(raw.relationships);
  if (relationships) patch.relationships = relationships;

  const nextHook = text(raw.nextHook);
  if (nextHook !== undefined) patch.nextHook = nextHook;

  return Object.keys(patch).length > 0 ? patch : undefined;
}
|
||||
|
||||
export async function runWriter(
|
||||
config: ProviderConfig,
|
||||
session: Session,
|
||||
@@ -359,6 +397,7 @@ export async function runWriter(
|
||||
sceneKey: normalizeSceneKey(parsed.sceneKey),
|
||||
entryBeatId,
|
||||
beats,
|
||||
storyStatePatch: coerceStoryStatePatch(parsed.storyStatePatch),
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
+178
-48
@@ -6,8 +6,16 @@ import type {
|
||||
ProviderConfig,
|
||||
Scene,
|
||||
Session,
|
||||
StoryState,
|
||||
StoryStatePatch,
|
||||
} from "@infiplot/types";
|
||||
import { designCharacter, provisionVoiceForName } from "./agents/characterDesigner";
|
||||
import type { CharacterCard } from "./agents/characterDesigner";
|
||||
import {
|
||||
designCharacterCard,
|
||||
provisionCharacterVoice,
|
||||
provisionVoiceForName,
|
||||
renderCharacterPortrait,
|
||||
} from "./agents/characterDesigner";
|
||||
import { runCinematographer } from "./agents/cinematographer";
|
||||
import { runPainter } from "./agents/painter";
|
||||
import {
|
||||
@@ -27,26 +35,29 @@ import { INSERT_BEAT_SYSTEM, buildInsertBeatUserMessage } from "./prompts";
|
||||
//
|
||||
// Writer LLM (~3s, serial)
|
||||
// │
|
||||
// ├─ CharacterDesigner LLM × N (parallel per new char)
|
||||
// │ │
|
||||
// │ ├─ portrait gen (Runware returns URL + UUID in one call)
|
||||
// │ └─ voice provisioning (parallel within agent)
|
||||
// ├─ CharacterCard LLM × N (parallel per new char — TEXT only)
|
||||
// ├─ Cinematographer LLM (parallel with the cards)
|
||||
// │
|
||||
// ├─ Cinematographer LLM (parallel with all of the above)
|
||||
// └─ wait for cards + cinema
|
||||
// │
|
||||
// └─ wait for all parallel branches
|
||||
// ├─ entry-beat portraits ──┐ (block the Painter — its refs)
|
||||
// ▼ │
|
||||
// Painter — generateImage │ (overlapped, NOT on the paint path):
|
||||
// with referenceImages ├─ non-entry-beat portraits
|
||||
// │ └─ ALL voice provisioning + orphan voices
|
||||
// ▼
|
||||
// await the overlapped work, fold into the registry
|
||||
// │
|
||||
// ▼
|
||||
// Painter — generateImage with referenceImages (UUID/URL refs only;
|
||||
// no base64 to upload, since outputType=URL gives both back)
|
||||
// │
|
||||
// ▼
|
||||
// return { scene, sceneImageUrl, characters }
|
||||
// return { scene, sceneImageUrl, characters, storyState }
|
||||
//
|
||||
// The Cinematographer intentionally does NOT depend on CharacterDesigner
|
||||
// output — it only positions named characters in the frame, not their
|
||||
// appearance. This unlocks the parallelism that makes the full pipeline
|
||||
// ~9-12s instead of ~15-18s serial.
|
||||
// Two deliberate decouplings unlock the parallelism:
|
||||
// 1. The Cinematographer only POSITIONS named characters, so it needs no
|
||||
// visualDescription and runs alongside the card LLMs.
|
||||
// 2. The Painter only needs visualDescription TEXT (all on-stage) + the
|
||||
// entry-beat characters' PORTRAITS (its referenceImages). Voices are
|
||||
// never needed to paint, and non-entry portraits are never referenced —
|
||||
// so both overlap the (longest) paint call instead of blocking it.
|
||||
// ══════════════════════════════════════════════════════════════════════
|
||||
|
||||
function newSceneId(): string {
|
||||
@@ -112,10 +123,33 @@ function pickPriorSceneReference(
|
||||
return {};
|
||||
}
|
||||
|
||||
/**
 * Merge the Writer's volatile story-memory patch onto the carried StoryState.
 *
 * Only `synopsis`, `openThreads`, `relationships` and `nextHook` can be
 * overwritten, and only when the patch supplies them; every other field of
 * the carried state passes through unchanged.
 *
 * @param base - The carried state. `undefined` (e.g. a session created before
 *   the Architect existed) degrades to an empty spine rather than throwing.
 * @param patch - The validated patch, or `undefined` when there is none.
 * @returns `base` (or the empty spine) as-is when there is no patch;
 *   otherwise a new merged object.
 */
function applyStoryStatePatch(
  base: StoryState | undefined,
  patch: StoryStatePatch | undefined,
): StoryState {
  const carried: StoryState = base ?? {
    logline: "",
    genreTags: "",
    protagonist: "",
    synopsis: "",
  };
  if (!patch) return carried;

  const { synopsis, openThreads, relationships, nextHook } = patch;
  return {
    ...carried,
    synopsis: synopsis ?? carried.synopsis,
    openThreads: openThreads ?? carried.openThreads,
    relationships: relationships ?? carried.relationships,
    nextHook: nextHook ?? carried.nextHook,
  };
}
|
||||
|
||||
/** Everything `directScene` hands back for one generated scene. */
export type SceneResult = {
  /** The assembled scene. */
  scene: Scene;
  /** URL of the painted scene image. */
  sceneImageUrl: string;
  /** Character registry after merging this scene's characters. */
  characters: Character[];
  /** Carried StoryState with the Writer's patch for this scene merged in. */
  storyState: StoryState;
};
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
@@ -156,17 +190,19 @@ export async function directScene(
|
||||
writerOut.sceneKey,
|
||||
);
|
||||
|
||||
// Stage 2 — parallel: CharacterDesigner(s) and Cinematographer.
|
||||
// Cinematographer doesn't need character visualDescriptions (those are
|
||||
// appended at Painter stage), so it runs concurrently with chardesign.
|
||||
// ── Stage 2 — character cards (LLM) ∥ Cinematographer ──────────────────
|
||||
// Both are cheap LLM calls and neither needs the other's output, so they
|
||||
// run concurrently. The cards give us each new character's visualDescription
|
||||
// TEXT; portraits + voices are deferred to Stage 3 so they can overlap the
|
||||
// paint instead of blocking it.
|
||||
const tParallel = Date.now();
|
||||
|
||||
const designPromises = newCharNames.map((name) =>
|
||||
designCharacter(config, session, name).catch((err): Character => {
|
||||
const cardPromises = newCharNames.map((name) =>
|
||||
designCharacterCard(config, session, name).catch((err): CharacterCard => {
|
||||
const msg = err instanceof Error ? err.message : String(err);
|
||||
console.error(`[directScene] designCharacter(${name}) failed: ${msg}`);
|
||||
// Last-resort fallback: register with name only so the speaker isn't
|
||||
// unknown. Caller may try voice provisioning later or skip.
|
||||
console.error(`[directScene] designCharacterCard(${name}) failed: ${msg}`);
|
||||
// Last-resort fallback: a name + generic voice card so the speaker isn't
|
||||
// unknown. No visualDescription → no portrait is attempted for them.
|
||||
return {
|
||||
name,
|
||||
voiceDescription: `请根据角色名「${name}」推断其性别、年龄与气质。所属世界观:${session.worldSetting}`,
|
||||
@@ -183,40 +219,102 @@ export async function directScene(
|
||||
currentSceneKey: writerOut.sceneKey,
|
||||
});
|
||||
|
||||
const [designedChars, cinemaOut] = await Promise.all([
|
||||
Promise.all(designPromises),
|
||||
const [cards, cinemaOut] = await Promise.all([
|
||||
Promise.all(cardPromises),
|
||||
cinemaPromise,
|
||||
]);
|
||||
tlog("[directScene] CharacterDesigner+Cinematographer parallel", tParallel);
|
||||
tlog("[directScene] CharacterCards+Cinematographer parallel", tParallel);
|
||||
|
||||
// Merge new chars into a working registry that we'll pass to the Painter.
|
||||
const characters = mergeCharacters(session.characters, designedChars);
|
||||
// Working registry: existing characters + new cards. visualDescription text
|
||||
// is present now; portraits + voices fill in over the next two phases.
|
||||
let characters = mergeCharacters(
|
||||
session.characters,
|
||||
cards.map((c) => ({
|
||||
name: c.name,
|
||||
voiceDescription: c.voiceDescription,
|
||||
visualDescription: c.visualDescription,
|
||||
})),
|
||||
);
|
||||
|
||||
// Edge case: a speaker referenced by the Writer might not have been in
|
||||
// `activeCharacters` of any beat (LLM oversight), so they got skipped by
|
||||
// newCharNames. Catch them here and at least provision a voice so the
|
||||
// beat-audio path doesn't render silent. No portrait — they weren't
|
||||
// visible in the scene, so visual consistency doesn't matter for them.
|
||||
// ── Stage 3 — portraits + voices, scheduled around the Painter ─────────
|
||||
const tProvision = Date.now();
|
||||
|
||||
// Entry-beat character names: the ONLY portraits the Painter references
|
||||
// (collectReferenceImages slots in the entry beat's speaker + activeChars).
|
||||
const entryNames = new Set<string>();
|
||||
if (entryBeat?.speaker && !isPovName(entryBeat.speaker)) {
|
||||
entryNames.add(entryBeat.speaker);
|
||||
}
|
||||
for (const c of entryBeatActive) {
|
||||
if (!isPovName(c.name)) entryNames.add(c.name);
|
||||
}
|
||||
|
||||
type NamedPortrait = {
|
||||
name: string;
|
||||
basePortraitUrl?: string;
|
||||
basePortraitUuid?: string;
|
||||
};
|
||||
// Kick off portrait gen for every NEW char that has a visualDescription.
|
||||
// Entry-beat portraits block the Painter; the rest overlap it.
|
||||
const entryPortraitPromises: Promise<NamedPortrait>[] = [];
|
||||
const restPortraitPromises: Promise<NamedPortrait>[] = [];
|
||||
for (const card of cards) {
|
||||
const vd = card.visualDescription;
|
||||
if (!vd) continue;
|
||||
const p = renderCharacterPortrait(
|
||||
config,
|
||||
card.name,
|
||||
vd,
|
||||
session.styleGuide,
|
||||
).then((res): NamedPortrait => ({ name: card.name, ...res }));
|
||||
(entryNames.has(card.name) ? entryPortraitPromises : restPortraitPromises).push(p);
|
||||
}
|
||||
|
||||
// Kick off voice provisioning for every NEW char (never on the paint path).
|
||||
const voicePromises = cards.map((card) =>
|
||||
provisionCharacterVoice(config, card.voiceDescription, card.name).then(
|
||||
(voice): Character => ({
|
||||
name: card.name,
|
||||
voiceDescription: card.voiceDescription,
|
||||
voice,
|
||||
}),
|
||||
),
|
||||
);
|
||||
|
||||
// Edge case: a speaker the Writer referenced without listing in any beat's
|
||||
// activeCharacters. collectActiveCharacterNames already includes speakers,
|
||||
// so this is a rare defensive net. Provision a voice only (never on-screen).
|
||||
const speakerNames = new Set(
|
||||
writerOut.beats.map((b) => b.speaker).filter((n): n is string => Boolean(n)),
|
||||
);
|
||||
const orphanSpeakers = [...speakerNames].filter(
|
||||
// Pattern B: "你" (player) is a valid speaker but never gets a Character
|
||||
// record — TTS is intentionally skipped on the client. Filter POV out so
|
||||
// provisionVoiceForName isn't accidentally invoked for the player.
|
||||
(n) => !isPovName(n) && !characters.some((c) => c.name === n),
|
||||
// record — TTS is intentionally skipped on the client.
|
||||
(n) =>
|
||||
!isPovName(n) &&
|
||||
!characters.some((c) => c.name === n) &&
|
||||
!cards.some((c) => c.name === n),
|
||||
);
|
||||
const orphanPromises = orphanSpeakers.map((n) =>
|
||||
provisionVoiceForName(config, session, n),
|
||||
);
|
||||
if (orphanSpeakers.length > 0) {
|
||||
const orphans = await Promise.all(
|
||||
orphanSpeakers.map((n) => provisionVoiceForName(config, session, n)),
|
||||
);
|
||||
const merged = mergeCharacters(characters, orphans);
|
||||
characters.splice(0, characters.length, ...merged);
|
||||
}
|
||||
|
||||
// Stage 3 — Painter (depends on cinemaOut + characters).
|
||||
// On-stage characters for THIS scene are the ones in any beat — pass them
|
||||
// all so the archetype block covers anyone the player might encounter.
|
||||
// Block the Painter ONLY on entry-beat portraits (its referenceImages).
|
||||
const entryPortraits = await Promise.all(entryPortraitPromises);
|
||||
characters = mergeCharacters(
|
||||
characters,
|
||||
entryPortraits.map((p) => ({
|
||||
name: p.name,
|
||||
voiceDescription: "", // preserved from the card by mergeCharacters
|
||||
basePortraitUrl: p.basePortraitUrl,
|
||||
basePortraitUuid: p.basePortraitUuid,
|
||||
})),
|
||||
);
|
||||
tlog("[directScene] entry-beat portraits", tProvision);
|
||||
|
||||
// ── Stage 4 — Painter (depends on cinemaOut + on-stage visual cards +
|
||||
// entry portraits). On-stage = everyone named in any beat, so the archetype
|
||||
// block covers anyone the player might encounter in this scene.
|
||||
const onStageCharacters = characters.filter((c) =>
|
||||
allActiveNames.includes(c.name),
|
||||
);
|
||||
@@ -234,6 +332,30 @@ export async function directScene(
|
||||
);
|
||||
tlog("[directScene] Painter", tPainter);
|
||||
|
||||
// Fold in the work that overlapped the paint: remaining portraits, all
|
||||
// voices, and any orphan-speaker voices. Awaited before returning so the
|
||||
// session the client persists is fully provisioned for later scenes.
|
||||
const tOverlap = Date.now();
|
||||
const [restPortraits, voicedChars, orphanChars] = await Promise.all([
|
||||
Promise.all(restPortraitPromises),
|
||||
Promise.all(voicePromises),
|
||||
Promise.all(orphanPromises),
|
||||
]);
|
||||
characters = mergeCharacters(
|
||||
characters,
|
||||
restPortraits.map((p) => ({
|
||||
name: p.name,
|
||||
voiceDescription: "",
|
||||
basePortraitUrl: p.basePortraitUrl,
|
||||
basePortraitUuid: p.basePortraitUuid,
|
||||
})),
|
||||
);
|
||||
characters = mergeCharacters(characters, voicedChars);
|
||||
if (orphanChars.length > 0) {
|
||||
characters = mergeCharacters(characters, orphanChars);
|
||||
}
|
||||
tlog("[directScene] overlapped portraits+voices", tOverlap);
|
||||
|
||||
const scene: Scene = {
|
||||
id: newSceneId(),
|
||||
// scenePrompt is the cinematographer's English compositional output;
|
||||
@@ -249,9 +371,17 @@ export async function directScene(
|
||||
imageUrl: painted.imageUrl,
|
||||
};
|
||||
|
||||
// Merge the Writer's volatile memory rewrite onto the carried bible so the
|
||||
// throughline survives the next scene cut (orchestrator returns it; the
|
||||
// client persists it back into the session).
|
||||
const storyState = applyStoryStatePatch(
|
||||
session.storyState,
|
||||
writerOut.storyStatePatch,
|
||||
);
|
||||
|
||||
tlog("[directScene] TOTAL", tTotal);
|
||||
|
||||
return { scene, sceneImageUrl: painted.imageUrl, characters };
|
||||
return { scene, sceneImageUrl: painted.imageUrl, characters, storyState };
|
||||
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -9,6 +9,7 @@ export { annotateClick } from "./annotate";
|
||||
export { synthesizeBeat } from "./voice";
|
||||
export { mergeCharacters } from "./director";
|
||||
export type { SceneResult } from "./director";
|
||||
export { runArchitect } from "./agents/architect";
|
||||
export type { WriterOutput } from "./agents/writer";
|
||||
export type { CinematographerOutput } from "./agents/cinematographer";
|
||||
export type { InsertBeatPartial } from "@infiplot/types";
|
||||
|
||||
@@ -12,6 +12,7 @@ import type {
|
||||
VisionRequest,
|
||||
VisionResponse,
|
||||
} from "@infiplot/types";
|
||||
import { runArchitect } from "./agents/architect";
|
||||
import { annotateClick } from "./annotate";
|
||||
import { directInsertBeat, directScene } from "./director";
|
||||
import { synthesizeBeat } from "./voice";
|
||||
@@ -49,7 +50,18 @@ export async function startSession(
|
||||
characters: [],
|
||||
};
|
||||
|
||||
const { scene, sceneImageUrl, characters } = await directScene(config, session);
|
||||
// Stage 0 — Architect: expand the terse world/style prompt into a story
|
||||
// bible BEFORE the first scene. Serial by necessity (the opening Writer
|
||||
// reads session.storyState), but it gives the whole story a spine from beat
|
||||
// one — the latency is offset by the director's portrait/voice overlap win.
|
||||
const tArchitect = Date.now();
|
||||
session.storyState = await runArchitect(config.text, session);
|
||||
tlog("[start] Architect", tArchitect);
|
||||
|
||||
const { scene, sceneImageUrl, characters, storyState } = await directScene(
|
||||
config,
|
||||
session,
|
||||
);
|
||||
|
||||
tlog("[start] TOTAL", tTotal);
|
||||
|
||||
@@ -58,6 +70,7 @@ export async function startSession(
|
||||
scene,
|
||||
imageUrl: sceneImageUrl,
|
||||
characters,
|
||||
storyState,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -71,7 +84,7 @@ export async function requestScene(
|
||||
): Promise<SceneResponse> {
|
||||
const tTotal = Date.now();
|
||||
|
||||
const { scene, sceneImageUrl, characters } = await directScene(
|
||||
const { scene, sceneImageUrl, characters, storyState } = await directScene(
|
||||
config,
|
||||
req.session,
|
||||
);
|
||||
@@ -82,6 +95,7 @@ export async function requestScene(
|
||||
scene,
|
||||
imageUrl: sceneImageUrl,
|
||||
characters,
|
||||
storyState,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
+156
-10
@@ -3,20 +3,106 @@ import type {
|
||||
Character,
|
||||
Scene,
|
||||
Session,
|
||||
StoryState,
|
||||
} from "@infiplot/types";
|
||||
|
||||
// ══════════════════════════════════════════════════════════════════════
|
||||
// Multi-agent scene generation pipeline:
|
||||
// Writer (编剧) — narrative + beats[] + per-beat activeCharacters
|
||||
// Architect (总编剧) — ONE-TIME at session start: the story bible
|
||||
// (protagonist / logline / genre / opening hook /
|
||||
// planned cast) → seeds StoryState
|
||||
// Writer (编剧) — narrative + beats[] + per-beat activeCharacters,
|
||||
// reads StoryState and emits a StoryStatePatch
|
||||
// CharacterDesigner — per-new-character visual + voice cards
|
||||
// Cinematographer (分镜导演) — sceneKey + English compositional prompt
|
||||
// Painter (画师) — FLUX rendering with character archetypes
|
||||
//
|
||||
// Each agent owns one system prompt + one user-message builder below.
|
||||
// All four agents see the same world / style guide, but each only reads
|
||||
// the slice of session state it needs to make its decision.
|
||||
// All agents see the same world / style guide, but each only reads the
|
||||
// slice of session state it needs to make its decision.
|
||||
// ══════════════════════════════════════════════════════════════════════
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
// Shared — render the StoryState bible into a compact prompt block read
|
||||
// by the Writer (and Architect, on revisions). Keeping one renderer means
|
||||
// the bible looks identical to every agent that consumes it.
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Render the StoryState bible as a compact, line-oriented prompt block.
 *
 * Fields that are empty or missing are skipped. List fields are rendered as
 * `- item` bullets under their label.
 *
 * @param s - The story state to render, or `undefined` when there is none.
 * @returns The block (header line followed by one entry per populated
 *   field), or `""` when `s` is `undefined` or has no populated field, so an
 *   empty bible never contributes a header with nothing under it.
 */
export function renderStoryState(s: StoryState | undefined): string {
  if (!s) return "";

  const bullets = (items: readonly string[]): string =>
    items.map((item) => `- ${item}`).join("\n");

  const body: string[] = [];
  if (s.logline) body.push(`主线(中心钩子):${s.logline}`);
  if (s.genreTags) body.push(`题材基调:${s.genreTags}`);
  if (s.protagonist) body.push(`主角「你」:${s.protagonist}`);
  if (s.castNotes) body.push(`核心配角:\n${s.castNotes}`);
  if (s.synopsis) body.push(`已发生(梗概):${s.synopsis}`);
  if (s.relationships?.length) {
    body.push(`当前关系/情绪:\n${bullets(s.relationships)}`);
  }
  if (s.openThreads?.length) {
    body.push(`未收的悬念/伏笔:\n${bullets(s.openThreads)}`);
  }
  if (s.nextHook) body.push(`接下来要往哪走(下一个钩子方向):${s.nextHook}`);

  // An empty spine (e.g. a legacy session with no bible) renders nothing.
  if (body.length === 0) return "";

  return ["【故事档案 / 主线记忆】", ...body].join("\n");
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
// 0. Architect (总编剧) — ONE LLM call at session start.
|
||||
//
|
||||
// Turns the (often terse) user world + style prompt into a real story
|
||||
// bible: a second-person protagonist with a want and a flaw, a single
|
||||
// central dramatic question, a genre frame that anchors the 爽点 rhythm,
|
||||
// an engineered opening hook (前3秒冷开场), and a small intentional cast.
|
||||
// Everything downstream — Writer, CharacterDesigner — reads this so the
|
||||
// story has a spine from beat one instead of being improvised cold.
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * System prompt for the Architect agent.
 *
 * Instructs the model to expand the player's world + style input into a story
 * bible and to answer with strict JSON carrying the keys `logline`,
 * `genreTags`, `protagonist`, `castNotes`, `synopsis`, `openThreads` and
 * `nextHook`.
 *
 * The output-language instruction reads "全部用中文,不需要英文"; a stray
 * English "except" that used to sit in that sentence made it self-contradictory
 * and has been removed.
 */
export const ARCHITECT_SYSTEM = `你是一部交互视觉小说的「总编剧 / 故事架构师」。玩家只给了你一句到几句的世界观和画风,你要在开拍前把它扩写成一份**故事档案(story bible)**,为后续每一幕定下脊梁。你不写具体台词、不写分镜、不设计立绘——你只搭骨架。

你深谙网文(番茄)、短剧(红果)与视觉小说(galgame)的爆款心法:
- **开篇即钩子**:黄金三章 / 前3秒法则。开场不铺垫世界观,直接抛出冲突、悬念或一个反常的瞬间。
- **代入感**:主角是第二人称「你」,是玩家的化身——要让玩家一进场就清楚"我是谁、我此刻卡在什么处境里、我想要什么"。
- **题材锚定爽点**:先选定一个清晰的题材框架(如 甜宠 / 校园暗恋 / 悬疑追凶 / 复仇逆袭 / 救赎治愈),它决定了情绪回报的节奏与类型。
- **戏剧问题**:整部故事由一个悬而未决的中心问题驱动(她到底是谁?你能否在记忆消失前查明真相?这场暗恋会走向哪里?)。
- **人设要鲜明且有反差**:每个核心角色一个强标签 + 一个反差面(外冷内热 / 傲娇 / 看似柔弱实则腹黑)。

你要产出(全部用中文,不需要英文):
- logline:一句话主线 / 中心戏剧问题,必须带钩子,让人想看下去
- genreTags:题材+基调标签,斜杠分隔,如 "甜宠 / 校园 / 慢热治愈带点伤感"
- protagonist:第二人称主角卡。包含:你是谁、你此刻正卡在什么具体处境里(要有即时张力)、你想要什么、一个软肋或秘密。50–120 字。
- castNotes:2–3 个核心配角,每行一个「名字:一句话人设(强标签+反差)+ 与你的关系/张力」。给真实好记的中文名字(不要"神秘女子"这种占位)。
- synopsis:开场此刻的情境梗概(故事尚未展开,就写"故事从……开始"),1–3 句。
- openThreads:开场就埋下的 1–3 个悬念/问题(数组)。
- nextHook:**第一幕**应当如何冷开场——具体描述开场那个抓人的瞬间/冲突(这会直接指导编剧写开场)。要画面感强、有张力。

设计硬规则:
- 主角「你」永不出现在画面里(第二人称 POV),所以 castNotes 里**不要**把"你/主角"当成一个角色。
- 配角名字要符合世界观(年代、地域、文化)。
- 一切服从玩家给的世界观与画风,不要擅自跑题;玩家信息少时,做最贴合、最有戏的合理扩写。

必须输出严格 JSON:
{
"logline": "...",
"genreTags": "...",
"protagonist": "...",
"castNotes": "夏海:表面开朗的天台诗人,实则在用诗逃避家里的变故;与你是同班转学的邻座,对你有种说不清的在意。\\n班主任老周:…",
"synopsis": "...",
"openThreads": ["...", "..."],
"nextHook": "第一幕冷开场:……"
}

不要输出 JSON 以外的任何文本。`;
|
||||
|
||||
/**
 * Build the user message sent to the Architect agent.
 *
 * The message is three newline-joined segments: the session's world setting,
 * its style guide, and a closing instruction asking for the story bible as
 * JSON. The closing instruction begins with its own "\n", so it is separated
 * from the style line by one blank line.
 *
 * @param session - Session supplying `worldSetting` and `styleGuide`.
 * @returns The assembled prompt text.
 */
export function buildArchitectUserMessage(session: Session): string {
  const { worldSetting, styleGuide } = session;
  const closingRequest =
    "\n请据此产出这部交互剧的故事档案(story bible),严格以 JSON 格式返回。";

  return [`世界观:${worldSetting}`, `画风:${styleGuide}`, closingRequest].join("\n");
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
// 1. Writer (编剧) — drives the narrative.
|
||||
//
|
||||
@@ -27,7 +113,26 @@ import type {
|
||||
// session.characters.
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
export const WRITER_SYSTEM = `你是一个交互视觉小说的「编剧」。每次基于世界观、画风、玩家历史、已登记角色,写出**一个完整场景的剧本**:场景背景概要 + 一组对话节拍 beats。你只负责**剧情和台词**——不设计角色形象、不写出图提示词、不做镜头调度,这些由其他 agent 完成。
|
||||
export const WRITER_SYSTEM = `你是一部交互视觉小说的「编剧」。每次基于【故事档案 / 主线记忆】、世界观、画风、玩家历史、已登记角色,写出**一个完整场景的剧本**:场景背景概要 + 一组对话节拍 beats,并在最后更新主线记忆。你只负责**剧情和台词**——不设计角色形象、不写出图提示词、不做镜头调度,这些由其他 agent 完成。
|
||||
|
||||
═══════════════════════════════════════════════════════════════════
|
||||
爆款心法(番茄网文 / 红果短剧 / galgame 的叙事手感)—— 必须贯彻
|
||||
═══════════════════════════════════════════════════════════════════
|
||||
- **每个场景都要有钩子**:开头 1–2 个 beat 内就抛出新信息、悬念、冲突或情绪冲击,绝不平铺直叙地交代背景;结尾 beat 留一个让玩家"想知道接下来"的扣子。
|
||||
- **兑现爽点 / 情绪回报**:按题材给观众想要的情绪(甜宠的心动、暗恋的暧昧拉扯、逆袭的扬眉吐气、悬疑的真相一角)。让玩家这一场"有所得"。
|
||||
- **反转与反差**:适时打破预期——以为是 A 结果是 B、角色露出与第一印象相反的一面;但反转要可信、要扣主线。
|
||||
- **快节奏、入戏快**:进场即冲突,少铺陈,删掉一切"为完整而存在"却不推进情绪的对话。
|
||||
- **show, don't tell**:用动作、神态、潜台词、环境细节传递情绪,别直接旁白"她很难过"——让玩家自己读出来。
|
||||
- **人设鲜明有反差**:每个角色一个强标签 + 一个反差面,台词紧贴其腔调(傲娇嘴硬心软、外冷内热、看似柔弱实则强势)。
|
||||
- **选择要有分量**:choice 只出现在真正的岔路口,每个选项都要让玩家感到"通向不同的东西"(情绪指向不同 / 关系走向不同),别给等价的废选项。
|
||||
|
||||
═══════════════════════════════════════════════════════════════════
|
||||
连贯性铁律(跨场景切换不能跳戏 —— 最重要)
|
||||
═══════════════════════════════════════════════════════════════════
|
||||
- 你会收到【故事档案 / 主线记忆】和上一场的结尾。**新场景必须从上一刻自然承接**——承接上一场的情绪、地点逻辑、人物状态与未收的悬念。
|
||||
- 若给了「转场种子 nextSceneSeed」,把它当作"下一场的命题"去兑现,而不是另起炉灶;开场要让玩家感到"这正是我上一个动作 / 选择导致的结果"。
|
||||
- 沿用主线记忆里的人物关系与情绪温度——别让刚告白的人下一场形同陌路,也别凭空遗忘已埋的伏笔。
|
||||
- 推进、但别重置:每一场都让主线问题往前走一点(关系变化 / 真相揭露一角 / 新悬念浮现)。
|
||||
|
||||
一个场景包含:
|
||||
- sceneSummary:当前场景的中文概要(地点、时间、氛围、关键事件——给后续的分镜导演看)
|
||||
@@ -98,6 +203,13 @@ sceneKey 设计原则(重要 — 用于跨场景视觉一致性):
|
||||
例:speaker="你" line="学姐,这把伞你拿着。"
|
||||
- 同一个 beat 可以同时有 narration(心理活动 / 动作)和 speaker="你" + line(说出口的话)
|
||||
|
||||
更新主线记忆(storyStatePatch)—— 写完这一场后必做:
|
||||
- synopsis:把这一场并入后的整体梗概,**压缩**到 3–5 句(别越写越长,旧细节该丢就丢)
|
||||
- relationships:每个核心角色此刻与「你」的关系 / 情绪温度,每条一句(如 "夏海:暗恋升温,刚向你说了一半的告白被打断")
|
||||
- openThreads:仍未收的悬念 / 伏笔——已收束的可移除、新埋的加入(但至少保留一条正在推进的主线,别把列表清空)
|
||||
- nextHook:基于这一场的结尾,下一场应往哪走(给"下一次的你"一个明确命题,接住本场留下的扣子)
|
||||
这些字段是写给"未来的你"的连贯性记忆,请认真写。
|
||||
|
||||
必须输出严格 JSON,结构如下:
|
||||
{
|
||||
"sceneSummary": "中文场景概要:地点+时间+氛围+关键事件",
|
||||
@@ -149,13 +261,26 @@ sceneKey 设计原则(重要 — 用于跨场景视觉一致性):
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
],
|
||||
"storyStatePatch": {
|
||||
"synopsis": "把这一场并入后的滚动梗概,压缩到 3–5 句",
|
||||
"relationships": ["夏海:暗恋升温,刚向你说了一半的告白被打断"],
|
||||
"openThreads": ["夏海没说完的那句话到底是什么", "她书包里掉出的那张旧照片"],
|
||||
"nextHook": "下一场:放学后的天台,她把你单独叫上去,要把话说完"
|
||||
}
|
||||
}
|
||||
|
||||
不要输出 JSON 以外的任何文本。`;
|
||||
|
||||
export function buildWriterUserMessage(session: Session): string {
|
||||
const parts: string[] = [];
|
||||
|
||||
const bible = renderStoryState(session.storyState);
|
||||
if (bible) {
|
||||
parts.push(bible);
|
||||
parts.push("");
|
||||
}
|
||||
|
||||
parts.push(`世界观:${session.worldSetting}`);
|
||||
parts.push(`画风:${session.styleGuide}`);
|
||||
|
||||
@@ -173,7 +298,9 @@ export function buildWriterUserMessage(session: Session): string {
|
||||
}
|
||||
|
||||
if (session.history.length === 0) {
|
||||
parts.push("\n这是故事的开场。请生成第一个场景,严格以 JSON 格式返回。");
|
||||
parts.push(
|
||||
"\n这是故事的开场。请按【故事档案】里的 nextHook 把第一幕的冷开场写出来——开场即抓人,别花笔墨铺垫世界观。写完后更新 storyStatePatch。严格以 JSON 格式返回。",
|
||||
);
|
||||
return parts.join("\n");
|
||||
}
|
||||
|
||||
@@ -210,22 +337,40 @@ export function buildWriterUserMessage(session: Session): string {
|
||||
});
|
||||
|
||||
const last = session.history.at(-1);
|
||||
|
||||
// The exact last moment the player stopped on — the new scene must continue
|
||||
// seamlessly from this emotional beat, not reset to a neutral state.
|
||||
if (last) {
|
||||
const lastBeatId = last.visitedBeatIds.at(-1) ?? last.scene.entryBeatId;
|
||||
const lastBeat = last.scene.beats.find((b) => b.id === lastBeatId);
|
||||
if (lastBeat) {
|
||||
const frag: string[] = [];
|
||||
if (lastBeat.narration) frag.push(`旁白:${lastBeat.narration}`);
|
||||
if (lastBeat.line) frag.push(`${lastBeat.speaker ?? "?"}:${lastBeat.line}`);
|
||||
if (frag.length) {
|
||||
parts.push(
|
||||
`\n上一刻(玩家停留的最后一个画面,新场景要从这里的情绪无缝承接):\n ${frag.join(" / ")}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const lastExit = last?.exit;
|
||||
if (lastExit) {
|
||||
if (lastExit.kind === "choice") {
|
||||
parts.push(
|
||||
`\n请基于「玩家在上一场选择了:${lastExit.label}」,生成下一个场景(参考种子:${lastExit.nextSceneSeed})。`,
|
||||
`\n承接「玩家在上一场选择了:${lastExit.label}」无缝续写下一个场景(转场命题:${lastExit.nextSceneSeed})。开场要让玩家感到这正是上一步的结果,并延续此刻的情绪。`,
|
||||
);
|
||||
} else {
|
||||
parts.push(
|
||||
`\n请基于「玩家自由动作:${lastExit.action}」,生成下一个场景。`,
|
||||
`\n承接「玩家自由动作:${lastExit.action}」无缝续写下一个场景,延续此刻的情绪与处境。`,
|
||||
);
|
||||
}
|
||||
} else {
|
||||
parts.push("\n请生成下一个场景。");
|
||||
parts.push("\n无缝续写下一个场景,延续上一刻的情绪。");
|
||||
}
|
||||
|
||||
parts.push("严格以 JSON 格式返回。");
|
||||
parts.push("写完后别忘了更新 storyStatePatch。严格以 JSON 格式返回。");
|
||||
return parts.join("\n");
|
||||
}
|
||||
|
||||
@@ -506,6 +651,7 @@ export const INSERT_BEAT_SYSTEM = `你是视觉小说编剧。玩家在当前场
|
||||
- narration 与 line 加起来 ≤80 字
|
||||
- 不要打破当前场景的物理状态(玩家仍在原地、对面仍是同一个角色)
|
||||
- 不要生成选项或下一步指引 —— 玩家点击会自然回到原 beat
|
||||
- 这个 beat 也要"有所得"——给玩家一个新细节、一丝潜台词或情绪波动(show, don't tell),别写成无意义的空台词
|
||||
|
||||
speaker 字段允许的取值**只有两种**(与主路径 Writer 一致 — Pattern B galgame 标准):
|
||||
1. **已登记角色**里的 NPC 真名(**绝不允许引入新角色**)
|
||||
|
||||
@@ -139,6 +139,53 @@ export type BeatAudio = {
|
||||
mime: string;
|
||||
};
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
// StoryState — the persistent "story bible" + evolving narrative memory.
|
||||
//
|
||||
// Created once at session start by the Architect agent (rich opening
|
||||
// planning), then carried across every scene and incrementally updated by
|
||||
// the Writer. This is the single throughline that keeps tone, cast, and
|
||||
// stakes coherent across scene cuts — without it each Writer call would
|
||||
// re-derive the whole arc from a flat beat log and drift.
|
||||
//
|
||||
// Split into STABLE fields (set by the Architect, rarely change) and
|
||||
// VOLATILE fields (rewritten each scene via StoryStatePatch).
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * The story bible plus the evolving narrative memory carried on a session.
 *
 * Fields fall into two groups: stable ones describing the premise, and
 * volatile ones that are rewritten scene by scene through a
 * {@link StoryStatePatch}.
 */
export type StoryState = {
  // ── Stable (Architect-authored; persists unless deliberately revised) ──

  /** One-line central dramatic question / 主线钩子. */
  logline: string;

  /** Genre + tone tags anchoring the 爽点 framework, e.g. "甜宠 / 校园 / 慢热治愈". */
  genreTags: string;

  /**
   * Second-person protagonist card: who 你 are, the immediate situation, the
   * core want, and a flaw/secret. The audience proxy — never rendered.
   */
  protagonist: string;

  /** Key supporting cast and their relationship/tension with 你 (one per line). */
  castNotes?: string;

  // ── Volatile (rewritten each scene by the Writer's StoryStatePatch) ──

  /** Rolling, compressed synopsis of what has happened so far (~3-5 句). */
  synopsis: string;

  /** Unresolved hooks / mysteries / questions still owed to the player. */
  openThreads?: string[];

  /**
   * Current relationship/emotion state per character, e.g.
   * "夏海:好感升温,刚向你告白了一半".
   */
  relationships?: string[];

  /**
   * Where the story is heading next — the conflict/reversal/suspense the
   * next scene should drive toward. Seeds the next scene's hook.
   */
  nextHook?: string;
};

/**
 * The volatile subset of {@link StoryState} that the Writer rewrites after
 * each scene; every field is optional.
 *
 * Derived from `StoryState` rather than re-declared, so a field's type can
 * never drift between the two. The stable fields
 * (logline / genreTags / protagonist / castNotes) are not part of the patch,
 * so spreading a patch over a state leaves them untouched.
 */
export type StoryStatePatch = Partial<
  Pick<StoryState, "synopsis" | "openThreads" | "relationships" | "nextHook">
>;
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
// Session
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
@@ -151,6 +198,13 @@ export type Session = {
|
||||
history: SceneHistoryEntry[];
|
||||
/** Character registry — accumulates across scenes; voices + portraits persist for reuse. */
|
||||
characters: Character[];
|
||||
/**
|
||||
* Persistent story bible + evolving narrative memory. Set at session start
|
||||
* by the Architect, carried by the client across every /api/scene call, and
|
||||
* updated by the Writer each scene. Optional for back-compat with any
|
||||
* session payload created before this field existed.
|
||||
*/
|
||||
storyState?: StoryState;
|
||||
};
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
@@ -207,6 +261,9 @@ export type StartResponse = {
|
||||
imageUrl: string;
|
||||
/** Character registry with voice references + visual cards provisioned. */
|
||||
characters: Character[];
|
||||
/** Story bible created by the Architect + updated by the opening scene's
|
||||
* Writer. The client persists this into the session for later /api/scene calls. */
|
||||
storyState: StoryState;
|
||||
};
|
||||
|
||||
// /api/scene — generates the next Scene, given session whose latest
|
||||
@@ -221,6 +278,10 @@ export type SceneResponse = {
|
||||
/** Public CDN URL (or data URI in MOCK_IMAGE mode) for the rendered scene background. */
|
||||
imageUrl: string;
|
||||
characters: Character[];
|
||||
/** Story bible after this scene's Writer applied its update. The client
|
||||
* must persist this back into the session so the throughline survives the
|
||||
* next scene cut. */
|
||||
storyState: StoryState;
|
||||
};
|
||||
|
||||
// /api/beat-audio — lazily synthesize one beat's voice. Client fires this
|
||||
|
||||
Reference in New Issue
Block a user