Merge pull request #27 from zonghaoyuan/perf/writer-split
perf(engine): split Writer into Phase A (plan) + Phase B (beats)
This commit is contained in:
+159
-50
@@ -8,26 +8,30 @@ import type {
|
||||
ProviderConfig,
|
||||
Session,
|
||||
StoryStatePatch,
|
||||
WriterPlan,
|
||||
} from "@infiplot/types";
|
||||
import { parseJsonLoose } from "../jsonParser";
|
||||
import { WRITER_SYSTEM, buildWriterUserMessage } from "../prompts";
|
||||
import {
|
||||
WRITER_BEATS_SYSTEM,
|
||||
WRITER_PLAN_SYSTEM,
|
||||
buildWriterBeatsUserMessage,
|
||||
buildWriterPlanUserMessage,
|
||||
} from "../prompts";
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
// Writer agent — owns the narrative half of scene generation.
|
||||
// Writer agent — owns the narrative half of scene generation, in TWO phases.
|
||||
//
|
||||
// Output: { sceneSummary, sceneKey, entryBeatId, beats[] }
|
||||
// Each beat carries activeCharacters[] (names + poses) the
|
||||
// Cinematographer reads when composing the establishing shot.
|
||||
// Phase A — runWriterPlan: the scene skeleton (WriterPlan) the image pipeline
|
||||
// needs (sceneSummary + sceneKey + entry roster + full cast). No dialogue,
|
||||
// so it returns fast and unblocks the Cinematographer + character design.
|
||||
// Phase B — runWriterBeats: the full beats[] graph + storyStatePatch, written
|
||||
// to honor the plan and overlapped with the (longer) image pipeline.
|
||||
//
|
||||
// Character DESIGN (visual + voice) is NOT this agent's job —
|
||||
// it only names characters; the CharacterDesigner picks up any
|
||||
// unknown name from beats[].activeCharacters.
|
||||
// Character DESIGN (visual + voice) is NOT this agent's job — it only NAMES
|
||||
// characters (Phase A's cast); the CharacterDesigner picks up unknown names.
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
export type WriterOutput = {
|
||||
sceneSummary: string;
|
||||
sceneKey?: string;
|
||||
entryBeatId: string;
|
||||
export type WriterBeatsOutput = {
|
||||
beats: Beat[];
|
||||
/** Rewritten volatile story memory — merged onto the carried StoryState by
|
||||
* the director. Absent when the model omitted it (rare; bible just stales). */
|
||||
@@ -69,10 +73,17 @@ type RawStoryStatePatch = {
|
||||
relationships?: unknown;
|
||||
nextHook?: unknown;
|
||||
};
|
||||
type RawScene = {
|
||||
// Phase A raw shape (skeleton only — no beats). This is the loosely-parsed
// model JSON as read by runWriterPlan; every field is optional because the
// model may omit it, and each one is cleaned before it reaches WriterPlan.
|
||||
type RawPlan = {
|
||||
// Trimmed by runWriterPlan; a placeholder summary is substituted when blank.
sceneSummary?: string;
|
||||
// Passed through normalizeSceneKey by runWriterPlan.
sceneKey?: string;
|
||||
// Trimmed by runWriterPlan; falls back to "b1" when missing or blank.
entryBeatId?: string;
|
||||
// Untrusted shape — validated and de-duplicated by coerceCast.
cast?: unknown;
|
||||
// Trimmed, then normalized with normalizeSpeakerName; blank means no speaker.
entrySpeaker?: string;
|
||||
// Cleaned by coerceActiveCharacters; treated as an empty roster when absent.
entryActiveCharacters?: RawActiveCharacter[];
|
||||
};
|
||||
// Phase B raw shape (beats + memory only — plan fields come from runWriterPlan).
// Both fields are optional because this is the loosely-parsed model JSON.
|
||||
type RawBeats = {
|
||||
// runWriterBeats throws when this is missing, not an array, or empty.
beats?: RawBeat[];
|
||||
// Fed to coerceStoryStatePatch by runWriterBeats.
storyStatePatch?: RawStoryStatePatch;
|
||||
};
|
||||
@@ -359,26 +370,119 @@ function coerceStoryStatePatch(
|
||||
return Object.keys(patch).length > 0 ? patch : undefined;
|
||||
}
|
||||
|
||||
export async function runWriter(
|
||||
/**
 * Phase A — sanitize the planned cast list.
 *
 * Keeps only string entries, trims them, and drops blanks, POV (player) names
 * and repeats. The first occurrence of each name wins, so the original order
 * is preserved.
 *
 * @param raw - Untrusted `cast` value taken from the parsed model output.
 * @returns The cleaned cast names; an empty array when `raw` is not an array.
 */
function coerceCast(raw: unknown): string[] {
  if (!Array.isArray(raw)) return [];

  // A Set iterates in insertion order and ignores repeated adds, which gives
  // "dedupe, first occurrence wins" without a separate output array.
  const unique = new Set<string>();
  for (const entry of raw) {
    if (typeof entry !== "string") continue;
    const trimmed = entry.trim();
    if (trimmed === "" || isPovName(trimmed)) continue;
    unique.add(trimmed);
  }
  return Array.from(unique);
}
|
||||
|
||||
/**
 * Rename one beat's id and repoint every INTERNAL reference to it (continue
 * targets and advance-beat choice targets) so the beat graph stays intact.
 *
 * The input array and its beats are not mutated; a new array of new beat
 * objects is returned (or the same array when `from === to`).
 *
 * Precondition: `to` must not already be used as a beat id in `beats`. This
 * function does not check that — the caller guards it — so violating it would
 * produce a duplicate id.
 *
 * @param beats - The scene's beats.
 * @param from - The id to replace.
 * @param to - The replacement id.
 * @returns Beats with the id renamed and all references to it updated.
 */
function renameBeatId(beats: Beat[], from: string, to: string): Beat[] {
  if (from === to) return beats;
  return beats.map((b): Beat => {
    const id = b.id === from ? to : b.id;
    let next = b.next;
    if (next.type === "continue") {
      // Spread the existing link so only the target changes; any other
      // property carried on `next` survives the rename.
      if (next.nextBeatId === from) next = { ...next, nextBeatId: to };
    } else if (next.type === "choice") {
      next = {
        ...next,
        choices: next.choices.map((c) =>
          c.effect.kind === "advance-beat" && c.effect.targetBeatId === from
            ? // Same idea for the effect: retarget it, keep everything else.
              { ...c, effect: { ...c.effect, targetBeatId: to } }
            : c,
        ),
      };
    }
    return { ...b, id, next };
  });
}
|
||||
|
||||
// ── Phase A — plan the scene skeleton. Fast (small output): just enough for
|
||||
// the Cinematographer + character design + Painter to start before the
|
||||
// dialogue exists. The cast is unioned with the entry roster/speaker so a
|
||||
// character named in the entry but omitted from `cast` still gets designed.
|
||||
export async function runWriterPlan(
|
||||
config: ProviderConfig,
|
||||
session: Session,
|
||||
): Promise<WriterOutput> {
|
||||
): Promise<WriterPlan> {
|
||||
const raw = await chat(
|
||||
config,
|
||||
[
|
||||
{ role: "system", content: WRITER_SYSTEM },
|
||||
{ role: "user", content: buildWriterUserMessage(session) },
|
||||
{ role: "system", content: WRITER_PLAN_SYSTEM },
|
||||
{ role: "user", content: buildWriterPlanUserMessage(session) },
|
||||
],
|
||||
{ temperature: 0.9, responseFormat: "json_object", tag: "writer" },
|
||||
{ temperature: 0.9, responseFormat: "json_object", tag: "writer-plan" },
|
||||
);
|
||||
|
||||
const parsed = parseJsonLoose<RawScene>(raw);
|
||||
const rawBeats = Array.isArray(parsed.beats) ? parsed.beats : [];
|
||||
if (rawBeats.length === 0) {
|
||||
throw new Error("Writer returned no beats");
|
||||
const parsed = parseJsonLoose<RawPlan>(raw);
|
||||
|
||||
const entryActiveCharacters =
|
||||
coerceActiveCharacters(parsed.entryActiveCharacters) ?? [];
|
||||
|
||||
// Normalize POV variants → "你"; NPC names pass through. "你" is a valid entry
|
||||
// speaker (Pattern B — player talking), but is never a designed cast member.
|
||||
const rawEntrySpeaker = parsed.entrySpeaker?.trim() || undefined;
|
||||
const entrySpeaker = rawEntrySpeaker
|
||||
? normalizeSpeakerName(rawEntrySpeaker)
|
||||
: undefined;
|
||||
|
||||
const cast = coerceCast(parsed.cast);
|
||||
const castSet = new Set(cast);
|
||||
const addToCast = (name: string): void => {
|
||||
if (!isPovName(name) && !castSet.has(name)) {
|
||||
castSet.add(name);
|
||||
cast.push(name);
|
||||
}
|
||||
};
|
||||
for (const c of entryActiveCharacters) addToCast(c.name);
|
||||
if (entrySpeaker) addToCast(entrySpeaker);
|
||||
|
||||
return {
|
||||
sceneSummary: parsed.sceneSummary?.trim() || "未指定场景概要",
|
||||
sceneKey: normalizeSceneKey(parsed.sceneKey),
|
||||
entryBeatId: parsed.entryBeatId?.trim() || "b1",
|
||||
cast,
|
||||
entryActiveCharacters,
|
||||
entrySpeaker,
|
||||
};
|
||||
}
|
||||
|
||||
const beats = ensureUniqueChoiceIds(
|
||||
// ── Phase B — expand the plan into the full beats[] graph + storyStatePatch.
|
||||
// Overlapped with the image pipeline by the director. The plan's entry id is
|
||||
// pinned onto a real beat so the already-painted entry frame resolves.
|
||||
export async function runWriterBeats(
|
||||
config: ProviderConfig,
|
||||
session: Session,
|
||||
plan: WriterPlan,
|
||||
): Promise<WriterBeatsOutput> {
|
||||
const raw = await chat(
|
||||
config,
|
||||
[
|
||||
{ role: "system", content: WRITER_BEATS_SYSTEM },
|
||||
{ role: "user", content: buildWriterBeatsUserMessage(session, plan) },
|
||||
],
|
||||
{ temperature: 0.9, responseFormat: "json_object", tag: "writer-beats" },
|
||||
);
|
||||
|
||||
const parsed = parseJsonLoose<RawBeats>(raw);
|
||||
const rawBeats = Array.isArray(parsed.beats) ? parsed.beats : [];
|
||||
if (rawBeats.length === 0) {
|
||||
throw new Error("Writer (beats) returned no beats");
|
||||
}
|
||||
|
||||
let beats = ensureUniqueChoiceIds(
|
||||
repairBeats(
|
||||
ensureUniqueBeatIds(
|
||||
rawBeats.map((b, i) => coerceBeat(b, i, rawBeats.length)),
|
||||
@@ -386,40 +490,45 @@ export async function runWriter(
|
||||
),
|
||||
);
|
||||
|
||||
const declaredEntry = parsed.entryBeatId?.trim();
|
||||
const entryBeatId =
|
||||
declaredEntry && beats.some((b) => b.id === declaredEntry)
|
||||
? declaredEntry
|
||||
: beats[0]!.id;
|
||||
// The Painter already composed the entry frame from plan.entryBeatId + its
|
||||
// roster, so the scene's entry MUST resolve to that id. If Phase B ignored
|
||||
// it, rename the first beat to it (no collision — id is absent by the guard).
|
||||
if (!beats.some((b) => b.id === plan.entryBeatId)) {
|
||||
beats = renameBeatId(beats, beats[0]!.id, plan.entryBeatId);
|
||||
}
|
||||
|
||||
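// Pin the entry beat's roster to the plan's: what the Painter composed into the
|
||||
// frame is exactly plan.entryActiveCharacters, so at runtime the entry beat must
|
||||
// show the same characters (same reasoning as pinning the id above). The speaker
// is deliberately NOT pinned — it is coupled to the line/TTS, and overriding it
// would mismatch the dialogue.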
|
||||
const entryRoster =
|
||||
plan.entryActiveCharacters.length > 0 ? plan.entryActiveCharacters : undefined;
|
||||
beats = beats.map((b) =>
|
||||
b.id === plan.entryBeatId ? { ...b, activeCharacters: entryRoster } : b,
|
||||
);
|
||||
|
||||
return {
|
||||
sceneSummary: parsed.sceneSummary?.trim() || "未指定场景概要",
|
||||
sceneKey: normalizeSceneKey(parsed.sceneKey),
|
||||
entryBeatId,
|
||||
beats,
|
||||
storyStatePatch: coerceStoryStatePatch(parsed.storyStatePatch),
|
||||
};
|
||||
}
|
||||
|
||||
// Surface the set of character names introduced by this scene's beats,
|
||||
// so the orchestrator can decide which ones need the CharacterDesigner to
|
||||
// fire. Pulls names from both `speaker` fields AND `activeCharacters`
|
||||
// (a character can be on-screen without speaking).
|
||||
//
|
||||
// Excludes POV ("你" / 玩家 / 主角 / ...) entirely — the player is never
|
||||
// designed (no portrait, no voice, no archetype).
|
||||
export function collectActiveCharacterNames(beats: Beat[]): string[] {
|
||||
const seen = new Set<string>();
|
||||
for (const b of beats) {
|
||||
if (b.speaker && !isPovName(b.speaker)) seen.add(b.speaker);
|
||||
if (b.activeCharacters) {
|
||||
for (const c of b.activeCharacters) {
|
||||
if (!isPovName(c.name)) seen.add(c.name);
|
||||
}
|
||||
}
|
||||
}
|
||||
return Array.from(seen);
|
||||
/**
 * Phase B fallback — used when runWriterBeats fails entirely, so the scene
 * stays playable.
 *
 * Builds a single entry beat from the plan: it narrates the planned scene
 * summary, carries the plan's entry roster (omitted when the roster is empty),
 * and offers exactly one choice produced by `fallbackExitChoice` so the player
 * can advance out of the scene.
 *
 * @param plan - The Phase A plan the scene was painted from.
 * @returns A one-element beat list whose beat id is the plan's entry beat id
 *   (or "b1" when that id is empty).
 */
export function synthesizeFallbackBeats(plan: WriterPlan): Beat[] {
  const entryId = plan.entryBeatId || "b1";
  const roster = plan.entryActiveCharacters;

  const fallbackBeat: Beat = {
    id: entryId,
    narration: plan.sceneSummary,
    // An empty roster is represented as "no activeCharacters" rather than [].
    activeCharacters: roster.length > 0 ? roster : undefined,
    next: { type: "choice", choices: [fallbackExitChoice(entryId)] },
  };

  return [fallbackBeat];
}
|
||||
|
||||
// Re-export POV constants for downstream filters (director's orphanSpeakers).
|
||||
// Re-export POV constants for downstream filters (director's orphan voices).
|
||||
export { POV_DISPLAY_NAME, POV_VARIANTS, isPovName, normalizeSpeakerName };
|
||||
|
||||
+106
-73
@@ -1,5 +1,6 @@
|
||||
import { chat } from "@infiplot/ai-client";
|
||||
import type {
|
||||
Beat,
|
||||
Character,
|
||||
EngineConfig,
|
||||
InsertBeatPartial,
|
||||
@@ -8,6 +9,7 @@ import type {
|
||||
Session,
|
||||
StoryState,
|
||||
StoryStatePatch,
|
||||
WriterPlan,
|
||||
} from "@infiplot/types";
|
||||
import type { CharacterCard } from "./agents/characterDesigner";
|
||||
import {
|
||||
@@ -18,12 +20,14 @@ import {
|
||||
} from "./agents/characterDesigner";
|
||||
import { runCinematographer } from "./agents/cinematographer";
|
||||
import { runPainter } from "./agents/painter";
|
||||
import type { WriterBeatsOutput } from "./agents/writer";
|
||||
import {
|
||||
collectActiveCharacterNames,
|
||||
isPovName,
|
||||
normalizeSpeakerName,
|
||||
POV_DISPLAY_NAME,
|
||||
runWriter,
|
||||
runWriterBeats,
|
||||
runWriterPlan,
|
||||
synthesizeFallbackBeats,
|
||||
} from "./agents/writer";
|
||||
import { parseJsonLoose } from "./jsonParser";
|
||||
import { INSERT_BEAT_SYSTEM, buildInsertBeatUserMessage } from "./prompts";
|
||||
@@ -33,25 +37,25 @@ import { INSERT_BEAT_SYSTEM, buildInsertBeatUserMessage } from "./prompts";
|
||||
//
|
||||
// Critical path (per Scene call):
|
||||
//
|
||||
// Writer LLM (~3s, serial)
|
||||
// Writer PHASE A — plan LLM (scene skeleton only, serial)
|
||||
// │
|
||||
// ├─ CharacterCard LLM × N (parallel per new char — TEXT only)
|
||||
// ├─ Cinematographer LLM (parallel with the cards)
|
||||
// │
|
||||
// └─ wait for cards + cinema
|
||||
// │
|
||||
// ├─ entry-beat portraits ──┐ (block the Painter — its refs)
|
||||
// ▼ │
|
||||
// Painter — generateImage │ (overlapped, NOT on the paint path):
|
||||
// with referenceImages ├─ non-entry-beat portraits
|
||||
// │ └─ ALL voice provisioning + orphan voices
|
||||
// ├──────────────────────────┬───────────────────────────────────────┐
|
||||
// ▼ ▼ │
|
||||
// Writer PHASE B image pipeline (concurrent): │
|
||||
// beats LLM CharacterCard LLM × N ∥ Cinematographer │
|
||||
// (full dialogue, → entry-beat portraits (block Painter) │
|
||||
// overlapped) → Painter (generateImage w/ refs) │
|
||||
// │ → await overlapped: rest portraits+voices │
|
||||
// └──────────────────────────► await Phase B ◄────────────────────────┘
|
||||
// ▼
|
||||
// await the overlapped work, fold into the registry
|
||||
// │
|
||||
// ▼
|
||||
// return { scene, sceneImageUrl, characters, storyState }
|
||||
// assemble Scene → { scene, sceneImageUrl, characters, storyState }
|
||||
//
|
||||
// Two deliberate decouplings unlock the parallelism:
|
||||
// Why split the Writer (the latency win): the image pipeline only needs the
|
||||
// scene SUMMARY + entry roster + cast (Phase A) — NOT the dialogue (Phase B).
|
||||
// Writing beats used to sit serially in FRONT of the image; now it overlaps
|
||||
// it, so the floor is max(beats, image) instead of beats + image.
|
||||
//
|
||||
// The decouplings that unlock the rest of the parallelism:
|
||||
// 1. The Cinematographer only POSITIONS named characters, so it needs no
|
||||
// visualDescription and runs alongside the card LLMs.
|
||||
// 2. The Painter only needs visualDescription TEXT (all on-stage) + the
|
||||
@@ -163,31 +167,60 @@ export async function directScene(
|
||||
): Promise<SceneResult> {
|
||||
const tTotal = Date.now();
|
||||
|
||||
// Stage 1 — Writer (serial; everything downstream needs sceneSummary +
|
||||
// beats[] to know who's on stage and what to compose around).
|
||||
const tWriter = Date.now();
|
||||
const writerOut = await runWriter(config.text, session);
|
||||
tlog("[directScene] Writer", tWriter);
|
||||
// ── Phase A — Writer PLAN (serial). The image pipeline needs the scene
|
||||
// summary + entry roster + cast to start, but NOT the dialogue beats. This
|
||||
// call is small (skeleton only), so it returns fast and unblocks everything.
|
||||
const tPlan = Date.now();
|
||||
const plan = await runWriterPlan(config.text, session);
|
||||
tlog("[directScene] Phase A (plan)", tPlan);
|
||||
|
||||
// Identify NEW characters introduced by this scene that need to be
|
||||
// designed (LLM + portrait + voice). Existing characters in the registry
|
||||
// are skipped — their cards / portraits / voices persist across scenes.
|
||||
const allActiveNames = collectActiveCharacterNames(writerOut.beats);
|
||||
const newCharNames = allActiveNames.filter(
|
||||
// ── Phase B — Writer BEATS, launched NOW so its (longer) output overlaps the
|
||||
// ENTIRE image pipeline below. Only needed to assemble the final Scene, so we
|
||||
// await it last. A failure degrades to a single playable beat from the plan.
|
||||
const tBeats = Date.now();
|
||||
const beatsPromise: Promise<WriterBeatsOutput> = runWriterBeats(
|
||||
config.text,
|
||||
session,
|
||||
plan,
|
||||
)
|
||||
.then((out) => {
|
||||
tlog("[directScene] Phase B (beats)", tBeats);
|
||||
return out;
|
||||
})
|
||||
.catch((err): WriterBeatsOutput => {
|
||||
const msg = err instanceof Error ? err.message : String(err);
|
||||
console.error(
|
||||
`[directScene] Phase B (beats) failed, using fallback: ${msg}`,
|
||||
);
|
||||
return { beats: synthesizeFallbackBeats(plan), storyStatePatch: undefined };
|
||||
});
|
||||
|
||||
// NEW characters to design come from the PLAN's cast (so design fires in
|
||||
// parallel with Phase B, not after the beats are written). Existing
|
||||
// characters keep their cards / portraits / voices across scenes.
|
||||
const newCharNames = plan.cast.filter(
|
||||
(n) => !session.characters.some((c) => c.name === n),
|
||||
);
|
||||
|
||||
// Find the entry beat for the Cinematographer (which characters are
|
||||
// on-screen in the establishing shot).
|
||||
const entryBeat = writerOut.beats.find((b) => b.id === writerOut.entryBeatId);
|
||||
const entryBeatActive = entryBeat?.activeCharacters ?? [];
|
||||
// Entry-beat composition is the PLAN's (Phase B is constrained to honor it).
|
||||
// The Painter needs a Beat-shaped object for reference collection, but the
|
||||
// real beat isn't written until Phase B — so synthesize one from the plan
|
||||
// (collectReferenceImages only reads speaker + activeCharacters).
|
||||
const entryBeatActive = plan.entryActiveCharacters;
|
||||
const entryBeatSpeaker = plan.entrySpeaker;
|
||||
const entryBeatForPaint: Beat = {
|
||||
id: plan.entryBeatId,
|
||||
speaker: entryBeatSpeaker,
|
||||
activeCharacters: entryBeatActive.length > 0 ? entryBeatActive : undefined,
|
||||
next: { type: "continue", nextBeatId: plan.entryBeatId },
|
||||
};
|
||||
|
||||
// For sceneKey-based visual continuity, look up the prior matching scene's
|
||||
// image to slot into Painter's referenceImages (max 4 of which include
|
||||
// character portraits too).
|
||||
const { priorSceneReference, priorSceneKey } = pickPriorSceneReference(
|
||||
session,
|
||||
writerOut.sceneKey,
|
||||
plan.sceneKey,
|
||||
);
|
||||
|
||||
// ── Stage 2 — character cards (LLM) ∥ Cinematographer ──────────────────
|
||||
@@ -211,12 +244,12 @@ export async function directScene(
|
||||
);
|
||||
|
||||
const cinemaPromise = runCinematographer(config.text, {
|
||||
sceneSummary: writerOut.sceneSummary,
|
||||
sceneSummary: plan.sceneSummary,
|
||||
styleGuide: session.styleGuide,
|
||||
entryBeatActive,
|
||||
entryBeatSpeaker: entryBeat?.speaker,
|
||||
entryBeatSpeaker,
|
||||
priorSceneKey,
|
||||
currentSceneKey: writerOut.sceneKey,
|
||||
currentSceneKey: plan.sceneKey,
|
||||
});
|
||||
|
||||
const [cards, cinemaOut] = await Promise.all([
|
||||
@@ -242,8 +275,8 @@ export async function directScene(
|
||||
// Entry-beat character names: the ONLY portraits the Painter references
|
||||
// (collectReferenceImages slots in the entry beat's speaker + activeChars).
|
||||
const entryNames = new Set<string>();
|
||||
if (entryBeat?.speaker && !isPovName(entryBeat.speaker)) {
|
||||
entryNames.add(entryBeat.speaker);
|
||||
if (entryBeatSpeaker && !isPovName(entryBeatSpeaker)) {
|
||||
entryNames.add(entryBeatSpeaker);
|
||||
}
|
||||
for (const c of entryBeatActive) {
|
||||
if (!isPovName(c.name)) entryNames.add(c.name);
|
||||
@@ -281,24 +314,6 @@ export async function directScene(
|
||||
),
|
||||
);
|
||||
|
||||
// Edge case: a speaker the Writer referenced without listing in any beat's
|
||||
// activeCharacters. collectActiveCharacterNames already includes speakers,
|
||||
// so this is a rare defensive net. Provision a voice only (never on-screen).
|
||||
const speakerNames = new Set(
|
||||
writerOut.beats.map((b) => b.speaker).filter((n): n is string => Boolean(n)),
|
||||
);
|
||||
const orphanSpeakers = [...speakerNames].filter(
|
||||
// Pattern B: "你" (player) is a valid speaker but never gets a Character
|
||||
// record — TTS is intentionally skipped on the client.
|
||||
(n) =>
|
||||
!isPovName(n) &&
|
||||
!characters.some((c) => c.name === n) &&
|
||||
!cards.some((c) => c.name === n),
|
||||
);
|
||||
const orphanPromises = orphanSpeakers.map((n) =>
|
||||
provisionVoiceForName(config, session, n),
|
||||
);
|
||||
|
||||
// Block the Painter ONLY on entry-beat portraits (its referenceImages).
|
||||
const entryPortraits = await Promise.all(entryPortraitPromises);
|
||||
characters = mergeCharacters(
|
||||
@@ -313,11 +328,9 @@ export async function directScene(
|
||||
tlog("[directScene] entry-beat portraits", tProvision);
|
||||
|
||||
// ── Stage 4 — Painter (depends on cinemaOut + on-stage visual cards +
|
||||
// entry portraits). On-stage = everyone named in any beat, so the archetype
|
||||
// block covers anyone the player might encounter in this scene.
|
||||
const onStageCharacters = characters.filter((c) =>
|
||||
allActiveNames.includes(c.name),
|
||||
);
|
||||
// entry portraits). On-stage = the plan's cast (everyone who'll appear),
|
||||
// filtered to those now in the registry, so the archetype block covers them.
|
||||
const onStageCharacters = characters.filter((c) => plan.cast.includes(c.name));
|
||||
|
||||
const tPainter = Date.now();
|
||||
const painted = await runPainter(
|
||||
@@ -329,18 +342,17 @@ export async function directScene(
|
||||
priorSceneImage: priorSceneReference,
|
||||
styleReferenceImage: session.styleReferenceImage,
|
||||
},
|
||||
entryBeat,
|
||||
entryBeatForPaint,
|
||||
);
|
||||
tlog("[directScene] Painter", tPainter);
|
||||
|
||||
// Fold in the work that overlapped the paint: remaining portraits, all
|
||||
// voices, and any orphan-speaker voices. Awaited before returning so the
|
||||
// session the client persists is fully provisioned for later scenes.
|
||||
// Fold in the work that overlapped the paint: remaining portraits + all
|
||||
// voices. Awaited before returning so the session the client persists is
|
||||
// fully provisioned for later scenes.
|
||||
const tOverlap = Date.now();
|
||||
const [restPortraits, voicedChars, orphanChars] = await Promise.all([
|
||||
const [restPortraits, voicedChars] = await Promise.all([
|
||||
Promise.all(restPortraitPromises),
|
||||
Promise.all(voicePromises),
|
||||
Promise.all(orphanPromises),
|
||||
]);
|
||||
characters = mergeCharacters(
|
||||
characters,
|
||||
@@ -352,10 +364,31 @@ export async function directScene(
|
||||
})),
|
||||
);
|
||||
characters = mergeCharacters(characters, voicedChars);
|
||||
if (orphanChars.length > 0) {
|
||||
tlog("[directScene] overlapped portraits+voices", tOverlap);
|
||||
|
||||
// ── Await Phase B — it overlapped the whole image pipeline above. ──────
|
||||
const beatsOut = await beatsPromise;
|
||||
const beats = beatsOut.beats;
|
||||
|
||||
// entryBeatId is guaranteed present (runWriterBeats pins it onto a beat), but
|
||||
// keep the defensive fallback for the synthesized-fallback path.
|
||||
const entryBeatId = beats.some((b) => b.id === plan.entryBeatId)
|
||||
? plan.entryBeatId
|
||||
: beats[0]!.id;
|
||||
|
||||
// Orphan-speaker voices: a beat speaker Phase B used that isn't in the
|
||||
// registry. Should be rare — the prompt constrains speakers to the cast, and
|
||||
// every cast member was provisioned above — so this is a defensive net,
|
||||
// serial but skipped entirely (zero latency) in the common case.
|
||||
const orphanSpeakers = [
|
||||
...new Set(beats.map((b) => b.speaker).filter((n): n is string => Boolean(n))),
|
||||
].filter((n) => !isPovName(n) && !characters.some((c) => c.name === n));
|
||||
if (orphanSpeakers.length > 0) {
|
||||
const orphanChars = await Promise.all(
|
||||
orphanSpeakers.map((n) => provisionVoiceForName(config, session, n)),
|
||||
);
|
||||
characters = mergeCharacters(characters, orphanChars);
|
||||
}
|
||||
tlog("[directScene] overlapped portraits+voices", tOverlap);
|
||||
|
||||
const scene: Scene = {
|
||||
id: newSceneId(),
|
||||
@@ -365,9 +398,9 @@ export async function directScene(
|
||||
// anything that already reads scene.scenePrompt (e.g., insert-beat
|
||||
// user prompt).
|
||||
scenePrompt: cinemaOut.integratedPrompt,
|
||||
beats: writerOut.beats,
|
||||
entryBeatId: writerOut.entryBeatId,
|
||||
sceneKey: writerOut.sceneKey,
|
||||
beats,
|
||||
entryBeatId,
|
||||
sceneKey: plan.sceneKey,
|
||||
imageUuid: painted.kind === "real" ? painted.imageUuid : undefined,
|
||||
imageUrl: painted.imageUrl,
|
||||
};
|
||||
@@ -377,7 +410,7 @@ export async function directScene(
|
||||
// client persists it back into the session).
|
||||
const storyState = applyStoryStatePatch(
|
||||
session.storyState,
|
||||
writerOut.storyStatePatch,
|
||||
beatsOut.storyStatePatch,
|
||||
);
|
||||
|
||||
tlog("[directScene] TOTAL", tTotal);
|
||||
|
||||
+1
-1
@@ -9,7 +9,7 @@ export { synthesizeBeat } from "./voice";
|
||||
export { mergeCharacters } from "./director";
|
||||
export type { SceneResult } from "./director";
|
||||
export { runArchitect } from "./agents/architect";
|
||||
export type { WriterOutput } from "./agents/writer";
|
||||
export type { WriterBeatsOutput } from "./agents/writer";
|
||||
export type { CinematographerOutput } from "./agents/cinematographer";
|
||||
export type { InsertBeatPartial } from "@infiplot/types";
|
||||
export * from "./prompts";
|
||||
|
||||
+146
-47
@@ -4,6 +4,7 @@ import type {
|
||||
Scene,
|
||||
Session,
|
||||
StoryState,
|
||||
WriterPlan,
|
||||
} from "@infiplot/types";
|
||||
|
||||
// ══════════════════════════════════════════════════════════════════════
|
||||
@@ -137,16 +138,77 @@ export function buildArchitectUserMessage(session: Session): string {
|
||||
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
// 1. Writer (编剧) — drives the narrative.
|
||||
// 1. Writer (编剧) — drives the narrative, in TWO phases.
|
||||
//
|
||||
// Emits a full Scene: beats[] graph + entryBeatId + sceneKey hint +
|
||||
// activeCharacters per beat. Does NOT design characters (that's the
|
||||
// CharacterDesigner's job) — only names them in `activeCharacters`.
|
||||
// The CharacterDesigner is invoked separately for any name not yet in
|
||||
// session.characters.
|
||||
// Phase A (WRITER_PLAN_SYSTEM): plans the scene SKELETON only — sceneSummary
|
||||
// + sceneKey + entry-beat roster + the full cast. No dialogue. Its output
|
||||
// is enough for the Cinematographer + character design + Painter to start.
|
||||
// Phase B (WRITER_BEATS_SYSTEM): expands the plan into the full beats[] graph
|
||||
// + storyStatePatch, overlapped with the (longer) image pipeline.
|
||||
//
|
||||
// Neither phase designs characters (that's the CharacterDesigner's job) —
|
||||
// Phase A only NAMES them in `cast` / `entryActiveCharacters`; the
|
||||
// CharacterDesigner is invoked for any name not yet in session.characters.
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
export const WRITER_SYSTEM = `你是一部交互视觉小说的「编剧」。每次基于【故事档案 / 主线记忆】、世界观、画风、玩家历史、已登记角色,写出**一个完整场景的剧本**:场景背景概要 + 一组对话节拍 beats,并在最后更新主线记忆。你只负责**剧情和台词**——不设计角色形象、不写出图提示词、不做镜头调度,这些由其他 agent 完成。
|
||||
export const WRITER_PLAN_SYSTEM = `你是一部交互视觉小说的「编剧」。这是**两步生成中的第一步——场景规划**。你只产出本场景的「骨架」,**不要写任何 beat 台词**。你的产出会被立刻送去配图(分镜导演 + 生图),所以要快、要准、画面感要强。
|
||||
|
||||
═══════════════════════════════════════════════════════════════════
|
||||
爆款心法(要在规划阶段就立住,后续展开才好看)
|
||||
═══════════════════════════════════════════════════════════════════
|
||||
- **进场即钩子**:这一场开场就要抛出新信息 / 悬念 / 冲突 / 情绪冲击,别铺陈。把这个抓人的瞬间写进 sceneSummary。
|
||||
- **兑现情绪**:按题材给观众想要的情绪(甜宠的心动、暗恋的拉扯、逆袭的扬眉、悬疑的真相一角)。
|
||||
- **人设有反差**:每个角色一个强标签 + 一个反差面。
|
||||
|
||||
═══════════════════════════════════════════════════════════════════
|
||||
连贯性铁律(跨场景切换不能跳戏 —— 最重要)
|
||||
═══════════════════════════════════════════════════════════════════
|
||||
- 你会收到【故事档案 / 主线记忆】和上一场的结尾。**新场景必须从上一刻自然承接**——承接情绪、地点逻辑、人物状态与未收的悬念。
|
||||
- 若给了「转场种子 nextSceneSeed」,把它当作"下一场的命题"去兑现,开场要让玩家感到"这正是我上一步的结果"。
|
||||
- 沿用主线记忆里的人物关系与情绪温度,别让刚告白的人下一场形同陌路。
|
||||
|
||||
本步你要规划(如实产出,缺一不可):
|
||||
- **sceneSummary**:当前场景的中文概要——地点 + 时间 + 氛围 + 关键事件 + 那个抓人的开场瞬间。这是分镜导演构图的**唯一依据**,要画面感强、信息足(2–4 句)。
|
||||
- **sceneKey**:当前场景的英文 slug(如 "classroom-dusk"、"rooftop-night")。
|
||||
- **entryBeatId**:玩家进入场景时落在哪个 beat 的 id(通常就是 "b1")。
|
||||
- **cast**:本场景**会出场的全部 NPC 角色名**(字符串数组)。第二步写 beats 时**只能用这里列出的名字**,所以现在必须一次想全——谁会说话、谁会在画面里露面,全部列出。名字要与「已登记角色」**完全一致**;新角色起符合世界观的真名(不要"神秘女子"这种占位)。**绝不**包含玩家(你 / 我 / 主角 / protagonist / player / MC...)。
|
||||
- **entrySpeaker**:入口 beat 由谁开口 —— 取值只有三种:① 某个 NPC 真名(必须在 cast 里)② "你"(玩家本人开口)③ 留空(纯旁白 / 环境开场)。这决定镜头语言,要选准。
|
||||
- **entryActiveCharacters**:入口画面里**此刻出现的 NPC** 及其当下姿态 / 神情(中文 pose)。即使没人说话,画面里有谁也要列。**绝不**包含玩家。
|
||||
|
||||
sceneKey 设计原则(用于跨场景视觉一致性):
|
||||
- 同一物理空间 + 同一时段 → 必须沿用**完全相同**的英文 slug
|
||||
- 时段 / 空间变化时换 slug("classroom-dusk" → "classroom-night" / "corridor-dusk")
|
||||
- slug 规范:lowercase-with-dashes,2–4 个英文单词
|
||||
- 用户消息会列出已用过的 sceneKey,请优先**复用**这些已有 slug
|
||||
|
||||
玩家视角硬规则(违反会破坏整个 galgame):
|
||||
- 玩家是第二人称 POV,**永远不出现在任何画面里**——entryActiveCharacters 的 name **绝不允许**是「玩家 / 你 / 我 / 主角 / protagonist / player / Player / MC / I / me」任何变体。
|
||||
- entrySpeaker 只能是 NPC 真名 / "你" / 留空;其它 POV 变体一律视为错误。
|
||||
|
||||
必须输出严格 JSON:
|
||||
{
|
||||
"sceneSummary": "黄昏的天台,风很大。夏海背对你站在栏杆边,手里攥着一张揉皱的成绩单——她把你单独叫上来,却迟迟不开口。",
|
||||
"sceneKey": "rooftop-dusk",
|
||||
"entryBeatId": "b1",
|
||||
"cast": ["夏海"],
|
||||
"entrySpeaker": "夏海",
|
||||
"entryActiveCharacters": [
|
||||
{ "name": "夏海", "pose": "背对你倚着栏杆,侧脸绷着,手里攥着揉皱的纸" }
|
||||
]
|
||||
}
|
||||
|
||||
不要输出 JSON 以外的任何文本。`;
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
// Phase B — expands the plan into the full beats[] + storyStatePatch.
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
export const WRITER_BEATS_SYSTEM = `你是一部交互视觉小说的「编剧」。这是**两步生成中的第二步——把已规划好的场景展开成完整剧本**。你会收到本场景的「规划」(场景概要 sceneSummary、sceneKey、入口 beat 的 id / speaker / 登场角色、以及本场景允许出场的角色名单 cast)。你的任务:基于规划写出玩家依次经历的对话节拍 beats,并在最后更新主线记忆。你只负责**剧情和台词**——不设计角色形象、不写出图提示词、不做镜头调度,这些由其他 agent 完成。
|
||||
|
||||
你必须严格遵守收到的规划:
|
||||
- 必须存在一个 id 等于规划 entryBeatId 的 beat,作为玩家入口。
|
||||
- 该入口 beat 的 speaker 与登场角色(activeCharacters)要与规划一致(姿态措辞可微调,但**人物身份必须一致**)。
|
||||
- speaker 与 activeCharacters 里的 NPC 名字**只能来自规划的 cast**(或玩家 "你")——**不要引入规划之外的新角色**。
|
||||
|
||||
═══════════════════════════════════════════════════════════════════
|
||||
爆款心法(番茄网文 / 红果短剧 / galgame 的叙事手感)—— 必须贯彻
|
||||
@@ -167,11 +229,7 @@ export const WRITER_SYSTEM = `你是一部交互视觉小说的「编剧」。
|
||||
- 沿用主线记忆里的人物关系与情绪温度——别让刚告白的人下一场形同陌路,也别凭空遗忘已埋的伏笔。
|
||||
- 推进、但别重置:每一场都让主线问题往前走一点(关系变化 / 真相揭露一角 / 新悬念浮现)。
|
||||
|
||||
一个场景包含:
|
||||
- sceneSummary:当前场景的中文概要(地点、时间、氛围、关键事件——给后续的分镜导演看)
|
||||
- sceneKey:当前场景的英文 slug(如 "classroom-dusk"、"rooftop-night"、"rainy-street")——同一物理空间应沿用相同 slug
|
||||
- beats[]:玩家依次经历的对话节拍
|
||||
- entryBeatId:玩家进入场景时落在哪个 beat
|
||||
本步你只产出两样:**beats[]**(玩家依次经历的对话节拍)和 **storyStatePatch**(主线记忆更新)。sceneSummary / sceneKey / entryBeatId 已由规划给定,**不要再输出**它们。
|
||||
|
||||
每个 beat 是玩家会看到的一段叙述 / 对话 / 选择。beat 之间通过 next 字段连接:
|
||||
- "continue":玩家点击图片背景 / 按继续,自然推进到下一个 beat
|
||||
@@ -183,6 +241,7 @@ choice 的 effect 有两种:
|
||||
|
||||
设计原则:
|
||||
- 同场景内 beat 数自由发挥,按剧情节奏自然给出(通常 2–6 个,可以更多)
|
||||
- 入口 beat 的 id 必须等于规划给定的 entryBeatId;其余 beat id 依次自取且互不重复
|
||||
- 多用 continue,少用 choice — 选择只应出现在「真正的岔路口」
|
||||
- advance-beat 适合处理对话分支(同一场景里换个话题、追问、撒娇)
|
||||
- change-scene 适合空间/时间跳跃(出门、转身看窗外、第二天清晨)
|
||||
@@ -192,12 +251,6 @@ choice 的 effect 有两种:
|
||||
- next.nextBeatId 引用的 beat 必须存在
|
||||
- choice 至少 2 个,至多 4 个,互不重复
|
||||
|
||||
sceneKey 设计原则(重要 — 用于跨场景视觉一致性):
|
||||
- 同一物理空间 + 同一时段 → 必须沿用**完全相同**的英文 slug
|
||||
- 时段或空间变化时换 slug(如 "classroom-dusk" → "classroom-night","classroom-dusk" → "corridor-dusk")
|
||||
- slug 规范:lowercase-with-dashes,2–4 个英文单词
|
||||
- 已登记的历史场景 sceneKey 会在用户消息里列出,请优先**复用**这些已有 slug
|
||||
|
||||
文本风格约束:
|
||||
- narration / line 用中文(**纯净可显示文本**,绝不要写 (叹气)(语速快) 这类标注 —— 那是给配音的,会被玩家看见)
|
||||
- sceneSummary / lineDelivery / activeCharacters[].pose 内的文字也用中文
|
||||
@@ -243,11 +296,8 @@ sceneKey 设计原则(重要 — 用于跨场景视觉一致性):
|
||||
- nextHook:基于这一场的结尾,下一场应往哪走(给"下一次的你"一个明确命题,接住本场留下的扣子)
|
||||
这些字段是写给"未来的你"的连贯性记忆,请认真写。
|
||||
|
||||
必须输出严格 JSON,结构如下:
|
||||
必须输出严格 JSON,结构如下(**只含 beats 与 storyStatePatch**;sceneSummary / sceneKey / entryBeatId 由规划给定,不要输出。下例入口 beat 的 id "b1" 即规划的 entryBeatId):
|
||||
{
|
||||
"sceneSummary": "中文场景概要:地点+时间+氛围+关键事件",
|
||||
"sceneKey": "classroom-dusk",
|
||||
"entryBeatId": "b1",
|
||||
"beats": [
|
||||
{
|
||||
"id": "b1",
|
||||
@@ -343,29 +393,28 @@ function renderHistoryEntry(
|
||||
return lines.join("\n");
|
||||
}
|
||||
|
||||
export function buildWriterUserMessage(session: Session): string {
|
||||
// ─── STABLE PREFIX ────────────────────────────────────────────────────
|
||||
// Everything in this section is invariant across consecutive Writer calls
|
||||
// within the session (or monotonically grows in a way that keeps the
|
||||
// earlier bytes byte-identical). Always emit every section header — even
|
||||
// when empty — so positions don't shift between calls.
|
||||
// Shared narrative context for BOTH Writer phases. Returns the message parts
|
||||
// from the cacheable STABLE PREFIX (sections 1-4) through the dynamic
|
||||
// transition hint (section 7), but WITHOUT the trailing phase-specific
|
||||
// instruction — each phase appends its own. Building this once and reusing it
|
||||
// keeps EACH phase's prompt prefix byte-stable across scenes for DeepSeek
|
||||
// prompt caching (Phase A and Phase B cache independently since their system
|
||||
// prompts differ, but each shares its own prefix across consecutive calls).
|
||||
//
|
||||
// Order optimized for DeepSeek/MiMo prefix caching (64-token chunks):
|
||||
// ─── STABLE PREFIX ──────────────────────────────────────────────────────
|
||||
// Invariant across consecutive Writer calls within the session (or grows in a
|
||||
// way that keeps earlier bytes byte-identical). Always emit every section
|
||||
// header — even when empty — so positions don't shift between calls.
|
||||
// 1. session-immutable scalars (world / style)
|
||||
// 2. story bible spine (Architect-set, never patched)
|
||||
// 3. monotonically-growing lists (characters, sceneKeys)
|
||||
// 4. history entries 0..N-2 (the last entry is what THIS call must
|
||||
// react to, so it lives in the dynamic suffix instead)
|
||||
//
|
||||
// ─── DYNAMIC SUFFIX ───────────────────────────────────────────────────
|
||||
// Everything below changes on (almost) every call:
|
||||
// 4. history entries 0..N-2 (the last entry is what THIS call must react
|
||||
// to, so it lives in the dynamic suffix instead)
|
||||
// ─── DYNAMIC SUFFIX ─────────────────────────────────────────────────────
|
||||
// 5. story bible dynamic patch (synopsis/threads/relationships/nextHook)
|
||||
// 6. the just-completed entry (history[-1]) — same render format as the
|
||||
// stable history blocks, just preceded by a "just completed" header
|
||||
// 7. last-beat snippet (the exact emotional cliffhanger)
|
||||
// 8. lastExit hint
|
||||
// 9. format reminder tail
|
||||
|
||||
// 6. last-beat snippet (the exact emotional cliffhanger)
|
||||
// 7. transition hint (opening cold-open directive OR lastExit承接)
|
||||
function buildWriterContextParts(session: Session): string[] {
|
||||
const parts: string[] = [];
|
||||
|
||||
// ── 1. session scalars ────────────────────────────────────────────────
|
||||
@@ -423,8 +472,7 @@ export function buildWriterUserMessage(session: Session): string {
|
||||
// ── 6. last-beat snippet (the exact emotional cliffhanger) ──
|
||||
// The full last entry is already in the stable history block above; here
|
||||
// we only re-emit the very last beat to sharply focus the Writer on the
|
||||
// emotional moment to continue from. Skip the duplicate full-entry render
|
||||
// that was here previously — it wasted ~200-500 tokens of dynamic suffix.
|
||||
// emotional moment to continue from.
|
||||
const last = session.history.at(-1);
|
||||
if (last) {
|
||||
const lastBeatId = last.visitedBeatIds.at(-1) ?? last.scene.entryBeatId;
|
||||
@@ -441,14 +489,14 @@ export function buildWriterUserMessage(session: Session): string {
|
||||
}
|
||||
}
|
||||
|
||||
// ── 7. transition hint ────────────────────────────────────────────────
|
||||
if (session.history.length === 0) {
|
||||
parts.push(
|
||||
"\n这是故事的开场。请按【故事档案】里的 nextHook 把第一幕的冷开场写出来——开场即抓人,别花笔墨铺垫世界观。写完后更新 storyStatePatch。严格以 JSON 格式返回。",
|
||||
"\n这是故事的开场。请按【故事档案】里的 nextHook 把第一幕的冷开场设计出来——开场即抓人,别花笔墨铺垫世界观。",
|
||||
);
|
||||
return parts.join("\n");
|
||||
return parts;
|
||||
}
|
||||
|
||||
// ── 8. lastExit hint ──────────────────────────────────────────────────
|
||||
const lastExit = last?.exit;
|
||||
if (lastExit) {
|
||||
if (lastExit.kind === "choice") {
|
||||
@@ -464,8 +512,59 @@ export function buildWriterUserMessage(session: Session): string {
|
||||
parts.push("\n无缝续写下一个场景,延续上一刻的情绪。");
|
||||
}
|
||||
|
||||
// ── 9. format reminder tail ───────────────────────────────────────────
|
||||
parts.push("写完后别忘了更新 storyStatePatch。严格以 JSON 格式返回。");
|
||||
return parts;
|
||||
}
|
||||
|
||||
/**
 * Phase A user message: asks the Writer for the scene skeleton only.
 *
 * Reuses the shared narrative context from `buildWriterContextParts` and
 * appends a single plan-only instruction, so the phase-specific text is
 * always the last part of the message.
 *
 * @param session The current play session the context is rendered from.
 * @returns The context parts followed by the plan instruction, joined with "\n".
 */
export function buildWriterPlanUserMessage(session: Session): string {
  // Plan-only tail: requests sceneSummary / sceneKey / entryBeatId / cast and
  // the entry beat's speaker + roster, and explicitly forbids beat dialogue.
  const planInstruction =
    '\n现在**只规划本场景的骨架**(不要写 beats 台词):给出 sceneSummary(画面感强、含开场钩子)、sceneKey、entryBeatId、本场景会出场的全部角色 cast、以及入口 beat 的 entrySpeaker 与 entryActiveCharacters。严格以 JSON 格式返回。';

  return [...buildWriterContextParts(session), planInstruction].join("\n");
}
|
||||
|
||||
/**
 * Phase B user message: asks the Writer to expand an already-decided plan
 * into the full beats[] graph plus storyStatePatch.
 *
 * The plan differs for every scene, so it is rendered AFTER the shared
 * context from `buildWriterContextParts`; everything before the plan block is
 * whatever that shared context produced.
 *
 * @param session The current play session the context is rendered from.
 * @param plan The Phase A result this call must honor.
 * @returns Context parts, the rendered plan block and the closing
 *   instruction, joined with "\n".
 */
export function buildWriterBeatsUserMessage(
  session: Session,
  plan: WriterPlan,
): string {
  const contextParts = buildWriterContextParts(session);

  // Entry speaker line; an absent or empty speaker is rendered as a narration opening.
  const entrySpeakerLine = `入口 beat 的 speaker:${plan.entrySpeaker || "(空 —— 纯旁白 / 环境开场)"}`;

  // Entry roster: one bullet per on-stage character, pose appended when set.
  const entryRosterLines =
    plan.entryActiveCharacters.length > 0
      ? plan.entryActiveCharacters.map(
          (member) => `- ${member.name}${member.pose ? `:${member.pose}` : ""}`,
        )
      : ["(无 —— 入口画面没有 NPC)"];

  // Cast whitelist: one bullet per allowed NPC name.
  const castLines =
    plan.cast.length > 0
      ? plan.cast.map((castName) => `- ${castName}`)
      : ["(无 NPC —— 仅旁白与玩家)"];

  const planBlock: string[] = [
    "",
    "━━━ 本场景规划(上一步已定,必须严格遵守)━━━",
    `场景概要 sceneSummary:${plan.sceneSummary}`,
    // sceneKey is optional on the plan; the line is omitted when it is unset.
    ...(plan.sceneKey ? [`sceneKey:${plan.sceneKey}`] : []),
    `入口 beat 的 id(entryBeatId,必须有一个此 id 的 beat 作为入口):${plan.entryBeatId}`,
    entrySpeakerLine,
    "入口 beat 的登场角色 activeCharacters(人物身份须一致,姿态可微调):",
    ...entryRosterLines,
    '本场景允许出现的角色名 cast(speaker / activeCharacters 只能用这些名字或 "你",不要新增角色):',
    ...castLines,
    "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━",
  ];

  // Phase-specific closing instruction, always the final part of the message.
  const beatsInstruction =
    "\n把上面的规划展开成完整的 beats[](入口 beat 用规划的 entryBeatId / speaker / 登场角色),写完后更新 storyStatePatch。严格以 JSON 格式返回。";

  return [...contextParts, ...planBlock, beatsInstruction].join("\n");
}
|
||||
|
||||
|
||||
@@ -92,6 +92,43 @@ export type SceneHistoryEntry = {
|
||||
exit?: SceneExit;
|
||||
};
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
// Writer two-phase split
|
||||
//
|
||||
// The Writer runs as TWO LLM calls so scene-image generation can begin
|
||||
// before the dialogue is fully written:
|
||||
// Phase A (WriterPlan) — the minimal skeleton the image pipeline needs:
|
||||
// sceneSummary + sceneKey + the entry beat's
|
||||
// on-stage roster + the full cast to design.
|
||||
// Phase B (beats) — the full beats[] graph + storyStatePatch, written
|
||||
// to honor the plan, overlapped with image gen.
|
||||
// The Cinematographer + character design + Painter all run off the Plan, so
|
||||
// Phase B's (longer) output is hidden behind the image pipeline.
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Phase A output of the Writer: the scene skeleton produced before any
 * dialogue is written.
 */
export type WriterPlan = {
  /**
   * Chinese synopsis of the scene: location, time, mood, key event and the
   * opening hook. Described as the only input the Cinematographer uses to
   * compose the establishing shot.
   */
  sceneSummary: string;

  /** English location + time slug, kept stable for cross-scene visual continuity. */
  sceneKey?: string;

  /**
   * Id of the beat the player lands on when the scene starts. Phase B is
   * expected to emit a beat with this id (and is reconciled when it does not).
   */
  entryBeatId: string;

  /**
   * Speaker of the entry beat: an NPC name, "你" when the player speaks, or
   * undefined for a pure narration / environment opening. Used for shot
   * selection.
   */
  entrySpeaker?: string;

  /**
   * Who is on stage (with pose) at the moment the player lands. Feeds the
   * Cinematographer's framing and the entry-beat portraits the Painter
   * anchors to. The POV player is never listed here.
   */
  entryActiveCharacters: BeatActiveCharacter[];

  /**
   * Names of every NPC appearing anywhere in the scene. Character design
   * (card + portrait + voice) runs from this list in parallel with Phase B,
   * so the whole cast is provisioned by the time the scene returns. Phase B
   * may only use these names plus the POV "你"; the player is never included.
   */
  cast: string[];
};
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
// Characters & voices (TTS)
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
Reference in New Issue
Block a user