perf(engine): split Writer into Phase A (plan) + Phase B (beats)

The Writer was the serial long pole: a single LLM call wrote the scene
skeleton AND the full beats[] graph before anything downstream could
start, so variable-length beat generation blew up tail latency.

Split it into two calls:
- Phase A (runWriterPlan): minimal skeleton the image pipeline needs
  (sceneSummary, sceneKey, entryBeatId, cast, entry roster, entry speaker).
  Serial, on the critical path, kept lightweight.
- Phase B (runWriterBeats): full beats[] + storyStatePatch, written to
  honor the plan. Launched immediately, overlaps the ENTIRE image pipeline
  (cards / cinematographer / portraits / painter), awaited last.

Critical path becomes PhaseA + max(imagePipeline, PhaseB), so the long
beat-writing is hidden behind image gen. A Phase B failure degrades to a
single playable beat synthesized from the plan.

Paired distinct-payload A/B (6 content-matched stories, baseline vs split):
- median end-to-end 42.6s -> 32.2s (-24%)
- mean 46.4s -> 33.1s (-29%)
- worst case 74.7s -> 37.6s (halved)
- no content regression: total Writer output tokens 12858 -> 13699

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
yuanzonghao
2026-06-04 11:17:34 +08:00
parent 9f4dcc097b
commit 3bf5c92841
5 changed files with 443 additions and 174 deletions
+148 -48
View File
@@ -8,26 +8,30 @@ import type {
ProviderConfig,
Session,
StoryStatePatch,
WriterPlan,
} from "@infiplot/types";
import { parseJsonLoose } from "../jsonParser";
import { WRITER_SYSTEM, buildWriterUserMessage } from "../prompts";
import {
WRITER_BEATS_SYSTEM,
WRITER_PLAN_SYSTEM,
buildWriterBeatsUserMessage,
buildWriterPlanUserMessage,
} from "../prompts";
// ──────────────────────────────────────────────────────────────────────
// Writer agent — owns the narrative half of scene generation.
// Writer agent — owns the narrative half of scene generation, in TWO phases.
//
// Output: { sceneSummary, sceneKey, entryBeatId, beats[] }
// Each beat carries activeCharacters[] (names + poses) the
// Cinematographer reads when composing the establishing shot.
// Phase A — runWriterPlan: the scene skeleton (WriterPlan) the image pipeline
// needs (sceneSummary + sceneKey + entry roster + full cast). No dialogue,
// so it returns fast and unblocks the Cinematographer + character design.
// Phase B — runWriterBeats: the full beats[] graph + storyStatePatch, written
// to honor the plan and overlapped with the (longer) image pipeline.
//
// Character DESIGN (visual + voice) is NOT this agent's job —
// it only names characters; the CharacterDesigner picks up any
// unknown name from beats[].activeCharacters.
// Character DESIGN (visual + voice) is NOT this agent's job — it only NAMES
// characters (Phase A's cast); the CharacterDesigner picks up unknown names.
// ──────────────────────────────────────────────────────────────────────
export type WriterOutput = {
sceneSummary: string;
sceneKey?: string;
entryBeatId: string;
export type WriterBeatsOutput = {
beats: Beat[];
/** Rewritten volatile story memory — merged onto the carried StoryState by
* the director. Absent when the model omitted it (rare; bible just stales). */
@@ -69,10 +73,17 @@ type RawStoryStatePatch = {
relationships?: unknown;
nextHook?: unknown;
};
type RawScene = {
// Phase A raw shape (skeleton only — no beats).
type RawPlan = {
sceneSummary?: string;
sceneKey?: string;
entryBeatId?: string;
cast?: unknown;
entrySpeaker?: string;
entryActiveCharacters?: RawActiveCharacter[];
};
// Phase B raw shape (beats + memory only — plan fields come from runWriterPlan).
type RawBeats = {
beats?: RawBeat[];
storyStatePatch?: RawStoryStatePatch;
};
@@ -359,26 +370,119 @@ function coerceStoryStatePatch(
return Object.keys(patch).length > 0 ? patch : undefined;
}
export async function runWriter(
// Phase A — dedupe + clean the planned cast. Drops the POV player (never
// designed) and any blank/duplicate name. Order is preserved.
function coerceCast(raw: unknown): string[] {
if (!Array.isArray(raw)) return [];
const seen = new Set<string>();
const out: string[] = [];
for (const x of raw) {
const name = typeof x === "string" ? x.trim() : "";
if (!name || isPovName(name) || seen.has(name)) continue;
seen.add(name);
out.push(name);
}
return out;
}
// Rename one beat's id and repoint every INTERNAL reference (continue targets,
// advance-beat targets) so the graph stays intact. Only called when `to` is
// absent from the scene, so it can't introduce a duplicate id.
function renameBeatId(beats: Beat[], from: string, to: string): Beat[] {
if (from === to) return beats;
return beats.map((b): Beat => {
const id = b.id === from ? to : b.id;
let next = b.next;
if (next.type === "continue" && next.nextBeatId === from) {
next = { type: "continue", nextBeatId: to };
} else if (next.type === "choice") {
next = {
type: "choice",
choices: next.choices.map((c) =>
c.effect.kind === "advance-beat" && c.effect.targetBeatId === from
? { ...c, effect: { kind: "advance-beat" as const, targetBeatId: to } }
: c,
),
};
}
return { ...b, id, next };
});
}
// ── Phase A — plan the scene skeleton. Fast (small output): just enough for
// the Cinematographer + character design + Painter to start before the
// dialogue exists. The cast is unioned with the entry roster/speaker so a
// character named in the entry but omitted from `cast` still gets designed.
export async function runWriterPlan(
config: ProviderConfig,
session: Session,
): Promise<WriterOutput> {
): Promise<WriterPlan> {
const raw = await chat(
config,
[
{ role: "system", content: WRITER_SYSTEM },
{ role: "user", content: buildWriterUserMessage(session) },
{ role: "system", content: WRITER_PLAN_SYSTEM },
{ role: "user", content: buildWriterPlanUserMessage(session) },
],
{ temperature: 0.9, responseFormat: "json_object", tag: "writer" },
{ temperature: 0.9, responseFormat: "json_object", tag: "writer-plan" },
);
const parsed = parseJsonLoose<RawScene>(raw);
const parsed = parseJsonLoose<RawPlan>(raw);
const entryActiveCharacters =
coerceActiveCharacters(parsed.entryActiveCharacters) ?? [];
// Normalize POV variants → "你"; NPC names pass through. "你" is a valid entry
// speaker (Pattern B — player talking), but is never a designed cast member.
const rawEntrySpeaker = parsed.entrySpeaker?.trim() || undefined;
const entrySpeaker = rawEntrySpeaker
? normalizeSpeakerName(rawEntrySpeaker)
: undefined;
const cast = coerceCast(parsed.cast);
const castSet = new Set(cast);
const addToCast = (name: string): void => {
if (!isPovName(name) && !castSet.has(name)) {
castSet.add(name);
cast.push(name);
}
};
for (const c of entryActiveCharacters) addToCast(c.name);
if (entrySpeaker) addToCast(entrySpeaker);
return {
sceneSummary: parsed.sceneSummary?.trim() || "未指定场景概要",
sceneKey: normalizeSceneKey(parsed.sceneKey),
entryBeatId: parsed.entryBeatId?.trim() || "b1",
cast,
entryActiveCharacters,
entrySpeaker,
};
}
// ── Phase B — expand the plan into the full beats[] graph + storyStatePatch.
// Overlapped with the image pipeline by the director. The plan's entry id is
// pinned onto a real beat so the already-painted entry frame resolves.
export async function runWriterBeats(
config: ProviderConfig,
session: Session,
plan: WriterPlan,
): Promise<WriterBeatsOutput> {
const raw = await chat(
config,
[
{ role: "system", content: WRITER_BEATS_SYSTEM },
{ role: "user", content: buildWriterBeatsUserMessage(session, plan) },
],
{ temperature: 0.9, responseFormat: "json_object", tag: "writer-beats" },
);
const parsed = parseJsonLoose<RawBeats>(raw);
const rawBeats = Array.isArray(parsed.beats) ? parsed.beats : [];
if (rawBeats.length === 0) {
throw new Error("Writer returned no beats");
throw new Error("Writer (beats) returned no beats");
}
const beats = ensureUniqueChoiceIds(
let beats = ensureUniqueChoiceIds(
repairBeats(
ensureUniqueBeatIds(
rawBeats.map((b, i) => coerceBeat(b, i, rawBeats.length)),
@@ -386,40 +490,36 @@ export async function runWriter(
),
);
const declaredEntry = parsed.entryBeatId?.trim();
const entryBeatId =
declaredEntry && beats.some((b) => b.id === declaredEntry)
? declaredEntry
: beats[0]!.id;
// The Painter already composed the entry frame from plan.entryBeatId + its
// roster, so the scene's entry MUST resolve to that id. If Phase B ignored
// it, rename the first beat to it (no collision — id is absent by the guard).
if (!beats.some((b) => b.id === plan.entryBeatId)) {
beats = renameBeatId(beats, beats[0]!.id, plan.entryBeatId);
}
return {
sceneSummary: parsed.sceneSummary?.trim() || "未指定场景概要",
sceneKey: normalizeSceneKey(parsed.sceneKey),
entryBeatId,
beats,
storyStatePatch: coerceStoryStatePatch(parsed.storyStatePatch),
};
}
// Surface the set of character names introduced by this scene's beats,
// so the orchestrator can decide which ones need the CharacterDesigner to
// fire. Pulls names from both `speaker` fields AND `activeCharacters`
// (a character can be on-screen without speaking).
//
// Excludes POV ("你" / 玩家 / 主角 / ...) entirely — the player is never
// designed (no portrait, no voice, no archetype).
export function collectActiveCharacterNames(beats: Beat[]): string[] {
const seen = new Set<string>();
for (const b of beats) {
if (b.speaker && !isPovName(b.speaker)) seen.add(b.speaker);
if (b.activeCharacters) {
for (const c of b.activeCharacters) {
if (!isPovName(c.name)) seen.add(c.name);
}
}
}
return Array.from(seen);
// Phase B fallback — when runWriterBeats fails entirely, keep the scene
// playable with a single entry beat synthesized from the plan: narrate the
// planned summary and offer one change-scene exit so the player can advance.
export function synthesizeFallbackBeats(plan: WriterPlan): Beat[] {
const id = plan.entryBeatId || "b1";
return [
{
id,
narration: plan.sceneSummary,
activeCharacters:
plan.entryActiveCharacters.length > 0
? plan.entryActiveCharacters
: undefined,
next: { type: "choice", choices: [fallbackExitChoice(id)] },
},
];
}
// Re-export POV constants for downstream filters (director's orphanSpeakers).
// Re-export POV constants for downstream filters (director's orphan voices).
export { POV_DISPLAY_NAME, POV_VARIANTS, isPovName, normalizeSpeakerName };