diff --git a/lib/engine/prompts.ts b/lib/engine/prompts.ts index ef4e358..42bbfd9 100644 --- a/lib/engine/prompts.ts +++ b/lib/engine/prompts.ts @@ -28,22 +28,55 @@ import type { // the bible looks identical to every agent that consumes it. // ────────────────────────────────────────────────────────────────────── +// ── Story bible — split spine / dynamic for prefix-cache friendliness ── +// +// SPINE = Architect-set, never updated by Writer's storyStatePatch: +// logline / genreTags / protagonist / castNotes +// → goes in the STABLE PREFIX of every Writer user message +// +// DYNAMIC = patched every scene by the Writer: +// synopsis / relationships / openThreads / nextHook +// → goes in the DYNAMIC SUFFIX +// +// Keep both sections present even when empty (固定 section) so position is +// stable across calls — a missing section here would shift every byte after +// it and torch the cache. + +export function renderStoryStateSpine(s: StoryState | undefined): string { + const lines: string[] = ["【故事档案 · 主轴(不变)】"]; + lines.push(`主线(中心钩子):${s?.logline ?? "(未设定)"}`); + lines.push(`题材基调:${s?.genreTags ?? "(未设定)"}`); + lines.push(`主角「你」:${s?.protagonist ?? "(未设定)"}`); + lines.push(`核心配角:${s?.castNotes ?? "(未设定)"}`); + return lines.join("\n"); +} + +export function renderStoryStateDynamic(s: StoryState | undefined): string { + const lines: string[] = ["【故事档案 · 当前状态(每幕更新)】"]; + lines.push(`已发生(梗概):${s?.synopsis ?? "(暂无)"}`); + lines.push( + `当前关系/情绪:${ + s?.relationships?.length + ? "\n" + s.relationships.map((r) => `- ${r}`).join("\n") + : "(暂无)" + }`, + ); + lines.push( + `未收的悬念/伏笔:${ + s?.openThreads?.length + ? "\n" + s.openThreads.map((t) => `- ${t}`).join("\n") + : "(暂无)" + }`, + ); + lines.push(`接下来要往哪走(下一个钩子方向):${s?.nextHook ?? "(暂无)"}`); + return lines.join("\n"); +} + +// Back-compat for the Architect's own user message (it sees the full bible +// at session start, no caching concern there yet). 
export function renderStoryState(s: StoryState | undefined): string { if (!s) return ""; - const lines: string[] = ["【故事档案 / 主线记忆】"]; - if (s.logline) lines.push(`主线(中心钩子):${s.logline}`); - if (s.genreTags) lines.push(`题材基调:${s.genreTags}`); - if (s.protagonist) lines.push(`主角「你」:${s.protagonist}`); - if (s.castNotes) lines.push(`核心配角:\n${s.castNotes}`); - if (s.synopsis) lines.push(`已发生(梗概):${s.synopsis}`); - if (s.relationships?.length) { - lines.push(`当前关系/情绪:\n${s.relationships.map((r) => `- ${r}`).join("\n")}`); - } - if (s.openThreads?.length) { - lines.push(`未收的悬念/伏笔:\n${s.openThreads.map((t) => `- ${t}`).join("\n")}`); - } - if (s.nextHook) lines.push(`接下来要往哪走(下一个钩子方向):${s.nextHook}`); - return lines.join("\n"); + return renderStoryStateSpine(s) + "\n\n" + renderStoryStateDynamic(s); } // ────────────────────────────────────────────────────────────────────── @@ -272,74 +305,127 @@ sceneKey 设计原则(重要 — 用于跨场景视觉一致性): 不要输出 JSON 以外的任何文本。`; +// Render one history entry as a stable, position-independent block. Used by +// the Writer to dump both "completed past" (stable prefix) and "the entry the +// player just finished" (dynamic suffix) — same format, so the model sees a +// uniform history surface. +function renderHistoryEntry( + entry: Session["history"][number], + index: number, +): string { + const lines: string[] = [`【场景 ${index}】`]; + if (entry.scene.sceneKey) lines.push(` sceneKey: ${entry.scene.sceneKey}`); + + const visited = entry.visitedBeatIds.length + ? entry.visitedBeatIds + : [entry.scene.entryBeatId]; + const beatById = new Map(entry.scene.beats.map((b) => [b.id, b])); + const visitedBeats = visited + .map((id) => beatById.get(id)) + .filter((b): b is NonNullable => Boolean(b)); + + for (const b of visitedBeats) { + const fragments: string[] = []; + if (b.narration) fragments.push(`旁白:${b.narration}`); + if (b.line) fragments.push(`${b.speaker ?? 
"?"}:${b.line}`); + if (fragments.length) lines.push(" " + fragments.join(" / ")); + } + + if (entry.exit) { + if (entry.exit.kind === "choice") { + lines.push( + ` 玩家最终选择:${entry.exit.label}(去往:${entry.exit.nextSceneSeed})`, + ); + } else { + lines.push(` 玩家自由动作:${entry.exit.action}`); + } + } + return lines.join("\n"); +} + export function buildWriterUserMessage(session: Session): string { + // ─── STABLE PREFIX ──────────────────────────────────────────────────── + // Everything in this section is invariant across consecutive Writer calls + // within the session (or monotonically grows in a way that keeps the + // earlier bytes byte-identical). Always emit every section header — even + // when empty — so positions don't shift between calls. + // + // Order optimized for DeepSeek/MiMo prefix caching (64-token chunks): + // 1. session-immutable scalars (world / style) + // 2. story bible spine (Architect-set, never patched) + // 3. monotonically-growing lists (characters, sceneKeys) + // 4. history entries 0..N-2 (the last entry is what THIS call must + // react to, so it lives in the dynamic suffix instead) + // + // ─── DYNAMIC SUFFIX ─────────────────────────────────────────────────── + // Everything below changes on (almost) every call: + // 5. story bible dynamic patch (synopsis/threads/relationships/nextHook) + // 6. the just-completed entry (history[-1]) — same render format as the + // stable history blocks, just preceded by a "just completed" header + // 7. last-beat snippet (the exact emotional cliffhanger) + // 8. lastExit hint + // 9. format reminder tail + const parts: string[] = []; - const bible = renderStoryState(session.storyState); - if (bible) { - parts.push(bible); - parts.push(""); - } - + // ── 1. 
session scalars ──────────────────────────────────────────────── parts.push(`世界观:${session.worldSetting}`); parts.push(`画风:${session.styleGuide}`); + parts.push(""); - if (session.characters.length > 0) { - parts.push("\n已登记角色(speaker 必须用这些名字之一,或本场景新引入):"); - for (const c of session.characters) { - parts.push(`- ${c.name}`); - } - } + // ── 2. story bible — spine only (stable) ────────────────────────────── + parts.push(renderStoryStateSpine(session.storyState)); + parts.push(""); - const priorKeys = collectPriorSceneKeys(session); - if (priorKeys.length > 0) { - parts.push("\n已使用的 sceneKey(同一物理空间请沿用,不要新造):"); - for (const k of priorKeys) parts.push(`- ${k}`); - } + // ── 3a. registered characters ───────────────────────────────────────── + // SENTINEL pattern: header + a constant "after this line, entries follow" + // marker, then the entries themselves. The marker is byte-identical even + // when the list is empty, so adding a character only ever APPENDS bytes + // — earlier bytes never shift. Crucial for prefix caching: a placeholder + // like "(暂无)" that gets replaced by entries breaks the prefix the + // moment the first character is registered. + parts.push("已登记角色(speaker 必须用这些名字之一,或本场景新引入):"); + parts.push("(以下每行一个已登记角色,开场前为空。)"); + for (const c of session.characters) parts.push(`- ${c.name}`); + parts.push(""); - if (session.history.length === 0) { - parts.push( - "\n这是故事的开场。请按【故事档案】里的 nextHook 把第一幕的冷开场写出来——开场即抓人,别花笔墨铺垫世界观。写完后更新 storyStatePatch。严格以 JSON 格式返回。", - ); - return parts.join("\n"); - } + // ── 3b. 
prior sceneKeys (sentinel pattern, same rationale) ──────────── + parts.push("已使用的 sceneKey(同一物理空间请沿用,不要新造):"); + parts.push("(以下每行一个已用过的 sceneKey,开场前为空。)"); + for (const k of collectPriorSceneKeys(session)) parts.push(`- ${k}`); + parts.push(""); - parts.push("\n场景历史(按时间顺序):"); - session.history.forEach((entry, idx) => { - const lines: string[] = [`【场景 ${idx + 1}】`]; - if (entry.scene.sceneKey) lines.push(` sceneKey: ${entry.scene.sceneKey}`); - - const visited = entry.visitedBeatIds.length - ? entry.visitedBeatIds - : [entry.scene.entryBeatId]; - const beatById = new Map(entry.scene.beats.map((b) => [b.id, b])); - const visitedBeats = visited - .map((id) => beatById.get(id)) - .filter((b): b is NonNullable => Boolean(b)); - - for (const b of visitedBeats) { - const fragments: string[] = []; - if (b.narration) fragments.push(`旁白:${b.narration}`); - if (b.line) fragments.push(`${b.speaker ?? "?"}:${b.line}`); - if (fragments.length) lines.push(" " + fragments.join(" / ")); - } - - if (entry.exit) { - if (entry.exit.kind === "choice") { - lines.push( - ` 玩家最终选择:${entry.exit.label}(去往:${entry.exit.nextSceneSeed})`, - ); - } else { - lines.push(` 玩家自由动作:${entry.exit.action}`); - } - } - parts.push(lines.join("\n")); + // ── 4. history[0..N-2] — ARCHIVED entries (sentinel, append-only) ───── + // CRITICAL: only the ALREADY-ARCHIVED entries (i.e. everything except + // history[-1]) go in the stable prefix. The last entry is still "live": + // its visitedBeatIds keeps growing as the player walks more beats in the + // current scene, and speculative prefetch triggers Writer calls that + // observe different snapshots of history[-1] mid-scene. Putting the live + // entry in the stable prefix would corrupt every Writer call's cache. + // + // Archived entries (history[0..N-2]) are immutable — once a scene is + // exited, its visitedBeatIds + exit are frozen. Safe to cache. 
+ const archivedHistory = session.history.slice(0, -1); + parts.push("场景历史(按时间顺序,已完结):"); + parts.push("(以下每段一幕已完结的场景,开场前为空。)"); + archivedHistory.forEach((entry, idx) => { + parts.push(renderHistoryEntry(entry, idx + 1)); }); + parts.push(""); + // ════════════════ DYNAMIC SUFFIX 从这里开始 ═══════════════════════════ + // 上面 ~95% 的 prompt 长度应该已经稳定可缓存。下面每次调用都会变化。 + + // ── 5. story bible — dynamic patch ──────────────────────────────────── + parts.push(renderStoryStateDynamic(session.storyState)); + parts.push(""); + + // ── 6. last-beat snippet (the exact emotional cliffhanger) ── + // NOTE(review): the history block above renders history.slice(0, -1), so the + // last entry is NOT in it. Its full-entry render was dropped from here to + // save ~200-500 dynamic tokens; the Writer now sees only this final beat and + // the exit hint. Restore renderHistoryEntry(last, ...) here if that is too thin. const last = session.history.at(-1); - - // The exact last moment the player stopped on — the new scene must continue - // seamlessly from this emotional beat, not reset to a neutral state. if (last) { const lastBeatId = last.visitedBeatIds.at(-1) ?? last.scene.entryBeatId; const lastBeat = last.scene.beats.find((b) => b.id === lastBeatId); @@ -349,12 +435,20 @@ export function buildWriterUserMessage(session: Session): string { if (lastBeat.line) frag.push(`${lastBeat.speaker ?? "?"}:${lastBeat.line}`); if (frag.length) { parts.push( - `\n上一刻(玩家停留的最后一个画面,新场景要从这里的情绪无缝承接):\n ${frag.join(" / ")}`, + `上一刻(玩家停留的最后一个画面,新场景从这里的情绪无缝承接):\n ${frag.join(" / ")}`, ); } } } + if (session.history.length === 0) { + parts.push( + "\n这是故事的开场。请按【故事档案】里的 nextHook 把第一幕的冷开场写出来——开场即抓人,别花笔墨铺垫世界观。写完后更新 storyStatePatch。严格以 JSON 格式返回。", + ); + return parts.join("\n"); + } + + // ── 8. 
lastExit hint ────────────────────────────────────────────────── const lastExit = last?.exit; if (lastExit) { if (lastExit.kind === "choice") { @@ -370,6 +464,7 @@ export function buildWriterUserMessage(session: Session): string { parts.push("\n无缝续写下一个场景,延续上一刻的情绪。"); } + // ── 9. format reminder tail ─────────────────────────────────────────── parts.push("写完后别忘了更新 storyStatePatch。严格以 JSON 格式返回。"); return parts.join("\n"); } @@ -518,6 +613,22 @@ export const CINEMATOGRAPHER_SYSTEM = `你是视觉小说的「分镜导演」 不要输出 JSON 以外的任何文本。`; +// Stable hint block — invariant across every Cinematographer call in a +// session. Front-loading this (with the session-scoped styleGuide) gives the +// prefix cache something substantial to anchor on; without it, the per-scene +// `sceneSummary` would land in the first content chunk and force the whole +// user message to miss. Long enough to land beyond the 64-token chunk +// boundary that follows the system prompt. +const CINE_STABLE_HINT = [ + "", + "以下为本次场景的输入。请基于这些信息:", + "1. 选择最合适的 shotType(依据 system prompt 的动态镜头策略 + entryBeatSpeaker)。", + "2. 写一段**只用英文**的 integratedPrompt——纯环境 + 构图 + 角色姿态/位置;服饰由画师另外通过 referenceImages 锁定,你只描述能看到的样貌与镜头。", + "3. 若上一场与本场 sceneKey 相同,**强调连续性**(时段/情绪/构图微调),而不是重新设定空间。", + "4. 严格按 system prompt 要求的 JSON schema 输出。", + "", +].join("\n"); + export function buildCinematographerUserMessage( sceneSummary: string, styleGuide: string, @@ -527,38 +638,53 @@ export function buildCinematographerUserMessage( currentSceneKey: string | undefined, ): string { const parts: string[] = []; - parts.push(`全局美术画风:${styleGuide}`); - parts.push(`\n当前场景(来自编剧):${sceneSummary}`); + // ─── STABLE PREFIX ────────────────────────────────────────────────── + // styleGuide is session-immutable; CINE_STABLE_HINT is a true constant. + // Together they're long enough to cross at least one 64-token chunk + // boundary, so every subsequent Cinematographer call in this session can + // cache-hit through this block. 
+ parts.push(`全局美术画风:${styleGuide}`); + parts.push(CINE_STABLE_HINT); + + // ─── DYNAMIC SUFFIX ───────────────────────────────────────────────── + // Always emit every section header — even when empty — so positions don't + // shift between calls. (Caching of the dynamic section itself isn't + // expected, but stable positioning helps when adjacent calls happen to + // share a sceneSummary prefix.) + parts.push(`当前场景(来自编剧):${sceneSummary}`); + parts.push(""); + + parts.push("开场画面里的角色及其姿态:"); if (entryBeatActive.length > 0) { - parts.push("\n开场画面里的角色及其姿态:"); for (const c of entryBeatActive) { parts.push(`- ${c.name}:${c.pose ?? "(无具体姿态描述)"}`); } } else { - parts.push("\n开场画面里没有角色(纯环境)。"); + parts.push("(无角色,纯环境)"); } + parts.push(""); // entryBeatSpeaker drives the dynamic camera policy (see CINEMATOGRAPHER_SYSTEM). // "你" means the player is speaking; an NPC name means an NPC is speaking; // empty means no dialog (pure environment / narration beat). if (entryBeatSpeaker === "你") { parts.push( - '\n开场 beat 是**玩家说话**(speaker = "你")——按动态镜头策略:medium shot,NPC 居中、做听玩家说话的姿态、看向画面外。**绝不要画出玩家**。', + '开场 beat 是**玩家说话**(speaker = "你")——按动态镜头策略:medium shot,NPC 居中、做听玩家说话的姿态、看向画面外。**绝不要画出玩家**。', ); } else if (entryBeatSpeaker) { parts.push( - `\n开场 beat 是 **${entryBeatSpeaker} 在对玩家说话**(speaker = "${entryBeatSpeaker}")——按动态镜头策略:close-up 或 medium close-up,${entryBeatSpeaker} 看向画面外(看玩家),眼神交流。`, + `开场 beat 是 **${entryBeatSpeaker} 在对玩家说话**(speaker = "${entryBeatSpeaker}")——按动态镜头策略:close-up 或 medium close-up,${entryBeatSpeaker} 看向画面外(看玩家),眼神交流。`, ); } else { parts.push( - "\n开场 beat 没有 speaker(纯旁白/环境)——按动态镜头策略:wide establishing shot 展现环境氛围。", + "开场 beat 没有 speaker(纯旁白/环境)——按动态镜头策略:wide establishing shot 展现环境氛围。", ); } if (priorSceneKey && currentSceneKey && priorSceneKey === currentSceneKey) { parts.push( - `\n注意:上一场和本场 sceneKey 都是 "${currentSceneKey}"——画师会把上一张场景图作为 referenceImages 之一锚定同一空间。你的 integratedPrompt 应该**强调连续性**,描述时段/情绪/构图的细微变化,而不是完全重新设定空间。`, + `\n注意:上一场和本场 sceneKey 都是 
"${currentSceneKey}"——画师会把上一张场景图作为 referenceImages 之一锚定同一空间。integratedPrompt 应强调连续性。`, ); }