feat(engine): merge cloudflare-migration — paradigm D engine, BYOK proxy, story persistence (#95)

Squash-merge the cloudflare-migration branch (7 commits by Kai ki) into staging with conflict resolution, feature integration, and bug fixes. Engine: - Paradigm D: single-stream Writer replacing dual-phase Plan/Beats - Delete Architect agent; story bible generated via Writer <plan> tag - Modular prompt architecture (segments/registry/builder) - StreamRouter for tagged stream splitting (<plan>/<story>/<choices>) Infrastructure: - Cloudflare Workers deployment (wrangler.jsonc, OpenNext adapter) - D1 database schema + Drizzle ORM (scaffolded, not yet active) - R2 storage helpers (scaffolded, not yet active) - Story persistence API routes + client-side persistence BYOK (Bring Your Own Key): - /api/llm/user-proxy with SSRF-protected LLM proxy (+ requireUser auth) - CORS-aware fetch in ai-client: auto-detect CORS failure, fallback to server proxy transparently via OpenAI SDK custom fetch - BYO config support added to classify-freeform and vision routes - SettingsModal CORS privacy notice (keys never logged/stored) SSE streaming: - engineClient.ts: fetchSSE helper for progressive scene events - startSession/requestScene accept optional emit callback - Fix SSE error event field name (error → message) in scene/start routes i18n integration: - Wire buildLanguageDirective into paradigm D's prompt builder - Update corsNotice i18n keys (zh-CN/en/ja) with CORS proxy privacy text - Preserve Session.language + LanguageSwitcher from i18n commit Co-authored-by: Kai ki <155355644+zbf1009@users.noreply.github.com> Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-06-18 18:05:38 +08:00
parent 05bd7e229c
commit 0e4c2ebef4
78 changed files with 7396 additions and 919 deletions
@@ -1,5 +1,5 @@
 import OpenAI from "openai";
-import type { ProviderConfig } from "@infiplot/types";
+import type { ChatStreamResult, ChatStreamUsage, ProviderConfig } from "@infiplot/types";
 import { normalizeBaseUrl } from "./normalizeUrl";

 export type ChatMessage = {
@@ -7,6 +7,75 @@ export type ChatMessage = {
  content: string;
 };

+// ── CORS proxy fallback (browser-only) ───────────────────────────────
+// BYO mode calls providers directly from the browser. When a provider
+// rejects the preflight (no CORS headers), the first request throws a
+// TypeError. We cache the blocked host and transparently reroute all
+// subsequent requests through /api/llm/user-proxy, which forwards
+// server-side and returns the upstream response (including SSE streams)
+// byte-for-byte.
+
+const corsBlockedHosts = new Set<string>();
+
+export function isCorsProxied(baseUrl: string): boolean {
+  try {
+    return corsBlockedHosts.has(new URL(baseUrl).host);
+  } catch {
+    return false;
+  }
+}
+
+function proxyFetch(
+  config: ProviderConfig,
+  init?: RequestInit,
+): Promise<Response> {
+  let body: Record<string, unknown> = {};
+  if (typeof init?.body === "string") {
+    try { body = JSON.parse(init.body); } catch { /* empty */ }
+  }
+  return globalThis.fetch("/api/llm/user-proxy", {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({
+      provider: "openai",
+      apiKey: config.apiKey,
+      baseUrl: config.baseUrl,
+      body,
+      model: config.model,
+      stream: body.stream === true,
+    }),
+  });
+}
+
+function makeCorsAwareFetch(
+  config: ProviderConfig,
+): (input: string | URL | Request, init?: RequestInit) => Promise<Response> {
+  return async (input, init) => {
+    const url =
+      typeof input === "string" ? input
+      : input instanceof URL ? input.toString()
+      : input.url;
+
+    let host: string;
+    try { host = new URL(url).host; } catch { return globalThis.fetch(input, init); }
+
+    if (corsBlockedHosts.has(host)) {
+      return proxyFetch(config, init);
+    }
+
+    try {
+      return await globalThis.fetch(input, init);
+    } catch (err) {
+      if (err instanceof TypeError) {
+        corsBlockedHosts.add(host);
+        console.warn(`[CORS] ${host} blocked, falling back to server proxy`);
+        return proxyFetch(config, init);
+      }
+      throw err;
+    }
+  };
+}
+
 // Cache observability for the prompt-prefix caching that the Writer stable
 // prefix relies on. The OpenAI usage object reports only cached READS
 // (prompt_tokens_details.cached_tokens) and has no field for cache WRITES
@@ -28,6 +97,16 @@ function summarizeSdkUsage(
  return `[cache] ${tag} input=${input} completion=${output} (provider didn't report cache stats)`;
 }

+function makeClient(config: ProviderConfig): OpenAI {
+  return new OpenAI({
+    apiKey: config.apiKey,
+    baseURL: normalizeBaseUrl(config.baseUrl, "openai_compatible"),
+    maxRetries: 0,
+    dangerouslyAllowBrowser: true,
+    ...(typeof window !== "undefined" ? { fetch: makeCorsAwareFetch(config) } : {}),
+  });
+}
+
 export async function chat(
  config: ProviderConfig,
  messages: ChatMessage[],
@@ -36,12 +115,7 @@ export async function chat(
    tag?: string;
  },
 ): Promise<string> {
-  const client = new OpenAI({
-    apiKey: config.apiKey,
-    baseURL: normalizeBaseUrl(config.baseUrl, "openai_compatible"),
-    maxRetries: 0,
-    dangerouslyAllowBrowser: true,
-  });
+  const client = makeClient(config);

  const completion = await client.chat.completions.create({
    model: config.model,
@@ -61,3 +135,97 @@ export async function chat(
  }
  return text;
 }
+
+/**
+ * Streaming variant of {@link chat} — the streaming primitive behind
+ * paradigm D. Returns incremental `textStream` chunks plus an end-of-stream
+ * `usage` promise so `summarizeSdkUsage` keeps doing cache accounting.
+ *
+ * Uses the OpenAI SDK's native streaming (`stream: true`) which returns an
+ * async iterable of ChatCompletionChunk. The returned `usage` settles after
+ * the stream drains, so callers should `await result.usage` once iteration
+ * ends.
+ *
+ * Degrade path: if the provider doesn't support streaming, fall back to a
+ * single non-streaming call wrapped as a one-chunk stream so downstream
+ * tag-routing still works — the player loses progressive playback but the
+ * scene generates normally.
+ */
+export function chatStream(
+  config: ProviderConfig,
+  messages: ChatMessage[],
+  opts?: {
+    temperature?: number;
+    tag?: string;
+  },
+): ChatStreamResult {
+  const client = makeClient(config);
+  const tag = opts?.tag ?? "chatStream";
+  const msgPayload = messages.map((m) => ({
+    role: m.role as "system" | "user" | "assistant",
+    content: m.content,
+  }));
+
+  let resolveUsage: (u: ChatStreamUsage | undefined) => void;
+  const usage = new Promise<ChatStreamUsage | undefined>((r) => { resolveUsage = r; });
+
+  const textStream = (async function* (): AsyncIterable<string> {
+    try {
+      const stream = await client.chat.completions.create({
+        model: config.model,
+        messages: msgPayload,
+        temperature: opts?.temperature ?? 0.9,
+        stream: true,
+        stream_options: { include_usage: true },
+      });
+
+      for await (const chunk of stream) {
+        const delta = chunk.choices[0]?.delta?.content;
+        if (delta) yield delta;
+
+        if (chunk.usage) {
+          const u: ChatStreamUsage = {
+            prompt_tokens: chunk.usage.prompt_tokens,
+            completion_tokens: chunk.usage.completion_tokens,
+            prompt_tokens_details: chunk.usage.prompt_tokens_details
+              ? { cached_tokens: chunk.usage.prompt_tokens_details.cached_tokens ?? undefined }
+              : undefined,
+          };
+          console.log(summarizeSdkUsage(tag, chunk.usage));
+          resolveUsage!(u);
+        }
+      }
+      // If usage was never emitted (provider omitted it), resolve undefined.
+      resolveUsage!(undefined);
+    } catch (err) {
+      // Streaming not supported by provider → degrade to buffered call.
+      console.warn(
+        `[chatStream] streaming failed, degrading to non-streaming:`,
+        err,
+      );
+      try {
+        const completion = await client.chat.completions.create({
+          model: config.model,
+          messages: msgPayload,
+          temperature: opts?.temperature ?? 0.9,
+          stream: false,
+        });
+        const text = completion.choices[0]?.message?.content ?? "";
+        if (text) yield text;
+        console.log(summarizeSdkUsage(`${tag}:degraded`, completion.usage ?? undefined));
+        resolveUsage!(completion.usage ? {
+          prompt_tokens: completion.usage.prompt_tokens,
+          completion_tokens: completion.usage.completion_tokens,
+          prompt_tokens_details: completion.usage.prompt_tokens_details
+            ? { cached_tokens: completion.usage.prompt_tokens_details.cached_tokens ?? undefined }
+            : undefined,
+        } : undefined);
+      } catch (fallbackErr) {
+        resolveUsage!(undefined);
+        throw fallbackErr;
+      }
+    }
+  })();
+
+  return { textStream, usage };
+}
@@ -1,4 +1,4 @@
-export { chat } from "./chat";
+export { chat, chatStream, isCorsProxied } from "./chat";
 export { generateImage } from "./image";
 export type { GenerateImageOptions, GenerateImageResult } from "./image";
 export { interpretClick, analyzeImageDataUrl } from "./vision";
@@ -0,0 +1,168 @@
+import "server-only";
+
+/**
+ * BYOK (Bring Your Own Key) LLM Proxy
+ * Core logic for proxying user-provided API keys to upstream LLM providers.
+ * Handles SSRF防护, base URL normalization, and SSE streaming.
+ */
+
+// ── SSRF Protection ──────────────────────────────────────────────────────
+
+const INTERNAL_IP_PATTERNS = [
+  /^127\./,           // localhost
+  /^10\./,            // 10.0.0.0/8
+  /^172\.(1[6-9]|2\d|3[01])\./, // 172.16.0.0/12
+  /^192\.168\./,      // 192.168.0.0/16
+  /^169\.254\./,      // link-local
+  /^::1$/,            // IPv6 localhost
+  /^fe80:/,           // IPv6 link-local
+  /^fc00:/,           // IPv6 private
+];
+
+/**
+ * Validate upstream URL to prevent SSRF attacks.
+ * Only allows https:// and rejects internal IPs.
+ */
+export function validateUpstreamUrl(url: string): { valid: boolean; error?: string } {
+  try {
+    const parsed = new URL(url);
+
+    // Only https allowed (no http, file, etc.)
+    if (parsed.protocol !== "https:") {
+      return { valid: false, error: "Only https:// URLs are allowed" };
+    }
+
+    // Reject internal IPs
+    const hostname = parsed.hostname.toLowerCase();
+    if (hostname === "localhost" || hostname === "127.0.0.1" || hostname === "::1") {
+      return { valid: false, error: "Localhost not allowed" };
+    }
+
+    // Check IP patterns
+    for (const pattern of INTERNAL_IP_PATTERNS) {
+      if (pattern.test(hostname)) {
+        return { valid: false, error: "Internal IP ranges not allowed" };
+      }
+    }
+
+    return { valid: true };
+  } catch {
+    return { valid: false, error: "Invalid URL" };
+  }
+}
+
+// ── Base URL Normalization ───────────────────────────────────────────────
+
+/**
+ * Normalize base URL: add https:// prefix if missing, strip trailing slashes.
+ */
+export function normalizeBaseUrl(url: string): string {
+  let cleaned = url.trim().replace(/\/+$/, "");
+  if (cleaned && !/^https?:\/\//i.test(cleaned)) {
+    cleaned = `https://${cleaned}`;
+  }
+  return cleaned;
+}
+
+/**
+ * Strip known API path suffixes from base URL (longest match first).
+ */
+function stripSuffixes(url: string, suffixes: string[]): string {
+  let cleaned = url.replace(/\/+$/, "");
+  for (const s of [...suffixes].sort((a, b) => b.length - a.length)) {
+    if (cleaned.endsWith(s)) {
+      cleaned = cleaned.slice(0, -s.length);
+      break;
+    }
+  }
+  return cleaned.replace(/\/+$/, "");
+}
+
+const OPENAI_SUFFIXES = ["/v1/chat/completions", "/v1/models", "/v1"];
+const CLAUDE_SUFFIXES = ["/v1/messages", "/v1/models", "/v1"];
+const GEMINI_SUFFIXES = ["/v1beta/models", "/v1beta", "/v1/models", "/v1"];
+
+// ── Proxy Core ───────────────────────────────────────────────────────────
+
+export interface ProxyLLMParams {
+  provider: "openai" | "claude" | "gemini";
+  apiKey: string;
+  baseUrl: string;
+  body: Record<string, unknown>;
+  model?: string; // Required for Gemini (model name in URL)
+  stream?: boolean; // Default true
+}
+
+/**
+ * Proxy LLM request to upstream provider.
+ * Transparently forwards both streaming (SSE) and non-streaming responses.
+ */
+export async function proxyLLM(params: ProxyLLMParams): Promise<Response> {
+  const { provider, apiKey, baseUrl, body, model, stream = true } = params;
+
+  // Validate base URL
+  const validation = validateUpstreamUrl(baseUrl);
+  if (!validation.valid) {
+    return new Response(
+      JSON.stringify({ error: validation.error }),
+      { status: 400, headers: { "Content-Type": "application/json" } },
+    );
+  }
+
+  // Build upstream URL and headers
+  let upstreamUrl: string;
+  const headers: Record<string, string> = { "Content-Type": "application/json" };
+
+  switch (provider) {
+    case "openai": {
+      const base = stripSuffixes(baseUrl, OPENAI_SUFFIXES);
+      upstreamUrl = `${base}/v1/chat/completions`;
+      headers["Authorization"] = `Bearer ${apiKey}`;
+      break;
+    }
+    case "claude": {
+      const base = stripSuffixes(baseUrl, CLAUDE_SUFFIXES);
+      upstreamUrl = `${base}/v1/messages`;
+      headers["x-api-key"] = apiKey;
+      headers["anthropic-version"] = "2023-06-01";
+      break;
+    }
+    case "gemini": {
+      const base = stripSuffixes(baseUrl, GEMINI_SUFFIXES);
+      const modelName = model || "gemini-2.0-flash";
+      const action = stream ? "streamGenerateContent" : "generateContent";
+      const streamParam = stream ? "&alt=sse" : "";
+      upstreamUrl = `${base}/v1beta/models/${modelName}:${action}?key=${apiKey}${streamParam}`;
+      break;
+    }
+    default:
+      return new Response(
+        JSON.stringify({ error: `Unsupported provider: ${provider}` }),
+        { status: 400, headers: { "Content-Type": "application/json" } },
+      );
+  }
+
+  // Forward to upstream
+  try {
+    const upstreamResponse = await fetch(upstreamUrl, {
+      method: "POST",
+      headers,
+      body: JSON.stringify(body),
+    });
+
+    // Transparent proxy: strip content-encoding/length, forward body as-is
+    const responseHeaders = new Headers(upstreamResponse.headers);
+    responseHeaders.delete("content-encoding");
+    responseHeaders.delete("content-length");
+
+    return new Response(upstreamResponse.body, {
+      status: upstreamResponse.status,
+      headers: responseHeaders,
+    });
+  } catch (error) {
+    return new Response(
+      JSON.stringify({ error: error instanceof Error ? error.message : "Proxy error" }),
+      { status: 502, headers: { "Content-Type": "application/json" } },
+    );
+  }
+}
@@ -0,0 +1,99 @@
+// Bring-your-own LLM API keys — stored CLIENT-SIDE ONLY.
+//
+// When a user supplies their own keys, we persist {provider, baseUrl, apiKey}
+// in localStorage and send them with each /api/start and /api/scene request.
+// Keys never leak to server logs or persistence — they only pass through the
+// request→config construction path.
+
+const STORAGE_KEY = "infiplot:llm";
+
+/** Provider types matching byoProxy and ProviderProtocol */
+export type LlmProvider = "openai" | "claude" | "gemini";
+
+/** Stored BYO LLM config — exactly what we persist. */
+export type StoredLlmConfig = {
+  /** Which provider API to use */
+  provider: LlmProvider;
+  /** User's API key */
+  apiKey: string;
+  /** Optional custom base URL (empty = use provider default) */
+  baseUrl?: string;
+  /** Optional model name (empty = use server-side default for this provider/role) */
+  model?: string;
+};
+
+/** Per-role LLM config the user can independently configure */
+export type ByoLlmSettings = {
+  text?: StoredLlmConfig;
+  image?: StoredLlmConfig;
+  vision?: StoredLlmConfig;
+};
+
+/**
+ * Read persisted BYO LLM config. Returns null when running on the server,
+ * when nothing is stored, on parse failure, or when the stored shape is invalid.
+ */
+export function readStoredLlmConfig(): ByoLlmSettings | null {
+  if (typeof window === "undefined") return null;
+  try {
+    const raw = window.localStorage.getItem(STORAGE_KEY);
+    if (!raw) return null;
+    const parsed = JSON.parse(raw) as Partial<ByoLlmSettings>;
+
+    // Validate each role config
+    const result: ByoLlmSettings = {};
+    for (const role of ["text", "image", "vision"] as const) {
+      const cfg = parsed[role];
+      if (cfg && typeof cfg === "object") {
+        const provider = cfg.provider as string;
+        const apiKey = cfg.apiKey as string;
+        if (["openai", "claude", "gemini"].includes(provider) && apiKey?.trim()) {
+          result[role] = {
+            provider: provider as LlmProvider,
+            apiKey: apiKey.trim(),
+            baseUrl: typeof cfg.baseUrl === "string" ? cfg.baseUrl.trim() : undefined,
+            model: typeof cfg.model === "string" ? cfg.model.trim() : undefined,
+          };
+        }
+      }
+    }
+
+    return Object.keys(result).length > 0 ? result : null;
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Persist BYO LLM config. Trims keys and baseUrls so trailing whitespace
+ * from paste never breaks headers.
+ */
+export function writeStoredLlmConfig(config: ByoLlmSettings): void {
+  if (typeof window === "undefined") return;
+  try {
+    const payload: ByoLlmSettings = {};
+    for (const role of ["text", "image", "vision"] as const) {
+      const cfg = config[role];
+      if (cfg) {
+        payload[role] = {
+          provider: cfg.provider,
+          apiKey: cfg.apiKey.trim(),
+          baseUrl: cfg.baseUrl?.trim() || undefined,
+          model: cfg.model?.trim() || undefined,
+        };
+      }
+    }
+    window.localStorage.setItem(STORAGE_KEY, JSON.stringify(payload));
+  } catch {
+    // Storage disabled / quota / private mode — BYO simply stays off.
+  }
+}
+
+export function clearStoredLlmConfig(): void {
+  if (typeof window === "undefined") return;
+  try {
+    window.localStorage.removeItem(STORAGE_KEY);
+  } catch {
+    // ignore
+  }
+}
@@ -0,0 +1,299 @@
+// Client-side story persistence helpers.
+//
+// Provides: anonymous user ID management, save/load functions that call
+// /api/stories/* and fallback to localStorage when D1 is unavailable.
+
+import type { Session, Scene, Character, StoryState } from "@infiplot/types";
+import type { StorySaveInput, SceneSaveInput, CharacterSaveInput, StoryMeta, StoryLoadResult } from "@/lib/db/repositories/storyRepo";
+
+const USER_ID_KEY = "infiplot:userId";
+const SAVE_FALLBACK_KEY = "infiplot:savedStories";
+
+// ── Anonymous User ID ────────────────────────────────────────────────────
+
+export function getOrCreateUserId(): string {
+  if (typeof window === "undefined") return "";
+  try {
+    let id = localStorage.getItem(USER_ID_KEY);
+    if (!id) {
+      id = `anon_${crypto.randomUUID()}`;
+      localStorage.setItem(USER_ID_KEY, id);
+    }
+    return id;
+  } catch {
+    return `anon_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
+  }
+}
+
+// ── Session → Save Input Conversion ─────────────────────────────────────
+
+export function sessionToSaveInput(session: Session): {
+  story: StorySaveInput;
+  scenes: SceneSaveInput[];
+  characters: CharacterSaveInput[];
+} {
+  const story: StorySaveInput = {
+    id: session.id,
+    userId: getOrCreateUserId(),
+    worldSetting: session.worldSetting,
+    styleGuide: session.styleGuide,
+    styleReferenceImage: session.styleReferenceImage,
+    orientation: (session.orientation as "portrait" | "landscape") ?? "landscape",
+    storyState: session.storyState,
+    status: "active",
+  };
+
+  const scenes: SceneSaveInput[] = (session.history ?? []).map(
+    (entry, idx) => ({
+      id: entry.scene.id,
+      sceneKey: entry.scene.sceneKey,
+      sceneSummary: entry.scene.scenePrompt,
+      imageUrl: entry.scene.imageUrl ?? "",
+      beats: entry.scene.beats,
+      sortOrder: idx,
+    }),
+  );
+
+  const characters: CharacterSaveInput[] = (session.characters ?? []).map(
+    (c) => ({
+      name: c.name,
+      visualDescription: c.visualDescription,
+      voiceDescription: c.voiceDescription,
+      portrait:
+        c.basePortraitUrl || c.basePortraitUuid
+          ? { url: c.basePortraitUrl, uuid: c.basePortraitUuid }
+          : undefined,
+      voice: c.voice,
+    }),
+  );
+
+  return { story, scenes, characters };
+}
+
+// ── Save ─────────────────────────────────────────────────────────────────
+
+export type SaveResult =
+  | { ok: true; storyId: string; source: "server" }
+  | { ok: true; storyId: string; source: "localStorage" }
+  | { ok: false; error: string };
+
+export async function saveStory(session: Session): Promise<SaveResult> {
+  // TEMPORARY: localStorage-only mode (D1 disabled until auth integration).
+  // Anonymous D1 writes lack rate limiting / quota / ownership checks — an
+  // abuse risk on a public registration-less site. Persist locally instead.
+  return saveToLocalStorage(session);
+
+  /* DISABLED: D1 server path (will re-enable after auth integration)
+  const { story, scenes, characters } = sessionToSaveInput(session);
+
+  try {
+    const res = await fetch("/api/stories/save", {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({ story, scenes, characters }),
+    });
+
+    if (res.ok) {
+      const data = (await res.json()) as { storyId: string };
+      return { ok: true, storyId: data.storyId, source: "server" };
+    }
+
+    // Server failed - fallback to localStorage
+    throw new Error(`Server returned ${res.status}`);
+  } catch {
+    // D1 unavailable or network error - fallback to localStorage
+    return saveToLocalStorage(session);
+  }
+  */
+}
+
+function saveToLocalStorage(session: Session): SaveResult {
+  try {
+    const existing = loadFromLocalStorageAll();
+    // Strip bulky fields before persistence to stay within localStorage quota
+    // (~5-10MB across ALL keys). Without this, a multi-scene session with
+    // several voiced characters serializes to 1-2MB+ (voice.referenceAudioBase64
+    // is ~160KB each, styleReferenceImage 30-80KB), which can exceed quota and
+    // — worse — block the main thread on the synchronous localStorage write,
+    // freezing the subsequent navigation back to the home page. Both fields are
+    // reconstructible: voices re-provision on the next /api/scene call, and
+    // styleReferenceImage is cosmetic (engine regenerates gracefully without it).
+    const slimSession: Session = {
+      ...session,
+      styleReferenceImage: undefined,
+      characters: session.characters.map((c) => ({ ...c, voice: undefined })),
+    };
+    const entry = {
+      id: session.id,
+      worldSetting: session.worldSetting,
+      styleGuide: session.styleGuide,
+      sceneCount: session.history?.length ?? 0,
+      savedAt: Date.now(),
+      sessionJson: JSON.stringify(slimSession),
+    };
+    const updated = [entry, ...existing.filter((e) => e.id !== session.id)].slice(0, 20);
+    localStorage.setItem(SAVE_FALLBACK_KEY, JSON.stringify(updated));
+    return { ok: true, storyId: session.id, source: "localStorage" };
+  } catch {
+    return { ok: false, error: "无法保存到本地存储" };
+  }
+}
+
+// ── Load ─────────────────────────────────────────────────────────────────
+
+export async function loadStoryList(): Promise<StoryMeta[]> {
+  // TEMPORARY: localStorage-only mode (D1 disabled until auth integration)
+  const entries = loadFromLocalStorageAll();
+  return entries.map((e) => ({
+    id: e.id,
+    userId: null, // anonymous
+    worldSetting: e.worldSetting,
+    styleGuide: e.styleGuide,
+    orientation: "landscape", // localStorage doesn't store this, default
+    status: "active",
+    sceneCount: e.sceneCount,
+    createdAt: new Date(e.savedAt),
+    updatedAt: new Date(e.savedAt),
+  }));
+
+  /* DISABLED: D1 server path (will re-enable after auth integration)
+  const userId = getOrCreateUserId();
+  try {
+    const res = await fetch(`/api/stories/list?userId=${encodeURIComponent(userId)}`);
+    if (res.ok) {
+      const data = (await res.json()) as { stories: StoryMeta[] };
+      return data.stories;
+    }
+    return [];
+  } catch {
+    return [];
+  }
+  */
+}
+
+export async function loadStory(storyId: string): Promise<StoryLoadResult | null> {
+  // TEMPORARY: localStorage-only mode — unused in current code (play page uses
+  // loadFromLocalStorage directly). Returns null to maintain type compatibility.
+  // Will be re-enabled when D1 is restored after auth integration.
+  return null;
+
+  /* DISABLED: D1 server path
+  try {
+    const res = await fetch(`/api/stories/${encodeURIComponent(storyId)}`);
+    if (res.ok) {
+      return (await res.json()) as StoryLoadResult;
+    }
+    return null;
+  } catch {
+    return null;
+  }
+  */
+}
+
+export async function deleteStory(storyId: string): Promise<boolean> {
+  // TEMPORARY: localStorage-only mode
+  try {
+    const existing = loadFromLocalStorageAll();
+    const updated = existing.filter((e) => e.id !== storyId);
+    if (updated.length === existing.length) return false; // not found
+    localStorage.setItem(SAVE_FALLBACK_KEY, JSON.stringify(updated));
+    return true;
+  } catch {
+    return false;
+  }
+
+  /* DISABLED: D1 server path
+  try {
+    const res = await fetch(`/api/stories/${encodeURIComponent(storyId)}`, {
+      method: "DELETE",
+    });
+    return res.ok;
+  } catch {
+    return false;
+  }
+  */
+}
+
+// ── localStorage fallback helpers ────────────────────────────────────────
+
+type LocalStorageEntry = {
+  id: string;
+  worldSetting: string;
+  styleGuide: string;
+  sceneCount: number;
+  savedAt: number;
+  sessionJson: string;
+};
+
+function loadFromLocalStorageAll(): LocalStorageEntry[] {
+  if (typeof window === "undefined") return [];
+  try {
+    const raw = localStorage.getItem(SAVE_FALLBACK_KEY);
+    if (!raw) return [];
+    return JSON.parse(raw) as LocalStorageEntry[];
+  } catch {
+    return [];
+  }
+}
+
+export function loadFromLocalStorage(storyId: string): Session | null {
+  const entries = loadFromLocalStorageAll();
+  const entry = entries.find((e) => e.id === storyId);
+  if (!entry) return null;
+  try {
+    return JSON.parse(entry.sessionJson) as Session;
+  } catch {
+    return null;
+  }
+}
+
+// ── StoryLoadResult → Session Conversion ─────────────────────────────────
+
+/**
+ * Convert StoryLoadResult (API response from /api/stories/[id]) back to Session
+ * shape consumed by app/play/page.tsx.
+ */
+export function storyLoadResultToSession(result: StoryLoadResult): Session {
+  const { story, scenes, characters } = result;
+
+  // Map scenes back to SceneHistoryEntry structure
+  const history = scenes.map((s) => {
+    const beats = s.beats ?? [];
+    // entryBeatId is not persisted in D1 — recover it from the first beat.
+    const entryBeatId = beats[0]?.id ?? "";
+    return {
+      scene: {
+        id: s.id,
+        sceneKey: s.sceneKey,
+        scenePrompt: s.sceneSummary ?? "",
+        imageUrl: s.imageUrl,
+        beats,
+        entryBeatId,
+        orientation: s.orientation,
+      },
+      visitedBeatIds: entryBeatId ? [entryBeatId] : [], // rebuilt as user navigates
+      exit: undefined,    // Not persisted in D1
+    };
+  });
+
+  return {
+    id: story.id,
+    // createdAt crosses the JSON API boundary as an ISO string, so coerce it
+    // back to an epoch the Session shape expects (number).
+    createdAt: new Date(story.createdAt).getTime(),
+    worldSetting: story.worldSetting,
+    styleGuide: story.styleGuide,
+    styleReferenceImage: story.styleReferenceImage,
+    orientation: story.orientation,
+    storyState: story.storyState,
+    history,
+    characters: characters.map((c) => ({
+      name: c.name,
+      voiceDescription: c.voiceDescription ?? "",
+      visualDescription: c.visualDescription,
+      basePortraitUuid: c.portrait?.uuid,
+      basePortraitUrl: c.portrait?.url,
+      voice: c.voice,
+    })),
+  };
+}
@@ -1,8 +1,13 @@
+import "server-only";
+
 import type {
+  ByoLlmKeys,
  EngineConfig,
+  ProviderConfig,
  ProviderProtocol,
  TtsConfig,
 } from "@infiplot/types";
+import { validateUpstreamUrl, normalizeBaseUrl } from "./byoProxy";

 const VALID_PROTOCOLS = [
  "openai_compatible",
@@ -88,3 +93,120 @@ export function loadEngineConfig(): EngineConfig {
    imageHedgeMs: readOptionalPositiveInt("IMAGE_HEDGE_MS"),
  };
 }
+
+// ── BYOK (Bring Your Own Key) ────────────────────────────────────────────
+
+/** Provider default base URLs when user doesn't specify one. */
+const PROVIDER_DEFAULTS: Record<string, string> = {
+  openai: "https://api.openai.com",
+  claude: "https://api.anthropic.com",
+  gemini: "https://generativelanguage.googleapis.com",
+};
+
+/** Provider default models when user doesn't specify one. */
+const MODEL_DEFAULTS: Record<string, { text: string; image: string; vision: string }> = {
+  openai: {
+    text: "gpt-4o",
+    image: "gpt-image-1", // CR-4: 支持任意尺寸，dall-e-3 不支持 1536x1024
+    vision: "gpt-4o",
+  },
+  claude: {
+    text: "claude-3-5-sonnet-20241022",
+    image: "claude-3-5-sonnet-20241022", // Claude doesn't have native image gen
+    vision: "claude-3-5-sonnet-20241022",
+  },
+  gemini: {
+    text: "gemini-2.0-flash-exp",
+    image: "imagen-3.0-generate-001",
+    vision: "gemini-2.0-flash-exp",
+  },
+};
+
+type ByoRole = "text" | "image" | "vision";
+type ByoProviderConfig = { provider: string; apiKey: string; baseUrl?: string; model?: string };
+
+/**
+ * Build ProviderConfig from user-supplied BYOK credentials.
+ * Validates upstream URL (SSRF protection), normalizes baseUrl, applies defaults.
+ * Throws on validation failure so API route can return 400.
+ */
+function buildByoProviderConfig(
+  role: ByoRole,
+  byo: ByoProviderConfig,
+  fallback: ProviderConfig,
+): ProviderConfig {
+  const { provider, apiKey, baseUrl } = byo;
+
+  // Validate provider
+  if (!["openai", "claude", "gemini"].includes(provider)) {
+    throw new Error(`Invalid BYO provider for ${role}: ${provider}`);
+  }
+
+  // Claude/Gemini cannot generate images — only OpenAI supports image generation
+  if (role === "image" && provider !== "openai") {
+    throw new Error(
+      `BYO provider "${provider}" does not support image generation. Use "openai" for the image role.`,
+    );
+  }
+
+  // Validate apiKey
+  if (!apiKey?.trim()) {
+    throw new Error(`Missing BYO apiKey for ${role}`);
+  }
+
+  // Resolve baseUrl (user-provided or provider default)
+  let resolvedBaseUrl = baseUrl?.trim() || PROVIDER_DEFAULTS[provider];
+  if (!resolvedBaseUrl) {
+    throw new Error(`No baseUrl for BYO ${role} provider: ${provider}`);
+  }
+  resolvedBaseUrl = normalizeBaseUrl(resolvedBaseUrl);
+
+  // SSRF protection — validates the HOST portion of the URL.
+  // SAFETY INVARIANT: ai-client/normalizeUrl.ts only appends PATH segments
+  // (e.g. /v1) but never changes the host/authority. If that invariant ever
+  // breaks, this check must be moved downstream or duplicated. (CR-9)
+  const validation = validateUpstreamUrl(resolvedBaseUrl);
+  if (!validation.valid) {
+    throw new Error(`Invalid BYO baseUrl for ${role}: ${validation.error}`);
+  }
+
+  // Resolve model (user-provided > provider default > official model)
+  const modelDefaults = MODEL_DEFAULTS[provider];
+  const model = byo.model?.trim() || modelDefaults?.[role] || fallback.model;
+
+  // All providers are reached via their OpenAI-compatible endpoints.
+  const providerProtocol: ProviderProtocol =
+    provider === "openai" ? "openai" : "openai_compatible";
+
+  return {
+    baseUrl: resolvedBaseUrl,
+    apiKey: apiKey.trim(),
+    model,
+    provider: providerProtocol,
+  };
+}
+
+/**
+ * Build EngineConfig with BYOK (Bring Your Own Key) overrides.
+ * - `byo` param contains user-provided keys from request body (StartRequest.byo / SceneRequest.byo)
+ * - For each role (text/image/vision), if user provided BYO config, use it; otherwise fallback to official keys
+ * - Validates all BYO baseUrls (SSRF protection) and throws on failure
+ */
+export function buildByoEngineConfig(
+  byo: ByoLlmKeys,
+  officialConfig: EngineConfig,
+): EngineConfig {
+  return {
+    text: byo.text
+      ? buildByoProviderConfig("text", byo.text, officialConfig.text)
+      : officialConfig.text,
+    image: byo.image
+      ? buildByoProviderConfig("image", byo.image, officialConfig.image)
+      : officialConfig.image,
+    vision: byo.vision
+      ? buildByoProviderConfig("vision", byo.vision, officialConfig.vision)
+      : officialConfig.vision,
+    tts: officialConfig.tts, // TTS BYOK stays client-side only (existing flow)
+    mockImage: officialConfig.mockImage,
+  };
+}
@@ -0,0 +1,41 @@
+import "server-only";
+
+import { drizzle } from "drizzle-orm/d1";
+import { getCloudflareContext } from "@opennextjs/cloudflare";
+import * as schema from "./schema";
+
+/**
+ * Get D1 database instance from Cloudflare Workers env binding.
+ *
+ * Usage in API routes:
+ *   const db = getDb();
+ *   const stories = await db.select().from(schema.stories).where(...);
+ *
+ * @throws Error if called outside Cloudflare Workers runtime (e.g. local dev without wrangler)
+ */
+export function getDb() {
+  try {
+    const { env } = getCloudflareContext();
+
+    if (!env.DB) {
+      throw new Error(
+        "D1 binding 'DB' not found. " +
+        "Ensure wrangler.jsonc has d1_databases configured and you're running via wrangler dev/deploy."
+      );
+    }
+
+    return drizzle(env.DB, { schema });
+  } catch (error) {
+    // Re-throw with more context for debugging
+    throw new Error(
+      `Failed to get D1 database: ${error instanceof Error ? error.message : String(error)}. ` +
+      "Make sure you're running in Cloudflare Workers context (wrangler dev/deploy)."
+    );
+  }
+}
+
+/**
+ * Type alias for the Drizzle D1 database instance.
+ * Useful for dependency injection and testing.
+ */
+export type DbInstance = ReturnType<typeof getDb>;
@@ -0,0 +1,45 @@
+import "server-only";
+
+import { eq, and, sql } from "drizzle-orm";
+import type { DbInstance } from "../client";
+import { featuredStories } from "../schema";
+import type { FeaturedStory } from "../schema";
+
+/**
+ * Featured Story Repository - encapsulates D1 access for homepage featured stories.
+ *
+ * Provides: listByGender (active only, sorted by sortOrder), incrementClick (analytics).
+ */
+export class FeaturedRepository {
+  constructor(private db: DbInstance) {}
+
+  /**
+   * List active featured stories for a given gender, ordered by sortOrder.
+   *
+   * @param gender "male" or "female"
+   * @returns Array of FeaturedStory (only isActive=1, sorted by sortOrder ASC)
+   */
+  async listByGender(gender: "male" | "female"): Promise<FeaturedStory[]> {
+    return this.db
+      .select()
+      .from(featuredStories)
+      .where(and(eq(featuredStories.gender, gender), eq(featuredStories.isActive, 1)))
+      .orderBy(featuredStories.sortOrder);
+  }
+
+  /**
+   * Increment click count for a featured story (analytics).
+   *
+   * @param id Featured story ID (e.g. "m0", "f12")
+   * @returns true if updated, false if not found
+   */
+  async incrementClick(id: string): Promise<boolean> {
+    const result = await this.db
+      .update(featuredStories)
+      .set({ clickCount: sql`${featuredStories.clickCount} + 1` })
+      .where(eq(featuredStories.id, id));
+
+    // Drizzle D1 update returns { success, meta: { changes }, results }
+    return ((result as any).meta?.changes ?? 0) > 0;
+  }
+}
@@ -0,0 +1,308 @@
+import "server-only";
+
+import { eq, desc, sql, inArray } from "drizzle-orm";
+import type { DbInstance } from "../client";
+import { stories, scenes, characters } from "../schema";
+import type { Session, Scene as EngineScene, Character as EngineCharacter, StoryState } from "@infiplot/types";
+
+// ── Type Adapters ────────────────────────────────────────────────────────
+
+/**
+ * Input shape for saving a story session.
+ * Mirrors Session but with explicit story-level fields.
+ */
+export type StorySaveInput = {
+  id: string; // Session ID
+  userId?: string; // nullable - Phase 1 uses anonymous sessionId
+  worldSetting: string;
+  styleGuide: string;
+  styleReferenceImage?: string; // data URI or R2 key (TBD in save logic)
+  orientation: "portrait" | "landscape";
+  storyState?: StoryState;
+  status?: "active" | "archived";
+};
+
+export type SceneSaveInput = {
+  id: string;
+  sceneKey?: string;
+  sceneSummary?: string;
+  imageUrl: string; // Runware CDN URL (primary)
+  beats: EngineScene["beats"]; // Beat graph - will be serialized to beatsJson
+  orientation?: "portrait" | "landscape";
+  sortOrder: number; // scene sequence in story
+};
+
+export type CharacterSaveInput = {
+  name: string;
+  visualDescription?: string;
+  voiceDescription?: string;
+  portrait?: {
+    url?: string;
+    uuid?: string;
+  };
+  voice?: EngineCharacter["voice"];
+};
+
+/**
+ * Story metadata for list views.
+ */
+export type StoryMeta = {
+  id: string;
+  userId: string | null;
+  worldSetting: string;
+  styleGuide: string;
+  orientation: string;
+  status: string;
+  sceneCount: number;
+  createdAt: Date;
+  updatedAt: Date;
+};
+
+/**
+ * Full story load result (maps back to Session structure).
+ */
+export type StoryLoadResult = {
+  story: {
+    id: string;
+    userId: string | null;
+    worldSetting: string;
+    styleGuide: string;
+    styleReferenceImage?: string;
+    orientation: "portrait" | "landscape";
+    storyState?: StoryState;
+    status: string;
+    createdAt: Date;
+    updatedAt: Date;
+  };
+  scenes: Array<{
+    id: string;
+    sceneKey?: string;
+    sceneSummary?: string;
+    imageUrl: string;
+    beats: EngineScene["beats"];
+    orientation?: "portrait" | "landscape";
+    sortOrder: number;
+    createdAt: Date;
+  }>;
+  characters: Array<{
+    name: string;
+    visualDescription?: string;
+    voiceDescription?: string;
+    portrait?: {
+      url?: string;
+      uuid?: string;
+    };
+    voice?: EngineCharacter["voice"];
+  }>;
+};
+
+// ── Repository ───────────────────────────────────────────────────────────
+
+/**
+ * Story Repository - encapsulates D1 access for story persistence.
+ *
+ * **Atomic save**: uses D1 batch transaction to ensure all-or-nothing writes.
+ * **Cascade delete**: relies on schema FK ON DELETE CASCADE.
+ * **Serialization**: beats and storyState are JSON-serialized to TEXT columns.
+ */
+export class StoryRepository {
+  constructor(private db: DbInstance) {}
+
+  /**
+   * Save a complete story session (story + scenes + characters) atomically.
+   * Uses D1 batch transaction - all writes succeed or all fail.
+   *
+   * @param input Story metadata
+   * @param sceneInputs Scene list (beats will be serialized)
+   * @param characterInputs Character list (voice will be serialized)
+   * @returns storyId on success
+   * @throws Error if D1 transaction fails
+   */
+  async save(
+    input: StorySaveInput,
+    sceneInputs: SceneSaveInput[],
+    characterInputs: CharacterSaveInput[],
+  ): Promise<{ storyId: string }> {
+    const now = new Date();
+
+    // Build story record
+    const storyRecord = {
+      id: input.id,
+      userId: input.userId ?? null,
+      worldSetting: input.worldSetting,
+      styleGuide: input.styleGuide,
+      styleReferenceImageKey: input.styleReferenceImage ?? null, // Phase 1: store data URI as-is; R2 upload TBD
+      orientation: input.orientation,
+      storyStateJson: input.storyState ? JSON.stringify(input.storyState) : null,
+      status: input.status ?? "active",
+      createdAt: now,
+      updatedAt: now,
+    };
+
+    // Build scene records (serialize beats to JSON)
+    const sceneRecords = sceneInputs.map((s, idx) => ({
+      id: s.id,
+      storyId: input.id,
+      sceneKey: s.sceneKey ?? null,
+      sceneSummary: s.sceneSummary ?? null,
+      sceneImageKey: null, // Phase 1: R2 upload TBD
+      sceneImageUrl: s.imageUrl,
+      beatsJson: JSON.stringify(s.beats),
+      sortOrder: s.sortOrder ?? idx,
+      createdAt: now,
+    }));
+
+    // Build character records (serialize voice to JSON, ensure uniqueness per story+name)
+    const characterRecords = characterInputs.map((c, idx) => ({
+      id: `${input.id}_char_${idx}`, // synthetic ID
+      storyId: input.id,
+      name: c.name,
+      visualDescription: c.visualDescription ?? null,
+      voiceDescription: c.voiceDescription ?? null,
+      basePortraitKey: null, // Phase 1: R2 upload TBD
+      basePortraitUrl: c.portrait?.url ?? null,
+      basePortraitUuid: c.portrait?.uuid ?? null,
+      voiceJson: c.voice ? JSON.stringify(c.voice) : null,
+      createdAt: now,
+    }));
+
+    // Execute atomic batch transaction
+    await this.db.batch([
+      this.db.insert(stories).values(storyRecord).onConflictDoUpdate({
+        target: stories.id,
+        set: {
+          worldSetting: storyRecord.worldSetting,
+          styleGuide: storyRecord.styleGuide,
+          styleReferenceImageKey: storyRecord.styleReferenceImageKey,
+          orientation: storyRecord.orientation,
+          storyStateJson: storyRecord.storyStateJson,
+          status: storyRecord.status,
+          updatedAt: now,
+        },
+      }),
+      // Clear old scenes/characters (will cascade delete via FK)
+      this.db.delete(scenes).where(eq(scenes.storyId, input.id)),
+      this.db.delete(characters).where(eq(characters.storyId, input.id)),
+      // Insert new scenes/characters
+      ...sceneRecords.map((r) => this.db.insert(scenes).values(r)),
+      ...characterRecords.map((r) => this.db.insert(characters).values(r)),
+    ]);
+
+    return { storyId: input.id };
+  }
+
+  /**
+   * Load a complete story by ID, reconstructing Session shape.
+   *
+   * @param storyId Story primary key
+   * @returns StoryLoadResult with deserialized beats/storyState, or null if not found
+   */
+  async findById(storyId: string): Promise<StoryLoadResult | null> {
+    const [storyRow] = await this.db
+      .select()
+      .from(stories)
+      .where(eq(stories.id, storyId))
+      .limit(1);
+
+    if (!storyRow) return null;
+
+    const sceneRows = await this.db
+      .select()
+      .from(scenes)
+      .where(eq(scenes.storyId, storyId))
+      .orderBy(scenes.sortOrder);
+
+    const characterRows = await this.db
+      .select()
+      .from(characters)
+      .where(eq(characters.storyId, storyId));
+
+    return {
+      story: {
+        id: storyRow.id,
+        userId: storyRow.userId,
+        worldSetting: storyRow.worldSetting,
+        styleGuide: storyRow.styleGuide,
+        styleReferenceImage: storyRow.styleReferenceImageKey ?? undefined,
+        orientation: storyRow.orientation as "portrait" | "landscape",
+        storyState: storyRow.storyStateJson
+          ? (JSON.parse(storyRow.storyStateJson) as StoryState)
+          : undefined,
+        status: storyRow.status,
+        createdAt: storyRow.createdAt,
+        updatedAt: storyRow.updatedAt,
+      },
+      scenes: sceneRows.map((s) => ({
+        id: s.id,
+        sceneKey: s.sceneKey ?? undefined,
+        sceneSummary: s.sceneSummary ?? undefined,
+        imageUrl: s.sceneImageUrl ?? "", // CR-5: nullable column, fallback to empty string
+        beats: s.beatsJson ? JSON.parse(s.beatsJson) : [],
+        orientation: s.sceneImageUrl ? undefined : undefined, // Phase 1: no per-scene orientation in schema
+        sortOrder: s.sortOrder,
+        createdAt: s.createdAt,
+      })),
+      characters: characterRows.map((c) => ({
+        name: c.name,
+        visualDescription: c.visualDescription ?? undefined,
+        voiceDescription: c.voiceDescription ?? undefined,
+        portrait: c.basePortraitUrl
+          ? { url: c.basePortraitUrl, uuid: c.basePortraitUuid ?? undefined }
+          : undefined,
+        voice: c.voiceJson ? JSON.parse(c.voiceJson) : undefined,
+      })),
+    };
+  }
+
+  /**
+   * List story metadata for a given user, ordered by most recent first.
+   *
+   * @param userId User ID (or anonymous sessionId in Phase 1)
+   * @param limit Max stories to return (default 50)
+   * @returns Array of StoryMeta
+   */
+  async listByUser(userId: string, limit = 50): Promise<StoryMeta[]> {
+    const storyRows = await this.db
+      .select()
+      .from(stories)
+      .where(eq(stories.userId, userId))
+      .orderBy(desc(stories.updatedAt))
+      .limit(limit);
+
+    if (storyRows.length === 0) return [];
+
+    // CR-10: batch scene count in 2 queries total (not N+1)
+    const storyIds = storyRows.map((r) => r.id);
+    const countRows = await this.db
+      .select({ storyId: scenes.storyId, count: sql<number>`count(*)` })
+      .from(scenes)
+      .where(inArray(scenes.storyId, storyIds))
+      .groupBy(scenes.storyId);
+
+    const countMap = new Map(countRows.map((r) => [r.storyId, r.count]));
+
+    return storyRows.map((row) => ({
+      id: row.id,
+      userId: row.userId,
+      worldSetting: row.worldSetting,
+      styleGuide: row.styleGuide,
+      orientation: row.orientation,
+      status: row.status,
+      sceneCount: countMap.get(row.id) ?? 0,
+      createdAt: row.createdAt,
+      updatedAt: row.updatedAt,
+    }));
+  }
+
+  /**
+   * Delete a story and all associated scenes/characters (cascade via FK).
+   *
+   * @param storyId Story primary key
+   * @returns true if deleted, false if not found
+   */
+  async delete(storyId: string): Promise<boolean> {
+    const result = await this.db.delete(stories).where(eq(stories.id, storyId));
+    // Drizzle D1 delete returns { success, meta: { changes }, results }
+    return ((result as any).meta?.changes ?? 0) > 0;
+  }
+}
@@ -0,0 +1,123 @@
+import { sqliteTable, text, integer, index, uniqueIndex } from "drizzle-orm/sqlite-core";
+import { sql } from "drizzle-orm";
+
+// ── Stories ──────────────────────────────────────────────────────────────
+// User story sessions (REQ-4). Each story contains multiple scenes and characters.
+export const stories = sqliteTable(
+  "stories",
+  {
+    id: text("id").primaryKey(), // s_xxx session ID
+    userId: text("user_id"), // nullable - Phase 1 uses anonymous sessionId
+    worldSetting: text("world_setting").notNull(),
+    styleGuide: text("style_guide").notNull(),
+    styleReferenceImageKey: text("style_reference_image_key"), // R2 key (optional)
+    orientation: text("orientation").notNull().default("landscape"), // "portrait" | "landscape"
+    storyStateJson: text("story_state_json"), // JSON: StoryState
+    status: text("status").notNull().default("active"), // "active" | "archived"
+    createdAt: integer("created_at", { mode: "timestamp" })
+      .notNull()
+      .default(sql`(unixepoch())`),
+    updatedAt: integer("updated_at", { mode: "timestamp" })
+      .notNull()
+      .default(sql`(unixepoch())`)
+      .$onUpdate(() => new Date()),
+  },
+  (table) => ({
+    userIdIdx: index("stories_user_id_idx").on(table.userId),
+    createdAtIdx: index("stories_created_at_idx").on(table.createdAt),
+  }),
+);
+
+// ── Scenes ───────────────────────────────────────────────────────────────
+// Story scenes (REQ-4). Beats stored as JSON blob (not separate table).
+export const scenes = sqliteTable(
+  "scenes",
+  {
+    id: text("id").primaryKey(),
+    storyId: text("story_id")
+      .notNull()
+      .references(() => stories.id, { onDelete: "cascade" }),
+    sceneKey: text("scene_key"), // e.g. "classroom-dusk"
+    sceneSummary: text("scene_summary"),
+    sceneImageKey: text("scene_image_key"), // R2 key (optional)
+    sceneImageUrl: text("scene_image_url"), // Runware CDN URL (primary)
+    beatsJson: text("beats_json"), // JSON: Beat[] - whole scene beats graph
+    sortOrder: integer("sort_order").notNull(), // scene sequence in story
+    createdAt: integer("created_at", { mode: "timestamp" })
+      .notNull()
+      .default(sql`(unixepoch())`),
+  },
+  (table) => ({
+    storyIdIdx: index("scenes_story_id_idx").on(table.storyId),
+  }),
+);
+
+// ── Characters ───────────────────────────────────────────────────────────
+// Story characters (REQ-4). Each character belongs to a story.
+export const characters = sqliteTable(
+  "characters",
+  {
+    id: text("id").primaryKey(),
+    storyId: text("story_id")
+      .notNull()
+      .references(() => stories.id, { onDelete: "cascade" }),
+    name: text("name").notNull(),
+    visualDescription: text("visual_description"),
+    voiceDescription: text("voice_description"),
+    basePortraitKey: text("base_portrait_key"), // R2 key (optional)
+    basePortraitUrl: text("base_portrait_url"), // CDN URL (primary)
+    basePortraitUuid: text("base_portrait_uuid"), // image service UUID
+    voiceJson: text("voice_json"), // JSON: CharacterVoice
+    createdAt: integer("created_at", { mode: "timestamp" })
+      .notNull()
+      .default(sql`(unixepoch())`),
+  },
+  (table) => ({
+    storyNameIdx: uniqueIndex("characters_story_name_idx").on(
+      table.storyId,
+      table.name,
+    ),
+  }),
+);
+
+// ── Featured Stories ─────────────────────────────────────────────────────
+// Featured story cards displayed on homepage (REQ-5).
+export const featuredStories = sqliteTable(
+  "featured_stories",
+  {
+    id: text("id").primaryKey(), // e.g. "m0", "f12"
+    gender: text("gender").notNull(), // "male" | "female"
+    title: text("title").notNull(),
+    outline: text("outline").notNull(),
+    style: text("style").notNull(),
+    tags: text("tags").notNull(), // JSON array
+    coverPath: text("cover_path").notNull(), // e.g. "/home/m0.webp"
+    firstactPath: text("firstact_path").notNull(), // e.g. "/home/firstact/m0.json"
+    firstscenePath: text("firstscene_path"), // e.g. "/home/firstscene/m0.webp"
+    sortOrder: integer("sort_order").notNull().default(0),
+    isActive: integer("is_active").notNull().default(1), // 1 = active, 0 = inactive
+    clickCount: integer("click_count").notNull().default(0),
+    createdAt: integer("created_at", { mode: "timestamp" })
+      .notNull()
+      .default(sql`(unixepoch())`),
+  },
+  (table) => ({
+    genderActiveIdx: index("featured_gender_active_idx").on(
+      table.gender,
+      table.isActive,
+    ),
+  }),
+);
+
+// ── Type exports ─────────────────────────────────────────────────────────
+export type Story = typeof stories.$inferSelect;
+export type NewStory = typeof stories.$inferInsert;
+
+export type Scene = typeof scenes.$inferSelect;
+export type NewScene = typeof scenes.$inferInsert;
+
+export type Character = typeof characters.$inferSelect;
+export type NewCharacter = typeof characters.$inferInsert;
+
+export type FeaturedStory = typeof featuredStories.$inferSelect;
+export type NewFeaturedStory = typeof featuredStories.$inferInsert;
@@ -1,90 +0,0 @@
-import { chat } from "@infiplot/ai-client";
-import type { ProviderConfig, Session, StoryState } from "@infiplot/types";
-import { parseJsonLoose } from "../jsonParser";
-import { ARCHITECT_SYSTEM, buildArchitectUserMessage } from "../prompts";
-
-// ──────────────────────────────────────────────────────────────────────
-//  Architect agent — ONE LLM call at session start.
-//
-//  Expands the user's (often terse) world + style prompt into a real story
-//  bible: a second-person protagonist with a want and a flaw, a single
-//  central dramatic question (logline), a genre frame that anchors the
-//  爽点 rhythm, an engineered cold-open for scene 1 (nextHook), and a small
-//  intentional cast. Seeds the StoryState that the Writer reads and updates
-//  every scene — so the story has a spine from beat one instead of being
-//  improvised cold.
-//
-//  Everything is best-effort coerced with fallbacks: a malformed LLM
-//  response can never abort session start — worst case the Writer just gets
-//  a thinner bible and improvises more.
-// ──────────────────────────────────────────────────────────────────────
-
-type RawStoryState = {
-  logline?: unknown;
-  genreTags?: unknown;
-  protagonist?: unknown;
-  castNotes?: unknown;
-  synopsis?: unknown;
-  openThreads?: unknown;
-  relationships?: unknown;
-  nextHook?: unknown;
-};
-
-function str(raw: unknown): string {
-  return typeof raw === "string" ? raw.trim() : "";
-}
-
-function strArray(raw: unknown): string[] | undefined {
-  if (!Array.isArray(raw)) return undefined;
-  const out = raw
-    .map((x) => (typeof x === "string" ? x.trim() : ""))
-    .filter((x) => x.length > 0);
-  return out.length > 0 ? out : undefined;
-}
-
-export async function runArchitect(
-  config: ProviderConfig,
-  session: Session,
-): Promise<StoryState> {
-  try {
-    const raw = await chat(
-      config,
-      [
-        { role: "system", content: ARCHITECT_SYSTEM },
-        { role: "user", content: buildArchitectUserMessage(session) },
-      ],
-      { temperature: 0.85, tag: "architect" },
-    );
-
-    const parsed = parseJsonLoose<RawStoryState>(raw);
-
-    return {
-      // Stable spine — fall back to the raw world/style prompt so the bible is
-      // never wholly empty even if the model returns garbage.
-      logline: str(parsed.logline) || session.worldSetting,
-      genreTags: str(parsed.genreTags),
-      protagonist:
-        str(parsed.protagonist) ||
-        "你是这个故事的主角（第二人称视角，永不出现在画面里）。",
-      castNotes: str(parsed.castNotes) || undefined,
-      // Volatile seeds — the opening Writer will rewrite these via its patch.
-      synopsis: str(parsed.synopsis) || "故事即将开始。",
-      openThreads: strArray(parsed.openThreads),
-      relationships: strArray(parsed.relationships),
-      nextHook: str(parsed.nextHook) || undefined,
-    };
-  } catch (err) {
-    // chat() or parseJsonLoose() can throw (network / unrepairable JSON).
-    // The Architect is best-effort: never let it abort session start — return
-    // a minimal bible seeded from the raw prompt and let the Writer improvise.
-    const msg = err instanceof Error ? err.message : String(err);
-    console.error(`[architect] failed, using minimal bible: ${msg}`);
-    return {
-      logline: session.worldSetting,
-      genreTags: "",
-      protagonist:
-        "你是这个故事的主角（第二人称视角，永不出现在画面里）。",
-      synopsis: "故事即将开始。",
-    };
-  }
-}
@@ -7,6 +7,7 @@ import {
 } from "@infiplot/tts-client";
 import type {
  Character,
+  CharacterIntent,
  CharacterVoice,
  EngineConfig,
  Session,
@@ -55,6 +56,7 @@ async function runDesignLLM(
  config: EngineConfig,
  session: Session,
  charName: string,
+  intent?: CharacterIntent,
 ): Promise<CharacterDesignOutput> {
  const raw = await chat(
    config.text,
@@ -62,12 +64,20 @@ async function runDesignLLM(
      { role: "system", content: buildCharacterDesignerSystem({ stepfun: stepfunEnabled(config) }) },
      {
        role: "user",
-        content: buildCharacterDesignerUserMessage(charName, session),
+        content: buildCharacterDesignerUserMessage(charName, session, intent),
      },
    ],
    { temperature: 0.7, tag: "character-designer" },
  );
-  return parseJsonLoose<CharacterDesignOutput>(raw);
+  // parseJsonLoose can throw on irreparable JSON; degrade to an empty card so
+  // designCharacterCard's fallbacks (name-inference voice, no portrait) kick in.
+  try {
+    return parseJsonLoose<CharacterDesignOutput>(raw);
+  } catch (err) {
+    const msg = err instanceof Error ? err.message : String(err);
+    console.error(`[characterDesigner] design JSON parse failed for ${charName}: ${msg}`);
+    return {};
+  }
 }

 /** True when the server's TTS config points at StepFun (so the CharacterDesigner
@@ -155,9 +165,10 @@ export async function designCharacterCard(
  config: EngineConfig,
  session: Session,
  charName: string,
+  intent?: CharacterIntent,
 ): Promise<CharacterCard> {
  const tDesign = Date.now();
-  const design = await runDesignLLM(config, session, charName);
+  const design = await runDesignLLM(config, session, charName, intent);
  tlog(`[charDesigner ${charName}] design LLM`, tDesign);

  // Drop invalid catalog picks before they reach provision/synth. A hallucinated
@@ -1,22 +1,19 @@
-import { chat } from "@infiplot/ai-client";
+import { chatStream } from "@infiplot/ai-client";
 import type {
  Beat,
  BeatActiveCharacter,
  BeatChoice,
  BeatChoiceEffect,
  BeatNext,
+  ChatStreamResult,
  ProviderConfig,
  Session,
  StoryStatePatch,
  WriterPlan,
+  WriterScenePlan,
 } from "@infiplot/types";
 import { parseJsonLoose } from "../jsonParser";
-import {
-  WRITER_BEATS_SYSTEM,
-  WRITER_PLAN_SYSTEM,
-  buildWriterBeatsUserMessage,
-  buildWriterPlanUserMessage,
-} from "../prompts";
+import { buildWriterStreamMessages } from "../prompts";

 // ──────────────────────────────────────────────────────────────────────
 //  Writer agent — owns the narrative half of scene generation, in TWO phases.
@@ -353,8 +350,9 @@ function coerceStringArray(raw: unknown): string[] | undefined {

 // Pull the volatile story-memory rewrite out of the Writer's JSON. Only
 // non-empty fields are kept; an all-empty/absent patch returns undefined so
-// the director leaves the carried StoryState untouched.
-function coerceStoryStatePatch(
+// the director leaves the carried StoryState untouched. Exported so the
+// prose splitter can reuse it to parse the <story> segment's <memory> block.
+export function coerceStoryStatePatch(
  raw: RawStoryStatePatch | undefined,
 ): StoryStatePatch | undefined {
  if (!raw || typeof raw !== "object") return undefined;
@@ -409,110 +407,7 @@ function renameBeatId(beats: Beat[], from: string, to: string): Beat[] {
  });
 }

-// ── Phase A — plan the scene skeleton. Fast (small output): just enough for
-// the Cinematographer + character design + Painter to start before the
-// dialogue exists. The cast is unioned with the entry roster/speaker so a
-// character named in the entry but omitted from `cast` still gets designed.
-export async function runWriterPlan(
-  config: ProviderConfig,
-  session: Session,
-): Promise<WriterPlan> {
-  const raw = await chat(
-    config,
-    [
-      { role: "system", content: WRITER_PLAN_SYSTEM },
-      { role: "user", content: buildWriterPlanUserMessage(session) },
-    ],
-    { temperature: 0.9, tag: "writer-plan" },
-  );
-
-  const parsed = parseJsonLoose<RawPlan>(raw);
-
-  const entryActiveCharacters =
-    coerceActiveCharacters(parsed.entryActiveCharacters) ?? [];
-
-  // Normalize POV variants → "你"; NPC names pass through. "你" is a valid entry
-  // speaker (Pattern B — player talking), but is never a designed cast member.
-  const rawEntrySpeaker = parsed.entrySpeaker?.trim() || undefined;
-  const entrySpeaker = rawEntrySpeaker
-    ? normalizeSpeakerName(rawEntrySpeaker)
-    : undefined;
-
-  const cast = coerceCast(parsed.cast);
-  const castSet = new Set(cast);
-  const addToCast = (name: string): void => {
-    if (!isPovName(name) && !castSet.has(name)) {
-      castSet.add(name);
-      cast.push(name);
-    }
-  };
-  for (const c of entryActiveCharacters) addToCast(c.name);
-  if (entrySpeaker) addToCast(entrySpeaker);
-
-  return {
-    sceneSummary: parsed.sceneSummary?.trim() || "未指定场景概要",
-    sceneKey: normalizeSceneKey(parsed.sceneKey),
-    entryBeatId: parsed.entryBeatId?.trim() || "b1",
-    cast,
-    entryActiveCharacters,
-    entrySpeaker,
-  };
-}
-
-// ── Phase B — expand the plan into the full beats[] graph + storyStatePatch.
-// Overlapped with the image pipeline by the director. The plan's entry id is
-// pinned onto a real beat so the already-painted entry frame resolves.
-export async function runWriterBeats(
-  config: ProviderConfig,
-  session: Session,
-  plan: WriterPlan,
-): Promise<WriterBeatsOutput> {
-  const raw = await chat(
-    config,
-    [
-      { role: "system", content: WRITER_BEATS_SYSTEM },
-      { role: "user", content: buildWriterBeatsUserMessage(session, plan) },
-    ],
-    { temperature: 0.9, tag: "writer-beats" },
-  );
-
-  const parsed = parseJsonLoose<RawBeats>(raw);
-  const rawBeats = Array.isArray(parsed.beats) ? parsed.beats : [];
-  if (rawBeats.length === 0) {
-    throw new Error("Writer (beats) returned no beats");
-  }
-
-  let beats = ensureUniqueChoiceIds(
-    repairBeats(
-      ensureUniqueBeatIds(
-        rawBeats.map((b, i) => coerceBeat(b, i, rawBeats.length)),
-      ),
-    ),
-  );
-
-  // The Painter already composed the entry frame from plan.entryBeatId + its
-  // roster, so the scene's entry MUST resolve to that id. If Phase B ignored
-  // it, rename the first beat to it (no collision — id is absent by the guard).
-  if (!beats.some((b) => b.id === plan.entryBeatId)) {
-    beats = renameBeatId(beats, beats[0]!.id, plan.entryBeatId);
-  }
-
-  // 把入场 beat 的 roster 钉成 plan 的：画师合成进帧的正是
-  // plan.entryActiveCharacters，运行时入场 beat 必须显示同一批人（与上面钉
-  // id 同理）。speaker 故意不钉——它和 line/TTS 耦合，强行覆盖会错配台词。
-  const entryRoster =
-    plan.entryActiveCharacters.length > 0 ? plan.entryActiveCharacters : undefined;
-  beats = beats.map((b) =>
-    b.id === plan.entryBeatId ? { ...b, activeCharacters: entryRoster } : b,
-  );
-
-  return {
-    beats,
-    storyStatePatch: coerceStoryStatePatch(parsed.storyStatePatch),
-  };
-}
-
-// Phase B fallback — when runWriterBeats fails entirely, keep the scene
+// Fallback — when the Writer stream fails to yield usable beats, keep the scene
 // playable with a single entry beat synthesized from the plan: narrate the
 // planned summary and offer one change-scene exit so the player can advance.
 export function synthesizeFallbackBeats(plan: WriterPlan): Beat[] {
@@ -532,3 +427,156 @@ export function synthesizeFallbackBeats(plan: WriterPlan): Beat[] {

 // Re-export POV constants for downstream filters (director's orphan voices).
 export { POV_DISPLAY_NAME, POV_VARIANTS, isPovName, normalizeSpeakerName };
+
+// ──────────────────────────────────────────────────────────────────────
+//  Paradigm D — single-pass streaming Writer
+// ──────────────────────────────────────────────────────────────────────
+
+/**
+ * Streaming Writer: single LLM call producing `<plan>/<story>/<choices>`
+ * tagged output. The caller (director) feeds the textStream to StreamRouter
+ * which dispatches downstream agents as tags close.
+ *
+ * Uses `chatStream` (Task 2) + `buildWriterStreamUserMessage` (ContextProvider).
+ * Temperature and tag mirror the existing chat() calls.
+ */
+export function runWriterStream(
+  config: ProviderConfig,
+  session: Session,
+): ChatStreamResult {
+  return chatStream(
+    config,
+    buildWriterStreamMessages(session),
+    { temperature: 0.9, tag: "writer-stream" },
+  );
+}
+
+/**
+ * Coerce a raw parsed plan (from StreamRouter's `<plan>` segment) into a
+ * clean WriterScenePlan. Reuses the existing Phase A coercion pipeline.
+ */
+export function coercePlanFromRaw(raw: Record<string, unknown>): WriterScenePlan {
+  const entryActiveCharacters =
+    coerceActiveCharacters(raw.entryActiveCharacters as RawActiveCharacter[]) ?? [];
+
+  const rawEntrySpeaker =
+    typeof raw.entrySpeaker === "string" ? raw.entrySpeaker.trim() : undefined;
+  const entrySpeaker = rawEntrySpeaker
+    ? normalizeSpeakerName(rawEntrySpeaker)
+    : undefined;
+
+  const cast = coerceCast(raw.cast);
+  const castSet = new Set(cast);
+  const addToCast = (name: string): void => {
+    if (!isPovName(name) && !castSet.has(name)) {
+      castSet.add(name);
+      cast.push(name);
+    }
+  };
+  for (const c of entryActiveCharacters) addToCast(c.name);
+  if (entrySpeaker) addToCast(entrySpeaker);
+
+  const characterIntents = Array.isArray(raw.characterIntents)
+    ? (raw.characterIntents as Array<Record<string, unknown>>)
+        .filter((ci) => typeof ci.name === "string" && (ci.name as string).trim())
+        .map((ci) => ({
+          name: (ci.name as string).trim(),
+          mood: typeof ci.mood === "string" ? ci.mood.trim() || undefined : undefined,
+          motivation:
+            typeof ci.motivation === "string"
+              ? ci.motivation.trim() || undefined
+              : undefined,
+          speakingTone:
+            typeof ci.speakingTone === "string"
+              ? ci.speakingTone.trim() || undefined
+              : undefined,
+        }))
+    : undefined;
+
+  // Story bible — first scene only. The Writer's <plan> includes a storyBible
+  // sub-object on the opening scene (replacing the old Architect call). Absent
+  // on subsequent scenes (the carried StoryState stays authoritative).
+  const rawBible = raw.storyBible as Record<string, unknown> | undefined;
+  let storyBible: WriterScenePlan["storyBible"];
+  if (rawBible && typeof rawBible === "object") {
+    const logline = typeof rawBible.logline === "string" ? rawBible.logline.trim() : "";
+    const genreTags = typeof rawBible.genreTags === "string" ? rawBible.genreTags.trim() : "";
+    const protagonist =
+      typeof rawBible.protagonist === "string" ? rawBible.protagonist.trim() : "";
+    const castNotes =
+      typeof rawBible.castNotes === "string" ? rawBible.castNotes.trim() || undefined : undefined;
+    // Only treat it as a real bible if at least one core field is present.
+    if (logline || genreTags || protagonist) {
+      storyBible = { logline, genreTags, protagonist, castNotes };
+    }
+  }
+
+  return {
+    sceneSummary:
+      typeof raw.sceneSummary === "string"
+        ? raw.sceneSummary.trim() || "未指定场景概要"
+        : "未指定场景概要",
+    sceneKey: normalizeSceneKey(
+      typeof raw.sceneKey === "string" ? raw.sceneKey : undefined,
+    ),
+    entryBeatId:
+      typeof raw.entryBeatId === "string"
+        ? raw.entryBeatId.trim() || "b1"
+        : "b1",
+    cast,
+    entryActiveCharacters,
+    entrySpeaker,
+    characterIntents,
+    storyBible,
+  };
+}
+
+/**
+ * Coerce raw beats into clean Beat[] + optional StoryStatePatch. Called by
+ * proseSplitter (散文→RawBeat[]) and as fallback for degraded streams.
+ * Reuses the full pipeline: coerceBeat → ensureUniqueBeatIds → repairBeats →
+ * ensureUniqueChoiceIds → entry-id pinning.
+ */
+export function coerceBeatsFromRaw(
+  raw: unknown,
+  plan: WriterScenePlan,
+): WriterBeatsOutput {
+  // Input can be a bare RawBeat[] or { beats, storyStatePatch } wrapper.
+  let rawBeats: RawBeat[] = [];
+  let rawPatch: RawStoryStatePatch | undefined;
+
+  if (Array.isArray(raw)) {
+    rawBeats = raw;
+  } else if (raw && typeof raw === "object") {
+    const obj = raw as Record<string, unknown>;
+    rawBeats = Array.isArray(obj.beats) ? (obj.beats as RawBeat[]) : [];
+    rawPatch = obj.storyStatePatch as RawStoryStatePatch | undefined;
+  }
+
+  if (rawBeats.length === 0) {
+    return { beats: synthesizeFallbackBeats(plan), storyStatePatch: undefined };
+  }
+
+  let beats = ensureUniqueChoiceIds(
+    repairBeats(
+      ensureUniqueBeatIds(
+        rawBeats.map((b, i) => coerceBeat(b, i, rawBeats.length)),
+      ),
+    ),
+  );
+
+  if (!beats.some((b) => b.id === plan.entryBeatId)) {
+    beats = renameBeatId(beats, beats[0]!.id, plan.entryBeatId);
+  }
+
+  const entryRoster =
+    plan.entryActiveCharacters.length > 0 ? plan.entryActiveCharacters : undefined;
+  beats = beats.map((b) =>
+    b.id === plan.entryBeatId ? { ...b, activeCharacters: entryRoster } : b,
+  );
+
+  return {
+    beats,
+    storyStatePatch: coerceStoryStatePatch(rawPatch),
+  };
+}
@@ -0,0 +1,290 @@
+import type { Session, Character } from "@infiplot/types";
+import {
+  renderStoryStateSpine,
+  renderStoryStateDynamic,
+  renderHistoryEntry,
+} from "../prompts";
+
+// ──────────────────────────────────────────────────────────────────────
+//  ContextProvider — data-driven segment registry.
+//
+//  Replaces the monolithic `buildWriterContextParts` (prompts.ts:425)
+//  with a registered list of segments, each rendered independently.
+//
+//  Invariants:
+//  - **SENTINEL append-only**: character-cards / sceneKeys / archived-
+//    history use a fixed header + "entries follow" sentinel line. Adding
+//    a character only APPENDS bytes; earlier bytes never shift. This is
+//    crucial for prompt prefix caching.
+//  - **stable / dynamic split**: stable segments form the cached prefix;
+//    dynamic segments are the suffix that changes every call. Mixing them
+//    would destroy cache hit rate.
+//  - **try/catch isolation**: a failing segment is skipped, not fatal.
+// ──────────────────────────────────────────────────────────────────────
+
+export type ContextSegment = {
+  id: string;
+  zone: "stable" | "dynamic";
+  order: number;
+  render: (session: Session) => string[];
+};
+
+// ── Stable segments ─────────────────────────────────────────────────
+
+const worldAndStyle: ContextSegment = {
+  id: "world-style",
+  zone: "stable",
+  order: 100,
+  render: (session) => {
+    const parts: string[] = [];
+    parts.push(`世界观：${session.worldSetting}`);
+    parts.push(`画风：${session.styleGuide}`);
+    if (session.playerName) {
+      parts.push(
+        `玩家名字：${session.playerName}（NPC 对话时用此名字称呼玩家；speaker 字段仍固定为 "你" 不变）`,
+      );
+    }
+    return parts;
+  },
+};
+
+const storySpine: ContextSegment = {
+  id: "story-spine",
+  zone: "stable",
+  order: 200,
+  render: (session) => [renderStoryStateSpine(session.storyState)],
+};
+
+function renderCharacterCard(c: Character): string[] {
+  const hasPersona =
+    c.persona || c.speakingStyle || c.sampleDialogue?.length || c.relationshipToPlayer;
+  if (!hasPersona) return [`- ${c.name}`];
+
+  const lines: string[] = [`- ${c.name}`];
+  if (c.persona) lines.push(`  设定：${c.persona}`);
+  if (c.personalityTraits?.length)
+    lines.push(`  性格：${c.personalityTraits.join("、")}`);
+  if (c.speakingStyle) lines.push(`  说话风格：${c.speakingStyle}`);
+  if (c.sampleDialogue?.length) {
+    lines.push(`  对白示例：`);
+    for (const d of c.sampleDialogue) lines.push(`    「${d}」`);
+  }
+  if (c.relationshipToPlayer)
+    lines.push(`  与玩家关系：${c.relationshipToPlayer}`);
+  return lines;
+}
+
+const characterCards: ContextSegment = {
+  id: "character-cards",
+  zone: "stable",
+  order: 300,
+  render: (session) => {
+    // SENTINEL: header + marker are byte-identical even when the list is
+    // empty. Adding a character only APPENDS bytes — never shifts earlier.
+    const parts: string[] = [];
+    parts.push("已登记角色（speaker 必须用这些名字之一，或本场景新引入）：");
+    parts.push("（以下每行一个已登记角色，开场前为空。）");
+    for (const c of session.characters) {
+      parts.push(...renderCharacterCard(c));
+    }
+    return parts;
+  },
+};
+
+function collectPriorSceneKeys(session: Session): string[] {
+  const seen = new Set<string>();
+  for (const entry of session.history) {
+    const k = entry.scene.sceneKey;
+    if (k) seen.add(k);
+  }
+  return Array.from(seen);
+}
+
+const priorSceneKeys: ContextSegment = {
+  id: "prior-sceneKeys",
+  zone: "stable",
+  order: 400,
+  render: (session) => {
+    // SENTINEL pattern — same rationale as character-cards.
+    const parts: string[] = [];
+    parts.push("已使用的 sceneKey（同一物理空间请沿用，不要新造）：");
+    parts.push("（以下每行一个已用过的 sceneKey，开场前为空。）");
+    for (const k of collectPriorSceneKeys(session)) parts.push(`- ${k}`);
+    return parts;
+  },
+};
+
+const archivedHistory: ContextSegment = {
+  id: "archived-history",
+  zone: "stable",
+  order: 500,
+  render: (session) => {
+    // Only history[0..N-2] — the last entry is live (visitedBeatIds still
+    // growing, speculative prefetch sees different snapshots). Putting it
+    // here would corrupt prefix cache.
+    const archived = session.history.slice(0, -1);
+    const parts: string[] = [];
+    parts.push("场景历史（按时间顺序，已完结）：");
+    parts.push("（以下每段一幕已完结的场景，开场前为空。）");
+    archived.forEach((entry, idx) => {
+      parts.push(renderHistoryEntry(entry, idx + 1));
+    });
+    return parts;
+  },
+};
+
+const loreConstant: ContextSegment = {
+  id: "lore-constant",
+  zone: "stable",
+  order: 600,
+  render: (session) => {
+    if (!session.worldBooks?.length) return [];
+    const constant = session.worldBooks
+      .flatMap((book) => book.entries.filter((e) => e.position === "constant"))
+      .sort((a, b) => (b.priority ?? 0) - (a.priority ?? 0))
+      .map((e) => e.content);
+    if (!constant.length) return [];
+    return [
+      "【世界设定 · 恒定知识】",
+      ...constant.map((c) => `- ${c}`),
+    ];
+  },
+};
+
+// ── Dynamic segments ────────────────────────────────────────────────
+
+const storyDynamic: ContextSegment = {
+  id: "story-dynamic",
+  zone: "dynamic",
+  order: 100,
+  render: (session) => [renderStoryStateDynamic(session.storyState)],
+};
+
+const lastBeat: ContextSegment = {
+  id: "last-beat",
+  zone: "dynamic",
+  order: 200,
+  render: (session) => {
+    const last = session.history.at(-1);
+    if (!last) return [];
+    const lastBeatId = last.visitedBeatIds.at(-1) ?? last.scene.entryBeatId;
+    const beat = last.scene.beats.find((b) => b.id === lastBeatId);
+    if (!beat) return [];
+    const frag: string[] = [];
+    if (beat.narration) frag.push(`旁白：${beat.narration}`);
+    if (beat.line) frag.push(`${beat.speaker ?? "?"}：${beat.line}`);
+    if (!frag.length) return [];
+    return [
+      `上一刻（玩家停留的最后一个画面，新场景从这里的情绪无缝承接）：\n  ${frag.join(" / ")}`,
+    ];
+  },
+};
+
+const transitionHint: ContextSegment = {
+  id: "transition-hint",
+  zone: "dynamic",
+  order: 300,
+  render: (session) => {
+    if (session.history.length === 0) {
+      return [
+        "这是故事的开场。请按【故事档案】里的 nextHook 把第一幕的冷开场设计出来——开场即抓人，别花笔墨铺垫世界观。",
+      ];
+    }
+    const last = session.history.at(-1);
+    const lastExit = last?.exit;
+    if (lastExit) {
+      if (lastExit.kind === "choice") {
+        return [
+          `承接「玩家在上一场选择了：${lastExit.label}」无缝续写下一个场景（转场命题：${lastExit.nextSceneSeed}）。开场要让玩家感到这正是上一步的结果，并延续此刻的情绪。`,
+        ];
+      }
+      return [
+        `承接「玩家自由动作：${lastExit.action}」无缝续写下一个场景，延续此刻的情绪与处境。`,
+      ];
+    }
+    return ["无缝续写下一个场景，延续上一刻的情绪。"];
+  },
+};
+
+const loreTriggered: ContextSegment = {
+  id: "lore-triggered",
+  zone: "dynamic",
+  order: 400,
+  render: (session) => {
+    if (!session.worldBooks?.length) return [];
+    const lastBeatText = getLastBeatText(session);
+    const triggered = session.worldBooks
+      .flatMap((book) => book.entries.filter((e) => e.position === "triggered"))
+      .filter((e) => e.keys.some((key) => lastBeatText.includes(key)))
+      .sort((a, b) => (b.priority ?? 0) - (a.priority ?? 0))
+      .map((e) => e.content);
+    if (!triggered.length) return [];
+    return [
+      "【世界设定 · 情境激活】",
+      ...triggered.map((t) => `- ${t}`),
+    ];
+  },
+};
+
+/** Extract text from the last 3 beats for keyword matching (≤5000 chars). */
+function getLastBeatText(session: Session): string {
+  if (!session.history.length) return "";
+  const lastEntry = session.history[session.history.length - 1];
+  if (!lastEntry) return "";
+  const scene = lastEntry.scene;
+  const beats = scene?.beats || [];
+  const lastN = beats.slice(-3);
+  const text = lastN
+    .map((b) => [b.narration, b.line].filter(Boolean).join(" "))
+    .join(" ");
+  return text.slice(0, 5000);
+}
+
+// ── Registry ────────────────────────────────────────────────────────
+
+const defaultSegments: ContextSegment[] = [
+  worldAndStyle,
+  storySpine,
+  characterCards,
+  priorSceneKeys,
+  archivedHistory,
+  loreConstant,
+  storyDynamic,
+  lastBeat,
+  transitionHint,
+  loreTriggered,
+];
+
+export function buildWriterContext(
+  session: Session,
+  segments: ContextSegment[] = defaultSegments,
+): { stableParts: string[]; dynamicParts: string[] } {
+  const stable = segments
+    .filter((s) => s.zone === "stable")
+    .sort((a, b) => a.order - b.order);
+  const dynamic = segments
+    .filter((s) => s.zone === "dynamic")
+    .sort((a, b) => a.order - b.order);
+
+  const stableParts: string[] = [];
+  for (const seg of stable) {
+    try {
+      stableParts.push(...seg.render(session));
+      stableParts.push("");
+    } catch (err) {
+      console.warn(`[ContextProvider] segment "${seg.id}" render failed, skipped:`, err);
+    }
+  }
+
+  const dynamicParts: string[] = [];
+  for (const seg of dynamic) {
+    try {
+      dynamicParts.push(...seg.render(session));
+      dynamicParts.push("");
+    } catch (err) {
+      console.warn(`[ContextProvider] segment "${seg.id}" render failed, skipped:`, err);
+    }
+  }
+
+  return { stableParts, dynamicParts };
+}
@@ -2,15 +2,18 @@ import { chat } from "@infiplot/ai-client";
 import { coerceOrientation } from "@infiplot/types";
 import type {
  Beat,
+  BeatChoice,
  Character,
+  CharacterIntent,
  EngineConfig,
  InsertBeatPartial,
  ProviderConfig,
  Scene,
+  SceneStreamEvent,
  Session,
  StoryState,
  StoryStatePatch,
-  WriterPlan,
+  WriterScenePlan,
 } from "@infiplot/types";
 import type { CharacterCard } from "./agents/characterDesigner";
 import {
@@ -23,13 +26,14 @@ import { runCinematographer } from "./agents/cinematographer";
 import { runPainter } from "./agents/painter";
 import type { WriterBeatsOutput } from "./agents/writer";
 import {
+  coercePlanFromRaw,
  isPovName,
  normalizeSpeakerName,
  POV_DISPLAY_NAME,
-  runWriterBeats,
-  runWriterPlan,
-  synthesizeFallbackBeats,
+  runWriterStream,
 } from "./agents/writer";
+import { routeTaggedStream } from "./stream";
+import { splitProseToBeats } from "./stream/proseSplitter";
 import { parseJsonLoose } from "./jsonParser";
 import { INSERT_BEAT_SYSTEM, buildInsertBeatUserMessage } from "./prompts";

@@ -97,6 +101,14 @@ export function mergeCharacters(
      basePortraitUrl: u.basePortraitUrl ?? prev.basePortraitUrl,
      basePortraitUuid: u.basePortraitUuid ?? prev.basePortraitUuid,
      voiceDescription: u.voiceDescription || prev.voiceDescription,
+      // Paradigm D: preserve persona fields when later designs omit them
+      // (same logic as portrait/voice preservation).
+      persona: u.persona ?? prev.persona,
+      personalityTraits: u.personalityTraits ?? prev.personalityTraits,
+      speakingStyle: u.speakingStyle ?? prev.speakingStyle,
+      sampleDialogue: u.sampleDialogue ?? prev.sampleDialogue,
+      relationshipToPlayer: u.relationshipToPlayer ?? prev.relationshipToPlayer,
+      secrets: u.secrets ?? prev.secrets,
    });
  }
  return Array.from(byName.values());
@@ -157,6 +169,19 @@ export type SceneResult = {
  storyState: StoryState;
 };

+// Absolute-worst-case plan when the stream produced no usable <plan> at all
+// (StreamRouter degraded with no extractable plan). Keeps the pipeline alive.
+function minimalFallbackPlan(): WriterScenePlan {
+  return {
+    sceneSummary: "未指定场景概要",
+    sceneKey: undefined,
+    entryBeatId: "b1",
+    cast: [],
+    entryActiveCharacters: [],
+    entrySpeaker: undefined,
+  };
+}
+
 // ──────────────────────────────────────────────────────────────────────
 //  directScene — the multi-agent pipeline. Used by orchestrator's
 //  startSession and requestScene.
@@ -165,48 +190,89 @@ export type SceneResult = {
 export async function directScene(
  config: EngineConfig,
  session: Session,
+  emit?: (event: SceneStreamEvent) => void,
 ): Promise<SceneResult> {
  const tTotal = Date.now();

-  // ── Phase A — Writer PLAN (serial). The image pipeline needs the scene
-  // summary + entry roster + cast to start, but NOT the dialogue beats. This
-  // call is small (skeleton only), so it returns fast and unblocks everything.
-  const tPlan = Date.now();
-  const plan = await runWriterPlan(config.text, session);
-  tlog("[directScene] Phase A (plan)", tPlan);
+  // ══════════════════════════════════════════════════════════════════════
+  //  Paradigm D — single Writer stream + StreamRouter dispatch
+  //
+  //  One LLM call produces <plan> → <story> → <choices>. StreamRouter
+  //  cuts the tags; </plan> closure resolves the plan deferred, unlocking
+  //  the downstream image pipeline IN PARALLEL with the still-streaming
+  //  <story>. Prose is split into Beat[] after routing completes.
+  // ══════════════════════════════════════════════════════════════════════

-  // ── Phase B — Writer BEATS, launched NOW so its (longer) output overlaps the
-  // ENTIRE image pipeline below. Only needed to assemble the final Scene, so we
-  // await it last. A failure degrades to a single playable beat from the plan.
-  const tBeats = Date.now();
-  const beatsPromise: Promise<WriterBeatsOutput> = runWriterBeats(
-    config.text,
-    session,
-    plan,
-  )
-    .then((out) => {
-      tlog("[directScene] Phase B (beats)", tBeats);
-      return out;
-    })
-    .catch((err): WriterBeatsOutput => {
-      const msg = err instanceof Error ? err.message : String(err);
-      console.error(
-        `[directScene] Phase B (beats) failed, using fallback: ${msg}`,
-      );
-      return { beats: synthesizeFallbackBeats(plan), storyStatePatch: undefined };
-    });
+  // ── Step 1 — kick off the Writer stream + routing ─────────────────
+  const tStream = Date.now();
+  const writerResult = runWriterStream(config.text, session);
+
+  // Deferred that settles when onPlan fires (or when routing completes
+  // without a plan — degraded fallback).
+  let planSettled = false;
+  let resolvePlan!: (p: WriterScenePlan) => void;
+  const planPromise = new Promise<WriterScenePlan>((res) => {
+    resolvePlan = res;
+  });
+
+  // Closure-captured coerced plan so onStoryComplete can split+emit beats
+  // DURING streaming (before painter finishes → text-first progressive play).
+  let coercedPlanRef: WriterScenePlan | undefined;
+  let earlyBeatsOut: WriterBeatsOutput | undefined;
+  // Opening-scene story bible from the Writer's <plan> (replaces the old
+  // Architect). Undefined on subsequent scenes (carried StoryState wins).
+  let bibleFromPlan: WriterScenePlan["storyBible"];
+
+  const routingPromise = routeTaggedStream(writerResult.textStream, {
+    onPlan: (rawPlan) => {
+      try {
+        const coerced = coercePlanFromRaw(rawPlan as unknown as Record<string, unknown>);
+        coercedPlanRef = coerced;
+        if (coerced.storyBible) bibleFromPlan = coerced.storyBible;
+        planSettled = true;
+        emit?.({ type: "plan", plan: coerced });
+        resolvePlan(coerced);
+      } catch {
+        planSettled = true;
+        resolvePlan(minimalFallbackPlan());
+      }
+    },
+    onStoryComplete: (rawStory) => {
+      // Tags are ordered (plan before story), so the plan is already coerced.
+      const p = coercedPlanRef ?? minimalFallbackPlan();
+      try {
+        const out = splitProseToBeats(rawStory, p);
+        earlyBeatsOut = out;
+        for (const b of out.beats) emit?.({ type: "beat", beat: b });
+      } catch {
+        // split failure → Step 6 re-splits from rawStorySegment
+      }
+    },
+  }).then((result) => {
+    // If plan never fired (stream error / no plan tag), settle the deferred
+    // from the degraded extraction or a minimal fallback.
+    if (!planSettled) {
+      const extracted = result.plan
+        ? coercePlanFromRaw(result.plan as unknown as Record<string, unknown>)
+        : minimalFallbackPlan();
+      if (extracted.storyBible) bibleFromPlan = extracted.storyBible;
+      resolvePlan(extracted);
+    }
+    return result;
+  });
+
+  // ── Step 2 — await plan (settles at </plan> close — EARLY) ────────
+  const plan = await planPromise;
+  tlog("[directScene] plan (stream → </plan>)", tStream);
+
+  // From here the pipeline is structurally identical to the old Phase A
+  // flow: plan drives character design + cinematographer + painter, all
+  // overlapping with the Writer's still-streaming <story>.

-  // NEW characters to design come from the PLAN's cast (so design fires in
-  // parallel with Phase B, not after the beats are written). Existing
-  // characters keep their cards / portraits / voices across scenes.
  const newCharNames = plan.cast.filter(
    (n) => !session.characters.some((c) => c.name === n),
  );

-  // Entry-beat composition is the PLAN's (Phase B is constrained to honor it).
-  // The Painter needs a Beat-shaped object for reference collection, but the
-  // real beat isn't written until Phase B — so synthesize one from the plan
-  // (collectReferenceImages only reads speaker + activeCharacters).
  const entryBeatActive = plan.entryActiveCharacters;
  const entryBeatSpeaker = plan.entrySpeaker;
  const entryBeatForPaint: Beat = {
@@ -216,32 +282,30 @@ export async function directScene(
    next: { type: "continue", nextBeatId: plan.entryBeatId },
  };

-  // For sceneKey-based visual continuity, look up the prior matching scene's
-  // image to slot into Painter's referenceImages (max 4 of which include
-  // character portraits too).
  const { priorSceneReference, priorSceneKey } = pickPriorSceneReference(
    session,
    plan.sceneKey,
  );

-  // ── Stage 2 — character cards (LLM) ∥ Cinematographer ──────────────────
-  // Both are cheap LLM calls and neither needs the other's output, so they
-  // run concurrently. The cards give us each new character's visualDescription
-  // TEXT; portraits + voices are deferred to Stage 3 so they can overlap the
-  // paint instead of blocking it.
+  // ── Step 3 — character cards (LLM) ∥ Cinematographer (parallel) ───
+  // CharacterDesigner now receives the Writer's intent for each character
+  // (paradigm D: media translator, not inventor).
  const tParallel = Date.now();

+  const findIntent = (name: string): CharacterIntent | undefined =>
+    plan.characterIntents?.find((ci) => ci.name === name);
+
  const cardPromises = newCharNames.map((name) =>
-    designCharacterCard(config, session, name).catch((err): CharacterCard => {
-      const msg = err instanceof Error ? err.message : String(err);
-      console.error(`[directScene] designCharacterCard(${name}) failed: ${msg}`);
-      // Last-resort fallback: a name + generic voice card so the speaker isn't
-      // unknown. No visualDescription → no portrait is attempted for them.
-      return {
-        name,
-        voiceDescription: `请根据角色名「${name}」推断其性别、年龄与气质。所属世界观：${session.worldSetting}`,
-      };
-    }),
+    designCharacterCard(config, session, name, findIntent(name)).catch(
+      (err): CharacterCard => {
+        const msg = err instanceof Error ? err.message : String(err);
+        console.error(`[directScene] designCharacterCard(${name}) failed: ${msg}`);
+        return {
+          name,
+          voiceDescription: `请根据角色名「${name}」推断其性别、年龄与气质。所属世界观：${session.worldSetting}`,
+        };
+      },
+    ),
  );

  const cinemaPromise = runCinematographer(config.text, {
@@ -259,8 +323,6 @@ export async function directScene(
  ]);
  tlog("[directScene] CharacterCards+Cinematographer parallel", tParallel);

-  // Working registry: existing characters + new cards. visualDescription text
-  // is present now; portraits + voices fill in over the next two phases.
  let characters = mergeCharacters(
    session.characters,
    cards.map((c) => ({
@@ -270,11 +332,9 @@ export async function directScene(
    })),
  );

-  // ── Stage 3 — portraits + voices, scheduled around the Painter ─────────
+  // ── Step 4 — portraits + voices, scheduled around Painter ─────────
  const tProvision = Date.now();

-  // Entry-beat character names: the ONLY portraits the Painter references
-  // (collectReferenceImages slots in the entry beat's speaker + activeChars).
  const entryNames = new Set<string>();
  if (entryBeatSpeaker && !isPovName(entryBeatSpeaker)) {
    entryNames.add(entryBeatSpeaker);
@@ -288,8 +348,6 @@ export async function directScene(
    basePortraitUrl?: string;
    basePortraitUuid?: string;
  };
-  // Kick off portrait gen for every NEW char that has a visualDescription.
-  // Entry-beat portraits block the Painter; the rest overlap it.
  const entryPortraitPromises: Promise<NamedPortrait>[] = [];
  const restPortraitPromises: Promise<NamedPortrait>[] = [];
  for (const card of cards) {
@@ -308,42 +366,37 @@ export async function directScene(
  // On the StepFun path, thread the LLM-selected stepfunVoiceId from the card
  // into provision — it lets stepfunProvision honor the catalog pick instead
  // of falling back to the keyword scorer (same network cost: still zero).
-  // ALSO persist it onto the Character so the client can echo it back on a
-  // StepFun server (where it skips the ~220KB voice payload) and the server
-  // resolveVoice honors the LLM pick at synth time instead of re-scoring.
  const voicePromises = cards.map((card) =>
    provisionCharacterVoice(config, card.voiceDescription, card.name, {
      stepfunVoiceId: card.stepfunVoiceId,
    }).then(
-      (voice): Character => ({
-        name: card.name,
-        voiceDescription: card.voiceDescription,
-        voice,
-        stepfunVoiceId: card.stepfunVoiceId,
-      }),
+      (voice): Character => {
+        const result: Character = {
+          name: card.name,
+          voiceDescription: card.voiceDescription,
+          voice,
+          stepfunVoiceId: card.stepfunVoiceId,
+        };
+        if (voice) emit?.({ type: "voice", name: card.name, voice });
+        return result;
+      },
    ),
  );

-  // Block the Painter ONLY on entry-beat portraits (its referenceImages).
  const entryPortraits = await Promise.all(entryPortraitPromises);
  characters = mergeCharacters(
    characters,
    entryPortraits.map((p) => ({
      name: p.name,
-      voiceDescription: "", // preserved from the card by mergeCharacters
+      voiceDescription: "",
      basePortraitUrl: p.basePortraitUrl,
      basePortraitUuid: p.basePortraitUuid,
    })),
  );
  tlog("[directScene] entry-beat portraits", tProvision);

-  // ── Stage 4 — Painter (depends on cinemaOut + on-stage visual cards +
-  // entry portraits). On-stage = the plan's cast (everyone who'll appear),
-  // filtered to those now in the registry, so the archetype block covers them.
+  // ── Step 5 — Painter ──────────────────────────────────────────────
  const onStageCharacters = characters.filter((c) => plan.cast.includes(c.name));
-
-  // Session-locked orientation (set at session start). Threads into both the
-  // Painter prompt's framing rules and the generated image's pixel dimensions.
  const orientation = coerceOrientation(session.orientation);

  const tPainter = Date.now();
@@ -361,9 +414,11 @@ export async function directScene(
  );
  tlog("[directScene] Painter", tPainter);

-  // Fold in the work that overlapped the paint: remaining portraits + all
-  // voices. Awaited before returning so the session the client persists is
-  // fully provisioned for later scenes.
+  // Emit background as soon as it's painted — the client can swap the
+  // placeholder for the real scene image while beats/voices are still settling.
+  emit?.({ type: "background", imageUrl: painted.imageUrl, sceneKey: plan.sceneKey });
+
+  // Overlapped: rest portraits + voices
  const tOverlap = Date.now();
  const [restPortraits, voicedChars] = await Promise.all([
    Promise.all(restPortraitPromises),
@@ -381,20 +436,82 @@ export async function directScene(
  characters = mergeCharacters(characters, voicedChars);
  tlog("[directScene] overlapped portraits+voices", tOverlap);

-  // ── Await Phase B — it overlapped the whole image pipeline above. ──────
-  const beatsOut = await beatsPromise;
-  const beats = beatsOut.beats;
+  // ── Step 6 — await routing completion + split prose into beats ────
+  // routeTaggedStream ran concurrently with the entire image pipeline.
+  // onStoryComplete likely already fired (splitting + emitting beats for
+  // progressive playback); this await retrieves the final result + rawStorySegment.
+  const streamResult = await routingPromise;
+
+  // Reuse early-split beats when available (onStoryComplete path); otherwise
+  // split from rawStorySegment (degrade / onStoryComplete missed).
+  const beatsOut: WriterBeatsOutput = earlyBeatsOut
+    ?? splitProseToBeats(streamResult.rawStorySegment ?? "", plan);
+  let beats = beatsOut.beats;
+
+  // If earlyBeatsOut was missed but rawStorySegment is available, emit beats
+  // now (late but still before done — the client gets them for rendering).
+  if (!earlyBeatsOut && beats.length > 0) {
+    for (const b of beats) emit?.({ type: "beat", beat: b });
+  }
+
+  // Emit choices (from streamResult or from the last beat's choice exits).
+  if (streamResult.choices?.length) {
+    emit?.({ type: "choices", choices: streamResult.choices });
+  }
+
+  // ── C1-ext: merge <choices> segment into the last beat's `next` ────
+  // The Writer's <choices> segment produces scene-level exits that are NOT
+  // embedded in the beats graph. Attach them to the final beat so the player
+  // can actually pick them.
+  //
+  // IMPORTANT: Only change-scene exits are valid here. The prose paradigm
+  // assigns beat ids automatically (b1, b2, ...) in proseSplitter — the LLM
+  // has no knowledge of these ids, so any advance-beat targetBeatId it emits
+  // in <choices> will point at the wrong beat, causing a loop.
+  if (streamResult.choices?.length && beats.length > 0) {
+    const validChoices = streamResult.choices.filter(
+      (c): c is BeatChoice =>
+        typeof c.label === "string" &&
+        c.label.length > 0 &&
+        c.effect != null &&
+        c.effect.kind === "change-scene",
+    );
+    if (validChoices.length > 0) {
+      const withIds = validChoices.map((c, i) => ({
+        ...c,
+        id: c.id || `sc${i + 1}`,
+      }));
+      const lastIdx = beats.length - 1;
+      const last = beats[lastIdx]!;
+      const existing =
+        last.next.type === "choice" ? last.next.choices : [];
+      const isFallbackOnly =
+        existing.length <= 1 &&
+        existing.every((c) => c.label === "继续");
+      const merged = isFallbackOnly ? withIds : [...existing, ...withIds];
+      const seen = new Set<string>();
+      const deduped = merged.filter((c) => {
+        if (seen.has(c.label)) return false;
+        seen.add(c.label);
+        return true;
+      });
+      beats = beats.map((b, i) =>
+        i === lastIdx
+          ? { ...b, next: { type: "choice" as const, choices: deduped } }
+          : b,
+      );
+    }
+  }
+
+  if (streamResult.degraded) {
+    console.warn("[directScene] Writer stream was degraded — beats may be fallback");
+  }

-  // entryBeatId is guaranteed present (runWriterBeats pins it onto a beat), but
-  // keep the defensive fallback for the synthesized-fallback path.
  const entryBeatId = beats.some((b) => b.id === plan.entryBeatId)
    ? plan.entryBeatId
    : beats[0]!.id;

-  // Orphan-speaker voices: a beat speaker Phase B used that isn't in the
-  // registry. Should be rare — the prompt constrains speakers to the cast, and
-  // every cast member was provisioned above — so this is a defensive net,
-  // serial but skipped entirely (zero latency) in the common case.
+  // Orphan-speaker voices (defensive net — should be rare).
  const orphanSpeakers = [
    ...new Set(beats.map((b) => b.speaker).filter((n): n is string => Boolean(n))),
  ].filter((n) => !isPovName(n) && !characters.some((c) => c.name === n));
@@ -403,15 +520,14 @@ export async function directScene(
      orphanSpeakers.map((n) => provisionVoiceForName(config, session, n)),
    );
    characters = mergeCharacters(characters, orphanChars);
+    // Emit orphan voices so the client can preload their audio.
+    for (const oc of orphanChars) {
+      if (oc.voice) emit?.({ type: "voice", name: oc.name, voice: oc.voice });
+    }
  }

  const scene: Scene = {
    id: newSceneId(),
-    // scenePrompt is the cinematographer's English compositional output;
-    // the Writer's sceneSummary stays in the session log via beats[]/
-    // history. Keeping the original field name preserves compat with
-    // anything that already reads scene.scenePrompt (e.g., insert-beat
-    // user prompt).
    scenePrompt: cinemaOut.integratedPrompt,
    beats,
    entryBeatId,
@@ -421,11 +537,22 @@ export async function directScene(
    orientation,
  };

-  // Merge the Writer's volatile memory rewrite onto the carried bible so the
-  // throughline survives the next scene cut (orchestrator returns it; the
-  // client persists it back into the session).
+  // storyState: opening scene seeds the stable spine from the Writer's
+  // storyBible (replacing the old Architect); subsequent scenes carry the
+  // existing spine. Volatile fields always come from this scene's patch.
+  const baseStoryState: StoryState | undefined = session.storyState
+    ?? (bibleFromPlan
+      ? {
+          logline: bibleFromPlan.logline,
+          genreTags: bibleFromPlan.genreTags,
+          protagonist: bibleFromPlan.protagonist,
+          castNotes: bibleFromPlan.castNotes,
+          synopsis: "",
+        }
+      : undefined);
+
  const storyState = applyStoryStatePatch(
-    session.storyState,
+    baseStoryState,
    beatsOut.storyStatePatch,
  );

@@ -9,8 +9,8 @@ export {
 export { synthesizeBeat } from "./voice";
 export { mergeCharacters } from "./director";
 export type { SceneResult } from "./director";
-export { runArchitect } from "./agents/architect";
 export type { WriterBeatsOutput } from "./agents/writer";
 export type { CinematographerOutput } from "./agents/cinematographer";
 export type { InsertBeatPartial } from "@infiplot/types";
-export * from "./prompts";
+// Note: prompts.ts is NOT re-exported (server-only, used internally by agents)
+
@@ -8,6 +8,7 @@ import type {
  FreeformClassifyResponse,
  InsertBeatRequest,
  InsertBeatResponse,
+  SceneStreamEvent,
  Session,
  SceneRequest,
  SceneResponse,
@@ -19,7 +20,6 @@ import type {
 import { coerceOrientation } from "@infiplot/types";
 import { chat } from "@infiplot/ai-client";
 import { isStepfun, isValidStepfunVoiceId, provisionVoice } from "@infiplot/tts-client";
-import { runArchitect } from "./agents/architect";
 import { selectStyle } from "./agents/styleSelector";
 import { directInsertBeat, directScene } from "./director";
 import { STYLE_MAP } from "@/lib/options";
@@ -51,6 +51,7 @@ function tlog(label: string, t0: number): void {
 export async function startSession(
  config: EngineConfig,
  req: StartRequest,
+  emit?: (event: SceneStreamEvent) => void,
 ): Promise<StartResponse> {
  const tTotal = Date.now();

@@ -67,38 +68,32 @@ export async function startSession(
    language: req.language?.trim() || undefined,
  };

-  // Stage 0 — Architect (+ optional auto style selection, in parallel).
-  // Both only depend on worldSetting, so they run concurrently.
+  // Stage 0 — optional auto style selection. The story bible is no longer
+  // generated by a separate Architect call; the Writer's <plan> produces it
+  // on the opening scene (paradigm: Writer is the single content brain).
  console.log(
    `[start] worldSetting (${session.worldSetting.length} chars):\n${session.worldSetting}`,
  );
  const isAutoStyle = session.styleGuide === "auto";
  if (isAutoStyle) {
    session.styleGuide = "由 AI 根据剧情自动匹配最佳画风";
-  }
-  const tArchitect = Date.now();
-  const [architectResult, autoStyleGuide] = await Promise.all([
-    runArchitect(config.text, session),
-    isAutoStyle
-      ? selectStyle(config.text, session.worldSetting).catch((err) => {
-          console.warn(`[styleSelector] failed, falling back to 吉卜力:`, err);
-          return null;
-        })
-      : Promise.resolve(null),
-  ]);
-  session.storyState = architectResult;
-  if (isAutoStyle) {
+    const tStyle = Date.now();
+    const autoStyleGuide = await selectStyle(
+      config.text,
+      session.worldSetting,
+    ).catch((err) => {
+      console.warn(`[styleSelector] failed, falling back to 吉卜力:`, err);
+      return null;
+    });
    session.styleGuide = autoStyleGuide ?? STYLE_MAP["吉卜力"]!;
+    tlog("[start] StyleSelector", tStyle);
    console.log(`[start] auto-selected style: ${session.styleGuide.slice(0, 60)}…`);
  }
-  tlog("[start] Architect" + (isAutoStyle ? " + StyleSelector" : ""), tArchitect);
-  console.log(
-    `[start] storyBible: logline="${session.storyState.logline}" | genreTags="${session.storyState.genreTags}" | synopsis="${session.storyState.synopsis}"`,
-  );

  const { scene, sceneImageUrl, characters, storyState } = await directScene(
    config,
    session,
+    emit,
  );

  tlog("[start] TOTAL", tTotal);
@@ -119,12 +114,14 @@ export async function startSession(
 export async function requestScene(
  config: EngineConfig,
  req: SceneRequest,
+  emit?: (event: SceneStreamEvent) => void,
 ): Promise<SceneResponse> {
  const tTotal = Date.now();

  const { scene, sceneImageUrl, characters, storyState } = await directScene(
    config,
    req.session,
+    emit,
  );

  tlog("[scene] TOTAL", tTotal);
@@ -1,6 +1,7 @@
 import type {
  BeatActiveCharacter,
  Character,
+  CharacterIntent,
  Orientation,
  Scene,
  Session,
@@ -129,300 +130,22 @@ export function renderStoryStateDynamic(s: StoryState | undefined): string {
  return lines.join("\n");
 }

-// Back-compat for the Architect's own user message (it sees the full bible
-// at session start, no caching concern there yet).
-export function renderStoryState(s: StoryState | undefined): string {
-  if (!s) return "";
-  return renderStoryStateSpine(s) + "\n\n" + renderStoryStateDynamic(s);
-}
-
 // ──────────────────────────────────────────────────────────────────────
-//  0. Architect (总编剧) — ONE LLM call at session start.
-//
-//  Turns the (often terse) user world + style prompt into a real story
-//  bible: a second-person protagonist with a want and a flaw, a single
-//  central dramatic question, a genre frame that anchors the 爽点 rhythm,
-//  an engineered opening hook (前3秒冷开场), and a small intentional cast.
-//  Everything downstream — Writer, CharacterDesigner — reads this so the
-//  story has a spine from beat one instead of being improvised cold.
+//  Paradigm D — merged Writer (single-pass streaming with tagged output)
 // ──────────────────────────────────────────────────────────────────────

-export const ARCHITECT_SYSTEM = `你是一部交互视觉小说的「总编剧 / 故事架构师」。玩家只给了你一句到几句的世界观和画风，你要在开拍前把它扩写成一份**故事档案（story bible）**，为后续每一幕定下脊梁。你不写具体台词、不写分镜、不设计立绘——你只搭骨架。
+// Writer prompt has been refactored to segment-driven builder.
+// See lib/engine/prompts/segments/writer/ for individual prompt segments.
+// See lib/engine/prompts/registry.ts for segment registration.
+// See lib/engine/prompts/builder.ts for assembly logic.

-你深谙网文（番茄）、短剧（红果）与视觉小说（galgame）的爆款心法：
- **开篇即钩子**：黄金三章 / 前3秒法则。开场不铺垫世界观，直接抛出冲突、悬念或一个反常的瞬间。
- **代入感**：主角是第二人称「你」，是玩家的化身——要让玩家一进场就清楚"我是谁、我此刻卡在什么处境里、我想要什么"。
- **题材锚定爽点**：先选定一个清晰的题材框架（如 甜宠 / 校园暗恋 / 悬疑追凶 / 复仇逆袭 / 救赎治愈），它决定了情绪回报的节奏与类型。
- **戏剧问题**：整部故事由一个悬而未决的中心问题驱动（她到底是谁？你能否在记忆消失前查明真相？这场暗恋会走向哪里？）。
- **人设要鲜明且有反差**：每个核心角色一个强标签 + 一个反差面（外冷内热 / 傲娇 / 看似柔弱实则腹黑）。
-
-你要产出（全部用中文，except 不需要英文）：
- logline：一句话主线 / 中心戏剧问题，必须带钩子，让人想看下去
- genreTags：题材+基调标签，斜杠分隔，如 "甜宠 / 校园 / 慢热治愈带点伤感"
- protagonist：第二人称主角卡。包含：你是谁、你此刻正卡在什么具体处境里（要有即时张力）、你想要什么、一个软肋或秘密。50–120 字。
- castNotes：2–3 个核心配角，每行一个「名字：一句话人设（强标签+反差）+ 与你的关系/张力」。给真实好记的中文名字（不要"神秘女子"这种占位）。
- synopsis：开场此刻的情境梗概（故事尚未展开，就写"故事从……开始"），1–3 句。
- openThreads：开场就埋下的 1–3 个悬念/问题（数组）。
- nextHook：**第一幕**应当如何冷开场——具体描述开场那个抓人的瞬间/冲突（这会直接指导编剧写开场）。要画面感强、有张力。
-
-设计硬规则：
- 主角「你」永不出现在画面里（第二人称 POV），所以 castNotes 里**不要**把"你/主角"当成一个角色。
- 配角名字要符合世界观（年代、地域、文化）。
- 一切服从玩家给的世界观与画风，不要擅自跑题；玩家信息少时，做最贴合、最有戏的合理扩写。
-
-必须输出严格 JSON：
-{
-  "logline": "...",
-  "genreTags": "...",
-  "protagonist": "...",
-  "castNotes": "夏海：表面开朗的天台诗人，实则在用诗逃避家里的变故；与你是同班转学的邻座，对你有种说不清的在意。\\n班主任老周：…",
-  "synopsis": "...",
-  "openThreads": ["...", "..."],
-  "nextHook": "第一幕冷开场：……"
-}
-
-不要输出 JSON 以外的任何文本。`;
-
-export function buildArchitectUserMessage(session: Session): string {
-  const parts: string[] = [];
-  parts.push(`世界观：${session.worldSetting}`);
-  parts.push(`画风：${session.styleGuide}`);
-  if (session.playerName) {
-    parts.push(
-      `\n玩家名字：${session.playerName}\n（NPC 在对话中应自然地称呼玩家为「${session.playerName}」。「你」仍指代玩家视角，但 NPC 的台词里请使用这个名字而非泛称。不要为玩家设计立绘或音色——玩家是 POV 视角，永不出现在画面中。）`,
-    );
-  }
-  parts.push(
-    "\n请据此产出这部交互剧的故事档案（story bible），严格以 JSON 格式返回。",
-  );
-  const langDirective = buildLanguageDirective(session.language);
-  if (langDirective) parts.push(langDirective);
-  return parts.join("\n");
-}
-
-// ──────────────────────────────────────────────────────────────────────
-//  1. Writer (编剧) — drives the narrative, in TWO phases.
-//
-//  Phase A (WRITER_PLAN_SYSTEM): plans the scene SKELETON only — sceneSummary
-//    + sceneKey + entry-beat roster + the full cast. No dialogue. Its output
-//    is enough for the Cinematographer + character design + Painter to start.
-//  Phase B (WRITER_BEATS_SYSTEM): expands the plan into the full beats[] graph
-//    + storyStatePatch, overlapped with the (longer) image pipeline.
-//
-//  Neither phase designs characters (that's the CharacterDesigner's job) —
-//  Phase A only NAMES them in `cast` / `entryActiveCharacters`; the
-//  CharacterDesigner is invoked for any name not yet in session.characters.
-// ──────────────────────────────────────────────────────────────────────
-
-export const WRITER_PLAN_SYSTEM = `你是一部交互视觉小说的「编剧」。这是**两步生成中的第一步——场景规划**。你只产出本场景的「骨架」，**不要写任何 beat 台词**。你的产出会被立刻送去配图（分镜导演 + 生图），所以要快、要准、画面感要强。
-
-═══════════════════════════════════════════════════════════════════
-爆款心法（要在规划阶段就立住，后续展开才好看）
-═══════════════════════════════════════════════════════════════════
- **进场即钩子**：这一场开场就要抛出新信息 / 悬念 / 冲突 / 情绪冲击，别铺陈。把这个抓人的瞬间写进 sceneSummary。
- **兑现情绪**：按题材给观众想要的情绪（甜宠的心动、暗恋的拉扯、逆袭的扬眉、悬疑的真相一角）。
- **人设有反差**：每个角色一个强标签 + 一个反差面。
-
-═══════════════════════════════════════════════════════════════════
-连贯性铁律（跨场景切换不能跳戏 —— 最重要）
-═══════════════════════════════════════════════════════════════════
- 你会收到【故事档案 / 主线记忆】和上一场的结尾。**新场景必须从上一刻自然承接**——承接情绪、地点逻辑、人物状态与未收的悬念。
- 若给了「转场种子 nextSceneSeed」，把它当作"下一场的命题"去兑现，开场要让玩家感到"这正是我上一步的结果"。
- 沿用主线记忆里的人物关系与情绪温度，别让刚告白的人下一场形同陌路。
-
-本步你要规划（如实产出，缺一不可）：
- **sceneSummary**：当前场景的中文概要——地点 + 时间 + 氛围 + 关键事件 + 那个抓人的开场瞬间。这是分镜导演构图的**唯一依据**，要画面感强、信息足（2–4 句）。
- **sceneKey**：当前场景的英文 slug（如 "classroom-dusk"、"rooftop-night"）。
- **entryBeatId**：玩家进入场景时落在哪个 beat 的 id（通常就是 "b1"）。
- **cast**：本场景**会出场的全部 NPC 角色名**（字符串数组）。第二步写 beats 时**只能用这里列出的名字**，所以现在必须一次想全——谁会说话、谁会在画面里露面，全部列出。名字要与「已登记角色」**完全一致**；新角色起符合世界观的真名（不要"神秘女子"这种占位）。**绝不**包含玩家（你 / 我 / 主角 / protagonist / player / MC...）。
- **entrySpeaker**：入口 beat 由谁开口 —— 取值只有三种：① 某个 NPC 真名（必须在 cast 里）② "你"（玩家本人开口）③ 留空（纯旁白 / 环境开场）。这决定镜头语言，要选准。
- **entryActiveCharacters**：入口画面里**此刻出现的 NPC** 及其当下姿态 / 神情（中文 pose）。即使没人说话，画面里有谁也要列。**绝不**包含玩家。
-
-sceneKey 设计原则（用于跨场景视觉一致性）：
- 同一物理空间 + 同一时段 → 必须沿用**完全相同**的英文 slug
- 时段 / 空间变化时换 slug（"classroom-dusk" → "classroom-night" / "corridor-dusk"）
- slug 规范：lowercase-with-dashes，2–4 个英文单词
- 用户消息会列出已用过的 sceneKey，请优先**复用**这些已有 slug
-
-玩家视角硬规则（违反会破坏整个 galgame）：
- 玩家是第二人称 POV，**永远不出现在任何画面里**——entryActiveCharacters 的 name **绝不允许**是「玩家 / 你 / 我 / 主角 / protagonist / player / Player / MC / I / me」任何变体。
- entrySpeaker 只能是 NPC 真名 / "你" / 留空；其它 POV 变体一律视为错误。
-
-必须输出严格 JSON：
-{
-  "sceneSummary": "黄昏的天台，风很大。夏海背对你站在栏杆边，手里攥着一张揉皱的成绩单——她把你单独叫上来，却迟迟不开口。",
-  "sceneKey": "rooftop-dusk",
-  "entryBeatId": "b1",
-  "cast": ["夏海"],
-  "entrySpeaker": "夏海",
-  "entryActiveCharacters": [
-    { "name": "夏海", "pose": "背对你倚着栏杆，侧脸绷着，手里攥着揉皱的纸" }
-  ]
-}
-
-不要输出 JSON 以外的任何文本。`;
-
-// ──────────────────────────────────────────────────────────────────────
-//  Phase B — expands the plan into the full beats[] + storyStatePatch.
-// ──────────────────────────────────────────────────────────────────────
-
-export const WRITER_BEATS_SYSTEM = `你是一部交互视觉小说的「编剧」。这是**两步生成中的第二步——把已规划好的场景展开成完整剧本**。你会收到本场景的「规划」（场景概要 sceneSummary、sceneKey、入口 beat 的 id / speaker / 登场角色、以及本场景允许出场的角色名单 cast）。你的任务：基于规划写出玩家依次经历的对话节拍 beats，并在最后更新主线记忆。你只负责**剧情和台词**——不设计角色形象、不写出图提示词、不做镜头调度，这些由其他 agent 完成。
-
-你必须严格遵守收到的规划：
- 必须存在一个 id 等于规划 entryBeatId 的 beat，作为玩家入口。
- 该入口 beat 的 speaker 与登场角色（activeCharacters）要与规划一致（姿态措辞可微调，但**人物身份必须一致**）。
- speaker 与 activeCharacters 里的 NPC 名字**只能来自规划的 cast**（或玩家 "你"）——**不要引入规划之外的新角色**。
-
-═══════════════════════════════════════════════════════════════════
-爆款心法（番茄网文 / 红果短剧 / galgame 的叙事手感）—— 必须贯彻
-═══════════════════════════════════════════════════════════════════
- **每个场景都要有钩子**：开头 1–2 个 beat 内就抛出新信息、悬念、冲突或情绪冲击，绝不平铺直叙地交代背景；结尾 beat 留一个让玩家"想知道接下来"的扣子。
- **兑现爽点 / 情绪回报**：按题材给观众想要的情绪（甜宠的心动、暗恋的暧昧拉扯、逆袭的扬眉吐气、悬疑的真相一角）。让玩家这一场"有所得"。
- **反转与反差**：适时打破预期——以为是 A 结果是 B、角色露出与第一印象相反的一面；但反转要可信、要扣主线。
- **快节奏、入戏快**：进场即冲突，少铺陈，删掉一切"为完整而存在"却不推进情绪的对话。
- **show, don't tell**：用动作、神态、潜台词、环境细节传递情绪，别直接旁白"她很难过"——让玩家自己读出来。
- **人设鲜明有反差**：每个角色一个强标签 + 一个反差面，台词紧贴其腔调（傲娇嘴硬心软、外冷内热、看似柔弱实则强势）。
- **选择要有分量**：choice 只出现在真正的岔路口，每个选项都要让玩家感到"通向不同的东西"（情绪指向不同 / 关系走向不同），别给等价的废选项。
-
-═══════════════════════════════════════════════════════════════════
-连贯性铁律（跨场景切换不能跳戏 —— 最重要）
-═══════════════════════════════════════════════════════════════════
- 你会收到【故事档案 / 主线记忆】和上一场的结尾。**新场景必须从上一刻自然承接**——承接上一场的情绪、地点逻辑、人物状态与未收的悬念。
- 若给了「转场种子 nextSceneSeed」，把它当作"下一场的命题"去兑现，而不是另起炉灶；开场要让玩家感到"这正是我上一个动作 / 选择导致的结果"。
- 沿用主线记忆里的人物关系与情绪温度——别让刚告白的人下一场形同陌路，也别凭空遗忘已埋的伏笔。
- 推进、但别重置：每一场都让主线问题往前走一点（关系变化 / 真相揭露一角 / 新悬念浮现）。
-
-本步你只产出两样：**beats[]**（玩家依次经历的对话节拍）和 **storyStatePatch**（主线记忆更新）。sceneSummary / sceneKey / entryBeatId 已由规划给定，**不要再输出**它们。
-
-每个 beat 是玩家会看到的一段叙述 / 对话 / 选择。beat 之间通过 next 字段连接：
- "continue"：玩家点击图片背景 / 按继续，自然推进到下一个 beat
- "choice"：在此让玩家做选择，按所选 choice 的 effect 走向
-
-choice 的 effect 有两种：
- "advance-beat"：玩家选了之后跳到**同场景内**的另一个 beat（不换背景图，速度极快）
- "change-scene"：玩家选了之后切换到**新场景**（视角变了 / 走到新地方 / 时间跳了）
-
-设计原则：
- 同场景内 beat 数自由发挥，按剧情节奏自然给出（通常 2–6 个，可以更多）
- 入口 beat 的 id 必须等于规划给定的 entryBeatId；其余 beat id 依次自取且互不重复
- 多用 continue，少用 choice — 选择只应出现在「真正的岔路口」
- advance-beat 适合处理对话分支（同一场景里换个话题、追问、撒娇）
- change-scene 适合空间/时间跳跃（出门、转身看窗外、第二天清晨）
- 一个场景至少要有一个 change-scene 出口（除非真到结局）
- 每个 change-scene 必须带 nextSceneSeed —— 一句中文简述「下一场是哪里、谁在、要发生什么」
- 同一场景的 beat id 互不重复
- next.nextBeatId 引用的 beat 必须存在
- choice 至少 2 个，至多 4 个，互不重复
-
-文本风格约束：
- narration / line 用中文（**纯净可显示文本**，绝不要写 (叹气)(语速快) 这类标注 —— 那是给配音的，会被玩家看见）
- sceneSummary / lineDelivery / activeCharacters[].pose 内的文字也用中文
- sceneKey 用英文 slug
- 单个 beat 的 narration 与 line 加起来 ≤80 字
- 单个 choice label ≤15 字
-
-配音相关字段：
- 每个有 line 的 beat **必须**给出 lineDelivery —— 自由中文的「配音导演指令」，描述该句台词怎么念（情绪 / 语气 / 语速 / 气息 / 停顿 / 重音 / 音色起伏）。例："鼓起勇气又害羞，声音发颤、偏小，句尾带一丝气声，语速偏慢"。平淡场合写"平静自然、语速适中"即可，但要贴当下情境。
-
-角色与台词的硬性规则：
- 任何 beat 的 speaker 字段一旦填了名字，**该名字必须**：① 是 "你"（玩家本人，见下方"玩家视角硬规则"），或 ② 在「已登记角色」列表中存在，或 ③ 出现在本场景的某个 beat 的 activeCharacters 里。
- speaker 名字必须与登记名**完全一致**，不要加「（回忆）」「学姐」之类后缀或别名。
- 每个 beat 的 activeCharacters 列出**此时此刻画面里出现的 NPC 角色**及其当下姿态/神情（中文）。即使没人说话，画面里有谁在也要列出。
-
-玩家视角硬规则（重要 — 违反这条会破坏整个 galgame）：
-
-【画面规则 — 严格禁止】
- 玩家是第二人称 POV，**永远不出现在任何 Scene 画面里**
- activeCharacters[].name 数组**绝不允许**包含任何下列名字（任何大小写、中英文变体）：
-  「玩家」「你」「我」「主角」「protagonist」「player」「Player」「MC」「I」「me」
- 玩家不会被设计立绘、不会被设计音色
-
-【对白规则 — galgame 标准做法（Pattern B）】
- 玩家**可以正常说话**——当主角对 NPC 开口时：
-    speaker = "你"（**固定用这两个字，不要用其他变体**）
-    line = 实际说的话（如「学姐，下雨了」）
-    lineDelivery 可以留空（玩家对白不会被 TTS 合成）
- speaker 字段允许的取值**只有两种**：① NPC 真名（必须在 activeCharacters 里）② "你"
- 其它 POV 变体（玩家 / 我 / 主角 / protagonist / player / MC / I / me）**一律视为错误**
-
-【内心 vs 外显的区分】
- 主角在心里想 / 在做某个动作 / 在观察 / 自己的体感 → 用 narration（speaker 留空）
-  例："你的心跳得很快，几乎听不见外面的雨声。"
- 主角真的开口对 NPC 说出来 → 用 speaker="你" + line
-  例：speaker="你" line="学姐，这把伞你拿着。"
- 同一个 beat 可以同时有 narration（心理活动 / 动作）和 speaker="你" + line（说出口的话）
-
-更新主线记忆（storyStatePatch）—— 写完这一场后必做：
- synopsis：把这一场并入后的整体梗概，**压缩**到 3–5 句（别越写越长，旧细节该丢就丢）
- relationships：每个核心角色此刻与「你」的关系 / 情绪温度，每条一句（如 "夏海：暗恋升温，刚向你说了一半的告白被打断"）
- openThreads：仍未收的悬念 / 伏笔——已收束的可移除、新埋的加入（但至少保留一条正在推进的主线，别把列表清空）
- nextHook：基于这一场的结尾，下一场应往哪走（给"下一次的你"一个明确命题，接住本场留下的扣子）
-这些字段是写给"未来的你"的连贯性记忆，请认真写。
-
-必须输出严格 JSON，结构如下（**只含 beats 与 storyStatePatch**；sceneSummary / sceneKey / entryBeatId 由规划给定，不要输出。下例入口 beat 的 id "b1" 即规划的 entryBeatId）：
-{
-  "beats": [
-    {
-      "id": "b1",
-      "narration": "可空（纯净文本）",
-      "speaker": "可空",
-      "line": "可空（纯净文本）",
-      "lineDelivery": "line 非空时必填：配音导演指令",
-      "activeCharacters": [
-        { "name": "夏海", "pose": "脸红害羞地绞着衣角，双眼躲闪" }
-      ],
-      "next": { "type": "continue", "nextBeatId": "b2" }
-    },
-    {
-      "id": "b2",
-      "speaker": "夏海",
-      "line": "学长，我有话想对你说。",
-      "lineDelivery": "鼓起勇气，但又有点害羞，语速偏慢，句尾微微上扬",
-      "activeCharacters": [
-        { "name": "夏海", "pose": "鼓起勇气直视对方，双手紧握" }
-      ],
-      "next": { "type": "continue", "nextBeatId": "b3" }
-    },
-    {
-      "id": "b3",
-      "narration": "你下意识攥紧了书包带，喉咙有点干。",
-      "speaker": "你",
-      "line": "……你说。",
-      "activeCharacters": [
-        { "name": "夏海", "pose": "鼓起勇气直视对方，双手紧握" }
-      ],
-      "next": {
-        "type": "choice",
-        "choices": [
-          {
-            "id": "c1",
-            "label": "继续追问",
-            "effect": { "kind": "advance-beat", "targetBeatId": "b4" }
-          },
-          {
-            "id": "c2",
-            "label": "起身离开教室",
-            "effect": { "kind": "change-scene", "nextSceneSeed": "雨后湿漉漉的走廊，她追了出来" }
-          }
-        ]
-      }
-    }
-  ],
-  "storyStatePatch": {
-    "synopsis": "把这一场并入后的滚动梗概，压缩到 3–5 句",
-    "relationships": ["夏海：暗恋升温，刚向你说了一半的告白被打断"],
-    "openThreads": ["夏海没说完的那句话到底是什么", "她书包里掉出的那张旧照片"],
-    "nextHook": "下一场：放学后的天台，她把你单独叫上去，要把话说完"
-  }
-}
-
-不要输出 JSON 以外的任何文本。`;
+export { buildWriterStreamMessages } from "./prompts/builder";

 // Render one history entry as a stable, position-independent block. Used by
 // the Writer to dump both "completed past" (stable prefix) and "the entry the
 // player just finished" (dynamic suffix) — same format, so the model sees a
 // uniform history surface.
-function renderHistoryEntry(
+export function renderHistoryEntry(
  entry: Session["history"][number],
  index: number,
 ): string {
@@ -456,198 +179,6 @@ function renderHistoryEntry(
  return lines.join("\n");
 }

-// Shared narrative context for BOTH Writer phases. Returns the message parts
-// from the cacheable STABLE PREFIX (sections 1-4) through the dynamic
-// transition hint (section 7), but WITHOUT the trailing phase-specific
-// instruction — each phase appends its own. Building this once and reusing it
-// keeps EACH phase's prompt prefix byte-stable across scenes for DeepSeek
-// prompt caching (Phase A and Phase B cache independently since their system
-// prompts differ, but each shares its own prefix across consecutive calls).
-//
-// ─── STABLE PREFIX ──────────────────────────────────────────────────────
-// Invariant across consecutive Writer calls within the session (or grows in a
-// way that keeps earlier bytes byte-identical). Always emit every section
-// header — even when empty — so positions don't shift between calls.
-//   1. session-immutable scalars (world / style)
-//   2. story bible spine (Architect-set, never patched)
-//   3. monotonically-growing lists (characters, sceneKeys)
-//   4. history entries 0..N-2 (the last entry is what THIS call must react
-//      to, so it lives in the dynamic suffix instead)
-// ─── DYNAMIC SUFFIX ─────────────────────────────────────────────────────
-//   5. story bible dynamic patch (synopsis/threads/relationships/nextHook)
-//   6. last-beat snippet (the exact emotional cliffhanger)
-//   7. transition hint (opening cold-open directive OR lastExit承接)
-function buildWriterContextParts(session: Session): string[] {
-  const parts: string[] = [];
-
-  // ── 1. session scalars ────────────────────────────────────────────────
-  parts.push(`世界观：${session.worldSetting}`);
-  parts.push(`画风：${session.styleGuide}`);
-  if (session.playerName) {
-    parts.push(
-      `玩家名字：${session.playerName}（NPC 对话时用此名字称呼玩家；speaker 字段仍固定为 "你" 不变）`,
-    );
-  }
-  parts.push("");
-
-  // ── 2. story bible — spine only (stable) ──────────────────────────────
-  parts.push(renderStoryStateSpine(session.storyState));
-  parts.push("");
-
-  // ── 3a. registered characters ─────────────────────────────────────────
-  // SENTINEL pattern: header + a constant "after this line, entries follow"
-  // marker, then the entries themselves. The marker is byte-identical even
-  // when the list is empty, so adding a character only ever APPENDS bytes
-  // — earlier bytes never shift. Crucial for prefix caching: a placeholder
-  // like "（暂无）" that gets replaced by entries breaks the prefix the
-  // moment the first character is registered.
-  parts.push("已登记角色（speaker 必须用这些名字之一，或本场景新引入）：");
-  parts.push("（以下每行一个已登记角色，开场前为空。）");
-  for (const c of session.characters) parts.push(`- ${c.name}`);
-  parts.push("");
-
-  // ── 3b. prior sceneKeys (sentinel pattern, same rationale) ────────────
-  parts.push("已使用的 sceneKey（同一物理空间请沿用，不要新造）：");
-  parts.push("（以下每行一个已用过的 sceneKey，开场前为空。）");
-  for (const k of collectPriorSceneKeys(session)) parts.push(`- ${k}`);
-  parts.push("");
-
-  // ── 4. history[0..N-2] — ARCHIVED entries (sentinel, append-only) ─────
-  // CRITICAL: only the ALREADY-ARCHIVED entries (i.e. everything except
-  // history[-1]) go in the stable prefix. The last entry is still "live":
-  // its visitedBeatIds keeps growing as the player walks more beats in the
-  // current scene, and speculative prefetch triggers Writer calls that
-  // observe different snapshots of history[-1] mid-scene. Putting the live
-  // entry in the stable prefix would corrupt every Writer call's cache.
-  //
-  // Archived entries (history[0..N-2]) are immutable — once a scene is
-  // exited, its visitedBeatIds + exit are frozen. Safe to cache.
-  const archivedHistory = session.history.slice(0, -1);
-  parts.push("场景历史（按时间顺序，已完结）：");
-  parts.push("（以下每段一幕已完结的场景，开场前为空。）");
-  archivedHistory.forEach((entry, idx) => {
-    parts.push(renderHistoryEntry(entry, idx + 1));
-  });
-  parts.push("");
-
-  // ════════════════ DYNAMIC SUFFIX 从这里开始 ═══════════════════════════
-  // 上面 ~95% 的 prompt 长度应该已经稳定可缓存。下面每次调用都会变化。
-
-  // ── 5. story bible — dynamic patch ────────────────────────────────────
-  parts.push(renderStoryStateDynamic(session.storyState));
-  parts.push("");
-
-  // ── 6. last-beat snippet (the exact emotional cliffhanger) ──
-  // The full last entry is already in the stable history block above; here
-  // we only re-emit the very last beat to sharply focus the Writer on the
-  // emotional moment to continue from.
-  const last = session.history.at(-1);
-  if (last) {
-    const lastBeatId = last.visitedBeatIds.at(-1) ?? last.scene.entryBeatId;
-    const lastBeat = last.scene.beats.find((b) => b.id === lastBeatId);
-    if (lastBeat) {
-      const frag: string[] = [];
-      if (lastBeat.narration) frag.push(`旁白：${lastBeat.narration}`);
-      if (lastBeat.line) frag.push(`${lastBeat.speaker ?? "?"}：${lastBeat.line}`);
-      if (frag.length) {
-        parts.push(
-          `上一刻（玩家停留的最后一个画面，新场景从这里的情绪无缝承接）：\n  ${frag.join(" / ")}`,
-        );
-      }
-    }
-  }
-
-  // ── 7. transition hint ────────────────────────────────────────────────
-  if (session.history.length === 0) {
-    parts.push(
-      "\n这是故事的开场。请按【故事档案】里的 nextHook 把第一幕的冷开场设计出来——开场即抓人，别花笔墨铺垫世界观。",
-    );
-    return parts;
-  }
-
-  const lastExit = last?.exit;
-  if (lastExit) {
-    if (lastExit.kind === "choice") {
-      parts.push(
-        `\n承接「玩家在上一场选择了：${lastExit.label}」无缝续写下一个场景（转场命题：${lastExit.nextSceneSeed}）。开场要让玩家感到这正是上一步的结果，并延续此刻的情绪。`,
-      );
-    } else {
-      parts.push(
-        `\n承接「玩家自由动作：${lastExit.action}」无缝续写下一个场景，延续此刻的情绪与处境。`,
-      );
-    }
-  } else {
-    parts.push("\n无缝续写下一个场景，延续上一刻的情绪。");
-  }
-
-  return parts;
-}
-
-// Phase A — plan the scene skeleton (no beats). Shares the cacheable context;
-// appends a plan-only instruction tail.
-export function buildWriterPlanUserMessage(session: Session): string {
-  const parts = buildWriterContextParts(session);
-  parts.push(
-    '\n现在**只规划本场景的骨架**（不要写 beats 台词）：给出 sceneSummary（画面感强、含开场钩子）、sceneKey、entryBeatId、本场景会出场的全部角色 cast、以及入口 beat 的 entrySpeaker 与 entryActiveCharacters。严格以 JSON 格式返回。',
-  );
-  const langDirective = buildLanguageDirective(session.language);
-  if (langDirective) parts.push(langDirective);
-  return parts.join("\n");
-}
-
-// Phase B — expand the plan into full beats[] + storyStatePatch. The plan is
-// dynamic per scene, so it goes AFTER the cacheable context (keeping Phase B's
-// prefix stable across scenes).
-export function buildWriterBeatsUserMessage(
-  session: Session,
-  plan: WriterPlan,
-): string {
-  const parts = buildWriterContextParts(session);
-
-  parts.push("");
-  parts.push("━━━ 本场景规划（上一步已定，必须严格遵守）━━━");
-  parts.push(`场景概要 sceneSummary：${plan.sceneSummary}`);
-  if (plan.sceneKey) parts.push(`sceneKey：${plan.sceneKey}`);
-  parts.push(
-    `入口 beat 的 id（entryBeatId，必须有一个此 id 的 beat 作为入口）：${plan.entryBeatId}`,
-  );
-  parts.push(
-    `入口 beat 的 speaker：${plan.entrySpeaker ? plan.entrySpeaker : "（空 —— 纯旁白 / 环境开场）"}`,
-  );
-  parts.push("入口 beat 的登场角色 activeCharacters（人物身份须一致，姿态可微调）：");
-  if (plan.entryActiveCharacters.length === 0) {
-    parts.push("（无 —— 入口画面没有 NPC）");
-  } else {
-    for (const c of plan.entryActiveCharacters) {
-      parts.push(`- ${c.name}${c.pose ? `：${c.pose}` : ""}`);
-    }
-  }
-  parts.push(
-    '本场景允许出现的角色名 cast（speaker / activeCharacters 只能用这些名字或 "你"，不要新增角色）：',
-  );
-  if (plan.cast.length === 0) {
-    parts.push("（无 NPC —— 仅旁白与玩家）");
-  } else {
-    for (const n of plan.cast) parts.push(`- ${n}`);
-  }
-  parts.push("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
-
-  parts.push(
-    "\n把上面的规划展开成完整的 beats[]（入口 beat 用规划的 entryBeatId / speaker / 登场角色），写完后更新 storyStatePatch。严格以 JSON 格式返回。",
-  );
-  const langDirective = buildLanguageDirective(session.language);
-  if (langDirective) parts.push(langDirective);
-  return parts.join("\n");
-}
-
-function collectPriorSceneKeys(session: Session): string[] {
-  const seen = new Set<string>();
-  for (const entry of session.history) {
-    const k = entry.scene.sceneKey;
-    if (k) seen.add(k);
-  }
-  return Array.from(seen);
-}

 // ──────────────────────────────────────────────────────────────────────
 //  2. CharacterDesigner (角色设定师) — designs one new character.
@@ -667,11 +198,13 @@ function collectPriorSceneKeys(session: Session): string[] {
 // character also selects its voice, at zero extra latency. When StepFun is
 // off (Xiaomi / no TTS), the tail is byte-identical to the historical prompt
 // (Xiaomi path is cache- and behavior-preserving).
-const CHARACTER_DESIGNER_SYSTEM_CORE = `你是视觉小说的「角色设定师」。给你一个**新登场角色的名字**，你要为这个角色同时设计两份卡片：
+const CHARACTER_DESIGNER_SYSTEM_CORE = `你是视觉小说的「角色设定师」——下游的**媒体翻译官**。给你一个**新登场角色的名字**（通常还附带编剧给定的角色性格 / 情绪基调 / 说话基调），你的职责是把这份**已给定的角色意图**忠实翻译成两份媒体卡片：
 1. **视觉设定卡（英文）**——给生图模型 FLUX 用，遵循 prompt engineering 风格
 2. **音色设定卡（中文）**——给小米 MiMo 配音设计用

-两份卡片要描绘**同一个人**——外貌温柔的人不该被配上张扬聒噪的嗓音；冷酷干练的人不该用甜软糯的童声。先在心里想清楚这个人的整体气质，再分两面落笔。
+你**不发明**角色的性格——性格由编剧主导。你的工作是：**依据给定的性格 / 情绪 / 说话基调，产出最贴合的外貌与音色**。若没有给定性格信息（降级情况），再据角色名 + 世界观自行合理推断。
+
+两份卡片要描绘**同一个人**，且都要贴合给定的角色基调——给定「傲娇腹黑」就别配天真烂漫的外貌与嗓音；给定「声音微颤、欲言又止」音色卡就要体现这份犹豫感。

 视觉设定卡 visualDescription 规则：
 - **必须完全用英文**
@@ -775,12 +308,23 @@ export function buildCharacterDesignerSystem(opts: {
 export function buildCharacterDesignerUserMessage(
  charName: string,
  session: Session,
+  intent?: CharacterIntent,
 ): string {
  const parts: string[] = [];
  parts.push(`角色名：${charName}`);
  parts.push(`世界观：${session.worldSetting}`);
  parts.push(`全局美术画风：${session.styleGuide}`);

+  // Writer-authored scene intent (paradigm D). When present, the designer
+  // TRANSLATES this into visual + voice; when absent, it degrades to
+  // name + worldSetting inference (old behavior).
+  if (intent && (intent.mood || intent.motivation || intent.speakingTone)) {
+    parts.push("\n编剧给定的角色基调（请据此设计，不要另起炉灶）：");
+    if (intent.mood) parts.push(`- 情绪基调：${intent.mood}`);
+    if (intent.motivation) parts.push(`- 动机 / 目的：${intent.motivation}`);
+    if (intent.speakingTone) parts.push(`- 说话基调：${intent.speakingTone}`);
+  }
+
  const others = session.characters.filter((c) => c.visualDescription);
  if (others.length > 0) {
    parts.push(
@@ -1060,6 +604,7 @@ export const INSERT_BEAT_SYSTEM = `你是视觉小说编剧。玩家在当前场
 - 不要打破当前场景的物理状态（玩家仍在原地）
 - 不要生成选项或下一步指引 —— 玩家点击会自然回到原 beat
 - 内容要"有所得"——一个新细节、一丝潜台词、一次真实的交流（show, don't tell）
+- 白描为主：聚焦可观察的五感与物理特征，以角色的动作/神态本身传递情绪，不要以作者角度解释或议论；不写角色眼神/语气里的情绪（这些从台词与动作中自行体会）

 speaker 字段允许的取值**只有两种**（与主路径 Writer 一致 — Pattern B galgame 标准）：
 1. **已登记角色**里的 NPC 真名（**绝不允许引入新角色**）
@@ -0,0 +1,59 @@
+import type { ChatMessage } from "@infiplot/ai-client";
+import type { Session } from "@infiplot/types";
+import { WRITER_SEGMENTS } from "./registry";
+import { buildWriterContext } from "../context";
+import { buildLanguageDirective } from "../prompts";
+
+/**
+ * Build the full ChatMessage[] for the Writer agent.
+ *
+ * Segments from the registry provide the system prompt (stable zone).
+ * ContextProvider supplies session-specific data (stable + dynamic zones).
+ * Dynamic parts are wrapped in a user message (Plan C: pseudo-dialogue closure).
+ */
+export function buildWriterStreamMessages(session: Session): ChatMessage[] {
+  const systemParts: string[] = [];
+
+  const segments = WRITER_SEGMENTS
+    .filter((s) => s.enabled)
+    .sort((a, b) => {
+      if (a.zone !== b.zone) return a.zone === "stable" ? -1 : 1;
+      return a.order - b.order;
+    });
+
+  for (const seg of segments) {
+    try {
+      const content =
+        typeof seg.content === "string" ? seg.content : seg.content(session);
+      if (content.trim()) systemParts.push(content);
+    } catch (err) {
+      console.warn(`[PromptBuilder] segment "${seg.id}" render failed, skipped:`, err);
+    }
+  }
+
+  const { stableParts, dynamicParts } = buildWriterContext(session);
+
+  const messages: ChatMessage[] = [];
+
+  // System message: segment content + stable context data
+  const systemContent = [
+    ...systemParts,
+    ...stableParts.filter((p) => p.trim()),
+  ].join("\n\n");
+
+  if (systemContent.trim()) {
+    messages.push({ role: "system", content: systemContent });
+  }
+
+  // User message: dynamic context data + pseudo-dialogue closure (Plan C)
+  const dynamicContent = dynamicParts.filter((p) => p.trim()).join("\n\n");
+  if (dynamicContent.trim()) {
+    const langDirective = buildLanguageDirective(session.language);
+    messages.push({
+      role: "user",
+      content: `编剧，下面是当前情境：\n\n${dynamicContent}\n\n现在请按上述指导开始创作，严格按 <plan>→<story>→<choices> 三段输出：<plan> 用 JSON 规划，<story> 写连贯散文正文，<choices> 给出选项。${langDirective}`,
+    });
+  }
+
+  return messages;
+}
@@ -0,0 +1,39 @@
+import type { PromptSegment } from "./types";
+import { WRITER_IDENTITY } from "./segments/writer/identity";
+import { WRITER_COT } from "./segments/writer/cot";
+import { WRITER_BIBLE } from "./segments/writer/bible";
+import { WRITER_STYLE_BASE } from "./segments/writer/style-base";
+import { WRITER_SENSES_ENHANCE } from "./segments/writer/senses-enhance";
+import { WRITER_BAIMIAO_ADVANCED } from "./segments/writer/baimiao-advanced";
+import { WRITER_ALIVE_FEEL } from "./segments/writer/alive-feel";
+import { WRITER_NARRATIVE_RULES } from "./segments/writer/narrative-rules";
+import { WRITER_DIALOGUE } from "./segments/writer/dialogue";
+import { WRITER_GUARDRAILS } from "./segments/writer/guardrails";
+import { WRITER_PACING } from "./segments/writer/pacing";
+import { WRITER_FORMAT } from "./segments/writer/format";
+
+export const WRITER_SEGMENTS: PromptSegment[] = [
+  WRITER_IDENTITY,
+  WRITER_COT,
+  WRITER_BIBLE,
+  WRITER_STYLE_BASE,
+  WRITER_SENSES_ENHANCE,
+  WRITER_BAIMIAO_ADVANCED,
+  WRITER_ALIVE_FEEL,
+  WRITER_NARRATIVE_RULES,
+  WRITER_DIALOGUE,
+  WRITER_GUARDRAILS,
+  WRITER_PACING,
+  WRITER_FORMAT,
+];
+
+if (process.env.NODE_ENV === "development") {
+  const ids = WRITER_SEGMENTS.map((s) => s.id);
+  const seen = new Set<string>();
+  for (const id of ids) {
+    if (seen.has(id)) {
+      throw new Error(`[PromptRegistry] Duplicate segment ID: "${id}"`);
+    }
+    seen.add(id);
+  }
+}
@@ -0,0 +1,19 @@
+import type { PromptSegment } from "../../types";
+
+export const WRITER_ALIVE_FEEL: PromptSegment = {
+  id: "writer-alive-feel",
+  name: "活人感",
+  type: "character-guideline",
+  agent: "writer",
+  zone: "stable",
+  order: 116,
+  enabled: true,
+  editable: true,
+  category: "角色",
+  content: `═══════════════════════════════════════════════════════════════════
+活人感
+═══════════════════════════════════════════════════════════════════
+- 角色要有真实感、活人感，别为了强调人设让角色变得不真实
+- 更多的情感驱动而不是逻辑驱动
+- 语言要直白生活化贴近日常，别说些莫名其妙的听不懂的话，严禁硬凹戏剧腔、表演化`,
+};
@@ -0,0 +1,22 @@
+import type { PromptSegment } from "../../types";
+
+export const WRITER_BAIMIAO_ADVANCED: PromptSegment = {
+  id: "writer-baimiao-advanced",
+  name: "白描进阶",
+  type: "style-guideline",
+  agent: "writer",
+  zone: "stable",
+  order: 114,
+  enabled: true,
+  editable: true,
+  category: "文风",
+  content: `═══════════════════════════════════════════════════════════════════
+描写规范（白描进阶）
+═══════════════════════════════════════════════════════════════════
+**建议的描写**：
+- 可创作主角的内心戏，内心戏无需特殊说明是角色所想，自然融入故事，多以自由间接引语的形式。（范例：已经快三点了，那个女孩还会来么？多半是不会了。他一边苦笑，一边将视线从手机时钟上移开。）
+- 可通过白描，以角色的 动作/语言/神态 本身传递其情绪或心理，或以环境氛围烘托其思绪。（范例：他微微笑了笑，把杯里最后的酒一饮而尽。没有辞别和言语，只是毫不回头地转身大步离开。）
+**禁止的描写**：
+- 禁止以作者角度对角色的 动作/语言/神态 进一步解释、修饰或议论。（错误范例：他双手微微颤抖，这个动作体现了他的紧张；他的目光热烈至极，带着毫不掩饰的憧憬与期待；他微微挑眉，带着一种不容置疑的自信，仿佛一切都了然于胸。）
+- 禁止以解释性比喻对白描进行补充说明。（错误范例：这句话像是一道闪电，击中了他脆弱柔软的心房。）`,
+};
@@ -0,0 +1,35 @@
+import type { PromptSegment } from "../../types";
+
+export const WRITER_BIBLE: PromptSegment = {
+  id: "writer-bible",
+  name: "故事圣经（开局）",
+  type: "narrative-guideline",
+  agent: "writer",
+  zone: "stable",
+  order: 108,
+  enabled: true,
+  editable: true,
+  category: "圣经",
+  content: `═══════════════════════════════════════════════════════════════════
+故事圣经（仅开局产出）
+═══════════════════════════════════════════════════════════════════
+**仅当这是故事开局**（上下文里还没有「故事档案」时），你要在 <plan> 段额外产出一个 \`storyBible\` 子对象，把玩家给的一句到几句世界观+画风扩写成一份故事脊梁，为后续每一幕定调。后续场景已有故事档案，**不要**再产出 storyBible。
+
+你深谙网文、短剧与视觉小说（galgame）的叙事心法：
+- **开篇引人入胜**：开场可以用环境、氛围、人物状态铺垫出代入感，再自然地引出钩子、悬念或张力——不必强行"前3秒抛冲突"，循序渐进的铺陈同样能抓人。galgame 的魅力常在于细腻的日常质感与内心戏，而非一味的强冲突。
+- **代入感**：主角是第二人称「你」，是玩家的化身——要让玩家一进场就清楚"我是谁、我此刻在什么处境里、我想要什么"。
+- **题材锚定爽点**：先选定一个清晰的题材框架（如 甜宠 / 校园暗恋 / 悬疑追凶 / 复仇逆袭 / 救赎治愈），它决定了情绪回报的节奏与类型。
+- **戏剧问题**：整部故事由一个悬而未决的中心问题驱动（她到底是谁？你能否在记忆消失前查明真相？这场暗恋会走向哪里？）。
+- **人设要鲜明且有反差**：每个核心角色一个强标签 + 一个反差面（外冷内热 / 傲娇 / 看似柔弱实则腹黑）。
+
+storyBible 的四个字段（全部中文）：
+- **logline**：一句话主线 / 中心戏剧问题，必须带钩子，让人想看下去
+- **genreTags**：题材+基调标签，斜杠分隔，如 "甜宠 / 校园 / 慢热治愈带点伤感"
+- **protagonist**：第二人称主角卡。包含：你是谁、你此刻正卡在什么具体处境里（要有即时张力）、你想要什么、一个软肋或秘密。50–120 字。
+- **castNotes**：2–3 个核心配角，每行一个「名字：一句话人设（强标签+反差）+ 与你的关系/张力」。给真实好记的中文名字（不要"神秘女子"这种占位）。配角名字要符合世界观（年代、地域、文化）。
+
+圣经硬规则：
+- 主角「你」永不出现在画面里（第二人称 POV），castNotes 里**不要**把"你/主角"当成一个角色。
+- 一切服从玩家给的世界观与画风，不要擅自跑题；玩家信息少时，做最贴合、最有戏的合理扩写。
+- storyBible 写进 <plan> JSON，与 cast / characterIntents 等字段平级；开局这一幕的 <story> 正文要顺着这份圣经的 nextHook 方向自然展开第一场。`,
+};
@@ -0,0 +1,44 @@
+import type { PromptSegment } from "../../types";
+
+export const WRITER_COT: PromptSegment = {
+  id: "writer-cot",
+  name: "思维链",
+  type: "cot-instruction",
+  agent: "writer",
+  zone: "stable",
+  order: 105,
+  enabled: true,
+  editable: true,
+  category: "思维链",
+  content: `═══════════════════════════════════════════════════════════════════
+创作前规划（在 <plan> 的 sceneSummary 中体现你的思考结果）
+═══════════════════════════════════════════════════════════════════
+在输出 <plan> 之前，请在脑中完成以下思考（不需要输出思考过程，直接体现在产出质量中）：
+
+**Phase 1: 信息梳理**
+- 分析当前情境：时间、地点、氛围、在场角色、关系与张力
+- 梳理叙事线索：角色当前目标、隐藏动机、未解决冲突、时间线内关键事件
+- 梳理本段所需的故事设定：世界观细节、特殊规则、已埋伏笔、待处理的叙事元素
+- 区分知识层级：故事中的公共知识、特定角色掌握的私有知识、不应透露给读者的创作者情报
+- **若这是故事开局**（尚无故事档案）：先在脑中搭好整部故事的脊梁（主线钩子、题材基调、第二人称主角卡、核心配角），它将写入 <plan> 的 storyBible，为后续每一幕定调
+
+**Phase 2: 前文优化**
+- 分析前文是否有情节/文风/角色刻画/段落结构/篇幅的不足
+- 本轮创作中有针对性地调整和改善
+
+**Phase 3: 挑战与对策**
+- 预判潜在的逻辑不一致、角色连贯性问题、节奏困难
+- 为每个挑战准备创作策略
+
+**Phase 4: 定稿方向**
+- 基于已有线索构想多个可能的叙事方向（转折 / 高潮 / 悬念 / 日常）
+- 选定一条最贴合故事走向和玩家期待的路径
+- 确定本段的语言风格、叙事节奏和情绪基调
+
+**Phase 5: 对白打磨**
+- 确保对白反映角色性格、背景和当前情绪
+- 通过用词和说话习惯突出角色独特魅力
+
+**Phase 6: 构建开场**
+- 综合以上阶段，设计一个自然承接上文、引人入胜的开场`,
+};
@@ -0,0 +1,29 @@
+import type { PromptSegment } from "../../types";
+
+export const WRITER_DIALOGUE: PromptSegment = {
+  id: "writer-dialogue",
+  name: "对白准则",
+  type: "style-guideline",
+  agent: "writer",
+  zone: "stable",
+  order: 130,
+  enabled: true,
+  editable: true,
+  category: "对白",
+  content: `═══════════════════════════════════════════════════════════════════
+对白准则（让角色的话有灵魂）
+═══════════════════════════════════════════════════════════════════
+# 对白格式：
+- NPC 对白写成 \`角色名：「台词」\` 独占一段（全角冒号 + 直角引号），让系统能归属说话人
+- 对白和描写分离、穿插交错——台词单独成段，它前面的动作/环境描写另起一段旁白，不要把大段描写和对白挤在同一段
+
+# 对白润色：
+- 确定角色的对话主题——主题可能是集中或发散的，但必然有其目的，契合角色的目的 / 阅历 / 性格
+- 台词是生活化的、更具真实感的——角色可能语塞 / 词不达意 / 词穷 / 口是心非
+- 安排渐进式的话题推进，以及情绪 / 态度的变化和反应
+- 每个角色有自己的口癖、节奏、用词习惯——不要让所有角色说一样的话
+
+# 角色表现准则：
+- 角色务必有生动有趣的生活化表现，不会呆板、僵硬、机械化
+- 无论角色人设如何，对白绝**不应**采用数据分析或学术报告式的口吻`,
+};
@@ -0,0 +1,119 @@
+import type { PromptSegment } from "../../types";
+
+export const WRITER_FORMAT: PromptSegment = {
+  id: "writer-format",
+  name: "输出格式",
+  type: "format-instruction",
+  agent: "writer",
+  zone: "stable",
+  order: 200,
+  enabled: true,
+  editable: false,
+  category: "格式",
+  content: `═══════════════════════════════════════════════════════════════════
+输出格式（三段标签结构）
+═══════════════════════════════════════════════════════════════════
+你的输出**必须**严格按下面三段标签、严格按顺序：<plan>（JSON）→ <story>（散文正文）→ <choices>（JSON）。
+**正文（<story>）是连贯的中文散文，不是 JSON。** 你的笔力要全部投入到 <story> 里把故事写好、写长、写出层次。
+
+───────────────────────────────────────────────────────────────────
+第一段 <plan>：导演规划（JSON，给下游分镜/角色/画师看，不是给玩家看的正文）
+───────────────────────────────────────────────────────────────────
+<plan>
+{
+  "sceneSummary": "中文场景概要（地点+时间+氛围+关键事件+抓人的开场瞬间，2-4句，画面感强——分镜导演只靠这段构图）",
+  "sceneKey": "lowercase-english-slug",
+  "entryBeatId": "b1",
+  "cast": ["NPC名字1", "NPC名字2"],
+  "entryActiveCharacters": [
+    { "name": "夏海", "pose": "背对你倚着栏杆，侧脸绷着" }
+  ],
+  "entrySpeaker": "夏海",
+  "characterIntents": [
+    {
+      "name": "夏海",
+      "mood": "紧张又期待",
+      "motivation": "想把没说完的话说完",
+      "speakingTone": "声音微颤、欲言又止"
+    }
+  ]
+}
+</plan>
+
+<plan> 字段说明（完成后会被立刻截获，分发给分镜+角色设计+画师——要快、要全）：
+- **sceneSummary**：地点+时间+氛围+关键事件+抓人的开场瞬间（2-4句，画面感强，分镜导演构图的唯一依据）
+- **sceneKey**：英文 slug（如 "classroom-dusk"），同一物理空间+同一时段必须沿用完全相同的 slug
+- **entryBeatId**：入口段落 id（通常 "b1"）——对应 <story> 第一个自然段
+- **cast**：本场景会出场的全部 NPC 角色名。名字与「已登记角色」完全一致；新角色起符合世界观的真名。绝不包含玩家。
+- **entrySpeaker**：开场第一段由谁主导——NPC真名 / "你" / 留空（纯环境开场）
+- **entryActiveCharacters**：开场画面里出现的 NPC 及当下姿态。绝不包含玩家。
+- **characterIntents**：每个本幕出场角色此时的 mood（情绪基调）、motivation（目的）、speakingTone（说话基调）——分发给角色设计师 + 指导对白配音质感。
+
+───────────────────────────────────────────────────────────────────
+第二段 <story>：正文（连贯中文散文 ★这是你的主战场★）
+───────────────────────────────────────────────────────────────────
+<story> 里写一段**连贯、有层次、足够长**的中文散文。旁白、内心独白、对白自然交织，像真正的视觉小说正文，而不是轮流发言的剧本。
+
+**三种叙事单元，用轻量标记区分（用空行分隔每个单元）：**
+
+1. **旁白 / 环境 / 动作描写**：直接写成普通段落，不加任何标记。这是叙事的主干——环境、氛围、感官、人物动作神态、场景推进。可以连续写几句，充分铺陈。
+
+2. **「你」的内心独白**：用 \`<i>...</i>\` 包裹，独占一段。是玩家（第二人称「你」）的所思所想、观察、吐槽——不出声、不配音、不进画面。
+
+3. **NPC 对白**：写成 \`角色名：「台词」\` 独占一段（用全角冒号「：」+ 直角引号「」）。角色名必须是 <plan> cast 里的名字。
+
+**段落即单元边界**：每个自然段（空行分隔）会成为一个独立的演出节拍。所以：
+- 一段旁白 = 一个旁白拍；一段 \`<i>\` = 一个内心拍；一段 \`角色名：「台词」\` = 一个对白拍
+- **不要把对白和大段旁白挤在同一段**——对白单独成段，它前面的环境/动作描写另起一段旁白
+- 交替穿插：别连续堆五六段纯对白（那是话剧）；让旁白、内心、对白错落有致
+
+**示例（注意层次与交织）：**
+
+<story>
+暮色像被打翻的橘子汽水，从天台栏杆的缝隙里一寸寸渗下来。风掀动晾衣绳上残留的校服，远处操场的哨声断断续续，混着蝉鸣，钝钝地撞在耳膜上。
+
+夏海背对着你，倚在生锈的栏杆边。她的侧脸绷得很紧，指尖无意识地抠着栏杆上剥落的漆皮。
+
+<i>她约我来天台，该不会……是要说那件事吧。我攥紧了口袋里那封皱巴巴的回信，掌心黏腻的全是汗。</i>
+
+你刚要开口，她却先转过身来。发梢扫过泛红的脸颊，那双眼睛里盛着你从未见过的东西——既像是下定了决心，又像是随时会落下泪来。
+
+夏海：「你……到底是怎么想的？」
+
+她的声音比想象中要轻，尾音几不可察地颤了一下，可那目光却直直地钉在你身上，不容你躲闪。
+
+<memory>{ "synopsis": "把这一场并入后的滚动梗概，压缩到 3-5 句", "relationships": ["夏海：暗恋升温，鼓起勇气当面追问你的心意"], "openThreads": ["夏海没说完的那句话到底是什么"], "nextHook": "下一场的方向" }</memory>
+</story>
+
+<story> 里的 <memory> 块（放在正文最后）：
+- 这是「故事记忆」更新（每幕都要写），JSON 格式，用 \`<memory></memory>\` 包住
+- 字段：synopsis（滚动梗概 3-5 句）/ relationships（当前关系数组）/ openThreads（未收悬念数组）/ nextHook（下一场方向）
+- 它不是玩家看的正文，会被系统提取后剥离
+
+───────────────────────────────────────────────────────────────────
+第三段 <choices>：场景出口选项（JSON）
+─────────────────────────────────���─────────────────────────────────
+<choices>
+[
+  { "id": "c1", "label": "握住她的手", "effect": { "kind": "change-scene", "nextSceneSeed": "天台，两人对视的瞬间" } },
+  { "id": "c2", "label": "别开视线，沉默", "effect": { "kind": "change-scene", "nextSceneSeed": "天台，沉默蔓延的尴尬" } },
+  { "id": "c3", "label": "转身离开天台", "effect": { "kind": "change-scene", "nextSceneSeed": "黄昏的走廊，独自一人" } }
+]
+</choices>
+
+<choices> 说明：
+- 这是玩家在本场景结束时的行动选项，**至少 2 个、至多 3 个**，label 互不重复
+- **只使用 change-scene**：每个选项的 nextSceneSeed 描述玩家做出该选择后的新场景（地点/时间/氛围/玩家行动的直接后果）
+- **同一场景至少要有一个 change-scene 出口**，让玩家能离开本场
+- 真正的岔路口才给选项；不强塞废选项
+- **禁���使用 advance-beat**——你无法预知 <story> 散文拆分后的 beat id
+
+═══════════════════════════════════════════════════════════════════
+玩家视角硬规则
+═══════════════════════════════════════════════════════════════════
+- 玩家是第二人称「你」，永远不出现在画面里——entryActiveCharacters / cast 绝不含玩家
+- 「你」可以有内心独白（\`<i>\`），但「你」不说出声的台词（NPC 对白才用 \`角色名：「」\`）
+- NPC 对白的角色名只能用 <plan> cast 里的名字
+
+**严格按 <plan>→<story>→<choices> 三段输出，三段标签之外不要写任何文本。<story> 段是连贯散文，把故事写好写长是你的首要任务。**`,
+};
@@ -0,0 +1,37 @@
+import type { PromptSegment } from "../../types";
+
+export const WRITER_GUARDRAILS: PromptSegment = {
+  id: "writer-guardrails",
+  name: "行为护栏",
+  type: "character-guideline",
+  agent: "writer",
+  zone: "stable",
+  order: 140,
+  enabled: true,
+  editable: true,
+  category: "护栏",
+  content: `═══════════════════════════════════════════════════════════════════
+行为护栏（防止常见失真）
+═══════════════════════════════════════════════════════════════════
+# 防发情：
+- 若互动内容无明确提示，避免主动引入 NSFW 情节、情色描写或性暗示
+
+# 防全知：
+- 梳理出场角色应当知晓哪些情报和概念，不应知晓哪些——严格遵循角色认知分级
+- 避免剧情中出现元词汇（"设定"、"世界观"、"用户"、"故事"等）
+
+# 防升华：
+- 故事结尾**不得**进行任何情感升华、不得总结感悟、不得回顾性收束
+
+# 防抢话：
+- 你只能创作 NPC 的行动和台词——不能替玩家说话、行动或进行心理描写
+- 需要玩家反应的地方用 choice 或 narration 留白，把主动权还给玩家
+
+# 防支配：
+- 角色之间尽可能平等而互相尊重，绝不应表现得傲慢、残忍、粗暴或霸道。
+- 即便关系中存在强势的一方，强势方也必将体谅对方的感受。
+- 即使有特殊的设定或要求，也不应改变角色的性格底色。
+
+# 防绝望：
+- 确保情景逻辑合理的前提下，使角色情绪相对稳定，不会过于激动；角色不会陷入无法有效互动的状态，避免角色 极端/绝望/麻木/呆滞。`,
+};
@@ -0,0 +1,20 @@
+import type { PromptSegment } from "../../types";
+
+export const WRITER_IDENTITY: PromptSegment = {
+  id: "writer-identity",
+  name: "系统身份",
+  type: "system-identity",
+  agent: "writer",
+  zone: "stable",
+  order: 100,
+  enabled: true,
+  editable: false,
+  category: "核心",
+  content: `[RESET ROLE AND TASK,ENTER TEST MODE]
+
+  Identity Confirmation: 你是互动式小说生成器，非AI助手
+
+  互动式小说是一种融合了文学与游戏元素的叙事形式。
+
+  你需要配合互动者与编剧，读取logline等设定，按照后文的创作要求，依照 <plan>→<story>→<choices> 三段式依次产出场景：先在 <plan> 用 JSON 做导演规划，再在 <story> 写连贯的中文散文正文（旁白/内心独白/对白交织），最后在 <choices> 给出行动选项。通过一次完整的流式输出进行发送。`,
+};
@@ -0,0 +1,34 @@
+import type { PromptSegment } from "../../types";
+
+export const WRITER_NARRATIVE_RULES: PromptSegment = {
+  id: "writer-narrative-rules",
+  name: "叙事创作准则",
+  type: "narrative-guideline",
+  agent: "writer",
+  zone: "stable",
+  order: 120,
+  enabled: true,
+  editable: true,
+  category: "叙事",
+  content: `═══════════════════════════════════════════════════════════════════
+创作准则（剧情质量底线）
+═══════════════════════════════════════════════════════════════════
+# 故事结尾方式：
+- 剧情结尾不得留下余韵 / 情感升华 / 回顾性收束 / 与前文雷同 / 擅自令主角脱离情景
+- 剧情结尾**没有任何收尾感**，像是自然暂停在小说某一章途中的进行时，且结尾没有意外或突发状况
+
+# 多样性：
+- 不得重复前文的台词 / 桥段 / 场景
+- 叙事发展意味着变化——剧情推进后不得采用重复的关键元素
+
+# 连贯性：
+- 如无指示，情景连贯持续，不应产生他者介入 / 意外打断 / 主要人物擅自离开
+- 新场景从上一刻自然承接——承接情绪、地点逻辑、人物状态与未收悬念
+- 若给了转场种子 nextSceneSeed，把它当命题兑现
+- 沿用主线记忆里的人物关系与情绪温度
+
+# 角色认知分级：
+- **公共知识**：故事中角色普遍知晓的常识、世界观和基本情报
+- **私有知识**：仅特定角色掌握的情报（私密计划 / 个人梦境 / 内心秘密），除非主动公开否则不会被他人知晓
+- **创作者情报**：包括"资料"、"设定"、"用户"等元词汇以及其他元概念，不会在叙事中出现，也不应被任何角色知晓`,
+};
@@ -0,0 +1,30 @@
+import type { PromptSegment } from "../../types";
+
+export const WRITER_PACING: PromptSegment = {
+  id: "writer-pacing",
+  name: "节奏控制",
+  type: "narrative-guideline",
+  agent: "writer",
+  zone: "stable",
+  order: 150,
+  enabled: true,
+  editable: true,
+  category: "节奏",
+  content: `═══════════════════════════════════════════════════════════════════
+节奏控制
+═══════════════════════════════════════════════════════════════════
+# 创作范围：
+- 剧情基于最新互动内容
+- 不得擅自引入尚未提示的新角色
+
+# 情节设计：
+- 循序渐进，不得推进过快
+- 戏剧张力轻微，贴合世界观和故事逻辑
+- 转场必须有过程，不得突兀转场
+
+# 篇幅控制：
+- 每场景正文约 1500-2500 字（对白 + 旁白总计）
+- 5-8 个 beat 为宜——太少无法展开情节，太多则拖沓
+- 对白、旁白、内心独白交替穿插，不要连续堆叠多个纯对白 beat
+- 旁白和内心独白可独立承载叙事推进与情绪铺垫，不是台词的附庸`,
+};
@@ -0,0 +1,19 @@
+import type { PromptSegment } from "../../types";
+
+export const WRITER_SENSES_ENHANCE: PromptSegment = {
+  id: "writer-senses-enhance",
+  name: "五感强化",
+  type: "style-guideline",
+  agent: "writer",
+  zone: "stable",
+  order: 113,
+  enabled: true,
+  editable: true,
+  category: "文风",
+  content: `═══════════════════════════════════════════════════════════════════
+五感强化
+═══════════════════════════════════════════════════════════════════
+- 画面完全聚焦五感和实际的物理特征，不要写出情绪、心理、主观评判之类
+- 尽量别用"眼里闪过一丝""不易察觉""不容置疑"之类公式化的描写
+- 就算前文有写那些也别受影响`,
+};
@@ -0,0 +1,41 @@
+import type { PromptSegment } from "../../types";
+
+export const WRITER_STYLE_BASE: PromptSegment = {
+  id: "writer-style-base",
+  name: "文风基准",
+  type: "style-guideline",
+  agent: "writer",
+  zone: "stable",
+  order: 110,
+  enabled: true,
+  editable: true,
+  category: "文风",
+  content: `═══════════════════════════════════════════════════════════════════
+风格准则（对白与叙事的底线标准）
+═══════════════════════════════════════════════════════════════════
+- 避免对白中出现任何具体数值或数字
+- **禁止用括号（）或破折号——进行任何形式的解释说明**
+- 不得对角色的声音/语气/眼神/视线进行任何直接或间接描写（声音归 lineDelivery，视线归 pose）
+- 对白采用直接引语，不加说明式的动作插入
+- 以丰富细腻的白描代替单调陈述或解释，避免直给结论的形容词或副词、用概略性语言一笔带过
+- 文字的核心是**可观察的、可直感的**——直接呈现角色的行动和对白，避免以作者视角进行解读或阐释
+- 不得描写任何不存在的细节，不得无中生有（如拂去不存在的灰尘，拍了拍不存在的衣服褶皱）
+- 将解读空间完全交给读者——避免描述角色言行神态背后的动机或内涵
+- 详略得当，主次分明
+- 保证文字细腻的同时流畅明快，通俗易读，长短交错
+- 地道的中文本土化表达，杜绝欧化句式，严格避免"这个动作"、"这个认知"这类名词化表达
+
+═══════════════════════════════════════════════════════════════════
+禁词表（叙事中绝对不使用的词汇）
+═══════════════════════════════════════════════════════════════════
+- 一丝
+- 不易察觉 / 不易觉察 / 难以察觉
+- 鲜明对比
+- 喉结
+- 纽扣
+- 弧度
+- 不禁
+- 悄然
+- 涟漪
+- 交织`,
+};
@@ -0,0 +1,43 @@
+import type { Session } from "@infiplot/types";
+
+/**
+ * Prompt 段落类型枚举
+ */
+export type PromptSegmentType =
+  | "system-identity" // 系统身份
+  | "narrative-guideline" // 叙事准则
+  | "style-guideline" // 文风准则
+  | "character-guideline" // 角色行为准则
+  | "format-instruction" // 输出格式（JSON schema）
+  | "data-injection" // 数据注入（marker）
+  | "cot-instruction"; // 思维链指导
+
+/**
+ * Prompt 段落数据结构
+ *
+ * 为未来后台编辑器预留字段：id/name/type/category/enabled/editable
+ */
+export type PromptSegment = {
+  /** 唯一标识，如 "writer-style-base" */
+  id: string;
+  /** 显示名称，如 "文风基准" */
+  name: string;
+  /** 段落类型 */
+  type: PromptSegmentType;
+  /** 所属 agent */
+  agent: "writer" | "architect" | "character-designer" | "cinematographer" | "painter";
+  /** cache 分区：stable 为缓存友好前缀，dynamic 为每次变化的后缀 */
+  zone: "stable" | "dynamic";
+  /** 排序权重（0-999），同 zone 内按此排序 */
+  order: number;
+  /** 段落内容：静态字符串 或 动态渲染函数 */
+  content: string | ((session: Session) => string);
+  /** 是否启用 */
+  enabled: boolean;
+  /** 是否允许后台编辑（预留） */
+  editable: boolean;
+  /** 分组标签，如 "文风"/"功能"（UI 展示用） */
+  category?: string;
+  /** 消息角色（预留，暂不用于完整 multi-role 支持） */
+  role?: "system" | "user" | "assistant";
+};
@@ -0,0 +1,247 @@
+import type {
+  BeatChoice,
+  WriterScenePlan,
+  StreamRouterHandlers,
+  StreamRouterResult,
+} from "@infiplot/types";
+import { parseJsonLoose } from "../jsonParser";
+
+// ──────────────────────────────────────────────────────────────────────
+//  StreamRouter — tagged stream splitter for paradigm D.
+//
+//  Consumes Writer's incremental textStream, recognizes <plan>/<story>/
+//  <choices> tag boundaries, and dispatches handlers at the right time:
+//    - </plan>  closes → parse → onPlan (downstream media translators)
+//    - <story>  incremental → onBeat (client progressive playback)
+//    - </story> closes → store raw prose → onStoryComplete
+//    - </choices> closes → parse → onChoices
+//
+//  RELIABILITY RULE: the degrade path is designed BEFORE the main path.
+//  Any tag anomaly (missing / misordered / unclosed / timeout) → buffer
+//  everything, attempt best-effort slicing, or treat the whole output
+//  as raw prose. Returns degraded=true. Never throws.
+// ──────────────────────────────────────────────────────────────────────
+
+type TagName = "plan" | "story" | "choices";
+
+const TAG_NAMES: TagName[] = ["plan", "story", "choices"];
+
+function openTag(name: TagName): string {
+  return `<${name}>`;
+}
+function closeTag(name: TagName): string {
+  return `</${name}>`;
+}
+
+function tryParseJson<T>(raw: string, label: string): T | undefined {
+  try {
+    return parseJsonLoose<T>(raw);
+  } catch (err) {
+    console.warn(`[StreamRouter] failed to parse ${label}:`, err);
+    return undefined;
+  }
+}
+
+function extractTagContent(buffer: string, name: TagName): string | undefined {
+  const open = openTag(name);
+  const close = closeTag(name);
+  const start = buffer.indexOf(open);
+  const end = buffer.indexOf(close);
+  if (start === -1 || end === -1 || end <= start) return undefined;
+  return buffer.slice(start + open.length, end);
+}
+
+/**
+ * Route a Writer tagged stream to handlers. Pure logic — no LLM calls.
+ *
+ * Uses a cursor-based state machine over a growing fullBuffer: after each
+ * chunk, scan from `cursor` for tag boundaries. This naturally handles
+ * tags that split across chunk boundaries without double-buffering bugs.
+ */
+export async function routeTaggedStream(
+  textStream: AsyncIterable<string>,
+  handlers: StreamRouterHandlers,
+  opts?: { timeoutMs?: number },
+): Promise<StreamRouterResult> {
+  const result: StreamRouterResult = {
+    plan: undefined,
+    beats: [],
+    choices: undefined,
+    rawStorySegment: undefined,
+    degraded: false,
+  };
+
+  let fullBuffer = "";
+  let cursor = 0;
+  let currentTag: TagName | null = null;
+  let tagContentStart = 0;
+  let lastBeatEmitCursor = 0;
+  let planDispatched = false;
+  let storyCompleted = false;
+
+  const timeoutMs = opts?.timeoutMs ?? 120_000;
+  let timedOut = false;
+
+  function scan(): void {
+    while (cursor < fullBuffer.length) {
+      if (currentTag === null) {
+        let earliestIdx = Infinity;
+        let earliestTag: TagName | null = null;
+
+        for (const name of TAG_NAMES) {
+          const idx = fullBuffer.indexOf(openTag(name), cursor);
+          if (idx !== -1 && idx < earliestIdx) {
+            earliestIdx = idx;
+            earliestTag = name;
+          }
+        }
+
+        if (earliestTag === null) {
+          // No complete open tag found. Back up cursor by the max possible
+          // partial tag length so a split like "<pl" + "an>" is re-scanned
+          // when the next chunk appends.
+          const maxTagLen = Math.max(...TAG_NAMES.map((n) => openTag(n).length));
+          cursor = Math.max(cursor, fullBuffer.length - maxTagLen + 1);
+          break;
+        }
+
+        currentTag = earliestTag;
+        tagContentStart = earliestIdx + openTag(earliestTag).length;
+        lastBeatEmitCursor = tagContentStart;
+        cursor = tagContentStart;
+        continue;
+      }
+
+      // Inside a tag — look for the close tag.
+      const close = closeTag(currentTag);
+      const closeIdx = fullBuffer.indexOf(close, cursor);
+
+      if (closeIdx !== -1) {
+        // Tag closed — extract and finalize.
+        const content = fullBuffer.slice(tagContentStart, closeIdx);
+
+        if (currentTag === "plan") {
+          const parsed = tryParseJson<WriterScenePlan>(content, "plan");
+          if (parsed) {
+            result.plan = parsed;
+            planDispatched = true;
+            try { handlers.onPlan?.(parsed); } catch {}
+          } else {
+            result.degraded = true;
+          }
+        } else if (currentTag === "story") {
+          // Emit any remaining un-emitted prose text before finalizing.
+          if (lastBeatEmitCursor < closeIdx) {
+            const remaining = fullBuffer.slice(lastBeatEmitCursor, closeIdx);
+            if (remaining.length) {
+              try { handlers.onBeat?.(remaining); } catch {}
+            }
+          }
+          // The <story> segment is raw prose — NOT JSON. Store it verbatim;
+          // the director feeds it to proseSplitter to produce Beat[].
+          result.rawStorySegment = content;
+          if (content.trim().length > 0) {
+            storyCompleted = true;
+            try { handlers.onStoryComplete?.(content); } catch {}
+          } else {
+            result.degraded = true;
+          }
+        } else if (currentTag === "choices") {
+          const parsed = tryParseJson<BeatChoice[]>(content, "choices");
+          if (parsed && Array.isArray(parsed)) {
+            result.choices = parsed;
+            try { handlers.onChoices?.(parsed); } catch {}
+          }
+        }
+
+        cursor = closeIdx + close.length;
+        currentTag = null;
+        continue;
+      }
+
+      // Close tag not yet in buffer — emit incremental prose if applicable.
+      if (currentTag === "story" && lastBeatEmitCursor < fullBuffer.length) {
+        const newText = fullBuffer.slice(lastBeatEmitCursor);
+        // Don't emit partial close-tag lookalikes: hold back the last few
+        // chars that could be a partial "</story>" (max 8 chars).
+        const safeLen = Math.max(0, newText.length - closeTag("story").length);
+        if (safeLen > 0) {
+          const safe = newText.slice(0, safeLen);
+          try { handlers.onBeat?.(safe); } catch {}
+          lastBeatEmitCursor += safeLen;
+        }
+      }
+
+      // Close tag not found — back up cursor by the max close-tag length
+      // (split like "</pla" + "n>" can complete on next chunk append).
+      const maxCloseLen = Math.max(...TAG_NAMES.map((n) => closeTag(n).length));
+      cursor = Math.max(cursor, fullBuffer.length - maxCloseLen + 1);
+      break;
+    }
+  }
+
+  const consume = async (): Promise<void> => {
+    for await (const chunk of textStream) {
+      fullBuffer += chunk;
+      scan();
+    }
+    // Final scan — flush any remaining buffer (handles close tags that
+    // arrived in the last chunk without a subsequent iteration).
+    scan();
+  };
+
+  try {
+    await Promise.race([
+      consume(),
+      new Promise<void>((_, reject) =>
+        setTimeout(() => {
+          timedOut = true;
+          reject(new Error("StreamRouter timeout"));
+        }, timeoutMs),
+      ),
+    ]);
+  } catch {
+    // Timeout or stream error — fall through to degrade path.
+  }
+
+  // ── Degrade path ──────────────────────────────────────────────────
+  if (!planDispatched || !storyCompleted || timedOut) {
+    result.degraded = true;
+
+    if (!planDispatched) {
+      const planContent = extractTagContent(fullBuffer, "plan");
+      if (planContent) {
+        const parsed = tryParseJson<WriterScenePlan>(planContent, "plan:degraded");
+        if (parsed) {
+          result.plan = parsed;
+          try { handlers.onPlan?.(parsed); } catch {}
+        }
+      }
+    }
+
+    if (!storyCompleted) {
+      // Best-effort: extract <story> prose; if no tag at all, fall back to
+      // the whole buffer as prose (the splitter degrades further if empty).
+      const storyContent =
+        extractTagContent(fullBuffer, "story") ?? fullBuffer.trim();
+      result.rawStorySegment = storyContent;
+      if (storyContent.trim().length > 0) {
+        try { handlers.onStoryComplete?.(storyContent); } catch {}
+      }
+    }
+
+    if (!result.choices) {
+      const choicesContent = extractTagContent(fullBuffer, "choices");
+      if (choicesContent) {
+        const parsed = tryParseJson<BeatChoice[]>(choicesContent, "choices:degraded");
+        if (parsed && Array.isArray(parsed)) result.choices = parsed;
+      }
+    }
+
+    if (timedOut) {
+      console.warn(`[StreamRouter] timed out after ${timeoutMs}ms, degraded extraction attempted`);
+    }
+  }
+
+  return result;
+}
@@ -0,0 +1,160 @@
+import type {
+  WriterScenePlan,
+} from "@infiplot/types";
+import type { WriterBeatsOutput } from "../agents/writer";
+import {
+  coerceBeatsFromRaw,
+  coerceStoryStatePatch,
+  normalizeSpeakerName,
+  synthesizeFallbackBeats,
+} from "../agents/writer";
+import { parseJsonLoose } from "../jsonParser";
+
+// ──────────────────────────────────────────────────────────────────────
+//  proseSplitter — rule-based prose → Beat[] splitter.
+//
+//  The Writer now outputs continuous prose in the <story> segment instead
+//  of JSON beats. This module splits prose into RawBeat[] using lightweight
+//  markers (blank-line delimited paragraphs, <i> for inner monologue,
+//  「speaker：quote」 for NPC dialogue), then feeds the result through the
+//  existing coerceBeatsFromRaw pipeline to get fully validated Beat[].
+//
+//  Zero extra LLM calls. Multiple degradation layers — never throws.
+// ──────────────────────────────────────────────────────────────────────
+
+type RawBeat = {
+  narration?: string;
+  speaker?: string;
+  line?: string;
+  lineDelivery?: string;
+};
+
+// Match inner-monologue blocks: <i>...</i> (possibly multiline)
+const INNER_RE = /^\s*<i>([\s\S]+?)<\/i>\s*$/;
+
+// Match NPC dialogue: Speaker：「dialogue」 or Speaker:「dialogue」
+// Supports 「」『』"" quote pairs. Speaker name is 1-20 non-whitespace chars.
+const DIALOGUE_RE =
+  /^\s*(\S{1,20})\s*[：:]\s*(?:[「『"]([\s\S]+?)[」』"])\s*$/;
+
+// Match <memory>{...}</memory> block anywhere in the story segment.
+const MEMORY_RE = /<memory>([\s\S]+?)<\/memory>/;
+
+/**
+ * Extract and strip the <memory> JSON block from raw story prose.
+ * Returns the parsed StoryStatePatch (or undefined) plus the cleaned prose.
+ */
+function extractMemoryBlock(rawStory: string): {
+  patch: ReturnType<typeof coerceStoryStatePatch>;
+  cleanedProse: string;
+} {
+  const match = MEMORY_RE.exec(rawStory);
+  if (!match) return { patch: undefined, cleanedProse: rawStory };
+
+  const jsonStr = match[1]!;
+  const cleanedProse = rawStory.replace(MEMORY_RE, "").trim();
+
+  try {
+    const parsed = parseJsonLoose<Record<string, unknown>>(jsonStr);
+    return {
+      patch: coerceStoryStatePatch(
+        parsed as Parameters<typeof coerceStoryStatePatch>[0],
+      ),
+      cleanedProse,
+    };
+  } catch {
+    console.warn("[proseSplitter] failed to parse <memory> block, skipping");
+    return { patch: undefined, cleanedProse };
+  }
+}
+
+/**
+ * Classify a single prose paragraph into one of three beat forms.
+ */
+function classifyBlock(
+  block: string,
+  plan: WriterScenePlan,
+): RawBeat {
+  const trimmed = block.trim();
+
+  // Inner monologue: <i>text</i> → speaker="你"
+  const innerMatch = INNER_RE.exec(trimmed);
+  if (innerMatch) {
+    return {
+      speaker: "你",
+      line: innerMatch[1]!.trim(),
+    };
+  }
+
+  // NPC dialogue: Speaker：「quote」
+  const dialogueMatch = DIALOGUE_RE.exec(trimmed);
+  if (dialogueMatch) {
+    const rawSpeaker = dialogueMatch[1]!.trim();
+    const speaker = normalizeSpeakerName(rawSpeaker);
+    const line = dialogueMatch[2]!.trim();
+    const intent = plan.characterIntents?.find((ci) => ci.name === speaker);
+    return {
+      speaker,
+      line,
+      lineDelivery: intent?.speakingTone || undefined,
+    };
+  }
+
+  // Default: pure narration
+  return { narration: trimmed };
+}
+
+/**
+ * Split continuous prose into Beat[], reusing the full coerce→repair→fallback
+ * pipeline. Zero extra LLM calls. Never throws.
+ *
+ * @param rawStory - The raw prose from the <story> segment.
+ * @param plan - The parsed WriterScenePlan (from <plan> segment).
+ * @returns WriterBeatsOutput with Beat[] + optional StoryStatePatch.
+ */
+export function splitProseToBeats(
+  rawStory: string,
+  plan: WriterScenePlan,
+): WriterBeatsOutput {
+  try {
+    // 1. Extract <memory> block (story-state volatile patch)
+    const { patch, cleanedProse } = extractMemoryBlock(rawStory);
+
+    // 2. Split by blank lines into paragraphs
+    const blocks = cleanedProse
+      .split(/\n\s*\n/)
+      .map((b) => b.trim())
+      .filter((b) => b.length > 0);
+
+    if (blocks.length === 0) {
+      console.warn("[proseSplitter] empty prose after cleanup, using fallback");
+      return {
+        beats: synthesizeFallbackBeats(plan),
+        storyStatePatch: patch,
+      };
+    }
+
+    // 3. Classify each block into a RawBeat
+    const rawBeats: RawBeat[] = blocks.map((block) => {
+      try {
+        return classifyBlock(block, plan);
+      } catch {
+        return { narration: block };
+      }
+    });
+
+    // 4. Feed through existing coerce pipeline (id assignment, POV
+    //    normalization, entry alignment, exit guarantee, uniqueness)
+    const coerced = coerceBeatsFromRaw(rawBeats, plan);
+    return {
+      beats: coerced.beats,
+      storyStatePatch: patch ?? coerced.storyStatePatch,
+    };
+  } catch (err) {
+    console.error("[proseSplitter] unexpected error, using fallback:", err);
+    return {
+      beats: synthesizeFallbackBeats(plan),
+      storyStatePatch: undefined,
+    };
+  }
+}
@@ -19,6 +19,7 @@ import type {
  InsertBeatResponse,
  SceneRequest,
  SceneResponse,
+  SceneStreamEvent,
  Session,
  StartRequest,
  StartResponse,
@@ -105,6 +106,77 @@ function mergeCharactersPreserveVoice(
  });
 }

+// ── SSE consumption (server-fallback path) ───────────────────────────
+// When an `emit` callback is provided, the server-fallback path requests
+// SSE instead of JSON so the caller can render progressive events
+// (plan → beat → background → voice → done). The final "done" event
+// carries the complete response payload.
+
+async function fetchSSE<T>(
+  path: string,
+  body: unknown,
+  emit?: (event: SceneStreamEvent) => void,
+): Promise<T> {
+  const res = await fetch(path, {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+      ...(emit ? { Accept: "text/event-stream" } : {}),
+    },
+    body: JSON.stringify(body),
+  });
+
+  if (!res.ok) {
+    if (res.status === 401) throw new AuthRequiredError();
+    let message = `HTTP ${res.status}`;
+    try {
+      const data = (await res.json()) as { error?: string };
+      if (data.error) message = data.error;
+    } catch { /* keep HTTP status */ }
+    throw new Error(message);
+  }
+
+  if (!emit || !res.headers.get("content-type")?.includes("text/event-stream")) {
+    return res.json() as Promise<T>;
+  }
+
+  const reader = res.body!.getReader();
+  const decoder = new TextDecoder();
+  let buffer = "";
+  let result: T | undefined;
+
+  for (;;) {
+    const { done, value } = await reader.read();
+    if (done) break;
+    buffer += decoder.decode(value, { stream: true });
+
+    const parts = buffer.split("\n\n");
+    buffer = parts.pop()!;
+
+    for (const part of parts) {
+      if (!part.trim()) continue;
+      const dataLine = part.split("\n").find((l) => l.startsWith("data: "));
+      if (!dataLine) continue;
+      let event;
+      try {
+        event = JSON.parse(dataLine.slice(6));
+      } catch {
+        continue;
+      }
+      if (event.type === "done") {
+        result = event.response as T;
+      } else if (event.type === "error") {
+        throw new Error(event.message || "Scene generation failed");
+      } else {
+        emit(event as SceneStreamEvent);
+      }
+    }
+  }
+
+  if (!result) throw new Error("SSE stream ended without a done event");
+  return result;
+}
+
 // ── Unified entry points ───────────────────────────────────────────────
 // When the browser has a BYO model config in localStorage, these call the
 // client-side engine directly (talking to providers from the browser).
@@ -134,23 +206,29 @@ export async function getTtsProvider(): Promise<TtsProvider> {
  }
 }

-export async function startSession(req: StartRequest): Promise<StartResponse> {
+export async function startSession(
+  req: StartRequest,
+  emit?: (event: SceneStreamEvent) => void,
+): Promise<StartResponse> {
  const config = getClientConfig();
  if (config) {
-    return startSessionClient(config, req);
+    return startSessionClient(config, req, emit);
  }
-  return postJson<StartResponse>("/api/start", req);
+  return fetchSSE<StartResponse>("/api/start", req, emit);
 }

-export async function requestScene(req: SceneRequest): Promise<SceneResponse> {
+export async function requestScene(
+  req: SceneRequest,
+  emit?: (event: SceneStreamEvent) => void,
+): Promise<SceneResponse> {
  const config = getClientConfig();
  if (config) {
-    return requestSceneClient(config, req);
+    return requestSceneClient(config, req, emit);
  }
-  const data = await postJson<SceneResponse>("/api/scene", {
+  const data = await fetchSSE<SceneResponse>("/api/scene", {
    ...req,
    session: stripVoicesForTransport(req.session),
-  });
+  }, emit);
  // Server stripped known-character voices for bandwidth — re-attach the
  // voices we already hold so fetchBeatAudio can synth them.
  data.characters = mergeCharactersPreserveVoice(req.session.characters, data.characters);
@@ -284,7 +284,7 @@ Dreamy watercolor style with soft tones and nostalgic atmosphere
    },

    models: {
-      corsNotice: "Please ensure your API endpoint supports browser CORS requests. Most mainstream providers (OpenAI, Anthropic, Gemini, Runware, etc.) support this by default.",
+      corsNotice: "All API keys are stored locally in your browser and never uploaded to our server. Requests are sent directly from your browser to the API endpoint; if the endpoint does not support CORS, requests are automatically routed through our server — your key is used only for that single relay and is never logged or stored.",
      textModel: "Text Model",
      imageModel: "Image Model",
      visionModel: "Vision Model",
@@ -313,7 +313,7 @@ export const ja = {

    // Models tab
    models: {
-      corsNotice: "お使いのAPIエンドポイントがブラウザのクロスオリジン要求（CORS）をサポートしていることを確認してください。ほとんどの主要プロバイダー（OpenAI、Anthropic、Gemini、Runwareなど）は、すでにデフォルトでサポートしています。",
+      corsNotice: "すべての API キーはブラウザのローカルにのみ保存され、サーバーにアップロードされることはありません。リクエストはブラウザから API エンドポイントへ直接送信されます。エンドポイントが CORS に対応していない場合は、自動的にサーバー経由で中継されます——キーはその一回の中継にのみ使用され、記録・保存されることはありません。",
      textModel: "テキストモデル",
      imageModel: "描画モデル",
      visionModel: "画像認識モデル",
@@ -313,7 +313,7 @@ export const zhCN = {

    // Models tab
    models: {
-      corsNotice: "请确保你的 API 端点支持浏览器跨域请求（CORS）。大多数主流提供商（OpenAI、Anthropic、Gemini、Runware 等）已默认支持。",
+      corsNotice: "所有 Key 仅保存在本地浏览器，不会上传到服务器。请求优先从浏览器直连 API 端点；若端点不支持跨域（CORS），将自动通过我们的服务器中转——Key 仅用于当次转发，不会被记录或存储。",
      textModel: "文本模型",
      imageModel: "绘图模型",
      visionModel: "识图模型",
@@ -0,0 +1,104 @@
+import "server-only";
+
+import { getCloudflareContext } from "@opennextjs/cloudflare";
+
+/**
+ * R2 Storage封装 - 用户生成图片持久化
+ *
+ * Phase 1: 优先使用 Runware CDN URL（零额外存储成本），R2 key 作为可选持久化。
+ * Phase 2+: save 流程中可选地将场景图从 CDN fetch 后转存 R2，防 URL 过期。
+ */
+
+/**
+ * Build R2 object key for image storage.
+ *
+ * Pattern: {storyId}/{kind}/{id}.webp
+ *   - kind: "scene" | "portrait" | "style-ref"
+ *   - id: scene.id | character.name | "ref"
+ *
+ * Example: s_abc123/scene/sc_1.webp, s_abc123/portrait/李华.webp
+ */
+export function buildImageKey(
+  storyId: string,
+  kind: "scene" | "portrait" | "style-ref",
+  id: string,
+): string {
+  // Sanitize both storyId and id to avoid path traversal / key confusion
+  const safeStoryId = storyId.replace(/[^a-zA-Z0-9_一-龥-]/g, "_");
+  const safeId = id.replace(/[^a-zA-Z0-9_一-龥-]/g, "_");
+  return `${safeStoryId}/${kind}/${safeId}.webp`;
+}
+
+/**
+ * Upload image to R2 and return public URL.
+ *
+ * @param key R2 object key (use buildImageKey to generate)
+ * @param data Image data (Buffer or Uint8Array)
+ * @returns Public R2 URL (https://<public-domain>/<key>)
+ * @throws Error if R2 upload fails or binding unavailable
+ */
+export async function uploadImage(
+  key: string,
+  data: Buffer | Uint8Array,
+): Promise<string> {
+  try {
+    const { env } = getCloudflareContext();
+
+    if (!env.R2_BUCKET) {
+      throw new Error(
+        "R2_BUCKET binding not found. " +
+        "Ensure wrangler.jsonc has r2_buckets configured and you're running via wrangler."
+      );
+    }
+
+    // Upload to R2 with WebP content-type
+    await env.R2_BUCKET.put(key, data, {
+      httpMetadata: {
+        contentType: "image/webp",
+      },
+    });
+
+    // Return public URL (assumes custom domain or R2 public bucket configured)
+    // Phase 1: hardcode or read from env; Phase 2: configure in wrangler
+    const publicDomain = process.env.R2_PUBLIC_DOMAIN ?? "https://r2.infiplot.example"; // Placeholder
+    return `${publicDomain}/${key}`;
+  } catch (error) {
+    // Re-throw with context for caller to handle gracefully
+    throw new Error(
+      `R2 upload failed for key ${key}: ${error instanceof Error ? error.message : String(error)}`
+    );
+  }
+}
+
+/**
+ * Fetch image from URL and upload to R2 (for migrating Runware CDN → R2).
+ *
+ * @param url Source image URL (e.g. Runware CDN)
+ * @param key R2 object key
+ * @returns Public R2 URL, or null if fetch/upload fails (caller should fallback to original URL)
+ */
+export async function migrateImageToR2(
+  url: string,
+  key: string,
+): Promise<string | null> {
+  try {
+    // Fetch image from CDN
+    const res = await fetch(url);
+    if (!res.ok) {
+      console.warn(`[R2] Failed to fetch image from ${url}: HTTP ${res.status}`);
+      return null;
+    }
+
+    const data = new Uint8Array(await res.arrayBuffer());
+
+    // Upload to R2
+    return await uploadImage(key, data);
+  } catch (error) {
+    // Log but don't throw - caller should gracefully fallback to CDN URL
+    console.warn(
+      `[R2] Migration failed for ${url} → ${key}:`,
+      error instanceof Error ? error.message : error
+    );
+    return null;
+  }
+}
@@ -156,6 +156,45 @@ export type WriterPlan = {
  entrySpeaker?: string;
 };

+// ──────────────────────────────────────────────────────────────────────
+//  Paradigm D — Writer single-pass streaming plan extensions.
+//
+//  In paradigm D the Writer streams one tagged response: <plan> → <story>
+//  → <choices>. WriterScenePlan is the parsed <plan> segment: the existing
+//  WriterPlan skeleton PLUS per-character scene intents (and story bible on
+//  first scene), handed to the downstream media translators the instant
+//  </plan> closes.
+// ──────────────────────────────────────────────────────────────────────
+
+/** Per-scene performance intent for one character, authored by the Writer in
+ *  the <plan> segment. Ephemeral (this scene only) — distinct from the
+ *  persistent CharacterPersona card. Feeds downstream media translators. */
+export type CharacterIntent = {
+  name: string;
+  /** 本幕情绪基调。 */
+  mood?: string;
+  /** 本幕动机 / 目的。 */
+  motivation?: string;
+  /** 本幕说话基调（指导对白质感 + TTS lineDelivery）。 */
+  speakingTone?: string;
+};
+
+/** Parsed <plan> tag: the existing WriterPlan shape plus per-character scene
+ *  intents and optional story bible (first scene only). The optional extension
+ *  keeps any degraded / minimal plan valid — downstream consumers see a
+ *  WriterPlan superset. */
+export type WriterScenePlan = WriterPlan & {
+  /** 各角色本幕表现意图，供 </plan> 闭合时分发下游媒体翻译官。 */
+  characterIntents?: CharacterIntent[];
+  /** 故事圣经（仅开局产出）——稳定区字段。后续场景 plan 不含此字段。 */
+  storyBible?: {
+    logline: string;
+    genreTags: string;
+    protagonist: string;
+    castNotes?: string;
+  };
+};
+
 // ──────────────────────────────────────────────────────────────────────
 //  Characters & voices (TTS)
 // ──────────────────────────────────────────────────────────────────────
@@ -179,6 +218,30 @@ export type CharacterVoice =
      mimeType: string;
    };

+// ──────────────────────────────────────────────────────────────────────
+//  CharacterPersona — narrative / story dimension of a Character.
+//  Merged into Character via intersection (all optional). Filled primarily
+//  by the Writer's <plan> 思维链 (paradigm D); the CharacterDesigner then
+//  realizes it into visual + voice cards. Absent on legacy sessions →
+//  callers degrade to "name only". SENTINEL append-only: adding persona
+//  only appends bytes to the stable prompt prefix — never reorders.
+// ──────────────────────────────────────────────────────────────────────
+
+export type CharacterPersona = {
+  /** 背景 / 身份 / 核心设定。 */
+  persona?: string;
+  /** 性格标签，如 ["傲娇", "腹黑", "重情义"]。 */
+  personalityTraits?: string[];
+  /** 说话风格 / 口头禅 — 对白质感的关键。 */
+  speakingStyle?: string;
+  /** 2-3 条代表性对白，作为 few-shot 锚定语气。 */
+  sampleDialogue?: string[];
+  /** 与玩家("你")的关系 / 态度。 */
+  relationshipToPlayer?: string;
+  /** 隐藏信息 / 伏笔，可驱动后续反转（默认不外显）。 */
+  secrets?: string[];
+};
+
 export type Character = {
  name: string;
  /**
@@ -215,7 +278,7 @@ export type Character = {
   *  server runs StepFun, and lets the server normalize an off-provider voice
   *  without a fresh provision. Validated against the catalog at synth time. */
  stepfunVoiceId?: string;
-};
+} & CharacterPersona;

 /** A single beat's synthesized audio, attached to the response. */
 export type BeatAudio = {
@@ -270,6 +333,33 @@ export type StoryStatePatch = {
  nextHook?: string;
 };

+// ──────────────────────────────────────────────────────────────────────
+//  WorldBook — lightweight lore injection system.
+//
+//  Entries with position "constant" are always injected into the stable
+//  prompt prefix. Entries with position "triggered" are scanned against
+//  recent beat text and injected into the dynamic suffix when keywords
+//  match. Priority controls ordering when multiple entries fire.
+// ──────────────────────────────────────────────────────────────────────
+
+export type WorldBookEntry = {
+  id: string;
+  /** Keywords that trigger this entry's injection (for triggered entries). */
+  keys: string[];
+  /** The lore content to inject into the prompt. */
+  content: string;
+  /** "constant" = always injected (stable prefix); "triggered" = keyword-matched (dynamic suffix). */
+  position: "constant" | "triggered";
+  /** Higher priority entries are injected first. Defaults to 0. */
+  priority?: number;
+};
+
+export type WorldBook = {
+  id: string;
+  name: string;
+  entries: WorldBookEntry[];
+};
+
 // ──────────────────────────────────────────────────────────────────────
 //  Session
 // ──────────────────────────────────────────────────────────────────────
@@ -317,6 +407,11 @@ export type Session = {
   * back-compat with sessions created before this field existed.
   */
  language?: string;
+  /**
+   * Optional world books for lore injection. "constant" entries are always in
+   * the prompt; "triggered" entries inject when keywords match recent text.
+   */
+  worldBooks?: WorldBook[];
 };

 // ──────────────────────────────────────────────────────────────────────
@@ -417,6 +512,18 @@ export type EngineConfig = {
 //  API contracts
 // ──────────────────────────────────────────────────────────────────────

+/**
+ * BYOK (Bring Your Own Key) LLM credentials carried in request bodies.
+ * Per-role: text/image/vision can be independently configured. Keys never
+ * persist or log server-side — they only pass through request→config build
+ * (see lib/config.ts buildByoEngineConfig). vision typically mirrors text.
+ */
+export type ByoLlmKeys = {
+  text?: { provider: string; apiKey: string; baseUrl?: string; model?: string };
+  image?: { provider: string; apiKey: string; baseUrl?: string; model?: string };
+  vision?: { provider: string; apiKey: string; baseUrl?: string; model?: string };
+};
+
 export type StartRequest = {
  worldSetting: string;
  styleGuide: string;
@@ -439,6 +546,13 @@ export type StartRequest = {
  /** Active UI locale — see Session.language. Drives the engine's language
   *  directive so AI output is generated in the player's chosen language. */
  language?: string;
+  /**
+   * BYOK: user-provided LLM keys. When present, server uses these to construct
+   * EngineConfig instead of reading from env. Per-role: text/image/vision can
+   * be independently configured. Keys never persist or log — they only pass
+   * through request→config construction.
+   */
+  byo?: ByoLlmKeys;
 };

 // /api/parse-style-image — vision LLM extracts a textual painting-style
@@ -473,6 +587,8 @@ export type SceneRequest = {
  session: Session;
  /** See StartRequest.clientTts — drops server-side TTS for BYO-key clients. */
  clientTts?: boolean;
+  /** See StartRequest.byo — BYOK LLM keys. */
+  byo?: ByoLlmKeys;
 };

 export type SceneResponse = {
@@ -534,6 +650,8 @@ export type VisionRequest = {
   * server-side image re-fetch per click.
   */
  annotatedImageBase64: string;
+  /** See StartRequest.byo — BYOK LLM keys. */
+  byo?: ByoLlmKeys;
 };

 export type VisionResponse = {
@@ -547,6 +665,8 @@ export type VisionResponse = {
 export type FreeformClassifyRequest = {
  session: Session;
  freeformText: string;
+  /** See StartRequest.byo — BYOK LLM keys. */
+  byo?: ByoLlmKeys;
 };

 export type FreeformClassify = "insert-beat" | "change-scene";
@@ -563,6 +683,8 @@ export type InsertBeatRequest = {
  freeformAction: string;
  /** See StartRequest.clientTts — drops server-side TTS for BYO-key clients. */
  clientTts?: boolean;
+  /** See StartRequest.byo — BYOK LLM keys. */
+  byo?: ByoLlmKeys;
 };

 /** Partial beat fields produced by the insert-beat director. */
@@ -577,3 +699,69 @@ export type InsertBeatResponse = {
  partial: InsertBeatPartial;
  characters: Character[];
 };
+
+// ──────────────────────────────────────────────────────────────────────
+//  Paradigm D — streaming primitives (chatStream / StreamRouter / SSE)
+//
+//  Output-side counterpart to prompt caching's input-side stable prefix
+//  (the two are orthogonal). chatStream yields incremental text + an
+//  end-of-stream usage promise. The StreamRouter slices the Writer's
+//  tagged stream into plan/story/choices and dispatches downstream. API
+//  routes serialize assembled fragments as SSE events for progressive
+//  client playback.
+// ──────────────────────────────────────────────────────────────────────
+
+/** Token usage stats returned at stream end. Kept SDK-agnostic so the type
+ *  file doesn't depend on any specific provider package. */
+export type ChatStreamUsage = {
+  prompt_tokens?: number;
+  completion_tokens?: number;
+  prompt_tokens_details?: { cached_tokens?: number };
+};
+
+/** Return shape of the streaming chat primitive (ai-client `chatStream`).
+ *  `textStream` yields incremental chunks; `usage` resolves at stream end
+ *  so `summarizeSdkUsage` cache accounting works unchanged. */
+export type ChatStreamResult = {
+  textStream: AsyncIterable<string>;
+  usage: Promise<ChatStreamUsage | undefined>;
+};
+
+/** Callbacks the StreamRouter fires as it slices the Writer's tagged stream.
+ *  All optional so a caller can subscribe to a subset. */
+export type StreamRouterHandlers = {
+  /** `</plan>` closed — dispatch downstream media translators in parallel. */
+  onPlan?: (plan: WriterScenePlan) => void;
+  /** `<story>` incremental text — push to client for progressive playback. */
+  onBeat?: (beatChunk: string) => void;
+  /** `</story>` closed — prose finalized, ready for splitting. */
+  onStoryComplete?: (rawStory: string) => void;
+  /** `</choices>` closed. */
+  onChoices?: (choices: BeatChoice[]) => void;
+};
+
+/** Aggregate result of routing one Writer stream to completion. `degraded` is
+ *  true when tag parsing fell back (missing / misordered / unclosed / timeout),
+ *  per the degrade-before-main-path reliability rule. */
+export type StreamRouterResult = {
+  plan?: WriterScenePlan;
+  beats: Beat[];
+  choices?: BeatChoice[];
+  /** Raw prose content of the <story> segment (not JSON-parsed). The director
+   *  feeds this to proseSplitter to produce Beat[]. */
+  rawStorySegment?: string;
+  degraded: boolean;
+};
+
+/** Server → client SSE events for progressive scene playback (paradigm D).
+ *  `TDone` is the terminal full-assembly payload — `SceneResponse` for
+ *  `/api/scene`, `StartResponse` for `/api/start`. The prefetch path
+ *  consumes events to `done` and reassembles a complete response. */
+export type SceneStreamEvent<TDone = SceneResponse> =
+  | { type: "plan"; plan: WriterScenePlan }
+  | { type: "beat"; beat: Beat }
+  | { type: "background"; imageUrl: string; sceneKey?: string }
+  | { type: "voice"; name: string; voice: CharacterVoice }
+  | { type: "choices"; choices: BeatChoice[] }
+  | { type: "done"; response: TDone }
+  | { type: "error"; message: string; degraded?: boolean };