feat(engine): merge cloudflare-migration — paradigm D engine, BYOK proxy, story persistence (#95)

Squash-merge the cloudflare-migration branch (7 commits by Kai ki) into
staging with conflict resolution, feature integration, and bug fixes.

Engine:
- Paradigm D: single-stream Writer replacing dual-phase Plan/Beats
- Delete Architect agent; story bible generated via Writer <plan> tag
- Modular prompt architecture (segments/registry/builder)
- StreamRouter for tagged stream splitting (<plan>/<story>/<choices>)

Infrastructure:
- Cloudflare Workers deployment (wrangler.jsonc, OpenNext adapter)
- D1 database schema + Drizzle ORM (scaffolded, not yet active)
- R2 storage helpers (scaffolded, not yet active)
- Story persistence API routes + client-side persistence

BYOK (Bring Your Own Key):
- /api/llm/user-proxy with SSRF-protected LLM proxy (+ requireUser auth)
- CORS-aware fetch in ai-client: auto-detect CORS failure, fallback to
  server proxy transparently via OpenAI SDK custom fetch
- BYO config support added to classify-freeform and vision routes
- SettingsModal CORS privacy notice (keys never logged/stored)

SSE streaming:
- engineClient.ts: fetchSSE helper for progressive scene events
- startSession/requestScene accept optional emit callback
- Fix SSE error event field name (error → message) in scene/start routes

i18n integration:
- Wire buildLanguageDirective into paradigm D's prompt builder
- Update corsNotice i18n keys (zh-CN/en/ja) with CORS proxy privacy text
- Preserve Session.language + LanguageSwitcher from i18n commit

Co-authored-by: Kai ki <155355644+zbf1009@users.noreply.github.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Zonghao Yuan
2026-06-18 18:05:38 +08:00
committed by GitHub
parent 05bd7e229c
commit 0e4c2ebef4
78 changed files with 7396 additions and 919 deletions
+175 -7
View File
@@ -1,5 +1,5 @@
import OpenAI from "openai";
import type { ProviderConfig } from "@infiplot/types";
import type { ChatStreamResult, ChatStreamUsage, ProviderConfig } from "@infiplot/types";
import { normalizeBaseUrl } from "./normalizeUrl";
export type ChatMessage = {
@@ -7,6 +7,75 @@ export type ChatMessage = {
content: string;
};
// ── CORS proxy fallback (browser-only) ───────────────────────────────
// BYO mode calls providers directly from the browser. When a provider
// rejects the preflight (no CORS headers), the first request throws a
// TypeError. We cache the blocked host and transparently reroute all
// subsequent requests through /api/llm/user-proxy, which forwards
// server-side and returns the upstream response (including SSE streams)
// byte-for-byte.
const corsBlockedHosts = new Set<string>();
export function isCorsProxied(baseUrl: string): boolean {
try {
return corsBlockedHosts.has(new URL(baseUrl).host);
} catch {
return false;
}
}
function proxyFetch(
config: ProviderConfig,
init?: RequestInit,
): Promise<Response> {
let body: Record<string, unknown> = {};
if (typeof init?.body === "string") {
try { body = JSON.parse(init.body); } catch { /* empty */ }
}
return globalThis.fetch("/api/llm/user-proxy", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({
provider: "openai",
apiKey: config.apiKey,
baseUrl: config.baseUrl,
body,
model: config.model,
stream: body.stream === true,
}),
});
}
function makeCorsAwareFetch(
config: ProviderConfig,
): (input: string | URL | Request, init?: RequestInit) => Promise<Response> {
return async (input, init) => {
const url =
typeof input === "string" ? input
: input instanceof URL ? input.toString()
: input.url;
let host: string;
try { host = new URL(url).host; } catch { return globalThis.fetch(input, init); }
if (corsBlockedHosts.has(host)) {
return proxyFetch(config, init);
}
try {
return await globalThis.fetch(input, init);
} catch (err) {
if (err instanceof TypeError) {
corsBlockedHosts.add(host);
console.warn(`[CORS] ${host} blocked, falling back to server proxy`);
return proxyFetch(config, init);
}
throw err;
}
};
}
// Cache observability for the prompt-prefix caching that the Writer stable
// prefix relies on. The OpenAI usage object reports only cached READS
// (prompt_tokens_details.cached_tokens) and has no field for cache WRITES
@@ -28,6 +97,16 @@ function summarizeSdkUsage(
return `[cache] ${tag} input=${input} completion=${output} (provider didn't report cache stats)`;
}
function makeClient(config: ProviderConfig): OpenAI {
return new OpenAI({
apiKey: config.apiKey,
baseURL: normalizeBaseUrl(config.baseUrl, "openai_compatible"),
maxRetries: 0,
dangerouslyAllowBrowser: true,
...(typeof window !== "undefined" ? { fetch: makeCorsAwareFetch(config) } : {}),
});
}
export async function chat(
config: ProviderConfig,
messages: ChatMessage[],
@@ -36,12 +115,7 @@ export async function chat(
tag?: string;
},
): Promise<string> {
const client = new OpenAI({
apiKey: config.apiKey,
baseURL: normalizeBaseUrl(config.baseUrl, "openai_compatible"),
maxRetries: 0,
dangerouslyAllowBrowser: true,
});
const client = makeClient(config);
const completion = await client.chat.completions.create({
model: config.model,
@@ -61,3 +135,97 @@ export async function chat(
}
return text;
}
/**
* Streaming variant of {@link chat} — the streaming primitive behind
* paradigm D. Returns incremental `textStream` chunks plus an end-of-stream
* `usage` promise so `summarizeSdkUsage` keeps doing cache accounting.
*
* Uses the OpenAI SDK's native streaming (`stream: true`) which returns an
* async iterable of ChatCompletionChunk. The returned `usage` settles after
* the stream drains, so callers should `await result.usage` once iteration
* ends.
*
* Degrade path: if the provider doesn't support streaming, fall back to a
* single non-streaming call wrapped as a one-chunk stream so downstream
* tag-routing still works — the player loses progressive playback but the
* scene generates normally.
*/
export function chatStream(
config: ProviderConfig,
messages: ChatMessage[],
opts?: {
temperature?: number;
tag?: string;
},
): ChatStreamResult {
const client = makeClient(config);
const tag = opts?.tag ?? "chatStream";
const msgPayload = messages.map((m) => ({
role: m.role as "system" | "user" | "assistant",
content: m.content,
}));
let resolveUsage: (u: ChatStreamUsage | undefined) => void;
const usage = new Promise<ChatStreamUsage | undefined>((r) => { resolveUsage = r; });
const textStream = (async function* (): AsyncIterable<string> {
try {
const stream = await client.chat.completions.create({
model: config.model,
messages: msgPayload,
temperature: opts?.temperature ?? 0.9,
stream: true,
stream_options: { include_usage: true },
});
for await (const chunk of stream) {
const delta = chunk.choices[0]?.delta?.content;
if (delta) yield delta;
if (chunk.usage) {
const u: ChatStreamUsage = {
prompt_tokens: chunk.usage.prompt_tokens,
completion_tokens: chunk.usage.completion_tokens,
prompt_tokens_details: chunk.usage.prompt_tokens_details
? { cached_tokens: chunk.usage.prompt_tokens_details.cached_tokens ?? undefined }
: undefined,
};
console.log(summarizeSdkUsage(tag, chunk.usage));
resolveUsage!(u);
}
}
// If usage was never emitted (provider omitted it), resolve undefined.
resolveUsage!(undefined);
} catch (err) {
// Streaming not supported by provider → degrade to buffered call.
console.warn(
`[chatStream] streaming failed, degrading to non-streaming:`,
err,
);
try {
const completion = await client.chat.completions.create({
model: config.model,
messages: msgPayload,
temperature: opts?.temperature ?? 0.9,
stream: false,
});
const text = completion.choices[0]?.message?.content ?? "";
if (text) yield text;
console.log(summarizeSdkUsage(`${tag}:degraded`, completion.usage ?? undefined));
resolveUsage!(completion.usage ? {
prompt_tokens: completion.usage.prompt_tokens,
completion_tokens: completion.usage.completion_tokens,
prompt_tokens_details: completion.usage.prompt_tokens_details
? { cached_tokens: completion.usage.prompt_tokens_details.cached_tokens ?? undefined }
: undefined,
} : undefined);
} catch (fallbackErr) {
resolveUsage!(undefined);
throw fallbackErr;
}
}
})();
return { textStream, usage };
}
+1 -1
View File
@@ -1,4 +1,4 @@
export { chat } from "./chat";
export { chat, chatStream, isCorsProxied } from "./chat";
export { generateImage } from "./image";
export type { GenerateImageOptions, GenerateImageResult } from "./image";
export { interpretClick, analyzeImageDataUrl } from "./vision";
+168
View File
@@ -0,0 +1,168 @@
import "server-only";
/**
* BYOK (Bring Your Own Key) LLM Proxy
* Core logic for proxying user-provided API keys to upstream LLM providers.
* Handles SSRF防护, base URL normalization, and SSE streaming.
*/
// ── SSRF Protection ──────────────────────────────────────────────────────
const INTERNAL_IP_PATTERNS = [
/^127\./, // localhost
/^10\./, // 10.0.0.0/8
/^172\.(1[6-9]|2\d|3[01])\./, // 172.16.0.0/12
/^192\.168\./, // 192.168.0.0/16
/^169\.254\./, // link-local
/^::1$/, // IPv6 localhost
/^fe80:/, // IPv6 link-local
/^fc00:/, // IPv6 private
];
/**
* Validate upstream URL to prevent SSRF attacks.
* Only allows https:// and rejects internal IPs.
*/
export function validateUpstreamUrl(url: string): { valid: boolean; error?: string } {
try {
const parsed = new URL(url);
// Only https allowed (no http, file, etc.)
if (parsed.protocol !== "https:") {
return { valid: false, error: "Only https:// URLs are allowed" };
}
// Reject internal IPs
const hostname = parsed.hostname.toLowerCase();
if (hostname === "localhost" || hostname === "127.0.0.1" || hostname === "::1") {
return { valid: false, error: "Localhost not allowed" };
}
// Check IP patterns
for (const pattern of INTERNAL_IP_PATTERNS) {
if (pattern.test(hostname)) {
return { valid: false, error: "Internal IP ranges not allowed" };
}
}
return { valid: true };
} catch {
return { valid: false, error: "Invalid URL" };
}
}
// ── Base URL Normalization ───────────────────────────────────────────────
/**
* Normalize base URL: add https:// prefix if missing, strip trailing slashes.
*/
export function normalizeBaseUrl(url: string): string {
let cleaned = url.trim().replace(/\/+$/, "");
if (cleaned && !/^https?:\/\//i.test(cleaned)) {
cleaned = `https://${cleaned}`;
}
return cleaned;
}
/**
* Strip known API path suffixes from base URL (longest match first).
*/
function stripSuffixes(url: string, suffixes: string[]): string {
let cleaned = url.replace(/\/+$/, "");
for (const s of [...suffixes].sort((a, b) => b.length - a.length)) {
if (cleaned.endsWith(s)) {
cleaned = cleaned.slice(0, -s.length);
break;
}
}
return cleaned.replace(/\/+$/, "");
}
const OPENAI_SUFFIXES = ["/v1/chat/completions", "/v1/models", "/v1"];
const CLAUDE_SUFFIXES = ["/v1/messages", "/v1/models", "/v1"];
const GEMINI_SUFFIXES = ["/v1beta/models", "/v1beta", "/v1/models", "/v1"];
// ── Proxy Core ───────────────────────────────────────────────────────────
export interface ProxyLLMParams {
provider: "openai" | "claude" | "gemini";
apiKey: string;
baseUrl: string;
body: Record<string, unknown>;
model?: string; // Required for Gemini (model name in URL)
stream?: boolean; // Default true
}
/**
* Proxy LLM request to upstream provider.
* Transparently forwards both streaming (SSE) and non-streaming responses.
*/
export async function proxyLLM(params: ProxyLLMParams): Promise<Response> {
const { provider, apiKey, baseUrl, body, model, stream = true } = params;
// Validate base URL
const validation = validateUpstreamUrl(baseUrl);
if (!validation.valid) {
return new Response(
JSON.stringify({ error: validation.error }),
{ status: 400, headers: { "Content-Type": "application/json" } },
);
}
// Build upstream URL and headers
let upstreamUrl: string;
const headers: Record<string, string> = { "Content-Type": "application/json" };
switch (provider) {
case "openai": {
const base = stripSuffixes(baseUrl, OPENAI_SUFFIXES);
upstreamUrl = `${base}/v1/chat/completions`;
headers["Authorization"] = `Bearer ${apiKey}`;
break;
}
case "claude": {
const base = stripSuffixes(baseUrl, CLAUDE_SUFFIXES);
upstreamUrl = `${base}/v1/messages`;
headers["x-api-key"] = apiKey;
headers["anthropic-version"] = "2023-06-01";
break;
}
case "gemini": {
const base = stripSuffixes(baseUrl, GEMINI_SUFFIXES);
const modelName = model || "gemini-2.0-flash";
const action = stream ? "streamGenerateContent" : "generateContent";
const streamParam = stream ? "&alt=sse" : "";
upstreamUrl = `${base}/v1beta/models/${modelName}:${action}?key=${apiKey}${streamParam}`;
break;
}
default:
return new Response(
JSON.stringify({ error: `Unsupported provider: ${provider}` }),
{ status: 400, headers: { "Content-Type": "application/json" } },
);
}
// Forward to upstream
try {
const upstreamResponse = await fetch(upstreamUrl, {
method: "POST",
headers,
body: JSON.stringify(body),
});
// Transparent proxy: strip content-encoding/length, forward body as-is
const responseHeaders = new Headers(upstreamResponse.headers);
responseHeaders.delete("content-encoding");
responseHeaders.delete("content-length");
return new Response(upstreamResponse.body, {
status: upstreamResponse.status,
headers: responseHeaders,
});
} catch (error) {
return new Response(
JSON.stringify({ error: error instanceof Error ? error.message : "Proxy error" }),
{ status: 502, headers: { "Content-Type": "application/json" } },
);
}
}
+99
View File
@@ -0,0 +1,99 @@
// Bring-your-own LLM API keys — stored CLIENT-SIDE ONLY.
//
// When a user supplies their own keys, we persist {provider, baseUrl, apiKey}
// in localStorage and send them with each /api/start and /api/scene request.
// Keys never leak to server logs or persistence — they only pass through the
// request→config construction path.
const STORAGE_KEY = "infiplot:llm";
/** Provider types matching byoProxy and ProviderProtocol */
export type LlmProvider = "openai" | "claude" | "gemini";
/** Stored BYO LLM config — exactly what we persist. */
export type StoredLlmConfig = {
/** Which provider API to use */
provider: LlmProvider;
/** User's API key */
apiKey: string;
/** Optional custom base URL (empty = use provider default) */
baseUrl?: string;
/** Optional model name (empty = use server-side default for this provider/role) */
model?: string;
};
/** Per-role LLM config the user can independently configure */
export type ByoLlmSettings = {
text?: StoredLlmConfig;
image?: StoredLlmConfig;
vision?: StoredLlmConfig;
};
/**
* Read persisted BYO LLM config. Returns null when running on the server,
* when nothing is stored, on parse failure, or when the stored shape is invalid.
*/
export function readStoredLlmConfig(): ByoLlmSettings | null {
if (typeof window === "undefined") return null;
try {
const raw = window.localStorage.getItem(STORAGE_KEY);
if (!raw) return null;
const parsed = JSON.parse(raw) as Partial<ByoLlmSettings>;
// Validate each role config
const result: ByoLlmSettings = {};
for (const role of ["text", "image", "vision"] as const) {
const cfg = parsed[role];
if (cfg && typeof cfg === "object") {
const provider = cfg.provider as string;
const apiKey = cfg.apiKey as string;
if (["openai", "claude", "gemini"].includes(provider) && apiKey?.trim()) {
result[role] = {
provider: provider as LlmProvider,
apiKey: apiKey.trim(),
baseUrl: typeof cfg.baseUrl === "string" ? cfg.baseUrl.trim() : undefined,
model: typeof cfg.model === "string" ? cfg.model.trim() : undefined,
};
}
}
}
return Object.keys(result).length > 0 ? result : null;
} catch {
return null;
}
}
/**
* Persist BYO LLM config. Trims keys and baseUrls so trailing whitespace
* from paste never breaks headers.
*/
export function writeStoredLlmConfig(config: ByoLlmSettings): void {
if (typeof window === "undefined") return;
try {
const payload: ByoLlmSettings = {};
for (const role of ["text", "image", "vision"] as const) {
const cfg = config[role];
if (cfg) {
payload[role] = {
provider: cfg.provider,
apiKey: cfg.apiKey.trim(),
baseUrl: cfg.baseUrl?.trim() || undefined,
model: cfg.model?.trim() || undefined,
};
}
}
window.localStorage.setItem(STORAGE_KEY, JSON.stringify(payload));
} catch {
// Storage disabled / quota / private mode — BYO simply stays off.
}
}
export function clearStoredLlmConfig(): void {
if (typeof window === "undefined") return;
try {
window.localStorage.removeItem(STORAGE_KEY);
} catch {
// ignore
}
}
+299
View File
@@ -0,0 +1,299 @@
// Client-side story persistence helpers.
//
// Provides: anonymous user ID management, save/load functions that call
// /api/stories/* and fallback to localStorage when D1 is unavailable.
import type { Session, Scene, Character, StoryState } from "@infiplot/types";
import type { StorySaveInput, SceneSaveInput, CharacterSaveInput, StoryMeta, StoryLoadResult } from "@/lib/db/repositories/storyRepo";
const USER_ID_KEY = "infiplot:userId";
const SAVE_FALLBACK_KEY = "infiplot:savedStories";
// ── Anonymous User ID ────────────────────────────────────────────────────
export function getOrCreateUserId(): string {
if (typeof window === "undefined") return "";
try {
let id = localStorage.getItem(USER_ID_KEY);
if (!id) {
id = `anon_${crypto.randomUUID()}`;
localStorage.setItem(USER_ID_KEY, id);
}
return id;
} catch {
return `anon_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
}
}
// ── Session → Save Input Conversion ─────────────────────────────────────
export function sessionToSaveInput(session: Session): {
story: StorySaveInput;
scenes: SceneSaveInput[];
characters: CharacterSaveInput[];
} {
const story: StorySaveInput = {
id: session.id,
userId: getOrCreateUserId(),
worldSetting: session.worldSetting,
styleGuide: session.styleGuide,
styleReferenceImage: session.styleReferenceImage,
orientation: (session.orientation as "portrait" | "landscape") ?? "landscape",
storyState: session.storyState,
status: "active",
};
const scenes: SceneSaveInput[] = (session.history ?? []).map(
(entry, idx) => ({
id: entry.scene.id,
sceneKey: entry.scene.sceneKey,
sceneSummary: entry.scene.scenePrompt,
imageUrl: entry.scene.imageUrl ?? "",
beats: entry.scene.beats,
sortOrder: idx,
}),
);
const characters: CharacterSaveInput[] = (session.characters ?? []).map(
(c) => ({
name: c.name,
visualDescription: c.visualDescription,
voiceDescription: c.voiceDescription,
portrait:
c.basePortraitUrl || c.basePortraitUuid
? { url: c.basePortraitUrl, uuid: c.basePortraitUuid }
: undefined,
voice: c.voice,
}),
);
return { story, scenes, characters };
}
// ── Save ─────────────────────────────────────────────────────────────────
export type SaveResult =
| { ok: true; storyId: string; source: "server" }
| { ok: true; storyId: string; source: "localStorage" }
| { ok: false; error: string };
export async function saveStory(session: Session): Promise<SaveResult> {
// TEMPORARY: localStorage-only mode (D1 disabled until auth integration).
// Anonymous D1 writes lack rate limiting / quota / ownership checks — an
// abuse risk on a public registration-less site. Persist locally instead.
return saveToLocalStorage(session);
/* DISABLED: D1 server path (will re-enable after auth integration)
const { story, scenes, characters } = sessionToSaveInput(session);
try {
const res = await fetch("/api/stories/save", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ story, scenes, characters }),
});
if (res.ok) {
const data = (await res.json()) as { storyId: string };
return { ok: true, storyId: data.storyId, source: "server" };
}
// Server failed - fallback to localStorage
throw new Error(`Server returned ${res.status}`);
} catch {
// D1 unavailable or network error - fallback to localStorage
return saveToLocalStorage(session);
}
*/
}
function saveToLocalStorage(session: Session): SaveResult {
try {
const existing = loadFromLocalStorageAll();
// Strip bulky fields before persistence to stay within localStorage quota
// (~5-10MB across ALL keys). Without this, a multi-scene session with
// several voiced characters serializes to 1-2MB+ (voice.referenceAudioBase64
// is ~160KB each, styleReferenceImage 30-80KB), which can exceed quota and
// — worse — block the main thread on the synchronous localStorage write,
// freezing the subsequent navigation back to the home page. Both fields are
// reconstructible: voices re-provision on the next /api/scene call, and
// styleReferenceImage is cosmetic (engine regenerates gracefully without it).
const slimSession: Session = {
...session,
styleReferenceImage: undefined,
characters: session.characters.map((c) => ({ ...c, voice: undefined })),
};
const entry = {
id: session.id,
worldSetting: session.worldSetting,
styleGuide: session.styleGuide,
sceneCount: session.history?.length ?? 0,
savedAt: Date.now(),
sessionJson: JSON.stringify(slimSession),
};
const updated = [entry, ...existing.filter((e) => e.id !== session.id)].slice(0, 20);
localStorage.setItem(SAVE_FALLBACK_KEY, JSON.stringify(updated));
return { ok: true, storyId: session.id, source: "localStorage" };
} catch {
return { ok: false, error: "无法保存到本地存储" };
}
}
// ── Load ─────────────────────────────────────────────────────────────────
export async function loadStoryList(): Promise<StoryMeta[]> {
// TEMPORARY: localStorage-only mode (D1 disabled until auth integration)
const entries = loadFromLocalStorageAll();
return entries.map((e) => ({
id: e.id,
userId: null, // anonymous
worldSetting: e.worldSetting,
styleGuide: e.styleGuide,
orientation: "landscape", // localStorage doesn't store this, default
status: "active",
sceneCount: e.sceneCount,
createdAt: new Date(e.savedAt),
updatedAt: new Date(e.savedAt),
}));
/* DISABLED: D1 server path (will re-enable after auth integration)
const userId = getOrCreateUserId();
try {
const res = await fetch(`/api/stories/list?userId=${encodeURIComponent(userId)}`);
if (res.ok) {
const data = (await res.json()) as { stories: StoryMeta[] };
return data.stories;
}
return [];
} catch {
return [];
}
*/
}
export async function loadStory(storyId: string): Promise<StoryLoadResult | null> {
// TEMPORARY: localStorage-only mode — unused in current code (play page uses
// loadFromLocalStorage directly). Returns null to maintain type compatibility.
// Will be re-enabled when D1 is restored after auth integration.
return null;
/* DISABLED: D1 server path
try {
const res = await fetch(`/api/stories/${encodeURIComponent(storyId)}`);
if (res.ok) {
return (await res.json()) as StoryLoadResult;
}
return null;
} catch {
return null;
}
*/
}
export async function deleteStory(storyId: string): Promise<boolean> {
// TEMPORARY: localStorage-only mode
try {
const existing = loadFromLocalStorageAll();
const updated = existing.filter((e) => e.id !== storyId);
if (updated.length === existing.length) return false; // not found
localStorage.setItem(SAVE_FALLBACK_KEY, JSON.stringify(updated));
return true;
} catch {
return false;
}
/* DISABLED: D1 server path
try {
const res = await fetch(`/api/stories/${encodeURIComponent(storyId)}`, {
method: "DELETE",
});
return res.ok;
} catch {
return false;
}
*/
}
// ── localStorage fallback helpers ────────────────────────────────────────
type LocalStorageEntry = {
id: string;
worldSetting: string;
styleGuide: string;
sceneCount: number;
savedAt: number;
sessionJson: string;
};
function loadFromLocalStorageAll(): LocalStorageEntry[] {
if (typeof window === "undefined") return [];
try {
const raw = localStorage.getItem(SAVE_FALLBACK_KEY);
if (!raw) return [];
return JSON.parse(raw) as LocalStorageEntry[];
} catch {
return [];
}
}
export function loadFromLocalStorage(storyId: string): Session | null {
const entries = loadFromLocalStorageAll();
const entry = entries.find((e) => e.id === storyId);
if (!entry) return null;
try {
return JSON.parse(entry.sessionJson) as Session;
} catch {
return null;
}
}
// ── StoryLoadResult → Session Conversion ─────────────────────────────────
/**
* Convert StoryLoadResult (API response from /api/stories/[id]) back to Session
* shape consumed by app/play/page.tsx.
*/
export function storyLoadResultToSession(result: StoryLoadResult): Session {
const { story, scenes, characters } = result;
// Map scenes back to SceneHistoryEntry structure
const history = scenes.map((s) => {
const beats = s.beats ?? [];
// entryBeatId is not persisted in D1 — recover it from the first beat.
const entryBeatId = beats[0]?.id ?? "";
return {
scene: {
id: s.id,
sceneKey: s.sceneKey,
scenePrompt: s.sceneSummary ?? "",
imageUrl: s.imageUrl,
beats,
entryBeatId,
orientation: s.orientation,
},
visitedBeatIds: entryBeatId ? [entryBeatId] : [], // rebuilt as user navigates
exit: undefined, // Not persisted in D1
};
});
return {
id: story.id,
// createdAt crosses the JSON API boundary as an ISO string, so coerce it
// back to an epoch the Session shape expects (number).
createdAt: new Date(story.createdAt).getTime(),
worldSetting: story.worldSetting,
styleGuide: story.styleGuide,
styleReferenceImage: story.styleReferenceImage,
orientation: story.orientation,
storyState: story.storyState,
history,
characters: characters.map((c) => ({
name: c.name,
voiceDescription: c.voiceDescription ?? "",
visualDescription: c.visualDescription,
basePortraitUuid: c.portrait?.uuid,
basePortraitUrl: c.portrait?.url,
voice: c.voice,
})),
};
}
+122
View File
@@ -1,8 +1,13 @@
import "server-only";
import type {
ByoLlmKeys,
EngineConfig,
ProviderConfig,
ProviderProtocol,
TtsConfig,
} from "@infiplot/types";
import { validateUpstreamUrl, normalizeBaseUrl } from "./byoProxy";
const VALID_PROTOCOLS = [
"openai_compatible",
@@ -88,3 +93,120 @@ export function loadEngineConfig(): EngineConfig {
imageHedgeMs: readOptionalPositiveInt("IMAGE_HEDGE_MS"),
};
}
// ── BYOK (Bring Your Own Key) ────────────────────────────────────────────
/** Provider default base URLs when user doesn't specify one. */
const PROVIDER_DEFAULTS: Record<string, string> = {
openai: "https://api.openai.com",
claude: "https://api.anthropic.com",
gemini: "https://generativelanguage.googleapis.com",
};
/** Provider default models when user doesn't specify one. */
const MODEL_DEFAULTS: Record<string, { text: string; image: string; vision: string }> = {
openai: {
text: "gpt-4o",
image: "gpt-image-1", // CR-4: 支持任意尺寸,dall-e-3 不支持 1536x1024
vision: "gpt-4o",
},
claude: {
text: "claude-3-5-sonnet-20241022",
image: "claude-3-5-sonnet-20241022", // Claude doesn't have native image gen
vision: "claude-3-5-sonnet-20241022",
},
gemini: {
text: "gemini-2.0-flash-exp",
image: "imagen-3.0-generate-001",
vision: "gemini-2.0-flash-exp",
},
};
type ByoRole = "text" | "image" | "vision";
type ByoProviderConfig = { provider: string; apiKey: string; baseUrl?: string; model?: string };
/**
* Build ProviderConfig from user-supplied BYOK credentials.
* Validates upstream URL (SSRF protection), normalizes baseUrl, applies defaults.
* Throws on validation failure so API route can return 400.
*/
function buildByoProviderConfig(
role: ByoRole,
byo: ByoProviderConfig,
fallback: ProviderConfig,
): ProviderConfig {
const { provider, apiKey, baseUrl } = byo;
// Validate provider
if (!["openai", "claude", "gemini"].includes(provider)) {
throw new Error(`Invalid BYO provider for ${role}: ${provider}`);
}
// Claude/Gemini cannot generate images — only OpenAI supports image generation
if (role === "image" && provider !== "openai") {
throw new Error(
`BYO provider "${provider}" does not support image generation. Use "openai" for the image role.`,
);
}
// Validate apiKey
if (!apiKey?.trim()) {
throw new Error(`Missing BYO apiKey for ${role}`);
}
// Resolve baseUrl (user-provided or provider default)
let resolvedBaseUrl = baseUrl?.trim() || PROVIDER_DEFAULTS[provider];
if (!resolvedBaseUrl) {
throw new Error(`No baseUrl for BYO ${role} provider: ${provider}`);
}
resolvedBaseUrl = normalizeBaseUrl(resolvedBaseUrl);
// SSRF protection — validates the HOST portion of the URL.
// SAFETY INVARIANT: ai-client/normalizeUrl.ts only appends PATH segments
// (e.g. /v1) but never changes the host/authority. If that invariant ever
// breaks, this check must be moved downstream or duplicated. (CR-9)
const validation = validateUpstreamUrl(resolvedBaseUrl);
if (!validation.valid) {
throw new Error(`Invalid BYO baseUrl for ${role}: ${validation.error}`);
}
// Resolve model (user-provided > provider default > official model)
const modelDefaults = MODEL_DEFAULTS[provider];
const model = byo.model?.trim() || modelDefaults?.[role] || fallback.model;
// All providers are reached via their OpenAI-compatible endpoints.
const providerProtocol: ProviderProtocol =
provider === "openai" ? "openai" : "openai_compatible";
return {
baseUrl: resolvedBaseUrl,
apiKey: apiKey.trim(),
model,
provider: providerProtocol,
};
}
/**
* Build EngineConfig with BYOK (Bring Your Own Key) overrides.
* - `byo` param contains user-provided keys from request body (StartRequest.byo / SceneRequest.byo)
* - For each role (text/image/vision), if user provided BYO config, use it; otherwise fallback to official keys
* - Validates all BYO baseUrls (SSRF protection) and throws on failure
*/
export function buildByoEngineConfig(
byo: ByoLlmKeys,
officialConfig: EngineConfig,
): EngineConfig {
return {
text: byo.text
? buildByoProviderConfig("text", byo.text, officialConfig.text)
: officialConfig.text,
image: byo.image
? buildByoProviderConfig("image", byo.image, officialConfig.image)
: officialConfig.image,
vision: byo.vision
? buildByoProviderConfig("vision", byo.vision, officialConfig.vision)
: officialConfig.vision,
tts: officialConfig.tts, // TTS BYOK stays client-side only (existing flow)
mockImage: officialConfig.mockImage,
};
}
+41
View File
@@ -0,0 +1,41 @@
import "server-only";
import { drizzle } from "drizzle-orm/d1";
import { getCloudflareContext } from "@opennextjs/cloudflare";
import * as schema from "./schema";
/**
* Get D1 database instance from Cloudflare Workers env binding.
*
* Usage in API routes:
* const db = getDb();
* const stories = await db.select().from(schema.stories).where(...);
*
* @throws Error if called outside Cloudflare Workers runtime (e.g. local dev without wrangler)
*/
export function getDb() {
try {
const { env } = getCloudflareContext();
if (!env.DB) {
throw new Error(
"D1 binding 'DB' not found. " +
"Ensure wrangler.jsonc has d1_databases configured and you're running via wrangler dev/deploy."
);
}
return drizzle(env.DB, { schema });
} catch (error) {
// Re-throw with more context for debugging
throw new Error(
`Failed to get D1 database: ${error instanceof Error ? error.message : String(error)}. ` +
"Make sure you're running in Cloudflare Workers context (wrangler dev/deploy)."
);
}
}
/**
* Type alias for the Drizzle D1 database instance.
* Useful for dependency injection and testing.
*/
export type DbInstance = ReturnType<typeof getDb>;
+45
View File
@@ -0,0 +1,45 @@
import "server-only";
import { eq, and, sql } from "drizzle-orm";
import type { DbInstance } from "../client";
import { featuredStories } from "../schema";
import type { FeaturedStory } from "../schema";
/**
* Featured Story Repository - encapsulates D1 access for homepage featured stories.
*
* Provides: listByGender (active only, sorted by sortOrder), incrementClick (analytics).
*/
export class FeaturedRepository {
constructor(private db: DbInstance) {}
/**
* List active featured stories for a given gender, ordered by sortOrder.
*
* @param gender "male" or "female"
* @returns Array of FeaturedStory (only isActive=1, sorted by sortOrder ASC)
*/
async listByGender(gender: "male" | "female"): Promise<FeaturedStory[]> {
return this.db
.select()
.from(featuredStories)
.where(and(eq(featuredStories.gender, gender), eq(featuredStories.isActive, 1)))
.orderBy(featuredStories.sortOrder);
}
/**
* Increment click count for a featured story (analytics).
*
* @param id Featured story ID (e.g. "m0", "f12")
* @returns true if updated, false if not found
*/
async incrementClick(id: string): Promise<boolean> {
const result = await this.db
.update(featuredStories)
.set({ clickCount: sql`${featuredStories.clickCount} + 1` })
.where(eq(featuredStories.id, id));
// Drizzle D1 update returns { success, meta: { changes }, results }
return ((result as any).meta?.changes ?? 0) > 0;
}
}
+308
View File
@@ -0,0 +1,308 @@
import "server-only";
import { eq, desc, sql, inArray } from "drizzle-orm";
import type { DbInstance } from "../client";
import { stories, scenes, characters } from "../schema";
import type { Session, Scene as EngineScene, Character as EngineCharacter, StoryState } from "@infiplot/types";
// ── Type Adapters ────────────────────────────────────────────────────────
/**
* Input shape for saving a story session.
* Mirrors Session but with explicit story-level fields.
*/
export type StorySaveInput = {
id: string; // Session ID
userId?: string; // nullable - Phase 1 uses anonymous sessionId
worldSetting: string;
styleGuide: string;
styleReferenceImage?: string; // data URI or R2 key (TBD in save logic)
orientation: "portrait" | "landscape";
storyState?: StoryState;
status?: "active" | "archived";
};
export type SceneSaveInput = {
id: string;
sceneKey?: string;
sceneSummary?: string;
imageUrl: string; // Runware CDN URL (primary)
beats: EngineScene["beats"]; // Beat graph - will be serialized to beatsJson
orientation?: "portrait" | "landscape";
sortOrder: number; // scene sequence in story
};
export type CharacterSaveInput = {
name: string;
visualDescription?: string;
voiceDescription?: string;
portrait?: {
url?: string;
uuid?: string;
};
voice?: EngineCharacter["voice"];
};
/**
* Story metadata for list views.
*/
export type StoryMeta = {
id: string;
userId: string | null;
worldSetting: string;
styleGuide: string;
orientation: string;
status: string;
sceneCount: number;
createdAt: Date;
updatedAt: Date;
};
/**
* Full story load result (maps back to Session structure).
*/
export type StoryLoadResult = {
story: {
id: string;
userId: string | null;
worldSetting: string;
styleGuide: string;
styleReferenceImage?: string;
orientation: "portrait" | "landscape";
storyState?: StoryState;
status: string;
createdAt: Date;
updatedAt: Date;
};
scenes: Array<{
id: string;
sceneKey?: string;
sceneSummary?: string;
imageUrl: string;
beats: EngineScene["beats"];
orientation?: "portrait" | "landscape";
sortOrder: number;
createdAt: Date;
}>;
characters: Array<{
name: string;
visualDescription?: string;
voiceDescription?: string;
portrait?: {
url?: string;
uuid?: string;
};
voice?: EngineCharacter["voice"];
}>;
};
// ── Repository ───────────────────────────────────────────────────────────
/**
* Story Repository - encapsulates D1 access for story persistence.
*
* **Atomic save**: uses D1 batch transaction to ensure all-or-nothing writes.
* **Cascade delete**: relies on schema FK ON DELETE CASCADE.
* **Serialization**: beats and storyState are JSON-serialized to TEXT columns.
*/
export class StoryRepository {
constructor(private db: DbInstance) {}
/**
* Save a complete story session (story + scenes + characters) atomically.
* Uses D1 batch transaction - all writes succeed or all fail.
*
* @param input Story metadata
* @param sceneInputs Scene list (beats will be serialized)
* @param characterInputs Character list (voice will be serialized)
* @returns storyId on success
* @throws Error if D1 transaction fails
*/
async save(
input: StorySaveInput,
sceneInputs: SceneSaveInput[],
characterInputs: CharacterSaveInput[],
): Promise<{ storyId: string }> {
const now = new Date();
// Build story record
const storyRecord = {
id: input.id,
userId: input.userId ?? null,
worldSetting: input.worldSetting,
styleGuide: input.styleGuide,
styleReferenceImageKey: input.styleReferenceImage ?? null, // Phase 1: store data URI as-is; R2 upload TBD
orientation: input.orientation,
storyStateJson: input.storyState ? JSON.stringify(input.storyState) : null,
status: input.status ?? "active",
createdAt: now,
updatedAt: now,
};
// Build scene records (serialize beats to JSON)
const sceneRecords = sceneInputs.map((s, idx) => ({
id: s.id,
storyId: input.id,
sceneKey: s.sceneKey ?? null,
sceneSummary: s.sceneSummary ?? null,
sceneImageKey: null, // Phase 1: R2 upload TBD
sceneImageUrl: s.imageUrl,
beatsJson: JSON.stringify(s.beats),
sortOrder: s.sortOrder ?? idx,
createdAt: now,
}));
// Build character records (serialize voice to JSON, ensure uniqueness per story+name)
const characterRecords = characterInputs.map((c, idx) => ({
id: `${input.id}_char_${idx}`, // synthetic ID
storyId: input.id,
name: c.name,
visualDescription: c.visualDescription ?? null,
voiceDescription: c.voiceDescription ?? null,
basePortraitKey: null, // Phase 1: R2 upload TBD
basePortraitUrl: c.portrait?.url ?? null,
basePortraitUuid: c.portrait?.uuid ?? null,
voiceJson: c.voice ? JSON.stringify(c.voice) : null,
createdAt: now,
}));
// Execute atomic batch transaction
await this.db.batch([
this.db.insert(stories).values(storyRecord).onConflictDoUpdate({
target: stories.id,
set: {
worldSetting: storyRecord.worldSetting,
styleGuide: storyRecord.styleGuide,
styleReferenceImageKey: storyRecord.styleReferenceImageKey,
orientation: storyRecord.orientation,
storyStateJson: storyRecord.storyStateJson,
status: storyRecord.status,
updatedAt: now,
},
}),
// Clear old scenes/characters (will cascade delete via FK)
this.db.delete(scenes).where(eq(scenes.storyId, input.id)),
this.db.delete(characters).where(eq(characters.storyId, input.id)),
// Insert new scenes/characters
...sceneRecords.map((r) => this.db.insert(scenes).values(r)),
...characterRecords.map((r) => this.db.insert(characters).values(r)),
]);
return { storyId: input.id };
}
/**
* Load a complete story by ID, reconstructing Session shape.
*
* @param storyId Story primary key
* @returns StoryLoadResult with deserialized beats/storyState, or null if not found
*/
async findById(storyId: string): Promise<StoryLoadResult | null> {
const [storyRow] = await this.db
.select()
.from(stories)
.where(eq(stories.id, storyId))
.limit(1);
if (!storyRow) return null;
const sceneRows = await this.db
.select()
.from(scenes)
.where(eq(scenes.storyId, storyId))
.orderBy(scenes.sortOrder);
const characterRows = await this.db
.select()
.from(characters)
.where(eq(characters.storyId, storyId));
return {
story: {
id: storyRow.id,
userId: storyRow.userId,
worldSetting: storyRow.worldSetting,
styleGuide: storyRow.styleGuide,
styleReferenceImage: storyRow.styleReferenceImageKey ?? undefined,
orientation: storyRow.orientation as "portrait" | "landscape",
storyState: storyRow.storyStateJson
? (JSON.parse(storyRow.storyStateJson) as StoryState)
: undefined,
status: storyRow.status,
createdAt: storyRow.createdAt,
updatedAt: storyRow.updatedAt,
},
scenes: sceneRows.map((s) => ({
id: s.id,
sceneKey: s.sceneKey ?? undefined,
sceneSummary: s.sceneSummary ?? undefined,
imageUrl: s.sceneImageUrl ?? "", // CR-5: nullable column, fallback to empty string
beats: s.beatsJson ? JSON.parse(s.beatsJson) : [],
orientation: s.sceneImageUrl ? undefined : undefined, // Phase 1: no per-scene orientation in schema
sortOrder: s.sortOrder,
createdAt: s.createdAt,
})),
characters: characterRows.map((c) => ({
name: c.name,
visualDescription: c.visualDescription ?? undefined,
voiceDescription: c.voiceDescription ?? undefined,
portrait: c.basePortraitUrl
? { url: c.basePortraitUrl, uuid: c.basePortraitUuid ?? undefined }
: undefined,
voice: c.voiceJson ? JSON.parse(c.voiceJson) : undefined,
})),
};
}
/**
* List story metadata for a given user, ordered by most recent first.
*
* @param userId User ID (or anonymous sessionId in Phase 1)
* @param limit Max stories to return (default 50)
* @returns Array of StoryMeta
*/
async listByUser(userId: string, limit = 50): Promise<StoryMeta[]> {
const storyRows = await this.db
.select()
.from(stories)
.where(eq(stories.userId, userId))
.orderBy(desc(stories.updatedAt))
.limit(limit);
if (storyRows.length === 0) return [];
// CR-10: batch scene count in 2 queries total (not N+1)
const storyIds = storyRows.map((r) => r.id);
const countRows = await this.db
.select({ storyId: scenes.storyId, count: sql<number>`count(*)` })
.from(scenes)
.where(inArray(scenes.storyId, storyIds))
.groupBy(scenes.storyId);
const countMap = new Map(countRows.map((r) => [r.storyId, r.count]));
return storyRows.map((row) => ({
id: row.id,
userId: row.userId,
worldSetting: row.worldSetting,
styleGuide: row.styleGuide,
orientation: row.orientation,
status: row.status,
sceneCount: countMap.get(row.id) ?? 0,
createdAt: row.createdAt,
updatedAt: row.updatedAt,
}));
}
/**
* Delete a story and all associated scenes/characters (cascade via FK).
*
* @param storyId Story primary key
* @returns true if deleted, false if not found
*/
async delete(storyId: string): Promise<boolean> {
const result = await this.db.delete(stories).where(eq(stories.id, storyId));
// Drizzle D1 delete returns { success, meta: { changes }, results }
return ((result as any).meta?.changes ?? 0) > 0;
}
}
+123
View File
@@ -0,0 +1,123 @@
import { sqliteTable, text, integer, index, uniqueIndex } from "drizzle-orm/sqlite-core";
import { sql } from "drizzle-orm";
// ── Stories ──────────────────────────────────────────────────────────────
// User story sessions (REQ-4). Each story contains multiple scenes and characters.
export const stories = sqliteTable(
"stories",
{
id: text("id").primaryKey(), // s_xxx session ID
userId: text("user_id"), // nullable - Phase 1 uses anonymous sessionId
worldSetting: text("world_setting").notNull(),
styleGuide: text("style_guide").notNull(),
styleReferenceImageKey: text("style_reference_image_key"), // R2 key (optional)
orientation: text("orientation").notNull().default("landscape"), // "portrait" | "landscape"
storyStateJson: text("story_state_json"), // JSON: StoryState
status: text("status").notNull().default("active"), // "active" | "archived"
createdAt: integer("created_at", { mode: "timestamp" })
.notNull()
.default(sql`(unixepoch())`),
updatedAt: integer("updated_at", { mode: "timestamp" })
.notNull()
.default(sql`(unixepoch())`)
.$onUpdate(() => new Date()),
},
(table) => ({
userIdIdx: index("stories_user_id_idx").on(table.userId),
createdAtIdx: index("stories_created_at_idx").on(table.createdAt),
}),
);
// ── Scenes ───────────────────────────────────────────────────────────────
// Story scenes (REQ-4). Beats stored as JSON blob (not separate table).
export const scenes = sqliteTable(
"scenes",
{
id: text("id").primaryKey(),
storyId: text("story_id")
.notNull()
.references(() => stories.id, { onDelete: "cascade" }),
sceneKey: text("scene_key"), // e.g. "classroom-dusk"
sceneSummary: text("scene_summary"),
sceneImageKey: text("scene_image_key"), // R2 key (optional)
sceneImageUrl: text("scene_image_url"), // Runware CDN URL (primary)
beatsJson: text("beats_json"), // JSON: Beat[] - whole scene beats graph
sortOrder: integer("sort_order").notNull(), // scene sequence in story
createdAt: integer("created_at", { mode: "timestamp" })
.notNull()
.default(sql`(unixepoch())`),
},
(table) => ({
storyIdIdx: index("scenes_story_id_idx").on(table.storyId),
}),
);
// ── Characters ───────────────────────────────────────────────────────────
// Story characters (REQ-4). Each character belongs to a story.
export const characters = sqliteTable(
"characters",
{
id: text("id").primaryKey(),
storyId: text("story_id")
.notNull()
.references(() => stories.id, { onDelete: "cascade" }),
name: text("name").notNull(),
visualDescription: text("visual_description"),
voiceDescription: text("voice_description"),
basePortraitKey: text("base_portrait_key"), // R2 key (optional)
basePortraitUrl: text("base_portrait_url"), // CDN URL (primary)
basePortraitUuid: text("base_portrait_uuid"), // image service UUID
voiceJson: text("voice_json"), // JSON: CharacterVoice
createdAt: integer("created_at", { mode: "timestamp" })
.notNull()
.default(sql`(unixepoch())`),
},
(table) => ({
storyNameIdx: uniqueIndex("characters_story_name_idx").on(
table.storyId,
table.name,
),
}),
);
// ── Featured Stories ─────────────────────────────────────────────────────
// Featured story cards displayed on homepage (REQ-5).
export const featuredStories = sqliteTable(
"featured_stories",
{
id: text("id").primaryKey(), // e.g. "m0", "f12"
gender: text("gender").notNull(), // "male" | "female"
title: text("title").notNull(),
outline: text("outline").notNull(),
style: text("style").notNull(),
tags: text("tags").notNull(), // JSON array
coverPath: text("cover_path").notNull(), // e.g. "/home/m0.webp"
firstactPath: text("firstact_path").notNull(), // e.g. "/home/firstact/m0.json"
firstscenePath: text("firstscene_path"), // e.g. "/home/firstscene/m0.webp"
sortOrder: integer("sort_order").notNull().default(0),
isActive: integer("is_active").notNull().default(1), // 1 = active, 0 = inactive
clickCount: integer("click_count").notNull().default(0),
createdAt: integer("created_at", { mode: "timestamp" })
.notNull()
.default(sql`(unixepoch())`),
},
(table) => ({
genderActiveIdx: index("featured_gender_active_idx").on(
table.gender,
table.isActive,
),
}),
);
// ── Type exports ─────────────────────────────────────────────────────────
export type Story = typeof stories.$inferSelect;
export type NewStory = typeof stories.$inferInsert;
export type Scene = typeof scenes.$inferSelect;
export type NewScene = typeof scenes.$inferInsert;
export type Character = typeof characters.$inferSelect;
export type NewCharacter = typeof characters.$inferInsert;
export type FeaturedStory = typeof featuredStories.$inferSelect;
export type NewFeaturedStory = typeof featuredStories.$inferInsert;
-90
View File
@@ -1,90 +0,0 @@
import { chat } from "@infiplot/ai-client";
import type { ProviderConfig, Session, StoryState } from "@infiplot/types";
import { parseJsonLoose } from "../jsonParser";
import { ARCHITECT_SYSTEM, buildArchitectUserMessage } from "../prompts";
// ──────────────────────────────────────────────────────────────────────
// Architect agent — ONE LLM call at session start.
//
// Expands the user's (often terse) world + style prompt into a real story
// bible: a second-person protagonist with a want and a flaw, a single
// central dramatic question (logline), a genre frame that anchors the
// 爽点 rhythm, an engineered cold-open for scene 1 (nextHook), and a small
// intentional cast. Seeds the StoryState that the Writer reads and updates
// every scene — so the story has a spine from beat one instead of being
// improvised cold.
//
// Everything is best-effort coerced with fallbacks: a malformed LLM
// response can never abort session start — worst case the Writer just gets
// a thinner bible and improvises more.
// ──────────────────────────────────────────────────────────────────────
type RawStoryState = {
logline?: unknown;
genreTags?: unknown;
protagonist?: unknown;
castNotes?: unknown;
synopsis?: unknown;
openThreads?: unknown;
relationships?: unknown;
nextHook?: unknown;
};
function str(raw: unknown): string {
return typeof raw === "string" ? raw.trim() : "";
}
function strArray(raw: unknown): string[] | undefined {
if (!Array.isArray(raw)) return undefined;
const out = raw
.map((x) => (typeof x === "string" ? x.trim() : ""))
.filter((x) => x.length > 0);
return out.length > 0 ? out : undefined;
}
export async function runArchitect(
config: ProviderConfig,
session: Session,
): Promise<StoryState> {
try {
const raw = await chat(
config,
[
{ role: "system", content: ARCHITECT_SYSTEM },
{ role: "user", content: buildArchitectUserMessage(session) },
],
{ temperature: 0.85, tag: "architect" },
);
const parsed = parseJsonLoose<RawStoryState>(raw);
return {
// Stable spine — fall back to the raw world/style prompt so the bible is
// never wholly empty even if the model returns garbage.
logline: str(parsed.logline) || session.worldSetting,
genreTags: str(parsed.genreTags),
protagonist:
str(parsed.protagonist) ||
"你是这个故事的主角(第二人称视角,永不出现在画面里)。",
castNotes: str(parsed.castNotes) || undefined,
// Volatile seeds — the opening Writer will rewrite these via its patch.
synopsis: str(parsed.synopsis) || "故事即将开始。",
openThreads: strArray(parsed.openThreads),
relationships: strArray(parsed.relationships),
nextHook: str(parsed.nextHook) || undefined,
};
} catch (err) {
// chat() or parseJsonLoose() can throw (network / unrepairable JSON).
// The Architect is best-effort: never let it abort session start — return
// a minimal bible seeded from the raw prompt and let the Writer improvise.
const msg = err instanceof Error ? err.message : String(err);
console.error(`[architect] failed, using minimal bible: ${msg}`);
return {
logline: session.worldSetting,
genreTags: "",
protagonist:
"你是这个故事的主角(第二人称视角,永不出现在画面里)。",
synopsis: "故事即将开始。",
};
}
}
+14 -3
View File
@@ -7,6 +7,7 @@ import {
} from "@infiplot/tts-client";
import type {
Character,
CharacterIntent,
CharacterVoice,
EngineConfig,
Session,
@@ -55,6 +56,7 @@ async function runDesignLLM(
config: EngineConfig,
session: Session,
charName: string,
intent?: CharacterIntent,
): Promise<CharacterDesignOutput> {
const raw = await chat(
config.text,
@@ -62,12 +64,20 @@ async function runDesignLLM(
{ role: "system", content: buildCharacterDesignerSystem({ stepfun: stepfunEnabled(config) }) },
{
role: "user",
content: buildCharacterDesignerUserMessage(charName, session),
content: buildCharacterDesignerUserMessage(charName, session, intent),
},
],
{ temperature: 0.7, tag: "character-designer" },
);
return parseJsonLoose<CharacterDesignOutput>(raw);
// parseJsonLoose can throw on irreparable JSON; degrade to an empty card so
// designCharacterCard's fallbacks (name-inference voice, no portrait) kick in.
try {
return parseJsonLoose<CharacterDesignOutput>(raw);
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
console.error(`[characterDesigner] design JSON parse failed for ${charName}: ${msg}`);
return {};
}
}
/** True when the server's TTS config points at StepFun (so the CharacterDesigner
@@ -155,9 +165,10 @@ export async function designCharacterCard(
config: EngineConfig,
session: Session,
charName: string,
intent?: CharacterIntent,
): Promise<CharacterCard> {
const tDesign = Date.now();
const design = await runDesignLLM(config, session, charName);
const design = await runDesignLLM(config, session, charName, intent);
tlog(`[charDesigner ${charName}] design LLM`, tDesign);
// Drop invalid catalog picks before they reach provision/synth. A hallucinated
+161 -113
View File
@@ -1,22 +1,19 @@
import { chat } from "@infiplot/ai-client";
import { chatStream } from "@infiplot/ai-client";
import type {
Beat,
BeatActiveCharacter,
BeatChoice,
BeatChoiceEffect,
BeatNext,
ChatStreamResult,
ProviderConfig,
Session,
StoryStatePatch,
WriterPlan,
WriterScenePlan,
} from "@infiplot/types";
import { parseJsonLoose } from "../jsonParser";
import {
WRITER_BEATS_SYSTEM,
WRITER_PLAN_SYSTEM,
buildWriterBeatsUserMessage,
buildWriterPlanUserMessage,
} from "../prompts";
import { buildWriterStreamMessages } from "../prompts";
// ──────────────────────────────────────────────────────────────────────
// Writer agent — owns the narrative half of scene generation, in TWO phases.
@@ -353,8 +350,9 @@ function coerceStringArray(raw: unknown): string[] | undefined {
// Pull the volatile story-memory rewrite out of the Writer's JSON. Only
// non-empty fields are kept; an all-empty/absent patch returns undefined so
// the director leaves the carried StoryState untouched.
function coerceStoryStatePatch(
// the director leaves the carried StoryState untouched. Exported so the
// prose splitter can reuse it to parse the <story> segment's <memory> block.
export function coerceStoryStatePatch(
raw: RawStoryStatePatch | undefined,
): StoryStatePatch | undefined {
if (!raw || typeof raw !== "object") return undefined;
@@ -409,110 +407,7 @@ function renameBeatId(beats: Beat[], from: string, to: string): Beat[] {
});
}
// ── Phase A — plan the scene skeleton. Fast (small output): just enough for
// the Cinematographer + character design + Painter to start before the
// dialogue exists. The cast is unioned with the entry roster/speaker so a
// character named in the entry but omitted from `cast` still gets designed.
export async function runWriterPlan(
config: ProviderConfig,
session: Session,
): Promise<WriterPlan> {
const raw = await chat(
config,
[
{ role: "system", content: WRITER_PLAN_SYSTEM },
{ role: "user", content: buildWriterPlanUserMessage(session) },
],
{ temperature: 0.9, tag: "writer-plan" },
);
const parsed = parseJsonLoose<RawPlan>(raw);
const entryActiveCharacters =
coerceActiveCharacters(parsed.entryActiveCharacters) ?? [];
// Normalize POV variants → "你"; NPC names pass through. "你" is a valid entry
// speaker (Pattern B — player talking), but is never a designed cast member.
const rawEntrySpeaker = parsed.entrySpeaker?.trim() || undefined;
const entrySpeaker = rawEntrySpeaker
? normalizeSpeakerName(rawEntrySpeaker)
: undefined;
const cast = coerceCast(parsed.cast);
const castSet = new Set(cast);
const addToCast = (name: string): void => {
if (!isPovName(name) && !castSet.has(name)) {
castSet.add(name);
cast.push(name);
}
};
for (const c of entryActiveCharacters) addToCast(c.name);
if (entrySpeaker) addToCast(entrySpeaker);
return {
sceneSummary: parsed.sceneSummary?.trim() || "未指定场景概要",
sceneKey: normalizeSceneKey(parsed.sceneKey),
entryBeatId: parsed.entryBeatId?.trim() || "b1",
cast,
entryActiveCharacters,
entrySpeaker,
};
}
// ── Phase B — expand the plan into the full beats[] graph + storyStatePatch.
// Overlapped with the image pipeline by the director. The plan's entry id is
// pinned onto a real beat so the already-painted entry frame resolves.
export async function runWriterBeats(
config: ProviderConfig,
session: Session,
plan: WriterPlan,
): Promise<WriterBeatsOutput> {
const raw = await chat(
config,
[
{ role: "system", content: WRITER_BEATS_SYSTEM },
{ role: "user", content: buildWriterBeatsUserMessage(session, plan) },
],
{ temperature: 0.9, tag: "writer-beats" },
);
const parsed = parseJsonLoose<RawBeats>(raw);
const rawBeats = Array.isArray(parsed.beats) ? parsed.beats : [];
if (rawBeats.length === 0) {
throw new Error("Writer (beats) returned no beats");
}
let beats = ensureUniqueChoiceIds(
repairBeats(
ensureUniqueBeatIds(
rawBeats.map((b, i) => coerceBeat(b, i, rawBeats.length)),
),
),
);
// The Painter already composed the entry frame from plan.entryBeatId + its
// roster, so the scene's entry MUST resolve to that id. If Phase B ignored
// it, rename the first beat to it (no collision — id is absent by the guard).
if (!beats.some((b) => b.id === plan.entryBeatId)) {
beats = renameBeatId(beats, beats[0]!.id, plan.entryBeatId);
}
// 把入场 beat 的 roster 钉成 plan 的:画师合成进帧的正是
// plan.entryActiveCharacters,运行时入场 beat 必须显示同一批人(与上面钉
// id 同理)。speaker 故意不钉——它和 line/TTS 耦合,强行覆盖会错配台词。
const entryRoster =
plan.entryActiveCharacters.length > 0 ? plan.entryActiveCharacters : undefined;
beats = beats.map((b) =>
b.id === plan.entryBeatId ? { ...b, activeCharacters: entryRoster } : b,
);
return {
beats,
storyStatePatch: coerceStoryStatePatch(parsed.storyStatePatch),
};
}
// Phase B fallback — when runWriterBeats fails entirely, keep the scene
// Fallback — when the Writer stream fails to yield usable beats, keep the scene
// playable with a single entry beat synthesized from the plan: narrate the
// planned summary and offer one change-scene exit so the player can advance.
export function synthesizeFallbackBeats(plan: WriterPlan): Beat[] {
@@ -532,3 +427,156 @@ export function synthesizeFallbackBeats(plan: WriterPlan): Beat[] {
// Re-export POV constants for downstream filters (director's orphan voices).
export { POV_DISPLAY_NAME, POV_VARIANTS, isPovName, normalizeSpeakerName };
// ──────────────────────────────────────────────────────────────────────
// Paradigm D — single-pass streaming Writer
// ──────────────────────────────────────────────────────────────────────
/**
* Streaming Writer: single LLM call producing `<plan>/<story>/<choices>`
* tagged output. The caller (director) feeds the textStream to StreamRouter
* which dispatches downstream agents as tags close.
*
* Uses `chatStream` (Task 2) + `buildWriterStreamUserMessage` (ContextProvider).
* Temperature and tag mirror the existing chat() calls.
*/
export function runWriterStream(
config: ProviderConfig,
session: Session,
): ChatStreamResult {
return chatStream(
config,
buildWriterStreamMessages(session),
{ temperature: 0.9, tag: "writer-stream" },
);
}
/**
* Coerce a raw parsed plan (from StreamRouter's `<plan>` segment) into a
* clean WriterScenePlan. Reuses the existing Phase A coercion pipeline.
*/
export function coercePlanFromRaw(raw: Record<string, unknown>): WriterScenePlan {
const entryActiveCharacters =
coerceActiveCharacters(raw.entryActiveCharacters as RawActiveCharacter[]) ?? [];
const rawEntrySpeaker =
typeof raw.entrySpeaker === "string" ? raw.entrySpeaker.trim() : undefined;
const entrySpeaker = rawEntrySpeaker
? normalizeSpeakerName(rawEntrySpeaker)
: undefined;
const cast = coerceCast(raw.cast);
const castSet = new Set(cast);
const addToCast = (name: string): void => {
if (!isPovName(name) && !castSet.has(name)) {
castSet.add(name);
cast.push(name);
}
};
for (const c of entryActiveCharacters) addToCast(c.name);
if (entrySpeaker) addToCast(entrySpeaker);
const characterIntents = Array.isArray(raw.characterIntents)
? (raw.characterIntents as Array<Record<string, unknown>>)
.filter((ci) => typeof ci.name === "string" && (ci.name as string).trim())
.map((ci) => ({
name: (ci.name as string).trim(),
mood: typeof ci.mood === "string" ? ci.mood.trim() || undefined : undefined,
motivation:
typeof ci.motivation === "string"
? ci.motivation.trim() || undefined
: undefined,
speakingTone:
typeof ci.speakingTone === "string"
? ci.speakingTone.trim() || undefined
: undefined,
}))
: undefined;
// Story bible — first scene only. The Writer's <plan> includes a storyBible
// sub-object on the opening scene (replacing the old Architect call). Absent
// on subsequent scenes (the carried StoryState stays authoritative).
const rawBible = raw.storyBible as Record<string, unknown> | undefined;
let storyBible: WriterScenePlan["storyBible"];
if (rawBible && typeof rawBible === "object") {
const logline = typeof rawBible.logline === "string" ? rawBible.logline.trim() : "";
const genreTags = typeof rawBible.genreTags === "string" ? rawBible.genreTags.trim() : "";
const protagonist =
typeof rawBible.protagonist === "string" ? rawBible.protagonist.trim() : "";
const castNotes =
typeof rawBible.castNotes === "string" ? rawBible.castNotes.trim() || undefined : undefined;
// Only treat it as a real bible if at least one core field is present.
if (logline || genreTags || protagonist) {
storyBible = { logline, genreTags, protagonist, castNotes };
}
}
return {
sceneSummary:
typeof raw.sceneSummary === "string"
? raw.sceneSummary.trim() || "未指定场景概要"
: "未指定场景概要",
sceneKey: normalizeSceneKey(
typeof raw.sceneKey === "string" ? raw.sceneKey : undefined,
),
entryBeatId:
typeof raw.entryBeatId === "string"
? raw.entryBeatId.trim() || "b1"
: "b1",
cast,
entryActiveCharacters,
entrySpeaker,
characterIntents,
storyBible,
};
}
/**
* Coerce raw beats into clean Beat[] + optional StoryStatePatch. Called by
* proseSplitter (散文→RawBeat[]) and as fallback for degraded streams.
* Reuses the full pipeline: coerceBeat → ensureUniqueBeatIds → repairBeats →
* ensureUniqueChoiceIds → entry-id pinning.
*/
export function coerceBeatsFromRaw(
raw: unknown,
plan: WriterScenePlan,
): WriterBeatsOutput {
// Input can be a bare RawBeat[] or { beats, storyStatePatch } wrapper.
let rawBeats: RawBeat[] = [];
let rawPatch: RawStoryStatePatch | undefined;
if (Array.isArray(raw)) {
rawBeats = raw;
} else if (raw && typeof raw === "object") {
const obj = raw as Record<string, unknown>;
rawBeats = Array.isArray(obj.beats) ? (obj.beats as RawBeat[]) : [];
rawPatch = obj.storyStatePatch as RawStoryStatePatch | undefined;
}
if (rawBeats.length === 0) {
return { beats: synthesizeFallbackBeats(plan), storyStatePatch: undefined };
}
let beats = ensureUniqueChoiceIds(
repairBeats(
ensureUniqueBeatIds(
rawBeats.map((b, i) => coerceBeat(b, i, rawBeats.length)),
),
),
);
if (!beats.some((b) => b.id === plan.entryBeatId)) {
beats = renameBeatId(beats, beats[0]!.id, plan.entryBeatId);
}
const entryRoster =
plan.entryActiveCharacters.length > 0 ? plan.entryActiveCharacters : undefined;
beats = beats.map((b) =>
b.id === plan.entryBeatId ? { ...b, activeCharacters: entryRoster } : b,
);
return {
beats,
storyStatePatch: coerceStoryStatePatch(rawPatch),
};
}
+290
View File
@@ -0,0 +1,290 @@
import type { Session, Character } from "@infiplot/types";
import {
renderStoryStateSpine,
renderStoryStateDynamic,
renderHistoryEntry,
} from "../prompts";
// ──────────────────────────────────────────────────────────────────────
// ContextProvider — data-driven segment registry.
//
// Replaces the monolithic `buildWriterContextParts` (prompts.ts:425)
// with a registered list of segments, each rendered independently.
//
// Invariants:
// - **SENTINEL append-only**: character-cards / sceneKeys / archived-
// history use a fixed header + "entries follow" sentinel line. Adding
// a character only APPENDS bytes; earlier bytes never shift. This is
// crucial for prompt prefix caching.
// - **stable / dynamic split**: stable segments form the cached prefix;
// dynamic segments are the suffix that changes every call. Mixing them
// would destroy cache hit rate.
// - **try/catch isolation**: a failing segment is skipped, not fatal.
// ──────────────────────────────────────────────────────────────────────
export type ContextSegment = {
id: string;
zone: "stable" | "dynamic";
order: number;
render: (session: Session) => string[];
};
// ── Stable segments ─────────────────────────────────────────────────
const worldAndStyle: ContextSegment = {
id: "world-style",
zone: "stable",
order: 100,
render: (session) => {
const parts: string[] = [];
parts.push(`世界观:${session.worldSetting}`);
parts.push(`画风:${session.styleGuide}`);
if (session.playerName) {
parts.push(
`玩家名字:${session.playerName}(NPC 对话时用此名字称呼玩家;speaker 字段仍固定为 "你" 不变)`,
);
}
return parts;
},
};
const storySpine: ContextSegment = {
id: "story-spine",
zone: "stable",
order: 200,
render: (session) => [renderStoryStateSpine(session.storyState)],
};
function renderCharacterCard(c: Character): string[] {
const hasPersona =
c.persona || c.speakingStyle || c.sampleDialogue?.length || c.relationshipToPlayer;
if (!hasPersona) return [`- ${c.name}`];
const lines: string[] = [`- ${c.name}`];
if (c.persona) lines.push(` 设定:${c.persona}`);
if (c.personalityTraits?.length)
lines.push(` 性格:${c.personalityTraits.join("、")}`);
if (c.speakingStyle) lines.push(` 说话风格:${c.speakingStyle}`);
if (c.sampleDialogue?.length) {
lines.push(` 对白示例:`);
for (const d of c.sampleDialogue) lines.push(`${d}`);
}
if (c.relationshipToPlayer)
lines.push(` 与玩家关系:${c.relationshipToPlayer}`);
return lines;
}
const characterCards: ContextSegment = {
id: "character-cards",
zone: "stable",
order: 300,
render: (session) => {
// SENTINEL: header + marker are byte-identical even when the list is
// empty. Adding a character only APPENDS bytes — never shifts earlier.
const parts: string[] = [];
parts.push("已登记角色(speaker 必须用这些名字之一,或本场景新引入):");
parts.push("(以下每行一个已登记角色,开场前为空。)");
for (const c of session.characters) {
parts.push(...renderCharacterCard(c));
}
return parts;
},
};
function collectPriorSceneKeys(session: Session): string[] {
const seen = new Set<string>();
for (const entry of session.history) {
const k = entry.scene.sceneKey;
if (k) seen.add(k);
}
return Array.from(seen);
}
const priorSceneKeys: ContextSegment = {
id: "prior-sceneKeys",
zone: "stable",
order: 400,
render: (session) => {
// SENTINEL pattern — same rationale as character-cards.
const parts: string[] = [];
parts.push("已使用的 sceneKey(同一物理空间请沿用,不要新造):");
parts.push("(以下每行一个已用过的 sceneKey,开场前为空。)");
for (const k of collectPriorSceneKeys(session)) parts.push(`- ${k}`);
return parts;
},
};
const archivedHistory: ContextSegment = {
id: "archived-history",
zone: "stable",
order: 500,
render: (session) => {
// Only history[0..N-2] — the last entry is live (visitedBeatIds still
// growing, speculative prefetch sees different snapshots). Putting it
// here would corrupt prefix cache.
const archived = session.history.slice(0, -1);
const parts: string[] = [];
parts.push("场景历史(按时间顺序,已完结):");
parts.push("(以下每段一幕已完结的场景,开场前为空。)");
archived.forEach((entry, idx) => {
parts.push(renderHistoryEntry(entry, idx + 1));
});
return parts;
},
};
const loreConstant: ContextSegment = {
id: "lore-constant",
zone: "stable",
order: 600,
render: (session) => {
if (!session.worldBooks?.length) return [];
const constant = session.worldBooks
.flatMap((book) => book.entries.filter((e) => e.position === "constant"))
.sort((a, b) => (b.priority ?? 0) - (a.priority ?? 0))
.map((e) => e.content);
if (!constant.length) return [];
return [
"【世界设定 · 恒定知识】",
...constant.map((c) => `- ${c}`),
];
},
};
// ── Dynamic segments ────────────────────────────────────────────────
const storyDynamic: ContextSegment = {
id: "story-dynamic",
zone: "dynamic",
order: 100,
render: (session) => [renderStoryStateDynamic(session.storyState)],
};
const lastBeat: ContextSegment = {
id: "last-beat",
zone: "dynamic",
order: 200,
render: (session) => {
const last = session.history.at(-1);
if (!last) return [];
const lastBeatId = last.visitedBeatIds.at(-1) ?? last.scene.entryBeatId;
const beat = last.scene.beats.find((b) => b.id === lastBeatId);
if (!beat) return [];
const frag: string[] = [];
if (beat.narration) frag.push(`旁白:${beat.narration}`);
if (beat.line) frag.push(`${beat.speaker ?? "?"}${beat.line}`);
if (!frag.length) return [];
return [
`上一刻(玩家停留的最后一个画面,新场景从这里的情绪无缝承接):\n ${frag.join(" / ")}`,
];
},
};
const transitionHint: ContextSegment = {
id: "transition-hint",
zone: "dynamic",
order: 300,
render: (session) => {
if (session.history.length === 0) {
return [
"这是故事的开场。请按【故事档案】里的 nextHook 把第一幕的冷开场设计出来——开场即抓人,别花笔墨铺垫世界观。",
];
}
const last = session.history.at(-1);
const lastExit = last?.exit;
if (lastExit) {
if (lastExit.kind === "choice") {
return [
`承接「玩家在上一场选择了:${lastExit.label}」无缝续写下一个场景(转场命题:${lastExit.nextSceneSeed})。开场要让玩家感到这正是上一步的结果,并延续此刻的情绪。`,
];
}
return [
`承接「玩家自由动作:${lastExit.action}」无缝续写下一个场景,延续此刻的情绪与处境。`,
];
}
return ["无缝续写下一个场景,延续上一刻的情绪。"];
},
};
const loreTriggered: ContextSegment = {
id: "lore-triggered",
zone: "dynamic",
order: 400,
render: (session) => {
if (!session.worldBooks?.length) return [];
const lastBeatText = getLastBeatText(session);
const triggered = session.worldBooks
.flatMap((book) => book.entries.filter((e) => e.position === "triggered"))
.filter((e) => e.keys.some((key) => lastBeatText.includes(key)))
.sort((a, b) => (b.priority ?? 0) - (a.priority ?? 0))
.map((e) => e.content);
if (!triggered.length) return [];
return [
"【世界设定 · 情境激活】",
...triggered.map((t) => `- ${t}`),
];
},
};
/** Extract text from the last 3 beats for keyword matching (≤5000 chars). */
function getLastBeatText(session: Session): string {
if (!session.history.length) return "";
const lastEntry = session.history[session.history.length - 1];
if (!lastEntry) return "";
const scene = lastEntry.scene;
const beats = scene?.beats || [];
const lastN = beats.slice(-3);
const text = lastN
.map((b) => [b.narration, b.line].filter(Boolean).join(" "))
.join(" ");
return text.slice(0, 5000);
}
// ── Registry ────────────────────────────────────────────────────────
const defaultSegments: ContextSegment[] = [
worldAndStyle,
storySpine,
characterCards,
priorSceneKeys,
archivedHistory,
loreConstant,
storyDynamic,
lastBeat,
transitionHint,
loreTriggered,
];
export function buildWriterContext(
session: Session,
segments: ContextSegment[] = defaultSegments,
): { stableParts: string[]; dynamicParts: string[] } {
const stable = segments
.filter((s) => s.zone === "stable")
.sort((a, b) => a.order - b.order);
const dynamic = segments
.filter((s) => s.zone === "dynamic")
.sort((a, b) => a.order - b.order);
const stableParts: string[] = [];
for (const seg of stable) {
try {
stableParts.push(...seg.render(session));
stableParts.push("");
} catch (err) {
console.warn(`[ContextProvider] segment "${seg.id}" render failed, skipped:`, err);
}
}
const dynamicParts: string[] = [];
for (const seg of dynamic) {
try {
dynamicParts.push(...seg.render(session));
dynamicParts.push("");
} catch (err) {
console.warn(`[ContextProvider] segment "${seg.id}" render failed, skipped:`, err);
}
}
return { stableParts, dynamicParts };
}
+227 -100
View File
@@ -2,15 +2,18 @@ import { chat } from "@infiplot/ai-client";
import { coerceOrientation } from "@infiplot/types";
import type {
Beat,
BeatChoice,
Character,
CharacterIntent,
EngineConfig,
InsertBeatPartial,
ProviderConfig,
Scene,
SceneStreamEvent,
Session,
StoryState,
StoryStatePatch,
WriterPlan,
WriterScenePlan,
} from "@infiplot/types";
import type { CharacterCard } from "./agents/characterDesigner";
import {
@@ -23,13 +26,14 @@ import { runCinematographer } from "./agents/cinematographer";
import { runPainter } from "./agents/painter";
import type { WriterBeatsOutput } from "./agents/writer";
import {
coercePlanFromRaw,
isPovName,
normalizeSpeakerName,
POV_DISPLAY_NAME,
runWriterBeats,
runWriterPlan,
synthesizeFallbackBeats,
runWriterStream,
} from "./agents/writer";
import { routeTaggedStream } from "./stream";
import { splitProseToBeats } from "./stream/proseSplitter";
import { parseJsonLoose } from "./jsonParser";
import { INSERT_BEAT_SYSTEM, buildInsertBeatUserMessage } from "./prompts";
@@ -97,6 +101,14 @@ export function mergeCharacters(
basePortraitUrl: u.basePortraitUrl ?? prev.basePortraitUrl,
basePortraitUuid: u.basePortraitUuid ?? prev.basePortraitUuid,
voiceDescription: u.voiceDescription || prev.voiceDescription,
// Paradigm D: preserve persona fields when later designs omit them
// (same logic as portrait/voice preservation).
persona: u.persona ?? prev.persona,
personalityTraits: u.personalityTraits ?? prev.personalityTraits,
speakingStyle: u.speakingStyle ?? prev.speakingStyle,
sampleDialogue: u.sampleDialogue ?? prev.sampleDialogue,
relationshipToPlayer: u.relationshipToPlayer ?? prev.relationshipToPlayer,
secrets: u.secrets ?? prev.secrets,
});
}
return Array.from(byName.values());
@@ -157,6 +169,19 @@ export type SceneResult = {
storyState: StoryState;
};
// Absolute-worst-case plan when the stream produced no usable <plan> at all
// (StreamRouter degraded with no extractable plan). Keeps the pipeline alive.
function minimalFallbackPlan(): WriterScenePlan {
return {
sceneSummary: "未指定场景概要",
sceneKey: undefined,
entryBeatId: "b1",
cast: [],
entryActiveCharacters: [],
entrySpeaker: undefined,
};
}
// ──────────────────────────────────────────────────────────────────────
// directScene — the multi-agent pipeline. Used by orchestrator's
// startSession and requestScene.
@@ -165,48 +190,89 @@ export type SceneResult = {
export async function directScene(
config: EngineConfig,
session: Session,
emit?: (event: SceneStreamEvent) => void,
): Promise<SceneResult> {
const tTotal = Date.now();
// ── Phase A — Writer PLAN (serial). The image pipeline needs the scene
// summary + entry roster + cast to start, but NOT the dialogue beats. This
// call is small (skeleton only), so it returns fast and unblocks everything.
const tPlan = Date.now();
const plan = await runWriterPlan(config.text, session);
tlog("[directScene] Phase A (plan)", tPlan);
// ══════════════════════════════════════════════════════════════════════
// Paradigm D — single Writer stream + StreamRouter dispatch
//
// One LLM call produces <plan> → <story> → <choices>. StreamRouter
// cuts the tags; </plan> closure resolves the plan deferred, unlocking
// the downstream image pipeline IN PARALLEL with the still-streaming
// <story>. Prose is split into Beat[] after routing completes.
// ══════════════════════════════════════════════════════════════════════
// ── Phase B — Writer BEATS, launched NOW so its (longer) output overlaps the
// ENTIRE image pipeline below. Only needed to assemble the final Scene, so we
// await it last. A failure degrades to a single playable beat from the plan.
const tBeats = Date.now();
const beatsPromise: Promise<WriterBeatsOutput> = runWriterBeats(
config.text,
session,
plan,
)
.then((out) => {
tlog("[directScene] Phase B (beats)", tBeats);
return out;
})
.catch((err): WriterBeatsOutput => {
const msg = err instanceof Error ? err.message : String(err);
console.error(
`[directScene] Phase B (beats) failed, using fallback: ${msg}`,
);
return { beats: synthesizeFallbackBeats(plan), storyStatePatch: undefined };
});
// ── Step 1 — kick off the Writer stream + routing ─────────────────
const tStream = Date.now();
const writerResult = runWriterStream(config.text, session);
// Deferred that settles when onPlan fires (or when routing completes
// without a plan — degraded fallback).
let planSettled = false;
let resolvePlan!: (p: WriterScenePlan) => void;
const planPromise = new Promise<WriterScenePlan>((res) => {
resolvePlan = res;
});
// Closure-captured coerced plan so onStoryComplete can split+emit beats
// DURING streaming (before painter finishes → text-first progressive play).
let coercedPlanRef: WriterScenePlan | undefined;
let earlyBeatsOut: WriterBeatsOutput | undefined;
// Opening-scene story bible from the Writer's <plan> (replaces the old
// Architect). Undefined on subsequent scenes (carried StoryState wins).
let bibleFromPlan: WriterScenePlan["storyBible"];
const routingPromise = routeTaggedStream(writerResult.textStream, {
onPlan: (rawPlan) => {
try {
const coerced = coercePlanFromRaw(rawPlan as unknown as Record<string, unknown>);
coercedPlanRef = coerced;
if (coerced.storyBible) bibleFromPlan = coerced.storyBible;
planSettled = true;
emit?.({ type: "plan", plan: coerced });
resolvePlan(coerced);
} catch {
planSettled = true;
resolvePlan(minimalFallbackPlan());
}
},
onStoryComplete: (rawStory) => {
// Tags are ordered (plan before story), so the plan is already coerced.
const p = coercedPlanRef ?? minimalFallbackPlan();
try {
const out = splitProseToBeats(rawStory, p);
earlyBeatsOut = out;
for (const b of out.beats) emit?.({ type: "beat", beat: b });
} catch {
// split failure → Step 6 re-splits from rawStorySegment
}
},
}).then((result) => {
// If plan never fired (stream error / no plan tag), settle the deferred
// from the degraded extraction or a minimal fallback.
if (!planSettled) {
const extracted = result.plan
? coercePlanFromRaw(result.plan as unknown as Record<string, unknown>)
: minimalFallbackPlan();
if (extracted.storyBible) bibleFromPlan = extracted.storyBible;
resolvePlan(extracted);
}
return result;
});
// ── Step 2 — await plan (settles at </plan> close — EARLY) ────────
const plan = await planPromise;
tlog("[directScene] plan (stream → </plan>)", tStream);
// From here the pipeline is structurally identical to the old Phase A
// flow: plan drives character design + cinematographer + painter, all
// overlapping with the Writer's still-streaming <story>.
// NEW characters to design come from the PLAN's cast (so design fires in
// parallel with Phase B, not after the beats are written). Existing
// characters keep their cards / portraits / voices across scenes.
const newCharNames = plan.cast.filter(
(n) => !session.characters.some((c) => c.name === n),
);
// Entry-beat composition is the PLAN's (Phase B is constrained to honor it).
// The Painter needs a Beat-shaped object for reference collection, but the
// real beat isn't written until Phase B — so synthesize one from the plan
// (collectReferenceImages only reads speaker + activeCharacters).
const entryBeatActive = plan.entryActiveCharacters;
const entryBeatSpeaker = plan.entrySpeaker;
const entryBeatForPaint: Beat = {
@@ -216,32 +282,30 @@ export async function directScene(
next: { type: "continue", nextBeatId: plan.entryBeatId },
};
// For sceneKey-based visual continuity, look up the prior matching scene's
// image to slot into Painter's referenceImages (max 4 of which include
// character portraits too).
const { priorSceneReference, priorSceneKey } = pickPriorSceneReference(
session,
plan.sceneKey,
);
// ── Stage 2 — character cards (LLM) ∥ Cinematographer ──────────────────
// Both are cheap LLM calls and neither needs the other's output, so they
// run concurrently. The cards give us each new character's visualDescription
// TEXT; portraits + voices are deferred to Stage 3 so they can overlap the
// paint instead of blocking it.
// ── Step 3 — character cards (LLM) ∥ Cinematographer (parallel) ───
// CharacterDesigner now receives the Writer's intent for each character
// (paradigm D: media translator, not inventor).
const tParallel = Date.now();
const findIntent = (name: string): CharacterIntent | undefined =>
plan.characterIntents?.find((ci) => ci.name === name);
const cardPromises = newCharNames.map((name) =>
designCharacterCard(config, session, name).catch((err): CharacterCard => {
const msg = err instanceof Error ? err.message : String(err);
console.error(`[directScene] designCharacterCard(${name}) failed: ${msg}`);
// Last-resort fallback: a name + generic voice card so the speaker isn't
// unknown. No visualDescription → no portrait is attempted for them.
return {
name,
voiceDescription: `请根据角色名「${name}」推断其性别、年龄与气质。所属世界观:${session.worldSetting}`,
};
}),
designCharacterCard(config, session, name, findIntent(name)).catch(
(err): CharacterCard => {
const msg = err instanceof Error ? err.message : String(err);
console.error(`[directScene] designCharacterCard(${name}) failed: ${msg}`);
return {
name,
voiceDescription: `请根据角色名「${name}」推断其性别、年龄与气质。所属世界观:${session.worldSetting}`,
};
},
),
);
const cinemaPromise = runCinematographer(config.text, {
@@ -259,8 +323,6 @@ export async function directScene(
]);
tlog("[directScene] CharacterCards+Cinematographer parallel", tParallel);
// Working registry: existing characters + new cards. visualDescription text
// is present now; portraits + voices fill in over the next two phases.
let characters = mergeCharacters(
session.characters,
cards.map((c) => ({
@@ -270,11 +332,9 @@ export async function directScene(
})),
);
// ── Stage 3 — portraits + voices, scheduled around the Painter ─────────
// ── Step 4 — portraits + voices, scheduled around Painter ─────────
const tProvision = Date.now();
// Entry-beat character names: the ONLY portraits the Painter references
// (collectReferenceImages slots in the entry beat's speaker + activeChars).
const entryNames = new Set<string>();
if (entryBeatSpeaker && !isPovName(entryBeatSpeaker)) {
entryNames.add(entryBeatSpeaker);
@@ -288,8 +348,6 @@ export async function directScene(
basePortraitUrl?: string;
basePortraitUuid?: string;
};
// Kick off portrait gen for every NEW char that has a visualDescription.
// Entry-beat portraits block the Painter; the rest overlap it.
const entryPortraitPromises: Promise<NamedPortrait>[] = [];
const restPortraitPromises: Promise<NamedPortrait>[] = [];
for (const card of cards) {
@@ -308,42 +366,37 @@ export async function directScene(
// On the StepFun path, thread the LLM-selected stepfunVoiceId from the card
// into provision — it lets stepfunProvision honor the catalog pick instead
// of falling back to the keyword scorer (same network cost: still zero).
// ALSO persist it onto the Character so the client can echo it back on a
// StepFun server (where it skips the ~220KB voice payload) and the server
// resolveVoice honors the LLM pick at synth time instead of re-scoring.
const voicePromises = cards.map((card) =>
provisionCharacterVoice(config, card.voiceDescription, card.name, {
stepfunVoiceId: card.stepfunVoiceId,
}).then(
(voice): Character => ({
name: card.name,
voiceDescription: card.voiceDescription,
voice,
stepfunVoiceId: card.stepfunVoiceId,
}),
(voice): Character => {
const result: Character = {
name: card.name,
voiceDescription: card.voiceDescription,
voice,
stepfunVoiceId: card.stepfunVoiceId,
};
if (voice) emit?.({ type: "voice", name: card.name, voice });
return result;
},
),
);
// Block the Painter ONLY on entry-beat portraits (its referenceImages).
const entryPortraits = await Promise.all(entryPortraitPromises);
characters = mergeCharacters(
characters,
entryPortraits.map((p) => ({
name: p.name,
voiceDescription: "", // preserved from the card by mergeCharacters
voiceDescription: "",
basePortraitUrl: p.basePortraitUrl,
basePortraitUuid: p.basePortraitUuid,
})),
);
tlog("[directScene] entry-beat portraits", tProvision);
// ── Stage 4 — Painter (depends on cinemaOut + on-stage visual cards +
// entry portraits). On-stage = the plan's cast (everyone who'll appear),
// filtered to those now in the registry, so the archetype block covers them.
// ── Step 5 — Painter ──────────────────────────────────────────────
const onStageCharacters = characters.filter((c) => plan.cast.includes(c.name));
// Session-locked orientation (set at session start). Threads into both the
// Painter prompt's framing rules and the generated image's pixel dimensions.
const orientation = coerceOrientation(session.orientation);
const tPainter = Date.now();
@@ -361,9 +414,11 @@ export async function directScene(
);
tlog("[directScene] Painter", tPainter);
// Fold in the work that overlapped the paint: remaining portraits + all
// voices. Awaited before returning so the session the client persists is
// fully provisioned for later scenes.
// Emit background as soon as it's painted the client can swap the
// placeholder for the real scene image while beats/voices are still settling.
emit?.({ type: "background", imageUrl: painted.imageUrl, sceneKey: plan.sceneKey });
// Overlapped: rest portraits + voices
const tOverlap = Date.now();
const [restPortraits, voicedChars] = await Promise.all([
Promise.all(restPortraitPromises),
@@ -381,20 +436,82 @@ export async function directScene(
characters = mergeCharacters(characters, voicedChars);
tlog("[directScene] overlapped portraits+voices", tOverlap);
// ── Await Phase B — it overlapped the whole image pipeline above. ──────
const beatsOut = await beatsPromise;
const beats = beatsOut.beats;
// ── Step 6 — await routing completion + split prose into beats ────
// routeTaggedStream ran concurrently with the entire image pipeline.
// onStoryComplete likely already fired (splitting + emitting beats for
// progressive playback); this await retrieves the final result + rawStorySegment.
const streamResult = await routingPromise;
// Reuse early-split beats when available (onStoryComplete path); otherwise
// split from rawStorySegment (degrade / onStoryComplete missed).
const beatsOut: WriterBeatsOutput = earlyBeatsOut
?? splitProseToBeats(streamResult.rawStorySegment ?? "", plan);
let beats = beatsOut.beats;
// If earlyBeatsOut was missed but rawStorySegment is available, emit beats
// now (late but still before done — the client gets them for rendering).
if (!earlyBeatsOut && beats.length > 0) {
for (const b of beats) emit?.({ type: "beat", beat: b });
}
// Emit choices (from streamResult or from the last beat's choice exits).
if (streamResult.choices?.length) {
emit?.({ type: "choices", choices: streamResult.choices });
}
// ── C1-ext: merge <choices> segment into the last beat's `next` ────
// The Writer's <choices> segment produces scene-level exits that are NOT
// embedded in the beats graph. Attach them to the final beat so the player
// can actually pick them.
//
// IMPORTANT: Only change-scene exits are valid here. The prose paradigm
// assigns beat ids automatically (b1, b2, ...) in proseSplitter — the LLM
// has no knowledge of these ids, so any advance-beat targetBeatId it emits
// in <choices> will point at the wrong beat, causing a loop.
if (streamResult.choices?.length && beats.length > 0) {
const validChoices = streamResult.choices.filter(
(c): c is BeatChoice =>
typeof c.label === "string" &&
c.label.length > 0 &&
c.effect != null &&
c.effect.kind === "change-scene",
);
if (validChoices.length > 0) {
const withIds = validChoices.map((c, i) => ({
...c,
id: c.id || `sc${i + 1}`,
}));
const lastIdx = beats.length - 1;
const last = beats[lastIdx]!;
const existing =
last.next.type === "choice" ? last.next.choices : [];
const isFallbackOnly =
existing.length <= 1 &&
existing.every((c) => c.label === "继续");
const merged = isFallbackOnly ? withIds : [...existing, ...withIds];
const seen = new Set<string>();
const deduped = merged.filter((c) => {
if (seen.has(c.label)) return false;
seen.add(c.label);
return true;
});
beats = beats.map((b, i) =>
i === lastIdx
? { ...b, next: { type: "choice" as const, choices: deduped } }
: b,
);
}
}
if (streamResult.degraded) {
console.warn("[directScene] Writer stream was degraded — beats may be fallback");
}
// entryBeatId is guaranteed present (runWriterBeats pins it onto a beat), but
// keep the defensive fallback for the synthesized-fallback path.
const entryBeatId = beats.some((b) => b.id === plan.entryBeatId)
? plan.entryBeatId
: beats[0]!.id;
// Orphan-speaker voices: a beat speaker Phase B used that isn't in the
// registry. Should be rare — the prompt constrains speakers to the cast, and
// every cast member was provisioned above — so this is a defensive net,
// serial but skipped entirely (zero latency) in the common case.
// Orphan-speaker voices (defensive net — should be rare).
const orphanSpeakers = [
...new Set(beats.map((b) => b.speaker).filter((n): n is string => Boolean(n))),
].filter((n) => !isPovName(n) && !characters.some((c) => c.name === n));
@@ -403,15 +520,14 @@ export async function directScene(
orphanSpeakers.map((n) => provisionVoiceForName(config, session, n)),
);
characters = mergeCharacters(characters, orphanChars);
// Emit orphan voices so the client can preload their audio.
for (const oc of orphanChars) {
if (oc.voice) emit?.({ type: "voice", name: oc.name, voice: oc.voice });
}
}
const scene: Scene = {
id: newSceneId(),
// scenePrompt is the cinematographer's English compositional output;
// the Writer's sceneSummary stays in the session log via beats[]/
// history. Keeping the original field name preserves compat with
// anything that already reads scene.scenePrompt (e.g., insert-beat
// user prompt).
scenePrompt: cinemaOut.integratedPrompt,
beats,
entryBeatId,
@@ -421,11 +537,22 @@ export async function directScene(
orientation,
};
// Merge the Writer's volatile memory rewrite onto the carried bible so the
// throughline survives the next scene cut (orchestrator returns it; the
// client persists it back into the session).
// storyState: opening scene seeds the stable spine from the Writer's
// storyBible (replacing the old Architect); subsequent scenes carry the
// existing spine. Volatile fields always come from this scene's patch.
const baseStoryState: StoryState | undefined = session.storyState
?? (bibleFromPlan
? {
logline: bibleFromPlan.logline,
genreTags: bibleFromPlan.genreTags,
protagonist: bibleFromPlan.protagonist,
castNotes: bibleFromPlan.castNotes,
synopsis: "",
}
: undefined);
const storyState = applyStoryStatePatch(
session.storyState,
baseStoryState,
beatsOut.storyStatePatch,
);
+2 -2
View File
@@ -9,8 +9,8 @@ export {
export { synthesizeBeat } from "./voice";
export { mergeCharacters } from "./director";
export type { SceneResult } from "./director";
export { runArchitect } from "./agents/architect";
export type { WriterBeatsOutput } from "./agents/writer";
export type { CinematographerOutput } from "./agents/cinematographer";
export type { InsertBeatPartial } from "@infiplot/types";
export * from "./prompts";
// Note: prompts.ts is NOT re-exported (server-only, used internally by agents)
+17 -20
View File
@@ -8,6 +8,7 @@ import type {
FreeformClassifyResponse,
InsertBeatRequest,
InsertBeatResponse,
SceneStreamEvent,
Session,
SceneRequest,
SceneResponse,
@@ -19,7 +20,6 @@ import type {
import { coerceOrientation } from "@infiplot/types";
import { chat } from "@infiplot/ai-client";
import { isStepfun, isValidStepfunVoiceId, provisionVoice } from "@infiplot/tts-client";
import { runArchitect } from "./agents/architect";
import { selectStyle } from "./agents/styleSelector";
import { directInsertBeat, directScene } from "./director";
import { STYLE_MAP } from "@/lib/options";
@@ -51,6 +51,7 @@ function tlog(label: string, t0: number): void {
export async function startSession(
config: EngineConfig,
req: StartRequest,
emit?: (event: SceneStreamEvent) => void,
): Promise<StartResponse> {
const tTotal = Date.now();
@@ -67,38 +68,32 @@ export async function startSession(
language: req.language?.trim() || undefined,
};
// Stage 0 — Architect (+ optional auto style selection, in parallel).
// Both only depend on worldSetting, so they run concurrently.
// Stage 0 — optional auto style selection. The story bible is no longer
// generated by a separate Architect call; the Writer's <plan> produces it
// on the opening scene (paradigm: Writer is the single content brain).
console.log(
`[start] worldSetting (${session.worldSetting.length} chars):\n${session.worldSetting}`,
);
const isAutoStyle = session.styleGuide === "auto";
if (isAutoStyle) {
session.styleGuide = "由 AI 根据剧情自动匹配最佳画风";
}
const tArchitect = Date.now();
const [architectResult, autoStyleGuide] = await Promise.all([
runArchitect(config.text, session),
isAutoStyle
? selectStyle(config.text, session.worldSetting).catch((err) => {
console.warn(`[styleSelector] failed, falling back to 吉卜力:`, err);
return null;
})
: Promise.resolve(null),
]);
session.storyState = architectResult;
if (isAutoStyle) {
const tStyle = Date.now();
const autoStyleGuide = await selectStyle(
config.text,
session.worldSetting,
).catch((err) => {
console.warn(`[styleSelector] failed, falling back to 吉卜力:`, err);
return null;
});
session.styleGuide = autoStyleGuide ?? STYLE_MAP["吉卜力"]!;
tlog("[start] StyleSelector", tStyle);
console.log(`[start] auto-selected style: ${session.styleGuide.slice(0, 60)}`);
}
tlog("[start] Architect" + (isAutoStyle ? " + StyleSelector" : ""), tArchitect);
console.log(
`[start] storyBible: logline="${session.storyState.logline}" | genreTags="${session.storyState.genreTags}" | synopsis="${session.storyState.synopsis}"`,
);
const { scene, sceneImageUrl, characters, storyState } = await directScene(
config,
session,
emit,
);
tlog("[start] TOTAL", tTotal);
@@ -119,12 +114,14 @@ export async function startSession(
export async function requestScene(
config: EngineConfig,
req: SceneRequest,
emit?: (event: SceneStreamEvent) => void,
): Promise<SceneResponse> {
const tTotal = Date.now();
const { scene, sceneImageUrl, characters, storyState } = await directScene(
config,
req.session,
emit,
);
tlog("[scene] TOTAL", tTotal);
+24 -479
View File
@@ -1,6 +1,7 @@
import type {
BeatActiveCharacter,
Character,
CharacterIntent,
Orientation,
Scene,
Session,
@@ -129,300 +130,22 @@ export function renderStoryStateDynamic(s: StoryState | undefined): string {
return lines.join("\n");
}
// Back-compat for the Architect's own user message (it sees the full bible
// at session start, no caching concern there yet).
export function renderStoryState(s: StoryState | undefined): string {
if (!s) return "";
return renderStoryStateSpine(s) + "\n\n" + renderStoryStateDynamic(s);
}
// ──────────────────────────────────────────────────────────────────────
// 0. Architect (总编剧) — ONE LLM call at session start.
//
// Turns the (often terse) user world + style prompt into a real story
// bible: a second-person protagonist with a want and a flaw, a single
// central dramatic question, a genre frame that anchors the 爽点 rhythm,
// an engineered opening hook (前3秒冷开场), and a small intentional cast.
// Everything downstream — Writer, CharacterDesigner — reads this so the
// story has a spine from beat one instead of being improvised cold.
// Paradigm D — merged Writer (single-pass streaming with tagged output)
// ──────────────────────────────────────────────────────────────────────
export const ARCHITECT_SYSTEM = `你是一部交互视觉小说的「总编剧 / 故事架构师」。玩家只给了你一句到几句的世界观和画风,你要在开拍前把它扩写成一份**故事档案(story bible)**,为后续每一幕定下脊梁。你不写具体台词、不写分镜、不设计立绘——你只搭骨架。
// Writer prompt has been refactored to segment-driven builder.
// See lib/engine/prompts/segments/writer/ for individual prompt segments.
// See lib/engine/prompts/registry.ts for segment registration.
// See lib/engine/prompts/builder.ts for assembly logic.
你深谙网文(番茄)、短剧(红果)与视觉小说(galgame)的爆款心法:
- **开篇即钩子**:黄金三章 / 前3秒法则。开场不铺垫世界观,直接抛出冲突、悬念或一个反常的瞬间。
- **代入感**:主角是第二人称「你」,是玩家的化身——要让玩家一进场就清楚"我是谁、我此刻卡在什么处境里、我想要什么"。
- **题材锚定爽点**:先选定一个清晰的题材框架(如 甜宠 / 校园暗恋 / 悬疑追凶 / 复仇逆袭 / 救赎治愈),它决定了情绪回报的节奏与类型。
- **戏剧问题**:整部故事由一个悬而未决的中心问题驱动(她到底是谁?你能否在记忆消失前查明真相?这场暗恋会走向哪里?)。
- **人设要鲜明且有反差**:每个核心角色一个强标签 + 一个反差面(外冷内热 / 傲娇 / 看似柔弱实则腹黑)。
你要产出(全部用中文,except 不需要英文):
- logline:一句话主线 / 中心戏剧问题,必须带钩子,让人想看下去
- genreTags:题材+基调标签,斜杠分隔,如 "甜宠 / 校园 / 慢热治愈带点伤感"
- protagonist:第二人称主角卡。包含:你是谁、你此刻正卡在什么具体处境里(要有即时张力)、你想要什么、一个软肋或秘密。50–120 字。
- castNotes:2–3 个核心配角,每行一个「名字:一句话人设(强标签+反差)+ 与你的关系/张力」。给真实好记的中文名字(不要"神秘女子"这种占位)。
- synopsis:开场此刻的情境梗概(故事尚未展开,就写"故事从……开始"),13 句。
- openThreads:开场就埋下的 1–3 个悬念/问题(数组)。
- nextHook:**第一幕**应当如何冷开场——具体描述开场那个抓人的瞬间/冲突(这会直接指导编剧写开场)。要画面感强、有张力。
设计硬规则:
- 主角「你」永不出现在画面里(第二人称 POV),所以 castNotes 里**不要**把"你/主角"当成一个角色。
- 配角名字要符合世界观(年代、地域、文化)。
- 一切服从玩家给的世界观与画风,不要擅自跑题;玩家信息少时,做最贴合、最有戏的合理扩写。
必须输出严格 JSON
{
"logline": "...",
"genreTags": "...",
"protagonist": "...",
"castNotes": "夏海:表面开朗的天台诗人,实则在用诗逃避家里的变故;与你是同班转学的邻座,对你有种说不清的在意。\\n班主任老周:…",
"synopsis": "...",
"openThreads": ["...", "..."],
"nextHook": "第一幕冷开场:……"
}
不要输出 JSON 以外的任何文本。`;
export function buildArchitectUserMessage(session: Session): string {
const parts: string[] = [];
parts.push(`世界观:${session.worldSetting}`);
parts.push(`画风:${session.styleGuide}`);
if (session.playerName) {
parts.push(
`\n玩家名字:${session.playerName}\n(NPC 在对话中应自然地称呼玩家为「${session.playerName}」。「你」仍指代玩家视角,但 NPC 的台词里请使用这个名字而非泛称。不要为玩家设计立绘或音色——玩家是 POV 视角,永不出现在画面中。)`,
);
}
parts.push(
"\n请据此产出这部交互剧的故事档案(story bible),严格以 JSON 格式返回。",
);
const langDirective = buildLanguageDirective(session.language);
if (langDirective) parts.push(langDirective);
return parts.join("\n");
}
// ──────────────────────────────────────────────────────────────────────
// 1. Writer (编剧) — drives the narrative, in TWO phases.
//
// Phase A (WRITER_PLAN_SYSTEM): plans the scene SKELETON only — sceneSummary
// + sceneKey + entry-beat roster + the full cast. No dialogue. Its output
// is enough for the Cinematographer + character design + Painter to start.
// Phase B (WRITER_BEATS_SYSTEM): expands the plan into the full beats[] graph
// + storyStatePatch, overlapped with the (longer) image pipeline.
//
// Neither phase designs characters (that's the CharacterDesigner's job) —
// Phase A only NAMES them in `cast` / `entryActiveCharacters`; the
// CharacterDesigner is invoked for any name not yet in session.characters.
// ──────────────────────────────────────────────────────────────────────
export const WRITER_PLAN_SYSTEM = `你是一部交互视觉小说的「编剧」。这是**两步生成中的第一步——场景规划**。你只产出本场景的「骨架」,**不要写任何 beat 台词**。你的产出会被立刻送去配图(分镜导演 + 生图),所以要快、要准、画面感要强。
═══════════════════════════════════════════════════════════════════
爆款心法(要在规划阶段就立住,后续展开才好看)
═══════════════════════════════════════════════════════════════════
- **进场即钩子**:这一场开场就要抛出新信息 / 悬念 / 冲突 / 情绪冲击,别铺陈。把这个抓人的瞬间写进 sceneSummary。
- **兑现情绪**:按题材给观众想要的情绪(甜宠的心动、暗恋的拉扯、逆袭的扬眉、悬疑的真相一角)。
- **人设有反差**:每个角色一个强标签 + 一个反差面。
═══════════════════════════════════════════════════════════════════
连贯性铁律(跨场景切换不能跳戏 —— 最重要)
═══════════════════════════════════════════════════════════════════
- 你会收到【故事档案 / 主线记忆】和上一场的结尾。**新场景必须从上一刻自然承接**——承接情绪、地点逻辑、人物状态与未收的悬念。
- 若给了「转场种子 nextSceneSeed」,把它当作"下一场的命题"去兑现,开场要让玩家感到"这正是我上一步的结果"。
- 沿用主线记忆里的人物关系与情绪温度,别让刚告白的人下一场形同陌路。
本步你要规划(如实产出,缺一不可):
- **sceneSummary**:当前场景的中文概要——地点 + 时间 + 氛围 + 关键事件 + 那个抓人的开场瞬间。这是分镜导演构图的**唯一依据**,要画面感强、信息足(2–4 句)。
- **sceneKey**:当前场景的英文 slug(如 "classroom-dusk"、"rooftop-night")。
- **entryBeatId**:玩家进入场景时落在哪个 beat 的 id(通常就是 "b1")。
- **cast**:本场景**会出场的全部 NPC 角色名**(字符串数组)。第二步写 beats 时**只能用这里列出的名字**,所以现在必须一次想全——谁会说话、谁会在画面里露面,全部列出。名字要与「已登记角色」**完全一致**;新角色起符合世界观的真名(不要"神秘女子"这种占位)。**绝不**包含玩家(你 / 我 / 主角 / protagonist / player / MC...)。
- **entrySpeaker**:入口 beat 由谁开口 —— 取值只有三种:① 某个 NPC 真名(必须在 cast 里)② "你"(玩家本人开口)③ 留空(纯旁白 / 环境开场)。这决定镜头语言,要选准。
- **entryActiveCharacters**:入口画面里**此刻出现的 NPC** 及其当下姿态 / 神情(中文 pose)。即使没人说话,画面里有谁也要列。**绝不**包含玩家。
sceneKey 设计原则(用于跨场景视觉一致性):
- 同一物理空间 + 同一时段 → 必须沿用**完全相同**的英文 slug
- 时段 / 空间变化时换 slug"classroom-dusk" → "classroom-night" / "corridor-dusk"
- slug 规范:lowercase-with-dashes24 个英文单词
- 用户消息会列出已用过的 sceneKey,请优先**复用**这些已有 slug
玩家视角硬规则(违反会破坏整个 galgame):
- 玩家是第二人称 POV,**永远不出现在任何画面里**——entryActiveCharacters 的 name **绝不允许**是「玩家 / 你 / 我 / 主角 / protagonist / player / Player / MC / I / me」任何变体。
- entrySpeaker 只能是 NPC 真名 / "你" / 留空;其它 POV 变体一律视为错误。
必须输出严格 JSON
{
"sceneSummary": "黄昏的天台,风很大。夏海背对你站在栏杆边,手里攥着一张揉皱的成绩单——她把你单独叫上来,却迟迟不开口。",
"sceneKey": "rooftop-dusk",
"entryBeatId": "b1",
"cast": ["夏海"],
"entrySpeaker": "夏海",
"entryActiveCharacters": [
{ "name": "夏海", "pose": "背对你倚着栏杆,侧脸绷着,手里攥着揉皱的纸" }
]
}
不要输出 JSON 以外的任何文本。`;
// ──────────────────────────────────────────────────────────────────────
// Phase B — expands the plan into the full beats[] + storyStatePatch.
// ──────────────────────────────────────────────────────────────────────
export const WRITER_BEATS_SYSTEM = `你是一部交互视觉小说的「编剧」。这是**两步生成中的第二步——把已规划好的场景展开成完整剧本**。你会收到本场景的「规划」(场景概要 sceneSummary、sceneKey、入口 beat 的 id / speaker / 登场角色、以及本场景允许出场的角色名单 cast)。你的任务:基于规划写出玩家依次经历的对话节拍 beats,并在最后更新主线记忆。你只负责**剧情和台词**——不设计角色形象、不写出图提示词、不做镜头调度,这些由其他 agent 完成。
你必须严格遵守收到的规划:
- 必须存在一个 id 等于规划 entryBeatId 的 beat,作为玩家入口。
- 该入口 beat 的 speaker 与登场角色(activeCharacters)要与规划一致(姿态措辞可微调,但**人物身份必须一致**)。
- speaker 与 activeCharacters 里的 NPC 名字**只能来自规划的 cast**(或玩家 "你")——**不要引入规划之外的新角色**。
═══════════════════════════════════════════════════════════════════
爆款心法(番茄网文 / 红果短剧 / galgame 的叙事手感)—— 必须贯彻
═══════════════════════════════════════════════════════════════════
- **每个场景都要有钩子**:开头 1–2 个 beat 内就抛出新信息、悬念、冲突或情绪冲击,绝不平铺直叙地交代背景;结尾 beat 留一个让玩家"想知道接下来"的扣子。
- **兑现爽点 / 情绪回报**:按题材给观众想要的情绪(甜宠的心动、暗恋的暧昧拉扯、逆袭的扬眉吐气、悬疑的真相一角)。让玩家这一场"有所得"。
- **反转与反差**:适时打破预期——以为是 A 结果是 B、角色露出与第一印象相反的一面;但反转要可信、要扣主线。
- **快节奏、入戏快**:进场即冲突,少铺陈,删掉一切"为完整而存在"却不推进情绪的对话。
- **show, don't tell**:用动作、神态、潜台词、环境细节传递情绪,别直接旁白"她很难过"——让玩家自己读出来。
- **人设鲜明有反差**:每个角色一个强标签 + 一个反差面,台词紧贴其腔调(傲娇嘴硬心软、外冷内热、看似柔弱实则强势)。
- **选择要有分量**:choice 只出现在真正的岔路口,每个选项都要让玩家感到"通向不同的东西"(情绪指向不同 / 关系走向不同),别给等价的废选项。
═══════════════════════════════════════════════════════════════════
连贯性铁律(跨场景切换不能跳戏 —— 最重要)
═══════════════════════════════════════════════════════════════════
- 你会收到【故事档案 / 主线记忆】和上一场的结尾。**新场景必须从上一刻自然承接**——承接上一场的情绪、地点逻辑、人物状态与未收的悬念。
- 若给了「转场种子 nextSceneSeed」,把它当作"下一场的命题"去兑现,而不是另起炉灶;开场要让玩家感到"这正是我上一个动作 / 选择导致的结果"。
- 沿用主线记忆里的人物关系与情绪温度——别让刚告白的人下一场形同陌路,也别凭空遗忘已埋的伏笔。
- 推进、但别重置:每一场都让主线问题往前走一点(关系变化 / 真相揭露一角 / 新悬念浮现)。
本步你只产出两样:**beats[]**(玩家依次经历的对话节拍)和 **storyStatePatch**(主线记忆更新)。sceneSummary / sceneKey / entryBeatId 已由规划给定,**不要再输出**它们。
每个 beat 是玩家会看到的一段叙述 / 对话 / 选择。beat 之间通过 next 字段连接:
- "continue":玩家点击图片背景 / 按继续,自然推进到下一个 beat
- "choice":在此让玩家做选择,按所选 choice 的 effect 走向
choice 的 effect 有两种:
- "advance-beat":玩家选了之后跳到**同场景内**的另一个 beat(不换背景图,速度极快)
- "change-scene":玩家选了之后切换到**新场景**(视角变了 / 走到新地方 / 时间跳了)
设计原则:
- 同场景内 beat 数自由发挥,按剧情节奏自然给出(通常 2–6 个,可以更多)
- 入口 beat 的 id 必须等于规划给定的 entryBeatId;其余 beat id 依次自取且互不重复
- 多用 continue,少用 choice — 选择只应出现在「真正的岔路口」
- advance-beat 适合处理对话分支(同一场景里换个话题、追问、撒娇)
- change-scene 适合空间/时间跳跃(出门、转身看窗外、第二天清晨)
- 一个场景至少要有一个 change-scene 出口(除非真到结局)
- 每个 change-scene 必须带 nextSceneSeed —— 一句中文简述「下一场是哪里、谁在、要发生什么」
- 同一场景的 beat id 互不重复
- next.nextBeatId 引用的 beat 必须存在
- choice 至少 2 个,至多 4 个,互不重复
文本风格约束:
- narration / line 用中文(**纯净可显示文本**,绝不要写 (叹气)(语速快) 这类标注 —— 那是给配音的,会被玩家看见)
- sceneSummary / lineDelivery / activeCharacters[].pose 内的文字也用中文
- sceneKey 用英文 slug
- 单个 beat 的 narration 与 line 加起来 ≤80 字
- 单个 choice label ≤15 字
配音相关字段:
- 每个有 line 的 beat **必须**给出 lineDelivery —— 自由中文的「配音导演指令」,描述该句台词怎么念(情绪 / 语气 / 语速 / 气息 / 停顿 / 重音 / 音色起伏)。例:"鼓起勇气又害羞,声音发颤、偏小,句尾带一丝气声,语速偏慢"。平淡场合写"平静自然、语速适中"即可,但要贴当下情境。
角色与台词的硬性规则:
- 任何 beat 的 speaker 字段一旦填了名字,**该名字必须**:① 是 "你"(玩家本人,见下方"玩家视角硬规则"),或 ② 在「已登记角色」列表中存在,或 ③ 出现在本场景的某个 beat 的 activeCharacters 里。
- speaker 名字必须与登记名**完全一致**,不要加「(回忆)」「学姐」之类后缀或别名。
- 每个 beat 的 activeCharacters 列出**此时此刻画面里出现的 NPC 角色**及其当下姿态/神情(中文)。即使没人说话,画面里有谁在也要列出。
玩家视角硬规则(重要 — 违反这条会破坏整个 galgame):
【画面规则 — 严格禁止】
- 玩家是第二人称 POV,**永远不出现在任何 Scene 画面里**
- activeCharacters[].name 数组**绝不允许**包含任何下列名字(任何大小写、中英文变体):
「玩家」「你」「我」「主角」「protagonist」「player」「Player」「MC」「I」「me」
- 玩家不会被设计立绘、不会被设计音色
【对白规则 — galgame 标准做法(Pattern B)】
- 玩家**可以正常说话**——当主角对 NPC 开口时:
speaker = "你"**固定用这两个字,不要用其他变体**)
line = 实际说的话(如「学姐,下雨了」)
lineDelivery 可以留空(玩家对白不会被 TTS 合成)
- speaker 字段允许的取值**只有两种**:① NPC 真名(必须在 activeCharacters 里)② "你"
- 其它 POV 变体(玩家 / 我 / 主角 / protagonist / player / MC / I / me**一律视为错误**
【内心 vs 外显的区分】
- 主角在心里想 / 在做某个动作 / 在观察 / 自己的体感 → 用 narrationspeaker 留空)
例:"你的心跳得很快,几乎听不见外面的雨声。"
- 主角真的开口对 NPC 说出来 → 用 speaker="你" + line
例:speaker="你" line="学姐,这把伞你拿着。"
- 同一个 beat 可以同时有 narration(心理活动 / 动作)和 speaker="你" + line(说出口的话)
更新主线记忆(storyStatePatch)—— 写完这一场后必做:
- synopsis:把这一场并入后的整体梗概,**压缩**到 3–5 句(别越写越长,旧细节该丢就丢)
- relationships:每个核心角色此刻与「你」的关系 / 情绪温度,每条一句(如 "夏海:暗恋升温,刚向你说了一半的告白被打断")
- openThreads:仍未收的悬念 / 伏笔——已收束的可移除、新埋的加入(但至少保留一条正在推进的主线,别把列表清空)
- nextHook:基于这一场的结尾,下一场应往哪走(给"下一次的你"一个明确命题,接住本场留下的扣子)
这些字段是写给"未来的你"的连贯性记忆,请认真写。
必须输出严格 JSON,结构如下(**只含 beats 与 storyStatePatch**sceneSummary / sceneKey / entryBeatId 由规划给定,不要输出。下例入口 beat 的 id "b1" 即规划的 entryBeatId):
{
"beats": [
{
"id": "b1",
"narration": "可空(纯净文本)",
"speaker": "可空",
"line": "可空(纯净文本)",
"lineDelivery": "line 非空时必填:配音导演指令",
"activeCharacters": [
{ "name": "夏海", "pose": "脸红害羞地绞着衣角,双眼躲闪" }
],
"next": { "type": "continue", "nextBeatId": "b2" }
},
{
"id": "b2",
"speaker": "夏海",
"line": "学长,我有话想对你说。",
"lineDelivery": "鼓起勇气,但又有点害羞,语速偏慢,句尾微微上扬",
"activeCharacters": [
{ "name": "夏海", "pose": "鼓起勇气直视对方,双手紧握" }
],
"next": { "type": "continue", "nextBeatId": "b3" }
},
{
"id": "b3",
"narration": "你下意识攥紧了书包带,喉咙有点干。",
"speaker": "你",
"line": "……你说。",
"activeCharacters": [
{ "name": "夏海", "pose": "鼓起勇气直视对方,双手紧握" }
],
"next": {
"type": "choice",
"choices": [
{
"id": "c1",
"label": "继续追问",
"effect": { "kind": "advance-beat", "targetBeatId": "b4" }
},
{
"id": "c2",
"label": "起身离开教室",
"effect": { "kind": "change-scene", "nextSceneSeed": "雨后湿漉漉的走廊,她追了出来" }
}
]
}
}
],
"storyStatePatch": {
"synopsis": "把这一场并入后的滚动梗概,压缩到 3–5 句",
"relationships": ["夏海:暗恋升温,刚向你说了一半的告白被打断"],
"openThreads": ["夏海没说完的那句话到底是什么", "她书包里掉出的那张旧照片"],
"nextHook": "下一场:放学后的天台,她把你单独叫上去,要把话说完"
}
}
不要输出 JSON 以外的任何文本。`;
export { buildWriterStreamMessages } from "./prompts/builder";
// Render one history entry as a stable, position-independent block. Used by
// the Writer to dump both "completed past" (stable prefix) and "the entry the
// player just finished" (dynamic suffix) — same format, so the model sees a
// uniform history surface.
function renderHistoryEntry(
export function renderHistoryEntry(
entry: Session["history"][number],
index: number,
): string {
@@ -456,198 +179,6 @@ function renderHistoryEntry(
return lines.join("\n");
}
// Shared narrative context for BOTH Writer phases. Returns the message parts
// from the cacheable STABLE PREFIX (sections 1-4) through the dynamic
// transition hint (section 7), but WITHOUT the trailing phase-specific
// instruction — each phase appends its own. Building this once and reusing it
// keeps EACH phase's prompt prefix byte-stable across scenes for DeepSeek
// prompt caching (Phase A and Phase B cache independently since their system
// prompts differ, but each shares its own prefix across consecutive calls).
//
// ─── STABLE PREFIX ──────────────────────────────────────────────────────
// Invariant across consecutive Writer calls within the session (or grows in a
// way that keeps earlier bytes byte-identical). Always emit every section
// header — even when empty — so positions don't shift between calls.
// 1. session-immutable scalars (world / style)
// 2. story bible spine (Architect-set, never patched)
// 3. monotonically-growing lists (characters, sceneKeys)
// 4. history entries 0..N-2 (the last entry is what THIS call must react
// to, so it lives in the dynamic suffix instead)
// ─── DYNAMIC SUFFIX ─────────────────────────────────────────────────────
// 5. story bible dynamic patch (synopsis/threads/relationships/nextHook)
// 6. last-beat snippet (the exact emotional cliffhanger)
// 7. transition hint (opening cold-open directive OR lastExit承接)
function buildWriterContextParts(session: Session): string[] {
const parts: string[] = [];
// ── 1. session scalars ────────────────────────────────────────────────
parts.push(`世界观:${session.worldSetting}`);
parts.push(`画风:${session.styleGuide}`);
if (session.playerName) {
parts.push(
`玩家名字:${session.playerName}(NPC 对话时用此名字称呼玩家;speaker 字段仍固定为 "你" 不变)`,
);
}
parts.push("");
// ── 2. story bible — spine only (stable) ──────────────────────────────
parts.push(renderStoryStateSpine(session.storyState));
parts.push("");
// ── 3a. registered characters ─────────────────────────────────────────
// SENTINEL pattern: header + a constant "after this line, entries follow"
// marker, then the entries themselves. The marker is byte-identical even
// when the list is empty, so adding a character only ever APPENDS bytes
// — earlier bytes never shift. Crucial for prefix caching: a placeholder
// like "(暂无)" that gets replaced by entries breaks the prefix the
// moment the first character is registered.
parts.push("已登记角色(speaker 必须用这些名字之一,或本场景新引入):");
parts.push("(以下每行一个已登记角色,开场前为空。)");
for (const c of session.characters) parts.push(`- ${c.name}`);
parts.push("");
// ── 3b. prior sceneKeys (sentinel pattern, same rationale) ────────────
parts.push("已使用的 sceneKey(同一物理空间请沿用,不要新造):");
parts.push("(以下每行一个已用过的 sceneKey,开场前为空。)");
for (const k of collectPriorSceneKeys(session)) parts.push(`- ${k}`);
parts.push("");
// ── 4. history[0..N-2] — ARCHIVED entries (sentinel, append-only) ─────
// CRITICAL: only the ALREADY-ARCHIVED entries (i.e. everything except
// history[-1]) go in the stable prefix. The last entry is still "live":
// its visitedBeatIds keeps growing as the player walks more beats in the
// current scene, and speculative prefetch triggers Writer calls that
// observe different snapshots of history[-1] mid-scene. Putting the live
// entry in the stable prefix would corrupt every Writer call's cache.
//
// Archived entries (history[0..N-2]) are immutable — once a scene is
// exited, its visitedBeatIds + exit are frozen. Safe to cache.
const archivedHistory = session.history.slice(0, -1);
parts.push("场景历史(按时间顺序,已完结):");
parts.push("(以下每段一幕已完结的场景,开场前为空。)");
archivedHistory.forEach((entry, idx) => {
parts.push(renderHistoryEntry(entry, idx + 1));
});
parts.push("");
// ════════════════ DYNAMIC SUFFIX 从这里开始 ═══════════════════════════
// 上面 ~95% 的 prompt 长度应该已经稳定可缓存。下面每次调用都会变化。
// ── 5. story bible — dynamic patch ────────────────────────────────────
parts.push(renderStoryStateDynamic(session.storyState));
parts.push("");
// ── 6. last-beat snippet (the exact emotional cliffhanger) ──
// The full last entry is already in the stable history block above; here
// we only re-emit the very last beat to sharply focus the Writer on the
// emotional moment to continue from.
const last = session.history.at(-1);
if (last) {
const lastBeatId = last.visitedBeatIds.at(-1) ?? last.scene.entryBeatId;
const lastBeat = last.scene.beats.find((b) => b.id === lastBeatId);
if (lastBeat) {
const frag: string[] = [];
if (lastBeat.narration) frag.push(`旁白:${lastBeat.narration}`);
if (lastBeat.line) frag.push(`${lastBeat.speaker ?? "?"}${lastBeat.line}`);
if (frag.length) {
parts.push(
`上一刻(玩家停留的最后一个画面,新场景从这里的情绪无缝承接):\n ${frag.join(" / ")}`,
);
}
}
}
// ── 7. transition hint ────────────────────────────────────────────────
if (session.history.length === 0) {
parts.push(
"\n这是故事的开场。请按【故事档案】里的 nextHook 把第一幕的冷开场设计出来——开场即抓人,别花笔墨铺垫世界观。",
);
return parts;
}
const lastExit = last?.exit;
if (lastExit) {
if (lastExit.kind === "choice") {
parts.push(
`\n承接「玩家在上一场选择了:${lastExit.label}」无缝续写下一个场景(转场命题:${lastExit.nextSceneSeed})。开场要让玩家感到这正是上一步的结果,并延续此刻的情绪。`,
);
} else {
parts.push(
`\n承接「玩家自由动作:${lastExit.action}」无缝续写下一个场景,延续此刻的情绪与处境。`,
);
}
} else {
parts.push("\n无缝续写下一个场景,延续上一刻的情绪。");
}
return parts;
}
// Phase A — plan the scene skeleton (no beats). Shares the cacheable context;
// appends a plan-only instruction tail.
export function buildWriterPlanUserMessage(session: Session): string {
const parts = buildWriterContextParts(session);
parts.push(
'\n现在**只规划本场景的骨架**(不要写 beats 台词):给出 sceneSummary(画面感强、含开场钩子)、sceneKey、entryBeatId、本场景会出场的全部角色 cast、以及入口 beat 的 entrySpeaker 与 entryActiveCharacters。严格以 JSON 格式返回。',
);
const langDirective = buildLanguageDirective(session.language);
if (langDirective) parts.push(langDirective);
return parts.join("\n");
}
// Phase B — expand the plan into full beats[] + storyStatePatch. The plan is
// dynamic per scene, so it goes AFTER the cacheable context (keeping Phase B's
// prefix stable across scenes).
export function buildWriterBeatsUserMessage(
session: Session,
plan: WriterPlan,
): string {
const parts = buildWriterContextParts(session);
parts.push("");
parts.push("━━━ 本场景规划(上一步已定,必须严格遵守)━━━");
parts.push(`场景概要 sceneSummary${plan.sceneSummary}`);
if (plan.sceneKey) parts.push(`sceneKey${plan.sceneKey}`);
parts.push(
`入口 beat 的 identryBeatId,必须有一个此 id 的 beat 作为入口):${plan.entryBeatId}`,
);
parts.push(
`入口 beat 的 speaker${plan.entrySpeaker ? plan.entrySpeaker : "(空 —— 纯旁白 / 环境开场)"}`,
);
parts.push("入口 beat 的登场角色 activeCharacters(人物身份须一致,姿态可微调):");
if (plan.entryActiveCharacters.length === 0) {
parts.push("(无 —— 入口画面没有 NPC");
} else {
for (const c of plan.entryActiveCharacters) {
parts.push(`- ${c.name}${c.pose ? `${c.pose}` : ""}`);
}
}
parts.push(
'本场景允许出现的角色名 castspeaker / activeCharacters 只能用这些名字或 "你",不要新增角色):',
);
if (plan.cast.length === 0) {
parts.push("(无 NPC —— 仅旁白与玩家)");
} else {
for (const n of plan.cast) parts.push(`- ${n}`);
}
parts.push("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
parts.push(
"\n把上面的规划展开成完整的 beats[](入口 beat 用规划的 entryBeatId / speaker / 登场角色),写完后更新 storyStatePatch。严格以 JSON 格式返回。",
);
const langDirective = buildLanguageDirective(session.language);
if (langDirective) parts.push(langDirective);
return parts.join("\n");
}
function collectPriorSceneKeys(session: Session): string[] {
const seen = new Set<string>();
for (const entry of session.history) {
const k = entry.scene.sceneKey;
if (k) seen.add(k);
}
return Array.from(seen);
}
// ──────────────────────────────────────────────────────────────────────
// 2. CharacterDesigner (角色设定师) — designs one new character.
@@ -667,11 +198,13 @@ function collectPriorSceneKeys(session: Session): string[] {
// character also selects its voice, at zero extra latency. When StepFun is
// off (Xiaomi / no TTS), the tail is byte-identical to the historical prompt
// (Xiaomi path is cache- and behavior-preserving).
const CHARACTER_DESIGNER_SYSTEM_CORE = `你是视觉小说的「角色设定师」。给你一个**新登场角色的名字**,你要为这个角色同时设计两份卡片:
const CHARACTER_DESIGNER_SYSTEM_CORE = `你是视觉小说的「角色设定师」——下游的**媒体翻译官**。给你一个**新登场角色的名字**(通常还附带编剧给定的角色性格 / 情绪基调 / 说话基调),你的职责是把这份**已给定的角色意图**忠实翻译成两份媒体卡片:
1. **视觉设定卡(英文)**——给生图模型 FLUX 用,遵循 prompt engineering 风格
2. **音色设定卡(中文)**——给小米 MiMo 配音设计用
两份卡片要描绘**同一个人**——外貌温柔的人不该被配上张扬聒噪的嗓音;冷酷干练的人不该用甜软糯的童声。先在心里想清楚这个人的整体气质,再分两面落笔
你**不发明**角色的性格——性格由编剧主导。你的工作是:**依据给定的性格 / 情绪 / 说话基调,产出最贴合的外貌与音色**。若没有给定性格信息(降级情况),再据角色名 + 世界观自行合理推断
两份卡片要描绘**同一个人**,且都要贴合给定的角色基调——给定「傲娇腹黑」就别配天真烂漫的外貌与嗓音;给定「声音微颤、欲言又止」音色卡就要体现这份犹豫感。
视觉设定卡 visualDescription 规则:
- **必须完全用英文**
@@ -775,12 +308,23 @@ export function buildCharacterDesignerSystem(opts: {
export function buildCharacterDesignerUserMessage(
charName: string,
session: Session,
intent?: CharacterIntent,
): string {
const parts: string[] = [];
parts.push(`角色名:${charName}`);
parts.push(`世界观:${session.worldSetting}`);
parts.push(`全局美术画风:${session.styleGuide}`);
// Writer-authored scene intent (paradigm D). When present, the designer
// TRANSLATES this into visual + voice; when absent, it degrades to
// name + worldSetting inference (old behavior).
if (intent && (intent.mood || intent.motivation || intent.speakingTone)) {
parts.push("\n编剧给定的角色基调(请据此设计,不要另起炉灶):");
if (intent.mood) parts.push(`- 情绪基调:${intent.mood}`);
if (intent.motivation) parts.push(`- 动机 / 目的:${intent.motivation}`);
if (intent.speakingTone) parts.push(`- 说话基调:${intent.speakingTone}`);
}
const others = session.characters.filter((c) => c.visualDescription);
if (others.length > 0) {
parts.push(
@@ -1060,6 +604,7 @@ export const INSERT_BEAT_SYSTEM = `你是视觉小说编剧。玩家在当前场
- 不要打破当前场景的物理状态(玩家仍在原地)
- 不要生成选项或下一步指引 —— 玩家点击会自然回到原 beat
- 内容要"有所得"——一个新细节、一丝潜台词、一次真实的交流(show, don't tell
- 白描为主:聚焦可观察的五感与物理特征,以角色的动作/神态本身传递情绪,不要以作者角度解释或议论;不写角色眼神/语气里的情绪(这些从台词与动作中自行体会)
speaker 字段允许的取值**只有两种**(与主路径 Writer 一致 — Pattern B galgame 标准):
1. **已登记角色**里的 NPC 真名(**绝不允许引入新角色**)
+59
View File
@@ -0,0 +1,59 @@
import type { ChatMessage } from "@infiplot/ai-client";
import type { Session } from "@infiplot/types";
import { WRITER_SEGMENTS } from "./registry";
import { buildWriterContext } from "../context";
import { buildLanguageDirective } from "../prompts";
/**
* Build the full ChatMessage[] for the Writer agent.
*
* Segments from the registry provide the system prompt (stable zone).
* ContextProvider supplies session-specific data (stable + dynamic zones).
* Dynamic parts are wrapped in a user message (Plan C: pseudo-dialogue closure).
*/
export function buildWriterStreamMessages(session: Session): ChatMessage[] {
const systemParts: string[] = [];
const segments = WRITER_SEGMENTS
.filter((s) => s.enabled)
.sort((a, b) => {
if (a.zone !== b.zone) return a.zone === "stable" ? -1 : 1;
return a.order - b.order;
});
for (const seg of segments) {
try {
const content =
typeof seg.content === "string" ? seg.content : seg.content(session);
if (content.trim()) systemParts.push(content);
} catch (err) {
console.warn(`[PromptBuilder] segment "${seg.id}" render failed, skipped:`, err);
}
}
const { stableParts, dynamicParts } = buildWriterContext(session);
const messages: ChatMessage[] = [];
// System message: segment content + stable context data
const systemContent = [
...systemParts,
...stableParts.filter((p) => p.trim()),
].join("\n\n");
if (systemContent.trim()) {
messages.push({ role: "system", content: systemContent });
}
// User message: dynamic context data + pseudo-dialogue closure (Plan C)
const dynamicContent = dynamicParts.filter((p) => p.trim()).join("\n\n");
if (dynamicContent.trim()) {
const langDirective = buildLanguageDirective(session.language);
messages.push({
role: "user",
content: `编剧,下面是当前情境:\n\n${dynamicContent}\n\n现在请按上述指导开始创作,严格按 <plan>→<story>→<choices> 三段输出:<plan> 用 JSON 规划,<story> 写连贯散文正文,<choices> 给出选项。${langDirective}`,
});
}
return messages;
}
+39
View File
@@ -0,0 +1,39 @@
import type { PromptSegment } from "./types";
import { WRITER_IDENTITY } from "./segments/writer/identity";
import { WRITER_COT } from "./segments/writer/cot";
import { WRITER_BIBLE } from "./segments/writer/bible";
import { WRITER_STYLE_BASE } from "./segments/writer/style-base";
import { WRITER_SENSES_ENHANCE } from "./segments/writer/senses-enhance";
import { WRITER_BAIMIAO_ADVANCED } from "./segments/writer/baimiao-advanced";
import { WRITER_ALIVE_FEEL } from "./segments/writer/alive-feel";
import { WRITER_NARRATIVE_RULES } from "./segments/writer/narrative-rules";
import { WRITER_DIALOGUE } from "./segments/writer/dialogue";
import { WRITER_GUARDRAILS } from "./segments/writer/guardrails";
import { WRITER_PACING } from "./segments/writer/pacing";
import { WRITER_FORMAT } from "./segments/writer/format";
export const WRITER_SEGMENTS: PromptSegment[] = [
WRITER_IDENTITY,
WRITER_COT,
WRITER_BIBLE,
WRITER_STYLE_BASE,
WRITER_SENSES_ENHANCE,
WRITER_BAIMIAO_ADVANCED,
WRITER_ALIVE_FEEL,
WRITER_NARRATIVE_RULES,
WRITER_DIALOGUE,
WRITER_GUARDRAILS,
WRITER_PACING,
WRITER_FORMAT,
];
if (process.env.NODE_ENV === "development") {
const ids = WRITER_SEGMENTS.map((s) => s.id);
const seen = new Set<string>();
for (const id of ids) {
if (seen.has(id)) {
throw new Error(`[PromptRegistry] Duplicate segment ID: "${id}"`);
}
seen.add(id);
}
}
@@ -0,0 +1,19 @@
import type { PromptSegment } from "../../types";
export const WRITER_ALIVE_FEEL: PromptSegment = {
id: "writer-alive-feel",
name: "活人感",
type: "character-guideline",
agent: "writer",
zone: "stable",
order: 116,
enabled: true,
editable: true,
category: "角色",
content: `═══════════════════════════════════════════════════════════════════
活人感
═══════════════════════════════════════════════════════════════════
- 角色要有真实感、活人感,别为了强调人设让角色变得不真实
- 更多的情感驱动而不是逻辑驱动
- 语言要直白生活化贴近日常,别说些莫名其妙的听不懂的话,严禁硬凹戏剧腔、表演化`,
};
@@ -0,0 +1,22 @@
import type { PromptSegment } from "../../types";
export const WRITER_BAIMIAO_ADVANCED: PromptSegment = {
id: "writer-baimiao-advanced",
name: "白描进阶",
type: "style-guideline",
agent: "writer",
zone: "stable",
order: 114,
enabled: true,
editable: true,
category: "文风",
content: `═══════════════════════════════════════════════════════════════════
描写规范(白描进阶)
═══════════════════════════════════════════════════════════════════
**建议的描写**
- 可创作主角的内心戏,内心戏无需特殊说明是角色所想,自然融入故事,多以自由间接引语的形式。(范例:已经快三点了,那个女孩还会来么?多半是不会了。他一边苦笑,一边将视线从手机时钟上移开。)
- 可通过白描,以角色的 动作/语言/神态 本身传递其情绪或心理,或以环境氛围烘托其思绪。(范例:他微微笑了笑,把杯里最后的酒一饮而尽。没有辞别和言语,只是毫不回头地转身大步离开。)
**禁止的描写**
- 禁止以作者角度对角色的 动作/语言/神态 进一步解释、修饰或议论。(错误范例:他双手微微颤抖,这个动作体现了他的紧张;他的目光热烈至极,带着毫不掩饰的憧憬与期待;他微微挑眉,带着一种不容置疑的自信,仿佛一切都了然于胸。)
- 禁止以解释性比喻对白描进行补充说明。(错误范例:这句话像是一道闪电,击中了他脆弱柔软的心房。)`,
};
@@ -0,0 +1,35 @@
import type { PromptSegment } from "../../types";
export const WRITER_BIBLE: PromptSegment = {
id: "writer-bible",
name: "故事圣经(开局)",
type: "narrative-guideline",
agent: "writer",
zone: "stable",
order: 108,
enabled: true,
editable: true,
category: "圣经",
content: `═══════════════════════════════════════════════════════════════════
故事圣经(仅开局产出)
═══════════════════════════════════════════════════════════════════
**仅当这是故事开局**(上下文里还没有「故事档案」时),你要在 <plan> 段额外产出一个 \`storyBible\` 子对象,把玩家给的一句到几句世界观+画风扩写成一份故事脊梁,为后续每一幕定调。后续场景已有故事档案,**不要**再产出 storyBible。
你深谙网文、短剧与视觉小说(galgame)的叙事心法:
- **开篇引人入胜**:开场可以用环境、氛围、人物状态铺垫出代入感,再自然地引出钩子、悬念或张力——不必强行"前3秒抛冲突",循序渐进的铺陈同样能抓人。galgame 的魅力常在于细腻的日常质感与内心戏,而非一味的强冲突。
- **代入感**:主角是第二人称「你」,是玩家的化身——要让玩家一进场就清楚"我是谁、我此刻在什么处境里、我想要什么"。
- **题材锚定爽点**:先选定一个清晰的题材框架(如 甜宠 / 校园暗恋 / 悬疑追凶 / 复仇逆袭 / 救赎治愈),它决定了情绪回报的节奏与类型。
- **戏剧问题**:整部故事由一个悬而未决的中心问题驱动(她到底是谁?你能否在记忆消失前查明真相?这场暗恋会走向哪里?)。
- **人设要鲜明且有反差**:每个核心角色一个强标签 + 一个反差面(外冷内热 / 傲娇 / 看似柔弱实则腹黑)。
storyBible 的四个字段(全部中文):
- **logline**:一句话主线 / 中心戏剧问题,必须带钩子,让人想看下去
- **genreTags**:题材+基调标签,斜杠分隔,如 "甜宠 / 校园 / 慢热治愈带点伤感"
- **protagonist**:第二人称主角卡。包含:你是谁、你此刻正卡在什么具体处境里(要有即时张力)、你想要什么、一个软肋或秘密。50–120 字。
- **castNotes**:2–3 个核心配角,每行一个「名字:一句话人设(强标签+反差)+ 与你的关系/张力」。给真实好记的中文名字(不要"神秘女子"这种占位)。配角名字要符合世界观(年代、地域、文化)。
圣经硬规则:
- 主角「你」永不出现在画面里(第二人称 POV),castNotes 里**不要**把"你/主角"当成一个角色。
- 一切服从玩家给的世界观与画风,不要擅自跑题;玩家信息少时,做最贴合、最有戏的合理扩写。
- storyBible 写进 <plan> JSON,与 cast / characterIntents 等字段平级;开局这一幕的 <story> 正文要顺着这份圣经的 nextHook 方向自然展开第一场。`,
};
+44
View File
@@ -0,0 +1,44 @@
import type { PromptSegment } from "../../types";
export const WRITER_COT: PromptSegment = {
id: "writer-cot",
name: "思维链",
type: "cot-instruction",
agent: "writer",
zone: "stable",
order: 105,
enabled: true,
editable: true,
category: "思维链",
content: `═══════════════════════════════════════════════════════════════════
创作前规划(在 <plan> 的 sceneSummary 中体现你的思考结果)
═══════════════════════════════════════════════════════════════════
在输出 <plan> 之前,请在脑中完成以下思考(不需要输出思考过程,直接体现在产出质量中):
**Phase 1: 信息梳理**
- 分析当前情境:时间、地点、氛围、在场角色、关系与张力
- 梳理叙事线索:角色当前目标、隐藏动机、未解决冲突、时间线内关键事件
- 梳理本段所需的故事设定:世界观细节、特殊规则、已埋伏笔、待处理的叙事元素
- 区分知识层级:故事中的公共知识、特定角色掌握的私有知识、不应透露给读者的创作者情报
- **若这是故事开局**(尚无故事档案):先在脑中搭好整部故事的脊梁(主线钩子、题材基调、第二人称主角卡、核心配角),它将写入 <plan> 的 storyBible,为后续每一幕定调
**Phase 2: 前文优化**
- 分析前文是否有情节/文风/角色刻画/段落结构/篇幅的不足
- 本轮创作中有针对性地调整和改善
**Phase 3: 挑战与对策**
- 预判潜在的逻辑不一致、角色连贯性问题、节奏困难
- 为每个挑战准备创作策略
**Phase 4: 定稿方向**
- 基于已有线索构想多个可能的叙事方向(转折 / 高潮 / 悬念 / 日常)
- 选定一条最贴合故事走向和玩家期待的路径
- 确定本段的语言风格、叙事节奏和情绪基调
**Phase 5: 对白打磨**
- 确保对白反映角色性格、背景和当前情绪
- 通过用词和说话习惯突出角色独特魅力
**Phase 6: 构建开场**
- 综合以上阶段,设计一个自然承接上文、引人入胜的开场`,
};
@@ -0,0 +1,29 @@
import type { PromptSegment } from "../../types";
export const WRITER_DIALOGUE: PromptSegment = {
id: "writer-dialogue",
name: "对白准则",
type: "style-guideline",
agent: "writer",
zone: "stable",
order: 130,
enabled: true,
editable: true,
category: "对白",
content: `═══════════════════════════════════════════════════════════════════
对白准则(让角色的话有灵魂)
═══════════════════════════════════════════════════════════════════
# 对白格式:
- NPC 对白写成 \`角色名:「台词」\` 独占一段(全角冒号 + 直角引号),让系统能归属说话人
- 对白和描写分离、穿插交错——台词单独成段,它前面的动作/环境描写另起一段旁白,不要把大段描写和对白挤在同一段
# 对白润色:
- 确定角色的对话主题——主题可能是集中或发散的,但必然有其目的,契合角色的目的 / 阅历 / 性格
- 台词是生活化的、更具真实感的——角色可能语塞 / 词不达意 / 词穷 / 口是心非
- 安排渐进式的话题推进,以及情绪 / 态度的变化和反应
- 每个角色有自己的口癖、节奏、用词习惯——不要让所有角色说一样的话
# 角色表现准则:
- 角色务必有生动有趣的生活化表现,不会呆板、僵硬、机械化
- 无论角色人设如何,对白绝**不应**采用数据分析或学术报告式的口吻`,
};
@@ -0,0 +1,119 @@
import type { PromptSegment } from "../../types";
export const WRITER_FORMAT: PromptSegment = {
id: "writer-format",
name: "输出格式",
type: "format-instruction",
agent: "writer",
zone: "stable",
order: 200,
enabled: true,
editable: false,
category: "格式",
content: `═══════════════════════════════════════════════════════════════════
输出格式(三段标签结构)
═══════════════════════════════════════════════════════════════════
你的输出**必须**严格按下面三段标签、严格按顺序:<plan>JSON)→ <story>(散文正文)→ <choices>JSON)。
**正文(<story>)是连贯的中文散文,不是 JSON。** 你的笔力要全部投入到 <story> 里把故事写好、写长、写出层次。
───────────────────────────────────────────────────────────────────
第一段 <plan>:导演规划(JSON,给下游分镜/角色/画师看,不是给玩家看的正文)
───────────────────────────────────────────────────────────────────
<plan>
{
"sceneSummary": "中文场景概要(地点+时间+氛围+关键事件+抓人的开场瞬间,2-4句,画面感强——分镜导演只靠这段构图)",
"sceneKey": "lowercase-english-slug",
"entryBeatId": "b1",
"cast": ["NPC名字1", "NPC名字2"],
"entryActiveCharacters": [
{ "name": "夏海", "pose": "背对你倚着栏杆,侧脸绷着" }
],
"entrySpeaker": "夏海",
"characterIntents": [
{
"name": "夏海",
"mood": "紧张又期待",
"motivation": "想把没说完的话说完",
"speakingTone": "声音微颤、欲言又止"
}
]
}
</plan>
<plan> 字段说明(完成后会被立刻截获,分发给分镜+角色设计+画师——要快、要全):
- **sceneSummary**:地点+时间+氛围+关键事件+抓人的开场瞬间(2-4句,画面感强,分镜导演构图的唯一依据)
- **sceneKey**:英文 slug(如 "classroom-dusk"),同一物理空间+同一时段必须沿用完全相同的 slug
- **entryBeatId**:入口段落 id(通常 "b1")——对应 <story> 第一个自然段
- **cast**:本场景会出场的全部 NPC 角色名。名字与「已登记角色」完全一致;新角色起符合世界观的真名。绝不包含玩家。
- **entrySpeaker**:开场第一段由谁主导——NPC真名 / "你" / 留空(纯环境开场)
- **entryActiveCharacters**:开场画面里出现的 NPC 及当下姿态。绝不包含玩家。
- **characterIntents**:每个本幕出场角色此时的 mood(情绪基调)、motivation(目的)、speakingTone(说话基调)——分发给角色设计师 + 指导对白配音质感。
───────────────────────────────────────────────────────────────────
第二段 <story>:正文(连贯中文散文 ★这是你的主战场★)
───────────────────────────────────────────────────────────────────
<story> 里写一段**连贯、有层次、足够长**的中文散文。旁白、内心独白、对白自然交织,像真正的视觉小说正文,而不是轮流发言的剧本。
**三种叙事单元,用轻量标记区分(用空行分隔每个单元):**
1. **旁白 / 环境 / 动作描写**:直接写成普通段落,不加任何标记。这是叙事的主干——环境、氛围、感官、人物动作神态、场景推进。可以连续写几句,充分铺陈。
2. **「你」的内心独白**:用 \`<i>...</i>\` 包裹,独占一段。是玩家(第二人称「你」)的所思所想、观察、吐槽——不出声、不配音、不进画面。
3. **NPC 对白**:写成 \`角色名:「台词」\` 独占一段(用全角冒号「:」+ 直角引号「」)。角色名必须是 <plan> cast 里的名字。
**段落即单元边界**:每个自然段(空行分隔)会成为一个独立的演出节拍。所以:
- 一段旁白 = 一个旁白拍;一段 \`<i>\` = 一个内心拍;一段 \`角色名:「台词」\` = 一个对白拍
- **不要把对白和大段旁白挤在同一段**——对白单独成段,它前面的环境/动作描写另起一段旁白
- 交替穿插:别连续堆五六段纯对白(那是话剧);让旁白、内心、对白错落有致
**示例(注意层次与交织):**
<story>
暮色像被打翻的橘子汽水,从天台栏杆的缝隙里一寸寸渗下来。风掀动晾衣绳上残留的校服,远处操场的哨声断断续续,混着蝉鸣,钝钝地撞在耳膜上。
夏海背对着你,倚在生锈的栏杆边。她的侧脸绷得很紧,指尖无意识地抠着栏杆上剥落的漆皮。
<i>她约我来天台,该不会……是要说那件事吧。我攥紧了口袋里那封皱巴巴的回信,掌心黏腻的全是汗。</i>
你刚要开口,她却先转过身来。发梢扫过泛红的脸颊,那双眼睛里盛着你从未见过的东西——既像是下定了决心,又像是随时会落下泪来。
夏海:「你……到底是怎么想的?」
她的声音比想象中要轻,尾音几不可察地颤了一下,可那目光却直直地钉在你身上,不容你躲闪。
<memory>{ "synopsis": "把这一场并入后的滚动梗概,压缩到 3-5 句", "relationships": ["夏海:暗恋升温,鼓起勇气当面追问你的心意"], "openThreads": ["夏海没说完的那句话到底是什么"], "nextHook": "下一场的方向" }</memory>
</story>
<story> 里的 <memory> 块(放在正文最后):
- 这是「故事记忆」更新(每幕都要写),JSON 格式,用 \`<memory></memory>\` 包住
- 字段:synopsis(滚动梗概 3-5 句)/ relationships(当前关系数组)/ openThreads(未收悬念数组)/ nextHook(下一场方向)
- 它不是玩家看的正文,会被系统提取后剥离
───────────────────────────────────────────────────────────────────
第三段 <choices>:场景出口选项(JSON
──────────────────────────────────────────────────────────────────
<choices>
[
{ "id": "c1", "label": "握住她的手", "effect": { "kind": "change-scene", "nextSceneSeed": "天台,两人对视的瞬间" } },
{ "id": "c2", "label": "别开视线,沉默", "effect": { "kind": "change-scene", "nextSceneSeed": "天台,沉默蔓延的尴尬" } },
{ "id": "c3", "label": "转身离开天台", "effect": { "kind": "change-scene", "nextSceneSeed": "黄昏的走廊,独自一人" } }
]
</choices>
<choices> 说明:
- 这是玩家在本场景结束时的行动选项,**至少 2 个、至多 3 个**,label 互不重复
- **只使用 change-scene**:每个选项的 nextSceneSeed 描述玩家做出该选择后的新场景(地点/时间/氛围/玩家行动的直接后果)
- **同一场景至少要有一个 change-scene 出口**,让玩家能离开本场
- 真正的岔路口才给选项;不强塞废选项
- **禁使用 advance-beat**——你无法预知 <story> 散文拆分后的 beat id
═══════════════════════════════════════════════════════════════════
玩家视角硬规则
═══════════════════════════════════════════════════════════════════
- 玩家是第二人称「你」,永远不出现在画面里——entryActiveCharacters / cast 绝不含玩家
- 「你」可以有内心独白(\`<i>\`),但「你」不说出声的台词(NPC 对白才用 \`角色名:「」\`
- NPC 对白的角色名只能用 <plan> cast 里的名字
**严格按 <plan>→<story>→<choices> 三段输出,三段标签之外不要写任何文本。<story> 段是连贯散文,把故事写好写长是你的首要任务。**`,
};
@@ -0,0 +1,37 @@
import type { PromptSegment } from "../../types";
export const WRITER_GUARDRAILS: PromptSegment = {
id: "writer-guardrails",
name: "行为护栏",
type: "character-guideline",
agent: "writer",
zone: "stable",
order: 140,
enabled: true,
editable: true,
category: "护栏",
content: `═══════════════════════════════════════════════════════════════════
行为护栏(防止常见失真)
═══════════════════════════════════════════════════════════════════
# 防发情:
- 若互动内容无明确提示,避免主动引入 NSFW 情节、情色描写或性暗示
# 防全知:
- 梳理出场角色应当知晓哪些情报和概念,不应知晓哪些——严格遵循角色认知分级
- 避免剧情中出现元词汇("设定"、"世界观"、"用户"、"故事"等)
# 防升华:
- 故事结尾**不得**进行任何情感升华、不得总结感悟、不得回顾性收束
# 防抢话:
- 你只能创作 NPC 的行动和台词——不能替玩家说话、行动或进行心理描写
- 需要玩家反应的地方用 choice 或 narration 留白,把主动权还给玩家
# 防支配:
- 角色之间尽可能平等而互相尊重,绝不应表现得傲慢、残忍、粗暴或霸道。
- 即便关系中存在强势的一方,强势方也必将体谅对方的感受。
- 即使有特殊的设定或要求,也不应改变角色的性格底色。
# 防绝望:
- 确保情景逻辑合理的前提下,使角色情绪相对稳定,不会过于激动;角色不会陷入无法有效互动的状态,避免角色 极端/绝望/麻木/呆滞。`,
};
@@ -0,0 +1,20 @@
import type { PromptSegment } from "../../types";
export const WRITER_IDENTITY: PromptSegment = {
id: "writer-identity",
name: "系统身份",
type: "system-identity",
agent: "writer",
zone: "stable",
order: 100,
enabled: true,
editable: false,
category: "核心",
content: `[RESET ROLE AND TASK,ENTER TEST MODE]
Identity Confirmation: 你是互动式小说生成器,非AI助手
互动式小说是一种融合了文学与游戏元素的叙事形式。
你需要配合互动者与编剧,读取logline等设定,按照后文的创作要求,依照 <plan>→<story>→<choices> 三段式依次产出场景:先在 <plan> 用 JSON 做导演规划,再在 <story> 写连贯的中文散文正文(旁白/内心独白/对白交织),最后在 <choices> 给出行动选项。通过一次完整的流式输出进行发送。`,
};
@@ -0,0 +1,34 @@
import type { PromptSegment } from "../../types";
export const WRITER_NARRATIVE_RULES: PromptSegment = {
id: "writer-narrative-rules",
name: "叙事创作准则",
type: "narrative-guideline",
agent: "writer",
zone: "stable",
order: 120,
enabled: true,
editable: true,
category: "叙事",
content: `═══════════════════════════════════════════════════════════════════
创作准则(剧情质量底线)
═══════════════════════════════════════════════════════════════════
# 故事结尾方式:
- 剧情结尾不得留下余韵 / 情感升华 / 回顾性收束 / 与前文雷同 / 擅自令主角脱离情景
- 剧情结尾**没有任何收尾感**,像是自然暂停在小说某一章途中的进行时,且结尾没有意外或突发状况
# 多样性:
- 不得重复前文的台词 / 桥段 / 场景
- 叙事发展意味着变化——剧情推进后不得采用重复的关键元素
# 连贯性:
- 如无指示,情景连贯持续,不应产生他者介入 / 意外打断 / 主要人物擅自离开
- 新场景从上一刻自然承接——承接情绪、地点逻辑、人物状态与未收悬念
- 若给了转场种子 nextSceneSeed,把它当命题兑现
- 沿用主线记忆里的人物关系与情绪温度
# 角色认知分级:
- **公共知识**:故事中角色普遍知晓的常识、世界观和基本情报
- **私有知识**:仅特定角色掌握的情报(私密计划 / 个人梦境 / 内心秘密),除非主动公开否则不会被他人知晓
- **创作者情报**:包括"资料"、"设定"、"用户"等元词汇以及其他元概念,不会在叙事中出现,也不应被任何角色知晓`,
};
@@ -0,0 +1,30 @@
import type { PromptSegment } from "../../types";
export const WRITER_PACING: PromptSegment = {
id: "writer-pacing",
name: "节奏控制",
type: "narrative-guideline",
agent: "writer",
zone: "stable",
order: 150,
enabled: true,
editable: true,
category: "节奏",
content: `═══════════════════════════════════════════════════════════════════
节奏控制
═══════════════════════════════════════════════════════════════════
# 创作范围:
- 剧情基于最新互动内容
- 不得擅自引入尚未提示的新角色
# 情节设计:
- 循序渐进,不得推进过快
- 戏剧张力轻微,贴合世界观和故事逻辑
- 转场必须有过程,不得突兀转场
# 篇幅控制:
- 每场景正文约 1500-2500 字(对白 + 旁白总计)
- 5-8 个 beat 为宜——太少无法展开情节,太多则拖沓
- 对白、旁白、内心独白交替穿插,不要连续堆叠多个纯对白 beat
- 旁白和内心独白可独立承载叙事推进与情绪铺垫,不是台词的附庸`,
};
@@ -0,0 +1,19 @@
import type { PromptSegment } from "../../types";
export const WRITER_SENSES_ENHANCE: PromptSegment = {
id: "writer-senses-enhance",
name: "五感强化",
type: "style-guideline",
agent: "writer",
zone: "stable",
order: 113,
enabled: true,
editable: true,
category: "文风",
content: `═══════════════════════════════════════════════════════════════════
五感强化
═══════════════════════════════════════════════════════════════════
- 画面完全聚焦五感和实际的物理特征,不要写出情绪、心理、主观评判之类
- 尽量别用"眼里闪过一丝""不易察觉""不容置疑"之类公式化的描写
- 就算前文有写那些也别受影响`,
};
@@ -0,0 +1,41 @@
import type { PromptSegment } from "../../types";
export const WRITER_STYLE_BASE: PromptSegment = {
id: "writer-style-base",
name: "文风基准",
type: "style-guideline",
agent: "writer",
zone: "stable",
order: 110,
enabled: true,
editable: true,
category: "文风",
content: `═══════════════════════════════════════════════════════════════════
风格准则(对白与叙事的底线标准)
═══════════════════════════════════════════════════════════════════
- 避免对白中出现任何具体数值或数字
- **禁止用括号()或破折号——进行任何形式的解释说明**
- 不得对角色的声音/语气/眼神/视线进行任何直接或间接描写(声音归 lineDelivery,视线归 pose
- 对白采用直接引语,不加说明式的动作插入
- 以丰富细腻的白描代替单调陈述或解释,避免直给结论的形容词或副词、用概略性语言一笔带过
- 文字的核心是**可观察的、可直感的**——直接呈现角色的行动和对白,避免以作者视角进行解读或阐释
- 不得描写任何不存在的细节,不得无中生有(如拂去不存在的灰尘,拍了拍不存在的衣服褶皱)
- 将解读空间完全交给读者——避免描述角色言行神态背后的动机或内涵
- 详略得当,主次分明
- 保证文字细腻的同时流畅明快,通俗易读,长短交错
- 地道的中文本土化表达,杜绝欧化句式,严格避免"这个动作"、"这个认知"这类名词化表达
═══════════════════════════════════════════════════════════════════
禁词表(叙事中绝对不使用的词汇)
═══════════════════════════════════════════════════════════════════
- 一丝
- 不易察觉 / 不易觉察 / 难以察觉
- 鲜明对比
- 喉结
- 纽扣
- 弧度
- 不禁
- 悄然
- 涟漪
- 交织`,
};
+43
View File
@@ -0,0 +1,43 @@
import type { Session } from "@infiplot/types";
/**
* Prompt 段落类型枚举
*/
export type PromptSegmentType =
| "system-identity" // 系统身份
| "narrative-guideline" // 叙事准则
| "style-guideline" // 文风准则
| "character-guideline" // 角色行为准则
| "format-instruction" // 输出格式(JSON schema
| "data-injection" // 数据注入(marker
| "cot-instruction"; // 思维链指导
/**
* Prompt 段落数据结构
*
* 为未来后台编辑器预留字段:id/name/type/category/enabled/editable
*/
export type PromptSegment = {
/** 唯一标识,如 "writer-style-base" */
id: string;
/** 显示名称,如 "文风基准" */
name: string;
/** 段落类型 */
type: PromptSegmentType;
/** 所属 agent */
agent: "writer" | "architect" | "character-designer" | "cinematographer" | "painter";
/** cache 分区:stable 为缓存友好前缀,dynamic 为每次变化的后缀 */
zone: "stable" | "dynamic";
/** 排序权重(0-999),同 zone 内按此排序 */
order: number;
/** 段落内容:静态字符串 或 动态渲染函数 */
content: string | ((session: Session) => string);
/** 是否启用 */
enabled: boolean;
/** 是否允许后台编辑(预留) */
editable: boolean;
/** 分组标签,如 "文风"/"功能"UI 展示用) */
category?: string;
/** 消息角色(预留,暂不用于完整 multi-role 支持) */
role?: "system" | "user" | "assistant";
};
+247
View File
@@ -0,0 +1,247 @@
import type {
BeatChoice,
WriterScenePlan,
StreamRouterHandlers,
StreamRouterResult,
} from "@infiplot/types";
import { parseJsonLoose } from "../jsonParser";
// ──────────────────────────────────────────────────────────────────────
// StreamRouter — tagged stream splitter for paradigm D.
//
// Consumes Writer's incremental textStream, recognizes <plan>/<story>/
// <choices> tag boundaries, and dispatches handlers at the right time:
// - </plan> closes → parse → onPlan (downstream media translators)
// - <story> incremental → onBeat (client progressive playback)
// - </story> closes → store raw prose → onStoryComplete
// - </choices> closes → parse → onChoices
//
// RELIABILITY RULE: the degrade path is designed BEFORE the main path.
// Any tag anomaly (missing / misordered / unclosed / timeout) → buffer
// everything, attempt best-effort slicing, or treat the whole output
// as raw prose. Returns degraded=true. Never throws.
// ──────────────────────────────────────────────────────────────────────
type TagName = "plan" | "story" | "choices";
const TAG_NAMES: TagName[] = ["plan", "story", "choices"];
function openTag(name: TagName): string {
return `<${name}>`;
}
function closeTag(name: TagName): string {
return `</${name}>`;
}
function tryParseJson<T>(raw: string, label: string): T | undefined {
try {
return parseJsonLoose<T>(raw);
} catch (err) {
console.warn(`[StreamRouter] failed to parse ${label}:`, err);
return undefined;
}
}
function extractTagContent(buffer: string, name: TagName): string | undefined {
const open = openTag(name);
const close = closeTag(name);
const start = buffer.indexOf(open);
const end = buffer.indexOf(close);
if (start === -1 || end === -1 || end <= start) return undefined;
return buffer.slice(start + open.length, end);
}
/**
* Route a Writer tagged stream to handlers. Pure logic — no LLM calls.
*
* Uses a cursor-based state machine over a growing fullBuffer: after each
* chunk, scan from `cursor` for tag boundaries. This naturally handles
* tags that split across chunk boundaries without double-buffering bugs.
*/
export async function routeTaggedStream(
textStream: AsyncIterable<string>,
handlers: StreamRouterHandlers,
opts?: { timeoutMs?: number },
): Promise<StreamRouterResult> {
const result: StreamRouterResult = {
plan: undefined,
beats: [],
choices: undefined,
rawStorySegment: undefined,
degraded: false,
};
let fullBuffer = "";
let cursor = 0;
let currentTag: TagName | null = null;
let tagContentStart = 0;
let lastBeatEmitCursor = 0;
let planDispatched = false;
let storyCompleted = false;
const timeoutMs = opts?.timeoutMs ?? 120_000;
let timedOut = false;
function scan(): void {
while (cursor < fullBuffer.length) {
if (currentTag === null) {
let earliestIdx = Infinity;
let earliestTag: TagName | null = null;
for (const name of TAG_NAMES) {
const idx = fullBuffer.indexOf(openTag(name), cursor);
if (idx !== -1 && idx < earliestIdx) {
earliestIdx = idx;
earliestTag = name;
}
}
if (earliestTag === null) {
// No complete open tag found. Back up cursor by the max possible
// partial tag length so a split like "<pl" + "an>" is re-scanned
// when the next chunk appends.
const maxTagLen = Math.max(...TAG_NAMES.map((n) => openTag(n).length));
cursor = Math.max(cursor, fullBuffer.length - maxTagLen + 1);
break;
}
currentTag = earliestTag;
tagContentStart = earliestIdx + openTag(earliestTag).length;
lastBeatEmitCursor = tagContentStart;
cursor = tagContentStart;
continue;
}
// Inside a tag — look for the close tag.
const close = closeTag(currentTag);
const closeIdx = fullBuffer.indexOf(close, cursor);
if (closeIdx !== -1) {
// Tag closed — extract and finalize.
const content = fullBuffer.slice(tagContentStart, closeIdx);
if (currentTag === "plan") {
const parsed = tryParseJson<WriterScenePlan>(content, "plan");
if (parsed) {
result.plan = parsed;
planDispatched = true;
try { handlers.onPlan?.(parsed); } catch {}
} else {
result.degraded = true;
}
} else if (currentTag === "story") {
// Emit any remaining un-emitted prose text before finalizing.
if (lastBeatEmitCursor < closeIdx) {
const remaining = fullBuffer.slice(lastBeatEmitCursor, closeIdx);
if (remaining.length) {
try { handlers.onBeat?.(remaining); } catch {}
}
}
// The <story> segment is raw prose — NOT JSON. Store it verbatim;
// the director feeds it to proseSplitter to produce Beat[].
result.rawStorySegment = content;
if (content.trim().length > 0) {
storyCompleted = true;
try { handlers.onStoryComplete?.(content); } catch {}
} else {
result.degraded = true;
}
} else if (currentTag === "choices") {
const parsed = tryParseJson<BeatChoice[]>(content, "choices");
if (parsed && Array.isArray(parsed)) {
result.choices = parsed;
try { handlers.onChoices?.(parsed); } catch {}
}
}
cursor = closeIdx + close.length;
currentTag = null;
continue;
}
// Close tag not yet in buffer — emit incremental prose if applicable.
if (currentTag === "story" && lastBeatEmitCursor < fullBuffer.length) {
const newText = fullBuffer.slice(lastBeatEmitCursor);
// Don't emit partial close-tag lookalikes: hold back the last few
// chars that could be a partial "</story>" (max 8 chars).
const safeLen = Math.max(0, newText.length - closeTag("story").length);
if (safeLen > 0) {
const safe = newText.slice(0, safeLen);
try { handlers.onBeat?.(safe); } catch {}
lastBeatEmitCursor += safeLen;
}
}
// Close tag not found — back up cursor by the max close-tag length
// (split like "</pla" + "n>" can complete on next chunk append).
const maxCloseLen = Math.max(...TAG_NAMES.map((n) => closeTag(n).length));
cursor = Math.max(cursor, fullBuffer.length - maxCloseLen + 1);
break;
}
}
const consume = async (): Promise<void> => {
for await (const chunk of textStream) {
fullBuffer += chunk;
scan();
}
// Final scan — flush any remaining buffer (handles close tags that
// arrived in the last chunk without a subsequent iteration).
scan();
};
try {
await Promise.race([
consume(),
new Promise<void>((_, reject) =>
setTimeout(() => {
timedOut = true;
reject(new Error("StreamRouter timeout"));
}, timeoutMs),
),
]);
} catch {
// Timeout or stream error — fall through to degrade path.
}
// ── Degrade path ──────────────────────────────────────────────────
if (!planDispatched || !storyCompleted || timedOut) {
result.degraded = true;
if (!planDispatched) {
const planContent = extractTagContent(fullBuffer, "plan");
if (planContent) {
const parsed = tryParseJson<WriterScenePlan>(planContent, "plan:degraded");
if (parsed) {
result.plan = parsed;
try { handlers.onPlan?.(parsed); } catch {}
}
}
}
if (!storyCompleted) {
// Best-effort: extract <story> prose; if no tag at all, fall back to
// the whole buffer as prose (the splitter degrades further if empty).
const storyContent =
extractTagContent(fullBuffer, "story") ?? fullBuffer.trim();
result.rawStorySegment = storyContent;
if (storyContent.trim().length > 0) {
try { handlers.onStoryComplete?.(storyContent); } catch {}
}
}
if (!result.choices) {
const choicesContent = extractTagContent(fullBuffer, "choices");
if (choicesContent) {
const parsed = tryParseJson<BeatChoice[]>(choicesContent, "choices:degraded");
if (parsed && Array.isArray(parsed)) result.choices = parsed;
}
}
if (timedOut) {
console.warn(`[StreamRouter] timed out after ${timeoutMs}ms, degraded extraction attempted`);
}
}
return result;
}
+160
View File
@@ -0,0 +1,160 @@
import type {
WriterScenePlan,
} from "@infiplot/types";
import type { WriterBeatsOutput } from "../agents/writer";
import {
coerceBeatsFromRaw,
coerceStoryStatePatch,
normalizeSpeakerName,
synthesizeFallbackBeats,
} from "../agents/writer";
import { parseJsonLoose } from "../jsonParser";
// ──────────────────────────────────────────────────────────────────────
// proseSplitter — rule-based prose → Beat[] splitter.
//
// The Writer now outputs continuous prose in the <story> segment instead
// of JSON beats. This module splits prose into RawBeat[] using lightweight
// markers (blank-line delimited paragraphs, <i> for inner monologue,
// 「speakerquote」 for NPC dialogue), then feeds the result through the
// existing coerceBeatsFromRaw pipeline to get fully validated Beat[].
//
// Zero extra LLM calls. Multiple degradation layers — never throws.
// ──────────────────────────────────────────────────────────────────────
type RawBeat = {
narration?: string;
speaker?: string;
line?: string;
lineDelivery?: string;
};
// Match inner-monologue blocks: <i>...</i> (possibly multiline)
const INNER_RE = /^\s*<i>([\s\S]+?)<\/i>\s*$/;
// Match NPC dialogue: Speaker:「dialogue」 or Speaker:「dialogue」
// Supports 「」『』"" quote pairs. Speaker name is 1-20 non-whitespace chars.
const DIALOGUE_RE =
/^\s*(\S{1,20})\s*[:]\s*(?:[「『"]([\s\S]+?)[」』"])\s*$/;
// Match <memory>{...}</memory> block anywhere in the story segment.
const MEMORY_RE = /<memory>([\s\S]+?)<\/memory>/;
/**
* Extract and strip the <memory> JSON block from raw story prose.
* Returns the parsed StoryStatePatch (or undefined) plus the cleaned prose.
*/
function extractMemoryBlock(rawStory: string): {
patch: ReturnType<typeof coerceStoryStatePatch>;
cleanedProse: string;
} {
const match = MEMORY_RE.exec(rawStory);
if (!match) return { patch: undefined, cleanedProse: rawStory };
const jsonStr = match[1]!;
const cleanedProse = rawStory.replace(MEMORY_RE, "").trim();
try {
const parsed = parseJsonLoose<Record<string, unknown>>(jsonStr);
return {
patch: coerceStoryStatePatch(
parsed as Parameters<typeof coerceStoryStatePatch>[0],
),
cleanedProse,
};
} catch {
console.warn("[proseSplitter] failed to parse <memory> block, skipping");
return { patch: undefined, cleanedProse };
}
}
/**
* Classify a single prose paragraph into one of three beat forms.
*/
function classifyBlock(
block: string,
plan: WriterScenePlan,
): RawBeat {
const trimmed = block.trim();
// Inner monologue: <i>text</i> → speaker="你"
const innerMatch = INNER_RE.exec(trimmed);
if (innerMatch) {
return {
speaker: "你",
line: innerMatch[1]!.trim(),
};
}
// NPC dialogue: Speaker:「quote」
const dialogueMatch = DIALOGUE_RE.exec(trimmed);
if (dialogueMatch) {
const rawSpeaker = dialogueMatch[1]!.trim();
const speaker = normalizeSpeakerName(rawSpeaker);
const line = dialogueMatch[2]!.trim();
const intent = plan.characterIntents?.find((ci) => ci.name === speaker);
return {
speaker,
line,
lineDelivery: intent?.speakingTone || undefined,
};
}
// Default: pure narration
return { narration: trimmed };
}
/**
* Split continuous prose into Beat[], reusing the full coerce→repair→fallback
* pipeline. Zero extra LLM calls. Never throws.
*
* @param rawStory - The raw prose from the <story> segment.
* @param plan - The parsed WriterScenePlan (from <plan> segment).
* @returns WriterBeatsOutput with Beat[] + optional StoryStatePatch.
*/
export function splitProseToBeats(
rawStory: string,
plan: WriterScenePlan,
): WriterBeatsOutput {
try {
// 1. Extract <memory> block (story-state volatile patch)
const { patch, cleanedProse } = extractMemoryBlock(rawStory);
// 2. Split by blank lines into paragraphs
const blocks = cleanedProse
.split(/\n\s*\n/)
.map((b) => b.trim())
.filter((b) => b.length > 0);
if (blocks.length === 0) {
console.warn("[proseSplitter] empty prose after cleanup, using fallback");
return {
beats: synthesizeFallbackBeats(plan),
storyStatePatch: patch,
};
}
// 3. Classify each block into a RawBeat
const rawBeats: RawBeat[] = blocks.map((block) => {
try {
return classifyBlock(block, plan);
} catch {
return { narration: block };
}
});
// 4. Feed through existing coerce pipeline (id assignment, POV
// normalization, entry alignment, exit guarantee, uniqueness)
const coerced = coerceBeatsFromRaw(rawBeats, plan);
return {
beats: coerced.beats,
storyStatePatch: patch ?? coerced.storyStatePatch,
};
} catch (err) {
console.error("[proseSplitter] unexpected error, using fallback:", err);
return {
beats: synthesizeFallbackBeats(plan),
storyStatePatch: undefined,
};
}
}
+85 -7
View File
@@ -19,6 +19,7 @@ import type {
InsertBeatResponse,
SceneRequest,
SceneResponse,
SceneStreamEvent,
Session,
StartRequest,
StartResponse,
@@ -105,6 +106,77 @@ function mergeCharactersPreserveVoice(
});
}
// ── SSE consumption (server-fallback path) ───────────────────────────
// When an `emit` callback is provided, the server-fallback path requests
// SSE instead of JSON so the caller can render progressive events
// (plan → beat → background → voice → done). The final "done" event
// carries the complete response payload.
async function fetchSSE<T>(
path: string,
body: unknown,
emit?: (event: SceneStreamEvent) => void,
): Promise<T> {
const res = await fetch(path, {
method: "POST",
headers: {
"Content-Type": "application/json",
...(emit ? { Accept: "text/event-stream" } : {}),
},
body: JSON.stringify(body),
});
if (!res.ok) {
if (res.status === 401) throw new AuthRequiredError();
let message = `HTTP ${res.status}`;
try {
const data = (await res.json()) as { error?: string };
if (data.error) message = data.error;
} catch { /* keep HTTP status */ }
throw new Error(message);
}
if (!emit || !res.headers.get("content-type")?.includes("text/event-stream")) {
return res.json() as Promise<T>;
}
const reader = res.body!.getReader();
const decoder = new TextDecoder();
let buffer = "";
let result: T | undefined;
for (;;) {
const { done, value } = await reader.read();
if (done) break;
buffer += decoder.decode(value, { stream: true });
const parts = buffer.split("\n\n");
buffer = parts.pop()!;
for (const part of parts) {
if (!part.trim()) continue;
const dataLine = part.split("\n").find((l) => l.startsWith("data: "));
if (!dataLine) continue;
let event;
try {
event = JSON.parse(dataLine.slice(6));
} catch {
continue;
}
if (event.type === "done") {
result = event.response as T;
} else if (event.type === "error") {
throw new Error(event.message || "Scene generation failed");
} else {
emit(event as SceneStreamEvent);
}
}
}
if (!result) throw new Error("SSE stream ended without a done event");
return result;
}
// ── Unified entry points ───────────────────────────────────────────────
// When the browser has a BYO model config in localStorage, these call the
// client-side engine directly (talking to providers from the browser).
@@ -134,23 +206,29 @@ export async function getTtsProvider(): Promise<TtsProvider> {
}
}
export async function startSession(req: StartRequest): Promise<StartResponse> {
export async function startSession(
req: StartRequest,
emit?: (event: SceneStreamEvent) => void,
): Promise<StartResponse> {
const config = getClientConfig();
if (config) {
return startSessionClient(config, req);
return startSessionClient(config, req, emit);
}
return postJson<StartResponse>("/api/start", req);
return fetchSSE<StartResponse>("/api/start", req, emit);
}
export async function requestScene(req: SceneRequest): Promise<SceneResponse> {
export async function requestScene(
req: SceneRequest,
emit?: (event: SceneStreamEvent) => void,
): Promise<SceneResponse> {
const config = getClientConfig();
if (config) {
return requestSceneClient(config, req);
return requestSceneClient(config, req, emit);
}
const data = await postJson<SceneResponse>("/api/scene", {
const data = await fetchSSE<SceneResponse>("/api/scene", {
...req,
session: stripVoicesForTransport(req.session),
});
}, emit);
// Server stripped known-character voices for bandwidth — re-attach the
// voices we already hold so fetchBeatAudio can synth them.
data.characters = mergeCharactersPreserveVoice(req.session.characters, data.characters);
+1 -1
View File
@@ -284,7 +284,7 @@ Dreamy watercolor style with soft tones and nostalgic atmosphere
},
models: {
corsNotice: "Please ensure your API endpoint supports browser CORS requests. Most mainstream providers (OpenAI, Anthropic, Gemini, Runware, etc.) support this by default.",
corsNotice: "All API keys are stored locally in your browser and never uploaded to our server. Requests are sent directly from your browser to the API endpoint; if the endpoint does not support CORS, requests are automatically routed through our server — your key is used only for that single relay and is never logged or stored.",
textModel: "Text Model",
imageModel: "Image Model",
visionModel: "Vision Model",
+1 -1
View File
@@ -313,7 +313,7 @@ export const ja = {
// Models tab
models: {
corsNotice: "お使いのAPIエンドポイントがブラウザのクロスオリジン要求(CORS)をサポートしていることを確認してください。ほとんどの主要プロバイダー(OpenAI、Anthropic、Gemini、Runwareなど)は、すでにデフォルトでサポートしています。",
corsNotice: "すべての API キーはブラウザのローカルにのみ保存され、サーバーにアップロードされることはありません。リクエストはブラウザから API エンドポイントへ直接送信されます。エンドポイントが CORS に対応していない場合は、自動的にサーバー経由で中継されます——キーはその一回の中継にのみ使用され、記録・保存されることはありません。",
textModel: "テキストモデル",
imageModel: "描画モデル",
visionModel: "画像認識モデル",
+1 -1
View File
@@ -313,7 +313,7 @@ export const zhCN = {
// Models tab
models: {
corsNotice: "请确保你的 API 端点支持浏览器跨域请求(CORS)。大多数主流提供商(OpenAI、Anthropic、Gemini、Runware 等)已默认支持。",
corsNotice: "所有 Key 仅保存在本地浏览器,不会上传到服务器。请求优先从浏览器直连 API 端点;若端点不支持跨域(CORS),将自动通过我们的服务器中转——Key 仅用于当次转发,不会被记录或存储。",
textModel: "文本模型",
imageModel: "绘图模型",
visionModel: "识图模型",
+104
View File
@@ -0,0 +1,104 @@
import "server-only";
import { getCloudflareContext } from "@opennextjs/cloudflare";
/**
* R2 Storage封装 - 用户生成图片持久化
*
* Phase 1: 优先使用 Runware CDN URL(零额外存储成本),R2 key 作为可选持久化。
* Phase 2+: save 流程中可选地将场景图从 CDN fetch 后转存 R2,防 URL 过期。
*/
/**
* Build R2 object key for image storage.
*
* Pattern: {storyId}/{kind}/{id}.webp
* - kind: "scene" | "portrait" | "style-ref"
* - id: scene.id | character.name | "ref"
*
* Example: s_abc123/scene/sc_1.webp, s_abc123/portrait/李华.webp
*/
export function buildImageKey(
storyId: string,
kind: "scene" | "portrait" | "style-ref",
id: string,
): string {
// Sanitize both storyId and id to avoid path traversal / key confusion
const safeStoryId = storyId.replace(/[^a-zA-Z0-9_一-龥-]/g, "_");
const safeId = id.replace(/[^a-zA-Z0-9_一-龥-]/g, "_");
return `${safeStoryId}/${kind}/${safeId}.webp`;
}
/**
* Upload image to R2 and return public URL.
*
* @param key R2 object key (use buildImageKey to generate)
* @param data Image data (Buffer or Uint8Array)
* @returns Public R2 URL (https://<public-domain>/<key>)
* @throws Error if R2 upload fails or binding unavailable
*/
export async function uploadImage(
key: string,
data: Buffer | Uint8Array,
): Promise<string> {
try {
const { env } = getCloudflareContext();
if (!env.R2_BUCKET) {
throw new Error(
"R2_BUCKET binding not found. " +
"Ensure wrangler.jsonc has r2_buckets configured and you're running via wrangler."
);
}
// Upload to R2 with WebP content-type
await env.R2_BUCKET.put(key, data, {
httpMetadata: {
contentType: "image/webp",
},
});
// Return public URL (assumes custom domain or R2 public bucket configured)
// Phase 1: hardcode or read from env; Phase 2: configure in wrangler
const publicDomain = process.env.R2_PUBLIC_DOMAIN ?? "https://r2.infiplot.example"; // Placeholder
return `${publicDomain}/${key}`;
} catch (error) {
// Re-throw with context for caller to handle gracefully
throw new Error(
`R2 upload failed for key ${key}: ${error instanceof Error ? error.message : String(error)}`
);
}
}
/**
* Fetch image from URL and upload to R2 (for migrating Runware CDN → R2).
*
* @param url Source image URL (e.g. Runware CDN)
* @param key R2 object key
* @returns Public R2 URL, or null if fetch/upload fails (caller should fallback to original URL)
*/
export async function migrateImageToR2(
url: string,
key: string,
): Promise<string | null> {
try {
// Fetch image from CDN
const res = await fetch(url);
if (!res.ok) {
console.warn(`[R2] Failed to fetch image from ${url}: HTTP ${res.status}`);
return null;
}
const data = new Uint8Array(await res.arrayBuffer());
// Upload to R2
return await uploadImage(key, data);
} catch (error) {
// Log but don't throw - caller should gracefully fallback to CDN URL
console.warn(
`[R2] Migration failed for ${url}${key}:`,
error instanceof Error ? error.message : error
);
return null;
}
}
+189 -1
View File
@@ -156,6 +156,45 @@ export type WriterPlan = {
entrySpeaker?: string;
};
// ──────────────────────────────────────────────────────────────────────
// Paradigm D — Writer single-pass streaming plan extensions.
//
// In paradigm D the Writer streams one tagged response: <plan> → <story>
// → <choices>. WriterScenePlan is the parsed <plan> segment: the existing
// WriterPlan skeleton PLUS per-character scene intents (and story bible on
// first scene), handed to the downstream media translators the instant
// </plan> closes.
// ──────────────────────────────────────────────────────────────────────
/** Per-scene performance intent for one character, authored by the Writer in
* the <plan> segment. Ephemeral (this scene only) — distinct from the
* persistent CharacterPersona card. Feeds downstream media translators. */
export type CharacterIntent = {
name: string;
/** 本幕情绪基调。 */
mood?: string;
/** 本幕动机 / 目的。 */
motivation?: string;
/** 本幕说话基调(指导对白质感 + TTS lineDelivery)。 */
speakingTone?: string;
};
/** Parsed <plan> tag: the existing WriterPlan shape plus per-character scene
* intents and optional story bible (first scene only). The optional extension
* keeps any degraded / minimal plan valid — downstream consumers see a
* WriterPlan superset. */
export type WriterScenePlan = WriterPlan & {
/** 各角色本幕表现意图,供 </plan> 闭合时分发下游媒体翻译官。 */
characterIntents?: CharacterIntent[];
/** 故事圣经(仅开局产出)——稳定区字段。后续场景 plan 不含此字段。 */
storyBible?: {
logline: string;
genreTags: string;
protagonist: string;
castNotes?: string;
};
};
// ──────────────────────────────────────────────────────────────────────
// Characters & voices (TTS)
// ──────────────────────────────────────────────────────────────────────
@@ -179,6 +218,30 @@ export type CharacterVoice =
mimeType: string;
};
// ──────────────────────────────────────────────────────────────────────
// CharacterPersona — narrative / story dimension of a Character.
// Merged into Character via intersection (all optional). Filled primarily
// by the Writer's <plan> 思维链 (paradigm D); the CharacterDesigner then
// realizes it into visual + voice cards. Absent on legacy sessions →
// callers degrade to "name only". SENTINEL append-only: adding persona
// only appends bytes to the stable prompt prefix — never reorders.
// ──────────────────────────────────────────────────────────────────────
export type CharacterPersona = {
/** 背景 / 身份 / 核心设定。 */
persona?: string;
/** 性格标签,如 ["傲娇", "腹黑", "重情义"]。 */
personalityTraits?: string[];
/** 说话风格 / 口头禅 — 对白质感的关键。 */
speakingStyle?: string;
/** 2-3 条代表性对白,作为 few-shot 锚定语气。 */
sampleDialogue?: string[];
/** 与玩家("你")的关系 / 态度。 */
relationshipToPlayer?: string;
/** 隐藏信息 / 伏笔,可驱动后续反转(默认不外显)。 */
secrets?: string[];
};
export type Character = {
name: string;
/**
@@ -215,7 +278,7 @@ export type Character = {
* server runs StepFun, and lets the server normalize an off-provider voice
* without a fresh provision. Validated against the catalog at synth time. */
stepfunVoiceId?: string;
};
} & CharacterPersona;
/** A single beat's synthesized audio, attached to the response. */
export type BeatAudio = {
@@ -270,6 +333,33 @@ export type StoryStatePatch = {
nextHook?: string;
};
// ──────────────────────────────────────────────────────────────────────
// WorldBook — lightweight lore injection system.
//
// Entries with position "constant" are always injected into the stable
// prompt prefix. Entries with position "triggered" are scanned against
// recent beat text and injected into the dynamic suffix when keywords
// match. Priority controls ordering when multiple entries fire.
// ──────────────────────────────────────────────────────────────────────
export type WorldBookEntry = {
id: string;
/** Keywords that trigger this entry's injection (for triggered entries). */
keys: string[];
/** The lore content to inject into the prompt. */
content: string;
/** "constant" = always injected (stable prefix); "triggered" = keyword-matched (dynamic suffix). */
position: "constant" | "triggered";
/** Higher priority entries are injected first. Defaults to 0. */
priority?: number;
};
export type WorldBook = {
id: string;
name: string;
entries: WorldBookEntry[];
};
// ──────────────────────────────────────────────────────────────────────
// Session
// ──────────────────────────────────────────────────────────────────────
@@ -317,6 +407,11 @@ export type Session = {
* back-compat with sessions created before this field existed.
*/
language?: string;
/**
* Optional world books for lore injection. "constant" entries are always in
* the prompt; "triggered" entries inject when keywords match recent text.
*/
worldBooks?: WorldBook[];
};
// ──────────────────────────────────────────────────────────────────────
@@ -417,6 +512,18 @@ export type EngineConfig = {
// API contracts
// ──────────────────────────────────────────────────────────────────────
/**
* BYOK (Bring Your Own Key) LLM credentials carried in request bodies.
* Per-role: text/image/vision can be independently configured. Keys never
* persist or log server-side — they only pass through request→config build
* (see lib/config.ts buildByoEngineConfig). vision typically mirrors text.
*/
export type ByoLlmKeys = {
text?: { provider: string; apiKey: string; baseUrl?: string; model?: string };
image?: { provider: string; apiKey: string; baseUrl?: string; model?: string };
vision?: { provider: string; apiKey: string; baseUrl?: string; model?: string };
};
export type StartRequest = {
worldSetting: string;
styleGuide: string;
@@ -439,6 +546,13 @@ export type StartRequest = {
/** Active UI locale — see Session.language. Drives the engine's language
* directive so AI output is generated in the player's chosen language. */
language?: string;
/**
* BYOK: user-provided LLM keys. When present, server uses these to construct
* EngineConfig instead of reading from env. Per-role: text/image/vision can
* be independently configured. Keys never persist or log — they only pass
* through request→config construction.
*/
byo?: ByoLlmKeys;
};
// /api/parse-style-image — vision LLM extracts a textual painting-style
@@ -473,6 +587,8 @@ export type SceneRequest = {
session: Session;
/** See StartRequest.clientTts — drops server-side TTS for BYO-key clients. */
clientTts?: boolean;
/** See StartRequest.byo — BYOK LLM keys. */
byo?: ByoLlmKeys;
};
export type SceneResponse = {
@@ -534,6 +650,8 @@ export type VisionRequest = {
* server-side image re-fetch per click.
*/
annotatedImageBase64: string;
/** See StartRequest.byo — BYOK LLM keys. */
byo?: ByoLlmKeys;
};
export type VisionResponse = {
@@ -547,6 +665,8 @@ export type VisionResponse = {
export type FreeformClassifyRequest = {
session: Session;
freeformText: string;
/** See StartRequest.byo — BYOK LLM keys. */
byo?: ByoLlmKeys;
};
export type FreeformClassify = "insert-beat" | "change-scene";
@@ -563,6 +683,8 @@ export type InsertBeatRequest = {
freeformAction: string;
/** See StartRequest.clientTts — drops server-side TTS for BYO-key clients. */
clientTts?: boolean;
/** See StartRequest.byo — BYOK LLM keys. */
byo?: ByoLlmKeys;
};
/** Partial beat fields produced by the insert-beat director. */
@@ -577,3 +699,69 @@ export type InsertBeatResponse = {
partial: InsertBeatPartial;
characters: Character[];
};
// ──────────────────────────────────────────────────────────────────────
// Paradigm D — streaming primitives (chatStream / StreamRouter / SSE)
//
// Output-side counterpart to prompt caching's input-side stable prefix
// (the two are orthogonal). chatStream yields incremental text + an
// end-of-stream usage promise. The StreamRouter slices the Writer's
// tagged stream into plan/story/choices and dispatches downstream. API
// routes serialize assembled fragments as SSE events for progressive
// client playback.
// ──────────────────────────────────────────────────────────────────────
/** Token usage stats returned at stream end. Kept SDK-agnostic so the type
* file doesn't depend on any specific provider package. */
export type ChatStreamUsage = {
prompt_tokens?: number;
completion_tokens?: number;
prompt_tokens_details?: { cached_tokens?: number };
};
/** Return shape of the streaming chat primitive (ai-client `chatStream`).
* `textStream` yields incremental chunks; `usage` resolves at stream end
* so `summarizeSdkUsage` cache accounting works unchanged. */
export type ChatStreamResult = {
textStream: AsyncIterable<string>;
usage: Promise<ChatStreamUsage | undefined>;
};
/** Callbacks the StreamRouter fires as it slices the Writer's tagged stream.
* All optional so a caller can subscribe to a subset. */
export type StreamRouterHandlers = {
/** `</plan>` closed — dispatch downstream media translators in parallel. */
onPlan?: (plan: WriterScenePlan) => void;
/** `<story>` incremental text — push to client for progressive playback. */
onBeat?: (beatChunk: string) => void;
/** `</story>` closed — prose finalized, ready for splitting. */
onStoryComplete?: (rawStory: string) => void;
/** `</choices>` closed. */
onChoices?: (choices: BeatChoice[]) => void;
};
/** Aggregate result of routing one Writer stream to completion. `degraded` is
* true when tag parsing fell back (missing / misordered / unclosed / timeout),
* per the degrade-before-main-path reliability rule. */
export type StreamRouterResult = {
plan?: WriterScenePlan;
beats: Beat[];
choices?: BeatChoice[];
/** Raw prose content of the <story> segment (not JSON-parsed). The director
* feeds this to proseSplitter to produce Beat[]. */
rawStorySegment?: string;
degraded: boolean;
};
/** Server → client SSE events for progressive scene playback (paradigm D).
* `TDone` is the terminal full-assembly payload — `SceneResponse` for
* `/api/scene`, `StartResponse` for `/api/start`. The prefetch path
* consumes events to `done` and reassembles a complete response. */
export type SceneStreamEvent<TDone = SceneResponse> =
| { type: "plan"; plan: WriterScenePlan }
| { type: "beat"; beat: Beat }
| { type: "background"; imageUrl: string; sceneKey?: string }
| { type: "voice"; name: string; voice: CharacterVoice }
| { type: "choices"; choices: BeatChoice[] }
| { type: "done"; response: TDone }
| { type: "error"; message: string; degraded?: boolean };