Files
infiplot-web/lib/types/index.ts
T
Zonghao Yuan dc5ecd60f6 refactor: flatten monorepo to single web package (#12)
Flatten the pnpm monorepo (apps/web + packages/*) into a single web package at the repo root.

- Move app/lib/components/scripts/public to root; drop apps/web and packages/* wrappers
- Rewrite tsconfig paths (@infiplot/*) to ./lib/*; turbopack.root = __dirname
- Update Vercel (no root-directory) and Cloudflare (pnpm build:cf at root) deploy paths
- Regenerate pnpm-lock.yaml to drop stale workspace importers
- Bump engines.node to >=22 to match wrangler

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-03 00:55:45 +08:00

346 lines
14 KiB
TypeScript

// ──────────────────────────────────────────────────────────────────────
// Beat — one dialogue / narration moment within a Scene.
// Multiple beats share the same background image; tapping or choosing
// advances among them WITHOUT regenerating the image.
// ──────────────────────────────────────────────────────────────────────
/**
 * A single dialogue or narration moment inside a Scene. Beats belonging to
 * the same scene share one background image, so moving between them never
 * triggers image regeneration.
 */
export type Beat = {
  /** Identifier of this beat (presumably the target of `nextBeatId` / `targetBeatId` / `entryBeatId` — confirm). */
  id: string;
  /** Narration text for this moment, when there is any. */
  narration?: string;
  /** Who speaks `line`, when the beat carries dialogue. */
  speaker?: string;
  /** The spoken dialogue line, when there is one. */
  line?: string;
  /** Free-form voice-acting direction for `line`. Sent to TTS only; never displayed. */
  lineDelivery?: string;
  /**
   * Characters visible during this beat, each with the pose / expression for
   * this moment. The Cinematographer reads this while composing the scene's
   * establishing shot: the beat the scene enters on is the visual anchor for
   * the image.
   */
  activeCharacters?: BeatActiveCharacter[];
  /** What follows this beat: a plain continuation or a set of choices. */
  next: BeatNext;
};
/** A character present in a beat, with an optional description of how they appear. */
export type BeatActiveCharacter = {
  /** The character's name. */
  name: string;
  /** Free-form Chinese-language description of pose / expression / what the character is doing. */
  pose?: string;
};
/**
 * What happens after a beat, discriminated on `type`:
 * - "continue": carries the id of the following beat.
 * - "choice": carries the list of choices offered.
 */
export type BeatNext =
  | {
      type: "continue";
      nextBeatId: string;
    }
  | {
      type: "choice";
      choices: BeatChoice[];
    };
/** One selectable option offered by a beat whose `next` is of type "choice". */
export type BeatChoice = {
  /** Identifier of this choice. */
  id: string;
  /** Text label for this choice. */
  label: string;
  /** What selecting this choice does (advance to a beat or change scene). */
  effect: BeatChoiceEffect;
};
/**
 * Outcome of picking a choice, discriminated on `kind`:
 * - "advance-beat": carries the id of the beat to move to.
 * - "change-scene": carries a seed string for the next scene.
 */
export type BeatChoiceEffect =
  | {
      kind: "advance-beat";
      targetBeatId: string;
    }
  | {
      kind: "change-scene";
      nextSceneSeed: string;
    };
// ──────────────────────────────────────────────────────────────────────
// Scene — one background image + a graph of beats.
// The Director emits an entire Scene per call; the player navigates
// through its beats locally with zero network until exiting.
// ──────────────────────────────────────────────────────────────────────
/** One background image together with the graph of beats played over it. */
export type Scene = {
  /** Identifier of this scene. */
  id: string;
  /** Prompt text associated with the scene (presumably the image-generation prompt — confirm). */
  scenePrompt: string;
  /** All beats that make up this scene. */
  beats: Beat[];
  /** Id of the beat the scene starts on. */
  entryBeatId: string;
  /**
   * Stable English slug naming the scene's location + time, for example
   * "classroom-dusk" or "rooftop-night". If the following Scene carries the
   * same key, the Painter adds the previous Scene's image to Runware's
   * `referenceImages` (next to the character portraits) so the same physical
   * space keeps a consistent look across cuts.
   */
  sceneKey?: string;
  /**
   * Runware UUID of the image generated for this Scene. It is the cheapest
   * form to pass back through Runware's `referenceImages` on later calls
   * (transport cost: UUID < URL < base64). The client is not shown this
   * value — rendering uses `imageUrl`.
   */
  imageUuid?: string;
  /**
   * Public CDN URL of the image generated for this Scene. It is returned to
   * the client for `<img src>` rendering; the client additionally runs it
   * through a Canvas 2D click annotator before posting to `/api/vision`
   * (see `VisionRequest.annotatedImageBase64`).
   *
   * With MOCK_IMAGE=true the value is a `data:image/svg+xml;...` data URI
   * rather than a Runware URL; the client renders either form transparently.
   */
  imageUrl?: string;
};
/**
 * How a scene was left, discriminated on `kind`:
 * - "choice": a choice was taken; records its id, label and next-scene seed.
 * - "freeform": a free-form action, recorded as text.
 */
export type SceneExit =
  | { kind: "choice"; choiceId: string; label: string; nextSceneSeed: string }
  | {
      kind: "freeform";
      action: string;
    };
/** One entry of a session's history: a scene, the beats visited in it, and how it was exited. */
export type SceneHistoryEntry = {
  /** The scene this entry records. */
  scene: Scene;
  /** Ids of the beats that were visited within `scene`. */
  visitedBeatIds: string[];
  /** How the scene was left; absent when no exit has been recorded. */
  exit?: SceneExit;
};
// ──────────────────────────────────────────────────────────────────────
// Characters & voices (TTS)
// ──────────────────────────────────────────────────────────────────────
/** Voice reference data stored for a character. */
export type CharacterVoice = {
  /** Voice provider; "xiaomi" is the only value allowed. */
  provider: "xiaomi";
  /** Output of Xiaomi MiMo voice design, kept as reference audio for later clones. */
  referenceAudioBase64: string;
  /** MIME type string (presumably that of the reference audio — confirm). */
  mimeType: string;
};
/** A registered character: name, voice/appearance cards, and provisioned assets. */
export type Character = {
  /** The character's name. */
  name: string;
  /**
   * Chinese-language voice-acting direction card. It must open with an
   * explicit gender and then cover age / timbre / personality / speed /
   * accent. Passed to Xiaomi MiMo's voicedesign endpoint the first time the
   * voice is provisioned.
   */
  voiceDescription: string;
  /**
   * English appearance card: comma-separated visual attributes in the
   * Runware/FLUX prompt-engineering convention. Given to the Painter as a
   * character archetype anchor so face / outfit / style stay consistent in
   * every scene where the character appears.
   */
  visualDescription?: string;
  /**
   * Runware UUID of the base portrait. The CharacterDesigner generates it
   * once, and it is reused as a `referenceImages` entry in each later scene
   * featuring the character. UUID is the cheapest reference form for Runware.
   */
  basePortraitUuid?: string;
  /**
   * Public CDN URL of the base portrait — the same image as
   * `basePortraitUuid`. Retained for the client (should it ever render
   * character cards) and as the fallback `referenceImages` form when no UUID
   * is available.
   */
  basePortraitUrl?: string;
  /** Xiaomi MiMo voice reference audio. */
  voice?: CharacterVoice;
};
/** Synthesized audio for one beat, as attached to a response. */
export type BeatAudio = {
  /** The audio payload, base64-encoded. */
  base64: string;
  /** MIME type string for the payload. */
  mime: string;
};
// ──────────────────────────────────────────────────────────────────────
// StoryState — the persistent "story bible" + evolving narrative memory.
//
// Created once at session start by the Architect agent (rich opening
// planning), then carried across every scene and incrementally updated by
// the Writer. This is the single throughline that keeps tone, cast, and
// stakes coherent across scene cuts — without it each Writer call would
// re-derive the whole arc from a flat beat log and drift.
//
// Split into STABLE fields (set by the Architect, rarely change) and
// VOLATILE fields (rewritten each scene via StoryStatePatch).
// ──────────────────────────────────────────────────────────────────────
/**
 * The story bible plus rolling narrative memory. The first group of fields
 * is stable (authored by the Architect); the second group is volatile and is
 * rewritten every scene through a StoryStatePatch.
 */
export type StoryState = {
  // ── Stable: written by the Architect; kept unless deliberately revised ──

  /** One-line central dramatic question (the main-plot hook). */
  logline: string;
  /**
   * Genre + tone tags that anchor the story's payoff framework, e.g.
   * "sweet romance / campus / slow-burn healing" (presumably written in
   * Chinese in practice — confirm).
   */
  genreTags: string;
  /**
   * Second-person protagonist card: who "you" are, the immediate situation,
   * the core want, and a flaw/secret. This is the audience proxy and is
   * never rendered.
   */
  protagonist: string;
  /** Key supporting cast and each one's relationship/tension with "you" (one per line). */
  castNotes?: string;

  // ── Volatile: rewritten each scene by the Writer's StoryStatePatch ──

  /** Rolling, compressed synopsis of events so far (~3-5 sentences). */
  synopsis: string;
  /** Hooks / mysteries / questions that are unresolved and still owed to the player. */
  openThreads?: string[];
  /**
   * Current relationship/emotion state per character, e.g.
   * "Xia Hai: affection rising, just got halfway through a confession to you".
   */
  relationships?: string[];
  /**
   * Where the story heads next — the conflict/reversal/suspense the next
   * scene should drive toward. Seeds the next scene's hook.
   */
  nextHook?: string;
};
/**
 * The volatile slice of the story state that the Writer rewrites after every
 * scene. The merge preserves the stable fields
 * (logline / genreTags / protagonist / castNotes).
 */
export type StoryStatePatch = {
  /** Replacement synopsis, if supplied. */
  synopsis?: string;
  /** Replacement list of open threads, if supplied. */
  openThreads?: string[];
  /** Replacement list of relationship states, if supplied. */
  relationships?: string[];
  /** Replacement next hook, if supplied. */
  nextHook?: string;
};
// ──────────────────────────────────────────────────────────────────────
// Session
// ──────────────────────────────────────────────────────────────────────
/** A play session: world configuration, scene history, character registry and story state. */
export type Session = {
  /** Identifier of the session. */
  id: string;
  /** Creation time as a number (presumably an epoch timestamp — confirm units). */
  createdAt: number;
  /** World-setting text for the session. */
  worldSetting: string;
  /** Style-guide text for the session. */
  styleGuide: string;
  /** Scene history entries for this session. */
  history: SceneHistoryEntry[];
  /** Character registry. It accumulates across scenes; voices + portraits persist so they can be reused. */
  characters: Character[];
  /**
   * Persistent story bible + evolving narrative memory. The Architect sets it
   * at session start, the client carries it on every /api/scene call, and the
   * Writer updates it each scene. It is optional for back-compat with session
   * payloads created before the field existed.
   */
  storyState?: StoryState;
};
// ──────────────────────────────────────────────────────────────────────
// Vision
// ──────────────────────────────────────────────────────────────────────
/** Interpretation of a click, expressed as a free-form action plus the reasoning behind it. */
export type ClickIntent = {
  /** The free-form action text derived from the click. */
  freeformAction: string;
  /** Reasoning text accompanying the action. */
  reasoning: string;
};
/** Classification of a click: insert a beat into the current scene, or change scene. */
export type VisionClassify =
  | "insert-beat"
  | "change-scene";
// ──────────────────────────────────────────────────────────────────────
// Provider config
// ──────────────────────────────────────────────────────────────────────
/** Connection settings for one model provider. */
export type ProviderConfig = {
  /** Base URL of the provider's API. */
  baseUrl: string;
  /** API key for the provider. */
  apiKey: string;
  /** Name of the model to use. */
  model: string;
};
/** Connection settings for the text-to-speech provider. */
export type TtsConfig = {
  /** Base URL of the TTS API. */
  baseUrl: string;
  /** API key for the TTS API. */
  apiKey: string;
  /** Base model name. The adapter derives the "-voicedesign" / "-voiceclone" suffixes from it. */
  speechModel: string;
};
/** Full engine configuration: one provider per modality plus optional TTS and mock switches. */
export type EngineConfig = {
  /** Provider settings for text. */
  text: ProviderConfig;
  /** Provider settings for images. */
  image: ProviderConfig;
  /** Provider settings for vision. */
  vision: ProviderConfig;
  /** Optional. When it is missing the game runs silently (no TTS). */
  tts?: TtsConfig;
  /**
   * When true, the renderer returns a placeholder image instead of calling
   * the image API.
   * NOTE(review): `Scene.imageUrl` documents the mock output as an SVG data
   * URI, while this field was previously described as a PNG — confirm the
   * actual format.
   */
  mockImage?: boolean;
};
// ──────────────────────────────────────────────────────────────────────
// API contracts
// ──────────────────────────────────────────────────────────────────────
/** Request payload for starting a session. */
export type StartRequest = {
  /** World-setting text. */
  worldSetting: string;
  /** Style-guide text. */
  styleGuide: string;
};
/** Response payload returned when a session is started. */
export type StartResponse = {
  /** Identifier of the session. */
  sessionId: string;
  /** The opening scene. */
  scene: Scene;
  /** Rendered scene background: a public CDN URL, or a data URI in MOCK_IMAGE mode. */
  imageUrl: string;
  /** Character registry, with voice references + visual cards already provisioned. */
  characters: Character[];
  /**
   * Story bible created by the Architect and then updated by the opening
   * scene's Writer. The client persists it into the session for later
   * /api/scene calls.
   */
  storyState: StoryState;
};
// /api/scene — produces the next Scene from a session whose most recent
// history entry has `exit` set. The same endpoint serves prefetch
// speculation, where the frontend synthesizes a speculative exit.
export type SceneRequest = {
  /** The session to continue from. */
  session: Session;
};
/** Response payload of /api/scene. */
export type SceneResponse = {
  /** The generated scene. */
  scene: Scene;
  /** Rendered scene background: a public CDN URL, or a data URI in MOCK_IMAGE mode. */
  imageUrl: string;
  /** Character list returned with the scene. */
  characters: Character[];
  /**
   * Story bible after this scene's Writer has applied its update. The client
   * must persist it back into the session, otherwise the throughline does
   * not survive the next scene cut.
   */
  storyState: StoryState;
};
// /api/beat-audio — synthesizes the voice for a single beat on demand. The
// client issues one call per beat once a scene has loaded; the server applies
// a per-call timeout so MiMo tail-latency cannot block the UI. A null audio
// response means "play silent."
export type BeatAudioRequest = {
  /** The subset of beat fields needed for synthesis. */
  beat: {
    /** Id of the beat. */
    id: string;
    /** The line to synthesize. */
    line: string;
    /** Optional delivery direction for the line. */
    lineDelivery?: string;
  };
  /** Voice reference to synthesize with. */
  voice: CharacterVoice;
};
/** Response payload of /api/beat-audio. */
export type BeatAudioResponse = {
  /** The synthesized audio, or null when none is available. */
  audio: BeatAudio | null;
};
// /api/vision — reads a background click on the current image and decides
// whether it should insert a beat (in-scene exploration) or trigger a
// scene change.
export type VisionRequest = {
  /** The current session. */
  session: Session;
  /**
   * Raw PNG base64 (without a `data:` prefix) of the scene image, with the
   * player's click marker ALREADY drawn onto it by the browser's Canvas 2D.
   * The server passes it straight to the vision LLM as an OpenAI-compatible
   * image_url.
   *
   * Annotation is done client-side so that the engine needs no Node-native
   * image dependency (sharp doesn't run on Cloudflare Workers) and so that a
   * server-side image re-fetch is saved on every click.
   */
  annotatedImageBase64: string;
};
/** Response payload of /api/vision. */
export type VisionResponse = {
  /** The interpreted click intent. */
  intent: ClickIntent;
  /** Whether the click inserts a beat or changes scene. */
  classify: VisionClassify;
};
// /api/insert-beat — produces one transient beat as the response to a
// freeform vision action. The image is NOT regenerated.
export type InsertBeatRequest = {
  /** The current session. */
  session: Session;
  /** The freeform action to respond to. */
  freeformAction: string;
};
/** The partial set of beat fields that the insert-beat director produces. */
export type InsertBeatPartial = {
  /** Narration text, if produced. */
  narration?: string;
  /** Speaker of `line`, if produced. */
  speaker?: string;
  /** Dialogue line, if produced. */
  line?: string;
  /** Delivery direction for `line`, if produced. */
  lineDelivery?: string;
};
/** Response payload of /api/insert-beat. */
export type InsertBeatResponse = {
  /** The beat fields produced for the inserted beat. */
  partial: InsertBeatPartial;
  /** Character list returned with the beat. */
  characters: Character[];
};