feat: scene/beat architecture — decouple dialogue from image generation (#2)

Replace the one-image-per-interaction model with scenes that hold multiple dialogue beats. The image regenerates only on scene-change actions; tapping through beats and making in-scene choices are instant and require no network calls.

Squashed from #2:
- feat: scene/beat architecture — decouple dialogue from image generation
- fix: harden LLM-output parsing, prefetch lifecycle, and typewriter (PR review)
- fix: dedupe beat ids; fallback narration on empty insert-beat (PR review #2)

🤖 Generated with [Claude Code](https://claude.com/claude-code)
2026-05-28 15:20:12 +08:00
parent d116c2e3b5
commit d1f13d51a3
13 changed files with 1275 additions and 402 deletions
@@ -1,4 +1,4 @@
-import { visionTurn } from "@yume/engine";
+import { visionDecide } from "@yume/engine";
 import type { VisionRequest } from "@yume/types";
 import { NextResponse } from "next/server";
 import { loadEngineConfig } from "@/lib/config";
@@ -23,7 +23,7 @@ export async function POST(req: Request) {

  try {
    const config = loadEngineConfig();
-    const result = await visionTurn(config, body);
+    const result = await visionDecide(config, body);
    return NextResponse.json(result);
  } catch (err) {
    const message = err instanceof Error ? err.message : "Unknown error";