feat(engine): merge cloudflare-migration — paradigm D engine, BYOK proxy, story persistence (#95)

Squash-merge the cloudflare-migration branch (7 commits by Kai ki) into staging with conflict resolution, feature integration, and bug fixes. Engine: - Paradigm D: single-stream Writer replacing dual-phase Plan/Beats - Delete Architect agent; story bible generated via Writer <plan> tag - Modular prompt architecture (segments/registry/builder) - StreamRouter for tagged stream splitting (<plan>/<story>/<choices>) Infrastructure: - Cloudflare Workers deployment (wrangler.jsonc, OpenNext adapter) - D1 database schema + Drizzle ORM (scaffolded, not yet active) - R2 storage helpers (scaffolded, not yet active) - Story persistence API routes + client-side persistence BYOK (Bring Your Own Key): - /api/llm/user-proxy with SSRF-protected LLM proxy (+ requireUser auth) - CORS-aware fetch in ai-client: auto-detect CORS failure, fallback to server proxy transparently via OpenAI SDK custom fetch - BYO config support added to classify-freeform and vision routes - SettingsModal CORS privacy notice (keys never logged/stored) SSE streaming: - engineClient.ts: fetchSSE helper for progressive scene events - startSession/requestScene accept optional emit callback - Fix SSE error event field name (error → message) in scene/start routes i18n integration: - Wire buildLanguageDirective into paradigm D's prompt builder - Update corsNotice i18n keys (zh-CN/en/ja) with CORS proxy privacy text - Preserve Session.language + LanguageSwitcher from i18n commit Co-authored-by: Kai ki <155355644+zbf1009@users.noreply.github.com> Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-06-18 18:05:38 +08:00
parent 05bd7e229c
commit 0e4c2ebef4
78 changed files with 7396 additions and 919 deletions
@@ -1,5 +1,5 @@
 import OpenAI from "openai";
-import type { ProviderConfig } from "@infiplot/types";
+import type { ChatStreamResult, ChatStreamUsage, ProviderConfig } from "@infiplot/types";
 import { normalizeBaseUrl } from "./normalizeUrl";

 export type ChatMessage = {
@@ -7,6 +7,75 @@ export type ChatMessage = {
  content: string;
 };

+// ── CORS proxy fallback (browser-only) ───────────────────────────────
+// BYO mode calls providers directly from the browser. When a provider
+// rejects the preflight (no CORS headers), the first request throws a
+// TypeError. We cache the blocked host and transparently reroute all
+// subsequent requests through /api/llm/user-proxy, which forwards
+// server-side and returns the upstream response (including SSE streams)
+// byte-for-byte.
+
+const corsBlockedHosts = new Set<string>();
+
+export function isCorsProxied(baseUrl: string): boolean {
+  try {
+    return corsBlockedHosts.has(new URL(baseUrl).host);
+  } catch {
+    return false;
+  }
+}
+
+function proxyFetch(
+  config: ProviderConfig,
+  init?: RequestInit,
+): Promise<Response> {
+  let body: Record<string, unknown> = {};
+  if (typeof init?.body === "string") {
+    try { body = JSON.parse(init.body); } catch { /* empty */ }
+  }
+  return globalThis.fetch("/api/llm/user-proxy", {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({
+      provider: "openai",
+      apiKey: config.apiKey,
+      baseUrl: config.baseUrl,
+      body,
+      model: config.model,
+      stream: body.stream === true,
+    }),
+  });
+}
+
+function makeCorsAwareFetch(
+  config: ProviderConfig,
+): (input: string | URL | Request, init?: RequestInit) => Promise<Response> {
+  return async (input, init) => {
+    const url =
+      typeof input === "string" ? input
+      : input instanceof URL ? input.toString()
+      : input.url;
+
+    let host: string;
+    try { host = new URL(url).host; } catch { return globalThis.fetch(input, init); }
+
+    if (corsBlockedHosts.has(host)) {
+      return proxyFetch(config, init);
+    }
+
+    try {
+      return await globalThis.fetch(input, init);
+    } catch (err) {
+      if (err instanceof TypeError) {
+        corsBlockedHosts.add(host);
+        console.warn(`[CORS] ${host} blocked, falling back to server proxy`);
+        return proxyFetch(config, init);
+      }
+      throw err;
+    }
+  };
+}
+
 // Cache observability for the prompt-prefix caching that the Writer stable
 // prefix relies on. The OpenAI usage object reports only cached READS
 // (prompt_tokens_details.cached_tokens) and has no field for cache WRITES
@@ -28,6 +97,16 @@ function summarizeSdkUsage(
  return `[cache] ${tag} input=${input} completion=${output} (provider didn't report cache stats)`;
 }

+function makeClient(config: ProviderConfig): OpenAI {
+  return new OpenAI({
+    apiKey: config.apiKey,
+    baseURL: normalizeBaseUrl(config.baseUrl, "openai_compatible"),
+    maxRetries: 0,
+    dangerouslyAllowBrowser: true,
+    ...(typeof window !== "undefined" ? { fetch: makeCorsAwareFetch(config) } : {}),
+  });
+}
+
 export async function chat(
  config: ProviderConfig,
  messages: ChatMessage[],
@@ -36,12 +115,7 @@ export async function chat(
    tag?: string;
  },
 ): Promise<string> {
-  const client = new OpenAI({
-    apiKey: config.apiKey,
-    baseURL: normalizeBaseUrl(config.baseUrl, "openai_compatible"),
-    maxRetries: 0,
-    dangerouslyAllowBrowser: true,
-  });
+  const client = makeClient(config);

  const completion = await client.chat.completions.create({
    model: config.model,
@@ -61,3 +135,97 @@ export async function chat(
  }
  return text;
 }
+
+/**
+ * Streaming variant of {@link chat} — the streaming primitive behind
+ * paradigm D. Returns incremental `textStream` chunks plus an end-of-stream
+ * `usage` promise so `summarizeSdkUsage` keeps doing cache accounting.
+ *
+ * Uses the OpenAI SDK's native streaming (`stream: true`) which returns an
+ * async iterable of ChatCompletionChunk. The returned `usage` settles after
+ * the stream drains, so callers should `await result.usage` once iteration
+ * ends.
+ *
+ * Degrade path: if the provider doesn't support streaming, fall back to a
+ * single non-streaming call wrapped as a one-chunk stream so downstream
+ * tag-routing still works — the player loses progressive playback but the
+ * scene generates normally.
+ */
+export function chatStream(
+  config: ProviderConfig,
+  messages: ChatMessage[],
+  opts?: {
+    temperature?: number;
+    tag?: string;
+  },
+): ChatStreamResult {
+  const client = makeClient(config);
+  const tag = opts?.tag ?? "chatStream";
+  const msgPayload = messages.map((m) => ({
+    role: m.role as "system" | "user" | "assistant",
+    content: m.content,
+  }));
+
+  let resolveUsage: (u: ChatStreamUsage | undefined) => void;
+  const usage = new Promise<ChatStreamUsage | undefined>((r) => { resolveUsage = r; });
+
+  const textStream = (async function* (): AsyncIterable<string> {
+    try {
+      const stream = await client.chat.completions.create({
+        model: config.model,
+        messages: msgPayload,
+        temperature: opts?.temperature ?? 0.9,
+        stream: true,
+        stream_options: { include_usage: true },
+      });
+
+      for await (const chunk of stream) {
+        const delta = chunk.choices[0]?.delta?.content;
+        if (delta) yield delta;
+
+        if (chunk.usage) {
+          const u: ChatStreamUsage = {
+            prompt_tokens: chunk.usage.prompt_tokens,
+            completion_tokens: chunk.usage.completion_tokens,
+            prompt_tokens_details: chunk.usage.prompt_tokens_details
+              ? { cached_tokens: chunk.usage.prompt_tokens_details.cached_tokens ?? undefined }
+              : undefined,
+          };
+          console.log(summarizeSdkUsage(tag, chunk.usage));
+          resolveUsage!(u);
+        }
+      }
+      // If usage was never emitted (provider omitted it), resolve undefined.
+      resolveUsage!(undefined);
+    } catch (err) {
+      // Streaming not supported by provider → degrade to buffered call.
+      console.warn(
+        `[chatStream] streaming failed, degrading to non-streaming:`,
+        err,
+      );
+      try {
+        const completion = await client.chat.completions.create({
+          model: config.model,
+          messages: msgPayload,
+          temperature: opts?.temperature ?? 0.9,
+          stream: false,
+        });
+        const text = completion.choices[0]?.message?.content ?? "";
+        if (text) yield text;
+        console.log(summarizeSdkUsage(`${tag}:degraded`, completion.usage ?? undefined));
+        resolveUsage!(completion.usage ? {
+          prompt_tokens: completion.usage.prompt_tokens,
+          completion_tokens: completion.usage.completion_tokens,
+          prompt_tokens_details: completion.usage.prompt_tokens_details
+            ? { cached_tokens: completion.usage.prompt_tokens_details.cached_tokens ?? undefined }
+            : undefined,
+        } : undefined);
+      } catch (fallbackErr) {
+        resolveUsage!(undefined);
+        throw fallbackErr;
+      }
+    }
+  })();
+
+  return { textStream, usage };
+}
@@ -1,4 +1,4 @@
-export { chat } from "./chat";
+export { chat, chatStream, isCorsProxied } from "./chat";
 export { generateImage } from "./image";
 export type { GenerateImageOptions, GenerateImageResult } from "./image";
 export { interpretClick, analyzeImageDataUrl } from "./vision";