feat(engine): merge cloudflare-migration — paradigm D engine, BYOK proxy, story persistence (#95)
Squash-merge the cloudflare-migration branch (7 commits by Kai ki) into staging with conflict resolution, feature integration, and bug fixes. Engine: - Paradigm D: single-stream Writer replacing dual-phase Plan/Beats - Delete Architect agent; story bible generated via Writer <plan> tag - Modular prompt architecture (segments/registry/builder) - StreamRouter for tagged stream splitting (<plan>/<story>/<choices>) Infrastructure: - Cloudflare Workers deployment (wrangler.jsonc, OpenNext adapter) - D1 database schema + Drizzle ORM (scaffolded, not yet active) - R2 storage helpers (scaffolded, not yet active) - Story persistence API routes + client-side persistence BYOK (Bring Your Own Key): - /api/llm/user-proxy with SSRF-protected LLM proxy (+ requireUser auth) - CORS-aware fetch in ai-client: auto-detect CORS failure, fallback to server proxy transparently via OpenAI SDK custom fetch - BYO config support added to classify-freeform and vision routes - SettingsModal CORS privacy notice (keys never logged/stored) SSE streaming: - engineClient.ts: fetchSSE helper for progressive scene events - startSession/requestScene accept optional emit callback - Fix SSE error event field name (error → message) in scene/start routes i18n integration: - Wire buildLanguageDirective into paradigm D's prompt builder - Update corsNotice i18n keys (zh-CN/en/ja) with CORS proxy privacy text - Preserve Session.language + LanguageSwitcher from i18n commit Co-authored-by: Kai ki <155355644+zbf1009@users.noreply.github.com> Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
+175
-7
@@ -1,5 +1,5 @@
|
||||
import OpenAI from "openai";
|
||||
import type { ProviderConfig } from "@infiplot/types";
|
||||
import type { ChatStreamResult, ChatStreamUsage, ProviderConfig } from "@infiplot/types";
|
||||
import { normalizeBaseUrl } from "./normalizeUrl";
|
||||
|
||||
export type ChatMessage = {
|
||||
@@ -7,6 +7,75 @@ export type ChatMessage = {
|
||||
content: string;
|
||||
};
|
||||
|
||||
// ── CORS proxy fallback (browser-only) ───────────────────────────────
|
||||
// BYO mode calls providers directly from the browser. When a provider
|
||||
// rejects the preflight (no CORS headers), the first request throws a
|
||||
// TypeError. We cache the blocked host and transparently reroute all
|
||||
// subsequent requests through /api/llm/user-proxy, which forwards
|
||||
// server-side and returns the upstream response (including SSE streams)
|
||||
// byte-for-byte.
|
||||
|
||||
const corsBlockedHosts = new Set<string>();
|
||||
|
||||
export function isCorsProxied(baseUrl: string): boolean {
|
||||
try {
|
||||
return corsBlockedHosts.has(new URL(baseUrl).host);
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
function proxyFetch(
|
||||
config: ProviderConfig,
|
||||
init?: RequestInit,
|
||||
): Promise<Response> {
|
||||
let body: Record<string, unknown> = {};
|
||||
if (typeof init?.body === "string") {
|
||||
try { body = JSON.parse(init.body); } catch { /* empty */ }
|
||||
}
|
||||
return globalThis.fetch("/api/llm/user-proxy", {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
provider: "openai",
|
||||
apiKey: config.apiKey,
|
||||
baseUrl: config.baseUrl,
|
||||
body,
|
||||
model: config.model,
|
||||
stream: body.stream === true,
|
||||
}),
|
||||
});
|
||||
}
|
||||
|
||||
function makeCorsAwareFetch(
|
||||
config: ProviderConfig,
|
||||
): (input: string | URL | Request, init?: RequestInit) => Promise<Response> {
|
||||
return async (input, init) => {
|
||||
const url =
|
||||
typeof input === "string" ? input
|
||||
: input instanceof URL ? input.toString()
|
||||
: input.url;
|
||||
|
||||
let host: string;
|
||||
try { host = new URL(url).host; } catch { return globalThis.fetch(input, init); }
|
||||
|
||||
if (corsBlockedHosts.has(host)) {
|
||||
return proxyFetch(config, init);
|
||||
}
|
||||
|
||||
try {
|
||||
return await globalThis.fetch(input, init);
|
||||
} catch (err) {
|
||||
if (err instanceof TypeError) {
|
||||
corsBlockedHosts.add(host);
|
||||
console.warn(`[CORS] ${host} blocked, falling back to server proxy`);
|
||||
return proxyFetch(config, init);
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// Cache observability for the prompt-prefix caching that the Writer stable
|
||||
// prefix relies on. The OpenAI usage object reports only cached READS
|
||||
// (prompt_tokens_details.cached_tokens) and has no field for cache WRITES
|
||||
@@ -28,6 +97,16 @@ function summarizeSdkUsage(
|
||||
return `[cache] ${tag} input=${input} completion=${output} (provider didn't report cache stats)`;
|
||||
}
|
||||
|
||||
function makeClient(config: ProviderConfig): OpenAI {
|
||||
return new OpenAI({
|
||||
apiKey: config.apiKey,
|
||||
baseURL: normalizeBaseUrl(config.baseUrl, "openai_compatible"),
|
||||
maxRetries: 0,
|
||||
dangerouslyAllowBrowser: true,
|
||||
...(typeof window !== "undefined" ? { fetch: makeCorsAwareFetch(config) } : {}),
|
||||
});
|
||||
}
|
||||
|
||||
export async function chat(
|
||||
config: ProviderConfig,
|
||||
messages: ChatMessage[],
|
||||
@@ -36,12 +115,7 @@ export async function chat(
|
||||
tag?: string;
|
||||
},
|
||||
): Promise<string> {
|
||||
const client = new OpenAI({
|
||||
apiKey: config.apiKey,
|
||||
baseURL: normalizeBaseUrl(config.baseUrl, "openai_compatible"),
|
||||
maxRetries: 0,
|
||||
dangerouslyAllowBrowser: true,
|
||||
});
|
||||
const client = makeClient(config);
|
||||
|
||||
const completion = await client.chat.completions.create({
|
||||
model: config.model,
|
||||
@@ -61,3 +135,97 @@ export async function chat(
|
||||
}
|
||||
return text;
|
||||
}
|
||||
|
||||
/**
|
||||
* Streaming variant of {@link chat} — the streaming primitive behind
|
||||
* paradigm D. Returns incremental `textStream` chunks plus an end-of-stream
|
||||
* `usage` promise so `summarizeSdkUsage` keeps doing cache accounting.
|
||||
*
|
||||
* Uses the OpenAI SDK's native streaming (`stream: true`) which returns an
|
||||
* async iterable of ChatCompletionChunk. The returned `usage` settles after
|
||||
* the stream drains, so callers should `await result.usage` once iteration
|
||||
* ends.
|
||||
*
|
||||
* Degrade path: if the provider doesn't support streaming, fall back to a
|
||||
* single non-streaming call wrapped as a one-chunk stream so downstream
|
||||
* tag-routing still works — the player loses progressive playback but the
|
||||
* scene generates normally.
|
||||
*/
|
||||
export function chatStream(
|
||||
config: ProviderConfig,
|
||||
messages: ChatMessage[],
|
||||
opts?: {
|
||||
temperature?: number;
|
||||
tag?: string;
|
||||
},
|
||||
): ChatStreamResult {
|
||||
const client = makeClient(config);
|
||||
const tag = opts?.tag ?? "chatStream";
|
||||
const msgPayload = messages.map((m) => ({
|
||||
role: m.role as "system" | "user" | "assistant",
|
||||
content: m.content,
|
||||
}));
|
||||
|
||||
let resolveUsage: (u: ChatStreamUsage | undefined) => void;
|
||||
const usage = new Promise<ChatStreamUsage | undefined>((r) => { resolveUsage = r; });
|
||||
|
||||
const textStream = (async function* (): AsyncIterable<string> {
|
||||
try {
|
||||
const stream = await client.chat.completions.create({
|
||||
model: config.model,
|
||||
messages: msgPayload,
|
||||
temperature: opts?.temperature ?? 0.9,
|
||||
stream: true,
|
||||
stream_options: { include_usage: true },
|
||||
});
|
||||
|
||||
for await (const chunk of stream) {
|
||||
const delta = chunk.choices[0]?.delta?.content;
|
||||
if (delta) yield delta;
|
||||
|
||||
if (chunk.usage) {
|
||||
const u: ChatStreamUsage = {
|
||||
prompt_tokens: chunk.usage.prompt_tokens,
|
||||
completion_tokens: chunk.usage.completion_tokens,
|
||||
prompt_tokens_details: chunk.usage.prompt_tokens_details
|
||||
? { cached_tokens: chunk.usage.prompt_tokens_details.cached_tokens ?? undefined }
|
||||
: undefined,
|
||||
};
|
||||
console.log(summarizeSdkUsage(tag, chunk.usage));
|
||||
resolveUsage!(u);
|
||||
}
|
||||
}
|
||||
// If usage was never emitted (provider omitted it), resolve undefined.
|
||||
resolveUsage!(undefined);
|
||||
} catch (err) {
|
||||
// Streaming not supported by provider → degrade to buffered call.
|
||||
console.warn(
|
||||
`[chatStream] streaming failed, degrading to non-streaming:`,
|
||||
err,
|
||||
);
|
||||
try {
|
||||
const completion = await client.chat.completions.create({
|
||||
model: config.model,
|
||||
messages: msgPayload,
|
||||
temperature: opts?.temperature ?? 0.9,
|
||||
stream: false,
|
||||
});
|
||||
const text = completion.choices[0]?.message?.content ?? "";
|
||||
if (text) yield text;
|
||||
console.log(summarizeSdkUsage(`${tag}:degraded`, completion.usage ?? undefined));
|
||||
resolveUsage!(completion.usage ? {
|
||||
prompt_tokens: completion.usage.prompt_tokens,
|
||||
completion_tokens: completion.usage.completion_tokens,
|
||||
prompt_tokens_details: completion.usage.prompt_tokens_details
|
||||
? { cached_tokens: completion.usage.prompt_tokens_details.cached_tokens ?? undefined }
|
||||
: undefined,
|
||||
} : undefined);
|
||||
} catch (fallbackErr) {
|
||||
resolveUsage!(undefined);
|
||||
throw fallbackErr;
|
||||
}
|
||||
}
|
||||
})();
|
||||
|
||||
return { textStream, usage };
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
export { chat } from "./chat";
|
||||
export { chat, chatStream, isCorsProxied } from "./chat";
|
||||
export { generateImage } from "./image";
|
||||
export type { GenerateImageOptions, GenerateImageResult } from "./image";
|
||||
export { interpretClick, analyzeImageDataUrl } from "./vision";
|
||||
|
||||
Reference in New Issue
Block a user