Files
Zonghao Yuan 0e4c2ebef4 feat(engine): merge cloudflare-migration — paradigm D engine, BYOK proxy, story persistence (#95)
Squash-merge the cloudflare-migration branch (7 commits by Kai ki) into
staging with conflict resolution, feature integration, and bug fixes.

Engine:
- Paradigm D: single-stream Writer replacing dual-phase Plan/Beats
- Delete Architect agent; story bible generated via Writer <plan> tag
- Modular prompt architecture (segments/registry/builder)
- StreamRouter for tagged stream splitting (<plan>/<story>/<choices>)

Infrastructure:
- Cloudflare Workers deployment (wrangler.jsonc, OpenNext adapter)
- D1 database schema + Drizzle ORM (scaffolded, not yet active)
- R2 storage helpers (scaffolded, not yet active)
- Story persistence API routes + client-side persistence

BYOK (Bring Your Own Key):
- /api/llm/user-proxy with SSRF-protected LLM proxy (+ requireUser auth)
- CORS-aware fetch in ai-client: auto-detect CORS failure, fallback to
  server proxy transparently via OpenAI SDK custom fetch
- BYO config support added to classify-freeform and vision routes
- SettingsModal CORS privacy notice (keys never logged/stored)

SSE streaming:
- engineClient.ts: fetchSSE helper for progressive scene events
- startSession/requestScene accept optional emit callback
- Fix SSE error event field name (error → message) in scene/start routes

i18n integration:
- Wire buildLanguageDirective into paradigm D's prompt builder
- Update corsNotice i18n keys (zh-CN/en/ja) with CORS proxy privacy text
- Preserve Session.language + LanguageSwitcher from i18n commit

Co-authored-by: Kai ki <155355644+zbf1009@users.noreply.github.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-06-18 18:05:38 +08:00

232 lines
7.7 KiB
TypeScript

import OpenAI from "openai";
import type { ChatStreamResult, ChatStreamUsage, ProviderConfig } from "@infiplot/types";
import { normalizeBaseUrl } from "./normalizeUrl";
export type ChatMessage = {
role: "system" | "user" | "assistant";
content: string;
};
// ── CORS proxy fallback (browser-only) ───────────────────────────────
// BYO mode calls providers directly from the browser. When a provider
// rejects the preflight (no CORS headers), the first request throws a
// TypeError. We cache the blocked host and transparently reroute all
// subsequent requests through /api/llm/user-proxy, which forwards
// server-side and returns the upstream response (including SSE streams)
// byte-for-byte.
const corsBlockedHosts = new Set<string>();
export function isCorsProxied(baseUrl: string): boolean {
try {
return corsBlockedHosts.has(new URL(baseUrl).host);
} catch {
return false;
}
}
function proxyFetch(
config: ProviderConfig,
init?: RequestInit,
): Promise<Response> {
let body: Record<string, unknown> = {};
if (typeof init?.body === "string") {
try { body = JSON.parse(init.body); } catch { /* empty */ }
}
return globalThis.fetch("/api/llm/user-proxy", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({
provider: "openai",
apiKey: config.apiKey,
baseUrl: config.baseUrl,
body,
model: config.model,
stream: body.stream === true,
}),
});
}
function makeCorsAwareFetch(
config: ProviderConfig,
): (input: string | URL | Request, init?: RequestInit) => Promise<Response> {
return async (input, init) => {
const url =
typeof input === "string" ? input
: input instanceof URL ? input.toString()
: input.url;
let host: string;
try { host = new URL(url).host; } catch { return globalThis.fetch(input, init); }
if (corsBlockedHosts.has(host)) {
return proxyFetch(config, init);
}
try {
return await globalThis.fetch(input, init);
} catch (err) {
if (err instanceof TypeError) {
corsBlockedHosts.add(host);
console.warn(`[CORS] ${host} blocked, falling back to server proxy`);
return proxyFetch(config, init);
}
throw err;
}
};
}
// Cache observability for the prompt-prefix caching that the Writer stable
// prefix relies on. The OpenAI usage object reports only cached READS
// (prompt_tokens_details.cached_tokens) and has no field for cache WRITES
// (tokens written to the cache on a cold pass), so unlike the old AI SDK
// path we can show the hit rate but not the create cost. cached_tokens lives
// directly on the SDK's CompletionUsage type — no cast needed.
function summarizeSdkUsage(
tag: string,
usage: OpenAI.Completions.CompletionUsage | undefined,
): string {
if (!usage) return `[cache] ${tag} no-usage`;
const input = usage.prompt_tokens ?? 0;
const output = usage.completion_tokens ?? 0;
const cached = usage.prompt_tokens_details?.cached_tokens;
if (typeof cached === "number") {
const rate = input > 0 ? ((cached / input) * 100).toFixed(1) : "n/a";
return `[cache] ${tag} hit=${cached} input=${input} rate=${rate}% completion=${output}`;
}
return `[cache] ${tag} input=${input} completion=${output} (provider didn't report cache stats)`;
}
function makeClient(config: ProviderConfig): OpenAI {
return new OpenAI({
apiKey: config.apiKey,
baseURL: normalizeBaseUrl(config.baseUrl, "openai_compatible"),
maxRetries: 0,
dangerouslyAllowBrowser: true,
...(typeof window !== "undefined" ? { fetch: makeCorsAwareFetch(config) } : {}),
});
}
export async function chat(
config: ProviderConfig,
messages: ChatMessage[],
opts?: {
temperature?: number;
tag?: string;
},
): Promise<string> {
const client = makeClient(config);
const completion = await client.chat.completions.create({
model: config.model,
messages: messages.map((m) => ({
role: m.role as "system" | "user" | "assistant",
content: m.content,
})),
temperature: opts?.temperature ?? 0.9,
stream: false,
});
const text = completion.choices[0]?.message?.content ?? "";
console.log(summarizeSdkUsage(opts?.tag ?? "chat", completion.usage ?? undefined));
if (text.length === 0) {
throw new Error(`Chat API returned no content.`);
}
return text;
}
/**
* Streaming variant of {@link chat} — the streaming primitive behind
* paradigm D. Returns incremental `textStream` chunks plus an end-of-stream
* `usage` promise so `summarizeSdkUsage` keeps doing cache accounting.
*
* Uses the OpenAI SDK's native streaming (`stream: true`) which returns an
* async iterable of ChatCompletionChunk. The returned `usage` settles after
* the stream drains, so callers should `await result.usage` once iteration
* ends.
*
* Degrade path: if the provider doesn't support streaming, fall back to a
* single non-streaming call wrapped as a one-chunk stream so downstream
* tag-routing still works — the player loses progressive playback but the
* scene generates normally.
*/
export function chatStream(
config: ProviderConfig,
messages: ChatMessage[],
opts?: {
temperature?: number;
tag?: string;
},
): ChatStreamResult {
const client = makeClient(config);
const tag = opts?.tag ?? "chatStream";
const msgPayload = messages.map((m) => ({
role: m.role as "system" | "user" | "assistant",
content: m.content,
}));
let resolveUsage: (u: ChatStreamUsage | undefined) => void;
const usage = new Promise<ChatStreamUsage | undefined>((r) => { resolveUsage = r; });
const textStream = (async function* (): AsyncIterable<string> {
try {
const stream = await client.chat.completions.create({
model: config.model,
messages: msgPayload,
temperature: opts?.temperature ?? 0.9,
stream: true,
stream_options: { include_usage: true },
});
for await (const chunk of stream) {
const delta = chunk.choices[0]?.delta?.content;
if (delta) yield delta;
if (chunk.usage) {
const u: ChatStreamUsage = {
prompt_tokens: chunk.usage.prompt_tokens,
completion_tokens: chunk.usage.completion_tokens,
prompt_tokens_details: chunk.usage.prompt_tokens_details
? { cached_tokens: chunk.usage.prompt_tokens_details.cached_tokens ?? undefined }
: undefined,
};
console.log(summarizeSdkUsage(tag, chunk.usage));
resolveUsage!(u);
}
}
// If usage was never emitted (provider omitted it), resolve undefined.
resolveUsage!(undefined);
} catch (err) {
// Streaming not supported by provider → degrade to buffered call.
console.warn(
`[chatStream] streaming failed, degrading to non-streaming:`,
err,
);
try {
const completion = await client.chat.completions.create({
model: config.model,
messages: msgPayload,
temperature: opts?.temperature ?? 0.9,
stream: false,
});
const text = completion.choices[0]?.message?.content ?? "";
if (text) yield text;
console.log(summarizeSdkUsage(`${tag}:degraded`, completion.usage ?? undefined));
resolveUsage!(completion.usage ? {
prompt_tokens: completion.usage.prompt_tokens,
completion_tokens: completion.usage.completion_tokens,
prompt_tokens_details: completion.usage.prompt_tokens_details
? { cached_tokens: completion.usage.prompt_tokens_details.cached_tokens ?? undefined }
: undefined,
} : undefined);
} catch (fallbackErr) {
resolveUsage!(undefined);
throw fallbackErr;
}
}
})();
return { textStream, usage };
}