Merge pull request #64 from zonghaoyuan/refactor/settings-modal

feat: add client-side model configuration and server fallback
2026-06-12 22:09:43 +08:00
parent e6004020b5 299df0d098
commit c4ffc16498
18 changed files with 1167 additions and 780 deletions
@@ -1,29 +1,24 @@
-import { generateText } from "ai";
-import type { LanguageModelUsage, ModelMessage } from "ai";
+import OpenAI from "openai";
 import type { ProviderConfig } from "@infiplot/types";
-import { createLanguageModel, resolveProtocol } from "./model";
+import { normalizeBaseUrl } from "./normalizeUrl";

 export type ChatMessage = {
  role: "system" | "user" | "assistant";
  content: string;
 };

-// AI SDK 6 unifies cache stats across providers into usage.inputTokenDetails,
-// so a single shape covers Anthropic, Gemini, and OpenAI-compatible providers.
 function summarizeSdkUsage(
  tag: string,
-  usage: LanguageModelUsage | undefined,
+  usage: OpenAI.Completions.CompletionUsage | undefined,
 ): string {
  if (!usage) return `[cache] ${tag} no-usage`;
-  const input = usage.inputTokens ?? 0;
-  const output = usage.outputTokens ?? 0;
-  const read = usage.inputTokenDetails?.cacheReadTokens;
-  const write = usage.inputTokenDetails?.cacheWriteTokens;
-  if (typeof read === "number" || typeof write === "number") {
-    const hit = read ?? 0;
-    const create = write ?? 0;
-    const rate = input > 0 ? ((hit / input) * 100).toFixed(1) : "n/a";
-    return `[cache] ${tag} hit=${hit} create=${create} input=${input} rate=${rate}% completion=${output}`;
+  const input = usage.prompt_tokens ?? 0;
+  const output = usage.completion_tokens ?? 0;
+  // Read the cached-token count through a structural cast, so this works whether or not the usage type declares `prompt_tokens_details`.
+  const details = (usage as { prompt_tokens_details?: { cached_tokens?: number } }).prompt_tokens_details;
+  const cached = details?.cached_tokens;
+  // Only a numeric `cached_tokens` counts as "the provider reported cache stats".
+  if (typeof cached === "number") {
+    // Hit rate = cached prompt tokens as a percentage of all prompt tokens; "n/a" when the prompt count is 0.
+    const rate = input > 0 ? ((cached / input) * 100).toFixed(1) : "n/a";
+    return `[cache] ${tag} hit=${cached} input=${input} rate=${rate}% completion=${output}`;
  }
  return `[cache] ${tag} input=${input} completion=${output} (provider didn't report cache stats)`;
 }
@@ -36,28 +31,28 @@ export async function chat(
    tag?: string;
  },
 ): Promise<string> {
-  const protocol = resolveProtocol(config);
-  const model = createLanguageModel(config, protocol);
-
-  const system = messages.find((m) => m.role === "system")?.content;
-  const convo: ModelMessage[] = messages
-    .filter((m) => m.role !== "system")
-    .map((m) => ({
-      role: m.role as "user" | "assistant",
-      content: m.content,
-    }));
-
-  const { text, usage } = await generateText({
-    model,
-    system,
-    messages: convo,
-    temperature: opts?.temperature ?? 0.9,
+  const client = new OpenAI({
+    apiKey: config.apiKey,
+    baseURL: normalizeBaseUrl(config.baseUrl, "openai_compatible"),
+    maxRetries: 0,
+    dangerouslyAllowBrowser: true,
  });

-  console.log(summarizeSdkUsage(opts?.tag ?? "chat", usage));
+  const completion = await client.chat.completions.create({
+    model: config.model,
+    messages: messages.map((m) => ({
+      role: m.role as "system" | "user" | "assistant",
+      content: m.content,
+    })),
+    temperature: opts?.temperature ?? 0.9,
+    stream: false,
+  });

-  if (typeof text !== "string" || text.length === 0) {
-    throw new Error(`Chat API (AI SDK ${protocol}) returned no content.`);
+  const text = completion.choices[0]?.message?.content ?? "";
+  console.log(summarizeSdkUsage(opts?.tag ?? "chat", completion.usage ?? undefined));
+
+  if (text.length === 0) {
+    throw new Error(`Chat API returned no content.`);
  }
  return text;
 }
@@ -1,6 +1,4 @@
-import { generateImage as generateImageSdk } from "ai";
-import { createOpenAI } from "@ai-sdk/openai";
-import { createGoogleGenerativeAI } from "@ai-sdk/google";
+import OpenAI, { toFile, type Uploadable } from "openai";
 import type { Orientation, ProviderConfig, ProviderProtocol } from "@infiplot/types";
 import { fetchWithRetry } from "./fetchWithRetry";
 import { normalizeBaseUrl } from "./normalizeUrl";
@@ -48,8 +46,8 @@ export type GenerateImageOptions = {
  /**
   * Reference images (UUIDs, URLs, or base64) to condition generation on —
   * typically character portraits + the prior scene image. Runware caps at 4;
-   * we silently truncate beyond that. On the OpenAI/Gemini AI SDK paths these
-   * map to `prompt.images` (the SDK accepts public URLs or data URLs).
+   * we silently truncate beyond that. On the native OpenAI path these are
+   * fetched/decoded and sent to `images.edit`.
   */
  referenceImages?: string[];
  /** 0–1, FLUX needs ≥ 0.8 to actually have an effect. Runware-only. */
@@ -58,7 +56,7 @@ export type GenerateImageOptions = {
   * Output aspect, locked per session. "portrait" → 9:16 vertical for mobile;
   * default/"landscape" → 16:9 widescreen. Mapped to each provider's nearest
   * supported size: Runware 1024×1792, OpenAI-compatible REST 1024x1792,
-   * native gpt-image 1024x1536, Gemini aspectRatio 9:16.
+   * native gpt-image 1024x1536.
   */
  orientation?: Orientation;
 };
@@ -66,8 +64,8 @@ export type GenerateImageOptions = {
 export type GenerateImageResult = {
  /**
   * Image the client can render directly. A Runware CDN URL on the Runware
-   * path; a `data:<mime>;base64,...` URI on the AI SDK paths (OpenAI/Gemini
-   * return raw bytes, not a hosted URL).
+   * path; a `data:<mime>;base64,...` URI on the native OpenAI path when GPT
+   * image models return raw bytes instead of a hosted URL.
   */
  imageUrl: string;
  /**
@@ -117,63 +115,124 @@ export async function generateImage(
  const protocol = resolveImageProtocol(config);
  switch (protocol) {
    case "openai":
-    case "google":
-      return generateImageViaAiSdk(config, prompt, options, protocol);
+      return generateImageOpenAi(config, prompt, options);
    case "runware":
      return generateImageRunware(config, prompt, options);
-    case "anthropic":
-      throw new Error(
-        'IMAGE_PROVIDER "anthropic" does not generate images. Use "openai", "google", "runware", or "openai_compatible".',
-      );
    case "openai_compatible":
    default:
      return generateImageOpenAiCompatible(config, prompt, options);
  }
 }

-// Native OpenAI (gpt-image) / Gemini (Nano Banana) via the Vercel AI SDK.
-// Unlike the fetch path, this supports reference-image editing via
-// `prompt.images`. The SDK returns raw bytes (no hosted URL), so we hand the
-// client a data URI and synthesize a UUID; continuity references reuse the
-// data URI rather than a provider UUID.
-async function generateImageViaAiSdk(
+// Native OpenAI (gpt-image) via the official OpenAI SDK. Unlike the compatible
+// fetch path, this supports reference-image editing through `images.edit`.
+// GPT image models return raw bytes, so we hand the client a data URI and
+// synthesize a UUID; continuity references reuse the data URI rather than a
+// provider UUID.
+async function generateImageOpenAi(
  config: ProviderConfig,
  prompt: string,
-  options: GenerateImageOptions | undefined,
-  protocol: "openai" | "google",
+  options?: GenerateImageOptions,
 ): Promise<GenerateImageResult> {
-  const baseURL = normalizeBaseUrl(config.baseUrl, protocol);
-  const imageModel =
-    protocol === "openai"
-      ? createOpenAI({ apiKey: config.apiKey, baseURL }).image(config.model)
-      : createGoogleGenerativeAI({ apiKey: config.apiKey, baseURL }).image(
-          config.model,
-        );
-
-  const refs = (options?.referenceImages ?? []).slice(0, MAX_REFERENCE_IMAGES);
-  const promptArg =
-    refs.length > 0 ? { text: prompt, images: refs } : prompt;
-
-  // Session-locked aspect. gpt-image takes an explicit `size` (portrait /
-  // landscape options are 1024x1536 / 1536x1024); Gemini takes an `aspectRatio`.
-  const portrait = options?.orientation === "portrait";
-  const { image } = await generateImageSdk({
-    model: imageModel,
-    prompt: promptArg,
-    ...(protocol === "openai"
-      ? { size: (portrait ? "1024x1536" : "1536x1024") as `${number}x${number}` }
-      : { aspectRatio: (portrait ? "9:16" : "16:9") as `${number}:${number}` }),
+  const client = new OpenAI({
+    apiKey: config.apiKey,
+    baseURL: normalizeBaseUrl(config.baseUrl, "openai"),
+    maxRetries: 2,
+    dangerouslyAllowBrowser: true,
  });
+  // References beyond MAX_REFERENCE_IMAGES are dropped.
+  const refs = (options?.referenceImages ?? []).slice(0, MAX_REFERENCE_IMAGES);
+  // Anything other than "portrait" (including an unset orientation) gets the landscape size.
+  const portrait = options?.orientation === "portrait";
+  const size = portrait ? "1024x1536" : "1536x1024";

-  return {
-    imageUrl: `data:${image.mediaType};base64,${image.base64}`,
-    imageUuid: crypto.randomUUID(),
-  };
+  // With reference images, call the edit endpoint and upload them; otherwise do plain text-to-image generation.
+  const response =
+    refs.length > 0
+      ? await client.images.edit({
+          model: config.model,
+          prompt,
+          image: await Promise.all(refs.map(referenceImageToUploadable)),
+          n: 1,
+          size,
+        })
+      : await client.images.generate({
+          model: config.model,
+          prompt,
+          n: 1,
+          size,
+        });
+
+  return imageResponseToResult(response);
+}
+
+/**
+ * Converts one reference image into an upload for `images.edit`.
+ *
+ * Accepts `data:` URLs and http(s) URLs. Both are read with `fetch` and the
+ * response is handed to `toFile`. When the response has no `content-type`
+ * header, the media type defaults to `image/png`.
+ *
+ * @param ref - Reference image as a data URL or an http(s) URL.
+ * @returns The uploadable built from the fetched response.
+ * @throws Error when `ref` is neither kind of URL, or the fetch is not OK.
+ */
+async function referenceImageToUploadable(ref: string): Promise<Uploadable> {
+  const isDataUrl = ref.startsWith("data:");
+  const isHttpUrl = !isDataUrl && /^https?:\/\//i.test(ref);
+  if (!isDataUrl && !isHttpUrl) {
+    throw new Error(
+      `Native OpenAI image editing requires reference image URLs or data URLs; got "${ref.slice(0, 32)}...".`,
+    );
+  }
+
+  const fetched = await fetch(ref);
+  if (!fetched.ok) {
+    throw new Error(
+      isDataUrl
+        ? `Failed to read data URL reference image.`
+        : `Failed to fetch reference image ${ref}: HTTP ${fetched.status}`,
+    );
+  }
+
+  const type = fetched.headers.get("content-type") ?? "image/png";
+  // A data URL has no path to take a name from, so its filename is derived
+  // from the media type alone.
+  const filename = isDataUrl
+    ? `reference.${extensionFromMediaType(type)}`
+    : filenameFromUrl(ref, type);
+  return toFile(fetched, filename, { type });
+}
+
+/**
+ * Maps an OpenAI images response onto the engine's result shape.
+ *
+ * Only the first entry of `response.data` is considered. Inline base64 takes
+ * precedence and becomes a `data:image/<format>;base64,...` URI, where the
+ * format is `response.output_format` or "png" when that is absent. Otherwise
+ * the hosted `url` is passed through. The UUID is always freshly generated.
+ *
+ * @throws Error when the first entry has neither base64 data nor a URL.
+ */
+function imageResponseToResult(
+  response: OpenAI.Images.ImagesResponse,
+): GenerateImageResult {
+  const first = response.data?.[0];
+
+  let imageUrl: string | undefined;
+  if (first?.b64_json) {
+    imageUrl = `data:image/${response.output_format ?? "png"};base64,${first.b64_json}`;
+  } else if (first?.url) {
+    imageUrl = first.url;
+  }
+
+  if (!imageUrl) {
+    throw new Error(`No image data in OpenAI response.`);
+  }
+  return { imageUrl, imageUuid: crypto.randomUUID() };
+}
+
+/**
+ * Picks an upload filename for a fetched reference image.
+ *
+ * Uses the last non-empty path segment of `url` when it ends in an
+ * alphanumeric extension. Otherwise, or when `url` cannot be parsed, falls
+ * back to `reference.<ext>` with the extension derived from `mediaType`.
+ */
+function filenameFromUrl(url: string, mediaType: string): string {
+  let lastSegment: string | undefined;
+  try {
+    const segments = new URL(url).pathname
+      .split("/")
+      .filter((part) => part.length > 0);
+    lastSegment = segments[segments.length - 1];
+  } catch {
+    // Unparseable URL: use the media-type fallback below.
+  }
+  if (lastSegment !== undefined && /\.[a-z0-9]+$/i.test(lastSegment)) {
+    return lastSegment;
+  }
+  return `reference.${extensionFromMediaType(mediaType)}`;
+}
+
+/**
+ * Maps a media type to a file extension by case-sensitive substring match:
+ * "jpeg"/"jpg" give "jpg", "webp" gives "webp", and everything else
+ * (including an empty string) gives "png".
+ */
+function extensionFromMediaType(mediaType: string): string {
+  const isJpeg = ["jpeg", "jpg"].some((marker) => mediaType.includes(marker));
+  if (isJpeg) return "jpg";
+  return mediaType.includes("webp") ? "webp" : "png";
 }

 // OpenAI-compatible REST route (GPTGod, DALL-E proxies, etc.). Basic
 // text-to-image only — no reference images on this path; for editing/anchoring
-// set IMAGE_PROVIDER=openai (or google) to take the AI SDK path above.
+// set IMAGE_PROVIDER=openai to take the native OpenAI path above.
 async function generateImageOpenAiCompatible(
  config: ProviderConfig,
  prompt: string,
@@ -1,23 +0,0 @@
-import { createAnthropic } from "@ai-sdk/anthropic";
-import { createGoogleGenerativeAI } from "@ai-sdk/google";
-import { createOpenAI } from "@ai-sdk/openai";
-import type { ProviderConfig, ProviderProtocol } from "@infiplot/types";
-import { normalizeBaseUrl } from "./normalizeUrl";
-
-export function resolveProtocol(config: ProviderConfig): ProviderProtocol {
-  return config.provider ?? "openai_compatible";
-}
-
-export function createLanguageModel(config: ProviderConfig, protocol: ProviderProtocol) {
-  const baseURL = normalizeBaseUrl(config.baseUrl, protocol);
-  switch (protocol) {
-    case "anthropic":
-      return createAnthropic({ apiKey: config.apiKey, baseURL })(config.model);
-    case "google":
-      return createGoogleGenerativeAI({ apiKey: config.apiKey, baseURL })(config.model);
-    case "openai_compatible":
-    case "openai":
-    default:
-      return createOpenAI({ apiKey: config.apiKey, baseURL }).chat(config.model);
-  }
-}
@@ -31,8 +31,6 @@ const ENDPOINT_SUFFIX =
 const DEFAULT_VERSION_SEGMENT: Record<ProviderProtocol, string | null> = {
  openai_compatible: "v1",
  openai: "v1",
-  anthropic: "v1",
-  google: "v1beta",
  // Runware posts to the bare base URL with no version-pathed sub-resource,
  // so never inject a segment for it.
  runware: null,
@@ -1,7 +1,6 @@
-import { generateText } from "ai";
-import type { ModelMessage } from "ai";
+import OpenAI from "openai";
 import type { ProviderConfig } from "@infiplot/types";
-import { createLanguageModel, resolveProtocol } from "./model";
+import { normalizeBaseUrl } from "./normalizeUrl";

 const VISION_TIMEOUT_MS = 60_000;

@@ -22,34 +21,32 @@ export async function analyzeImageDataUrl(
  imageDataUrl: string,
  prompt: string,
 ): Promise<string> {
-  const protocol = resolveProtocol(config);
-  const model = createLanguageModel(config, protocol);
+  const client = new OpenAI({
+    apiKey: config.apiKey,
+    baseURL: normalizeBaseUrl(config.baseUrl, "openai_compatible"),
+    maxRetries: 0,
+    timeout: VISION_TIMEOUT_MS,
+    dangerouslyAllowBrowser: true,
+  });

-  const messages: ModelMessage[] = [
-    {
-      role: "user",
-      content: [
-        { type: "text", text: prompt },
-        { type: "image", image: imageDataUrl },
-      ],
-    },
-  ];
+  const completion = await client.chat.completions.create({
+    model: config.model,
+    messages: [
+      {
+        role: "user",
+        content: [
+          { type: "text", text: prompt },
+          { type: "image_url", image_url: { url: imageDataUrl } },
+        ],
+      },
+    ],
+    temperature: 0.2,
+    stream: false,
+  });

-  const timeoutCtrl = new AbortController();
-  const timeoutId = setTimeout(() => timeoutCtrl.abort(), VISION_TIMEOUT_MS);
-  try {
-    const { text } = await generateText({
-      model,
-      messages,
-      temperature: 0.2,
-      maxRetries: 0,
-      abortSignal: timeoutCtrl.signal,
-    });
-    if (typeof text !== "string" || text.length === 0) {
-      throw new Error(`Vision API (AI SDK ${protocol}) returned no content.`);
-    }
-    return text;
-  } finally {
-    clearTimeout(timeoutId);
+  const text = completion.choices[0]?.message?.content ?? "";
+  if (text.length === 0) {
+    throw new Error(`Vision API returned no content.`);
  }
+  return text;
 }
@@ -0,0 +1,160 @@
+import type { EngineConfig, ProviderProtocol } from "@infiplot/types";
+
+// Bring-your-own model keys — stored CLIENT-SIDE ONLY.
+//
+// When a user supplies their own text/image/vision API credentials, we persist
+// them in localStorage and the browser talks to providers directly. The keys
+// are therefore never sent to our server: no request body, no header, no log.
+
+// localStorage key under which the JSON-encoded model config is stored.
+const STORAGE_KEY = "infiplot:model";
+
+// Provider values accepted when reading a stored config; any other value is
+// read back as undefined.
+const VALID_PROTOCOLS: ProviderProtocol[] = [
+  "openai_compatible",
+  "openai",
+  "runware",
+];
+
+/**
+ * Flat shape of the model config as persisted in localStorage: one
+ * base URL / API key / model name triple each for text, image and vision,
+ * plus an optional provider protocol per triple.
+ */
+export type StoredModelConfig = {
+  textBaseUrl: string;
+  textApiKey: string;
+  textModel: string;
+  textProvider?: ProviderProtocol; // optional; unknown values are dropped on read
+  imageBaseUrl: string;
+  imageApiKey: string;
+  imageModel: string;
+  imageProvider?: ProviderProtocol; // optional; unknown values are dropped on read
+  visionBaseUrl: string;
+  visionApiKey: string;
+  visionModel: string;
+  visionProvider?: ProviderProtocol; // optional; unknown values are dropped on read
+};
+
+/** Type guard: true when `p` is one of the entries of VALID_PROTOCOLS. */
+function isValidProtocol(p: string): p is ProviderProtocol {
+  return VALID_PROTOCOLS.some((known) => known === p);
+}
+
+/** Returns `raw` when it is a known protocol string; otherwise undefined. */
+function readProtocol(raw: unknown): ProviderProtocol | undefined {
+  if (typeof raw !== "string") return undefined;
+  return isValidProtocol(raw) ? raw : undefined;
+}
+
+/**
+ * Loads the persisted model config from localStorage and validates it.
+ *
+ * Every base URL, API key and model name must be a non-empty string after
+ * trimming. Provider fields are kept only when they name a known protocol.
+ *
+ * @returns The trimmed config, or null when there is no `window`, nothing is
+ *   stored, reading or parsing fails, or any required field is blank.
+ */
+export function readStoredModelConfig(): StoredModelConfig | null {
+  if (typeof window === "undefined") return null;
+  try {
+    const serialized = window.localStorage.getItem(STORAGE_KEY);
+    if (!serialized) return null;
+    const stored = JSON.parse(serialized) as Partial<StoredModelConfig>;
+
+    // Non-string values are treated as blank.
+    const text = (value: unknown): string =>
+      typeof value === "string" ? value.trim() : "";
+
+    const candidate: StoredModelConfig = {
+      textBaseUrl: text(stored.textBaseUrl),
+      textApiKey: text(stored.textApiKey),
+      textModel: text(stored.textModel),
+      textProvider: readProtocol(stored.textProvider),
+      imageBaseUrl: text(stored.imageBaseUrl),
+      imageApiKey: text(stored.imageApiKey),
+      imageModel: text(stored.imageModel),
+      imageProvider: readProtocol(stored.imageProvider),
+      visionBaseUrl: text(stored.visionBaseUrl),
+      visionApiKey: text(stored.visionApiKey),
+      visionModel: text(stored.visionModel),
+      visionProvider: readProtocol(stored.visionProvider),
+    };
+
+    const requiredFields = [
+      candidate.textBaseUrl,
+      candidate.textApiKey,
+      candidate.textModel,
+      candidate.imageBaseUrl,
+      candidate.imageApiKey,
+      candidate.imageModel,
+      candidate.visionBaseUrl,
+      candidate.visionApiKey,
+      candidate.visionModel,
+    ];
+    return requiredFields.every((field) => field !== "") ? candidate : null;
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Saves the model config to localStorage as JSON.
+ *
+ * String fields are trimmed first, so whitespace from a paste never reaches
+ * request headers. Provider fields are stored as given. Does nothing when
+ * there is no `window`. Storage failures are swallowed.
+ */
+export function writeStoredModelConfig(config: StoredModelConfig): void {
+  if (typeof window === "undefined") return;
+  try {
+    const tidy = (value: string): string => value.trim();
+    const { textProvider, imageProvider, visionProvider } = config;
+    const payload: StoredModelConfig = {
+      textBaseUrl: tidy(config.textBaseUrl),
+      textApiKey: tidy(config.textApiKey),
+      textModel: tidy(config.textModel),
+      textProvider,
+      imageBaseUrl: tidy(config.imageBaseUrl),
+      imageApiKey: tidy(config.imageApiKey),
+      imageModel: tidy(config.imageModel),
+      imageProvider,
+      visionBaseUrl: tidy(config.visionBaseUrl),
+      visionApiKey: tidy(config.visionApiKey),
+      visionModel: tidy(config.visionModel),
+      visionProvider,
+    };
+    const serialized = JSON.stringify(payload);
+    window.localStorage.setItem(STORAGE_KEY, serialized);
+  } catch {
+    // Storage disabled / quota / private mode — BYO simply stays off.
+  }
+}
+
+/**
+ * Removes the persisted model config. Does nothing when there is no
+ * `window`; storage errors are ignored.
+ */
+export function clearStoredModelConfig(): void {
+  if (typeof window !== "undefined") {
+    try {
+      window.localStorage.removeItem(STORAGE_KEY);
+    } catch {
+      // Best effort: nothing to do when storage is unavailable.
+    }
+  }
+}
+
+/**
+ * Expands a stored (flat) model config into a full EngineConfig.
+ *
+ * @param model - Stored model config; a missing value is rejected.
+ * @param tts - Optional TTS config; null becomes `undefined` in the result.
+ * @returns Engine config with text/image/vision sections and `mockImage`
+ *   set to false.
+ * @throws Error with a user-facing "please configure" message when `model`
+ *   is missing, so callers can show it directly.
+ */
+export function resolveEngineConfig(
+  model: StoredModelConfig | null,
+  tts: import("@infiplot/types").TtsConfig | null,
+): EngineConfig {
+  if (!model) {
+    throw new Error("模型配置未设置。请返回首页，点击「模型设置」配置 API 参数。");
+  }
+
+  const text = {
+    baseUrl: model.textBaseUrl,
+    apiKey: model.textApiKey,
+    model: model.textModel,
+    provider: model.textProvider,
+  };
+  const image = {
+    baseUrl: model.imageBaseUrl,
+    apiKey: model.imageApiKey,
+    model: model.imageModel,
+    provider: model.imageProvider,
+  };
+  const vision = {
+    baseUrl: model.visionBaseUrl,
+    apiKey: model.visionApiKey,
+    model: model.visionModel,
+    provider: model.visionProvider,
+  };
+
+  return { text, image, vision, tts: tts ?? undefined, mockImage: false };
+}
@@ -6,8 +6,6 @@ import type {

 const VALID_PROTOCOLS = [
  "openai_compatible",
-  "anthropic",
-  "google",
  "openai",
  "runware",
 ] as const;
@@ -3,8 +3,9 @@ import { jsonrepair, JSONRepairError } from "jsonrepair";
 // Strict-then-forgiving JSON parser for LLM output. Tries in order:
 //   1. Direct JSON.parse on the trimmed text.
 //   2. Extract from ```json``` fenced block.
-//   3. Slice between first { and last } and parse.
-//   4. Apply targeted regex pre-repairs (see preRepair) and try jsonrepair.
+//   3. Parse the first complete JSON value prefix (handles duplicated objects).
+//   4. Slice between first { and last } and parse.
+//   5. Apply targeted regex pre-repairs (see preRepair) and try jsonrepair.
 //
 // On final failure, logs the first 800 chars of the raw model output so we
 // can diagnose the actual syntax error without flooding logs or leaking
@@ -40,6 +41,67 @@ function preRepair(s: string): string {
  return s.replace(/"([^"\n:]+):(\s+)"/g, '"$1":$2"');
 }

+/** Index of the earliest "{" or "[" in `s`, or -1 when neither occurs. */
+function firstJsonStart(s: string): number {
+  const brace = s.indexOf("{");
+  const bracket = s.indexOf("[");
+  // If one kind is absent its index is -1, so the larger value is the index
+  // of the other kind (or -1 when both are absent).
+  if (brace === -1 || bracket === -1) return Math.max(brace, bracket);
+  return Math.min(brace, bracket);
+}
+
+/**
+ * Extracts the first bracket-balanced `{...}` or `[...]` span from `s`.
+ *
+ * Scanning starts at the first "{" or "[" and tracks nesting, ignoring
+ * brackets inside double-quoted strings (backslash escapes are honoured).
+ * The span is NOT validated as JSON here — only its brackets are balanced.
+ *
+ * @returns The balanced substring, or undefined when there is no opening
+ *   bracket, a closer does not match the innermost opener, or the input ends
+ *   before the outermost bracket closes.
+ */
+function firstCompleteJsonValue(s: string): string | undefined {
+  const start = firstJsonStart(s);
+  if (start === -1) return undefined;
+
+  // Closers still owed, innermost last.
+  const expectedClosers: string[] = [];
+  let insideString = false;
+  let afterBackslash = false;
+
+  for (let pos = start; pos < s.length; pos += 1) {
+    const ch = s.charAt(pos);
+
+    if (insideString) {
+      if (afterBackslash) afterBackslash = false;
+      else if (ch === "\\") afterBackslash = true;
+      else if (ch === "\"") insideString = false;
+      continue;
+    }
+
+    switch (ch) {
+      case "\"":
+        insideString = true;
+        break;
+      case "{":
+        expectedClosers.push("}");
+        break;
+      case "[":
+        expectedClosers.push("]");
+        break;
+      case "}":
+      case "]":
+        if (expectedClosers.pop() !== ch) return undefined;
+        if (expectedClosers.length === 0) return s.slice(start, pos + 1);
+        break;
+      default:
+        break;
+    }
+  }
+
+  return undefined;
+}
+
+/**
+ * Parses the first bracket-balanced span of `s` as JSON.
+ *
+ * @returns The parsed value, or undefined when no balanced span is found.
+ * @throws SyntaxError (from JSON.parse) when the span is not valid JSON.
+ */
+function parseFirstCompleteJsonValue<T>(s: string): T | undefined {
+  const candidate = firstCompleteJsonValue(s);
+  return candidate ? (JSON.parse(candidate) as T) : undefined;
+}
+
 export function parseJsonLoose<T>(raw: string): T {
  const trimmed = raw.trim();

@@ -54,10 +116,22 @@ export function parseJsonLoose<T>(raw: string): T {
    try {
      return JSON.parse(fenced[1]) as T;
    } catch {
-      // fall through
+      try {
+        const parsed = parseFirstCompleteJsonValue<T>(fenced[1]);
+        if (parsed !== undefined) return parsed;
+      } catch {
+        // fall through
+      }
    }
  }

+  try {
+    const parsed = parseFirstCompleteJsonValue<T>(trimmed);
+    if (parsed !== undefined) return parsed;
+  } catch {
+    // fall through
+  }
+
  const first = trimmed.indexOf("{");
  const last = trimmed.lastIndexOf("}");
  const slice =
@@ -0,0 +1,101 @@
+import {
+  startSession as startSessionClient,
+  requestScene as requestSceneClient,
+  visionDecide as visionDecideClient,
+  classifyFreeform as classifyFreeformClient,
+  requestInsertBeat as requestInsertBeatClient,
+} from "@infiplot/engine";
+import {
+  readStoredModelConfig,
+  resolveEngineConfig,
+} from "@/lib/clientModelConfig";
+import { loadClientTtsConfig } from "@/lib/clientTtsConfig";
+import type {
+  FreeformClassifyRequest,
+  FreeformClassifyResponse,
+  EngineConfig,
+  InsertBeatRequest,
+  InsertBeatResponse,
+  SceneRequest,
+  SceneResponse,
+  StartRequest,
+  StartResponse,
+  VisionRequest,
+  VisionResponse,
+} from "@infiplot/types";
+
+/**
+ * Builds the in-browser engine config from locally stored settings.
+ *
+ * @returns The engine config, or null when no valid model config is stored
+ *   (callers then fall back to the server API routes).
+ */
+function getClientConfig(): EngineConfig | null {
+  const modelCfg = readStoredModelConfig();
+  // Check the model config first: without it the TTS settings are never
+  // used, so the server-fallback path should not read them at all.
+  if (!modelCfg) return null;
+  return resolveEngineConfig(modelCfg, loadClientTtsConfig());
+}
+
+/**
+ * POSTs `body` as JSON to `path` and returns the parsed JSON response.
+ *
+ * @param path - Request URL (the server API route).
+ * @param body - Value serialized with JSON.stringify as the request body.
+ * @returns The response body parsed as JSON, typed as `T` without validation.
+ * @throws Error on a non-OK status. The message is the response's `error`
+ *   field when that is a non-empty string, otherwise `HTTP <status>`.
+ */
+async function postJson<T>(path: string, body: unknown): Promise<T> {
+  const res = await fetch(path, {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify(body),
+  });
+  if (!res.ok) {
+    let message = `HTTP ${res.status}`;
+    try {
+      const data: unknown = await res.json();
+      if (typeof data === "object" && data !== null && "error" in data) {
+        const { error } = data as { error?: unknown };
+        // Only a real string may replace the status message; an object here
+        // would otherwise surface to the user as "[object Object]".
+        if (typeof error === "string" && error.length > 0) message = error;
+      }
+    } catch {
+      // ignore parse failure, keep HTTP status message
+    }
+    throw new Error(message);
+  }
+  return res.json() as Promise<T>;
+}
+
+// ── Unified entry points ───────────────────────────────────────────────
+// When the browser has a BYO model config in localStorage, these call the
+// client-side engine directly (talking to providers from the browser).
+// Otherwise they fall back to the server-side API routes, which read
+// environment variables — useful for Vercel deploys that already supply keys.
+
+/** Starts a session: in-browser engine when a local model config exists, else POST /api/start. */
+export async function startSession(req: StartRequest): Promise<StartResponse> {
+  const localConfig = getClientConfig();
+  return localConfig
+    ? startSessionClient(localConfig, req)
+    : postJson<StartResponse>("/api/start", req);
+}
+
+/** Requests a scene: in-browser engine when a local model config exists, else POST /api/scene. */
+export async function requestScene(req: SceneRequest): Promise<SceneResponse> {
+  const localConfig = getClientConfig();
+  return localConfig
+    ? requestSceneClient(localConfig, req)
+    : postJson<SceneResponse>("/api/scene", req);
+}
+
+/** Runs the vision decision: in-browser engine when a local model config exists, else POST /api/vision. */
+export async function visionDecide(req: VisionRequest): Promise<VisionResponse> {
+  const localConfig = getClientConfig();
+  return localConfig
+    ? visionDecideClient(localConfig, req)
+    : postJson<VisionResponse>("/api/vision", req);
+}
+
+/** Classifies freeform input: in-browser engine when a local model config exists, else POST /api/classify-freeform. */
+export async function classifyFreeform(
+  req: FreeformClassifyRequest,
+): Promise<FreeformClassifyResponse> {
+  const localConfig = getClientConfig();
+  return localConfig
+    ? classifyFreeformClient(localConfig, req)
+    : postJson<FreeformClassifyResponse>("/api/classify-freeform", req);
+}
+
+/** Requests an inserted beat: in-browser engine when a local model config exists, else POST /api/insert-beat. */
+export async function requestInsertBeat(
+  req: InsertBeatRequest,
+): Promise<InsertBeatResponse> {
+  const localConfig = getClientConfig();
+  return localConfig
+    ? requestInsertBeatClient(localConfig, req)
+    : postJson<InsertBeatResponse>("/api/insert-beat", req);
+}
@@ -0,0 +1,11 @@
+// Instruction text to accompany an attached image: asks for a description of
+// the image's visual style only (not its subject), returned as exactly one
+// JSON object with a single `stylePrompt` string.
+export const STYLE_EXTRACTION_PROMPT = `You are a senior concept artist helping describe an image's visual style so that a text-to-image diffusion model (FLUX) can reproduce the same aesthetic on different subjects.
+
+Look at the attached image and produce a single English style-prompt string that captures ONLY its visual style — NOT its subject matter. Focus on:
+- Medium / technique (e.g., watercolor, oil painting, cel-shaded anime, 3D render, pixel art)
+- Line work and rendering (sharp ink outlines, soft shading, painterly brushstrokes, flat colors)
+- Color palette and lighting (pastel, saturated, monochrome, warm golden-hour, cool neon, high contrast)
+- Mood and atmosphere (dreamy, melancholic, cinematic, nostalgic, gritty)
+- Any recognizable artistic influence (Ghibli, Makoto Shinkai, ukiyo-e, vaporwave, cyberpunk anime, etc.)
+
+Do NOT describe the characters, objects, or scene contents. Output exactly one JSON object:
+{"stylePrompt": "<comma-separated English visual-style attributes, ~30-60 words>"}`;
@@ -8,6 +8,16 @@ import type { CharacterVoice, TtsConfig } from "@infiplot/types";
 // top-N candidates so multiple similar characters don't collapse onto the
 // same voice. Provision is a pure function — no network call needed.

+/**
+ * Base64-encodes the bytes of `buffer` using `btoa` rather than Node's
+ * `Buffer`.
+ */
+function arrayBufferToBase64(buffer: ArrayBuffer): string {
+  // btoa expects a "binary string": one character (code 0-255) per byte.
+  let binaryString = "";
+  for (const byte of new Uint8Array(buffer)) {
+    binaryString += String.fromCharCode(byte);
+  }
+  return btoa(binaryString);
+}
+
 const OUTPUT_FORMAT = "mp3";
 const OUTPUT_MIME = "audio/mpeg";

@@ -183,8 +193,6 @@ export async function stepfunSynthesize(
  }

  const ab = await res.arrayBuffer();
-  // Buffer is fine here — TTS routes run on runtime="nodejs". Falls back to
-  // btoa+chunks if we ever target Edge.
-  const audioBase64 = Buffer.from(ab).toString("base64");
+  const audioBase64 = arrayBufferToBase64(ab);
  return { audioBase64, mimeType: OUTPUT_MIME };
 }
@@ -327,19 +327,15 @@ export type VisionClassify = "insert-beat" | "change-scene";
 *   openai_compatible  text / vision / image  — OpenAI Chat Completions +
 *                      `/images/generations` (self-implemented fetch; the
 *                      default for text/vision when unset)
- *   anthropic          text / vision          — native Anthropic Messages (AI SDK)
- *   google             text / vision / image  — native Gemini (AI SDK); image
- *                      uses the Nano Banana family
- *   openai             image only             — OpenAI gpt-image via AI SDK,
- *                      unlocks reference-image editing (for text/vision use
- *                      openai_compatible, which already speaks OpenAI's format)
+ *   openai             image only             — OpenAI gpt-image via the
+ *                      official OpenAI SDK, unlocks reference-image editing
+ *                      (for text/vision use openai_compatible, which already
+ *                      speaks OpenAI's format)
 *   runware            image only             — Runware task-array protocol
 *                      (self-implemented; the default for runware.ai URLs)
 */
 export type ProviderProtocol =
  | "openai_compatible"
-  | "anthropic"
-  | "google"
  | "openai"
  | "runware";