From 2088bae311f194f94477ae64f8aa109ea4f088cc Mon Sep 17 00:00:00 2001 From: baizhi958216 <1475289190@qq.com> Date: Thu, 11 Jun 2026 10:15:40 +0800 Subject: [PATCH 01/13] fix(tts): replace Buffer.from with browser-compatible arrayBufferToBase64 in stepfun Signed-off-by: baizhi958216 <1475289190@qq.com> --- lib/tts-client/stepfun.ts | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/lib/tts-client/stepfun.ts b/lib/tts-client/stepfun.ts index dac5882..37ce8c0 100644 --- a/lib/tts-client/stepfun.ts +++ b/lib/tts-client/stepfun.ts @@ -8,6 +8,16 @@ import type { CharacterVoice, TtsConfig } from "@infiplot/types"; // top-N candidates so multiple similar characters don't collapse onto the // same voice. Provision is a pure function — no network call needed. +function arrayBufferToBase64(buffer: ArrayBuffer): string { + const bytes = new Uint8Array(buffer); + let binary = ""; + const len = bytes.byteLength; + for (let i = 0; i < len; i++) { + binary += String.fromCharCode(bytes[i]); + } + return btoa(binary); +} + const OUTPUT_FORMAT = "mp3"; const OUTPUT_MIME = "audio/mpeg"; @@ -183,8 +193,6 @@ export async function stepfunSynthesize( } const ab = await res.arrayBuffer(); - // Buffer is fine here — TTS routes run on runtime="nodejs". Falls back to - // btoa+chunks if we ever target Edge. - const audioBase64 = Buffer.from(ab).toString("base64"); + const audioBase64 = arrayBufferToBase64(ab); return { audioBase64, mimeType: OUTPUT_MIME }; } From a2dd5ad630ce9a77aebe7b1bbf4cf77530d16923 Mon Sep 17 00:00:00 2001 From: baizhi958216 <1475289190@qq.com> Date: Thu, 11 Jun 2026 10:16:52 +0800 Subject: [PATCH 02/13] feat(config): add client-side model config storage and EngineConfig resolver Signed-off-by: baizhi958216 <1475289190@qq.com> --- lib/clientModelConfig.ts | 162 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100644 lib/clientModelConfig.ts diff --git a/lib/clientModelConfig.ts b/lib/clientModelConfig.ts new file mode 100644 index 0000000..febce55 --- /dev/null +++ b/lib/clientModelConfig.ts @@ -0,0 +1,162 @@ +import type { EngineConfig, ProviderProtocol } from "@infiplot/types"; + +// Bring-your-own model keys — stored CLIENT-SIDE ONLY. +// +// When a user supplies their own text/image/vision API credentials, we persist +// them in localStorage and the browser talks to providers directly. The keys +// are therefore never sent to our server: no request body, no header, no log. + +const STORAGE_KEY = "infiplot:model"; + +const VALID_PROTOCOLS: ProviderProtocol[] = [ + "openai_compatible", + "anthropic", + "google", + "openai", + "runware", +]; + +export type StoredModelConfig = { + textBaseUrl: string; + textApiKey: string; + textModel: string; + textProvider?: ProviderProtocol; + imageBaseUrl: string; + imageApiKey: string; + imageModel: string; + imageProvider?: ProviderProtocol; + visionBaseUrl: string; + visionApiKey: string; + visionModel: string; + visionProvider?: ProviderProtocol; +}; + +function isValidProtocol(p: string): p is ProviderProtocol { + return (VALID_PROTOCOLS as readonly string[]).includes(p); +} + +function readProtocol(raw: unknown): ProviderProtocol | undefined { + if (typeof raw === "string" && isValidProtocol(raw)) return raw; + return undefined; +} + +/** Read + validate the persisted model config. Returns null when running on the + * server, when nothing is stored, on parse failure, or when required fields are + * missing. */ +export function readStoredModelConfig(): StoredModelConfig | null { + if (typeof window === "undefined") return null; + try { + const raw = window.localStorage.getItem(STORAGE_KEY); + if (!raw) return null; + const parsed = JSON.parse(raw) as Partial; + + const textBaseUrl = typeof parsed.textBaseUrl === "string" ? parsed.textBaseUrl.trim() : ""; + const textApiKey = typeof parsed.textApiKey === "string" ? parsed.textApiKey.trim() : ""; + const textModel = typeof parsed.textModel === "string" ? parsed.textModel.trim() : ""; + const imageBaseUrl = typeof parsed.imageBaseUrl === "string" ? parsed.imageBaseUrl.trim() : ""; + const imageApiKey = typeof parsed.imageApiKey === "string" ? parsed.imageApiKey.trim() : ""; + const imageModel = typeof parsed.imageModel === "string" ? parsed.imageModel.trim() : ""; + const visionBaseUrl = typeof parsed.visionBaseUrl === "string" ? parsed.visionBaseUrl.trim() : ""; + const visionApiKey = typeof parsed.visionApiKey === "string" ? parsed.visionApiKey.trim() : ""; + const visionModel = typeof parsed.visionModel === "string" ? parsed.visionModel.trim() : ""; + + if ( + !textBaseUrl || + !textApiKey || + !textModel || + !imageBaseUrl || + !imageApiKey || + !imageModel || + !visionBaseUrl || + !visionApiKey || + !visionModel + ) { + return null; + } + + return { + textBaseUrl, + textApiKey, + textModel, + textProvider: readProtocol(parsed.textProvider), + imageBaseUrl, + imageApiKey, + imageModel, + imageProvider: readProtocol(parsed.imageProvider), + visionBaseUrl, + visionApiKey, + visionModel, + visionProvider: readProtocol(parsed.visionProvider), + }; + } catch { + return null; + } +} + +/** Persist the model config. Trims all string fields so trailing whitespace + * from pastes never breaks headers. */ +export function writeStoredModelConfig(config: StoredModelConfig): void { + if (typeof window === "undefined") return; + try { + const payload: StoredModelConfig = { + textBaseUrl: config.textBaseUrl.trim(), + textApiKey: config.textApiKey.trim(), + textModel: config.textModel.trim(), + textProvider: config.textProvider, + imageBaseUrl: config.imageBaseUrl.trim(), + imageApiKey: config.imageApiKey.trim(), + imageModel: config.imageModel.trim(), + imageProvider: config.imageProvider, + visionBaseUrl: config.visionBaseUrl.trim(), + visionApiKey: config.visionApiKey.trim(), + visionModel: config.visionModel.trim(), + visionProvider: config.visionProvider, + }; + window.localStorage.setItem(STORAGE_KEY, JSON.stringify(payload)); + } catch { + // Storage disabled / quota / private mode — BYO simply stays off. + } +} + +export function clearStoredModelConfig(): void { + if (typeof window === "undefined") return; + try { + window.localStorage.removeItem(STORAGE_KEY); + } catch { + // ignore + } +} + +/** Build a full EngineConfig from stored model config + optional TTS config. + * Throws when model config is missing so callers can surface a friendly + * "please configure" message. */ +export function resolveEngineConfig( + model: StoredModelConfig | null, + tts: import("@infiplot/types").TtsConfig | null, +): EngineConfig { + if (!model) { + throw new Error("模型配置未设置。请返回首页,点击「模型设置」配置 API 参数。"); + } + return { + text: { + baseUrl: model.textBaseUrl, + apiKey: model.textApiKey, + model: model.textModel, + provider: model.textProvider, + }, + image: { + baseUrl: model.imageBaseUrl, + apiKey: model.imageApiKey, + model: model.imageModel, + provider: model.imageProvider, + }, + vision: { + baseUrl: model.visionBaseUrl, + apiKey: model.visionApiKey, + model: model.visionModel, + provider: model.visionProvider, + }, + tts: tts ?? undefined, + mockImage: false, + }; +} From 759319bf281435df80b85124fa9331b0083705be Mon Sep 17 00:00:00 2001 From: baizhi958216 <1475289190@qq.com> Date: Thu, 11 Jun 2026 10:17:15 +0800 Subject: [PATCH 03/13] feat(config): extract STYLE_EXTRACTION_PROMPT to shared lib for client reuse Signed-off-by: baizhi958216 <1475289190@qq.com> --- lib/styleExtraction.ts | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 lib/styleExtraction.ts diff --git a/lib/styleExtraction.ts b/lib/styleExtraction.ts new file mode 100644 index 0000000..040a7f5 --- /dev/null +++ b/lib/styleExtraction.ts @@ -0,0 +1,11 @@ +export const STYLE_EXTRACTION_PROMPT = `You are a senior concept artist helping describe an image's visual style so that a text-to-image diffusion model (FLUX) can reproduce the same aesthetic on different subjects. + +Look at the attached image and produce a single English style-prompt string that captures ONLY its visual style — NOT its subject matter. Focus on: +- Medium / technique (e.g., watercolor, oil painting, cel-shaded anime, 3D render, pixel art) +- Line work and rendering (sharp ink outlines, soft shading, painterly brushstrokes, flat colors) +- Color palette and lighting (pastel, saturated, monochrome, warm golden-hour, cool neon, high contrast) +- Mood and atmosphere (dreamy, melancholic, cinematic, nostalgic, gritty) +- Any recognizable artistic influence (Ghibli, Makoto Shinkai, ukiyo-e, vaporwave, cyberpunk anime, etc.) + +Do NOT describe the characters, objects, or scene contents. Output exactly one JSON object: +{"stylePrompt": ""}`; From 71216e1602290f48251cc697a46113812888da0d Mon Sep 17 00:00:00 2001 From: baizhi958216 <1475289190@qq.com> Date: Thu, 11 Jun 2026 10:21:36 +0800 Subject: [PATCH 04/13] feat(ui): add ModelSettingsModal for configuring text/image/vision providers Signed-off-by: baizhi958216 <1475289190@qq.com> --- components/ModelSettingsModal.tsx | 305 ++++++++++++++++++++++++++++++ 1 file changed, 305 insertions(+) create mode 100644 components/ModelSettingsModal.tsx diff --git a/components/ModelSettingsModal.tsx b/components/ModelSettingsModal.tsx new file mode 100644 index 0000000..167afb7 --- /dev/null +++ b/components/ModelSettingsModal.tsx @@ -0,0 +1,305 @@ +"use client"; + +import { useEffect, useState } from "react"; +import type { ProviderProtocol } from "@infiplot/types"; +import { + clearStoredModelConfig, + readStoredModelConfig, + writeStoredModelConfig, +} from "@/lib/clientModelConfig"; + +const PROVIDER_OPTIONS: { value: ProviderProtocol | ""; label: string }[] = [ + { value: "", label: "自动推断(推荐)" }, + { value: "openai_compatible", label: "OpenAI Compatible" }, + { value: "openai", label: "OpenAI (Native)" }, + { value: "anthropic", label: "Anthropic" }, + { value: "google", label: "Google Gemini" }, + { value: "runware", label: "Runware" }, +]; + +type ModelGroup = { + key: "text" | "image" | "vision"; + label: string; + icon: string; + baseUrl: string; + apiKey: string; + model: string; + provider: string; +}; + +export function ModelSettingsModal({ + onClose, + onSaved, +}: { + onClose: () => void; + onSaved: () => void; +}) { + const initial = readStoredModelConfig(); + + const [groups, setGroups] = useState([ + { + key: "text", + label: "文本模型", + icon: "fa-solid fa-pen-nib", + baseUrl: initial?.textBaseUrl ?? "", + apiKey: initial?.textApiKey ?? "", + model: initial?.textModel ?? "", + provider: initial?.textProvider ?? "", + }, + { + key: "image", + label: "绘图模型", + icon: "fa-solid fa-palette", + baseUrl: initial?.imageBaseUrl ?? "", + apiKey: initial?.imageApiKey ?? "", + model: initial?.imageModel ?? "", + provider: initial?.imageProvider ?? "", + }, + { + key: "vision", + label: "识图模型", + icon: "fa-solid fa-eye", + baseUrl: initial?.visionBaseUrl ?? "", + apiKey: initial?.visionApiKey ?? "", + model: initial?.visionModel ?? "", + provider: initial?.visionProvider ?? "", + }, + ]); + + const [showKeys, setShowKeys] = useState>({}); + const [shown, setShown] = useState(false); + + useEffect(() => { + const id = requestAnimationFrame(() => setShown(true)); + return () => cancelAnimationFrame(id); + }, []); + + useEffect(() => { + const handler = (e: KeyboardEvent) => { + if (e.key === "Escape") close(); + }; + window.addEventListener("keydown", handler); + return () => window.removeEventListener("keydown", handler); + }, []); + + const close = () => { + setShown(false); + setTimeout(onClose, 280); + }; + + const updateGroup = ( + key: string, + field: keyof Omit, + value: string, + ) => { + setGroups((prev) => + prev.map((g) => (g.key === key ? { ...g, [field]: value } : g)), + ); + }; + + const save = () => { + const [text, image, vision] = groups; + writeStoredModelConfig({ + textBaseUrl: text.baseUrl, + textApiKey: text.apiKey, + textModel: text.model, + textProvider: (text.provider as ProviderProtocol) || undefined, + imageBaseUrl: image.baseUrl, + imageApiKey: image.apiKey, + imageModel: image.model, + imageProvider: (image.provider as ProviderProtocol) || undefined, + visionBaseUrl: vision.baseUrl, + visionApiKey: vision.apiKey, + visionModel: vision.model, + visionProvider: (vision.provider as ProviderProtocol) || undefined, + }); + onSaved(); + close(); + }; + + const clearAll = () => { + clearStoredModelConfig(); + setGroups((prev) = + prev.map((g) => ({ ...g, baseUrl: "", apiKey: "", model: "", provider: "" })), + ); + onSaved(); + close(); + }; + + const hasAnySetting = groups.some( + (g) => g.baseUrl.trim() && g.apiKey.trim() && g.model.trim(), + ); + + return ( +
+
e.stopPropagation()} + className={ + "flex w-[600px] max-w-[96vw] max-h-[90vh] flex-col overflow-hidden rounded-sm border border-clay-900/15 bg-cream-50 shadow-2xl shadow-clay-900/25 transition-all duration-300 " + + (shown ? "opacity-100 scale-100" : "opacity-0 scale-95") + } + > + {/* Header */} +
+
+ + 模型设置 + + + API Key 仅保存在浏览器本地,不会发送到服务器 + +
+ +
+ +
+ {groups.map((g, idx) => ( +
+ {idx > 0 && ( +
+ )} +
+
+ + + + + {g.label} + +
+ +
+ + B A S E · U R L + + updateGroup(g.key, "baseUrl", e.target.value)} + type="text" + autoComplete="off" + spellCheck={false} + placeholder="https://api.example.com/v1" + className="h-10 w-full rounded-sm border border-clay-900/15 bg-cream-100 px-4 font-sans text-sm text-clay-900 outline-none transition-colors focus:border-ember-500 placeholder:text-clay-400" + /> +
+ +
+ + A P I · K e y + +
+ updateGroup(g.key, "apiKey", e.target.value)} + type={showKeys[g.key] ? "text" : "password"} + autoComplete="off" + spellCheck={false} + placeholder="sk-..." + className="h-10 w-full rounded-sm border border-clay-900/15 bg-cream-100 pl-4 pr-11 font-sans text-sm text-clay-900 outline-none transition-colors focus:border-ember-500 placeholder:text-clay-400" + /> + +
+
+ +
+ + M o d e l + + updateGroup(g.key, "model", e.target.value)} + type="text" + autoComplete="off" + spellCheck={false} + placeholder="gpt-4o / claude-3-5-sonnet / flux-1-dev ..." + className="h-10 w-full rounded-sm border border-clay-900/15 bg-cream-100 px-4 font-sans text-sm text-clay-900 outline-none transition-colors focus:border-ember-500 placeholder:text-clay-400" + /> +
+ +
+ + P r o v i d e r(可选) + + + + 留空时系统会根据 Base URL 自动推断协议。 + +
+
+
+ ))} + +
+ +
+

+ + 请确保你的 API 端点支持浏览器跨域请求(CORS)。大多数主流提供商(OpenAI、Anthropic、Gemini、Runware 等)已默认支持。 +

+
+
+ + {/* Footer */} +
+ {hasAnySetting && ( + + )} + +
+
+
+ ); +} From 6b11a225cd0c85bbffe9ffcc970a8982323bdd88 Mon Sep 17 00:00:00 2001 From: baizhi958216 <1475289190@qq.com> Date: Thu, 11 Jun 2026 10:23:28 +0800 Subject: [PATCH 05/13] feat(web): add model settings button, modal, and client-side style image parsing Signed-off-by: baizhi958216 <1475289190@qq.com> --- app/page.tsx | 44 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/app/page.tsx b/app/page.tsx index b50cf1e..b61c88f 100644 --- a/app/page.tsx +++ b/app/page.tsx @@ -12,6 +12,10 @@ import { } from "@/lib/options"; import { readStoredTtsConfig } from "@/lib/clientTtsConfig"; import { SettingsModal, readStoredPlayerName, readStoredVisionClick } from "@/components/SettingsModal"; +import { ModelSettingsModal } from "@/components/ModelSettingsModal"; +import { analyzeImageDataUrl } from "@infiplot/ai-client"; +import { readStoredModelConfig, resolveEngineConfig } from "@/lib/clientModelConfig"; +import { STYLE_EXTRACTION_PROMPT } from "@/lib/styleExtraction"; import { STORY_SHARE_STORAGE_KEY, parseStoryShareDoc } from "@/lib/storyShare"; /* ============================================================================ @@ -976,17 +980,21 @@ function StyleModal({ setParsing(true); try { const resized = await resizeImageToDataUrl(file); - const res = await fetch("/api/parse-style-image", { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ imageDataUrl: resized }), - }); - if (!res.ok) { - const j = (await res.json().catch(() => ({}))) as { error?: string }; - throw new Error(j.error ?? `${res.status}`); + const modelCfg = readStoredModelConfig(); + if (!modelCfg) { + throw new Error("请先点击首页右上角的「模型设置」配置视觉模型参数"); } - const data = (await res.json()) as { stylePrompt: string }; - setDraft(data.stylePrompt); + const config = resolveEngineConfig(modelCfg, null); + const raw = await analyzeImageDataUrl(config.vision, resized, STYLE_EXTRACTION_PROMPT); + let parsed: { stylePrompt?: string }; + try { + parsed = JSON.parse(raw); + } catch { + parsed = { stylePrompt: raw }; + } + const stylePrompt = (parsed.stylePrompt ?? "").trim(); + if (!stylePrompt) throw new Error("视觉模型返回了空的风格描述"); + setDraft(stylePrompt); setCustomStyleRefImage(resized); track("style_image_upload", { ok: true }); } catch (err) { @@ -1258,6 +1266,7 @@ export default function HomePage() { // 统一设置弹窗(名字 + 识图 + TTS Key):可选增强,数据只存浏览器。 const [settingsOpen, setSettingsOpen] = useState(false); + const [modelSettingsOpen, setModelSettingsOpen] = useState(false); const [ttsConfigured, setTtsConfigured] = useState(false); const [playerName, setPlayerName] = useState(""); const [visionClickEnabled, setVisionClickEnabled] = useState(true); @@ -1475,6 +1484,15 @@ export default function HomePage() { InfiPlot
+
); } From ab2f42bc4229f8ba97a49b44a3597e884c3cd442 Mon Sep 17 00:00:00 2001 From: baizhi958216 <1475289190@qq.com> Date: Thu, 11 Jun 2026 11:14:50 +0800 Subject: [PATCH 06/13] feat(web): merge TTS settings into ModelSettingsModal, remove from SettingsModal Signed-off-by: baizhi958216 <1475289190@qq.com> --- app/page.tsx | 16 ++- components/ModelSettingsModal.tsx | 230 +++++++++++++++++++++++++++--- components/SettingsModal.tsx | 199 ++------------------------ 3 files changed, 226 insertions(+), 219 deletions(-) diff --git a/app/page.tsx b/app/page.tsx index b61c88f..59e69e9 100644 --- a/app/page.tsx +++ b/app/page.tsx @@ -1796,21 +1796,23 @@ export default function HomePage() { initialVisionClickEnabled={visionClickEnabled} onClose={() => setSettingsOpen(false)} onSaved={(settings) => { - setTtsConfigured(settings.ttsConfigured); setPlayerName(settings.playerName); setVisionClickEnabled(settings.visionClickEnabled); - if (settings.ttsConfigured && voiceRow >= 0) { - const onIdx = OPTS[voiceRow]!.items.indexOf("开启"); - if (onIdx >= 0) - setSel((s) => s.map((v, j) => (j === voiceRow ? onIdx : v))); - } }} /> )} {modelSettingsOpen && ( setModelSettingsOpen(false)} - onSaved={() => setModelSettingsOpen(false)} + onSaved={(settings) => { + setTtsConfigured(settings.ttsConfigured); + if (settings.ttsConfigured && voiceRow >= 0) { + const onIdx = OPTS[voiceRow]!.items.indexOf("开启"); + if (onIdx >= 0) + setSel((s) => s.map((v, j) => (j === voiceRow ? onIdx : v))); + } + setModelSettingsOpen(false); + }} /> )}
diff --git a/components/ModelSettingsModal.tsx b/components/ModelSettingsModal.tsx index 167afb7..659d6a5 100644 --- a/components/ModelSettingsModal.tsx +++ b/components/ModelSettingsModal.tsx @@ -7,6 +7,17 @@ import { readStoredModelConfig, writeStoredModelConfig, } from "@/lib/clientModelConfig"; +import { + clearStoredTtsConfig, + readStoredTtsConfig, + writeStoredTtsConfig, +} from "@/lib/clientTtsConfig"; +import { + findTtsPreset, + PAYG_PRESET_ID, + TTS_KEY_DOC_URL, + TTS_REGION_PRESETS, +} from "@/lib/ttsPresets"; const PROVIDER_OPTIONS: { value: ProviderProtocol | ""; label: string }[] = [ { value: "", label: "自动推断(推荐)" }, @@ -32,7 +43,7 @@ export function ModelSettingsModal({ onSaved, }: { onClose: () => void; - onSaved: () => void; + onSaved: (settings: { ttsConfigured: boolean }) => void; }) { const initial = readStoredModelConfig(); @@ -69,6 +80,22 @@ export function ModelSettingsModal({ const [showKeys, setShowKeys] = useState>({}); const [shown, setShown] = useState(false); + // TTS state + const [initialTts] = useState(() => readStoredTtsConfig()); + const initialKind = findTtsPreset(initialTts?.presetId)?.kind ?? "payg"; + const [keyType, setKeyType] = useState<"token-plan" | "payg">(initialKind); + const [regionId, setRegionId] = useState( + initialKind === "token-plan" + ? (initialTts?.presetId ?? TTS_REGION_PRESETS[0]!.id) + : TTS_REGION_PRESETS[0]!.id, + ); + const [ttsApiKey, setTtsApiKey] = useState(initialTts?.apiKey ?? ""); + const [showTtsKey, setShowTtsKey] = useState(false); + + const expectedPrefix = keyType === "payg" ? "sk-" : "tp-"; + const prefixMismatch = + ttsApiKey.trim().length > 0 && !ttsApiKey.trim().startsWith(expectedPrefix); + useEffect(() => { const id = requestAnimationFrame(() => setShown(true)); return () => cancelAnimationFrame(id); @@ -99,36 +126,51 @@ export function ModelSettingsModal({ const save = () => { const [text, image, vision] = groups; - writeStoredModelConfig({ - textBaseUrl: text.baseUrl, - textApiKey: text.apiKey, - textModel: text.model, - textProvider: (text.provider as ProviderProtocol) || undefined, - imageBaseUrl: image.baseUrl, - imageApiKey: image.apiKey, - imageModel: image.model, - imageProvider: (image.provider as ProviderProtocol) || undefined, - visionBaseUrl: vision.baseUrl, - visionApiKey: vision.apiKey, - visionModel: vision.model, - visionProvider: (vision.provider as ProviderProtocol) || undefined, - }); - onSaved(); + if (text && image && vision) { + writeStoredModelConfig({ + textBaseUrl: text.baseUrl, + textApiKey: text.apiKey, + textModel: text.model, + textProvider: (text.provider as ProviderProtocol) || undefined, + imageBaseUrl: image.baseUrl, + imageApiKey: image.apiKey, + imageModel: image.model, + imageProvider: (image.provider as ProviderProtocol) || undefined, + visionBaseUrl: vision.baseUrl, + visionApiKey: vision.apiKey, + visionModel: vision.model, + visionProvider: (vision.provider as ProviderProtocol) || undefined, + }); + } + + const key = ttsApiKey.trim(); + let ttsConfigured = false; + if (key) { + const presetId = keyType === "payg" ? PAYG_PRESET_ID : regionId; + writeStoredTtsConfig({ presetId, apiKey: key }); + ttsConfigured = true; + } else { + clearStoredTtsConfig(); + } + + onSaved({ ttsConfigured }); close(); }; const clearAll = () => { clearStoredModelConfig(); - setGroups((prev) = + clearStoredTtsConfig(); + setGroups((prev) => prev.map((g) => ({ ...g, baseUrl: "", apiKey: "", model: "", provider: "" })), ); - onSaved(); + setTtsApiKey(""); + onSaved({ ttsConfigured: false }); close(); }; - const hasAnySetting = groups.some( - (g) => g.baseUrl.trim() && g.apiKey.trim() && g.model.trim(), - ); + const hasAnySetting = + groups.some((g) => g.baseUrl.trim() && g.apiKey.trim() && g.model.trim()) || + initialTts != null; return (
+ {/* ── TTS Key Section ── */} +
+
+ + + + + 自带配音 Key + + 可选 +
+

+ 填入你自己的 + 小米 MiMo API Key + ,配音将在浏览器本地合成,Key 只保存在本地、绝不经过服务器。MiMo + TTS 目前 + 限时免费 + ,申请即可使用。 +

+ +
+ + K e y · 类 型 + +
+ {( + [ + { + kind: "payg", + label: "按量付费 Pay-as-you-go", + sub: "sk- 开头", + }, + { + kind: "token-plan", + label: "套餐 Token Plan", + sub: "tp- 开头", + }, + ] as const + ).map((t) => { + const active = keyType === t.kind; + return ( + + ); + })} +
+
+ + {keyType === "token-plan" && ( +
+ + 区 域 节 点 + +
+ {TTS_REGION_PRESETS.map((p) => { + const active = p.id === regionId; + return ( + + ); + })} +
+ + 选择与你的套餐订阅地区一致的节点(通常也是延迟最低的那个)。 + +
+ )} + +
+ + A P I · K e y + +
+ setTtsApiKey(e.target.value)} + type={showTtsKey ? "text" : "password"} + autoComplete="off" + spellCheck={false} + placeholder={ + keyType === "payg" + ? "粘贴 sk- 开头的按量 Key" + : "粘贴 tp- 开头的套餐 Key" + } + className="h-11 w-full rounded-sm border border-clay-900/15 bg-cream-100 pl-4 pr-11 font-sans text-sm text-clay-900 outline-none transition-colors focus:border-ember-500 placeholder:text-clay-400" + /> + +
+ {prefixMismatch && ( + + + 此 Key 不是 {expectedPrefix} 开头,可能与所选「 + {keyType === "payg" + ? "按量付费 Pay-as-you-go" + : "套餐 Token Plan"} + 」类型不符,请确认是否填错。 + + )} + + + 如何免费申请 Key?查看图文教程 + +
+
+ +
+

diff --git a/components/SettingsModal.tsx b/components/SettingsModal.tsx index 607a872..44da808 100644 --- a/components/SettingsModal.tsx +++ b/components/SettingsModal.tsx @@ -1,17 +1,6 @@ "use client"; import { type ReactNode, useEffect, useState } from "react"; -import { - clearStoredTtsConfig, - readStoredTtsConfig, - writeStoredTtsConfig, -} from "@/lib/clientTtsConfig"; -import { - findTtsPreset, - PAYG_PRESET_ID, - TTS_KEY_DOC_URL, - TTS_REGION_PRESETS, -} from "@/lib/ttsPresets"; const PLAYER_NAME_STORAGE_KEY = "infiplot:playerName"; const VISION_CLICK_STORAGE_KEY = "infiplot:visionClick"; @@ -52,30 +41,14 @@ export function SettingsModal({ }: { initialVisionClickEnabled?: boolean; onClose: () => void; - onSaved: (settings: { ttsConfigured: boolean; playerName: string; visionClickEnabled: boolean }) => void; + onSaved: (settings: { playerName: string; visionClickEnabled: boolean }) => void; footerNote?: ReactNode; }) { - const [initialTts] = useState(() => readStoredTtsConfig()); - const initialKind = findTtsPreset(initialTts?.presetId)?.kind ?? "payg"; - const [keyType, setKeyType] = useState<"token-plan" | "payg">(initialKind); - const [regionId, setRegionId] = useState( - initialKind === "token-plan" - ? (initialTts?.presetId ?? TTS_REGION_PRESETS[0]!.id) - : TTS_REGION_PRESETS[0]!.id, - ); - const [apiKey, setApiKey] = useState(initialTts?.apiKey ?? ""); - const [showKey, setShowKey] = useState(false); - const ttsAlreadyConfigured = initialTts != null; - const [playerName, setPlayerName] = useState(() => readStoredPlayerName()); const [visionClick, setVisionClick] = useState(initialVisionClickEnabled); const [shown, setShown] = useState(false); - const expectedPrefix = keyType === "payg" ? "sk-" : "tp-"; - const prefixMismatch = - apiKey.trim().length > 0 && !apiKey.trim().startsWith(expectedPrefix); - useEffect(() => { const id = requestAnimationFrame(() => setShown(true)); return () => cancelAnimationFrame(id); @@ -94,30 +67,18 @@ export function SettingsModal({ localStorage.setItem(VISION_CLICK_STORAGE_KEY, visionClick ? "1" : "0"); } catch { /* ignore */ } - const key = apiKey.trim(); - let ttsConfigured = false; - if (key) { - const presetId = keyType === "payg" ? PAYG_PRESET_ID : regionId; - writeStoredTtsConfig({ presetId, apiKey: key }); - ttsConfigured = true; - } else { - clearStoredTtsConfig(); - ttsConfigured = false; - } - - onSaved({ ttsConfigured, playerName: name, visionClickEnabled: visionClick }); + onSaved({ playerName: name, visionClickEnabled: visionClick }); close(); }; const clearAll = () => { - clearStoredTtsConfig(); writeStoredPlayerName(""); try { localStorage.removeItem(VISION_CLICK_STORAGE_KEY); } catch { /* ignore */ } - onSaved({ ttsConfigured: false, playerName: "", visionClickEnabled: true }); + onSaved({ playerName: "", visionClickEnabled: true }); close(); }; - const hasAnySetting = ttsAlreadyConfigured || readStoredPlayerName().length > 0; + const hasAnySetting = readStoredPlayerName().length > 0; return (

-
- - {/* ── TTS Key Section ── */} -
-
- - - - - 自带配音 Key - - 可选 -
-

- 填入你自己的 - 小米 MiMo API Key - ,配音将在浏览器本地合成,Key 只保存在本地、绝不经过服务器。MiMo - TTS 目前 - 限时免费 - ,申请即可使用。 -

- -
- - K e y · 类 型 - -
- {( - [ - { - kind: "payg", - label: "按量付费 Pay-as-you-go", - sub: "sk- 开头", - }, - { - kind: "token-plan", - label: "套餐 Token Plan", - sub: "tp- 开头", - }, - ] as const - ).map((t) => { - const active = keyType === t.kind; - return ( - - ); - })} -
-
- - {keyType === "token-plan" && ( -
- - 区 域 节 点 - -
- {TTS_REGION_PRESETS.map((p) => { - const active = p.id === regionId; - return ( - - ); - })} -
- - 选择与你的套餐订阅地区一致的节点(通常也是延迟最低的那个)。 - -
- )} - -
- - A P I · K e y - -
- setApiKey(e.target.value)} - type={showKey ? "text" : "password"} - autoComplete="off" - spellCheck={false} - placeholder={ - keyType === "payg" - ? "粘贴 sk- 开头的按量 Key" - : "粘贴 tp- 开头的套餐 Key" - } - className="h-11 w-full rounded-sm border border-clay-900/15 bg-cream-100 pl-4 pr-11 font-sans text-sm text-clay-900 outline-none transition-colors focus:border-ember-500 placeholder:text-clay-400" - /> - -
- {prefixMismatch && ( - - - 此 Key 不是 {expectedPrefix} 开头,可能与所选「 - {keyType === "payg" - ? "按量付费 Pay-as-you-go" - : "套餐 Token Plan"} - 」类型不符,请确认是否填错。 - - )} - - - 如何免费申请 Key?查看图文教程 - -
- - {footerNote && ( + {footerNote && ( +

{footerNote}

- )} -
+
+ )}
{/* Footer */} From b63b694940a603583095325d29e3b414c02dfb75 Mon Sep 17 00:00:00 2001 From: baizhi958216 <1475289190@qq.com> Date: Thu, 11 Jun 2026 11:14:55 +0800 Subject: [PATCH 07/13] refactor(play): use client-side engine API instead of direct fetch Signed-off-by: baizhi958216 <1475289190@qq.com> --- app/play/page.tsx | 333 ++++++++++------------------------------------ 1 file changed, 71 insertions(+), 262 deletions(-) diff --git a/app/play/page.tsx b/app/play/page.tsx index b776e61..b6162f5 100644 --- a/app/play/page.tsx +++ b/app/play/page.tsx @@ -28,13 +28,20 @@ import { storyShareFilename, } from "@/lib/storyShare"; import { provisionVoice, synthesize } from "@infiplot/tts-client"; +import { + startSession, + requestScene, + visionDecide, + classifyFreeform, + requestInsertBeat, +} from "@infiplot/engine"; +import { readStoredModelConfig, resolveEngineConfig } from "@/lib/clientModelConfig"; import type { Beat, BeatChoice, Character, CharacterVoice, - FreeformClassifyResponse, - InsertBeatResponse, + EngineConfig, Orientation, Scene, SceneExit, @@ -42,44 +49,21 @@ import type { Session, StartResponse, TtsConfig, - VisionResponse, } from "@infiplot/types"; import { track } from "@/lib/analytics"; const MUTED_STORAGE_KEY = "infiplot:muted"; -// ── FOT reduction helpers ────────────────────────────────────────────── -// Strip bulky voice.referenceAudioBase64 from the session before sending it to -// the server. The engine only needs character names + visualDescriptions for -// scene generation; voice data is only used by /api/beat-audio (which receives -// the voice directly, not via session). The client retains voices locally and -// re-merges them from the response via mergeCharactersPreserveVoice. -function stripVoicesForTransport(session: Session): Session { - return { - ...session, - characters: session.characters.map((c) => ({ ...c, voice: undefined })), - }; +// ── Client-side engine config builder ────────────────────────────────── +// Reads model credentials from localStorage and assembles the EngineConfig +// that the engine expects. Called at the point of use (inside async handlers) +// so mid-session settings changes are picked up immediately. +function buildEngineConfig(): EngineConfig { + const modelCfg = readStoredModelConfig(); + const ttsCfg = loadClientTtsConfig(); + return resolveEngineConfig(modelCfg, ttsCfg); } -// Merge server-returned characters with locally-held voices. The server strips -// voice from already-known characters (P0), so only NEW characters carry voice. -// For existing characters, re-attach the voice the client already holds. -function mergeCharactersPreserveVoice( - local: Character[], - remote: Character[], -): Character[] { - const localByName = new Map(local.map((c) => [c.name, c])); - return remote.map((c) => { - const prev = localByName.get(c.name); - if (!prev) return c; - return { ...c, voice: c.voice ?? prev.voice }; - }); -} - -// Consecutive silent (no-audio) beats before we surface the BYO-key nudge to a -// non-BYO, unmuted player. Set high enough that one transient miss won't trip -// it, low enough to catch a scene that's clearly being rate-limited. -const SILENCE_NUDGE_THRESHOLD = 3; // Mobile-portrait users get a 9:16 scene image painted for them; everyone else // (desktop, tablet, mobile-landscape) keeps the 16:9 landscape image. Only a @@ -395,19 +379,9 @@ function prefetchScenePath( const specSession = buildSpeculativeSession(baseSession, steps); const abort = new AbortController(); const promise = (async () => { - const res = await fetch("/api/scene", { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify({ session: stripVoicesForTransport(specSession), clientTts }), - signal: abort.signal, - }); - if (!res.ok) { - const j = (await res.json().catch(() => ({}))) as { error?: string }; - throw new Error(j.error ?? res.statusText); - } - const data = (await res.json()) as SceneResponse; + const config = buildEngineConfig(); + const data = await requestScene(config, { session: specSession, clientTts }); + if (abort.signal.aborted) throw new Error("aborted"); // Record this resolved alternate for the gallery export. Key is // (parent scene id at the choice point) : (choice id). Includes the @@ -425,12 +399,6 @@ function prefetchScenePath( // transition path awaits the same cached promise via getOrCreateBlobUrl. void getOrCreateBlobUrl(data.imageUrl); - // Re-attach locally-held voices the server stripped from known characters. - data.characters = mergeCharactersPreserveVoice( - baseSession.characters, - data.characters, - ); - // Recursive: if the resulting scene has exactly one change-scene exit, // it is a must-pass node — prefetch its child too. if (depth + 1 < PREFETCH_MAX_DEPTH) { @@ -579,12 +547,6 @@ function PlayInner() { const [orientation, setOrientation] = useState("landscape"); const [lastExitLabel, setLastExitLabel] = useState(null); // Consecutive server-side TTS misses (null audio / failed /api/beat-audio). - // Climbs when the shared server key is rate-limited by MiMo — the exact pain - // BYO fixes — so the play page can nudge non-BYO users to add their own key. - // Reset to 0 on any successful synth. Only the server path touches it. - const [silenceStrikes, setSilenceStrikes] = useState(0); - // Once the player dismisses the silence nudge, keep it gone for this session. - const [nudgeDismissed, setNudgeDismissed] = useState(false); const [settingsOpen, setSettingsOpen] = useState(false); const [visionClickEnabled, setVisionClickEnabled] = useState(true); @@ -728,8 +690,7 @@ function PlayInner() { let audioUrl: string | null = null; if (byo) { // Client-direct: provision (once per speaker, cached) + synth against - // Xiaomi with the user's own key — no /api/beat-audio round-trip and - // the key never touches our server. + // Xiaomi with the user's own key — the key never touches our server. const voice = await resolveByoVoice( provisionedVoicesRef.current, byo, @@ -745,28 +706,8 @@ function PlayInner() { ); audioUrl = `data:${out.mimeType};base64,${out.audioBase64}`; } else { - const res = await fetch("/api/beat-audio", { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify({ - beat: { id: beat.id, line: beat.line, lineDelivery: beat.lineDelivery }, - voice: speaker.voice, - }), - signal: abort.signal, - }); - if (res.status === 204) { - setSilenceStrikes((n) => Math.min(n + 1, 99)); - return; - } - if (!res.ok) { - setSilenceStrikes((n) => Math.min(n + 1, 99)); - return; - } - const blob = await res.blob(); - audioUrl = URL.createObjectURL(blob); - setSilenceStrikes(0); + // No TTS configured — silent. + return; } // Skip the state write if we've been aborted between the await and // here — beat ids are scene-local, so a late arrival from a prior @@ -774,8 +715,6 @@ function PlayInner() { // same id. if (audioUrl && !abort.signal.aborted) { setBeatAudioMap((m) => ({ ...m, [beat.id]: audioUrl })); - } else if (audioUrl?.startsWith("blob:")) { - URL.revokeObjectURL(audioUrl); } } catch { // aborted / network / Xiaomi rate-limit — silent fallback (no audio) @@ -864,26 +803,12 @@ function PlayInner() { }, [muted, prefetchSceneAudio]); const handleSettingsSaved = useCallback( - (settings: { ttsConfigured: boolean; playerName: string; visionClickEnabled: boolean }) => { + (settings: { playerName: string; visionClickEnabled: boolean }) => { setVisionClickEnabled(settings.visionClickEnabled); const nextPlayerName = settings.playerName || undefined; setSession((prev) => prev ? { ...prev, playerName: nextPlayerName } : prev); - const cfg = settings.ttsConfigured ? loadClientTtsConfig() : null; - byoTtsRef.current = cfg; - setByoTtsConfig(cfg); - if (cfg) { - setSilenceStrikes(0); - cancelBeatAudioFetches(); - setBeatAudioMap((prev) => { - for (const url of Object.values(prev)) { - if (url.startsWith("blob:")) URL.revokeObjectURL(url); - } - return {}; - }); - prefetchSceneAudio(); - } }, - [prefetchSceneAudio], + [], ); function detachRecordedReplay(): void { @@ -1260,31 +1185,22 @@ function PlayInner() { throw new Error(`找不到精选剧情:${cardName}`); }, ) - : fetch("/api/start", { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify({ - ...livePayload, + : (async () => { + const config = buildEngineConfig(); + const data = await startSession(config, { + ...livePayload!, clientTts: !!byoTtsRef.current, - }), - }).then(async (r) => { - if (!r.ok) { - const j = (await r.json().catch(() => ({}))) as { error?: string }; - throw new Error(j.error ?? r.statusText); - } - const data = (await r.json()) as StartResponse; - // Live /api/start doesn't echo ws/sg back — splice in what we sent. + }); + // startSession doesn't echo ws/sg back — splice in what we sent. // styleReferenceImage is similarly not in StartResponse; tag it on so - // the session we build below carries it for every /api/scene call. + // the session we build below carries it for every scene call. return { ...data, worldSetting: livePayload!.worldSetting, styleGuide: livePayload!.styleGuide, styleReferenceImage: livePayload!.styleReferenceImage, }; - }); + })(); fetchStart .then(async (data) => { @@ -1430,10 +1346,7 @@ function PlayInner() { storyStateAfter: result.storyState, }, ], - characters: mergeCharactersPreserveVoice( - base.characters, - result.characters, - ), + characters: result.characters, storyState: result.storyState, }; visitedBeatsRef.current = [result.scene.entryBeatId]; @@ -1656,21 +1569,12 @@ function PlayInner() { clearPool(poolRef.current); const promise = (async () => { - const res = await fetch("/api/scene", { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify({ - session: stripVoicesForTransport(specSession), - clientTts: !!byoTtsRef.current, - }), + const config = buildEngineConfig(); + const data = await requestScene(config, { + session: specSession, + clientTts: !!byoTtsRef.current, }); - if (!res.ok) { - const j = (await res.json().catch(() => ({}))) as { error?: string }; - throw new Error(j.error ?? res.statusText); - } - return (await res.json()) as SceneResponse; + return data; })(); void performSceneTransition(promise, exit, visited, choice.label); @@ -1688,38 +1592,23 @@ function PlayInner() { setPhase("vision-thinking"); try { - const classifyRes = await fetch("/api/classify-freeform", { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ - session: stripVoicesForTransport(session), - freeformText: text, - }), + const config = buildEngineConfig(); + const decision = await classifyFreeform(config, { + session, + freeformText: text, }); - if (!classifyRes.ok) { - const j = (await classifyRes.json().catch(() => ({}))) as { error?: string }; - throw new Error(j.error ?? classifyRes.statusText); - } - const decision = (await classifyRes.json()) as FreeformClassifyResponse; if (decision.classify === "insert-beat") { // Interactive beat: NPC responds to the player's action, scene stays setPhase("inserting-beat"); - const insertRes = await fetch("/api/insert-beat", { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ - session: stripVoicesForTransport(session), + const { partial, characters: insertChars } = await requestInsertBeat( + config, + { + session, freeformAction: decision.freeformAction, clientTts: !!byoTtsRef.current, - }), - }); - if (!insertRes.ok) { - const j = (await insertRes.json().catch(() => ({}))) as { error?: string }; - throw new Error(j.error ?? insertRes.statusText); - } - const { partial, characters: insertChars } = - (await insertRes.json()) as InsertBeatResponse; + }, + ); const fromBeatId = currentBeatRef.current?.id ?? currentScene.entryBeatId; @@ -1746,10 +1635,7 @@ function PlayInner() { history: session.history.map((h, i, arr) => i === arr.length - 1 ? { ...h, scene: patched, visitedBeatIds: nextVisited } : h, ), - characters: mergeCharactersPreserveVoice( - session.characters, - insertChars, - ), + characters: insertChars, }; setSession(nextSession); setCurrentScene(patched); @@ -1785,19 +1671,12 @@ function PlayInner() { }; const promise = (async () => { - const res = await fetch("/api/scene", { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ - session: stripVoicesForTransport(specSession), - clientTts: !!byoTtsRef.current, - }), + const config = buildEngineConfig(); + const data = await requestScene(config, { + session: specSession, + clientTts: !!byoTtsRef.current, }); - if (!res.ok) { - const j = (await res.json().catch(() => ({}))) as { error?: string }; - throw new Error(j.error ?? res.statusText); - } - return (await res.json()) as SceneResponse; + return data; })(); setPendingClick(null); @@ -1816,43 +1695,23 @@ function PlayInner() { try { const annotatedImageBase64 = await annotateClick(imageUrl, click); - const visionRes = await fetch("/api/vision", { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify({ session: stripVoicesForTransport(session), annotatedImageBase64 }), + const config = buildEngineConfig(); + const decision = await visionDecide(config, { + session, + annotatedImageBase64, }); - if (!visionRes.ok) { - const j = (await visionRes.json().catch(() => ({}))) as { - error?: string; - }; - throw new Error(j.error ?? visionRes.statusText); - } - const decision = (await visionRes.json()) as VisionResponse; track("vision_click", { result: decision.classify }); if (decision.classify === "insert-beat") { setPhase("inserting-beat"); - const insertRes = await fetch("/api/insert-beat", { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify({ - session: stripVoicesForTransport(session), + const { partial, characters: insertChars } = await requestInsertBeat( + config, + { + session, freeformAction: decision.intent.freeformAction, clientTts: !!byoTtsRef.current, - }), - }); - if (!insertRes.ok) { - const j = (await insertRes.json().catch(() => ({}))) as { - error?: string; - }; - throw new Error(j.error ?? insertRes.statusText); - } - const { partial, characters: insertChars } = - (await insertRes.json()) as InsertBeatResponse; + }, + ); const fromBeatId = currentBeatRef.current?.id ?? currentScene.entryBeatId; @@ -1878,10 +1737,7 @@ function PlayInner() { history: session.history.map((h, i, arr) => i === arr.length - 1 ? { ...h, scene: patched } : h, ), - characters: mergeCharactersPreserveVoice( - session.characters, - insertChars, - ), + characters: insertChars, }; setSession(nextSession); setCurrentScene(patched); @@ -1920,23 +1776,12 @@ function PlayInner() { clearPool(poolRef.current); const promise = (async () => { - const res = await fetch("/api/scene", { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify({ - session: stripVoicesForTransport(specSession), - clientTts: !!byoTtsRef.current, - }), + const config = buildEngineConfig(); + const data = await requestScene(config, { + session: specSession, + clientTts: !!byoTtsRef.current, }); - if (!res.ok) { - const j = (await res.json().catch(() => ({}))) as { - error?: string; - }; - throw new Error(j.error ?? res.statusText); - } - return (await res.json()) as SceneResponse; + return data; })(); await performSceneTransition( @@ -2054,16 +1899,6 @@ function PlayInner() { const sceneCount = session?.history.length ?? 0; const beatCount = visitedBeatsRef.current.length; - // Surface the BYO-key nudge only to an unmuted, non-BYO player whose last few - // beats came back silent (shared key rate-limited) — the exact pain BYO fixes. - // Dismissible for the session. - const showSilenceNudge = - phase === "ready" && - !muted && - !byoTtsConfig && - !nudgeDismissed && - silenceStrikes >= SILENCE_NUDGE_THRESHOLD; - return (
@@ -2154,32 +1989,6 @@ function PlayInner() { /> {muted ? "静 · 音" : "有 · 声"} - - {/* Silence nudge — a compact pill right beside the mute toggle. - Clicking opens the BYO-key modal in place (no trip to the - homepage). The × dismisses it for the session. */} - {showSilenceNudge && ( - - - - - )} } /> From 94973bc6c6b54e9837f8a54d2cea08b688cabeeb Mon Sep 17 00:00:00 2001 From: baizhi958216 <1475289190@qq.com> Date: Thu, 11 Jun 2026 11:16:53 +0800 Subject: [PATCH 08/13] fix(tts): add non-null assertion in stepfun array access Signed-off-by: baizhi958216 <1475289190@qq.com> --- lib/tts-client/stepfun.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/tts-client/stepfun.ts b/lib/tts-client/stepfun.ts index 37ce8c0..1597fcf 100644 --- a/lib/tts-client/stepfun.ts +++ b/lib/tts-client/stepfun.ts @@ -13,7 +13,7 @@ function arrayBufferToBase64(buffer: ArrayBuffer): string { let binary = ""; const len = bytes.byteLength; for (let i = 0; i < len; i++) { - binary += String.fromCharCode(bytes[i]); + binary += String.fromCharCode(bytes[i]!); } return btoa(binary); } From 0f8e641c4c5f204812821a2b2bad5a3a4bac986d Mon Sep 17 00:00:00 2001 From: baizhi958216 <1475289190@qq.com> Date: Thu, 11 Jun 2026 11:33:44 +0800 Subject: [PATCH 09/13] feat(web): merge SettingsModal and ModelSettingsModal with tab navigation Signed-off-by: baizhi958216 <1475289190@qq.com> --- app/globals.css | 24 ++ app/page.tsx | 30 +- app/play/page.tsx | 2 +- components/ModelSettingsModal.tsx | 493 ----------------------- components/SettingsModal.tsx | 630 ++++++++++++++++++++++++++---- 5 files changed, 582 insertions(+), 597 deletions(-) delete mode 100644 components/ModelSettingsModal.tsx diff --git a/app/globals.css b/app/globals.css index 9cb7d82..fe95fca 100644 --- a/app/globals.css +++ b/app/globals.css @@ -88,6 +88,30 @@ .vn-scrollbar::-webkit-scrollbar-corner { background: transparent; } + + /* 极细滚动条 · 无轨道背景 */ + .thin-scrollbar { + scrollbar-width: thin; + scrollbar-color: rgba(195, 155, 75, 0.5) transparent; + } + + .thin-scrollbar::-webkit-scrollbar { + width: 4px; + height: 4px; + } + + .thin-scrollbar::-webkit-scrollbar-track { + background: transparent; + } + + .thin-scrollbar::-webkit-scrollbar-thumb { + background: rgba(195, 155, 75, 0.45); + border-radius: 999px; + } + + .thin-scrollbar::-webkit-scrollbar-thumb:hover { + background: rgba(220, 180, 95, 0.7); + } } @keyframes infiplot-ripple { diff --git a/app/page.tsx b/app/page.tsx index 59e69e9..1f3673a 100644 --- a/app/page.tsx +++ b/app/page.tsx @@ -12,7 +12,6 @@ import { } from "@/lib/options"; import { readStoredTtsConfig } from "@/lib/clientTtsConfig"; import { SettingsModal, readStoredPlayerName, readStoredVisionClick } from "@/components/SettingsModal"; -import { ModelSettingsModal } from "@/components/ModelSettingsModal"; import { analyzeImageDataUrl } from "@infiplot/ai-client"; import { readStoredModelConfig, resolveEngineConfig } from "@/lib/clientModelConfig"; import { STYLE_EXTRACTION_PROMPT } from "@/lib/styleExtraction"; @@ -1264,9 +1263,9 @@ export default function HomePage() { // 顶部使用提示:默认展示,用户可点 × 永久关闭(localStorage:infiplot:hintClosed)。 const [hintClosed, setHintClosed] = useState(false); - // 统一设置弹窗(名字 + 识图 + TTS Key):可选增强,数据只存浏览器。 + // 统一设置弹窗(通用 + 模型):可选增强,数据只存浏览器。 const [settingsOpen, setSettingsOpen] = useState(false); - const [modelSettingsOpen, setModelSettingsOpen] = useState(false); + const [settingsTab, setSettingsTab] = useState<"general" | "models">("general"); const [ttsConfigured, setTtsConfigured] = useState(false); const [playerName, setPlayerName] = useState(""); const [visionClickEnabled, setVisionClickEnabled] = useState(true); @@ -1486,16 +1485,10 @@ export default function HomePage() {
- -
- -
- {groups.map((g, idx) => ( -
- {idx > 0 && ( -
- )} -
-
- - - - - {g.label} - -
- -
- - B A S E · U R L - - updateGroup(g.key, "baseUrl", e.target.value)} - type="text" - autoComplete="off" - spellCheck={false} - placeholder="https://api.example.com/v1" - className="h-10 w-full rounded-sm border border-clay-900/15 bg-cream-100 px-4 font-sans text-sm text-clay-900 outline-none transition-colors focus:border-ember-500 placeholder:text-clay-400" - /> -
- -
- - A P I · K e y - -
- updateGroup(g.key, "apiKey", e.target.value)} - type={showKeys[g.key] ? "text" : "password"} - autoComplete="off" - spellCheck={false} - placeholder="sk-..." - className="h-10 w-full rounded-sm border border-clay-900/15 bg-cream-100 pl-4 pr-11 font-sans text-sm text-clay-900 outline-none transition-colors focus:border-ember-500 placeholder:text-clay-400" - /> - -
-
- -
- - M o d e l - - updateGroup(g.key, "model", e.target.value)} - type="text" - autoComplete="off" - spellCheck={false} - placeholder="gpt-4o / claude-3-5-sonnet / flux-1-dev ..." - className="h-10 w-full rounded-sm border border-clay-900/15 bg-cream-100 px-4 font-sans text-sm text-clay-900 outline-none transition-colors focus:border-ember-500 placeholder:text-clay-400" - /> -
- -
- - P r o v i d e r(可选) - - - - 留空时系统会根据 Base URL 自动推断协议。 - -
-
-
- ))} - -
- - {/* ── TTS Key Section ── */} -
-
- - - - - 自带配音 Key - - 可选 -
-

- 填入你自己的 - 小米 MiMo API Key - ,配音将在浏览器本地合成,Key 只保存在本地、绝不经过服务器。MiMo - TTS 目前 - 限时免费 - ,申请即可使用。 -

- -
- - K e y · 类 型 - -
- {( - [ - { - kind: "payg", - label: "按量付费 Pay-as-you-go", - sub: "sk- 开头", - }, - { - kind: "token-plan", - label: "套餐 Token Plan", - sub: "tp- 开头", - }, - ] as const - ).map((t) => { - const active = keyType === t.kind; - return ( - - ); - })} -
-
- - {keyType === "token-plan" && ( -
- - 区 域 节 点 - -
- {TTS_REGION_PRESETS.map((p) => { - const active = p.id === regionId; - return ( - - ); - })} -
- - 选择与你的套餐订阅地区一致的节点(通常也是延迟最低的那个)。 - -
- )} - -
- - A P I · K e y - -
- setTtsApiKey(e.target.value)} - type={showTtsKey ? "text" : "password"} - autoComplete="off" - spellCheck={false} - placeholder={ - keyType === "payg" - ? "粘贴 sk- 开头的按量 Key" - : "粘贴 tp- 开头的套餐 Key" - } - className="h-11 w-full rounded-sm border border-clay-900/15 bg-cream-100 pl-4 pr-11 font-sans text-sm text-clay-900 outline-none transition-colors focus:border-ember-500 placeholder:text-clay-400" - /> - -
- {prefixMismatch && ( - - - 此 Key 不是 {expectedPrefix} 开头,可能与所选「 - {keyType === "payg" - ? "按量付费 Pay-as-you-go" - : "套餐 Token Plan"} - 」类型不符,请确认是否填错。 - - )} - - - 如何免费申请 Key?查看图文教程 - -
-
- -
- -
-

- - 请确保你的 API 端点支持浏览器跨域请求(CORS)。大多数主流提供商(OpenAI、Anthropic、Gemini、Runware 等)已默认支持。 -

-
-
- - {/* Footer */} -
- {hasAnySetting && ( - - )} - -
-
-
- ); -} diff --git a/components/SettingsModal.tsx b/components/SettingsModal.tsx index 44da808..083bc55 100644 --- a/components/SettingsModal.tsx +++ b/components/SettingsModal.tsx @@ -1,6 +1,23 @@ "use client"; import { type ReactNode, useEffect, useState } from "react"; +import type { ProviderProtocol } from "@infiplot/types"; +import { + clearStoredModelConfig, + readStoredModelConfig, + writeStoredModelConfig, +} from "@/lib/clientModelConfig"; +import { + clearStoredTtsConfig, + readStoredTtsConfig, + writeStoredTtsConfig, +} from "@/lib/clientTtsConfig"; +import { + findTtsPreset, + PAYG_PRESET_ID, + TTS_KEY_DOC_URL, + TTS_REGION_PRESETS, +} from "@/lib/ttsPresets"; const PLAYER_NAME_STORAGE_KEY = "infiplot:playerName"; const VISION_CLICK_STORAGE_KEY = "infiplot:visionClick"; @@ -33,52 +50,216 @@ export function readStoredVisionClick(): boolean { } } +const PROVIDER_OPTIONS: { value: ProviderProtocol | ""; label: string }[] = [ + { value: "", label: "自动推断(推荐)" }, + { value: "openai_compatible", label: "OpenAI Compatible" }, + { value: "openai", label: "OpenAI (Native)" }, + { value: "anthropic", label: "Anthropic" }, + { value: "google", label: "Google Gemini" }, + { value: "runware", label: "Runware" }, +]; + +type ModelGroup = { + key: "text" | "image" | "vision"; + label: string; + icon: string; + baseUrl: string; + apiKey: string; + model: string; + provider: string; +}; + +type TabKey = "general" | "models"; + export function SettingsModal({ + initialTab = "general", initialVisionClickEnabled = true, onClose, onSaved, footerNote, }: { + initialTab?: TabKey; initialVisionClickEnabled?: boolean; onClose: () => void; - onSaved: (settings: { playerName: string; visionClickEnabled: boolean }) => void; + onSaved: (settings: { + playerName: string; + visionClickEnabled: boolean; + ttsConfigured: boolean; + }) => void; footerNote?: ReactNode; }) { + const [activeTab, setActiveTab] = useState(initialTab); + + // ── General tab state ── const [playerName, setPlayerName] = useState(() => readStoredPlayerName()); const [visionClick, setVisionClick] = useState(initialVisionClickEnabled); - const [shown, setShown] = useState(false); + // ── Models tab state ── + const initial = readStoredModelConfig(); + const [groups, setGroups] = useState([ + { + key: "text", + label: "文本模型", + icon: "fa-solid fa-pen-nib", + baseUrl: initial?.textBaseUrl ?? "", + apiKey: initial?.textApiKey ?? "", + model: initial?.textModel ?? "", + provider: initial?.textProvider ?? "", + }, + { + key: "image", + label: "绘图模型", + icon: "fa-solid fa-palette", + baseUrl: initial?.imageBaseUrl ?? "", + apiKey: initial?.imageApiKey ?? "", + model: initial?.imageModel ?? "", + provider: initial?.imageProvider ?? "", + }, + { + key: "vision", + label: "识图模型", + icon: "fa-solid fa-eye", + baseUrl: initial?.visionBaseUrl ?? "", + apiKey: initial?.visionApiKey ?? "", + model: initial?.visionModel ?? "", + provider: initial?.visionProvider ?? "", + }, + ]); + const [showKeys, setShowKeys] = useState>({}); + // TTS state + const [initialTts] = useState(() => readStoredTtsConfig()); + const initialKind = findTtsPreset(initialTts?.presetId)?.kind ?? "payg"; + const [keyType, setKeyType] = useState<"token-plan" | "payg">(initialKind); + const [regionId, setRegionId] = useState( + initialKind === "token-plan" + ? (initialTts?.presetId ?? TTS_REGION_PRESETS[0]!.id) + : TTS_REGION_PRESETS[0]!.id, + ); + const [ttsApiKey, setTtsApiKey] = useState(initialTts?.apiKey ?? ""); + const [showTtsKey, setShowTtsKey] = useState(false); + + const expectedPrefix = keyType === "payg" ? "sk-" : "tp-"; + const prefixMismatch = + ttsApiKey.trim().length > 0 && !ttsApiKey.trim().startsWith(expectedPrefix); + + // ── Animation ── + const [shown, setShown] = useState(false); useEffect(() => { const id = requestAnimationFrame(() => setShown(true)); return () => cancelAnimationFrame(id); }, []); + useEffect(() => { + const handler = (e: KeyboardEvent) => { + if (e.key === "Escape") close(); + }; + window.addEventListener("keydown", handler); + return () => window.removeEventListener("keydown", handler); + }, []); + const close = () => { setShown(false); setTimeout(onClose, 280); }; - const save = () => { + // ── General actions ── + const saveGeneral = () => { const name = playerName.trim(); writeStoredPlayerName(name); - try { localStorage.setItem(VISION_CLICK_STORAGE_KEY, visionClick ? "1" : "0"); } catch { /* ignore */ } + }; - onSaved({ playerName: name, visionClickEnabled: visionClick }); + const clearGeneral = () => { + writeStoredPlayerName(""); + try { localStorage.removeItem(VISION_CLICK_STORAGE_KEY); } catch { /* ignore */ } + setPlayerName(""); + setVisionClick(true); + }; + + const hasGeneralSetting = readStoredPlayerName().length > 0; + + // ── Models actions ── + const updateGroup = ( + key: string, + field: keyof Omit, + value: string, + ) => { + setGroups((prev) => + prev.map((g) => (g.key === key ? { ...g, [field]: value } : g)), + ); + }; + + const saveModels = () => { + const [text, image, vision] = groups; + if (text && image && vision) { + writeStoredModelConfig({ + textBaseUrl: text.baseUrl, + textApiKey: text.apiKey, + textModel: text.model, + textProvider: (text.provider as ProviderProtocol) || undefined, + imageBaseUrl: image.baseUrl, + imageApiKey: image.apiKey, + imageModel: image.model, + imageProvider: (image.provider as ProviderProtocol) || undefined, + visionBaseUrl: vision.baseUrl, + visionApiKey: vision.apiKey, + visionModel: vision.model, + visionProvider: (vision.provider as ProviderProtocol) || undefined, + }); + } + + const key = ttsApiKey.trim(); + if (key) { + const presetId = keyType === "payg" ? PAYG_PRESET_ID : regionId; + writeStoredTtsConfig({ presetId, apiKey: key }); + } else { + clearStoredTtsConfig(); + } + }; + + const clearModels = () => { + clearStoredModelConfig(); + clearStoredTtsConfig(); + setGroups((prev) => + prev.map((g) => ({ ...g, baseUrl: "", apiKey: "", model: "", provider: "" })), + ); + setTtsApiKey(""); + }; + + const hasModelSetting = + groups.some((g) => g.baseUrl.trim() && g.apiKey.trim() && g.model.trim()) || + initialTts != null; + + // ── Global save / clear ── + const save = () => { + saveGeneral(); + saveModels(); + + const ttsConfigured = ttsApiKey.trim().length > 0; + onSaved({ + playerName: playerName.trim(), + visionClickEnabled: visionClick, + ttsConfigured, + }); close(); }; const clearAll = () => { - writeStoredPlayerName(""); - try { localStorage.removeItem(VISION_CLICK_STORAGE_KEY); } catch { /* ignore */ } - onSaved({ playerName: "", visionClickEnabled: true }); + clearGeneral(); + clearModels(); + onSaved({ playerName: "", visionClickEnabled: true, ttsConfigured: false }); close(); }; - const hasAnySetting = readStoredPlayerName().length > 0; + const hasAnySetting = hasGeneralSetting || hasModelSetting; + + const tabs: { key: TabKey; label: string; icon: string }[] = [ + { key: "general", label: "通用", icon: "fa-solid fa-sliders" }, + { key: "models", label: "模型", icon: "fa-solid fa-microchip" }, + ]; return (
e.stopPropagation()} className={ - "flex w-[560px] max-w-[94vw] max-h-[88vh] flex-col overflow-hidden rounded-sm border border-clay-900/15 bg-cream-50 shadow-2xl shadow-clay-900/25 transition-all duration-300 " + + "flex w-[640px] max-w-[96vw] max-h-[90vh] flex-col overflow-hidden rounded-sm border border-clay-900/15 bg-cream-50 shadow-2xl shadow-clay-900/25 transition-all duration-300 " + (shown ? "opacity-100 scale-100" : "opacity-0 scale-95") } > @@ -117,81 +298,368 @@ export function SettingsModal({
-
- {/* ── Player Name Section ── */} -
-
- - - - - 玩家名字 - -
- setPlayerName(e.target.value)} - type="text" - maxLength={20} - autoComplete="off" - spellCheck={false} - placeholder="不填则使用「你」" - className="h-11 w-full rounded-sm border border-clay-900/15 bg-cream-100 px-4 font-sans text-sm text-clay-900 outline-none transition-colors focus:border-ember-500 placeholder:text-clay-400" - /> - - NPC 会在对话中用这个名字称呼你。不填则默认以「你」称呼。 - -
+ {/* Tab bar */} +
+ {tabs.map((t) => { + const active = activeTab === t.key; + return ( + + ); + })} +
-
+ {/* Content */} +
+ {activeTab === "general" && ( + <> + {/* ── Player Name Section ── */} +
+
+ + + + + 玩家名字 + +
+ setPlayerName(e.target.value)} + type="text" + maxLength={20} + autoComplete="off" + spellCheck={false} + placeholder="不填则使用「你」" + className="h-11 w-full rounded-sm border border-clay-900/15 bg-cream-100 px-4 font-sans text-sm text-clay-900 outline-none transition-colors focus:border-ember-500 placeholder:text-clay-400" + /> + + NPC 会在对话中用这个名字称呼你。不填则默认以「你」称呼。 + +
- {/* ── Vision Click Section ── */} -
-
- - - - - 点击画面识别 - -
-
- {( - [ - { on: true, label: "开启", icon: "fa-solid fa-wand-magic-sparkles" }, - { on: false, label: "关闭", icon: "fa-solid fa-ban" }, - ] as const - ).map((t) => { - const active = visionClick === t.on; - return ( - + ); + })} +
+ + 开启后,在选择节点点击画面会触发 AI 识图并生成新的剧情分支。 + +
+ + {footerNote && ( +
+

+ {footerNote} +

+
+ )} + + )} + + {activeTab === "models" && ( + <> + {groups.map((g, idx) => ( +
+ {idx > 0 && ( +
+ )} +
+
+ + + + + {g.label} + +
+ +
+ + B A S E · U R L + + updateGroup(g.key, "baseUrl", e.target.value)} + type="text" + autoComplete="off" + spellCheck={false} + placeholder="https://api.example.com/v1" + className="h-10 w-full rounded-sm border border-clay-900/15 bg-cream-100 px-4 font-sans text-sm text-clay-900 outline-none transition-colors focus:border-ember-500 placeholder:text-clay-400" + /> +
+ +
+ + A P I · K e y + +
+ updateGroup(g.key, "apiKey", e.target.value)} + type={showKeys[g.key] ? "text" : "password"} + autoComplete="off" + spellCheck={false} + placeholder="sk-..." + className="h-10 w-full rounded-sm border border-clay-900/15 bg-cream-100 pl-4 pr-11 font-sans text-sm text-clay-900 outline-none transition-colors focus:border-ember-500 placeholder:text-clay-400" + /> + +
+
+ +
+ + M o d e l + + updateGroup(g.key, "model", e.target.value)} + type="text" + autoComplete="off" + spellCheck={false} + placeholder="gpt-4o / claude-3-5-sonnet / flux-1-dev ..." + className="h-10 w-full rounded-sm border border-clay-900/15 bg-cream-100 px-4 font-sans text-sm text-clay-900 outline-none transition-colors focus:border-ember-500 placeholder:text-clay-400" + /> +
+ +
+ + P r o v i d e r(可选) + + + + 留空时系统会根据 Base URL 自动推断协议。 + +
+
+
+ ))} + +
+ + {/* ── TTS Key Section ── */} +
+
+ + + + + 自带配音 Key + + 可选 +
+

+ 填入你自己的 + 小米 MiMo API Key + ,配音将在浏览器本地合成,Key 只保存在本地、绝不经过服务器。MiMo + TTS 目前 + 限时免费 + ,申请即可使用。 +

+ +
+ + K e y · 类 型 + +
+ {( + [ + { + kind: "payg", + label: "按量付费 Pay-as-you-go", + sub: "sk- 开头", + }, + { + kind: "token-plan", + label: "套餐 Token Plan", + sub: "tp- 开头", + }, + ] as const + ).map((t) => { + const active = keyType === t.kind; + return ( + + ); + })} +
+
+ + {keyType === "token-plan" && ( +
+ + 区 域 节 点 + +
+ {TTS_REGION_PRESETS.map((p) => { + const active = p.id === regionId; + return ( + + ); + })} +
+ + 选择与你的套餐订阅地区一致的节点(通常也是延迟最低的那个)。 + +
+ )} + +
+ + A P I · K e y + +
+ setTtsApiKey(e.target.value)} + type={showTtsKey ? "text" : "password"} + autoComplete="off" + spellCheck={false} + placeholder={ + keyType === "payg" + ? "粘贴 sk- 开头的按量 Key" + : "粘贴 tp- 开头的套餐 Key" + } + className="h-11 w-full rounded-sm border border-clay-900/15 bg-cream-100 pl-4 pr-11 font-sans text-sm text-clay-900 outline-none transition-colors focus:border-ember-500 placeholder:text-clay-400" + /> + +
+ {prefixMismatch && ( + + + 此 Key 不是 {expectedPrefix} 开头,可能与所选「 + {keyType === "payg" + ? "按量付费 Pay-as-you-go" + : "套餐 Token Plan"} + 」类型不符,请确认是否填错。 + + )} + - - {t.label} - - ); - })} -
- - 开启后,在选择节点点击画面会触发 AI 识图并生成新的剧情分支。 - -
+ + 如何免费申请 Key?查看图文教程 + +
+
- {footerNote && ( -
-

- {footerNote} -

-
+
+ +
+

+ + 请确保你的 API 端点支持浏览器跨域请求(CORS)。大多数主流提供商(OpenAI、Anthropic、Gemini、Runware 等)已默认支持。 +

+
+ )}
From 6cd7d88326f50390701adfa6873959bf858a7293 Mon Sep 17 00:00:00 2001 From: baizhi958216 <1475289190@qq.com> Date: Thu, 11 Jun 2026 12:09:02 +0800 Subject: [PATCH 10/13] feat(web): fallback to server API routes when no client-side model config is set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a user has not configured their own model keys in localStorage, engine calls now automatically route through /api/* server routes instead of throwing "模型配置未设置". This lets Vercel deploys with server-side environment variables work out of the box. - Add lib/engineClient.ts as a unified client-side routing layer: checks localStorage for BYO config, falls back to POST /api/start, /api/scene, /api/vision, /api/classify-freeform, /api/insert-beat - Update app/play/page.tsx to use engineClient instead of direct engine imports; remove buildEngineConfig() - Update app/page.tsx style-image parsing to also fall back to /api/parse-style-image when no local model config exists Signed-off-by: zhi --- app/page.tsx | 34 ++++++++++----- app/play/page.tsx | 62 ++++++++------------------- lib/engineClient.ts | 101 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 142 insertions(+), 55 deletions(-) create mode 100644 lib/engineClient.ts diff --git a/app/page.tsx b/app/page.tsx index 1f3673a..6d61a19 100644 --- a/app/page.tsx +++ b/app/page.tsx @@ -980,18 +980,30 @@ function StyleModal({ try { const resized = await resizeImageToDataUrl(file); const modelCfg = readStoredModelConfig(); - if (!modelCfg) { - throw new Error("请先点击首页右上角的「模型设置」配置视觉模型参数"); + let stylePrompt: string; + if (modelCfg) { + const config = resolveEngineConfig(modelCfg, null); + const raw = await analyzeImageDataUrl(config.vision, resized, STYLE_EXTRACTION_PROMPT); + let parsed: { stylePrompt?: string }; + try { + parsed = JSON.parse(raw); + } catch { + parsed = { stylePrompt: raw }; + } + stylePrompt = (parsed.stylePrompt ?? "").trim(); + } else { + const r = await fetch("/api/parse-style-image", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ imageDataUrl: resized }), + }); + if (!r.ok) { + const data = await r.json().catch(() => ({})); + throw new Error(data.error || `HTTP ${r.status}`); + } + const data = (await r.json()) as { stylePrompt?: string }; + stylePrompt = (data.stylePrompt ?? "").trim(); } - const config = resolveEngineConfig(modelCfg, null); - const raw = await analyzeImageDataUrl(config.vision, resized, STYLE_EXTRACTION_PROMPT); - let parsed: { stylePrompt?: string }; - try { - parsed = JSON.parse(raw); - } catch { - parsed = { stylePrompt: raw }; - } - const stylePrompt = (parsed.stylePrompt ?? "").trim(); if (!stylePrompt) throw new Error("视觉模型返回了空的风格描述"); setDraft(stylePrompt); setCustomStyleRefImage(resized); diff --git a/app/play/page.tsx b/app/play/page.tsx index 4b40f56..156695b 100644 --- a/app/play/page.tsx +++ b/app/play/page.tsx @@ -34,14 +34,12 @@ import { visionDecide, classifyFreeform, requestInsertBeat, -} from "@infiplot/engine"; -import { readStoredModelConfig, resolveEngineConfig } from "@/lib/clientModelConfig"; +} from "@/lib/engineClient"; import type { Beat, BeatChoice, Character, CharacterVoice, - EngineConfig, Orientation, Scene, SceneExit, @@ -54,17 +52,6 @@ import { track } from "@/lib/analytics"; const MUTED_STORAGE_KEY = "infiplot:muted"; -// ── Client-side engine config builder ────────────────────────────────── -// Reads model credentials from localStorage and assembles the EngineConfig -// that the engine expects. Called at the point of use (inside async handlers) -// so mid-session settings changes are picked up immediately. -function buildEngineConfig(): EngineConfig { - const modelCfg = readStoredModelConfig(); - const ttsCfg = loadClientTtsConfig(); - return resolveEngineConfig(modelCfg, ttsCfg); -} - - // Mobile-portrait users get a 9:16 scene image painted for them; everyone else // (desktop, tablet, mobile-landscape) keeps the 16:9 landscape image. Only a // touch device (coarse pointer) held upright counts as "portrait" — a mouse @@ -379,8 +366,7 @@ function prefetchScenePath( const specSession = buildSpeculativeSession(baseSession, steps); const abort = new AbortController(); const promise = (async () => { - const config = buildEngineConfig(); - const data = await requestScene(config, { session: specSession, clientTts }); + const data = await requestScene({ session: specSession, clientTts }); if (abort.signal.aborted) throw new Error("aborted"); // Record this resolved alternate for the gallery export. Key is @@ -1186,8 +1172,7 @@ function PlayInner() { }, ) : (async () => { - const config = buildEngineConfig(); - const data = await startSession(config, { + const data = await startSession({ ...livePayload!, clientTts: !!byoTtsRef.current, }); @@ -1569,8 +1554,7 @@ function PlayInner() { clearPool(poolRef.current); const promise = (async () => { - const config = buildEngineConfig(); - const data = await requestScene(config, { + const data = await requestScene({ session: specSession, clientTts: !!byoTtsRef.current, }); @@ -1592,8 +1576,7 @@ function PlayInner() { setPhase("vision-thinking"); try { - const config = buildEngineConfig(); - const decision = await classifyFreeform(config, { + const decision = await classifyFreeform({ session, freeformText: text, }); @@ -1601,14 +1584,11 @@ function PlayInner() { if (decision.classify === "insert-beat") { // Interactive beat: NPC responds to the player's action, scene stays setPhase("inserting-beat"); - const { partial, characters: insertChars } = await requestInsertBeat( - config, - { - session, - freeformAction: decision.freeformAction, - clientTts: !!byoTtsRef.current, - }, - ); + const { partial, characters: insertChars } = await requestInsertBeat({ + session, + freeformAction: decision.freeformAction, + clientTts: !!byoTtsRef.current, + }); const fromBeatId = currentBeatRef.current?.id ?? currentScene.entryBeatId; @@ -1671,8 +1651,7 @@ function PlayInner() { }; const promise = (async () => { - const config = buildEngineConfig(); - const data = await requestScene(config, { + const data = await requestScene({ session: specSession, clientTts: !!byoTtsRef.current, }); @@ -1695,8 +1674,7 @@ function PlayInner() { try { const annotatedImageBase64 = await annotateClick(imageUrl, click); - const config = buildEngineConfig(); - const decision = await visionDecide(config, { + const decision = await visionDecide({ session, annotatedImageBase64, }); @@ -1704,14 +1682,11 @@ function PlayInner() { if (decision.classify === "insert-beat") { setPhase("inserting-beat"); - const { partial, characters: insertChars } = await requestInsertBeat( - config, - { - session, - freeformAction: decision.intent.freeformAction, - clientTts: !!byoTtsRef.current, - }, - ); + const { partial, characters: insertChars } = await requestInsertBeat({ + session, + freeformAction: decision.intent.freeformAction, + clientTts: !!byoTtsRef.current, + }); const fromBeatId = currentBeatRef.current?.id ?? currentScene.entryBeatId; @@ -1776,8 +1751,7 @@ function PlayInner() { clearPool(poolRef.current); const promise = (async () => { - const config = buildEngineConfig(); - const data = await requestScene(config, { + const data = await requestScene({ session: specSession, clientTts: !!byoTtsRef.current, }); diff --git a/lib/engineClient.ts b/lib/engineClient.ts new file mode 100644 index 0000000..066e342 --- /dev/null +++ b/lib/engineClient.ts @@ -0,0 +1,101 @@ +import { + startSession as startSessionClient, + requestScene as requestSceneClient, + visionDecide as visionDecideClient, + classifyFreeform as classifyFreeformClient, + requestInsertBeat as requestInsertBeatClient, +} from "@infiplot/engine"; +import { + readStoredModelConfig, + resolveEngineConfig, +} from "@/lib/clientModelConfig"; +import { loadClientTtsConfig } from "@/lib/clientTtsConfig"; +import type { + FreeformClassifyRequest, + FreeformClassifyResponse, + EngineConfig, + InsertBeatRequest, + InsertBeatResponse, + SceneRequest, + SceneResponse, + StartRequest, + StartResponse, + VisionRequest, + VisionResponse, +} from "@infiplot/types"; + +function getClientConfig(): EngineConfig | null { + const modelCfg = readStoredModelConfig(); + const ttsCfg = loadClientTtsConfig(); + if (!modelCfg) return null; + return resolveEngineConfig(modelCfg, ttsCfg); +} + +async function postJson(path: string, body: unknown): Promise { + const res = await fetch(path, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(body), + }); + if (!res.ok) { + let message = `HTTP ${res.status}`; + try { + const data = (await res.json()) as { error?: string }; + if (data.error) message = data.error; + } catch { + // ignore parse failure, keep HTTP status message + } + throw new Error(message); + } + return res.json() as Promise; +} + +// ── Unified entry points ─────────────────────────────────────────────── +// When the browser has a BYO model config in localStorage, these call the +// client-side engine directly (talking to providers from the browser). +// Otherwise they fall back to the server-side API routes, which read +// environment variables — useful for Vercel deploys that already supply keys. + +export async function startSession(req: StartRequest): Promise { + const config = getClientConfig(); + if (config) { + return startSessionClient(config, req); + } + return postJson("/api/start", req); +} + +export async function requestScene(req: SceneRequest): Promise { + const config = getClientConfig(); + if (config) { + return requestSceneClient(config, req); + } + return postJson("/api/scene", req); +} + +export async function visionDecide(req: VisionRequest): Promise { + const config = getClientConfig(); + if (config) { + return visionDecideClient(config, req); + } + return postJson("/api/vision", req); +} + +export async function classifyFreeform( + req: FreeformClassifyRequest, +): Promise { + const config = getClientConfig(); + if (config) { + return classifyFreeformClient(config, req); + } + return postJson("/api/classify-freeform", req); +} + +export async function requestInsertBeat( + req: InsertBeatRequest, +): Promise { + const config = getClientConfig(); + if (config) { + return requestInsertBeatClient(config, req); + } + return postJson("/api/insert-beat", req); +} From ef3b57953b09bdcdf04500e562e1f2236aff59ea Mon Sep 17 00:00:00 2001 From: baizhi958216 <1475289190@qq.com> Date: Thu, 11 Jun 2026 16:11:44 +0800 Subject: [PATCH 11/13] refactor(ai-client): replace AI SDK adapters with OpenAI SDK --- components/SettingsModal.tsx | 2 - lib/ai-client/chat.ts | 63 +++++++------- lib/ai-client/image.ts | 155 +++++++++++++++++++++++----------- lib/ai-client/model.ts | 23 ----- lib/ai-client/normalizeUrl.ts | 2 - lib/ai-client/vision.ts | 57 ++++++------- lib/clientModelConfig.ts | 2 - lib/config.ts | 2 - lib/types/index.ts | 12 +-- package.json | 5 +- pnpm-lock.yaml | 143 +++++-------------------------- 11 files changed, 191 insertions(+), 275 deletions(-) delete mode 100644 lib/ai-client/model.ts diff --git a/components/SettingsModal.tsx b/components/SettingsModal.tsx index 083bc55..9f555e5 100644 --- a/components/SettingsModal.tsx +++ b/components/SettingsModal.tsx @@ -54,8 +54,6 @@ const PROVIDER_OPTIONS: { value: ProviderProtocol | ""; label: string }[] = [ { value: "", label: "自动推断(推荐)" }, { value: "openai_compatible", label: "OpenAI Compatible" }, { value: "openai", label: "OpenAI (Native)" }, - { value: "anthropic", label: "Anthropic" }, - { value: "google", label: "Google Gemini" }, { value: "runware", label: "Runware" }, ]; diff --git a/lib/ai-client/chat.ts b/lib/ai-client/chat.ts index f869c8f..6707c40 100644 --- a/lib/ai-client/chat.ts +++ b/lib/ai-client/chat.ts @@ -1,29 +1,24 @@ -import { generateText } from "ai"; -import type { LanguageModelUsage, ModelMessage } from "ai"; +import OpenAI from "openai"; import type { ProviderConfig } from "@infiplot/types"; -import { createLanguageModel, resolveProtocol } from "./model"; +import { normalizeBaseUrl } from "./normalizeUrl"; export type ChatMessage = { role: "system" | "user" | "assistant"; content: string; }; -// AI SDK 6 unifies cache stats across providers into usage.inputTokenDetails, -// so a single shape covers Anthropic, Gemini, and OpenAI-compatible providers. function summarizeSdkUsage( tag: string, - usage: LanguageModelUsage | undefined, + usage: OpenAI.Completions.CompletionUsage | undefined, ): string { if (!usage) return `[cache] ${tag} no-usage`; - const input = usage.inputTokens ?? 0; - const output = usage.outputTokens ?? 0; - const read = usage.inputTokenDetails?.cacheReadTokens; - const write = usage.inputTokenDetails?.cacheWriteTokens; - if (typeof read === "number" || typeof write === "number") { - const hit = read ?? 0; - const create = write ?? 0; - const rate = input > 0 ? ((hit / input) * 100).toFixed(1) : "n/a"; - return `[cache] ${tag} hit=${hit} create=${create} input=${input} rate=${rate}% completion=${output}`; + const input = usage.prompt_tokens ?? 0; + const output = usage.completion_tokens ?? 0; + const details = (usage as { prompt_tokens_details?: { cached_tokens?: number } }).prompt_tokens_details; + const cached = details?.cached_tokens; + if (typeof cached === "number") { + const rate = input > 0 ? ((cached / input) * 100).toFixed(1) : "n/a"; + return `[cache] ${tag} hit=${cached} input=${input} rate=${rate}% completion=${output}`; } return `[cache] ${tag} input=${input} completion=${output} (provider didn't report cache stats)`; } @@ -36,28 +31,28 @@ export async function chat( tag?: string; }, ): Promise { - const protocol = resolveProtocol(config); - const model = createLanguageModel(config, protocol); - - const system = messages.find((m) => m.role === "system")?.content; - const convo: ModelMessage[] = messages - .filter((m) => m.role !== "system") - .map((m) => ({ - role: m.role as "user" | "assistant", - content: m.content, - })); - - const { text, usage } = await generateText({ - model, - system, - messages: convo, - temperature: opts?.temperature ?? 0.9, + const client = new OpenAI({ + apiKey: config.apiKey, + baseURL: normalizeBaseUrl(config.baseUrl, "openai_compatible"), + maxRetries: 0, + dangerouslyAllowBrowser: true, }); - console.log(summarizeSdkUsage(opts?.tag ?? "chat", usage)); + const completion = await client.chat.completions.create({ + model: config.model, + messages: messages.map((m) => ({ + role: m.role as "system" | "user" | "assistant", + content: m.content, + })), + temperature: opts?.temperature ?? 0.9, + stream: false, + }); - if (typeof text !== "string" || text.length === 0) { - throw new Error(`Chat API (AI SDK ${protocol}) returned no content.`); + const text = completion.choices[0]?.message?.content ?? ""; + console.log(summarizeSdkUsage(opts?.tag ?? "chat", completion.usage ?? undefined)); + + if (text.length === 0) { + throw new Error(`Chat API returned no content.`); } return text; } diff --git a/lib/ai-client/image.ts b/lib/ai-client/image.ts index e7c5a48..5cae0f8 100644 --- a/lib/ai-client/image.ts +++ b/lib/ai-client/image.ts @@ -1,6 +1,4 @@ -import { generateImage as generateImageSdk } from "ai"; -import { createOpenAI } from "@ai-sdk/openai"; -import { createGoogleGenerativeAI } from "@ai-sdk/google"; +import OpenAI, { toFile, type Uploadable } from "openai"; import type { Orientation, ProviderConfig, ProviderProtocol } from "@infiplot/types"; import { fetchWithRetry } from "./fetchWithRetry"; import { normalizeBaseUrl } from "./normalizeUrl"; @@ -48,8 +46,8 @@ export type GenerateImageOptions = { /** * Reference images (UUIDs, URLs, or base64) to condition generation on — * typically character portraits + the prior scene image. Runware caps at 4; - * we silently truncate beyond that. On the OpenAI/Gemini AI SDK paths these - * map to `prompt.images` (the SDK accepts public URLs or data URLs). + * we silently truncate beyond that. On the native OpenAI path these are + * fetched/decoded and sent to `images.edit`. */ referenceImages?: string[]; /** 0–1, FLUX needs ≥ 0.8 to actually have an effect. Runware-only. */ @@ -58,7 +56,7 @@ export type GenerateImageOptions = { * Output aspect, locked per session. "portrait" → 9:16 vertical for mobile; * default/"landscape" → 16:9 widescreen. Mapped to each provider's nearest * supported size: Runware 1024×1792, OpenAI-compatible REST 1024x1792, - * native gpt-image 1024x1536, Gemini aspectRatio 9:16. + * native gpt-image 1024x1536. */ orientation?: Orientation; }; @@ -66,8 +64,8 @@ export type GenerateImageOptions = { export type GenerateImageResult = { /** * Image the client can render directly. A Runware CDN URL on the Runware - * path; a `data:;base64,...` URI on the AI SDK paths (OpenAI/Gemini - * return raw bytes, not a hosted URL). + * path; a `data:;base64,...` URI on the native OpenAI path when GPT + * image models return raw bytes instead of a hosted URL. */ imageUrl: string; /** @@ -117,63 +115,124 @@ export async function generateImage( const protocol = resolveImageProtocol(config); switch (protocol) { case "openai": - case "google": - return generateImageViaAiSdk(config, prompt, options, protocol); + return generateImageOpenAi(config, prompt, options); case "runware": return generateImageRunware(config, prompt, options); - case "anthropic": - throw new Error( - 'IMAGE_PROVIDER "anthropic" does not generate images. Use "openai", "google", "runware", or "openai_compatible".', - ); case "openai_compatible": default: return generateImageOpenAiCompatible(config, prompt, options); } } -// Native OpenAI (gpt-image) / Gemini (Nano Banana) via the Vercel AI SDK. -// Unlike the fetch path, this supports reference-image editing via -// `prompt.images`. The SDK returns raw bytes (no hosted URL), so we hand the -// client a data URI and synthesize a UUID; continuity references reuse the -// data URI rather than a provider UUID. -async function generateImageViaAiSdk( +// Native OpenAI (gpt-image) via the official OpenAI SDK. Unlike the compatible +// fetch path, this supports reference-image editing through `images.edit`. +// GPT image models return raw bytes, so we hand the client a data URI and +// synthesize a UUID; continuity references reuse the data URI rather than a +// provider UUID. +async function generateImageOpenAi( config: ProviderConfig, prompt: string, - options: GenerateImageOptions | undefined, - protocol: "openai" | "google", + options?: GenerateImageOptions, ): Promise { - const baseURL = normalizeBaseUrl(config.baseUrl, protocol); - const imageModel = - protocol === "openai" - ? createOpenAI({ apiKey: config.apiKey, baseURL }).image(config.model) - : createGoogleGenerativeAI({ apiKey: config.apiKey, baseURL }).image( - config.model, - ); - - const refs = (options?.referenceImages ?? []).slice(0, MAX_REFERENCE_IMAGES); - const promptArg = - refs.length > 0 ? { text: prompt, images: refs } : prompt; - - // Session-locked aspect. gpt-image takes an explicit `size` (portrait / - // landscape options are 1024x1536 / 1536x1024); Gemini takes an `aspectRatio`. - const portrait = options?.orientation === "portrait"; - const { image } = await generateImageSdk({ - model: imageModel, - prompt: promptArg, - ...(protocol === "openai" - ? { size: (portrait ? "1024x1536" : "1536x1024") as `${number}x${number}` } - : { aspectRatio: (portrait ? "9:16" : "16:9") as `${number}:${number}` }), + const client = new OpenAI({ + apiKey: config.apiKey, + baseURL: normalizeBaseUrl(config.baseUrl, "openai"), + maxRetries: 2, + dangerouslyAllowBrowser: true, }); + const refs = (options?.referenceImages ?? []).slice(0, MAX_REFERENCE_IMAGES); + const portrait = options?.orientation === "portrait"; + const size = portrait ? "1024x1536" : "1536x1024"; - return { - imageUrl: `data:${image.mediaType};base64,${image.base64}`, - imageUuid: crypto.randomUUID(), - }; + const response = + refs.length > 0 + ? await client.images.edit({ + model: config.model, + prompt, + image: await Promise.all(refs.map(referenceImageToUploadable)), + n: 1, + size, + }) + : await client.images.generate({ + model: config.model, + prompt, + n: 1, + size, + }); + + return imageResponseToResult(response); +} + +async function referenceImageToUploadable(ref: string): Promise { + if (ref.startsWith("data:")) { + const response = await fetch(ref); + if (!response.ok) { + throw new Error(`Failed to read data URL reference image.`); + } + const mediaType = response.headers.get("content-type") ?? "image/png"; + return toFile(response, `reference.${extensionFromMediaType(mediaType)}`, { + type: mediaType, + }); + } + + if (/^https?:\/\//i.test(ref)) { + const response = await fetch(ref); + if (!response.ok) { + throw new Error( + `Failed to fetch reference image ${ref}: HTTP ${response.status}`, + ); + } + const mediaType = response.headers.get("content-type") ?? "image/png"; + return toFile(response, filenameFromUrl(ref, mediaType), { + type: mediaType, + }); + } + + throw new Error( + `Native OpenAI image editing requires reference image URLs or data URLs; got "${ref.slice(0, 32)}...".`, + ); +} + +function imageResponseToResult( + response: OpenAI.Images.ImagesResponse, +): GenerateImageResult { + const data = response.data?.[0]; + const b64 = data?.b64_json; + if (b64) { + const format = response.output_format ?? "png"; + return { + imageUrl: `data:image/${format};base64,${b64}`, + imageUuid: crypto.randomUUID(), + }; + } + + const imageUrl = data?.url; + if (imageUrl) { + return { imageUrl, imageUuid: crypto.randomUUID() }; + } + + throw new Error(`No image data in OpenAI response.`); +} + +function filenameFromUrl(url: string, mediaType: string): string { + try { + const name = new URL(url).pathname.split("/").filter(Boolean).at(-1); + if (name && /\.[a-z0-9]+$/i.test(name)) return name; + } catch { + // Fall back to the media type below. + } + return `reference.${extensionFromMediaType(mediaType)}`; +} + +function extensionFromMediaType(mediaType: string): string { + if (mediaType.includes("jpeg") || mediaType.includes("jpg")) return "jpg"; + if (mediaType.includes("webp")) return "webp"; + return "png"; } // OpenAI-compatible REST route (GPTGod, DALL-E proxies, etc.). Basic // text-to-image only — no reference images on this path; for editing/anchoring -// set IMAGE_PROVIDER=openai (or google) to take the AI SDK path above. +// set IMAGE_PROVIDER=openai to take the native OpenAI path above. async function generateImageOpenAiCompatible( config: ProviderConfig, prompt: string, diff --git a/lib/ai-client/model.ts b/lib/ai-client/model.ts deleted file mode 100644 index 155e424..0000000 --- a/lib/ai-client/model.ts +++ /dev/null @@ -1,23 +0,0 @@ -import { createAnthropic } from "@ai-sdk/anthropic"; -import { createGoogleGenerativeAI } from "@ai-sdk/google"; -import { createOpenAI } from "@ai-sdk/openai"; -import type { ProviderConfig, ProviderProtocol } from "@infiplot/types"; -import { normalizeBaseUrl } from "./normalizeUrl"; - -export function resolveProtocol(config: ProviderConfig): ProviderProtocol { - return config.provider ?? "openai_compatible"; -} - -export function createLanguageModel(config: ProviderConfig, protocol: ProviderProtocol) { - const baseURL = normalizeBaseUrl(config.baseUrl, protocol); - switch (protocol) { - case "anthropic": - return createAnthropic({ apiKey: config.apiKey, baseURL })(config.model); - case "google": - return createGoogleGenerativeAI({ apiKey: config.apiKey, baseURL })(config.model); - case "openai_compatible": - case "openai": - default: - return createOpenAI({ apiKey: config.apiKey, baseURL }).chat(config.model); - } -} diff --git a/lib/ai-client/normalizeUrl.ts b/lib/ai-client/normalizeUrl.ts index 10de5f3..d4a7101 100644 --- a/lib/ai-client/normalizeUrl.ts +++ b/lib/ai-client/normalizeUrl.ts @@ -31,8 +31,6 @@ const ENDPOINT_SUFFIX = const DEFAULT_VERSION_SEGMENT: Record = { openai_compatible: "v1", openai: "v1", - anthropic: "v1", - google: "v1beta", // Runware posts to the bare base URL with no version-pathed sub-resource, // so never inject a segment for it. runware: null, diff --git a/lib/ai-client/vision.ts b/lib/ai-client/vision.ts index 12df0fa..ec15b51 100644 --- a/lib/ai-client/vision.ts +++ b/lib/ai-client/vision.ts @@ -1,7 +1,6 @@ -import { generateText } from "ai"; -import type { ModelMessage } from "ai"; +import OpenAI from "openai"; import type { ProviderConfig } from "@infiplot/types"; -import { createLanguageModel, resolveProtocol } from "./model"; +import { normalizeBaseUrl } from "./normalizeUrl"; const VISION_TIMEOUT_MS = 60_000; @@ -22,34 +21,32 @@ export async function analyzeImageDataUrl( imageDataUrl: string, prompt: string, ): Promise { - const protocol = resolveProtocol(config); - const model = createLanguageModel(config, protocol); + const client = new OpenAI({ + apiKey: config.apiKey, + baseURL: normalizeBaseUrl(config.baseUrl, "openai_compatible"), + maxRetries: 0, + timeout: VISION_TIMEOUT_MS, + dangerouslyAllowBrowser: true, + }); - const messages: ModelMessage[] = [ - { - role: "user", - content: [ - { type: "text", text: prompt }, - { type: "image", image: imageDataUrl }, - ], - }, - ]; + const completion = await client.chat.completions.create({ + model: config.model, + messages: [ + { + role: "user", + content: [ + { type: "text", text: prompt }, + { type: "image_url", image_url: { url: imageDataUrl } }, + ], + }, + ], + temperature: 0.2, + stream: false, + }); - const timeoutCtrl = new AbortController(); - const timeoutId = setTimeout(() => timeoutCtrl.abort(), VISION_TIMEOUT_MS); - try { - const { text } = await generateText({ - model, - messages, - temperature: 0.2, - maxRetries: 0, - abortSignal: timeoutCtrl.signal, - }); - if (typeof text !== "string" || text.length === 0) { - throw new Error(`Vision API (AI SDK ${protocol}) returned no content.`); - } - return text; - } finally { - clearTimeout(timeoutId); + const text = completion.choices[0]?.message?.content ?? ""; + if (text.length === 0) { + throw new Error(`Vision API returned no content.`); } + return text; } diff --git a/lib/clientModelConfig.ts b/lib/clientModelConfig.ts index febce55..d075a22 100644 --- a/lib/clientModelConfig.ts +++ b/lib/clientModelConfig.ts @@ -10,8 +10,6 @@ const STORAGE_KEY = "infiplot:model"; const VALID_PROTOCOLS: ProviderProtocol[] = [ "openai_compatible", - "anthropic", - "google", "openai", "runware", ]; diff --git a/lib/config.ts b/lib/config.ts index e30ea70..c733df9 100644 --- a/lib/config.ts +++ b/lib/config.ts @@ -6,8 +6,6 @@ import type { const VALID_PROTOCOLS = [ "openai_compatible", - "anthropic", - "google", "openai", "runware", ] as const; diff --git a/lib/types/index.ts b/lib/types/index.ts index c0a1989..ba0a7fa 100644 --- a/lib/types/index.ts +++ b/lib/types/index.ts @@ -327,19 +327,15 @@ export type VisionClassify = "insert-beat" | "change-scene"; * openai_compatible text / vision / image — OpenAI Chat Completions + * `/images/generations` (self-implemented fetch; the * default for text/vision when unset) - * anthropic text / vision — native Anthropic Messages (AI SDK) - * google text / vision / image — native Gemini (AI SDK); image - * uses the Nano Banana family - * openai image only — OpenAI gpt-image via AI SDK, - * unlocks reference-image editing (for text/vision use - * openai_compatible, which already speaks OpenAI's format) + * openai image only — OpenAI gpt-image via the + * official OpenAI SDK, unlocks reference-image editing + * (for text/vision use openai_compatible, which already + * speaks OpenAI's format) * runware image only — Runware task-array protocol * (self-implemented; the default for runware.ai URLs) */ export type ProviderProtocol = | "openai_compatible" - | "anthropic" - | "google" | "openai" | "runware"; diff --git a/package.json b/package.json index 751430a..94d7212 100644 --- a/package.json +++ b/package.json @@ -20,13 +20,10 @@ "deploy:cf": "opennextjs-cloudflare deploy" }, "dependencies": { - "@ai-sdk/anthropic": "^3.0.81", - "@ai-sdk/google": "^3.0.80", - "@ai-sdk/openai": "^3.0.67", - "ai": "^6.0.196", "jsonrepair": "^3.14.0", "jszip": "^3.10.1", "next": "^16.0.0", + "openai": "^6.42.0", "react": "^19.0.0", "react-dom": "^19.0.0" }, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 094b998..cb56ee5 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -8,18 +8,6 @@ importers: .: dependencies: - '@ai-sdk/anthropic': - specifier: ^3.0.81 - version: 3.0.81(zod@4.4.3) - '@ai-sdk/google': - specifier: ^3.0.80 - version: 3.0.80(zod@4.4.3) - '@ai-sdk/openai': - specifier: ^3.0.67 - version: 3.0.67(zod@4.4.3) - ai: - specifier: ^6.0.196 - version: 6.0.196(zod@4.4.3) jsonrepair: specifier: ^3.14.0 version: 3.14.0 @@ -29,6 +17,9 @@ importers: next: specifier: ^16.0.0 version: 16.2.7(@opentelemetry/api@1.9.1)(react-dom@19.2.7(react@19.2.7))(react@19.2.7) + openai: + specifier: ^6.42.0 + version: 6.42.0(ws@8.20.1)(zod@4.4.3) react: specifier: ^19.0.0 version: 19.2.7 @@ -69,40 +60,6 @@ importers: packages: - '@ai-sdk/anthropic@3.0.81': - resolution: {integrity: sha512-B1JDd9Ugq9R5AgIaW3674lhGCMMYJcPUxnrZh8fzbGojgg4QvHFRv6eZahGQAUsmGHbcf74G9bdSBDLWQGY2GA==} - engines: {node: '>=18'} - peerDependencies: - zod: ^3.25.76 || ^4.1.8 - - '@ai-sdk/gateway@3.0.124': - resolution: {integrity: sha512-h8CrmbSG+8X0C+M/E1M4oiDHYevqwbzAPN+uLRHS0eJaatF2MZ+juNtOHXNOjk7Bsk9mD2RjYMjJO9dFkb9I7Q==} - engines: {node: '>=18'} - peerDependencies: - zod: ^3.25.76 || ^4.1.8 - - '@ai-sdk/google@3.0.80': - resolution: {integrity: sha512-5ORbm/yFUPO0MEvZsxBMN0cdKw2+lwU/wVn5KN3KF8Dmk1LughuDuUohMh/7iU/XFTiyB0OvmTW/tdV/J7O9zg==} - engines: {node: '>=18'} - peerDependencies: - zod: ^3.25.76 || ^4.1.8 - - '@ai-sdk/openai@3.0.67': - resolution: {integrity: sha512-oAiGC9eWG7IgtdsdS74bOCnAAHarAfTJhWN9x5INwnWPekL802AvF+0I5DvLzIF1MIRmNw4N8mPSL/GUVbX9Mw==} - engines: {node: '>=18'} - peerDependencies: - zod: ^3.25.76 || ^4.1.8 - - '@ai-sdk/provider-utils@4.0.27': - resolution: {integrity: sha512-ubkAJ+xODouwtmN1tYlvTPphH1hPOBfZaEQe8U7skGvFAnIRs9PPpsq57bC2+Ky/MB4yzhd6YOsxTAx9sGpazw==} - engines: {node: '>=18'} - peerDependencies: - zod: ^3.25.76 || ^4.1.8 - - '@ai-sdk/provider@3.0.10': - resolution: {integrity: sha512-Q3BZ27qfpYqnCYGvE3vt+Qi6LGOF9R5Nmzn+9JoM1lCRsD9mYaIhfJLkSunN48nfGXJ6n+XNV0J/XVpqGQl7Dw==} - engines: {node: '>=18'} - '@alloc/quick-lru@5.2.0': resolution: {integrity: sha512-UrcABB+4bUrFABwbluTIBErXwvbsU/V7TZWfmbgJfbkwiBuziS9gxdODUyuiecfdGQ85jglMW6juS3+z5TsKLw==} engines: {node: '>=10'} @@ -1257,9 +1214,6 @@ packages: '@speed-highlight/core@1.2.15': resolution: {integrity: sha512-BMq1K3DsElxDWawkX6eLg9+CKJrTVGCBAWVuHXVUV2u0s2711qiChLSId6ikYPfxhdYocLNt3wWwSvDiTvFabw==} - '@standard-schema/spec@1.1.0': - resolution: {integrity: sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w==} - '@swc/helpers@0.5.15': resolution: {integrity: sha512-JQ5TuMi45Owi4/BIMAJBoSQoOJu12oOk/gADqlcUL9JEdHB8vyjUSsxqeNXnmXHjYKMi2WcYtezGEEhqUI/E2g==} @@ -1283,10 +1237,6 @@ packages: '@types/react@19.2.16': resolution: {integrity: sha512-esJiCAnl0kfpNdE69f3So4WJUXy95dLZydX0KwK46riIHDzHM7O9Vtf9xCHW0PXIqvgqNrswl522kA/5yx+F4w==} - '@vercel/oidc@3.2.0': - resolution: {integrity: sha512-UycprH3T6n3jH0k44NHMa7pnFHGu/N05MjojYr+Mc6I7obkoLIJujSWwin1pCvdy/eOxrI/l3uDLQsmcrOb4ug==} - engines: {node: '>= 20'} - abort-controller@3.0.0: resolution: {integrity: sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==} engines: {node: '>=6.5'} @@ -1304,12 +1254,6 @@ packages: resolution: {integrity: sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ==} engines: {node: '>= 8.0.0'} - ai@6.0.196: - resolution: {integrity: sha512-2T45UeqKL4a11KQ14I5i1YYHOvCFrMF478E1k6PVjlQSGUvXSv4xrxIaQbUL4qgv91DADSbddwv3oR49pPAK3g==} - engines: {node: '>=18'} - peerDependencies: - zod: ^3.25.76 || ^4.1.8 - ansi-colors@4.1.3: resolution: {integrity: sha512-/6w/C21Pm1A7aZitlI5Ni/2J6FFQN8i1Cvz3kHABAAbw93v/NlvKdVOqz7CCWz/3iv/JplRSEEZ83XION15ovw==} engines: {node: '>=6'} @@ -1618,10 +1562,6 @@ packages: resolution: {integrity: sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==} engines: {node: '>=6'} - eventsource-parser@3.1.0: - resolution: {integrity: sha512-kJezFj9YFAMLeORyi7aCLxLbD5/qWMQnoMVlVPyHIll7lgRJCc3JVln9Vgl9nwQi0YkMnhdGTMNn7CkRRAptMg==} - engines: {node: '>=18.0.0'} - execa@5.1.1: resolution: {integrity: sha512-8uSpZZocAZRBAPIEINJj3Lo9HyGitllczc27Eh5YYojjMFMn8yHMDMaUHE2Jqfq05D/wucwI4JGURyXt1vchyg==} engines: {node: '>=10'} @@ -1833,9 +1773,6 @@ packages: resolution: {integrity: sha512-/imKNG4EbWNrVjoNC/1H5/9GFy+tqjGBHCaSsN+P2RnPqjsLmv6UD3Ej+Kj8nBWaRAwyk7kK5ZUc+OEatnTR3A==} hasBin: true - json-schema@0.4.0: - resolution: {integrity: sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA==} - jsonrepair@3.14.0: resolution: {integrity: sha512-tWPGKMZf/8UPim+fcW2EfcQ/d/7aKUrP6IECz9G3Tu6Q5dX0orSleqJ9z6sSw7qrQkjF8/Edo4DvsWBZ8H+HNg==} hasBin: true @@ -2028,6 +1965,17 @@ packages: resolution: {integrity: sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==} engines: {node: '>=6'} + openai@6.42.0: + resolution: {integrity: sha512-1WFEt/uXMXOLhYRNkgJWo08Y2YNvNwpVU72K7ibrWgWpNOXd4VojXLbe6SQ4bLiUQ3Y8jz4IiyVkylJCL1DtZg==} + peerDependencies: + ws: ^8.18.0 + zod: ^3.25 || ^4.0 + peerDependenciesMeta: + ws: + optional: true + zod: + optional: true + package-json-from-dist@1.0.1: resolution: {integrity: sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==} @@ -2495,42 +2443,6 @@ packages: snapshots: - '@ai-sdk/anthropic@3.0.81(zod@4.4.3)': - dependencies: - '@ai-sdk/provider': 3.0.10 - '@ai-sdk/provider-utils': 4.0.27(zod@4.4.3) - zod: 4.4.3 - - '@ai-sdk/gateway@3.0.124(zod@4.4.3)': - dependencies: - '@ai-sdk/provider': 3.0.10 - '@ai-sdk/provider-utils': 4.0.27(zod@4.4.3) - '@vercel/oidc': 3.2.0 - zod: 4.4.3 - - '@ai-sdk/google@3.0.80(zod@4.4.3)': - dependencies: - '@ai-sdk/provider': 3.0.10 - '@ai-sdk/provider-utils': 4.0.27(zod@4.4.3) - zod: 4.4.3 - - '@ai-sdk/openai@3.0.67(zod@4.4.3)': - dependencies: - '@ai-sdk/provider': 3.0.10 - '@ai-sdk/provider-utils': 4.0.27(zod@4.4.3) - zod: 4.4.3 - - '@ai-sdk/provider-utils@4.0.27(zod@4.4.3)': - dependencies: - '@ai-sdk/provider': 3.0.10 - '@standard-schema/spec': 1.1.0 - eventsource-parser: 3.1.0 - zod: 4.4.3 - - '@ai-sdk/provider@3.0.10': - dependencies: - json-schema: 0.4.0 - '@alloc/quick-lru@5.2.0': {} '@ast-grep/napi-darwin-arm64@0.40.5': @@ -3632,7 +3544,8 @@ snapshots: - encoding - supports-color - '@opentelemetry/api@1.9.1': {} + '@opentelemetry/api@1.9.1': + optional: true '@poppinss/colors@4.1.6': dependencies: @@ -3844,8 +3757,6 @@ snapshots: '@speed-highlight/core@1.2.15': {} - '@standard-schema/spec@1.1.0': {} - '@swc/helpers@0.5.15': dependencies: tslib: 2.8.1 @@ -3873,8 +3784,6 @@ snapshots: dependencies: csstype: 3.2.3 - '@vercel/oidc@3.2.0': {} - abort-controller@3.0.0: dependencies: event-target-shim: 5.0.1 @@ -3890,14 +3799,6 @@ snapshots: dependencies: humanize-ms: 1.2.1 - ai@6.0.196(zod@4.4.3): - dependencies: - '@ai-sdk/gateway': 3.0.124(zod@4.4.3) - '@ai-sdk/provider': 3.0.10 - '@ai-sdk/provider-utils': 4.0.27(zod@4.4.3) - '@opentelemetry/api': 1.9.1 - zod: 4.4.3 - ansi-colors@4.1.3: {} ansi-regex@5.0.1: {} @@ -4213,8 +4114,6 @@ snapshots: event-target-shim@5.0.1: {} - eventsource-parser@3.1.0: {} - execa@5.1.1: dependencies: cross-spawn: 7.0.6 @@ -4460,8 +4359,6 @@ snapshots: jiti@1.21.7: {} - json-schema@0.4.0: {} - jsonrepair@3.14.0: {} jszip@3.10.1: @@ -4617,6 +4514,11 @@ snapshots: dependencies: mimic-fn: 2.1.0 + openai@6.42.0(ws@8.20.1)(zod@4.4.3): + optionalDependencies: + ws: 8.20.1 + zod: 4.4.3 + package-json-from-dist@1.0.1: {} pako@1.0.11: {} @@ -5132,4 +5034,5 @@ snapshots: cookie: 1.1.1 youch-core: 0.3.3 - zod@4.4.3: {} + zod@4.4.3: + optional: true From 5608b0fdd0de79ccd1a80608864134edf183380b Mon Sep 17 00:00:00 2001 From: baizhi958216 <1475289190@qq.com> Date: Thu, 11 Jun 2026 16:11:52 +0800 Subject: [PATCH 12/13] fix(engine): tolerate duplicated JSON outputs --- lib/engine/jsonParser.ts | 80 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 77 insertions(+), 3 deletions(-) diff --git a/lib/engine/jsonParser.ts b/lib/engine/jsonParser.ts index fc2489b..048987e 100644 --- a/lib/engine/jsonParser.ts +++ b/lib/engine/jsonParser.ts @@ -3,8 +3,9 @@ import { jsonrepair, JSONRepairError } from "jsonrepair"; // Strict-then-forgiving JSON parser for LLM output. Tries in order: // 1. Direct JSON.parse on the trimmed text. // 2. Extract from ```json``` fenced block. -// 3. Slice between first { and last } and parse. -// 4. Apply targeted regex pre-repairs (see preRepair) and try jsonrepair. +// 3. Parse the first complete JSON value prefix (handles duplicated objects). +// 4. Slice between first { and last } and parse. +// 5. Apply targeted regex pre-repairs (see preRepair) and try jsonrepair. // // On final failure, logs the first 800 chars of the raw model output so we // can diagnose the actual syntax error without flooding logs or leaking @@ -40,6 +41,67 @@ function preRepair(s: string): string { return s.replace(/"([^"\n:]+):(\s+)"/g, '"$1":$2"'); } +function firstJsonStart(s: string): number { + const objectStart = s.indexOf("{"); + const arrayStart = s.indexOf("["); + if (objectStart === -1) return arrayStart; + if (arrayStart === -1) return objectStart; + return Math.min(objectStart, arrayStart); +} + +function firstCompleteJsonValue(s: string): string | undefined { + const start = firstJsonStart(s); + if (start === -1) return undefined; + + const stack: string[] = []; + let inString = false; + let escaped = false; + + for (let i = start; i < s.length; i += 1) { + const ch = s[i]!; + + if (inString) { + if (escaped) { + escaped = false; + } else if (ch === "\\") { + escaped = true; + } else if (ch === "\"") { + inString = false; + } + continue; + } + + if (ch === "\"") { + inString = true; + continue; + } + + if (ch === "{") { + stack.push("}"); + continue; + } + + if (ch === "[") { + stack.push("]"); + continue; + } + + if (ch === "}" || ch === "]") { + if (stack.at(-1) !== ch) return undefined; + stack.pop(); + if (stack.length === 0) return s.slice(start, i + 1); + } + } + + return undefined; +} + +function parseFirstCompleteJsonValue(s: string): T | undefined { + const value = firstCompleteJsonValue(s); + if (!value) return undefined; + return JSON.parse(value) as T; +} + export function parseJsonLoose(raw: string): T { const trimmed = raw.trim(); @@ -54,10 +116,22 @@ export function parseJsonLoose(raw: string): T { try { return JSON.parse(fenced[1]) as T; } catch { - // fall through + try { + const parsed = parseFirstCompleteJsonValue(fenced[1]); + if (parsed !== undefined) return parsed; + } catch { + // fall through + } } } + try { + const parsed = parseFirstCompleteJsonValue(trimmed); + if (parsed !== undefined) return parsed; + } catch { + // fall through + } + const first = trimmed.indexOf("{"); const last = trimmed.lastIndexOf("}"); const slice = From 299df0d098e5ced0438d76e41c25b37626803ca8 Mon Sep 17 00:00:00 2001 From: baizhi958216 <1475289190@qq.com> Date: Thu, 11 Jun 2026 16:54:21 +0800 Subject: [PATCH 13/13] feat(web): remove unuse openai native adapter --- components/SettingsModal.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/components/SettingsModal.tsx b/components/SettingsModal.tsx index 9f555e5..2e8e880 100644 --- a/components/SettingsModal.tsx +++ b/components/SettingsModal.tsx @@ -53,7 +53,6 @@ export function readStoredVisionClick(): boolean { const PROVIDER_OPTIONS: { value: ProviderProtocol | ""; label: string }[] = [ { value: "", label: "自动推断(推荐)" }, { value: "openai_compatible", label: "OpenAI Compatible" }, - { value: "openai", label: "OpenAI (Native)" }, { value: "runware", label: "Runware" }, ];