Merge pull request #64 from zonghaoyuan/refactor/settings-modal
feat: add client-side model configuration and server fallback
This commit is contained in:
+29
-34
@@ -1,29 +1,24 @@
|
||||
import { generateText } from "ai";
|
||||
import type { LanguageModelUsage, ModelMessage } from "ai";
|
||||
import OpenAI from "openai";
|
||||
import type { ProviderConfig } from "@infiplot/types";
|
||||
import { createLanguageModel, resolveProtocol } from "./model";
|
||||
import { normalizeBaseUrl } from "./normalizeUrl";
|
||||
|
||||
/** A single turn of a chat conversation sent to the text model. */
export type ChatMessage = {
  /** Author of the turn. */
  role: "system" | "user" | "assistant";
  /** Plain-text body of the turn. */
  content: string;
};
|
||||
|
||||
// AI SDK 6 unifies cache stats across providers into usage.inputTokenDetails,
|
||||
// so a single shape covers Anthropic, Gemini, and OpenAI-compatible providers.
|
||||
function summarizeSdkUsage(
|
||||
tag: string,
|
||||
usage: LanguageModelUsage | undefined,
|
||||
usage: OpenAI.Completions.CompletionUsage | undefined,
|
||||
): string {
|
||||
if (!usage) return `[cache] ${tag} no-usage`;
|
||||
const input = usage.inputTokens ?? 0;
|
||||
const output = usage.outputTokens ?? 0;
|
||||
const read = usage.inputTokenDetails?.cacheReadTokens;
|
||||
const write = usage.inputTokenDetails?.cacheWriteTokens;
|
||||
if (typeof read === "number" || typeof write === "number") {
|
||||
const hit = read ?? 0;
|
||||
const create = write ?? 0;
|
||||
const rate = input > 0 ? ((hit / input) * 100).toFixed(1) : "n/a";
|
||||
return `[cache] ${tag} hit=${hit} create=${create} input=${input} rate=${rate}% completion=${output}`;
|
||||
const input = usage.prompt_tokens ?? 0;
|
||||
const output = usage.completion_tokens ?? 0;
|
||||
const details = (usage as { prompt_tokens_details?: { cached_tokens?: number } }).prompt_tokens_details;
|
||||
const cached = details?.cached_tokens;
|
||||
if (typeof cached === "number") {
|
||||
const rate = input > 0 ? ((cached / input) * 100).toFixed(1) : "n/a";
|
||||
return `[cache] ${tag} hit=${cached} input=${input} rate=${rate}% completion=${output}`;
|
||||
}
|
||||
return `[cache] ${tag} input=${input} completion=${output} (provider didn't report cache stats)`;
|
||||
}
|
||||
@@ -36,28 +31,28 @@ export async function chat(
|
||||
tag?: string;
|
||||
},
|
||||
): Promise<string> {
|
||||
const protocol = resolveProtocol(config);
|
||||
const model = createLanguageModel(config, protocol);
|
||||
|
||||
const system = messages.find((m) => m.role === "system")?.content;
|
||||
const convo: ModelMessage[] = messages
|
||||
.filter((m) => m.role !== "system")
|
||||
.map((m) => ({
|
||||
role: m.role as "user" | "assistant",
|
||||
content: m.content,
|
||||
}));
|
||||
|
||||
const { text, usage } = await generateText({
|
||||
model,
|
||||
system,
|
||||
messages: convo,
|
||||
temperature: opts?.temperature ?? 0.9,
|
||||
const client = new OpenAI({
|
||||
apiKey: config.apiKey,
|
||||
baseURL: normalizeBaseUrl(config.baseUrl, "openai_compatible"),
|
||||
maxRetries: 0,
|
||||
dangerouslyAllowBrowser: true,
|
||||
});
|
||||
|
||||
console.log(summarizeSdkUsage(opts?.tag ?? "chat", usage));
|
||||
const completion = await client.chat.completions.create({
|
||||
model: config.model,
|
||||
messages: messages.map((m) => ({
|
||||
role: m.role as "system" | "user" | "assistant",
|
||||
content: m.content,
|
||||
})),
|
||||
temperature: opts?.temperature ?? 0.9,
|
||||
stream: false,
|
||||
});
|
||||
|
||||
if (typeof text !== "string" || text.length === 0) {
|
||||
throw new Error(`Chat API (AI SDK ${protocol}) returned no content.`);
|
||||
const text = completion.choices[0]?.message?.content ?? "";
|
||||
console.log(summarizeSdkUsage(opts?.tag ?? "chat", completion.usage ?? undefined));
|
||||
|
||||
if (text.length === 0) {
|
||||
throw new Error(`Chat API returned no content.`);
|
||||
}
|
||||
return text;
|
||||
}
|
||||
|
||||
+107
-48
@@ -1,6 +1,4 @@
|
||||
import { generateImage as generateImageSdk } from "ai";
|
||||
import { createOpenAI } from "@ai-sdk/openai";
|
||||
import { createGoogleGenerativeAI } from "@ai-sdk/google";
|
||||
import OpenAI, { toFile, type Uploadable } from "openai";
|
||||
import type { Orientation, ProviderConfig, ProviderProtocol } from "@infiplot/types";
|
||||
import { fetchWithRetry } from "./fetchWithRetry";
|
||||
import { normalizeBaseUrl } from "./normalizeUrl";
|
||||
@@ -48,8 +46,8 @@ export type GenerateImageOptions = {
|
||||
/**
|
||||
* Reference images (UUIDs, URLs, or base64) to condition generation on —
|
||||
* typically character portraits + the prior scene image. Runware caps at 4;
|
||||
* we silently truncate beyond that. On the OpenAI/Gemini AI SDK paths these
|
||||
* map to `prompt.images` (the SDK accepts public URLs or data URLs).
|
||||
* we silently truncate beyond that. On the native OpenAI path these are
|
||||
* fetched/decoded and sent to `images.edit`.
|
||||
*/
|
||||
referenceImages?: string[];
|
||||
/** 0–1, FLUX needs ≥ 0.8 to actually have an effect. Runware-only. */
|
||||
@@ -58,7 +56,7 @@ export type GenerateImageOptions = {
|
||||
* Output aspect, locked per session. "portrait" → 9:16 vertical for mobile;
|
||||
* default/"landscape" → 16:9 widescreen. Mapped to each provider's nearest
|
||||
* supported size: Runware 1024×1792, OpenAI-compatible REST 1024x1792,
|
||||
* native gpt-image 1024x1536, Gemini aspectRatio 9:16.
|
||||
* native gpt-image 1024x1536.
|
||||
*/
|
||||
orientation?: Orientation;
|
||||
};
|
||||
@@ -66,8 +64,8 @@ export type GenerateImageOptions = {
|
||||
export type GenerateImageResult = {
|
||||
/**
|
||||
* Image the client can render directly. A Runware CDN URL on the Runware
|
||||
* path; a `data:<mime>;base64,...` URI on the AI SDK paths (OpenAI/Gemini
|
||||
* return raw bytes, not a hosted URL).
|
||||
* path; a `data:<mime>;base64,...` URI on the native OpenAI path when GPT
|
||||
* image models return raw bytes instead of a hosted URL.
|
||||
*/
|
||||
imageUrl: string;
|
||||
/**
|
||||
@@ -117,63 +115,124 @@ export async function generateImage(
|
||||
const protocol = resolveImageProtocol(config);
|
||||
switch (protocol) {
|
||||
case "openai":
|
||||
case "google":
|
||||
return generateImageViaAiSdk(config, prompt, options, protocol);
|
||||
return generateImageOpenAi(config, prompt, options);
|
||||
case "runware":
|
||||
return generateImageRunware(config, prompt, options);
|
||||
case "anthropic":
|
||||
throw new Error(
|
||||
'IMAGE_PROVIDER "anthropic" does not generate images. Use "openai", "google", "runware", or "openai_compatible".',
|
||||
);
|
||||
case "openai_compatible":
|
||||
default:
|
||||
return generateImageOpenAiCompatible(config, prompt, options);
|
||||
}
|
||||
}
|
||||
|
||||
// Native OpenAI (gpt-image) / Gemini (Nano Banana) via the Vercel AI SDK.
|
||||
// Unlike the fetch path, this supports reference-image editing via
|
||||
// `prompt.images`. The SDK returns raw bytes (no hosted URL), so we hand the
|
||||
// client a data URI and synthesize a UUID; continuity references reuse the
|
||||
// data URI rather than a provider UUID.
|
||||
async function generateImageViaAiSdk(
|
||||
// Native OpenAI (gpt-image) via the official OpenAI SDK. Unlike the compatible
|
||||
// fetch path, this supports reference-image editing through `images.edit`.
|
||||
// GPT image models return raw bytes, so we hand the client a data URI and
|
||||
// synthesize a UUID; continuity references reuse the data URI rather than a
|
||||
// provider UUID.
|
||||
async function generateImageOpenAi(
|
||||
config: ProviderConfig,
|
||||
prompt: string,
|
||||
options: GenerateImageOptions | undefined,
|
||||
protocol: "openai" | "google",
|
||||
options?: GenerateImageOptions,
|
||||
): Promise<GenerateImageResult> {
|
||||
const baseURL = normalizeBaseUrl(config.baseUrl, protocol);
|
||||
const imageModel =
|
||||
protocol === "openai"
|
||||
? createOpenAI({ apiKey: config.apiKey, baseURL }).image(config.model)
|
||||
: createGoogleGenerativeAI({ apiKey: config.apiKey, baseURL }).image(
|
||||
config.model,
|
||||
);
|
||||
|
||||
const refs = (options?.referenceImages ?? []).slice(0, MAX_REFERENCE_IMAGES);
|
||||
const promptArg =
|
||||
refs.length > 0 ? { text: prompt, images: refs } : prompt;
|
||||
|
||||
// Session-locked aspect. gpt-image takes an explicit `size` (portrait /
|
||||
// landscape options are 1024x1536 / 1536x1024); Gemini takes an `aspectRatio`.
|
||||
const portrait = options?.orientation === "portrait";
|
||||
const { image } = await generateImageSdk({
|
||||
model: imageModel,
|
||||
prompt: promptArg,
|
||||
...(protocol === "openai"
|
||||
? { size: (portrait ? "1024x1536" : "1536x1024") as `${number}x${number}` }
|
||||
: { aspectRatio: (portrait ? "9:16" : "16:9") as `${number}:${number}` }),
|
||||
const client = new OpenAI({
|
||||
apiKey: config.apiKey,
|
||||
baseURL: normalizeBaseUrl(config.baseUrl, "openai"),
|
||||
maxRetries: 2,
|
||||
dangerouslyAllowBrowser: true,
|
||||
});
|
||||
const refs = (options?.referenceImages ?? []).slice(0, MAX_REFERENCE_IMAGES);
|
||||
const portrait = options?.orientation === "portrait";
|
||||
const size = portrait ? "1024x1536" : "1536x1024";
|
||||
|
||||
return {
|
||||
imageUrl: `data:${image.mediaType};base64,${image.base64}`,
|
||||
imageUuid: crypto.randomUUID(),
|
||||
};
|
||||
const response =
|
||||
refs.length > 0
|
||||
? await client.images.edit({
|
||||
model: config.model,
|
||||
prompt,
|
||||
image: await Promise.all(refs.map(referenceImageToUploadable)),
|
||||
n: 1,
|
||||
size,
|
||||
})
|
||||
: await client.images.generate({
|
||||
model: config.model,
|
||||
prompt,
|
||||
n: 1,
|
||||
size,
|
||||
});
|
||||
|
||||
return imageResponseToResult(response);
|
||||
}
|
||||
|
||||
/**
 * Turn a reference-image string into an `Uploadable` for `images.edit`.
 *
 * Accepts `data:` URLs and `http(s)` URLs; both are read through `fetch`.
 * The upload's media type comes from the response `content-type` header,
 * defaulting to `image/png` when the header is absent.
 *
 * @param ref - A `data:` URL or an `http(s)` URL of the reference image.
 * @returns The fetched image wrapped by `toFile`.
 * @throws Error when `ref` is neither kind of URL, or the fetch response is
 *   not OK.
 */
async function referenceImageToUploadable(ref: string): Promise<Uploadable> {
  const isDataUrl = ref.startsWith("data:");
  const isHttpUrl = !isDataUrl && /^https?:\/\//i.test(ref);

  if (!isDataUrl && !isHttpUrl) {
    throw new Error(
      `Native OpenAI image editing requires reference image URLs or data URLs; got "${ref.slice(0, 32)}...".`,
    );
  }

  const response = await fetch(ref);
  if (!response.ok) {
    throw new Error(
      isDataUrl
        ? `Failed to read data URL reference image.`
        : `Failed to fetch reference image ${ref}: HTTP ${response.status}`,
    );
  }

  const mediaType = response.headers.get("content-type") ?? "image/png";
  // Data URLs carry no path, so their filename is derived from the media type;
  // remote URLs keep their own filename when it has an extension.
  const filename = isDataUrl
    ? `reference.${extensionFromMediaType(mediaType)}`
    : filenameFromUrl(ref, mediaType);

  return toFile(response, filename, { type: mediaType });
}
|
||||
|
||||
/**
 * Convert an OpenAI images response into a `GenerateImageResult`.
 *
 * Only the first entry of `response.data` is used. Base64 payloads take
 * precedence and are wrapped in a `data:image/<format>;base64,...` URI, with
 * the format taken from `response.output_format` (default `png`). Otherwise
 * the entry's `url` is returned. In both cases the UUID is generated locally
 * with `crypto.randomUUID()`.
 *
 * @throws Error when the first entry has neither `b64_json` nor `url`.
 */
function imageResponseToResult(
  response: OpenAI.Images.ImagesResponse,
): GenerateImageResult {
  const first = response.data?.[0];

  const b64 = first?.b64_json;
  if (b64) {
    const format = response.output_format ?? "png";
    return {
      imageUrl: `data:image/${format};base64,${b64}`,
      imageUuid: crypto.randomUUID(),
    };
  }

  const imageUrl = first?.url;
  if (imageUrl) {
    return { imageUrl, imageUuid: crypto.randomUUID() };
  }

  throw new Error(`No image data in OpenAI response.`);
}
|
||||
|
||||
/**
 * Pick an upload filename for a remote reference image.
 *
 * Uses the last non-empty path segment of `url` when it ends in an
 * extension; otherwise (or when `url` cannot be parsed) falls back to
 * `reference.<ext>` with the extension derived from `mediaType`.
 */
function filenameFromUrl(url: string, mediaType: string): string {
  try {
    const segments = new URL(url).pathname.split("/").filter(Boolean);
    const lastSegment = segments[segments.length - 1];
    if (lastSegment && /\.[a-z0-9]+$/i.test(lastSegment)) return lastSegment;
  } catch {
    // Unparseable URL: fall back to the media type below.
  }
  return `reference.${extensionFromMediaType(mediaType)}`;
}
|
||||
|
||||
/**
 * Map an image media type to a file extension.
 *
 * Matching is case-insensitive because media type names are case-insensitive.
 * Anything that is not recognisably JPEG or WebP maps to `png`.
 *
 * @param mediaType - A media type string such as `image/jpeg`.
 * @returns `"jpg"`, `"webp"`, or `"png"`.
 */
function extensionFromMediaType(mediaType: string): string {
  const normalized = mediaType.toLowerCase();
  if (normalized.includes("jpeg") || normalized.includes("jpg")) return "jpg";
  if (normalized.includes("webp")) return "webp";
  return "png";
}
|
||||
|
||||
// OpenAI-compatible REST route (GPTGod, DALL-E proxies, etc.). Basic
|
||||
// text-to-image only — no reference images on this path; for editing/anchoring
|
||||
// set IMAGE_PROVIDER=openai (or google) to take the AI SDK path above.
|
||||
// set IMAGE_PROVIDER=openai to take the native OpenAI path above.
|
||||
async function generateImageOpenAiCompatible(
|
||||
config: ProviderConfig,
|
||||
prompt: string,
|
||||
|
||||
@@ -1,23 +0,0 @@
|
||||
import { createAnthropic } from "@ai-sdk/anthropic";
|
||||
import { createGoogleGenerativeAI } from "@ai-sdk/google";
|
||||
import { createOpenAI } from "@ai-sdk/openai";
|
||||
import type { ProviderConfig, ProviderProtocol } from "@infiplot/types";
|
||||
import { normalizeBaseUrl } from "./normalizeUrl";
|
||||
|
||||
export function resolveProtocol(config: ProviderConfig): ProviderProtocol {
|
||||
return config.provider ?? "openai_compatible";
|
||||
}
|
||||
|
||||
export function createLanguageModel(config: ProviderConfig, protocol: ProviderProtocol) {
|
||||
const baseURL = normalizeBaseUrl(config.baseUrl, protocol);
|
||||
switch (protocol) {
|
||||
case "anthropic":
|
||||
return createAnthropic({ apiKey: config.apiKey, baseURL })(config.model);
|
||||
case "google":
|
||||
return createGoogleGenerativeAI({ apiKey: config.apiKey, baseURL })(config.model);
|
||||
case "openai_compatible":
|
||||
case "openai":
|
||||
default:
|
||||
return createOpenAI({ apiKey: config.apiKey, baseURL }).chat(config.model);
|
||||
}
|
||||
}
|
||||
@@ -31,8 +31,6 @@ const ENDPOINT_SUFFIX =
|
||||
const DEFAULT_VERSION_SEGMENT: Record<ProviderProtocol, string | null> = {
|
||||
openai_compatible: "v1",
|
||||
openai: "v1",
|
||||
anthropic: "v1",
|
||||
google: "v1beta",
|
||||
// Runware posts to the bare base URL with no version-pathed sub-resource,
|
||||
// so never inject a segment for it.
|
||||
runware: null,
|
||||
|
||||
+27
-30
@@ -1,7 +1,6 @@
|
||||
import { generateText } from "ai";
|
||||
import type { ModelMessage } from "ai";
|
||||
import OpenAI from "openai";
|
||||
import type { ProviderConfig } from "@infiplot/types";
|
||||
import { createLanguageModel, resolveProtocol } from "./model";
|
||||
import { normalizeBaseUrl } from "./normalizeUrl";
|
||||
|
||||
const VISION_TIMEOUT_MS = 60_000;
|
||||
|
||||
@@ -22,34 +21,32 @@ export async function analyzeImageDataUrl(
|
||||
imageDataUrl: string,
|
||||
prompt: string,
|
||||
): Promise<string> {
|
||||
const protocol = resolveProtocol(config);
|
||||
const model = createLanguageModel(config, protocol);
|
||||
const client = new OpenAI({
|
||||
apiKey: config.apiKey,
|
||||
baseURL: normalizeBaseUrl(config.baseUrl, "openai_compatible"),
|
||||
maxRetries: 0,
|
||||
timeout: VISION_TIMEOUT_MS,
|
||||
dangerouslyAllowBrowser: true,
|
||||
});
|
||||
|
||||
const messages: ModelMessage[] = [
|
||||
{
|
||||
role: "user",
|
||||
content: [
|
||||
{ type: "text", text: prompt },
|
||||
{ type: "image", image: imageDataUrl },
|
||||
],
|
||||
},
|
||||
];
|
||||
const completion = await client.chat.completions.create({
|
||||
model: config.model,
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: [
|
||||
{ type: "text", text: prompt },
|
||||
{ type: "image_url", image_url: { url: imageDataUrl } },
|
||||
],
|
||||
},
|
||||
],
|
||||
temperature: 0.2,
|
||||
stream: false,
|
||||
});
|
||||
|
||||
const timeoutCtrl = new AbortController();
|
||||
const timeoutId = setTimeout(() => timeoutCtrl.abort(), VISION_TIMEOUT_MS);
|
||||
try {
|
||||
const { text } = await generateText({
|
||||
model,
|
||||
messages,
|
||||
temperature: 0.2,
|
||||
maxRetries: 0,
|
||||
abortSignal: timeoutCtrl.signal,
|
||||
});
|
||||
if (typeof text !== "string" || text.length === 0) {
|
||||
throw new Error(`Vision API (AI SDK ${protocol}) returned no content.`);
|
||||
}
|
||||
return text;
|
||||
} finally {
|
||||
clearTimeout(timeoutId);
|
||||
const text = completion.choices[0]?.message?.content ?? "";
|
||||
if (text.length === 0) {
|
||||
throw new Error(`Vision API returned no content.`);
|
||||
}
|
||||
return text;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,160 @@
|
||||
import type { EngineConfig, ProviderProtocol } from "@infiplot/types";
|
||||
|
||||
// Bring-your-own model keys — stored CLIENT-SIDE ONLY.
|
||||
//
|
||||
// When a user supplies their own text/image/vision API credentials, we persist
|
||||
// them in localStorage and the browser talks to providers directly. The keys
|
||||
// are therefore never sent to our server: no request body, no header, no log.
|
||||
|
||||
const STORAGE_KEY = "infiplot:model";
|
||||
|
||||
/** Provider protocols accepted when validating a stored `*Provider` value. */
const VALID_PROTOCOLS: readonly ProviderProtocol[] = [
  "openai_compatible",
  "openai",
  "runware",
];
|
||||
|
||||
/**
 * Shape of the model configuration persisted in localStorage: one
 * base URL / API key / model triple, plus an optional protocol, for each of
 * the text, image, and vision roles.
 */
export type StoredModelConfig = {
  // Text model
  textBaseUrl: string;
  textApiKey: string;
  textModel: string;
  textProvider?: ProviderProtocol;

  // Image model
  imageBaseUrl: string;
  imageApiKey: string;
  imageModel: string;
  imageProvider?: ProviderProtocol;

  // Vision model
  visionBaseUrl: string;
  visionApiKey: string;
  visionModel: string;
  visionProvider?: ProviderProtocol;
};
|
||||
|
||||
/** Type guard: true when `p` is one of the entries of `VALID_PROTOCOLS`. */
function isValidProtocol(p: string): p is ProviderProtocol {
  const allowed = VALID_PROTOCOLS as readonly string[];
  return allowed.includes(p);
}
|
||||
|
||||
/**
 * Narrow an untrusted value to a `ProviderProtocol`.
 *
 * @returns `raw` when it is a string accepted by `isValidProtocol`,
 *   otherwise `undefined`.
 */
function readProtocol(raw: unknown): ProviderProtocol | undefined {
  return typeof raw === "string" && isValidProtocol(raw) ? raw : undefined;
}
|
||||
|
||||
/**
 * Read and validate the persisted model config.
 *
 * Returns null when running on the server (`window` is undefined), when
 * nothing is stored, when the stored value cannot be parsed or is not an
 * object, or when any required base URL / API key / model field is missing
 * or blank. String fields are trimmed; `*Provider` fields that fail
 * `readProtocol` come back as `undefined`.
 */
export function readStoredModelConfig(): StoredModelConfig | null {
  if (typeof window === "undefined") return null;

  try {
    const raw = window.localStorage.getItem(STORAGE_KEY);
    if (!raw) return null;

    // Stored data is untrusted: narrow it instead of asserting its shape.
    const parsed: unknown = JSON.parse(raw);
    if (typeof parsed !== "object" || parsed === null) return null;
    const record = parsed as Record<string, unknown>;

    const readTrimmed = (key: keyof StoredModelConfig): string => {
      const value = record[key];
      return typeof value === "string" ? value.trim() : "";
    };

    const config: StoredModelConfig = {
      textBaseUrl: readTrimmed("textBaseUrl"),
      textApiKey: readTrimmed("textApiKey"),
      textModel: readTrimmed("textModel"),
      textProvider: readProtocol(record["textProvider"]),
      imageBaseUrl: readTrimmed("imageBaseUrl"),
      imageApiKey: readTrimmed("imageApiKey"),
      imageModel: readTrimmed("imageModel"),
      imageProvider: readProtocol(record["imageProvider"]),
      visionBaseUrl: readTrimmed("visionBaseUrl"),
      visionApiKey: readTrimmed("visionApiKey"),
      visionModel: readTrimmed("visionModel"),
      visionProvider: readProtocol(record["visionProvider"]),
    };

    const requiredValues = [
      config.textBaseUrl,
      config.textApiKey,
      config.textModel,
      config.imageBaseUrl,
      config.imageApiKey,
      config.imageModel,
      config.visionBaseUrl,
      config.visionApiKey,
      config.visionModel,
    ];
    if (requiredValues.some((value) => value === "")) return null;

    return config;
  } catch {
    // Storage access or JSON parsing failed: treat as "not configured".
    return null;
  }
}
|
||||
|
||||
/**
 * Persist the model config to localStorage.
 *
 * Every base URL / API key / model string is trimmed before saving so that
 * whitespace from pastes never breaks headers; provider fields are stored
 * as given. Does nothing on the server, and storage failures are ignored.
 */
export function writeStoredModelConfig(config: StoredModelConfig): void {
  if (typeof window === "undefined") return;

  try {
    const payload: StoredModelConfig = {
      textBaseUrl: config.textBaseUrl.trim(),
      textApiKey: config.textApiKey.trim(),
      textModel: config.textModel.trim(),
      textProvider: config.textProvider,
      imageBaseUrl: config.imageBaseUrl.trim(),
      imageApiKey: config.imageApiKey.trim(),
      imageModel: config.imageModel.trim(),
      imageProvider: config.imageProvider,
      visionBaseUrl: config.visionBaseUrl.trim(),
      visionApiKey: config.visionApiKey.trim(),
      visionModel: config.visionModel.trim(),
      visionProvider: config.visionProvider,
    };
    window.localStorage.setItem(STORAGE_KEY, JSON.stringify(payload));
  } catch {
    // Storage disabled / quota / private mode — BYO simply stays off.
  }
}
|
||||
|
||||
/**
 * Remove the persisted model config from localStorage.
 * Does nothing on the server; storage errors are deliberately ignored.
 */
export function clearStoredModelConfig(): void {
  if (typeof window === "undefined") return;

  try {
    window.localStorage.removeItem(STORAGE_KEY);
  } catch {
    // Best-effort removal: nothing useful to do if storage is unavailable.
  }
}
|
||||
|
||||
/**
 * Build a full `EngineConfig` from the stored model config plus an optional
 * TTS config.
 *
 * @param model - Stored model config, or null when none is configured.
 * @param tts - TTS config; null is mapped to `undefined` in the result.
 * @returns Engine config with `mockImage` set to false.
 * @throws Error when `model` is null, so callers can surface a friendly
 *   "please configure" message.
 */
export function resolveEngineConfig(
  model: StoredModelConfig | null,
  tts: import("@infiplot/types").TtsConfig | null,
): EngineConfig {
  if (!model) {
    throw new Error("模型配置未设置。请返回首页,点击「模型设置」配置 API 参数。");
  }

  const text = {
    baseUrl: model.textBaseUrl,
    apiKey: model.textApiKey,
    model: model.textModel,
    provider: model.textProvider,
  };
  const image = {
    baseUrl: model.imageBaseUrl,
    apiKey: model.imageApiKey,
    model: model.imageModel,
    provider: model.imageProvider,
  };
  const vision = {
    baseUrl: model.visionBaseUrl,
    apiKey: model.visionApiKey,
    model: model.visionModel,
    provider: model.visionProvider,
  };

  return {
    text,
    image,
    vision,
    tts: tts ?? undefined,
    mockImage: false,
  };
}
|
||||
@@ -6,8 +6,6 @@ import type {
|
||||
|
||||
const VALID_PROTOCOLS = [
|
||||
"openai_compatible",
|
||||
"anthropic",
|
||||
"google",
|
||||
"openai",
|
||||
"runware",
|
||||
] as const;
|
||||
|
||||
@@ -3,8 +3,9 @@ import { jsonrepair, JSONRepairError } from "jsonrepair";
|
||||
// Strict-then-forgiving JSON parser for LLM output. Tries in order:
|
||||
// 1. Direct JSON.parse on the trimmed text.
|
||||
// 2. Extract from ```json``` fenced block.
|
||||
// 3. Slice between first { and last } and parse.
|
||||
// 4. Apply targeted regex pre-repairs (see preRepair) and try jsonrepair.
|
||||
// 3. Parse the first complete JSON value prefix (handles duplicated objects).
|
||||
// 4. Slice between first { and last } and parse.
|
||||
// 5. Apply targeted regex pre-repairs (see preRepair) and try jsonrepair.
|
||||
//
|
||||
// On final failure, logs the first 800 chars of the raw model output so we
|
||||
// can diagnose the actual syntax error without flooding logs or leaking
|
||||
@@ -40,6 +41,67 @@ function preRepair(s: string): string {
|
||||
return s.replace(/"([^"\n:]+):(\s+)"/g, '"$1":$2"');
|
||||
}
|
||||
|
||||
/**
 * Index of the first `{` or `[` in `s`, whichever comes first.
 *
 * @returns The index, or -1 when `s` contains neither character.
 */
function firstJsonStart(s: string): number {
  const found = [s.indexOf("{"), s.indexOf("[")].filter((i) => i !== -1);
  return found.length === 0 ? -1 : Math.min(...found);
}
|
||||
|
||||
/**
 * Extract the first bracket-balanced object or array from `s`.
 *
 * Scanning starts at the first `{` or `[` and tracks nesting while skipping
 * string literals (including escaped characters), so brackets inside strings
 * are ignored. The result is a substring only; it is not validated as JSON.
 *
 * @returns The substring from the opening bracket through its matching
 *   closer, or `undefined` when there is no opener, a closer does not match
 *   the innermost opener, or the input ends before the value closes.
 */
function firstCompleteJsonValue(s: string): string | undefined {
  const start = firstJsonStart(s);
  if (start === -1) return undefined;

  // Closing brackets we still expect, innermost last.
  const expectedClosers: string[] = [];
  let inString = false;
  let escaped = false;

  for (let i = start; i < s.length; i += 1) {
    const ch = s.charAt(i);

    if (inString) {
      if (escaped) {
        // The character after a backslash is consumed verbatim.
        escaped = false;
      } else if (ch === "\\") {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
      continue;
    }

    switch (ch) {
      case '"':
        inString = true;
        break;
      case "{":
        expectedClosers.push("}");
        break;
      case "[":
        expectedClosers.push("]");
        break;
      case "}":
      case "]":
        // A closer that does not match the innermost opener is malformed.
        if (expectedClosers.pop() !== ch) return undefined;
        if (expectedClosers.length === 0) return s.slice(start, i + 1);
        break;
      default:
        break;
    }
  }

  return undefined;
}
|
||||
|
||||
/**
 * Parse the first bracket-balanced JSON value found in `s`.
 *
 * @returns The parsed value, or `undefined` when no complete value is found.
 * @throws SyntaxError (from `JSON.parse`) when the extracted text is
 *   balanced but not valid JSON.
 */
function parseFirstCompleteJsonValue<T>(s: string): T | undefined {
  const candidate = firstCompleteJsonValue(s);
  if (!candidate) return undefined;
  return JSON.parse(candidate) as T;
}
|
||||
|
||||
export function parseJsonLoose<T>(raw: string): T {
|
||||
const trimmed = raw.trim();
|
||||
|
||||
@@ -54,10 +116,22 @@ export function parseJsonLoose<T>(raw: string): T {
|
||||
try {
|
||||
return JSON.parse(fenced[1]) as T;
|
||||
} catch {
|
||||
// fall through
|
||||
try {
|
||||
const parsed = parseFirstCompleteJsonValue<T>(fenced[1]);
|
||||
if (parsed !== undefined) return parsed;
|
||||
} catch {
|
||||
// fall through
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
const parsed = parseFirstCompleteJsonValue<T>(trimmed);
|
||||
if (parsed !== undefined) return parsed;
|
||||
} catch {
|
||||
// fall through
|
||||
}
|
||||
|
||||
const first = trimmed.indexOf("{");
|
||||
const last = trimmed.lastIndexOf("}");
|
||||
const slice =
|
||||
|
||||
@@ -0,0 +1,101 @@
|
||||
import {
|
||||
startSession as startSessionClient,
|
||||
requestScene as requestSceneClient,
|
||||
visionDecide as visionDecideClient,
|
||||
classifyFreeform as classifyFreeformClient,
|
||||
requestInsertBeat as requestInsertBeatClient,
|
||||
} from "@infiplot/engine";
|
||||
import {
|
||||
readStoredModelConfig,
|
||||
resolveEngineConfig,
|
||||
} from "@/lib/clientModelConfig";
|
||||
import { loadClientTtsConfig } from "@/lib/clientTtsConfig";
|
||||
import type {
|
||||
FreeformClassifyRequest,
|
||||
FreeformClassifyResponse,
|
||||
EngineConfig,
|
||||
InsertBeatRequest,
|
||||
InsertBeatResponse,
|
||||
SceneRequest,
|
||||
SceneResponse,
|
||||
StartRequest,
|
||||
StartResponse,
|
||||
VisionRequest,
|
||||
VisionResponse,
|
||||
} from "@infiplot/types";
|
||||
|
||||
/**
 * Build the client-side engine config from browser storage.
 *
 * @returns The resolved `EngineConfig`, or null when no model config is
 *   stored (callers then fall back to the server API routes).
 */
function getClientConfig(): EngineConfig | null {
  const modelCfg = readStoredModelConfig();
  // Without a model config the TTS config is never used, so don't load it.
  if (!modelCfg) return null;
  return resolveEngineConfig(modelCfg, loadClientTtsConfig());
}
|
||||
|
||||
/**
 * POST `body` as JSON to `path` and return the parsed JSON response.
 *
 * @param path - URL to POST to.
 * @param body - Value serialized with `JSON.stringify`.
 * @returns The response body parsed as JSON, typed as `T` (not validated).
 * @throws Error on a non-OK response. The message is the response's string
 *   `error` field when present and non-empty, otherwise `HTTP <status>`.
 */
async function postJson<T>(path: string, body: unknown): Promise<T> {
  const res = await fetch(path, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(body),
  });

  if (res.ok) {
    return res.json() as Promise<T>;
  }

  let message = `HTTP ${res.status}`;
  try {
    const data: unknown = await res.json();
    if (typeof data === "object" && data !== null && "error" in data) {
      const detail = (data as { error?: unknown }).error;
      // Only a non-empty string is a usable message; anything else would
      // stringify to something like "[object Object]".
      if (typeof detail === "string" && detail.length > 0) message = detail;
    }
  } catch {
    // Ignore parse failure and keep the HTTP status message.
  }
  throw new Error(message);
}
|
||||
|
||||
// ── Unified entry points ───────────────────────────────────────────────
|
||||
// When the browser has a BYO model config in localStorage, these call the
|
||||
// client-side engine directly (talking to providers from the browser).
|
||||
// Otherwise they fall back to the server-side API routes, which read
|
||||
// environment variables — useful for Vercel deploys that already supply keys.
|
||||
|
||||
/**
 * Start a session. Uses the client-side engine when a client config is
 * available; otherwise POSTs the request to `/api/start`.
 */
export async function startSession(req: StartRequest): Promise<StartResponse> {
  const config = getClientConfig();
  return config
    ? startSessionClient(config, req)
    : postJson<StartResponse>("/api/start", req);
}
|
||||
|
||||
/**
 * Request a scene. Uses the client-side engine when a client config is
 * available; otherwise POSTs the request to `/api/scene`.
 */
export async function requestScene(req: SceneRequest): Promise<SceneResponse> {
  const config = getClientConfig();
  return config
    ? requestSceneClient(config, req)
    : postJson<SceneResponse>("/api/scene", req);
}
|
||||
|
||||
/**
 * Run the vision decision. Uses the client-side engine when a client config
 * is available; otherwise POSTs the request to `/api/vision`.
 */
export async function visionDecide(req: VisionRequest): Promise<VisionResponse> {
  const config = getClientConfig();
  return config
    ? visionDecideClient(config, req)
    : postJson<VisionResponse>("/api/vision", req);
}
|
||||
|
||||
/**
 * Classify freeform input. Uses the client-side engine when a client config
 * is available; otherwise POSTs the request to `/api/classify-freeform`.
 */
export async function classifyFreeform(
  req: FreeformClassifyRequest,
): Promise<FreeformClassifyResponse> {
  const config = getClientConfig();
  return config
    ? classifyFreeformClient(config, req)
    : postJson<FreeformClassifyResponse>("/api/classify-freeform", req);
}
|
||||
|
||||
/**
 * Request an inserted beat. Uses the client-side engine when a client config
 * is available; otherwise POSTs the request to `/api/insert-beat`.
 */
export async function requestInsertBeat(
  req: InsertBeatRequest,
): Promise<InsertBeatResponse> {
  const config = getClientConfig();
  return config
    ? requestInsertBeatClient(config, req)
    : postJson<InsertBeatResponse>("/api/insert-beat", req);
}
|
||||
@@ -0,0 +1,11 @@
|
||||
/**
 * Prompt asking a model to describe only the visual style of an attached
 * image, answering with a single JSON object of the form
 * `{"stylePrompt": "..."}`.
 */
export const STYLE_EXTRACTION_PROMPT = `You are a senior concept artist helping describe an image's visual style so that a text-to-image diffusion model (FLUX) can reproduce the same aesthetic on different subjects.

Look at the attached image and produce a single English style-prompt string that captures ONLY its visual style — NOT its subject matter. Focus on:
- Medium / technique (e.g., watercolor, oil painting, cel-shaded anime, 3D render, pixel art)
- Line work and rendering (sharp ink outlines, soft shading, painterly brushstrokes, flat colors)
- Color palette and lighting (pastel, saturated, monochrome, warm golden-hour, cool neon, high contrast)
- Mood and atmosphere (dreamy, melancholic, cinematic, nostalgic, gritty)
- Any recognizable artistic influence (Ghibli, Makoto Shinkai, ukiyo-e, vaporwave, cyberpunk anime, etc.)

Do NOT describe the characters, objects, or scene contents. Output exactly one JSON object:
{"stylePrompt": "<comma-separated English visual-style attributes, ~30-60 words>"}`;
|
||||
@@ -8,6 +8,16 @@ import type { CharacterVoice, TtsConfig } from "@infiplot/types";
|
||||
// top-N candidates so multiple similar characters don't collapse onto the
|
||||
// same voice. Provision is a pure function — no network call needed.
|
||||
|
||||
/**
 * Base64-encode the bytes of an `ArrayBuffer` using `btoa`, without relying
 * on Node's `Buffer`.
 *
 * The bytes are turned into a binary string in fixed-size chunks. Chunking
 * keeps the number of arguments per `String.fromCharCode` call bounded and
 * avoids appending to the string once per byte.
 *
 * @param buffer - Raw bytes to encode.
 * @returns The base64 encoding of `buffer` (empty string for empty input).
 */
function arrayBufferToBase64(buffer: ArrayBuffer): string {
  const bytes = new Uint8Array(buffer);
  const CHUNK_SIZE = 0x8000;
  const pieces: string[] = [];

  for (let offset = 0; offset < bytes.length; offset += CHUNK_SIZE) {
    const chunk = bytes.subarray(offset, offset + CHUNK_SIZE);
    pieces.push(String.fromCharCode(...chunk));
  }

  return btoa(pieces.join(""));
}
|
||||
|
||||
const OUTPUT_FORMAT = "mp3";
|
||||
const OUTPUT_MIME = "audio/mpeg";
|
||||
|
||||
@@ -183,8 +193,6 @@ export async function stepfunSynthesize(
|
||||
}
|
||||
|
||||
const ab = await res.arrayBuffer();
|
||||
// Buffer is fine here — TTS routes run on runtime="nodejs". Falls back to
|
||||
// btoa+chunks if we ever target Edge.
|
||||
const audioBase64 = Buffer.from(ab).toString("base64");
|
||||
const audioBase64 = arrayBufferToBase64(ab);
|
||||
return { audioBase64, mimeType: OUTPUT_MIME };
|
||||
}
|
||||
|
||||
+4
-8
@@ -327,19 +327,15 @@ export type VisionClassify = "insert-beat" | "change-scene";
|
||||
* openai_compatible text / vision / image — OpenAI Chat Completions +
|
||||
* `/images/generations` (self-implemented fetch; the
|
||||
* default for text/vision when unset)
|
||||
* anthropic text / vision — native Anthropic Messages (AI SDK)
|
||||
* google text / vision / image — native Gemini (AI SDK); image
|
||||
* uses the Nano Banana family
|
||||
* openai image only — OpenAI gpt-image via AI SDK,
|
||||
* unlocks reference-image editing (for text/vision use
|
||||
* openai_compatible, which already speaks OpenAI's format)
|
||||
* openai image only — OpenAI gpt-image via the
|
||||
* official OpenAI SDK, unlocks reference-image editing
|
||||
* (for text/vision use openai_compatible, which already
|
||||
* speaks OpenAI's format)
|
||||
* runware image only — Runware task-array protocol
|
||||
* (self-implemented; the default for runware.ai URLs)
|
||||
*/
|
||||
export type ProviderProtocol =
|
||||
| "openai_compatible"
|
||||
| "anthropic"
|
||||
| "google"
|
||||
| "openai"
|
||||
| "runware";
|
||||
|
||||
|
||||
Reference in New Issue
Block a user