feat(ai-client): multi-provider compat — native Anthropic/Google + URL tolerance
- TEXT/VISION: add native Anthropic & Google Gemini paths via the Vercel AI SDK, selectable through TEXT_PROVIDER / VISION_PROVIDER (default: openai_compatible)
- IMAGE: expand to openai (gpt-image) / google (Nano Banana) via the AI SDK, alongside the existing Runware task-array and OpenAI-compatible REST paths
- normalizeBaseUrl: tolerate URLs with or without /v1 (or /chat/completions); append the per-protocol version segment only for bare hosts
- config: readProvider() reads *_PROVIDER; types: add ProviderProtocol and the optional ProviderConfig.provider field
- deps: @ai-sdk/anthropic, @ai-sdk/google; docs in .env.example + README

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
+88
-2
@@ -1,5 +1,10 @@
|
||||
import type { ProviderConfig } from "@infiplot/types";
|
||||
import { generateText } from "ai";
|
||||
import type { LanguageModelUsage, ModelMessage } from "ai";
|
||||
import { createAnthropic } from "@ai-sdk/anthropic";
|
||||
import { createGoogleGenerativeAI } from "@ai-sdk/google";
|
||||
import type { ProviderConfig, ProviderProtocol } from "@infiplot/types";
|
||||
import { fetchWithRetry } from "./fetchWithRetry";
|
||||
import { normalizeBaseUrl } from "./normalizeUrl";
|
||||
|
||||
export type ChatMessage = {
|
||||
role: "system" | "user" | "assistant";
|
||||
@@ -57,6 +62,31 @@ function summarizeUsage(tag: string, usage: Usage | undefined): string {
|
||||
return `[cache] ${tag} prompt=${prompt} completion=${completion} (provider didn't report cache stats)`;
|
||||
}
|
||||
|
||||
/**
 * Formats the one-line `[cache] …` usage summary for a call made through the
 * AI SDK.
 *
 * AI SDK 6 unifies cache stats across providers into `usage.inputTokenDetails`,
 * so a single shape covers Anthropic + Gemini (no per-provider probing).
 *
 * @param tag - Label identifying the call site in the log line.
 * @param usage - Usage object returned by the SDK, or `undefined` when the
 *   call reported none.
 * @returns The formatted log line.
 */
function summarizeSdkUsage(
  tag: string,
  usage: LanguageModelUsage | undefined,
): string {
  if (!usage) return `[cache] ${tag} no-usage`;

  const input = usage.inputTokens ?? 0;
  const output = usage.outputTokens ?? 0;
  const read = usage.inputTokenDetails?.cacheReadTokens;
  const write = usage.inputTokenDetails?.cacheWriteTokens;

  // Cache stats count as "reported" as soon as either counter is a number.
  if (typeof read === "number" || typeof write === "number") {
    const hit = read ?? 0;
    const create = write ?? 0;
    // The percent sign belongs to the number only; with no input tokens the
    // rate is undefined and must read "n/a", not "n/a%".
    const rate = input > 0 ? `${((hit / input) * 100).toFixed(1)}%` : "n/a";
    return `[cache] ${tag} hit=${hit} create=${create} input=${input} rate=${rate} completion=${output}`;
  }

  return `[cache] ${tag} input=${input} completion=${output} (provider didn't report cache stats)`;
}
|
||||
|
||||
/**
 * Picks the wire protocol for a text call: the configured `provider` when one
 * is set, otherwise "openai_compatible".
 */
function resolveTextProtocol(config: ProviderConfig): ProviderProtocol {
  const { provider } = config;
  if (provider != null) {
    return provider;
  }
  return "openai_compatible";
}
|
||||
|
||||
export async function chat(
|
||||
config: ProviderConfig,
|
||||
messages: ChatMessage[],
|
||||
@@ -66,7 +96,63 @@ export async function chat(
|
||||
tag?: string;
|
||||
},
|
||||
): Promise<string> {
|
||||
const url = `${config.baseUrl.replace(/\/$/, "")}/chat/completions`;
|
||||
const protocol = resolveTextProtocol(config);
|
||||
if (protocol === "anthropic" || protocol === "google") {
|
||||
return chatViaAiSdk(config, messages, opts, protocol);
|
||||
}
|
||||
return chatOpenAiCompatible(config, messages, opts);
|
||||
}
|
||||
|
||||
/**
 * Sends a chat request to native Anthropic or Gemini through the Vercel AI SDK
 * and returns the generated text.
 *
 * `response_format` is not sent (Anthropic has no JSON mode); the engine relies
 * on parseJsonLoose downstream, matching how it already tolerates loose JSON
 * from every provider.
 *
 * @param config - Base URL, API key and model for the provider.
 * @param messages - Conversation, possibly including system messages.
 * @param opts - Optional sampling temperature (defaults to 0.9) and log tag
 *   (defaults to "chat").
 * @param protocol - Which native provider to build the model with.
 * @returns The non-empty text produced by the model.
 * @throws Error when the SDK returns no text.
 */
async function chatViaAiSdk(
  config: ProviderConfig,
  messages: ChatMessage[],
  opts: { temperature?: number; tag?: string } | undefined,
  protocol: "anthropic" | "google",
): Promise<string> {
  const baseURL = normalizeBaseUrl(config.baseUrl, protocol);
  const model =
    protocol === "anthropic"
      ? createAnthropic({ apiKey: config.apiKey, baseURL })(config.model)
      : createGoogleGenerativeAI({ apiKey: config.apiKey, baseURL })(
          config.model,
        );

  // Every system message is removed from the conversation below, so merge all
  // of them into the single `system` prompt instead of keeping only the first
  // and silently dropping the rest. With exactly one system message the result
  // is that message unchanged.
  // NOTE(review): assumes ChatMessage.content is a string — confirm.
  const systemParts = messages
    .filter((m) => m.role === "system")
    .map((m) => m.content);
  const system = systemParts.length > 0 ? systemParts.join("\n\n") : undefined;

  const convo: ModelMessage[] = messages
    .filter((m) => m.role !== "system")
    .map((m) => ({
      role: m.role as "user" | "assistant",
      content: m.content,
    }));

  const { text, usage } = await generateText({
    model,
    system,
    messages: convo,
    temperature: opts?.temperature ?? 0.9,
  });

  console.log(summarizeSdkUsage(opts?.tag ?? "chat", usage));

  if (typeof text !== "string" || text.length === 0) {
    throw new Error(`Chat API (AI SDK ${protocol}) returned no content.`);
  }
  return text;
}
|
||||
|
||||
async function chatOpenAiCompatible(
|
||||
config: ProviderConfig,
|
||||
messages: ChatMessage[],
|
||||
opts?: {
|
||||
temperature?: number;
|
||||
responseFormat?: "json_object" | "text";
|
||||
tag?: string;
|
||||
},
|
||||
): Promise<string> {
|
||||
const url = `${normalizeBaseUrl(config.baseUrl, "openai_compatible")}/chat/completions`;
|
||||
const body: Record<string, unknown> = {
|
||||
model: config.model,
|
||||
messages,
|
||||
|
||||
+142
-47
@@ -1,5 +1,9 @@
|
||||
import type { ProviderConfig } from "@infiplot/types";
|
||||
import { generateImage as generateImageSdk } from "ai";
|
||||
import { createOpenAI } from "@ai-sdk/openai";
|
||||
import { createGoogleGenerativeAI } from "@ai-sdk/google";
|
||||
import type { ProviderConfig, ProviderProtocol } from "@infiplot/types";
|
||||
import { fetchWithRetry } from "./fetchWithRetry";
|
||||
import { normalizeBaseUrl } from "./normalizeUrl";
|
||||
|
||||
// Runware uses its own task-array protocol (not OpenAI-compatible).
|
||||
// POST <baseUrl> with [{ taskType: "imageInference", ... }]; errors come
|
||||
@@ -38,30 +42,52 @@ export type GenerateImageOptions = {
|
||||
* Reference image (UUID, public URL, or base64) for img2img. When set,
|
||||
* FLUX preserves the seed image's composition and applies `strength` to
|
||||
* deviate. NOTE: FLUX.2 [klein] 9B KV does NOT support seedImage — use
|
||||
* `referenceImages` for visual continuity instead.
|
||||
* `referenceImages` for visual continuity instead. Runware-only.
|
||||
*/
|
||||
seedImage?: string;
|
||||
/**
|
||||
* Reference images (UUIDs, URLs, or base64) to condition generation on —
|
||||
* typically character portraits + the prior scene image. Runware caps at 4;
|
||||
* we silently truncate beyond that.
|
||||
* we silently truncate beyond that. On the OpenAI/Gemini AI SDK paths these
|
||||
* map to `prompt.images` (the SDK accepts public URLs or data URLs).
|
||||
*/
|
||||
referenceImages?: string[];
|
||||
/** 0–1, FLUX needs ≥ 0.8 to actually have an effect. */
|
||||
/** 0–1, FLUX needs ≥ 0.8 to actually have an effect. Runware-only. */
|
||||
strength?: number;
|
||||
};
|
||||
|
||||
export type GenerateImageResult = {
|
||||
/** Public CDN URL of the generated image (Runware-hosted). */
|
||||
/**
|
||||
* Image the client can render directly. A Runware CDN URL on the Runware
|
||||
* path; a `data:<mime>;base64,...` URI on the AI SDK paths (OpenAI/Gemini
|
||||
* return raw bytes, not a hosted URL).
|
||||
*/
|
||||
imageUrl: string;
|
||||
/** Stable UUID for cheap re-reference in later `referenceImages`. */
|
||||
/**
|
||||
* Stable handle for cheap re-reference in later `referenceImages`. A real
|
||||
* Runware UUID on the Runware path; a synthetic UUID on other paths (those
|
||||
* re-reference via the URL/data-URL form instead).
|
||||
*/
|
||||
imageUuid: string;
|
||||
};
|
||||
|
||||
/**
 * Historical URL-based protocol inference for the image role, used when
 * IMAGE_PROVIDER is unset so existing deployments (Runware, or an
 * OpenAI-compatible gateway) behave exactly as before.
 *
 * A base URL containing "runware.ai" selects the Runware protocol, except for
 * the model "image-2-vip", which stays on the OpenAI-compatible route.
 * Everything else is OpenAI-compatible.
 */
function inferImageProtocol(config: ProviderConfig): ProviderProtocol {
  const pointsAtRunware = config.baseUrl.includes("runware.ai");
  if (pointsAtRunware && config.model !== "image-2-vip") {
    return "runware";
  }
  return "openai_compatible";
}
|
||||
|
||||
/**
 * Picks the wire protocol for an image call: the configured `provider` when
 * one is set, otherwise whatever inferImageProtocol derives from the config.
 */
function resolveImageProtocol(config: ProviderConfig): ProviderProtocol {
  const { provider } = config;
  if (provider != null) {
    return provider;
  }
  return inferImageProtocol(config);
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
// generateImage — text-to-image (default) or referenceImages-conditioned.
|
||||
// Returns both the public URL (for client display + future references)
|
||||
// and the UUID (cheapest reference form for subsequent calls).
|
||||
// Returns both a renderable image URL and a re-reference handle (see
|
||||
// GenerateImageResult). Dispatches on the resolved wire protocol.
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
export async function generateImage(
|
||||
@@ -69,51 +95,120 @@ export async function generateImage(
|
||||
prompt: string,
|
||||
options?: GenerateImageOptions,
|
||||
): Promise<GenerateImageResult> {
|
||||
const url = config.baseUrl.replace(/\/$/, "");
|
||||
const protocol = resolveImageProtocol(config);
|
||||
switch (protocol) {
|
||||
case "openai":
|
||||
case "google":
|
||||
return generateImageViaAiSdk(config, prompt, options, protocol);
|
||||
case "runware":
|
||||
return generateImageRunware(config, prompt, options);
|
||||
case "anthropic":
|
||||
throw new Error(
|
||||
'IMAGE_PROVIDER "anthropic" does not generate images. Use "openai", "google", "runware", or "openai_compatible".',
|
||||
);
|
||||
case "openai_compatible":
|
||||
default:
|
||||
return generateImageOpenAiCompatible(config, prompt);
|
||||
}
|
||||
}
|
||||
|
||||
// 1. OpenAI-compatible route (GPTGod, DALL-E, etc.)
|
||||
const isOpenAi = !url.includes("runware.ai") || config.model === "image-2-vip";
|
||||
if (isOpenAi) {
|
||||
const endpoint = url.endsWith("/images/generations") ? url : `${url}/images/generations`;
|
||||
console.log(`[ai-client] Calling OpenAI-compatible image generations at: ${endpoint} with model: ${config.model}`);
|
||||
|
||||
const res = await fetchWithRetry(endpoint, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
Authorization: `Bearer ${config.apiKey}`,
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: config.model,
|
||||
prompt: prompt,
|
||||
n: 1,
|
||||
size: "1792x1024", // Use horizontal size (16:9)
|
||||
}),
|
||||
});
|
||||
// Native OpenAI (gpt-image) / Gemini (Nano Banana) via the Vercel AI SDK.
|
||||
// Unlike the fetch path, this supports reference-image editing via
|
||||
// `prompt.images`. The SDK returns raw bytes (no hosted URL), so we hand the
|
||||
// client a data URI and synthesize a UUID; continuity references reuse the
|
||||
// data URI rather than a provider UUID.
|
||||
async function generateImageViaAiSdk(
|
||||
config: ProviderConfig,
|
||||
prompt: string,
|
||||
options: GenerateImageOptions | undefined,
|
||||
protocol: "openai" | "google",
|
||||
): Promise<GenerateImageResult> {
|
||||
const baseURL = normalizeBaseUrl(config.baseUrl, protocol);
|
||||
const imageModel =
|
||||
protocol === "openai"
|
||||
? createOpenAI({ apiKey: config.apiKey, baseURL }).image(config.model)
|
||||
: createGoogleGenerativeAI({ apiKey: config.apiKey, baseURL }).image(
|
||||
config.model,
|
||||
);
|
||||
|
||||
const text = await res.text();
|
||||
let json: any;
|
||||
try {
|
||||
json = JSON.parse(text);
|
||||
} catch {
|
||||
throw new Error(`OpenAI Image API error ${res.status}: ${text.slice(0, 500)}`);
|
||||
}
|
||||
const refs = (options?.referenceImages ?? []).slice(0, MAX_REFERENCE_IMAGES);
|
||||
const promptArg =
|
||||
refs.length > 0 ? { text: prompt, images: refs } : prompt;
|
||||
|
||||
if (json.error) {
|
||||
throw new Error(`OpenAI Image API error: ${json.error.message || JSON.stringify(json.error)}`);
|
||||
}
|
||||
// OpenAI's image models take an explicit `size`; gpt-image's widest landscape
|
||||
// option is 1536x1024. Gemini takes an `aspectRatio` instead.
|
||||
const { image } = await generateImageSdk({
|
||||
model: imageModel,
|
||||
prompt: promptArg,
|
||||
...(protocol === "openai"
|
||||
? { size: "1536x1024" as `${number}x${number}` }
|
||||
: { aspectRatio: "16:9" as `${number}:${number}` }),
|
||||
});
|
||||
|
||||
const data = json.data?.[0];
|
||||
const imageUrl = data?.url;
|
||||
if (!imageUrl) {
|
||||
throw new Error(`No image URL in OpenAI response: ${text.slice(0, 300)}`);
|
||||
}
|
||||
// Generate a mock UUID since OpenAI compatible endpoint doesn't have UUIDs
|
||||
const imageUuid = crypto.randomUUID();
|
||||
return { imageUrl, imageUuid };
|
||||
return {
|
||||
imageUrl: `data:${image.mediaType};base64,${image.base64}`,
|
||||
imageUuid: crypto.randomUUID(),
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * OpenAI-compatible REST route (GPTGod, DALL-E proxies, etc.). Basic
 * text-to-image only — no reference images on this path; for editing/anchoring
 * set IMAGE_PROVIDER=openai (or google) to take the AI SDK path.
 *
 * POSTs to `<normalized base>/images/generations` and reads the first entry of
 * the response's `data` array. A hosted `url` is preferred; when the endpoint
 * answers with inline `b64_json` instead, the bytes are returned as a data URI.
 *
 * @param config - Base URL, API key and model for the endpoint.
 * @param prompt - Text prompt for the image.
 * @returns A renderable image URL plus a synthetic UUID (this protocol has no
 *   provider-side image handle).
 * @throws Error when the body is not JSON, carries an `error` field, or
 *   contains neither a `url` nor a `b64_json` image.
 */
async function generateImageOpenAiCompatible(
  config: ProviderConfig,
  prompt: string,
): Promise<GenerateImageResult> {
  const base = normalizeBaseUrl(config.baseUrl, "openai_compatible");
  const endpoint = `${base}/images/generations`;
  console.log(
    `[ai-client] Calling OpenAI-compatible image generations at: ${endpoint} with model: ${config.model}`,
  );

  const res = await fetchWithRetry(endpoint, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${config.apiKey}`,
    },
    body: JSON.stringify({
      model: config.model,
      prompt: prompt,
      n: 1,
      size: "1792x1024", // Use horizontal size (16:9)
    }),
  });

  // Read as text first so a non-JSON body (HTML error page, gateway message)
  // can still be quoted in the error.
  const text = await res.text();
  let json: any;
  try {
    json = JSON.parse(text);
  } catch {
    throw new Error(`OpenAI Image API error ${res.status}: ${text.slice(0, 500)}`);
  }

  // `json` may legitimately parse to null; optional chaining keeps that case
  // on the "no image" error below instead of crashing with a TypeError.
  if (json?.error) {
    throw new Error(`OpenAI Image API error: ${json.error.message || JSON.stringify(json.error)}`);
  }

  const data = json?.data?.[0];
  let imageUrl: string | undefined;
  if (typeof data?.url === "string" && data.url.length > 0) {
    imageUrl = data.url;
  } else if (typeof data?.b64_json === "string" && data.b64_json.length > 0) {
    // Inline-bytes responses carry no hosted URL; hand the client a data URI.
    // NOTE(review): assumes PNG, the usual default output format — confirm for
    // gateways configured to return another format.
    imageUrl = `data:image/png;base64,${data.b64_json}`;
  }
  if (!imageUrl) {
    throw new Error(`No image URL in OpenAI response: ${text.slice(0, 300)}`);
  }

  // Generate a mock UUID since OpenAI compatible endpoint doesn't have UUIDs
  const imageUuid = crypto.randomUUID();
  return { imageUrl, imageUuid };
}
|
||||
|
||||
// Runware task-array route — self-implemented to preserve the UUID/URL closed
|
||||
// loop (the official @runware/ai-sdk-provider drops both).
|
||||
async function generateImageRunware(
|
||||
config: ProviderConfig,
|
||||
prompt: string,
|
||||
options?: GenerateImageOptions,
|
||||
): Promise<GenerateImageResult> {
|
||||
const url = normalizeBaseUrl(config.baseUrl, "runware");
|
||||
|
||||
const task: Record<string, unknown> = {
|
||||
taskType: "imageInference",
|
||||
taskUUID: crypto.randomUUID(),
|
||||
|
||||
@@ -0,0 +1,66 @@
|
||||
import type { ProviderProtocol } from "@infiplot/types";
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
// Base-URL normalization — tolerate whatever shape the user pastes.
|
||||
//
|
||||
// The README never specified whether the base URL needs a `/v1` suffix,
|
||||
// so users provide all of these for the same endpoint:
|
||||
// https://api.deepseek.com
|
||||
// https://api.deepseek.com/v1
|
||||
// https://api.deepseek.com/v1/chat/completions
|
||||
// We normalize to a canonical base the adapter can safely append its own
|
||||
// endpoint path to. This also fixes the pre-existing double-suffix bug
|
||||
// where a pasted `.../chat/completions` became `.../chat/completions/chat/completions`.
|
||||
//
|
||||
// Strategy (bare-host-only version append):
|
||||
// 1. strip trailing slashes
|
||||
// 2. strip a trailing known endpoint suffix (chat/completions, messages, …)
|
||||
// 3. only when the URL the user gave is a BARE host (scheme://host[:port]
|
||||
// with no path) do we append the protocol's default version segment.
|
||||
// Any path the user wrote (/v1, /beta, /zen/go, /chat/completions, …) is
|
||||
// treated as an explicit location and left intact — so we never turn
|
||||
// `/beta` into `/beta/v1`, and a version-less `/chat/completions`
|
||||
// endpoint is preserved.
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
// Endpoint paths an adapter appends itself — stripped so we keep only the base.
const ENDPOINT_SUFFIX =
  /\/(chat\/completions|completions|responses|messages|images\/(generations|edits))\/?$/i;

// Default version segment to append per protocol for a bare host.
const DEFAULT_VERSION_SEGMENT: Record<ProviderProtocol, string | null> = {
  openai_compatible: "v1",
  openai: "v1",
  anthropic: "v1",
  google: "v1beta",
  // Runware posts to the bare base URL with no version-pathed sub-resource,
  // so never inject a segment for it.
  runware: null,
};

/**
 * True when `raw` is just a host (optionally `scheme://host[:port]`) with no
 * meaningful path — the only shape where a default version segment is
 * inferred. A lone "/" counts as bare.
 *
 * When `raw` cannot be parsed as a URL (most commonly because the scheme is
 * missing, e.g. `api.example.com/v1`), falls back to a textual check that
 * tolerates a missing scheme, so a scheme-less URL that already has a path is
 * not mistaken for a bare host.
 */
function isBareHost(raw: string): boolean {
  try {
    const { pathname } = new URL(raw);
    return pathname === "" || pathname === "/";
  } catch {
    // Drop an optional leading `scheme://`, then look for "host/<something>".
    const hostAndPath = raw.replace(/^[a-z][a-z0-9+.-]*:\/\//i, "");
    return !/^[^/]+\/.+/.test(hostAndPath);
  }
}

/**
 * Normalizes a user-supplied base URL to a canonical base that an adapter can
 * append its own endpoint path to.
 *
 * Steps: trim whitespace, strip trailing slashes, strip one trailing known
 * endpoint suffix (chat/completions, messages, …), and — only when the URL the
 * user gave is a bare host — append the protocol's default version segment.
 * Any path the user wrote is treated as an explicit location and left intact.
 *
 * @param raw - Base URL as configured by the user.
 * @param protocol - Wire protocol, which selects the default version segment.
 * @returns The normalized base URL, without a trailing slash.
 */
export function normalizeBaseUrl(
  raw: string,
  protocol: ProviderProtocol,
): string {
  const trimmed = raw.trim();
  let u = trimmed.replace(/\/+$/, "");
  u = u.replace(ENDPOINT_SUFFIX, "").replace(/\/+$/, "");

  const seg = DEFAULT_VERSION_SEGMENT[protocol];
  // Bareness is judged on what the user typed, not on the stripped URL, so a
  // version-less `/chat/completions` endpoint does not gain a version segment.
  if (seg && isBareHost(trimmed)) {
    u = `${u}/${seg}`;
  }
  return u;
}
|
||||
+73
-3
@@ -1,5 +1,12 @@
|
||||
import type { ProviderConfig } from "@infiplot/types";
|
||||
import { generateText } from "ai";
|
||||
import type { ModelMessage } from "ai";
|
||||
import { createAnthropic } from "@ai-sdk/anthropic";
|
||||
import { createGoogleGenerativeAI } from "@ai-sdk/google";
|
||||
import type { ProviderConfig, ProviderProtocol } from "@infiplot/types";
|
||||
import { fetchWithRetry } from "./fetchWithRetry";
|
||||
import { normalizeBaseUrl } from "./normalizeUrl";
|
||||
|
||||
const VISION_TIMEOUT_MS = 60_000;
|
||||
|
||||
export async function interpretClick(
|
||||
config: ProviderConfig,
|
||||
@@ -16,6 +23,11 @@ export async function interpretClick(
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Picks the wire protocol for a vision call: the configured `provider` when
 * one is set, otherwise "openai_compatible".
 */
function resolveVisionProtocol(config: ProviderConfig): ProviderProtocol {
  const { provider } = config;
  if (provider != null) {
    return provider;
  }
  return "openai_compatible";
}
|
||||
|
||||
/**
|
||||
* General single-image vision call. Accepts a complete data URL (preserves
|
||||
* the source mime type, e.g. webp/jpeg) and lets the caller opt out of
|
||||
@@ -27,7 +39,65 @@ export async function analyzeImageDataUrl(
|
||||
prompt: string,
|
||||
opts: { responseFormat?: "json_object" | "text" } = {},
|
||||
): Promise<string> {
|
||||
const url = `${config.baseUrl.replace(/\/$/, "")}/chat/completions`;
|
||||
const protocol = resolveVisionProtocol(config);
|
||||
if (protocol === "anthropic" || protocol === "google") {
|
||||
return analyzeViaAiSdk(config, imageDataUrl, prompt, protocol);
|
||||
}
|
||||
return analyzeOpenAiCompatible(config, imageDataUrl, prompt, opts);
|
||||
}
|
||||
|
||||
/**
 * Single-image vision call against native Anthropic or Gemini through the AI
 * SDK. The image part is given the full data URL as-is. response_format is not
 * sent (no JSON mode on Anthropic) — the engine's parseJsonLoose handles the
 * output.
 *
 * The request is aborted after VISION_TIMEOUT_MS; the timer is always cleared,
 * whether the call succeeds or throws.
 *
 * @param config - Base URL, API key and model for the provider.
 * @param imageDataUrl - Complete data URL of the image to analyze.
 * @param prompt - Text instruction sent alongside the image.
 * @param protocol - Which native provider to build the model with.
 * @returns The non-empty text produced by the model.
 * @throws Error when the SDK returns no text.
 */
async function analyzeViaAiSdk(
  config: ProviderConfig,
  imageDataUrl: string,
  prompt: string,
  protocol: "anthropic" | "google",
): Promise<string> {
  const baseURL = normalizeBaseUrl(config.baseUrl, protocol);
  const providerSettings = { apiKey: config.apiKey, baseURL };
  const model =
    protocol === "google"
      ? createGoogleGenerativeAI(providerSettings)(config.model)
      : createAnthropic(providerSettings)(config.model);

  // One user turn carrying the instruction followed by the image.
  const userTurn: ModelMessage = {
    role: "user",
    content: [
      { type: "text", text: prompt },
      { type: "image", image: imageDataUrl },
    ],
  };

  const abortCtrl = new AbortController();
  const timer = setTimeout(() => abortCtrl.abort(), VISION_TIMEOUT_MS);
  try {
    const result = await generateText({
      model,
      messages: [userTurn],
      temperature: 0.2,
      abortSignal: abortCtrl.signal,
    });
    const reply = result.text;
    if (typeof reply === "string" && reply.length > 0) {
      return reply;
    }
    throw new Error(`Vision API (AI SDK ${protocol}) returned no content.`);
  } finally {
    clearTimeout(timer);
  }
}
|
||||
|
||||
async function analyzeOpenAiCompatible(
|
||||
config: ProviderConfig,
|
||||
imageDataUrl: string,
|
||||
prompt: string,
|
||||
opts: { responseFormat?: "json_object" | "text" } = {},
|
||||
): Promise<string> {
|
||||
const url = `${normalizeBaseUrl(config.baseUrl, "openai_compatible")}/chat/completions`;
|
||||
|
||||
const body: Record<string, unknown> = {
|
||||
model: config.model,
|
||||
@@ -47,7 +117,7 @@ export async function analyzeImageDataUrl(
|
||||
}
|
||||
|
||||
const timeoutCtrl = new AbortController();
|
||||
const timeoutId = setTimeout(() => timeoutCtrl.abort(), 60_000);
|
||||
const timeoutId = setTimeout(() => timeoutCtrl.abort(), VISION_TIMEOUT_MS);
|
||||
|
||||
let res: Response;
|
||||
try {
|
||||
|
||||
+31
-1
@@ -1,4 +1,16 @@
|
||||
import type { EngineConfig, TtsConfig } from "@infiplot/types";
|
||||
import type {
|
||||
EngineConfig,
|
||||
ProviderProtocol,
|
||||
TtsConfig,
|
||||
} from "@infiplot/types";
|
||||
|
||||
// Every value accepted by a *_PROVIDER variable. The order here is the order
// in which the values are listed in the validation error message.
const VALID_PROTOCOLS = [
  "openai_compatible",
  "anthropic",
  "google",
  "openai",
  "runware",
] as const;
|
||||
|
||||
function readVar(name: string): string {
|
||||
const v = process.env[name];
|
||||
@@ -11,6 +23,21 @@ function readOptionalVar(name: string): string | undefined {
|
||||
return v && v.length > 0 ? v : undefined;
|
||||
}
|
||||
|
||||
/**
 * Reads an optional *_PROVIDER selector from the environment.
 *
 * The value is trimmed and lower-cased before validation. Unset (or blank)
 * yields `undefined`, and each ai-client adapter applies its own default
 * (text/vision → openai_compatible; image → inferred from the base URL).
 * Validation happens eagerly so a typo fails fast at boot rather than
 * mid-request.
 *
 * @param name - Environment variable name, e.g. "TEXT_PROVIDER".
 * @returns The matching protocol, or `undefined` when the variable is unset.
 * @throws Error when the value is not one of VALID_PROTOCOLS.
 */
function readProvider(name: string): ProviderProtocol | undefined {
  const normalized = readOptionalVar(name)?.trim().toLowerCase();
  if (!normalized) {
    return undefined;
  }

  const known = VALID_PROTOCOLS.find((candidate) => candidate === normalized);
  if (known !== undefined) {
    return known;
  }

  throw new Error(
    `Invalid ${name}: "${normalized}". Must be one of: ${VALID_PROTOCOLS.join(", ")}`,
  );
}
|
||||
|
||||
function loadTtsConfig(): TtsConfig | undefined {
|
||||
const baseUrl = readOptionalVar("TTS_BASE_URL");
|
||||
const apiKey = readOptionalVar("TTS_API_KEY");
|
||||
@@ -28,16 +55,19 @@ export function loadEngineConfig(headers?: Headers): EngineConfig {
|
||||
baseUrl: readVar("TEXT_BASE_URL"),
|
||||
apiKey: readVar("TEXT_API_KEY"),
|
||||
model: readVar("TEXT_MODEL"),
|
||||
provider: readProvider("TEXT_PROVIDER"),
|
||||
},
|
||||
image: {
|
||||
baseUrl: readVar("IMAGE_BASE_URL"),
|
||||
apiKey: readVar("IMAGE_API_KEY"),
|
||||
model: readVar("IMAGE_MODEL"),
|
||||
provider: readProvider("IMAGE_PROVIDER"),
|
||||
},
|
||||
vision: {
|
||||
baseUrl: readVar("VISION_BASE_URL"),
|
||||
apiKey: readVar("VISION_API_KEY"),
|
||||
model: readVar("VISION_MODEL"),
|
||||
provider: readProvider("VISION_PROVIDER"),
|
||||
},
|
||||
tts: loadTtsConfig(),
|
||||
mockImage: readOptionalVar("MOCK_IMAGE") === "true",
|
||||
|
||||
@@ -268,10 +268,41 @@ export type VisionClassify = "insert-beat" | "change-scene";
|
||||
// Provider config
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Wire protocol used to talk to a model provider. Which values are valid
 * depends on the model role — each ai-client adapter handles its own subset.
 * The text and vision adapters route every value other than "anthropic" and
 * "google" through the OpenAI-compatible path; the image adapter rejects
 * "anthropic" with an error and treats any unrecognized value as
 * "openai_compatible".
 *
 *   openai_compatible  text / vision / image — OpenAI Chat Completions +
 *                      `/images/generations` (self-implemented fetch; the
 *                      default for text/vision when unset)
 *   anthropic          text / vision — native Anthropic Messages (AI SDK)
 *   google             text / vision / image — native Gemini (AI SDK); image
 *                      uses the Nano Banana family
 *   openai             image only — OpenAI gpt-image via AI SDK,
 *                      unlocks reference-image editing (for text/vision use
 *                      openai_compatible, which already speaks OpenAI's format)
 *   runware            image only — Runware task-array protocol
 *                      (self-implemented; the default for runware.ai URLs)
 */
|
||||
export type ProviderProtocol =
|
||||
| "openai_compatible"
|
||||
| "anthropic"
|
||||
| "google"
|
||||
| "openai"
|
||||
| "runware";
|
||||
|
||||
export type ProviderConfig = {
|
||||
baseUrl: string;
|
||||
apiKey: string;
|
||||
model: string;
|
||||
/**
|
||||
* Wire protocol. When unset, callers apply a role-specific default:
|
||||
* text/vision → "openai_compatible"; image → inferred from baseUrl
|
||||
* (runware.ai → "runware", otherwise "openai_compatible") so existing
|
||||
* deployments keep working without setting *_PROVIDER.
|
||||
*/
|
||||
provider?: ProviderProtocol;
|
||||
};
|
||||
|
||||
export type TtsConfig = {
|
||||
|
||||
Reference in New Issue
Block a user