83fd5717e7
- TEXT/VISION: add native Anthropic & Google Gemini paths via Vercel AI SDK, selectable through TEXT_PROVIDER / VISION_PROVIDER (default openai_compatible) - IMAGE: expand to openai (gpt-image) / google (Nano Banana) via AI SDK alongside the existing Runware task-array and OpenAI-compatible REST paths - normalizeBaseUrl: tolerate URLs with/without /v1 (or /chat/completions); append the per-protocol version segment only for bare hosts - config: readProvider() reads *_PROVIDER; types: ProviderProtocol + provider? - deps: @ai-sdk/anthropic, @ai-sdk/google; docs in .env.example + README Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
201 lines
7.0 KiB
TypeScript
201 lines
7.0 KiB
TypeScript
import { generateText } from "ai";
|
|
import type { LanguageModelUsage, ModelMessage } from "ai";
|
|
import { createAnthropic } from "@ai-sdk/anthropic";
|
|
import { createGoogleGenerativeAI } from "@ai-sdk/google";
|
|
import type { ProviderConfig, ProviderProtocol } from "@infiplot/types";
|
|
import { fetchWithRetry } from "./fetchWithRetry";
|
|
import { normalizeBaseUrl } from "./normalizeUrl";
|
|
|
|
/**
 * One turn of a chat transcript as accepted by {@link chat}.
 *
 * The shape mirrors the `messages[]` entries of an OpenAI-compatible
 * chat-completions request, which is how the default path sends it.
 */
export type ChatMessage = {
  /** Author of the turn. */
  role: "system" | "user" | "assistant";

  /** Plain-text body of the turn. */
  content: string;
};
|
|
|
|
/**
 * Token accounting as reported in the `usage` object of an OpenAI-compatible
 * chat-completions response.
 *
 * Providers expose prompt-cache statistics under different keys, so every
 * field is optional. The summarizer probes for the forms seen in the wild and,
 * when none is present (e.g. MiMo, local Ollama), falls back to printing the
 * plain prompt / completion counts so there is still a rough cost baseline.
 */
type Usage = {
  // --- Baseline counters (no cache information) ---------------------------
  /** Prompt-side token count. */
  prompt_tokens?: number;
  /** Completion-side token count. */
  completion_tokens?: number;

  // --- DeepSeek (v3+) -----------------------------------------------------
  /** Prompt tokens served from cache. */
  prompt_cache_hit_tokens?: number;
  /** Prompt tokens that missed the cache. */
  prompt_cache_miss_tokens?: number;

  // --- OpenAI / o-series --------------------------------------------------
  /** Nested details object; `cached_tokens` is the cache-hit count. */
  prompt_tokens_details?: { cached_tokens?: number };

  // --- Anthropic / others -------------------------------------------------
  /** Tokens read back from the prompt cache. */
  cache_read_input_tokens?: number;
  /** Tokens written into the prompt cache by this request. */
  cache_creation_input_tokens?: number;
};
|
|
|
|
/**
 * Builds the one-line `[cache] …` log entry for the `usage` object of an
 * OpenAI-compatible chat-completions response.
 *
 * Cache fields are probed in a fixed order and the first form present wins:
 *   1. DeepSeek-style  — `prompt_cache_hit_tokens` (+ `prompt_cache_miss_tokens`)
 *   2. OpenAI-style    — `prompt_tokens_details.cached_tokens`
 *   3. Anthropic-style — `cache_read_input_tokens` (+ `cache_creation_input_tokens`)
 * When none is present the plain prompt / completion counts are printed.
 *
 * @param tag   Label identifying the call site in the log line.
 * @param usage The provider's usage object, or `undefined` if it sent none.
 * @returns A single log line; never throws.
 */
function summarizeUsage(tag: string, usage: Usage | undefined): string {
  if (!usage) return `[cache] ${tag} no-usage`;

  const prompt = usage.prompt_tokens ?? 0;
  const completion = usage.completion_tokens ?? 0;

  // Renders a hit rate as e.g. "25.0%". With a zero denominator there is no
  // meaningful rate, so emit a bare "n/a" rather than the misleading "n/a%".
  const formatRate = (hit: number, total: number): string =>
    total > 0 ? `${((hit / total) * 100).toFixed(1)}%` : "n/a";

  // DeepSeek-style: explicit hit counter; miss is derived from the prompt
  // total when the provider omits it.
  if (typeof usage.prompt_cache_hit_tokens === "number") {
    const hit = usage.prompt_cache_hit_tokens;
    const miss = usage.prompt_cache_miss_tokens ?? Math.max(0, prompt - hit);
    return `[cache] ${tag} hit=${hit} miss=${miss} rate=${formatRate(hit, hit + miss)} completion=${completion}`;
  }

  // OpenAI-style: cached tokens are reported as a subset of prompt_tokens.
  const oaiCached = usage.prompt_tokens_details?.cached_tokens;
  if (typeof oaiCached === "number") {
    const miss = Math.max(0, prompt - oaiCached);
    return `[cache] ${tag} hit=${oaiCached} miss=${miss} rate=${formatRate(oaiCached, prompt)} completion=${completion}`;
  }

  // Anthropic-style: cache reads/writes are counted separately from
  // prompt_tokens, so the denominator is the sum of all three.
  if (typeof usage.cache_read_input_tokens === "number") {
    const hit = usage.cache_read_input_tokens;
    const create = usage.cache_creation_input_tokens ?? 0;
    return `[cache] ${tag} hit=${hit} create=${create} miss=${prompt} rate=${formatRate(hit, hit + create + prompt)} completion=${completion}`;
  }

  // No cache field at all.
  return `[cache] ${tag} prompt=${prompt} completion=${completion} (provider didn't report cache stats)`;
}
|
|
|
|
/**
 * Builds the one-line `[cache] …` log entry for a usage object returned by the
 * AI SDK's `generateText`.
 *
 * AI SDK 6 unifies cache stats across providers into `usage.inputTokenDetails`,
 * so a single shape covers Anthropic + Gemini (no per-provider probing).
 *
 * @param tag   Label identifying the call site in the log line.
 * @param usage The SDK usage object, or `undefined` if none was returned.
 * @returns A single log line; never throws.
 */
function summarizeSdkUsage(
  tag: string,
  usage: LanguageModelUsage | undefined,
): string {
  if (!usage) return `[cache] ${tag} no-usage`;

  const input = usage.inputTokens ?? 0;
  const output = usage.outputTokens ?? 0;
  const read = usage.inputTokenDetails?.cacheReadTokens;
  const write = usage.inputTokenDetails?.cacheWriteTokens;

  // Either counter being present means the provider reported cache stats.
  if (typeof read === "number" || typeof write === "number") {
    const hit = read ?? 0;
    const create = write ?? 0;
    // With zero input tokens there is no meaningful rate: emit a bare "n/a"
    // rather than the misleading "n/a%".
    const rate = input > 0 ? `${((hit / input) * 100).toFixed(1)}%` : "n/a";
    return `[cache] ${tag} hit=${hit} create=${create} input=${input} rate=${rate} completion=${output}`;
  }

  return `[cache] ${tag} input=${input} completion=${output} (provider didn't report cache stats)`;
}
|
|
|
|
/**
 * Picks the wire protocol for a text/vision request.
 *
 * @param config Provider settings; `provider` may be unset.
 * @returns The configured protocol, or `"openai_compatible"` when unset.
 */
function resolveTextProtocol(config: ProviderConfig): ProviderProtocol {
  const configured = config.provider;
  if (configured == null) {
    // text/vision default to the OpenAI-compatible wire protocol when unset.
    return "openai_compatible";
  }
  return configured;
}
|
|
|
|
/**
 * Sends a chat transcript to the configured provider and resolves with the
 * assistant's text reply.
 *
 * Routing is decided by the resolved protocol: `anthropic` and `google` go
 * through the AI SDK path, everything else through the OpenAI-compatible
 * REST path.
 *
 * @param config   Provider settings (base URL, API key, model, protocol).
 * @param messages Transcript to send.
 * @param opts     Optional sampling temperature, response format and log tag.
 * @returns The reply text produced by the selected backend.
 */
export async function chat(
  config: ProviderConfig,
  messages: ChatMessage[],
  opts?: {
    temperature?: number;
    responseFormat?: "json_object" | "text";
    tag?: string;
  },
): Promise<string> {
  switch (resolveTextProtocol(config)) {
    case "anthropic":
      return chatViaAiSdk(config, messages, opts, "anthropic");
    case "google":
      return chatViaAiSdk(config, messages, opts, "google");
    default:
      return chatOpenAiCompatible(config, messages, opts);
  }
}
|
|
|
|
/**
 * Native Anthropic / Gemini chat via the Vercel AI SDK.
 *
 * `response_format` is not sent (Anthropic has no JSON mode); the engine
 * relies on parseJsonLoose downstream, matching how it already tolerates
 * loose JSON from every provider.
 *
 * The SDK takes the system prompt separately from the conversation, so system
 * messages are pulled out of `messages`. All of them are forwarded, joined by
 * a blank line in their original order, so none is dropped when a caller
 * supplies more than one.
 *
 * @param config   Provider settings (base URL, API key, model).
 * @param messages Transcript to send.
 * @param opts     Optional sampling temperature (default 0.9) and log tag.
 * @param protocol Which native SDK provider to use.
 * @returns The generated text.
 * @throws Error when the SDK returns an empty or non-string text. Errors
 *         raised by `generateText` itself propagate unchanged.
 */
async function chatViaAiSdk(
  config: ProviderConfig,
  messages: ChatMessage[],
  opts: { temperature?: number; tag?: string } | undefined,
  protocol: "anthropic" | "google",
): Promise<string> {
  const baseURL = normalizeBaseUrl(config.baseUrl, protocol);
  const model =
    protocol === "anthropic"
      ? createAnthropic({ apiKey: config.apiKey, baseURL })(config.model)
      : createGoogleGenerativeAI({ apiKey: config.apiKey, baseURL })(
          config.model,
        );

  // Split the transcript in one pass: system text goes to `system`, the rest
  // becomes the conversation. Narrowing on `role` makes a type assertion
  // unnecessary.
  const systemParts: string[] = [];
  const convo: ModelMessage[] = [];
  for (const m of messages) {
    if (m.role === "system") {
      systemParts.push(m.content);
    } else {
      convo.push({ role: m.role, content: m.content });
    }
  }
  // Leave `system` undefined (not "") when the caller supplied none.
  const system = systemParts.length > 0 ? systemParts.join("\n\n") : undefined;

  const { text, usage } = await generateText({
    model,
    system,
    messages: convo,
    temperature: opts?.temperature ?? 0.9,
  });

  console.log(summarizeSdkUsage(opts?.tag ?? "chat", usage));

  if (typeof text !== "string" || text.length === 0) {
    throw new Error(`Chat API (AI SDK ${protocol}) returned no content.`);
  }
  return text;
}
|
|
|
|
/**
 * Chat via the OpenAI-compatible REST protocol (`POST {base}/chat/completions`).
 *
 * @param config   Provider settings (base URL, API key, model).
 * @param messages Transcript, sent as-is in the request body.
 * @param opts     Optional sampling temperature (default 0.9), response format
 *                 (`json_object` adds `response_format` to the request) and
 *                 log tag.
 * @returns The `content` string of the first choice.
 * @throws Error on a non-2xx status, on a body that is not valid JSON, or when
 *         the first choice carries no string content.
 */
async function chatOpenAiCompatible(
  config: ProviderConfig,
  messages: ChatMessage[],
  opts?: {
    temperature?: number;
    responseFormat?: "json_object" | "text";
    tag?: string;
  },
): Promise<string> {
  const url = `${normalizeBaseUrl(config.baseUrl, "openai_compatible")}/chat/completions`;
  const body: Record<string, unknown> = {
    model: config.model,
    messages,
    temperature: opts?.temperature ?? 0.9,
  };
  if (opts?.responseFormat === "json_object") {
    body.response_format = { type: "json_object" };
  }

  const res = await fetchWithRetry(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${config.apiKey}`,
    },
    body: JSON.stringify(body),
  });

  // Read the body as text first so it can be quoted in error messages.
  const text = await res.text();
  if (!res.ok) {
    throw new Error(`Chat API error ${res.status}: ${text}`);
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(text);
  } catch {
    throw new Error(`Chat API returned invalid JSON: ${text.slice(0, 500)}`);
  }

  // A body can be valid JSON without being an object (`null`, a number, a
  // string). Treat those as an empty response so they reach the descriptive
  // "no content" error below instead of a TypeError on property access.
  // The cast only names the fields we probe; each is checked before use.
  const json = (
    typeof parsed === "object" && parsed !== null ? parsed : {}
  ) as {
    choices?: ({ message?: { content?: unknown } | null } | null)[];
    usage?: Usage;
  };

  // Guard against empty choices array or missing message/content fields.
  const content = json.choices?.[0]?.message?.content;
  if (typeof content !== "string") {
    throw new Error(
      `Chat API returned no content. Response: ${text.slice(0, 500)}`,
    );
  }

  console.log(summarizeUsage(opts?.tag ?? "chat", json.usage));

  return content;
}
|