refactor(ai-client): unify OpenAI-compatible path to AI SDK generateText
Eliminate the dual code path (raw fetch vs AI SDK) for text and vision. All providers now go through createLanguageModel() + generateText(), removing chatOpenAiCompatible/analyzeOpenAiCompatible, the manual Usage type, summarizeUsage, and responseFormat plumbing from 8 call sites. Key fix: @ai-sdk/openai v3 defaults to the Responses API (/responses); DeepSeek only supports Chat Completions, so we use .chat() explicitly. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
+17
-133
@@ -2,8 +2,8 @@ import { generateText } from "ai";
|
||||
import type { LanguageModelUsage, ModelMessage } from "ai";
|
||||
import { createAnthropic } from "@ai-sdk/anthropic";
|
||||
import { createGoogleGenerativeAI } from "@ai-sdk/google";
|
||||
import { createOpenAI } from "@ai-sdk/openai";
|
||||
import type { ProviderConfig, ProviderProtocol } from "@infiplot/types";
|
||||
import { fetchWithRetry } from "./fetchWithRetry";
|
||||
import { normalizeBaseUrl } from "./normalizeUrl";
|
||||
|
||||
export type ChatMessage = {
|
||||
@@ -11,59 +11,8 @@ export type ChatMessage = {
|
||||
content: string;
|
||||
};
|
||||
|
||||
// Different providers expose prompt-cache stats under different keys. We probe
|
||||
// for the three forms we've seen in the wild and fall back to total tokens
|
||||
// when no cache field exists.
|
||||
//
|
||||
// DeepSeek (v3+) usage.prompt_cache_hit_tokens / prompt_cache_miss_tokens
|
||||
// OpenAI / o-series usage.prompt_tokens_details.cached_tokens
|
||||
// Anthropic / others usage.cache_read_input_tokens / cache_creation_input_tokens
|
||||
// No-cache (MiMo,
|
||||
// local Ollama, …) only prompt_tokens / completion_tokens — print those
|
||||
// so we still get a rough cost baseline.
|
||||
/**
 * Raw `usage` object as it may appear on an OpenAI-compatible chat response.
 * Every field is optional because providers report different subsets:
 * DeepSeek-style `prompt_cache_hit_tokens` / `prompt_cache_miss_tokens`,
 * OpenAI-style `prompt_tokens_details.cached_tokens`, and Anthropic-style
 * `cache_read_input_tokens` / `cache_creation_input_tokens`.
 */
type Usage = {
  prompt_tokens?: number;
  completion_tokens?: number;
  prompt_cache_hit_tokens?: number;
  prompt_cache_miss_tokens?: number;
  prompt_tokens_details?: { cached_tokens?: number };
  cache_read_input_tokens?: number;
  cache_creation_input_tokens?: number;
};

/**
 * Builds a one-line `[cache] …` log summary of prompt-cache statistics.
 *
 * The usage object is probed for the DeepSeek-style, OpenAI-style and
 * Anthropic-style cache fields, in that order; the first one present decides
 * the output shape. When none is present, only prompt/completion token counts
 * are reported.
 *
 * @param tag Label identifying the call site in the log line.
 * @param usage Provider-reported usage, or `undefined` if the response had none.
 * @returns The formatted log line.
 */
function summarizeUsage(tag: string, usage: Usage | undefined): string {
  if (!usage) return `[cache] ${tag} no-usage`;

  const prompt = usage.prompt_tokens ?? 0;
  const completion = usage.completion_tokens ?? 0;

  // Hit rate as a percentage with one decimal. The "%" sign is attached here
  // so that the zero-denominator case reads "n/a" rather than "n/a%".
  const formatRate = (hit: number, total: number): string =>
    total > 0 ? `${((hit / total) * 100).toFixed(1)}%` : "n/a";

  // DeepSeek-style
  if (typeof usage.prompt_cache_hit_tokens === "number") {
    const hit = usage.prompt_cache_hit_tokens;
    // If the miss count is absent, derive it from the prompt total (never negative).
    const miss = usage.prompt_cache_miss_tokens ?? Math.max(0, prompt - hit);
    return `[cache] ${tag} hit=${hit} miss=${miss} rate=${formatRate(hit, hit + miss)} completion=${completion}`;
  }

  // OpenAI-style
  const oaiCached = usage.prompt_tokens_details?.cached_tokens;
  if (typeof oaiCached === "number") {
    const miss = Math.max(0, prompt - oaiCached);
    return `[cache] ${tag} hit=${oaiCached} miss=${miss} rate=${formatRate(oaiCached, prompt)} completion=${completion}`;
  }

  // Anthropic-style
  if (typeof usage.cache_read_input_tokens === "number") {
    const hit = usage.cache_read_input_tokens;
    const create = usage.cache_creation_input_tokens ?? 0;
    // The rate is taken over cache reads + cache writes + the reported prompt tokens.
    return `[cache] ${tag} hit=${hit} create=${create} miss=${prompt} rate=${formatRate(hit, hit + create + prompt)} completion=${completion}`;
  }

  // No cache field at all
  return `[cache] ${tag} prompt=${prompt} completion=${completion} (provider didn't report cache stats)`;
}
|
||||
|
||||
// AI SDK 6 unifies cache stats across providers into usage.inputTokenDetails,
|
||||
// so a single shape covers Anthropic + Gemini (no per-provider probing).
|
||||
// so a single shape covers Anthropic, Gemini, and OpenAI-compatible providers.
|
||||
function summarizeSdkUsage(
|
||||
tag: string,
|
||||
usage: LanguageModelUsage | undefined,
|
||||
@@ -82,43 +31,34 @@ function summarizeSdkUsage(
|
||||
return `[cache] ${tag} input=${input} completion=${output} (provider didn't report cache stats)`;
|
||||
}
|
||||
|
||||
// text/vision default to the OpenAI-compatible wire protocol when unset.
|
||||
/**
 * Returns the protocol named by `config.provider`, or `"openai_compatible"`
 * when that field is null or undefined.
 */
function resolveTextProtocol(config: ProviderConfig): ProviderProtocol {
  const { provider } = config;
  if (provider == null) {
    return "openai_compatible";
  }
  return provider;
}
|
||||
|
||||
/**
 * Instantiates the AI SDK language model for the given provider config.
 *
 * @param config Supplies the API key, base URL and model id.
 * @param protocol Selects the SDK provider factory; it is also passed to
 *   `normalizeBaseUrl` together with `config.baseUrl`.
 * @returns The model handle produced by the matching provider factory.
 */
function createLanguageModel(config: ProviderConfig, protocol: ProviderProtocol) {
  const baseURL = normalizeBaseUrl(config.baseUrl, protocol);
  const settings = { apiKey: config.apiKey, baseURL };
  const modelId = config.model;

  if (protocol === "anthropic") {
    return createAnthropic(settings)(modelId);
  }
  if (protocol === "google") {
    return createGoogleGenerativeAI(settings)(modelId);
  }
  // Every other protocol value uses the OpenAI provider. `.chat()` is called
  // explicitly to target Chat Completions rather than the provider's default
  // (the Responses API, per the commit message).
  return createOpenAI(settings).chat(modelId);
}
|
||||
|
||||
/**
 * Sends a chat conversation to the configured provider and resolves with the
 * reply text.
 *
 * The protocol is resolved from `config`; "anthropic" and "google" are handled
 * by `chatViaAiSdk`, every other protocol by `chatOpenAiCompatible`.
 *
 * @param config Provider configuration.
 * @param messages Conversation to send.
 * @param opts Optional temperature, response format and log tag, forwarded to
 *   the selected implementation.
 * @returns The text returned by the selected implementation.
 */
export async function chat(
  config: ProviderConfig,
  messages: ChatMessage[],
  opts?: {
    temperature?: number;
    responseFormat?: "json_object" | "text";
    tag?: string;
  },
): Promise<string> {
  const protocol = resolveTextProtocol(config);
  switch (protocol) {
    case "anthropic":
    case "google":
      return chatViaAiSdk(config, messages, opts, protocol);
    default:
      return chatOpenAiCompatible(config, messages, opts);
  }
}
|
||||
|
||||
// Native Anthropic / Gemini via the Vercel AI SDK. response_format is not sent
|
||||
// (Anthropic has no JSON mode); the engine relies on parseJsonLoose downstream,
|
||||
// matching how it already tolerates loose JSON from every provider.
|
||||
async function chatViaAiSdk(
|
||||
config: ProviderConfig,
|
||||
messages: ChatMessage[],
|
||||
opts: { temperature?: number; tag?: string } | undefined,
|
||||
protocol: "anthropic" | "google",
|
||||
): Promise<string> {
|
||||
const baseURL = normalizeBaseUrl(config.baseUrl, protocol);
|
||||
const model =
|
||||
protocol === "anthropic"
|
||||
? createAnthropic({ apiKey: config.apiKey, baseURL })(config.model)
|
||||
: createGoogleGenerativeAI({ apiKey: config.apiKey, baseURL })(
|
||||
config.model,
|
||||
);
|
||||
const model = createLanguageModel(config, protocol);
|
||||
|
||||
const system = messages.find((m) => m.role === "system")?.content;
|
||||
const convo: ModelMessage[] = messages
|
||||
@@ -142,59 +82,3 @@ async function chatViaAiSdk(
|
||||
}
|
||||
return text;
|
||||
}
|
||||
|
||||
/**
 * Calls an OpenAI-compatible `/chat/completions` endpoint through
 * `fetchWithRetry` and resolves with the first choice's message content.
 *
 * @param config Supplies the base URL, API key (sent as a Bearer token) and model.
 * @param messages Conversation, sent as-is in the request body.
 * @param opts `temperature` defaults to 0.9; `responseFormat: "json_object"`
 *   adds `response_format` to the request; `tag` labels the usage log line
 *   (default "chat").
 * @returns The assistant message content.
 * @throws Error when the HTTP status is not ok, when the body is not valid
 *   JSON, or when the first choice carries no string content.
 */
async function chatOpenAiCompatible(
  config: ProviderConfig,
  messages: ChatMessage[],
  opts?: {
    temperature?: number;
    responseFormat?: "json_object" | "text";
    tag?: string;
  },
): Promise<string> {
  type ChatCompletion = {
    choices: { message: { content: string } }[];
    usage?: Usage;
  };

  const endpoint = `${normalizeBaseUrl(config.baseUrl, "openai_compatible")}/chat/completions`;
  const wantsJson = opts?.responseFormat === "json_object";
  // response_format is spread in last so the key order matches a post-hoc assignment.
  const payload: Record<string, unknown> = {
    model: config.model,
    messages,
    temperature: opts?.temperature ?? 0.9,
    ...(wantsJson ? { response_format: { type: "json_object" } } : {}),
  };

  const response = await fetchWithRetry(endpoint, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${config.apiKey}`,
    },
    body: JSON.stringify(payload),
  });

  // Read the body as text first so it can be quoted in error messages.
  const raw = await response.text();
  if (!response.ok) {
    throw new Error(`Chat API error ${response.status}: ${raw}`);
  }

  const parsed = ((): ChatCompletion => {
    try {
      return JSON.parse(raw);
    } catch {
      throw new Error(`Chat API returned invalid JSON: ${raw.slice(0, 500)}`);
    }
  })();

  // Tolerate an empty choices array or missing message/content fields.
  const reply = parsed.choices?.[0]?.message?.content;
  if (typeof reply === "string") {
    console.log(summarizeUsage(opts?.tag ?? "chat", parsed.usage));
    return reply;
  }

  throw new Error(
    `Chat API returned no content. Response: ${raw.slice(0, 500)}`
  );
}
|
||||
|
||||
Reference in New Issue
Block a user