fix: address Copilot review — SSRF validation + log truncation
- annotate.ts: add assertSafeUrl() to reject URLs that are neither https: nor data:, and hosts that are private/reserved IPs (SSRF prevention); cap response body at 10 MB
- jsonParser.ts: truncate raw model output in the error log to the first 800 chars to avoid flooding logs / leaking sensitive content
This commit is contained in:
@@ -1,12 +1,42 @@
|
||||
import sharp from "sharp";
|
||||
|
||||
const FETCH_TIMEOUT_MS = 5000;
|
||||
const MAX_IMAGE_BYTES = 10 * 1024 * 1024; // 10 MB
|
||||
|
||||
// Validate that an imageUrl is safe to fetch server-side (SSRF guard).
//
// Accepted: data: URIs (returned immediately, nothing else is checked) and
//           https: URLs whose host is not a loopback/private/reserved address.
// Rejected: every other scheme (http: included), "localhost" names, and
//           IPv4/IPv6 literals in loopback, private, link-local (including the
//           169.254.169.254 metadata address), CGNAT, multicast or reserved
//           ranges.
//
// Throws Error when the URL is rejected. `new URL` itself throws a TypeError
// when the string is not a parseable URL.
//
// NOTE: only the literal host in the URL is inspected; no DNS lookup is done
// here, so a public name that resolves to a private address is not caught by
// this function.
function assertSafeUrl(url: string): void {
  if (url.startsWith("data:")) return;

  const parsed = new URL(url);
  if (parsed.protocol !== "https:") {
    throw new Error(
      `prevImageUrl must use https: or data: protocol, got ${parsed.protocol}`,
    );
  }

  // The URL parser lower-cases the host and normalizes IPv4 spellings
  // (decimal, hex, short forms) to dotted-quad. Drop a trailing root dot so
  // "localhost." is treated like "localhost".
  const host = parsed.hostname.replace(/\.$/, "");

  if (
    isLocalHostname(host) ||
    hasPrivateIpv4Prefix(host) ||
    isReservedIpv4Literal(host) ||
    isReservedIpv6Literal(host)
  ) {
    throw new Error(
      `prevImageUrl resolves to a private/reserved IP: ${parsed.hostname}`,
    );
  }
}

// True for "localhost" and any name under the .localhost TLD.
function isLocalHostname(host: string): boolean {
  return host === "localhost" || host.endsWith(".localhost");
}

// Conservative prefix match on 10.*, 192.168.* and 172.16-31.*. It is applied
// to any host string (not just IP literals), so names that merely begin with
// a private-looking prefix are rejected too.
function hasPrivateIpv4Prefix(host: string): boolean {
  return /^(?:10\.|192\.168\.|172\.(?:1[6-9]|2\d|3[0-1])\.)/.test(host);
}

// True when host is a dotted-quad IPv4 literal in a non-public range.
function isReservedIpv4Literal(host: string): boolean {
  const quad = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(host);
  if (!quad) return false;
  const a = Number(quad[1]);
  const b = Number(quad[2]);
  return (
    a === 0 || // 0.0.0.0/8 "this network"
    a === 10 || // 10.0.0.0/8 private
    a === 127 || // 127.0.0.0/8 loopback
    (a === 100 && b >= 64 && b <= 127) || // 100.64.0.0/10 carrier-grade NAT
    (a === 169 && b === 254) || // 169.254.0.0/16 link-local (cloud metadata)
    (a === 172 && b >= 16 && b <= 31) || // 172.16.0.0/12 private
    (a === 192 && b === 168) || // 192.168.0.0/16 private
    a >= 224 // multicast, reserved and broadcast
  );
}

// True when host is a bracketed IPv6 literal that is unspecified, loopback,
// unique-local, link-local, or an IPv4-mapped address whose embedded IPv4
// address is itself reserved.
function isReservedIpv6Literal(host: string): boolean {
  if (!host.startsWith("[") || !host.endsWith("]")) return false;
  const addr = host.slice(1, -1);

  if (addr === "::" || addr === "::1") return true; // unspecified / loopback
  if (/^f[cd][0-9a-f]{2}:/.test(addr)) return true; // fc00::/7 unique-local
  if (/^fe[89ab][0-9a-f]:/.test(addr)) return true; // fe80::/10 link-local

  // The URL parser serializes ::ffff:a.b.c.d as ::ffff:hhhh:hhhh, so the
  // embedded IPv4 address has to be rebuilt from the two hex groups.
  const mapped = /^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/.exec(addr);
  if (mapped) {
    const hi = parseInt(mapped[1] ?? "0", 16);
    const lo = parseInt(mapped[2] ?? "0", 16);
    return isReservedIpv4Literal(
      `${hi >> 8}.${hi & 0xff}.${lo >> 8}.${lo & 0xff}`,
    );
  }
  return false;
}
|
||||
|
||||
// Pull the bytes from an image URL or data URI into a Buffer suitable for
|
||||
// sharp. Data URIs are decoded inline (no network); http(s) URLs are fetched
|
||||
// sharp. Data URIs are decoded inline (no network); https: URLs are fetched
|
||||
// with a short timeout — if Runware's CDN is slow we'd rather fail the vision
|
||||
// step quickly than tie up a 60s Vercel function on a single image read.
|
||||
async function loadImageBuffer(imageUrl: string): Promise<Buffer> {
|
||||
assertSafeUrl(imageUrl);
|
||||
|
||||
if (imageUrl.startsWith("data:")) {
|
||||
const comma = imageUrl.indexOf(",");
|
||||
if (comma === -1) throw new Error("Malformed data URI in prevImageUrl");
|
||||
@@ -23,7 +53,18 @@ async function loadImageBuffer(imageUrl: string): Promise<Buffer> {
|
||||
`Failed to fetch prevImageUrl (${res.status}): ${imageUrl.slice(0, 120)}`,
|
||||
);
|
||||
}
|
||||
const contentLength = res.headers.get("content-length");
|
||||
if (contentLength && Number(contentLength) > MAX_IMAGE_BYTES) {
|
||||
throw new Error(
|
||||
`prevImageUrl response too large (${contentLength} bytes, max ${MAX_IMAGE_BYTES})`,
|
||||
);
|
||||
}
|
||||
const arr = await res.arrayBuffer();
|
||||
if (arr.byteLength > MAX_IMAGE_BYTES) {
|
||||
throw new Error(
|
||||
`prevImageUrl response too large (${arr.byteLength} bytes, max ${MAX_IMAGE_BYTES})`,
|
||||
);
|
||||
}
|
||||
return Buffer.from(arr);
|
||||
} finally {
|
||||
clearTimeout(timer);
|
||||
|
||||
@@ -6,8 +6,9 @@ import { jsonrepair, JSONRepairError } from "jsonrepair";
|
||||
// 3. Slice between first { and last } and parse.
|
||||
// 4. Apply targeted regex pre-repairs (see preRepair) and try jsonrepair.
|
||||
//
|
||||
// On final failure, logs the FULL raw model output so we can diagnose the
|
||||
// actual syntax error.
|
||||
// On final failure, logs the first 800 chars of the raw model output so we
|
||||
// can diagnose the actual syntax error without flooding logs or leaking
|
||||
// sensitive content.
|
||||
//
|
||||
// jsonrepair (npm package josdejong/jsonrepair — 2.3k+ stars) handles the
|
||||
// broad LLM-output failure modes: truncated JSON, missing commas/brackets,
|
||||
@@ -86,7 +87,7 @@ export function parseJsonLoose<T>(raw: string): T {
|
||||
} catch (err) {
|
||||
const isRepairErr = err instanceof JSONRepairError;
|
||||
console.error(
|
||||
`[parseJsonLoose] jsonrepair ${isRepairErr ? "could not repair" : "succeeded but JSON.parse rejected its output"}. Full raw model output:\n${raw}`,
|
||||
`[parseJsonLoose] jsonrepair ${isRepairErr ? "could not repair" : "succeeded but JSON.parse rejected its output"}. Raw output (first 800 chars):\n${raw.slice(0, 800)}`,
|
||||
);
|
||||
throw err;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user