fix: address Copilot review — SSRF validation + log truncation
- annotate.ts: add assertSafeUrl() to reject URLs that use neither https: nor data:, and hosts that are private/reserved IPs (SSRF prevention); cap response body to 10 MB
- jsonParser.ts: truncate raw model output in error log to first 800 chars to avoid flooding logs / leaking sensitive content
This commit is contained in:
@@ -1,12 +1,42 @@
|
|||||||
import sharp from "sharp";
|
import sharp from "sharp";
|
||||||
|
|
||||||
const FETCH_TIMEOUT_MS = 5000;
|
const FETCH_TIMEOUT_MS = 5000;
|
||||||
|
const MAX_IMAGE_BYTES = 10 * 1024 * 1024; // 10 MB
|
||||||
|
|
||||||
|
/**
 * Reports whether an IPv4 address falls in a range that must never be fetched
 * server-side. Only the first two octets are needed for every range checked.
 *
 * @param a - First octet of the address.
 * @param b - Second octet of the address.
 * @returns `true` for "this network" (0/8), private (10/8, 172.16/12,
 *   192.168/16), carrier-grade NAT (100.64/10), loopback (127/8), link-local
 *   incl. cloud metadata (169.254/16), and multicast/reserved (224/3).
 */
function isReservedIpv4(a: number, b: number): boolean {
  return (
    a === 0 ||
    a === 10 ||
    a === 127 ||
    (a === 100 && b >= 64 && b <= 127) ||
    (a === 169 && b === 254) ||
    (a === 172 && b >= 16 && b <= 31) ||
    (a === 192 && b === 168) ||
    a >= 224
  );
}

/**
 * Reports whether a URL hostname (as produced by `URL#hostname`, with any
 * trailing root dot already removed) names a loopback/private/reserved target.
 *
 * @param host - Lower-cased hostname; IPv6 literals are wrapped in `[...]`.
 * @returns `true` when the host must be rejected.
 */
function isBlockedHost(host: string): boolean {
  if (host === "localhost" || host.endsWith(".localhost")) return true;

  // Range checks apply to real IPv4 literals only, so a public domain such as
  // "10.example.com" is not mistaken for an address in 10.0.0.0/8.
  const v4 = /^(\d{1,3})\.(\d{1,3})\.\d{1,3}\.\d{1,3}$/.exec(host);
  if (v4) return isReservedIpv4(Number(v4[1]), Number(v4[2]));

  if (host.startsWith("[") && host.endsWith("]")) {
    const v6 = host.slice(1, -1);
    // Unspecified and loopback addresses.
    if (v6 === "::" || v6 === "::1") return true;
    // Unique-local fc00::/7 and link-local fe80::/10.
    if (/^f[cd][0-9a-f]{2}:/.test(v6) || /^fe[89ab][0-9a-f]:/.test(v6)) {
      return true;
    }
    // IPv4-mapped addresses are serialized in hex (e.g. ::ffff:7f00:1); the
    // first trailing hextet carries the first two IPv4 octets.
    const mapped = /^::ffff:([0-9a-f]{1,4}):[0-9a-f]{1,4}$/.exec(v6);
    if (mapped) {
      const hi = parseInt(mapped[1] ?? "", 16);
      return isReservedIpv4(hi >> 8, hi & 0xff);
    }
  }

  return false;
}

/**
 * Validate that an image URL is safe to fetch server-side (SSRF guard).
 *
 * Only `data:` URIs (never fetched over the network) and `https:` URLs are
 * accepted. For `https:` URLs the host must not be `localhost` or an IP
 * literal in a loopback, private, link-local (including the 169.254.169.254
 * cloud metadata endpoint) or otherwise reserved range, in IPv4 or IPv6 form.
 *
 * NOTE: this inspects the URL text only. A public hostname whose DNS record
 * points at a private address is not detected here.
 *
 * @param url - The candidate image URL or data URI.
 * @throws {TypeError} If `url` is not `data:` and cannot be parsed as a URL.
 * @throws {Error} If the protocol is not `https:` or the host is blocked.
 */
function assertSafeUrl(url: string): void {
  // data: URIs carry their payload inline, so there is nothing to fetch.
  if (url.startsWith("data:")) return;

  const parsed = new URL(url);
  if (parsed.protocol !== "https:") {
    throw new Error(
      `prevImageUrl must use https: or data: protocol, got ${parsed.protocol}`,
    );
  }

  // Strip a trailing root dot so "localhost." cannot dodge the name check.
  const host = parsed.hostname.replace(/\.$/, "");
  if (isBlockedHost(host)) {
    throw new Error(
      `prevImageUrl resolves to a private/reserved IP: ${host}`,
    );
  }
}
|
||||||
|
|
||||||
// Pull the bytes from an image URL or data URI into a Buffer suitable for
|
// Pull the bytes from an image URL or data URI into a Buffer suitable for
|
||||||
// sharp. Data URIs are decoded inline (no network); http(s) URLs are fetched
|
// sharp. Data URIs are decoded inline (no network); https: URLs are fetched
|
||||||
// with a short timeout — if Runware's CDN is slow we'd rather fail the vision
|
// with a short timeout — if Runware's CDN is slow we'd rather fail the vision
|
||||||
// step quickly than tie up a 60s Vercel function on a single image read.
|
// step quickly than tie up a 60s Vercel function on a single image read.
|
||||||
async function loadImageBuffer(imageUrl: string): Promise<Buffer> {
|
async function loadImageBuffer(imageUrl: string): Promise<Buffer> {
|
||||||
|
assertSafeUrl(imageUrl);
|
||||||
|
|
||||||
if (imageUrl.startsWith("data:")) {
|
if (imageUrl.startsWith("data:")) {
|
||||||
const comma = imageUrl.indexOf(",");
|
const comma = imageUrl.indexOf(",");
|
||||||
if (comma === -1) throw new Error("Malformed data URI in prevImageUrl");
|
if (comma === -1) throw new Error("Malformed data URI in prevImageUrl");
|
||||||
@@ -23,7 +53,18 @@ async function loadImageBuffer(imageUrl: string): Promise<Buffer> {
|
|||||||
`Failed to fetch prevImageUrl (${res.status}): ${imageUrl.slice(0, 120)}`,
|
`Failed to fetch prevImageUrl (${res.status}): ${imageUrl.slice(0, 120)}`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
const contentLength = res.headers.get("content-length");
|
||||||
|
if (contentLength && Number(contentLength) > MAX_IMAGE_BYTES) {
|
||||||
|
throw new Error(
|
||||||
|
`prevImageUrl response too large (${contentLength} bytes, max ${MAX_IMAGE_BYTES})`,
|
||||||
|
);
|
||||||
|
}
|
||||||
const arr = await res.arrayBuffer();
|
const arr = await res.arrayBuffer();
|
||||||
|
if (arr.byteLength > MAX_IMAGE_BYTES) {
|
||||||
|
throw new Error(
|
||||||
|
`prevImageUrl response too large (${arr.byteLength} bytes, max ${MAX_IMAGE_BYTES})`,
|
||||||
|
);
|
||||||
|
}
|
||||||
return Buffer.from(arr);
|
return Buffer.from(arr);
|
||||||
} finally {
|
} finally {
|
||||||
clearTimeout(timer);
|
clearTimeout(timer);
|
||||||
|
|||||||
@@ -6,8 +6,9 @@ import { jsonrepair, JSONRepairError } from "jsonrepair";
|
|||||||
// 3. Slice between first { and last } and parse.
|
// 3. Slice between first { and last } and parse.
|
||||||
// 4. Apply targeted regex pre-repairs (see preRepair) and try jsonrepair.
|
// 4. Apply targeted regex pre-repairs (see preRepair) and try jsonrepair.
|
||||||
//
|
//
|
||||||
// On final failure, logs the FULL raw model output so we can diagnose the
|
// On final failure, logs the first 800 chars of the raw model output so we
|
||||||
// actual syntax error.
|
// can diagnose the actual syntax error without flooding logs or leaking
|
||||||
|
// sensitive content.
|
||||||
//
|
//
|
||||||
// jsonrepair (npm package josdejong/jsonrepair — 2.3k+ stars) handles the
|
// jsonrepair (npm package josdejong/jsonrepair — 2.3k+ stars) handles the
|
||||||
// broad LLM-output failure modes: truncated JSON, missing commas/brackets,
|
// broad LLM-output failure modes: truncated JSON, missing commas/brackets,
|
||||||
@@ -86,7 +87,7 @@ export function parseJsonLoose<T>(raw: string): T {
|
|||||||
} catch (err) {
|
} catch (err) {
|
||||||
const isRepairErr = err instanceof JSONRepairError;
|
const isRepairErr = err instanceof JSONRepairError;
|
||||||
console.error(
|
console.error(
|
||||||
`[parseJsonLoose] jsonrepair ${isRepairErr ? "could not repair" : "succeeded but JSON.parse rejected its output"}. Full raw model output:\n${raw}`,
|
`[parseJsonLoose] jsonrepair ${isRepairErr ? "could not repair" : "succeeded but JSON.parse rejected its output"}. Raw output (first 800 chars):\n${raw.slice(0, 800)}`,
|
||||||
);
|
);
|
||||||
throw err;
|
throw err;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user