From 42a09c42f8e37c2b0f977120c6f5fc9fa90e48e1 Mon Sep 17 00:00:00 2001
From: yuanzonghao
Date: Mon, 1 Jun 2026 16:29:08 +0800
Subject: [PATCH] =?UTF-8?q?fix:=20address=20Copilot=20review=20=E2=80=94?=
 =?UTF-8?q?=20SSRF=20validation=20+=20log=20truncation?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- annotate.ts: add assertSafeUrl() to reject URLs that are neither https:
  nor data:, and https: URLs whose host is a localhost name or a
  loopback/private/link-local IPv4 or IPv6 literal (SSRF prevention; the
  check is lexical, DNS is not resolved); cap the response body at 10 MB
  while streaming so an absent or wrong Content-Length cannot bypass it
- jsonParser.ts: truncate raw model output in error log to first 800
  chars to avoid flooding logs / leaking sensitive content
---
Review notes (not part of the commit message):
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch; `Promise<Buffer>` and `parseJsonLoose<T>` were restored
  where angle-bracketed type arguments had been stripped.
- The From commit id and the index blob ids are carried over from the
  original patch and were not recomputed for the revised content.
- Not addressed here: if the fetch() call (outside these hunks) uses the
  default redirect mode, a redirect to an internal host is still followed;
  consider `redirect: "error"` or re-validating each hop.

 packages/engine/src/annotate.ts   | 92 ++++++++++++++++++++++++++++++-
 packages/engine/src/jsonParser.ts |  7 ++-
 2 files changed, 93 insertions(+), 6 deletions(-)

diff --git a/packages/engine/src/annotate.ts b/packages/engine/src/annotate.ts
index 6991301..e685d40 100644
--- a/packages/engine/src/annotate.ts
+++ b/packages/engine/src/annotate.ts
@@ -1,12 +1,75 @@
 import sharp from "sharp";
 
 const FETCH_TIMEOUT_MS = 5000;
+const MAX_IMAGE_BYTES = 10 * 1024 * 1024; // 10 MB
+
+// True when an IPv4 address whose first two octets are (a, b) lies in a
+// loopback, private, link-local, CGNAT, multicast or reserved range.
+function isPrivateIpv4(a: number, b: number): boolean {
+  return (
+    a === 0 || // 0.0.0.0/8 "this network"
+    a === 10 || // 10.0.0.0/8 private
+    a === 127 || // 127.0.0.0/8 loopback
+    (a === 100 && b >= 64 && b <= 127) || // 100.64.0.0/10 CGNAT
+    (a === 169 && b === 254) || // 169.254.0.0/16 link-local, cloud metadata
+    (a === 172 && b >= 16 && b <= 31) || // 172.16.0.0/12 private
+    (a === 192 && b === 168) || // 192.168.0.0/16 private
+    a >= 224 // multicast and reserved
+  );
+}
+
+// True when a lowercase, bracket-less IPv6 literal is unspecified, loopback,
+// unique-local, link-local, or an IPv4-mapped form of a private IPv4 address.
+function isPrivateIpv6(addr: string): boolean {
+  if (addr === "::" || addr === "::1") return true;
+  if (/^f[cd][0-9a-f]{2}:/.test(addr)) return true; // fc00::/7 unique-local
+  if (/^fe[89ab][0-9a-f]:/.test(addr)) return true; // fe80::/10 link-local
+  // URL serialises ::ffff:127.0.0.1 as ::ffff:7f00:1, so decode the hex group.
+  if (!/^::ffff:[0-9a-f]{1,4}:[0-9a-f]{1,4}$/.test(addr)) return false;
+  const hi = parseInt(addr.slice("::ffff:".length), 16); // stops at ":"
+  return isPrivateIpv4(hi >> 8, hi & 0xff);
+}
+
+// Validate that an imageUrl is safe to fetch server-side.
+// Only https: and data: URIs are allowed; http: is rejected, and https:
+// hosts that are localhost names or loopback/private/link-local IP literals
+// are refused to prevent SSRF via internal services / cloud metadata
+// endpoints. The check is lexical: DNS is not resolved, so a public name that
+// points at a private address is not caught here.
+// Throws Error for a disallowed URL; new URL() throws TypeError if the string
+// cannot be parsed.
+function assertSafeUrl(url: string): void {
+  if (url.startsWith("data:")) return;
+  const parsed = new URL(url);
+  if (parsed.protocol !== "https:") {
+    throw new Error(
+      `prevImageUrl must use https: or data: protocol, got ${parsed.protocol}`,
+    );
+  }
+  // URL parsing lowercases the host and canonicalises IPv4 spellings (decimal,
+  // hex, short forms) to dotted-quad. Drop a trailing root dot so "localhost."
+  // cannot slip past the name checks.
+  const host = parsed.hostname.replace(/\.$/, "");
+  const ipv4 = /^(\d{1,3})\.(\d{1,3})\.\d{1,3}\.\d{1,3}$/.exec(host);
+  const isPrivate =
+    host === "localhost" ||
+    host.endsWith(".localhost") ||
+    (ipv4 !== null && isPrivateIpv4(Number(ipv4[1]), Number(ipv4[2]))) ||
+    (host.startsWith("[") && isPrivateIpv6(host.slice(1, -1)));
+  if (isPrivate) {
+    throw new Error(
+      `prevImageUrl resolves to a private/reserved IP: ${host}`,
+    );
+  }
+}
 
 // Pull the bytes from an image URL or data URI into a Buffer suitable for
-// sharp. Data URIs are decoded inline (no network); http(s) URLs are fetched
+// sharp. Data URIs are decoded inline (no network); https: URLs are fetched
 // with a short timeout — if Runware's CDN is slow we'd rather fail the vision
 // step quickly than tie up a 60s Vercel function on a single image read.
 async function loadImageBuffer(imageUrl: string): Promise<Buffer> {
+  assertSafeUrl(imageUrl);
+
   if (imageUrl.startsWith("data:")) {
     const comma = imageUrl.indexOf(",");
     if (comma === -1) throw new Error("Malformed data URI in prevImageUrl");
@@ -23,7 +86,30 @@ async function loadImageBuffer(imageUrl: string): Promise<Buffer> {
         `Failed to fetch prevImageUrl (${res.status}): ${imageUrl.slice(0, 120)}`,
       );
     }
+    const contentLength = res.headers.get("content-length");
+    if (contentLength && Number(contentLength) > MAX_IMAGE_BYTES) {
+      throw new Error(
+        `prevImageUrl response too large (${contentLength} bytes, max ${MAX_IMAGE_BYTES})`,
+      );
+    }
-    const arr = await res.arrayBuffer();
-    return Buffer.from(arr);
+    // Content-Length may be absent or wrong, so enforce the cap while
+    // streaming rather than after the whole body is already in memory.
+    if (!res.body) return Buffer.alloc(0);
+    const reader = res.body.getReader();
+    const chunks: Uint8Array[] = [];
+    let received = 0;
+    for (;;) {
+      const part = await reader.read();
+      if (part.done) break;
+      received += part.value.byteLength;
+      if (received > MAX_IMAGE_BYTES) {
+        await reader.cancel();
+        throw new Error(
+          `prevImageUrl response too large (over ${MAX_IMAGE_BYTES} bytes)`,
+        );
+      }
+      chunks.push(part.value);
+    }
+    return Buffer.concat(chunks, received);
   } finally {
     clearTimeout(timer);
diff --git a/packages/engine/src/jsonParser.ts b/packages/engine/src/jsonParser.ts
index 68d9de9..fc2489b 100644
--- a/packages/engine/src/jsonParser.ts
+++ b/packages/engine/src/jsonParser.ts
@@ -6,8 +6,9 @@ import { jsonrepair, JSONRepairError } from "jsonrepair";
 // 3. Slice between first { and last } and parse.
 // 4. Apply targeted regex pre-repairs (see preRepair) and try jsonrepair.
 //
-// On final failure, logs the FULL raw model output so we can diagnose the
-// actual syntax error.
+// On final failure, logs the first 800 chars of the raw model output so we
+// can diagnose the actual syntax error without flooding logs or leaking
+// sensitive content.
 //
 // jsonrepair (npm package josdejong/jsonrepair — 2.3k+ stars) handles the
 // broad LLM-output failure modes: truncated JSON, missing commas/brackets,
@@ -86,7 +87,7 @@ export function parseJsonLoose<T>(raw: string): T {
   } catch (err) {
     const isRepairErr = err instanceof JSONRepairError;
     console.error(
-      `[parseJsonLoose] jsonrepair ${isRepairErr ? "could not repair" : "succeeded but JSON.parse rejected its output"}. Full raw model output:\n${raw}`,
+      `[parseJsonLoose] jsonrepair ${isRepairErr ? "could not repair" : "succeeded but JSON.parse rejected its output"}. Raw output (first 800 chars):\n${raw.slice(0, 800)}`,
     );
     throw err;
   }