import { jsonrepair, JSONRepairError } from "jsonrepair";

// Strict-then-forgiving JSON parser for LLM output. Tries in order:
//   1. Direct JSON.parse on the trimmed text.
//   2. Extract from a ```json``` fenced block.
//   3. Parse the first complete JSON value prefix (handles duplicated objects).
//   4. Slice between the first { and the last } and parse.
//   5. Apply targeted regex pre-repairs (see preRepair) and try jsonrepair.
//
// On final failure, logs the first 800 chars of the raw model output so we
// can diagnose the actual syntax error without flooding logs or leaking
// sensitive content.
//
// jsonrepair (npm package josdejong/jsonrepair — 2.3k+ stars) handles the
// broad LLM-output failure modes: truncated JSON, missing commas/brackets,
// single quotes, Python None/True/False, JS comments. We layer a small set
// of targeted pre-repairs in front of it for failure modes jsonrepair can't
// disambiguate on its own (see preRepair).

// ──────────────────────────────────────────────────────────────────────
// preRepair — fix specific LLM error patterns before handing to jsonrepair.
//
// Pattern 1: missing closing quote on a key.
//   Broken:  "lineDelivery: "语速稍快...",
//   Correct: "lineDelivery": "语速稍快...",
//
// jsonrepair fails on this because it's ambiguous — "lineDelivery: " could
// be a complete string value, leaving "语速稍快..." as a syntax error. But
// if we see the shape "key: "value" we know that structurally it should be
// a key-colon-value triplet.
//
// Match constraints:
//   - The key match excludes the characters ", \n and : so we can't overrun
//     into adjacent fields or absorb the colon as part of the key name.
//   - The colon must be followed by whitespace and another " (the value
//     string's opening quote). This is what disambiguates from a value
//     string that happens to contain a colon.
// ──────────────────────────────────────────────────────────────────────
function preRepair(s: string): string {
  return s.replace(/"([^"\n:]+):(\s+)"/g, '"$1":$2"');
}

/** Index of the first `{` or `[` in `s`, or -1 when neither occurs. */
function firstJsonStart(s: string): number {
  const objectStart = s.indexOf("{");
  const arrayStart = s.indexOf("[");
  if (objectStart === -1) return arrayStart;
  if (arrayStart === -1) return objectStart;
  return Math.min(objectStart, arrayStart);
}

/**
 * Returns the bracket-balanced substring that begins at the first `{` / `[`
 * of `s` and ends at its matching closer, ignoring brackets inside
 * double-quoted strings (backslash escapes are honoured).
 *
 * Returns `undefined` when there is no opening bracket, when a closer does
 * not match the innermost opener, or when the input ends before the value
 * is closed. The result is only structurally balanced — it is NOT
 * guaranteed to be valid JSON.
 */
function firstCompleteJsonValue(s: string): string | undefined {
  const start = firstJsonStart(s);
  if (start === -1) return undefined;

  // Stack of the closers we still expect, innermost last.
  const stack: string[] = [];
  let inString = false;
  let escaped = false;

  for (let i = start; i < s.length; i += 1) {
    const ch = s.charAt(i);

    if (inString) {
      if (escaped) {
        escaped = false;
      } else if (ch === "\\") {
        escaped = true;
      } else if (ch === "\"") {
        inString = false;
      }
      continue;
    }

    if (ch === "\"") {
      inString = true;
      continue;
    }
    if (ch === "{") {
      stack.push("}");
      continue;
    }
    if (ch === "[") {
      stack.push("]");
      continue;
    }
    if (ch === "}" || ch === "]") {
      if (stack.at(-1) !== ch) return undefined;
      stack.pop();
      if (stack.length === 0) return s.slice(start, i + 1);
    }
  }

  return undefined;
}

/**
 * Strict-parses the first bracket-balanced value found in `s`.
 *
 * @returns the parsed value, or `undefined` when no balanced value exists.
 * @throws SyntaxError (from JSON.parse) when the balanced text is not valid JSON.
 */
function parseFirstCompleteJsonValue<T = unknown>(s: string): T | undefined {
  const value = firstCompleteJsonValue(s);
  if (!value) return undefined;
  return JSON.parse(value) as T;
}

/**
 * Chooses the text handed to the last-resort parse / repair stage.
 *
 * Normally this is the slice between the first `{` and the last `}` (or the
 * whole text when there is no such pair). The exception: when the first
 * bracket-balanced value in `text` is an array that encloses that whole
 * brace span, the array itself is returned. Without this, a top-level array
 * with a small syntax error (e.g. `[{"a": 1},]`) would be cut down to one of
 * its inner objects, which then parses "successfully" and silently drops the
 * array wrapper and any sibling elements.
 */
function pickRepairCandidate(text: string): string {
  const firstBrace = text.indexOf("{");
  const lastBrace = text.lastIndexOf("}");
  if (firstBrace === -1 || lastBrace <= firstBrace) return text;

  const start = firstJsonStart(text);
  if (text.charAt(start) === "[") {
    const balanced = firstCompleteJsonValue(text);
    // The balanced value ends on a `]`; it encloses the brace span only if
    // that `]` lies beyond the last `}`.
    if (balanced !== undefined && start + balanced.length > lastBrace + 1) {
      return balanced;
    }
  }

  return text.slice(firstBrace, lastBrace + 1);
}

/**
 * Parses JSON out of raw LLM output, trying progressively more forgiving
 * strategies:
 *   1. JSON.parse on the trimmed text.
 *   2. JSON.parse on the contents of the first ``` / ```json fenced block,
 *      then the first bracket-balanced value inside that block.
 *   3. The first bracket-balanced value in the trimmed text.
 *   4. A repair candidate (see pickRepairCandidate), parsed strictly.
 *   5. The candidate after preRepair, then after jsonrepair.
 *
 * The result is cast to `T` without runtime validation; callers that need
 * guarantees about the shape must validate it themselves.
 *
 * @param raw - Raw model output that is expected to contain a JSON value.
 * @returns The parsed value, typed as `T`.
 * @throws The jsonrepair / JSON.parse error from the final attempt when every
 *   strategy fails. Before rethrowing, the first 800 characters of `raw` are
 *   written to console.error.
 */
export function parseJsonLoose<T = unknown>(raw: string): T {
  const trimmed = raw.trim();

  try {
    return JSON.parse(trimmed) as T;
  } catch {
    // fall through
  }

  const fenced = trimmed.match(/```(?:json)?\s*([\s\S]*?)\s*```/);
  if (fenced?.[1]) {
    try {
      return JSON.parse(fenced[1]) as T;
    } catch {
      try {
        const parsed = parseFirstCompleteJsonValue<T>(fenced[1]);
        if (parsed !== undefined) return parsed;
      } catch {
        // fall through
      }
    }
  }

  try {
    const parsed = parseFirstCompleteJsonValue<T>(trimmed);
    if (parsed !== undefined) return parsed;
  } catch {
    // fall through
  }

  const candidate = pickRepairCandidate(trimmed);

  // Try the candidate as-is first; if there were no braces at all
  // (candidate === trimmed), this is just a second attempt at the raw text.
  try {
    return JSON.parse(candidate) as T;
  } catch {
    // Targeted pre-repair (no-op on already-valid JSON) → jsonrepair.
    const prefixed = preRepair(candidate);

    // If preRepair changed something, give the cheap path another shot —
    // the input might already be valid now without needing jsonrepair.
    if (prefixed !== candidate) {
      try {
        return JSON.parse(prefixed) as T;
      } catch {
        // fall through to jsonrepair
      }
    }

    try {
      const repaired = jsonrepair(prefixed);
      return JSON.parse(repaired) as T;
    } catch (err) {
      const isRepairErr = err instanceof JSONRepairError;
      console.error(
        `[parseJsonLoose] jsonrepair ${isRepairErr ? "could not repair" : "succeeded but JSON.parse rejected its output"}. Raw output (first 800 chars):\n${raw.slice(0, 800)}`,
      );
      throw err;
    }
  }
}