Created
August 6, 2025 07:42
-
-
Save erdesigns-eu/fb9e205eef3178cff16a531166f18a69 to your computer and use it in GitHub Desktop.
JSON state-machine parser for JSON responses produced by LLMs. It fixes common JSON formatting issues and validates/repairs the result against a JSON Schema.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| export interface JSONSchema { | |
| /** | |
| * The type of the JSON value | |
| */ | |
| type?: 'object' | 'array' | 'string' | 'number' | 'boolean' | 'null'; | |
| /** | |
| * Properties of the object, if type is 'object' | |
| */ | |
| properties?: Record<string, JSONSchema>; | |
| /** | |
| * Items of the array, if type is 'array' | |
| */ | |
| items?: JSONSchema; | |
| /** | |
| * Required properties for the object | |
| */ | |
| required?: string[]; | |
| /** | |
| * Additional properties allowed in the object | |
| */ | |
| additionalProperties?: boolean | JSONSchema; | |
| /** | |
| * Pattern for string values, if type is 'string' | |
| */ | |
| patternProperties?: Record<string, JSONSchema>; | |
| /** | |
| * Minimum value for number types | |
| */ | |
| minItems?: number; | |
| /** | |
| * Maximum value for number types | |
| */ | |
| maxItems?: number; | |
| /** | |
| * Description of the schema | |
| */ | |
| description?: string; | |
| /** | |
| * Schema must match one of the provided schemas | |
| */ | |
| oneOf?: JSONSchema[]; | |
| } | |
/**
 * Outcome of validating (and repairing) a value against a {@link JSONSchema}.
 */
export interface ValidationResult {
  /**
   * True when no validation errors were recorded.
   */
  valid: boolean;
  /**
   * Human-readable validation errors, each prefixed with the path of the offending value.
   */
  errors: string[];
  /**
   * The value after coercion/repair. The validator fills this in whether or
   * not validation succeeded, so check `valid` before trusting it.
   */
  repaired?: any;
}
/**
 * Configuration switches accepted by the JSONStateMachineParser constructor.
 * Every field is optional; the constructor fills in the default noted on each field.
 */
export interface JSONParserOptions {
  /**
   * Accept strings delimited by single quotes, e.g. 'value'. Default: true.
   */
  allowSingleQuotedStrings?: boolean;
  /**
   * Accept a trailing comma before the closing bracket of an object or array. Default: true.
   */
  allowTrailingCommas?: boolean;
  /**
   * Strip a leading byte-order mark (U+FEFF) from the input. Default: true.
   */
  removeBom?: boolean;
  /**
   * Rewrite CRLF and lone CR newline sequences as LF before parsing. Default: true.
   */
  normalizeNewlines?: boolean;
  /**
   * Drop control characters other than tab, LF and CR before parsing. Default: true.
   */
  removeControlChars?: boolean;
  /**
   * Turn ISO date strings into Date objects while reviving. Default: false.
   */
  parseDates?: boolean;
}
/**
 * Callback used to transform each parsed value, in the spirit of the
 * second argument of JSON.parse. It is invoked with the containing
 * object bound as `this`, and receives the property key and its value.
 */
export type JSONReviver = (this: any, key: string, value: any) => any;
/**
 * A lenient, hand-written JSON parser intended for JSON produced by LLMs.
 *
 * On top of standard JSON it tolerates Markdown code fences around the
 * payload, single-quoted strings, trailing commas, missing commas between
 * members, and unescaped quotes inside strings (see the individual methods).
 * The parsed value can optionally be post-processed with a reviver and
 * validated/repaired against a {@link JSONSchema}.
 */
export class JSONStateMachineParser {
  /** Markdown code-fence delimiter. */
  private static readonly FENCE = '```';

  /**
   * Matches an optional language label followed by the start of a JSON
   * object/array. Used to detect JSON that begins on the same line as the
   * opening fence (e.g. "```json{...}```").
   */
  private static readonly INLINE_FENCE_PREFIX = /^(?:\s*[A-Za-z][\w+-]*)?\s*(?=[{\[])/;

  /** Exactly four hexadecimal digits, as required after a "\u" escape. */
  private static readonly HEX4 = /^[0-9a-fA-F]{4}$/;

  /**
   * ISO 8601 calendar date, optionally followed by a time and a zone
   * designator (the ECMAScript date-time string format).
   */
  private static readonly ISO_DATE =
    /^\d{4}-\d{2}-\d{2}(?:T\d{2}:\d{2}(?::\d{2}(?:\.\d{1,9})?)?(?:Z|[+-]\d{2}:\d{2})?)?$/;

  /**
   * The text currently being parsed. Reset to '' once parse() finishes
   * (successfully or not) so the parser does not retain the input.
   */
  private input = '';

  /**
   * Read position inside `input`. Reset to 0 once parse() finishes.
   */
  private index = 0;

  /**
   * Parser options with defaults applied; set once in the constructor.
   */
  private readonly options: Required<JSONParserOptions>;

  /**
   * Cheap structural check: does the string *look like* a JSON document?
   *
   * After removing an optional ``` fence and surrounding whitespace the text
   * must either start/end with matching object or array brackets, or be one
   * of the literals `null`, `true`, `false`. The content is NOT parsed, so a
   * `true` result does not guarantee that parse() will succeed.
   *
   * @param raw - The string to check.
   * @returns True if the string has the outer shape of JSON, false otherwise
   *          (including non-string input, empty input and unclosed fences).
   */
  static isStringJson(raw: string): boolean {
    if (typeof raw !== 'string' || raw.length === 0) {
      return false;
    }
    const text = JSONStateMachineParser.stripFence(raw);
    if (text === null) {
      // Opening fence without a closing one.
      return false;
    }
    // Trim the four JSON whitespace characters only.
    let start = 0;
    let finish = text.length - 1;
    while (start <= finish && JSONStateMachineParser.isWhitespaceCode(text.charCodeAt(start))) {
      start++;
    }
    while (finish >= start && JSONStateMachineParser.isWhitespaceCode(text.charCodeAt(finish))) {
      finish--;
    }
    if (start > finish) {
      return false;
    }
    const first = text.charAt(start);
    const last = text.charAt(finish);
    if ((first === '{' && last === '}') || (first === '[' && last === ']')) {
      return true;
    }
    const literal = text.substring(start, finish + 1);
    return literal === 'null' || literal === 'true' || literal === 'false';
  }

  /**
   * Remove a surrounding ``` fence (and its language label) from a string.
   *
   * @param str - Text that may be wrapped in a Markdown code fence.
   * @returns `str` unchanged when it does not start with a fence (leading
   *          whitespace is ignored for this test), `null` when an opening
   *          fence has no closing fence, otherwise the fenced content.
   */
  private static stripFence(str: string): string | null {
    const fence = JSONStateMachineParser.FENCE;
    let open = 0;
    while (open < str.length && JSONStateMachineParser.isWhitespaceCode(str.charCodeAt(open))) {
      open++;
    }
    if (!str.startsWith(fence, open)) {
      return str;
    }
    const bodyStart = open + fence.length;
    const close = str.lastIndexOf(fence);
    if (close < bodyStart) {
      // The only fence found is (part of) the opening one.
      return null;
    }
    const inner = str.substring(bodyStart, close);
    const newline = inner.indexOf('\n');
    const infoLine = newline === -1 ? inner : inner.substring(0, newline);
    // JSON may start right on the fence line: "```{...}```" or "```json{...}```".
    const inline = JSONStateMachineParser.INLINE_FENCE_PREFIX.exec(infoLine);
    if (inline) {
      return inner.substring(inline[0].length);
    }
    // Otherwise the first line is a language label (possibly empty): drop it.
    return newline === -1 ? inner : inner.substring(newline + 1);
  }

  /**
   * @param code - A UTF-16 code unit (NaN is accepted and yields false).
   * @returns True for space, tab, line feed and carriage return.
   */
  private static isWhitespaceCode(code: number): boolean {
    return code === 0x20 || code === 0x09 || code === 0x0A || code === 0x0D;
  }

  /**
   * Assign `target[key] = value` as an own property.
   * A plain assignment to the key "__proto__" would invoke the inherited
   * setter and replace the object's prototype, so that key is defined
   * explicitly instead.
   */
  private static setOwn(target: Record<string, any>, key: string, value: any): void {
    if (key === '__proto__') {
      Object.defineProperty(target, key, {
        value,
        writable: true,
        enumerable: true,
        configurable: true,
      });
    }
    else {
      target[key] = value;
    }
  }

  /**
   * Create a new JSONStateMachineParser instance with optional configuration.
   * @param options - Configuration options; omitted fields fall back to
   *                  lenient defaults (everything on except `parseDates`).
   */
  constructor(options: JSONParserOptions = {}) {
    const {
      allowSingleQuotedStrings = true,
      allowTrailingCommas = true,
      removeBom = true,
      normalizeNewlines = true,
      removeControlChars = true,
      parseDates = false,
    } = options;
    this.options = {
      allowSingleQuotedStrings,
      allowTrailingCommas,
      removeBom,
      normalizeNewlines,
      removeControlChars,
      parseDates,
    };
  }

  /**
   * Parse a JSON or JSON-like string.
   *
   * Pre-processing order: BOM removal, code-fence removal, newline
   * normalisation, control-character removal (each subject to its option).
   * The parsed value is then walked bottom-up for date conversion and the
   * reviver, and finally validated/repaired against `schema` if given.
   *
   * @param raw - The JSON or JSON-like string, optionally wrapped in ``` backticks.
   * @param schema - Optional JSON Schema to validate and repair the result against.
   * @param reviver - Optional function to transform values; applied bottom-up.
   * @returns The parsed (and, with a schema, repaired) value as type T.
   * @throws TypeError if `raw` is not a string.
   * @throws SyntaxError if the text cannot be parsed.
   * @throws Error if the value does not satisfy `schema`.
   */
  public parse<T = any>(raw: string, schema?: JSONSchema, reviver?: JSONReviver): T {
    if (typeof raw !== 'string') {
      throw new TypeError('JSON input must be a string');
    }
    // A BOM may precede the fence or the fenced content, so strip on both sides.
    let src = this.unwrapBackticks(this.stripBom(raw));
    src = this.stripBom(src);
    if (this.options.normalizeNewlines) {
      src = this.normalize(src);
    }
    if (this.options.removeControlChars) {
      src = this.filterControlCharacters(src);
    }

    let revived: any;
    try {
      this.input = src;
      this.index = 0;
      const result = this.parseValue();
      this.skipWhitespace();
      if (this.index < this.input.length) {
        throw new SyntaxError(`Unexpected token at ${this.index}`);
      }
      // The root is wrapped in a holder so it can be revived like any other value.
      revived = this.applyReviver({ '': result }, '', reviver);
    }
    finally {
      // Release the input even when parsing or the reviver throws.
      this.input = '';
      this.index = 0;
    }

    if (schema) {
      const validation = this.validateAndRepair(revived, schema);
      if (!validation.valid) {
        throw new Error(`JSON does not match schema: ${validation.errors.join(', ')}`);
      }
      return validation.repaired as T;
    }
    return revived as T;
  }

  /**
   * Remove a leading byte-order mark when the `removeBom` option is enabled.
   * @param str - The input string.
   * @returns The string without a leading U+FEFF.
   */
  private stripBom(str: string): string {
    return this.options.removeBom && str.charCodeAt(0) === 0xFEFF ? str.slice(1) : str;
  }

  /**
   * Recursively walk a value bottom-up, converting ISO date strings (when
   * `parseDates` is enabled) and applying the reviver.
   * @param holder - The object holding the value.
   * @param key - The key of the value inside `holder`.
   * @param reviver - Optional reviver function; called with `holder` as `this`.
   * @returns The revived value; `undefined` makes the caller delete the property.
   */
  private applyReviver(holder: any, key: string, reviver?: JSONReviver): any {
    let value = holder[key];
    if (value !== null && typeof value === 'object') {
      for (const childKey of Object.keys(value)) {
        const child = this.applyReviver(value, childKey, reviver);
        if (child === undefined) {
          delete value[childKey];
        }
        else {
          // childKey is an own property here, so this never reaches a prototype setter.
          value[childKey] = child;
        }
      }
    }
    if (
      this.options.parseDates &&
      typeof value === 'string' &&
      JSONStateMachineParser.ISO_DATE.test(value)
    ) {
      // Only ISO-shaped strings are considered: `new Date("1")` and similar
      // are accepted by engines but are not dates in the data.
      const date = new Date(value);
      if (!isNaN(date.valueOf())) {
        value = date;
      }
    }
    return reviver ? reviver.call(holder, key, value) : value;
  }

  /**
   * Remove ``` wrappers and optional language label (e.g., json).
   * @param str - The input string to unwrap.
   * @returns The fenced content, or `str` unchanged when it is not fenced or
   *          the closing fence is missing.
   */
  private unwrapBackticks(str: string): string {
    const content = JSONStateMachineParser.stripFence(str);
    return content === null ? str : content;
  }

  /**
   * Normalize CRLF and lone CR to LF.
   * @param str - The input string to normalize.
   * @returns The normalized string.
   */
  private normalize(str: string): string {
    return str.replace(/\r\n?/g, '\n');
  }

  /**
   * Remove control characters below U+0020 except \t, \n and \r.
   * @param str - The input string to filter.
   * @returns The filtered string.
   */
  private filterControlCharacters(str: string): string {
    return str.replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F]/g, '');
  }

  /**
   * Parse any JSON value starting at the current position.
   * @returns The parsed value.
   * @throws SyntaxError at end of input or on an unrecognised token.
   */
  private parseValue(): any {
    this.skipWhitespace();
    if (this.index >= this.input.length) {
      throw new SyntaxError('Unexpected end');
    }
    const c = this.input.charAt(this.index);
    switch (c) {
      case '{':
        return this.parseObject();
      case '[':
        return this.parseArray();
      case '"':
        return this.parseString('"');
      case '\'':
        if (this.options.allowSingleQuotedStrings) {
          return this.parseString('\'');
        }
        break;
      default:
        break;
    }
    if (c === '-' || this.isDigit(c)) {
      return this.parseNumber();
    }
    return this.parseLiteral();
  }

  /**
   * Parse an object. Commas between members are optional; a trailing comma
   * is accepted when `allowTrailingCommas` is enabled.
   * @returns The parsed object.
   * @throws SyntaxError on a malformed member or a disallowed trailing comma.
   */
  private parseObject(): Record<string, any> {
    this.index++; // consume '{'
    const obj: Record<string, any> = {};
    let first = true;
    for (;;) {
      this.skipWhitespace();
      if (this.input.charAt(this.index) === '}') {
        this.index++;
        return obj;
      }
      if (!first && this.input.charAt(this.index) === ',') {
        this.index++;
        this.skipWhitespace();
        if (this.input.charAt(this.index) === '}') {
          if (!this.options.allowTrailingCommas) {
            throw new SyntaxError(`Trailing comma at ${this.index}`);
          }
          this.index++;
          return obj;
        }
      }
      first = false;
      const key = this.parseKey();
      // Whitespace is legal between the key and the colon.
      this.skipWhitespace();
      if (this.input.charAt(this.index) !== ':') {
        throw new SyntaxError(`Expected ':' at ${this.index}`);
      }
      this.index++;
      const val = this.parseValue();
      // A key ending in a backslash has the backslash dropped
      // (presumably repairing a stray escape before the closing quote).
      JSONStateMachineParser.setOwn(obj, key.endsWith('\\') ? key.slice(0, -1) : key, val);
    }
  }

  /**
   * Parse an array. Commas between elements are optional; a trailing comma
   * is accepted when `allowTrailingCommas` is enabled.
   * @returns The parsed array.
   * @throws SyntaxError on a malformed element or a disallowed trailing comma.
   */
  private parseArray(): any[] {
    this.index++; // consume '['
    const arr: any[] = [];
    let first = true;
    for (;;) {
      this.skipWhitespace();
      if (this.input.charAt(this.index) === ']') {
        this.index++;
        return arr;
      }
      if (!first && this.input.charAt(this.index) === ',') {
        this.index++;
        this.skipWhitespace();
        if (this.input.charAt(this.index) === ']') {
          if (!this.options.allowTrailingCommas) {
            throw new SyntaxError(`Trailing comma at ${this.index}`);
          }
          this.index++;
          return arr;
        }
      }
      first = false;
      arr.push(this.parseValue());
    }
  }

  /**
   * Parse a quoted string starting at the opening quote.
   *
   * Leniencies: a quote character only terminates the string when the next
   * non-whitespace character is `,`, `:`, `}`, `]` or the end of input
   * (otherwise it is kept as content, which repairs unescaped inner quotes);
   * an unterminated string runs to the end of input; an unknown escape
   * yields the escaped character itself; a "\u" not followed by four hex
   * digits yields a literal "u".
   *
   * @param quote - The quote character that opened the string.
   * @returns The decoded string.
   */
  private parseString(quote: '"' | '\''): string {
    this.index++; // consume opening quote
    const buf: string[] = [];
    while (this.index < this.input.length) {
      const ch = this.input.charAt(this.index++);
      if (ch === '\\') {
        const esc = this.input.charAt(this.index++);
        switch (esc) {
          case 'b': buf.push('\b'); break;
          case 'f': buf.push('\f'); break;
          case 'n': buf.push('\n'); break;
          case 'r': buf.push('\r'); break;
          case 't': buf.push('\t'); break;
          case 'u': {
            const hex = this.input.slice(this.index, this.index + 4);
            if (JSONStateMachineParser.HEX4.test(hex)) {
              buf.push(String.fromCharCode(parseInt(hex, 16)));
              this.index += 4;
            }
            else {
              // Malformed escape: do not consume what follows (it may be the closing quote).
              buf.push(esc);
            }
            break;
          }
          default: buf.push(esc); break;
        }
      }
      else if (ch === quote) {
        const next = this.peekNextNonWhitespace();
        if (next === undefined || ',:}]'.includes(next)) {
          break;
        }
        buf.push(ch);
      }
      else {
        buf.push(ch);
      }
    }
    return buf.join('');
  }

  /**
   * Parse a number: optional minus, integer digits, optional fraction and
   * optional exponent. The consumed text is converted with Number().
   * @returns The parsed number.
   * @throws SyntaxError if the consumed text is not a number (e.g. "-" or "1e").
   */
  private parseNumber(): number {
    const start = this.index;
    if (this.input.charAt(this.index) === '-') {
      this.index++;
    }
    this.skipDigits();
    if (this.input.charAt(this.index) === '.') {
      this.index++;
      this.skipDigits();
    }
    if (this.input.charAt(this.index).toLowerCase() === 'e') {
      this.index++;
      const sign = this.input.charAt(this.index);
      if (sign === '+' || sign === '-') {
        this.index++;
      }
      this.skipDigits();
    }
    const num = Number(this.input.slice(start, this.index));
    if (isNaN(num)) {
      throw new SyntaxError('Invalid number');
    }
    return num;
  }

  /**
   * Advance the index past a run of decimal digits (possibly empty).
   */
  private skipDigits(): void {
    while (this.isDigit(this.input.charAt(this.index))) {
      this.index++;
    }
  }

  /**
   * Parse one of the literals true, false or null at the current position.
   * @returns The parsed literal value.
   * @throws SyntaxError if none of the literals starts here.
   */
  private parseLiteral(): boolean | null {
    if (this.input.startsWith('true', this.index)) {
      this.index += 4;
      return true;
    }
    if (this.input.startsWith('false', this.index)) {
      this.index += 5;
      return false;
    }
    if (this.input.startsWith('null', this.index)) {
      this.index += 4;
      return null;
    }
    throw new SyntaxError('Unexpected literal');
  }

  /**
   * Parse an object key, which must be a quoted string. Single quotes are
   * accepted only when `allowSingleQuotedStrings` is enabled.
   * @returns The parsed key.
   * @throws SyntaxError if the next token is not an accepted quote.
   */
  private parseKey(): string {
    this.skipWhitespace();
    const opener = this.input.charAt(this.index);
    if (opener === '"') {
      return this.parseString('"');
    }
    if (opener === '\'' && this.options.allowSingleQuotedStrings) {
      return this.parseString('\'');
    }
    throw new SyntaxError(`Expected property name at ${this.index}`);
  }

  /**
   * Advance the index past spaces, tabs, newlines and carriage returns.
   */
  private skipWhitespace(): void {
    while (JSONStateMachineParser.isWhitespaceCode(this.input.charCodeAt(this.index))) {
      this.index++;
    }
  }

  /**
   * Look ahead, without consuming input, for the next character that is
   * not a space, tab, newline or carriage return.
   * @returns That character, or undefined if only whitespace remains.
   */
  private peekNextNonWhitespace(): string | undefined {
    for (let i = this.index; i < this.input.length; i++) {
      if (!JSONStateMachineParser.isWhitespaceCode(this.input.charCodeAt(i))) {
        return this.input.charAt(i);
      }
    }
    return undefined;
  }

  /**
   * @param c - A single character (or '' at end of input).
   * @returns True if the character is a decimal digit 0-9.
   */
  private isDigit(c: string): boolean {
    return c >= '0' && c <= '9';
  }

  /**
   * Validate and repair a value against a JSON schema.
   *
   * With `oneOf`, each alternative is tried in order: the first that
   * validates wins, otherwise the attempt with the fewest errors is
   * returned (other keywords on the same schema are not evaluated).
   * Without `oneOf`, the value is type-checked/coerced and then validated
   * as an object or array when applicable.
   *
   * @param value - The value to validate.
   * @param schema - The JSON schema to validate against.
   * @param path - The current path for error reporting.
   * @returns Validation result with errors and repaired value.
   */
  private validateAndRepair(value: any, schema: JSONSchema, path: string = ''): ValidationResult {
    if (schema.oneOf) {
      let best: ValidationResult | null = null;
      for (const candidate of schema.oneOf) {
        const attempt = this.validateAndRepair(value, candidate, path);
        if (attempt.valid) {
          return attempt;
        }
        if (best === null || attempt.errors.length < best.errors.length) {
          best = attempt;
        }
      }
      // `best` is only null for an empty oneOf list.
      return best ?? {
        valid: false,
        errors: [`${path}: value does not match any schema in oneOf`],
        repaired: value,
      };
    }

    const errors: string[] = [];
    let repaired = value;

    if (schema.type) {
      const typed = this.validateType(value, schema.type, path);
      errors.push(...typed.errors);
      repaired = typed.repaired ?? repaired;
    }

    if (schema.type === 'object' && repaired !== null && typeof repaired === 'object' && !Array.isArray(repaired)) {
      const checked = this.validateObject(repaired, schema, path);
      errors.push(...checked.errors);
      repaired = checked.repaired ?? repaired;
    }

    if (schema.type === 'array' && Array.isArray(repaired)) {
      const checked = this.validateArray(repaired, schema, path);
      errors.push(...checked.errors);
      repaired = checked.repaired ?? repaired;
    }

    return { valid: errors.length === 0, errors, repaired };
  }

  /**
   * Validate a value against an expected type, coercing where possible.
   *
   * Silent coercions (no error recorded): anything to string, numeric-like
   * values to number, anything to boolean (the strings "true"/"false" and
   * "1"/"0" are read by meaning rather than truthiness), null/undefined to
   * an empty object or array, anything to null. An error is recorded when a
   * value cannot become a number, or a non-nullish value is not the
   * expected object/array.
   *
   * @param value - The value to validate.
   * @param expectedType - The expected type.
   * @param path - The current path for error reporting.
   * @returns Validation result with type coercion.
   */
  private validateType(value: any, expectedType: string, path: string): ValidationResult {
    const errors: string[] = [];
    let repaired = value;
    const isNullish = value === null || value === undefined;

    switch (expectedType) {
      case 'string':
        if (typeof value !== 'string') {
          repaired = isNullish ? '' : String(value);
        }
        break;
      case 'number':
        if (typeof value !== 'number') {
          const num = Number(value);
          if (isNaN(num)) {
            errors.push(`${path}: expected number, got ${typeof value}`);
            repaired = 0;
          }
          else {
            repaired = num;
          }
        }
        break;
      case 'boolean':
        if (typeof value === 'string') {
          // Boolean("false") is true; interpret the common textual forms instead.
          const text = value.trim().toLowerCase();
          if (text === 'true' || text === '1') {
            repaired = true;
          }
          else if (text === 'false' || text === '0') {
            repaired = false;
          }
          else {
            repaired = Boolean(value);
          }
        }
        else if (typeof value !== 'boolean') {
          repaired = Boolean(value);
        }
        break;
      case 'object':
        if (isNullish || typeof value !== 'object' || Array.isArray(value)) {
          if (!isNullish) {
            errors.push(`${path}: expected object, got ${typeof value}`);
          }
          repaired = {};
        }
        break;
      case 'array':
        if (!Array.isArray(value)) {
          if (!isNullish) {
            errors.push(`${path}: expected array, got ${typeof value}`);
          }
          repaired = [];
        }
        break;
      case 'null':
        repaired = null;
        break;
      default:
        break;
    }

    return { valid: errors.length === 0, errors, repaired };
  }

  /**
   * Validate an object against a schema, working on a shallow copy.
   *
   * Missing required properties are reported and filled with a default;
   * `properties` and `patternProperties` are validated recursively; with
   * `additionalProperties: false`, properties covered by neither are removed.
   *
   * @param obj - The object to validate.
   * @param schema - The schema to validate against.
   * @param path - The current path for error reporting.
   * @returns Validation result.
   */
  private validateObject(obj: any, schema: JSONSchema, path: string): ValidationResult {
    const errors: string[] = [];
    const repaired: Record<string, any> = {};
    // Own-property test: `in` would also report inherited names such as "toString".
    const has = (key: string): boolean => Object.prototype.hasOwnProperty.call(repaired, key);

    for (const key of Object.keys(obj)) {
      JSONStateMachineParser.setOwn(repaired, key, obj[key]);
    }

    if (schema.required) {
      for (const requiredProp of schema.required) {
        if (!has(requiredProp)) {
          errors.push(`${path}.${requiredProp}: required property missing`);
          const propSchema = schema.properties ? schema.properties[requiredProp] : undefined;
          JSONStateMachineParser.setOwn(
            repaired,
            requiredProp,
            propSchema ? this.getDefaultValue(propSchema) : null,
          );
        }
      }
    }

    if (schema.properties) {
      for (const [propName, propSchema] of Object.entries(schema.properties)) {
        if (has(propName)) {
          const result = this.validateAndRepair(repaired[propName], propSchema, `${path}.${propName}`);
          errors.push(...result.errors);
          repaired[propName] = result.repaired;
        }
      }
    }

    // Keys matched by any pattern; reused for the additionalProperties check.
    const patternMatched = new Set<string>();
    if (schema.patternProperties) {
      for (const [pattern, patternSchema] of Object.entries(schema.patternProperties)) {
        const regex = new RegExp(pattern);
        for (const key of Object.keys(repaired)) {
          if (regex.test(key)) {
            patternMatched.add(key);
            const result = this.validateAndRepair(repaired[key], patternSchema, `${path}.${key}`);
            errors.push(...result.errors);
            repaired[key] = result.repaired;
          }
        }
      }
    }

    if (schema.additionalProperties === false) {
      const declared = new Set(Object.keys(schema.properties || {}));
      for (const key of Object.keys(repaired)) {
        if (!declared.has(key) && !patternMatched.has(key)) {
          delete repaired[key];
        }
      }
    }

    return { valid: errors.length === 0, errors, repaired };
  }

  /**
   * Validate an array against a schema, producing a new array.
   *
   * Too few items: an error is recorded and the result is padded with
   * defaults up to `minItems`. Too many items: an error is recorded and
   * items beyond `maxItems` are dropped. Each kept item is validated
   * against `items` when present.
   *
   * @param arr - The array to validate.
   * @param schema - The schema to validate against.
   * @param path - The current path for error reporting.
   * @returns Validation result.
   */
  private validateArray(arr: any[], schema: JSONSchema, path: string): ValidationResult {
    const errors: string[] = [];
    const repaired: any[] = [];
    const { minItems, maxItems, items } = schema;
    const tooShort = minItems !== undefined && arr.length < minItems;

    if (tooShort) {
      errors.push(`${path}: array has ${arr.length} items, minimum is ${minItems}`);
    }
    if (maxItems !== undefined && arr.length > maxItems) {
      errors.push(`${path}: array has ${arr.length} items, maximum is ${maxItems}`);
    }

    const keep = maxItems === undefined ? arr.length : Math.min(arr.length, maxItems);
    for (let i = 0; i < keep; i++) {
      if (items) {
        const result = this.validateAndRepair(arr[i], items, `${path}[${i}]`);
        errors.push(...result.errors);
        repaired[i] = result.repaired;
      }
      else {
        repaired[i] = arr[i];
      }
    }

    if (tooShort && minItems !== undefined) {
      while (repaired.length < minItems) {
        repaired.push(items ? this.getDefaultValue(items) : null);
      }
    }

    return { valid: errors.length === 0, errors, repaired };
  }

  /**
   * Get the default value used to fill in a missing value of a schema's type.
   * @param schema - The schema to get a default value for.
   * @returns '' / 0 / false / [] / {} for the matching type, otherwise null.
   */
  private getDefaultValue(schema: JSONSchema): any {
    switch (schema.type) {
      case 'string': return '';
      case 'number': return 0;
      case 'boolean': return false;
      case 'array': return [];
      case 'object': return {};
      default: return null;
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment