Last active
May 23, 2023 12:49
-
-
Save dfkaye/dcb060a13470de8119833dc56933563f to your computer and use it in GitHub Desktop.
normalize a JSON string (add correct quotes, remove comments, blank lines, and so on)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// 22 Feb 2020 TODO: | |
// needs trailing comma fix (allow trailing comma in arrays and object literals). | |
// see JWCC at https://nigeltao.github.io/blog/2021/json-with-commas-comments.html | |
export { normalize }; | |
// var normalize = (function() { | |
// /////////////////////////////////////////////////// | |
// REVISE THIS FILE, PUT THE MAIN FUNCTION AT TOP, | |
// HELPERS BELOW. A LOT OF THE CHAINED FUNCTIONS ARE | |
// HARD TO READ WITH ALL THE INTERSPERSED COMMENTS. | |
// /////////////////////////////////////////////////// | |
/** | |
* Helper functions and queries for normalize() method. | |
*/ | |
// A newline, any run of whitespace (including further newlines), and a closing
// newline; normalize() replaces the whole run with a single newline.
const BLANK_LINES = /([\n](\s)*(?:\n))/g;
// Optional leading quotes, exactly one whitespace/word character (followed by
// any number of literal `]`), then `//` up to a newline, `^`, `"` or `}`.
// The stray `]` is kept on purpose: requiring one whitespace/word character
// directly before `//` is what stops the `://` in a URL from matching.
const LINE_COMMENTS = /[\"\']*(?:[\s\w]]*)(\/\/[^\n^\"^\}]*)/g;
// Attempt to handle /* */ and /** javadoc style * line * line2 comments */
const MULTILINE_COMMENTS = /[\"\']*(?:\s*)(\/\*+)([^\*]*[\*])([^\/]*[\/])/g;
// One non-backslash character, a single-quoted run, and whatever follows it up
// to the next `}`, `:`, `,` or `^`.
const SINGLE_QUOTES = /([^\\][\'](?:[^\']*)[\'](?:[^\}^\:^\,]*))/g;
// A word that directly follows `,`, `:`, `{` or `[` (plus optional whitespace).
// `[` is included so the first element of an array is processed like the
// elements that follow a comma.
const UNQUOTED_WORDS = /(?:(\,|\:|\{|\[)\s*)([\w]+)/g;
/**
 * Decides how a bare word picked up by the unquoted-word pass is written out.
 *
 * Keywords are matched against the whole word, so identifiers that merely
 * contain one (`untrue`, `nullable`, `isNaN`) are quoted like any other key.
 *
 * @param {string} e A bare word (a run of `\w` characters).
 * @returns {string} `e` unchanged for `true`, `false` and digit-only words;
 *   `""` for `null`, `undefined` and `NaN`; otherwise `e` in double quotes.
 */
function addQuotes(e) {
  // return booleans unchanged
  if (/^(?:false|true)$/.test(e)) {
    return e;
  }
  // return numbers unchanged
  if (/^\d?\.?\d+$/.test(e)) {
    return e;
  }
  // replaces null, undefined, and NaN with empty string
  if (/^(?:null|undefined|NaN)$/.test(e)) {
    e = '';
  }
  return '"' + e + '"';
}
/**
 * Replacement callback for the unquoted-word pass: hands every run of word
 * characters inside the matched fragment to addQuotes().
 * @param {string} m The matched fragment.
 * @returns {string} The fragment with each word rewritten by addQuotes().
 */
function replaceUnquoted(m) {
  var everyWord = /\w+/g;
  var rewritten = m.replace(everyWord, addQuotes);
  return rewritten;
}
/**
 * Wraps bare integers written with a leading zero (01, 007) in double quotes,
 * because JSON does not allow leading zeros in numbers.
 *
 * The first alternative of the pattern consumes complete double-quoted strings
 * and returns them untouched, so digits inside string values ("10",
 * "2018-10-17") are never rewritten. A candidate must not touch a word
 * character or `.` on either side, which leaves 100, 1.05 and 0.5 alone.
 *
 * @param {string} text JSON-like text whose strings are already double-quoted.
 * @returns {string} The text with bare leading-zero integers quoted.
 */
function quoteLeadingZeroNumbers(text) {
  var pattern = /"(?:[^"\\]|\\.)*"|(^|[^\w.])(0\d+)(?![\w.])/g;
  return text.replace(pattern, function (match, before, digits) {
    // `digits` is undefined when the string alternative matched.
    return digits === undefined ? match : before + '"' + digits + '"';
  });
}

/**
 * @method normalize Attempts to fix a JSON string with bare keys (restore
 * missing quotes) and single-quoted keys and values, and remove line comments,
 * block comments, blank lines, etc.
 *
 * The passes run in a fixed order: line comments, block comments, blank lines,
 * single quotes, unquoted words, leading-zero numbers, then a final trim.
 *
 * @param {string} jsonText
 * @returns {string}
 */
function normalize(jsonText) {
  var cleaned = jsonText
    // 18 Oct 2018 - remove comments and blank lines
    // 30 Oct 2018 - preserve quoted comments
    // remove line comments
    .replace(LINE_COMMENTS, e => {
      // Ignore if line comment is quoted.
      return /[\"\']([\s\w]*[\:])?/.test(e[0]) ? e : '';
    })
    // remove multi-line comments
    .replace(MULTILINE_COMMENTS, e => {
      // Ignore if comment is quoted.
      return /[\"\']/.test(e[0]) ? e : '';
    })
    // remove blank lines
    .replace(BLANK_LINES, '\n')
    // 17,18 oct 2018
    // fix single quotes
    // 15 feb 2019
    // escaped apostrophes
    .replace(SINGLE_QUOTES, (m) => {
      /*
       * Replace leading and trailing single quotes with double quotes, trim
       * quoted spaces, ignore quoted apostrophes. The leading quote is only
       * swapped when the trimmed match starts with one, so a match that begins
       * inside a double-quoted value (an apostrophe) is left as it is.
       */
      var t = m.trim();
      var p = m.split(t);
      var r = p[0] + t.replace(/^[\']/, '"') + p[1];
      return r.replace(/([\'])(?:[\s]*)$/, (e) => {
        return '"' + (e.length > 1 ? e.substring(1) : '');
      });
    })
    // 17 october 2018
    // add missing quotes
    .replace(UNQUOTED_WORDS, replaceUnquoted);

  // 28 December 2019 - fix [01] and { 01: 01 }
  // e.g., replace 01 with "01". addQuotes() leaves digit-only words bare, so
  // such values reach this pass unquoted.
  return quoteLeadingZeroNumbers(cleaned).trim();
}
// return normalize; | |
//})(); | |
/* | |
28 December 2019 - fix [01] and { 01: 01 } | |
var tests = [ | |
'[01]', | |
normalize('[01]'), | |
'{ 01: 01 }', | |
normalize('{ 01: 01 }') | |
]; | |
var results = tests.map(function(test) { | |
var result = "OK: " + test; | |
try { | |
JSON.parse(test); | |
} catch (e) { | |
result = 'Error: ' + test + '; ' + e; | |
} | |
return result; | |
}); | |
console.log(JSON.stringify(results, null, 2)); | |
*/ | |
/* | |
[ | |
"Error: [01]; SyntaxError: JSON.parse: expected ',' or ']' after array element at line 1 column 3 of the JSON data", | |
"OK: [\"01\"]", | |
"Error: { 01: 01 }; SyntaxError: JSON.parse: expected property name or '}' at line 1 column 3 of the JSON data", | |
"OK: { \"01\": \"01\" }" | |
] | |
*/ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { normalize } from '/src/data/normalize.js'; | |
describe('normalize(JSONString)', function () { | |
// Expect `chai` to be global along with `describe` and `it`. | |
var assert = chai.assert; | |
describe('single transform', () => {
  // Asserts that normalize() turns `input` into exactly `expected`.
  const expectNormalized = (input, expected) => {
    assert.strictEqual(normalize(input), expected);
  };

  // Asserts that normalize() hands `input` back untouched.
  const expectUnchanged = (input) => {
    assert.strictEqual(normalize(input), input);
  };

  it('trims input', () => {
    expectNormalized(`
{}
`, '{}');
  });

  it('removes line comments', () => {
    const input = `
// line comment 1
{ // line comment 2
// line comment 3
}
// line comment 4
`;
    expectNormalized(input, `{
}`);
  });

  it('removes multiline comments', () => {
    const input = `
/*
multiline comment 1
*/
{/* multiline comment 2
// line comment 3
*/
}
/* multi comment 4
*/
`;
    expectNormalized(input, `{
}`);
  });

  it('removes javadoc comments', () => {
    const input = `
/**
* javadoc 1
*/
{/**
* javadoc 2
*/
}
/**
* javadoc 3
*/
`;
    expectNormalized(input, `{
}`);
  });

  it('ignores quoted comments', () => {
    expectUnchanged(`{ "line": " // should remain ", "multi": " /* should remain */", "javadoc": "/** should remain * in place. */" }`);
  });

  it('ignores urls', () => {
    expectUnchanged(`{ "url": "https://anything" }`);
  });

  it('replaces single quoted entries with double quoted entries', () => {
    const input = `
{
'name': 'first'
}
`;
    expectNormalized(input, `{
"name": "first"
}`);
  });

  it('removes blank lines', () => {
    const input = `
{
}
`;
    expectNormalized(input, `{
}`);
  });

  it('adds double quotes to unquoted keys', () => {
    const input = `
{
key: "value",
key2: "21,213"
}
`;
    expectNormalized(input, `{
"key": "value",
"key2": "21,213"
}`);
  });

  it('replaces null, undefined, and NaN with empty strings', () => {
    const input = `
{
"null": null,
"undefined": undefined,
"NaN": NaN
}
`;
    expectNormalized(input, `{
"null": "",
"undefined": "",
"NaN": ""
}`);
  });

  it('ignores unquoted booleans', () => {
    expectUnchanged(`{ "f": false, "t": true }`);
  });

  it('ignores unquoted numbers', () => {
    expectUnchanged(`{ "9": 9, "-0": -0, ".2": .2 }`);
  });

  it('preserves quoted apostrophes', () => {
    expectUnchanged(`{ "escaped": "Joe\'s", "not-escaped": "Joe's" }`);
  });
});
describe('bulk transform', function () {
  // One document that exercises every transform at once.
  var textContent = `
{
// line comment
/*
multi line
*/
/**
* javadoc
*/
"multi": " /* should remain */ ",
"line": " // should remain ",
"javadoc": " /** should remain * in place */",
apostrophes: {
"escaped": "Joe\'s",
"not-escaped": "Joe's"
},
applicant: {
name : {
first : "my first",
'last' : 'my last'
},
"address" : {
street1 : '1234 Fifth St.',
'street2' : "Suite Sixteen",
"street3" : undefined,
"city" : "The Citu",
"state" : null,
"postalCode" : '12345-6789'
}
},
merchantURL: "https://merchant/url"
}
`;

  // normalize() is called inside each test rather than in the describe body,
  // so a throw is reported as a failed test instead of aborting the run while
  // the suite is still being collected.
  it('should process without throwing or runaway backtracking', () => {
    assert.ok(normalize(textContent));
  });

  it('should parse result', () => {
    var data = JSON.parse(normalize(textContent));
    assert.equal(data['multi'], ' /* should remain */ ');
    assert.equal(data['line'], ' // should remain ');
    assert.equal(data['javadoc'], ' /** should remain * in place */');
    // Inside a template literal `\'` is a plain apostrophe, so both entries
    // hold the same text.
    assert.equal(data["apostrophes"]["escaped"], "Joe\'s");
    assert.equal(data["apostrophes"]["not-escaped"], "Joe\'s");
    assert.equal(data.applicant.name.first, 'my first');
    assert.equal(data.applicant.name.last, 'my last');
    assert.equal(data.applicant.address.street1, '1234 Fifth St.');
    assert.equal(data.applicant.address.street2, 'Suite Sixteen');
    assert.strictEqual(data.applicant.address.street3, '', 'undefined value should be replaced with empty string');
    assert.equal(data.applicant.address.city, 'The Citu');
    assert.strictEqual(data.applicant.address.state, '', 'null should be replaced with empty string');
    assert.equal(data.applicant.address.postalCode, '12345-6789');
    assert.equal(data.merchantURL, "https://merchant/url");
  });
});
}); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// 21 october 2018 note: | |
// API emerging as:
// + JSON.normalize(string) -> string | |
// + JSON.path(object) -> map | |
// + JSON.revive(pathMap) -> object | |
// WORK IN PROGRESS | |
// 14 October 2018 | |
// new gig use case: | |
// + convert json to path-map, then | |
// + make html with the path-map | |
~(function() { | |
// 22 October 2018 | |
/**
 * Renders an `<input>` element for one entry of a path map.
 *
 * @param {Object} map Path map; the value is looked up under the key's string form.
 * @param {*} key Entry to render. `null` and `undefined` give an empty name;
 *   anything else is converted with String().
 * @returns {string} The markup on its own line (leading and trailing newline).
 *   The value is empty when `map` has no own property for the key.
 */
function template(map, key) {
  // Test the value itself rather than its text, so keys that merely contain
  // "null" or "undefined" (e.g. "nullable.path") keep their name.
  var name = key == null ? '' : String(key);
  // Own properties only: inherited members such as `toString` are not entries.
  var value = Object.prototype.hasOwnProperty.call(map, name) ? map[name] : '';
  return `
<input name="${ name }" value="${ value }">
`;
}
// Print the markup for a present key, two lookups against a map that only
// holds another path, and two null keys. Each pair is [map, key].
[
  [{ 'path.to.name': 'should work' }, 'path.to.name'],
  [{ 'path.to.nowhere': 'should not print this' }, 'path.to.name'],
  [{ 'path.to.nowhere': 'should see nowhere' }, 'path.to.nowhere'],
  [{}, null],
  [{ null: 'nullity'}, null]
].forEach(function (pair) {
  console.log(template(pair[0], pair[1]));
});
})(); | |
///////////////////////////////////////////////////////////////////////////////////// | |
// 15 Oct | |
// more wip | |
// fix invalid json | |
/* | |
before: | |
, w | |
{ w | |
' or { plus any space then word characters | |
after: | |
" : " | |
" : { | |
*/ | |
// Fixture: JSON-like text with comments, single quotes and bare words.
var textContent = `
{
// line comment
/*
multi line
*/
/**
javadoc
*/
"multi": " /* should remain */ ",
"line": " // should remain ",
'so\'me' : entry,
applicant: {
name : {
first : "my first",
'last' : 'my last'
},
"address" : {
street1 : '1234 Fifth St.',
'street2' : "Suite Sixteen",
"street3" : undefined,
"city" : "The Citu",
"state" : null,
"postalCode" : '12345-6789'
}
}
}
`;
console.info('start');
console.log(textContent);
// 18 Oct 2018 - remove comments and blank lines
// 30 Oct 2018 - preserve quoted comments
console.info('remove comments');
{
  // Logs each candidate comment, then drops it unless it starts with a quote.
  const dropUnlessQuoted = (candidate) => {
    console.info(candidate);
    if (/[\"\']/.test(candidate[0])) {
      return candidate;
    }
    return '';
  };
  // remove line comments
  const withoutLineComments = textContent.replace(/[\"\']*(?:\s*)(\/\/[^\n^\"^\}]*)/g, dropUnlessQuoted);
  // remove multi-line comments
  const withoutBlockComments = withoutLineComments.replace(/([\"\']*(?:\s*)(\/\*+)[^\*]*(\*\/))/gm, dropUnlessQuoted);
  // remove blank lines
  textContent = withoutBlockComments.replace(/([\n]?(\s)*(?:\n))/g, '\n');
}
console.log(textContent);
/* | |
{ | |
"multi": " /* should remain *\/ ", | |
"line": " // should remain ", | |
'so'me' : entry, | |
applicant: { | |
name : { | |
first : "my first", | |
'last' : 'my last' | |
}, | |
"address" : { | |
street1 : '1234 Fifth St.', | |
'street2' : "Suite Sixteen", | |
"street3" : undefined, | |
"city" : "The Citu", | |
"state" : null, | |
"postalCode" : '12345-6789' | |
} | |
} | |
} | |
*/ | |
// 17,18 oct 2018
// Turn single-quoted keys and values into double-quoted ones.
console.info('fix quotes');
var q = /([\'](?:[^\']*)[\'](?:[^\}^\:^\,]*))/g;
var t = textContent.replace(q, function (m) {
  console.info("_" + m + "_");
  // Opening quote first...
  var opened = m.replace(/^[\']/, '"');
  // ...then the closing quote, keeping any whitespace that trails it.
  return opened.replace(/([\'])(?:[\s]*)$/, function (e) {
    if (e.length > 1) {
      return '"' + e.substring(1);
    }
    return '"';
  });
});
console.warn(t);
/* | |
{ | |
"multi": " /* should remain *\/ ", | |
"line": " // should remain ", | |
"so'me" : entry, | |
applicant: { | |
name : { | |
first : "my first", | |
"last" : "my last" | |
}, | |
"address" : { | |
street1 : "1234 Fifth St.", | |
"street2" : "Suite Sixteen", | |
"street3" : undefined, | |
"city" : "The Citu", | |
"state" : null, | |
"postalCode" : "12345-6789" | |
} | |
} | |
} | |
*/ | |
// 17 october 2018 | |
// success | |
// add missing quotes | |
/* | |
16:59:17.972 "{ name: \"value\", address: { city: 'CITY' } }".replace(/(?:(\,|\:|\{)\s*)([\w]+)/g, (m,o,p,i,j) => { return m.replace(/\w+/g, e => { return '"' + e + '"' }); }); | |
16:59:17.923 "{ \"name\": \"value\", \"address\": { \"city\": 'CITY' } }" | |
*/ | |
/**
 * Wraps a bare word in double quotes; the words `null` and `undefined` become
 * an empty string. Only whole-word matches count, so a word that merely
 * contains one of them (e.g. `nullable`) keeps its text.
 * @param {string} e A bare word.
 * @returns {string} The double-quoted replacement.
 */
function addQuotes(e) {
  if (/^(?:null|undefined)$/.test(e)) {
    e = '';
  }
  return '"' + e + '"';
}
/**
 * Replacement callback: passes every run of word characters in the matched
 * fragment through addQuotes().
 */
function replaceUnquoted(m) {
  var everyWord = /\w+/g;
  return m.replace(everyWord, addQuotes);
}
// Quote each bare word that follows `,`, `:` or `{`.
var fixed = t.replace(
  /(?:(\,|\:|\{)\s*)([\w]+)/g,
  replaceUnquoted
);
console.log( fixed );
/* | |
{ | |
"multi": " /* should remain *\/ ", | |
"line": " // should remain ", | |
"so'me" : "entry", | |
"applicant": { | |
"name" : { | |
"first" : "my first", | |
"last" : "my last" | |
}, | |
"address" : { | |
"street1" : "1234 Fifth St.", | |
"street2" : "Suite Sixteen", | |
"street3" : "", | |
"city" : "The Citu", | |
"state" : "", | |
"postalCode" : "12345-6789" | |
} | |
} | |
} | |
*/ | |
// Parse the repaired text and print one value from each part of the document.
var data = JSON.parse(fixed);
// 18 oct 2018
{
  const { name, address } = data.applicant;
  console.warn(
    data["so'me"],
    name.first,
    name.last,
    address.street1,
    address.street2,
    address.street3,
    address.city,
    address.state,
    address.postalCode
  );
}
/* | |
entry my first my last 1234 Fifth St. Suite Sixteen The Citu 12345-6789 | |
*/ | |
// next up: convert data to path-map... |
null in the array has not changed,
That's because it is at first position.
null in the array has not changed,
That's because it is at first position.
I've found the fix:
const UNQUOTED_WORDS = /(?:(\,|\:|\{|\[)\s*)([\w]+)/g;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I added this code to handle numbers in scientific notation: