Created
March 28, 2026 18:31
-
-
Save MAZ01001/1fd5c57efa46b87e73910e7857ae93ce to your computer and use it in GitHub Desktop.
Simple RegExp engine in JavaScript for understanding backtracking in RegExp via try-catch
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // got the idea while answering https://stackoverflow.com/questions/79534604/regular-expressions-loop-code-how-backtracking-works | |
| /** | |
| * @callback MatchCallback sub-match | |
| * @param {string | ArrayLike<string>} text - string or list of characters (`{length: number, [index: number]: string}`) | |
| * @param {number} index - index into `text` (safe integer; can be out of range) | |
| * @returns {number} new `index` (safe integer; unchanged if empty match) | |
| * @throws {TypeError} if {@linkcode index} is not a safe integer | |
| * @throws {TypeError} if {@linkcode text} is not a string nor a list of characters (`{length: number, [index: number]: string}` and entries are not empty) | |
| * @throws {typeof Backtracking} if match fails ! (other errors should be thrown again and not silenced) | |
| */ | |
| /** @type {symbol} */ | |
| const Backtracking = Symbol.for("Backtracking"); | |
| /** | |
| * match nothing (checks if {@linkcode index} is within or at the end of {@linkcode text}) | |
| * @param {string | ArrayLike<string>} text - string or list of characters | |
| * @param {number} index - index into {@linkcode text} | |
| * @returns {number} unchanged {@linkcode index} | |
| * @throws {TypeError} if {@linkcode text} is not a string nor a list of characters (`{length: number, [index: number]: string}` and entries are not empty) ! entries are not checked (as it's not necessary) | |
| * @throws {TypeError} if {@linkcode index} is not a safe integer | |
| * @throws {typeof Backtracking} if match fails | |
| */ | |
| const matchEmpty = (text, index) => { | |
| if(typeof text !== "string" && typeof text?.length !== "number") throw new TypeError("text is not a string nor a list"); | |
| if(!Number.isSafeInteger(index)) throw new TypeError("index is not a safe integer"); | |
| if(index < 0 || index > text.length) throw Backtracking; | |
| return index; | |
| } | |
| /** | |
| * match when {@linkcode index} is at the end of {@linkcode text} | |
| * @param {string | ArrayLike<string>} text - string or list of characters | |
| * @param {number} index - index into {@linkcode text} | |
| * @param {boolean} [invert] - [optional] when `true` matches if {@linkcode index} is at the start of {@linkcode text} instead - default `false` | |
| * @returns {number} unchanged {@linkcode index} | |
| * @throws {TypeError} if {@linkcode text} is not a string nor a list of characters (`{length: number, [index: number]: string}` and entries are not empty) ! entries are not checked (as it's not necessary) | |
| * @throws {TypeError} if {@linkcode index} is not a safe integer | |
| * @throws {TypeError} if {@linkcode invert} is given but not a boolean | |
| * @throws {typeof Backtracking} if match fails | |
| */ | |
| const matchEnd = (text, index, invert) => { | |
| if(typeof text !== "string" && typeof text?.length !== "number") throw new TypeError("text is not a string nor a list"); | |
| if(!Number.isSafeInteger(index)) throw new TypeError("index is not a safe integer"); | |
| if(invert != null && typeof invert !== "boolean") throw new TypeError("invert is given but not a boolean"); | |
| if(invert ?? false ? index !== 0 : index !== text.length) throw Backtracking; | |
| return index; | |
| }; | |
| /** | |
| * match any (one) character at {@linkcode index} in {@linkcode text} | |
| * @param {string | ArrayLike<string>} text - string or list of characters | |
| * @param {number} index - index into {@linkcode text} | |
| * @param {number} [amount] - [optional] number of characters to match - default `1` | |
| * @returns {number} new {@linkcode index} (unchanged when {@linkcode amount} is `0`) | |
| * @throws {TypeError} if {@linkcode index} is not a safe integer | |
| * @throws {TypeError} if {@linkcode amount} is given but not a positive safe integer | |
| * @throws {TypeError} if {@linkcode text} is not a string nor a list of characters (`{length: number, [index: number]: string}` and entries are not empty) | |
| * @throws {typeof Backtracking} if match fails */ | |
| const matchAny = (text, index, amount) => { | |
| if(!Number.isSafeInteger(index)) throw new TypeError("index is not a safe integer"); | |
| if(amount == null) amount = 1; | |
| else if(!Number.isSafeInteger(amount) || amount < 0) throw new TypeError("amount is given but not a positive safe integer"); | |
| if(typeof text === "string"){ | |
| if(amount === 0) return index; | |
| if(index < 0 || index + amount > text.length) throw Backtracking; | |
| return index + amount; | |
| } | |
| if(typeof text?.length === "number"){ | |
| if(amount === 0) return index; | |
| if(index < 0 || index + amount > text.length) throw Backtracking; | |
| for(const max = index + amount; index < max; ++index) | |
| if(typeof text[index] !== "string" || text[index].length === 0) throw new TypeError("text entry is not a string or empty"); | |
| return index; | |
| } | |
| throw new TypeError("text is not a string nor a list"); | |
| }; | |
| /** | |
| * match {@linkcode chars} at {@linkcode index} in {@linkcode text} | |
| * @param {string | ArrayLike<string>} text - string or list of characters | |
| * @param {number} index - index into {@linkcode text} | |
| * @param {string | ArrayLike<string>} chars - string or list of characters to match (in order) | |
| * @returns {number} new {@linkcode index} (unchanged if {@linkcode chars} is empty) | |
| * @throws {TypeError} if {@linkcode index} is not a safe integer | |
| * @throws {TypeError} if {@linkcode chars} is not a string nor a list of characters (`{length: number, [index: number]: string}` and entries are not empty) | |
| * @throws {TypeError} if {@linkcode text} is not a string nor a list of characters (`{length: number, [index: number]: string}` and entries are not empty) | |
| * @throws {typeof Backtracking} if match fails */ | |
| const matchChars = (text, index, chars) => { | |
| if(!Number.isSafeInteger(index)) throw new TypeError("index is not a safe integer"); | |
| if(typeof text === "string"){ | |
| if(typeof chars === "string"){ | |
| if(chars.length === 0) return index; | |
| if(index < 0 || index + chars.length > text.length) throw Backtracking; | |
| for(let i = 0; i < chars.length; ++i) | |
| if(text[index + i] !== chars[i]) throw Backtracking; | |
| return index + chars.length; | |
| } | |
| if(typeof chars?.length === "number"){ | |
| if(chars.length === 0) return index; | |
| if(index < 0 || index + chars.length > text.length) throw Backtracking; | |
| for(let i = 0; i < chars.length; ++i){ | |
| const char = chars[i]; | |
| if(typeof char !== "string" || char.length === 0) throw new TypeError("chars entry is not a string or empty"); | |
| if(text[index + i] !== char) throw Backtracking; | |
| } | |
| return index + chars.length; | |
| } | |
| throw new TypeError("chars is not a string nor a list"); | |
| } | |
| if(typeof text?.length === "number"){ | |
| if(typeof chars === "string"){ | |
| if(chars.length === 0) return index; | |
| if(index < 0 || index + chars.length > text.length) throw Backtracking; | |
| for(let i = 0; i < chars.length; ++i){ | |
| const entry = text[index + i]; | |
| if(typeof entry !== "string" || entry.length === 0) throw new TypeError("text entry is not a string or empty"); | |
| if(entry !== chars[i]) throw Backtracking; | |
| } | |
| return index + chars.length; | |
| } | |
| if(typeof chars?.length === "number"){ | |
| if(chars.length === 0) return index; | |
| if(index < 0 || index + chars.length > text.length) throw Backtracking; | |
| for(let i = 0; i < chars.length; ++i){ | |
| const entry = text[index + i], | |
| char = chars[i]; | |
| if(typeof entry !== "string" || entry.length === 0) throw new TypeError("text entry is not a string or empty"); | |
| if(typeof char !== "string" || char.length === 0) throw new TypeError("chars entry is not a string or empty"); | |
| if(entry !== char) throw Backtracking; | |
| } | |
| return index + chars.length; | |
| } | |
| throw new TypeError("chars is not a string nor a list"); | |
| } | |
| throw new TypeError("text is not a string nor a list"); | |
| }; | |
| /** | |
| * match any (one) character of {@linkcode chars} at {@linkcode index} in {@linkcode text} | |
| * @param {string | ArrayLike<string>} text - string or list of characters | |
| * @param {number} index - index into {@linkcode text} | |
| * @param {string} fromChar - start of character range (inclusive) | |
| * @param {string} toChar - end of character range (inclusive) | |
| * @param {boolean} [invert] - [optional] `true`: match if not in character range - default `false` | |
| * @returns {number} new {@linkcode index} | |
| * @throws {TypeError} if {@linkcode index} is not a safe integer | |
| * @throws {TypeError} if {@linkcode invert} is given but not a boolean | |
| * @throws {TypeError} if {@linkcode fromChar} is not a character (non-empty string) | |
| * @throws {TypeError} if {@linkcode toChar} is not a character (non-empty string) | |
| * @throws {RangeError} if {@linkcode toChar} is less than {@linkcode fromChar} | |
| * @throws {TypeError} if {@linkcode text} is not a string nor a list of characters (`{length: number, [index: number]: string}` and entries are not empty) | |
| * @throws {typeof Backtracking} if match fails (or matches when {@linkcode invert} is `true`) | |
| */ | |
| const matchCharRange = (text, index, fromChar, toChar, invert) => { | |
| if(!Number.isSafeInteger(index)) throw new TypeError("index is not a safe integer"); | |
| if(invert != null && typeof invert !== "boolean") throw new TypeError("invert is given but not a boolean"); | |
| if(typeof fromChar !== "string" || fromChar.length === 0) throw new TypeError("fromChar is not a string or empty"); | |
| if(typeof toChar !== "string" || toChar.length === 0) throw new TypeError("toChar is not a string or empty"); | |
| if(toChar < fromChar) throw new RangeError("toChar is less than fromChar"); | |
| let entry; | |
| if(typeof text === "string"){ | |
| if(index < 0 || index >= text.length) throw Backtracking; | |
| entry = text[index]; | |
| }else if(typeof text?.length === "number"){ | |
| if(index < 0 || index >= text.length) throw Backtracking; | |
| entry = text[index]; | |
| if(typeof entry !== "string" || entry.length === 0) throw new TypeError("text entry is not a string or empty"); | |
| }else throw new TypeError("text is not a string nor a list"); | |
| if((entry >= fromChar && entry <= toChar) === (invert ?? false)) throw Backtracking; | |
| return index + 1; | |
| }; | |
| /** | |
| * match any (one) character in {@linkcode chars} at {@linkcode index} in {@linkcode text} | |
| * @param {string | ArrayLike<string>} text - string or list of characters | |
| * @param {number} index - index into {@linkcode text} | |
| * @param {ArrayLike<string>} chars - (non-empty) list of characters to try matching (in order) | |
| * @param {boolean} [invert] - [optional] `true`: match if none of {@linkcode chars} match - default `false` | |
| * @returns {number} new {@linkcode index} | |
| * @throws {TypeError} if {@linkcode index} is not a safe integer | |
| * @throws {TypeError} if {@linkcode invert} is given but not a boolean | |
| * @throws {TypeError} if {@linkcode text} is not a string nor a list of characters (`{length: number, [index: number]: string}` and entries are not empty) | |
| * @throws {TypeError} if {@linkcode chars} is not a list of characters or empty (`{length: number, [index: number]: string}` and entries are not empty) | |
| * @throws {typeof Backtracking} if all {@linkcode chars} fail to match (or if any match when {@linkcode invert} is `true`) | |
| */ | |
| const matchAnyCharIn = (text, index, chars, invert) => { | |
| if(!Number.isSafeInteger(index)) throw new TypeError("index is not a safe integer"); | |
| if(invert == null) invert = false; | |
| else if(typeof invert !== "boolean") throw new TypeError("invert is given but not a boolean"); | |
| if(typeof chars?.length !== "number" || chars.length === 0) throw new TypeError("chars is not a list or empty"); | |
| let entry; | |
| if(typeof text === "string"){ | |
| if(index < 0 || index >= text.length) throw Backtracking; | |
| entry = text[index]; | |
| }else if(typeof text?.length === "number"){ | |
| if(index < 0 || index >= text.length) throw Backtracking; | |
| entry = text[index]; | |
| if(typeof entry !== "string" || entry.length === 0) throw new TypeError("text entry is not a string or empty"); | |
| }else throw new TypeError("text is not a string nor a list"); | |
| for(let i = 0; i < chars.length; ++i){ | |
| const char = chars[i]; | |
| if(typeof char !== "string" || char.length === 0) throw new TypeError("chars entry is not a string or empty"); | |
| if(entry !== char) continue; | |
| if(invert) throw Backtracking; | |
| return index + 1; | |
| } | |
| if(invert) return index + 1; | |
| throw Backtracking; | |
| }; | |
| /** | |
| * match any (one) of {@linkcode matchCallbacks} starting at {@linkcode index} in {@linkcode text} (followed by {@linkcode restCallback}) | |
| * @param {string | ArrayLike<string>} text - string or list of characters | |
| * @param {number} index - index into {@linkcode text} | |
| * @param {ArrayLike<MatchCallback>} matchCallbacks - (non-empty) list of matches to check (in order) | |
| * @param {MatchCallback} restCallback - rest of match-program (if no match follows, this should return `index` unchanged) | |
| * @returns {number} new {@linkcode index} from {@linkcode restCallback} | |
| * @throws {TypeError} if {@linkcode text} is not a string nor a list of characters (`{length: number, [index: number]: string}` and entries are not empty) ! entries are not checked here directly | |
| * @throws {TypeError} if {@linkcode index} is not a safe integer | |
| * @throws {TypeError} if {@linkcode matchCallbacks} is not a list of matches or empty (`{length: number, [index: number]:`{@linkcode MatchCallback}`}` where {@linkcode MatchCallback} is a function that gives a safe integer) | |
| * @throws {TypeError} if {@linkcode restCallback} is not a {@linkcode MatchCallback} (function that gives a safe integer) | |
| * @throws {typeof Backtracking} if {@linkcode restCallback} fails with all {@linkcode matchCallbacks} | |
| */ | |
| const matchAnyOf = (text, index, matchCallbacks, restCallback) => { | |
| if(typeof text !== "string" && typeof text?.length !== "number") throw new TypeError("text is not a string nor a list"); | |
| if(!Number.isSafeInteger(index)) throw new TypeError("index is not a safe integer"); | |
| if(typeof matchCallbacks?.length !== "number" || matchCallbacks.length === 0) throw new TypeError("matchCallbacks is not a list or empty"); | |
| if(typeof restCallback !== "function") throw new TypeError("restCallback is not a function"); | |
| for(let i = 0, result; i < matchCallbacks.length; ++i){ | |
| const matchCallback = matchCallbacks[i]; | |
| if(typeof matchCallback !== "function") throw new TypeError("matchCallbacks entry is not a function"); | |
| try{ result = matchCallback(text, index); } | |
| catch(exception){ | |
| if(exception !== Backtracking) throw exception; | |
| continue; | |
| } | |
| if(!Number.isSafeInteger(result)) throw new TypeError("matchCallbacks entry doesn't give a safe integer"); | |
| try{ result = restCallback(text, result); } | |
| catch(exception){ | |
| if(exception !== Backtracking) throw exception; | |
| continue; | |
| } | |
| if(!Number.isSafeInteger(result)) throw new TypeError("restCallback doesn't give a safe integer"); | |
| return result; | |
| } | |
| throw Backtracking; | |
| }; | |
| /** | |
| * match {@linkcode matchCallback} zero or more times starting at {@linkcode index} in {@linkcode text} (followed by {@linkcode restCallback}) | |
| * @param {string | ArrayLike<string>} text - string or list of characters | |
| * @param {number} index - index into {@linkcode text} | |
| * @param {MatchCallback} matchCallback - sub-match to match zero or more times | |
| * @param {number} min - minimum number of loops (must be less or equal to {@linkcode max}) | |
| * @param {number} max - maximum number of loops (can be `Infinity`) | |
| * @param {boolean} greedy - `true`: match as much as possible before continuing and decrease on failure; `false`: only match minimal amount before continuing and match more on failure | |
| * @param {MatchCallback} restCallback - rest of match-program (if no match follows, this should return `index` unchanged) | |
| * @returns {number} new {@linkcode index} from {@linkcode restCallback} | |
| * @throws {TypeError} if {@linkcode text} is not a string nor a list of characters (`{length: number, [index: number]: string}` and entries are not empty) ! entries are not checked here directly | |
| * @throws {TypeError} if {@linkcode index} is not a safe integer | |
| * @throws {TypeError} if {@linkcode matchCallback} is not a {@linkcode MatchCallback} (function that gives a safe integer) | |
| * @throws {TypeError} if {@linkcode min} or {@linkcode max} are not positive integers | |
| * @throws {RangeError} if {@linkcode min} is larger than {@linkcode max} | |
| * @throws {TypeError} if {@linkcode greedy} is not a boolean | |
| * @throws {TypeError} if {@linkcode restCallback} is not a {@linkcode MatchCallback} (function that gives a safe integer) | |
| * @throws {typeof Backtracking} if {@linkcode restCallback} fails {@linkcode min}-{@linkcode max} number of {@linkcode matchCallback} | |
| */ | |
| const matchLoop = (text, index, matchCallback, min, max, greedy, restCallback) => { | |
| if(typeof text !== "string" && typeof text?.length !== "number") throw new TypeError("text is not a string"); | |
| if(!Number.isSafeInteger(index)) throw new TypeError("index is not a safe integer"); | |
| if(typeof matchCallback !== "function") throw new TypeError("matchCallback is not a function"); | |
| if(!Number.isInteger(min) && min !== Infinity || min < 0) throw new TypeError("min is not a positive integer"); | |
| if(!Number.isInteger(max) && max !== Infinity || max < 0) throw new TypeError("max is not a positive integer"); | |
| if(min > max) throw new RangeError("min is larger than max"); | |
| if(typeof greedy !== "boolean") throw new TypeError("greedy is not a boolean"); | |
| if(typeof restCallback !== "function") throw new TypeError("restCallback is not a function"); | |
| if(greedy){ | |
| const starts = [index]; | |
| for(let result;starts.length <= max;){// match up to max but at least min times | |
| try{ result = matchCallback(text, index); } | |
| catch(exception){ | |
| if(exception !== Backtracking) throw exception; | |
| break; | |
| } | |
| if(!Number.isSafeInteger(result)) throw new TypeError("matchCallback doesn't give a safe integer"); | |
| starts.push(index = result); | |
| } | |
| if(starts.length <= min) throw Backtracking; | |
| for(let result;true;){// match after loop / rest of regexp | |
| try{ result = restCallback(text, index); } | |
| catch(exception){ | |
| if(exception !== Backtracking) throw exception; | |
| // try with one less match | |
| if(--starts.length <= min) throw Backtracking; | |
| index = starts[starts.length - 1]; | |
| continue; | |
| } | |
| if(!Number.isSafeInteger(result)) throw new TypeError("restCallback doesn't give a safe integer"); | |
| return result; | |
| } | |
| } | |
| let count = 0; | |
| for(let result;count < min; ++count){// try matching min times | |
| try{ result = matchCallback(text, index); } | |
| catch(exception){ | |
| if(exception !== Backtracking) throw exception; | |
| break; | |
| } | |
| if(!Number.isSafeInteger(result)) throw new TypeError("matchCallback doesn't give a safe integer"); | |
| index = result; | |
| } | |
| if(count < min) throw Backtracking; | |
| for(let result;true;){// match after loop / rest of regexp | |
| try{ result = restCallback(text, index); } | |
| catch(exception){ | |
| if(exception !== Backtracking) throw exception; | |
| // try matching one more time | |
| if(count >= max) throw Backtracking; | |
| try{ index = matchCallback(text, index); } | |
| catch(exception){ | |
| if(exception !== Backtracking) throw exception; | |
| throw Backtracking; | |
| } | |
| ++count; | |
| continue; | |
| } | |
| if(!Number.isSafeInteger(result)) throw new TypeError("restCallback doesn't give a safe integer"); | |
| return result; | |
| } | |
| }; | |
| /** | |
| * tries matching {@linkcode text} via {@linkcode matchCallback} (starting at {@linkcode fromIndex} to {@linkcode toIndex} inclusive) | |
| * @param {string | ArrayLike<string>} text - string or list of characters | |
| * @param {MatchCallback} matchCallback - match-programm (can have further sub-match-programs within) | |
| * @param {number} [fromIndex] - [optional] start match from here (negative is one-indexed from end of {@linkcode text}) - default `0` (start of {@linkcode text}) | |
| * @param {number} [toIndex] - [optional] highest starting position to try matching from (negative is one-indexed from end of {@linkcode text}) - defaults to length of {@linkcode text} (to end of {@linkcode text} inclusive) | |
| * @returns {[number, number] | false} `[start index, end index]` (start inclusive; end exclusive) if matched successfully and `false` otherwise | |
| * @throws {TypeError} if {@linkcode text} is not a string nor a list of characters (`{length: number, [index: number]: string}` and entries are not empty) ! entries are not checked here directly | |
| * @throws {TypeError} if {@linkcode matchCallback} is not a {@linkcode MatchCallback} (function that gives a safe integer) | |
| * @throws {TypeError} if {@linkcode fromIndex} is given but not a safe integer | |
| * @throws {TypeError} if {@linkcode toIndex} is given but not a safe integer | |
| * @throws {RangeError} if {@linkcode fromIndex} is not in range of {@linkcode text} (length of {@linkcode text} is within range) | |
| * @throws {RangeError} if {@linkcode toIndex} is not in range of {@linkcode text} (length of {@linkcode text} is within range) | |
| * @throws {RangeError} if {@linkcode fromIndex} is larger than {@linkcode toIndex} | |
| */ | |
| const matchProgram = (text, matchCallback, fromIndex, toIndex) => { | |
| if(typeof text !== "string" || typeof text?.length !== "number") throw new TypeError("text is not a string nor a list"); | |
| if(typeof matchCallback !== "function") throw new TypeError("matchCallback is not a function"); | |
| if(fromIndex == null) fromIndex = 0; | |
| else{ | |
| if(!Number.isSafeInteger(fromIndex)) throw new TypeError("fromIndex is not a safe integer"); | |
| if(fromIndex < 0) fromIndex += text.length; | |
| if(fromIndex < 0 || fromIndex > text.length) throw new RangeError("fromIndex is out of range"); | |
| } | |
| if(toIndex == null) toIndex = text.length; | |
| else{ | |
| if(!Number.isSafeInteger(toIndex)) throw new TypeError("toIndex is not a safe integer"); | |
| if(toIndex < 0) toIndex += text.length; | |
| if(toIndex < 0 || toIndex > text.length) throw new RangeError("toIndex is out of range"); | |
| } | |
| if(fromIndex > toIndex) throw new RangeError("fromIndex is larger than toIndex"); | |
| for(let start = fromIndex, result; start <= toIndex; ++start){ | |
| try{ result = matchCallback(text, start); } | |
| catch(exception){ | |
| if(exception !== Backtracking) throw exception; | |
| continue; | |
| } | |
| if(!Number.isSafeInteger(result)) throw new TypeError("matchCallback doesn't give a safe integer"); | |
| return [start, result]; | |
| } | |
| return false; | |
| }; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // copy regexp-engine-base.js here | |
| const text = "deXXXiXia ho"; | |
| // equal to /X*i/ | |
| const matchResult_Xi = matchProgram(text, | |
| (text, index) => matchLoop(text, index, | |
| (text, index) => matchChars(text, index, "X"), 0, Infinity, true, | |
| (text, index) => matchChars(text, index, "i") | |
| ) | |
| ); | |
| // equal to /.*/ | |
| const matchResult_all = matchProgram(text, | |
| (text, index) => matchLoop(text, index, | |
| (text, index) => matchAny(text, index), 0, Infinity, true, | |
| (text, index) => index | |
| ) | |
| ); | |
| // equal to /.*?/ | |
| const matchResult_none = matchProgram(text, | |
| (text, index) => matchLoop(text, index, | |
| (text, index) => matchAny(text, index), 0, Infinity, false, | |
| (text, index) => index | |
| ) | |
| ); | |
| console.log("text: %o", text); | |
| console.log("native regexp /X*i/ => %o", text.match(/X*i/)?.[0] ?? false); | |
| console.log("native regexp /.*/ => %o", text.match(/.*/ )?.[0] ?? false); | |
| console.log("native regexp /.*?/ => %o", text.match(/.*?/)?.[0] ?? false); | |
| console.log("match-program /X*i/ => %o", matchResult_Xi === false ? false : text.substring(...matchResult_Xi)); | |
| console.log("match-program /.*/ => %o", matchResult_all === false ? false : text.substring(...matchResult_all)); | |
| console.log("match-program /.*?/ => %o", matchResult_none === false ? false : text.substring(...matchResult_none)); | |
| // text: 'deXXXiXia ho' | |
| // native regexp /X*i/ => 'XXXi' | |
| // native regexp /.*/ => 'deXXXiXia ho' | |
| // native regexp /.*?/ => '' | |
| // match-program /X*i/ => 'XXXi' | |
| // match-program /.*/ => 'deXXXiXia ho' | |
| // match-program /.*?/ => '' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // copy regexp-engine-base.js here | |
| /** @type {MatchCallback} equal to regexp `^[+-]?(?:[0-9]+\.?|[0-9]*\.[0-9]+)(?:[eE][+-]?[0-9]+)?$` */ | |
| const fullMatchScientificNumber = (text, index) => { | |
| index = matchEnd(text, index, true); | |
| return matchLoop(text, index, | |
| (text, index) => matchAnyCharIn(text, index, ["+", "-"]), 0, 1, true, | |
| (text, index) => matchAnyOf(text, index, | |
| [ | |
| (text, index) => matchLoop(text, index, | |
| (text, index) => matchCharRange(text, index, "0", "9"), 1, Infinity, true, | |
| (text, index) => matchLoop(text, index, | |
| (text, index) => matchChars(text, index, "."), 0, 1, true, | |
| matchEmpty | |
| ) | |
| ), | |
| (text, index) => matchLoop(text, index, | |
| (text, index) => matchCharRange(text, index, "0", "9"), 0, Infinity, true, | |
| (text, index) => { | |
| index = matchChars(text, index, "."); | |
| return matchLoop(text, index, | |
| (text, index) => matchCharRange(text, index, "0", "9"), 1, Infinity, true, | |
| matchEmpty | |
| ); | |
| } | |
| ) | |
| ], | |
| (text, index) => matchLoop(text, index, | |
| (text, index) => { | |
| index = matchAnyCharIn(text, index, ["e", "E"]); | |
| return matchLoop(text, index, | |
| (text, index) => matchAnyCharIn(text, index, ["+", "-"]), 0, 1, true, | |
| (text, index) => matchLoop(text, index, | |
| (text, index) => matchCharRange(text, index, "0", "9"), 1, Infinity, true, | |
| matchEmpty | |
| ) | |
| ); | |
| }, 0, 1, true, | |
| matchEnd | |
| ) | |
| ) | |
| ); | |
| }; | |
| console.table([// random test cases via AI (GitHub Copilot "Claude Haiku 4.5") | |
| // Valid cases (38x) | |
| "0", "1", "123", ".5", ".99", "0.5", "123.456", "1.", "100.", "+0", | |
| "+123", "+.5", "+123.456", "-0", "-123", "-.5", "-123.456", "1e5", "1E5", "1e+5", | |
| "1E+5", "1e-5", "1E-5", ".5e10", ".99E30", "0e0", "123.456e789", "+1.5e+10", "-0.5e-3", "1.e5", | |
| ".2", "018e1", "01812.1", "0070e0", "0.", "0.0", "0e+0", "9999999e9999999", | |
| // Invalid cases (23x) | |
| "e5", // no digits before exponent | |
| ".e5", // no digits before or after decimal | |
| "1.2.3", // multiple decimals | |
| "1e2e3", // multiple exponents | |
| "1e", // exponent with no digits | |
| "1e+", // exponent sign with no digits | |
| "0e", // exponent with no digits | |
| " 2 ", // whitespace | |
| " 123", // leading whitespace | |
| "123 ", // trailing whitespace | |
| "1 2 3", // internal whitespace | |
| "", // empty string | |
| "+", // sign only | |
| "-", // sign only | |
| ".", // decimal point only | |
| "+-5", // double sign | |
| "-+5", // double sign | |
| "1..5", // double decimal | |
| "e10", // no leading digits | |
| "Infinity", // special float value | |
| "NaN", // special float value | |
| "1a5", // invalid character | |
| "1.2.3e4", // multiple decimals with exponent | |
| ].reduce((obj, test) => { | |
| const result = matchProgram(test, fullMatchScientificNumber, 0, 0), | |
| regex = test.match(/^[+-]?(?:[0-9]+\.?|[0-9]*\.[0-9]+)(?:[eE][+-]?[0-9]+)?$/)?.[0] ?? false, | |
| match = result === false ? false : test.substring(...result); | |
| obj[test] = {regex, match, equal: regex === match}; | |
| return obj; | |
| }, Object.create(null))); | |
| /* | |
| ┌─────────────────┬───────────────────┬───────────────────┬───────┐ | |
| │ (index) │ regex │ match │ equal │ | |
| ├─────────────────┼───────────────────┼───────────────────┼───────┤ | |
| │ 0 │ '0' │ '0' │ true │ | |
| │ 1 │ '1' │ '1' │ true │ | |
| │ 123 │ '123' │ '123' │ true │ | |
| │ .5 │ '.5' │ '.5' │ true │ | |
| │ .99 │ '.99' │ '.99' │ true │ | |
| │ 0.5 │ '0.5' │ '0.5' │ true │ | |
| │ 123.456 │ '123.456' │ '123.456' │ true │ | |
| │ 1. │ '1.' │ '1.' │ true │ | |
| │ 100. │ '100.' │ '100.' │ true │ | |
| │ +0 │ '+0' │ '+0' │ true │ | |
| │ +123 │ '+123' │ '+123' │ true │ | |
| │ +.5 │ '+.5' │ '+.5' │ true │ | |
| │ +123.456 │ '+123.456' │ '+123.456' │ true │ | |
| │ -0 │ '-0' │ '-0' │ true │ | |
| │ -123 │ '-123' │ '-123' │ true │ | |
| │ -.5 │ '-.5' │ '-.5' │ true │ | |
| │ -123.456 │ '-123.456' │ '-123.456' │ true │ | |
| │ 1e5 │ '1e5' │ '1e5' │ true │ | |
| │ 1E5 │ '1E5' │ '1E5' │ true │ | |
| │ 1e+5 │ '1e+5' │ '1e+5' │ true │ | |
| │ 1E+5 │ '1E+5' │ '1E+5' │ true │ | |
| │ 1e-5 │ '1e-5' │ '1e-5' │ true │ | |
| │ 1E-5 │ '1E-5' │ '1E-5' │ true │ | |
| │ .5e10 │ '.5e10' │ '.5e10' │ true │ | |
| │ .99E30 │ '.99E30' │ '.99E30' │ true │ | |
| │ 0e0 │ '0e0' │ '0e0' │ true │ | |
| │ 123.456e789 │ '123.456e789' │ '123.456e789' │ true │ | |
| │ +1.5e+10 │ '+1.5e+10' │ '+1.5e+10' │ true │ | |
| │ -0.5e-3 │ '-0.5e-3' │ '-0.5e-3' │ true │ | |
| │ 1.e5 │ '1.e5' │ '1.e5' │ true │ | |
| │ .2 │ '.2' │ '.2' │ true │ | |
| │ 018e1 │ '018e1' │ '018e1' │ true │ | |
| │ 01812.1 │ '01812.1' │ '01812.1' │ true │ | |
| │ 0070e0 │ '0070e0' │ '0070e0' │ true │ | |
| │ 0. │ '0.' │ '0.' │ true │ | |
| │ 0.0 │ '0.0' │ '0.0' │ true │ | |
| │ 0e+0 │ '0e+0' │ '0e+0' │ true │ | |
| │ 9999999e9999999 │ '9999999e9999999' │ '9999999e9999999' │ true │ | |
| │ e5 │ false │ false │ true │ | |
| │ .e5 │ false │ false │ true │ | |
| │ 1.2.3 │ false │ false │ true │ | |
| │ 1e2e3 │ false │ false │ true │ | |
| │ 1e │ false │ false │ true │ | |
| │ 1e+ │ false │ false │ true │ | |
| │ 0e │ false │ false │ true │ | |
| │ 2 │ false │ false │ true │ | |
| │ 123 │ false │ false │ true │ | |
| │ 123 │ false │ false │ true │ | |
| │ 1 2 3 │ false │ false │ true │ | |
| │ │ false │ false │ true │ | |
| │ + │ false │ false │ true │ | |
| │ - │ false │ false │ true │ | |
| │ . │ false │ false │ true │ | |
| │ +-5 │ false │ false │ true │ | |
| │ -+5 │ false │ false │ true │ | |
| │ 1..5 │ false │ false │ true │ | |
| │ e10 │ false │ false │ true │ | |
| │ Infinity │ false │ false │ true │ | |
| │ NaN │ false │ false │ true │ | |
| │ 1a5 │ false │ false │ true │ | |
| │ 1.2.3e4 │ false │ false │ true │ | |
| └─────────────────┴───────────────────┴───────────────────┴───────┘ | |
| */ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment