Created
August 11, 2020 23:41
-
-
Save TarVK/3782aca1299a0fc15032941343fbf973 to your computer and use it in GitHub Desktop.
Wrote a very simple arithmetic parser in JavaScript to test the general technique
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/******************* | |
* All helper code * | |
*******************/ | |
/**
 * Tokenizes the input text using the given rules.
 *
 * Rules are tried in the key order of `rules`; the first rule that matches a
 * non-empty prefix of the remaining text produces the next token.
 * @param {string} text The text to tokenize
 * @param {{[key: string]: RegExp}} rules The rules, keyed by token type
 * @returns {{type: string, value: string, index: number}[]} The tokens
 * @throws {Error} If no rule matches at some position in the text
 */
function tokenize(text, rules) {
  // Work on sticky copies of the rules: a sticky regex can only match at
  // `lastIndex`, so a failing rule no longer scans the rest of the text, and
  // the caller's RegExp objects (which may carry `g`/`y` state) stay untouched.
  const matchers = Object.keys(rules).map((type) => {
    const rule = rules[type];
    const flags = rule.flags.replace(/[gy]/g, "") + "y";
    return { type, regex: new RegExp(rule.source, flags) };
  });

  const tokens = [];
  let index = 0;
  let rest = text;
  outer: while (rest.length > 0) {
    for (const { type, regex } of matchers) {
      // Always match against the start of the remaining text
      regex.lastIndex = 0;
      const match = regex.exec(rest);
      // Empty matches consume nothing and would loop forever, so skip them
      if (match && match[0].length > 0) {
        tokens.push({ type, value: match[0], index });
        // Drop the matched text and continue with the remainder
        rest = rest.substring(match[0].length);
        index += match[0].length;
        continue outer;
      }
    }
    // No rule matched a non-empty prefix of the remaining text
    throw new Error(`Unexpected token ${rest[0]} at index ${index}`);
  }
  return tokens;
}
// The tokens that are analyzed; the parser helpers read and remove them from the front
let tokens;

/**
 * Consumes a token: removes the first token and checks that it has the
 * expected type. The token is removed even when the type does not match.
 * @param {string} token The token type to consume
 * @throws {Error} If there are no tokens left, or the first token has another type
 * @returns {string} The value of the token
 */
function consume(token) {
  const firstToken = tokens.shift(); // Get (and remove) the first token
  if (!firstToken) {
    // Ran out of tokens: report it instead of failing on `firstToken.type`
    throw new Error("Unexpected end of input, expected: " + token);
  }
  if (firstToken.type !== token) {
    throw new Error(
      "Unexpected token, found: " + firstToken.type + " but expected: " + token
    );
  }
  return firstToken.value;
}
/**
 * Checks whether the first token is of the given type, without consuming it
 * @param {string} token The token type that is expected
 * @returns {boolean} Whether the expected token was found; false when no tokens are left
 */
function peek(token) {
  // Compare against the length so an empty list yields `false`, not `undefined`
  return tokens.length > 0 && tokens[0].type === token;
}
/**
 * Consumes the first token only when it has the given type; unlike `consume`
 * it does not throw when the first token is missing or of another type
 * @param {string} token The token type that is expected
 * @returns {false|string} The value of the consumed token, or false if nothing was consumed
 */
function optConsume(token) {
  const next = tokens[0];
  return next && next.type == token ? consume(token) : false;
}
/*********************************** | |
* All the lexer and grammar rules * | |
***********************************/ | |
/**
 * Lexer rules, keyed by token type. There is no rule for whitespace.
 */
const lexer = {
  lBracket: /\(/,
  rBracket: /\)/,
  // Unsigned decimal number: optional integer part, optional ".", then at
  // least one digit ("3", "3.5", ".5")
  value: /\d*\.?\d+/,
  add: /\+/,
  // "-" needs no escape outside a character class
  sub: /-/,
  mul: /\*/,
  div: /\//,
};
/**
 * Parses and evaluates `term (("add" | "sub") term)*`, applying the
 * operators from left to right
 * @returns {number} The value of the expression
 */
function expression() {
  let total = term();
  for (;;) {
    if (optConsume("add")) {
      total += term();
    } else if (optConsume("sub")) {
      total -= term();
    } else {
      // Next token is neither "add" nor "sub": the expression ends here
      return total;
    }
  }
}
/**
 * Parses and evaluates `factor (("mul" | "div") factor)*`, applying the
 * operators from left to right
 * @returns {number} The value of the term
 */
function term() {
  let product = factor();
  for (;;) {
    if (optConsume("mul")) {
      product *= factor();
    } else if (optConsume("div")) {
      product /= factor();
    } else {
      // Next token is neither "mul" nor "div": the term ends here
      return product;
    }
  }
}
/**
 * Parses and evaluates a factor: either a single "value" token, or an
 * expression wrapped in "lBracket" ... "rBracket"
 * @returns {number} The value of the factor
 */
function factor() {
  if (peek("value")) {
    return parseFloat(consume("value"));
  }
  // Not a plain value, so it has to be a bracketed sub-expression
  consume("lBracket");
  const inner = expression();
  consume("rBracket");
  return inner;
}
// Demo: tokenize and evaluate a sample expression, then print the result
tokens = tokenize("3*8/2*(2+2+3)", lexer);
const result = expression();
// expression() stops at the first token it cannot use; anything left over
// means the input was not one complete expression (e.g. a stray ")")
if (tokens.length > 0) {
  throw new Error(
    "Unexpected token " + tokens[0].value + " at index " + tokens[0].index
  );
}
console.log(result);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
And finally a version that creates an AST first and evaluates after,
allowing for
false && console.log('not logged')
to work properly.