Created
August 11, 2020 23:41
-
-
Save TarVK/3782aca1299a0fc15032941343fbf973 to your computer and use it in GitHub Desktop.
Wrote a very simple arithmetic parser in javascript to test the general techique
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/******************* | |
* All helper code * | |
*******************/ | |
/** | |
* Tokenizes the input text using the given rules | |
* @param {string} text The text to tokenize | |
* @param {{[key: string]: RegExp}} rules The rules | |
* @returns {{type: string, value: string, index: number}[]} The tokens | |
*/ | |
function tokenize(text, rules) { | |
const ruleNames = Object.keys(rules); | |
let index = 0; | |
const tokens = []; | |
outer: while (text.length > 0) { | |
for (let i = 0; i < ruleNames.length; i++) { | |
const ruleName = ruleNames[i]; | |
const rule = rules[ruleName]; | |
// Try to match the rule | |
const match = rule.exec(text); | |
if (match && match.index == 0) { | |
// If the rule matches, store the token | |
tokens.push({ type: ruleName, value: match[0], index }); | |
// Remove the text, and continue tokenizing the remaining text | |
text = text.substring(match[0].length); | |
index += match[0].length; | |
continue outer; | |
} | |
} | |
// If no rule matches the text, throw some error | |
throw Error("Unexpected token " + text[0] + " at index " + index); | |
} | |
return tokens; | |
} | |
// The tokens that are analyzed | |
let tokens; | |
/** | |
* Consumes a token | |
* @param {string} token The token to consume | |
* @throws If the expected token wasn't found | |
* @returns {string} The value of the token | |
*/ | |
function consume(token) { | |
const firstToken = tokens.shift(); // Get the first token | |
if (!firstToken || firstToken.type != token) | |
throw Error( | |
"Unexpected token, found: " + firstToken.type + " but expected: " + token | |
); | |
return firstToken.value; | |
} | |
/** | |
* Checks whether the first token is of the given type | |
* @param {string} token The token that is expected | |
* @returns {boolean} Whether the expected token was found | |
*/ | |
function peek(token) { | |
return tokens[0] && tokens[0].type == token; | |
} | |
/** | |
* Combines peek and consume, consuming a token only if matched, without throwing an error if not | |
* @param {string} token The token that is expected | |
* @returns {false|string} Whether the expected token was found | |
*/ | |
function optConsume(token) { | |
const matched = tokens[0] && tokens[0].type == token; | |
if (matched) { | |
return consume(token); | |
} | |
return false; | |
} | |
/*********************************** | |
* All the lexer and grammar rules * | |
***********************************/ | |
const lexer = { | |
lBracket: /\(/, | |
rBracket: /\)/, | |
value: /\d*\.?\d+/, | |
add: /\+/, | |
sub: /\-/, | |
mul: /\*/, | |
div: /\//, | |
}; | |
function expression() { | |
let res = term(); | |
let loop = true; | |
do { | |
if (optConsume("add")) { | |
res += term(); | |
} else if (optConsume("sub")) { | |
res -= term(); | |
} else { | |
loop = false; | |
} | |
} while (loop); | |
return res; | |
} | |
function term() { | |
let res = factor(); | |
let loop = true; | |
do { | |
if (optConsume("mul")) { | |
res *= factor(); | |
} else if (optConsume("div")) { | |
res /= factor(); | |
} else { | |
loop = false; | |
} | |
} while (loop); | |
return res; | |
} | |
function factor() { | |
let res; | |
if (peek("value")) { | |
res = parseFloat(consume("value")); | |
} else { | |
consume("lBracket"); | |
res = expression(); | |
consume("rBracket"); | |
} | |
return res; | |
} | |
tokens = tokenize("3*8/2*(2+2+3)", lexer); | |
let result = expression(); | |
console.log(result); |
And finally a version that creates an AST first and evaluates after,
allowing for false && console.log('not logged')
to work properly.
/*******************
* All helper code *
*******************/
/**
* @typedef {Object} Token
* @property {string} type The type of token
* @property {string} value The textual value of the token
* @property {number} index The index of the token in the input
*/
/**
* Tokenizes the input text using the given rules
* @param {string} text The text to tokenize
* @param {{[key: string]: RegExp}} rules The rules
* @returns {Token[]} The tokens
*/
function tokenize(text, rules) {
const ruleNames = Object.keys(rules);
let index = 0;
const tokens = [];
outer: while (text.length > 0) {
for (let i = 0; i < ruleNames.length; i++) {
const ruleName = ruleNames[i];
const rule = rules[ruleName];
// Try to match the rule
const match = rule.exec(text);
if (match && match.index == 0) {
// If the rule matches, stORe the token
tokens.push({ type: ruleName, value: match[0], index });
// Remove the text, AND continue tokenizing the remaining text
text = text.substring(match[0].length);
index += match[0].length;
continue outer;
}
}
// If no rule matches the text, throw some error
throw Error("Unexpected token " + text[0] + " at index " + index);
}
return tokens;
}
/**
* @typedef {Object} ASTNode
* @property {string} type The node type
* @property {(ASTNode|string)[]} children The children of the node
*/
/**
* Walks the given AST tree AND obtains a result using the given rules
* @param {ASTNode|Token} tree The tree to walk
* @param {{[type: string]: (children: any[], map)=>any}} rules The rules to apply fOR each node to obtain a result
* @returns {any} The result obtained from the tree
*/
function walkTree(tree, rules) {
if ("value" in tree) return tree.value;
const rule = rules[tree.type];
return rule
? rule(tree.children, (node) => walkTree(node, rules))
: tree.children;
}
/**
* Creates an AST node
* @param {string} type The type
* @param {(ASTNode|Token)[]} children The children
* @returns {ASTNode} The new node
*/
function node(type, children) {
return {
type,
children,
};
}
/**
* Removes the given token type from the list of tokens
* @param {Token[]} tokens The input tokens
* @param {string} rem The token type to remove
* @returns {Token[]} The tokens with the given type removed
*/
function removeTokens(tokens, rem) {
return tokens.filter(({ type }) => type != rem);
}
// The tokens that are analyzed
let tokens;
// Variables accessible to the text being evaluated
let vars;
/**
* Consumes a token
* @param {string} token The token to consume
* @throws If the expected token wasn't found
* @returns {string} The value of the token
*/
function consume(token) {
const firstToken = tokens.shift(); // Get the first token
if (!firstToken || firstToken.type != token)
throw Error(
"Unexpected token, expected '" +
token +
"' but found '" +
firstToken.type +
"' at index " +
firstToken.index
);
return firstToken;
}
/**
* Checks whether the first token is of the given type
* @param {string} token The token that is expected
* @returns {boolean} Whether the expected token was found
*/
function peek(token) {
return tokens[0] && tokens[0].type == token;
}
/**
* Combines peek and consume, consuming a token only if matched, without throwing an error if not
* @param {string} token The token that is expected
* @returns {false|string} Whether the expected token was found
*/
function optConsume(token) {
const matched = tokens[0] && tokens[0].type == token;
if (matched) {
return consume(token);
}
return false;
}
/** A replacement fOR the undefined constant, to not clash with js undefined */
const Undefined = Symbol("undefined");
/***********************************
* All the lexer and grammar rules *
***********************************/
const lexer = {
LEFTBRACKET: /\(/,
RIGHTBRACKET: /\)/,
LEFTSQUAREBRACKET: /\[/,
RIGHTSQUAREBRACKET: /\]/,
LEFTCURLYBRACKET: /\{/,
RIGHTCURLYBRACKET: /\}/,
COLON: /\:/,
QUESTIONMARK: /\?/,
NUMBER: /\d*\.?\d+/,
BOOLEAN: /(true|false)(?!\w)/,
LITERAL: /\w+/,
ADD: /\+/,
SUBTRACT: /\-/,
MULTIPLY: /\*/,
DIVIDE: /\//,
DOT: /\./,
COMMA: /\,/,
SPACE: /\s+/,
STRING: /(\"(\\\\|\\\"|[^"])*\")|(\'(\\\\|\\\'|[^'])*\')/,
LESSTHANEQUAL: /<=/,
GREATERTHANEQUAL: />=/,
LESSTHAN: /</,
GREATERTHAN: />/,
STRICTEQUAL: /===/,
EQUAL: /==/,
STRICTNOTEQUAL: /!==/,
NOTEQUAL: /!=/,
NOT: /!/,
OR: /\|\|/,
AND: /&&/,
};
function expression() {
let res = logic();
if (optConsume("QUESTIONMARK")) {
const t = expression();
consume("COLON");
const f = expression();
res = node("inlineIf", [res, t, f]);
}
return res;
}
function logic() {
let res = clause();
let loop = true;
do {
if (optConsume("OR")) {
res = node("or", [res, clause()]);
} else {
loop = false;
}
} while (loop);
return res;
}
function clause() {
let res = subClause();
let loop = true;
do {
if (optConsume("AND")) {
res = node("and", [res, subClause()]);
} else {
loop = false;
}
} while (loop);
return res;
}
function subClause() {
let res = comparee();
outer: while (true) {
let options = [
"LESSTHAN",
"LESSTHANEQUAL",
"GREATERTHAN",
"GREATERTHANEQUAL",
"EQUAL",
"NOTEQUAL",
"STRICTEQUAL",
"STRICTNOTEQUAL",
];
for (let option of options) {
if (optConsume(option)) {
res = node(option.toLowerCase(), [res, comparee()]);
continue outer;
}
}
break outer;
}
return res;
}
function comparee() {
let res = term();
let loop = true;
do {
if (optConsume("ADD")) {
res = node("add", [res, term()]);
} else if (optConsume("SUBTRACT")) {
res = node("subtract", [res, term()]);
} else {
loop = false;
}
} while (loop);
return res;
}
function term() {
let res = factor();
let loop = true;
do {
if (optConsume("MULTIPLY")) {
res = node("multiply", [res, factor()]);
} else if (optConsume("DIVIDE")) {
res = node("divide", [res, factor()]);
} else {
loop = false;
}
} while (loop);
return res;
}
function factor() {
let res;
let invert = undefined;
while (optConsume("NOT")) invert = !invert;
if (peek("NUMBER")) {
res = node("number", [consume("NUMBER")]);
} else if (peek("LITERAL")) {
res = node("var", [consume("LITERAL")]);
} else if (peek("STRING")) {
res = node("string", [consume("STRING")]);
} else if (peek("BOOLEAN")) {
res = node("boolean", [consume("BOOLEAN")]);
} else if (peek("LEFTSQUAREBRACKET")) {
res = array();
} else if (peek("LEFTCURLYBRACKET")) {
res = object();
} else {
consume("LEFTBRACKET");
res = expression();
consume("RIGHTBRACKET");
}
if (invert) res = node("not", [res]);
if (invert == false) res = node("not", [node("NOT", [res])]); // invert defaults to undefined
return manyAccessors(res);
}
function manyAccessors(value) {
let res = value;
let loop = true;
do {
let newRes = Undefined;
if (newRes == Undefined) newRes = optProperty(res);
if (newRes == Undefined) newRes = optIndexer(res);
if (newRes == Undefined) newRes = optCall(res);
if (newRes != Undefined) {
res = newRes;
} else {
loop = false;
}
} while (loop);
return res;
}
function optProperty(value) {
if (optConsume("DOT")) {
return node("property", [value, consume("LITERAL")]);
}
return Undefined;
}
function optIndexer(value) {
if (optConsume("LEFTSQUAREBRACKET")) {
const res = node("property", [value, expression()]);
consume("RIGHTSQUAREBRACKET");
return res;
}
return Undefined;
}
function optCall(value) {
if (optConsume("LEFTBRACKET")) {
let args = [];
while (!peek("RIGHTBRACKET")) {
if (args.length > 0) consume("COMMA");
args.push(expression());
}
let res = node("call", [value, args]);
consume("RIGHTBRACKET");
return res;
}
return Undefined;
}
function array() {
let res = [];
consume("LEFTSQUAREBRACKET");
while (!peek("RIGHTSQUAREBRACKET")) {
if (res.length > 0) consume("COMMA");
res.push(expression());
}
consume("RIGHTSQUAREBRACKET");
return node("array", [res]);
}
function object() {
let res = [];
let first = true;
consume("LEFTCURLYBRACKET");
while (!peek("RIGHTCURLYBRACKET")) {
if (!first) consume("COMMA");
let name;
if (optConsume("LEFTSQUAREBRACKET")) {
name = expression();
consume("RIGHTSQUAREBRACKET");
} else if (peek("STRING")) {
name = node("string", [consume("STRING")]);
} else {
name = consume("LITERAL");
}
consume("COLON");
res.push([name, expression()]);
first = false;
}
consume("RIGHTCURLYBRACKET");
return node("object", [res]);
}
const visitor = {
inlineIf: ([c, t, f], r) => (r(c) ? r(t) : r(f)),
or: ([a, b], r) => r(a) || r(b),
and: ([a, b], r) => r(a) && r(b),
not: ([a], r) => !r(a),
lessthan: ([a, b], r) => r(a) < r(b),
lessthanequal: ([a, b], r) => r(a) <= r(b),
greaterthan: ([a, b], r) => r(a) > r(b),
greaterthanequal: ([a, b], r) => r(a) >= r(b),
equal: ([a, b], r) => r(a) == r(b),
strictequal: ([a, b], r) => r(a) === r(b),
notequal: ([a, b], r) => r(a) != r(b),
strictnotequal: ([a, b], r) => r(a) !== r(b),
add: ([a, b], r) => r(a) + r(b),
subtract: ([a, b], r) => r(a) - r(b),
multiply: ([a, b], r) => r(a) * r(b),
divide: ([a, b], r) => r(a) / r(b),
literal: ([v]) => v,
property: ([v, p], r) => {
const value = r(v);
const prop = value[r(p)];
return prop instanceof Function ? prop.bind(value) : prop;
},
call: ([v, args], r) => r(v)(...args.map((arg) => r(arg))),
string: ([v], r) => {
const stringInp = r(v);
return stringInp.substring(1, stringInp.length - 1).replace(/\\(.)/g, "$1");
},
number: ([v], r) => parseFloat(r(v)),
boolean: ([v], r) => r(v) == "true",
var: ([v], r) => vars[r(v)],
array: ([items], r) => items.map((item) => r(item)),
object: ([props], r) => {
const res = {};
props.forEach(([key, value]) => (res[r(key)] = r(value)));
return res;
},
};
/******************************************
* Usages of lexer + parser + interpreter *
******************************************/
vars = {
console: console,
someObj: {
someVal: 56,
someAr: [34, "yes"],
},
};
tokens = tokenize(
`console.log({
sum: someObj.someAr[0] + someObj.someVal,
toString1: 2+1.toString(),
toString2: (2+2).toString() + " \\\\\\"hoi",
logic: 34 > 19 && !false,
negation: !34,
doubleNegation: !!34,
dynamicIndexing: {
[23+"test"]: false
},
array: [34, 45*7],
inlineIf: true ? false ? "notThis" : "this": "NotThisEither",
AndBreak: false && console.log('noCall'),
noAndBreak: true && console.log('call'),
"potatoes": 3
})`,
lexer
);
tokens = removeTokens(tokens, "SPACE");
let result = walkTree(expression(), visitor);
console.log(result);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Also made a more complete js expressions parser.
Unfortunately, due to the lack of a syntax tree creation step,
true || console.log("test")
will still log test instead of canceling