TarVK/SimpleArithmeticParser.js

TarVK · 2020-08-12T12:36:03Z

And finally a version that creates an AST first and evaluates after,
allowing for false && console.log('not logged') to work properly.

/*******************
 * All helper code *
 *******************/
/**
 * @typedef {Object} Token
 * @property {string} type The type of token
 * @property {string} value The textual value of the token
 * @property {number} index The index of the token in the input
 */

/**
 * Tokenizes the input text using the given rules
 * @param {string} text The text to tokenize
 * @param {{[key: string]: RegExp}} rules The rules
 * @returns {Token[]} The tokens
 */
function tokenize(text, rules) {
  const ruleNames = Object.keys(rules);
  let index = 0;
  const tokens = [];
  outer: while (text.length > 0) {
    for (let i = 0; i < ruleNames.length; i++) {
      const ruleName = ruleNames[i];
      const rule = rules[ruleName];

      // Try to match the rule
      const match = rule.exec(text);
      if (match && match.index == 0) {
        // If the rule matches, stORe the token
        tokens.push({ type: ruleName, value: match[0], index });

        // Remove the text, AND continue tokenizing the remaining text
        text = text.substring(match[0].length);
        index += match[0].length;
        continue outer;
      }
    }

    // If no rule matches the text, throw some error
    throw Error("Unexpected token " + text[0] + " at index " + index);
  }
  return tokens;
}

/**
 * @typedef {Object} ASTNode
 * @property {string} type The node type
 * @property {(ASTNode|string)[]} children The children of the node
 */
/**
 * Walks the given AST tree AND obtains a result using the given rules
 * @param {ASTNode|Token} tree The tree to walk
 * @param {{[type: string]: (children: any[], map)=>any}} rules The rules to apply fOR each node to obtain a result
 * @returns {any} The result obtained from the tree
 */
function walkTree(tree, rules) {
  if ("value" in tree) return tree.value;
  const rule = rules[tree.type];
  return rule
    ? rule(tree.children, (node) => walkTree(node, rules))
    : tree.children;
}

/**
 * Creates an AST node
 * @param {string} type The type
 * @param {(ASTNode|Token)[]} children The children
 * @returns {ASTNode} The new node
 */
function node(type, children) {
  return {
    type,
    children,
  };
}

/**
 * Removes the given token type from the list of tokens
 * @param {Token[]} tokens The input tokens
 * @param {string} rem The token type to remove
 * @returns {Token[]} The tokens with the given type removed
 */
function removeTokens(tokens, rem) {
  return tokens.filter(({ type }) => type != rem);
}

// The tokens that are analyzed
let tokens;
// Variables accessible to the text being evaluated
let vars;

/**
 * Consumes a token
 * @param {string} token The token to consume
 * @throws If the expected token wasn't found
 * @returns {string} The value of the token
 */
function consume(token) {
  const firstToken = tokens.shift(); // Get the first token
  if (!firstToken || firstToken.type != token)
    throw Error(
      "Unexpected token, expected '" +
        token +
        "' but found '" +
        firstToken.type +
        "' at index " +
        firstToken.index
    );
  return firstToken;
}

/**
 * Checks whether the first token is of the given type
 * @param {string} token The token that is expected
 * @returns {boolean} Whether the expected token was found
 */
function peek(token) {
  return tokens[0] && tokens[0].type == token;
}

/**
 * Combines peek and consume, consuming a token only if matched, without throwing an error if not
 * @param {string} token The token that is expected
 * @returns {false|string} Whether the expected token was found
 */
function optConsume(token) {
  const matched = tokens[0] && tokens[0].type == token;
  if (matched) {
    return consume(token);
  }
  return false;
}

/** A replacement fOR the undefined constant, to not clash with js undefined */
const Undefined = Symbol("undefined");

/***********************************
 * All the lexer and grammar rules *
 ***********************************/
const lexer = {
  LEFTBRACKET: /\(/,
  RIGHTBRACKET: /\)/,
  LEFTSQUAREBRACKET: /\[/,
  RIGHTSQUAREBRACKET: /\]/,
  LEFTCURLYBRACKET: /\{/,
  RIGHTCURLYBRACKET: /\}/,
  COLON: /\:/,
  QUESTIONMARK: /\?/,
  NUMBER: /\d*\.?\d+/,
  BOOLEAN: /(true|false)(?!\w)/,
  LITERAL: /\w+/,
  ADD: /\+/,
  SUBTRACT: /\-/,
  MULTIPLY: /\*/,
  DIVIDE: /\//,
  DOT: /\./,
  COMMA: /\,/,
  SPACE: /\s+/,
  STRING: /(\"(\\\\|\\\"|[^"])*\")|(\'(\\\\|\\\'|[^'])*\')/,
  LESSTHANEQUAL: /<=/,
  GREATERTHANEQUAL: />=/,
  LESSTHAN: /</,
  GREATERTHAN: />/,
  STRICTEQUAL: /===/,
  EQUAL: /==/,
  STRICTNOTEQUAL: /!==/,
  NOTEQUAL: /!=/,
  NOT: /!/,
  OR: /\|\|/,
  AND: /&&/,
};

function expression() {
  let res = logic();
  if (optConsume("QUESTIONMARK")) {
    const t = expression();
    consume("COLON");
    const f = expression();
    res = node("inlineIf", [res, t, f]);
  }
  return res;
}

function logic() {
  let res = clause();
  let loop = true;
  do {
    if (optConsume("OR")) {
      res = node("or", [res, clause()]);
    } else {
      loop = false;
    }
  } while (loop);
  return res;
}

function clause() {
  let res = subClause();
  let loop = true;
  do {
    if (optConsume("AND")) {
      res = node("and", [res, subClause()]);
    } else {
      loop = false;
    }
  } while (loop);
  return res;
}

function subClause() {
  let res = comparee();
  outer: while (true) {
    let options = [
      "LESSTHAN",
      "LESSTHANEQUAL",
      "GREATERTHAN",
      "GREATERTHANEQUAL",
      "EQUAL",
      "NOTEQUAL",
      "STRICTEQUAL",
      "STRICTNOTEQUAL",
    ];
    for (let option of options) {
      if (optConsume(option)) {
        res = node(option.toLowerCase(), [res, comparee()]);
        continue outer;
      }
    }
    break outer;
  }
  return res;
}

function comparee() {
  let res = term();
  let loop = true;
  do {
    if (optConsume("ADD")) {
      res = node("add", [res, term()]);
    } else if (optConsume("SUBTRACT")) {
      res = node("subtract", [res, term()]);
    } else {
      loop = false;
    }
  } while (loop);
  return res;
}

function term() {
  let res = factor();
  let loop = true;
  do {
    if (optConsume("MULTIPLY")) {
      res = node("multiply", [res, factor()]);
    } else if (optConsume("DIVIDE")) {
      res = node("divide", [res, factor()]);
    } else {
      loop = false;
    }
  } while (loop);
  return res;
}

function factor() {
  let res;

  let invert = undefined;
  while (optConsume("NOT")) invert = !invert;

  if (peek("NUMBER")) {
    res = node("number", [consume("NUMBER")]);
  } else if (peek("LITERAL")) {
    res = node("var", [consume("LITERAL")]);
  } else if (peek("STRING")) {
    res = node("string", [consume("STRING")]);
  } else if (peek("BOOLEAN")) {
    res = node("boolean", [consume("BOOLEAN")]);
  } else if (peek("LEFTSQUAREBRACKET")) {
    res = array();
  } else if (peek("LEFTCURLYBRACKET")) {
    res = object();
  } else {
    consume("LEFTBRACKET");
    res = expression();
    consume("RIGHTBRACKET");
  }

  if (invert) res = node("not", [res]);
  if (invert == false) res = node("not", [node("NOT", [res])]); // invert defaults to undefined

  return manyAccessors(res);
}

function manyAccessors(value) {
  let res = value;
  let loop = true;
  do {
    let newRes = Undefined;
    if (newRes == Undefined) newRes = optProperty(res);
    if (newRes == Undefined) newRes = optIndexer(res);
    if (newRes == Undefined) newRes = optCall(res);

    if (newRes != Undefined) {
      res = newRes;
    } else {
      loop = false;
    }
  } while (loop);

  return res;
}

function optProperty(value) {
  if (optConsume("DOT")) {
    return node("property", [value, consume("LITERAL")]);
  }
  return Undefined;
}

function optIndexer(value) {
  if (optConsume("LEFTSQUAREBRACKET")) {
    const res = node("property", [value, expression()]);
    consume("RIGHTSQUAREBRACKET");
    return res;
  }
  return Undefined;
}

function optCall(value) {
  if (optConsume("LEFTBRACKET")) {
    let args = [];
    while (!peek("RIGHTBRACKET")) {
      if (args.length > 0) consume("COMMA");
      args.push(expression());
    }
    let res = node("call", [value, args]);
    consume("RIGHTBRACKET");
    return res;
  }
  return Undefined;
}

function array() {
  let res = [];
  consume("LEFTSQUAREBRACKET");
  while (!peek("RIGHTSQUAREBRACKET")) {
    if (res.length > 0) consume("COMMA");
    res.push(expression());
  }
  consume("RIGHTSQUAREBRACKET");
  return node("array", [res]);
}

function object() {
  let res = [];
  let first = true;
  consume("LEFTCURLYBRACKET");
  while (!peek("RIGHTCURLYBRACKET")) {
    if (!first) consume("COMMA");

    let name;
    if (optConsume("LEFTSQUAREBRACKET")) {
      name = expression();
      consume("RIGHTSQUAREBRACKET");
    } else if (peek("STRING")) {
      name = node("string", [consume("STRING")]);
    } else {
      name = consume("LITERAL");
    }
    consume("COLON");

    res.push([name, expression()]);
    first = false;
  }
  consume("RIGHTCURLYBRACKET");
  return node("object", [res]);
}

const visitor = {
  inlineIf: ([c, t, f], r) => (r(c) ? r(t) : r(f)),
  or: ([a, b], r) => r(a) || r(b),
  and: ([a, b], r) => r(a) && r(b),
  not: ([a], r) => !r(a),
  lessthan: ([a, b], r) => r(a) < r(b),
  lessthanequal: ([a, b], r) => r(a) <= r(b),
  greaterthan: ([a, b], r) => r(a) > r(b),
  greaterthanequal: ([a, b], r) => r(a) >= r(b),
  equal: ([a, b], r) => r(a) == r(b),
  strictequal: ([a, b], r) => r(a) === r(b),
  notequal: ([a, b], r) => r(a) != r(b),
  strictnotequal: ([a, b], r) => r(a) !== r(b),
  add: ([a, b], r) => r(a) + r(b),
  subtract: ([a, b], r) => r(a) - r(b),
  multiply: ([a, b], r) => r(a) * r(b),
  divide: ([a, b], r) => r(a) / r(b),
  literal: ([v]) => v,
  property: ([v, p], r) => {
    const value = r(v);
    const prop = value[r(p)];
    return prop instanceof Function ? prop.bind(value) : prop;
  },
  call: ([v, args], r) => r(v)(...args.map((arg) => r(arg))),
  string: ([v], r) => {
    const stringInp = r(v);
    return stringInp.substring(1, stringInp.length - 1).replace(/\\(.)/g, "$1");
  },
  number: ([v], r) => parseFloat(r(v)),
  boolean: ([v], r) => r(v) == "true",
  var: ([v], r) => vars[r(v)],
  array: ([items], r) => items.map((item) => r(item)),
  object: ([props], r) => {
    const res = {};
    props.forEach(([key, value]) => (res[r(key)] = r(value)));
    return res;
  },
};

/******************************************
 * Usages of lexer + parser + interpreter *
 ******************************************/
vars = {
  console: console,
  someObj: {
    someVal: 56,
    someAr: [34, "yes"],
  },
};
tokens = tokenize(
  `console.log({
    sum: someObj.someAr[0] + someObj.someVal, 
    toString1: 2+1.toString(), 
    toString2: (2+2).toString() + " \\\\\\"hoi", 
    logic: 34 > 19 && !false, 
    negation: !34,
    doubleNegation: !!34,
    dynamicIndexing: {
      [23+"test"]: false
    },
    array: [34, 45*7],
    inlineIf: true ? false ? "notThis" : "this": "NotThisEither",
    AndBreak: false && console.log('noCall'),
    noAndBreak: true && console.log('call'),
    "potatoes": 3
  })`,
  lexer
);
tokens = removeTokens(tokens, "SPACE");
let result = walkTree(expression(), visitor);
console.log(result);

TarVK/SimpleArithmeticParser.js

TarVK commented Aug 12, 2020 •

edited

Loading

Uh oh!

	/*******************
	* All helper code *
	*******************/

	/**
	* Tokenizes the input text using the given rules
	* @param {string} text The text to tokenize
	* @param {{[key: string]: RegExp}} rules The rules
	* @returns {{type: string, value: string, index: number}[]} The tokens
	*/
	function tokenize(text, rules) {
	const ruleNames = Object.keys(rules);
	let index = 0;
	const tokens = [];
	outer: while (text.length > 0) {
	for (let i = 0; i < ruleNames.length; i++) {
	const ruleName = ruleNames[i];
	const rule = rules[ruleName];

	// Try to match the rule
	const match = rule.exec(text);
	if (match && match.index == 0) {
	// If the rule matches, store the token
	tokens.push({ type: ruleName, value: match[0], index });

	// Remove the text, and continue tokenizing the remaining text
	text = text.substring(match[0].length);
	index += match[0].length;
	continue outer;
	}
	}

	// If no rule matches the text, throw some error
	throw Error("Unexpected token " + text[0] + " at index " + index);
	}
	return tokens;
	}

	// The tokens that are analyzed
	let tokens;

	/**
	* Consumes a token
	* @param {string} token The token to consume
	* @throws If the expected token wasn't found
	* @returns {string} The value of the token
	*/
	function consume(token) {
	const firstToken = tokens.shift(); // Get the first token
	if (!firstToken \|\| firstToken.type != token)
	throw Error(
	"Unexpected token, found: " + firstToken.type + " but expected: " + token
	);
	return firstToken.value;
	}

	/**
	* Checks whether the first token is of the given type
	* @param {string} token The token that is expected
	* @returns {boolean} Whether the expected token was found
	*/
	function peek(token) {
	return tokens[0] && tokens[0].type == token;
	}

	/**
	* Combines peek and consume, consuming a token only if matched, without throwing an error if not
	* @param {string} token The token that is expected
	* @returns {false\|string} Whether the expected token was found
	*/
	function optConsume(token) {
	const matched = tokens[0] && tokens[0].type == token;
	if (matched) {
	return consume(token);
	}
	return false;
	}

	/***********************************
	* All the lexer and grammar rules *
	***********************************/
	const lexer = {
	lBracket: /\(/,
	rBracket: /\)/,
	value: /\d*\.?\d+/,
	add: /\+/,
	sub: /\-/,
	mul: /\*/,
	div: /\//,
	};

	function expression() {
	let res = term();
	let loop = true;
	do {
	if (optConsume("add")) {
	res += term();
	} else if (optConsume("sub")) {
	res -= term();
	} else {
	loop = false;
	}
	} while (loop);
	return res;
	}

	function term() {
	let res = factor();
	let loop = true;
	do {
	if (optConsume("mul")) {
	res *= factor();
	} else if (optConsume("div")) {
	res /= factor();
	} else {
	loop = false;
	}
	} while (loop);
	return res;
	}

	function factor() {
	let res;
	if (peek("value")) {
	res = parseFloat(consume("value"));
	} else {
	consume("lBracket");
	res = expression();
	consume("rBracket");
	}
	return res;
	}

	tokens = tokenize("38/2(2+2+3)", lexer);
	let result = expression();
	console.log(result);

TarVK/SimpleArithmeticParser.js

TarVK commented Aug 12, 2020 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

TarVK commented Aug 12, 2020 •

edited

Loading