Created
March 31, 2020 22:01
-
-
Save nyorain/38c4dc9c57a08041e2ef23e12526e4ed to your computer and use it in GitHub Desktop.
Simple parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "parser.hpp" | |
#include <vector> | |
#include <memory> | |
#include <string> | |
#include <variant> | |
#include <iostream> | |
template<typename ...Ts> | |
struct Visitor : Ts... { | |
Visitor(const Ts&... args) : Ts(args)... {} | |
using Ts::operator()...; | |
}; | |
std::string dump(const Expression& expr) { | |
// oh wow, this almost looks like proper programming | |
// guess C++ has pattern matching after all, eh? | |
return std::visit(Visitor{ | |
[](bool val) { return std::to_string(val); }, | |
[](double val) { return std::to_string(val); }, | |
[](std::string_view val) { return std::string(val); }, | |
[](const Identifier& id) { return std::string(id.name); }, | |
[](const List& list) { | |
std::string ret = "("; | |
auto first = true; | |
for(auto& arg : list.values) { | |
if(!first) { | |
ret += " "; | |
} | |
first = false; | |
ret += dump(arg); | |
} | |
ret += ")"; | |
return ret; | |
} | |
}, expr.value); | |
} | |
int main() { | |
std::string source = R"SRC( | |
(define nat-fold (func (n accum func) ( | |
let ((body (rec-func (n accum) ( | |
if (eq n 0) | |
accum | |
(rec (- n 1) (func accum n)) | |
)))) | |
(body n accum) | |
))) | |
)SRC"; | |
Parser parser{source}; | |
skipws(parser); | |
while(!parser.source.empty()) { | |
auto expr = nextExpression(parser); | |
std::cout << dump(expr) << "\n"; | |
skipws(parser); | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "parser.hpp" | |
#include <stdexcept> | |
void skipws(std::string_view& source, Location& loc) { | |
while(!source.empty() && (std::isspace(source[0]) || source[0] == ';')) { | |
if(source[0] == ';') { // comment | |
auto n = source.find('\n'); | |
if(n == source.npos) { | |
// empty the string | |
auto rem = source.length(); | |
source = source.substr(rem); | |
loc.col += rem; | |
return; | |
} | |
++loc.row; | |
loc.col = 0; | |
source = source.substr(n + 1); | |
continue; | |
} else if(source[0] == '\n') { | |
++loc.row; | |
loc.col = 0; | |
} else { | |
++loc.col; | |
} | |
source = source.substr(1); | |
} | |
} | |
void skipws(Parser& parser) { | |
skipws(parser.source, parser.loc); | |
} | |
std::string_view consume(std::string_view& source, unsigned n, Location& loc) { | |
auto token = source.substr(0, n); | |
source = source.substr(n); | |
loc.col += n; | |
return token; | |
} | |
void throwError(std::string msg, const Location& loc) { | |
msg = std::to_string(loc.row) + ":" + std::to_string(loc.col) + ": " + msg; | |
throw std::runtime_error(msg); | |
} | |
Expression nextExpression(std::string_view& view, Location& loc) { | |
if(view.empty()) { | |
throwError("Empty expression (unexpected source end)", loc); | |
} | |
skipws(view, loc); | |
auto oloc = loc; | |
// 1: test whether it's a number | |
const char* end = &*view.end(); | |
double value = std::strtod(view.data(), const_cast<char**>(&end)); | |
if(end != view.data()) { | |
auto count = end - view.data(); | |
view = view.substr(count); | |
loc.col += count; | |
return {value, oloc}; | |
} | |
// 2: test whether it's a string | |
if(view[0] == '"') { | |
consume(view, 1, loc); // skip initial " | |
auto end = view.find('"'); | |
if(end == view.npos) { | |
throwError("Unterminated '\"'", oloc); | |
} | |
auto str = consume(view, end, loc); | |
consume(view, 1, loc); // skip final " | |
return {str, oloc}; | |
} | |
// 3: test if it's application | |
if(view[0] == '(') { | |
consume(view, 1, loc); | |
++loc.depth; | |
skipws(view, loc); | |
List list; | |
while(!view.empty() && view[0] != ')') { | |
auto e = nextExpression(view, loc); | |
list.values.push_back(e); | |
skipws(view, loc); | |
} | |
--loc.depth; | |
if(view.empty()) { | |
throwError("Unterminted '('", oloc); | |
} | |
if(view[0] != ')') { | |
throwError("Invalid termination of expression", loc); | |
} | |
consume(view, 1, loc); | |
return {list, loc}; | |
} | |
// otherwise it's an identifier | |
auto term = view.find_first_of("\n\t\r\v\f ()"); | |
if(term == view.npos) { | |
throwError("Invalid expression", oloc); | |
} | |
auto name = consume(view, term, loc); | |
if(name == "true") { | |
return {true, oloc}; | |
} else if(name == "false") { | |
return {false, oloc}; | |
} | |
return {Identifier{name}, oloc}; | |
} | |
Expression nextExpression(Parser& p) { | |
return nextExpression(p.source, p.loc); | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pragma once | |
#include <string> | |
#include <variant> | |
#include <vector> | |
struct Expression; | |
struct Location { | |
unsigned row {0}; | |
unsigned col {0}; | |
unsigned depth {0}; | |
}; | |
struct List { | |
std::vector<Expression> values; | |
}; | |
struct Identifier { | |
std::string_view name; | |
}; | |
struct Expression { | |
std::variant<bool, double, std::string_view, List, Identifier> value; | |
Location loc; | |
}; | |
struct Parser { | |
std::string_view source; | |
Location loc {}; | |
}; | |
void skipws(Parser& p); | |
Expression nextExpression(Parser& p); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Arguably one of the most simple parsers of a full-featured language. Parses some simple LISP-like language that can either be used to represent code but also as a simple data representation language (with a lot of parentheses, still more beautiful than something like XML though).