Last active
December 28, 2021 03:14
-
-
Save nmanumr/46aa0df92a57045f95302b7fb13ea4d0 to your computer and use it in GitHub Desktop.
CFG
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <string> | |
#include <list> | |
#include <unordered_map> | |
#include <set> | |
using namespace std; | |
#ifndef CC_CFG_H | |
#define CC_CFG_H | |
enum TokenType { | |
TERMINAL, NON_TERMINAL, EPSILON | |
}; | |
struct Token { | |
string name; | |
TokenType type; | |
}; | |
typedef list<Token> Rule; | |
struct StandaloneRule { | |
string nonTerminal; | |
list<Token> rule; | |
}; | |
struct CFG { | |
string startSymbol; | |
unordered_map<string, list<Rule>> rules; | |
}; | |
void print(const string &name, const list<Rule> &rules) { | |
cout << name << " -> "; | |
for (const auto &rule : rules) { | |
for (const auto &token: rule) | |
cout << token.name; | |
if (&rule != &rules.back()) cout << endl << " | "; | |
} | |
} | |
void print(CFG cfg) { | |
print(cfg.startSymbol, cfg.rules[cfg.startSymbol]); | |
for (const auto &rule: cfg.rules) { | |
if (rule.first == cfg.startSymbol) continue; | |
print(rule.first, rule.second); | |
} | |
} | |
set<string> getFirstSet(CFG cfg, const string &symbol, set<string> path) { | |
if (path.count(symbol) == 1) return {}; | |
else path.insert(symbol); | |
auto rules = cfg.rules[symbol]; | |
set<string> firstSet; | |
for (auto rule: rules) { | |
for (const auto &token: rule) { | |
if (token.type == TokenType::TERMINAL) { | |
firstSet.insert(token.name); | |
break; | |
} | |
if (token.type == TokenType::NON_TERMINAL) { | |
set<string> tokens = getFirstSet(cfg, token.name, path); | |
auto hasEpsilon = tokens.count("ε") == 1; | |
tokens.erase("ε"); | |
firstSet.insert(tokens.begin(), tokens.end()); | |
if (!hasEpsilon) break; | |
if (&token == &rule.back()) firstSet.insert("ε"); | |
} else if (token.type == TokenType::EPSILON && &token == &rule.back()) | |
firstSet.insert("ε"); | |
} | |
} | |
return firstSet; | |
} | |
set<string> getFirstSet(const CFG& cfg, const string &symbol) { | |
return getFirstSet(cfg, symbol, {}); | |
} | |
set<string> getFollowSet(const CFG& cfg, const string &symbol, set<string> path) { | |
if (path.count(symbol) == 1) return {}; | |
else path.insert(symbol); | |
set<string> followSet; | |
for (const auto &rules: cfg.rules) { | |
for (const auto &rule: rules.second) { | |
bool found = false; | |
for (const auto &token: rule) { | |
if (!found) { | |
if (token.name == symbol) found = true; | |
continue; | |
} | |
if (token.type == TokenType::TERMINAL) { | |
followSet.insert(token.name); | |
break; | |
} | |
if (token.type == TokenType::NON_TERMINAL) { | |
set<string> firstSet = getFirstSet(cfg, token.name); | |
auto hasEpsilon = firstSet.count("ε") == 1; | |
firstSet.erase("ε"); | |
followSet.insert(firstSet.begin(), firstSet.end()); | |
if (!hasEpsilon) break; | |
continue; | |
} | |
if (&token == &rule.back()) { | |
auto _set = getFollowSet(cfg, rules.first, path); | |
followSet.insert(_set.begin(), _set.end()); | |
} | |
} | |
} | |
} | |
if (symbol == cfg.startSymbol) | |
followSet.insert("$"); | |
return followSet; | |
} | |
set<string> getFollowSet(const CFG& cfg, const string &symbol) { | |
return getFollowSet(cfg, symbol, {}); | |
} | |
#endif //CC_CFG_H |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <string> | |
#include <list> | |
#include <set> | |
#include <unordered_map> | |
#include <utility> | |
#include "cfg.h" | |
using namespace std; | |
#ifndef CC_LL1_H | |
#define CC_LL1_H | |
struct Ll1Table { | |
set<string> terminals; | |
set<string> nonTerminals; | |
unordered_map<string, StandaloneRule> table; | |
}; | |
void addRule(Ll1Table &table, const string &terminal, const string &nonTerminal, const StandaloneRule &rule) { | |
table.terminals.insert(terminal); | |
table.nonTerminals.insert(nonTerminal); | |
table.table[nonTerminal + "-" + terminal] = rule; | |
} | |
StandaloneRule *getRule(Ll1Table table, const string &terminal, const string &nonTerminal) { | |
if (table.table.count(nonTerminal + "-" + terminal) == 1) { | |
return &table.table[nonTerminal + "-" + terminal]; | |
} | |
return nullptr; | |
} | |
Ll1Table toLl1Table(const CFG &cfg) { | |
Ll1Table table; | |
table.terminals.insert("$"); | |
for (const auto &rules: cfg.rules) { | |
string nonTerminal = rules.first; | |
for (Rule rule: rules.second) { | |
set<string> set; | |
if (rule.front().type == TokenType::TERMINAL) { | |
set.insert(rule.front().name); | |
} else if (rule.front().type == TokenType::NON_TERMINAL) { | |
auto firstSet = getFirstSet(cfg, rule.front().name); | |
set.insert(firstSet.begin(), firstSet.end()); | |
} else { | |
auto followSet = getFollowSet(cfg, rule.front().name); | |
set.insert(followSet.begin(), followSet.end()); | |
} | |
for (const auto &terminal: set) { | |
addRule(table, terminal, nonTerminal, {nonTerminal, rule}); | |
} | |
for (const auto &token: rule) { | |
if (token.type == TokenType::NON_TERMINAL) | |
table.nonTerminals.insert(token.name); | |
else | |
table.terminals.insert(token.name); | |
} | |
} | |
} | |
return table; | |
} | |
void print(const Ll1Table &table) { | |
list<string> nonTerminals(table.nonTerminals.begin(), table.nonTerminals.end()); | |
for (const auto &nonTerminal: nonTerminals) { | |
if (nonTerminal == nonTerminals.front()) | |
cout << " " << "\t\t\t\t | "; | |
else | |
cout << nonTerminal << "\t\t\t\t | "; | |
for (const auto &terminal: table.terminals) { | |
if (nonTerminal == nonTerminals.front()) { | |
cout << terminal << "\t\t\t\t | "; | |
} else { | |
auto rule = getRule(table, terminal, nonTerminal); | |
if (rule != nullptr) { | |
print(rule->nonTerminal, {rule->rule}); | |
cout << "\t\t\t | "; | |
} | |
else | |
cout << " \t\t\t\t | "; | |
} | |
} | |
cout << endl; | |
} | |
} | |
#endif //CC_CFG_H |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "cfg.h" | |
#include "ll1.h" | |
using namespace std; | |
int main() { | |
Rule r1 = { | |
{.name = "id", .type = TokenType::TERMINAL}, | |
}; | |
CFG cfg = { | |
.startSymbol = "S", | |
.rules = { | |
{"S", { | |
{ | |
{.name = "A", .type = TokenType::NON_TERMINAL}, | |
{.name = "B", .type = TokenType::NON_TERMINAL}, | |
}, | |
}}, | |
{"A", { | |
{ | |
{.name = "F", .type = TokenType::NON_TERMINAL}, | |
{.name = "C", .type = TokenType::NON_TERMINAL}, | |
}, { | |
{.name = "ε", .type = TokenType::EPSILON}, | |
} | |
}}, | |
{"B", { | |
{ | |
{.name = "+", .type = TokenType::TERMINAL}, | |
{.name = "A", .type = TokenType::NON_TERMINAL}, | |
{.name = "B", .type = TokenType::NON_TERMINAL}, | |
}, { | |
{.name = "ε", .type = TokenType::EPSILON}, | |
} | |
}}, | |
{"C", { | |
{ | |
{.name = "*", .type = TokenType::TERMINAL}, | |
{.name = "F", .type = TokenType::NON_TERMINAL}, | |
{.name = "C", .type = TokenType::NON_TERMINAL}, | |
}, { | |
{.name = "ε", .type = TokenType::EPSILON}, | |
} | |
}}, | |
{"F", { | |
r1, { | |
{.name = "(", .type = TokenType::TERMINAL}, | |
{.name = "S", .type = TokenType::NON_TERMINAL}, | |
{.name = ")", .type = TokenType::TERMINAL}, | |
} | |
}}, | |
}, | |
}; | |
// print(cfg); | |
// | |
// set<string> firstSet = getFirstSet(cfg, "A"); | |
// for (const auto& token: firstSet) | |
// cout << token << ", "; | |
// | |
// set<string> followSet = getFollowSet(cfg, "A"); | |
// for (const auto& token: firstSet) | |
// cout << token << ", "; | |
auto table = toLl1Table(cfg); | |
print(table); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment