From f80543edad79489cf4cb3f81247bbd7d461d2457 Mon Sep 17 00:00:00 2001 From: Sameer Tantry Date: Wed, 15 Dec 2021 22:00:56 +0300 Subject: [PATCH 1/6] Build NFA and ParsingTable --- Grammar.cpp | 66 +++++++++++++++++++ Grammar.hpp | 65 +++++++++++++++++++ LR0.cpp | 178 ++++++++++++++++++++++++++++++++++++++++++++++++++++ LR0.hpp | 54 ++++++++++++++++ Parser.hpp | 10 +++ 5 files changed, 373 insertions(+) create mode 100644 Grammar.cpp create mode 100644 Grammar.hpp create mode 100644 LR0.cpp create mode 100644 LR0.hpp create mode 100644 Parser.hpp diff --git a/Grammar.cpp b/Grammar.cpp new file mode 100644 index 0000000..adec413 --- /dev/null +++ b/Grammar.cpp @@ -0,0 +1,66 @@ +#include "Grammar.hpp" + +Rule::Rule(const string& s) { + size_t sep = s.find(rule_separator); + if (sep == string::npos) { + throw std::runtime_error("Incorrect rule!"); + } + auto start = s.substr(0, sep); + if (start.size() > 1 || start.size() == 0) { + throw std::runtime_error("Incorrect left side for Context-free grammar!"); + } + left_ = start[0]; + right_ = s.substr(sep + rule_separator.size(), s.size()); +} + +char Rule::GetLeft() const { return left_; } + +string Rule::GetRight() const { return right_; } + +bool Rule::operator<(const Rule& other) const { + if (left_ == other.left_) { + return right_ < other.right_; + } + return left_ < other.left_; +} + +bool Rule::operator==(const Rule& other) const { + return !(*this < other) && !(other < *this); +} + +Grammar::Grammar(const set& alphabet) : alp_(alphabet) {} + +Grammar::Grammar(const vector& rules) : alp_(alp), rules_(rules) { + for (const auto& rule : rules_) { + nonterms_.insert(rule.GetLeft()); + } +} + +Grammar::Grammar(char start, const vector& rules, + const set& alphabet) + : Grammar(rules) { + start_ = start; +} + +const vector& Grammar::GetRules() const { return rules_; } + +const set& Grammar::GetAlp() const { return alp_; } + +const set& Grammar::GetNonTerms() const { return nonterms_; } + +void Grammar::AddRule(const Rule& rule) { rules_.emplace_back(rule); } + +vector Grammar::RulesWithLeftSide(char left) const { + vector v; + for (const Rule& rule : rules_) { + if (rule.GetLeft() == left) { + v.emplace_back(rule); + } + } + return v; +} + +void Grammar::SetStart(char c) { start_ = c; } + +char Grammar::GetStart() const { return start_; } + diff --git a/Grammar.hpp b/Grammar.hpp new file mode 100644 index 0000000..38d6baf --- /dev/null +++ b/Grammar.hpp @@ -0,0 +1,65 @@ +#include +#include +#include +#include +#include + +using std::cin; +using std::cout; +using std::map; +using std::set; +using std::string; +using std::vector; + +const string rule_separator = "->"; +const string mock_non_term_str = "1"; +const char mock_non_term = '1'; +const set alp = {'a', 'b', 'c'}; + +class Rule { + public: + Rule() = default; + Rule(const Rule&) = default; + Rule(const string&); + + Rule& operator=(const Rule&) = default; + + char GetLeft() const; + string GetRight() const; + + bool operator<(const Rule&) const; + bool operator==(const Rule&) const; + + private: + public: + char left_; + string right_ = ""; +}; + +class Grammar { + public: + Grammar() = default; + Grammar(const set& alphabet = alp); + Grammar(const vector& rules); + Grammar(char start_, const vector& rules, + const set& alphabet = alp); + Grammar(const Grammar&) = default; + + Grammar& operator=(const Grammar&) = default; + + const vector& GetRules() const; + const set& GetAlp() const; + const set& GetNonTerms() const; + void AddRule(const Rule&); + void AddLetter(char c) { alp_.insert(c); } + vector RulesWithLeftSide(char) const; + void SetStart(char); + char GetStart() const; + + private: + set alp_; + set nonterms_; + char start_; + vector rules_; +}; + diff --git a/LR0.cpp b/LR0.cpp new file mode 100644 index 0000000..2f704cd --- /dev/null +++ b/LR0.cpp @@ -0,0 +1,178 @@ +#include "LR0.hpp" + +LR0::LR0(const Grammar& grammar) : gr_(grammar) { + gr_.AddRule(Rule(mock_non_term_str + rule_separator + gr_.GetStart())); + gr_.SetStart(mock_non_term); + gr_.AddLetter(mock_non_term); + BuildNFA(); + BuildTable(); +} + +set LR0::Closure(const set& sit_set) { + set new_set = sit_set; + int64_t prev_sz = -1; + while (prev_sz != new_set.size()) { + prev_sz = new_set.size(); + for (const auto& sit : new_set) { + char symb = sit.GetSymbol(); + if (!gr_.GetNonTerms().contains(symb)) { + continue; + } + for (const auto& rule : gr_.GetRules()) { + if (rule.GetLeft() == symb) { + new_set.emplace(rule, 0); + } + } + } + } + return new_set; +} + +LR0::Situation::Situation(const Rule& rule, size_t pos) + : rule_(rule), dot_pos_(pos) {} + +set LR0::GoTo(const set& sit_set, char c) { + set new_set; + for (const auto& sit : sit_set) { + if (sit.GetSymbol() == c) { + new_set.emplace(sit.GetRule(), sit.GetDotPos() + 1); + } + } + return Closure(new_set); +} + +bool LR0::Situation::operator<(const Situation& other) const { + /*if (rule_ == other.rule_) { + return dot_pos_ < other.dot_pos_; + } + return rule_ < other.rule_;*/ + if (rule_.left_ < other.rule_.left_) { + return true; + } + if (rule_.left_ > other.rule_.left_) { + return false; + } + if (rule_.right_ < other.rule_.right_) { + return true; + } + if (rule_.right_ > other.rule_.right_) { + return false; + } + if (dot_pos_ < other.dot_pos_) { + return true; + } + if (dot_pos_ > other.dot_pos_) { + return false; + } + return false; +} + +bool LR0::Situation::operator==(const Situation& other) const { + return !(*this < other) && !(other < *this); +} + +bool LR0::Situation::operator!=(const Situation& other) const { + return !(*this == other); +} + +bool LR0::Situation::Completed() const { + return dot_pos_ == rule_.GetRight().size(); +} + +char LR0::Situation::GetSymbol() const { + if (Completed()) { + return '\0'; + } + return rule_.GetRight()[dot_pos_]; +} + +const Rule& LR0::Situation::GetRule() const { return rule_; } + +size_t LR0::Situation::GetDotPos() const { return dot_pos_; } + +void LR0::BuildNFA() { + nfa_.alp.insert(gr_.GetAlp().begin(), gr_.GetAlp().end()); + nfa_.alp.insert(gr_.GetNonTerms().begin(), gr_.GetNonTerms().end()); + nfa_.start = 0; + nfa_.q.resize(0); + int64_t prev_sz = -1; + nfa_.q.push_back(Closure({Situation(gr_.RulesWithLeftSide(gr_.GetStart())[0], 0)})); + nfa_.delta.push_back(map()); + while (prev_sz != nfa_.q.size()) { + prev_sz = nfa_.q.size(); + for (size_t i = 0; i < nfa_.q.size(); ++i) { + for (char c : nfa_.alp) { + auto new_state = GoTo(nfa_.q[i], c); + if (!new_state.empty() && find(nfa_.q.begin(), nfa_.q.end(), new_state) == nfa_.q.end()) { + nfa_.q.push_back(new_state); + } + } + } + } + nfa_.delta.resize(nfa_.q.size()); + for (size_t i = 0; i < nfa_.q.size(); ++i) { + for (char c : nfa_.alp) { + auto new_state = GoTo(nfa_.q[i], c); + if (new_state.empty()) { + nfa_.delta[i][c] = -1; + continue; + } + nfa_.delta[i][c] = find(nfa_.q.begin(), nfa_.q.end(), new_state) - nfa_.q.begin(); + } + } + for (size_t i = 0; i < nfa_.q.size(); ++i) { + bool is_term = true; + for (const auto& sit : nfa_.q[i]) { + if (!sit.Completed()) { + is_term = false; + break; + } + } + if (is_term) { + nfa_.term_states.insert(i); + } + } +} + +void LR0::BuildTable() { + tb_.assign(nfa_.q.size(), + vector>(nfa_.alp.size() + 1, { '\0', -1 })); + l_.resize(nfa_.alp.size() + 1); + l_[0] = '$'; + for (size_t i = 0; i < nfa_.q.size(); ++i) { + size_t j = 1; + for (auto a : gr_.GetAlp()) { + int64_t num = nfa_.delta[i][a]; + if (num != -1) { + tb_[i][j] = { 's', num }; + } + l_[j] = a; + ++j; + } + for (auto A : gr_.GetNonTerms()) { + int64_t num = nfa_.delta[i][A]; + if (num != -1) { + tb_[i][j] = { 't', num }; + } + l_[j] = A; + ++j; + } + } + auto win_sit = Situation(gr_.RulesWithLeftSide(mock_non_term)[0], 1); + auto win_state = Closure({ win_sit }); + int64_t win_state_index = find(nfa_.q.begin(), nfa_.q.end(), win_state) - nfa_.q.begin(); + tb_[win_state_index][0] = win; + + for (size_t i : nfa_.term_states) { + if (i != win_state_index) { + auto sit = *(nfa_.q[i].begin()); + auto rule = sit.GetRule(); + auto f = find(gr_.GetRules().begin(), gr_.GetRules().end(), rule); + int64_t rule_number = f - gr_.GetRules().begin(); + for (size_t j = 0; j <= gr_.GetAlp().size(); ++j) { + tb_[i][j] = { 'r', rule_number }; + } + } + } +} + diff --git a/LR0.hpp b/LR0.hpp new file mode 100644 index 0000000..8f81fa2 --- /dev/null +++ b/LR0.hpp @@ -0,0 +1,54 @@ +#include "Parser.hpp" + +class LR0 : public Parser { + public: + LR0() = delete; + LR0(const Grammar&); + + bool Recognize(const string& w, Grammar grammar) override {return true;} + //bool Recognize(const string& w); + + ~LR0() {} + + private: + class Situation { + public: + Situation() = delete; + Situation(const Rule&, size_t); + + bool operator==(const Situation&) const; + bool operator!=(const Situation&) const; + bool operator<(const Situation&) const; + + bool Completed() const; + char GetSymbol() const; + const Rule& GetRule() const; + size_t GetDotPos() const; + + private: + Rule rule_; + size_t dot_pos_; + }; + class NFA { + public: + NFA() = default; + set alp; + vector> q; + set term_states; + size_t start; + vector> delta; + }; + Grammar gr_; + NFA nfa_; + vector>> tb_; + const std::pair win = { 'w', 1 }; + vector l_; + void BuildTable(); + void BuildNFA(); + + void ReBuild(const Grammar&); + + set Closure(const set&); + set GoTo(const set&, char); +}; + diff --git a/Parser.hpp b/Parser.hpp new file mode 100644 index 0000000..430c191 --- /dev/null +++ b/Parser.hpp @@ -0,0 +1,10 @@ +#include "Grammar.hpp" + +class Parser { + public: + virtual bool Recognize(const string& w, Grammar grammar) = 0; + virtual ~Parser() = default; +}; + +class LR0; + From 32b7a2236abcc5a9d62763964c222aadf7a30abf Mon Sep 17 00:00:00 2001 From: Sameer Tantry Date: Wed, 15 Dec 2021 22:06:50 +0300 Subject: [PATCH 2/6] README.md --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index e69de29..c506a24 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,3 @@ +LR(0) Parser +In: Grammar and set of words +Out: True/False From 869a9199352313711f42c80aeaaf1961da3bc7e8 Mon Sep 17 00:00:00 2001 From: samthingswrong <74266834+samthingswrong@users.noreply.github.com> Date: Wed, 15 Dec 2021 22:07:37 +0300 Subject: [PATCH 3/6] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index c506a24..5b7469e 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ LR(0) Parser + In: Grammar and set of words + Out: True/False From a7068fd705485a7d08b93845259af721edb34489 Mon Sep 17 00:00:00 2001 From: Sameer Tantry Date: Wed, 15 Dec 2021 23:40:30 +0300 Subject: [PATCH 4/6] Recognize implementation --- Grammar.cpp | 1 - Grammar.hpp | 1 - LR0.cpp | 51 +++++++++++++++++++++++++++++++++++++++++---------- LR0.hpp | 13 ++++++------- Parser.hpp | 1 - 5 files changed, 47 insertions(+), 20 deletions(-) diff --git a/Grammar.cpp b/Grammar.cpp index adec413..3c15d48 100644 --- a/Grammar.cpp +++ b/Grammar.cpp @@ -63,4 +63,3 @@ vector Grammar::RulesWithLeftSide(char left) const { void Grammar::SetStart(char c) { start_ = c; } char Grammar::GetStart() const { return start_; } - diff --git a/Grammar.hpp b/Grammar.hpp index 38d6baf..a9209f6 100644 --- a/Grammar.hpp +++ b/Grammar.hpp @@ -62,4 +62,3 @@ class Grammar { char start_; vector rules_; }; - diff --git a/LR0.cpp b/LR0.cpp index 2f704cd..82eda71 100644 --- a/LR0.cpp +++ b/LR0.cpp @@ -96,14 +96,16 @@ void LR0::BuildNFA() { nfa_.start = 0; nfa_.q.resize(0); int64_t prev_sz = -1; - nfa_.q.push_back(Closure({Situation(gr_.RulesWithLeftSide(gr_.GetStart())[0], 0)})); + nfa_.q.push_back( + Closure({Situation(gr_.RulesWithLeftSide(gr_.GetStart())[0], 0)})); nfa_.delta.push_back(map()); while (prev_sz != nfa_.q.size()) { prev_sz = nfa_.q.size(); for (size_t i = 0; i < nfa_.q.size(); ++i) { for (char c : nfa_.alp) { auto new_state = GoTo(nfa_.q[i], c); - if (!new_state.empty() && find(nfa_.q.begin(), nfa_.q.end(), new_state) == nfa_.q.end()) { + if (!new_state.empty() && + find(nfa_.q.begin(), nfa_.q.end(), new_state) == nfa_.q.end()) { nfa_.q.push_back(new_state); } } @@ -117,7 +119,8 @@ void LR0::BuildNFA() { nfa_.delta[i][c] = -1; continue; } - nfa_.delta[i][c] = find(nfa_.q.begin(), nfa_.q.end(), new_state) - nfa_.q.begin(); + nfa_.delta[i][c] = + find(nfa_.q.begin(), nfa_.q.end(), new_state) - nfa_.q.begin(); } } for (size_t i = 0; i < nfa_.q.size(); ++i) { @@ -136,7 +139,7 @@ void LR0::BuildNFA() { void LR0::BuildTable() { tb_.assign(nfa_.q.size(), - vector>(nfa_.alp.size() + 1, { '\0', -1 })); + vector>(nfa_.alp.size() + 1, {'\0', -1})); l_.resize(nfa_.alp.size() + 1); l_[0] = '$'; for (size_t i = 0; i < nfa_.q.size(); ++i) { @@ -144,7 +147,7 @@ void LR0::BuildTable() { for (auto a : gr_.GetAlp()) { int64_t num = nfa_.delta[i][a]; if (num != -1) { - tb_[i][j] = { 's', num }; + tb_[i][j] = {'s', num}; } l_[j] = a; ++j; @@ -152,15 +155,16 @@ void LR0::BuildTable() { for (auto A : gr_.GetNonTerms()) { int64_t num = nfa_.delta[i][A]; if (num != -1) { - tb_[i][j] = { 't', num }; + tb_[i][j] = {'t', num}; } l_[j] = A; ++j; } } auto win_sit = Situation(gr_.RulesWithLeftSide(mock_non_term)[0], 1); - auto win_state = Closure({ win_sit }); - int64_t win_state_index = find(nfa_.q.begin(), nfa_.q.end(), win_state) - nfa_.q.begin(); + auto win_state = Closure({win_sit}); + int64_t win_state_index = + find(nfa_.q.begin(), nfa_.q.end(), win_state) - nfa_.q.begin(); tb_[win_state_index][0] = win; for (size_t i : nfa_.term_states) { @@ -168,11 +172,38 @@ void LR0::BuildTable() { auto sit = *(nfa_.q[i].begin()); auto rule = sit.GetRule(); auto f = find(gr_.GetRules().begin(), gr_.GetRules().end(), rule); - int64_t rule_number = f - gr_.GetRules().begin(); + int64_t rule_number = f - gr_.GetRules().begin(); for (size_t j = 0; j <= gr_.GetAlp().size(); ++j) { - tb_[i][j] = { 'r', rule_number }; + tb_[i][j] = {'r', rule_number}; } } } } +bool LR0::Recognize(string& w) { + w += '$'; + stack st; + st.push("0"); + size_t i = 0; + while (i <= w.size()) { + int64_t cur_st = stoi(st.top()); + if (tb_[cur_st][num_[w[i]]].first == 's') { + st.push("" + w[i]); + st.push(std::to_string(tb_[cur_st][num_[w[i]]].second)); + ++i; + } else if (tb_[cur_st][num_[w[i]]] == win) { + return true; + } else if (tb_[cur_st][num_[w[i]]].first == 'r') { + Rule rule = gr_.GetRules()[tb_[cur_st][num_[w[i]]].second]; + for (size_t k = 0; k < rule.GetRight().size(); ++k) { + st.pop(), st.pop(); + } + int64_t prev_st = stoi(st.top()); + st.push("" + rule.GetLeft()); + st.push(std::to_string(tb_[prev_st][num_[rule.GetLeft()]].second)); + } else { + return false; + } + } + return false; +} diff --git a/LR0.hpp b/LR0.hpp index 8f81fa2..3cc8be8 100644 --- a/LR0.hpp +++ b/LR0.hpp @@ -5,14 +5,14 @@ class LR0 : public Parser { LR0() = delete; LR0(const Grammar&); - bool Recognize(const string& w, Grammar grammar) override {return true;} - //bool Recognize(const string& w); + bool Recognize(const string& w, Grammar grammar) override { return true; } + // bool Recognize(const string& w); ~LR0() {} private: class Situation { - public: + public: Situation() = delete; Situation(const Rule&, size_t); @@ -25,12 +25,12 @@ class LR0 : public Parser { const Rule& GetRule() const; size_t GetDotPos() const; - private: + private: Rule rule_; size_t dot_pos_; }; class NFA { - public: + public: NFA() = default; set alp; vector> q; @@ -41,7 +41,7 @@ class LR0 : public Parser { Grammar gr_; NFA nfa_; vector>> tb_; - const std::pair win = { 'w', 1 }; + const std::pair win = {'w', 1}; vector l_; void BuildTable(); void BuildNFA(); @@ -51,4 +51,3 @@ class LR0 : public Parser { set Closure(const set&); set GoTo(const set&, char); }; - diff --git a/Parser.hpp b/Parser.hpp index 430c191..a112321 100644 --- a/Parser.hpp +++ b/Parser.hpp @@ -7,4 +7,3 @@ class Parser { }; class LR0; - From fadc874196d0abeefce961381a25b8dfe80b8b25 Mon Sep 17 00:00:00 2001 From: Sameer Tantry Date: Thu, 16 Dec 2021 04:30:16 +0300 Subject: [PATCH 5/6] Tests --- .github/workflows/Test.yml | 24 +++++ headers/Grammar.hpp | 66 ++++++++++++ headers/LR0.hpp | 55 ++++++++++ headers/Parser.hpp | 9 ++ src/Grammar.cpp | 65 +++++++++++ src/LR0.cpp | 216 +++++++++++++++++++++++++++++++++++++ test/CMakeLists.txt | 18 ++++ test/test.cpp | 93 ++++++++++++++++ 8 files changed, 546 insertions(+) create mode 100644 .github/workflows/Test.yml create mode 100644 headers/Grammar.hpp create mode 100644 headers/LR0.hpp create mode 100644 headers/Parser.hpp create mode 100644 src/Grammar.cpp create mode 100644 src/LR0.cpp create mode 100644 test/CMakeLists.txt create mode 100644 test/test.cpp diff --git a/.github/workflows/Test.yml b/.github/workflows/Test.yml new file mode 100644 index 0000000..864d9ef --- /dev/null +++ b/.github/workflows/Test.yml @@ -0,0 +1,24 @@ +name: TestEarley + +on: + push: + branches: [ master, LR0 ] + pull_request: + branches: [ master, LR0 ] +jobs: + Build-ubuntu: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v1 + - name: "Build and run tests" + run : | + git clone https://github.com/google/googletest + cd googletest + cd googletest + cmake .. + make + sudo make install + cd ../../ + cmake test + make + ./TestLR diff --git a/headers/Grammar.hpp b/headers/Grammar.hpp new file mode 100644 index 0000000..08719dc --- /dev/null +++ b/headers/Grammar.hpp @@ -0,0 +1,66 @@ +#include +#include +#include +#include +#include +#include + +using std::cin; +using std::cout; +using std::map; +using std::set; +using std::string; +using std::vector; +using std::stack; + +const string rule_separator = "->"; +const string mock_non_term_str = "1"; +const char mock_non_term = '1'; +const set alp = {'a', 'b', 'c'}; + +class Rule { + public: + Rule() = default; + Rule(const Rule&) = default; + Rule(const string&); + + Rule& operator=(const Rule&) = default; + + char GetLeft() const; + string GetRight() const; + + bool operator<(const Rule&) const; + bool operator==(const Rule&) const; + + private: + public: + char left_; + string right_ = ""; +}; + +class Grammar { + public: + Grammar() = default; + Grammar(const set& alphabet = alp); + Grammar(const vector& rules); + Grammar(char start_, const vector& rules, + const set& alphabet = alp); + Grammar(const Grammar&) = default; + + Grammar& operator=(const Grammar&) = default; + + const vector& GetRules() const; + const set& GetAlp() const; + const set& GetNonTerms() const; + void AddRule(const Rule&); + void AddLetter(char c) { alp_.insert(c); } + vector RulesWithLeftSide(char) const; + void SetStart(char); + char GetStart() const; + + private: + set alp_; + set nonterms_; + char start_; + vector rules_; +}; diff --git a/headers/LR0.hpp b/headers/LR0.hpp new file mode 100644 index 0000000..e4128c3 --- /dev/null +++ b/headers/LR0.hpp @@ -0,0 +1,55 @@ +#include "Parser.hpp" + +class LR0 : public Parser { + public: + LR0() = delete; + LR0(const Grammar&); + + bool Recognize(const string& w, Grammar grammar) override { return true; } + bool Recognize(string& w); + + ~LR0() {} + + private: + class Situation { + public: + Situation() = delete; + Situation(const Rule&, size_t); + + bool operator==(const Situation&) const; + bool operator!=(const Situation&) const; + bool operator<(const Situation&) const; + + bool Completed() const; + char GetSymbol() const; + const Rule& GetRule() const; + size_t GetDotPos() const; + + private: + Rule rule_; + size_t dot_pos_; + }; + class NFA { + public: + NFA() = default; + set alp; + vector> q; + set term_states; + size_t start; + vector> delta; + }; + Grammar gr_; + NFA nfa_; + vector>> tb_; + const std::pair win = { 'w', 1 }; + vector l_; + map num_; + void BuildTable(); + void BuildNFA(); + + void ReBuild(const Grammar&); + + set Closure(const set&); + set GoTo(const set&, char); +}; + diff --git a/headers/Parser.hpp b/headers/Parser.hpp new file mode 100644 index 0000000..a112321 --- /dev/null +++ b/headers/Parser.hpp @@ -0,0 +1,9 @@ +#include "Grammar.hpp" + +class Parser { + public: + virtual bool Recognize(const string& w, Grammar grammar) = 0; + virtual ~Parser() = default; +}; + +class LR0; diff --git a/src/Grammar.cpp b/src/Grammar.cpp new file mode 100644 index 0000000..7d5e64b --- /dev/null +++ b/src/Grammar.cpp @@ -0,0 +1,65 @@ +#include "../headers/Grammar.hpp" + +Rule::Rule(const string& s) { + size_t sep = s.find(rule_separator); + if (sep == string::npos) { + throw std::runtime_error("Incorrect rule!"); + } + auto start = s.substr(0, sep); + if (start.size() > 1 || start.size() == 0) { + throw std::runtime_error("Incorrect left side for Context-free grammar!"); + } + left_ = start[0]; + right_ = s.substr(sep + rule_separator.size(), s.size()); +} + +char Rule::GetLeft() const { return left_; } + +string Rule::GetRight() const { return right_; } + +bool Rule::operator<(const Rule& other) const { + if (left_ == other.left_) { + return right_ < other.right_; + } + return left_ < other.left_; +} + +bool Rule::operator==(const Rule& other) const { + return !(*this < other) && !(other < *this); +} + +Grammar::Grammar(const set& alphabet) : alp_(alphabet) {} + +Grammar::Grammar(const vector& rules) : alp_(alp), rules_(rules) { + for (const auto& rule : rules_) { + nonterms_.insert(rule.GetLeft()); + } +} + +Grammar::Grammar(char start, const vector& rules, + const set& alphabet) + : Grammar(rules) { + start_ = start; +} + +const vector& Grammar::GetRules() const { return rules_; } + +const set& Grammar::GetAlp() const { return alp_; } + +const set& Grammar::GetNonTerms() const { return nonterms_; } + +void Grammar::AddRule(const Rule& rule) { rules_.emplace_back(rule); } + +vector Grammar::RulesWithLeftSide(char left) const { + vector v; + for (const Rule& rule : rules_) { + if (rule.GetLeft() == left) { + v.emplace_back(rule); + } + } + return v; +} + +void Grammar::SetStart(char c) { start_ = c; } + +char Grammar::GetStart() const { return start_; } diff --git a/src/LR0.cpp b/src/LR0.cpp new file mode 100644 index 0000000..21c68ce --- /dev/null +++ b/src/LR0.cpp @@ -0,0 +1,216 @@ +#include "../headers/LR0.hpp" + +LR0::LR0(const Grammar& grammar) : gr_(grammar) { + gr_.AddRule(Rule(mock_non_term_str + rule_separator + gr_.GetStart())); + gr_.SetStart(mock_non_term); + gr_.AddLetter(mock_non_term); + BuildNFA(); + BuildTable(); +} + +void LR0::ReBuild(const Grammar& grammar) { + gr_ = grammar; + BuildNFA(); + BuildTable(); +} + +set LR0::Closure(const set& sit_set) { + set new_set = sit_set; + int64_t prev_sz = -1; + while (prev_sz != new_set.size()) { + prev_sz = new_set.size(); + for (const auto& sit : new_set) { + char symb = sit.GetSymbol(); + if (!gr_.GetNonTerms().contains(symb)) { + continue; + } + for (const auto& rule : gr_.GetRules()) { + if (rule.GetLeft() == symb) { + new_set.emplace(rule, 0); + } + } + } + } + return new_set; +} + +LR0::Situation::Situation(const Rule& rule, size_t pos) + : rule_(rule), dot_pos_(pos) {} + +set LR0::GoTo(const set& sit_set, char c) { + set new_set; + for (const auto& sit : sit_set) { + if (sit.GetSymbol() == c) { + new_set.emplace(sit.GetRule(), sit.GetDotPos() + 1); + } + } + return Closure(new_set); +} + +bool LR0::Situation::operator<(const Situation& other) const { + /*if (rule_ == other.rule_) { + return dot_pos_ < other.dot_pos_; + } + return rule_ < other.rule_;*/ + if (rule_.left_ < other.rule_.left_) { + return true; + } + if (rule_.left_ > other.rule_.left_) { + return false; + } + if (rule_.right_ < other.rule_.right_) { + return true; + } + if (rule_.right_ > other.rule_.right_) { + return false; + } + if (dot_pos_ < other.dot_pos_) { + return true; + } + if (dot_pos_ > other.dot_pos_) { + return false; + } + return false; +} + +bool LR0::Situation::operator==(const Situation& other) const { + return !(*this < other) && !(other < *this); +} + +bool LR0::Situation::operator!=(const Situation& other) const { + return !(*this == other); +} + +bool LR0::Situation::Completed() const { + return dot_pos_ == rule_.GetRight().size(); +} + +char LR0::Situation::GetSymbol() const { + if (Completed()) { + return '\0'; + } + return rule_.GetRight()[dot_pos_]; +} + +const Rule& LR0::Situation::GetRule() const { return rule_; } + +size_t LR0::Situation::GetDotPos() const { return dot_pos_; } + +void LR0::BuildNFA() { + nfa_.alp.insert(gr_.GetAlp().begin(), gr_.GetAlp().end()); + nfa_.alp.insert(gr_.GetNonTerms().begin(), gr_.GetNonTerms().end()); + nfa_.start = 0; + nfa_.q.resize(0); + int64_t prev_sz = -1; + nfa_.q.push_back(Closure({Situation(gr_.RulesWithLeftSide(gr_.GetStart())[0], 0)})); + nfa_.delta.push_back(map()); + while (prev_sz != nfa_.q.size()) { + prev_sz = nfa_.q.size(); + for (size_t i = 0; i < nfa_.q.size(); ++i) { + for (char c : nfa_.alp) { + auto new_state = GoTo(nfa_.q[i], c); + if (!new_state.empty() && find(nfa_.q.begin(), nfa_.q.end(), new_state) == nfa_.q.end()) { + nfa_.q.push_back(new_state); + } + } + } + } + nfa_.delta.resize(nfa_.q.size()); + for (size_t i = 0; i < nfa_.q.size(); ++i) { + for (char c : nfa_.alp) { + auto new_state = GoTo(nfa_.q[i], c); + if (new_state.empty()) { + nfa_.delta[i][c] = -1; + continue; + } + nfa_.delta[i][c] = find(nfa_.q.begin(), nfa_.q.end(), new_state) - nfa_.q.begin(); + } + } + for (size_t i = 0; i < nfa_.q.size(); ++i) { + bool is_term = true; + for (const auto& sit : nfa_.q[i]) { + if (!sit.Completed()) { + is_term = false; + break; + } + } + if (is_term) { + nfa_.term_states.insert(i); + } + } +} + +void LR0::BuildTable() { + tb_.assign(nfa_.q.size(), + vector>(nfa_.alp.size() + 1, { '\0', -1 })); + l_.resize(nfa_.alp.size() + 1); + l_[0] = '$'; + for (size_t i = 0; i < nfa_.q.size(); ++i) { + size_t j = 1; + for (auto a : gr_.GetAlp()) { + int64_t num = nfa_.delta[i][a]; + if (num != -1) { + tb_[i][j] = { 's', num }; + } + l_[j] = a; + ++j; + } + for (auto A : gr_.GetNonTerms()) { + int64_t num = nfa_.delta[i][A]; + if (num != -1) { + tb_[i][j] = { 't', num }; + } + l_[j] = A; + ++j; + } + } + auto win_sit = Situation(gr_.RulesWithLeftSide(mock_non_term)[0], 1); + auto win_state = Closure({ win_sit }); + int64_t win_state_index = find(nfa_.q.begin(), nfa_.q.end(), win_state) - nfa_.q.begin(); + tb_[win_state_index][0] = win; + + for (size_t i : nfa_.term_states) { + if (i != win_state_index) { + auto sit = *(nfa_.q[i].begin()); + auto rule = sit.GetRule(); + auto f = find(gr_.GetRules().begin(), gr_.GetRules().end(), rule); + int64_t rule_number = f - gr_.GetRules().begin(); + for (size_t j = 0; j <= gr_.GetAlp().size(); ++j) { + tb_[i][j] = { 'r', rule_number }; + } + } + } + for(size_t i = 0; i < l_.size(); ++i) { + num_[l_[i]] = i; + } +} + +bool LR0::Recognize(string& w) { + w += '$'; + stack st; + st.push("0"); + size_t i = 0; + while (i <= w.size()) { + int64_t cur_st = stoi(st.top()); + if (tb_[cur_st][num_[w[i]]].first == 's') { + st.push("" + w[i]); + st.push(std::to_string(tb_[cur_st][num_[w[i]]].second)); + ++i; + } else if (tb_[cur_st][num_[w[i]]] == win) { + return true; + } else if (tb_[cur_st][num_[w[i]]].first == 'r') { + Rule rule = gr_.GetRules()[tb_[cur_st][num_[w[i]]].second]; + for (size_t k = 0; k < rule.GetRight().size(); ++k) { + st.pop(), st.pop(); + } + int64_t prev_st = stoi(st.top()); + st.push("" + rule.GetLeft()); + st.push(std::to_string(tb_[prev_st][num_[rule.GetLeft()]].second)); + } else { + return false; + } + } + return false; +} + + diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt new file mode 100644 index 0000000..78af40f --- /dev/null +++ b/test/CMakeLists.txt @@ -0,0 +1,18 @@ +cmake_minimum_required(VERSION 3.17) +project(TestLR) + +set(CMAKE_CXX_STANDARD 20) + +find_package(Threads REQUIRED) +find_package(GTest REQUIRED) + +add_executable(TestLR "../src/Grammar.cpp" "../headers/Grammar.hpp" "../src/LR0.cpp" "../headers/LR0.hpp" "../headers/Parser.hpp" "test.cpp") + +target_include_directories(${PROJECT_NAME} + PRIVATE + ${GTEST_INCLUDE_DIR}) + +target_link_libraries(${PROJECT_NAME} + PRIVATE + GTest::GTest + Threads::Threads) diff --git a/test/test.cpp b/test/test.cpp new file mode 100644 index 0000000..a2b9466 --- /dev/null +++ b/test/test.cpp @@ -0,0 +1,93 @@ +#include "../headers/LR0.hpp" +#include + +TEST(INDIAN_LECTURES_TEST, YEASY_NO) { + set alp = {'a' , 'b' }; + vector rules = { + Rule("S->AA"), + Rule("A->aA"), + Rule("A->b") + }; + Grammar gr('S', rules, alp); + LR0 parser(gr); + string w = ""; + EXPECT_FALSE(parser.Recognize(w)); +} + +TEST(INDIAN_LECTURES_TEST, YEASY_YES) { + set alp = { 'a' , 'b' }; + vector rules = { + Rule("S->AA"), + Rule("A->aA"), + Rule("A->b") + }; + Grammar gr('S', rules, alp); + LR0 parser(gr); + string w = "abb"; + EXPECT_TRUE(parser.Recognize(w)); +} + +TEST(INDIAN_LECTURES_TEST, MEDIUM_YES) { + set alp = { 'a' , 'b' }; + vector rules = { + Rule("S->AA"), + Rule("A->aA"), + Rule("A->b") + }; + Grammar gr('S', rules, alp); + LR0 parser(gr); + string w = "aabb"; + EXPECT_TRUE(parser.Recognize(w)); +} + +TEST(INDIAN_LECTURES_TEST, MEDIUM_NO) { + set alp = { 'a' , 'b' }; + vector rules = { + Rule("S->AA"), + Rule("A->aA"), + Rule("A->b") + }; + Grammar gr('S', rules, alp); + LR0 parser(gr); + string w = "baabb"; + EXPECT_FALSE(parser.Recognize(w)); +} + +TEST(INDIAN_LECTURES_TEST, HARD_YES) { + set alp = { 'a' , 'b' }; + vector rules = { + Rule("S->A"), + Rule("A->aA"), + Rule("A->b") + }; + Grammar gr('S', rules, alp); + LR0 parser(gr); + string w = ""; + for (size_t i = 0; i < 100000; ++i) { + w += "a"; + } + w += "b"; + EXPECT_TRUE(parser.Recognize(w)); +} + +TEST(INDIAN_LECTURES_TEST, HARD_NO) { + set alp = { 'a' , 'b' }; + vector rules = { + Rule("S->A"), + Rule("A->aA"), + Rule("A->b") + }; + Grammar gr('S', rules, alp); + LR0 parser(gr); + string w = ""; + for (size_t i = 0; i < 100000; ++i) { + w += "a"; + } + w += "ba"; + EXPECT_FALSE(parser.Recognize(w)); +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} From f2c8a3a4b99a44271c68519bd13b331e9b68a463 Mon Sep 17 00:00:00 2001 From: Sameer Tantry Date: Thu, 16 Dec 2021 04:31:47 +0300 Subject: [PATCH 6/6] Remove garbage --- Grammar.cpp | 65 ---------------- Grammar.hpp | 64 ---------------- LR0.cpp | 209 ---------------------------------------------------- LR0.hpp | 53 ------------- Parser.hpp | 9 --- 5 files changed, 400 deletions(-) delete mode 100644 Grammar.cpp delete mode 100644 Grammar.hpp delete mode 100644 LR0.cpp delete mode 100644 LR0.hpp delete mode 100644 Parser.hpp diff --git a/Grammar.cpp b/Grammar.cpp deleted file mode 100644 index 3c15d48..0000000 --- a/Grammar.cpp +++ /dev/null @@ -1,65 +0,0 @@ -#include "Grammar.hpp" - -Rule::Rule(const string& s) { - size_t sep = s.find(rule_separator); - if (sep == string::npos) { - throw std::runtime_error("Incorrect rule!"); - } - auto start = s.substr(0, sep); - if (start.size() > 1 || start.size() == 0) { - throw std::runtime_error("Incorrect left side for Context-free grammar!"); - } - left_ = start[0]; - right_ = s.substr(sep + rule_separator.size(), s.size()); -} - -char Rule::GetLeft() const { return left_; } - -string Rule::GetRight() const { return right_; } - -bool Rule::operator<(const Rule& other) const { - if (left_ == other.left_) { - return right_ < other.right_; - } - return left_ < other.left_; -} - -bool Rule::operator==(const Rule& other) const { - return !(*this < other) && !(other < *this); -} - -Grammar::Grammar(const set& alphabet) : alp_(alphabet) {} - -Grammar::Grammar(const vector& rules) : alp_(alp), rules_(rules) { - for (const auto& rule : rules_) { - nonterms_.insert(rule.GetLeft()); - } -} - -Grammar::Grammar(char start, const vector& rules, - const set& alphabet) - : Grammar(rules) { - start_ = start; -} - -const vector& Grammar::GetRules() const { return rules_; } - -const set& Grammar::GetAlp() const { return alp_; } - -const set& Grammar::GetNonTerms() const { return nonterms_; } - -void Grammar::AddRule(const Rule& rule) { rules_.emplace_back(rule); } - -vector Grammar::RulesWithLeftSide(char left) const { - vector v; - for (const Rule& rule : rules_) { - if (rule.GetLeft() == left) { - v.emplace_back(rule); - } - } - return v; -} - -void Grammar::SetStart(char c) { start_ = c; } - -char Grammar::GetStart() const { return start_; } diff --git a/Grammar.hpp b/Grammar.hpp deleted file mode 100644 index a9209f6..0000000 --- a/Grammar.hpp +++ /dev/null @@ -1,64 +0,0 @@ -#include -#include -#include -#include -#include - -using std::cin; -using std::cout; -using std::map; -using std::set; -using std::string; -using std::vector; - -const string rule_separator = "->"; -const string mock_non_term_str = "1"; -const char mock_non_term = '1'; -const set alp = {'a', 'b', 'c'}; - -class Rule { - public: - Rule() = default; - Rule(const Rule&) = default; - Rule(const string&); - - Rule& operator=(const Rule&) = default; - - char GetLeft() const; - string GetRight() const; - - bool operator<(const Rule&) const; - bool operator==(const Rule&) const; - - private: - public: - char left_; - string right_ = ""; -}; - -class Grammar { - public: - Grammar() = default; - Grammar(const set& alphabet = alp); - Grammar(const vector& rules); - Grammar(char start_, const vector& rules, - const set& alphabet = alp); - Grammar(const Grammar&) = default; - - Grammar& operator=(const Grammar&) = default; - - const vector& GetRules() const; - const set& GetAlp() const; - const set& GetNonTerms() const; - void AddRule(const Rule&); - void AddLetter(char c) { alp_.insert(c); } - vector RulesWithLeftSide(char) const; - void SetStart(char); - char GetStart() const; - - private: - set alp_; - set nonterms_; - char start_; - vector rules_; -}; diff --git a/LR0.cpp b/LR0.cpp deleted file mode 100644 index 82eda71..0000000 --- a/LR0.cpp +++ /dev/null @@ -1,209 +0,0 @@ -#include "LR0.hpp" - -LR0::LR0(const Grammar& grammar) : gr_(grammar) { - gr_.AddRule(Rule(mock_non_term_str + rule_separator + gr_.GetStart())); - gr_.SetStart(mock_non_term); - gr_.AddLetter(mock_non_term); - BuildNFA(); - BuildTable(); -} - -set LR0::Closure(const set& sit_set) { - set new_set = sit_set; - int64_t prev_sz = -1; - while (prev_sz != new_set.size()) { - prev_sz = new_set.size(); - for (const auto& sit : new_set) { - char symb = sit.GetSymbol(); - if (!gr_.GetNonTerms().contains(symb)) { - continue; - } - for (const auto& rule : gr_.GetRules()) { - if (rule.GetLeft() == symb) { - new_set.emplace(rule, 0); - } - } - } - } - return new_set; -} - -LR0::Situation::Situation(const Rule& rule, size_t pos) - : rule_(rule), dot_pos_(pos) {} - -set LR0::GoTo(const set& sit_set, char c) { - set new_set; - for (const auto& sit : sit_set) { - if (sit.GetSymbol() == c) { - new_set.emplace(sit.GetRule(), sit.GetDotPos() + 1); - } - } - return Closure(new_set); -} - -bool LR0::Situation::operator<(const Situation& other) const { - /*if (rule_ == other.rule_) { - return dot_pos_ < other.dot_pos_; - } - return rule_ < other.rule_;*/ - if (rule_.left_ < other.rule_.left_) { - return true; - } - if (rule_.left_ > other.rule_.left_) { - return false; - } - if (rule_.right_ < other.rule_.right_) { - return true; - } - if (rule_.right_ > other.rule_.right_) { - return false; - } - if (dot_pos_ < other.dot_pos_) { - return true; - } - if (dot_pos_ > other.dot_pos_) { - return false; - } - return false; -} - -bool LR0::Situation::operator==(const Situation& other) const { - return !(*this < other) && !(other < *this); -} - -bool LR0::Situation::operator!=(const Situation& other) const { - return !(*this == other); -} - -bool LR0::Situation::Completed() const { - return dot_pos_ == rule_.GetRight().size(); -} - -char LR0::Situation::GetSymbol() const { - if (Completed()) { - return '\0'; - } - return rule_.GetRight()[dot_pos_]; -} - -const Rule& LR0::Situation::GetRule() const { return rule_; } - -size_t LR0::Situation::GetDotPos() const { return dot_pos_; } - -void LR0::BuildNFA() { - nfa_.alp.insert(gr_.GetAlp().begin(), gr_.GetAlp().end()); - nfa_.alp.insert(gr_.GetNonTerms().begin(), gr_.GetNonTerms().end()); - nfa_.start = 0; - nfa_.q.resize(0); - int64_t prev_sz = -1; - nfa_.q.push_back( - Closure({Situation(gr_.RulesWithLeftSide(gr_.GetStart())[0], 0)})); - nfa_.delta.push_back(map()); - while (prev_sz != nfa_.q.size()) { - prev_sz = nfa_.q.size(); - for (size_t i = 0; i < nfa_.q.size(); ++i) { - for (char c : nfa_.alp) { - auto new_state = GoTo(nfa_.q[i], c); - if (!new_state.empty() && - find(nfa_.q.begin(), nfa_.q.end(), new_state) == nfa_.q.end()) { - nfa_.q.push_back(new_state); - } - } - } - } - nfa_.delta.resize(nfa_.q.size()); - for (size_t i = 0; i < nfa_.q.size(); ++i) { - for (char c : nfa_.alp) { - auto new_state = GoTo(nfa_.q[i], c); - if (new_state.empty()) { - nfa_.delta[i][c] = -1; - continue; - } - nfa_.delta[i][c] = - find(nfa_.q.begin(), nfa_.q.end(), new_state) - nfa_.q.begin(); - } - } - for (size_t i = 0; i < nfa_.q.size(); ++i) { - bool is_term = true; - for (const auto& sit : nfa_.q[i]) { - if (!sit.Completed()) { - is_term = false; - break; - } - } - if (is_term) { - nfa_.term_states.insert(i); - } - } -} - -void LR0::BuildTable() { - tb_.assign(nfa_.q.size(), - vector>(nfa_.alp.size() + 1, {'\0', -1})); - l_.resize(nfa_.alp.size() + 1); - l_[0] = '$'; - for (size_t i = 0; i < nfa_.q.size(); ++i) { - size_t j = 1; - for (auto a : gr_.GetAlp()) { - int64_t num = nfa_.delta[i][a]; - if (num != -1) { - tb_[i][j] = {'s', num}; - } - l_[j] = a; - ++j; - } - for (auto A : gr_.GetNonTerms()) { - int64_t num = nfa_.delta[i][A]; - if (num != -1) { - tb_[i][j] = {'t', num}; - } - l_[j] = A; - ++j; - } - } - auto win_sit = Situation(gr_.RulesWithLeftSide(mock_non_term)[0], 1); - auto win_state = Closure({win_sit}); - int64_t win_state_index = - find(nfa_.q.begin(), nfa_.q.end(), win_state) - nfa_.q.begin(); - tb_[win_state_index][0] = win; - - for (size_t i : nfa_.term_states) { - if (i != win_state_index) { - auto sit = *(nfa_.q[i].begin()); - auto rule = sit.GetRule(); - auto f = find(gr_.GetRules().begin(), gr_.GetRules().end(), rule); - int64_t rule_number = f - gr_.GetRules().begin(); - for (size_t j = 0; j <= gr_.GetAlp().size(); ++j) { - tb_[i][j] = {'r', rule_number}; - } - } - } -} - -bool LR0::Recognize(string& w) { - w += '$'; - stack st; - st.push("0"); - size_t i = 0; - while (i <= w.size()) { - int64_t cur_st = stoi(st.top()); - if (tb_[cur_st][num_[w[i]]].first == 's') { - st.push("" + w[i]); - st.push(std::to_string(tb_[cur_st][num_[w[i]]].second)); - ++i; - } else if (tb_[cur_st][num_[w[i]]] == win) { - return true; - } else if (tb_[cur_st][num_[w[i]]].first == 'r') { - Rule rule = gr_.GetRules()[tb_[cur_st][num_[w[i]]].second]; - for (size_t k = 0; k < rule.GetRight().size(); ++k) { - st.pop(), st.pop(); - } - int64_t prev_st = stoi(st.top()); - st.push("" + rule.GetLeft()); - st.push(std::to_string(tb_[prev_st][num_[rule.GetLeft()]].second)); - } else { - return false; - } - } - return false; -} diff --git a/LR0.hpp b/LR0.hpp deleted file mode 100644 index 3cc8be8..0000000 --- a/LR0.hpp +++ /dev/null @@ -1,53 +0,0 @@ -#include "Parser.hpp" - -class LR0 : public Parser { - public: - LR0() = delete; - LR0(const Grammar&); - - bool Recognize(const string& w, Grammar grammar) override { return true; } - // bool Recognize(const string& w); - - ~LR0() {} - - private: - class Situation { - public: - Situation() = delete; - Situation(const Rule&, size_t); - - bool operator==(const Situation&) const; - bool operator!=(const Situation&) const; - bool operator<(const Situation&) const; - - bool Completed() const; - char GetSymbol() const; - const Rule& GetRule() const; - size_t GetDotPos() const; - - private: - Rule rule_; - size_t dot_pos_; - }; - class NFA { - public: - NFA() = default; - set alp; - vector> q; - set term_states; - size_t start; - vector> delta; - }; - Grammar gr_; - NFA nfa_; - vector>> tb_; - const std::pair win = {'w', 1}; - vector l_; - void BuildTable(); - void BuildNFA(); - - void ReBuild(const Grammar&); - - set Closure(const set&); - set GoTo(const set&, char); -}; diff --git a/Parser.hpp b/Parser.hpp deleted file mode 100644 index a112321..0000000 --- a/Parser.hpp +++ /dev/null @@ -1,9 +0,0 @@ -#include "Grammar.hpp" - -class Parser { - public: - virtual bool Recognize(const string& w, Grammar grammar) = 0; - virtual ~Parser() = default; -}; - -class LR0;