diff --git a/.github/workflows/Test.yml b/.github/workflows/Test.yml
new file mode 100644
index 0000000..864d9ef
--- /dev/null
+++ b/.github/workflows/Test.yml
@@ -0,0 +1,24 @@
+name: TestEarley
+
+on:
+  push:
+    branches: [ master, LR0 ]
+  pull_request:
+    branches: [ master, LR0 ]
+jobs:
+  Build-ubuntu:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v1
+      - name: "Build and run tests"
+        run : |
+          git clone https://github.com/google/googletest
+          cd googletest
+          cd googletest
+          cmake ..
+          make
+          sudo make install
+          cd ../../
+          cmake test
+          make
+          ./TestLR
diff --git a/README.md b/README.md
index e69de29..5b7469e 100644
--- a/README.md
+++ b/README.md
@@ -0,0 +1,5 @@
+LR(0) Parser
+
+In: Grammar and set of words
+
+Out: True/False
diff --git a/headers/Grammar.hpp b/headers/Grammar.hpp
new file mode 100644
index 0000000..08719dc
--- /dev/null
+++ b/headers/Grammar.hpp
@@ -0,0 +1,79 @@
+#pragma once
+
+#include <iostream>
+#include <map>
+#include <set>
+#include <stack>
+#include <string>
+#include <vector>
+
+using std::cin;
+using std::cout;
+using std::map;
+using std::set;
+using std::string;
+using std::vector;
+using std::stack;
+
+// Separator between the left and right side of a textual rule ("S->AA").
+const string rule_separator = "->";
+// Reserved symbol that LR0 uses as the start of the augmented grammar.
+const string mock_non_term_str = "1";
+const char mock_non_term = '1';
+// Terminal alphabet used when a Grammar is built without an explicit one.
+const set<char> alp = {'a', 'b', 'c'};
+
+// A context-free production: one non-terminal character -> string of symbols.
+class Rule {
+ public:
+  Rule() = default;
+  Rule(const Rule&) = default;
+  // Parses "X->rhs"; throws std::runtime_error if the separator is missing or
+  // the left side is not exactly one character.
+  Rule(const string&);
+
+  Rule& operator=(const Rule&) = default;
+
+  char GetLeft() const;
+  string GetRight() const;
+
+  // Lexicographic order on (left side, right side).
+  bool operator<(const Rule&) const;
+  bool operator==(const Rule&) const;
+
+  // Data members stay public: LR0::Situation::operator< reads them directly.
+ public:
+  char left_ = '\0';
+  string right_ = "";
+};
+
+// A grammar: terminal alphabet, non-terminals, start symbol and ordered rules.
+class Grammar {
+ public:
+  // Also the default constructor. A separate Grammar() = default would make
+  // "Grammar g;" ambiguous with this defaulted argument.
+  Grammar(const set<char>& alphabet = alp);
+  // Uses the default alphabet; non-terminals are the left sides of rules.
+  Grammar(const vector<Rule>& rules);
+  Grammar(char start, const vector<Rule>& rules,
+          const set<char>& alphabet = alp);
+  Grammar(const Grammar&) = default;
+
+  Grammar& operator=(const Grammar&) = default;
+
+  const vector<Rule>& GetRules() const;
+  const set<char>& GetAlp() const;
+  const set<char>& GetNonTerms() const;
+  // Appends a rule; the non-terminal set is left unchanged.
+  void AddRule(const Rule&);
+  void AddLetter(char c) { alp_.insert(c); }
+  vector<Rule> RulesWithLeftSide(char) const;
+  void SetStart(char);
+  char GetStart() const;
+
+ private:
+  set<char> alp_;
+  set<char> nonterms_;
+  char start_ = '\0';
+  vector<Rule> rules_;
+};
diff --git a/headers/LR0.hpp b/headers/LR0.hpp
new file mode 100644
index 0000000..e4128c3
--- /dev/null
+++ b/headers/LR0.hpp
@@ -0,0 +1,77 @@
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <utility>
+
+#include "Parser.hpp"
+
+// LR(0) shift-reduce recognizer built once for a fixed grammar.
+class LR0 : public Parser {
+ public:
+  LR0() = delete;
+  // Augments the grammar with the rule 1->S, then builds automaton and table.
+  LR0(const Grammar&);
+
+  // Parser interface: recognizes w with a parser built for the given grammar
+  // (this overload used to be a stub that accepted every word).
+  bool Recognize(const string& w, Grammar grammar) override {
+    string word = w;
+    return LR0(grammar).Recognize(word);
+  }
+  // Recognizes w with this parser's grammar. Appends the end marker '$' to w.
+  bool Recognize(string& w);
+
+  ~LR0() {}
+
+ private:
+  // An LR(0) item: a rule plus the position of the dot in its right side.
+  class Situation {
+   public:
+    Situation() = delete;
+    Situation(const Rule&, size_t);
+
+    bool operator==(const Situation&) const;
+    bool operator!=(const Situation&) const;
+    bool operator<(const Situation&) const;
+
+    // True when the dot is at the end of the right side.
+    bool Completed() const;
+    // Symbol right after the dot, or '\0' for a completed item.
+    char GetSymbol() const;
+    const Rule& GetRule() const;
+    size_t GetDotPos() const;
+
+   private:
+    Rule rule_;
+    size_t dot_pos_;
+  };
+  // Automaton over item sets: q[i] is state i and delta[i][c] is its successor
+  // on symbol c, or -1 when there is none.
+  class NFA {
+   public:
+    NFA() = default;
+    set<char> alp;
+    vector<set<Situation>> q;
+    set<size_t> term_states;
+    size_t start = 0;
+    vector<map<char, int64_t>> delta;
+  };
+  Grammar gr_;
+  NFA nfa_;
+  // tb_[state][column] = (action, argument): 's' shift, 't' goto, 'r' reduce,
+  // 'w' accept, '\0' no action.
+  vector<vector<std::pair<char, int64_t>>> tb_;
+  const std::pair<char, int64_t> win = { 'w', 1 };
+  vector<char> l_;         // column -> symbol; column 0 is '$'
+  map<char, size_t> num_;  // symbol -> column
+  void BuildTable();
+  void BuildNFA();
+
+  void Augment();
+  void ReBuild(const Grammar&);
+
+  set<Situation> Closure(const set<Situation>&);
+  set<Situation> GoTo(const set<Situation>&, char);
+};
+
diff --git a/headers/Parser.hpp b/headers/Parser.hpp
new file mode 100644
index 0000000..a112321
--- /dev/null
+++ b/headers/Parser.hpp
@@ -0,0 +1,12 @@
+#pragma once
+
+#include "Grammar.hpp"
+
+// Interface for recognizers that decide whether a grammar derives a word.
+class Parser {
+ public:
+  virtual bool Recognize(const string& w, Grammar grammar) = 0;
+  virtual ~Parser() = default;
+};
+
+class LR0;
diff --git a/src/Grammar.cpp b/src/Grammar.cpp
new file mode 100644
index 0000000..7d5e64b
--- /dev/null
+++ b/src/Grammar.cpp
@@ -0,0 +1,72 @@
+#include "../headers/Grammar.hpp"
+
+#include <stdexcept>
+
+// Parses a rule written as "X->rhs". Throws std::runtime_error when the
+// separator is missing or the left side is not a single character.
+Rule::Rule(const string& s) {
+  size_t sep = s.find(rule_separator);
+  if (sep == string::npos) {
+    throw std::runtime_error("Incorrect rule!");
+  }
+  auto start = s.substr(0, sep);
+  if (start.size() != 1) {
+    throw std::runtime_error("Incorrect left side for Context-free grammar!");
+  }
+  left_ = start[0];
+  right_ = s.substr(sep + rule_separator.size());
+}
+
+char Rule::GetLeft() const { return left_; }
+
+string Rule::GetRight() const { return right_; }
+
+bool Rule::operator<(const Rule& other) const {
+  if (left_ == other.left_) {
+    return right_ < other.right_;
+  }
+  return left_ < other.left_;
+}
+
+bool Rule::operator==(const Rule& other) const {
+  return !(*this < other) && !(other < *this);
+}
+
+Grammar::Grammar(const set<char>& alphabet) : alp_(alphabet) {}
+
+Grammar::Grammar(const vector<Rule>& rules) : alp_(alp), rules_(rules) {
+  for (const auto& rule : rules_) {
+    nonterms_.insert(rule.GetLeft());
+  }
+}
+
+Grammar::Grammar(char start, const vector<Rule>& rules,
+                 const set<char>& alphabet)
+    : Grammar(rules) {
+  start_ = start;
+  // The delegated constructor installs the default alphabet; apply the
+  // caller's alphabet, which used to be silently ignored.
+  alp_ = alphabet;
+}
+
+const vector<Rule>& Grammar::GetRules() const { return rules_; }
+
+const set<char>& Grammar::GetAlp() const { return alp_; }
+
+const set<char>& Grammar::GetNonTerms() const { return nonterms_; }
+
+void Grammar::AddRule(const Rule& rule) { rules_.emplace_back(rule); }
+
+vector<Rule> Grammar::RulesWithLeftSide(char left) const {
+  vector<Rule> v;
+  for (const Rule& rule : rules_) {
+    if (rule.GetLeft() == left) {
+      v.emplace_back(rule);
+    }
+  }
+  return v;
+}
+
+void Grammar::SetStart(char c) { start_ = c; }
+
+char Grammar::GetStart() const { return start_; }
diff --git a/src/LR0.cpp b/src/LR0.cpp
new file mode 100644
index 0000000..21c68ce
--- /dev/null
+++ b/src/LR0.cpp
@@ -0,0 +1,236 @@
+#include "../headers/LR0.hpp"
+
+#include <algorithm>
+#include <tuple>
+
+LR0::LR0(const Grammar& grammar) : gr_(grammar) {
+  Augment();
+  BuildNFA();
+  BuildTable();
+}
+
+// Adds the rule 1->S for the current start symbol S and makes '1' the start.
+// '1' is registered as an alphabet letter; AddRule does not add it to the
+// non-terminal set.
+void LR0::Augment() {
+  gr_.AddRule(Rule(mock_non_term_str + rule_separator + gr_.GetStart()));
+  gr_.SetStart(mock_non_term);
+  gr_.AddLetter(mock_non_term);
+}
+
+// Rebuilds the parser for another grammar. The grammar is augmented again and
+// every structure derived from the previous grammar is dropped first.
+void LR0::ReBuild(const Grammar& grammar) {
+  gr_ = grammar;
+  Augment();
+  nfa_ = NFA();
+  num_.clear();
+  BuildNFA();
+  BuildTable();
+}
+
+// Closes an item set: while some item has the dot before a non-terminal X,
+// adds X->.rhs for every rule of X, until no new item appears.
+set<LR0::Situation> LR0::Closure(const set<Situation>& sit_set) {
+  set<Situation> new_set = sit_set;
+  size_t prev_sz = 0;
+  do {
+    prev_sz = new_set.size();
+    // Inserting into a std::set keeps its iterators valid, so new_set may grow
+    // while it is being traversed.
+    for (const auto& sit : new_set) {
+      char symb = sit.GetSymbol();
+      if (!gr_.GetNonTerms().contains(symb)) {
+        continue;
+      }
+      for (const auto& rule : gr_.GetRules()) {
+        if (rule.GetLeft() == symb) {
+          new_set.emplace(rule, 0);
+        }
+      }
+    }
+  } while (prev_sz != new_set.size());
+  return new_set;
+}
+
+LR0::Situation::Situation(const Rule& rule, size_t pos)
+    : rule_(rule), dot_pos_(pos) {}
+
+// Moves the dot over c in every item that expects c and closes the result.
+set<LR0::Situation> LR0::GoTo(const set<Situation>& sit_set, char c) {
+  set<Situation> new_set;
+  for (const auto& sit : sit_set) {
+    if (sit.GetSymbol() == c) {
+      new_set.emplace(sit.GetRule(), sit.GetDotPos() + 1);
+    }
+  }
+  return Closure(new_set);
+}
+
+// Orders items by (left side, right side, dot position).
+bool LR0::Situation::operator<(const Situation& other) const {
+  return std::tie(rule_.left_, rule_.right_, dot_pos_) <
+         std::tie(other.rule_.left_, other.rule_.right_, other.dot_pos_);
+}
+
+bool LR0::Situation::operator==(const Situation& other) const {
+  return !(*this < other) && !(other < *this);
+}
+
+bool LR0::Situation::operator!=(const Situation& other) const {
+  return !(*this == other);
+}
+
+bool LR0::Situation::Completed() const {
+  return dot_pos_ == rule_.right_.size();
+}
+
+char LR0::Situation::GetSymbol() const {
+  if (Completed()) {
+    return '\0';
+  }
+  return rule_.right_[dot_pos_];
+}
+
+const Rule& LR0::Situation::GetRule() const { return rule_; }
+
+size_t LR0::Situation::GetDotPos() const { return dot_pos_; }
+
+// Builds the collection of item sets (states), the transition function and
+// the set of states whose items are all completed.
+void LR0::BuildNFA() {
+  nfa_.alp.insert(gr_.GetAlp().begin(), gr_.GetAlp().end());
+  nfa_.alp.insert(gr_.GetNonTerms().begin(), gr_.GetNonTerms().end());
+  nfa_.start = 0;
+  nfa_.q.clear();
+  nfa_.q.push_back(
+      Closure({Situation(gr_.RulesWithLeftSide(gr_.GetStart())[0], 0)}));
+  // q grows while it is scanned and the bound is re-read on every step, so a
+  // single indexed pass also visits the states appended along the way.
+  for (size_t i = 0; i < nfa_.q.size(); ++i) {
+    for (char c : nfa_.alp) {
+      auto new_state = GoTo(nfa_.q[i], c);
+      if (!new_state.empty() &&
+          std::find(nfa_.q.begin(), nfa_.q.end(), new_state) == nfa_.q.end()) {
+        nfa_.q.push_back(new_state);
+      }
+    }
+  }
+  nfa_.delta.assign(nfa_.q.size(), map<char, int64_t>());
+  for (size_t i = 0; i < nfa_.q.size(); ++i) {
+    for (char c : nfa_.alp) {
+      auto new_state = GoTo(nfa_.q[i], c);
+      if (new_state.empty()) {
+        nfa_.delta[i][c] = -1;
+        continue;
+      }
+      nfa_.delta[i][c] =
+          std::find(nfa_.q.begin(), nfa_.q.end(), new_state) - nfa_.q.begin();
+    }
+  }
+  for (size_t i = 0; i < nfa_.q.size(); ++i) {
+    const bool is_term =
+        std::all_of(nfa_.q[i].begin(), nfa_.q[i].end(),
+                    [](const Situation& sit) { return sit.Completed(); });
+    if (is_term) {
+      nfa_.term_states.insert(i);
+    }
+  }
+}
+
+// Fills the action/goto table. Column 0 is '$', followed by one column per
+// letter of the grammar alphabet (shift) and one per non-terminal (goto).
+void LR0::BuildTable() {
+  tb_.assign(nfa_.q.size(),
+             vector<std::pair<char, int64_t>>(nfa_.alp.size() + 1,
+                                              { '\0', -1 }));
+  l_.resize(nfa_.alp.size() + 1);
+  l_[0] = '$';
+  for (size_t i = 0; i < nfa_.q.size(); ++i) {
+    size_t j = 1;
+    for (auto a : gr_.GetAlp()) {
+      int64_t num = nfa_.delta[i][a];
+      if (num != -1) {
+        tb_[i][j] = { 's', num };
+      }
+      l_[j] = a;
+      ++j;
+    }
+    for (auto A : gr_.GetNonTerms()) {
+      int64_t num = nfa_.delta[i][A];
+      if (num != -1) {
+        tb_[i][j] = { 't', num };
+      }
+      l_[j] = A;
+      ++j;
+    }
+  }
+  auto win_sit = Situation(gr_.RulesWithLeftSide(mock_non_term)[0], 1);
+  auto win_state = Closure({ win_sit });
+  size_t win_state_index =
+      std::find(nfa_.q.begin(), nfa_.q.end(), win_state) - nfa_.q.begin();
+  tb_[win_state_index][0] = win;
+
+  for (size_t i : nfa_.term_states) {
+    if (i != win_state_index) {
+      const Rule& rule = nfa_.q[i].begin()->GetRule();
+      auto f = std::find(gr_.GetRules().begin(), gr_.GetRules().end(), rule);
+      int64_t rule_number = f - gr_.GetRules().begin();
+      // LR(0) reduces without lookahead: fill '$' and every alphabet column.
+      for (size_t j = 0; j <= gr_.GetAlp().size(); ++j) {
+        tb_[i][j] = { 'r', rule_number };
+      }
+    }
+  }
+  for (size_t i = 0; i < l_.size(); ++i) {
+    num_[l_[i]] = i;
+  }
+}
+
+// Runs the shift-reduce loop on w. The stack alternates state numbers and
+// grammar symbols, both stored as strings, with a state always on top.
+// Appends the end marker '$' to w.
+bool LR0::Recognize(string& w) {
+  w += '$';
+  stack<string> st;
+  st.push("0");
+  size_t i = 0;
+  while (i < w.size()) {
+    // A symbol without a column is not part of the grammar. Looking it up
+    // with num_[w[i]] would insert it with column 0, the column of '$'.
+    const auto col = num_.find(w[i]);
+    if (col == num_.end()) {
+      return false;
+    }
+    const int64_t cur_st = std::stoi(st.top());
+    const auto& action = tb_[cur_st][col->second];
+    if (action.first == 's') {
+      // string(1, c) rather than "" + c, which offsets a pointer into the
+      // literal instead of building a one-character string.
+      st.push(string(1, w[i]));
+      st.push(std::to_string(action.second));
+      ++i;
+    } else if (action == win) {
+      return true;
+    } else if (action.first == 'r') {
+      const Rule& rule = gr_.GetRules()[action.second];
+      const size_t rhs_len = rule.right_.size();
+      for (size_t k = 0; k < rhs_len; ++k) {
+        st.pop(), st.pop();
+      }
+      const int64_t prev_st = std::stoi(st.top());
+      const auto left_col = num_.find(rule.GetLeft());
+      if (left_col == num_.end() ||
+          tb_[prev_st][left_col->second].second == -1) {
+        return false;  // no goto from prev_st on the reduced non-terminal
+      }
+      st.push(string(1, rule.GetLeft()));
+      st.push(std::to_string(tb_[prev_st][left_col->second].second));
+    } else {
+      return false;
+    }
+  }
+  return false;
+}
+
+
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
new file mode 100644
index 0000000..78af40f
--- /dev/null
+++ b/test/CMakeLists.txt
@@ -0,0 +1,18 @@
+cmake_minimum_required(VERSION 3.17)
+project(TestLR)
+
+set(CMAKE_CXX_STANDARD 20)
+
+find_package(Threads REQUIRED)
+find_package(GTest REQUIRED)
+
+add_executable(TestLR "../src/Grammar.cpp" "../headers/Grammar.hpp" "../src/LR0.cpp" "../headers/LR0.hpp" "../headers/Parser.hpp" "test.cpp")
+
+target_include_directories(${PROJECT_NAME}
+  PRIVATE
+  ${GTEST_INCLUDE_DIR})
+
+target_link_libraries(${PROJECT_NAME}
+  PRIVATE
+  GTest::GTest
+  Threads::Threads)
diff --git a/test/test.cpp b/test/test.cpp
new file mode 100644
index 0000000..a2b9466
--- /dev/null
+++ b/test/test.cpp
@@ -0,0 +1,123 @@
+#include "../headers/LR0.hpp"
+#include <gtest/gtest.h>
+
+TEST(INDIAN_LECTURES_TEST, YEASY_NO) {
+  set<char> alp = {'a' , 'b' };
+  vector<Rule> rules = {
+    Rule("S->AA"),
+    Rule("A->aA"),
+    Rule("A->b")
+  };
+  Grammar gr('S', rules, alp);
+  LR0 parser(gr);
+  string w = "";
+  EXPECT_FALSE(parser.Recognize(w));
+}
+
+TEST(INDIAN_LECTURES_TEST, YEASY_YES) {
+  set<char> alp = { 'a' , 'b' };
+  vector<Rule> rules = {
+    Rule("S->AA"),
+    Rule("A->aA"),
+    Rule("A->b")
+  };
+  Grammar gr('S', rules, alp);
+  LR0 parser(gr);
+  string w = "abb";
+  EXPECT_TRUE(parser.Recognize(w));
+}
+
+TEST(INDIAN_LECTURES_TEST, MEDIUM_YES) {
+  set<char> alp = { 'a' , 'b' };
+  vector<Rule> rules = {
+    Rule("S->AA"),
+    Rule("A->aA"),
+    Rule("A->b")
+  };
+  Grammar gr('S', rules, alp);
+  LR0 parser(gr);
+  string w = "aabb";
+  EXPECT_TRUE(parser.Recognize(w));
+}
+
+TEST(INDIAN_LECTURES_TEST, MEDIUM_NO) {
+  set<char> alp = { 'a' , 'b' };
+  vector<Rule> rules = {
+    Rule("S->AA"),
+    Rule("A->aA"),
+    Rule("A->b")
+  };
+  Grammar gr('S', rules, alp);
+  LR0 parser(gr);
+  string w = "baabb";
+  EXPECT_FALSE(parser.Recognize(w));
+}
+
+TEST(INDIAN_LECTURES_TEST, HARD_YES) {
+  set<char> alp = { 'a' , 'b' };
+  vector<Rule> rules = {
+    Rule("S->A"),
+    Rule("A->aA"),
+    Rule("A->b")
+  };
+  Grammar gr('S', rules, alp);
+  LR0 parser(gr);
+  string w = "";
+  for (size_t i = 0; i < 100000; ++i) {
+    w += "a";
+  }
+  w += "b";
+  EXPECT_TRUE(parser.Recognize(w));
+}
+
+TEST(INDIAN_LECTURES_TEST, HARD_NO) {
+  set<char> alp = { 'a' , 'b' };
+  vector<Rule> rules = {
+    Rule("S->A"),
+    Rule("A->aA"),
+    Rule("A->b")
+  };
+  Grammar gr('S', rules, alp);
+  LR0 parser(gr);
+  string w = "";
+  for (size_t i = 0; i < 100000; ++i) {
+    w += "a";
+  }
+  w += "ba";
+  EXPECT_FALSE(parser.Recognize(w));
+}
+
+TEST(INDIAN_LECTURES_TEST, UNKNOWN_SYMBOL_NO) {
+  // A symbol outside the grammar must not be mistaken for the end marker.
+  set<char> alp = { 'a' , 'b' };
+  vector<Rule> rules = {
+    Rule("S->AA"),
+    Rule("A->aA"),
+    Rule("A->b")
+  };
+  Grammar gr('S', rules, alp);
+  LR0 parser(gr);
+  string w = "abbz";
+  EXPECT_FALSE(parser.Recognize(w));
+}
+
+TEST(INDIAN_LECTURES_TEST, PARSER_INTERFACE) {
+  // The Parser overload has to parse the word instead of accepting anything.
+  set<char> alp = { 'a' , 'b' };
+  vector<Rule> rules = {
+    Rule("S->AA"),
+    Rule("A->aA"),
+    Rule("A->b")
+  };
+  Grammar gr('S', rules, alp);
+  LR0 parser(gr);
+  const string yes = "abb";
+  const string no = "ab";
+  EXPECT_TRUE(parser.Recognize(yes, gr));
+  EXPECT_FALSE(parser.Recognize(no, gr));
+}
+
+int main(int argc, char** argv) {
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}