diff --git a/.github/workflows/Test.yml b/.github/workflows/Test.yml
new file mode 100644
index 0000000..90f83a8
--- /dev/null
+++ b/.github/workflows/Test.yml
@@ -0,0 +1,24 @@
+name: TestEarley
+
+on:
+  push:
+    branches: [ master, Earley ]
+  pull_request:
+    branches: [ master, Earley ]
+jobs:
+  Build-ubuntu:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v1
+      - name: "Build and run tests"
+        run : |
+          git clone https://github.com/google/googletest
+          cd googletest
+          cd googletest
+          cmake ..
+          make
+          sudo make install
+          cd ../../
+          cmake test
+          make
+          ./TestEarley
diff --git a/README.md b/README.md
index e69de29..b05ac05 100644
--- a/README.md
+++ b/README.md
@@ -0,0 +1,2 @@
+In: Grammar, word
+Out: True/False
diff --git a/headers/Grammar.hpp b/headers/Grammar.hpp
new file mode 100644
index 0000000..fe7a1ba
--- /dev/null
+++ b/headers/Grammar.hpp
@@ -0,0 +1,74 @@
+#pragma once
+
+#include <iostream>
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+using std::cin;
+using std::cout;
+using std::map;
+using std::set;
+using std::string;
+using std::vector;
+
+// Separator between the two sides of a textual rule, e.g. "S->(S)S".
+const string rule_separator = "->";
+// Auxiliary start non-terminal that EarleyParser adds on top of the grammar.
+const string mock_non_term_str = "1";
+const char mock_non_term = '1';
+// Alphabet used when a Grammar is built without an explicit one.
+const set<char> alp = {'a', 'b', 'c', '#'};
+
+// A production with a one-character left side and a string right side.
+class Rule {
+ public:
+  Rule() = default;
+  Rule(const Rule&) = default;
+  // Parses "X->rhs". Left implicit on purpose: strings are passed where a
+  // Rule is expected. Throws std::runtime_error when the separator is missing
+  // or the left side is not exactly one character long.
+  Rule(const string&);
+
+  Rule& operator=(const Rule&) = default;
+
+  char GetLeft() const;
+  string GetRight() const;
+
+  // Orders rules by left side, then by right side.
+  bool operator<(const Rule&) const;
+  bool operator==(const Rule&) const;
+
+  // Kept public for backward compatibility; prefer GetLeft()/GetRight().
+  char left_ = '\0';
+  string right_ = "";
+};
+
+// A grammar: alphabet, start symbol and list of rules.
+class Grammar {
+ public:
+  Grammar() = default;
+  // Deliberately has no default argument: combined with the defaulted
+  // constructor above it would make `Grammar g;` ambiguous.
+  Grammar(const set<char>& alphabet);
+  Grammar(const vector<Rule>& rules);
+  Grammar(char start, const vector<Rule>& rules,
+          const set<char>& alphabet = alp);
+  Grammar(const Grammar&) = default;
+
+  Grammar& operator=(const Grammar&) = default;
+
+  const vector<Rule>& GetRules() const;
+  const set<char>& GetAlp() const;
+  void AddRule(const Rule&);
+  // Returns copies of every rule whose left side equals the given symbol.
+  vector<Rule> RulesWithLeftSide(char) const;
+  void SetStart(char);
+  char GetStart() const;
+
+ private:
+  set<char> alp_ = alp;
+  char start_ = '\0';
+  vector<Rule> rules_;
+};
diff --git a/headers/Parser.hpp b/headers/Parser.hpp
new file mode 100644
index 0000000..9ffb8b4
--- /dev/null
+++ b/headers/Parser.hpp
@@ -0,0 +1,50 @@
+#pragma once
+
+#include <cstddef>
+
+#include "Grammar.hpp"
+
+// Interface of a recognizer: decides whether a word is accepted for a grammar.
+class Parser {
+ public:
+  virtual bool Recognize(const string& w, Grammar grammar) = 0;
+  virtual ~Parser() = default;
+};
+
+// Recognizer that builds one set of situations per input position.
+class EarleyParser : public Parser {
+ public:
+  EarleyParser() = default;
+
+  bool Recognize(const string& w, Grammar grammar) override;
+
+ private:
+  // A rule together with a dot position inside its right side and the index
+  // of the situation set in which the rule was introduced.
+  class Situation {
+   public:
+    Situation() = delete;
+    Situation(const Rule&, size_t, size_t);
+
+    // True when the dot has reached the end of the right side.
+    bool Completed() const;
+    // Symbol right after the dot, or '\0' for a completed situation.
+    char GetSymbol() const;
+
+    bool operator==(const Situation&) const;
+    bool operator!=(const Situation&) const;
+    bool operator<(const Situation&) const;
+
+    Rule rule;
+    size_t dot_pos = 0;
+    size_t start_pos = 0;
+  };
+
+  void Earley(const string&, const Grammar&);
+  void Scan(size_t, char);
+  void Predict(size_t, const Grammar&);
+  void Complete(size_t);
+
+  // sit_sets_[i] is the situation set for input position i.
+  vector<set<Situation>> sit_sets_;
+};
diff --git a/src/Grammar.cpp b/src/Grammar.cpp
new file mode 100644
index 0000000..2a8b376
--- /dev/null
+++ b/src/Grammar.cpp
@@ -0,0 +1,62 @@
+#include "../headers/Grammar.hpp"
+
+#include <stdexcept>
+
+// Parses a rule written as "<left><rule_separator><right>".
+Rule::Rule(const string& s) {
+  const size_t sep = s.find(rule_separator);
+  if (sep == string::npos) {
+    throw std::runtime_error("Incorrect rule!");
+  }
+  // Everything before the separator is the left side; it must be one symbol.
+  if (sep != 1) {
+    throw std::runtime_error("Incorrect left side for Context-free grammar!");
+  }
+  left_ = s[0];
+  right_ = s.substr(sep + rule_separator.size());
+}
+
+char Rule::GetLeft() const { return left_; }
+
+string Rule::GetRight() const { return right_; }
+
+bool Rule::operator<(const Rule& other) const {
+  if (left_ != other.left_) {
+    return left_ < other.left_;
+  }
+  return right_ < other.right_;
+}
+
+bool Rule::operator==(const Rule& other) const {
+  // Compare fields directly. The former !(a < b && b < a) held for every
+  // pair of rules because both orderings can never be true at once.
+  return left_ == other.left_ && right_ == other.right_;
+}
+
+Grammar::Grammar(const set<char>& alphabet) : alp_(alphabet) {}
+
+Grammar::Grammar(const vector<Rule>& rules) : alp_(alp), rules_(rules) {}
+
+Grammar::Grammar(char start, const vector<Rule>& rules,
+                 const set<char>& alphabet)
+    : alp_(alphabet), start_(start), rules_(rules) {}
+
+const vector<Rule>& Grammar::GetRules() const { return rules_; }
+
+const set<char>& Grammar::GetAlp() const { return alp_; }
+
+void Grammar::AddRule(const Rule& rule) { rules_.push_back(rule); }
+
+vector<Rule> Grammar::RulesWithLeftSide(char left) const {
+  vector<Rule> matching;
+  for (const Rule& rule : rules_) {
+    if (rule.GetLeft() == left) {
+      matching.push_back(rule);
+    }
+  }
+  return matching;
+}
+
+void Grammar::SetStart(char c) { start_ = c; }
+
+char Grammar::GetStart() const { return start_; }
diff --git a/src/Parser.cpp b/src/Parser.cpp
new file mode 100644
index 0000000..09a541e
--- /dev/null
+++ b/src/Parser.cpp
@@ -0,0 +1,118 @@
+#include "../headers/Parser.hpp"
+
+EarleyParser::Situation::Situation(const Rule& rule, size_t dot_position,
+                                   size_t start_position)
+    : rule(rule), dot_pos(dot_position), start_pos(start_position) {}
+
+bool EarleyParser::Situation::Completed() const {
+  return dot_pos >= rule.GetRight().size();
+}
+
+char EarleyParser::Situation::GetSymbol() const {
+  // '\0' stands for "nothing after the dot".
+  return Completed() ? '\0' : rule.GetRight()[dot_pos];
+}
+
+bool EarleyParser::Situation::operator<(const Situation& other) const {
+  // Lexicographic order on (rule, dot_pos, start_pos); this is the order the
+  // situation sets are keyed by.
+  if (rule < other.rule) {
+    return true;
+  }
+  if (other.rule < rule) {
+    return false;
+  }
+  if (dot_pos != other.dot_pos) {
+    return dot_pos < other.dot_pos;
+  }
+  return start_pos < other.start_pos;
+}
+
+bool EarleyParser::Situation::operator==(const Situation& other) const {
+  return rule == other.rule && dot_pos == other.dot_pos &&
+         start_pos == other.start_pos;
+}
+
+bool EarleyParser::Situation::operator!=(const Situation& other) const {
+  return !(*this == other);
+}
+
+// Adds the rule "1->S" for the current start symbol S, fills the situation
+// sets and reports whether that rule is completed over the whole word.
+bool EarleyParser::Recognize(const string& w, Grammar grammar) {
+  const Rule mock_rule(mock_non_term_str + rule_separator +
+                       string(1, grammar.GetStart()));
+  grammar.SetStart(mock_non_term);
+  grammar.AddRule(mock_rule);
+  Earley(w, grammar);
+  const Situation accepting(mock_rule, 1, 0);
+  return sit_sets_[w.size()].contains(accepting);
+}
+
+// Rebuilds sit_sets_ for the word `w`, one set per input position.
+void EarleyParser::Earley(const string& w, const Grammar& grammar) {
+  sit_sets_.clear();
+  sit_sets_.resize(w.size() + 1);
+
+  // Seed with every rule of the auxiliary start symbol instead of assuming
+  // the one added by Recognize() comes first.
+  for (const Rule& start_rule : grammar.RulesWithLeftSide(mock_non_term)) {
+    sit_sets_[0].emplace(start_rule, 0, 0);
+  }
+
+  for (size_t i = 0; i <= w.size(); ++i) {
+    if (i > 0) {
+      Scan(i - 1, w[i - 1]);
+    }
+    // Predict and Complete feed each other, so repeat until the set stops
+    // growing.
+    size_t size_before = 0;
+    do {
+      size_before = sit_sets_[i].size();
+      Predict(i, grammar);
+      Complete(i);
+    } while (size_before != sit_sets_[i].size());
+  }
+}
+
+// Moves the dot over `c` for every situation of set i that expects it.
+void EarleyParser::Scan(size_t i, char c) {
+  for (const auto& sit : sit_sets_[i]) {
+    // A completed situation reports '\0', which must not match a NUL in the
+    // input and push the dot past the end of the rule.
+    if (!sit.Completed() && sit.GetSymbol() == c) {
+      sit_sets_[i + 1].emplace(sit.rule, sit.dot_pos + 1, sit.start_pos);
+    }
+  }
+}
+
+// Adds the rules of every symbol that follows a dot in set i.
+void EarleyParser::Predict(size_t i, const Grammar& grammar) {
+  // std::set insertion keeps iterators valid, so the set may grow while it is
+  // being traversed.
+  for (const auto& sit : sit_sets_[i]) {
+    if (sit.Completed()) {
+      continue;
+    }
+    for (const auto& new_rule : grammar.RulesWithLeftSide(sit.GetSymbol())) {
+      sit_sets_[i].emplace(new_rule, 0, i);
+    }
+  }
+}
+
+// For each completed situation of set i, advances the situations of its
+// starting set that were waiting for its left-side symbol.
+void EarleyParser::Complete(size_t i) {
+  for (const auto& situation : sit_sets_[i]) {
+    if (!situation.Completed()) {
+      continue;
+    }
+    const char left = situation.rule.GetLeft();
+    for (const auto& waiting : sit_sets_[situation.start_pos]) {
+      if (!waiting.Completed() && waiting.GetSymbol() == left) {
+        sit_sets_[i].emplace(waiting.rule, waiting.dot_pos + 1,
+                             waiting.start_pos);
+      }
+    }
+  }
+}
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
new file mode 100644
index 0000000..377dc90
--- /dev/null
+++ b/test/CMakeLists.txt
@@ -0,0 +1,20 @@
+cmake_minimum_required(VERSION 3.17)
+project(TestEarley)
+
+set(CMAKE_CXX_STANDARD 20)
+# The parser uses std::set::contains, so do not fall back to an older standard.
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+find_package(Threads REQUIRED)
+find_package(GTest REQUIRED)
+
+add_executable(TestEarley "../src/Grammar.cpp" "../headers/Grammar.hpp" "../src/Parser.cpp" "../headers/Parser.hpp" "test.cpp")
+
+target_include_directories(${PROJECT_NAME}
+    PRIVATE
+    ${GTEST_INCLUDE_DIR})
+
+target_link_libraries(${PROJECT_NAME}
+    PRIVATE
+    GTest::GTest
+    Threads::Threads)
diff --git a/test/test.cpp b/test/test.cpp
new file mode 100644
index 0000000..e94c282
--- /dev/null
+++ b/test/test.cpp
@@ -0,0 +1,74 @@
+#include "../headers/Parser.hpp"
+
+#include <gtest/gtest.h>
+
+namespace {
+
+// Runs EarleyParser on `w` with the bracket-sequence grammar used by all CBS
+// tests: S -> (empty) and S -> (S)S.
+bool RecognizesCbs(const string& w) {
+  const set<char> alphabet = {'(', ')'};
+  const vector<Rule> rules = {Rule("S->"), Rule("S->(S)S")};
+  EarleyParser earley_parser;
+  return earley_parser.Recognize(w, Grammar('S', rules, alphabet));
+}
+
+// Returns `count` copies of `piece` concatenated.
+string Repeat(const string& piece, size_t count) {
+  string result;
+  result.reserve(piece.size() * count);
+  for (size_t i = 0; i < count; ++i) {
+    result += piece;
+  }
+  return result;
+}
+
+}  // namespace
+
+TEST(CBS, CORRECT_VERY_EASY) { EXPECT_TRUE(RecognizesCbs("")); }
+
+TEST(CBS, CORRECT_EASY) { EXPECT_TRUE(RecognizesCbs("()")); }
+
+TEST(CBS, CORRECT_MEDIUM) { EXPECT_TRUE(RecognizesCbs("()(())()()()")); }
+
+TEST(CBS, CORRECT_HARD) {
+  EXPECT_TRUE(RecognizesCbs("()()()()((()))()()()()()((()))"));
+}
+
+TEST(CBS, CORRECT_STRESS_0) { EXPECT_TRUE(RecognizesCbs(Repeat("()", 250))); }
+
+TEST(CBS, CORRECT_STRESS_1) {
+  EXPECT_TRUE(RecognizesCbs(Repeat("(", 100000) + Repeat(")", 100000)));
+}
+
+TEST(CBS, INCORRECT_VERY_EASY) { EXPECT_FALSE(RecognizesCbs("(")); }
+
+TEST(CBS, INCORRECT_EASY) { EXPECT_FALSE(RecognizesCbs("(()")); }
+
+TEST(CBS, INCORRECT_MEDIUM) { EXPECT_FALSE(RecognizesCbs("()(()))()()()")); }
+
+TEST(CBS, INCORRECT_HARD) {
+  EXPECT_FALSE(RecognizesCbs("()()()(()))((()))()()()()()((()))"));
+}
+
+TEST(CBS, INCORRECT_STRESS_0) {
+  EXPECT_FALSE(RecognizesCbs(Repeat("()", 250) + ")"));
+}
+
+TEST(CBS, INCORRECT_STRESS_1) {
+  // One closing bracket more than there are opening ones.
+  EXPECT_FALSE(RecognizesCbs(Repeat("(", 100000) + Repeat(")", 100001)));
+}
+
+TEST(TERMINATE, WRONG_RULE_START) { EXPECT_ANY_THROW(Rule("SS->SS")); }
+
+TEST(TERMINATE, WRONG_RULE_STRUCTURE_0) { EXPECT_ANY_THROW(Rule("S-SS")); }
+
+// Previously a verbatim copy of WRONG_RULE_STRUCTURE_0; now covers a rule
+// whose left side is missing.
+TEST(TERMINATE, WRONG_RULE_STRUCTURE_1) { EXPECT_ANY_THROW(Rule("->SS")); }
+
+int main(int argc, char** argv) {
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}