Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions .github/workflows/Test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# CI workflow: builds GoogleTest from source, then builds and runs the TestEarley binary.
name: TestEarley

# Triggered by pushes and pull requests that target the master or Earley branch.
on:
push:
branches: [ master, Earley ]
pull_request:
branches: [ master, Earley ]
jobs:
# Single job on an Ubuntu runner.
Build-ubuntu:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
# The script below installs GoogleTest system-wide, configures the project from test/ and runs the tests.
- name: "Build and run tests"
run : |
git clone https://github.com/google/googletest
cd googletest
cd googletest
cmake ..
make
sudo make install
cd ../../
cmake test
make
./TestEarley
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Input: a grammar and a word.
Output: True if the grammar accepts the word, False otherwise.
60 changes: 60 additions & 0 deletions headers/Grammar.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

using std::cin;
using std::cout;
using std::map;
using std::set;
using std::string;
using std::vector;

// Separator between the left and right side of a textual rule, e.g. "S->aSb".
const string rule_separator{"->"};
// Auxiliary non-terminal used as the left side of the start rule added by the parser.
const char mock_non_term{'1'};
// The same auxiliary non-terminal as a one-character string.
const string mock_non_term_str(1, mock_non_term);
// Alphabet used when a grammar is built without an explicit one.
const set<char> alp{'a', 'b', 'c', '#'};

// A single production of a context-free grammar: one left-side symbol and a
// right-side string of symbols.
class Rule {
public:
    Rule() = default;
    Rule(const Rule&) = default;
    // Builds a rule from its textual form "<left><rule_separator><right>".
    // Deliberately not explicit: callers pass strings where a Rule is expected.
    Rule(const string&);

    Rule& operator=(const Rule&) = default;

    // Left-side symbol.
    char GetLeft() const;
    // Copy of the right-side string.
    string GetRight() const;

    bool operator<(const Rule&) const;
    bool operator==(const Rule&) const;

    // The data members are public because code outside the class reads them directly.
    // left_ has an initializer so a default-constructed Rule never holds an
    // indeterminate character.
    char left_ = '\0';
    string right_ = "";
};

// A grammar: an alphabet, a start symbol and a list of rules.
class Grammar {
public:
    // Also acts as the default constructor (the alphabet defaults to `alp`).
    // Declaring an additional `Grammar() = default;` next to it makes
    // `Grammar g;` ambiguous, so only this overload is provided.
    Grammar(const set<char>& alphabet = alp);
    // Grammar over the default alphabet with the given rules.
    Grammar(const vector<Rule>& rules);
    Grammar(char start, const vector<Rule>& rules, const set<char>& alphabet = alp);
    Grammar(const Grammar&) = default;

    Grammar& operator=(const Grammar&) = default;

    const vector<Rule>& GetRules() const;
    const set<char>& GetAlp() const;
    void AddRule(const Rule&);
    // All rules whose left side equals the given symbol.
    vector<Rule> RulesWithLeftSide(char) const;
    void SetStart(char);
    char GetStart() const;

private:
    set<char> alp_;
    // Initialised so GetStart() is well defined even when no start symbol was given.
    char start_ = '\0';
    vector<Rule> rules_;
};

42 changes: 42 additions & 0 deletions headers/Parser.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#include "Grammar.hpp"

// Abstract recognizer interface: given a word and a grammar, answer true or false.
class Parser {
public:
    virtual ~Parser() = default;

    // The grammar is taken by value, so an implementation works on its own copy.
    virtual bool Recognize(const string& w, Grammar grammar) = 0;
};

// Parser implementation that keeps one set of situations per word position.
class EarleyParser : public Parser {
public:
    EarleyParser() = default;

    bool Recognize(const string& w, Grammar grammar) override;

private:
    // A rule together with a dot position and a start position; defined after this class.
    class Situation;

    // Rebuilds sit_sets_ for the word `w`.
    void Earley(const string& w, const Grammar& grammar);
    // Moves situations of set i whose next symbol is `c` into set i + 1.
    void Scan(size_t i, char c);
    // Adds to set i the rules for the symbols that follow the dot.
    void Predict(size_t i, const Grammar& grammar);
    // Advances situations that were waiting for a rule completed in set i.
    void Complete(size_t i);

    // sit_sets_[k] holds the situations for position k of the word (0..w.size()).
    vector<set<Situation>> sit_sets_;
};

// A rule with a dot position inside its right side and the index of the set
// in which the situation was started.
class EarleyParser::Situation {
public:
    Situation() = delete;
    Situation(const Rule& rule, size_t dot_position, size_t start_position);

    // True when the dot stands at the end of the right side.
    bool Completed() const;
    // Character of the right side at the dot position.
    char GetSymbol() const;

    // Comparisons; operator< is what std::set uses to order situations.
    bool operator<(const Situation& other) const;
    bool operator==(const Situation& other) const;
    bool operator!=(const Situation& other) const;

    Rule rule;
    size_t dot_pos = 0;
    size_t start_pos = 0;
};

62 changes: 62 additions & 0 deletions src/Grammar.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#include "../headers/Grammar.hpp"

// Parses "<left><rule_separator><right>".
// Throws std::runtime_error when the separator is missing or when the left
// side is not exactly one character long.
Rule::Rule(const string& s) {
    const size_t separator_at = s.find(rule_separator);
    if (separator_at == string::npos) {
        throw std::runtime_error("Incorrect rule!");
    }
    // The left side is everything before the separator, so its length equals separator_at.
    if (separator_at != 1) {
        throw std::runtime_error("Incorrect left side for Context-free grammar!");
    }
    left_ = s.front();
    right_ = s.substr(separator_at + rule_separator.size());
}

char Rule::GetLeft() const { return left_; }

string Rule::GetRight() const { return right_; }

// Orders rules by left side first, then by right side.
bool Rule::operator<(const Rule& other) const {
    if (left_ != other.left_) {
        return left_ < other.left_;
    }
    return right_ < other.right_;
}

// Two rules are equal when both their left and right sides match.
// The earlier form `!(*this < other && other < *this)` was true for every
// pair, because a strict ordering can never hold in both directions at once.
bool Rule::operator==(const Rule& other) const {
    return left_ == other.left_ && right_ == other.right_;
}

// Grammar with the given alphabet and no rules.
// start_ is initialised explicitly so GetStart() never reads an indeterminate value.
Grammar::Grammar(const set<char>& alphabet)
    : alp_(alphabet), start_('\0') {}

// Grammar over the default alphabet `alp` with the given rules.
// start_ is initialised explicitly so GetStart() never reads an indeterminate
// value when the caller has not called SetStart().
Grammar::Grammar(const vector<Rule>& rules)
    : alp_(alp), start_('\0'), rules_(rules) {}

Grammar::Grammar(char start, const vector<Rule>& rules, const set<char>& alphabet)
: alp_(alphabet), start_(start), rules_(rules) {}

const vector<Rule>& Grammar::GetRules() const { return rules_; }

const set<char>& Grammar::GetAlp() const { return alp_; }

// Appends a copy of `rule` to the end of the rule list.
void Grammar::AddRule(const Rule& rule) {
    rules_.push_back(rule);
}

vector<Rule> Grammar::RulesWithLeftSide(char left) const {
vector<Rule> v;
for (const Rule& rule : rules_) {
if (rule.GetLeft() == left) {
v.emplace_back(rule);
}
}
return v;
}

void Grammar::SetStart(char c) {
start_ = c;
}

char Grammar::GetStart() const {
return start_;
}

123 changes: 123 additions & 0 deletions src/Parser.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
#include "../headers/Parser.hpp"

// Stores a copy of the rule together with the dot and start positions.
EarleyParser::Situation::Situation(const Rule& source_rule, size_t dot,
                                   size_t origin)
    : rule(source_rule),
      dot_pos(dot),
      start_pos(origin) {}

// True when the dot has reached the end of the rule's right side.
bool EarleyParser::Situation::Completed() const {
    const string right_side = rule.GetRight();
    return right_side.size() == dot_pos;
}

// Character of the right side at the dot position.
// When the dot is at the very end, std::string::operator[] yields '\0'.
char EarleyParser::Situation::GetSymbol() const {
    const string right_side = rule.GetRight();
    return right_side[dot_pos];
}

// Strict ordering on (rule, dot position, start position); used by std::set.
// Rules are compared through Rule::operator< (left side, then right side)
// instead of reading Rule's data members directly.
bool EarleyParser::Situation::operator<(const Situation& other) const {
    if (rule < other.rule) {
        return true;
    }
    if (other.rule < rule) {
        return false;
    }
    if (dot_pos != other.dot_pos) {
        return dot_pos < other.dot_pos;
    }
    return start_pos < other.start_pos;
}

// Two situations are equal when neither is ordered before the other.
// Defining equality through operator< guarantees that == always agrees with
// the ordering std::set uses to decide whether an element is already present.
bool EarleyParser::Situation::operator==(const Situation& other) const {
    return !(*this < other) && !(other < *this);
}

// Negation of operator==.
bool EarleyParser::Situation::operator!=(const Situation& other) const {
    const bool same = (*this == other);
    return !same;
}

bool EarleyParser::Recognize(const string& w, Grammar grammar) {
string mock_rule = mock_non_term_str + rule_separator + string(1, grammar.GetStart());
grammar.SetStart(mock_non_term);
grammar.AddRule(mock_rule);
Earley(w, grammar);
Situation final(mock_rule, 1, 0);
return sit_sets_[w.size()].contains(final);
}

void EarleyParser::Earley(const string& w, const Grammar& grammar) {
sit_sets_.clear();
sit_sets_.resize(w.size() + 1);

Rule start_rule = grammar.RulesWithLeftSide(mock_non_term)[0];
sit_sets_[0].emplace(start_rule, 0, 0);

int64_t prev_sz = -1;
while (prev_sz != sit_sets_[0].size()) {
prev_sz = sit_sets_[0].size();

Predict(0, grammar);
Complete(0);
}

for (size_t i = 1; i <= w.size(); ++i) {
Scan(i - 1, w[i - 1]);

prev_sz = -1;
while (prev_sz != sit_sets_[i].size()) {
prev_sz = sit_sets_[i].size();

Predict(i, grammar);
Complete(i);
}
}
}

// For every situation in set i whose symbol after the dot is `c`, adds the
// same situation with the dot moved one step to set i + 1.
void EarleyParser::Scan(size_t i, char c) {
    for (const auto& sit : sit_sets_[i]) {
        // A completed situation has nothing after the dot. GetSymbol() yields
        // '\0' there, so without this check a '\0' inside the word would move
        // the dot past the end of the rule.
        if (sit.Completed()) {
            continue;
        }
        if (sit.GetSymbol() == c) {
            sit_sets_[i + 1].emplace(sit.rule, sit.dot_pos + 1, sit.start_pos);
        }
    }
}

void EarleyParser::Predict(size_t i, const Grammar& grammar) {
for (const auto& sit : sit_sets_[i]) {
char start = sit.GetSymbol();
for (const auto& new_rule : grammar.RulesWithLeftSide(start)) {
Situation s(new_rule, 0, i);
sit_sets_[i].insert(s);
}
}
}

// For every completed situation in set i, looks at the set where it started
// and advances the dot of each situation there whose next symbol is the
// completed rule's left side; the advanced situations go into set i.
void EarleyParser::Complete(size_t i) {
    auto& current = sit_sets_[i];
    for (const auto& finished : current) {
        if (!finished.Completed()) {
            continue;
        }
        const char produced = finished.rule.GetLeft();
        for (const auto& waiting : sit_sets_[finished.start_pos]) {
            if (waiting.GetSymbol() != produced) {
                continue;
            }
            current.emplace(waiting.rule, waiting.dot_pos + 1, waiting.start_pos);
        }
    }
}

18 changes: 18 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
cmake_minimum_required(VERSION 3.17)
project(TestEarley)

# The sources use C++20 library features (e.g. std::set::contains), so require
# the standard instead of letting CMake silently fall back to an older one.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

find_package(Threads REQUIRED)
find_package(GTest REQUIRED)

# Test executable: library sources and headers plus the test driver.
add_executable(TestEarley "../src/Grammar.cpp" "../headers/Grammar.hpp" "../src/Parser.cpp" "../headers/Parser.hpp" "test.cpp")

target_include_directories(${PROJECT_NAME}
    PRIVATE
        ${GTEST_INCLUDE_DIR})

target_link_libraries(${PROJECT_NAME}
    PRIVATE
        GTest::GTest
        Threads::Threads)
Loading