This repository was archived by the owner on May 28, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnb_bernoulli.m
More file actions
91 lines (65 loc) · 2.63 KB
/
nb_bernoulli.m
File metadata and controls
91 lines (65 loc) · 2.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
clear % Start from an empty workspace so stale variables cannot leak into this run

% --- Training and test sets ---------------------------------------------
% Each MAT-file must define a variable named like the file (TrainingData /
% TestData). Further down, column 1 of each is taken as the documents and
% column 2 as the class labels.
load('TrainingData.mat')
sizeTrainingData=size(TrainingData,1)   % left unsuppressed so the row count is echoed
load('TestData.mat')
sizeTestData=size(TestData,1)           % left unsuppressed so the row count is echoed

% --- Sentiment vocabularies ---------------------------------------------
% The word sheets are read WITHOUT treating their first row as a header.
% With header detection on, the first word of each list is consumed as the
% column name (it surfaced as 'x2_face' / 'a_') and silently drops out of
% the vocabulary, making no_of_words two entries short.
% NOTE(review): assumes the .ods sheets hold words only, with no real header
% row -- confirm against Data/TextODS/*.ods.
positiveVocabPath=fullfile('Data','TextODS','positive.ods');
positiveVocabulary=readtable(positiveVocabPath,'ReadVariableNames',false);
negativeVocabPath=fullfile('Data','TextODS','negative.ods');
negativeVocabulary=readtable(negativeVocabPath,'ReadVariableNames',false);

% Give the word column the same name in both tables so they can be stacked.
negativeVocabulary.Properties.VariableNames{1} = 'words';
positiveVocabulary.Properties.VariableNames{1} = 'words';

% Combined vocabulary: negative words first, then positive words.
vocab = vertcat(negativeVocabulary,positiveVocabulary);
no_of_words = size(vocab,1);

% Class labels; kept as a cell array here and converted with cell2mat later
% in the script.
labels ={1,2} ;
no_of_labels = 2;
% Split the training set: column 1 holds the documents, column 2 the labels
train_labels = TrainingData(:,2);
train_data = TrainingData(:,1);
no_of_docs = length(train_labels);

% Additive (Laplace) smoothing constant
laplacian = 1;

% Unpack the label containers from cell arrays into plain arrays
train_labels = cell2mat(train_labels);
labels = cell2mat(labels);

% Class priors: share of training labels that equal each class label
priors = zeros(no_of_labels,1);
for c = 1:no_of_labels
    is_class = (train_labels(:,1) == labels(c));
    priors(c) = nnz(is_class) / length(train_labels);
end
% Build the count table with one row per vocabulary entry and one column
% per class label.
% NOTE(review): the rows incremented below are the positions of the matching
% entries in train_labels (i.e. training-document indices), not vocabulary
% word ids, even though the table is sized by no_of_words -- confirm this is
% the intended feature layout.
words_labels = zeros(no_of_words,no_of_labels);
for c = 1:no_of_labels
    % Positions of all training labels belonging to class c
    class_rows = find(train_labels == labels(c));
    % find returns each position once, so a single vectorised increment is
    % the same as bumping the rows one by one
    words_labels(class_rows,c) = words_labels(class_rows,c) + 1;
end
% Split the test set: column 1 holds the documents, column 2 the labels
test_data = TestData(:,1);
test_labels = TestData(:,2);
test_docs_length = length(test_labels);

% Score table: one row per test document, one column per class
posteriors_combined = zeros(test_docs_length,no_of_labels);
test_labels=cell2mat(test_labels);

% Log-posterior score of test document i under class j:
%   log((words_labels(i,j) + laplacian) / (N_j + 2*laplacian)) + log(priors(j))
% where N_j is the number of training labels equal to labels(j).
% NOTE(review): the only feature read for test document i is row i of
% words_labels; the contents of test_data are never used. A Bernoulli model
% would normally score the presence/absence of vocabulary words in
% test_data{i} -- confirm the intended feature lookup.
for j = 1:no_of_labels
    % N_j does not depend on the test document, so count it once per class
    class_doc_count = nnz(train_labels(:,1) == labels(j));
    % Smoothed likelihood for all test documents at once
    smoothed = (words_labels(1:test_docs_length,j) + laplacian) ./ (class_doc_count + (laplacian * 2));
    posteriors_combined(:,j) = log(smoothed) + log(priors(j));
end
% Predicted class for each test document = column index of its highest score
scored_rows = 1:length(test_labels);
[~,posteriors] = max(posteriors_combined(scored_rows,:),[],2);

% A prediction is wrong wherever predicted and true label differ, i.e.
% wherever their difference is non-zero
no_of_errors = nnz(posteriors(:) - test_labels(:));

% Accuracy as a percentage of the test set
classifier_accuracy = 100 * (1 - (no_of_errors/length(test_labels)));
fprintf('Classifier accuracy is %.2f\n',classifier_accuracy);